SLUB: add performance counters for the allocation and free paths

Adds four pc_item counters (PC_SLAB_ALLOC_FAST, PC_SLAB_ALLOC_SLOW,
PC_SLAB_FREE_FAST, PC_SLAB_FREE_SLOW) and records one sample per call in
the inlined slab_alloc()/slab_free() fast and slow paths.

To let a single struct pc feed different counters depending on the path
taken, the counter id is removed from struct pc and is passed to
pc_bytes()/pc_stop() instead; INIT_PC() and pc_start() lose their item
argument. kernel/perf.o is built when CONFIG_PERFCOUNT is set.

With CONFIG_PERFCOUNT disabled, INIT_PC() must still declare its variable:
pc_bytes() is a real (empty) inline function in that configuration, so the
callers' "&x" / "&xx" arguments have to name an existing object. The stub
therefore declares a zero-initialised struct pc instead of expanding to an
empty statement.

NOTE(review): text inside angle brackets (the #include targets and an
e-mail address in the kernel/Makefile context) is missing from this copy
of the patch and has been left as found; restore it from the original
posting before applying. Blank context lines were reconstructed from the
hunk line counts.
NOTE(review): the "slow:" exit of slab_alloc() returns the result of
__slab_alloc() without the __GFP_ZERO memset - confirm __slab_alloc()
zeroes the object itself.
NOTE(review): the slow path records s->objsize whether or not
__slab_alloc() produced an object - confirm that is the intended
accounting for failed allocations.

---
 include/linux/perf.h     |   22 ++++++++++++----------
 include/linux/slub_def.h |   14 ++++++++++++--
 kernel/Makefile          |    1 +
 mm/slub.c                |    1 +
 4 files changed, 26 insertions(+), 12 deletions(-)

Index: linux-2.6.23-rc1/include/linux/perf.h
===================================================================
--- linux-2.6.23-rc1.orig/include/linux/perf.h	2007-07-26 23:20:43.000000000 -0700
+++ linux-2.6.23-rc1/include/linux/perf.h	2007-07-26 23:20:46.000000000 -0700
@@ -20,6 +20,10 @@ enum pc_item {
 	PC_DIRECT_RECLAIM_RECLAIMED,
 	PC_RECLAIM_SCANNED,
 	PC_RECLAIM_RECLAIMED,
+	PC_SLAB_ALLOC_FAST,
+	PC_SLAB_ALLOC_SLOW,
+	PC_SLAB_FREE_FAST,
+	PC_SLAB_FREE_SLOW,
 	NR_PC_ITEMS
 };
 
@@ -29,30 +33,28 @@ enum pc_item {
 struct pc {
 	unsigned long time;
 	int processor;
-	enum pc_item item;
 };
 
-#define pc_stop(__pc) pc_bytes(__pc, 0)
+#define pc_stop(__pc, __nr) pc_bytes(__pc, 0, __nr)
 
 #ifdef CONFIG_PERFCOUNT
 
-#define INIT_PC(__var, __item) struct pc __var = \
-		{ get_cycles(), smp_processor_id(), __item }
+#define INIT_PC(__var) struct pc __var = \
+		{ get_cycles(), smp_processor_id() }
 
-static inline void pc_start(struct pc *pc, enum pc_item nr)
+static inline void pc_start(struct pc *pc)
 {
-	pc->item = nr;
 	pc->processor = smp_processor_id();
 	pc->time = get_cycles();
 }
 
-void pc_bytes(struct pc *pc, unsigned long bytes);
+void pc_bytes(struct pc *pc, unsigned long bytes, enum pc_item nr);
 
 #else
 
-#define INIT_PC(__var, __item) do { } while(0)
-static inline void pc_start(struct pc *pc, enum pc_item nr) {}
-static inline void pc_bytes(struct pc *pc, unsigned long bytes) {}
+#define INIT_PC(__var) struct pc __var = { 0, 0 }
+static inline void pc_start(struct pc *pc) {}
+static inline void pc_bytes(struct pc *pc, unsigned long bytes, enum pc_item nr) {}
 #endif
 
 
Index: linux-2.6.23-rc1/kernel/Makefile
===================================================================
--- linux-2.6.23-rc1.orig/kernel/Makefile	2007-07-26 23:19:55.000000000 -0700
+++ linux-2.6.23-rc1/kernel/Makefile	2007-07-26 23:20:46.000000000 -0700
@@ -51,6 +51,7 @@ obj-$(CONFIG_RELAY) += relay.o
 obj-$(CONFIG_SYSCTL) += utsname_sysctl.o
 obj-$(CONFIG_TASK_DELAY_ACCT) += delayacct.o
 obj-$(CONFIG_TASKSTATS) += taskstats.o tsacct.o
+obj-$(CONFIG_PERFCOUNT) += perf.o
 
 ifneq ($(CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER),y)
 # According to Alan Modra , the -fno-omit-frame-pointer is
Index: linux-2.6.23-rc1/mm/slub.c
===================================================================
--- linux-2.6.23-rc1.orig/mm/slub.c	2007-07-26 23:19:55.000000000 -0700
+++ linux-2.6.23-rc1/mm/slub.c	2007-07-26 23:37:12.000000000 -0700
@@ -20,6 +20,7 @@
 #include
 #include
 #include
+#include
 
 /*
  * Lock order:
Index: linux-2.6.23-rc1/include/linux/slub_def.h
===================================================================
--- linux-2.6.23-rc1.orig/include/linux/slub_def.h	2007-07-26 23:37:12.000000000 -0700
+++ linux-2.6.23-rc1/include/linux/slub_def.h	2007-07-26 23:39:12.000000000 -0700
@@ -12,6 +12,7 @@
 #include
 #include
 #include
+#include
 
 struct kmem_cache_node {
 	spinlock_t list_lock;	/* Protect partial list and nr_partial */
@@ -217,6 +218,8 @@ void *__slab_alloc(struct kmem_cache *s,
 void __slab_free(struct kmem_cache *s, struct page *page,
 					void *x, void *addr);
 
+#include
+
 #ifndef ARCH_HAS_SLAB_ALLOC
 /*
  * Inlined fastpath so that allocation functions (kmalloc, kmem_cache_alloc)
@@ -234,6 +237,7 @@ static void __always_inline *slab_alloc(
 	struct page *page;
 	void **object;
 	unsigned long flags;
+	INIT_PC(x);
 
 	local_irq_save(flags);
 	page = s->cpu_slab[smp_processor_id()];
@@ -253,13 +257,16 @@ static void __always_inline *slab_alloc(
 	page->lockless_freelist = object[page->offset];
 	local_irq_restore(flags);
 
-	if (unlikely((gfpflags & __GFP_ZERO) && object))
+	if (unlikely((gfpflags & __GFP_ZERO)))
 		memset(object, 0, s->objsize);
+	pc_bytes(&x, s->objsize, PC_SLAB_ALLOC_FAST);
 	return object;
 
 slow:
 	local_irq_restore(flags);
-	return __slab_alloc(s, gfpflags, node, addr);
+	object = __slab_alloc(s, gfpflags, node, addr);
+	pc_bytes(&x, s->objsize, PC_SLAB_ALLOC_SLOW);
+	return object;
 }
 #endif
 
@@ -280,6 +287,7 @@ static void __always_inline slab_free(st
 {
 	void **object = (void *)x;
 	unsigned long flags;
+	INIT_PC(xx);
 
 	local_irq_save(flags);
 	if (unlikely(page != s->cpu_slab[smp_processor_id()]))
@@ -291,11 +299,13 @@ static void __always_inline slab_free(st
 	object[page->offset] = page->lockless_freelist;
 	page->lockless_freelist = object;
 	local_irq_restore(flags);
+	pc_bytes(&xx, s->objsize, PC_SLAB_FREE_FAST);
 	return;
 
 slow:
 	local_irq_restore(flags);
 	__slab_free(s, page, x, addr);
+	pc_bytes(&xx, s->objsize, PC_SLAB_FREE_SLOW);
 }
 #endif
 