From: Paul Jackson The hooks in the slab cache allocator code path for support of NUMA mempolicies and cpuset memory spreading are in an important code path. Many systems will use neither feature. This patch optimizes those hooks down to a single check of some bits in the current task's task_struct flags. For non-NUMA systems, this hook and related code is already ifdef'd out. The optimization is done by using another task flag, set if the task is using a non-default NUMA mempolicy. Taking this flag bit along with the PF_MEM_SPREAD flag bit added earlier in this 'cpuset memory spreading' patch set, one can check for either of these special-case memory placement mechanisms with a single test of the current task's task_struct flags. This patch also tightens up the code, to save a few bytes of kernel text space, and moves some of it out of line. Due to the nested inlines called from multiple places, we were ending up with three copies of this code, which, once we get off the main code path (for local node allocation), seems a bit wasteful of instruction memory. 
Signed-off-by: Paul Jackson Signed-off-by: Andrew Morton --- include/linux/mempolicy.h | 5 ++++ include/linux/sched.h | 1 kernel/fork.c | 1 mm/mempolicy.c | 18 +++++++++++++++++ mm/slab.c | 37 ++++++++++++++++++++++-------------- 5 files changed, 48 insertions(+), 14 deletions(-) diff -puN include/linux/mempolicy.h~cpuset-memory-spread-slab-cache-optimizations include/linux/mempolicy.h --- devel/include/linux/mempolicy.h~cpuset-memory-spread-slab-cache-optimizations 2006-02-07 13:19:41.000000000 -0800 +++ devel-akpm/include/linux/mempolicy.h 2006-02-07 13:19:41.000000000 -0800 @@ -147,6 +147,7 @@ extern void mpol_rebind_policy(struct me extern void mpol_rebind_task(struct task_struct *tsk, const nodemask_t *new); extern void mpol_rebind_mm(struct mm_struct *mm, nodemask_t *new); +extern void mpol_set_task_struct_flag(struct task_struct *p); #define set_cpuset_being_rebound(x) (cpuset_being_rebound = (x)) #ifdef CONFIG_CPUSET @@ -248,6 +249,10 @@ static inline void mpol_rebind_mm(struct { } +static inline void mpol_set_task_struct_flag(struct task_struct *p) +{ +} + #define set_cpuset_being_rebound(x) do {} while (0) static inline struct zonelist *huge_zonelist(struct vm_area_struct *vma, diff -puN include/linux/sched.h~cpuset-memory-spread-slab-cache-optimizations include/linux/sched.h --- devel/include/linux/sched.h~cpuset-memory-spread-slab-cache-optimizations 2006-02-07 13:19:41.000000000 -0800 +++ devel-akpm/include/linux/sched.h 2006-02-07 13:19:42.000000000 -0800 @@ -934,6 +934,7 @@ static inline void put_task_struct(struc #define PF_RANDOMIZE 0x00800000 /* randomize virtual address space */ #define PF_SWAPWRITE 0x01000000 /* Allowed to write to swap */ #define PF_MEM_SPREAD 0x04000000 /* Spread some memory over cpuset */ +#define PF_MEMPOLICY 0x08000000 /* Non-default NUMA mempolicy */ /* * Only the _current_ task can read/write to tsk->flags, but other diff -puN kernel/fork.c~cpuset-memory-spread-slab-cache-optimizations kernel/fork.c --- 
devel/kernel/fork.c~cpuset-memory-spread-slab-cache-optimizations 2006-02-07 13:19:41.000000000 -0800 +++ devel-akpm/kernel/fork.c 2006-02-07 13:19:42.000000000 -0800 @@ -1018,6 +1018,7 @@ static task_t *copy_process(unsigned lon p->mempolicy = NULL; goto bad_fork_cleanup_cpuset; } + mpol_set_task_struct_flag(p); #endif #ifdef CONFIG_DEBUG_MUTEXES diff -puN mm/mempolicy.c~cpuset-memory-spread-slab-cache-optimizations mm/mempolicy.c --- devel/mm/mempolicy.c~cpuset-memory-spread-slab-cache-optimizations 2006-02-07 13:19:41.000000000 -0800 +++ devel-akpm/mm/mempolicy.c 2006-02-07 13:19:42.000000000 -0800 @@ -423,6 +423,7 @@ long do_set_mempolicy(int mode, nodemask return PTR_ERR(new); mpol_free(current->mempolicy); current->mempolicy = new; + mpol_set_task_struct_flag(current); if (new && new->policy == MPOL_INTERLEAVE) current->il_next = first_node(new->v.nodes); return 0; @@ -1666,6 +1667,23 @@ void mpol_rebind_mm(struct mm_struct *mm } /* + * Update task->flags PF_MEMPOLICY bit: set iff non-default mempolicy. + * Allows more rapid checking of this (combined perhaps with other + * PF_* flag bits) on memory allocation hot code paths. + * + * The task struct 'p' should either be current or a newly + * forked child that is not visible on the task list yet. + */ + +void mpol_set_task_struct_flag(struct task_struct *p) +{ + if (p->mempolicy) + p->flags |= PF_MEMPOLICY; + else + p->flags &= ~PF_MEMPOLICY; +} + +/* * Display pages allocated per node and memory policy via /proc. 
*/ diff -puN mm/slab.c~cpuset-memory-spread-slab-cache-optimizations mm/slab.c --- devel/mm/slab.c~cpuset-memory-spread-slab-cache-optimizations 2006-02-07 13:19:41.000000000 -0800 +++ devel-akpm/mm/slab.c 2006-02-07 13:19:42.000000000 -0800 @@ -859,6 +859,7 @@ static struct array_cache *alloc_arrayca #ifdef CONFIG_NUMA static void *__cache_alloc_node(struct kmem_cache *, gfp_t, int); +static void *alternate_node_alloc(struct kmem_cache *, gfp_t); static struct array_cache **alloc_alien_cache(int node, int limit) { @@ -2754,20 +2755,9 @@ static inline void *____cache_alloc(stru struct array_cache *ac; #ifdef CONFIG_NUMA - if (unlikely(current->mempolicy && !in_interrupt())) { - int nid = slab_node(current->mempolicy); - - if (nid != numa_node_id()) - return __cache_alloc_node(cachep, flags, nid); - } - if (unlikely(cpuset_mem_spread_check() && - (cachep->flags & SLAB_MEM_SPREAD) && - !in_interrupt())) { - int nid = cpuset_mem_spread_node(); - - if (nid != numa_node_id()) - return __cache_alloc_node(cachep, flags, nid); - } + if (unlikely(current->flags & (PF_MEM_SPREAD|PF_MEMPOLICY))) + if ((objp = alternate_node_alloc(cachep, flags)) != NULL) + return objp; #endif check_irq_off(); @@ -2802,6 +2792,25 @@ static __always_inline void *__cache_all #ifdef CONFIG_NUMA /* + * Try allocating on another node if PF_MEM_SPREAD or PF_MEMPOLICY. + */ +static void *alternate_node_alloc(struct kmem_cache *cachep, gfp_t flags) +{ + int nid_alloc, nid_here; + + if (in_interrupt()) + return NULL; + nid_alloc = nid_here = numa_node_id(); + if (cpuset_mem_spread_check() && (cachep->flags & SLAB_MEM_SPREAD)) + nid_alloc = cpuset_mem_spread_node(); + else if (current->mempolicy) + nid_alloc = slab_node(current->mempolicy); + if (nid_alloc != nid_here) + return __cache_alloc_node(cachep, flags, nid_alloc); + return NULL; +} + +/* * A interface to enable slab creation on nodeid */ static void *__cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, _