Index: linux-2.6.15-rc3-mm1/mm/mempolicy.c =================================================================== --- linux-2.6.15-rc3-mm1.orig/mm/mempolicy.c 2005-12-01 18:10:51.000000000 -0800 +++ linux-2.6.15-rc3-mm1/mm/mempolicy.c 2005-12-01 23:14:08.000000000 -0800 @@ -112,6 +112,8 @@ struct mempolicy default_policy = { .policy = MPOL_DEFAULT, }; +struct mempolicy *global_policy[NR_POLS]; + /* Do sanity checking on a policy */ static int mpol_check_policy(int mode, nodemask_t *nodes) { @@ -191,6 +193,12 @@ static struct mempolicy *mpol_new(int mo return policy; } +struct mempolicy *mpol_dup(struct mempolicy *pol) { + if (pol) + atomic_inc(&pol->refcnt); + return pol; +} + static void gather_stats(struct page *, void *); static void migrate_page_add(struct vm_area_struct *vma, struct page *page, struct list_head *pagelist, unsigned long flags); @@ -402,16 +410,50 @@ static int contextualize_policy(int mode long do_set_mempolicy(int mode, nodemask_t *nodes) { struct mempolicy *new; + struct mempolicy **curr = current->mempolicy; if (contextualize_policy(mode, nodes)) return -EINVAL; - new = mpol_new(mode, nodes); + new = mpol_new(mode & MPOL_POL_MASK, nodes); if (IS_ERR(new)) return PTR_ERR(new); - mpol_free(current->mempolicy); - current->mempolicy = new; + + if (mode & MPOL_GLOBAL) + curr = global_policy; + +// if (mode & MPOL_CPUSET) +// curr = cpuset_get_policy(current); + + /* + * No specification means that all policies need to be set for backwards + * compatibility. 
+ */ + if ((mode & MPOL_KIND_MASK) == 0) + mode = MPOL_OTHER | MPOL_PAGECACHE | MPOL_SLAB | MPOL_SHMEM | MPOL_HUGEPAGE; + + if (mode & MPOL_OTHER) { + mpol_free(curr[POL_OTHER]); + curr[POL_OTHER] = mpol_dup(new); + } + if (mode & MPOL_PAGECACHE) { + mpol_free(curr[POL_PAGECACHE]); + curr[POL_PAGECACHE] = mpol_dup(new); + } + if (mode & MPOL_SLAB) { + mpol_free(curr[POL_SLAB]); + curr[POL_SLAB] = mpol_dup(new); + } + if (mode & MPOL_SHMEM) { + mpol_free(curr[POL_SHMEM]); + curr[POL_SHMEM] = mpol_dup(new); + } + if (mode & MPOL_HUGEPAGE) { + mpol_free(curr[POL_HUGEPAGE]); + curr[POL_HUGEPAGE] = mpol_dup(new); + } if (new && new->policy == MPOL_INTERLEAVE) current->il_next = first_node(new->v.nodes); + mpol_free(new); return 0; } @@ -464,10 +506,11 @@ long do_get_mempolicy(int *policy, nodem int err; struct mm_struct *mm = current->mm; struct vm_area_struct *vma = NULL; - struct mempolicy *pol = current->mempolicy; + struct mempolicy *pol; + struct mempolicy **curr; cpuset_update_current_mems_allowed(); - if (flags & ~(unsigned long)(MPOL_F_NODE|MPOL_F_ADDR)) + if (flags & ~(unsigned long)(MPOL_F_NODE|MPOL_F_ADDR|MPOL_GLOBAL|MPOL_KIND_MASK)) return -EINVAL; if (flags & MPOL_F_ADDR) { down_read(&mm->mmap_sem); @@ -483,6 +526,23 @@ long do_get_mempolicy(int *policy, nodem } else if (addr) return -EINVAL; + if (flags & MPOL_GLOBAL) + curr = global_policy; + else + curr = current->mempolicy; + + if (flags & MPOL_OTHER) + pol = curr[POL_OTHER]; + else if (flags & MPOL_PAGECACHE) + pol = curr[POL_PAGECACHE]; + else if (flags & MPOL_SLAB) + pol = curr[POL_SLAB]; + else if (flags & MPOL_HUGEPAGE) + pol = curr[POL_HUGEPAGE]; + else if (flags & MPOL_SHMEM) + pol = curr[POL_SHMEM]; + else + pol = curr[POL_OTHER]; if (!pol) pol = &default_policy; @@ -492,7 +552,7 @@ long do_get_mempolicy(int *policy, nodem if (err < 0) goto out; *policy = err; - } else if (pol == current->mempolicy && + } else if (pol == curr[POL_OTHER] && pol->policy == MPOL_INTERLEAVE) { *policy = current->il_next; } else
{ @@ -1073,9 +1133,9 @@ asmlinkage long compat_sys_mbind(compat_ /* Return effective policy for a VMA */ static struct mempolicy * get_vma_policy(struct task_struct *task, - struct vm_area_struct *vma, unsigned long addr) + struct vm_area_struct *vma, unsigned long addr, int poltype) { - struct mempolicy *pol = task->mempolicy; + struct mempolicy *pol = task->mempolicy[poltype]; if (vma) { if (vma->vm_ops && vma->vm_ops->get_policy) @@ -1193,7 +1253,7 @@ static inline unsigned interleave_nid(st /* Return a zonelist suitable for a huge page allocation. */ struct zonelist *huge_zonelist(struct vm_area_struct *vma, unsigned long addr) { - struct mempolicy *pol = get_vma_policy(current, vma, addr); + struct mempolicy *pol = get_vma_policy(current, vma, addr, POL_HUGEPAGE); if (pol->policy == MPOL_INTERLEAVE) { unsigned nid; @@ -1246,7 +1306,7 @@ static struct page *alloc_page_interleav struct page * alloc_page_vma(gfp_t gfp, struct vm_area_struct *vma, unsigned long addr) { - struct mempolicy *pol = get_vma_policy(current, vma, addr); + struct mempolicy *pol = get_vma_policy(current, vma, addr, POL_OTHER); cpuset_update_current_mems_allowed(); @@ -1280,7 +1340,7 @@ alloc_page_vma(gfp_t gfp, struct vm_area */ struct page *alloc_pages_current(gfp_t gfp, unsigned order) { - struct mempolicy *pol = current->mempolicy; + struct mempolicy *pol = current->mempolicy[POL_OTHER]; if ((gfp & __GFP_WAIT) && !in_interrupt()) cpuset_update_current_mems_allowed(); @@ -1420,7 +1480,7 @@ static void sp_insert(struct shared_poli struct mempolicy * mpol_shared_policy_lookup(struct shared_policy *sp, unsigned long idx) { - struct mempolicy *pol = NULL; + struct mempolicy *pol = current->mempolicy[POL_SHMEM]; struct sp_node *sn; if (!sp->root.rb_node) @@ -1632,7 +1692,10 @@ static void rebind_policy(struct mempoli */ void numa_policy_rebind(const nodemask_t *old, const nodemask_t *new) { - rebind_policy(current->mempolicy, old, new); + int i; + + for(i = 0; i < NR_POLS; i++) + 
rebind_policy(current->mempolicy[i], old, new); } /* @@ -1741,7 +1804,7 @@ int show_numa_map(struct seq_file *m, vo if (md->pages) { mpol_to_str(buffer, sizeof(buffer), - get_vma_policy(task, vma, vma->vm_start)); + get_vma_policy(task, vma, vma->vm_start, POL_OTHER)); seq_printf(m, "%08lx %s pages=%lu mapped=%lu maxref=%lu", vma->vm_start, buffer, md->pages, @@ -1763,3 +1826,27 @@ int show_numa_map(struct seq_file *m, vo return 0; } +/* Duplicate a memory policy */ +int mpols_dup(struct mempolicy **p) +{ + int i; + + for(i=0;imempolicy && !in_interrupt())) { - int nid = slab_node(current->mempolicy); + if (unlikely(current->mempolicy[POL_SLAB] && !in_interrupt())) { + int nid = slab_node(current->mempolicy[POL_SLAB]); if (nid != numa_node_id()) return __cache_alloc_node(cachep, flags, nid); Index: linux-2.6.15-rc3-mm1/include/linux/mempolicy.h =================================================================== --- linux-2.6.15-rc3-mm1.orig/include/linux/mempolicy.h 2005-12-01 14:14:20.000000000 -0800 +++ linux-2.6.15-rc3-mm1/include/linux/mempolicy.h 2005-12-01 23:10:06.000000000 -0800 @@ -2,10 +2,11 @@ #define _LINUX_MEMPOLICY_H 1 #include - +#include /* * NUMA memory policies for Linux. * Copyright 2003,2004 Andi Kleen SuSE Labs + * (C) Copyright 2005, 2006 Christoph Lameter, Silicon Graphics, Inc. 
*/ /* Policies */ @@ -15,6 +16,23 @@ #define MPOL_INTERLEAVE 3 #define MPOL_MAX MPOL_INTERLEAVE +#define MPOL_POL_MASK ((1 << MPOL_KIND_SHIFT) - 1) + +/* Policy specification for the mode in set and get mempolicy */ +#define MPOL_OTHER (1<<2) /* Set / Get Other memory policy */ +#define MPOL_PAGECACHE (1<<3) /* Set / Get pagecache memory policy */ +#define MPOL_SLAB (1<<4) /* Set / Get slab cache policy */ +#define MPOL_SHMEM (1<<5) /* Set / Get shmem policy */ +#define MPOL_HUGEPAGE (1<<6) /* Set / Get hugepage policy */ + +#define MPOL_KIND_MASK 0x7c +#define MPOL_KIND_SHIFT 2 + + +/* System policies that may be set */ +#define MPOL_GLOBAL (1<<7) /* Set / Get Global policy */ +#define MPOL_CPUSET (1<<8) /* Set / Get cpuset policy */ + /* Flags for get_mem_policy */ #define MPOL_F_NODE (1<<0) /* return next IL mode instead of node mask */ @@ -26,6 +44,19 @@ #define MPOL_MF_MOVE_ALL (1<<2) /* Move every page to conform to mapping */ #define MPOL_MF_INTERNAL (1<<3) /* Internal flags start here */ +#define POL_OTHER 0 +#if NR_POLS == 5 +#define POL_PAGECACHE 1 +#define POL_SLAB 2 +#define POL_SHMEM 3 +#define POL_HUGEPAGE 4 +#else +#define POL_PAGECACHE 0 +#define POL_SLAB 0 +#define POL_SHMEM 0 +#define POL_HUGEPAGE 0 +#endif + #ifdef __KERNEL__ #include @@ -144,6 +175,9 @@ void mpol_free_shared_policy(struct shar struct mempolicy *mpol_shared_policy_lookup(struct shared_policy *sp, unsigned long idx); +int mpols_dup(struct mempolicy **); +void mpols_free(struct mempolicy **); + extern void numa_default_policy(void); extern void numa_policy_init(void); extern void numa_policy_rebind(const nodemask_t *old, const nodemask_t *new); @@ -189,6 +223,9 @@ static inline int mpol_set_shared_policy return -EINVAL; } +#define mpols_dup(p) 0 +#define mpols_free(p) do { } while (0) + static inline void mpol_shared_policy_init(struct shared_policy *info) { } Index: linux-2.6.15-rc3-mm1/kernel/fork.c =================================================================== ---
linux-2.6.15-rc3-mm1.orig/kernel/fork.c 2005-12-01 14:14:20.000000000 -0800 +++ linux-2.6.15-rc3-mm1/kernel/fork.c 2005-12-01 18:39:21.000000000 -0800 @@ -971,15 +971,9 @@ static task_t *copy_process(unsigned lon p->io_context = NULL; p->io_wait = NULL; p->audit_context = NULL; -#ifdef CONFIG_NUMA - p->mempolicy = mpol_copy(p->mempolicy); - if (IS_ERR(p->mempolicy)) { - retval = PTR_ERR(p->mempolicy); - p->mempolicy = NULL; + retval = mpols_dup(p->mempolicy); + if (retval) goto bad_fork_cleanup; - } -#endif - p->tgid = p->pid; if (clone_flags & CLONE_THREAD) p->tgid = current->tgid; @@ -1177,9 +1171,7 @@ bad_fork_cleanup_audit: bad_fork_cleanup_security: security_task_free(p); bad_fork_cleanup_policy: -#ifdef CONFIG_NUMA - mpol_free(p->mempolicy); -#endif + mpols_free(p->mempolicy); bad_fork_cleanup: if (p->binfmt) module_put(p->binfmt->module); Index: linux-2.6.15-rc3-mm1/kernel/exit.c =================================================================== --- linux-2.6.15-rc3-mm1.orig/kernel/exit.c 2005-12-01 14:14:20.000000000 -0800 +++ linux-2.6.15-rc3-mm1/kernel/exit.c 2005-12-01 18:41:20.000000000 -0800 @@ -866,10 +866,7 @@ fastcall NORET_TYPE void do_exit(long co tsk->exit_code = code; proc_exit_connector(tsk); exit_notify(tsk); -#ifdef CONFIG_NUMA - mpol_free(tsk->mempolicy); - tsk->mempolicy = NULL; -#endif + mpols_free(tsk->mempolicy); /* PF_DEAD causes final put_task_struct after we schedule. 
*/ preempt_disable(); Index: linux-2.6.15-rc3-mm1/include/linux/numa.h =================================================================== --- linux-2.6.15-rc3-mm1.orig/include/linux/numa.h 2005-11-28 19:51:27.000000000 -0800 +++ linux-2.6.15-rc3-mm1/include/linux/numa.h 2005-12-01 18:26:45.000000000 -0800 @@ -13,4 +13,8 @@ #define MAX_NUMNODES (1 << NODES_SHIFT) +#ifndef NR_POLS +#define NR_POLS 1 +#endif + #endif /* _LINUX_NUMA_H */ Index: linux-2.6.15-rc3-mm1/drivers/pci/pci-driver.c =================================================================== --- linux-2.6.15-rc3-mm1.orig/drivers/pci/pci-driver.c 2005-12-01 14:14:18.000000000 -0800 +++ linux-2.6.15-rc3-mm1/drivers/pci/pci-driver.c 2005-12-01 18:49:57.000000000 -0800 @@ -185,15 +185,15 @@ static int pci_call_probe(struct pci_dri if (node >= 0 && node_online(node)) set_cpus_allowed(current, node_to_cpumask(node)); /* And set default memory allocation policy */ - oldpol = current->mempolicy; - current->mempolicy = &default_policy; - mpol_get(current->mempolicy); + oldpol = current->mempolicy[POL_OTHER]; + current->mempolicy[POL_OTHER] = &default_policy; + mpol_get(current->mempolicy[POL_OTHER]); #endif error = drv->probe(dev, id); #ifdef CONFIG_NUMA set_cpus_allowed(current, oldmask); - mpol_free(current->mempolicy); - current->mempolicy = oldpol; + mpol_free(current->mempolicy[POL_OTHER]); + current->mempolicy[POL_OTHER] = oldpol; #endif return error; }