Index: linux-2.6.13-rc5/include/linux/mempolicy.h
===================================================================
--- linux-2.6.13-rc5.orig/include/linux/mempolicy.h	2005-08-01 21:45:48.000000000 -0700
+++ linux-2.6.13-rc5/include/linux/mempolicy.h	2005-08-04 12:59:14.000000000 -0700
@@ -45,8 +45,9 @@ struct vm_area_struct;
  * of the current process.
  *
  * Locking policy for interlave:
- * In process context there is no locking because only the process accesses
- * its own state. All vma manipulation is somewhat protected by a down_read on
+ * In process context the page_table_lock must be acquired when using
+ * the policy in the task_struct.
+ * All vma manipulation is somewhat protected by a down_read on
  * mmap_sem. For allocating in the interleave policy the page_table_lock
  * must be also aquired to protect il_next.
  *
Index: linux-2.6.13-rc5/mm/mempolicy.c
===================================================================
--- linux-2.6.13-rc5.orig/mm/mempolicy.c	2005-08-01 21:45:48.000000000 -0700
+++ linux-2.6.13-rc5/mm/mempolicy.c	2005-08-04 17:21:01.000000000 -0700
@@ -19,7 +19,9 @@
  *                is used.
  * bind           Only allocate memory on a specific set of nodes,
  *                no fallback.
- * preferred      Try a specific node first before normal fallback.
+ *                FIXME: memory is allocated beginning from the first
+ *                node and not localized to where the process is executing.
+ * preferred      Try a specific node first before normal fallback.
  *                As a special case node -1 here means do the allocation
  *                on the local CPU. This is normally identical to default,
  *                but useful to set in a VMA when you have a non default
@@ -445,17 +447,23 @@ asmlinkage long sys_set_mempolicy(int mo
 
 	if (mode < 0 || mode > MPOL_MAX)
 		return -EINVAL;
+	down_write(&current->mm->mmap_sem);
 	err = get_nodes(nodes, nmask, maxnode, mode);
 	if (err)
-		return err;
+		goto out;
 	new = mpol_new(mode, nodes);
-	if (IS_ERR(new))
-		return PTR_ERR(new);
+	if (IS_ERR(new)) {
+		err = PTR_ERR(new);
+		goto out;
+	}
 	mpol_free(current->mempolicy);
 	current->mempolicy = new;
 	if (new && new->policy == MPOL_INTERLEAVE)
 		current->il_next = find_first_bit(new->v.nodes, MAX_NUMNODES);
-	return 0;
+	err = 0;
+out:
+	up_write(&current->mm->mmap_sem);
+	return err;
 }
 
 /* Fill a zone bitmap for a policy */
@@ -524,25 +532,29 @@ asmlinkage long sys_get_mempolicy(int __
 	int err, pval;
 	struct mm_struct *mm = current->mm;
 	struct vm_area_struct *vma = NULL;
-	struct mempolicy *pol = current->mempolicy;
 
 	if (flags & ~(unsigned long)(MPOL_F_NODE|MPOL_F_ADDR))
 		return -EINVAL;
 	if (nmask != NULL && maxnode < MAX_NUMNODES)
 		return -EINVAL;
+
+	down_read(&mm->mmap_sem);
+	pol = current->mempolicy;
 	if (flags & MPOL_F_ADDR) {
-		down_read(&mm->mmap_sem);
 		vma = find_vma_intersection(mm, addr, addr+1);
 		if (!vma) {
-			up_read(&mm->mmap_sem);
-			return -EFAULT;
+			err = -EFAULT;
+			goto out;
 		}
 		if (vma->vm_ops && vma->vm_ops->get_policy)
			pol = vma->vm_ops->get_policy(vma, addr);
 		else
 			pol = vma->vm_policy;
-	} else if (addr)
-		return -EINVAL;
+	} else if (addr) {
+		err = -EINVAL;
+		goto out;
+	}
 
 	if (!pol)
 		pol = &default_policy;
@@ -563,10 +575,8 @@ asmlinkage long sys_get_mempolicy(int __
 	} else
 		pval = pol->policy;
 
-	if (vma) {
-		up_read(&current->mm->mmap_sem);
-		vma = NULL;
-	}
+	up_read(&mm->mmap_sem);
+	vma = NULL;
 
 	if (policy && put_user(pval, policy))
 		return -EFAULT;
@@ -579,8 +589,7 @@ asmlinkage long sys_get_mempolicy(int __
 	}
 
  out:
-	if (vma)
-		up_read(&current->mm->mmap_sem);
+	up_read(&mm->mmap_sem);
 	return err;
 }
 
@@ -682,31 +691,16 @@ get_vma_policy(struct vm_area_struct *vm
 }
 
 /* Return a zonelist representing a mempolicy */
-static struct zonelist *zonelist_policy(unsigned int __nocast gfp, struct mempolicy *policy)
+static inline struct zonelist *zonelist_policy(unsigned int __nocast gfp, struct mempolicy *policy)
 {
 	int nd;
 
-	switch (policy->policy) {
-	case MPOL_PREFERRED:
+	if (policy->policy == MPOL_PREFERRED) {
 		nd = policy->v.preferred_node;
 		if (nd < 0)
 			nd = numa_node_id();
-		break;
-	case MPOL_BIND:
-		/* Lower zones don't get a policy applied */
-		/* Careful: current->mems_allowed might have moved */
-		if ((gfp & GFP_ZONEMASK) >= policy_zone)
-			if (cpuset_zonelist_valid_mems_allowed(policy->v.zonelist))
-				return policy->v.zonelist;
-		/*FALL THROUGH*/
-	case MPOL_INTERLEAVE: /* should not happen */
-	case MPOL_DEFAULT:
+	} else
 		nd = numa_node_id();
-		break;
-	default:
-		nd = 0;
-		BUG();
-	}
 	return NODE_DATA(nd)->node_zonelists + (gfp & GFP_ZONEMASK);
 }
 
@@ -787,9 +781,11 @@ struct page *
 alloc_page_vma(unsigned int __nocast gfp, struct vm_area_struct *vma, unsigned long addr)
 {
 	struct mempolicy *pol = get_vma_policy(vma, addr);
+	nodemask_t nodes;
 
 	cpuset_update_current_mems_allowed();
 
+	nodes = pol->v.nodes;
 	if (unlikely(pol->policy == MPOL_INTERLEAVE)) {
 		unsigned nid;
 		if (vma) {
@@ -803,9 +799,9 @@ alloc_page_vma
 			/* fall back to process interleaving */
 			nid = interleave_nodes(pol);
 		}
-		return alloc_page_interleave(gfp, 0, nid);
+		return alloc_page_interleave(gfp, 0, nid, &nodes);
 	}
-	return __alloc_pages(gfp, 0, zonelist_policy(gfp, pol));
+	return __alloc_pages(gfp, 0, zonelist_policy(gfp, pol), &nodes);
 }
 
 /**
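
The syscall paths modified above can be exercised from userspace. Below is a
minimal test sketch, assuming the numactl/libnuma <numaif.h> wrappers (link
with -lnuma) and at least two online nodes for the interleave mask; it is
illustrative only and not part of the patch:

	#include <stdio.h>
	#include <numaif.h>

	int main(void)
	{
		/* Node mask with bits 0 and 1 set: interleave over nodes 0-1. */
		unsigned long nodes = 0x3;
		int mode;

		/* Kernel side: sys_set_mempolicy(), now serialized by mmap_sem. */
		if (set_mempolicy(MPOL_INTERLEAVE, &nodes, sizeof(nodes) * 8))
			perror("set_mempolicy");

		/* Kernel side: sys_get_mempolicy(); reads the policy back. */
		if (get_mempolicy(&mode, NULL, 0, NULL, 0))
			perror("get_mempolicy");
		else
			printf("policy mode = %d\n", mode);

		return 0;
	}

On kernels built without CONFIG_NUMA these syscalls are stubbed out and should
fail with ENOSYS, so the perror() paths fire there.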