From: Lee Schermerhorn

dequeue_huge_page_vma() is not obeying the MPOL_BIND nodemask with the
zonelist rework.  It needs to search only zones in the mempolicy nodemask
for hugepages.  Use for_each_zone_zonelist_nodemask() instead of
for_each_zone_zonelist().

Note: this will bloat mm/hugetlb.o a bit until Mel reworks the inlining
of the for_each_zone... macros and helpers.

Added mempolicy helper function mpol_bind_nodemask() to hide the details
of mempolicy from hugetlb and to avoid #ifdef CONFIG_NUMA in
dequeue_huge_page_vma().

Signed-off-by: Lee Schermerhorn
Cc: Mel Gorman
Cc: Christoph Lameter
Cc: David Rientjes
Cc: Lee Schermerhorn
Cc: KAMEZAWA Hiroyuki
Cc: Mel Gorman
Cc: Christoph Lameter
Cc: Hugh Dickins
Cc: Nick Piggin
Cc: Nishanth Aravamudan
Signed-off-by: Andrew Morton
---

 include/linux/mempolicy.h |   13 +++++++++++++
 mm/hugetlb.c              |    4 +++-
 2 files changed, 16 insertions(+), 1 deletion(-)

diff -puN include/linux/mempolicy.h~mm-filter-based-on-a-nodemask-as-well-as-a-gfp_mask-make-dequeue_huge_page_vma-obey-mpol_bind-nodemask include/linux/mempolicy.h
--- a/include/linux/mempolicy.h~mm-filter-based-on-a-nodemask-as-well-as-a-gfp_mask-make-dequeue_huge_page_vma-obey-mpol_bind-nodemask
+++ a/include/linux/mempolicy.h
@@ -163,6 +163,14 @@ static inline void check_highest_zone(en
 		policy_zone = k;
 }
 
+static inline nodemask_t *mpol_bind_nodemask(struct mempolicy *mpol)
+{
+	if (mpol->policy == MPOL_BIND)
+		return &mpol->v.nodes;
+	else
+		return NULL;
+}
+
 int do_migrate_pages(struct mm_struct *mm,
 	const nodemask_t *from_nodes, const nodemask_t *to_nodes, int flags);
 
@@ -255,6 +263,11 @@ static inline int do_migrate_pages(struc
 
 static inline void check_highest_zone(int k)
 {
 }
+
+static inline nodemask_t *mpol_bind_nodemask(struct mempolicy *mpol)
+{
+	return NULL;
+}
 #endif /* CONFIG_NUMA */
 #endif /* __KERNEL__ */
diff -puN mm/hugetlb.c~mm-filter-based-on-a-nodemask-as-well-as-a-gfp_mask-make-dequeue_huge_page_vma-obey-mpol_bind-nodemask mm/hugetlb.c
--- a/mm/hugetlb.c~mm-filter-based-on-a-nodemask-as-well-as-a-gfp_mask-make-dequeue_huge_page_vma-obey-mpol_bind-nodemask
+++ a/mm/hugetlb.c
@@ -99,8 +99,10 @@ static struct page *dequeue_huge_page_vm
 					htlb_alloc_mask, &mpol);
 	struct zone *zone;
 	struct zoneref *z;
+	nodemask_t *nodemask = mpol_bind_nodemask(mpol);
 
-	for_each_zone_zonelist(zone, z, zonelist, MAX_NR_ZONES - 1) {
+	for_each_zone_zonelist_nodemask(zone, z, zonelist,
+						MAX_NR_ZONES - 1, nodemask) {
 		nid = zone_to_nid(zone);
 		if (cpuset_zone_allowed_softwall(zone, htlb_alloc_mask) &&
 		    !list_empty(&hugepage_freelists[nid])) {
_