From: Christoph Lameter Add a new gfp flag __GFP_THISNODE to avoid fallback to other nodes. This flag is essential if a kernel component requires memory to be located on a certain node. It will be needed for alloc_pages_node() to force allocation on the indicated node and for alloc_pages() to force allocation on the current node. Signed-off-by: Christoph Lameter Cc: Andy Whitcroft Cc: Mel Gorman Signed-off-by: Andrew Morton --- include/linux/gfp.h | 1 + kernel/cpuset.c | 2 +- mm/mempolicy.c | 2 +- mm/page_alloc.c | 3 +++ 4 files changed, 6 insertions(+), 2 deletions(-) diff -puN include/linux/gfp.h~add-__gfp_thisnode-to-avoid-fallback-to-other-nodes-and-ignore include/linux/gfp.h --- a/include/linux/gfp.h~add-__gfp_thisnode-to-avoid-fallback-to-other-nodes-and-ignore +++ a/include/linux/gfp.h @@ -45,6 +45,7 @@ struct vm_area_struct; #define __GFP_ZERO ((__force gfp_t)0x8000u)/* Return zeroed page on success */ #define __GFP_NOMEMALLOC ((__force gfp_t)0x10000u) /* Don't use emergency reserves */ #define __GFP_HARDWALL ((__force gfp_t)0x20000u) /* Enforce hardwall cpuset memory allocs */ +#define __GFP_THISNODE ((__force gfp_t)0x40000u)/* No fallback, no policies */ #define __GFP_BITS_SHIFT 20 /* Room for 20 __GFP_FOO bits */ #define __GFP_BITS_MASK ((__force gfp_t)((1 << __GFP_BITS_SHIFT) - 1)) diff -puN kernel/cpuset.c~add-__gfp_thisnode-to-avoid-fallback-to-other-nodes-and-ignore kernel/cpuset.c --- a/kernel/cpuset.c~add-__gfp_thisnode-to-avoid-fallback-to-other-nodes-and-ignore +++ a/kernel/cpuset.c @@ -2316,7 +2316,7 @@ int __cpuset_zone_allowed(struct zone *z const struct cpuset *cs; /* current cpuset ancestors */ int allowed; /* is allocation in zone z allowed? */ - if (in_interrupt()) + if (in_interrupt() || (gfp_mask & __GFP_THISNODE)) return 1; node = z->zone_pgdat->node_id; might_sleep_if(!(gfp_mask & __GFP_HARDWALL)); diff -puN mm/mempolicy.c~add-__gfp_thisnode-to-avoid-fallback-to-other-nodes-and-ignore mm/mempolicy.c --- a/mm/mempolicy.c~add-__gfp_thisnode-to-avoid-fallback-to-other-nodes-and-ignore +++ a/mm/mempolicy.c @@ -1290,7 +1290,7 @@ struct page *alloc_pages_current(gfp_t g if ((gfp & __GFP_WAIT) && !in_interrupt()) cpuset_update_task_memory_state(); - if (!pol || in_interrupt()) + if (!pol || in_interrupt() || (gfp & __GFP_THISNODE)) pol = &default_policy; if (pol->policy == MPOL_INTERLEAVE) return alloc_page_interleave(gfp, order, interleave_nodes(pol)); diff -puN mm/page_alloc.c~add-__gfp_thisnode-to-avoid-fallback-to-other-nodes-and-ignore mm/page_alloc.c --- a/mm/page_alloc.c~add-__gfp_thisnode-to-avoid-fallback-to-other-nodes-and-ignore +++ a/mm/page_alloc.c @@ -893,6 +893,9 @@ get_page_from_freelist(gfp_t gfp_mask, u * See also cpuset_zone_allowed() comment in kernel/cpuset.c. */ do { + if (unlikely((gfp_mask & __GFP_THISNODE) && + (*z)->zone_pgdat != zonelist->zones[0]->zone_pgdat)) + break; if ((alloc_flags & ALLOC_CPUSET) && !cpuset_zone_allowed(*z, gfp_mask)) continue; _