Add __GFP_THISNODE to avoid fallback to other nodes and to ignore cpuset/memory policy restrictions. Add a new gfp flag __GFP_THISNODE to avoid fallback to other nodes; allocations using it also bypass cpuset and memory policy restrictions. This flag is essential if a kernel component requires memory to be located on a certain node. It will be needed for alloc_pages_node() to force allocation on the indicated node and for alloc_pages() to force allocation on the current node. Signed-off-by: Christoph Lameter Index: linux-2.6.18-rc1/mm/page_alloc.c =================================================================== --- linux-2.6.18-rc1.orig/mm/page_alloc.c 2006-07-13 10:27:12.534765446 -0700 +++ linux-2.6.18-rc1/mm/page_alloc.c 2006-07-13 10:27:16.481786427 -0700 @@ -878,9 +878,12 @@ get_page_from_freelist(gfp_t gfp_mask, u */ do { zone = *z; + if (unlikely((gfp_mask & __GFP_THISNODE) && + zone->zone_pgdat != zonelist->zones[0]->zone_pgdat)) + break; if ((alloc_flags & ALLOC_CPUSET) && - !cpuset_zone_allowed(zone, gfp_mask)) - continue; + !cpuset_zone_allowed(zone, gfp_mask)) + continue; if (!(alloc_flags & ALLOC_NO_WATERMARKS)) { unsigned long mark; Index: linux-2.6.18-rc1/include/linux/gfp.h =================================================================== --- linux-2.6.18-rc1.orig/include/linux/gfp.h 2006-07-05 21:09:49.000000000 -0700 +++ linux-2.6.18-rc1/include/linux/gfp.h 2006-07-13 10:28:45.794610139 -0700 @@ -46,6 +46,7 @@ struct vm_area_struct; #define __GFP_ZERO ((__force gfp_t)0x8000u)/* Return zeroed page on success */ #define __GFP_NOMEMALLOC ((__force gfp_t)0x10000u) /* Don't use emergency reserves */ #define __GFP_HARDWALL ((__force gfp_t)0x20000u) /* Enforce hardwall cpuset memory allocs */ +#define __GFP_THISNODE ((__force gfp_t)0x40000u)/* No fallback, no policies */ #define __GFP_BITS_SHIFT 20 /* Room for 20 __GFP_FOO bits */ #define __GFP_BITS_MASK ((__force gfp_t)((1 << __GFP_BITS_SHIFT) - 1)) Index: linux-2.6.18-rc1/mm/mempolicy.c =================================================================== --- 
linux-2.6.18-rc1.orig/mm/mempolicy.c 2006-07-05 21:09:49.000000000 -0700 +++ linux-2.6.18-rc1/mm/mempolicy.c 2006-07-13 10:34:14.781057331 -0700 @@ -1277,7 +1277,7 @@ struct page *alloc_pages_current(gfp_t g if ((gfp & __GFP_WAIT) && !in_interrupt()) cpuset_update_task_memory_state(); - if (!pol || in_interrupt()) + if (!pol || in_interrupt() || (gfp & __GFP_THISNODE)) pol = &default_policy; if (pol->policy == MPOL_INTERLEAVE) return alloc_page_interleave(gfp, order, interleave_nodes(pol)); Index: linux-2.6.18-rc1/kernel/cpuset.c =================================================================== --- linux-2.6.18-rc1.orig/kernel/cpuset.c 2006-07-05 21:09:49.000000000 -0700 +++ linux-2.6.18-rc1/kernel/cpuset.c 2006-07-13 10:34:09.245267571 -0700 @@ -2265,7 +2265,7 @@ int __cpuset_zone_allowed(struct zone *z const struct cpuset *cs; /* current cpuset ancestors */ int allowed; /* is allocation in zone z allowed? */ - if (in_interrupt()) + if (in_interrupt() || (gfp_mask & __GFP_THISNODE)) return 1; node = z->zone_pgdat->node_id; might_sleep_if(!(gfp_mask & __GFP_HARDWALL));