[PATCH 1/2] Add __GFP_FAIL. Use __GFP_FAIL to signal to the page allocator that an allocation attempt should fail immediately instead of attempting any form of reclaim. This is of interest to the implementation of GFP_THISNODE and memory pools. Memory pools currently clear __GFP_WAIT in order to cause an immediate failure. However, __GFP_WAIT is typically only cleared for atomic allocations, and even if it is cleared, reclaim may still occur. So use __GFP_FAIL there. If we have __GFP_FAIL then the special casing for GFP_THISNODE can be removed from the page allocator. Do that. Signed-off-by: Christoph Lameter --- include/linux/gfp.h | 6 ++++-- mm/mempool.c | 3 ++- mm/page_alloc.c | 10 +--------- 3 files changed, 7 insertions(+), 12 deletions(-) Index: linux-2.6.23-rc1-mm2/include/linux/gfp.h =================================================================== --- linux-2.6.23-rc1-mm2.orig/include/linux/gfp.h 2007-08-03 16:35:33.000000000 -0700 +++ linux-2.6.23-rc1-mm2/include/linux/gfp.h 2007-08-03 16:38:02.000000000 -0700 @@ -43,6 +43,7 @@ struct vm_area_struct; #define __GFP_REPEAT ((__force gfp_t)0x400u) /* Retry the allocation. Might fail */ #define __GFP_NOFAIL ((__force gfp_t)0x800u) /* Retry for ever. Cannot fail */ #define __GFP_NORETRY ((__force gfp_t)0x1000u)/* Do not retry. 
Might fail */ +#define __GFP_FAIL ((__force gfp_t)0x2000u)/* Fail immediately if there is a problem */ #define __GFP_COMP ((__force gfp_t)0x4000u)/* Add compound page metadata */ #define __GFP_ZERO ((__force gfp_t)0x8000u)/* Return zeroed page on success */ #define __GFP_NOMEMALLOC ((__force gfp_t)0x10000u) /* Don't use emergency reserves */ @@ -59,7 +60,7 @@ struct vm_area_struct; __GFP_COLD|__GFP_NOWARN|__GFP_REPEAT| \ __GFP_NOFAIL|__GFP_NORETRY|__GFP_COMP| \ __GFP_NOMEMALLOC|__GFP_HARDWALL|__GFP_THISNODE| \ - __GFP_RECLAIMABLE|__GFP_MOVABLE) + __GFP_RECLAIMABLE|__GFP_MOVABLE|__GFP_FAIL) /* This mask makes up all the page movable related flags */ #define GFP_MOVABLE_MASK (__GFP_RECLAIMABLE|__GFP_MOVABLE) @@ -87,7 +88,8 @@ struct vm_area_struct; __GFP_MOVABLE) #ifdef CONFIG_NUMA -#define GFP_THISNODE (__GFP_THISNODE | __GFP_NOWARN | __GFP_NORETRY) +#define GFP_THISNODE (__GFP_THISNODE | __GFP_NOWARN | __GFP_NORETRY |\ + __GFP_FAIL) #else #define GFP_THISNODE ((__force gfp_t)0) #endif Index: linux-2.6.23-rc1-mm2/mm/mempool.c =================================================================== --- linux-2.6.23-rc1-mm2.orig/mm/mempool.c 2007-07-22 13:41:00.000000000 -0700 +++ linux-2.6.23-rc1-mm2/mm/mempool.c 2007-08-03 16:37:38.000000000 -0700 @@ -211,8 +211,9 @@ void * mempool_alloc(mempool_t *pool, gf gfp_mask |= __GFP_NOMEMALLOC; /* don't allocate emergency reserves */ gfp_mask |= __GFP_NORETRY; /* don't loop in __alloc_pages */ gfp_mask |= __GFP_NOWARN; /* failures are OK */ + gfp_mask |= __GFP_FAIL; /* Fail instead of reclaim */ - gfp_temp = gfp_mask & ~(__GFP_WAIT|__GFP_IO); + gfp_temp = gfp_mask & ~__GFP_IO; repeat_alloc: Index: linux-2.6.23-rc1-mm2/mm/page_alloc.c =================================================================== --- linux-2.6.23-rc1-mm2.orig/mm/page_alloc.c 2007-08-03 16:35:33.000000000 -0700 +++ linux-2.6.23-rc1-mm2/mm/page_alloc.c 2007-08-03 16:37:38.000000000 -0700 @@ -1569,15 +1569,7 @@ restart: if (page) goto got_pg; - /* - * 
GFP_THISNODE (meaning __GFP_THISNODE, __GFP_NORETRY and - * __GFP_NOWARN set) should not cause reclaim since the subsystem - * (f.e. slab) using GFP_THISNODE may choose to trigger reclaim - * using a larger set of nodes after it has established that the - * allowed per node queues are empty and that nodes are - * over allocated. - */ - if (NUMA_BUILD && (gfp_mask & GFP_THISNODE) == GFP_THISNODE) + if (gfp_mask & __GFP_FAIL) goto nopage; for (z = zonelist->zones; *z; z++)