GFP_VFALLBACK: Allow fallback of compound pages to virtual mappings This adds a new gfp flag __GFP_VFALLBACK. If specified during a higher order allocation then the system will fall back to vmap and attempt to create a virtually contiguous area instead of a physically contiguous area. In many cases the virtually contiguous area can stand in for the physically contiguous area (with some loss of performance). Signed-off-by: Christoph Lameter --- include/linux/gfp.h | 5 ++ mm/page_alloc.c | 120 ++++++++++++++++++++++++++++++++++++++++++++++++---- 2 files changed, 117 insertions(+), 8 deletions(-) Index: linux-2.6/mm/page_alloc.c =================================================================== --- linux-2.6.orig/mm/page_alloc.c 2007-09-17 23:36:37.000000000 -0700 +++ linux-2.6/mm/page_alloc.c 2007-09-17 23:46:19.000000000 -0700 @@ -1230,6 +1230,83 @@ try_next_zone: } /* + * Virtual Compound Page support. + * + * Virtual Compound Pages are used to fall back to order 0 allocations if large + * linear mappings are not available and __GFP_VFALLBACK is set. They are + * formatted according to compound page conventions. I.e. following + * page->first_page if PageTail(page) is set can be used to determine the + * head page. 
+ */ +struct page *vcompound_alloc(gfp_t gfp_mask, int order, + struct zonelist *zonelist, unsigned long alloc_flags) +{ + void *addr; + struct page *page; + int i; + int nr_pages = 1 << order; + struct page **pages = kzalloc((nr_pages + 1) * sizeof(struct page *), + gfp_mask); + + if (!pages) + return NULL; + + for (i = 0; i < nr_pages; i++) { + page = get_page_from_freelist(gfp_mask, 0, + zonelist, alloc_flags); + if (!page) + goto abort; + + /* Sets PageCompound which makes PageHead(page) true */ + __SetPageVmalloc(page); + if (i) { + page->first_page = pages[0]; + __SetPageTail(page); + } + pages[i] = page; + } + + addr = vmap(pages, nr_pages, VM_MAP, PAGE_KERNEL); + if (!addr) + goto abort; + + return pages[0]; + +abort: + for (i = 0; i < nr_pages; i++) { + page = pages[i]; + __ClearPageTail(page); + __ClearPageHead(page); + __ClearPageVmalloc(page); + __free_page(page); + } + kfree(pages); + return NULL; +} + +static void vcompound_free(void *addr) +{ + struct page **pages = vunmap(addr); + + /* + * First page will have zero refcount since it maintains state + * for the compound + */ + __ClearPageHead(pages[0]); + __ClearPageVmalloc(pages[0]); + free_hot_page(pages[0]); + + while (*(++pages)) { + struct page *page = *pages; + + __ClearPageTail(page); + __ClearPageVmalloc(page); + __free_page(page); + } + kfree(pages); +} + +/* * This is the 'heart' of the zoned buddy allocator. */ struct page * fastcall @@ -1324,12 +1401,12 @@ nofail_alloc: goto nofail_alloc; } } - goto nopage; + goto try_vcompound; } /* Atomic allocations - we can't balance anything */ if (!wait) - goto nopage; + goto try_vcompound; cond_resched(); @@ -1391,6 +1468,13 @@ nofail_alloc: goto rebalance; } +try_vcompound: + if (!page && order && (gfp_mask & __GFP_VFALLBACK)) { + page = vcompound_alloc(gfp_mask, order, + zonelist, alloc_flags); + if (page) + goto got_pg; + } nopage: if (!(gfp_mask & __GFP_NOWARN) && printk_ratelimit()) { printk(KERN_WARNING "%s: page allocation failure." 
@@ -1445,13 +1529,29 @@ void __pagevec_free(struct pagevec *pvec free_hot_cold_page(pvec->pages[i], pvec->cold); } +/* + * Free a virtual compound array allocated with __GFP_VFALLBACK. + * + * Must be called with interrupts enabled since vmalloc_address() + * requires a spinlock. + */ +static void vcompound_free_pages(struct page *page) +{ + BUG_ON(in_interrupt()); + vcompound_free(vmalloc_address(page)); +} + fastcall void __free_pages(struct page *page, unsigned int order) { if (put_page_testzero(page)) { - if (order == 0) - free_hot_page(page); - else - __free_pages_ok(page, order); + if (unlikely(PageVmalloc(page))) + vcompound_free_pages(page); + else { + if (order == 0) + free_hot_page(page); + else + __free_pages_ok(page, order); + } } } @@ -1460,8 +1560,12 @@ EXPORT_SYMBOL(__free_pages); fastcall void free_pages(unsigned long addr, unsigned int order) { if (addr != 0) { - VM_BUG_ON(!virt_addr_valid((void *)addr)); - __free_pages(virt_to_page((void *)addr), order); + if (unlikely(is_vmalloc_addr((void *)addr))) + vcompound_free((void *)addr); + else { + VM_BUG_ON(!virt_addr_valid((void *)addr)); + __free_pages(virt_to_page((void *)addr), order); + } } } Index: linux-2.6/include/linux/gfp.h =================================================================== --- linux-2.6.orig/include/linux/gfp.h 2007-09-17 23:36:37.000000000 -0700 +++ linux-2.6/include/linux/gfp.h 2007-09-17 23:42:59.000000000 -0700 @@ -43,6 +43,7 @@ struct vm_area_struct; #define __GFP_REPEAT ((__force gfp_t)0x400u) /* Retry the allocation. Might fail */ #define __GFP_NOFAIL ((__force gfp_t)0x800u) /* Retry for ever. Cannot fail */ #define __GFP_NORETRY ((__force gfp_t)0x1000u)/* Do not retry. 
Might fail */ +#define __GFP_VFALLBACK ((__force gfp_t)0x2000u)/* Permit fallback to vmalloc */ #define __GFP_COMP ((__force gfp_t)0x4000u)/* Add compound page metadata */ #define __GFP_ZERO ((__force gfp_t)0x8000u)/* Return zeroed page on success */ #define __GFP_NOMEMALLOC ((__force gfp_t)0x10000u) /* Don't use emergency reserves */ @@ -86,6 +87,10 @@ struct vm_area_struct; #define GFP_THISNODE ((__force gfp_t)0) #endif +/* + * Allocate large page but allow fallback to a virtually mapped page + */ +#define GFP_VFALLBACK (GFP_KERNEL | __GFP_VFALLBACK) /* Flag - indicates that the buffer will be suitable for DMA. Ignored on some platforms, used as appropriate on others */