From e63ff774044b41b1296ce4013907ef7bf78aa902 Mon Sep 17 00:00:00 2001
From: Christoph Lameter
Date: Wed, 3 Oct 2007 20:42:44 -0700
Subject: [PATCH] vcompound: Core piece

Add a new gfp flag

	__GFP_VFALLBACK

If it is specified during a higher order allocation, the system will
fall back to vmap when no physically contiguous pages can be found,
creating a virtually contiguous area instead of a physically contiguous
one. In many cases the virtually contiguous area can stand in for the
physically contiguous area, with some loss of performance.

Signed-off-by: Christoph Lameter
---

A hypothetical usage sketch follows the patch.

 include/linux/gfp.h |    6 ++
 mm/page_alloc.c     |  146 ++++++++++++++++++++++++++++++++++++++++++++++---
 2 files changed, 147 insertions(+), 5 deletions(-)

Index: linux-2.6.25-rc3-mm1/include/linux/gfp.h
===================================================================
--- linux-2.6.25-rc3-mm1.orig/include/linux/gfp.h	2008-03-04 17:23:08.623798804 -0800
+++ linux-2.6.25-rc3-mm1/include/linux/gfp.h	2008-03-04 17:23:15.355527455 -0800
@@ -43,6 +43,7 @@ struct vm_area_struct;
 #define __GFP_REPEAT	((__force gfp_t)0x400u)	/* Retry the allocation.  Might fail */
 #define __GFP_NOFAIL	((__force gfp_t)0x800u)	/* Retry for ever.  Cannot fail */
 #define __GFP_NORETRY	((__force gfp_t)0x1000u)/* Do not retry.  Might fail */
+#define __GFP_VFALLBACK	((__force gfp_t)0x2000u)/* Permit fallback to vmalloc */
 #define __GFP_COMP	((__force gfp_t)0x4000u)/* Add compound page metadata */
 #define __GFP_ZERO	((__force gfp_t)0x8000u)/* Return zeroed page on success */
 #define __GFP_NOMEMALLOC ((__force gfp_t)0x10000u) /* Don't use emergency reserves */
@@ -82,6 +83,11 @@ struct vm_area_struct;
 #define GFP_THISNODE	((__force gfp_t)0)
 #endif
 
+/*
+ * Attempt a higher order allocation; fall back to a vmap'd area on failure.
+ */
+#define GFP_VFALLBACK	(GFP_KERNEL | __GFP_VFALLBACK)
+
 /* This mask makes up all the page movable related flags */
 #define GFP_MOVABLE_MASK (__GFP_RECLAIMABLE|__GFP_MOVABLE)
 
Index: linux-2.6.25-rc3-mm1/mm/page_alloc.c
===================================================================
--- linux-2.6.25-rc3-mm1.orig/mm/page_alloc.c	2008-03-04 17:23:12.195654592 -0800
+++ linux-2.6.25-rc3-mm1/mm/page_alloc.c	2008-03-04 17:25:53.781151920 -0800
@@ -76,6 +76,9 @@ int pageblock_order __read_mostly;
 #endif
 
 static void __free_pages_ok(struct page *page, unsigned int order);
+static struct page *alloc_vcompound(gfp_t, nodemask_t *, int,
+		struct zonelist *, int, unsigned long);
+static void destroy_compound_page(struct page *page, unsigned long order);
 
 /*
  * results with 256, 32 in the lowmem_reserve sysctl:
@@ -305,9 +308,51 @@ static void bad_page(struct page *page)
  * This usage means that zero-order pages may not be compound.
  */
 
+static void __free_vcompound(void *addr)
+{
+	struct page **pages;
+	int i;
+	struct page *page = vmalloc_to_page(addr);
+	int order = compound_order(page);
+	int nr_pages = 1 << order;
+
+	if (!PageVcompound(page) || !PageHead(page)) {
+		bad_page(page);
+		return;
+	}
+	destroy_compound_page(page, order);
+	pages = vunmap(addr);	/* relies on vunmap() returning the page array */
+	/*
+	 * First page will have zero refcount since it maintains state
+	 * for the compound and was decremented before we got here.
+	 */
+	set_page_address(page, NULL);
+	__ClearPageVcompound(page);
+	free_hot_page(page);
+
+	for (i = 1; i < nr_pages; i++) {
+		page = pages[i];
+		set_page_address(page, NULL);
+		__ClearPageVcompound(page);
+		__free_page(page);
+	}
+	kfree(pages);
+}
+
+
+static void free_vcompound(void *addr)
+{
+	__free_vcompound(addr);
+}
+
 static void free_compound_page(struct page *page)
 {
-	__free_pages_ok(page, compound_order(page));
+	if (PageVcompound(page)) {
+		free_vcompound(page_address(page));
+	} else {
+		destroy_compound_page(page, compound_order(page));
+		__free_pages_ok(page, compound_order(page));
+	}
 }
 
 static void prep_compound_page(struct page *page, unsigned long order)
@@ -1549,6 +1594,72 @@ static void set_page_owner(struct page *
 #endif /* CONFIG_PAGE_OWNER */
 
 /*
+ * Virtual Compound Page support.
+ *
+ * Virtual compound pages are used to fall back to order-0 allocations when
+ * a physically contiguous higher order page cannot be allocated and
+ * __GFP_VFALLBACK is set. They are formatted according to compound page
+ * conventions, i.e. if PageTail(page) is set then page->first_page can be
+ * used to determine the head page.
+ */
+static noinline struct page *alloc_vcompound(gfp_t gfp_mask,
+		nodemask_t *nodemask, int order, struct zonelist *zonelist,
+		int high_zoneidx, unsigned long alloc_flags)
+{
+	struct page *page;
+	int i;
+	struct vm_struct *vm;
+	int nr_pages = 1 << order;
+	struct page **pages = kmalloc(nr_pages * sizeof(struct page *),
+					gfp_mask & GFP_RECLAIM_MASK);
+	struct page **pages2;
+
+	if (!pages)
+		return NULL;
+
+	gfp_mask &= ~(__GFP_COMP | __GFP_VFALLBACK);
+	for (i = 0; i < nr_pages; i++) {
+		page = get_page_from_freelist(gfp_mask, nodemask, 0, zonelist,
+						high_zoneidx, alloc_flags);
+		if (!page)
+			goto abort;
+
+		/* Sets PageCompound which makes PageHead(page) true */
+		__SetPageVcompound(page);
+		pages[i] = page;
+	}
+
+	vm = get_vm_area_node(nr_pages << PAGE_SHIFT, VM_MAP,
+			zone_to_nid(zonelist_zone(zonelist->_zonerefs)),
+			gfp_mask);
+	if (!vm)
+		goto abort;
+
+	pages2 = pages;
+	if (map_vm_area(vm, PAGE_KERNEL, &pages2))
+		goto abort;
+
+	prep_compound_page(pages[0], order);
+
+	for (i = 0; i < nr_pages; i++)
+		set_page_address(pages[i], vm->addr + (i << PAGE_SHIFT));
+
+	return pages[0];
+
+abort:
+	while (i-- > 0) {
+		page = pages[i];
+		if (!page)
+			continue;
+		set_page_address(page, NULL);
+		__ClearPageVcompound(page);
+		__free_page(page);
+	}
+	kfree(pages);
+	return NULL;
+}
+
+/*
  * This is the 'heart' of the zoned buddy allocator.
  */
 static struct page *
@@ -1632,6 +1740,13 @@ restart:
 	if (page)
 		goto got_pg;
 
+	if (order && (gfp_mask & __GFP_VFALLBACK)) {
+		page = alloc_vcompound(gfp_mask, nodemask, order,
+				zonelist, high_zoneidx, alloc_flags);
+		if (page)
+			goto got_pg;
+	}
+
 	/* This allocation should allow future memory freeing. */
 
 rebalance:
@@ -1649,12 +1761,12 @@ nofail_alloc:
 				goto nofail_alloc;
 			}
 		}
-		goto nopage;
+		goto try_vcompound;
 	}
 
 	/* Atomic allocations - we can't balance anything */
 	if (!wait)
-		goto nopage;
+		goto try_vcompound;
 
 	cond_resched();
 
@@ -1730,6 +1842,14 @@ nofail_alloc:
 		goto rebalance;
 	}
 
+try_vcompound:
+	/* Last chance before failing the allocation */
+	if (order && (gfp_mask & __GFP_VFALLBACK)) {
+		page = alloc_vcompound(gfp_mask, nodemask, order,
+				zonelist, high_zoneidx, alloc_flags);
+		if (page)
+			goto got_pg;
+	}
 nopage:
 	if (!(gfp_mask & __GFP_NOWARN) && printk_ratelimit()) {
 		printk(KERN_WARNING "%s: page allocation failure."
@@ -1808,6 +1928,9 @@ void __free_pages(struct page *page, uns
 	if (order == 0)
 		free_hot_page(page);
 	else
+	if (unlikely(PageHead(page)))
+		free_compound_page(page);
+	else
 		__free_pages_ok(page, order);
 	}
 }
@@ -1817,8 +1940,15 @@ EXPORT_SYMBOL(__free_pages);
 
 void free_pages(unsigned long addr, unsigned int order)
 {
 	if (addr != 0) {
-		VM_BUG_ON(!virt_addr_valid((void *)addr));
-		__free_pages(virt_to_page((void *)addr), order);
+		struct page *page;
+
+		if (unlikely(addr >= VMALLOC_START && addr < VMALLOC_END)) {
+			page = vmalloc_to_page((void *)addr);
+		} else {
+			VM_BUG_ON(!virt_addr_valid((void *)addr));
+			page = virt_to_page((void *)addr);
+		}
+		__free_pages(page, order);
 	}
 }
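
For illustration only, and not part of the patch above: a minimal sketch of
how a caller might use GFP_VFALLBACK. The names (vc_example_init,
vc_example_exit, table) and the order-4 size are hypothetical, and the
sketch assumes the rest of the vcompound series is applied (in particular
the PageVcompound() flag and page_address()/set_page_address() support for
virtual compound pages).

#include <linux/errno.h>
#include <linux/gfp.h>
#include <linux/mm.h>
#include <linux/module.h>

static void *table;

static int __init vc_example_init(void)
{
	/*
	 * Ask for an order-4 area (64KB with 4KB pages). With
	 * GFP_VFALLBACK the allocator first tries a physically
	 * contiguous order-4 page; under fragmentation it falls back
	 * to sixteen order-0 pages stitched together with vmap instead
	 * of failing outright.
	 */
	struct page *page = alloc_pages(GFP_VFALLBACK | __GFP_ZERO, 4);

	if (!page)
		return -ENOMEM;

	/*
	 * page_address() covers both outcomes: the linear address of a
	 * physically contiguous page, or the vmap address recorded by
	 * alloc_vcompound() for a virtual compound page.
	 */
	table = page_address(page);
	return 0;
}

static void __exit vc_example_exit(void)
{
	/*
	 * The patched free_pages() detects addresses in the vmalloc
	 * range and routes virtual compound pages through
	 * free_compound_page()/__free_vcompound().
	 */
	free_pages((unsigned long)table, 4);
	table = NULL;
}

module_init(vc_example_init);
module_exit(vc_example_exit);
MODULE_LICENSE("GPL");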