---
 include/linux/gfp.h     |    6 +++
 include/linux/vmalloc.h |    2 +
 mm/page_alloc.c         |   45 +++++++++++++++++++++++-
 mm/vmalloc.c            |   88 ++++++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 139 insertions(+), 2 deletions(-)

Index: linux-2.6/include/linux/gfp.h
===================================================================
--- linux-2.6.orig/include/linux/gfp.h	2007-09-16 19:30:30.000000000 -0700
+++ linux-2.6/include/linux/gfp.h	2007-09-16 20:33:54.000000000 -0700
@@ -43,6 +43,7 @@ struct vm_area_struct;
 #define __GFP_REPEAT	((__force gfp_t)0x400u)	/* Retry the allocation.  Might fail */
 #define __GFP_NOFAIL	((__force gfp_t)0x800u)	/* Retry for ever.  Cannot fail */
 #define __GFP_NORETRY	((__force gfp_t)0x1000u)/* Do not retry.  Might fail */
+#define __GFP_VFALLBACK	((__force gfp_t)0x2000u)/* Allow fallback to virtual */
 #define __GFP_COMP	((__force gfp_t)0x4000u)/* Add compound page metadata */
 #define __GFP_ZERO	((__force gfp_t)0x8000u)/* Return zeroed page on success */
 #define __GFP_NOMEMALLOC ((__force gfp_t)0x10000u) /* Don't use emergency reserves */
@@ -86,6 +87,11 @@ struct vm_area_struct;
 #define GFP_THISNODE	((__force gfp_t)0)
 #endif
 
+/*
+ * Indicate that a large allocation may fall back to a virtual mapping if
+ * memory fragmentation prevents the allocation of a physically contiguous area.
+ */
+#define GFP_VFALLBACK	(GFP_KERNEL | __GFP_VFALLBACK)
 
 /* Flag - indicates that the buffer will be suitable for DMA.  Ignored on some
    platforms, used as appropriate on others */

Index: linux-2.6/mm/page_alloc.c
===================================================================
--- linux-2.6.orig/mm/page_alloc.c	2007-09-16 19:30:30.000000000 -0700
+++ linux-2.6/mm/page_alloc.c	2007-09-16 23:17:45.000000000 -0700
@@ -405,8 +405,13 @@ static inline void __free_one_page(struct page *page,
 	unsigned long page_idx;
 	int order_size = 1 << order;
 
-	if (unlikely(PageCompound(page)))
+	if (unlikely(PageCompound(page))) {
+		if (PageVmalloc(page)) {
+			vfree_compound(page);
+			return;
+		}
 		destroy_compound_page(page, order);
+	}
 
 	page_idx = page_to_pfn(page) & ((1 << MAX_ORDER) - 1);

@@ -990,6 +995,29 @@ static inline int should_fail_alloc_page
 
 #endif /* CONFIG_FAIL_PAGE_ALLOC */
 
+
+static struct page *vfallback(gfp_t gfp_mask, int order, struct zonelist *zonelist)
+{
+	void *addr;
+
+	/*
+	 * Fall back to vmalloc only for higher order pages. The allocation
+	 * may have failed due to fragmentation.
+	 */
+	if (!order)
+		return NULL;
+
+	if (!(gfp_mask & __GFP_VFALLBACK))
+		return NULL;
+
+	addr = create_compound(gfp_mask, order, zonelist);
+
+	if (!addr)
+		return NULL;
+
+	return vmalloc_to_page(addr);
+}
+
 /*
  * Return 1 if free pages are above 'mark'. This takes into account the order
  * of the allocation.
@@ -1327,7 +1355,10 @@ nofail_alloc:
 		goto nopage;
 	}
 
-	/* Atomic allocations - we can't balance anything */
+	/*
+	 * Atomic allocations - we can't balance anything and we do not
+	 * support fallback to virtual mappings.
+	 */
 	if (!wait)
 		goto nopage;
 
@@ -1351,6 +1382,7 @@ nofail_alloc:
 						zonelist, alloc_flags);
 		if (page)
 			goto got_pg;
+
 	} else if ((gfp_mask & __GFP_FS) && !(gfp_mask & __GFP_NORETRY)) {
 		/*
 		 * Go through the zonelist yet one more time, keep
@@ -1363,6 +1395,10 @@ nofail_alloc:
 		if (page)
 			goto got_pg;
 
+		page = vfallback(gfp_mask, order, zonelist);
+		if (page)
+			goto got_pg;
+
 		/* The OOM killer will not help higher order allocs so fail */
 		if (order > PAGE_ALLOC_COSTLY_ORDER)
 			goto nopage;
@@ -1391,6 +1427,10 @@ nofail_alloc:
 		goto rebalance;
 	}
 
+	page = vfallback(gfp_mask, order, zonelist);
+	if (page)
+		goto got_pg;
+
 nopage:
 	if (!(gfp_mask & __GFP_NOWARN) && printk_ratelimit()) {
 		printk(KERN_WARNING "%s: page allocation failure."
@@ -1401,6 +1441,7 @@ nopage:
 	}
 got_pg:
 	return page;
+
 }
 EXPORT_SYMBOL(__alloc_pages);

Index: linux-2.6/include/linux/vmalloc.h
===================================================================
--- linux-2.6.orig/include/linux/vmalloc.h	2007-09-16 20:27:29.000000000 -0700
+++ linux-2.6/include/linux/vmalloc.h	2007-09-16 20:31:44.000000000 -0700
@@ -86,6 +86,8 @@ struct page *vmalloc_to_page(const void
 unsigned long vmalloc_to_pfn(const void *addr);
 void *vmalloc_address(struct page *);
 
+void vfree_compound(struct page *);
+
 /* Determine if an address is within the vmalloc range */
 static inline int is_vmalloc_addr(void *x)
 {

Index: linux-2.6/mm/vmalloc.c
===================================================================
--- linux-2.6.orig/mm/vmalloc.c	2007-09-16 20:27:49.000000000 -0700
+++ linux-2.6/mm/vmalloc.c	2007-09-16 23:32:42.000000000 -0700
@@ -439,6 +439,33 @@ void vfree(const void *addr)
 }
 EXPORT_SYMBOL(vfree);
 
+static void vfree_compound_work(struct work_struct *w)
+{
+	/* The work_struct was placed at the start of the mapped area */
+	__vunmap(w, 2);
+}
+
+void vfree_compound(struct page *page)
+{
+	struct work_struct *w = vmalloc_address(page);
+
+	if (unlikely(in_interrupt())) {
+		/*
+		 * The object is unused and we have at least a page of
+		 * virtually mapped memory at its address. So we can place a
+		 * work_struct there in order to defer the free until a time
+		 * when interrupts are enabled again.
+		 */
+		INIT_WORK(w, vfree_compound_work);
+		schedule_work(w);
+	} else
+		vfree_compound_work(w);
+}
+EXPORT_SYMBOL(vfree_compound);
+
 /**
  * vunmap - release virtual mapping obtained by vmap()
  * @addr:	memory base address
@@ -573,6 +600,67 @@ void *__vmalloc(unsigned long size, gfp_
 }
 EXPORT_SYMBOL(__vmalloc);
 
+/*
+ * Create a virtually mapped compound page of the given order: allocate
+ * 2^order individual pages and map them into a contiguous virtual area.
+ */
+void *create_compound(gfp_t gfp_mask, int order, struct zonelist *zl)
+{
+	struct vm_struct *area;
+	struct page **pages;
+	unsigned int nr_pages, array_size, i;
+	unsigned long size = PAGE_SIZE << order;
+	int node = zone_to_nid(zl->zones[0]);
+
+	if (!size || (size >> PAGE_SHIFT) > num_physpages)
+		return NULL;
+
+	area = get_vm_area_node(size, VM_ALLOC, node, gfp_mask);
+	if (!area)
+		return NULL;
+
+	nr_pages = (area->size - PAGE_SIZE) >> PAGE_SHIFT;
+	array_size = (nr_pages * sizeof(struct page *));
+
+	area->nr_pages = nr_pages;
+	/* Please note that the recursion is strictly bounded. */
+	if (array_size > PAGE_SIZE) {
+		pages = __vmalloc_node(array_size, gfp_mask | __GFP_ZERO,
+					PAGE_KERNEL, node);
+		area->flags |= VM_VPAGES;
+	} else {
+		pages = kmalloc_node(array_size,
+				(gfp_mask & GFP_LEVEL_MASK) | __GFP_ZERO,
+				node);
+	}
+	area->pages = pages;
+	if (!area->pages) {
+		remove_vm_area(area->addr);
+		kfree(area);
+		return NULL;
+	}
+
+	for (i = 0; i < area->nr_pages; i++) {
+		struct page *page;
+
+		page = get_page_from_freelist(gfp_mask, 0, zl,
+					ALLOC_WMARK_LOW | ALLOC_CPUSET);
+		if (unlikely(!page)) {
+			/*
+			 * Successfully allocated i pages, free them in
+			 * __vunmap()
+			 */
+			area->nr_pages = i;
+			goto fail;
+		}
+		__SetPageVmalloc(page);
+		area->pages[i] = page;
+	}
+
+	if (map_vm_area(area, PAGE_KERNEL, &pages))
+		goto fail;
+
+	return area->addr;
+
+fail:
+	vfree(area->addr);
+	return NULL;
+}
+
 /**
  * vmalloc - allocate virtually contiguous memory
  * @size:		allocation size
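
For illustration, a caller of the new flag could look roughly like the sketch
below. This is not part of the patch: the helper large_buffer_alloc() is
hypothetical, and it assumes that a fallback page stays marked with
PageVmalloc() (set via __SetPageVmalloc() above) and that vmalloc_address()
from the vmalloc.h hunk returns the virtually contiguous address of such a
page.

	#include <linux/gfp.h>
	#include <linux/mm.h>
	#include <linux/vmalloc.h>

	/* Sketch only -- not part of this patch */
	static void *large_buffer_alloc(unsigned int order)
	{
		struct page *page;

		/* May transparently fall back to a virtually mapped area */
		page = alloc_pages(GFP_VFALLBACK | __GFP_COMP, order);
		if (!page)
			return NULL;

		/* A virtually mapped compound page has no linear address */
		if (PageVmalloc(page))
			return vmalloc_address(page);

		return page_address(page);
	}

Freeing works the same way in both cases: __free_pages() on the head page
eventually reaches __free_one_page(), where the PageVmalloc() check added
above routes virtually mapped compound pages to vfree_compound().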