---
 arch/x86_64/mm/ioremap.c |    3 ++
 fs/proc/kcore.c          |    3 ++
 fs/proc/mmu.c            |    3 ++
 kernel/wait.c            |    2 -
 mm/page_alloc.c          |   38 +++++++++++++++++++++++++++++++------
 mm/slub.c                |    4 +--
 mm/vmalloc.c             |   48 +++++++++++++++++++++++++++++++++++++++--------
 7 files changed, 84 insertions(+), 17 deletions(-)

Index: linux-2.6/mm/slub.c
===================================================================
--- linux-2.6.orig/mm/slub.c	2007-09-13 20:07:36.000000000 -0700
+++ linux-2.6/mm/slub.c	2007-09-13 20:07:56.000000000 -0700
@@ -158,8 +158,8 @@ static inline void ClearSlabDebug(struct
 
 /*
  * Small page size. Make sure that we do not fragment memory
  */
-#define DEFAULT_MAX_ORDER 1
-#define DEFAULT_MIN_OBJECTS 4
+#define DEFAULT_MAX_ORDER 4
+#define DEFAULT_MIN_OBJECTS 30
 
 #else
Index: linux-2.6/arch/x86_64/mm/ioremap.c
===================================================================
--- linux-2.6.orig/arch/x86_64/mm/ioremap.c	2007-09-13 21:26:04.000000000 -0700
+++ linux-2.6/arch/x86_64/mm/ioremap.c	2007-09-13 21:26:37.000000000 -0700
@@ -171,6 +171,7 @@ EXPORT_SYMBOL(ioremap_nocache);
 void iounmap(volatile void __iomem *addr)
 {
 	struct vm_struct *p, *o;
+	unsigned long flags;
 
 	if (addr <= high_memory)
 		return;
@@ -184,12 +185,14 @@ void iounmap(volatile void __iomem *addr
 	   in parallel. Reuse of the virtual address is prevented by
 	   leaving it in the global lists until we're done with it.
 	   cpa takes care of the direct mappings. */
+	local_irq_save(flags);
 	read_lock(&vmlist_lock);
 	for (p = vmlist; p; p = p->next) {
 		if (p->addr == addr)
 			break;
 	}
 	read_unlock(&vmlist_lock);
+	local_irq_restore(flags);
 
 	if (!p) {
 		printk("iounmap: bad address %p\n", addr);
Index: linux-2.6/fs/proc/kcore.c
===================================================================
--- linux-2.6.orig/fs/proc/kcore.c	2007-09-13 21:26:46.000000000 -0700
+++ linux-2.6/fs/proc/kcore.c	2007-09-13 21:27:14.000000000 -0700
@@ -314,6 +314,7 @@ read_kcore(struct file *file, char __use
 
 	while (buflen) {
 		struct kcore_list *m;
+		unsigned long flags;
 
 		read_lock(&kclist_lock);
 		for (m=kclist; m; m=m->next) {
@@ -335,6 +336,7 @@ read_kcore(struct file *file, char __use
 			if (!elf_buf)
 				return -ENOMEM;
 
+			local_irq_save(flags);
 			read_lock(&vmlist_lock);
 			for (m=vmlist; m && cursize; m=m->next) {
 				unsigned long vmstart;
@@ -364,6 +366,7 @@ read_kcore(struct file *file, char __use
 					(char *)vmstart, vmsize);
 			}
 			read_unlock(&vmlist_lock);
+			local_irq_restore(flags);
 			if (copy_to_user(buffer, elf_buf, tsz)) {
 				kfree(elf_buf);
 				return -EFAULT;
Index: linux-2.6/fs/proc/mmu.c
===================================================================
--- linux-2.6.orig/fs/proc/mmu.c	2007-09-13 21:31:34.000000000 -0700
+++ linux-2.6/fs/proc/mmu.c	2007-09-13 21:32:03.000000000 -0700
@@ -36,6 +36,7 @@ void get_vmalloc_info(struct vmalloc_inf
 	struct vm_struct *vma;
 	unsigned long free_area_size;
 	unsigned long prev_end;
+	unsigned long flags;
 
 	vmi->used = 0;
 
@@ -47,6 +48,7 @@ void get_vmalloc_inf
 
 		prev_end = VMALLOC_START;
 
+		local_irq_save(flags);
 		read_lock(&vmlist_lock);
 
 		for (vma = vmlist; vma; vma = vma->next) {
@@ -73,5 +75,6 @@ void get_vmalloc_inf
 			vmi->largest_chunk = VMALLOC_END - prev_end;
 
 		read_unlock(&vmlist_lock);
+		local_irq_restore(flags);
 	}
 }
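The three hunks above, and the vmalloc.c changes below, all apply the same pattern: vfree() can now be reached from interrupt context via __free_pages(), so vmlist_lock must only be taken with local interrupts disabled, or an interrupt arriving on the same CPU could recurse into the lock and deadlock. A minimal sketch of the reader side (assuming nothing else about the caller):

	unsigned long flags;

	/*
	 * Interrupts stay off while vmlist_lock is held: an interrupt
	 * handler could otherwise __free_pages() a PageVmalloc() page,
	 * recurse into vfree() and spin on the write side of the lock
	 * this CPU already holds for reading.
	 */
	local_irq_save(flags);
	read_lock(&vmlist_lock);
	/* ... walk vmlist ... */
	read_unlock(&vmlist_lock);
	local_irq_restore(flags);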
Index: linux-2.6/kernel/wait.c
===================================================================
--- linux-2.6.orig/kernel/wait.c	2007-09-13 20:43:12.000000000 -0700
+++ linux-2.6/kernel/wait.c	2007-09-13 20:43:38.000000000 -0700
@@ -245,7 +245,7 @@ EXPORT_SYMBOL(wake_up_bit);
 fastcall wait_queue_head_t *bit_waitqueue(void *word, int bit)
 {
 	const int shift = BITS_PER_LONG == 32 ? 5 : 6;
-	const struct zone *zone = page_zone(virt_to_page(word));
+	const struct zone *zone = page_zone(virt_to_head_page(word));
 	unsigned long val = (unsigned long)word << shift | bit;
 
 	return &zone->wait_table[hash_long(val, zone->wait_table_bits)];
Index: linux-2.6/mm/page_alloc.c
===================================================================
--- linux-2.6.orig/mm/page_alloc.c	2007-09-13 20:08:21.000000000 -0700
+++ linux-2.6/mm/page_alloc.c	2007-09-13 21:37:52.000000000 -0700
@@ -406,10 +406,6 @@ static inline void __free_one_page(struc
 	int order_size = 1 << order;
 
 	if (unlikely(PageCompound(page))) {
-		if (PageVmalloc(page)) {
-			vfree(vmalloc_address(page));
-			return;
-		}
 		destroy_compound_page(page, order);
 	}
 
@@ -1164,6 +1160,12 @@ get_page_from_freelist(gfp_t gfp_mask, u
 	int did_zlc_setup = 0;		/* just call zlc_setup() one time */
 	enum zone_type highest_zoneidx = -1; /* Gets set for policy zonelists */
 
+	/* Force use of vmalloc */
+	if (order > 2 && (gfp_mask & __GFP_VMALLOC) &&
+			(gfp_mask & __GFP_WAIT) &&
+			system_state == SYSTEM_RUNNING &&
+			!test_thread_flag(TIF_MEMDIE))
+		return NULL;
 zonelist_scan:
 	/*
 	 * Scan zonelist, looking for a zone with enough free.
@@ -1374,9 +1376,11 @@ nofail_alloc:
 	 */
 	if (order && (gfp_mask & (__GFP_VMALLOC|__GFP_COMP)) ==
 			(__GFP_VMALLOC|__GFP_COMP)) {
-		void *p = __vmalloc(order << PAGE_SHIFT,
+		void *p = __vmalloc(1 << (PAGE_SHIFT + order),
 				gfp_mask, PAGE_KERNEL);
 
+		printk("VMALLOC fallback order=%d result=%p\n",
+			order, p);
 		if (p) {
 			page = vmalloc_to_page(p);
 			goto got_pg;
@@ -1384,7 +1388,7 @@ nofail_alloc:
 		}
 		/* The OOM killer will not help higher order allocs so fail */
 		if (order > PAGE_ALLOC_COSTLY_ORDER)
-			goto nopage;
+			goto last_attempt;
 
 		out_of_memory(zonelist, gfp_mask, order);
 		goto restart;
@@ -1410,6 +1414,24 @@ nofail_alloc:
 		goto rebalance;
 	}
 
+last_attempt:
+	/*
+	 * Fallback to vmalloc if it's a higher order page. We may
+	 * have failed due to fragmentation
+	 */
+	if (order && (gfp_mask & (__GFP_VMALLOC|__GFP_COMP))
+				== (__GFP_VMALLOC|__GFP_COMP)) {
+		void *p = __vmalloc(1 << (PAGE_SHIFT + order),
+					gfp_mask, PAGE_KERNEL);
+
+		printk("VMALLOC fallback2 order=%d result=%p\n",
+			order, p);
+		if (p) {
+			page = vmalloc_to_page(p);
+			goto got_pg;
+		}
+	}
+
 nopage:
 	if (!(gfp_mask & __GFP_NOWARN) && printk_ratelimit()) {
 		printk(KERN_WARNING "%s: page allocation failure."
@@ -1466,6 +1488,10 @@ void __pagevec_free(struct pagevec *pvec
 
 fastcall void __free_pages(struct page *page, unsigned int order)
 {
+	if (unlikely(PageVmalloc(page))) {
+		vfree(vmalloc_address(page));
+		return;
+	}
 	if (put_page_testzero(page)) {
 		if (order == 0)
 			free_hot_page(page);
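For reference, a caller opts in to the fallback roughly as follows. This is a hypothetical user, not part of the patch; PageVmalloc(), vmalloc_address() and __GFP_VMALLOC are introduced earlier in this series:

	struct page *page;
	void *addr;

	/*
	 * Order-4 allocation that may come from vmalloc space when no
	 * contiguous block is free (and, with the debugging check in
	 * get_page_from_freelist() above, always does).
	 */
	page = alloc_pages(GFP_KERNEL | __GFP_VMALLOC | __GFP_COMP, 4);
	if (!page)
		return -ENOMEM;

	/*
	 * In the fallback case the pages are only virtually contiguous,
	 * so take the address from the vmalloc area, not the direct map.
	 */
	addr = PageVmalloc(page) ? vmalloc_address(page)
				 : page_address(page);

	/* ... use the buffer ... */

	__free_pages(page, 4);	/* routed to vfree() by the hunk above */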
Index: linux-2.6/mm/vmalloc.c
===================================================================
--- linux-2.6.orig/mm/vmalloc.c	2007-09-13 20:47:23.000000000 -0700
+++ linux-2.6/mm/vmalloc.c	2007-09-13 21:18:17.000000000 -0700
@@ -203,7 +203,9 @@ void *vmalloc_address(struct page *page)
 	void *v = NULL;
 	int i;
 	struct vm_struct *p;
+	unsigned long flags;
 
+	local_irq_save(flags);
 	read_lock(&vmlist_lock);
 	for (p = vmlist; p; p = p->next)
 		for (i = 0; i < p->nr_pages; i++)
@@ -213,6 +215,7 @@ void *vmalloc_address(struct page *page)
 		}
 
 	read_unlock(&vmlist_lock);
+	local_irq_restore(flags);
 	return v;
 }
 EXPORT_SYMBOL(vmalloc_address);
@@ -226,16 +229,17 @@ unsigned long vmalloc_to_pfn(void *vmall
 }
 EXPORT_SYMBOL(vmalloc_to_pfn);
 
-static struct vm_struct *__get_vm_area_node(unsigned long size, unsigned long flags,
+static struct vm_struct *__get_vm_area_node(unsigned long size, unsigned long vflags,
 					    unsigned long start, unsigned long end,
 					    int node, gfp_t gfp_mask)
 {
 	struct vm_struct **p, *tmp, *area;
 	unsigned long align = 1;
 	unsigned long addr;
+	unsigned long flags;
 
 	BUG_ON(in_interrupt());
-	if (flags & VM_IOREMAP) {
+	if (vflags & VM_IOREMAP) {
 		int bit = fls(size);
 
 		if (bit > IOREMAP_MAX_ORDER)
@@ -259,6 +263,7 @@ static struct vm_struct *__get_vm_area_n
 	 */
 	size += PAGE_SIZE;
 
+	local_irq_save(flags);
 	write_lock(&vmlist_lock);
 	for (p = &vmlist; (tmp = *p) != NULL ;p = &tmp->next) {
 		if ((unsigned long)tmp->addr < addr) {
@@ -280,18 +285,19 @@ found:
 	area->next = *p;
 	*p = area;
 
-	area->flags = flags;
+	area->flags = vflags;
 	area->addr = (void *)addr;
 	area->size = size;
 	area->pages = NULL;
 	area->nr_pages = 0;
 	area->phys_addr = 0;
 	write_unlock(&vmlist_lock);
-
+	local_irq_restore(flags);
 	return area;
 
 out:
 	write_unlock(&vmlist_lock);
+	local_irq_restore(flags);
 	kfree(area);
 	if (printk_ratelimit())
 		printk(KERN_WARNING "allocation failed: out of vmalloc space - use vmalloc= to increase size.\n");
@@ -351,7 +357,6 @@ static struct vm_struct *__remove_vm_are
 	return NULL;
 
 found:
-	unmap_vm_area(tmp);
 	*p = tmp->next;
 
 	/*
@@ -372,9 +377,15 @@ found:
 struct vm_struct *remove_vm_area(void *addr)
 {
 	struct vm_struct *v;
+	unsigned long flags;
+
+	local_irq_save(flags);
 	write_lock(&vmlist_lock);
 	v = __remove_vm_area(addr);
 	write_unlock(&vmlist_lock);
+	local_irq_restore(flags);
+	if (v)
+		unmap_vm_area(v);
 	return v;
 }
 
@@ -403,7 +414,7 @@ static void __vunmap(void *addr, int dea
 
 	if (deallocate_pages) {
 		int i;
-		int comp = PageHead(area->pages[0]);
+		int comp = PageCompound(area->pages[0]);
 
 		for (i = 0; i < area->nr_pages; i++) {
 			struct page *page = area->pages[i];
@@ -414,10 +425,15 @@ static void __vunmap(void *addr, int dea
 			if (comp) {
 				if (i) {
 					BUG_ON(page->first_page != area->pages[0]);
+					BUG_ON(!PageTail(page));
+					BUG_ON(PageHead(page));
 					__ClearPageTail(page);
 				}
-				else
+				else {
+					BUG_ON(PageTail(page));
+					BUG_ON(!PageHead(page));
 					__ClearPageHead(page);
+				}
 			}
 			__free_page(page);
 		}
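The interleaved changes above are hard to read; reconstructed (a sketch assuming the unshown context lines are unchanged), the teardown loop in __vunmap() becomes:

	if (deallocate_pages) {
		int i;
		/* Set for the compound higher-order pages that the page
		 * allocator fallback hands to vmalloc. */
		int comp = PageCompound(area->pages[0]);

		for (i = 0; i < area->nr_pages; i++) {
			struct page *page = area->pages[i];

			if (comp) {
				if (i) {
					/* Every page after the first must be
					 * a tail page of the same compound. */
					BUG_ON(page->first_page != area->pages[0]);
					BUG_ON(!PageTail(page));
					BUG_ON(PageHead(page));
					__ClearPageTail(page);
				} else {
					/* The first page is the head. */
					BUG_ON(PageTail(page));
					BUG_ON(!PageHead(page));
					__ClearPageHead(page);
				}
			}
			__free_page(page);
		}
	}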
__GFP_ZERO, PAGE_KERNEL); if (ret) { + local_irq_save(flags); write_lock(&vmlist_lock); area = __find_vm_area(ret); area->flags |= VM_USERMAP; write_unlock(&vmlist_lock); + local_irq_restore(flags); } return ret; } @@ -697,13 +716,16 @@ void *vmalloc_32_user(unsigned long size { struct vm_struct *area; void *ret; + unsigned long flags; ret = __vmalloc(size, GFP_VMALLOC32 | __GFP_ZERO, PAGE_KERNEL); if (ret) { + local_irq_save(flags); write_lock(&vmlist_lock); area = __find_vm_area(ret); area->flags |= VM_USERMAP; write_unlock(&vmlist_lock); + local_irq_restore(flags); } return ret; } @@ -714,11 +736,13 @@ long vread(char *buf, char *addr, unsign struct vm_struct *tmp; char *vaddr, *buf_start = buf; unsigned long n; + unsigned long flags; /* Don't allow overflow */ if ((unsigned long) addr + count < count) count = -(unsigned long) addr; + local_irq_save(flags); read_lock(&vmlist_lock); for (tmp = vmlist; tmp; tmp = tmp->next) { vaddr = (char *) tmp->addr; @@ -744,6 +768,7 @@ long vread(char *buf, char *addr, unsign } finished: read_unlock(&vmlist_lock); + local_irq_restore(flags); return buf - buf_start; } @@ -752,11 +777,13 @@ long vwrite(char *buf, char *addr, unsig struct vm_struct *tmp; char *vaddr, *buf_start = buf; unsigned long n; + unsigned long flags; /* Don't allow overflow */ if ((unsigned long) addr + count < count) count = -(unsigned long) addr; + local_irq_save(flags); read_lock(&vmlist_lock); for (tmp = vmlist; tmp; tmp = tmp->next) { vaddr = (char *) tmp->addr; @@ -781,6 +808,7 @@ long vwrite(char *buf, char *addr, unsig } finished: read_unlock(&vmlist_lock); + local_irq_restore(flags); return buf - buf_start; } @@ -804,10 +832,12 @@ int remap_vmalloc_range(struct vm_area_s unsigned long uaddr = vma->vm_start; unsigned long usize = vma->vm_end - vma->vm_start; int ret; + unsigned long flags; if ((PAGE_SIZE-1) & (unsigned long)addr) return -EINVAL; + local_irq_save(flags); read_lock(&vmlist_lock); area = __find_vm_area(addr); if (!area) @@ -819,6 +849,7 @@ int remap_vmalloc_range(struct vm_area_s if (usize + (pgoff << PAGE_SHIFT) > area->size - PAGE_SIZE) goto out_einval_locked; read_unlock(&vmlist_lock); + local_irq_restore(flags); addr += pgoff << PAGE_SHIFT; do { @@ -839,6 +870,7 @@ int remap_vmalloc_range(struct vm_area_s out_einval_locked: read_unlock(&vmlist_lock); + local_irq_restore(flags); return -EINVAL; } EXPORT_SYMBOL(remap_vmalloc_range);