Index: linux-2.6.18-mm3/include/asm-ia64/page.h
===================================================================
--- linux-2.6.18-mm3.orig/include/asm-ia64/page.h	2006-10-07 16:09:19.437892246 -0700
+++ linux-2.6.18-mm3/include/asm-ia64/page.h	2006-10-07 17:44:18.216335596 -0700
@@ -53,11 +53,7 @@
 #ifdef CONFIG_HUGETLB_PAGE
 # define HPAGE_REGION_BASE	RGN_BASE(RGN_HPAGE)
 # define HPAGE_SHIFT	hpage_shift
-#ifdef CONFIG_VIRTUAL_MEM_MAP_HUGE
-# define HPAGE_SHIFT_DEFAULT	24	/* Reduce memory overhead for virtual mem_map */
-#else
 # define HPAGE_SHIFT_DEFAULT	28	/* check ia64 SDM for architecture supported size */
-#endif
 # define HPAGE_SIZE	(__IA64_UL_CONST(1) << HPAGE_SHIFT)
 # define HPAGE_MASK	(~(HPAGE_SIZE - 1))
 
@@ -113,32 +109,40 @@ do {						\
 #define VIRTUAL_MEM_MAP_SIZE	(1UL << (IA64_MAX_PHYS_BITS - PAGE_SHIFT +\
 				STRUCT_PAGE_ORDER))
-#ifdef CONFIG_VIRTUAL_MEM_MAP_HUGE
-/*
- * Use huge pages for the virtual memory map. Since we have separate
- * huge page region we can use the whole range and leave VMALLOC
- * untouched.
- */
-#define VIRTUAL_MEM_MAP_REGION	RGN_HPAGE
-#define VIRTUAL_MEM_MAP	RGN_BASE(VIRTUAL_MEM_MAP_REGION)
+#else
+#define VIRTUAL_MEM_MAP_SIZE	0
+#endif
+
 #define VMALLOC_START	(RGN_BASE(RGN_GATE) + 0x200000000UL)
+#define VMALLOC_END	(RGN_BASE(RGN_GATE) + (1UL << (4*PAGE_SHIFT - 9)) - \
+				VIRTUAL_MEM_MAP_SIZE)
+
+/* 1 Megabyte memmap pages seem to be a good default */
+//#define VIRTUAL_MEM_MAP_PAGE_SHIFT 20
+/* But we chicken out .... */
+#define VIRTUAL_MEM_MAP_PAGE_SHIFT PAGE_SHIFT
+
+#define VIRTUAL_MEM_MAP_PAGE_SIZE	(1UL << VIRTUAL_MEM_MAP_PAGE_SHIFT)
+
+#define VIRTUAL_MEM_MAP	VMALLOC_END
-#else
 /*
- * Place the virtual memory map in the VMALLOC area reducing the
- * available address space of 128 TB by 8 TB.
+ * Do the same as htlbpage_to_page for virtual memory map pages.
+ * We cannot overlap with the PFNs for VMALLOC, so we create one
+ * PAGE_SIZE unit above VIRTUAL_MEM_MAP for each page of
+ * VIRTUAL_MEM_MAP_PAGE_SIZE.
+ *
+ * To get one's spinning head around this, consider what happens when
+ * PAGE_SHIFT == VIRTUAL_MEM_MAP_PAGE_SHIFT: the translation is the identity.
  */
-#define VIRTUAL_MEM_MAP_REGION	RGN_GATE
-#define VIRTUAL_MEM_MAP	(RGN_BASE(VIRTUAL_MEM_MAP_REGION) + 0x200000000UL)
-#define VMALLOC_START	(VIRTUAL_MEM_MAP + VIRTUAL_MEM_MAP_SIZE)
-#endif
-
-#define VMALLOC_END	(RGN_BASE(RGN_GATE) + (1UL << (4*PAGE_SHIFT - 9)))
+# define VIRTMM_PAGE_TO_PAGE(x)	(((unsigned long) REGION_NUMBER(x) << 61) \
+	| (__pa(VIRTUAL_MEM_MAP) + ((x - VIRTUAL_MEM_MAP) >> \
+	(VIRTUAL_MEM_MAP_PAGE_SHIFT - PAGE_SHIFT))))
 
 #define virt_addr_valid(kaddr)	pfn_valid(__pa(kaddr) >> PAGE_SHIFT)
 
 extern int ia64_pfn_valid (unsigned long pfn);
-#elif defined(CONFIG_FLATMEM)
+#if defined(CONFIG_FLATMEM)
 # define ia64_pfn_valid(pfn) 1
 #endif
Index: linux-2.6.18-mm3/arch/ia64/mm/fault.c
===================================================================
--- linux-2.6.18-mm3.orig/arch/ia64/mm/fault.c	2006-10-07 16:09:19.437892246 -0700
+++ linux-2.6.18-mm3/arch/ia64/mm/fault.c	2006-10-07 17:25:29.596526785 -0700
@@ -53,6 +53,11 @@ static inline int notify_page_fault(enum
 }
 #endif
 
+static int is_virtual_mem_map(unsigned long address)
+{
+	return REGION_NUMBER(address) == RGN_GATE &&
+			address >= VIRTUAL_MEM_MAP;
+}
 /*
  * Return TRUE if ADDRESS points at a page in the kernel's mapped segment
  * (inside region 5, on ia64) and that page is present.
@@ -65,6 +70,12 @@ mapped_kernel_page_is_present (unsigned
 	pmd_t *pmd;
 	pte_t *ptep, pte;
 
+	printk("mapped_kernel_page_is_present(%lx)\n", address);
+	if (is_virtual_mem_map(address)) {
+		address = VIRTMM_PAGE_TO_PAGE(address);
+		printk("fixed up virtual mem map page address=%lx\n", address);
+	}
+
 	pgd = pgd_offset_k(address);
 	if (pgd_none(*pgd) || pgd_bad(*pgd))
 		return 0;
@@ -94,6 +105,9 @@ ia64_do_page_fault (unsigned long addres
 	struct siginfo si;
 	unsigned long mask;
 
+	printk("ia64_do_page_fault(%lx, %lx) atomic=%d mm=%p user_mode=%d\n",
+		address, isr, in_atomic(), mm, user_mode(regs));
+
 	/* mmap_sem is performance critical.... */
 	prefetchw(&mm->mmap_sem);
 
@@ -112,7 +126,7 @@ ia64_do_page_fault (unsigned long addres
 	 * code.
 	 */
 
-	if ((REGION_NUMBER(address) == VIRTUAL_MEM_MAP_REGION) && !user_mode(regs))
+	if (is_virtual_mem_map(address) && !user_mode(regs))
 		goto bad_area_no_up;
 #endif
 
@@ -257,7 +271,7 @@ ia64_do_page_fault (unsigned long addres
 	 * translation, which fixed the problem.  So, we check to see if the translation is
 	 * valid, and return if it is.
 	 */
-	if (REGION_NUMBER(address) == 5 && mapped_kernel_page_is_present(address))
+	if (REGION_NUMBER(address) == RGN_GATE && mapped_kernel_page_is_present(address))
 		return;
 
 	if (ia64_done_with_exception(regs))
Index: linux-2.6.18-mm3/arch/ia64/mm/init.c
===================================================================
--- linux-2.6.18-mm3.orig/arch/ia64/mm/init.c	2006-10-07 16:09:19.436915744 -0700
+++ linux-2.6.18-mm3/arch/ia64/mm/init.c	2006-10-07 17:47:00.939693684 -0700
@@ -463,12 +463,6 @@ retry_pte:
 	return hole_next_pfn - pgdat->node_start_pfn;
 }
 
-#ifdef CONFIG_VIRTUAL_MEM_MAP_HUGE
-#define VMEM_MAP_PAGE_SIZE (1UL << hpage_shift)
-#else
-#define VMEM_MAP_PAGE_SIZE PAGE_SIZE
-#endif
-
 int __init
 create_mem_map_page_table (u64 start, u64 end, void *arg)
 {
@@ -483,33 +477,35 @@ create_mem_map_page_table (u64 start, u6
 	map_start = virt_to_page(start);
 	map_end   = virt_to_page(end);
 
-	start_page = (unsigned long) map_start & ~(VMEM_MAP_PAGE_SIZE - 1);
-	end_page = ALIGN((unsigned long) map_end, VMEM_MAP_PAGE_SIZE);
+	start_page = (unsigned long)(map_start) & ~(VIRTUAL_MEM_MAP_PAGE_SIZE - 1);
+	end_page = ALIGN((unsigned long)(map_end), VIRTUAL_MEM_MAP_PAGE_SIZE);
 	node = paddr_to_nid(__pa(start));
 
-	for (address = start_page; address < end_page; address += VMEM_MAP_PAGE_SIZE) {
-		pgd = pgd_offset_k(address);
+	for (address = start_page; address < end_page; address += VIRTUAL_MEM_MAP_PAGE_SIZE) {
+		unsigned long taddr = VIRTMM_PAGE_TO_PAGE(address);
+
+		pgd = pgd_offset_k(taddr);
 		if (pgd_none(*pgd))
 			pgd_populate(&init_mm, pgd, alloc_bootmem_pages_node(NODE_DATA(node), PAGE_SIZE));
-		pud = pud_offset(pgd, address);
+		pud = pud_offset(pgd, taddr);
 
 		if (pud_none(*pud))
 			pud_populate(&init_mm, pud, alloc_bootmem_pages_node(NODE_DATA(node), PAGE_SIZE));
-		pmd = pmd_offset(pud, address);
+		pmd = pmd_offset(pud, taddr);
 
 		if (pmd_none(*pmd))
 			pmd_populate_kernel(&init_mm, pmd, alloc_bootmem_pages_node(NODE_DATA(node), PAGE_SIZE));
-		pte = pte_offset_kernel(pmd, address);
+		pte = pte_offset_kernel(pmd, taddr);
 
 		if (pte_none(*pte)) {
 			unsigned long addr;
 
 			addr = __pa(__alloc_bootmem_node(NODE_DATA(node),
-						VMEM_MAP_PAGE_SIZE,
-						VMEM_MAP_PAGE_SIZE,
+						VIRTUAL_MEM_MAP_PAGE_SIZE,
+						VIRTUAL_MEM_MAP_PAGE_SIZE,
						__pa(MAX_DMA_ADDRESS)));
 			set_pte(pte, mk_pte_phys(addr, PAGE_KERNEL));
-			printk(KERN_CRIT "Virtual mmap range %lx-%lx page @%lx:%lx pte=%lx size=%lu node=%d\n", start, end, address, addr, pte_val(*pte), VMEM_MAP_PAGE_SIZE, node);
+			printk(KERN_CRIT
"Virtual mmap range %lx-%lx page @%lx:%lx:%lx pte=%lx size=%lu node=%d\n", start, end, address, taddr, addr, pte_val(*pte), VIRTUAL_MEM_MAP_PAGE_SIZE, node); } else printk(KERN_CRIT "Virtual mmap %lx-%lx @%lx node %d already present.\n", Index: linux-2.6.18-mm3/arch/ia64/Kconfig =================================================================== --- linux-2.6.18-mm3.orig/arch/ia64/Kconfig 2006-10-07 16:09:19.438868748 -0700 +++ linux-2.6.18-mm3/arch/ia64/Kconfig 2006-10-07 17:36:00.898390033 -0700 @@ -335,7 +335,7 @@ config ARCH_FLATMEM_ENABLE config ARCH_SPARSEMEM_ENABLE def_bool y - depends on ARCH_DISCONTIGMEM_ENABLE + depends on ARCH_DISCONTIGMEM_ENABLE && !VIRTUAL_MEM_MAP config ARCH_DISCONTIGMEM_DEFAULT def_bool y if (IA64_SGI_SN2 || IA64_GENERIC || IA64_HP_ZX1 || IA64_HP_ZX1_SWIOTLB) @@ -366,36 +366,19 @@ config ARCH_POPULATES_NODE_MAP # VIRTUAL_MEM_MAP and FLAT_NODE_MEM_MAP may be functionally equivalent but # the overhead of FLAT_NODE_MEM_MAP is much higher. Its even worse for # a SPARSEMEM configuration that needs indirections through multiple tables -# for elementary VM operations. +# for elementary VM operations and still has to take the penalties of +# TLBs for kernel space on IA64. # # VIRTUAL_MEM_MAP is the best choice for handling large sparse memory maps. # config VIRTUAL_MEM_MAP bool "Virtual mem map" - depends on !SPARSEMEM - default y if !IA64_HP_SIM + default y help - Say Y to compile the kernel with support for a virtual mem map. - This code also only takes effect if a memory hole of greater than - 1 Gb is found during boot. You must turn this option on if you - require the DISCONTIGMEM option for your machine. If you are - unsure, say Y. - -config VIRTUAL_MEM_MAP_HUGE - depends on VIRTUAL_MEM_MAP - help - By default we map the virtual memory map using the default page - size and take a part of VMALLOC space for the map. This option - makes the virtual memory map use huge pages as a base and moves - the virtual memory map out of the VMALLOC space. This has the - effect of decreasing TLB use necessary to access the virtual - memory map. - The default huge page size is decreased from 256M to 16M in order - in order to reduce overhead. A 4M huge page can map ~4GB memory. - A 16k page can map ~4 Megabytes of memory. - Note that changes of the huge page size via a boot option will - then also change the base page size for the virtual memory map. - Too high huge page sizes may lead to memory being wasted. + Virtual memory map support is the recommended default. It is + using the MMU to realize a sparse memory map. The default + page size for the virtual memory map is 1 Megabyte which makes + it require less TLBs than regular kernel memory. 
 
 config HOLES_IN_ZONE
 	bool
Index: linux-2.6.18-mm3/arch/ia64/kernel/ivt.S
===================================================================
--- linux-2.6.18-mm3.orig/arch/ia64/kernel/ivt.S	2006-10-07 11:36:55.058719426 -0700
+++ linux-2.6.18-mm3/arch/ia64/kernel/ivt.S	2006-10-07 18:04:52.942885975 -0700
@@ -103,10 +103,9 @@ ENTRY(vhpt_miss)
 	 *	- the faulting virtual address has no valid page table mapping
 	 */
 	mov r16=cr.ifa				// get address that caused the TLB miss
-#ifdef CONFIG_HUGETLB_PAGE
-	movl r18=PAGE_SHIFT
+	movl r28=IA64_VIRTUAL_MEM_MAP
 	mov r25=cr.itir
-#endif
+	movl r18=PAGE_SHIFT
 	;;
 	rsm psr.dt				// use physical addressing for data
 	mov r31=pr				// save the predicate registers
@@ -114,22 +113,26 @@ ENTRY(vhpt_miss)
 	shl r21=r16,3				// shift bit 60 into sign bit
 	shr.u r17=r16,61			// get the region number into r17
 	;;
+	cmp.eq p6,p7=RGN_GATE,r17		// is IFA pointing into region 5?
+	;;
+(p6)	cmp.lt.unc p9,p0=r28,r16		// check if this is the virtual memmap
+	;;
 	shr.u r22=r21,3
-#ifdef CONFIG_HUGETLB_PAGE
-	extr.u r26=r25,2,6
+(p7)	extr.u r26=r25,2,6			// r26 is the intended page size
+(p9)	mov r26=IA64_VIRTUAL_MEM_MAP_PAGE_SHIFT
 	;;
 	cmp.ne p8,p0=r18,r26
 	sub r27=r26,r18
+(p9)	sub r22=r22, r28			// offset is against VIRTUAL_MEM_MAP
+	;;
+(p9)	add r22=r22, VIRTUAL_MEM_MAP >> PAGE_SHIFT
 	;;
 (p8)	dep r25=r18,r25,2,6
 (p8)	shr r22=r22,r27
-#endif
 	;;
-	cmp.eq p6,p7=5,r17			// is IFA pointing into to region 5?
 	shr.u r18=r22,PGDIR_SHIFT		// get bottom portion of pgd index bit
 	;;
 (p7)	dep r17=r17,r19,(PAGE_SHIFT-3),3	// put region number bits in place
 
-	srlz.d
 	LOAD_PHYSICAL(p6, r19, swapper_pg_dir)	// region 5 is rooted at swapper_pg_dir
@@ -181,9 +184,7 @@ ENTRY(vhpt_miss)
 (p6)	br.cond.spnt.many page_fault		// handle bad address/page not present (page fault)
 	mov cr.ifa=r22
 
-#ifdef CONFIG_HUGETLB_PAGE
 (p8)	mov cr.itir=r25				// change to default page-size for VHPT
-#endif
 
 	/*
 	 * Now compute and insert the TLB entry for the virtual page table.  We never
Index: linux-2.6.18-mm3/arch/ia64/kernel/asm-offsets.c
===================================================================
--- linux-2.6.18-mm3.orig/arch/ia64/kernel/asm-offsets.c	2006-10-06 18:11:41.094742499 -0700
+++ linux-2.6.18-mm3/arch/ia64/kernel/asm-offsets.c	2006-10-07 16:35:58.317807206 -0700
@@ -13,6 +13,7 @@
 #include
 #include
 #include
+#include
 
 #include "../kernel/sigframe.h"
@@ -268,4 +269,7 @@ void foo(void)
 	DEFINE(IA64_TIME_SOURCE_MMIO64, TIME_SOURCE_MMIO64);
 	DEFINE(IA64_TIME_SOURCE_MMIO32, TIME_SOURCE_MMIO32);
 	DEFINE(IA64_TIMESPEC_TV_NSEC_OFFSET, offsetof (struct timespec, tv_nsec));
+	BLANK();
+	DEFINE(IA64_VIRTUAL_MEM_MAP, VIRTUAL_MEM_MAP);
+	DEFINE(IA64_VIRTUAL_MEM_MAP_PAGE_SHIFT, VIRTUAL_MEM_MAP_PAGE_SHIFT);
 }
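
Note: for reference, below is a minimal userspace sketch of the
VIRTMM_PAGE_TO_PAGE arithmetic. It is not part of the patch. The shift
values (16k base pages, 1M memmap pages), the VIRTUAL_MEM_MAP constant
and the __pa() stand-in are illustrative assumptions only, chosen so one
can see each memmap page collapse onto its own PAGE_SIZE slot above
VIRTUAL_MEM_MAP.

/* Sketch only: the constants below are illustrative, not the kernel's. */
#include <stdio.h>

#define PAGE_SHIFT			14	/* assume 16k base pages */
#define VIRTUAL_MEM_MAP_PAGE_SHIFT	20	/* assume 1M memmap pages */
#define RGN_SHIFT			61
#define REGION_NUMBER(x)		((x) >> RGN_SHIFT)

/* Assumed region 5 base of the virtual memmap (stand-in for VMALLOC_END). */
#define VIRTUAL_MEM_MAP		0xa000000100000000UL

/* Stand-in for __pa(): just strip the region bits. */
#define __pa(x)			((x) & ((1UL << RGN_SHIFT) - 1))

static unsigned long virtmm_page_to_page(unsigned long x)
{
	return (REGION_NUMBER(x) << RGN_SHIFT)
		| (__pa(VIRTUAL_MEM_MAP) + ((x - VIRTUAL_MEM_MAP) >>
			(VIRTUAL_MEM_MAP_PAGE_SHIFT - PAGE_SHIFT)));
}

int main(void)
{
	unsigned long a = VIRTUAL_MEM_MAP;	/* first memmap page */
	unsigned long b = a + (1UL << VIRTUAL_MEM_MAP_PAGE_SHIFT);

	/* a maps to itself; b lands one PAGE_SIZE (16k) slot above a,
	 * so the mem_map PTEs can never collide with VMALLOC's. */
	printf("%lx -> %lx\n", a, virtmm_page_to_page(a));
	printf("%lx -> %lx\n", b, virtmm_page_to_page(b));
	return 0;
}

With VIRTUAL_MEM_MAP_PAGE_SHIFT == PAGE_SHIFT, which the patch currently
chickens out to, the shift is zero and the translation is the identity.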