Prefetch ptes a line ahead.  Worth 25% on ia64 when doing big forks.

Add generic pte_prefetch() helpers that prefetch the next cache line of
page table entries while copy_pte_range()/zap_pte_range() walk a pte
page, so the line is already resident when the walker reaches it.
pte_prefetch_start() primes the current and next line on entry;
pte_prefetch_next() fires only when the walk crosses into a new cache
line.  The helpers are statement expressions (not do/while) because they
are used as comma-operator operands in the loop conditions below.
Architectures may override them via __HAVE_ARCH_PTE_PREFETCH.

Index: linux-2.6/include/asm-generic/pgtable.h
===================================================================
--- linux-2.6.orig/include/asm-generic/pgtable.h
+++ linux-2.6/include/asm-generic/pgtable.h
@@ -196,6 +196,33 @@ static inline void ptep_set_wrprotect(st
 })
 #endif
 
+#ifndef __HAVE_ARCH_PTE_PREFETCH
+#define PTES_PER_LINE (L1_CACHE_BYTES / sizeof(pte_t))
+#define ADDR_PER_LINE (PTES_PER_LINE << PAGE_SHIFT)
+#define ADDR_LINE_MASK (~(ADDR_PER_LINE - 1))
+
+/* Prefetch the cache line after the one holding *pte, if still below end. */
+#define pte_prefetch(pte, addr, end) \
+({ \
+	unsigned long __nextline = ((addr) + ADDR_PER_LINE) & ADDR_LINE_MASK; \
+	if (__nextline < (end)) \
+		prefetch((pte) + PTES_PER_LINE); \
+})
+
+#define pte_prefetch_start(pte, addr, end) \
+({ \
+	prefetch(pte); \
+	pte_prefetch(pte, addr, end); \
+})
+
+#define pte_prefetch_next(pte, addr, end) \
+({ \
+	unsigned long __addr = (addr); \
+	if (!(__addr & ~ADDR_LINE_MASK)) /* We hit a new cacheline */ \
+		pte_prefetch(pte, __addr, end); \
+})
+#endif
+
 #ifndef __ASSEMBLY__
 /*
  * When walking page tables, we usually want to skip any p?d_none entries;
Index: linux-2.6/mm/memory.c
===================================================================
--- linux-2.6.orig/mm/memory.c
+++ linux-2.6/mm/memory.c
@@ -437,6 +437,8 @@ again:
 	if (!dst_pte)
 		return -ENOMEM;
 	src_pte = pte_offset_map_nested(src_pmd, addr);
+	pte_prefetch_start(src_pte, addr, end);
+
 	src_ptl = pte_lockptr(src_mm, src_pmd);
 	spin_lock(src_ptl);
 
@@ -458,7 +460,8 @@ again:
 		}
 		copy_one_pte(dst_mm, src_mm, dst_pte, src_pte, vma, addr, rss);
 		progress += 8;
-	} while (dst_pte++, src_pte++, addr += PAGE_SIZE, addr != end);
+	} while (dst_pte++, src_pte++, addr += PAGE_SIZE,
+		 pte_prefetch_next(src_pte, addr, end), addr != end);
 
 	spin_unlock(src_ptl);
 	pte_unmap_nested(src_pte - 1);
@@ -561,6 +564,7 @@ static void zap_pte_range(struct mmu_gat
 	int anon_rss = 0;
 
 	pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
+	pte_prefetch_start(pte, addr, end);
 	do {
 		pte_t ptent = *pte;
 		if (pte_none(ptent))
@@ -624,7 +628,8 @@ static void zap_pte_range(struct mmu_gat
 		if (!pte_file(ptent))
 			free_swap_and_cache(pte_to_swp_entry(ptent));
 		pte_clear_full(mm, addr, pte, tlb->fullmm);
-	} while (pte++, addr += PAGE_SIZE, addr != end);
+	} while (pte++, addr += PAGE_SIZE,
+		 pte_prefetch_next(pte, addr, end), addr != end);
 
 	add_mm_rss(mm, file_rss, anon_rss);
 	pte_unmap_unlock(pte - 1, ptl);