Index: linux-2.6/mm/rmap.c
===================================================================
--- linux-2.6.orig/mm/rmap.c	2008-01-16 13:40:40.000000000 -0800
+++ linux-2.6/mm/rmap.c	2008-01-16 14:53:49.000000000 -0800
@@ -413,7 +413,8 @@ int page_referenced(struct page *page, i
 	return referenced;
 }
 
-static int page_mkclean_one(struct page *page, struct vm_area_struct *vma)
+static int page_mkclean_one(struct page *page, struct vm_area_struct *vma,
+			atomic_t *poutstanding)
 {
 	struct mm_struct *mm = vma->vm_mm;
 	unsigned long address;
@@ -437,6 +438,7 @@ static int page_mkclean_one(struct page
 		entry = pte_wrprotect(entry);
 		entry = pte_mkclean(entry);
 		set_pte_at(mm, address, pte, entry);
+		mmu_notifier(invalidate_page, mm, address, poutstanding);
 		ret = 1;
 	}
 
@@ -445,7 +447,8 @@ out:
 	return ret;
 }
 
-static int page_mkclean_file(struct address_space *mapping, struct page *page)
+static int page_mkclean_file(struct address_space *mapping, struct page *page,
+			atomic_t *poutstanding)
 {
 	pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
 	struct vm_area_struct *vma;
@@ -457,7 +460,7 @@ static int page_mkclean_file(struct addr
 	spin_lock(&mapping->i_mmap_lock);
 	vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff) {
 		if (vma->vm_flags & VM_SHARED)
-			ret += page_mkclean_one(page, vma);
+			ret += page_mkclean_one(page, vma, poutstanding);
 	}
 	spin_unlock(&mapping->i_mmap_lock);
 	return ret;
@@ -466,13 +469,19 @@ static int page_mkclean_file(struct addr
 int page_mkclean(struct page *page)
 {
 	int ret = 0;
+	atomic_t outstanding = ATOMIC_INIT(0);
 
 	BUG_ON(!PageLocked(page));
 
 	if (page_mapped(page)) {
 		struct address_space *mapping = page_mapping(page);
 		if (mapping) {
-			ret = page_mkclean_file(mapping, page);
+			ret = page_mkclean_file(mapping, page, &outstanding);
+			if (unlikely(atomic_read(&outstanding))) {
+				do {
+					schedule_timeout(1);
+				} while (atomic_read(&outstanding));
+			}
 			if (page_test_dirty(page)) {
 				page_clear_dirty(page);
 				ret = 1;
@@ -654,7 +663,7 @@ void page_remove_rmap(struct page *page,
  * repeatedly from either try_to_unmap_anon or try_to_unmap_file.
  */
 static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
-				int migration)
+				int migration, atomic_t *poutstanding)
 {
 	struct mm_struct *mm = vma->vm_mm;
 	unsigned long address;
@@ -685,6 +694,7 @@ static int try_to_unmap_one(struct page
 	/* Nuke the page table entry. */
 	flush_cache_page(vma, address, page_to_pfn(page));
 	pteval = ptep_clear_flush(vma, address, pte);
+	mmu_notifier(invalidate_page, mm, address, poutstanding);
 
 	/* Move the dirty bit to the physical page now the pte is gone. */
 	if (pte_dirty(pteval))
@@ -766,7 +776,8 @@ out:
 #define CLUSTER_MASK	(~(CLUSTER_SIZE - 1))
 
 static void try_to_unmap_cluster(unsigned long cursor,
-	unsigned int *mapcount, struct vm_area_struct *vma)
+	unsigned int *mapcount, struct vm_area_struct *vma,
+	atomic_t *poutstanding)
 {
 	struct mm_struct *mm = vma->vm_mm;
 	pgd_t *pgd;
@@ -815,6 +826,7 @@ static void try_to_unmap_cluster(unsigne
 		/* Nuke the page table entry. */
 		flush_cache_page(vma, address, pte_pfn(*pte));
 		pteval = ptep_clear_flush(vma, address, pte);
+		mmu_notifier(invalidate_page, mm, address, poutstanding);
 
 		/* If nonlinear, store the file page offset in the pte. */
 		if (page->index != linear_page_index(vma, address))
@@ -832,7 +844,8 @@ static void try_to_unmap_cluster(unsigne
 	pte_unmap_unlock(pte - 1, ptl);
 }
 
-static int try_to_unmap_anon(struct page *page, int migration)
+static int try_to_unmap_anon(struct page *page, int migration,
+				atomic_t *poutstanding)
 {
 	struct anon_vma *anon_vma;
 	struct vm_area_struct *vma;
@@ -843,7 +856,7 @@ static int try_to_unmap_anon(struct page
 		return ret;
 
 	list_for_each_entry(vma, &anon_vma->head, anon_vma_node) {
-		ret = try_to_unmap_one(page, vma, migration);
+		ret = try_to_unmap_one(page, vma, migration, poutstanding);
 		if (ret == SWAP_FAIL || !page_mapped(page))
 			break;
 	}
@@ -861,7 +874,8 @@ static int try_to_unmap_anon(struct page
  *
  * This function is only called from try_to_unmap for object-based pages.
 */
-static int try_to_unmap_file(struct page *page, int migration)
+static int try_to_unmap_file(struct page *page, int migration,
+				atomic_t *poutstanding)
 {
 	struct address_space *mapping = page->mapping;
 	pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
@@ -875,7 +889,7 @@ static int try_to_unmap_file(struct page
 
 	spin_lock(&mapping->i_mmap_lock);
 	vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff) {
-		ret = try_to_unmap_one(page, vma, migration);
+		ret = try_to_unmap_one(page, vma, migration, poutstanding);
 		if (ret == SWAP_FAIL || !page_mapped(page))
 			goto out;
 	}
@@ -924,7 +938,8 @@ static int try_to_unmap_file(struct page
 			cursor = (unsigned long) vma->vm_private_data;
 			while ( cursor < max_nl_cursor &&
 				cursor < vma->vm_end - vma->vm_start) {
-				try_to_unmap_cluster(cursor, &mapcount, vma);
+				try_to_unmap_cluster(cursor, &mapcount, vma,
+							poutstanding);
 				cursor += CLUSTER_SIZE;
 				vma->vm_private_data = (void *) cursor;
 				if ((int)mapcount <= 0)
@@ -963,14 +978,20 @@ out:
 int try_to_unmap(struct page *page, int migration)
 {
 	int ret;
+	atomic_t outstanding = ATOMIC_INIT(0);
 
 	BUG_ON(!PageLocked(page));
 
 	if (PageAnon(page))
-		ret = try_to_unmap_anon(page, migration);
+		ret = try_to_unmap_anon(page, migration, &outstanding);
 	else
-		ret = try_to_unmap_file(page, migration);
+		ret = try_to_unmap_file(page, migration, &outstanding);
 
+	if (unlikely(atomic_read(&outstanding))) {
+		do {
+			schedule_timeout(1);
+		} while (atomic_read(&outstanding));
+	}
 	if (!page_mapped(page))
 		ret = SWAP_SUCCESS;
 	return ret;
Index: linux-2.6/include/asm-generic/pgtable.h
===================================================================
--- linux-2.6.orig/include/asm-generic/pgtable.h	2008-01-16 14:29:20.000000000 -0800
+++ linux-2.6/include/asm-generic/pgtable.h	2008-01-16 14:29:29.000000000 -0800
@@ -86,7 +86,6 @@ do {									\
 	pte_t __pte;							\
 	__pte = ptep_get_and_clear((__vma)->vm_mm, __address, __ptep);	\
 	flush_tlb_page(__vma, __address);				\
-	mmu_notifier(invalidate_page, (__vma)->vm_mm, __address);	\
 	__pte;								\
 })
 #endif
Index: linux-2.6/include/linux/mmu_notifier.h
===================================================================
--- linux-2.6.orig/include/linux/mmu_notifier.h	2008-01-16 14:39:31.000000000 -0800
+++ linux-2.6/include/linux/mmu_notifier.h	2008-01-16 14:40:43.000000000 -0800
@@ -13,7 +13,7 @@ struct mmu_notifier_ops {
 			struct mm_struct *mm);
 	void (*invalidate_page)(struct mmu_notifier *mn,
 				struct mm_struct *mm,
-				unsigned long address);
+				unsigned long address, atomic_t *poutstanding);
 	void (*invalidate_range)(struct mmu_notifier *mn,
 				 struct mm_struct *mm,
 				 unsigned long start, unsigned long end);
Index: linux-2.6/mm/memory.c
===================================================================
--- linux-2.6.orig/mm/memory.c	2008-01-16 14:41:04.000000000 -0800
+++ linux-2.6/mm/memory.c	2008-01-16 14:56:59.000000000 -0800
@@ -883,13 +883,18 @@ unsigned long zap_page_range(struct vm_a
 	unsigned long end = address + size;
 	unsigned long nr_accounted = 0;
 
+	/*
+	 * We are either holding a writelock on mmap sem that prevents creation
+	 * of additional mappings or we are unmapping an inode mapping range
+	 * where we can restart if pages are left.
+	 */
+	mmu_notifier(invalidate_range, mm, address, end);
 	lru_add_drain();
 	tlb = tlb_gather_mmu(mm, 0);
 	update_hiwater_rss(mm);
 	end = unmap_vmas(&tlb, vma, address, end, &nr_accounted, details);
 	if (tlb)
 		tlb_finish_mmu(tlb, address, end);
-	mmu_notifier(invalidate_range, mm, address, end);
 	return end;
 }
 
Index: linux-2.6/mm/mprotect.c
===================================================================
--- linux-2.6.orig/mm/mprotect.c	2008-01-16 14:42:15.000000000 -0800
+++ linux-2.6/mm/mprotect.c	2008-01-16 15:39:35.000000000 -0800
@@ -248,6 +248,7 @@ sys_mprotect(unsigned long start, size_t
 	error = -ENOMEM;
 	if (!vma)
 		goto out;
+	mmu_notifier(invalidate_range, mm, start, end);
 	if (unlikely(grows & PROT_GROWSDOWN)) {
 		if (vma->vm_start >= end)
 			goto out;
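
For readers wondering what the extra atomic_t argument buys: the idea is that a
notifier whose invalidate_page callback cannot finish the external TLB flush
synchronously increments *poutstanding before deferring the work and decrements
it once the remote mapping is really gone. The schedule_timeout(1) loops in
page_mkclean() and try_to_unmap() then poll the counter back down to zero
before the VM proceeds. Below is a minimal, hypothetical consumer sketched
against that contract. None of it is part of the patch: the example_* names,
the workqueue, and the per-mm state are all invented for illustration.

/*
 * Hypothetical consumer of the new invalidate_page signature -- NOT part
 * of this patch. A driver that mirrors ptes into a device TLB defers the
 * remote flush to a workqueue and uses *poutstanding to tell the VM when
 * the flush has actually completed.
 */
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/workqueue.h>
#include <linux/mmu_notifier.h>

struct example_mmu {			/* hypothetical per-mm driver state */
	struct mmu_notifier mn;
	struct workqueue_struct *wq;
};

struct example_flush {			/* one deferred remote flush */
	struct work_struct work;
	unsigned long address;
	atomic_t *poutstanding;		/* counter owned by the VM caller */
};

static void example_flush_work(struct work_struct *work)
{
	struct example_flush *f = container_of(work, struct example_flush, work);

	/* ... tell the device to drop its translation of f->address ... */

	/* Remote entry is gone; let page_mkclean()/try_to_unmap() proceed. */
	atomic_dec(f->poutstanding);
	kfree(f);
}

static void example_invalidate_page(struct mmu_notifier *mn,
				struct mm_struct *mm,
				unsigned long address, atomic_t *poutstanding)
{
	struct example_mmu *e = container_of(mn, struct example_mmu, mn);
	struct example_flush *f;

	/* Called under the pte lock, so we must not sleep here. */
	f = kmalloc(sizeof(*f), GFP_ATOMIC);
	if (!f)
		return;	/* a real driver would need a synchronous fallback */

	f->address = address;
	f->poutstanding = poutstanding;
	INIT_WORK(&f->work, example_flush_work);

	/* Raise the count before queueing so the VM cannot return early. */
	atomic_inc(poutstanding);
	queue_work(e->wq, &f->work);
}

Note the GFP_ATOMIC allocation: invalidate_page fires with the pte lock held,
so the callback itself must not sleep, which is exactly why the completion has
to be pushed to a workqueue and signalled back through the counter instead of
being waited for inline.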