---
 include/linux/mmu_notifier.h |    6 +---
 mm/Kconfig                   |    2 -
 mm/memory.c                  |   36 ++++++++++++++++-------------
 mm/mmap.c                    |   52 ++++++++++++++++++++++++++-----------------
 4 files changed, 55 insertions(+), 41 deletions(-)

Index: linux-2.6/mm/memory.c
===================================================================
--- linux-2.6.orig/mm/memory.c	2008-04-18 14:10:18.000000000 -0700
+++ linux-2.6/mm/memory.c	2008-04-18 14:10:21.000000000 -0700
@@ -602,6 +602,7 @@ int copy_page_range(struct mm_struct *ds
 	unsigned long next;
 	unsigned long addr = vma->vm_start;
 	unsigned long end = vma->vm_end;
+	int ret = 0;
 
 	/*
 	 * Don't copy ptes where a page fault will fill them correctly.
@@ -611,7 +612,7 @@ int copy_page_range(struct mm_struct *ds
 	 */
 	if (!(vma->vm_flags & (VM_HUGETLB|VM_NONLINEAR|VM_PFNMAP|VM_INSERTPAGE))) {
 		if (!vma->anon_vma)
-			return 0;
+			goto out;
 	}
 
 	if (is_vm_hugetlb_page(vma))
@@ -627,14 +628,17 @@ int copy_page_range(struct mm_struct *ds
 
 		if (pgd_none_or_clear_bad(src_pgd))
 			continue;
 		if (copy_pud_range(dst_mm, src_mm, dst_pgd, src_pgd,
-						vma, addr, next))
-			return -ENOMEM;
+						vma, addr, next)) {
+			ret = -ENOMEM;
+			break;
+		}
 	} while (dst_pgd++, src_pgd++, addr = next, addr != end);
 
 	if (is_cow_mapping(vma->vm_flags))
 		mmu_notifier_end(src_mm, vma->vm_start, end);
-	return 0;
+out:
+	return ret;
 }
 
 static unsigned long zap_pte_range(struct mmu_gather *tlb,
@@ -1380,6 +1384,7 @@ int remap_pfn_range(struct vm_area_struc
 	BUG_ON(addr >= end);
 	pfn -= addr >> PAGE_SHIFT;
 	pgd = pgd_offset(mm, addr);
+	mmu_notifier_start(mm, addr, end);
 	flush_cache_range(vma, addr, end);
 	do {
 		next = pgd_addr_end(addr, end);
@@ -1388,6 +1393,7 @@ int remap_pfn_range(struct vm_area_struc
 		if (err)
 			break;
 	} while (pgd++, addr = next, addr != end);
+	mmu_notifier_end(mm, end - PAGE_ALIGN(size), end);
 	return err;
 }
 EXPORT_SYMBOL(remap_pfn_range);
@@ -1624,8 +1630,10 @@ static int do_wp_page(struct mm_struct *
 			page_table = pte_offset_map_lock(mm, pmd, address,
 							 &ptl);
 			new_page = NULL;
-			if (!pte_same(*page_table, orig_pte))
-				goto unlock;
+			if (!pte_same(*page_table, orig_pte)) {
+				pte_unmap_unlock(page_table, ptl);
+				goto check_dirty;
+			}
 			page_cache_release(old_page);
 
 			page_mkwrite = 1;
@@ -1643,7 +1651,8 @@ static int do_wp_page(struct mm_struct *
 			update_mmu_cache(vma, address, entry);
 		ret |= VM_FAULT_WRITE;
 		old_page = new_page = NULL;
-		goto unlock;
+		pte_unmap_unlock(page_table, ptl);
+		goto check_dirty;
 	}
 
 	/*
@@ -1668,6 +1677,7 @@ gotten:
 	/*
 	 * Re-check the pte - we dropped the lock
 	 */
+	mmu_notifier_start(mm, address, address + PAGE_SIZE);
 	page_table = pte_offset_map_lock(mm, pmd, address, &ptl);
 	if (likely(pte_same(*page_table, orig_pte))) {
 		if (old_page) {
@@ -1699,17 +1709,11 @@ gotten:
 	} else
 		mem_cgroup_uncharge_page(new_page);
 
-unlock:
 	pte_unmap_unlock(page_table, ptl);
-
-	if (new_page) {
-		if (new_page == old_page) {
-			/* cow happened, notify before releasing old_page */
-			mmu_notifier_start(mm, address, address + PAGE_SIZE);
-			mmu_notifier_end(mm, address, address + PAGE_SIZE);
-		}
+	mmu_notifier_end(mm, address, address + PAGE_SIZE);
+check_dirty:
+	if (new_page)
 		page_cache_release(new_page);
-	}
 	if (old_page)
 		page_cache_release(old_page);
 
Index: linux-2.6/include/linux/mmu_notifier.h
===================================================================
--- linux-2.6.orig/include/linux/mmu_notifier.h	2008-04-18 14:09:26.000000000 -0700
+++ linux-2.6/include/linux/mmu_notifier.h	2008-04-18 14:10:21.000000000 -0700
@@ -15,8 +15,7 @@ struct mmu_notifier_ops {
 	 * Called when nobody can register any more notifier in the mm
 	 * and after the "mn" notifier has been disarmed already.
 	 */
-	void (*release)(struct mmu_notifier *mn,
-			struct mm_struct *mm);
+	void (*release)(struct mmu_notifier *mn, struct mm_struct *mm);
 
 	/*
 	 * clear_flush_young is called after the VM is
@@ -25,8 +24,7 @@ struct mmu_notifier_ops {
 	 * accesses to the page through the secondary MMUs and not
 	 * only to the ones through the Linux pte.
 	 */
-	int (*clear_flush_young)(struct mmu_notifier *mn,
-				 struct mm_struct *mm,
+	int (*clear_flush_young)(struct mmu_notifier *mn, struct mm_struct *mm,
 				 unsigned long address);
 
 	/*
Index: linux-2.6/mm/Kconfig
===================================================================
--- linux-2.6.orig/mm/Kconfig	2008-04-18 14:09:26.000000000 -0700
+++ linux-2.6/mm/Kconfig	2008-04-18 14:10:21.000000000 -0700
@@ -196,4 +196,4 @@ config VIRT_TO_BUS
 
 config MMU_NOTIFIER
 	def_bool y
-	bool "MMU notifier, for paging KVM/RDMA"
+	bool "MMU notifier for devices/subsystems mapping memory"
Index: linux-2.6/mm/mmap.c
===================================================================
--- linux-2.6.orig/mm/mmap.c	2008-04-18 14:09:26.000000000 -0700
+++ linux-2.6/mm/mmap.c	2008-04-18 14:10:21.000000000 -0700
@@ -2034,8 +2034,8 @@ void exit_mmap(struct mm_struct *mm)
 	unsigned long end;
 
 	/* mm's last user has gone, and its about to be pulled down */
-	mmu_notifier_release(mm);
 	arch_exit_mmap(mm);
+	mmu_notifier_release(mm);
 
 	lru_add_drain();
 	flush_cache_mm(mm);
@@ -2250,6 +2250,35 @@ static int cmp_rwsem(const void *a, cons
 	return -1;
 }
 
+
+static void acquire_locks(struct mm_struct *mm,
+			  struct rw_semaphore **locks, int anon)
+{
+	struct vm_area_struct *vma;
+	int i = 0;
+
+	for (vma = mm->mmap; vma; vma = vma->vm_next) {
+		struct address_space *mapping;
+		mapping = vma->vm_file ? vma->vm_file->f_mapping : NULL;
+		if (mapping) {
+			if (!anon)
+				locks[i++] = &mapping->i_mmap_sem;
+		} else {
+			if (anon && vma->anon_vma)
+				locks[i++] = &vma->anon_vma->sem;
+		}
+	}
+
+	if (!i)
+		return;
+
+	sort(locks, i, sizeof(struct rw_semaphore *), cmp_rwsem, NULL);
+
+	while (i > 0) {
+		i--;
+		down_read(locks[i]);
+	}
+}
 /*
  * This operation locks against the VM for all pte/vma/mm related
  * operations that could ever happen on a certain mm. This includes
@@ -2260,8 +2289,6 @@ static int cmp_rwsem(const void *a, cons
 int mm_lock(struct mm_struct * mm)
 {
 	struct rw_semaphore **locks;
-	struct vm_area_struct *vma;
-	int i;
 
 	down_write(&mm->mmap_sem);
 	locks = vmalloc(sizeof(struct rw_semaphore *) * mm->map_count);
@@ -2270,23 +2297,8 @@ int mm_lock(struct mm_struct * mm)
 		return -ENOMEM;
 	}
 
-	i = 0;
-	for (vma = mm->mmap; vma; vma = vma->vm_next) {
-		struct address_space *mapping = vma->vm_file->f_mapping;
-		struct rw_semaphore *sem;
-
-		if (mapping)
-			sem = &mapping->i_mmap_sem;
-		else
-			sem = &vma->anon_vma->sem;
-
-		locks[i++] = sem;
-	}
-
-	sort(locks, mm->map_count, sizeof(struct rw_semaphore *), cmp_rwsem, NULL);
-
-	for (i = 0; i < mm->map_count; i++)
-		down_read(locks[i]);
+	acquire_locks(mm, locks, 0);
+	acquire_locks(mm, locks, 1);
 
 	vfree(locks);
 	return 0;