mmu_notifier: Callbacks to invalidate address ranges The invalidation of address ranges in an mm_struct needs to be performed when pages are removed or permissions etc. change. Most of the VM address space changes can use the range invalidate callback. invalidate_range() is generally called with mmap_sem held but no spinlocks are active. If invalidate_range() is called with locks held then we pass a flag into invalidate_range(). Comments state that mmap_sem must be held for remap_pfn_range(), but various drivers do not seem to do this. Signed-off-by: Andrea Arcangeli Signed-off-by: Robin Holt Signed-off-by: Christoph Lameter --- mm/fremap.c | 2 ++ mm/hugetlb.c | 2 ++ mm/memory.c | 11 +++++++++-- mm/mmap.c | 1 + 4 files changed, 14 insertions(+), 2 deletions(-) Index: linux-2.6/mm/fremap.c =================================================================== --- linux-2.6.orig/mm/fremap.c 2008-01-25 19:31:05.000000000 -0800 +++ linux-2.6/mm/fremap.c 2008-01-25 19:32:49.000000000 -0800 @@ -15,6 +15,7 @@ #include #include #include +#include #include #include @@ -211,6 +212,7 @@ asmlinkage long sys_remap_file_pages(uns spin_unlock(&mapping->i_mmap_lock); } + mmu_notifier(invalidate_range, mm, start, start + size, 0); err = populate_range(mm, vma, start, size, pgoff); if (!err && !(flags & MAP_NONBLOCK)) { if (unlikely(has_write_lock)) { Index: linux-2.6/mm/memory.c =================================================================== --- linux-2.6.orig/mm/memory.c 2008-01-25 19:31:05.000000000 -0800 +++ linux-2.6/mm/memory.c 2008-01-25 19:32:49.000000000 -0800 @@ -50,6 +50,7 @@ #include #include #include +#include #include #include @@ -891,6 +892,8 @@ unsigned long zap_page_range(struct vm_a end = unmap_vmas(&tlb, vma, address, end, &nr_accounted, details); if (tlb) tlb_finish_mmu(tlb, address, end); + mmu_notifier(invalidate_range, mm, address, end, + (details ? 
(details->i_mmap_lock != NULL) : 0)); return end; } @@ -1319,7 +1322,7 @@ int remap_pfn_range(struct vm_area_struc { pgd_t *pgd; unsigned long next; - unsigned long end = addr + PAGE_ALIGN(size); + unsigned long start = addr, end = addr + PAGE_ALIGN(size); struct mm_struct *mm = vma->vm_mm; int err; @@ -1360,6 +1363,7 @@ int remap_pfn_range(struct vm_area_struc if (err) break; } while (pgd++, addr = next, addr != end); + mmu_notifier(invalidate_range, mm, start, end, 0); return err; } EXPORT_SYMBOL(remap_pfn_range); @@ -1443,7 +1447,7 @@ int apply_to_page_range(struct mm_struct { pgd_t *pgd; unsigned long next; - unsigned long end = addr + size; + unsigned long start = addr, end = addr + size; int err; BUG_ON(addr >= end); @@ -1454,6 +1458,7 @@ int apply_to_page_range(struct mm_struct if (err) break; } while (pgd++, addr = next, addr != end); + mmu_notifier(invalidate_range, mm, start, end, 0); return err; } EXPORT_SYMBOL_GPL(apply_to_page_range); @@ -1634,6 +1639,8 @@ gotten: /* * Re-check the pte - we dropped the lock */ + mmu_notifier(invalidate_range, mm, address, + address + PAGE_SIZE - 1, 0); page_table = pte_offset_map_lock(mm, pmd, address, &ptl); if (likely(pte_same(*page_table, orig_pte))) { if (old_page) { Index: linux-2.6/mm/mmap.c =================================================================== --- linux-2.6.orig/mm/mmap.c 2008-01-25 19:31:05.000000000 -0800 +++ linux-2.6/mm/mmap.c 2008-01-25 19:32:49.000000000 -0800 @@ -1748,6 +1748,7 @@ static void unmap_region(struct mm_struc free_pgtables(&tlb, vma, prev? prev->vm_end: FIRST_USER_ADDRESS, next? 
next->vm_start: 0); tlb_finish_mmu(tlb, start, end); + mmu_notifier(invalidate_range, mm, start, end, 0); } /* Index: linux-2.6/mm/hugetlb.c =================================================================== --- linux-2.6.orig/mm/hugetlb.c 2008-01-25 19:33:58.000000000 -0800 +++ linux-2.6/mm/hugetlb.c 2008-01-25 19:34:13.000000000 -0800 @@ -14,6 +14,7 @@ #include #include #include +#include #include #include @@ -763,6 +764,7 @@ void __unmap_hugepage_range(struct vm_ar } spin_unlock(&mm->page_table_lock); flush_tlb_range(vma, start, end); + mmu_notifier(invalidate_range, mm, start, end, 1); list_for_each_entry_safe(page, tmp, &page_list, lru) { list_del(&page->lru); put_page(page);