From: Peter Zijlstra mprotect() resets the page protections, which could result in extra write faults for those pages whose dirty state we track using write faults and are dirty already. Signed-off-by: Peter Zijlstra Cc: Hugh Dickins Signed-off-by: Andrew Morton --- mm/mprotect.c | 34 ++++++++++++++++++++++++---------- 1 file changed, 24 insertions(+), 10 deletions(-) diff -puN mm/mprotect.c~mm-optimize-the-new-mprotect-code-a-bit mm/mprotect.c --- a/mm/mprotect.c~mm-optimize-the-new-mprotect-code-a-bit +++ a/mm/mprotect.c @@ -27,7 +27,8 @@ #include static void change_pte_range(struct mm_struct *mm, pmd_t *pmd, - unsigned long addr, unsigned long end, pgprot_t newprot) + unsigned long addr, unsigned long end, pgprot_t newprot, + int dirty_accountable) { pte_t *pte, oldpte; spinlock_t *ptl; @@ -42,7 +43,14 @@ static void change_pte_range(struct mm_s * bits by wiping the pte and then setting the new pte * into place. */ - ptent = pte_modify(ptep_get_and_clear(mm, addr, pte), newprot); + ptent = ptep_get_and_clear(mm, addr, pte); + ptent = pte_modify(ptent, newprot); + /* + * Avoid taking write faults for pages we know to be + * dirty. + */ + if (dirty_accountable && pte_dirty(ptent)) + ptent = pte_mkwrite(ptent); set_pte_at(mm, addr, pte, ptent); lazy_mmu_prot_update(ptent); #ifdef CONFIG_MIGRATION @@ -66,7 +74,8 @@ static void change_pte_range(struct mm_s } static inline void change_pmd_range(struct mm_struct *mm, pud_t *pud, - unsigned long addr, unsigned long end, pgprot_t newprot) + unsigned long addr, unsigned long end, pgprot_t newprot, + int dirty_accountable) { pmd_t *pmd; unsigned long next; @@ -76,12 +85,13 @@ static inline void change_pmd_range(stru next = pmd_addr_end(addr, end); if (pmd_none_or_clear_bad(pmd)) continue; - change_pte_range(mm, pmd, addr, next, newprot); + change_pte_range(mm, pmd, addr, next, newprot, dirty_accountable); } while (pmd++, addr = next, addr != end); } static inline void change_pud_range(struct mm_struct *mm, pgd_t *pgd, - unsigned long addr, unsigned long end, pgprot_t newprot) + unsigned long addr, unsigned long end, pgprot_t newprot, + int dirty_accountable) { pud_t *pud; unsigned long next; @@ -91,12 +101,13 @@ static inline void change_pud_range(stru next = pud_addr_end(addr, end); if (pud_none_or_clear_bad(pud)) continue; - change_pmd_range(mm, pud, addr, next, newprot); + change_pmd_range(mm, pud, addr, next, newprot, dirty_accountable); } while (pud++, addr = next, addr != end); } static void change_protection(struct vm_area_struct *vma, - unsigned long addr, unsigned long end, pgprot_t newprot) + unsigned long addr, unsigned long end, pgprot_t newprot, + int dirty_accountable) { struct mm_struct *mm = vma->vm_mm; pgd_t *pgd; @@ -110,7 +121,7 @@ static void change_protection(struct vm_ next = pgd_addr_end(addr, end); if (pgd_none_or_clear_bad(pgd)) continue; - change_pud_range(mm, pgd, addr, next, newprot); + change_pud_range(mm, pgd, addr, next, newprot, dirty_accountable); } while (pgd++, addr = next, addr != end); flush_tlb_range(vma, start, end); } @@ -125,6 +136,7 @@ mprotect_fixup(struct vm_area_struct *vm unsigned long charged = 0; pgoff_t pgoff; int error; + int dirty_accountable = 0; if (newflags == oldflags) { *pprev = vma; @@ -181,14 +193,16 @@ success: vma->vm_flags = newflags; vma->vm_page_prot = protection_map[newflags & (VM_READ|VM_WRITE|VM_EXEC|VM_SHARED)]; - if (vma_wants_writenotify(vma)) + if (vma_wants_writenotify(vma)) { vma->vm_page_prot = protection_map[newflags & (VM_READ|VM_WRITE|VM_EXEC)]; + dirty_accountable = 1; + } if (is_vm_hugetlb_page(vma)) hugetlb_change_protection(vma, start, end, vma->vm_page_prot); else - change_protection(vma, start, end, vma->vm_page_prot); + change_protection(vma, start, end, vma->vm_page_prot, dirty_accountable); vm_stat_account(mm, oldflags, vma->vm_file, -nrpages); vm_stat_account(mm, newflags, vma->vm_file, nrpages); return 0; _