From: Hugh Dickins Divide remove_vm_struct into two parts: first anon_vma_unlink plus unlink_file_vma, to unlink the vma from the list and tree by which rmap or vmtruncate might find it; then remove_vma to close, fput and free. The intention here is to do the anon_vma_unlink and unlink_file_vma earlier, in free_pgtables before freeing any page tables: so we can be sure that any page tables traversed by rmap and vmtruncate are stable (and other, ordinary cases are stabilized by holding mmap_sem). This will be crucial to traversing pgd,pud,pmd without page_table_lock. But testing the split-out patch showed that lifting the page_table_lock is symbiotically necessary to make this change - the lock ordering is wrong to move those unlinks into free_pgtables while it's under ptlock. Signed-off-by: Hugh Dickins Signed-off-by: Andrew Morton --- include/linux/mm.h | 1 + mm/mmap.c | 41 +++++++++++++++++++++++++++-------------- 2 files changed, 28 insertions(+), 14 deletions(-) diff -puN include/linux/mm.h~mm-unlink_file_vma-remove_vma include/linux/mm.h --- devel/include/linux/mm.h~mm-unlink_file_vma-remove_vma 2005-10-11 00:34:33.000000000 -0700 +++ devel-akpm/include/linux/mm.h 2005-10-11 00:34:33.000000000 -0700 @@ -828,6 +828,7 @@ extern int split_vma(struct mm_struct *, extern int insert_vm_struct(struct mm_struct *, struct vm_area_struct *); extern void __vma_link_rb(struct mm_struct *, struct vm_area_struct *, struct rb_node **, struct rb_node *); +extern void unlink_file_vma(struct vm_area_struct *); extern struct vm_area_struct *copy_vma(struct vm_area_struct **, unsigned long addr, unsigned long len, pgoff_t pgoff); extern void exit_mmap(struct mm_struct *); diff -puN mm/mmap.c~mm-unlink_file_vma-remove_vma mm/mmap.c --- devel/mm/mmap.c~mm-unlink_file_vma-remove_vma 2005-10-11 00:34:33.000000000 -0700 +++ devel-akpm/mm/mmap.c 2005-10-11 00:34:33.000000000 -0700 @@ -181,26 +181,44 @@ static void __remove_shared_vm_struct(st } /* - * Remove one vm structure and free it. + * Unlink a file-based vm structure from its prio_tree, to hide + * vma from rmap and vmtruncate before freeing its page tables. */ -static void remove_vm_struct(struct vm_area_struct *vma) +void unlink_file_vma(struct vm_area_struct *vma) { struct file *file = vma->vm_file; - might_sleep(); if (file) { struct address_space *mapping = file->f_mapping; spin_lock(&mapping->i_mmap_lock); __remove_shared_vm_struct(vma, file, mapping); spin_unlock(&mapping->i_mmap_lock); } +} + +/* + * Close a vm structure and free it, returning the next. + */ +static struct vm_area_struct *remove_vma(struct vm_area_struct *vma) +{ + struct vm_area_struct *next = vma->vm_next; + + /* + * Hide vma from rmap and vmtruncate before freeing page tables: + * to be moved into free_pgtables once page_table_lock is lifted + * from it, but until then lock ordering forbids that move. + */ + anon_vma_unlink(vma); + unlink_file_vma(vma); + + might_sleep(); if (vma->vm_ops && vma->vm_ops->close) vma->vm_ops->close(vma); - if (file) - fput(file); - anon_vma_unlink(vma); + if (vma->vm_file) + fput(vma->vm_file); mpol_free(vma_policy(vma)); kmem_cache_free(vm_area_cachep, vma); + return next; } asmlinkage unsigned long sys_brk(unsigned long brk) @@ -1612,15 +1630,13 @@ find_extend_vma(struct mm_struct * mm, u static void remove_vma_list(struct mm_struct *mm, struct vm_area_struct *vma) { do { - struct vm_area_struct *next = vma->vm_next; long nrpages = vma_pages(vma); mm->total_vm -= nrpages; if (vma->vm_flags & VM_LOCKED) mm->locked_vm -= nrpages; vm_stat_account(mm, vma->vm_flags, vma->vm_file, -nrpages); - remove_vm_struct(vma); - vma = next; + vma = remove_vma(vma); } while (vma); validate_mm(mm); } @@ -1944,11 +1960,8 @@ void exit_mmap(struct mm_struct *mm) * Walk the list again, actually closing and freeing it * without holding any MM locks. */ - while (vma) { - struct vm_area_struct *next = vma->vm_next; - remove_vm_struct(vma); - vma = next; - } + while (vma) + vma = remove_vma(vma); BUG_ON(mm->nr_ptes > (FIRST_USER_ADDRESS+PMD_SIZE-1)>>PMD_SHIFT); } _