Subject: backout anon-vma-chain, can't debug two things at the same time

---

diff --git a/arch/ia64/kernel/perfmon.c b/arch/ia64/kernel/perfmon.c
--- a/arch/ia64/kernel/perfmon.c
+++ b/arch/ia64/kernel/perfmon.c
@@ -2316,7 +2316,6 @@ pfm_smpl_buffer_alloc(struct task_struct
 		DPRINT(("Cannot allocate vma\n"));
 		goto error_kmem;
 	}
-	INIT_LIST_HEAD(&vma->anon_vma_chain);
 
 	/*
 	 * partially initialize the vma for the sampling buffer
diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c
--- a/arch/ia64/mm/init.c
+++ b/arch/ia64/mm/init.c
@@ -117,7 +117,6 @@ ia64_init_addr_space (void)
 	 */
 	vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL);
 	if (vma) {
-		INIT_LIST_HEAD(&vma->anon_vma_chain);
 		vma->vm_mm = current->mm;
 		vma->vm_start = current->thread.rbs_bot & PAGE_MASK;
 		vma->vm_end = vma->vm_start + PAGE_SIZE;
@@ -136,7 +135,6 @@ ia64_init_addr_space (void)
 	if (!(current->personality & MMAP_PAGE_ZERO)) {
 		vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL);
 		if (vma) {
-			INIT_LIST_HEAD(&vma->anon_vma_chain);
 			vma->vm_mm = current->mm;
 			vma->vm_end = PAGE_SIZE;
 			vma->vm_page_prot = __pgprot(pgprot_val(PAGE_READONLY) | _PAGE_MA_NAT);
diff --git a/fs/exec.c b/fs/exec.c
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -246,7 +246,6 @@ static int __bprm_mm_init(struct linux_b
 	vma->vm_start = vma->vm_end - PAGE_SIZE;
 	vma->vm_flags = VM_STACK_FLAGS;
 	vma->vm_page_prot = vm_get_page_prot(vma->vm_flags);
-	INIT_LIST_HEAD(&vma->anon_vma_chain);
 	err = insert_vm_struct(mm, vma);
 	if (err)
 		goto err;
@@ -517,8 +516,7 @@ static int shift_arg_pages(struct vm_are
 	/*
 	 * cover the whole range: [new_start, old_end)
 	 */
-	if (vma_adjust(vma, new_start, old_end, vma->vm_pgoff, NULL))
-		return -ENOMEM;
+	vma_adjust(vma, new_start, old_end, vma->vm_pgoff, NULL);
 
 	/*
 	 * move the page tables downwards, on failure we rely on
@@ -549,7 +547,7 @@ static int shift_arg_pages(struct vm_are
 	tlb_finish_mmu(tlb, new_end, old_end);
 
 	/*
-	 * Shrink the vma to just the new range. Always succeeds.
+	 * shrink the vma to just the new range.
 	 */
 	vma_adjust(vma, new_start, new_end, vma->vm_pgoff, NULL);
 
diff --git a/include/linux/mm.h b/include/linux/mm.h
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1222,7 +1222,7 @@ static inline void vma_nonlinear_insert(
 
 /* mmap.c */
 extern int __vm_enough_memory(struct mm_struct *mm, long pages, int cap_sys_admin);
-extern int vma_adjust(struct vm_area_struct *vma, unsigned long start,
+extern void vma_adjust(struct vm_area_struct *vma, unsigned long start,
 	unsigned long end, pgoff_t pgoff, struct vm_area_struct *insert);
 extern struct vm_area_struct *vma_merge(struct mm_struct *,
 	struct vm_area_struct *prev, unsigned long addr, unsigned long end,
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -163,8 +163,7 @@ struct vm_area_struct {
 	 * can only be in the i_mmap tree. An anonymous MAP_PRIVATE, stack
 	 * or brk vma (with NULL file) can only be in an anon_vma list.
 	 */
-	struct list_head anon_vma_chain; /* Serialized by mmap_sem &
-					  * page_table_lock */
+	struct list_head anon_vma_node;	/* Serialized by anon_vma->lock */
 	struct anon_vma *anon_vma;	/* Serialized by page_table_lock */
 
 	/* Function pointers to deal with this struct. */
diff --git a/include/linux/rmap.h b/include/linux/rmap.h
--- a/include/linux/rmap.h
+++ b/include/linux/rmap.h
@@ -37,27 +37,7 @@ struct anon_vma {
 	 * is serialized by a system wide lock only visible to
 	 * mm_take_all_locks() (mm_all_locks_mutex).
 	 */
-	struct list_head head;	/* Chain of private "related" vmas */
-};
-
-/*
- * The copy-on-write semantics of fork mean that an anon_vma
- * can become associated with multiple processes. Furthermore,
- * each child process will have its own anon_vma, where new
- * pages for that process are instantiated.
- *
- * This structure allows us to find the anon_vmas associated
- * with a VMA, or the VMAs associated with an anon_vma.
- * The "same_vma" list contains the anon_vma_chains linking
- * all the anon_vmas associated with this VMA.
- * The "same_anon_vma" list contains the anon_vma_chains
- * which link all the VMAs associated with this anon_vma.
- */
-struct anon_vma_chain {
-	struct vm_area_struct *vma;
-	struct anon_vma *anon_vma;
-	struct list_head same_vma;	/* locked by mmap_sem & page_table_lock */
-	struct list_head same_anon_vma;	/* locked by anon_vma->lock */
+	struct list_head head;	/* List of private "related" vmas */
 };
 
 #ifdef CONFIG_MMU
@@ -109,23 +89,15 @@ static inline void anon_vma_unlock(struc
  */
 void anon_vma_init(void);	/* create anon_vma_cachep */
 int anon_vma_prepare(struct vm_area_struct *);
-void unlink_anon_vmas(struct vm_area_struct *);
-int anon_vma_clone(struct vm_area_struct *, struct vm_area_struct *);
-int anon_vma_fork(struct vm_area_struct *, struct vm_area_struct *);
+void __anon_vma_merge(struct vm_area_struct *, struct vm_area_struct *);
+void anon_vma_unlink(struct vm_area_struct *);
+void anon_vma_link(struct vm_area_struct *);
 void __anon_vma_link(struct vm_area_struct *);
 void anon_vma_free(struct anon_vma *);
 
-static inline void anon_vma_merge(struct vm_area_struct *vma,
-				  struct vm_area_struct *next)
-{
-	VM_BUG_ON(vma->anon_vma != next->anon_vma);
-	unlink_anon_vmas(next);
-}
-
 /*
  * rmap interfaces called when adding or removing pte of page
  */
-void page_move_anon_rmap(struct page *, struct vm_area_struct *, unsigned long);
 void page_add_anon_rmap(struct page *, struct vm_area_struct *, unsigned long);
 void page_add_new_anon_rmap(struct page *, struct vm_area_struct *, unsigned long);
 void page_add_file_rmap(struct page *);
diff --git a/kernel/fork.c b/kernel/fork.c
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -336,17 +336,15 @@ static int dup_mmap(struct mm_struct *mm
 		if (!tmp)
 			goto fail_nomem;
 		*tmp = *mpnt;
-		INIT_LIST_HEAD(&tmp->anon_vma_chain);
 		pol = mpol_dup(vma_policy(mpnt));
 		retval = PTR_ERR(pol);
 		if (IS_ERR(pol))
 			goto fail_nomem_policy;
 		vma_set_policy(tmp, pol);
-		if (anon_vma_fork(tmp, mpnt))
-			goto fail_nomem_anon_vma_fork;
 		tmp->vm_flags &= ~VM_LOCKED;
 		tmp->vm_mm = mm;
 		tmp->vm_next = NULL;
+		anon_vma_link(tmp);
 		file = tmp->vm_file;
 		if (file) {
 			struct inode *inode = file->f_path.dentry->d_inode;
@@ -401,8 +399,6 @@ out:
 	flush_tlb_mm(oldmm);
 	up_write(&oldmm->mmap_sem);
 	return retval;
-fail_nomem_anon_vma_fork:
-	mpol_put(pol);
 fail_nomem_policy:
 	kmem_cache_free(vm_area_cachep, tmp);
 fail_nomem:
diff --git a/mm/ksm.c b/mm/ksm.c
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -1563,12 +1563,10 @@ int page_referenced_ksm(struct page *pag
 again:
 	hlist_for_each_entry(rmap_item, hlist, &stable_node->hlist, hlist) {
 		struct anon_vma *anon_vma = rmap_item->anon_vma;
-		struct anon_vma_chain *vmac;
 		struct vm_area_struct *vma;
 
 		spin_lock(&anon_vma->lock);
-		list_for_each_entry(vmac, &anon_vma->head, same_anon_vma) {
-			vma = vmac->vma;
+		list_for_each_entry(vma, &anon_vma->head, anon_vma_node) {
 			if (rmap_item->address < vma->vm_start ||
 			    rmap_item->address >= vma->vm_end)
 				continue;
@@ -1616,12 +1614,10 @@ int try_to_unmap_ksm(struct page *page,
 again:
 	hlist_for_each_entry(rmap_item, hlist, &stable_node->hlist, hlist) {
 		struct anon_vma *anon_vma = rmap_item->anon_vma;
-		struct anon_vma_chain *vmac;
 		struct vm_area_struct *vma;
 
 		spin_lock(&anon_vma->lock);
-		list_for_each_entry(vmac, &anon_vma->head, same_anon_vma) {
-			vma = vmac->vma;
+		list_for_each_entry(vma, &anon_vma->head, anon_vma_node) {
 			if (rmap_item->address < vma->vm_start ||
 			    rmap_item->address >= vma->vm_end)
 				continue;
@@ -1668,12 +1664,10 @@ int rmap_walk_ksm(struct page *page, int
 again:
 	hlist_for_each_entry(rmap_item, hlist, &stable_node->hlist, hlist) {
 		struct anon_vma *anon_vma = rmap_item->anon_vma;
-		struct anon_vma_chain *vmac;
 		struct vm_area_struct *vma;
 
 		spin_lock(&anon_vma->lock);
-		list_for_each_entry(vmac, &anon_vma->head, same_anon_vma) {
-			vma = vmac->vma;
+		list_for_each_entry(vma, &anon_vma->head, anon_vma_node) {
 			if (rmap_item->address < vma->vm_start ||
 			    rmap_item->address >= vma->vm_end)
 				continue;
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -384,12 +384,9 @@ static void collect_procs_anon(struct pa
 	if (av == NULL)	/* Not actually mapped anymore */
 		goto out;
 	for_each_process (tsk) {
-		struct anon_vma_chain *vmac;
-
 		if (!task_early_kill(tsk))
 			continue;
-		list_for_each_entry(vmac, &av->head, same_anon_vma) {
-			vma = vmac->vma;
+		list_for_each_entry (vma, &av->head, anon_vma_node) {
 			if (!page_mapped_in_vma(page, vma))
 				continue;
 			if (vma->vm_mm == tsk->mm)
diff --git a/mm/memory.c b/mm/memory.c
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -372,7 +372,7 @@ void free_pgtables(struct mmu_gather *tl
 		 * Hide vma from rmap and truncate_pagecache before freeing
 		 * pgtables
 		 */
-		unlink_anon_vmas(vma);
+		anon_vma_unlink(vma);
 		unlink_file_vma(vma);
 
 		if (is_vm_hugetlb_page(vma)) {
@@ -386,7 +386,7 @@ void free_pgtables(struct mmu_gather *tl
 			       && !is_vm_hugetlb_page(next)) {
 				vma = next;
 				next = vma->vm_next;
-				unlink_anon_vmas(vma);
+				anon_vma_unlink(vma);
 				unlink_file_vma(vma);
 			}
 			free_pgd_range(tlb, addr, vma->vm_end,
@@ -2132,13 +2132,6 @@ static int do_wp_page(struct mm_struct *
 			page_cache_release(old_page);
 		}
 		reuse = reuse_swap_page(old_page);
-		if (reuse)
-			/*
-			 * The page is all ours. Move it to our anon_vma so
-			 * the rmap code will not search our parent or siblings.
-			 * Protected against the rmap code by the page lock.
-			 */
-			page_move_anon_rmap(old_page, vma, address);
 		unlock_page(old_page);
 	} else if (unlikely((vma->vm_flags & (VM_WRITE|VM_SHARED)) ==
 					(VM_WRITE|VM_SHARED))) {
diff --git a/mm/mmap.c b/mm/mmap.c
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -437,6 +437,7 @@ __vma_link(struct mm_struct *mm, struct
 {
 	__vma_link_list(mm, vma, prev, rb_parent);
 	__vma_link_rb(mm, vma, rb_link, rb_parent);
+	__anon_vma_link(vma);
 }
 
 static void vma_link(struct mm_struct *mm, struct vm_area_struct *vma,
@@ -498,7 +499,7 @@ __vma_unlink(struct mm_struct *mm, struc
  * are necessary. The "insert" vma (if any) is to be inserted
  * before we drop the necessary locks.
  */
-int vma_adjust(struct vm_area_struct *vma, unsigned long start,
+void vma_adjust(struct vm_area_struct *vma, unsigned long start,
 	unsigned long end, pgoff_t pgoff, struct vm_area_struct *insert)
 {
 	struct mm_struct *mm = vma->vm_mm;
@@ -507,12 +508,11 @@ int vma_adjust(struct vm_area_struct *vm
 	struct address_space *mapping = NULL;
 	struct prio_tree_root *root = NULL;
 	struct file *file = vma->vm_file;
+	struct anon_vma *anon_vma = NULL;
 	long adjust_next = 0;
 	int remove_next = 0;
 
 	if (next && !insert) {
-		struct vm_area_struct *exporter = NULL;
-
 		if (end >= next->vm_end) {
 			/*
 			 * vma expands, overlapping all the next, and
@@ -520,7 +520,7 @@ int vma_adjust(struct vm_area_struct *vm
 			 */
 again:			remove_next = 1 + (end > next->vm_end);
 			end = next->vm_end;
-			exporter = next;
+			anon_vma = next->anon_vma;
 			importer = vma;
 		} else if (end > next->vm_start) {
 			/*
@@ -528,7 +528,7 @@ again:			remove_next = 1 + (end > next->
 			 * mprotect case 5 shifting the boundary up.
 			 */
 			adjust_next = (end - next->vm_start) >> PAGE_SHIFT;
-			exporter = next;
+			anon_vma = next->anon_vma;
 			importer = vma;
 		} else if (end < vma->vm_end) {
 			/*
@@ -537,20 +537,9 @@ again:			remove_next = 1 + (end > next->
 			 * mprotect case 4 shifting the boundary down.
 			 */
 			adjust_next = - ((vma->vm_end - end) >> PAGE_SHIFT);
-			exporter = vma;
+			anon_vma = next->anon_vma;
 			importer = next;
 		}
-
-		/*
-		 * Easily overlooked: when mprotect shifts the boundary,
-		 * make sure the expanding vma has anon_vma set if the
-		 * shrinking vma had, to cover any anon pages imported.
-		 */
-		if (exporter && exporter->anon_vma && !importer->anon_vma) {
-			if (anon_vma_clone(importer, exporter))
-				return -ENOMEM;
-			importer->anon_vma = exporter->anon_vma;
-		}
 	}
 
 	if (file) {
@@ -578,6 +567,25 @@ again:			remove_next = 1 + (end > next->
 		}
 	}
 
+	/*
+	 * When changing only vma->vm_end, we don't really need
+	 * anon_vma lock.
+	 */
+	if (vma->anon_vma && (insert || importer || start != vma->vm_start))
+		anon_vma = vma->anon_vma;
+	if (anon_vma) {
+		spin_lock(&anon_vma->lock);
+		/*
+		 * Easily overlooked: when mprotect shifts the boundary,
+		 * make sure the expanding vma has anon_vma set if the
+		 * shrinking vma had, to cover any anon pages imported.
+		 */
+		if (importer && !importer->anon_vma) {
+			importer->anon_vma = anon_vma;
+			__anon_vma_link(importer);
+		}
+	}
+
 	if (root) {
 		flush_dcache_mmap_lock(mapping);
 		vma_prio_tree_remove(vma, root);
@@ -608,6 +616,8 @@ again:			remove_next = 1 + (end > next->
 		__vma_unlink(mm, next, vma);
 		if (file)
 			__remove_shared_vm_struct(next, file, mapping);
+		if (next->anon_vma)
+			__anon_vma_merge(vma, next);
 	} else if (insert) {
 		/*
 		 * split_vma has split insert from vma, and needs
@@ -617,6 +627,8 @@ again:			remove_next = 1 + (end > next->
 		__insert_vm_struct(mm, insert);
 	}
 
+	if (anon_vma)
+		spin_unlock(&anon_vma->lock);
 	if (mapping)
 		spin_unlock(&mapping->i_mmap_lock);
 
@@ -626,8 +638,6 @@ again:			remove_next = 1 + (end > next->
 			if (next->vm_flags & VM_EXECUTABLE)
 				removed_exe_file_vma(mm);
 		}
-		if (next->anon_vma)
-			anon_vma_merge(vma, next);
 		mm->map_count--;
 		mpol_put(vma_policy(next));
 		kmem_cache_free(vm_area_cachep, next);
@@ -643,8 +653,6 @@ again:			remove_next = 1 + (end > next->
 	}
 
 	validate_mm(mm);
-
-	return 0;
 }
 
 /*
@@ -751,7 +759,6 @@ struct vm_area_struct *vma_merge(struct
 {
 	pgoff_t pglen = (end - addr) >> PAGE_SHIFT;
 	struct vm_area_struct *area, *next;
-	int err;
 
 	/*
 	 * We later require that vma->vm_flags == vm_flags,
@@ -785,13 +792,11 @@ struct vm_area_struct *vma_merge(struct
 				is_mergeable_anon_vma(prev->anon_vma,
 						      next->anon_vma)) {
 							/* cases 1, 6 */
-			err = vma_adjust(prev, prev->vm_start,
+			vma_adjust(prev, prev->vm_start,
 				next->vm_end, prev->vm_pgoff, NULL);
 		} else					/* cases 2, 5, 7 */
-			err = vma_adjust(prev, prev->vm_start,
+			vma_adjust(prev, prev->vm_start,
 				end, prev->vm_pgoff, NULL);
-		if (err)
-			return NULL;
 		return prev;
 	}
 
@@ -803,13 +808,11 @@ struct vm_area_struct *vma_merge(struct
 			can_vma_merge_before(next, vm_flags,
 					anon_vma, file, pgoff+pglen)) {
 		if (prev && addr < prev->vm_end)	/* case 4 */
-			err = vma_adjust(prev, prev->vm_start,
+			vma_adjust(prev, prev->vm_start,
 				addr, prev->vm_pgoff, NULL);
 		else					/* cases 3, 8 */
-			err = vma_adjust(area, addr, next->vm_end,
+			vma_adjust(area, addr, next->vm_end,
 				next->vm_pgoff - pglen, NULL);
-		if (err)
-			return NULL;
 		return area;
 	}
 
@@ -817,61 +820,6 @@ struct vm_area_struct *vma_merge(struct
 }
 
 /*
- * Rough compatbility check to quickly see if it's even worth looking
- * at sharing an anon_vma.
- *
- * They need to have the same vm_file, and the flags can only differ
- * in things that mprotect may change.
- *
- * NOTE! The fact that we share an anon_vma doesn't _have_ to mean that
- * we can merge the two vma's. For example, we refuse to merge a vma if
- * there is a vm_ops->close() function, because that indicates that the
- * driver is doing some kind of reference counting. But that doesn't
- * really matter for the anon_vma sharing case.
- */
-static int anon_vma_compatible(struct vm_area_struct *a, struct vm_area_struct *b)
-{
-	return a->vm_end == b->vm_start &&
-		mpol_equal(vma_policy(a), vma_policy(b)) &&
-		a->vm_file == b->vm_file &&
-		!((a->vm_flags ^ b->vm_flags) & ~(VM_READ|VM_WRITE|VM_EXEC)) &&
-		b->vm_pgoff == a->vm_pgoff + ((b->vm_start - a->vm_start) >> PAGE_SHIFT);
-}
-
-/*
- * Do some basic sanity checking to see if we can re-use the anon_vma
- * from 'old'. The 'a'/'b' vma's are in VM order - one of them will be
- * the same as 'old', the other will be the new one that is trying
- * to share the anon_vma.
- *
- * NOTE! This runs with mm_sem held for reading, so it is possible that
- * the anon_vma of 'old' is concurrently in the process of being set up
- * by another page fault trying to merge _that_. But that's ok: if it
- * is being set up, that automatically means that it will be a singleton
- * acceptable for merging, so we can do all of this optimistically. But
- * we do that ACCESS_ONCE() to make sure that we never re-load the pointer.
- *
- * IOW: that the "list_is_singular()" test on the anon_vma_chain only
- * matters for the 'stable anon_vma' case (ie the thing we want to avoid
- * is to return an anon_vma that is "complex" due to having gone through
- * a fork).
- *
- * We also make sure that the two vma's are compatible (adjacent,
- * and with the same memory policies). That's all stable, even with just
- * a read lock on the mm_sem.
- */
-static struct anon_vma *reusable_anon_vma(struct vm_area_struct *old, struct vm_area_struct *a, struct vm_area_struct *b)
-{
-	if (anon_vma_compatible(a, b)) {
-		struct anon_vma *anon_vma = ACCESS_ONCE(old->anon_vma);
-
-		if (anon_vma && list_is_singular(&old->anon_vma_chain))
-			return anon_vma;
-	}
-	return NULL;
-}
-
-/*
  * find_mergeable_anon_vma is used by anon_vma_prepare, to check
  * neighbouring vmas for a suitable anon_vma, before it goes off
  * to allocate a new anon_vma. It checks because a repetitive
@@ -881,16 +829,28 @@ static struct anon_vma *reusable_anon_vm
  */
 struct anon_vma *find_mergeable_anon_vma(struct vm_area_struct *vma)
 {
-	struct anon_vma *anon_vma;
 	struct vm_area_struct *near;
+	unsigned long vm_flags;
 
 	near = vma->vm_next;
 	if (!near)
 		goto try_prev;
 
-	anon_vma = reusable_anon_vma(near, vma, near);
-	if (anon_vma)
-		return anon_vma;
+	/*
+	 * Since only mprotect tries to remerge vmas, match flags
+	 * which might be mprotected into each other later on.
+	 * Neither mlock nor madvise tries to remerge at present,
+	 * so leave their flags as obstructing a merge.
+	 */
+	vm_flags = vma->vm_flags & ~(VM_READ|VM_WRITE|VM_EXEC);
+	vm_flags |= near->vm_flags & (VM_READ|VM_WRITE|VM_EXEC);
+
+	if (near->anon_vma && vma->vm_end == near->vm_start &&
+			mpol_equal(vma_policy(vma), vma_policy(near)) &&
+			can_vma_merge_before(near, vm_flags,
				NULL, vma->vm_file, vma->vm_pgoff +
+				((vma->vm_end - vma->vm_start) >> PAGE_SHIFT)))
+		return near->anon_vma;
 try_prev:
 	/*
 	 * It is potentially slow to have to call find_vma_prev here.
@@ -903,9 +863,14 @@ try_prev:
 	if (!near)
 		goto none;
 
-	anon_vma = reusable_anon_vma(near, near, vma);
-	if (anon_vma)
-		return anon_vma;
+	vm_flags = vma->vm_flags & ~(VM_READ|VM_WRITE|VM_EXEC);
+	vm_flags |= near->vm_flags & (VM_READ|VM_WRITE|VM_EXEC);
+
+	if (near->anon_vma && near->vm_end == vma->vm_start &&
+			mpol_equal(vma_policy(near), vma_policy(vma)) &&
+			can_vma_merge_after(near, vm_flags,
+				NULL, vma->vm_file, vma->vm_pgoff))
+		return near->anon_vma;
 none:
 	/*
 	 * There's no absolute need to look only at touching neighbours:
@@ -1264,7 +1229,6 @@ munmap_back:
 	vma->vm_flags = vm_flags;
 	vma->vm_page_prot = vm_get_page_prot(vm_flags);
 	vma->vm_pgoff = pgoff;
-	INIT_LIST_HEAD(&vma->anon_vma_chain);
 
 	if (file) {
 		error = -EINVAL;
@@ -1925,7 +1889,6 @@ static int __split_vma(struct mm_struct
 {
 	struct mempolicy *pol;
 	struct vm_area_struct *new;
-	int err = -ENOMEM;
 
 	if (is_vm_hugetlb_page(vma) && (addr &
 					~(huge_page_mask(hstate_vma(vma)))))
@@ -1933,13 +1896,11 @@ static int __split_vma(struct mm_struct
 
 	new = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL);
 	if (!new)
-		goto out_err;
+		return -ENOMEM;
 
 	/* most fields are the same, copy all, and then fixup */
 	*new = *vma;
-	INIT_LIST_HEAD(&new->anon_vma_chain);
-
 	if (new_below)
 		new->vm_end = addr;
 	else {
@@ -1949,14 +1910,11 @@ static int __split_vma(struct mm_struct
 
 	pol = mpol_dup(vma_policy(vma));
 	if (IS_ERR(pol)) {
-		err = PTR_ERR(pol);
-		goto out_free_vma;
+		kmem_cache_free(vm_area_cachep, new);
+		return PTR_ERR(pol);
 	}
 	vma_set_policy(new, pol);
 
-	if (anon_vma_clone(new, vma))
-		goto out_free_mpol;
-
 	if (new->vm_file) {
 		get_file(new->vm_file);
 		if (vma->vm_flags & VM_EXECUTABLE)
@@ -1967,28 +1925,12 @@ static int __split_vma(struct mm_struct
 		new->vm_ops->open(new);
 
 	if (new_below)
-		err = vma_adjust(vma, addr, vma->vm_end, vma->vm_pgoff +
+		vma_adjust(vma, addr, vma->vm_end, vma->vm_pgoff +
 			((addr - new->vm_start) >> PAGE_SHIFT), new);
 	else
-		err = vma_adjust(vma, vma->vm_start, addr, vma->vm_pgoff, new);
+		vma_adjust(vma, vma->vm_start, addr, vma->vm_pgoff, new);
 
-	/* Success. */
-	if (!err)
-		return 0;
-
-	/* Clean everything up if vma_adjust failed. */
-	new->vm_ops->close(new);
-	if (new->vm_file) {
-		if (vma->vm_flags & VM_EXECUTABLE)
-			removed_exe_file_vma(mm);
-		fput(new->vm_file);
-	}
- out_free_mpol:
-	mpol_put(pol);
- out_free_vma:
-	kmem_cache_free(vm_area_cachep, new);
- out_err:
-	return err;
+	return 0;
 }
 
 /*
@@ -2198,7 +2140,6 @@ unsigned long do_brk(unsigned long addr,
 		return -ENOMEM;
 	}
 
-	INIT_LIST_HEAD(&vma->anon_vma_chain);
 	vma->vm_mm = mm;
 	vma->vm_start = addr;
 	vma->vm_end = addr + len;
@@ -2335,11 +2276,10 @@ struct vm_area_struct *copy_vma(struct v
 		if (new_vma) {
 			*new_vma = *vma;
 			pol = mpol_dup(vma_policy(vma));
-			if (IS_ERR(pol))
-				goto out_free_vma;
-			INIT_LIST_HEAD(&new_vma->anon_vma_chain);
-			if (anon_vma_clone(new_vma, vma))
-				goto out_free_mempol;
+			if (IS_ERR(pol)) {
+				kmem_cache_free(vm_area_cachep, new_vma);
+				return NULL;
+			}
 			vma_set_policy(new_vma, pol);
 			new_vma->vm_start = addr;
 			new_vma->vm_end = addr + len;
@@ -2355,12 +2295,6 @@ struct vm_area_struct *copy_vma(struct v
 		}
 	}
 	return new_vma;
-
- out_free_mempol:
-	mpol_put(pol);
- out_free_vma:
-	kmem_cache_free(vm_area_cachep, new_vma);
-	return NULL;
 }
 
 /*
@@ -2438,7 +2372,6 @@ int install_special_mapping(struct mm_st
 	if (unlikely(vma == NULL))
 		return -ENOMEM;
 
-	INIT_LIST_HEAD(&vma->anon_vma_chain);
 	vma->vm_mm = mm;
 	vma->vm_start = addr;
 	vma->vm_end = addr + len;
@@ -2539,7 +2472,6 @@ static void vm_lock_mapping(struct mm_st
 int mm_take_all_locks(struct mm_struct *mm)
 {
 	struct vm_area_struct *vma;
-	struct anon_vma_chain *avc;
 	int ret = -EINTR;
 
 	BUG_ON(down_read_trylock(&mm->mmap_sem));
@@ -2557,8 +2489,7 @@ int mm_take_all_locks(struct mm_struct *
 		if (signal_pending(current))
 			goto out_unlock;
 		if (vma->anon_vma)
-			list_for_each_entry(avc, &vma->anon_vma_chain, same_vma)
-				vm_lock_anon_vma(mm, avc->anon_vma);
+			vm_lock_anon_vma(mm, vma->anon_vma);
 	}
 
 	ret = 0;
@@ -2613,15 +2544,13 @@ static void vm_unlock_mapping(struct add
 void mm_drop_all_locks(struct mm_struct *mm)
 {
 	struct vm_area_struct *vma;
-	struct anon_vma_chain *avc;
 
 	BUG_ON(down_read_trylock(&mm->mmap_sem));
 	BUG_ON(!mutex_is_locked(&mm_all_locks_mutex));
 
 	for (vma = mm->mmap; vma; vma = vma->vm_next) {
 		if (vma->anon_vma)
-			list_for_each_entry(avc, &vma->anon_vma_chain, same_vma)
-				vm_unlock_anon_vma(avc->anon_vma);
+			vm_unlock_anon_vma(vma->anon_vma);
 		if (vma->vm_file && vma->vm_file->f_mapping)
 			vm_unlock_mapping(vma->vm_file->f_mapping);
 	}
diff --git a/mm/mremap.c b/mm/mremap.c
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -459,11 +459,8 @@ unsigned long do_mremap(unsigned long ad
 		if (vma_expandable(vma, new_len - old_len)) {
 			int pages = (new_len - old_len) >> PAGE_SHIFT;
 
-			if (vma_adjust(vma, vma->vm_start, addr + new_len,
-				       vma->vm_pgoff, NULL)) {
-				ret = -ENOMEM;
-				goto out;
-			}
+			vma_adjust(vma, vma->vm_start,
+				addr + new_len, vma->vm_pgoff, NULL);
 
 			mm->total_vm += pages;
 			vm_stat_account(mm, vma->vm_flags, vma->vm_file, pages);
diff --git a/mm/nommu.c b/mm/nommu.c
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -1208,7 +1208,7 @@ unsigned long do_mmap_pgoff(struct file
 	region->vm_flags = vm_flags;
 	region->vm_pgoff = pgoff;
 
-	INIT_LIST_HEAD(&vma->anon_vma_chain);
+	INIT_LIST_HEAD(&vma->anon_vma_node);
 	vma->vm_flags = vm_flags;
 	vma->vm_pgoff = pgoff;
 
diff --git a/mm/rmap.c b/mm/rmap.c
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -62,7 +62,6 @@
 #include "internal.h"
 
 static struct kmem_cache *anon_vma_cachep;
-static struct kmem_cache *anon_vma_chain_cachep;
 
 static inline struct anon_vma *anon_vma_alloc(void)
 {
@@ -74,16 +73,6 @@ void anon_vma_free(struct anon_vma *anon
 	kmem_cache_free(anon_vma_cachep, anon_vma);
 }
 
-static inline struct anon_vma_chain *anon_vma_chain_alloc(void)
-{
-	return kmem_cache_alloc(anon_vma_chain_cachep, GFP_KERNEL);
-}
-
-void anon_vma_chain_free(struct anon_vma_chain *anon_vma_chain)
-{
-	kmem_cache_free(anon_vma_chain_cachep, anon_vma_chain);
-}
-
 /**
  * anon_vma_prepare - attach an anon_vma to a memory region
  * @vma: the memory region in question
@@ -114,23 +103,18 @@ void anon_vma_chain_free(struct anon_vma
 int anon_vma_prepare(struct vm_area_struct *vma)
 {
 	struct anon_vma *anon_vma = vma->anon_vma;
-	struct anon_vma_chain *avc;
 
 	might_sleep();
 	if (unlikely(!anon_vma)) {
 		struct mm_struct *mm = vma->vm_mm;
 		struct anon_vma *allocated;
 
-		avc = anon_vma_chain_alloc();
-		if (!avc)
-			goto out_enomem;
-
 		anon_vma = find_mergeable_anon_vma(vma);
 		allocated = NULL;
 		if (!anon_vma) {
 			anon_vma = anon_vma_alloc();
 			if (unlikely(!anon_vma))
-				goto out_enomem_free_avc;
+				return -ENOMEM;
 			allocated = anon_vma;
 		}
 		spin_lock(&anon_vma->lock);
@@ -139,114 +123,53 @@ int anon_vma_prepare(struct vm_area_stru
 		spin_lock(&mm->page_table_lock);
 		if (likely(!vma->anon_vma)) {
 			vma->anon_vma = anon_vma;
-			avc->anon_vma = anon_vma;
-			avc->vma = vma;
-			list_add(&avc->same_vma, &vma->anon_vma_chain);
-			list_add(&avc->same_anon_vma, &anon_vma->head);
+			list_add_tail(&vma->anon_vma_node, &anon_vma->head);
 			allocated = NULL;
 		}
 		spin_unlock(&mm->page_table_lock);
 		spin_unlock(&anon_vma->lock);
 
-		if (unlikely(allocated)) {
+		if (unlikely(allocated))
 			anon_vma_free(allocated);
-			anon_vma_chain_free(avc);
-		}
 	}
 	return 0;
-
- out_enomem_free_avc:
-	anon_vma_chain_free(avc);
- out_enomem:
-	return -ENOMEM;
 }
 
-static void anon_vma_chain_link(struct vm_area_struct *vma,
-				struct anon_vma_chain *avc,
-				struct anon_vma *anon_vma)
+void __anon_vma_merge(struct vm_area_struct *vma, struct vm_area_struct *next)
 {
-	avc->vma = vma;
-	avc->anon_vma = anon_vma;
-	list_add(&avc->same_vma, &vma->anon_vma_chain);
-
-	spin_lock(&anon_vma->lock);
-	list_add_tail(&avc->same_anon_vma, &anon_vma->head);
-	spin_unlock(&anon_vma->lock);
+	BUG_ON(vma->anon_vma != next->anon_vma);
+	list_del(&next->anon_vma_node);
 }
 
-/*
- * Attach the anon_vmas from src to dst.
- * Returns 0 on success, -ENOMEM on failure.
- */
-int anon_vma_clone(struct vm_area_struct *dst, struct vm_area_struct *src)
+void __anon_vma_link(struct vm_area_struct *vma)
 {
-	struct anon_vma_chain *avc, *pavc;
+	struct anon_vma *anon_vma = vma->anon_vma;
 
-	list_for_each_entry_reverse(pavc, &src->anon_vma_chain, same_vma) {
-		avc = anon_vma_chain_alloc();
-		if (!avc)
-			goto enomem_failure;
-		anon_vma_chain_link(dst, avc, pavc->anon_vma);
-	}
-	return 0;
-
- enomem_failure:
-	unlink_anon_vmas(dst);
-	return -ENOMEM;
+	if (anon_vma)
+		list_add_tail(&vma->anon_vma_node, &anon_vma->head);
 }
 
-/*
- * Attach vma to its own anon_vma, as well as to the anon_vmas that
- * the corresponding VMA in the parent process is attached to.
- * Returns 0 on success, non-zero on failure.
- */
-int anon_vma_fork(struct vm_area_struct *vma, struct vm_area_struct *pvma)
+void anon_vma_link(struct vm_area_struct *vma)
 {
-	struct anon_vma_chain *avc;
-	struct anon_vma *anon_vma;
+	struct anon_vma *anon_vma = vma->anon_vma;
 
-	/* Don't bother if the parent process has no anon_vma here. */
-	if (!pvma->anon_vma)
-		return 0;
-
-	/*
-	 * First, attach the new VMA to the parent VMA's anon_vmas,
-	 * so rmap can find non-COWed pages in child processes.
-	 */
-	if (anon_vma_clone(vma, pvma))
-		return -ENOMEM;
-
-	/* Then add our own anon_vma. */
-	anon_vma = anon_vma_alloc();
-	if (!anon_vma)
-		goto out_error;
-	avc = anon_vma_chain_alloc();
-	if (!avc)
-		goto out_error_free_anon_vma;
-	anon_vma_chain_link(vma, avc, anon_vma);
-	/* Mark this anon_vma as the one where our new (COWed) pages go. */
-	vma->anon_vma = anon_vma;
-
-	return 0;
-
- out_error_free_anon_vma:
-	anon_vma_free(anon_vma);
- out_error:
-	unlink_anon_vmas(vma);
-	return -ENOMEM;
+	if (anon_vma) {
+		spin_lock(&anon_vma->lock);
+		list_add_tail(&vma->anon_vma_node, &anon_vma->head);
+		spin_unlock(&anon_vma->lock);
+	}
 }
 
-static void anon_vma_unlink(struct anon_vma_chain *anon_vma_chain)
+void anon_vma_unlink(struct vm_area_struct *vma)
 {
-	struct anon_vma *anon_vma = anon_vma_chain->anon_vma;
+	struct anon_vma *anon_vma = vma->anon_vma;
 	int empty;
 
-	/* If anon_vma_fork fails, we can get an empty anon_vma_chain. */
 	if (!anon_vma)
 		return;
 
 	spin_lock(&anon_vma->lock);
-	list_del(&anon_vma_chain->same_anon_vma);
+	list_del(&vma->anon_vma_node);
 
 	/* We must garbage collect the anon_vma if it's empty */
 	empty = list_empty(&anon_vma->head) && !ksm_refcount(anon_vma);
@@ -256,18 +179,6 @@ static void anon_vma_unlink(struct anon_
 		anon_vma_free(anon_vma);
 }
 
-void unlink_anon_vmas(struct vm_area_struct *vma)
-{
-	struct anon_vma_chain *avc, *next;
-
-	/* Unlink each anon_vma chained to the VMA. */
-	list_for_each_entry_safe(avc, next, &vma->anon_vma_chain, same_vma) {
-		anon_vma_unlink(avc);
-		list_del(&avc->same_vma);
-		anon_vma_chain_free(avc);
-	}
-}
-
 static void anon_vma_ctor(void *data)
 {
 	struct anon_vma *anon_vma = data;
@@ -281,7 +192,6 @@ void __init anon_vma_init(void)
 {
 	anon_vma_cachep = kmem_cache_create("anon_vma", sizeof(struct anon_vma),
 			0, SLAB_DESTROY_BY_RCU|SLAB_PANIC, anon_vma_ctor);
-	anon_vma_chain_cachep = KMEM_CACHE(anon_vma_chain, SLAB_PANIC);
 }
 
 /*
@@ -486,7 +396,7 @@ static int page_referenced_anon(struct p
 {
 	unsigned int mapcount;
 	struct anon_vma *anon_vma;
-	struct anon_vma_chain *avc;
+	struct vm_area_struct *vma;
 	int referenced = 0;
 
 	anon_vma = page_lock_anon_vma(page);
@@ -494,8 +404,7 @@ static int page_referenced_anon(struct p
 		return referenced;
 
 	mapcount = page_mapcount(page);
-	list_for_each_entry(avc, &anon_vma->head, same_anon_vma) {
-		struct vm_area_struct *vma = avc->vma;
+	list_for_each_entry(vma, &anon_vma->head, anon_vma_node) {
 		unsigned long address = vma_address(page, vma);
 		if (address == -EFAULT)
 			continue;
@@ -702,57 +611,17 @@ int page_mkclean(struct page *page)
 EXPORT_SYMBOL_GPL(page_mkclean);
 
 /**
- * page_move_anon_rmap - move a page to our anon_vma
- * @page:	the page to move to our anon_vma
- * @vma:	the vma the page belongs to
+ * __page_set_anon_rmap - setup new anonymous rmap
+ * @page:	the page to add the mapping to
+ * @vma:	the vm area in which the mapping is added
  * @address:	the user virtual address mapped
- *
- * When a page belongs exclusively to one process after a COW event,
- * that page can be moved into the anon_vma that belongs to just that
- * process, so the rmap code will not search the parent or sibling
- * processes.
  */
-void page_move_anon_rmap(struct page *page,
+static void __page_set_anon_rmap(struct page *page,
 	struct vm_area_struct *vma, unsigned long address)
 {
 	struct anon_vma *anon_vma = vma->anon_vma;
 
-	VM_BUG_ON(!PageLocked(page));
-	VM_BUG_ON(!anon_vma);
-	VM_BUG_ON(page->index != linear_page_index(vma, address));
-
-	anon_vma = (void *) anon_vma + PAGE_MAPPING_ANON;
-	page->mapping = (struct address_space *) anon_vma;
-}
-
-/**
- * __page_set_anon_rmap - setup new anonymous rmap
- * @page:	the page to add the mapping to
- * @vma:	the vm area in which the mapping is added
- * @address:	the user virtual address mapped
- * @exclusive:	the page is exclusively owned by the current process
- */
-static void __page_set_anon_rmap(struct page *page,
-	struct vm_area_struct *vma, unsigned long address, int exclusive)
-{
-	struct anon_vma *anon_vma = vma->anon_vma;
-	BUG_ON(!anon_vma);
-
-	/*
-	 * If the page isn't exclusively mapped into this vma,
-	 * we must use the _oldest_ possible anon_vma for the
-	 * page mapping!
-	 *
-	 * So take the last AVC chain entry in the vma, which is
-	 * the deepest ancestor, and use the anon_vma from that.
-	 */
-	if (!exclusive) {
-		struct anon_vma_chain *avc;
-		avc = list_entry(vma->anon_vma_chain.prev, struct anon_vma_chain, same_vma);
-		anon_vma = avc->anon_vma;
-	}
-
 	anon_vma = (void *) anon_vma + PAGE_MAPPING_ANON;
 	page->mapping = (struct address_space *) anon_vma;
 	page->index = linear_page_index(vma, address);
@@ -780,6 +649,9 @@ static void __page_check_anon_rmap(struc
 	 * are initially only visible via the pagetables, and the pte is locked
 	 * over the call to page_add_new_anon_rmap.
 	 */
+	struct anon_vma *anon_vma = vma->anon_vma;
+	anon_vma = (void *) anon_vma + PAGE_MAPPING_ANON;
+	BUG_ON(page->mapping != (struct address_space *)anon_vma);
 	BUG_ON(page->index != linear_page_index(vma, address));
 #endif
 }
@@ -807,7 +679,7 @@ void page_add_anon_rmap(struct page *pag
 	VM_BUG_ON(!PageLocked(page));
 	VM_BUG_ON(address < vma->vm_start || address >= vma->vm_end);
 	if (first)
-		__page_set_anon_rmap(page, vma, address, 0);
+		__page_set_anon_rmap(page, vma, address);
 	else
 		__page_check_anon_rmap(page, vma, address);
 }
@@ -829,7 +701,7 @@ void page_add_new_anon_rmap(struct page
 	SetPageSwapBacked(page);
 	atomic_set(&page->_mapcount, 0); /* increment count (starts at -1) */
 	__inc_zone_page_state(page, NR_ANON_PAGES);
-	__page_set_anon_rmap(page, vma, address, 1);
+	__page_set_anon_rmap(page, vma, address);
 	if (page_evictable(page, vma))
 		lru_cache_add_lru(page, LRU_ACTIVE_ANON);
 	else
@@ -1150,15 +1022,14 @@ static int try_to_unmap_cluster(unsigned
 static int try_to_unmap_anon(struct page *page, enum ttu_flags flags)
 {
 	struct anon_vma *anon_vma;
-	struct anon_vma_chain *avc;
+	struct vm_area_struct *vma;
 	int ret = SWAP_AGAIN;
 
 	anon_vma = page_lock_anon_vma(page);
 	if (!anon_vma)
 		return ret;
 
-	list_for_each_entry(avc, &anon_vma->head, same_anon_vma) {
-		struct vm_area_struct *vma = avc->vma;
+	list_for_each_entry(vma, &anon_vma->head, anon_vma_node) {
 		unsigned long address = vma_address(page, vma);
 		if (address == -EFAULT)
 			continue;
@@ -1349,7 +1220,7 @@ static int rmap_walk_anon(struct page *p
 		struct vm_area_struct *, unsigned long, void *), void *arg)
 {
 	struct anon_vma *anon_vma;
-	struct anon_vma_chain *avc;
+	struct vm_area_struct *vma;
 	int ret = SWAP_AGAIN;
 
 	/*
@@ -1364,8 +1235,7 @@ static int rmap_walk_anon(struct page *p
 	if (!anon_vma)
 		return ret;
 	spin_lock(&anon_vma->lock);
-	list_for_each_entry(avc, &anon_vma->head, same_anon_vma) {
-		struct vm_area_struct *vma = avc->vma;
+	list_for_each_entry(vma, &anon_vma->head, anon_vma_node) {
 		unsigned long address = vma_address(page, vma);
 		if (address == -EFAULT)
 			continue;
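
In case it helps while reviewing, here is a rough sketch of the two data-structure shapes this diff toggles between, using simplified stand-in names (list_node, *_sketch) that compile on their own and are not the kernel's real definitions: the anon_vma_chain scheme being backed out keeps one small chain object per (vma, anon_vma) pair, while the restored scheme links each vma directly into its single anon_vma's list through an embedded anon_vma_node.

/* Illustrative only: simplified stand-in types, not the kernel's. */
struct list_node { struct list_node *next, *prev; };

/* Scheme being backed out: one chain element per (vma, anon_vma) pair. */
struct anon_vma_chain_sketch {
	struct vma_sketch	*vma;
	struct anon_vma_sketch	*anon_vma;
	struct list_node	same_vma;	/* on the vma's list of chains */
	struct list_node	same_anon_vma;	/* on the anon_vma's list of chains */
};

/* Scheme being restored: the vma itself is the list element. */
struct anon_vma_sketch {
	struct list_node	head;		/* list of vmas, via anon_vma_node */
};

struct vma_sketch {
	struct anon_vma_sketch	*anon_vma;
	struct list_node	anon_vma_node;	/* links this vma into anon_vma->head */
};

The locking comments in the mm_types.h hunk above capture the practical difference: with the direct list, a vma's membership is serialized by anon_vma->lock, whereas the chain's same_vma list was serialized by mmap_sem and page_table_lock.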