From: Christoph Lameter
Subject: mm_lock: Lock an address space against reclaim

Provide a way to lock an mm_struct against reclaim (try_to_unmap etc.).
This is necessary for the invalidate notifier approaches so that they can
reliably add and remove a notifier.

Changes:
- Simplify lock/unlock.
- Avoid counting vmas by using the vma counter in the mm_struct.

Signed-off-by: Andrea Arcangeli
Signed-off-by: Christoph Lameter

---
 include/linux/mm.h |   14 +++++++
 mm/mmap.c          |   99 +++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 113 insertions(+)

Index: linux-2.6/include/linux/mm.h
===================================================================
--- linux-2.6.orig/include/linux/mm.h	2008-04-21 14:31:40.000000000 -0700
+++ linux-2.6/include/linux/mm.h	2008-04-21 21:38:19.000000000 -0700
@@ -1049,6 +1049,20 @@ extern int install_special_mapping(struc
 			unsigned long addr, unsigned long len,
 			unsigned long flags, struct page **pages);
 
+/*
+ * mm_lock will take mmap_sem writably (to prevent all modifications and
+ * scanning of vmas) and then also takes the mapping locks for each of the
+ * vmas to lock out any scans of pages of this address space.
+ * This can be used to effectively hold off reclaim from the address space.
+ *
+ * mm_lock/mm_unlock() can fail if there is not enough memory to store a pointer
+ * array to all vmas.
+ *
+ * mm_lock and mm_unlock are expensive operations that may take a long time.
+ */
+extern int mm_lock(struct mm_struct *mm);
+extern int mm_unlock(struct mm_struct *mm);
+
 extern unsigned long get_unmapped_area(struct file *, unsigned long, unsigned long, unsigned long, unsigned long);
 
 extern unsigned long do_mmap_pgoff(struct file *file, unsigned long addr,

Index: linux-2.6/mm/mmap.c
===================================================================
--- linux-2.6.orig/mm/mmap.c	2008-04-21 14:31:42.000000000 -0700
+++ linux-2.6/mm/mmap.c	2008-04-21 21:38:19.000000000 -0700
@@ -26,6 +26,8 @@
 #include <linux/mount.h>
 #include <linux/mempolicy.h>
 #include <linux/rmap.h>
+#include <linux/vmalloc.h>
+#include <linux/sort.h>
 
 #include <asm/uaccess.h>
 #include <asm/cacheflush.h>
@@ -2235,3 +2237,100 @@ int install_special_mapping(struct mm_st
 
 	return 0;
 }
+
+
+static int cmp_rwsem(const void *a, const void *b)
+{
+	struct rw_semaphore * const *pa = a;
+	struct rw_semaphore * const *pb = b;
+	struct rw_semaphore *va = *pa;
+	struct rw_semaphore *vb = *pb;
+
+	if (va == vb)
+		return 0;
+	if (va > vb)
+		return 1;
+	return -1;
+}
+
+
+static void __scan_locks(struct mm_struct *mm, struct rw_semaphore **locks,
+				int anon, int lock)
+{
+	struct vm_area_struct *vma;
+	struct rw_semaphore *last;
+	int i = 0;
+
+	for (vma = mm->mmap; vma; vma = vma->vm_next) {
+		if (anon) {
+			if (vma->anon_vma)
+				locks[i++] = &vma->anon_vma->sem;
+		} else {
+			if (vma->vm_file && vma->vm_file->f_mapping)
+				locks[i++] = &vma->vm_file->f_mapping->i_mmap_sem;
+		}
+	}
+
+	if (!i)
+		return;
+
+	sort(locks, i, sizeof(struct rw_semaphore *), cmp_rwsem, NULL);
+
+	last = NULL;
+	while (i-- > 0) {
+		if (last != locks[i]) {
+			if (lock)
+				down_write(locks[i]);
+			else
+				up_write(locks[i]);
+
+			last = locks[i];
+		}
+	}
+}
+
+static int scan_locks(struct mm_struct *mm, int lock)
+{
+	struct rw_semaphore **locks;
+
+	locks = vmalloc(sizeof(struct rw_semaphore *) * mm->map_count);
+	if (!locks)
+		return -ENOMEM;
+
+	__scan_locks(mm, locks, 0, lock);
+	__scan_locks(mm, locks, 1, lock);
+
+	vfree(locks);
+	return 0;
+}
+
+/*
+ * This operation locks against the VM for all pte/vma/mm related
+ * operations that could ever happen on a certain mm. This includes
+ * vmtruncate, try_to_unmap, and all page faults. The holder
+ * must not hold any mm related lock. A single task can't take more
+ * than one mm lock in a row or it would deadlock.
+ */
+int mm_lock(struct mm_struct *mm)
+{
+	int rc;
+
+	down_write(&mm->mmap_sem);
+
+	rc = scan_locks(mm, 1);
+	if (rc)
+		up_write(&mm->mmap_sem);
+
+	return rc;
+}
+
+int mm_unlock(struct mm_struct *mm)
+{
+	int rc;
+
+	rc = scan_locks(mm, 0);
+	if (!rc)
+		up_write(&mm->mmap_sem);
+
+	return rc;
+}
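
For illustration only, this is roughly how a notifier registration path would
be expected to use the pair. The example_notifier structure and the
mm->example_notifiers list are invented for this sketch and are not part of
the patch:

	#include <linux/list.h>
	#include <linux/mm.h>

	/* Hypothetical notifier object, not introduced by this patch. */
	struct example_notifier {
		struct hlist_node hlist;
	};

	int example_notifier_register(struct example_notifier *e,
				      struct mm_struct *mm)
	{
		int err;

		/* Holds off try_to_unmap, vmtruncate and page faults on mm. */
		err = mm_lock(mm);
		if (err)
			return err;

		/* No reclaim can run, so the list can be modified safely. */
		hlist_add_head(&e->hlist, &mm->example_notifiers);

		/* Drops the i_mmap_sem/anon_vma sems, then mmap_sem. */
		return mm_unlock(mm);
	}

Note that both mm_lock() and mm_unlock() can return -ENOMEM because the
pointer array is vmalloc'd, so a caller has to handle the error path; the
sketch above simply propagates it.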