From: Christoph Lameter
Subject: mm_lock: Lock an address space against reclaim

Provide a way to lock an mm_struct against reclaim (try_to_unmap
etc). This is necessary for the invalidate notifier approaches so
that they can reliably add and remove a notifier.

Changes:
- Simplify lock/unlock.
- Avoid counting vmas by using the vma counter in the mm_struct.

Signed-off-by: Andrea Arcangeli
Signed-off-by: Christoph Lameter

---
 include/linux/mm.h |   14 +++++++++
 mm/mmap.c          |   82 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 96 insertions(+)

Index: linux-2.6/include/linux/mm.h
===================================================================
--- linux-2.6.orig/include/linux/mm.h	2008-04-18 21:57:55.000000000 -0700
+++ linux-2.6/include/linux/mm.h	2008-04-18 22:01:00.000000000 -0700
@@ -1049,6 +1049,20 @@ extern int install_special_mapping(struc
 				   unsigned long addr, unsigned long len,
 				   unsigned long flags, struct page **pages);
 
+/*
+ * mm_lock will take mmap_sem writably (to prevent all modifications and
+ * scanning of vmas) and then also takes the mapping locks for each of the
+ * vmas to lock out any scans of pages of this address space.
+ * This can be used to effectively hold off reclaim from the address space.
+ *
+ * mm_lock() can fail if there is not enough memory to store a pointer
+ * array to all vmas.
+ *
+ * mm_lock and mm_unlock are expensive operations that may take a long time.
+ */
+extern int mm_lock(struct mm_struct *mm);
+extern void mm_unlock(struct mm_struct *mm);
+
 extern unsigned long get_unmapped_area(struct file *, unsigned long, unsigned long, unsigned long, unsigned long);
 
 extern unsigned long do_mmap_pgoff(struct file *file, unsigned long addr,
Index: linux-2.6/mm/mmap.c
===================================================================
--- linux-2.6.orig/mm/mmap.c	2008-04-18 21:58:48.000000000 -0700
+++ linux-2.6/mm/mmap.c	2008-04-18 22:02:33.000000000 -0700
@@ -26,6 +26,8 @@
 #include
 #include
 #include
+#include
+#include
 
 #include
 #include
@@ -2235,3 +2237,136 @@ int install_special_mapping(struct mm_st
 
 	return 0;
 }
+
+
+/*
+ * sort() comparison callback.  @a and @b point at slots of the lock array
+ * (i.e. they are struct rw_semaphore **), so dereference them and order the
+ * slots by the address of the semaphore each one refers to.
+ */
+static int cmp_rwsem(const void *a, const void *b)
+{
+	unsigned long sem_a = (unsigned long)*(struct rw_semaphore * const *)a;
+	unsigned long sem_b = (unsigned long)*(struct rw_semaphore * const *)b;
+
+	if (sem_a == sem_b)
+		return 0;
+	if (sem_a > sem_b)
+		return 1;
+	return -1;
+}
+
+/*
+ * Return the semaphore mm_lock()/mm_unlock() use for @vma, or NULL if the
+ * vma has none.  A vma backed by a file with a mapping uses that mapping's
+ * i_mmap_sem (*anon = 0); any other vma uses its anon_vma's sem (*anon = 1).
+ * vm_file is NULL for anonymous vmas and anon_vma is only set up on first
+ * fault, so both pointers are checked before being dereferenced.
+ */
+static struct rw_semaphore *vma_rmap_sem(struct vm_area_struct *vma,
+					 int *anon)
+{
+	struct address_space *mapping = NULL;
+
+	if (vma->vm_file)
+		mapping = vma->vm_file->f_mapping;
+
+	if (mapping) {
+		*anon = 0;
+		return &mapping->i_mmap_sem;
+	}
+
+	*anon = 1;
+	return vma->anon_vma ? &vma->anon_vma->sem : NULL;
+}
+
+/*
+ * Collect into @locks the file (@anon == 0) or anonymous (@anon != 0)
+ * semaphores of every vma in @mm, sort them by address and take each one
+ * for reading, highest address first.  @locks must have room for
+ * mm->map_count entries.  Caller holds mmap_sem for writing.
+ *
+ * NOTE(review): vmas sharing a mapping or anon_vma put the same semaphore
+ * into the array more than once, so it is read-locked once per vma;
+ * mm_unlock() drops it once per vma as well.  Confirm that nested
+ * down_read() of one rwsem cannot deadlock against a waiting writer here.
+ */
+static void acquire_locks(struct mm_struct *mm,
+			  struct rw_semaphore **locks, int anon)
+{
+	struct vm_area_struct *vma;
+	int i = 0;
+
+	for (vma = mm->mmap; vma; vma = vma->vm_next) {
+		int vma_is_anon;
+		struct rw_semaphore *sem = vma_rmap_sem(vma, &vma_is_anon);
+
+		if (sem && !vma_is_anon == !anon)
+			locks[i++] = sem;
+	}
+
+	if (!i)
+		return;
+
+	sort(locks, i, sizeof(struct rw_semaphore *), cmp_rwsem, NULL);
+
+	while (i-- > 0)
+		down_read(locks[i]);
+}
+
+/*
+ * This operation locks against the VM for all pte/vma/mm related
+ * operations that could ever happen on a certain mm. This includes
+ * vmtruncate, try_to_unmap, and all page faults. The holder
+ * must not hold any mm related lock. A single task can't take more
+ * than one mm lock in a row or it would deadlock.
+ *
+ * Returns 0 with mmap_sem held for writing and the per-vma semaphores
+ * held for reading, or -ENOMEM with nothing held.  Undo with mm_unlock().
+ */
+int mm_lock(struct mm_struct *mm)
+{
+	struct rw_semaphore **locks;
+
+	down_write(&mm->mmap_sem);
+
+	/*
+	 * Without vmas there are no further locks to take, and a zero-sized
+	 * vmalloc() returns NULL, which must not be reported as -ENOMEM.
+	 */
+	if (!mm->map_count)
+		return 0;
+
+	locks = vmalloc(sizeof(*locks) * mm->map_count);
+	if (!locks) {
+		up_write(&mm->mmap_sem);
+		return -ENOMEM;
+	}
+
+	/* Two passes over the same scratch array: file locks, then anon. */
+	acquire_locks(mm, locks, 0);
+	acquire_locks(mm, locks, 1);
+
+	/* mm_unlock() finds the locks again by walking the vma list. */
+	vfree(locks);
+	return 0;
+}
+
+/*
+ * Undo mm_lock(): drop the read lock taken for each vma, then release
+ * mmap_sem.  Uses the same per-vma choice as acquire_locks() so that
+ * every down_read() is matched by exactly one up_read().
+ */
+void mm_unlock(struct mm_struct *mm)
+{
+	struct vm_area_struct *vma;
+
+	for (vma = mm->mmap; vma; vma = vma->vm_next) {
+		int vma_is_anon;
+		struct rw_semaphore *sem = vma_rmap_sem(vma, &vma_is_anon);
+
+		if (sem)
+			up_read(sem);
+	}
+	up_write(&mm->mmap_sem);
+}