From: Christoph Lameter
Subject: mm_lock: Lock an address space against reclaim

Provide a way to lock an mm_struct against reclaim (try_to_unmap
etc). This is necessary for the invalidate notifier approaches so
that they can reliably add and remove a notifier.

Signed-off-by: Andrea Arcangeli
Signed-off-by: Christoph Lameter

---
 include/linux/mm.h |   14 ++++++++++
 mm/mmap.c          |   72 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 86 insertions(+)

Index: linux-2.6/include/linux/mm.h
===================================================================
--- linux-2.6.orig/include/linux/mm.h	2008-04-18 14:06:14.000000000 -0700
+++ linux-2.6/include/linux/mm.h	2008-04-18 14:08:50.000000000 -0700
@@ -1049,6 +1049,20 @@ extern int install_special_mapping(struc
 			   unsigned long addr, unsigned long len,
 			   unsigned long flags, struct page **pages);
 
+/*
+ * mm_lock will take mmap_sem writably (to prevent all modifications and
+ * scanning of vmas) and then also takes the mapping locks for each of
+ * the vmas, to lock out any scans of pages of this address space. This
+ * effectively holds off reclaim from the address space, for example.
+ *
+ * mm_lock() can fail if there is not enough memory to store a pointer
+ * array to all vmas.
+ *
+ * mm_lock and mm_unlock are expensive operations that may take a long time.
+ */
+extern int mm_lock(struct mm_struct *mm);
+extern void mm_unlock(struct mm_struct *mm);
+
 extern unsigned long get_unmapped_area(struct file *, unsigned long, unsigned long, unsigned long, unsigned long);
 extern unsigned long do_mmap_pgoff(struct file *file, unsigned long addr,

Index: linux-2.6/mm/mmap.c
===================================================================
--- linux-2.6.orig/mm/mmap.c	2008-04-18 14:08:31.000000000 -0700
+++ linux-2.6/mm/mmap.c	2008-04-18 14:08:50.000000000 -0700
@@ -26,6 +26,8 @@
 #include <linux/mount.h>
 #include <linux/mempolicy.h>
 #include <linux/rmap.h>
+#include <linux/vmalloc.h>
+#include <linux/sort.h>
 
 #include <asm/uaccess.h>
 #include <asm/cacheflush.h>
@@ -2235,3 +2237,73 @@ int install_special_mapping(struct mm_st
 
 	return 0;
 }
+
+
+static int cmp_rwsem(const void *a, const void *b)
+{
+	if (*(struct rw_semaphore **)a == *(struct rw_semaphore **)b)
+		return 0;
+	if (*(struct rw_semaphore **)a > *(struct rw_semaphore **)b)
+		return 1;
+	return -1;
+}
+
+/*
+ * This operation locks against the VM for all pte/vma/mm related
+ * operations that could ever happen on a certain mm. This includes
+ * vmtruncate, try_to_unmap, and all page faults. The holder
+ * must not hold any mm related lock. A single task can't take more
+ * than one mm lock in a row or it would deadlock.
+ */
+int mm_lock(struct mm_struct *mm)
+{
+	struct rw_semaphore **locks;
+	struct vm_area_struct *vma;
+	int i;
+
+	down_write(&mm->mmap_sem);
+	locks = vmalloc(sizeof(struct rw_semaphore *) * mm->map_count);
+	if (!locks) {
+		up_write(&mm->mmap_sem);
+		return -ENOMEM;
+	}
+
+	i = 0;
+	for (vma = mm->mmap; vma; vma = vma->vm_next) {
+		struct file *file = vma->vm_file;
+		struct rw_semaphore *sem;
+
+		if (file)
+			sem = &file->f_mapping->i_mmap_sem;
+		else
+			sem = &vma->anon_vma->sem;
+
+		locks[i++] = sem;
+	}
+
+	sort(locks, mm->map_count, sizeof(struct rw_semaphore *), cmp_rwsem, NULL);
+
+	for (i = 0; i < mm->map_count; i++)
+		down_read(locks[i]);
+
+	vfree(locks);
+	return 0;
+}
+
+void mm_unlock(struct mm_struct *mm)
+{
+	struct vm_area_struct *vma;
+
+	for (vma = mm->mmap; vma; vma = vma->vm_next) {
+		struct file *file = vma->vm_file;
+		struct rw_semaphore *sem;
+
+		if (file)
+			sem = &file->f_mapping->i_mmap_sem;
+		else
+			sem = &vma->anon_vma->sem;
+
+		up_read(sem);
+	}
+	up_write(&mm->mmap_sem);
+}
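
As a usage illustration only (not part of this patch): a notifier
registration path would bracket its list update with
mm_lock()/mm_unlock(), so that no reclaim, truncation, or page fault
can walk the vmas while the list changes. The sketch below is minimal
and hypothetical; example_notifier, example_notifier_register() and
the mm->example_notifiers list are illustrative names that do not
exist in this series.

#include <linux/mm.h>
#include <linux/list.h>

/*
 * Hypothetical notifier: the structure and the hlist hanging off
 * mm_struct are illustrative only, not part of this patch.
 */
struct example_notifier {
	struct hlist_node hlist;
	/* invalidate callbacks would go here */
};

int example_notifier_register(struct example_notifier *en,
			      struct mm_struct *mm)
{
	int ret;

	/*
	 * mm_lock() takes mmap_sem writably plus every i_mmap_sem and
	 * anon_vma sem for read, so nothing can scan or modify the
	 * vmas of this mm while the notifier is being linked in.
	 */
	ret = mm_lock(mm);
	if (ret)
		return ret;	/* -ENOMEM: vma lock array allocation failed */
	hlist_add_head(&en->hlist, &mm->example_notifiers);
	mm_unlock(mm);
	return 0;
}

Note that mm_lock() sorts the semaphore pointers before acquiring
them, so every caller takes them in the same global (address) order
rather than in vma list order.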