--- fs/aio.c | 5 +--- fs/binfmt_elf.c | 2 - fs/bio.c | 2 - fs/compat.c | 2 - fs/direct-io.c | 2 - fs/exec.c | 2 - fs/fuse/dev.c | 2 - fs/fuse/file.c | 2 - fs/nfs/direct.c | 4 +-- fs/proc/task_mmu.c | 2 - fs/splice.c | 5 +--- include/linux/gfp.h | 3 ++ include/linux/hugetlb.h | 2 - include/linux/mm.h | 15 ++++++++++++- include/linux/mmzone.h | 4 +++ include/linux/page-flags.h | 6 +++++ kernel/futex.c | 2 - mm/hugetlb.c | 7 ++++-- mm/memory.c | 49 ++++++++++++++++++++++++++++++++++----------- mm/page_alloc.c | 22 ++++++++++++++++++++ virt/kvm/kvm_main.c | 3 -- 21 files changed, 109 insertions(+), 34 deletions(-) Index: linux-2.6.25-rc8-mm2/include/linux/gfp.h =================================================================== --- linux-2.6.25-rc8-mm2.orig/include/linux/gfp.h 2008-04-13 22:39:26.405751022 -0700 +++ linux-2.6.25-rc8-mm2/include/linux/gfp.h 2008-04-13 22:39:34.845740039 -0700 @@ -241,4 +241,7 @@ void drain_zone_pages(struct zone *zone, void drain_all_pages(void); void drain_local_pages(void *dummy); +void __pin_page(struct page *page); +void pin_page(struct page *page); +void unpin_page(struct page *page); #endif /* __LINUX_GFP_H */ Index: linux-2.6.25-rc8-mm2/include/linux/mmzone.h =================================================================== --- linux-2.6.25-rc8-mm2.orig/include/linux/mmzone.h 2008-04-13 22:39:26.415739884 -0700 +++ linux-2.6.25-rc8-mm2/include/linux/mmzone.h 2008-04-13 22:39:34.845740039 -0700 @@ -97,6 +97,10 @@ enum zone_stat_item { NR_BOUNCE, NR_VMSCAN_WRITE, NR_WRITEBACK_TEMP, /* Writeback using temporary buffers */ +#ifdef CONFIG_PAGEFLAGS_EXTENDED + NR_PINNED, /* Pinned pages */ + NR_MLOCK, /* Mlocked pages */ +#endif #ifdef CONFIG_NUMA NUMA_HIT, /* allocated in intended node */ NUMA_MISS, /* allocated in non intended node */ Index: linux-2.6.25-rc8-mm2/include/linux/page-flags.h =================================================================== --- linux-2.6.25-rc8-mm2.orig/include/linux/page-flags.h 2008-04-13 
22:39:26.425740612 -0700 +++ linux-2.6.25-rc8-mm2/include/linux/page-flags.h 2008-04-13 22:39:34.865750479 -0700 @@ -86,6 +86,8 @@ enum pageflags { #ifdef CONFIG_PAGEFLAGS_EXTENDED PG_head, /* A head page */ PG_tail, /* A tail page */ + PG_pin, /* Page is pinned */ + PG_mlock, /* Page is mlocked */ #else PG_compound, /* A compound page */ #endif @@ -268,6 +270,8 @@ static inline int PageCompound(struct pa return page->flags & ((1L << PG_head) | (1L << PG_tail)); } +__PAGEFLAG(Pin, pin) +__PAGEFLAG(Mlock, mlock) #else /* * Reduce page flag use as much as possible by overlapping @@ -305,6 +309,8 @@ static inline void __ClearPageTail(struc page->flags &= ~PG_head_tail_mask; } +PAGEFLAG_FALSE(Pin) +PAGEFLAG_FALSE(Mlock) #endif /* !PAGEFLAGS_EXTENDED */ #endif /* !__GENERATING_BOUNDS_H */ #endif /* PAGE_FLAGS_H */ Index: linux-2.6.25-rc8-mm2/mm/page_alloc.c =================================================================== --- linux-2.6.25-rc8-mm2.orig/mm/page_alloc.c 2008-04-13 22:39:26.455740070 -0700 +++ linux-2.6.25-rc8-mm2/mm/page_alloc.c 2008-04-13 22:39:34.875750569 -0700 @@ -4610,6 +4610,28 @@ out: spin_unlock_irqrestore(&zone->lock, flags); } +void __pin_page(struct page *page) +{ + __SetPagePin(page); + inc_zone_page_state(page, NR_PINNED); +} +EXPORT_SYMBOL(__pin_page); + +void pin_page(struct page *page) +{ + get_page(page); + __pin_page(page); +} +EXPORT_SYMBOL(pin_page); + +void unpin_page(struct page *page) +{ + __ClearPagePin(page); + dec_zone_page_state(page, NR_PINNED); + put_page(page); +} +EXPORT_SYMBOL(unpin_page); + #ifdef CONFIG_MEMORY_HOTREMOVE /* * All pages in the range must be isolated before calling this. 
Index: linux-2.6.25-rc8-mm2/include/linux/mm.h =================================================================== --- linux-2.6.25-rc8-mm2.orig/include/linux/mm.h 2008-04-13 22:39:26.435741572 -0700 +++ linux-2.6.25-rc8-mm2/include/linux/mm.h 2008-04-13 22:51:13.095739587 -0700 @@ -806,7 +806,19 @@ extern int make_pages_present(unsigned l extern int access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, int len, int write); int get_user_pages(struct task_struct *tsk, struct mm_struct *mm, unsigned long start, - int len, int write, int force, struct page **pages, struct vm_area_struct **vmas); + int len, int write, int force, int pin, struct page **pages, struct vm_area_struct **vmas); + +#define PRM_WRITE 0x001 +#define PRM_FORCE 0x002 +#define PRM_PAGES 0x004 +#define PRM_VMAS 0x008 +#define PRM_PIN 0x010 + +int process_read(unsigned long start, int len, unsigned long flags, void *p); +int __process_read(struct task_struct *tsk, struct mm_struct *mm, + unsigned long start, int len, unsigned long flags, void *p); + + void print_bad_pte(struct vm_area_struct *, pte_t, unsigned long); extern int try_to_release_page(struct page * page, gfp_t gfp_mask); @@ -1192,6 +1204,7 @@ struct page *follow_page(struct vm_area_ #define FOLL_TOUCH 0x02 /* mark page accessed */ #define FOLL_GET 0x04 /* do get_page on page */ #define FOLL_ANON 0x08 /* give ZERO_PAGE if no pgtable */ +#define FOLL_PIN 0x10 /* get_page to pin page */ typedef int (*pte_fn_t)(pte_t *pte, pgtable_t token, unsigned long addr, void *data); Index: linux-2.6.25-rc8-mm2/mm/memory.c =================================================================== --- linux-2.6.25-rc8-mm2.orig/mm/memory.c 2008-04-13 22:39:26.465741032 -0700 +++ linux-2.6.25-rc8-mm2/mm/memory.c 2008-04-13 22:39:34.905749492 -0700 @@ -993,6 +993,8 @@ struct page *follow_page(struct vm_area_ if (flags & FOLL_GET) get_page(page); + if (flags & FOLL_PIN) + pin_page(page); if (flags & FOLL_TOUCH) { if ((flags & FOLL_WRITE) &&
!pte_dirty(pte) && !PageDirty(page)) @@ -1018,8 +1020,24 @@ no_page_table: return page; } +int __process_read(struct task_struct *tsk, struct mm_struct *mm, + unsigned long start, int len, unsigned long flags, void *x) +{ + if (flags & PRM_VMAS) + return get_user_pages(tsk, mm, start, len, flags, NULL, x); + else + return get_user_pages(tsk, mm, start, len, flags, x, NULL); +} +EXPORT_SYMBOL(__process_read); + +int process_read(unsigned long start, int len, unsigned long flags, void *x) +{ + return __process_read(current, current->mm, start, len, flags, x); +} +EXPORT_SYMBOL(process_read); + int get_user_pages(struct task_struct *tsk, struct mm_struct *mm, - unsigned long start, int len, int write, int force, + unsigned long start, int len, unsigned long flags, struct page **pages, struct vm_area_struct **vmas) { int i; @@ -1031,8 +1049,8 @@ int get_user_pages(struct task_struct *t * Require read or write permissions. * If 'force' is set, we only require the "MAY" flags. */ - vm_flags = write ? (VM_WRITE | VM_MAYWRITE) : (VM_READ | VM_MAYREAD); - vm_flags &= force ? (VM_MAYREAD | VM_MAYWRITE) : (VM_READ | VM_WRITE); + vm_flags = (flags & PRM_WRITE) ? (VM_WRITE | VM_MAYWRITE) : (VM_READ | VM_MAYREAD); + vm_flags &= (flags & PRM_FORCE) ?
(VM_MAYREAD | VM_MAYWRITE) : (VM_READ | VM_WRITE); i = 0; do { @@ -1085,14 +1103,18 @@ int get_user_pages(struct task_struct *t if (is_vm_hugetlb_page(vma)) { i = follow_hugetlb_page(mm, vma, pages, vmas, - &start, &len, i, write); + &start, &len, i, !!(flags & PRM_WRITE), !!(flags & PRM_PIN)); continue; } foll_flags = FOLL_TOUCH; - if (pages) - foll_flags |= FOLL_GET; - if (!write && !(vma->vm_flags & VM_LOCKED) && + if (pages) { + if (flags & PRM_PIN) + foll_flags |= FOLL_PIN; + else + foll_flags |= FOLL_GET; + } + if (!(flags & PRM_WRITE) && !(vma->vm_flags & VM_LOCKED) && (!vma->vm_ops || !vma->vm_ops->fault)) foll_flags |= FOLL_ANON; @@ -1137,6 +1159,8 @@ int get_user_pages(struct task_struct *t if (ret & VM_FAULT_WRITE) foll_flags &= ~FOLL_WRITE; + if (foll_flags & FOLL_PIN) + __pin_page(page); cond_resched(); } if (pages) { @@ -2662,12 +2686,15 @@ int make_pages_present(unsigned long add vma = find_vma(current->mm, addr); if (!vma) return -1; - write = (vma->vm_flags & VM_WRITE) != 0; + if (vma->vm_flags & VM_WRITE) + write = PRM_WRITE; + else + write = 0; + BUG_ON(addr >= end); BUG_ON(end > vma->vm_end); len = DIV_ROUND_UP(end, PAGE_SIZE) - addr/PAGE_SIZE; - ret = get_user_pages(current, current->mm, addr, - len, write, 0, NULL, NULL); + ret = process_read(addr, len, write, NULL); if (ret < 0) return ret; return ret == len ?
0 : -1; @@ -2740,7 +2767,7 @@ int access_process_vm(struct task_struct void *maddr; ret = get_user_pages(tsk, mm, addr, 1, - write, 1, &page, &vma); + write, 1, 0, &page, &vma); if (ret <= 0) break; Index: linux-2.6.25-rc8-mm2/mm/hugetlb.c =================================================================== --- linux-2.6.25-rc8-mm2.orig/mm/hugetlb.c 2008-04-13 22:39:26.475739958 -0700 +++ linux-2.6.25-rc8-mm2/mm/hugetlb.c 2008-04-13 22:39:34.925749827 -0700 @@ -1030,7 +1030,7 @@ int hugetlb_fault(struct mm_struct *mm, int follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma, struct page **pages, struct vm_area_struct **vmas, unsigned long *position, int *length, int i, - int write) + int write, int pin) { unsigned long pfn_offset; unsigned long vaddr = *position; @@ -1068,7 +1068,10 @@ int follow_hugetlb_page(struct mm_struct page = pte_page(huge_ptep_get(pte)); same_page: if (pages) { - get_page(page); + if (pin) + pin_page(page); + else + get_page(page); pages[i] = page + pfn_offset; } Index: linux-2.6.25-rc8-mm2/include/linux/hugetlb.h =================================================================== --- linux-2.6.25-rc8-mm2.orig/include/linux/hugetlb.h 2008-04-13 22:39:26.445740149 -0700 +++ linux-2.6.25-rc8-mm2/include/linux/hugetlb.h 2008-04-13 22:39:34.955749578 -0700 @@ -21,7 +21,7 @@ int hugetlb_sysctl_handler(struct ctl_ta int hugetlb_overcommit_handler(struct ctl_table *, int, struct file *, void __user *, size_t *, loff_t *); int hugetlb_treat_movable_handler(struct ctl_table *, int, struct file *, void __user *, size_t *, loff_t *); int copy_hugetlb_page_range(struct mm_struct *, struct mm_struct *, struct vm_area_struct *); -int follow_hugetlb_page(struct mm_struct *, struct vm_area_struct *, struct page **, struct vm_area_struct **, unsigned long *, int *, int, int); +int follow_hugetlb_page(struct mm_struct *, struct vm_area_struct *, struct page **, struct vm_area_struct **, unsigned long *, int *, int, int, int); void 
unmap_hugepage_range(struct vm_area_struct *, unsigned long, unsigned long); void __unmap_hugepage_range(struct vm_area_struct *, unsigned long, unsigned long); int hugetlb_prefault(struct address_space *, struct vm_area_struct *); Index: linux-2.6.25-rc8-mm2/fs/aio.c =================================================================== --- linux-2.6.25-rc8-mm2.orig/fs/aio.c 2008-04-13 22:39:26.485739663 -0700 +++ linux-2.6.25-rc8-mm2/fs/aio.c 2008-04-13 22:39:34.975749860 -0700 @@ -141,9 +141,8 @@ static int aio_setup_ring(struct kioctx } dprintk("mmap address: 0x%08lx\n", info->mmap_base); - info->nr_pages = get_user_pages(current, ctx->mm, - info->mmap_base, nr_pages, - 1, 0, info->ring_pages, NULL); + info->nr_pages = __process_read(current, ctx->mm, info->mmap_base, + nr_pages, PRM_WRITE|PRM_PAGES, info->ring_pages); up_write(&ctx->mm->mmap_sem); if (unlikely(info->nr_pages != nr_pages)) { Index: linux-2.6.25-rc8-mm2/fs/binfmt_elf.c =================================================================== --- linux-2.6.25-rc8-mm2.orig/fs/binfmt_elf.c 2008-04-13 22:39:26.495741100 -0700 +++ linux-2.6.25-rc8-mm2/fs/binfmt_elf.c 2008-04-13 22:39:35.015749702 -0700 @@ -2020,7 +2020,7 @@ static int elf_core_dump(long signr, str struct vm_area_struct *tmp_vma; if (get_user_pages(current, current->mm, addr, 1, 0, 1, - &page, &tmp_vma) <= 0) { + 0, &page, &tmp_vma) <= 0) { DUMP_SEEK(PAGE_SIZE); } else { if (page == ZERO_PAGE(0)) { Index: linux-2.6.25-rc8-mm2/fs/bio.c =================================================================== --- linux-2.6.25-rc8-mm2.orig/fs/bio.c 2008-04-13 22:39:26.505751759 -0700 +++ linux-2.6.25-rc8-mm2/fs/bio.c 2008-04-13 22:39:35.062440502 -0700 @@ -640,7 +640,7 @@ static struct bio *__bio_map_user_iov(st down_read(&current->mm->mmap_sem); ret = get_user_pages(current, current->mm, uaddr, local_nr_pages, - write_to_vm, 0, &pages[cur_page], NULL); + write_to_vm, 0, 0, &pages[cur_page], NULL); up_read(&current->mm->mmap_sem); if (ret < local_nr_pages) {
Index: linux-2.6.25-rc8-mm2/fs/compat.c =================================================================== --- linux-2.6.25-rc8-mm2.orig/fs/compat.c 2008-04-13 22:39:26.505751759 -0700 +++ linux-2.6.25-rc8-mm2/fs/compat.c 2008-04-13 22:39:35.112023533 -0700 @@ -1302,7 +1302,7 @@ static int compat_copy_strings(int argc, } #endif ret = get_user_pages(current, bprm->mm, pos, - 1, 1, 1, &page, NULL); + 1, 1, 1, 0, &page, NULL); if (ret <= 0) { /* We've exceed the stack rlimit. */ ret = -E2BIG; Index: linux-2.6.25-rc8-mm2/fs/direct-io.c =================================================================== --- linux-2.6.25-rc8-mm2.orig/fs/direct-io.c 2008-04-13 22:39:26.515739231 -0700 +++ linux-2.6.25-rc8-mm2/fs/direct-io.c 2008-04-13 22:39:35.143249721 -0700 @@ -157,7 +157,7 @@ static int dio_refill_pages(struct dio * dio->curr_user_address, /* Where from? */ nr_pages, /* How many pages? */ dio->rw == READ, /* Write to memory? */ - 0, /* force (?) */ + 0, 0, /* force (?) */ &dio->pages[0], NULL); /* vmas */ up_read(&current->mm->mmap_sem); Index: linux-2.6.25-rc8-mm2/fs/exec.c =================================================================== --- linux-2.6.25-rc8-mm2.orig/fs/exec.c 2008-04-13 22:39:26.525740049 -0700 +++ linux-2.6.25-rc8-mm2/fs/exec.c 2008-04-13 22:39:35.183249416 -0700 @@ -168,7 +168,7 @@ static struct page *get_arg_page(struct } #endif ret = get_user_pages(current, bprm->mm, pos, - 1, write, 1, &page, NULL); + 1, write, 1, 0, &page, NULL); if (ret <= 0) return NULL; Index: linux-2.6.25-rc8-mm2/fs/fuse/dev.c =================================================================== --- linux-2.6.25-rc8-mm2.orig/fs/fuse/dev.c 2008-04-13 22:39:26.535745212 -0700 +++ linux-2.6.25-rc8-mm2/fs/fuse/dev.c 2008-04-13 22:39:35.213249746 -0700 @@ -543,7 +543,7 @@ static int fuse_copy_fill(struct fuse_co } down_read(&current->mm->mmap_sem); err = get_user_pages(current, current->mm, cs->addr, 1, cs->write, 0, - &cs->pg, NULL); + 0, &cs->pg, NULL); up_read(&current->mm->mmap_sem); if
(err < 0) return err; Index: linux-2.6.25-rc8-mm2/fs/fuse/file.c =================================================================== --- linux-2.6.25-rc8-mm2.orig/fs/fuse/file.c 2008-04-13 22:39:26.545739498 -0700 +++ linux-2.6.25-rc8-mm2/fs/fuse/file.c 2008-04-13 22:39:35.285739347 -0700 @@ -931,7 +931,7 @@ static int fuse_get_user_pages(struct fu npages = clamp(npages, 1, FUSE_MAX_PAGES_PER_REQ); down_read(&current->mm->mmap_sem); npages = get_user_pages(current, current->mm, user_addr, npages, write, - 0, req->pages, NULL); + 0, 0, req->pages, NULL); up_read(&current->mm->mmap_sem); if (npages < 0) return npages; Index: linux-2.6.25-rc8-mm2/fs/nfs/direct.c =================================================================== --- linux-2.6.25-rc8-mm2.orig/fs/nfs/direct.c 2008-04-13 22:39:26.555740341 -0700 +++ linux-2.6.25-rc8-mm2/fs/nfs/direct.c 2008-04-13 22:39:35.305750305 -0700 @@ -301,7 +301,7 @@ static ssize_t nfs_direct_read_schedule_ down_read(&current->mm->mmap_sem); result = get_user_pages(current, current->mm, user_addr, - data->npages, 1, 0, data->pagevec, NULL); + data->npages, 1, 0, 0, data->pagevec, NULL); up_read(&current->mm->mmap_sem); if (result < 0) { nfs_readdata_release(data); @@ -708,7 +708,7 @@ static ssize_t nfs_direct_write_schedule down_read(&current->mm->mmap_sem); result = get_user_pages(current, current->mm, user_addr, - data->npages, 0, 0, data->pagevec, NULL); + data->npages, 0, 0, 0, data->pagevec, NULL); up_read(&current->mm->mmap_sem); if (result < 0) { nfs_writedata_release(data); Index: linux-2.6.25-rc8-mm2/fs/proc/task_mmu.c =================================================================== --- linux-2.6.25-rc8-mm2.orig/fs/proc/task_mmu.c 2008-04-13 22:39:26.565740660 -0700 +++ linux-2.6.25-rc8-mm2/fs/proc/task_mmu.c 2008-04-13 22:39:35.325750214 -0700 @@ -654,7 +654,7 @@ static ssize_t pagemap_read(struct file down_read(&current->mm->mmap_sem); ret = get_user_pages(current, current->mm, uaddr, pagecount, - 1, 0, pages, NULL); + 1, 0, 0, pages, NULL);
up_read(&current->mm->mmap_sem); if (ret < 0) Index: linux-2.6.25-rc8-mm2/fs/splice.c =================================================================== --- linux-2.6.25-rc8-mm2.orig/fs/splice.c 2008-04-13 22:39:26.575740207 -0700 +++ linux-2.6.25-rc8-mm2/fs/splice.c 2008-04-13 22:39:35.345750215 -0700 @@ -1238,9 +1238,8 @@ static int get_iovec_page_array(const st if (npages > PIPE_BUFFERS - buffers) npages = PIPE_BUFFERS - buffers; - error = get_user_pages(current, current->mm, - (unsigned long) base, npages, 0, 0, - &pages[buffers], NULL); + error = process_read((unsigned long) base, npages, PRM_PAGES, + &pages[buffers]); if (unlikely(error <= 0)) break; Index: linux-2.6.25-rc8-mm2/kernel/futex.c =================================================================== --- linux-2.6.25-rc8-mm2.orig/kernel/futex.c 2008-04-13 22:39:26.585739557 -0700 +++ linux-2.6.25-rc8-mm2/kernel/futex.c 2008-04-13 22:39:35.375749971 -0700 @@ -264,7 +264,7 @@ static int get_futex_key(u32 __user *uad * from swap. But that's a lot of code to duplicate here * for a rare case, so we simply fetch the page. */ - err = get_user_pages(current, mm, address, 1, 0, 0, &page, NULL); + err = process_read(address, 1, PRM_PAGES, &page); if (err >= 0) { key->shared.pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT); Index: linux-2.6.25-rc8-mm2/virt/kvm/kvm_main.c =================================================================== --- linux-2.6.25-rc8-mm2.orig/virt/kvm/kvm_main.c 2008-04-13 22:39:26.595740636 -0700 +++ linux-2.6.25-rc8-mm2/virt/kvm/kvm_main.c 2008-04-13 22:39:35.395750239 -0700 @@ -472,8 +472,7 @@ struct page *gfn_to_page(struct kvm *kvm return bad_page; } - npages = get_user_pages(current, current->mm, addr, 1, 1, 1, page, - NULL); + npages = process_read(addr, 1, PRM_WRITE|PRM_FORCE|PRM_PAGES, page); if (npages != 1) { get_page(bad_page);