Index: linux-2.6.17-rc3-mm1/mm/migrate.c
===================================================================
--- linux-2.6.17-rc3-mm1.orig/mm/migrate.c	2006-05-10 18:45:15.271441876 -0700
+++ linux-2.6.17-rc3-mm1/mm/migrate.c	2006-05-10 23:54:17.867254123 -0700
@@ -29,7 +29,13 @@
 #include "internal.h"
 
 /* The maximum number of pages to take off the LRU for migration */
-#define MIGRATE_CHUNK_SIZE 256
+#define MIGRATE_CHUNK_SIZE (PAGE_SIZE/sizeof(struct page *))
+
+struct migration_info {
+        struct page *from[MIGRATE_CHUNK_SIZE];
+        struct page *to[MIGRATE_CHUNK_SIZE];
+        int status[MIGRATE_CHUNK_SIZE];
+};
 
 #define lru_to_page(_head) (list_entry((_head)->prev, struct page, lru))
 
@@ -639,7 +645,7 @@ out:
 /*
  * migrate_pages
  *
- * Two lists are passed to this function. The first list
+ * Two arrays are passed to this function. The first array
  * contains the pages isolated from the LRU to be migrated.
 * The second list contains new pages that the isolated pages
 * can be moved to.
@@ -650,46 +656,47 @@ out:
  *
  * Return: Number of pages not migrated when "to" ran empty.
  */
-int migrate_pages(struct list_head *from, struct list_head *to)
+int migrate_pages(int nr, struct page **from, struct page **to, int *status)
 {
-        int retry;
+        int retry = nr;
         int nr_failed = 0;
-        int pass = 0;
-        struct page *page;
-        struct page *page2;
+        int i;
+        int pass;
         int swapwrite = current->flags & PF_SWAPWRITE;
-        int rc;
 
         if (!swapwrite)
                 current->flags |= PF_SWAPWRITE;
 
-redo:
-        retry = 0;
-
-        list_for_each_entry_safe(page, page2, from, lru) {
-
-                if (list_empty(to))
-                        break;
-
-                cond_resched();
-
-                rc = unmap_and_move(lru_to_page(to), page, pass > 2);
-
-                if (rc) {
-                        if (rc == -EAGAIN)
-                                retry++;
-                        else
-                                /* Permanent failure */
-                                nr_failed++;
+        for (i = 0; i < nr; i++)
+                status[i] = -EAGAIN;
+
+        for (pass = 0; pass < 10 && retry; pass++) {
+                retry = 0;
+                for (i = 0; i < nr; i++) {
+                        int rc;
+
+                        if (status[i] != -EAGAIN)
+                                continue;
+
+                        rc = status[i] = unmap_and_move(to[i], from[i], pass > 2);
+
+                        if (rc) {
+                                if (rc == -EAGAIN)
+                                        retry++;
+                                else
+                                        nr_failed++;
+                        }
+                        cond_resched();
                 }
         }
 
-        if (retry && pass++ < 10)
-                goto redo;
+        for (i = 0; i < nr; i++)
+                if (status[i] == -EAGAIN) {
+                        move_to_lru(to[i]);
+                        move_to_lru(from[i]);
+                        nr_failed++;
+                }
 
         if (!swapwrite)
                 current->flags &= ~PF_SWAPWRITE;
 
-        return nr_failed + retry;
+        return nr_failed;
 }
 
 /*
@@ -701,55 +708,126 @@ redo:
 int migrate_pages_to(struct list_head *pagelist,
                         struct vm_area_struct *vma, int dest)
 {
-        LIST_HEAD(newlist);
         int err = 0;
-        unsigned long offset = 0;
-        int nr_pages;
         int nr_failed = 0;
-        struct page *page;
-        struct list_head *p;
-
+        int nr_pages = 0;
+        struct list_head *p, *p2;
+        struct migration_info *m = kmalloc(sizeof(struct migration_info),
+                                                GFP_KERNEL);
+
+        if (!m) {
+                putback_lru_pages(pagelist);
+                return -ENOMEM;
+        }
 redo:
         nr_pages = 0;
-        list_for_each(p, pagelist) {
-                if (vma) {
-                        /*
-                         * The address passed to alloc_page_vma is used to
-                         * generate the proper interleave behavior. We fake
-                         * the address here by an increasing offset in order
-                         * to get the proper distribution of pages.
-                         *
-                         * No decision has been made as to which page
-                         * a certain old page is moved to so we cannot
-                         * specify the correct address.
-                         */
+        list_for_each_safe(p, p2, pagelist) {
+                struct page *oldpage, *page;
+
+                oldpage = list_entry(p, struct page, lru);
+                if (vma)
                         page = alloc_page_vma(GFP_HIGHUSER, vma,
-                                        offset + vma->vm_start);
-                        offset += PAGE_SIZE;
-                }
+                                        page_address_in_vma(oldpage, vma));
                 else
                         page = alloc_pages_node(dest, GFP_HIGHUSER, 0);
 
                 if (!page) {
+                        putback_lru_pages(pagelist);
+                        /* Migrate what we have */
+                        if (nr_pages)
+                                migrate_pages(nr_pages, m->from, m->to,
+                                                m->status);
                         err = -ENOMEM;
                         goto out;
                 }
-                list_add_tail(&page->lru, &newlist);
-                nr_pages++;
+                m->to[nr_pages] = page;
+                m->from[nr_pages++] = oldpage;
+                list_del(&oldpage->lru);
-                if (nr_pages > MIGRATE_CHUNK_SIZE)
+                if (nr_pages >= MIGRATE_CHUNK_SIZE)
                         break;
         }
-        err = migrate_pages(pagelist, &newlist);
+        if (nr_pages)
+                err = migrate_pages(nr_pages, m->from, m->to, m->status);
+
+        if (err < 0)
+                goto out;
 
-        if (err >= 0) {
-                nr_failed += err;
-                if (list_empty(&newlist) && !list_empty(pagelist))
-                        goto redo;
-        }
+        nr_failed += err;
+        if (!list_empty(pagelist))
+                goto redo;
+        err = nr_failed;
+out:
+        kfree(m);
+        return err;
+}
+
+/*
+ * Move a list of pages in the address space of the currently executing
+ * process.
+ */
+asmlinkage long sys_move_pages(unsigned long nr_pages,
+                unsigned long __user *pages, int __user *nodes, int flags)
+{
+        int err = 0;
+        int i;
+        struct migration_info *m;
+
+        /* Check flags */
+        if (flags & ~(MPOL_MF_MOVE|MPOL_MF_MOVE_ALL))
+                return -EINVAL;
+
+        if ((flags & MPOL_MF_MOVE_ALL) && !capable(CAP_SYS_NICE))
+                return -EPERM;
+
+        /* FIXME: larger requests should be split into MIGRATE_CHUNK_SIZE chunks */
+        if (nr_pages > MIGRATE_CHUNK_SIZE)
+                return -E2BIG;
+
+        m = kmalloc(sizeof(struct migration_info), GFP_KERNEL);
+        if (!m)
+                return -ENOMEM;
+
+        /* FIXME: the page table walk below needs mmap_sem held for reading */
+        for (i = 0; i < nr_pages; i++) {
+                unsigned long addr;
+                int node;
+                struct vm_area_struct *vma;
+                struct page *page;
+
+                if (nodes) {
+                        if (get_user(node, nodes + i)) {
+                                err = -EFAULT;
+                                goto out;
+                        }
+                        if (!node_online(node)) {
+                                err = -ENODEV;
+                                goto out;
+                        }
+                }
+
+                if (get_user(addr, pages + i)) {
+                        err = -EFAULT;
+                        goto out;
+                }
+
+                vma = find_vma(current->mm, addr);
+                if (!vma || addr < vma->vm_start) {
+                        err = -EFAULT;
+                        goto out;
+                }
+
+                page = follow_page(vma, addr, FOLL_GET);
+                if (!page) {
+                        err = -ENOENT;
+                        goto out;
+                }
+
+                if (page_mapcount(page) > 1 && !(flags & MPOL_MF_MOVE_ALL)) {
+                        err = -EPERM;
+                        goto out;
+                }
+                m->from[i] = page;
+        }
+
+        for (i = 0; i < nr_pages; i++) {
+                if (nodes) {
+                        int node;
+
+                        if (get_user(node, nodes + i)) {
+                                err = -EFAULT;
+                                goto out;
+                        }
+                        m->to[i] = alloc_pages_node(node, GFP_HIGHUSER, 0);
+                        if (!m->to[i]) {
+                                err = -ENOMEM;
+                                goto out;
+                        }
+                } else
+                        m->status[i] = page_to_nid(m->from[i]);
+        }
+
+        if (nodes)
+                err = migrate_pages(nr_pages, m->from, m->to, m->status);
+        else {
+                /* Putback pages onto LRU */
+        }
+
+        /* Push status array to user space */
+
 out:
+        kfree(m);
+        return err;
-        /* Calculate number of leftover pages */
-        list_for_each(p, pagelist)
-                nr_failed++;
-        return nr_failed;
 }
Index: linux-2.6.17-rc3-mm1/include/linux/migrate.h
===================================================================
--- linux-2.6.17-rc3-mm1.orig/include/linux/migrate.h	2006-05-10 18:11:39.714737247 -0700
+++ linux-2.6.17-rc3-mm1/include/linux/migrate.h	2006-05-10 18:45:22.150898137 -0700
@@ -9,7 +9,7 @@ extern int isolate_lru_page(struct page
 extern int putback_lru_pages(struct list_head *l);
 extern int migrate_page(struct address_space *,
                         struct page *, struct page *);
-extern int migrate_pages(struct list_head *l, struct list_head *t);
+extern int migrate_pages(int nr, struct page **, struct page **, int *);
 extern int migrate_pages_to(struct list_head *pagelist,
                         struct vm_area_struct *vma, int dest);
 extern int fail_migrate_page(struct address_space *,
@@ -22,8 +22,8 @@ extern int migrate_prep(void);
 static inline int isolate_lru_page(struct page *p, struct list_head *list)
                                         { return -ENOSYS; }
 static inline int putback_lru_pages(struct list_head *l) { return 0; }
-static inline int migrate_pages(struct list_head *l, struct list_head *t)
-        { return -ENOSYS; }
+static inline int migrate_pages(int nr, struct page **from, struct page **to,
+                int *status) { return -ENOSYS; }
 static inline int migrate_pages_to(struct list_head *pagelist,
                         struct vm_area_struct *vma, int dest) { return 0; }
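
A note on the sizing arithmetic implied by the new MIGRATE_CHUNK_SIZE: assuming
4KB pages, PAGE_SIZE/sizeof(struct page *) works out to 512 entries with 8-byte
pointers (up from the fixed 256) and 1024 entries with 4-byte pointers.  struct
migration_info then occupies 512 * (8 + 8 + 4) = 10240 bytes on 64-bit and
1024 * (4 + 4 + 4) = 12288 bytes on 32-bit, so the kmalloc() in
migrate_pages_to() and sys_move_pages() is a multi-page allocation rather than
a single page.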
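
For illustration, here is a minimal user-space sketch of how the proposed
interface could be exercised.  It is not part of the patch: no syscall number
is assigned here, so __NR_move_pages below is a placeholder, TARGET_NODE is
assumed to be online, and the MPOL_MF_* values are copied from
include/linux/mempolicy.h.  How per-page status gets pushed back to user space
is still an open question above, so the sketch only checks the return value.

#define _GNU_SOURCE
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/mman.h>
#include <sys/syscall.h>

/* Flag values mirror include/linux/mempolicy.h */
#define MPOL_MF_MOVE            (1<<1)
#define MPOL_MF_MOVE_ALL        (1<<2)

#define __NR_move_pages         -1      /* placeholder: not assigned by this patch */
#define NR_PAGES                8
#define TARGET_NODE             1       /* assumed to be online */

int main(void)
{
        long page_size = sysconf(_SC_PAGESIZE);
        unsigned long pages[NR_PAGES];
        int nodes[NR_PAGES];
        char *buf;
        long rc;
        int i;

        /* Fault in NR_PAGES of anonymous memory so there is something to move. */
        buf = mmap(NULL, NR_PAGES * page_size, PROT_READ | PROT_WRITE,
                   MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
        if (buf == MAP_FAILED) {
                perror("mmap");
                return 1;
        }
        memset(buf, 0xaa, NR_PAGES * page_size);

        for (i = 0; i < NR_PAGES; i++) {
                pages[i] = (unsigned long)(buf + i * page_size);
                nodes[i] = TARGET_NODE;
        }

        /* Proposed calling convention: (nr_pages, pages, nodes, flags). */
        rc = syscall(__NR_move_pages, (unsigned long)NR_PAGES, pages, nodes,
                     MPOL_MF_MOVE);
        if (rc < 0)
                perror("move_pages");
        else
                printf("move_pages: %ld pages not migrated\n", rc);

        return 0;
}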