Index: linux-2.6.17-rc4-mm1/mm/migrate.c
===================================================================
--- linux-2.6.17-rc4-mm1.orig/mm/migrate.c	2006-05-15 15:40:49.190141202 -0700
+++ linux-2.6.17-rc4-mm1/mm/migrate.c	2006-05-15 15:40:54.795261912 -0700
@@ -25,14 +25,42 @@
 #include <linux/cpu.h>
 #include <linux/cpuset.h>
 #include <linux/swapops.h>
+#include <linux/syscalls.h>
 
 #include "internal.h"
 
 /* The maximum number of pages to take off the LRU for migration */
-#define MIGRATE_CHUNK_SIZE 256
+#define MIGRATE_CHUNK_SIZE (PAGE_SIZE/sizeof(struct page *))
+
+/* One migration request: old page, its replacement, and the outcome. */
+struct migration_info {
+	struct page *from;
+	struct page *to;
+	int status;
+};
 
 #define lru_to_page(_head) (list_entry((_head)->prev, struct page, lru))
 
+/* Isolate @page from the LRU; caller must already hold a page reference. */
+static int _isolate_lru_page(struct page *page)
+{
+	int ret = -EBUSY;
+
+	if (PageLRU(page)) {
+		struct zone *zone = page_zone(page);
+
+		spin_lock_irq(&zone->lru_lock);
+		/* Recheck under the lru_lock: the page may have been taken
+		   off the LRU between the unlocked check and here. */
+		if (PageLRU(page)) {
+			ret = 0;
+			ClearPageLRU(page);
+			if (PageActive(page))
+				del_page_from_active_list(zone, page);
+			else
+				del_page_from_inactive_list(zone, page);
+		}
+		spin_unlock_irq(&zone->lru_lock);
+	}
+	/* Report the isolation result; returning 0 unconditionally would
+	   make callers treat a still-on-LRU page as isolated. */
+	return ret;
+}
+
 /*
  * Isolate one page from the LRU lists. If successful put it onto
  * the indicated list with elevated page count.
@@ -639,7 +666,7 @@ out:
 /*
  * migrate_pages
  *
- * Two lists are passed to this function. The first list
+ * Two arrays are passed to this function. The first list
  * contains the pages isolated from the LRU to be migrated.
  * The second list contains new pages that the isolated pages
  * can be moved to.
@@ -650,46 +677,49 @@ out:
 *
 * Return: Number of pages not migrated when "to" ran empty.
*/ -int migrate_pages(struct list_head *from, struct list_head *to) +static int migrate_pages(int nr, struct migration_info *m) { - int retry; + int retry = nr; int nr_failed = 0; - int pass = 0; - struct page *page; - struct page *page2; + struct migration_info *p; + int pass; int swapwrite = current->flags & PF_SWAPWRITE; - int rc; if (!swapwrite) current->flags |= PF_SWAPWRITE; -redo: - retry = 0; - - list_for_each_entry_safe(page, page2, from, lru) { + for(p = m; p < m + nr; p++) + p->status = -EAGAIN; - if (list_empty(to)) - break; - - cond_resched(); - - rc = unmap_and_move(lru_to_page(to), page, pass > 2); - - if (rc) { - if (rc == -EAGAIN) - retry++; - else - /* Permanent failure */ - nr_failed++; + for(pass = 0; pass <10 && retry; pass++) { + retry = 0; + for(p = m; p < m+ nr; p++) { + int rc; + if (p->status != -EAGAIN) + continue; + + rc = p->status = unmap_and_move(p->to, p->from, pass > 2); + + if (rc) { + if (rc == -EAGAIN) + retry++; + else + nr_failed++; + } + cond_resched(); } } - if (retry && pass++ < 10) - goto redo; + for(p = m; p < m + nr; p++) + if (p->status == -EAGAIN) { + move_to_lru(p->to); + move_to_lru(p->from); + nr_failed++; + } if (!swapwrite) current->flags &= ~PF_SWAPWRITE; - return nr_failed + retry; + return nr_failed; } /* @@ -701,55 +731,205 @@ redo: int migrate_pages_to(struct list_head *pagelist, struct vm_area_struct *vma, int dest) { - LIST_HEAD(newlist); int err = 0; - unsigned long offset = 0; - int nr_pages; int nr_failed = 0; - struct page *page; - struct list_head *p; - + int nr_pages = 0; + struct list_head *p, *p2; + struct migration_info *m = kmalloc(GFP_KERNEL, + MIGRATE_CHUNK_SIZE * sizeof(struct migration_info)); + + if (!m) { + putback_lru_pages(pagelist); + return -ENOMEM; + } redo: nr_pages = 0; - list_for_each(p, pagelist) { - if (vma) { - /* - * The address passed to alloc_page_vma is used to - * generate the proper interleave behavior. 
We fake - * the address here by an increasing offset in order - * to get the proper distribution of pages. - * - * No decision has been made as to which page - * a certain old page is moved to so we cannot - * specify the correct address. - */ + list_for_each_safe(p, p2, pagelist) { + struct page *oldpage, *page; + + oldpage = lru_to_page(p); + if (vma) page = alloc_page_vma(GFP_HIGHUSER, vma, - offset + vma->vm_start); - offset += PAGE_SIZE; - } + page_address_in_vma(oldpage, vma)); else page = alloc_pages_node(dest, GFP_HIGHUSER, 0); if (!page) { + putback_lru_pages(pagelist); + /* Migrate what we have */ + if (nr_pages) + migrate_pages(nr_pages, m); err = -ENOMEM; goto out; } - list_add_tail(&page->lru, &newlist); + m[nr_pages].to = page; + m[nr_pages].from = oldpage; nr_pages++; + list_del(&oldpage->lru); if (nr_pages > MIGRATE_CHUNK_SIZE) break; } - err = migrate_pages(pagelist, &newlist); + if (nr_pages) + err = migrate_pages(nr_pages, m); - if (err >= 0) { - nr_failed += err; - if (list_empty(&newlist) && !list_empty(pagelist)) - goto redo; - } + if (err < 0) + goto out; + + nr_failed += err; + if (!list_empty(pagelist)) + goto redo; + err = nr_failed; out: + kfree(m); + return err; +} - /* Calculate number of leftover pages */ - list_for_each(p, pagelist) - nr_failed++; - return nr_failed; +/* + * Move a list of pages in the address space of the currently executing + * process. + */ +asmlinkage long sys_move_pages(int pid, unsigned long nr_pages, + const unsigned long __user *pages, + const int __user *nodes, + int __user *status, int flags) +{ + int err = 0; + int i; + struct migration_info *m; + struct task_struct *task; + nodemask_t task_nodes; + struct mm_struct *mm; + + /* Check flags */ + if (flags & ~(MPOL_MF_MOVE|MPOL_MF_MOVE_ALL)) + return -EINVAL; + + if ((flags & MPOL_MF_MOVE_ALL) && !capable(CAP_SYS_NICE)) + return -EPERM; + + /* Find the mm_struct */ + read_lock(&tasklist_lock); + task = pid ? 
find_task_by_pid(pid) : current; + if (!task) { + read_unlock(&tasklist_lock); + return -ESRCH; + } + mm = get_task_mm(task); + read_unlock(&tasklist_lock); + + if (!mm) + return -EINVAL; + + /* + * Check if this process has the right to modify the specified + * process. The right exists if the process has administrative + * capabilities, superuser privileges or the same + * userid as the target process. + */ + if ((current->euid != task->suid) && (current->euid != task->uid) && + (current->uid != task->suid) && (current->uid != task->uid) && + !capable(CAP_SYS_NICE)) { + err = -EPERM; + goto out2; + } + + task_nodes = cpuset_mems_allowed(task); + + m = kmalloc(GFP_KERNEL, nr_pages * sizeof(struct migration_info)); + if (!m) + return -ENOMEM; + + down_read(&mm->mmap_sem); + + for(i = 0 ; i < nr_pages; i++) { + unsigned long addr; + struct vm_area_struct *vma; + struct page *page; + + err = -EFAULT; + if (get_user(addr, pages + i)) + goto restore_counts; + + vma = find_vma(mm, addr); + if (!vma) + goto restore_counts; + + page = follow_page(vma, addr, FOLL_GET); + if (!page) + goto restore_counts; + + if (page_mapcount(page) > 1 && !(flags & MPOL_MF_MOVE_ALL)) { + put_page(page); + err = -EPERM; + goto restore_counts; + } + + err = _isolate_lru_page(page); + if (err) + goto restore_counts; + + m[i].from = page; + } + + if (!nodes) { + for(i = 0; i < nr_pages; i++) { + struct page * page = m[i].from; + int node; + + node = page_to_nid(page); + put_user(node, status + i); + move_to_lru(page); + } + err = 0; + } else { + for(i = 0; i < nr_pages; i++) { + int node; + struct page *page; + + err = -EFAULT; + if (get_user(node, nodes + i)) + goto free_to_and_restore_counts; + + err = -ENOENT; + if (!node_online(node)) + goto restore_counts; + + err = -EPERM; + if (!node_isset(node, task_nodes)) + goto restore_counts; + + err = -ENOMEM; + page = alloc_pages_node(node, GFP_HIGHUSER, 0); + if (!page) + goto free_to_and_restore_counts; + + m[i].to = page; + } + + err = 
migrate_pages(nr_pages, m); + + for(i = 0; i < nr_pages; i++) { + if (put_user(m[i].status, status + i)) { + err = -EFAULT; + goto out; + } + } + } + goto out; + +free_to_and_restore_counts: + while (--i >= 0) + free_cold_page(m[i].to); + + i = nr_pages; +restore_counts: + while (--i >= 0) + move_to_lru(m[i].from); + +out: + up_read(&mm->mmap_sem); + kfree(m); +out2: + mmput(mm); + return err; } Index: linux-2.6.17-rc4-mm1/include/linux/migrate.h =================================================================== --- linux-2.6.17-rc4-mm1.orig/include/linux/migrate.h 2006-05-15 15:40:49.266308347 -0700 +++ linux-2.6.17-rc4-mm1/include/linux/migrate.h 2006-05-15 15:40:54.797214916 -0700 @@ -8,7 +8,6 @@ extern int isolate_lru_page(struct page extern int putback_lru_pages(struct list_head *l); extern int migrate_page(struct address_space *, struct page *, struct page *); -extern int migrate_pages(struct list_head *l, struct list_head *t); extern int migrate_pages_to(struct list_head *pagelist, struct vm_area_struct *vma, int dest); extern int fail_migrate_page(struct address_space *, @@ -21,8 +20,6 @@ extern int migrate_prep(void); static inline int isolate_lru_page(struct page *p, struct list_head *list) { return -ENOSYS; } static inline int putback_lru_pages(struct list_head *l) { return 0; } -static inline int migrate_pages(struct list_head *l, struct list_head *t) - { return -ENOSYS; } static inline int migrate_pages_to(struct list_head *pagelist, struct vm_area_struct *vma, int dest) { return 0; }