page migration: arrays as parameters to migrate_pages() Make migrate_pages() operate on arrays instead of lists. That allows a 1-1 association between entries in each. We know then exactly which page will be migrated to which new page. Use that knowledge to fix the interleave allocation in migrate_pages_to(). Signed-off-by: Christoph Lameter Index: linux-2.6.17-rc3-mm1/mm/migrate.c =================================================================== --- linux-2.6.17-rc3-mm1.orig/mm/migrate.c 2006-05-08 00:27:44.468409839 -0700 +++ linux-2.6.17-rc3-mm1/mm/migrate.c 2006-05-08 00:27:50.723881703 -0700 @@ -29,7 +29,7 @@ #include "internal.h" /* The maximum number of pages to take off the LRU for migration */ -#define MIGRATE_CHUNK_SIZE 256 +#define MIGRATE_CHUNK_SIZE (PAGE_SIZE/sizeof(void *)) #define lru_to_page(_head) (list_entry((_head)->prev, struct page, lru)) @@ -619,74 +619,71 @@ static int unmap_and_move(struct page *n /* * migrate_pages * - * Two lists are passed to this function. The first list + * Two arrays are passed to this function. The first array * contains the pages isolated from the LRU to be migrated. - * The second list contains new pages that the isolated pages + * The second array contains new pages that the isolated pages * can be moved to. * * The function returns after 10 attempts or if no pages - * are movable anymore because to has become empty - * or no retryable pages exist anymore. + * are movable anymore. * - * Return: Number of pages not migrated when "to" ran empty. + * Return: Number of pages not migrated. */ -int migrate_pages(struct list_head *from, struct list_head *to, - struct list_head *moved, struct list_head *failed) +int migrate_pages(int nr, struct page **from, struct page **to, int *status) { - int retry; + int retry = 0; int nr_failed = 0; int pass = 0; struct page *page; - struct page *page2; int swapwrite = current->flags & PF_SWAPWRITE; int rc; + int i; if (!swapwrite) current->flags |= PF_SWAPWRITE; -redo: - retry = 0; - - list_for_each_entry_safe(page, page2, from, lru) { - - if (list_empty(to)) - break; + memset(status, -EAGAIN, nr*sizeof(int)); - cond_resched(); + for(pass = 0; pass < 10 && retry; pass++) { + retry = 0; + for(i = 0; i < nr; i++) { + if (status[i] != -EAGAIN) + continue; + + page = from[i]; + + if (page_count(page) == 1) + /* page was freed from under us. */ + rc = 0; + else { + /* + * Wait for the page to become unlocked if we + * have already done a couple of passes. + */ + if (pass > 2) + wait_on_page_locked(page); + /* + * Only wait on writeback if we have already + * done a pass where we may have triggered + * writeouts for lots of pages. + */ + if (pass > 0) + wait_on_page_writeback(page); - if (page_count(page) == 1) - /* page was freed from under us. So we are done. */ - rc = 0; - else { - /* - * Wait for the page to become unlocked if we have already done - * a couple of passes. - */ - if (pass > 2) - wait_on_page_locked(page); - /* - * Only wait on writeback if we have already done a pass where - * we we may have triggered writeouts for lots of pages. - */ - if (pass > 0) - wait_on_page_writeback(page); + rc = unmap_and_move(to[i], page, + pass > 5); + } - rc = unmap_and_move(lru_to_page(to), page, pass > 5); + if (rc) { + if (rc == -EAGAIN) + retry++; + else + nr_failed++; + } } - if (rc) { - if (rc == -EAGAIN) - retry++; - else { - /* Permanent failure */ - list_move(&page->lru, failed); - nr_failed++; - } - } else - list_move(&page->lru, moved); + cond_resched(); } - if (retry && pass++ < 10) - goto redo; if (!swapwrite) current->flags &= ~PF_SWAPWRITE; @@ -703,65 +700,68 @@ redo: int migrate_pages_to(struct list_head *pagelist, struct vm_area_struct *vma, int dest) { - LIST_HEAD(newlist); - LIST_HEAD(moved); - LIST_HEAD(failed); int err = 0; - unsigned long offset = 0; - int nr_pages; - struct page *page; - struct list_head *p; + int nr_failed = 0; + struct page**from = (void *)get_zeroed_page(GFP_KERNEL); + struct page**to = (void *)get_zeroed_page(GFP_KERNEL); + int *status = (void *)get_zeroed_page(GFP_KERNEL); + + if (!from || !to || !status) + goto err_out; + while (!list_empty(pagelist) && err >=0) { + int nr_pages = 0; + int i; + struct list_head *p, *p2; + + /* Build array of pages to migrate */ + list_for_each_safe(p, p2, pagelist) { + struct page *newpage, *oldpage; + + oldpage = lru_to_page(p); + + if (vma) + newpage = alloc_page_vma(GFP_HIGHUSER, vma, + page_address_in_vma(oldpage, vma)); + else + newpage = alloc_pages_node(dest, + GFP_HIGHUSER, 0); -redo: - nr_pages = 0; - list_for_each(p, pagelist) { - if (vma) { - /* - * The address passed to alloc_page_vma is used to - * generate the proper interleave behavior. We fake - * the address here by an increasing offset in order - * to get the proper distribution of pages. - * - * No decision has been made as to which page - * a certain old page is moved to so we cannot - * specify the correct address. - */ - page = alloc_page_vma(GFP_HIGHUSER, vma, - offset + vma->vm_start); - offset += PAGE_SIZE; - } - else - page = alloc_pages_node(dest, GFP_HIGHUSER, 0); + if (newpage) { + from[nr_pages] = oldpage; + to[nr_pages++] = newpage; + list_del(&oldpage->lru); + } - if (!page) { - err = -ENOMEM; - goto out; + if (!newpage || nr_pages > MIGRATE_CHUNK_SIZE) + break; } - list_add_tail(&page->lru, &newlist); - nr_pages++; - if (nr_pages > MIGRATE_CHUNK_SIZE) - break; - } - err = migrate_pages(pagelist, &newlist, &moved, &failed); - putback_lru_pages(&moved); /* Call release pages instead ?? */ + if (nr_pages) { + /* Perform the actual migration */ + err = migrate_pages(nr_pages, from, to, status); + for(i = 0; i < nr_pages; i++) { + if (status[i]) + /* New page was not used */ + put_page(to[i]); + move_to_lru(from[i]); + } - if (err >= 0 && list_empty(&newlist) && !list_empty(pagelist)) - goto redo; -out: - /* Return leftover allocated pages */ - while (!list_empty(&newlist)) { - page = list_entry(newlist.next, struct page, lru); - list_del(&page->lru); - __free_page(page); + if (err < 0) + goto err_out; + + nr_failed += err; + } } - list_splice(&failed, pagelist); - if (err < 0) - return err; - - /* Calculate number of leftover pages */ - nr_pages = 0; - list_for_each(p, pagelist) - nr_pages++; - return nr_pages; + err = nr_failed; +out: + free_page((unsigned long)from); + free_page((unsigned long)to); + free_page((unsigned long)status); + return err; + +err_out: + putback_lru_pages(pagelist); + err = -ENOMEM; + goto out; } + Index: linux-2.6.17-rc3-mm1/include/linux/migrate.h =================================================================== --- linux-2.6.17-rc3-mm1.orig/include/linux/migrate.h 2006-05-08 00:26:36.822209687 -0700 +++ linux-2.6.17-rc3-mm1/include/linux/migrate.h 2006-05-08 00:27:50.725834707 -0700 @@ -9,8 +9,8 @@ extern int isolate_lru_page(struct page extern int putback_lru_pages(struct list_head *l); extern int migrate_page(struct address_space *, struct page *, struct page *); -extern int migrate_pages(struct list_head *l, struct list_head *t, - struct list_head *moved, struct list_head *failed); +extern int migrate_pages(int, struct page **, struct page **, + int *status); extern int migrate_pages_to(struct list_head *pagelist, struct vm_area_struct *vma, int dest); extern int fail_migrate_page(struct address_space *, @@ -23,8 +23,8 @@ extern int migrate_prep(void); static inline int isolate_lru_page(struct page *p, struct list_head *list) { return -ENOSYS; } static inline int putback_lru_pages(struct list_head *l) { return 0; } -static inline int migrate_pages(struct list_head *l, struct list_head *t, - struct list_head *moved, struct list_head *failed) { return -ENOSYS; } +static inline int migrate_pages(int, struct page **, struct page **, + char status[]) { return -ENOSYS; } static inline int migrate_pages_to(struct list_head *pagelist, struct vm_area_struct *vma, int dest) { return 0; }