Index: linux-2.6.14-rc4-mm1/mm/mempolicy.c
===================================================================
--- linux-2.6.14-rc4-mm1.orig/mm/mempolicy.c	2005-10-21 09:35:40.000000000 -0700
+++ linux-2.6.14-rc4-mm1/mm/mempolicy.c	2005-10-21 13:09:12.000000000 -0700
@@ -234,6 +234,65 @@ static void migrate_page_add(struct vm_a
 	}
 }
 
+/*
+ * Migrate a list of pages to a certain destination.
+ *
+ * @l:    list of pages to migrate
+ * @node: node to allocate the replacement pages on, or -1 to
+ *        allocate them with alloc_page()
+ *
+ * One replacement page is allocated for each entry on @l and the two
+ * lists are then handed to migrate_pages(). Pages still on @l on
+ * return were not migrated; they are left there for the caller, which
+ * checks list_empty() afterwards.
+ *
+ * NOTE(review): assumes migrate_pages() takes the pages it moved off
+ * @l and uses up the matching replacement pages - confirm against
+ * the migrate_pages() implementation.
+ *
+ * return the number of pages not migrated or error code
+ */
+static int migrate_pages_to(struct list_head *l, int node)
+{
+	LIST_HEAD(newlist);
+	int err = 0;
+	int count = 0;
+	struct page *page;
+	struct list_head *p;
+
+	list_for_each(p, l) {
+		if (node == -1)
+			page = alloc_page(GFP_USER);
+		else
+			page = alloc_pages_node(node, GFP_USER, 0);
+
+		if (!page) {
+			err = -ENOMEM;
+			goto out;
+		}
+		list_add(&page->lru, &newlist);
+	}
+	migrate_pages(l, &newlist);
+out:
+	/*
+	 * Give back the replacement pages that were not used. Unlink
+	 * before freeing: page->lru must not be touched after
+	 * __free_page().
+	 */
+	while (!list_empty(&newlist)) {
+		page = list_entry(newlist.next, struct page, lru);
+		list_del(&page->lru);
+		__free_page(page);
+	}
+	if (err)
+		return err;
+
+	/* Whatever is still on the source list was not migrated */
+	list_for_each(p, l)
+		count++;
+	return count;
+}
+
 /* Ensure all existing pages follow the policy. */
 static int check_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
 		unsigned long addr, unsigned long end,
@@ -496,7 +555,7 @@ long do_mbind(unsigned long start, unsig
 	if (!IS_ERR(vma)) {
 		err = mbind_range(vma, start, end, new);
 		if (!list_empty(&pagelist))
-			swapout_pages(&pagelist);
+			migrate_pages_to(&pagelist, -1);
 		if (!err && !list_empty(&pagelist) && (flags & MPOL_MF_STRICT))
 				err = -EIO;
 	}
@@ -632,6 +691,35 @@ long do_get_mempolicy(int *policy, nodem
 }
 
 /*
+ * Migrate pages from one node to a target node.
+ *
+ * check_range() is called for @mm with every node except @source set
+ * in the node mask; the pages it puts on the list are handed to
+ * migrate_pages_to() with @dest as destination. Pages still on the
+ * list after that are passed to putback_lru_pages().
+ *
+ * Returns error or the number of pages not migrated.
+ */
+int migrate_node(struct mm_struct *mm, int source, int dest, int flags)
+{
+	nodemask_t nodes;
+	LIST_HEAD(pagelist);
+	int err = 0;
+
+	nodes_setall(nodes);
+	node_clear(source, nodes);
+
+	check_range(mm, mm->mmap->vm_start, TASK_SIZE, &nodes,
+			flags | MPOL_MF_DISCONTIG_OK, &pagelist);
+	if (!list_empty(&pagelist)) {
+		err = migrate_pages_to(&pagelist, dest);
+		if (!list_empty(&pagelist))
+			putback_lru_pages(&pagelist);
+	}
+	return err;
+}
+
+/*
  * For now migrate_pages simply swaps out the pages from nodes that are in
  * the source set but not in the target set. In the future, we would
  * want a function that moves pages between the two nodesets in such
@@ -643,22 +731,61 @@ int do_migrate_pages(struct mm_struct *m
 	nodemask_t *from_nodes, nodemask_t *to_nodes, int flags)
 {
-	LIST_HEAD(pagelist);
-	int count = 0;
-	nodemask_t nodes;
-
-	nodes_andnot(nodes, *from_nodes, *to_nodes);
-	nodes_complement(nodes, nodes);
+	int err = 0;
+	int count;
+	int node;
+	int tnodes = nodes_weight(*to_nodes);
+	/* Sized to at least one entry so the array is never zero-length */
+	int targets[tnodes ? tnodes : 1];
+
+	/* Without a target node there is nowhere to migrate to */
+	if (!tnodes)
+		return -EINVAL;
+
+	/* Flatten the target mask so that it can be indexed round-robin */
+	count = 0;
+	for_each_node_mask(node, *to_nodes)
+		targets[count++] = node;
 
 	down_read(&mm->mmap_sem);
-	check_range(mm, mm->mmap->vm_start, TASK_SIZE, &nodes,
-			flags | MPOL_MF_DISCONTIG_OK, &pagelist);
-	if (!list_empty(&pagelist)) {
-		swapout_pages(&pagelist);
-		if (!list_empty(&pagelist))
-			count = putback_lru_pages(&pagelist);
+
+	/*
+	 * Migration needs to happen in such a way that we
+	 * do not migrate too many pages intermittently on one
+	 * node.
+	 */
+	if (first_node(*from_nodes) < first_node(*to_nodes)) {
+		/* Walk backward through the source nodelist */
+		count = tnodes - 1;
+
+		for (node = MAX_NUMNODES - 1; node >= 0; node--)
+			if (node_isset(node, *from_nodes)) {
+				err = migrate_node(mm, node, targets[count],
+							flags);
+				/* Stop on an error or on pages left behind */
+				if (err)
+					goto out;
+
+				if (count > 0)
+					count--;
+				else
+					count = tnodes - 1;
+			}
+	} else {
+		/* Walk forward through the source nodelist */
+		count = 0;
+		for_each_node_mask(node, *from_nodes) {
+			err = migrate_node(mm, node, targets[count % tnodes],
+						flags);
+			/* Stop on an error or on pages left behind */
+			if (err)
+				goto out;
+
+			count++;
+		}
 	}
+out:
 	up_read(&mm->mmap_sem);
-	return count;
+	return err;
 }
 
 /*
Index: linux-2.6.14-rc4-mm1/include/linux/swap.h
===================================================================
--- linux-2.6.14-rc4-mm1.orig/include/linux/swap.h	2005-10-20 13:20:53.000000000 -0700
+++ linux-2.6.14-rc4-mm1/include/linux/swap.h	2005-10-21 10:27:41.000000000 -0700
@@ -180,6 +180,7 @@ extern int isolate_lru_page(struct page 
 extern int putback_lru_pages(struct list_head *l);
 
 extern int swapout_pages(struct list_head *l);
+extern int migrate_pages(struct list_head *source, struct list_head *target);
 
 #ifdef CONFIG_MMU
 /* linux/mm/shmem.c */