Add direct migration support with fallback to swap. This patch adds direct migration support on top of the swap-based page migration facility. It allows the migration of anonymous pages and of file-backed pages by dropping the associated buffers (which requires writeout), and falls back to swapping the page out if necessary. Based on many patches from the memory hotplug project. Signed-off-by: Mike Kravetz Signed-off-by: Christoph Lameter Index: linux-2.6.14-rc5-mm1/mm/vmscan.c =================================================================== --- linux-2.6.14-rc5-mm1.orig/mm/vmscan.c 2005-11-02 13:36:10.000000000 -0800 +++ linux-2.6.14-rc5-mm1/mm/vmscan.c 2005-11-02 13:51:39.000000000 -0800 @@ -583,54 +583,218 @@ keep: */ static int swap_page(struct page *page) { + int rc = -EAGAIN; + struct address_space *mapping = page_mapping(page); if (page_mapped(page) && mapping) if (try_to_unmap(page) != SWAP_SUCCESS) - goto unlock_retry; + goto out; if (PageDirty(page)) { /* Page is dirty, try to write it out here */ switch(pageout(page, mapping)) { case PAGE_KEEP: case PAGE_ACTIVATE: - goto unlock_retry; + goto out; + case PAGE_SUCCESS: - goto retry; + return -EAGAIN; + case PAGE_CLEAN: ; /* try to free the page below */ } } if (PagePrivate(page)) { - if (!try_to_release_page(page, GFP_KERNEL)) - goto unlock_retry; - if (!mapping && page_count(page) == 1) - goto free_it; + if (!try_to_release_page(page, GFP_KERNEL) || + (!mapping && page_count(page) == 1)) { + rc = -ENOENT; + goto out; + } } - if (!remove_mapping(mapping, page)) - goto unlock_retry; /* truncate got there first */ + if (remove_mapping(mapping, page)) + rc = 0; +out: + unlock_page(page); + return rc; +} + +static inline void move_to_lru(struct page *page) +{ + list_del(&page->lru); + if (PageActive(page)) { + /* + * lru_cache_add_active checks that + * the PG_active bit is off. 
+ */ + ClearPageActive(page); + lru_cache_add_active(page); + } else + lru_cache_add(page); + put_page(page); +} + +/* + * Page migration was developed in the context of the memory hotplug project. + * The main authors of the migration code are: + * + * IWAMOTO Toshihiro + * Hirokazu Takahashi + * Dave Hansen + * Christoph Lameter + */ + +/* + * Remove references for a page and establish the new page with the correct + * basic settings to be able to stop accesses to the page. + */ +int migrate_page_remove_references(struct page *newpage, struct page *page, int nr_refs) +{ + struct address_space *mapping = page_mapping(page); + struct page *oldpage; + + BUG_ON(!mapping); /* must have added this page to swap so mapping must exist */ + + /* Bail out if there are other users of the page */ + if (page_mapcount(page) + nr_refs != page_count(page)) + return -EAGAIN; + + get_page(newpage); + read_lock_irq(&mapping->tree_lock); + + /* + * Certain minimal information about a page must be available + * in order for other subsystems to properly handle the page if they find + * it through some of the links that we soon will establish. + */ + newpage->index = page_index(page); + if (PageSwapCache(page)) { + SetPageSwapCache(newpage); + set_page_private(newpage, page_private(page)); + } else + newpage->mapping = page->mapping; + + oldpage = radix_tree_replace(&mapping->page_tree, page_index(page), newpage); + read_unlock_irq(&mapping->tree_lock); + + if (!oldpage) { + /* + * Replace failed because truncation is in progress + */ + ClearPageSwapCache(newpage); + set_page_private(newpage, 0); + newpage->mapping = NULL; + newpage->index = 0; + + __put_page(newpage); + return -ENOENT; + } + + __put_page(page); /* mapping removed from page */ -free_it: /* - * We may free pages that were taken off the active list - * by isolate_lru_page. However, free_hot_cold_page will check - * if the active bit is set. So clear it. 
+ * We are now in the critical section where there is no easy way + * out since other processes accessing newpage may have followed + * the mapping that we have established above. We need to succeed! */ + while (page_mapped(page)) { + int rc = try_to_unmap(page); + + if (rc == SWAP_SUCCESS) + break; + /* + * If there are other runnable processes then running + * them may make it possible to unmap the page + */ + schedule(); + + /* + * A really unswappable page should not occur here + * since we should have checked for the page + * not being in a vma that is unswappable + * before entering this function. + * + * Currently we will simply hang if such a page + * is encountered here. + */ + } + + return 0; +} + +/* + * Copy the page to its new location + */ +void migrate_page_copy(struct page *newpage, struct page *page) +{ + + copy_highpage(newpage, page); + + if (PageError(page)) + SetPageError(newpage); + if (PageReferenced(page)) + SetPageReferenced(newpage); + if (PageUptodate(page)) + SetPageUptodate(newpage); + if (PageActive(page)) + SetPageActive(newpage); + if (PageChecked(page)) + SetPageChecked(newpage); + if (PageMappedToDisk(page)) + SetPageMappedToDisk(newpage); + + if (PageDirty(page)) { + clear_page_dirty_for_io(page); + set_page_dirty(newpage); + } + + /* + * Make the old page a zombie page + */ + ClearPageSwapCache(page); ClearPageActive(page); + ClearPagePrivate(page); + set_page_private(page, 0); + page->mapping = NULL; - list_del(&page->lru); - unlock_page(page); - put_page(page); - return 0; + /* + * If any waiters have accumulated on the new page then + * wake them up. + */ + if (PageWriteback(newpage)) + end_page_writeback(newpage); +} -unlock_retry: - unlock_page(page); +/* + * Common logic to directly migrate a single page suitable for + * anonymous pages and file mapped pages that do not use page private. + * + * Pages are locked upon entry and exit. + * + * It has been verified that the page is not + * 1. Undergoing writeback. + * 2. 
Having any additional references besides the radix tree, + * page tables and the reference from isolate_lru_page(). + * 3. Part of a vma that is not swappable + */ +int migrate_page(struct page *newpage, struct page *page) +{ + int rc; + + BUG_ON(PageWriteback(page)); /* Writeback must be complete */ + + rc = migrate_page_remove_references(newpage, page, 2); -retry: - return 1; + if (rc) + return rc; + + migrate_page_copy(newpage, page); + + return 0; } + /* * migrate_pages * @@ -643,11 +807,6 @@ retry: * The function returns after 10 attempts or if no pages * are movable anymore because t has become empty * or no retryable pages exist anymore. - * - * SIMPLIFIED VERSION: This implementation of migrate_pages - * is only swapping out pages and never touches the second - * list. The direct migration patchset - * extends this function to avoid the use of swap. */ int migrate_pages(struct list_head *l, struct list_head *t) { @@ -658,6 +817,7 @@ int migrate_pages(struct list_head *l, s struct page *page; struct page *page2; int swapwrite = current->flags & PF_SWAPWRITE; + int rc = 0; if (!swapwrite) current->flags |= PF_SWAPWRITE; @@ -666,18 +826,25 @@ redo: retry = 0; list_for_each_entry_safe(page, page2, l, lru) { + struct page *newpage = NULL; + struct address_space *mapping; + cond_resched(); + if (t && list_empty(t)) + break; + /* * Skip locked pages during the first two passes to give the * functions holding the lock time to release the page. Later we use * lock_page to have a higher chance of acquiring the lock. 
*/ + rc = -EAGAIN; if (pass > 2) lock_page(page); else if (TestSetPageLocked(page)) - goto retry_later; + goto next; /* * Only wait on writeback if we have already done a pass where @@ -686,12 +853,15 @@ redo: if (pass > 0) wait_on_page_writeback(page); else - if (PageWriteback(page)) { - unlock_page(page); - goto retry_later; - } + if (PageWriteback(page)) + goto unlock_page; #ifdef CONFIG_SWAP + /* + * Anonymous pages must have swap cache references otherwise + * the information contained in the page maps cannot be + * preserved. + */ if (PageAnon(page) && !PageSwapCache(page)) { if (!add_to_swap(page)) { unlock_page(page); @@ -702,13 +872,67 @@ redo: } #endif /* CONFIG_SWAP */ + if (!t) { + rc = swap_page(page); + goto next; + } + + newpage = lru_to_page(t); + lock_page(newpage); + /* * Page is properly locked and writeback is complete. * Try to migrate the page. */ - if (swap_page(page)) { -retry_later: + mapping = page_mapping(page); + + /* + * If the page is dirty then try_to_release_page + * will fail. swap_page() will then trigger writeout. + * + * If we can release the buffers then do a simple + * migrate. + */ + if (try_to_release_page(page, GFP_KERNEL)) { + rc = migrate_page(newpage, page); + goto unlock_both; + } + + /* + * On early passes with mapped pages simply + * retry. There may be a lock held for some + * buffers that may go away later. Later + * swap them out. 
+ */ + if (pass > 2) { + unlock_page(newpage); + newpage = NULL; + rc = swap_page(page); + goto next; + } + +unlock_both: + unlock_page(newpage); + +unlock_page: + unlock_page(page); + +next: + if (rc == -EAGAIN) retry++; + + else if (rc == -ENOENT) { + /* Page vanished under us */ + move_to_lru(page); + + } else if (rc) { + /* Failed */ + list_move(&page->lru, &failed); + nr_failed++; + } + else if (newpage) { + /* Success */ + move_to_lru(newpage); } } if (retry && pass++ < 10) @@ -869,21 +1093,6 @@ done: pagevec_release(&pvec); } -static inline void move_to_lru(struct page *page) -{ - list_del(&page->lru); - if (PageActive(page)) { - /* - * lru_cache_add_active checks that - * the PG_active bit is off. - */ - ClearPageActive(page); - lru_cache_add_active(page); - } else - lru_cache_add(page); - put_page(page); -} - /* * Add isolated pages on the list back to the LRU * Index: linux-2.6.14-rc5-mm1/include/linux/swap.h =================================================================== --- linux-2.6.14-rc5-mm1.orig/include/linux/swap.h 2005-11-02 13:36:10.000000000 -0800 +++ linux-2.6.14-rc5-mm1/include/linux/swap.h 2005-11-02 13:44:45.000000000 -0800 @@ -181,6 +181,10 @@ extern int putback_lru_pages(struct list #ifdef CONFIG_MIGRATION extern int migrate_pages(struct list_head *l, struct list_head *t); + +extern int migrate_page(struct page *, struct page *); +extern int migrate_page_remove_references(struct page *, struct page *, int); +extern void migrate_page_copy(struct page *, struct page *); #endif #ifdef CONFIG_MMU