Any page on the LRU lists can be migrated with this patch.  Pages in the
page cache, pages in the swap cache, and anonymous pages are all handled
in the same way.

Signed-off-by: Hirokazu Takahashi
Signed-off-by: Dave Hansen
---

 memhotplug-dave/include/linux/mmigrate.h |   11 
 memhotplug-dave/mm/Makefile              |    1 
 memhotplug-dave/mm/mmigrate.c            |  370 +++++++++++++++++++++++++++++++
 3 files changed, 382 insertions(+)

diff -puN /dev/null include/linux/mmigrate.h
--- /dev/null	2005-03-30 22:36:15.000000000 -0800
+++ memhotplug-dave/include/linux/mmigrate.h	2005-07-28 13:50:34.000000000 -0700
@@ -0,0 +1,11 @@
+#ifndef _LINUX_MEMHOTPLUG_H
+#define _LINUX_MEMHOTPLUG_H
+
+#include <linux/config.h>
+#include <linux/mm.h>
+
+
+extern struct page * migrate_onepage(struct page *);
+extern int try_to_migrate_pages(struct list_head *);
+
+#endif /* _LINUX_MEMHOTPLUG_H */
diff -puN mm/Makefile~AA-PM-07-memory_migration mm/Makefile
--- memhotplug/mm/Makefile~AA-PM-07-memory_migration	2005-07-28 13:50:34.000000000 -0700
+++ memhotplug-dave/mm/Makefile	2005-07-28 13:50:34.000000000 -0700
@@ -19,4 +19,5 @@ obj-$(CONFIG_SPARSEMEM) += sparse.o
 obj-$(CONFIG_SHMEM) += shmem.o
 obj-$(CONFIG_TINY_SHMEM) += tiny-shmem.o
 obj-$(CONFIG_MEMORY_HOTPLUG) += memory_hotplug.o
+obj-$(CONFIG_MEMORY_MIGRATE) += mmigrate.o
 obj-$(CONFIG_FS_XIP) += filemap_xip.o
diff -puN /dev/null mm/mmigrate.c
--- /dev/null	2005-03-30 22:36:15.000000000 -0800
+++ memhotplug-dave/mm/mmigrate.c	2005-07-28 13:50:34.000000000 -0700
@@ -0,0 +1,370 @@
+/*
+ * linux/mm/mmigrate.c
+ *
+ * Support for memory hotplug
+ *
+ * Authors:	IWAMOTO Toshihiro
+ *		Hirokazu Takahashi
+ */
+
+#include <linux/config.h>
+#include <linux/mm.h>
+#include <linux/swap.h>
+#include <linux/pagemap.h>
+#include <linux/init.h>
+#include <linux/highmem.h>
+#include <linux/mm_inline.h>
+#include <linux/module.h>
+#include <linux/rmap.h>
+#include <linux/delay.h>
+#include <linux/writeback.h>
+#include <linux/radix-tree.h>
+#include <linux/err.h>
+#include <linux/mmigrate.h>
+
+/*
+ * The concept of memory migration is to replace a target page with
+ * a substitute page in its radix tree.  New requests to access the
+ * target - including system calls and page faults - are redirected
+ * to the substitute, which is locked and not up-to-date, so that all
+ * of these requests block until the migration has completed.  The
+ * data of the target is copied into the substitute, and the requests
+ * are unblocked once all operations against the target have finished.
+ *
+ * With this approach, regular pages in the swap cache or page cache
+ * and hugetlb pages can all be handled in the same way.
+ */
+
+
+/*
+ * Try to write back a dirty page in order to free its buffers.
+ */
+static int
+writeback_and_free_buffers(struct page *page)
+{
+	struct address_space *mapping = page_mapping(page);
+
+	BUG_ON(!PageLocked(page));
+	wait_on_page_writeback(page);
+	if (!PagePrivate(page))
+		return 0;
+
+	if (PageDirty(page)) {
+		switch (pageout(page, mapping)) {
+		case PAGE_ACTIVATE:
+			return -1;
+		case PAGE_SUCCESS:
+			lock_page(page);
+			return 1;
+		case PAGE_KEEP:
+		case PAGE_CLEAN:
+			break;
+		}
+	}
+	if (try_to_release_page(page, GFP_KERNEL))
+		return 0;
+
+	return -1;
+}
+
+/*
+ * Replace "page" with "newpage" in the radix tree the page belongs to.
+ */
+static int
+replace_pages(struct page *page, struct page *newpage)
+{
+	struct address_space *mapping = page_mapping(page);
+	int ret = 0;
+	struct page *delpage;
+
+	page_cache_get(newpage);
+	read_lock_irq(&mapping->tree_lock);
+	newpage->index = page->index;
+	if (PageSwapCache(page)) {
+		SetPageSwapCache(newpage);
+		newpage->private = page->private;
+	} else
+		newpage->mapping = page->mapping;
+	if (PageWriteback(page))
+		SetPageWriteback(newpage);
+
+	delpage = radix_tree_replace(&mapping->page_tree, page_index(page), newpage);
+	read_unlock_irq(&mapping->tree_lock);
+	if (delpage == NULL) {
+		/*
+		 * Migration is unnecessary since truncation of the
+		 * page is in progress.  Just release the newpage.
+		 */
+		page_cache_release(newpage);
+		ret = -ENOENT;
+	}
+	return ret;
+}
+
+/*
+ * Check whether the page can be migrated or not.
+ */
+static inline int
+page_migratable(struct page *page, struct page *newpage,
+			int freeable_page_count)
+{
+	int truncated;
+
+	if (page_mapped(page)) {
+		switch (try_to_unmap(page)) {
+		case SWAP_FAIL:
+			return -EBUSY;
+		case SWAP_AGAIN:
+			return -EAGAIN;
+		}
+	}
+	if (PageWriteback(page))
+		return -EAGAIN;
+	/* The page might have been truncated. */
+	truncated = !PageSwapCache(newpage) && page_mapping(page) == NULL;
+	if (page_count(page) + truncated <= freeable_page_count)
+		return truncated ? -ENOENT : 0;
+	return -EAGAIN;
+}
+
+/*
+ * Wait for the completion of all operations that are going on
+ * against the page, then copy it.
+ */
+int
+migrate_page_common(struct page *page, struct page *newpage)
+{
+	long timeout = 5000;	/* XXXX */
+	int ret;
+
+	while (timeout > 0) {
+		BUG_ON(page_count(page) == 0);
+		ret = page_migratable(page, newpage, 2);
+		switch (ret) {
+		case 0:
+		case -ENOENT:
+			copy_highpage(newpage, page);
+			return ret;
+		case -EBUSY:
+			return ret;
+		case -EAGAIN:
+			writeback_and_free_buffers(page);
+			unlock_page(page);
+			msleep(10);
+			timeout -= 10;
+			lock_page(page);
+			continue;
+		}
+	}
+	return -EBUSY;
+}
+
+/*
+ * In some cases, a page migration needs to be rolled back.
+ */
+static int
+rewind_page(struct page *page, struct page *newpage)
+{
+	printk("Rolling back a page migration is not implemented yet.\n");
+	BUG();
+	return 1;
+}
+
+/*
+ * Try to migrate one page.  Returns non-zero on failure.
+ * - The page lock must be held when this is invoked.
+ * - The page must be attached to an address_space.
+ */
+static int
+generic_migrate_page(struct page *page, struct page *newpage)
+{
+	int ret;
+
+	/*
+	 * Keep the newpage locked and not up-to-date for the duration
+	 * of the migration, so that all accesses to the newpage are
+	 * guaranteed to block until everything has become ok.
+	 */
+	if (TestSetPageLocked(newpage))
+		BUG();
+
+	if ((ret = replace_pages(page, newpage)))
+		goto out_removing;
+
+	/*
+	 * With cleared PTEs, any access to the page via the PTEs
+	 * can be caught and blocked in the page fault handler.
+	 */
+	if (page_mapped(page)) {
+		while ((ret = try_to_unmap(page)) == SWAP_AGAIN)
+			msleep(1);
+		if (ret != SWAP_SUCCESS) {
+			ret = -EBUSY;
+			goto out_busy;
+		}
+	}
+
+	wait_on_page_writeback(page);
+	if (PageSwapCache(page)) {
+		/*
+		 * The page is no longer mapped from anywhere.
+		 * Detach it from the swapcache completely.
+		 */
+		ClearPageSwapCache(page);
+		page->private = 0;
+		page->mapping = NULL;
+	}
+
+	/* Wait for all operations against the page to finish. */
+	ret = migrate_page_common(page, newpage);
+	switch (ret) {
+	default:
+		/* The page is busy.  Try it later. */
+		goto out_busy;
+	case -ENOENT:
+		/* The file the page belongs to has been truncated. */
+		page_cache_get(page);
+		page_cache_release(newpage);
+		newpage->mapping = NULL;
+		/* fall thru */
+	case 0:
+		break;
+	}
+
+	if (PageError(page))
+		SetPageError(newpage);
+	if (PageReferenced(page))
+		SetPageReferenced(newpage);
+	if (PageActive(page)) {
+		SetPageActive(newpage);
+		ClearPageActive(page);
+	}
+	if (PageMappedToDisk(page))
+		SetPageMappedToDisk(newpage);
+	if (PageChecked(page))
+		SetPageChecked(newpage);
+	if (PageUptodate(page))
+		SetPageUptodate(newpage);
+	if (PageDirty(page)) {
+		clear_page_dirty_for_io(page);
+		set_page_dirty(newpage);
+	}
+	/*
+	 * Finally, the newpage has become ready!  Wake up all waiters
+	 * that have been waiting for the completion of the migration.
+	 */
+	if (PageWriteback(newpage))
+		end_page_writeback(newpage);
+	unlock_page(newpage);
+
+	page->mapping = NULL;
+	unlock_page(page);
+	page_cache_release(page);
+
+	return 0;
+
+out_busy:
+	/* Roll back all operations. */
+	rewind_page(page, newpage);
+	return ret;
+
+out_removing:
+	unlock_page(page);
+	unlock_page(newpage);
+	return ret;
+}
+
+/*
+ * migrate_onepage() can migrate regular pages assigned to the
+ * pagecache, the swapcache, or anonymous memory.
+ */
+struct page *
+migrate_onepage(struct page *page)
+{
+	struct page *newpage;
+	struct address_space *mapping;
+	int ret;
+
+	lock_page(page);
+
+	/*
+	 * Put the page in a radix tree if it isn't in one yet.
+	 */
+#ifdef CONFIG_SWAP
+	if (PageAnon(page) && !PageSwapCache(page))
+		if (!add_to_swap(page, GFP_KERNEL)) {
+			unlock_page(page);
+			return ERR_PTR(-ENOSPC);
+		}
+#endif /* CONFIG_SWAP */
+	if ((mapping = page_mapping(page)) == NULL) {
+		/* Truncation is in progress. */
+		if (PagePrivate(page))
+			try_to_release_page(page, GFP_KERNEL);
+		unlock_page(page);
+		return ERR_PTR(-ENOENT);
+	}
+
+	/*
+	 * Allocate a new page with the same gfp_mask
+	 * as the target page has.
+	 */
+	if ((newpage = page_cache_alloc(mapping)) == NULL) {
+		unlock_page(page);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	ret = generic_migrate_page(page, newpage);
+	if (ret) {
+		BUG_ON(page_count(newpage) != 1);
+		page_cache_release(newpage);
+		return ERR_PTR(ret);
+	}
+	BUG_ON(page_count(page) != 1);
+	page_cache_release(page);
+	return newpage;
+}
+
+/*
+ * This is the main entry point for migrating the pages in a specific
+ * region.  If a page is inactive, it may simply be released instead
+ * of being migrated.
+ */
+int try_to_migrate_pages(struct list_head *page_list)
+{
+	struct page *page, *page2, *newpage;
+	LIST_HEAD(rest_list);
+	int nr_busy = 0;
+	int nr_noswap = 0;
+
+	current->flags |= PF_KSWAPD;	/* It's fake */
+	list_for_each_entry_safe(page, page2, page_list, lru) {
+		list_del(&page->lru);
+		if (IS_ERR(newpage = migrate_onepage(page))) {
+			if (page_count(page) == 1) {
+				/* The page is already unused. */
+				putback_page_to_lru(page_zone(page), page);
+				page_cache_release(page);
+			} else {
+				/* Truncation may be in progress now. */
+				nr_busy++;
+				if (PTR_ERR(newpage) == -ENOSPC)
+					nr_noswap++;
+				list_add(&page->lru, &rest_list);
+			}
+		} else {
+			putback_page_to_lru(page_zone(newpage), newpage);
+			page_cache_release(newpage);
+		}
+	}
+	list_splice(&rest_list, page_list);
+	current->flags &= ~PF_KSWAPD;
+	if (nr_noswap) {
+		if (printk_ratelimit())
+			printk(KERN_WARNING "memory migration failed: a swap device should be added.\n");
+		return -ENOSPC;
+	}
+	return nr_busy;
+}
+
_
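
A note on the blocking trick described in the comment at the top of
mmigrate.c: the substitute page can be published in the radix tree before
its contents are valid because the 2.6-era lookup paths already cope with
a locked, not-up-to-date page.  A minimal sketch of the waiting side
(simplified and illustrative only; the real fault and read paths live in
mm/filemap.c, and error handling is omitted):

	/*
	 * What a concurrent reader effectively does when it finds the
	 * substitute page mid-migration.  Not code from this patch.
	 */
	struct page *page = find_get_page(mapping, index);

	if (page && !PageUptodate(page)) {
		/*
		 * generic_migrate_page() holds the lock on the substitute
		 * until the data has been copied, so this sleeps until
		 * the migration is complete.
		 */
		lock_page(page);
		unlock_page(page);
	}
	/* The data is now valid: PageUptodate() was set before unlock. */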
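
The expected caller is the hotplug removal path elsewhere in this patch
series.  As an illustration only (the function and list below are
hypothetical; nothing here besides try_to_migrate_pages() itself is
defined by this patch), a caller might drive the interface like this:

	/*
	 * Hypothetical caller: retry migration of pages that were
	 * isolated from the LRU by an earlier step.
	 */
	static int drain_pages_example(struct list_head *isolated_pages)
	{
		int retries = 10;
		int nr_busy;

		do {
			nr_busy = try_to_migrate_pages(isolated_pages);
			if (nr_busy < 0)
				return nr_busy;	/* -ENOSPC: no swap space */
			if (nr_busy == 0)
				return 0;	/* all pages migrated */
			msleep(100);		/* let busy pages settle */
		} while (--retries);

		return -EBUSY;
	}

Since try_to_migrate_pages() splices the still-busy pages back onto the
caller's list, calling it again on the same list retries exactly the
pages that failed the previous pass.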