From: Bradley Christiansen

The following adds a page->flags bit to keep track of pages that are
under a memory removal operation.

Note that this is just a simple, fast way to keep track of which pages
are targeted for capture (removal).  We could do the same thing, with
the same interfaces (page_under_capture(), set_page_under_capture(),
...), with a list, for instance; the implementation would be slightly
more complex, but it would not consume a page->flags bit.

Some of these functions could probably stand to be moved out of
mm/page_alloc.c if a more suitable place is found.

Signed-off-by: Dave Hansen
---

 memhotplug-dave/include/linux/mm.h         |    4 
 memhotplug-dave/include/linux/page-flags.h |   40 ++++++
 memhotplug-dave/mm/page_alloc.c            |  182 ++++++++++++++++++++++++++++-
 3 files changed, 224 insertions(+), 2 deletions(-)

diff -puN include/linux/mm.h~K1-removal-capture_pages include/linux/mm.h
--- memhotplug/include/linux/mm.h~K1-removal-capture_pages	2005-07-28 13:51:10.000000000 -0700
+++ memhotplug-dave/include/linux/mm.h	2005-07-28 13:51:10.000000000 -0700
@@ -265,6 +265,10 @@ struct page {
 #endif /* WANT_PAGE_VIRTUAL */
 };
 
+#ifdef CONFIG_MEMORY_HOTPLUG
+extern int capture_page_range(unsigned long pfn, int order);
+#endif /* CONFIG_MEMORY_HOTPLUG */
+
 /*
  * FIXME: take this include out, include page-flags.h in
  * files which need it (119 of them)
diff -puN include/linux/page-flags.h~K1-removal-capture_pages include/linux/page-flags.h
--- memhotplug/include/linux/page-flags.h~K1-removal-capture_pages	2005-07-28 13:51:10.000000000 -0700
+++ memhotplug-dave/include/linux/page-flags.h	2005-07-28 13:51:10.000000000 -0700
@@ -75,6 +75,17 @@
 #define PG_reclaim		17	/* To be reclaimed asap */
 #define PG_nosave_free		18	/* Free, should not be written */
 #define PG_uncached		19	/* Page has been mapped as uncached */
+/*
+ * Note that this is just a simple, fast way to keep track of which pages
+ * are targeted for capture (removal).  We can do the same thing, with the
+ * same interfaces, with a list, for instance, but the implementation
+ * would be slightly more complex, though it would not waste a
+ * page->flags bit.
+ *
+ * Could also use the dynamic page flags stuff that the swsusp people
+ * have been proposing.
+ *	-- daveh
+ */
+#define PG_capture		20	/* Remove page for memory hotplug */
 
 /*
  * Global page accounting.  One instance per CPU.  Only unsigned longs are
@@ -323,4 +334,33 @@ static inline void set_page_writeback(st
 	test_set_page_writeback(page);
 }
 
+#ifdef CONFIG_MEMORY_HOTPLUG
+#define PageUnderCapture(page)		test_bit(PG_capture, &(page)->flags)
+#define SetPageUnderCapture(page)	set_bit(PG_capture, &(page)->flags)
+#define ClearPageUnderCapture(page)	clear_bit(PG_capture, &(page)->flags)
+
+static inline void set_page_under_capture(struct page *page)
+{
+	SetPageUnderCapture(page);
+}
+
+static inline void clear_page_under_capture(struct page *page)
+{
+	ClearPageUnderCapture(page);
+}
+
+static inline int page_under_capture(struct page *page)
+{
+	return PageUnderCapture(page);
+}
+#else
+#define PageUnderCapture(page)		0
+
+static inline int page_under_capture(struct page *page)
+{
+	return 0;
+}
+#endif
+
 #endif /* PAGE_FLAGS_H */
+
diff -puN mm/page_alloc.c~K1-removal-capture_pages mm/page_alloc.c
--- memhotplug/mm/page_alloc.c~K1-removal-capture_pages	2005-07-28 13:51:10.000000000 -0700
+++ memhotplug-dave/mm/page_alloc.c	2005-07-28 13:51:10.000000000 -0700
@@ -35,6 +35,8 @@
 #include <linux/cpuset.h>
 #include <linux/nodemask.h>
 #include <linux/vmalloc.h>
+#include <linux/memory_hotplug.h>
+#include <linux/delay.h>
 
 #include <asm/tlbflush.h>
 #include "internal.h"
@@ -399,6 +401,77 @@ static inline void extract_pages(struct 
 	area->nr_free--;
 }
 
+#ifdef CONFIG_MEMORY_HOTPLUG
+/*
+ * Called when trying to allocate or free a page that has been marked for
+ * removal.
+ */
+static inline void capture_pages(struct page *page, int order)
+{
+	int i;
+
+	for (i = 0; i < (1 << order); i++)
+		clear_page_under_capture(&page[i]);
+	page_zone(page)->present_pages -= (1UL << order);
+	totalram_pages -= (1UL << order);
+}
+
+static inline int test_remove_range(struct page *page, struct page *base,
+		int order, int p_order)
+{
+	/* Test if page is contained in the section to be removed */
+	if ((page >= base) && (page < (base + (1 << order))) &&
+			page_under_capture(page))
+		return 1;
+	/* Test if the section to remove is fully contained in the page area */
+	if ((base >= page) && (base < (page + (1 << p_order))))
+		return 1;
+	return 0;
+}
+
+/*
+ * Searches the free lists for the pages that have been marked for removal.
+ * This function removes the largest order of pages (up to MAX_ORDER) that
+ * contain a page marked for removal.  It is passed the base page to
+ * determine the zone in which the pages are contained.
+ */
+static int remove_page_freearea(struct page *base, int order)
+{
+	struct zone *zone = page_zone(base);
+	int p_order;
+	unsigned long flags;
+	struct free_area *area;
+	struct list_head *p, *n;
+	struct page *page;
+
+	/*
+	 * We're not worried about speed here, so taking and releasing the
+	 * lock on every iteration of the loop allows other processes to
+	 * access the data when needed.
+	 */
+	for (p_order = 0; p_order < MAX_ORDER; p_order++) {
+		spin_lock_irqsave(&zone->lock, flags);
+		area = zone->free_area + p_order;
+		if (list_empty(&area->free_list)) {
+			spin_unlock_irqrestore(&zone->lock, flags);
+			continue;
+		}
+		list_for_each_safe(p, n, &area->free_list) {
+			page = list_entry(p, struct page, lru);
+			if (test_remove_range(page, base, order, p_order)) {
+				extract_pages(page, zone, p_order, area);
+				capture_pages(page, p_order);
+			}
+		}
+		spin_unlock_irqrestore(&zone->lock, flags);
+	}
+	return 1;
+}
+#else
+static inline void capture_pages(struct page *page, int order)
+{
+}
+#endif /* CONFIG_MEMORY_HOTPLUG */
+
 void __free_pages_ok(struct page *page, unsigned int order)
 {
 	LIST_HEAD(list);
@@ -416,6 +489,10 @@ void __free_pages_ok(struct page *page, 
 	for (i = 0 ; i < (1 << order) ; ++i)
 		free_pages_check(__FUNCTION__, page + i);
+	if (page_under_capture(page)) {
+		capture_pages(page, order);
+		return;
+	}
 	list_add(&page->lru, &list);
 	kernel_map_pages(page, 1 << order, 0);
 	free_pages_bulk(page_zone(page), 1, &list, order);
 }
@@ -531,6 +608,11 @@ static struct page *__rmqueue(struct zon
 			continue;
 
 		page = list_entry(area->free_list.next, struct page, lru);
 		extract_pages(page, zone, order, area);
+		if (unlikely(page_under_capture(page))) {
+			capture_pages(page, current_order);
+			current_order--;
+			continue;
+		}
 		return expand(zone, page, order, current_order, area);
 	}
@@ -577,7 +659,7 @@ void drain_remote_pages(void)
 }
 #endif
 
-#if defined(CONFIG_PM) || defined(CONFIG_HOTPLUG_CPU)
+#if defined(CONFIG_PM) || defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_MEMORY_HOTPLUG)
 static void __drain_pages(unsigned int cpu)
 {
 	struct zone *zone;
@@ -596,7 +678,7 @@ static void __drain_pages(unsigned int c
 		}
 	}
 }
-#endif /* CONFIG_PM || CONFIG_HOTPLUG_CPU */
+#endif /* CONFIG_PM || CONFIG_HOTPLUG_CPU || CONFIG_MEMORY_HOTPLUG */
 
 #ifdef CONFIG_PM
@@ -638,6 +720,98 @@ void drain_local_pages(void)
 }
 #endif /* CONFIG_PM */
 
+#ifdef CONFIG_MEMORY_HOTPLUG
+static inline int first_uncaptured_page(unsigned long start_pfn, int nr_pages)
+{
+	int i;
+	int res = -1;
+	int count = 0;
+
+	printk(KERN_DEBUG "%s(%lu, %d) begin\n",
+			__func__, start_pfn, nr_pages);
+
+	for (i = start_pfn; i < start_pfn + nr_pages; i++) {
+		if (!page_under_capture(pfn_to_page(i)))
+			continue;
+		count++;
+		if (res == -1)
+			res = i;
+	}
+	printk(KERN_DEBUG "%s(%lu, %d) end\n",
+			__func__, start_pfn, nr_pages);
+	return res;
+}
+
+static void
+rmb_and_drain_cpu_pages(void *__unused)
+{
+	/*
+	 * make this CPU see the capture bits
+	 */
+	smp_rmb();
+
+	drain_local_pages();
+}
+
+/*
+ * Flags a given order of pages to be removed from memory, then removes any
+ * of those pages that are currently in CPU caches or free lists.  The pfn
+ * passed must be aligned according to the given order.
+ *
+ * This function assumes that it has received a valid range of pfns.
+ */
+int capture_page_range(unsigned long start_pfn, int order)
+{
+	int fup;
+	struct page *page;
+	int i;
+	unsigned long nr_pages;
+	unsigned long end_pfn;
+
+	/* If the start_pfn is not aligned with the order, return failure */
+	if (start_pfn % (1 << order) != 0)
+		return -EINVAL;
+
+	for (i = 0; i < (1 << order); i++)
+		set_page_under_capture(pfn_to_page(start_pfn + i));
+	/*
+	 * the set_page_under_capture() operations are not barriers, so
+	 * make sure that all the other CPUs can see the capture bits
+	 */
+	smp_wmb();
+
+	/*
+	 * This drains the per-cpu caches, and makes sure that each
+	 * CPU does see the capture bits.
+	 */
+	on_each_cpu(rmb_and_drain_cpu_pages, NULL, 1, 0);
+
+	page = pfn_to_page(start_pfn);
+	remove_page_freearea(page, order);
+
+	nr_pages = 1UL << order;
+	end_pfn = start_pfn + nr_pages;
+
+	/*
+	 * Wait until every flagged page has actually been captured,
+	 * kicking reclaim for a while to push out pages still in use.
+	 */
+	i = 0;
+	while ((fup = first_uncaptured_page(start_pfn, nr_pages)) >= 0) {
+		if (i++ < 100)
+			while (shrink_all_memory(10000));
+		msleep(100);
+	}
+
+	return 0;
+}
+
+#else /* CONFIG_MEMORY_HOTPLUG */
+int capture_page_range(unsigned long pfn, int order)
+{
+	return -ENOSYS;
+}
+#endif /* CONFIG_MEMORY_HOTPLUG */
+
 static void zone_statistics(struct zonelist *zonelist, struct zone *z)
 {
 #ifdef CONFIG_NUMA
@@ -681,6 +855,10 @@ static void fastcall free_hot_cold_page(
 	if (PageAnon(page))
 		page->mapping = NULL;
 	free_pages_check(__FUNCTION__, page);
+	if (page_under_capture(page)) {
+		capture_pages(page, 0);
+		return;
+	}
 	pcp = &zone_pcp(zone, get_cpu())->pcp[cold];
 	local_irq_save(flags);
 	list_add(&page->lru, &pcp->list);
_
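
For anyone who wants to drive this from an arch removal path, the calling
convention is roughly: hand capture_page_range() an order-aligned pfn, and
it flags the chunk, drains the per-cpu pages, sweeps the free lists and then
waits (with reclaim) until every page in the chunk has been pulled out of
circulation.  A rough sketch of a caller -- offline_pfn_range() and its
chunking policy are made up for illustration, only capture_page_range() and
MAX_ORDER are real:

	#include <linux/mm.h>	/* capture_page_range() prototype from this patch */

	/*
	 * Illustrative only -- not part of the patch.  Walks a pfn range in
	 * (MAX_ORDER - 1)-order, order-aligned chunks and hands each chunk
	 * to capture_page_range().
	 */
	static int offline_pfn_range(unsigned long start_pfn, unsigned long nr_pages)
	{
		unsigned long chunk = 1UL << (MAX_ORDER - 1);
		unsigned long pfn;
		int ret;

		/* capture_page_range() refuses pfns not aligned to the order */
		if ((start_pfn & (chunk - 1)) || (nr_pages & (chunk - 1)))
			return -EINVAL;

		for (pfn = start_pfn; pfn < start_pfn + nr_pages; pfn += chunk) {
			ret = capture_page_range(pfn, MAX_ORDER - 1);
			if (ret)
				return ret;
		}
		return 0;
	}

Note that capture_page_range() only returns once the whole chunk has been
captured, so a caller like this can block for a long time if some of the
pages cannot be reclaimed.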
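
On the "could do this with a list" point in the description: the same
page_under_capture() interface could be backed by something like the sketch
below instead of PG_capture.  None of this is in the patch; it only
illustrates the trade-off.  set_page_under_capture() and
clear_page_under_capture() would add and remove entries under the same lock:

	#include <linux/list.h>
	#include <linux/spinlock.h>
	#include <linux/mm.h>

	/* Not part of the patch: list-backed tracking of captured pages. */
	struct captured_page {
		struct list_head	list;
		struct page		*page;
	};

	static LIST_HEAD(capture_list);
	static DEFINE_SPINLOCK(capture_list_lock);

	static inline int page_under_capture(struct page *page)
	{
		struct captured_page *cp;
		unsigned long flags;
		int ret = 0;

		/* linear scan under a lock, instead of a single test_bit() */
		spin_lock_irqsave(&capture_list_lock, flags);
		list_for_each_entry(cp, &capture_list, list) {
			if (cp->page == page) {
				ret = 1;
				break;
			}
		}
		spin_unlock_irqrestore(&capture_list_lock, flags);
		return ret;
	}

Every page_under_capture() check in __free_pages_ok(), __rmqueue() and
free_hot_cold_page() would then take a lock and walk a list, which is why
the flag bit looks like the cheaper choice for those fast paths.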