diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 5e9840f..bd71e19 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -30,10 +30,13 @@ enum mem_cgroup_page_stat_item {
 	MEMCG_NR_FILE_MAPPED, /* # of pages charged as file rss */
 };
 
+enum ISOLATE_PAGE_MODE;
+
 extern unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan,
 					struct list_head *dst,
 					unsigned long *scanned, int order,
-					int mode, struct zone *z,
+					enum ISOLATE_PAGE_MODE mode,
+					struct zone *z,
 					struct mem_cgroup *mem_cont,
 					int active, int file);
diff --git a/include/linux/migrate.h b/include/linux/migrate.h
index e39aeec..8e96d92 100644
--- a/include/linux/migrate.h
+++ b/include/linux/migrate.h
@@ -9,12 +9,52 @@ typedef struct page *new_page_t(struct page *, unsigned long private, int **);
 #ifdef CONFIG_MIGRATION
 #define PAGE_MIGRATION 1
 
+/*
+ * The migrate list used by compaction is singly linked, not doubly linked,
+ * so compatibility with the common list utilities cannot be guaranteed.
+ * Please use the functions below instead of the common list helpers.
+ */
+static inline void INIT_MIGRATE_LIST(struct inorder_lru *list)
+{
+	list->prev_page = NULL;
+	list->next = list;
+}
+
+static inline int migratelist_empty(const struct inorder_lru *head)
+{
+	return head->next == head;
+}
+
+static inline void migratelist_add(struct page *page,
+		struct page *prev_page, struct inorder_lru *head)
+{
+	VM_BUG_ON(PageLRU(page));
+
+	page->ilru.prev_page = prev_page;
+	page->ilru.next = head->next;
+	head->next = &page->ilru;
+}
+
+static inline void migratelist_del(struct page *page, struct inorder_lru *head)
+{
+	head->next = page->ilru.next;
+}
+
+#define list_for_each_migrate_entry		list_for_each_entry
+#define list_for_each_migrate_entry_safe	list_for_each_entry_safe
+
 extern void putback_lru_pages(struct list_head *l);
+extern void putback_inorder_lru_pages(struct inorder_lru *l);
 extern int migrate_page(struct address_space *,
 			struct page *, struct page *);
 extern int migrate_pages(struct list_head *l, new_page_t x,
 			unsigned long private, bool offlining,
 			bool sync);
+
+extern int migrate_inorder_lru_pages(struct inorder_lru *l, new_page_t x,
+			unsigned long private, bool offlining,
+			bool sync);
+
 extern int migrate_huge_pages(struct list_head *l, new_page_t x,
 			unsigned long private, bool offlining,
 			bool sync);
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 02aa561..af46614 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -24,6 +24,17 @@ struct address_space;
 
 #define USE_SPLIT_PTLOCKS	(NR_CPUS >= CONFIG_SPLIT_PTLOCK_CPUS)
 
+struct page;
+
+/*
+ * The inorder_lru is used by compaction for keeping LRU order
+ * during migration.
+ */
+struct inorder_lru {
+	struct page *prev_page;		/* prev LRU page of isolated page */
+	struct inorder_lru *next;	/* next pointer for singly linked list */
+};
+
 /*
  * Each physical page in the system has a struct page associated with
  * it to keep track of whatever it is we are using the page for at the
@@ -72,9 +83,12 @@ struct page {
 			pgoff_t index;		/* Our offset within mapping. */
 			void *freelist;		/* SLUB: freelist req. slab lock */
 		};
-	struct list_head lru;		/* Pageout list, eg. active_list
+	union {
+		struct inorder_lru ilru;/* compaction: migrated page list */
+		struct list_head lru;	/* Pageout list, eg. active_list
 					 * protected by zone->lru_lock !
 					 */
+	};
 	/*
 	 * On machines where all RAM is mapped into kernel address space,
 	 * we can simply calculate the virtual address. On machines with
diff --git a/include/linux/swap.h b/include/linux/swap.h
index a5c6da5..c3ddfea 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -226,6 +226,8 @@ extern int lru_add_drain_all(void);
 extern void rotate_reclaimable_page(struct page *page);
 extern void deactivate_page(struct page *page);
 extern void swap_setup(void);
+extern void update_page_reclaim_stat(struct zone *zone, struct page *page,
+				int file, int rotated);
 
 extern void add_page_to_unevictable_list(struct page *page);
 
@@ -244,9 +246,14 @@ static inline void lru_cache_add_file(struct page *page)
 }
 
 /* LRU Isolation modes. */
-#define ISOLATE_INACTIVE 0	/* Isolate inactive pages. */
-#define ISOLATE_ACTIVE 1	/* Isolate active pages. */
-#define ISOLATE_BOTH 2		/* Isolate both active and inactive pages. */
+enum ISOLATE_PAGE_MODE {
+	ISOLATE_NONE,
+	ISOLATE_INACTIVE = 1,	/* Isolate inactive pages */
+	ISOLATE_ACTIVE = 2,	/* Isolate active pages */
+	ISOLATE_BOTH = 4,	/* Isolate both active and inactive pages */
+	ISOLATE_CLEAN = 8,	/* Isolate clean file */
+	ISOLATE_UNMAPPED = 16,	/* Isolate unmapped file */
+};
 
 /* linux/mm/vmscan.c */
 extern unsigned long try_to_free_pages(struct zonelist *zonelist, int order,
@@ -258,7 +265,10 @@ extern unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *mem,
 						gfp_t gfp_mask, bool noswap,
 						unsigned int swappiness,
 						struct zone *zone);
-extern int __isolate_lru_page(struct page *page, int mode, int file);
+extern int __isolate_inorder_lru_page(struct page *page, enum ISOLATE_PAGE_MODE mode,
+				int file, struct page **lru_p_page);
+extern int __isolate_lru_page(struct page *page, enum ISOLATE_PAGE_MODE mode,
+				int file);
 extern unsigned long shrink_all_memory(unsigned long nr_pages);
 extern int vm_swappiness;
 extern int remove_mapping(struct address_space *mapping, struct page *page);
diff --git a/include/trace/events/inorder_putback.h b/include/trace/events/inorder_putback.h
new file mode 100644
index 0000000..c615ed8
--- /dev/null
+++ b/include/trace/events/inorder_putback.h
@@ -0,0 +1,79 @@
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM inorder_putback
+
+#if !defined(_TRACE_INP_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_INP_H
+
+#include
+#include
+
+TRACE_EVENT(mm_compaction_inorder,
+
+	TP_PROTO(struct page *page,
+		struct page *newpage),
+
+	TP_ARGS(page, newpage),
+
+	TP_STRUCT__entry(
+		__field(struct page *, page)
+		__field(struct page *, newpage)
+	),
+
+	TP_fast_assign(
+		__entry->page = page;
+		__entry->newpage = newpage;
+	),
+
+	TP_printk("pfn=%lu new pfn=%lu",
+		page_to_pfn(__entry->page),
+		page_to_pfn(__entry->newpage))
+);
+
+TRACE_EVENT(mm_compaction_outoforder,
+
+	TP_PROTO(struct page *page,
+		struct page *newpage),
+
+	TP_ARGS(page, newpage),
+
+	TP_STRUCT__entry(
+		__field(struct page *, page)
+		__field(struct page *, newpage)
+	),
+
+	TP_fast_assign(
+		__entry->page = page;
+		__entry->newpage = newpage;
+	),
+
+	TP_printk("pfn=%lu new pfn=%lu",
+		page_to_pfn(__entry->page),
+		page_to_pfn(__entry->newpage))
+);
+
+TRACE_EVENT(mm_compact_isolate,
+
+	TP_PROTO(struct page *prev_page,
+		struct page *page),
+
+	TP_ARGS(prev_page, page),
+
+	TP_STRUCT__entry(
+		__field(struct page *, prev_page)
+		__field(struct page *, page)
+	),
+
+	TP_fast_assign(
+		__entry->prev_page = prev_page;
+		__entry->page = page;
+	),
+
+	TP_printk("pfn=%lu prev_pfn=%lu",
+		page_to_pfn(__entry->page),
+		page_to_pfn(__entry->prev_page))
+);
+
+#endif /* _TRACE_INP_H */
+
+/* This part must be outside protection */
+#include
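For readers unfamiliar with how the new isolation modes are meant to be combined: the ISOLATE_* values above are distinct power-of-two bits, so a caller builds a mode by OR-ing flags together and the isolation code tests them with a bitwise AND. The following stand-alone user-space sketch is illustrative only (not part of the patch) and simply demonstrates that usage pattern:

/* Illustrative sketch only -- not part of the patch.  Mirrors the
 * enum ISOLATE_PAGE_MODE bit values introduced in swap.h above.
 */
#include <stdio.h>

enum ISOLATE_PAGE_MODE {
	ISOLATE_NONE,
	ISOLATE_INACTIVE = 1,
	ISOLATE_ACTIVE = 2,
	ISOLATE_BOTH = 4,
	ISOLATE_CLEAN = 8,
	ISOLATE_UNMAPPED = 16,
};

int main(void)
{
	/* e.g. asynchronous compaction: any LRU page, but skip dirty/writeback */
	enum ISOLATE_PAGE_MODE mode = ISOLATE_BOTH;

	mode |= ISOLATE_CLEAN;

	if (mode & ISOLATE_CLEAN)
		printf("dirty and writeback pages will be skipped\n");
	if (!(mode & ISOLATE_UNMAPPED))
		printf("mapped pages are still eligible\n");
	return 0;
}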
diff --git a/include/trace/events/vmscan.h b/include/trace/events/vmscan.h
index ea422aa..a20d766 100644
--- a/include/trace/events/vmscan.h
+++ b/include/trace/events/vmscan.h
@@ -187,7 +187,7 @@ DECLARE_EVENT_CLASS(mm_vmscan_lru_isolate_template,
 		unsigned long nr_lumpy_taken,
 		unsigned long nr_lumpy_dirty,
 		unsigned long nr_lumpy_failed,
-		int isolate_mode),
+		enum ISOLATE_PAGE_MODE isolate_mode),
 
 	TP_ARGS(order, nr_requested, nr_scanned, nr_taken,
 		nr_lumpy_taken, nr_lumpy_dirty, nr_lumpy_failed, isolate_mode),
@@ -199,7 +199,7 @@ DECLARE_EVENT_CLASS(mm_vmscan_lru_isolate_template,
 		__field(unsigned long, nr_lumpy_taken)
 		__field(unsigned long, nr_lumpy_dirty)
 		__field(unsigned long, nr_lumpy_failed)
-		__field(int, isolate_mode)
+		__field(enum ISOLATE_PAGE_MODE, isolate_mode)
 	),
 
 	TP_fast_assign(
@@ -233,7 +233,7 @@ DEFINE_EVENT(mm_vmscan_lru_isolate_template, mm_vmscan_lru_isolate,
 		unsigned long nr_lumpy_taken,
 		unsigned long nr_lumpy_dirty,
 		unsigned long nr_lumpy_failed,
-		int isolate_mode),
+		enum ISOLATE_PAGE_MODE isolate_mode),
 
 	TP_ARGS(order, nr_requested, nr_scanned, nr_taken,
 		nr_lumpy_taken, nr_lumpy_dirty, nr_lumpy_failed, isolate_mode)
@@ -248,7 +248,7 @@ DEFINE_EVENT(mm_vmscan_lru_isolate_template, mm_vmscan_memcg_isolate,
 		unsigned long nr_lumpy_taken,
 		unsigned long nr_lumpy_dirty,
 		unsigned long nr_lumpy_failed,
-		int isolate_mode),
+		enum ISOLATE_PAGE_MODE isolate_mode),
 
 	TP_ARGS(order, nr_requested, nr_scanned, nr_taken,
 		nr_lumpy_taken, nr_lumpy_dirty, nr_lumpy_failed, isolate_mode)
diff --git a/mm/compaction.c b/mm/compaction.c
index 021a296..92c180d 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -16,6 +16,7 @@
 #include
 #include "internal.h"
+#include
 
 #define CREATE_TRACE_POINTS
 #include
@@ -28,17 +29,13 @@
  */
 struct compact_control {
 	struct list_head freepages;	/* List of free pages to migrate to */
-	struct list_head migratepages;	/* List of pages being migrated */
+	struct inorder_lru migratepages;/* List of pages being migrated */
 	unsigned long nr_freepages;	/* Number of isolated free pages */
 	unsigned long nr_migratepages;	/* Number of pages to migrate */
 	unsigned long free_pfn;		/* isolate_freepages search base */
 	unsigned long migrate_pfn;	/* isolate_migratepages search base */
 	bool sync;			/* Synchronous migration */
 
-	/* Account for isolated anon and file pages */
-	unsigned long nr_anon;
-	unsigned long nr_file;
-
 	unsigned int order;		/* order a direct compactor needs */
 	int migratetype;		/* MOVABLE, RECLAIMABLE etc */
 	struct zone *zone;
@@ -212,17 +209,13 @@ static void isolate_freepages(struct zone *zone,
 static void acct_isolated(struct zone *zone, struct compact_control *cc)
 {
 	struct page *page;
-	unsigned int count[NR_LRU_LISTS] = { 0, };
+	unsigned int count[2] = { 0, };
 
-	list_for_each_entry(page, &cc->migratepages, lru) {
-		int lru = page_lru_base_type(page);
-		count[lru]++;
-	}
+	list_for_each_migrate_entry(page, &cc->migratepages, ilru)
+		count[!!page_is_file_cache(page)]++;
 
-	cc->nr_anon = count[LRU_ACTIVE_ANON] + count[LRU_INACTIVE_ANON];
-	cc->nr_file = count[LRU_ACTIVE_FILE] + count[LRU_INACTIVE_FILE];
-	__mod_zone_page_state(zone, NR_ISOLATED_ANON, cc->nr_anon);
-	__mod_zone_page_state(zone, NR_ISOLATED_FILE, cc->nr_file);
+	__mod_zone_page_state(zone, NR_ISOLATED_ANON, count[0]);
+	__mod_zone_page_state(zone, NR_ISOLATED_FILE, count[1]);
 }
 
 /* Similar to reclaim, but different enough that they don't share logic */
@@ -250,7 +243,8 @@ static unsigned long isolate_migratepages(struct zone *zone,
 	unsigned long low_pfn, end_pfn;
 	unsigned long last_pageblock_nr = 0, pageblock_nr;
 	unsigned long nr_scanned = 0, nr_isolated = 0;
-	struct list_head *migratelist = &cc->migratepages;
+	struct inorder_lru *migratelist = &cc->migratepages;
+	enum ISOLATE_PAGE_MODE mode = ISOLATE_BOTH;
 
 	/* Do not scan outside zone boundaries */
 	low_pfn = max(cc->migrate_pfn, zone->zone_start_pfn);
@@ -280,7 +274,7 @@ static unsigned long isolate_migratepages(struct zone *zone,
 	cond_resched();
 	spin_lock_irq(&zone->lru_lock);
 	for (; low_pfn < end_pfn; low_pfn++) {
-		struct page *page;
+		struct page *page, *prev_page;
 		bool locked = true;
 
 		/* give a chance to irqs before checking need_resched() */
@@ -335,14 +329,18 @@ static unsigned long isolate_migratepages(struct zone *zone,
 		}
 
 		/* Try isolate the page */
-		if (__isolate_lru_page(page, ISOLATE_BOTH, 0) != 0)
+		if (!cc->sync)
+			mode |= ISOLATE_CLEAN;
+		if (__isolate_inorder_lru_page(page, mode, 0, &prev_page) != 0)
 			continue;
+		trace_mm_compact_isolate(prev_page, page);
 
 		VM_BUG_ON(PageTransCompound(page));
 
 		/* Successfully isolated */
 		del_page_from_lru_list(zone, page, page_lru(page));
-		list_add(&page->lru, migratelist);
+		migratelist_add(page, prev_page, migratelist);
+
 		cc->nr_migratepages++;
 		nr_isolated++;
@@ -398,7 +396,7 @@ static void update_nr_listpages(struct compact_control *cc)
 	int nr_freepages = 0;
 	struct page *page;
 
-	list_for_each_entry(page, &cc->migratepages, lru)
+	list_for_each_migrate_entry(page, &cc->migratepages, ilru)
 		nr_migratepages++;
 	list_for_each_entry(page, &cc->freepages, lru)
 		nr_freepages++;
@@ -526,7 +524,8 @@ static int compact_zone(struct zone *zone, struct compact_control *cc)
 			continue;
 
 		nr_migrate = cc->nr_migratepages;
-		err = migrate_pages(&cc->migratepages, compaction_alloc,
+		err = migrate_inorder_lru_pages(&cc->migratepages,
+				compaction_alloc,
 				(unsigned long)cc, false,
 				cc->sync);
 		update_nr_listpages(cc);
@@ -541,7 +540,7 @@ static int compact_zone(struct zone *zone, struct compact_control *cc)
 
 		/* Release LRU pages not migrated */
 		if (err) {
-			putback_lru_pages(&cc->migratepages);
+			putback_inorder_lru_pages(&cc->migratepages);
 			cc->nr_migratepages = 0;
 		}
 
@@ -567,7 +566,7 @@ unsigned long compact_zone_order(struct zone *zone,
 		.sync = sync,
 	};
 	INIT_LIST_HEAD(&cc.freepages);
-	INIT_LIST_HEAD(&cc.migratepages);
+	INIT_MIGRATE_LIST(&cc.migratepages);
 
 	return compact_zone(zone, &cc);
 }
@@ -649,12 +648,12 @@ static int compact_node(int nid)
 		cc.zone = zone;
 		INIT_LIST_HEAD(&cc.freepages);
-		INIT_LIST_HEAD(&cc.migratepages);
+		INIT_MIGRATE_LIST(&cc.migratepages);
 
 		compact_zone(zone, &cc);
 
 		VM_BUG_ON(!list_empty(&cc.freepages));
-		VM_BUG_ON(!list_empty(&cc.migratepages));
+		VM_BUG_ON(!migratelist_empty(&cc.migratepages));
 	}
 
 	return 0;
diff --git a/mm/internal.h b/mm/internal.h
index 9d0ced8..5cb2370 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -42,6 +42,8 @@ extern unsigned long highest_memmap_pfn;
 /*
  * in mm/vmscan.c:
  */
+extern bool keep_lru_order(struct page *page, struct page *prev);
+extern void putback_page_to_lru(struct page *page, struct page *head_page);
 extern int isolate_lru_page(struct page *page);
 extern void putback_lru_page(struct page *page);
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 010f916..e02daa7 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -1106,7 +1106,8 @@ mem_cgroup_get_reclaim_stat_from_page(struct page *page)
 unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan,
 					struct list_head *dst,
 					unsigned long *scanned, int order,
-					int mode, struct zone *z,
+					enum ISOLATE_PAGE_MODE mode,
+					struct zone *z,
 					struct mem_cgroup *mem_cont,
 					int active, int file)
 {
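Before the migrate.c changes below, it may help to see the list discipline the compaction code above relies on. The sketch below is a user-space model, illustrative only and not part of the patch (the node type and helper names are invented for the example): entries are pushed at the sentinel head of a singly linked, circular list, and unlinking an entry requires knowing its predecessor because nodes carry no back pointer, which is exactly why unmap_and_move_inorder_lru() threads a prev_lru pointer through migrate_inorder_lru_pages().

/* Illustrative sketch only -- not part of the patch.  Models the semantics
 * of INIT_MIGRATE_LIST / migratelist_add / migratelist_del above.
 */
#include <stdio.h>

struct node {
	int pfn;		/* stands in for the isolated page */
	struct node *next;	/* singly linked, circular through the head */
};

static void init_list(struct node *head)
{
	head->next = head;
}

static void add(struct node *head, struct node *n)
{
	n->next = head->next;	/* LIFO: new entry becomes the first one */
	head->next = n;
}

static void del_after(struct node *prev, struct node *n)
{
	prev->next = n->next;	/* caller must know the predecessor */
}

int main(void)
{
	struct node head = { .pfn = -1 }, a = { .pfn = 100 }, b = { .pfn = 101 };
	struct node *pos;

	init_list(&head);
	add(&head, &a);
	add(&head, &b);		/* list is now: head -> b -> a -> head */

	del_after(&head, &b);	/* unlink b; its predecessor is the head */

	for (pos = head.next; pos != &head; pos = pos->next)
		printf("pfn %d still on the list\n", pos->pfn);
	return 0;
}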
diff --git a/mm/migrate.c b/mm/migrate.c
index 34132f8..6ec98c1 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -39,6 +39,9 @@
 
 #include "internal.h"
 
+#define CREATE_TRACE_POINTS
+#include
+
 #define lru_to_page(_head) (list_entry((_head)->prev, struct page, lru))
 
 /*
@@ -68,7 +71,7 @@ int migrate_prep_local(void)
 }
 
 /*
- * Add isolated pages on the list back to the LRU under page lock
+ * Add isolated pages on the list back to the LRU's head under page lock
  * to avoid leaking evictable pages back onto unevictable list.
  */
 void putback_lru_pages(struct list_head *l)
@@ -84,6 +87,32 @@
 	}
 }
 
+void putback_inorder_lru_pages(struct inorder_lru *l)
+{
+	struct zone *zone;
+	struct page *page, *page2, *prev;
+
+	list_for_each_migrate_entry_safe(page, page2, l, ilru) {
+		dec_zone_page_state(page, NR_ISOLATED_ANON +
+				page_is_file_cache(page));
+		zone = page_zone(page);
+		spin_lock_irq(&zone->lru_lock);
+		prev = page->ilru.prev_page;
+		if (keep_lru_order(page, prev)) {
+			trace_mm_compaction_inorder(page, page);
+			putback_page_to_lru(page, prev);
+			spin_unlock_irq(&zone->lru_lock);
+		}
+		else {
+			trace_mm_compaction_outoforder(page, page);
+			spin_unlock_irq(&zone->lru_lock);
+			putback_lru_page(page);
+		}
+
+		l->next = &page2->ilru;
+	}
+}
+
 /*
  * Restore a potential migration pte to a working pte entry
  */
@@ -819,6 +848,252 @@ move_newpage:
 	return rc;
 }
 
+static inline void adjust_inorder_prev_page(struct inorder_lru *head,
+		struct page *prev_page, struct page *new_page)
+{
+	struct page *page;
+	list_for_each_migrate_entry(page, head, ilru)
+		if (page->ilru.prev_page == prev_page)
+			page->ilru.prev_page = new_page;
+}
+
+/*
+ * Counterpart of unmap_and_move() for compaction.
+ * The logic is almost the same as unmap_and_move(). The difference is that
+ * this function handles prev_lru. For inorder-lru compaction, we use a
+ * singly linked list, so we need prev pointer handling to delete an entry.
+ */
+static int unmap_and_move_inorder_lru(new_page_t get_new_page, unsigned long private,
+		struct page *page, int force, bool offlining, bool sync,
+		struct inorder_lru **prev_lru, struct inorder_lru *head)
+{
+	int rc = 0;
+	int *result = NULL;
+	struct page *newpage = get_new_page(page, private, &result);
+	int remap_swapcache = 1;
+	int charge = 0;
+	struct mem_cgroup *mem;
+	struct anon_vma *anon_vma = NULL;
+	struct page *prev_page;
+	struct zone *zone;
+	bool del = false;
+
+	VM_BUG_ON(!prev_lru);
+
+	if (!newpage)
+		return -ENOMEM;
+
+	prev_page = page->ilru.prev_page;
+	if (page_count(page) == 1) {
+		/* page was freed from under us. So we are done. */
+		goto move_newpage;
+	}
+	if (unlikely(PageTransHuge(page)))
+		if (unlikely(split_huge_page(page)))
+			goto move_newpage;
+
+	/* prepare cgroup just returns 0 or -ENOMEM */
+	rc = -EAGAIN;
+
+	if (!trylock_page(page)) {
+		if (!force || !sync)
+			goto move_newpage;
+
+		/*
+		 * It's not safe for direct compaction to call lock_page.
+		 * For example, during page readahead pages are added locked
+		 * to the LRU. Later, when the IO completes the pages are
+		 * marked uptodate and unlocked. However, the queueing
+		 * could be merging multiple pages for one bio (e.g.
+		 * mpage_readpages). If an allocation happens for the
+		 * second or third page, the process can end up locking
+		 * the same page twice and deadlocking. Rather than
+		 * trying to be clever about what pages can be locked,
+		 * avoid the use of lock_page for direct compaction
+		 * altogether.
+		 */
+		if (current->flags & PF_MEMALLOC)
+			goto move_newpage;
+		lock_page(page);
+	}
+
+	/*
+	 * Only memory hotplug's offline_pages() caller has locked out KSM,
+	 * and can safely migrate a KSM page. The other cases have skipped
+	 * PageKsm along with PageReserved - but it is only now when we have
+	 * the page lock that we can be certain it will not go KSM beneath us
+	 * (KSM will not upgrade a page from PageAnon to PageKsm when it sees
+	 * its pagecount raised, but only here do we take the page lock which
+	 * serializes that).
+	 */
+	if (PageKsm(page) && !offlining) {
+		rc = -EBUSY;
+		goto unlock;
+	}
+
+	/* charge against new page */
+	charge = mem_cgroup_prepare_migration(page, newpage, &mem, GFP_KERNEL);
+	if (charge == -ENOMEM) {
+		rc = -ENOMEM;
+		goto unlock;
+	}
+	BUG_ON(charge);
+
+	if (PageWriteback(page)) {
+		/*
+		 * For !sync, there is no point retrying as the retry loop
+		 * is expected to be too short for PageWriteback to be cleared
+		 */
+		if (!sync) {
+			rc = -EBUSY;
+			goto uncharge;
+		}
+		if (!force)
+			goto uncharge;
+		wait_on_page_writeback(page);
+	}
+	/*
+	 * By try_to_unmap(), page->mapcount goes down to 0 here. In this case,
+	 * we cannot notice that the anon_vma is freed while we migrate a page.
+	 * This get_anon_vma() delays freeing the anon_vma pointer until the end
+	 * of migration. File cache pages are no problem because of page_lock().
+	 * File caches may use write_page() or lock_page() during migration, so
+	 * we only need to care about anon pages here.
+	 */
+	if (PageAnon(page)) {
+		/*
+		 * Only page_lock_anon_vma() understands the subtleties of
+		 * getting a hold on an anon_vma from outside one of its mms.
+		 */
+		anon_vma = page_lock_anon_vma(page);
+		if (anon_vma) {
+			/*
+			 * Take a reference count on the anon_vma if the
+			 * page is mapped so that it is guaranteed to
+			 * exist when the page is remapped later
+			 */
+			get_anon_vma(anon_vma);
+			page_unlock_anon_vma(anon_vma);
+		} else if (PageSwapCache(page)) {
+			/*
+			 * We cannot be sure that the anon_vma of an unmapped
+			 * swapcache page is safe to use because we don't
+			 * know in advance if the VMA that this page belonged
+			 * to still exists. If the VMA and others sharing the
+			 * data have been freed, then the anon_vma could
+			 * already be invalid.
+			 *
+			 * To avoid this possibility, swapcache pages get
+			 * migrated but are not remapped when migration
+			 * completes
+			 */
+			remap_swapcache = 0;
+		} else {
+			goto uncharge;
+		}
+	}
+
+	/*
+	 * Corner case handling:
+	 * 1. When a new swap-cache page is read in, it is added to the LRU
+	 * and treated as swapcache but it has no rmap yet.
+	 * Calling try_to_unmap() against a page->mapping==NULL page will
+	 * trigger a BUG. So handle it here.
+	 * 2. An orphaned page (see truncate_complete_page) might have
+	 * fs-private metadata. The page can be picked up due to memory
+	 * offlining. Everywhere else except page reclaim, the page is
+	 * invisible to the vm, so the page can not be migrated. So try to
+	 * free the metadata, so the page can be freed.
+	 */
+	if (!page->mapping) {
+		VM_BUG_ON(PageAnon(page));
+		if (page_has_private(page)) {
+			try_to_free_buffers(page);
+			goto uncharge;
+		}
+		goto skip_unmap;
+	}
+
+	/* Establish migration ptes or remove ptes */
+	try_to_unmap(page, TTU_MIGRATION|TTU_IGNORE_MLOCK|TTU_IGNORE_ACCESS);
+
+skip_unmap:
+	if (!page_mapped(page))
+		rc = move_to_new_page(newpage, page, remap_swapcache, sync);
+
+	if (rc && remap_swapcache)
+		remove_migration_ptes(page, page);
+
+	/* Drop an anon_vma reference if we took one */
+	if (anon_vma)
+		put_anon_vma(anon_vma);
+
+uncharge:
+	if (!charge)
+		mem_cgroup_end_migration(mem, page, newpage, rc == 0);
+unlock:
+	unlock_page(page);
+
+move_newpage:
+	if (rc != -EAGAIN) {
+		/*
+		 * A page that has been migrated has all references
+		 * removed and will be freed. A page that has not been
+		 * migrated will have kept its references and be
+		 * restored.
+		 */
+		migratelist_del(page, *prev_lru);
+		dec_zone_page_state(page, NR_ISOLATED_ANON +
+				page_is_file_cache(page));
+		/*
+		 * Unlike unmap_and_move, we defer putting back the page
+		 * until after the inorder handling, because the page would
+		 * be freed and lose PG_lru, in which case
+		 * keep_lru_order would not work correctly.
+		 */
+		del = true;
+	}
+	else
+		*prev_lru = &page->ilru;
+
+	/*
+	 * Move the new page to the LRU. If migration was not successful
+	 * then this will free the page.
+	 */
+	zone = page_zone(page);
+	spin_lock_irq(&zone->lru_lock);
+	if (keep_lru_order(page, prev_page)) {
+		putback_page_to_lru(newpage, prev_page);
+		spin_unlock_irq(&zone->lru_lock);
+		trace_mm_compaction_inorder(page, newpage);
+		/*
+		 * The newpage will replace the LRU position of the old page
+		 * and the old one will be freed. So adjust prev_page of the
+		 * pages remaining in the migratelist for keep_lru_order.
+		 */
+		adjust_inorder_prev_page(head, page, newpage);
+		put_page(newpage);	/* drop ref from isolate */
+	}
+	else {
+
+		spin_unlock_irq(&zone->lru_lock);
+		trace_mm_compaction_inorder(page, newpage);
+		putback_lru_page(newpage);
+	}
+
+	if (del)
+		putback_lru_page(page);
+
+	if (result) {
+		if (rc)
+			*result = rc;
+		else
+			*result = page_to_nid(newpage);
+	}
+	return rc;
+}
+
+
 /*
  * Counterpart of unmap_and_move_page() for hugepage migration.
 *
@@ -960,6 +1235,56 @@ out:
 	return nr_failed + retry;
 }
 
+int migrate_inorder_lru_pages(struct inorder_lru *head, new_page_t get_new_page,
+		unsigned long private, bool offlining, bool sync)
+{
+	int retry = 1;
+	int nr_failed = 0;
+	int pass = 0;
+	struct page *page, *page2;
+	struct inorder_lru *prev;
+	int swapwrite = current->flags & PF_SWAPWRITE;
+	int rc;
+
+	if (!swapwrite)
+		current->flags |= PF_SWAPWRITE;
+
+	for (pass = 0; pass < 10 && retry; pass++) {
+		retry = 0;
+		list_for_each_migrate_entry_safe(page, page2, head, ilru) {
+			cond_resched();
+
+			prev = head;
+			rc = unmap_and_move_inorder_lru(get_new_page, private,
+					page, pass > 2, offlining,
+					sync, &prev, head);
+
+			switch (rc) {
+			case -ENOMEM:
+				goto out;
+			case -EAGAIN:
+				retry++;
+				break;
+			case 0:
+				break;
+			default:
+				/* Permanent failure */
+				nr_failed++;
+				break;
+			}
+		}
+	}
+	rc = 0;
+out:
+	if (!swapwrite)
+		current->flags &= ~PF_SWAPWRITE;
+
+	if (rc)
+		return rc;
+
+	return nr_failed + retry;
+}
+
 int migrate_huge_pages(struct list_head *from,
 		new_page_t get_new_page, unsigned long private, bool offlining,
 		bool sync)
@@ -1004,6 +1329,72 @@ out:
 	return nr_failed + retry;
 }
 
+int __isolate_inorder_lru_page(struct page *page, enum ISOLATE_PAGE_MODE mode,
+		int file, struct page **lru_p_page)
+{
+	int active;
+	int ret = -EINVAL;
+	BUG_ON(mode & ISOLATE_BOTH &&
+		(mode & ISOLATE_INACTIVE || mode & ISOLATE_ACTIVE));
+
+	/* Only take pages on the LRU. */
+	if (!PageLRU(page))
+		return ret;
+
+	active = PageActive(page);
+
+	/*
+	 * When checking the active state, we need to be sure we are
+	 * dealing with comparable boolean values. Take the logical not
+	 * of each.
+	 */
+	if (mode & ISOLATE_ACTIVE && !active)
+		return ret;
+
+	if (mode & ISOLATE_INACTIVE && active)
+		return ret;
+
+	/*
+	 * When this function is being called for lumpy reclaim, we
+	 * initially look into all LRU pages, active, inactive and
+	 * unevictable; only give shrink_page_list evictable pages.
+	 */
+	if (PageUnevictable(page))
+		return ret;
+
+	ret = -EBUSY;
+
+	if (mode & ISOLATE_CLEAN && (PageDirty(page) || PageWriteback(page)))
+		return ret;
+
+	if (mode & ISOLATE_UNMAPPED && page_mapped(page))
+		return ret;
+
+	if (likely(get_page_unless_zero(page))) {
+		struct zone *zone = page_zone(page);
+		struct page *prev_page;
+		enum lru_list l = page_lru(page);
+		/*
+		 * Be careful not to clear PageLRU until after we're
+		 * sure the page is not being freed elsewhere -- the
+		 * page release code relies on it.
+		 */
+		ClearPageLRU(page);
+
+		if (&zone->lru[l].list == page->lru.prev) {
+			*lru_p_page = NULL;
+			goto out;
+		}
+
+		prev_page = list_entry(page->lru.prev, struct page, lru);
+		*lru_p_page = prev_page;
+out:
+		ret = 0;
+	}
+
+	return ret;
+}
+
 #ifdef CONFIG_NUMA
 /*
  * Move a list of individual pages
diff --git a/mm/swap.c b/mm/swap.c
index 5602f1a..6c24a75 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -252,7 +252,7 @@ void rotate_reclaimable_page(struct page *page)
 	}
 }
 
-static void update_page_reclaim_stat(struct zone *zone, struct page *page,
+void update_page_reclaim_stat(struct zone *zone, struct page *page,
 				     int file, int rotated)
 {
 	struct zone_reclaim_stat *reclaim_stat = &zone->reclaim_stat;
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 8bfd450..10e4577 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -49,10 +49,9 @@
 #include
 
 #include "internal.h"
-
+#include
 #define CREATE_TRACE_POINTS
 #include
-
 /*
  * reclaim_mode determines how the inactive list is shrunk
  * RECLAIM_MODE_SINGLE: Reclaim only order-0 pages
@@ -550,11 +549,61 @@ int remove_mapping(struct address_space *mapping, struct page *page)
 	return 0;
 }
 
+/*
+ * If prev_page is on the same LRU as page, we can keep the LRU order of page.
+ * zone->lru_lock must be held.
+ */
+bool keep_lru_order(struct page *page, struct page *prev)
+{
+	bool ret = false;
+	if (!prev || !PageLRU(prev))
+		goto out;
+
+	if (unlikely(PageUnevictable(prev)))
+		goto out;
+
+	if (page_lru_base_type(page) != page_lru_base_type(prev))
+		goto out;
+
+	ret = true;
+out:
+	return ret;
+}
+
 /**
- * putback_lru_page - put previously isolated page onto appropriate LRU list
+ * putback_page_to_lru - put isolated @page onto @head
  * @page: page to be put back to appropriate lru list
+ * @head_page: lru position to be put back
  *
- * Add previously isolated @page to appropriate LRU list.
+ * Insert previously isolated @page at the appropriate position of the lru
+ * list. zone->lru_lock must be held.
+ */
+void putback_page_to_lru(struct page *page, struct page *head_page)
+{
+	int lru, active, file;
+	struct zone *zone = page_zone(page);
+
+	VM_BUG_ON(PageLRU(page));
+
+	lru = page_lru(head_page);
+	active = is_active_lru(lru);
+	file = is_file_lru(lru);
+
+	if (active)
+		SetPageActive(page);
+	else
+		ClearPageActive(page);
+
+	update_page_reclaim_stat(zone, page, file, active);
+	SetPageLRU(page);
+	__add_page_to_lru_list(zone, page, lru, &head_page->lru);
+}
+
+/**
+ * putback_lru_page - put previously isolated page onto appropriate LRU list's head
+ * @page: page to be put back to appropriate lru list
+ *
+ * Add previously isolated @page to the appropriate LRU list's head.
  * Page may still be unevictable for other reasons.
  *
  * lru_lock must not be held, interrupts must be enabled.
@@ -957,23 +1006,29 @@ keep_lumpy:
 *
 * returns 0 on success, -ve errno on failure.
 */
-int __isolate_lru_page(struct page *page, int mode, int file)
+int __isolate_lru_page(struct page *page, enum ISOLATE_PAGE_MODE mode,
+			int file)
 {
+	int active;
 	int ret = -EINVAL;
+	BUG_ON(mode & ISOLATE_BOTH &&
+		(mode & ISOLATE_INACTIVE || mode & ISOLATE_ACTIVE));
 
 	/* Only take pages on the LRU. */
 	if (!PageLRU(page))
 		return ret;
 
+	active = PageActive(page);
+
 	/*
 	 * When checking the active state, we need to be sure we are
 	 * dealing with comparible boolean values. Take the logical not
 	 * of each.
 	 */
-	if (mode != ISOLATE_BOTH && (!PageActive(page) != !mode))
+	if (mode & ISOLATE_ACTIVE && !active)
 		return ret;
 
-	if (mode != ISOLATE_BOTH && page_is_file_cache(page) != file)
+	if (mode & ISOLATE_INACTIVE && active)
 		return ret;
 
 	/*
@@ -986,6 +1041,12 @@ int __isolate_lru_page(struct page *page, int mode, int file)
 
 	ret = -EBUSY;
 
+	if (mode & ISOLATE_CLEAN && (PageDirty(page) || PageWriteback(page)))
+		return ret;
+
+	if (mode & ISOLATE_UNMAPPED && page_mapped(page))
+		return ret;
+
 	if (likely(get_page_unless_zero(page))) {
 		/*
 		 * Be careful not to clear PageLRU until after we're
@@ -1021,7 +1082,8 @@ int __isolate_lru_page(struct page *page, int mode, int file)
  */
 static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
 		struct list_head *src, struct list_head *dst,
-		unsigned long *scanned, int order, int mode, int file)
+		unsigned long *scanned, int order, enum ISOLATE_PAGE_MODE mode,
+		int file)
 {
 	unsigned long nr_taken = 0;
 	unsigned long nr_lumpy_taken = 0;
@@ -1134,8 +1196,8 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
 static unsigned long isolate_pages_global(unsigned long nr,
 					struct list_head *dst,
 					unsigned long *scanned, int order,
-					int mode, struct zone *z,
-					int active, int file)
+					enum ISOLATE_PAGE_MODE mode,
+					struct zone *z, int active, int file)
 {
 	int lru = LRU_BASE;
 	if (active)
@@ -1196,6 +1258,10 @@ static unsigned long clear_active_flags(struct list_head *page_list,
  * without a stable reference).
  * (2) the lru_lock must not be held.
  * (3) interrupts must be enabled.
+ *
+ * NOTE: This function removes the page from the LRU list, while
+ * putback_lru_page() inserts the page at the LRU list's head. That causes
+ * LRU churn, so use this function carefully.
  */
 int isolate_lru_page(struct page *page)
 {
@@ -1378,6 +1444,7 @@ shrink_inactive_list(unsigned long nr_to_scan, struct zone *zone,
 	unsigned long nr_taken;
 	unsigned long nr_anon;
 	unsigned long nr_file;
+	enum ISOLATE_PAGE_MODE mode = ISOLATE_NONE;
 
 	while (unlikely(too_many_isolated(zone, file, sc))) {
 		congestion_wait(BLK_RW_ASYNC, HZ/10);
@@ -1389,13 +1456,20 @@ shrink_inactive_list(unsigned long nr_to_scan, struct zone *zone,
 	set_reclaim_mode(priority, sc, false);
 	lru_add_drain();
+
+	if (!sc->may_unmap)
+		mode |= ISOLATE_UNMAPPED;
+	if (!sc->may_writepage)
+		mode |= ISOLATE_CLEAN;
+	mode |= sc->reclaim_mode & RECLAIM_MODE_LUMPYRECLAIM ?
+			ISOLATE_BOTH : ISOLATE_INACTIVE;
+
 	spin_lock_irq(&zone->lru_lock);
+
 	if (scanning_global_lru(sc)) {
 		nr_taken = isolate_pages_global(nr_to_scan,
-			&page_list, &nr_scanned, sc->order,
-			sc->reclaim_mode & RECLAIM_MODE_LUMPYRECLAIM ?
-					ISOLATE_BOTH : ISOLATE_INACTIVE,
+			&page_list, &nr_scanned, sc->order, mode,
 			zone, 0, file);
 		zone->pages_scanned += nr_scanned;
 		if (current_is_kswapd())
@@ -1406,9 +1480,7 @@ shrink_inactive_list(unsigned long nr_to_scan, struct zone *zone,
 				nr_scanned);
 	} else {
 		nr_taken = mem_cgroup_isolate_pages(nr_to_scan,
-			&page_list, &nr_scanned, sc->order,
-			sc->reclaim_mode & RECLAIM_MODE_LUMPYRECLAIM ?
-					ISOLATE_BOTH : ISOLATE_INACTIVE,
+			&page_list, &nr_scanned, sc->order, mode,
 				zone, sc->mem_cgroup,
 				0, file);
 		/*
@@ -1512,19 +1584,26 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone,
 	struct page *page;
 	struct zone_reclaim_stat *reclaim_stat = get_reclaim_stat(zone, sc);
 	unsigned long nr_rotated = 0;
+	enum ISOLATE_PAGE_MODE mode = ISOLATE_ACTIVE;
 
 	lru_add_drain();
+
+	if (!sc->may_unmap)
+		mode |= ISOLATE_UNMAPPED;
+	if (!sc->may_writepage)
+		mode |= ISOLATE_CLEAN;
+
 	spin_lock_irq(&zone->lru_lock);
 	if (scanning_global_lru(sc)) {
 		nr_taken = isolate_pages_global(nr_pages, &l_hold,
 						&pgscanned, sc->order,
-						ISOLATE_ACTIVE, zone,
+						mode, zone,
 						1, file);
 		zone->pages_scanned += pgscanned;
 	} else {
 		nr_taken = mem_cgroup_isolate_pages(nr_pages, &l_hold,
 						&pgscanned, sc->order,
-						ISOLATE_ACTIVE, zone,
+						mode, zone,
 						sc->mem_cgroup, 1, file);
 		/*
 		 * mem_cgroup_isolate_pages() keeps track of
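To summarize the putback policy the diff implements: when an isolated page (or its replacement after migration) goes back to the LRU, the code first checks via keep_lru_order() whether the LRU neighbour recorded at isolation time is still usable; if so, the page is inserted right after that neighbour, otherwise it falls back to the usual insertion at the head of the list. The following stand-alone sketch is illustrative only (simplified stand-in types rather than kernel structures) and models that decision:

/* Illustrative sketch only -- not part of the patch.  A simplified model of
 * the in-order putback decision made by putback_inorder_lru_pages() and
 * unmap_and_move_inorder_lru().
 */
#include <stdbool.h>
#include <stdio.h>

struct fake_page {
	bool on_lru;		/* stands in for PageLRU()            */
	bool unevictable;	/* stands in for PageUnevictable()    */
	int lru_base_type;	/* stands in for page_lru_base_type() */
};

/* Mirrors the checks in the patch's keep_lru_order(): the hint is usable
 * only if the recorded neighbour is still on the same kind of LRU list.
 */
static bool keep_lru_order(struct fake_page *page, struct fake_page *prev)
{
	if (!prev || !prev->on_lru)
		return false;
	if (prev->unevictable)
		return false;
	return page->lru_base_type == prev->lru_base_type;
}

static void putback(struct fake_page *page, struct fake_page *prev)
{
	if (keep_lru_order(page, prev))
		printf("insert page right after its old LRU neighbour\n");
	else
		printf("fall back: insert page at the head of the LRU\n");
	page->on_lru = true;
}

int main(void)
{
	struct fake_page neighbour = { .on_lru = true, .lru_base_type = 0 };
	struct fake_page page = { .lru_base_type = 0 };

	putback(&page, &neighbour);	/* neighbour still on LRU: in order  */

	neighbour.on_lru = false;	/* neighbour was reclaimed meanwhile */
	page.on_lru = false;
	putback(&page, &neighbour);	/* falls back to head insertion      */
	return 0;
}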