diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 5e9840f..91a1162 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -30,10 +30,13 @@ enum mem_cgroup_page_stat_item {
 	MEMCG_NR_FILE_MAPPED, /* # of pages charged as file rss */
 };
 
+enum ISOLATE_MODE;
+
 extern unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan,
 					struct list_head *dst,
 					unsigned long *scanned, int order,
-					int mode, struct zone *z,
+					enum ISOLATE_MODE mode,
+					struct zone *z,
 					struct mem_cgroup *mem_cont,
 					int active, int file);
 
diff --git a/include/linux/migrate.h b/include/linux/migrate.h
index e39aeec..3858618 100644
--- a/include/linux/migrate.h
+++ b/include/linux/migrate.h
@@ -9,12 +9,52 @@ typedef struct page *new_page_t(struct page *, unsigned long private, int **);
 #ifdef CONFIG_MIGRATION
 #define PAGE_MIGRATION 1
+/*
+ * The migrate list for compaction is a singly linked list instead of a
+ * doubly linked list, so the common list utilities are not compatible with it.
+ * Please use the functions below instead of the common list ones.
+ */
+static inline void INIT_MIGRATE_LIST(struct inorder_lru *list)
+{
+	list->prev_page = NULL;
+	list->next = list;
+}
+
+static inline int migratelist_empty(const struct inorder_lru *head)
+{
+	return head->next == head;
+}
+
+static inline void migratelist_add(struct page *page,
+			struct page *prev_page, struct inorder_lru *head)
+{
+	VM_BUG_ON(PageLRU(page));
+
+	page->ilru.prev_page = prev_page;
+	page->ilru.next = head->next;
+	head->next = &page->ilru;
+}
+
+static inline void migratelist_del(struct page *page, struct inorder_lru *head)
+{
+	head->next = page->ilru.next;
+}
+
+#define list_for_each_migrate_entry		list_for_each_entry
+#define list_for_each_migrate_entry_safe	list_for_each_entry_safe
+
 extern void putback_lru_pages(struct list_head *l);
+extern void putback_ilru_pages(struct inorder_lru *l);
 extern int migrate_page(struct address_space *,
 			struct page *, struct page *);
 extern int migrate_pages(struct list_head *l, new_page_t x,
 			unsigned long private, bool offlining,
 			bool sync);
+
+extern int migrate_ilru_pages(struct inorder_lru *l, new_page_t x,
+			unsigned long private, bool offlining,
+			bool sync);
+
 extern int migrate_huge_pages(struct list_head *l, new_page_t x,
 			unsigned long private, bool offlining,
 			bool sync);
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 02aa561..af46614 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -24,6 +24,17 @@ struct address_space;
 
 #define USE_SPLIT_PTLOCKS	(NR_CPUS >= CONFIG_SPLIT_PTLOCK_CPUS)
 
+struct page;
+
+/*
+ * The inorder_lru is used by compaction for keeping LRU order
+ * during migration.
+ */
+struct inorder_lru {
+	struct page *prev_page;		/* prev LRU page of isolated page */
+	struct inorder_lru *next;	/* next pointer for singly linked list */
+};
+
 /*
  * Each physical page in the system has a struct page associated with
  * it to keep track of whatever it is we are using the page for at the
@@ -72,9 +83,12 @@ struct page {
 		pgoff_t index;		/* Our offset within mapping. */
 		void *freelist;		/* SLUB: freelist req. slab lock */
 	};
-	struct list_head lru;		/* Pageout list, eg. active_list
+	union {
+		struct inorder_lru ilru;/* compaction: migrated page list */
+		struct list_head lru;	/* Pageout list, eg. active_list
 					 * protected by zone->lru_lock !
 					 */
+	};
 	/*
 	 * On machines where all RAM is mapped into kernel address space,
 	 * we can simply calculate the virtual address. On machines with
diff --git a/include/linux/swap.h b/include/linux/swap.h
index a5c6da5..854244a 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -226,6 +226,8 @@ extern int lru_add_drain_all(void);
 extern void rotate_reclaimable_page(struct page *page);
 extern void deactivate_page(struct page *page);
 extern void swap_setup(void);
+extern void update_page_reclaim_stat(struct zone *zone, struct page *page,
+				int file, int rotated);
 
 extern void add_page_to_unevictable_list(struct page *page);
 
@@ -244,9 +246,13 @@ static inline void lru_cache_add_file(struct page *page)
 }
 
 /* LRU Isolation modes. */
-#define ISOLATE_INACTIVE 0	/* Isolate inactive pages. */
-#define ISOLATE_ACTIVE 1	/* Isolate active pages. */
-#define ISOLATE_BOTH 2		/* Isolate both active and inactive pages. */
+enum ISOLATE_MODE {
+	ISOLATE_NONE,
+	ISOLATE_INACTIVE = 1,	/* Isolate inactive pages */
+	ISOLATE_ACTIVE = 2,	/* Isolate active pages */
+	ISOLATE_CLEAN = 8,	/* Isolate clean file pages */
+	ISOLATE_UNMAPPED = 16,	/* Isolate unmapped file pages */
+};
 
 /* linux/mm/vmscan.c */
 extern unsigned long try_to_free_pages(struct zonelist *zonelist, int order,
@@ -258,7 +264,8 @@ extern unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *mem,
 						gfp_t gfp_mask, bool noswap,
 						unsigned int swappiness,
 						struct zone *zone);
-extern int __isolate_lru_page(struct page *page, int mode, int file);
+extern int __isolate_lru_page(struct page *page, enum ISOLATE_MODE mode,
+				int file, struct page **prev_page);
 extern unsigned long shrink_all_memory(unsigned long nr_pages);
 extern int vm_swappiness;
 extern int remove_mapping(struct address_space *mapping, struct page *page);
diff --git a/include/trace/events/inorder_putback.h b/include/trace/events/inorder_putback.h
new file mode 100644
index 0000000..c615ed8
--- /dev/null
+++ b/include/trace/events/inorder_putback.h
@@ -0,0 +1,79 @@
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM inorder_putback
+
+#if !defined(_TRACE_INP_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_INP_H
+
+#include <linux/types.h>
+#include <linux/tracepoint.h>
+
+TRACE_EVENT(mm_compaction_inorder,
+
+	TP_PROTO(struct page *page,
+		struct page *newpage),
+
+	TP_ARGS(page, newpage),
+
+	TP_STRUCT__entry(
+		__field(struct page *, page)
+		__field(struct page *, newpage)
+	),
+
+	TP_fast_assign(
+		__entry->page = page;
+		__entry->newpage = newpage;
+	),
+
+	TP_printk("pfn=%lu new pfn=%lu",
+		page_to_pfn(__entry->page),
+		page_to_pfn(__entry->newpage))
+);
+
+TRACE_EVENT(mm_compaction_outoforder,
+
+	TP_PROTO(struct page *page,
+		struct page *newpage),
+
+	TP_ARGS(page, newpage),
+
+	TP_STRUCT__entry(
+		__field(struct page *, page)
+		__field(struct page *, newpage)
+	),
+
+	TP_fast_assign(
+		__entry->page = page;
+		__entry->newpage = newpage;
+	),
+
+	TP_printk("pfn=%lu new pfn=%lu",
+		page_to_pfn(__entry->page),
+		page_to_pfn(__entry->newpage))
+);
+
+TRACE_EVENT(mm_compact_isolate,
+
+	TP_PROTO(struct page *prev_page,
+		struct page *page),
+
+	TP_ARGS(prev_page, page),
+
+	TP_STRUCT__entry(
+		__field(struct page *, prev_page)
+		__field(struct page *, page)
+	),
+
+	TP_fast_assign(
+		__entry->prev_page = prev_page;
+		__entry->page = page;
+	),
+
+	TP_printk("pfn=%lu prev_pfn=%lu",
+		page_to_pfn(__entry->page),
+		page_to_pfn(__entry->prev_page))
+);
+
+#endif /* _TRACE_INP_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/include/trace/events/vmscan.h b/include/trace/events/vmscan.h
index ea422aa..4f53d43 100644
--- a/include/trace/events/vmscan.h
+++ b/include/trace/events/vmscan.h
@@ -187,7 +187,7 @@ DECLARE_EVENT_CLASS(mm_vmscan_lru_isolate_template,
 		unsigned long nr_lumpy_taken,
 		unsigned long nr_lumpy_dirty,
 		unsigned long nr_lumpy_failed,
-		int isolate_mode),
+		enum ISOLATE_MODE isolate_mode),
 
 	TP_ARGS(order, nr_requested, nr_scanned, nr_taken,
 		nr_lumpy_taken, nr_lumpy_dirty, nr_lumpy_failed, isolate_mode),
@@ -199,7 +199,7 @@ DECLARE_EVENT_CLASS(mm_vmscan_lru_isolate_template,
 		__field(unsigned long, nr_lumpy_taken)
 		__field(unsigned long, nr_lumpy_dirty)
 		__field(unsigned long, nr_lumpy_failed)
-		__field(int, isolate_mode)
+		__field(enum ISOLATE_MODE, isolate_mode)
 	),
 
 	TP_fast_assign(
@@ -233,7 +233,7 @@ DEFINE_EVENT(mm_vmscan_lru_isolate_template, mm_vmscan_lru_isolate,
 		unsigned long nr_lumpy_taken,
 		unsigned long nr_lumpy_dirty,
 		unsigned long nr_lumpy_failed,
-		int isolate_mode),
+		enum ISOLATE_MODE isolate_mode),
 
 	TP_ARGS(order, nr_requested, nr_scanned, nr_taken,
 		nr_lumpy_taken, nr_lumpy_dirty, nr_lumpy_failed, isolate_mode)
@@ -248,7 +248,7 @@ DEFINE_EVENT(mm_vmscan_lru_isolate_template, mm_vmscan_memcg_isolate,
 		unsigned long nr_lumpy_taken,
 		unsigned long nr_lumpy_dirty,
 		unsigned long nr_lumpy_failed,
-		int isolate_mode),
+		enum ISOLATE_MODE isolate_mode),
 
 	TP_ARGS(order, nr_requested, nr_scanned, nr_taken,
 		nr_lumpy_taken, nr_lumpy_dirty, nr_lumpy_failed, isolate_mode)
diff --git a/mm/compaction.c b/mm/compaction.c
index 021a296..1041251 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -16,6 +16,7 @@
 #include
 #include "internal.h"
+#include <trace/events/inorder_putback.h>
 
 #define CREATE_TRACE_POINTS
 #include <trace/events/compaction.h>
@@ -28,17 +29,13 @@
  */
 struct compact_control {
 	struct list_head freepages;	/* List of free pages to migrate to */
-	struct list_head migratepages;	/* List of pages being migrated */
+	struct inorder_lru migratepages;/* List of pages being migrated */
 	unsigned long nr_freepages;	/* Number of isolated free pages */
 	unsigned long nr_migratepages;	/* Number of pages to migrate */
 	unsigned long free_pfn;	/* isolate_freepages search base */
 	unsigned long migrate_pfn;	/* isolate_migratepages search base */
 	bool sync;			/* Synchronous migration */
 
-	/* Account for isolated anon and file pages */
-	unsigned long nr_anon;
-	unsigned long nr_file;
-
 	unsigned int order;		/* order a direct compactor needs */
 	int migratetype;		/* MOVABLE, RECLAIMABLE etc */
 	struct zone *zone;
@@ -212,17 +209,13 @@ static void isolate_freepages(struct zone *zone,
 static void acct_isolated(struct zone *zone, struct compact_control *cc)
 {
 	struct page *page;
-	unsigned int count[NR_LRU_LISTS] = { 0, };
+	unsigned int count[2] = { 0, };
 
-	list_for_each_entry(page, &cc->migratepages, lru) {
-		int lru = page_lru_base_type(page);
-		count[lru]++;
-	}
+	list_for_each_migrate_entry(page, &cc->migratepages, ilru)
+		count[!!page_is_file_cache(page)]++;
 
-	cc->nr_anon = count[LRU_ACTIVE_ANON] + count[LRU_INACTIVE_ANON];
-	cc->nr_file = count[LRU_ACTIVE_FILE] + count[LRU_INACTIVE_FILE];
-	__mod_zone_page_state(zone, NR_ISOLATED_ANON, cc->nr_anon);
-	__mod_zone_page_state(zone, NR_ISOLATED_FILE, cc->nr_file);
+	__mod_zone_page_state(zone, NR_ISOLATED_ANON, count[0]);
+	__mod_zone_page_state(zone, NR_ISOLATED_FILE, count[1]);
 }
 
 /* Similar to reclaim, but different enough that they don't share logic */
@@ -250,7 +243,8 @@ static unsigned long isolate_migratepages(struct zone *zone,
 	unsigned long low_pfn, end_pfn;
 	unsigned long last_pageblock_nr = 0, pageblock_nr;
 	unsigned long nr_scanned = 0, nr_isolated = 0;
-	struct list_head *migratelist = &cc->migratepages;
+	struct inorder_lru *migratelist = &cc->migratepages;
+	enum ISOLATE_MODE mode = ISOLATE_ACTIVE|ISOLATE_INACTIVE;
 
 	/* Do not scan outside zone boundaries */
 	low_pfn = max(cc->migrate_pfn, zone->zone_start_pfn);
@@ -280,7 +274,7 @@ static unsigned long isolate_migratepages(struct zone *zone,
 	cond_resched();
 	spin_lock_irq(&zone->lru_lock);
 	for (; low_pfn < end_pfn; low_pfn++) {
-		struct page *page;
+		struct page *page, *prev_page;
 		bool locked = true;
 
 		/* give a chance to irqs before checking need_resched() */
@@ -334,15 +328,19 @@ static unsigned long isolate_migratepages(struct zone *zone,
 			continue;
 		}
 
+		if (!cc->sync)
+			mode |= ISOLATE_CLEAN;
+
 		/* Try isolate the page */
-		if (__isolate_lru_page(page, ISOLATE_BOTH, 0) != 0)
+		if (__isolate_lru_page(page, mode, 0, &prev_page) != 0)
 			continue;
 
+		trace_mm_compact_isolate(prev_page, page);
 		VM_BUG_ON(PageTransCompound(page));
 
 		/* Successfully isolated */
 		del_page_from_lru_list(zone, page, page_lru(page));
-		list_add(&page->lru, migratelist);
+		migratelist_add(page, prev_page, migratelist);
 		cc->nr_migratepages++;
 		nr_isolated++;
 
@@ -398,7 +396,7 @@ static void update_nr_listpages(struct compact_control *cc)
 	int nr_freepages = 0;
 	struct page *page;
 
-	list_for_each_entry(page, &cc->migratepages, lru)
+	list_for_each_migrate_entry(page, &cc->migratepages, ilru)
 		nr_migratepages++;
 	list_for_each_entry(page, &cc->freepages, lru)
 		nr_freepages++;
@@ -526,7 +524,8 @@ static int compact_zone(struct zone *zone, struct compact_control *cc)
 			continue;
 
 		nr_migrate = cc->nr_migratepages;
-		err = migrate_pages(&cc->migratepages, compaction_alloc,
+		err = migrate_ilru_pages(&cc->migratepages,
+				compaction_alloc,
 				(unsigned long)cc, false,
 				cc->sync);
 		update_nr_listpages(cc);
@@ -541,7 +540,7 @@ static int compact_zone(struct zone *zone, struct compact_control *cc)
 
 		/* Release LRU pages not migrated */
 		if (err) {
-			putback_lru_pages(&cc->migratepages);
+			putback_ilru_pages(&cc->migratepages);
 			cc->nr_migratepages = 0;
 		}
 
@@ -567,7 +566,7 @@ unsigned long compact_zone_order(struct zone *zone,
 		.sync = sync,
 	};
 	INIT_LIST_HEAD(&cc.freepages);
-	INIT_LIST_HEAD(&cc.migratepages);
+	INIT_MIGRATE_LIST(&cc.migratepages);
 
 	return compact_zone(zone, &cc);
 }
@@ -649,12 +648,12 @@ static int compact_node(int nid)
 
 		cc.zone = zone;
 		INIT_LIST_HEAD(&cc.freepages);
-		INIT_LIST_HEAD(&cc.migratepages);
+		INIT_MIGRATE_LIST(&cc.migratepages);
 
 		compact_zone(zone, &cc);
 
 		VM_BUG_ON(!list_empty(&cc.freepages));
-		VM_BUG_ON(!list_empty(&cc.migratepages));
+		VM_BUG_ON(!migratelist_empty(&cc.migratepages));
 	}
 
 	return 0;
diff --git a/mm/internal.h b/mm/internal.h
index 9d0ced8..a08d8c6 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -42,6 +42,8 @@ extern unsigned long highest_memmap_pfn;
 /*
  * in mm/vmscan.c:
  */
+extern bool same_lru(struct page *page, struct page *prev);
+extern void putback_page_to_lru(struct page *page, struct page *head_page);
 extern int isolate_lru_page(struct page *page);
 extern void putback_lru_page(struct page *page);
 
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 010f916..04d460d 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -1106,7 +1106,8 @@ mem_cgroup_get_reclaim_stat_from_page(struct page *page)
 unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan,
 					struct list_head *dst,
 					unsigned long *scanned, int order,
-					int mode, struct zone *z,
+					enum ISOLATE_MODE mode,
+					struct zone *z,
 					struct mem_cgroup *mem_cont,
 					int active, int file)
 {
@@ -1140,7 +1141,7 @@ unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan,
 			continue;
 
 		scan++;
-		ret = __isolate_lru_page(page, mode, file);
+		ret = __isolate_lru_page(page, mode, file, NULL);
 		switch (ret) {
 		case 0:
 			list_move(&page->lru, dst);
diff --git a/mm/migrate.c b/mm/migrate.c
index 34132f8..2a8f713 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -39,6 +39,9 @@
 
 #include "internal.h"
 
+#define CREATE_TRACE_POINTS
+#include <trace/events/inorder_putback.h>
+
 #define lru_to_page(_head) (list_entry((_head)->prev, struct page, lru))
 
 /*
@@ -68,7 +71,7 @@ int migrate_prep_local(void)
 }
 
 /*
- * Add isolated pages on the list back to the LRU under page lock
+ * Add isolated pages on the list back to the LRU's head under page lock
  * to avoid leaking evictable pages back onto unevictable list.
  */
 void putback_lru_pages(struct list_head *l)
@@ -84,6 +87,32 @@ void putback_lru_pages(struct list_head *l)
 	}
 }
 
+void putback_ilru_pages(struct inorder_lru *l)
+{
+	struct zone *zone;
+	struct page *page, *page2, *prev;
+
+	list_for_each_migrate_entry_safe(page, page2, l, ilru) {
+		dec_zone_page_state(page, NR_ISOLATED_ANON +
+				page_is_file_cache(page));
+		zone = page_zone(page);
+		spin_lock_irq(&zone->lru_lock);
+		prev = page->ilru.prev_page;
+		if (same_lru(page, prev)) {
+			trace_mm_compaction_inorder(page, page);
+			putback_page_to_lru(page, prev);
+			spin_unlock_irq(&zone->lru_lock);
+		}
+		else {
+			trace_mm_compaction_outoforder(page, page);
+			spin_unlock_irq(&zone->lru_lock);
+			putback_lru_page(page);
+		}
+
+		l->next = &page2->ilru;
+	}
+}
+
 /*
  * Restore a potential migration pte to a working pte entry
  */
@@ -621,38 +650,18 @@ static int move_to_new_page(struct page *newpage, struct page *page,
 	return rc;
 }
 
-/*
- * Obtain the lock on page, remove all ptes and migrate the page
- * to the newly allocated page in newpage.
- */
-static int unmap_and_move(new_page_t get_new_page, unsigned long private,
-			struct page *page, int force, bool offlining, bool sync)
+static int __unmap_and_move(struct page *page, struct page *newpage,
+			int force, bool offlining, bool sync)
 {
-	int rc = 0;
-	int *result = NULL;
-	struct page *newpage = get_new_page(page, private, &result);
+	int rc = -EAGAIN;
 	int remap_swapcache = 1;
 	int charge = 0;
 	struct mem_cgroup *mem;
 	struct anon_vma *anon_vma = NULL;
 
-	if (!newpage)
-		return -ENOMEM;
-
-	if (page_count(page) == 1) {
-		/* page was freed from under us. So we are done. */
-		goto move_newpage;
-	}
-
-	if (unlikely(PageTransHuge(page)))
-		if (unlikely(split_huge_page(page)))
-			goto move_newpage;
-
-	/* prepare cgroup just returns 0 or -ENOMEM */
-	rc = -EAGAIN;
-
 	if (!trylock_page(page)) {
 		if (!force || !sync)
-			goto move_newpage;
+			goto out;
 
 		/*
 		 * It's not safe for direct compaction to call lock_page.
@@ -668,7 +677,7 @@ static int unmap_and_move(new_page_t get_new_page, unsigned long private,
 		 * altogether.
 		 */
 		if (current->flags & PF_MEMALLOC)
-			goto move_newpage;
+			goto out;
 
 		lock_page(page);
 	}
@@ -789,9 +798,13 @@ uncharge:
 	mem_cgroup_end_migration(mem, page, newpage, rc == 0);
 unlock:
 	unlock_page(page);
+out:
+	return rc;
+}
 
-move_newpage:
-	if (rc != -EAGAIN) {
+static void __put_lru_pages(struct page *page, struct page *newpage)
+{
+	if (page != NULL) {
 		/*
 		 * A page that has been migrated has all references
 		 * removed and will be freed. A page that has not been
@@ -803,13 +816,144 @@ move_newpage:
 			page_is_file_cache(page));
 		putback_lru_page(page);
 	}
-
 	/*
 	 * Move the new page to the LRU. If migration was not successful
 	 * then this will free the page.
 	 */
 	putback_lru_page(newpage);
+}
+
+/*
+ * Obtain the lock on page, remove all ptes and migrate the page
+ * to the newly allocated page in newpage.
+ */
+static int unmap_and_move(new_page_t get_new_page, unsigned long private,
+		struct page *page, int force, bool offlining, bool sync)
+{
+	int rc = 0;
+	int *result = NULL;
+	struct page *newpage = get_new_page(page, private, &result);
+
+	if (!newpage)
+		return -ENOMEM;
+
+	if (page_count(page) == 1) {
+		/* page was freed from under us. So we are done. */
+		goto out;
+	}
+
+	if (unlikely(PageTransHuge(page)))
+		if (unlikely(split_huge_page(page)))
+			goto out;
+
+	rc = __unmap_and_move(page, newpage, force, offlining, sync);
+	if (rc == -EAGAIN)
+		page = NULL;
+out:
+	__put_lru_pages(page, newpage);
+	if (result) {
+		if (rc)
+			*result = rc;
+		else
+			*result = page_to_nid(newpage);
+	}
+	return rc;
+}
+
+static inline void adjust_ilru_prev_page(struct inorder_lru *head,
+		struct page *prev_page, struct page *new_page)
+{
+	struct page *page;
+	list_for_each_migrate_entry(page, head, ilru)
+		if (page->ilru.prev_page == prev_page)
+			page->ilru.prev_page = new_page;
+}
+
+void __put_ilru_pages(struct page *page, struct page *newpage,
+		struct inorder_lru **prev_lru, struct inorder_lru *head)
+{
+	struct zone *zone;
+	bool del = false;
+	struct page *prev_page = page->ilru.prev_page;
+	if (page != NULL) {
+		/*
+		 * A page that has been migrated has all references
+		 * removed and will be freed. A page that has not been
+		 * migrated will have kept its references and be
+		 * restored.
+		 */
+		migratelist_del(page, *prev_lru);
+		dec_zone_page_state(page, NR_ISOLATED_ANON +
+				page_is_file_cache(page));
+		/*
+		 * Unlike unmap_and_move, we defer putback_lru_page until
+		 * after the inorder-lru list handling. If we called it here,
+		 * the page would already be freed and no longer have PG_lru,
+		 * so same_lru would not work correctly.
+		 */
+		del = true;
+	}
+	else
+		*prev_lru = &page->ilru;
+	/*
+	 * Move the new page to the LRU. If migration was not successful
+	 * then this will free the page.
+	 */
+	zone = page_zone(newpage);
+	spin_lock_irq(&zone->lru_lock);
+	if (page && same_lru(page, prev_page)) {
+		putback_page_to_lru(newpage, prev_page);
+		spin_unlock_irq(&zone->lru_lock);
+		trace_mm_compaction_inorder(page, newpage);
+		/*
+		 * The newpage will take over the LRU position of the old
+		 * page, and the old one will be freed. So adjust prev_page
+		 * of the pages remaining in the migrate list so that
+		 * same_lru keeps working.
+		 */
+		adjust_ilru_prev_page(head, page, newpage);
+		put_page(newpage);	/* drop ref from isolate */
+	}
+	else {
+		spin_unlock_irq(&zone->lru_lock);
+		trace_mm_compaction_inorder(page, newpage);
+		putback_lru_page(newpage);
+	}
+
+	if (del)
+		putback_lru_page(page);
+}
+
+/*
+ * Counterpart of unmap_and_move() for compaction.
+ * The logic is almost the same as unmap_and_move. The difference is that
+ * this function handles prev_lru. For inorder-lru compaction, we use a
+ * singly linked list, so we need the prev pointer to delete an entry.
+ */
+static int unmap_and_move_ilru(new_page_t get_new_page, unsigned long private,
+		struct page *page, int force, bool offlining, bool sync,
+		struct inorder_lru **prev_lru, struct inorder_lru *head)
+{
+	int rc = 0;
+	int *result = NULL;
+	struct page *newpage = get_new_page(page, private, &result);
+
+	if (!newpage)
+		return -ENOMEM;
+
+	if (page_count(page) == 1) {
+		/* page was freed from under us. So we are done. */
+		goto out;
+	}
+
+	if (unlikely(PageTransHuge(page)))
+		if (unlikely(split_huge_page(page)))
+			goto out;
+	rc = __unmap_and_move(page, newpage, force, offlining, sync);
+	if (rc == -EAGAIN)
+		page = NULL;
+out:
+	__put_ilru_pages(page, newpage, prev_lru, head);
 	if (result) {
 		if (rc)
 			*result = rc;
@@ -817,6 +961,7 @@ move_newpage:
 		*result = page_to_nid(newpage);
 	}
 	return rc;
+
 }
 
 /*
@@ -960,6 +1105,56 @@ out:
 	return nr_failed + retry;
 }
 
+int migrate_ilru_pages(struct inorder_lru *head, new_page_t get_new_page,
+		unsigned long private, bool offlining, bool sync)
+{
+	int retry = 1;
+	int nr_failed = 0;
+	int pass = 0;
+	struct page *page, *page2;
+	struct inorder_lru *prev;
+	int swapwrite = current->flags & PF_SWAPWRITE;
+	int rc;
+
+	if (!swapwrite)
+		current->flags |= PF_SWAPWRITE;
+
+	for (pass = 0; pass < 10 && retry; pass++) {
+		retry = 0;
+		list_for_each_migrate_entry_safe(page, page2, head, ilru) {
+			cond_resched();
+
+			prev = head;
+			rc = unmap_and_move_ilru(get_new_page, private,
+					page, pass > 2, offlining,
+					sync, &prev, head);
+
+			switch (rc) {
+			case -ENOMEM:
+				goto out;
+			case -EAGAIN:
+				retry++;
+				break;
+			case 0:
+				break;
+			default:
+				/* Permanent failure */
+				nr_failed++;
+				break;
+			}
+		}
+	}
+	rc = 0;
+out:
+	if (!swapwrite)
+		current->flags &= ~PF_SWAPWRITE;
+
+	if (rc)
+		return rc;
+
+	return nr_failed + retry;
+}
+
 int migrate_huge_pages(struct list_head *from,
 		new_page_t get_new_page, unsigned long private, bool offlining,
 		bool sync)
@@ -1363,10 +1558,10 @@ int migrate_vmas(struct mm_struct *mm, const nodemask_t *to,
 
 	for (vma = mm->mmap; vma && !err; vma = vma->vm_next) {
 		if (vma->vm_ops && vma->vm_ops->migrate) {
-			err = vma->vm_ops->migrate(vma, to, from, flags);
+			err = vma->vm_ops->migrate(vma, to, from, flags);
 			if (err)
 				break;
-		}
+		}
 	}
 	return err;
 }
diff --git a/mm/swap.c b/mm/swap.c
index 5602f1a..6c24a75 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -252,7 +252,7 @@ void rotate_reclaimable_page(struct page *page)
 	}
 }
 
-static void update_page_reclaim_stat(struct zone *zone, struct page *page,
+void update_page_reclaim_stat(struct zone *zone, struct page *page,
 				     int file, int rotated)
 {
 	struct zone_reclaim_stat *reclaim_stat = &zone->reclaim_stat;
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 8bfd450..5af1ba0 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -49,10 +49,9 @@
 #include
 #include "internal.h"
-
+#include
 #define CREATE_TRACE_POINTS
 #include <trace/events/vmscan.h>
-
 /*
  * reclaim_mode determines how the inactive list is shrunk
  * RECLAIM_MODE_SINGLE: Reclaim only order-0 pages
@@ -550,11 +549,61 @@ int remove_mapping(struct address_space *mapping, struct page *page)
 	return 0;
 }
 
+/*
+ * If prev and page are still on the same LRU, we can keep the LRU order of page.
+ * zone->lru_lock must be held.
+ */
+bool same_lru(struct page *page, struct page *prev)
+{
+	bool ret = false;
+	if (!prev || !PageLRU(prev))
+		goto out;
+
+	if (unlikely(PageUnevictable(prev)))
+		goto out;
+
+	if (page_lru_base_type(page) != page_lru_base_type(prev))
+		goto out;
+
+	ret = true;
+out:
+	return ret;
+}
+
+/**
+ * putback_page_to_lru - put isolated @page onto @head
+ * @page: page to be put back to appropriate lru list
+ * @head_page: lru position to be put back
+ *
+ * Insert the previously isolated @page at the appropriate position of the lru list.
+ * zone->lru_lock must be held.
+ */
+void putback_page_to_lru(struct page *page, struct page *head_page)
+{
+	int lru, active, file;
+	struct zone *zone = page_zone(page);
+
+	VM_BUG_ON(PageLRU(page));
+
+	lru = page_lru(head_page);
+	active = is_active_lru(lru);
+	file = is_file_lru(lru);
+
+	if (active)
+		SetPageActive(page);
+	else
+		ClearPageActive(page);
+
+	update_page_reclaim_stat(zone, page, file, active);
+	SetPageLRU(page);
+	__add_page_to_lru_list(zone, page, lru, &head_page->lru);
+}
+
 /**
- * putback_lru_page - put previously isolated page onto appropriate LRU list
+ * putback_lru_page - put previously isolated page onto appropriate LRU list's head
  * @page: page to be put back to appropriate lru list
  *
- * Add previously isolated @page to appropriate LRU list.
+ * Add previously isolated @page to appropriate LRU list's head
  * Page may still be unevictable for other reasons.
 *
  * lru_lock must not be held, interrupts must be enabled.
@@ -954,26 +1003,33 @@ keep_lumpy:
  *
  * page:	page to consider
  * mode:	one of the LRU isolation modes defined above
+ * file:	True [1] if isolating file [!anon] pages
+ * prev_page:	prev page of isolated page in LRU order
  *
  * returns 0 on success, -ve errno on failure.
  */
-int __isolate_lru_page(struct page *page, int mode, int file)
+int __isolate_lru_page(struct page *page, enum ISOLATE_MODE mode,
+		int file, struct page **prev_page)
 {
+	bool all_lru_mode;
 	int ret = -EINVAL;
 
 	/* Only take pages on the LRU. */
 	if (!PageLRU(page))
 		return ret;
 
+	all_lru_mode = (mode & (ISOLATE_ACTIVE|ISOLATE_INACTIVE)) ==
+		(ISOLATE_ACTIVE|ISOLATE_INACTIVE);
+
 	/*
 	 * When checking the active state, we need to be sure we are
 	 * dealing with comparible boolean values.  Take the logical not
 	 * of each.
	 */
-	if (mode != ISOLATE_BOTH && (!PageActive(page) != !mode))
+	if (!all_lru_mode && !PageActive(page) != !(mode & ISOLATE_ACTIVE))
 		return ret;
 
-	if (mode != ISOLATE_BOTH && page_is_file_cache(page) != file)
+	if (!all_lru_mode && !!page_is_file_cache(page) != file)
 		return ret;
 
 	/*
@@ -986,6 +1042,12 @@ int __isolate_lru_page(struct page *page, int mode, int file)
 
 	ret = -EBUSY;
 
+	if (mode & ISOLATE_CLEAN && (PageDirty(page) || PageWriteback(page)))
+		return ret;
+
+	if (mode & ISOLATE_UNMAPPED && page_mapped(page))
+		return ret;
+
 	if (likely(get_page_unless_zero(page))) {
 		/*
 		 * Be careful not to clear PageLRU until after we're
 		 * sure the page is not being freed elsewhere -- the
 		 * page release code relies on it.
 		 */
 		ClearPageLRU(page);
+		if (prev_page != NULL) {
+			struct zone *zone = page_zone(page);
+			enum lru_list l = page_lru(page);
+
+			if (&zone->lru[l].list == page->lru.prev) {
+				*prev_page = NULL;
+				goto out;
+			}
+
+			*prev_page = lru_to_page(&page->lru);
+		}
+out:
 		ret = 0;
 	}
 
@@ -1021,7 +1095,8 @@ int __isolate_lru_page(struct page *page, int mode, int file)
  */
 static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
 		struct list_head *src, struct list_head *dst,
-		unsigned long *scanned, int order, int mode, int file)
+		unsigned long *scanned, int order, enum ISOLATE_MODE mode,
+		int file)
 {
 	unsigned long nr_taken = 0;
 	unsigned long nr_lumpy_taken = 0;
@@ -1041,7 +1116,7 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
 
 		VM_BUG_ON(!PageLRU(page));
 
-		switch (__isolate_lru_page(page, mode, file)) {
+		switch (__isolate_lru_page(page, mode, file, NULL)) {
 		case 0:
 			list_move(&page->lru, dst);
 			mem_cgroup_del_lru(page);
@@ -1100,7 +1175,8 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
 			    !PageSwapCache(cursor_page))
 				break;
 
-			if (__isolate_lru_page(cursor_page, mode, file) == 0) {
+			if (__isolate_lru_page(cursor_page,
+				mode, file, NULL) == 0) {
 				list_move(&cursor_page->lru, dst);
 				mem_cgroup_del_lru(cursor_page);
 				nr_taken += hpage_nr_pages(page);
@@ -1134,8 +1210,8 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
 static unsigned long isolate_pages_global(unsigned long nr,
 					struct list_head *dst,
 					unsigned long *scanned, int order,
-					int mode, struct zone *z,
-					int active, int file)
+					enum ISOLATE_MODE mode,
+					struct zone *z, int active, int file)
 {
 	int lru = LRU_BASE;
 	if (active)
@@ -1196,6 +1272,10 @@ static unsigned long clear_active_flags(struct list_head *page_list,
  *		without a stable reference).
  * (2) the lru_lock must not be held.
  * (3) interrupts must be enabled.
+ *
+ * NOTE: This function removes the page from the LRU list, and putback_lru_page
+ * inserts it at the head of the LRU list. That causes LRU churn, so use
+ * this function carefully.
  */
 int isolate_lru_page(struct page *page)
 {
@@ -1378,6 +1458,7 @@ shrink_inactive_list(unsigned long nr_to_scan, struct zone *zone,
 	unsigned long nr_taken;
 	unsigned long nr_anon;
 	unsigned long nr_file;
+	enum ISOLATE_MODE reclaim_mode = ISOLATE_INACTIVE;
 
 	while (unlikely(too_many_isolated(zone, file, sc))) {
 		congestion_wait(BLK_RW_ASYNC, HZ/10);
@@ -1388,15 +1469,21 @@ shrink_inactive_list(unsigned long nr_to_scan, struct zone *zone,
 	}
 
 	set_reclaim_mode(priority, sc, false);
+	if (sc->reclaim_mode & RECLAIM_MODE_LUMPYRECLAIM)
+		reclaim_mode |= ISOLATE_ACTIVE;
+
 	lru_add_drain();
+
+	if (!sc->may_unmap)
+		reclaim_mode |= ISOLATE_UNMAPPED;
+	if (!sc->may_writepage)
+		reclaim_mode |= ISOLATE_CLEAN;
+
 	spin_lock_irq(&zone->lru_lock);
 
 	if (scanning_global_lru(sc)) {
-		nr_taken = isolate_pages_global(nr_to_scan,
-			&page_list, &nr_scanned, sc->order,
-			sc->reclaim_mode & RECLAIM_MODE_LUMPYRECLAIM ?
-					ISOLATE_BOTH : ISOLATE_INACTIVE,
-			zone, 0, file);
+		nr_taken = isolate_pages_global(nr_to_scan, &page_list,
+			&nr_scanned, sc->order, reclaim_mode, zone, 0, file);
 		zone->pages_scanned += nr_scanned;
 		if (current_is_kswapd())
 			__count_zone_vm_events(PGSCAN_KSWAPD, zone,
@@ -1405,12 +1492,9 @@ shrink_inactive_list(unsigned long nr_to_scan, struct zone *zone,
 			__count_zone_vm_events(PGSCAN_DIRECT, zone,
 					       nr_scanned);
 	} else {
-		nr_taken = mem_cgroup_isolate_pages(nr_to_scan,
-			&page_list, &nr_scanned, sc->order,
-			sc->reclaim_mode & RECLAIM_MODE_LUMPYRECLAIM ?
-					ISOLATE_BOTH : ISOLATE_INACTIVE,
-			zone, sc->mem_cgroup,
-			0, file);
+		nr_taken = mem_cgroup_isolate_pages(nr_to_scan, &page_list,
+			&nr_scanned, sc->order, reclaim_mode, zone,
+			sc->mem_cgroup, 0, file);
 		/*
 		 * mem_cgroup_isolate_pages() keeps track of
 		 * scanned pages on its own.
@@ -1512,19 +1596,26 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone,
 	struct page *page;
 	struct zone_reclaim_stat *reclaim_stat = get_reclaim_stat(zone, sc);
 	unsigned long nr_rotated = 0;
+	enum ISOLATE_MODE reclaim_mode = ISOLATE_ACTIVE;
 
 	lru_add_drain();
+
+	if (!sc->may_unmap)
+		reclaim_mode |= ISOLATE_UNMAPPED;
+	if (!sc->may_writepage)
+		reclaim_mode |= ISOLATE_CLEAN;
+
 	spin_lock_irq(&zone->lru_lock);
 	if (scanning_global_lru(sc)) {
 		nr_taken = isolate_pages_global(nr_pages, &l_hold,
 						&pgscanned, sc->order,
-						ISOLATE_ACTIVE, zone,
+						reclaim_mode, zone,
 						1, file);
 		zone->pages_scanned += pgscanned;
 	} else {
 		nr_taken = mem_cgroup_isolate_pages(nr_pages, &l_hold,
 						&pgscanned, sc->order,
-						ISOLATE_ACTIVE, zone,
+						reclaim_mode, zone,
 						sc->mem_cgroup, 1, file);
 		/*
 		 * mem_cgroup_isolate_pages() keeps track of