diff --git a/Documentation/trace/postprocess/trace-vmscan-postprocess.pl b/Documentation/trace/postprocess/trace-vmscan-postprocess.pl index 12cecc8..4a37c47 100644 --- a/Documentation/trace/postprocess/trace-vmscan-postprocess.pl +++ b/Documentation/trace/postprocess/trace-vmscan-postprocess.pl @@ -379,10 +379,10 @@ EVENT_PROCESS: # To closer match vmstat scanning statistics, only count isolate_both # and isolate_inactive as scanning. isolate_active is rotation - # isolate_inactive == 0 - # isolate_active == 1 - # isolate_both == 2 - if ($isolate_mode != 1) { + # isolate_inactive == 1 + # isolate_active == 2 + # isolate_both == 3 + if ($isolate_mode != 2) { $perprocesspid{$process_pid}->{HIGH_NR_SCANNED} += $nr_scanned; } $perprocesspid{$process_pid}->{HIGH_NR_CONTIG_DIRTY} += $nr_contig_dirty; diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 970f32c..815d16e 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -35,7 +35,8 @@ enum mem_cgroup_page_stat_item { extern unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan, struct list_head *dst, unsigned long *scanned, int order, - int mode, struct zone *z, + isolate_mode_t mode, + struct zone *z, struct mem_cgroup *mem_cont, int active, int file); diff --git a/include/linux/migrate.h b/include/linux/migrate.h index e39aeec..62724e1 100644 --- a/include/linux/migrate.h +++ b/include/linux/migrate.h @@ -9,12 +9,99 @@ typedef struct page *new_page_t(struct page *, unsigned long private, int **); #ifdef CONFIG_MIGRATION #define PAGE_MIGRATION 1 +/* How to work inorder_lru + * Assumption : we isolate pages P3-P7 and we consider only prev LRU pointer. + * Notation : (P3,P2) = (isolated page, previous LRU page of isolated page) + * + * H - P1 - P2 - P3 - P4 - P5 - P6 - P7 - P8 - P9 - P10 - T + * + * If we isolate P3, + * + * H - P1 - P2 - P4 - P5 - P6 - P7 - P8 - P9 - P10 - T + * Isolated page list - (P3,P2) + * + * If we isolate P4, + * + * H - P1 - P2 - P5 - P6 - P7 - P8 - P9 - P10 - T + * Isolated page list - (P4,P2) - (P3,P2) + * + * If we isolate P5, + * + * H - P1 - P2 - P6 - P7 - P8 - P9 - P10 - T + * Isolated page list - (P5,P2) - (P4,P2) - (P3,P2) + * + * .. + * + * If we isolate P7, following as + * H - P1 - P2 - P8 - P9 - P10 - T + * Isolated page list - (P7,P2) - (P6,P2) - (P5,P2) - (P4,P2) - (P3,P2) + * + * Let's start putback from P7 + * + * P7. + * H - P1 - P2 - P8 - P9 - P10 - T + * prev P2 is on still LRU so P7 would be located at P2's next. + * H - P1 - P2 - P7 - P8 - P9 - P10 - T + * + * P6. + * H - P1 - P2 - P7 - P8 - P9 - P10 - T + * prev P2 is on still LRU so P6 would be located at P2's next. + * H - P1 - P2 - P6 - P7 - P8 - P9 - P10 - T + * + * P5. + * .. + * + * P3. + * H - P1 - P2 - P4 - P5 - P6 - P7 - P8 - P9 - P10 - T + * prev P2 is on still LRU so P3 would be located at P2's next. + * H - P1 - P2 - P3 - P4 - P5 - P6 - P7 - P8 - P9 - P10 - T + */ + +/* + * ilru_list is singly linked list and used for compaction + * for keeping LRU ordering. 
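+ *
+ * A condensed usage sketch (illustrative only; it mirrors how the
+ * mm/compaction.c hunks in this patch drive these helpers, with the
+ * locking, the pfn walk and error handling elided, cc being the
+ * compact_control):
+ *
+ *	struct inorder_lru migratelist;
+ *	struct page *page, *prev_page;
+ *
+ *	INIT_ILRU_LIST(&migratelist);
+ *
+ *	// under zone->lru_lock, for each candidate page:
+ *	if (!isolate_ilru_page(page, ISOLATE_ACTIVE|ISOLATE_INACTIVE,
+ *			       0, &prev_page)) {
+ *		del_page_from_lru_list(zone, page, page_lru(page));
+ *		ilru_list_add(page, prev_page, &migratelist);
+ *	}
+ *
+ *	// after dropping the lock:
+ *	err = migrate_ilru_pages(&migratelist, compaction_alloc,
+ *				 (unsigned long)cc, false, sync);
+ *	if (err)
+ *		putback_ilru_pages(&migratelist);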
+ */ +static inline void INIT_ILRU_LIST(struct inorder_lru *list) +{ + list->prev_page = NULL; + list->next = list; +} + +static inline int ilru_list_empty(const struct inorder_lru *head) +{ + return head->next == head; +} + +static inline void ilru_list_add(struct page *page, struct page *prev_page, + struct inorder_lru *head) +{ + VM_BUG_ON(PageLRU(page)); + + page->ilru.prev_page = prev_page; + page->ilru.next = head->next; + head->next = &page->ilru; +} + +static inline void ilru_list_del(struct page *page, struct inorder_lru *head) +{ + head->next = page->ilru.next; +} + +#define list_for_each_ilru_entry list_for_each_entry +#define list_for_each_ilru_entry_safe list_for_each_entry_safe + +extern void putback_ilru_pages(struct inorder_lru *l); extern void putback_lru_pages(struct list_head *l); extern int migrate_page(struct address_space *, struct page *, struct page *); extern int migrate_pages(struct list_head *l, new_page_t x, unsigned long private, bool offlining, bool sync); + +extern int migrate_ilru_pages(struct inorder_lru *l, new_page_t x, + unsigned long private, bool offlining, + bool sync); + extern int migrate_huge_pages(struct list_head *l, new_page_t x, unsigned long private, bool offlining, bool sync); diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 027935c..db192c7 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -24,6 +24,23 @@ struct address_space; #define USE_SPLIT_PTLOCKS (NR_CPUS >= CONFIG_SPLIT_PTLOCK_CPUS) +struct page; + +/* + * The inorder_lru is used by compaction for keeping LRU order + * during migration. + */ +struct inorder_lru { + /* prev LRU page of isolated page */ + struct page *prev_page; + union { + /* next for singly linked list*/ + struct inorder_lru *next; + /* the source page of migration */ + struct page *old_page; + }; +}; + /* * Each physical page in the system has a struct page associated with * it to keep track of whatever it is we are using the page for at the @@ -72,9 +89,12 @@ struct page { pgoff_t index; /* Our offset within mapping. */ void *freelist; /* SLUB: freelist req. slab lock */ }; - struct list_head lru; /* Pageout list, eg. active_list + union { + struct inorder_lru ilru;/* compaction: migrated page list */ + struct list_head lru; /* Pageout list, eg. active_list * protected by zone->lru_lock ! */ + }; /* * On machines where all RAM is mapped into kernel address space, * we can simply calculate the virtual address. On machines with diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 795ec6c..1d1791f 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -158,6 +158,18 @@ static inline int is_unevictable_lru(enum lru_list l) return (l == LRU_UNEVICTABLE); } +/* Isolate inactive pages */ +#define ISOLATE_INACTIVE ((__force fmode_t)0x1) +/* Isolate active pages */ +#define ISOLATE_ACTIVE ((__force fmode_t)0x2) +/* Isolate clean file */ +#define ISOLATE_CLEAN ((__force fmode_t)0x4) +/* Isolate unmapped file */ +#define ISOLATE_UNMAPPED ((__force fmode_t)0x8) + +/* LRU Isolation modes. 
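+ *
+ * The mode is a bitmask: ISOLATE_INACTIVE and ISOLATE_ACTIVE select which
+ * LRU lists may be isolated from (both bits together take the place of the
+ * old ISOLATE_BOTH), while ISOLATE_CLEAN and ISOLATE_UNMAPPED act as extra
+ * filters on the page state.
+ *
+ * An illustrative composition, mirroring the mm/vmscan.c changes later in
+ * this patch (sc is the vmscan scan_control):
+ *
+ *	isolate_mode_t mode = ISOLATE_INACTIVE;
+ *
+ *	if (sc->reclaim_mode & RECLAIM_MODE_LUMPYRECLAIM)
+ *		mode |= ISOLATE_ACTIVE;
+ *	if (!sc->may_unmap)
+ *		mode |= ISOLATE_UNMAPPED;
+ *	if (!sc->may_writepage)
+ *		mode |= ISOLATE_CLEAN;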
*/ +typedef unsigned __bitwise__ isolate_mode_t; + enum zone_watermarks { WMARK_MIN, WMARK_LOW, diff --git a/include/linux/pagevec.h b/include/linux/pagevec.h index bab82f4..8f609ea 100644 --- a/include/linux/pagevec.h +++ b/include/linux/pagevec.h @@ -23,6 +23,7 @@ struct pagevec { void __pagevec_release(struct pagevec *pvec); void __pagevec_free(struct pagevec *pvec); void ____pagevec_lru_add(struct pagevec *pvec, enum lru_list lru); +void ____pagevec_ilru_add(struct pagevec *pvec, enum lru_list lru); void pagevec_strip(struct pagevec *pvec); unsigned pagevec_lookup(struct pagevec *pvec, struct address_space *mapping, pgoff_t start, unsigned nr_pages); diff --git a/include/linux/swap.h b/include/linux/swap.h index 808690a..6aafb75 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -217,7 +217,11 @@ extern unsigned int nr_free_pagecache_pages(void); /* linux/mm/swap.c */ +extern void drain_ilru_pagevecs(int cpu); +extern void __ilru_cache_add(struct page *, enum lru_list lru); +extern void __ilru_cache_add(struct page *, enum lru_list lru); extern void __lru_cache_add(struct page *, enum lru_list lru); +extern void lru_cache_add_ilru(struct page *, enum lru_list lru); extern void lru_cache_add_lru(struct page *, enum lru_list lru); extern void lru_add_page_tail(struct zone* zone, struct page *page, struct page *page_tail); @@ -228,6 +232,8 @@ extern int lru_add_drain_all(void); extern void rotate_reclaimable_page(struct page *page); extern void deactivate_page(struct page *page); extern void swap_setup(void); +extern void update_page_reclaim_stat(struct zone *zone, struct page *page, + int file, int rotated); extern void add_page_to_unevictable_list(struct page *page); @@ -245,11 +251,6 @@ static inline void lru_cache_add_file(struct page *page) __lru_cache_add(page, LRU_INACTIVE_FILE); } -/* LRU Isolation modes. */ -#define ISOLATE_INACTIVE 0 /* Isolate inactive pages. */ -#define ISOLATE_ACTIVE 1 /* Isolate active pages. */ -#define ISOLATE_BOTH 2 /* Isolate both active and inactive pages. 
*/ - /* linux/mm/vmscan.c */ extern unsigned long try_to_free_pages(struct zonelist *zonelist, int order, gfp_t gfp_mask, nodemask_t *mask); @@ -261,7 +262,9 @@ extern unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *mem, unsigned int swappiness, struct zone *zone, unsigned long *nr_scanned); -extern int __isolate_lru_page(struct page *page, int mode, int file); +extern int __isolate_lru_page(struct page *page, isolate_mode_t mode, int file); +extern int isolate_ilru_page(struct page *page, isolate_mode_t mode, int file, + struct page **prev_page); extern unsigned long shrink_all_memory(unsigned long nr_pages); extern int vm_swappiness; extern int remove_mapping(struct address_space *mapping, struct page *page); diff --git a/include/trace/events/inorder_putback.h b/include/trace/events/inorder_putback.h new file mode 100644 index 0000000..fe81742 --- /dev/null +++ b/include/trace/events/inorder_putback.h @@ -0,0 +1,88 @@ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM inorder_putback + +#if !defined(_TRACE_INP_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_INP_H + +#include +#include + +TRACE_EVENT(mm_inorder_inorder, + + TP_PROTO(struct page *page, + struct page *old_page, + struct page *prev_page), + + TP_ARGS(page, old_page, prev_page), + + TP_STRUCT__entry( + __field(struct page *, page) + __field(struct page *, old_page) + __field(struct page *, prev_page) + ), + + TP_fast_assign( + __entry->page = page; + __entry->old_page = old_page; + __entry->prev_page = prev_page; + ), + + TP_printk("pfn=%lu old pfn=%lu prev_pfn=%lu active=%d", + page_to_pfn(__entry->page), + page_to_pfn(__entry->old_page), + page_to_pfn(__entry->prev_page), + PageActive(__entry->prev_page)) +); + +TRACE_EVENT(mm_inorder_outoforder, + TP_PROTO(struct page *page, + struct page *old_page, + struct page *prev_page), + + TP_ARGS(page, old_page, prev_page), + + TP_STRUCT__entry( + __field(struct page *, page) + __field(struct page *, old_page) + __field(struct page *, prev_page) + ), + + TP_fast_assign( + __entry->page = page; + __entry->old_page = old_page; + __entry->prev_page = prev_page; + ), + + TP_printk("pfn=%lu old pfn=%lu prev_pfn=%lu active=%d", + page_to_pfn(__entry->page), + page_to_pfn(__entry->old_page), + __entry->prev_page ? page_to_pfn(__entry->prev_page) : 0, + __entry->prev_page ? 
PageActive(__entry->prev_page) : 0) +); + +TRACE_EVENT(mm_inorder_isolate, + + TP_PROTO(struct page *prev_page, + struct page *page), + + TP_ARGS(prev_page, page), + + TP_STRUCT__entry( + __field(struct page *, prev_page) + __field(struct page *, page) + ), + + TP_fast_assign( + __entry->prev_page = prev_page; + __entry->page = page; + ), + + TP_printk("prev_pfn=%lu pfn=%lu active=%d", + page_to_pfn(__entry->prev_page), + page_to_pfn(__entry->page), PageActive(__entry->prev_page)) +); + +#endif /* _TRACE_INP_H */ + +/* This part must be outside protection */ +#include diff --git a/include/trace/events/vmscan.h b/include/trace/events/vmscan.h index b2c33bd..04203b8 100644 --- a/include/trace/events/vmscan.h +++ b/include/trace/events/vmscan.h @@ -189,7 +189,7 @@ DECLARE_EVENT_CLASS(mm_vmscan_lru_isolate_template, unsigned long nr_lumpy_taken, unsigned long nr_lumpy_dirty, unsigned long nr_lumpy_failed, - int isolate_mode), + isolate_mode_t isolate_mode), TP_ARGS(order, nr_requested, nr_scanned, nr_taken, nr_lumpy_taken, nr_lumpy_dirty, nr_lumpy_failed, isolate_mode), @@ -201,7 +201,7 @@ DECLARE_EVENT_CLASS(mm_vmscan_lru_isolate_template, __field(unsigned long, nr_lumpy_taken) __field(unsigned long, nr_lumpy_dirty) __field(unsigned long, nr_lumpy_failed) - __field(int, isolate_mode) + __field(isolate_mode_t, isolate_mode) ), TP_fast_assign( @@ -235,7 +235,7 @@ DEFINE_EVENT(mm_vmscan_lru_isolate_template, mm_vmscan_lru_isolate, unsigned long nr_lumpy_taken, unsigned long nr_lumpy_dirty, unsigned long nr_lumpy_failed, - int isolate_mode), + isolate_mode_t isolate_mode), TP_ARGS(order, nr_requested, nr_scanned, nr_taken, nr_lumpy_taken, nr_lumpy_dirty, nr_lumpy_failed, isolate_mode) @@ -250,7 +250,7 @@ DEFINE_EVENT(mm_vmscan_lru_isolate_template, mm_vmscan_memcg_isolate, unsigned long nr_lumpy_taken, unsigned long nr_lumpy_dirty, unsigned long nr_lumpy_failed, - int isolate_mode), + isolate_mode_t isolate_mode), TP_ARGS(order, nr_requested, nr_scanned, nr_taken, nr_lumpy_taken, nr_lumpy_dirty, nr_lumpy_failed, isolate_mode) diff --git a/mm/compaction.c b/mm/compaction.c index 6cc604b..a515639 100644 --- a/mm/compaction.c +++ b/mm/compaction.c @@ -28,17 +28,13 @@ */ struct compact_control { struct list_head freepages; /* List of free pages to migrate to */ - struct list_head migratepages; /* List of pages being migrated */ + struct inorder_lru migratepages;/* List of pages being migrated */ unsigned long nr_freepages; /* Number of isolated free pages */ unsigned long nr_migratepages; /* Number of pages to migrate */ unsigned long free_pfn; /* isolate_freepages search base */ unsigned long migrate_pfn; /* isolate_migratepages search base */ bool sync; /* Synchronous migration */ - /* Account for isolated anon and file pages */ - unsigned long nr_anon; - unsigned long nr_file; - unsigned int order; /* order a direct compactor needs */ int migratetype; /* MOVABLE, RECLAIMABLE etc */ struct zone *zone; @@ -223,17 +219,13 @@ static void isolate_freepages(struct zone *zone, static void acct_isolated(struct zone *zone, struct compact_control *cc) { struct page *page; - unsigned int count[NR_LRU_LISTS] = { 0, }; + unsigned int count[2] = { 0, }; - list_for_each_entry(page, &cc->migratepages, lru) { - int lru = page_lru_base_type(page); - count[lru]++; - } + list_for_each_ilru_entry(page, &cc->migratepages, ilru) + count[!!page_is_file_cache(page)]++; - cc->nr_anon = count[LRU_ACTIVE_ANON] + count[LRU_INACTIVE_ANON]; - cc->nr_file = count[LRU_ACTIVE_FILE] + count[LRU_INACTIVE_FILE]; - 
__mod_zone_page_state(zone, NR_ISOLATED_ANON, cc->nr_anon); - __mod_zone_page_state(zone, NR_ISOLATED_FILE, cc->nr_file); + __mod_zone_page_state(zone, NR_ISOLATED_ANON, count[0]); + __mod_zone_page_state(zone, NR_ISOLATED_FILE, count[1]); } /* Similar to reclaim, but different enough that they don't share logic */ @@ -268,7 +260,8 @@ static isolate_migrate_t isolate_migratepages(struct zone *zone, unsigned long low_pfn, end_pfn; unsigned long last_pageblock_nr = 0, pageblock_nr; unsigned long nr_scanned = 0, nr_isolated = 0; - struct list_head *migratelist = &cc->migratepages; + struct inorder_lru *migratelist = &cc->migratepages; + isolate_mode_t mode = ISOLATE_ACTIVE|ISOLATE_INACTIVE; /* Do not scan outside zone boundaries */ low_pfn = max(cc->migrate_pfn, zone->zone_start_pfn); @@ -302,7 +295,7 @@ static isolate_migrate_t isolate_migratepages(struct zone *zone, cond_resched(); spin_lock_irq(&zone->lru_lock); for (; low_pfn < end_pfn; low_pfn++) { - struct page *page; + struct page *page, *prev_page; bool locked = true; /* give a chance to irqs before checking need_resched() */ @@ -356,15 +349,18 @@ static isolate_migrate_t isolate_migratepages(struct zone *zone, continue; } + if (!cc->sync) + mode |= ISOLATE_CLEAN; + /* Try isolate the page */ - if (__isolate_lru_page(page, ISOLATE_BOTH, 0) != 0) + if (isolate_ilru_page(page, mode, 0, &prev_page) != 0) continue; VM_BUG_ON(PageTransCompound(page)); /* Successfully isolated */ del_page_from_lru_list(zone, page, page_lru(page)); - list_add(&page->lru, migratelist); + ilru_list_add(page, prev_page, migratelist); cc->nr_migratepages++; nr_isolated++; @@ -420,7 +416,7 @@ static void update_nr_listpages(struct compact_control *cc) int nr_freepages = 0; struct page *page; - list_for_each_entry(page, &cc->migratepages, lru) + list_for_each_ilru_entry(page, &cc->migratepages, ilru) nr_migratepages++; list_for_each_entry(page, &cc->freepages, lru) nr_freepages++; @@ -557,7 +553,8 @@ static int compact_zone(struct zone *zone, struct compact_control *cc) } nr_migrate = cc->nr_migratepages; - err = migrate_pages(&cc->migratepages, compaction_alloc, + err = migrate_ilru_pages(&cc->migratepages, + compaction_alloc, (unsigned long)cc, false, cc->sync); update_nr_listpages(cc); @@ -572,10 +569,12 @@ static int compact_zone(struct zone *zone, struct compact_control *cc) /* Release LRU pages not migrated */ if (err) { - putback_lru_pages(&cc->migratepages); + putback_ilru_pages(&cc->migratepages); cc->nr_migratepages = 0; } + drain_ilru_pagevecs(get_cpu()); + put_cpu(); } out: @@ -599,7 +598,7 @@ unsigned long compact_zone_order(struct zone *zone, .sync = sync, }; INIT_LIST_HEAD(&cc.freepages); - INIT_LIST_HEAD(&cc.migratepages); + INIT_ILRU_LIST(&cc.migratepages); return compact_zone(zone, &cc); } @@ -681,12 +680,12 @@ static int compact_node(int nid) cc.zone = zone; INIT_LIST_HEAD(&cc.freepages); - INIT_LIST_HEAD(&cc.migratepages); + INIT_ILRU_LIST(&cc.migratepages); compact_zone(zone, &cc); VM_BUG_ON(!list_empty(&cc.freepages)); - VM_BUG_ON(!list_empty(&cc.migratepages)); + VM_BUG_ON(!ilru_list_empty(&cc.migratepages)); } return 0; diff --git a/mm/internal.h b/mm/internal.h index d071d38..cb969e0 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -43,6 +43,7 @@ extern unsigned long highest_memmap_pfn; * in mm/vmscan.c: */ extern int isolate_lru_page(struct page *page); +extern void putback_ilru_page(struct page *page); extern void putback_lru_page(struct page *page); /* diff --git a/mm/memcontrol.c b/mm/memcontrol.c index aaff06f..5b544f0 100644 --- 
a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -1251,7 +1251,8 @@ mem_cgroup_get_reclaim_stat_from_page(struct page *page) unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan, struct list_head *dst, unsigned long *scanned, int order, - int mode, struct zone *z, + isolate_mode_t mode, + struct zone *z, struct mem_cgroup *mem_cont, int active, int file) { diff --git a/mm/migrate.c b/mm/migrate.c index 666e4e6..1267c45 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -39,6 +39,9 @@ #include "internal.h" +#define CREATE_TRACE_POINTS +#include + #define lru_to_page(_head) (list_entry((_head)->prev, struct page, lru)) /* @@ -84,6 +87,17 @@ void putback_lru_pages(struct list_head *l) } } +void putback_ilru_pages(struct inorder_lru *l) +{ + struct page *page, *page2; + list_for_each_ilru_entry_safe(page, page2, l, ilru) { + ilru_list_del(page, l); + dec_zone_page_state(page, NR_ISOLATED_ANON + + page_is_file_cache(page)); + page->ilru.old_page = page; + putback_ilru_page(page); + } +} /* * Restore a potential migration pte to a working pte entry */ @@ -621,38 +635,18 @@ static int move_to_new_page(struct page *newpage, struct page *page, return rc; } -/* - * Obtain the lock on page, remove all ptes and migrate the page - * to the newly allocated page in newpage. - */ -static int unmap_and_move(new_page_t get_new_page, unsigned long private, - struct page *page, int force, bool offlining, bool sync) +static int __unmap_and_move(struct page *page, struct page *newpage, + int force, bool offlining, bool sync) { - int rc = 0; - int *result = NULL; - struct page *newpage = get_new_page(page, private, &result); + int rc = -EAGAIN; int remap_swapcache = 1; int charge = 0; struct mem_cgroup *mem; struct anon_vma *anon_vma = NULL; - if (!newpage) - return -ENOMEM; - - if (page_count(page) == 1) { - /* page was freed from under us. So we are done. */ - goto move_newpage; - } - if (unlikely(PageTransHuge(page))) - if (unlikely(split_huge_page(page))) - goto move_newpage; - - /* prepare cgroup just returns 0 or -ENOMEM */ - rc = -EAGAIN; - if (!trylock_page(page)) { if (!force || !sync) - goto move_newpage; + goto out; /* * It's not safe for direct compaction to call lock_page. @@ -668,7 +662,7 @@ static int unmap_and_move(new_page_t get_new_page, unsigned long private, * altogether. */ if (current->flags & PF_MEMALLOC) - goto move_newpage; + goto out; lock_page(page); } @@ -785,26 +779,124 @@ uncharge: mem_cgroup_end_migration(mem, page, newpage, rc == 0); unlock: unlock_page(page); +out: + return rc; +} -move_newpage: +/* + * Obtain the lock on page, remove all ptes and migrate the page + * to the newly allocated page in newpage. + */ +static int unmap_and_move(new_page_t get_new_page, unsigned long private, + struct page *page, int force, bool offlining, bool sync) +{ + int rc = 0; + int *result = NULL; + struct page *newpage = get_new_page(page, private, &result); + + if (!newpage) + return -ENOMEM; + + if (page_count(page) == 1) { + /* page was freed from under us. So we are done. */ + goto out; + } + + if (unlikely(PageTransHuge(page))) + if (unlikely(split_huge_page(page))) + goto out; + + rc = __unmap_and_move(page, newpage, force, offlining, sync); +out: if (rc != -EAGAIN) { - /* - * A page that has been migrated has all references - * removed and will be freed. A page that has not been - * migrated will have kepts its references and be - * restored. - */ - list_del(&page->lru); + /* + * A page that has been migrated has all references + * removed and will be freed. 
A page that has not been + * migrated will have kepts its references and be + * restored. + */ + list_del(&page->lru); dec_zone_page_state(page, NR_ISOLATED_ANON + page_is_file_cache(page)); putback_lru_page(page); } - /* * Move the new page to the LRU. If migration was not successful * then this will free the page. */ putback_lru_page(newpage); + if (result) { + if (rc) + *result = rc; + else + *result = page_to_nid(newpage); + } + return rc; +} + +void __put_ilru_pages(struct page *page, struct page *newpage, + struct inorder_lru *prev_lru) +{ + struct page *prev_page; + prev_page = page->ilru.prev_page; + + newpage->ilru.prev_page = prev_page; + /* + * We need keeping old page which is the source page + * of migration for adjusting prev_page of pages in pagevec. + * Look at adjust_ilru_list. + */ + newpage->ilru.old_page = page; + /* + * A page that has been migrated has all references + * removed and will be freed. A page that has not been + * migrated will have kepts its references and be + * restored. + */ + ilru_list_del(page, prev_lru); + dec_zone_page_state(page, NR_ISOLATED_ANON + + page_is_file_cache(page)); + putback_lru_page(page); + + /* + * Move the new page to the LRU. If migration was not successful + * then this will free the page. + */ + putback_ilru_page(newpage); +} + +/* + * Counterpart of unmap_and_move() for compaction. + * The logic is almost same with unmap_and_move. The difference is that + * this function handles inorder_lru for locating new page into old pages's + * LRU position. + */ +static int unmap_and_move_ilru(new_page_t get_new_page, unsigned long private, + struct page *page, int force, bool offlining, bool sync, + struct inorder_lru *prev_lru) +{ + int rc = 0; + int *result = NULL; + struct page *newpage = get_new_page(page, private, &result); + + if (!newpage) + return -ENOMEM; + + if (page_count(page) == 1) { + /* page was freed from under us. So we are done. 
*/ + goto out; + } + + if (unlikely(PageTransHuge(page))) + if (unlikely(split_huge_page(page))) + goto out; + + rc = __unmap_and_move(page, newpage, force, offlining, sync); +out: + if (rc != -EAGAIN) + __put_ilru_pages(page, newpage, prev_lru); + else + putback_lru_page(newpage); if (result) { if (rc) @@ -812,6 +904,7 @@ move_newpage: else *result = page_to_nid(newpage); } + return rc; } @@ -915,7 +1008,7 @@ int migrate_pages(struct list_head *from, if (!swapwrite) current->flags |= PF_SWAPWRITE; - for(pass = 0; pass < 10 && retry; pass++) { + for (pass = 0; pass < 10 && retry; pass++) { retry = 0; list_for_each_entry_safe(page, page2, from, lru) { @@ -951,6 +1044,56 @@ out: return nr_failed + retry; } +int migrate_ilru_pages(struct inorder_lru *ihead, new_page_t get_new_page, + unsigned long private, bool offlining, bool sync) +{ + int retry = 1; + int nr_failed = 0; + int pass = 0; + struct page *page, *page2; + struct inorder_lru *prev; + int swapwrite = current->flags & PF_SWAPWRITE; + int rc; + + if (!swapwrite) + current->flags |= PF_SWAPWRITE; + + for (pass = 0; pass < 10 && retry; pass++) { + retry = 0; + prev = ihead; + list_for_each_ilru_entry_safe(page, page2, ihead, ilru) { + cond_resched(); + + rc = unmap_and_move_ilru(get_new_page, private, + page, pass > 2, offlining, sync, prev); + + switch (rc) { + case -ENOMEM: + goto out; + case -EAGAIN: + retry++; + prev = &page->ilru; + break; + case 0: + break; + default: + /* Permanent failure */ + nr_failed++; + break; + } + } + } + rc = 0; +out: + if (!swapwrite) + current->flags &= ~PF_SWAPWRITE; + + if (rc) + return rc; + + return nr_failed + retry; +} + int migrate_huge_pages(struct list_head *from, new_page_t get_new_page, unsigned long private, bool offlining, bool sync) diff --git a/mm/swap.c b/mm/swap.c index 3a442f1..c2cf0e2 100644 --- a/mm/swap.c +++ b/mm/swap.c @@ -32,11 +32,13 @@ #include #include +#include #include "internal.h" /* How many pages do we try to swap or page in/out together? 
*/ int page_cluster; +static DEFINE_PER_CPU(struct pagevec[NR_LRU_LISTS], ilru_add_pvecs); static DEFINE_PER_CPU(struct pagevec[NR_LRU_LISTS], lru_add_pvecs); static DEFINE_PER_CPU(struct pagevec, lru_rotate_pvecs); static DEFINE_PER_CPU(struct pagevec, lru_deactivate_pvecs); @@ -179,6 +181,33 @@ void put_pages_list(struct list_head *pages) } EXPORT_SYMBOL(put_pages_list); +static void pagevec_ilru_move_fn(struct pagevec *pvec, + void (*move_fn)(struct page *page, void *arg, int idx), + void *arg) +{ + int i; + struct zone *zone = NULL; + unsigned long flags = 0; + + for (i = 0; i < pagevec_count(pvec); i++) { + struct page *page = pvec->pages[i]; + struct zone *pagezone = page_zone(page); + + if (pagezone != zone) { + if (zone) + spin_unlock_irqrestore(&zone->lru_lock, flags); + zone = pagezone; + spin_lock_irqsave(&zone->lru_lock, flags); + } + + (*move_fn)(page, arg, i); + } + if (zone) + spin_unlock_irqrestore(&zone->lru_lock, flags); + release_pages(pvec->pages, pvec->nr, pvec->cold); + pagevec_reinit(pvec); +} + static void pagevec_lru_move_fn(struct pagevec *pvec, void (*move_fn)(struct page *page, void *arg), void *arg) @@ -252,7 +281,7 @@ void rotate_reclaimable_page(struct page *page) } } -static void update_page_reclaim_stat(struct zone *zone, struct page *page, +void update_page_reclaim_stat(struct zone *zone, struct page *page, int file, int rotated) { struct zone_reclaim_stat *reclaim_stat = &zone->reclaim_stat; @@ -348,6 +377,16 @@ void mark_page_accessed(struct page *page) EXPORT_SYMBOL(mark_page_accessed); +void __ilru_cache_add(struct page *page, enum lru_list lru) +{ + struct pagevec *pvec = &get_cpu_var(ilru_add_pvecs)[lru]; + + page_cache_get(page); + if (!pagevec_add(pvec, page)) + ____pagevec_ilru_add(pvec, lru); + put_cpu_var(ilru_add_pvecs); +} + void __lru_cache_add(struct page *page, enum lru_list lru) { struct pagevec *pvec = &get_cpu_var(lru_add_pvecs)[lru]; @@ -360,6 +399,25 @@ void __lru_cache_add(struct page *page, enum lru_list lru) EXPORT_SYMBOL(__lru_cache_add); /** + * lru_cache_add_ilru - add a page to a page list + * @page: the page to be added to the LRU. + * @lru: the LRU list to which the page is added. + */ +void lru_cache_add_ilru(struct page *page, enum lru_list lru) +{ + if (PageActive(page)) { + VM_BUG_ON(PageUnevictable(page)); + ClearPageActive(page); + } else if (PageUnevictable(page)) { + VM_BUG_ON(PageActive(page)); + ClearPageUnevictable(page); + } + + VM_BUG_ON(PageLRU(page) || PageActive(page) || PageUnevictable(page)); + __ilru_cache_add(page, lru); +} + +/** * lru_cache_add_lru - add a page to a page list * @page: the page to be added to the LRU. * @lru: the LRU list to which the page is added. @@ -467,6 +525,19 @@ static void lru_deactivate_fn(struct page *page, void *arg) update_page_reclaim_stat(zone, page, file, 0); } +void drain_ilru_pagevecs(int cpu) +{ + struct pagevec *pvecs = per_cpu(ilru_add_pvecs, cpu); + struct pagevec *pvec; + int lru; + + for_each_lru(lru) { + pvec = &pvecs[lru - LRU_BASE]; + if (pagevec_count(pvec)) + ____pagevec_ilru_add(pvec, lru); + } +} + /* * Drain pages out of the cpu's pagevecs. 
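 *
 * (For reference, a sketch of the in-order variant added by this patch:
 * putback_ilru_page() feeds lru_cache_add_ilru(), which stashes the page in
 * the per-cpu ilru_add_pvecs via __ilru_cache_add(); once a pagevec fills,
 * or when drain_ilru_pagevecs() or drain_cpu_pagevecs() run,
 * ____pagevec_ilru_add() moves the batch onto the LRU and
 * ____pagevec_ilru_add_fn() uses same_lru() to decide whether each page can
 * be spliced back right after its recorded prev_page.)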
* Either "cpu" is the current CPU, and preemption has already been @@ -484,6 +555,13 @@ static void drain_cpu_pagevecs(int cpu) ____pagevec_lru_add(pvec, lru); } + pvecs = per_cpu(ilru_add_pvecs, cpu); + for_each_lru(lru) { + pvec = &pvecs[lru - LRU_BASE]; + if (pagevec_count(pvec)) + ____pagevec_ilru_add(pvec, lru); + } + pvec = &per_cpu(lru_rotate_pvecs, cpu); if (pagevec_count(pvec)) { unsigned long flags; @@ -669,6 +747,132 @@ void lru_add_page_tail(struct zone* zone, } } +/* + * We need adjust prev_page of ilru_list when we putback newpage + * and free old page. Let's think about it. + * For example, + * + * Notation) + * PHY : page physical layout on memory + * LRU : page logical layout as LRU order + * ilru : inorder_lru list + * PN : old page(ie, source page of migration) + * PN' : new page(ie, destination page of migration) + * + * Let's assume there is below layout. + * PHY : H - P1 - P2 - P3 - P4 - P5 - T + * LRU : H - P5 - P4 - P3 - P2 - P1 - T + * ilru : + * + * We isolate P2,P3,P4 so inorder_lru has following as. + * + * PHY : H - P1 - P2 - P3 - P4 - P5 - T + * LRU : H - P5 - P1 - T + * ilru : (P4,P5) - (P3,P4) - (P2,P3) + * + * After 1st putback happens, + * + * PHY : H - P1 - P2 - P3 - P4 - P5 - T + * LRU : H - P5 - P4' - P1 - T + * ilru : (P3,P4) - (P2,P3) + * P4' is a newpage and P4(ie, old page) would freed + * + * In 2nd putback, P3 would try findding P4 but P4 would be freed. + * so same_lru returns 'false' so that inorder_lru doesn't work any more. + * The bad effect continues until P2. That's too bad. + * For fixing, we define adjust_ilru_list. It works following as. + * + * After 1st putback, + * + * PHY : H - P1 - P2 - P3 - P4 - P5 - T + * LRU : H - P5 - P4' - P1 - T + * ilru : (P3,P4') - (P2,P3) + * It replaces prev pointer of pages remained in inorder_lru list with + * new one's so in 2nd putback, + * + * PHY : H - P1 - P2 - P3 - P4 - P5 - T + * LRU : H - P5 - P4' - P3' - P1 - T + * ilru : (P2,P3') + * + * In 3rd putback, + * + * PHY : H - P1 - P2 - P3 - P4 - P5 - T + * LRU : H - P5 - P4' - P3' - P2' - P1 - T + * ilru : + */ +static inline void adjust_ilru_list(enum lru_list lru, + struct page *old_page, struct page *new_page, int idx) +{ + int i; + struct pagevec *pvec = &get_cpu_var(ilru_add_pvecs)[lru]; + for (i = idx + 1; i < pagevec_count(pvec); i++) { + struct page *page = pvec->pages[i]; + if (page->ilru.prev_page == old_page) + page->ilru.prev_page = new_page; + } +} + +/* + * Check if page and prev are on same LRU. + * zone->lru_lock must be hold. + */ +static bool same_lru(struct page *page, struct page *prev) +{ + bool ret = false; + if (!prev || !PageLRU(prev)) + goto out; + + if (unlikely(PageUnevictable(prev))) + goto out; + + if (page_lru_base_type(page) != page_lru_base_type(prev)) + goto out; + + ret = true; +out: + return ret; +} + +static void ____pagevec_ilru_add_fn(struct page *page, void *arg, int idx) +{ + enum lru_list lru = (enum lru_list)arg; + struct zone *zone = page_zone(page); + int file, active; + + struct page *prev_page = page->ilru.prev_page; + struct page *old_page = page->ilru.old_page; + + VM_BUG_ON(PageActive(page)); + VM_BUG_ON(PageUnevictable(page)); + VM_BUG_ON(PageLRU(page)); + + SetPageLRU(page); + + if (same_lru(page, prev_page)) { + active = PageActive(prev_page); + file = page_is_file_cache(page); + if (active) + SetPageActive(page); + /* + * The newpage will replace LRU position of old page. + * So let's adjust prev_page of pages remained + * in ilru_add_pvecs for same_lru wokring. 
+ */ + adjust_ilru_list(lru, old_page, page, idx); + __add_page_to_lru_list(zone, page, lru, &prev_page->lru); + trace_mm_inorder_inorder(page, old_page, prev_page); + } else { + file = is_file_lru(lru); + active = is_active_lru(lru); + if (active) + SetPageActive(page); + add_page_to_lru_list(zone, page, lru); + trace_mm_inorder_outoforder(page, old_page, prev_page); + } + + update_page_reclaim_stat(zone, page, file, active); +} + static void ____pagevec_lru_add_fn(struct page *page, void *arg) { enum lru_list lru = (enum lru_list)arg; @@ -691,6 +895,17 @@ static void ____pagevec_lru_add_fn(struct page *page, void *arg) * Add the passed pages to the LRU, then drop the caller's refcount * on them. Reinitialises the caller's pagevec. */ +void ____pagevec_ilru_add(struct pagevec *pvec, enum lru_list lru) +{ + VM_BUG_ON(is_unevictable_lru(lru)); + + pagevec_ilru_move_fn(pvec, ____pagevec_ilru_add_fn, (void *)lru); +} + +/* + * Add the passed pages to the LRU, then drop the caller's refcount + * on them. Reinitialises the caller's pagevec. + */ void ____pagevec_lru_add(struct pagevec *pvec, enum lru_list lru) { VM_BUG_ON(is_unevictable_lru(lru)); diff --git a/mm/vmscan.c b/mm/vmscan.c index 5ed24b9..48e0007 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -50,10 +50,9 @@ #include #include "internal.h" - +#include #define CREATE_TRACE_POINTS #include - /* * reclaim_mode determines how the inactive list is shrunk * RECLAIM_MODE_SINGLE: Reclaim only order-0 pages @@ -565,16 +564,7 @@ int remove_mapping(struct address_space *mapping, struct page *page) return 0; } -/** - * putback_lru_page - put previously isolated page onto appropriate LRU list - * @page: page to be put back to appropriate lru list - * - * Add previously isolated @page to appropriate LRU list. - * Page may still be unevictable for other reasons. - * - * lru_lock must not be held, interrupts must be enabled. - */ -void putback_lru_page(struct page *page) +static void __putback_lru_core(struct page *page, bool inorder) { int lru; int active = !!TestClearPageActive(page); @@ -593,7 +583,10 @@ redo: * We know how to handle that. */ lru = active + page_lru_base_type(page); - lru_cache_add_lru(page, lru); + if (inorder) + lru_cache_add_ilru(page, lru); + else + lru_cache_add_lru(page, lru); } else { /* * Put unevictable pages directly on zone's unevictable @@ -621,6 +614,7 @@ redo: if (lru == LRU_UNEVICTABLE && page_evictable(page, NULL)) { if (!isolate_lru_page(page)) { put_page(page); + inorder = false; goto redo; } /* This means someone else dropped this page from LRU @@ -637,6 +631,25 @@ redo: put_page(page); /* drop ref from isolate */ } +/** + * putback_lru_page - put previously isolated page onto appropriate LRU list's head + * @page: page to be put back to appropriate lru list + * + * Add previously isolated @page to appropriate LRU list's head + * Page may still be unevictable for other reasons. + * + * lru_lock must not be held, interrupts must be enabled. + */ +void putback_lru_page(struct page *page) +{ + __putback_lru_core(page, false); +} + +void putback_ilru_page(struct page *page) +{ + __putback_lru_core(page, true); +} + enum page_references { PAGEREF_RECLAIM, PAGEREF_RECLAIM_CLEAN, @@ -972,23 +985,27 @@ keep_lumpy: * * returns 0 on success, -ve errno on failure. */ -int __isolate_lru_page(struct page *page, int mode, int file) +int __isolate_lru_page(struct page *page, isolate_mode_t mode, int file) { + bool all_lru_mode; int ret = -EINVAL; /* Only take pages on the LRU. 
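 *
 * As an illustration of the new bitmask semantics checked further down,
 * with mode == ISOLATE_INACTIVE | ISOLATE_CLEAN:
 *
 *	an active page                    -> -EINVAL (ISOLATE_ACTIVE not set)
 *	a dirty or writeback page         -> -EBUSY  (ISOLATE_CLEAN is set)
 *	a clean inactive page of the
 *	requested anon/file type          -> 0, page is taken off the LRU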
*/ if (!PageLRU(page)) return ret; + all_lru_mode = (mode & (ISOLATE_ACTIVE|ISOLATE_INACTIVE)) == + (ISOLATE_ACTIVE|ISOLATE_INACTIVE); + /* * When checking the active state, we need to be sure we are * dealing with comparible boolean values. Take the logical not * of each. */ - if (mode != ISOLATE_BOTH && (!PageActive(page) != !mode)) + if (!all_lru_mode && !PageActive(page) != !(mode & ISOLATE_ACTIVE)) return ret; - if (mode != ISOLATE_BOTH && page_is_file_cache(page) != file) + if (!all_lru_mode && !!page_is_file_cache(page) != file) return ret; /* @@ -1001,6 +1018,12 @@ int __isolate_lru_page(struct page *page, int mode, int file) ret = -EBUSY; + if ((mode & ISOLATE_CLEAN) && (PageDirty(page) || PageWriteback(page))) + return ret; + + if ((mode & ISOLATE_UNMAPPED) && page_mapped(page)) + return ret; + if (likely(get_page_unless_zero(page))) { /* * Be careful not to clear PageLRU until after we're @@ -1015,6 +1038,29 @@ int __isolate_lru_page(struct page *page, int mode, int file) } /* + * It's same with __isolate_lru_page except that it returns previous page + * of page isolated as LRU order if isolation is successful. + */ +int isolate_ilru_page(struct page *page, isolate_mode_t mode, int file, + struct page **prev_page) +{ + int ret = __isolate_lru_page(page, mode, file); + if (!ret) { + struct zone *zone = page_zone(page); + enum lru_list l = page_lru(page); + if (&zone->lru[l].list == page->lru.prev) { + *prev_page = NULL; + return ret; + } + + *prev_page = lru_to_page(&page->lru); + trace_mm_inorder_isolate(*prev_page, page); + } + + return ret; +} + +/* * zone->lru_lock is heavily contended. Some of the functions that * shrink the lists perform better by taking out a batch of pages * and working on them outside the LRU lock. @@ -1036,7 +1082,8 @@ int __isolate_lru_page(struct page *page, int mode, int file) */ static unsigned long isolate_lru_pages(unsigned long nr_to_scan, struct list_head *src, struct list_head *dst, - unsigned long *scanned, int order, int mode, int file) + unsigned long *scanned, int order, isolate_mode_t mode, + int file) { unsigned long nr_taken = 0; unsigned long nr_lumpy_taken = 0; @@ -1161,8 +1208,8 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan, static unsigned long isolate_pages_global(unsigned long nr, struct list_head *dst, unsigned long *scanned, int order, - int mode, struct zone *z, - int active, int file) + isolate_mode_t mode, + struct zone *z, int active, int file) { int lru = LRU_BASE; if (active) @@ -1408,6 +1455,7 @@ shrink_inactive_list(unsigned long nr_to_scan, struct zone *zone, unsigned long nr_taken; unsigned long nr_anon; unsigned long nr_file; + isolate_mode_t reclaim_mode = ISOLATE_INACTIVE; while (unlikely(too_many_isolated(zone, file, sc))) { congestion_wait(BLK_RW_ASYNC, HZ/10); @@ -1418,15 +1466,21 @@ shrink_inactive_list(unsigned long nr_to_scan, struct zone *zone, } set_reclaim_mode(priority, sc, false); + if (sc->reclaim_mode & RECLAIM_MODE_LUMPYRECLAIM) + reclaim_mode |= ISOLATE_ACTIVE; + lru_add_drain(); + + if (!sc->may_unmap) + reclaim_mode |= ISOLATE_UNMAPPED; + if (!sc->may_writepage) + reclaim_mode |= ISOLATE_CLEAN; + spin_lock_irq(&zone->lru_lock); if (scanning_global_lru(sc)) { - nr_taken = isolate_pages_global(nr_to_scan, - &page_list, &nr_scanned, sc->order, - sc->reclaim_mode & RECLAIM_MODE_LUMPYRECLAIM ? 
- ISOLATE_BOTH : ISOLATE_INACTIVE, - zone, 0, file); + nr_taken = isolate_pages_global(nr_to_scan, &page_list, + &nr_scanned, sc->order, reclaim_mode, zone, 0, file); zone->pages_scanned += nr_scanned; if (current_is_kswapd()) __count_zone_vm_events(PGSCAN_KSWAPD, zone, @@ -1435,12 +1489,9 @@ shrink_inactive_list(unsigned long nr_to_scan, struct zone *zone, __count_zone_vm_events(PGSCAN_DIRECT, zone, nr_scanned); } else { - nr_taken = mem_cgroup_isolate_pages(nr_to_scan, - &page_list, &nr_scanned, sc->order, - sc->reclaim_mode & RECLAIM_MODE_LUMPYRECLAIM ? - ISOLATE_BOTH : ISOLATE_INACTIVE, - zone, sc->mem_cgroup, - 0, file); + nr_taken = mem_cgroup_isolate_pages(nr_to_scan, &page_list, + &nr_scanned, sc->order, reclaim_mode, zone, + sc->mem_cgroup, 0, file); /* * mem_cgroup_isolate_pages() keeps track of * scanned pages on its own. @@ -1542,19 +1593,26 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone, struct page *page; struct zone_reclaim_stat *reclaim_stat = get_reclaim_stat(zone, sc); unsigned long nr_rotated = 0; + isolate_mode_t reclaim_mode = ISOLATE_ACTIVE; lru_add_drain(); + + if (!sc->may_unmap) + reclaim_mode |= ISOLATE_UNMAPPED; + if (!sc->may_writepage) + reclaim_mode |= ISOLATE_CLEAN; + spin_lock_irq(&zone->lru_lock); if (scanning_global_lru(sc)) { nr_taken = isolate_pages_global(nr_pages, &l_hold, &pgscanned, sc->order, - ISOLATE_ACTIVE, zone, + reclaim_mode, zone, 1, file); zone->pages_scanned += pgscanned; } else { nr_taken = mem_cgroup_isolate_pages(nr_pages, &l_hold, &pgscanned, sc->order, - ISOLATE_ACTIVE, zone, + reclaim_mode, zone, sc->mem_cgroup, 1, file); /* * mem_cgroup_isolate_pages() keeps track of
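
To see the putback ordering outside the kernel, here is a small standalone
userspace model (illustrative only: struct upage, lru_insert_after() and the
rest are hypothetical stand-ins for struct page and the zone LRU list, not
kernel code). It replays the P1-P5 example from the mm/swap.c comment above:
P2-P4 are isolated while recording each page's LRU predecessor, each is
"migrated" to a replacement page, and the replacements are put back next to
the recorded predecessor, with pending predecessors re-pointed the way
adjust_ilru_list() does, so the final LRU order is H - P5 - P4' - P3' - P2' -
P1 - T.

/*
 * ilru-model.c: standalone userspace model of the in-order putback scheme
 * above.  Purely illustrative; struct upage and these helpers are
 * hypothetical stand-ins for struct page and the zone LRU list.
 * Build with: gcc -Wall -o ilru-model ilru-model.c
 */
#include <stdio.h>

struct upage {
	char name[8];
	int on_lru;			/* models PageLRU()		*/
	struct upage *prev, *next;	/* models the zone LRU linkage	*/
};

/* one isolated page plus the LRU neighbour it sat behind (ilru.prev_page) */
struct ientry {
	struct upage *page;
	struct upage *prev_page;
};

static struct upage head = { .name = "H" }, tail = { .name = "T" };

static void lru_insert_after(struct upage *prev, struct upage *page)
{
	page->prev = prev;
	page->next = prev->next;
	prev->next->prev = page;
	prev->next = page;
	page->on_lru = 1;
}

static void lru_remove(struct upage *page)
{
	page->prev->next = page->next;
	page->next->prev = page->prev;
	page->on_lru = 0;
}

static void print_lru(const char *when)
{
	struct upage *p;

	printf("%-26s H", when);
	for (p = head.next; p != &tail; p = p->next)
		printf(" - %s", p->name);
	printf(" - T\n");
}

int main(void)
{
	struct upage pages[5], newpages[3];
	struct ientry isolated[3];
	int i, j, nr = 0;

	head.next = &tail;
	tail.prev = &head;

	/* build LRU order H - P5 - P4 - P3 - P2 - P1 - T */
	for (i = 0; i < 5; i++) {
		snprintf(pages[i].name, sizeof(pages[i].name), "P%d", i + 1);
		lru_insert_after(&head, &pages[i]);
	}
	print_lru("initial LRU:");

	/* isolate P2, P3, P4 in pfn order, recording each LRU predecessor */
	for (i = 1; i <= 3; i++) {
		isolated[nr].page = &pages[i];
		isolated[nr].prev_page = pages[i].prev;
		nr++;
		lru_remove(&pages[i]);
	}
	print_lru("after isolating P2-P4:");

	/* "migrate" and put back, newest isolation first, which is the
	 * order the singly linked inorder_lru list is walked */
	for (i = nr - 1; i >= 0; i--) {
		struct upage *oldpage = isolated[i].page;
		struct upage *prev = isolated[i].prev_page;
		struct upage *newpage = &newpages[i];

		snprintf(newpage->name, sizeof(newpage->name), "%s'",
			 oldpage->name);

		if (prev->on_lru)		/* models same_lru()	  */
			lru_insert_after(prev, newpage);
		else				/* fallback to list head  */
			lru_insert_after(&head, newpage);

		/* models adjust_ilru_list(): entries not yet put back that
		 * recorded the old page as predecessor now point at its
		 * replacement, so the next same_lru() check still succeeds */
		for (j = 0; j < i; j++)
			if (isolated[j].prev_page == oldpage)
				isolated[j].prev_page = newpage;
	}
	print_lru("after in-order putback:");
	return 0;
}

Dropping the adjust step in this model makes P3' and P2' fall back to the
head of the list, which is exactly the out-of-order putback the patch is
trying to avoid.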