Subject: remove lumpy_reclaim

From: Andrea Arcangeli

Even with memory compaction enabled, without this patch, after:

	echo always >/sys/kernel/mm/transparent_hugepage/defrag

the system becomes unusable and swaps despite gigabytes of clean cache
available in the inactive list. Any frequent high-order allocation
(despite being done with noretry/nomemalloc/nowarn) grinds the system to
an unusable state.

Signed-off-by: Andrea Arcangeli
---

diff --git a/mm/vmscan.c b/mm/vmscan.c
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -75,12 +75,6 @@ struct scan_control {
 
 	int order;
 
-	/*
-	 * Intend to reclaim enough contenious memory rather than to reclaim
-	 * enough amount memory. I.e, it's the mode for high order allocation.
-	 */
-	bool lumpy_reclaim_mode;
-
 	/* Which cgroup do we reclaim from */
 	struct mem_cgroup *mem_cgroup;
 
@@ -572,10 +566,6 @@ static enum page_references page_check_r
 	referenced_ptes = page_referenced(page, 1, sc->mem_cgroup, &vm_flags);
 	referenced_page = TestClearPageReferenced(page);
 
-	/* Lumpy reclaim - ignore references */
-	if (sc->lumpy_reclaim_mode)
-		return PAGEREF_RECLAIM;
-
 	/*
 	 * Mlock lost the isolation race with us. Let try_to_unmap()
 	 * move the page to the unevictable list.
@@ -1131,7 +1121,6 @@ static unsigned long shrink_inactive_lis
 		return SWAP_CLUSTER_MAX;
 	}
 
-
 	pagevec_init(&pvec, 1);
 
 	lru_add_drain();
@@ -1143,14 +1132,14 @@ static unsigned long shrink_inactive_lis
 		unsigned long nr_freed;
 		unsigned long nr_active;
 		unsigned int count[NR_LRU_LISTS] = { 0, };
-		int mode = sc->lumpy_reclaim_mode ? ISOLATE_BOTH : ISOLATE_INACTIVE;
 		unsigned long nr_anon;
 		unsigned long nr_file;
 
 		if (scanning_global_lru(sc)) {
 			nr_taken = isolate_pages_global(SWAP_CLUSTER_MAX,
 							&page_list, &nr_scan,
-							sc->order, mode,
+							sc->order,
+							ISOLATE_INACTIVE,
 							zone, 0, file);
 			zone->pages_scanned += nr_scan;
 			if (current_is_kswapd())
@@ -1162,7 +1151,8 @@ static unsigned long shrink_inactive_lis
 		} else {
 			nr_taken = mem_cgroup_isolate_pages(SWAP_CLUSTER_MAX,
 							&page_list, &nr_scan,
-							sc->order, mode,
+							sc->order,
+							ISOLATE_INACTIVE,
 							zone, sc->mem_cgroup,
 							0, file);
 			/*
@@ -1199,27 +1189,6 @@ static unsigned long shrink_inactive_lis
 		nr_scanned += nr_scan;
 		nr_freed = shrink_page_list(&page_list, sc, PAGEOUT_IO_ASYNC);
 
-		/*
-		 * If we are direct reclaiming for contiguous pages and we do
-		 * not reclaim everything in the list, try again and wait
-		 * for IO to complete. This will stall high-order allocations
-		 * but that should be acceptable to the caller
-		 */
-		if (nr_freed < nr_taken && !current_is_kswapd() &&
-		    sc->lumpy_reclaim_mode) {
-			congestion_wait(BLK_RW_ASYNC, HZ/10);
-
-			/*
-			 * The attempt at page out may have made some
-			 * of the pages active, mark them inactive again.
-			 */
-			nr_active = clear_active_flags(&page_list, count);
-			count_vm_events(PGDEACTIVATE, nr_active);
-
-			nr_freed += shrink_page_list(&page_list, sc,
-						     PAGEOUT_IO_SYNC);
-		}
-
 		nr_reclaimed += nr_freed;
 
 		local_irq_disable();
@@ -1641,21 +1610,6 @@ out:
 	}
 }
 
-static void set_lumpy_reclaim_mode(int priority, struct scan_control *sc)
-{
-	/*
-	 * If we need a large contiguous chunk of memory, or have
-	 * trouble getting a small set of contiguous pages, we
-	 * will reclaim both active and inactive pages.
-	 */
-	if (sc->order > PAGE_ALLOC_COSTLY_ORDER)
-		sc->lumpy_reclaim_mode = 1;
-	else if (sc->order && priority < DEF_PRIORITY - 2)
-		sc->lumpy_reclaim_mode = 1;
-	else
-		sc->lumpy_reclaim_mode = 0;
-}
-
 /*
  * This is a basic per-zone page freer. Used by both kswapd and direct reclaim.
  */
@@ -1670,8 +1624,6 @@ static void shrink_zone(int priority, st
 
 	get_scan_count(zone, sc, nr, priority);
 
-	set_lumpy_reclaim_mode(priority, sc);
-
 	while (nr[LRU_INACTIVE_ANON] || nr[LRU_ACTIVE_FILE] ||
 					nr[LRU_INACTIVE_FILE]) {
 		for_each_evictable_lru(l) {
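
Note (illustration only, not part of the patch): the "high-order allocation
... with noretry/nomemalloc/nowarn" mentioned in the changelog refers to a
costly GFP request that opts out of retries, memory reserves and failure
warnings. A minimal sketch of such a caller is below; the function name and
exact flag combination are assumptions made for illustration, not code from
this series.

	#include <linux/gfp.h>

	/*
	 * Hypothetical example of the kind of allocation the changelog
	 * describes: order > PAGE_ALLOC_COSTLY_ORDER, expected to fail
	 * cheaply rather than force the VM into aggressive reclaim.
	 */
	static struct page *alloc_huge_page_sketch(void)
	{
		gfp_t gfp = GFP_HIGHUSER_MOVABLE | __GFP_COMP |
			    __GFP_NORETRY | __GFP_NOMEMALLOC | __GFP_NOWARN;

		return alloc_pages(gfp, 9);	/* order-9 == 2MB on x86-64 */
	}

Before this patch, even such a "fail fast" request could flip reclaim into
lumpy mode and evict active pages; afterwards it only trims the inactive
list and relies on compaction for contiguity.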