Subject: use compaction for GFP_ATOMIC order > 0 From: Andrea Arcangeli This takes advantage of memory compaction to properly generate pages of order > 0 if regular page reclaim fails and priority level becomes more severe and we don't reach the proper watermarks. Signed-off-by: Andrea Arcangeli --- diff --git a/include/linux/compaction.h b/include/linux/compaction.h --- a/include/linux/compaction.h +++ b/include/linux/compaction.h @@ -11,6 +11,9 @@ /* The full zone was compacted */ #define COMPACT_COMPLETE 3 +#define COMPACT_MODE_DIRECT_RECLAIM 0 +#define COMPACT_MODE_KSWAPD 1 + #ifdef CONFIG_COMPACTION extern int sysctl_compact_memory; extern int sysctl_compaction_handler(struct ctl_table *table, int write, @@ -20,6 +23,9 @@ extern int sysctl_extfrag_handler(struct void __user *buffer, size_t *length, loff_t *ppos); extern int fragmentation_index(struct zone *zone, unsigned int order); +extern unsigned long compact_zone_order(struct zone *zone, + int order, gfp_t gfp_mask, + int compact_mode); extern unsigned long try_to_compact_pages(struct zonelist *zonelist, int order, gfp_t gfp_mask, nodemask_t *mask); @@ -59,6 +65,13 @@ static inline unsigned long try_to_compa return COMPACT_CONTINUE; } +static inline unsigned long compact_zone_order(struct zone *zone, + int order, gfp_t gfp_mask, + int compact_mode) +{ + return COMPACT_CONTINUE; +} + static inline void defer_compaction(struct zone *zone) { } diff --git a/mm/compaction.c b/mm/compaction.c --- a/mm/compaction.c +++ b/mm/compaction.c @@ -38,6 +38,8 @@ struct compact_control { unsigned int order; /* order a direct compactor needs */ int migratetype; /* MOVABLE, RECLAIMABLE etc */ struct zone *zone; + + int compact_mode; }; static unsigned long release_freepages(struct list_head *freelist) @@ -356,10 +358,10 @@ static void update_nr_listpages(struct c } static int compact_finished(struct zone *zone, - struct compact_control *cc) + struct compact_control *cc) { unsigned int order; - unsigned long watermark = low_wmark_pages(zone) + (1 << cc->order); + unsigned long watermark; if (fatal_signal_pending(current)) return COMPACT_PARTIAL; @@ -369,12 +371,27 @@ static int compact_finished(struct zone return COMPACT_COMPLETE; /* Compaction run is not finished if the watermark is not met */ + if (cc->compact_mode != COMPACT_MODE_KSWAPD) + watermark = low_wmark_pages(zone); + else + watermark = high_wmark_pages(zone); + watermark += (1 << cc->order); + if (!zone_watermark_ok(zone, cc->order, watermark, 0, 0)) return COMPACT_CONTINUE; if (cc->order == -1) return COMPACT_CONTINUE; + /* + * Generating only one page of the right order is not enough + * for kswapd, we must continue until we're above the high + * watermark as a pool for high order GFP_ATOMIC allocations + * too. + */ + if (cc->compact_mode == COMPACT_MODE_KSWAPD) + return COMPACT_CONTINUE; + /* Direct compactor: Is a suitable page free? */ for (order = cc->order; order < MAX_ORDER; order++) { /* Job done if page is free of the right migratetype */ @@ -432,8 +449,9 @@ static int compact_zone(struct zone *zon return ret; } -static unsigned long compact_zone_order(struct zone *zone, - int order, gfp_t gfp_mask) +unsigned long compact_zone_order(struct zone *zone, + int order, gfp_t gfp_mask, + int compact_mode) { struct compact_control cc = { .nr_freepages = 0, @@ -441,6 +459,7 @@ static unsigned long compact_zone_order( .order = order, .migratetype = allocflags_to_migratetype(gfp_mask), .zone = zone, + .compact_mode = compact_mode, }; INIT_LIST_HEAD(&cc.freepages); INIT_LIST_HEAD(&cc.migratepages); @@ -475,7 +494,7 @@ unsigned long try_to_compact_pages(struc * made because an assumption is made that the page allocator can satisfy * the "cheaper" orders without taking special steps */ - if (order <= PAGE_ALLOC_COSTLY_ORDER || !may_enter_fs || !may_perform_io) + if (!order || !may_enter_fs || !may_perform_io) return rc; count_vm_event(COMPACTSTALL); @@ -516,7 +535,8 @@ unsigned long try_to_compact_pages(struc break; } - status = compact_zone_order(zone, order, gfp_mask); + status = compact_zone_order(zone, order, gfp_mask, + COMPACT_MODE_DIRECT_RECLAIM); rc = max(status, rc); if (zone_watermark_ok(zone, order, watermark, 0, 0)) @@ -546,6 +566,7 @@ static int compact_node(int nid) .nr_freepages = 0, .nr_migratepages = 0, .order = -1, + .compact_mode = COMPACT_MODE_DIRECT_RECLAIM, }; zone = &pgdat->node_zones[zoneid]; diff --git a/mm/vmscan.c b/mm/vmscan.c --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -40,6 +40,7 @@ #include #include #include +#include #include #include @@ -2056,6 +2057,7 @@ loop_again: struct zone *zone = pgdat->node_zones + i; int nr_slab; int nid, zid; + int compaction; if (!populated_zone(zone)) continue; @@ -2087,9 +2089,25 @@ loop_again: lru_pages); sc.nr_reclaimed += reclaim_state->reclaimed_slab; total_scanned += sc.nr_scanned; + compaction = 0; + + if (order && + zone_watermark_ok(zone, 0, + high_wmark_pages(zone), + end_zone, 0) && + !zone_watermark_ok(zone, order, + high_wmark_pages(zone), + end_zone, 0)) { + compact_zone_order(zone, + order, + sc.gfp_mask, + COMPACT_MODE_KSWAPD); + compaction = 1; + } + if (zone->all_unreclaimable) continue; - if (nr_slab == 0 && + if (!compaction && nr_slab == 0 && zone->pages_scanned >= (zone_reclaimable_pages(zone) * 6)) zone->all_unreclaimable = 1; /*