From: mel@skynet.ie (Mel Gorman) kswapd normally reclaims at order 0 unless there is a higher-order allocation currently being serviced. However, in some cases it is known that there is a minimum order size that is generally required, such as when SLUB is configured to use higher orders for performance reasons. This patch allows a minimum order to be set, such that min_free_kbytes pages are kept at higher orders. This depends on lumpy-reclaim to work. [clameter@sgi.com: Call raise_kswapd_order() on kmem_cache_open()] Acked-by: Andy Whitcroft Acked-by: Christoph Lameter Signed-off-by: Mel Gorman Signed-off-by: Andrew Morton --- include/linux/mmzone.h | 1 + mm/slub.c | 1 + mm/vmscan.c | 34 +++++++++++++++++++++++++++++++--- 3 files changed, 33 insertions(+), 3 deletions(-) diff -puN include/linux/mmzone.h~have-kswapd-keep-a-minimum-order-free-other-than-order-0 include/linux/mmzone.h --- a/include/linux/mmzone.h~have-kswapd-keep-a-minimum-order-free-other-than-order-0 +++ a/include/linux/mmzone.h @@ -507,6 +507,7 @@ typedef struct pglist_data { void get_zone_counts(unsigned long *active, unsigned long *inactive, unsigned long *free); void build_all_zonelists(void); +void raise_kswapd_order(unsigned int order); void wakeup_kswapd(struct zone *zone, int order); int zone_watermark_ok(struct zone *z, int order, unsigned long mark, int classzone_idx, int alloc_flags); diff -puN mm/slub.c~have-kswapd-keep-a-minimum-order-free-other-than-order-0 mm/slub.c --- a/mm/slub.c~have-kswapd-keep-a-minimum-order-free-other-than-order-0 +++ a/mm/slub.c @@ -2031,6 +2031,7 @@ static int kmem_cache_open(struct kmem_c #ifdef CONFIG_NUMA s->defrag_ratio = 100; #endif + raise_kswapd_order(s->order); if (init_kmem_cache_nodes(s, gfpflags & ~SLUB_DMA)) return 1; diff -puN mm/vmscan.c~have-kswapd-keep-a-minimum-order-free-other-than-order-0 mm/vmscan.c --- a/mm/vmscan.c~have-kswapd-keep-a-minimum-order-free-other-than-order-0 +++ a/mm/vmscan.c @@ -1402,6 +1402,34 @@ out: return nr_reclaimed; } 
+static unsigned int kswapd_min_order __read_mostly;
+
+static inline unsigned int kswapd_order(unsigned int order)
+{
+	return max(kswapd_min_order, order);
+}
+
+/**
+ * raise_kswapd_order - Raise the minimum order that kswapd reclaims
+ * @order: The minimum order kswapd should reclaim at
+ *
+ * kswapd normally reclaims at order 0 unless a higher-order allocation is
+ * being serviced. Use this when regular high-order allocations at a given
+ * order are expected. The minimum is only ever raised, never lowered, and
+ * requests of MAX_ORDER or above are ignored.
+ */
+void raise_kswapd_order(unsigned int order)
+{
+	if (order >= MAX_ORDER)
+		return;
+
+	/* Update order if necessary and inform if changed */
+	if (order > kswapd_min_order) {
+		kswapd_min_order = order;
+		printk(KERN_INFO "kswapd reclaim order set to %u\n", order);
+	}
+}
+
 /*
  * The background pageout daemon, started as a kernel thread
  * from the init process.
@@ -1445,12 +1473,12 @@ static int kswapd(void *p)
 	 */
 	tsk->flags |= PF_MEMALLOC | PF_SWAPWRITE | PF_KSWAPD;
 
-	order = 0;
+	order = kswapd_order(0);
 	for ( ; ; ) {
 		unsigned long new_order;
 
 		prepare_to_wait(&pgdat->kswapd_wait, &wait, TASK_INTERRUPTIBLE);
-		new_order = pgdat->kswapd_max_order;
+		new_order = kswapd_order(pgdat->kswapd_max_order);
 		pgdat->kswapd_max_order = 0;
 		if (order < new_order) {
 			/*
@@ -1462,7 +1490,7 @@ static int kswapd(void *p)
 			if (!freezing(current))
 				schedule();
 
-			order = pgdat->kswapd_max_order;
+			order = kswapd_order(pgdat->kswapd_max_order);
 		}
 		finish_wait(&pgdat->kswapd_wait, &wait);
 
_