Index: linux-2.6.16-rc1-mm3/mm/page_alloc.c
===================================================================
--- linux-2.6.16-rc1-mm3.orig/mm/page_alloc.c	2006-01-25 18:29:02.000000000 -0800
+++ linux-2.6.16-rc1-mm3/mm/page_alloc.c	2006-01-25 18:29:18.000000000 -0800
@@ -12,6 +12,7 @@
  *  Zone balancing, Kanoj Sarcar, SGI, Jan 2000
  *  Per cpu hot/cold page lists, bulk allocation, Martin J. Bligh, Sept 2002
  *          (lots of bits borrowed from Ingo Molnar & Andrew Morton)
+ *  Page zeroing by Christoph Lameter, Silicon Graphics, Inc, January 2006
  */

 #include
@@ -794,19 +795,28 @@ static struct page *buffered_rmqueue(str
 {
 	unsigned long flags;
 	struct page *page;
-	int cold = !!(gfp_flags & __GFP_COLD);
+	int area, list;
 	int cpu;

+	/* Determine the per cpu cache and the area to use */
+	list = PER_CPU_HOT;
+	area = FREE_AREA_DEFAULT;
+	if (gfp_flags & __GFP_COLD)
+		list = PER_CPU_COLD;
+	if (gfp_flags & __GFP_ZERO) {
+		list = PER_CPU_ZERO;
+		area = FREE_AREA_ZEROED;
+	}
 again:
 	cpu = get_cpu();
 	if (likely(order == 0)) {
 		struct per_cpu_pages *pcp;

-		pcp = &zone_pcp(zone, cpu)->pcp[cold];
+		pcp = &zone_pcp(zone, cpu)->pcp[list];
 		local_irq_save(flags);
 		if (!pcp->count) {
 			pcp->count += rmqueue_bulk(zone, 0,
-				pcp->batch, &pcp->list, FREE_AREA_DEFAULT);
+				pcp->batch, &pcp->list, area);
 			if (unlikely(!pcp->count))
 				goto failed;
 		}
@@ -815,7 +825,21 @@ again:
 		pcp->count--;
 	} else {
 		spin_lock_irqsave(&zone->lock, flags);
-		page = __rmqueue(zone, order, FREE_AREA_DEFAULT);
+		page = __rmqueue(zone, order, area);
+
+		/*
+		 * If we failed to obtain a zero and/or unzeroed page
+		 * then we may still be able to obtain the page from
+		 * another area.
+		 */
+		if (!page) {
+			if (area == FREE_AREA_ZEROED) {
+				page = __rmqueue(zone, order, FREE_AREA_DEFAULT);
+				list = PER_CPU_COLD;
+			} else
+				page = __rmqueue(zone, order, FREE_AREA_ZEROED);
+		}
+
 		spin_unlock(&zone->lock);
 		if (!page)
 			goto failed;
@@ -830,7 +854,11 @@ again:
 	if (prep_new_page(page, order))
 		goto again;

-	if (gfp_flags & __GFP_ZERO)
+	/*
+	 * If we were able to get the page from a prezeroed list then
+	 * we can avoid zeroing here.
+	 */
+	if ((gfp_flags & __GFP_ZERO) && list != PER_CPU_ZERO)
 		prep_zero_page(page, order, gfp_flags);

 	if (order && (gfp_flags & __GFP_COMP))
@@ -1458,7 +1486,7 @@ void si_meminfo_node(struct sysinfo *val

 #define K(x) ((x) << (PAGE_SHIFT-10))

-const char *temperature_descr[] = { "cold", "hot" };
+const char *temperature_descr[] = { "cold", "hot", "zero" };

 /*
  * Show free area list (used inside shift_scroll-lock stuff)
@@ -1952,6 +1980,12 @@ inline void setup_pageset(struct per_cpu
 	pcp->high = 2 * batch;
 	pcp->batch = max(1UL, batch/2);
 	INIT_LIST_HEAD(&pcp->list);
+
+	pcp = &p->pcp[PER_CPU_ZERO];
+	pcp->count = 0;
+	pcp->high = 2 * batch;
+	pcp->batch = max(1UL, batch/2);
+	INIT_LIST_HEAD(&pcp->list);
 }

 /*
Index: linux-2.6.16-rc1-mm3/include/linux/mmzone.h
===================================================================
--- linux-2.6.16-rc1-mm3.orig/include/linux/mmzone.h	2006-01-25 17:15:18.000000000 -0800
+++ linux-2.6.16-rc1-mm3/include/linux/mmzone.h	2006-01-25 18:29:05.000000000 -0800
@@ -22,16 +22,18 @@
 #define MAX_ORDER CONFIG_FORCE_MAX_ZONEORDER
 #endif

-#define NR_PER_CPU_PAGES 2
+#define NR_PER_CPU_PAGES 3
 /* Types of per cpu pages */
 #define PER_CPU_HOT 0
 #define PER_CPU_COLD 1
+#define PER_CPU_ZERO 2

-#define NR_FREE_AREAS 1
+#define NR_FREE_AREAS 2
 /* Types of free areas */
 #define FREE_AREA_DEFAULT 0
+#define FREE_AREA_ZEROED 1

 struct free_area {
 	struct list_head free_list;