Index: linux-2.6.16-rc1-mm4/mm/page_alloc.c =================================================================== --- linux-2.6.16-rc1-mm4.orig/mm/page_alloc.c 2006-01-30 11:28:29.000000000 -0800 +++ linux-2.6.16-rc1-mm4/mm/page_alloc.c 2006-01-30 11:28:30.000000000 -0800 @@ -12,6 +12,7 @@ * Zone balancing, Kanoj Sarcar, SGI, Jan 2000 * Per cpu hot/cold page lists, bulk allocation, Martin J. Bligh, Sept 2002 * (lots of bits borrowed from Ingo Molnar & Andrew Morton) + * Page zeroing Christoph Lameter, Silicon Graphics, Inc, January 2006. */ #include @@ -321,7 +322,7 @@ static inline int page_is_buddy(struct p * -- wli */ -static inline void __free_one_page(struct page *page, +static void __free_one_page(struct page *page, struct zone *zone, unsigned int order) { unsigned long page_idx; @@ -340,17 +341,39 @@ static inline void __free_one_page(struc unsigned long combined_idx; struct free_area *area; struct page *buddy; + int both_zeroed; buddy = __page_find_buddy(page, page_idx, order); if (!page_is_buddy(buddy, order)) break; /* Move the buddy up one level. */ + both_zeroed = PageZeroed(buddy) & PageZeroed(page); + if (PageZeroed(buddy) ^ PageZeroed(page)) { + /* + * We are merging a zeroed with an unzeroed + * page and therefore lose a zeroed page. + * + * This should be a rare occurrence since + * pages are zeroed beginning with the largest + * order. + */ + __ClearPageZeroed(page); + __ClearPageZeroed(buddy); + } list_del(&buddy->lru); area = zone->free_area + order; area->nr_free--; rmv_page_order(buddy); combined_idx = __find_combined_index(page_idx, order); page = page + (combined_idx - page_idx); + if (both_zeroed) + /* + * The first page's status determines the zero status + * of the combined page. We know that that is set. 
+ * However, the zero bit of the second half must + * be cleared */ + __ClearPageZeroed(page + (1 << order)); page_idx = combined_idx; order++; } @@ -496,15 +519,28 @@ static inline void expand(struct zone *z int low, int high, struct free_area *area) { unsigned long size = 1 << high; + struct page *newpage; while (high > low) { area--; high--; size >>= 1; - BUG_ON(bad_range(zone, &page[size])); - list_add(&page[size].lru, &area->free_list); + newpage = &page[size]; + BUG_ON(bad_range(zone, newpage)); + + if (PageZeroed(page)) { + /* + * Splitting a zeroed page. The new page is also + * already zeroed and will go to the end of the list. + */ + __SetPageZeroed(newpage); + list_add_tail(&newpage->lru, &area->free_list); + } + else + list_add(&newpage->lru, &area->free_list); + area->nr_free++; - set_page_order(&page[size], high); + set_page_order(newpage, high); } } @@ -538,7 +574,8 @@ static int prep_new_page(struct page *pa page->flags &= ~(1 << PG_uptodate | 1 << PG_error | 1 << PG_referenced | 1 << PG_arch_1 | - 1 << PG_checked | 1 << PG_mappedtodisk); + 1 << PG_checked | 1 << PG_mappedtodisk | + 1 << PG_zeroed); set_page_private(page, 0); set_page_refs(page, order); kernel_map_pages(page, 1 << order, 1); @@ -791,7 +828,6 @@ static struct page *buffered_rmqueue(str { unsigned long flags; struct page *page; - int cold = !!(gfp_flags & __GFP_COLD); int zero = !!(gfp_flags & __GFP_ZERO); int cpu; @@ -799,8 +835,15 @@ again: cpu = get_cpu(); if (likely(order == 0)) { struct per_cpu_pages *pcp; + int list; - pcp = &zone_pcp(zone, cpu)->pcp[cold]; + list = PER_CPU_HOT; + if (gfp_flags & __GFP_COLD) + list = PER_CPU_COLD; + if (zero) + list = PER_CPU_ZEROED; + + pcp = &zone_pcp(zone, cpu)->pcp[list]; local_irq_save(flags); if (!pcp->count) { pcp->count += rmqueue_bulk(zone, 0, @@ -825,12 +868,13 @@ again: put_cpu(); BUG_ON(bad_range(zone, page)); - if (prep_new_page(page, order)) - goto again; - if (gfp_flags & __GFP_ZERO) + if (gfp_flags & __GFP_ZERO && 
!PageZeroed(page)) prep_zero_page(page, order, gfp_flags); + if (prep_new_page(page, order)) + goto again; + if (order && (gfp_flags & __GFP_COMP)) prep_compound_page(page, order); return page; @@ -1455,7 +1499,7 @@ void si_meminfo_node(struct sysinfo *val #define K(x) ((x) << (PAGE_SHIFT-10)) -static const char *temperature_descr[] = { "cold", "hot" }; +static const char *temperature_descr[] = { "cold", "hot", "zeroed" }; /* * Show free area list (used inside shift_scroll-lock stuff) @@ -1946,6 +1990,12 @@ inline void setup_pageset(struct per_cpu pcp->high = 2 * batch; pcp->batch = max(1UL, batch/2); INIT_LIST_HEAD(&pcp->list); + + pcp = &p->pcp[PER_CPU_ZEROED]; + pcp->count = 0; + pcp->high = 2 * batch; + pcp->batch = max(1UL, batch/2); + INIT_LIST_HEAD(&pcp->list); } /* Index: linux-2.6.16-rc1-mm4/include/linux/page-flags.h =================================================================== --- linux-2.6.16-rc1-mm4.orig/include/linux/page-flags.h 2006-01-30 11:27:37.000000000 -0800 +++ linux-2.6.16-rc1-mm4/include/linux/page-flags.h 2006-01-30 11:28:30.000000000 -0800 @@ -76,6 +76,7 @@ #define PG_reclaim 17 /* To be reclaimed asap */ #define PG_nosave_free 18 /* Free, should not be written */ #define PG_uncached 19 /* Page has been mapped as uncached */ +#define PG_zeroed 20 /* Page was zeroed while on the free lists */ /* * Global page accounting. One instance per CPU. 
Only unsigned longs are @@ -270,6 +271,12 @@ extern void __mod_page_state_offset(unsi #define ClearPageReserved(page) clear_bit(PG_reserved, &(page)->flags) #define __ClearPageReserved(page) __clear_bit(PG_reserved, &(page)->flags) +#define PageZeroed(page) test_bit(PG_zeroed, &(page)->flags) +#define SetPageZeroed(page) set_bit(PG_zeroed, &(page)->flags) +#define __SetPageZeroed(page) __set_bit(PG_zeroed, &(page)->flags) +#define ClearPageZeroed(page) clear_bit(PG_zeroed, &(page)->flags) +#define __ClearPageZeroed(page) __clear_bit(PG_zeroed, &(page)->flags) + #define SetPagePrivate(page) set_bit(PG_private, &(page)->flags) #define ClearPagePrivate(page) clear_bit(PG_private, &(page)->flags) #define PagePrivate(page) test_bit(PG_private, &(page)->flags) Index: linux-2.6.16-rc1-mm4/include/linux/mmzone.h =================================================================== --- linux-2.6.16-rc1-mm4.orig/include/linux/mmzone.h 2006-01-30 11:28:27.000000000 -0800 +++ linux-2.6.16-rc1-mm4/include/linux/mmzone.h 2006-01-30 11:28:30.000000000 -0800 @@ -22,13 +22,19 @@ #define MAX_ORDER CONFIG_FORCE_MAX_ZONEORDER #endif -#define NR_PER_CPU_PAGES 2 +#define NR_PER_CPU_PAGES 3 /* Types of per cpu pages */ #define PER_CPU_HOT 0 #define PER_CPU_COLD 1 +#define PER_CPU_ZEROED 2 struct free_area { + /* + * The free list contains free pages of a specific order. + * Zeroed pages are at the back of the list and unzeroed ones + * at the front. + */ struct list_head free_list; unsigned long nr_free; };