Optimise page_state manipulations by introducing a direct accessor to the per-cpu page_state fields that does not disable interrupts: callers must either already have interrupts disabled, or know that the counter is never modified from interrupt context (as with nr_mapped). This lets hot paths in the page allocator and in reclaim batch several counter updates under a single interrupt disable.
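The mechanism behind the new accessor is simple: turn a struct page_state member into a byte offset with offsetof(), add it to the base of the current CPU's copy of the structure, and hand the caller a pointer it can increment directly. Below is a minimal stand-alone user-space sketch of that pattern, not the kernel code itself; cpu_stats, per_cpu_stats, __counter() and counter() are made-up names standing in for page_state, the per-cpu page_states data, __page_state() and page_state(), and the interrupt-safety obligation is only noted in a comment because it has no user-space equivalent.

#include <stdio.h>
#include <stddef.h>

/* Made-up stand-in for struct page_state. */
struct cpu_stats {
	unsigned long pgalloc;
	unsigned long pgfree;
	unsigned long nr_mapped;
};

/* One copy per CPU; index 0 stands in for "the current CPU". */
static struct cpu_stats per_cpu_stats[2];

/* Stand-in for __page_state(): field offset -> pointer into this CPU's copy. */
static unsigned long *__counter(unsigned long offset)
{
	char *base = (char *)&per_cpu_stats[0];

	return (unsigned long *)(base + offset);
}

/*
 * Stand-in for page_state(member).  In the kernel the caller must already
 * have interrupts off (or know the counter is never touched from interrupt
 * context); nothing here enforces that.
 */
#define counter(member)	(*__counter(offsetof(struct cpu_stats, member)))

int main(void)
{
	counter(pgfree) += 1 << 3;	/* like page_state(pgfree) += 1 << order */
	counter(nr_mapped)++;		/* like page_state(nr_mapped)++ */

	printf("pgfree=%lu nr_mapped=%lu\n",
	       counter(pgfree), counter(nr_mapped));
	return 0;
}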
Index: linux-2.6/include/linux/page-flags.h
===================================================================
--- linux-2.6.orig/include/linux/page-flags.h
+++ linux-2.6/include/linux/page-flags.h
@@ -138,6 +138,7 @@ extern void get_page_state_node(struct p
 extern void get_full_page_state(struct page_state *ret);
 extern unsigned long __read_page_state(unsigned long offset);
 extern void __mod_page_state(unsigned long offset, unsigned long delta);
+extern unsigned long *__page_state(unsigned long offset);
 
 #define read_page_state(member) \
 	__read_page_state(offsetof(struct page_state, member))
@@ -150,16 +151,26 @@ extern void __mod_page_state(unsigned lo
 #define add_page_state(member,delta)	mod_page_state(member, (delta))
 #define sub_page_state(member,delta)	mod_page_state(member, 0UL - (delta))
 
-#define mod_page_state_zone(zone, member, delta)			\
-	do {								\
-		unsigned offset;					\
-		if (is_highmem(zone))					\
-			offset = offsetof(struct page_state, member##_high); \
-		else if (is_normal(zone))				\
-			offset = offsetof(struct page_state, member##_normal); \
-		else							\
-			offset = offsetof(struct page_state, member##_dma); \
-		__mod_page_state(offset, (delta));			\
+#define page_state(member) (*__page_state(offsetof(struct page_state, member)))
+
+#define state_zone_offset(zone, member)					\
+({									\
+	unsigned offset;						\
+	if (is_highmem(zone))						\
+		offset = offsetof(struct page_state, member##_high);	\
+	else if (is_normal(zone))					\
+		offset = offsetof(struct page_state, member##_normal);	\
+	else								\
+		offset = offsetof(struct page_state, member##_dma);	\
+	offset;								\
+})
+
+#define page_state_zone(zone, member)					\
+	(*__page_state(state_zone_offset(zone, member)))
+
+#define mod_page_state_zone(zone, member, delta)			\
+	do {								\
+		__mod_page_state(state_zone_offset(zone, member), (delta)); \
 	} while (0)
 
 /*
Index: linux-2.6/mm/page_alloc.c
===================================================================
--- linux-2.6.orig/mm/page_alloc.c
+++ linux-2.6/mm/page_alloc.c
@@ -375,8 +375,6 @@ void __free_pages_ok(struct page *page,
 
 	arch_free_page(page, order);
 
-	mod_page_state(pgfree, 1 << order);
-
 #ifndef CONFIG_MMU
 	if (order > 0)
 		for (i = 1 ; i < (1 << order) ; ++i)
@@ -388,6 +386,7 @@ void __free_pages_ok(struct page *page,
 	list_add(&page->lru, &list);
 	kernel_map_pages(page, 1<<order, 0);
 	local_irq_save(flags);
+	page_state(pgfree) += 1 << order;
 	free_pages_bulk(page_zone(page), 1, &list, order);
 	local_irq_restore(flags);
 }
@@ ... @@ static void fastcall free_hot_cold_page(
-	inc_page_state(pgfree);
 	if (PageAnon(page))
 		page->mapping = NULL;
 	free_pages_check(page);
 	pcp = &zone_pcp(zone, get_cpu())->pcp[cold];
 	local_irq_save(flags);
+	page_state(pgfree)++;
 	list_add(&page->lru, &pcp->list);
 	pcp->count++;
 	if (pcp->count >= pcp->high)
@@ -679,42 +678,50 @@ static struct page *
 buffered_rmqueue(struct zone *zone, int order, gfp_t gfp_flags)
 {
 	unsigned long flags;
-	struct page *page = NULL;
+	struct page *page;
 	int cold = !!(gfp_flags & __GFP_COLD);
+	int cpu = get_cpu();
 
 	if (order == 0) {
 		struct per_cpu_pages *pcp;
 
-		pcp = &zone_pcp(zone, get_cpu())->pcp[cold];
+		pcp = &zone_pcp(zone, cpu)->pcp[cold];
 		local_irq_save(flags);
-		if (!pcp->count)
+		if (!pcp->count) {
 			pcp->count += rmqueue_bulk(zone, 0,
 						pcp->batch, &pcp->list);
-		if (likely(pcp->count)) {
-			page = list_entry(pcp->list.next, struct page, lru);
-			list_del(&page->lru);
-			pcp->count--;
+			if (unlikely(!pcp->count))
+				goto failed;
 		}
-		local_irq_restore(flags);
-		put_cpu();
+		page = list_entry(pcp->list.next, struct page, lru);
+		list_del(&page->lru);
+		pcp->count--;
 	} else {
 		spin_lock_irqsave(&zone->lock, flags);
 		page = __rmqueue(zone, order);
-		spin_unlock_irqrestore(&zone->lock, flags);
+		spin_unlock(&zone->lock);
+		if (!page)
+			goto failed;
 	}
 
-	if (page != NULL) {
-		BUG_ON(bad_range(zone, page));
-		mod_page_state_zone(zone, pgalloc, 1 << order);
-		prep_new_page(page, order);
+	page_state_zone(zone, pgalloc) += 1 << order;
+	local_irq_restore(flags);
+	put_cpu();
 
-		if (gfp_flags & __GFP_ZERO)
-			prep_zero_page(page, order, gfp_flags);
+	BUG_ON(bad_range(zone, page));
+	prep_new_page(page, order);
 
-		if (order && (gfp_flags & __GFP_COMP))
-			prep_compound_page(page, order);
-	}
+	if (gfp_flags & __GFP_ZERO)
+		prep_zero_page(page, order, gfp_flags);
+
+	if (order && (gfp_flags & __GFP_COMP))
+		prep_compound_page(page, order);
 
 	return page;
+
+failed:
+	local_irq_restore(flags);
+	put_cpu();
+	return NULL;
 }
 
 /*
@@ -1190,6 +1197,15 @@ unsigned long __read_page_state(unsigned
 	return ret;
 }
 
+unsigned long *__page_state(unsigned long offset)
+{
+	void *ptr;
+	ptr = &__get_cpu_var(page_states);
+	return (unsigned long *)(ptr + offset);
+}
+
+EXPORT_SYMBOL(__page_state);
+
 void __mod_page_state(unsigned long offset, unsigned long delta)
 {
 	unsigned long flags;
Index: linux-2.6/mm/vmscan.c
===================================================================
--- linux-2.6.orig/mm/vmscan.c
+++ linux-2.6/mm/vmscan.c
@@ -639,17 +639,18 @@ static void shrink_cache(struct zone *zo
 			goto done;
 
 		max_scan -= nr_scan;
-		if (current_is_kswapd())
-			mod_page_state_zone(zone, pgscan_kswapd, nr_scan);
-		else
-			mod_page_state_zone(zone, pgscan_direct, nr_scan);
 		nr_freed = shrink_list(&page_list, sc);
-		if (current_is_kswapd())
-			mod_page_state(kswapd_steal, nr_freed);
-		mod_page_state_zone(zone, pgsteal, nr_freed);
 		sc->nr_to_reclaim -= nr_freed;
 
-		spin_lock_irq(&zone->lru_lock);
+		local_irq_disable();
+		if (current_is_kswapd()) {
+			page_state_zone(zone, pgscan_kswapd) += nr_scan;
+			page_state(kswapd_steal) += nr_freed;
+		} else
+			page_state_zone(zone, pgscan_direct) += nr_scan;
+		page_state_zone(zone, pgsteal) += nr_freed;
+
+		spin_lock(&zone->lru_lock);
 		/*
 		 * Put back any unfreeable pages.
 		 */
@@ -811,11 +812,13 @@ refill_inactive_zone(struct zone *zone,
 		}
 	}
 	zone->nr_active += pgmoved;
-	spin_unlock_irq(&zone->lru_lock);
-	pagevec_release(&pvec);
+	spin_unlock(&zone->lru_lock);
+
+	page_state_zone(zone, pgrefill) += pgscanned;
+	page_state(pgdeactivate) += pgdeactivate;
+	local_irq_enable();
 
-	mod_page_state_zone(zone, pgrefill, pgscanned);
-	mod_page_state(pgdeactivate, pgdeactivate);
+	pagevec_release(&pvec);
 }
 
 /*
Index: linux-2.6/mm/rmap.c
===================================================================
--- linux-2.6.orig/mm/rmap.c
+++ linux-2.6/mm/rmap.c
@@ -453,7 +453,12 @@ static void __page_set_anon_rmap(struct
 	page->index = linear_page_index(vma, address);
 
-	inc_page_state(nr_mapped);
+	/*
+	 * nr_mapped state can be updated without turning off interrupts
+	 * because it is never updated via interrupt. This should not usually
+	 * be done with page_state, however this is a special hot case.
+	 */
+	page_state(nr_mapped)++;
 }
 
 /**
@@ -500,7 +505,7 @@ void page_add_file_rmap(struct page *pag
 	BUG_ON(!pfn_valid(page_to_pfn(page)));
 
 	if (atomic_inc_and_test(&page->_mapcount))
-		inc_page_state(nr_mapped);
+		page_state(nr_mapped)++;
 }
 
 /**
@@ -532,7 +537,7 @@ void page_remove_rmap(struct page *page)
 	 * Leaving it set also helps swapoff to reinstate ptes
 	 * faster for those pages still in swapcache.
 	 */
-	dec_page_state(nr_mapped);
+	page_state(nr_mapped)--;
}
 
 /*
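A note on the page-flags.h hunk above: state_zone_offset() is written as a GCC statement expression, ({ ... }), which is what lets the same helper yield a value for page_state_zone() while still working inside mod_page_state_zone(). A small user-space illustration of the construct, assuming GCC or Clang and using made-up names (struct counters, hits_offset()):

#include <stdio.h>
#include <stddef.h>

struct counters {
	unsigned long hits_dma;
	unsigned long hits_normal;
	unsigned long hits_high;
};

/*
 * A statement expression executes its statements and evaluates to the value
 * of the last one, so the macro can sit anywhere an expression is expected.
 */
#define hits_offset(is_high)						\
({									\
	size_t __off;							\
	if (is_high)							\
		__off = offsetof(struct counters, hits_high);		\
	else								\
		__off = offsetof(struct counters, hits_normal);		\
	__off;								\
})

int main(void)
{
	struct counters c = { 0, 0, 0 };
	char *base = (char *)&c;

	/* Use the offset the same way page_state_zone() uses its result. */
	(*(unsigned long *)(base + hits_offset(1)))++;
	(*(unsigned long *)(base + hits_offset(0))) += 2;

	printf("dma=%lu normal=%lu high=%lu\n",
	       c.hits_dma, c.hits_normal, c.hits_high);
	return 0;
}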