Add PageMlocked() infrastructure

This adds a new page flag, PG_mlocked, to mark pages that were taken off
the LRU because they have a reference from a VM_LOCKED vma.

Also add pagevec handling for returning mlocked pages to the LRU.

Signed-off-by: Christoph Lameter
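For illustration only, and not part of this patch: a minimal sketch of the
other half of the scheme described above, namely taking a page off the LRU
once a VM_LOCKED vma references it. The helper name isolate_mlocked_page()
is hypothetical, and the sketch assumes the NR_MLOCK zone counter (which
__pagevec_lru_add_mlock() below decrements) is introduced by another patch
in this series.

#include <linux/mm.h>
#include <linux/mm_inline.h>
#include <linux/swap.h>
#include <linux/vmstat.h>

/*
 * Hypothetical helper, for illustration only: pull a page that is
 * referenced by a VM_LOCKED vma off the LRU and mark it mlocked.
 * PG_mlocked is only set or cleared under zone->lru_lock, matching
 * the rule documented for PageMlocked below.
 */
static void isolate_mlocked_page(struct page *page)
{
	struct zone *zone = page_zone(page);

	spin_lock_irq(&zone->lru_lock);
	if (PageLRU(page) && !PageMlocked(page)) {
		/* Unlink from the active or inactive list and drop PG_lru */
		ClearPageLRU(page);
		del_page_from_lru(zone, page);
		SetPageMlocked(page);
		/* NR_MLOCK is assumed to be added elsewhere in the series */
		__inc_zone_state(zone, NR_MLOCK);
	}
	spin_unlock_irq(&zone->lru_lock);
}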
Index: current/include/linux/page-flags.h
===================================================================
--- current.orig/include/linux/page-flags.h	2007-02-07 00:26:39.000000000 -0800
+++ current/include/linux/page-flags.h	2007-02-07 00:26:46.000000000 -0800
@@ -87,13 +87,12 @@
 #define PG_swapcache		15	/* Swap page: swp_entry_t in private */
 #define PG_mappedtodisk		16	/* Has blocks allocated on-disk */
-#define PG_reclaim		17	/* To be reclaimed asap */
+#define PG_mlocked		17	/* Page is mlocked */
 #define PG_nosave_free		18	/* Used for system suspend/resume */
 #define PG_buddy		19	/* Page is free, on buddy lists */
 #define PG_readahead		20	/* Reminder to do read-ahead */
-
 #if (BITS_PER_LONG > 32)
 /*
  * 64-bit-only flags build down from bit 31
@@ -230,6 +229,16 @@ static inline void SetPageUptodate(struc
 #define SetPageReadahead(page)	set_bit(PG_readahead, &(page)->flags)
 #define ClearPageReadahead(page) clear_bit(PG_readahead, &(page)->flags)
 
+/*
+ * PageMlocked set means that the page was taken off the LRU because
+ * a VM_LOCKED vma does exist. PageMlocked must be cleared before a
+ * page is put back onto the LRU. PageMlocked is only modified
+ * under the zone->lru_lock like PageLRU.
+ */
+#define PageMlocked(page)	test_bit(PG_mlocked, &(page)->flags)
+#define SetPageMlocked(page)	set_bit(PG_mlocked, &(page)->flags)
+#define ClearPageMlocked(page)	clear_bit(PG_mlocked, &(page)->flags)
+
 struct page;	/* forward declaration */
 
 extern void cancel_dirty_page(struct page *page, unsigned int account_size);
Index: current/include/linux/pagevec.h
===================================================================
--- current.orig/include/linux/pagevec.h	2007-02-07 00:26:20.000000000 -0800
+++ current/include/linux/pagevec.h	2007-02-07 00:26:46.000000000 -0800
@@ -25,6 +25,7 @@ void __pagevec_release_nonlru(struct pag
 void __pagevec_free(struct pagevec *pvec);
 void __pagevec_lru_add(struct pagevec *pvec);
 void __pagevec_lru_add_active(struct pagevec *pvec);
+void __pagevec_lru_add_mlock(struct pagevec *pvec);
 void pagevec_strip(struct pagevec *pvec);
 unsigned pagevec_lookup(struct pagevec *pvec, struct address_space *mapping,
 		pgoff_t start, unsigned nr_pages);
Index: current/include/linux/swap.h
===================================================================
--- current.orig/include/linux/swap.h	2007-02-07 00:26:20.000000000 -0800
+++ current/include/linux/swap.h	2007-02-07 00:26:46.000000000 -0800
@@ -181,6 +181,7 @@ extern unsigned int nr_free_pagecache_pa
 extern void FASTCALL(lru_cache_add(struct page *));
 extern void FASTCALL(lru_cache_add_active(struct page *));
 extern void FASTCALL(lru_cache_add_tail(struct page *));
+extern void FASTCALL(lru_cache_add_mlock(struct page *));
 extern void FASTCALL(activate_page(struct page *));
 extern void FASTCALL(mark_page_accessed(struct page *));
 extern void lru_add_drain(void);
Index: current/mm/swap.c
===================================================================
--- current.orig/mm/swap.c	2007-02-07 00:26:20.000000000 -0800
+++ current/mm/swap.c	2007-02-07 00:26:46.000000000 -0800
@@ -137,6 +137,7 @@ EXPORT_SYMBOL(mark_page_accessed);
 static DEFINE_PER_CPU(struct pagevec, lru_add_pvecs) = { 0, };
 static DEFINE_PER_CPU(struct pagevec, lru_add_active_pvecs) = { 0, };
 static DEFINE_PER_CPU(struct pagevec, lru_add_tail_pvecs) = { 0, };
+static DEFINE_PER_CPU(struct pagevec, lru_add_mlock_pvecs) = { 0, };
 
 void fastcall lru_cache_add(struct page *page)
 {
@@ -158,6 +159,16 @@ void fastcall lru_cache_add_active(struc
 	put_cpu_var(lru_add_active_pvecs);
 }
 
+void fastcall lru_cache_add_mlock(struct page *page)
+{
+	struct pagevec *pvec = &get_cpu_var(lru_add_mlock_pvecs);
+
+	page_cache_get(page);
+	if (!pagevec_add(pvec, page))
+		__pagevec_lru_add_mlock(pvec);
+	put_cpu_var(lru_add_mlock_pvecs);
+}
+
 static void __pagevec_lru_add_tail(struct pagevec *pvec)
 {
 	int i;
@@ -196,6 +207,9 @@ static void __lru_add_drain(int cpu)
 	pvec = &per_cpu(lru_add_tail_pvecs, cpu);
 	if (pagevec_count(pvec))
 		__pagevec_lru_add_tail(pvec);
+	pvec = &per_cpu(lru_add_mlock_pvecs, cpu);
+	if (pagevec_count(pvec))
+		__pagevec_lru_add_mlock(pvec);
 }
 
 void lru_add_drain(void)
@@ -353,6 +367,7 @@ void __pagevec_lru_add(struct pagevec *p
 			spin_lock_irq(&zone->lru_lock);
 		}
 		VM_BUG_ON(PageLRU(page));
+		VM_BUG_ON(PageMlocked(page));
 		SetPageLRU(page);
 		add_page_to_inactive_list(zone, page);
 	}
@@ -382,6 +397,7 @@ void __pagevec_lru_add_active(struct pag
 		VM_BUG_ON(PageLRU(page));
 		SetPageLRU(page);
 		VM_BUG_ON(PageActive(page));
+		VM_BUG_ON(PageMlocked(page));
 		SetPageActive(page);
 		add_page_to_active_list(zone, page);
 	}
@@ -391,6 +407,36 @@ void __pagevec_lru_add_active(struct pag
 	pagevec_reinit(pvec);
 }
 
+void __pagevec_lru_add_mlock(struct pagevec *pvec)
+{
+	int i;
+	struct zone *zone = NULL;
+
+	for (i = 0; i < pagevec_count(pvec); i++) {
+		struct page *page = pvec->pages[i];
+		struct zone *pagezone = page_zone(page);
+
+		if (pagezone != zone) {
+			if (zone)
+				spin_unlock_irq(&zone->lru_lock);
+			zone = pagezone;
+			spin_lock_irq(&zone->lru_lock);
+		}
+		BUG_ON(PageLRU(page));
+		if (!PageMlocked(page))
+			continue;
+		ClearPageMlocked(page);
+		smp_wmb();
+		__dec_zone_state(zone, NR_MLOCK);
+		SetPageLRU(page);
+		add_page_to_active_list(zone, page);
+	}
+	if (zone)
+		spin_unlock_irq(&zone->lru_lock);
+	release_pages(pvec->pages, pvec->nr, pvec->cold);
+	pagevec_reinit(pvec);
+}
+
 /*
  * Function used uniquely to put pages back to the lru at the end of the
  * inactive list to preserve the lru order. Currently only used by swap
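As a usage note, not part of this patch: the return path is then a single
call into the batched interface added above; a hypothetical caller might
look like the following. The per-cpu lru_add_mlock_pvecs batching mirrors
lru_cache_add()/lru_cache_add_active() so that zone->lru_lock is taken
once per pagevec rather than once per page, and __lru_add_drain() now
also drains the mlock pagevec.

#include <linux/mm.h>
#include <linux/swap.h>

/*
 * Hypothetical caller, for illustration only: hand a formerly mlocked
 * page back to the LRU. lru_cache_add_mlock() takes its own page
 * reference and batches the page in the per-cpu lru_add_mlock_pvecs
 * pagevec; the drain path (__pagevec_lru_add_mlock) clears PG_mlocked
 * under zone->lru_lock and moves the page to the active list.
 */
static void example_return_to_lru(struct page *page)
{
	lru_cache_add_mlock(page);
}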