If vmscan finds a zero refcount page on the lru list, never ClearPageLRU. This means the release code need not hold ->lru_lock to stabalise PageLRU, so the lock may be skipped entirely when !PageLRU. Signed-off-by: Nick Piggin Index: linux-2.6/mm/vmscan.c =================================================================== --- linux-2.6.orig/mm/vmscan.c +++ linux-2.6/mm/vmscan.c @@ -1079,21 +1079,25 @@ static int isolate_lru_pages(int nr_to_s page = lru_to_page(src); prefetchw_prev_lru_page(page, src, flags); - if (!TestClearPageLRU(page)) - BUG(); list_del(&page->lru); - if (get_page_testone(page)) { + if (unlikely(get_page_testone(page))) { /* * It is being freed elsewhere */ __put_page(page); - SetPageLRU(page); list_add(&page->lru, src); continue; - } else { - list_add(&page->lru, dst); - nr_taken++; } + + /* + * Be careful not to clear PageLRU until after we're sure + * the page is not being freed elsewhere -- the page release + * code relies on it. + */ + if (!TestClearPageLRU(page)) + BUG(); + list_add(&page->lru, dst); + nr_taken++; } *scanned = scan; Index: linux-2.6/mm/swap.c =================================================================== --- linux-2.6.orig/mm/swap.c +++ linux-2.6/mm/swap.c @@ -209,17 +209,18 @@ int lru_add_drain_all(void) */ void fastcall __page_cache_release(struct page *page) { - unsigned long flags; - struct zone *zone = page_zone(page); + if (PageLRU(page)) { + unsigned long flags; - spin_lock_irqsave(&zone->lru_lock, flags); - if (TestClearPageLRU(page)) + struct zone *zone = page_zone(page); + spin_lock_irqsave(&zone->lru_lock, flags); + if (!TestClearPageLRU(page)) + BUG(); del_page_from_lru(zone, page); - if (page_count(page) != 0) - page = NULL; - spin_unlock_irqrestore(&zone->lru_lock, flags); - if (page) - free_hot_page(page); + spin_unlock_irqrestore(&zone->lru_lock, flags); + } + + free_hot_page(page); } EXPORT_SYMBOL(__page_cache_release); @@ -245,7 +246,6 @@ void release_pages(struct page **pages, pagevec_init(&pages_to_free, cold); for (i = 0; i < nr; i++) { struct page *page = pages[i]; - struct zone *pagezone; if (unlikely(PageCompound(page))) { if (zone) { @@ -259,22 +259,26 @@ void release_pages(struct page **pages, if (!put_page_testzero(page)) continue; - pagezone = page_zone(page); - if (pagezone != zone) { - if (zone) - spin_unlock_irq(&zone->lru_lock); - zone = pagezone; - spin_lock_irq(&zone->lru_lock); - } - if (TestClearPageLRU(page)) + if (PageLRU(page)) { + struct zone *pagezone = page_zone(page); + if (pagezone != zone) { + if (zone) + spin_unlock_irq(&zone->lru_lock); + zone = pagezone; + spin_lock_irq(&zone->lru_lock); + } + if (!TestClearPageLRU(page)) + BUG(); del_page_from_lru(zone, page); - if (page_count(page) == 0) { - if (!pagevec_add(&pages_to_free, page)) { + } + + if (!pagevec_add(&pages_to_free, page)) { + if (zone) { spin_unlock_irq(&zone->lru_lock); - __pagevec_free(&pages_to_free); - pagevec_reinit(&pages_to_free); - zone = NULL; /* No lock is held */ + zone = NULL; } + __pagevec_free(&pages_to_free); + pagevec_reinit(&pages_to_free); } } if (zone)