Speed up unlock_page by introducing a new page flag to signal that there are page waitqueue waiters for PG_locked. This means a memory barrier and a random waitqueue hash cacheline load can be avoided in the fastpath when there is no contention. XXX: put in bad_page checks (removed to make tracking easier) Index: linux-2.6/include/linux/page-flags.h =================================================================== --- linux-2.6.orig/include/linux/page-flags.h +++ linux-2.6/include/linux/page-flags.h @@ -90,6 +90,8 @@ #define PG_reclaim 17 /* To be reclaimed asap */ #define PG_buddy 19 /* Page is free, on buddy lists */ +#define PG_waiters 20 /* Page has PG_locked waiters */ + /* PG_owner_priv_1 users should have descriptive aliases */ #define PG_checked PG_owner_priv_1 /* Used by some filesystems */ @@ -112,6 +114,10 @@ #define SetPageLocked(page) \ set_bit(PG_locked, &(page)->flags) +#define PageWaiters(page) test_bit(PG_waiters, &(page)->flags) +#define SetPageWaiters(page) set_bit(PG_waiters, &(page)->flags) +#define ClearPageWaiters(page) clear_bit(PG_waiters, &(page)->flags) + #define PageError(page) test_bit(PG_error, &(page)->flags) #define SetPageError(page) set_bit(PG_error, &(page)->flags) #define ClearPageError(page) clear_bit(PG_error, &(page)->flags) Index: linux-2.6/include/linux/pagemap.h =================================================================== --- linux-2.6.orig/include/linux/pagemap.h +++ linux-2.6/include/linux/pagemap.h @@ -155,6 +155,7 @@ static inline pgoff_t linear_page_index( extern void FASTCALL(__lock_page(struct page *page)); extern void FASTCALL(__lock_page_nosync(struct page *page)); +extern void FASTCALL(__wait_on_page_locked(struct page *page)); extern void FASTCALL(unlock_page(struct page *page)); static inline int trylock_page(struct page *page) @@ -182,7 +183,7 @@ static inline void lock_page_nosync(stru if (!trylock_page(page)) __lock_page_nosync(page); } - + /* * This is exported only for 
wait_on_page_locked/wait_on_page_writeback. * Never use this directly! @@ -198,8 +199,9 @@ extern void FASTCALL(wait_on_page_bit(st */ static inline void wait_on_page_locked(struct page *page) { + might_sleep(); if (PageLocked(page)) - wait_on_page_bit(page, PG_locked); + __wait_on_page_locked(page); } /* @@ -207,6 +209,7 @@ static inline void wait_on_page_locked(s */ static inline void wait_on_page_writeback(struct page *page) { + might_sleep(); if (PageWriteback(page)) wait_on_page_bit(page, PG_writeback); } Index: linux-2.6/mm/filemap.c =================================================================== --- linux-2.6.orig/mm/filemap.c +++ linux-2.6/mm/filemap.c @@ -478,12 +478,6 @@ struct page *__page_cache_alloc(gfp_t gf EXPORT_SYMBOL(__page_cache_alloc); #endif -static int __sleep_on_page_lock(void *word) -{ - io_schedule(); - return 0; -} - /* * In order to wait for pages to become available there must be * waitqueues associated with pages. By using a hash table of @@ -516,24 +510,52 @@ void fastcall wait_on_page_bit(struct pa } EXPORT_SYMBOL(wait_on_page_bit); +/* + * Protocol for page locking is as follows: + * To try to lock the page, a test_and_set_bit_lock is performed on PG_locked. + * If this fails and we want to wait until the bit becomes unlocked, then + * the task should add itself to the page's waitqueue, then PG_waiters set, + * and finally PG_locked must be retested before going to sleep. + * + * When unlocking, a task must clear the PG_locked bit, and then test the + * PG_waiters bit. If PG_waiters is clear, then there is no chance of a waiter, + * so no need to check the waitqueue. No memory barriers are required in this + * fastpath, because all operations are occurring on the same unsigned long, + * so cache coherency ensures correct memory ordering. + * + * If the unlock finds PG_waiters set, it must clear PG_waiters, and then wake + * up all waiters on the waitqueue (this is done by __wake_page_waiters). 
+ */ + +/* + * If PageWaiters was found to be set at unlock-time, __wake_page_waiters + * should be called to actually perform the wakeups of waiters. + */ +static void __wake_page_waiters(struct page *page) +{ + ClearPageWaiters(page); + /* + * The mb is necessary to enforce ordering between the clear_bit and + * the read of the waitqueue (to avoid SMP races with a parallel + * wait_on_page_locked()) + */ + smp_mb__after_clear_bit(); + + wake_up_page(page, PG_locked); +} + /** * unlock_page - unlock a locked page * @page: the page * - * Unlocks the page and wakes up sleepers in ___wait_on_page_locked(). - * Also wakes sleepers in wait_on_page_writeback() because the wakeup - * mechananism between PageLocked pages and PageWriteback pages is shared. - * But that's OK - sleepers in wait_on_page_writeback() just go back to sleep. - * - * The mb is necessary to enforce ordering between the clear_bit and the read - * of the waitqueue (to avoid SMP races with a parallel wait_on_page_locked()). + * Unlocks the page and wakes up sleepers if PageWaiters was set. 
*/ void fastcall unlock_page(struct page *page) { VM_BUG_ON(!PageLocked(page)); clear_bit_unlock(PG_locked, &page->flags); - smp_mb__after_clear_bit(); - wake_up_page(page, PG_locked); + if (unlikely(PageWaiters(page))) + __wake_page_waiters(page); } EXPORT_SYMBOL(unlock_page); @@ -563,10 +585,16 @@ EXPORT_SYMBOL(end_page_writeback); */ void fastcall __lock_page(struct page *page) { + wait_queue_head_t *wq = page_waitqueue(page); DEFINE_WAIT_BIT(wait, &page->flags, PG_locked); - __wait_on_bit_lock(page_waitqueue(page), &wait, sync_page, - TASK_UNINTERRUPTIBLE); + do { + prepare_to_wait(wq, &wait.wait, TASK_UNINTERRUPTIBLE); + SetPageWaiters(page); + if (likely(PageLocked(page))) + sync_page(page); + } while (!trylock_page(page)); + finish_wait(wq, &wait.wait); } EXPORT_SYMBOL(__lock_page); @@ -576,10 +604,39 @@ EXPORT_SYMBOL(__lock_page); */ void fastcall __lock_page_nosync(struct page *page) { + wait_queue_head_t *wq = page_waitqueue(page); DEFINE_WAIT_BIT(wait, &page->flags, PG_locked); - __wait_on_bit_lock(page_waitqueue(page), &wait, __sleep_on_page_lock, - TASK_UNINTERRUPTIBLE); + + do { + prepare_to_wait(wq, &wait.wait, TASK_UNINTERRUPTIBLE); + SetPageWaiters(page); + if (likely(PageLocked(page))) + io_schedule(); + } while (!trylock_page(page)); + finish_wait(wq, &wait.wait); +} + +void fastcall __wait_on_page_locked(struct page *page) +{ + wait_queue_head_t *wq = page_waitqueue(page); + DEFINE_WAIT_BIT(wait, &page->flags, PG_locked); + + do { + prepare_to_wait(wq, &wait.wait, TASK_UNINTERRUPTIBLE); + SetPageWaiters(page); + if (likely(PageLocked(page))) + sync_page(page); + } while (PageLocked(page)); + finish_wait(wq, &wait.wait); + + /* + * Could skip this, but that would leave PG_waiters dangling + * for random pages. This keeps it tidy. 
+ */ + if (unlikely(PageWaiters(page))) + __wake_page_waiters(page); } +EXPORT_SYMBOL(__wait_on_page_locked); /** * find_get_page - find and get a page reference Index: linux-2.6/kernel/wait.c =================================================================== --- linux-2.6.orig/kernel/wait.c +++ linux-2.6/kernel/wait.c @@ -144,8 +144,7 @@ int wake_bit_function(wait_queue_t *wait = container_of(wait, struct wait_bit_queue, wait); if (wait_bit->key.flags != key->flags || - wait_bit->key.bit_nr != key->bit_nr || - test_bit(key->bit_nr, key->flags)) + wait_bit->key.bit_nr != key->bit_nr) return 0; else return autoremove_wake_function(wait, mode, sync, key);