From: Christoph Lameter Wait for migrating page after incr of page count under anon_vma lock This patch replaces the yield() in do_swap_page with a call to migration_entry_wait() in the migration code. migration_entry_wait() locks the anonymous vma of the page and then safely increments page count before waiting for the page to become unlocked. Migration entries are only removed while holding the anon_vma lock (See remove_migration_ptes). Therefore we can be sure that the migration pte is not modified and the underlying page is not removed while holding this lock. Also make is_migration_entry() unlikely and clean up a unnecessary BUG_ON. Signed-off-by: Christoph Lameter Signed-off-by: Andrew Morton --- include/linux/swapops.h | 6 ++-- mm/memory.c | 4 +- mm/migrate.c | 51 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 57 insertions(+), 4 deletions(-) diff -puN include/linux/swapops.h~wait-for-migrating-page-after-incr-of-page-count-under-anon_vma-lock include/linux/swapops.h --- devel/include/linux/swapops.h~wait-for-migrating-page-after-incr-of-page-count-under-anon_vma-lock 2006-04-14 23:06:35.000000000 -0700 +++ devel-akpm/include/linux/swapops.h 2006-04-14 23:06:35.000000000 -0700 @@ -77,7 +77,7 @@ static inline swp_entry_t make_migration static inline int is_migration_entry(swp_entry_t entry) { - return swp_type(entry) == SWP_TYPE_MIGRATION; + return unlikely(swp_type(entry) == SWP_TYPE_MIGRATION); } static inline struct page *migration_entry_to_page(swp_entry_t entry) @@ -88,14 +88,16 @@ static inline struct page *migration_ent * corresponding page is locked */ BUG_ON(!PageLocked(p)); - BUG_ON(!is_migration_entry(entry)); return p; } + +extern void migration_entry_wait(swp_entry_t, pte_t *); #else #define make_migration_entry(page) swp_entry(0, 0) #define is_migration_entry(swp) 0 #define migration_entry_to_page(swp) NULL +static inline void migration_entry_wait(swp_entry_t entry, pte_t *ptep) { } #endif diff -puN mm/memory.c~wait-for-migrating-page-after-incr-of-page-count-under-anon_vma-lock mm/memory.c --- devel/mm/memory.c~wait-for-migrating-page-after-incr-of-page-count-under-anon_vma-lock 2006-04-14 23:06:35.000000000 -0700 +++ devel-akpm/mm/memory.c 2006-04-14 23:06:35.000000000 -0700 @@ -1880,8 +1880,8 @@ static int do_swap_page(struct mm_struct entry = pte_to_swp_entry(orig_pte); - if (unlikely(is_migration_entry(entry))) { - yield(); + if (is_migration_entry(entry)) { + migration_entry_wait(entry, page_table); goto out; } diff -puN mm/migrate.c~wait-for-migrating-page-after-incr-of-page-count-under-anon_vma-lock mm/migrate.c --- devel/mm/migrate.c~wait-for-migrating-page-after-incr-of-page-count-under-anon_vma-lock 2006-04-14 23:06:35.000000000 -0700 +++ devel-akpm/mm/migrate.c 2006-04-14 23:06:35.000000000 -0700 @@ -174,6 +174,57 @@ out: } /* + * Something used the pte of a page under migration. We need to + * get to the page and wait until migration is finished. + * When we return from this function the fault will be retried. + * + * This function is called from do_swap_page(). + */ +void migration_entry_wait(swp_entry_t entry, pte_t *ptep) +{ + struct page *page = migration_entry_to_page(entry); + unsigned long mapping = (unsigned long)page->mapping; + struct anon_vma *anon_vma; + pte_t pte; + + if (!mapping || + (mapping & PAGE_MAPPING_ANON) == 0) + return; + /* + * We hold the mmap_sem lock. + */ + anon_vma = (struct anon_vma *) (mapping - PAGE_MAPPING_ANON); + + /* + * The anon_vma lock is also taken while removing the migration + * entries. Take the lock here to insure that the migration pte + * is not modified while we increment the page count. + * This is similar to find_get_page(). + */ + spin_lock(&anon_vma->lock); + pte = *ptep; + if (pte_present(pte) || pte_none(pte) || pte_file(pte)) { + spin_unlock(&anon_vma->lock); + return; + } + entry = pte_to_swp_entry(pte); + if (!is_migration_entry(entry) || + migration_entry_to_page(entry) != page) { + /* Migration entry is gone */ + spin_unlock(&anon_vma->lock); + return; + } + /* Pages with migration entries must be locked */ + BUG_ON(!PageLocked(page)); + + /* Phew. Finally we can increment the refcount */ + get_page(page); + spin_unlock(&anon_vma->lock); + wait_on_page_locked(page); + put_page(page); +} + +/* * Get rid of all migration entries and replace them by * references to the indicated page. * _