---
 Documentation/mmu_notifier/skeleton.c      |   90 +++++++++++++----------
 Documentation/mmu_notifier/skeleton_rmap.c |   67 ++++++++++++---------
 2 files changed, 83 insertions(+), 74 deletions(-)

Index: linux-2.6/Documentation/mmu_notifier/skeleton.c
===================================================================
--- linux-2.6.orig/Documentation/mmu_notifier/skeleton.c	2008-02-14 19:10:37.000000000 -0800
+++ linux-2.6/Documentation/mmu_notifier/skeleton.c	2008-02-14 20:54:11.000000000 -0800
@@ -61,25 +61,23 @@ static void my_mmu_zap_range(struct my_m
 /*
  * Zap an individual page.
  *
- * The page must be locked and a refcount on the page must
- * be held when this function is called. The page lock is also
- * acquired when new references are established and the
- * page lock effecively takes on the role of synchronization.
+ * The m->lock serializes page invalidation vs. the populate function.
  *
- * The m->lock is only taken to preserve the integrity fo the
- * drivers data structures since we may also race with
- * invalidate_range() which will likely access the same mmu
- * control structures.
- * m->lock is therefore optional here.
+ * If the invalidate is not successful then the VM will try again later.
+ *
+ * A trylock must be used here because invalidate_page() is called while
+ * the pte lock is held by try_to_unmap(). The populate function takes
+ * the m->lock and then calls follow_page() (which acquires the pte lock).
  */
 static void my_mmu_invalidate_page(struct mmu_notifier *mn,
 	struct mm_struct *mm, unsigned long address)
 {
 	struct my_mmu *m = container_of(mn, struct my_mmu, notifier);
 
-	spin_lock(&m->lock);
-	my_mmu_zap_page(m, address);
-	spin_unlock(&m->lock);
+	if (spin_trylock(&m->lock)) {
+		my_mmu_zap_page(m, address);
+		spin_unlock(&m->lock);
+	}
 }
 
 /*
@@ -132,57 +130,55 @@ struct page *my_mmu_populate_page(struct
 {
 	struct page *page = ERR_PTR(-EAGAIN);
 	int err;
+	int done = 0;
 
 	/* No need to do anything if a range invalidate is running */
 	if (m->invalidates)
-		goto out;
+		return page;
 
 	if (atomic) {
 		if (!down_read_trylock(&vma->vm_mm->mmap_sem))
-			goto out;
+			return page;
+	} else
+		down_read(&vma->vm_mm->mmap_sem);
 
-		/* No concurrent invalidates */
-		page = follow_page(vma, address, FOLL_GET +
-				(write ? FOLL_WRITE : 0));
+	do {
+		/*
+		 * Take the m->lock here to hold off concurrent invalidates.
+		 */
+		spin_lock(&m->lock);
+		if (m->invalidates) {
+			spin_unlock(&m->lock);
+			break;
+		}
 
-		up_read(&vma->vm_mm->mmap_sem);
-		if (!page || IS_ERR(page) || TestSetPageLocked(page))
-			goto out;
+		page = follow_page(vma, address, (write ? FOLL_WRITE : 0));
 
-	} else {
+		if (page && !IS_ERR(page)) {
+			my_mmu_insert_page(m, address, page_to_pfn(page));
+			done = 1;
+		}
 
-		down_read(&vma->vm_mm->mmap_sem);
+		spin_unlock(&m->lock);
+
+		if (done || atomic)
+			break;
+
+		/*
+		 * The PTE is not present. We need the more complex handling
+		 * provided by get_user_pages().
+		 */
 		err = get_user_pages(current, vma->vm_mm, address, 1,
-					write, 1, &page, NULL);
+					write, 1, NULL, NULL);
 
-		up_read(&vma->vm_mm->mmap_sem);
 		if (err < 0) {
 			page = ERR_PTR(err);
-			goto out;
+			break;
 		}
-		lock_page(page);
-	}
+	} while (!done);
 
-	/*
-	 * The page is now locked and we are holding a refcount on it.
-	 * So things are tied down. Now we can check the page status.
-	 */
-	if (page_mapped(page)) {
-		/*
-		 * Must take the m->lock here to hold off concurrent
-		 * invalidate_range_b/e. Serialization with invalidate_page()
-		 * occurs because we are holding the page lock.
-		 */
-		spin_lock(&m->lock);
-		if (!m->invalidates)
-			my_mmu_insert_page(m, address, page_to_pfn(page));
-		spin_unlock(&m->lock);
-	}
-	unlock_page(page);
-	put_page(page);
-out:
+	up_read(&vma->vm_mm->mmap_sem);
 	return page;
 }
 
@@ -217,7 +213,7 @@ int my_mmu_attach_to_process(struct mm_s
 		return -ENOMEM;
 
 	m->notifier.ops = &my_mmu_ops;
-	spin_lock_init(&mm->lock);
+	spin_lock_init(&m->lock);
 
 	/*
 	 * mmap_sem handling can be omitted if it is guaranteed that

Index: linux-2.6/Documentation/mmu_notifier/skeleton_rmap.c
===================================================================
--- linux-2.6.orig/Documentation/mmu_notifier/skeleton_rmap.c	2008-02-14 19:46:15.000000000 -0800
+++ linux-2.6/Documentation/mmu_notifier/skeleton_rmap.c	2008-02-14 20:54:43.000000000 -0800
@@ -112,40 +112,53 @@ struct page *my_mmu_populate_page(struct
 {
 	struct page *page = ERR_PTR(-EAGAIN);
 	int err;
+	int done = 0;
 
-	/*
-	 * No need to do anything if a range invalidate is running
-	 * Could use a wait queue here to avoid returning -EAGAIN.
-	 */
+	/* No need to do anything if a range invalidate is running */
 	if (m->invalidates)
-		goto out;
+		return page;
 
 	down_read(&vma->vm_mm->mmap_sem);
-	err = get_user_pages(current, vma->vm_mm, address, 1,
-				write, 1, &page, NULL);
-
-	up_read(&vma->vm_mm->mmap_sem);
-	if (err < 0) {
-		page = ERR_PTR(err);
-		goto out;
-	}
-	lock_page(page);
-
-	/*
-	 * The page is now locked and we are holding a refcount on it.
-	 * So things are tied down. Now we can check the page status.
-	 */
-	if (page_mapped(page)) {
-		/* Could do some preprocessing here. Can sleep */
+	do {
+		/*
+		 * Take the m->lock here to hold off concurrent invalidates.
+		 */
 		spin_lock(&m->lock);
-		if (!m->invalidates)
+		if (m->invalidates) {
+			spin_unlock(&m->lock);
+			/*
+			 * Could block on a waitqueue here instead of
+			 * returning -EAGAIN
+			 */
+			break;
+		}
+
+		page = follow_page(vma, address, (write ? FOLL_WRITE : 0));
+		if (page && !IS_ERR(page)) {
 			my_mmu_insert_page(m, address, page_to_pfn(page));
+			done = 1;
+		}
+
 		spin_unlock(&m->lock);
-		/* Could do some postprocessing here. Can sleep */
-	}
-	unlock_page(page);
-	put_page(page);
-out:
+
+		if (done)
+			break;
+
+		/*
+		 * The PTE is not present. We need the more complex handling
+		 * provided by get_user_pages().
+		 */
+		err = get_user_pages(current, vma->vm_mm, address, 1,
+					write, 1, NULL, NULL);
+
+		if (err < 0) {
+			page = ERR_PTR(err);
+			break;
+		}
+
+	} while (!done);
+
+	up_read(&vma->vm_mm->mmap_sem);
 	return page;
 }
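
The locking rule this patch establishes is easy to state but easy to get
wrong: the invalidate side may enter with the pte lock already held (from
try_to_unmap), while the populate side takes m->lock first and then lets
follow_page() take the pte lock. Blocking on m->lock in invalidate_page()
would therefore be a classic A-B/B-A deadlock. Below is a minimal userspace
analogue, not part of the patch: pthread mutexes stand in for the two kernel
spinlocks, and the names (pte_lock, m_lock, populate, invalidate) are
hypothetical illustration only.

#include <pthread.h>
#include <stdio.h>

/* Userspace stand-ins for the two kernel locks (hypothetical names). */
static pthread_mutex_t pte_lock = PTHREAD_MUTEX_INITIALIZER;	/* kernel: pte lock */
static pthread_mutex_t m_lock = PTHREAD_MUTEX_INITIALIZER;	/* kernel: m->lock */

/* Populate side: takes m_lock first, then the pte lock (as follow_page would). */
static void *populate(void *arg)
{
	(void)arg;
	pthread_mutex_lock(&m_lock);
	pthread_mutex_lock(&pte_lock);	/* follow_page() acquires the pte lock */
	/* ... establish the mapping ... */
	pthread_mutex_unlock(&pte_lock);
	pthread_mutex_unlock(&m_lock);
	return NULL;
}

/* Invalidate side: entered with the pte lock already held (as in try_to_unmap). */
static void *invalidate(void *arg)
{
	(void)arg;
	pthread_mutex_lock(&pte_lock);
	/*
	 * Blocking on m_lock here could deadlock against populate(),
	 * which holds m_lock and wants the pte lock. Trylock instead;
	 * on failure the VM simply retries the unmap later.
	 */
	if (pthread_mutex_trylock(&m_lock) == 0) {
		/* ... zap the page ... */
		pthread_mutex_unlock(&m_lock);
	} else
		puts("invalidate skipped, VM will retry");
	pthread_mutex_unlock(&pte_lock);
	return NULL;
}

int main(void)
{
	pthread_t a, b;

	pthread_create(&a, NULL, populate, NULL);
	pthread_create(&b, NULL, invalidate, NULL);
	pthread_join(a, NULL);
	pthread_join(b, NULL);
	return 0;
}

Build with "cc -pthread sketch.c". Replacing the trylock with a plain lock
reintroduces the ordering violation that the patch removes from the skeleton
drivers.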