---
 mm/mmu_skeleton.c      |  262 ++++++++++++++++++++++++++++++++++++++++++++
 mm/mmu_skeleton_rmap.c |  282 +++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 544 insertions(+)

Index: linux-2.6/mm/mmu_skeleton.c
===================================================================
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ linux-2.6/mm/mmu_skeleton.c	2008-02-08 11:08:51.000000000 -0800
@@ -0,0 +1,262 @@
+#include
+#include
+#include
+#include
+#include
+
+/*
+ * Skeleton for an mmu notifier without rmap callbacks and no need to sleep
+ * during invalidate_page().
+ *
+ * (C) 2008 Silicon Graphics, Inc.
+ * Christoph Lameter
+ *
+ * Note that the locking is fairly basic. One can add various optimizations
+ * here and there. There is a single lock for an address space which should be
+ * satisfactory for most cases. If not then the lock can be split like the
+ * pte_lock in Linux. It is most likely best to place the locks in the
+ * page table structure or into whatever the external mmu uses to
+ * track the mappings.
+ *
+ * NOTE(review): the header names of the #include lines above are missing
+ * in this copy of the patch; they must be restored before it can be built.
+ */
+
+struct my_mmu {
+	/* MMU notifier specific fields */
+	struct mmu_notifier notifier;
+	spinlock_t lock;	/* Protects counter and individual zaps */
+	int invalidates;	/* Number of active range_invalidate */
+};
+
+/*
+ * Called with m->lock held
+ */
+static void my_mmu_insert_page(struct my_mmu *m,
+		unsigned long address, unsigned long pfn)
+{
+	/* Must be provided */
+	printk(KERN_INFO "insert page %p address=%lx pfn=%lu\n",
+		m, address, pfn);
+}
+
+/*
+ * Called with m->lock held (optional but usually required to
+ * protect data structures of the driver).
+ */
+static void my_mmu_zap_page(struct my_mmu *m, unsigned long address)
+{
+	/* Must be provided */
+	printk(KERN_INFO "zap page %p address=%lx\n", m, address);
+}
+
+/*
+ * NOTE(review): my_mmu_invalidate_range_begin() and my_mmu_release() call
+ * this without holding m->lock, so an implementation presumably has to take
+ * the lock itself around accesses to the driver's data structures.
+ */
+static void my_mmu_zap_range(struct my_mmu *m,
+	unsigned long start, unsigned long end, int atomic)
+{
+	/* Must be provided */
+	printk(KERN_INFO "zap range %p address=%lx-%lx atomic=%d\n",
+		m, start, end, atomic);
+}
+
+/*
+ * Zap an individual page.
+ *
+ * The page must be locked and a refcount on the page must
+ * be held when this function is called. The page lock is also
+ * acquired when new references are established and so the
+ * page lock effectively takes on the role of synchronization.
+ *
+ * The m->lock is only taken to preserve the integrity of the
+ * driver's data structures since we may also race with
+ * invalidate_range() which will likely access the same mmu
+ * control structures.
+ * m->lock is therefore optional here.
+ */
+static void my_mmu_invalidate_page(struct mmu_notifier *mn,
+	struct mm_struct *mm, unsigned long address)
+{
+	struct my_mmu *m = container_of(mn, struct my_mmu, notifier);
+
+	spin_lock(&m->lock);
+	my_mmu_zap_page(m, address);
+	spin_unlock(&m->lock);
+}
+
+/*
+ * Increment and decrement of the number of range invalidates
+ */
+static inline void inc_active(struct my_mmu *m)
+{
+	spin_lock(&m->lock);
+	m->invalidates++;
+	spin_unlock(&m->lock);
+}
+
+static inline void dec_active(struct my_mmu *m)
+{
+	spin_lock(&m->lock);
+	m->invalidates--;
+	spin_unlock(&m->lock);
+}
+
+static void my_mmu_invalidate_range_begin(struct mmu_notifier *mn,
+	struct mm_struct *mm, unsigned long start, unsigned long end,
+	int atomic)
+{
+	struct my_mmu *m = container_of(mn, struct my_mmu, notifier);
+
+	inc_active(m);		/* Holds off new references */
+	my_mmu_zap_range(m, start, end, atomic);
+}
+
+static void my_mmu_invalidate_range_end(struct mmu_notifier *mn,
+	struct mm_struct *mm, unsigned long start, unsigned long end,
+	int atomic)
+{
+	struct my_mmu *m = container_of(mn, struct my_mmu, notifier);
+
+	dec_active(m);		/* Enables new references */
+}
+
+/*
+ * Populate a page.
+ *
+ * A return value of -EAGAIN means please retry this operation.
+ * In the atomic case a NULL or error result of follow_page() is
+ * returned to the caller unchanged. Otherwise the page that was looked
+ * up is returned; its reference and the page lock have already been
+ * dropped again at that point.
+ *
+ * Acquisition of mmap_sem can be omitted if the caller already holds
+ * the semaphore.
+ */
+struct page *my_mmu_populate_page(struct my_mmu *m,
+	struct vm_area_struct *vma,
+	unsigned long address, int atomic, int write)
+{
+	struct page *page = ERR_PTR(-EAGAIN);
+	int err;
+
+	/* No need to do anything if a range invalidate is running */
+	if (m->invalidates)
+		goto out;
+
+	if (atomic) {
+
+		if (!down_read_trylock(&vma->vm_mm->mmap_sem))
+			goto out;
+
+		/* No concurrent invalidates */
+		page = follow_page(vma, address, FOLL_GET +
+					(write ? FOLL_WRITE : 0));
+
+		up_read(&vma->vm_mm->mmap_sem);
+		if (!page || IS_ERR(page))
+			goto out;
+
+		if (TestSetPageLocked(page)) {
+			/*
+			 * Page lock is contended and we must not wait in
+			 * the atomic case: drop the reference requested
+			 * via FOLL_GET and let the caller retry.
+			 */
+			put_page(page);
+			page = ERR_PTR(-EAGAIN);
+			goto out;
+		}
+
+	} else {
+
+		down_read(&vma->vm_mm->mmap_sem);
+		err = get_user_pages(current, vma->vm_mm, address, 1,
+						write, 1, &page, NULL);
+
+		up_read(&vma->vm_mm->mmap_sem);
+		if (err < 0) {
+			page = ERR_PTR(err);
+			goto out;
+		}
+		lock_page(page);
+
+	}
+
+	/*
+	 * The page is now locked and we are holding a refcount on it.
+	 * So things are tied down. Now we can check the page status.
+	 */
+	if (page_mapped(page)) {
+		/*
+		 * Must take the m->lock here to hold off concurrent
+		 * invalidate_range_b/e. Serialization with invalidate_page()
+		 * occurs because we are holding the page lock.
+		 */
+		spin_lock(&m->lock);
+		if (!m->invalidates)
+			my_mmu_insert_page(m, address, page_to_pfn(page));
+		spin_unlock(&m->lock);
+	}
+	unlock_page(page);
+	put_page(page);
+out:
+	return page;
+}
+
+/*
+ * All other threads accessing this mm_struct must have terminated by now.
+ */
+static void my_mmu_release(struct mmu_notifier *mn, struct mm_struct *mm)
+{
+	struct my_mmu *m = container_of(mn, struct my_mmu, notifier);
+
+	my_mmu_zap_range(m, 0, TASK_SIZE, 0);
+	kfree(m);
+	printk(KERN_INFO "MMU Notifier terminating\n");
+}
+
+struct mmu_notifier_ops my_mmu_ops = {
+	my_mmu_release,
+	NULL,			/* No aging function */
+	my_mmu_invalidate_page,
+	my_mmu_invalidate_range_begin,
+	my_mmu_invalidate_range_end
+};
+
+/*
+ * This function must be called to activate callbacks from a process
+ *
+ * Returns 0 on success or -ENOMEM if the my_mmu structure cannot be
+ * allocated.
+ */
+int my_mmu_attach_to_process(struct mm_struct *mm)
+{
+	struct my_mmu *m = kzalloc(sizeof(struct my_mmu), GFP_KERNEL);
+
+	if (!m)
+		return -ENOMEM;
+
+	spin_lock_init(&m->lock);
+	m->notifier.ops = &my_mmu_ops;
+
+	/*
+	 * mmap_sem handling can be omitted if it is guaranteed that
+	 * the context from which my_mmu_attach_to_process is called
+	 * is already holding a writelock on mmap_sem.
+	 */
+	down_write(&mm->mmap_sem);
+	mmu_notifier_register(&m->notifier, mm);
+	up_write(&mm->mmap_sem);
+
+	/*
+	 * RCU sync is expensive but necessary if we need to guarantee
+	 * that multiple threads running on other cpus have seen the
+	 * notifier changes.
+	 */
+	synchronize_rcu();
+	return 0;
+}
+
Index: linux-2.6/mm/mmu_skeleton_rmap.c
===================================================================
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ linux-2.6/mm/mmu_skeleton_rmap.c	2008-02-08 11:14:45.000000000 -0800
@@ -0,0 +1,282 @@
+#include
+#include
+#include
+#include
+#include
+
+/*
+ * Skeleton for an mmu notifier with rmap callbacks and sleeping during
+ * invalidate_page.
+ *
+ * (C) 2008 Silicon Graphics, Inc.
+ * Christoph Lameter
+ *
+ * Note that the locking is fairly basic. One can add various optimizations
+ * here and there. There is a single lock for an address space which should be
+ * satisfactory for most cases. If not then the lock can be split like the
+ * pte_lock in Linux. It is most likely best to place the locks in the
+ * page table structure or into whatever the external mmu uses to
+ * track the mappings.
+ *
+ * NOTE(review): the header names of the #include lines above are missing
+ * in this copy of the patch; they must be restored before it can be built.
+ */
+
+struct my_mmu {
+	/* MMU notifier specific fields */
+	struct mmu_notifier notifier;
+	spinlock_t lock;	/* Protects counter and individual zaps */
+	int invalidates;	/* Number of active range_invalidate */
+
+	/* Rmap support */
+	struct list_head list;	/* rmap list of my_mmu structs */
+	unsigned long base;
+};
+
+/*
+ * Called with m->lock held
+ */
+static void my_mmu_insert_page(struct my_mmu *m,
+		unsigned long address, unsigned long pfn)
+{
+	/* Must be provided */
+	printk(KERN_INFO "insert page %p address=%lx pfn=%lu\n",
+		m, address, pfn);
+}
+
+/*
+ * NOTE(review): my_mmu_invalidate_range_begin() and my_mmu_release() call
+ * this without holding m->lock, so an implementation presumably has to take
+ * the lock itself around accesses to the driver's data structures.
+ */
+static void my_mmu_zap_range(struct my_mmu *m,
+	unsigned long start, unsigned long end, int atomic)
+{
+	/* Must be provided */
+	printk(KERN_INFO "zap range %p address=%lx-%lx atomic=%d\n",
+		m, start, end, atomic);
+}
+
+/*
+ * Called with m->lock held (optional but usually required to
+ * protect data structures of the driver).
+ */
+static void my_mmu_zap_page(struct my_mmu *m, unsigned long address)
+{
+	/* Must be provided */
+	printk(KERN_INFO "zap page %p address=%lx\n", m, address);
+}
+
+/*
+ * Increment and decrement of the number of range invalidates
+ */
+static inline void inc_active(struct my_mmu *m)
+{
+	spin_lock(&m->lock);
+	m->invalidates++;
+	spin_unlock(&m->lock);
+}
+
+static inline void dec_active(struct my_mmu *m)
+{
+	spin_lock(&m->lock);
+	m->invalidates--;
+	spin_unlock(&m->lock);
+}
+
+static void my_mmu_invalidate_range_begin(struct mmu_notifier *mn,
+	struct mm_struct *mm, unsigned long start, unsigned long end,
+	int atomic)
+{
+	struct my_mmu *m = container_of(mn, struct my_mmu, notifier);
+
+	inc_active(m);		/* Holds off new references */
+	my_mmu_zap_range(m, start, end, atomic);
+}
+
+static void my_mmu_invalidate_range_end(struct mmu_notifier *mn,
+	struct mm_struct *mm, unsigned long start, unsigned long end,
+	int atomic)
+{
+	struct my_mmu *m = container_of(mn, struct my_mmu, notifier);
+
+	dec_active(m);		/* Enables new references */
+}
+
+/*
+ * Populate a page.
+ *
+ * A return value of -EAGAIN means please retry this operation.
+ * An error from get_user_pages() is returned as an error pointer.
+ * Otherwise the page that was looked up is returned; its reference and
+ * the page lock have already been dropped again at that point.
+ *
+ * Acquisition of mmap_sem can be omitted if the caller already holds
+ * the semaphore.
+ */
+struct page *my_mmu_populate_page(struct my_mmu *m,
+	struct vm_area_struct *vma,
+	unsigned long address, int write)
+{
+	struct page *page = ERR_PTR(-EAGAIN);
+	int err;
+
+	/*
+	 * No need to do anything if a range invalidate is running
+	 * Could use a wait queue here to avoid return -EAGAIN.
+	 */
+	if (m->invalidates)
+		goto out;
+
+	down_read(&vma->vm_mm->mmap_sem);
+	err = get_user_pages(current, vma->vm_mm, address, 1,
+					write, 1, &page, NULL);
+
+	up_read(&vma->vm_mm->mmap_sem);
+	if (err < 0) {
+		page = ERR_PTR(err);
+		goto out;
+	}
+	lock_page(page);
+
+	/*
+	 * The page is now locked and we are holding a refcount on it.
+	 * So things are tied down. Now we can check the page status.
+	 */
+	if (page_mapped(page)) {
+		/* Could do some preprocessing here that can sleep */
+		spin_lock(&m->lock);
+		if (!m->invalidates)
+			my_mmu_insert_page(m, address, page_to_pfn(page));
+		spin_unlock(&m->lock);
+		/* Could do some postprocessing here that could also sleep */
+	}
+	unlock_page(page);
+	put_page(page);
+out:
+	return page;
+}
+
+/*
+ * All other threads accessing this mm_struct must have terminated by now.
+ */
+static void my_mmu_release(struct mmu_notifier *mn, struct mm_struct *mm)
+{
+	struct my_mmu *m = container_of(mn, struct my_mmu, notifier);
+
+	my_mmu_zap_range(m, 0, TASK_SIZE, 0);
+	list_del(&m->list);
+	kfree(m);
+	printk(KERN_INFO "MMU Notifier terminating\n");
+}
+
+struct mmu_notifier_ops my_mmu_ops = {
+	my_mmu_release,
+	NULL,			/* No aging function */
+	NULL,			/* No atomic invalidate_page function */
+	my_mmu_invalidate_range_begin,
+	my_mmu_invalidate_range_end
+};
+
+/*
+ * Rmap specific fields
+ *
+ * NOTE(review): my_mmu_list is global but nothing in this file serializes
+ * list_add()/list_del() against each other or against the list walk in
+ * my_mmu_rmap_invalidate_page(); my_mmu_attach_to_process() only holds the
+ * mmap_sem of the mm that is being attached.
+ */
+LIST_HEAD(my_mmu_list);
+
+/*
+ * This function must be called to activate callbacks from a process
+ *
+ * Returns 0 on success or -ENOMEM if the my_mmu structure cannot be
+ * allocated.
+ */
+int my_mmu_attach_to_process(struct mm_struct *mm)
+{
+	struct my_mmu *m = kzalloc(sizeof(struct my_mmu), GFP_KERNEL);
+
+	if (!m)
+		return -ENOMEM;
+
+	spin_lock_init(&m->lock);
+	m->notifier.ops = &my_mmu_ops;
+
+	/*
+	 * mmap_sem handling can be omitted if it is guaranteed that
+	 * the context from which my_mmu_attach_to_process is called
+	 * is already holding a writelock on mmap_sem.
+	 */
+	down_write(&mm->mmap_sem);
+	list_add(&m->list, &my_mmu_list);
+	mmu_notifier_register(&m->notifier, mm);
+	up_write(&mm->mmap_sem);
+
+	/*
+	 * RCU sync is expensive but necessary if we need to guarantee
+	 * that multiple threads running on other cpus have seen the
+	 * notifier changes.
+	 */
+	synchronize_rcu();
+	return 0;
+}
+
+
+static void my_sleeping_invalidate_page(struct my_mmu *m, unsigned long address)
+{
+	/* Must be provided */
+
+	spin_lock(&m->lock);	/* Only taken to ensure mmu data integrity */
+	my_mmu_zap_page(m, address);
+	spin_unlock(&m->lock);
+	printk(KERN_INFO "Sleeping invalidate_page %p address=%lx\n",
+		m, address);
+}
+
+/*
+ * Placeholder that always returns 0. my_mmu_rmap_invalidate_page() treats
+ * every return value other than -EFAULT as an address to invalidate.
+ */
+static unsigned long my_mmu_find_addr(struct my_mmu *m, struct page *page)
+{
+	/* Determine the VMA for a page in a mmu segment */
+	return 0;
+}
+
+/*
+ * A reference must be held on the page passed and the page passed
+ * must be locked. No spinlocks are held. invalidate_page() is held
+ * off by us holding the page lock.
+ */
+static void my_mmu_rmap_invalidate_page(struct mmu_rmap_notifier *mrn,
+	struct page *page)
+{
+	struct my_mmu *m;
+
+	BUG_ON(!PageLocked(page));
+	list_for_each_entry(m, &my_mmu_list, list) {
+		unsigned long address = my_mmu_find_addr(m, page);
+
+		if (address != -EFAULT)
+			my_sleeping_invalidate_page(m, address);
+	}
+}
+
+struct mmu_rmap_notifier_ops my_mmu_rmap_ops = {
+	.invalidate_page = my_mmu_rmap_invalidate_page
+};
+
+struct mmu_rmap_notifier my_mmu_rmap_notifier = {
+	.ops = &my_mmu_rmap_ops
+};
+
+int my_mmu_init(void)
+{
+	mmu_rmap_notifier_register(&my_mmu_rmap_notifier);
+	return 0;
+}
+
+late_initcall(my_mmu_init);
+