With practically all the read locks gone from mapping->tree_lock, convert
the lock from an rwlock back to a spinlock. The remaining lock sites,
including the former read-lock sites, are mainly in the IO submission
paths rather than the lookup fastpaths.

Index: linux-2.6/fs/buffer.c
===================================================================
--- linux-2.6.orig/fs/buffer.c
+++ linux-2.6/fs/buffer.c
@@ -855,7 +855,7 @@ int __set_page_dirty_buffers(struct page
 	spin_unlock(&mapping->private_lock);
 
 	if (!TestSetPageDirty(page)) {
-		write_lock_irq(&mapping->tree_lock);
+		spin_lock_irq(&mapping->tree_lock);
 		if (page->mapping) {	/* Race with truncate? */
 			if (mapping_cap_account_dirty(mapping))
 				__inc_page_state(nr_dirty);
@@ -863,7 +863,7 @@ int __set_page_dirty_buffers(struct page
 				page_index(page),
 				PAGECACHE_TAG_DIRTY);
 		}
-		write_unlock_irq(&mapping->tree_lock);
+		spin_unlock_irq(&mapping->tree_lock);
 		__mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
 	}
Index: linux-2.6/fs/inode.c
===================================================================
--- linux-2.6.orig/fs/inode.c
+++ linux-2.6/fs/inode.c
@@ -196,7 +196,7 @@ void inode_init_once(struct inode *inode
 	mutex_init(&inode->i_mutex);
 	init_rwsem(&inode->i_alloc_sem);
 	INIT_RADIX_TREE(&inode->i_data.page_tree, GFP_ATOMIC);
-	rwlock_init(&inode->i_data.tree_lock);
+	spin_lock_init(&inode->i_data.tree_lock);
 	spin_lock_init(&inode->i_data.i_mmap_lock);
 	INIT_LIST_HEAD(&inode->i_data.private_list);
 	spin_lock_init(&inode->i_data.private_lock);
Index: linux-2.6/include/linux/fs.h
===================================================================
--- linux-2.6.orig/include/linux/fs.h
+++ linux-2.6/include/linux/fs.h
@@ -372,7 +372,7 @@ struct backing_dev_info;
 struct address_space {
 	struct inode		*host;		/* owner: inode, block_device */
 	struct radix_tree_root	page_tree;	/* radix tree of all pages */
-	rwlock_t		tree_lock;	/* and rwlock protecting it */
+	spinlock_t		tree_lock;	/* and lock protecting it */
 	unsigned int		i_mmap_writable;/* count VM_SHARED mappings */
 	struct prio_tree_root	i_mmap;		/* tree of private and shared mappings */
 	struct list_head	i_mmap_nonlinear;/*list VM_NONLINEAR mappings */
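
With the type switched, every writer to mapping->page_tree follows one
uniform shape: take tree_lock with spin_lock_irq() (interrupts stay off
because the lock is also taken from end-of-IO context), modify the tree,
unlock. A minimal sketch of that shape, mirroring the add_to_page_cache()
hunks below - illustrative only, example_cache_insert() is made up and is
not added by this patch:

#include <linux/fs.h>
#include <linux/radix-tree.h>
#include <linux/spinlock.h>

static int example_cache_insert(struct address_space *mapping,
				struct page *page, pgoff_t offset)
{
	int error;

	/* All page_tree updates serialize on the plain spinlock now;
	 * readers that still need an exact answer take the same lock. */
	spin_lock_irq(&mapping->tree_lock);
	error = radix_tree_insert(&mapping->page_tree, offset, page);
	if (!error)
		mapping->nrpages++;
	spin_unlock_irq(&mapping->tree_lock);

	return error;
}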
Index: linux-2.6/mm/filemap.c
===================================================================
--- linux-2.6.orig/mm/filemap.c
+++ linux-2.6/mm/filemap.c
@@ -110,7 +110,7 @@ generic_file_direct_IO(int rw, struct ki
 /*
  * Remove a page from the page cache and free it. Caller has to make
  * sure the page is locked and that nobody else uses it - or that usage
- * is safe. The caller must hold a write_lock on the mapping's tree_lock.
+ * is safe. The caller must hold the mapping's tree_lock.
  */
 void __remove_from_page_cache(struct page *page)
 {
@@ -128,9 +128,9 @@ void remove_from_page_cache(struct page
 	BUG_ON(!PageLocked(page));
 
-	write_lock_irq(&mapping->tree_lock);
+	spin_lock_irq(&mapping->tree_lock);
 	__remove_from_page_cache(page);
-	write_unlock_irq(&mapping->tree_lock);
+	spin_unlock_irq(&mapping->tree_lock);
 }
 
 static int sync_page(void *word)
@@ -409,13 +409,13 @@ int add_to_page_cache(struct page *page,
 		page->mapping = mapping;
 		page->index = offset;
-		write_lock_irq(&mapping->tree_lock);
+		spin_lock_irq(&mapping->tree_lock);
 		error = radix_tree_insert(&mapping->page_tree, offset, page);
 		if (!error) {
 			mapping->nrpages++;
 			pagecache_acct(1);
 		}
-		write_unlock_irq(&mapping->tree_lock);
+		spin_unlock_irq(&mapping->tree_lock);
 		radix_tree_preload_end();
 
 		if (error) {
@@ -440,7 +440,7 @@ int __add_to_page_cache(struct page *pag
 	if (error == 0) {
 		SetPageNoNewRefs(page);
-		write_lock_irq(&mapping->tree_lock);
+		spin_lock_irq(&mapping->tree_lock);
 		error = radix_tree_insert(&mapping->page_tree, offset, page);
 		if (!error) {
 			page_cache_get(page);
@@ -450,7 +450,7 @@ int __add_to_page_cache(struct page *pag
 			mapping->nrpages++;
 			pagecache_acct(1);
 		}
-		write_unlock_irq(&mapping->tree_lock);
+		spin_unlock_irq(&mapping->tree_lock);
 		ClearPageNoNewRefs(page);
 		radix_tree_preload_end();
 	}
@@ -708,12 +708,12 @@ unsigned find_get_pages(struct address_s
 	unsigned int i;
 	unsigned int ret;
 
-	read_lock_irq(&mapping->tree_lock);
+	spin_lock_irq(&mapping->tree_lock);
 	ret = radix_tree_gang_lookup(&mapping->page_tree,
 				(void **)pages, start, nr_pages);
 	for (i = 0; i < ret; i++)
 		page_cache_get(pages[i]);
-	read_unlock_irq(&mapping->tree_lock);
+	spin_unlock_irq(&mapping->tree_lock);
 	return ret;
 }
@@ -753,14 +753,14 @@ unsigned find_get_pages_tag(struct addre
 	unsigned int i;
 	unsigned int ret;
 
-	read_lock_irq(&mapping->tree_lock);
+	spin_lock_irq(&mapping->tree_lock);
 	ret = radix_tree_gang_lookup_tag(&mapping->page_tree,
 				(void **)pages, *index, nr_pages, tag);
 	for (i = 0; i < ret; i++)
 		page_cache_get(pages[i]);
 	if (ret)
 		*index = pages[ret - 1]->index + 1;
-	read_unlock_irq(&mapping->tree_lock);
+	spin_unlock_irq(&mapping->tree_lock);
 	return ret;
 }
Index: linux-2.6/mm/swap_state.c
===================================================================
--- linux-2.6.orig/mm/swap_state.c
+++ linux-2.6/mm/swap_state.c
@@ -37,7 +37,7 @@ static struct backing_dev_info swap_back
 struct address_space swapper_space = {
 	.page_tree	= RADIX_TREE_INIT(GFP_ATOMIC|__GFP_NOWARN),
-	.tree_lock	= RW_LOCK_UNLOCKED,
+	.tree_lock	= SPIN_LOCK_UNLOCKED,
 	.a_ops		= &swap_aops,
 	.i_mmap_nonlinear = LIST_HEAD_INIT(swapper_space.i_mmap_nonlinear),
 	.backing_dev_info = &swap_backing_dev_info,
@@ -78,7 +78,7 @@ static int __add_to_swap_cache(struct pa
 	error = radix_tree_preload(gfp_mask);
 	if (!error) {
 		SetPageNoNewRefs(page);
-		write_lock_irq(&swapper_space.tree_lock);
+		spin_lock_irq(&swapper_space.tree_lock);
 		error = radix_tree_insert(&swapper_space.page_tree,
 						entry.val, page);
 		if (!error) {
@@ -89,7 +89,7 @@ static int __add_to_swap_cache(struct pa
 			total_swapcache_pages++;
 			pagecache_acct(1);
 		}
-		write_unlock_irq(&swapper_space.tree_lock);
+		spin_unlock_irq(&swapper_space.tree_lock);
 		ClearPageNoNewRefs(page);
 		radix_tree_preload_end();
 	}
@@ -202,9 +202,9 @@ void delete_from_swap_cache(struct page
 	entry.val = page_private(page);
 
-	write_lock_irq(&swapper_space.tree_lock);
+	spin_lock_irq(&swapper_space.tree_lock);
 	__delete_from_swap_cache(page);
-	write_unlock_irq(&swapper_space.tree_lock);
+	spin_unlock_irq(&swapper_space.tree_lock);
 
 	swap_free(entry);
 	page_cache_release(page);
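
A side note on the find_get_pages()/find_get_pages_tag() hunks above:
these are batch operations, so one lock round trip now covers up to
nr_pages page references and the lost read-side concurrency is cheap.
A caller-side sketch of that amortization - illustrative only,
example_scan_mapping() and the batch size of 16 are made up:

#include <linux/pagemap.h>

static void example_scan_mapping(struct address_space *mapping)
{
	struct page *pages[16];		/* hypothetical batch size */
	pgoff_t index = 0;
	unsigned int nr, i;

	/* Each find_get_pages() call takes tree_lock once and hands
	 * back up to 16 referenced pages. */
	while ((nr = find_get_pages(mapping, index, 16, pages))) {
		for (i = 0; i < nr; i++) {
			/* ... work on pages[i] here ... */
			index = pages[i]->index + 1;
			page_cache_release(pages[i]);
		}
	}
}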
Index: linux-2.6/mm/swapfile.c
===================================================================
--- linux-2.6.orig/mm/swapfile.c
+++ linux-2.6/mm/swapfile.c
@@ -367,13 +367,13 @@ int remove_exclusive_swap_page(struct pa
 	/* Is the only swap cache user the cache itself? */
 	retval = 0;
 	if (p->swap_map[swp_offset(entry)] == 1) {
-		write_lock_irq(&swapper_space.tree_lock);
+		spin_lock_irq(&swapper_space.tree_lock);
 		if (!PageWriteback(page)) {
 			__delete_from_swap_cache(page);
 			SetPageDirty(page);
 			retval = 1;
 		}
-		write_unlock_irq(&swapper_space.tree_lock);
+		spin_unlock_irq(&swapper_space.tree_lock);
 	}
 	spin_unlock(&swap_lock);
Index: linux-2.6/mm/truncate.c
===================================================================
--- linux-2.6.orig/mm/truncate.c
+++ linux-2.6/mm/truncate.c
@@ -67,15 +67,15 @@ invalidate_complete_page(struct address_
 	if (PagePrivate(page) && !try_to_release_page(page, 0))
 		return 0;
 
-	write_lock_irq(&mapping->tree_lock);
+	spin_lock_irq(&mapping->tree_lock);
 	if (PageDirty(page)) {
-		write_unlock_irq(&mapping->tree_lock);
+		spin_unlock_irq(&mapping->tree_lock);
 		return 0;
 	}
 
 	BUG_ON(PagePrivate(page));
 	__remove_from_page_cache(page);
-	write_unlock_irq(&mapping->tree_lock);
+	spin_unlock_irq(&mapping->tree_lock);
 	ClearPageUptodate(page);
 	page_cache_release(page);	/* pagecache ref */
 	return 1;
Index: linux-2.6/mm/vmscan.c
===================================================================
--- linux-2.6.orig/mm/vmscan.c
+++ linux-2.6/mm/vmscan.c
@@ -384,7 +384,7 @@ static int remove_mapping(struct address
 		return 0;		/* truncate got there first */
 
 	SetPageNoNewRefs(page);
-	write_lock_irq(&mapping->tree_lock);
+	spin_lock_irq(&mapping->tree_lock);
 
 	/*
 	 * The non-racy check for busy page. It is critical to check
@@ -400,13 +400,13 @@ static int remove_mapping(struct address
 	if (PageSwapCache(page)) {
 		swp_entry_t swap = { .val = page_private(page) };
 		__delete_from_swap_cache(page);
-		write_unlock_irq(&mapping->tree_lock);
+		spin_unlock_irq(&mapping->tree_lock);
 		swap_free(swap);
 		goto free_it;
 	}
 
 	__remove_from_page_cache(page);
-	write_unlock_irq(&mapping->tree_lock);
+	spin_unlock_irq(&mapping->tree_lock);
 
 free_it:
 	__ClearPageNoNewRefs(page);
@@ -414,7 +414,7 @@ free_it:
 	return 1;
 
 cannot_free:
-	write_unlock_irq(&mapping->tree_lock);
+	spin_unlock_irq(&mapping->tree_lock);
 	ClearPageNoNewRefs(page);
 	return 0;
 }
@@ -736,7 +736,7 @@ int migrate_page_remove_references(struc
 		return 1;
 
 	SetPageNoNewRefs(page);
-	write_lock_irq(&mapping->tree_lock);
+	spin_lock_irq(&mapping->tree_lock);
 
 	radix_pointer = (struct page **)radix_tree_lookup_slot(
 						&mapping->page_tree,
@@ -744,7 +744,7 @@ int migrate_page_remove_references(struc
 	if (!page_mapping(page) || page_count(page) != nr_refs ||
 					*radix_pointer != page) {
-		write_unlock_irq(&mapping->tree_lock);
+		spin_unlock_irq(&mapping->tree_lock);
 		ClearPageNoNewRefs(page);
 		return 1;
 	}
@@ -768,7 +768,7 @@ int migrate_page_remove_references(struc
 	rcu_assign_pointer(*radix_pointer, newpage);
-	write_unlock_irq(&mapping->tree_lock);
+	spin_unlock_irq(&mapping->tree_lock);
 
 	__put_page(page);
 	ClearPageNoNewRefs(page);
 	ClearPageNoNewRefs(newpage);
Index: linux-2.6/mm/page-writeback.c
===================================================================
--- linux-2.6.orig/mm/page-writeback.c
+++ linux-2.6/mm/page-writeback.c
@@ -628,7 +628,7 @@ int __set_page_dirty_nobuffers(struct pa
 		struct address_space *mapping2;
 
 		if (mapping) {
-			write_lock_irq(&mapping->tree_lock);
+			spin_lock_irq(&mapping->tree_lock);
 			mapping2 = page_mapping(page);
 			if (mapping2) { /* Race with truncate? */
 				BUG_ON(mapping2 != mapping);
@@ -637,7 +637,7 @@ int __set_page_dirty_nobuffers(struct pa
 				radix_tree_tag_set(&mapping->page_tree,
 					page_index(page), PAGECACHE_TAG_DIRTY);
 			}
-			write_unlock_irq(&mapping->tree_lock);
+			spin_unlock_irq(&mapping->tree_lock);
 			if (mapping->host) {
 				/* !PageAnon && !swapper_space */
 				__mark_inode_dirty(mapping->host,
@@ -709,21 +709,22 @@ EXPORT_SYMBOL(set_page_dirty_lock);
 int test_clear_page_dirty(struct page *page)
 {
 	struct address_space *mapping = page_mapping(page);
-	unsigned long flags;
 
 	if (mapping) {
-		write_lock_irqsave(&mapping->tree_lock, flags);
-		if (TestClearPageDirty(page)) {
+		unsigned long flags;
+		int ret;
+
+		spin_lock_irqsave(&mapping->tree_lock, flags);
+		ret = TestClearPageDirty(page);
+		if (ret) {
 			radix_tree_tag_clear(&mapping->page_tree,
 						page_index(page),
 						PAGECACHE_TAG_DIRTY);
 			if (mapping_cap_account_dirty(mapping))
 				__dec_page_state(nr_dirty);
-			write_unlock_irqrestore(&mapping->tree_lock, flags);
-			return 1;
 		}
-		write_unlock_irqrestore(&mapping->tree_lock, flags);
-		return 0;
+		spin_unlock_irqrestore(&mapping->tree_lock, flags);
+		return ret;
 	}
 	return TestClearPageDirty(page);
 }
@@ -762,33 +763,32 @@ EXPORT_SYMBOL(clear_page_dirty_for_io);
 int test_clear_page_writeback(struct page *page)
 {
 	struct address_space *mapping = page_mapping(page);
-	int ret;
 
 	if (mapping) {
 		unsigned long flags;
+		int ret;
 
-		write_lock_irqsave(&mapping->tree_lock, flags);
+		spin_lock_irqsave(&mapping->tree_lock, flags);
 		ret = TestClearPageWriteback(page);
 		if (ret)
 			radix_tree_tag_clear(&mapping->page_tree,
 						page_index(page),
 						PAGECACHE_TAG_WRITEBACK);
-		write_unlock_irqrestore(&mapping->tree_lock, flags);
-	} else {
-		ret = TestClearPageWriteback(page);
+		spin_unlock_irqrestore(&mapping->tree_lock, flags);
+		return ret;
 	}
-	return ret;
+	return TestClearPageWriteback(page);
 }
 
 int test_set_page_writeback(struct page *page)
 {
 	struct address_space *mapping = page_mapping(page);
-	int ret;
 
 	if (mapping) {
 		unsigned long flags;
+		int ret;
 
-		write_lock_irqsave(&mapping->tree_lock, flags);
+		spin_lock_irqsave(&mapping->tree_lock, flags);
 		ret = TestSetPageWriteback(page);
 		if (!ret)
 			radix_tree_tag_set(&mapping->page_tree,
@@ -798,11 +798,10 @@ int test_set_page_writeback(struct page
 				radix_tree_tag_clear(&mapping->page_tree,
 						page_index(page),
 						PAGECACHE_TAG_DIRTY);
-		write_unlock_irqrestore(&mapping->tree_lock, flags);
-	} else {
-		ret = TestSetPageWriteback(page);
+		spin_unlock_irqrestore(&mapping->tree_lock, flags);
+		return ret;
 	}
-	return ret;
+	return TestSetPageWriteback(page);
 }
 EXPORT_SYMBOL(test_set_page_writeback);
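
One detail worth calling out in the page-writeback.c hunks: the
writeback tag helpers keep using the _irqsave/_irqrestore variants
because they can be called with interrupts already disabled (end-of-IO
completion paths), whereas spin_lock_irq()/spin_unlock_irq() would
unconditionally re-enable interrupts on unlock. A minimal sketch of the
distinction - illustrative only, example_clear_writeback_tag() is made
up:

#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/radix-tree.h>
#include <linux/spinlock.h>

static void example_clear_writeback_tag(struct address_space *mapping,
					struct page *page)
{
	unsigned long flags;

	/* _irqsave remembers the caller's interrupt state in 'flags'
	 * and _irqrestore puts it back, so this is safe whether or not
	 * the caller already had interrupts off. */
	spin_lock_irqsave(&mapping->tree_lock, flags);
	radix_tree_tag_clear(&mapping->page_tree, page_index(page),
				PAGECACHE_TAG_WRITEBACK);
	spin_unlock_irqrestore(&mapping->tree_lock, flags);
}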
Index: linux-2.6/include/asm-arm/cacheflush.h
===================================================================
--- linux-2.6.orig/include/asm-arm/cacheflush.h
+++ linux-2.6/include/asm-arm/cacheflush.h
@@ -319,9 +319,9 @@ extern void flush_cache_page(struct vm_a
 extern void flush_dcache_page(struct page *);
 
 #define flush_dcache_mmap_lock(mapping) \
-	write_lock_irq(&(mapping)->tree_lock)
+	spin_lock_irq(&(mapping)->tree_lock)
 #define flush_dcache_mmap_unlock(mapping) \
-	write_unlock_irq(&(mapping)->tree_lock)
+	spin_unlock_irq(&(mapping)->tree_lock)
 
 #define flush_icache_user_range(vma,page,addr,len) \
 	flush_dcache_page(page)
Index: linux-2.6/include/asm-parisc/cacheflush.h
===================================================================
--- linux-2.6.orig/include/asm-parisc/cacheflush.h
+++ linux-2.6/include/asm-parisc/cacheflush.h
@@ -58,9 +58,9 @@ flush_user_icache_range(unsigned long st
 extern void flush_dcache_page(struct page *page);
 
 #define flush_dcache_mmap_lock(mapping) \
-	write_lock_irq(&(mapping)->tree_lock)
+	spin_lock_irq(&(mapping)->tree_lock)
 #define flush_dcache_mmap_unlock(mapping) \
-	write_unlock_irq(&(mapping)->tree_lock)
+	spin_unlock_irq(&(mapping)->tree_lock)
 
 #define flush_icache_page(vma,page) do { flush_kernel_dcache_page(page_address(page)); flush_kernel_icache_page(page_address(page)); } while (0)
Index: linux-2.6/drivers/mtd/devices/block2mtd.c
===================================================================
--- linux-2.6.orig/drivers/mtd/devices/block2mtd.c
+++ linux-2.6/drivers/mtd/devices/block2mtd.c
@@ -58,28 +58,27 @@ static void cache_readahead(struct addre
 	end_index = ((isize - 1) >> PAGE_CACHE_SHIFT);
 
-	read_lock_irq(&mapping->tree_lock);
 	for (i = 0; i < PAGE_READAHEAD; i++) {
 		pagei = index + i;
 		if (pagei > end_index) {
 			INFO("Overrun end of disk in cache readahead\n");
 			break;
 		}
+		/* Don't need mapping->tree_lock - lookup can be racy */
+		rcu_read_lock();
 		page = radix_tree_lookup(&mapping->page_tree, pagei);
+		rcu_read_unlock();
 		if (page && (!i))
 			break;
 		if (page)
 			continue;
-		read_unlock_irq(&mapping->tree_lock);
 		page = page_cache_alloc_cold(mapping);
-		read_lock_irq(&mapping->tree_lock);
 		if (!page)
 			break;
 		page->index = pagei;
 		list_add(&page->lru, &page_pool);
 		ret++;
 	}
-	read_unlock_irq(&mapping->tree_lock);
 	if (ret)
 		read_cache_pages(mapping, &page_pool, filler, NULL);
 }
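
The block2mtd change above is the one spot where a read lock is replaced
by nothing at all: the readahead scan only wants a hint about whether a
page is already cached, so a racy RCU lookup is good enough. A sketch of
that pattern in isolation - illustrative only, example_page_is_cached()
is made up:

#include <linux/fs.h>
#include <linux/radix-tree.h>
#include <linux/rcupdate.h>

static int example_page_is_cached(struct address_space *mapping,
				  pgoff_t index)
{
	struct page *page;

	/* RCU keeps the radix tree nodes valid across the walk, but no
	 * reference is taken on the page, so the answer may be stale
	 * the moment rcu_read_unlock() runs.  For opportunistic
	 * readahead a stale answer only costs one wasted allocation or
	 * one skipped page. */
	rcu_read_lock();
	page = radix_tree_lookup(&mapping->page_tree, index);
	rcu_read_unlock();

	return page != NULL;
}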