Index: linux-2.6.8.1/fs/dcache.c
===================================================================
--- linux-2.6.8.1.orig/fs/dcache.c	2004-08-17 15:22:45.893155591 +1000
+++ linux-2.6.8.1/fs/dcache.c	2004-08-17 15:24:37.503327400 +1000
@@ -379,6 +379,11 @@ static void prune_dcache(int count)
 		struct dentry *dentry;
 		struct list_head *tmp;
 
+		if (unlikely((count & 255) == 0)) {
+			spin_unlock(&dcache_lock);
+			cpu_relax();
+			spin_lock(&dcache_lock);
+		}
 		tmp = dentry_unused.prev;
 		if (tmp == &dentry_unused)
 			break;
Index: linux-2.6.8.1/fs/jbd/checkpoint.c
===================================================================
--- linux-2.6.8.1.orig/fs/jbd/checkpoint.c	2004-08-17 15:22:47.604881807 +1000
+++ linux-2.6.8.1/fs/jbd/checkpoint.c	2004-08-17 15:24:36.636465698 +1000
@@ -132,6 +132,7 @@ static int __cleanup_transaction(journal
 {
 	struct journal_head *jh, *next_jh, *last_jh;
 	struct buffer_head *bh;
+	unsigned char nr_buffers = 1;
 	int ret = 0;
 
 	assert_spin_locked(&journal->j_list_lock);
@@ -185,9 +186,15 @@ static int __cleanup_transaction(journal
 			journal_remove_journal_head(bh);
 			__brelse(bh);
 			ret = 1;
+			nr_buffers++;
 		} else {
 			jbd_unlock_bh_state(bh);
 		}
+		if (nr_buffers == 0) {
+			spin_unlock(&journal->j_list_lock);
+			cpu_relax();
+			goto out_return_1;
+		}
 		jh = next_jh;
 	} while (jh != last_jh);
 
@@ -455,9 +462,8 @@ int cleanup_journal_tail(journal_t *jour
  *
  * Find all the written-back checkpoint buffers in the journal and release them.
  *
- * Called with the journal locked.
- * Called with j_list_lock held.
- * Returns number of bufers reaped (for debug)
+ * Called with j_list_lock held, drops it.
+ * Returns number of buffers reaped
  */
 
 int __journal_clean_checkpoint_list(journal_t *journal)
@@ -467,7 +473,7 @@ int __journal_clean_checkpoint_list(jour
 
 	transaction = journal->j_checkpoint_transactions;
 	if (transaction == 0)
-		goto out;
+		goto out_unlock;
 	last_transaction = transaction->t_cpprev;
 	next_transaction = transaction;
 	do {
@@ -484,13 +490,41 @@ int __journal_clean_checkpoint_list(jour
 			do {
 				jh = next_jh;
 				next_jh = jh->b_cpnext;
-				/* Use trylock because of the ranknig */
+				/* Use trylock because of the ranking */
 				if (jbd_trylock_bh_state(jh2bh(jh)))
 					ret += __try_to_free_cp_buf(jh);
 			} while (jh != last_jh);
 		}
+#ifdef CONFIG_PREEMPT
+		/*
+		 * This is potentially sucky: semi-quadratic performance if
+		 * there are a lot of dirty buffers.  So only do it if the user
+		 * has chosen a preemptible kernel.  If !CONFIG_PREEMPT we're
+		 * optimising for straight-line performance, after all.
+		 * We don't test cond_resched() here because another CPU could
+		 * be waiting on j_list_lock() while holding a different lock.
+		 */
+		if ((ret & 127) == 127) {
+			/*
+			 * We need to schedule away.  Rotate both this
+			 * transaction's buffer list and the checkpoint list to
+			 * try to avoid quadratic behaviour.
+			 */
+			jh = transaction->t_checkpoint_list;
+			if (jh)
+				transaction->t_checkpoint_list = jh->b_cpnext;
+
+			transaction = journal->j_checkpoint_transactions;
+			if (transaction)
+				journal->j_checkpoint_transactions =
+						transaction->t_cpnext;
+			spin_unlock(&journal->j_list_lock);
+			return ret;
+		}
+#endif
 	} while (transaction != last_transaction);
-out:
+out_unlock:
+	spin_unlock(&journal->j_list_lock);
 	return ret;
 }
Index: linux-2.6.8.1/fs/jbd/commit.c
===================================================================
--- linux-2.6.8.1.orig/fs/jbd/commit.c	2004-08-17 15:22:46.321087144 +1000
+++ linux-2.6.8.1/fs/jbd/commit.c	2004-08-17 15:24:37.061397905 +1000
@@ -114,6 +114,7 @@ void journal_commit_transaction(journal_
 	int space_left = 0;
 	int first_tag = 0;
 	int tag_flag;
+	int nr_buffers = 0;
 	int i;
 
 	/*
@@ -208,9 +209,16 @@ void journal_commit_transaction(journal_
 	 * checkpoint lists.  We do this *before* commit because it potentially
 	 * frees some memory
 	 */
-	spin_lock(&journal->j_list_lock);
-	__journal_clean_checkpoint_list(journal);
-	spin_unlock(&journal->j_list_lock);
+	spin_unlock(&journal->j_state_lock);
+	{
+		int nr_cleaned;
+
+		do {
+			spin_lock(&journal->j_list_lock);
+			nr_cleaned = __journal_clean_checkpoint_list(journal);
+		} while (nr_cleaned);
+	}
+	spin_lock(&journal->j_state_lock);
 
 	jbd_debug (3, "JBD: commit phase 1\n");
 
@@ -262,8 +270,10 @@ write_out_data:
 				__journal_file_buffer(jh, commit_transaction,
 							BJ_Locked);
 				jbd_unlock_bh_state(bh);
-				if (need_resched()) {
+				nr_buffers++;
+				if ((nr_buffers & 15) == 0 || need_resched()) {
 					spin_unlock(&journal->j_list_lock);
+					cpu_relax();
 					goto write_out_data;
 				}
 			} else {
@@ -288,8 +298,10 @@ write_out_data:
 				jbd_unlock_bh_state(bh);
 				journal_remove_journal_head(bh);
 				put_bh(bh);
-				if (need_resched()) {
+				nr_buffers++;
+				if ((nr_buffers & 15) == 0 || need_resched()) {
 					spin_unlock(&journal->j_list_lock);
+					cpu_relax();
 					goto write_out_data;
 				}
 			}
@@ -333,7 +345,8 @@ write_out_data:
 				jbd_unlock_bh_state(bh);
 			}
 			put_bh(bh);
-			if (need_resched()) {
+			nr_buffers++;
+			if ((nr_buffers & 15) == 0 || need_resched()) {
 				spin_unlock(&journal->j_list_lock);
 				cond_resched();
 				spin_lock(&journal->j_list_lock);
Index: linux-2.6.8.1/fs/jbd/recovery.c
===================================================================
--- linux-2.6.8.1.orig/fs/jbd/recovery.c	2004-08-17 15:22:46.423070832 +1000
+++ linux-2.6.8.1/fs/jbd/recovery.c	2004-08-17 15:24:36.109549763 +1000
@@ -354,6 +354,8 @@ static int do_one_pass(journal_t *journa
 		struct buffer_head *	obh;
 		struct buffer_head *	nbh;
 
+		cond_resched();		/* We're under lock_kernel() */
+
 		/* If we already know where to stop the log traversal,
 		 * check right now that we haven't gone past the end of
 		 * the log. */
Index: linux-2.6.8.1/mm/memory.c
===================================================================
--- linux-2.6.8.1.orig/mm/memory.c	2004-08-17 15:24:32.320154356 +1000
+++ linux-2.6.8.1/mm/memory.c	2004-08-17 15:24:38.823116847 +1000
@@ -567,12 +567,15 @@ int unmap_vmas(struct mmu_gather **tlbp,
 		zap_bytes -= block;
 		if ((long)zap_bytes > 0)
 			continue;
-		if (!atomic && need_resched()) {
+		if (!atomic) {
 			int fullmm = tlb_is_full_mm(*tlbp);
+
 			tlb_finish_mmu(*tlbp, tlb_start, start);
-			cond_resched_lock(&mm->page_table_lock);
-			*tlbp = tlb_gather_mmu(mm, fullmm);
+			spin_unlock(&mm->page_table_lock);
+			cond_resched();
 			tlb_start_valid = 0;
+			spin_lock(&mm->page_table_lock);
+			*tlbp = tlb_gather_mmu(mm, fullmm);
 		}
 		zap_bytes = ZAP_BLOCK_SIZE;
 	}
Index: linux-2.6.8.1/mm/msync.c
===================================================================
--- linux-2.6.8.1.orig/mm/msync.c	2004-08-17 15:22:47.084964964 +1000
+++ linux-2.6.8.1/mm/msync.c	2004-08-17 15:24:35.532641805 +1000
@@ -92,8 +92,8 @@ static inline int filemap_sync_pmd_range
 	return error;
 }
 
-static int filemap_sync(struct vm_area_struct * vma, unsigned long address,
-	size_t size, unsigned int flags)
+static int __filemap_sync(struct vm_area_struct *vma, unsigned long address,
+			size_t size, unsigned int flags)
 {
 	pgd_t * dir;
 	unsigned long end = address + size;
@@ -131,6 +131,30 @@ static int filemap_sync(struct vm_area_s
 	return error;
 }
 
+#ifdef CONFIG_PREEMPT
+static int filemap_sync(struct vm_area_struct *vma, unsigned long address,
+			size_t size, unsigned int flags)
+{
+	const size_t chunk = 64 * 1024;	/* bytes */
+	int error = 0;
+
+	while (size) {
+		size_t sz = min(size, chunk);
+
+		error |= __filemap_sync(vma, address, sz, flags);
+		address += sz;
+		size -= sz;
+	}
+	return error;
+}
+#else
+static int filemap_sync(struct vm_area_struct *vma, unsigned long address,
+			size_t size, unsigned int flags)
+{
+	return __filemap_sync(vma, address, size, flags);
+}
+#endif
+
 /*
  * MS_SYNC syncs the entire file - including mappings.
  *
Index: linux-2.6.8.1/mm/slab.c
===================================================================
--- linux-2.6.8.1.orig/mm/slab.c	2004-08-17 15:22:45.706185497 +1000
+++ linux-2.6.8.1/mm/slab.c	2004-08-17 15:24:37.832274921 +1000
@@ -2624,6 +2624,10 @@ static void enable_cpucache (kmem_cache_
 	if (limit > 32)
 		limit = 32;
 #endif
+#ifdef CONFIG_PREEMPT
+	if (limit > 16)
+		limit = 16;
+#endif
 	err = do_tune_cpucache(cachep, limit, (limit+1)/2, shared);
 	if (err)
 		printk(KERN_ERR "enable_cpucache failed for %s, error %d.\n",
Index: linux-2.6.8.1/mm/truncate.c
===================================================================
--- linux-2.6.8.1.orig/mm/truncate.c	2004-08-17 15:22:45.565208047 +1000
+++ linux-2.6.8.1/mm/truncate.c	2004-08-17 15:24:38.179219571 +1000
@@ -155,6 +155,7 @@ void truncate_inode_pages(struct address
 
 	next = start;
 	for ( ; ; ) {
+		cond_resched();
 		if (!pagevec_lookup(&pvec, mapping, next, PAGEVEC_SIZE)) {
 			if (next == start)
 				break;
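
The recurring idiom in the hunks above is a lock-break: a long scan made under a spinlock periodically drops the lock so that another CPU, or under CONFIG_PREEMPT a higher-priority task, can get in, and then restarts the scan because the protected list may have changed in the meantime. Below is a minimal sketch of that pattern, not part of the patch; every identifier in it (my_lock, my_list, process_one, drain_list) is invented for illustration.

	/*
	 * Illustrative sketch only: the lock-break idiom applied by the
	 * dcache.c and jbd hunks above.  All names here are made up.
	 */
	#include <linux/list.h>
	#include <linux/spinlock.h>
	#include <asm/processor.h>	/* cpu_relax() */

	static spinlock_t my_lock = SPIN_LOCK_UNLOCKED;
	static LIST_HEAD(my_list);

	static void process_one(struct list_head *p)
	{
		list_del_init(p);	/* stand-in for the real per-item work */
	}

	static void drain_list(void)
	{
		int nr = 0;

	restart:
		spin_lock(&my_lock);
		while (!list_empty(&my_list)) {
			struct list_head *p = my_list.next;

			process_one(p);
			if ((++nr & 15) == 0) {
				/*
				 * Bound the lock hold time.  Dropping the
				 * lock lets a waiting CPU take it, and with
				 * CONFIG_PREEMPT the unlock is itself a
				 * preemption point.  The scan must restart
				 * because the list may change while the
				 * lock is not held.
				 */
				spin_unlock(&my_lock);
				cpu_relax();
				goto restart;
			}
		}
		spin_unlock(&my_lock);
	}

The `goto write_out_data` changes in fs/jbd/commit.c have this restart shape; prune_dcache() uses the simpler unlock/cpu_relax()/relock form, which is safe there because that loop re-reads dentry_unused.prev after retaking dcache_lock.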