From: Hans Reiser

This patch removes the reiser4 spinlock macros. It also contains a few bug fixes.

Signed-off-by: Vladimir V. Saveliev
Signed-off-by: Andrew Morton
---

 dev/null | 474 -------
 fs/reiser4/as_ops.c | 74 -
 fs/reiser4/block_alloc.c | 106 -
 fs/reiser4/blocknrset.c | 2
 fs/reiser4/carry.c | 25
 fs/reiser4/carry_ops.c | 53
 fs/reiser4/cluster.c | 79 -
 fs/reiser4/cluster.h | 37
 fs/reiser4/context.c | 2
 fs/reiser4/context.h | 2
 fs/reiser4/coord.c | 2
 fs/reiser4/crypt.c | 83 -
 fs/reiser4/crypt.h | 58
 fs/reiser4/debug.c | 11
 fs/reiser4/debug.h | 26
 fs/reiser4/emergency_flush.c | 113 -
 fs/reiser4/emergency_flush.h | 2
 fs/reiser4/entd.c | 282 +---
 fs/reiser4/entd.h | 30
 fs/reiser4/eottl.c | 29
 fs/reiser4/estimate.c | 30
 fs/reiser4/flush.c | 101 -
 fs/reiser4/flush_queue.c | 120 -
 fs/reiser4/fsdata.c | 27
 fs/reiser4/fsdata.h | 7
 fs/reiser4/init_super.c | 8
 fs/reiser4/inode.c | 27
 fs/reiser4/inode.h | 73 -
 fs/reiser4/jnode.c | 158 +-
 fs/reiser4/jnode.h | 89 -
 fs/reiser4/ktxnmgrd.h | 1
 fs/reiser4/lock.c | 352 ++---
 fs/reiser4/lock.h | 117 +
 fs/reiser4/oid.c | 24
 fs/reiser4/page_cache.c | 59
 fs/reiser4/page_cache.h | 2
 fs/reiser4/plugin/compress/compress.c | 46
 fs/reiser4/plugin/compress/compress.h | 1
 fs/reiser4/plugin/compress/compress_mode.c | 27
 fs/reiser4/plugin/digest.c | 34
 fs/reiser4/plugin/file/cryptcompress.c | 1204 ++++++++++++-------
 fs/reiser4/plugin/file/cryptcompress.h | 173 +-
 fs/reiser4/plugin/file/file.c | 188 +-
 fs/reiser4/plugin/file/funcs.h | 5
 fs/reiser4/plugin/file/tail_conversion.c | 13
 fs/reiser4/plugin/file_ops.c | 15
 fs/reiser4/plugin/file_plugin_common.c | 19
 fs/reiser4/plugin/item/ctail.c | 131 --
 fs/reiser4/plugin/item/extent_file_ops.c | 61
 fs/reiser4/plugin/item/extent_flush_ops.c | 39
 fs/reiser4/plugin/item/extent_item_ops.c | 21
 fs/reiser4/plugin/item/internal.c | 34
 fs/reiser4/plugin/item/static_stat.c | 77 -
 fs/reiser4/plugin/node/node40.c | 45
 fs/reiser4/plugin/object.c | 11
 fs/reiser4/plugin/object.h | 7
 fs/reiser4/plugin/plugin.h | 53
 fs/reiser4/plugin/space/bitmap.c | 12
 fs/reiser4/readahead.c | 8
 fs/reiser4/reiser4.h | 21
 fs/reiser4/seal.c | 8
 fs/reiser4/search.c | 110 -
 fs/reiser4/super.c | 8
 fs/reiser4/super.h | 58
 fs/reiser4/super_ops.c | 2
 fs/reiser4/tree.c | 79 -
 fs/reiser4/tree.h | 177 +-
 fs/reiser4/tree_mod.c | 24
 fs/reiser4/tree_walk.c | 65 -
 fs/reiser4/txnmgr.c | 711 +++++------
 fs/reiser4/txnmgr.h | 168 ++
 fs/reiser4/vfs_ops.c | 29
 fs/reiser4/vfs_ops.h | 3
 fs/reiser4/wander.c | 830 +++----------
 fs/reiser4/writeout.h | 1
 fs/reiser4/znode.c | 112 -
 fs/reiser4/znode.h | 7
 77 files changed, 3484 insertions(+), 3838 deletions(-)

diff -puN fs/reiser4/as_ops.c~reiser4-spinlock-cleanup fs/reiser4/as_ops.c --- devel/fs/reiser4/as_ops.c~reiser4-spinlock-cleanup 2006-02-16 14:17:05.000000000 -0800 +++ devel-akpm/fs/reiser4/as_ops.c 2006-02-16 14:17:05.000000000 -0800 @@ -47,26 +47,6 @@ /* address space operations */ -/* clear PAGECACHE_TAG_DIRTY tag of a page. This is used in uncapture_page. This resembles test_clear_page_dirty.
The - only difference is that page's mapping exists and REISER4_MOVED tag is checked */ -void reiser4_clear_page_dirty(struct page *page) -{ - struct address_space *mapping; - unsigned long flags; - - mapping = page->mapping; - BUG_ON(mapping == NULL); - - read_lock_irqsave(&mapping->tree_lock, flags); - if (TestClearPageDirty(page)) { - read_unlock_irqrestore(&mapping->tree_lock, flags); - if (mapping_cap_account_dirty(mapping)) - dec_page_state(nr_dirty); - return; - } - read_unlock_irqrestore(&mapping->tree_lock, flags); -} - /** * reiser4_set_page_dirty - set dirty bit, tag in page tree, dirty accounting * @page: page to be dirtied @@ -223,12 +203,13 @@ int reiser4_invalidatepage(struct page * assert("vs-1427", page->mapping == jnode_get_mapping(jnode_by_page(page))); assert("", jprivate(page) != NULL); - assert("", offset == 0); + assert("", ergo(inode_file_plugin(inode) != + file_plugin_by_id(CRC_FILE_PLUGIN_ID), offset == 0)); node = jprivate(page); - LOCK_JNODE(node); - if (!JF_ISSET(node, JNODE_DIRTY) && !JF_ISSET(node, JNODE_FLUSH_QUEUED) && - !JF_ISSET(node, JNODE_WRITEBACK)) { + spin_lock_jnode(node); + if (!(node->state & ((1 << JNODE_DIRTY) | (1<< JNODE_FLUSH_QUEUED) | + (1 << JNODE_WRITEBACK) | (1 << JNODE_OVRWR)))) { /* there is not need to capture */ jref(node); JF_SET(node, JNODE_HEARD_BANSHEE); @@ -238,7 +219,7 @@ int reiser4_invalidatepage(struct page * jput(node); return 0; } - UNLOCK_JNODE(node); + spin_unlock_jnode(node); ctx = init_context(inode->i_sb); @@ -247,10 +228,8 @@ int reiser4_invalidatepage(struct page * /* capture page being truncated. */ ret = try_capture_page_to_invalidate(page); - if (ret != 0) { + if (ret != 0) warning("nikita-3141", "Cannot capture: %i", ret); - print_page("page", page); - } if (offset == 0) { /* remove jnode from transaction and detach it from page. */ @@ -263,8 +242,9 @@ int reiser4_invalidatepage(struct page * /* this detaches page from jnode, so that jdelete will not try * to lock page which is already locked */ - UNDER_SPIN_VOID(jnode, - node, page_clear_jnode(page, node)); + spin_lock_jnode(node); + page_clear_jnode(page, node); + spin_unlock_jnode(node); unhash_unformatted_jnode(node); jput(node); @@ -274,18 +254,13 @@ int reiser4_invalidatepage(struct page * return ret; } -#define INC_STAT(page, node, counter) \ - reiser4_stat_inc_at(page->mapping->host->i_sb, \ - level[jnode_get_level(node)].counter); - -#define INC_NSTAT(node, counter) INC_STAT(jnode_page(node), node, counter) - /* help function called from reiser4_releasepage(). It returns true if jnode * can be detached from its page and page released. */ -static int releasable(const jnode * node /* node to check */ ) +int jnode_is_releasable(jnode * node /* node to check */ ) { assert("nikita-2781", node != NULL); - assert("nikita-2783", spin_jnode_is_locked(node)); + assert_spin_locked(&(node->guard)); + assert_spin_locked(&(node->load)); /* is some thread is currently using jnode page, later cannot be * detached */ @@ -317,7 +292,7 @@ static int releasable(const jnode * node } /* dirty jnode cannot be released. It can however be submitted to disk * as part of early flushing, but only after getting flush-prepped. */ - if (jnode_is_dirty(node)) { + if (JF_ISSET(node, JNODE_DIRTY)) { return 0; } /* overwrite set is only written by log writer. 
*/ @@ -343,13 +318,6 @@ static int releasable(const jnode * node return 1; } -#if REISER4_DEBUG -int jnode_is_releasable(jnode * node) -{ - return UNDER_SPIN(jload, node, releasable(node)); -} -#endif - /* * ->releasepage method for reiser4 * @@ -387,9 +355,9 @@ int reiser4_releasepage(struct page *pag /* releasable() needs jnode lock, because it looks at the jnode fields * and we need jload_lock here to avoid races with jload(). */ - LOCK_JNODE(node); - LOCK_JLOAD(node); - if (releasable(node)) { + spin_lock_jnode(node); + spin_lock(&(node->load)); + if (jnode_is_releasable(node)) { struct address_space *mapping; mapping = page->mapping; @@ -398,8 +366,8 @@ int reiser4_releasepage(struct page *pag * jnode_extent_write() here, because pages seen by * jnode_extent_write() are !releasable(). */ page_clear_jnode(page, node); - UNLOCK_JLOAD(node); - UNLOCK_JNODE(node); + spin_unlock(&(node->load)); + spin_unlock_jnode(node); /* we are under memory pressure so release jnode also. */ jput(node); @@ -414,8 +382,8 @@ int reiser4_releasepage(struct page *pag return 1; } else { - UNLOCK_JLOAD(node); - UNLOCK_JNODE(node); + spin_unlock(&(node->load)); + spin_unlock_jnode(node); assert("nikita-3020", schedulable()); return 0; } diff -puN fs/reiser4/block_alloc.c~reiser4-spinlock-cleanup fs/reiser4/block_alloc.c --- devel/fs/reiser4/block_alloc.c~reiser4-spinlock-cleanup 2006-02-16 14:17:05.000000000 -0800 +++ devel-akpm/fs/reiser4/block_alloc.c 2006-02-16 14:17:05.000000000 -0800 @@ -202,7 +202,7 @@ sub_from_cluster_reserved(reiser4_super_ static void add_to_atom_flush_reserved_nolock(txn_atom * atom, __u32 count) { assert("zam-772", atom != NULL); - assert("zam-773", spin_atom_is_locked(atom)); + assert_spin_locked(&(atom->alock)); atom->flush_reserved += count; } @@ -210,7 +210,7 @@ static void add_to_atom_flush_reserved_n static void sub_from_atom_flush_reserved_nolock(txn_atom * atom, __u32 count) { assert("zam-774", atom != NULL); - assert("zam-775", spin_atom_is_locked(atom)); + assert_spin_locked(&(atom->alock)); assert("nikita-2790", atom->flush_reserved >= count); atom->flush_reserved -= count; } @@ -275,7 +275,7 @@ reiser4_grab(reiser4_context * ctx, __u6 sbinfo = get_super_private(ctx->super); - reiser4_spin_lock_sb(sbinfo); + spin_lock_reiser4_super(sbinfo); free_blocks = sbinfo->blocks_free; @@ -300,7 +300,7 @@ reiser4_grab(reiser4_context * ctx, __u6 ctx->grab_enabled = 0; unlock_and_ret: - reiser4_spin_unlock_sb(sbinfo); + spin_unlock_reiser4_super(sbinfo); return ret; } @@ -409,7 +409,7 @@ static reiser4_super_info_data *grabbed2 sub_from_ctx_grabbed(ctx, 1); sbinfo = get_super_private(ctx->super); - reiser4_spin_lock_sb(sbinfo); + spin_lock_reiser4_super(sbinfo); sub_from_sb_grabbed(sbinfo, 1); /* return sbinfo locked */ @@ -427,7 +427,7 @@ static void grabbed2fake_allocated_forma assert("vs-922", check_block_counters(reiser4_get_current_sb())); - reiser4_spin_unlock_sb(sbinfo); + spin_unlock_reiser4_super(sbinfo); } static void grabbed2fake_allocated_unformatted(void) @@ -439,7 +439,7 @@ static void grabbed2fake_allocated_unfor assert("vs-9221", check_block_counters(reiser4_get_current_sb())); - reiser4_spin_unlock_sb(sbinfo); + spin_unlock_reiser4_super(sbinfo); } void grabbed2cluster_reserved(int count) @@ -451,14 +451,14 @@ void grabbed2cluster_reserved(int count) sub_from_ctx_grabbed(ctx, count); sbinfo = get_super_private(ctx->super); - reiser4_spin_lock_sb(sbinfo); + spin_lock_reiser4_super(sbinfo); sub_from_sb_grabbed(sbinfo, count); sbinfo->blocks_clustered += count; 
assert("edward-504", check_block_counters(ctx->super)); - reiser4_spin_unlock_sb(sbinfo); + spin_unlock_reiser4_super(sbinfo); } void cluster_reserved2grabbed(int count) @@ -469,14 +469,14 @@ void cluster_reserved2grabbed(int count) ctx = get_current_context(); sbinfo = get_super_private(ctx->super); - reiser4_spin_lock_sb(sbinfo); + spin_lock_reiser4_super(sbinfo); sub_from_cluster_reserved(sbinfo, count); sbinfo->blocks_grabbed += count; assert("edward-505", check_block_counters(ctx->super)); - reiser4_spin_unlock_sb(sbinfo); + spin_unlock_reiser4_super(sbinfo); add_to_ctx_grabbed(ctx, count); } @@ -489,14 +489,14 @@ void cluster_reserved2free(int count) ctx = get_current_context(); sbinfo = get_super_private(ctx->super); - reiser4_spin_lock_sb(sbinfo); + spin_lock_reiser4_super(sbinfo); sub_from_cluster_reserved(sbinfo, count); sbinfo->blocks_free += count; assert("edward-502", check_block_counters(ctx->super)); - reiser4_spin_unlock_sb(sbinfo); + spin_unlock_reiser4_super(sbinfo); } static spinlock_t fake_lock = SPIN_LOCK_UNLOCKED; @@ -543,14 +543,14 @@ grabbed2used(reiser4_context * ctx, reis { sub_from_ctx_grabbed(ctx, count); - reiser4_spin_lock_sb(sbinfo); + spin_lock_reiser4_super(sbinfo); sub_from_sb_grabbed(sbinfo, count); sbinfo->blocks_used += count; assert("nikita-2679", check_block_counters(ctx->super)); - reiser4_spin_unlock_sb(sbinfo); + spin_unlock_reiser4_super(sbinfo); } /* adjust sb block counters when @count unallocated blocks get mapped to disk */ @@ -558,14 +558,14 @@ static void fake_allocated2used(reiser4_super_info_data * sbinfo, __u64 count, reiser4_ba_flags_t flags) { - reiser4_spin_lock_sb(sbinfo); + spin_lock_reiser4_super(sbinfo); sub_from_sb_fake_allocated(sbinfo, count, flags); sbinfo->blocks_used += count; assert("nikita-2680", check_block_counters(reiser4_get_current_sb())); - reiser4_spin_unlock_sb(sbinfo); + spin_unlock_reiser4_super(sbinfo); } static void flush_reserved2used(txn_atom * atom, __u64 count) @@ -573,19 +573,19 @@ static void flush_reserved2used(txn_atom reiser4_super_info_data *sbinfo; assert("zam-787", atom != NULL); - assert("zam-788", spin_atom_is_locked(atom)); + assert_spin_locked(&(atom->alock)); sub_from_atom_flush_reserved_nolock(atom, (__u32) count); sbinfo = get_current_super_private(); - reiser4_spin_lock_sb(sbinfo); + spin_lock_reiser4_super(sbinfo); sub_from_sb_flush_reserved(sbinfo, count); sbinfo->blocks_used += count; assert("zam-789", check_block_counters(reiser4_get_current_sb())); - reiser4_spin_unlock_sb(sbinfo); + spin_unlock_reiser4_super(sbinfo); } /* update the per fs blocknr hint default value. */ @@ -597,7 +597,7 @@ update_blocknr_hint_default(const struct assert("nikita-3342", !blocknr_is_fake(block)); - reiser4_spin_lock_sb(sbinfo); + spin_lock_reiser4_super(sbinfo); if (*block < sbinfo->block_count) { sbinfo->blocknr_hint_default = *block; } else { @@ -607,7 +607,7 @@ update_blocknr_hint_default(const struct dump_stack(); DEBUGON(1); } - reiser4_spin_unlock_sb(sbinfo); + spin_unlock_reiser4_super(sbinfo); } /* get current value of the default blocknr hint. 
*/ @@ -615,10 +615,10 @@ void get_blocknr_hint_default(reiser4_bl { reiser4_super_info_data *sbinfo = get_current_super_private(); - reiser4_spin_lock_sb(sbinfo); + spin_lock_reiser4_super(sbinfo); *result = sbinfo->blocknr_hint_default; assert("zam-677", *result < sbinfo->block_count); - reiser4_spin_unlock_sb(sbinfo); + spin_unlock_reiser4_super(sbinfo); } /* Allocate "real" disk blocks by calling a proper space allocation plugin @@ -679,7 +679,7 @@ reiser4_alloc_blocks(reiser4_blocknr_hin /* we assume that current atom exists at this moment */ txn_atom *atom = get_current_atom_locked(); atom->nr_blocks_allocated += *len; - UNLOCK_ATOM(atom); + spin_unlock_atom(atom); } switch (hint->block_stage) { @@ -694,7 +694,7 @@ reiser4_alloc_blocks(reiser4_blocknr_hin { txn_atom *atom = get_current_atom_locked(); flush_reserved2used(atom, *len); - UNLOCK_ATOM(atom); + spin_unlock_atom(atom); } break; default: @@ -719,7 +719,7 @@ static void used2fake_allocated(reiser4_super_info_data * sbinfo, __u64 count, int formatted) { - reiser4_spin_lock_sb(sbinfo); + spin_lock_reiser4_super(sbinfo); if (formatted) sbinfo->blocks_fake_allocated += count; @@ -730,7 +730,7 @@ used2fake_allocated(reiser4_super_info_d assert("nikita-2681", check_block_counters(reiser4_get_current_sb())); - reiser4_spin_unlock_sb(sbinfo); + spin_unlock_reiser4_super(sbinfo); } static void @@ -738,11 +738,11 @@ used2flush_reserved(reiser4_super_info_d __u64 count, reiser4_ba_flags_t flags UNUSED_ARG) { assert("nikita-2791", atom != NULL); - assert("nikita-2792", spin_atom_is_locked(atom)); + assert_spin_locked(&(atom->alock)); add_to_atom_flush_reserved_nolock(atom, (__u32) count); - reiser4_spin_lock_sb(sbinfo); + spin_lock_reiser4_super(sbinfo); sbinfo->blocks_flush_reserved += count; /*add_to_sb_flush_reserved(sbinfo, count); */ @@ -750,7 +750,7 @@ used2flush_reserved(reiser4_super_info_d assert("nikita-2681", check_block_counters(reiser4_get_current_sb())); - reiser4_spin_unlock_sb(sbinfo); + spin_unlock_reiser4_super(sbinfo); } /* disk space, virtually used by fake block numbers is counted as "grabbed" again. 
*/ @@ -760,7 +760,7 @@ fake_allocated2grabbed(reiser4_context * { add_to_ctx_grabbed(ctx, count); - reiser4_spin_lock_sb(sbinfo); + spin_lock_reiser4_super(sbinfo); assert("nikita-2682", check_block_counters(ctx->super)); @@ -769,7 +769,7 @@ fake_allocated2grabbed(reiser4_context * assert("nikita-2683", check_block_counters(ctx->super)); - reiser4_spin_unlock_sb(sbinfo); + spin_unlock_reiser4_super(sbinfo); } void fake_allocated2free(__u64 count, reiser4_ba_flags_t flags) @@ -804,13 +804,13 @@ grabbed2free(reiser4_context * ctx, reis { sub_from_ctx_grabbed(ctx, count); - reiser4_spin_lock_sb(sbinfo); + spin_lock_reiser4_super(sbinfo); sub_from_sb_grabbed(sbinfo, count); sbinfo->blocks_free += count; assert("nikita-2684", check_block_counters(ctx->super)); - reiser4_spin_unlock_sb(sbinfo); + spin_unlock_reiser4_super(sbinfo); } void grabbed2flush_reserved_nolock(txn_atom * atom, __u64 count) @@ -827,14 +827,14 @@ void grabbed2flush_reserved_nolock(txn_a add_to_atom_flush_reserved_nolock(atom, count); - reiser4_spin_lock_sb(sbinfo); + spin_lock_reiser4_super(sbinfo); sbinfo->blocks_flush_reserved += count; sub_from_sb_grabbed(sbinfo, count); assert("vpf-292", check_block_counters(ctx->super)); - reiser4_spin_unlock_sb(sbinfo); + spin_unlock_reiser4_super(sbinfo); } void grabbed2flush_reserved(__u64 count) @@ -843,7 +843,7 @@ void grabbed2flush_reserved(__u64 count) grabbed2flush_reserved_nolock(atom, count); - UNLOCK_ATOM(atom); + spin_unlock_atom(atom); } void flush_reserved2grabbed(txn_atom * atom, __u64 count) @@ -852,7 +852,7 @@ void flush_reserved2grabbed(txn_atom * a reiser4_super_info_data *sbinfo; assert("nikita-2788", atom != NULL); - assert("nikita-2789", spin_atom_is_locked(atom)); + assert_spin_locked(&(atom->alock)); ctx = get_current_context(); sbinfo = get_super_private(ctx->super); @@ -861,14 +861,14 @@ void flush_reserved2grabbed(txn_atom * a sub_from_atom_flush_reserved_nolock(atom, (__u32) count); - reiser4_spin_lock_sb(sbinfo); + spin_lock_reiser4_super(sbinfo); sbinfo->blocks_grabbed += count; sub_from_sb_flush_reserved(sbinfo, count); assert("vpf-292", check_block_counters(ctx->super)); - reiser4_spin_unlock_sb(sbinfo); + spin_unlock_reiser4_super(sbinfo); } /* release all blocks grabbed in context which where not used. 
*/ @@ -887,27 +887,27 @@ used2grabbed(reiser4_context * ctx, reis { add_to_ctx_grabbed(ctx, count); - reiser4_spin_lock_sb(sbinfo); + spin_lock_reiser4_super(sbinfo); sbinfo->blocks_grabbed += count; sub_from_sb_used(sbinfo, count); assert("nikita-2685", check_block_counters(ctx->super)); - reiser4_spin_unlock_sb(sbinfo); + spin_unlock_reiser4_super(sbinfo); } /* this used to be done through used2grabbed and grabbed2free*/ static void used2free(reiser4_super_info_data * sbinfo, __u64 count) { - reiser4_spin_lock_sb(sbinfo); + spin_lock_reiser4_super(sbinfo); sbinfo->blocks_free += count; sub_from_sb_used(sbinfo, count); assert("nikita-2685", check_block_counters(reiser4_get_current_sb())); - reiser4_spin_unlock_sb(sbinfo); + spin_unlock_reiser4_super(sbinfo); } #if REISER4_DEBUG @@ -960,9 +960,9 @@ reiser4_dealloc_blocks(const reiser4_blo assert("zam-432", *start != 0); assert("zam-558", !blocknr_is_fake(start)); - reiser4_spin_lock_sb(sbinfo); + spin_lock_reiser4_super(sbinfo); assert("zam-562", *start < sbinfo->block_count); - reiser4_spin_unlock_sb(sbinfo); + spin_unlock_reiser4_super(sbinfo); } if (flags & BA_DEFER) { @@ -987,7 +987,7 @@ reiser4_dealloc_blocks(const reiser4_blo assert("zam-477", ret == 0); assert("zam-433", atom != NULL); - UNLOCK_ATOM(atom); + spin_unlock_atom(atom); } else { assert("zam-425", get_current_super_private() != NULL); @@ -999,7 +999,7 @@ reiser4_dealloc_blocks(const reiser4_blo * back if allocation is discarded. */ txn_atom *atom = get_current_atom_locked(); atom->nr_blocks_allocated -= *len; - UNLOCK_ATOM(atom); + spin_unlock_atom(atom); } switch (target_stage) { @@ -1023,7 +1023,7 @@ reiser4_dealloc_blocks(const reiser4_blo atom = get_current_atom_locked(); used2flush_reserved(sbinfo, atom, *len, flags & BA_FORMATTED); - UNLOCK_ATOM(atom); + spin_unlock_atom(atom); break; } default: @@ -1062,12 +1062,12 @@ apply_dset(txn_atom * atom UNUSED_ARG, c len = *b; if (REISER4_DEBUG) { - reiser4_spin_lock_sb(sbinfo); + spin_lock_reiser4_super(sbinfo); assert("zam-554", *a < reiser4_block_count(ctx->super)); assert("zam-555", *a + len <= reiser4_block_count(ctx->super)); - reiser4_spin_unlock_sb(sbinfo); + spin_unlock_reiser4_super(sbinfo); } sa_dealloc_blocks(&sbinfo->space_allocator, *a, len); @@ -1082,7 +1082,7 @@ void post_commit_hook(void) atom = get_current_atom_locked(); assert("zam-452", atom->stage == ASTAGE_POST_COMMIT); - UNLOCK_ATOM(atom); + spin_unlock_atom(atom); /* do the block deallocation which was deferred until commit is done */ diff -puN fs/reiser4/blocknrset.c~reiser4-spinlock-cleanup fs/reiser4/blocknrset.c --- devel/fs/reiser4/blocknrset.c~reiser4-spinlock-cleanup 2006-02-16 14:17:05.000000000 -0800 +++ devel-akpm/fs/reiser4/blocknrset.c 2006-02-16 14:17:05.000000000 -0800 @@ -156,7 +156,7 @@ static int blocknr_set_add(txn_atom *ato bse_avail(list_entry(bset->entries.next, blocknr_set_entry, link)) < entries_needed) { /* See if a bse was previously allocated. */ if (*new_bsep == NULL) { - UNLOCK_ATOM(atom); + spin_unlock_atom(atom); *new_bsep = bse_alloc(); return (*new_bsep != NULL) ? 
-E_REPEAT : RETERR(-ENOMEM); diff -puN fs/reiser4/carry.c~reiser4-spinlock-cleanup fs/reiser4/carry.c --- devel/fs/reiser4/carry.c~reiser4-spinlock-cleanup 2006-02-16 14:17:05.000000000 -0800 +++ devel-akpm/fs/reiser4/carry.c 2006-02-16 14:17:05.000000000 -0800 @@ -762,8 +762,8 @@ static void sync_dkeys(znode * spot /* n assert("nikita-1612", LOCK_CNT_NIL(rw_locked_dk)); tree = znode_get_tree(spot); - RLOCK_TREE(tree); - WLOCK_DK(tree); + read_lock_tree(tree); + write_lock_dk(tree); assert("nikita-2192", znode_is_loaded(spot)); @@ -798,8 +798,8 @@ static void sync_dkeys(znode * spot /* n break; } - WUNLOCK_DK(tree); - RUNLOCK_TREE(tree); + write_unlock_dk(tree); + read_unlock_tree(tree); } /* unlock all carry nodes in @level */ @@ -914,6 +914,7 @@ int lock_carry_node(carry_level * level znode *reference_point; lock_handle lh; lock_handle tmp_lh; + reiser4_tree *tree; assert("nikita-887", level != NULL); assert("nikita-882", node != NULL); @@ -944,9 +945,10 @@ int lock_carry_node(carry_level * level and thus, their sibling linkage cannot change. */ - reference_point = UNDER_RW - (tree, znode_get_tree(reference_point), read, - find_begetting_brother(node, level)->node); + tree = znode_get_tree(reference_point); + read_lock_tree(tree); + reference_point = find_begetting_brother(node, level)->node; + read_unlock_tree(tree); assert("nikita-1186", reference_point != NULL); } if (node->parent && (result == 0)) { @@ -1222,11 +1224,11 @@ carry_node *add_new_znode(znode * brothe add_pointer->u.insert.child = fresh; add_pointer->u.insert.brother = brother; /* initially new node spawns empty key range */ - WLOCK_DK(znode_get_tree(brother)); + write_lock_dk(znode_get_tree(brother)); znode_set_ld_key(new_znode, znode_set_rd_key(new_znode, znode_get_rd_key(brother))); - WUNLOCK_DK(znode_get_tree(brother)); + write_unlock_dk(znode_get_tree(brother)); return fresh; } @@ -1287,8 +1289,7 @@ static int carry_level_invariant(carry_l continue; if (!keyle(leftmost_key_in_node(left, &lkey), leftmost_key_in_node(right, &rkey))) { - print_znode("left", left); - print_znode("right", right); + warning("", "wrong key order"); return 0; } } @@ -1343,8 +1344,6 @@ static void print_carry(const char *pref ("%s: %p parent: %s, left: %s, unlock: %s, free: %s, dealloc: %s\n", prefix, node, tf(node->parent), tf(node->left), tf(node->unlock), tf(node->free), tf(node->deallocate)); - print_znode("\tnode", node->node); - print_znode("\treal_node", carry_real(node)); } /* dump information about carry operation */ diff -puN fs/reiser4/carry_ops.c~reiser4-spinlock-cleanup fs/reiser4/carry_ops.c --- devel/fs/reiser4/carry_ops.c~reiser4-spinlock-cleanup 2006-02-16 14:17:05.000000000 -0800 +++ devel-akpm/fs/reiser4/carry_ops.c 2006-02-16 14:17:05.000000000 -0800 @@ -49,13 +49,13 @@ static carry_node *find_left_neighbor(ca node = op->node; tree = current_tree; - RLOCK_TREE(tree); + read_lock_tree(tree); /* first, check whether left neighbor is already in a @doing queue */ if (carry_real(node)->left != NULL) { /* NOTE: there is locking subtlety here. 
Look into * find_right_neighbor() for more info */ if (find_carry_node(doing, carry_real(node)->left) != NULL) { - RUNLOCK_TREE(tree); + read_unlock_tree(tree); left = node; do { left = list_entry(left->header.level_linkage.prev, @@ -66,7 +66,7 @@ static carry_node *find_left_neighbor(ca return left; } } - RUNLOCK_TREE(tree); + read_unlock_tree(tree); left = add_carry_skip(doing, POOLO_BEFORE, node); if (IS_ERR(left)) @@ -131,7 +131,7 @@ static carry_node *find_right_neighbor(c node = op->node; tree = current_tree; - RLOCK_TREE(tree); + read_lock_tree(tree); /* first, check whether right neighbor is already in a @doing queue */ if (carry_real(node)->right != NULL) { /* @@ -155,7 +155,7 @@ static carry_node *find_right_neighbor(c * locked neighbors. */ if (find_carry_node(doing, carry_real(node)->right) != NULL) { - RUNLOCK_TREE(tree); + read_unlock_tree(tree); /* * What we are doing here (this is also applicable to * the find_left_neighbor()). @@ -194,7 +194,7 @@ static carry_node *find_right_neighbor(c return right; } } - RUNLOCK_TREE(tree); + read_unlock_tree(tree); flags = GN_CAN_USE_UPPER_LEVELS; if (!op->u.insert.flags & COPI_LOAD_RIGHT) @@ -463,7 +463,6 @@ static int make_space(carry_op * op /* c warning("nikita-924", "Error accessing left neighbor: %li", PTR_ERR(left)); - print_znode("node", node); } } else if (left != NULL) { @@ -494,7 +493,6 @@ static int make_space(carry_op * op /* c warning("nikita-1065", "Error accessing right neighbor: %li", PTR_ERR(right)); - print_znode("node", node); } else if (right != NULL) { /* node containing insertion point, and its right neighbor node are write locked by now. @@ -552,8 +550,6 @@ static int make_space(carry_op * op /* c if (result != 0) { warning("nikita-947", "Cannot lock new node: %i", result); - print_znode("new", carry_real(fresh)); - print_znode("node", node); return result; } @@ -699,7 +695,6 @@ static int insert_paste_common(carry_op if ((intra_node != NS_FOUND) && (intra_node != NS_NOT_FOUND)) { warning("nikita-1715", "Intra node lookup failure: %i", intra_node); - print_znode("node", node); return intra_node; } } else if (op->u.insert.type == COPT_CHILD) { @@ -720,8 +715,6 @@ static int insert_paste_common(carry_op warning("nikita-993", "Cannot find a place for child pointer: %i", result); - print_znode("child", child); - print_znode("parent", carry_real(op->node)); return result; } /* This only happens when we did multiple insertions at @@ -784,10 +777,10 @@ static int insert_paste_common(carry_op * internal item and its key is (by the very definition of * search tree) is leftmost key in the child node. */ - op->u.insert.d->key = UNDER_RW(dk, znode_get_tree(child), read, - leftmost_key_in_node(child, - znode_get_ld_key - (child))); + write_lock_dk(znode_get_tree(child)); + op->u.insert.d->key = leftmost_key_in_node(child, + znode_get_ld_key(child)); + write_unlock_dk(znode_get_tree(child)); op->u.insert.d->data->arg = op->u.insert.brother; } else { assert("vs-243", op->u.insert.d->coord != NULL); @@ -1237,7 +1230,7 @@ static int carry_delete(carry_op * op /* child = op->u.delete.child ? 
carry_real(op->u.delete.child) : op->node->node; tree = znode_get_tree(child); - RLOCK_TREE(tree); + read_lock_tree(tree); /* * @parent was determined when carry entered parent level @@ -1251,7 +1244,7 @@ static int carry_delete(carry_op * op /* parent = znode_parent(child); assert("nikita-2581", find_carry_node(doing, parent)); } - RUNLOCK_TREE(tree); + read_unlock_tree(tree); assert("nikita-1213", znode_get_level(parent) > LEAF_LEVEL); @@ -1264,11 +1257,11 @@ static int carry_delete(carry_op * op /* znode_get_level(parent) <= REISER4_MIN_TREE_HEIGHT && node_num_items(parent) == 1) { /* Delimiting key manipulations. */ - WLOCK_DK(tree); + write_lock_dk(tree); znode_set_ld_key(child, znode_set_ld_key(parent, min_key())); znode_set_rd_key(child, znode_set_rd_key(parent, max_key())); ZF_SET(child, JNODE_DKSET); - WUNLOCK_DK(tree); + write_unlock_dk(tree); /* @child escaped imminent death! */ ZF_CLR(child, JNODE_HEARD_BANSHEE); @@ -1279,8 +1272,6 @@ static int carry_delete(carry_op * op /* result = find_child_ptr(parent, child, &coord); if (result != NS_FOUND) { warning("nikita-994", "Cannot find child pointer: %i", result); - print_znode("child", child); - print_znode("parent", parent); print_coord_content("coord", &coord); return result; } @@ -1719,9 +1710,11 @@ static int update_delimiting_key(znode * if (!ZF_ISSET(right, JNODE_HEARD_BANSHEE)) leftmost_key_in_node(right, &ldkey); - else - UNDER_RW_VOID(dk, znode_get_tree(parent), read, - ldkey = *znode_get_rd_key(right)); + else { + read_lock_dk(znode_get_tree(parent)); + ldkey = *znode_get_rd_key(right); + read_unlock_dk(znode_get_tree(parent)); + } node_plugin_by_node(parent)->update_item_key(&right_pos, &ldkey, &info); doing->restartable = 0; znode_make_dirty(parent); @@ -1772,9 +1765,9 @@ static int carry_update(carry_op * op /* left = NULL; tree = znode_get_tree(rchild->node); - RLOCK_TREE(tree); + read_lock_tree(tree); right = znode_parent(rchild->node); - RUNLOCK_TREE(tree); + read_unlock_tree(tree); if (right != NULL) { result = update_delimiting_key(right, @@ -1791,10 +1784,6 @@ static int carry_update(carry_op * op /* if (result != 0) { warning("nikita-999", "Error updating delimiting key: %s (%i)", error_msg ? : "", result); - print_znode("left", left); - print_znode("right", right); - print_znode("lchild", lchild ? lchild->node : NULL); - print_znode("rchild", rchild->node); } return result; } diff -puN fs/reiser4/cluster.c~reiser4-spinlock-cleanup fs/reiser4/cluster.c --- devel/fs/reiser4/cluster.c~reiser4-spinlock-cleanup 2006-02-16 14:17:05.000000000 -0800 +++ devel-akpm/fs/reiser4/cluster.c 2006-02-16 14:17:05.000000000 -0800 @@ -1,78 +1,13 @@ /* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */ -/* Contains cluster operations for cryptcompress object plugin (see - http://www.namesys.com/cryptcompress_design.txt for details). */ +/* Contains reiser4 cluster plugins (see + http://www.namesys.com/cryptcompress_design.html + "Concepts of clustering" for details). */ #include "plugin/plugin_header.h" #include "plugin/plugin.h" #include "inode.h" -/* Concepts of clustering. Definition of cluster size. - Data clusters, page clusters, disk clusters. - - In order to compress plain text we first should split it into chunks. 
- Then we process each chunk independently by the following function: - - void alg(char *input_ptr, int input_length, char *output_ptr, int *output_length); - - where: - input_ptr is a pointer to the first byte of input chunk (that contains plain text), - input_len is a length of input chunk, - output_ptr is a pointer to the first byte of output chunk (that contains processed text), - *output_len is a length of output chunk. - - the length of output chunk depends both on input_len and on the content of - input chunk. input_len (which can be assigned an arbitrary value) affects the - compression quality (the more input_len the better the compression quality). - For each cryptcompress file we assign special attribute - cluster size: - - Cluster size is a file attribute, which determines the maximal size - of input chunk that we use for compression. - - So if we wanna compress a 10K-file with a cluster size of 4K, we split this file - into three chunks (first and second - 4K, third - 2K). Those chunks are - clusters in the space of file offsets (data clusters). - - Cluster sizes are represented as (PAGE_CACHE_SIZE << shift), where - shift (= 0, 1, 2,... ). You'll note that this representation - affects the allowed values for cluster size. This is stored in - disk stat-data (CLUSTER_STAT, layout is in reiser4_cluster_stat (see - (plugin/item/static_stat.h) for details). - Note that working with - cluster_size > PAGE_SIZE (when cluster_shift > 0, and cluster contains more - then one page) is suboptimal because before compression we should assemble - all cluster pages into one flow (this means superfluous memcpy during - read/write). So the better way to increase cluster size (and therefore - compression quality) is making PAGE_SIZE larger (for instance by page - clustering stuff of William Lee). But if you need PAGE_SIZE < cluster_size, - then use the page clustering offered by reiser4. - - The inode mapping of a cryptcompress file contains pages filled by plain text. - Cluster size also defines clustering in address space. For example, - 101K-file with cluster size 16K (cluster shift = 2), which can be mapped - into 26 pages, has 7 "page clusters": first six clusters contains 4 pages - and one cluster contains 2 pages (for the file tail). - - We split each output (compressed) chunk into special items to provide - tight packing of data on disk (currently only ctails hold compressed data). - This set of items we call a "disk cluster". - - Each cluster is defined (like pages are) by its index (e.g. offset, - but the unit is cluster size instead of PAGE_SIZE). Key offset of - the first unit of the first item of each disk cluster (we call this a - "key of disk cluster") is a multiple of the cluster index. - - All read/write/truncate operations are performed upon clusters. - For example, if we wanna read 40K of a cryptcompress file with cluster size 16K - from offset = 20K, we first need to read two clusters (of indexes 1, 2). This - means that all main methods of cryptcompress object plugin call appropriate - cluster operation. - - For the same index we use one structure (type reiser4_cluster_t) to - represent all data/page/disk clusters. (EDWARD-FIXME-HANS: are you - sure that is good style? 
and where is the code that goes with this comment....;-) ) -*/ - static int change_cluster(struct inode *inode, reiser4_plugin * plugin) { int result = 0; @@ -112,11 +47,11 @@ static reiser4_plugin_ops cluster_plugin } cluster_plugin cluster_plugins[LAST_CLUSTER_ID] = { - SUPPORT_CLUSTER(12, 4K, "4K", "Minimal"), - SUPPORT_CLUSTER(13, 8K, "8K", "Small"), - SUPPORT_CLUSTER(14, 16K, "16K", "Average"), + SUPPORT_CLUSTER(16, 64K, "64K", "Large"), SUPPORT_CLUSTER(15, 32K, "32K", "Big"), - SUPPORT_CLUSTER(16, 64K, "64K", "Large") + SUPPORT_CLUSTER(14, 16K, "16K", "Average"), + SUPPORT_CLUSTER(13, 8K, "8K", "Small"), + SUPPORT_CLUSTER(12, 4K, "4K", "Minimal") }; /* diff -puN fs/reiser4/cluster.h~reiser4-spinlock-cleanup fs/reiser4/cluster.h --- devel/fs/reiser4/cluster.h~reiser4-spinlock-cleanup 2006-02-16 14:17:05.000000000 -0800 +++ devel-akpm/fs/reiser4/cluster.h 2006-02-16 14:17:05.000000000 -0800 @@ -171,6 +171,12 @@ fsize_to_count(reiser4_cluster_t * clust return off_to_count(inode->i_size, clust->index, inode); } +static inline int +cluster_is_complete(reiser4_cluster_t * clust, struct inode * inode) +{ + return clust->tc.lsize == inode_cluster_size(inode); +} + static inline void reiser4_slide_init(reiser4_slide_t * win) { assert("edward-1084", win != NULL); @@ -178,14 +184,34 @@ static inline void reiser4_slide_init(re } static inline void -reiser4_cluster_init(reiser4_cluster_t * clust, reiser4_slide_t * window) +tfm_cluster_init_act(tfm_cluster_t * tc, tfm_action act) { + assert("edward-1356", tc != NULL); + assert("edward-1357", act != TFM_INVAL); + tc->act = act; +} + +static inline void +cluster_init_act (reiser4_cluster_t * clust, tfm_action act, reiser4_slide_t * window){ assert("edward-84", clust != NULL); memset(clust, 0, sizeof *clust); + tfm_cluster_init_act(&clust->tc, act); clust->dstat = INVAL_DISK_CLUSTER; clust->win = window; } +static inline void +cluster_init_read(reiser4_cluster_t * clust, reiser4_slide_t * window) +{ + cluster_init_act (clust, TFM_READ, window); +} + +static inline void +cluster_init_write(reiser4_cluster_t * clust, reiser4_slide_t * window) +{ + cluster_init_act (clust, TFM_WRITE, window); +} + static inline int dclust_get_extension(hint_t * hint) { return hint->ext_coord.extension.ctail.shift; @@ -229,15 +255,18 @@ void truncate_page_cluster(struct inode void set_hint_cluster(struct inode *inode, hint_t * hint, unsigned long index, znode_lock_mode mode); void invalidate_hint_cluster(reiser4_cluster_t * clust); +void put_hint_cluster(reiser4_cluster_t * clust, struct inode *inode, + znode_lock_mode mode); int get_disk_cluster_locked(reiser4_cluster_t * clust, struct inode *inode, znode_lock_mode lock_mode); void reset_cluster_params(reiser4_cluster_t * clust); +int set_cluster_by_page(reiser4_cluster_t * clust, struct page * page, + int count); int prepare_page_cluster(struct inode *inode, reiser4_cluster_t * clust, int capture); void release_cluster_pages_nocapture(reiser4_cluster_t *); -void put_cluster_handle(reiser4_cluster_t * clust, tfm_action act); -int grab_tfm_stream(struct inode *inode, tfm_cluster_t * tc, tfm_action act, - tfm_stream_id id); +void put_cluster_handle(reiser4_cluster_t * clust); +int grab_tfm_stream(struct inode *inode, tfm_cluster_t * tc, tfm_stream_id id); int tfm_cluster_is_uptodate(tfm_cluster_t * tc); void tfm_cluster_set_uptodate(tfm_cluster_t * tc); void tfm_cluster_clr_uptodate(tfm_cluster_t * tc); diff -puN fs/reiser4/context.c~reiser4-spinlock-cleanup fs/reiser4/context.c --- 
devel/fs/reiser4/context.c~reiser4-spinlock-cleanup 2006-02-16 14:17:05.000000000 -0800 +++ devel-akpm/fs/reiser4/context.c 2006-02-16 14:17:05.000000000 -0800 @@ -244,7 +244,7 @@ void reiser4_exit_context(reiser4_contex if (atom) { atom->flags |= ATOM_FORCE_COMMIT; context->trans->flags &= ~TXNH_DONT_COMMIT; - UNLOCK_ATOM(atom); + spin_unlock_atom(atom); } } txn_end(context); diff -puN fs/reiser4/context.h~reiser4-spinlock-cleanup fs/reiser4/context.h --- devel/fs/reiser4/context.h~reiser4-spinlock-cleanup 2006-02-16 14:17:05.000000000 -0800 +++ devel-akpm/fs/reiser4/context.h 2006-02-16 14:17:05.000000000 -0800 @@ -8,7 +8,6 @@ #include "forward.h" #include "debug.h" -#include "spin_macros.h" #include "dformat.h" #include "tap.h" #include "lock.h" @@ -93,6 +92,7 @@ struct reiser4_context { /* information about last error encountered by reiser4 */ err_site err; #endif + void *vp; }; extern reiser4_context *get_context_by_lock_stack(lock_stack *); diff -puN fs/reiser4/coord.c~reiser4-spinlock-cleanup fs/reiser4/coord.c --- devel/fs/reiser4/coord.c~reiser4-spinlock-cleanup 2006-02-16 14:17:05.000000000 -0800 +++ devel-akpm/fs/reiser4/coord.c 2006-02-16 14:17:05.000000000 -0800 @@ -899,8 +899,6 @@ void print_coord(const char *mes, const printk("%s: item_pos = %d, unit_pos %d, tween=%s, iplug=%d\n", mes, coord->item_pos, coord->unit_pos, coord_tween_tostring(coord->between), coord->iplugid); - if (node) - print_znode("\tnode", coord->node); } int diff -puN fs/reiser4/crypt.c~reiser4-spinlock-cleanup fs/reiser4/crypt.c --- devel/fs/reiser4/crypt.c~reiser4-spinlock-cleanup 2006-02-16 14:17:05.000000000 -0800 +++ devel-akpm/fs/reiser4/crypt.c 2006-02-16 14:17:05.000000000 -0800 @@ -1,5 +1,6 @@ -/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */ -/* Crypto-plugins for reiser4 cryptcompress objects */ +/* Copyright 2001, 2002, 2003 by Hans Reiser, + licensing governed by reiser4/README */ +/* Reiser4 cipher transform plugins */ #include "debug.h" #include "plugin/plugin.h" @@ -8,28 +9,24 @@ #include #define MAX_CRYPTO_BLOCKSIZE 128 -#define NONE_EXPKEY_WORDS 8 -#define NONE_BLOCKSIZE 8 /* - Default align() method of the crypto-plugin (look for description of this method - in plugin/plugin.h) + Default align() method of the crypto-plugin (look for description of this + method in plugin/plugin.h) -1) creates the aligning armored format of the input flow before encryption. - "armored" means that padding is filled by private data (for example, - pseudo-random sequence of bytes is not private data). -2) returns length of appended padding + 1) creates the aligning armored format of the input flow before encryption. + "armored" means that padding is filled by private data (for example, + pseudo-random sequence of bytes is not private data). 
+ 2) returns length of appended padding [ flow | aligning_padding ] ^ | @pad */ -UNUSED_ARG static int -align_stream_common(__u8 * - pad /* pointer to the first byte of aligning format */ , - int flow_size /* size of non-aligned flow */ , - int blocksize /* crypto-block size */ ) +static int align_stream_common(__u8 * pad, + int flow_size /* size of non-aligned flow */, + int blocksize /* crypto-block size */) { int pad_size; @@ -43,40 +40,66 @@ align_stream_common(__u8 * return pad_size; } -/* common scale method (look for description of this method in plugin/plugin.h) - for all symmetric algorithms which doesn't scale anything -*/ -static loff_t scale_common(struct inode *inode UNUSED_ARG, size_t blocksize UNUSED_ARG /* crypto block size, which is returned - by blocksize method of crypto plugin */ , +/* This is used for all the cipher algorithms which do not inflate + block-aligned data */ +static loff_t scale_common(struct inode *inode, size_t blocksize, loff_t src_off /* offset to scale */ ) { return src_off; } -REGISTER_NONE_ALG(crypt, CRYPTO) +static void free_aes (struct crypto_tfm * tfm) +{ +#if REISER4_AES + crypto_free_tfm(tfm); +#endif + return; +} + +static struct crypto_tfm * alloc_aes (void) +{ +#if REISER4_AES + return crypto_alloc_tfm ("aes", 0); +#else + warning("edward-1417", "aes unsupported"); + return ERR_PTR(-EINVAL); +#endif /* REISER4_AES */ +} -/* EDWARD-FIXME-HANS: why is this not in the plugin directory? */ -/* crypto plugins */ crypto_plugin crypto_plugins[LAST_CRYPTO_ID] = { [NONE_CRYPTO_ID] = { .h = { .type_id = REISER4_CRYPTO_PLUGIN_TYPE, .id = NONE_CRYPTO_ID, .pops = NULL, - /* If you wanna your files to not be crypto - transformed, specify this crypto pluigin */ .label = "none", - .desc = "absence of crypto transform", + .desc = "no cipher transform", .linkage = {NULL, NULL} }, - .alloc = alloc_none_crypt, - .free = free_none_crypt, - .nr_keywords = NONE_EXPKEY_WORDS, - .scale = scale_common, + .alloc = NULL, + .free = NULL, + .scale = NULL, .align_stream = NULL, .setkey = NULL, .encrypt = NULL, .decrypt = NULL + }, + [AES_CRYPTO_ID] = { + .h = { + .type_id = REISER4_CRYPTO_PLUGIN_TYPE, + .id = AES_CRYPTO_ID, + .pops = NULL, + .label = "aes", + .desc = "aes cipher transform", + .linkage = {NULL, NULL} + }, + .alloc = alloc_aes, + .free = free_aes, + .scale = scale_common, + .align_stream = align_stream_common, + .setkey = NULL, + .encrypt = NULL, + .decrypt = NULL } }; diff -puN /dev/null fs/reiser4/crypt.h --- /dev/null 2003-09-15 06:40:47.000000000 -0700 +++ devel-akpm/fs/reiser4/crypt.h 2006-02-16 14:17:05.000000000 -0800 @@ -0,0 +1,58 @@ +#if !defined( __FS_REISER4_CRYPT_H__ ) +#define __FS_REISER4_CRYPT_H__ + +#include + +/* Crypto transforms involved in ciphering process and + supported by reiser4 via appropriate transform plugins */ +typedef enum { + CIPHER_TFM, /* cipher transform */ + DIGEST_TFM, /* digest transform */ + LAST_TFM +} reiser4_tfm; + +/* This represents a transform from the set above */ +typedef struct reiser4_tfma { + reiser4_plugin * plug; /* transform plugin */ + struct crypto_tfm * tfm; /* per-transform allocated info, + belongs to the crypto-api. 
*/ +} reiser4_tfma_t; + +/* This contains cipher related info copied from user space */ +typedef struct crypto_data { + int keysize; /* key size */ + __u8 * key; /* uninstantiated key */ + int keyid_size; /* size of passphrase */ + __u8 * keyid; /* passphrase (uninstantiated keyid) */ +} crypto_data_t; + +/* Dynamically allocated per instantiated key info */ +typedef struct crypto_stat { + reiser4_tfma_t tfma[LAST_TFM]; +// cipher_key_plugin * kplug; *//* key manager responsible for +// inheriting, validating, etc... */ + __u8 * keyid; /* fingerprint (instantiated keyid) of + the cipher key prepared by digest + plugin, supposed to be stored in + disk stat-data */ + int inst; /* this indicates if the ciper key + is instantiated in the system */ + int keysize; /* uninstantiated key size (bytes), + supposed to be stored in disk + stat-data */ + int keyload_count; /* number of the objects which has + this crypto-stat attached */ +} crypto_stat_t; + +#endif /* __FS_REISER4_CRYPT_H__ */ + +/* + Local variables: + c-indentation-style: "K&R" + mode-name: "LC" + c-basic-offset: 8 + tab-width: 8 + fill-column: 120 + scroll-step: 1 + End: +*/ diff -puN fs/reiser4/debug.c~reiser4-spinlock-cleanup fs/reiser4/debug.c --- devel/fs/reiser4/debug.c~reiser4-spinlock-cleanup 2006-02-16 14:17:05.000000000 -0800 +++ devel-akpm/fs/reiser4/debug.c 2006-02-16 14:17:05.000000000 -0800 @@ -159,10 +159,10 @@ void print_lock_counters(const char *pre printk("%s: jnode: %i, tree: %i (r:%i,w:%i), dk: %i (r:%i,w:%i)\n" "jload: %i, " "txnh: %i, atom: %i, stack: %i, txnmgr: %i, " - "ktxnmgrd: %i, fq: %i, reiser4_sb: %i\n" + "ktxnmgrd: %i, fq: %i\n" "inode: %i, " "cbk_cache: %i (r:%i,w%i), " - "epoch: %i, eflush: %i, " + "eflush: %i, " "zlock: %i (r:%i, w:%i)\n" "spin: %i, long: %i inode_sem: (r:%i,w:%i)\n" "d: %i, x: %i, t: %i\n", prefix, @@ -174,12 +174,11 @@ void print_lock_counters(const char *pre info->spin_locked_txnh, info->spin_locked_atom, info->spin_locked_stack, info->spin_locked_txnmgr, info->spin_locked_ktxnmgrd, - info->spin_locked_fq, info->spin_locked_super, - info->spin_locked_inode_object, + info->spin_locked_fq, + info->spin_locked_inode, info->rw_locked_cbk_cache, info->read_locked_cbk_cache, info->write_locked_cbk_cache, - info->spin_locked_epoch, info->spin_locked_super_eflush, info->rw_locked_zlock, info->read_locked_zlock, @@ -213,7 +212,7 @@ int no_counters_are_held(void) (counters->spin_locked_atom == 0) && (counters->spin_locked_stack == 0) && (counters->spin_locked_txnmgr == 0) && - (counters->spin_locked_inode_object == 0) && + (counters->spin_locked_inode == 0) && (counters->spin_locked == 0) && (counters->long_term_locked_znode == 0) && (counters->inode_sem_r == 0) && diff -puN fs/reiser4/debug.h~reiser4-spinlock-cleanup fs/reiser4/debug.h --- devel/fs/reiser4/debug.h~reiser4-spinlock-cleanup 2006-02-16 14:17:05.000000000 -0800 +++ devel-akpm/fs/reiser4/debug.h 2006-02-16 14:17:05.000000000 -0800 @@ -12,11 +12,13 @@ whatever standard prefixes/postfixes we want. "Fun" is a function that will be actually called, can be printk, panic etc. This is for use by other debugging macros, not by users. */ -#define DCALL(lev, fun, reperr, label, format, ...) \ -({ \ - reiser4_print_prefix(lev, reperr, label, \ - __FUNCTION__, __FILE__, __LINE__); \ - fun(lev format "\n" , ## __VA_ARGS__); \ +#define DCALL(lev, fun, reperr, label, format, ...) 
\ +({ \ +/* reiser4_print_prefix(lev, reperr, label,*/ \ +/* __FUNCTION__, __FILE__, __LINE__);*/ \ + fun(lev "%sreiser4[%.16s(%i)]: %s (%s:%i)[%s]:\n" format "\n" , \ + lev, current->comm, current->pid, __FUNCTION__, \ + __FILE__, __LINE__, label, ## __VA_ARGS__); \ }) /* @@ -113,9 +115,7 @@ typedef struct lock_counters_info { int spin_locked_txnmgr; int spin_locked_ktxnmgrd; int spin_locked_fq; - int spin_locked_super; - int spin_locked_inode_object; - int spin_locked_epoch; + int spin_locked_inode; int spin_locked_super_eflush; int spin_locked; int long_term_locked_znode; @@ -143,6 +143,7 @@ extern lock_counters_info *lock_counters /* check that lock-counter is greater than zero. This is for use in * assertions */ #define LOCK_CNT_GTZ(counter) IN_CONTEXT(lock_counters()->counter > 0, 1) +#define LOCK_CNT_LT(counter,n) IN_CONTEXT(lock_counters()->counter < n, 1) #else /* REISER4_DEBUG */ @@ -156,9 +157,18 @@ typedef struct lock_counters_info { #define LOCK_CNT_DEC(counter) noop #define LOCK_CNT_NIL(counter) (1) #define LOCK_CNT_GTZ(counter) (1) +#define LOCK_CNT_LT(counter,n) (1) #endif /* REISER4_DEBUG */ +#define assert_spin_not_locked(lock) BUG_ON(0) +#define assert_rw_write_locked(lock) BUG_ON(0) +#define assert_rw_read_locked(lock) BUG_ON(0) +#define assert_rw_locked(lock) BUG_ON(0) +#define assert_rw_not_write_locked(lock) BUG_ON(0) +#define assert_rw_not_read_locked(lock) BUG_ON(0) +#define assert_rw_not_locked(lock) BUG_ON(0) + /* flags controlling debugging behavior. Are set through debug_flags=N mount option. */ typedef enum { diff -puN fs/reiser4/emergency_flush.c~reiser4-spinlock-cleanup fs/reiser4/emergency_flush.c --- devel/fs/reiser4/emergency_flush.c~reiser4-spinlock-cleanup 2006-02-16 14:17:05.000000000 -0800 +++ devel-akpm/fs/reiser4/emergency_flush.c 2006-02-16 14:17:05.000000000 -0800 @@ -246,7 +246,7 @@ #if REISER4_USE_EFLUSH -static int flushable(const jnode * node, struct page *page, int); +static int flushable(jnode * node, struct page *page, int); static int needs_allocation(const jnode * node); static eflush_node_t *ef_alloc(unsigned int flags); static reiser4_ba_flags_t ef_block_flags(const jnode * node); @@ -294,7 +294,7 @@ int emergency_flush(struct page *page) jref(node); result = 0; - LOCK_JNODE(node); + spin_lock_jnode(node); /* * page was dirty and under eflush. This is (only?) 
possible if page * was re-dirtied through mmap(2) after eflush IO was submitted, but @@ -302,7 +302,7 @@ int emergency_flush(struct page *page) */ eflush_del(node, 1); - LOCK_JLOAD(node); + spin_lock(&(node->load)); if (flushable(node, page, 1)) { if (needs_allocation(node)) { reiser4_block_nr blk; @@ -328,8 +328,8 @@ int emergency_flush(struct page *page) GFP_NOFS | __GFP_HIGH); } else { JF_CLR(node, JNODE_EFLUSH); - UNLOCK_JLOAD(node); - UNLOCK_JNODE(node); + spin_unlock(&(node->load)); + spin_unlock_jnode(node); if (blk != 0ull) { ef_free_block(node, &blk, hint.block_stage, efnode); @@ -352,11 +352,12 @@ int emergency_flush(struct page *page) atom = node->atom; - if (!flushable(node, page, 1) || needs_allocation(node) - || !jnode_is_dirty(node)) { - UNLOCK_JLOAD(node); - UNLOCK_JNODE(node); - UNLOCK_ATOM(atom); + if (!flushable(node, page, 1) || + needs_allocation(node) || + !JF_ISSET(node, JNODE_DIRTY)) { + spin_unlock(&(node->load)); + spin_unlock_jnode(node); + spin_unlock_atom(atom); fq_put(fq); return 1; } @@ -366,9 +367,9 @@ int emergency_flush(struct page *page) queue_jnode(fq, node); - UNLOCK_JLOAD(node); - UNLOCK_JNODE(node); - UNLOCK_ATOM(atom); + spin_unlock(&(node->load)); + spin_unlock_jnode(node); + spin_unlock_atom(atom); result = write_fq(fq, NULL, 0); if (result != 0) @@ -380,8 +381,8 @@ int emergency_flush(struct page *page) } } else { - UNLOCK_JLOAD(node); - UNLOCK_JNODE(node); + spin_unlock(&(node->load)); + spin_unlock_jnode(node); result = 1; } @@ -389,11 +390,11 @@ int emergency_flush(struct page *page) return result; } -static int flushable(const jnode * node, struct page *page, int check_eflush) +static int flushable(jnode * node, struct page *page, int check_eflush) { assert("nikita-2725", node != NULL); - assert("nikita-2726", spin_jnode_is_locked(node)); - assert("nikita-3388", spin_jload_is_locked(node)); + assert_spin_locked(&(node->guard)); + assert_spin_locked(&(node->load)); if (jnode_is_loaded(node)) { /* loaded */ return 0; @@ -418,7 +419,7 @@ static int flushable(const jnode * node, return 0; } /* don't flush cluster pages */ - if (jnode_is_cluster_page(node)) { + if (jnode_of_cluster(node, page)) { return 0; } if (check_eflush && JF_ISSET(node, JNODE_EFLUSH)) { /* already flushed */ @@ -519,9 +520,9 @@ static void inc_unfm_ef(void) reiser4_super_info_data *sbinfo; sbinfo = get_super_private(get_current_context()->super); - reiser4_spin_lock_sb(sbinfo); + spin_lock_reiser4_super(sbinfo); sbinfo->eflushed_unformatted++; - reiser4_spin_unlock_sb(sbinfo); + spin_unlock_reiser4_super(sbinfo); } static void dec_unfm_ef(void) @@ -529,10 +530,10 @@ static void dec_unfm_ef(void) reiser4_super_info_data *sbinfo; sbinfo = get_super_private(get_current_context()->super); - reiser4_spin_lock_sb(sbinfo); + spin_lock_reiser4_super(sbinfo); BUG_ON(sbinfo->eflushed_unformatted == 0); sbinfo->eflushed_unformatted--; - reiser4_spin_unlock_sb(sbinfo); + spin_unlock_reiser4_super(sbinfo); } #define EFLUSH_MAGIC 4335203 @@ -545,8 +546,8 @@ eflush_add(jnode * node, reiser4_block_n assert("nikita-2737", node != NULL); assert("nikita-2738", JF_ISSET(node, JNODE_EFLUSH)); assert("nikita-3382", !JF_ISSET(node, JNODE_EPROTECTED)); - assert("nikita-2765", spin_jnode_is_locked(node)); - assert("nikita-3381", spin_jload_is_locked(node)); + assert_spin_locked(&(node->guard)); + assert_spin_locked(&(node->load)); tree = jnode_get_tree(node); @@ -555,10 +556,10 @@ eflush_add(jnode * node, reiser4_block_n ef->hadatom = (node->atom != NULL); ef->incatom = 0; jref(node); - 
spin_lock_eflush(tree->super); + spin_lock(&(get_super_private(tree->super)->eflush_guard)); ef_hash_insert(get_jnode_enhash(node), ef); ON_DEBUG(++get_super_private(tree->super)->eflushed); - spin_unlock_eflush(tree->super); + spin_unlock(&(get_super_private(tree->super)->eflush_guard)); if (jnode_is_unformatted(node)) { struct inode *inode; @@ -578,7 +579,7 @@ eflush_add(jnode * node, reiser4_block_n } /* FIXME: do we need it here, if eflush add/del are protected by page lock? */ - UNLOCK_JLOAD(node); + spin_unlock(&(node->load)); /* * jnode_get_atom() can possible release jnode spin lock. This @@ -594,30 +595,30 @@ eflush_add(jnode * node, reiser4_block_n if (atom != NULL) { ++atom->flushed; ef->incatom = 1; - UNLOCK_ATOM(atom); + spin_unlock_atom(atom); } } - UNLOCK_JNODE(node); + spin_unlock_jnode(node); return 0; } /* Arrghh... cast to keep hash table code happy. */ #define C(node) ((jnode *const *)&(node)) -reiser4_block_nr *eflush_get(const jnode * node) +reiser4_block_nr *eflush_get(jnode * node) { eflush_node_t *ef; reiser4_tree *tree; assert("nikita-2740", node != NULL); assert("nikita-2741", JF_ISSET(node, JNODE_EFLUSH)); - assert("nikita-2767", spin_jnode_is_locked(node)); + assert_spin_locked(&(node->guard)); tree = jnode_get_tree(node); - spin_lock_eflush(tree->super); + spin_lock(&(get_super_private(tree->super)->eflush_guard)); ef = ef_hash_find(get_jnode_enhash(node), C(node)); - spin_unlock_eflush(tree->super); + spin_unlock(&(get_super_private(tree->super)->eflush_guard)); assert("nikita-2742", ef != NULL); return &ef->blocknr; @@ -633,25 +634,25 @@ void eflush_free(jnode * node) struct inode *inode = NULL; reiser4_block_nr blk; - assert("zam-1026", spin_jnode_is_locked(node)); + assert_spin_locked(&(node->guard)); table = get_jnode_enhash(node); tree = jnode_get_tree(node); - spin_lock_eflush(tree->super); + spin_lock(&(get_super_private(tree->super)->eflush_guard)); ef = ef_hash_find(table, C(node)); BUG_ON(ef == NULL); assert("nikita-2745", ef != NULL); blk = ef->blocknr; ef_hash_remove(table, ef); ON_DEBUG(--get_super_private(tree->super)->eflushed); - spin_unlock_eflush(tree->super); + spin_unlock(&(get_super_private(tree->super)->eflush_guard)); if (ef->incatom) { atom = jnode_get_atom(node); assert("nikita-3311", atom != NULL); --atom->flushed; - UNLOCK_ATOM(atom); + spin_unlock_atom(atom); } assert("vs-1215", JF_ISSET(node, JNODE_EFLUSH)); @@ -675,7 +676,7 @@ void eflush_free(jnode * node) jnode_tree_by_reiser4_inode(info)->rnode != NULL); dec_unfm_ef(); } - UNLOCK_JNODE(node); + spin_unlock_jnode(node); #if REISER4_DEBUG if (blocknr_is_fake(jnode_get_block(node))) @@ -692,7 +693,7 @@ void eflush_free(jnode * node) kmem_cache_free(eflush_slab, ef); - LOCK_JNODE(node); + spin_lock_jnode(node); } void eflush_del(jnode * node, int page_locked) @@ -700,7 +701,7 @@ void eflush_del(jnode * node, int page_l struct page *page; assert("nikita-2743", node != NULL); - assert("nikita-2770", spin_jnode_is_locked(node)); + assert_spin_locked(&(node->guard)); if (!JF_ISSET(node, JNODE_EFLUSH)) return; @@ -710,9 +711,9 @@ void eflush_del(jnode * node, int page_l assert("nikita-2806", page != NULL); assert("nikita-2807", PageLocked(page)); } else { - UNLOCK_JNODE(node); + spin_unlock_jnode(node); page = jnode_get_page_locked(node, GFP_NOFS); - LOCK_JNODE(node); + spin_lock_jnode(node); if (page == NULL) { warning("zam-1025", "eflush_del failed to get page back\n"); @@ -724,11 +725,11 @@ void eflush_del(jnode * node, int page_l } if (PageWriteback(page)) { - UNLOCK_JNODE(node); + 
spin_unlock_jnode(node); page_cache_get(page); reiser4_wait_page_writeback(page); page_cache_release(page); - LOCK_JNODE(node); + spin_lock_jnode(node); if (unlikely(!JF_ISSET(node, JNODE_EFLUSH))) /* race: some other thread unflushed jnode. */ goto out; @@ -796,13 +797,13 @@ static int ef_free_block(jnode * node, if (ef->reserve) { /* further, transfer block from grabbed into flush * reserved space. */ - LOCK_JNODE(node); + spin_lock_jnode(node); atom = jnode_get_atom(node); assert("nikita-2785", atom != NULL); grabbed2flush_reserved_nolock(atom, 1); - UNLOCK_ATOM(atom); + spin_unlock_atom(atom); JF_SET(node, JNODE_FLUSH_RESERVED); - UNLOCK_JNODE(node); + spin_unlock_jnode(node); } else { reiser4_context *ctx = get_current_context(); grabbed2free(ctx, get_super_private(ctx->super), @@ -822,8 +823,8 @@ ef_prepare(jnode * node, reiser4_block_n assert("nikita-2760", node != NULL); assert("nikita-2761", blk != NULL); assert("nikita-2762", efnode != NULL); - assert("nikita-2763", spin_jnode_is_locked(node)); - assert("nikita-3387", spin_jload_is_locked(node)); + assert_spin_locked(&(node->guard)); + assert_spin_locked(&(node->load)); hint->blk = EFLUSH_START_BLOCK; hint->max_dist = 0; @@ -846,10 +847,10 @@ ef_prepare(jnode * node, reiser4_block_n usedreserve = 1; flush_reserved2grabbed(atom, 1); JF_CLR(node, JNODE_FLUSH_RESERVED); - UNLOCK_ATOM(atom); + spin_unlock_atom(atom); break; } else - UNLOCK_ATOM(atom); + spin_unlock_atom(atom); } /* * fall through. @@ -873,8 +874,8 @@ ef_prepare(jnode * node, reiser4_block_n * XXX protect @node from being concurrently eflushed. Otherwise, there * is a danger of underflowing block space */ - UNLOCK_JLOAD(node); - UNLOCK_JNODE(node); + spin_unlock(&(node->load)); + spin_unlock_jnode(node); *efnode = ef_alloc(GFP_NOFS | __GFP_HIGH); if (*efnode == NULL) { @@ -890,8 +891,8 @@ ef_prepare(jnode * node, reiser4_block_n if (result) kmem_cache_free(eflush_slab, *efnode); out: - LOCK_JNODE(node); - LOCK_JLOAD(node); + spin_lock_jnode(node); + spin_lock(&(node->load)); return result; } diff -puN fs/reiser4/emergency_flush.h~reiser4-spinlock-cleanup fs/reiser4/emergency_flush.h --- devel/fs/reiser4/emergency_flush.h~reiser4-spinlock-cleanup 2006-02-16 14:17:05.000000000 -0800 +++ devel-akpm/fs/reiser4/emergency_flush.h 2006-02-16 14:17:05.000000000 -0800 @@ -34,7 +34,7 @@ extern void done_eflush(void); extern int eflush_init_at(struct super_block *super); extern void eflush_done_at(struct super_block *super); -extern reiser4_block_nr *eflush_get(const jnode * node); +extern reiser4_block_nr *eflush_get(jnode * node); extern void eflush_del(jnode * node, int page_locked); extern void eflush_free(jnode *); diff -puN fs/reiser4/entd.c~reiser4-spinlock-cleanup fs/reiser4/entd.c --- devel/fs/reiser4/entd.c~reiser4-spinlock-cleanup 2006-02-16 14:17:05.000000000 -0800 +++ devel-akpm/fs/reiser4/entd.c 2006-02-16 14:17:05.000000000 -0800 @@ -25,9 +25,8 @@ #define DEF_PRIORITY 12 #define MAX_ENTD_ITERS 10 -#define ENTD_ASYNC_REQUESTS_LIMIT 0 -static void entd_flush(struct super_block *); +static void entd_flush(struct super_block *, struct wbq *); static int entd(void *arg); /* @@ -37,12 +36,6 @@ static int entd(void *arg); snprintf(current->comm, sizeof(current->comm), \ "ent:%s%s", super->s_id, (state)) -/* get ent context for the @super */ -static inline entd_context *get_entd_context(struct super_block *super) -{ - return &get_super_private(super)->entd; -} - /** * init_entd - initialize entd context and start kernel daemon * @super: super block to start ent thread for 
@@ -64,7 +57,9 @@ int init_entd(struct super_block *super) #if REISER4_DEBUG INIT_LIST_HEAD(&ctx->flushers_list); #endif - INIT_LIST_HEAD(&ctx->wbq_list); + /* lists of writepage requests */ + INIT_LIST_HEAD(&ctx->todo_list); + INIT_LIST_HEAD(&ctx->done_list); /* start entd */ ctx->tsk = kthread_run(entd, super, "ent:%s", super->s_id); if (IS_ERR(ctx->tsk)) @@ -72,9 +67,8 @@ int init_entd(struct super_block *super) return 0; } -static void __put_wbq(entd_context * ent, struct wbq *rq) +static void __put_wbq(entd_context *ent, struct wbq *rq) { - rq->wbc->nr_to_write--; up(&rq->sem); } @@ -83,12 +77,11 @@ static struct wbq *__get_wbq(entd_contex { struct wbq *wbq; - if (list_empty_careful(&ent->wbq_list)) { + if (list_empty_careful(&ent->todo_list)) return NULL; - } - ent->nr_synchronous_requests --; - ent->nr_all_requests --; - wbq = list_entry(ent->wbq_list.next, struct wbq, link); + + ent->nr_todo_reqs --; + wbq = list_entry(ent->todo_list.next, struct wbq, link); list_del_init(&wbq->link); return wbq; } @@ -144,27 +137,37 @@ static int entd(void *arg) try_to_freeze(); spin_lock(&ent->guard); - while (ent->nr_all_requests != 0) { - assert("zam-1043", - ent->nr_all_requests >= - ent->nr_synchronous_requests); - if (ent->nr_synchronous_requests != 0) { - struct wbq *rq = list_entry(ent->wbq_list.next, struct wbq, link); - - if (++rq->nr_entd_iters > MAX_ENTD_ITERS) { - rq = __get_wbq(ent); - __put_wbq(ent, rq); - continue; - } - } else { - /* endless loop avoidance. */ - ent->nr_all_requests--; - } + while (ent->nr_todo_reqs != 0) { + struct wbq *rq, *next; + assert("", list_empty_careful(&ent->done_list)); + + /* take request from the queue head */ + rq = __get_wbq(ent); + assert("", rq != NULL); + ent->cur_request = rq; spin_unlock(&ent->guard); + entd_set_comm("!"); - entd_flush(super); + entd_flush(super, rq); + + iput(rq->mapping->host); + up(&(rq->sem)); + + /* + * wakeup all requestors and iput their inodes + */ spin_lock(&ent->guard); + list_for_each_entry_safe(rq, next, &ent->done_list, link) { + list_del_init(&(rq->link)); + ent->nr_done_reqs --; + spin_unlock(&ent->guard); + + assert("", rq->written == 1); + iput(rq->mapping->host); + up(&(rq->sem)); + spin_lock(&ent->guard); + } } spin_unlock(&ent->guard); @@ -179,13 +182,16 @@ static int entd(void *arg) done = 1; break; } - if (ent->nr_all_requests != 0) + if (ent->nr_todo_reqs != 0) break; schedule(); } while (0); finish_wait(&ent->wait, &__wait); } } + spin_lock(&ent->guard); + BUG_ON(ent->nr_todo_reqs != 0); + spin_unlock(&ent->guard); wakeup_all_wbq(ent); return 0; } @@ -240,7 +246,7 @@ void leave_flush(struct super_block *sup spin_lock(&ent->guard); ent->flushers--; - wake_up_ent = (ent->flushers == 0 && ent->nr_synchronous_requests != 0); + wake_up_ent = (ent->flushers == 0 && ent->nr_todo_reqs != 0); #if REISER4_DEBUG list_del_init(&get_current_context()->flushers_link); #endif @@ -249,184 +255,118 @@ void leave_flush(struct super_block *sup wake_up(&ent->wait); } -#define ENTD_CAPTURE_APAGE_BURST (32l) - -/* Ask as_ops->writepages() to process given page */ -static jnode * capture_given_page(struct page *page) -{ - struct address_space * mapping; - struct writeback_control wbc = { - .bdi = NULL, - .sync_mode = WB_SYNC_NONE, - .older_than_this = NULL, - .nonblocking = 0, - .start = page->index << PAGE_CACHE_SHIFT, - .end = page->index << PAGE_CACHE_SHIFT, - .nr_to_write = 1, - }; - jnode * node; - - mapping = page->mapping; - if (mapping == NULL) - return NULL; - if (mapping->a_ops && mapping->a_ops->writepages) - 
mapping->a_ops->writepages(mapping, &wbc); - lock_page(page); - node = jprivate(page); - if (node != NULL) - jref(node); - unlock_page(page); - return node; -} - -jnode * get_jnode_by_wbq(struct super_block *super, struct wbq *rq) -{ - struct page * page = NULL; - jnode * node = NULL; - int result; - - if (rq == NULL) - return NULL; - - assert("zam-1052", rq->page != NULL); - - page = rq->page; - node = capture_given_page(page); - if (node == NULL) - return NULL; - spin_lock_jnode(node); - result = try_capture(node, ZNODE_WRITE_LOCK, TXN_CAPTURE_NONBLOCKING, 0); - spin_unlock_jnode(node); - if (result) { - jput(node); - return NULL; - } - return node; -} +#define ENTD_CAPTURE_APAGE_BURST SWAP_CLUSTER_MAX -static void entd_flush(struct super_block *super) +static void entd_flush(struct super_block *super, struct wbq *rq) { reiser4_context ctx; - struct writeback_control wbc = { - .bdi = NULL, - .sync_mode = WB_SYNC_NONE, - .older_than_this = NULL, - .nr_to_write = ENTD_CAPTURE_APAGE_BURST, - .nonblocking = 0, - }; + int tmp; init_stack_context(&ctx, super); ctx.entd = 1; - generic_sync_sb_inodes(super, &wbc); + rq->wbc->start = rq->page->index << PAGE_CACHE_SHIFT; + rq->wbc->end = (rq->page->index + ENTD_CAPTURE_APAGE_BURST) << PAGE_CACHE_SHIFT; + tmp = rq->wbc->nr_to_write; + rq->mapping->a_ops->writepages(rq->mapping, rq->wbc); + + if (rq->wbc->nr_to_write > 0) { + rq->wbc->start = 0; + rq->wbc->end = 0; + generic_sync_sb_inodes(super, rq->wbc); + } + rq->wbc->nr_to_write = ENTD_CAPTURE_APAGE_BURST; + writeout(super, rq->wbc); - wbc.nr_to_write = ENTD_CAPTURE_APAGE_BURST; - writeout(super, &wbc); context_set_commit_async(&ctx); reiser4_exit_context(&ctx); } +/** + * write_page_by_ent - ask entd thread to flush this page as part of slum + * @page: page to be written + * @wbc: writeback control passed to reiser4_writepage + * + * Creates a request, puts it on entd list of requests, wakeups entd if + * necessary, waits until entd completes with the request. + */ int write_page_by_ent(struct page *page, struct writeback_control *wbc) { struct super_block *sb; + struct inode *inode; entd_context *ent; struct wbq rq; - int phantom; - int wake_up_entd; + + assert("", PageLocked(page)); + assert("", page->mapping != NULL); sb = page->mapping->host->i_sb; ent = get_entd_context(sb); - if (ent == NULL || ent->done) - /* entd is not running. */ - return 0; + assert("", ent && ent->done == 0); - phantom = jprivate(page) == NULL || !jnode_check_dirty(jprivate(page)); -#if 1 - BUG_ON(page->mapping == NULL); - /* re-dirty page */ - if (!TestSetPageDirty(page)) { - if (mapping_cap_account_dirty(page->mapping)) - inc_page_state(nr_dirty); - } - /*reiser4_set_page_dirty(page);*/ - /*set_page_dirty_internal(page, phantom);*/ - /* unlock it to avoid deadlocks with the thread which will do actual i/o */ + /* + * pin inode in memory, unlock page, entd_flush will iput. 
We can not + * iput here becasue we can not allow delete_inode to be called here + */ + inode = igrab(page->mapping->host); unlock_page(page); -#endif + if (inode == NULL) + /* inode is getting freed */ + return 0; /* init wbq */ INIT_LIST_HEAD(&rq.link); - rq.nr_entd_iters = 0; - rq.page = page; + rq.magic = WBQ_MAGIC; rq.wbc = wbc; - rq.phantom = phantom; + rq.page = page; + rq.mapping = inode->i_mapping; + rq.node = NULL; + rq.written = 0; + rq.caller = get_current_context_check(); + sema_init(&rq.sem, 0); + /* add request to entd's list of writepage requests */ spin_lock(&ent->guard); - wake_up_entd = (ent->flushers == 0); - ent->nr_all_requests++; - if (ent->nr_all_requests <= - ent->nr_synchronous_requests + ENTD_ASYNC_REQUESTS_LIMIT) { - BUG_ON(1); - spin_unlock(&ent->guard); - if (wake_up_entd) - wake_up(&ent->wait); - lock_page(page); - return 0; - } + ent->nr_todo_reqs++; + list_add_tail(&rq.link, &ent->todo_list); + if (ent->nr_todo_reqs == 1) + wake_up(&ent->wait); - sema_init(&rq.sem, 0); - list_add_tail(&rq.link, &ent->wbq_list); - ent->nr_synchronous_requests++; spin_unlock(&ent->guard); - if (wake_up_entd) - wake_up(&ent->wait); + + /* wait until entd finishes */ down(&rq.sem); - /* don't release rq until wakeup_wbq stops using it. */ + /* + * spin until entd thread which did up(&rq.sem) does not need rq + * anymore + */ spin_lock(&ent->guard); spin_unlock(&ent->guard); - if (!PageDirty(page)) { + + if (rq.written) /* Eventually ENTD has written the page to disk. */ return 1; - } - lock_page(page); - return WRITEPAGE_ACTIVATE; -} - -void ent_writes_page(struct super_block *sb, struct page *page) -{ - entd_context *ent = get_entd_context(sb); - struct wbq *rq; - - assert("zam-1041", ent != NULL); - - if (PageActive(page) || ent->nr_all_requests == 0) - return; - - SetPageReclaim(page); - spin_lock(&ent->guard); - if (ent->nr_all_requests > 0) { - rq = __get_wbq(ent); - if (rq != NULL) - __put_wbq(ent, rq); - } - spin_unlock(&ent->guard); + lock_page(page); + redirty_page_for_writepage(wbc, page); + unlock_page(page); + return 0; } int wbq_available(void) { struct super_block *sb = reiser4_get_current_sb(); entd_context *ent = get_entd_context(sb); - return ent->nr_all_requests; + return ent->nr_todo_reqs; } -/* Make Linus happy. - Local variables: - c-indentation-style: "K&R" - mode-name: "LC" - c-basic-offset: 8 - tab-width: 8 - fill-column: 80 - End: -*/ +/* + * Local variables: + * c-indentation-style: "K&R" + * mode-name: "LC" + * c-basic-offset: 8 + * tab-width: 8 + * fill-column: 79 + * End: + */ diff -puN fs/reiser4/entd.h~reiser4-spinlock-cleanup fs/reiser4/entd.h --- devel/fs/reiser4/entd.h~reiser4-spinlock-cleanup 2006-02-16 14:17:05.000000000 -0800 +++ devel-akpm/fs/reiser4/entd.h 2006-02-16 14:17:05.000000000 -0800 @@ -13,14 +13,19 @@ #include #include /* for struct task_struct */ +#define WBQ_MAGIC 0x7876dc76 + /* write-back request. */ struct wbq { + int magic; struct list_head link; /* list head of this list is in entd context */ struct writeback_control *wbc; struct page *page; + struct address_space *mapping; struct semaphore sem; - int nr_entd_iters; - unsigned int phantom:1; + jnode *node; /* set if ent thread captured requested page */ + int written; /* set if ent thread wrote requested page */ + reiser4_context *caller; }; /* ent-thread context. 
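The reworked entd path above replaces the old request counters with an explicit queue: write_page_by_ent() pins the inode, fills a struct wbq on its stack, queues it on todo_list and sleeps on the request semaphore; the ent thread dequeues it, runs entd_flush(), and wakes the submitter, moving completed requests through done_list and setting rq->written. A condensed caller-side sketch of that hand-off follows; it is an illustration assembled from write_page_by_ent() above, not additional patch code, and it omits the magic/caller bookkeeping.

/* illustration only, condensed from write_page_by_ent() */
static int queue_to_entd_example(struct super_block *sb, struct inode *inode,
				 struct page *page,
				 struct writeback_control *wbc)
{
	entd_context *ent = get_entd_context(sb);
	struct wbq rq;

	INIT_LIST_HEAD(&rq.link);
	rq.wbc = wbc;
	rq.page = page;
	rq.mapping = inode->i_mapping;
	rq.written = 0;
	sema_init(&rq.sem, 0);

	spin_lock(&ent->guard);
	ent->nr_todo_reqs++;
	list_add_tail(&rq.link, &ent->todo_list);	/* producer side */
	if (ent->nr_todo_reqs == 1)
		wake_up(&ent->wait);			/* ent thread may be idle */
	spin_unlock(&ent->guard);

	down(&rq.sem);			/* ent thread does up() when finished */
	return rq.written;
}
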
This is used to synchronize starting/stopping ent @@ -37,13 +42,28 @@ typedef struct entd_context { int done; /* counter of active flushers */ int flushers; + /* + * when reiser4_writepage asks entd to write a page - it adds struct + * wbq to this list + */ + struct list_head todo_list; + /* number of elements on the above list */ + int nr_todo_reqs; + + struct wbq *cur_request; + /* + * when entd writes a page it moves write-back request from todo_list + * to done_list. This list is used at the end of entd iteration to + * wakeup requestors and iput inodes. + */ + struct list_head done_list; + /* number of elements on the above list */ + int nr_done_reqs; + #if REISER4_DEBUG /* list of all active flushers */ struct list_head flushers_list; #endif - int nr_all_requests; - int nr_synchronous_requests; - struct list_head wbq_list; /* struct wbq are elements of this list */ } entd_context; extern int init_entd(struct super_block *); diff -puN fs/reiser4/eottl.c~reiser4-spinlock-cleanup fs/reiser4/eottl.c --- devel/fs/reiser4/eottl.c~reiser4-spinlock-cleanup 2006-02-16 14:17:05.000000000 -0800 +++ devel-akpm/fs/reiser4/eottl.c 2006-02-16 14:17:05.000000000 -0800 @@ -142,8 +142,9 @@ is_next_item_internal(coord_t *coord, co * concurrent thread could get their first and insert item with a key * smaller than @key */ - result = UNDER_RW(dk, current_tree, read, - keycmp(key, znode_get_rd_key(coord->node))); + read_lock_dk(current_tree); + result = keycmp(key, znode_get_rd_key(coord->node)); + read_unlock_dk(current_tree); assert("vs-6", result != EQUAL_TO); if (result == GREATER_THAN) return 2; @@ -170,8 +171,9 @@ is_next_item_internal(coord_t *coord, co * check whether concurrent thread managed to insert item with a key * smaller than @key */ - result = UNDER_RW(dk, current_tree, read, - keycmp(key, znode_get_ld_key(rn.node))); + read_lock_dk(current_tree); + result = keycmp(key, znode_get_ld_key(rn.node)); + read_unlock_dk(current_tree); assert("vs-6", result != EQUAL_TO); if (result == GREATER_THAN) { done_lh(&rn); @@ -224,19 +226,18 @@ static reiser4_key *rd_key(const coord_t assert("nikita-2281", coord_is_between_items(coord)); coord_dup(&dup, coord); - RLOCK_DK(current_tree); - if (coord_set_to_right(&dup) == 0) /* next item is in this node. Return its key. */ unit_key_by_coord(&dup, key); - else + else { /* * next item either does not exist or is in right * neighbor. Return znode's right delimiting key. */ + read_lock_dk(current_tree); *key = *znode_get_rd_key(coord->node); - - RUNLOCK_DK(current_tree); + read_unlock_dk(current_tree); + } return key; } @@ -250,7 +251,6 @@ static reiser4_key *rd_key(const coord_t * Inserts empty leaf node between two extent items. It is necessary when we * have to insert an item on leaf level between two extents (items on the twig * level). - * */ static int add_empty_leaf(coord_t *insert_coord, lock_handle *lh, @@ -272,12 +272,12 @@ add_empty_leaf(coord_t *insert_coord, lo return PTR_ERR(node); /* setup delimiting keys for node being inserted */ - WLOCK_DK(tree); + write_lock_dk(tree); znode_set_ld_key(node, key); znode_set_rd_key(node, rdkey); ON_DEBUG(node->creator = current); ON_DEBUG(node->first_key = *key); - WUNLOCK_DK(tree); + write_unlock_dk(tree); ZF_SET(node, JNODE_ORPHAN); @@ -339,13 +339,13 @@ add_empty_leaf(coord_t *insert_coord, lo * neighbor was not known. 
Do it * here */ - WLOCK_TREE(tree); + write_lock_tree(tree); assert("nikita-3312", znode_is_right_connected(node)); assert("nikita-2984", node->right == NULL); ZF_CLR(node, JNODE_RIGHT_CONNECTED); - WUNLOCK_TREE(tree); + write_unlock_tree(tree); result = connect_znode(insert_coord, node); if (result == 0) @@ -359,7 +359,6 @@ add_empty_leaf(coord_t *insert_coord, lo } else { warning("nikita-3136", "Cannot lock child"); - print_znode("child", node); } done_lh(&local_lh); zrelse(node); diff -puN fs/reiser4/estimate.c~reiser4-spinlock-cleanup fs/reiser4/estimate.c --- devel/fs/reiser4/estimate.c~reiser4-spinlock-cleanup 2006-02-16 14:17:05.000000000 -0800 +++ devel-akpm/fs/reiser4/estimate.c 2006-02-16 14:17:05.000000000 -0800 @@ -70,19 +70,33 @@ reiser4_block_nr estimate_insert_flow(tr } /* returnes max number of nodes can be occupied by disk cluster */ -reiser4_block_nr estimate_disk_cluster(struct inode * inode) +reiser4_block_nr estimate_cluster(struct inode * inode, int unprepped) { - return 2 + cluster_nrpages(inode); + int per_cluster; + per_cluster = (unprepped ? 1 : cluster_nrpages(inode)); + return 3 + per_cluster + + max_balance_overhead(3 + per_cluster, + REISER4_MAX_ZTREE_HEIGHT); } -/* how many nodes might get dirty and added nodes during insertion of a disk cluster */ -reiser4_block_nr estimate_insert_cluster(struct inode * inode, int unprepped) +/* how many nodes might get dirty and added + during insertion of a disk cluster */ +reiser4_block_nr estimate_insert_cluster(struct inode * inode) { - int per_cluster; - per_cluster = (unprepped ? 1 : cluster_nrpages(inode)); + return estimate_cluster(inode, 1); /* 24 */ +} - return 3 + per_cluster + max_balance_overhead(3 + per_cluster, - REISER4_MAX_ZTREE_HEIGHT); +/* how many nodes might get dirty and added + during update of a (prepped or unprepped) disk cluster */ +reiser4_block_nr estimate_update_cluster(struct inode * inode) +{ + return estimate_cluster(inode, 0); /* 44, for 64K-cluster */ +} + +/* how many nodes occupied by a disk cluster might get dirty */ +reiser4_block_nr estimate_dirty_cluster(struct inode * inode) +{ + return 2 + cluster_nrpages(inode); } /* Make Linus happy. diff -puN fs/reiser4/flush.c~reiser4-spinlock-cleanup fs/reiser4/flush.c --- devel/fs/reiser4/flush.c~reiser4-spinlock-cleanup 2006-02-16 14:17:05.000000000 -0800 +++ devel-akpm/fs/reiser4/flush.c 2006-02-16 14:17:05.000000000 -0800 @@ -919,13 +919,13 @@ static jnode * find_flush_start_jnode( jnode * node; if (start != NULL) { - LOCK_JNODE(start); - if (jnode_is_dirty(start) && !JF_ISSET(start, JNODE_OVRWR)) { + spin_lock_jnode(start); + if (JF_ISSET(start, JNODE_DIRTY) && !JF_ISSET(start, JNODE_OVRWR)) { assert("zam-1056", start->atom == atom); node = start; goto enter; } - UNLOCK_JNODE(start); + spin_unlock_jnode(start); } /* * In this loop we process all already prepped (RELOC or OVRWR) and dirtied again @@ -933,9 +933,9 @@ static jnode * find_flush_start_jnode( * not prepped node found in the atom dirty lists. 
*/ while ((node = find_first_dirty_jnode(atom, flags))) { - LOCK_JNODE(node); + spin_lock_jnode(node); enter: - assert("zam-881", jnode_is_dirty(node)); + assert("zam-881", JF_ISSET(node, JNODE_DIRTY)); assert("zam-898", !JF_ISSET(node, JNODE_OVRWR)); if (JF_ISSET(node, JNODE_WRITEBACK)) { @@ -966,7 +966,7 @@ static jnode * find_flush_start_jnode( } else break; - UNLOCK_JNODE(node); + spin_unlock_jnode(node); } return node; } @@ -986,7 +986,7 @@ flush_current_atom(int flags, long nr_to int ret; assert("zam-889", atom != NULL && *atom != NULL); - assert("zam-890", spin_atom_is_locked(*atom)); + assert_spin_locked(&((*atom)->alock)); assert("zam-892", get_current_context()->trans->atom == *atom); nr_to_write = LONG_MAX; @@ -999,7 +999,7 @@ flush_current_atom(int flags, long nr_to if (ret) return ret; - assert("zam-891", spin_atom_is_locked(*atom)); + assert_spin_locked(&((*atom)->alock)); /* parallel flushers limit */ if (sinfo->tmgr.atom_max_flushers != 0) { @@ -1029,12 +1029,12 @@ flush_current_atom(int flags, long nr_to writeout_mode_disable(); return 0; } - UNLOCK_ATOM(*atom); + spin_unlock_atom(*atom); } else { jref(node); BUG_ON((*atom)->super != node->tree->super); - UNLOCK_ATOM(*atom); - UNLOCK_JNODE(node); + spin_unlock_atom(*atom); + spin_unlock_jnode(node); BUG_ON(nr_to_write == 0); ret = jnode_flush(node, nr_to_write, nr_submitted, fq, flags); jput(node); @@ -1048,7 +1048,7 @@ flush_current_atom(int flags, long nr_to (*atom)->nr_flushers--; fq_put_nolock(fq); atom_send_event(*atom); - UNLOCK_ATOM(*atom); + spin_unlock_atom(*atom); writeout_mode_disable(); @@ -1151,7 +1151,7 @@ reverse_relocate_check_dirty_parent(jnod { int ret; - if (!znode_check_dirty(parent_coord->node)) { + if (!JF_ISSET(ZJNODE(parent_coord->node), JNODE_DIRTY)) { ret = reverse_relocate_test(node, parent_coord, pos); if (ret < 0) { @@ -2141,7 +2141,7 @@ static int handle_pos_end_of_twig(flush_ goto out; /* right twig could be not dirty */ - if (znode_check_dirty(right_lock.node)) { + if (JF_ISSET(ZJNODE(right_lock.node), JNODE_DIRTY)) { /* If right twig node is dirty we always attempt to squeeze it * content to the left... 
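Several tests in flush.c switch from the jnode_is_dirty()/znode_check_dirty() helpers (deleted from jnode.h later in this patch) to a direct JF_ISSET(..., JNODE_DIRTY), or JF_ISSET(ZJNODE(z), JNODE_DIRTY) for a znode. JF_ISSET is a bit test on node->state, so unlike the old helper it does not itself assert that the jnode guard is held; each caller keeps whatever locking it already documents. A small sketch of how such a test now reads (the helper name is invented for the example):

/* illustration only; the helper name is invented */
static inline int wants_flushing(jnode *node)
{
	assert_spin_locked(&node->guard);	/* locking is the caller's contract now */
	return JF_ISSET(node, JNODE_DIRTY) && !JF_ISSET(node, JNODE_OVRWR);
}
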
*/ became_dirty: @@ -2196,7 +2196,7 @@ static int handle_pos_end_of_twig(flush_ &at_right, pos); if (ret) goto out; - if (znode_check_dirty(right_lock.node)) + if (JF_ISSET(ZJNODE(right_lock.node), JNODE_DIRTY)) goto became_dirty; } } @@ -2384,7 +2384,7 @@ static void update_ldkey(znode * node) { reiser4_key ldkey; - assert("vs-1630", rw_dk_is_write_locked(znode_get_tree(node))); + assert_rw_write_locked(&(znode_get_tree(node)->dk_lock)); if (node_is_empty(node)) return; @@ -2396,9 +2396,9 @@ static void update_ldkey(znode * node) and @right correspondingly and sets right delimiting key of @left to first key of @right */ static void update_znode_dkeys(znode * left, znode * right) { - assert("nikita-1470", rw_dk_is_write_locked(znode_get_tree(right))); - assert("vs-1629", znode_is_write_locked(left) - && znode_is_write_locked(right)); + assert_rw_write_locked(&(znode_get_tree(right)->dk_lock)); + assert("vs-1629", (znode_is_write_locked(left) && + znode_is_write_locked(right))); /* we need to update left delimiting of left if it was empty before shift */ update_ldkey(left); @@ -2442,7 +2442,8 @@ static int squeeze_right_non_twig(znode assert("nikita-2246", znode_get_level(left) == znode_get_level(right)); - if (!znode_is_dirty(left) || !znode_is_dirty(right)) + if (!JF_ISSET(ZJNODE(left), JNODE_DIRTY) || + !JF_ISSET(ZJNODE(right), JNODE_DIRTY)) return SQUEEZE_TARGET_FULL; pool = init_carry_pool(sizeof(*pool) + 3 * sizeof(*todo)); @@ -2465,7 +2466,9 @@ static int squeeze_right_non_twig(znode node's operation. But it can not be done there. Nobody remembers why, though */ tree = znode_get_tree(left); - UNDER_RW_VOID(dk, tree, write, update_znode_dkeys(left, right)); + write_lock_dk(tree); + update_znode_dkeys(left, right); + write_unlock_dk(tree); /* Carry is called to update delimiting key and, maybe, to remove empty node. */ @@ -2486,6 +2489,18 @@ static int squeeze_right_non_twig(znode return ret; } +#if REISER4_DEBUG +static int sibling_link_is_ok(const znode *left, const znode *right) +{ + int result; + + read_lock_tree(znode_get_tree(left)); + result = (left->right == right && left == right->left); + read_unlock_tree(znode_get_tree(left)); + return result; +} +#endif + /* Shift first unit of first item if it is an internal one. Return SQUEEZE_TARGET_FULL if it fails to shift an item, otherwise return SUBTREE_MOVED. 
*/ @@ -2501,15 +2516,12 @@ static int shift_one_internal_unit(znode assert("nikita-2247", znode_get_level(left) == znode_get_level(right)); assert("nikita-2435", znode_is_write_locked(left)); assert("nikita-2436", znode_is_write_locked(right)); - assert("nikita-2434", - UNDER_RW(tree, znode_get_tree(left), read, - left->right == right)); - - pool = - init_carry_pool(sizeof(*pool) + 3 * sizeof(*todo) + sizeof(*coord) + - sizeof(*info) + assert("nikita-2434", sibling_link_is_ok(left, right)); + + pool = init_carry_pool(sizeof(*pool) + 3 * sizeof(*todo) + + sizeof(*coord) + sizeof(*info) #if REISER4_DEBUG - + sizeof(*coord) + 2 * sizeof(reiser4_key) + + sizeof(*coord) + 2 * sizeof(reiser4_key) #endif ); if (IS_ERR(pool)) @@ -2565,7 +2577,9 @@ static int shift_one_internal_unit(znode znode_make_dirty(left); znode_make_dirty(right); tree = znode_get_tree(left); - UNDER_RW_VOID(dk, tree, write, update_znode_dkeys(left, right)); + write_lock_dk(tree); + update_znode_dkeys(left, right); + write_unlock_dk(tree); /* reserve space for delimiting keys after shifting */ grabbed = get_current_context()->grabbed_blocks; @@ -2808,7 +2822,9 @@ allocate_znode_update(znode * node, cons uber = uber_lock.node; - UNDER_RW_VOID(tree, tree, write, tree->root_block = blk); + write_lock_tree(tree); + tree->root_block = blk; + write_unlock_tree(tree); znode_make_dirty(uber); } @@ -2877,13 +2893,13 @@ jnode_lock_parent_coord(jnode * node, * because coord_by_key() will just fail to find appropriate * extent. */ - LOCK_JNODE(node); + spin_lock_jnode(node); if (!JF_ISSET(node, JNODE_HEARD_BANSHEE)) { jnode_build_key(node, &key); ret = 0; } else ret = RETERR(-ENOENT); - UNLOCK_JNODE(node); + spin_unlock_jnode(node); if (ret != 0) return ret; @@ -2901,10 +2917,8 @@ jnode_lock_parent_coord(jnode * node, assert("edward-1038", ergo(jnode_is_cluster_page(node), JF_ISSET(node, JNODE_HEARD_BANSHEE))); - if (!JF_ISSET(node, JNODE_HEARD_BANSHEE)) { + if (!JF_ISSET(node, JNODE_HEARD_BANSHEE)) warning("nikita-3177", "Parent not found"); - print_jnode("node", node); - } return ret; case CBK_COORD_FOUND: if (coord->between != AT_UNIT) { @@ -2914,7 +2928,6 @@ jnode_lock_parent_coord(jnode * node, warning("nikita-3178", "Found but not happy: %i", coord->between); - print_jnode("node", node); } return RETERR(-ENOENT); } @@ -3004,7 +3017,7 @@ static int neighbor_in_slum(znode * node if (!check_dirty) return 0; /* Check dirty bit of locked znode, no races here */ - if (znode_check_dirty(lock->node)) + if (JF_ISSET(ZJNODE(lock->node), JNODE_DIRTY)) return 0; done_lh(lock); @@ -3015,13 +3028,17 @@ static int neighbor_in_slum(znode * node write-locked (for squeezing) so no tree lock is needed. */ static int znode_same_parents(znode * a, znode * b) { + int result; + assert("jmacd-7011", znode_is_write_locked(a)); assert("jmacd-7012", znode_is_write_locked(b)); /* We lock the whole tree for this check.... I really don't like whole tree * locks... -Hans */ - return UNDER_RW(tree, znode_get_tree(a), read, - (znode_parent(a) == znode_parent(b))); + read_lock_tree(znode_get_tree(a)); + result = (znode_parent(a) == znode_parent(b)); + read_unlock_tree(znode_get_tree(a)); + return result; } /* FLUSH SCAN */ @@ -3333,7 +3350,7 @@ static int scan_formatted(flush_scan * s } /* Lock the tree, check-for and reference the next sibling. 
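UNDER_RW(type, lock, read, expr) used to evaluate expr with the corresponding rwlock held and yield its value. With the macro gone, each site grows a local result and an explicit lock/unlock pair, which is why this patch adds small helpers such as sibling_link_is_ok() and rewrites znode_same_parents() as above. The general shape of the rewrite, assuming only that read_lock_tree()/read_unlock_tree() wrap the tree rwlock as shown elsewhere in the patch:

/* illustration of the pattern; the expression mirrors znode_same_parents() */
static int tree_read_example(znode *a, znode *b)
{
	int result;

	read_lock_tree(znode_get_tree(a));
	result = (znode_parent(a) == znode_parent(b));	/* former UNDER_RW body */
	read_unlock_tree(znode_get_tree(a));
	return result;
}
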
*/ - RLOCK_TREE(znode_get_tree(node)); + read_lock_tree(znode_get_tree(node)); /* It may be that a node is inserted or removed between a node and its left sibling while the tree lock is released, but the flush-scan count @@ -3344,7 +3361,7 @@ static int scan_formatted(flush_scan * s zref(neighbor); } - RUNLOCK_TREE(znode_get_tree(node)); + read_unlock_tree(znode_get_tree(node)); /* If neighbor is NULL at the leaf level, need to check for an unformatted sibling using the parent--break in any case. */ diff -puN fs/reiser4/flush_queue.c~reiser4-spinlock-cleanup fs/reiser4/flush_queue.c --- devel/fs/reiser4/flush_queue.c~reiser4-spinlock-cleanup 2006-02-16 14:17:05.000000000 -0800 +++ devel-akpm/fs/reiser4/flush_queue.c 2006-02-16 14:17:05.000000000 -0800 @@ -22,12 +22,6 @@ kept on the flush queue until memory pressure or atom commit asks flush queues to write some or all from their jnodes. */ -#if REISER4_DEBUG -# define spin_ordering_pred_fq(fq) (1) -#endif - -SPIN_LOCK_FUNCTIONS(fq, flush_queue_t, guard); - /* LOCKING: @@ -56,13 +50,13 @@ SPIN_LOCK_FUNCTIONS(fq, flush_queue_t, g #define mark_fq_ready(fq) do { (fq)->state &= ~FQ_IN_USE; } while (0) /* get lock on atom from locked flush queue object */ -static txn_atom *atom_get_locked_by_fq(flush_queue_t * fq) +static txn_atom *atom_locked_by_fq_nolock(flush_queue_t * fq) { /* This code is similar to jnode_get_atom(), look at it for the * explanation. */ txn_atom *atom; - assert("zam-729", spin_fq_is_locked(fq)); + assert_spin_locked(&(fq->guard)); while (1) { atom = fq->atom; @@ -73,18 +67,18 @@ static txn_atom *atom_get_locked_by_fq(f break; atomic_inc(&atom->refcount); - spin_unlock_fq(fq); - LOCK_ATOM(atom); - spin_lock_fq(fq); + spin_unlock(&(fq->guard)); + spin_lock_atom(atom); + spin_lock(&(fq->guard)); if (fq->atom == atom) { atomic_dec(&atom->refcount); break; } - spin_unlock_fq(fq); + spin_unlock(&(fq->guard)); atom_dec_and_unlock(atom); - spin_lock_fq(fq); + spin_lock(&(fq->guard)); } return atom; @@ -92,7 +86,12 @@ static txn_atom *atom_get_locked_by_fq(f txn_atom *atom_locked_by_fq(flush_queue_t * fq) { - return UNDER_SPIN(fq, fq, atom_get_locked_by_fq(fq)); + txn_atom *atom; + + spin_lock(&(fq->guard)); + atom = atom_locked_by_fq_nolock(fq); + spin_unlock(&(fq->guard)); + return atom; } static void init_fq(flush_queue_t * fq) @@ -104,7 +103,7 @@ static void init_fq(flush_queue_t * fq) INIT_LIST_HEAD(ATOM_FQ_LIST(fq)); sema_init(&fq->io_sem, 0); - spin_fq_init(fq); + spin_lock_init(&fq->guard); } /* slab for flush queues */ @@ -164,7 +163,7 @@ static void count_dequeued_node(flush_qu /* attach flush queue object to the atom */ static void attach_fq(txn_atom *atom, flush_queue_t *fq) { - assert("zam-718", spin_atom_is_locked(atom)); + assert_spin_locked(&(atom->alock)); list_add(&fq->alink, &atom->flush_queues); fq->atom = atom; ON_DEBUG(atom->nr_flush_queues++); @@ -172,14 +171,14 @@ static void attach_fq(txn_atom *atom, fl static void detach_fq(flush_queue_t * fq) { - assert("zam-731", spin_atom_is_locked(fq->atom)); + assert_spin_locked(&(fq->atom->alock)); - spin_lock_fq(fq); + spin_lock(&(fq->guard)); list_del_init(&fq->alink); assert("vs-1456", fq->atom->nr_flush_queues > 0); ON_DEBUG(fq->atom->nr_flush_queues--); fq->atom = NULL; - spin_unlock_fq(fq); + spin_unlock(&(fq->guard)); } /* destroy flush queue object */ @@ -202,14 +201,14 @@ void mark_jnode_queued(flush_queue_t * f spin-locked. 
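atom_locked_by_fq_nolock() above (the renamed atom_get_locked_by_fq()) shows how the atom lock is acquired from a locked flush queue without violating lock ordering: the atom lock nests outside fq->guard, so the function pins the atom with a reference, drops the queue guard, takes the atom lock, re-takes the guard and re-checks that the queue still points at the same atom, retrying otherwise. A simplified sketch of that loop follows; it is an illustration only and omits the fast-path checks hidden by the hunk context above.

/* illustration: simplified retry loop, fq->guard held on entry and exit */
static txn_atom *atom_from_fq_example(flush_queue_t *fq)
{
	txn_atom *atom;

	while (1) {
		atom = fq->atom;
		if (atom == NULL)
			break;
		atomic_inc(&atom->refcount);	/* keep the atom alive */
		spin_unlock(&fq->guard);	/* drop the inner lock */
		spin_lock_atom(atom);		/* take the outer lock */
		spin_lock(&fq->guard);		/* re-take the inner lock */
		if (fq->atom == atom) {
			atomic_dec(&atom->refcount);
			break;			/* still ours: done */
		}
		/* queue was re-attached meanwhile: undo and retry */
		spin_unlock(&fq->guard);
		atom_dec_and_unlock(atom);
		spin_lock(&fq->guard);
	}
	return atom;
}
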
*/ void queue_jnode(flush_queue_t * fq, jnode * node) { - assert("zam-711", spin_jnode_is_locked(node)); + assert_spin_locked(&(node->guard)); assert("zam-713", node->atom != NULL); - assert("zam-712", spin_atom_is_locked(node->atom)); - assert("zam-714", jnode_is_dirty(node)); + assert_spin_locked(&(node->atom->alock)); assert("zam-716", fq->atom != NULL); assert("zam-717", fq->atom == node->atom); assert("zam-907", fq_in_use(fq)); + assert("zam-714", JF_ISSET(node, JNODE_DIRTY)); assert("zam-826", JF_ISSET(node, JNODE_RELOC)); assert("vs-1481", !JF_ISSET(node, JNODE_FLUSH_QUEUED)); assert("vs-1481", NODE_LIST(node) != FQ_LIST); @@ -226,14 +225,14 @@ void queue_jnode(flush_queue_t * fq, jno static int wait_io(flush_queue_t * fq, int *nr_io_errors) { assert("zam-738", fq->atom != NULL); - assert("zam-739", spin_atom_is_locked(fq->atom)); + assert_spin_locked(&(fq->atom->alock)); assert("zam-736", fq_in_use(fq)); assert("zam-911", list_empty_careful(ATOM_FQ_LIST(fq))); if (atomic_read(&fq->nr_submitted) != 0) { struct super_block *super; - UNLOCK_ATOM(fq->atom); + spin_unlock_atom(fq->atom); assert("nikita-3013", schedulable()); @@ -262,7 +261,7 @@ static int finish_fq(flush_queue_t * fq, txn_atom *atom = fq->atom; assert("zam-801", atom != NULL); - assert("zam-744", spin_atom_is_locked(atom)); + assert_spin_locked(&(atom->alock)); assert("zam-762", fq_in_use(fq)); ret = wait_io(fq, nr_io_errors); @@ -283,7 +282,7 @@ static int finish_all_fq(txn_atom * atom { flush_queue_t *fq; - assert("zam-730", spin_atom_is_locked(atom)); + assert_spin_locked(&(atom->alock)); if (list_empty_careful(&atom->flush_queues)) return 0; @@ -305,7 +304,7 @@ static int finish_all_fq(txn_atom * atom return ret; } - UNLOCK_ATOM(atom); + spin_unlock_atom(atom); return -E_REPEAT; } @@ -336,9 +335,9 @@ int current_atom_finish_all_fq(void) -EBUSY are two return codes when atom remains locked after finish_all_fq */ if (!ret) - UNLOCK_ATOM(atom); + spin_unlock_atom(atom); - assert("nikita-2696", spin_atom_is_not_locked(atom)); + assert_spin_not_locked(&(atom->alock)); if (ret) return ret; @@ -356,9 +355,9 @@ scan_fq_and_update_atom_ref(struct list_ jnode *cur; list_for_each_entry(cur, list, capture_link) { - LOCK_JNODE(cur); + spin_lock_jnode(cur); cur->atom = atom; - UNLOCK_JNODE(cur); + spin_unlock_jnode(cur); } } @@ -367,14 +366,14 @@ void fuse_fq(txn_atom *to, txn_atom *fro { flush_queue_t *fq; - assert("zam-720", spin_atom_is_locked(to)); - assert("zam-721", spin_atom_is_locked(from)); + assert_spin_locked(&(to->alock)); + assert_spin_locked(&(from->alock)); list_for_each_entry(fq, &from->flush_queues, alink) { scan_fq_and_update_atom_ref(ATOM_FQ_LIST(fq), to); - spin_lock_fq(fq); + spin_lock(&(fq->guard)); fq->atom = to; - spin_unlock_fq(fq); + spin_unlock(&(fq->guard)); } list_splice_init(&from->flush_queues, to->flush_queues.prev); @@ -396,7 +395,7 @@ int atom_fq_parts_are_clean(txn_atom * a /* Bio i/o completion routine for reiser4 write operations. */ static int end_io_handler(struct bio *bio, unsigned int bytes_done UNUSED_ARG, - int err UNUSED_ARG) + int err) { int i; int nr_errors = 0; @@ -408,6 +407,9 @@ end_io_handler(struct bio *bio, unsigned if (bio->bi_size != 0) return 1; + if (err == -EOPNOTSUPP) + set_bit(BIO_EOPNOTSUPP, &bio->bi_flags); + /* we expect that bio->private is set to NULL or fq object which is used * for synchronization and error counting. 
*/ fq = bio->bi_private; @@ -467,7 +469,7 @@ static void release_prepped_list(flush_q txn_atom *atom; assert("zam-904", fq_in_use(fq)); - atom = UNDER_SPIN(fq, fq, atom_get_locked_by_fq(fq)); + atom = atom_locked_by_fq(fq); while (!list_empty(ATOM_FQ_LIST(fq))) { jnode *cur; @@ -476,7 +478,7 @@ static void release_prepped_list(flush_q list_del_init(&cur->capture_link); count_dequeued_node(fq); - LOCK_JNODE(cur); + spin_lock_jnode(cur); assert("nikita-3154", !JF_ISSET(cur, JNODE_OVRWR)); assert("nikita-3154", JF_ISSET(cur, JNODE_RELOC)); assert("nikita-3154", JF_ISSET(cur, JNODE_FLUSH_QUEUED)); @@ -493,13 +495,13 @@ static void release_prepped_list(flush_q CLEAN_LIST, 1)); } - UNLOCK_JNODE(cur); + spin_unlock_jnode(cur); } if (--atom->nr_running_queues == 0) atom_send_event(atom); - UNLOCK_ATOM(atom); + spin_unlock_atom(atom); } /* Submit write requests for nodes on the already filled flush queue @fq. @@ -513,7 +515,7 @@ int write_fq(flush_queue_t * fq, long *n txn_atom *atom; while (1) { - atom = UNDER_SPIN(fq, fq, atom_get_locked_by_fq(fq)); + atom = atom_locked_by_fq(fq); assert("zam-924", atom); /* do not write fq in parallel. */ if (atom->nr_running_queues == 0 @@ -523,7 +525,7 @@ int write_fq(flush_queue_t * fq, long *n } atom->nr_running_queues++; - UNLOCK_ATOM(atom); + spin_unlock_atom(atom); ret = write_jnode_list(ATOM_FQ_LIST(fq), fq, nr_submitted, flags); release_prepped_list(fq); @@ -542,17 +544,17 @@ static int fq_by_atom_gfp(txn_atom *atom { flush_queue_t *fq; - assert("zam-745", spin_atom_is_locked(atom)); + assert_spin_locked(&(atom->alock)); fq = list_entry(atom->flush_queues.next, flush_queue_t, alink); while (&atom->flush_queues != &fq->alink) { - spin_lock_fq(fq); + spin_lock(&(fq->guard)); if (fq_ready(fq)) { mark_fq_in_use(fq); assert("vs-1246", fq->owner == NULL); ON_DEBUG(fq->owner = current); - spin_unlock_fq(fq); + spin_unlock(&(fq->guard)); if (*new_fq) done_fq(*new_fq); @@ -562,7 +564,7 @@ static int fq_by_atom_gfp(txn_atom *atom return 0; } - spin_unlock_fq(fq); + spin_unlock(&(fq->guard)); fq = list_entry(fq->alink.next, flush_queue_t, alink); } @@ -577,7 +579,7 @@ static int fq_by_atom_gfp(txn_atom *atom return 0; } - UNLOCK_ATOM(atom); + spin_unlock_atom(atom); *new_fq = create_fq(gfp); @@ -624,16 +626,16 @@ void fq_put(flush_queue_t * fq) { txn_atom *atom; - spin_lock_fq(fq); - atom = atom_get_locked_by_fq(fq); + spin_lock(&(fq->guard)); + atom = atom_locked_by_fq_nolock(fq); assert("zam-746", atom != NULL); fq_put_nolock(fq); atom_send_event(atom); - spin_unlock_fq(fq); - UNLOCK_ATOM(atom); + spin_unlock(&(fq->guard)); + spin_unlock_atom(atom); } /* A part of atom object initialization related to the embedded flush queue @@ -652,14 +654,14 @@ int fq_by_jnode_gfp(jnode * node, flush_ txn_atom *atom; int ret; - assert("zam-835", spin_jnode_is_locked(node)); + assert_spin_locked(&(node->guard)); *fq = NULL; while (1) { /* begin with taking lock on atom */ atom = jnode_get_atom(node); - UNLOCK_JNODE(node); + spin_unlock_jnode(node); if (atom == NULL) { /* jnode does not point to the atom anymore, it is @@ -685,7 +687,7 @@ int fq_by_jnode_gfp(jnode * node, flush_ } /* It is correct to lock atom first, then lock a jnode */ - LOCK_JNODE(node); + spin_lock_jnode(node); if (node->atom == atom) break; /* Yes! it is our jnode. We got all of them: @@ -694,15 +696,15 @@ int fq_by_jnode_gfp(jnode * node, flush_ /* release all locks and allocated objects and restart from * locked jnode. 
*/ - UNLOCK_JNODE(node); + spin_unlock_jnode(node); fq_put(*fq); fq = NULL; - UNLOCK_ATOM(atom); + spin_unlock_atom(atom); lock_again: - LOCK_JNODE(node); + spin_lock_jnode(node); } return 0; @@ -719,11 +721,11 @@ void check_fq(const txn_atom *atom) count = 0; list_for_each_entry(fq, &atom->flush_queues, alink) { - spin_lock_fq(fq); + spin_lock(&(fq->guard)); /* calculate number of jnodes on fq' list of prepped jnodes */ list_for_each(pos, ATOM_FQ_LIST(fq)) count++; - spin_unlock_fq(fq); + spin_unlock(&(fq->guard)); } if (count != atom->fq) warning("", "fq counter %d, real %d\n", atom->fq, count); diff -puN fs/reiser4/fsdata.c~reiser4-spinlock-cleanup fs/reiser4/fsdata.c --- devel/fs/reiser4/fsdata.c~reiser4-spinlock-cleanup 2006-02-16 14:17:05.000000000 -0800 +++ devel-akpm/fs/reiser4/fsdata.c 2006-02-16 14:17:05.000000000 -0800 @@ -16,7 +16,7 @@ static LIST_HEAD(cursor_cache); static unsigned long d_cursor_unused = 0; /* spinlock protecting manipulations with dir_cursor's hash table and lists */ -static spinlock_t d_lock = SPIN_LOCK_UNLOCKED; +spinlock_t d_lock = SPIN_LOCK_UNLOCKED; static void kill_cursor(dir_cursor *); @@ -291,6 +291,8 @@ static __u32 cid_counter = 0; #define CID_SHIFT (20) #define CID_MASK (0xfffffull) +static void free_file_fsdata_nolock(struct file *); + /** * insert_cursor - allocate file_fsdata, insert cursor to tree and hash table * @cursor: @@ -337,7 +339,7 @@ static int insert_cursor(dir_cursor *cur warning("", "file has fsdata already"); #endif clean_fsdata(file); - reiser4_free_file_fsdata(file); + free_file_fsdata_nolock(file); file->private_data = fsdata; fsdata->cursor = cursor; spin_unlock_inode(inode); @@ -554,7 +556,7 @@ int try_to_attach_fsdata(struct file *fi spin_lock_inode(inode); assert("nikita-3556", cursor->fsdata->back == NULL); clean_fsdata(file); - reiser4_free_file_fsdata(file); + free_file_fsdata_nolock(file); file->private_data = cursor->fsdata; spin_unlock_inode(inode); } @@ -747,17 +749,17 @@ reiser4_file_fsdata *reiser4_get_file_fs } /** - * reiser4_free_file_fsdata - detach from struct file and free reiser4_file_fsdata + * free_file_fsdata_nolock - detach and free reiser4_file_fsdata * @file: * * Detaches reiser4_file_fsdata from @file, removes reiser4_file_fsdata from * readdir list, frees if it is not linked to d_cursor object. */ -void reiser4_free_file_fsdata(struct file *file) +static void free_file_fsdata_nolock(struct file *file) { reiser4_file_fsdata *fsdata; - spin_lock_inode(file->f_dentry->d_inode); + assert("", spin_inode_is_locked(file->f_dentry->d_inode)); fsdata = file->private_data; if (fsdata != NULL) { list_del_init(&fsdata->dir.linkage); @@ -765,12 +767,21 @@ void reiser4_free_file_fsdata(struct fil free_fsdata(fsdata); } file->private_data = NULL; +} +/** + * reiser4_free_file_fsdata - detach from struct file and free reiser4_file_fsdata + * @file: + * + * Spinlocks inode and calls free_file_fsdata_nolock to do the work. 
+ */ +void reiser4_free_file_fsdata(struct file *file) +{ + spin_lock_inode(file->f_dentry->d_inode); + free_file_fsdata_nolock(file); spin_unlock_inode(file->f_dentry->d_inode); } - - /* * Local variables: * c-indentation-style: "K&R" diff -puN fs/reiser4/fsdata.h~reiser4-spinlock-cleanup fs/reiser4/fsdata.h --- devel/fs/reiser4/fsdata.h~reiser4-spinlock-cleanup 2006-02-16 14:17:05.000000000 -0800 +++ devel-akpm/fs/reiser4/fsdata.h 2006-02-16 14:17:05.000000000 -0800 @@ -193,10 +193,6 @@ extern void detach_fsdata(struct file *) void dispose_cursors(struct inode *inode); void load_cursors(struct inode *inode); void kill_cursors(struct inode *inode); - - - - void adjust_dir_file(struct inode *dir, const struct dentry *de, int offset, int adj); /* @@ -208,6 +204,9 @@ struct d_cursor_info { struct radix_tree_root tree; }; +/* spinlock protecting readdir cursors */ +extern spinlock_t d_lock; + /* __REISER4_FSDATA_H__ */ #endif diff -puN fs/reiser4/init_super.c~reiser4-spinlock-cleanup fs/reiser4/init_super.c --- devel/fs/reiser4/init_super.c~reiser4-spinlock-cleanup 2006-02-16 14:17:05.000000000 -0800 +++ devel-akpm/fs/reiser4/init_super.c 2006-02-16 14:17:05.000000000 -0800 @@ -31,9 +31,9 @@ int init_fs_info(struct super_block *sup sema_init(&sbinfo->delete_sema, 1); sema_init(&sbinfo->flush_sema, 1); - spin_super_init(sbinfo); + spin_lock_init(&(sbinfo->guard)); #if REISER4_USE_EFLUSH - spin_super_eflush_init(sbinfo); + spin_lock_init(&(sbinfo->eflush_guard)); #endif /* initialize per-super-block d_cursor resources */ init_super_d_info(super); @@ -448,6 +448,8 @@ do { \ PUSH_BIT_OPT("dont_load_bitmap", REISER4_DONT_LOAD_BITMAP); /* disable transaction commits during write() */ PUSH_BIT_OPT("atomic_write", REISER4_ATOMIC_WRITE); + /* disable use of write barriers in the reiser4 log writer. 
*/ + PUSH_BIT_OPT("no_write_barrier", REISER4_NO_WRITE_BARRIER); PUSH_OPT( { @@ -644,7 +646,7 @@ static struct { }, [PSET_DIGEST] = { .type = REISER4_DIGEST_PLUGIN_TYPE, - .id = NONE_DIGEST_ID + .id = SHA256_32_DIGEST_ID }, [PSET_COMPRESSION] = { .type = REISER4_COMPRESSION_PLUGIN_TYPE, diff -puN fs/reiser4/inode.c~reiser4-spinlock-cleanup fs/reiser4/inode.c --- devel/fs/reiser4/inode.c~reiser4-spinlock-cleanup 2006-02-16 14:17:05.000000000 -0800 +++ devel-akpm/fs/reiser4/inode.c 2006-02-16 14:17:05.000000000 -0800 @@ -603,8 +603,7 @@ void inode_set_extension(struct inode *i assert("nikita-2716", inode != NULL); assert("nikita-2717", ext < LAST_SD_EXTENSION); - assert("nikita-3491", - spin_inode_object_is_locked(reiser4_inode_data(inode))); + assert("nikita-3491", spin_inode_is_locked(inode)); state = reiser4_inode_data(inode); state->extmask |= 1 << ext; @@ -674,12 +673,10 @@ znode *inode_get_vroot(struct inode *ino { reiser4_block_nr blk; znode *result; - reiser4_inode *info; - info = reiser4_inode_data(inode); - LOCK_INODE(info); - blk = info->vroot; - UNLOCK_INODE(info); + spin_lock_inode(inode); + blk = reiser4_inode_data(inode)->vroot; + spin_unlock_inode(inode); if (!disk_addr_eq(&UBER_TREE_ADDR, &blk)) result = zlook(tree_by_inode(inode), &blk); else @@ -687,24 +684,18 @@ znode *inode_get_vroot(struct inode *ino return result; } -void inode_set_vroot(struct inode *inode, znode * vroot) +void inode_set_vroot(struct inode *inode, znode *vroot) { - reiser4_inode *info; - - info = reiser4_inode_data(inode); - LOCK_INODE(info); - info->vroot = *znode_get_block(vroot); - UNLOCK_INODE(info); + spin_lock_inode(inode); + reiser4_inode_data(inode)->vroot = *znode_get_block(vroot); + spin_unlock_inode(inode); } #if REISER4_DEBUG void inode_invariant(const struct inode *inode) { - reiser4_inode *object; - - object = reiser4_inode_data(inode); - assert("nikita-3077", spin_inode_object_is_locked(object)); + assert("nikita-3077", spin_inode_is_locked(inode)); } int inode_has_no_jnodes(reiser4_inode * r4_inode) diff -puN fs/reiser4/inode.h~reiser4-spinlock-cleanup fs/reiser4/inode.h --- devel/fs/reiser4/inode.h~reiser4-spinlock-cleanup 2006-02-16 14:17:05.000000000 -0800 +++ devel-akpm/fs/reiser4/inode.h 2006-02-16 14:17:05.000000000 -0800 @@ -7,7 +7,6 @@ #include "forward.h" #include "debug.h" -#include "spin_macros.h" #include "key.h" #include "seal.h" #include "plugin/plugin.h" @@ -99,7 +98,7 @@ typedef __u32 oid_hi_t; struct reiser4_inode { /* spin lock protecting fields of this structure. 
*/ - reiser4_spin_data guard; + spinlock_t guard; /* object plugins */ plugin_set *pset; /* plugins set for inheritance */ @@ -252,18 +251,6 @@ static inline struct inode *unix_file_in p.file_plugin_data.unix_file_info)->vfs_inode; } -/* ordering predicate for inode spin lock: only jnode lock can be held */ -#define spin_ordering_pred_inode_object(inode) \ - ( lock_counters() -> rw_locked_dk == 0 ) && \ - ( lock_counters() -> rw_locked_tree == 0 ) && \ - ( lock_counters() -> spin_locked_txnh == 0 ) && \ - ( lock_counters() -> rw_locked_zlock == 0 ) && \ - ( lock_counters() -> spin_locked_jnode == 0 ) && \ - ( lock_counters() -> spin_locked_atom == 0 ) && \ - ( lock_counters() -> spin_locked_ktxnmgrd == 0 ) && \ - ( lock_counters() -> spin_locked_txnmgr == 0 ) - -SPIN_LOCK_FUNCTIONS(inode_object, reiser4_inode, guard); extern ino_t oid_to_ino(oid_t oid) __attribute__ ((const)); extern ino_t oid_to_uino(oid_t oid) __attribute__ ((const)); @@ -277,17 +264,55 @@ extern int inode_has_no_jnodes(reiser4_i #define inode_invariant(inode) noop #endif -#define spin_lock_inode(inode) \ -({ \ - LOCK_INODE(reiser4_inode_data(inode)); \ - inode_invariant(inode); \ -}) +static inline int spin_inode_is_locked(const struct inode *inode) +{ + assert_spin_locked(&reiser4_inode_data(inode)->guard); + return 1; +} + +/** + * spin_lock_inode - lock reiser4_inode' embedded spinlock + * @inode: inode to lock + * + * In debug mode it checks that lower priority locks are not held and + * increments reiser4_context's lock counters on which lock ordering checking + * is based. + */ +static inline void spin_lock_inode(struct inode *inode) +{ + assert("", LOCK_CNT_NIL(spin_locked)); + /* check lock ordering */ + assert_spin_not_locked(&d_lock); + + spin_lock(&reiser4_inode_data(inode)->guard); + + LOCK_CNT_INC(spin_locked_inode); + LOCK_CNT_INC(spin_locked); + + inode_invariant(inode); +} + +/** + * spin_unlock_inode - unlock reiser4_inode' embedded spinlock + * @inode: inode to unlock + * + * In debug mode it checks that spinlock is held and decrements + * reiser4_context's lock counters on which lock ordering checking is based. 
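spin_lock_inode()/spin_unlock_inode() are now ordinary inline functions over the inode's spinlock_t guard; the LOCK_CNT_INC/LOCK_CNT_DEC calls maintain the per-context counters that the ordering assertions (LOCK_CNT_NIL, LOCK_CNT_GTZ and the d_lock check) rely on, and are presumably no-ops outside REISER4_DEBUG builds. Usage is unchanged from inode_set_vroot() earlier in the patch; a minimal caller-side sketch, illustration only with an invented function name:

/* illustration only */
static void set_vroot_example(struct inode *inode, znode *vroot)
{
	spin_lock_inode(inode);		/* asserts d_lock etc. are not held */
	reiser4_inode_data(inode)->vroot = *znode_get_block(vroot);
	spin_unlock_inode(inode);	/* drops the debug lock counters */
}
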
+ */ +static inline void spin_unlock_inode(struct inode *inode) +{ + assert_spin_locked(&reiser4_inode_data(inode)->guard); + assert("nikita-1375", LOCK_CNT_GTZ(spin_locked_inode)); + assert("nikita-1376", LOCK_CNT_GTZ(spin_locked)); + + inode_invariant(inode); + + LOCK_CNT_DEC(spin_locked_inode); + LOCK_CNT_DEC(spin_locked); + + spin_unlock(&reiser4_inode_data(inode)->guard); +} -#define spin_unlock_inode(inode) \ -({ \ - inode_invariant(inode); \ - UNLOCK_INODE(reiser4_inode_data(inode)); \ -}) extern znode *inode_get_vroot(struct inode *inode); extern void inode_set_vroot(struct inode *inode, znode * vroot); diff -puN fs/reiser4/jnode.c~reiser4-spinlock-cleanup fs/reiser4/jnode.c --- devel/fs/reiser4/jnode.c~reiser4-spinlock-cleanup 2006-02-16 14:17:05.000000000 -0800 +++ devel-akpm/fs/reiser4/jnode.c 2006-02-16 14:17:05.000000000 -0800 @@ -239,8 +239,8 @@ void jnode_init(jnode * node, reiser4_tr jnode_set_type(node, type); atomic_set(&node->d_count, 0); atomic_set(&node->x_count, 0); - spin_jnode_init(node); - spin_jload_init(node); + spin_lock_init(&node->guard); + spin_lock_init(&node->load); node->atom = NULL; node->tree = tree; INIT_LIST_HEAD(&node->capture_link); @@ -413,11 +413,11 @@ jnode *jfind(struct address_space * mapp assert("vs-1694", mapping->host != NULL); tree = tree_by_inode(mapping->host); - RLOCK_TREE(tree); + read_lock_tree(tree); node = jfind_nolock(mapping, index); if (node != NULL) jref(node); - RUNLOCK_TREE(tree); + read_unlock_tree(tree); return node; } @@ -427,7 +427,7 @@ static void inode_attach_jnode(jnode * n reiser4_inode *info; struct radix_tree_root *rtree; - assert("nikita-34391", rw_tree_is_write_locked(jnode_get_tree(node))); + assert_rw_write_locked(&(jnode_get_tree(node)->tree_lock)); assert("zam-1043", node->key.j.mapping != NULL); inode = node->key.j.mapping->host; info = reiser4_inode_data(inode); @@ -451,7 +451,7 @@ static void inode_detach_jnode(jnode * n reiser4_inode *info; struct radix_tree_root *rtree; - assert("nikita-34392", rw_tree_is_write_locked(jnode_get_tree(node))); + assert_rw_write_locked(&(jnode_get_tree(node)->tree_lock)); assert("zam-1044", node->key.j.mapping != NULL); inode = node->key.j.mapping->host; info = reiser4_inode_data(inode); @@ -487,7 +487,7 @@ hash_unformatted_jnode(jnode * node, str assert("vs-1442", node->key.j.mapping == 0); assert("vs-1443", node->key.j.objectid == 0); assert("vs-1444", node->key.j.index == (unsigned long)-1); - assert("nikita-3439", rw_tree_is_write_locked(jnode_get_tree(node))); + assert_rw_write_locked(&(jnode_get_tree(node)->tree_lock)); node->key.j.mapping = mapping; node->key.j.objectid = get_inode_oid(mapping->host); @@ -528,11 +528,10 @@ static void unhash_unformatted_node_nolo void unhash_unformatted_jnode(jnode * node) { assert("vs-1445", jnode_is_unformatted(node)); - WLOCK_TREE(node->tree); + write_lock_tree(node->tree); unhash_unformatted_node_nolock(node); - - WUNLOCK_TREE(node->tree); + write_unlock_tree(node->tree); } /* @@ -556,7 +555,7 @@ jnode *find_get_jnode(reiser4_tree * tre if (preload != 0) return ERR_PTR(preload); - WLOCK_TREE(tree); + write_lock_tree(tree); shadow = jfind_nolock(mapping, index); if (likely(shadow == NULL)) { /* add new jnode to hash table and inode's radix tree of jnodes */ @@ -569,7 +568,7 @@ jnode *find_get_jnode(reiser4_tree * tre assert("vs-1498", shadow->key.j.mapping == mapping); result = shadow; } - WUNLOCK_TREE(tree); + write_unlock_tree(tree); assert("nikita-2955", ergo(result != NULL, jnode_invariant(result, 0, 0))); @@ -610,7 +609,9 @@ 
static jnode *do_jget(reiser4_tree * tre /* check hash-table first */ result = jfind(pg->mapping, pg->index); if (unlikely(result != NULL)) { - UNDER_SPIN_VOID(jnode, result, jnode_attach_page(result, pg)); + spin_lock_jnode(result); + jnode_attach_page(result, pg); + spin_unlock_jnode(result); result->key.j.mapping = pg->mapping; return result; } @@ -619,7 +620,9 @@ static jnode *do_jget(reiser4_tree * tre if (unlikely(IS_ERR(result))) return result; /* attach jnode to page */ - UNDER_SPIN_VOID(jnode, result, jnode_attach_page(result, pg)); + spin_lock_jnode(result); + jnode_attach_page(result, pg); + spin_unlock_jnode(result); return result; } @@ -669,7 +672,7 @@ void jnode_attach_page(jnode * node, str assert("vs-1741", node->pg == NULL); assert("nikita-2396", PageLocked(pg)); - assert("nikita-2397", spin_jnode_is_locked(node)); + assert_spin_locked(&(node->guard)); page_cache_get(pg); set_page_private(pg, (unsigned long)node); @@ -683,7 +686,7 @@ void page_clear_jnode(struct page *page, assert("nikita-2424", page != NULL); assert("nikita-2425", PageLocked(page)); assert("nikita-2426", node != NULL); - assert("nikita-2427", spin_jnode_is_locked(node)); + assert_spin_locked(&(node->guard)); assert("nikita-2428", PagePrivate(page)); assert("nikita-3551", !PageWriteback(page)); @@ -708,8 +711,9 @@ page_detach_jnode(struct page *page, str jnode *node; node = jprivate(page); - assert("nikita-2399", spin_jnode_is_not_locked(node)); - UNDER_SPIN_VOID(jnode, node, page_clear_jnode(page, node)); + spin_lock_jnode(node); + page_clear_jnode(page, node); + spin_unlock_jnode(node); } unlock_page(page); } @@ -726,11 +730,11 @@ static struct page *jnode_lock_page(jnod struct page *page; assert("nikita-2052", node != NULL); - assert("nikita-2401", spin_jnode_is_not_locked(node)); + assert("nikita-2401", LOCK_CNT_NIL(spin_locked_jnode)); while (1) { - LOCK_JNODE(node); + spin_lock_jnode(node); page = jnode_page(node); if (page == NULL) { break; @@ -747,7 +751,7 @@ static struct page *jnode_lock_page(jnod /* Page is locked by someone else. 
*/ page_cache_get(page); - UNLOCK_JNODE(node); + spin_unlock_jnode(node); wait_on_page_locked(page); /* it is possible that page was detached from jnode and returned to the free pool, or re-assigned while we were @@ -771,14 +775,14 @@ static inline int jparse(jnode * node) assert("nikita-2466", node != NULL); - LOCK_JNODE(node); + spin_lock_jnode(node); if (likely(!jnode_is_parsed(node))) { result = jnode_ops(node)->parse(node); if (likely(result == 0)) JF_SET(node, JNODE_PARSED); } else result = 0; - UNLOCK_JNODE(node); + spin_unlock_jnode(node); return result; } @@ -788,30 +792,30 @@ struct page *jnode_get_page_locked(jnode { struct page *page; - LOCK_JNODE(node); + spin_lock_jnode(node); page = jnode_page(node); if (page == NULL) { - UNLOCK_JNODE(node); + spin_unlock_jnode(node); page = find_or_create_page(jnode_get_mapping(node), jnode_get_index(node), gfp_flags); if (page == NULL) return ERR_PTR(RETERR(-ENOMEM)); } else { if (!TestSetPageLocked(page)) { - UNLOCK_JNODE(node); + spin_unlock_jnode(node); return page; } page_cache_get(page); - UNLOCK_JNODE(node); + spin_unlock_jnode(node); lock_page(page); assert("nikita-3134", page->mapping == jnode_get_mapping(node)); } - LOCK_JNODE(node); + spin_lock_jnode(node); if (!jnode_page(node)) jnode_attach_page(node, page); - UNLOCK_JNODE(node); + spin_unlock_jnode(node); page_cache_release(page); assert("zam-894", jnode_page(node) == page); @@ -882,10 +886,10 @@ int jload_gfp(jnode * node /* node to lo * should be atomic, otherwise there is a race against * reiser4_releasepage(). */ - LOCK_JLOAD(node); + spin_lock(&(node->load)); add_d_ref(node); parsed = jnode_is_parsed(node); - UNLOCK_JLOAD(node); + spin_unlock(&(node->load)); if (unlikely(!parsed)) { page = jnode_get_page_locked(node, gfp_flags); @@ -921,8 +925,11 @@ int jload_gfp(jnode * node /* node to lo node->data = kmap(page); } - if (unlikely(JF_ISSET(node, JNODE_EFLUSH))) - UNDER_SPIN_VOID(jnode, node, eflush_del(node, 0)); + if (unlikely(JF_ISSET(node, JNODE_EFLUSH))) { + spin_lock_jnode(node); + eflush_del(node, 0); + spin_unlock_jnode(node); + } if (!is_writeout_mode()) /* We do not mark pages active if jload is called as a part of @@ -977,7 +984,9 @@ int jinit_new(jnode * node, int gfp_flag if (!jnode_is_parsed(node)) { jnode_plugin *jplug = jnode_ops(node); - result = UNDER_SPIN(jnode, node, jplug->init(node)); + spin_lock_jnode(node); + result = jplug->init(node); + spin_unlock_jnode(node); if (result) { kunmap(page); goto failed; @@ -1010,7 +1019,7 @@ void jrelse(jnode * node /* jnode to rel struct page *page; assert("nikita-487", node != NULL); - assert("nikita-1906", spin_jnode_is_not_locked(node)); + assert_spin_not_locked(&(node->guard)); page = jnode_page(node); if (likely(page != NULL)) { @@ -1035,15 +1044,15 @@ static void jnode_finish_io(jnode * node assert("nikita-2922", node != NULL); - LOCK_JNODE(node); + spin_lock_jnode(node); page = jnode_page(node); if (page != NULL) { page_cache_get(page); - UNLOCK_JNODE(node); + spin_unlock_jnode(node); wait_on_page_writeback(page); page_cache_release(page); } else - UNLOCK_JNODE(node); + spin_unlock_jnode(node); } /* @@ -1216,7 +1225,7 @@ static unsigned long index_is_address(co } /* resolve race with jput */ -jnode *jnode_rip_sync(reiser4_tree * t, jnode * node) +jnode *jnode_rip_sync(reiser4_tree *tree, jnode *node) { /* * This is used as part of RCU-based jnode handling. @@ -1239,12 +1248,12 @@ jnode *jnode_rip_sync(reiser4_tree * t, * jnode. 
*/ if (unlikely(JF_ISSET(node, JNODE_RIP))) { - RLOCK_TREE(t); + read_lock_tree(tree); if (JF_ISSET(node, JNODE_RIP)) { dec_x_ref(node); node = NULL; } - RUNLOCK_TREE(t); + read_unlock_tree(tree); } return node; } @@ -1293,7 +1302,7 @@ static void delete_znode(jnode * node, r { znode *z; - assert("nikita-2128", rw_tree_is_write_locked(tree)); + assert_rw_write_locked(&(tree->tree_lock)); assert("vs-898", JF_ISSET(node, JNODE_HEARD_BANSHEE)); z = JZNODE(node); @@ -1310,7 +1319,7 @@ static int remove_znode(jnode * node, re { znode *z; - assert("nikita-2128", rw_tree_is_locked(tree)); + assert_rw_write_locked(&(tree->tree_lock)); z = JZNODE(node); if (z->c_count == 0) { @@ -1577,15 +1586,15 @@ static int jnode_try_drop(jnode * node) tree = jnode_get_tree(node); jtype = jnode_get_type(node); - LOCK_JNODE(node); - WLOCK_TREE(tree); + spin_lock_jnode(node); + write_lock_tree(tree); /* * if jnode has a page---leave it alone. Memory pressure will * eventually kill page and jnode. */ if (jnode_page(node) != NULL) { - UNLOCK_JNODE(node); - WUNLOCK_TREE(tree); + write_unlock_tree(tree); + spin_unlock_jnode(node); JF_CLR(node, JNODE_RIP); return RETERR(-EBUSY); } @@ -1597,16 +1606,16 @@ static int jnode_try_drop(jnode * node) assert("nikita-3223", !JF_ISSET(node, JNODE_EFLUSH)); assert("jmacd-511/b", atomic_read(&node->d_count) == 0); - UNLOCK_JNODE(node); + spin_unlock_jnode(node); /* no page and no references---despatch him. */ jnode_remove(node, jtype, tree); - WUNLOCK_TREE(tree); + write_unlock_tree(tree); jnode_free(node, jtype); } else { /* busy check failed: reference was acquired by concurrent * thread. */ - WUNLOCK_TREE(tree); - UNLOCK_JNODE(node); + write_unlock_tree(tree); + spin_unlock_jnode(node); JF_CLR(node, JNODE_RIP); } return result; @@ -1629,11 +1638,11 @@ static int jdelete(jnode * node /* jnode jtype = jnode_get_type(node); page = jnode_lock_page(node); - assert("nikita-2402", spin_jnode_is_locked(node)); + assert_spin_locked(&(node->guard)); tree = jnode_get_tree(node); - WLOCK_TREE(tree); + write_lock_tree(tree); /* re-check ->x_count under tree lock. */ result = jnode_is_busy(node, jtype); if (likely(!result)) { @@ -1647,10 +1656,10 @@ static int jdelete(jnode * node /* jnode */ page_clear_jnode(page, node); } - UNLOCK_JNODE(node); + spin_unlock_jnode(node); /* goodbye */ jnode_delete(node, jtype, tree); - WUNLOCK_TREE(tree); + write_unlock_tree(tree); jnode_free(node, jtype); /* @node is no longer valid pointer */ if (page != NULL) @@ -1659,8 +1668,8 @@ static int jdelete(jnode * node /* jnode /* busy check failed: reference was acquired by concurrent * thread. */ JF_CLR(node, JNODE_RIP); - WUNLOCK_TREE(tree); - UNLOCK_JNODE(node); + write_unlock_tree(tree); + spin_unlock_jnode(node); if (page != NULL) unlock_page(page); } @@ -1683,16 +1692,16 @@ static int jdrop_in_tree(jnode * node, r int result; assert("zam-602", node != NULL); - assert("nikita-2362", rw_tree_is_not_locked(tree)); + assert_rw_not_read_locked(&(tree->tree_lock)); + assert_rw_not_write_locked(&(tree->tree_lock)); assert("nikita-2403", !JF_ISSET(node, JNODE_HEARD_BANSHEE)); - // assert( "nikita-2532", JF_ISSET( node, JNODE_RIP ) ); jtype = jnode_get_type(node); page = jnode_lock_page(node); - assert("nikita-2405", spin_jnode_is_locked(node)); + assert_spin_locked(&(node->guard)); - WLOCK_TREE(tree); + write_lock_tree(tree); /* re-check ->x_count under tree lock. 
*/ result = jnode_is_busy(node, jtype); @@ -1705,9 +1714,9 @@ static int jdrop_in_tree(jnode * node, r assert("nikita-2181", PageLocked(page)); page_clear_jnode(page, node); } - UNLOCK_JNODE(node); + spin_unlock_jnode(node); jnode_remove(node, jtype, tree); - WUNLOCK_TREE(tree); + write_unlock_tree(tree); jnode_free(node, jtype); if (page != NULL) { drop_page(page); @@ -1716,8 +1725,8 @@ static int jdrop_in_tree(jnode * node, r /* busy check failed: reference was acquired by concurrent * thread. */ JF_CLR(node, JNODE_RIP); - WUNLOCK_TREE(tree); - UNLOCK_JNODE(node); + write_unlock_tree(tree); + spin_unlock_jnode(node); if (page != NULL) unlock_page(page); } @@ -1796,7 +1805,7 @@ int jnode_invariant_f(const jnode * node _check(node->jnodes.next != NULL) && /* [jnode-dirty] invariant */ /* dirty inode is part of atom */ - _ergo(jnode_is_dirty(node), node->atom != NULL) && + _ergo(JF_ISSET(node, JNODE_DIRTY), node->atom != NULL) && /* [jnode-oid] invariant */ /* for unformatted node ->objectid and ->mapping fields are * consistent */ @@ -1828,18 +1837,18 @@ static int jnode_invariant(const jnode * assert("umka-064321", tree != NULL); if (!jlocked && !tlocked) - LOCK_JNODE((jnode *) node); + spin_lock_jnode((jnode *) node); if (!tlocked) - RLOCK_TREE(jnode_get_tree(node)); + read_lock_tree(jnode_get_tree(node)); result = jnode_invariant_f(node, &failed_msg); if (!result) { info_jnode("corrupted node", node); warning("jmacd-555", "Condition %s failed", failed_msg); } if (!tlocked) - RUNLOCK_TREE(jnode_get_tree(node)); + read_unlock_tree(jnode_get_tree(node)); if (!jlocked && !tlocked) - UNLOCK_JNODE((jnode *) node); + spin_unlock_jnode((jnode *) node); return result; } @@ -1915,15 +1924,6 @@ void info_jnode(const char *prefix /* pr } } -/* debugging aid: output human readable information about @node */ -void print_jnode(const char *prefix /* prefix to print */ , - const jnode * node /* node to print */ ) -{ - if (jnode_is_znode(node)) - print_znode(prefix, JZNODE(node)); - else - info_jnode(prefix, node); -} #endif /* REISER4_DEBUG */ diff -puN fs/reiser4/jnode.h~reiser4-spinlock-cleanup fs/reiser4/jnode.h --- devel/fs/reiser4/jnode.h~reiser4-spinlock-cleanup 2006-02-16 14:17:05.000000000 -0800 +++ devel-akpm/fs/reiser4/jnode.h 2006-02-16 14:17:05.000000000 -0800 @@ -12,7 +12,6 @@ #include "key.h" #include "debug.h" #include "dformat.h" -#include "spin_macros.h" #include "emergency_flush.h" #include "plugin/plugin.h" @@ -107,7 +106,7 @@ struct jnode { /* 0 */ unsigned long state; /* lock, protecting jnode's fields. */ - /* 4 */ reiser4_spin_data load; + /* 4 */ spinlock_t load; /* counter of references to jnode itself. Increased on jref(). Decreased on jput(). @@ -148,7 +147,7 @@ struct jnode { /* FOURTH CACHE LINE: atom related fields */ - /* 48 */ reiser4_spin_data guard; + /* 48 */ spinlock_t guard; /* atom the block is in, if any */ /* 52 */ txn_atom *atom; @@ -324,28 +323,32 @@ static inline int JF_TEST_AND_SET(jnode return test_and_set_bit(f, &j->state); } -/* ordering constraint for znode spin lock: znode lock is weaker than - tree lock and dk lock */ -#define spin_ordering_pred_jnode( node ) \ - ( ( lock_counters() -> rw_locked_tree == 0 ) && \ - ( lock_counters() -> spin_locked_txnh == 0 ) && \ - ( lock_counters() -> rw_locked_zlock == 0 ) && \ - ( lock_counters() -> rw_locked_dk == 0 ) && \ - /* \ - in addition you cannot hold more than one jnode spin lock at a \ - time. 
\ - */ \ - ( lock_counters() -> spin_locked_jnode < 2 ) ) - -/* Define spin_lock_jnode, spin_unlock_jnode, and spin_jnode_is_locked. - Take and release short-term spinlocks. Don't hold these across - io. -*/ -SPIN_LOCK_FUNCTIONS(jnode, jnode, guard); +static inline void spin_lock_jnode(jnode *node) +{ + /* check that spinlocks of lower priorities are not held */ + assert("", (LOCK_CNT_NIL(rw_locked_tree) && + LOCK_CNT_NIL(spin_locked_txnh) && + LOCK_CNT_NIL(rw_locked_zlock) && + LOCK_CNT_NIL(rw_locked_dk) && + LOCK_CNT_LT(spin_locked_jnode, 2))); + + spin_lock(&(node->guard)); + + LOCK_CNT_INC(spin_locked_jnode); + LOCK_CNT_INC(spin_locked); +} + +static inline void spin_unlock_jnode(jnode *node) +{ + assert_spin_locked(&(node->guard)); + assert("nikita-1375", LOCK_CNT_GTZ(spin_locked_jnode)); + assert("nikita-1376", LOCK_CNT_GTZ(spin_locked)); -#define spin_ordering_pred_jload(node) (1) + LOCK_CNT_DEC(spin_locked_jnode); + LOCK_CNT_DEC(spin_locked); -SPIN_LOCK_FUNCTIONS(jload, jnode, load); + spin_unlock(&(node->guard)); +} static inline int jnode_is_in_deleteset(const jnode * node) { @@ -398,10 +401,10 @@ static inline const reiser4_block_nr *jn /* block number for IO. Usually this is the same as jnode_get_block(), unless * jnode was emergency flushed---then block number chosen by eflush is * used. */ -static inline const reiser4_block_nr *jnode_get_io_block(const jnode * node) +static inline const reiser4_block_nr *jnode_get_io_block(jnode * node) { assert("nikita-2768", node != NULL); - assert("nikita-2769", spin_jnode_is_locked(node)); + assert_spin_locked(&(node->guard)); if (unlikely(JF_ISSET(node, JNODE_EFLUSH))) return eflush_get(node); @@ -447,13 +450,11 @@ extern int jnodes_tree_done(reiser4_tree extern int znode_is_any_locked(const znode * node); extern void jnode_list_remove(jnode * node); extern void info_jnode(const char *prefix, const jnode * node); -extern void print_jnode(const char *prefix, const jnode * node); #else #define jnode_list_remove(node) noop #define info_jnode(p, n) noop -#define print_jnode(p, n) noop #endif @@ -582,29 +583,12 @@ static inline int jnode_is_znode(const j return jnode_get_type(node) == JNODE_FORMATTED_BLOCK; } -/* return true if "node" is dirty */ -static inline int jnode_is_dirty(const jnode * node) -{ - assert("nikita-782", node != NULL); - assert("jmacd-1800", spin_jnode_is_locked(node) - || (jnode_is_znode(node) && znode_is_any_locked(JZNODE(node)))); - return JF_ISSET(node, JNODE_DIRTY); -} - -/* return true if "node" is dirty, node is unlocked */ -static inline int jnode_check_dirty(jnode * node) -{ - assert("jmacd-7798", node != NULL); - assert("jmacd-7799", spin_jnode_is_not_locked(node)); - return UNDER_SPIN(jnode, node, jnode_is_dirty(node)); -} - -static inline int jnode_is_flushprepped(const jnode * node) +static inline int jnode_is_flushprepped(jnode * node) { assert("jmacd-78212", node != NULL); - assert("jmacd-71276", spin_jnode_is_locked(node)); - return !jnode_is_dirty(node) || JF_ISSET(node, JNODE_RELOC) - || JF_ISSET(node, JNODE_OVRWR); + assert_spin_locked(&(node->guard)); + return !JF_ISSET(node, JNODE_DIRTY) || JF_ISSET(node, JNODE_RELOC) || + JF_ISSET(node, JNODE_OVRWR); } /* Return true if @node has already been processed by the squeeze and allocate @@ -613,9 +597,13 @@ static inline int jnode_is_flushprepped( returns true you may use the block number as a hint. */ static inline int jnode_check_flushprepped(jnode * node) { + int result; + /* It must be clean or relocated or wandered. New allocations are set to relocate. 
*/ - assert("jmacd-71275", spin_jnode_is_not_locked(node)); - return UNDER_SPIN(jnode, node, jnode_is_flushprepped(node)); + spin_lock_jnode(node); + result = jnode_is_flushprepped(node); + spin_unlock_jnode(node); + return result; } /* returns true if node is unformatted */ @@ -691,7 +679,6 @@ static inline void jput(jnode * node) { assert("jmacd-509", node != NULL); assert("jmacd-510", atomic_read(&node->x_count) > 0); - assert("nikita-3065", spin_jnode_is_not_locked(node)); assert("zam-926", schedulable()); LOCK_CNT_DEC(x_refs); diff -puN fs/reiser4/ktxnmgrd.h~reiser4-spinlock-cleanup fs/reiser4/ktxnmgrd.h --- devel/fs/reiser4/ktxnmgrd.h~reiser4-spinlock-cleanup 2006-02-16 14:17:05.000000000 -0800 +++ devel-akpm/fs/reiser4/ktxnmgrd.h 2006-02-16 14:17:05.000000000 -0800 @@ -7,7 +7,6 @@ #define __KTXNMGRD_H__ #include "txnmgr.h" -#include "spin_macros.h" #include #include diff -puN fs/reiser4/lock.c~reiser4-spinlock-cleanup fs/reiser4/lock.c --- devel/fs/reiser4/lock.c~reiser4-spinlock-cleanup 2006-02-16 14:17:05.000000000 -0800 +++ devel-akpm/fs/reiser4/lock.c 2006-02-16 14:17:05.000000000 -0800 @@ -86,8 +86,12 @@ # # ############################################# - Note that a low-priority process - delays node releasing if another high-priority process owns this node. So, slightly more strictly speaking, to have a deadlock capable cycle you must have a loop in which a high priority process is waiting on a low priority process to yield a node, which is slightly different from saying a high priority process is waiting on a node owned by a low priority process. + Note that a low-priority process delays node releasing if another + high-priority process owns this node. So, slightly more strictly speaking, + to have a deadlock capable cycle you must have a loop in which a high + priority process is waiting on a low priority process to yield a node, which + is slightly different from saying a high priority process is waiting on a + node owned by a low priority process. It is enough to avoid deadlocks if we prevent any low-priority process from falling asleep if its locked set contains a node which satisfies the @@ -109,13 +113,17 @@ V4 LOCKING DRAWBACKS - If we have already balanced on one level, and we are propagating our changes upward to a higher level, it could be - very messy to surrender all locks on the lower level because we put so much computational work into it, and reverting - them to their state before they were locked might be very complex. We also don't want to acquire all locks before - performing balancing because that would either be almost as much work as the balancing, or it would be too - conservative and lock too much. We want balancing to be done only at high priority. Yet, we might want to go to the - left one node and use some of its empty space... So we make one attempt at getting the node to the left using - try_lock, and if it fails we do without it, because we didn't really need it, it was only a nice to have. + If we have already balanced on one level, and we are propagating our changes + upward to a higher level, it could be very messy to surrender all locks on + the lower level because we put so much computational work into it, and + reverting them to their state before they were locked might be very complex. + We also don't want to acquire all locks before performing balancing because + that would either be almost as much work as the balancing, or it would be + too conservative and lock too much. We want balancing to be done only at + high priority. 
Yet, we might want to go to the left one node and use some + of its empty space... So we make one attempt at getting the node to the left + using try_lock, and if it fails we do without it, because we didn't really + need it, it was only a nice to have. LOCK STRUCTURES DESCRIPTION @@ -148,14 +156,18 @@ | Z1 | | Z2 | | Z3 | +---------+ +---------+ +---------+ - Thread 1 locked znodes Z1 and Z2, thread 2 locked znodes Z2 and Z3. The picture above shows that lock stack LS1 has a - list of 2 lock handles LH1 and LH2, lock stack LS2 has a list with lock handles LH3 and LH4 on it. Znode Z1 is - locked by only one thread, znode has only one lock handle LH1 on its list, similar situation is for Z3 which is - locked by the thread 2 only. Z2 is locked (for read) twice by different threads and two lock handles are on its - list. Each lock handle represents a single relation of a locking of a znode by a thread. Locking of a znode is an - establishing of a locking relation between the lock stack and the znode by adding of a new lock handle to a list of - lock handles, the lock stack. The lock stack links all lock handles for all znodes locked by the lock stack. The znode - list groups all lock handles for all locks stacks which locked the znode. + Thread 1 locked znodes Z1 and Z2, thread 2 locked znodes Z2 and Z3. The + picture above shows that lock stack LS1 has a list of 2 lock handles LH1 and + LH2, lock stack LS2 has a list with lock handles LH3 and LH4 on it. Znode + Z1 is locked by only one thread, znode has only one lock handle LH1 on its + list, similar situation is for Z3 which is locked by the thread 2 only. Z2 + is locked (for read) twice by different threads and two lock handles are on + its list. Each lock handle represents a single relation of a locking of a + znode by a thread. Locking of a znode is an establishing of a locking + relation between the lock stack and the znode by adding of a new lock handle + to a list of lock handles, the lock stack. The lock stack links all lock + handles for all znodes locked by the lock stack. The znode list groups all + lock handles for all locks stacks which locked the znode. Yet another relation may exist between znode and lock owners. 
If lock procedure cannot immediately take lock on an object it adds the lock owner @@ -231,7 +243,7 @@ static void wake_up_all_lopri_owners(zno { lock_handle *handle; - assert("nikita-1824", rw_zlock_is_locked(&node->lock)); + assert_rw_locked(&(node->lock.guard)); list_for_each_entry(handle, &node->lock.owners, owners_link) { spin_lock_stack(handle->owner); @@ -257,8 +269,7 @@ static inline void link_object(lock_handle * handle, lock_stack * owner, znode * node) { assert("jmacd-810", handle->owner == NULL); - assert("nikita-1828", owner == get_current_lock_stack()); - assert("nikita-1830", rw_zlock_is_locked(&node->lock)); + assert_rw_locked(&(node->lock.guard)); handle->owner = owner; handle->node = node; @@ -280,7 +291,7 @@ static inline void unlink_object(lock_ha { assert("zam-354", handle->owner != NULL); assert("nikita-1608", handle->node != NULL); - assert("nikita-1633", rw_zlock_is_locked(&handle->node->lock)); + assert_rw_locked(&(handle->node->lock.guard)); assert("nikita-1829", handle->owner == get_current_lock_stack()); assert("reiser4-5", handle->owner->nr_locks > 0); @@ -303,11 +314,10 @@ static void lock_object(lock_stack * own { lock_request *request; znode *node; - assert("nikita-1839", owner == get_current_lock_stack()); request = &owner->request; node = request->node; - assert("nikita-1834", rw_zlock_is_locked(&node->lock)); + assert_rw_locked(&(node->lock.guard)); if (request->mode == ZNODE_READ_LOCK) { node->lock.nr_readers++; } else { @@ -337,7 +347,7 @@ static int recursive(lock_stack * owner) /* Owners list is not empty for a locked node */ assert("zam-314", !list_empty_careful(&node->lock.owners)); assert("nikita-1841", owner == get_current_lock_stack()); - assert("nikita-1848", rw_zlock_is_locked(&node->lock)); + assert_rw_locked(&(node->lock.guard)); lh = list_entry(node->lock.owners.next, lock_handle, owners_link); @@ -420,35 +430,39 @@ int znode_is_write_locked(const znode * */ static inline int check_deadlock_condition(znode * node) { - assert("nikita-1833", rw_zlock_is_locked(&node->lock)); + assert_rw_locked(&(node->lock.guard)); return node->lock.nr_hipri_requests > 0 && node->lock.nr_hipri_owners == 0; } +static int check_livelock_condition(znode * node, znode_lock_mode mode) +{ + zlock * lock = &node->lock; + + return mode == ZNODE_READ_LOCK && + lock -> nr_readers >= 0 && lock->nr_hipri_write_requests > 0; +} + /* checks lock/request compatibility */ static int can_lock_object(lock_stack * owner) { znode *node = owner->request.node; - assert("nikita-1842", owner == get_current_lock_stack()); - assert("nikita-1843", rw_zlock_is_locked(&node->lock)); + assert_rw_locked(&(node->lock.guard)); /* See if the node is disconnected. */ - if (unlikely(ZF_ISSET(node, JNODE_IS_DYING))) { + if (unlikely(ZF_ISSET(node, JNODE_IS_DYING))) return RETERR(-EINVAL); - } /* Do not ever try to take a lock if we are going in low priority direction and a node have a high priority request without high priority owners. 
*/ - if (unlikely(!owner->curpri && check_deadlock_condition(node))) { + if (unlikely(!owner->curpri && check_deadlock_condition(node))) return RETERR(-E_REPEAT); - } - - if (unlikely(!is_lock_compatible(node, owner->request.mode))) { + if (unlikely(owner->curpri && check_livelock_condition(node, owner->request.mode))) + return RETERR(-E_REPEAT); + if (unlikely(!is_lock_compatible(node, owner->request.mode))) return RETERR(-E_REPEAT); - } - return 0; } @@ -471,7 +485,7 @@ static void set_high_priority(lock_stack while (&owner->locks != &item->locks_link) { znode *node = item->node; - WLOCK_ZLOCK(&node->lock); + write_lock_zlock(&node->lock); node->lock.nr_hipri_owners++; @@ -479,7 +493,7 @@ static void set_high_priority(lock_stack previous statement (nr_hipri_owners ++) guarantees that signaled will be never set again. */ item->signaled = 0; - WUNLOCK_ZLOCK(&node->lock); + write_unlock_zlock(&node->lock); item = list_entry(item->locks_link.next, lock_handle, locks_link); } @@ -501,7 +515,7 @@ static void set_low_priority(lock_stack lock_handle *handle = list_entry(owner->locks.next, lock_handle, locks_link); while (&owner->locks != &handle->locks_link) { znode *node = handle->node; - WLOCK_ZLOCK(&node->lock); + write_lock_zlock(&node->lock); /* this thread just was hipri owner of @node, so nr_hipri_owners has to be greater than zero. */ assert("nikita-1835", node->lock.nr_hipri_owners > 0); @@ -517,115 +531,59 @@ static void set_low_priority(lock_stack handle->signaled = 1; atomic_inc(&owner->nr_signaled); } - WUNLOCK_ZLOCK(&node->lock); + write_unlock_zlock(&node->lock); handle = list_entry(handle->locks_link.next, lock_handle, locks_link); } owner->curpri = 0; } } -#define MAX_CONVOY_SIZE ((NR_CPUS - 1)) - -/* helper function used by longterm_unlock_znode() to wake up requestor(s). */ -/* - * In certain multi threaded work loads jnode spin lock is the most - * contented one. Wake up of threads waiting for znode is, thus, - * important to do right. There are three well known strategies: - * - * (1) direct hand-off. Hasn't been tried. - * - * (2) wake all (thundering herd). This degrades performance in our - * case. - * - * (3) wake one. Simplest solution where requestor in the front of - * requestors list is awaken under znode spin lock is not very - * good on the SMP, because first thing requestor will try to do - * after waking up on another CPU is to acquire znode spin lock - * that is still held by this thread. As an optimization we grab - * lock stack spin lock, release znode spin lock and wake - * requestor. done_context() synchronize against stack spin lock - * to avoid (impossible) case where requestor has been waked by - * some other thread (wake_up_all_lopri_owners(), or something - * similar) and managed to exit before we waked it up. - * - * Effect of this optimization wasn't big, after all. - * - */ -static void wake_up_requestor(znode * node) +static void remove_lock_request(lock_stack * requestor) { -#if NR_CPUS > 2 - struct list_head *creditors; - lock_stack *convoy[MAX_CONVOY_SIZE]; - int convoyused; - int convoylimit; - - assert("nikita-3180", node != NULL); - assert("nikita-3181", rw_zlock_is_locked(&node->lock)); - - convoyused = 0; - convoylimit = min(num_online_cpus() - 1, MAX_CONVOY_SIZE); - creditors = &node->lock.requestors; - if (!list_empty_careful(creditors)) { - convoy[0] = list_entry(creditors->next, lock_stack, requestors_link); - convoyused = 1; - /* - * it has been verified experimentally, that there are no - * convoys on the leaf level. 
- */ - if (znode_get_level(node) != LEAF_LEVEL && - convoy[0]->request.mode == ZNODE_READ_LOCK && - convoylimit > 1) { - lock_stack *item; - - for (item = list_entry(convoy[0]->requestors_link.next, lock_stack, requestors_link); - creditors != &item->requestors_link; - item = list_entry(item->requestors_link.next, lock_stack, requestors_link)) { - if (item->request.mode == ZNODE_READ_LOCK) { - convoy[convoyused] = item; - ++convoyused; - /* - * it is safe to spin lock multiple - * lock stacks here, because lock - * stack cannot sleep on more than one - * requestors queue. - */ - /* - * use raw spin_lock in stead of macro - * wrappers, because spin lock - * profiling code cannot cope with so - * many locks held at the same time. - */ - spin_lock(&item->sguard.lock); - if (convoyused == convoylimit) - break; - } - } - } - spin_lock(&convoy[0]->sguard.lock); + zlock * lock = &requestor->request.node->lock; + + if (requestor->curpri) { + assert("nikita-1838", lock->nr_hipri_requests > 0); + lock->nr_hipri_requests--; + if (requestor->request.mode == ZNODE_WRITE_LOCK) + lock->nr_hipri_write_requests --; } + list_del_init(&requestor->requestors_link); +} - WUNLOCK_ZLOCK(&node->lock); - while (convoyused > 0) { - --convoyused; - __reiser4_wake_up(convoy[convoyused]); - spin_unlock(&convoy[convoyused]->sguard.lock); - } -#else - /* uniprocessor case: keep it simple */ - if (!list_empty_careful(&node->lock.requestors)) { - lock_stack *requestor; +static void invalidate_all_lock_requests(znode * node) +{ + lock_stack *requestor, *tmp; + + assert_rw_write_locked(&(node->lock.guard)); - requestor = list_entry(node->lock.requestors.next, lock_stack, - requestors_link); + list_for_each_entry_safe(requestor, tmp, &node->lock.requestors, requestors_link) { + remove_lock_request(requestor); + requestor->request.ret_code = -EINVAL; reiser4_wake_up(requestor); + requestor->request.mode = ZNODE_NO_LOCK; } - - WUNLOCK_ZLOCK(&node->lock); -#endif } -#undef MAX_CONVOY_SIZE +static void dispatch_lock_requests(znode * node) +{ + lock_stack *requestor, *tmp; + + assert_rw_write_locked(&(node->lock.guard)); + + list_for_each_entry_safe(requestor, tmp, &node->lock.requestors, requestors_link) { + if (znode_is_write_locked(node)) + break; + if (!can_lock_object(requestor)) { + lock_object(requestor); + remove_lock_request(requestor); + requestor->request.ret_code = 0; + reiser4_wake_up(requestor); + requestor->request.mode = ZNODE_NO_LOCK; + } + } +} /* release long-term lock, acquired by longterm_lock_znode() */ void longterm_unlock_znode(lock_handle * handle) @@ -664,7 +622,7 @@ void longterm_unlock_znode(lock_handle * /* true if node is to die and write lock is released */ youdie = ZF_ISSET(node, JNODE_HEARD_BANSHEE) && (readers < 0); - WLOCK_ZLOCK(&node->lock); + write_lock_zlock(&node->lock); assert("zam-101", znode_is_locked(node)); @@ -704,11 +662,11 @@ void longterm_unlock_znode(lock_handle * /* If there are pending lock requests we wake up a requestor */ if (!znode_is_wlocked(node)) - wake_up_requestor(node); - else - WUNLOCK_ZLOCK(&node->lock); + dispatch_lock_requests(node); + if (check_deadlock_condition(node)) + wake_up_all_lopri_owners(node); + write_unlock_zlock(&node->lock); - assert("nikita-3182", rw_zlock_is_not_locked(&node->lock)); /* minus one reference from handle->node */ handle->node = NULL; assert("nikita-2190", znode_invariant(node)); @@ -719,26 +677,16 @@ void longterm_unlock_znode(lock_handle * /* final portion of longterm-lock */ static int -lock_tail(lock_stack * owner, int wake_up_next, 
int ok, znode_lock_mode mode) +lock_tail(lock_stack * owner, int ok, znode_lock_mode mode) { znode *node = owner->request.node; - assert("jmacd-807", rw_zlock_is_locked(&node->lock)); + assert_rw_write_locked(&(node->lock.guard)); /* If we broke with (ok == 0) it means we can_lock, now do it. */ if (ok == 0) { lock_object(owner); owner->request.mode = 0; - if (mode == ZNODE_READ_LOCK) - wake_up_next = 1; - } - - if (wake_up_next) - wake_up_requestor(node); - else - WUNLOCK_ZLOCK(&node->lock); - - if (ok == 0) { /* count a reference from lockhandle->node znode was already referenced at the entry to this function, @@ -749,7 +697,7 @@ lock_tail(lock_stack * owner, int wake_u LOCK_CNT_INC(long_term_locked_znode); } - + write_unlock_zlock(&node->lock); ON_DEBUG(check_lock_data()); ON_DEBUG(check_lock_node_data(node)); return ok; @@ -763,7 +711,6 @@ lock_tail(lock_stack * owner, int wake_u static int longterm_lock_tryfast(lock_stack * owner) { int result; - int wake_up_next = 0; znode *node; zlock *lock; @@ -774,8 +721,9 @@ static int longterm_lock_tryfast(lock_st assert("nikita-3341", request_is_deadlock_safe(node, ZNODE_READ_LOCK, ZNODE_LOCK_LOPRI)); - - result = UNDER_RW(zlock, lock, read, can_lock_object(owner)); + read_lock_zlock(lock); + result = can_lock_object(owner); + read_unlock_zlock(lock); if (likely(result != -EINVAL)) { spin_lock_znode(node); @@ -783,19 +731,18 @@ static int longterm_lock_tryfast(lock_st try_capture(ZJNODE(node), ZNODE_READ_LOCK, 0, 1 /* can copy on capture */ ); spin_unlock_znode(node); - WLOCK_ZLOCK(lock); + write_lock_zlock(lock); if (unlikely(result != 0)) { owner->request.mode = 0; - wake_up_next = 1; } else { result = can_lock_object(owner); if (unlikely(result == -E_REPEAT)) { /* fall back to longterm_lock_znode() */ - WUNLOCK_ZLOCK(lock); + write_unlock_zlock(lock); return 1; } } - return lock_tail(owner, wake_up_next, result, ZNODE_READ_LOCK); + return lock_tail(owner, result, ZNODE_READ_LOCK); } else return 1; } @@ -813,7 +760,6 @@ int longterm_lock_znode( znode_lock_request request) { int ret; int hipri = (request & ZNODE_LOCK_HIPRI) != 0; - int wake_up_next = 0; int non_blocking = 0; int has_atom; txn_capture cap_flags; @@ -829,6 +775,7 @@ int longterm_lock_znode( assert("jmacd-808", handle->owner == NULL); assert("nikita-3026", schedulable()); assert("nikita-3219", request_is_deadlock_safe(node, mode, request)); + assert("zam-1056", atomic_read(&ZJNODE(node)->x_count) > 0); /* long term locks are not allowed in the VM contexts (->writepage(), * prune_{d,i}cache()). * @@ -836,6 +783,7 @@ int longterm_lock_znode( * bug caused by d_splice_alias() only working for directories. */ assert("nikita-3547", 1 || ((current->flags & PF_MEMALLOC) == 0)); + assert ("zam-1055", mode != ZNODE_NO_LOCK); cap_flags = 0; if (request & ZNODE_LOCK_NONBLOCK) { @@ -873,11 +821,11 @@ int longterm_lock_znode( has_atom = (txnh->atom != NULL); /* Synchronize on node's zlock guard lock. */ - WLOCK_ZLOCK(lock); + write_lock_zlock(lock); if (znode_is_locked(node) && mode == ZNODE_WRITE_LOCK && recursive(owner)) - return lock_tail(owner, 0, 0, mode); + return lock_tail(owner, 0, mode); for (;;) { /* Check the lock's availability: if it is unavaiable we get @@ -887,8 +835,6 @@ int longterm_lock_znode( if (unlikely(ret == -EINVAL)) { /* @node is dying. Leave it alone. */ - /* wakeup next requestor to support lock invalidating */ - wake_up_next = 1; break; } @@ -967,13 +913,13 @@ int longterm_lock_znode( * JNODE_IS_DYING and this will be noted by * can_lock_object() below. 
*/ - WUNLOCK_ZLOCK(lock); + write_unlock_zlock(lock); spin_lock_znode(node); ret = try_capture(ZJNODE(node), mode, cap_flags, 1 /* can copy on capture */ ); spin_unlock_znode(node); - WLOCK_ZLOCK(lock); + write_lock_zlock(lock); if (unlikely(ret != 0)) { /* In the failure case, the txnmgr releases the znode's lock (or in some cases, it was @@ -981,8 +927,6 @@ int longterm_lock_znode( reacquire it so we should return here, avoid releasing the lock. */ owner->request.mode = 0; - /* next requestor may not fail */ - wake_up_next = 1; break; } @@ -1008,45 +952,43 @@ int longterm_lock_znode( break; } - assert("nikita-1837", rw_zlock_is_locked(&node->lock)); + assert_rw_locked(&(node->lock.guard)); if (hipri) { /* If we are going in high priority direction then increase high priority requests counter for the node */ lock->nr_hipri_requests++; + if (mode == ZNODE_WRITE_LOCK) + lock->nr_hipri_write_requests ++; /* If there are no high priority owners for a node, then immediately wake up low priority owners, so they can detect possible deadlock */ if (lock->nr_hipri_owners == 0) wake_up_all_lopri_owners(node); - /* And prepare a lock request */ - list_add(&owner->requestors_link, &lock->requestors); - } else { - /* If we are going in low priority direction then we - set low priority to our process. This is the only - case when a process may become low priority */ - /* And finally prepare a lock request */ - list_add_tail(&owner->requestors_link, &lock->requestors); } + list_add_tail(&owner->requestors_link, &lock->requestors); /* Ok, here we have prepared a lock request, so unlock a znode ... */ - WUNLOCK_ZLOCK(lock); + write_unlock_zlock(lock); /* ... and sleep */ go_to_sleep(owner); - - WLOCK_ZLOCK(lock); - - if (hipri) { - assert("nikita-1838", lock->nr_hipri_requests > 0); - lock->nr_hipri_requests--; + if (owner->request.mode == ZNODE_NO_LOCK) + goto request_is_done; + write_lock_zlock(lock); + if (owner->request.mode == ZNODE_NO_LOCK) { + write_unlock_zlock(lock); + request_is_done: + if (owner->request.ret_code == 0) { + LOCK_CNT_INC(long_term_locked_znode); + zref(node); + } + return owner->request.ret_code; } - - list_del_init(&owner->requestors_link); + remove_lock_request(owner); } - assert("jmacd-807/a", rw_zlock_is_locked(&node->lock)); - return lock_tail(owner, wake_up_next, ret, mode); + return lock_tail(owner, ret, mode); } /* lock object invalidation means changing of lock object state to `INVALID' @@ -1058,7 +1000,6 @@ void invalidate_lock(lock_handle * handl { znode *node = handle->node; lock_stack *owner = handle->owner; - lock_stack *rq; assert("zam-325", owner == get_current_lock_stack()); assert("zam-103", znode_is_write_locked(node)); @@ -1066,7 +1007,7 @@ void invalidate_lock(lock_handle * handl assert("nikita-1793", !ZF_ISSET(node, JNODE_RIGHT_CONNECTED)); assert("nikita-1394", ZF_ISSET(node, JNODE_HEARD_BANSHEE)); assert("nikita-3097", znode_is_wlocked_once(node)); - assert("nikita-3338", rw_zlock_is_locked(&node->lock)); + assert_rw_locked(&(node->lock.guard)); if (handle->signaled) atomic_dec(&owner->nr_signaled); @@ -1075,25 +1016,8 @@ void invalidate_lock(lock_handle * handl unlink_object(handle); node->lock.nr_readers = 0; - /* all requestors will be informed that lock is invalidated. */ - list_for_each_entry(rq, &node->lock.requestors, requestors_link) - reiser4_wake_up(rq); - - /* We use that each unlock() will wakeup first item from requestors - list; our lock stack is the last one. 
*/ - while (!list_empty_careful(&node->lock.requestors)) { - list_add_tail(&owner->requestors_link, &node->lock.requestors); - - prepare_to_sleep(owner); - - WUNLOCK_ZLOCK(&node->lock); - go_to_sleep(owner); - WLOCK_ZLOCK(&node->lock); - - list_del_init(&owner->requestors_link); - } - - WUNLOCK_ZLOCK(&node->lock); + invalidate_all_lock_requests(node); + write_unlock_zlock(&node->lock); } /* Initializes lock_stack. */ @@ -1103,7 +1027,7 @@ void init_lock_stack(lock_stack * owner { INIT_LIST_HEAD(&owner->locks); INIT_LIST_HEAD(&owner->requestors_link); - spin_stack_init(owner); + spin_lock_init(&owner->sguard); owner->curpri = 1; sema_init(&owner->sema, 0); } @@ -1114,7 +1038,7 @@ void reiser4_init_lock(zlock * lock /* p * structure. */ ) { memset(lock, 0, sizeof(zlock)); - rw_zlock_init(lock); + rwlock_init(&lock->guard); INIT_LIST_HEAD(&lock->requestors); INIT_LIST_HEAD(&lock->owners); } @@ -1151,7 +1075,7 @@ move_lh_internal(lock_handle * new, lock assert("nikita-1827", owner == get_current_lock_stack()); assert("nikita-1831", new->owner == NULL); - WLOCK_ZLOCK(&node->lock); + write_lock_zlock(&node->lock); signaled = old->signaled; if (unlink_old) { @@ -1175,7 +1099,7 @@ move_lh_internal(lock_handle * new, lock link_object(new, owner, node); new->signaled = signaled; - WUNLOCK_ZLOCK(&node->lock); + write_unlock_zlock(&node->lock); } void move_lh(lock_handle * new, lock_handle * old) @@ -1320,10 +1244,10 @@ void check_lock_data(void) /* check consistency of locking data structures for @node */ void check_lock_node_data(znode * node) { - RLOCK_ZLOCK(&node->lock); + read_lock_zlock(&node->lock); list_check(&node->lock.owners); list_check(&node->lock.requestors); - RUNLOCK_ZLOCK(&node->lock); + read_unlock_zlock(&node->lock); } /* check that given lock request is dead lock safe. This check is, of course, @@ -1381,6 +1305,6 @@ const char *lock_mode_name(znode_lock_mo mode-name: "LC" c-basic-offset: 8 tab-width: 8 - fill-column: 120 + fill-column: 79 End: */ diff -puN fs/reiser4/lock.h~reiser4-spinlock-cleanup fs/reiser4/lock.h --- devel/fs/reiser4/lock.h~reiser4-spinlock-cleanup 2006-02-16 14:17:05.000000000 -0800 +++ devel-akpm/fs/reiser4/lock.h 2006-02-16 14:17:05.000000000 -0800 @@ -8,7 +8,6 @@ #include "forward.h" #include "debug.h" #include "dformat.h" -#include "spin_macros.h" #include "key.h" #include "coord.h" #include "plugin/node/node.h" @@ -23,7 +22,7 @@ /* Per-znode lock object */ struct zlock { - reiser4_rw_data guard; + rwlock_t guard; /* The number of readers if positive; the number of recursively taken write locks if negative. Protected by zlock spin lock. 
*/ int nr_readers; @@ -34,25 +33,79 @@ struct zlock { unsigned nr_hipri_requests; /* A linked list of lock_handle objects that contains pointers for all lock_stacks which have this lock object locked */ + unsigned nr_hipri_write_requests; struct list_head owners; /* A linked list of lock_stacks that wait for this lock */ struct list_head requestors; }; -#define rw_ordering_pred_zlock(lock) \ - (lock_counters()->spin_locked_stack == 0) +static inline void read_lock_zlock(zlock *lock) +{ + /* check that zlock is not locked */ + assert("", (LOCK_CNT_NIL(rw_locked_zlock) && + LOCK_CNT_NIL(read_locked_zlock) && + LOCK_CNT_NIL(write_locked_zlock))); + /* check that spinlocks of lower priorities are not held */ + assert("", LOCK_CNT_NIL(spin_locked_stack)); + + read_lock(&(lock->guard)); + + LOCK_CNT_INC(read_locked_zlock); + LOCK_CNT_INC(rw_locked_zlock); + LOCK_CNT_INC(spin_locked); +} + +static inline void read_unlock_zlock(zlock *lock) +{ + assert("nikita-1375", LOCK_CNT_GTZ(read_locked_zlock)); + assert("nikita-1376", LOCK_CNT_GTZ(rw_locked_zlock)); + assert("nikita-1376", LOCK_CNT_GTZ(spin_locked)); + + LOCK_CNT_DEC(read_locked_zlock); + LOCK_CNT_DEC(rw_locked_zlock); + LOCK_CNT_DEC(spin_locked); + + read_unlock(&(lock->guard)); +} + +static inline void write_lock_zlock(zlock *lock) +{ + /* check that zlock is not locked */ + assert("", (LOCK_CNT_NIL(rw_locked_zlock) && + LOCK_CNT_NIL(read_locked_zlock) && + LOCK_CNT_NIL(write_locked_zlock))); + /* check that spinlocks of lower priorities are not held */ + assert("", LOCK_CNT_NIL(spin_locked_stack)); + + write_lock(&(lock->guard)); + + LOCK_CNT_INC(write_locked_zlock); + LOCK_CNT_INC(rw_locked_zlock); + LOCK_CNT_INC(spin_locked); +} + +static inline void write_unlock_zlock(zlock *lock) +{ + assert("nikita-1375", LOCK_CNT_GTZ(write_locked_zlock)); + assert("nikita-1376", LOCK_CNT_GTZ(rw_locked_zlock)); + assert("nikita-1376", LOCK_CNT_GTZ(spin_locked)); + + LOCK_CNT_DEC(write_locked_zlock); + LOCK_CNT_DEC(rw_locked_zlock); + LOCK_CNT_DEC(spin_locked); + + write_unlock(&(lock->guard)); +} -/* Define spin_lock_zlock, spin_unlock_zlock, etc. 
*/ -RW_LOCK_FUNCTIONS(zlock, zlock, guard); #define lock_is_locked(lock) ((lock)->nr_readers != 0) #define lock_is_rlocked(lock) ((lock)->nr_readers > 0) #define lock_is_wlocked(lock) ((lock)->nr_readers < 0) #define lock_is_wlocked_once(lock) ((lock)->nr_readers == -1) #define lock_can_be_rlocked(lock) ((lock)->nr_readers >=0) -#define lock_mode_compatible(lock, mode) \ - (((mode) == ZNODE_WRITE_LOCK && !lock_is_locked(lock)) \ - || ((mode) == ZNODE_READ_LOCK && lock_can_be_rlocked(lock))) +#define lock_mode_compatible(lock, mode) \ + (((mode) == ZNODE_WRITE_LOCK && !lock_is_locked(lock)) || \ + ((mode) == ZNODE_READ_LOCK && lock_can_be_rlocked(lock))) /* Since we have R/W znode locks we need additional bidirectional `link' objects to implement n<->m relationship between lock owners and lock @@ -84,12 +137,14 @@ typedef struct lock_request { znode *node; /* Lock mode (ZNODE_READ_LOCK or ZNODE_WRITE_LOCK) */ znode_lock_mode mode; + /* how dispatch_lock_requests() returns lock request result code */ + int ret_code; } lock_request; /* A lock stack structure for accumulating locks owned by a process */ struct lock_stack { /* A guard lock protecting a lock stack */ - reiser4_spin_data sguard; + spinlock_t sguard; /* number of znodes which were requested by high priority processes */ atomic_t nr_signaled; /* Current priority of a process @@ -176,26 +231,32 @@ extern int lock_stack_isclean(lock_stack extern int znode_is_write_locked(const znode *); extern void invalidate_lock(lock_handle *); -#if REISER4_DEBUG -#define spin_ordering_pred_stack_addendum (1) -#else -#define spin_ordering_pred_stack_addendum \ - ((lock_counters()->rw_locked_dk == 0) && \ - (lock_counters()->rw_locked_tree == 0)) -#endif /* lock ordering is: first take zlock spin lock, then lock stack spin lock */ -#define spin_ordering_pred_stack(stack) \ - ((lock_counters()->spin_locked_stack == 0) && \ - (lock_counters()->spin_locked_txnmgr == 0) && \ - (lock_counters()->spin_locked_super == 0) && \ - (lock_counters()->spin_locked_inode_object == 0) && \ - (lock_counters()->rw_locked_cbk_cache == 0) && \ - (lock_counters()->spin_locked_epoch == 0) && \ - (lock_counters()->spin_locked_super_eflush == 0) && \ - spin_ordering_pred_stack_addendum) +#define spin_ordering_pred_stack(stack) \ + (LOCK_CNT_NIL(spin_locked_stack) && \ + LOCK_CNT_NIL(spin_locked_txnmgr) && \ + LOCK_CNT_NIL(spin_locked_inode) && \ + LOCK_CNT_NIL(rw_locked_cbk_cache) && \ + LOCK_CNT_NIL(spin_locked_super_eflush) ) + +static inline void spin_lock_stack(lock_stack *stack) +{ + assert("", spin_ordering_pred_stack(stack)); + spin_lock(&(stack->sguard)); + LOCK_CNT_INC(spin_locked_stack); + LOCK_CNT_INC(spin_locked); +} + +static inline void spin_unlock_stack(lock_stack *stack) +{ + assert_spin_locked(&(stack->sguard)); + assert("nikita-1375", LOCK_CNT_GTZ(spin_locked_stack)); + assert("nikita-1376", LOCK_CNT_GTZ(spin_locked)); + LOCK_CNT_DEC(spin_locked_stack); + LOCK_CNT_DEC(spin_locked); + spin_unlock(&(stack->sguard)); +} -/* Same for lock_stack */ -SPIN_LOCK_FUNCTIONS(stack, lock_stack, sguard); static inline void reiser4_wake_up(lock_stack * owner) { diff -puN fs/reiser4/oid.c~reiser4-spinlock-cleanup fs/reiser4/oid.c --- devel/fs/reiser4/oid.c~reiser4-spinlock-cleanup 2006-02-16 14:17:05.000000000 -0800 +++ devel-akpm/fs/reiser4/oid.c 2006-02-16 14:17:05.000000000 -0800 @@ -36,13 +36,13 @@ oid_t oid_allocate(struct super_block * sbinfo = get_super_private(super); - reiser4_spin_lock_sb(sbinfo); + spin_lock_reiser4_super(sbinfo); if (sbinfo->next_to_use != 
ABSOLUTE_MAX_OID) { oid = sbinfo->next_to_use++; sbinfo->oids_in_use++; } else oid = ABSOLUTE_MAX_OID; - reiser4_spin_unlock_sb(sbinfo); + spin_unlock_reiser4_super(sbinfo); return oid; } @@ -55,9 +55,9 @@ int oid_release(struct super_block *supe sbinfo = get_super_private(super); - reiser4_spin_lock_sb(sbinfo); + spin_lock_reiser4_super(sbinfo); sbinfo->oids_in_use--; - reiser4_spin_unlock_sb(sbinfo); + spin_unlock_reiser4_super(sbinfo); return 0; } @@ -73,9 +73,9 @@ oid_t oid_next(const struct super_block sbinfo = get_super_private(super); - reiser4_spin_lock_sb(sbinfo); + spin_lock_reiser4_super(sbinfo); oid = sbinfo->next_to_use; - reiser4_spin_unlock_sb(sbinfo); + spin_unlock_reiser4_super(sbinfo); return oid; } @@ -91,9 +91,9 @@ long oids_used(const struct super_block sbinfo = get_super_private(super); - reiser4_spin_lock_sb(sbinfo); + spin_lock_reiser4_super(sbinfo); used = sbinfo->oids_in_use; - reiser4_spin_unlock_sb(sbinfo); + spin_unlock_reiser4_super(sbinfo); if (used < (__u64) ((long)~0) >> 1) return (long)used; else @@ -111,9 +111,9 @@ long oids_free(const struct super_block sbinfo = get_super_private(super); - reiser4_spin_lock_sb(sbinfo); + spin_lock_reiser4_super(sbinfo); oids = ABSOLUTE_MAX_OID - OIDS_RESERVED - sbinfo->next_to_use; - reiser4_spin_unlock_sb(sbinfo); + spin_unlock_reiser4_super(sbinfo); if (oids < (__u64) ((long)~0) >> 1) return (long)oids; else @@ -132,7 +132,7 @@ void oid_count_allocated(void) atom = get_current_atom_locked(); atom->nr_objects_created++; - UNLOCK_ATOM(atom); + spin_unlock_atom(atom); } /* @@ -146,7 +146,7 @@ void oid_count_released(void) atom = get_current_atom_locked(); atom->nr_objects_deleted++; - UNLOCK_ATOM(atom); + spin_unlock_atom(atom); } /* diff -puN fs/reiser4/page_cache.c~reiser4-spinlock-cleanup fs/reiser4/page_cache.c --- devel/fs/reiser4/page_cache.c~reiser4-spinlock-cleanup 2006-02-16 14:17:05.000000000 -0800 +++ devel-akpm/fs/reiser4/page_cache.c 2006-02-16 14:17:05.000000000 -0800 @@ -432,7 +432,9 @@ static struct bio *page_bio(struct page blksz = super->s_blocksize; assert("nikita-2028", blksz == (int)PAGE_CACHE_SIZE); - blocknr = *UNDER_SPIN(jnode, node, jnode_get_io_block(node)); + spin_lock_jnode(node); + blocknr = *jnode_get_io_block(node); + spin_unlock_jnode(node); assert("nikita-2275", blocknr != (reiser4_block_nr) 0); assert("nikita-2276", !blocknr_is_fake(&blocknr)); @@ -459,7 +461,7 @@ static struct bio *page_bio(struct page } /* this function is internally called by jnode_make_dirty() */ -int set_page_dirty_internal(struct page *page, int tag_as_moved) +int set_page_dirty_internal(struct page *page) { struct address_space *mapping; @@ -527,22 +529,22 @@ int reiser4_writepage(struct page *page assert("nikita-2419", node != NULL); - LOCK_JNODE(node); + spin_lock_jnode(node); /* * page was dirty, but jnode is not. This is (only?) * possible if page was modified through mmap(). We * want to handle such jnodes specially. */ - phantom = !jnode_is_dirty(node); + phantom = !JF_ISSET(node, JNODE_DIRTY); atom = jnode_get_atom(node); if (atom != NULL) { if (!(atom->flags & ATOM_FORCE_COMMIT)) { atom->flags |= ATOM_FORCE_COMMIT; ktxnmgrd_kick(&get_super_private(s)->tmgr); } - UNLOCK_ATOM(atom); + spin_unlock_atom(atom); } - UNLOCK_JNODE(node); + spin_unlock_jnode(node); result = emergency_flush(page); if (result == 0) @@ -558,7 +560,7 @@ int reiser4_writepage(struct page *page * list when clearing dirty flag. So it is enough to * just set dirty bit. 
*/ - set_page_dirty_internal(page, 0); + set_page_dirty_internal(page); unlock_page(page); } return result; @@ -624,7 +626,7 @@ static struct address_space_operations f void drop_page(struct page *page) { assert("nikita-2181", PageLocked(page)); - clear_page_dirty(page); + clear_page_dirty_for_io(page); ClearPageUptodate(page); #if defined(PG_skipped) ClearPageSkipped(page); @@ -646,13 +648,13 @@ static void invalidate_unformatted(jnode { struct page *page; - LOCK_JNODE(node); + spin_lock_jnode(node); page = node->pg; if (page) { loff_t from, to; page_cache_get(page); - UNLOCK_JNODE(node); + spin_unlock_jnode(node); /* FIXME: use truncate_complete_page instead */ from = (loff_t) page->index << PAGE_CACHE_SHIFT; to = from + PAGE_CACHE_SIZE - 1; @@ -693,7 +695,7 @@ truncate_jnodes_range(struct inode *inod assert("nikita-3466", index <= end); - RLOCK_TREE(tree); + read_lock_tree(tree); taken = radix_tree_gang_lookup(jnode_tree_by_reiser4_inode(info), (void **)gang, index, @@ -705,7 +707,7 @@ truncate_jnodes_range(struct inode *inod else gang[i] = NULL; } - RUNLOCK_TREE(tree); + read_unlock_tree(tree); for (i = 0; i < taken; ++i) { node = gang[i]; @@ -740,39 +742,6 @@ reiser4_invalidate_pages(struct address_ truncate_jnodes_range(mapping->host, from, count); } -#if REISER4_DEBUG - -#define page_flag_name( page, flag ) \ - ( test_bit( ( flag ), &( page ) -> flags ) ? ((#flag "|")+3) : "" ) - -void print_page(const char *prefix, struct page *page) -{ - if (page == NULL) { - printk("null page\n"); - return; - } - printk("%s: page index: %lu mapping: %p count: %i private: %lx\n", - prefix, page->index, page->mapping, page_count(page), - page_private(page)); - printk("\tflags: %s%s%s%s %s%s%s %s%s%s %s%s\n", - page_flag_name(page, PG_locked), page_flag_name(page, PG_error), - page_flag_name(page, PG_referenced), page_flag_name(page, - PG_uptodate), - page_flag_name(page, PG_dirty), page_flag_name(page, PG_lru), - page_flag_name(page, PG_slab), page_flag_name(page, PG_checked), - page_flag_name(page, PG_reserved), page_flag_name(page, - PG_private), - page_flag_name(page, PG_writeback), page_flag_name(page, - PG_nosave)); - if (jprivate(page) != NULL) { - print_jnode("\tpage jnode", jprivate(page)); - printk("\n"); - } -} - -#endif - - /* * Local variables: * c-indentation-style: "K&R" diff -puN fs/reiser4/page_cache.h~reiser4-spinlock-cleanup fs/reiser4/page_cache.h --- devel/fs/reiser4/page_cache.h~reiser4-spinlock-cleanup 2006-02-16 14:17:05.000000000 -0800 +++ devel-akpm/fs/reiser4/page_cache.h 2006-02-16 14:17:05.000000000 -0800 @@ -18,7 +18,7 @@ extern void done_formatted_fake(struct s extern reiser4_tree *tree_by_page(const struct page *page); -extern int set_page_dirty_internal(struct page *page, int tag_as_moved); +extern int set_page_dirty_internal(struct page *page); #define reiser4_submit_bio(rw, bio) submit_bio((rw), (bio)) diff -puN fs/reiser4/plugin/compress/compress.c~reiser4-spinlock-cleanup fs/reiser4/plugin/compress/compress.c --- devel/fs/reiser4/plugin/compress/compress.c~reiser4-spinlock-cleanup 2006-02-16 14:17:05.000000000 -0800 +++ devel-akpm/fs/reiser4/plugin/compress/compress.c 2006-02-16 14:17:05.000000000 -0800 @@ -45,10 +45,12 @@ static reiser4_plugin_ops compression_pl static int gzip1_init(void) { - int ret = -ENOSYS; -#if defined(REISER4_GZIP_TFM) + int ret = -EINVAL; +#if REISER4_ZLIB ret = 0; #endif + if (ret == -EINVAL) + warning("edward-1337", "Zlib not compiled into kernel"); return ret; } @@ -60,7 +62,7 @@ static int gzip1_overrun(unsigned src_le static coa_t 
gzip1_alloc(tfm_action act) { coa_t coa = NULL; -#if defined(REISER4_GZIP_TFM) +#if REISER4_ZLIB int ret = 0; switch (act) { case TFM_WRITE: /* compress */ @@ -71,7 +73,7 @@ static coa_t gzip1_alloc(tfm_action act) } memset(coa, 0, zlib_deflate_workspacesize()); break; - case TFM_READ: /* decompress */ + case TFM_READ: /* decompress */ coa = vmalloc(zlib_inflate_workspacesize()); if (!coa) { ret = -ENOMEM; @@ -96,12 +98,12 @@ static coa_t gzip1_alloc(tfm_action act) static coa_t gzip1_nocompress_alloc(tfm_action act) { coa_t coa = NULL; -#if defined(REISER4_GZIP_TFM) +#if REISER4_ZLIB int ret = 0; switch (act) { case TFM_WRITE: /* compress */ break; - case TFM_READ: /* decompress */ + case TFM_READ: /* decompress */ coa = vmalloc(zlib_inflate_workspacesize()); if (!coa) { ret = -ENOMEM; @@ -146,6 +148,7 @@ static void gzip1_nocompress_free(coa_t switch (act) { case TFM_READ: /* decompress */ vfree(coa); + break; case TFM_WRITE: /* compress */ impossible("edward-1302", "trying to free non-allocated workspace"); @@ -164,6 +167,7 @@ static void gzip1_compress(coa_t coa, __u8 * src_first, unsigned src_len, __u8 * dst_first, unsigned *dst_len) { +#if REISER4_ZLIB int ret = 0; struct z_stream_s stream; @@ -199,6 +203,7 @@ gzip1_compress(coa_t coa, __u8 * src_fir return; rollback: *dst_len = src_len; +#endif return; } @@ -206,6 +211,7 @@ static void gzip1_decompress(coa_t coa, __u8 * src_first, unsigned src_len, __u8 * dst_first, unsigned *dst_len) { +#if REISER4_ZLIB int ret = 0; struct z_stream_s stream; @@ -248,6 +254,7 @@ gzip1_decompress(coa_t coa, __u8 * src_f return; } *dst_len = stream.total_out; +#endif return; } @@ -255,6 +262,15 @@ gzip1_decompress(coa_t coa, __u8 * src_f /* lzo1 compression */ /******************************************************************************/ +static int lzo1_init(void) +{ + int ret; + ret = lzo_init(); + if (ret != LZO_E_OK) + warning("edward-848", "lzo_init() failed with ret = %d\n", ret); + return ret; +} + static int lzo1_overrun(unsigned in_len) { return in_len / 64 + 16 + 3; @@ -322,13 +338,6 @@ lzo1_compress(coa_t coa, __u8 * src_firs assert("edward-846", coa != NULL); assert("edward-847", src_len != 0); - result = lzo_init(); - - if (result != LZO_E_OK) { - warning("edward-848", "lzo_init() failed\n"); - goto out; - } - result = lzo1x_1_compress(src_first, src_len, dst_first, dst_len, coa); if (result != LZO_E_OK) { warning("edward-849", "lzo1x_1_compress failed\n"); @@ -353,13 +362,6 @@ lzo1_decompress(coa_t coa, __u8 * src_fi assert("edward-851", coa == NULL); assert("edward-852", src_len != 0); - result = lzo_init(); - - if (result != LZO_E_OK) { - warning("edward-888", "lzo_init() failed\n"); - return; - } - result = lzo1x_decompress(src_first, src_len, dst_first, dst_len, NULL); if (result != LZO_E_OK) warning("edward-853", "lzo1x_1_decompress failed\n"); @@ -377,7 +379,7 @@ compression_plugin compression_plugins[L .linkage = {NULL, NULL} }, .dual = LZO1_NO_COMPRESSION_ID, - .init = NULL, + .init = lzo1_init, .overrun = lzo1_overrun, .alloc = lzo1_alloc, .free = lzo1_free, @@ -396,7 +398,7 @@ compression_plugin compression_plugins[L .linkage = {NULL, NULL} }, .dual = LZO1_COMPRESSION_ID, - .init = NULL, + .init = lzo1_init, .overrun = NULL, .alloc = NULL, .free = NULL, diff -puN fs/reiser4/plugin/compress/compress.h~reiser4-spinlock-cleanup fs/reiser4/plugin/compress/compress.h --- devel/fs/reiser4/plugin/compress/compress.h~reiser4-spinlock-cleanup 2006-02-16 14:17:05.000000000 -0800 +++ devel-akpm/fs/reiser4/plugin/compress/compress.h 
2006-02-16 14:17:05.000000000 -0800 @@ -5,6 +5,7 @@ #include typedef enum { + TFM_INVAL, TFM_READ, TFM_WRITE } tfm_action; diff -puN fs/reiser4/plugin/compress/compress_mode.c~reiser4-spinlock-cleanup fs/reiser4/plugin/compress/compress_mode.c --- devel/fs/reiser4/plugin/compress/compress_mode.c~reiser4-spinlock-cleanup 2006-02-16 14:17:05.000000000 -0800 +++ devel-akpm/fs/reiser4/plugin/compress/compress_mode.c 2006-02-16 14:17:05.000000000 -0800 @@ -11,9 +11,8 @@ static int should_deflate_test(cloff_t i return !test_bit(0, &index); } -/* plugin->discard_deflate() */ - -static int discard_deflate_nocond(struct inode *inode, cloff_t index) +/* plugin->discard_hook() */ +static int discard_nocond(struct inode *inode, cloff_t index) { int result; @@ -24,15 +23,15 @@ static int discard_deflate_nocond(struct (inode_compression_plugin(inode)))); if (result) return result; - mark_inode_dirty(inode); + __mark_inode_dirty(inode, I_DIRTY_PAGES); return 0; } -static int discard_deflate_first(struct inode *inode, cloff_t index) +static int discard_first(struct inode *inode, cloff_t index) { assert("edward-1308", inode != NULL); - return (index ? 0 : discard_deflate_nocond(inode, index)); + return (index ? 0 : discard_nocond(inode, index)); } /* compression mode_plugins */ @@ -47,8 +46,8 @@ compression_mode_plugin compression_mode .linkage = {NULL, NULL} }, .should_deflate = NULL, - .save_deflate = NULL, - .discard_deflate = discard_deflate_first + .accept_hook = NULL, + .discard_hook = discard_first }, [LAZY_COMPRESSION_MODE_ID] = { .h = { @@ -60,8 +59,8 @@ compression_mode_plugin compression_mode .linkage = {NULL, NULL} }, .should_deflate = NULL, - .save_deflate = NULL, - .discard_deflate = discard_deflate_nocond + .accept_hook = NULL, + .discard_hook = discard_nocond }, [FORCE_COMPRESSION_MODE_ID] = { .h = { @@ -73,8 +72,8 @@ compression_mode_plugin compression_mode .linkage = {NULL, NULL} }, .should_deflate = NULL, - .save_deflate = NULL, - .discard_deflate = NULL + .accept_hook = NULL, + .discard_hook = NULL }, [TEST_COMPRESSION_MODE_ID] = { .h = { @@ -86,8 +85,8 @@ compression_mode_plugin compression_mode .linkage = {NULL, NULL} }, .should_deflate = should_deflate_test, - .save_deflate = NULL, - .discard_deflate = NULL + .accept_hook = NULL, + .discard_hook = NULL } }; diff -puN fs/reiser4/plugin/digest.c~reiser4-spinlock-cleanup fs/reiser4/plugin/digest.c --- devel/fs/reiser4/plugin/digest.c~reiser4-spinlock-cleanup 2006-02-16 14:17:05.000000000 -0800 +++ devel-akpm/fs/reiser4/plugin/digest.c 2006-02-16 14:17:05.000000000 -0800 @@ -9,24 +9,40 @@ #include -#define NONE_DIGEST_SIZE 0 +extern digest_plugin digest_plugins[LAST_DIGEST_ID]; -REGISTER_NONE_ALG(digest, DIGEST) +static struct crypto_tfm * alloc_sha256 (void) +{ +#if REISER4_SHA256 + return crypto_alloc_tfm ("sha256", 0); +#else + warning("edward-1418", "sha256 unsupported"); + return ERR_PTR(-EINVAL); +#endif +} + +static void free_sha256 (struct crypto_tfm * tfm) +{ +#if REISER4_SHA256 + crypto_free_tfm(tfm); +#endif + return; +} /* digest plugins */ digest_plugin digest_plugins[LAST_DIGEST_ID] = { - [NONE_DIGEST_ID] = { + [SHA256_32_DIGEST_ID] = { .h = { .type_id = REISER4_DIGEST_PLUGIN_TYPE, - .id = NONE_DIGEST_ID, + .id = SHA256_32_DIGEST_ID, .pops = NULL, - .label = "none", - .desc = "trivial digest", + .label = "sha256_32", + .desc = "sha256_32 digest transform", .linkage = {NULL, NULL} }, - .dsize = NONE_DIGEST_SIZE, - .alloc = alloc_none_digest, - .free = free_none_digest + .fipsize = sizeof(__u32), + .alloc = alloc_sha256, + 
.free = free_sha256 } }; diff -puN fs/reiser4/plugin/file/cryptcompress.c~reiser4-spinlock-cleanup fs/reiser4/plugin/file/cryptcompress.c --- devel/fs/reiser4/plugin/file/cryptcompress.c~reiser4-spinlock-cleanup 2006-02-16 14:17:05.000000000 -0800 +++ devel-akpm/fs/reiser4/plugin/file/cryptcompress.c 2006-02-16 14:17:05.000000000 -0800 @@ -1,28 +1,7 @@ -/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by reiser4/README +/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */ -This file contains all cluster operations and methods of the reiser4 -cryptcompress object plugin (see http://www.namesys.com/cryptcompress_design.html -for details). - -Cryptcompress specific fields of reiser4 inode/stat-data: - - Incore inode Disk stat-data -******************************************************************************************** -* data structure * field * data structure * field * -******************************************************************************************** -* plugin_set *file plugin id * reiser4_plugin_stat *file plugin id * -* *crypto plugin id * *crypto plugin id * -* *digest plugin id * *digest plugin id * -* *compression plugin id * *compression plugin id* -******************************************************************************************** -* crypto_stat_t * keysize * reiser4_crypto_stat * keysize * -* * keyid * * keyid * -******************************************************************************************** -* cluster_stat_t * cluster_shift * reiser4_cluster_stat * cluster_shift * -******************************************************************************************** -* cryptcompress_info_t * crypto_tfm * * * -******************************************************************************************** -*/ +/* This file contains methods of the reiser4 cryptcompress object plugin + (see http://www.namesys.com/cryptcompress_design.html for details). 
*/ #include "../../page_cache.h" #include "../../inode.h" @@ -36,6 +15,7 @@ Cryptcompress specific fields of reiser4 #include #include #include +#include /* get cryptcompress specific portion of inode */ cryptcompress_info_t *cryptcompress_inode_data(const struct inode *inode) @@ -67,10 +47,7 @@ static int crc_generic_check_ok(void) int crc_inode_ok(struct inode *inode) { - cryptcompress_info_t *data = cryptcompress_inode_data(inode); - - if (cluster_shift_ok(inode_cluster_shift(inode)) && - (data->tfm[CRYPTO_TFM] == NULL) && (data->tfm[DIGEST_TFM] == NULL)) + if (cluster_shift_ok(inode_cluster_shift(inode))) return 1; assert("edward-686", 0); return 0; @@ -80,7 +57,6 @@ int crc_inode_ok(struct inode *inode) static int check_cryptcompress(struct inode *inode) { int result = 0; - assert("edward-1307", inode_compression_plugin(inode) != NULL); if (inode_cluster_size(inode) < PAGE_CACHE_SIZE) { @@ -96,208 +72,365 @@ static int check_cryptcompress(struct in return result; } -static crypto_stat_t *inode_crypto_stat(struct inode *inode) +crypto_stat_t * inode_crypto_stat (struct inode * inode) { assert("edward-90", inode != NULL); assert("edward-91", reiser4_inode_data(inode) != NULL); - - return (cryptcompress_inode_data(inode)->crypt); + return cryptcompress_inode_data(inode)->crypt; } -/* NOTE-EDWARD: Do not use crypto without digest */ -static int alloc_crypto_tfm(struct inode *inode, struct inode *parent) +static void +set_inode_crypto_stat (struct inode * inode, crypto_stat_t * stat) { - int result; - crypto_plugin *cplug = inode_crypto_plugin(parent); - digest_plugin *dplug = inode_digest_plugin(parent); + cryptcompress_inode_data(inode)->crypt = stat; +} - assert("edward-414", dplug != NULL); - assert("edward-415", cplug != NULL); +crypto_stat_t * alloc_crypto_stat (struct inode * inode) +{ + crypto_stat_t * info; + int fipsize; - result = dplug->alloc(inode); - if (result) - return result; - result = cplug->alloc(inode); - if (result) { - dplug->free(inode); - return result; + info = kmalloc(sizeof(*info), GFP_KERNEL); + if (!info) + return ERR_PTR(-ENOMEM); + memset(info, 0, sizeof (*info)); + fipsize = inode_digest_plugin(inode)->fipsize; + info->keyid = kmalloc(fipsize, GFP_KERNEL); + if (!info->keyid) { + kfree(info); + return ERR_PTR(-ENOMEM); } - return 0; + return info; } -static void free_crypto_tfm(struct inode *inode) +static int +alloc_crypto_tfms(plugin_set * pset, crypto_stat_t * info) { - reiser4_inode *info; + struct crypto_tfm * ret = NULL; + crypto_plugin * cplug = pset->crypto; + digest_plugin * dplug = pset->digest; - assert("edward-410", inode != NULL); + assert("edward-1363", info != NULL); + assert("edward-414", cplug != NULL); + assert("edward-415", dplug != NULL); - info = reiser4_inode_data(inode); + if (cplug->alloc) { + ret = cplug->alloc(); + if (ret == NULL) { + warning("edward-1364", + "Can not allocate info for %s\n", + cplug->h.desc); + return RETERR(-EINVAL); + } + } + info_set_tfm(info, CIPHER_TFM, ret); + if (dplug->alloc) { + ret = dplug->alloc(); + if (ret == NULL) { + warning("edward-1365", + "Can not allocate info for %s\n", + dplug->h.desc); + goto err; + } + } + info_set_tfm(info, DIGEST_TFM, ret); + return 0; + err: + if (cplug->free) { + cplug->free(info->tfma[CIPHER_TFM].tfm); + info_set_tfm(info, CIPHER_TFM, 0); + } + return RETERR(-EINVAL); +} - if (!inode_get_crypto(inode)) +static void +free_crypto_tfms(crypto_stat_t * info) +{ + assert("edward-1366", info != NULL); + if (!info_cipher_tfm(info)) return; + 
info_cipher_plugin(info)->free(info_cipher_tfm(info)); + info_set_tfm(info, CIPHER_TFM, 0); + info_digest_plugin(info)->free(info_digest_tfm(info)); + info_set_tfm(info, DIGEST_TFM, 0); + return; +} + +static int create_keyid (crypto_stat_t * info, crypto_data_t * data) +{ + int ret = -ENOMEM; + size_t blk, pad; + __u8 * dmem; + __u8 * cmem; + struct crypto_tfm * dtfm; + struct crypto_tfm * ctfm; + struct scatterlist sg; + + assert("edward-1367", info != NULL); + assert("edward-1368", info->keyid != NULL); - assert("edward-411", inode_crypto_plugin(inode)); - assert("edward-763", inode_digest_plugin(inode)); + dtfm = info_digest_tfm(info); + ctfm = info_cipher_tfm(info); - inode_crypto_plugin(inode)->free(inode); - inode_digest_plugin(inode)->free(inode); + dmem = kmalloc((size_t)crypto_tfm_alg_digestsize(dtfm), + GFP_KERNEL); + if (!dmem) + goto exit1; + + blk = crypto_tfm_alg_blocksize(ctfm); + + pad = data->keyid_size % blk; + pad = (pad ? blk - pad : 0); + + cmem = kmalloc((size_t)data->keyid_size + pad, GFP_KERNEL); + if (!cmem) + goto exit2; + memcpy(cmem, data->keyid, data->keyid_size); + memset(cmem + data->keyid_size, 0, pad); + + sg.page = virt_to_page(cmem); + sg.offset = offset_in_page(cmem); + sg.length = data->keyid_size + pad; + + ret = crypto_cipher_encrypt(ctfm, &sg, &sg, data->keyid_size + pad); + if (ret) { + warning("edward-1369", + "encryption failed flags=%x\n", ctfm->crt_flags); + goto exit3; + } + crypto_digest_init (dtfm); + crypto_digest_update (dtfm, &sg, 1); + crypto_digest_final (dtfm, dmem); + memcpy(info->keyid, dmem, info_digest_plugin(info)->fipsize); + exit3: + kfree(cmem); + exit2: + kfree(dmem); + exit1: + return ret; +} + +static void destroy_keyid(crypto_stat_t * info) +{ + assert("edward-1370", info != NULL); + assert("edward-1371", info->keyid != NULL); + kfree(info->keyid); + return; } -static int attach_crypto_stat(struct inode *inode, crypto_data_t * data) +static void free_crypto_stat (crypto_stat_t * info) { - __u8 *txt; + assert("edward-1372", info != NULL); - crypto_stat_t *stat; - struct scatterlist sg; - struct crypto_tfm *dtfm; + free_crypto_tfms(info); + destroy_keyid(info); + kfree(info); +} - assert("edward-690", inode_get_crypto(inode)); - assert("edward-766", inode_get_digest(inode)); +static void instantiate_crypto_stat(crypto_stat_t * info) +{ + assert("edward-1373", info != NULL); + assert("edward-1374", info->inst == 0); + info->inst = 1; +} - dtfm = inode_get_digest(inode); +static void uninstantiate_crypto_stat(crypto_stat_t * info) +{ + assert("edward-1375", info != NULL); + info->inst = 0; +} - stat = kmalloc(sizeof(*stat), GFP_KERNEL); - if (!stat) - return -ENOMEM; - - stat->keyid = - kmalloc((size_t) crypto_tfm_alg_digestsize(dtfm), - GFP_KERNEL); - if (!stat->keyid) { - kfree(stat); - return -ENOMEM; - } - txt = kmalloc(data->keyid_size, GFP_KERNEL); - if (!txt) { - kfree(stat->keyid); - kfree(stat); - return -ENOMEM; - } - memcpy(txt, data->keyid, data->keyid_size); - sg.page = virt_to_page(txt); - sg.offset = offset_in_page(txt); - sg.length = data->keyid_size; - - crypto_digest_init(dtfm); - crypto_digest_update(dtfm, &sg, 1); - crypto_digest_final(dtfm, stat->keyid); +int crypto_stat_instantiated(crypto_stat_t * info) +{ + return info->inst; +} - cryptcompress_inode_data(inode)->crypt = stat; - kfree(txt); +static int inode_has_cipher_key(struct inode * inode) +{ + assert("edward-1376", inode != NULL); + return inode_crypto_stat(inode) && + crypto_stat_instantiated(inode_crypto_stat(inode)); +} - return 0; +static void 
inode_free_crypto_stat (struct inode * inode) +{ + uninstantiate_crypto_stat(inode_crypto_stat(inode)); + free_crypto_stat(inode_crypto_stat(inode)); } -static void detach_crypto_stat(struct inode *object) +/* Instantiate a crypto-stat represented by low-level @data for the @object */ +crypto_stat_t * +create_crypto_stat(struct inode * object, crypto_data_t * data) { - crypto_stat_t *stat; + int ret; + crypto_stat_t * info; - stat = inode_crypto_stat(object); + assert("edward-1377", data != NULL); + assert("edward-1378", need_cipher(object)); - assert("edward-691", crc_inode_ok(object)); + if (inode_file_plugin(object) != + file_plugin_by_id(DIRECTORY_FILE_PLUGIN_ID)) + return ERR_PTR(-EINVAL); - if (!inode_get_crypto(object)) - return; + info = alloc_crypto_stat(object); + if (IS_ERR(info)) + return info; + ret = alloc_crypto_tfms(reiser4_inode_data(object)->pset, info); + if (ret) + goto err; + /* Someone can change plugins of the host, so + we keep the original ones in the crypto-stat. */ + info_set_crypto_plugin(info, inode_crypto_plugin(object)); + info_set_digest_plugin(info, inode_digest_plugin(object)); + + ret = crypto_cipher_setkey(info_cipher_tfm(info), + data->key, + data->keysize); + if (ret) { + warning("edward-1379", + "setkey failed flags=%x\n", + info_cipher_tfm(info)->crt_flags); + goto err; + } + info->keysize = data->keysize; + ret = create_keyid(info, data); + if (ret) + goto err; + instantiate_crypto_stat(info); + return info; + err: + free_crypto_stat(info); + return ERR_PTR(ret); +} + +void load_crypto_stat(crypto_stat_t * info) +{ + assert("edward-1380", info != NULL); + inc_keyload_count(info); +} - assert("edward-412", stat != NULL); +static void +unload_crypto_stat(struct inode * inode) +{ + crypto_stat_t * info = inode_crypto_stat(inode); + assert("edward-1381", info->keyload_count > 0); - kfree(stat->keyid); - kfree(stat); + dec_keyload_count(inode_crypto_stat(inode)); + if (info->keyload_count == 0) + inode_free_crypto_stat(inode); } -/* 1) fill crypto specific part of inode - 2) set inode crypto stat which is supposed to be saved in stat-data */ -static int -inode_set_crypto(struct inode *object, struct inode *parent, - crypto_data_t * data) +void attach_crypto_stat(struct inode * inode, crypto_stat_t * info) { - int result; - struct crypto_tfm *tfm; - crypto_plugin *cplug; - digest_plugin *dplug; - reiser4_inode *info = reiser4_inode_data(object); + assert("edward-1382", inode != NULL); + assert("edward-1383", info != NULL); + assert("edward-1384", inode_crypto_stat(inode) == NULL); - cplug = inode_crypto_plugin(parent); - dplug = inode_digest_plugin(parent); + set_inode_crypto_stat(inode, info); + load_crypto_stat(info); +} - plugin_set_crypto(&info->pset, cplug); - plugin_set_digest(&info->pset, dplug); +void detach_crypto_stat(struct inode * inode) +{ + assert("edward-1385", inode != NULL); + assert("edward-1386", host_allows_crypto_stat(inode)); - result = alloc_crypto_tfm(object, parent); - if (!result) - return result; + if (inode_crypto_stat(inode)) + unload_crypto_stat(inode); + set_inode_crypto_stat(inode, NULL); +} - if (!inode_get_crypto(object)) - /* nothing to do anymore */ +static int keyid_eq(crypto_stat_t * child, crypto_stat_t * parent) +{ + return !memcmp(child->keyid, parent->keyid, info_digest_plugin(parent)->fipsize); +} + +int can_inherit_crypto_crc(struct inode *child, struct inode *parent) +{ + if (!need_cipher(child)) + return 0; + if (!inode_crypto_stat(child)) + /* the file being created */ + return 1; + /* the file being looked
up */ + if (!inode_crypto_stat(parent)) return 0; + return (inode_crypto_plugin(child) == inode_crypto_plugin(parent) && + inode_digest_plugin(child) == inode_digest_plugin(parent) && + inode_crypto_stat(child)->keysize == inode_crypto_stat(parent)->keysize && + keyid_eq(inode_crypto_stat(child), inode_crypto_stat(parent))); +} - assert("edward-414", dplug != NULL); - assert("edward-415", cplug != NULL); - assert("edward-417", data->key != NULL); - assert("edward-88", data->keyid != NULL); - assert("edward-83", data->keyid_size != 0); - assert("edward-89", data->keysize != 0); +int need_cipher(struct inode * inode) +{ + return inode_crypto_plugin(inode) != + crypto_plugin_by_id(NONE_CRYPTO_ID); +} - tfm = inode_get_tfm(object, CRYPTO_TFM); - assert("edward-695", tfm != NULL); +/* returns true, if crypto stat can be attached to the @host */ +int host_allows_crypto_stat(struct inode * host) +{ + int ret; + file_plugin * fplug = inode_file_plugin(host); - result = cplug->setkey(tfm, data->key, data->keysize); - if (result) { - free_crypto_tfm(object); - return result; + switch (fplug->h.id) { + case CRC_FILE_PLUGIN_ID: + ret = 1; + break; + default: + ret = 0; } - assert("edward-34", - !inode_get_flag(object, REISER4_SECRET_KEY_INSTALLED)); - inode_set_flag(object, REISER4_SECRET_KEY_INSTALLED); + return ret; +} +static int inode_set_crypto(struct inode * object) +{ + reiser4_inode * info; + if (!inode_crypto_stat(object)) { + if (need_cipher(object)) + return RETERR(-EINVAL); + /* the file is not to be encrypted */ + return 0; + } + info = reiser4_inode_data(object); info->extmask |= (1 << CRYPTO_STAT); - - result = attach_crypto_stat(object, data); - if (result) - goto error; - info->plugin_mask |= (1 << PSET_CRYPTO) | (1 << PSET_DIGEST); - - return 0; - error: - free_crypto_tfm(object); - inode_clr_flag(object, REISER4_SECRET_KEY_INSTALLED); - return result; + return 0; } -static int inode_set_compression(struct inode *object, struct inode *parent) +static int +inode_set_compression(struct inode * object) { int result = 0; - compression_plugin *cplug; - reiser4_inode *info = reiser4_inode_data(object); + compression_plugin * cplug; + reiser4_inode * info = reiser4_inode_data(object); - cplug = inode_compression_plugin(parent); + cplug = inode_compression_plugin(object); if (cplug->init != NULL) { result = cplug->init(); if (result) return result; } - plugin_set_compression(&info->pset, cplug); info->plugin_mask |= (1 << PSET_COMPRESSION); return 0; } static void -inode_set_compression_mode(struct inode *object, struct inode *parent) +inode_set_compression_mode(struct inode * object) { - compression_mode_plugin *mplug; - reiser4_inode *info = reiser4_inode_data(object); + compression_mode_plugin * mplug; + reiser4_inode * info = reiser4_inode_data(object); - mplug = inode_compression_mode_plugin(parent); + mplug = inode_compression_mode_plugin(object); plugin_set_compression_mode(&info->pset, mplug); info->plugin_mask |= (1 << PSET_COMPRESSION_MODE); return; } -static int inode_set_cluster(struct inode *object, struct inode *parent) +static int inode_set_cluster(struct inode *object) { reiser4_inode *info; cluster_plugin *cplug; @@ -305,15 +438,13 @@ static int inode_set_cluster(struct inod assert("edward-696", object != NULL); info = reiser4_inode_data(object); - cplug = inode_cluster_plugin(parent); + cplug = inode_cluster_plugin(object); if (cplug->shift < PAGE_CACHE_SHIFT) { warning("edward-1320", "Can not support cluster size %p", cplug->h.label); return RETERR(-EINVAL); } - 
plugin_set_cluster(&info->pset, cplug); - info->plugin_mask |= (1 << PSET_CLUSTER); return 0; } @@ -347,17 +478,17 @@ create_cryptcompress(struct inode *objec info->plugin_mask |= (1 << PSET_FILE); /* set crypto */ - result = inode_set_crypto(object, parent, data->crypto); + result = inode_set_crypto(object); if (result) goto error; /* set compression */ - result = inode_set_compression(object, parent); + result = inode_set_compression(object); if (result) goto error; - inode_set_compression_mode(object, parent); + inode_set_compression_mode(object); /* set cluster info */ - result = inode_set_cluster(object, parent); + result = inode_set_cluster(object); if (result) goto error; /* set plugin mask */ @@ -367,41 +498,54 @@ create_cryptcompress(struct inode *objec result = write_sd_by_inode_common(object); if (!result) return 0; - /* save() method failed, release attached crypto info */ - inode_clr_flag(object, REISER4_CRYPTO_STAT_LOADED); - error: - free_crypto_tfm(object); + error: detach_crypto_stat(object); - inode_clr_flag(object, REISER4_SECRET_KEY_INSTALLED); return result; } -/* plugin->destroy_inode() */ -void destroy_inode_cryptcompress(struct inode *inode) +int open_cryptcompress(struct inode * inode, struct file * file) { - assert("edward-802", + assert("edward-1394", inode != NULL); + assert("edward-1395", file != NULL); + assert("edward-1396", file != NULL); + assert("edward-1397", file->f_dentry->d_inode == inode); + assert("edward-1398", file->f_dentry->d_parent != NULL); + assert("edward-1399", file->f_dentry->d_parent->d_inode != NULL); + assert("edward-698", inode_file_plugin(inode) == file_plugin_by_id(CRC_FILE_PLUGIN_ID)); - assert("edward-803", !is_bad_inode(inode) && is_inode_loaded(inode)); - - free_crypto_tfm(inode); - if (inode_get_flag(inode, REISER4_CRYPTO_STAT_LOADED)) - detach_crypto_stat(inode); + struct inode * parent; + if (!need_cipher(inode)) + /* the file is not to be ciphered */ + return 0; + parent = file->f_dentry->d_parent->d_inode; + if (!inode_has_cipher_key(inode)) + return RETERR(-EINVAL); + return 0; +} - inode_clr_flag(inode, REISER4_CRYPTO_STAT_LOADED); - inode_clr_flag(inode, REISER4_SECRET_KEY_INSTALLED); +static unsigned int +crypto_blocksize(struct inode * inode) +{ + assert("edward-758", need_cipher(inode)); + assert("edward-1400", inode_crypto_stat(inode) != NULL); + return crypto_tfm_alg_blocksize + (info_cipher_tfm(inode_crypto_stat(inode))); } /* returns translated offset */ -static loff_t inode_scaled_offset(struct inode *inode, - const loff_t src_off /* input offset */ ) +static loff_t inode_scaled_offset (struct inode * inode, + const loff_t src_off /* input offset */) { assert("edward-97", inode != NULL); - if (!inode_get_crypto(inode) || src_off == get_key_offset(max_key())) + if (!need_cipher(inode) || + src_off == get_key_offset(min_key()) || + src_off == get_key_offset(max_key())) return src_off; - return inode_crypto_plugin(inode)->scale(inode, crypto_blocksize(inode), + return inode_crypto_plugin(inode)->scale(inode, + crypto_blocksize(inode), src_off); } @@ -543,23 +687,20 @@ static int reserve4cluster(struct inode } assert("edward-442", jprivate(clust->pages[0]) != NULL); - result = reiser4_grab_space_force( /* for prepped disk cluster */ - estimate_insert_cluster(inode, - 0) + - /* for unprepped disk cluster */ - estimate_insert_cluster(inode, - 1), - BA_CAN_COMMIT); + result = reiser4_grab_space_force(estimate_insert_cluster(inode) + + estimate_update_cluster(inode), + BA_CAN_COMMIT); if (result) return result; 
clust->reserved = 1; - grabbed2cluster_reserved(estimate_insert_cluster(inode, 0) + - estimate_insert_cluster(inode, 1)); + grabbed2cluster_reserved(estimate_insert_cluster(inode) + + estimate_update_cluster(inode)); #if REISER4_DEBUG - clust->reserved_prepped = estimate_insert_cluster(inode, 0); - clust->reserved_unprepped = estimate_insert_cluster(inode, 1); + clust->reserved_prepped = estimate_update_cluster(inode); + clust->reserved_unprepped = estimate_insert_cluster(inode); #endif - assert("edward-1262", get_current_context()->grabbed_blocks == 0); + /* there can be space grabbed by txnmgr_force_commit_all */ + all_grabbed2free(); return 0; } @@ -572,14 +713,6 @@ free_reserved4cluster(struct inode *inod clust->reserved = 0; } -#if REISER4_DEBUG -static int eq_to_ldk(znode * node, const reiser4_key * key) -{ - return UNDER_RW(dk, current_tree, read, - keyeq(key, znode_get_ld_key(node))); -} -#endif - /* The core search procedure. If returned value is not cbk_errored, current znode is locked */ static int find_cluster_item(hint_t * hint, const reiser4_key * key, /* key of the item we are @@ -617,7 +750,7 @@ static int find_cluster_item(hint_t * hi } if (result) return result; - assert("edward-1218", eq_to_ldk(coord->node, key)); + assert("edward-1218", equal_to_ldk(coord->node, key)); } else { coord->item_pos++; coord->unit_pos = 0; @@ -661,12 +794,68 @@ static int find_cluster_item(hint_t * hi return result; } +/* The following function is called by deflate[inflate] manager + * to check if we need to align[cut] the overhead. + * If true, @oh represents the size of unaligned[aligned] overhead. + */ +static int +need_cut_or_align(struct inode * inode, reiser4_cluster_t * clust, + rw_op rw, int * oh) +{ + tfm_cluster_t * tc = &clust->tc; + switch (rw) { + case WRITE_OP: /* estimate align */ + *oh = tc->len % crypto_blocksize(inode); + if (*oh != 0) + return 1; + break; + case READ_OP: /* estimate cut */ + *oh = *(tfm_output_data(clust) + tc->len - 1); + break; + default: + impossible("edward-1401", "bad option"); + } + return (tc->len != tc->lsize); +} + +/* align or cut the overheads of + . input stream if @rw is WRITE_OP + . output stream if @rw is READ_OP */ +static void +align_or_cut_overhead(struct inode * inode, reiser4_cluster_t * clust, rw_op rw) +{ + int oh; + crypto_plugin * cplug = inode_crypto_plugin(inode); + + assert("edward-1402", need_cipher(inode)); + + if (!need_cut_or_align(inode, clust, rw, &oh)) + return; + switch (rw) { + case WRITE_OP: /* do align */ + clust->tc.len += + cplug->align_stream(tfm_input_data(clust) + + clust->tc.len, clust->tc.len, + crypto_blocksize(inode)); + *(tfm_input_data(clust) + clust->tc.len - 1) = + crypto_blocksize(inode) - oh; + break; + case READ_OP: /* do cut */ + assert("edward-1403", oh <= crypto_blocksize(inode)); + clust->tc.len -= oh; + break; + default: + impossible("edward-1404", "bad option"); + } + return; +} + /* the following two functions are to evaluate results of compression transform */ -static unsigned max_crypto_overhead(struct inode *inode) +static unsigned +max_crypto_overhead(struct inode * inode) { - if (!inode_get_crypto(inode) - || !inode_crypto_plugin(inode)->align_stream) + if (!need_cipher(inode) || !inode_crypto_plugin(inode)->align_stream) return 0; return crypto_blocksize(inode); } @@ -681,10 +870,13 @@ static int deflate_overhead(struct inode static unsigned deflate_overrun(struct inode *inode, int in_len) { return (inode_compression_plugin(inode)->overrun != NULL ?
- inode_compression_plugin(inode)->overrun(in_len) : 0); + inode_compression_plugin(inode)->overrun(in_len) : + 0); } -/* The following two functions represent reiser4 compression policy */ +/* Estimating compressibility of a logical cluster. + This is a sanity check which uses various policies represented by + compression mode plugin. */ static int try_compress(tfm_cluster_t * tc, cloff_t index, struct inode *inode) { compression_plugin *cplug = inode_compression_plugin(inode); @@ -695,21 +887,23 @@ static int try_compress(tfm_cluster_t * assert("edward-1323", mplug != NULL); return (cplug->compress != NULL) && - (mplug->should_deflate != NULL ? mplug->should_deflate(index) : 1) - && (cplug->min_size_deflate != NULL ? tc->len >= - cplug->min_size_deflate() : 1); -} - -static int try_encrypt(struct inode *inode) -{ - return inode_get_crypto(inode) != NULL; + /* estimate by size */ + (cplug->min_size_deflate != NULL ? + tc->len >= cplug->min_size_deflate() : + 1) && + /* estimate by content */ + (mplug->should_deflate != NULL ? + mplug->should_deflate(index) : + 1); } -/* Evaluation results of compression transform. */ -static int save_compressed(int old_size, int new_size, struct inode *inode) +/* Evaluating the results of compression transform. + Returns true, if we need to accept the result. */ +static int +save_compressed(int size_before, int size_after, struct inode * inode) { - return (new_size + deflate_overhead(inode) + - max_crypto_overhead(inode) < old_size); + return (size_after + deflate_overhead(inode) + + max_crypto_overhead(inode) < size_before); } /* Guess result of the evaluation above */ @@ -724,11 +918,28 @@ need_inflate(reiser4_cluster_t * clust, return tc->len < (encrypted ? - inode_scaled_offset(inode, fsize_to_count(clust, inode)) : - fsize_to_count(clust, inode)); + inode_scaled_offset(inode, tc->lsize) : + tc->lsize); } -/* append checksum at the end of input transform stream +/* If a logical cluster got compressed we add a checksum to catch possible + disk cluster corruptions. The following is a format of the data stored + in disk clusters: + + data This is (transformed) logical cluster. + crypto_overhead This is created by ->align() method + of crypto-plugin. May be absent. + checksum (4) This is created by ->checksum method + of compression plugin to check + integrity. May be absent.
+ + Crypto overhead format: + + data + control_byte (1) contains aligned overhead size: + 1 <= overhead <= crypto_blksize +*/ +/* Append checksum at the end of input transform stream and increase its length */ static void dc_set_checksum(compression_plugin * cplug, tfm_cluster_t * tc) { @@ -752,34 +963,36 @@ static int dc_check_checksum(compression assert("edward-1313", tc->len > (int)DC_CHECKSUM_SIZE); assert("edward-1314", cplug->checksum != NULL); - if (cplug-> - checksum(tfm_stream_data(tc, INPUT_STREAM), - tc->len - (int)DC_CHECKSUM_SIZE) != - le32_to_cpu(get_unaligned((d32 *) (tfm_stream_data(tc, INPUT_STREAM) + tc->len - - (int)DC_CHECKSUM_SIZE)))) { + if (cplug->checksum(tfm_stream_data(tc, INPUT_STREAM), + tc->len - (int)DC_CHECKSUM_SIZE) != + le32_to_cpu(get_unaligned((d32 *) + (tfm_stream_data(tc, INPUT_STREAM) + + tc->len - (int)DC_CHECKSUM_SIZE)))) { warning("edward-156", - "bad disk cluster checksum %d, (should be %d)\n", - (int) - le32_to_cpu(get_unaligned((d32 *) (tfm_stream_data(tc, INPUT_STREAM) + - tc->len - (int)DC_CHECKSUM_SIZE))), - (int)cplug->checksum(tfm_stream_data(tc, INPUT_STREAM), - tc->len - (int)DC_CHECKSUM_SIZE)); + "Bad disk cluster checksum %d, (should be %d) Fsck?\n", + (int)le32_to_cpu + (get_unaligned((d32 *) + (tfm_stream_data(tc, INPUT_STREAM) + + tc->len - (int)DC_CHECKSUM_SIZE))), + (int)cplug->checksum + (tfm_stream_data(tc, INPUT_STREAM), + tc->len - (int)DC_CHECKSUM_SIZE)); return 1; } tc->len -= (int)DC_CHECKSUM_SIZE; return 0; } -int -grab_tfm_stream(struct inode *inode, tfm_cluster_t * tc, - tfm_action act, tfm_stream_id id) +int grab_tfm_stream(struct inode * inode, tfm_cluster_t * tc, + tfm_stream_id id) { size_t size = inode_scaled_cluster_size(inode); assert("edward-901", tc != NULL); + assert("edward-1347", tc->act != TFM_INVAL); assert("edward-1027", inode_compression_plugin(inode) != NULL); - if (act == TFM_WRITE) + if (tc->act == TFM_WRITE) size += deflate_overrun(inode, inode_cluster_size(inode)); if (!tfm_stream(tc, id) && id == INPUT_STREAM) @@ -794,32 +1007,35 @@ grab_tfm_stream(struct inode *inode, tfm return 0; } -/* Common deflate cluster manager */ -int deflate_cluster(reiser4_cluster_t * clust, struct inode *inode) +/* Common deflate manager */ +int deflate_cluster(reiser4_cluster_t * clust, struct inode * inode) { int result = 0; - int transformed = 0; - tfm_cluster_t *tc = &clust->tc; + int compressed = 0; + int encrypted = 0; + tfm_cluster_t * tc = &clust->tc; + compression_plugin * coplug; assert("edward-401", inode != NULL); assert("edward-903", tfm_stream_is_set(tc, INPUT_STREAM)); + assert("edward-1348", tc->act == TFM_WRITE); assert("edward-498", !tfm_cluster_is_uptodate(tc)); + coplug = inode_compression_plugin(inode); if (try_compress(tc, clust->index, inode)) { /* try to compress, discard bad results */ __u32 dst_len; - compression_plugin *cplug = inode_compression_plugin(inode); - compression_mode_plugin *mplug = - inode_compression_mode_plugin(inode); - assert("edward-602", cplug != NULL); + compression_mode_plugin * mplug = + inode_compression_mode_plugin(inode); + assert("edward-602", coplug != NULL); - result = grab_tfm_stream(inode, tc, TFM_WRITE, OUTPUT_STREAM); + result = grab_tfm_stream(inode, tc, OUTPUT_STREAM); if (result) return result; dst_len = tfm_stream_size(tc, OUTPUT_STREAM); - cplug->compress(get_coa(tc, cplug->h.id), - tfm_stream_data(tc, INPUT_STREAM), tc->len, - tfm_stream_data(tc, OUTPUT_STREAM), &dst_len); + coplug->compress(get_coa(tc, coplug->h.id), + tfm_input_data(clust), tc->len, + 
tfm_output_data(clust), &dst_len); /* make sure we didn't overwrite extra bytes */ assert("edward-603", @@ -829,122 +1045,131 @@ int deflate_cluster(reiser4_cluster_t * if (save_compressed(tc->len, dst_len, inode)) { /* good result, accept */ tc->len = dst_len; - if (cplug->checksum != NULL) - dc_set_checksum(cplug, tc); - transformed = 1; - if (mplug->save_deflate != NULL) - mplug->save_deflate(inode); - } else { + if (mplug->accept_hook != NULL) + mplug->accept_hook(inode); + compressed = 1; + } + else { /* bad result, discard */ #if REISER4_DEBUG - warning("edward-1309", + warning("edward-1338", "incompressible data: inode %llu, cluster %lu", (unsigned long long)get_inode_oid(inode), clust->index); #endif - if (mplug->discard_deflate != NULL) { - result = - mplug->discard_deflate(inode, clust->index); + if (mplug->discard_hook != NULL && + cluster_is_complete(clust, inode)) { + result = mplug->discard_hook(inode, + clust->index); if (result) return result; } } } - if (try_encrypt(inode)) { - crypto_plugin *cplug; - /* FIXME-EDWARD */ - assert("edward-904", 0); - - cplug = inode_crypto_plugin(inode); - if (transformed) + if (need_cipher(inode)) { + crypto_plugin * ciplug; + struct crypto_tfm * tfm; + struct scatterlist src; + struct scatterlist dst; + + ciplug = inode_crypto_plugin(inode); + tfm = info_cipher_tfm(inode_crypto_stat(inode)); + if (compressed) alternate_streams(tc); - result = grab_tfm_stream(inode, tc, TFM_WRITE, OUTPUT_STREAM); + result = grab_tfm_stream(inode, tc, OUTPUT_STREAM); if (result) return result; - /* FIXME: set src_len, dst_len, encrypt */ - transformed = 1; + + align_or_cut_overhead(inode, clust, WRITE_OP); + src.page = virt_to_page(tfm_input_data(clust)); + src.offset = offset_in_page(tfm_input_data(clust)); + src.length = tc->len; + + dst.page = virt_to_page(tfm_output_data(clust)); + dst.offset = offset_in_page(tfm_output_data(clust)); + dst.length = tc->len; + + result = crypto_cipher_encrypt(tfm, &dst, &src, tc->len); + if (result) { + warning("edward-1405", + "encryption failed flags=%x\n", tfm->crt_flags); + return result; + } + encrypted = 1; } - if (!transformed) + if (compressed && coplug->checksum != NULL) + dc_set_checksum(coplug, tc); + if (!compressed && !encrypted) alternate_streams(tc); return result; } -/* Common inflate cluster manager. - Is used in readpage() or readpages() methods of - cryptcompress object plugins. */ -int inflate_cluster(reiser4_cluster_t * clust, struct inode *inode) +/* Common inflate cluster manager. 
*/ +int inflate_cluster(reiser4_cluster_t * clust, struct inode * inode) { int result = 0; int transformed = 0; - - tfm_cluster_t *tc = &clust->tc; + tfm_cluster_t * tc = &clust->tc; + compression_plugin * coplug; assert("edward-905", inode != NULL); assert("edward-1178", clust->dstat == PREP_DISK_CLUSTER); assert("edward-906", tfm_stream_is_set(&clust->tc, INPUT_STREAM)); + assert("edward-1349", tc->act == TFM_READ); assert("edward-907", !tfm_cluster_is_uptodate(tc)); - if (inode_get_crypto(inode) != NULL) { - crypto_plugin *cplug; - - /* FIXME-EDWARD: isn't supported yet */ - assert("edward-908", 0); - cplug = inode_crypto_plugin(inode); - assert("edward-617", cplug != NULL); - - result = grab_tfm_stream(inode, tc, TFM_READ, OUTPUT_STREAM); + /* Handle a checksum (if any) */ + coplug = inode_compression_plugin(inode); + if (need_inflate(clust, inode, need_cipher(inode)) && + coplug->checksum != NULL) { + result = dc_check_checksum(coplug, tc); + if (result) + return RETERR(-EIO); + } + if (need_cipher(inode)) { + crypto_plugin * ciplug; + struct crypto_tfm * tfm; + struct scatterlist src; + struct scatterlist dst; + + ciplug = inode_crypto_plugin(inode); + tfm = info_cipher_tfm(inode_crypto_stat(inode)); + result = grab_tfm_stream(inode, tc, OUTPUT_STREAM); if (result) return result; assert("edward-909", tfm_cluster_is_set(tc)); - /* set src_len, dst_len and decrypt */ - /* tc->len = dst_len */ + src.page = virt_to_page(tfm_input_data(clust)); + src.offset = offset_in_page(tfm_input_data(clust)); + src.length = tc->len; + + dst.page = virt_to_page(tfm_output_data(clust)); + dst.offset = offset_in_page(tfm_output_data(clust)); + dst.length = tc->len; + result = crypto_cipher_decrypt(tfm, &dst, &src, tc->len); + if (result) + return result; + align_or_cut_overhead(inode, clust, READ_OP); transformed = 1; } if (need_inflate(clust, inode, 0)) { unsigned dst_len = inode_cluster_size(inode); - compression_plugin *cplug = inode_compression_plugin(inode); - - if (transformed) + if(transformed) alternate_streams(tc); - result = grab_tfm_stream(inode, tc, TFM_READ, OUTPUT_STREAM); + result = grab_tfm_stream(inode, tc, OUTPUT_STREAM); if (result) return result; - assert("edward-1305", cplug->decompress != NULL); + assert("edward-1305", coplug->decompress != NULL); assert("edward-910", tfm_cluster_is_set(tc)); - /* Check compression checksum for possible IO errors. - - End-of-cluster format created before encryption: - - data - checksum (4) Indicates presence of compression - infrastructure, should be private. - Can be absent. - crypto_overhead Created by ->align() method of crypto-plugin, - Can be absent. 
- - Crypto overhead format: - - data - tail_size (1) size of aligning tail, - 1 <= tail_size <= blksize - */ - if (cplug->checksum != NULL) { - result = dc_check_checksum(cplug, tc); - if (result) - return RETERR(-EIO); - } - /* decompress cluster */ - cplug->decompress(get_coa(tc, cplug->h.id), - tfm_stream_data(tc, INPUT_STREAM), tc->len, - tfm_stream_data(tc, OUTPUT_STREAM), &dst_len); - + coplug->decompress(get_coa(tc, coplug->h.id), + tfm_input_data(clust), tc->len, + tfm_output_data(clust), &dst_len); /* check length */ tc->len = dst_len; - assert("edward-157", dst_len == fsize_to_count(clust, inode)); + assert("edward-157", dst_len == tc->lsize); transformed = 1; } if (!transformed) @@ -952,13 +1177,8 @@ int inflate_cluster(reiser4_cluster_t * return result; } -/* plugin->read() : - * generic_file_read() - * All key offsets don't make sense in traditional unix semantics unless they - * represent the beginning of clusters, so the only thing we can do is start - * right from mapping to the address space (this is precisely what filemap - * generic method does) */ -/* plugin->readpage() */ +/* This is implementation of readpage method of struct + address_space_operations for cryptcompress plugin. */ int readpage_cryptcompress(struct file *file, struct page *page) { reiser4_context *ctx; @@ -978,22 +1198,21 @@ int readpage_cryptcompress(struct file * reiser4_exit_context(ctx); return result; } - if (file) - assert("edward-113", - page->mapping == file->f_dentry->d_inode->i_mapping); + assert("edward-113", + ergo(file != NULL, + page->mapping == file->f_dentry->d_inode->i_mapping)); if (PageUptodate(page)) { - printk - ("readpage_cryptcompress: page became already uptodate\n"); + warning("edward-1338", "page is already uptodate\n"); unlock_page(page); reiser4_exit_context(ctx); return 0; } - reiser4_cluster_init(&clust, NULL); + cluster_init_read(&clust, 0); clust.file = file; iplug = item_plugin_by_id(CTAIL_ID); if (!iplug->s.file.readpage) { - put_cluster_handle(&clust, TFM_READ); + put_cluster_handle(&clust); reiser4_exit_context(ctx); return -EINVAL; } @@ -1001,11 +1220,7 @@ int readpage_cryptcompress(struct file * assert("edward-64", ergo(result == 0, (PageLocked(page) || PageUptodate(page)))); - /* if page has jnode - that jnode is mapped - assert("edward-65", ergo(result == 0 && PagePrivate(page), - jnode_mapped(jprivate(page)))); - */ - put_cluster_handle(&clust, TFM_READ); + put_cluster_handle(&clust); reiser4_exit_context(ctx); return result; } @@ -1065,7 +1280,7 @@ static void set_cluster_pages_dirty(reis assert("edward-1065", PageUptodate(pg)); - set_page_dirty_internal(pg, 0); + set_page_dirty_internal(pg); if (!PageReferenced(pg)) SetPageReferenced(pg); @@ -1084,17 +1299,22 @@ static void clear_cluster_pages_dirty(re assert("edward-1276", clust->pages[i] != NULL); lock_page(clust->pages[i]); - if (!PageDirty(clust->pages[i])) { + if (PageDirty(clust->pages[i])) { + assert("edward-1277", PageUptodate(clust->pages[i])); + clear_page_dirty_for_io(clust->pages[i]); + } +#if REISER4_DEBUG + else + /* Race between flush and write: + some pages became clean when write() (or another + process which modifies data) capture the cluster. 
*/ warning("edward-985", "Page of index %lu (inode %llu)" " is not dirty\n", clust->pages[i]->index, (unsigned long long)get_inode_oid(clust-> pages[i]-> mapping-> host)); - } else { - assert("edward-1277", PageUptodate(clust->pages[i])); - reiser4_clear_page_dirty(clust->pages[i]); - } +#endif unlock_page(clust->pages[i]); } } @@ -1145,13 +1365,13 @@ make_cluster_jnode_dirty_locked(reiser4_ assert("edward-221", node != NULL); assert("edward-971", clust->reserved == 1); - assert("edward-1028", spin_jnode_is_locked(node)); + assert_spin_locked(&(node->guard)); assert("edward-972", node->page_count < cluster_nrpages(inode)); assert("edward-1263", - clust->reserved_prepped == estimate_insert_cluster(inode, 0)); + clust->reserved_prepped == estimate_update_cluster(inode)); assert("edward-1264", clust->reserved_unprepped == 0); - if (jnode_is_dirty(node)) { + if (JF_ISSET(node, JNODE_DIRTY)) { /* there are >= 1 pages already referenced by this jnode */ assert("edward-973", count_to_nrpages(off_to_count @@ -1162,7 +1382,7 @@ make_cluster_jnode_dirty_locked(reiser4_ /* space for the disk cluster is already reserved */ free_reserved4cluster(inode, clust, - estimate_insert_cluster(inode, 0)); + estimate_update_cluster(inode)); } else { /* there is only one page referenced by this jnode */ assert("edward-1043", node->page_count == 0); @@ -1171,7 +1391,7 @@ make_cluster_jnode_dirty_locked(reiser4_ clust->reserved = 0; } #if REISER4_DEBUG - clust->reserved_prepped -= estimate_insert_cluster(inode, 0); + clust->reserved_prepped -= estimate_update_cluster(inode); #endif new_refcnt = cluster_nrpages_to_capture(clust) - 1; @@ -1218,7 +1438,7 @@ static int try_capture_cluster(reiser4_c assert("edward-1035", node != NULL); - LOCK_JNODE(node); + spin_lock_jnode(node); if (clust->win) inode_set_new_size(clust, inode); @@ -1228,7 +1448,7 @@ static int try_capture_cluster(reiser4_c make_cluster_jnode_dirty_locked(clust, node, &old_size, inode); exit: assert("edward-1034", !result); - UNLOCK_JNODE(node); + spin_unlock_jnode(node); jput(node); return result; } @@ -1261,11 +1481,13 @@ grab_cluster_pages_jnode(struct inode *i } if (i == 0) { node = jnode_of_page(clust->pages[i]); - unlock_page(clust->pages[i]); if (IS_ERR(node)) { result = PTR_ERR(node); + unlock_page(clust->pages[i]); break; } + JF_SET(node, JNODE_CLUSTER_PAGE); + unlock_page(clust->pages[i]); assert("edward-919", node); continue; } @@ -1279,9 +1501,6 @@ grab_cluster_pages_jnode(struct inode *i return result; } assert("edward-920", jprivate(clust->pages[0])); - LOCK_JNODE(node); - JF_SET(node, JNODE_CLUSTER_PAGE); - UNLOCK_JNODE(node); return 0; } @@ -1312,6 +1531,35 @@ static int grab_cluster_pages(struct ino return result; } +/* @node might be attached by reiser4_writepage(), not by + cryptcompress plugin code, but emergency flush should + understand that pages of cryptcompress files are not + flushable. 
+*/ +int jnode_of_cluster(const jnode * node, struct page * page) +{ + assert("edward-1339", node != NULL); + assert("edward-1340", page != NULL); + assert("edward-1341", page->mapping != NULL); + assert("edward-1342", page->mapping->host != NULL); + assert("edward-1343", + ergo(jnode_is_unformatted(node), + get_inode_oid(page->mapping->host) == + node->key.j.objectid)); + if (inode_file_plugin(page->mapping->host) == + file_plugin_by_id(CRC_FILE_PLUGIN_ID)) { +#if REISER4_DEBUG + if (!jnode_is_cluster_page(node)) + warning("edward-1345", + "inode %llu: cluster page of index %lu became private", + (unsigned long long)get_inode_oid(page->mapping->host), + page->index); +#endif + return 1; + } + return 0; +} + /* put cluster pages */ static void release_cluster_pages(reiser4_cluster_t * clust, int from) { @@ -1459,22 +1707,24 @@ static int update_sd_cryptcompress(struc return result; } + +/* NOTE-Edward: this is too similar to reiser4/txnmgr.c:uncapture_jnode() */ static void uncapture_cluster_jnode(jnode * node) { txn_atom *atom; - assert("edward-1023", spin_jnode_is_locked(node)); + assert_spin_locked(&(node->guard)); /*jnode_make_clean(node); */ atom = jnode_get_atom(node); if (atom == NULL) { - assert("jmacd-7111", !jnode_is_dirty(node)); - UNLOCK_JNODE(node); + assert("jmacd-7111", !JF_ISSET(node, JNODE_DIRTY)); + spin_unlock_jnode(node); return; } uncapture_block(node); - UNLOCK_ATOM(atom); + spin_unlock_atom(atom); jput(node); } @@ -1508,27 +1758,27 @@ flush_cluster_pages(reiser4_cluster_t * assert("edward-241", schedulable()); assert("edward-718", crc_inode_ok(inode)); - LOCK_JNODE(node); + spin_lock_jnode(node); - if (!jnode_is_dirty(node)) { + if (!JF_ISSET(node, JNODE_DIRTY)) { assert("edward-981", node->page_count == 0); + + /* race with another flush */ + spin_unlock_jnode(node); warning("edward-982", "flush_cluster_pages: jnode is not dirty " "clust %lu, inode %llu\n", clust->index, (unsigned long long)get_inode_oid(inode)); - - /* race with another flush */ - UNLOCK_JNODE(node); return RETERR(-E_REPEAT); } - tc->len = fsize_to_count(clust, inode); + tc->len = tc->lsize = fsize_to_count(clust, inode); clust->nr_pages = count_to_nrpages(tc->len); assert("edward-983", clust->nr_pages == node->page_count + 1); #if REISER4_DEBUG node->page_count = 0; #endif - cluster_reserved2grabbed(estimate_insert_cluster(inode, 0)); + cluster_reserved2grabbed(estimate_update_cluster(inode)); uncapture_cluster_jnode(node); /* Try to create input stream for the found size (tc->len). 
@@ -1538,7 +1788,7 @@ flush_cluster_pages(reiser4_cluster_t * assert("edward-1224", schedulable()); - result = grab_tfm_stream(inode, tc, TFM_WRITE, INPUT_STREAM); + result = grab_tfm_stream(inode, tc, INPUT_STREAM); if (result) return result; @@ -1638,8 +1888,7 @@ void invalidate_hint_cluster(reiser4_clu clust->hint->ext_coord.valid = 0; } -static void -put_hint_cluster(reiser4_cluster_t * clust, struct inode *inode, +void put_hint_cluster(reiser4_cluster_t * clust, struct inode *inode, znode_lock_mode mode) { assert("edward-1286", clust != NULL); @@ -1756,29 +2005,25 @@ find_cluster(reiser4_cluster_t * clust, { flow_t f; hint_t *hint; - int result; + int result = 0; unsigned long cl_idx; ra_info_t ra_info; file_plugin *fplug; item_plugin *iplug; tfm_cluster_t *tc; + int was_grabbed; -#if REISER4_DEBUG - reiser4_context *ctx; - ctx = get_current_context(); -#endif assert("edward-138", clust != NULL); assert("edward-728", clust->hint != NULL); assert("edward-225", read || write); assert("edward-226", schedulable()); assert("edward-137", inode != NULL); assert("edward-729", crc_inode_ok(inode)); - assert("edward-474", get_current_context()->grabbed_blocks == 0); hint = clust->hint; cl_idx = clust->index; fplug = inode_file_plugin(inode); - + was_grabbed = get_current_context()->grabbed_blocks; tc = &clust->tc; assert("edward-462", !tfm_cluster_is_uptodate(tc)); @@ -1794,11 +2039,11 @@ find_cluster(reiser4_cluster_t * clust, /* reserve for flush to make dirty all the leaf nodes which contain disk cluster */ result = - reiser4_grab_space_force(estimate_disk_cluster(inode), + reiser4_grab_space_force(estimate_dirty_cluster(inode), BA_CAN_COMMIT); assert("edward-990", !result); if (result) - goto out2; + goto out; } ra_info.key_to_stop = f.key; @@ -1812,6 +2057,7 @@ find_cluster(reiser4_cluster_t * clust, (write ? CBK_FOR_INSERT : 0)); switch (result) { case CBK_COORD_NOTFOUND: + result = 0; if (inode_scaled_offset (inode, clust_to_off(cl_idx, @@ -1819,8 +2065,7 @@ find_cluster(reiser4_cluster_t * clust, /* first item not found, this is treated as disk cluster is absent */ clust->dstat = FAKE_DISK_CLUSTER; - result = 0; - goto out2; + goto out; } /* we are outside the cluster, stop search here */ assert("edward-146", @@ -1835,15 +2080,17 @@ find_cluster(reiser4_cluster_t * clust, coord_clear_iplug(&hint->ext_coord.coord); result = zload_ra(hint->ext_coord.coord.node, &ra_info); if (unlikely(result)) - goto out2; + goto out; iplug = item_plugin_by_coord(&hint->ext_coord.coord); assert("edward-147", item_id_by_coord(&hint->ext_coord.coord) == CTAIL_ID); result = iplug->s.file.read(NULL, &f, hint); - if (result) + if (result) { + zrelse(hint->ext_coord.coord.node); goto out; + } if (write) { znode_make_dirty(hint->ext_coord.coord.node); znode_set_convertible(hint->ext_coord.coord. 
@@ -1852,25 +2099,28 @@ find_cluster(reiser4_cluster_t * clust, zrelse(hint->ext_coord.coord.node); break; default: - goto out2; + goto out; } } - ok: + ok: /* at least one item was found */ - /* NOTE-EDWARD: Callers should handle the case when disk cluster is incomplete (-EIO) */ + /* NOTE-EDWARD: Callers should handle the case + when disk cluster is incomplete (-EIO) */ tc->len = inode_scaled_cluster_size(inode) - f.length; + tc->lsize = fsize_to_count(clust, inode); assert("edward-1196", tc->len > 0); + assert("edward-1406", tc->lsize > 0); if (hint_is_unprepped_dclust(clust->hint)) clust->dstat = UNPR_DISK_CLUSTER; else clust->dstat = PREP_DISK_CLUSTER; - all_grabbed2free(); - return 0; - out: - zrelse(hint->ext_coord.coord.node); - out2: - all_grabbed2free(); + out: + assert("edward-1339", + get_current_context()->grabbed_blocks >= was_grabbed); + grabbed2free(get_current_context(), + get_current_super_private(), + get_current_context()->grabbed_blocks - was_grabbed); return result; } @@ -1990,8 +2240,7 @@ read_some_cluster_pages(struct inode *in break; } if (!tfm_cluster_is_uptodate(&clust->tc)) { - result = - ctail_read_cluster(clust, inode, 1 /* write */ ); + result = ctail_read_disk_cluster(clust, inode, 1); assert("edward-992", !result); if (result) goto out; @@ -2013,12 +2262,10 @@ read_some_cluster_pages(struct inode *in to make flush update convert its content */ result = find_cluster(clust, inode, 0 /* do not read */ , - 1 /*write */ ); - assert("edward-994", !cbk_errored(result)); - if (!cbk_errored(result)) - result = 0; + 1 /* write */ ); + assert("edward-994", !result); } - out: + out: tfm_cluster_clr_uptodate(&clust->tc); return result; } @@ -2058,12 +2305,12 @@ crc_make_unprepped_cluster(reiser4_clust assert("edward-1266", get_current_context()->grabbed_blocks == 0); if (clust->reserved) { - cluster_reserved2grabbed(estimate_insert_cluster(inode, 1)); + cluster_reserved2grabbed(estimate_insert_cluster(inode)); #if REISER4_DEBUG assert("edward-1267", clust->reserved_unprepped == - estimate_insert_cluster(inode, 1)); - clust->reserved_unprepped -= estimate_insert_cluster(inode, 1); + estimate_insert_cluster(inode)); + clust->reserved_unprepped -= estimate_insert_cluster(inode); #endif } if (!should_create_unprepped_cluster(clust, inode)) { @@ -2102,17 +2349,17 @@ static int jnode_truncate_ok(struct inod static int jnodes_truncate_ok(struct inode *inode, cloff_t start) { int result; - jnode *node; + jnode *node = NULL; reiser4_inode *info = reiser4_inode_data(inode); reiser4_tree *tree = tree_by_inode(inode); - RLOCK_TREE(tree); + read_lock_tree(tree); result = radix_tree_gang_lookup(jnode_tree_by_reiser4_inode(info), (void **)&node, clust_to_pg(start, inode), 1); - RUNLOCK_TREE(tree); + read_unlock_tree(tree); if (result) warning("edward-1332", "Untruncated jnode %p\n", node); return !result; @@ -2166,14 +2413,14 @@ void truncate_page_cluster(struct inode found = find_get_pages(inode->i_mapping, clust_to_pg(index, inode), nr_pages, pages); - LOCK_JNODE(node); - if (jnode_is_dirty(node)) { + spin_lock_jnode(node); + if (JF_ISSET(node, JNODE_DIRTY)) { /* jnode is dirty => space for disk cluster conversion grabbed */ - cluster_reserved2grabbed(estimate_insert_cluster(inode, 0)); + cluster_reserved2grabbed(estimate_update_cluster(inode)); grabbed2free(get_current_context(), get_current_super_private(), - estimate_insert_cluster(inode, 0)); + estimate_update_cluster(inode)); assert("edward-1198", found == nr_pages); /* This will clear dirty bit so concurrent flush @@ -2187,7 
+2434,7 @@ void truncate_page_cluster(struct inode page_cache_release(pages[i]); } } else - UNLOCK_JNODE(node); + spin_unlock_jnode(node); /* now drop pages and jnode */ /* FIXME-EDWARD: Use truncate_complete_page in the loop above instead */ @@ -2242,8 +2489,8 @@ prepare_cluster(struct inode *inode, if (result) { free_reserved4cluster(inode, clust, - estimate_insert_cluster(inode, 0) + - estimate_insert_cluster(inode, 1)); + estimate_update_cluster(inode) + + estimate_insert_cluster(inode)); goto err1; } assert("edward-1124", clust->dstat != INVAL_DISK_CLUSTER); @@ -2258,7 +2505,8 @@ prepare_cluster(struct inode *inode, } return 0; err2: - free_reserved4cluster(inode, clust, estimate_insert_cluster(inode, 0)); + free_reserved4cluster(inode, clust, + estimate_update_cluster(inode)); err1: page_cache_release(clust->pages[0]); release_cluster_pages_and_jnode(clust); @@ -2286,8 +2534,8 @@ set_window(reiser4_cluster_t * clust, re } static int -set_cluster_params(struct inode *inode, reiser4_cluster_t * clust, - reiser4_slide_t * win, flow_t * f, loff_t file_off) +set_cluster_by_window(struct inode *inode, reiser4_cluster_t * clust, + reiser4_slide_t * win, flow_t * f, loff_t file_off) { int result; @@ -2318,6 +2566,23 @@ set_cluster_params(struct inode *inode, return 0; } +int set_cluster_by_page(reiser4_cluster_t * clust, struct page * page, + int count) +{ + int result = 0; + int (*setting_actor)(reiser4_cluster_t * clust, int count); + + assert("edward-1358", clust != NULL); + assert("edward-1359", page != NULL); + assert("edward-1360", page->mapping != NULL); + assert("edward-1361", page->mapping->host != NULL); + + setting_actor = (clust->pages ? reset_cluster_pgset : alloc_cluster_pgset); + result = setting_actor(clust, count); + clust->index = pg_to_clust(page->index, page->mapping->host); + return result; +} + /* reset all the params that not get updated */ void reset_cluster_params(reiser4_cluster_t * clust) { @@ -2375,10 +2640,10 @@ write_cryptcompress_flow(struct file *fi /* current write position in file */ file_off = pos; reiser4_slide_init(&win); - reiser4_cluster_init(&clust, &win); + cluster_init_read(&clust, &win); clust.hint = hint; - result = set_cluster_params(inode, &clust, &win, &f, file_off); + result = set_cluster_by_window(inode, &clust, &win, &f, file_off); if (result) goto out; @@ -2471,16 +2736,15 @@ write_cryptcompress_flow(struct file *fi if (clust.reserved) free_reserved4cluster(inode, &clust, - estimate_insert_cluster(inode, - 0)); + estimate_update_cluster(inode)); break; } while (f.length); out: done_lh(&hint->lh); if (result == -EEXIST) - printk("write returns EEXIST!\n"); + warning("edward-1407", "write returns EEXIST!\n"); - put_cluster_handle(&clust, TFM_READ); + put_cluster_handle(&clust); save_file_hint(file, hint); kfree(hint); if (buf) { @@ -2551,14 +2815,22 @@ ssize_t write_cryptcompress(struct file { ssize_t result; struct inode *inode; + reiser4_context *ctx; inode = file->f_dentry->d_inode; + ctx = init_context(inode->i_sb); + if (IS_ERR(ctx)) + return PTR_ERR(ctx); + down(&inode->i_sem); result = write_crc_file(file, inode, buf, count, off); up(&inode->i_sem); + + context_set_commit_async(ctx); + reiser4_exit_context(ctx); return result; } @@ -2605,6 +2877,7 @@ ssize_t read_cryptcompress(struct file * { ssize_t result; struct inode *inode; + reiser4_context *ctx; reiser4_file_fsdata *fsdata; cryptcompress_info_t *info; reiser4_block_nr needed; @@ -2612,15 +2885,20 @@ ssize_t read_cryptcompress(struct file * inode = file->f_dentry->d_inode; 
assert("edward-1194", !inode_get_flag(inode, REISER4_NO_SD)); + ctx = init_context(inode->i_sb); + if (IS_ERR(ctx)) + return PTR_ERR(ctx); + info = cryptcompress_inode_data(inode); needed = cryptcompress_estimate_read(inode); + /* FIXME-EDWARD: Grab space for sd_update so find_cluster will be happy */ -#if 0 result = reiser4_grab_space(needed, BA_CAN_COMMIT); - if (result != 0) + if (result != 0) { + reiser4_exit_context(ctx); return result; -#endif + } fsdata = reiser4_get_file_fsdata(file); fsdata->ra2.data = file; fsdata->ra2.readpages = readpages_crc; @@ -2633,6 +2911,9 @@ ssize_t read_cryptcompress(struct file * up_read(&info->lock); LOCK_CNT_DEC(inode_sem_r); + context_set_commit_async(ctx); + reiser4_exit_context(ctx); + return result; } @@ -2893,7 +3174,7 @@ cryptcompress_append_hole(struct inode * lh = &hint->lh; reiser4_slide_init(&win); - reiser4_cluster_init(&clust, &win); + cluster_init_read(&clust, &win); clust.hint = hint; /* set cluster handle */ @@ -2931,7 +3212,7 @@ cryptcompress_append_hole(struct inode * out: done_lh(lh); kfree(hint); - put_cluster_handle(&clust, TFM_READ); + put_cluster_handle(&clust); return result; } @@ -3017,7 +3298,7 @@ prune_cryptcompress(struct inode *inode, lh = &hint->lh; reiser4_slide_init(&win); - reiser4_cluster_init(&clust, &win); + cluster_init_read(&clust, &win); clust.hint = hint; /* rightmost completely truncated cluster */ @@ -3088,8 +3369,8 @@ prune_cryptcompress(struct inode *inode, assert("edward-1335", jnodes_truncate_ok(inode, count_to_nrclust(new_size, inode))); done_lh(lh); - kfree(lh); - put_cluster_handle(&clust, TFM_READ); + kfree(hint); + put_cluster_handle(&clust); return result; } @@ -3166,7 +3447,7 @@ static int cryptcompress_truncate(struct /* page cluser is anonymous if it contains at least one anonymous page */ static int -capture_anonymous_cluster(reiser4_cluster_t * clust, struct inode *inode) +capture_page_cluster(reiser4_cluster_t * clust, struct inode *inode) { int result; @@ -3186,7 +3467,7 @@ capture_anonymous_cluster(reiser4_cluste return result; } -#define MAX_CLUSTERS_TO_CAPTURE(inode) (1024 >> inode_cluster_shift(inode)) +#define MAX_CLUSTERS_TO_CAPTURE(inode) (1024 >> cluster_nrpages_shift(inode)) /* read lock should be acquired */ static int @@ -3210,7 +3491,7 @@ capture_anonymous_clusters(struct addres hint_init_zero(hint); lh = &hint->lh; - reiser4_cluster_init(&clust, NULL); + cluster_init_read(&clust, NULL); clust.hint = hint; result = alloc_cluster_pgset(&clust, cluster_nrpages(mapping->host)); @@ -3229,7 +3510,7 @@ capture_anonymous_clusters(struct addres move_cluster_forward(&clust, mapping->host, page->index, &progress); - result = capture_anonymous_cluster(&clust, mapping->host); + result = capture_page_cluster(&clust, mapping->host); page_cache_release(page); if (result) break; @@ -3252,7 +3533,7 @@ capture_anonymous_clusters(struct addres out: done_lh(lh); kfree(hint); - put_cluster_handle(&clust, TFM_READ); + put_cluster_handle(&clust); return result; } @@ -3347,6 +3628,45 @@ writepages_cryptcompress(struct address_ return result; } +int capturepage_cryptcompress(struct page * page) { + int result = 0; + assert("edward-1350", PageLocked(page)); + assert("edward-1351", page->mapping != NULL); + assert("edward-1352", page->mapping->host != NULL); + if (PagePrivate(page) && JF_ISSET(jnode_by_page(page), JNODE_DIRTY)) { + assert("edward-1353", PageDirty(page)); + return 0; + } + else { + hint_t hint; + lock_handle lh; + reiser4_cluster_t clust; + + init_lh(&lh); + hint_init_zero(&hint); + 
hint.ext_coord.lh = &lh; + cluster_init_read(&clust, 0); + clust.hint = &hint; + + page_cache_get(page); + unlock_page(page); + + result = set_cluster_by_page(&clust, + page, + cluster_nrpages(page->mapping->host)); + if (result) + goto out; + result = capture_page_cluster(&clust, page->mapping->host); + out: + done_lh(&lh); + put_cluster_handle(&clust); + + lock_page(page); + page_cache_release(page); + } + return result; +} + /* plugin->u.file.mmap */ int mmap_cryptcompress(struct file *file, struct vm_area_struct *vma) { @@ -3366,7 +3686,8 @@ sector_t bmap_cryptcompress(struct addre sector_t block; inode = mapping->host; - if (current_blocksize != inode_cluster_size(inode)) + if (off_to_cloff ((loff_t)block * current_blocksize, inode)) + /* mapping not cluster offsets is meaningless */ return RETERR(-EINVAL); else { int result; @@ -3419,8 +3740,8 @@ sector_t bmap_cryptcompress(struct addre } } -/* this is implementation of delete method of file plugin for cryptcompress - */ +/* this is implementation of delete method of file plugin for + cryptcompress objects */ int delete_cryptcompress(struct inode *inode) { int result; @@ -3475,10 +3796,15 @@ int setattr_cryptcompress(struct dentry /* truncate does reservation itself and requires exclusive access obtained */ if (inode->i_size != attr->ia_size) { + reiser4_context *ctx; loff_t old_size; cryptcompress_info_t *info = cryptcompress_inode_data(inode); + ctx = init_context(dentry->d_inode->i_sb); + if (IS_ERR(ctx)) + return PTR_ERR(ctx); + down_write(&info->lock); LOCK_CNT_INC(inode_sem_w); @@ -3499,6 +3825,8 @@ int setattr_cryptcompress(struct dentry } up_write(&info->lock); LOCK_CNT_DEC(inode_sem_w); + context_set_commit_async(ctx); + reiser4_exit_context(ctx); } else result = 0; } else diff -puN fs/reiser4/plugin/file/cryptcompress.h~reiser4-spinlock-cleanup fs/reiser4/plugin/file/cryptcompress.h --- devel/fs/reiser4/plugin/file/cryptcompress.h~reiser4-spinlock-cleanup 2006-02-16 14:17:05.000000000 -0800 +++ devel-akpm/fs/reiser4/plugin/file/cryptcompress.h 2006-02-16 14:17:05.000000000 -0800 @@ -5,16 +5,15 @@ #define __FS_REISER4_CRYPTCOMPRESS_H__ #include "../compress/compress.h" +#include "../../crypt.h" #include -#include #include #define MIN_CLUSTER_SIZE PAGE_CACHE_SIZE #define MIN_CLUSTER_SHIFT PAGE_CACHE_SHIFT #define MAX_CLUSTER_SHIFT 16 #define MAX_CLUSTER_NRPAGES (1U << MAX_CLUSTER_SHIFT >> PAGE_CACHE_SHIFT) -#define DEFAULT_CLUSTER_SHIFT 0 #define DC_CHECKSUM_SIZE 4 #define MIN_CRYPTO_BLOCKSIZE 8 @@ -25,15 +24,6 @@ static inline int cluster_shift_ok(int s } #endif -/* Set of transform id's supported by reiser4, - each transform is implemented by appropriate transform plugin: */ -typedef enum { - CRYPTO_TFM, /* crypto plugin */ - DIGEST_TFM, /* digest plugin */ - COMPRESS_TFM, /* compression plugin */ - LAST_TFM -} reiser4_tfm; - typedef struct tfm_stream { __u8 *data; size_t size; @@ -130,8 +120,10 @@ typedef enum { typedef struct tfm_cluster { coa_set coa; tfm_unit tun; + tfm_action act; int uptodate; - int len; + int lsize; /* size of the logical cluster */ + int len; /* length of the transform stream */ } tfm_cluster_t; static inline coa_t get_coa(tfm_cluster_t * tc, reiser4_compression_id id) @@ -146,18 +138,20 @@ set_coa(tfm_cluster_t * tc, reiser4_comp } static inline int -alloc_coa(tfm_cluster_t * tc, compression_plugin * cplug, tfm_action act) +alloc_coa(tfm_cluster_t * tc, compression_plugin * cplug) { coa_t coa; - coa = cplug->alloc(act); + assert("edward-1408", tc->act != TFM_INVAL); + + coa = cplug->alloc(tc->act); 
if (IS_ERR(coa)) return PTR_ERR(coa); set_coa(tc, cplug->h.id, coa); return 0; } -static inline void free_coa_set(tfm_cluster_t * tc, tfm_action act) +static inline void free_coa_set(tfm_cluster_t * tc) { reiser4_compression_id i; compression_plugin *cplug; @@ -167,9 +161,10 @@ static inline void free_coa_set(tfm_clus for (i = 0; i < LAST_COMPRESSION_ID; i++) { if (!get_coa(tc, i)) continue; + assert("edward-1409", tc->act != TFM_INVAL); cplug = compression_plugin_by_id(i); assert("edward-812", cplug->free != NULL); - cplug->free(get_coa(tc, i), act); + cplug->free(get_coa(tc, i), tc->act); set_coa(tc, i, 0); } return; @@ -246,10 +241,10 @@ static inline void free_tfm_unit(tfm_clu } } -static inline void put_tfm_cluster(tfm_cluster_t * tc, tfm_action act) +static inline void put_tfm_cluster(tfm_cluster_t * tc) { assert("edward-942", tc != NULL); - free_coa_set(tc, act); + free_coa_set(tc); free_tfm_unit(tc); } @@ -353,15 +348,27 @@ typedef struct reiser4_cluster { #endif } reiser4_cluster_t; -static inline void reset_cluster_pgset(reiser4_cluster_t * clust, int nrpages) +static inline __u8 * tfm_input_data (reiser4_cluster_t * clust) +{ + return tfm_stream_data(&clust->tc, INPUT_STREAM); +} + +static inline __u8 * tfm_output_data (reiser4_cluster_t * clust) +{ + return tfm_stream_data(&clust->tc, OUTPUT_STREAM); +} + +static inline int reset_cluster_pgset(reiser4_cluster_t * clust, int nrpages) { assert("edward-1057", clust->pages != NULL); memset(clust->pages, 0, sizeof(*clust->pages) * nrpages); + return 0; } static inline int alloc_cluster_pgset(reiser4_cluster_t * clust, int nrpages) { assert("edward-949", clust != NULL); + assert("edward-1362", clust->pages == NULL); assert("edward-950", nrpages != 0 && nrpages <= MAX_CLUSTER_NRPAGES); clust->pages = @@ -378,28 +385,32 @@ static inline void free_cluster_pgset(re kfree(clust->pages); } -static inline void put_cluster_handle(reiser4_cluster_t * clust, tfm_action act) +static inline void put_cluster_handle(reiser4_cluster_t * clust) { assert("edward-435", clust != NULL); - put_tfm_cluster(&clust->tc, act); + put_tfm_cluster(&clust->tc); if (clust->pages) free_cluster_pgset(clust); memset(clust, 0, sizeof *clust); } -/* security attributes supposed to be stored on disk - are loaded by stat-data methods (see plugin/item/static_stat.c */ -typedef struct crypto_stat { - __u8 *keyid; /* pointer to a fingerprint */ - __u16 keysize; /* key size, bits */ - __u32 *expkey; -} crypto_stat_t; +static inline void inc_keyload_count(crypto_stat_t * data) +{ + assert("edward-1410", data != NULL); + data->keyload_count++; +} + +static inline void dec_keyload_count(crypto_stat_t * data) +{ + assert("edward-1411", data != NULL); + assert("edward-1412", data->keyload_count > 0); + data->keyload_count--; +} /* cryptcompress specific part of reiser4_inode */ typedef struct cryptcompress_info { struct rw_semaphore lock; - struct crypto_tfm *tfm[LAST_TFM]; crypto_stat_t *crypt; } cryptcompress_info_t; @@ -409,31 +420,86 @@ int goto_right_neighbor(coord_t *, lock_ int load_file_hint(struct file *, hint_t *); void save_file_hint(struct file *, const hint_t *); void hint_init_zero(hint_t *); +int need_cipher (struct inode *); +int host_allows_crypto_stat(struct inode * inode); int crc_inode_ok(struct inode *inode); -extern int ctail_read_cluster (reiser4_cluster_t *, struct inode *, int); +int jnode_of_cluster(const jnode * node, struct page * page); +extern int ctail_read_disk_cluster (reiser4_cluster_t *, struct inode *, int); extern int 
do_readpage_ctail(reiser4_cluster_t *, struct page * page); -extern int ctail_insert_unprepped_cluster(reiser4_cluster_t * clust, struct inode * inode); +extern int ctail_insert_unprepped_cluster(reiser4_cluster_t * clust, + struct inode * inode); +int bind_cryptcompress(struct inode *child, struct inode *parent); +void destroy_inode_cryptcompress(struct inode * inode); +crypto_stat_t * inode_crypto_stat (struct inode * inode); +void inherit_crypto_stat_common(struct inode * parent, struct inode * object, + int (*can_inherit)(struct inode * child, + struct inode * parent)); +crypto_stat_t * create_crypto_stat(struct inode * parent, crypto_data_t * data); +int crypto_stat_instantiated(crypto_stat_t * info); +void attach_crypto_stat(struct inode * inode, crypto_stat_t * info); +void detach_crypto_stat(struct inode * inode); +void change_crypto_stat(struct inode * inode, crypto_stat_t * new); +int can_inherit_crypto_crc(struct inode *child, struct inode *parent); +crypto_stat_t * alloc_crypto_stat (struct inode * inode); + +static inline reiser4_tfma_t * +info_get_tfma (crypto_stat_t * info, reiser4_tfm id) +{ + return &info->tfma[id]; +} + +static inline struct crypto_tfm * +info_get_tfm (crypto_stat_t * info, reiser4_tfm id) +{ + return info_get_tfma(info, id)->tfm; +} + +static inline void +info_set_tfm (crypto_stat_t * info, reiser4_tfm id, struct crypto_tfm * tfm) +{ + info_get_tfma(info, id)->tfm = tfm; +} -static inline struct crypto_tfm *inode_get_tfm(struct inode *inode, - reiser4_tfm tfm) +static inline struct crypto_tfm * +info_cipher_tfm (crypto_stat_t * info) { - return cryptcompress_inode_data(inode)->tfm[tfm]; + return info_get_tfm(info, CIPHER_TFM); } -static inline struct crypto_tfm *inode_get_crypto(struct inode *inode) +static inline struct crypto_tfm * +info_digest_tfm (crypto_stat_t * info) { - return (inode_get_tfm(inode, CRYPTO_TFM)); + return info_get_tfm(info, DIGEST_TFM); } -static inline struct crypto_tfm *inode_get_digest(struct inode *inode) +static inline crypto_plugin * +info_cipher_plugin (crypto_stat_t * info) { - return (inode_get_tfm(inode, DIGEST_TFM)); + return &info_get_tfma(info, CIPHER_TFM)->plug->crypto; } -static inline unsigned int crypto_blocksize(struct inode *inode) +static inline digest_plugin * +info_digest_plugin (crypto_stat_t * info) { - assert("edward-758", inode_get_tfm(inode, CRYPTO_TFM) != NULL); - return crypto_tfm_alg_blocksize(inode_get_tfm(inode, CRYPTO_TFM)); + return &info_get_tfma(info, DIGEST_TFM)->plug->digest; +} + +static inline void +info_set_plugin(crypto_stat_t * info, reiser4_tfm id, reiser4_plugin * plugin) +{ + info_get_tfma(info, id)->plug = plugin; +} + +static inline void +info_set_crypto_plugin(crypto_stat_t * info, crypto_plugin * cplug) +{ + info_set_plugin(info, CIPHER_TFM, crypto_plugin_to_plugin(cplug)); +} + +static inline void +info_set_digest_plugin(crypto_stat_t * info, digest_plugin * plug) +{ + info_set_plugin(info, DIGEST_TFM, digest_plugin_to_plugin(plug)); } static inline compression_plugin *dual_compression_plugin(compression_plugin * @@ -442,31 +508,6 @@ static inline compression_plugin *dual_c return compression_plugin_by_id(cplug->dual); } -#define REGISTER_NONE_ALG(ALG, TFM) \ -static int alloc_none_ ## ALG (struct inode * inode) \ -{ \ - cryptcompress_info_t * info; \ - assert("edward-760", inode != NULL); \ - \ - info = cryptcompress_inode_data(inode); \ - \ - \ - cryptcompress_inode_data(inode)->tfm[TFM ## _TFM] = NULL; \ - return 0; \ - \ -} \ -static void free_none_ ## ALG (struct inode * 
inode) \ -{ \ - cryptcompress_info_t * info; \ - assert("edward-761", inode != NULL); \ - \ - info = cryptcompress_inode_data(inode); \ - \ - assert("edward-762", info != NULL); \ - \ - info->tfm[TFM ## _TFM] = NULL; \ -} - #endif /* __FS_REISER4_CRYPTCOMPRESS_H__ */ /* Make Linus happy. diff -puN fs/reiser4/plugin/file/file.c~reiser4-spinlock-cleanup fs/reiser4/plugin/file/file.c --- devel/fs/reiser4/plugin/file/file.c~reiser4-spinlock-cleanup 2006-02-16 14:17:05.000000000 -0800 +++ devel-akpm/fs/reiser4/plugin/file/file.c 2006-02-16 14:17:05.000000000 -0800 @@ -62,30 +62,46 @@ static void set_file_state_unknown(struc unix_file_inode_data(inode)->container = UF_CONTAINER_UNKNOWN; } -static int less_than_ldk(znode * node, const reiser4_key * key) +static int less_than_ldk(znode *node, const reiser4_key *key) { - return UNDER_RW(dk, current_tree, read, - keylt(key, znode_get_ld_key(node))); + int result; + + read_lock_dk(znode_get_tree(node)); + result = keylt(key, znode_get_ld_key(node)); + read_unlock_dk(znode_get_tree(node)); + return result; } -int equal_to_rdk(znode * node, const reiser4_key * key) +int equal_to_rdk(znode *node, const reiser4_key *key) { - return UNDER_RW(dk, current_tree, read, - keyeq(key, znode_get_rd_key(node))); + int result; + + read_lock_dk(znode_get_tree(node)); + result = keyeq(key, znode_get_rd_key(node)); + read_unlock_dk(znode_get_tree(node)); + return result; } #if REISER4_DEBUG static int less_than_rdk(znode * node, const reiser4_key * key) { - return UNDER_RW(dk, current_tree, read, - keylt(key, znode_get_rd_key(node))); + int result; + + read_lock_dk(znode_get_tree(node)); + result = keylt(key, znode_get_rd_key(node)); + read_unlock_dk(znode_get_tree(node)); + return result; } -static int equal_to_ldk(znode * node, const reiser4_key * key) +int equal_to_ldk(znode * node, const reiser4_key * key) { - return UNDER_RW(dk, current_tree, read, - keyeq(key, znode_get_ld_key(node))); + int result; + + read_lock_dk(znode_get_tree(node)); + result = keyeq(key, znode_get_ld_key(node)); + read_unlock_dk(znode_get_tree(node)); + return result; } /* get key of item next to one @coord is set to */ @@ -94,8 +110,9 @@ static reiser4_key *get_next_item_key(co { if (coord->item_pos == node_num_items(coord->node) - 1) { /* get key of next item if it is in right neighbor */ - UNDER_RW_VOID(dk, znode_get_tree(coord->node), read, - *next_key = *znode_get_rd_key(coord->node)); + read_lock_dk(znode_get_tree(coord->node)); + *next_key = *znode_get_rd_key(coord->node); + read_unlock_dk(znode_get_tree(coord->node)); } else { /* get key of next item if it is in the same node */ coord_t next; @@ -210,10 +227,7 @@ write_mode_t how_to_write(uf_coord_t * u * space, for example) and leaves empty leaf * lingering. Nothing prevents us from reusing it. 
 */
-	assert("vs-1000", UNDER_RW(dk, current_tree, read,
-				   keylt(key,
-					 znode_get_rd_key(coord->
-							  node))));
+	assert("vs-1000", less_than_rdk(coord->node, key));
 	assert("vs-1002", coord->between == EMPTY_NODE);
 	result = FIRST_ITEM;
 	uf_coord->valid = 1;
@@ -789,9 +803,10 @@ void save_file_hint(struct file *file, c
 {
 	reiser4_file_fsdata *fsdata;
 
+	assert("edward-1337", hint != NULL);
+
 	if (!file || !seal_is_set(&hint->seal))
 		return;
-
 	fsdata = reiser4_get_file_fsdata(file);
 	assert("vs-965", !IS_ERR(fsdata));
 	assert("nikita-19891",
@@ -1323,14 +1338,20 @@ static int sync_page(struct page *page)
 		lock_page(page);
 		node = jprivate(page);
-		if (node != NULL)
-			atom = UNDER_SPIN(jnode, node, jnode_get_atom(node));
-		else
+		if (node != NULL) {
+			spin_lock_jnode(node);
+			atom = jnode_get_atom(node);
+			spin_unlock_jnode(node);
+		} else
 			atom = NULL;
 		unlock_page(page);
 		result = sync_atom(atom);
 	} while (result == -E_REPEAT);
-/* ZAM-FIXME-HANS: document the logic of this loop, is it just to handle the case where more pages get added to the atom while we are syncing it? */
+	/*
+	 * ZAM-FIXME-HANS: document the logic of this loop, is it just to
+	 * handle the case where more pages get added to the atom while we are
+	 * syncing it?
+	 */
 	assert("nikita-3485",
 	       ergo(result == 0,
 		    get_current_context()->trans->atom == NULL));
 	return result;
@@ -1484,6 +1505,7 @@ writepages_unix_file(struct address_spac
 	uf_info = unix_file_inode_data(inode);
 	do {
 		reiser4_context *ctx;
+		int dont_get_nea;
 
 		if (wbc->sync_mode != WB_SYNC_ALL)
 			to_capture = min(wbc->nr_to_write, CAPTURE_APAGE_BURST);
@@ -1498,23 +1520,36 @@ writepages_unix_file(struct address_spac
 		/* avoid recursive calls to ->sync_inodes */
 		ctx->nobalance = 1;
 		assert("zam-760", lock_stack_isclean(get_current_lock_stack()));
-		/*
-		 * locking: creation of extent requires read-semaphore on
-		 * file. _But_, this function can also be called in the
-		 * context of write system call from
-		 * balance_dirty_pages(). So, write keeps semaphore (possible
-		 * in write mode) on file A, and this function tries to
-		 * acquire semaphore on (possibly) different file B. A/B
-		 * deadlock is on a way. To avoid this try-lock is used
-		 * here. When invoked from sys_fsync() and sys_fdatasync(),
-		 * this function is out of reiser4 context and may safely
-		 * sleep on semaphore.
-		 */
 		assert("", LOCK_CNT_NIL(inode_sem_w));
 		assert("", LOCK_CNT_NIL(inode_sem_r));
 		txn_restart_current();
-		get_nonexclusive_access(uf_info, 0);
+
+		/*
+		 * suppose thread T1 has got nonexclusive access (NEA) on a file
+		 * F, asked entd to flush to reclaim some memory and waits
+		 * until entd completes. Another thread T2 tries to get
+		 * exclusive access to file F. Then entd will deadlock on
+		 * getting NEA to file F (because a read-down request gets blocked
+		 * if there is a write request queued in the Linux read-write
+		 * semaphore implementation). To avoid this problem we make
+		 * entd not get NEA to F if it is obtained by T1.
+		 */
+		dont_get_nea = 0;
+		if (get_current_context()->entd) {
+			entd_context *ent = get_entd_context(inode->i_sb);
+
+			if (ent->cur_request->caller != NULL &&
+			    mapping == ent->cur_request->caller->vp)
+				/*
+				 * process which is waiting for entd has got
+				 * NEA on a file we are about to capture pages
+				 * of. Skip getting NEA therefore.
+ */ + dont_get_nea = 1; + } + if (dont_get_nea == 0) + get_nonexclusive_access(uf_info, 0); while (to_capture > 0) { pgoff_t start; @@ -1554,7 +1589,8 @@ writepages_unix_file(struct address_spac /* there may be left more pages */ __mark_inode_dirty(inode, I_DIRTY_PAGES); - drop_nonexclusive_access(uf_info); + if (dont_get_nea == 0) + drop_nonexclusive_access(uf_info); if (result < 0) { /* error happened */ reiser4_exit_context(ctx); @@ -1626,9 +1662,9 @@ int sync_unix_file(struct file *file, st node = jref(ZJNODE(coord.node)); done_lh(&lh); txn_restart_current(); - LOCK_JNODE(node); + spin_lock_jnode(node); atom = jnode_get_atom(node); - UNLOCK_JNODE(node); + spin_unlock_jnode(node); result = sync_atom(atom); jput(node); } else @@ -2282,15 +2318,15 @@ static int check_pages_unix_file(struct return unpack(inode, 0 /* not forever */ ); } -/* implentation of vfs' mmap method of struct file_operations for unix file - plugin - - make sure that file is built of extent blocks. An estimation is in - tail2extent - - This sets inode flags: file has mapping. if file is mmaped with VM_MAYWRITE - - invalidate pages and convert. -*/ +/** + * mmap_unix_file - mmap of struct file_operations + * @file: file to mmap + * @vma: + * + * This is implementation of vfs's mmap method of struct file_operations for + * unix file plugin. It converts file to extent if necessary. Sets + * reiser4_inode's flag - REISER4_HAS_MMAP. + */ int mmap_unix_file(struct file *file, struct vm_area_struct *vma) { reiser4_context *ctx; @@ -2304,25 +2340,17 @@ int mmap_unix_file(struct file *file, st if (IS_ERR(ctx)) return PTR_ERR(ctx); - /* - * generic_file_mmap will do update_atime. Grab space for stat data - * update - */ - needed = inode_file_plugin(inode)->estimate.update(inode); - result = reiser4_grab_space(needed, BA_CAN_COMMIT); - if (result) { - reiser4_exit_context(ctx); - return result; - } - uf_info = unix_file_inode_data(inode); down(&uf_info->write); get_exclusive_access(uf_info); if (!IS_RDONLY(inode) && (vma->vm_flags & (VM_MAYWRITE | VM_SHARED))) { - /* we need file built of extent items. If it is still built of tail items we have to convert it. Find - what items the file is built of */ + /* + * we need file built of extent items. If it is still built of + * tail items we have to convert it. Find what items the file + * is built of + */ result = finish_conversion(inode); if (result) { drop_exclusive_access(uf_info); @@ -2343,7 +2371,10 @@ int mmap_unix_file(struct file *file, st uf_info->container == UF_CONTAINER_EXTENTS || uf_info->container == UF_CONTAINER_EMPTY)); if (uf_info->container == UF_CONTAINER_TAILS) { - /* invalidate all pages and convert file from tails to extents */ + /* + * invalidate all pages and convert file from tails to + * extents + */ result = check_pages_unix_file(inode); if (result) { drop_exclusive_access(uf_info); @@ -2354,6 +2385,19 @@ int mmap_unix_file(struct file *file, st } } + /* + * generic_file_mmap will do update_atime. Grab space for stat data + * update. + */ + needed = inode_file_plugin(inode)->estimate.update(inode); + result = reiser4_grab_space_force(needed, BA_CAN_COMMIT); + if (result) { + drop_exclusive_access(uf_info); + up(&uf_info->write); + reiser4_exit_context(ctx); + return result; + } + result = generic_file_mmap(file, vma); if (result == 0) { /* mark file as having mapping. */ @@ -2614,9 +2658,15 @@ ssize_t write_unix_file(struct file *fil return count ? 
count : result;
 }
 
-/* this is implementation of vfs's release method of struct
-   file_operations for unix file plugin
-   convert all extent items into tail items if necessary */
+/**
+ * release_unix_file - release of struct file_operations
+ * @inode: inode of released file
+ * @file: file to release
+ *
+ * Implementation of release method of struct file_operations for unix file
+ * plugin. If the last reference to the inode is released, convert all extent
+ * items into tail items if necessary. Frees reiser4 specific file data.
+ */
 int release_unix_file(struct inode *inode, struct file *file)
 {
 	reiser4_context *ctx;
@@ -2975,8 +3025,14 @@ int delete_object_unix_file(struct inode
 	unix_file_info_t *uf_info;
 	int result;
 
-	assert("", (get_current_context() &&
-		    get_current_context()->trans->atom == NULL));
+	/*
+	 * transaction can be open already. For example:
+	 * writeback_inodes->sync_sb_inodes->reiser4_sync_inodes->
+	 * generic_sync_sb_inodes->iput->generic_drop_inode->
+	 * generic_delete_inode->reiser4_delete_inode->delete_object_unix_file.
+	 * So, restart transaction to avoid deadlock with file rw semaphore.
+	 */
+	txn_restart_current();
 
 	if (inode_get_flag(inode, REISER4_NO_SD))
 		return 0;
diff -puN fs/reiser4/plugin/file/funcs.h~reiser4-spinlock-cleanup fs/reiser4/plugin/file/funcs.h
--- devel/fs/reiser4/plugin/file/funcs.h~reiser4-spinlock-cleanup	2006-02-16 14:17:05.000000000 -0800
+++ devel-akpm/fs/reiser4/plugin/file/funcs.h	2006-02-16 14:17:05.000000000 -0800
@@ -1,6 +1,6 @@
 /* Copyright 2001, 2002, 2003, 2004 by Hans Reiser, licensing governed by reiser4/README */
 
-/* this prototyles functions used by both file.c and tail_conversion.c */
+/* this prototypes functions used by both file.c and tail_conversion.c */
 void get_exclusive_access(unix_file_info_t *);
 void drop_exclusive_access(unix_file_info_t *);
 void get_nonexclusive_access(unix_file_info_t *, int);
@@ -17,6 +17,9 @@ int find_file_item_nohint(coord_t *, loc
 int goto_right_neighbor(coord_t *, lock_handle *);
 int find_or_create_extent(struct page *);
 write_mode_t how_to_write(uf_coord_t *, const reiser4_key *);
+#if REISER4_DEBUG
+int equal_to_ldk(znode *, const reiser4_key *);
+#endif
 
 extern inline int cbk_errored(int cbk_result)
 {
diff -puN fs/reiser4/plugin/file_ops.c~reiser4-spinlock-cleanup fs/reiser4/plugin/file_ops.c
--- devel/fs/reiser4/plugin/file_ops.c~reiser4-spinlock-cleanup	2006-02-16 14:17:05.000000000 -0800
+++ devel-akpm/fs/reiser4/plugin/file_ops.c	2006-02-16 14:17:05.000000000 -0800
@@ -20,12 +20,23 @@ loff_t llseek_common_dir(struct file *,
 */
 int readdir_common(struct file *, void *dirent, filldir_t);
 
-/* this is implementation of vfs's release method of struct file_operations for
-   typical directory
+/**
+ * release_dir_common - release of struct file_operations
+ * @inode: inode of released file
+ * @file: file to release
+ *
+ * Implementation of release method of struct file_operations for typical
+ * directory. All it does is free the reiser4-specific file data.
*/ int release_dir_common(struct inode *inode, struct file *file) { + reiser4_context *ctx; + + ctx = init_context(inode->i_sb); + if (IS_ERR(ctx)) + return PTR_ERR(ctx); reiser4_free_file_fsdata(file); + reiser4_exit_context(ctx); return 0; } diff -puN fs/reiser4/plugin/file_plugin_common.c~reiser4-spinlock-cleanup fs/reiser4/plugin/file_plugin_common.c --- devel/fs/reiser4/plugin/file_plugin_common.c~reiser4-spinlock-cleanup 2006-02-16 14:17:05.000000000 -0800 +++ devel-akpm/fs/reiser4/plugin/file_plugin_common.c 2006-02-16 14:17:05.000000000 -0800 @@ -144,6 +144,25 @@ int adjust_to_parent_common_dir(struct i return result; } +int adjust_to_parent_cryptcompress(struct inode *object /* new object */ , + struct inode *parent /* parent directory */, + struct inode *root /* root directory */) +{ + int result; + result = adjust_to_parent_common(object, parent, root); + if (result) + return result; + assert("edward-1416", parent != NULL); + + grab_plugin(object, parent, PSET_CLUSTER); + grab_plugin(object, parent, PSET_CRYPTO); + grab_plugin(object, parent, PSET_DIGEST); + grab_plugin(object, parent, PSET_COMPRESSION); + grab_plugin(object, parent, PSET_COMPRESSION_MODE); + + return 0; +} + /* this is common implementation of create_object method of file plugin */ int diff -puN fs/reiser4/plugin/file/tail_conversion.c~reiser4-spinlock-cleanup fs/reiser4/plugin/file/tail_conversion.c --- devel/fs/reiser4/plugin/file/tail_conversion.c~reiser4-spinlock-cleanup 2006-02-16 14:17:05.000000000 -0800 +++ devel-akpm/fs/reiser4/plugin/file/tail_conversion.c 2006-02-16 14:17:05.000000000 -0800 @@ -57,11 +57,19 @@ void get_nonexclusive_access(unix_file_i get_current_context()->trans->atom == NULL)); BUG_ON(atom_may_exist == 0 && get_current_context()->trans->atom != NULL); + assert("", get_current_context()->vp == NULL); + down_read(&uf_info->latch); + /* + * this is to avoid rwsem deadlock on ent thread. 
See comment in + * writepages_unix_file + */ + get_current_context()->vp = unix_file_info_to_inode(uf_info)->i_mapping; + LOCK_CNT_INC(inode_sem_r); assert("vs-1716", uf_info->ea_owner == NULL); - ON_DEBUG(atomic_inc(&uf_info->nr_neas)); #if REISER4_DEBUG + atomic_inc(&uf_info->nr_neas); uf_info->last_reader = current; #endif } @@ -71,7 +79,10 @@ void drop_nonexclusive_access(unix_file_ assert("vs-1718", uf_info->ea_owner == NULL); assert("vs-1719", atomic_read(&uf_info->nr_neas) > 0); ON_DEBUG(atomic_dec(&uf_info->nr_neas)); + + get_current_context()->vp = NULL; up_read(&uf_info->latch); + LOCK_CNT_DEC(inode_sem_r); } diff -puN fs/reiser4/plugin/item/ctail.c~reiser4-spinlock-cleanup fs/reiser4/plugin/item/ctail.c --- devel/fs/reiser4/plugin/item/ctail.c~reiser4-spinlock-cleanup 2006-02-16 14:17:05.000000000 -0800 +++ devel-akpm/fs/reiser4/plugin/item/ctail.c 2006-02-16 14:17:05.000000000 -0800 @@ -402,12 +402,8 @@ static int ctail_convertible(const coord NULL) << cluster_shift_by_coord(coord)); if (!child) return 0; - LOCK_JNODE(child); - if (jnode_is_dirty(child)) - result = 1; - else - result = 0; - UNLOCK_JNODE(child); + /* NOTE-Edward: jnode spin lock is removed here: test_bit is atomic */ + result = JF_ISSET(child, JNODE_DIRTY); jput(child); return result; } @@ -538,8 +534,8 @@ int read_ctail(struct file *file UNUSED_ /* Reads a disk cluster consists of ctail items, attaches a transform stream with plain text */ -int -ctail_read_cluster(reiser4_cluster_t * clust, struct inode *inode, int write) +int ctail_read_disk_cluster(reiser4_cluster_t * clust, struct inode *inode, + int write) { int result; compression_plugin *cplug; @@ -552,19 +548,22 @@ ctail_read_cluster(reiser4_cluster_t * c assert("edward-672", crc_inode_ok(inode)); /* set input stream */ - result = grab_tfm_stream(inode, &clust->tc, TFM_READ, INPUT_STREAM); + result = grab_tfm_stream(inode, &clust->tc, INPUT_STREAM); if (result) return result; result = find_cluster(clust, inode, 1 /* read */ , write); - if (cbk_errored(result)) + assert("edward-1340", !result); + if (result) return result; if (!write) - set_hint_cluster(inode, clust->hint, - clust->index + 1, ZNODE_READ_LOCK); + /* write still need the lock to insert unprepped + items, etc... */ + put_hint_cluster(clust, inode, ZNODE_READ_LOCK); - assert("edward-673", znode_is_any_locked(clust->hint->lh.node)); + assert("edward-673", + ergo(write, znode_is_write_locked(clust->hint->lh.node))); if (clust->dstat == FAKE_DISK_CLUSTER || clust->dstat == UNPR_DISK_CLUSTER) { @@ -573,7 +572,7 @@ ctail_read_cluster(reiser4_cluster_t * c } cplug = inode_compression_plugin(inode); if (cplug->alloc && !get_coa(&clust->tc, cplug->h.id)) { - result = alloc_coa(&clust->tc, cplug, TFM_READ); + result = alloc_coa(&clust->tc, cplug); if (result) return result; } @@ -604,7 +603,7 @@ int do_readpage_ctail(reiser4_cluster_t if (!tfm_cluster_is_uptodate(&clust->tc)) { clust->index = pg_to_clust(page->index, inode); unlock_page(page); - ret = ctail_read_cluster(clust, inode, 0 /* read only */ ); + ret = ctail_read_disk_cluster(clust, inode, 0 /* read */ ); lock_page(page); if (ret) return ret; @@ -695,18 +694,17 @@ int readpage_ctail(void *vp, struct page ergo(!result, tfm_cluster_is_uptodate(&clust->tc))); unlock_page(page); - + done_lh(&hint->lh); hint->ext_coord.valid = 0; save_file_hint(clust->file, hint); - done_lh(&hint->lh); kfree(hint); tfm_cluster_clr_uptodate(&clust->tc); return result; } -/* Unconditionally reads a disk cluster. 
- This is used by ->readpages() */ +/* This unconditionally reads a disk cluster. + Helper function for ->readpages() */ static int ctail_read_page_cluster(reiser4_cluster_t * clust, struct inode *inode) { @@ -719,10 +717,10 @@ ctail_read_page_cluster(reiser4_cluster_ result = prepare_page_cluster(inode, clust, 0 /* do not capture */ ); if (result) return result; - result = ctail_read_cluster(clust, inode, 0 /* read */ ); + result = ctail_read_disk_cluster(clust, inode, 0 /* read */ ); if (result) goto out; - /* stream is attached at this point */ + /* at this point stream with valid plain text is attached */ assert("edward-781", tfm_cluster_is_uptodate(&clust->tc)); for (i = 0; i < clust->nr_pages; i++) { @@ -736,8 +734,6 @@ ctail_read_page_cluster(reiser4_cluster_ tfm_cluster_clr_uptodate(&clust->tc); out: release_cluster_pages_nocapture(clust); - assert("edward-1060", !result); - return result; } @@ -750,11 +746,10 @@ assert("edward-214", ergo(!list_empty(pa list_to_page(pages)->index < list_to_next_page(pages)->index)) #endif -/* plugin->s.file.writepage */ - /* plugin->u.item.s.file.readpages - populate an address space with page clusters, and start reads against them. - FIXME_EDWARD: this function should return errors + Populate an address space with some page clusters, + and start reads against them. + FIXME-EDWARD: this function should return errors? */ void readpages_ctail(void *vp, struct address_space *mapping, @@ -773,21 +768,20 @@ readpages_ctail(void *vp, struct address list_to_page(pages)->index < list_to_next_page(pages)->index)); pagevec_init(&lru_pvec, 0); - reiser4_cluster_init(&clust, NULL); + cluster_init_read(&clust, NULL); clust.file = vp; hint = kmalloc(sizeof(*hint), GFP_KERNEL); if (hint == NULL) { warning("vs-28", "failed to allocate hint"); - return; + goto exit1; } clust.hint = hint; - - ret = alloc_cluster_pgset(&clust, cluster_nrpages(inode)); - if (ret) - goto out; ret = load_file_hint(clust.file, hint); if (ret) - goto out; + goto exit2; + ret = alloc_cluster_pgset(&clust, cluster_nrpages(inode)); + if (ret) + goto exit3; assert("vs-26", hint->ext_coord.lh == &hint->lh); /* address_space-level file readahead doesn't know about @@ -811,24 +805,16 @@ readpages_ctail(void *vp, struct address move_cluster_forward(&clust, inode, page->index, &progress); ret = ctail_read_page_cluster(&clust, inode); if (ret) - goto exit; + break; assert("edward-869", !tfm_cluster_is_uptodate(&clust.tc)); - lock_page(page); + ret = do_readpage_ctail(&clust, page); if (!pagevec_add(&lru_pvec, page)) __pagevec_lru_add(&lru_pvec); if (ret) { warning("edward-215", "do_readpage_ctail failed"); unlock_page(page); - exit: - while (!list_empty(pages)) { - struct page *victim; - - victim = list_to_page(pages); - list_del(&victim->lru); - page_cache_release(victim); - } break; } assert("edward-1061", PageUptodate(page)); @@ -836,12 +822,20 @@ readpages_ctail(void *vp, struct address unlock_page(page); } assert("edward-870", !tfm_cluster_is_uptodate(&clust.tc)); - save_file_hint(clust.file, hint); - out: + exit3: done_lh(&hint->lh); + save_file_hint(clust.file, hint); hint->ext_coord.valid = 0; + exit2: kfree(hint); - put_cluster_handle(&clust, TFM_READ); + exit1: + while (!list_empty(pages)) { + struct page *victim; + victim = list_to_page(pages); + list_del(&victim->lru); + page_cache_release(victim); + } + put_cluster_handle(&clust); pagevec_lru_add(&lru_pvec); return; } @@ -969,7 +963,7 @@ insert_crc_flow_in_place(coord_t * coord ret = insert_crc_flow(&pos, &lock, f, inode); 
done_lh(&lock); - + assert("edward-1347", znode_is_write_locked(lh->node)); assert("edward-1228", !ret); return ret; } @@ -1022,13 +1016,12 @@ int ctail_insert_unprepped_cluster(reiser4_cluster_t * clust, struct inode *inode) { int result; - assert("edward-1244", inode != NULL); assert("edward-1245", clust->hint != NULL); assert("edward-1246", clust->dstat == FAKE_DISK_CLUSTER); assert("edward-1247", clust->reserved == 1); assert("edward-1248", get_current_context()->grabbed_blocks == - estimate_insert_cluster(inode, 1)); + estimate_insert_cluster(inode)); result = get_disk_cluster_locked(clust, inode, ZNODE_WRITE_LOCK); if (cbk_errored(result)) @@ -1110,22 +1103,20 @@ int scan_ctail(flush_scan * scan) if (!scanning_left(scan)) return result; - if (!znode_is_dirty(scan->parent_lock.node)) + if (!ZF_ISSET(scan->parent_lock.node, JNODE_DIRTY)) znode_make_dirty(scan->parent_lock.node); if (!znode_convertible(scan->parent_lock.node)) { - LOCK_JNODE(scan->node); - if (jnode_is_dirty(scan->node)) { + /* NOTE-Edward: jnode spinlock is removed. test_bit is atomic */ + if (JF_ISSET(scan->node, JNODE_DIRTY)) { warning("edward-873", "child is dirty but parent not squeezable"); znode_set_convertible(scan->parent_lock.node); } else { warning("edward-681", "cluster page is already processed"); - UNLOCK_JNODE(scan->node); return -EAGAIN; } - UNLOCK_JNODE(scan->node); } return result; } @@ -1146,10 +1137,10 @@ static int should_attach_convert_idata(f if (!pos->child) return 0; - LOCK_JNODE(pos->child); - result = jnode_is_dirty(pos->child) && - pos->child->atom == ZJNODE(pos->coord.node)->atom; - UNLOCK_JNODE(pos->child); + spin_lock_jnode(pos->child); + result = (JF_ISSET(pos->child, JNODE_DIRTY) && + pos->child->atom == ZJNODE(pos->coord.node)->atom); + spin_unlock_jnode(pos->child); if (!result && pos->child) { /* existing child isn't to attach, clear up this one */ jput(pos->child); @@ -1203,6 +1194,7 @@ static int alloc_convert_data(flush_pos_ if (!pos->sq) return RETERR(-ENOMEM); memset(pos->sq, 0, sizeof(*pos->sq)); + cluster_init_write(&pos->sq->clust, 0); return 0; } @@ -1216,7 +1208,7 @@ void free_convert_data(flush_pos_t * pos sq = pos->sq; if (sq->itm) free_item_convert_data(sq); - put_cluster_handle(&sq->clust, TFM_WRITE); + put_cluster_handle(&sq->clust); kfree(pos->sq); pos->sq = NULL; return; @@ -1265,18 +1257,15 @@ static int attach_convert_idata(flush_po } clust = &pos->sq->clust; if (cplug->alloc && !get_coa(&clust->tc, cplug->h.id)) { - ret = alloc_coa(&clust->tc, cplug, TFM_WRITE); - if (ret) - goto err; - } - - if (convert_data(pos)->clust.pages == NULL) { - ret = alloc_cluster_pgset(&convert_data(pos)->clust, - MAX_CLUSTER_NRPAGES); + ret = alloc_coa(&clust->tc, cplug); if (ret) goto err; } - reset_cluster_pgset(&convert_data(pos)->clust, MAX_CLUSTER_NRPAGES); + ret = set_cluster_by_page(clust, + jnode_page(pos->child), + MAX_CLUSTER_NRPAGES); + if (ret) + goto err; assert("edward-829", pos->sq != NULL); assert("edward-250", item_convert_data(pos) == NULL); @@ -1291,8 +1280,6 @@ static int attach_convert_idata(flush_po goto err; info = item_convert_data(pos); - clust->index = pg_to_clust(jnode_page(pos->child)->index, inode); - ret = flush_cluster_pages(clust, pos->child, inode); if (ret) goto err; @@ -1436,7 +1423,7 @@ static int next_item_dc_stat(flush_pos_t item_convert_data(pos)->d_next = DC_CHAINED_ITEM; - if (!znode_is_dirty(lh.node)) { + if (!ZF_ISSET(lh.node, JNODE_DIRTY)) { /* warning("edward-1024", "next slum item mergeable, " diff -puN 
fs/reiser4/plugin/item/extent_file_ops.c~reiser4-spinlock-cleanup fs/reiser4/plugin/item/extent_file_ops.c --- devel/fs/reiser4/plugin/item/extent_file_ops.c~reiser4-spinlock-cleanup 2006-02-16 14:17:05.000000000 -0800 +++ devel-akpm/fs/reiser4/plugin/item/extent_file_ops.c 2006-02-16 14:17:05.000000000 -0800 @@ -856,11 +856,11 @@ static int extent_write_flow(struct inod process because page attached to jnode is locked */ - LOCK_JNODE(j); + spin_lock_jnode(j); assign_jnode_blocknr(j, h->blocknr, h->created); blocknr_set = 1; - UNLOCK_JNODE(j); + spin_unlock_jnode(j); } result = page_io(page, j, READ, GFP_KERNEL); @@ -875,12 +875,12 @@ static int extent_write_flow(struct inod } /* assign blocknr to jnode if it is not assigned yet */ - LOCK_JNODE(j); + spin_lock_jnode(j); eflush_del(j, 1); if (blocknr_set == 0) assign_jnode_blocknr(j, h->blocknr, h->created); - UNLOCK_JNODE(j); + spin_unlock_jnode(j); } else { /* new page added to the file. No need to carry about data it might contain. Zero content of @@ -890,21 +890,22 @@ static int extent_write_flow(struct inod /* assign blocknr to jnode if it is not assigned yet */ - LOCK_JNODE(j); + spin_lock_jnode(j); assign_jnode_blocknr(j, h->blocknr, h->created); - UNLOCK_JNODE(j); + spin_unlock_jnode(j); } } else { - LOCK_JNODE(j); + spin_lock_jnode(j); eflush_del(j, 1); assign_jnode_blocknr(j, h->blocknr, h->created); - UNLOCK_JNODE(j); + spin_unlock_jnode(j); } - - assert("vs-1503", - UNDER_SPIN(jnode, j, - (!JF_ISSET(j, JNODE_EFLUSH) - && jnode_page(j) == page))); +#if REISER4_DEBUG + spin_lock_jnode(j); + assert("vs-1503", (!JF_ISSET(j, JNODE_EFLUSH) && + jnode_page(j) == page)); + spin_unlock_jnode(j); +#endif assert("nikita-3033", schedulable()); /* copy user data into page */ @@ -920,7 +921,7 @@ static int extent_write_flow(struct inod goto exit3; } - set_page_dirty_internal(page, 0); + set_page_dirty_internal(page); SetPageUptodate(page); if (!PageReferenced(page)) SetPageReferenced(page); @@ -930,16 +931,16 @@ static int extent_write_flow(struct inod gets into clean list in try_capture and then in jnode_mark_dirty gets moved to dirty list. 
So, it would be more optimal to put jnode directly to dirty list */ - LOCK_JNODE(j); + spin_lock_jnode(j); result = try_capture(j, ZNODE_WRITE_LOCK, 0, 1 /* can_coc */ ); if (result) { - UNLOCK_JNODE(j); + spin_unlock_jnode(j); page_cache_release(page); goto exit2; } jnode_make_dirty_locked(j); JF_CLR(j, JNODE_KEEPME); - UNLOCK_JNODE(j); + spin_unlock_jnode(j); page_cache_release(page); jput(j); @@ -1104,7 +1105,7 @@ do_readpage_extent(reiser4_extent * ext, zero_page(page); return 0; } - LOCK_JNODE(j); + spin_lock_jnode(j); if (!jnode_page(j)) { jnode_attach_page(j, page); } else { @@ -1112,7 +1113,7 @@ do_readpage_extent(reiser4_extent * ext, assert("vs-1504", jnode_page(j) == page); } - UNLOCK_JNODE(j); + spin_unlock_jnode(j); break; case ALLOCATED_EXTENT: @@ -1134,7 +1135,9 @@ do_readpage_extent(reiser4_extent * ext, assert("nikita-2688", j); assert("vs-1426", jnode_page(j) == NULL); - UNDER_SPIN_VOID(jnode, j, jnode_attach_page(j, page)); + spin_lock_jnode(j); + jnode_attach_page(j, page); + spin_unlock_jnode(j); /* page is locked, it is safe to check JNODE_EFLUSH */ assert("vs-1668", JF_ISSET(j, JNODE_EFLUSH)); @@ -1610,12 +1613,11 @@ capture_extent(reiser4_key *key, uf_coor done_lh(uf_coord->lh); return PTR_ERR(j); } - UNDER_SPIN_VOID(jnode, j, eflush_del(j, 1)); + spin_lock_jnode(j); + eflush_del(j, 1); unlock_page(page); - LOCK_JNODE(j); - BUG_ON(JF_ISSET(j, JNODE_EFLUSH)); if (h->created) { /* extent corresponding to this jnode was just created */ @@ -1633,24 +1635,29 @@ capture_extent(reiser4_key *key, uf_coor assert("vs-1507", ergo(h->blocknr, *jnode_get_block(j) == h->blocknr)); } - UNLOCK_JNODE(j); + spin_unlock_jnode(j); done_lh(h->uf_coord->lh); - LOCK_JNODE(j); + spin_lock_jnode(j); result = try_capture(j, ZNODE_WRITE_LOCK, 0, 1 /* can_coc */ ); if (result != 0) reiser4_panic("nikita-3324", "Cannot capture jnode: %i", result); jnode_make_dirty_locked(j); JF_CLR(j, JNODE_KEEPME); - UNLOCK_JNODE(j); + spin_unlock_jnode(j); jput(j); if (h->created) reiser4_update_sd(page->mapping->host); - /* warning about failure of this is issued already */ + if (get_current_context()->entd) { + entd_context *ent = get_entd_context(j->tree->super); + + if (ent->cur_request->page == page) + ent->cur_request->node = j; + } kfree(h); return 0; } diff -puN fs/reiser4/plugin/item/extent_flush_ops.c~reiser4-spinlock-cleanup fs/reiser4/plugin/item/extent_flush_ops.c --- devel/fs/reiser4/plugin/item/extent_flush_ops.c~reiser4-spinlock-cleanup 2006-02-16 14:17:05.000000000 -0800 +++ devel-akpm/fs/reiser4/plugin/item/extent_flush_ops.c 2006-02-16 14:17:05.000000000 -0800 @@ -138,7 +138,7 @@ int scan_extent(flush_scan * scan) int ret = 0, allocated, incr; reiser4_tree *tree; - if (!jnode_check_dirty(scan->node)) { + if (!JF_ISSET(scan->node, JNODE_DIRTY)) { scan->stop = 1; return 0; /* Race with truncate, this node is already * truncated. 
*/ @@ -439,9 +439,11 @@ unprotect_extent_nodes(flush_pos_t *flus do { count--; junprotect(node); - ON_DEBUG(LOCK_JNODE(node); - count_jnode(atom, node, PROTECT_LIST, DIRTY_LIST, 0); - UNLOCK_JNODE(node);); + ON_DEBUG( + spin_lock_jnode(node); + count_jnode(atom, node, PROTECT_LIST, DIRTY_LIST, 0); + spin_unlock_jnode(node); + ); if (count == 0) { break; } @@ -454,7 +456,7 @@ unprotect_extent_nodes(flush_pos_t *flus protected_list_split(protected_nodes, &unprotected_nodes, node); list_splice_init(&unprotected_nodes, ATOM_DIRTY_LIST(atom, LEAF_LEVEL)->prev); - UNLOCK_ATOM(atom); + spin_unlock_atom(atom); } extern int getjevent(void); @@ -464,8 +466,8 @@ static void protect_reloc_node(struct li { assert("zam-836", !JF_ISSET(node, JNODE_EPROTECTED)); assert("vs-1216", jnode_is_unformatted(node)); - assert("vs-1477", spin_atom_is_locked(node->atom)); - assert("nikita-3390", spin_jnode_is_locked(node)); + assert_spin_locked(&(node->atom->alock)); + assert_spin_locked(&(node->guard)); JF_SET(node, JNODE_EPROTECTED); list_del_init(&node->capture_link); @@ -517,31 +519,31 @@ protect_extent_nodes(flush_pos_t *flush_ break; } - LOCK_JNODE(node); + spin_lock_jnode(node); assert("vs-1476", atomic_read(&node->x_count) > 1); assert("nikita-3393", !JF_ISSET(node, JNODE_EPROTECTED)); if (JF_ISSET(node, JNODE_EFLUSH)) { if (eflushed == JNODES_TO_UNFLUSH) { - UNLOCK_JNODE(node); + spin_unlock_jnode(node); atomic_dec(&node->x_count); break; } buf[eflushed] = node; eflushed++; protect_reloc_node(protected_nodes, node); - UNLOCK_JNODE(node); + spin_unlock_jnode(node); } else { assert("nikita-3384", node->atom == atom); protect_reloc_node(protected_nodes, node); assert("nikita-3383", !JF_ISSET(node, JNODE_EFLUSH)); - UNLOCK_JNODE(node); + spin_unlock_jnode(node); atomic_dec(&node->x_count); } (*protected)++; } - UNLOCK_ATOM(atom); + spin_unlock_atom(atom); /* start io for eflushed nodes */ for (j = 0; j < eflushed; ++j) @@ -554,7 +556,6 @@ protect_extent_nodes(flush_pos_t *flush_ if (result != 0) { warning("nikita-3179", "unflush failed: %i", result); - print_jnode("node", buf[j]); } } jput(buf[j]); @@ -706,7 +707,7 @@ assign_real_blocknrs(flush_pos_t *flush_ i = 0; list_for_each_entry(node, protected_nodes, capture_link) { - LOCK_JNODE(node); + spin_lock_jnode(node); assert("vs-1132", ergo(state == UNALLOCATED_EXTENT, blocknr_is_fake(jnode_get_block(node)))); @@ -720,7 +721,7 @@ assign_real_blocknrs(flush_pos_t *flush_ FQ_LIST, 0)); junprotect(node); assert("", NODE_LIST(node) == FQ_LIST); - UNLOCK_JNODE(node); + spin_unlock_jnode(node); first++; i++; } @@ -730,7 +731,7 @@ assign_real_blocknrs(flush_pos_t *flush_ assert("vs-1687", count == i); if (state == UNALLOCATED_EXTENT) dec_unalloc_unfm_ptrs(count); - UNLOCK_ATOM(atom); + spin_unlock_atom(atom); } /** @@ -744,7 +745,7 @@ assign_real_blocknrs(flush_pos_t *flush_ */ static void make_node_ovrwr(struct list_head *jnodes, jnode *node) { - LOCK_JNODE(node); + spin_lock_jnode(node); assert("zam-917", !JF_ISSET(node, JNODE_RELOC)); assert("zam-918", !JF_ISSET(node, JNODE_OVRWR)); @@ -754,7 +755,7 @@ static void make_node_ovrwr(struct list_ list_add_tail(&node->capture_link, jnodes); ON_DEBUG(count_jnode(node->atom, node, DIRTY_LIST, OVRWR_LIST, 0)); - UNLOCK_JNODE(node); + spin_unlock_jnode(node); } /** @@ -799,7 +800,7 @@ static void mark_jnodes_overwrite(flush_ } list_splice_init(&jnodes, ATOM_OVRWR_LIST(atom)->prev); - UNLOCK_ATOM(atom); + spin_unlock_atom(atom); } /* this is called by handle_pos_on_twig to proceed extent unit flush_pos->coord is set to. 
It is to prepare for flushing diff -puN fs/reiser4/plugin/item/extent_item_ops.c~reiser4-spinlock-cleanup fs/reiser4/plugin/item/extent_item_ops.c --- devel/fs/reiser4/plugin/item/extent_item_ops.c~reiser4-spinlock-cleanup 2006-02-16 14:17:05.000000000 -0800 +++ devel-akpm/fs/reiser4/plugin/item/extent_item_ops.c 2006-02-16 14:17:05.000000000 -0800 @@ -263,8 +263,8 @@ int create_hook_extent(const coord_t * c assert("nikita-3246", znode_get_level(child_coord->node) == LEAF_LEVEL); - WLOCK_TREE(tree); - WLOCK_DK(tree); + write_lock_tree(tree); + write_lock_dk(tree); /* find a node on the left level for which right delimiting key has to be updated */ if (coord_wrt(child_coord) == COORD_ON_THE_LEFT) { @@ -291,8 +291,8 @@ int create_hook_extent(const coord_t * c node->right = NULL; } } - WUNLOCK_DK(tree); - WUNLOCK_TREE(tree); + write_unlock_dk(tree); + write_unlock_tree(tree); return 0; } @@ -373,8 +373,8 @@ kill_hook_extent(const coord_t * coord, */ /* if neighbors of item being removed are znodes - * link them */ - WLOCK_TREE(tree); - WLOCK_DK(tree); + write_lock_tree(tree); + write_lock_dk(tree); link_left_and_right(left, right); if (left) { /* update right delimiting key of left @@ -390,8 +390,8 @@ kill_hook_extent(const coord_t * coord, item_key_by_coord(next, key); znode_set_rd_key(left, key); } - WUNLOCK_DK(tree); - WUNLOCK_TREE(tree); + write_unlock_dk(tree); + write_unlock_tree(tree); from_off = get_key_offset(min_item_key) >> PAGE_CACHE_SHIFT; @@ -426,8 +426,9 @@ kill_hook_extent(const coord_t * coord, *key = *pto_key; set_key_offset(key, get_key_offset(pto_key) + 1); - UNDER_RW_VOID(dk, current_tree, write, - znode_set_rd_key(kdata->left->node, key)); + write_lock_dk(current_tree); + znode_set_rd_key(kdata->left->node, key); + write_unlock_dk(current_tree); } from_off = get_key_offset(pfrom_key) >> PAGE_CACHE_SHIFT; diff -puN fs/reiser4/plugin/item/internal.c~reiser4-spinlock-cleanup fs/reiser4/plugin/item/internal.c --- devel/fs/reiser4/plugin/item/internal.c~reiser4-spinlock-cleanup 2006-02-16 14:17:05.000000000 -0800 +++ devel-akpm/fs/reiser4/plugin/item/internal.c 2006-02-16 14:17:05.000000000 -0800 @@ -186,22 +186,22 @@ int check__internal(const coord_t * coor assert("nikita-3256", znode_invariant(child)); if (coord_prev_item(&cpy) == 0 && item_is_internal(&cpy)) { left_child = znode_at(&cpy, cpy.node); - RLOCK_TREE(znode_get_tree(child)); - if (left_child != NULL) + if (left_child != NULL) { + read_lock_tree(znode_get_tree(child)); check_link(left_child, child); - RUNLOCK_TREE(znode_get_tree(child)); - if (left_child != NULL) + read_unlock_tree(znode_get_tree(child)); zput(left_child); + } } coord_dup(&cpy, coord); if (coord_next_item(&cpy) == 0 && item_is_internal(&cpy)) { right_child = znode_at(&cpy, cpy.node); - RLOCK_TREE(znode_get_tree(child)); - if (right_child != NULL) + if (right_child != NULL) { + read_lock_tree(znode_get_tree(child)); check_link(child, right_child); - RUNLOCK_TREE(znode_get_tree(child)); - if (right_child != NULL) + read_unlock_tree(znode_get_tree(child)); zput(right_child); + } } zput(child); } @@ -256,8 +256,8 @@ int create_hook_internal(const coord_t * left = arg; tree = znode_get_tree(item->node); - WLOCK_TREE(tree); - WLOCK_DK(tree); + write_lock_tree(tree); + write_lock_dk(tree); assert("nikita-1400", (child->in_parent.node == NULL) || (znode_above_root(child->in_parent.node))); ++item->node->c_count; @@ -271,8 +271,8 @@ int create_hook_internal(const coord_t * znode_get_rd_key(child))) { znode_set_rd_key(child, znode_get_rd_key(left)); } - 
WUNLOCK_DK(tree); - WUNLOCK_TREE(tree); + write_unlock_dk(tree); + write_unlock_tree(tree); zput(child); return result; } else { @@ -320,17 +320,15 @@ int kill_hook_internal(const coord_t * i assert("nikita-2546", ZF_ISSET(child, JNODE_HEARD_BANSHEE)); tree = znode_get_tree(item->node); - WLOCK_TREE(tree); + write_lock_tree(tree); init_parent_coord(&child->in_parent, NULL); --item->node->c_count; - WUNLOCK_TREE(tree); + write_unlock_tree(tree); zput(child); return 0; } else { warning("nikita-1223", "Cowardly refuse to remove link to non-empty node"); - print_znode("parent", item->node); - print_znode("child", child); zput(child); return RETERR(-EIO); } @@ -363,7 +361,7 @@ int shift_hook_internal(const coord_t * if (child == NULL) return 0; if (!IS_ERR(child)) { - WLOCK_TREE(tree); + write_lock_tree(tree); ++new_node->c_count; assert("nikita-1395", znode_parent(child) == old_node); assert("nikita-1396", old_node->c_count > 0); @@ -372,7 +370,7 @@ int shift_hook_internal(const coord_t * assert("nikita-1782", check_tree_pointer(item, child) == NS_FOUND); --old_node->c_count; - WUNLOCK_TREE(tree); + write_unlock_tree(tree); zput(child); return 0; } else diff -puN fs/reiser4/plugin/item/static_stat.c~reiser4-spinlock-cleanup fs/reiser4/plugin/item/static_stat.c --- devel/fs/reiser4/plugin/item/static_stat.c~reiser4-spinlock-cleanup 2006-02-16 14:17:05.000000000 -0800 +++ devel-akpm/fs/reiser4/plugin/item/static_stat.c 2006-02-16 14:17:05.000000000 -0800 @@ -807,30 +807,20 @@ static int save_plugin_sd(struct inode * /* helper function for crypto_sd_present(), crypto_sd_save. Allocates memory for crypto stat, keyid and attaches it to the inode */ - -static int crypto_stat_to_inode(struct inode *inode, - reiser4_crypto_stat * sd, - unsigned int size /* fingerprint size */ ) +static int extract_crypto_stat (struct inode * inode, + reiser4_crypto_stat * sd) { - crypto_stat_t *stat; - - assert("edward-11", (cryptcompress_inode_data(inode))->crypt == NULL); - assert("edward-33", !inode_get_flag(inode, REISER4_CRYPTO_STAT_LOADED)); - - stat = kmalloc(sizeof(*stat), GFP_KERNEL); - if (!stat) - return RETERR(-ENOMEM); - memset(stat, 0, sizeof *stat); - stat->keyid = kmalloc((size_t) size, GFP_KERNEL); - if (!stat->keyid) { - kfree(stat); - return RETERR(-ENOMEM); - } - /* load inode crypto-stat */ - stat->keysize = le16_to_cpu(get_unaligned(&sd->keysize)); - memcpy(stat->keyid, sd->keyid, (size_t) size); - cryptcompress_inode_data(inode)->crypt = stat; - + crypto_stat_t * info; + assert("edward-11", !inode_crypto_stat(inode)); + assert("edward-1413", + !inode_get_flag(inode, REISER4_CRYPTO_STAT_LOADED)); + /* create and attach a crypto-stat without secret key loaded */ + info = alloc_crypto_stat(inode); + if (IS_ERR(info)) + return PTR_ERR(info); + info->keysize = le16_to_cpu(get_unaligned(&sd->keysize)); + memcpy(info->keyid, sd->keyid, inode_digest_plugin(inode)->fipsize); + attach_crypto_stat(inode, info); inode_set_flag(inode, REISER4_CRYPTO_STAT_LOADED); return 0; } @@ -842,10 +832,9 @@ static int present_crypto_sd(struct inod int result; reiser4_crypto_stat *sd; digest_plugin *dplug = inode_digest_plugin(inode); - unsigned int keyid_size; assert("edward-06", dplug != NULL); - assert("edward-684", dplug->dsize); + assert("edward-684", dplug->fipsize); assert("edward-07", area != NULL); assert("edward-08", *area != NULL); assert("edward-09", len != NULL); @@ -854,56 +843,47 @@ static int present_crypto_sd(struct inod if (*len < (int)sizeof(reiser4_crypto_stat)) { return not_enough_space(inode, 
"crypto-sd"); } - keyid_size = dplug->dsize; /* *len is number of bytes in stat data item from *area to the end of item. It must be not less than size of this extension */ - assert("edward-75", sizeof(*sd) + keyid_size <= *len); + assert("edward-75", sizeof(*sd) + dplug->fipsize <= *len); sd = (reiser4_crypto_stat *) * area; + result = extract_crypto_stat(inode, sd); + move_on(len, area, sizeof(*sd) + dplug->fipsize); - result = crypto_stat_to_inode(inode, sd, keyid_size); - move_on(len, area, sizeof(*sd) + keyid_size); return result; } -static int absent_crypto_sd(struct inode *inode) -{ - return -EIO; -} - static int save_len_crypto_sd(struct inode *inode) { - return (sizeof(reiser4_crypto_stat) + - inode_digest_plugin(inode)->dsize); + return sizeof(reiser4_crypto_stat) + + inode_digest_plugin(inode)->fipsize; } static int save_crypto_sd(struct inode *inode, char **area) { int result = 0; reiser4_crypto_stat *sd; + crypto_stat_t * info = inode_crypto_stat(inode); digest_plugin *dplug = inode_digest_plugin(inode); assert("edward-12", dplug != NULL); assert("edward-13", area != NULL); assert("edward-14", *area != NULL); + assert("edward-15", info != NULL); + assert("edward-1414", info->keyid != NULL); + assert("edward-1415", info->keysize != 0); assert("edward-76", reiser4_inode_data(inode) != NULL); - sd = (reiser4_crypto_stat *) * area; if (!inode_get_flag(inode, REISER4_CRYPTO_STAT_LOADED)) { /* file is just created */ - crypto_stat_t *stat; - stat = cryptcompress_inode_data(inode)->crypt; - - assert("edward-15", stat != NULL); - + sd = (reiser4_crypto_stat *) *area; /* copy everything but private key to the disk stat-data */ - put_unaligned(cpu_to_le16(stat->keysize), &sd->keysize); - memcpy(sd->keyid, stat->keyid, (size_t) dplug->dsize); + put_unaligned(cpu_to_le16(info->keysize), &sd->keysize); + memcpy(sd->keyid, info->keyid, (size_t) dplug->fipsize); inode_set_flag(inode, REISER4_CRYPTO_STAT_LOADED); - } else { - /* do nothing */ } - *area += (sizeof(*sd) + dplug->dsize); + *area += (sizeof(*sd) + dplug->fipsize); return result; } @@ -1030,8 +1010,7 @@ sd_ext_plugin sd_ext_plugins[LAST_SD_EXT .linkage = {NULL, NULL} }, .present = present_crypto_sd, - .absent = absent_crypto_sd, - /* return IO_ERROR if smthng is wrong */ + .absent = NULL, .save_len = save_len_crypto_sd, .save = save_crypto_sd, .alignment = 8 diff -puN fs/reiser4/plugin/node/node40.c~reiser4-spinlock-cleanup fs/reiser4/plugin/node/node40.c --- devel/fs/reiser4/plugin/node/node40.c~reiser4-spinlock-cleanup 2006-02-16 14:17:05.000000000 -0800 +++ devel-akpm/fs/reiser4/plugin/node/node40.c 2006-02-16 14:17:05.000000000 -0800 @@ -391,7 +391,6 @@ node_search_result lookup_node40(znode * left); print_key("key", key); print_key("min", &bstop->key); - print_znode("node", node); print_coord_content("coord", coord); return RETERR(-EIO); } else { @@ -406,7 +405,6 @@ node_search_result lookup_node40(znode * warning("nikita-588", "Unknown plugin %i", le16_to_cpu(get_unaligned(&bstop->plugin_id))); print_key("key", key); - print_znode("node", node); print_coord_content("coord", coord); return RETERR(-EIO); } @@ -475,6 +473,7 @@ int check_node40(const znode * node /* n unsigned old_offset; tree_level level; coord_t coord; + int result; assert("nikita-580", node != NULL); assert("nikita-581", error != NULL); @@ -591,25 +590,26 @@ int check_node40(const znode * node /* n iplug->s.file.append_key(&coord, &mkey); set_key_offset(&mkey, get_key_offset(&mkey) - 1); - if (UNDER_RW - (dk, current_tree, read, - keygt(&mkey, 
znode_get_rd_key((znode *) node)))) { + read_lock_dk(current_tree); + result = keygt(&mkey, znode_get_rd_key((znode *) node)); + read_unlock_dk(current_tree); + if (result) { *error = "key of rightmost item is too large"; return -1; } } } if (flags & REISER4_NODE_DKEYS) { - RLOCK_TREE(current_tree); - RLOCK_DK(current_tree); + read_lock_tree(current_tree); + read_lock_dk(current_tree); flags |= REISER4_NODE_TREE_STABLE; if (keygt(&prev, znode_get_rd_key((znode *) node))) { if (flags & REISER4_NODE_TREE_STABLE) { *error = "Last key is greater than rdkey"; - RUNLOCK_DK(current_tree); - RUNLOCK_TREE(current_tree); + read_unlock_dk(current_tree); + read_unlock_tree(current_tree); return -1; } } @@ -617,8 +617,8 @@ int check_node40(const znode * node /* n (znode_get_ld_key((znode *) node), znode_get_rd_key((znode *) node))) { *error = "ldkey is greater than rdkey"; - RUNLOCK_DK(current_tree); - RUNLOCK_TREE(current_tree); + read_unlock_dk(current_tree); + read_unlock_tree(current_tree); return -1; } if (ZF_ISSET(node, JNODE_LEFT_CONNECTED) && @@ -631,8 +631,8 @@ int check_node40(const znode * node /* n keygt(znode_get_rd_key(node->left), znode_get_ld_key((znode *) node)))) { *error = "left rdkey or ldkey is wrong"; - RUNLOCK_DK(current_tree); - RUNLOCK_TREE(current_tree); + read_unlock_dk(current_tree); + read_unlock_tree(current_tree); return -1; } if (ZF_ISSET(node, JNODE_RIGHT_CONNECTED) && @@ -645,13 +645,13 @@ int check_node40(const znode * node /* n keygt(znode_get_rd_key((znode *) node), znode_get_ld_key(node->right)))) { *error = "rdkey or right ldkey is wrong"; - RUNLOCK_DK(current_tree); - RUNLOCK_TREE(current_tree); + read_unlock_dk(current_tree); + read_unlock_tree(current_tree); return -1; } - RUNLOCK_DK(current_tree); - RUNLOCK_TREE(current_tree); + read_unlock_dk(current_tree); + read_unlock_tree(current_tree); } return 0; @@ -2084,6 +2084,7 @@ prepare_for_update(znode * left, znode * int prepare_removal_node40(znode * empty, carry_plugin_info * info) { carry_op *op; + reiser4_tree *tree; if (!should_notify_parent(empty)) return 0; @@ -2098,14 +2099,14 @@ int prepare_removal_node40(znode * empty op->u.delete.flags = 0; /* fare thee well */ - - RLOCK_TREE(current_tree); - WLOCK_DK(current_tree); + tree = znode_get_tree(empty); + read_lock_tree(tree); + write_lock_dk(tree); znode_set_ld_key(empty, znode_get_rd_key(empty)); if (znode_is_left_connected(empty) && empty->left) znode_set_rd_key(empty->left, znode_get_rd_key(empty)); - WUNLOCK_DK(current_tree); - RUNLOCK_TREE(current_tree); + write_unlock_dk(tree); + read_unlock_tree(tree); ZF_SET(empty, JNODE_HEARD_BANSHEE); return 0; diff -puN fs/reiser4/plugin/object.c~reiser4-spinlock-cleanup fs/reiser4/plugin/object.c --- devel/fs/reiser4/plugin/object.c~reiser4-spinlock-cleanup 2006-02-16 14:17:05.000000000 -0800 +++ devel-akpm/fs/reiser4/plugin/object.c 2006-02-16 14:17:05.000000000 -0800 @@ -190,7 +190,7 @@ file_plugin file_plugins[LAST_FILE_PLUGI .done = wire_done_common }, .init_inode_data = init_inode_ordering, - .cut_tree_worker = cut_tree_worker_common + .cut_tree_worker = cut_tree_worker_common, }, [SYMLINK_FILE_PLUGIN_ID] = { .h = { @@ -315,21 +315,22 @@ file_plugin file_plugins[LAST_FILE_PLUGI .writepages = writepages_cryptcompress, .set_page_dirty = reiser4_set_page_dirty, .readpages = reiser4_readpages, - .prepare_write = prepare_write_common + .prepare_write = prepare_write_common, + .invalidatepage = reiser4_invalidatepage }, .write_sd_by_inode = write_sd_by_inode_common, .flow_by_inode = flow_by_inode_cryptcompress, 
.key_by_inode = key_by_inode_cryptcompress, .set_plug_in_inode = set_plug_in_inode_common, - .adjust_to_parent = adjust_to_parent_common, + .adjust_to_parent = adjust_to_parent_cryptcompress, .create_object = create_cryptcompress, + .open_object = open_cryptcompress, .delete_object = delete_cryptcompress, .add_link = add_link_common, .rem_link = rem_link_common, .owns_item = owns_item_common, .can_add_link = can_add_link_common, .detach = dummyop, - .bind = dummyop, .safelink = safelink_common, .estimate = { .create = estimate_create_common, @@ -338,7 +339,7 @@ file_plugin file_plugins[LAST_FILE_PLUGI }, .init_inode_data = init_inode_data_cryptcompress, .cut_tree_worker = cut_tree_worker_cryptcompress, - .destroy_inode = destroy_inode_cryptcompress, + .destroy_inode = detach_crypto_stat, .wire = { .write = wire_write_common, .read = wire_read_common, diff -puN fs/reiser4/plugin/object.h~reiser4-spinlock-cleanup fs/reiser4/plugin/object.h --- devel/fs/reiser4/plugin/object.h~reiser4-spinlock-cleanup 2006-02-16 14:17:05.000000000 -0800 +++ devel-akpm/fs/reiser4/plugin/object.h 2006-02-16 14:17:05.000000000 -0800 @@ -49,8 +49,10 @@ int set_plug_in_inode_common(struct inod reiser4_object_create_data *); int adjust_to_parent_common(struct inode *object, struct inode *parent, struct inode *root); -int adjust_to_parent_common_dir(struct inode *object, - struct inode *parent, struct inode *root); +int adjust_to_parent_common_dir(struct inode *object, struct inode *parent, + struct inode *root); +int adjust_to_parent_cryptcompress(struct inode *object, struct inode *parent, + struct inode *root); int create_object_common(struct inode *object, struct inode *parent, reiser4_object_create_data *); int delete_object_common(struct inode *); @@ -63,6 +65,7 @@ int owns_item_common_dir(const struct in int can_add_link_common(const struct inode *); int can_rem_link_common_dir(const struct inode *); int detach_common_dir(struct inode *child, struct inode *parent); +int open_cryptcompress(struct inode * inode, struct file * file); int bind_common_dir(struct inode *child, struct inode *parent); int safelink_common(struct inode *, reiser4_safe_link_t, __u64 value); reiser4_block_nr estimate_create_common(const struct inode *); diff -puN fs/reiser4/plugin/plugin.h~reiser4-spinlock-cleanup fs/reiser4/plugin/plugin.h --- devel/fs/reiser4/plugin/plugin.h~reiser4-spinlock-cleanup 2006-02-16 14:17:05.000000000 -0800 +++ devel-akpm/fs/reiser4/plugin/plugin.h 2006-02-16 14:17:05.000000000 -0800 @@ -11,6 +11,7 @@ #include "../dformat.h" #include "../key.h" #include "compress/compress.h" +#include "../crypt.h" #include "plugin_header.h" #include "item/static_stat.h" #include "item/internal.h" @@ -239,7 +240,6 @@ typedef struct file_plugin { directory. */ int (*adjust_to_parent) (struct inode *object, struct inode *parent, struct inode *root); - /* * this does whatever is necessary to do when object is created. For * instance, for unix files stat data is inserted. It is supposed to be @@ -248,6 +248,8 @@ typedef struct file_plugin { int (*create_object) (struct inode *object, struct inode *parent, reiser4_object_create_data *); + /* this does whatever is necessary to do when object is opened */ + int (*open_object) (struct inode * inode, struct file * file); /* * this method should check REISER4_NO_SD and set REISER4_NO_SD on * success. 
Deletion of an object usually includes removal of items @@ -447,10 +449,8 @@ typedef struct hash_plugin { typedef struct crypto_plugin { /* generic fields */ plugin_header h; - int (*alloc) (struct inode * inode); - void (*free) (struct inode * inode); - /* number of cpu expkey words */ - unsigned nr_keywords; + struct crypto_tfm * (*alloc) (void); + void (*free) (struct crypto_tfm * tfm); /* Offset translator. For each offset this returns (k * offset), where k (k >= 1) is a coefficient of expansion of the crypto algorithm. For all symmetric algorithms k == 1. For asymmetric algorithms (which @@ -474,10 +474,10 @@ typedef struct crypto_plugin { typedef struct digest_plugin { /* generic fields */ plugin_header h; - /* digest size */ - int dsize; - int (*alloc) (struct inode * inode); - void (*free) (struct inode * inode); + /* fingerprint size in bytes */ + int fipsize; + struct crypto_tfm * (*alloc) (void); + void (*free) (struct crypto_tfm * tfm); } digest_plugin; typedef struct compression_plugin { @@ -504,12 +504,13 @@ typedef struct compression_plugin { typedef struct compression_mode_plugin { /* generic fields */ plugin_header h; - /* called before compression transform */ + /* this is called when estimating compressibility + of a logical cluster by its content */ int (*should_deflate) (cloff_t index); - /* called when results of compression should be saved */ - void (*save_deflate) (struct inode * inode); - /* called when results of compression should be discarded */ - int (*discard_deflate) (struct inode * inode, cloff_t index); + /* this is called when results of compression should be saved */ + void (*accept_hook) (struct inode * inode); + /* this is called when results of compression should be discarded */ + int (*discard_hook) (struct inode * inode, cloff_t index); } compression_mode_plugin; typedef struct regular_plugin { @@ -698,13 +699,14 @@ typedef enum { typedef enum { NONE_CRYPTO_ID, + AES_CRYPTO_ID, LAST_CRYPTO_ID } reiser4_crypto_id; /* builtin digest plugins */ typedef enum { - NONE_DIGEST_ID, + SHA256_32_DIGEST_ID, LAST_DIGEST_ID } reiser4_digest_id; @@ -719,11 +721,11 @@ typedef enum { /* builtin cluster plugins */ typedef enum { - CLUSTER_4K_ID, - CLUSTER_8K_ID, - CLUSTER_16K_ID, - CLUSTER_32K_ID, CLUSTER_64K_ID, + CLUSTER_32K_ID, + CLUSTER_16K_ID, + CLUSTER_8K_ID, + CLUSTER_4K_ID, LAST_CLUSTER_ID } reiser4_cluster_id; @@ -743,16 +745,6 @@ typedef enum { LAST_TAIL_FORMATTING_ID } reiser4_formatting_id; -/* Encapsulations of crypto specific data */ -typedef struct crypto_data { - reiser4_crypto_id cra; /* id of the crypto algorithm */ - reiser4_digest_id dia; /* id of the digest algorithm */ - __u8 *key; /* secret key */ - __u16 keysize; /* key size, bits */ - __u8 *keyid; /* keyid */ - __u16 keyid_size; /* keyid size, bytes */ -} crypto_data_t; - /* compression/clustering specific data */ typedef struct compression_data { reiser4_compression_id coa; /* id of the compression algorithm */ @@ -774,7 +766,8 @@ struct reiser4_object_create_data { const char *name; /* add here something for non-standard objects you invent, like query for interpolation file etc. 
*/ - crypto_data_t *crypto; + + crypto_stat_t * crypto; compression_data_t *compression; cluster_data_t *cluster; diff -puN fs/reiser4/plugin/space/bitmap.c~reiser4-spinlock-cleanup fs/reiser4/plugin/space/bitmap.c --- devel/fs/reiser4/plugin/space/bitmap.c~reiser4-spinlock-cleanup 2006-02-16 14:17:05.000000000 -0800 +++ devel-akpm/fs/reiser4/plugin/space/bitmap.c 2006-02-16 14:17:05.000000000 -0800 @@ -1283,8 +1283,8 @@ static void cond_add_to_overwrite_set(tx assert("zam-547", atom->stage == ASTAGE_PRE_COMMIT); assert("zam-548", node != NULL); - LOCK_ATOM(atom); - LOCK_JNODE(node); + spin_lock_atom(atom); + spin_lock_jnode(node); if (node->atom == NULL) { JF_SET(node, JNODE_OVRWR); @@ -1293,8 +1293,8 @@ static void cond_add_to_overwrite_set(tx assert("zam-549", node->atom == atom); } - UNLOCK_JNODE(node); - UNLOCK_ATOM(atom); + spin_unlock_jnode(node); + spin_unlock_atom(atom); } /* an actor which applies delete set to COMMIT bitmap pages and link modified @@ -1559,9 +1559,9 @@ int pre_commit_hook_bitmap(void) sbinfo = get_super_private(super); - reiser4_spin_lock_sb(sbinfo); + spin_lock_reiser4_super(sbinfo); sbinfo->blocks_free_committed += blocks_freed; - reiser4_spin_unlock_sb(sbinfo); + spin_unlock_reiser4_super(sbinfo); } return 0; diff -puN fs/reiser4/readahead.c~reiser4-spinlock-cleanup fs/reiser4/readahead.c --- devel/fs/reiser4/readahead.c~reiser4-spinlock-cleanup 2006-02-16 14:17:05.000000000 -0800 +++ devel-akpm/fs/reiser4/readahead.c 2006-02-16 14:17:05.000000000 -0800 @@ -26,8 +26,12 @@ static inline int ra_adjacent_only(int f if right neighbor's first key is less or equal to readahead's stop key */ static int should_readahead_neighbor(znode * node, ra_info_t * info) { - return (UNDER_RW(dk, ZJNODE(node)->tree, read, - keyle(znode_get_rd_key(node), &info->key_to_stop))); + int result; + + read_lock_dk(znode_get_tree(node)); + result = keyle(znode_get_rd_key(node), &info->key_to_stop); + read_unlock_dk(znode_get_tree(node)); + return result; } #define LOW_MEM_PERCENTAGE (5) diff -puN fs/reiser4/reiser4.h~reiser4-spinlock-cleanup fs/reiser4/reiser4.h --- devel/fs/reiser4/reiser4.h~reiser4-spinlock-cleanup 2006-02-16 14:17:05.000000000 -0800 +++ devel-akpm/fs/reiser4/reiser4.h 2006-02-16 14:17:05.000000000 -0800 @@ -25,6 +25,25 @@ #define REISER4_DEBUG (0) #endif +#if defined(CONFIG_ZLIB_INFLATE) +/* turn on zlib */ +#define REISER4_ZLIB (1) +#else +#define REISER4_ZLIB (0) +#endif + +#if defined(CONFIG_CRYPTO_SHA256) +#define REISER4_SHA256 (1) +#else +#define REISER4_SHA256 (0) +#endif + +#if defined(CONFIG_CRYPTO_AES_586) +#define REISER4_AES (1) +#else +#define REISER4_AES (0) +#endif + #if defined(CONFIG_REISER4_COPY_ON_CAPTURE) /* * Turns on copy-on-capture (COC) optimization. See @@ -97,7 +116,7 @@ extern const int REISER4_MAGIC_OFFSET; / #define REISER4_USE_ENTD (1) /* Using of emergency flush is an option. 
*/ -#define REISER4_USE_EFLUSH (0) +#define REISER4_USE_EFLUSH (1) /* key allocation is Plan-A */ #define REISER4_PLANA_KEY_ALLOCATION (1) diff -puN fs/reiser4/seal.c~reiser4-spinlock-cleanup fs/reiser4/seal.c --- devel/fs/reiser4/seal.c~reiser4-spinlock-cleanup 2006-02-16 14:17:05.000000000 -0800 +++ devel-akpm/fs/reiser4/seal.c 2006-02-16 14:17:05.000000000 -0800 @@ -194,11 +194,15 @@ static znode *seal_node(const seal_t * s static int seal_matches(const seal_t * seal /* seal to check */ , znode * node /* node to check */ ) { + int result; + assert("nikita-1991", seal != NULL); assert("nikita-1993", node != NULL); - return UNDER_SPIN(jnode, ZJNODE(node), - (seal->version == node->version)); + spin_lock_znode(node); + result = (seal->version == node->version); + spin_unlock_znode(node); + return result; } /* Make Linus happy. diff -puN fs/reiser4/search.c~reiser4-spinlock-cleanup fs/reiser4/search.c --- devel/fs/reiser4/search.c~reiser4-spinlock-cleanup 2006-02-16 14:17:05.000000000 -0800 +++ devel-akpm/fs/reiser4/search.c 2006-02-16 14:17:05.000000000 -0800 @@ -64,7 +64,7 @@ int cbk_cache_init(cbk_cache *cache /* c cbk_cache_init_slot(cache->slot + i); list_add_tail(&((cache->slot + i)->lru), &cache->lru); } - rw_cbk_cache_init(cache); + rwlock_init(&cache->guard); return 0; } @@ -107,7 +107,7 @@ static int cbk_cache_invariant(const cbk assert("nikita-2469", cache != NULL); unused = 0; result = 1; - read_lock_cbk_cache((cbk_cache *) cache); + read_lock(&((cbk_cache *)cache)->guard); for_all_slots(cache, slot) { /* in LRU first go all `used' slots followed by `unused' */ if (unused && (slot->node != NULL)) @@ -130,7 +130,7 @@ static int cbk_cache_invariant(const cbk if (!result) break; } - read_unlock_cbk_cache((cbk_cache *) cache); + read_unlock(&((cbk_cache *)cache)->guard); return result; } @@ -150,7 +150,7 @@ void cbk_cache_invalidate(const znode * cache = &tree->cbk_cache; assert("nikita-2470", cbk_cache_invariant(cache)); - write_lock_cbk_cache(cache); + write_lock(&(cache->guard)); for (i = 0, slot = cache->slot; i < cache->nr_slots; ++i, ++slot) { if (slot->node == node) { list_del(&slot->lru); @@ -159,7 +159,7 @@ void cbk_cache_invalidate(const znode * break; } } - write_unlock_cbk_cache(cache); + write_unlock(&(cache->guard)); assert("nikita-2471", cbk_cache_invariant(cache)); } @@ -179,7 +179,7 @@ static void cbk_cache_add(const znode *n if (cache->nr_slots == 0) return; - write_lock_cbk_cache(cache); + write_lock(&(cache->guard)); /* find slot to update/add */ for (i = 0, slot = cache->slot; i < cache->nr_slots; ++i, ++slot) { /* oops, this node is already in a cache */ @@ -193,7 +193,7 @@ static void cbk_cache_add(const znode *n } list_del(&slot->lru); list_add(&slot->lru, &cache->lru); - write_unlock_cbk_cache(cache); + write_unlock(&(cache->guard)); assert("nikita-2473", cbk_cache_invariant(cache)); } @@ -605,12 +605,10 @@ static int prepare_object_lookup(cbk_han isunique = h->flags & CBK_UNIQUE; /* check that key is inside vroot */ - inside = - UNDER_RW(dk, h->tree, read, - znode_contains_key_strict(vroot, - h->key, - isunique)) && - !ZF_ISSET(vroot, JNODE_HEARD_BANSHEE); + read_lock_dk(h->tree); + inside = (znode_contains_key_strict(vroot, h->key, isunique) && + !ZF_ISSET(vroot, JNODE_HEARD_BANSHEE)); + read_unlock_dk(h->tree); if (inside) { h->result = zload(vroot); if (h->result == 0) { @@ -736,8 +734,6 @@ static lookup_result traverse_tree(cbk_h reiser4_print_address("block", &h->block); print_key("key", h->key); print_coord_content("coord", h->coord); - 
print_znode("active", h->active_lh->node); - print_znode("parent", h->parent_lh->node); } /* `unlikely' error case */ if (unlikely(IS_CBKERR(h->result))) { @@ -774,7 +770,7 @@ static void find_child_delimiting_keys(z coord_t neighbor; assert("nikita-1484", parent != NULL); - assert("nikita-1485", rw_dk_is_locked(znode_get_tree(parent))); + assert_rw_locked(&(znode_get_tree(parent)->dk_lock)); coord_dup(&neighbor, parent_coord); @@ -819,7 +815,7 @@ set_child_delimiting_keys(znode * parent * JNODE_DKSET is never cleared once set. */ if (!ZF_ISSET(child, JNODE_DKSET)) { tree = znode_get_tree(parent); - WLOCK_DK(tree); + write_lock_dk(tree); if (likely(!ZF_ISSET(child, JNODE_DKSET))) { find_child_delimiting_keys(parent, coord, &child->ld_key, @@ -830,7 +826,7 @@ set_child_delimiting_keys(znode * parent atomic_inc_return(&delim_key_version);); ZF_SET(child, JNODE_DKSET); } - WUNLOCK_DK(tree); + write_unlock_dk(tree); return 1; } return 0; @@ -895,10 +891,10 @@ static level_lookup_result cbk_level_loo setdk = set_child_delimiting_keys(parent, h->coord, active); else { - UNDER_RW_VOID(dk, h->tree, read, - find_child_delimiting_keys(parent, - h->coord, - &ldkey, &key)); + read_lock_dk(h->tree); + find_child_delimiting_keys(parent, h->coord, &ldkey, + &key); + read_unlock_dk(h->tree); ldkeyset = 1; } zrelse(parent); @@ -911,13 +907,13 @@ static level_lookup_result cbk_level_loo h->coord->between = AT_UNIT; if (znode_just_created(active) && (h->coord->node != NULL)) { - WLOCK_TREE(h->tree); + write_lock_tree(h->tree); /* if we are going to load znode right now, setup ->in_parent: coord where pointer to this node is stored in parent. */ coord_to_parent_coord(h->coord, &active->in_parent); - WUNLOCK_TREE(h->tree); + write_unlock_tree(h->tree); } /* check connectedness without holding tree lock---false negatives @@ -1003,8 +999,8 @@ void check_dkeys(znode * node) znode *left; znode *right; - RLOCK_TREE(current_tree); - RLOCK_DK(current_tree); + read_lock_tree(current_tree); + read_lock_dk(current_tree); assert("vs-1710", znode_is_any_locked(node)); assert("vs-1197", @@ -1029,8 +1025,8 @@ void check_dkeys(znode * node) (keyeq(znode_get_rd_key(node), znode_get_ld_key(right)) || ZF_ISSET(right, JNODE_HEARD_BANSHEE))); - RUNLOCK_DK(current_tree); - RUNLOCK_TREE(current_tree); + read_unlock_dk(current_tree); + read_unlock_tree(current_tree); } #endif @@ -1042,10 +1038,10 @@ static int key_is_ld(znode * node, const assert("nikita-1716", node != NULL); assert("nikita-1758", key != NULL); - RLOCK_DK(znode_get_tree(node)); + read_lock_dk(znode_get_tree(node)); assert("nikita-1759", znode_contains_key(node, key)); ld = keyeq(znode_get_ld_key(node), key); - RUNLOCK_DK(znode_get_tree(node)); + read_unlock_dk(znode_get_tree(node)); return ld; } @@ -1179,7 +1175,7 @@ static int cbk_cache_scan_slots(cbk_hand */ rcu_read_lock(); - read_lock_cbk_cache(cache); + read_lock(&((cbk_cache *)cache)->guard); slot = list_entry(cache->lru.next, cbk_cache_slot, lru); slot = list_entry(slot->lru.prev, cbk_cache_slot, lru); @@ -1207,11 +1203,11 @@ static int cbk_cache_scan_slots(cbk_hand znode_contains_key_strict(node, key, isunique)) { zref(node); result = 0; - spin_lock_prefetch(&tree->tree_lock.lock); + spin_lock_prefetch(&tree->tree_lock); break; } } - read_unlock_cbk_cache(cache); + read_unlock(&((cbk_cache *)cache)->guard); assert("nikita-2475", cbk_cache_invariant(cache)); @@ -1236,11 +1232,10 @@ static int cbk_cache_scan_slots(cbk_hand return result; /* recheck keys */ - result = - UNDER_RW(dk, tree, read, - 
znode_contains_key_strict(node, key, isunique)) && - !ZF_ISSET(node, JNODE_HEARD_BANSHEE); - + read_lock_dk(tree); + result = (znode_contains_key_strict(node, key, isunique) && + !ZF_ISSET(node, JNODE_HEARD_BANSHEE)); + read_unlock_dk(tree); if (result) { /* do lookup inside node */ llr = cbk_node_lookup(h); @@ -1258,14 +1253,14 @@ static int cbk_cache_scan_slots(cbk_hand /* good. Either item found or definitely not found. */ result = 0; - write_lock_cbk_cache(cache); + write_lock(&(cache->guard)); if (slot->node == h->active_lh->node /*node */ ) { /* if this node is still in cbk cache---move its slot to the head of the LRU list. */ list_del(&slot->lru); list_add(&slot->lru, &cache->lru); } - write_unlock_cbk_cache(cache); + write_unlock(&(cache->guard)); } } else { /* race. While this thread was waiting for the lock, node was @@ -1337,8 +1332,8 @@ static void stale_dk(reiser4_tree * tree { znode *right; - RLOCK_TREE(tree); - WLOCK_DK(tree); + read_lock_tree(tree); + write_lock_dk(tree); right = node->right; if (ZF_ISSET(node, JNODE_RIGHT_CONNECTED) && @@ -1346,8 +1341,8 @@ static void stale_dk(reiser4_tree * tree !keyeq(znode_get_rd_key(node), znode_get_ld_key(right))) znode_set_rd_key(node, znode_get_ld_key(right)); - WUNLOCK_DK(tree); - RUNLOCK_TREE(tree); + write_unlock_dk(tree); + read_unlock_tree(tree); } /* check for possibly outdated delimiting keys, and update them if @@ -1357,8 +1352,8 @@ static void update_stale_dk(reiser4_tree znode *right; reiser4_key rd; - RLOCK_TREE(tree); - RLOCK_DK(tree); + read_lock_tree(tree); + read_lock_dk(tree); rd = *znode_get_rd_key(node); right = node->right; if (unlikely(ZF_ISSET(node, JNODE_RIGHT_CONNECTED) && @@ -1367,13 +1362,13 @@ static void update_stale_dk(reiser4_tree /* does this ever happen? */ warning("nikita-38210", "stale dk"); assert("nikita-38211", ZF_ISSET(node, JNODE_DKSET)); - RUNLOCK_DK(tree); - RUNLOCK_TREE(tree); + read_unlock_dk(tree); + read_unlock_tree(tree); stale_dk(tree, node); return; } - RUNLOCK_DK(tree); - RUNLOCK_TREE(tree); + read_unlock_dk(tree); + read_unlock_tree(tree); } /* @@ -1452,10 +1447,10 @@ static level_lookup_result search_to_lef default: /* some other error */ result = LOOKUP_DONE; } else if (h->result == NS_FOUND) { - RLOCK_DK(znode_get_tree(neighbor)); + read_lock_dk(znode_get_tree(neighbor)); h->rd_key = *znode_get_ld_key(node); leftmost_key_in_node(neighbor, &h->ld_key); - RUNLOCK_DK(znode_get_tree(neighbor)); + read_unlock_dk(znode_get_tree(neighbor)); h->flags |= CBK_DKSET; h->block = *znode_get_block(neighbor); @@ -1465,8 +1460,10 @@ static level_lookup_result search_to_lef Parent hint was set up by reiser4_get_left_neighbor() */ - UNDER_RW_VOID(tree, znode_get_tree(neighbor), - write, h->coord->node = NULL); + /* FIXME: why do we have to spinlock here? */ + write_lock_tree(znode_get_tree(neighbor)); + h->coord->node = NULL; + write_unlock_tree(znode_get_tree(neighbor)); result = LOOKUP_CONT; } else { result = LOOKUP_DONE; @@ -1511,7 +1508,6 @@ void print_coord_content(const char *pre && coord_is_existing_item(p)) printk("%s: data: %p, length: %i\n", prefix, item_body_by_coord(p), item_length_by_coord(p)); - print_znode(prefix, p->node); if (znode_is_loaded(p->node)) { item_key_by_coord(p, &key); print_key(prefix, &key); @@ -1574,13 +1570,13 @@ static int setup_delimiting_keys(cbk_han * JNODE_DKSET is never cleared once set. 
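The search.c hunks above all apply the same mechanical rewrite: an expression-style UNDER_RW()/UNDER_SPIN() macro becomes an explicit lock/unlock pair around a named result. Condensed from prepare_object_lookup() above (a restatement for clarity, not additional patch content):

	/* before: critical section hidden inside an expression macro */
	inside = UNDER_RW(dk, h->tree, read,
			  znode_contains_key_strict(vroot, h->key, isunique)) &&
		 !ZF_ISSET(vroot, JNODE_HEARD_BANSHEE);

	/* after: plain rwlock with the critical section spelled out;
	 * note that the HEARD_BANSHEE test moves under the dk lock too */
	read_lock_dk(h->tree);
	inside = (znode_contains_key_strict(vroot, h->key, isunique) &&
		  !ZF_ISSET(vroot, JNODE_HEARD_BANSHEE));
	read_unlock_dk(h->tree);
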
*/ if (!ZF_ISSET(active, JNODE_DKSET)) { tree = znode_get_tree(active); - WLOCK_DK(tree); + write_lock_dk(tree); if (!ZF_ISSET(active, JNODE_DKSET)) { znode_set_ld_key(active, &h->ld_key); znode_set_rd_key(active, &h->rd_key); ZF_SET(active, JNODE_DKSET); } - WUNLOCK_DK(tree); + write_unlock_dk(tree); return 1; } return 0; diff -L fs/reiser4/spin_macros.h -puN fs/reiser4/spin_macros.h~reiser4-spinlock-cleanup /dev/null --- devel/fs/reiser4/spin_macros.h +++ /dev/null 2003-09-15 06:40:47.000000000 -0700 @@ -1,474 +0,0 @@ -/* Copyright 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */ - -/* Wrapper functions/macros for spin locks. */ - -/* - * This file implements wrapper functions and macros to work with spin locks - * and read write locks embedded into kernel objects. Wrapper functions - * provide following functionality: - * - * (1) encapsulation of locks: in stead of writing spin_lock(&obj->lock), - * where obj is object of type foo, one writes spin_lock_foo(obj). - * - * (2) optional keeping (in per-thread reiser4_context->locks) information - * about number of locks of particular type currently held by thread. This - * is done if REISER4_DEBUG is on. - * - * (3) optional checking of lock ordering. For object type foo, it is - * possible to provide "lock ordering predicate" (possibly using - * information stored in reiser4_context->locks) checking that locks are - * acquired in the proper order. This is done if REISER4_DEBUG is on. - * - * (4) optional collection of spin lock contention statistics. In this mode - * two sysfs objects (located in /sys/profregion) are associated with each - * spin lock type. One object (foo_t) shows how much time was spent trying - * to acquire spin locks of foo type. Another (foo_h) shows how much time - * spin locks of the type foo were held locked. See spinprof.h for more - * details on this. - * - */ - -#ifndef __SPIN_MACROS_H__ -#define __SPIN_MACROS_H__ - -#include -#include - -#include "debug.h" - -/* Checks that read write lock @s is locked (or not) by the -current- - * thread. not yet implemented */ -#define check_is_write_locked(s) ((void)(s), 1) -#define check_is_read_locked(s) ((void)(s), 1) -#define check_is_not_read_locked(s) ((void)(s), 1) -#define check_is_not_write_locked(s) ((void)(s), 1) - -/* Checks that spin lock @s is locked (or not) by the -current- thread. */ -#define check_spin_is_not_locked(s) ((void)(s), 1) -#define spin_is_not_locked(s) ((void)(s), 1) -#if defined(CONFIG_SMP) -# define check_spin_is_locked(s) spin_is_locked(s) -#else -# define check_spin_is_locked(s) ((void)(s), 1) -#endif - -/* - * Data structure embedded into kernel objects together with spin lock. - */ -typedef struct reiser4_spin_data { - /* spin lock proper */ - spinlock_t lock; -} reiser4_spin_data; - -/* - * Data structure embedded into kernel objects together with read write lock. - */ -typedef struct reiser4_rw_data { - /* read write lock proper */ - rwlock_t lock; -} reiser4_rw_data; - -#if REISER4_DEBUG -#define __ODCA(l, e) ON_DEBUG_CONTEXT(assert(l, e)) -#else -#define __ODCA(l, e) noop -#endif - -/* Define several inline functions for each type of spinlock. This is long - * monster macro definition. 
*/ -#define SPIN_LOCK_FUNCTIONS(NAME,TYPE,FIELD) \ - \ -/* Initialize spin lock embedded in @x */ \ -static inline void spin_ ## NAME ## _init(TYPE *x) \ -{ \ - __ODCA("nikita-2987", x != NULL); \ - spin_lock_init(& x->FIELD.lock); \ -} \ - \ -/* Increment per-thread lock counter for this lock type and total counter */ \ -/* of acquired spin locks. This is helper function used by spin lock */ \ -/* acquiring functions below */ \ -static inline void spin_ ## NAME ## _inc(void) \ -{ \ - LOCK_CNT_INC(spin_locked_ ## NAME); \ - LOCK_CNT_INC(spin_locked); \ -} \ - \ -/* Decrement per-thread lock counter and total counter of acquired spin */ \ -/* locks. This is helper function used by spin lock releasing functions */ \ -/* below. */ \ -static inline void spin_ ## NAME ## _dec(void) \ -{ \ - LOCK_CNT_DEC(spin_locked_ ## NAME); \ - LOCK_CNT_DEC(spin_locked); \ -} \ - \ -/* Return true of spin lock embedded in @x is acquired by -current- */ \ -/* thread */ \ -static inline int spin_ ## NAME ## _is_locked (const TYPE *x) \ -{ \ - return check_spin_is_locked (& x->FIELD.lock) && \ - LOCK_CNT_GTZ(spin_locked_ ## NAME); \ -} \ - \ -/* Return true of spin lock embedded in @x is not acquired by -current- */ \ -/* thread */ \ -static inline int spin_ ## NAME ## _is_not_locked (TYPE *x) \ -{ \ - return check_spin_is_not_locked (& x->FIELD.lock); \ -} \ - \ -/* Acquire spin lock embedded in @x without checking lock ordering. */ \ -/* This is useful when, for example, locking just created object. */ \ -static inline void spin_lock_ ## NAME ## _no_ord (TYPE *x) \ -{ \ - __ODCA("nikita-2703", spin_ ## NAME ## _is_not_locked(x)); \ - spin_lock(&x->FIELD.lock); \ - spin_ ## NAME ## _inc(); \ -} \ - \ -/* Account for spin lock acquired by some other means. For example */ \ -/* through atomic_dec_and_lock() or similar. */ \ -static inline void spin_lock_ ## NAME ## _acc (TYPE *x) \ -{ \ - spin_ ## NAME ## _inc(); \ -} \ - \ -/* Lock @x with explicit indication of spin lock profiling "sites". */ \ -/* Locksite is used by spin lock profiling code (spinprof.[ch]) to */ \ -/* identify fragment of code that locks @x. */ \ -/* */ \ -/* If clock interrupt finds that current thread is spinning waiting for */ \ -/* the lock on @x, counters in @t will be incremented. */ \ -/* */ \ -/* If clock interrupt finds that current thread holds the lock on @x, */ \ -/* counters in @h will be incremented. */ \ -/* */ \ -static inline void spin_lock_ ## NAME ## _at (TYPE *x) \ -{ \ - __ODCA("nikita-1383", spin_ordering_pred_ ## NAME(x)); \ - spin_lock_ ## NAME ## _no_ord(x); \ -} \ - \ -/* Lock @x. */ \ -static inline void spin_lock_ ## NAME (TYPE *x) \ -{ \ - __ODCA("nikita-1383", spin_ordering_pred_ ## NAME(x)); \ - spin_lock_ ## NAME ## _no_ord(x); \ -} \ - \ -/* Try to obtain lock @x. On success, returns 1 with @x locked. */ \ -/* If @x is already locked, return 0 immediately. */ \ -static inline int spin_trylock_ ## NAME (TYPE *x) \ -{ \ - if (spin_trylock (& x->FIELD.lock)) { \ - spin_ ## NAME ## _inc(); \ - return 1; \ - } \ - return 0; \ -} \ - \ -/* Unlock @x. */ \ -static inline void spin_unlock_ ## NAME (TYPE *x) \ -{ \ - __ODCA("nikita-1375", LOCK_CNT_GTZ(spin_locked_ ## NAME)); \ - __ODCA("nikita-1376", LOCK_CNT_GTZ(spin_locked)); \ - __ODCA("nikita-2703", spin_ ## NAME ## _is_locked(x)); \ - \ - spin_ ## NAME ## _dec(); \ - spin_unlock (& x->FIELD.lock); \ -} \ - \ -typedef struct { int foo; } NAME ## _spin_dummy - -/* - * Helper macro to perform a simple operation that requires taking of spin - * lock. - * - * 1. 
Acquire spin lock on object @obj of type @obj_type. - * - * 2. Execute @exp under spin lock, and store result. - * - * 3. Release spin lock. - * - * 4. Return result of @exp. - * - * Example: - * - * right_delimiting_key = UNDER_SPIN(dk, current_tree, *znode_get_rd_key(node)); - * - */ -#define UNDER_SPIN(obj_type, obj, exp) \ -({ \ - typeof (obj) __obj; \ - typeof (exp) __result; \ - \ - __obj = (obj); \ - __ODCA("nikita-2492", __obj != NULL); \ - spin_lock_ ## obj_type ## _at (__obj); \ - __result = exp; \ - spin_unlock_ ## obj_type (__obj); \ - __result; \ -}) - -/* - * The same as UNDER_SPIN, but without storing and returning @exp's result. - */ -#define UNDER_SPIN_VOID(obj_type, obj, exp) \ -({ \ - typeof (obj) __obj; \ - \ - __obj = (obj); \ - __ODCA("nikita-2492", __obj != NULL); \ - spin_lock_ ## obj_type ## _at (__obj); \ - exp; \ - spin_unlock_ ## obj_type (__obj); \ -}) - -/* Define several inline functions for each type of read write lock. This is - * insanely long macro definition. */ -#define RW_LOCK_FUNCTIONS(NAME,TYPE,FIELD) \ - \ - \ -/* Initialize read write lock embedded into @x. */ \ -static inline void rw_ ## NAME ## _init(TYPE *x) \ -{ \ - __ODCA("nikita-2988", x != NULL); \ - rwlock_init(& x->FIELD.lock); \ -} \ - \ -/* True, if @x is read locked by the -current- thread. */ \ -static inline int rw_ ## NAME ## _is_read_locked (const TYPE *x) \ -{ \ - return check_is_read_locked (& x->FIELD.lock); \ -} \ - \ -/* True, if @x is write locked by the -current- thread. */ \ -static inline int rw_ ## NAME ## _is_write_locked (const TYPE *x) \ -{ \ - return check_is_write_locked (& x->FIELD.lock); \ -} \ - \ -/* True, if @x is not read locked by the -current- thread. */ \ -static inline int rw_ ## NAME ## _is_not_read_locked (TYPE *x) \ -{ \ - return check_is_not_read_locked (& x->FIELD.lock); \ -} \ - \ -/* True, if @x is not write locked by the -current- thread. */ \ -static inline int rw_ ## NAME ## _is_not_write_locked (TYPE *x) \ -{ \ - return check_is_not_write_locked (& x->FIELD.lock); \ -} \ - \ -/* True, if @x is either read or write locked by the -current- thread. */ \ -static inline int rw_ ## NAME ## _is_locked (const TYPE *x) \ -{ \ - return check_is_read_locked (& x->FIELD.lock) || \ - check_is_write_locked (& x->FIELD.lock); \ -} \ - \ -/* True, if @x is neither read nor write locked by the -current- thread. 
*/ \ -static inline int rw_ ## NAME ## _is_not_locked (const TYPE *x) \ -{ \ - return check_is_not_read_locked (& x->FIELD.lock) && \ - check_is_not_write_locked (& x->FIELD.lock); \ -} \ - \ -/* This is helper function used by lock acquiring functions below */ \ -static inline void read_ ## NAME ## _inc(void) \ -{ \ - LOCK_CNT_INC(read_locked_ ## NAME); \ - LOCK_CNT_INC(rw_locked_ ## NAME); \ - LOCK_CNT_INC(spin_locked); \ -} \ - \ -/* This is helper function used by lock acquiring functions below */ \ -static inline void read_ ## NAME ## _dec(void) \ -{ \ - LOCK_CNT_DEC(read_locked_ ## NAME); \ - LOCK_CNT_DEC(rw_locked_ ## NAME); \ - LOCK_CNT_DEC(spin_locked); \ -} \ - \ -/* This is helper function used by lock acquiring functions below */ \ -static inline void write_ ## NAME ## _inc(void) \ -{ \ - LOCK_CNT_INC(write_locked_ ## NAME); \ - LOCK_CNT_INC(rw_locked_ ## NAME); \ - LOCK_CNT_INC(spin_locked); \ -} \ - \ -/* This is helper function used by lock acquiring functions below */ \ -static inline void write_ ## NAME ## _dec(void) \ -{ \ - LOCK_CNT_DEC(write_locked_ ## NAME); \ - LOCK_CNT_DEC(rw_locked_ ## NAME); \ - LOCK_CNT_DEC(spin_locked); \ -} \ - \ -/* Acquire read lock on @x without checking lock ordering predicates. */ \ -/* This is useful when, for example, locking just created object. */ \ -static inline void read_lock_ ## NAME ## _no_ord (TYPE *x) \ -{ \ - __ODCA("nikita-2976", rw_ ## NAME ## _is_not_read_locked(x)); \ - read_lock(&x->FIELD.lock); \ - read_ ## NAME ## _inc(); \ -} \ - \ -/* Acquire write lock on @x without checking lock ordering predicates. */ \ -/* This is useful when, for example, locking just created object. */ \ -static inline void write_lock_ ## NAME ## _no_ord (TYPE *x) \ -{ \ - __ODCA("nikita-2977", rw_ ## NAME ## _is_not_write_locked(x)); \ - write_lock(&x->FIELD.lock); \ - write_ ## NAME ## _inc(); \ -} \ - \ -/* Read lock @x with explicit indication of spin lock profiling "sites". */ \ -/* See spin_lock_foo_at() above for more information. */ \ -static inline void read_lock_ ## NAME ## _at (TYPE *x) \ -{ \ - __ODCA("nikita-2975", rw_ordering_pred_ ## NAME(x)); \ - read_lock_ ## NAME ## _no_ord(x); \ -} \ - \ -/* Write lock @x with explicit indication of spin lock profiling "sites". */ \ -/* See spin_lock_foo_at() above for more information. */ \ -static inline void write_lock_ ## NAME ## _at (TYPE *x) \ -{ \ - __ODCA("nikita-2978", rw_ordering_pred_ ## NAME(x)); \ - write_lock_ ## NAME ## _no_ord(x); \ -} \ - \ -/* Read lock @x. */ \ -static inline void read_lock_ ## NAME (TYPE *x) \ -{ \ - __ODCA("nikita-2975", rw_ordering_pred_ ## NAME(x)); \ - read_lock_ ## NAME ## _no_ord(x); \ -} \ - \ -/* Write lock @x. */ \ -static inline void write_lock_ ## NAME (TYPE *x) \ -{ \ - __ODCA("nikita-2978", rw_ordering_pred_ ## NAME(x)); \ - write_lock_ ## NAME ## _no_ord(x); \ -} \ - \ -/* Release read lock on @x. */ \ -static inline void read_unlock_ ## NAME (TYPE *x) \ -{ \ - __ODCA("nikita-2979", LOCK_CNT_GTZ(read_locked_ ## NAME)); \ - __ODCA("nikita-2980", LOCK_CNT_GTZ(rw_locked_ ## NAME)); \ - __ODCA("nikita-2980", LOCK_CNT_GTZ(spin_locked)); \ - read_ ## NAME ## _dec(); \ - __ODCA("nikita-2703", rw_ ## NAME ## _is_read_locked(x)); \ - read_unlock (& x->FIELD.lock); \ -} \ - \ -/* Release write lock on @x. 
*/ \ -static inline void write_unlock_ ## NAME (TYPE *x) \ -{ \ - __ODCA("nikita-2979", LOCK_CNT_GTZ(write_locked_ ## NAME)); \ - __ODCA("nikita-2980", LOCK_CNT_GTZ(rw_locked_ ## NAME)); \ - __ODCA("nikita-2980", LOCK_CNT_GTZ(spin_locked)); \ - write_ ## NAME ## _dec(); \ - __ODCA("nikita-2703", rw_ ## NAME ## _is_write_locked(x)); \ - write_unlock (& x->FIELD.lock); \ -} \ - \ -/* Try to obtain write lock on @x. On success, returns 1 with @x locked. */ \ -/* If @x is already locked, return 0 immediately. */ \ -static inline int write_trylock_ ## NAME (TYPE *x) \ -{ \ - if (write_trylock (& x->FIELD.lock)) { \ - write_ ## NAME ## _inc(); \ - return 1; \ - } \ - return 0; \ -} \ - \ - \ -typedef struct { int foo; } NAME ## _rw_dummy - -/* - * Helper macro to perform a simple operation that requires taking of read - * write lock. - * - * 1. Acquire read or write (depending on @rw parameter) lock on object @obj - * of type @obj_type. - * - * 2. Execute @exp under lock, and store result. - * - * 3. Release lock. - * - * 4. Return result of @exp. - * - * Example: - * - * tree_height = UNDER_RW(tree, current_tree, read, current_tree->height); - */ -#define UNDER_RW(obj_type, obj, rw, exp) \ -({ \ - typeof (obj) __obj; \ - typeof (exp) __result; \ - \ - __obj = (obj); \ - __ODCA("nikita-2981", __obj != NULL); \ - rw ## _lock_ ## obj_type ## _at (__obj); \ - __result = exp; \ - rw ## _unlock_ ## obj_type (__obj); \ - __result; \ -}) - -/* - * The same as UNDER_RW, but without storing and returning @exp's result. - */ -#define UNDER_RW_VOID(obj_type, obj, rw, exp) \ -({ \ - typeof (obj) __obj; \ - \ - __obj = (obj); \ - __ODCA("nikita-2982", __obj != NULL); \ - rw ## _lock_ ## obj_type ## _at (__obj); \ - exp; \ - rw ## _unlock_ ## obj_type (__obj); \ -}) - -#define LOCK_JNODE(node) spin_lock_jnode(node) -#define LOCK_JLOAD(node) spin_lock_jload(node) -#define LOCK_ATOM(atom) spin_lock_atom(atom) -#define LOCK_TXNH(txnh) spin_lock_txnh(txnh) -#define LOCK_INODE(inode) spin_lock_inode_object(inode) -#define RLOCK_TREE(tree) read_lock_tree(tree) -#define WLOCK_TREE(tree) write_lock_tree(tree) -#define RLOCK_DK(tree) read_lock_dk(tree) -#define WLOCK_DK(tree) write_lock_dk(tree) -#define RLOCK_ZLOCK(lock) read_lock_zlock(lock) -#define WLOCK_ZLOCK(lock) write_lock_zlock(lock) - -#define UNLOCK_JNODE(node) spin_unlock_jnode(node) -#define UNLOCK_JLOAD(node) spin_unlock_jload(node) -#define UNLOCK_ATOM(atom) spin_unlock_atom(atom) -#define UNLOCK_TXNH(txnh) spin_unlock_txnh(txnh) -#define UNLOCK_INODE(inode) spin_unlock_inode_object(inode) -#define RUNLOCK_TREE(tree) read_unlock_tree(tree) -#define WUNLOCK_TREE(tree) write_unlock_tree(tree) -#define RUNLOCK_DK(tree) read_unlock_dk(tree) -#define WUNLOCK_DK(tree) write_unlock_dk(tree) -#define RUNLOCK_ZLOCK(lock) read_unlock_zlock(lock) -#define WUNLOCK_ZLOCK(lock) write_unlock_zlock(lock) - -/* __SPIN_MACROS_H__ */ -#endif - -/* Make Linus happy. 
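For readers who have not met the generator macros being removed here: a single invocation stamped out an entire family of per-type wrappers. The old and new schemes, condensed (the invocation is the one deleted from super.h later in this patch, and the replacement is the hand-written inline that hunk adds):

	/* old: generates spin_lock_super(), spin_unlock_super(),
	 * spin_trylock_super(), spin_super_is_locked(), ... all operating
	 * on sbinfo->guard and the per-thread debug lock counters */
	SPIN_LOCK_FUNCTIONS(super, reiser4_super_info_data, guard);

	/* new: the struct embeds a plain spinlock_t, and only the wrappers
	 * that are actually needed are written out by hand */
	static inline void spin_lock_reiser4_super(reiser4_super_info_data *sbinfo)
	{
		spin_lock(&(sbinfo->guard));
	}
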
- Local variables: - c-indentation-style: "K&R" - mode-name: "LC" - c-basic-offset: 8 - tab-width: 8 - fill-column: 120 - scroll-step: 1 - End: -*/ diff -puN fs/reiser4/super.c~reiser4-spinlock-cleanup fs/reiser4/super.c --- devel/fs/reiser4/super.c~reiser4-spinlock-cleanup 2006-02-16 14:17:05.000000000 -0800 +++ devel-akpm/fs/reiser4/super.c 2006-02-16 14:17:05.000000000 -0800 @@ -318,9 +318,9 @@ void inc_unalloc_unfm_ptr(void) reiser4_super_info_data *sbinfo; sbinfo = get_super_private(get_current_context()->super); - reiser4_spin_lock_sb(sbinfo); + spin_lock_reiser4_super(sbinfo); sbinfo->unalloc_extent_pointers++; - reiser4_spin_unlock_sb(sbinfo); + spin_unlock_reiser4_super(sbinfo); } /* this is called when unallocated extent is converted to allocated */ @@ -329,10 +329,10 @@ void dec_unalloc_unfm_ptrs(int nr) reiser4_super_info_data *sbinfo; sbinfo = get_super_private(get_current_context()->super); - reiser4_spin_lock_sb(sbinfo); + spin_lock_reiser4_super(sbinfo); BUG_ON(sbinfo->unalloc_extent_pointers < nr); sbinfo->unalloc_extent_pointers -= nr; - reiser4_spin_unlock_sb(sbinfo); + spin_unlock_reiser4_super(sbinfo); } diff -puN fs/reiser4/super.h~reiser4-spinlock-cleanup fs/reiser4/super.h --- devel/fs/reiser4/super.h~reiser4-spinlock-cleanup 2006-02-16 14:17:05.000000000 -0800 +++ devel-akpm/fs/reiser4/super.h 2006-02-16 14:17:05.000000000 -0800 @@ -49,7 +49,10 @@ typedef enum { /* load all bitmap blocks at mount time */ REISER4_DONT_LOAD_BITMAP = 5, /* enforce atomicity during write(2) */ - REISER4_ATOMIC_WRITE = 6 + REISER4_ATOMIC_WRITE = 6, + /* don't use write barriers in the log writer code. */ + REISER4_NO_WRITE_BARRIER = 7 + } reiser4_fs_flag; /* @@ -119,7 +122,7 @@ struct reiser4_super_info_data { * guard spinlock which protects reiser4 super block fields (currently * blocks_free, blocks_free_committed) */ - reiser4_spin_data guard; + spinlock_t guard; /* next oid that will be returned by oid_allocate() */ oid_t next_to_use; @@ -220,7 +223,7 @@ struct reiser4_super_info_data { #if REISER4_USE_EFLUSH /* see emergency_flush.c for details */ - reiser4_spin_data eflush_guard; + spinlock_t eflush_guard; /* number of emergency flushed nodes */ int eflushed; /* hash table used by emergency flush. Protected by ->eflush_guard */ @@ -317,6 +320,13 @@ static inline reiser4_super_info_data *g return (reiser4_super_info_data *) super->s_fs_info; } +/* get ent context for the @super */ +static inline entd_context *get_entd_context(struct super_block *super) +{ + return &get_super_private(super)->entd; +} + + /* "Current" super-block: main super block used during current system call. Reference to this super block is stored in reiser4_context. 
*/ static inline struct super_block *reiser4_get_current_sb(void) @@ -375,48 +385,16 @@ extern void build_object_ops(struct supe #define REISER4_SUPER_MAGIC 0x52345362 /* (*(__u32 *)"R4Sb"); */ -#define spin_ordering_pred_super(private) (1) -SPIN_LOCK_FUNCTIONS(super, reiser4_super_info_data, guard); - -/* - * lock reiser4-specific part of super block - */ -static inline void reiser4_spin_lock_sb(reiser4_super_info_data * sbinfo) -{ - spin_lock_super(sbinfo); -} - -/* - * unlock reiser4-specific part of super block - */ -static inline void reiser4_spin_unlock_sb(reiser4_super_info_data * sbinfo) -{ - spin_unlock_super(sbinfo); -} - -#if REISER4_USE_EFLUSH - -#define spin_ordering_pred_super_eflush(private) (1) -SPIN_LOCK_FUNCTIONS(super_eflush, reiser4_super_info_data, eflush_guard); - -/* - * lock emergency flush data-structures for super block @s - */ -static inline void spin_lock_eflush(const struct super_block *s) +static inline void spin_lock_reiser4_super(reiser4_super_info_data *sbinfo) { - reiser4_super_info_data *sbinfo = get_super_private(s); - spin_lock_super_eflush(sbinfo); + spin_lock(&(sbinfo->guard)); } -/* - * unlock emergency flush data-structures for super block @s - */ -static inline void spin_unlock_eflush(const struct super_block *s) +static inline void spin_unlock_reiser4_super(reiser4_super_info_data *sbinfo) { - reiser4_super_info_data *sbinfo = get_super_private(s); - spin_unlock_super_eflush(sbinfo); + assert_spin_locked(&(sbinfo->guard)); + spin_unlock(&(sbinfo->guard)); } -#endif extern __u64 flush_reserved(const struct super_block *); extern int reiser4_is_set(const struct super_block *super, reiser4_fs_flag f); diff -puN fs/reiser4/super_ops.c~reiser4-spinlock-cleanup fs/reiser4/super_ops.c --- devel/fs/reiser4/super_ops.c~reiser4-spinlock-cleanup 2006-02-16 14:17:05.000000000 -0800 +++ devel-akpm/fs/reiser4/super_ops.c 2006-02-16 14:17:05.000000000 -0800 @@ -107,7 +107,7 @@ static struct inode *reiser4_alloc_inode seal_init(&info->sd_seal, NULL, NULL); coord_init_invalid(&info->sd_coord, NULL); info->flags = 0; - spin_inode_object_init(info); + spin_lock_init(&info->guard); /* this deals with info's loading semaphore */ loading_alloc(info); info->vroot = UBER_TREE_ADDR; diff -puN fs/reiser4/tree.c~reiser4-spinlock-cleanup fs/reiser4/tree.c --- devel/fs/reiser4/tree.c~reiser4-spinlock-cleanup 2006-02-16 14:17:05.000000000 -0800 +++ devel-akpm/fs/reiser4/tree.c 2006-02-16 14:17:05.000000000 -0800 @@ -628,15 +628,15 @@ znode *child_znode(const coord_t * paren assert("nikita-1374", parent_coord != NULL); assert("nikita-1482", parent != NULL); - assert("nikita-1384", ergo(setup_dkeys_p, - rw_dk_is_not_locked(znode_get_tree - (parent)))); +#if REISER4_DEBUG + if (setup_dkeys_p) + assert_rw_not_locked(&(znode_get_tree(parent)->dk_lock)); +#endif assert("nikita-2947", znode_is_any_locked(parent)); if (znode_get_level(parent) <= LEAF_LEVEL) { /* trying to get child of leaf node */ warning("nikita-1217", "Child of maize?"); - print_znode("node", parent); return ERR_PTR(RETERR(-EIO)); } if (item_is_internal(parent_coord)) { @@ -659,7 +659,6 @@ znode *child_znode(const coord_t * paren set_child_delimiting_keys(parent, parent_coord, child); } else { warning("nikita-1483", "Internal item expected"); - print_znode("node", parent); child = ERR_PTR(RETERR(-EIO)); } return child; @@ -702,7 +701,7 @@ static void uncapture_znode(znode * node assert("zam-939", atom != NULL); spin_unlock_znode(node); flush_reserved2grabbed(atom, (__u64) 1); - UNLOCK_ATOM(atom); + 
spin_unlock_atom(atom); } else spin_unlock_znode(node); } else { @@ -750,7 +749,7 @@ static void uncapture_znode(znode * node } uncapture_block(ZJNODE(node)); - UNLOCK_ATOM(atom); + spin_unlock_atom(atom); zput(node); } } @@ -770,7 +769,7 @@ void forget_znode(lock_handle * handle) assert("vs-164", znode_is_write_locked(node)); assert("nikita-1280", ZF_ISSET(node, JNODE_HEARD_BANSHEE)); - assert("nikita-3337", rw_zlock_is_locked(&node->lock)); + assert_rw_locked(&(node->lock.guard)); /* We assume that this node was detached from its parent before * unlocking, it gives no way to reach this node from parent through a @@ -780,10 +779,10 @@ void forget_znode(lock_handle * handle) * right neighbors. In the next several lines we remove the node from * the sibling list. */ - WLOCK_TREE(tree); + write_lock_tree(tree); sibling_list_remove(node); znode_remove(node, tree); - WUNLOCK_TREE(tree); + write_unlock_tree(tree); /* Here we set JNODE_DYING and cancel all pending lock requests. It * forces all lock requestor threads to repeat iterations of getting @@ -895,23 +894,25 @@ int find_child_ptr(znode * parent /* par * not aliased to ->in_parent of some znode. Otherwise, * parent_coord_to_coord() below would modify data protected by tree * lock. */ - RLOCK_TREE(tree); + read_lock_tree(tree); /* fast path. Try to use cached value. Lock tree to keep node->pos_in_parent and pos->*_blocknr consistent. */ if (child->in_parent.item_pos + 1 != 0) { parent_coord_to_coord(&child->in_parent, result); if (check_tree_pointer(result, child) == NS_FOUND) { - RUNLOCK_TREE(tree); + read_unlock_tree(tree); return NS_FOUND; } child->in_parent.item_pos = (unsigned short)~0; } - RUNLOCK_TREE(tree); + read_unlock_tree(tree); /* is above failed, find some key from @child. We are looking for the least key in a child. */ - UNDER_RW_VOID(dk, tree, read, ld = *znode_get_ld_key(child)); + read_lock_dk(tree); + ld = *znode_get_ld_key(child); + read_unlock_dk(tree); /* * now, lookup parent with key just found. Note, that left delimiting * key doesn't identify node uniquely, because (in extremely rare @@ -923,9 +924,9 @@ int find_child_ptr(znode * parent /* par lookup_res = nplug->lookup(parent, &ld, FIND_EXACT, result); /* update cached pos_in_node */ if (lookup_res == NS_FOUND) { - WLOCK_TREE(tree); + write_lock_tree(tree); coord_to_parent_coord(result, &child->in_parent); - WUNLOCK_TREE(tree); + write_unlock_tree(tree); lookup_res = check_tree_pointer(result, child); } if (lookup_res == NS_NOT_FOUND) @@ -954,9 +955,9 @@ static int find_child_by_addr(znode * pa for_all_units(result, parent) { if (check_tree_pointer(result, child) == NS_FOUND) { - UNDER_RW_VOID(tree, znode_get_tree(parent), write, - coord_to_parent_coord(result, - &child->in_parent)); + write_lock_tree(znode_get_tree(parent)); + coord_to_parent_coord(result, &child->in_parent); + write_unlock_tree(znode_get_tree(parent)); ret = NS_FOUND; break; } @@ -1201,9 +1202,9 @@ prepare_twig_kill(carry_kill_data * kdat case -E_NO_NEIGHBOR: /* there is no formatted node to the right of from->node */ - UNDER_RW_VOID(dk, tree, read, - key = - *znode_get_rd_key(from->node)); + read_lock_dk(tree); + key = *znode_get_rd_key(from->node); + read_unlock_dk(tree); right_coord.node = NULL; result = 0; break; @@ -1472,10 +1473,10 @@ int delete_node(znode * node, reiser4_ke be zero). 
*/ tree = znode_get_tree(node); - WLOCK_TREE(tree); + write_lock_tree(tree); init_parent_coord(&node->in_parent, NULL); --parent_lock.node->c_count; - WUNLOCK_TREE(tree); + write_unlock_tree(tree); assert("zam-989", item_is_internal(&cut_from)); @@ -1495,8 +1496,8 @@ int delete_node(znode * node, reiser4_ke reiser4_tree *tree = current_tree; __u64 start_offset = 0, end_offset = 0; - RLOCK_TREE(tree); - WLOCK_DK(tree); + read_lock_tree(tree); + write_lock_dk(tree); if (object) { /* We use @smallest_removed and the left delimiting of * the current node for @object->i_blocks, i_bytes @@ -1513,8 +1514,8 @@ int delete_node(znode * node, reiser4_ke *smallest_removed = *znode_get_ld_key(node); - WUNLOCK_DK(tree); - RUNLOCK_TREE(tree); + write_unlock_dk(tree); + read_unlock_tree(tree); if (object) { /* we used to perform actions which are to be performed on items on their removal from tree in @@ -1534,6 +1535,16 @@ int delete_node(znode * node, reiser4_ke return ret; } +static int can_delete(const reiser4_key *key, znode *node) +{ + int result; + + read_lock_dk(current_tree); + result = keyle(key, znode_get_ld_key(node)); + read_unlock_dk(current_tree); + return result; +} + /** * This subroutine is not optimal but implementation seems to * be easier). @@ -1580,11 +1591,9 @@ cut_tree_worker_common(tap_t * tap, cons break; /* Check can we delete the node as a whole. */ if (*progress && znode_get_level(node) == LEAF_LEVEL && - UNDER_RW(dk, current_tree, read, - keyle(from_key, znode_get_ld_key(node)))) { - result = - delete_node(node, smallest_removed, object, - truncate); + can_delete(from_key, node)) { + result = delete_node(node, smallest_removed, object, + truncate); } else { result = tap_load(tap); if (result) @@ -1817,8 +1826,8 @@ cut_tree(reiser4_tree * tree, const reis void init_tree_0(reiser4_tree * tree) { assert("zam-683", tree != NULL); - rw_tree_init(tree); - spin_epoch_init(tree); + rwlock_init(&tree->tree_lock); + spin_lock_init(&tree->epoch_lock); } /* finishing reiser4 initialization */ diff -puN fs/reiser4/tree.h~reiser4-spinlock-cleanup fs/reiser4/tree.h --- devel/fs/reiser4/tree.h~reiser4-spinlock-cleanup 2006-02-16 14:17:05.000000000 -0800 +++ devel-akpm/fs/reiser4/tree.h 2006-02-16 14:17:05.000000000 -0800 @@ -8,7 +8,6 @@ #include "forward.h" #include "debug.h" -#include "spin_macros.h" #include "dformat.h" #include "plugin/node/node.h" #include "plugin/plugin.h" @@ -59,7 +58,7 @@ typedef struct cbk_cache_slot { */ typedef struct cbk_cache { /* serializator */ - reiser4_rw_data guard; + rwlock_t guard; int nr_slots; /* head of LRU list of cache slots */ struct list_head lru; @@ -67,10 +66,6 @@ typedef struct cbk_cache { cbk_cache_slot *slot; } cbk_cache; -#define rw_ordering_pred_cbk_cache(cache) (1) - -/* defined read-write locking functions for cbk_cache */ -RW_LOCK_FUNCTIONS(cbk_cache, cbk_cache, guard); /* level_lookup_result - possible outcome of looking up key at some level. This is used by coord_by_key when traversing tree downward. */ @@ -139,13 +134,13 @@ struct reiser4_tree { 4) SMP machines. Current 4-ways machine test does not show that tree lock is contented and it is a bottleneck (2003.07.25). */ - reiser4_rw_data tree_lock; + rwlock_t tree_lock; /* lock protecting delimiting keys */ - reiser4_rw_data dk_lock; + rwlock_t dk_lock; /* spin lock protecting znode_epoch */ - reiser4_spin_data epoch_lock; + spinlock_t epoch_lock; /* version stamp used to mark znode updates. See seal.[ch] for more * information. 
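With the cbk_cache guard reduced to a bare rwlock_t above and the generated cbk_cache lock wrappers gone, call sites that only hold a const cbk_cache * (the invariant checker and the scan path in the search.c hunks earlier) cast the qualifier away at the lock call. The idiom, restated:

	/* cache arrives as const cbk_cache *, but the embedded rwlock_t
	 * must still be taken, so the const is cast away here */
	read_lock(&((cbk_cache *)cache)->guard);
	/* ... read-only walk of cache->lru ... */
	read_unlock(&((cbk_cache *)cache)->guard);
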
*/ __u64 znode_epoch; @@ -165,9 +160,6 @@ struct reiser4_tree { } carry; }; -#define spin_ordering_pred_epoch(tree) (1) -SPIN_LOCK_FUNCTIONS(epoch, reiser4_tree, epoch_lock); - extern void init_tree_0(reiser4_tree *); extern int init_tree(reiser4_tree * tree, @@ -442,37 +434,126 @@ int lookup_couple(reiser4_tree * tree, tree_level lock_level, tree_level stop_level, __u32 flags, int *result1, int *result2); -/* ordering constraint for tree spin lock: tree lock is "strongest" */ -#define rw_ordering_pred_tree(tree) \ - (lock_counters()->spin_locked_txnh == 0) && \ - (lock_counters()->rw_locked_tree == 0) && \ - (lock_counters()->rw_locked_dk == 0) - -/* Define spin_lock_tree, spin_unlock_tree, and spin_tree_is_locked: - spin lock protecting znode hash, and parent and sibling pointers. */ -RW_LOCK_FUNCTIONS(tree, reiser4_tree, tree_lock); - -/* ordering constraint for delimiting key spin lock: dk lock is weaker than - tree lock */ -#define rw_ordering_pred_dk( tree ) 1 -#if 0 -(lock_counters()->rw_locked_tree == 0) && - (lock_counters()->spin_locked_jnode == 0) && - (lock_counters()->rw_locked_zlock == 0) && - (lock_counters()->spin_locked_txnh == 0) && - (lock_counters()->spin_locked_atom == 0) && - (lock_counters()->spin_locked_inode_object == 0) && - (lock_counters()->spin_locked_txnmgr == 0) -#endif -/* Define spin_lock_dk(), spin_unlock_dk(), etc: locking for delimiting - keys. */ - RW_LOCK_FUNCTIONS(dk, reiser4_tree, dk_lock); -#if REISER4_DEBUG -#define check_tree() print_tree_rec( "", current_tree, REISER4_TREE_CHECK ) -#else -#define check_tree() noop -#endif +static inline void read_lock_tree(reiser4_tree *tree) +{ + /* check that tree is not locked */ + assert("", (LOCK_CNT_NIL(rw_locked_tree) && + LOCK_CNT_NIL(read_locked_tree) && + LOCK_CNT_NIL(write_locked_tree))); + /* check that spinlocks of lower priorities are not held */ + assert("", (LOCK_CNT_NIL(spin_locked_txnh) && + LOCK_CNT_NIL(rw_locked_dk) && + LOCK_CNT_NIL(spin_locked_stack))); + + read_lock(&(tree->tree_lock)); + + LOCK_CNT_INC(read_locked_tree); + LOCK_CNT_INC(rw_locked_tree); + LOCK_CNT_INC(spin_locked); +} + +static inline void read_unlock_tree(reiser4_tree *tree) +{ + assert("nikita-1375", LOCK_CNT_GTZ(read_locked_tree)); + assert("nikita-1376", LOCK_CNT_GTZ(rw_locked_tree)); + assert("nikita-1376", LOCK_CNT_GTZ(spin_locked)); + + LOCK_CNT_DEC(read_locked_tree); + LOCK_CNT_DEC(rw_locked_tree); + LOCK_CNT_DEC(spin_locked); + + read_unlock(&(tree->tree_lock)); +} + +static inline void write_lock_tree(reiser4_tree *tree) +{ + /* check that tree is not locked */ + assert("", (LOCK_CNT_NIL(rw_locked_tree) && + LOCK_CNT_NIL(read_locked_tree) && + LOCK_CNT_NIL(write_locked_tree))); + /* check that spinlocks of lower priorities are not held */ + assert("", (LOCK_CNT_NIL(spin_locked_txnh) && + LOCK_CNT_NIL(rw_locked_dk) && + LOCK_CNT_NIL(spin_locked_stack))); + + write_lock(&(tree->tree_lock)); + + LOCK_CNT_INC(write_locked_tree); + LOCK_CNT_INC(rw_locked_tree); + LOCK_CNT_INC(spin_locked); +} + +static inline void write_unlock_tree(reiser4_tree *tree) +{ + assert("nikita-1375", LOCK_CNT_GTZ(write_locked_tree)); + assert("nikita-1376", LOCK_CNT_GTZ(rw_locked_tree)); + assert("nikita-1376", LOCK_CNT_GTZ(spin_locked)); + + LOCK_CNT_DEC(write_locked_tree); + LOCK_CNT_DEC(rw_locked_tree); + LOCK_CNT_DEC(spin_locked); + + write_unlock(&(tree->tree_lock)); +} + +static inline void read_lock_dk(reiser4_tree *tree) +{ + /* check that dk is not locked */ + assert("", (LOCK_CNT_NIL(rw_locked_dk) && + LOCK_CNT_NIL(read_locked_dk) 
&& + LOCK_CNT_NIL(write_locked_dk))); + /* check that spinlocks of lower priorities are not held */ + assert("", LOCK_CNT_NIL(spin_locked_stack)); + + read_lock(&((tree)->dk_lock)); + + LOCK_CNT_INC(read_locked_dk); + LOCK_CNT_INC(rw_locked_dk); + LOCK_CNT_INC(spin_locked); +} + +static inline void read_unlock_dk(reiser4_tree *tree) +{ + assert("nikita-1375", LOCK_CNT_GTZ(read_locked_dk)); + assert("nikita-1376", LOCK_CNT_GTZ(rw_locked_dk)); + assert("nikita-1376", LOCK_CNT_GTZ(spin_locked)); + + LOCK_CNT_DEC(read_locked_dk); + LOCK_CNT_DEC(rw_locked_dk); + LOCK_CNT_DEC(spin_locked); + + read_unlock(&(tree->dk_lock)); +} + +static inline void write_lock_dk(reiser4_tree *tree) +{ + /* check that dk is not locked */ + assert("", (LOCK_CNT_NIL(rw_locked_dk) && + LOCK_CNT_NIL(read_locked_dk) && + LOCK_CNT_NIL(write_locked_dk))); + /* check that spinlocks of lower priorities are not held */ + assert("", LOCK_CNT_NIL(spin_locked_stack)); + + write_lock(&((tree)->dk_lock)); + + LOCK_CNT_INC(write_locked_dk); + LOCK_CNT_INC(rw_locked_dk); + LOCK_CNT_INC(spin_locked); +} + +static inline void write_unlock_dk(reiser4_tree *tree) +{ + assert("nikita-1375", LOCK_CNT_GTZ(write_locked_dk)); + assert("nikita-1376", LOCK_CNT_GTZ(rw_locked_dk)); + assert("nikita-1376", LOCK_CNT_GTZ(spin_locked)); + + LOCK_CNT_DEC(write_locked_dk); + LOCK_CNT_DEC(rw_locked_dk); + LOCK_CNT_DEC(spin_locked); + + write_unlock(&(tree->dk_lock)); +} /* estimate api. Implementation is in estimate.c */ reiser4_block_nr estimate_one_insert_item(reiser4_tree *); @@ -480,16 +561,10 @@ reiser4_block_nr estimate_one_insert_int reiser4_block_nr estimate_insert_flow(tree_level); reiser4_block_nr estimate_one_item_removal(reiser4_tree *); reiser4_block_nr calc_estimate_one_insert(tree_level); -reiser4_block_nr estimate_disk_cluster(struct inode *); -reiser4_block_nr estimate_insert_cluster(struct inode *, int); +reiser4_block_nr estimate_dirty_cluster(struct inode *); +reiser4_block_nr estimate_insert_cluster(struct inode *); +reiser4_block_nr estimate_update_cluster(struct inode *); -/* take read or write tree lock, depending on @takeread argument */ -#define XLOCK_TREE(tree, takeread) \ - (takeread ? RLOCK_TREE(tree) : WLOCK_TREE(tree)) - -/* release read or write tree lock, depending on @takeread argument */ -#define XUNLOCK_TREE(tree, takeread) \ - (takeread ? RUNLOCK_TREE(tree) : WUNLOCK_TREE(tree)) /* __REISER4_TREE_H__ */ #endif diff -puN fs/reiser4/tree_mod.c~reiser4-spinlock-cleanup fs/reiser4/tree_mod.c --- devel/fs/reiser4/tree_mod.c~reiser4-spinlock-cleanup 2006-02-16 14:17:05.000000000 -0800 +++ devel-akpm/fs/reiser4/tree_mod.c 2006-02-16 14:17:05.000000000 -0800 @@ -152,7 +152,7 @@ znode *add_tree_root(znode * old_root /* znode_make_dirty(fake); /* new root is a child of "fake" node */ - WLOCK_TREE(tree); + write_lock_tree(tree); ++tree->height; @@ -168,17 +168,17 @@ znode *add_tree_root(znode * old_root /* * balancing are connected after balancing is * done---useful invariant to check. */ sibling_list_insert_nolock(new_root, NULL); - WUNLOCK_TREE(tree); + write_unlock_tree(tree); /* insert into new root pointer to the @old_root. 
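The assertions built into the new tree.h wrappers above preserve the old ordering predicates: the tree lock is the strongest, so its lock functions assert that the txnh, dk and lock-stack locks are not yet held, and the dk lock in turn nests inside it. The nesting the rest of the patch relies on (see the delete_node() hunk above and sibling_list_remove() below) therefore looks like this, as a minimal sketch:

	read_lock_tree(tree);		/* tree lock first: its assert requires dk to be unheld */
	write_lock_dk(tree);		/* dk lock nests inside the tree lock */
	/* ... inspect or update delimiting keys of connected neighbors ... */
	write_unlock_dk(tree);
	read_unlock_tree(tree);
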
*/ assert("nikita-1110", WITH_DATA(new_root, node_is_empty(new_root))); - WLOCK_DK(tree); + write_lock_dk(tree); znode_set_ld_key(new_root, min_key()); znode_set_rd_key(new_root, max_key()); - WUNLOCK_DK(tree); + write_unlock_dk(tree); if (REISER4_DEBUG) { ZF_CLR(old_root, JNODE_LEFT_CONNECTED); ZF_CLR(old_root, JNODE_RIGHT_CONNECTED); @@ -234,7 +234,7 @@ static int add_child_ptr(znode * parent, coord_t coord; reiser4_item_data data; int result; - reiser4_key *key; + reiser4_key key; assert("nikita-1111", parent != NULL); assert("nikita-1112", child != NULL); @@ -250,10 +250,12 @@ static int add_child_ptr(znode * parent, build_child_ptr_data(child, &data); data.arg = NULL; - key = - UNDER_RW(dk, znode_get_tree(parent), read, znode_get_ld_key(child)); - result = - node_plugin_by_node(parent)->create_item(&coord, key, &data, NULL); + read_lock_dk(znode_get_tree(parent)); + key = *znode_get_ld_key(child); + read_unlock_dk(znode_get_tree(parent)); + + result = node_plugin_by_node(parent)->create_item(&coord, &key, &data, + NULL); znode_make_dirty(parent); zrelse(parent); return result; @@ -293,7 +295,7 @@ static int kill_root(reiser4_tree * tree /* don't take long term lock a @new_root. Take spinlock. */ - WLOCK_TREE(tree); + write_lock_tree(tree); tree->root_block = *new_root_blk; --tree->height; @@ -309,7 +311,7 @@ static int kill_root(reiser4_tree * tree ++uber->c_count; /* sibling_list_insert_nolock(new_root, NULL); */ - WUNLOCK_TREE(tree); + write_unlock_tree(tree); /* reinitialise old root. */ result = node_plugin_by_node(old_root)->init(old_root); diff -puN fs/reiser4/tree_walk.c~reiser4-spinlock-cleanup fs/reiser4/tree_walk.c --- devel/fs/reiser4/tree_walk.c~reiser4-spinlock-cleanup 2006-02-16 14:17:05.000000000 -0800 +++ devel-akpm/fs/reiser4/tree_walk.c 2006-02-16 14:17:05.000000000 -0800 @@ -66,7 +66,7 @@ static int lock_neighbor( assert("umka-236", node != NULL); assert("umka-237", tree != NULL); - assert("umka-301", rw_tree_is_locked(tree)); + assert_rw_locked(&(tree->tree_lock)); if (flags & GN_TRY_LOCK) req |= ZNODE_LOCK_NONBLOCK; @@ -94,14 +94,14 @@ static int lock_neighbor( /* protect it from deletion. */ zref(neighbor); - XUNLOCK_TREE(tree, rlocked); + rlocked ? read_unlock_tree(tree) : write_unlock_tree(tree); ret = longterm_lock_znode(result, neighbor, mode, req); /* The lock handle obtains its own reference, release the one from above. */ zput(neighbor); - XLOCK_TREE(tree, rlocked); + rlocked ? read_lock_tree(tree) : write_lock_tree(tree); /* restart if node we got reference to is being invalidated. we should not get reference to this node @@ -118,22 +118,26 @@ static int lock_neighbor( /* znode was locked by mistake; unlock it and restart locking process from beginning. */ - XUNLOCK_TREE(tree, rlocked); + rlocked ? read_unlock_tree(tree) : write_unlock_tree(tree); longterm_unlock_znode(result); - XLOCK_TREE(tree, rlocked); + rlocked ? read_lock_tree(tree) : write_lock_tree(tree); } } /* get parent node with longterm lock, accepts GN* flags. 
*/ -int reiser4_get_parent_flags(lock_handle * result /* resulting lock handle */ , +int reiser4_get_parent_flags(lock_handle * lh /* resulting lock handle */ , znode * node /* child node */ , znode_lock_mode mode /* type of lock: read or write */ , int flags /* GN_* flags */ ) { - return UNDER_RW(tree, znode_get_tree(node), read, - lock_neighbor(result, node, PARENT_PTR_OFFSET, mode, - ZNODE_LOCK_HIPRI, flags, 1)); + int result; + + read_lock_tree(znode_get_tree(node)); + result = lock_neighbor(lh, node, PARENT_PTR_OFFSET, mode, + ZNODE_LOCK_HIPRI, flags, 1); + read_unlock_tree(znode_get_tree(node)); + return result; } /* wrapper function to lock right or left neighbor depending on GN_GO_LEFT @@ -184,7 +188,7 @@ int check_sibling_list(znode * node) return 1; assert("nikita-3270", node != NULL); - assert("nikita-3269", rw_tree_is_write_locked(znode_get_tree(node))); + assert_rw_write_locked(&(znode_get_tree(node)->tree_lock)); for (scan = node; znode_is_left_connected(scan); scan = next) { next = scan->left; @@ -331,7 +335,7 @@ static int far_next_coord(coord_t * coor node = handle->node; tree = znode_get_tree(node); - WUNLOCK_TREE(tree); + write_unlock_tree(tree); coord_init_zero(coord); @@ -358,7 +362,7 @@ static int far_next_coord(coord_t * coor error_locked: longterm_unlock_znode(handle); } - WLOCK_TREE(tree); + write_lock_tree(tree); return ret; } @@ -385,12 +389,12 @@ renew_sibling_link(coord_t * coord, lock assert("umka-247", child != NULL); assert("umka-303", tree != NULL); - WLOCK_TREE(tree); + write_lock_tree(tree); ret = far_next_coord(coord, handle, flags); if (ret) { if (ret != -ENOENT) { - WUNLOCK_TREE(tree); + write_unlock_tree(tree); return ret; } } else { @@ -407,11 +411,11 @@ renew_sibling_link(coord_t * coord, lock iplug = item_plugin_by_coord(coord); if (!item_is_internal(coord)) { link_znodes(child, NULL, to_left); - WUNLOCK_TREE(tree); + write_unlock_tree(tree); /* we know there can't be formatted neighbor */ return RETERR(-E_NO_NEIGHBOR); } - WUNLOCK_TREE(tree); + write_unlock_tree(tree); iplug->s.internal.down_link(coord, NULL, &da); @@ -431,7 +435,7 @@ renew_sibling_link(coord_t * coord, lock /* update delimiting keys */ set_child_delimiting_keys(coord->node, coord, neighbor); - WLOCK_TREE(tree); + write_lock_tree(tree); } if (likely(neighbor == NULL || @@ -445,7 +449,7 @@ renew_sibling_link(coord_t * coord, lock ret = RETERR(-EIO); } - WUNLOCK_TREE(tree); + write_unlock_tree(tree); /* if GN_NO_ALLOC isn't set we keep reference to neighbor znode */ if (neighbor != NULL && (flags & GN_NO_ALLOC)) @@ -526,21 +530,21 @@ int connect_znode(coord_t * parent_coord return ret; /* protect `connected' state check by tree_lock */ - RLOCK_TREE(tree); + read_lock_tree(tree); if (!znode_is_right_connected(child)) { - RUNLOCK_TREE(tree); + read_unlock_tree(tree); /* connect right (default is right) */ ret = connect_one_side(parent_coord, child, GN_NO_ALLOC); if (ret) goto zrelse_and_ret; - RLOCK_TREE(tree); + read_lock_tree(tree); } ret = znode_is_left_connected(child); - RUNLOCK_TREE(tree); + read_unlock_tree(tree); if (!ret) { ret = @@ -593,8 +597,9 @@ renew_neighbor(coord_t * coord, znode * and reference to neighbor znode incremented */ neighbor = (flags & GN_GO_LEFT) ? 
node->left : node->right; - ret = UNDER_RW(tree, tree, read, znode_is_connected(neighbor)); - + read_lock_tree(tree); + ret = znode_is_connected(neighbor); + read_unlock_tree(tree); if (ret) { ret = 0; goto out; @@ -676,9 +681,9 @@ reiser4_get_neighbor(lock_handle * neigh again: /* first, we try to use simple lock_neighbor() which requires sibling link existence */ - ret = UNDER_RW(tree, tree, read, - lock_side_neighbor(neighbor, node, lock_mode, flags, 1)); - + read_lock_tree(tree); + ret = lock_side_neighbor(neighbor, node, lock_mode, flags, 1); + read_unlock_tree(tree); if (!ret) { /* load znode content if it was specified */ if (flags & GN_LOAD_NEIGHBOR) { @@ -797,10 +802,10 @@ void sibling_list_remove(znode * node) tree = znode_get_tree(node); assert("umka-255", node != NULL); - assert("zam-878", rw_tree_is_write_locked(tree)); + assert_rw_write_locked(&(tree->tree_lock)); assert("nikita-3275", check_sibling_list(node)); - WLOCK_DK(tree); + write_lock_dk(tree); if (znode_is_right_connected(node) && node->right != NULL && znode_is_left_connected(node) && node->left != NULL) { assert("zam-32245", @@ -808,7 +813,7 @@ void sibling_list_remove(znode * node) znode_get_ld_key(node->right))); znode_set_rd_key(node->left, znode_get_ld_key(node->right)); } - WUNLOCK_DK(tree); + write_unlock_dk(tree); if (znode_is_right_connected(node) && node->right != NULL) { assert("zam-322", znode_is_left_connected(node->right)); diff -puN fs/reiser4/txnmgr.c~reiser4-spinlock-cleanup fs/reiser4/txnmgr.c --- devel/fs/reiser4/txnmgr.c~reiser4-spinlock-cleanup 2006-02-16 14:17:05.000000000 -0800 +++ devel-akpm/fs/reiser4/txnmgr.c 2006-02-16 14:17:05.000000000 -0800 @@ -258,7 +258,7 @@ static int capture_assign_block(txn_hand static int capture_assign_txnh(jnode * node, txn_handle * txnh, txn_capture mode, int can_coc); -static int fuse_not_fused_lock_owners(txn_handle * txnh, znode * node); +static void fuse_not_fused_lock_owners(txn_handle * txnh, znode * node); static int capture_init_fusion(jnode * node, txn_handle * txnh, txn_capture mode, int can_coc); @@ -344,10 +344,8 @@ void init_txnmgr(txn_mgr *mgr) mgr->atom_count = 0; mgr->id_count = 1; - INIT_LIST_HEAD(&mgr->atoms_list); - spin_txnmgr_init(mgr); - + spin_lock_init(&mgr->tmgr_lock); sema_init(&mgr->commit_semaphore, 1); } @@ -373,9 +371,7 @@ static void txnh_init(txn_handle * txnh, txnh->mode = mode; txnh->atom = NULL; txnh->flags = 0; - - spin_txnh_init(txnh); - + spin_lock_init(&txnh->hlock); INIT_LIST_HEAD(&txnh->txnh_link); } @@ -384,7 +380,8 @@ static void txnh_init(txn_handle * txnh, static int txnh_isclean(txn_handle * txnh) { assert("umka-172", txnh != NULL); - return txnh->atom == NULL && spin_txnh_is_not_locked(txnh); + return txnh->atom == NULL && + LOCK_CNT_NIL(spin_locked_txnh); } #endif @@ -407,7 +404,7 @@ static void atom_init(txn_atom * atom) INIT_LIST_HEAD(ATOM_OVRWR_LIST(atom)); INIT_LIST_HEAD(ATOM_WB_LIST(atom)); INIT_LIST_HEAD(&atom->inodes); - spin_atom_init(atom); + spin_lock_init(&atom->alock); /* list of transaction handles */ INIT_LIST_HEAD(&atom->txnh_list); /* link to transaction manager's list of atoms */ @@ -482,23 +479,21 @@ int txn_end(reiser4_context * context) assert("nikita-2967", lock_stack_isclean(get_current_lock_stack())); txnh = context->trans; - if (txnh != NULL) { - /* The txnh's field "atom" can be checked for NULL w/o holding a - lock because txnh->atom could be set by this thread's call to - try_capture or the deadlock prevention code in - fuse_not_fused_lock_owners(). 
But that code may assign an - atom to this transaction handle only if there are locked and - not yet fused nodes. It cannot happen because lock stack - should be clean at this moment. */ + /* Fuse_not_fused_lock_owners in a parallel thread may set + * txnh->atom to the current thread's transaction handle. At + * this moment current thread holds no long-term locks, but + * fuse_not_fused... releases znode->lock spin-lock right + * before assigning an atom to this transaction handle. It + * still keeps txnh locked so the code below prevents the + * fuse_not_fused... thread from racing too far. */ + spin_lock_txnh(txnh); + spin_unlock_txnh(txnh); if (txnh->atom != NULL) ret = commit_txnh(txnh); - assert("jmacd-633", txnh_isclean(txnh)); - context->trans = NULL; } - return ret; } @@ -524,10 +519,10 @@ static txn_atom *txnh_get_atom(txn_handl txn_atom *atom; assert("umka-180", txnh != NULL); - assert("jmacd-5108", spin_txnh_is_not_locked(txnh)); + assert_spin_not_locked(&(txnh->hlock)); while (1) { - LOCK_TXNH(txnh); + spin_lock_txnh(txnh); atom = txnh->atom; if (atom == NULL) @@ -538,16 +533,16 @@ static txn_atom *txnh_get_atom(txn_handl atomic_inc(&atom->refcount); - UNLOCK_TXNH(txnh); - LOCK_ATOM(atom); - LOCK_TXNH(txnh); + spin_unlock_txnh(txnh); + spin_lock_atom(atom); + spin_lock_txnh(txnh); if (txnh->atom == atom) { atomic_dec(&atom->refcount); break; } - UNLOCK_TXNH(txnh); + spin_unlock_txnh(txnh); atom_dec_and_unlock(atom); } @@ -569,7 +564,7 @@ txn_atom *get_current_atom_locked_nochec atom = txnh_get_atom(txnh); - UNLOCK_TXNH(txnh); + spin_unlock_txnh(txnh); return atom; } @@ -584,7 +579,7 @@ txn_atom *jnode_get_atom(jnode * node) assert("umka-181", node != NULL); while (1) { - assert("jmacd-5108", spin_jnode_is_locked(node)); + assert_spin_locked(&(node->guard)); atom = node->atom; /* node is not in any atom */ @@ -598,11 +593,11 @@ txn_atom *jnode_get_atom(jnode * node) /* At least one jnode belongs to this atom it guarantees that * atom->refcount > 0, we can safely increment refcount. */ atomic_inc(&atom->refcount); - UNLOCK_JNODE(node); + spin_unlock_jnode(node); /* re-acquire spin locks in the right order */ - LOCK_ATOM(atom); - LOCK_JNODE(node); + spin_lock_atom(atom); + spin_lock_jnode(node); /* check if node still points to the same atom. */ if (node->atom == atom) { @@ -612,7 +607,7 @@ txn_atom *jnode_get_atom(jnode * node) /* releasing of atom lock and reference requires not holding * locks on jnodes. */ - UNLOCK_JNODE(node); + spin_unlock_jnode(node); /* We do not sure that this atom has extra references except our * one, so we should call proper function which may free atom if @@ -621,7 +616,7 @@ txn_atom *jnode_get_atom(jnode * node) /* lock jnode again for getting valid node->atom pointer * value. */ - LOCK_JNODE(node); + spin_lock_jnode(node); } return atom; @@ -650,14 +645,14 @@ same_slum_check(jnode * node, jnode * ch check->atom) because atom could be locked and being fused at that moment, jnodes of the atom of that state (being fused) can point to different objects, but the atom is the same. 
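jnode_get_atom() above is the canonical illustration of the ordering problem these spinlocks have: the atom lock ranks above the jnode lock, yet callers naturally arrive holding the jnode lock. The retry pattern, restated in condensed form with the intent spelled out (a paraphrase of the loop above, not new patch content):

	atom = node->atom;		/* jnode lock held; atom pointer may change once it is dropped */
	atomic_inc(&atom->refcount);	/* pin the atom while the locks are juggled */
	spin_unlock_jnode(node);
	spin_lock_atom(atom);		/* re-acquire in the correct order: atom, then jnode */
	spin_lock_jnode(node);
	if (node->atom != atom) {
		/* lost a race with atom fusion: drop the reference and retry */
	}
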
*/ - LOCK_JNODE(check); + spin_lock_jnode(check); atom = jnode_get_atom(check); if (atom == NULL) { compat = 0; } else { - compat = (node->atom == atom && jnode_is_dirty(check)); + compat = (node->atom == atom && JF_ISSET(check, JNODE_DIRTY)); if (compat && jnode_is_znode(check)) { compat &= znode_is_connected(JZNODE(check)); @@ -667,10 +662,10 @@ same_slum_check(jnode * node, jnode * ch compat &= (alloc_value == jnode_is_flushprepped(check)); } - UNLOCK_ATOM(atom); + spin_unlock_atom(atom); } - UNLOCK_JNODE(check); + spin_unlock_jnode(check); return compat; } @@ -681,7 +676,7 @@ void atom_dec_and_unlock(txn_atom * atom txn_mgr *mgr = &get_super_private(reiser4_get_current_sb())->tmgr; assert("umka-186", atom != NULL); - assert("jmacd-1071", spin_atom_is_locked(atom)); + assert_spin_locked(&(atom->alock)); assert("zam-1039", atomic_read(&atom->refcount) > 0); if (atomic_dec_and_test(&atom->refcount)) { @@ -690,21 +685,21 @@ void atom_dec_and_unlock(txn_atom * atom /* This atom should exist after we re-acquire its * spinlock, so we increment its reference counter. */ atomic_inc(&atom->refcount); - UNLOCK_ATOM(atom); + spin_unlock_atom(atom); spin_lock_txnmgr(mgr); - LOCK_ATOM(atom); + spin_lock_atom(atom); if (!atomic_dec_and_test(&atom->refcount)) { - UNLOCK_ATOM(atom); + spin_unlock_atom(atom); spin_unlock_txnmgr(mgr); return; } } - assert("nikita-2656", spin_txnmgr_is_locked(mgr)); + assert_spin_locked(&(mgr->tmgr_lock)); atom_free(atom); spin_unlock_txnmgr(mgr); } else - UNLOCK_ATOM(atom); + spin_unlock_atom(atom); } /* Create new atom and connect it to given transaction handle. This adds the @@ -733,14 +728,14 @@ static int atom_begin_and_assign_to_txnh locks. */ mgr = &get_super_private(reiser4_get_current_sb())->tmgr; spin_lock_txnmgr(mgr); - LOCK_TXNH(txnh); + spin_lock_txnh(txnh); /* Check whether new atom still needed */ if (txnh->atom != NULL) { /* NOTE-NIKITA probably it is rather better to free * atom_alloc here than thread it up to try_capture(). */ - UNLOCK_TXNH(txnh); + spin_unlock_txnh(txnh); spin_unlock_txnmgr(mgr); return -E_REPEAT; @@ -753,9 +748,11 @@ static int atom_begin_and_assign_to_txnh assert("jmacd-17", atom_isclean(atom)); - /* Take the atom and txnmgr lock. No checks for lock ordering, because - @atom is new and inaccessible for others. */ - spin_lock_atom_no_ord(atom); + /* + * do not use spin_lock_atom because we have broken lock ordering here + * which is ok, as long as @atom is new and inaccessible for others. + */ + spin_lock(&(atom->alock)); /* add atom to the end of transaction manager's list of atoms */ list_add_tail(&atom->atom_link, &mgr->atoms_list); @@ -771,13 +768,12 @@ static int atom_begin_and_assign_to_txnh atom->super = reiser4_get_current_sb(); capture_assign_txnh_nolock(atom, txnh); - UNLOCK_ATOM(atom); - UNLOCK_TXNH(txnh); + spin_unlock(&(atom->alock)); + spin_unlock_txnh(txnh); return -E_REPEAT; } -#if REISER4_DEBUG /* Return true if an atom is currently "open". */ static int atom_isopen(const txn_atom * atom) { @@ -785,7 +781,6 @@ static int atom_isopen(const txn_atom * return atom->stage > 0 && atom->stage < ASTAGE_PRE_COMMIT; } -#endif /* Return the number of pointers to this atom that must be updated during fusion. This approximates the amount of work to be done. 
Fusion chooses the atom with fewer @@ -806,10 +801,10 @@ static void atom_free(txn_atom * atom) txn_mgr *mgr = &get_super_private(reiser4_get_current_sb())->tmgr; assert("umka-188", atom != NULL); - assert("jmacd-18", spin_atom_is_locked(atom)); + assert_spin_locked(&(atom->alock)); /* Remove from the txn_mgr's atom list */ - assert("nikita-2657", spin_txnmgr_is_locked(mgr)); + assert_spin_locked(&(mgr->tmgr_lock)); mgr->atom_count -= 1; list_del_init(&atom->atom_link); @@ -823,7 +818,7 @@ static void atom_free(txn_atom * atom) assert("jmacd-16", atom_isclean(atom)); - UNLOCK_ATOM(atom); + spin_unlock_atom(atom); kmem_cache_free(_atom_slab, atom); } @@ -836,7 +831,7 @@ static int atom_is_dotard(const txn_atom static int atom_can_be_committed(txn_atom * atom) { - assert("zam-884", spin_atom_is_locked(atom)); + assert_spin_locked(&(atom->alock)); assert("zam-885", atom->txnh_count > atom->nr_waiters); return atom->txnh_count == atom->nr_waiters + 1; } @@ -862,7 +857,7 @@ int current_atom_should_commit(void) atom = get_current_atom_locked_nocheck(); if (atom) { result = atom_should_commit(atom); - UNLOCK_ATOM(atom); + spin_unlock_atom(atom); } return result; } @@ -906,7 +901,7 @@ jnode *find_first_dirty_jnode(txn_atom * jnode *first_dirty; tree_level level; - assert("zam-753", spin_atom_is_locked(atom)); + assert_spin_locked(&(atom->alock)); /* The flush starts from LEAF_LEVEL (=1). */ for (level = 1; level < REAL_MAX_ZTREE_HEIGHT + 1; level += 1) { @@ -940,7 +935,7 @@ static void dispatch_wb_list(txn_atom * jnode *cur; int total, moved; - assert("zam-905", spin_atom_is_locked(atom)); + assert_spin_locked(&(atom->alock)); total = 0; moved = 0; @@ -998,7 +993,7 @@ static void dispatch_wb_list(txn_atom * while (ATOM_WB_LIST(atom) != &cur->capture_link) { jnode *next = list_entry(cur->capture_link.next, jnode, capture_link); - LOCK_JNODE(cur); + spin_lock_jnode(cur); if (!JF_ISSET(cur, JNODE_WRITEBACK)) { if (JF_ISSET(cur, JNODE_DIRTY)) { queue_jnode(fq, cur); @@ -1009,7 +1004,7 @@ static void dispatch_wb_list(txn_atom * ATOM_CLEAN_LIST(atom)); } } - UNLOCK_JNODE(cur); + spin_unlock_jnode(cur); cur = next; } @@ -1029,7 +1024,7 @@ static int submit_wb_list(void) return PTR_ERR(fq); dispatch_wb_list(fq->atom, fq); - UNLOCK_ATOM(fq->atom); + spin_unlock_atom(fq->atom); ret = write_fq(fq, NULL, 1); fq_put(fq); @@ -1087,7 +1082,7 @@ static int commit_current_atom(long *nr_ int flushiters; assert("zam-888", atom != NULL && *atom != NULL); - assert("zam-886", spin_atom_is_locked(*atom)); + assert_spin_locked(&((*atom)->alock)); assert("zam-887", get_current_context()->trans->atom == *atom); assert("jmacd-151", atom_isopen(*atom)); @@ -1121,10 +1116,10 @@ static int commit_current_atom(long *nr_ if (ret) return ret; - assert("zam-882", spin_atom_is_locked(*atom)); + assert_spin_locked(&((*atom)->alock)); if (!atom_can_be_committed(*atom)) { - UNLOCK_ATOM(*atom); + spin_unlock_atom(*atom); return RETERR(-E_REPEAT); } @@ -1136,7 +1131,7 @@ static int commit_current_atom(long *nr_ at this point, commit should be successful. 
*/ atom_set_stage(*atom, ASTAGE_PRE_COMMIT); ON_DEBUG(((*atom)->committer = current)); - UNLOCK_ATOM(*atom); + spin_unlock_atom(*atom); ret = current_atom_complete_writes(); if (ret) @@ -1164,8 +1159,8 @@ static int commit_current_atom(long *nr_ invalidate_list(ATOM_WB_LIST(*atom)); assert("zam-927", list_empty(&(*atom)->inodes)); - LOCK_ATOM(*atom); - done: + spin_lock_atom(*atom); + done: atom_set_stage(*atom, ASTAGE_DONE); ON_DEBUG((*atom)->committer = NULL); @@ -1180,7 +1175,7 @@ static int commit_current_atom(long *nr_ assert("jmacd-1070", atomic_read(&(*atom)->refcount) > 0); assert("jmacd-1062", (*atom)->capture_count == 0); BUG_ON((*atom)->capture_count != 0); - assert("jmacd-1071", spin_atom_is_locked(*atom)); + assert_spin_locked(&((*atom)->alock)); return ret; } @@ -1194,21 +1189,21 @@ static int force_commit_atom_nolock(txn_ txn_atom *atom; assert("zam-837", txnh != NULL); - assert("zam-835", spin_txnh_is_locked(txnh)); + assert_spin_locked(&(txnh->hlock)); assert("nikita-2966", lock_stack_isclean(get_current_lock_stack())); atom = txnh->atom; assert("zam-834", atom != NULL); - assert("zam-836", spin_atom_is_locked(atom)); + assert_spin_locked(&(atom->alock)); /* Set flags for atom and txnh: forcing atom commit and waiting for * commit completion */ txnh->flags |= TXNH_WAIT_COMMIT; atom->flags |= ATOM_FORCE_COMMIT; - UNLOCK_TXNH(txnh); - UNLOCK_ATOM(atom); + spin_unlock_txnh(txnh); + spin_unlock_atom(atom); txn_restart_current(); return 0; @@ -1240,7 +1235,7 @@ int txnmgr_force_commit_all(struct super spin_lock_txnmgr(mgr); list_for_each_entry(atom, &mgr->atoms_list, atom_link) { - LOCK_ATOM(atom); + spin_lock_atom(atom); /* Commit any atom which can be committed. If @commit_new_atoms * is not set we commit only atoms which were created before @@ -1251,7 +1246,7 @@ int txnmgr_force_commit_all(struct super spin_unlock_txnmgr(mgr); if (atom->stage < ASTAGE_PRE_COMMIT) { - LOCK_TXNH(txnh); + spin_lock_txnh(txnh); /* Add force-context txnh */ capture_assign_txnh_nolock(atom, txnh); ret = force_commit_atom_nolock(txnh); @@ -1265,17 +1260,17 @@ int txnmgr_force_commit_all(struct super } } - UNLOCK_ATOM(atom); + spin_unlock_atom(atom); } #if REISER4_DEBUG if (commit_all_atoms) { reiser4_super_info_data *sbinfo = get_super_private(super); - reiser4_spin_lock_sb(sbinfo); + spin_lock_reiser4_super(sbinfo); assert("zam-813", sbinfo->blocks_fake_allocated_unformatted == 0); assert("zam-812", sbinfo->blocks_fake_allocated == 0); - reiser4_spin_unlock_sb(sbinfo); + spin_unlock_reiser4_super(sbinfo); } #endif @@ -1324,10 +1319,10 @@ int commit_some_atoms(txn_mgr * mgr) */ if (atom_is_committable(atom)) { /* now, take spin lock and re-check */ - LOCK_ATOM(atom); + spin_lock_atom(atom); if (atom_is_committable(atom)) break; - UNLOCK_ATOM(atom); + spin_unlock_atom(atom); } } @@ -1340,7 +1335,7 @@ int commit_some_atoms(txn_mgr * mgr) return 0; } - LOCK_TXNH(txnh); + spin_lock_txnh(txnh); BUG_ON(atom == NULL); /* Set the atom to force committing */ @@ -1349,8 +1344,8 @@ int commit_some_atoms(txn_mgr * mgr) /* Add force-context txnh */ capture_assign_txnh_nolock(atom, txnh); - UNLOCK_TXNH(txnh); - UNLOCK_ATOM(atom); + spin_unlock_txnh(txnh); + spin_unlock_atom(atom); /* we are about to release daemon spin lock, notify daemon it has to rescan atoms */ @@ -1372,12 +1367,17 @@ static int txn_try_to_fuse_small_atom(tx repeat = 0; if (!spin_trylock_txnmgr(tmgr)) { - UNLOCK_ATOM(atom); + atomic_inc(&atom->refcount); + spin_unlock_atom(atom); spin_lock_txnmgr(tmgr); - LOCK_ATOM(atom); + 
		spin_lock_atom(atom);
 		repeat = 1;
-		if (atom->stage != atom_stage)
-			goto out;
+		if (atom->stage != atom_stage) {
+			spin_unlock_txnmgr(tmgr);
+			atom_dec_and_unlock(atom);
+			return -E_REPEAT;
+		}
+		atomic_dec(&atom->refcount);
 	}
 	list_for_each_entry(atom_2, &tmgr->atoms_list, atom_link) {
@@ -1394,14 +1394,13 @@ static int txn_try_to_fuse_small_atom(tx
 				/* all locks are lost we can only repeat here */
 				return -E_REPEAT;
 			}
-			UNLOCK_ATOM(atom_2);
+			spin_unlock_atom(atom_2);
 		}
 	}
 	atom->flags |= ATOM_CANCEL_FUSION;
-      out:
 	spin_unlock_txnmgr(tmgr);
 	if (repeat) {
-		UNLOCK_ATOM(atom);
+		spin_unlock_atom(atom);
 		return -E_REPEAT;
 	}
 	return 0;
@@ -1441,7 +1440,7 @@ flush_some_atom(jnode * start, long *nr_
 		/* traverse the list of all atoms */
 		list_for_each_entry(atom, &tmgr->atoms_list, atom_link) {
 			/* lock atom before checking its state */
-			LOCK_ATOM(atom);
+			spin_lock_atom(atom);
 			/*
 			 * we need an atom which is not being committed and
@@ -1450,14 +1449,14 @@ flush_some_atom(jnode * start, long *nr_
 			 */
 			if (atom->stage < ASTAGE_PRE_COMMIT &&
 			    atom->nr_flushers == 0) {
-				LOCK_TXNH(txnh);
+				spin_lock_txnh(txnh);
 				capture_assign_txnh_nolock(atom, txnh);
-				UNLOCK_TXNH(txnh);
+				spin_unlock_txnh(txnh);
 				goto found;
 			}
-			UNLOCK_ATOM(atom);
+			spin_unlock_atom(atom);
 		}
 		/*
@@ -1466,13 +1465,13 @@ flush_some_atom(jnode * start, long *nr_
 		 */
 		if (!current_is_pdflush() && !wbc->nonblocking) {
 			list_for_each_entry(atom, &tmgr->atoms_list, atom_link) {
-				LOCK_ATOM(atom);
+				spin_lock_atom(atom);
 				/* Repeat the check from the above. */
 				if (atom->stage < ASTAGE_PRE_COMMIT &&
 				    atom->nr_flushers == 0) {
-					LOCK_TXNH(txnh);
+					spin_lock_txnh(txnh);
 					capture_assign_txnh_nolock(atom, txnh);
-					UNLOCK_TXNH(txnh);
+					spin_unlock_txnh(txnh);
 					goto found;
 				}
@@ -1486,7 +1485,7 @@ flush_some_atom(jnode * start, long *nr_
 					atom_wait_event(atom);
 					goto repeat;
 				}
-				UNLOCK_ATOM(atom);
+				spin_unlock_atom(atom);
 			}
 		}
 		spin_unlock_txnmgr(tmgr);
@@ -1498,6 +1497,13 @@ flush_some_atom(jnode * start, long *nr_
 	BUG_ON(atom->super != ctx->super);
 	assert("vs-35", atom->super == ctx->super);
+	if (start) {
+		spin_lock_jnode(start);
+		ret = (atom == start->atom) ?
1 : 0; + spin_unlock_jnode(start); + if (ret == 0) + start = NULL; + } ret = flush_current_atom(flags, wbc->nr_to_write, nr_submitted, &atom, start); if (ret == 0) { /* flush_current_atom returns 0 only if it submitted for write @@ -1521,7 +1527,7 @@ flush_some_atom(jnode * start, long *nr_ txnh->flags |= TXNH_WAIT_COMMIT; atom->flags |= ATOM_FORCE_COMMIT; } - UNLOCK_ATOM(atom); + spin_unlock_atom(atom); } else if (ret == -E_REPEAT) { if (*nr_submitted == 0) { /* let others who hampers flushing (hold longterm locks, @@ -1556,12 +1562,12 @@ void invalidate_list(capture_list_head * spin_unlock(&scan_lock); atom = node->atom; - LOCK_ATOM(atom); + spin_lock_atom(atom); LOCK_JNODE(node); if (JF_ISSET(node, JNODE_CC) && node->pg) page_cache_release(node->pg); uncapture_block(node); - UNLOCK_ATOM(atom); + spin_unlock_atom(atom); JF_CLR(node, JNODE_SCANNED); jput(node); @@ -1579,7 +1585,7 @@ void invalidate_list(struct list_head *h jnode *node; node = list_entry(head->next, jnode, capture_link); - LOCK_JNODE(node); + spin_lock_jnode(node); uncapture_block(node); jput(node); } @@ -1601,7 +1607,7 @@ void atom_wait_event(txn_atom * atom) { txn_wait_links _wlinks; - assert("zam-744", spin_atom_is_locked(atom)); + assert_spin_locked(&(atom->alock)); assert("nikita-3156", lock_stack_isclean(get_current_lock_stack()) || atom->nr_running_queues > 0); @@ -1609,12 +1615,12 @@ void atom_wait_event(txn_atom * atom) init_wlinks(&_wlinks); list_add_tail(&_wlinks._fwaitfor_link, &atom->fwaitfor_list); atomic_inc(&atom->refcount); - UNLOCK_ATOM(atom); + spin_unlock_atom(atom); prepare_to_sleep(_wlinks._lock_stack); go_to_sleep(_wlinks._lock_stack); - LOCK_ATOM(atom); + spin_lock_atom(atom); list_del(&_wlinks._fwaitfor_link); atom_dec_and_unlock(atom); } @@ -1622,7 +1628,7 @@ void atom_wait_event(txn_atom * atom) void atom_set_stage(txn_atom * atom, txn_stage stage) { assert("nikita-3535", atom != NULL); - assert("nikita-3538", spin_atom_is_locked(atom)); + assert_spin_locked(&(atom->alock)); assert("nikita-3536", ASTAGE_FREE <= stage && stage <= ASTAGE_INVALID); /* Excelsior! */ assert("nikita-3537", stage >= atom->stage); @@ -1635,7 +1641,7 @@ void atom_set_stage(txn_atom * atom, txn /* wake all threads which wait for an event */ void atom_send_event(txn_atom * atom) { - assert("zam-745", spin_atom_is_locked(atom)); + assert_spin_locked(&(atom->alock)); wakeup_atom_waitfor_list(atom); } @@ -1675,7 +1681,7 @@ static int try_commit_txnh(commit_data * /* Get the atom and txnh locked. */ cd->atom = txnh_get_atom(cd->txnh); assert("jmacd-309", cd->atom != NULL); - UNLOCK_TXNH(cd->txnh); + spin_unlock_txnh(cd->txnh); if (cd->wait) { cd->atom->nr_waiters--; @@ -1751,7 +1757,7 @@ static int try_commit_txnh(commit_data * LONG_MAX, &cd->nr_written, &cd->atom, NULL); if (result == 0) { - UNLOCK_ATOM(cd->atom); + spin_unlock_atom(cd->atom); cd->preflush = 0; result = RETERR(-E_REPEAT); } else /* Atoms wasn't flushed @@ -1772,7 +1778,11 @@ static int try_commit_txnh(commit_data * } else result = 0; - assert("jmacd-1027", ergo(result == 0, spin_atom_is_locked(cd->atom))); +#if REISER4_DEBUG + if (result == 0) + assert_spin_locked(&(cd->atom->alock)); +#endif + /* perfectly valid assertion, except that when atom/txnh is not locked * fusion can take place, and cd->atom points nowhere. 
*/ /* @@ -1798,15 +1808,14 @@ static int commit_txnh(txn_handle * txnh while (try_commit_txnh(&cd) != 0) preempt_point(); - assert("nikita-3171", spin_txnh_is_not_locked(txnh)); - LOCK_TXNH(txnh); + spin_lock_txnh(txnh); cd.atom->txnh_count -= 1; txnh->atom = NULL; /* remove transaction handle from atom's list of transaction handles */ list_del_init(&txnh->txnh_link); - UNLOCK_TXNH(txnh); + spin_unlock_txnh(txnh); atom_dec_and_unlock(cd.atom); /* if we don't want to do a commit (TXNH_DONT_COMMIT is set, probably * because it takes time) by current thread, we do that work @@ -1873,18 +1882,18 @@ try_capture_block(txn_handle * txnh, jno assert("umka-195", node != NULL); /* The jnode is already locked! Being called from try_capture(). */ - assert("jmacd-567", spin_jnode_is_locked(node)); + assert_spin_locked(&(node->guard)); block_atom = node->atom; /* Get txnh spinlock, this allows us to compare txn_atom pointers but it doesn't let us touch the atoms themselves. */ - LOCK_TXNH(txnh); + spin_lock_txnh(txnh); txnh_atom = txnh->atom; if (txnh_atom != NULL && block_atom == txnh_atom) { - UNLOCK_TXNH(txnh); + spin_unlock_txnh(txnh); return 0; } /* NIKITA-HANS: nothing */ @@ -1924,21 +1933,11 @@ try_capture_block(txn_handle * txnh, jno if ( // txnh_atom->stage >= ASTAGE_CAPTURE_WAIT && jnode_is_znode(node) && znode_is_locked(JZNODE(node)) && JF_ISSET(node, JNODE_MISSED_IN_CAPTURE)) { - ret = fuse_not_fused_lock_owners(txnh, JZNODE(node)); - if (ret) { - JF_SET(node, JNODE_MISSED_IN_CAPTURE); - - assert("zam-687", - spin_txnh_is_not_locked(txnh)); - assert("zam-688", - spin_jnode_is_not_locked(node)); - - return ret; - } else - JF_CLR(node, JNODE_MISSED_IN_CAPTURE); - - assert("zam-701", spin_txnh_is_locked(txnh)); - assert("zam-702", spin_jnode_is_locked(node)); + spin_unlock_txnh(txnh); + JF_CLR(node, JNODE_MISSED_IN_CAPTURE); + spin_unlock_jnode(node); + fuse_not_fused_lock_owners(txnh, JZNODE(node)); + return RETERR(-E_REPEAT); } } @@ -1952,10 +1951,8 @@ try_capture_block(txn_handle * txnh, jno ret = capture_assign_txnh(node, txnh, mode, can_coc); if (ret != 0) { /* E_REPEAT or otherwise */ - assert("jmacd-6129", - spin_txnh_is_not_locked(txnh)); - assert("jmacd-6130", - spin_jnode_is_not_locked(node)); + assert_spin_not_locked(&(txnh->hlock)); + assert_spin_not_locked(&(node->guard)); return ret; } @@ -1963,8 +1960,8 @@ try_capture_block(txn_handle * txnh, jno granted because the block is committing. Locks still held. */ } else { if (mode & TXN_CAPTURE_DONT_FUSE) { - UNLOCK_TXNH(txnh); - UNLOCK_JNODE(node); + spin_unlock_txnh(txnh); + spin_unlock_jnode(node); /* we are in a "no-fusion" mode and @node is * already part of transaction. */ return RETERR(-E_NO_NEIGHBOR); @@ -1973,10 +1970,8 @@ try_capture_block(txn_handle * txnh, jno returns -E_REPEAT on successful fusion, 0 on the fall-through case. 
*/ ret = capture_init_fusion(node, txnh, mode, can_coc); if (ret != 0) { - assert("jmacd-6131", - spin_txnh_is_not_locked(txnh)); - assert("jmacd-6132", - spin_jnode_is_not_locked(node)); + assert_spin_not_locked(&(txnh->hlock)); + assert_spin_not_locked(&(node->guard)); return ret; } @@ -1993,10 +1988,8 @@ try_capture_block(txn_handle * txnh, jno ret = capture_assign_block(txnh, node); if (ret != 0) { /* E_REPEAT or otherwise */ - assert("jmacd-6133", - spin_txnh_is_not_locked(txnh)); - assert("jmacd-6134", - spin_jnode_is_not_locked(node)); + assert_spin_not_locked(&(txnh->hlock)); + assert_spin_not_locked(&(node->guard)); return ret; } @@ -2006,8 +1999,8 @@ try_capture_block(txn_handle * txnh, jno /* In this case, neither txnh nor page are assigned to * an atom. */ - UNLOCK_JNODE(node); - UNLOCK_TXNH(txnh); + spin_unlock_jnode(node); + spin_unlock_txnh(txnh); return atom_begin_and_assign_to_txnh(atom_alloc, txnh); } @@ -2018,11 +2011,11 @@ try_capture_block(txn_handle * txnh, jno } /* Successful case: both jnode and txnh are still locked. */ - assert("jmacd-740", spin_txnh_is_locked(txnh)); - assert("jmacd-741", spin_jnode_is_locked(node)); + assert_spin_locked(&(txnh->hlock)); + assert_spin_locked(&(node->guard)); /* Release txnh lock, return with the jnode still locked. */ - UNLOCK_TXNH(txnh); + spin_unlock_txnh(txnh); return 0; } @@ -2032,7 +2025,7 @@ build_capture_mode(jnode * node, znode_l { txn_capture cap_mode; - assert("nikita-3187", spin_jnode_is_locked(node)); + assert_spin_locked(&(node->guard)); /* FIXME_JMACD No way to set TXN_CAPTURE_READ_MODIFY yet. */ @@ -2091,7 +2084,7 @@ try_capture(jnode * node, znode_lock_mod #endif int ret; - assert("jmacd-604", spin_jnode_is_locked(node)); + assert_spin_locked(&(node->guard)); repeat: cap_mode = build_capture_mode(node, lock_mode, flags); @@ -2111,15 +2104,18 @@ try_capture(jnode * node, znode_lock_mod If ret == 0 then jnode is still locked. If ret != 0 then jnode is unlocked. */ - assert("nikita-2674", ergo(ret == 0, spin_jnode_is_locked(node))); - assert("nikita-2675", ergo(ret != 0, spin_jnode_is_not_locked(node))); - - assert("nikita-2974", spin_txnh_is_not_locked(txnh)); +#if REISER4_DEBUG + if (ret == 0) + assert_spin_locked(&(node->guard)); + else + assert_spin_not_locked(&(node->guard)); +#endif + assert_spin_not_locked(&(txnh->guard)); if (ret == -E_REPEAT) { /* E_REPEAT implies all locks were released, therefore we need to take the jnode's lock again. */ - LOCK_JNODE(node); + spin_lock_jnode(node); /* Although this may appear to be a busy loop, it is not. There are several conditions that cause E_REPEAT to be @@ -2152,7 +2148,7 @@ try_capture(jnode * node, znode_lock_mod reiser4_stat_inc(coc.coc_wait); /* disable COC for the next loop iteration */ coc_enabled = 0; - LOCK_JNODE(node); + spin_lock_jnode(node); goto repeat; } #endif @@ -2178,11 +2174,11 @@ try_capture(jnode * node, znode_lock_mod re-acquiring it, but there are cases were failure occurs when the lock is not held, and those cases would need to be modified to re-take the lock. */ - LOCK_JNODE(node); + spin_lock_jnode(node); } /* Jnode is still locked. */ - assert("jmacd-760", spin_jnode_is_locked(node)); + assert_spin_locked(&(node->guard)); return ret; } @@ -2194,94 +2190,83 @@ try_capture(jnode * node, znode_lock_mod */ /* fuse all 'active' atoms of lock owners of given node. 
*/ -static int fuse_not_fused_lock_owners(txn_handle * txnh, znode * node) +static void fuse_not_fused_lock_owners(txn_handle * txnh, znode * node) { lock_handle *lh; - int repeat = 0; - txn_atom *atomh = txnh->atom; - -/* assert ("zam-689", znode_is_rlocked (node));*/ - assert("zam-690", spin_znode_is_locked(node)); - assert("zam-691", spin_txnh_is_locked(txnh)); - assert("zam-692", atomh != NULL); + int repeat; + txn_atom *atomh, *atomf; + reiser4_context *me = get_current_context(); + reiser4_context *ctx = NULL; - RLOCK_ZLOCK(&node->lock); + assert_spin_not_locked(&(ZJNODE(node)->guard)); + assert_spin_not_locked(&(txnh->hlock)); - if (!spin_trylock_atom(atomh)) { - repeat = 1; - goto fail; - } + repeat: + repeat = 0; + atomh = txnh_get_atom(txnh); + spin_unlock_txnh(txnh); + assert("zam-692", atomh != NULL); + read_lock_zlock(&node->lock); /* inspect list of lock owners */ list_for_each_entry(lh, &node->lock.owners, owners_link) { - reiser4_context *ctx; - txn_atom *atomf; - ctx = get_context_by_lock_stack(lh->owner); - - if (ctx == get_current_context()) + if (ctx == me) continue; + /* below we use two assumptions to avoid addition spin-locks + for checking the condition : + + 1) if the lock stack has lock, the transaction should be + opened, i.e. ctx->trans != NULL; - if (!spin_trylock_txnh(ctx->trans)) { + 2) reading of well-aligned ctx->trans->atom is atomic, if it + equals to the address of spin-locked atomh, we take that + the atoms are the same, nothing has to be captured. */ + if (atomh != ctx->trans->atom) { + reiser4_wake_up(lh->owner); repeat = 1; - continue; + break; } + } + if (repeat) { + int lock_ok; + lock_ok = spin_trylock_txnh(ctx->trans); + read_unlock_zlock(&node->lock); + if (!lock_ok) { + spin_unlock_atom(atomh); + goto repeat; + } atomf = ctx->trans->atom; - if (atomf == NULL) { capture_assign_txnh_nolock(atomh, ctx->trans); - UNLOCK_TXNH(ctx->trans); - - reiser4_wake_up(lh->owner); - continue; - } - - if (atomf == atomh) { - UNLOCK_TXNH(ctx->trans); - continue; - } - - if (!spin_trylock_atom(atomf)) { - UNLOCK_TXNH(ctx->trans); - repeat = 1; - continue; + spin_unlock_atom(atomh); + spin_unlock_txnh(ctx->trans); + goto repeat; } - - UNLOCK_TXNH(ctx->trans); - - if (atomf == atomh || atomf->stage > ASTAGE_CAPTURE_WAIT) { - UNLOCK_ATOM(atomf); - continue; + assert("zam-1059", atomf != atomh); + atomic_inc(&atomh->refcount); + atomic_inc(&atomf->refcount); + spin_unlock_txnh(ctx->trans); + if (atomf > atomh) { + spin_lock_atom(atomf); + } else { + spin_unlock_atom(atomh); + spin_lock_atom(atomf); + spin_lock_atom(atomh); + } + if (atomh == atomf || !atom_isopen(atomh) || !atom_isopen(atomf)) { + atom_dec_and_unlock(atomh); + atom_dec_and_unlock(atomf); + goto repeat; } - // repeat = 1; - - reiser4_wake_up(lh->owner); - - UNLOCK_TXNH(txnh); - RUNLOCK_ZLOCK(&node->lock); - spin_unlock_znode(node); - - /* @atomf is "small" and @atomh is "large", by - definition. 
Small atom is destroyed and large is unlocked - inside capture_fuse_into() - */ + atomic_dec(&atomh->refcount); + atomic_dec(&atomf->refcount); capture_fuse_into(atomf, atomh); - return RETERR(-E_REPEAT); - } - - UNLOCK_ATOM(atomh); - - if (repeat) { - fail: - UNLOCK_TXNH(txnh); - RUNLOCK_ZLOCK(&node->lock); - spin_unlock_znode(node); - return RETERR(-E_REPEAT); + goto repeat; } - - RUNLOCK_ZLOCK(&node->lock); - return 0; + read_unlock_zlock(&node->lock); + spin_unlock_atom(atomh); } /* This is the interface to capture unformatted nodes via their struct page @@ -2298,12 +2283,12 @@ int try_capture_page_to_invalidate(struc return PTR_ERR(node); } - LOCK_JNODE(node); + spin_lock_jnode(node); unlock_page(pg); ret = try_capture(node, ZNODE_WRITE_LOCK, 0, 0 /* no copy on capture */ ); - UNLOCK_JNODE(node); + spin_unlock_jnode(node); jput(node); lock_page(pg); return ret; @@ -2327,22 +2312,22 @@ void uncapture_page(struct page *pg) assert("umka-199", pg != NULL); assert("nikita-3155", PageLocked(pg)); - reiser4_clear_page_dirty(pg); + clear_page_dirty_for_io(pg); reiser4_wait_page_writeback(pg); node = jprivate(pg); BUG_ON(node == NULL); - LOCK_JNODE(node); + spin_lock_jnode(node); eflush_del(node, 1 /* page is locked */ ); /*assert ("zam-815", !JF_ISSET(node, JNODE_EFLUSH)); */ atom = jnode_get_atom(node); if (atom == NULL) { - assert("jmacd-7111", !jnode_is_dirty(node)); - UNLOCK_JNODE(node); + assert("jmacd-7111", !JF_ISSET(node, JNODE_DIRTY)); + spin_unlock_jnode(node); return; } @@ -2356,7 +2341,7 @@ void uncapture_page(struct page *pg) * wait all write_fq() for this atom to complete. This is not * significant overhead. */ while (JF_ISSET(node, JNODE_FLUSH_QUEUED) && atom->nr_running_queues) { - UNLOCK_JNODE(node); + spin_unlock_jnode(node); /* * at this moment we want to wait for "atom event", viz. wait * until @node can be removed from flush queue. But @@ -2373,18 +2358,18 @@ void uncapture_page(struct page *pg) * page may has been detached by ->writepage()->releasepage(). 
*/ reiser4_wait_page_writeback(pg); - LOCK_JNODE(node); + spin_lock_jnode(node); eflush_del(node, 1); page_cache_release(pg); atom = jnode_get_atom(node); /* VS-FIXME-HANS: improve the commenting in this function */ if (atom == NULL) { - UNLOCK_JNODE(node); + spin_unlock_jnode(node); return; } } uncapture_block(node); - UNLOCK_ATOM(atom); + spin_unlock_atom(atom); jput(node); } @@ -2394,7 +2379,7 @@ void uncapture_jnode(jnode * node) { txn_atom *atom; - assert("vs-1462", spin_jnode_is_locked(node)); + assert_spin_locked(&(node->guard)); assert("", node->pg == 0); if (JF_ISSET(node, JNODE_EFLUSH)) { @@ -2406,13 +2391,13 @@ void uncapture_jnode(jnode * node) /*jnode_make_clean(node); */ atom = jnode_get_atom(node); if (atom == NULL) { - assert("jmacd-7111", !jnode_is_dirty(node)); - UNLOCK_JNODE(node); + assert("jmacd-7111", !JF_ISSET(node, JNODE_DIRTY)); + spin_unlock_jnode(node); return; } uncapture_block(node); - UNLOCK_ATOM(atom); + spin_unlock_atom(atom); jput(node); } @@ -2423,8 +2408,8 @@ static void capture_assign_txnh_nolock(t assert("umka-200", atom != NULL); assert("umka-201", txnh != NULL); - assert("jmacd-822", spin_txnh_is_locked(txnh)); - assert("jmacd-823", spin_atom_is_locked(atom)); + assert_spin_locked(&(txnh->hlock)); + assert_spin_locked(&(atom->alock)); assert("jmacd-824", txnh->atom == NULL); assert("nikita-3540", atom_isopen(atom)); BUG_ON(txnh->atom != NULL); @@ -2441,11 +2426,11 @@ static void capture_assign_block_nolock( { assert("umka-202", atom != NULL); assert("umka-203", node != NULL); - assert("jmacd-321", spin_jnode_is_locked(node)); - assert("umka-295", spin_atom_is_locked(atom)); + assert_spin_locked(&(node->guard)); + assert_spin_locked(&(atom->alock)); assert("jmacd-323", node->atom == NULL); BUG_ON(!list_empty_careful(&node->capture_link)); - assert("nikita-3470", !jnode_is_dirty(node)); + assert("nikita-3470", !JF_ISSET(node, JNODE_DIRTY)); /* Pointer from jnode to atom is not counted in atom->refcount. */ node->atom = atom; @@ -2481,9 +2466,9 @@ int is_cced(const jnode * node) /* common code for dirtying both unformatted jnodes and formatted znodes. */ static void do_jnode_make_dirty(jnode * node, txn_atom * atom) { - assert("zam-748", spin_jnode_is_locked(node)); - assert("zam-750", spin_atom_is_locked(atom)); - assert("jmacd-3981", !jnode_is_dirty(node)); + assert_spin_locked(&(node->guard)); + assert_spin_locked(&(atom->alock)); + assert("jmacd-3981", !JF_ISSET(node, JNODE_DIRTY)); JF_SET(node, JNODE_DIRTY); @@ -2540,7 +2525,7 @@ static void do_jnode_make_dirty(jnode * void jnode_make_dirty_locked(jnode * node) { assert("umka-204", node != NULL); - assert("zam-7481", spin_jnode_is_locked(node)); + assert_spin_locked(&(node->guard)); if (REISER4_DEBUG && rofs_jnode(node)) { warning("nikita-3365", "Dirtying jnode on rofs"); @@ -2548,16 +2533,16 @@ void jnode_make_dirty_locked(jnode * nod } /* Fast check for already dirty node */ - if (!jnode_is_dirty(node)) { + if (!JF_ISSET(node, JNODE_DIRTY)) { txn_atom *atom; atom = jnode_get_atom(node); assert("vs-1094", atom); /* Check jnode dirty status again because node spin lock might * be released inside jnode_get_atom(). 
*/ - if (likely(!jnode_is_dirty(node))) + if (likely(!JF_ISSET(node, JNODE_DIRTY))) do_jnode_make_dirty(node, atom); - UNLOCK_ATOM(atom); + spin_unlock_atom(atom); } } @@ -2580,7 +2565,7 @@ void znode_make_dirty(znode * z) return; } - LOCK_JNODE(node); + spin_lock_jnode(node); jnode_make_dirty_locked(node); page = jnode_page(node); if (page != NULL) { @@ -2594,16 +2579,16 @@ void znode_make_dirty(znode * z) /* jnode lock is not needed for the rest of * znode_set_dirty(). */ - UNLOCK_JNODE(node); + spin_unlock_jnode(node); /* reiser4 file write code calls set_page_dirty for * unformatted nodes, for formatted nodes we do it here. */ - set_page_dirty_internal(page, 0); + set_page_dirty_internal(page); page_cache_release(page); /* bump version counter in znode */ z->version = znode_build_version(jnode_get_tree(node)); } else { assert("zam-596", znode_above_root(JZNODE(node))); - UNLOCK_JNODE(node); + spin_unlock_jnode(node); } assert("nikita-1900", znode_is_write_locked(z)); @@ -2620,7 +2605,7 @@ int sync_atom(txn_atom * atom) result = 0; if (atom != NULL) { if (atom->stage < ASTAGE_PRE_COMMIT) { - LOCK_TXNH(txnh); + spin_lock_txnh(txnh); capture_assign_txnh_nolock(atom, txnh); result = force_commit_atom_nolock(txnh); } else if (atom->stage < ASTAGE_POST_COMMIT) { @@ -2629,7 +2614,7 @@ int sync_atom(txn_atom * atom) /* try once more */ result = RETERR(-E_REPEAT); } else - UNLOCK_ATOM(atom); + spin_unlock_atom(atom); } return result; } @@ -2644,11 +2629,11 @@ count_jnode(txn_atom * atom, jnode * nod { struct list_head *pos; #if REISER4_COPY_ON_CAPTURE - assert("", spin_atom_is_locked(atom)); + assert_spin_locked(&(atom->alock)); #else assert("zam-1018", atom_is_protected(atom)); #endif - assert("", spin_jnode_is_locked(node)); + assert_spin_locked(&(node->guard)); assert("", NODE_LIST(node) == old_list); switch (NODE_LIST(node)) { @@ -2782,7 +2767,7 @@ void jnode_make_wander_nolock(jnode * no assert("nikita-2431", node != NULL); assert("nikita-2432", !JF_ISSET(node, JNODE_RELOC)); - assert("nikita-3153", jnode_is_dirty(node)); + assert("nikita-3153", JF_ISSET(node, JNODE_DIRTY)); assert("zam-897", !JF_ISSET(node, JNODE_FLUSH_QUEUED)); assert("nikita-3367", !blocknr_is_fake(jnode_get_block(node))); @@ -2804,21 +2789,21 @@ void jnode_make_wander(jnode * node) { txn_atom *atom; - LOCK_JNODE(node); + spin_lock_jnode(node); atom = jnode_get_atom(node); assert("zam-913", atom != NULL); assert("zam-914", !JF_ISSET(node, JNODE_RELOC)); jnode_make_wander_nolock(node); - UNLOCK_ATOM(atom); - UNLOCK_JNODE(node); + spin_unlock_atom(atom); + spin_unlock_jnode(node); } /* this just sets RELOC bit */ static void jnode_make_reloc_nolock(flush_queue_t * fq, jnode * node) { - assert("vs-1480", spin_jnode_is_locked(node)); - assert("zam-916", jnode_is_dirty(node)); + assert_spin_locked(&(node->guard)); + assert("zam-916", JF_ISSET(node, JNODE_DIRTY)); assert("zam-917", !JF_ISSET(node, JNODE_RELOC)); assert("zam-918", !JF_ISSET(node, JNODE_OVRWR)); assert("zam-920", !JF_ISSET(node, JNODE_FLUSH_QUEUED)); @@ -2834,7 +2819,7 @@ void znode_make_reloc(znode * z, flush_q txn_atom *atom; node = ZJNODE(z); - LOCK_JNODE(node); + spin_lock_jnode(node); atom = jnode_get_atom(node); assert("zam-919", atom != NULL); @@ -2842,8 +2827,8 @@ void znode_make_reloc(znode * z, flush_q jnode_make_reloc_nolock(fq, node); queue_jnode(fq, node); - UNLOCK_ATOM(atom); - UNLOCK_JNODE(node); + spin_unlock_atom(atom); + spin_unlock_jnode(node); } @@ -2861,10 +2846,10 @@ static int trylock_wait(txn_atom * atom, if 
(unlikely(!spin_trylock_atom(atom))) { atomic_inc(&atom->refcount); - UNLOCK_JNODE(node); - UNLOCK_TXNH(txnh); + spin_unlock_jnode(node); + spin_unlock_txnh(txnh); - LOCK_ATOM(atom); + spin_lock_atom(atom); /* caller should eliminate extra reference by calling * atom_dec_and_unlock() for this atom. */ return 1; @@ -2900,8 +2885,8 @@ static int trylock_throttle(txn_atom * a assert("nikita-3225", txnh != NULL); assert("nikita-3226", node != NULL); - assert("nikita-3227", spin_txnh_is_locked(txnh)); - assert("nikita-3229", spin_jnode_is_locked(node)); + assert_spin_locked(&(txnh->hlock)); + assert_spin_locked(&(node->guard)); if (unlikely(trylock_wait(atom, txnh, node) != 0)) { atom_dec_and_unlock(atom); @@ -2937,7 +2922,7 @@ static int capture_assign_block(txn_hand capture_assign_block_nolock(atom, node); /* Success holds onto jnode & txnh locks. Unlock atom. */ - UNLOCK_ATOM(atom); + spin_unlock_atom(atom); return 0; } } @@ -2979,7 +2964,7 @@ capture_assign_txnh(jnode * node, txn_ha * modify it somehow depending on its ->stage. In the simplest case, * where ->stage is ASTAGE_CAPTURE_FUSE, txnh should be added to * atom's list. Problem is that atom spin lock nests outside of jnode - * and transaction handle ones. So, we cannot just LOCK_ATOM here. + * and transaction handle ones. So, we cannot just spin_lock_atom here. * * Solutions tried here: * @@ -2995,15 +2980,15 @@ capture_assign_txnh(jnode * node, txn_ha * */ if (trylock_wait(atom, txnh, node) != 0) { - LOCK_JNODE(node); - LOCK_TXNH(txnh); + spin_lock_jnode(node); + spin_lock_txnh(txnh); /* NOTE-NIKITA is it at all possible that current txnh * spontaneously changes ->atom from NULL to non-NULL? */ if (node->atom == NULL || txnh->atom != NULL || atom != node->atom) { /* something changed. Caller have to re-decide */ - UNLOCK_TXNH(txnh); - UNLOCK_JNODE(node); + spin_unlock_txnh(txnh); + spin_unlock_jnode(node); atom_dec_and_unlock(atom); return RETERR(-E_REPEAT); } else { @@ -3067,7 +3052,7 @@ capture_assign_txnh(jnode * node, txn_ha } /* Unlock the atom */ - UNLOCK_ATOM(atom); + spin_unlock_atom(atom); return 0; } @@ -3131,7 +3116,7 @@ static void wakeup_atom_waiting_list(txn static int wait_for_fusion(txn_atom * atom, txn_wait_links * wlinks) { assert("nikita-3330", atom != NULL); - assert("nikita-3331", spin_atom_is_locked(atom)); + assert_spin_locked(&(atom->alock)); /* atom->txnh_count == 1 is for waking waiters up if we are releasing * last transaction handle. */ @@ -3167,14 +3152,14 @@ capture_fuse_wait(jnode * node, txn_hand assert("umka-214", atomf != NULL); /* We do not need the node lock. */ - UNLOCK_JNODE(node); + spin_unlock_jnode(node); if ((mode & TXN_CAPTURE_NONBLOCKING) != 0) { - UNLOCK_TXNH(txnh); - UNLOCK_ATOM(atomf); + spin_unlock_txnh(txnh); + spin_unlock_atom(atomf); if (atomh) { - UNLOCK_ATOM(atomh); + spin_unlock_atom(atomh); } return RETERR(-E_BLOCK); @@ -3187,17 +3172,17 @@ capture_fuse_wait(jnode * node, txn_hand list_add_tail(&wlinks._fwaitfor_link, &atomf->fwaitfor_list); wlinks.waitfor_cb = wait_for_fusion; atomic_inc(&atomf->refcount); - UNLOCK_ATOM(atomf); + spin_unlock_atom(atomf); if (atomh) { /* Add txnh to atomh's waiting list, unlock atomh. */ list_add_tail(&wlinks._fwaiting_link, &atomh->fwaiting_list); atomic_inc(&atomh->refcount); - UNLOCK_ATOM(atomh); + spin_unlock_atom(atomh); } /* Go to sleep. 
*/ - UNLOCK_TXNH(txnh); + spin_unlock_txnh(txnh); ret = prepare_to_sleep(wlinks._lock_stack); if (ret == 0) { @@ -3206,19 +3191,21 @@ capture_fuse_wait(jnode * node, txn_hand } /* Remove from the waitfor list. */ - LOCK_ATOM(atomf); + spin_lock_atom(atomf); list_del(&wlinks._fwaitfor_link); atom_dec_and_unlock(atomf); if (atomh) { /* Remove from the waiting list. */ - LOCK_ATOM(atomh); + spin_lock_atom(atomh); list_del(&wlinks._fwaiting_link); atom_dec_and_unlock(atomh); } - - assert("nikita-2186", ergo(ret, spin_jnode_is_not_locked(node))); +#if REISER4_DEBUG + if (ret) + assert_spin_not_locked(&(node->guard)); +#endif return ret; } @@ -3257,8 +3244,8 @@ capture_init_fusion_locked(jnode * node, /* A read request for a committing block can be satisfied w/o COPY-ON-CAPTURE. Success holds onto the jnode & txnh locks. */ - UNLOCK_ATOM(atomf); - UNLOCK_ATOM(atomh); + spin_unlock_atom(atomf); + spin_unlock_atom(atomh); return 0; } else { /* Perform COPY-ON-CAPTURE. Copy and try again. This function @@ -3279,8 +3266,8 @@ capture_init_fusion_locked(jnode * node, || atomf->txnh_count == 0); /* Now release the txnh lock: only holding the atoms at this point. */ - UNLOCK_TXNH(txnh); - UNLOCK_JNODE(node); + spin_unlock_txnh(txnh); + spin_unlock_jnode(node); /* Decide which should be kept and which should be merged. */ if (atom_pointer_count(atomf) < atom_pointer_count(atomh)) { @@ -3311,12 +3298,12 @@ capture_init_fusion(jnode * node, txn_ha return capture_init_fusion_locked(node, txnh, mode, can_coc); else { - UNLOCK_ATOM(node->atom); + spin_unlock_atom(node->atom); } } - UNLOCK_JNODE(node); - UNLOCK_TXNH(txnh); + spin_unlock_jnode(node); + spin_unlock_txnh(txnh); return RETERR(-E_REPEAT); } @@ -3333,14 +3320,16 @@ capture_fuse_jnode_lists(txn_atom *large assert("umka-219", large_head != NULL); assert("umka-220", small_head != NULL); /* small atom should be locked also. */ - assert("zam-968", spin_atom_is_locked(large)); + assert_spin_locked(&(large->alock)); /* For every jnode on small's capture list... */ list_for_each_entry(node, small_head, capture_link) { count += 1; /* With the jnode lock held, update atom pointer. */ - UNDER_SPIN_VOID(jnode, node, node->atom = large); + spin_lock_jnode(node); + node->atom = large; + spin_unlock_jnode(node); } /* Splice the lists. */ @@ -3367,7 +3356,9 @@ capture_fuse_txnh_lists(txn_atom *large, count += 1; /* With the txnh lock held, update atom pointer. */ - UNDER_SPIN_VOID(txnh, txnh, txnh->atom = large); + spin_lock_txnh(txnh); + txnh->atom = large; + spin_unlock_txnh(txnh); } /* Splice the txn_handle list. */ @@ -3391,8 +3382,8 @@ static void capture_fuse_into(txn_atom * assert("umka-224", small != NULL); assert("umka-225", small != NULL); - assert("umka-299", spin_atom_is_locked(large)); - assert("umka-300", spin_atom_is_locked(small)); + assert_spin_locked(&(large->alock)); + assert_spin_locked(&(small->alock)); assert("jmacd-201", atom_isopen(small)); assert("jmacd-202", atom_isopen(large)); @@ -3427,11 +3418,11 @@ static void capture_fuse_into(txn_atom * list_for_each_entry(node, &prot_list->nodes, capture_link) { zcount += 1; - LOCK_JNODE(node); + spin_lock_jnode(node); assert("nikita-3375", node->atom == small); /* With the jnode lock held, update atom pointer. */ node->atom = large; - UNLOCK_JNODE(node); + spin_unlock_jnode(node); } } /* Splice the lists of lists. 
*/ @@ -3521,7 +3512,7 @@ static void capture_fuse_into(txn_atom * wakeup_atom_waiting_list(small); /* Unlock atoms */ - UNLOCK_ATOM(large); + spin_unlock_atom(large); atom_dec_and_unlock(small); } @@ -3534,7 +3525,7 @@ void protected_jnodes_init(protected_jno atom = get_current_atom_locked(); list_add(&list->inatom, &atom->protected); INIT_LIST_HEAD(&list->nodes); - UNLOCK_ATOM(atom); + spin_unlock_atom(atom); } void protected_jnodes_done(protected_jnodes *list) @@ -3545,7 +3536,7 @@ void protected_jnodes_done(protected_jno atom = get_current_atom_locked(); list_del_init(&list->inatom); - UNLOCK_ATOM(atom); + spin_unlock_atom(atom); } /* TXNMGR STUFF */ @@ -3647,10 +3638,10 @@ static void fake_jload(jnode * node) } /* for now - refuse to copy-on-capture any suspicious nodes (WRITEBACK, DIRTY, FLUSH_QUEUED) */ -static int check_capturable(const jnode * node, const txn_atom * atom) +static int check_capturable(jnode * node, const txn_atom * atom) { - assert("vs-1429", spin_jnode_is_locked(node)); - assert("vs-1487", check_spin_is_locked(&scan_lock)); + assert_spin_locked(&(node->guard)); + assert_spin_locked(&scan_lock); if (JF_ISSET(node, JNODE_WRITEBACK)) { reiser4_stat_inc(coc.writeback); @@ -3723,8 +3714,8 @@ static int copy_on_capture_clean(jnode * { int result; - assert("vs-1625", spin_atom_is_locked(atom)); - assert("vs-1432", spin_jnode_is_locked(node)); + assert_spin_locked(&(atom->alock)); + assert_spin_locked(&(node->guard)); assert("vs-1627", !JF_ISSET(node, JNODE_WRITEBACK)); spin_lock(&scan_lock); @@ -3735,8 +3726,8 @@ static int copy_on_capture_clean(jnode * reiser4_stat_inc(coc.ok_clean); } spin_unlock(&scan_lock); - UNLOCK_JNODE(node); - UNLOCK_ATOM(atom); + spin_unlock_jnode(node); + spin_unlock_atom(atom); return result; } @@ -3744,11 +3735,11 @@ static int copy_on_capture_clean(jnode * static void lock_two_nodes(jnode * node1, jnode * node2) { if (node1 > node2) { - LOCK_JNODE(node2); - LOCK_JNODE(node1); + spin_lock_jnode(node2); + spin_lock_jnode(node1); } else { - LOCK_JNODE(node1); - LOCK_JNODE(node2); + spin_lock_jnode(node1); + spin_lock_jnode(node2); } } @@ -3759,12 +3750,13 @@ static int copy_on_capture_nopage(jnode int result; jnode *copy; + assert("vs-1432", spin_atom_is_locked(atom)); assert("vs-1432", spin_jnode_is_locked(node)); jref(node); - UNLOCK_JNODE(node); - UNLOCK_ATOM(atom); + spin_unlock_jnode(node); + spin_unlock_atom(atom); assert("nikita-3475", schedulable()); copy = jclone(node); if (IS_ERR(copy)) { @@ -3772,7 +3764,7 @@ static int copy_on_capture_nopage(jnode return PTR_ERR(copy); } - LOCK_ATOM(atom); + spin_lock_atom(atom); lock_two_nodes(node, copy); spin_lock(&scan_lock); @@ -3791,9 +3783,9 @@ static int copy_on_capture_nopage(jnode } spin_unlock(&scan_lock); - UNLOCK_JNODE(node); - UNLOCK_JNODE(copy); - UNLOCK_ATOM(atom); + spin_unlock_jnode(node); + spin_unlock_jnode(copy); + spin_unlock_atom(atom); assert("nikita-3476", schedulable()); jput(copy); assert("nikita-3477", schedulable()); @@ -3822,7 +3814,7 @@ handle_coc(jnode * node, jnode * copy, s * free space may not be re-used in insertion. 
*/ radix_tree_preload(GFP_KERNEL); - LOCK_ATOM(atom); + spin_lock_atom(atom); lock_two_nodes(node, copy); spin_lock(&scan_lock); @@ -3856,9 +3848,9 @@ handle_coc(jnode * node, jnode * copy, s assert("vs-1419", page_count(new_page) >= 3); spin_unlock(&scan_lock); - UNLOCK_JNODE(node); - UNLOCK_JNODE(copy); - UNLOCK_ATOM(atom); + spin_unlock_jnode(node); + spin_unlock_jnode(copy); + spin_unlock_atom(atom); radix_tree_preload_end(); unlock_page(page); @@ -3879,9 +3871,9 @@ handle_coc(jnode * node, jnode * copy, s ON_TRACE(TRACE_CAPTURE_COPY, "copy on capture done\n"); } else { spin_unlock(&scan_lock); - UNLOCK_JNODE(node); - UNLOCK_JNODE(copy); - UNLOCK_ATOM(atom); + spin_unlock_jnode(node); + spin_unlock_jnode(copy); + spin_unlock_atom(atom); radix_tree_preload_end(); kunmap(page); unlock_page(page); @@ -3906,8 +3898,8 @@ static int real_copy_on_capture(jnode * page = node->pg; page_cache_get(page); jref(node); - UNLOCK_JNODE(node); - UNLOCK_ATOM(atom); + spin_unlock_jnode(node); + spin_unlock_atom(atom); /* prevent node from eflushing */ result = jload(node); @@ -3951,8 +3943,8 @@ static int create_copy_and_replace(jnode if (JF_ISSET(node, JNODE_CCED)) { /* node is under copy on capture already */ reiser4_stat_inc(coc.coc_race); - UNLOCK_JNODE(node); - UNLOCK_ATOM(atom); + spin_unlock_jnode(node); + spin_unlock_atom(atom); return RETERR(-E_WAIT); } @@ -3962,8 +3954,8 @@ static int create_copy_and_replace(jnode ON_TRACE(TRACE_CAPTURE_COPY, "copy_on_capture: node %p, atom %p..", node, atom); if (JF_ISSET(node, JNODE_EFLUSH)) { - UNLOCK_JNODE(node); - UNLOCK_ATOM(atom); + spin_unlock_jnode(node); + spin_unlock_atom(atom); reiser4_stat_inc(coc.eflush); ON_TRACE(TRACE_CAPTURE_COPY, "eflushed\n"); @@ -3988,8 +3980,8 @@ static int create_copy_and_replace(jnode assert("vs-1640", inode != NULL); assert("vs-1641", page != NULL); assert("vs-1642", page->mapping != NULL); - UNLOCK_JNODE(node); - UNLOCK_ATOM(atom); + spin_unlock_jnode(node); + spin_unlock_atom(atom); down_write(&reiser4_inode_data(inode)->coc_sem); lock_page(page); @@ -4015,8 +4007,8 @@ static int create_copy_and_replace(jnode } pte_chain_unlock(page); unlock_page(page); - LOCK_ATOM(atom); - LOCK_JNODE(node); + spin_lock_atom(atom); + spin_lock_jnode(node); } else inode = NULL; @@ -4057,9 +4049,9 @@ capture_copy(jnode * node, txn_handle * /* The txnh and its (possibly NULL) atom's locks are not needed at this point. */ - UNLOCK_TXNH(txnh); + spin_unlock_txnh(txnh); if (atomh != NULL) - UNLOCK_ATOM(atomh); + spin_unlock_atom(atomh); /* create a copy of node, detach node from atom and attach its copy instead */ @@ -4067,7 +4059,7 @@ capture_copy(jnode * node, txn_handle * result = create_copy_and_replace(node, atomf); assert("nikita-3474", schedulable()); preempt_point(); - LOCK_ATOM(atomf); + spin_lock_atom(atomf); atom_dec_and_unlock(atomf); preempt_point(); @@ -4108,9 +4100,9 @@ void uncapture_block(jnode * node) assert("umka-228", atom != NULL); assert("jmacd-1021", node->atom == atom); - assert("jmacd-1022", spin_jnode_is_locked(node)); + assert_spin_locked(&(node->guard)); #if REISER4_COPY_ON_CAPTURE - assert("jmacd-1023", spin_atom_is_locked(atom)); + assert_spin_locked(&(atom->alock)); #else assert("jmacd-1023", atom_is_protected(atom)); #endif @@ -4134,7 +4126,7 @@ void uncapture_block(jnode * node) ON_DEBUG(count_jnode(atom, node, NODE_LIST(node), NOT_CAPTURED, 1)); node->atom = NULL; - UNLOCK_JNODE(node); + spin_unlock_jnode(node); LOCK_CNT_DEC(t_refs); } @@ -4143,9 +4135,8 @@ void uncapture_block(jnode * node) transaction. 
@atom and @node are spin locked */ void insert_into_atom_ovrwr_list(txn_atom * atom, jnode * node) { - assert("zam-538", spin_atom_is_locked(atom) - || atom->stage >= ASTAGE_PRE_COMMIT); - assert("zam-539", spin_jnode_is_locked(node)); + assert("zam-538", atom_is_protected(atom)); + assert_spin_locked(&(node->guard)); assert("zam-899", JF_ISSET(node, JNODE_OVRWR)); assert("zam-543", node->atom == NULL); assert("vs-1433", !jnode_is_unformatted(node) && !jnode_is_znode(node)); @@ -4201,10 +4192,12 @@ reiser4_block_nr txnmgr_count_deleted_bl spin_lock_txnmgr(tmgr); list_for_each_entry(atom, &tmgr->atoms_list, atom_link) { - LOCK_ATOM(atom); - blocknr_set_iterator(atom, &atom->delete_set, - count_deleted_blocks_actor, &result, 0); - UNLOCK_ATOM(atom); + spin_lock_atom(atom); + if (atom_isopen(atom)) + blocknr_set_iterator( + atom, &atom->delete_set, + count_deleted_blocks_actor, &result, 0); + spin_unlock_atom(atom); } spin_unlock_txnmgr(tmgr); diff -puN fs/reiser4/txnmgr.h~reiser4-spinlock-cleanup fs/reiser4/txnmgr.h --- devel/fs/reiser4/txnmgr.h~reiser4-spinlock-cleanup 2006-02-16 14:17:05.000000000 -0800 +++ devel-akpm/fs/reiser4/txnmgr.h 2006-02-16 14:17:05.000000000 -0800 @@ -8,7 +8,6 @@ #define __REISER4_TXNMGR_H__ #include "forward.h" -#include "spin_macros.h" #include "dformat.h" #include @@ -210,7 +209,7 @@ struct blocknr_set { struct txn_atom { /* The spinlock protecting the atom, held during fusion and various other state changes. */ - reiser4_spin_data alock; + spinlock_t alock; /* The atom's reference counter, increasing (in case of a duplication of an existing reference or when we are sure that some other @@ -343,7 +342,7 @@ typedef struct protected_jnodes { the system to a txn_atom. */ struct txn_handle { /* Spinlock protecting ->atom pointer */ - reiser4_spin_data hlock; + spinlock_t hlock; /* Flags for controlling commit_txnh() behavior */ /* from txn_handle_flags_t */ @@ -362,7 +361,7 @@ struct txn_handle { /* The transaction manager: one is contained in the reiser4_super_info_data */ struct txn_mgr { /* A spinlock protecting the atom list, id_count, flush_control */ - reiser4_spin_data tmgr_lock; + spinlock_t tmgr_lock; /* List of atoms. */ struct list_head atoms_list; @@ -440,7 +439,24 @@ extern int uncapture_inode(struct inode extern txn_atom *get_current_atom_locked_nocheck(void); -#define atom_is_protected(atom) (spin_atom_is_locked(atom) || (atom)->stage >= ASTAGE_PRE_COMMIT) +#if REISER4_DEBUG + +/** + * atom_is_protected - make sure that nobody but us can do anything with atom + * @atom: atom to be checked + * + * This is used to assert that atom either entered commit stages or is spin + * locked. + */ +static inline int atom_is_protected(txn_atom *atom) +{ + if (atom->stage >= ASTAGE_PRE_COMMIT) + return 1; + assert_spin_locked(&(atom->alock)); + return 1; +} + +#endif /* Get the current atom and spinlock it if current atom present. 
May not return NULL */ static inline txn_atom *get_current_atom_locked(void) @@ -487,31 +503,123 @@ extern int blocknr_set_iterator(txn_atom extern void flush_init_atom(txn_atom * atom); extern void flush_fuse_queues(txn_atom * large, txn_atom * small); -/* INLINE FUNCTIONS */ +static inline void spin_lock_atom(txn_atom *atom) +{ + /* check that spinlocks of lower priorities are not held */ + assert("", (LOCK_CNT_NIL(spin_locked_txnh) && + LOCK_CNT_NIL(spin_locked_jnode) && + LOCK_CNT_NIL(rw_locked_zlock) && + LOCK_CNT_NIL(rw_locked_dk) && + LOCK_CNT_NIL(rw_locked_tree))); + + spin_lock(&(atom->alock)); + + LOCK_CNT_INC(spin_locked_atom); + LOCK_CNT_INC(spin_locked); +} + +static inline int spin_trylock_atom(txn_atom *atom) +{ + if (spin_trylock(&(atom->alock))) { + LOCK_CNT_INC(spin_locked_atom); + LOCK_CNT_INC(spin_locked); + return 1; + } + return 0; +} + +static inline void spin_unlock_atom(txn_atom *atom) +{ + assert_spin_locked(&(atom->alock)); + assert("nikita-1375", LOCK_CNT_GTZ(spin_locked_atom)); + assert("nikita-1376", LOCK_CNT_GTZ(spin_locked)); + + LOCK_CNT_DEC(spin_locked_atom); + LOCK_CNT_DEC(spin_locked); + + spin_unlock(&(atom->alock)); +} + +static inline void spin_lock_txnh(txn_handle *txnh) +{ + /* check that spinlocks of lower priorities are not held */ + assert("", (LOCK_CNT_NIL(rw_locked_dk) && + LOCK_CNT_NIL(rw_locked_zlock) && + LOCK_CNT_NIL(rw_locked_tree))); + + spin_lock(&(txnh->hlock)); + + LOCK_CNT_INC(spin_locked_txnh); + LOCK_CNT_INC(spin_locked); +} + +static inline int spin_trylock_txnh(txn_handle *txnh) +{ + if (spin_trylock(&(txnh->hlock))) { + LOCK_CNT_INC(spin_locked_txnh); + LOCK_CNT_INC(spin_locked); + return 1; + } + return 0; +} + +static inline void spin_unlock_txnh(txn_handle *txnh) +{ + assert_spin_locked(&(txnh->hlock)); + assert("nikita-1375", LOCK_CNT_GTZ(spin_locked_txnh)); + assert("nikita-1376", LOCK_CNT_GTZ(spin_locked)); + + LOCK_CNT_DEC(spin_locked_txnh); + LOCK_CNT_DEC(spin_locked); + + spin_unlock(&(txnh->hlock)); +} + +#define spin_ordering_pred_txnmgr(tmgr) \ + ( LOCK_CNT_NIL(spin_locked_atom) && \ + LOCK_CNT_NIL(spin_locked_txnh) && \ + LOCK_CNT_NIL(spin_locked_jnode) && \ + LOCK_CNT_NIL(rw_locked_zlock) && \ + LOCK_CNT_NIL(rw_locked_dk) && \ + LOCK_CNT_NIL(rw_locked_tree) ) + +static inline void spin_lock_txnmgr(txn_mgr *mgr) +{ + /* check that spinlocks of lower priorities are not held */ + assert("", (LOCK_CNT_NIL(spin_locked_atom) && + LOCK_CNT_NIL(spin_locked_txnh) && + LOCK_CNT_NIL(spin_locked_jnode) && + LOCK_CNT_NIL(rw_locked_zlock) && + LOCK_CNT_NIL(rw_locked_dk) && + LOCK_CNT_NIL(rw_locked_tree))); + + spin_lock(&(mgr->tmgr_lock)); + + LOCK_CNT_INC(spin_locked_txnmgr); + LOCK_CNT_INC(spin_locked); +} -#define spin_ordering_pred_atom(atom) \ - ( ( lock_counters() -> spin_locked_txnh == 0 ) && \ - ( lock_counters() -> spin_locked_jnode == 0 ) && \ - ( lock_counters() -> rw_locked_zlock == 0 ) && \ - ( lock_counters() -> rw_locked_dk == 0 ) && \ - ( lock_counters() -> rw_locked_tree == 0 ) ) - -#define spin_ordering_pred_txnh(txnh) \ - ( ( lock_counters() -> rw_locked_dk == 0 ) && \ - ( lock_counters() -> rw_locked_zlock == 0 ) && \ - ( lock_counters() -> rw_locked_tree == 0 ) ) - -#define spin_ordering_pred_txnmgr(tmgr) \ - ( ( lock_counters() -> spin_locked_atom == 0 ) && \ - ( lock_counters() -> spin_locked_txnh == 0 ) && \ - ( lock_counters() -> spin_locked_jnode == 0 ) && \ - ( lock_counters() -> rw_locked_zlock == 0 ) && \ - ( lock_counters() -> rw_locked_dk == 0 ) && \ - ( lock_counters() -> rw_locked_tree == 0 ) ) - 
-SPIN_LOCK_FUNCTIONS(atom, txn_atom, alock); -SPIN_LOCK_FUNCTIONS(txnh, txn_handle, hlock); -SPIN_LOCK_FUNCTIONS(txnmgr, txn_mgr, tmgr_lock); +static inline int spin_trylock_txnmgr(txn_mgr *mgr) +{ + if (spin_trylock(&(mgr->tmgr_lock))) { + LOCK_CNT_INC(spin_locked_txnmgr); + LOCK_CNT_INC(spin_locked); + return 1; + } + return 0; +} + +static inline void spin_unlock_txnmgr(txn_mgr *mgr) +{ + assert_spin_locked(&(mgr->tmgr_lock)); + assert("nikita-1375", LOCK_CNT_GTZ(spin_locked_txnmgr)); + assert("nikita-1376", LOCK_CNT_GTZ(spin_locked)); + + LOCK_CNT_DEC(spin_locked_txnmgr); + LOCK_CNT_DEC(spin_locked); + + spin_unlock(&(mgr->tmgr_lock)); +} typedef enum { FQ_IN_USE = 0x1 @@ -532,7 +640,7 @@ struct flush_queue { easier. See field in atom struct for description of list. */ struct list_head alink; /* A spinlock to protect changes of fq state and fq->atom pointer */ - reiser4_spin_data guard; + spinlock_t guard; /* flush_queue state: [in_use | ready] */ flush_queue_state_t state; /* A list which contains queued nodes, queued nodes are removed from any diff -puN fs/reiser4/vfs_ops.c~reiser4-spinlock-cleanup fs/reiser4/vfs_ops.c --- devel/fs/reiser4/vfs_ops.c~reiser4-spinlock-cleanup 2006-02-16 14:17:05.000000000 -0800 +++ devel-akpm/fs/reiser4/vfs_ops.c 2006-02-16 14:17:05.000000000 -0800 @@ -141,13 +141,6 @@ static void reiser4_d_release(struct den reiser4_free_dentry_fsdata(dentry); } - - -/* initialization and shutdown */ - - - - /* * Called by reiser4_sync_inodes(), during speculative write-back (through * pdflush, or balance_dirty_pages()). @@ -175,8 +168,7 @@ void writeout(struct super_block *sb, st mapping = get_super_fake(sb)->i_mapping; do { long nr_submitted = 0; - struct wbq * rq; - jnode * node = NULL; + jnode *node = NULL; /* do not put more requests to overload write queue */ if (wbc->nonblocking && @@ -188,15 +180,22 @@ void writeout(struct super_block *sb, st repeats++; BUG_ON(wbc->nr_to_write <= 0); - rq = get_wbq(sb); - node = get_jnode_by_wbq(sb, rq); + if (get_current_context()->entd) { + entd_context *ent = get_entd_context(sb); + + if (ent->cur_request->node) + /* + * this is ent thread and it managed to capture + * requested page itself - start flush from + * that page + */ + node = jref(ent->cur_request->node); + } - result = flush_some_atom( - node, &nr_submitted, wbc, JNODE_FLUSH_WRITE_BLOCKS); + result = flush_some_atom(node, &nr_submitted, wbc, + JNODE_FLUSH_WRITE_BLOCKS); if (result != 0) warning("nikita-31001", "Flush failed: %i", result); - if (rq) - put_wbq(sb, rq); if (node) jput(node); if (!nr_submitted) diff -puN fs/reiser4/vfs_ops.h~reiser4-spinlock-cleanup fs/reiser4/vfs_ops.h --- devel/fs/reiser4/vfs_ops.h~reiser4-spinlock-cleanup 2006-02-16 14:17:05.000000000 -0800 +++ devel-akpm/fs/reiser4/vfs_ops.h 2006-02-16 14:17:05.000000000 -0800 @@ -33,9 +33,8 @@ extern int reiser4_del_nlink(struct inod extern int reiser4_start_up_io(struct page *page); -extern void reiser4_clear_page_dirty(struct page *); extern void reiser4_throttle_write(struct inode *); -ON_DEBUG(int jnode_is_releasable(jnode *)); +extern int jnode_is_releasable(jnode *); #define CAPTURE_APAGE_BURST (1024l) void writeout(struct super_block *, struct writeback_control *); diff -puN fs/reiser4/wander.c~reiser4-spinlock-cleanup fs/reiser4/wander.c --- devel/fs/reiser4/wander.c~reiser4-spinlock-cleanup 2006-02-16 14:17:05.000000000 -0800 +++ devel-akpm/fs/reiser4/wander.c 2006-02-16 14:17:05.000000000 -0800 @@ -183,9 +183,8 @@ #include /* for struct bio */ #include -static int 
write_jnodes_to_disk_extent(struct list_head *head, jnode *, int, - const reiser4_block_nr *, - flush_queue_t *, int); +static int write_jnodes_to_disk_extent( + jnode *, int, const reiser4_block_nr *, flush_queue_t *, int); /* The commit_handle is a container for objects needed at atom commit time */ struct commit_handle { @@ -224,6 +223,18 @@ static void done_commit_handle(struct co assert("zam-690", list_empty(&ch->tx_list)); } +static inline int reiser4_use_write_barrier(struct super_block * s) +{ + return !reiser4_is_set(s, REISER4_NO_WRITE_BARRIER); +} + +static void disable_write_barrier(struct super_block * s) +{ + warning("zam-1055", "disabling write barrier\n"); + set_bit((int)REISER4_NO_WRITE_BARRIER, &get_super_private(s)->fs_flags); +} + + /* fill journal header block data */ static void format_journal_header(struct commit_handle *ch) { @@ -415,7 +426,7 @@ store_wmap_actor(txn_atom * atom UNUSED_ set is written to wandered locations and all wander records are written also. Updated journal header blocks contains a pointer (block number) to first wander record of the just written transaction */ -static int update_journal_header(struct commit_handle *ch) +static int update_journal_header(struct commit_handle *ch, int use_barrier) { struct reiser4_super_info_data *sbinfo = get_super_private(ch->super); jnode *jh = sbinfo->journal_header; @@ -424,12 +435,12 @@ static int update_journal_header(struct format_journal_header(ch); - ret = write_jnodes_to_disk_extent(&ch->tx_list, jh, 1, - jnode_get_block(jh), NULL, 0); + ret = write_jnodes_to_disk_extent(jh, 1, jnode_get_block(jh), NULL, + use_barrier ? WRITEOUT_BARRIER : 0); if (ret) return ret; - blk_run_address_space(sbinfo->fake->i_mapping); + // blk_run_address_space(sbinfo->fake->i_mapping); /*blk_run_queues(); */ ret = jwait_io(jh, WRITE); @@ -445,7 +456,7 @@ static int update_journal_header(struct /* This function is called after write-back is finished. We update journal footer block and free blocks which were occupied by wandered blocks and transaction wander records */ -static int update_journal_footer(struct commit_handle *ch) +static int update_journal_footer(struct commit_handle *ch, int use_barrier) { reiser4_super_info_data *sbinfo = get_super_private(ch->super); @@ -455,13 +466,12 @@ static int update_journal_footer(struct format_journal_footer(ch); - ret = - write_jnodes_to_disk_extent(&ch->tx_list, jf, 1, - jnode_get_block(jf), NULL, 0); + ret = write_jnodes_to_disk_extent(jf, 1, jnode_get_block(jf), NULL, + use_barrier ? 
WRITEOUT_BARRIER : 0); if (ret) return ret; - blk_run_address_space(sbinfo->fake->i_mapping); + // blk_run_address_space(sbinfo->fake->i_mapping); /*blk_run_queue(); */ ret = jwait_io(jf, WRITE); @@ -550,527 +560,14 @@ static void undo_bio(struct bio *bio) pg = bio->bi_io_vec[i].bv_page; ClearPageWriteback(pg); node = jprivate(pg); - LOCK_JNODE(node); + spin_lock_jnode(node); JF_CLR(node, JNODE_WRITEBACK); JF_SET(node, JNODE_DIRTY); - UNLOCK_JNODE(node); + spin_unlock_jnode(node); } bio_put(bio); } -#if REISER4_COPY_ON_CAPTURE - -extern spinlock_t scan_lock; - -/* put overwrite set back to atom's clean list */ -static void put_overwrite_set(struct commit_handle *ch) -{ - jnode *cur; - - spin_lock(&scan_lock); - cur = capture_list_front(ch->overwrite_set); - while (!capture_list_end(ch->overwrite_set, cur)) { - assert("vs-1443", NODE_LIST(cur) == OVRWR_LIST); - JF_SET(cur, JNODE_SCANNED); - spin_unlock(&scan_lock); - JF_CLR(cur, JNODE_JLOADED_BY_GET_OVERWRITE_SET); - jrelse_tail(cur); - spin_lock(&scan_lock); - JF_CLR(cur, JNODE_SCANNED); - cur = capture_list_next(cur); - } - spin_unlock(&scan_lock); -} - -/* Count overwrite set size, grab disk space for wandered blocks allocation. - Since we have a separate list for atom's overwrite set we just scan the list, - count bitmap and other not leaf nodes which wandered blocks allocation we - have to grab space for. */ -static int get_overwrite_set(struct commit_handle *ch) -{ - int ret; - jnode *cur; - __u64 nr_not_leaves = 0; -#if REISER4_DEBUG - __u64 nr_formatted_leaves = 0; - __u64 nr_unformatted_leaves = 0; -#endif - - assert("zam-697", ch->overwrite_set_size == 0); - - ch->overwrite_set = ATOM_OVRWR_LIST(ch->atom); - - spin_lock(&scan_lock); - cur = capture_list_front(ch->overwrite_set); - - while (!capture_list_end(ch->overwrite_set, cur)) { - jnode *next; - - /* FIXME: for all but first this bit is set already */ - assert("vs-1444", NODE_LIST(cur) == OVRWR_LIST); - JF_SET(cur, JNODE_SCANNED); - next = capture_list_next(cur); - if (!capture_list_end(ch->overwrite_set, next)) - JF_SET(next, JNODE_SCANNED); - spin_unlock(&scan_lock); - - if (jnode_is_znode(cur) && znode_above_root(JZNODE(cur))) { - ON_TRACE(TRACE_LOG, "fake znode found , WANDER=(%d)\n", - JF_ISSET(cur, JNODE_OVRWR)); - } - - /* Count bitmap locks for getting correct statistics what number - * of blocks were cleared by the transaction commit. */ - if (jnode_get_type(cur) == JNODE_BITMAP) - ch->nr_bitmap++; - - assert("zam-939", JF_ISSET(cur, JNODE_OVRWR) - || jnode_get_type(cur) == JNODE_BITMAP); - - if (jnode_is_znode(cur) && znode_above_root(JZNODE(cur))) { - /* we replace fake znode by another (real) - znode which is suggested by disk_layout - plugin */ - - /* FIXME: it looks like fake znode should be - replaced by jnode supplied by - disk_layout. 
*/ - - struct super_block *s = reiser4_get_current_sb(); - reiser4_super_info_data *sbinfo = - get_current_super_private(); - - if (sbinfo->df_plug->log_super) { - jnode *sj = sbinfo->df_plug->log_super(s); - - assert("zam-593", sj != NULL); - - if (IS_ERR(sj)) - return PTR_ERR(sj); - - LOCK_ATOM(ch->atom); - LOCK_JNODE(sj); - JF_SET(sj, JNODE_OVRWR); - insert_into_atom_ovrwr_list(ch->atom, sj); - UNLOCK_JNODE(sj); - UNLOCK_ATOM(ch->atom); - - /* jload it as the rest of overwrite set */ - jload_gfp(sj, GFP_KERNEL, 0); - - ch->overwrite_set_size++; - } - LOCK_ATOM(ch->atom); - LOCK_JNODE(cur); - uncapture_block(cur); - UNLOCK_ATOM(ch->atom); - jput(cur); - - spin_lock(&scan_lock); - JF_CLR(cur, JNODE_SCANNED); - cur = next; - nr_not_leaves++; - } else { - int ret; - ch->overwrite_set_size++; - ret = jload_gfp(cur, GFP_KERNEL, 0); - if (ret) - reiser4_panic("zam-783", - "cannot load e-flushed jnode back (ret = %d)\n", - ret); - - /* Count not leaves here because we have to grab disk - * space for wandered blocks. They were not counted as - * "flush reserved". This should be done after doing - * jload() to avoid races with emergency - * flush. Counting should be done _after_ nodes are - * pinned * into memory by jload(). */ - if (!jnode_is_leaf(cur)) - nr_not_leaves++; - /* this is to check atom's flush reserved space for - * overwritten leaves */ - else { -#if REISER4_DEBUG - /* at this point @cur either has - * JNODE_FLUSH_RESERVED or is - * eflushed. Locking is not strong enough to - * write an assertion checking for this. */ - if (jnode_is_znode(cur)) - nr_formatted_leaves++; - else - nr_unformatted_leaves++; -#endif - JF_CLR(cur, JNODE_FLUSH_RESERVED); - } - spin_lock(&scan_lock); - JF_SET(cur, JNODE_JLOADED_BY_GET_OVERWRITE_SET); - assert("", cur->pg); - JF_CLR(cur, JNODE_SCANNED); - cur = next; - } - - } - spin_unlock(&scan_lock); - - /* Grab space for writing (wandered blocks) of not leaves found in - * overwrite set. */ - ret = reiser4_grab_space_force(nr_not_leaves, BA_RESERVED); - if (ret) - return ret; - - /* Disk space for allocation of wandered blocks of leaf nodes already - * reserved as "flush reserved", move it to grabbed space counter. */ - spin_lock_atom(ch->atom); - assert("zam-940", - nr_formatted_leaves + nr_unformatted_leaves <= - ch->atom->flush_reserved); - flush_reserved2grabbed(ch->atom, ch->atom->flush_reserved); - spin_unlock_atom(ch->atom); - - return ch->overwrite_set_size; -} - -/* Submit a write request for @nr jnodes beginning from the @first, other - jnodes are after the @first on the double-linked "capture" list. All - jnodes will be written to the disk region of @nr blocks starting with - @block_p block number. If @fq is not NULL it means that waiting for i/o - completion will be done more efficiently by using flush_queue_t objects - -ZAM-FIXME-HANS: brief me on why this function exists, and why bios are -aggregated in this function instead of being left to the layers below - -FIXME: ZAM->HANS: What layer are you talking about? Can you point me to that? -Why that layer needed? Why BIOs cannot be constructed here? 
-*/ -static int -write_jnodes_to_disk_extent(capture_list_head * head, jnode * first, int nr, - const reiser4_block_nr * block_p, - flush_queue_t * fq, int flags) -{ - struct super_block *super = reiser4_get_current_sb(); - int for_reclaim = flags & WRITEOUT_FOR_PAGE_RECLAIM; - int max_blocks; - jnode *cur = first; - reiser4_block_nr block; - - assert("zam-571", first != NULL); - assert("zam-572", block_p != NULL); - assert("zam-570", nr > 0); - - block = *block_p; - - ON_TRACE(TRACE_IO_W, "write of %d blocks starting from %llu\n", - nr, (unsigned long long)block); - - max_blocks = - bdev_get_queue(super->s_bdev)->max_sectors >> (super-> - s_blocksize_bits - - 9); - - while (nr > 0) { - struct bio *bio; - int nr_blocks = min(nr, max_blocks); - int i; - int nr_used; - - bio = bio_alloc(GFP_NOIO, nr_blocks); - if (!bio) - return RETERR(-ENOMEM); - - bio->bi_bdev = super->s_bdev; - bio->bi_sector = block * (super->s_blocksize >> 9); - for (nr_used = 0, i = 0; i < nr_blocks; i++) { - struct page *pg; - - assert("vs-1423", - ergo(jnode_is_znode(cur) - || jnode_is_unformatted(cur), JF_ISSET(cur, - JNODE_SCANNED))); - pg = jnode_page(cur); - assert("zam-573", pg != NULL); - - page_cache_get(pg); - - lock_and_wait_page_writeback(pg); - - LOCK_JNODE(cur); - assert("nikita-3553", jnode_page(cur) == pg); - assert("nikita-3554", jprivate(pg) == cur); - - assert("nikita-3166", - ergo(!JF_ISSET(cur, JNODE_CC), - pg->mapping == jnode_get_mapping(cur))); - if (!JF_ISSET(cur, JNODE_WRITEBACK)) { - assert("nikita-3165", - !jnode_is_releasable(cur)); - UNLOCK_JNODE(cur); - if (!bio_add_page(bio, - pg, super->s_blocksize, 0)) { - /* - * underlying device is satiated. Stop - * adding pages to the bio. - */ - unlock_page(pg); - page_cache_release(pg); - break; - } - - LOCK_JNODE(cur); - JF_SET(cur, JNODE_WRITEBACK); - JF_CLR(cur, JNODE_DIRTY); - ON_DEBUG(cur->written++); - UNLOCK_JNODE(cur); - - SetPageWriteback(pg); - if (for_reclaim) - ent_writes_page(super, pg); - spin_lock(&pg->mapping->page_lock); - - if (REISER4_STATS && !PageDirty(pg)) - reiser4_stat_inc(pages_clean); - - /* don't check return value: submit page even if - it wasn't dirty. */ - test_clear_page_dirty(pg); - - list_del(&pg->list); - list_add(&pg->list, &pg->mapping->locked_pages); - - spin_unlock(&pg->mapping->page_lock); - - nr_used++; - } else { - /* jnode being WRITEBACK might be replaced on - ovrwr_nodes list with jnode CC. We just - encountered this CC jnode. Do not submit i/o - for it */ - assert("zam-912", JF_ISSET(cur, JNODE_CC)); - UNLOCK_JNODE(cur); - } - unlock_page(pg); - - nr--; - cur = capture_list_next(cur); - } - if (nr_used > 0) { - assert("nikita-3455", - bio->bi_size == super->s_blocksize * nr_used); - assert("nikita-3456", bio->bi_vcnt == nr_used); - - /* Check if we are allowed to write at all */ - if (super->s_flags & MS_RDONLY) - undo_bio(bio); - else { - add_fq_to_bio(fq, bio); - reiser4_submit_bio(WRITE, bio); - } - - block += nr_used - 1; - update_blocknr_hint_default(super, &block); - block += 1; - } else { - reiser4_stat_inc(txnmgr.empty_bio); - bio_put(bio); - } - } - return 0; -} - -/* @nr jnodes starting from @j are marked as JNODE_SCANNED. 
Clear this bit for - all those jnodes */ -static void unscan_sequence_nolock(jnode * j, int nr) -{ - int i; - - for (i = 0; i < nr; i++) { - assert("vs-1631", JF_ISSET(j, JNODE_SCANNED)); - JF_CLR(j, JNODE_SCANNED); - j = capture_list_next(j); - } -} - -static void unscan_sequence(jnode * j, int nr) -{ - spin_lock(&scan_lock); - unscan_sequence_nolock(j, nr); - spin_unlock(&scan_lock); -} - -/* This is a procedure which recovers a contiguous sequences of disk block - numbers in the given list of j-nodes and submits write requests on this - per-sequence basis */ -int -write_jnode_list(capture_list_head * head, flush_queue_t * fq, - long *nr_submitted, int flags) -{ - int ret; - jnode *beg, *end; - - spin_lock(&scan_lock); - beg = capture_list_front(head); - while (!capture_list_end(head, beg)) { - int nr = 1; - jnode *cur; - - JF_SET(beg, JNODE_SCANNED); - end = beg; - cur = capture_list_next(beg); - - while (!capture_list_end(head, cur)) { - if (*jnode_get_block(cur) != *jnode_get_block(beg) + nr) - /* jnode from which next sequence of blocks starts */ - break; - - JF_SET(cur, JNODE_SCANNED); - ++nr; - end = cur; - cur = capture_list_next(cur); - } - spin_unlock(&scan_lock); - - ret = - write_jnodes_to_disk_extent(head, beg, nr, - jnode_get_block(beg), fq, - flags); - if (ret) { - unscan_sequence(beg, nr); - return ret; - } - - if (nr_submitted) - *nr_submitted += nr; - - spin_lock(&scan_lock); - unscan_sequence_nolock(beg, nr); - beg = capture_list_next(end); - } - - spin_unlock(&scan_lock); - return 0; -} - -/* add given wandered mapping to atom's wandered map - this starts from jnode which is in JNODE_SCANNED state. */ -static int -add_region_to_wmap(jnode * cur, int len, const reiser4_block_nr * block_p) -{ - int ret; - blocknr_set_entry *new_bsep = NULL; - reiser4_block_nr block; - int first; - txn_atom *atom; - - assert("zam-568", block_p != NULL); - block = *block_p; - assert("zam-569", len > 0); - - while ((len--) > 0) { - assert("vs-1422", JF_ISSET(cur, JNODE_SCANNED)); - - do { - atom = get_current_atom_locked(); - assert("zam-536", - !blocknr_is_fake(jnode_get_block(cur))); - ret = - blocknr_set_add_pair(atom, &atom->wandered_map, - &new_bsep, - jnode_get_block(cur), &block); - } while (ret == -E_REPEAT); - - if (ret) { - /* deallocate blocks which were not added to wandered - map */ - reiser4_block_nr wide_len = len; - - reiser4_dealloc_blocks(&block, &wide_len, - BLOCK_NOT_COUNTED, - BA_FORMATTED - /* formatted, without defer */ ); - - return ret; - } - - UNLOCK_ATOM(atom); - - cur = capture_list_next(cur); - ++block; - first = 0; - } - - return 0; -} - -/* Allocate wandered blocks for current atom's OVERWRITE SET and immediately - submit IO for allocated blocks. We assume that current atom is in a stage - when any atom fusion is impossible and atom is unlocked and it is safe. 
*/ -static int alloc_wandered_blocks(struct commit_handle *ch, flush_queue_t * fq) -{ - reiser4_block_nr block; - - int rest; - int len, prev_len = 0, i; - int ret; - jnode *cur, *beg, *end; - - assert("zam-534", ch->overwrite_set_size > 0); - - cur = beg = end = NULL; - - for (rest = ch->overwrite_set_size; rest > 0; rest -= len) { - ret = get_more_wandered_blocks(rest, &block, &len); - if (ret) { - if (beg != NULL) - unscan_sequence_nolock(beg, prev_len); - return ret; - } - - spin_lock(&scan_lock); - if (beg == NULL) - cur = capture_list_front(ch->overwrite_set); - else { - unscan_sequence_nolock(beg, prev_len); - cur = capture_list_next(end); - } - beg = cur; - - /* mark @len jnodes starting from @cur as scanned */ - for (i = 0; i < len; i++) { - assert("vs-1633", - !capture_list_end(ch->overwrite_set, cur)); - assert("vs-1632", !JF_ISSET(cur, JNODE_SCANNED)); - JF_SET(cur, JNODE_SCANNED); - end = cur; - cur = capture_list_next(cur); - } - prev_len = len; - spin_unlock(&scan_lock); - - ret = add_region_to_wmap(beg, len, &block); - if (ret) { - unscan_sequence(beg, len); - return ret; - } - ret = - write_jnodes_to_disk_extent(ch->overwrite_set, beg, len, - &block, fq, 0); - if (ret) { - unscan_sequence(beg, len); - return ret; - } - assert("vs-1638", rest >= len); - } - - assert("vs-1634", rest == 0); - assert("vs-1635", beg != NULL && end != NULL); - assert("vs-1639", cur == capture_list_next(end)); - assert("vs-1636", capture_list_end(ch->overwrite_set, cur)); - unscan_sequence(beg, len); - - return 0; -} - -#else /* !REISER4_COPY_ON_CAPTURE */ - /* put overwrite set back to atom's clean list */ static void put_overwrite_set(struct commit_handle *ch) { @@ -1131,17 +628,17 @@ static int get_overwrite_set(struct comm if (IS_ERR(sj)) return PTR_ERR(sj); - LOCK_JNODE(sj); + spin_lock_jnode(sj); JF_SET(sj, JNODE_OVRWR); insert_into_atom_ovrwr_list(ch->atom, sj); - UNLOCK_JNODE(sj); + spin_unlock_jnode(sj); /* jload it as the rest of overwrite set */ jload_gfp(sj, GFP_KERNEL, 0); ch->overwrite_set_size++; } - LOCK_JNODE(cur); + spin_lock_jnode(cur); uncapture_block(cur); jput(cur); @@ -1218,13 +715,12 @@ static int get_overwrite_set(struct comm * FIXME: ZAM->HANS: What layer are you talking about? Can you point me to that? * Why that layer needed? Why BIOs cannot be constructed here? */ -static int -write_jnodes_to_disk_extent(struct list_head *head, jnode *first, int nr, - const reiser4_block_nr *block_p, - flush_queue_t *fq, int flags) +static int write_jnodes_to_disk_extent( + jnode *first, int nr, const reiser4_block_nr *block_p, + flush_queue_t *fq, int flags) { struct super_block *super = reiser4_get_current_sb(); - int for_reclaim = flags & WRITEOUT_FOR_PAGE_RECLAIM; + int write_op = ( flags & WRITEOUT_BARRIER ) ? 
WRITE_BARRIER : WRITE; int max_blocks; jnode *cur = first; reiser4_block_nr block; @@ -1234,10 +730,7 @@ write_jnodes_to_disk_extent(struct list_ assert("zam-570", nr > 0); block = *block_p; - max_blocks = - bdev_get_queue(super->s_bdev)->max_sectors >> (super-> - s_blocksize_bits - - 9); + max_blocks = min(bio_get_nr_vecs(super->s_bdev), BIO_MAX_PAGES); while (nr > 0) { struct bio *bio; @@ -1253,7 +746,6 @@ write_jnodes_to_disk_extent(struct list_ bio->bi_sector = block * (super->s_blocksize >> 9); for (nr_used = 0, i = 0; i < nr_blocks; i++) { struct page *pg; - ON_DEBUG(int jnode_is_releasable(jnode *)); pg = jnode_page(cur); assert("zam-573", pg != NULL); @@ -1272,21 +764,64 @@ write_jnodes_to_disk_extent(struct list_ break; } - LOCK_JNODE(cur); + spin_lock_jnode(cur); assert("nikita-3166", pg->mapping == jnode_get_mapping(cur)); assert("zam-912", !JF_ISSET(cur, JNODE_WRITEBACK)); +#if REISER4_DEBUG + spin_lock(&cur->load); assert("nikita-3165", !jnode_is_releasable(cur)); + spin_unlock(&cur->load); +#endif JF_SET(cur, JNODE_WRITEBACK); JF_CLR(cur, JNODE_DIRTY); ON_DEBUG(cur->written++); - UNLOCK_JNODE(cur); + spin_unlock_jnode(cur); + ClearPageError(pg); set_page_writeback(pg); - if (for_reclaim) - ent_writes_page(super, pg); - /* clear DIRTY or REISER4_MOVED tag if it is set */ - reiser4_clear_page_dirty(pg); + + if (get_current_context()->entd) { + /* this is the ent thread */ + entd_context *ent = get_entd_context(super); + struct wbq *rq, *next; + + spin_lock(&ent->guard); + + if (pg == ent->cur_request->page) { + /* + * entd is called for this page. This + * request is not in the todo list + */ + ent->cur_request->written = 1; + } else { + /* + * if we have written a page for which writepage + * was called - move the request to another list. + */ + list_for_each_entry_safe(rq, next, &ent->todo_list, link) { + assert("", rq->magic == WBQ_MAGIC); + if (pg == rq->page) { + /* + * remove request from + * entd's queue, but do + * not wake up a thread + * which put this + * request + */ + list_del_init(&rq->link); + ent->nr_todo_reqs --; + list_add_tail(&rq->link, &ent->done_list); + ent->nr_done_reqs ++; + rq->written = 1; + break; + } + } + } + spin_unlock(&ent->guard); + } + + clear_page_dirty_for_io(pg); unlock_page(pg); @@ -1302,8 +837,15 @@ write_jnodes_to_disk_extent(struct list_ if (super->s_flags & MS_RDONLY) undo_bio(bio); else { + int not_supported; + add_fq_to_bio(fq, bio); - reiser4_submit_bio(WRITE, bio); + bio_get(bio); + reiser4_submit_bio(write_op, bio); + not_supported = bio_flagged(bio, BIO_EOPNOTSUPP); + bio_put(bio); + if (not_supported) + return -EOPNOTSUPP; } block += nr_used - 1; @@ -1339,10 +881,8 @@ write_jnode_list(struct list_head *head, cur = list_entry(cur->capture_link.next, jnode, capture_link); } - ret = - write_jnodes_to_disk_extent(head, beg, nr, - jnode_get_block(beg), fq, - flags); + ret = write_jnodes_to_disk_extent( + beg, nr, jnode_get_block(beg), fq, flags); if (ret) return ret; @@ -1393,7 +933,7 @@ add_region_to_wmap(jnode * cur, int len, return ret; } - UNLOCK_ATOM(atom); + spin_unlock_atom(atom); cur = list_entry(cur->capture_link.next, jnode, capture_link); ++block; @@ -1433,8 +973,7 @@ static int alloc_wandered_blocks(struct if (ret) return ret; - ret = write_jnodes_to_disk_extent(ch->overwrite_set, cur, len, - &block, fq, 0); + ret = write_jnodes_to_disk_extent(cur, len, &block, fq, 0); if (ret) return ret; @@ -1448,8 +987,6 @@ static int alloc_wandered_blocks(struct return 0; } -#endif /* ! 
REISER4_COPY_ON_CAPTURE */ - /* allocate given number of nodes over the journal area and link them into a list, return pointer to the first jnode in the list */ static int alloc_tx(struct commit_handle *ch, flush_queue_t * fq) @@ -1537,7 +1074,7 @@ static int alloc_tx(struct commit_handle atom = get_current_atom_locked(); blocknr_set_iterator(atom, &atom->wandered_map, &store_wmap_actor, &params, 0); - UNLOCK_ATOM(atom); + spin_unlock_atom(atom); } { /* relse all jnodes from tx_list */ @@ -1560,6 +1097,99 @@ static int alloc_tx(struct commit_handle return ret; } +static int commit_tx(struct commit_handle *ch) +{ + flush_queue_t *fq; + int barrier; + int ret; + + /* Grab more space for wandered records. */ + ret = reiser4_grab_space_force((__u64) (ch->tx_size), BA_RESERVED); + if (ret) + return ret; + + fq = get_fq_for_current_atom(); + if (IS_ERR(fq)) + return PTR_ERR(fq); + + spin_unlock_atom(fq->atom); + do { + ret = alloc_wandered_blocks(ch, fq); + if (ret) + break; + ret = alloc_tx(ch, fq); + if (ret) + break; + } while (0); + + /* Release all grabbed space if it was not fully used for + * wandered blocks/records allocation. */ + all_grabbed2free(); + fq_put(fq); + if (ret) + return ret; + repeat_wo_barrier: + barrier = reiser4_use_write_barrier(ch->super); + if (!barrier) { + ret = current_atom_finish_all_fq(); + if (ret) + return ret; + } + ret = update_journal_header(ch, barrier); + if (barrier) { + if (ret) { + if (ret == -EOPNOTSUPP) { + disable_write_barrier(ch->super); + goto repeat_wo_barrier; + } + return ret; + } + ret = current_atom_finish_all_fq(); + } + return ret; +} + + +static int write_tx_back(struct commit_handle * ch) +{ + flush_queue_t *fq; + int ret; + int barrier; + + post_commit_hook(); + fq = get_fq_for_current_atom(); + if (IS_ERR(fq)) + return PTR_ERR(fq); + spin_unlock_atom(fq->atom); + ret = write_jnode_list( + ch->overwrite_set, fq, NULL, WRITEOUT_FOR_PAGE_RECLAIM); + fq_put(fq); + if (ret) + return ret; + repeat_wo_barrier: + barrier = reiser4_use_write_barrier(ch->super); + if (!barrier) { + ret = current_atom_finish_all_fq(); + if (ret) + return ret; + } + ret = update_journal_footer(ch, barrier); + if (barrier) { + if (ret) { + if (ret == -EOPNOTSUPP) { + disable_write_barrier(ch->super); + goto repeat_wo_barrier; + } + return ret; + } + ret = current_atom_finish_all_fq(); + } + if (ret) + return ret; + post_write_back_hook(); + return 0; +} + /* We assume that at this moment all captured blocks are marked as RELOC or WANDER (belong to Relocate or Overwrite set), all nodes from Relocate set are submitted to write. @@ -1594,11 +1224,11 @@ int reiser4_write_logs(long *nr_submitte * early flushed jnodes with CREATED bit are transferred to the * overwrite list. */ invalidate_list(ATOM_CLEAN_LIST(atom)); - LOCK_ATOM(atom); + spin_lock_atom(atom); /* There might be waiters for the relocate nodes which we have * released, wake them up. */ atom_send_event(atom); - UNLOCK_ATOM(atom); + spin_unlock_atom(atom); if (REISER4_DEBUG) { int level; @@ -1635,85 +1265,15 @@ int reiser4_write_logs(long *nr_submitte /* count all records needed for storing of the wandered set */ get_tx_size(&ch); - /* Grab more space for wandered records. 
*/ - ret = reiser4_grab_space_force((__u64) (ch.tx_size), BA_RESERVED); + ret = commit_tx(&ch); if (ret) goto up_and_ret; - { - flush_queue_t *fq; - - fq = get_fq_for_current_atom(); - - if (IS_ERR(fq)) { - ret = PTR_ERR(fq); - goto up_and_ret; - } - - UNLOCK_ATOM(fq->atom); - - do { - ret = alloc_wandered_blocks(&ch, fq); - if (ret) - break; - - ret = alloc_tx(&ch, fq); - if (ret) - break; - } while (0); - - /* Release all grabbed space if it was not fully used for - * wandered blocks/records allocation. */ - all_grabbed2free(); - - fq_put(fq); - if (ret) - goto up_and_ret; - } - - ret = current_atom_finish_all_fq(); - if (ret) - goto up_and_ret; - - if ((ret = update_journal_header(&ch))) - goto up_and_ret; - - UNDER_SPIN_VOID(atom, atom, atom_set_stage(atom, ASTAGE_POST_COMMIT)); - - post_commit_hook(); - - { - /* force j-nodes write back */ - - flush_queue_t *fq; - - fq = get_fq_for_current_atom(); - - if (IS_ERR(fq)) { - ret = PTR_ERR(fq); - goto up_and_ret; - } - - UNLOCK_ATOM(fq->atom); - - ret = - write_jnode_list(ch.overwrite_set, fq, NULL, - WRITEOUT_FOR_PAGE_RECLAIM); - - fq_put(fq); - - if (ret) - goto up_and_ret; - } - - ret = current_atom_finish_all_fq(); - - if (ret) - goto up_and_ret; - - if ((ret = update_journal_footer(&ch))) - goto up_and_ret; + spin_lock_atom(atom); + atom_set_stage(atom, ASTAGE_POST_COMMIT); + spin_unlock_atom(atom); + ret = write_tx_back(&ch); post_write_back_hook(); up_and_ret: @@ -1943,7 +1503,7 @@ static int replay_transaction(const stru } } - ret = update_journal_footer(&ch); + ret = update_journal_footer(&ch, 0); free_ow_set: diff -puN fs/reiser4/writeout.h~reiser4-spinlock-cleanup fs/reiser4/writeout.h --- devel/fs/reiser4/writeout.h~reiser4-spinlock-cleanup 2006-02-16 14:17:05.000000000 -0800 +++ devel-akpm/fs/reiser4/writeout.h 2006-02-16 14:17:05.000000000 -0800 @@ -4,6 +4,7 @@ #define WRITEOUT_SINGLE_STREAM (0x1) #define WRITEOUT_FOR_PAGE_RECLAIM (0x2) +#define WRITEOUT_BARRIER (0x4) extern int get_writeout_flags(void); diff -puN fs/reiser4/znode.c~reiser4-spinlock-cleanup fs/reiser4/znode.c --- devel/fs/reiser4/znode.c~reiser4-spinlock-cleanup 2006-02-16 14:17:05.000000000 -0800 +++ devel-akpm/fs/reiser4/znode.c 2006-02-16 14:17:05.000000000 -0800 @@ -235,7 +235,7 @@ int znodes_tree_init(reiser4_tree * tree int result; assert("umka-050", tree != NULL); - rw_dk_init(tree); + rwlock_init(&tree->dk_lock); result = z_hash_init(&tree->zhash_table, REISER4_ZNODE_HASH_TABLE_SIZE); if (result != 0) @@ -341,7 +341,7 @@ void znode_remove(znode * node /* znode { assert("nikita-2108", node != NULL); assert("nikita-470", node->c_count == 0); - assert("zam-879", rw_tree_is_write_locked(tree)); + assert_rw_write_locked(&(tree->tree_lock)); /* remove reference to this znode from cbk cache */ cbk_cache_invalidate(node, tree); @@ -385,7 +385,7 @@ int znode_rehash(znode * node /* node to oldtable = znode_get_htable(node); newtable = get_htable(tree, new_block_nr); - WLOCK_TREE(tree); + write_lock_tree(tree); /* remove znode from hash-table */ z_hash_remove_rcu(oldtable, node); @@ -398,7 +398,7 @@ int znode_rehash(znode * node /* node to /* insert it into hash */ z_hash_insert_rcu(newtable, node); - WUNLOCK_TREE(tree); + write_unlock_tree(tree); return 0; } @@ -516,7 +516,7 @@ znode *zget(reiser4_tree * tree, ZJNODE(result)->key.z = *blocknr; result->level = level; - WLOCK_TREE(tree); + write_lock_tree(tree); shadow = z_hash_find_index(zth, hashi, blocknr); if (unlikely(shadow != NULL && !ZF_ISSET(shadow, JNODE_RIP))) { @@ -533,7 +533,7 @@ znode 
*zget(reiser4_tree * tree, add_x_ref(ZJNODE(result)); - WUNLOCK_TREE(tree); + write_unlock_tree(tree); } #if REISER4_DEBUG if (!blocknr_is_fake(blocknr) && *blocknr != 0) @@ -666,7 +666,7 @@ unsigned znode_free_space(znode * node / reiser4_key *znode_get_rd_key(znode * node /* znode to query */ ) { assert("nikita-958", node != NULL); - assert("nikita-1661", rw_dk_is_locked(znode_get_tree(node))); + assert_rw_locked(&(znode_get_tree(node)->dk_lock)); assert("nikita-3067", LOCK_CNT_GTZ(rw_locked_dk)); assert("nikita-30671", node->rd_key_version != 0); return &node->rd_key; @@ -676,7 +676,7 @@ reiser4_key *znode_get_rd_key(znode * no reiser4_key *znode_get_ld_key(znode * node /* znode to query */ ) { assert("nikita-974", node != NULL); - assert("nikita-1662", rw_dk_is_locked(znode_get_tree(node))); + assert_rw_locked(&(znode_get_tree(node)->dk_lock)); assert("nikita-3068", LOCK_CNT_GTZ(rw_locked_dk)); assert("nikita-30681", node->ld_key_version != 0); return &node->ld_key; @@ -690,7 +690,7 @@ reiser4_key *znode_set_rd_key(znode * no { assert("nikita-2937", node != NULL); assert("nikita-2939", key != NULL); - assert("nikita-2938", rw_dk_is_write_locked(znode_get_tree(node))); + assert_rw_write_locked(&(znode_get_tree(node)->dk_lock)); assert("nikita-3069", LOCK_CNT_GTZ(write_locked_dk)); assert("nikita-2944", znode_is_any_locked(node) || @@ -709,7 +709,7 @@ reiser4_key *znode_set_ld_key(znode * no { assert("nikita-2940", node != NULL); assert("nikita-2941", key != NULL); - assert("nikita-2942", rw_dk_is_write_locked(znode_get_tree(node))); + assert_rw_write_locked(&(znode_get_tree(node)->dk_lock)); assert("nikita-3070", LOCK_CNT_GTZ(write_locked_dk)); assert("nikita-2943", znode_is_any_locked(node) || keyeq(&node->ld_key, min_key())); @@ -735,11 +735,15 @@ int znode_contains_key(znode * node /* z int znode_contains_key_lock(znode * node /* znode to look in */ , const reiser4_key * key /* key to look for */ ) { + int result; + assert("umka-056", node != NULL); assert("umka-057", key != NULL); - return UNDER_RW(dk, znode_get_tree(node), - read, znode_contains_key(node, key)); + read_lock_dk(znode_get_tree(node)); + result = znode_contains_key(node, key); + read_unlock_dk(znode_get_tree(node)); + return result; } /* get parent pointer, assuming tree is not locked */ @@ -798,7 +802,12 @@ int znode_just_created(const znode * nod /* obtain updated ->znode_epoch. See seal.c for description. 
*/ __u64 znode_build_version(reiser4_tree * tree) { - return UNDER_SPIN(epoch, tree, ++tree->znode_epoch); + __u64 result; + + spin_lock(&tree->epoch_lock); + result = ++tree->znode_epoch; + spin_unlock(&tree->epoch_lock); + return result; } void init_load_count(load_count * dh) @@ -975,7 +984,7 @@ static int znode_invariant_f(const znode } /* debugging aid: check znode invariant and panic if it doesn't hold */ -int znode_invariant(const znode * node /* znode to check */ ) +int znode_invariant(znode * node /* znode to check */ ) { char const *failed_msg; int result; @@ -983,85 +992,18 @@ int znode_invariant(const znode * node / assert("umka-063", node != NULL); assert("umka-064", current_tree != NULL); - spin_lock_znode((znode *) node); - RLOCK_TREE(znode_get_tree(node)); + spin_lock_znode(node); + read_lock_tree(znode_get_tree(node)); result = znode_invariant_f(node, &failed_msg); if (!result) { /* print_znode("corrupted node", node); */ warning("jmacd-555", "Condition %s failed", failed_msg); } - RUNLOCK_TREE(znode_get_tree(node)); - spin_unlock_znode((znode *) node); + read_unlock_tree(znode_get_tree(node)); + spin_unlock_znode(node); return result; } -/* debugging aid: output human readable information about @node */ -static void info_znode(const char *prefix /* prefix to print */ , - const znode * node /* node to print */ ) -{ - if (node == NULL) { - return; - } - info_jnode(prefix, ZJNODE(node)); - if (!jnode_is_znode(ZJNODE(node))) - return; - - printk("c_count: %i, readers: %i, items: %i\n", - node->c_count, node->lock.nr_readers, node->nr_items); -} - -/* debugging aid: output more human readable information about @node that - info_znode(). */ -void print_znode(const char *prefix /* prefix to print */ , - const znode * node /* node to print */ ) -{ - if (node == NULL) { - printk("%s: null\n", prefix); - return; - } - - info_znode(prefix, node); - if (!jnode_is_znode(ZJNODE(node))) - return; - info_znode("\tparent", znode_parent_nolock(node)); - info_znode("\tleft", node->left); - info_znode("\tright", node->right); - print_key("\tld", &node->ld_key); - print_key("\trd", &node->rd_key); - printk("\n"); -} - -/* print all znodes in @tree */ -void print_znodes(const char *prefix, reiser4_tree * tree) -{ - znode *node; - znode *next; - z_hash_table *htable; - int tree_lock_taken; - - if (tree == NULL) - tree = current_tree; - - /* this is debugging function. It can be called by reiser4_panic() - with tree spin-lock already held. Trylock is not exactly what we - want here, but it is passable. 
- */ - tree_lock_taken = write_trylock_tree(tree); - - htable = &tree->zhash_table; - for_all_in_htable(htable, z, node, next) { - info_znode(prefix, node); - } - - htable = &tree->zfake_table; - for_all_in_htable(htable, z, node, next) { - info_znode(prefix, node); - } - - if (tree_lock_taken) - WUNLOCK_TREE(tree); -} - /* return non-0 iff data are loaded into znode */ int znode_is_loaded(const znode * node /* znode to query */ ) { diff -puN fs/reiser4/znode.h~reiser4-spinlock-cleanup fs/reiser4/znode.h --- devel/fs/reiser4/znode.h~reiser4-spinlock-cleanup 2006-02-16 14:17:05.000000000 -0800 +++ devel-akpm/fs/reiser4/znode.h 2006-02-16 14:17:05.000000000 -0800 @@ -9,7 +9,6 @@ #include "forward.h" #include "debug.h" #include "dformat.h" -#include "spin_macros.h" #include "key.h" #include "coord.h" #include "plugin/node/node.h" @@ -261,15 +260,15 @@ extern void print_lock_stack(const char #define znode_make_clean(x) jnode_make_clean ( ZJNODE(x) ) #define znode_set_block(x, b) jnode_set_block ( ZJNODE(x), (b) ) -#define spin_lock_znode(x) LOCK_JNODE ( ZJNODE(x) ) -#define spin_unlock_znode(x) UNLOCK_JNODE ( ZJNODE(x) ) +#define spin_lock_znode(x) spin_lock_jnode ( ZJNODE(x) ) +#define spin_unlock_znode(x) spin_unlock_jnode ( ZJNODE(x) ) #define spin_trylock_znode(x) spin_trylock_jnode ( ZJNODE(x) ) #define spin_znode_is_locked(x) spin_jnode_is_locked ( ZJNODE(x) ) #define spin_znode_is_not_locked(x) spin_jnode_is_not_locked ( ZJNODE(x) ) #if REISER4_DEBUG extern int znode_x_count_is_protected(const znode * node); -extern int znode_invariant(const znode * node); +extern int znode_invariant(znode * node); #endif /* acquire reference to @node */ _
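The spin_trylock_txnmgr()/spin_unlock_txnmgr() helpers introduced above show the shape of the whole conversion: each SPIN_LOCK_FUNCTIONS() instantiation becomes plain inline functions over the underlying lock, and the LOCK_CNT_* counters survive only as debug bookkeeping that the unlock path can assert against. Below is a minimal, self-contained userspace sketch of that pattern; a pthread mutex stands in for spinlock_t, and all names (demo_mgr, mgr_trylock, locked_mgr, locked_total) are invented for illustration, not reiser4 code.

/*
 * Standalone sketch of the trylock-with-counters pattern: count a lock
 * class only when the lock is actually taken, and check the counters
 * before releasing it.
 */
#include <assert.h>
#include <pthread.h>
#include <stdio.h>

struct demo_mgr {
	pthread_mutex_t tmgr_lock;	/* stands in for mgr->tmgr_lock */
	int atom_count;
};

/* per-thread lock-class counters, analogous to LOCK_CNT_INC/LOCK_CNT_DEC */
static __thread int locked_mgr;
static __thread int locked_total;

static inline int mgr_trylock(struct demo_mgr *mgr)
{
	/* count the lock only if we actually acquired it */
	if (pthread_mutex_trylock(&mgr->tmgr_lock) == 0) {
		locked_mgr++;
		locked_total++;
		return 1;
	}
	return 0;
}

static inline void mgr_unlock(struct demo_mgr *mgr)
{
	/* the unlock side checks the counters, like the asserts above */
	assert(locked_mgr > 0);
	assert(locked_total > 0);
	locked_mgr--;
	locked_total--;
	pthread_mutex_unlock(&mgr->tmgr_lock);
}

int main(void)
{
	struct demo_mgr mgr = { .tmgr_lock = PTHREAD_MUTEX_INITIALIZER };

	if (mgr_trylock(&mgr)) {
		mgr.atom_count++;	/* work done under the lock */
		mgr_unlock(&mgr);
	}
	printf("atom_count = %d\n", mgr.atom_count);
	return 0;
}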
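commit_tx() and write_tx_back() share the same write-barrier fallback: issue the journal header or footer update as a barrier write, and if the device reports -EOPNOTSUPP, call disable_write_barrier() and redo the update as an ordinary write preceded by an explicit current_atom_finish_all_fq() wait. The sketch below reduces that control flow to plain C with stub I/O helpers; it only illustrates the retry logic, and none of its identifiers are real reiser4 functions.

/*
 * Reduced sketch of the barrier-or-fallback commit step: try a barrier
 * write first, permanently switch barriers off on -EOPNOTSUPP, and make
 * sure queued I/O is waited for in the non-barrier path.
 */
#include <errno.h>
#include <stdbool.h>
#include <stdio.h>

static bool no_write_barrier;		/* REISER4_NO_WRITE_BARRIER analogue */

/* stands in for current_atom_finish_all_fq(): wait for queued flush I/O */
static int finish_all_queued_io(void)
{
	return 0;
}

/* stands in for update_journal_header()/update_journal_footer() */
static int write_journal_block(bool barrier)
{
	/* pretend the device does not implement barrier requests */
	return barrier ? -EOPNOTSUPP : 0;
}

static int commit_step(void)
{
	bool barrier;
	int ret;

repeat_wo_barrier:
	barrier = !no_write_barrier;
	if (!barrier) {
		/* without a barrier, queued I/O must be finished first */
		ret = finish_all_queued_io();
		if (ret)
			return ret;
	}
	ret = write_journal_block(barrier);
	if (barrier) {
		if (ret == -EOPNOTSUPP) {
			fprintf(stderr, "disabling write barrier\n");
			no_write_barrier = true;
			goto repeat_wo_barrier;
		}
		if (ret)
			return ret;
		/* the barrier write ordered earlier I/O; wait for it now */
		ret = finish_all_queued_io();
	}
	return ret;
}

int main(void)
{
	printf("commit_step() = %d\n", commit_step());
	return 0;
}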