From: Mikulas Patocka [AGK FIXME - Justification for this patch is missing...] The previous code selected one exception as "primary_pe", linked all other exceptions to it and used reference counting to wait until all exceptions were reallocated. Now, a bio is linked to one exception and when that exception is reallocated, the bio is retried to possibly wait for other exceptions. Signed-off-by: Mikulas Patocka Signed-off-by: Mike Snitzer --- drivers/md/dm-snap.c | 165 +++++++++++++++++-------------------------------- drivers/md/dm-snap.c | 152 +++++++++++++++------------------------------------ 1 file changed, 46 insertions(+), 106 deletions(-) Index: linux/drivers/md/dm-snap.c =================================================================== --- linux.orig/drivers/md/dm-snap.c +++ linux/drivers/md/dm-snap.c @@ -142,28 +142,6 @@ struct dm_snap_pending_exception { struct bio_list origin_bios; struct bio_list snapshot_bios; - /* - * Short-term queue of pending exceptions prior to submission. - */ - struct list_head list; - - /* - * The primary pending_exception is the one that holds - * the ref_count and the list of origin_bios for a - * group of pending_exceptions. It is always last to get freed. - * These fields get set up when writing to the origin. - */ - struct dm_snap_pending_exception *primary_pe; - - /* - * Number of pending_exceptions processing this chunk. - * When this drops to zero we must complete the origin bios. - * If incrementing or decrementing this, hold pe->snap->lock for - * the sibling concerned and not pe->primary_pe->snap->lock unless - * they are the same. - */ - atomic_t ref_count; - /* Pointer back to snapshot context */ struct dm_snapshot *snap; @@ -1019,6 +997,28 @@ static void flush_queued_bios(struct wor flush_bios(queued_bios); } +static int do_origin(struct dm_dev *origin, struct bio *bio); + +/* + * Flush a list of buffers. + */ +static void retry_origin_bios(struct dm_snapshot *s, struct bio *bio) +{ + struct bio *n; + int r; + + while (bio) { + n = bio->bi_next; + bio->bi_next = NULL; + r = do_origin(s->origin, bio); + if (r == DM_MAPIO_REMAPPED) + generic_make_request(bio); + else + BUG_ON(r != DM_MAPIO_SUBMITTED); + bio = n; + } +} + /* * Error a list of buffers. */ @@ -1052,39 +1052,6 @@ static void __invalidate_snapshot(struct dm_table_event(s->ti->table); } -static void get_pending_exception(struct dm_snap_pending_exception *pe) -{ - atomic_inc(&pe->ref_count); -} - -static struct bio *put_pending_exception(struct dm_snap_pending_exception *pe) -{ - struct dm_snap_pending_exception *primary_pe; - struct bio *origin_bios = NULL; - - primary_pe = pe->primary_pe; - - /* - * If this pe is involved in a write to the origin and - * it is the last sibling to complete then release - * the bios for the original write to the origin. - */ - if (primary_pe && - atomic_dec_and_test(&primary_pe->ref_count)) { - origin_bios = bio_list_get(&primary_pe->origin_bios); - free_pending_exception(primary_pe); - } - - /* - * Free the pe if it's not linked to an origin write or if - * it's not itself a primary pe. - */ - if (!primary_pe || primary_pe != pe) - free_pending_exception(pe); - - return origin_bios; -} - static void pending_complete(struct dm_snap_pending_exception *pe, int success) { struct dm_exception *e; @@ -1129,7 +1096,8 @@ static void pending_complete(struct dm_s out: dm_remove_exception(&pe->e); snapshot_bios = bio_list_get(&pe->snapshot_bios); - origin_bios = put_pending_exception(pe); + origin_bios = bio_list_get(&pe->origin_bios); + free_pending_exception(pe); up_write(&s->lock); @@ -1139,7 +1107,7 @@ static void pending_complete(struct dm_s else flush_bios(snapshot_bios); - flush_bios(origin_bios); + retry_origin_bios(s, origin_bios); } static void commit_callback(void *context, int success) @@ -1226,8 +1194,6 @@ __find_pending_exception(struct dm_snaps pe->e.old_chunk = chunk; bio_list_init(&pe->origin_bios); bio_list_init(&pe->snapshot_bios); - pe->primary_pe = NULL; - atomic_set(&pe->ref_count, 0); pe->started = 0; if (s->store->type->prepare_exception(s->store, &pe->e)) { @@ -1235,7 +1201,6 @@ __find_pending_exception(struct dm_snaps return NULL; } - get_pending_exception(pe); dm_insert_exception(&s->pending, &pe->e); return pe; @@ -1492,12 +1457,11 @@ static int snapshot_iterate_devices(stru static int __origin_write(struct list_head *snapshots, sector_t sector, struct bio *bio) { - int r = DM_MAPIO_REMAPPED, first = 0; + int r = DM_MAPIO_REMAPPED; struct dm_snapshot *snap; struct dm_exception *e; - struct dm_snap_pending_exception *pe, *next_pe, *primary_pe = NULL; + struct dm_snap_pending_exception *pe, *pe_to_start = NULL; chunk_t chunk; - LIST_HEAD(pe_queue); /* Do all the snapshots on this origin */ list_for_each_entry (snap, snapshots, list) { @@ -1522,9 +1486,6 @@ static int __origin_write(struct list_he * Check exception table to see if block * is already remapped in this snapshot * and trigger an exception if not. - * - * ref_count is initialised to 1 so pending_complete() - * won't destroy the primary_pe while we're inside this loop. */ e = dm_lookup_exception(&snap->complete, chunk); if (e) @@ -1554,60 +1515,39 @@ static int __origin_write(struct list_he } } - if (!primary_pe) { - /* - * Either every pe here has same - * primary_pe or none has one yet. - */ - if (pe->primary_pe) - primary_pe = pe->primary_pe; - else { - primary_pe = pe; - first = 1; - } - - if (bio) - bio_list_add(&primary_pe->origin_bios, bio); - - r = DM_MAPIO_SUBMITTED; - } + r = DM_MAPIO_SUBMITTED; - if (!pe->primary_pe) { - pe->primary_pe = primary_pe; - get_pending_exception(primary_pe); + if (bio) { + bio_list_add(&pe->origin_bios, bio); + bio = NULL; + + if (!pe->started) { + pe->started = 1; + pe_to_start = pe; + } } if (!pe->started) { pe->started = 1; - list_add_tail(&pe->list, &pe_queue); + start_copy(pe); } next_snapshot: up_write(&snap->lock); } - if (!primary_pe) - return r; - /* - * If this is the first time we're processing this chunk and - * ref_count is now 1 it means all the pending exceptions - * got completed while we were in the loop above, so it falls to - * us here to remove the primary_pe and submit any origin_bios. + * pe_to_start is a small performance improvement: + * To avoid calling __origin_write N times for N snapshots, we start + * the snapshot where we queued the bio as the last one. + * + * If we start it as the last one, it finishes most likely as the last + * one and exceptions in other snapshots will be already finished when + * the bio will be retried. */ - if (first && atomic_dec_and_test(&primary_pe->ref_count)) { - flush_bios(bio_list_get(&primary_pe->origin_bios)); - free_pending_exception(primary_pe); - /* If we got here, pe_queue is necessarily empty. */ - return r; - } - - /* - * Now that we have a complete pe list we can start the copying. - */ - list_for_each_entry_safe(pe, next_pe, &pe_queue, list) - start_copy(pe); + if (pe_to_start) + start_copy(pe_to_start); return r; }