From: Mike Snitzer [AGK - untested alternative version avoiding storing handover flag] Permit in-use snapshot exception data to be 'handed over' from one snapshot instance to another. This is a pre-requisite for patches that allow the changes made in a snapshot device to be merged back into its origin device and also allows device resizing. The basic call sequence is: dmsetup load new_snapshot (referencing the existing in-use cow device) - the ctr code detects that the cow is already in use and links the two snapshot target instances together dmsetup suspend original_snapshot dmsetup resume new_snapshot - the new_snapshot becomes live, and if anything now tries to access the original one it will receive EIO dmsetup remove original_snapshot (There can only be two snapshot targets referencing the same cow device simultaneously.) Snapshot locking is such that: 0) snapshot that is passed to find_snapshot_using_cow() is not locked 1) only need handover-source lock to determine if handover is needed - handover-source lock is primary lock used in handover code paths - only need handover-destination lock before handover_exceptions() 2) handover-source lock is taken before handover-destination lock - but this is only ever needed before calling handover_exceptions() Signed-off-by: Mike Snitzer --- drivers/md/dm-snap.c | 242 ++++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 204 insertions(+), 38 deletions(-) Index: linux-2.6.32-rc6/drivers/md/dm-snap.c =================================================================== --- linux-2.6.32-rc6.orig/drivers/md/dm-snap.c +++ linux-2.6.32-rc6/drivers/md/dm-snap.c @@ -303,35 +303,92 @@ static void __insert_origin(struct origi } /* + * Returns number of registered snapshots with same cow device. 
+ * Returns 1: snap_src, NULL - normal snapshot + * Returns 2: snap_src, NULL - handed over, waiting for old to be deleted + * Returns 2: snap_src, snap_dest - waiting for handover + * Returns 1: NULL, snap_dest - source got destroyed before handover + * Returns 0: NULL, NULL - first new snapshot + */ +static int __find_snapshots_sharing_cow(struct dm_snapshot *snap, + struct dm_snapshot **snap_src, + struct dm_snapshot **snap_dest) +{ + struct dm_snapshot *s; + struct origin *o; + int count = 0; + int active; + + o = __lookup_origin(snap->origin->bdev); + if (!o) + goto out; + + list_for_each_entry(s, &o->snapshots, list) { + if (!bdev_equal(s->cow->bdev, snap->cow->bdev)) + continue; + + down_read(&s->lock); + active = s->active; + up_read(&s->lock); + + if (active) { + if (snap_src) + *snap_src = s; + } else if (snap_dest) + *snap_dest = s; + + count++; + } + +out: + return count; +} + +static int find_snapshots_sharing_cow(struct dm_snapshot *snap, + struct dm_snapshot **snap_src, + struct dm_snapshot **snap_dest) +{ + int count; + + down_read(&_origins_lock); + count = __find_snapshots_sharing_cow(snap, snap_src, snap_dest); + up_read(&_origins_lock); + + return count; +} + +/* * Make a note of the snapshot and its origin so we can look it * up when the origin has a write on it. + * + * Also validate snapshot exception store handovers. + * On success, returns 1 if this registration is a handover destination, + * otherwise returns 0. 
*/ -static int register_snapshot(struct dm_snapshot *snap, - int origin_exists) +static int register_snapshot(struct dm_snapshot *snap) { - struct dm_snapshot *l; + struct dm_snapshot *snap_src = NULL, *snap_dest = NULL; struct origin *o, *new_o = NULL; struct block_device *bdev = snap->origin->bdev; int r = 0; - if (!origin_exists) { - new_o = kmalloc(sizeof(*new_o), GFP_KERNEL); - if (!new_o) - return -ENOMEM; - } + new_o = kmalloc(sizeof(*new_o), GFP_KERNEL); + if (!new_o) + return -ENOMEM; down_write(&_origins_lock); - o = __lookup_origin(bdev); + /* Does snapshot need exceptions handed over to it? */ + if ((__find_snapshots_sharing_cow(snap, &snap_src, &snap_dest) == 2) || + snap_dest) { + kfree(new_o); r = -EINVAL; + goto out; + } + + o = __lookup_origin(bdev); if (o) kfree(new_o); else { - if (origin_exists) { - DMERR("register_snapshot failed to find origin."); - r = -EINVAL; - goto out; - } - /* New origin */ o = new_o; @@ -342,18 +399,40 @@ static int register_snapshot(struct dm_s __insert_origin(o); } - /* Sort the list according to chunk size, largest-first smallest-last */ - list_for_each_entry(l, &o->snapshots, list) - if (l->store->chunk_size < snap->store->chunk_size) - break; - list_add_tail(&snap->list, &l->list); + /* Position in list is irrelevant as there's no I/O yet. */ + list_add_tail(&snap->list, &o->snapshots); + if (snap_src) + r = 1; out: up_write(&_origins_lock); return r; } +/* + * Move snapshot to correct place in list according to chunk size. 
+ */ +static void reregister_snapshot(struct dm_snapshot *s) +{ + struct dm_snapshot *l; + struct origin *o; + struct block_device *bdev = s->origin->bdev; + + down_write(&_origins_lock); + o = __lookup_origin(bdev); + + list_del(&s->list); + + /* Sort the list according to chunk size, largest-first smallest-last */ + list_for_each_entry(l, &o->snapshots, list) + if (l->store->chunk_size < s->store->chunk_size) + break; + list_add_tail(&s->list, &l->list); + + up_write(&_origins_lock); +} + static void unregister_snapshot(struct dm_snapshot *s) { struct origin *o; @@ -362,7 +441,7 @@ static void unregister_snapshot(struct d o = __lookup_origin(s->origin->bdev); list_del(&s->list); - if (list_empty(&o->snapshots)) { + if (o && list_empty(&o->snapshots)) { list_del(&o->hash_list); kfree(o); } @@ -672,6 +751,7 @@ static int snapshot_ctr(struct dm_target s->suspended = 0; atomic_set(&s->pending_exceptions_count, 0); init_rwsem(&s->lock); + INIT_LIST_HEAD(&s->list); spin_lock_init(&s->pe_lock); /* Allocate hash table for COW data */ @@ -706,7 +786,31 @@ static int snapshot_ctr(struct dm_target spin_lock_init(&s->tracked_chunk_lock); - /* Metadata must only be loaded into one table at once */ + bio_list_init(&s->queued_bios); + INIT_WORK(&s->queued_bios_work, flush_queued_bios); + + ti->private = s; + ti->num_flush_requests = 1; + + /* Add snapshot to the list of snapshots for this origin */ + /* Exceptions aren't triggered till snapshot_resume() is called */ + r = register_snapshot(s); + if (r == -ENOMEM) { + ti->error = "Snapshot origin struct allocation failed"; + goto bad_load_and_register; + } else if (r < 0) { + ti->error = "Snapshot cow pairing for exception table handover " + "failed"; + goto bad_load_and_register; + } + + /* + * Metadata must only be loaded into one table at once, so skip this + * if metadata will be handed over during resume. 
+ */ + if (r > 0) + return 0; + r = s->store->type->read_metadata(s->store, dm_add_exception, (void *)s); if (r < 0) { @@ -717,25 +821,11 @@ static int snapshot_ctr(struct dm_target DMWARN("Snapshot is marked invalid."); } - bio_list_init(&s->queued_bios); - INIT_WORK(&s->queued_bios_work, flush_queued_bios); - if (!s->store->chunk_size) { ti->error = "Chunk size not set"; goto bad_load_and_register; } - - /* Add snapshot to the list of snapshots for this origin */ - /* Exceptions aren't triggered till snapshot_resume() is called */ - if (register_snapshot(s, 0)) { - r = -EINVAL; - ti->error = "Cannot register snapshot origin"; - goto bad_load_and_register; - } - - ti->private = s; ti->split_io = s->store->chunk_size; - ti->num_flush_requests = 1; return 0; @@ -777,15 +867,53 @@ static void __free_exceptions(struct dm_ dm_exception_table_exit(&s->complete, exception_cache); } +static void handover_exceptions(struct dm_snapshot *snap_src, + struct dm_snapshot *snap_dest) +{ + union { + struct dm_exception_table table_swap; + struct dm_exception_store *store_swap; + } u; + + /* swap exceptions tables and stores */ + u.table_swap = snap_dest->complete; + snap_dest->complete = snap_src->complete; + snap_src->complete = u.table_swap; + u.store_swap = snap_dest->store; + snap_dest->store = snap_src->store; + snap_src->store = u.store_swap; + + snap_dest->store->snap = snap_dest; + snap_src->store->snap = snap_src; + + /* reset split_io to store's chunk_size */ + if (snap_dest->ti->split_io != snap_dest->store->chunk_size) + snap_dest->ti->split_io = snap_dest->store->chunk_size; + + /* transfer 'valid' state, mark snap_src snapshot invalid */ + snap_dest->valid = snap_src->valid; + snap_src->valid = 0; +} + static void snapshot_dtr(struct dm_target *ti) { #ifdef CONFIG_DM_DEBUG int i; #endif struct dm_snapshot *s = ti->private; + struct dm_snapshot *snap_src = NULL, *snap_dest = NULL; flush_workqueue(ksnapd); + /* Check whether exception handover must be cancelled */ + 
find_snapshots_sharing_cow(s, &snap_src, &snap_dest); + if (snap_src && snap_dest && (s == snap_src)) { + down_write(&snap_dest->lock); + snap_dest->valid = 0; + up_write(&snap_dest->lock); + DMERR("Cancelling snapshot handover."); + } + /* Prevent further origin writes from using this snapshot. */ /* After this returns there can be no new kcopyd jobs. */ unregister_snapshot(s); @@ -1198,14 +1326,51 @@ static void snapshot_postsuspend(struct up_write(&s->lock); } +static int snapshot_preresume(struct dm_target *ti) +{ + int r = 0; + struct dm_snapshot *s = ti->private; + struct dm_snapshot *snap_src = NULL, *snap_dest = NULL; + + (void) find_snapshots_sharing_cow(s, &snap_src, &snap_dest); + if (snap_src && snap_dest) { + down_write(&snap_src->lock); + if (s == snap_src) { + DMERR("Unable to resume snapshot source until " + "handover complete."); + r = -EINVAL; + } else if (!snap_src->suspended) { + DMERR("Unable to perform snapshot handover until " + "source is suspended."); + r = -EINVAL; + } + up_write(&snap_src->lock); + } + + return r; +} + static void snapshot_resume(struct dm_target *ti) { struct dm_snapshot *s = ti->private; + struct dm_snapshot *snap_src = NULL, *snap_dest = NULL; - down_write(&s->lock); + (void) find_snapshots_sharing_cow(s, &snap_src, &snap_dest); + if (snap_src && snap_dest) { + down_write(&snap_src->lock); + down_write_nested(&snap_dest->lock, SINGLE_DEPTH_NESTING); + handover_exceptions(snap_src, snap_dest); + up_write(&snap_dest->lock); + up_write(&snap_src->lock); + } + + /* Now we have correct chunk size, reregister */ + reregister_snapshot(s); + + down_write(&s->lock); s->active = 1; s->suspended = 0; - up_write(&s->lock); + up_write(&s->lock); } static int snapshot_status(struct dm_target *ti, status_type_t type, @@ -1518,6 +1683,7 @@ static struct target_type snapshot_targe .map = snapshot_map, .end_io = snapshot_end_io, .postsuspend = snapshot_postsuspend, + .preresume = snapshot_preresume, .resume = 
snapshot_resume, .status = snapshot_status, .iterate_devices = snapshot_iterate_devices,