From: Mikulas Patocka Merging is started when origin is resumed and it is stopped when origin is suspended or when the merging snapshot is destroyed. We don't need a separate thread, kcopyd does the job just fine (provided that we have a private kcopyd). Merging is not yet interlocked with writes, so there is a race condition with concurrent access. It will be fixed in further patches. Adds a supporting function to decrement consecutive chunk counter. Care is taken to increment the exception's old_chunk and new_chunk, prior to the dm_consecutive_chunk_count_dec() call, if the chunk is at the start of an exception's consecutive chunk range. This allows for snapshot-merge to support chunks that are added to the 'complete' exception hash table before existing chunks. Signed-off-by: Mikulas Patocka Signed-off-by: Mike Snitzer --- drivers/md/dm-exception-store.h | 11 +++ drivers/md/dm-snap.c | 193 ++++++++++++++++++++++++++++++++++++++-- 2 files changed, 198 insertions(+), 6 deletions(-) Index: linux-2.6.32/drivers/md/dm-exception-store.h =================================================================== --- linux-2.6.32.orig/drivers/md/dm-exception-store.h +++ linux-2.6.32/drivers/md/dm-exception-store.h @@ -154,6 +154,13 @@ static inline void dm_consecutive_chunk_ BUG_ON(!dm_consecutive_chunk_count(e)); } +static inline void dm_consecutive_chunk_count_dec(struct dm_exception *e) +{ + BUG_ON(!dm_consecutive_chunk_count(e)); + + e->new_chunk -= (1ULL << DM_CHUNK_NUMBER_BITS); +} + # else # define DM_CHUNK_CONSECUTIVE_BITS 0 @@ -171,6 +178,10 @@ static inline void dm_consecutive_chunk_ { } +static inline void dm_consecutive_chunk_count_dec(struct dm_exception *e) +{ +} + # endif /* Index: linux-2.6.32/drivers/md/dm-snap.c =================================================================== --- linux-2.6.32.orig/drivers/md/dm-snap.c +++ linux-2.6.32/drivers/md/dm-snap.c @@ -106,8 +106,18 @@ struct dm_snapshot { mempool_t 
*tracked_chunk_pool; spinlock_t tracked_chunk_lock; struct hlist_head tracked_chunk_hash[DM_TRACKED_CHUNK_HASH_SIZE]; + + /* Wait for events based on state bits */ + unsigned long bits; }; +/* Merge operation is in progress */ +#define MERGE_RUNNING 0 + +/* It is requested to shut down merging */ +/* Cleared back to 0 when the merging is stopped */ +#define SHUTDOWN_MERGE 1 + struct dm_dev *dm_snap_cow(struct dm_snapshot *s) { return s->cow; @@ -386,6 +396,13 @@ static int __validate_exception_handover return -EINVAL; } + if (!snap_src->store->type->prepare_merge || + !snap_src->store->type->commit_merge) { + snap->ti->error = "Snapshot exception store does not " + "support snapshot-merge."; + return -EINVAL; + } + return 1; } @@ -721,6 +738,136 @@ static int init_hash_tables(struct dm_sn return 0; } +static void merge_callback(int read_err, unsigned long write_err, + void *context); + +static void snapshot_merge_process(struct dm_snapshot *s) +{ + int r; + chunk_t old_chunk, new_chunk; + struct dm_exception *e; + struct dm_io_region src, dest; + + BUG_ON(!test_bit(MERGE_RUNNING, &s->bits)); + if (unlikely(test_bit(SHUTDOWN_MERGE, &s->bits))) + goto shut; + + if (!s->valid) { + DMERR("snapshot is invalid, can't merge"); + goto shut; + } + + r = s->store->type->prepare_merge(s->store, &old_chunk, &new_chunk); + if (r <= 0) { + if (r < 0) + DMERR("Read error in exception store, " + "shutting down merge"); + goto shut; + } + + /* TODO: use larger I/O size once we verify that kcopyd handles it */ + + /* !!! 
FIXME: interlock writes to this chunk */ + down_write(&s->lock); + e = dm_lookup_exception(&s->complete, old_chunk); + if (!e) { + DMERR("corruption detected: exception for block %llu is " + "on disk but not in memory", + (unsigned long long)old_chunk); + up_write(&s->lock); + goto shut; + } + if (dm_consecutive_chunk_count(e)) { + if (old_chunk == e->old_chunk) { + e->old_chunk++; + e->new_chunk++; + } else if (old_chunk != e->old_chunk + + dm_consecutive_chunk_count(e)) { + DMERR("attempt to merge block %llu from the " + "middle of a chunk range [%llu - %llu]", + (unsigned long long)old_chunk, + (unsigned long long)e->old_chunk, + (unsigned long long) + e->old_chunk + dm_consecutive_chunk_count(e)); + up_write(&s->lock); + goto shut; + } + dm_consecutive_chunk_count_dec(e); + } else { + dm_remove_exception(e); + free_completed_exception(e); + } + up_write(&s->lock); + + dest.bdev = s->origin->bdev; + dest.sector = chunk_to_sector(s->store, old_chunk); + dest.count = min((sector_t)s->store->chunk_size, + get_dev_size(dest.bdev) - dest.sector); + + src.bdev = s->cow->bdev; + src.sector = chunk_to_sector(s->store, new_chunk); + src.count = dest.count; + + dm_kcopyd_copy(s->kcopyd_client, &src, 1, &dest, 0, merge_callback, s); + return; + +shut: + clear_bit_unlock(MERGE_RUNNING, &s->bits); + smp_mb__after_clear_bit(); + wake_up_bit(&s->bits, MERGE_RUNNING); +} + +static void merge_callback(int read_err, unsigned long write_err, void *context) +{ + int r; + struct dm_snapshot *s = context; + + if (read_err || write_err) { + if (read_err) + DMERR("Read error in data, shutting down merge"); + else + DMERR("Write error in data, shutting down merge"); + goto shut; + } + + r = s->store->type->commit_merge(s->store, 1); + if (r < 0) { + DMERR("Write error in exception store, shutting down merge"); + goto shut; + } + + snapshot_merge_process(s); + return; + +shut: + clear_bit_unlock(MERGE_RUNNING, &s->bits); + smp_mb__after_clear_bit(); + wake_up_bit(&s->bits, MERGE_RUNNING); 
+} + +static void start_merge(struct dm_snapshot *s) +{ + if (!test_and_set_bit(MERGE_RUNNING, &s->bits)) + snapshot_merge_process(s); +} + +static int wait_schedule(void *ptr) +{ + schedule(); + return 0; +} + +/* + * Stop the merging process and wait until it finishes. + */ +static void stop_merge(struct dm_snapshot *s) +{ + set_bit(SHUTDOWN_MERGE, &s->bits); + wait_on_bit(&s->bits, MERGE_RUNNING, wait_schedule, + TASK_UNINTERRUPTIBLE); + clear_bit(SHUTDOWN_MERGE, &s->bits); +} + /* * Construct a snapshot mapping:

*/ @@ -791,6 +938,7 @@ static int snapshot_ctr(struct dm_target init_rwsem(&s->lock); INIT_LIST_HEAD(&s->list); spin_lock_init(&s->pe_lock); + s->bits = 0; /* Allocate hash table for COW data */ if (init_hash_tables(s)) { @@ -963,6 +1111,9 @@ static void snapshot_dtr(struct dm_targe } up_read(&_origins_lock); + if (dm_target_is_snapshot_merge(ti)) + stop_merge(s); + /* Prevent further origin writes from using this snapshot. */ /* After this returns there can be no new kcopyd jobs. */ unregister_snapshot(s); @@ -1404,6 +1555,13 @@ static int snapshot_end_io(struct dm_tar return 0; } +static void snapshot_merge_presuspend(struct dm_target *ti) +{ + struct dm_snapshot *s = ti->private; + + stop_merge(s); +} + static void snapshot_postsuspend(struct dm_target *ti) { struct dm_snapshot *s = ti->private; @@ -1464,6 +1622,32 @@ static void snapshot_resume(struct dm_ta up_write(&s->lock); } +static chunk_t get_origin_minimum_chunksize(struct block_device *bdev) +{ + chunk_t min_chunksize; + + down_read(&_origins_lock); + + min_chunksize = __minimum_chunk_size(__lookup_origin(bdev)); + + up_read(&_origins_lock); + + return min_chunksize; +} + +static void snapshot_merge_resume(struct dm_target *ti) +{ + struct dm_snapshot *s = ti->private; + + snapshot_resume(ti); + /* + * snapshot-merge can take on the role of the origin too + * - must adjust snapshot-merge's ti->split_io accordingly + */ + ti->split_io = get_origin_minimum_chunksize(s->origin->bdev); + start_merge(s); +} + static int snapshot_status(struct dm_target *ti, status_type_t type, char *result, unsigned int maxlen) { @@ -1722,11 +1906,7 @@ static void origin_resume(struct dm_targ { struct dm_dev *dev = ti->private; - down_read(&_origins_lock); - - ti->split_io = __minimum_chunk_size(__lookup_origin(dev->bdev)); - - up_read(&_origins_lock); + ti->split_io = get_origin_minimum_chunksize(dev->bdev); } static int origin_status(struct dm_target *ti, status_type_t type, char *result, @@ -1790,9 +1970,10 @@ static 
struct target_type merge_target = .dtr = snapshot_dtr, .map = snapshot_merge_map, .end_io = snapshot_end_io, + .presuspend = snapshot_merge_presuspend, .postsuspend = snapshot_postsuspend, .preresume = snapshot_preresume, - .resume = snapshot_resume, + .resume = snapshot_merge_resume, .status = snapshot_status, .iterate_devices = snapshot_iterate_devices, };