From: Jonathan Brassow

This patch gives mirror the ability to handle write failures during
recovery.

When kcopyd finishes resynchronizing a mirror region, it calls
recovery_complete() with the results - which are currently ignored.
This patch checks over the bits in 'write_err' and calls a new
function, fail_mirror, on those devices whose bit is set.
'fail_mirror' increments the as-yet-unused error_count on the mirror
device, and will switch the primary device pointer for the mirror set
if the mirror is in-sync.

To maintain backwards compatibility, fail_mirror does nothing if the
DM_FEATURES_HANDLE_ERRORS flag is not present.

[AGK Bug? Does default_mirror need protecting with a lock (or atomic)?]
[AGK Patch incomplete? This patch introduces state information that must be
exported to userspace: default_mirror and error_count.]
[AGK Patch incomplete? fail_mirror should trigger an event to notify
userspace - share code with dm-mpath trigger_event perhaps?]

Signed-off-by: Jonathan Brassow
Signed-off-by: Alasdair G Kergon

---
 drivers/md/dm-raid1.c | 74 ++++++++++++++++++++++++++++++++++++++++++++++----
 1 files changed, 69 insertions(+), 5 deletions(-)

Index: linux-2.6.24-rc1/drivers/md/dm-raid1.c
===================================================================
--- linux-2.6.24-rc1.orig/drivers/md/dm-raid1.c	2007-11-05 11:45:55.000000000 +0000
+++ linux-2.6.24-rc1/drivers/md/dm-raid1.c	2007-11-05 11:49:02.000000000 +0000
@@ -646,6 +646,51 @@ static void bio_set_ms(struct bio *bio,
 	bio->bi_next = (struct bio *) ms;
 }
 
+/* fail_mirror - record an error against one mirror device
+ * @m: mirror device to fail
+ *
+ * Does nothing unless errors_handled(ms).  Bumps m->error_count;
+ * only the first error on a device goes further.  If @m is the
+ * default (primary) mirror and the set is in-sync, the first
+ * mirror whose error_count is zero becomes the new default.
+ *
+ * This function must not block.
+ */
+static void fail_mirror(struct mirror *m)
+{
+	struct mirror_set *ms = m->ms;
+	struct mirror *new;
+
+	if (!errors_handled(ms))
+		return;
+
+	if (atomic_inc_return(&m->error_count) > 1)
+		return;
+
+	if (m != ms->default_mirror)
+		return;
+
+	/* Change default mirror provided it is fully in-sync. */
+	if (!ms->in_sync) {
+		/*
+		 * Better to issue requests to same failing device
+		 * than to risk returning corrupt data.
+		 */
+		DMERR("Primary mirror (%s) failed while out-of-sync: "
+		      "Reads may fail.", m->dev->name);
+		return;
+	}
+
+	for (new = ms->mirror; new < ms->mirror + ms->nr_mirrors; new++)
+		if (!atomic_read(&new->error_count)) {
+			ms->default_mirror = new;
+			break;
+		}
+
+	if (unlikely(new == ms->mirror + ms->nr_mirrors))
+		DMWARN("All sides of mirror have failed.");
+}
+
 /*-----------------------------------------------------------------
  * Recovery.
  *
@@ -656,16 +701,34 @@ static void bio_set_ms(struct bio *bio,
 static void recovery_complete(int read_err, unsigned int write_err,
 			      void *context)
 {
-	struct region *reg = (struct region *) context;
+	struct region *reg = (struct region *)context;
+	struct mirror_set *ms = reg->rh->ms;
+	unsigned long write_err_ulong = (unsigned long)write_err;
+	unsigned m;
+	int bit = 0;
 
 	if (read_err)
-		/* Read error means the failure of default mirror. */
 		DMERR_LIMIT("Unable to read primary mirror during recovery");
 
-	if (write_err)
-		DMERR_LIMIT("Write error during recovery (error = 0x%x)",
-			    write_err);
+	if (!write_err)
+		goto out;
+
+	DMERR_LIMIT("Write error during recovery: 0x%x", write_err);
+
+	/*
+	 * Bit numbers skip the default mirror: bit advances only for
+	 * the other devices.  The default cannot change during recovery.
+	 */
+	for (m = 0; m < ms->nr_mirrors; m++) {
+		if (&ms->mirror[m] == ms->default_mirror)
+			continue;
+
+		if (test_bit(bit, &write_err_ulong))
+			fail_mirror(ms->mirror + m);
+		bit++;
+	}
 
+ out:
 	rh_recovery_end(reg, !(read_err || write_err));
 }
 
@@ -1019,6 +1082,7 @@ static int get_mirror(struct mirror_set
 	}
 
 	ms->mirror[mirror].ms = ms;
+	atomic_set(&(ms->mirror[mirror].error_count), 0);
 	ms->mirror[mirror].offset = offset;
 
 	return 0;