From: Jonathan Brassow

This patch gives mirror the ability to handle write failures during
recovery.

When kcopyd finishes resynchronizing a mirror region, it calls
recovery_complete() with the results - which are currently ignored.
This patch checks over the bits in 'write_err' and calls a new
function, fail_mirror, on those devices whose bit is set.

'fail_mirror' increments the error_count on the mirror device, and
will switch the primary device pointer for the mirror set if the
mirror is in-sync.

To maintain backwards compatibility, fail_mirror does nothing if
the DM_FEATURES_HANDLE_ERRORS flag is not present.

Signed-off-by: Jonathan Brassow

---
 drivers/md/dm-raid1.c |   75 ++++++++++++++++++++++++++++++++++++++++++++++++--
 1 files changed, 72 insertions(+), 3 deletions(-)

Index: linux/drivers/md/dm-raid1.c
===================================================================
--- linux.orig/drivers/md/dm-raid1.c	2007-07-12 17:04:07.000000000 +0100
+++ linux/drivers/md/dm-raid1.c	2007-07-12 17:04:14.000000000 +0100
@@ -114,6 +114,7 @@ struct region {
  *---------------------------------------------------------------*/
 struct mirror {
 	atomic_t error_count;
+	struct mirror_set *ms;
 	struct dm_dev *dev;
 	sector_t offset;
 };
@@ -644,6 +645,53 @@ static void bio_set_ms(struct bio *bio,
 	bio->bi_next = (struct bio *) ms;
 }
 
+/* fail_mirror
+ * @m: mirror device to fail
+ *
+ * If the device is valid, mark it invalid.  Also,
+ * if this is the default mirror device (i.e. the primary
+ * device) and the mirror set is in-sync, choose an
+ * alternative primary device.
+ *
+ * This function must not block
+ */
+static void fail_mirror(struct mirror *m)
+{
+	struct mirror_set *ms = m->ms;
+	struct mirror *new;
+
+	/* Are we handling or ignoring device failures */
+	if (!errors_handled(ms))
+		return;
+
+	if (atomic_inc_return(&m->error_count) > 1)
+		return;
+
+	if (m != ms->default_mirror)
+		return;
+
+	/* If the default mirror fails, change it. */
+	if (!ms->in_sync) {
+		/*
+		 * Cannot switch primary.  Better to issue requests
+		 * to same failing device than to risk returning
+		 * corrupt data.
+		 */
+		DMERR("Primary mirror (%s) failed while out-of-sync: "
+		      "Reads may fail.", m->dev->name);
+		return;
+	}
+
+	for (new = ms->mirror; new < ms->mirror + ms->nr_mirrors; new++)
+		if (!atomic_read(&new->error_count)) {
+			ms->default_mirror = new;
+			break;
+		}
+
+	if (unlikely(new == ms->mirror + ms->nr_mirrors))
+		DMWARN("All sides of mirror have failed.");
+}
+
 /*-----------------------------------------------------------------
  * Recovery.
  *
@@ -655,15 +703,34 @@ static void recovery_complete(int read_e
 			      void *context)
 {
 	struct region *reg = (struct region *) context;
+	struct mirror_set *ms = reg->rh->ms;
+	unsigned long write_err_ulong = (unsigned long)write_err;
+	int m, bit = 0;
 
 	if (read_err)
 		/* Read error means the failure of default mirror. */
 		DMERR_LIMIT("Unable to read primary mirror during recovery");
 
-	if (write_err)
-		DMERR_LIMIT("Write error during recovery (error = 0x%x)",
-			    write_err);
+	if (!write_err)
+		goto out;
+
+	DMERR_LIMIT("Write error during recovery (error = 0x%x)",
+		    write_err);
+
+	/*
+	 * Bits correspond to devices (excluding default mirror).
+	 * The default mirror cannot change during recovery.
+	 */
+	for (m = 0; m < ms->nr_mirrors; m++) {
+		if (&ms->mirror[m] == ms->default_mirror)
+			continue;
+
+		if (test_bit(bit, &write_err_ulong))
+			fail_mirror(ms->mirror + m);
+		bit++;
+	}
 
+ out:
 	rh_recovery_end(reg, !(read_err || write_err));
 }
 
@@ -1017,6 +1084,8 @@ static int get_mirror(struct mirror_set
 	}
 
 	ms->mirror[mirror].offset = offset;
+	atomic_set(&(ms->mirror[mirror].error_count), 0);
+	ms->mirror[mirror].ms = ms;
 
 	return 0;
 }