From: Milan Broz

Change device-mapper never to split bios into more than two pieces in
one go.  The first part is mapped and sent for processing to lower
layers.  Any remaining part is returned to the block layer through
generic_make_request() and queued for later processing.

(This patch solves some of the problems introduced by
md-dm-reduce-stack-usage-with-stacked-block-devices.patch.)

Signed-off-by: Milan Broz
Signed-off-by: Alasdair G Kergon
---

 drivers/md/dm.c |   87 ++++++++++++++++++++++++++++++++++----------------------
 1 files changed, 54 insertions(+), 33 deletions(-)

Index: current-quilt/drivers/md/dm.c
===================================================================
--- current-quilt.orig/drivers/md/dm.c	2007-07-25 21:04:51.000000000 +0100
+++ current-quilt/drivers/md/dm.c	2007-07-25 21:04:52.000000000 +0100
@@ -539,6 +539,18 @@ static int clone_endio(struct bio *bio,
 	return r;
 }
 
+static int clone2_endio(struct bio *bio, unsigned int done, int error)
+{
+	if (bio->bi_size)
+		return 1;
+
+	if (!bio_flagged(bio, BIO_UPTODATE) && !error)
+		error = -EIO;
+
+	bio_put(bio);
+	return error;
+}
+
 static sector_t max_io_len(struct mapped_device *md,
 			   sector_t sector, struct dm_target *ti)
 {
@@ -669,9 +681,23 @@ static struct bio *clone_bio(struct bio
 	return clone;
 }
 
+static struct bio *clone2_bio(struct bio *bio, sector_t sector,
+			      unsigned short idx, unsigned int len,
+			      struct mapped_device *md)
+{
+	struct bio *clone;
+
+	clone = clone_bio(bio, sector, idx, bio->bi_vcnt - idx, len, md->bs);
+	clone->bi_end_io = clone2_endio;
+	clone->bi_private = md;
+
+	return clone;
+}
+
 static void __clone_and_map(struct clone_info *ci)
 {
 	struct bio *clone, *bio = ci->bio;
+	struct bio *clone2 = NULL;
 	struct dm_target *ti = dm_table_find_target(ci->map, ci->sector);
 	sector_t len = 0, max = max_io_len(ci->md, ci->sector, ti);
 	struct dm_target_io *tio;
@@ -684,16 +710,19 @@ static void __clone_and_map(struct clone
 	tio->ti = ti;
 	memset(&tio->info, 0, sizeof(tio->info));
 
+	/* Merge page function should prevent split and in ideal
+	 * situation do not allow splitting at all.
+	 * Only very inefficient mapping should cause split to more
+	 * than 2 pieces, no need to extra optimize this case.
+	 */
 	if (ci->sector_count <= max) {
 		/*
 		 * Optimise for the simple case where we can do all of
 		 * the remaining io with a single clone.
 		 */
 		clone = clone_bio(bio, ci->sector, ci->idx,
-				  bio->bi_vcnt - ci->idx, ci->sector_count,
-				  ci->md->bs);
-		__map_bio(ti, clone, tio);
-		ci->sector_count = 0;
+				  bio->bi_vcnt - ci->idx,
+				  ci->sector_count, ci->md->bs);
 
 	} else if (to_sector(bio->bi_io_vec[ci->idx].bv_len) <= max) {
 		/*
@@ -716,46 +745,39 @@ static void __clone_and_map(struct clone
 
 		clone = clone_bio(bio, ci->sector, ci->idx, i - ci->idx, len,
 				  ci->md->bs);
-		__map_bio(ti, clone, tio);
 
 		ci->sector += len;
 		ci->sector_count -= len;
 		ci->idx = i;
 
+		/* Clone second part of bio */
+		clone2 = clone2_bio(bio, ci->sector, ci->idx, ci->sector_count, ci->md);
 	} else {
 		/*
 		 * Handle a bvec that must be split between two or more targets.
 		 */
 		struct bio_vec *bv = bio->bi_io_vec + ci->idx;
-		sector_t remaining = to_sector(bv->bv_len);
-		unsigned int offset = 0;
 
-		do {
-			if (offset) {
-				ti = dm_table_find_target(ci->map, ci->sector);
-				max = max_io_len(ci->md, ci->sector, ti);
-
-				tio = alloc_tio(ci->md);
-				tio->io = ci->io;
-				tio->ti = ti;
-				memset(&tio->info, 0, sizeof(tio->info));
-			}
-
-			len = min(remaining, max);
-
-			clone = split_bvec(bio, ci->sector, ci->idx,
-					   bv->bv_offset + offset, len,
-					   ci->md->bs);
-
-			__map_bio(ti, clone, tio);
-
-			ci->sector += len;
-			ci->sector_count -= len;
-			offset += to_bytes(len);
-		} while (remaining -= len);
+		clone = split_bvec(bio, ci->sector, ci->idx, bv->bv_offset, max,
+				   ci->md->bs);
 
-		ci->idx++;
+		ci->sector += max;
+		ci->sector_count -= max;
+
+		/* Clone second part of bio */
+		clone2 = clone2_bio(bio, ci->sector, ci->idx, ci->sector_count, ci->md);
+		bv = clone2->bi_io_vec + clone2->bi_idx;
+		bv->bv_len -= to_bytes(max);
+		bv->bv_offset += to_bytes(max);
 	}
+
+	/*
+	 * Fire off both parts of the bio: the first will be remapped and
+	 * the second is queued for a new dm_request to the same device.
+	 */
+	__map_bio(ti, clone, tio);
+	if (clone2)
+		generic_make_request(clone2);
 }
 
 /*
@@ -781,8 +803,7 @@ static int __split_bio(struct mapped_dev
 	ci.idx = bio->bi_idx;
 
 	start_io_acct(ci.io);
-	while (ci.sector_count)
-		__clone_and_map(&ci);
+	__clone_and_map(&ci);
 
 	/* drop the extra reference count */
 	dec_pending(ci.io, 0);
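
For anyone following the idea rather than the diff itself, here is a minimal
userspace sketch of the control flow this patch is after.  It is not kernel
code - toy_bio, requeue() and the 8-sector-wide targets are invented for
illustration - it only shows how each pass maps one piece bounded by the
target's maximum I/O length and hands any remainder back to a queue standing
in for generic_make_request(), so a bio is never split into more than two
pieces in one go.

/*
 * Simplified userspace model of "split into at most two pieces per pass".
 * Not kernel code: all names here are hypothetical.
 */
#include <stdio.h>
#include <stdlib.h>

struct toy_bio {
	unsigned long long sector;	/* start sector */
	unsigned long long count;	/* remaining sectors */
	struct toy_bio *next;
};

static struct toy_bio *queue;		/* stands in for generic_make_request() */

static void requeue(struct toy_bio *bio)
{
	bio->next = queue;
	queue = bio;
}

/* Stand-in for max_io_len(): how much the target under 'sector' can take. */
static unsigned long long toy_max_io_len(unsigned long long sector)
{
	return 8 - (sector % 8);	/* pretend every target is 8 sectors wide */
}

/* One pass of the splitting routine: map the first piece, requeue the rest. */
static void clone_and_map_once(struct toy_bio *bio)
{
	unsigned long long max = toy_max_io_len(bio->sector);
	unsigned long long len = bio->count < max ? bio->count : max;

	printf("map sectors %llu..%llu\n", bio->sector, bio->sector + len - 1);

	bio->sector += len;
	bio->count -= len;

	if (bio->count)			/* second piece: hand it back, do not recurse */
		requeue(bio);
	else
		free(bio);
}

int main(void)
{
	struct toy_bio *bio = malloc(sizeof(*bio));

	bio->sector = 5;
	bio->count = 20;		/* spans several 8-sector targets */
	requeue(bio);

	while (queue) {			/* the block layer re-issuing queued bios */
		struct toy_bio *b = queue;

		queue = b->next;
		clone_and_map_once(b);
	}
	return 0;
}

Each requeued remainder goes through the same routine again, which is how the
tail of a split bio re-enters dm_request() with the patch above.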