New device-mapper target that can delay I/O (for testing). Reads can be separated from writes, redirected to different underlying devices and delayed by differing amounts of time. [Need to add Documentation/device-mapper file] [Global lock needs making per-device.] [Need a mempool per-device too.] Index: linux-2.6.18-rc7/drivers/md/Kconfig =================================================================== --- linux-2.6.18-rc7.orig/drivers/md/Kconfig 2006-10-13 17:10:13.000000000 +0100 +++ linux-2.6.18-rc7/drivers/md/Kconfig 2006-10-13 17:10:21.000000000 +0100 @@ -257,5 +257,14 @@ config DM_MULTIPATH_EMC ---help--- Multipath support for EMC CX/AX series hardware. +config DM_DELAY + tristate "I/O delaying target (EXPERIMENTAL)" + depends on BLK_DEV_DM && EXPERIMENTAL + ---help--- + A target that delays reads and/or writes and can send + them to different devices. Useful for testing. + + If unsure, say N. + endmenu Index: linux-2.6.18-rc7/drivers/md/Makefile =================================================================== --- linux-2.6.18-rc7.orig/drivers/md/Makefile 2006-10-10 23:53:40.000000000 +0100 +++ linux-2.6.18-rc7/drivers/md/Makefile 2006-10-13 17:10:21.000000000 +0100 @@ -31,6 +31,7 @@ obj-$(CONFIG_MD_FAULTY) += faulty.o obj-$(CONFIG_BLK_DEV_MD) += md-mod.o obj-$(CONFIG_BLK_DEV_DM) += dm-mod.o obj-$(CONFIG_DM_CRYPT) += dm-crypt.o +obj-$(CONFIG_DM_DELAY) += dm-delay.o obj-$(CONFIG_DM_MULTIPATH) += dm-multipath.o dm-round-robin.o obj-$(CONFIG_DM_MULTIPATH_EMC) += dm-emc.o obj-$(CONFIG_DM_SNAPSHOT) += dm-snapshot.o Index: linux-2.6.18-rc7/drivers/md/dm-delay.c =================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ linux-2.6.18-rc7/drivers/md/dm-delay.c 2006-10-13 17:10:21.000000000 +0100 @@ -0,0 +1,382 @@ +/* + * Copyright (C) 2005 Red Hat GmbH + * + * A target that delays reads and/or writes and can send + * them to different devices. + * + * This file is released under the GPL. + */ + +#include +#include +#include +#include +#include + +#include "dm.h" +#include "dm-bio-list.h" + +#define DM_MSG_PREFIX "delay" + +/* + * Split: maps a split range of a device + */ +struct delay_c { + struct timer_list delay_timer; + struct semaphore timer_lock; + struct work_struct flush_expired_bios; + struct list_head delayed_bios; + atomic_t may_delay; + + struct dm_dev *dev_read; + sector_t start_read; + unsigned read_delay; + unsigned reads; + + struct dm_dev *dev_write; + sector_t start_write; + unsigned write_delay; + unsigned writes; +}; + +struct delay_info { + struct delay_c *context; + struct list_head list; + struct bio *bio; + unsigned long expires; +}; + +static DEFINE_MUTEX(delayed_bios_lock); + +static struct workqueue_struct *kdelayd_wq; +static kmem_cache_t *delayed_cache; +static mempool_t *delayed_pool; + +static void handle_delayed_timer(unsigned long data) +{ + struct delay_c *dc = (struct delay_c *)data; + + queue_work(kdelayd_wq, &dc->flush_expired_bios); +} + +static void queue_timeout(struct delay_c *dc, unsigned long expires) +{ + down(&dc->timer_lock); + + if (!timer_pending(&dc->delay_timer) || expires < dc->delay_timer.expires) + mod_timer(&dc->delay_timer, expires); + + up(&dc->timer_lock); +} + +static void flush_bios(struct bio *bio) +{ + struct bio *n; + + while (bio) { + n = bio->bi_next; + bio->bi_next = NULL; + generic_make_request(bio); + bio = n; + } +} + +static struct bio *flush_delayed_bios(struct delay_c *dc, int flush_all) +{ + struct delay_info *delayed, *next; + unsigned long next_expires = 0; + int start_timer = 0; + BIO_LIST_HEAD(flush_bios); + + mutex_lock(&delayed_bios_lock); + list_for_each_entry_safe(delayed, next, &dc->delayed_bios, list) { + if (flush_all || time_after_eq(jiffies, delayed->expires)) { + list_del(&delayed->list); + bio_list_add(&flush_bios, delayed->bio); + if ((bio_data_dir(delayed->bio) == WRITE)) + delayed->context->writes--; + else + delayed->context->reads--; + mempool_free(delayed, delayed_pool); + continue; + } + + if (!start_timer) { + start_timer = 1; + next_expires = delayed->expires; + } else + next_expires = min(next_expires, delayed->expires); + } + + mutex_unlock(&delayed_bios_lock); + + if (start_timer) + queue_timeout(dc, next_expires); + + return bio_list_get(&flush_bios); +} + +static void flush_expired_bios(void *data) +{ + struct delay_c *dc = data; + + flush_bios(flush_delayed_bios(dc, 0)); +} + +/* + * Mapping parameters: + * [ ] + * + * With separate write parameters, the first set is only used for reads. + * Delays are specified in milliseconds. + */ +static int delay_ctr(struct dm_target *ti, unsigned int argc, char **argv) +{ + struct delay_c *dc; + + if (argc != 3 && argc != 6) { + ti->error = "requires exactly 3 or 6 arguments"; + return -EINVAL; + } + + dc = kmalloc(sizeof(*dc), GFP_KERNEL); + if (!dc) { + ti->error = "Cannot allocate context"; + return -ENOMEM; + } + + dc->reads = dc->writes = 0; + + if (sscanf(argv[1], "%llu", &dc->start_read) != 1) { + ti->error = "Invalid device sector"; + goto bad; + } + + if (sscanf(argv[2], "%u", &dc->read_delay) != 1) { + ti->error = "Invalid delay"; + goto bad; + } + + if (dm_get_device(ti, argv[0], dc->start_read, ti->len, + dm_table_get_mode(ti->table), &dc->dev_read)) { + ti->error = "Device lookup failed"; + goto bad; + } + + if (argc == 3) { + dc->dev_write = NULL; + goto out; + } + + if (sscanf(argv[4], "%llu", &dc->start_write) != 1) { + ti->error = "Invalid write device sector"; + goto bad; + } + + if (sscanf(argv[5], "%u", &dc->write_delay) != 1) { + ti->error = "Invalid write delay"; + goto bad; + } + + if (dm_get_device(ti, argv[3], dc->start_write, ti->len, + dm_table_get_mode(ti->table), &dc->dev_write)) { + ti->error = "Write device lookup failed"; + dm_put_device(ti, dc->dev_read); + goto bad; + } + + out: + init_timer(&dc->delay_timer); + dc->delay_timer.function = handle_delayed_timer; + dc->delay_timer.data = (unsigned long)dc; + + INIT_WORK(&dc->flush_expired_bios, flush_expired_bios, dc); + INIT_LIST_HEAD(&dc->delayed_bios); + init_MUTEX(&dc->timer_lock); + atomic_set(&dc->may_delay, 1); + + ti->private = dc; + return 0; + + bad: + kfree(dc); + return -EINVAL; +} + +static void delay_dtr(struct dm_target *ti) +{ + struct delay_c *dc = ti->private; + + flush_workqueue(kdelayd_wq); + + dm_put_device(ti, dc->dev_read); + + if (dc->dev_write) + dm_put_device(ti, dc->dev_write); + + kfree(dc); +} + +static int delay_bio(struct delay_c *dc, int delay, struct bio *bio) +{ + struct delay_info *delayed; + unsigned long expires = 0; + + if (!delay || !atomic_read(&dc->may_delay)) + return 1; + + delayed = mempool_alloc(delayed_pool, GFP_NOIO); + + delayed->context = dc; + delayed->bio = bio; + delayed->expires = expires = jiffies + (delay * HZ / 1000); + + mutex_lock(&delayed_bios_lock); + + if (bio_data_dir(bio) == WRITE) + dc->writes++; + else + dc->reads++; + + list_add_tail(&delayed->list, &dc->delayed_bios); + + mutex_unlock(&delayed_bios_lock); + + queue_timeout(dc, expires); + + return 0; +} + +static void delay_presuspend(struct dm_target *ti) +{ + struct delay_c *dc = ti->private; + + atomic_set(&dc->may_delay, 0); + del_timer_sync(&dc->delay_timer); + flush_bios(flush_delayed_bios(dc, 1)); +} + +static void delay_resume(struct dm_target *ti) +{ + struct delay_c *dc = ti->private; + + atomic_set(&dc->may_delay, 1); +} + +static int delay_map(struct dm_target *ti, struct bio *bio, + union map_info *map_context) +{ + struct delay_c *dc = ti->private; + + if ((bio_data_dir(bio) == WRITE) && (dc->dev_write)) { + bio->bi_bdev = dc->dev_write->bdev; + bio->bi_sector = dc->start_write + + (bio->bi_sector - ti->begin); + + return delay_bio(dc, dc->write_delay, bio); + } + + bio->bi_bdev = dc->dev_read->bdev; + bio->bi_sector = dc->start_read + + (bio->bi_sector - ti->begin); + + return delay_bio(dc, dc->read_delay, bio); +} + +static int delay_status(struct dm_target *ti, status_type_t type, + char *result, unsigned maxlen) +{ + struct delay_c *dc = ti->private; + int sz = 0; + + switch (type) { + case STATUSTYPE_INFO: + DMEMIT("%u %u", dc->reads, dc->writes); + break; + + case STATUSTYPE_TABLE: + DMEMIT("%s %llu %u", dc->dev_read->name, dc->start_read, + dc->read_delay); + if (dc->dev_write) + DMEMIT("%s %llu %u", dc->dev_write->name, dc->start_write, + dc->write_delay); + break; + } + + return 0; +} + +static struct target_type delay_target = { + .name = "delay", + .version = {1, 0, 2}, + .module = THIS_MODULE, + .ctr = delay_ctr, + .dtr = delay_dtr, + .map = delay_map, + .presuspend = delay_presuspend, + .resume = delay_resume, + .status = delay_status, +}; + +static int __init dm_delay_init(void) +{ + int r = -ENOMEM; + + kdelayd_wq = create_workqueue("kdelayd"); + if (!kdelayd_wq) { + DMERR("Couldn't start kdelayd"); + goto bad1; + } + + delayed_cache = kmem_cache_create("dm-delay", + sizeof(struct delay_info), + __alignof__(struct delay_info), + 0, NULL, NULL); + if (!delayed_cache) { + DMERR("Couldn't create delayed bio cache."); + goto bad2; + } + + delayed_pool = mempool_create_slab_pool(128, delayed_cache); + if (!delayed_pool) { + DMERR("Couldn't create delayed bio pool."); + goto bad3; + } + + r = dm_register_target(&delay_target); + if (r < 0) { + DMERR("register failed %d", r); + goto bad4; + } + + return 0; + + bad4: + mempool_destroy(delayed_pool); + bad3: + kmem_cache_destroy(delayed_cache); + bad2: + destroy_workqueue(kdelayd_wq); + bad1: + return r; +} + +static void __exit dm_delay_exit(void) +{ + int r = dm_unregister_target(&delay_target); + + if (r < 0) + DMERR("unregister failed %d", r); + + mempool_destroy(delayed_pool); + kmem_cache_destroy(delayed_cache); + destroy_workqueue(kdelayd_wq); +} + +/* Module hooks */ +module_init(dm_delay_init); +module_exit(dm_delay_exit); + +MODULE_DESCRIPTION(DM_NAME " delay target"); +MODULE_AUTHOR("Heinz Mauelshagen "); +MODULE_LICENSE("GPL"); Index: linux-2.6.18-rc7/drivers/md/dm-bio-list.h =================================================================== --- linux-2.6.18-rc7.orig/drivers/md/dm-bio-list.h 2006-10-13 17:10:19.000000000 +0100 +++ linux-2.6.18-rc7/drivers/md/dm-bio-list.h 2006-10-13 17:10:21.000000000 +0100 @@ -14,6 +14,9 @@ struct bio_list { struct bio *tail; }; +#define BIO_LIST_HEAD(bio_list_name) \ + struct bio_list bio_list_name = { .head = NULL, .tail = NULL } + static inline void bio_list_init(struct bio_list *bl) { bl->head = bl->tail = NULL;