From fa9fcceea882c522d1429036aabedffd5929b964 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Sun, 10 Sep 2006 21:54:55 +0900 Subject: [PATCH] Remove in-kernel cache mechanism in dm-userspace Signed-off-by: FUJITA Tomonori --- drivers/md/dm-user.h | 142 +----- drivers/md/dm-userspace-chardev.c | 854 +++++++--------------------------- drivers/md/dm-userspace.c | 933 ++++++------------------------------- include/linux/dm-userspace.h | 160 ++---- 4 files changed, 396 insertions(+), 1693 deletions(-) diff --git a/drivers/md/dm-user.h b/drivers/md/dm-user.h index a6d7114..890e36a 100644 --- a/drivers/md/dm-user.h +++ b/drivers/md/dm-user.h @@ -24,23 +24,10 @@ #include #define DMU_KEY_LEN 256 -extern struct target_type userspace_target; -extern mempool_t *request_pool; -extern dev_t dmu_dev; extern spinlock_t devices_lock; extern struct list_head devices; /* - * A hash table of remaps - */ -struct hash_table { - struct list_head *table; /* Array of lists (buckets) */ - uint64_t size; /* Number of buckets */ - uint32_t mask; /* Mask used to determine bucket */ - uint64_t count; /* Number of remaps in entire table */ -}; - -/* * A block device that we can send bios to */ struct target_device { @@ -60,138 +47,41 @@ struct dmu_device { struct list_head requests; /* List of pending requests */ struct list_head target_devs; /* List of devices we can target */ - struct hash_table remaps; /* Hash table of all our maps */ void *transport_private; /* Private data for userspace comms */ char key[DMU_KEY_LEN]; /* Unique name string for device */ struct kref users; /* Self-destructing reference count */ - wait_queue_head_t wqueue; /* To block while waiting for reqs */ - uint64_t block_size; /* Block size for this device */ uint64_t block_mask; /* Mask for offset in block */ unsigned int block_shift; /* Shift to convert to/from block */ struct kcopyd_client *kcopy; /* Interface to kcopyd */ - - uint32_t id_counter; /* Used to generate request IDs */ }; -struct userspace_request { - struct list_head list; /* Our place on the request queue */ - - spinlock_t lock; /* Protects all the fields below */ - - struct dmu_device *dev; /* The DMU device that owns us */ - - int type; /* Type of request */ - int sent; /* Non-zero if we've been sent */ - uint32_t flags; /* Attribute flags */ - uint32_t id; /* Unique ID for sync with userspace */ - union { - uint64_t block; /* The block in question */ - } u; - atomic_t refcnt; /* Reference count */ - - struct dmu_map *remap; /* The remap we represent */ -}; - -struct dmu_map { +struct dmu_request { struct list_head list; /* Our place in a remap bucket chain */ - struct list_head mru_list; /* Our place on the MRU list */ - - spinlock_t lock; /* Protects all the fields below */ - - uint64_t org_block; /* Original block */ - uint64_t new_block; /* Destination block */ - int64_t offset; /* Sectors to offset remapped block */ - uint32_t flags; /* Attribute flags */ - uint32_t id; /* Unique ID for sync with userspace */ - - struct target_device *src; /* Source blkdev for COPY_FIRST */ - struct target_device *dest; /* Where the remapped block is */ - - struct bio_list bios; /* Bios queued for remapping */ - struct bio_list bios_waiting; /* Bios waiting for endio sync */ - struct dmu_device *dev; /* The DMU device that owns us */ - struct dmu_map *next; /* Next remap that depends on us */ - - struct work_struct endio_task;/* Work to be done on bio endios */ + struct bio *bio; + u32 flags; }; -/* Find and grab a reference to a target device */ -struct target_device 
*find_target(struct dmu_device *dev, - dev_t devno); - -/* Object allocation, destruction, and initialization routines */ -void init_remap(struct dmu_device *dev, struct dmu_map *remap); -void init_request(struct dmu_device *dev, - int type, - struct userspace_request *req); -void free_remap(struct dmu_map *remap); -void __free_remap(struct dmu_map *remap); -struct dmu_map *alloc_remap_atomic(struct dmu_device *dev); - -/* Hash table manipulation */ -struct dmu_map *ht_find_map(struct hash_table *ht, uint64_t block); -void ht_insert_map(struct hash_table *ht, struct dmu_map *map); -struct dmu_map *ht_find_map_dev(struct dmu_device *dev, uint64_t block); -void ht_delete_map(struct hash_table *ht, struct dmu_map *map); +extern void dmu_map_done(struct dmu_device *dev, u64 id, uint32_t flags, + uint32_t src_maj, uint32_t src_min, + uint32_t dst_maj, uint32_t dst_min, + u64 block, u64 offset); /* Character device transport functions */ -int register_chardev_transport(struct dmu_device *dev); -void unregister_chardev_transport(struct dmu_device *dev); -int init_chardev_transport(void); -void cleanup_chardev_transport(void); -void write_chardev_transport_info(struct dmu_device *dev, - char *buf, unsigned int maxlen); - -/* Return the block number for @sector */ -static inline u64 dmu_block(struct dmu_device *dev, - sector_t sector) -{ - return sector >> dev->block_shift; -} - -/* Return the sector offset in a block for @sector */ -static inline u64 dmu_sector_offset(struct dmu_device *dev, - sector_t sector) -{ - return sector & dev->block_mask; -} - -/* Return the starting sector for @block */ -static inline u64 dmu_sector(struct dmu_device *dev, - uint64_t block) -{ - return block << dev->block_shift; -} - -/* Add a request to a device's request queue */ -static void add_request(struct dmu_device *dev, - struct userspace_request *req) -{ - spin_lock(&dev->lock); - list_add_tail(&req->list, &dev->requests); - spin_unlock(&dev->lock); - - wake_up(&dev->wqueue); -} +extern int register_chardev_transport(struct dmu_device *dev); +extern void unregister_chardev_transport(struct dmu_device *dev); +extern int init_chardev_transport(void); +extern void cleanup_chardev_transport(void); +extern void write_chardev_transport_info(struct dmu_device *dev, + char *buf, unsigned int maxlen); -/* Remap @bio based on the information in @remap */ -static void __bio_remap(struct bio *bio, - struct dmu_map *remap) -{ - BUG_ON(remap->dest == NULL); - - bio->bi_sector = dmu_sector(remap->dev, remap->new_block) + - dmu_sector_offset(remap->dev, bio->bi_sector) + - remap->offset; - - bio->bi_bdev = remap->dest->bdev; -} +extern int dmu_uspace_send_map_req(struct dmu_device *, u64, u32, u64); +extern int dmu_uspace_send_map_status(struct dmu_device *, u64, u32); /* Increase the usage count for @dev */ static inline void get_dev(struct dmu_device *dev) @@ -199,8 +89,8 @@ static inline void get_dev(struct dmu_de kref_get(&dev->users); } +extern void destroy_dmu_device(struct kref *ref); /* Decrease the usage count for @dev */ -void destroy_dmu_device(struct kref *ref); static inline void put_dev(struct dmu_device *dev) { kref_put(&dev->users, destroy_dmu_device); diff --git a/drivers/md/dm-userspace-chardev.c b/drivers/md/dm-userspace-chardev.c index 752fee9..5a4b0d3 100644 --- a/drivers/md/dm-userspace-chardev.c +++ b/drivers/md/dm-userspace-chardev.c @@ -2,6 +2,8 @@ * Copyright (C) International Business Machines Corp., 2006 * Author: Dan Smith * + * Copyright (C) 2006 FUJITA Tomonori + * * This program is free 
software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; under version 2 of the License. @@ -36,775 +38,281 @@ #include "dm-user.h" #define DM_MSG_PREFIX "dm-userspace" +static dev_t dmu_dev; + /* This allows for a cleaner separation between the dm-userspace * device-mapper target, and the userspace transport used. Right now, * only a chardev transport exists, but it's possible that there could * be more in the future */ +struct dmu_ring { + u32 r_idx; + unsigned long r_pages[DMU_RING_PAGES]; + spinlock_t r_lock; +}; + struct chardev_transport { struct cdev cdev; dev_t ctl_dev; struct dmu_device *parent; -}; - -static void remap_flusher(struct dmu_map *remap); -static int have_pending_requests(struct dmu_device *dev) -{ - struct userspace_request *req; - int ret = 0; - - /* FIXME: We could keep a count of how many waiting reqs - * there are, eliminating the need to count, and possibly the - * need to lock - */ - - spin_lock(&dev->lock); - - list_for_each_entry(req, &dev->requests, list) { - if (!req->sent) { - ret = 1; - break; - } - } - - spin_unlock(&dev->lock); - - return ret; -} + struct dmu_ring tx; + struct dmu_ring rx; + wait_queue_head_t tx_poll_wait; +}; -static void copy_callback(int read_err, - unsigned int write_err, - void *data) +static inline void dmu_ring_idx_inc(struct dmu_ring *r) { - remap_flusher((struct dmu_map *)data); + if (r->r_idx == DMU_MAX_EVENTS - 1) + r->r_idx = 0; + else + r->r_idx++; } -static void copy_block(struct dmu_map *remap) +static struct dmu_event *dmu_head_event(struct dmu_ring *r, u32 idx) { - struct io_region src, dst; - struct kcopyd_client *client; - unsigned long flags; - - spin_lock_irqsave(&remap->lock, flags); + u32 pidx, off; - src.bdev = remap->src->bdev; - src.sector = remap->org_block << remap->dev->block_shift; - src.count = remap->dev->block_size; + pidx = idx / DMU_EVENT_PER_PAGE; + off = idx % DMU_EVENT_PER_PAGE; - dst.bdev = remap->dest->bdev; - dst.sector = (remap->new_block << remap->dev->block_shift); - dst.sector += remap->offset; - dst.count = remap->dev->block_size; - - client = remap->dev->kcopy; - - spin_unlock_irqrestore(&remap->lock, flags); - - kcopyd_copy(client, &src, 1, &dst, 0, copy_callback, remap); + return (struct dmu_event *) + (r->r_pages[pidx] + sizeof(struct dmu_event) * off); } -static void copy_or_flush(struct dmu_map *remap) +static int dmu_uspace_send_event(struct dmu_device *dev, u32 type, + struct dmu_event *p) { - int copy; - unsigned long flags; + struct chardev_transport *t = dev->transport_private; + struct dmu_event *ev; + struct dmu_ring *ring = &t->tx; + int err = 0; - spin_lock_irqsave(&remap->lock, flags); - copy = dmu_get_flag(&remap->flags, DMU_FLAG_COPY_FIRST); - spin_unlock_irqrestore(&remap->lock, flags); + spin_lock(&ring->r_lock); - if (copy) - copy_block(remap); + ev = dmu_head_event(ring, ring->r_idx); + if (!ev->status) + dmu_ring_idx_inc(ring); else - remap_flusher(remap); -} + err = -EBUSY; -static struct bio *pop_and_remap(struct dmu_map *remap) -{ - struct bio *bio = NULL; - unsigned long flags; - - spin_lock_irqsave(&remap->lock, flags); - - bio = bio_list_pop(&remap->bios); - if (bio) - __bio_remap(bio, remap); - else { - /* If there are no more bios, we must set the VALID - * flag before we release the lock - */ - dmu_set_flag(&remap->flags, DMU_FLAG_VALID); - } - - spin_unlock_irqrestore(&remap->lock, flags); + spin_unlock(&ring->r_lock); - return bio; -} + if (err) { + DMERR("Fail to 
send uspace %u\n", type); + return err; + } -static void get_remap_attrs(struct dmu_map *remap, - int *temporary, - struct dmu_map **next) -{ - unsigned long flags; + memcpy(ev, p, sizeof(*ev)); + ev->type = type; + ev->status = 1; + mb(); - spin_lock_irqsave(&remap->lock, flags); + flush_dcache_page(virt_to_page(ev)); - *temporary = dmu_get_flag(&remap->flags, DMU_FLAG_TEMPORARY); - *next = remap->next; - remap->next = NULL; + wake_up_interruptible(&t->tx_poll_wait); - spin_unlock_irqrestore(&remap->lock, flags); + return 0; } -static void remap_flusher(struct dmu_map *remap) +int dmu_uspace_send_map_req(struct dmu_device *dev, u64 id, u32 flags, u64 block) { - struct bio *bio; - int temporary = 0; - struct dmu_map *next; - - while (1) { - - bio = pop_and_remap(remap); - - if (bio) - generic_make_request(bio); - else - break; - } + struct dmu_event ev; - get_remap_attrs(remap, &temporary, &next); - - if (next) - copy_or_flush(next); - - if (temporary) { - free_remap(remap); - } + ev.k.map_req.id = id; + ev.k.map_req.flags = flags; + ev.k.map_req.block = block; + return dmu_uspace_send_event(dev, DM_USERSPACE_MAP_BLOCK_REQ, &ev); } -static int send_userspace_message(uint8_t __user *buffer, - struct userspace_request *req) +int dmu_uspace_send_map_status(struct dmu_device *dev, u64 id, u32 status) { - int ret = 0; - struct dmu_msg_header hdr; - union { - struct dmu_msg_map_request map_req; - struct dmu_msg_status status_req; - struct dmu_msg_version ver_req; - } msgs; - - memset(&msgs, 0, sizeof(msgs)); - spin_lock(&req->lock); - - hdr.id = req->id; - - switch (req->type) { - case DM_USERSPACE_GET_VERSION: - hdr.msg_type = req->type; - hdr.payload_len = sizeof(msgs.ver_req); - msgs.ver_req.kernel_ver = - userspace_target.version[0] << 16 | - userspace_target.version[1] << 8 | - userspace_target.version[2]; - - break; - - case DM_USERSPACE_MAP_BLOCK_REQ: - hdr.msg_type = req->type; - hdr.payload_len = sizeof(msgs.map_req); - msgs.map_req.org_block = - dmu_block(req->dev, req->remap->bios.head->bi_sector); - dmu_cpy_flag(&msgs.map_req.flags, req->flags, DMU_FLAG_RD); - dmu_cpy_flag(&msgs.map_req.flags, req->flags, DMU_FLAG_WR); + struct dmu_event ev; - break; - - case DM_USERSPACE_SYNC_COMPLETE: - case DM_USERSPACE_INVAL_COMPLETE: - case DM_USERSPACE_INVAL_FAILED: - hdr.msg_type = DM_USERSPACE_STATUS; - hdr.payload_len = sizeof(msgs.status_req); - msgs.status_req.status = req->type; - msgs.status_req.id_of_op = req->id; - - break; - - default: - DMWARN("Unknown message type %i", req->type); - ret = 0; - } - - spin_unlock(&req->lock); - - if (copy_to_user(buffer, &hdr, sizeof(hdr))) - return -EFAULT; - if (copy_to_user(buffer + sizeof(hdr), &msgs, hdr.payload_len)) - return -EFAULT; - - ret = sizeof(hdr) + hdr.payload_len; - - if ((req->type != DM_USERSPACE_MAP_BLOCK_REQ) && - (req->type != DM_USERSPACE_SYNC_COMPLETE)) { - /* Only some requests get responses, so we take others - * off the request queue here - */ - spin_lock(&req->dev->lock); - list_del(&req->list); - spin_unlock(&req->dev->lock); - mempool_free(req, request_pool); - } - - return ret; + ev.k.map_done.id = id; + ev.k.map_done.status = status; + return dmu_uspace_send_event(dev, DM_USERSPACE_MAP_BLOCK_DONE, &ev); } -struct userspace_request *pluck_next_request(struct dmu_device *dev, - int size_available) +static void dmu_event_recv(struct dmu_device *dev, struct dmu_event *ev) { - struct userspace_request *req, *match = NULL; - - spin_lock(&dev->lock); - - list_for_each_entry(req, &dev->requests, list) { - spin_lock(&req->lock); 
- if (!req->sent) { - if (dmu_get_msg_len(req->type) < size_available) { - req->sent = 1; - match = req; - } else { - /* Must break here to preserve order */ - spin_unlock(&req->lock); - break; - } - } - spin_unlock(&req->lock); - - if (match) - break; + switch (ev->type) { + case DM_USERSPACE_MAP_BLOCK_RSP: + dmu_map_done(dev, ev->u.map_rsp.id, ev->u.map_rsp.flags, + ev->u.map_rsp.src_maj, ev->u.map_rsp.src_min, + ev->u.map_rsp.dst_maj, ev->u.map_rsp.dst_min, + ev->u.map_rsp.block, ev->u.map_rsp.offset); + break; + default: + printk("unknown type %d\n", ev->type); } - - spin_unlock(&dev->lock); - - return match; } -ssize_t dmu_ctl_read(struct file *file, char __user *buffer, - size_t size, loff_t *offset) +static ssize_t dmu_ctl_write(struct file *file, const char __user * buffer, + size_t count, loff_t * ppos) { - struct dmu_device *dev = (struct dmu_device *)file->private_data; - struct userspace_request *req = NULL; - int ret = 0, r; - - if (!capable(CAP_SYS_ADMIN)) - return -EACCES; - - while (!have_pending_requests(dev)) { - if (file->f_flags & O_NONBLOCK) { - return 0; - } - - if (wait_event_interruptible(dev->wqueue, - have_pending_requests(dev))) - return -ERESTARTSYS; - } + struct chardev_transport *t = dev->transport_private; + struct dmu_ring *ring = &t->rx; + struct dmu_event *ev; - while(ret < size) { - req = pluck_next_request(dev, size - ret); - if (!req) - /* One or more of the following conditions is true: - * 1. No more requests available for sending - * 2. No more room in the outgoing buffer - */ + while (1) { + ev = dmu_head_event(ring, ring->r_idx); + if (!ev->status) break; - r = send_userspace_message((void *)(buffer + ret), req); - if (r == 0) - continue; - else if (r < 0) - return r; + /* do we need this? */ + flush_dcache_page(virt_to_page(ev)); - ret += r; - } + dmu_ring_idx_inc(ring); + dmu_event_recv(dev, ev); + ev->status = 0; + }; - return ret; + return count; } -/* - * Returns: - * 1 if we're chained to our parent - * 0 if parent is valid and was removed - * -1 if we gave our bios to the invalid parent - */ -static int handle_parent_remap(struct dmu_map *parent, - struct dmu_map *remap, - struct dmu_msg_map_response *msg) +static void dmu_ring_free(struct dmu_ring *r) { - int ret = 0; - int free_parent = 0; - unsigned long flags; - - spin_lock_irqsave(&parent->lock, flags); - - if (!dmu_get_flag(&parent->flags, DMU_FLAG_INUSE)) { - /* This is in the process of being destroyed, - * so we can't use it - */ - goto end_parent; - } - - if (!dmu_get_flag(&parent->flags, DMU_FLAG_VALID)) { - if (dmu_get_flag(&parent->flags, DMU_FLAG_WR) == - dmu_get_flag(&msg->flags, DMU_FLAG_WR) && - (parent->new_block == msg->new_block)) { - /* Perms match for this not-yet-valid remap, - so tag our bios on to it and bail */ - bio_list_merge(&parent->bios, &remap->bios); - bio_list_init(&remap->bios); - ret = -1; - } else { - /* Remove parent from remap table, and - * chain our new remap to this one so - * it will fire when parent goes - * valid - */ - list_del_init(&parent->list); - if (parent->next) { - DMERR("Parent already chained!"); - BUG(); - } - parent->next = remap; - dmu_set_flag(&parent->flags, DMU_FLAG_TEMPORARY); - ret = 1; - } - } else { - /* Remove existing valid remap */ - free_parent = 1; - } - - end_parent: - if (free_parent) - __free_remap(parent); - - spin_unlock_irqrestore(&parent->lock, flags); - - return ret; + int i; + for (i = 0; i < DMU_RING_PAGES; i++) + free_page(r->r_pages[i]); } -static int remap_request(struct dmu_msg_map_response *msg, - struct 
dmu_device *dev, uint32_t id) +static int dmu_ring_alloc(struct dmu_ring *r) { - struct dmu_map *remap = NULL, *parent = NULL; - struct target_device *s_dev = NULL, *d_dev = NULL; - int is_chained = 0; - struct userspace_request *cursor, *next, *req = NULL; - - /* See if we have a pending request that matches */ - spin_lock(&dev->lock); - list_for_each_entry_safe(cursor, next, &dev->requests, list) { - if ((cursor->type == DM_USERSPACE_MAP_BLOCK_REQ) && - (cursor->id == msg->id_of_req)) { - req = cursor; - list_del(&req->list); - break; - } - } - spin_unlock(&dev->lock); - - if (dmu_get_flag(&msg->flags, DMU_FLAG_COPY_FIRST)) { - s_dev = find_target(dev, MKDEV(msg->src_maj, msg->src_min)); - if (!s_dev) { - DMERR("Failed to find src device %i:%i", - msg->src_maj, msg->src_min); - goto bad; - } - } - - d_dev = find_target(dev, MKDEV(msg->dst_maj, msg->dst_min)); - if (!d_dev) { - DMERR("Failed to find dest device %i:%i", - msg->dst_maj, msg->dst_min); - goto bad; - } - - if (req) { - while (atomic_read(&req->refcnt) != 0) - /* Wait for exclusive use of request. Even - * though we have removed it from the list, - * someone still has a pointer to it, which - * means we must wait for them to finish with - * it before continuing. - */ - schedule(); - remap = req->remap; - mempool_free(req, request_pool); - } else { - /* Allocate a new remap early (before grabbing locks), - * since we will most likely need it, and we didn't - * get one with the request - */ - /* FIXME */ - remap = alloc_remap_atomic(dev); - if (!remap) { - DMERR("Failed to alloc remap!"); - goto bad; - } - init_remap(dev, remap); - } - - spin_lock(&dev->lock); - - /* FIXME: Now that we pass the remap with the req, do we need - IRQs disabled here? */ - spin_lock(&remap->lock); - remap->org_block = msg->org_block; - - /* Now, we insert the new remap into the table, and remove the - * existing map, if present, all while the device is locked - */ - - parent = ht_find_map(&dev->remaps, msg->org_block); - if (parent) { - is_chained = handle_parent_remap(parent, remap, msg); - if (is_chained < 0) { - __free_remap(remap); - spin_unlock(&remap->lock); - spin_unlock(&dev->lock); - return 1; + int i; + + spin_lock_init(&r->r_lock); + for (i = 0; i < DMU_RING_PAGES; i++) { + r->r_pages[i] = get_zeroed_page(GFP_KERNEL); + if (!r->r_pages[i]) { + printk("out of memory\n"); + return -ENOMEM; } } - - if (dmu_get_flag(&msg->flags, DMU_FLAG_SYNC)) - dmu_set_flag(&remap->flags, DMU_FLAG_WAITING); - - remap->new_block = msg->new_block; - remap->offset = msg->offset; - remap->src = s_dev; - remap->dest = d_dev; - remap->dev = dev; - remap->id = id; - - dmu_cpy_flag(&remap->flags, msg->flags, DMU_FLAG_COPY_FIRST); - dmu_cpy_flag(&remap->flags, msg->flags, DMU_FLAG_TEMPORARY); - dmu_cpy_flag(&remap->flags, msg->flags, DMU_FLAG_SYNC); - dmu_cpy_flag(&remap->flags, msg->flags, DMU_FLAG_WR); - dmu_cpy_flag(&remap->flags, msg->flags, DMU_FLAG_RD); - dmu_clr_flag(&remap->flags, DMU_FLAG_VALID); - - spin_unlock(&remap->lock); - - ht_insert_map(&dev->remaps, remap); - - spin_unlock(&dev->lock); - - if (! 
is_chained) - copy_or_flush(remap); - - return 1; - - bad: - DMERR("Remap error: chaos may ensue"); - return 0; } -/* - * Adds the request to the front of the queue so it's picked up first - */ -static void add_urgent_request(struct dmu_device *dev, - struct userspace_request *req) -{ - spin_lock(&dev->lock); - list_add(&req->list, &dev->requests); - spin_unlock(&dev->lock); - - wake_up(&dev->wqueue); -} - -static int version_request(struct dmu_msg_version *msg, - struct dmu_device *dev, uint32_t id) -{ - struct userspace_request *req; - - req = mempool_alloc(request_pool, GFP_NOIO); - if (!req) { - DMERR("Failed to alloc version response"); - return 0; - } - - init_request(dev, DM_USERSPACE_GET_VERSION, req); - add_urgent_request(dev, req); - - return 1; -} - -static int invalidate_request(struct dmu_msg_invalidate_map *msg, - struct dmu_device *dev, uint32_t id) +static int dmu_ctl_open(struct inode *inode, struct file *file) { - struct dmu_map *remap; - struct userspace_request *req; - int ret = 1; - unsigned long flags; - - remap = ht_find_map_dev(dev, msg->org_block); - if (!remap) - ret = 0; - else { - spin_lock(&dev->lock); - spin_lock_irqsave(&remap->lock, flags); - if (dmu_get_flag(&remap->flags, DMU_FLAG_VALID)) - ht_delete_map(&dev->remaps, remap); - else - ret = 0; - spin_unlock_irqrestore(&remap->lock, flags); - spin_unlock(&dev->lock); - } - - req = mempool_alloc(request_pool, GFP_NOIO); - if (!req) { - DMERR("Failed to allocate request"); - return 0; - } + struct chardev_transport *t; + struct dmu_device *dev; + int err; - if (ret) - init_request(dev, DM_USERSPACE_INVAL_COMPLETE, req); - else - init_request(dev, DM_USERSPACE_INVAL_FAILED, req); + if (!capable(CAP_SYS_ADMIN)) + return -EACCES; - req->u.block = msg->org_block; - req->id = id; + t = container_of(inode->i_cdev, struct chardev_transport, cdev); - add_request(dev, req); + init_waitqueue_head(&t->tx_poll_wait); + err = dmu_ring_alloc(&t->tx); + if (err) + goto free_tx; - return ret; -} + err = dmu_ring_alloc(&t->rx); + if (err) + goto free_rx; -static void sync_complete(struct dmu_device *dev, uint32_t id_of_op) { - struct dmu_map *remap = NULL; - struct bio *bio; - struct userspace_request *req, *next; - unsigned long flags; + dev = t->parent; - spin_lock(&dev->lock); - list_for_each_entry_safe(req, next, &dev->requests, list) { - if (req->id == id_of_op) { - list_del(&req->list); - break; - } - } - spin_unlock(&dev->lock); + get_dev(dev); - if (!req) { - DMERR("Unable to complete unknown request: %u\n", - id_of_op); - return; - } + file->private_data = dev; - while (atomic_read(&req->refcnt) != 0) - /* Wait for exclusive use of request. Even - * though we have removed it from the list, - * someone still has a pointer to it, which - * means we must wait for them to finish with - * it before continuing. 
- */ - schedule(); - - remap = req->remap; - mempool_free(req, request_pool); - - if (remap) { - spin_lock_irqsave(&remap->lock, flags); - dmu_clr_flag(&remap->flags, DMU_FLAG_WAITING); - spin_unlock_irqrestore(&remap->lock, flags); - while(1) { - spin_lock_irqsave(&remap->lock, flags); - bio = remap->bios_waiting.head; - spin_unlock_irqrestore(&remap->lock, flags); - if (!bio) - break; - bio->bi_end_io(bio, 0, 0); - } - } else { - DMERR("Unable to complete empty request: %u\n", - id_of_op); - } + return 0; +free_rx: + dmu_ring_free(&t->rx); +free_tx: + dmu_ring_free(&t->tx); + return err; } -ssize_t dmu_ctl_write(struct file *file, const char __user *buffer, - size_t size, loff_t *offset) +static int dmu_ctl_release(struct inode *inode, struct file *file) { struct dmu_device *dev = (struct dmu_device *)file->private_data; - int ret = 0; - struct dmu_msg_header hdr; - union { - struct dmu_msg_map_response map_rsp; - struct dmu_msg_invalidate_map inval_rsp; - struct dmu_msg_version ver_req; - struct dmu_msg_status status_rsp; - } msgs; - - if (!capable(CAP_SYS_ADMIN)) - return -EACCES; - - while ((ret + sizeof(hdr)) < size) { - if (copy_from_user(&hdr, buffer+ret, sizeof(hdr))) { - DMERR("%s copy_from_user failed!", __FUNCTION__); - ret = -EFAULT; - goto out; - } - - ret += sizeof(hdr); - - switch (hdr.msg_type) { - - case DM_USERSPACE_GET_VERSION: - if (hdr.payload_len != sizeof(msgs.ver_req)) { - DMERR("Malformed version request"); - break; - } - - if (copy_from_user(&msgs.ver_req, buffer+ret, - sizeof(msgs.ver_req))) { - DMERR("%s copy_from_user failed!", - __FUNCTION__); - ret = -EFAULT; - goto out; - } - - version_request(&msgs.ver_req, dev, hdr.id); - break; - - case DM_USERSPACE_MAP_BLOCK_RESP: - if (hdr.payload_len != sizeof(msgs.map_rsp)) { - DMERR("Malformed block response"); - break; - } - - if (copy_from_user(&msgs.map_rsp, buffer+ret, - sizeof(msgs.map_rsp))) { - DMERR("%s copy_from_user failed!", - __FUNCTION__); - ret = -EFAULT; - goto out; - } - - remap_request(&msgs.map_rsp, dev, - msgs.map_rsp.id_of_req); - break; - - case DM_USERSPACE_MAP_FAILED: - if (hdr.payload_len != sizeof(msgs.map_rsp)) { - DMERR("Malformed block failed response"); - break; - } - - if (copy_from_user(&msgs.map_rsp, buffer+ret, - sizeof(msgs.map_rsp))) { - DMERR("%s copy_from_user failed", - __FUNCTION__); - ret = -EFAULT; - goto out; - } - - DMERR("Userspace map failed"); - break; - - case DM_USERSPACE_MAP_INVALIDATE: - if (hdr.payload_len != sizeof(msgs.inval_rsp)) { - DMERR("Malformed invalidate request"); - break; - } - - if (copy_from_user(&msgs.inval_rsp, buffer+ret, - sizeof(msgs.inval_rsp))) { - DMERR("%s copy_from_user failed", - __FUNCTION__); - ret = -EFAULT; - goto out; - } - - invalidate_request(&msgs.inval_rsp, dev, hdr.id); - break; + struct chardev_transport *t = dev->transport_private; - case DM_USERSPACE_STATUS: - if (hdr.payload_len != sizeof(msgs.status_rsp)) { - DMERR("Malformed invalidate request"); - break; - } - - if (copy_from_user(&msgs.status_rsp, buffer+ret, - sizeof(msgs.status_rsp))) { - DMERR("%s copy_from_user failed", - __FUNCTION__); - ret = -EFAULT; - goto out; - } - - if (msgs.status_rsp.status == - DM_USERSPACE_SYNC_COMPLETE) { - /* FIXME: check req */ - sync_complete(dev, msgs.status_rsp.id_of_op); - } - break; + t = container_of(inode->i_cdev, struct chardev_transport, cdev); - default: - DMWARN("Unknown request type: %i", hdr.msg_type); - } + dmu_ring_free(&t->rx); + dmu_ring_free(&t->tx); + put_dev(dev); - ret += hdr.payload_len; - } - out: - return ret; + 
return 0; } -int dmu_ctl_open(struct inode *inode, struct file *file) +static unsigned dmu_ctl_poll(struct file *file, poll_table *wait) { - struct chardev_transport *t; - struct dmu_device *dev; + struct dmu_device *dev = (struct dmu_device *)file->private_data; + struct chardev_transport *t = dev->transport_private; + struct dmu_ring *ring = &t->tx; + struct dmu_event *ev; + unsigned int mask = 0; + u32 idx; - if (!capable(CAP_SYS_ADMIN)) - return -EACCES; + poll_wait(file, &t->tx_poll_wait, wait); - t = container_of(inode->i_cdev, struct chardev_transport, cdev); - dev = t->parent; + spin_lock(&ring->r_lock); - get_dev(dev); + idx = ring->r_idx ? ring->r_idx - 1 : DMU_MAX_EVENTS - 1; + ev = dmu_head_event(ring, idx); + if (ev->status) + mask |= POLLIN | POLLRDNORM; - file->private_data = dev; + spin_unlock(&ring->r_lock); - return 0; + return mask; } -int dmu_ctl_release(struct inode *inode, struct file *file) +static int dmu_ring_map(struct vm_area_struct *vma, unsigned long addr, + struct dmu_ring *ring) { - struct dmu_device *dev; - - dev = (struct dmu_device *)file->private_data; - - put_dev(dev); + int i, err; + + for (i = 0; i < DMU_RING_PAGES; i++) { + struct page *page = virt_to_page(ring->r_pages[i]); + err = vm_insert_page(vma, addr, page); + if (err) + return err; + addr += PAGE_SIZE; + } return 0; } -unsigned dmu_ctl_poll(struct file *file, poll_table *wait) +static int dmu_ctl_mmap(struct file *file, struct vm_area_struct *vma) { struct dmu_device *dev = (struct dmu_device *)file->private_data; - unsigned mask = 0; + struct chardev_transport *t = dev->transport_private; + unsigned long addr; + int err; - poll_wait(file, &dev->wqueue, wait); + if (vma->vm_pgoff) + return -EINVAL; - if (have_pending_requests(dev)) - mask |= POLLIN | POLLRDNORM; + if (vma->vm_end - vma->vm_start != DMU_RING_SIZE * 2) { + DMERR("mmap size must be %lu, not %lu \n", + DMU_RING_SIZE * 2, vma->vm_end - vma->vm_start); + return -EINVAL; + } - return mask; + addr = vma->vm_start; + err = dmu_ring_map(vma, addr, &t->tx); + if (err) + return err; + err = dmu_ring_map(vma, addr + DMU_RING_SIZE, &t->rx); + + return err; } static struct file_operations ctl_fops = { - .open = dmu_ctl_open, - .release = dmu_ctl_release, - .read = dmu_ctl_read, - .write = dmu_ctl_write, - .poll = dmu_ctl_poll, - .owner = THIS_MODULE, + .open = dmu_ctl_open, + .release = dmu_ctl_release, + .write = dmu_ctl_write, + .mmap = dmu_ctl_mmap, + .poll = dmu_ctl_poll, + .owner = THIS_MODULE, }; static int get_free_minor(void) @@ -835,8 +343,7 @@ int register_chardev_transport(struct dm struct chardev_transport *t; int ret; - dev->transport_private = kmalloc(sizeof(struct chardev_transport), - GFP_KERNEL); + dev->transport_private = kzalloc(sizeof(*t), GFP_KERNEL); t = dev->transport_private; if (!t) { @@ -858,11 +365,10 @@ int register_chardev_transport(struct dm goto bad; } - return 1; - + return 0; bad: kfree(t); - return 0; + return -ENOMEM; } void unregister_chardev_transport(struct dmu_device *dev) diff --git a/drivers/md/dm-userspace.c b/drivers/md/dm-userspace.c index d567f49..f57df7d 100644 --- a/drivers/md/dm-userspace.c +++ b/drivers/md/dm-userspace.c @@ -36,425 +36,35 @@ #include "dm-bio-list.h" #include "kcopyd.h" #include "dm-user.h" -#define DMU_COPY_PAGES 256 -#define DMU_REMAP_RESERVE 128 -#define DMU_CACHE_LIMIT 4096 +#define DM_MSG_PREFIX "dm-userspace" -#define DM_MSG_PREFIX "dm-userspace" +#define DMU_COPY_PAGES 256 static kmem_cache_t *request_cache; -static kmem_cache_t *remap_cache; - -mempool_t *request_pool; 
- -static int enable_watchdog = 0; -static struct work_struct wd; - -static unsigned int map_cache = DMU_CACHE_LIMIT; +static mempool_t *request_pool; spinlock_t devices_lock; LIST_HEAD(devices); -static spinlock_t mru_list_lock; -static unsigned int mru_count; -static LIST_HEAD(mru_list); - -/* Device number for the control device */ -dev_t dmu_dev; - -static int error_bios(struct bio_list *bios) -{ - struct bio *bio; - int count = 0; - - while ((bio = bio_list_pop(bios)) != NULL) { - bio_io_error(bio, bio->bi_size); - count++; - } - - if (count) - DMERR("*** Failed %i requests", count); - - return count; -} - -static int destroy_mru_list(void) -{ - struct dmu_map *map, *next; - int count = 0; - - spin_lock(&mru_list_lock); - - list_for_each_entry_safe(map, next, &mru_list, mru_list) { - list_del(&map->mru_list); - kmem_cache_free(remap_cache, map); - count++; - mru_count -= 1; - } - - spin_unlock(&mru_list_lock); - - return count; -} - -static void remap_hit(struct dmu_map *remap) -{ - spin_lock(&mru_list_lock); - - list_del_init(&remap->mru_list); - list_add(&remap->mru_list, &mru_list); - - spin_unlock(&mru_list_lock); -} - -static int steal_remap(struct dmu_device *dev, struct dmu_map *remap) -{ - unsigned long flags; - - spin_lock(&dev->lock); - spin_lock_irqsave(&remap->lock, flags); - - if (!dmu_get_flag(&remap->flags, DMU_FLAG_VALID)) - goto unlock; - - if (dmu_get_flag(&remap->flags, DMU_FLAG_INUSE)) { - /* Remove it from whatever device owns it */ - - if (!list_empty(&remap->list)) - list_del_init(&remap->list); - - dmu_clr_flag(&remap->flags, DMU_FLAG_INUSE); - remap->dev = NULL; - } - - unlock: - spin_unlock_irqrestore(&remap->lock, flags); - spin_unlock(&dev->lock); - - return 1; -} - -static struct dmu_map *find_lru_victim(struct dmu_device *dev) +/* Return the block number for @sector */ +static inline u64 dmu_block(struct dmu_device *dev, sector_t sector) { - struct dmu_map *remap; - unsigned int count = 0; - - spin_lock(&mru_list_lock); - list_for_each_entry_reverse(remap, &mru_list, mru_list) { - count++; - if (dmu_get_flag(&remap->flags, DMU_FLAG_VALID)&& - !dmu_get_flag(&remap->flags, DMU_FLAG_WAITING)) { - list_del_init(&remap->mru_list); - break; - } - } - spin_unlock(&mru_list_lock); - - if (remap) { - steal_remap(dev, remap); - spin_lock(&mru_list_lock); - list_add_tail(&remap->mru_list, &mru_list); - spin_unlock(&mru_list_lock); - } - - return remap; + return sector >> dev->block_shift; } -struct dmu_map *alloc_remap_atomic(struct dmu_device *dev) +/* Return the sector offset in a block for @sector */ +static inline u64 dmu_sector_offset(struct dmu_device *dev, sector_t sector) { - struct dmu_map *remap = NULL; - - if (mru_count < map_cache) { - /* Try to allocate one from the cache */ - remap = kmem_cache_alloc(remap_cache, GFP_NOIO); - if (remap) { - INIT_LIST_HEAD(&remap->mru_list); - - spin_lock(&mru_list_lock); - list_add_tail(&remap->mru_list, &mru_list); - mru_count += 1; - spin_unlock(&mru_list_lock); - - goto out; - } - } - - /* Unable to alloc one, so get the LRU item off the list */ - remap = find_lru_victim(dev); - - if (remap) { - DMINFO("Memory is low. 
Stole the LRU remap..."); - remap_hit(remap); - } else { - DMERR("Failed to alloc or steal a remap!"); - BUG(); - } - - out: - return remap; + return sector & dev->block_mask; } -void free_remap(struct dmu_map *remap) +/* Return the starting sector for @block */ +static inline u64 dmu_sector(struct dmu_device *dev, uint64_t block) { - unsigned long flags; - - if (error_bios(&remap->bios)) { - DMERR("Freed a map with in-flight data!"); - BUG(); - } - - spin_lock_irqsave(&remap->lock, flags); - remap->flags = 0; - spin_unlock_irqrestore(&remap->lock, flags); - - spin_lock(&remap->dev->lock); - list_del(&remap->list); - spin_unlock(&remap->dev->lock); - - spin_lock(&mru_list_lock); - list_del_init(&remap->mru_list); - list_add_tail(&remap->mru_list, &mru_list); - spin_unlock(&mru_list_lock); + return block << dev->block_shift; } -void __free_remap(struct dmu_map *remap) -{ - if (error_bios(&remap->bios)) { - DMERR("Freed a map with in-flight data!"); - BUG(); - } - - remap->flags = 0; - - list_del(&remap->list); - - spin_lock(&mru_list_lock); - list_del_init(&remap->mru_list); - list_add_tail(&remap->mru_list, &mru_list); - spin_unlock(&mru_list_lock); -} - -static struct userspace_request *make_sync_req(struct dmu_device *dev, - struct dmu_map *remap) -{ - struct userspace_request *req; - unsigned long flags; - - req = mempool_alloc(request_pool, GFP_NOIO); - if (!req) { - DMERR("Failed to allocate copy response"); - return NULL; - } - init_request(dev, DM_USERSPACE_SYNC_COMPLETE, req); - - spin_lock_irqsave(&remap->lock, flags); - req->id = remap->id; - spin_unlock_irqrestore(&remap->lock, flags); - - req->remap = remap; - - return req; -} - -static void endio_worker(void *data) -{ - struct dmu_map *remap = data; - struct userspace_request *req = NULL; - - req = make_sync_req(remap->dev, remap); - - if (req) - add_request(req->dev, req); -} - -void init_remap(struct dmu_device *dev, struct dmu_map *remap) -{ - spin_lock_init(&remap->lock); - remap->org_block = remap->new_block = 0; - remap->offset = 0; - remap->flags = 0; - dmu_set_flag(&remap->flags, DMU_FLAG_INUSE); - remap->src = remap->dest = NULL; - bio_list_init(&remap->bios); - bio_list_init(&remap->bios_waiting); - INIT_LIST_HEAD(&remap->list); - remap->dev = dev; - remap->next = NULL; - - INIT_WORK(&remap->endio_task, endio_worker, remap); -} - -void init_request(struct dmu_device *dev, - int type, - struct userspace_request *req) -{ - spin_lock_init(&req->lock); - INIT_LIST_HEAD(&req->list); - req->dev = dev; - req->type = type; - req->sent = 0; - req->flags = 0; - if (type == DM_USERSPACE_SYNC_COMPLETE) { - req->u.block = 0; - req->id = 0; - } else { - spin_lock(&dev->lock); - dev->id_counter++; - if (dev->id_counter == 0) - dev->id_counter = 1; - req->id = dev->id_counter; - spin_unlock(&dev->lock); - } - atomic_set(&req->refcnt, 0); -} - -/* - * For an even block distribution, this is not too bad, but it could - * probably be better - */ -static uint32_t ht_hash(struct hash_table *ht, uint64_t block) -{ - return (uint32_t)block & ht->mask; -} - -static int ht_init(struct hash_table *ht, unsigned long size) -{ - uint64_t i; - unsigned long pages; - unsigned int order = ffs((size * sizeof(struct list_head *)) / - PAGE_SIZE); - - if (order > 9) - return 0; - - pages = __get_free_pages(GFP_ATOMIC, order); - if (!pages) - return 0; - - ht->table = (void *)pages; - ht->size = size; - ht->count = 0; - ht->mask = size - 1; - - for (i = 0; i < size; i++) - INIT_LIST_HEAD(&ht->table[i]); - - return 1; -} - -static void 
ht_insert_bucket(struct dmu_map *map, struct list_head *list) -{ - list_add_tail(&map->list, list); -} - -/* - * I'm sure this is quite dumb, but it works for now - */ -static int ht_should_grow(struct hash_table *ht) -{ - return ht->count > (2 * (ht->size / 4)); -} - -static void ht_grow_table(struct hash_table *ht); -void ht_insert_map(struct hash_table *ht, struct dmu_map *map) -{ - uint32_t addr; - - addr = ht_hash(ht, map->org_block) & ht->mask; - - BUG_ON(addr >= ht->size); - - ht_insert_bucket(map, &ht->table[addr]); - ht->count++; - - if (ht_should_grow(ht)) - ht_grow_table(ht); -} - -void ht_delete_map(struct hash_table *ht, struct dmu_map *map) -{ - list_del_init(&map->list); - BUG_ON(ht->count == 0); - ht->count--; -} - -struct dmu_map *ht_find_map(struct hash_table *ht, uint64_t block) -{ - uint32_t addr; - struct dmu_map *m; - - addr = ht_hash(ht, block) & ht->mask; - - BUG_ON(addr >= ht->size); - - list_for_each_entry(m, &ht->table[addr], list) { - if (m->org_block == block) { - remap_hit(m); - return m; - } - } - - return NULL; -} - -struct dmu_map *ht_find_map_dev(struct dmu_device *dev, uint64_t block) -{ - struct dmu_map *remap; - - spin_lock(&dev->lock); - - remap = ht_find_map(&dev->remaps, block); - - spin_unlock(&dev->lock); - - return remap; -} - -static void ht_grow_table(struct hash_table *ht) -{ - struct hash_table old_table; - uint64_t i; - - old_table = *ht; - - if (!ht_init(ht, old_table.size * 2)) - return; - - for (i = 0; i < old_table.size; i++ ) { - struct dmu_map *m, *n; - list_for_each_entry_safe(m, n, &old_table.table[i], - list) { - list_del_init(&m->list); - ht_insert_map(ht, m); - } - } - - free_pages((unsigned long)old_table.table, - ffs((old_table.size * sizeof(struct list_head *)) - / PAGE_SIZE)); -} - -static uint64_t ht_destroy_table(struct hash_table *ht) -{ - uint64_t i, count = 0; - struct dmu_map *m, *n; - - for (i = 0; i < ht->size; i++) { - list_for_each_entry_safe(m, n, &ht->table[i], list) { - ht_delete_map(ht, m); - free_remap(m); - count++; - } - } - - free_pages((unsigned long)ht->table, - ffs((ht->size * sizeof(struct list_head *)) - / PAGE_SIZE)); - - return count; -} - -struct target_device *find_target(struct dmu_device *dev, +static struct target_device *find_target(struct dmu_device *dev, dev_t devno) { struct target_device *target, *match = NULL; @@ -516,59 +126,10 @@ static void put_target(struct dmu_device kfree(target); } -/* - * This periodically dumps out some debug information. It's really - * only useful while developing. 
- */ -static void watchdog(void *data) -{ - unsigned int v_remaps, i_remaps, reqs, s_reqs, devs = 0; - struct dmu_device *dev; - struct dmu_map *map; - struct userspace_request *req; - uint64_t i; - - spin_lock(&devices_lock); - - list_for_each_entry(dev, &devices, list) { - spin_lock(&dev->lock); - - v_remaps = i_remaps = reqs = s_reqs = 0; - - for (i = 0; i < dev->remaps.size; i++) { - list_for_each_entry(map, &dev->remaps.table[i], list) - if (dmu_get_flag(&map->flags, DMU_FLAG_VALID)) - v_remaps++; - else - i_remaps++; - } - - list_for_each_entry(req, &dev->requests, list) - if (req->sent) - s_reqs++; - else - reqs++; - - printk("Device " - " reqs: %u/%u " - " inv maps: %u " - " val maps: %u\n", - reqs, s_reqs, i_remaps, v_remaps); - devs++; - - spin_unlock(&dev->lock); - } - - spin_unlock(&devices_lock); - - schedule_delayed_work(&wd, HZ); -} - void destroy_dmu_device(struct kref *ref) { struct dmu_device *dev; struct list_head *cursor, *next; - uint64_t remaps; dev = container_of(ref, struct dmu_device, users); @@ -586,51 +147,22 @@ void destroy_dmu_device(struct kref *ref put_target(dev, target); } - remaps = ht_destroy_table(&dev->remaps); - - list_for_each_safe(cursor, next, &dev->requests) { - struct userspace_request *req; - - req = list_entry(cursor, - struct userspace_request, - list); - - list_del(&req->list); - - mempool_free(req, request_pool); - } - kcopyd_client_destroy(dev->kcopy); unregister_chardev_transport(dev); kfree(dev); - - /* Don't leave remaps laying around if there are not any - * devices left to use them - */ - spin_lock(&devices_lock); - destroy_mru_list(); - spin_unlock(&devices_lock); } static int init_dmu_device(struct dmu_device *dev, u32 block_size) { int ret; - init_waitqueue_head(&dev->wqueue); INIT_LIST_HEAD(&dev->list); INIT_LIST_HEAD(&dev->requests); INIT_LIST_HEAD(&dev->target_devs); kref_init(&dev->users); spin_lock_init(&dev->lock); - dev->id_counter = 1; /* reserve 0 for unsolicited maps */ - - if (!ht_init(&dev->remaps, 2048)) { - DMERR("Unable to allocate hash table"); - return 0; - } - dev->block_size = block_size; dev->block_mask = block_size - 1; dev->block_shift = ffs(block_size) - 1; @@ -652,7 +184,7 @@ static struct dmu_device *new_dmu_device int ret; dev = kmalloc(sizeof(*dev), GFP_KERNEL); - if (dev == NULL) { + if (!dev) { DMERR("Failed to allocate new userspace device"); return NULL; } @@ -663,7 +195,7 @@ static struct dmu_device *new_dmu_device snprintf(dev->key, DMU_KEY_LEN, "%s", key); ret = register_chardev_transport(dev); - if (!ret) + if (ret) goto bad2; spin_lock(&devices_lock); @@ -671,7 +203,6 @@ static struct dmu_device *new_dmu_device spin_unlock(&devices_lock); return dev; - bad2: put_dev(dev); bad1: @@ -721,17 +252,16 @@ static int dmu_ctr(struct dm_target *ti, block_size = simple_strtoul(block_size_param, NULL, 10) / 512; dev = find_dmu_device(device_key); - if (dev == NULL) { + if (!dev) { dev = new_dmu_device(device_key, ti, block_size); - if (dev == NULL) { + if (!dev) { ti->error = "Failed to create device"; goto bad; } - } else { + } else get_dev(dev); - } spin_lock(&dev->lock); if (dev->block_size != block_size) { @@ -757,13 +287,11 @@ static int dmu_ctr(struct dm_target *ti, return 0; bad: - if (dev) { + if (dev) spin_unlock(&dev->lock); - } out: - if (dev) { + if (dev) put_dev(dev); - } return -EINVAL; } @@ -775,314 +303,187 @@ static void dmu_dtr(struct dm_target *ti put_dev(dev); } -/* Search @dev for an outstanding request for remapping @block */ -static struct userspace_request *find_existing_req(struct 
dmu_device *dev, - uint64_t block) +static int dmu_map(struct dm_target *ti, struct bio *bio, + union map_info *map_context) { - struct userspace_request *req; - struct userspace_request *match = NULL; - - spin_lock(&dev->lock); + struct dmu_device *dev = (struct dmu_device *) ti->private; + struct dmu_request *req; + int err; - list_for_each_entry_reverse(req, &dev->requests, list) { - if ((req->type == DM_USERSPACE_MAP_BLOCK_REQ) && - (req->remap->org_block == block)) { - match = req; - atomic_inc(&match->refcnt); - break; - } + req = mempool_alloc(request_pool, GFP_NOIO); + if (!req) { + DMERR("Failed to allocate request"); + return -1; } - spin_unlock(&dev->lock); - - return match; -} - -static int make_new_request(struct dmu_device *dev, - struct bio *bio, - void **ctxptr) -{ - struct userspace_request *req; - - req = mempool_alloc(request_pool, GFP_NOIO); - if (req == NULL) - goto bad; + req->dev = dev; + req->bio = bio; - init_request(dev, DM_USERSPACE_MAP_BLOCK_REQ, req); + spin_lock(&dev->lock); + list_add_tail(&req->list, &dev->requests); + spin_unlock(&dev->lock); - dmu_set_flag(&req->flags, DMU_FLAG_RD); - if (bio_rw(bio)) - dmu_set_flag(&req->flags, DMU_FLAG_WR); - else - dmu_clr_flag(&req->flags, DMU_FLAG_WR); + err = dmu_uspace_send_map_req(dev, (u64)(unsigned long)req, 0, + dmu_block(dev, bio->bi_sector)); + if (err) { + spin_lock(&dev->lock); + list_del(&req->list); + spin_unlock(&dev->lock); - req->remap = alloc_remap_atomic(dev); - if (!req->remap) { - DMERR("Failed to alloc remap!"); - goto bad; + mempool_free(req, request_pool); + return -1; } - init_remap(dev, req->remap); - - bio_list_add(&req->remap->bios, bio); - req->remap->org_block = dmu_block(dev, bio->bi_sector); - - *ctxptr = req->remap; - - add_request(dev, req); + map_context->ptr = req; return 0; - - bad: - DMERR("Failed to queue bio!"); - return -1; } -static int dmu_map_remap_case(struct dmu_device *dev, - struct dmu_map *remap, - struct bio *bio) +static int dmu_status(struct dm_target *ti, status_type_t type, + char *result, unsigned int maxlen) { - int ret = 0; - int rw; - unsigned long flags; - - spin_lock_irqsave(&remap->lock, flags); - - /* - * We've got it locked, so make sure the info is still valid - * before we use it - */ - if (!dmu_get_flag(&remap->flags, DMU_FLAG_INUSE)) { - ret = -1; - DMERR("Got an invalid remap from hashtable"); - goto unlock; - } else if (remap->org_block != dmu_block(dev, bio->bi_sector)) { - ret = -1; - DMERR("Aiee, org block changed underneath us!"); - goto unlock; - } - - rw = dmu_get_flag(&remap->flags, DMU_FLAG_WR); + struct dmu_device *dev = (struct dmu_device *) ti->private; + switch (type) { + case STATUSTYPE_INFO: + write_chardev_transport_info(dev, result, maxlen); + break; - if (rw || (bio_rw(bio) == rw)) { - if (dmu_get_flag(&remap->flags, DMU_FLAG_VALID)) { - __bio_remap(bio, remap); - ret = 1; - } else { - bio_list_add(&remap->bios, bio); - } - } else { - ret = -1; -// printk("Remap doesn't match perms: %llu (%c!=%c)\n", -// remap->org_block, -// rw ? 'W':'R', -// bio_rw(bio) ? 
'W':'R'); + case STATUSTYPE_TABLE: + snprintf(result, maxlen, "%s %llu", + dev->key, + dev->block_size * 512); + break; } - unlock: - spin_unlock_irqrestore(&remap->lock, flags); - - return ret; + return 0; } -static int dmu_map_request_case(struct dmu_device *dev, - struct userspace_request *req, - struct bio *bio) +static int dmu_end_io(struct dm_target *ti, struct bio *bio, + int error, union map_info *map_context) { - int ret = 0; - int req_rw = dmu_get_flag(&req->flags, DMU_FLAG_WR); - unsigned long flags; - - spin_lock(&req->lock); - spin_lock_irqsave(&req->remap->lock, flags); - - if (!req_rw && bio_rw(bio) && !req->sent) { - /* Convert to R/W and Queue */ - dmu_set_flag(&req->flags, DMU_FLAG_WR); - bio_list_add(&req->remap->bios, bio); - } else if (!req_rw && bio_rw(bio) && req->sent) { - /* Can't convert, must re-request */ - ret = -1; - } else { - /* Queue */ - bio_list_add(&req->remap->bios, bio); - } + struct dmu_request *req = map_context->ptr; + int err; - spin_unlock_irqrestore(&req->remap->lock, flags); - spin_unlock(&req->lock); + if (req->flags & DMU_FLAG_WAITING) { + err = dmu_uspace_send_map_status(req->dev, + (u64)(unsigned long)req, 0); + if (err) + DMERR("can't send notification %llu", (u64)(unsigned long)req); + } - return ret; + mempool_free(req, request_pool); + return 0; } -DECLARE_MUTEX(map_mutex); +static struct target_type userspace_target = { + .name = "userspace", + .version = {0, 1, 0}, + .module = THIS_MODULE, + .ctr = dmu_ctr, + .dtr = dmu_dtr, + .map = dmu_map, + .status = dmu_status, + .end_io = dmu_end_io +}; -static int dmu_map(struct dm_target *ti, struct bio *bio, - union map_info *map_context) +static void copy_block_done(int read_err, unsigned int write_err, void *data) { - struct dmu_device *dev = (struct dmu_device *) ti->private; - struct dmu_map *remap; - struct userspace_request *req; - int ret = 0; - u64 block; - - down(&map_mutex); - - map_context->ptr = NULL; - - block = dmu_block(dev, bio->bi_sector); - - remap = ht_find_map_dev(dev, block); - if (remap) { - ret = dmu_map_remap_case(dev, remap, bio); - if (ret >= 0) { - map_context->ptr = remap; - goto done; - } + struct dmu_request *req = data; + generic_make_request(req->bio); +} - } +static void copy_block(struct dmu_device *dev, struct block_device *src_dev, + struct block_device *dst_dev, struct dmu_request *req, + u64 block, u64 offset) +{ + struct io_region src, dst; + struct kcopyd_client *client; - req = find_existing_req(dev, block); - if (req) { - ret = dmu_map_request_case(dev, req, bio); - atomic_dec(&req->refcnt); - if (ret >= 0) { - map_context->ptr = req->remap; - goto done; - } - } + src.bdev = src_dev; + src.sector = dmu_sector(dev, dmu_block(dev, req->bio->bi_sector)); + src.count = dev->block_size; - ret = make_new_request(dev, bio, &map_context->ptr); + dst.bdev = dst_dev; + dst.sector = dmu_sector(dev, block); + dst.sector += offset; + dst.count = dev->block_size; - done: - up(&map_mutex); - - return ret; + client = dev->kcopy; + + kcopyd_copy(client, &src, 1, &dst, 0, copy_block_done, req); } -static int dmu_status(struct dm_target *ti, status_type_t type, - char *result, unsigned int maxlen) +void dmu_map_done(struct dmu_device *dev, u64 id, uint32_t flags, + uint32_t src_maj, uint32_t src_min, + uint32_t dst_maj, uint32_t dst_min, u64 block, u64 offset) { - struct dmu_device *dev = (struct dmu_device *) ti->private; + struct dmu_request *cur, *next, *req = NULL; + struct target_device *src_dev = NULL, *dst_dev; + struct bio *bio; - switch (type) { - case 
STATUSTYPE_INFO: - write_chardev_transport_info(dev, result, maxlen); - break; + spin_lock(&dev->lock); + list_for_each_entry_safe(cur, next, &dev->requests, list) { + if ((u64) (unsigned long)cur == id) { + list_del(&cur->list); + req = cur; + } + } + spin_unlock(&dev->lock); - case STATUSTYPE_TABLE: - snprintf(result, maxlen, "%s %llu", - dev->key, - dev->block_size * 512); - break; + if (!req) { + DMERR("can't find %llu", (unsigned long long)id); + return; } - return 0; -} + bio = req->bio; + req->flags = flags; -static int __handle_bio_endio(struct dmu_map *remap, - struct bio *bio, - struct userspace_request **req) -{ - int ret = 0; - unsigned long flags; - - spin_lock_irqsave(&remap->lock, flags); - if (dmu_get_flag(&remap->flags, DMU_FLAG_WAITING) && - remap->bios_waiting.head == NULL) { - /* First endio and waiting for resp from userspace */ - bio_list_add(&remap->bios_waiting, bio); - - /* Schedule request worker */ - INIT_WORK(&remap->endio_task, endio_worker, remap); - schedule_work(&remap->endio_task); - - ret = 1; - } else if (dmu_get_flag(&remap->flags, DMU_FLAG_WAITING)) { - /* Still waiting for resp from userspace */ - bio_list_add(&remap->bios_waiting, bio); - ret = 1; - } else if (remap->bios_waiting.head != NULL) { - /* Got resp from userspace but bios waiting list nonempty */ - if (bio == remap->bios_waiting.head) { - bio_list_pop(&remap->bios_waiting); - ret = 0; - } else { - bio_list_add(&remap->bios_waiting, bio); - ret = 1; + if (flags & DMU_FLAG_VALID) { + if (flags & DMU_FLAG_COPY_FIRST) { + src_dev = find_target(dev, MKDEV(src_maj, src_min)); + if (!src_dev) + goto eio; } - } - spin_unlock_irqrestore(&remap->lock, flags); - return ret; -} + dst_dev = find_target(dev, MKDEV(dst_maj, dst_min)); + if (!dst_dev) + goto eio; -static int dmu_end_io(struct dm_target *ti, struct bio *bio, - int error, union map_info *map_context) -{ - struct dmu_map *remap; - struct userspace_request *req = NULL; - int ret = 0; + bio->bi_sector = dmu_sector(dev, block) + + dmu_sector_offset(dev, bio->bi_sector) + offset; + bio->bi_bdev = dst_dev->bdev; - remap = map_context->ptr; - - if (error) { - DMERR("Error in dmu_end_io"); - return -1; - } else if (!remap) { - return 0; + if (flags & DMU_FLAG_COPY_FIRST) + copy_block(dev, src_dev->bdev, dst_dev->bdev, + req, block, offset); + else + generic_make_request(bio); } - ret = __handle_bio_endio(remap, bio, &req); - - return ret; + return; +eio: + bio_io_error(bio, bio->bi_size); } -struct target_type userspace_target = { - .name = "userspace", - .version = {0, 1, 0}, - .module = THIS_MODULE, - .ctr = dmu_ctr, - .dtr = dmu_dtr, - .map = dmu_map, - .status = dmu_status, - .end_io = dmu_end_io -}; - int __init dm_userspace_init(void) { - int i; - int r = dm_register_target(&userspace_target); - if (r < 0) { - DMERR("Register failed %d", r); + int err; + + err = dm_register_target(&userspace_target); + if (err < 0) { + DMERR("Register failed %d", err); return 0; } spin_lock_init(&devices_lock); - spin_lock_init(&mru_list_lock); - - if (enable_watchdog) { - INIT_WORK(&wd, watchdog, NULL); - schedule_delayed_work(&wd, HZ); - } - request_cache = - kmem_cache_create("dm-userspace-requests", - sizeof(struct userspace_request), - __alignof__ (struct userspace_request), - 0, NULL, NULL); + request_cache = kmem_cache_create("dm-userspace-requests", + sizeof(struct dmu_request), + __alignof__ (struct dmu_request), + 0, NULL, NULL); if (!request_cache) { DMERR("Failed to allocate request cache"); - goto bad; - } - - remap_cache = - 
kmem_cache_create("dm-userspace-remaps", - sizeof(struct dmu_map), - __alignof__ (struct dmu_map), - 0, NULL, NULL); - if (!remap_cache) { - DMERR("Failed to allocate remap cache"); - goto bad2; + goto unregister_target; } request_pool = mempool_create(64, @@ -1090,39 +491,21 @@ int __init dm_userspace_init(void) request_cache); if (!request_pool) { DMERR("Failed to allocate request pool"); - goto bad3; + goto request_cache_destroy; } - r = init_chardev_transport(); - if (!r) - goto bad4; - - for (i = 0; i < DMU_REMAP_RESERVE; i++) { - struct dmu_map *remap; - - remap = alloc_remap_atomic(NULL); - if (!remap) { - DMERR("Failed to allocate %i/%i reserve remap", - i, DMU_REMAP_RESERVE); - goto bad5; - } - init_remap(NULL, remap); - remap_hit(remap); - } + err = init_chardev_transport(); + if (!err) + goto request_pool_destroy; return 1; - bad5: - destroy_mru_list(); - bad4: +request_pool_destroy: mempool_destroy(request_pool); - bad3: - kmem_cache_destroy(remap_cache); - bad2: +request_cache_destroy: kmem_cache_destroy(request_cache); - bad: +unregister_target: dm_unregister_target(&userspace_target); - return 0; } @@ -1132,10 +515,6 @@ void __exit dm_userspace_exit(void) struct list_head *cursor, *next; struct dmu_device *dev; - if (enable_watchdog) - if (!cancel_delayed_work(&wd)) - flush_scheduled_work(); - spin_lock(&devices_lock); list_for_each_safe(cursor, next, &devices) { @@ -1149,11 +528,8 @@ void __exit dm_userspace_exit(void) cleanup_chardev_transport(); - r = destroy_mru_list(); - mempool_destroy(request_pool); kmem_cache_destroy(request_cache); - kmem_cache_destroy(remap_cache); r = dm_unregister_target(&userspace_target); if (r < 0) @@ -1163,9 +539,6 @@ void __exit dm_userspace_exit(void) module_init(dm_userspace_init); module_exit(dm_userspace_exit); -module_param(enable_watchdog, int, S_IRUGO); -module_param(map_cache, int, S_IRUGO); - MODULE_DESCRIPTION(DM_NAME " userspace target"); MODULE_AUTHOR("Dan Smith"); MODULE_LICENSE("GPL"); diff --git a/include/linux/dm-userspace.h b/include/linux/dm-userspace.h index 9b2db20..bfad3b6 100644 --- a/include/linux/dm-userspace.h +++ b/include/linux/dm-userspace.h @@ -25,123 +25,57 @@ #include /* * Message Types */ -#define DM_USERSPACE_GET_VERSION 1 -#define DM_USERSPACE_MAP_BLOCK_REQ 2 -#define DM_USERSPACE_MAP_BLOCK_RESP 3 -#define DM_USERSPACE_MAP_FAILED 4 -#define DM_USERSPACE_MAP_INVALIDATE 5 -#define DM_USERSPACE_STATUS 6 - -/* - * Status codes - */ -#define DM_USERSPACE_INVAL_COMPLETE 101 -#define DM_USERSPACE_INVAL_FAILED 102 -#define DM_USERSPACE_SYNC_COMPLETE 103 +#define DM_USERSPACE_MAP_BLOCK_REQ 1 +#define DM_USERSPACE_MAP_BLOCK_RSP 2 +#define DM_USERSPACE_MAP_BLOCK_DONE 3 /* * Flags and associated macros */ -#define DMU_FLAG_VALID 1 -#define DMU_FLAG_RD 2 -#define DMU_FLAG_WR 4 -#define DMU_FLAG_COPY_FIRST 8 -#define DMU_FLAG_TEMPORARY 16 -#define DMU_FLAG_INUSE 32 -#define DMU_FLAG_SYNC 64 -#define DMU_FLAG_WAITING 128 - -static int dmu_get_flag(uint32_t *flags, uint32_t flag) -{ - return (*flags & flag) != 0; -} - -static void dmu_set_flag(uint32_t *flags, uint32_t flag) -{ - *flags |= flag; -} - -static void dmu_clr_flag(uint32_t *flags, uint32_t flag) -{ - *flags &= (~flag); -} - -static void dmu_cpy_flag(uint32_t *flags, uint32_t src, uint32_t flag) -{ - *flags = (*flags & ~flag) | (src & flag); -} - -/* - * This message header is sent in front of every message, in both - * directions - */ -struct dmu_msg_header { - uint32_t msg_type; - uint32_t payload_len; - uint32_t id; -}; - -/* DM_USERSPACE_GET_VERSION */ 
-struct dmu_msg_version { - uint32_t userspace_ver; - uint32_t kernel_ver; -}; - -/* For status codes */ -struct dmu_msg_status { - uint32_t id_of_op; +#define DMU_FLAG_VALID (1 << 0) +#define DMU_FLAG_RD (1 << 1) +#define DMU_FLAG_WR (1 << 2) +#define DMU_FLAG_COPY_FIRST (1 << 3) +#define DMU_FLAG_SYNC (1 << 4) +#define DMU_FLAG_WAITING (1 << 5) + +struct dmu_event { uint32_t status; -}; - -/* DM_USERSPACE_MAP_BLOCK_REQ */ -struct dmu_msg_map_request { - uint64_t org_block; - - uint32_t flags; -}; - -/* DM_USERSPACE_MAP_BLOCK_RESP - * DM_USERSPACE_MAP_BLOCK_FAILED - */ -struct dmu_msg_map_response { - uint64_t org_block; - uint64_t new_block; - int64_t offset; - - uint32_t id_of_req; - uint32_t flags; - - uint32_t src_maj; - uint32_t src_min; - - uint32_t dst_maj; - uint32_t dst_min; -}; - -/* DM_USERSPACE_MAP_INVALIDATE */ -struct dmu_msg_invalidate_map { - uint64_t org_block; -}; - -static inline int dmu_get_msg_len(int type) -{ - switch (type) { - case DM_USERSPACE_GET_VERSION: - return sizeof(struct dmu_msg_version); - case DM_USERSPACE_INVAL_COMPLETE: - case DM_USERSPACE_INVAL_FAILED: - case DM_USERSPACE_STATUS: - return sizeof(struct dmu_msg_status); - case DM_USERSPACE_MAP_BLOCK_REQ: - return sizeof(struct dmu_msg_map_request); - case DM_USERSPACE_MAP_BLOCK_RESP: - case DM_USERSPACE_MAP_FAILED: - return sizeof(struct dmu_msg_map_response); - case DM_USERSPACE_MAP_INVALIDATE: - return sizeof(struct dmu_msg_invalidate_map); - default: - return -1; - }; -} + uint32_t type; + + /* user -> kernel */ + union { + struct { + aligned_u64 id; + uint32_t flags; + uint32_t src_maj; + uint32_t src_min; + + uint32_t dst_maj; + uint32_t dst_min; + aligned_u64 block; + aligned_u64 offset; + } map_rsp; + } u; + + /* kernel -> user */ + union { + struct { + aligned_u64 id; + uint32_t flags; + aligned_u64 block; + } map_req; + struct { + aligned_u64 id; + uint32_t status; + } map_done; + } k; + +} __attribute__ ((aligned (sizeof(uint64_t)))); + +#define DMU_RING_SIZE (1UL << 16) +#define DMU_RING_PAGES (DMU_RING_SIZE >> PAGE_SHIFT) +#define DMU_EVENT_PER_PAGE (PAGE_SIZE / sizeof(struct dmu_event)) +#define DMU_MAX_EVENTS (DMU_EVENT_PER_PAGE * DMU_RING_PAGES) #endif -- 1.4.1
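
A rough illustration of how a userspace daemon might drive the new mmap'd event
rings. This is a sketch, not part of the patch: the /dev/dmu0 node name, an
exported <linux/dm-userspace.h> header, the identity remap onto major 8 /
minor 0, and the omission of memory barriers are all assumptions; the ring walk
simply mirrors what dmu_ctl_poll(), dmu_ctl_write() and dmu_head_event() on the
kernel side appear to expect.

/*
 * Hypothetical consumer of the dm-userspace event rings added by this
 * patch.  Assumed: /dev/dmu0 is the control node for the device and
 * <linux/dm-userspace.h> is visible to userspace builds.
 */
#include <stdint.h>
#include <string.h>
#include <unistd.h>
#include <fcntl.h>
#include <poll.h>
#include <sys/mman.h>
#include <linux/types.h>
#include <linux/dm-userspace.h>	/* struct dmu_event, DMU_RING_SIZE, flags */

/* Mirror the kernel's dmu_head_event(): events never straddle a page. */
static struct dmu_event *ring_slot(char *ring, uint32_t idx, long page_size)
{
	uint32_t per_page = page_size / sizeof(struct dmu_event);

	return (struct dmu_event *)(ring + (idx / per_page) * page_size +
				    (idx % per_page) * sizeof(struct dmu_event));
}

int main(void)
{
	long page_size = sysconf(_SC_PAGESIZE);
	uint32_t max_events = (page_size / sizeof(struct dmu_event)) *
			      (DMU_RING_SIZE / page_size);
	uint32_t tx_idx = 0, rx_idx = 0;
	char *tx, *rx;
	int fd;

	fd = open("/dev/dmu0", O_RDWR);			/* assumed node name */
	if (fd < 0)
		return 1;

	/* dmu_ctl_mmap() requires exactly 2 rings: tx first, then rx. */
	tx = mmap(NULL, DMU_RING_SIZE * 2, PROT_READ | PROT_WRITE,
		  MAP_SHARED, fd, 0);
	if (tx == MAP_FAILED)
		return 1;
	rx = tx + DMU_RING_SIZE;

	for (;;) {
		struct pollfd pfd = { .fd = fd, .events = POLLIN };

		poll(&pfd, 1, -1);

		/* Drain kernel->user map requests in ring order. */
		for (;;) {
			struct dmu_event *req = ring_slot(tx, tx_idx, page_size);
			struct dmu_event *rsp = ring_slot(rx, rx_idx, page_size);

			if (!req->status)
				break;

			/* Answer with a trivial identity remap (assumed target). */
			memset(rsp, 0, sizeof(*rsp));
			rsp->type = DM_USERSPACE_MAP_BLOCK_RSP;
			rsp->u.map_rsp.id = req->k.map_req.id;
			rsp->u.map_rsp.flags = DMU_FLAG_VALID;
			rsp->u.map_rsp.dst_maj = 8;	/* assumed destination dev */
			rsp->u.map_rsp.dst_min = 0;
			rsp->u.map_rsp.block = req->k.map_req.block;
			rsp->u.map_rsp.offset = 0;
			rsp->status = 1;

			req->status = 0;		/* hand the tx slot back */
			tx_idx = (tx_idx + 1) % max_events;
			rx_idx = (rx_idx + 1) % max_events;
		}

		/* Any write() makes the kernel walk the rx ring (dmu_ctl_write). */
		write(fd, "", 1);
	}
}

The design change this sketch leans on: instead of copying dmu_msg structures
through read()/write(), requests and responses now sit in two shared 64 KB
rings (tx = kernel->user at offset 0, rx = user->kernel at DMU_RING_SIZE), and
the character device is used only for poll() notification and a write() kick.
With the in-kernel remap cache gone, the kernel no longer short-circuits
repeated blocks; dmu_map() sends a MAP_BLOCK_REQ for every bio and userspace is
expected to answer each one.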