From d0be70047d0c65d34f3a982dbf63be9c47a54627 Mon Sep 17 00:00:00 2001
From: FUJITA Tomonori
Date: Mon, 4 Aug 2008 17:03:19 +0900
Subject: [PATCH] dm snapshot: shared exception store

This is a new implementation of dm-snapshot. The important design
differences from the current dm-snapshot are:

- It uses one exception store per origin device, shared by all
  snapshots of that origin.
- It doesn't keep the complete exception tables in memory.

The exception store code is taken from Zumastor
(http://zumastor.org/). Zumastor is remote replication software: a
local server sends the delta between two snapshots to a remote
server, and the remote server applies the delta atomically, so the
data on the remote server is always consistent.

Zumastor's snapshot code fulfills the above two requirements, but it
is implemented in user space: the dm kernel module sends information
about each request to user space, and the user-space daemon tells the
kernel what to do. The Zumastor user-space daemon also has to handle
replication, so the user-space approach makes sense there, but a pure
user-space approach is overkill for snapshots alone. I prefer to
implement snapshots in kernel space (as the current dm-snapshot
does). Features for remote replication software like Zumastor - that
is, interfaces to provide user space with the delta between two
snapshots and to apply such a delta atomically (via ioctl or
something else) - can be added later.

Note that the code is still at a very early stage. There are lots of
TODO items:

- snapshot deletion support
- writable snapshot support
- protection against unexpected events (probably journaling)
- performance improvements (exception cache handling, on-disk format,
  locking, etc.)
- better integration with the current snapshot code
- better error handling
- cleanups
- generating a delta between two snapshots
- applying a delta in an atomic manner

The patch against 2.6.26 is available at:

http://www.kernel.org/pub/linux/kernel/people/tomo/dm-snap/0001-dm-snapshot-dm-snapshot-shared-exception-store.patch

Here's an example (/dev/sdb1 as an origin device and /dev/sdg1 as a
cow device):

- creates the pair of an origin and a cow:

flax:~# echo 0 `blockdev --getsize /dev/sdb1` snapshot-origin /dev/sdb1 /dev/sdg1 P2 16 | dmsetup create work

- no snapshot yet:

flax:~# dmsetup status
work: 0 125017767 snapshot-origin : no snapshot

- creates one snapshot (the id of the snapshot is 0):

flax:~# dmsetup message /dev/mapper/work 0 snapshot create 0

- creates another snapshot (the id of the snapshot is 1):

flax:~# dmsetup message /dev/mapper/work 0 snapshot create 1

- now there are two snapshots (#0 and #1):

flax:~# dmsetup status
work: 0 125017767 snapshot-origin 0 1

- let's access the snapshots:

flax:~# echo 0 `blockdev --getsize /dev/sdb1` snapshot /dev/sdb1 0 | dmsetup create work-snap0
flax:~# echo 0 `blockdev --getsize /dev/sdb1` snapshot /dev/sdb1 1 | dmsetup create work-snap1
flax:~# ls /dev/mapper/
control  work  work-snap0  work-snap1

Signed-off-by: FUJITA Tomonori
---
 drivers/md/dm-exception-store.c | 1004 ++++++++++++++++++++++++++++++++++++++-
 drivers/md/dm-snap.c            |  332 ++++++++++++--
 drivers/md/dm-snap.h            |   29 +-
 3 files changed, 1306 insertions(+), 59 deletions(-)

diff --git a/drivers/md/dm-exception-store.c b/drivers/md/dm-exception-store.c
index 41f4080..205d5b1 100644
--- a/drivers/md/dm-exception-store.c
+++ b/drivers/md/dm-exception-store.c
@@ -7,6 +7,15 @@
  * This file is released under the GPL.
*/ +/* + * the new exception code is taken from Zumastor project + * + * (c) 2003, Sistina Software Inc. + * (c) 2004, Red Hat Software Inc. + * (c) 2005 Daniel Phillips + * (c) 2006 - 2007, Google Inc + */ + #include "dm.h" #include "dm-snap.h" @@ -59,6 +68,8 @@ */ #define SNAPSHOT_DISK_VERSION 1 +#define MAX_SNAPSHOTS 64 + struct disk_header { uint32_t magic; @@ -76,6 +87,10 @@ struct disk_header { /* In sectors */ uint32_t chunk_size; + + uint64_t root_tree_chunk; + uint64_t snapmask; + uint32_t tree_level; }; struct disk_exception { @@ -127,6 +142,22 @@ struct pstore { struct dm_io_client *io_client; struct workqueue_struct *metadata_wq; + + uint64_t root_tree_chunk; + uint32_t tree_level; + + unsigned long nr_chunks; + unsigned long nr_bitmap_chunks; + unsigned long *bitmap; + unsigned long cur_bitmap_chunk; + unsigned long cur_bitmap_index; + + struct list_head chunk_buffer_list; + struct list_head chunk_buffer_dirty_list; + + int header_dirty; + u64 snapmask; + int nr_chunk_buffers; }; static unsigned sectors_to_pages(unsigned sectors) @@ -175,7 +206,8 @@ static void do_metadata(struct work_struct *work) /* * Read or write a chunk aligned and sized block of data from a device. */ -static int chunk_io(struct pstore *ps, uint32_t chunk, int rw, int metadata) +static int chunk_io(struct pstore *ps, uint32_t chunk, int rw, int metadata, + void *buf) { struct dm_io_region where = { .bdev = ps->snap->cow->bdev, @@ -185,7 +217,7 @@ static int chunk_io(struct pstore *ps, uint32_t chunk, int rw, int metadata) struct dm_io_request io_req = { .bi_rw = rw, .mem.type = DM_IO_VMA, - .mem.ptr.vma = ps->area, + .mem.ptr.vma = buf, .client = ps->io_client, .notify.fn = NULL, }; @@ -220,7 +252,7 @@ static int area_io(struct pstore *ps, uint32_t area, int rw) /* convert a metadata area index to a chunk index */ chunk = 1 + ((ps->exceptions_per_area + 1) * area); - r = chunk_io(ps, chunk, rw, 0); + r = chunk_io(ps, chunk, rw, 0, ps->area); if (r) return r; @@ -261,7 +293,7 @@ static int read_header(struct pstore *ps, int *new_snapshot) if (r) return r; - r = chunk_io(ps, 0, READ, 1); + r = chunk_io(ps, 0, READ, 1, ps->area); if (r) goto bad; @@ -323,7 +355,13 @@ static int write_header(struct pstore *ps) dh->version = cpu_to_le32(ps->version); dh->chunk_size = cpu_to_le32(ps->snap->chunk_size); - return chunk_io(ps, 0, WRITE, 1); + dh->root_tree_chunk = cpu_to_le64(ps->root_tree_chunk); + dh->snapmask = cpu_to_le64(ps->snapmask); + dh->tree_level = cpu_to_le32(ps->tree_level); + + ps->header_dirty = 0; + + return chunk_io(ps, 0, WRITE, 1, ps->area); } /* @@ -513,7 +551,7 @@ static int persistent_read_metadata(struct exception_store *store) } static int persistent_prepare(struct exception_store *store, - struct dm_snap_exception *e) + struct dm_snap_exception *e, int *skip) { struct pstore *ps = get_info(store); uint32_t stride; @@ -601,7 +639,928 @@ static void persistent_drop(struct exception_store *store) DMWARN("write header failed"); } -int dm_create_persistent(struct exception_store *store) +static struct exception_store_operations persistent_ops = { + .destroy = persistent_destroy, + .read_metadata = persistent_read_metadata, + .prepare_exception = persistent_prepare, + .commit_exception = persistent_commit, + .drop_snapshot = persistent_drop, + .fraction_full = persistent_fraction_full, +}; + +struct chunk_buffer { + struct list_head list; + struct list_head dirty_list; + u64 chunk; + void *data; +}; + +struct node { + u32 count; + u32 unused; + struct index_entry { + u64 key; // note: 
entries[0].key never accessed + u64 chunk; // node sector address goes here + } entries[]; +}; + +struct leaf { + u16 magic; + u16 version; + u32 count; + u64 base_chunk; // !!! FIXME the code doesn't use the base_chunk properly + u64 using_mask; + + struct tree_map { + u32 offset; + u32 rchunk; + } map[]; +}; + +struct exception { + u64 share; + u64 chunk; +}; + +static inline struct node *buffer2node(struct chunk_buffer *buffer) +{ + return (struct node *)buffer->data; +} + +static inline struct leaf *buffer2leaf(struct chunk_buffer *buffer) +{ + return (struct leaf *)buffer->data; +} + +static struct chunk_buffer *alloc_chunk_buffer(struct pstore *ps) +{ + struct chunk_buffer *b; + + b = kzalloc(sizeof(*b), GFP_NOFS); + if (!b) { + printk("%s %d: out of memory\n", __FUNCTION__, __LINE__); + return NULL; + } + + b->data = vmalloc(ps->snap->chunk_size << SECTOR_SHIFT); + if (!b->data) { + printk("%s %d: out of memory\n", __FUNCTION__, __LINE__); + kfree(b); + return NULL; + } + + memset(b->data, 0, ps->snap->chunk_size << SECTOR_SHIFT); + + list_add(&b->list, &ps->chunk_buffer_list); + INIT_LIST_HEAD(&b->dirty_list); + + ps->nr_chunk_buffers++; + + return b; +} + +static void free_chunk_buffer(struct pstore *ps, struct chunk_buffer *b) +{ + list_del(&b->list); + vfree(b->data); + kfree(b); + + ps->nr_chunk_buffers--; +} + +static void persistent2_drop(struct exception_store *store) +{ + struct pstore *ps = get_info(store); + + chunk_io(ps, ps->cur_bitmap_chunk, WRITE, 1, ps->bitmap); + write_header(ps); + + ps->valid = 0; +} + +static void persistent2_destroy(struct exception_store *store) +{ + struct pstore *ps = get_info(store); + struct chunk_buffer *bb, *n; + + list_for_each_entry_safe(bb, n, &ps->chunk_buffer_list, list) + free_chunk_buffer(ps, bb); + + persistent2_drop(store); + + destroy_workqueue(ps->metadata_wq); + dm_io_client_destroy(ps->io_client); + vfree(ps->bitmap); + ps->bitmap = NULL; + vfree(ps->callbacks); + free_area(ps); + kfree(ps); +} + +static int read_new_bitmap_chunk(struct pstore *ps) +{ + chunk_io(ps, ps->cur_bitmap_chunk, WRITE, 1, ps->bitmap); + + ps->cur_bitmap_chunk++; + if (ps->cur_bitmap_chunk == ps->nr_bitmap_chunks) + ps->cur_bitmap_chunk = 1; + + chunk_io(ps, ps->cur_bitmap_chunk, READ, 1, ps->bitmap); + + return 0; +} + +static unsigned long persistent2_allocate_chunk(struct pstore *ps) +{ + unsigned long idx; + unsigned long limit; + unsigned long start_chunk; + unsigned long nr = (ps->snap->chunk_size << SECTOR_SHIFT) * 8; + + start_chunk = ps->cur_bitmap_chunk; +again: + if (ps->cur_bitmap_chunk == ps->nr_bitmap_chunks) + limit = ps->nr_chunks - (nr * (ps->nr_bitmap_chunks - 1)); + else + limit = nr; + + idx = find_next_zero_bit(ps->bitmap, limit, ps->cur_bitmap_index); + if (idx < limit) { + set_bit(idx, ps->bitmap); + + if (idx == limit - 1) { + ps->cur_bitmap_index = 0; + + read_new_bitmap_chunk(ps); + } else + ps->cur_bitmap_index++; + } else { + chunk_io(ps, ps->cur_bitmap_chunk, WRITE, 1, ps->bitmap); + + read_new_bitmap_chunk(ps); + + /* todo: check # free chunks */ + if (start_chunk == ps->cur_bitmap_chunk) { + printk("%s %d: fail to find a new chunk\n", + __FUNCTION__, __LINE__); + return 0; + } + + goto again; + } + + return (idx + (ps->cur_bitmap_chunk - 1) * + (ps->snap->chunk_size << SECTOR_SHIFT) * 8); +} + +static void init_leaf(struct pstore *ps, struct leaf *leaf) +{ + leaf->magic = 0x1eaf; + leaf->version = 0; + leaf->base_chunk = 0; + leaf->count = 0; + leaf->map[0].offset = ps->snap->chunk_size << SECTOR_SHIFT; +} + +static 
struct chunk_buffer *new_btree_obj(struct pstore *ps) +{ + u64 chunk; + struct chunk_buffer *b; + + b = alloc_chunk_buffer(ps); + if (!b) + return NULL; + + chunk = persistent2_allocate_chunk(ps); + if (!chunk) { + free_chunk_buffer(ps, b); + return NULL; + } + + b->chunk = chunk; + + return b; +} + +static int persistent2_create_bitmap(struct exception_store *store) +{ + struct pstore *ps = get_info(store); + int i, r, rest, this; + uint32_t chunk; + + /* bitmap + superblock */ + rest = ps->nr_bitmap_chunks + 1; + + for (chunk = 0; chunk < ps->nr_bitmap_chunks; chunk++) { + memset(ps->area, 0, ps->snap->chunk_size << SECTOR_SHIFT); + + this = min_t(int, rest, ps->snap->chunk_size * 512 * 8); + if (this) { + rest -= this; + + memset(ps->area, 0xff, this / 8); + + for (i = 0; i < this % 8; i++) + set_bit(i, (char *)ps->area + this / 8); + } + + r = chunk_io(ps, chunk + 1, WRITE, 1, ps->area); + if (r) + return r; + } + + return 0; +} + +static struct chunk_buffer *new_leaf(struct pstore *ps) +{ + struct chunk_buffer *cb; + + cb = new_btree_obj(ps); + if (cb) + init_leaf(ps, cb->data); + + return cb; +} + +static struct chunk_buffer *new_node(struct pstore *ps) +{ + return new_btree_obj(ps); +} + +static int persistent2_create_btree(struct pstore *ps) +{ + struct chunk_buffer *l, *n; + int r; + + r = chunk_io(ps, ps->cur_bitmap_chunk, READ, 1, + ps->bitmap); + if (r) + return r; + + l = new_btree_obj(ps); + if (!l) + return -ENOMEM; + init_leaf(ps, l->data); + + n = new_btree_obj(ps); + if (!n) + return -ENOMEM; + + buffer2node(n)->count = 1; + buffer2node(n)->entries[0].chunk = l->chunk; + + chunk_io(ps, l->chunk, WRITE, 1, l->data); + chunk_io(ps, n->chunk, WRITE, 1, n->data); + + ps->root_tree_chunk = n->chunk; + ps->snapmask = 0; + ps->tree_level = 1; + + return 0; +} + +static int persistent2_create_header(struct exception_store *store) +{ + struct pstore *ps = get_info(store); + int r; + + r = persistent2_create_bitmap(store); + if (r) + return r; + + r = persistent2_create_btree(ps); + if (r) + return r; + + ps->valid = 1; + r = write_header(ps); + if (r) + return r; + + return r; +} + +static int persistent2_read_header(struct pstore *ps, int *new_snapshot) +{ + struct disk_header *dh; + int r; + + ps->io_client = dm_io_client_create(sectors_to_pages(ps->snap-> + chunk_size)); + if (IS_ERR(ps->io_client)) + return PTR_ERR(ps->io_client); + + ps->bitmap = vmalloc(ps->snap->chunk_size << SECTOR_SHIFT); + if (!ps->bitmap) + return -ENOMEM; + + r = alloc_area(ps); + if (r) + goto fail_alloc_area; + + + r = chunk_io(ps, 0, READ, 1, ps->area); + if (r) + goto fail_to_read_header; + + dh = (struct disk_header *) ps->area; + + if (le32_to_cpu(dh->magic) == 0) { + *new_snapshot = 1; + return 0; + } + + if (le32_to_cpu(dh->magic) != SNAP_MAGIC) { + DMWARN("Invalid or corrupt snapshot"); + r = -ENXIO; + goto fail_to_read_header; + } + + *new_snapshot = 0; + ps->valid = le32_to_cpu(dh->valid); + ps->version = le32_to_cpu(dh->version); + + ps->root_tree_chunk = cpu_to_le64(dh->root_tree_chunk); + ps->snapmask = cpu_to_le64(dh->snapmask); + ps->tree_level = cpu_to_le32(dh->tree_level); + + if (ps->snap->chunk_size != le32_to_cpu(dh->chunk_size)) { + DMWARN("Invalid chunk size"); + r = -ENXIO; + goto fail_to_read_header; + } + + return 0; +fail_to_read_header: + free_area(ps); +fail_alloc_area: + vfree(ps->bitmap); + ps->bitmap = NULL; + return r; +} + +static int persistent2_read_metadata(struct exception_store *store) +{ + int r, uninitialized_var(new_snapshot); + struct pstore *ps = 
get_info(store); + sector_t size = get_dev_size(store->snap->cow->bdev); + unsigned long bitmap_chunk_bytes; + unsigned chunk_bytes = ps->snap->chunk_size << SECTOR_SHIFT; + + ps->cur_bitmap_chunk = 1; + ps->cur_bitmap_index = 0; + ps->nr_chunks = size >> ps->snap->chunk_shift; + + ps->exceptions_per_area = (ps->snap->chunk_size << SECTOR_SHIFT) / + sizeof(struct disk_exception); + ps->callbacks = dm_vcalloc(ps->exceptions_per_area, + sizeof(*ps->callbacks)); + if (!ps->callbacks) + return -ENOMEM; + + INIT_LIST_HEAD(&ps->chunk_buffer_list); + INIT_LIST_HEAD(&ps->chunk_buffer_dirty_list); + ps->nr_chunk_buffers = 0; + + bitmap_chunk_bytes = DIV_ROUND_UP(ps->nr_chunks, 8); + ps->nr_bitmap_chunks = DIV_ROUND_UP(bitmap_chunk_bytes, chunk_bytes); + + r = persistent2_read_header(ps, &new_snapshot); + if (r) + return r; + + if (new_snapshot) + printk("%s %d: creates a new cow device\n", + __FUNCTION__, __LINE__); + else + printk("%s %d: loads the cow device\n", __FUNCTION__, __LINE__); + + if (new_snapshot) { + r = persistent2_create_header(store); + if (r) { + DMWARN("write_header failed"); + return r; + } + } else { + if (ps->version != 2) { + DMWARN("unable to handle snapshot disk version %d", + ps->version); + return -EINVAL; + } + + /* + * Metadata are valid, but snapshot is invalidated + */ + if (!ps->valid) + return 1; + + r = chunk_io(ps, ps->cur_bitmap_chunk, READ, 1, + ps->bitmap); + } + + return r; +} + +struct etree_path { + struct chunk_buffer *buffer; + struct index_entry *pnext; +}; + +static struct chunk_buffer *btbread(struct pstore *ps, u64 chunk) +{ + struct chunk_buffer *b; + + list_for_each_entry(b, &ps->chunk_buffer_list, list) { + if (b->chunk == chunk) + return b; + } + + b = alloc_chunk_buffer(ps); + if (!b) + return NULL; + + b->chunk = chunk; + + chunk_io(ps, chunk, READ, 1, b->data); + + return b; +} + +static void brelse(struct chunk_buffer *buffer) +{ +} + +static void brelse_dirty(struct pstore *ps, struct chunk_buffer *b) +{ + if (list_empty(&b->dirty_list)) + list_add(&b->dirty_list, &ps->chunk_buffer_dirty_list); +} + +static void set_buffer_dirty(struct pstore *ps, struct chunk_buffer *b) +{ + if (list_empty(&b->dirty_list)) + list_add(&b->dirty_list, &ps->chunk_buffer_dirty_list); +} + +static inline struct exception *emap(struct leaf *leaf, unsigned i) +{ + return (struct exception *)((char *) leaf + leaf->map[i].offset); +} + +static int add_exception_to_leaf(struct leaf *leaf, u64 chunk, u64 exception, + int snapshot, u64 active) +{ + unsigned target = chunk - leaf->base_chunk; + u64 mask = 1ULL << snapshot, sharemap; + struct exception *ins, *exceptions = emap(leaf, 0); + char *maptop = (char *)(&leaf->map[leaf->count + 1]); + unsigned i, j, free = (char *)exceptions - maptop; + + /* + * Find the chunk for which we're adding an exception entry. + */ + for (i = 0; i < leaf->count; i++) // !!! binsearch goes here + if (leaf->map[i].rchunk >= target) + break; + + /* + * If we didn't find the chunk, insert a new one at map[i]. + */ + if (i == leaf->count || leaf->map[i].rchunk > target) { + if (free < sizeof(struct exception) + sizeof(struct tree_map)) + return -1; + ins = emap(leaf, i); + memmove(&leaf->map[i+1], &leaf->map[i], maptop - (char *)&leaf->map[i]); + leaf->map[i].offset = (char *)ins - (char *)leaf; + leaf->map[i].rchunk = target; + leaf->count++; + sharemap = snapshot == -1? 
active: mask; + goto insert; + } + + if (free < sizeof(struct exception)) + return -1; + /* + * Compute the share map from that of each existing exception entry + * for this chunk. If we're doing this for a chunk on the origin, + * the new exception is shared between those snapshots that weren't + * already sharing exceptions for this chunk. (We combine the sharing + * that already exists, invert it, then mask off everything but the + * active snapshots.) + * + * If this is a chunk on a snapshot we go through the existing + * exception list to turn off sharing with this snapshot (with the + * side effect that if the chunk was only shared by this snapshot it + * becomes unshared). We then set sharing for this snapshot in the + * new exception entry. + */ + if (snapshot == -1) { + for (sharemap = 0, ins = emap(leaf, i); ins < emap(leaf, i+1); ins++) + sharemap |= ins->share; + sharemap = (~sharemap) & active; + } else { + for (ins = emap(leaf, i); ins < emap(leaf, i+1); ins++) + if ((ins->share & mask)) { + ins->share &= ~mask; + break; + } + sharemap = mask; + } + ins = emap(leaf, i); +insert: + /* + * Insert the new exception entry. These grow from the end of the + * block toward the beginning. First move any earlier exceptions up + * to make room for the new one, then insert the new entry in the + * space freed. Adjust the offsets for all earlier chunks. + */ + memmove(exceptions - 1, exceptions, (char *)ins - (char *)exceptions); + ins--; + ins->share = sharemap; + ins->chunk = exception; + + for (j = 0; j <= i; j++) + leaf->map[j].offset -= sizeof(struct exception); + + return 0; +} + +static void insert_child(struct node *node, struct index_entry *p, u64 child, + u64 childkey) +{ + memmove(p + 1, p, (char *)(&node->entries[0] + node->count) - (char *)p); + p->chunk = child; + p->key = childkey; + node->count++; +} + +static u64 split_leaf(struct leaf *leaf, struct leaf *leaf2) +{ + unsigned i, nhead = (leaf->count + 1) / 2, ntail = leaf->count - nhead, tailsize; + /* Should split at middle of data instead of median exception */ + u64 splitpoint = leaf->map[nhead].rchunk + leaf->base_chunk; + char *phead, *ptail; + + phead = (char *)emap(leaf, 0); + ptail = (char *)emap(leaf, nhead); + tailsize = (char *)emap(leaf, leaf->count) - ptail; + + /* Copy upper half to new leaf */ + memcpy(leaf2, leaf, offsetof(struct leaf, map)); + memcpy(&leaf2->map[0], &leaf->map[nhead], (ntail + 1) * sizeof(struct tree_map)); + memcpy(ptail - (char *)leaf + (char *)leaf2, ptail, tailsize); + leaf2->count = ntail; + + /* Move lower half to top of block */ + memmove(phead + tailsize, phead, ptail - phead); + leaf->count = nhead; + for (i = 0; i <= nhead; i++) + leaf->map[i].offset += tailsize; + leaf->map[nhead].rchunk = 0; + + return splitpoint; +} + +static int add_exception_to_tree(struct pstore *ps, struct chunk_buffer *leafbuf, + u64 target, u64 exception, int snapbit, + struct etree_path path[], unsigned levels) +{ + struct node *newroot; + struct chunk_buffer *newrootbuf, *childbuf; + struct leaf *leaf; + u64 childkey, childsector; + int ret; + + ret = add_exception_to_leaf(buffer2leaf(leafbuf), target, + exception, snapbit, ps->snapmask); + if (!ret) { + brelse_dirty(ps, leafbuf); + return 0; + } + + /* + * There wasn't room to add a new exception to the leaf. Split it. + */ + + childbuf = new_leaf(ps); + if (!childbuf) + return -ENOMEM; /* this is the right thing to do? 
*/ + + set_buffer_dirty(ps, childbuf); + + childkey = split_leaf(buffer2leaf(leafbuf), buffer2leaf(childbuf)); + childsector = childbuf->chunk; + + /* + * Now add the exception to the appropriate leaf. Childkey has the + * first chunk in the new leaf we just created. + */ + if (target < childkey) + leaf = buffer2leaf(leafbuf); + else + leaf = buffer2leaf(childbuf); + + ret = add_exception_to_leaf(leaf, target, exception, snapbit, + ps->snapmask); + if (ret) + return -ENOMEM; + + brelse_dirty(ps, leafbuf); + brelse_dirty(ps, childbuf); + + while (levels--) { + unsigned half; + u64 newkey; + struct index_entry *pnext = path[levels].pnext; + struct chunk_buffer *parentbuf = path[levels].buffer; + struct node *parent = buffer2node(parentbuf); + struct chunk_buffer *newbuf; + struct node *newnode; + int csize = ps->snap->chunk_size << SECTOR_SHIFT; + int alloc_per_node = (csize - offsetof(struct node, entries)) + / sizeof(struct index_entry); + + if (parent->count < alloc_per_node) { + insert_child(parent, pnext, childsector, childkey); + set_buffer_dirty(ps, parentbuf); + return 0; + } + /* + * Split the node. + */ + half = parent->count / 2; + newkey = parent->entries[half].key; + newbuf = new_node(ps); + if (!newbuf) + return -ENOMEM; + set_buffer_dirty(ps, newbuf); + newnode = buffer2node(newbuf); + + newnode->count = parent->count - half; + memcpy(&newnode->entries[0], &parent->entries[half], + newnode->count * sizeof(struct index_entry)); + parent->count = half; + /* + * If the path entry is in the new node, use that as the + * parent. + */ + if (pnext > &parent->entries[half]) { + pnext = pnext - &parent->entries[half] + newnode->entries; + set_buffer_dirty(ps, parentbuf); + parentbuf = newbuf; + parent = newnode; + } else set_buffer_dirty(ps, newbuf); + /* + * Insert the child now that we have room in the parent, then + * climb the path and insert the new child there. 
+ */ + insert_child(parent, pnext, childsector, childkey); + set_buffer_dirty(ps, parentbuf); + childkey = newkey; + childsector = newbuf->chunk; + brelse(newbuf); + } + + newrootbuf = new_node(ps); + if (!newrootbuf) + return -ENOMEM; + + newroot = buffer2node(newrootbuf); + + newroot->count = 2; + newroot->entries[0].chunk = ps->root_tree_chunk; + newroot->entries[1].key = childkey; + newroot->entries[1].chunk = childsector; + ps->root_tree_chunk = newrootbuf->chunk; + ps->tree_level++; + ps->header_dirty = 1; + brelse_dirty(ps, newrootbuf); + return 0; +} + +static struct chunk_buffer *probe(struct pstore *ps, u64 chunk, + struct etree_path *path) +{ + unsigned i, levels = ps->tree_level; + struct node *node; + struct chunk_buffer *nodebuf = btbread(ps, ps->root_tree_chunk); + + if (!nodebuf) + return NULL; + node = buffer2node(nodebuf); + + for (i = 0; i < levels; i++) { + struct index_entry *pnext = node->entries, *top = pnext + node->count; + + while (++pnext < top) + if (pnext->key > chunk) + break; + + path[i].buffer = nodebuf; + path[i].pnext = pnext; + nodebuf = btbread(ps, (pnext - 1)->chunk); + if (!nodebuf) + return NULL; + + node = (struct node *)nodebuf->data; + } + BUG_ON(((struct leaf *)nodebuf->data)->magic != 0x1eaf); + return nodebuf; +} + +static int origin_chunk_unique(struct leaf *leaf, u64 chunk, u64 snapmask) +{ + u64 using = 0; + u64 i, target = chunk - leaf->base_chunk; + struct exception const *p; + + for (i = 0; i < leaf->count; i++) + if (leaf->map[i].rchunk == target) + goto found; + return !snapmask; +found: + for (p = emap(leaf, i); p < emap(leaf, i+1); p++) + using |= p->share; + + return !(~using & snapmask); +} + +static int persistent2_prepare(struct exception_store *store, + struct dm_snap_exception *e, int *skip) +{ + struct pstore *ps = get_info(store); + struct chunk_buffer *cb; + struct etree_path path[ps->tree_level + 1]; + chunk_t new_chunk, chunk = e->old_chunk; + int ret; + + cb = probe(ps, chunk, path); + if (!cb) + return 1; + + + ret = origin_chunk_unique(buffer2leaf(cb), chunk, ps->snapmask); + if (ret) { + *skip = 1; + return 1; + } + + new_chunk = persistent2_allocate_chunk(ps); + if (!new_chunk) + return 1; + + ret = add_exception_to_tree(ps, cb, chunk, new_chunk, -1, path, + ps->tree_level); + + e->new_chunk = new_chunk; + atomic_inc(&ps->pending_count); + + return 0; +} + +#define MAX_CHUNK_BUFFERS 128 + +static void persistent2_commit(struct exception_store *store, + struct dm_snap_exception *e, + void (*callback) (void *, int success), + void *callback_context) +{ + int i, r = 0; + struct pstore *ps = get_info(store); + struct commit_callback *cb; + + cb = ps->callbacks + ps->callback_count++; + cb->callback = callback; + cb->context = callback_context; + + if (atomic_dec_and_test(&ps->pending_count) || + (ps->callback_count == ps->exceptions_per_area)) { + struct chunk_buffer *b, *n; + + down_write(&ps->snap->lock); + + list_for_each_entry_safe(b, n, &ps->chunk_buffer_dirty_list, + dirty_list) { + + list_del_init(&b->dirty_list); + list_move_tail(&b->list, &ps->chunk_buffer_list); + + /* todo: can be async */ + chunk_io(ps, b->chunk, WRITE, 1, b->data); + } + + if (ps->header_dirty) + write_header(ps); + + list_for_each_entry_safe(b, n, &ps->chunk_buffer_list, list) { + if (ps->nr_chunk_buffers < MAX_CHUNK_BUFFERS) + break; + + free_chunk_buffer(ps, b); + } + + up_write(&ps->snap->lock); + + for (i = 0; i < ps->callback_count; i++) { + cb = ps->callbacks + i; + cb->callback(cb->context, r == 0 ? 
1 : 0); + } + + ps->callback_count = 0; + } +} + +static int snapshot_chunk_unique(struct leaf *leaf, u64 chunk, int snapbit, + chunk_t *exception) +{ + u64 mask = 1LL << snapbit; + unsigned i, target = chunk - leaf->base_chunk; + struct exception const *p; + + for (i = 0; i < leaf->count; i++) + if (leaf->map[i].rchunk == target) + goto found; + return 0; +found: + for (p = emap(leaf, i); p < emap(leaf, i+1); p++) + /* shared if more than one bit set including this one */ + if ((p->share & mask)) { + *exception = p->chunk; + return !(p->share & ~mask); + } + return 0; +} + +static int persistent2_test_exception(struct exception_store *store, + chunk_t old_chunk, chunk_t *new_chunk, + int snapid) +{ + struct pstore *ps = get_info(store); + unsigned levels = ps->tree_level; + struct etree_path path[levels + 1]; + struct chunk_buffer *leafbuf; + + *new_chunk = 0; + + leafbuf = probe(ps, (u64)old_chunk, path); + if (!leafbuf) + return -EIO; + + snapshot_chunk_unique(buffer2leaf(leafbuf), old_chunk, snapid, new_chunk); + + return 0; +} + +static int persistent2_create_snapshot(struct exception_store *store, int snapid) +{ + struct pstore *ps = get_info(store); + int r = 0; + + down_write(&store->snap->lock); + if (test_and_set_bit(snapid, &ps->snapmask)) + r = -EINVAL; + + if (r) + printk("%s %d: %dth snapshot exists.\n", + __FUNCTION__, __LINE__, snapid); + else { + write_header(ps); + printk("%s %d: create %uth snapshot.\n", + __FUNCTION__, __LINE__, snapid); + } + + up_write(&store->snap->lock); + + return r; +} + +static int persistent2_test_snapshot(struct exception_store *store, int snapid) +{ + struct pstore *ps = get_info(store); + int r; + + down_write(&store->snap->lock); + + r = test_bit(snapid, &ps->snapmask); + + up_write(&store->snap->lock); + + return r; +} + +static struct exception_store_operations persistent2_ops = { + .destroy = persistent2_destroy, + .read_metadata = persistent2_read_metadata, + .prepare_exception = persistent2_prepare, + .commit_exception = persistent2_commit, + .drop_snapshot = persistent2_drop, + .test_exception = persistent2_test_exception, + .create_snapshot = persistent2_create_snapshot, + .test_snapshot = persistent2_test_snapshot, +}; + +int dm_create_persistent(struct exception_store *store, int version) { struct pstore *ps; @@ -612,7 +1571,7 @@ int dm_create_persistent(struct exception_store *store) ps->snap = store->snap; ps->valid = 1; - ps->version = SNAPSHOT_DISK_VERSION; + ps->version = version; ps->area = NULL; ps->next_free = 2; /* skipping the header and first area */ ps->current_committed = 0; @@ -628,12 +1587,11 @@ int dm_create_persistent(struct exception_store *store) return -ENOMEM; } - store->destroy = persistent_destroy; - store->read_metadata = persistent_read_metadata; - store->prepare_exception = persistent_prepare; - store->commit_exception = persistent_commit; - store->drop_snapshot = persistent_drop; - store->fraction_full = persistent_fraction_full; + if (version == 1) + store->store_ops = &persistent_ops; + else + store->store_ops = &persistent2_ops; + store->context = ps; return 0; @@ -657,7 +1615,7 @@ static int transient_read_metadata(struct exception_store *store) } static int transient_prepare(struct exception_store *store, - struct dm_snap_exception *e) + struct dm_snap_exception *e, int *skip) { struct transient_c *tc = (struct transient_c *) store->context; sector_t size = get_dev_size(store->snap->cow->bdev); @@ -687,16 +1645,20 @@ static void transient_fraction_full(struct exception_store *store, *denominator = 
get_dev_size(store->snap->cow->bdev);
 }
 
+static struct exception_store_operations transient_ops = {
+	.destroy = transient_destroy,
+	.read_metadata = transient_read_metadata,
+	.prepare_exception = transient_prepare,
+	.commit_exception = transient_commit,
+	.drop_snapshot = NULL,
+	.fraction_full = transient_fraction_full,
+};
+
 int dm_create_transient(struct exception_store *store)
 {
 	struct transient_c *tc;
 
-	store->destroy = transient_destroy;
-	store->read_metadata = transient_read_metadata;
-	store->prepare_exception = transient_prepare;
-	store->commit_exception = transient_commit;
-	store->drop_snapshot = NULL;
-	store->fraction_full = transient_fraction_full;
+	store->store_ops = &transient_ops;
 
 	tc = kmalloc(sizeof(struct transient_c), GFP_KERNEL);
 	if (!tc)
diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c
index 1ba8a47..210ae0d 100644
--- a/drivers/md/dm-snap.c
+++ b/drivers/md/dm-snap.c
@@ -43,6 +43,15 @@ static struct workqueue_struct *ksnapd;
 
 static void flush_queued_bios(struct work_struct *work);
 
+struct dm_origin {
+	struct dm_dev *origin;
+
+	struct dm_snapshot *origin_snap;
+
+	/* should be replaced with a list */
+	struct dm_snapshot *snaps[64];
+};
+
 struct dm_snap_pending_exception {
 	struct dm_snap_exception e;
 
@@ -173,7 +182,6 @@ static int register_snapshot(struct dm_snapshot *snap)
 
 	down_write(&_origins_lock);
 	o = __lookup_origin(bdev);
-
 	if (!o) {
 		/* New origin */
 		o = kmalloc(sizeof(*o), GFP_KERNEL);
@@ -476,6 +484,70 @@ static int set_chunk_size(struct dm_snapshot *s, const char *chunk_size_arg,
 	return 0;
 }
 
+static int snapshot2_ctr(struct dm_target *ti, char *origin_path, char *snapindex)
+{
+	struct dm_dev *dev;
+	struct origin *o;
+	struct dm_snapshot *snap, *origin_snap = NULL;
+	struct dm_origin *dmo;
+	unsigned idx;
+	int r;
+
+	snap = kzalloc(sizeof(*snap), GFP_KERNEL);
+	if (!snap)
+		return -ENOMEM;
+
+	idx = simple_strtoul(snapindex, NULL, 0);
+	if (idx >= 64) {
+		r = -EINVAL;
+		ti->error = "Invalid snapid";
+		goto out;
+	}
+
+	r = dm_get_device(ti, origin_path, 0, ti->len, FMODE_READ, &dev);
+	if (r) {
+		ti->error = "Cannot get origin device";
+		goto out;
+	}
+
+	r = -EINVAL;
+	down_read(&_origins_lock);
+	o = __lookup_origin(dev->bdev);
+	if (o && !list_empty(&o->snapshots)) {
+		origin_snap = list_first_entry(&o->snapshots,
+					       struct dm_snapshot, list);
+		dmo = origin_snap->ti->private;
+
+		if (dmo->snaps[idx]) {
+			ti->error = "already taken";
+			r = -EINVAL;
+		} else if (!origin_snap->store.store_ops->test_snapshot(&origin_snap->store, idx)) {
+			ti->error = "snapshot does not exist";
+			r = -EINVAL;
+		} else {
+			dm_table_get(origin_snap->ti->table);
+			dmo->snaps[idx] = snap;
+			ti->private = snap;
+			snap->snapid = idx;
+			snap->origin = dev;
+			snap->ti = origin_snap->ti;
+			ti->split_io = origin_snap->chunk_size;
+			snap->chunk_size = origin_snap->chunk_size;
+			snap->chunk_mask = origin_snap->chunk_mask;
+			snap->chunk_shift = origin_snap->chunk_shift;
+			r = 0;
+		}
+	}
+	up_read(&_origins_lock);
+
+	if (r)
+		dm_put_device(ti, dev);
+out:
+	if (r)
+		kfree(snap);
+	return r;
+}
+
 /*
  * Construct a snapshot mapping: <origin_dev> <COW-dev> <p/n> <chunk-size>
 */
@@ -486,6 +558,11 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 	char persistent;
 	char *origin_path;
 	char *cow_path;
+	int version = 1;
+	int origin_mode = FMODE_READ;
+
+	if (argc == 2)
+		return snapshot2_ctr(ti, argv[0], argv[1]);
 
 	if (argc != 4) {
 		ti->error = "requires exactly 4 arguments";
@@ -503,6 +580,23 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 		goto bad1;
 	}
 
+	if (strlen(argv[2]) > 1) {
+		version = simple_strtoul(argv[2] + 1, NULL, 0);
+
+		if (version != 2) {
+			ti->error = "Invalid version";
+			r = -EINVAL;
+			goto bad1;
+		}
+		if (persistent != 'P') {
+			ti->error = "only persistent supported";
+			r = -EINVAL;
+			goto bad1;
+		}
+
+		origin_mode |= FMODE_WRITE;
+	}
+
 	s = kmalloc(sizeof(*s), GFP_KERNEL);
 	if (s == NULL) {
 		ti->error = "Cannot allocate snapshot context private "
@@ -511,7 +605,7 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 		goto bad1;
 	}
 
-	r = dm_get_device(ti, origin_path, 0, ti->len, FMODE_READ, &s->origin);
+	r = dm_get_device(ti, origin_path, 0, ti->len, origin_mode, &s->origin);
 	if (r) {
 		ti->error = "Cannot get origin device";
 		goto bad2;
@@ -536,6 +630,7 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 	s->last_percent = 0;
 	init_rwsem(&s->lock);
 	spin_lock_init(&s->pe_lock);
+	s->version = version;
 	s->ti = ti;
 
 	/* Allocate hash table for COW data */
@@ -548,7 +643,7 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 	s->store.snap = s;
 
 	if (persistent == 'P')
-		r = dm_create_persistent(&s->store);
+		r = dm_create_persistent(&s->store, version);
 	else
 		r = dm_create_transient(&s->store);
 
@@ -565,7 +660,7 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 	}
 
 	/* Metadata must only be loaded into one table at once */
-	r = s->store.read_metadata(&s->store);
+	r = s->store.store_ops->read_metadata(&s->store);
 	if (r < 0) {
 		ti->error = "Failed to read snapshot metadata";
 		goto bad6;
@@ -594,7 +689,7 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 	dm_kcopyd_client_destroy(s->kcopyd_client);
 
 bad5:
-	s->store.destroy(&s->store);
+	s->store.store_ops->destroy(&s->store);
 
 bad4:
 	exit_exception_table(&s->pending, pending_cache);
@@ -619,13 +714,28 @@ static void __free_exceptions(struct dm_snapshot *s)
 	exit_exception_table(&s->pending, pending_cache);
 	exit_exception_table(&s->complete, exception_cache);
 
-	s->store.destroy(&s->store);
+	s->store.store_ops->destroy(&s->store);
 }
 
 static void snapshot_dtr(struct dm_target *ti)
 {
 	struct dm_snapshot *s = ti->private;
 
+	if (!s->cow) {
+		struct dm_origin *dmo = s->ti->private;
+
+		down_write(&_origins_lock);
+
+		dmo->snaps[s->snapid] = NULL;
+
+		up_write(&_origins_lock);
+
+		dm_table_put(s->ti->table);
+		dm_put_device(ti, s->origin);
+		kfree(s);
+		return;
+	}
+
 	flush_workqueue(ksnapd);
 
 	/* Prevent further origin writes from using this snapshot.
*/ @@ -694,8 +804,8 @@ static void __invalidate_snapshot(struct dm_snapshot *s, int err) else if (err == -ENOMEM) DMERR("Invalidating snapshot: Unable to allocate exception."); - if (s->store.drop_snapshot) - s->store.drop_snapshot(&s->store); + if (s->store.store_ops->drop_snapshot) + s->store.store_ops->drop_snapshot(&s->store); s->valid = 0; @@ -755,6 +865,11 @@ static void pending_complete(struct dm_snap_pending_exception *pe, int success) goto out; } + if (s->version == 2) { + down_write(&s->lock); + goto out; + } + e = alloc_exception(); if (!e) { down_write(&s->lock); @@ -814,8 +929,8 @@ static void copy_callback(int read_err, unsigned long write_err, void *context) else /* Update the metadata if we are persistent */ - s->store.commit_exception(&s->store, &pe->e, commit_callback, - pe); + s->store.store_ops->commit_exception(&s->store, &pe->e, commit_callback, + pe); } /* @@ -852,7 +967,7 @@ static void start_copy(struct dm_snap_pending_exception *pe) * this. */ static struct dm_snap_pending_exception * -__find_pending_exception(struct dm_snapshot *s, struct bio *bio) +__find_pending_exception(struct dm_snapshot *s, struct bio *bio, int *skip) { struct dm_snap_exception *e; struct dm_snap_pending_exception *pe; @@ -896,7 +1011,7 @@ __find_pending_exception(struct dm_snapshot *s, struct bio *bio) pe->snap = s; pe->started = 0; - if (s->store.prepare_exception(&s->store, &pe->e)) { + if (s->store.store_ops->prepare_exception(&s->store, &pe->e, skip)) { free_pending_exception(pe); return NULL; } @@ -917,17 +1032,58 @@ static void remap_exception(struct dm_snapshot *s, struct dm_snap_exception *e, (bio->bi_sector & s->chunk_mask); } +static int unified_snapshot_map(struct dm_target *ti, + struct dm_snapshot *s, struct bio *bio, + chunk_t chunk) +{ + struct dm_origin *dmo = s->ti->private; + struct exception_store *store = &dmo->origin_snap->store; + int r = DM_MAPIO_REMAPPED; + + down_write(&dmo->origin_snap->lock); + + /* not yet */ + if (bio_rw(bio) == WRITE) + r = -EIO; + else { + int ret; + chunk_t new_chunk; + + ret = store->store_ops->test_exception(store, chunk, &new_chunk, + s->snapid); + if (ret) + r = -EIO; + else { + if (new_chunk) { + struct dm_snap_exception e; + e.old_chunk = chunk; + e.new_chunk = new_chunk; + remap_exception(dmo->origin_snap, &e, bio, chunk); + } else + bio->bi_bdev = s->origin->bdev; + } + } + + up_write(&dmo->origin_snap->lock); + + return r; +} + static int snapshot_map(struct dm_target *ti, struct bio *bio, union map_info *map_context) { struct dm_snap_exception *e; struct dm_snapshot *s = ti->private; int r = DM_MAPIO_REMAPPED; + int skip; chunk_t chunk; struct dm_snap_pending_exception *pe = NULL; chunk = sector_to_chunk(s, bio->bi_sector); + if (!s->cow) + return unified_snapshot_map(ti, s, bio, chunk); + /* Full snapshots are not usable */ /* To get here the table must be live so s->active is always set. */ if (!s->valid) @@ -955,7 +1111,7 @@ static int snapshot_map(struct dm_target *ti, struct bio *bio, * writeable. 
*/ if (bio_rw(bio) == WRITE) { - pe = __find_pending_exception(s, bio); + pe = __find_pending_exception(s, bio, &skip); if (!pe) { __invalidate_snapshot(s, -ENOMEM); r = -EIO; @@ -993,6 +1149,9 @@ static void snapshot_resume(struct dm_target *ti) { struct dm_snapshot *s = ti->private; + if (!s->cow) + return; + down_write(&s->lock); s->active = 1; up_write(&s->lock); @@ -1005,12 +1164,15 @@ static int snapshot_status(struct dm_target *ti, status_type_t type, switch (type) { case STATUSTYPE_INFO: + if (!snap->cow) + break; + if (!snap->valid) snprintf(result, maxlen, "Invalid"); else { - if (snap->store.fraction_full) { + if (snap->store.store_ops->fraction_full) { sector_t numerator, denominator; - snap->store.fraction_full(&snap->store, + snap->store.store_ops->fraction_full(&snap->store, &numerator, &denominator); snprintf(result, maxlen, "%llu/%llu", @@ -1028,10 +1190,14 @@ static int snapshot_status(struct dm_target *ti, status_type_t type, * to make private copies if the output is to * make sense. */ - snprintf(result, maxlen, "%s %s %c %llu", - snap->origin->name, snap->cow->name, - snap->type, - (unsigned long long)snap->chunk_size); + if (snap->cow) + snprintf(result, maxlen, "%s %s %c %llu", + snap->origin->name, snap->cow->name, + snap->type, + (unsigned long long)snap->chunk_size); + else + snprintf(result, maxlen, "%s %d", + snap->origin->name, snap->snapid); break; } @@ -1044,6 +1210,7 @@ static int snapshot_status(struct dm_target *ti, status_type_t type, static int __origin_write(struct list_head *snapshots, struct bio *bio) { int r = DM_MAPIO_REMAPPED, first = 0; + int skip; struct dm_snapshot *snap; struct dm_snap_exception *e; struct dm_snap_pending_exception *pe, *next_pe, *primary_pe = NULL; @@ -1081,9 +1248,11 @@ static int __origin_write(struct list_head *snapshots, struct bio *bio) if (e) goto next_snapshot; - pe = __find_pending_exception(snap, bio); + skip = 0; + pe = __find_pending_exception(snap, bio, &skip); if (!pe) { - __invalidate_snapshot(snap, -ENOMEM); + if (!skip) + __invalidate_snapshot(snap, -ENOMEM); goto next_snapshot; } @@ -1174,33 +1343,78 @@ static int origin_ctr(struct dm_target *ti, unsigned int argc, char **argv) { int r; struct dm_dev *dev; + struct dm_origin *dmo; - if (argc != 1) { + if (argc != 4 && argc != 1) { ti->error = "origin: incorrect number of arguments"; return -EINVAL; } + dmo = kzalloc(sizeof(*dmo), GFP_KERNEL); + if (!dmo) + return -ENOMEM; + + if (argc == 4) { + struct dm_snapshot *s; + + r = snapshot_ctr(ti, argc, argv); + if (r) + goto out; + + s = ti->private; + + if (s->chunk_size != 16) { + r = -EINVAL; + ti->error = "invalid chunk size"; + snapshot_dtr(ti); + goto out; + } + + /* fixme */ + s->active = 1; + + dmo->origin = s->origin; + dmo->origin_snap = s; + ti->private = dmo; + + return 0; + } + r = dm_get_device(ti, argv[0], 0, ti->len, dm_table_get_mode(ti->table), &dev); if (r) { ti->error = "Cannot get target device"; - return r; + goto out; } - ti->private = dev; + dmo->origin = dev; + ti->private = dmo; return 0; +out: + kfree(dmo); + return r; } static void origin_dtr(struct dm_target *ti) { - struct dm_dev *dev = ti->private; - dm_put_device(ti, dev); + struct dm_origin *dmo = ti->private; + + if (dmo->origin_snap) { + ti->private = dmo->origin_snap; + snapshot_dtr(ti); + kfree(dmo); + return; + } + + dm_put_device(ti, dmo->origin); + kfree(dmo); } static int origin_map(struct dm_target *ti, struct bio *bio, union map_info *map_context) { - struct dm_dev *dev = ti->private; + struct dm_origin *dmo = ti->private; 
+	struct dm_dev *dev = dmo->origin;
 
 	bio->bi_bdev = dev->bdev;
 
 	/* Only tell snapshots if this is a write */
@@ -1215,11 +1429,16 @@ static int origin_map(struct dm_target *ti, struct bio *bio,
  */
 static void origin_resume(struct dm_target *ti)
 {
-	struct dm_dev *dev = ti->private;
+	struct dm_origin *dmo = ti->private;
+	struct dm_dev *dev = dmo->origin;
 	struct dm_snapshot *snap;
 	struct origin *o;
 	chunk_t chunk_size = 0;
 
+	/* todo */
+	if (dmo->origin_snap)
+		return;
+
 	down_read(&_origins_lock);
 	o = __lookup_origin(dev->bdev);
 	if (o)
@@ -1233,21 +1452,71 @@ static void origin_resume(struct dm_target *ti)
 static int origin_status(struct dm_target *ti, status_type_t type, char *result,
 			 unsigned int maxlen)
 {
-	struct dm_dev *dev = ti->private;
+	struct dm_origin *dmo = ti->private;
+	struct dm_dev *dev = dmo->origin;
+	struct dm_snapshot *snap = dmo->origin_snap;
+	int r, i, cnt;
 
 	switch (type) {
 	case STATUSTYPE_INFO:
-		result[0] = '\0';
+		if (dmo->origin_snap) {
+			for (i = 0, cnt = 0; maxlen && i < MAX_NR_SNAPS; i++) {
+				r = snap->store.store_ops->test_snapshot(&snap->store, i);
+				if (r) {
+					r = snprintf(result, maxlen, "%d ", i);
+					result += r;
+					maxlen -= min_t(int, r, maxlen);
+					cnt++;
+				}
+			}
+			if (!cnt)
+				snprintf(result, maxlen, ": no snapshot");
+		} else
+			result[0] = '\0';
 		break;
 
 	case STATUSTYPE_TABLE:
-		snprintf(result, maxlen, "%s", dev->name);
+		r = snprintf(result, maxlen, "%s", dev->name);
+		if (dmo->origin_snap && r > 0 && r < maxlen)
+			snprintf(result + r, maxlen - r, "%s %s %llu",
+				 snap->origin->name, snap->cow->name,
+				 (unsigned long long)snap->chunk_size);
 		break;
 	}
 
 	return 0;
 }
 
+#define MSG_STR(x) x, sizeof(x)
+
+static int origin_message(struct dm_target *ti, unsigned argc, char **argv)
+{
+	struct dm_origin *dmo = ti->private;
+	struct dm_snapshot *snap = dmo->origin_snap;
+	unsigned idx;
+	int ret = -EINVAL;
+
+	if (!snap)
+		return ret;
+
+	if (argc != 3)
+		return ret;
+
+	if (strnicmp(argv[0], MSG_STR("snapshot")))
+		return ret;
+
+	if (!strnicmp(argv[1], MSG_STR("create"))) {
+		idx = simple_strtoul(argv[2], NULL, 0);
+
+		ret = snap->store.store_ops->create_snapshot(&snap->store,
+							     idx);
+	} else if (!strnicmp(argv[1], MSG_STR("remove")))
+		printk("%s %d: removing snapshots is not supported yet.\n",
+		       __FUNCTION__, __LINE__);
+
+	return ret;
+}
+
 static struct target_type origin_target = {
 	.name    = "snapshot-origin",
 	.version = {1, 6, 0},
@@ -1257,6 +1526,7 @@ static struct target_type origin_target = {
 	.map     = origin_map,
 	.resume  = origin_resume,
 	.status  = origin_status,
+	.message = origin_message,
 };
 
 static struct target_type snapshot_target = {
diff --git a/drivers/md/dm-snap.h b/drivers/md/dm-snap.h
index 24f9fb7..432f2de 100644
--- a/drivers/md/dm-snap.h
+++ b/drivers/md/dm-snap.h
@@ -14,6 +14,8 @@
 #include <linux/blkdev.h>
 #include <linux/workqueue.h>
 
+#define MAX_NR_SNAPS 64
+
 struct exception_table {
 	uint32_t hash_mask;
 	unsigned hash_shift;
@@ -83,12 +85,9 @@ static inline void dm_consecutive_chunk_count_inc(struct dm_snap_exception *e)
 
 # endif
 
-/*
- * Abstraction to handle the meta/layout of exception stores (the
- * COW device).
- */
-struct exception_store {
+struct exception_store;
+
+struct exception_store_operations {
 	/*
 	 * Destroys this object when you've finished with it.
 	 */
@@ -104,7 +103,7 @@ struct exception_store {
 	 * Find somewhere to store the next exception.
 	 */
 	int (*prepare_exception) (struct exception_store *store,
-				  struct dm_snap_exception *e);
+				  struct dm_snap_exception *e, int *skip);
 
 	/*
 	 * Update the metadata with this exception.
@@ -126,6 +125,19 @@ struct exception_store { sector_t *numerator, sector_t *denominator); + int (*test_exception)(struct exception_store *store, + chunk_t old_chunk, chunk_t *new_chunk, int snapid); + + int (*create_snapshot)(struct exception_store *store, int snapid); + int (*test_snapshot)(struct exception_store *store, int snapid); +}; + +/* + * Abstraction to handle the meta/layout of exception stores (the + * COW device). + */ +struct exception_store { + struct exception_store_operations *store_ops; struct dm_snapshot *snap; void *context; }; @@ -174,6 +186,9 @@ struct dm_snapshot { /* Queue of snapshot writes for ksnapd to flush */ struct bio_list queued_bios; struct work_struct queued_bios_work; + + int snapid; + int version; }; /* @@ -186,7 +201,7 @@ int dm_add_exception(struct dm_snapshot *s, chunk_t old, chunk_t new); * Constructor and destructor for the default persistent * store. */ -int dm_create_persistent(struct exception_store *store); +int dm_create_persistent(struct exception_store *store, int version); int dm_create_transient(struct exception_store *store); -- 1.5.5.GIT
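
P.S. For readers new to the shared-store design: the sharing decision
is driven by the per-chunk share bitmaps handled by
origin_chunk_unique() and add_exception_to_leaf() above. Below is a
minimal, self-contained user-space sketch of just that bitmap logic.
The function names mirror the patch, but the file, the test values
and the output are mine, for illustration only:

/* share_demo.c: illustration only -- not code from the patch. */
#include <stdio.h>
#include <stdint.h>

/* Union of the share masks of all exceptions stored for one chunk. */
static uint64_t using_mask(const uint64_t *shares, int n)
{
	uint64_t using = 0;
	int i;

	for (i = 0; i < n; i++)
		using |= shares[i];
	return using;
}

/*
 * Mirrors origin_chunk_unique() in the patch: an origin write may
 * skip the copy-out iff every active snapshot (snapmask) is already
 * covered by some exception's share bits.
 */
static int origin_chunk_unique(const uint64_t *shares, int n,
			       uint64_t snapmask)
{
	return !(~using_mask(shares, n) & snapmask);
}

/*
 * Mirrors the sharemap computation in add_exception_to_leaf() for an
 * origin write (snapshot == -1): the single new exception is shared
 * by exactly the active snapshots that had no exception yet.
 */
static uint64_t new_sharemap(const uint64_t *shares, int n,
			     uint64_t snapmask)
{
	return ~using_mask(shares, n) & snapmask;
}

int main(void)
{
	uint64_t snapmask = 0x3;	/* snapshots #0 and #1 are active */
	uint64_t shares[] = { 0x1 };	/* one exception, owned by #0 only */

	printf("copy-out needed: %s\n",
	       origin_chunk_unique(shares, 1, snapmask) ? "no" : "yes");
	printf("sharemap of new exception: %#llx\n",
	       (unsigned long long)new_sharemap(shares, 1, snapmask));
	return 0;
}

This prints "copy-out needed: yes" and "sharemap of new exception:
0x2": with snapshots #0 and #1 active and the chunk's only exception
owned by #0, a write to the origin must still copy the chunk out, and
the new exception carries the share bit of #1 alone. A later write to
the same chunk then finds all active snapshots covered, and the
copy-out is skipped, which is what the new *skip argument to
prepare_exception() reports to dm-snap.c.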