From ecashin@coraid.com Tue Jan 3 13:19:50 2006 From: "Ed L. Cashin" CC: ecashin@coraid.com, Greg K-H Subject: aoe: support dynamic resizing of AoE devices Date: Tue, 03 Jan 2006 16:07:41 -0500 Message-ID: <87irt110ma.fsf@coraid.com> Allow the driver to recognize AoE devices that have changed size. Devices not in use are updated automatically, and devices that are in use are updated at user request. Signed-off-by: "Ed L. Cashin" Signed-off-by: Greg Kroah-Hartman --- Documentation/aoe/mkdevs.sh | 2 Documentation/aoe/udev.txt | 1 drivers/block/aoe/aoe.h | 13 +++-- drivers/block/aoe/aoeblk.c | 22 +++----- drivers/block/aoe/aoechr.c | 37 ++++++++++++++ drivers/block/aoe/aoecmd.c | 113 +++++++++++++++++++++++++++++++++++--------- drivers/block/aoe/aoedev.c | 50 ++++++++++++------- 7 files changed, 179 insertions(+), 59 deletions(-) --- gregkh-2.6.orig/Documentation/aoe/udev.txt +++ gregkh-2.6/Documentation/aoe/udev.txt @@ -18,6 +18,7 @@ SUBSYSTEM="aoe", KERNEL="discover", NAME="etherd/%k", GROUP="disk", MODE="0220" SUBSYSTEM="aoe", KERNEL="err", NAME="etherd/%k", GROUP="disk", MODE="0440" SUBSYSTEM="aoe", KERNEL="interfaces", NAME="etherd/%k", GROUP="disk", MODE="0220" +SUBSYSTEM="aoe", KERNEL="revalidate", NAME="etherd/%k", GROUP="disk", MODE="0220" # aoe block devices KERNEL="etherd*", NAME="%k", GROUP="disk" --- gregkh-2.6.orig/drivers/block/aoe/aoeblk.c +++ gregkh-2.6/drivers/block/aoe/aoeblk.c @@ -22,7 +22,9 @@ static ssize_t aoedisk_show_state(struct return snprintf(page, PAGE_SIZE, "%s%s\n", (d->flags & DEVFL_UP) ? "up" : "down", - (d->flags & DEVFL_CLOSEWAIT) ? ",closewait" : ""); + (d->flags & DEVFL_PAUSE) ? ",paused" : + (d->nopen && !(d->flags & DEVFL_UP)) ? ",closewait" : ""); + /* I'd rather see nopen exported so we can ditch closewait */ } static ssize_t aoedisk_show_mac(struct gendisk * disk, char *page) { @@ -107,8 +109,7 @@ aoeblk_release(struct inode *inode, stru spin_lock_irqsave(&d->lock, flags); - if (--d->nopen == 0 && (d->flags & DEVFL_CLOSEWAIT)) { - d->flags &= ~DEVFL_CLOSEWAIT; + if (--d->nopen == 0 && !(d->flags & DEVFL_UP)) { spin_unlock_irqrestore(&d->lock, flags); aoecmd_cfg(d->aoemajor, d->aoeminor); return 0; @@ -158,14 +159,14 @@ aoeblk_make_request(request_queue_t *q, } list_add_tail(&buf->bufs, &d->bufq); - aoecmd_work(d); + aoecmd_work(d); sl = d->sendq_hd; d->sendq_hd = d->sendq_tl = NULL; spin_unlock_irqrestore(&d->lock, flags); - aoenet_xmit(sl); + return 0; } @@ -205,7 +206,7 @@ aoeblk_gdalloc(void *vp) printk(KERN_ERR "aoe: aoeblk_gdalloc: cannot allocate disk " "structure for %ld.%ld\n", d->aoemajor, d->aoeminor); spin_lock_irqsave(&d->lock, flags); - d->flags &= ~DEVFL_WORKON; + d->flags &= ~DEVFL_GDALLOC; spin_unlock_irqrestore(&d->lock, flags); return; } @@ -218,7 +219,7 @@ aoeblk_gdalloc(void *vp) "for %ld.%ld\n", d->aoemajor, d->aoeminor); put_disk(gd); spin_lock_irqsave(&d->lock, flags); - d->flags &= ~DEVFL_WORKON; + d->flags &= ~DEVFL_GDALLOC; spin_unlock_irqrestore(&d->lock, flags); return; } @@ -235,18 +236,13 @@ aoeblk_gdalloc(void *vp) gd->queue = &d->blkq; d->gd = gd; - d->flags &= ~DEVFL_WORKON; + d->flags &= ~DEVFL_GDALLOC; d->flags |= DEVFL_UP; spin_unlock_irqrestore(&d->lock, flags); add_disk(gd); aoedisk_add_sysfs(d); - - printk(KERN_INFO "aoe: %012llx e%lu.%lu v%04x has %llu " - "sectors\n", (unsigned long long)mac_addr(d->addr), - d->aoemajor, d->aoeminor, - d->fw_ver, (long long)d->ssize); } void --- gregkh-2.6.orig/drivers/block/aoe/aoedev.c +++ gregkh-2.6/drivers/block/aoe/aoedev.c @@ -12,6 +12,24 @@ static struct aoedev *devlist; static spinlock_t devlist_lock; +int +aoedev_isbusy(struct aoedev *d) +{ + struct frame *f, *e; + + f = d->frames; + e = f + d->nframes; + do { + if (f->tag != FREETAG) { + printk(KERN_DEBUG "aoe: %ld.%ld isbusy\n", + d->aoemajor, d->aoeminor); + return 1; + } + } while (++f < e); + + return 0; +} + struct aoedev * aoedev_by_aoeaddr(int maj, int min) { @@ -44,6 +62,8 @@ aoedev_newdev(unsigned long nframes) return NULL; } + INIT_WORK(&d->work, aoecmd_sleepwork, d); + d->nframes = nframes; d->frames = f; e = f + nframes; @@ -92,17 +112,15 @@ aoedev_downdev(struct aoedev *d) bio_endio(bio, bio->bi_size, -EIO); } - if (d->nopen) - d->flags |= DEVFL_CLOSEWAIT; if (d->gd) d->gd->capacity = 0; - d->flags &= ~DEVFL_UP; + d->flags &= ~(DEVFL_UP | DEVFL_PAUSE); } +/* find it or malloc it */ struct aoedev * -aoedev_set(unsigned long sysminor, unsigned char *addr, - struct net_device *ifp, unsigned long bufcnt) +aoedev_by_sysminor_m(ulong sysminor, ulong bufcnt) { struct aoedev *d; unsigned long flags; @@ -113,25 +131,19 @@ aoedev_set(unsigned long sysminor, unsig if (d->sysminor == sysminor) break; - if (d == NULL && (d = aoedev_newdev(bufcnt)) == NULL) { - spin_unlock_irqrestore(&devlist_lock, flags); - printk(KERN_INFO "aoe: aoedev_set: aoedev_newdev failure.\n"); - return NULL; - } /* if newdev, (d->flags & DEVFL_UP) == 0 for below */ - - spin_unlock_irqrestore(&devlist_lock, flags); - spin_lock_irqsave(&d->lock, flags); - - d->ifp = ifp; - memcpy(d->addr, addr, sizeof d->addr); - if ((d->flags & DEVFL_UP) == 0) { - aoedev_downdev(d); /* flushes outstanding frames */ + if (d == NULL) { + d = aoedev_newdev(bufcnt); + if (d == NULL) { + spin_unlock_irqrestore(&devlist_lock, flags); + printk(KERN_INFO "aoe: aoedev_set: aoedev_newdev failure.\n"); + return NULL; + } d->sysminor = sysminor; d->aoemajor = AOEMAJOR(sysminor); d->aoeminor = AOEMINOR(sysminor); } - spin_unlock_irqrestore(&d->lock, flags); + spin_unlock_irqrestore(&devlist_lock, flags); return d; } --- gregkh-2.6.orig/Documentation/aoe/mkdevs.sh +++ gregkh-2.6/Documentation/aoe/mkdevs.sh @@ -27,6 +27,8 @@ rm -f $dir/discover mknod -m 0200 $dir/discover c $MAJOR 3 rm -f $dir/interfaces mknod -m 0200 $dir/interfaces c $MAJOR 4 +rm -f $dir/revalidate +mknod -m 0200 $dir/revalidate c $MAJOR 5 export n_partitions mkshelf=`echo $0 | sed 's!mkdevs!mkshelf!'` --- gregkh-2.6.orig/drivers/block/aoe/aoecmd.c +++ gregkh-2.6/drivers/block/aoe/aoecmd.c @@ -8,6 +8,7 @@ #include #include #include +#include #include #include "aoe.h" @@ -195,6 +196,14 @@ aoecmd_work(struct aoedev *d) { struct frame *f; struct buf *buf; + + if (d->flags & DEVFL_PAUSE) { + if (!aoedev_isbusy(d)) + d->sendq_hd = aoecmd_cfg_pkts(d->aoemajor, + d->aoeminor, &d->sendq_tl); + return; + } + loop: f = getframe(d, FREETAG); if (f == NULL) @@ -306,6 +315,38 @@ tdie: spin_unlock_irqrestore(&d->lock, aoenet_xmit(sl); } +/* this function performs work that has been deferred until sleeping is OK + */ +void +aoecmd_sleepwork(void *vp) +{ + struct aoedev *d = (struct aoedev *) vp; + + if (d->flags & DEVFL_GDALLOC) + aoeblk_gdalloc(d); + + if (d->flags & DEVFL_NEWSIZE) { + struct block_device *bd; + unsigned long flags; + u64 ssize; + + ssize = d->gd->capacity; + bd = bdget_disk(d->gd, 0); + + if (bd) { + mutex_lock(&bd->bd_inode->i_mutex); + i_size_write(bd->bd_inode, (loff_t)ssize<<9); + mutex_unlock(&bd->bd_inode->i_mutex); +//?? rescan_partitions(d->gd, bd); + bdput(bd); + } + spin_lock_irqsave(&d->lock, flags); + d->flags |= DEVFL_UP; + d->flags &= ~DEVFL_NEWSIZE; + spin_unlock_irqrestore(&d->lock, flags); + } +} + static void ataid_complete(struct aoedev *d, unsigned char *id) { @@ -340,21 +381,29 @@ ataid_complete(struct aoedev *d, unsigne d->geo.heads = le16_to_cpu(get_unaligned((__le16 *) &id[55<<1])); d->geo.sectors = le16_to_cpu(get_unaligned((__le16 *) &id[56<<1])); } + + if (d->ssize != ssize) + printk(KERN_INFO "aoe: %012llx e%lu.%lu v%04x has %llu " + "sectors\n", (unsigned long long)mac_addr(d->addr), + d->aoemajor, d->aoeminor, + d->fw_ver, (long long)ssize); d->ssize = ssize; d->geo.start = 0; if (d->gd != NULL) { d->gd->capacity = ssize; - d->flags |= DEVFL_UP; - return; - } - if (d->flags & DEVFL_WORKON) { - printk(KERN_INFO "aoe: ataid_complete: can't schedule work, it's already on! " - "(This really shouldn't happen).\n"); - return; + d->flags |= DEVFL_NEWSIZE; + } else { + if (d->flags & DEVFL_GDALLOC) { + printk(KERN_INFO "aoe: %s: %s e%lu.%lu, %s\n", + __FUNCTION__, + "can't schedule work for", + d->aoemajor, d->aoeminor, + "it's already on! (This really shouldn't happen).\n"); + return; + } + d->flags |= DEVFL_GDALLOC; } - INIT_WORK(&d->work, aoeblk_gdalloc, d); schedule_work(&d->work); - d->flags |= DEVFL_WORKON; } static void @@ -452,7 +501,7 @@ aoecmd_ata_rsp(struct sk_buff *skb) return; } ataid_complete(d, (char *) (ahin+1)); - /* d->flags |= DEVFL_WC_UPDATE; */ + d->flags &= ~DEVFL_PAUSE; break; default: printk(KERN_INFO "aoe: aoecmd_ata_rsp: unrecognized " @@ -485,24 +534,25 @@ aoecmd_ata_rsp(struct sk_buff *skb) f->tag = FREETAG; aoecmd_work(d); - sl = d->sendq_hd; d->sendq_hd = d->sendq_tl = NULL; spin_unlock_irqrestore(&d->lock, flags); - aoenet_xmit(sl); } -void -aoecmd_cfg(ushort aoemajor, unsigned char aoeminor) +/* some callers cannot sleep, and they can call this function, + * transmitting the packets later, when interrupts are on + */ +struct sk_buff * +aoecmd_cfg_pkts(ushort aoemajor, unsigned char aoeminor, struct sk_buff **tail) { struct aoe_hdr *h; struct aoe_cfghdr *ch; - struct sk_buff *skb, *sl; + struct sk_buff *skb, *sl, *sl_tail; struct net_device *ifp; - sl = NULL; + sl = sl_tail = NULL; read_lock(&dev_base_lock); for (ifp = dev_base; ifp; dev_put(ifp), ifp = ifp->next) { @@ -515,6 +565,8 @@ aoecmd_cfg(ushort aoemajor, unsigned cha printk(KERN_INFO "aoe: aoecmd_cfg: skb alloc failure\n"); continue; } + if (sl_tail == NULL) + sl_tail = skb; h = (struct aoe_hdr *) skb->mac.raw; memset(h, 0, sizeof *h + sizeof *ch); @@ -531,6 +583,18 @@ aoecmd_cfg(ushort aoemajor, unsigned cha } read_unlock(&dev_base_lock); + if (tail != NULL) + *tail = sl_tail; + return sl; +} + +void +aoecmd_cfg(ushort aoemajor, unsigned char aoeminor) +{ + struct sk_buff *sl; + + sl = aoecmd_cfg_pkts(aoemajor, aoeminor, NULL); + aoenet_xmit(sl); } @@ -619,23 +683,28 @@ aoecmd_cfg_rsp(struct sk_buff *skb) if (bufcnt > MAXFRAMES) /* keep it reasonable */ bufcnt = MAXFRAMES; - d = aoedev_set(sysminor, h->src, skb->dev, bufcnt); + d = aoedev_by_sysminor_m(sysminor, bufcnt); if (d == NULL) { - printk(KERN_INFO "aoe: aoecmd_cfg_rsp: device set failure\n"); + printk(KERN_INFO "aoe: aoecmd_cfg_rsp: device sysminor_m failure\n"); return; } spin_lock_irqsave(&d->lock, flags); - if (d->flags & (DEVFL_UP | DEVFL_CLOSEWAIT)) { + /* permit device to migrate mac and network interface */ + d->ifp = skb->dev; + memcpy(d->addr, h->src, sizeof d->addr); + + /* don't change users' perspective */ + if (d->nopen && !(d->flags & DEVFL_PAUSE)) { spin_unlock_irqrestore(&d->lock, flags); return; } - + d->flags |= DEVFL_PAUSE; /* force pause */ d->fw_ver = be16_to_cpu(ch->fwver); - /* we get here only if the device is new */ - sl = aoecmd_ata_id(d); + /* check for already outstanding ataid */ + sl = aoedev_isbusy(d) == 0 ? aoecmd_ata_id(d) : NULL; spin_unlock_irqrestore(&d->lock, flags); --- gregkh-2.6.orig/drivers/block/aoe/aoe.h +++ gregkh-2.6/drivers/block/aoe/aoe.h @@ -75,8 +75,9 @@ enum { DEVFL_TKILL = (1<<1), /* flag for timer to know when to kill self */ DEVFL_EXT = (1<<2), /* device accepts lba48 commands */ DEVFL_CLOSEWAIT = (1<<3), /* device is waiting for all closes to revalidate */ - DEVFL_WC_UPDATE = (1<<4), /* this device needs to update write cache status */ - DEVFL_WORKON = (1<<4), + DEVFL_GDALLOC = (1<<4), /* need to alloc gendisk */ + DEVFL_PAUSE = (1<<5), + DEVFL_NEWSIZE = (1<<6), /* need to update dev size in block layer */ BUFFL_FAIL = 1, }; @@ -152,16 +153,18 @@ void aoechr_exit(void); void aoechr_error(char *); void aoecmd_work(struct aoedev *d); -void aoecmd_cfg(ushort, unsigned char); +void aoecmd_cfg(ushort aoemajor, unsigned char aoeminor); +struct sk_buff *aoecmd_cfg_pkts(ushort, unsigned char, struct sk_buff **); void aoecmd_ata_rsp(struct sk_buff *); void aoecmd_cfg_rsp(struct sk_buff *); +void aoecmd_sleepwork(void *vp); int aoedev_init(void); void aoedev_exit(void); struct aoedev *aoedev_by_aoeaddr(int maj, int min); +struct aoedev *aoedev_by_sysminor_m(ulong sysminor, ulong bufcnt); void aoedev_downdev(struct aoedev *d); -struct aoedev *aoedev_set(unsigned long, unsigned char *, struct net_device *, unsigned long); -int aoedev_busy(void); +int aoedev_isbusy(struct aoedev *d); int aoenet_init(void); void aoenet_exit(void); --- gregkh-2.6.orig/drivers/block/aoe/aoechr.c +++ gregkh-2.6/drivers/block/aoe/aoechr.c @@ -13,6 +13,7 @@ enum { MINOR_ERR = 2, MINOR_DISCOVER, MINOR_INTERFACES, + MINOR_REVALIDATE, MSGSZ = 2048, NARGS = 10, NMSG = 100, /* message backlog to retain */ @@ -41,6 +42,7 @@ static struct aoe_chardev chardevs[] = { { MINOR_ERR, "err" }, { MINOR_DISCOVER, "discover" }, { MINOR_INTERFACES, "interfaces" }, + { MINOR_REVALIDATE, "revalidate" }, }; static int @@ -62,6 +64,39 @@ interfaces(const char __user *str, size_ return 0; } +static int +revalidate(const char __user *str, size_t size) +{ + int major, minor, n; + ulong flags; + struct aoedev *d; + char buf[16]; + + if (size >= sizeof buf) + return -EINVAL; + buf[sizeof buf - 1] = '\0'; + if (copy_from_user(buf, str, size)) + return -EFAULT; + + /* should be e%d.%d format */ + n = sscanf(buf, "e%d.%d", &major, &minor); + if (n != 2) { + printk(KERN_ERR "aoe: %s: invalid device specification\n", + __FUNCTION__); + return -EINVAL; + } + d = aoedev_by_aoeaddr(major, minor); + if (!d) + return -EINVAL; + + spin_lock_irqsave(&d->lock, flags); + d->flags |= DEVFL_PAUSE; + spin_unlock_irqrestore(&d->lock, flags); + aoecmd_cfg(major, minor); + + return 0; +} + void aoechr_error(char *msg) { @@ -114,6 +149,8 @@ aoechr_write(struct file *filp, const ch case MINOR_INTERFACES: ret = interfaces(buf, cnt); break; + case MINOR_REVALIDATE: + ret = revalidate(buf, cnt); } if (ret == 0) ret = cnt;