/* linux/drivers/block/aoe/aoedev.c */
   1/* Copyright (c) 2012 Coraid, Inc.  See COPYING for GPL terms. */
   2/*
   3 * aoedev.c
   4 * AoE device utility functions; maintains device list.
   5 */
   6
   7#include <linux/hdreg.h>
   8#include <linux/blkdev.h>
   9#include <linux/netdevice.h>
  10#include <linux/delay.h>
  11#include <linux/slab.h>
  12#include <linux/bitmap.h>
  13#include <linux/kdev_t.h>
  14#include <linux/moduleparam.h>
  15#include "aoe.h"
  16
/* Forward declarations for helpers defined later in this file. */
static void dummy_timer(ulong);
static void aoedev_freedev(struct aoedev *);
static void freetgt(struct aoedev *d, struct aoetgt *t);
static void skbpoolfree(struct aoedev *d);

/* When nonzero (default), block-device minors come from a dynamic bitmap;
 * when zero, they are derived from the AoE shelf/slot address
 * (see minor_get_static).
 */
static int aoe_dyndevs = 1;
module_param(aoe_dyndevs, int, 0644);
MODULE_PARM_DESC(aoe_dyndevs, "Use dynamic minor numbers for devices.");

/* Singly-linked list of all known AoE devices, guarded by devlist_lock. */
static struct aoedev *devlist;
static DEFINE_SPINLOCK(devlist_lock);

/* Because some systems will have one, many, or no
 *   - partitions,
 *   - slots per shelf,
 *   - or shelves,
 * we need some flexibility in the way the minor numbers
 * are allocated.  So they are dynamic.
 */
#define N_DEVS ((1U<<MINORBITS)/AOE_PARTITIONS)

/* Bitmap of in-use device slots (one bit per device, not per partition),
 * guarded by used_minors_lock. */
static DEFINE_SPINLOCK(used_minors_lock);
static DECLARE_BITMAP(used_minors, N_DEVS);
  40
  41static int
  42minor_get_dyn(ulong *sysminor)
  43{
  44        ulong flags;
  45        ulong n;
  46        int error = 0;
  47
  48        spin_lock_irqsave(&used_minors_lock, flags);
  49        n = find_first_zero_bit(used_minors, N_DEVS);
  50        if (n < N_DEVS)
  51                set_bit(n, used_minors);
  52        else
  53                error = -1;
  54        spin_unlock_irqrestore(&used_minors_lock, flags);
  55
  56        *sysminor = n * AOE_PARTITIONS;
  57        return error;
  58}
  59
  60static int
  61minor_get_static(ulong *sysminor, ulong aoemaj, int aoemin)
  62{
  63        ulong flags;
  64        ulong n;
  65        int error = 0;
  66        enum {
  67                /* for backwards compatibility when !aoe_dyndevs,
  68                 * a static number of supported slots per shelf */
  69                NPERSHELF = 16,
  70        };
  71
  72        n = aoemaj * NPERSHELF + aoemin;
  73        if (aoemin >= NPERSHELF || n >= N_DEVS) {
  74                pr_err("aoe: %s with e%ld.%d\n",
  75                        "cannot use static minor device numbers",
  76                        aoemaj, aoemin);
  77                error = -1;
  78        } else {
  79                spin_lock_irqsave(&used_minors_lock, flags);
  80                if (test_bit(n, used_minors)) {
  81                        pr_err("aoe: %s %lu\n",
  82                                "existing device already has static minor number",
  83                                n);
  84                        error = -1;
  85                } else
  86                        set_bit(n, used_minors);
  87                spin_unlock_irqrestore(&used_minors_lock, flags);
  88        }
  89
  90        *sysminor = n;
  91        return error;
  92}
  93
  94static int
  95minor_get(ulong *sysminor, ulong aoemaj, int aoemin)
  96{
  97        if (aoe_dyndevs)
  98                return minor_get_dyn(sysminor);
  99        else
 100                return minor_get_static(sysminor, aoemaj, aoemin);
 101}
 102
 103static void
 104minor_free(ulong minor)
 105{
 106        ulong flags;
 107
 108        minor /= AOE_PARTITIONS;
 109        BUG_ON(minor >= N_DEVS);
 110
 111        spin_lock_irqsave(&used_minors_lock, flags);
 112        BUG_ON(!test_bit(minor, used_minors));
 113        clear_bit(minor, used_minors);
 114        spin_unlock_irqrestore(&used_minors_lock, flags);
 115}
 116
 117/*
 118 * Users who grab a pointer to the device with aoedev_by_aoeaddr
 119 * automatically get a reference count and must be responsible
 120 * for performing a aoedev_put.  With the addition of async
 121 * kthread processing I'm no longer confident that we can
 122 * guarantee consistency in the face of device flushes.
 123 *
 124 * For the time being, we only bother to add extra references for
 125 * frames sitting on the iocq.  When the kthreads finish processing
 126 * these frames, they will aoedev_put the device.
 127 */
 128
 129void
 130aoedev_put(struct aoedev *d)
 131{
 132        ulong flags;
 133
 134        spin_lock_irqsave(&devlist_lock, flags);
 135        d->ref--;
 136        spin_unlock_irqrestore(&devlist_lock, flags);
 137}
 138
 139static void
 140dummy_timer(ulong vp)
 141{
 142        struct aoedev *d;
 143
 144        d = (struct aoedev *)vp;
 145        if (d->flags & DEVFL_TKILL)
 146                return;
 147        d->timer.expires = jiffies + HZ;
 148        add_timer(&d->timer);
 149}
 150
/* Fail the in-process (partially dispatched) request on device d, if any.
 *
 * d->ip tracks the request currently being split into frames: .buf is the
 * active buf, .rq the block-layer request, and .nxbio the chain of bios not
 * yet dispatched.  rq->special appears to hold the count of bios still
 * outstanding for the request -- NOTE(review): inferred from the decrement
 * pattern here; confirm against aoe_end_request's users in aoecmd.c.
 */
static void
aoe_failip(struct aoedev *d)
{
	struct request *rq;
	struct bio *bio;
	unsigned long n;

	aoe_failbuf(d, d->ip.buf);

	rq = d->ip.rq;
	if (rq == NULL)
		return;		/* nothing in process */
	/* mark each undispatched bio failed and drop it from the count */
	while ((bio = d->ip.nxbio)) {
		clear_bit(BIO_UPTODATE, &bio->bi_flags);
		d->ip.nxbio = bio->bi_next;
		n = (unsigned long) rq->special;
		rq->special = (void *) --n;
	}
	/* when no bios remain outstanding, complete the whole request */
	if ((unsigned long) rq->special == 0)
		aoe_end_request(d, rq, 0);
}
 172
/* Take device d down: fail every active frame, the in-process request,
 * and all queued I/O, then zero the disk capacity.
 * NOTE(review): callers in this file (aoedev_flush, aoedev_exit) invoke
 * this with d->lock held -- confirm that is the required locking contract.
 */
void
aoedev_downdev(struct aoedev *d)
{
	struct aoetgt *t, **tt, **te;
	struct frame *f;
	struct list_head *head, *pos, *nx;
	struct request *rq;
	int i;

	d->flags &= ~DEVFL_UP;

	/* clean out active buffers */
	for (i = 0; i < NFACTIVE; i++) {
		head = &d->factive[i];
		list_for_each_safe(pos, nx, head) {
			f = list_entry(pos, struct frame, head);
			list_del(pos);
			if (f->buf) {
				f->buf->nframesout--;
				aoe_failbuf(d, f->buf);
			}
			aoe_freetframe(f);
		}
	}
	/* reset window dressings */
	tt = d->targets;
	te = tt + NTARGETS;
	for (; tt < te && (t = *tt); tt++) {
		t->maxout = t->nframes;
		t->nout = 0;
	}

	/* clean out the in-process request (if any) */
	aoe_failip(d);
	d->htgt = NULL;

	/* fast fail all pending I/O */
	if (d->blkq) {
		while ((rq = blk_peek_request(d->blkq))) {
			blk_start_request(rq);
			aoe_end_request(d, rq, 1);
		}
	}

	if (d->gd)
		set_capacity(d->gd, 0);
}
 220
/* Final teardown of device d: release the gendisk and request queue,
 * every target, the buf mempool, the pooled skbs, and the minor number,
 * then free d itself.  Must run in process context (cancel_work_sync and
 * the block-layer teardown may sleep), with d already unlinked from
 * devlist by the caller.
 */
static void
aoedev_freedev(struct aoedev *d)
{
	struct aoetgt **t, **e;

	cancel_work_sync(&d->work);
	if (d->gd) {
		aoedisk_rm_sysfs(d);
		del_gendisk(d->gd);
		put_disk(d->gd);
		blk_cleanup_queue(d->blkq);
	}
	t = d->targets;
	e = t + NTARGETS;
	/* targets are packed at the front of the array; stop at first NULL */
	for (; t < e && *t; t++)
		freetgt(d, *t);
	if (d->bufpool)
		mempool_destroy(d->bufpool);
	skbpoolfree(d);
	minor_free(d->sysminor);
	kfree(d);
}
 243
/* Handle a userspace "flush" command: discard unused devices.
 *
 * If str begins with "all", any device that is not open, referenced, or
 * mid-allocation is discarded, even if it is UP; otherwise only devices
 * that are also not UP are eligible.  Works in two phases: under
 * devlist_lock, eligible devices are unlinked onto a private list; then,
 * with no locks held, each one is freed (aoedev_freedev may sleep).
 * Returns 0, or -EFAULT if str cannot be copied from userspace.
 */
int
aoedev_flush(const char __user *str, size_t cnt)
{
	ulong flags;
	struct aoedev *d, **dd;
	struct aoedev *rmd = NULL;
	char buf[16];
	int all = 0;

	if (cnt >= 3) {
		if (cnt > sizeof buf)
			cnt = sizeof buf;
		if (copy_from_user(buf, str, cnt))
			return -EFAULT;
		/* only the first three bytes matter; buf need not be NUL-terminated */
		all = !strncmp(buf, "all", 3);
	}

	spin_lock_irqsave(&devlist_lock, flags);
	dd = &devlist;
	while ((d = *dd)) {
		spin_lock(&d->lock);
		/* skip devices that are still in use or being set up */
		if ((!all && (d->flags & DEVFL_UP))
		|| (d->flags & (DEVFL_GDALLOC|DEVFL_NEWSIZE))
		|| d->nopen
		|| d->ref) {
			spin_unlock(&d->lock);
			dd = &d->next;
			continue;
		}
		*dd = d->next;		/* unlink from devlist */
		aoedev_downdev(d);
		d->flags |= DEVFL_TKILL;
		spin_unlock(&d->lock);
		d->next = rmd;		/* push onto private removal list */
		rmd = d;
	}
	spin_unlock_irqrestore(&devlist_lock, flags);
	/* free the removed devices without holding any locks */
	while ((d = rmd)) {
		rmd = d->next;
		del_timer_sync(&d->timer);
		aoedev_freedev(d);	/* must be able to sleep */
	}
	return 0;
}
 288
/* This has been confirmed to occur once with Tms=3*1000 due to the
 * driver changing link and not processing its transmit ring.  The
 * problem is hard enough to solve by returning an error that I'm
 * still punting on "solving" this.
 */
/* Free an skb, first waiting up to Tms (30s) for the network layer to
 * drop its extra data references.  If the skb is still shared after the
 * timeout, deliberately leak it (freeing shared data would be worse) and
 * log an error.  Sleeps, so process context only.
 */
static void
skbfree(struct sk_buff *skb)
{
	enum { Sms = 250, Tms = 30 * 1000};
	int i = Tms / Sms;

	if (skb == NULL)
		return;
	/* poll every Sms until we hold the only data reference or time out */
	while (atomic_read(&skb_shinfo(skb)->dataref) != 1 && i-- > 0)
		msleep(Sms);
	if (i < 0) {
		printk(KERN_ERR
			"aoe: %s holds ref: %s\n",
			skb->dev ? skb->dev->name : "netif",
			"cannot free skb -- memory leaked.");
		return;
	}
	/* detach any paged fragments and reset length before freeing */
	skb->truesize -= skb->data_len;
	skb_shinfo(skb)->nr_frags = skb->data_len = 0;
	skb_trim(skb, 0);
	dev_kfree_skb(skb);
}
 316
/* Free every skb in the device's skb pool, then reinitialize the queue.
 * Uses the unlocked queue primitives -- NOTE(review): presumably safe
 * because the only caller, aoedev_freedev, runs when no other context
 * can touch the pool; confirm.
 */
static void
skbpoolfree(struct aoedev *d)
{
	struct sk_buff *skb, *tmp;

	skb_queue_walk_safe(&d->skbpool, skb, tmp)
		skbfree(skb);

	__skb_queue_head_init(&d->skbpool);
}
 327
 328/* find it or allocate it */
 329struct aoedev *
 330aoedev_by_aoeaddr(ulong maj, int min, int do_alloc)
 331{
 332        struct aoedev *d;
 333        int i;
 334        ulong flags;
 335        ulong sysminor;
 336
 337        spin_lock_irqsave(&devlist_lock, flags);
 338
 339        for (d=devlist; d; d=d->next)
 340                if (d->aoemajor == maj && d->aoeminor == min) {
 341                        d->ref++;
 342                        break;
 343                }
 344        if (d || !do_alloc || minor_get(&sysminor, maj, min) < 0)
 345                goto out;
 346        d = kcalloc(1, sizeof *d, GFP_ATOMIC);
 347        if (!d)
 348                goto out;
 349        INIT_WORK(&d->work, aoecmd_sleepwork);
 350        spin_lock_init(&d->lock);
 351        skb_queue_head_init(&d->skbpool);
 352        init_timer(&d->timer);
 353        d->timer.data = (ulong) d;
 354        d->timer.function = dummy_timer;
 355        d->timer.expires = jiffies + HZ;
 356        add_timer(&d->timer);
 357        d->bufpool = NULL;      /* defer to aoeblk_gdalloc */
 358        d->tgt = d->targets;
 359        d->ref = 1;
 360        for (i = 0; i < NFACTIVE; i++)
 361                INIT_LIST_HEAD(&d->factive[i]);
 362        d->sysminor = sysminor;
 363        d->aoemajor = maj;
 364        d->aoeminor = min;
 365        d->mintimer = MINTIMER;
 366        d->next = devlist;
 367        devlist = d;
 368 out:
 369        spin_unlock_irqrestore(&devlist_lock, flags);
 370        return d;
 371}
 372
 373static void
 374freetgt(struct aoedev *d, struct aoetgt *t)
 375{
 376        struct frame *f;
 377        struct list_head *pos, *nx, *head;
 378        struct aoeif *ifp;
 379
 380        for (ifp = t->ifs; ifp < &t->ifs[NAOEIFS]; ++ifp) {
 381                if (!ifp->nd)
 382                        break;
 383                dev_put(ifp->nd);
 384        }
 385
 386        head = &t->ffree;
 387        list_for_each_safe(pos, nx, head) {
 388                list_del(pos);
 389                f = list_entry(pos, struct frame, head);
 390                skbfree(f->skb);
 391                kfree(f);
 392        }
 393        kfree(t);
 394}
 395
/* Module-exit teardown: flush queued command work, then take down and
 * free every device on devlist.  devlist is walked without devlist_lock
 * -- NOTE(review): presumably safe because no other users remain at
 * module unload; confirm the unload ordering guarantees this.
 */
void
aoedev_exit(void)
{
	struct aoedev *d;
	ulong flags;

	aoe_flush_iocq();
	while ((d = devlist)) {
		devlist = d->next;

		spin_lock_irqsave(&d->lock, flags);
		aoedev_downdev(d);
		d->flags |= DEVFL_TKILL;
		spin_unlock_irqrestore(&d->lock, flags);

		del_timer_sync(&d->timer);
		aoedev_freedev(d);
	}
}
 415
/* Module-load hook for the device list; nothing needs initialization. */
int __init
aoedev_init(void)
{
	return 0;
}
 421