linux/drivers/block/aoe/aoedev.c
/* Copyright (c) 2013 Coraid, Inc.  See COPYING for GPL terms. */
/*
 * aoedev.c
 * AoE device utility functions; maintains device list.
 */

#include <linux/hdreg.h>
#include <linux/blkdev.h>
#include <linux/netdevice.h>
#include <linux/delay.h>
#include <linux/slab.h>
#include <linux/bitmap.h>
#include <linux/kdev_t.h>
#include <linux/moduleparam.h>
#include <linux/string.h>
#include "aoe.h"

static void dummy_timer(ulong);
static void freetgt(struct aoedev *d, struct aoetgt *t);
static void skbpoolfree(struct aoedev *d);

static int aoe_dyndevs = 1;
module_param(aoe_dyndevs, int, 0644);
MODULE_PARM_DESC(aoe_dyndevs, "Use dynamic minor numbers for devices.");

static struct aoedev *devlist;
static DEFINE_SPINLOCK(devlist_lock);

/* Because some systems will have one, many, or no
 *   - partitions,
 *   - slots per shelf,
 *   - or shelves,
 * we need some flexibility in the way the minor numbers
 * are allocated.  So they are dynamic.
 */
#define N_DEVS ((1U<<MINORBITS)/AOE_PARTITIONS)
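/* For example, with MINORBITS of 20 and AOE_PARTITIONS of 16 (the
 * mainline values at the time of writing), N_DEVS comes to
 * (1U<<20)/16 == 65536 addressable AoE devices.
 */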

static DEFINE_SPINLOCK(used_minors_lock);
static DECLARE_BITMAP(used_minors, N_DEVS);

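/* Grab the lowest free slot in the used_minors bitmap and return the
 * corresponding base system minor through *sysminor.  Fails with -1
 * when all N_DEVS slots are in use.
 */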
static int
minor_get_dyn(ulong *sysminor)
{
        ulong flags;
        ulong n;
        int error = 0;

        spin_lock_irqsave(&used_minors_lock, flags);
        n = find_first_zero_bit(used_minors, N_DEVS);
        if (n < N_DEVS)
                set_bit(n, used_minors);
        else
                error = -1;
        spin_unlock_irqrestore(&used_minors_lock, flags);

        *sysminor = n * AOE_PARTITIONS;
        return error;
}

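/* Map the AoE address to a fixed slot, n = aoemaj * NPERSHELF + aoemin,
 * so that a given shelf.slot always receives the same minor number.
 * Fails when the slot is out of range or already claimed.
 */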
static int
minor_get_static(ulong *sysminor, ulong aoemaj, int aoemin)
{
        ulong flags;
        ulong n;
        int error = 0;
        enum {
                /* for backwards compatibility when !aoe_dyndevs,
                 * a static number of supported slots per shelf */
                NPERSHELF = 16,
        };

        if (aoemin >= NPERSHELF) {
                pr_err("aoe: %s %d slots per shelf\n",
                        "static minor device numbers support only",
                        NPERSHELF);
                error = -1;
                goto out;
        }

        n = aoemaj * NPERSHELF + aoemin;
        if (n >= N_DEVS) {
                pr_err("aoe: %s with e%ld.%d\n",
                        "cannot use static minor device numbers",
                        aoemaj, aoemin);
                error = -1;
                goto out;
        }

        spin_lock_irqsave(&used_minors_lock, flags);
        if (test_bit(n, used_minors)) {
                pr_err("aoe: %s %lu\n",
                        "existing device already has static minor number",
                        n);
                error = -1;
        } else
                set_bit(n, used_minors);
        spin_unlock_irqrestore(&used_minors_lock, flags);
        *sysminor = n * AOE_PARTITIONS;
out:
        return error;
}

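/* Pick dynamic or static minor allocation according to the aoe_dyndevs
 * module parameter.
 */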
static int
minor_get(ulong *sysminor, ulong aoemaj, int aoemin)
{
        if (aoe_dyndevs)
                return minor_get_dyn(sysminor);
        else
                return minor_get_static(sysminor, aoemaj, aoemin);
}

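/* Return a minor number taken via minor_get_dyn or minor_get_static to
 * the pool.  The BUG_ONs catch out-of-range minors and double frees.
 */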
static void
minor_free(ulong minor)
{
        ulong flags;

        minor /= AOE_PARTITIONS;
        BUG_ON(minor >= N_DEVS);

        spin_lock_irqsave(&used_minors_lock, flags);
        BUG_ON(!test_bit(minor, used_minors));
        clear_bit(minor, used_minors);
        spin_unlock_irqrestore(&used_minors_lock, flags);
}

/*
 * Users who grab a pointer to the device with aoedev_by_aoeaddr
 * automatically take a reference and are responsible for performing
 * an aoedev_put.  With the addition of async kthread processing I'm
 * no longer confident that we can guarantee consistency in the face
 * of device flushes.
 *
 * For the time being, we only bother to add extra references for
 * frames sitting on the iocq.  When the kthreads finish processing
 * these frames, they will aoedev_put the device.
 */

void
aoedev_put(struct aoedev *d)
{
        ulong flags;

        spin_lock_irqsave(&devlist_lock, flags);
        d->ref--;
        spin_unlock_irqrestore(&devlist_lock, flags);
}

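/* The per-device timer does no work of its own; it simply re-arms
 * itself once a second until DEVFL_TKILL lets it expire for good.
 */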
static void
dummy_timer(ulong vp)
{
        struct aoedev *d;

        d = (struct aoedev *)vp;
        if (d->flags & DEVFL_TKILL)
                return;
        d->timer.expires = jiffies + HZ;
        add_timer(&d->timer);
}

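/* Fail the in-process request: fail its current buf, mark each bio
 * still chained behind it as not up to date, and, if decrementing the
 * outstanding-bio count stashed in rq->special drains it to zero,
 * complete the request.
 */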
static void
aoe_failip(struct aoedev *d)
{
        struct request *rq;
        struct bio *bio;
        unsigned long n;

        aoe_failbuf(d, d->ip.buf);

        rq = d->ip.rq;
        if (rq == NULL)
                return;
        while ((bio = d->ip.nxbio)) {
                clear_bit(BIO_UPTODATE, &bio->bi_flags);
                d->ip.nxbio = bio->bi_next;
                n = (unsigned long) rq->special;
                rq->special = (void *) --n;
        }
        if ((unsigned long) rq->special == 0)
                aoe_end_request(d, rq, 0);
}

static void
downdev_frame(struct list_head *pos)
{
        struct frame *f;

        f = list_entry(pos, struct frame, head);
        list_del(pos);
        if (f->buf) {
                f->buf->nframesout--;
                aoe_failbuf(f->t->d, f->buf);
        }
        aoe_freetframe(f);
}

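/* Take the device down: fail every frame that is active or queued for
 * retransmit, reset each target's congestion window, fail the
 * in-process request, and error out all I/O still pending in the
 * block queue.
 */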
void
aoedev_downdev(struct aoedev *d)
{
        struct aoetgt *t, **tt, **te;
        struct list_head *head, *pos, *nx;
        struct request *rq;
        int i;

        d->flags &= ~DEVFL_UP;

        /* clean out active and to-be-retransmitted buffers */
        for (i = 0; i < NFACTIVE; i++) {
                head = &d->factive[i];
                list_for_each_safe(pos, nx, head)
                        downdev_frame(pos);
        }
        head = &d->rexmitq;
        list_for_each_safe(pos, nx, head)
                downdev_frame(pos);

        /* reset window dressings */
        tt = d->targets;
        te = tt + d->ntargets;
        for (; tt < te && (t = *tt); tt++) {
                aoecmd_wreset(t);
                t->nout = 0;
        }

        /* clean out the in-process request (if any) */
        aoe_failip(d);

        /* fast fail all pending I/O */
        if (d->blkq) {
                while ((rq = blk_peek_request(d->blkq))) {
                        blk_start_request(rq);
                        aoe_end_request(d, rq, 1);
                }
        }

        if (d->gd)
                set_capacity(d->gd, 0);
}

/* return whether the user asked for this particular
 * device to be flushed
 */
static int
user_req(char *s, size_t slen, struct aoedev *d)
{
        const char *p;
        size_t lim;

        if (!d->gd)
                return 0;
        p = kbasename(d->gd->disk_name);
        lim = sizeof(d->gd->disk_name);
        lim -= p - d->gd->disk_name;
        if (slen < lim)
                lim = slen;

        return !strncmp(s, p, lim);
}

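/* Tear down a device that has been marked DEVFL_TKILL.  This can
 * sleep (del_timer_sync, del_gendisk), so it runs with no locks held;
 * DEVFL_FREEING ensures only one caller performs the teardown, and
 * DEVFL_FREED tells the flush code that the aoedev itself may now be
 * unlinked and kfreed.
 */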
static void
freedev(struct aoedev *d)
{
        struct aoetgt **t, **e;
        int freeing = 0;
        unsigned long flags;

        spin_lock_irqsave(&d->lock, flags);
        if (d->flags & DEVFL_TKILL
        && !(d->flags & DEVFL_FREEING)) {
                d->flags |= DEVFL_FREEING;
                freeing = 1;
        }
        spin_unlock_irqrestore(&d->lock, flags);
        if (!freeing)
                return;

        del_timer_sync(&d->timer);
        if (d->gd) {
                aoedisk_rm_debugfs(d);
                aoedisk_rm_sysfs(d);
                del_gendisk(d->gd);
                put_disk(d->gd);
                blk_cleanup_queue(d->blkq);
        }
        t = d->targets;
        e = t + d->ntargets;
        for (; t < e && *t; t++)
                freetgt(d, *t);
        if (d->bufpool)
                mempool_destroy(d->bufpool);
        skbpoolfree(d);
        minor_free(d->sysminor);

        spin_lock_irqsave(&d->lock, flags);
        d->flags |= DEVFL_FREED;
        spin_unlock_irqrestore(&d->lock, flags);
}

enum flush_parms {
        NOT_EXITING = 0,
        EXITING = 1,
};

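/* Implements the user "flush" command.  str may name a single device,
 * or be "all" to also flush devices that are still up; when the module
 * is exiting, every device is taken down unconditionally.  The work is
 * split into passes because aoedev_downdev must not sleep (pass one,
 * under devlist_lock), while freedev may (pass two).
 */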
static int
flush(const char __user *str, size_t cnt, int exiting)
{
        ulong flags;
        struct aoedev *d, **dd;
        char buf[16];
        int all = 0;
        int specified = 0;      /* flush a specific device */
        unsigned int skipflags;

        skipflags = DEVFL_GDALLOC | DEVFL_NEWSIZE | DEVFL_TKILL;

        if (!exiting && cnt >= 3) {
                if (cnt > sizeof buf)
                        cnt = sizeof buf;
                if (copy_from_user(buf, str, cnt))
                        return -EFAULT;
                all = !strncmp(buf, "all", 3);
                if (!all)
                        specified = 1;
        }

        flush_scheduled_work();
        /* pass one: without sleeping, do aoedev_downdev */
        spin_lock_irqsave(&devlist_lock, flags);
        for (d = devlist; d; d = d->next) {
                spin_lock(&d->lock);
                if (exiting) {
                        /* unconditionally take each device down */
                } else if (specified) {
                        if (!user_req(buf, cnt, d))
                                goto cont;
                } else if ((!all && (d->flags & DEVFL_UP))
                || d->flags & skipflags
                || d->nopen
                || d->ref)
                        goto cont;

                aoedev_downdev(d);
                d->flags |= DEVFL_TKILL;
cont:
                spin_unlock(&d->lock);
        }
        spin_unlock_irqrestore(&devlist_lock, flags);

        /* pass two: call freedev, which might sleep,
         * for aoedevs marked with DEVFL_TKILL
         */
restart:
        spin_lock_irqsave(&devlist_lock, flags);
        for (d = devlist; d; d = d->next) {
                spin_lock(&d->lock);
                if (d->flags & DEVFL_TKILL
                && !(d->flags & DEVFL_FREEING)) {
                        spin_unlock(&d->lock);
                        spin_unlock_irqrestore(&devlist_lock, flags);
                        freedev(d);
                        goto restart;
                }
                spin_unlock(&d->lock);
        }

        /* pass three: remove aoedevs marked with DEVFL_FREED */
        for (dd = &devlist, d = *dd; d; d = *dd) {
                struct aoedev *doomed = NULL;

                spin_lock(&d->lock);
                if (d->flags & DEVFL_FREED) {
                        *dd = d->next;
                        doomed = d;
                } else {
                        dd = &d->next;
                }
                spin_unlock(&d->lock);
                if (doomed)
                        kfree(doomed->targets);
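                /* kfree(NULL) is a no-op, so doomed needs no check here */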
                kfree(doomed);
        }
        spin_unlock_irqrestore(&devlist_lock, flags);

        return 0;
}

int
aoedev_flush(const char __user *str, size_t cnt)
{
        return flush(str, cnt, NOT_EXITING);
}

/* The timeout below has been confirmed to expire once, with Tms=3*1000,
 * when the driver changed link without processing its transmit ring.
 * Propagating an error back to the callers is hard enough that I'm
 * still punting on "solving" this.
 */
static void
skbfree(struct sk_buff *skb)
{
        enum { Sms = 250, Tms = 30 * 1000 };
        int i = Tms / Sms;

        if (skb == NULL)
                return;
        while (atomic_read(&skb_shinfo(skb)->dataref) != 1 && i-- > 0)
                msleep(Sms);
        if (i < 0) {
                printk(KERN_ERR
                        "aoe: %s holds ref: %s\n",
                        skb->dev ? skb->dev->name : "netif",
                        "cannot free skb -- memory leaked.");
                return;
        }
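        /* drop the paged fragments so the skb accounts only for its
         * own buffer before handing it back to the allocator
         */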
        skb->truesize -= skb->data_len;
        skb_shinfo(skb)->nr_frags = skb->data_len = 0;
        skb_trim(skb, 0);
        dev_kfree_skb(skb);
}

static void
skbpoolfree(struct aoedev *d)
{
        struct sk_buff *skb, *tmp;

        skb_queue_walk_safe(&d->skbpool, skb, tmp)
                skbfree(skb);

        __skb_queue_head_init(&d->skbpool);
}

/* Find the device with AoE address maj.min and take a reference on it,
 * or, when do_alloc is set, allocate and initialize a new one.  Returns
 * NULL on allocation failure or when the device is being torn down
 * (DEVFL_TKILL).
 */
struct aoedev *
aoedev_by_aoeaddr(ulong maj, int min, int do_alloc)
{
        struct aoedev *d;
        int i;
        ulong flags;
        ulong sysminor = 0;

        spin_lock_irqsave(&devlist_lock, flags);

        for (d = devlist; d; d = d->next)
                if (d->aoemajor == maj && d->aoeminor == min) {
                        spin_lock(&d->lock);
                        if (d->flags & DEVFL_TKILL) {
                                spin_unlock(&d->lock);
                                d = NULL;
                                goto out;
                        }
                        d->ref++;
                        spin_unlock(&d->lock);
                        break;
                }
        if (d || !do_alloc || minor_get(&sysminor, maj, min) < 0)
                goto out;
        d = kcalloc(1, sizeof *d, GFP_ATOMIC);
        if (!d)
                goto out;
        d->targets = kcalloc(NTARGETS, sizeof(*d->targets), GFP_ATOMIC);
        if (!d->targets) {
                kfree(d);
                d = NULL;
                goto out;
        }
        d->ntargets = NTARGETS;
        INIT_WORK(&d->work, aoecmd_sleepwork);
        spin_lock_init(&d->lock);
        skb_queue_head_init(&d->skbpool);
        init_timer(&d->timer);
        d->timer.data = (ulong) d;
        d->timer.function = dummy_timer;
        d->timer.expires = jiffies + HZ;
        add_timer(&d->timer);
        d->bufpool = NULL;      /* defer to aoeblk_gdalloc */
        d->tgt = d->targets;
        d->ref = 1;
        for (i = 0; i < NFACTIVE; i++)
                INIT_LIST_HEAD(&d->factive[i]);
        INIT_LIST_HEAD(&d->rexmitq);
        d->sysminor = sysminor;
        d->aoemajor = maj;
        d->aoeminor = min;
        d->rttavg = RTTAVG_INIT;
        d->rttdev = RTTDEV_INIT;
        d->next = devlist;
        devlist = d;
 out:
        spin_unlock_irqrestore(&devlist_lock, flags);
        return d;
}

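/* Release a target: drop the reference on each interface's net_device
 * and free every frame, and its skb, on the target's free list.
 */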
static void
freetgt(struct aoedev *d, struct aoetgt *t)
{
        struct frame *f;
        struct list_head *pos, *nx, *head;
        struct aoeif *ifp;

        for (ifp = t->ifs; ifp < &t->ifs[NAOEIFS]; ++ifp) {
                if (!ifp->nd)
                        break;
                dev_put(ifp->nd);
        }

        head = &t->ffree;
        list_for_each_safe(pos, nx, head) {
                list_del(pos);
                f = list_entry(pos, struct frame, head);
                skbfree(f->skb);
                kfree(f);
        }
        kfree(t);
}

void
aoedev_exit(void)
{
        flush_scheduled_work();
        flush(NULL, 0, EXITING);
}

int __init
aoedev_init(void)
{
        return 0;
}