linux/drivers/block/aoe/aoedev.c
/* Copyright (c) 2013 Coraid, Inc.  See COPYING for GPL terms. */
/*
 * aoedev.c
 * AoE device utility functions; maintains device list.
 */

#include <linux/hdreg.h>
#include <linux/blk-mq.h>
#include <linux/netdevice.h>
#include <linux/delay.h>
#include <linux/slab.h>
#include <linux/bitmap.h>
#include <linux/kdev_t.h>
#include <linux/moduleparam.h>
#include <linux/string.h>
#include "aoe.h"

static void freetgt(struct aoedev *d, struct aoetgt *t);
static void skbpoolfree(struct aoedev *d);

static int aoe_dyndevs = 1;
module_param(aoe_dyndevs, int, 0644);
MODULE_PARM_DESC(aoe_dyndevs, "Use dynamic minor numbers for devices.");

static struct aoedev *devlist;
static DEFINE_SPINLOCK(devlist_lock);

/* Because some systems will have one, many, or no
 *   - partitions,
 *   - slots per shelf,
 *   - or shelves,
 * we need some flexibility in the way the minor numbers
 * are allocated.  So they are dynamic.
 */
#define N_DEVS ((1U<<MINORBITS)/AOE_PARTITIONS)
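/* For example, assuming the usual MINORBITS of 20 and an AOE_PARTITIONS
 * of 16, N_DEVS is (1U << 20) / 16 == 65536: up to 65536 addressable
 * devices, each reserving AOE_PARTITIONS consecutive system minors.
 */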

static DEFINE_SPINLOCK(used_minors_lock);
static DECLARE_BITMAP(used_minors, N_DEVS);

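/* Grab the lowest unused device slot from the used_minors bitmap.
 * On success, return 0 with *sysminor set to the first of the slot's
 * AOE_PARTITIONS system minors; return -1 when all N_DEVS slots are
 * taken.
 */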
static int
minor_get_dyn(ulong *sysminor)
{
        ulong flags;
        ulong n;
        int error = 0;

        spin_lock_irqsave(&used_minors_lock, flags);
        n = find_first_zero_bit(used_minors, N_DEVS);
        if (n < N_DEVS)
                set_bit(n, used_minors);
        else
                error = -1;
        spin_unlock_irqrestore(&used_minors_lock, flags);

        *sysminor = n * AOE_PARTITIONS;
        return error;
}

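/* For backwards compatibility when !aoe_dyndevs: map shelf aoemaj,
 * slot aoemin to the fixed bitmap slot aoemaj * NPERSHELF + aoemin.
 * Fail if the slot number or the resulting minor is out of range, or
 * if that minor is already in use.
 */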
static int
minor_get_static(ulong *sysminor, ulong aoemaj, int aoemin)
{
        ulong flags;
        ulong n;
        int error = 0;
        enum {
                /* for backwards compatibility when !aoe_dyndevs,
                 * a static number of supported slots per shelf */
                NPERSHELF = 16,
        };

        if (aoemin >= NPERSHELF) {
                pr_err("aoe: %s %d slots per shelf\n",
                        "static minor device numbers support only",
                        NPERSHELF);
                error = -1;
                goto out;
        }

        n = aoemaj * NPERSHELF + aoemin;
        if (n >= N_DEVS) {
                pr_err("aoe: %s with e%ld.%d\n",
                        "cannot use static minor device numbers",
                        aoemaj, aoemin);
                error = -1;
                goto out;
        }

        spin_lock_irqsave(&used_minors_lock, flags);
        if (test_bit(n, used_minors)) {
                pr_err("aoe: %s %lu\n",
                        "existing device already has static minor number",
                        n);
                error = -1;
        } else
                set_bit(n, used_minors);
        spin_unlock_irqrestore(&used_minors_lock, flags);
        *sysminor = n * AOE_PARTITIONS;
out:
        return error;
}

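/* Choose the allocation strategy according to the aoe_dyndevs module
 * parameter.
 */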
static int
minor_get(ulong *sysminor, ulong aoemaj, int aoemin)
{
        if (aoe_dyndevs)
                return minor_get_dyn(sysminor);
        else
                return minor_get_static(sysminor, aoemaj, aoemin);
}

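/* Return a device's slot to the pool.  The argument is the system minor
 * (a multiple of AOE_PARTITIONS) as stored in d->sysminor.
 */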
static void
minor_free(ulong minor)
{
        ulong flags;

        minor /= AOE_PARTITIONS;
        BUG_ON(minor >= N_DEVS);

        spin_lock_irqsave(&used_minors_lock, flags);
        BUG_ON(!test_bit(minor, used_minors));
        clear_bit(minor, used_minors);
        spin_unlock_irqrestore(&used_minors_lock, flags);
}

/*
 * Users who grab a pointer to the device with aoedev_by_aoeaddr
 * automatically get a reference count and must be responsible
 * for performing an aoedev_put.  With the addition of async
 * kthread processing I'm no longer confident that we can
 * guarantee consistency in the face of device flushes.
 *
 * For the time being, we only bother to add extra references for
 * frames sitting on the iocq.  When the kthreads finish processing
 * these frames, they will aoedev_put the device.
 */
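
/* A sketch of the expected calling pattern:
 *
 *      d = aoedev_by_aoeaddr(maj, min, 0);     // takes a reference
 *      if (d) {
 *              // ... use the device ...
 *              aoedev_put(d);                  // releases it
 *      }
 */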

void
aoedev_put(struct aoedev *d)
{
        ulong flags;

        spin_lock_irqsave(&devlist_lock, flags);
        d->ref--;
        spin_unlock_irqrestore(&devlist_lock, flags);
}

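/* Placeholder timer handler: re-arm once per second until the device is
 * being killed.  This keeps d->timer live so that a real handler (the
 * retransmit timer in aoecmd.c) can be installed later without
 * re-initializing the timer.
 */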
static void
dummy_timer(struct timer_list *t)
{
        struct aoedev *d;

        d = from_timer(d, t, timer);
        if (d->flags & DEVFL_TKILL)
                return;
        d->timer.expires = jiffies + HZ;
        add_timer(&d->timer);
}

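/* Fail the in-process request, if any: error out the current buf and
 * every bio still queued behind it, then complete the request once no
 * bios remain outstanding.
 */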
static void
aoe_failip(struct aoedev *d)
{
        struct request *rq;
        struct aoe_req *req;
        struct bio *bio;

        aoe_failbuf(d, d->ip.buf);
        rq = d->ip.rq;
        if (rq == NULL)
                return;

        req = blk_mq_rq_to_pdu(rq);
        while ((bio = d->ip.nxbio)) {
                bio->bi_status = BLK_STS_IOERR;
                d->ip.nxbio = bio->bi_next;
                req->nr_bios--;
        }

        if (!req->nr_bios)
                aoe_end_request(d, rq, 0);
}

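/* Unlink one queued frame, fail its buffer (if any), and free the frame. */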
static void
downdev_frame(struct list_head *pos)
{
        struct frame *f;

        f = list_entry(pos, struct frame, head);
        list_del(pos);
        if (f->buf) {
                f->buf->nframesout--;
                aoe_failbuf(f->t->d, f->buf);
        }
        aoe_freetframe(f);
}

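/* Take the device down: fail all active and to-be-retransmitted frames
 * and the in-process request, reset each target's window, and error out
 * any I/O still pending in the block layer.
 */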
void
aoedev_downdev(struct aoedev *d)
{
        struct aoetgt *t, **tt, **te;
        struct list_head *head, *pos, *nx;
        int i;

        d->flags &= ~DEVFL_UP;

        /* clean out active and to-be-retransmitted buffers */
        for (i = 0; i < NFACTIVE; i++) {
                head = &d->factive[i];
                list_for_each_safe(pos, nx, head)
                        downdev_frame(pos);
        }
        head = &d->rexmitq;
        list_for_each_safe(pos, nx, head)
                downdev_frame(pos);

        /* reset window dressings */
        tt = d->targets;
        te = tt + d->ntargets;
        for (; tt < te && (t = *tt); tt++) {
                aoecmd_wreset(t);
                t->nout = 0;
        }

        /* clean out the in-process request (if any) */
        aoe_failip(d);

        /* fast fail all pending I/O */
        if (d->blkq) {
                /* UP is cleared; freeze+quiesce to ensure all requests are errored */
                blk_mq_freeze_queue(d->blkq);
                blk_mq_quiesce_queue(d->blkq);
                blk_mq_unquiesce_queue(d->blkq);
                blk_mq_unfreeze_queue(d->blkq);
        }

        if (d->gd)
                set_capacity(d->gd, 0);
}

/* return whether the user asked for this particular
 * device to be flushed
 */
static int
user_req(char *s, size_t slen, struct aoedev *d)
{
        const char *p;
        size_t lim;

        if (!d->gd)
                return 0;
        p = kbasename(d->gd->disk_name);
        lim = sizeof(d->gd->disk_name);
        lim -= p - d->gd->disk_name;
        if (slen < lim)
                lim = slen;

        return !strncmp(s, p, lim);
}

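/* Tear down a device marked DEVFL_TKILL.  Only the caller that wins the
 * race to set DEVFL_FREEING proceeds; del_timer_sync() and the gendisk
 * teardown may sleep.  The struct aoedev itself is not freed here:
 * DEVFL_FREED is set so that flush() pass three can unlink and kfree it.
 */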
static void
freedev(struct aoedev *d)
{
        struct aoetgt **t, **e;
        int freeing = 0;
        unsigned long flags;

        spin_lock_irqsave(&d->lock, flags);
        if (d->flags & DEVFL_TKILL
        && !(d->flags & DEVFL_FREEING)) {
                d->flags |= DEVFL_FREEING;
                freeing = 1;
        }
        spin_unlock_irqrestore(&d->lock, flags);
        if (!freeing)
                return;

        del_timer_sync(&d->timer);
        if (d->gd) {
                aoedisk_rm_debugfs(d);
                del_gendisk(d->gd);
                put_disk(d->gd);
                blk_mq_free_tag_set(&d->tag_set);
                blk_cleanup_queue(d->blkq);
        }
        t = d->targets;
        e = t + d->ntargets;
        for (; t < e && *t; t++)
                freetgt(d, *t);

        mempool_destroy(d->bufpool);
        skbpoolfree(d);
        minor_free(d->sysminor);

        spin_lock_irqsave(&d->lock, flags);
        d->flags |= DEVFL_FREED;
        spin_unlock_irqrestore(&d->lock, flags);
}

enum flush_parms {
        NOT_EXITING = 0,
        EXITING = 1,
};

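/* Flush devices in three passes: take matching devices down and mark
 * them DEVFL_TKILL (pass one), free the resources of devices so marked
 * (pass two), and unlink and kfree devices marked DEVFL_FREED (pass
 * three).  Callers pass NOT_EXITING via aoedev_flush(), where the user's
 * string selects "all", one device by name, or (by default) only devices
 * that are down and idle; aoedev_exit() passes EXITING to take every
 * device down unconditionally.
 */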
static int
flush(const char __user *str, size_t cnt, int exiting)
{
        ulong flags;
        struct aoedev *d, **dd;
        char buf[16];
        int all = 0;
        int specified = 0;      /* flush a specific device */
        unsigned int skipflags;

        skipflags = DEVFL_GDALLOC | DEVFL_NEWSIZE | DEVFL_TKILL;

        if (!exiting && cnt >= 3) {
                if (cnt > sizeof buf)
                        cnt = sizeof buf;
                if (copy_from_user(buf, str, cnt))
                        return -EFAULT;
                all = !strncmp(buf, "all", 3);
                if (!all)
                        specified = 1;
        }

        flush_scheduled_work();
        /* pass one: do aoedev_downdev, which might sleep */
restart1:
        spin_lock_irqsave(&devlist_lock, flags);
        for (d = devlist; d; d = d->next) {
                spin_lock(&d->lock);
                if (d->flags & DEVFL_TKILL)
                        goto cont;

                if (exiting) {
                        /* unconditionally take each device down */
                } else if (specified) {
                        if (!user_req(buf, cnt, d))
                                goto cont;
                } else if ((!all && (d->flags & DEVFL_UP))
                || d->flags & skipflags
                || d->nopen
                || d->ref)
                        goto cont;

                spin_unlock(&d->lock);
                spin_unlock_irqrestore(&devlist_lock, flags);
                aoedev_downdev(d);
                d->flags |= DEVFL_TKILL;
                goto restart1;
cont:
                spin_unlock(&d->lock);
        }
        spin_unlock_irqrestore(&devlist_lock, flags);

        /* pass two: call freedev, which might sleep,
         * for aoedevs marked with DEVFL_TKILL
         */
restart2:
        spin_lock_irqsave(&devlist_lock, flags);
        for (d = devlist; d; d = d->next) {
                spin_lock(&d->lock);
                if (d->flags & DEVFL_TKILL
                && !(d->flags & DEVFL_FREEING)) {
                        spin_unlock(&d->lock);
                        spin_unlock_irqrestore(&devlist_lock, flags);
                        freedev(d);
                        goto restart2;
                }
                spin_unlock(&d->lock);
        }

        /* pass three: remove aoedevs marked with DEVFL_FREED */
        for (dd = &devlist, d = *dd; d; d = *dd) {
                struct aoedev *doomed = NULL;

                spin_lock(&d->lock);
                if (d->flags & DEVFL_FREED) {
                        *dd = d->next;
                        doomed = d;
                } else {
                        dd = &d->next;
                }
                spin_unlock(&d->lock);
                if (doomed)
                        kfree(doomed->targets);
                kfree(doomed);          /* kfree(NULL) is a no-op */
        }
        spin_unlock_irqrestore(&devlist_lock, flags);

        return 0;
}

int
aoedev_flush(const char __user *str, size_t cnt)
{
        return flush(str, cnt, NOT_EXITING);
}

/* Freeing an skb can fail when something else still holds a reference
 * to its data.  This has been confirmed to occur once with Tms=3*1000,
 * due to the driver changing link and not processing its transmit ring.
 * The problem is hard enough to solve properly (by returning an error
 * to the caller) that I'm still punting on "solving" it.
 */
static void
skbfree(struct sk_buff *skb)
{
        enum { Sms = 250, Tms = 30 * 1000};
        int i = Tms / Sms;

        if (skb == NULL)
                return;
        while (atomic_read(&skb_shinfo(skb)->dataref) != 1 && i-- > 0)
                msleep(Sms);
        if (i < 0) {
                printk(KERN_ERR
                        "aoe: %s holds ref: %s\n",
                        skb->dev ? skb->dev->name : "netif",
                        "cannot free skb -- memory leaked.");
                return;
        }
        skb->truesize -= skb->data_len;
        skb_shinfo(skb)->nr_frags = skb->data_len = 0;
        skb_trim(skb, 0);
        dev_kfree_skb(skb);
}

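/* Free every skb in the device's emergency pool and reset the queue head. */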
static void
skbpoolfree(struct aoedev *d)
{
        struct sk_buff *skb, *tmp;

        skb_queue_walk_safe(&d->skbpool, skb, tmp)
                skbfree(skb);

        __skb_queue_head_init(&d->skbpool);
}

/* Find the device with AoE address maj.min, allocating it if do_alloc is
 * set and it does not yet exist.  On success the device's reference
 * count has been incremented; the caller must balance it with
 * aoedev_put().  Returns NULL if the device is being killed, cannot be
 * found, or cannot be allocated.
 */
struct aoedev *
aoedev_by_aoeaddr(ulong maj, int min, int do_alloc)
{
        struct aoedev *d;
        int i;
        ulong flags;
        ulong sysminor = 0;

        spin_lock_irqsave(&devlist_lock, flags);

        for (d = devlist; d; d = d->next)
                if (d->aoemajor == maj && d->aoeminor == min) {
                        spin_lock(&d->lock);
                        if (d->flags & DEVFL_TKILL) {
                                spin_unlock(&d->lock);
                                d = NULL;
                                goto out;
                        }
                        d->ref++;
                        spin_unlock(&d->lock);
                        break;
                }
        if (d || !do_alloc || minor_get(&sysminor, maj, min) < 0)
                goto out;
        d = kcalloc(1, sizeof *d, GFP_ATOMIC);
        if (!d)
                goto out;
        d->targets = kcalloc(NTARGETS, sizeof(*d->targets), GFP_ATOMIC);
        if (!d->targets) {
                kfree(d);
                d = NULL;
                goto out;
        }
        d->ntargets = NTARGETS;
        INIT_WORK(&d->work, aoecmd_sleepwork);
        spin_lock_init(&d->lock);
        INIT_LIST_HEAD(&d->rq_list);
        skb_queue_head_init(&d->skbpool);
        timer_setup(&d->timer, dummy_timer, 0);
        d->timer.expires = jiffies + HZ;
        add_timer(&d->timer);
        d->bufpool = NULL;      /* defer to aoeblk_gdalloc */
        d->tgt = d->targets;
        d->ref = 1;
        for (i = 0; i < NFACTIVE; i++)
                INIT_LIST_HEAD(&d->factive[i]);
        INIT_LIST_HEAD(&d->rexmitq);
        d->sysminor = sysminor;
        d->aoemajor = maj;
        d->aoeminor = min;
        d->rttavg = RTTAVG_INIT;
        d->rttdev = RTTDEV_INIT;
        d->next = devlist;
        devlist = d;
 out:
        spin_unlock_irqrestore(&devlist_lock, flags);
        return d;
}

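/* Drop the target's references on its network interfaces, free every
 * frame (and its skb) on the target's free list, then free the target
 * itself.
 */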
static void
freetgt(struct aoedev *d, struct aoetgt *t)
{
        struct frame *f;
        struct list_head *pos, *nx, *head;
        struct aoeif *ifp;

        for (ifp = t->ifs; ifp < &t->ifs[NAOEIFS]; ++ifp) {
                if (!ifp->nd)
                        break;
                dev_put(ifp->nd);
        }

        head = &t->ffree;
        list_for_each_safe(pos, nx, head) {
                list_del(pos);
                f = list_entry(pos, struct frame, head);
                skbfree(f->skb);
                kfree(f);
        }
        kfree(t);
}

void
aoedev_exit(void)
{
        flush_scheduled_work();
        flush(NULL, 0, EXITING);
}

int __init
aoedev_init(void)
{
        return 0;
}