linux/drivers/block/aoe/aoedev.c
/* Copyright (c) 2013 Coraid, Inc.  See COPYING for GPL terms. */
/*
 * aoedev.c
 * AoE device utility functions; maintains device list.
 */

#include <linux/hdreg.h>
#include <linux/blk-mq.h>
#include <linux/netdevice.h>
#include <linux/delay.h>
#include <linux/slab.h>
#include <linux/bitmap.h>
#include <linux/kdev_t.h>
#include <linux/moduleparam.h>
#include <linux/string.h>
#include "aoe.h"

static void freetgt(struct aoedev *d, struct aoetgt *t);
static void skbpoolfree(struct aoedev *d);

static int aoe_dyndevs = 1;
module_param(aoe_dyndevs, int, 0644);
MODULE_PARM_DESC(aoe_dyndevs, "Use dynamic minor numbers for devices.");
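/* Example usage (follows the standard module parameter conventions):
 *
 *	modprobe aoe aoe_dyndevs=0
 *
 * selects the legacy static minor mapping at load time, and because the
 * parameter mode above is 0644 it can also be toggled at runtime through
 * /sys/module/aoe/parameters/aoe_dyndevs.
 */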

static struct aoedev *devlist;
static DEFINE_SPINLOCK(devlist_lock);

/* Because some systems will have one, many, or no
 *   - partitions,
 *   - slots per shelf,
 *   - or shelves,
 * we need some flexibility in the way the minor numbers
 * are allocated.  So they are dynamic.
 */
#define N_DEVS ((1U<<MINORBITS)/AOE_PARTITIONS)
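/* A worked example of the arithmetic: with MINORBITS at its usual value
 * of 20 and AOE_PARTITIONS at 16, N_DEVS is (1<<20)/16 = 65536, i.e.
 * 65536 whole devices, each owning 16 consecutive minors for partitions.
 */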

static DEFINE_SPINLOCK(used_minors_lock);
static DECLARE_BITMAP(used_minors, N_DEVS);

static int
minor_get_dyn(ulong *sysminor)
{
        ulong flags;
        ulong n;
        int error = 0;

        spin_lock_irqsave(&used_minors_lock, flags);
        n = find_first_zero_bit(used_minors, N_DEVS);
        if (n < N_DEVS)
                set_bit(n, used_minors);
        else
                error = -1;
        spin_unlock_irqrestore(&used_minors_lock, flags);

        *sysminor = n * AOE_PARTITIONS;
        return error;
}

static int
minor_get_static(ulong *sysminor, ulong aoemaj, int aoemin)
{
        ulong flags;
        ulong n;
        int error = 0;
        enum {
                /* for backwards compatibility when !aoe_dyndevs,
                 * a static number of supported slots per shelf */
                NPERSHELF = 16,
        };

        if (aoemin >= NPERSHELF) {
                pr_err("aoe: %s %d slots per shelf\n",
                        "static minor device numbers support only",
                        NPERSHELF);
                error = -1;
                goto out;
        }

        n = aoemaj * NPERSHELF + aoemin;
        if (n >= N_DEVS) {
                pr_err("aoe: %s with e%ld.%d\n",
                        "cannot use static minor device numbers",
                        aoemaj, aoemin);
                error = -1;
                goto out;
        }

        spin_lock_irqsave(&used_minors_lock, flags);
        if (test_bit(n, used_minors)) {
                pr_err("aoe: %s %lu\n",
                        "existing device already has static minor number",
                        n);
                error = -1;
        } else
                set_bit(n, used_minors);
        spin_unlock_irqrestore(&used_minors_lock, flags);
        *sysminor = n * AOE_PARTITIONS;
out:
        return error;
}

static int
minor_get(ulong *sysminor, ulong aoemaj, int aoemin)
{
        if (aoe_dyndevs)
                return minor_get_dyn(sysminor);
        else
                return minor_get_static(sysminor, aoemaj, aoemin);
}

static void
minor_free(ulong minor)
{
        ulong flags;

        minor /= AOE_PARTITIONS;
        BUG_ON(minor >= N_DEVS);

        spin_lock_irqsave(&used_minors_lock, flags);
        BUG_ON(!test_bit(minor, used_minors));
        clear_bit(minor, used_minors);
        spin_unlock_irqrestore(&used_minors_lock, flags);
}
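/* minor_get and minor_free bracket a device's lifetime: aoedev_by_aoeaddr
 * below reserves a minor when it allocates a struct aoedev, and freedev
 * returns it with minor_free(d->sysminor) during teardown.
 */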

/*
 * Users who grab a pointer to the device with aoedev_by_aoeaddr
 * automatically get a reference count and are responsible
 * for performing an aoedev_put.  With the addition of async
 * kthread processing I'm no longer confident that we can
 * guarantee consistency in the face of device flushes.
 *
 * For the time being, we only bother to add extra references for
 * frames sitting on the iocq.  When the kthreads finish processing
 * these frames, they will aoedev_put the device.
 */

void
aoedev_put(struct aoedev *d)
{
        ulong flags;

        spin_lock_irqsave(&devlist_lock, flags);
        d->ref--;
        spin_unlock_irqrestore(&devlist_lock, flags);
}
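/* An illustrative caller pattern (a sketch, not code from this file):
 *
 *	struct aoedev *d;
 *
 *	d = aoedev_by_aoeaddr(maj, min, 0);	// lookup only; takes a ref
 *	if (d) {
 *		// ... use the device ...
 *		aoedev_put(d);			// drop the ref when done
 *	}
 */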

static void
dummy_timer(struct timer_list *t)
{
        struct aoedev *d;

        d = from_timer(d, t, timer);
        if (d->flags & DEVFL_TKILL)
                return;
        d->timer.expires = jiffies + HZ;
        add_timer(&d->timer);
}

static void
aoe_failip(struct aoedev *d)
{
        struct request *rq;
        struct aoe_req *req;
        struct bio *bio;

        aoe_failbuf(d, d->ip.buf);
        rq = d->ip.rq;
        if (rq == NULL)
                return;

        req = blk_mq_rq_to_pdu(rq);
        while ((bio = d->ip.nxbio)) {
                bio->bi_status = BLK_STS_IOERR;
                d->ip.nxbio = bio->bi_next;
                req->nr_bios--;
        }

        if (!req->nr_bios)
                aoe_end_request(d, rq, 0);
}

static void
downdev_frame(struct list_head *pos)
{
        struct frame *f;

        f = list_entry(pos, struct frame, head);
        list_del(pos);
        if (f->buf) {
                f->buf->nframesout--;
                aoe_failbuf(f->t->d, f->buf);
        }
        aoe_freetframe(f);
}

void
aoedev_downdev(struct aoedev *d)
{
        struct aoetgt *t, **tt, **te;
        struct list_head *head, *pos, *nx;
        int i;

        d->flags &= ~DEVFL_UP;

        /* clean out active and to-be-retransmitted buffers */
        for (i = 0; i < NFACTIVE; i++) {
                head = &d->factive[i];
                list_for_each_safe(pos, nx, head)
                        downdev_frame(pos);
        }
        head = &d->rexmitq;
        list_for_each_safe(pos, nx, head)
                downdev_frame(pos);

        /* reset window dressings */
        tt = d->targets;
        te = tt + d->ntargets;
        for (; tt < te && (t = *tt); tt++) {
                aoecmd_wreset(t);
                t->nout = 0;
        }

        /* clean out the in-process request (if any) */
        aoe_failip(d);

        /* fast fail all pending I/O */
        if (d->blkq) {
                /* UP is cleared, freeze+quiesce to ensure all are errored */
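                /* With DEVFL_UP clear, aoeblk's queue_rq handler rejects
                 * each request it sees with an I/O error, so the
                 * freeze/quiesce pair below drains everything pending
                 * through that failure path before we continue.
                 */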
                blk_mq_freeze_queue(d->blkq);
                blk_mq_quiesce_queue(d->blkq);
                blk_mq_unquiesce_queue(d->blkq);
                blk_mq_unfreeze_queue(d->blkq);
        }

        if (d->gd)
                set_capacity(d->gd, 0);
}

/* return whether the user asked for this particular
 * device to be flushed
 */
static int
user_req(char *s, size_t slen, struct aoedev *d)
{
        const char *p;
        size_t lim;

        if (!d->gd)
                return 0;
        p = kbasename(d->gd->disk_name);
        lim = sizeof(d->gd->disk_name);
        lim -= p - d->gd->disk_name;
        if (slen < lim)
                lim = slen;

        return !strncmp(s, p, lim);
}

static void
freedev(struct aoedev *d)
{
        struct aoetgt **t, **e;
        int freeing = 0;
        unsigned long flags;

        spin_lock_irqsave(&d->lock, flags);
        if (d->flags & DEVFL_TKILL
        && !(d->flags & DEVFL_FREEING)) {
                d->flags |= DEVFL_FREEING;
                freeing = 1;
        }
        spin_unlock_irqrestore(&d->lock, flags);
        if (!freeing)
                return;

        del_timer_sync(&d->timer);
        if (d->gd) {
                aoedisk_rm_debugfs(d);
                aoedisk_rm_sysfs(d);
                del_gendisk(d->gd);
                put_disk(d->gd);
                blk_mq_free_tag_set(&d->tag_set);
                blk_cleanup_queue(d->blkq);
        }
        t = d->targets;
        e = t + d->ntargets;
        for (; t < e && *t; t++)
                freetgt(d, *t);

        mempool_destroy(d->bufpool);
        skbpoolfree(d);
        minor_free(d->sysminor);

        spin_lock_irqsave(&d->lock, flags);
        d->flags |= DEVFL_FREED;
        spin_unlock_irqrestore(&d->lock, flags);
}

enum flush_parms {
        NOT_EXITING = 0,
        EXITING = 1,
};

static int
flush(const char __user *str, size_t cnt, int exiting)
{
        ulong flags;
        struct aoedev *d, **dd;
        char buf[16];
        int all = 0;
        int specified = 0;      /* flush a specific device */
        unsigned int skipflags;

        skipflags = DEVFL_GDALLOC | DEVFL_NEWSIZE | DEVFL_TKILL;

        if (!exiting && cnt >= 3) {
                if (cnt > sizeof buf)
                        cnt = sizeof buf;
                if (copy_from_user(buf, str, cnt))
                        return -EFAULT;
                all = !strncmp(buf, "all", 3);
                if (!all)
                        specified = 1;
        }

        flush_scheduled_work();
        /* pass one: do aoedev_downdev, which might sleep */
restart1:
        spin_lock_irqsave(&devlist_lock, flags);
        for (d = devlist; d; d = d->next) {
                spin_lock(&d->lock);
                if (d->flags & DEVFL_TKILL)
                        goto cont;

                if (exiting) {
                        /* unconditionally take each device down */
                } else if (specified) {
                        if (!user_req(buf, cnt, d))
                                goto cont;
                } else if ((!all && (d->flags & DEVFL_UP))
                || d->flags & skipflags
                || d->nopen
                || d->ref)
                        goto cont;

                spin_unlock(&d->lock);
                spin_unlock_irqrestore(&devlist_lock, flags);
                aoedev_downdev(d);
                d->flags |= DEVFL_TKILL;
                goto restart1;
cont:
                spin_unlock(&d->lock);
        }
        spin_unlock_irqrestore(&devlist_lock, flags);

        /* pass two: call freedev, which might sleep,
         * for aoedevs marked with DEVFL_TKILL
         */
restart2:
        spin_lock_irqsave(&devlist_lock, flags);
        for (d = devlist; d; d = d->next) {
                spin_lock(&d->lock);
                if (d->flags & DEVFL_TKILL
                && !(d->flags & DEVFL_FREEING)) {
                        spin_unlock(&d->lock);
                        spin_unlock_irqrestore(&devlist_lock, flags);
                        freedev(d);
                        goto restart2;
                }
                spin_unlock(&d->lock);
        }

        /* pass three: remove aoedevs marked with DEVFL_FREED */
        for (dd = &devlist, d = *dd; d; d = *dd) {
                struct aoedev *doomed = NULL;

                spin_lock(&d->lock);
                if (d->flags & DEVFL_FREED) {
                        *dd = d->next;
                        doomed = d;
                } else {
                        dd = &d->next;
                }
                spin_unlock(&d->lock);
                if (doomed)
                        kfree(doomed->targets);
                kfree(doomed);
        }
        spin_unlock_irqrestore(&devlist_lock, flags);

        return 0;
}

int
aoedev_flush(const char __user *str, size_t cnt)
{
        return flush(str, cnt, NOT_EXITING);
}
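/* How a flush is requested from user space (an example; it assumes the
 * standard aoe character device nodes created via aoechr.c):
 *
 *	echo -n e1.0 > /dev/etherd/flush    # flush only device e1.0
 *	echo -n all > /dev/etherd/flush     # also flush devices still up,
 *	                                    # if not open, referenced, or
 *	                                    # mid-allocation
 *
 * A write shorter than 3 bytes flushes only devices that are down,
 * closed, and unreferenced.  The aoetools "aoe-flush" utility wraps
 * this interface.
 */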

/* This has been confirmed to occur once with Tms=3*1000 due to the
 * driver changing link and not processing its transmit ring.  Solving
 * the problem by returning an error is hard enough that I'm still
 * punting on "solving" this.
 */
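/* skbfree polls every Sms (250ms) for up to Tms (30s total), waiting
 * for the network layer's last reference to go away; if the wait times
 * out, leaking the skb is preferred over freeing memory a NIC might
 * still be using.
 */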
static void
skbfree(struct sk_buff *skb)
{
        enum { Sms = 250, Tms = 30 * 1000};
        int i = Tms / Sms;

        if (skb == NULL)
                return;
        while (atomic_read(&skb_shinfo(skb)->dataref) != 1 && i-- > 0)
                msleep(Sms);
        if (i < 0) {
                printk(KERN_ERR
                        "aoe: %s holds ref: %s\n",
                        skb->dev ? skb->dev->name : "netif",
                        "cannot free skb -- memory leaked.");
                return;
        }
        skb->truesize -= skb->data_len;
        skb_shinfo(skb)->nr_frags = skb->data_len = 0;
        skb_trim(skb, 0);
        dev_kfree_skb(skb);
}

static void
skbpoolfree(struct aoedev *d)
{
        struct sk_buff *skb, *tmp;

        skb_queue_walk_safe(&d->skbpool, skb, tmp)
                skbfree(skb);

        __skb_queue_head_init(&d->skbpool);
}

/* find it or allocate it */
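/* Lookups take a reference on success; allocation happens while holding
 * devlist_lock with interrupts saved, which is why the kcalloc calls
 * below must use GFP_ATOMIC rather than GFP_KERNEL.
 */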
struct aoedev *
aoedev_by_aoeaddr(ulong maj, int min, int do_alloc)
{
        struct aoedev *d;
        int i;
        ulong flags;
        ulong sysminor = 0;

        spin_lock_irqsave(&devlist_lock, flags);

        for (d = devlist; d; d = d->next)
                if (d->aoemajor == maj && d->aoeminor == min) {
                        spin_lock(&d->lock);
                        if (d->flags & DEVFL_TKILL) {
                                spin_unlock(&d->lock);
                                d = NULL;
                                goto out;
                        }
                        d->ref++;
                        spin_unlock(&d->lock);
                        break;
                }
        if (d || !do_alloc || minor_get(&sysminor, maj, min) < 0)
                goto out;
        d = kcalloc(1, sizeof *d, GFP_ATOMIC);
        if (!d)
                goto out;
        d->targets = kcalloc(NTARGETS, sizeof(*d->targets), GFP_ATOMIC);
        if (!d->targets) {
                kfree(d);
                d = NULL;
                goto out;
        }
        d->ntargets = NTARGETS;
        INIT_WORK(&d->work, aoecmd_sleepwork);
        spin_lock_init(&d->lock);
        INIT_LIST_HEAD(&d->rq_list);
        skb_queue_head_init(&d->skbpool);
        timer_setup(&d->timer, dummy_timer, 0);
        d->timer.expires = jiffies + HZ;
        add_timer(&d->timer);
        d->bufpool = NULL;      /* defer to aoeblk_gdalloc */
        d->tgt = d->targets;
        d->ref = 1;
        for (i = 0; i < NFACTIVE; i++)
                INIT_LIST_HEAD(&d->factive[i]);
        INIT_LIST_HEAD(&d->rexmitq);
        d->sysminor = sysminor;
        d->aoemajor = maj;
        d->aoeminor = min;
        d->rttavg = RTTAVG_INIT;
        d->rttdev = RTTDEV_INIT;
        d->next = devlist;
        devlist = d;
 out:
        spin_unlock_irqrestore(&devlist_lock, flags);
        return d;
}

static void
freetgt(struct aoedev *d, struct aoetgt *t)
{
        struct frame *f;
        struct list_head *pos, *nx, *head;
        struct aoeif *ifp;

        for (ifp = t->ifs; ifp < &t->ifs[NAOEIFS]; ++ifp) {
                if (!ifp->nd)
                        break;
                dev_put(ifp->nd);
        }

        head = &t->ffree;
        list_for_each_safe(pos, nx, head) {
                list_del(pos);
                f = list_entry(pos, struct frame, head);
                skbfree(f->skb);
                kfree(f);
        }
        kfree(t);
}

void
aoedev_exit(void)
{
        flush_scheduled_work();
        flush(NULL, 0, EXITING);
}

int __init
aoedev_init(void)
{
        return 0;
}