linux/drivers/block/aoe/aoecmd.c
   1/* Copyright (c) 2013 Coraid, Inc.  See COPYING for GPL terms. */
   2/*
   3 * aoecmd.c
   4 * Filesystem request handling methods
   5 */
   6
   7#include <linux/ata.h>
   8#include <linux/slab.h>
   9#include <linux/hdreg.h>
  10#include <linux/blk-mq.h>
  11#include <linux/skbuff.h>
  12#include <linux/netdevice.h>
  13#include <linux/genhd.h>
  14#include <linux/moduleparam.h>
  15#include <linux/workqueue.h>
  16#include <linux/kthread.h>
  17#include <net/net_namespace.h>
  18#include <asm/unaligned.h>
  19#include <linux/uio.h>
  20#include "aoe.h"
  21
  22#define MAXIOC (8192)   /* default meant to avoid most soft lockups */
  23
  24static void ktcomplete(struct frame *, struct sk_buff *);
  25static int count_targets(struct aoedev *d, int *untainted);
  26
  27static struct buf *nextbuf(struct aoedev *);
  28
  29static int aoe_deadsecs = 60 * 3;
  30module_param(aoe_deadsecs, int, 0644);
  31MODULE_PARM_DESC(aoe_deadsecs, "After aoe_deadsecs seconds, give up and fail dev.");
  32
  33static int aoe_maxout = 64;
  34module_param(aoe_maxout, int, 0644);
  35MODULE_PARM_DESC(aoe_maxout,
  36        "Only aoe_maxout outstanding packets for every MAC on eX.Y.");
  37
  38/* The number of online cpus during module initialization gives us a
  39 * convenient heuristic cap on the parallelism used for ktio threads
  40 * doing I/O completion.  It is not important that the cap equal the
  41 * actual number of running CPUs at any given time, but because of CPU
  42 * hotplug, we take care to use ncpus instead of using
  43 * num_online_cpus() after module initialization.
  44 */
  45static int ncpus;
  46
   47/* mutex lock used for synchronization while spawning threads */
  48static DEFINE_MUTEX(ktio_spawn_lock);
  49
  50static wait_queue_head_t *ktiowq;
  51static struct ktstate *kts;
  52
  53/* io completion queue */
  54struct iocq_ktio {
  55        struct list_head head;
  56        spinlock_t lock;
  57};
  58static struct iocq_ktio *iocq;
  59
  60static struct page *empty_page;
  61
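/* Allocate an skb for an AoE frame: reserve MAX_HEADER bytes of headroom,
 * reset the MAC and network header offsets, and mark the protocol as
 * ETH_P_AOE.  Returns NULL if the atomic allocation fails.
 */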
  62static struct sk_buff *
  63new_skb(ulong len)
  64{
  65        struct sk_buff *skb;
  66
  67        skb = alloc_skb(len + MAX_HEADER, GFP_ATOMIC);
  68        if (skb) {
  69                skb_reserve(skb, MAX_HEADER);
  70                skb_reset_mac_header(skb);
  71                skb_reset_network_header(skb);
  72                skb->protocol = __constant_htons(ETH_P_AOE);
  73                skb_checksum_none_assert(skb);
  74        }
  75        return skb;
  76}
  77
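/* The next two helpers look up an outstanding frame by its AoE tag and
 * unlink it: getframe_deferred() searches the deferred retransmit queue,
 * while getframe() searches the active-frame hash bucket selected by
 * tag % NFACTIVE.  Both return NULL when no frame carries the tag.
 */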
  78static struct frame *
  79getframe_deferred(struct aoedev *d, u32 tag)
  80{
  81        struct list_head *head, *pos, *nx;
  82        struct frame *f;
  83
  84        head = &d->rexmitq;
  85        list_for_each_safe(pos, nx, head) {
  86                f = list_entry(pos, struct frame, head);
  87                if (f->tag == tag) {
  88                        list_del(pos);
  89                        return f;
  90                }
  91        }
  92        return NULL;
  93}
  94
  95static struct frame *
  96getframe(struct aoedev *d, u32 tag)
  97{
  98        struct frame *f;
  99        struct list_head *head, *pos, *nx;
 100        u32 n;
 101
 102        n = tag % NFACTIVE;
 103        head = &d->factive[n];
 104        list_for_each_safe(pos, nx, head) {
 105                f = list_entry(pos, struct frame, head);
 106                if (f->tag == tag) {
 107                        list_del(pos);
 108                        return f;
 109                }
 110        }
 111        return NULL;
 112}
 113
 114/*
 115 * Leave the top bit clear so we have tagspace for userland.
 116 * The bottom 16 bits are the xmit tick for rexmit/rttavg processing.
 117 * This driver reserves tag -1 to mean "unused frame."
 118 */
 119static int
 120newtag(struct aoedev *d)
 121{
 122        register ulong n;
 123
 124        n = jiffies & 0xffff;
 125        return n |= (++d->lasttag & 0x7fff) << 16;
 126}
 127
 128static u32
 129aoehdr_atainit(struct aoedev *d, struct aoetgt *t, struct aoe_hdr *h)
 130{
 131        u32 host_tag = newtag(d);
 132
 133        memcpy(h->src, t->ifp->nd->dev_addr, sizeof h->src);
 134        memcpy(h->dst, t->addr, sizeof h->dst);
 135        h->type = __constant_cpu_to_be16(ETH_P_AOE);
 136        h->verfl = AOE_HVER;
 137        h->major = cpu_to_be16(d->aoemajor);
 138        h->minor = d->aoeminor;
 139        h->cmd = AOECMD_ATA;
 140        h->tag = cpu_to_be32(host_tag);
 141
 142        return host_tag;
 143}
 144
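/* Store the (up to 48-bit) LBA into the six single-byte lba registers of
 * the ATA header, least significant byte first.
 */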
 145static inline void
 146put_lba(struct aoe_atahdr *ah, sector_t lba)
 147{
 148        ah->lba0 = lba;
 149        ah->lba1 = lba >>= 8;
 150        ah->lba2 = lba >>= 8;
 151        ah->lba3 = lba >>= 8;
 152        ah->lba4 = lba >>= 8;
 153        ah->lba5 = lba >>= 8;
 154}
 155
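/* Advance the target's interface pointer round-robin, wrapping to the
 * first slot when the end of the array (or an empty slot) is reached.
 * Returns NULL if the target has no usable interfaces at all.
 */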
 156static struct aoeif *
 157ifrotate(struct aoetgt *t)
 158{
 159        struct aoeif *ifp;
 160
 161        ifp = t->ifp;
 162        ifp++;
 163        if (ifp >= &t->ifs[NAOEIFS] || ifp->nd == NULL)
 164                ifp = t->ifs;
 165        if (ifp->nd == NULL)
 166                return NULL;
 167        return t->ifp = ifp;
 168}
 169
 170static void
 171skb_pool_put(struct aoedev *d, struct sk_buff *skb)
 172{
 173        __skb_queue_tail(&d->skbpool, skb);
 174}
 175
 176static struct sk_buff *
 177skb_pool_get(struct aoedev *d)
 178{
 179        struct sk_buff *skb = skb_peek(&d->skbpool);
 180
 181        if (skb && atomic_read(&skb_shinfo(skb)->dataref) == 1) {
 182                __skb_unlink(skb, &d->skbpool);
 183                return skb;
 184        }
 185        if (skb_queue_len(&d->skbpool) < NSKBPOOLMAX &&
 186            (skb = new_skb(ETH_ZLEN)))
 187                return skb;
 188
 189        return NULL;
 190}
 191
 192void
 193aoe_freetframe(struct frame *f)
 194{
 195        struct aoetgt *t;
 196
 197        t = f->t;
 198        f->buf = NULL;
 199        memset(&f->iter, 0, sizeof(f->iter));
 200        f->r_skb = NULL;
 201        f->flags = 0;
 202        list_add(&f->head, &t->ffree);
 203}
 204
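/* Get a transmit frame for target t, either from the target's free list
 * or by allocating a new one (bounded by NSKBPOOLMAX*2 allocations per
 * target).  If the frame's skb is still referenced by the network layer,
 * swap in a fresh one from the per-device skb pool before reuse.
 */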
 205static struct frame *
 206newtframe(struct aoedev *d, struct aoetgt *t)
 207{
 208        struct frame *f;
 209        struct sk_buff *skb;
 210        struct list_head *pos;
 211
 212        if (list_empty(&t->ffree)) {
 213                if (t->falloc >= NSKBPOOLMAX*2)
 214                        return NULL;
 215                f = kcalloc(1, sizeof(*f), GFP_ATOMIC);
 216                if (f == NULL)
 217                        return NULL;
 218                t->falloc++;
 219                f->t = t;
 220        } else {
 221                pos = t->ffree.next;
 222                list_del(pos);
 223                f = list_entry(pos, struct frame, head);
 224        }
 225
 226        skb = f->skb;
 227        if (skb == NULL) {
 228                f->skb = skb = new_skb(ETH_ZLEN);
 229                if (!skb) {
 230bail:                   aoe_freetframe(f);
 231                        return NULL;
 232                }
 233        }
 234
 235        if (atomic_read(&skb_shinfo(skb)->dataref) != 1) {
 236                skb = skb_pool_get(d);
 237                if (skb == NULL)
 238                        goto bail;
 239                skb_pool_put(d, f->skb);
 240                f->skb = skb;
 241        }
 242
 243        skb->truesize -= skb->data_len;
 244        skb_shinfo(skb)->nr_frags = skb->data_len = 0;
 245        skb_trim(skb, 0);
 246        return f;
 247}
 248
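/* Pick the next target, round-robin from the last one used, that has
 * both a free frame and transmit window available, preferring untainted
 * targets and falling back to tainted ones only when no untainted target
 * has room.  If no untainted target has packets outstanding, set
 * DEVFL_KICKME so the queue is kicked again from the retransmit timer.
 */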
 249static struct frame *
 250newframe(struct aoedev *d)
 251{
 252        struct frame *f;
 253        struct aoetgt *t, **tt;
 254        int totout = 0;
 255        int use_tainted;
 256        int has_untainted;
 257
 258        if (!d->targets || !d->targets[0]) {
 259                printk(KERN_ERR "aoe: NULL TARGETS!\n");
 260                return NULL;
 261        }
 262        tt = d->tgt;    /* last used target */
 263        for (use_tainted = 0, has_untainted = 0;;) {
 264                tt++;
 265                if (tt >= &d->targets[d->ntargets] || !*tt)
 266                        tt = d->targets;
 267                t = *tt;
 268                if (!t->taint) {
 269                        has_untainted = 1;
 270                        totout += t->nout;
 271                }
 272                if (t->nout < t->maxout
 273                && (use_tainted || !t->taint)
 274                && t->ifp->nd) {
 275                        f = newtframe(d, t);
 276                        if (f) {
 277                                ifrotate(t);
 278                                d->tgt = tt;
 279                                return f;
 280                        }
 281                }
 282                if (tt == d->tgt) {     /* we've looped and found nada */
 283                        if (!use_tainted && !has_untainted)
 284                                use_tainted = 1;
 285                        else
 286                                break;
 287                }
 288        }
 289        if (totout == 0) {
 290                d->kicked++;
 291                d->flags |= DEVFL_KICKME;
 292        }
 293        return NULL;
 294}
 295
 296static void
 297skb_fillup(struct sk_buff *skb, struct bio *bio, struct bvec_iter iter)
 298{
 299        int frag = 0;
 300        struct bio_vec bv;
 301
 302        __bio_for_each_segment(bv, bio, iter, iter)
 303                skb_fill_page_desc(skb, frag++, bv.bv_page,
 304                                   bv.bv_offset, bv.bv_len);
 305}
 306
 307static void
 308fhash(struct frame *f)
 309{
 310        struct aoedev *d = f->t->d;
 311        u32 n;
 312
 313        n = f->tag % NFACTIVE;
 314        list_add_tail(&f->head, &d->factive[n]);
 315}
 316
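/* Fill in the AoE and ATA headers for a read or write frame: hash the
 * frame onto the active list, derive the sector count and LBA from
 * f->iter, select the LBA48 (extended) form when DEVFL_EXT is set, and
 * for writes attach the bio's pages to the skb as fragments.
 */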
 317static void
 318ata_rw_frameinit(struct frame *f)
 319{
 320        struct aoetgt *t;
 321        struct aoe_hdr *h;
 322        struct aoe_atahdr *ah;
 323        struct sk_buff *skb;
 324        char writebit, extbit;
 325
 326        skb = f->skb;
 327        h = (struct aoe_hdr *) skb_mac_header(skb);
 328        ah = (struct aoe_atahdr *) (h + 1);
 329        skb_put(skb, sizeof(*h) + sizeof(*ah));
 330        memset(h, 0, skb->len);
 331
 332        writebit = 0x10;
 333        extbit = 0x4;
 334
 335        t = f->t;
 336        f->tag = aoehdr_atainit(t->d, t, h);
 337        fhash(f);
 338        t->nout++;
 339        f->waited = 0;
 340        f->waited_total = 0;
 341
 342        /* set up ata header */
 343        ah->scnt = f->iter.bi_size >> 9;
 344        put_lba(ah, f->iter.bi_sector);
 345        if (t->d->flags & DEVFL_EXT) {
 346                ah->aflags |= AOEAFL_EXT;
 347        } else {
 348                extbit = 0;
 349                ah->lba3 &= 0x0f;
 350                ah->lba3 |= 0xe0;       /* LBA bit + obsolete 0xa0 */
 351        }
 352        if (f->buf && bio_data_dir(f->buf->bio) == WRITE) {
 353                skb_fillup(skb, f->buf->bio, f->iter);
 354                ah->aflags |= AOEAFL_WRITE;
 355                skb->len += f->iter.bi_size;
 356                skb->data_len = f->iter.bi_size;
 357                skb->truesize += f->iter.bi_size;
 358                t->wpkts++;
 359        } else {
 360                t->rpkts++;
 361                writebit = 0;
 362        }
 363
 364        ah->cmdstat = ATA_CMD_PIO_READ | writebit | extbit;
 365        skb->dev = t->ifp->nd;
 366}
 367
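/* Issue one ATA read/write frame for the device's current buf, limiting
 * the transfer to the negotiated maximum (d->maxbcnt, or DEFAULTBCNT).
 * Returns 1 if a frame was built, 0 if there is no work or no frame
 * available.
 */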
 368static int
 369aoecmd_ata_rw(struct aoedev *d)
 370{
 371        struct frame *f;
 372        struct buf *buf;
 373        struct sk_buff *skb;
 374        struct sk_buff_head queue;
 375
 376        buf = nextbuf(d);
 377        if (buf == NULL)
 378                return 0;
 379        f = newframe(d);
 380        if (f == NULL)
 381                return 0;
 382
 383        /* initialize the headers & frame */
 384        f->buf = buf;
 385        f->iter = buf->iter;
 386        f->iter.bi_size = min_t(unsigned long,
 387                                d->maxbcnt ?: DEFAULTBCNT,
 388                                f->iter.bi_size);
 389        bio_advance_iter(buf->bio, &buf->iter, f->iter.bi_size);
 390
 391        if (!buf->iter.bi_size)
 392                d->ip.buf = NULL;
 393
 394        /* mark all tracking fields and load out */
 395        buf->nframesout += 1;
 396
 397        ata_rw_frameinit(f);
 398
 399        skb = skb_clone(f->skb, GFP_ATOMIC);
 400        if (skb) {
 401                f->sent = ktime_get();
 402                __skb_queue_head_init(&queue);
 403                __skb_queue_tail(&queue, skb);
 404                aoenet_xmit(&queue);
 405        }
 406        return 1;
 407}
 408
  409/* Some callers cannot sleep; they call this function to build the
  410 * config packets and transmit them later, when interrupts are on.
  411 */
 412static void
 413aoecmd_cfg_pkts(ushort aoemajor, unsigned char aoeminor, struct sk_buff_head *queue)
 414{
 415        struct aoe_hdr *h;
 416        struct aoe_cfghdr *ch;
 417        struct sk_buff *skb;
 418        struct net_device *ifp;
 419
 420        rcu_read_lock();
 421        for_each_netdev_rcu(&init_net, ifp) {
 422                dev_hold(ifp);
 423                if (!is_aoe_netif(ifp))
 424                        goto cont;
 425
 426                skb = new_skb(sizeof *h + sizeof *ch);
 427                if (skb == NULL) {
 428                        printk(KERN_INFO "aoe: skb alloc failure\n");
 429                        goto cont;
 430                }
 431                skb_put(skb, sizeof *h + sizeof *ch);
 432                skb->dev = ifp;
 433                __skb_queue_tail(queue, skb);
 434                h = (struct aoe_hdr *) skb_mac_header(skb);
 435                memset(h, 0, sizeof *h + sizeof *ch);
 436
 437                memset(h->dst, 0xff, sizeof h->dst);
 438                memcpy(h->src, ifp->dev_addr, sizeof h->src);
 439                h->type = __constant_cpu_to_be16(ETH_P_AOE);
 440                h->verfl = AOE_HVER;
 441                h->major = cpu_to_be16(aoemajor);
 442                h->minor = aoeminor;
 443                h->cmd = AOECMD_CFG;
 444
 445cont:
 446                dev_put(ifp);
 447        }
 448        rcu_read_unlock();
 449}
 450
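/* Retransmit frame f with a fresh tag on the target's next interface,
 * logging the retransmit through the aoe character device unless the
 * frame is a probe.
 */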
 451static void
 452resend(struct aoedev *d, struct frame *f)
 453{
 454        struct sk_buff *skb;
 455        struct sk_buff_head queue;
 456        struct aoe_hdr *h;
 457        struct aoetgt *t;
 458        char buf[128];
 459        u32 n;
 460
 461        t = f->t;
 462        n = newtag(d);
 463        skb = f->skb;
 464        if (ifrotate(t) == NULL) {
 465                /* probably can't happen, but set it up to fail anyway */
 466                pr_info("aoe: resend: no interfaces to rotate to.\n");
 467                ktcomplete(f, NULL);
 468                return;
 469        }
 470        h = (struct aoe_hdr *) skb_mac_header(skb);
 471
 472        if (!(f->flags & FFL_PROBE)) {
 473                snprintf(buf, sizeof(buf),
 474                        "%15s e%ld.%d oldtag=%08x@%08lx newtag=%08x s=%pm d=%pm nout=%d\n",
 475                        "retransmit", d->aoemajor, d->aoeminor,
 476                        f->tag, jiffies, n,
 477                        h->src, h->dst, t->nout);
 478                aoechr_error(buf);
 479        }
 480
 481        f->tag = n;
 482        fhash(f);
 483        h->tag = cpu_to_be32(n);
 484        memcpy(h->dst, t->addr, sizeof h->dst);
 485        memcpy(h->src, t->ifp->nd->dev_addr, sizeof h->src);
 486
 487        skb->dev = t->ifp->nd;
 488        skb = skb_clone(skb, GFP_ATOMIC);
 489        if (skb == NULL)
 490                return;
 491        f->sent = ktime_get();
 492        __skb_queue_head_init(&queue);
 493        __skb_queue_tail(&queue, skb);
 494        aoenet_xmit(&queue);
 495}
 496
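/* Microseconds elapsed since frame f was last sent, computed from the
 * ktime stamp taken at transmit time and clamped to INT_MAX.
 */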
 497static int
 498tsince_hr(struct frame *f)
 499{
 500        u64 delta = ktime_to_ns(ktime_sub(ktime_get(), f->sent));
 501
 502        /* delta is normally under 4.2 seconds, avoid 64-bit division */
 503        if (likely(delta <= UINT_MAX))
 504                return (u32)delta / NSEC_PER_USEC;
 505
 506        /* avoid overflow after 71 minutes */
 507        if (delta > ((u64)INT_MAX * NSEC_PER_USEC))
 508                return INT_MAX;
 509
 510        return div_u64(delta, NSEC_PER_USEC);
 511}
 512
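/* Approximate age of a tag in microseconds, derived from the jiffies
 * tick stored in the tag's low 16 bits by newtag().  Used when the
 * frame itself is no longer available.
 */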
 513static int
 514tsince(u32 tag)
 515{
 516        int n;
 517
 518        n = jiffies & 0xffff;
 519        n -= tag & 0xffff;
 520        if (n < 0)
 521                n += 1<<16;
 522        return jiffies_to_usecs(n + 1);
 523}
 524
 525static struct aoeif *
 526getif(struct aoetgt *t, struct net_device *nd)
 527{
 528        struct aoeif *p, *e;
 529
 530        p = t->ifs;
 531        e = p + NAOEIFS;
 532        for (; p < e; p++)
 533                if (p->nd == nd)
 534                        return p;
 535        return NULL;
 536}
 537
 538static void
 539ejectif(struct aoetgt *t, struct aoeif *ifp)
 540{
 541        struct aoeif *e;
 542        struct net_device *nd;
 543        ulong n;
 544
 545        nd = ifp->nd;
 546        e = t->ifs + NAOEIFS - 1;
 547        n = (e - ifp) * sizeof *ifp;
 548        memmove(ifp, ifp+1, n);
 549        e->nd = NULL;
 550        dev_put(nd);
 551}
 552
 553static struct frame *
 554reassign_frame(struct frame *f)
 555{
 556        struct frame *nf;
 557        struct sk_buff *skb;
 558
 559        nf = newframe(f->t->d);
 560        if (!nf)
 561                return NULL;
 562        if (nf->t == f->t) {
 563                aoe_freetframe(nf);
 564                return NULL;
 565        }
 566
 567        skb = nf->skb;
 568        nf->skb = f->skb;
 569        nf->buf = f->buf;
 570        nf->iter = f->iter;
 571        nf->waited = 0;
 572        nf->waited_total = f->waited_total;
 573        nf->sent = f->sent;
 574        f->skb = skb;
 575
 576        return nf;
 577}
 578
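/* Probe a tainted target: send a full-sized read frame padded with the
 * shared empty_page so the round trip exercises maximum-size frames on
 * the path to that target.
 */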
 579static void
 580probe(struct aoetgt *t)
 581{
 582        struct aoedev *d;
 583        struct frame *f;
 584        struct sk_buff *skb;
 585        struct sk_buff_head queue;
 586        size_t n, m;
 587        int frag;
 588
 589        d = t->d;
 590        f = newtframe(d, t);
 591        if (!f) {
 592                pr_err("%s %pm for e%ld.%d: %s\n",
 593                        "aoe: cannot probe remote address",
 594                        t->addr,
 595                        (long) d->aoemajor, d->aoeminor,
 596                        "no frame available");
 597                return;
 598        }
 599        f->flags |= FFL_PROBE;
 600        ifrotate(t);
 601        f->iter.bi_size = t->d->maxbcnt ? t->d->maxbcnt : DEFAULTBCNT;
 602        ata_rw_frameinit(f);
 603        skb = f->skb;
 604        for (frag = 0, n = f->iter.bi_size; n > 0; ++frag, n -= m) {
 605                if (n < PAGE_SIZE)
 606                        m = n;
 607                else
 608                        m = PAGE_SIZE;
 609                skb_fill_page_desc(skb, frag, empty_page, 0, m);
 610        }
 611        skb->len += f->iter.bi_size;
 612        skb->data_len = f->iter.bi_size;
 613        skb->truesize += f->iter.bi_size;
 614
 615        skb = skb_clone(f->skb, GFP_ATOMIC);
 616        if (skb) {
 617                f->sent = ktime_get();
 618                __skb_queue_head_init(&queue);
 619                __skb_queue_tail(&queue, skb);
 620                aoenet_xmit(&queue);
 621        }
 622}
 623
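/* Retransmission timeout for the device, in the same microsecond units
 * as tsince_hr(): twice the smoothed round-trip average plus eight times
 * the smoothed deviation, with a floor of 1.
 */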
 624static long
 625rto(struct aoedev *d)
 626{
 627        long t;
 628
 629        t = 2 * d->rttavg >> RTTSCALE;
 630        t += 8 * d->rttdev >> RTTDSCALE;
 631        if (t == 0)
 632                t = 1;
 633
 634        return t;
 635}
 636
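/* Work through d->rexmitq: frames for tainted targets may be reassigned
 * to a different target (with a probe sent to the old, tainted one),
 * probes for healthy targets are dropped, and whatever remains is
 * retransmitted as each target's window allows, accumulating the time
 * already waited.
 */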
 637static void
 638rexmit_deferred(struct aoedev *d)
 639{
 640        struct aoetgt *t;
 641        struct frame *f;
 642        struct frame *nf;
 643        struct list_head *pos, *nx, *head;
 644        int since;
 645        int untainted;
 646
 647        count_targets(d, &untainted);
 648
 649        head = &d->rexmitq;
 650        list_for_each_safe(pos, nx, head) {
 651                f = list_entry(pos, struct frame, head);
 652                t = f->t;
 653                if (t->taint) {
 654                        if (!(f->flags & FFL_PROBE)) {
 655                                nf = reassign_frame(f);
 656                                if (nf) {
 657                                        if (t->nout_probes == 0
 658                                        && untainted > 0) {
 659                                                probe(t);
 660                                                t->nout_probes++;
 661                                        }
 662                                        list_replace(&f->head, &nf->head);
 663                                        pos = &nf->head;
 664                                        aoe_freetframe(f);
 665                                        f = nf;
 666                                        t = f->t;
 667                                }
 668                        } else if (untainted < 1) {
 669                                /* don't probe w/o other untainted aoetgts */
 670                                goto stop_probe;
 671                        } else if (tsince_hr(f) < t->taint * rto(d)) {
 672                                /* reprobe slowly when taint is high */
 673                                continue;
 674                        }
 675                } else if (f->flags & FFL_PROBE) {
 676stop_probe:             /* don't probe untainted aoetgts */
 677                        list_del(pos);
 678                        aoe_freetframe(f);
 679                        /* leaving d->kicked, because this is routine */
 680                        f->t->d->flags |= DEVFL_KICKME;
 681                        continue;
 682                }
 683                if (t->nout >= t->maxout)
 684                        continue;
 685                list_del(pos);
 686                t->nout++;
 687                if (f->flags & FFL_PROBE)
 688                        t->nout_probes++;
 689                since = tsince_hr(f);
 690                f->waited += since;
 691                f->waited_total += since;
 692                resend(d, f);
 693        }
 694}
 695
 696/* An aoetgt accumulates demerits quickly, and successful
 697 * probing redeems the aoetgt slowly.
 698 */
 699static void
 700scorn(struct aoetgt *t)
 701{
 702        int n;
 703
 704        n = t->taint++;
 705        t->taint += t->taint * 2;
 706        if (n > t->taint)
 707                t->taint = n;
 708        if (t->taint > MAX_TAINT)
 709                t->taint = MAX_TAINT;
 710}
 711
 712static int
 713count_targets(struct aoedev *d, int *untainted)
 714{
 715        int i, good;
 716
 717        for (i = good = 0; i < d->ntargets && d->targets[i]; ++i)
 718                if (d->targets[i]->taint == 0)
 719                        good++;
 720
 721        if (untainted)
 722                *untainted = good;
 723        return i;
 724}
 725
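/* Per-device timer: collect frames that have been outstanding longer
 * than the current rto(), fail the device once a frame has waited more
 * than aoe_deadsecs in total, scorn targets that are responding slowly,
 * shrink their windows, and queue the expired frames for retransmission
 * via rexmit_deferred().
 */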
 726static void
 727rexmit_timer(struct timer_list *timer)
 728{
 729        struct aoedev *d;
 730        struct aoetgt *t;
 731        struct aoeif *ifp;
 732        struct frame *f;
 733        struct list_head *head, *pos, *nx;
 734        LIST_HEAD(flist);
 735        register long timeout;
 736        ulong flags, n;
 737        int i;
 738        int utgts;      /* number of aoetgt descriptors (not slots) */
 739        int since;
 740
 741        d = from_timer(d, timer, timer);
 742
 743        spin_lock_irqsave(&d->lock, flags);
 744
 745        /* timeout based on observed timings and variations */
 746        timeout = rto(d);
 747
 748        utgts = count_targets(d, NULL);
 749
 750        if (d->flags & DEVFL_TKILL) {
 751                spin_unlock_irqrestore(&d->lock, flags);
 752                return;
 753        }
 754
 755        /* collect all frames to rexmit into flist */
 756        for (i = 0; i < NFACTIVE; i++) {
 757                head = &d->factive[i];
 758                list_for_each_safe(pos, nx, head) {
 759                        f = list_entry(pos, struct frame, head);
 760                        if (tsince_hr(f) < timeout)
 761                                break;  /* end of expired frames */
 762                        /* move to flist for later processing */
 763                        list_move_tail(pos, &flist);
 764                }
 765        }
 766
 767        /* process expired frames */
 768        while (!list_empty(&flist)) {
 769                pos = flist.next;
 770                f = list_entry(pos, struct frame, head);
 771                since = tsince_hr(f);
 772                n = f->waited_total + since;
 773                n /= USEC_PER_SEC;
 774                if (aoe_deadsecs
 775                && n > aoe_deadsecs
 776                && !(f->flags & FFL_PROBE)) {
 777                        /* Waited too long.  Device failure.
 778                         * Hang all frames on first hash bucket for downdev
 779                         * to clean up.
 780                         */
 781                        list_splice(&flist, &d->factive[0]);
 782                        aoedev_downdev(d);
 783                        goto out;
 784                }
 785
 786                t = f->t;
 787                n = f->waited + since;
 788                n /= USEC_PER_SEC;
 789                if (aoe_deadsecs && utgts > 0
 790                && (n > aoe_deadsecs / utgts || n > HARD_SCORN_SECS))
 791                        scorn(t); /* avoid this target */
 792
 793                if (t->maxout != 1) {
 794                        t->ssthresh = t->maxout / 2;
 795                        t->maxout = 1;
 796                }
 797
 798                if (f->flags & FFL_PROBE) {
 799                        t->nout_probes--;
 800                } else {
 801                        ifp = getif(t, f->skb->dev);
 802                        if (ifp && ++ifp->lost > (t->nframes << 1)
 803                        && (ifp != t->ifs || t->ifs[1].nd)) {
 804                                ejectif(t, ifp);
 805                                ifp = NULL;
 806                        }
 807                }
 808                list_move_tail(pos, &d->rexmitq);
 809                t->nout--;
 810        }
 811        rexmit_deferred(d);
 812
 813out:
 814        if ((d->flags & DEVFL_KICKME) && d->blkq) {
 815                d->flags &= ~DEVFL_KICKME;
 816                blk_mq_run_hw_queues(d->blkq, true);
 817        }
 818
 819        d->timer.expires = jiffies + TIMERTICK;
 820        add_timer(&d->timer);
 821
 822        spin_unlock_irqrestore(&d->lock, flags);
 823}
 824
 825static void
 826bufinit(struct buf *buf, struct request *rq, struct bio *bio)
 827{
 828        memset(buf, 0, sizeof(*buf));
 829        buf->rq = rq;
 830        buf->bio = bio;
 831        buf->iter = bio->bi_iter;
 832}
 833
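/* Return the buf currently being filled for the device, starting a new
 * request from d->rq_list and carving the next bio out of it when
 * necessary.  Returns NULL while the device is still initializing, when
 * there is no pending work, or if the mempool allocation fails.
 */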
 834static struct buf *
 835nextbuf(struct aoedev *d)
 836{
 837        struct request *rq;
 838        struct request_queue *q;
 839        struct aoe_req *req;
 840        struct buf *buf;
 841        struct bio *bio;
 842
 843        q = d->blkq;
 844        if (q == NULL)
 845                return NULL;    /* initializing */
 846        if (d->ip.buf)
 847                return d->ip.buf;
 848        rq = d->ip.rq;
 849        if (rq == NULL) {
 850                rq = list_first_entry_or_null(&d->rq_list, struct request,
 851                                                queuelist);
 852                if (rq == NULL)
 853                        return NULL;
 854                list_del_init(&rq->queuelist);
 855                blk_mq_start_request(rq);
 856                d->ip.rq = rq;
 857                d->ip.nxbio = rq->bio;
 858
 859                req = blk_mq_rq_to_pdu(rq);
 860                req->nr_bios = 0;
 861                __rq_for_each_bio(bio, rq)
 862                        req->nr_bios++;
 863        }
 864        buf = mempool_alloc(d->bufpool, GFP_ATOMIC);
 865        if (buf == NULL) {
 866                pr_err("aoe: nextbuf: unable to mempool_alloc!\n");
 867                return NULL;
 868        }
 869        bio = d->ip.nxbio;
 870        bufinit(buf, rq, bio);
 871        bio = bio->bi_next;
 872        d->ip.nxbio = bio;
 873        if (bio == NULL)
 874                d->ip.rq = NULL;
 875        return d->ip.buf = buf;
 876}
 877
 878/* enters with d->lock held */
 879void
 880aoecmd_work(struct aoedev *d)
 881{
 882        rexmit_deferred(d);
 883        while (aoecmd_ata_rw(d))
 884                ;
 885}
 886
 887/* this function performs work that has been deferred until sleeping is OK
 888 */
 889void
 890aoecmd_sleepwork(struct work_struct *work)
 891{
 892        struct aoedev *d = container_of(work, struct aoedev, work);
 893        struct block_device *bd;
 894        u64 ssize;
 895
 896        if (d->flags & DEVFL_GDALLOC)
 897                aoeblk_gdalloc(d);
 898
 899        if (d->flags & DEVFL_NEWSIZE) {
 900                ssize = get_capacity(d->gd);
 901                bd = bdget_disk(d->gd, 0);
 902                if (bd) {
 903                        inode_lock(bd->bd_inode);
 904                        i_size_write(bd->bd_inode, (loff_t)ssize<<9);
 905                        inode_unlock(bd->bd_inode);
 906                        bdput(bd);
 907                }
 908                spin_lock_irq(&d->lock);
 909                d->flags |= DEVFL_UP;
 910                d->flags &= ~DEVFL_NEWSIZE;
 911                spin_unlock_irq(&d->lock);
 912        }
 913}
 914
 915static void
 916ata_ident_fixstring(u16 *id, int ns)
 917{
 918        u16 s;
 919
 920        while (ns-- > 0) {
 921                s = *id;
 922                *id++ = s >> 8 | s << 8;
 923        }
 924}
 925
 926static void
 927ataid_complete(struct aoedev *d, struct aoetgt *t, unsigned char *id)
 928{
 929        u64 ssize;
 930        u16 n;
 931
 932        /* word 83: command set supported */
 933        n = get_unaligned_le16(&id[83 << 1]);
 934
 935        /* word 86: command set/feature enabled */
 936        n |= get_unaligned_le16(&id[86 << 1]);
 937
 938        if (n & (1<<10)) {      /* bit 10: LBA 48 */
 939                d->flags |= DEVFL_EXT;
 940
 941                /* word 100: number lba48 sectors */
 942                ssize = get_unaligned_le64(&id[100 << 1]);
 943
 944                /* set as in ide-disk.c:init_idedisk_capacity */
 945                d->geo.cylinders = ssize;
 946                d->geo.cylinders /= (255 * 63);
 947                d->geo.heads = 255;
 948                d->geo.sectors = 63;
 949        } else {
 950                d->flags &= ~DEVFL_EXT;
 951
 952                /* number lba28 sectors */
 953                ssize = get_unaligned_le32(&id[60 << 1]);
 954
 955                /* NOTE: obsolete in ATA 6 */
 956                d->geo.cylinders = get_unaligned_le16(&id[54 << 1]);
 957                d->geo.heads = get_unaligned_le16(&id[55 << 1]);
 958                d->geo.sectors = get_unaligned_le16(&id[56 << 1]);
 959        }
 960
 961        ata_ident_fixstring((u16 *) &id[10<<1], 10);    /* serial */
 962        ata_ident_fixstring((u16 *) &id[23<<1], 4);     /* firmware */
 963        ata_ident_fixstring((u16 *) &id[27<<1], 20);    /* model */
 964        memcpy(d->ident, id, sizeof(d->ident));
 965
 966        if (d->ssize != ssize)
 967                printk(KERN_INFO
 968                        "aoe: %pm e%ld.%d v%04x has %llu sectors\n",
 969                        t->addr,
 970                        d->aoemajor, d->aoeminor,
 971                        d->fw_ver, (long long)ssize);
 972        d->ssize = ssize;
 973        d->geo.start = 0;
 974        if (d->flags & (DEVFL_GDALLOC|DEVFL_NEWSIZE))
 975                return;
 976        if (d->gd != NULL) {
 977                set_capacity(d->gd, ssize);
 978                d->flags |= DEVFL_NEWSIZE;
 979        } else
 980                d->flags |= DEVFL_GDALLOC;
 981        schedule_work(&d->work);
 982}
 983
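/* Update the smoothed round-trip average and mean deviation from a new
 * sample (see the Jacobson & Karels reference below), then grow the
 * target's window: one frame per sample below ssthresh, and roughly one
 * frame per window's worth of completions above it.
 */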
 984static void
 985calc_rttavg(struct aoedev *d, struct aoetgt *t, int rtt)
 986{
 987        register long n;
 988
 989        n = rtt;
 990
 991        /* cf. Congestion Avoidance and Control, Jacobson & Karels, 1988 */
 992        n -= d->rttavg >> RTTSCALE;
 993        d->rttavg += n;
 994        if (n < 0)
 995                n = -n;
 996        n -= d->rttdev >> RTTDSCALE;
 997        d->rttdev += n;
 998
 999        if (!t || t->maxout >= t->nframes)
1000                return;
1001        if (t->maxout < t->ssthresh)
1002                t->maxout += 1;
1003        else if (t->nout == t->maxout && t->next_cwnd-- == 0) {
1004                t->maxout += 1;
1005                t->next_cwnd = t->maxout;
1006        }
1007}
1008
1009static struct aoetgt *
1010gettgt(struct aoedev *d, char *addr)
1011{
1012        struct aoetgt **t, **e;
1013
1014        t = d->targets;
1015        e = t + d->ntargets;
1016        for (; t < e && *t; t++)
1017                if (memcmp((*t)->addr, addr, sizeof((*t)->addr)) == 0)
1018                        return *t;
1019        return NULL;
1020}
1021
1022static void
1023bvcpy(struct sk_buff *skb, struct bio *bio, struct bvec_iter iter, long cnt)
1024{
1025        int soff = 0;
1026        struct bio_vec bv;
1027
1028        iter.bi_size = cnt;
1029
1030        __bio_for_each_segment(bv, bio, iter, iter) {
1031                char *p = kmap_atomic(bv.bv_page) + bv.bv_offset;
1032                skb_copy_bits(skb, soff, p, bv.bv_len);
1033                kunmap_atomic(p);
1034                soff += bv.bv_len;
1035        }
1036}
1037
1038void
1039aoe_end_request(struct aoedev *d, struct request *rq, int fastfail)
1040{
1041        struct bio *bio;
1042        int bok;
1043        struct request_queue *q;
1044        blk_status_t err = BLK_STS_OK;
1045
1046        q = d->blkq;
1047        if (rq == d->ip.rq)
1048                d->ip.rq = NULL;
1049        do {
1050                bio = rq->bio;
1051                bok = !fastfail && !bio->bi_status;
1052                if (!bok)
1053                        err = BLK_STS_IOERR;
1054        } while (blk_update_request(rq, bok ? BLK_STS_OK : BLK_STS_IOERR, bio->bi_iter.bi_size));
1055
1056        __blk_mq_end_request(rq, err);
1057
1058        /* cf. http://lkml.org/lkml/2006/10/31/28 */
1059        if (!fastfail)
1060                blk_mq_run_hw_queues(q, true);
1061}
1062
1063static void
1064aoe_end_buf(struct aoedev *d, struct buf *buf)
1065{
1066        struct request *rq = buf->rq;
1067        struct aoe_req *req = blk_mq_rq_to_pdu(rq);
1068
1069        if (buf == d->ip.buf)
1070                d->ip.buf = NULL;
1071        mempool_free(buf, d->bufpool);
1072        if (--req->nr_bios == 0)
1073                aoe_end_request(d, rq, 0);
1074}
1075
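/* Complete one response in kthread context: validate the ATA status,
 * copy read data back into the bio for PIO reads, feed identify data to
 * ataid_complete(), reduce the target's taint, and end the buf once all
 * of its frames have come back.
 */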
1076static void
1077ktiocomplete(struct frame *f)
1078{
1079        struct aoe_hdr *hin, *hout;
1080        struct aoe_atahdr *ahin, *ahout;
1081        struct buf *buf;
1082        struct sk_buff *skb;
1083        struct aoetgt *t;
1084        struct aoeif *ifp;
1085        struct aoedev *d;
1086        long n;
1087        int untainted;
1088
1089        if (f == NULL)
1090                return;
1091
1092        t = f->t;
1093        d = t->d;
1094        skb = f->r_skb;
1095        buf = f->buf;
1096        if (f->flags & FFL_PROBE)
1097                goto out;
1098        if (!skb)               /* just fail the buf. */
1099                goto noskb;
1100
1101        hout = (struct aoe_hdr *) skb_mac_header(f->skb);
1102        ahout = (struct aoe_atahdr *) (hout+1);
1103
1104        hin = (struct aoe_hdr *) skb->data;
1105        skb_pull(skb, sizeof(*hin));
1106        ahin = (struct aoe_atahdr *) skb->data;
1107        skb_pull(skb, sizeof(*ahin));
1108        if (ahin->cmdstat & 0xa9) {     /* these bits cleared on success */
1109                pr_err("aoe: ata error cmd=%2.2Xh stat=%2.2Xh from e%ld.%d\n",
1110                        ahout->cmdstat, ahin->cmdstat,
1111                        d->aoemajor, d->aoeminor);
1112noskb:          if (buf)
1113                        buf->bio->bi_status = BLK_STS_IOERR;
1114                goto out;
1115        }
1116
1117        n = ahout->scnt << 9;
1118        switch (ahout->cmdstat) {
1119        case ATA_CMD_PIO_READ:
1120        case ATA_CMD_PIO_READ_EXT:
1121                if (skb->len < n) {
1122                        pr_err("%s e%ld.%d.  skb->len=%d need=%ld\n",
1123                                "aoe: runt data size in read from",
1124                                (long) d->aoemajor, d->aoeminor,
1125                               skb->len, n);
1126                        buf->bio->bi_status = BLK_STS_IOERR;
1127                        break;
1128                }
1129                if (n > f->iter.bi_size) {
1130                        pr_err_ratelimited("%s e%ld.%d.  bytes=%ld need=%u\n",
1131                                "aoe: too-large data size in read from",
1132                                (long) d->aoemajor, d->aoeminor,
1133                                n, f->iter.bi_size);
1134                        buf->bio->bi_status = BLK_STS_IOERR;
1135                        break;
1136                }
1137                bvcpy(skb, f->buf->bio, f->iter, n);
1138                /* fall through */
1139        case ATA_CMD_PIO_WRITE:
1140        case ATA_CMD_PIO_WRITE_EXT:
1141                spin_lock_irq(&d->lock);
1142                ifp = getif(t, skb->dev);
1143                if (ifp)
1144                        ifp->lost = 0;
1145                spin_unlock_irq(&d->lock);
1146                break;
1147        case ATA_CMD_ID_ATA:
1148                if (skb->len < 512) {
1149                        pr_info("%s e%ld.%d.  skb->len=%d need=512\n",
1150                                "aoe: runt data size in ataid from",
1151                                (long) d->aoemajor, d->aoeminor,
1152                                skb->len);
1153                        break;
1154                }
1155                if (skb_linearize(skb))
1156                        break;
1157                spin_lock_irq(&d->lock);
1158                ataid_complete(d, t, skb->data);
1159                spin_unlock_irq(&d->lock);
1160                break;
1161        default:
1162                pr_info("aoe: unrecognized ata command %2.2Xh for %d.%d\n",
1163                        ahout->cmdstat,
1164                        be16_to_cpu(get_unaligned(&hin->major)),
1165                        hin->minor);
1166        }
1167out:
1168        spin_lock_irq(&d->lock);
1169        if (t->taint > 0
1170        && --t->taint > 0
1171        && t->nout_probes == 0) {
1172                count_targets(d, &untainted);
1173                if (untainted > 0) {
1174                        probe(t);
1175                        t->nout_probes++;
1176                }
1177        }
1178
1179        aoe_freetframe(f);
1180
1181        if (buf && --buf->nframesout == 0 && buf->iter.bi_size == 0)
1182                aoe_end_buf(d, buf);
1183
1184        spin_unlock_irq(&d->lock);
1185        aoedev_put(d);
1186        dev_kfree_skb(skb);
1187}
1188
1189/* Enters with iocq.lock held.
1190 * Returns true iff responses needing processing remain.
1191 */
1192static int
1193ktio(int id)
1194{
1195        struct frame *f;
1196        struct list_head *pos;
1197        int i;
1198        int actual_id;
1199
1200        for (i = 0; ; ++i) {
1201                if (i == MAXIOC)
1202                        return 1;
1203                if (list_empty(&iocq[id].head))
1204                        return 0;
1205                pos = iocq[id].head.next;
1206                list_del(pos);
1207                f = list_entry(pos, struct frame, head);
1208                spin_unlock_irq(&iocq[id].lock);
1209                ktiocomplete(f);
1210
1211                /* Figure out if extra threads are required. */
1212                actual_id = f->t->d->aoeminor % ncpus;
1213
1214                if (!kts[actual_id].active) {
1215                        BUG_ON(id != 0);
1216                        mutex_lock(&ktio_spawn_lock);
1217                        if (!kts[actual_id].active
1218                                && aoe_ktstart(&kts[actual_id]) == 0)
1219                                kts[actual_id].active = 1;
1220                        mutex_unlock(&ktio_spawn_lock);
1221                }
1222                spin_lock_irq(&iocq[id].lock);
1223        }
1224}
1225
1226static int
1227kthread(void *vp)
1228{
1229        struct ktstate *k;
1230        DECLARE_WAITQUEUE(wait, current);
1231        int more;
1232
1233        k = vp;
1234        current->flags |= PF_NOFREEZE;
1235        set_user_nice(current, -10);
1236        complete(&k->rendez);   /* tell spawner we're running */
1237        do {
1238                spin_lock_irq(k->lock);
1239                more = k->fn(k->id);
1240                if (!more) {
1241                        add_wait_queue(k->waitq, &wait);
1242                        __set_current_state(TASK_INTERRUPTIBLE);
1243                }
1244                spin_unlock_irq(k->lock);
1245                if (!more) {
1246                        schedule();
1247                        remove_wait_queue(k->waitq, &wait);
1248                } else
1249                        cond_resched();
1250        } while (!kthread_should_stop());
1251        complete(&k->rendez);   /* tell spawner we're stopping */
1252        return 0;
1253}
1254
1255void
1256aoe_ktstop(struct ktstate *k)
1257{
1258        kthread_stop(k->task);
1259        wait_for_completion(&k->rendez);
1260}
1261
1262int
1263aoe_ktstart(struct ktstate *k)
1264{
1265        struct task_struct *task;
1266
1267        init_completion(&k->rendez);
1268        task = kthread_run(kthread, k, "%s", k->name);
1269        if (task == NULL || IS_ERR(task))
1270                return -ENOMEM;
1271        k->task = task;
1272        wait_for_completion(&k->rendez); /* allow kthread to start */
1273        init_completion(&k->rendez);    /* for waiting for exit later */
1274        return 0;
1275}
1276
1277/* pass it off to kthreads for processing */
1278static void
1279ktcomplete(struct frame *f, struct sk_buff *skb)
1280{
1281        int id;
1282        ulong flags;
1283
1284        f->r_skb = skb;
1285        id = f->t->d->aoeminor % ncpus;
1286        spin_lock_irqsave(&iocq[id].lock, flags);
1287        if (!kts[id].active) {
1288                spin_unlock_irqrestore(&iocq[id].lock, flags);
1289                /* The thread with id has not been spawned yet,
1290                 * so delegate the work to the main thread and
1291                 * try spawning a new thread.
1292                 */
1293                id = 0;
1294                spin_lock_irqsave(&iocq[id].lock, flags);
1295        }
1296        list_add_tail(&f->head, &iocq[id].head);
1297        spin_unlock_irqrestore(&iocq[id].lock, flags);
1298        wake_up(&ktiowq[id]);
1299}
1300
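/* Handle an incoming ATA response: match it to an outstanding frame by
 * tag, update RTT statistics, restart deferred work, and hand the frame
 * plus response skb to the ktio threads for completion.  Returns the skb
 * if it was not consumed (unknown device or unexpected tag), NULL once
 * it has been handed off.
 */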
1301struct sk_buff *
1302aoecmd_ata_rsp(struct sk_buff *skb)
1303{
1304        struct aoedev *d;
1305        struct aoe_hdr *h;
1306        struct frame *f;
1307        u32 n;
1308        ulong flags;
1309        char ebuf[128];
1310        u16 aoemajor;
1311
1312        h = (struct aoe_hdr *) skb->data;
1313        aoemajor = be16_to_cpu(get_unaligned(&h->major));
1314        d = aoedev_by_aoeaddr(aoemajor, h->minor, 0);
1315        if (d == NULL) {
1316                snprintf(ebuf, sizeof ebuf, "aoecmd_ata_rsp: ata response "
1317                        "for unknown device %d.%d\n",
1318                        aoemajor, h->minor);
1319                aoechr_error(ebuf);
1320                return skb;
1321        }
1322
1323        spin_lock_irqsave(&d->lock, flags);
1324
1325        n = be32_to_cpu(get_unaligned(&h->tag));
1326        f = getframe(d, n);
1327        if (f) {
1328                calc_rttavg(d, f->t, tsince_hr(f));
1329                f->t->nout--;
1330                if (f->flags & FFL_PROBE)
1331                        f->t->nout_probes--;
1332        } else {
1333                f = getframe_deferred(d, n);
1334                if (f) {
1335                        calc_rttavg(d, NULL, tsince_hr(f));
1336                } else {
1337                        calc_rttavg(d, NULL, tsince(n));
1338                        spin_unlock_irqrestore(&d->lock, flags);
1339                        aoedev_put(d);
1340                        snprintf(ebuf, sizeof(ebuf),
1341                                 "%15s e%d.%d    tag=%08x@%08lx s=%pm d=%pm\n",
1342                                 "unexpected rsp",
1343                                 get_unaligned_be16(&h->major),
1344                                 h->minor,
1345                                 get_unaligned_be32(&h->tag),
1346                                 jiffies,
1347                                 h->src,
1348                                 h->dst);
1349                        aoechr_error(ebuf);
1350                        return skb;
1351                }
1352        }
1353        aoecmd_work(d);
1354
1355        spin_unlock_irqrestore(&d->lock, flags);
1356
1357        ktcomplete(f, skb);
1358
1359        /*
1360         * Note here that we do not perform an aoedev_put, as we are
1361         * leaving this reference for the ktio to release.
1362         */
1363        return NULL;
1364}
1365
1366void
1367aoecmd_cfg(ushort aoemajor, unsigned char aoeminor)
1368{
1369        struct sk_buff_head queue;
1370
1371        __skb_queue_head_init(&queue);
1372        aoecmd_cfg_pkts(aoemajor, aoeminor, &queue);
1373        aoenet_xmit(&queue);
1374}
1375
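/* Build an ATA identify-device frame for the device's current target and
 * return a clone of it for transmission; the RTT state and retransmit
 * timer callback are (re)initialized here as well.
 */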
1376struct sk_buff *
1377aoecmd_ata_id(struct aoedev *d)
1378{
1379        struct aoe_hdr *h;
1380        struct aoe_atahdr *ah;
1381        struct frame *f;
1382        struct sk_buff *skb;
1383        struct aoetgt *t;
1384
1385        f = newframe(d);
1386        if (f == NULL)
1387                return NULL;
1388
1389        t = *d->tgt;
1390
1391        /* initialize the headers & frame */
1392        skb = f->skb;
1393        h = (struct aoe_hdr *) skb_mac_header(skb);
1394        ah = (struct aoe_atahdr *) (h+1);
1395        skb_put(skb, sizeof *h + sizeof *ah);
1396        memset(h, 0, skb->len);
1397        f->tag = aoehdr_atainit(d, t, h);
1398        fhash(f);
1399        t->nout++;
1400        f->waited = 0;
1401        f->waited_total = 0;
1402
1403        /* set up ata header */
1404        ah->scnt = 1;
1405        ah->cmdstat = ATA_CMD_ID_ATA;
1406        ah->lba3 = 0xa0;
1407
1408        skb->dev = t->ifp->nd;
1409
1410        d->rttavg = RTTAVG_INIT;
1411        d->rttdev = RTTDEV_INIT;
1412        d->timer.function = rexmit_timer;
1413
1414        skb = skb_clone(skb, GFP_ATOMIC);
1415        if (skb)
1416                f->sent = ktime_get();
1417
1418        return skb;
1419}
1420
1421static struct aoetgt **
1422grow_targets(struct aoedev *d)
1423{
1424        ulong oldn, newn;
1425        struct aoetgt **tt;
1426
1427        oldn = d->ntargets;
1428        newn = oldn * 2;
1429        tt = kcalloc(newn, sizeof(*d->targets), GFP_ATOMIC);
1430        if (!tt)
1431                return NULL;
1432        memmove(tt, d->targets, sizeof(*d->targets) * oldn);
1433        d->tgt = tt + (d->tgt - d->targets);
1434        kfree(d->targets);
1435        d->targets = tt;
1436        d->ntargets = newn;
1437
1438        return &d->targets[oldn];
1439}
1440
1441static struct aoetgt *
1442addtgt(struct aoedev *d, char *addr, ulong nframes)
1443{
1444        struct aoetgt *t, **tt, **te;
1445
1446        tt = d->targets;
1447        te = tt + d->ntargets;
1448        for (; tt < te && *tt; tt++)
1449                ;
1450
1451        if (tt == te) {
1452                tt = grow_targets(d);
1453                if (!tt)
1454                        goto nomem;
1455        }
1456        t = kzalloc(sizeof(*t), GFP_ATOMIC);
1457        if (!t)
1458                goto nomem;
1459        t->nframes = nframes;
1460        t->d = d;
1461        memcpy(t->addr, addr, sizeof t->addr);
1462        t->ifp = t->ifs;
1463        aoecmd_wreset(t);
1464        t->maxout = t->nframes / 2;
1465        INIT_LIST_HEAD(&t->ffree);
1466        return *tt = t;
1467
1468 nomem:
1469        pr_info("aoe: cannot allocate memory to add target\n");
1470        return NULL;
1471}
1472
1473static void
1474setdbcnt(struct aoedev *d)
1475{
1476        struct aoetgt **t, **e;
1477        int bcnt = 0;
1478
1479        t = d->targets;
1480        e = t + d->ntargets;
1481        for (; t < e && *t; t++)
1482                if (bcnt == 0 || bcnt > (*t)->minbcnt)
1483                        bcnt = (*t)->minbcnt;
1484        if (bcnt != d->maxbcnt) {
1485                d->maxbcnt = bcnt;
1486                pr_info("aoe: e%ld.%d: setting %d byte data frames\n",
1487                        d->aoemajor, d->aoeminor, bcnt);
1488        }
1489}
1490
1491static void
1492setifbcnt(struct aoetgt *t, struct net_device *nd, int bcnt)
1493{
1494        struct aoedev *d;
1495        struct aoeif *p, *e;
1496        int minbcnt;
1497
1498        d = t->d;
1499        minbcnt = bcnt;
1500        p = t->ifs;
1501        e = p + NAOEIFS;
1502        for (; p < e; p++) {
1503                if (p->nd == NULL)
1504                        break;          /* end of the valid interfaces */
1505                if (p->nd == nd) {
1506                        p->bcnt = bcnt; /* we're updating */
1507                        nd = NULL;
1508                } else if (minbcnt > p->bcnt)
1509                        minbcnt = p->bcnt; /* find the min interface */
1510        }
1511        if (nd) {
1512                if (p == e) {
1513                        pr_err("aoe: device setifbcnt failure; too many interfaces.\n");
1514                        return;
1515                }
1516                dev_hold(nd);
1517                p->nd = nd;
1518                p->bcnt = bcnt;
1519        }
1520        t->minbcnt = minbcnt;
1521        setdbcnt(d);
1522}
1523
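/* Handle an AoE config response: validate the shelf/slot address, find
 * or create the aoedev and aoetgt it describes, record how many
 * outstanding frames the target allows and the usable payload per frame
 * (bounded by the interface MTU), and, when nobody has the device open,
 * send an ATA identify to pick up its size and geometry.
 */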
1524void
1525aoecmd_cfg_rsp(struct sk_buff *skb)
1526{
1527        struct aoedev *d;
1528        struct aoe_hdr *h;
1529        struct aoe_cfghdr *ch;
1530        struct aoetgt *t;
1531        ulong flags, aoemajor;
1532        struct sk_buff *sl;
1533        struct sk_buff_head queue;
1534        u16 n;
1535
1536        sl = NULL;
1537        h = (struct aoe_hdr *) skb_mac_header(skb);
1538        ch = (struct aoe_cfghdr *) (h+1);
1539
1540        /*
1541         * Enough people have their dip switches set backwards to
1542         * warrant a loud message for this special case.
1543         */
1544        aoemajor = get_unaligned_be16(&h->major);
1545        if (aoemajor == 0xfff) {
1546                printk(KERN_ERR "aoe: Warning: shelf address is all ones.  "
1547                        "Check shelf dip switches.\n");
1548                return;
1549        }
1550        if (aoemajor == 0xffff) {
1551                pr_info("aoe: e%ld.%d: broadcast shelf number invalid\n",
1552                        aoemajor, (int) h->minor);
1553                return;
1554        }
1555        if (h->minor == 0xff) {
1556                pr_info("aoe: e%ld.%d: broadcast slot number invalid\n",
1557                        aoemajor, (int) h->minor);
1558                return;
1559        }
1560
1561        n = be16_to_cpu(ch->bufcnt);
1562        if (n > aoe_maxout)     /* keep it reasonable */
1563                n = aoe_maxout;
1564
1565        d = aoedev_by_aoeaddr(aoemajor, h->minor, 1);
1566        if (d == NULL) {
1567                pr_info("aoe: device allocation failure\n");
1568                return;
1569        }
1570
1571        spin_lock_irqsave(&d->lock, flags);
1572
1573        t = gettgt(d, h->src);
1574        if (t) {
1575                t->nframes = n;
1576                if (n < t->maxout)
1577                        aoecmd_wreset(t);
1578        } else {
1579                t = addtgt(d, h->src, n);
1580                if (!t)
1581                        goto bail;
1582        }
1583        n = skb->dev->mtu;
1584        n -= sizeof(struct aoe_hdr) + sizeof(struct aoe_atahdr);
1585        n /= 512;
1586        if (n > ch->scnt)
1587                n = ch->scnt;
1588        n = n ? n * 512 : DEFAULTBCNT;
1589        setifbcnt(t, skb->dev, n);
1590
1591        /* don't change users' perspective */
1592        if (d->nopen == 0) {
1593                d->fw_ver = be16_to_cpu(ch->fwver);
1594                sl = aoecmd_ata_id(d);
1595        }
1596bail:
1597        spin_unlock_irqrestore(&d->lock, flags);
1598        aoedev_put(d);
1599        if (sl) {
1600                __skb_queue_head_init(&queue);
1601                __skb_queue_tail(&queue, sl);
1602                aoenet_xmit(&queue);
1603        }
1604}
1605
1606void
1607aoecmd_wreset(struct aoetgt *t)
1608{
1609        t->maxout = 1;
1610        t->ssthresh = t->nframes / 2;
1611        t->next_cwnd = t->nframes;
1612}
1613
1614void
1615aoecmd_cleanslate(struct aoedev *d)
1616{
1617        struct aoetgt **t, **te;
1618
1619        d->rttavg = RTTAVG_INIT;
1620        d->rttdev = RTTDEV_INIT;
1621        d->maxbcnt = 0;
1622
1623        t = d->targets;
1624        te = t + d->ntargets;
1625        for (; t < te && *t; t++)
1626                aoecmd_wreset(*t);
1627}
1628
1629void
1630aoe_failbuf(struct aoedev *d, struct buf *buf)
1631{
1632        if (buf == NULL)
1633                return;
1634        buf->iter.bi_size = 0;
1635        buf->bio->bi_status = BLK_STS_IOERR;
1636        if (buf->nframesout == 0)
1637                aoe_end_buf(d, buf);
1638}
1639
1640void
1641aoe_flush_iocq(void)
1642{
1643        int i;
1644
1645        for (i = 0; i < ncpus; i++) {
1646                if (kts[i].active)
1647                        aoe_flush_iocq_by_index(i);
1648        }
1649}
1650
1651void
1652aoe_flush_iocq_by_index(int id)
1653{
1654        struct frame *f;
1655        struct aoedev *d;
1656        LIST_HEAD(flist);
1657        struct list_head *pos;
1658        struct sk_buff *skb;
1659        ulong flags;
1660
1661        spin_lock_irqsave(&iocq[id].lock, flags);
1662        list_splice_init(&iocq[id].head, &flist);
1663        spin_unlock_irqrestore(&iocq[id].lock, flags);
1664        while (!list_empty(&flist)) {
1665                pos = flist.next;
1666                list_del(pos);
1667                f = list_entry(pos, struct frame, head);
1668                d = f->t->d;
1669                skb = f->r_skb;
1670                spin_lock_irqsave(&d->lock, flags);
1671                if (f->buf) {
1672                        f->buf->nframesout--;
1673                        aoe_failbuf(d, f->buf);
1674                }
1675                aoe_freetframe(f);
1676                spin_unlock_irqrestore(&d->lock, flags);
1677                dev_kfree_skb(skb);
1678                aoedev_put(d);
1679        }
1680}
1681
1682int __init
1683aoecmd_init(void)
1684{
1685        void *p;
1686        int i;
1687        int ret;
1688
1689        /* get_zeroed_page returns page with ref count 1 */
1690        p = (void *) get_zeroed_page(GFP_KERNEL);
1691        if (!p)
1692                return -ENOMEM;
1693        empty_page = virt_to_page(p);
1694
1695        ncpus = num_online_cpus();
1696
1697        iocq = kcalloc(ncpus, sizeof(struct iocq_ktio), GFP_KERNEL);
1698        if (!iocq)
1699                return -ENOMEM;
1700
1701        kts = kcalloc(ncpus, sizeof(struct ktstate), GFP_KERNEL);
1702        if (!kts) {
1703                ret = -ENOMEM;
1704                goto kts_fail;
1705        }
1706
1707        ktiowq = kcalloc(ncpus, sizeof(wait_queue_head_t), GFP_KERNEL);
1708        if (!ktiowq) {
1709                ret = -ENOMEM;
1710                goto ktiowq_fail;
1711        }
1712
1713        mutex_init(&ktio_spawn_lock);
1714
1715        for (i = 0; i < ncpus; i++) {
1716                INIT_LIST_HEAD(&iocq[i].head);
1717                spin_lock_init(&iocq[i].lock);
1718                init_waitqueue_head(&ktiowq[i]);
1719                snprintf(kts[i].name, sizeof(kts[i].name), "aoe_ktio%d", i);
1720                kts[i].fn = ktio;
1721                kts[i].waitq = &ktiowq[i];
1722                kts[i].lock = &iocq[i].lock;
1723                kts[i].id = i;
1724                kts[i].active = 0;
1725        }
1726        kts[0].active = 1;
1727        if (aoe_ktstart(&kts[0])) {
1728                ret = -ENOMEM;
1729                goto ktstart_fail;
1730        }
1731        return 0;
1732
1733ktstart_fail:
1734        kfree(ktiowq);
1735ktiowq_fail:
1736        kfree(kts);
1737kts_fail:
1738        kfree(iocq);
1739
1740        return ret;
1741}
1742
1743void
1744aoecmd_exit(void)
1745{
1746        int i;
1747
1748        for (i = 0; i < ncpus; i++)
1749                if (kts[i].active)
1750                        aoe_ktstop(&kts[i]);
1751
1752        aoe_flush_iocq();
1753
 1754        /* Free up the iocq and thread-specific configuration
 1755         * allocated during startup.
 1756         */
1757        kfree(iocq);
1758        kfree(kts);
1759        kfree(ktiowq);
1760
1761        free_page((unsigned long) page_address(empty_page));
1762        empty_page = NULL;
1763}
1764