linux/drivers/block/aoe/aoecmd.c
/* Copyright (c) 2013 Coraid, Inc.  See COPYING for GPL terms. */
/*
 * aoecmd.c
 * Filesystem request handling methods
 */

#include <linux/ata.h>
#include <linux/slab.h>
#include <linux/hdreg.h>
#include <linux/blkdev.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <linux/genhd.h>
#include <linux/moduleparam.h>
#include <linux/workqueue.h>
#include <linux/kthread.h>
#include <net/net_namespace.h>
#include <asm/unaligned.h>
#include <linux/uio.h>
#include "aoe.h"

#define MAXIOC (8192)   /* default meant to avoid most soft lockups */

static void ktcomplete(struct frame *, struct sk_buff *);
static int count_targets(struct aoedev *d, int *untainted);

static struct buf *nextbuf(struct aoedev *);

static int aoe_deadsecs = 60 * 3;
module_param(aoe_deadsecs, int, 0644);
MODULE_PARM_DESC(aoe_deadsecs, "After aoe_deadsecs seconds, give up and fail dev.");

static int aoe_maxout = 64;
module_param(aoe_maxout, int, 0644);
MODULE_PARM_DESC(aoe_maxout,
        "Only aoe_maxout outstanding packets for every MAC on eX.Y.");

/* The number of online cpus during module initialization gives us a
 * convenient heuristic cap on the parallelism used for ktio threads
 * doing I/O completion.  It is not important that the cap equal the
 * actual number of running CPUs at any given time, but because of CPU
 * hotplug, we take care to use ncpus instead of using
 * num_online_cpus() after module initialization.
 */
static int ncpus;

/* mutex lock used for synchronization while thread spawning */
static DEFINE_MUTEX(ktio_spawn_lock);

static wait_queue_head_t *ktiowq;
static struct ktstate *kts;

/* io completion queue */
struct iocq_ktio {
        struct list_head head;
        spinlock_t lock;
};
static struct iocq_ktio *iocq;

static struct page *empty_page;

static struct sk_buff *
new_skb(ulong len)
{
        struct sk_buff *skb;

        skb = alloc_skb(len + MAX_HEADER, GFP_ATOMIC);
        if (skb) {
                skb_reserve(skb, MAX_HEADER);
                skb_reset_mac_header(skb);
                skb_reset_network_header(skb);
                skb->protocol = __constant_htons(ETH_P_AOE);
                skb_checksum_none_assert(skb);
        }
        return skb;
}

static struct frame *
getframe_deferred(struct aoedev *d, u32 tag)
{
        struct list_head *head, *pos, *nx;
        struct frame *f;

        head = &d->rexmitq;
        list_for_each_safe(pos, nx, head) {
                f = list_entry(pos, struct frame, head);
                if (f->tag == tag) {
                        list_del(pos);
                        return f;
                }
        }
        return NULL;
}

static struct frame *
getframe(struct aoedev *d, u32 tag)
{
        struct frame *f;
        struct list_head *head, *pos, *nx;
        u32 n;

        n = tag % NFACTIVE;
        head = &d->factive[n];
        list_for_each_safe(pos, nx, head) {
                f = list_entry(pos, struct frame, head);
                if (f->tag == tag) {
                        list_del(pos);
                        return f;
                }
        }
        return NULL;
}

/*
 * Leave the top bit clear so we have tagspace for userland.
 * The bottom 16 bits are the xmit tick for rexmit/rttavg processing.
 * This driver reserves tag -1 to mean "unused frame."
 */
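/* Illustrative example (values invented for this note): if the low 16
 * bits of jiffies are 0x1234 and d->lasttag increments to 0x0008, then
 * newtag() below returns (0x0008 << 16) | 0x1234 = 0x00081234.  The
 * 0x7fff mask keeps the top bit clear, as described above.
 */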
static int
newtag(struct aoedev *d)
{
        register ulong n;

        n = jiffies & 0xffff;
        return n |= (++d->lasttag & 0x7fff) << 16;
}

static u32
aoehdr_atainit(struct aoedev *d, struct aoetgt *t, struct aoe_hdr *h)
{
        u32 host_tag = newtag(d);

        memcpy(h->src, t->ifp->nd->dev_addr, sizeof h->src);
        memcpy(h->dst, t->addr, sizeof h->dst);
        h->type = __constant_cpu_to_be16(ETH_P_AOE);
        h->verfl = AOE_HVER;
        h->major = cpu_to_be16(d->aoemajor);
        h->minor = d->aoeminor;
        h->cmd = AOECMD_ATA;
        h->tag = cpu_to_be32(host_tag);

        return host_tag;
}

static inline void
put_lba(struct aoe_atahdr *ah, sector_t lba)
{
        ah->lba0 = lba;
        ah->lba1 = lba >>= 8;
        ah->lba2 = lba >>= 8;
        ah->lba3 = lba >>= 8;
        ah->lba4 = lba >>= 8;
        ah->lba5 = lba >>= 8;
}
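/* For example (illustrative value): lba 0x0123456789ab is stored a
 * byte at a time as lba0=0xab, lba1=0x89, lba2=0x67, lba3=0x45,
 * lba4=0x23, lba5=0x01.  In the LBA28 case the caller masks lba3 down
 * to its low nibble afterward (see ata_rw_frameinit).
 */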

static struct aoeif *
ifrotate(struct aoetgt *t)
{
        struct aoeif *ifp;

        ifp = t->ifp;
        ifp++;
        if (ifp >= &t->ifs[NAOEIFS] || ifp->nd == NULL)
                ifp = t->ifs;
        if (ifp->nd == NULL)
                return NULL;
        return t->ifp = ifp;
}

static void
skb_pool_put(struct aoedev *d, struct sk_buff *skb)
{
        __skb_queue_tail(&d->skbpool, skb);
}

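/* Reuse a pooled skb only once the network layer has dropped its
 * reference (dataref == 1); otherwise fall back to allocating a fresh
 * minimum-size skb, keeping the pool bounded by NSKBPOOLMAX.
 */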
static struct sk_buff *
skb_pool_get(struct aoedev *d)
{
        struct sk_buff *skb = skb_peek(&d->skbpool);

        if (skb && atomic_read(&skb_shinfo(skb)->dataref) == 1) {
                __skb_unlink(skb, &d->skbpool);
                return skb;
        }
        if (skb_queue_len(&d->skbpool) < NSKBPOOLMAX &&
            (skb = new_skb(ETH_ZLEN)))
                return skb;

        return NULL;
}

void
aoe_freetframe(struct frame *f)
{
        struct aoetgt *t;

        t = f->t;
        f->buf = NULL;
        memset(&f->iter, 0, sizeof(f->iter));
        f->r_skb = NULL;
        f->flags = 0;
        list_add(&f->head, &t->ffree);
}

static struct frame *
newtframe(struct aoedev *d, struct aoetgt *t)
{
        struct frame *f;
        struct sk_buff *skb;
        struct list_head *pos;

        if (list_empty(&t->ffree)) {
                if (t->falloc >= NSKBPOOLMAX*2)
                        return NULL;
                f = kcalloc(1, sizeof(*f), GFP_ATOMIC);
                if (f == NULL)
                        return NULL;
                t->falloc++;
                f->t = t;
        } else {
                pos = t->ffree.next;
                list_del(pos);
                f = list_entry(pos, struct frame, head);
        }

        skb = f->skb;
        if (skb == NULL) {
                f->skb = skb = new_skb(ETH_ZLEN);
                if (!skb) {
bail:                   aoe_freetframe(f);
                        return NULL;
                }
        }

        if (atomic_read(&skb_shinfo(skb)->dataref) != 1) {
                skb = skb_pool_get(d);
                if (skb == NULL)
                        goto bail;
                skb_pool_put(d, f->skb);
                f->skb = skb;
        }

        skb->truesize -= skb->data_len;
        skb_shinfo(skb)->nr_frags = skb->data_len = 0;
        skb_trim(skb, 0);
        return f;
}

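/* Allocate a frame, rotating round-robin through the device's targets
 * starting just past the last one used.  Untainted targets are
 * preferred; tainted targets are tried only after a full loop finds no
 * untainted candidate.  With nothing outstanding and no frame
 * available, the device is flagged for a kick.
 */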
static struct frame *
newframe(struct aoedev *d)
{
        struct frame *f;
        struct aoetgt *t, **tt;
        int totout = 0;
        int use_tainted;
        int has_untainted;

        if (!d->targets || !d->targets[0]) {
                printk(KERN_ERR "aoe: NULL TARGETS!\n");
                return NULL;
        }
        tt = d->tgt;    /* last used target */
        for (use_tainted = 0, has_untainted = 0;;) {
                tt++;
                if (tt >= &d->targets[d->ntargets] || !*tt)
                        tt = d->targets;
                t = *tt;
                if (!t->taint) {
                        has_untainted = 1;
                        totout += t->nout;
                }
                if (t->nout < t->maxout
                && (use_tainted || !t->taint)
                && t->ifp->nd) {
                        f = newtframe(d, t);
                        if (f) {
                                ifrotate(t);
                                d->tgt = tt;
                                return f;
                        }
                }
                if (tt == d->tgt) {     /* we've looped and found nada */
                        if (!use_tainted && !has_untainted)
                                use_tainted = 1;
                        else
                                break;
                }
        }
        if (totout == 0) {
                d->kicked++;
                d->flags |= DEVFL_KICKME;
        }
        return NULL;
}

static void
skb_fillup(struct sk_buff *skb, struct bio *bio, struct bvec_iter iter)
{
        int frag = 0;
        struct bio_vec bv;

        __bio_for_each_segment(bv, bio, iter, iter)
                skb_fill_page_desc(skb, frag++, bv.bv_page,
                                   bv.bv_offset, bv.bv_len);
}

static void
fhash(struct frame *f)
{
        struct aoedev *d = f->t->d;
        u32 n;

        n = f->tag % NFACTIVE;
        list_add_tail(&f->head, &d->factive[n]);
}

static void
ata_rw_frameinit(struct frame *f)
{
        struct aoetgt *t;
        struct aoe_hdr *h;
        struct aoe_atahdr *ah;
        struct sk_buff *skb;
        char writebit, extbit;

        skb = f->skb;
        h = (struct aoe_hdr *) skb_mac_header(skb);
        ah = (struct aoe_atahdr *) (h + 1);
        skb_put(skb, sizeof(*h) + sizeof(*ah));
        memset(h, 0, skb->len);

        writebit = 0x10;
        extbit = 0x4;

        t = f->t;
        f->tag = aoehdr_atainit(t->d, t, h);
        fhash(f);
        t->nout++;
        f->waited = 0;
        f->waited_total = 0;

        /* set up ata header */
        ah->scnt = f->iter.bi_size >> 9;
        put_lba(ah, f->iter.bi_sector);
        if (t->d->flags & DEVFL_EXT) {
                ah->aflags |= AOEAFL_EXT;
        } else {
                extbit = 0;
                ah->lba3 &= 0x0f;
                ah->lba3 |= 0xe0;       /* LBA bit + obsolete 0xa0 */
        }
        if (f->buf && bio_data_dir(f->buf->bio) == WRITE) {
                skb_fillup(skb, f->buf->bio, f->iter);
                ah->aflags |= AOEAFL_WRITE;
                skb->len += f->iter.bi_size;
                skb->data_len = f->iter.bi_size;
                skb->truesize += f->iter.bi_size;
                t->wpkts++;
        } else {
                t->rpkts++;
                writebit = 0;
        }

        ah->cmdstat = ATA_CMD_PIO_READ | writebit | extbit;
        skb->dev = t->ifp->nd;
}

static int
aoecmd_ata_rw(struct aoedev *d)
{
        struct frame *f;
        struct buf *buf;
        struct sk_buff *skb;
        struct sk_buff_head queue;

        buf = nextbuf(d);
        if (buf == NULL)
                return 0;
        f = newframe(d);
        if (f == NULL)
                return 0;

        /* initialize the headers & frame */
        f->buf = buf;
        f->iter = buf->iter;
        f->iter.bi_size = min_t(unsigned long,
                                d->maxbcnt ?: DEFAULTBCNT,
                                f->iter.bi_size);
        bio_advance_iter(buf->bio, &buf->iter, f->iter.bi_size);

        if (!buf->iter.bi_size)
                d->ip.buf = NULL;

        /* mark all tracking fields and load out */
        buf->nframesout += 1;

        ata_rw_frameinit(f);

        skb = skb_clone(f->skb, GFP_ATOMIC);
        if (skb) {
                do_gettimeofday(&f->sent);
                f->sent_jiffs = (u32) jiffies;
                __skb_queue_head_init(&queue);
                __skb_queue_tail(&queue, skb);
                aoenet_xmit(&queue);
        }
        return 1;
}

/* Some callers cannot sleep.  They can call this function to queue
 * the config packets and transmit them later, when interrupts are on.
 */
static void
aoecmd_cfg_pkts(ushort aoemajor, unsigned char aoeminor, struct sk_buff_head *queue)
{
        struct aoe_hdr *h;
        struct aoe_cfghdr *ch;
        struct sk_buff *skb;
        struct net_device *ifp;

        rcu_read_lock();
        for_each_netdev_rcu(&init_net, ifp) {
                dev_hold(ifp);
                if (!is_aoe_netif(ifp))
                        goto cont;

                skb = new_skb(sizeof *h + sizeof *ch);
                if (skb == NULL) {
                        printk(KERN_INFO "aoe: skb alloc failure\n");
                        goto cont;
                }
                skb_put(skb, sizeof *h + sizeof *ch);
                skb->dev = ifp;
                __skb_queue_tail(queue, skb);
                h = (struct aoe_hdr *) skb_mac_header(skb);
                memset(h, 0, sizeof *h + sizeof *ch);

                memset(h->dst, 0xff, sizeof h->dst);
                memcpy(h->src, ifp->dev_addr, sizeof h->src);
                h->type = __constant_cpu_to_be16(ETH_P_AOE);
                h->verfl = AOE_HVER;
                h->major = cpu_to_be16(aoemajor);
                h->minor = aoeminor;
                h->cmd = AOECMD_CFG;

cont:
                dev_put(ifp);
        }
        rcu_read_unlock();
}

static void
resend(struct aoedev *d, struct frame *f)
{
        struct sk_buff *skb;
        struct sk_buff_head queue;
        struct aoe_hdr *h;
        struct aoetgt *t;
        char buf[128];
        u32 n;

        t = f->t;
        n = newtag(d);
        skb = f->skb;
        if (ifrotate(t) == NULL) {
                /* probably can't happen, but set it up to fail anyway */
                pr_info("aoe: resend: no interfaces to rotate to.\n");
                ktcomplete(f, NULL);
                return;
        }
        h = (struct aoe_hdr *) skb_mac_header(skb);

        if (!(f->flags & FFL_PROBE)) {
                snprintf(buf, sizeof(buf),
                        "%15s e%ld.%d oldtag=%08x@%08lx newtag=%08x s=%pm d=%pm nout=%d\n",
                        "retransmit", d->aoemajor, d->aoeminor,
                        f->tag, jiffies, n,
                        h->src, h->dst, t->nout);
                aoechr_error(buf);
        }

        f->tag = n;
        fhash(f);
        h->tag = cpu_to_be32(n);
        memcpy(h->dst, t->addr, sizeof h->dst);
        memcpy(h->src, t->ifp->nd->dev_addr, sizeof h->src);

        skb->dev = t->ifp->nd;
        skb = skb_clone(skb, GFP_ATOMIC);
        if (skb == NULL)
                return;
        do_gettimeofday(&f->sent);
        f->sent_jiffs = (u32) jiffies;
        __skb_queue_head_init(&queue);
        __skb_queue_tail(&queue, skb);
        aoenet_xmit(&queue);
}

static int
tsince_hr(struct frame *f)
{
        struct timeval now;
        int n;

        do_gettimeofday(&now);
        n = now.tv_usec - f->sent.tv_usec;
        n += (now.tv_sec - f->sent.tv_sec) * USEC_PER_SEC;

        if (n < 0)
                n = -n;

        /* For relatively long periods, use jiffies to avoid
         * discrepancies caused by updates to the system time.
         *
         * On a system with HZ of 1000, 32 bits is over 49 days'
         * worth of jiffies, or over 71 minutes' worth of usecs.
         *
         * Jiffies overflow is handled by subtraction of unsigned ints:
         * (gdb) print (unsigned) 2 - (unsigned) 0xfffffffe
         * $3 = 4
         * (gdb)
         */
        if (n > USEC_PER_SEC / 4) {
                n = ((u32) jiffies) - f->sent_jiffs;
                n *= USEC_PER_SEC / HZ;
        }

        return n;
}

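/* Coarse fallback for responses we cannot match to a sent frame: the
 * low 16 bits of every tag hold the jiffies tick at transmit time (see
 * newtag), so the age is the 16-bit wrapping difference from the
 * current tick, converted to microseconds.
 */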
static int
tsince(u32 tag)
{
        int n;

        n = jiffies & 0xffff;
        n -= tag & 0xffff;
        if (n < 0)
                n += 1<<16;
        return jiffies_to_usecs(n + 1);
}

static struct aoeif *
getif(struct aoetgt *t, struct net_device *nd)
{
        struct aoeif *p, *e;

        p = t->ifs;
        e = p + NAOEIFS;
        for (; p < e; p++)
                if (p->nd == nd)
                        return p;
        return NULL;
}

static void
ejectif(struct aoetgt *t, struct aoeif *ifp)
{
        struct aoeif *e;
        struct net_device *nd;
        ulong n;

        nd = ifp->nd;
        e = t->ifs + NAOEIFS - 1;
        n = (e - ifp) * sizeof *ifp;
        memmove(ifp, ifp+1, n);
        e->nd = NULL;
        dev_put(nd);
}

static struct frame *
reassign_frame(struct frame *f)
{
        struct frame *nf;
        struct sk_buff *skb;

        nf = newframe(f->t->d);
        if (!nf)
                return NULL;
        if (nf->t == f->t) {
                aoe_freetframe(nf);
                return NULL;
        }

        skb = nf->skb;
        nf->skb = f->skb;
        nf->buf = f->buf;
        nf->iter = f->iter;
        nf->waited = 0;
        nf->waited_total = f->waited_total;
        nf->sent = f->sent;
        nf->sent_jiffs = f->sent_jiffs;
        f->skb = skb;

        return nf;
}

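/* Send a full-size I/O to a tainted target to test whether the path
 * can again carry maxbcnt-sized frames; empty_page supplies harmless
 * payload for the probe.
 */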
static void
probe(struct aoetgt *t)
{
        struct aoedev *d;
        struct frame *f;
        struct sk_buff *skb;
        struct sk_buff_head queue;
        size_t n, m;
        int frag;

        d = t->d;
        f = newtframe(d, t);
        if (!f) {
                pr_err("%s %pm for e%ld.%d: %s\n",
                        "aoe: cannot probe remote address",
                        t->addr,
                        (long) d->aoemajor, d->aoeminor,
                        "no frame available");
                return;
        }
        f->flags |= FFL_PROBE;
        ifrotate(t);
        f->iter.bi_size = t->d->maxbcnt ? t->d->maxbcnt : DEFAULTBCNT;
        ata_rw_frameinit(f);
        skb = f->skb;
        for (frag = 0, n = f->iter.bi_size; n > 0; ++frag, n -= m) {
                if (n < PAGE_SIZE)
                        m = n;
                else
                        m = PAGE_SIZE;
                skb_fill_page_desc(skb, frag, empty_page, 0, m);
        }
        skb->len += f->iter.bi_size;
        skb->data_len = f->iter.bi_size;
        skb->truesize += f->iter.bi_size;

        skb = skb_clone(f->skb, GFP_ATOMIC);
        if (skb) {
                do_gettimeofday(&f->sent);
                f->sent_jiffs = (u32) jiffies;
                __skb_queue_head_init(&queue);
                __skb_queue_tail(&queue, skb);
                aoenet_xmit(&queue);
        }
}

static long
rto(struct aoedev *d)
{
        long t;

        t = 2 * d->rttavg >> RTTSCALE;
        t += 8 * d->rttdev >> RTTDSCALE;
        if (t == 0)
                t = 1;

        return t;
}
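/* In the fixed-point representation used here (rttavg holds the
 * smoothed RTT scaled by 2^RTTSCALE, rttdev the mean deviation scaled
 * by 2^RTTDSCALE), rto() above works out to roughly
 * rto = 2 * srtt + 8 * rttdev, a more conservative variant of TCP's
 * classic srtt + 4 * rttvar.
 */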

static void
rexmit_deferred(struct aoedev *d)
{
        struct aoetgt *t;
        struct frame *f;
        struct frame *nf;
        struct list_head *pos, *nx, *head;
        int since;
        int untainted;

        count_targets(d, &untainted);

        head = &d->rexmitq;
        list_for_each_safe(pos, nx, head) {
                f = list_entry(pos, struct frame, head);
                t = f->t;
                if (t->taint) {
                        if (!(f->flags & FFL_PROBE)) {
                                nf = reassign_frame(f);
                                if (nf) {
                                        if (t->nout_probes == 0
                                        && untainted > 0) {
                                                probe(t);
                                                t->nout_probes++;
                                        }
                                        list_replace(&f->head, &nf->head);
                                        pos = &nf->head;
                                        aoe_freetframe(f);
                                        f = nf;
                                        t = f->t;
                                }
                        } else if (untainted < 1) {
                                /* don't probe w/o other untainted aoetgts */
                                goto stop_probe;
                        } else if (tsince_hr(f) < t->taint * rto(d)) {
                                /* reprobe slowly when taint is high */
                                continue;
                        }
                } else if (f->flags & FFL_PROBE) {
stop_probe:             /* don't probe untainted aoetgts */
                        list_del(pos);
                        aoe_freetframe(f);
                        /* leaving d->kicked, because this is routine */
                        f->t->d->flags |= DEVFL_KICKME;
                        continue;
                }
                if (t->nout >= t->maxout)
                        continue;
                list_del(pos);
                t->nout++;
                if (f->flags & FFL_PROBE)
                        t->nout_probes++;
                since = tsince_hr(f);
                f->waited += since;
                f->waited_total += since;
                resend(d, f);
        }
}

/* An aoetgt accumulates demerits quickly, and successful
 * probing redeems the aoetgt slowly.
 */
static void
scorn(struct aoetgt *t)
{
        int n;

        n = t->taint++;
        t->taint += t->taint * 2;
        if (n > t->taint)
                t->taint = n;
        if (t->taint > MAX_TAINT)
                t->taint = MAX_TAINT;
}
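/* Each call to scorn() roughly triples the taint, e.g. 0 -> 3 -> 12
 * -> 39 (illustrative progression), until MAX_TAINT; the n > t->taint
 * comparison restores the old value if the arithmetic wraps.
 */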

static int
count_targets(struct aoedev *d, int *untainted)
{
        int i, good;

        for (i = good = 0; i < d->ntargets && d->targets[i]; ++i)
                if (d->targets[i]->taint == 0)
                        good++;

        if (untainted)
                *untainted = good;
        return i;
}

static void
rexmit_timer(ulong vp)
{
        struct aoedev *d;
        struct aoetgt *t;
        struct aoeif *ifp;
        struct frame *f;
        struct list_head *head, *pos, *nx;
        LIST_HEAD(flist);
        register long timeout;
        ulong flags, n;
        int i;
        int utgts;      /* number of aoetgt descriptors (not slots) */
        int since;

        d = (struct aoedev *) vp;

        spin_lock_irqsave(&d->lock, flags);

        /* timeout based on observed timings and variations */
        timeout = rto(d);

        utgts = count_targets(d, NULL);

        if (d->flags & DEVFL_TKILL) {
                spin_unlock_irqrestore(&d->lock, flags);
                return;
        }

        /* collect all frames to rexmit into flist */
        for (i = 0; i < NFACTIVE; i++) {
                head = &d->factive[i];
                list_for_each_safe(pos, nx, head) {
                        f = list_entry(pos, struct frame, head);
                        if (tsince_hr(f) < timeout)
                                break;  /* end of expired frames */
                        /* move to flist for later processing */
                        list_move_tail(pos, &flist);
                }
        }

        /* process expired frames */
        while (!list_empty(&flist)) {
                pos = flist.next;
                f = list_entry(pos, struct frame, head);
                since = tsince_hr(f);
                n = f->waited_total + since;
                n /= USEC_PER_SEC;
                if (aoe_deadsecs
                && n > aoe_deadsecs
                && !(f->flags & FFL_PROBE)) {
                        /* Waited too long.  Device failure.
                         * Hang all frames on first hash bucket for downdev
                         * to clean up.
                         */
                        list_splice(&flist, &d->factive[0]);
                        aoedev_downdev(d);
                        goto out;
                }

                t = f->t;
                n = f->waited + since;
                n /= USEC_PER_SEC;
                if (aoe_deadsecs && utgts > 0
                && (n > aoe_deadsecs / utgts || n > HARD_SCORN_SECS))
                        scorn(t); /* avoid this target */

                if (t->maxout != 1) {
                        t->ssthresh = t->maxout / 2;
                        t->maxout = 1;
                }

                if (f->flags & FFL_PROBE) {
                        t->nout_probes--;
                } else {
                        ifp = getif(t, f->skb->dev);
                        if (ifp && ++ifp->lost > (t->nframes << 1)
                        && (ifp != t->ifs || t->ifs[1].nd)) {
                                ejectif(t, ifp);
                                ifp = NULL;
                        }
                }
                list_move_tail(pos, &d->rexmitq);
                t->nout--;
        }
        rexmit_deferred(d);

out:
        if ((d->flags & DEVFL_KICKME) && d->blkq) {
                d->flags &= ~DEVFL_KICKME;
                d->blkq->request_fn(d->blkq);
        }

        d->timer.expires = jiffies + TIMERTICK;
        add_timer(&d->timer);

        spin_unlock_irqrestore(&d->lock, flags);
}

static unsigned long
rqbiocnt(struct request *r)
{
        struct bio *bio;
        unsigned long n = 0;

        __rq_for_each_bio(bio, r)
                n++;
        return n;
}

static void
bufinit(struct buf *buf, struct request *rq, struct bio *bio)
{
        memset(buf, 0, sizeof(*buf));
        buf->rq = rq;
        buf->bio = bio;
        buf->iter = bio->bi_iter;
}

static struct buf *
nextbuf(struct aoedev *d)
{
        struct request *rq;
        struct request_queue *q;
        struct buf *buf;
        struct bio *bio;

        q = d->blkq;
        if (q == NULL)
                return NULL;    /* initializing */
        if (d->ip.buf)
                return d->ip.buf;
        rq = d->ip.rq;
        if (rq == NULL) {
                rq = blk_peek_request(q);
                if (rq == NULL)
                        return NULL;
                blk_start_request(rq);
                d->ip.rq = rq;
                d->ip.nxbio = rq->bio;
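                /* rq->special doubles as a countdown of the bios in
                 * this request; aoe_end_buf decrements it and
                 * completes the request when it reaches zero.
                 */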
                rq->special = (void *) rqbiocnt(rq);
        }
        buf = mempool_alloc(d->bufpool, GFP_ATOMIC);
        if (buf == NULL) {
                pr_err("aoe: nextbuf: unable to mempool_alloc!\n");
                return NULL;
        }
        bio = d->ip.nxbio;
        bufinit(buf, rq, bio);
        bio = bio->bi_next;
        d->ip.nxbio = bio;
        if (bio == NULL)
                d->ip.rq = NULL;
        return d->ip.buf = buf;
}

/* enters with d->lock held */
void
aoecmd_work(struct aoedev *d)
{
        rexmit_deferred(d);
        while (aoecmd_ata_rw(d))
                ;
}

/* this function performs work that has been deferred until sleeping is OK
 */
void
aoecmd_sleepwork(struct work_struct *work)
{
        struct aoedev *d = container_of(work, struct aoedev, work);
        struct block_device *bd;
        u64 ssize;

        if (d->flags & DEVFL_GDALLOC)
                aoeblk_gdalloc(d);

        if (d->flags & DEVFL_NEWSIZE) {
                ssize = get_capacity(d->gd);
                bd = bdget_disk(d->gd, 0);
                if (bd) {
                        inode_lock(bd->bd_inode);
                        i_size_write(bd->bd_inode, (loff_t)ssize<<9);
                        inode_unlock(bd->bd_inode);
                        bdput(bd);
                }
                spin_lock_irq(&d->lock);
                d->flags |= DEVFL_UP;
                d->flags &= ~DEVFL_NEWSIZE;
                spin_unlock_irq(&d->lock);
        }
}

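/* ATA IDENTIFY strings pack two ASCII characters per 16-bit word with
 * the first character in the upper byte; swap each word in place so
 * the strings read normally.
 */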
static void
ata_ident_fixstring(u16 *id, int ns)
{
        u16 s;

        while (ns-- > 0) {
                s = *id;
                *id++ = s >> 8 | s << 8;
        }
}

static void
ataid_complete(struct aoedev *d, struct aoetgt *t, unsigned char *id)
{
        u64 ssize;
        u16 n;

        /* word 83: command set supported */
        n = get_unaligned_le16(&id[83 << 1]);

        /* word 86: command set/feature enabled */
        n |= get_unaligned_le16(&id[86 << 1]);

        if (n & (1<<10)) {      /* bit 10: LBA 48 */
                d->flags |= DEVFL_EXT;

                /* word 100: number lba48 sectors */
                ssize = get_unaligned_le64(&id[100 << 1]);

                /* set as in ide-disk.c:init_idedisk_capacity */
                d->geo.cylinders = ssize;
                d->geo.cylinders /= (255 * 63);
                d->geo.heads = 255;
                d->geo.sectors = 63;
        } else {
                d->flags &= ~DEVFL_EXT;

                /* number lba28 sectors */
                ssize = get_unaligned_le32(&id[60 << 1]);

                /* NOTE: obsolete in ATA 6 */
                d->geo.cylinders = get_unaligned_le16(&id[54 << 1]);
                d->geo.heads = get_unaligned_le16(&id[55 << 1]);
                d->geo.sectors = get_unaligned_le16(&id[56 << 1]);
        }

        ata_ident_fixstring((u16 *) &id[10<<1], 10);    /* serial */
        ata_ident_fixstring((u16 *) &id[23<<1], 4);     /* firmware */
        ata_ident_fixstring((u16 *) &id[27<<1], 20);    /* model */
        memcpy(d->ident, id, sizeof(d->ident));

        if (d->ssize != ssize)
                printk(KERN_INFO
                        "aoe: %pm e%ld.%d v%04x has %llu sectors\n",
                        t->addr,
                        d->aoemajor, d->aoeminor,
                        d->fw_ver, (long long)ssize);
        d->ssize = ssize;
        d->geo.start = 0;
        if (d->flags & (DEVFL_GDALLOC|DEVFL_NEWSIZE))
                return;
        if (d->gd != NULL) {
                set_capacity(d->gd, ssize);
                d->flags |= DEVFL_NEWSIZE;
        } else
                d->flags |= DEVFL_GDALLOC;
        schedule_work(&d->work);
}

static void
calc_rttavg(struct aoedev *d, struct aoetgt *t, int rtt)
{
        register long n;

        n = rtt;

        /* cf. Congestion Avoidance and Control, Jacobson & Karels, 1988 */
        n -= d->rttavg >> RTTSCALE;
        d->rttavg += n;
        if (n < 0)
                n = -n;
        n -= d->rttdev >> RTTDSCALE;
        d->rttdev += n;

        if (!t || t->maxout >= t->nframes)
                return;
        if (t->maxout < t->ssthresh)
                t->maxout += 1;
        else if (t->nout == t->maxout && t->next_cwnd-- == 0) {
                t->maxout += 1;
                t->next_cwnd = t->maxout;
        }
}
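/* The updates above are the Jacobson/Karels estimators in fixed point:
 * with rttavg holding srtt << RTTSCALE, the assignment amounts to
 * srtt += (rtt - srtt) / 2^RTTSCALE, and likewise for the mean
 * deviation in rttdev.  The tail grows maxout like a congestion
 * window: by one per response below ssthresh, then by one per window.
 */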

static struct aoetgt *
gettgt(struct aoedev *d, char *addr)
{
        struct aoetgt **t, **e;

        t = d->targets;
        e = t + d->ntargets;
        for (; t < e && *t; t++)
                if (memcmp((*t)->addr, addr, sizeof((*t)->addr)) == 0)
                        return *t;
        return NULL;
}

static void
bvcpy(struct sk_buff *skb, struct bio *bio, struct bvec_iter iter, long cnt)
{
        int soff = 0;
        struct bio_vec bv;

        iter.bi_size = cnt;

        __bio_for_each_segment(bv, bio, iter, iter) {
                char *p = page_address(bv.bv_page) + bv.bv_offset;
                skb_copy_bits(skb, soff, p, bv.bv_len);
                soff += bv.bv_len;
        }
}

void
aoe_end_request(struct aoedev *d, struct request *rq, int fastfail)
{
        struct bio *bio;
        int bok;
        struct request_queue *q;

        q = d->blkq;
        if (rq == d->ip.rq)
                d->ip.rq = NULL;
        do {
                bio = rq->bio;
                bok = !fastfail && !bio->bi_error;
        } while (__blk_end_request(rq, bok ? 0 : -EIO, bio->bi_iter.bi_size));

        /* cf. http://lkml.org/lkml/2006/10/31/28 */
        if (!fastfail)
                __blk_run_queue(q);
}

static void
aoe_end_buf(struct aoedev *d, struct buf *buf)
{
        struct request *rq;
        unsigned long n;

        if (buf == d->ip.buf)
                d->ip.buf = NULL;
        rq = buf->rq;
        mempool_free(buf, d->bufpool);
        n = (unsigned long) rq->special;
        rq->special = (void *) --n;
        if (n == 0)
                aoe_end_request(d, rq, 0);
}

static void
ktiocomplete(struct frame *f)
{
        struct aoe_hdr *hin, *hout;
        struct aoe_atahdr *ahin, *ahout;
        struct buf *buf;
        struct sk_buff *skb;
        struct aoetgt *t;
        struct aoeif *ifp;
        struct aoedev *d;
        long n;
        int untainted;

        if (f == NULL)
                return;

        t = f->t;
        d = t->d;
        skb = f->r_skb;
        buf = f->buf;
        if (f->flags & FFL_PROBE)
                goto out;
        if (!skb)               /* just fail the buf. */
                goto noskb;

        hout = (struct aoe_hdr *) skb_mac_header(f->skb);
        ahout = (struct aoe_atahdr *) (hout+1);

        hin = (struct aoe_hdr *) skb->data;
        skb_pull(skb, sizeof(*hin));
        ahin = (struct aoe_atahdr *) skb->data;
        skb_pull(skb, sizeof(*ahin));
        if (ahin->cmdstat & 0xa9) {     /* these bits cleared on success */
                pr_err("aoe: ata error cmd=%2.2Xh stat=%2.2Xh from e%ld.%d\n",
                        ahout->cmdstat, ahin->cmdstat,
                        d->aoemajor, d->aoeminor);
noskb:          if (buf)
                        buf->bio->bi_error = -EIO;
                goto out;
        }

        n = ahout->scnt << 9;
        switch (ahout->cmdstat) {
        case ATA_CMD_PIO_READ:
        case ATA_CMD_PIO_READ_EXT:
                if (skb->len < n) {
                        pr_err("%s e%ld.%d.  skb->len=%d need=%ld\n",
                                "aoe: runt data size in read from",
                                (long) d->aoemajor, d->aoeminor,
                               skb->len, n);
                        buf->bio->bi_error = -EIO;
                        break;
                }
                if (n > f->iter.bi_size) {
                        pr_err_ratelimited("%s e%ld.%d.  bytes=%ld need=%u\n",
                                "aoe: too-large data size in read from",
                                (long) d->aoemajor, d->aoeminor,
                                n, f->iter.bi_size);
                        buf->bio->bi_error = -EIO;
                        break;
                }
                bvcpy(skb, f->buf->bio, f->iter, n);
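                /* fall through: a successful read, like a successful
                 * write, also resets the interface's lost count below
                 */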
        case ATA_CMD_PIO_WRITE:
        case ATA_CMD_PIO_WRITE_EXT:
                spin_lock_irq(&d->lock);
                ifp = getif(t, skb->dev);
                if (ifp)
                        ifp->lost = 0;
                spin_unlock_irq(&d->lock);
                break;
        case ATA_CMD_ID_ATA:
                if (skb->len < 512) {
                        pr_info("%s e%ld.%d.  skb->len=%d need=512\n",
                                "aoe: runt data size in ataid from",
                                (long) d->aoemajor, d->aoeminor,
                                skb->len);
                        break;
                }
                if (skb_linearize(skb))
                        break;
                spin_lock_irq(&d->lock);
                ataid_complete(d, t, skb->data);
                spin_unlock_irq(&d->lock);
                break;
        default:
                pr_info("aoe: unrecognized ata command %2.2Xh for %d.%d\n",
                        ahout->cmdstat,
                        be16_to_cpu(get_unaligned(&hin->major)),
                        hin->minor);
        }
out:
        spin_lock_irq(&d->lock);
        if (t->taint > 0
        && --t->taint > 0
        && t->nout_probes == 0) {
                count_targets(d, &untainted);
                if (untainted > 0) {
                        probe(t);
                        t->nout_probes++;
                }
        }

        aoe_freetframe(f);

        if (buf && --buf->nframesout == 0 && buf->iter.bi_size == 0)
                aoe_end_buf(d, buf);

        spin_unlock_irq(&d->lock);
        aoedev_put(d);
        dev_kfree_skb(skb);
}

/* Enters with iocq.lock held.
 * Returns true iff responses needing processing remain.
 */
static int
ktio(int id)
{
        struct frame *f;
        struct list_head *pos;
        int i;
        int actual_id;

        for (i = 0; ; ++i) {
                if (i == MAXIOC)
                        return 1;
                if (list_empty(&iocq[id].head))
                        return 0;
                pos = iocq[id].head.next;
                list_del(pos);
                f = list_entry(pos, struct frame, head);
                spin_unlock_irq(&iocq[id].lock);
                ktiocomplete(f);

                /* Figure out if extra threads are required. */
                actual_id = f->t->d->aoeminor % ncpus;

                if (!kts[actual_id].active) {
                        BUG_ON(id != 0);
                        mutex_lock(&ktio_spawn_lock);
                        if (!kts[actual_id].active
                                && aoe_ktstart(&kts[actual_id]) == 0)
                                kts[actual_id].active = 1;
                        mutex_unlock(&ktio_spawn_lock);
                }
                spin_lock_irq(&iocq[id].lock);
        }
}

static int
kthread(void *vp)
{
        struct ktstate *k;
        DECLARE_WAITQUEUE(wait, current);
        int more;

        k = vp;
        current->flags |= PF_NOFREEZE;
        set_user_nice(current, -10);
        complete(&k->rendez);   /* tell spawner we're running */
        do {
                spin_lock_irq(k->lock);
                more = k->fn(k->id);
                if (!more) {
                        add_wait_queue(k->waitq, &wait);
                        __set_current_state(TASK_INTERRUPTIBLE);
                }
                spin_unlock_irq(k->lock);
                if (!more) {
                        schedule();
                        remove_wait_queue(k->waitq, &wait);
                } else
                        cond_resched();
        } while (!kthread_should_stop());
        complete(&k->rendez);   /* tell spawner we're stopping */
        return 0;
}

void
aoe_ktstop(struct ktstate *k)
{
        kthread_stop(k->task);
        wait_for_completion(&k->rendez);
}

int
aoe_ktstart(struct ktstate *k)
{
        struct task_struct *task;

        init_completion(&k->rendez);
        task = kthread_run(kthread, k, "%s", k->name);
        if (task == NULL || IS_ERR(task))
                return -ENOMEM;
        k->task = task;
        wait_for_completion(&k->rendez); /* allow kthread to start */
        init_completion(&k->rendez);    /* for waiting for exit later */
        return 0;
}

/* pass it off to kthreads for processing */
static void
ktcomplete(struct frame *f, struct sk_buff *skb)
{
        int id;
        ulong flags;

        f->r_skb = skb;
        id = f->t->d->aoeminor % ncpus;
        spin_lock_irqsave(&iocq[id].lock, flags);
        if (!kts[id].active) {
                spin_unlock_irqrestore(&iocq[id].lock, flags);
                /* The thread with id has not been spawned yet,
                 * so delegate the work to the main thread and
                 * try spawning a new thread.
                 */
                id = 0;
                spin_lock_irqsave(&iocq[id].lock, flags);
        }
        list_add_tail(&f->head, &iocq[id].head);
        spin_unlock_irqrestore(&iocq[id].lock, flags);
        wake_up(&ktiowq[id]);
}

struct sk_buff *
aoecmd_ata_rsp(struct sk_buff *skb)
{
        struct aoedev *d;
        struct aoe_hdr *h;
        struct frame *f;
        u32 n;
        ulong flags;
        char ebuf[128];
        u16 aoemajor;

        h = (struct aoe_hdr *) skb->data;
        aoemajor = be16_to_cpu(get_unaligned(&h->major));
        d = aoedev_by_aoeaddr(aoemajor, h->minor, 0);
        if (d == NULL) {
                snprintf(ebuf, sizeof ebuf, "aoecmd_ata_rsp: ata response "
                        "for unknown device %d.%d\n",
                        aoemajor, h->minor);
                aoechr_error(ebuf);
                return skb;
        }

        spin_lock_irqsave(&d->lock, flags);

        n = be32_to_cpu(get_unaligned(&h->tag));
        f = getframe(d, n);
        if (f) {
                calc_rttavg(d, f->t, tsince_hr(f));
                f->t->nout--;
                if (f->flags & FFL_PROBE)
                        f->t->nout_probes--;
        } else {
                f = getframe_deferred(d, n);
                if (f) {
                        calc_rttavg(d, NULL, tsince_hr(f));
                } else {
                        calc_rttavg(d, NULL, tsince(n));
                        spin_unlock_irqrestore(&d->lock, flags);
                        aoedev_put(d);
                        snprintf(ebuf, sizeof(ebuf),
                                 "%15s e%d.%d    tag=%08x@%08lx s=%pm d=%pm\n",
                                 "unexpected rsp",
                                 get_unaligned_be16(&h->major),
                                 h->minor,
                                 get_unaligned_be32(&h->tag),
                                 jiffies,
                                 h->src,
                                 h->dst);
                        aoechr_error(ebuf);
                        return skb;
                }
        }
        aoecmd_work(d);

        spin_unlock_irqrestore(&d->lock, flags);

        ktcomplete(f, skb);

        /*
         * Note here that we do not perform an aoedev_put, as we are
         * leaving this reference for the ktio to release.
         */
        return NULL;
}

void
aoecmd_cfg(ushort aoemajor, unsigned char aoeminor)
{
        struct sk_buff_head queue;

        __skb_queue_head_init(&queue);
        aoecmd_cfg_pkts(aoemajor, aoeminor, &queue);
        aoenet_xmit(&queue);
}

struct sk_buff *
aoecmd_ata_id(struct aoedev *d)
{
        struct aoe_hdr *h;
        struct aoe_atahdr *ah;
        struct frame *f;
        struct sk_buff *skb;
        struct aoetgt *t;

        f = newframe(d);
        if (f == NULL)
                return NULL;

        t = *d->tgt;

        /* initialize the headers & frame */
        skb = f->skb;
        h = (struct aoe_hdr *) skb_mac_header(skb);
        ah = (struct aoe_atahdr *) (h+1);
        skb_put(skb, sizeof *h + sizeof *ah);
        memset(h, 0, skb->len);
        f->tag = aoehdr_atainit(d, t, h);
        fhash(f);
        t->nout++;
        f->waited = 0;
        f->waited_total = 0;

        /* set up ata header */
        ah->scnt = 1;
        ah->cmdstat = ATA_CMD_ID_ATA;
        ah->lba3 = 0xa0;

        skb->dev = t->ifp->nd;

        d->rttavg = RTTAVG_INIT;
        d->rttdev = RTTDEV_INIT;
        d->timer.function = rexmit_timer;

        skb = skb_clone(skb, GFP_ATOMIC);
        if (skb) {
                do_gettimeofday(&f->sent);
                f->sent_jiffs = (u32) jiffies;
        }

        return skb;
}

static struct aoetgt **
grow_targets(struct aoedev *d)
{
        ulong oldn, newn;
        struct aoetgt **tt;

        oldn = d->ntargets;
        newn = oldn * 2;
        tt = kcalloc(newn, sizeof(*d->targets), GFP_ATOMIC);
        if (!tt)
                return NULL;
        memmove(tt, d->targets, sizeof(*d->targets) * oldn);
        d->tgt = tt + (d->tgt - d->targets);
        kfree(d->targets);
        d->targets = tt;
        d->ntargets = newn;

        return &d->targets[oldn];
}

static struct aoetgt *
addtgt(struct aoedev *d, char *addr, ulong nframes)
{
        struct aoetgt *t, **tt, **te;

        tt = d->targets;
        te = tt + d->ntargets;
        for (; tt < te && *tt; tt++)
                ;

        if (tt == te) {
                tt = grow_targets(d);
                if (!tt)
                        goto nomem;
        }
        t = kzalloc(sizeof(*t), GFP_ATOMIC);
        if (!t)
                goto nomem;
        t->nframes = nframes;
        t->d = d;
        memcpy(t->addr, addr, sizeof t->addr);
        t->ifp = t->ifs;
        aoecmd_wreset(t);
        t->maxout = t->nframes / 2;
        INIT_LIST_HEAD(&t->ffree);
        return *tt = t;

 nomem:
        pr_info("aoe: cannot allocate memory to add target\n");
        return NULL;
}

static void
setdbcnt(struct aoedev *d)
{
        struct aoetgt **t, **e;
        int bcnt = 0;

        t = d->targets;
        e = t + d->ntargets;
        for (; t < e && *t; t++)
                if (bcnt == 0 || bcnt > (*t)->minbcnt)
                        bcnt = (*t)->minbcnt;
        if (bcnt != d->maxbcnt) {
                d->maxbcnt = bcnt;
                pr_info("aoe: e%ld.%d: setting %d byte data frames\n",
                        d->aoemajor, d->aoeminor, bcnt);
        }
}

static void
setifbcnt(struct aoetgt *t, struct net_device *nd, int bcnt)
{
        struct aoedev *d;
        struct aoeif *p, *e;
        int minbcnt;

        d = t->d;
        minbcnt = bcnt;
        p = t->ifs;
        e = p + NAOEIFS;
        for (; p < e; p++) {
                if (p->nd == NULL)
                        break;          /* end of the valid interfaces */
                if (p->nd == nd) {
                        p->bcnt = bcnt; /* we're updating */
                        nd = NULL;
                } else if (minbcnt > p->bcnt)
                        minbcnt = p->bcnt; /* find the min interface */
        }
        if (nd) {
                if (p == e) {
                        pr_err("aoe: device setifbcnt failure; too many interfaces.\n");
                        return;
                }
                dev_hold(nd);
                p->nd = nd;
                p->bcnt = bcnt;
        }
        t->minbcnt = minbcnt;
        setdbcnt(d);
}

void
aoecmd_cfg_rsp(struct sk_buff *skb)
{
        struct aoedev *d;
        struct aoe_hdr *h;
        struct aoe_cfghdr *ch;
        struct aoetgt *t;
        ulong flags, aoemajor;
        struct sk_buff *sl;
        struct sk_buff_head queue;
        u16 n;

        sl = NULL;
        h = (struct aoe_hdr *) skb_mac_header(skb);
        ch = (struct aoe_cfghdr *) (h+1);

        /*
         * Enough people have their dip switches set backwards to
         * warrant a loud message for this special case.
         */
        aoemajor = get_unaligned_be16(&h->major);
        if (aoemajor == 0xfff) {
                printk(KERN_ERR "aoe: Warning: shelf address is all ones.  "
                        "Check shelf dip switches.\n");
                return;
        }
        if (aoemajor == 0xffff) {
                pr_info("aoe: e%ld.%d: broadcast shelf number invalid\n",
                        aoemajor, (int) h->minor);
                return;
        }
        if (h->minor == 0xff) {
                pr_info("aoe: e%ld.%d: broadcast slot number invalid\n",
                        aoemajor, (int) h->minor);
                return;
        }

        n = be16_to_cpu(ch->bufcnt);
        if (n > aoe_maxout)     /* keep it reasonable */
                n = aoe_maxout;

        d = aoedev_by_aoeaddr(aoemajor, h->minor, 1);
        if (d == NULL) {
                pr_info("aoe: device allocation failure\n");
                return;
        }

        spin_lock_irqsave(&d->lock, flags);

        t = gettgt(d, h->src);
        if (t) {
                t->nframes = n;
                if (n < t->maxout)
                        aoecmd_wreset(t);
        } else {
                t = addtgt(d, h->src, n);
                if (!t)
                        goto bail;
        }
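        /* Per-interface payload size: whole sectors that fit in the
         * MTU after subtracting both headers (36 bytes in the usual
         * layout of struct aoe_hdr and struct aoe_atahdr), capped by
         * the target's advertised ch->scnt.  Illustrative numbers: a
         * 1500-byte MTU leaves 1464 bytes, i.e. 2 sectors, so
         * 1024-byte data frames; a 9000-byte jumbo MTU allows 17
         * sectors, 8704 bytes.
         */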
        n = skb->dev->mtu;
        n -= sizeof(struct aoe_hdr) + sizeof(struct aoe_atahdr);
        n /= 512;
        if (n > ch->scnt)
                n = ch->scnt;
        n = n ? n * 512 : DEFAULTBCNT;
        setifbcnt(t, skb->dev, n);

        /* don't change users' perspective */
        if (d->nopen == 0) {
                d->fw_ver = be16_to_cpu(ch->fwver);
                sl = aoecmd_ata_id(d);
        }
bail:
        spin_unlock_irqrestore(&d->lock, flags);
        aoedev_put(d);
        if (sl) {
                __skb_queue_head_init(&queue);
                __skb_queue_tail(&queue, sl);
                aoenet_xmit(&queue);
        }
}

void
aoecmd_wreset(struct aoetgt *t)
{
        t->maxout = 1;
        t->ssthresh = t->nframes / 2;
        t->next_cwnd = t->nframes;
}

void
aoecmd_cleanslate(struct aoedev *d)
{
        struct aoetgt **t, **te;

        d->rttavg = RTTAVG_INIT;
        d->rttdev = RTTDEV_INIT;
        d->maxbcnt = 0;

        t = d->targets;
        te = t + d->ntargets;
        for (; t < te && *t; t++)
                aoecmd_wreset(*t);
}

void
aoe_failbuf(struct aoedev *d, struct buf *buf)
{
        if (buf == NULL)
                return;
        buf->iter.bi_size = 0;
        buf->bio->bi_error = -EIO;
        if (buf->nframesout == 0)
                aoe_end_buf(d, buf);
}

void
aoe_flush_iocq(void)
{
        int i;

        for (i = 0; i < ncpus; i++) {
                if (kts[i].active)
                        aoe_flush_iocq_by_index(i);
        }
}

void
aoe_flush_iocq_by_index(int id)
{
        struct frame *f;
        struct aoedev *d;
        LIST_HEAD(flist);
        struct list_head *pos;
        struct sk_buff *skb;
        ulong flags;

        spin_lock_irqsave(&iocq[id].lock, flags);
        list_splice_init(&iocq[id].head, &flist);
        spin_unlock_irqrestore(&iocq[id].lock, flags);
        while (!list_empty(&flist)) {
                pos = flist.next;
                list_del(pos);
                f = list_entry(pos, struct frame, head);
                d = f->t->d;
                skb = f->r_skb;
                spin_lock_irqsave(&d->lock, flags);
                if (f->buf) {
                        f->buf->nframesout--;
                        aoe_failbuf(d, f->buf);
                }
                aoe_freetframe(f);
                spin_unlock_irqrestore(&d->lock, flags);
                dev_kfree_skb(skb);
                aoedev_put(d);
        }
}

int __init
aoecmd_init(void)
{
        void *p;
        int i;
        int ret;

        /* get_zeroed_page returns page with ref count 1 */
        p = (void *) get_zeroed_page(GFP_KERNEL);
        if (!p)
                return -ENOMEM;
        empty_page = virt_to_page(p);

        ncpus = num_online_cpus();

        iocq = kcalloc(ncpus, sizeof(struct iocq_ktio), GFP_KERNEL);
        if (!iocq)
                return -ENOMEM;

        kts = kcalloc(ncpus, sizeof(struct ktstate), GFP_KERNEL);
        if (!kts) {
                ret = -ENOMEM;
                goto kts_fail;
        }

        ktiowq = kcalloc(ncpus, sizeof(wait_queue_head_t), GFP_KERNEL);
        if (!ktiowq) {
                ret = -ENOMEM;
                goto ktiowq_fail;
        }

        mutex_init(&ktio_spawn_lock);

        for (i = 0; i < ncpus; i++) {
                INIT_LIST_HEAD(&iocq[i].head);
                spin_lock_init(&iocq[i].lock);
                init_waitqueue_head(&ktiowq[i]);
                snprintf(kts[i].name, sizeof(kts[i].name), "aoe_ktio%d", i);
                kts[i].fn = ktio;
                kts[i].waitq = &ktiowq[i];
                kts[i].lock = &iocq[i].lock;
                kts[i].id = i;
                kts[i].active = 0;
        }
        kts[0].active = 1;
        if (aoe_ktstart(&kts[0])) {
                ret = -ENOMEM;
                goto ktstart_fail;
        }
        return 0;

ktstart_fail:
        kfree(ktiowq);
ktiowq_fail:
        kfree(kts);
kts_fail:
        kfree(iocq);

        return ret;
}

void
aoecmd_exit(void)
{
        int i;

        for (i = 0; i < ncpus; i++)
                if (kts[i].active)
                        aoe_ktstop(&kts[i]);

        aoe_flush_iocq();

        /* Free up the iocq and thread-specific configuration
         * allocated during startup.
         */
        kfree(iocq);
        kfree(kts);
        kfree(ktiowq);

        free_page((unsigned long) page_address(empty_page));
        empty_page = NULL;
}