linux/drivers/block/aoe/aoecmd.c
<<
>>
Prefs
   1/* Copyright (c) 2007 Coraid, Inc.  See COPYING for GPL terms. */
   2/*
   3 * aoecmd.c
   4 * Filesystem request handling methods
   5 */
   6
   7#include <linux/ata.h>
   8#include <linux/slab.h>
   9#include <linux/hdreg.h>
  10#include <linux/blkdev.h>
  11#include <linux/skbuff.h>
  12#include <linux/netdevice.h>
  13#include <linux/genhd.h>
  14#include <linux/moduleparam.h>
  15#include <net/net_namespace.h>
  16#include <asm/unaligned.h>
  17#include "aoe.h"
  18
  /*
   * Module parameter: seconds a frame may remain unanswered before the
   * device is failed.  Defaults to three minutes; writable at runtime
   * (mode 0644).  Compared against the accumulated wait in rexmit_timer().
   */
  19static int aoe_deadsecs = 60 * 3;
  20module_param(aoe_deadsecs, int, 0644);
  21MODULE_PARM_DESC(aoe_deadsecs, "After aoe_deadsecs seconds, give up and fail dev.");
  22
  /*
   * Module parameter: per its description, a cap on outstanding packets
   * per MAC address on a device.  NOTE(review): not referenced in the
   * visible part of this file -- confirm where it is consumed.
   */
  23static int aoe_maxout = 16;
  24module_param(aoe_maxout, int, 0644);
  25MODULE_PARM_DESC(aoe_maxout,
  26        "Only aoe_maxout outstanding packets for every MAC on eX.Y.");
  27
  28static struct sk_buff *
  29new_skb(ulong len)
  30{
  31        struct sk_buff *skb;
  32
  33        skb = alloc_skb(len, GFP_ATOMIC);
  34        if (skb) {
  35                skb_reset_mac_header(skb);
  36                skb_reset_network_header(skb);
  37                skb->protocol = __constant_htons(ETH_P_AOE);
  38        }
  39        return skb;
  40}
  41
  42static struct frame *
  43getframe(struct aoetgt *t, int tag)
  44{
  45        struct frame *f, *e;
  46
  47        f = t->frames;
  48        e = f + t->nframes;
  49        for (; f<e; f++)
  50                if (f->tag == tag)
  51                        return f;
  52        return NULL;
  53}
  54
  55/*
  56 * Leave the top bit clear so we have tagspace for userland.
  57 * The bottom 16 bits are the xmit tick for rexmit/rttavg processing.
  58 * This driver reserves tag -1 to mean "unused frame."
  59 */
  60static int
  61newtag(struct aoetgt *t)
  62{
  63        register ulong n;
  64
  65        n = jiffies & 0xffff;
  66        return n |= (++t->lasttag & 0x7fff) << 16;
  67}
  68
  69static int
  70aoehdr_atainit(struct aoedev *d, struct aoetgt *t, struct aoe_hdr *h)
  71{
  72        u32 host_tag = newtag(t);
  73
  74        memcpy(h->src, t->ifp->nd->dev_addr, sizeof h->src);
  75        memcpy(h->dst, t->addr, sizeof h->dst);
  76        h->type = __constant_cpu_to_be16(ETH_P_AOE);
  77        h->verfl = AOE_HVER;
  78        h->major = cpu_to_be16(d->aoemajor);
  79        h->minor = d->aoeminor;
  80        h->cmd = AOECMD_ATA;
  81        h->tag = cpu_to_be32(host_tag);
  82
  83        return host_tag;
  84}
  85
  86static inline void
  87put_lba(struct aoe_atahdr *ah, sector_t lba)
  88{
  89        ah->lba0 = lba;
  90        ah->lba1 = lba >>= 8;
  91        ah->lba2 = lba >>= 8;
  92        ah->lba3 = lba >>= 8;
  93        ah->lba4 = lba >>= 8;
  94        ah->lba5 = lba >>= 8;
  95}
  96
  97static void
  98ifrotate(struct aoetgt *t)
  99{
 100        t->ifp++;
 101        if (t->ifp >= &t->ifs[NAOEIFS] || t->ifp->nd == NULL)
 102                t->ifp = t->ifs;
 103        if (t->ifp->nd == NULL) {
 104                printk(KERN_INFO "aoe: no interface to rotate to\n");
 105                BUG();
 106        }
 107}
 108
 /*
  * Append skb to the tail of the device's spare-skb pool (d->skbpool).
  * Uses the unlocked __skb_queue_tail(); presumably the caller holds
  * d->lock -- TODO confirm against callers.
  */
 109static void
 110skb_pool_put(struct aoedev *d, struct sk_buff *skb)
 111{
 112        __skb_queue_tail(&d->skbpool, skb);
 113}
 114
 115static struct sk_buff *
 116skb_pool_get(struct aoedev *d)
 117{
 118        struct sk_buff *skb = skb_peek(&d->skbpool);
 119
 120        if (skb && atomic_read(&skb_shinfo(skb)->dataref) == 1) {
 121                __skb_unlink(skb, &d->skbpool);
 122                return skb;
 123        }
 124        if (skb_queue_len(&d->skbpool) < NSKBPOOLMAX &&
 125            (skb = new_skb(ETH_ZLEN)))
 126                return skb;
 127
 128        return NULL;
 129}
 130
 131/* freeframe is where we do our load balancing so it's a little hairy. */
 /*
  * Pick a frame that can carry a new command for device d.
  *
  * Targets are tried round-robin, starting with the one after d->tgt and
  * wrapping at NTARGETS or at the first empty slot.  A target is eligible
  * when it has room in its window (nout < maxout), is not the target
  * currently marked in d->htgt, and its current interface has a net
  * device.  On success the frame's skb is emptied, d->tgt is pointed at
  * the chosen target, the target's interface is rotated, and the frame is
  * returned.  Returns NULL when no frame could be found.
  */
 132static struct frame *
 133freeframe(struct aoedev *d)
 134{
 135        struct frame *f, *e, *rf;
 136        struct aoetgt **t;
 137        struct sk_buff *skb;
 138
 139        if (d->targets[0] == NULL) {    /* shouldn't happen, but I'm paranoid */
 140                printk(KERN_ERR "aoe: NULL TARGETS!\n");
 141                return NULL;
 142        }
            /* start with the target after the one used last time */
 143        t = d->tgt;
 144        t++;
 145        if (t >= &d->targets[NTARGETS] || !*t)
 146                t = d->targets;
 147        for (;;) {
 148                if ((*t)->nout < (*t)->maxout
 149                && t != d->htgt
 150                && (*t)->ifp->nd) {
                            /* rf: first unused frame whose skb data is still referenced elsewhere */
 151                        rf = NULL;
 152                        f = (*t)->frames;
 153                        e = f + (*t)->nframes;
 154                        for (; f < e; f++) {
 155                                if (f->tag != FREETAG)
 156                                        continue;
 157                                skb = f->skb;
                                    /* frame has no skb yet: allocate one, skip the frame on failure */
 158                                if (!skb
 159                                && !(f->skb = skb = new_skb(ETH_ZLEN)))
 160                                        continue;
                                    /* dataref != 1: someone else still references this skb's data */
 161                                if (atomic_read(&skb_shinfo(skb)->dataref)
 162                                        != 1) {
 163                                        if (!rf)
 164                                                rf = f;
 165                                        continue;
 166                                }
                                    /* also entered by goto from below, with f = rf and a pool skb */
 167gotone:                         skb_shinfo(skb)->nr_frags = skb->data_len = 0;
 168                                skb_trim(skb, 0);
 169                                d->tgt = t;
 170                                ifrotate(*t);
 171                                return f;
 172                        }
 173                        /* Work can be done, but the network layer is
 174                           holding our precious packets.  Try to grab
 175                           one from the pool. */
 176                        f = rf;
 177                        if (f == NULL) {        /* more paranoia */
 178                                printk(KERN_ERR
 179                                        "aoe: freeframe: %s.\n",
 180                                        "unexpected null rf");
 181                                d->flags |= DEVFL_KICKME;
 182                                return NULL;
 183                        }
 184                        skb = skb_pool_get(d);
 185                        if (skb) {
                                    /* swap: the busy skb goes to the pool, the pool skb into the frame */
 186                                skb_pool_put(d, f->skb);
 187                                f->skb = skb;
 188                                goto gotone;
 189                        }
                            /* no usable skb for this target: count the event */
 190                        (*t)->dataref++;
                            /* nothing outstanding means no response will restart work; ask for a kick */
 191                        if ((*t)->nout == 0)
 192                                d->flags |= DEVFL_KICKME;
 193                }
 194                if (t == d->tgt)        /* we've looped and found nada */
 195                        break;
 196                t++;
 197                if (t >= &d->targets[NTARGETS] || !*t)
 198                        t = d->targets;
 199        }
 200        return NULL;
 201}
 202
 203static int
 204aoecmd_ata_rw(struct aoedev *d)
 205{
 206        struct frame *f;
 207        struct aoe_hdr *h;
 208        struct aoe_atahdr *ah;
 209        struct buf *buf;
 210        struct bio_vec *bv;
 211        struct aoetgt *t;
 212        struct sk_buff *skb;
 213        ulong bcnt;
 214        char writebit, extbit;
 215
 216        writebit = 0x10;
 217        extbit = 0x4;
 218
 219        f = freeframe(d);
 220        if (f == NULL)
 221                return 0;
 222        t = *d->tgt;
 223        buf = d->inprocess;
 224        bv = buf->bv;
 225        bcnt = t->ifp->maxbcnt;
 226        if (bcnt == 0)
 227                bcnt = DEFAULTBCNT;
 228        if (bcnt > buf->bv_resid)
 229                bcnt = buf->bv_resid;
 230        /* initialize the headers & frame */
 231        skb = f->skb;
 232        h = (struct aoe_hdr *) skb_mac_header(skb);
 233        ah = (struct aoe_atahdr *) (h+1);
 234        skb_put(skb, sizeof *h + sizeof *ah);
 235        memset(h, 0, skb->len);
 236        f->tag = aoehdr_atainit(d, t, h);
 237        t->nout++;
 238        f->waited = 0;
 239        f->buf = buf;
 240        f->bufaddr = page_address(bv->bv_page) + buf->bv_off;
 241        f->bcnt = bcnt;
 242        f->lba = buf->sector;
 243
 244        /* set up ata header */
 245        ah->scnt = bcnt >> 9;
 246        put_lba(ah, buf->sector);
 247        if (d->flags & DEVFL_EXT) {
 248                ah->aflags |= AOEAFL_EXT;
 249        } else {
 250                extbit = 0;
 251                ah->lba3 &= 0x0f;
 252                ah->lba3 |= 0xe0;       /* LBA bit + obsolete 0xa0 */
 253        }
 254        if (bio_data_dir(buf->bio) == WRITE) {
 255                skb_fill_page_desc(skb, 0, bv->bv_page, buf->bv_off, bcnt);
 256                ah->aflags |= AOEAFL_WRITE;
 257                skb->len += bcnt;
 258                skb->data_len = bcnt;
 259                t->wpkts++;
 260        } else {
 261                t->rpkts++;
 262                writebit = 0;
 263        }
 264
 265        ah->cmdstat = ATA_CMD_PIO_READ | writebit | extbit;
 266
 267        /* mark all tracking fields and load out */
 268        buf->nframesout += 1;
 269        buf->bv_off += bcnt;
 270        buf->bv_resid -= bcnt;
 271        buf->resid -= bcnt;
 272        buf->sector += bcnt >> 9;
 273        if (buf->resid == 0) {
 274                d->inprocess = NULL;
 275        } else if (buf->bv_resid == 0) {
 276                buf->bv = ++bv;
 277                buf->bv_resid = bv->bv_len;
 278                WARN_ON(buf->bv_resid == 0);
 279                buf->bv_off = bv->bv_offset;
 280        }
 281
 282        skb->dev = t->ifp->nd;
 283        skb = skb_clone(skb, GFP_ATOMIC);
 284        if (skb)
 285                __skb_queue_tail(&d->sendq, skb);
 286        return 1;
 287}
 288
 289/* some callers cannot sleep, and they can call this function,
 290 * transmitting the packets later, when interrupts are on
 291 */
 292static void
 293aoecmd_cfg_pkts(ushort aoemajor, unsigned char aoeminor, struct sk_buff_head *queue)
 294{
 295        struct aoe_hdr *h;
 296        struct aoe_cfghdr *ch;
 297        struct sk_buff *skb;
 298        struct net_device *ifp;
 299
 300        rcu_read_lock();
 301        for_each_netdev_rcu(&init_net, ifp) {
 302                dev_hold(ifp);
 303                if (!is_aoe_netif(ifp))
 304                        goto cont;
 305
 306                skb = new_skb(sizeof *h + sizeof *ch);
 307                if (skb == NULL) {
 308                        printk(KERN_INFO "aoe: skb alloc failure\n");
 309                        goto cont;
 310                }
 311                skb_put(skb, sizeof *h + sizeof *ch);
 312                skb->dev = ifp;
 313                __skb_queue_tail(queue, skb);
 314                h = (struct aoe_hdr *) skb_mac_header(skb);
 315                memset(h, 0, sizeof *h + sizeof *ch);
 316
 317                memset(h->dst, 0xff, sizeof h->dst);
 318                memcpy(h->src, ifp->dev_addr, sizeof h->src);
 319                h->type = __constant_cpu_to_be16(ETH_P_AOE);
 320                h->verfl = AOE_HVER;
 321                h->major = cpu_to_be16(aoemajor);
 322                h->minor = aoeminor;
 323                h->cmd = AOECMD_CFG;
 324
 325cont:
 326                dev_put(ifp);
 327        }
 328        rcu_read_unlock();
 329}
 330
 331static void
 332resend(struct aoedev *d, struct aoetgt *t, struct frame *f)
 333{
 334        struct sk_buff *skb;
 335        struct aoe_hdr *h;
 336        struct aoe_atahdr *ah;
 337        char buf[128];
 338        u32 n;
 339
 340        ifrotate(t);
 341        n = newtag(t);
 342        skb = f->skb;
 343        h = (struct aoe_hdr *) skb_mac_header(skb);
 344        ah = (struct aoe_atahdr *) (h+1);
 345
 346        snprintf(buf, sizeof buf,
 347                "%15s e%ld.%d oldtag=%08x@%08lx newtag=%08x s=%pm d=%pm nout=%d\n",
 348                "retransmit", d->aoemajor, d->aoeminor, f->tag, jiffies, n,
 349                h->src, h->dst, t->nout);
 350        aoechr_error(buf);
 351
 352        f->tag = n;
 353        h->tag = cpu_to_be32(n);
 354        memcpy(h->dst, t->addr, sizeof h->dst);
 355        memcpy(h->src, t->ifp->nd->dev_addr, sizeof h->src);
 356
 357        switch (ah->cmdstat) {
 358        default:
 359                break;
 360        case ATA_CMD_PIO_READ:
 361        case ATA_CMD_PIO_READ_EXT:
 362        case ATA_CMD_PIO_WRITE:
 363        case ATA_CMD_PIO_WRITE_EXT:
 364                put_lba(ah, f->lba);
 365
 366                n = f->bcnt;
 367                if (n > DEFAULTBCNT)
 368                        n = DEFAULTBCNT;
 369                ah->scnt = n >> 9;
 370                if (ah->aflags & AOEAFL_WRITE) {
 371                        skb_fill_page_desc(skb, 0, virt_to_page(f->bufaddr),
 372                                offset_in_page(f->bufaddr), n);
 373                        skb->len = sizeof *h + sizeof *ah + n;
 374                        skb->data_len = n;
 375                }
 376        }
 377        skb->dev = t->ifp->nd;
 378        skb = skb_clone(skb, GFP_ATOMIC);
 379        if (skb == NULL)
 380                return;
 381        __skb_queue_tail(&d->sendq, skb);
 382}
 383
 384static int
 385tsince(int tag)
 386{
 387        int n;
 388
 389        n = jiffies & 0xffff;
 390        n -= tag & 0xffff;
 391        if (n < 0)
 392                n += 1<<16;
 393        return n;
 394}
 395
 396static struct aoeif *
 397getif(struct aoetgt *t, struct net_device *nd)
 398{
 399        struct aoeif *p, *e;
 400
 401        p = t->ifs;
 402        e = p + NAOEIFS;
 403        for (; p < e; p++)
 404                if (p->nd == nd)
 405                        return p;
 406        return NULL;
 407}
 408
 409static struct aoeif *
 410addif(struct aoetgt *t, struct net_device *nd)
 411{
 412        struct aoeif *p;
 413
 414        p = getif(t, NULL);
 415        if (!p)
 416                return NULL;
 417        p->nd = nd;
 418        p->maxbcnt = DEFAULTBCNT;
 419        p->lost = 0;
 420        p->lostjumbo = 0;
 421        return p;
 422}
 423
 424static void
 425ejectif(struct aoetgt *t, struct aoeif *ifp)
 426{
 427        struct aoeif *e;
 428        ulong n;
 429
 430        e = t->ifs + NAOEIFS - 1;
 431        n = (e - ifp) * sizeof *ifp;
 432        memmove(ifp, ifp+1, n);
 433        e->nd = NULL;
 434}
 435
 436static int
 437sthtith(struct aoedev *d)
 438{
 439        struct frame *f, *e, *nf;
 440        struct sk_buff *skb;
 441        struct aoetgt *ht = *d->htgt;
 442
 443        f = ht->frames;
 444        e = f + ht->nframes;
 445        for (; f < e; f++) {
 446                if (f->tag == FREETAG)
 447                        continue;
 448                nf = freeframe(d);
 449                if (!nf)
 450                        return 0;
 451                skb = nf->skb;
 452                *nf = *f;
 453                f->skb = skb;
 454                f->tag = FREETAG;
 455                nf->waited = 0;
 456                ht->nout--;
 457                (*d->tgt)->nout++;
 458                resend(d, *d->tgt, nf);
 459        }
 460        /* he's clean, he's useless.  take away his interfaces */
 461        memset(ht->ifs, 0, sizeof ht->ifs);
 462        d->htgt = NULL;
 463        return 1;
 464}
 465
 466static inline unsigned char
 467ata_scnt(unsigned char *packet) {
 468        struct aoe_hdr *h;
 469        struct aoe_atahdr *ah;
 470
 471        h = (struct aoe_hdr *) packet;
 472        ah = (struct aoe_atahdr *) (h+1);
 473        return ah->scnt;
 474}
 475
 /*
  * Retransmit timer handler for one AoE device; vp is the struct aoedev
  * pointer cast to ulong.
  *
  * Under d->lock it resends every outstanding frame that has been out for
  * at least ~150% of d->rttavg, maintains the per-target window (maxout)
  * and the per-interface loss counters, restarts work if requested,
  * re-arms d->timer TIMERTICK jiffies ahead, and transmits the queued
  * packets after dropping the lock.  When DEVFL_TKILL is set it returns
  * at once without re-arming the timer.
  */
 476static void
 477rexmit_timer(ulong vp)
 478{
 479        struct sk_buff_head queue;
 480        struct aoedev *d;
 481        struct aoetgt *t, **tt, **te;
 482        struct aoeif *ifp;
 483        struct frame *f, *e;
 484        register long timeout;
 485        ulong flags, n;
 486
 487        d = (struct aoedev *) vp;
 488
 489        /* timeout is always ~150% of the moving average */
 490        timeout = d->rttavg;
 491        timeout += timeout >> 1;
 492
 493        spin_lock_irqsave(&d->lock, flags);
 494
 495        if (d->flags & DEVFL_TKILL) {
 496                spin_unlock_irqrestore(&d->lock, flags);
 497                return;
 498        }
 499        tt = d->targets;
 500        te = tt + NTARGETS;
 501        for (; tt < te && *tt; tt++) {
 502                t = *tt;
 503                f = t->frames;
 504                e = f + t->nframes;
 505                for (; f < e; f++) {
                            /* skip unused frames and frames that have not timed out yet */
 506                        if (f->tag == FREETAG
 507                        || tsince(f->tag) < timeout)
 508                                continue;
                            /* add this timeout to the frame's total wait; n becomes seconds */
 509                        n = f->waited += timeout;
 510                        n /= HZ;
 511                        if (n > aoe_deadsecs) {
 512                                /* waited too long.  device failure. */
 513                                aoedev_downdev(d);
                                    /* NOTE(review): leaves only the frame loop; the window
                                       check below and the remaining targets still run. */
 514                                break;
 515                        }
 516
 517                        if (n > HELPWAIT /* see if another target can help */
 518                        && (tt != d->targets || d->targets[1]))
 519                                d->htgt = tt;
 520
                            /* a frame timed out with the window full: shrink it, never below 1 */
 521                        if (t->nout == t->maxout) {
 522                                if (t->maxout > 1)
 523                                        t->maxout--;
 524                                t->lastwadj = jiffies;
 525                        }
 526
                            /* count the loss against the interface the frame went out on;
                               eject that interface after too many losses, unless it is
                               the target's only one */
 527                        ifp = getif(t, f->skb->dev);
 528                        if (ifp && ++ifp->lost > (t->nframes << 1)
 529                        && (ifp != t->ifs || t->ifs[1].nd)) {
 530                                ejectif(t, ifp);
 531                                ifp = NULL;
 532                        }
 533
                            /* frame carried more sectors than fit in DEFAULTBCNT bytes */
 534                        if (ata_scnt(skb_mac_header(f->skb)) > DEFAULTBCNT / 512
 535                        && ifp && ++ifp->lostjumbo > (t->nframes << 1)
 536                        && ifp->maxbcnt != DEFAULTBCNT) {
 537                                printk(KERN_INFO
 538                                        "aoe: e%ld.%d: "
 539                                        "too many lost jumbo on "
 540                                        "%s:%pm - "
 541                                        "falling back to %d frames.\n",
 542                                        d->aoemajor, d->aoeminor,
 543                                        ifp->nd->name, t->addr,
 544                                        DEFAULTBCNT);
                                    /* a maxbcnt of 0 makes aoecmd_ata_rw() use DEFAULTBCNT */
 545                                ifp->maxbcnt = 0;
 546                        }
 547                        resend(d, t, f);
 548                }
 549
 550                /* window check */
                    /* window full and not adjusted for more than 10 seconds: grow it by
                       one, up to the number of frames */
 551                if (t->nout == t->maxout
 552                && t->maxout < t->nframes
 553                && (jiffies - t->lastwadj)/HZ > 10) {
 554                        t->maxout++;
 555                        t->lastwadj = jiffies;
 556                }
 557        }
 558
            /* packets are waiting on d->sendq (resend() queues there): double the
               round-trip average, capped at MAXTIMER */
 559        if (!skb_queue_empty(&d->sendq)) {
 560                n = d->rttavg <<= 1;
 561                if (n > MAXTIMER)
 562                        d->rttavg = MAXTIMER;
 563        }
 564
            /* restart work if it was requested or a target needs help */
 565        if (d->flags & DEVFL_KICKME || d->htgt) {
 566                d->flags &= ~DEVFL_KICKME;
 567                aoecmd_work(d);
 568        }
 569
            /* take everything queued so it can be sent once the lock is dropped */
 570        __skb_queue_head_init(&queue);
 571        skb_queue_splice_init(&d->sendq, &queue);
 572
 573        d->timer.expires = jiffies + TIMERTICK;
 574        add_timer(&d->timer);
 575
 576        spin_unlock_irqrestore(&d->lock, flags);
 577
 578        aoenet_xmit(&queue);
 579}
 580
 581/* enters with d->lock held */
 582void
 583aoecmd_work(struct aoedev *d)
 584{
 585        struct buf *buf;
 586loop:
 587        if (d->htgt && !sthtith(d))
 588                return;
 589        if (d->inprocess == NULL) {
 590                if (list_empty(&d->bufq))
 591                        return;
 592                buf = container_of(d->bufq.next, struct buf, bufs);
 593                list_del(d->bufq.next);
 594                d->inprocess = buf;
 595        }
 596        if (aoecmd_ata_rw(d))
 597                goto loop;
 598}
 599
 600/* this function performs work that has been deferred until sleeping is OK
 601 */
 602void
 603aoecmd_sleepwork(struct work_struct *work)
 604{
 605        struct aoedev *d = container_of(work, struct aoedev, work);
 606
 607        if (d->flags & DEVFL_GDALLOC)
 608                aoeblk_gdalloc(d);
 609
 610        if (d->flags & DEVFL_NEWSIZE) {
 611                struct block_device *bd;
 612                unsigned long flags;
 613                u64 ssize;
 614
 615                ssize = get_capacity(d->gd);
 616                bd = bdget_disk(d->gd, 0);
 617
 618                if (bd) {
 619                        mutex_lock(&bd->bd_inode->i_mutex);
 620                        i_size_write(bd->bd_inode, (loff_t)ssize<<9);
 621                        mutex_unlock(&bd->bd_inode->i_mutex);
 622                        bdput(bd);
 623                }
 624                spin_lock_irqsave(&d->lock, flags);
 625                d->flags |= DEVFL_UP;
 626                d->flags &= ~DEVFL_NEWSIZE;
 627                spin_unlock_irqrestore(&d->lock, flags);
 628        }
 629}
 630
 631static void
 632ataid_complete(struct aoedev *d, struct aoetgt *t, unsigned char *id)
 633{
 634        u64 ssize;
 635        u16 n;
 636
 637        /* word 83: command set supported */
 638        n = get_unaligned_le16(&id[83 << 1]);
 639
 640        /* word 86: command set/feature enabled */
 641        n |= get_unaligned_le16(&id[86 << 1]);
 642
 643        if (n & (1<<10)) {      /* bit 10: LBA 48 */
 644                d->flags |= DEVFL_EXT;
 645
 646                /* word 100: number lba48 sectors */
 647                ssize = get_unaligned_le64(&id[100 << 1]);
 648
 649                /* set as in ide-disk.c:init_idedisk_capacity */
 650                d->geo.cylinders = ssize;
 651                d->geo.cylinders /= (255 * 63);
 652                d->geo.heads = 255;
 653                d->geo.sectors = 63;
 654        } else {
 655                d->flags &= ~DEVFL_EXT;
 656
 657                /* number lba28 sectors */
 658                ssize = get_unaligned_le32(&id[60 << 1]);
 659
 660                /* NOTE: obsolete in ATA 6 */
 661                d->geo.cylinders = get_unaligned_le16(&id[54 << 1]);
 662                d->geo.heads = get_unaligned_le16(&id[55 << 1]);
 663                d->geo.sectors = get_unaligned_le16(&id[56 << 1]);
 664        }
 665
 666        if (d->ssize != ssize)
 667                printk(KERN_INFO
 668                        "aoe: %pm e%ld.%d v%04x has %llu sectors\n",
 669                        t->addr,
 670                        d->aoemajor, d->aoeminor,
 671                        d->fw_ver, (long long)ssize);
 672        d->ssize = ssize;
 673        d->geo.start = 0;
 674        if (d->flags & (DEVFL_GDALLOC|DEVFL_NEWSIZE))
 675                return;
 676        if (d->gd != NULL) {
 677                set_capacity(d->gd, ssize);
 678                d->flags |= DEVFL_NEWSIZE;
 679        } else
 680                d->flags |= DEVFL_GDALLOC;
 681        schedule_work(&d->work);
 682}
 683
 684static void
 685calc_rttavg(struct aoedev *d, int rtt)
 686{
 687        register long n;
 688
 689        n = rtt;
 690        if (n < 0) {
 691                n = -rtt;
 692                if (n < MINTIMER)
 693                        n = MINTIMER;
 694                else if (n > MAXTIMER)
 695                        n = MAXTIMER;
 696                d->mintimer += (n - d->mintimer) >> 1;
 697        } else if (n < d->mintimer)
 698                n = d->mintimer;
 699        else if (n > MAXTIMER)
 700                n = MAXTIMER;
 701
 702        /* g == .25; cf. Congestion Avoidance and Control, Jacobson & Karels; 1988 */
 703        n -= d->rttavg;
 704        d->rttavg += n >> 2;
 705}
 706
 707static struct aoetgt *
 708gettgt(struct aoedev *d, char *addr)
 709{
 710        struct aoetgt **t, **e;
 711
 712        t = d->targets;
 713        e = t + NTARGETS;
 714        for (; t < e && *t; t++)
 715                if (memcmp((*t)->addr, addr, sizeof((*t)->addr)) == 0)
 716                        return *t;
 717        return NULL;
 718}
 719
 720static inline void
 721diskstats(struct gendisk *disk, struct bio *bio, ulong duration, sector_t sector)
 722{
 723        unsigned long n_sect = bio->bi_size >> 9;
 724        const int rw = bio_data_dir(bio);
 725        struct hd_struct *part;
 726        int cpu;
 727
 728        cpu = part_stat_lock();
 729        part = disk_map_sector_rcu(disk, sector);
 730
 731        part_stat_inc(cpu, part, ios[rw]);
 732        part_stat_add(cpu, part, ticks[rw], duration);
 733        part_stat_add(cpu, part, sectors[rw], n_sect);
 734        part_stat_add(cpu, part, io_ticks, duration);
 735
 736        part_stat_unlock();
 737}
 738
 739void
 740aoecmd_ata_rsp(struct sk_buff *skb)
 741{
 742        struct sk_buff_head queue;
 743        struct aoedev *d;
 744        struct aoe_hdr *hin, *hout;
 745        struct aoe_atahdr *ahin, *ahout;
 746        struct frame *f;
 747        struct buf *buf;
 748        struct aoetgt *t;
 749        struct aoeif *ifp;
 750        register long n;
 751        ulong flags;
 752        char ebuf[128];
 753        u16 aoemajor;
 754
 755        hin = (struct aoe_hdr *) skb_mac_header(skb);
 756        aoemajor = get_unaligned_be16(&hin->major);
 757        d = aoedev_by_aoeaddr(aoemajor, hin->minor);
 758        if (d == NULL) {
 759                snprintf(ebuf, sizeof ebuf, "aoecmd_ata_rsp: ata response "
 760                        "for unknown device %d.%d\n",
 761                         aoemajor, hin->minor);
 762                aoechr_error(ebuf);
 763                return;
 764        }
 765
 766        spin_lock_irqsave(&d->lock, flags);
 767
 768        n = get_unaligned_be32(&hin->tag);
 769        t = gettgt(d, hin->src);
 770        if (t == NULL) {
 771                printk(KERN_INFO "aoe: can't find target e%ld.%d:%pm\n",
 772                        d->aoemajor, d->aoeminor, hin->src);
 773                spin_unlock_irqrestore(&d->lock, flags);
 774                return;
 775        }
 776        f = getframe(t, n);
 777        if (f == NULL) {
 778                calc_rttavg(d, -tsince(n));
 779                spin_unlock_irqrestore(&d->lock, flags);
 780                snprintf(ebuf, sizeof ebuf,
 781                        "%15s e%d.%d    tag=%08x@%08lx\n",
 782                        "unexpected rsp",
 783                        get_unaligned_be16(&hin->major),
 784                        hin->minor,
 785                        get_unaligned_be32(&hin->tag),
 786                        jiffies);
 787                aoechr_error(ebuf);
 788                return;
 789        }
 790
 791        calc_rttavg(d, tsince(f->tag));
 792
 793        ahin = (struct aoe_atahdr *) (hin+1);
 794        hout = (struct aoe_hdr *) skb_mac_header(f->skb);
 795        ahout = (struct aoe_atahdr *) (hout+1);
 796        buf = f->buf;
 797
 798        if (ahin->cmdstat & 0xa9) {     /* these bits cleared on success */
 799                printk(KERN_ERR
 800                        "aoe: ata error cmd=%2.2Xh stat=%2.2Xh from e%ld.%d\n",
 801                        ahout->cmdstat, ahin->cmdstat,
 802                        d->aoemajor, d->aoeminor);
 803                if (buf)
 804                        buf->flags |= BUFFL_FAIL;
 805        } else {
 806                if (d->htgt && t == *d->htgt) /* I'll help myself, thank you. */
 807                        d->htgt = NULL;
 808                n = ahout->scnt << 9;
 809                switch (ahout->cmdstat) {
 810                case ATA_CMD_PIO_READ:
 811                case ATA_CMD_PIO_READ_EXT:
 812                        if (skb->len - sizeof *hin - sizeof *ahin < n) {
 813                                printk(KERN_ERR
 814                                        "aoe: %s.  skb->len=%d need=%ld\n",
 815                                        "runt data size in read", skb->len, n);
 816                                /* fail frame f?  just returning will rexmit. */
 817                                spin_unlock_irqrestore(&d->lock, flags);
 818                                return;
 819                        }
 820                        memcpy(f->bufaddr, ahin+1, n);
 821                case ATA_CMD_PIO_WRITE:
 822                case ATA_CMD_PIO_WRITE_EXT:
 823                        ifp = getif(t, skb->dev);
 824                        if (ifp) {
 825                                ifp->lost = 0;
 826                                if (n > DEFAULTBCNT)
 827                                        ifp->lostjumbo = 0;
 828                        }
 829                        if (f->bcnt -= n) {
 830                                f->lba += n >> 9;
 831                                f->bufaddr += n;
 832                                resend(d, t, f);
 833                                goto xmit;
 834                        }
 835                        break;
 836                case ATA_CMD_ID_ATA:
 837                        if (skb->len - sizeof *hin - sizeof *ahin < 512) {
 838                                printk(KERN_INFO
 839                                        "aoe: runt data size in ataid.  skb->len=%d\n",
 840                                        skb->len);
 841                                spin_unlock_irqrestore(&d->lock, flags);
 842                                return;
 843                        }
 844                        ataid_complete(d, t, (char *) (ahin+1));
 845                        break;
 846                default:
 847                        printk(KERN_INFO
 848                                "aoe: unrecognized ata command %2.2Xh for %d.%d\n",
 849                                ahout->cmdstat,
 850                                get_unaligned_be16(&hin->major),
 851                                hin->minor);
 852                }
 853        }
 854
 855        if (buf && --buf->nframesout == 0 && buf->resid == 0) {
 856                diskstats(d->gd, buf->bio, jiffies - buf->stime, buf->sector);
 857                if (buf->flags & BUFFL_FAIL)
 858                        bio_endio(buf->bio, -EIO);
 859                else {
 860                        bio_flush_dcache_pages(buf->bio);
 861                        bio_endio(buf->bio, 0);
 862                }
 863                mempool_free(buf, d->bufpool);
 864        }
 865
 866        f->buf = NULL;
 867        f->tag = FREETAG;
 868        t->nout--;
 869
 870        aoecmd_work(d);
 871xmit:
 872        __skb_queue_head_init(&queue);
 873        skb_queue_splice_init(&d->sendq, &queue);
 874
 875        spin_unlock_irqrestore(&d->lock, flags);
 876        aoenet_xmit(&queue);
 877}
 878
 879void
 880aoecmd_cfg(ushort aoemajor, unsigned char aoeminor)
 881{
 882        struct sk_buff_head queue;
 883
 884        __skb_queue_head_init(&queue);
 885        aoecmd_cfg_pkts(aoemajor, aoeminor, &queue);
 886        aoenet_xmit(&queue);
 887}
 888 
 889struct sk_buff *
 890aoecmd_ata_id(struct aoedev *d)
 891{
 892        struct aoe_hdr *h;
 893        struct aoe_atahdr *ah;
 894        struct frame *f;
 895        struct sk_buff *skb;
 896        struct aoetgt *t;
 897
 898        f = freeframe(d);
 899        if (f == NULL)
 900                return NULL;
 901
 902        t = *d->tgt;
 903
 904        /* initialize the headers & frame */
 905        skb = f->skb;
 906        h = (struct aoe_hdr *) skb_mac_header(skb);
 907        ah = (struct aoe_atahdr *) (h+1);
 908        skb_put(skb, sizeof *h + sizeof *ah);
 909        memset(h, 0, skb->len);
 910        f->tag = aoehdr_atainit(d, t, h);
 911        t->nout++;
 912        f->waited = 0;
 913
 914        /* set up ata header */
 915        ah->scnt = 1;
 916        ah->cmdstat = ATA_CMD_ID_ATA;
 917        ah->lba3 = 0xa0;
 918
 919        skb->dev = t->ifp->nd;
 920
 921        d->rttavg = MAXTIMER;
 922        d->timer.function = rexmit_timer;
 923
 924        return skb_clone(skb, GFP_ATOMIC);
 925}
 926 
 927static struct aoetgt *
 928addtgt(struct aoedev *d, char *addr, ulong nframes)
 929{
 930        struct aoetgt *t, **tt, **te;
 931        struct frame *f, *e;
 932
 933        tt = d->targets;
 934        te = tt + NTARGETS;
 935        for (; tt < te && *tt; tt++)
 936                ;
 937
 938        if (tt == te) {
 939                printk(KERN_INFO
 940                        "aoe: device addtgt failure; too many targets\n");
 941                return NULL;
 942        }
 943        t = kcalloc(1, sizeof *t, GFP_ATOMIC);
 944        f = kcalloc(nframes, sizeof *f, GFP_ATOMIC);
 945        if (!t || !f) {
 946                kfree(f);
 947                kfree(t);
 948                printk(KERN_INFO "aoe: cannot allocate memory to add target\n");
 949                return NULL;
 950        }
 951
 952        t->nframes = nframes;
 953        t->frames = f;
 954        e = f + nframes;
 955        for (; f < e; f++)
 956                f->tag = FREETAG;
 957        memcpy(t->addr, addr, sizeof t->addr);
 958        t->ifp = t->ifs;
 959        t->maxout = t->nframes;
 960        return *tt = t;
 961}
 962
 963void
 964aoecmd_cfg_rsp(struct sk_buff *skb)
 965{
 966        struct aoedev *d;
 967        struct aoe_hdr *h;
 968        struct aoe_cfghdr *ch;
 969        struct aoetgt *t;
 970        struct aoeif *ifp;
 971        ulong flags, sysminor, aoemajor;
 972        struct sk_buff *sl;
 973        u16 n;
 974
 975        h = (struct aoe_hdr *) skb_mac_header(skb);
 976        ch = (struct aoe_cfghdr *) (h+1);
 977
 978        /*
 979         * Enough people have their dip switches set backwards to
 980         * warrant a loud message for this special case.
 981         */
 982        aoemajor = get_unaligned_be16(&h->major);
 983        if (aoemajor == 0xfff) {
 984                printk(KERN_ERR "aoe: Warning: shelf address is all ones.  "
 985                        "Check shelf dip switches.\n");
 986                return;
 987        }
 988
 989        sysminor = SYSMINOR(aoemajor, h->minor);
 990        if (sysminor * AOE_PARTITIONS + AOE_PARTITIONS > MINORMASK) {
 991                printk(KERN_INFO "aoe: e%ld.%d: minor number too large\n",
 992                        aoemajor, (int) h->minor);
 993                return;
 994        }
 995
 996        n = be16_to_cpu(ch->bufcnt);
 997        if (n > aoe_maxout)     /* keep it reasonable */
 998                n = aoe_maxout;
 999
1000        d = aoedev_by_sysminor_m(sysminor);
1001        if (d == NULL) {
1002                printk(KERN_INFO "aoe: device sysminor_m failure\n");
1003                return;
1004        }
1005
1006        spin_lock_irqsave(&d->lock, flags);
1007
1008        t = gettgt(d, h->src);
1009        if (!t) {
1010                t = addtgt(d, h->src, n);
1011                if (!t) {
1012                        spin_unlock_irqrestore(&d->lock, flags);
1013                        return;
1014                }
1015        }
1016        ifp = getif(t, skb->dev);
1017        if (!ifp) {
1018                ifp = addif(t, skb->dev);
1019                if (!ifp) {
1020                        printk(KERN_INFO
1021                                "aoe: device addif failure; "
1022                                "too many interfaces?\n");
1023                        spin_unlock_irqrestore(&d->lock, flags);
1024                        return;
1025                }
1026        }
1027        if (ifp->maxbcnt) {
1028                n = ifp->nd->mtu;
1029                n -= sizeof (struct aoe_hdr) + sizeof (struct aoe_atahdr);
1030                n /= 512;
1031                if (n > ch->scnt)
1032                        n = ch->scnt;
1033                n = n ? n * 512 : DEFAULTBCNT;
1034                if (n != ifp->maxbcnt) {
1035                        printk(KERN_INFO
1036                                "aoe: e%ld.%d: setting %d%s%s:%pm\n",
1037                                d->aoemajor, d->aoeminor, n,
1038                                " byte data frames on ", ifp->nd->name,
1039                                t->addr);
1040                        ifp->maxbcnt = n;
1041                }
1042        }
1043
1044        /* don't change users' perspective */
1045        if (d->nopen) {
1046                spin_unlock_irqrestore(&d->lock, flags);
1047                return;
1048        }
1049        d->fw_ver = be16_to_cpu(ch->fwver);
1050
1051        sl = aoecmd_ata_id(d);
1052
1053        spin_unlock_irqrestore(&d->lock, flags);
1054
1055        if (sl) {
1056                struct sk_buff_head queue;
1057                __skb_queue_head_init(&queue);
1058                __skb_queue_tail(&queue, sl);
1059                aoenet_xmit(&queue);
1060        }
1061}
1062
1063void
1064aoecmd_cleanslate(struct aoedev *d)
1065{
1066        struct aoetgt **t, **te;
1067        struct aoeif *p, *e;
1068
1069        d->mintimer = MINTIMER;
1070
1071        t = d->targets;
1072        te = t + NTARGETS;
1073        for (; t < te && *t; t++) {
1074                (*t)->maxout = (*t)->nframes;
1075                p = (*t)->ifs;
1076                e = p + NAOEIFS;
1077                for (; p < e; p++) {
1078                        p->lostjumbo = 0;
1079                        p->lost = 0;
1080                        p->maxbcnt = DEFAULTBCNT;
1081                }
1082        }
1083}
1084