linux/drivers/net/cxgb3/sge.c
   1/*
   2 * Copyright (c) 2005-2008 Chelsio, Inc. All rights reserved.
   3 *
   4 * This software is available to you under a choice of one of two
   5 * licenses.  You may choose to be licensed under the terms of the GNU
   6 * General Public License (GPL) Version 2, available from the file
   7 * COPYING in the main directory of this source tree, or the
   8 * OpenIB.org BSD license below:
   9 *
  10 *     Redistribution and use in source and binary forms, with or
  11 *     without modification, are permitted provided that the following
  12 *     conditions are met:
  13 *
  14 *      - Redistributions of source code must retain the above
  15 *        copyright notice, this list of conditions and the following
  16 *        disclaimer.
  17 *
  18 *      - Redistributions in binary form must reproduce the above
  19 *        copyright notice, this list of conditions and the following
  20 *        disclaimer in the documentation and/or other materials
  21 *        provided with the distribution.
  22 *
  23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
  27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
  28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  30 * SOFTWARE.
  31 */
  32#include <linux/skbuff.h>
  33#include <linux/netdevice.h>
  34#include <linux/etherdevice.h>
  35#include <linux/if_vlan.h>
  36#include <linux/ip.h>
  37#include <linux/tcp.h>
  38#include <linux/dma-mapping.h>
  39#include <linux/slab.h>
  40#include <net/arp.h>
  41#include "common.h"
  42#include "regs.h"
  43#include "sge_defs.h"
  44#include "t3_cpl.h"
  45#include "firmware_exports.h"
  46#include "cxgb3_offload.h"
  47
  48#define USE_GTS 0
  49
  50#define SGE_RX_SM_BUF_SIZE 1536
  51
  52#define SGE_RX_COPY_THRES  256
  53#define SGE_RX_PULL_LEN    128
  54
  55#define SGE_PG_RSVD SMP_CACHE_BYTES
  56/*
  57 * Page chunk size for FL0 buffers if FL0 is to be populated with page chunks.
  58 * It must be a divisor of PAGE_SIZE.  If set to 0 FL0 will use sk_buffs
  59 * directly.
  60 */
  61#define FL0_PG_CHUNK_SIZE  2048
  62#define FL0_PG_ORDER 0
  63#define FL0_PG_ALLOC_SIZE (PAGE_SIZE << FL0_PG_ORDER)
  64#define FL1_PG_CHUNK_SIZE (PAGE_SIZE > 8192 ? 16384 : 8192)
  65#define FL1_PG_ORDER (PAGE_SIZE > 8192 ? 0 : 1)
  66#define FL1_PG_ALLOC_SIZE (PAGE_SIZE << FL1_PG_ORDER)
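
      /*
       * For illustration, assuming the common 4 KB PAGE_SIZE: FL0 carves each
       * order-0 page into 4096 / 2048 = 2 chunks, while FL1 uses order-1
       * (8 KB) allocations that each provide a single 8 KB jumbo-frame chunk.
       * With 64 KB pages FL1 instead takes 16 KB chunks from order-0 pages,
       * i.e. four chunks per page.
       */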
  67
  68#define SGE_RX_DROP_THRES 16
  69#define RX_RECLAIM_PERIOD (HZ/4)
  70
  71/*
  72 * Max number of Rx buffers we replenish at a time.
  73 */
  74#define MAX_RX_REFILL 16U
  75/*
  76 * Period of the Tx buffer reclaim timer.  This timer does not need to run
  77 * frequently as Tx buffers are usually reclaimed by new Tx packets.
  78 */
  79#define TX_RECLAIM_PERIOD (HZ / 4)
  80#define TX_RECLAIM_TIMER_CHUNK 64U
  81#define TX_RECLAIM_CHUNK 16U
  82
  83/* WR size in bytes */
  84#define WR_LEN (WR_FLITS * 8)
  85
  86/*
  87 * Types of Tx queues in each queue set.  Order here matters, do not change.
  88 */
  89enum { TXQ_ETH, TXQ_OFLD, TXQ_CTRL };
  90
  91/* Values for sge_txq.flags */
  92enum {
  93        TXQ_RUNNING = 1 << 0,   /* fetch engine is running */
  94        TXQ_LAST_PKT_DB = 1 << 1,       /* last packet rang the doorbell */
  95};
  96
  97struct tx_desc {
  98        __be64 flit[TX_DESC_FLITS];
  99};
 100
 101struct rx_desc {
 102        __be32 addr_lo;
 103        __be32 len_gen;
 104        __be32 gen2;
 105        __be32 addr_hi;
 106};
 107
 108struct tx_sw_desc {             /* SW state per Tx descriptor */
 109        struct sk_buff *skb;
 110        u8 eop;       /* set if last descriptor for packet */
 111        u8 addr_idx;  /* buffer index of first SGL entry in descriptor */
 112        u8 fragidx;   /* first page fragment associated with descriptor */
 113        s8 sflit;     /* start flit of first SGL entry in descriptor */
 114};
 115
 116struct rx_sw_desc {                /* SW state per Rx descriptor */
 117        union {
 118                struct sk_buff *skb;
 119                struct fl_pg_chunk pg_chunk;
 120        };
 121        DEFINE_DMA_UNMAP_ADDR(dma_addr);
 122};
 123
 124struct rsp_desc {               /* response queue descriptor */
 125        struct rss_header rss_hdr;
 126        __be32 flags;
 127        __be32 len_cq;
 128        u8 imm_data[47];
 129        u8 intr_gen;
 130};
 131
 132/*
 133 * Holds unmapping information for Tx packets that need deferred unmapping.
 134 * This structure lives at skb->head and must be allocated by callers.
 135 */
 136struct deferred_unmap_info {
 137        struct pci_dev *pdev;
 138        dma_addr_t addr[MAX_SKB_FRAGS + 1];
 139};
 140
 141/*
 142 * Maps a number of flits to the number of Tx descriptors that can hold them.
 143 * The formula is
 144 *
 145 * desc = 1 + (flits - 2) / (WR_FLITS - 1).
 146 *
 147 * HW allows up to 4 descriptors to be combined into a WR.
 148 */
 149static u8 flit_desc_map[] = {
 150        0,
 151#if SGE_NUM_GENBITS == 1
 152        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
 153        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
 154        3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
 155        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4
 156#elif SGE_NUM_GENBITS == 2
 157        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
 158        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
 159        3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
 160        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
 161#else
 162# error "SGE_NUM_GENBITS must be 1 or 2"
 163#endif
 164};
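
      /*
       * Worked example of the formula above, assuming SGE_NUM_GENBITS == 2
       * (for which the table corresponds to WR_FLITS == 15): a work request
       * of 15 flits fits in one descriptor, while 16 flits need
       * 1 + (16 - 2) / 14 = 2 descriptors, matching flit_desc_map[16] == 2.
       */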
 165
 166static inline struct sge_qset *fl_to_qset(const struct sge_fl *q, int qidx)
 167{
 168        return container_of(q, struct sge_qset, fl[qidx]);
 169}
 170
 171static inline struct sge_qset *rspq_to_qset(const struct sge_rspq *q)
 172{
 173        return container_of(q, struct sge_qset, rspq);
 174}
 175
 176static inline struct sge_qset *txq_to_qset(const struct sge_txq *q, int qidx)
 177{
 178        return container_of(q, struct sge_qset, txq[qidx]);
 179}
 180
 181/**
 182 *      refill_rspq - replenish an SGE response queue
 183 *      @adapter: the adapter
 184 *      @q: the response queue to replenish
 185 *      @credits: how many new responses to make available
 186 *
 187 *      Replenishes a response queue by making the supplied number of responses
 188 *      available to HW.
 189 */
 190static inline void refill_rspq(struct adapter *adapter,
 191                               const struct sge_rspq *q, unsigned int credits)
 192{
 193        rmb();
 194        t3_write_reg(adapter, A_SG_RSPQ_CREDIT_RETURN,
 195                     V_RSPQ(q->cntxt_id) | V_CREDITS(credits));
 196}
 197
 198/**
 199 *      need_skb_unmap - does the platform need unmapping of sk_buffs?
 200 *
 201 *      Returns true if the platform needs sk_buff unmapping.  The compiler
  202 *      optimizes away the unnecessary unmapping code when this returns false.
 203 */
 204static inline int need_skb_unmap(void)
 205{
 206#ifdef CONFIG_NEED_DMA_MAP_STATE
 207        return 1;
 208#else
 209        return 0;
 210#endif
 211}
 212
 213/**
 214 *      unmap_skb - unmap a packet main body and its page fragments
 215 *      @skb: the packet
 216 *      @q: the Tx queue containing Tx descriptors for the packet
 217 *      @cidx: index of Tx descriptor
 218 *      @pdev: the PCI device
 219 *
 220 *      Unmap the main body of an sk_buff and its page fragments, if any.
 221 *      Because of the fairly complicated structure of our SGLs and the desire
 222 *      to conserve space for metadata, the information necessary to unmap an
 223 *      sk_buff is spread across the sk_buff itself (buffer lengths), the HW Tx
 224 *      descriptors (the physical addresses of the various data buffers), and
 225 *      the SW descriptor state (assorted indices).  The send functions
 226 *      initialize the indices for the first packet descriptor so we can unmap
 227 *      the buffers held in the first Tx descriptor here, and we have enough
 228 *      information at this point to set the state for the next Tx descriptor.
 229 *
 230 *      Note that it is possible to clean up the first descriptor of a packet
 231 *      before the send routines have written the next descriptors, but this
 232 *      race does not cause any problem.  We just end up writing the unmapping
 233 *      info for the descriptor first.
 234 */
 235static inline void unmap_skb(struct sk_buff *skb, struct sge_txq *q,
 236                             unsigned int cidx, struct pci_dev *pdev)
 237{
 238        const struct sg_ent *sgp;
 239        struct tx_sw_desc *d = &q->sdesc[cidx];
 240        int nfrags, frag_idx, curflit, j = d->addr_idx;
 241
 242        sgp = (struct sg_ent *)&q->desc[cidx].flit[d->sflit];
 243        frag_idx = d->fragidx;
 244
 245        if (frag_idx == 0 && skb_headlen(skb)) {
 246                pci_unmap_single(pdev, be64_to_cpu(sgp->addr[0]),
 247                                 skb_headlen(skb), PCI_DMA_TODEVICE);
 248                j = 1;
 249        }
 250
 251        curflit = d->sflit + 1 + j;
 252        nfrags = skb_shinfo(skb)->nr_frags;
 253
 254        while (frag_idx < nfrags && curflit < WR_FLITS) {
 255                pci_unmap_page(pdev, be64_to_cpu(sgp->addr[j]),
 256                               skb_shinfo(skb)->frags[frag_idx].size,
 257                               PCI_DMA_TODEVICE);
 258                j ^= 1;
 259                if (j == 0) {
 260                        sgp++;
 261                        curflit++;
 262                }
 263                curflit++;
 264                frag_idx++;
 265        }
 266
 267        if (frag_idx < nfrags) {   /* SGL continues into next Tx descriptor */
 268                d = cidx + 1 == q->size ? q->sdesc : d + 1;
 269                d->fragidx = frag_idx;
 270                d->addr_idx = j;
 271                d->sflit = curflit - WR_FLITS - j; /* sflit can be -1 */
 272        }
 273}
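
      /*
       * Layout sketch (illustrative): each struct sg_ent spans three flits --
       * one flit holding the two 32-bit lengths followed by two flits holding
       * the two 64-bit addresses.  That is why curflit above starts at
       * d->sflit + 1 + j (skip the length flit, then select address slot j)
       * and advances by three flits for every two buffers unmapped.
       */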
 274
 275/**
 276 *      free_tx_desc - reclaims Tx descriptors and their buffers
 277 *      @adapter: the adapter
 278 *      @q: the Tx queue to reclaim descriptors from
 279 *      @n: the number of descriptors to reclaim
 280 *
 281 *      Reclaims Tx descriptors from an SGE Tx queue and frees the associated
 282 *      Tx buffers.  Called with the Tx queue lock held.
 283 */
 284static void free_tx_desc(struct adapter *adapter, struct sge_txq *q,
 285                         unsigned int n)
 286{
 287        struct tx_sw_desc *d;
 288        struct pci_dev *pdev = adapter->pdev;
 289        unsigned int cidx = q->cidx;
 290
 291        const int need_unmap = need_skb_unmap() &&
 292                               q->cntxt_id >= FW_TUNNEL_SGEEC_START;
 293
 294        d = &q->sdesc[cidx];
 295        while (n--) {
 296                if (d->skb) {   /* an SGL is present */
 297                        if (need_unmap)
 298                                unmap_skb(d->skb, q, cidx, pdev);
 299                        if (d->eop) {
 300                                kfree_skb(d->skb);
 301                                d->skb = NULL;
 302                        }
 303                }
 304                ++d;
 305                if (++cidx == q->size) {
 306                        cidx = 0;
 307                        d = q->sdesc;
 308                }
 309        }
 310        q->cidx = cidx;
 311}
 312
 313/**
 314 *      reclaim_completed_tx - reclaims completed Tx descriptors
 315 *      @adapter: the adapter
 316 *      @q: the Tx queue to reclaim completed descriptors from
 317 *      @chunk: maximum number of descriptors to reclaim
 318 *
 319 *      Reclaims Tx descriptors that the SGE has indicated it has processed,
 320 *      and frees the associated buffers if possible.  Called with the Tx
 321 *      queue's lock held.
 322 */
 323static inline unsigned int reclaim_completed_tx(struct adapter *adapter,
 324                                                struct sge_txq *q,
 325                                                unsigned int chunk)
 326{
 327        unsigned int reclaim = q->processed - q->cleaned;
 328
 329        reclaim = min(chunk, reclaim);
 330        if (reclaim) {
 331                free_tx_desc(adapter, q, reclaim);
 332                q->cleaned += reclaim;
 333                q->in_use -= reclaim;
 334        }
 335        return q->processed - q->cleaned;
 336}
 337
 338/**
 339 *      should_restart_tx - are there enough resources to restart a Tx queue?
 340 *      @q: the Tx queue
 341 *
 342 *      Checks if there are enough descriptors to restart a suspended Tx queue.
 343 */
 344static inline int should_restart_tx(const struct sge_txq *q)
 345{
 346        unsigned int r = q->processed - q->cleaned;
 347
 348        return q->in_use - r < (q->size >> 1);
 349}
 350
 351static void clear_rx_desc(struct pci_dev *pdev, const struct sge_fl *q,
 352                          struct rx_sw_desc *d)
 353{
 354        if (q->use_pages && d->pg_chunk.page) {
 355                (*d->pg_chunk.p_cnt)--;
 356                if (!*d->pg_chunk.p_cnt)
 357                        pci_unmap_page(pdev,
 358                                       d->pg_chunk.mapping,
 359                                       q->alloc_size, PCI_DMA_FROMDEVICE);
 360
 361                put_page(d->pg_chunk.page);
 362                d->pg_chunk.page = NULL;
 363        } else {
 364                pci_unmap_single(pdev, dma_unmap_addr(d, dma_addr),
 365                                 q->buf_size, PCI_DMA_FROMDEVICE);
 366                kfree_skb(d->skb);
 367                d->skb = NULL;
 368        }
 369}
 370
 371/**
 372 *      free_rx_bufs - free the Rx buffers on an SGE free list
 373 *      @pdev: the PCI device associated with the adapter
 374 *      @rxq: the SGE free list to clean up
 375 *
 376 *      Release the buffers on an SGE free-buffer Rx queue.  HW fetching from
 377 *      this queue should be stopped before calling this function.
 378 */
 379static void free_rx_bufs(struct pci_dev *pdev, struct sge_fl *q)
 380{
 381        unsigned int cidx = q->cidx;
 382
 383        while (q->credits--) {
 384                struct rx_sw_desc *d = &q->sdesc[cidx];
 385
 386
 387                clear_rx_desc(pdev, q, d);
 388                if (++cidx == q->size)
 389                        cidx = 0;
 390        }
 391
 392        if (q->pg_chunk.page) {
 393                __free_pages(q->pg_chunk.page, q->order);
 394                q->pg_chunk.page = NULL;
 395        }
 396}
 397
 398/**
 399 *      add_one_rx_buf - add a packet buffer to a free-buffer list
 400 *      @va:  buffer start VA
 401 *      @len: the buffer length
 402 *      @d: the HW Rx descriptor to write
 403 *      @sd: the SW Rx descriptor to write
 404 *      @gen: the generation bit value
 405 *      @pdev: the PCI device associated with the adapter
 406 *
 407 *      Add a buffer of the given length to the supplied HW and SW Rx
 408 *      descriptors.
 409 */
 410static inline int add_one_rx_buf(void *va, unsigned int len,
 411                                 struct rx_desc *d, struct rx_sw_desc *sd,
 412                                 unsigned int gen, struct pci_dev *pdev)
 413{
 414        dma_addr_t mapping;
 415
 416        mapping = pci_map_single(pdev, va, len, PCI_DMA_FROMDEVICE);
 417        if (unlikely(pci_dma_mapping_error(pdev, mapping)))
 418                return -ENOMEM;
 419
 420        dma_unmap_addr_set(sd, dma_addr, mapping);
 421
 422        d->addr_lo = cpu_to_be32(mapping);
 423        d->addr_hi = cpu_to_be32((u64) mapping >> 32);
 424        wmb();
 425        d->len_gen = cpu_to_be32(V_FLD_GEN1(gen));
 426        d->gen2 = cpu_to_be32(V_FLD_GEN2(gen));
 427        return 0;
 428}
 429
 430static inline int add_one_rx_chunk(dma_addr_t mapping, struct rx_desc *d,
 431                                   unsigned int gen)
 432{
 433        d->addr_lo = cpu_to_be32(mapping);
 434        d->addr_hi = cpu_to_be32((u64) mapping >> 32);
 435        wmb();
 436        d->len_gen = cpu_to_be32(V_FLD_GEN1(gen));
 437        d->gen2 = cpu_to_be32(V_FLD_GEN2(gen));
 438        return 0;
 439}
 440
 441static int alloc_pg_chunk(struct adapter *adapter, struct sge_fl *q,
 442                          struct rx_sw_desc *sd, gfp_t gfp,
 443                          unsigned int order)
 444{
 445        if (!q->pg_chunk.page) {
 446                dma_addr_t mapping;
 447
 448                q->pg_chunk.page = alloc_pages(gfp, order);
 449                if (unlikely(!q->pg_chunk.page))
 450                        return -ENOMEM;
 451                q->pg_chunk.va = page_address(q->pg_chunk.page);
 452                q->pg_chunk.p_cnt = q->pg_chunk.va + (PAGE_SIZE << order) -
 453                                    SGE_PG_RSVD;
 454                q->pg_chunk.offset = 0;
 455                mapping = pci_map_page(adapter->pdev, q->pg_chunk.page,
  456                                       0, q->alloc_size, PCI_DMA_FROMDEVICE);
                      if (unlikely(pci_dma_mapping_error(adapter->pdev, mapping))) {
                              __free_pages(q->pg_chunk.page, order);
                              q->pg_chunk.page = NULL;
                              return -ENOMEM;
                      }
  457                q->pg_chunk.mapping = mapping;
 458        }
 459        sd->pg_chunk = q->pg_chunk;
 460
 461        prefetch(sd->pg_chunk.p_cnt);
 462
 463        q->pg_chunk.offset += q->buf_size;
 464        if (q->pg_chunk.offset == (PAGE_SIZE << order))
 465                q->pg_chunk.page = NULL;
 466        else {
 467                q->pg_chunk.va += q->buf_size;
 468                get_page(q->pg_chunk.page);
 469        }
 470
 471        if (sd->pg_chunk.offset == 0)
 472                *sd->pg_chunk.p_cnt = 1;
 473        else
 474                *sd->pg_chunk.p_cnt += 1;
 475
 476        return 0;
 477}
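
      /*
       * Page-chunk bookkeeping, for illustration with FL0 on 4 KB pages: the
       * page is split into 2048-byte chunks handed out at offsets 0 and 2048,
       * and the last SGE_PG_RSVD (one cache line) bytes of the allocation
       * hold the shared reference count *p_cnt.  The count is set to 1 when
       * the first chunk of a fresh page is handed out and bumped for each
       * further chunk; clear_rx_desc() unmaps the page only once it drops
       * back to zero.  This is also why refill_fl() syncs only
       * buf_size - SGE_PG_RSVD bytes of each chunk for DMA.
       */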
 478
 479static inline void ring_fl_db(struct adapter *adap, struct sge_fl *q)
 480{
 481        if (q->pend_cred >= q->credits / 4) {
 482                q->pend_cred = 0;
 483                wmb();
 484                t3_write_reg(adap, A_SG_KDOORBELL, V_EGRCNTX(q->cntxt_id));
 485        }
 486}
 487
 488/**
 489 *      refill_fl - refill an SGE free-buffer list
 490 *      @adapter: the adapter
 491 *      @q: the free-list to refill
 492 *      @n: the number of new buffers to allocate
 493 *      @gfp: the gfp flags for allocating new buffers
 494 *
 495 *      (Re)populate an SGE free-buffer list with up to @n new packet buffers,
  496 *      allocated with the supplied gfp flags.  The caller must ensure that
 497 *      @n does not exceed the queue's capacity.
 498 */
 499static int refill_fl(struct adapter *adap, struct sge_fl *q, int n, gfp_t gfp)
 500{
 501        struct rx_sw_desc *sd = &q->sdesc[q->pidx];
 502        struct rx_desc *d = &q->desc[q->pidx];
 503        unsigned int count = 0;
 504
 505        while (n--) {
 506                dma_addr_t mapping;
 507                int err;
 508
 509                if (q->use_pages) {
 510                        if (unlikely(alloc_pg_chunk(adap, q, sd, gfp,
 511                                                    q->order))) {
 512nomem:                          q->alloc_failed++;
 513                                break;
 514                        }
 515                        mapping = sd->pg_chunk.mapping + sd->pg_chunk.offset;
 516                        dma_unmap_addr_set(sd, dma_addr, mapping);
 517
 518                        add_one_rx_chunk(mapping, d, q->gen);
 519                        pci_dma_sync_single_for_device(adap->pdev, mapping,
 520                                                q->buf_size - SGE_PG_RSVD,
 521                                                PCI_DMA_FROMDEVICE);
 522                } else {
 523                        void *buf_start;
 524
 525                        struct sk_buff *skb = alloc_skb(q->buf_size, gfp);
 526                        if (!skb)
 527                                goto nomem;
 528
 529                        sd->skb = skb;
 530                        buf_start = skb->data;
 531                        err = add_one_rx_buf(buf_start, q->buf_size, d, sd,
 532                                             q->gen, adap->pdev);
 533                        if (unlikely(err)) {
 534                                clear_rx_desc(adap->pdev, q, sd);
 535                                break;
 536                        }
 537                }
 538
 539                d++;
 540                sd++;
 541                if (++q->pidx == q->size) {
 542                        q->pidx = 0;
 543                        q->gen ^= 1;
 544                        sd = q->sdesc;
 545                        d = q->desc;
 546                }
 547                count++;
 548        }
 549
 550        q->credits += count;
 551        q->pend_cred += count;
 552        ring_fl_db(adap, q);
 553
 554        return count;
 555}
 556
 557static inline void __refill_fl(struct adapter *adap, struct sge_fl *fl)
 558{
 559        refill_fl(adap, fl, min(MAX_RX_REFILL, fl->size - fl->credits),
 560                  GFP_ATOMIC | __GFP_COMP);
 561}
 562
 563/**
 564 *      recycle_rx_buf - recycle a receive buffer
 565 *      @adapter: the adapter
 566 *      @q: the SGE free list
 567 *      @idx: index of buffer to recycle
 568 *
 569 *      Recycles the specified buffer on the given free list by adding it at
 570 *      the next available slot on the list.
 571 */
 572static void recycle_rx_buf(struct adapter *adap, struct sge_fl *q,
 573                           unsigned int idx)
 574{
 575        struct rx_desc *from = &q->desc[idx];
 576        struct rx_desc *to = &q->desc[q->pidx];
 577
 578        q->sdesc[q->pidx] = q->sdesc[idx];
 579        to->addr_lo = from->addr_lo;    /* already big endian */
 580        to->addr_hi = from->addr_hi;    /* likewise */
 581        wmb();
 582        to->len_gen = cpu_to_be32(V_FLD_GEN1(q->gen));
 583        to->gen2 = cpu_to_be32(V_FLD_GEN2(q->gen));
 584
 585        if (++q->pidx == q->size) {
 586                q->pidx = 0;
 587                q->gen ^= 1;
 588        }
 589
 590        q->credits++;
 591        q->pend_cred++;
 592        ring_fl_db(adap, q);
 593}
 594
 595/**
 596 *      alloc_ring - allocate resources for an SGE descriptor ring
 597 *      @pdev: the PCI device
 598 *      @nelem: the number of descriptors
 599 *      @elem_size: the size of each descriptor
 600 *      @sw_size: the size of the SW state associated with each ring element
 601 *      @phys: the physical address of the allocated ring
 602 *      @metadata: address of the array holding the SW state for the ring
 603 *
 604 *      Allocates resources for an SGE descriptor ring, such as Tx queues,
 605 *      free buffer lists, or response queues.  Each SGE ring requires
 606 *      space for its HW descriptors plus, optionally, space for the SW state
 607 *      associated with each HW entry (the metadata).  The function returns
 608 *      three values: the virtual address for the HW ring (the return value
 609 *      of the function), the physical address of the HW ring, and the address
 610 *      of the SW ring.
 611 */
 612static void *alloc_ring(struct pci_dev *pdev, size_t nelem, size_t elem_size,
 613                        size_t sw_size, dma_addr_t * phys, void *metadata)
 614{
 615        size_t len = nelem * elem_size;
 616        void *s = NULL;
 617        void *p = dma_alloc_coherent(&pdev->dev, len, phys, GFP_KERNEL);
 618
 619        if (!p)
 620                return NULL;
 621        if (sw_size && metadata) {
 622                s = kcalloc(nelem, sw_size, GFP_KERNEL);
 623
 624                if (!s) {
 625                        dma_free_coherent(&pdev->dev, len, p, *phys);
 626                        return NULL;
 627                }
 628                *(void **)metadata = s;
 629        }
 630        memset(p, 0, len);
 631        return p;
 632}
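
      /*
       * Usage sketch (illustrative only; the variable names are examples): a
       * free list ring with per-entry SW state would be allocated along the
       * lines of
       *
       *      fl->desc = alloc_ring(adap->pdev, fl->size,
       *                            sizeof(struct rx_desc),
       *                            sizeof(struct rx_sw_desc),
       *                            &fl->phys_addr, &fl->sdesc);
       *
       * whereas a response queue, which keeps no SW state, passes a 0
       * sw_size and a NULL metadata pointer.
       */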
 633
 634/**
 635 *      t3_reset_qset - reset a sge qset
 636 *      @q: the queue set
 637 *
  638 *      Reset the qset structure.  The NAPI structure is preserved in
  639 *      the event of the qset's reincarnation, for example during EEH
  640 *      recovery.
 641 */
 642static void t3_reset_qset(struct sge_qset *q)
 643{
 644        if (q->adap &&
 645            !(q->adap->flags & NAPI_INIT)) {
 646                memset(q, 0, sizeof(*q));
 647                return;
 648        }
 649
 650        q->adap = NULL;
 651        memset(&q->rspq, 0, sizeof(q->rspq));
 652        memset(q->fl, 0, sizeof(struct sge_fl) * SGE_RXQ_PER_SET);
 653        memset(q->txq, 0, sizeof(struct sge_txq) * SGE_TXQ_PER_SET);
 654        q->txq_stopped = 0;
 655        q->tx_reclaim_timer.function = NULL; /* for t3_stop_sge_timers() */
 656        q->rx_reclaim_timer.function = NULL;
 657        q->nomem = 0;
 658        napi_free_frags(&q->napi);
 659}
 660
 661
 662/**
 663 *      free_qset - free the resources of an SGE queue set
 664 *      @adapter: the adapter owning the queue set
 665 *      @q: the queue set
 666 *
 667 *      Release the HW and SW resources associated with an SGE queue set, such
 668 *      as HW contexts, packet buffers, and descriptor rings.  Traffic to the
 669 *      queue set must be quiesced prior to calling this.
 670 */
 671static void t3_free_qset(struct adapter *adapter, struct sge_qset *q)
 672{
 673        int i;
 674        struct pci_dev *pdev = adapter->pdev;
 675
 676        for (i = 0; i < SGE_RXQ_PER_SET; ++i)
 677                if (q->fl[i].desc) {
 678                        spin_lock_irq(&adapter->sge.reg_lock);
 679                        t3_sge_disable_fl(adapter, q->fl[i].cntxt_id);
 680                        spin_unlock_irq(&adapter->sge.reg_lock);
 681                        free_rx_bufs(pdev, &q->fl[i]);
 682                        kfree(q->fl[i].sdesc);
 683                        dma_free_coherent(&pdev->dev,
 684                                          q->fl[i].size *
 685                                          sizeof(struct rx_desc), q->fl[i].desc,
 686                                          q->fl[i].phys_addr);
 687                }
 688
 689        for (i = 0; i < SGE_TXQ_PER_SET; ++i)
 690                if (q->txq[i].desc) {
 691                        spin_lock_irq(&adapter->sge.reg_lock);
 692                        t3_sge_enable_ecntxt(adapter, q->txq[i].cntxt_id, 0);
 693                        spin_unlock_irq(&adapter->sge.reg_lock);
 694                        if (q->txq[i].sdesc) {
 695                                free_tx_desc(adapter, &q->txq[i],
 696                                             q->txq[i].in_use);
 697                                kfree(q->txq[i].sdesc);
 698                        }
 699                        dma_free_coherent(&pdev->dev,
 700                                          q->txq[i].size *
 701                                          sizeof(struct tx_desc),
 702                                          q->txq[i].desc, q->txq[i].phys_addr);
 703                        __skb_queue_purge(&q->txq[i].sendq);
 704                }
 705
 706        if (q->rspq.desc) {
 707                spin_lock_irq(&adapter->sge.reg_lock);
 708                t3_sge_disable_rspcntxt(adapter, q->rspq.cntxt_id);
 709                spin_unlock_irq(&adapter->sge.reg_lock);
 710                dma_free_coherent(&pdev->dev,
 711                                  q->rspq.size * sizeof(struct rsp_desc),
 712                                  q->rspq.desc, q->rspq.phys_addr);
 713        }
 714
 715        t3_reset_qset(q);
 716}
 717
 718/**
 719 *      init_qset_cntxt - initialize an SGE queue set context info
 720 *      @qs: the queue set
 721 *      @id: the queue set id
 722 *
 723 *      Initializes the TIDs and context ids for the queues of a queue set.
 724 */
 725static void init_qset_cntxt(struct sge_qset *qs, unsigned int id)
 726{
 727        qs->rspq.cntxt_id = id;
 728        qs->fl[0].cntxt_id = 2 * id;
 729        qs->fl[1].cntxt_id = 2 * id + 1;
 730        qs->txq[TXQ_ETH].cntxt_id = FW_TUNNEL_SGEEC_START + id;
 731        qs->txq[TXQ_ETH].token = FW_TUNNEL_TID_START + id;
 732        qs->txq[TXQ_OFLD].cntxt_id = FW_OFLD_SGEEC_START + id;
 733        qs->txq[TXQ_CTRL].cntxt_id = FW_CTRL_SGEEC_START + id;
 734        qs->txq[TXQ_CTRL].token = FW_CTRL_TID_START + id;
 735}
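
      /*
       * For example, queue set 2 gets response-queue context 2, free-list
       * contexts 4 and 5, and its Ethernet, offload and control egress
       * contexts at offset 2 from FW_TUNNEL_SGEEC_START, FW_OFLD_SGEEC_START
       * and FW_CTRL_SGEEC_START respectively.
       */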
 736
 737/**
 738 *      sgl_len - calculates the size of an SGL of the given capacity
 739 *      @n: the number of SGL entries
 740 *
 741 *      Calculates the number of flits needed for a scatter/gather list that
 742 *      can hold the given number of entries.
 743 */
 744static inline unsigned int sgl_len(unsigned int n)
 745{
 746        /* alternatively: 3 * (n / 2) + 2 * (n & 1) */
 747        return (3 * n) / 2 + (n & 1);
 748}
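
      /*
       * Worked example: SGL entries are packed two buffers per three flits
       * (struct sg_ent), so n = 3 buffers need 3 * 3 / 2 + 1 = 5 flits (one
       * full sg_ent plus a half-used one) and n = 4 buffers need 6 flits.
       */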
 749
 750/**
 751 *      flits_to_desc - returns the num of Tx descriptors for the given flits
 752 *      @n: the number of flits
 753 *
 754 *      Calculates the number of Tx descriptors needed for the supplied number
 755 *      of flits.
 756 */
 757static inline unsigned int flits_to_desc(unsigned int n)
 758{
 759        BUG_ON(n >= ARRAY_SIZE(flit_desc_map));
 760        return flit_desc_map[n];
 761}
 762
 763/**
 764 *      get_packet - return the next ingress packet buffer from a free list
 765 *      @adap: the adapter that received the packet
 766 *      @fl: the SGE free list holding the packet
 767 *      @len: the packet length including any SGE padding
 768 *      @drop_thres: # of remaining buffers before we start dropping packets
 769 *
 770 *      Get the next packet from a free list and complete setup of the
 771 *      sk_buff.  If the packet is small we make a copy and recycle the
 772 *      original buffer, otherwise we use the original buffer itself.  If a
 773 *      positive drop threshold is supplied packets are dropped and their
 774 *      buffers recycled if (a) the number of remaining buffers is under the
 775 *      threshold and the packet is too big to copy, or (b) the packet should
 776 *      be copied but there is no memory for the copy.
 777 */
 778static struct sk_buff *get_packet(struct adapter *adap, struct sge_fl *fl,
 779                                  unsigned int len, unsigned int drop_thres)
 780{
 781        struct sk_buff *skb = NULL;
 782        struct rx_sw_desc *sd = &fl->sdesc[fl->cidx];
 783
 784        prefetch(sd->skb->data);
 785        fl->credits--;
 786
 787        if (len <= SGE_RX_COPY_THRES) {
 788                skb = alloc_skb(len, GFP_ATOMIC);
 789                if (likely(skb != NULL)) {
 790                        __skb_put(skb, len);
 791                        pci_dma_sync_single_for_cpu(adap->pdev,
 792                                            dma_unmap_addr(sd, dma_addr), len,
 793                                            PCI_DMA_FROMDEVICE);
 794                        memcpy(skb->data, sd->skb->data, len);
 795                        pci_dma_sync_single_for_device(adap->pdev,
 796                                            dma_unmap_addr(sd, dma_addr), len,
 797                                            PCI_DMA_FROMDEVICE);
 798                } else if (!drop_thres)
 799                        goto use_orig_buf;
 800recycle:
 801                recycle_rx_buf(adap, fl, fl->cidx);
 802                return skb;
 803        }
 804
 805        if (unlikely(fl->credits < drop_thres) &&
 806            refill_fl(adap, fl, min(MAX_RX_REFILL, fl->size - fl->credits - 1),
 807                      GFP_ATOMIC | __GFP_COMP) == 0)
 808                goto recycle;
 809
 810use_orig_buf:
 811        pci_unmap_single(adap->pdev, dma_unmap_addr(sd, dma_addr),
 812                         fl->buf_size, PCI_DMA_FROMDEVICE);
 813        skb = sd->skb;
 814        skb_put(skb, len);
 815        __refill_fl(adap, fl);
 816        return skb;
 817}
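
      /*
       * For illustration with the defaults above: a 200-byte ingress packet
       * (<= SGE_RX_COPY_THRES) is copied into a fresh skb and the original
       * free-list buffer is recycled in place, whereas a 1400-byte packet is
       * passed up in the original buffer, which is then unmapped and replaced
       * via __refill_fl().  @drop_thres only comes into play when the free
       * list is running short of credits.
       */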
 818
 819/**
 820 *      get_packet_pg - return the next ingress packet buffer from a free list
 821 *      @adap: the adapter that received the packet
  822 *      @fl: the SGE free list holding the packet
      *      @q: the response queue that is assembling the current packet
  823 *      @len: the packet length including any SGE padding
 824 *      @drop_thres: # of remaining buffers before we start dropping packets
 825 *
 826 *      Get the next packet from a free list populated with page chunks.
 827 *      If the packet is small we make a copy and recycle the original buffer,
 828 *      otherwise we attach the original buffer as a page fragment to a fresh
 829 *      sk_buff.  If a positive drop threshold is supplied packets are dropped
 830 *      and their buffers recycled if (a) the number of remaining buffers is
 831 *      under the threshold and the packet is too big to copy, or (b) there's
 832 *      no system memory.
 833 *
 834 *      Note: this function is similar to @get_packet but deals with Rx buffers
 835 *      that are page chunks rather than sk_buffs.
 836 */
 837static struct sk_buff *get_packet_pg(struct adapter *adap, struct sge_fl *fl,
 838                                     struct sge_rspq *q, unsigned int len,
 839                                     unsigned int drop_thres)
 840{
 841        struct sk_buff *newskb, *skb;
 842        struct rx_sw_desc *sd = &fl->sdesc[fl->cidx];
 843
 844        dma_addr_t dma_addr = dma_unmap_addr(sd, dma_addr);
 845
 846        newskb = skb = q->pg_skb;
 847        if (!skb && (len <= SGE_RX_COPY_THRES)) {
 848                newskb = alloc_skb(len, GFP_ATOMIC);
 849                if (likely(newskb != NULL)) {
 850                        __skb_put(newskb, len);
 851                        pci_dma_sync_single_for_cpu(adap->pdev, dma_addr, len,
 852                                            PCI_DMA_FROMDEVICE);
 853                        memcpy(newskb->data, sd->pg_chunk.va, len);
 854                        pci_dma_sync_single_for_device(adap->pdev, dma_addr,
 855                                                       len,
 856                                                       PCI_DMA_FROMDEVICE);
 857                } else if (!drop_thres)
 858                        return NULL;
 859recycle:
 860                fl->credits--;
 861                recycle_rx_buf(adap, fl, fl->cidx);
 862                q->rx_recycle_buf++;
 863                return newskb;
 864        }
 865
 866        if (unlikely(q->rx_recycle_buf || (!skb && fl->credits <= drop_thres)))
 867                goto recycle;
 868
 869        prefetch(sd->pg_chunk.p_cnt);
 870
 871        if (!skb)
 872                newskb = alloc_skb(SGE_RX_PULL_LEN, GFP_ATOMIC);
 873
 874        if (unlikely(!newskb)) {
 875                if (!drop_thres)
 876                        return NULL;
 877                goto recycle;
 878        }
 879
 880        pci_dma_sync_single_for_cpu(adap->pdev, dma_addr, len,
 881                                    PCI_DMA_FROMDEVICE);
 882        (*sd->pg_chunk.p_cnt)--;
 883        if (!*sd->pg_chunk.p_cnt && sd->pg_chunk.page != fl->pg_chunk.page)
 884                pci_unmap_page(adap->pdev,
 885                               sd->pg_chunk.mapping,
 886                               fl->alloc_size,
 887                               PCI_DMA_FROMDEVICE);
 888        if (!skb) {
 889                __skb_put(newskb, SGE_RX_PULL_LEN);
 890                memcpy(newskb->data, sd->pg_chunk.va, SGE_RX_PULL_LEN);
 891                skb_fill_page_desc(newskb, 0, sd->pg_chunk.page,
 892                                   sd->pg_chunk.offset + SGE_RX_PULL_LEN,
 893                                   len - SGE_RX_PULL_LEN);
 894                newskb->len = len;
 895                newskb->data_len = len - SGE_RX_PULL_LEN;
 896                newskb->truesize += newskb->data_len;
 897        } else {
 898                skb_fill_page_desc(newskb, skb_shinfo(newskb)->nr_frags,
 899                                   sd->pg_chunk.page,
 900                                   sd->pg_chunk.offset, len);
 901                newskb->len += len;
 902                newskb->data_len += len;
 903                newskb->truesize += len;
 904        }
 905
 906        fl->credits--;
 907        /*
 908         * We do not refill FLs here, we let the caller do it to overlap a
 909         * prefetch.
 910         */
 911        return newskb;
 912}
 913
 914/**
 915 *      get_imm_packet - return the next ingress packet buffer from a response
 916 *      @resp: the response descriptor containing the packet data
 917 *
 918 *      Return a packet containing the immediate data of the given response.
 919 */
 920static inline struct sk_buff *get_imm_packet(const struct rsp_desc *resp)
 921{
 922        struct sk_buff *skb = alloc_skb(IMMED_PKT_SIZE, GFP_ATOMIC);
 923
 924        if (skb) {
 925                __skb_put(skb, IMMED_PKT_SIZE);
 926                skb_copy_to_linear_data(skb, resp->imm_data, IMMED_PKT_SIZE);
 927        }
 928        return skb;
 929}
 930
 931/**
 932 *      calc_tx_descs - calculate the number of Tx descriptors for a packet
 933 *      @skb: the packet
 934 *
 935 *      Returns the number of Tx descriptors needed for the given Ethernet
 936 *      packet.  Ethernet packets require addition of WR and CPL headers.
 937 */
 938static inline unsigned int calc_tx_descs(const struct sk_buff *skb)
 939{
 940        unsigned int flits;
 941
 942        if (skb->len <= WR_LEN - sizeof(struct cpl_tx_pkt))
 943                return 1;
 944
 945        flits = sgl_len(skb_shinfo(skb)->nr_frags + 1) + 2;
 946        if (skb_shinfo(skb)->gso_size)
 947                flits++;
 948        return flits_to_desc(flits);
 949}
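
      /*
       * Worked example: a non-GSO packet with linear data and two page
       * fragments needs nfrags + 1 = 3 SGL entries, i.e. sgl_len(3) = 5
       * flits, plus 2 flits of WR/CPL header = 7 flits, and
       * flit_desc_map[7] == 1 descriptor.  A GSO packet adds one LSO flit
       * (8 in total), which still fits in a single descriptor, while a
       * packet short enough to be sent as immediate data always takes
       * exactly one descriptor.
       */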
 950
 951/**
 952 *      make_sgl - populate a scatter/gather list for a packet
 953 *      @skb: the packet
 954 *      @sgp: the SGL to populate
 955 *      @start: start address of skb main body data to include in the SGL
 956 *      @len: length of skb main body data to include in the SGL
 957 *      @pdev: the PCI device
 958 *
 959 *      Generates a scatter/gather list for the buffers that make up a packet
 960 *      and returns the SGL size in 8-byte words.  The caller must size the SGL
 961 *      appropriately.
 962 */
 963static inline unsigned int make_sgl(const struct sk_buff *skb,
 964                                    struct sg_ent *sgp, unsigned char *start,
 965                                    unsigned int len, struct pci_dev *pdev)
 966{
 967        dma_addr_t mapping;
 968        unsigned int i, j = 0, nfrags;
 969
 970        if (len) {
 971                mapping = pci_map_single(pdev, start, len, PCI_DMA_TODEVICE);
 972                sgp->len[0] = cpu_to_be32(len);
 973                sgp->addr[0] = cpu_to_be64(mapping);
 974                j = 1;
 975        }
 976
 977        nfrags = skb_shinfo(skb)->nr_frags;
 978        for (i = 0; i < nfrags; i++) {
 979                skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
 980
 981                mapping = pci_map_page(pdev, frag->page, frag->page_offset,
 982                                       frag->size, PCI_DMA_TODEVICE);
 983                sgp->len[j] = cpu_to_be32(frag->size);
 984                sgp->addr[j] = cpu_to_be64(mapping);
 985                j ^= 1;
 986                if (j == 0)
 987                        ++sgp;
 988        }
 989        if (j)
 990                sgp->len[j] = 0;
 991        return ((nfrags + (len != 0)) * 3) / 2 + j;
 992}
 993
 994/**
 995 *      check_ring_tx_db - check and potentially ring a Tx queue's doorbell
 996 *      @adap: the adapter
 997 *      @q: the Tx queue
 998 *
 999 *      Ring the doorbell if a Tx queue is asleep.  There is a natural race
1000 *      where the HW may go to sleep just after we check; in that case the
1001 *      interrupt handler will detect the outstanding Tx packet and ring
1002 *      the doorbell for us.
1003 *
1004 *      When GTS is disabled we unconditionally ring the doorbell.
1005 */
1006static inline void check_ring_tx_db(struct adapter *adap, struct sge_txq *q)
1007{
1008#if USE_GTS
1009        clear_bit(TXQ_LAST_PKT_DB, &q->flags);
1010        if (test_and_set_bit(TXQ_RUNNING, &q->flags) == 0) {
1011                set_bit(TXQ_LAST_PKT_DB, &q->flags);
1012                t3_write_reg(adap, A_SG_KDOORBELL,
1013                             F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
1014        }
1015#else
1016        wmb();                  /* write descriptors before telling HW */
1017        t3_write_reg(adap, A_SG_KDOORBELL,
1018                     F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
1019#endif
1020}
1021
1022static inline void wr_gen2(struct tx_desc *d, unsigned int gen)
1023{
1024#if SGE_NUM_GENBITS == 2
1025        d->flit[TX_DESC_FLITS - 1] = cpu_to_be64(gen);
1026#endif
1027}
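
      /*
       * A note on generation bits (a sketch of the scheme, not normative):
       * each ring keeps a generation flag that software flips whenever the
       * producer index wraps (see e.g. refill_fl() and t3_eth_xmit()), so
       * the SGE can distinguish newly written descriptors from stale ones
       * left over from the previous pass around the ring.  With
       * SGE_NUM_GENBITS == 2 the Tx descriptor carries a second copy of the
       * bit in its last flit, written by wr_gen2() after the rest of the
       * descriptor so the HW never acts on a partially written entry.
       */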
1028
1029/**
1030 *      write_wr_hdr_sgl - write a WR header and, optionally, SGL
1031 *      @ndesc: number of Tx descriptors spanned by the SGL
1032 *      @skb: the packet corresponding to the WR
1033 *      @d: first Tx descriptor to be written
1034 *      @pidx: index of above descriptors
1035 *      @q: the SGE Tx queue
1036 *      @sgl: the SGL
1037 *      @flits: number of flits to the start of the SGL in the first descriptor
1038 *      @sgl_flits: the SGL size in flits
1039 *      @gen: the Tx descriptor generation
1040 *      @wr_hi: top 32 bits of WR header based on WR type (big endian)
1041 *      @wr_lo: low 32 bits of WR header based on WR type (big endian)
1042 *
1043 *      Write a work request header and an associated SGL.  If the SGL is
1044 *      small enough to fit into one Tx descriptor it has already been written
1045 *      and we just need to write the WR header.  Otherwise we distribute the
1046 *      SGL across the number of descriptors it spans.
1047 */
1048static void write_wr_hdr_sgl(unsigned int ndesc, struct sk_buff *skb,
1049                             struct tx_desc *d, unsigned int pidx,
1050                             const struct sge_txq *q,
1051                             const struct sg_ent *sgl,
1052                             unsigned int flits, unsigned int sgl_flits,
1053                             unsigned int gen, __be32 wr_hi,
1054                             __be32 wr_lo)
1055{
1056        struct work_request_hdr *wrp = (struct work_request_hdr *)d;
1057        struct tx_sw_desc *sd = &q->sdesc[pidx];
1058
1059        sd->skb = skb;
1060        if (need_skb_unmap()) {
1061                sd->fragidx = 0;
1062                sd->addr_idx = 0;
1063                sd->sflit = flits;
1064        }
1065
1066        if (likely(ndesc == 1)) {
1067                sd->eop = 1;
1068                wrp->wr_hi = htonl(F_WR_SOP | F_WR_EOP | V_WR_DATATYPE(1) |
1069                                   V_WR_SGLSFLT(flits)) | wr_hi;
1070                wmb();
1071                wrp->wr_lo = htonl(V_WR_LEN(flits + sgl_flits) |
1072                                   V_WR_GEN(gen)) | wr_lo;
1073                wr_gen2(d, gen);
1074        } else {
1075                unsigned int ogen = gen;
1076                const u64 *fp = (const u64 *)sgl;
1077                struct work_request_hdr *wp = wrp;
1078
1079                wrp->wr_hi = htonl(F_WR_SOP | V_WR_DATATYPE(1) |
1080                                   V_WR_SGLSFLT(flits)) | wr_hi;
1081
1082                while (sgl_flits) {
1083                        unsigned int avail = WR_FLITS - flits;
1084
1085                        if (avail > sgl_flits)
1086                                avail = sgl_flits;
1087                        memcpy(&d->flit[flits], fp, avail * sizeof(*fp));
1088                        sgl_flits -= avail;
1089                        ndesc--;
1090                        if (!sgl_flits)
1091                                break;
1092
1093                        fp += avail;
1094                        d++;
1095                        sd->eop = 0;
1096                        sd++;
1097                        if (++pidx == q->size) {
1098                                pidx = 0;
1099                                gen ^= 1;
1100                                d = q->desc;
1101                                sd = q->sdesc;
1102                        }
1103
1104                        sd->skb = skb;
1105                        wrp = (struct work_request_hdr *)d;
1106                        wrp->wr_hi = htonl(V_WR_DATATYPE(1) |
1107                                           V_WR_SGLSFLT(1)) | wr_hi;
1108                        wrp->wr_lo = htonl(V_WR_LEN(min(WR_FLITS,
1109                                                        sgl_flits + 1)) |
1110                                           V_WR_GEN(gen)) | wr_lo;
1111                        wr_gen2(d, gen);
1112                        flits = 1;
1113                }
1114                sd->eop = 1;
1115                wrp->wr_hi |= htonl(F_WR_EOP);
1116                wmb();
1117                wp->wr_lo = htonl(V_WR_LEN(WR_FLITS) | V_WR_GEN(ogen)) | wr_lo;
1118                wr_gen2((struct tx_desc *)wp, ogen);
1119                WARN_ON(ndesc != 0);
1120        }
1121}
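
      /*
       * Worked example (taking WR_FLITS == 15, the SGE_NUM_GENBITS == 2 case
       * implied by flit_desc_map above): a WR with flits = 2 header flits and
       * sgl_flits = 20 copies 13 SGL flits into the first descriptor, then
       * spills the remaining 7 into a second descriptor whose own WR header
       * occupies flit 0 (hence flits is reset to 1).  flit_desc_map[22] == 2
       * confirms the descriptor count.
       */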
1122
1123/**
1124 *      write_tx_pkt_wr - write a TX_PKT work request
1125 *      @adap: the adapter
1126 *      @skb: the packet to send
1127 *      @pi: the egress interface
1128 *      @pidx: index of the first Tx descriptor to write
1129 *      @gen: the generation value to use
1130 *      @q: the Tx queue
1131 *      @ndesc: number of descriptors the packet will occupy
1132 *      @compl: the value of the COMPL bit to use
1133 *
1134 *      Generate a TX_PKT work request to send the supplied packet.
1135 */
1136static void write_tx_pkt_wr(struct adapter *adap, struct sk_buff *skb,
1137                            const struct port_info *pi,
1138                            unsigned int pidx, unsigned int gen,
1139                            struct sge_txq *q, unsigned int ndesc,
1140                            unsigned int compl)
1141{
1142        unsigned int flits, sgl_flits, cntrl, tso_info;
1143        struct sg_ent *sgp, sgl[MAX_SKB_FRAGS / 2 + 1];
1144        struct tx_desc *d = &q->desc[pidx];
1145        struct cpl_tx_pkt *cpl = (struct cpl_tx_pkt *)d;
1146
1147        cpl->len = htonl(skb->len);
1148        cntrl = V_TXPKT_INTF(pi->port_id);
1149
1150        if (vlan_tx_tag_present(skb))
1151                cntrl |= F_TXPKT_VLAN_VLD | V_TXPKT_VLAN(vlan_tx_tag_get(skb));
1152
1153        tso_info = V_LSO_MSS(skb_shinfo(skb)->gso_size);
1154        if (tso_info) {
1155                int eth_type;
1156                struct cpl_tx_pkt_lso *hdr = (struct cpl_tx_pkt_lso *)cpl;
1157
1158                d->flit[2] = 0;
1159                cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT_LSO);
1160                hdr->cntrl = htonl(cntrl);
1161                eth_type = skb_network_offset(skb) == ETH_HLEN ?
1162                    CPL_ETH_II : CPL_ETH_II_VLAN;
1163                tso_info |= V_LSO_ETH_TYPE(eth_type) |
1164                    V_LSO_IPHDR_WORDS(ip_hdr(skb)->ihl) |
1165                    V_LSO_TCPHDR_WORDS(tcp_hdr(skb)->doff);
1166                hdr->lso_info = htonl(tso_info);
1167                flits = 3;
1168        } else {
1169                cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT);
1170                cntrl |= F_TXPKT_IPCSUM_DIS;    /* SW calculates IP csum */
1171                cntrl |= V_TXPKT_L4CSUM_DIS(skb->ip_summed != CHECKSUM_PARTIAL);
1172                cpl->cntrl = htonl(cntrl);
1173
1174                if (skb->len <= WR_LEN - sizeof(*cpl)) {
1175                        q->sdesc[pidx].skb = NULL;
1176                        if (!skb->data_len)
1177                                skb_copy_from_linear_data(skb, &d->flit[2],
1178                                                          skb->len);
1179                        else
1180                                skb_copy_bits(skb, 0, &d->flit[2], skb->len);
1181
1182                        flits = (skb->len + 7) / 8 + 2;
1183                        cpl->wr.wr_hi = htonl(V_WR_BCNTLFLT(skb->len & 7) |
1184                                              V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT)
1185                                              | F_WR_SOP | F_WR_EOP | compl);
1186                        wmb();
1187                        cpl->wr.wr_lo = htonl(V_WR_LEN(flits) | V_WR_GEN(gen) |
1188                                              V_WR_TID(q->token));
1189                        wr_gen2(d, gen);
1190                        kfree_skb(skb);
1191                        return;
1192                }
1193
1194                flits = 2;
1195        }
1196
1197        sgp = ndesc == 1 ? (struct sg_ent *)&d->flit[flits] : sgl;
1198        sgl_flits = make_sgl(skb, sgp, skb->data, skb_headlen(skb), adap->pdev);
1199
1200        write_wr_hdr_sgl(ndesc, skb, d, pidx, q, sgl, flits, sgl_flits, gen,
1201                         htonl(V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) | compl),
1202                         htonl(V_WR_TID(q->token)));
1203}
1204
1205static inline void t3_stop_tx_queue(struct netdev_queue *txq,
1206                                    struct sge_qset *qs, struct sge_txq *q)
1207{
1208        netif_tx_stop_queue(txq);
1209        set_bit(TXQ_ETH, &qs->txq_stopped);
1210        q->stops++;
1211}
1212
1213/**
 1214 *      t3_eth_xmit - add a packet to the Ethernet Tx queue
1215 *      @skb: the packet
1216 *      @dev: the egress net device
1217 *
1218 *      Add a packet to an SGE Tx queue.  Runs with softirqs disabled.
1219 */
1220netdev_tx_t t3_eth_xmit(struct sk_buff *skb, struct net_device *dev)
1221{
1222        int qidx;
1223        unsigned int ndesc, pidx, credits, gen, compl;
1224        const struct port_info *pi = netdev_priv(dev);
1225        struct adapter *adap = pi->adapter;
1226        struct netdev_queue *txq;
1227        struct sge_qset *qs;
1228        struct sge_txq *q;
1229
1230        /*
1231         * The chip min packet length is 9 octets but play safe and reject
1232         * anything shorter than an Ethernet header.
1233         */
1234        if (unlikely(skb->len < ETH_HLEN)) {
1235                dev_kfree_skb(skb);
1236                return NETDEV_TX_OK;
1237        }
1238
1239        qidx = skb_get_queue_mapping(skb);
1240        qs = &pi->qs[qidx];
1241        q = &qs->txq[TXQ_ETH];
1242        txq = netdev_get_tx_queue(dev, qidx);
1243
1244        reclaim_completed_tx(adap, q, TX_RECLAIM_CHUNK);
1245
1246        credits = q->size - q->in_use;
1247        ndesc = calc_tx_descs(skb);
1248
1249        if (unlikely(credits < ndesc)) {
1250                t3_stop_tx_queue(txq, qs, q);
1251                dev_err(&adap->pdev->dev,
1252                        "%s: Tx ring %u full while queue awake!\n",
1253                        dev->name, q->cntxt_id & 7);
1254                return NETDEV_TX_BUSY;
1255        }
1256
1257        q->in_use += ndesc;
1258        if (unlikely(credits - ndesc < q->stop_thres)) {
1259                t3_stop_tx_queue(txq, qs, q);
1260
1261                if (should_restart_tx(q) &&
1262                    test_and_clear_bit(TXQ_ETH, &qs->txq_stopped)) {
1263                        q->restarts++;
1264                        netif_tx_start_queue(txq);
1265                }
1266        }
1267
1268        gen = q->gen;
1269        q->unacked += ndesc;
1270        compl = (q->unacked & 8) << (S_WR_COMPL - 3);
1271        q->unacked &= 7;
1272        pidx = q->pidx;
1273        q->pidx += ndesc;
1274        if (q->pidx >= q->size) {
1275                q->pidx -= q->size;
1276                q->gen ^= 1;
1277        }
1278
1279        /* update port statistics */
 1280        if (skb->ip_summed == CHECKSUM_PARTIAL)   /* Tx csum offload requested */
1281                qs->port_stats[SGE_PSTAT_TX_CSUM]++;
1282        if (skb_shinfo(skb)->gso_size)
1283                qs->port_stats[SGE_PSTAT_TSO]++;
1284        if (vlan_tx_tag_present(skb))
1285                qs->port_stats[SGE_PSTAT_VLANINS]++;
1286
1287        /*
1288         * We do not use Tx completion interrupts to free DMAd Tx packets.
1289         * This is good for performance but means that we rely on new Tx
1290         * packets arriving to run the destructors of completed packets,
1291         * which open up space in their sockets' send queues.  Sometimes
1292         * we do not get such new packets causing Tx to stall.  A single
1293         * UDP transmitter is a good example of this situation.  We have
1294         * a clean up timer that periodically reclaims completed packets
1295         * but it doesn't run often enough (nor do we want it to) to prevent
1296         * lengthy stalls.  A solution to this problem is to run the
1297         * destructor early, after the packet is queued but before it's DMAd.
1298         * A cons is that we lie to socket memory accounting, but the amount
1299         * of extra memory is reasonable (limited by the number of Tx
1300         * descriptors), the packets do actually get freed quickly by new
1301         * packets almost always, and for protocols like TCP that wait for
1302         * acks to really free up the data the extra memory is even less.
1303         * On the positive side we run the destructors on the sending CPU
1304         * rather than on a potentially different completing CPU, usually a
1305         * good thing.  We also run them without holding our Tx queue lock,
1306         * unlike what reclaim_completed_tx() would otherwise do.
1307         *
1308         * Run the destructor before telling the DMA engine about the packet
1309         * to make sure it doesn't complete and get freed prematurely.
1310         */
1311        if (likely(!skb_shared(skb)))
1312                skb_orphan(skb);
1313
1314        write_tx_pkt_wr(adap, skb, pi, pidx, gen, q, ndesc, compl);
1315        check_ring_tx_db(adap, q);
1316        return NETDEV_TX_OK;
1317}
1318
1319/**
1320 *      write_imm - write a packet into a Tx descriptor as immediate data
1321 *      @d: the Tx descriptor to write
1322 *      @skb: the packet
1323 *      @len: the length of packet data to write as immediate data
1324 *      @gen: the generation bit value to write
1325 *
1326 *      Writes a packet as immediate data into a Tx descriptor.  The packet
1327 *      contains a work request at its beginning.  We must write the packet
1328 *      carefully so the SGE doesn't read it accidentally before it's written
1329 *      in its entirety.
1330 */
1331static inline void write_imm(struct tx_desc *d, struct sk_buff *skb,
1332                             unsigned int len, unsigned int gen)
1333{
1334        struct work_request_hdr *from = (struct work_request_hdr *)skb->data;
1335        struct work_request_hdr *to = (struct work_request_hdr *)d;
1336
1337        if (likely(!skb->data_len))
1338                memcpy(&to[1], &from[1], len - sizeof(*from));
1339        else
1340                skb_copy_bits(skb, sizeof(*from), &to[1], len - sizeof(*from));
1341
1342        to->wr_hi = from->wr_hi | htonl(F_WR_SOP | F_WR_EOP |
1343                                        V_WR_BCNTLFLT(len & 7));
1344        wmb();
1345        to->wr_lo = from->wr_lo | htonl(V_WR_GEN(gen) |
1346                                        V_WR_LEN((len + 7) / 8));
1347        wr_gen2(d, gen);
1348        kfree_skb(skb);
1349}
1350
1351/**
1352 *      check_desc_avail - check descriptor availability on a send queue
1353 *      @adap: the adapter
1354 *      @q: the send queue
1355 *      @skb: the packet needing the descriptors
1356 *      @ndesc: the number of Tx descriptors needed
1357 *      @qid: the Tx queue number in its queue set (TXQ_OFLD or TXQ_CTRL)
1358 *
1359 *      Checks if the requested number of Tx descriptors is available on an
1360 *      SGE send queue.  If the queue is already suspended or not enough
1361 *      descriptors are available the packet is queued for later transmission.
1362 *      Must be called with the Tx queue locked.
1363 *
1364 *      Returns 0 if enough descriptors are available, 1 if there aren't
1365 *      enough descriptors and the packet has been queued, and 2 if the caller
1366 *      needs to retry because there weren't enough descriptors at the
1367 *      beginning of the call but some freed up in the mean time.
1368 */
1369static inline int check_desc_avail(struct adapter *adap, struct sge_txq *q,
1370                                   struct sk_buff *skb, unsigned int ndesc,
1371                                   unsigned int qid)
1372{
1373        if (unlikely(!skb_queue_empty(&q->sendq))) {
1374              addq_exit:__skb_queue_tail(&q->sendq, skb);
1375                return 1;
1376        }
1377        if (unlikely(q->size - q->in_use < ndesc)) {
1378                struct sge_qset *qs = txq_to_qset(q, qid);
1379
1380                set_bit(qid, &qs->txq_stopped);
1381                smp_mb__after_clear_bit();
1382
1383                if (should_restart_tx(q) &&
1384                    test_and_clear_bit(qid, &qs->txq_stopped))
1385                        return 2;
1386
1387                q->stops++;
1388                goto addq_exit;
1389        }
1390        return 0;
1391}
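    /*
     * Sketch of the calling convention for check_desc_avail(), mirroring
     * ctrl_xmit() and ofld_xmit() below (illustrative only): with the Tx
     * queue lock held,
     *
     *	again:	reclaim_completed_tx(adap, q, TX_RECLAIM_CHUNK);
     *		ret = check_desc_avail(adap, q, skb, ndesc, TXQ_OFLD);
     *		if (ret == 2)
     *			goto again;
     *		if (ret == 1) {
     *			spin_unlock(&q->lock);
     *			return NET_XMIT_CN;
     *		}
     *
     * and on a return of 0 the caller claims ndesc descriptors and writes
     * the work request.
     */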
1392
1393/**
1394 *      reclaim_completed_tx_imm - reclaim completed control-queue Tx descs
1395 *      @q: the SGE control Tx queue
1396 *
1397 *      This is a variant of reclaim_completed_tx() that is used for Tx queues
1398 *      that send only immediate data (presently just the control queues) and
1399 *      thus do not have any sk_buffs to release.
1400 */
1401static inline void reclaim_completed_tx_imm(struct sge_txq *q)
1402{
1403        unsigned int reclaim = q->processed - q->cleaned;
1404
1405        q->in_use -= reclaim;
1406        q->cleaned += reclaim;
1407}
1408
1409static inline int immediate(const struct sk_buff *skb)
1410{
1411        return skb->len <= WR_LEN;
1412}
1413
1414/**
1415 *      ctrl_xmit - send a packet through an SGE control Tx queue
1416 *      @adap: the adapter
1417 *      @q: the control queue
1418 *      @skb: the packet
1419 *
1420 *      Send a packet through an SGE control Tx queue.  Packets sent through
1421 *      a control queue must fit entirely as immediate data in a single Tx
1422 *      descriptor and have no page fragments.
1423 */
1424static int ctrl_xmit(struct adapter *adap, struct sge_txq *q,
1425                     struct sk_buff *skb)
1426{
1427        int ret;
1428        struct work_request_hdr *wrp = (struct work_request_hdr *)skb->data;
1429
1430        if (unlikely(!immediate(skb))) {
1431                WARN_ON(1);
1432                dev_kfree_skb(skb);
1433                return NET_XMIT_SUCCESS;
1434        }
1435
1436        wrp->wr_hi |= htonl(F_WR_SOP | F_WR_EOP);
1437        wrp->wr_lo = htonl(V_WR_TID(q->token));
1438
1439        spin_lock(&q->lock);
1440      again:reclaim_completed_tx_imm(q);
1441
1442        ret = check_desc_avail(adap, q, skb, 1, TXQ_CTRL);
1443        if (unlikely(ret)) {
1444                if (ret == 1) {
1445                        spin_unlock(&q->lock);
1446                        return NET_XMIT_CN;
1447                }
1448                goto again;
1449        }
1450
1451        write_imm(&q->desc[q->pidx], skb, skb->len, q->gen);
1452
1453        q->in_use++;
1454        if (++q->pidx >= q->size) {
1455                q->pidx = 0;
1456                q->gen ^= 1;
1457        }
1458        spin_unlock(&q->lock);
1459        wmb();
1460        t3_write_reg(adap, A_SG_KDOORBELL,
1461                     F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
1462        return NET_XMIT_SUCCESS;
1463}
1464
1465/**
1466 *      restart_ctrlq - restart a suspended control queue
1467 *      @data: ptr to the queue set containing the control queue
1468 *
1469 *      Resumes transmission on a suspended Tx control queue.
1470 */
1471static void restart_ctrlq(unsigned long data)
1472{
1473        struct sk_buff *skb;
1474        struct sge_qset *qs = (struct sge_qset *)data;
1475        struct sge_txq *q = &qs->txq[TXQ_CTRL];
1476
1477        spin_lock(&q->lock);
1478      again:reclaim_completed_tx_imm(q);
1479
1480        while (q->in_use < q->size &&
1481               (skb = __skb_dequeue(&q->sendq)) != NULL) {
1482
1483                write_imm(&q->desc[q->pidx], skb, skb->len, q->gen);
1484
1485                if (++q->pidx >= q->size) {
1486                        q->pidx = 0;
1487                        q->gen ^= 1;
1488                }
1489                q->in_use++;
1490        }
1491
1492        if (!skb_queue_empty(&q->sendq)) {
1493                set_bit(TXQ_CTRL, &qs->txq_stopped);
1494                smp_mb__after_clear_bit();
1495
1496                if (should_restart_tx(q) &&
1497                    test_and_clear_bit(TXQ_CTRL, &qs->txq_stopped))
1498                        goto again;
1499                q->stops++;
1500        }
1501
1502        spin_unlock(&q->lock);
1503        wmb();
1504        t3_write_reg(qs->adap, A_SG_KDOORBELL,
1505                     F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
1506}
1507
1508/*
1509 * Send a management message through control queue 0
1510 */
1511int t3_mgmt_tx(struct adapter *adap, struct sk_buff *skb)
1512{
1513        int ret;
1514        local_bh_disable();
1515        ret = ctrl_xmit(adap, &adap->sge.qs[0].txq[TXQ_CTRL], skb);
1516        local_bh_enable();
1517
1518        return ret;
1519}
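    /*
     * Illustrative sketch (not taken from the driver) of how a caller might
     * hand a firmware work request to t3_mgmt_tx().  The request must fit as
     * immediate data (skb->len <= WR_LEN); ctrl_xmit() fills in the SOP/EOP
     * flags and the queue token, so the caller only builds the WR body:
     *
     *	struct sk_buff *skb = alloc_skb(len, GFP_KERNEL);
     *	struct work_request_hdr *wr;
     *
     *	if (!skb)
     *		return -ENOMEM;
     *	wr = (struct work_request_hdr *)__skb_put(skb, len);
     *	memset(wr, 0, len);
     *	wr->wr_hi = htonl(V_WR_OP(...));	opcode for the request
     *	... fill in the opcode-specific payload ...
     *	t3_mgmt_tx(adap, skb);
     */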
1520
1521/**
1522 *      deferred_unmap_destructor - unmap a packet when it is freed
1523 *      @skb: the packet
1524 *
1525 *      This is the packet destructor used for Tx packets that need to remain
1526 *      mapped until they are freed rather than until their Tx descriptors are
1527 *      freed.
1528 */
1529static void deferred_unmap_destructor(struct sk_buff *skb)
1530{
1531        int i;
1532        const dma_addr_t *p;
1533        const struct skb_shared_info *si;
1534        const struct deferred_unmap_info *dui;
1535
1536        dui = (struct deferred_unmap_info *)skb->head;
1537        p = dui->addr;
1538
1539        if (skb->tail - skb->transport_header)
1540                pci_unmap_single(dui->pdev, *p++,
1541                                 skb->tail - skb->transport_header,
1542                                 PCI_DMA_TODEVICE);
1543
1544        si = skb_shinfo(skb);
1545        for (i = 0; i < si->nr_frags; i++)
1546                pci_unmap_page(dui->pdev, *p++, si->frags[i].size,
1547                               PCI_DMA_TODEVICE);
1548}
1549
1550static void setup_deferred_unmapping(struct sk_buff *skb, struct pci_dev *pdev,
1551                                     const struct sg_ent *sgl, int sgl_flits)
1552{
1553        dma_addr_t *p;
1554        struct deferred_unmap_info *dui;
1555
1556        dui = (struct deferred_unmap_info *)skb->head;
1557        dui->pdev = pdev;
1558        for (p = dui->addr; sgl_flits >= 3; sgl++, sgl_flits -= 3) {
1559                *p++ = be64_to_cpu(sgl->addr[0]);
1560                *p++ = be64_to_cpu(sgl->addr[1]);
1561        }
1562        if (sgl_flits)
1563                *p = be64_to_cpu(sgl->addr[0]);
1564}
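    /*
     * The two helpers above cooperate through the deferred_unmap_info kept
     * in the skb headroom (skb->head): setup_deferred_unmapping() copies the
     * DMA addresses out of the SGL in order, two addresses per 3-flit
     * sg_ent, and deferred_unmap_destructor() later walks that array in the
     * same order, unmapping the linear part first and then each page
     * fragment when the skb is finally freed.
     */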
1565
1566/**
1567 *      write_ofld_wr - write an offload work request
1568 *      @adap: the adapter
1569 *      @skb: the packet to send
1570 *      @q: the Tx queue
1571 *      @pidx: index of the first Tx descriptor to write
1572 *      @gen: the generation value to use
1573 *      @ndesc: number of descriptors the packet will occupy
1574 *
1575 *      Write an offload work request to send the supplied packet.  The packet
1576 *      data already carry the work request with most fields populated.
1577 */
1578static void write_ofld_wr(struct adapter *adap, struct sk_buff *skb,
1579                          struct sge_txq *q, unsigned int pidx,
1580                          unsigned int gen, unsigned int ndesc)
1581{
1582        unsigned int sgl_flits, flits;
1583        struct work_request_hdr *from;
1584        struct sg_ent *sgp, sgl[MAX_SKB_FRAGS / 2 + 1];
1585        struct tx_desc *d = &q->desc[pidx];
1586
1587        if (immediate(skb)) {
1588                q->sdesc[pidx].skb = NULL;
1589                write_imm(d, skb, skb->len, gen);
1590                return;
1591        }
1592
1593        /* Only TX_DATA builds SGLs */
1594
1595        from = (struct work_request_hdr *)skb->data;
1596        memcpy(&d->flit[1], &from[1],
1597               skb_transport_offset(skb) - sizeof(*from));
1598
1599        flits = skb_transport_offset(skb) / 8;
1600        sgp = ndesc == 1 ? (struct sg_ent *)&d->flit[flits] : sgl;
1601        sgl_flits = make_sgl(skb, sgp, skb_transport_header(skb),
1602                             skb->tail - skb->transport_header,
1603                             adap->pdev);
1604        if (need_skb_unmap()) {
1605                setup_deferred_unmapping(skb, adap->pdev, sgp, sgl_flits);
1606                skb->destructor = deferred_unmap_destructor;
1607        }
1608
1609        write_wr_hdr_sgl(ndesc, skb, d, pidx, q, sgl, flits, sgl_flits,
1610                         gen, from->wr_hi, from->wr_lo);
1611}
1612
1613/**
1614 *      calc_tx_descs_ofld - calculate # of Tx descriptors for an offload packet
1615 *      @skb: the packet
1616 *
1617 *      Returns the number of Tx descriptors needed for the given offload
1618 *      packet.  These packets are already fully constructed.
1619 */
1620static inline unsigned int calc_tx_descs_ofld(const struct sk_buff *skb)
1621{
1622        unsigned int flits, cnt;
1623
1624        if (skb->len <= WR_LEN)
1625                return 1;       /* packet fits as immediate data */
1626
1627        flits = skb_transport_offset(skb) / 8;  /* headers */
1628        cnt = skb_shinfo(skb)->nr_frags;
1629        if (skb->tail != skb->transport_header)
1630                cnt++;
1631        return flits_to_desc(flits + sgl_len(cnt));
1632}
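    /*
     * Worked example (illustrative): an offload packet whose WR and headers
     * occupy 40 bytes has flits = 40 / 8 = 5.  With two page fragments plus
     * payload in the linear area, cnt = 3 scatter/gather entries, and
     * sgl_len(3) = (3 * 3) / 2 + (3 & 1) = 5 flits for the gather list
     * (sgl_len() is defined earlier in this file).  flits_to_desc(5 + 5)
     * then maps the total flit count onto Tx descriptors via a lookup table.
     */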
1633
1634/**
1635 *      ofld_xmit - send a packet through an offload queue
1636 *      @adap: the adapter
1637 *      @q: the Tx offload queue
1638 *      @skb: the packet
1639 *
1640 *      Send an offload packet through an SGE offload queue.
1641 */
1642static int ofld_xmit(struct adapter *adap, struct sge_txq *q,
1643                     struct sk_buff *skb)
1644{
1645        int ret;
1646        unsigned int ndesc = calc_tx_descs_ofld(skb), pidx, gen;
1647
1648        spin_lock(&q->lock);
1649again:  reclaim_completed_tx(adap, q, TX_RECLAIM_CHUNK);
1650
1651        ret = check_desc_avail(adap, q, skb, ndesc, TXQ_OFLD);
1652        if (unlikely(ret)) {
1653                if (ret == 1) {
1654                        skb->priority = ndesc;  /* save for restart */
1655                        spin_unlock(&q->lock);
1656                        return NET_XMIT_CN;
1657                }
1658                goto again;
1659        }
1660
1661        gen = q->gen;
1662        q->in_use += ndesc;
1663        pidx = q->pidx;
1664        q->pidx += ndesc;
1665        if (q->pidx >= q->size) {
1666                q->pidx -= q->size;
1667                q->gen ^= 1;
1668        }
1669        spin_unlock(&q->lock);
1670
1671        write_ofld_wr(adap, skb, q, pidx, gen, ndesc);
1672        check_ring_tx_db(adap, q);
1673        return NET_XMIT_SUCCESS;
1674}
1675
1676/**
1677 *      restart_offloadq - restart a suspended offload queue
1678 *      @data: ptr to the queue set containing the offload queue
1679 *
1680 *      Resumes transmission on a suspended Tx offload queue.
1681 */
1682static void restart_offloadq(unsigned long data)
1683{
1684        struct sk_buff *skb;
1685        struct sge_qset *qs = (struct sge_qset *)data;
1686        struct sge_txq *q = &qs->txq[TXQ_OFLD];
1687        const struct port_info *pi = netdev_priv(qs->netdev);
1688        struct adapter *adap = pi->adapter;
1689
1690        spin_lock(&q->lock);
1691again:  reclaim_completed_tx(adap, q, TX_RECLAIM_CHUNK);
1692
1693        while ((skb = skb_peek(&q->sendq)) != NULL) {
1694                unsigned int gen, pidx;
1695                unsigned int ndesc = skb->priority;
1696
1697                if (unlikely(q->size - q->in_use < ndesc)) {
1698                        set_bit(TXQ_OFLD, &qs->txq_stopped);
1699                        smp_mb__after_clear_bit();
1700
1701                        if (should_restart_tx(q) &&
1702                            test_and_clear_bit(TXQ_OFLD, &qs->txq_stopped))
1703                                goto again;
1704                        q->stops++;
1705                        break;
1706                }
1707
1708                gen = q->gen;
1709                q->in_use += ndesc;
1710                pidx = q->pidx;
1711                q->pidx += ndesc;
1712                if (q->pidx >= q->size) {
1713                        q->pidx -= q->size;
1714                        q->gen ^= 1;
1715                }
1716                __skb_unlink(skb, &q->sendq);
1717                spin_unlock(&q->lock);
1718
1719                write_ofld_wr(adap, skb, q, pidx, gen, ndesc);
1720                spin_lock(&q->lock);
1721        }
1722        spin_unlock(&q->lock);
1723
1724#if USE_GTS
1725        set_bit(TXQ_RUNNING, &q->flags);
1726        set_bit(TXQ_LAST_PKT_DB, &q->flags);
1727#endif
1728        wmb();
1729        t3_write_reg(adap, A_SG_KDOORBELL,
1730                     F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
1731}
1732
1733/**
1734 *      queue_set - return the queue set a packet should use
1735 *      @skb: the packet
1736 *
1737 *      Maps a packet to the SGE queue set it should use.  The desired queue
1738 *      set is carried in bits 1-3 in the packet's priority.
1739 */
1740static inline int queue_set(const struct sk_buff *skb)
1741{
1742        return skb->priority >> 1;
1743}
1744
1745/**
1746 *      is_ctrl_pkt - return whether an offload packet is a control packet
1747 *      @skb: the packet
1748 *
1749 *      Determines whether an offload packet should use an OFLD or a CTRL
1750 *      Tx queue.  This is indicated by bit 0 in the packet's priority.
1751 */
1752static inline int is_ctrl_pkt(const struct sk_buff *skb)
1753{
1754        return skb->priority & 1;
1755}
1756
1757/**
1758 *      t3_offload_tx - send an offload packet
1759 *      @tdev: the offload device to send to
1760 *      @skb: the packet
1761 *
1762 *      Sends an offload packet.  We use the packet priority to select the
1763 *      appropriate Tx queue as follows: bit 0 selects between the regular
1764 *      (OFLD) queue and the control queue, and bits 1-3 select the queue set.
1765 */
1766int t3_offload_tx(struct t3cdev *tdev, struct sk_buff *skb)
1767{
1768        struct adapter *adap = tdev2adap(tdev);
1769        struct sge_qset *qs = &adap->sge.qs[queue_set(skb)];
1770
1771        if (unlikely(is_ctrl_pkt(skb)))
1772                return ctrl_xmit(adap, &qs->txq[TXQ_CTRL], skb);
1773
1774        return ofld_xmit(adap, &qs->txq[TXQ_OFLD], skb);
1775}
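    /*
     * Illustrative example (not part of the driver): an offload client that
     * wants a regular (non-control) packet sent on queue set 2 would set
     *
     *	skb->priority = (2 << 1) | 0;	bits 1-3: queue set, bit 0: control
     *
     * before calling t3_offload_tx(); setting bit 0 instead routes the
     * packet to that queue set's control queue.
     */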
1776
1777/**
1778 *      offload_enqueue - add an offload packet to an SGE offload receive queue
1779 *      @q: the SGE response queue
1780 *      @skb: the packet
1781 *
1782 *      Add a new offload packet to an SGE response queue's offload packet
1783 *      queue.  If the packet is the first on the queue it schedules the RX
1784 *      softirq to process the queue.
1785 */
1786static inline void offload_enqueue(struct sge_rspq *q, struct sk_buff *skb)
1787{
1788        int was_empty = skb_queue_empty(&q->rx_queue);
1789
1790        __skb_queue_tail(&q->rx_queue, skb);
1791
1792        if (was_empty) {
1793                struct sge_qset *qs = rspq_to_qset(q);
1794
1795                napi_schedule(&qs->napi);
1796        }
1797}
1798
1799/**
1800 *      deliver_partial_bundle - deliver a (partial) bundle of Rx offload pkts
1801 *      @tdev: the offload device that will be receiving the packets
1802 *      @q: the SGE response queue that assembled the bundle
1803 *      @skbs: the partial bundle
1804 *      @n: the number of packets in the bundle
1805 *
1806 *      Delivers a (partial) bundle of Rx offload packets to an offload device.
1807 */
1808static inline void deliver_partial_bundle(struct t3cdev *tdev,
1809                                          struct sge_rspq *q,
1810                                          struct sk_buff *skbs[], int n)
1811{
1812        if (n) {
1813                q->offload_bundles++;
1814                tdev->recv(tdev, skbs, n);
1815        }
1816}
1817
1818/**
1819 *      ofld_poll - NAPI handler for offload packets in interrupt mode
1820 *      @napi: the NAPI instance doing the polling
1821 *      @budget: polling budget
1822 *
1823 *      The NAPI handler for offload packets when a response queue is serviced
1824 *      by the hard interrupt handler, i.e., when it's operating in non-polling
1825 *      mode.  Creates small packet batches and sends them through the offload
1826 *      receive handler.  Batches need to be of modest size as we do prefetches
1827 *      on the packets in each.
1828 */
1829static int ofld_poll(struct napi_struct *napi, int budget)
1830{
1831        struct sge_qset *qs = container_of(napi, struct sge_qset, napi);
1832        struct sge_rspq *q = &qs->rspq;
1833        struct adapter *adapter = qs->adap;
1834        int work_done = 0;
1835
1836        while (work_done < budget) {
1837                struct sk_buff *skb, *tmp, *skbs[RX_BUNDLE_SIZE];
1838                struct sk_buff_head queue;
1839                int ngathered;
1840
1841                spin_lock_irq(&q->lock);
1842                __skb_queue_head_init(&queue);
1843                skb_queue_splice_init(&q->rx_queue, &queue);
1844                if (skb_queue_empty(&queue)) {
1845                        napi_complete(napi);
1846                        spin_unlock_irq(&q->lock);
1847                        return work_done;
1848                }
1849                spin_unlock_irq(&q->lock);
1850
1851                ngathered = 0;
1852                skb_queue_walk_safe(&queue, skb, tmp) {
1853                        if (work_done >= budget)
1854                                break;
1855                        work_done++;
1856
1857                        __skb_unlink(skb, &queue);
1858                        prefetch(skb->data);
1859                        skbs[ngathered] = skb;
1860                        if (++ngathered == RX_BUNDLE_SIZE) {
1861                                q->offload_bundles++;
1862                                adapter->tdev.recv(&adapter->tdev, skbs,
1863                                                   ngathered);
1864                                ngathered = 0;
1865                        }
1866                }
1867                if (!skb_queue_empty(&queue)) {
1868                        /* splice remaining packets back onto Rx queue */
1869                        spin_lock_irq(&q->lock);
1870                        skb_queue_splice(&queue, &q->rx_queue);
1871                        spin_unlock_irq(&q->lock);
1872                }
1873                deliver_partial_bundle(&adapter->tdev, q, skbs, ngathered);
1874        }
1875
1876        return work_done;
1877}
1878
1879/**
1880 *      rx_offload - process a received offload packet
1881 *      @tdev: the offload device receiving the packet
1882 *      @rq: the response queue that received the packet
1883 *      @skb: the packet
1884 *      @rx_gather: a gather list of packets if we are building a bundle
1885 *      @gather_idx: index of the next available slot in the bundle
1886 *
1887 *      Process an ingress offload packet and add it to the offload ingress
1888 *      queue.  Returns the index of the next available slot in the bundle.
1889 */
1890static inline int rx_offload(struct t3cdev *tdev, struct sge_rspq *rq,
1891                             struct sk_buff *skb, struct sk_buff *rx_gather[],
1892                             unsigned int gather_idx)
1893{
1894        skb_reset_mac_header(skb);
1895        skb_reset_network_header(skb);
1896        skb_reset_transport_header(skb);
1897
1898        if (rq->polling) {
1899                rx_gather[gather_idx++] = skb;
1900                if (gather_idx == RX_BUNDLE_SIZE) {
1901                        tdev->recv(tdev, rx_gather, RX_BUNDLE_SIZE);
1902                        gather_idx = 0;
1903                        rq->offload_bundles++;
1904                }
1905        } else
1906                offload_enqueue(rq, skb);
1907
1908        return gather_idx;
1909}
1910
1911/**
1912 *      restart_tx - check whether to restart suspended Tx queues
1913 *      @qs: the queue set to resume
1914 *
1915 *      Restarts suspended Tx queues of an SGE queue set if they have enough
1916 *      free resources to resume operation.
1917 */
1918static void restart_tx(struct sge_qset *qs)
1919{
1920        if (test_bit(TXQ_ETH, &qs->txq_stopped) &&
1921            should_restart_tx(&qs->txq[TXQ_ETH]) &&
1922            test_and_clear_bit(TXQ_ETH, &qs->txq_stopped)) {
1923                qs->txq[TXQ_ETH].restarts++;
1924                if (netif_running(qs->netdev))
1925                        netif_tx_wake_queue(qs->tx_q);
1926        }
1927
1928        if (test_bit(TXQ_OFLD, &qs->txq_stopped) &&
1929            should_restart_tx(&qs->txq[TXQ_OFLD]) &&
1930            test_and_clear_bit(TXQ_OFLD, &qs->txq_stopped)) {
1931                qs->txq[TXQ_OFLD].restarts++;
1932                tasklet_schedule(&qs->txq[TXQ_OFLD].qresume_tsk);
1933        }
1934        if (test_bit(TXQ_CTRL, &qs->txq_stopped) &&
1935            should_restart_tx(&qs->txq[TXQ_CTRL]) &&
1936            test_and_clear_bit(TXQ_CTRL, &qs->txq_stopped)) {
1937                qs->txq[TXQ_CTRL].restarts++;
1938                tasklet_schedule(&qs->txq[TXQ_CTRL].qresume_tsk);
1939        }
1940}
1941
1942/**
1943 *      cxgb3_arp_process - process an ARP request probing a private IP address
1944 *      @pi: the port the ARP request arrived on
1945 *      @skb: the skbuff containing the ARP request
1946 *
1947 *      Check if the ARP request is probing the private IP address
1948 *      dedicated to iSCSI, generate an ARP reply if so.
1949 */
1950static void cxgb3_arp_process(struct port_info *pi, struct sk_buff *skb)
1951{
1952        struct net_device *dev = skb->dev;
1953        struct arphdr *arp;
1954        unsigned char *arp_ptr;
1955        unsigned char *sha;
1956        __be32 sip, tip;
1957
1958        if (!dev)
1959                return;
1960
1961        skb_reset_network_header(skb);
1962        arp = arp_hdr(skb);
1963
1964        if (arp->ar_op != htons(ARPOP_REQUEST))
1965                return;
1966
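            /*
             * The ARP payload that follows the header is laid out as sender
             * HW address, sender IP, target HW address, target IP; the walk
             * below records the sender's HW address and IP and skips the
             * target HW address to reach the target IP.
             */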
1967        arp_ptr = (unsigned char *)(arp + 1);
1968        sha = arp_ptr;
1969        arp_ptr += dev->addr_len;
1970        memcpy(&sip, arp_ptr, sizeof(sip));
1971        arp_ptr += sizeof(sip);
1972        arp_ptr += dev->addr_len;
1973        memcpy(&tip, arp_ptr, sizeof(tip));
1974
1975        if (tip != pi->iscsi_ipv4addr)
1976                return;
1977
1978        arp_send(ARPOP_REPLY, ETH_P_ARP, sip, dev, tip, sha,
1979                 pi->iscsic.mac_addr, sha);
1980
1981}
1982
1983static inline int is_arp(struct sk_buff *skb)
1984{
1985        return skb->protocol == htons(ETH_P_ARP);
1986}
1987
1988static void cxgb3_process_iscsi_prov_pack(struct port_info *pi,
1989                                        struct sk_buff *skb)
1990{
1991        if (is_arp(skb)) {
1992                cxgb3_arp_process(pi, skb);
1993                return;
1994        }
1995
1996        if (pi->iscsic.recv)
1997                pi->iscsic.recv(pi, skb);
1998
1999}
2000
2001/**
2002 *      rx_eth - process an ingress ethernet packet
2003 *      @adap: the adapter
2004 *      @rq: the response queue that received the packet
2005 *      @skb: the packet
2006 *      @pad: amount of padding at the start of the buffer
2007 *
2008 *      Process an ingress ethernet packet and deliver it to the stack.
2009 *      The padding is 2 if the packet was delivered in an Rx buffer and 0
2010 *      if it was immediate data in a response.
2011 */
2012static void rx_eth(struct adapter *adap, struct sge_rspq *rq,
2013                   struct sk_buff *skb, int pad, int lro)
2014{
2015        struct cpl_rx_pkt *p = (struct cpl_rx_pkt *)(skb->data + pad);
2016        struct sge_qset *qs = rspq_to_qset(rq);
2017        struct port_info *pi;
2018
2019        skb_pull(skb, sizeof(*p) + pad);
2020        skb->protocol = eth_type_trans(skb, adap->port[p->iff]);
2021        pi = netdev_priv(skb->dev);
2022        if ((pi->rx_offload & T3_RX_CSUM) && p->csum_valid &&
2023            p->csum == htons(0xffff) && !p->fragment) {
2024                qs->port_stats[SGE_PSTAT_RX_CSUM_GOOD]++;
2025                skb->ip_summed = CHECKSUM_UNNECESSARY;
2026        } else
2027                skb_checksum_none_assert(skb);
2028        skb_record_rx_queue(skb, qs - &adap->sge.qs[0]);
2029
2030        if (unlikely(p->vlan_valid)) {
2031                struct vlan_group *grp = pi->vlan_grp;
2032
2033                qs->port_stats[SGE_PSTAT_VLANEX]++;
2034                if (likely(grp))
2035                        if (lro)
2036                                vlan_gro_receive(&qs->napi, grp,
2037                                                 ntohs(p->vlan), skb);
2038                        else {
2039                                if (unlikely(pi->iscsic.flags)) {
2040                                        unsigned short vtag = ntohs(p->vlan) &
2041                                                                VLAN_VID_MASK;
2042                                        skb->dev = vlan_group_get_device(grp,
2043                                                                         vtag);
2044                                        cxgb3_process_iscsi_prov_pack(pi, skb);
2045                                }
2046                                __vlan_hwaccel_rx(skb, grp, ntohs(p->vlan),
2047                                                  rq->polling);
2048                        }
2049                else
2050                        dev_kfree_skb_any(skb);
2051        } else if (rq->polling) {
2052                if (lro)
2053                        napi_gro_receive(&qs->napi, skb);
2054                else {
2055                        if (unlikely(pi->iscsic.flags))
2056                                cxgb3_process_iscsi_prov_pack(pi, skb);
2057                        netif_receive_skb(skb);
2058                }
2059        } else
2060                netif_rx(skb);
2061}
2062
2063static inline int is_eth_tcp(u32 rss)
2064{
2065        return G_HASHTYPE(ntohl(rss)) == RSS_HASH_4_TUPLE;
2066}
2067
2068/**
2069 *      lro_add_page - add a page chunk to an LRO session
2070 *      @adap: the adapter
2071 *      @qs: the associated queue set
2072 *      @fl: the free list containing the page chunk to add
2073 *      @len: packet length
2074 *      @complete: Indicates the last fragment of a frame
2075 *
2076 *      Add a received packet contained in a page chunk to an existing LRO
2077 *      session.
2078 */
2079static void lro_add_page(struct adapter *adap, struct sge_qset *qs,
2080                         struct sge_fl *fl, int len, int complete)
2081{
2082        struct rx_sw_desc *sd = &fl->sdesc[fl->cidx];
2083        struct port_info *pi = netdev_priv(qs->netdev);
2084        struct sk_buff *skb = NULL;
2085        struct cpl_rx_pkt *cpl;
2086        struct skb_frag_struct *rx_frag;
2087        int nr_frags;
2088        int offset = 0;
2089
2090        if (!qs->nomem) {
2091                skb = napi_get_frags(&qs->napi);
2092                qs->nomem = !skb;
2093        }
2094
2095        fl->credits--;
2096
2097        pci_dma_sync_single_for_cpu(adap->pdev,
2098                                    dma_unmap_addr(sd, dma_addr),
2099                                    fl->buf_size - SGE_PG_RSVD,
2100                                    PCI_DMA_FROMDEVICE);
2101
2102        (*sd->pg_chunk.p_cnt)--;
2103        if (!*sd->pg_chunk.p_cnt && sd->pg_chunk.page != fl->pg_chunk.page)
2104                pci_unmap_page(adap->pdev,
2105                               sd->pg_chunk.mapping,
2106                               fl->alloc_size,
2107                               PCI_DMA_FROMDEVICE);
2108
2109        if (!skb) {
2110                put_page(sd->pg_chunk.page);
2111                if (complete)
2112                        qs->nomem = 0;
2113                return;
2114        }
2115
2116        rx_frag = skb_shinfo(skb)->frags;
2117        nr_frags = skb_shinfo(skb)->nr_frags;
2118
2119        if (!nr_frags) {
2120                offset = 2 + sizeof(struct cpl_rx_pkt);
2121                cpl = qs->lro_va = sd->pg_chunk.va + 2;
2122
2123                if ((pi->rx_offload & T3_RX_CSUM) &&
2124                     cpl->csum_valid && cpl->csum == htons(0xffff)) {
2125                        skb->ip_summed = CHECKSUM_UNNECESSARY;
2126                        qs->port_stats[SGE_PSTAT_RX_CSUM_GOOD]++;
2127                } else
2128                        skb->ip_summed = CHECKSUM_NONE;
2129        } else
2130                cpl = qs->lro_va;
2131
2132        len -= offset;
2133
2134        rx_frag += nr_frags;
2135        rx_frag->page = sd->pg_chunk.page;
2136        rx_frag->page_offset = sd->pg_chunk.offset + offset;
2137        rx_frag->size = len;
2138
2139        skb->len += len;
2140        skb->data_len += len;
2141        skb->truesize += len;
2142        skb_shinfo(skb)->nr_frags++;
2143
2144        if (!complete)
2145                return;
2146
2147        skb_record_rx_queue(skb, qs - &adap->sge.qs[0]);
2148
2149        if (unlikely(cpl->vlan_valid)) {
2150                struct vlan_group *grp = pi->vlan_grp;
2151
2152                if (likely(grp != NULL)) {
2153                        vlan_gro_frags(&qs->napi, grp, ntohs(cpl->vlan));
2154                        return;
2155                }
2156        }
2157        napi_gro_frags(&qs->napi);
2158}
2159
2160/**
2161 *      handle_rsp_cntrl_info - handles control information in a response
2162 *      @qs: the queue set corresponding to the response
2163 *      @flags: the response control flags
2164 *
2165 *      Handles the control information of an SGE response, such as GTS
2166 *      indications and completion credits for the queue set's Tx queues.
2167 *      HW coalesces credits; we don't do any extra SW coalescing.
2168 */
2169static inline void handle_rsp_cntrl_info(struct sge_qset *qs, u32 flags)
2170{
2171        unsigned int credits;
2172
2173#if USE_GTS
2174        if (flags & F_RSPD_TXQ0_GTS)
2175                clear_bit(TXQ_RUNNING, &qs->txq[TXQ_ETH].flags);
2176#endif
2177
2178        credits = G_RSPD_TXQ0_CR(flags);
2179        if (credits)
2180                qs->txq[TXQ_ETH].processed += credits;
2181
2182        credits = G_RSPD_TXQ2_CR(flags);
2183        if (credits)
2184                qs->txq[TXQ_CTRL].processed += credits;
2185
2186# if USE_GTS
2187        if (flags & F_RSPD_TXQ1_GTS)
2188                clear_bit(TXQ_RUNNING, &qs->txq[TXQ_OFLD].flags);
2189# endif
2190        credits = G_RSPD_TXQ1_CR(flags);
2191        if (credits)
2192                qs->txq[TXQ_OFLD].processed += credits;
2193}
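    /*
     * The credits accumulated into q->processed here are what the Tx-side
     * reclaim paths (reclaim_completed_tx() and reclaim_completed_tx_imm())
     * compare against q->cleaned to decide how many descriptors can be
     * returned to the queue.
     */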
2194
2195/**
2196 *      check_ring_db - check if we need to ring any doorbells
2197 *      @adapter: the adapter
2198 *      @qs: the queue set whose Tx queues are to be examined
2199 *      @sleeping: indicates which Tx queue sent GTS
2200 *
2201 *      Checks if some of a queue set's Tx queues need to ring their doorbells
2202 *      to resume transmission after idling while they still have unprocessed
2203 *      descriptors.
2204 */
2205static void check_ring_db(struct adapter *adap, struct sge_qset *qs,
2206                          unsigned int sleeping)
2207{
2208        if (sleeping & F_RSPD_TXQ0_GTS) {
2209                struct sge_txq *txq = &qs->txq[TXQ_ETH];
2210
2211                if (txq->cleaned + txq->in_use != txq->processed &&
2212                    !test_and_set_bit(TXQ_LAST_PKT_DB, &txq->flags)) {
2213                        set_bit(TXQ_RUNNING, &txq->flags);
2214                        t3_write_reg(adap, A_SG_KDOORBELL, F_SELEGRCNTX |
2215                                     V_EGRCNTX(txq->cntxt_id));
2216                }
2217        }
2218
2219        if (sleeping & F_RSPD_TXQ1_GTS) {
2220                struct sge_txq *txq = &qs->txq[TXQ_OFLD];
2221
2222                if (txq->cleaned + txq->in_use != txq->processed &&
2223                    !test_and_set_bit(TXQ_LAST_PKT_DB, &txq->flags)) {
2224                        set_bit(TXQ_RUNNING, &txq->flags);
2225                        t3_write_reg(adap, A_SG_KDOORBELL, F_SELEGRCNTX |
2226                                     V_EGRCNTX(txq->cntxt_id));
2227                }
2228        }
2229}
2230
2231/**
2232 *      is_new_response - check if a response is newly written
2233 *      @r: the response descriptor
2234 *      @q: the response queue
2235 *
2236 *      Returns true if a response descriptor contains a yet unprocessed
2237 *      response.
2238 */
2239static inline int is_new_response(const struct rsp_desc *r,
2240                                  const struct sge_rspq *q)
2241{
2242        return (r->intr_gen & F_RSPD_GEN2) == q->gen;
2243}
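    /*
     * The response generation bit flips every time the response ring wraps
     * (see q->gen ^= 1 in process_responses()), so descriptors left over
     * from the previous pass carry the stale generation value and are not
     * reported as new by is_new_response().
     */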
2244
2245static inline void clear_rspq_bufstate(struct sge_rspq * const q)
2246{
2247        q->pg_skb = NULL;
2248        q->rx_recycle_buf = 0;
2249}
2250
2251#define RSPD_GTS_MASK  (F_RSPD_TXQ0_GTS | F_RSPD_TXQ1_GTS)
2252#define RSPD_CTRL_MASK (RSPD_GTS_MASK | \
2253                        V_RSPD_TXQ0_CR(M_RSPD_TXQ0_CR) | \
2254                        V_RSPD_TXQ1_CR(M_RSPD_TXQ1_CR) | \
2255                        V_RSPD_TXQ2_CR(M_RSPD_TXQ2_CR))
2256
2257/* How long to delay the next interrupt in case of memory shortage, in 0.1us. */
2258#define NOMEM_INTR_DELAY 2500
2259
2260/**
2261 *      process_responses - process responses from an SGE response queue
2262 *      @adap: the adapter
2263 *      @qs: the queue set to which the response queue belongs
2264 *      @budget: how many responses can be processed in this round
2265 *
2266 *      Process responses from an SGE response queue up to the supplied budget.
2267 *      Responses include received packets as well as credits and other events
2268 *      for the queues that belong to the response queue's queue set.
2269 *      A negative budget is effectively unlimited.
2270 *
2271 *      Additionally choose the interrupt holdoff time for the next interrupt
2272 *      on this queue.  If the system is under memory shortage use a fairly
2273 *      long delay to help recovery.
2274 */
2275static int process_responses(struct adapter *adap, struct sge_qset *qs,
2276                             int budget)
2277{
2278        struct sge_rspq *q = &qs->rspq;
2279        struct rsp_desc *r = &q->desc[q->cidx];
2280        int budget_left = budget;
2281        unsigned int sleeping = 0;
2282        struct sk_buff *offload_skbs[RX_BUNDLE_SIZE];
2283        int ngathered = 0;
2284
2285        q->next_holdoff = q->holdoff_tmr;
2286
2287        while (likely(budget_left && is_new_response(r, q))) {
2288                int packet_complete, eth, ethpad = 2, lro = qs->lro_enabled;
2289                struct sk_buff *skb = NULL;
2290                u32 len, flags;
2291                __be32 rss_hi, rss_lo;
2292
2293                rmb();
2294                eth = r->rss_hdr.opcode == CPL_RX_PKT;
2295                rss_hi = *(const __be32 *)r;
2296                rss_lo = r->rss_hdr.rss_hash_val;
2297                flags = ntohl(r->flags);
2298
2299                if (unlikely(flags & F_RSPD_ASYNC_NOTIF)) {
2300                        skb = alloc_skb(AN_PKT_SIZE, GFP_ATOMIC);
2301                        if (!skb)
2302                                goto no_mem;
2303
2304                        memcpy(__skb_put(skb, AN_PKT_SIZE), r, AN_PKT_SIZE);
2305                        skb->data[0] = CPL_ASYNC_NOTIF;
2306                        rss_hi = htonl(CPL_ASYNC_NOTIF << 24);
2307                        q->async_notif++;
2308                } else if (flags & F_RSPD_IMM_DATA_VALID) {
2309                        skb = get_imm_packet(r);
2310                        if (unlikely(!skb)) {
2311no_mem:
2312                                q->next_holdoff = NOMEM_INTR_DELAY;
2313                                q->nomem++;
2314                                /* consume one credit since we tried */
2315                                budget_left--;
2316                                break;
2317                        }
2318                        q->imm_data++;
2319                        ethpad = 0;
2320                } else if ((len = ntohl(r->len_cq)) != 0) {
2321                        struct sge_fl *fl;
2322
2323                        lro &= eth && is_eth_tcp(rss_hi);
2324
2325                        fl = (len & F_RSPD_FLQ) ? &qs->fl[1] : &qs->fl[0];
2326                        if (fl->use_pages) {
2327                                void *addr = fl->sdesc[fl->cidx].pg_chunk.va;
2328
2329                                prefetch(addr);
2330#if L1_CACHE_BYTES < 128
2331                                prefetch(addr + L1_CACHE_BYTES);
2332#endif
2333                                __refill_fl(adap, fl);
2334                                if (lro > 0) {
2335                                        lro_add_page(adap, qs, fl,
2336                                                     G_RSPD_LEN(len),
2337                                                     flags & F_RSPD_EOP);
2338                                         goto next_fl;
2339                                }
2340
2341                                skb = get_packet_pg(adap, fl, q,
2342                                                    G_RSPD_LEN(len),
2343                                                    eth ?
2344                                                    SGE_RX_DROP_THRES : 0);
2345                                q->pg_skb = skb;
2346                        } else
2347                                skb = get_packet(adap, fl, G_RSPD_LEN(len),
2348                                                 eth ? SGE_RX_DROP_THRES : 0);
2349                        if (unlikely(!skb)) {
2350                                if (!eth)
2351                                        goto no_mem;
2352                                q->rx_drops++;
2353                        } else if (unlikely(r->rss_hdr.opcode == CPL_TRACE_PKT))
2354                                __skb_pull(skb, 2);
2355next_fl:
2356                        if (++fl->cidx == fl->size)
2357                                fl->cidx = 0;
2358                } else
2359                        q->pure_rsps++;
2360
2361                if (flags & RSPD_CTRL_MASK) {
2362                        sleeping |= flags & RSPD_GTS_MASK;
2363                        handle_rsp_cntrl_info(qs, flags);
2364                }
2365
2366                r++;
2367                if (unlikely(++q->cidx == q->size)) {
2368                        q->cidx = 0;
2369                        q->gen ^= 1;
2370                        r = q->desc;
2371                }
2372                prefetch(r);
2373
2374                if (++q->credits >= (q->size / 4)) {
2375                        refill_rspq(adap, q, q->credits);
2376                        q->credits = 0;
2377                }
2378
2379                packet_complete = flags &
2380                                  (F_RSPD_EOP | F_RSPD_IMM_DATA_VALID |
2381                                   F_RSPD_ASYNC_NOTIF);
2382
2383                if (skb != NULL && packet_complete) {
2384                        if (eth)
2385                                rx_eth(adap, q, skb, ethpad, lro);
2386                        else {
2387                                q->offload_pkts++;
2388                                /* Preserve the RSS info in csum & priority */
2389                                skb->csum = rss_hi;
2390                                skb->priority = rss_lo;
2391                                ngathered = rx_offload(&adap->tdev, q, skb,
2392                                                       offload_skbs,
2393                                                       ngathered);
2394                        }
2395
2396                        if (flags & F_RSPD_EOP)
2397                                clear_rspq_bufstate(q);
2398                }
2399                --budget_left;
2400        }
2401
2402        deliver_partial_bundle(&adap->tdev, q, offload_skbs, ngathered);
2403
2404        if (sleeping)
2405                check_ring_db(adap, qs, sleeping);
2406
2407        smp_mb();               /* commit Tx queue .processed updates */
2408        if (unlikely(qs->txq_stopped != 0))
2409                restart_tx(qs);
2410
2411        budget -= budget_left;
2412        return budget;
2413}
2414
2415static inline int is_pure_response(const struct rsp_desc *r)
2416{
2417        __be32 n = r->flags & htonl(F_RSPD_ASYNC_NOTIF | F_RSPD_IMM_DATA_VALID);
2418
2419        return (n | r->len_cq) == 0;
2420}
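    /*
     * A "pure" response carries neither immediate data, an async
     * notification, nor a free-list buffer (len_cq == 0); it only returns
     * completion credits and GTS state, which is why it can be handled
     * cheaply in hard interrupt context by process_pure_responses().
     */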
2421
2422/**
2423 *      napi_rx_handler - the NAPI handler for Rx processing
2424 *      @napi: the napi instance
2425 *      @budget: how many packets we can process in this round
2426 *
2427 *      Handler for new data events when using NAPI.
2428 */
2429static int napi_rx_handler(struct napi_struct *napi, int budget)
2430{
2431        struct sge_qset *qs = container_of(napi, struct sge_qset, napi);
2432        struct adapter *adap = qs->adap;
2433        int work_done = process_responses(adap, qs, budget);
2434
2435        if (likely(work_done < budget)) {
2436                napi_complete(napi);
2437
2438                /*
2439                 * Because we don't atomically flush the following
2440                 * write it is possible that in very rare cases it can
2441                 * reach the device in a way that races with a new
2442                 * response being written plus an error interrupt
2443                 * causing the NAPI interrupt handler below to return
2444                 * unhandled status to the OS.  To protect against
2445                 * this would require flushing the write and doing
2446                 * both the write and the flush with interrupts off.
2447                 * Way too expensive and unjustifiable given the
2448                 * rarity of the race.
2449                 *
2450                 * The race cannot happen at all with MSI-X.
2451                 */
2452                t3_write_reg(adap, A_SG_GTS, V_RSPQ(qs->rspq.cntxt_id) |
2453                             V_NEWTIMER(qs->rspq.next_holdoff) |
2454                             V_NEWINDEX(qs->rspq.cidx));
2455        }
2456        return work_done;
2457}
2458
2459/*
2460 * Returns true if the device is already scheduled for polling.
2461 */
2462static inline int napi_is_scheduled(struct napi_struct *napi)
2463{
2464        return test_bit(NAPI_STATE_SCHED, &napi->state);
2465}
2466
2467/**
2468 *      process_pure_responses - process pure responses from a response queue
2469 *      @adap: the adapter
2470 *      @qs: the queue set owning the response queue
2471 *      @r: the first pure response to process
2472 *
2473 *      A simpler version of process_responses() that handles only pure (i.e.,
2474 *      non-data-carrying) responses.  Such responses are too lightweight to
2475 *      justify calling a softirq under NAPI, so we handle them specially in
2476 *      the interrupt handler.  The function is called with a pointer to a
2477 *      response, which the caller must ensure is a valid pure response.
2478 *
2479 *      Returns 1 if it encounters a valid data-carrying response, 0 otherwise.
2480 */
2481static int process_pure_responses(struct adapter *adap, struct sge_qset *qs,
2482                                  struct rsp_desc *r)
2483{
2484        struct sge_rspq *q = &qs->rspq;
2485        unsigned int sleeping = 0;
2486
2487        do {
2488                u32 flags = ntohl(r->flags);
2489
2490                r++;
2491                if (unlikely(++q->cidx == q->size)) {
2492                        q->cidx = 0;
2493                        q->gen ^= 1;
2494                        r = q->desc;
2495                }
2496                prefetch(r);
2497
2498                if (flags & RSPD_CTRL_MASK) {
2499                        sleeping |= flags & RSPD_GTS_MASK;
2500                        handle_rsp_cntrl_info(qs, flags);
2501                }
2502
2503                q->pure_rsps++;
2504                if (++q->credits >= (q->size / 4)) {
2505                        refill_rspq(adap, q, q->credits);
2506                        q->credits = 0;
2507                }
2508                if (!is_new_response(r, q))
2509                        break;
2510                rmb();
2511        } while (is_pure_response(r));
2512
2513        if (sleeping)
2514                check_ring_db(adap, qs, sleeping);
2515
2516        smp_mb();               /* commit Tx queue .processed updates */
2517        if (unlikely(qs->txq_stopped != 0))
2518                restart_tx(qs);
2519
2520        return is_new_response(r, q);
2521}
2522
2523/**
2524 *      handle_responses - decide what to do with new responses in NAPI mode
2525 *      @adap: the adapter
2526 *      @q: the response queue
2527 *
2528 *      This is used by the NAPI interrupt handlers to decide what to do with
2529 *      new SGE responses.  If there are no new responses it returns -1.  If
2530 *      there are new responses and they are pure (i.e., non-data-carrying)
2531 *      it handles them straight in hard interrupt context as they are very
2532 *      cheap and don't deliver any packets.  Finally, if there are any data
2533 *      signaling responses it schedules the NAPI handler.  Returns 1 if it
2534 *      schedules NAPI, 0 if all new responses were pure.
2535 *
2536 *      The caller must ascertain NAPI is not already running.
2537 */
2538static inline int handle_responses(struct adapter *adap, struct sge_rspq *q)
2539{
2540        struct sge_qset *qs = rspq_to_qset(q);
2541        struct rsp_desc *r = &q->desc[q->cidx];
2542
2543        if (!is_new_response(r, q))
2544                return -1;
2545        rmb();
2546        if (is_pure_response(r) && process_pure_responses(adap, qs, r) == 0) {
2547                t3_write_reg(adap, A_SG_GTS, V_RSPQ(q->cntxt_id) |
2548                             V_NEWTIMER(q->holdoff_tmr) | V_NEWINDEX(q->cidx));
2549                return 0;
2550        }
2551        napi_schedule(&qs->napi);
2552        return 1;
2553}
2554
2555/*
2556 * The MSI-X interrupt handler for an SGE response queue for the non-NAPI case
2557 * (i.e., response queue serviced in hard interrupt).
2558 */
2559static irqreturn_t t3_sge_intr_msix(int irq, void *cookie)
2560{
2561        struct sge_qset *qs = cookie;
2562        struct adapter *adap = qs->adap;
2563        struct sge_rspq *q = &qs->rspq;
2564
2565        spin_lock(&q->lock);
2566        if (process_responses(adap, qs, -1) == 0)
2567                q->unhandled_irqs++;
2568        t3_write_reg(adap, A_SG_GTS, V_RSPQ(q->cntxt_id) |
2569                     V_NEWTIMER(q->next_holdoff) | V_NEWINDEX(q->cidx));
2570        spin_unlock(&q->lock);
2571        return IRQ_HANDLED;
2572}
2573
2574/*
2575 * The MSI-X interrupt handler for an SGE response queue for the NAPI case
2576 * (i.e., response queue serviced by NAPI polling).
2577 */
2578static irqreturn_t t3_sge_intr_msix_napi(int irq, void *cookie)
2579{
2580        struct sge_qset *qs = cookie;
2581        struct sge_rspq *q = &qs->rspq;
2582
2583        spin_lock(&q->lock);
2584
2585        if (handle_responses(qs->adap, q) < 0)
2586                q->unhandled_irqs++;
2587        spin_unlock(&q->lock);
2588        return IRQ_HANDLED;
2589}
2590
2591/*
2592 * The non-NAPI MSI interrupt handler.  This needs to handle data events from
2593 * SGE response queues as well as error and other async events as they all use
2594 * the same MSI vector.  We use one SGE response queue per port in this mode
2595 * and protect all response queues with queue 0's lock.
2596 */
2597static irqreturn_t t3_intr_msi(int irq, void *cookie)
2598{
2599        int new_packets = 0;
2600        struct adapter *adap = cookie;
2601        struct sge_rspq *q = &adap->sge.qs[0].rspq;
2602
2603        spin_lock(&q->lock);
2604
2605        if (process_responses(adap, &adap->sge.qs[0], -1)) {
2606                t3_write_reg(adap, A_SG_GTS, V_RSPQ(q->cntxt_id) |
2607                             V_NEWTIMER(q->next_holdoff) | V_NEWINDEX(q->cidx));
2608                new_packets = 1;
2609        }
2610
2611        if (adap->params.nports == 2 &&
2612            process_responses(adap, &adap->sge.qs[1], -1)) {
2613                struct sge_rspq *q1 = &adap->sge.qs[1].rspq;
2614
2615                t3_write_reg(adap, A_SG_GTS, V_RSPQ(q1->cntxt_id) |
2616                             V_NEWTIMER(q1->next_holdoff) |
2617                             V_NEWINDEX(q1->cidx));
2618                new_packets = 1;
2619        }
2620
2621        if (!new_packets && t3_slow_intr_handler(adap) == 0)
2622                q->unhandled_irqs++;
2623
2624        spin_unlock(&q->lock);
2625        return IRQ_HANDLED;
2626}
2627
2628static int rspq_check_napi(struct sge_qset *qs)
2629{
2630        struct sge_rspq *q = &qs->rspq;
2631
2632        if (!napi_is_scheduled(&qs->napi) &&
2633            is_new_response(&q->desc[q->cidx], q)) {
2634                napi_schedule(&qs->napi);
2635                return 1;
2636        }
2637        return 0;
2638}
2639
2640/*
2641 * The MSI interrupt handler for the NAPI case (i.e., response queues serviced
2642 * by NAPI polling).  Handles data events from SGE response queues as well as
2643 * error and other async events as they all use the same MSI vector.  We use
2644 * one SGE response queue per port in this mode and protect all response
2645 * queues with queue 0's lock.
2646 */
2647static irqreturn_t t3_intr_msi_napi(int irq, void *cookie)
2648{
2649        int new_packets;
2650        struct adapter *adap = cookie;
2651        struct sge_rspq *q = &adap->sge.qs[0].rspq;
2652
2653        spin_lock(&q->lock);
2654
2655        new_packets = rspq_check_napi(&adap->sge.qs[0]);
2656        if (adap->params.nports == 2)
2657                new_packets += rspq_check_napi(&adap->sge.qs[1]);
2658        if (!new_packets && t3_slow_intr_handler(adap) == 0)
2659                q->unhandled_irqs++;
2660
2661        spin_unlock(&q->lock);
2662        return IRQ_HANDLED;
2663}
2664
2665/*
2666 * A helper function that processes responses and issues GTS.
2667 */
2668static inline int process_responses_gts(struct adapter *adap,
2669                                        struct sge_rspq *rq)
2670{
2671        int work;
2672
2673        work = process_responses(adap, rspq_to_qset(rq), -1);
2674        t3_write_reg(adap, A_SG_GTS, V_RSPQ(rq->cntxt_id) |
2675                     V_NEWTIMER(rq->next_holdoff) | V_NEWINDEX(rq->cidx));
2676        return work;
2677}
2678
2679/*
2680 * The legacy INTx interrupt handler.  This needs to handle data events from
2681 * SGE response queues as well as error and other async events as they all use
2682 * the same interrupt pin.  We use one SGE response queue per port in this mode
2683 * and protect all response queues with queue 0's lock.
2684 */
2685static irqreturn_t t3_intr(int irq, void *cookie)
2686{
2687        int work_done, w0, w1;
2688        struct adapter *adap = cookie;
2689        struct sge_rspq *q0 = &adap->sge.qs[0].rspq;
2690        struct sge_rspq *q1 = &adap->sge.qs[1].rspq;
2691
2692        spin_lock(&q0->lock);
2693
2694        w0 = is_new_response(&q0->desc[q0->cidx], q0);
2695        w1 = adap->params.nports == 2 &&
2696            is_new_response(&q1->desc[q1->cidx], q1);
2697
2698        if (likely(w0 | w1)) {
2699                t3_write_reg(adap, A_PL_CLI, 0);
2700                t3_read_reg(adap, A_PL_CLI);    /* flush */
2701
2702                if (likely(w0))
2703                        process_responses_gts(adap, q0);
2704
2705                if (w1)
2706                        process_responses_gts(adap, q1);
2707
2708                work_done = w0 | w1;
2709        } else
2710                work_done = t3_slow_intr_handler(adap);
2711
2712        spin_unlock(&q0->lock);
2713        return IRQ_RETVAL(work_done != 0);
2714}
2715
2716/*
2717 * Interrupt handler for legacy INTx interrupts for T3B-based cards.
2718 * Handles data events from SGE response queues as well as error and other
2719 * async events as they all use the same interrupt pin.  We use one SGE
2720 * response queue per port in this mode and protect all response queues with
2721 * queue 0's lock.
2722 */
2723static irqreturn_t t3b_intr(int irq, void *cookie)
2724{
2725        u32 map;
2726        struct adapter *adap = cookie;
2727        struct sge_rspq *q0 = &adap->sge.qs[0].rspq;
2728
2729        t3_write_reg(adap, A_PL_CLI, 0);
2730        map = t3_read_reg(adap, A_SG_DATA_INTR);
2731
2732        if (unlikely(!map))     /* shared interrupt, most likely */
2733                return IRQ_NONE;
2734
2735        spin_lock(&q0->lock);
2736
2737        if (unlikely(map & F_ERRINTR))
2738                t3_slow_intr_handler(adap);
2739
2740        if (likely(map & 1))
2741                process_responses_gts(adap, q0);
2742
2743        if (map & 2)
2744                process_responses_gts(adap, &adap->sge.qs[1].rspq);
2745
2746        spin_unlock(&q0->lock);
2747        return IRQ_HANDLED;
2748}
2749
2750/*
2751 * NAPI interrupt handler for legacy INTx interrupts for T3B-based cards.
2752 * Handles data events from SGE response queues as well as error and other
2753 * async events as they all use the same interrupt pin.  We use one SGE
2754 * response queue per port in this mode and protect all response queues with
2755 * queue 0's lock.
2756 */
2757static irqreturn_t t3b_intr_napi(int irq, void *cookie)
2758{
2759        u32 map;
2760        struct adapter *adap = cookie;
2761        struct sge_qset *qs0 = &adap->sge.qs[0];
2762        struct sge_rspq *q0 = &qs0->rspq;
2763
2764        t3_write_reg(adap, A_PL_CLI, 0);
2765        map = t3_read_reg(adap, A_SG_DATA_INTR);
2766
2767        if (unlikely(!map))     /* shared interrupt, most likely */
2768                return IRQ_NONE;
2769
2770        spin_lock(&q0->lock);
2771
2772        if (unlikely(map & F_ERRINTR))
2773                t3_slow_intr_handler(adap);
2774
2775        if (likely(map & 1))
2776                napi_schedule(&qs0->napi);
2777
2778        if (map & 2)
2779                napi_schedule(&adap->sge.qs[1].napi);
2780
2781        spin_unlock(&q0->lock);
2782        return IRQ_HANDLED;
2783}
2784
2785/**
2786 *      t3_intr_handler - select the top-level interrupt handler
2787 *      @adap: the adapter
2788 *      @polling: whether using NAPI to service response queues
2789 *
2790 *      Selects the top-level interrupt handler based on the type of interrupts
2791 *      (MSI-X, MSI, or legacy) and whether NAPI will be used to service the
2792 *      response queues.
2793 */
2794irq_handler_t t3_intr_handler(struct adapter *adap, int polling)
2795{
2796        if (adap->flags & USING_MSIX)
2797                return polling ? t3_sge_intr_msix_napi : t3_sge_intr_msix;
2798        if (adap->flags & USING_MSI)
2799                return polling ? t3_intr_msi_napi : t3_intr_msi;
2800        if (adap->params.rev > 0)
2801                return polling ? t3b_intr_napi : t3b_intr;
2802        return t3_intr;
2803}
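    /*
     * Sketch of typical use (an assumption about the caller, not code from
     * this file): the main driver passes the selected handler straight to
     * request_irq(), e.g.
     *
     *	err = request_irq(adap->pdev->irq,
     *			  t3_intr_handler(adap, rspq_polling),
     *			  (adap->flags & USING_MSI) ? 0 : IRQF_SHARED,
     *			  adap->name, adap);
     *
     * where "rspq_polling" stands in for whatever NAPI-enable flag the
     * caller maintains.
     */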
2804
2805#define SGE_PARERR (F_CPPARITYERROR | F_OCPARITYERROR | F_RCPARITYERROR | \
2806                    F_IRPARITYERROR | V_ITPARITYERROR(M_ITPARITYERROR) | \
2807                    V_FLPARITYERROR(M_FLPARITYERROR) | F_LODRBPARITYERROR | \
2808                    F_HIDRBPARITYERROR | F_LORCQPARITYERROR | \
2809                    F_HIRCQPARITYERROR)
2810#define SGE_FRAMINGERR (F_UC_REQ_FRAMINGERROR | F_R_REQ_FRAMINGERROR)
2811#define SGE_FATALERR (SGE_PARERR | SGE_FRAMINGERR | F_RSPQCREDITOVERFOW | \
2812                      F_RSPQDISABLED)
2813
2814/**
2815 *      t3_sge_err_intr_handler - SGE async event interrupt handler
2816 *      @adapter: the adapter
2817 *
2818 *      Interrupt handler for SGE asynchronous (non-data) events.
2819 */
2820void t3_sge_err_intr_handler(struct adapter *adapter)
2821{
2822        unsigned int v, status = t3_read_reg(adapter, A_SG_INT_CAUSE) &
2823                                 ~F_FLEMPTY;
2824
2825        if (status & SGE_PARERR)
2826                CH_ALERT(adapter, "SGE parity error (0x%x)\n",
2827                         status & SGE_PARERR);
2828        if (status & SGE_FRAMINGERR)
2829                CH_ALERT(adapter, "SGE framing error (0x%x)\n",
2830                         status & SGE_FRAMINGERR);
2831
2832        if (status & F_RSPQCREDITOVERFOW)
2833                CH_ALERT(adapter, "SGE response queue credit overflow\n");
2834
2835        if (status & F_RSPQDISABLED) {
2836                v = t3_read_reg(adapter, A_SG_RSPQ_FL_STATUS);
2837
2838                CH_ALERT(adapter,
2839                         "packet delivered to disabled response queue "
2840                         "(0x%x)\n", (v >> S_RSPQ0DISABLED) & 0xff);
2841        }
2842
2843        if (status & (F_HIPIODRBDROPERR | F_LOPIODRBDROPERR))
2844                queue_work(cxgb3_wq, &adapter->db_drop_task);
2845
2846        if (status & (F_HIPRIORITYDBFULL | F_LOPRIORITYDBFULL))
2847                queue_work(cxgb3_wq, &adapter->db_full_task);
2848
2849        if (status & (F_HIPRIORITYDBEMPTY | F_LOPRIORITYDBEMPTY))
2850                queue_work(cxgb3_wq, &adapter->db_empty_task);
2851
2852        t3_write_reg(adapter, A_SG_INT_CAUSE, status);
2853        if (status &  SGE_FATALERR)
2854                t3_fatal_err(adapter);
2855}
2856
2857/**
2858 *      sge_timer_tx - perform periodic maintenance of an SGE qset
2859 *      @data: the SGE queue set to maintain
2860 *
2861 *      Runs periodically from a timer to perform maintenance of an SGE queue
2862 *      set.  It performs a single task:
2863 *
2864 *      Cleans up any completed Tx descriptors that may still be pending.
2865 *      Normal descriptor cleanup happens when new packets are added to a Tx
2866 *      queue so this timer is relatively infrequent and does any cleanup only
2867 *      if the Tx queue has not seen any new packets in a while.  We make a
2868 *      best effort attempt to reclaim descriptors, in that we don't wait
2869 *      around if we cannot get a queue's lock (which most likely is because
2870 *      someone else is queueing new packets and so will also handle the clean
2871 *      up).  Since control queues use immediate data exclusively we don't
2872 *      bother cleaning them up here.
2873 *
2874 */
2875static void sge_timer_tx(unsigned long data)
2876{
2877        struct sge_qset *qs = (struct sge_qset *)data;
2878        struct port_info *pi = netdev_priv(qs->netdev);
2879        struct adapter *adap = pi->adapter;
2880        unsigned int tbd[SGE_TXQ_PER_SET] = {0, 0};
2881        unsigned long next_period;
2882
2883        if (__netif_tx_trylock(qs->tx_q)) {
2884                tbd[TXQ_ETH] = reclaim_completed_tx(adap, &qs->txq[TXQ_ETH],
2885                                                     TX_RECLAIM_TIMER_CHUNK);
2886                __netif_tx_unlock(qs->tx_q);
2887        }
2888
2889        if (spin_trylock(&qs->txq[TXQ_OFLD].lock)) {
2890                tbd[TXQ_OFLD] = reclaim_completed_tx(adap, &qs->txq[TXQ_OFLD],
2891                                                     TX_RECLAIM_TIMER_CHUNK);
2892                spin_unlock(&qs->txq[TXQ_OFLD].lock);
2893        }
2894
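        /*
         * Adaptive rescheduling: tbd[] holds the number of descriptors still
         * awaiting reclaim (reclaim_completed_tx()'s return value), so the
         * base period is halved for each TX_RECLAIM_TIMER_CHUNK of work left
         * outstanding and the timer fires again sooner when it falls behind.
         */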
2895        next_period = TX_RECLAIM_PERIOD >>
2896                      (max(tbd[TXQ_ETH], tbd[TXQ_OFLD]) /
2897                      TX_RECLAIM_TIMER_CHUNK);
2898        mod_timer(&qs->tx_reclaim_timer, jiffies + next_period);
2899}
2900
2901/*
2902 *      sge_timer_rx - perform periodic maintenance of an SGE qset
2903 *      @data: the SGE queue set to maintain
2904 *
2905 *      a) Replenishes Rx queues that have run out due to memory shortage.
2906 *      Normally new Rx buffers are added when existing ones are consumed but
2907 *      when out of memory a queue can become empty.  We try to add only a few
2908 *      buffers here; the queue will be replenished fully as these new buffers
2909 *      are used up if memory shortage has subsided.
2910 *
2911 *      b) Returns coalesced response queue credits in case a response queue is
2912 *      starved.
2913 *
2914 */
2915static void sge_timer_rx(unsigned long data)
2916{
2917        spinlock_t *lock;
2918        struct sge_qset *qs = (struct sge_qset *)data;
2919        struct port_info *pi = netdev_priv(qs->netdev);
2920        struct adapter *adap = pi->adapter;
2921        u32 status;
2922
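        /*
         * Rev 0 parts service every response queue from queue set 0's
         * interrupt path (see t3_intr_handler()), so that is the lock to
         * synchronize with; later revisions are handled per queue set.
         */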
2923        lock = adap->params.rev > 0 ?
2924               &qs->rspq.lock : &adap->sge.qs[0].rspq.lock;
2925
2926        if (!spin_trylock_irq(lock))
2927                goto out;
2928
2929        if (napi_is_scheduled(&qs->napi))
2930                goto unlock;
2931
2932        if (adap->params.rev < 4) {
2933                status = t3_read_reg(adap, A_SG_RSPQ_FL_STATUS);
2934
2935                if (status & (1 << qs->rspq.cntxt_id)) {
2936                        qs->rspq.starved++;
2937                        if (qs->rspq.credits) {
2938                                qs->rspq.credits--;
2939                                refill_rspq(adap, &qs->rspq, 1);
2940                                qs->rspq.restarted++;
2941                                t3_write_reg(adap, A_SG_RSPQ_FL_STATUS,
2942                                             1 << qs->rspq.cntxt_id);
2943                        }
2944                }
2945        }
2946
2947        if (qs->fl[0].credits < qs->fl[0].size)
2948                __refill_fl(adap, &qs->fl[0]);
2949        if (qs->fl[1].credits < qs->fl[1].size)
2950                __refill_fl(adap, &qs->fl[1]);
2951
2952unlock:
2953        spin_unlock_irq(lock);
2954out:
2955        mod_timer(&qs->rx_reclaim_timer, jiffies + RX_RECLAIM_PERIOD);
2956}
2957
2958/**
2959 *      t3_update_qset_coalesce - update coalescing settings for a queue set
2960 *      @qs: the SGE queue set
2961 *      @p: new queue set parameters
2962 *
2963 *      Update the coalescing settings for an SGE queue set.  Nothing is done
2964 *      if the queue set is not initialized yet.
2965 */
2966void t3_update_qset_coalesce(struct sge_qset *qs, const struct qset_params *p)
2967{
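        /*
         * t3_sge_init() programs A_SG_TIMER_TICK to core_ticks_per_usec()/10,
         * i.e. one holdoff timer tick every 1/10 us, so multiplying the
         * microsecond setting by 10 converts it to hardware ticks.
         */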
2968        qs->rspq.holdoff_tmr = max(p->coalesce_usecs * 10, 1U); /* can't be 0 */
2969        qs->rspq.polling = p->polling;
2970        qs->napi.poll = p->polling ? napi_rx_handler : ofld_poll;
2971}
2972
2973/**
2974 *      t3_sge_alloc_qset - initialize an SGE queue set
2975 *      @adapter: the adapter
2976 *      @id: the queue set id
2977 *      @nports: how many Ethernet ports will be using this queue set
2978 *      @irq_vec_idx: the IRQ vector index for response queue interrupts
2979 *      @p: configuration parameters for this queue set
2980 *      @ntxq: number of Tx queues for the queue set
2981 *      @dev: net device associated with this queue set
2982 *      @netdevq: net device TX queue associated with this queue set
2983 *
2984 *      Allocate resources and initialize an SGE queue set.  A queue set
2985 *      comprises a response queue, two Rx free-buffer queues, and up to 3
2986 *      Tx queues.  The Tx queues are assigned roles in the order Ethernet
2987 *      queue, offload queue, and control queue.
2988 */
2989int t3_sge_alloc_qset(struct adapter *adapter, unsigned int id, int nports,
2990                      int irq_vec_idx, const struct qset_params *p,
2991                      int ntxq, struct net_device *dev,
2992                      struct netdev_queue *netdevq)
2993{
2994        int i, avail, ret = -ENOMEM;
2995        struct sge_qset *q = &adapter->sge.qs[id];
2996
2997        init_qset_cntxt(q, id);
2998        setup_timer(&q->tx_reclaim_timer, sge_timer_tx, (unsigned long)q);
2999        setup_timer(&q->rx_reclaim_timer, sge_timer_rx, (unsigned long)q);
3000
3001        q->fl[0].desc = alloc_ring(adapter->pdev, p->fl_size,
3002                                   sizeof(struct rx_desc),
3003                                   sizeof(struct rx_sw_desc),
3004                                   &q->fl[0].phys_addr, &q->fl[0].sdesc);
3005        if (!q->fl[0].desc)
3006                goto err;
3007
3008        q->fl[1].desc = alloc_ring(adapter->pdev, p->jumbo_size,
3009                                   sizeof(struct rx_desc),
3010                                   sizeof(struct rx_sw_desc),
3011                                   &q->fl[1].phys_addr, &q->fl[1].sdesc);
3012        if (!q->fl[1].desc)
3013                goto err;
3014
3015        q->rspq.desc = alloc_ring(adapter->pdev, p->rspq_size,
3016                                  sizeof(struct rsp_desc), 0,
3017                                  &q->rspq.phys_addr, NULL);
3018        if (!q->rspq.desc)
3019                goto err;
3020
3021        for (i = 0; i < ntxq; ++i) {
3022                /*
3023                 * The control queue always uses immediate data so does not
3024                 * need to keep track of any sk_buffs.
3025                 */
3026                size_t sz = i == TXQ_CTRL ? 0 : sizeof(struct tx_sw_desc);
3027
3028                q->txq[i].desc = alloc_ring(adapter->pdev, p->txq_size[i],
3029                                            sizeof(struct tx_desc), sz,
3030                                            &q->txq[i].phys_addr,
3031                                            &q->txq[i].sdesc);
3032                if (!q->txq[i].desc)
3033                        goto err;
3034
3035                q->txq[i].gen = 1;
3036                q->txq[i].size = p->txq_size[i];
3037                spin_lock_init(&q->txq[i].lock);
3038                skb_queue_head_init(&q->txq[i].sendq);
3039        }
3040
3041        tasklet_init(&q->txq[TXQ_OFLD].qresume_tsk, restart_offloadq,
3042                     (unsigned long)q);
3043        tasklet_init(&q->txq[TXQ_CTRL].qresume_tsk, restart_ctrlq,
3044                     (unsigned long)q);
3045
3046        q->fl[0].gen = q->fl[1].gen = 1;
3047        q->fl[0].size = p->fl_size;
3048        q->fl[1].size = p->jumbo_size;
3049
3050        q->rspq.gen = 1;
3051        q->rspq.size = p->rspq_size;
3052        spin_lock_init(&q->rspq.lock);
3053        skb_queue_head_init(&q->rspq.rx_queue);
3054
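        /*
         * Stop the Ethernet Tx queue once fewer descriptors remain than one
         * maximally fragmented packet per port sharing this qset could need:
         * an SGL covering MAX_SKB_FRAGS + 1 buffers plus header flits.
         */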
3055        q->txq[TXQ_ETH].stop_thres = nports *
3056            flits_to_desc(sgl_len(MAX_SKB_FRAGS + 1) + 3);
3057
3058#if FL0_PG_CHUNK_SIZE > 0
3059        q->fl[0].buf_size = FL0_PG_CHUNK_SIZE;
3060#else
3061        q->fl[0].buf_size = SGE_RX_SM_BUF_SIZE + sizeof(struct cpl_rx_data);
3062#endif
3063#if FL1_PG_CHUNK_SIZE > 0
3064        q->fl[1].buf_size = FL1_PG_CHUNK_SIZE;
3065#else
3066        q->fl[1].buf_size = is_offload(adapter) ?
3067                (16 * 1024) - SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) :
3068                MAX_FRAME_SIZE + 2 + sizeof(struct cpl_rx_pkt);
3069#endif
3070
3071        q->fl[0].use_pages = FL0_PG_CHUNK_SIZE > 0;
3072        q->fl[1].use_pages = FL1_PG_CHUNK_SIZE > 0;
3073        q->fl[0].order = FL0_PG_ORDER;
3074        q->fl[1].order = FL1_PG_ORDER;
3075        q->fl[0].alloc_size = FL0_PG_ALLOC_SIZE;
3076        q->fl[1].alloc_size = FL1_PG_ALLOC_SIZE;
3077
3078        spin_lock_irq(&adapter->sge.reg_lock);
3079
3080        /* FL threshold comparison uses < */
3081        ret = t3_sge_init_rspcntxt(adapter, q->rspq.cntxt_id, irq_vec_idx,
3082                                   q->rspq.phys_addr, q->rspq.size,
3083                                   q->fl[0].buf_size - SGE_PG_RSVD, 1, 0);
3084        if (ret)
3085                goto err_unlock;
3086
3087        for (i = 0; i < SGE_RXQ_PER_SET; ++i) {
3088                ret = t3_sge_init_flcntxt(adapter, q->fl[i].cntxt_id, 0,
3089                                          q->fl[i].phys_addr, q->fl[i].size,
3090                                          q->fl[i].buf_size - SGE_PG_RSVD,
3091                                          p->cong_thres, 1, 0);
3092                if (ret)
3093                        goto err_unlock;
3094        }
3095
3096        ret = t3_sge_init_ecntxt(adapter, q->txq[TXQ_ETH].cntxt_id, USE_GTS,
3097                                 SGE_CNTXT_ETH, id, q->txq[TXQ_ETH].phys_addr,
3098                                 q->txq[TXQ_ETH].size, q->txq[TXQ_ETH].token,
3099                                 1, 0);
3100        if (ret)
3101                goto err_unlock;
3102
3103        if (ntxq > 1) {
3104                ret = t3_sge_init_ecntxt(adapter, q->txq[TXQ_OFLD].cntxt_id,
3105                                         USE_GTS, SGE_CNTXT_OFLD, id,
3106                                         q->txq[TXQ_OFLD].phys_addr,
3107                                         q->txq[TXQ_OFLD].size, 0, 1, 0);
3108                if (ret)
3109                        goto err_unlock;
3110        }
3111
3112        if (ntxq > 2) {
3113                ret = t3_sge_init_ecntxt(adapter, q->txq[TXQ_CTRL].cntxt_id, 0,
3114                                         SGE_CNTXT_CTRL, id,
3115                                         q->txq[TXQ_CTRL].phys_addr,
3116                                         q->txq[TXQ_CTRL].size,
3117                                         q->txq[TXQ_CTRL].token, 1, 0);
3118                if (ret)
3119                        goto err_unlock;
3120        }
3121
3122        spin_unlock_irq(&adapter->sge.reg_lock);
3123
3124        q->adap = adapter;
3125        q->netdev = dev;
3126        q->tx_q = netdevq;
3127        t3_update_qset_coalesce(q, p);
3128
3129        avail = refill_fl(adapter, &q->fl[0], q->fl[0].size,
3130                          GFP_KERNEL | __GFP_COMP);
3131        if (!avail) {
3132                CH_ALERT(adapter, "free list queue 0 initialization failed\n");
3133                goto err;
3134        }
3135        if (avail < q->fl[0].size)
3136                CH_WARN(adapter, "free list queue 0 enabled with %d credits\n",
3137                        avail);
3138
3139        avail = refill_fl(adapter, &q->fl[1], q->fl[1].size,
3140                          GFP_KERNEL | __GFP_COMP);
3141        if (avail < q->fl[1].size)
3142                CH_WARN(adapter, "free list queue 1 enabled with %d credits\n",
3143                        avail);
3144        refill_rspq(adapter, &q->rspq, q->rspq.size - 1);
3145
3146        t3_write_reg(adapter, A_SG_GTS, V_RSPQ(q->rspq.cntxt_id) |
3147                     V_NEWTIMER(q->rspq.holdoff_tmr));
3148
3149        return 0;
3150
3151err_unlock:
3152        spin_unlock_irq(&adapter->sge.reg_lock);
3153err:
3154        t3_free_qset(adapter, q);
3155        return ret;
3156}
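
/*
 * Caller sketch only: the real loop is setup_sge_qsets() in cxgb3_main.c,
 * which also selects the MSI-X vector index per queue set, so the qidx,
 * irq_vec_idx and ntxq values below are illustrative assumptions.
 *
 *      err = t3_sge_alloc_qset(adap, qidx, 1, irq_vec_idx,
 *                              &adap->params.sge.qset[qidx], SGE_TXQ_PER_SET,
 *                              dev, netdev_get_tx_queue(dev, 0));
 */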
3157
3158/**
3159 *      t3_start_sge_timers - start SGE timer callbacks
3160 *      @adap: the adapter
3161 *
3162 *      Starts each SGE queue set's timer callbacks
3163 */
3164void t3_start_sge_timers(struct adapter *adap)
3165{
3166        int i;
3167
3168        for (i = 0; i < SGE_QSETS; ++i) {
3169                struct sge_qset *q = &adap->sge.qs[i];
3170
3171                if (q->tx_reclaim_timer.function)
3172                        mod_timer(&q->tx_reclaim_timer, jiffies + TX_RECLAIM_PERIOD);
3173
3174                if (q->rx_reclaim_timer.function)
3175                        mod_timer(&q->rx_reclaim_timer, jiffies + RX_RECLAIM_PERIOD);
3176        }
3177}
3178
3179/**
3180 *      t3_stop_sge_timers - stop SGE timer callbacks
3181 *      @adap: the adapter
3182 *
3183 *      Stops each SGE queue set's timer callbacks
3184 */
3185void t3_stop_sge_timers(struct adapter *adap)
3186{
3187        int i;
3188
3189        for (i = 0; i < SGE_QSETS; ++i) {
3190                struct sge_qset *q = &adap->sge.qs[i];
3191
3192                if (q->tx_reclaim_timer.function)
3193                        del_timer_sync(&q->tx_reclaim_timer);
3194                if (q->rx_reclaim_timer.function)
3195                        del_timer_sync(&q->rx_reclaim_timer);
3196        }
3197}
3198
3199/**
3200 *      t3_free_sge_resources - free SGE resources
3201 *      @adap: the adapter
3202 *
3203 *      Frees resources used by the SGE queue sets.
3204 */
3205void t3_free_sge_resources(struct adapter *adap)
3206{
3207        int i;
3208
3209        for (i = 0; i < SGE_QSETS; ++i)
3210                t3_free_qset(adap, &adap->sge.qs[i]);
3211}
3212
3213/**
3214 *      t3_sge_start - enable SGE
3215 *      @adap: the adapter
3216 *
3217 *      Enables the SGE for DMAs.  This is the last step in starting packet
3218 *      transfers.
3219 */
3220void t3_sge_start(struct adapter *adap)
3221{
3222        t3_set_reg_field(adap, A_SG_CONTROL, F_GLOBALENABLE, F_GLOBALENABLE);
3223}
3224
3225/**
3226 *      t3_sge_stop - disable SGE operation
3227 *      @adap: the adapter
3228 *
3229 *      Disables the DMA engine.  This can be called in emergencies (e.g.,
3230 *      from error interrupts) or from normal process context.  In the latter
3231 *      case it also disables any pending queue restart tasklets.  Note that
3232 *      if it is called in interrupt context it cannot disable the restart
3233 *      tasklets as it cannot wait, however the tasklets will have no effect
3234 *      since the doorbells are disabled and the driver will call this again
3235 *      later from process context, at which time the tasklets will be stopped
3236 *      if they are still running.
3237 */
3238void t3_sge_stop(struct adapter *adap)
3239{
3240        t3_set_reg_field(adap, A_SG_CONTROL, F_GLOBALENABLE, 0);
3241        if (!in_interrupt()) {
3242                int i;
3243
3244                for (i = 0; i < SGE_QSETS; ++i) {
3245                        struct sge_qset *qs = &adap->sge.qs[i];
3246
3247                        tasklet_kill(&qs->txq[TXQ_OFLD].qresume_tsk);
3248                        tasklet_kill(&qs->txq[TXQ_CTRL].qresume_tsk);
3249                }
3250        }
3251}
3252
3253/**
3254 *      t3_sge_init - initialize SGE
3255 *      @adap: the adapter
3256 *      @p: the SGE parameters
3257 *
3258 *      Performs SGE initialization needed every time after a chip reset.
3259 *      We do not initialize any of the queue sets here; instead, the driver
3260 *      top-level must request those individually.  We also do not enable DMA
3261 *      here, that should be done after the queues have been set up.
3262 */
3263void t3_sge_init(struct adapter *adap, struct sge_params *p)
3264{
3265        unsigned int ctrl, ups = ffs(pci_resource_len(adap->pdev, 2) >> 12);
3266
3267        ctrl = F_DROPPKT | V_PKTSHIFT(2) | F_FLMODE | F_AVOIDCQOVFL |
3268            F_CQCRDTCTRL | F_CONGMODE | F_TNLFLMODE | F_FATLPERREN |
3269            V_HOSTPAGESIZE(PAGE_SHIFT - 11) | F_BIGENDIANINGRESS |
3270            V_USERSPACESIZE(ups ? ups - 1 : 0) | F_ISCSICOALESCING;
3271#if SGE_NUM_GENBITS == 1
3272        ctrl |= F_EGRGENCTRL;
3273#endif
3274        if (adap->params.rev > 0) {
3275                if (!(adap->flags & (USING_MSIX | USING_MSI)))
3276                        ctrl |= F_ONEINTMULTQ | F_OPTONEINTMULTQ;
3277        }
3278        t3_write_reg(adap, A_SG_CONTROL, ctrl);
3279        t3_write_reg(adap, A_SG_EGR_RCQ_DRB_THRSH, V_HIRCQDRBTHRSH(512) |
3280                     V_LORCQDRBTHRSH(512));
3281        t3_write_reg(adap, A_SG_TIMER_TICK, core_ticks_per_usec(adap) / 10);
3282        t3_write_reg(adap, A_SG_CMDQ_CREDIT_TH, V_THRESHOLD(32) |
3283                     V_TIMEOUT(200 * core_ticks_per_usec(adap)));
3284        t3_write_reg(adap, A_SG_HI_DRB_HI_THRSH,
3285                     adap->params.rev < T3_REV_C ? 1000 : 500);
3286        t3_write_reg(adap, A_SG_HI_DRB_LO_THRSH, 256);
3287        t3_write_reg(adap, A_SG_LO_DRB_HI_THRSH, 1000);
3288        t3_write_reg(adap, A_SG_LO_DRB_LO_THRSH, 256);
3289        t3_write_reg(adap, A_SG_OCO_BASE, V_BASE1(0xfff));
3290        t3_write_reg(adap, A_SG_DRB_PRI_THRESH, 63 * 1024);
3291}
3292
3293/**
3294 *      t3_sge_prep - one-time SGE initialization
3295 *      @adap: the associated adapter
3296 *      @p: SGE parameters
3297 *
3298 *      Performs one-time initialization of SGE SW state.  This includes determining
3299 *      defaults for the assorted SGE parameters, which admins can change until
3300 *      they are used to initialize the SGE.
3301 */
3302void t3_sge_prep(struct adapter *adap, struct sge_params *p)
3303{
3304        int i;
3305
3306        p->max_pkt_size = (16 * 1024) - sizeof(struct cpl_rx_data) -
3307            SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
3308
3309        for (i = 0; i < SGE_QSETS; ++i) {
3310                struct qset_params *q = p->qset + i;
3311
3312                q->polling = adap->params.rev > 0;
3313                q->coalesce_usecs = 5;
3314                q->rspq_size = 1024;
3315                q->fl_size = 1024;
3316                q->jumbo_size = 512;
3317                q->txq_size[TXQ_ETH] = 1024;
3318                q->txq_size[TXQ_OFLD] = 1024;
3319                q->txq_size[TXQ_CTRL] = 256;
3320                q->cong_thres = 0;
3321        }
3322
3323        spin_lock_init(&adap->sge.reg_lock);
3324}
3325