linux/drivers/net/ethernet/chelsio/cxgb3/sge.c
   1/*
   2 * Copyright (c) 2005-2008 Chelsio, Inc. All rights reserved.
   3 *
   4 * This software is available to you under a choice of one of two
   5 * licenses.  You may choose to be licensed under the terms of the GNU
   6 * General Public License (GPL) Version 2, available from the file
   7 * COPYING in the main directory of this source tree, or the
   8 * OpenIB.org BSD license below:
   9 *
  10 *     Redistribution and use in source and binary forms, with or
  11 *     without modification, are permitted provided that the following
  12 *     conditions are met:
  13 *
  14 *      - Redistributions of source code must retain the above
  15 *        copyright notice, this list of conditions and the following
  16 *        disclaimer.
  17 *
  18 *      - Redistributions in binary form must reproduce the above
  19 *        copyright notice, this list of conditions and the following
  20 *        disclaimer in the documentation and/or other materials
  21 *        provided with the distribution.
  22 *
  23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
  27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
  28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  30 * SOFTWARE.
  31 */
  32#include <linux/skbuff.h>
  33#include <linux/netdevice.h>
  34#include <linux/etherdevice.h>
  35#include <linux/if_vlan.h>
  36#include <linux/ip.h>
  37#include <linux/tcp.h>
  38#include <linux/dma-mapping.h>
  39#include <linux/slab.h>
  40#include <linux/prefetch.h>
  41#include <net/arp.h>
  42#include "common.h"
  43#include "regs.h"
  44#include "sge_defs.h"
  45#include "t3_cpl.h"
  46#include "firmware_exports.h"
  47#include "cxgb3_offload.h"
  48
  49#define USE_GTS 0
  50
  51#define SGE_RX_SM_BUF_SIZE 1536
  52
  53#define SGE_RX_COPY_THRES  256
  54#define SGE_RX_PULL_LEN    128
  55
  56#define SGE_PG_RSVD SMP_CACHE_BYTES
  57/*
  58 * Page chunk size for FL0 buffers if FL0 is to be populated with page chunks.
  59 * It must be a divisor of PAGE_SIZE.  If set to 0 FL0 will use sk_buffs
  60 * directly.
  61 */
  62#define FL0_PG_CHUNK_SIZE  2048
  63#define FL0_PG_ORDER 0
  64#define FL0_PG_ALLOC_SIZE (PAGE_SIZE << FL0_PG_ORDER)
  65#define FL1_PG_CHUNK_SIZE (PAGE_SIZE > 8192 ? 16384 : 8192)
  66#define FL1_PG_ORDER (PAGE_SIZE > 8192 ? 0 : 1)
  67#define FL1_PG_ALLOC_SIZE (PAGE_SIZE << FL1_PG_ORDER)
  68
  69#define SGE_RX_DROP_THRES 16
  70#define RX_RECLAIM_PERIOD (HZ/4)
  71
  72/*
  73 * Max number of Rx buffers we replenish at a time.
  74 */
  75#define MAX_RX_REFILL 16U
  76/*
  77 * Period of the Tx buffer reclaim timer.  This timer does not need to run
  78 * frequently as Tx buffers are usually reclaimed by new Tx packets.
  79 */
  80#define TX_RECLAIM_PERIOD (HZ / 4)
  81#define TX_RECLAIM_TIMER_CHUNK 64U
  82#define TX_RECLAIM_CHUNK 16U
  83
  84/* WR size in bytes */
  85#define WR_LEN (WR_FLITS * 8)
  86
  87/*
  88 * Types of Tx queues in each queue set.  Order here matters, do not change.
  89 */
  90enum { TXQ_ETH, TXQ_OFLD, TXQ_CTRL };
  91
  92/* Values for sge_txq.flags */
  93enum {
  94        TXQ_RUNNING = 1 << 0,   /* fetch engine is running */
  95        TXQ_LAST_PKT_DB = 1 << 1,       /* last packet rang the doorbell */
  96};
  97
  98struct tx_desc {
  99        __be64 flit[TX_DESC_FLITS];
 100};
 101
 102struct rx_desc {
 103        __be32 addr_lo;
 104        __be32 len_gen;
 105        __be32 gen2;
 106        __be32 addr_hi;
 107};
 108
 109struct tx_sw_desc {             /* SW state per Tx descriptor */
 110        struct sk_buff *skb;
 111        u8 eop;       /* set if last descriptor for packet */
 112        u8 addr_idx;  /* buffer index of first SGL entry in descriptor */
 113        u8 fragidx;   /* first page fragment associated with descriptor */
 114        s8 sflit;     /* start flit of first SGL entry in descriptor */
 115};
 116
 117struct rx_sw_desc {                /* SW state per Rx descriptor */
 118        union {
 119                struct sk_buff *skb;
 120                struct fl_pg_chunk pg_chunk;
 121        };
 122        DEFINE_DMA_UNMAP_ADDR(dma_addr);
 123};
 124
 125struct rsp_desc {               /* response queue descriptor */
 126        struct rss_header rss_hdr;
 127        __be32 flags;
 128        __be32 len_cq;
 129        struct_group(immediate,
 130                u8 imm_data[47];
 131                u8 intr_gen;
 132        );
 133};
 134
 135/*
 136 * Holds unmapping information for Tx packets that need deferred unmapping.
 137 * This structure lives at skb->head and must be allocated by callers.
 138 */
 139struct deferred_unmap_info {
 140        struct pci_dev *pdev;
 141        dma_addr_t addr[MAX_SKB_FRAGS + 1];
 142};
 143
 144/*
 145 * Maps a number of flits to the number of Tx descriptors that can hold them.
 146 * The formula is
 147 *
 148 * desc = 1 + (flits - 2) / (WR_FLITS - 1).
 149 *
 150 * HW allows up to 4 descriptors to be combined into a WR.
 151 */
 152static u8 flit_desc_map[] = {
 153        0,
 154#if SGE_NUM_GENBITS == 1
 155        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
 156        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
 157        3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
 158        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4
 159#elif SGE_NUM_GENBITS == 2
 160        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
 161        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
 162        3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
 163        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
 164#else
 165# error "SGE_NUM_GENBITS must be 1 or 2"
 166#endif
 167};
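/*
 * A worked example of the formula above, assuming WR_FLITS == 15 (the value
 * implied by the SGE_NUM_GENBITS == 2 table, which has 15 one-descriptor
 * entries before the twos begin):
 *
 *	flits = 15  ->  1 + (15 - 2) / 14 = 1 descriptor
 *	flits = 16  ->  1 + (16 - 2) / 14 = 2 descriptors
 *
 * i.e. after the first descriptor, each additional WR_FLITS - 1 flits of SGL
 * data requires one more descriptor.
 */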
 168
 169static inline struct sge_qset *fl_to_qset(const struct sge_fl *q, int qidx)
 170{
 171        return container_of(q, struct sge_qset, fl[qidx]);
 172}
 173
 174static inline struct sge_qset *rspq_to_qset(const struct sge_rspq *q)
 175{
 176        return container_of(q, struct sge_qset, rspq);
 177}
 178
 179static inline struct sge_qset *txq_to_qset(const struct sge_txq *q, int qidx)
 180{
 181        return container_of(q, struct sge_qset, txq[qidx]);
 182}
 183
 184/**
 185 *      refill_rspq - replenish an SGE response queue
 186 *      @adapter: the adapter
 187 *      @q: the response queue to replenish
 188 *      @credits: how many new responses to make available
 189 *
 190 *      Replenishes a response queue by making the supplied number of responses
 191 *      available to HW.
 192 */
 193static inline void refill_rspq(struct adapter *adapter,
 194                               const struct sge_rspq *q, unsigned int credits)
 195{
 196        rmb();
 197        t3_write_reg(adapter, A_SG_RSPQ_CREDIT_RETURN,
 198                     V_RSPQ(q->cntxt_id) | V_CREDITS(credits));
 199}
 200
 201/**
 202 *      need_skb_unmap - does the platform need unmapping of sk_buffs?
 203 *
 204 *      Returns true if the platform needs sk_buff unmapping.  The result is
 205 *      a compile-time constant, so unnecessary unmapping code is optimized away.
 206 */
 207static inline int need_skb_unmap(void)
 208{
 209#ifdef CONFIG_NEED_DMA_MAP_STATE
 210        return 1;
 211#else
 212        return 0;
 213#endif
 214}
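/*
 * Because need_skb_unmap() is a compile-time constant, guarding the unmap
 * path with it (as free_tx_desc() does below) lets the compiler discard the
 * unmapping code entirely on platforms without CONFIG_NEED_DMA_MAP_STATE,
 * e.g.:
 *
 *	if (need_skb_unmap())
 *		unmap_skb(skb, q, cidx, pdev);
 */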
 215
 216/**
 217 *      unmap_skb - unmap a packet main body and its page fragments
 218 *      @skb: the packet
 219 *      @q: the Tx queue containing Tx descriptors for the packet
 220 *      @cidx: index of Tx descriptor
 221 *      @pdev: the PCI device
 222 *
 223 *      Unmap the main body of an sk_buff and its page fragments, if any.
 224 *      Because of the fairly complicated structure of our SGLs and the desire
 225 *      to conserve space for metadata, the information necessary to unmap an
 226 *      sk_buff is spread across the sk_buff itself (buffer lengths), the HW Tx
 227 *      descriptors (the physical addresses of the various data buffers), and
 228 *      the SW descriptor state (assorted indices).  The send functions
 229 *      initialize the indices for the first packet descriptor so we can unmap
 230 *      the buffers held in the first Tx descriptor here, and we have enough
 231 *      information at this point to set the state for the next Tx descriptor.
 232 *
 233 *      Note that it is possible to clean up the first descriptor of a packet
 234 *      before the send routines have written the next descriptors, but this
 235 *      race does not cause any problem.  We just end up writing the unmapping
 236 *      info for the descriptor first.
 237 */
 238static inline void unmap_skb(struct sk_buff *skb, struct sge_txq *q,
 239                             unsigned int cidx, struct pci_dev *pdev)
 240{
 241        const struct sg_ent *sgp;
 242        struct tx_sw_desc *d = &q->sdesc[cidx];
 243        int nfrags, frag_idx, curflit, j = d->addr_idx;
 244
 245        sgp = (struct sg_ent *)&q->desc[cidx].flit[d->sflit];
 246        frag_idx = d->fragidx;
 247
 248        if (frag_idx == 0 && skb_headlen(skb)) {
 249                dma_unmap_single(&pdev->dev, be64_to_cpu(sgp->addr[0]),
 250                                 skb_headlen(skb), DMA_TO_DEVICE);
 251                j = 1;
 252        }
 253
 254        curflit = d->sflit + 1 + j;
 255        nfrags = skb_shinfo(skb)->nr_frags;
 256
 257        while (frag_idx < nfrags && curflit < WR_FLITS) {
 258                dma_unmap_page(&pdev->dev, be64_to_cpu(sgp->addr[j]),
 259                               skb_frag_size(&skb_shinfo(skb)->frags[frag_idx]),
 260                               DMA_TO_DEVICE);
 261                j ^= 1;
 262                if (j == 0) {
 263                        sgp++;
 264                        curflit++;
 265                }
 266                curflit++;
 267                frag_idx++;
 268        }
 269
 270        if (frag_idx < nfrags) {   /* SGL continues into next Tx descriptor */
 271                d = cidx + 1 == q->size ? q->sdesc : d + 1;
 272                d->fragidx = frag_idx;
 273                d->addr_idx = j;
 274                d->sflit = curflit - WR_FLITS - j; /* sflit can be -1 */
 275        }
 276}
 277
 278/**
 279 *      free_tx_desc - reclaims Tx descriptors and their buffers
 280 *      @adapter: the adapter
 281 *      @q: the Tx queue to reclaim descriptors from
 282 *      @n: the number of descriptors to reclaim
 283 *
 284 *      Reclaims Tx descriptors from an SGE Tx queue and frees the associated
 285 *      Tx buffers.  Called with the Tx queue lock held.
 286 */
 287static void free_tx_desc(struct adapter *adapter, struct sge_txq *q,
 288                         unsigned int n)
 289{
 290        struct tx_sw_desc *d;
 291        struct pci_dev *pdev = adapter->pdev;
 292        unsigned int cidx = q->cidx;
 293
 294        const int need_unmap = need_skb_unmap() &&
 295                               q->cntxt_id >= FW_TUNNEL_SGEEC_START;
 296
 297        d = &q->sdesc[cidx];
 298        while (n--) {
 299                if (d->skb) {   /* an SGL is present */
 300                        if (need_unmap)
 301                                unmap_skb(d->skb, q, cidx, pdev);
 302                        if (d->eop) {
 303                                dev_consume_skb_any(d->skb);
 304                                d->skb = NULL;
 305                        }
 306                }
 307                ++d;
 308                if (++cidx == q->size) {
 309                        cidx = 0;
 310                        d = q->sdesc;
 311                }
 312        }
 313        q->cidx = cidx;
 314}
 315
 316/**
 317 *      reclaim_completed_tx - reclaims completed Tx descriptors
 318 *      @adapter: the adapter
 319 *      @q: the Tx queue to reclaim completed descriptors from
 320 *      @chunk: maximum number of descriptors to reclaim
 321 *
 322 *      Reclaims Tx descriptors that the SGE has indicated it has processed,
 323 *      and frees the associated buffers if possible.  Called with the Tx
 324 *      queue's lock held.
 325 */
 326static inline unsigned int reclaim_completed_tx(struct adapter *adapter,
 327                                                struct sge_txq *q,
 328                                                unsigned int chunk)
 329{
 330        unsigned int reclaim = q->processed - q->cleaned;
 331
 332        reclaim = min(chunk, reclaim);
 333        if (reclaim) {
 334                free_tx_desc(adapter, q, reclaim);
 335                q->cleaned += reclaim;
 336                q->in_use -= reclaim;
 337        }
 338        return q->processed - q->cleaned;
 339}
 340
 341/**
 342 *      should_restart_tx - are there enough resources to restart a Tx queue?
 343 *      @q: the Tx queue
 344 *
 345 *      Checks if there are enough descriptors to restart a suspended Tx queue.
 346 */
 347static inline int should_restart_tx(const struct sge_txq *q)
 348{
 349        unsigned int r = q->processed - q->cleaned;
 350
 351        return q->in_use - r < (q->size >> 1);
 352}
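/*
 * Note: the test above discounts descriptors the HW has already completed
 * but that have not been cleaned yet (processed - cleaned), so a suspended
 * queue is restarted only when fewer than half of its descriptors are
 * genuinely outstanding.  The half-ring threshold gives the queue headroom
 * before it would have to stop again.
 */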
 353
 354static void clear_rx_desc(struct pci_dev *pdev, const struct sge_fl *q,
 355                          struct rx_sw_desc *d)
 356{
 357        if (q->use_pages && d->pg_chunk.page) {
 358                (*d->pg_chunk.p_cnt)--;
 359                if (!*d->pg_chunk.p_cnt)
 360                        dma_unmap_page(&pdev->dev, d->pg_chunk.mapping,
 361                                       q->alloc_size, DMA_FROM_DEVICE);
 362
 363                put_page(d->pg_chunk.page);
 364                d->pg_chunk.page = NULL;
 365        } else {
 366                dma_unmap_single(&pdev->dev, dma_unmap_addr(d, dma_addr),
 367                                 q->buf_size, DMA_FROM_DEVICE);
 368                kfree_skb(d->skb);
 369                d->skb = NULL;
 370        }
 371}
 372
 373/**
 374 *      free_rx_bufs - free the Rx buffers on an SGE free list
 375 *      @pdev: the PCI device associated with the adapter
 376 *      @q: the SGE free list to clean up
 377 *
 378 *      Release the buffers on an SGE free-buffer Rx queue.  HW fetching from
 379 *      this queue should be stopped before calling this function.
 380 */
 381static void free_rx_bufs(struct pci_dev *pdev, struct sge_fl *q)
 382{
 383        unsigned int cidx = q->cidx;
 384
 385        while (q->credits--) {
 386                struct rx_sw_desc *d = &q->sdesc[cidx];
 387
 388
 389                clear_rx_desc(pdev, q, d);
 390                if (++cidx == q->size)
 391                        cidx = 0;
 392        }
 393
 394        if (q->pg_chunk.page) {
 395                __free_pages(q->pg_chunk.page, q->order);
 396                q->pg_chunk.page = NULL;
 397        }
 398}
 399
 400/**
 401 *      add_one_rx_buf - add a packet buffer to a free-buffer list
 402 *      @va:  buffer start VA
 403 *      @len: the buffer length
 404 *      @d: the HW Rx descriptor to write
 405 *      @sd: the SW Rx descriptor to write
 406 *      @gen: the generation bit value
 407 *      @pdev: the PCI device associated with the adapter
 408 *
 409 *      Add a buffer of the given length to the supplied HW and SW Rx
 410 *      descriptors.
 411 */
 412static inline int add_one_rx_buf(void *va, unsigned int len,
 413                                 struct rx_desc *d, struct rx_sw_desc *sd,
 414                                 unsigned int gen, struct pci_dev *pdev)
 415{
 416        dma_addr_t mapping;
 417
 418        mapping = dma_map_single(&pdev->dev, va, len, DMA_FROM_DEVICE);
 419        if (unlikely(dma_mapping_error(&pdev->dev, mapping)))
 420                return -ENOMEM;
 421
 422        dma_unmap_addr_set(sd, dma_addr, mapping);
 423
 424        d->addr_lo = cpu_to_be32(mapping);
 425        d->addr_hi = cpu_to_be32((u64) mapping >> 32);
 426        dma_wmb();
 427        d->len_gen = cpu_to_be32(V_FLD_GEN1(gen));
 428        d->gen2 = cpu_to_be32(V_FLD_GEN2(gen));
 429        return 0;
 430}
 431
 432static inline int add_one_rx_chunk(dma_addr_t mapping, struct rx_desc *d,
 433                                   unsigned int gen)
 434{
 435        d->addr_lo = cpu_to_be32(mapping);
 436        d->addr_hi = cpu_to_be32((u64) mapping >> 32);
 437        dma_wmb();
 438        d->len_gen = cpu_to_be32(V_FLD_GEN1(gen));
 439        d->gen2 = cpu_to_be32(V_FLD_GEN2(gen));
 440        return 0;
 441}
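/*
 * Note on the ordering in the two helpers above: the SGE detects new
 * free-list entries by their generation bits, so the buffer address is
 * written first and the len_gen/gen2 words only after the dma_wmb().  This
 * prevents the HW from consuming a descriptor whose address words are not
 * yet visible in memory.
 */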
 442
 443static int alloc_pg_chunk(struct adapter *adapter, struct sge_fl *q,
 444                          struct rx_sw_desc *sd, gfp_t gfp,
 445                          unsigned int order)
 446{
 447        if (!q->pg_chunk.page) {
 448                dma_addr_t mapping;
 449
 450                q->pg_chunk.page = alloc_pages(gfp, order);
 451                if (unlikely(!q->pg_chunk.page))
 452                        return -ENOMEM;
 453                q->pg_chunk.va = page_address(q->pg_chunk.page);
 454                q->pg_chunk.p_cnt = q->pg_chunk.va + (PAGE_SIZE << order) -
 455                                    SGE_PG_RSVD;
 456                q->pg_chunk.offset = 0;
 457                mapping = dma_map_page(&adapter->pdev->dev, q->pg_chunk.page,
 458                                       0, q->alloc_size, DMA_FROM_DEVICE);
 459                if (unlikely(dma_mapping_error(&adapter->pdev->dev, mapping))) {
 460                        __free_pages(q->pg_chunk.page, order);
 461                        q->pg_chunk.page = NULL;
 462                        return -EIO;
 463                }
 464                q->pg_chunk.mapping = mapping;
 465        }
 466        sd->pg_chunk = q->pg_chunk;
 467
 468        prefetch(sd->pg_chunk.p_cnt);
 469
 470        q->pg_chunk.offset += q->buf_size;
 471        if (q->pg_chunk.offset == (PAGE_SIZE << order))
 472                q->pg_chunk.page = NULL;
 473        else {
 474                q->pg_chunk.va += q->buf_size;
 475                get_page(q->pg_chunk.page);
 476        }
 477
 478        if (sd->pg_chunk.offset == 0)
 479                *sd->pg_chunk.p_cnt = 1;
 480        else
 481                *sd->pg_chunk.p_cnt += 1;
 482
 483        return 0;
 484}
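/*
 * Note: the free list keeps its own per-chunk reference count (*p_cnt) in
 * the last SGE_PG_RSVD bytes of the page, in addition to the struct page
 * reference taken with get_page().  The private count records how many
 * chunks of the page the free list still owns, so the page's DMA mapping is
 * torn down exactly once, when the final chunk is freed or handed to the
 * stack (see clear_rx_desc() and get_packet_pg()).
 */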
 485
 486static inline void ring_fl_db(struct adapter *adap, struct sge_fl *q)
 487{
 488        if (q->pend_cred >= q->credits / 4) {
 489                q->pend_cred = 0;
 490                wmb();
 491                t3_write_reg(adap, A_SG_KDOORBELL, V_EGRCNTX(q->cntxt_id));
 492        }
 493}
 494
 495/**
 496 *      refill_fl - refill an SGE free-buffer list
 497 *      @adap: the adapter
 498 *      @q: the free-list to refill
 499 *      @n: the number of new buffers to allocate
 500 *      @gfp: the gfp flags for allocating new buffers
 501 *
 502 *      (Re)populate an SGE free-buffer list with up to @n new packet buffers,
 503 *      allocated with the supplied gfp flags.  The caller must assure that
 504 *      @n does not exceed the queue's capacity.
 505 */
 506static int refill_fl(struct adapter *adap, struct sge_fl *q, int n, gfp_t gfp)
 507{
 508        struct rx_sw_desc *sd = &q->sdesc[q->pidx];
 509        struct rx_desc *d = &q->desc[q->pidx];
 510        unsigned int count = 0;
 511
 512        while (n--) {
 513                dma_addr_t mapping;
 514                int err;
 515
 516                if (q->use_pages) {
 517                        if (unlikely(alloc_pg_chunk(adap, q, sd, gfp,
 518                                                    q->order))) {
 519nomem:                          q->alloc_failed++;
 520                                break;
 521                        }
 522                        mapping = sd->pg_chunk.mapping + sd->pg_chunk.offset;
 523                        dma_unmap_addr_set(sd, dma_addr, mapping);
 524
 525                        add_one_rx_chunk(mapping, d, q->gen);
 526                        dma_sync_single_for_device(&adap->pdev->dev, mapping,
 527                                                   q->buf_size - SGE_PG_RSVD,
 528                                                   DMA_FROM_DEVICE);
 529                } else {
 530                        void *buf_start;
 531
 532                        struct sk_buff *skb = alloc_skb(q->buf_size, gfp);
 533                        if (!skb)
 534                                goto nomem;
 535
 536                        sd->skb = skb;
 537                        buf_start = skb->data;
 538                        err = add_one_rx_buf(buf_start, q->buf_size, d, sd,
 539                                             q->gen, adap->pdev);
 540                        if (unlikely(err)) {
 541                                clear_rx_desc(adap->pdev, q, sd);
 542                                break;
 543                        }
 544                }
 545
 546                d++;
 547                sd++;
 548                if (++q->pidx == q->size) {
 549                        q->pidx = 0;
 550                        q->gen ^= 1;
 551                        sd = q->sdesc;
 552                        d = q->desc;
 553                }
 554                count++;
 555        }
 556
 557        q->credits += count;
 558        q->pend_cred += count;
 559        ring_fl_db(adap, q);
 560
 561        return count;
 562}
 563
 564static inline void __refill_fl(struct adapter *adap, struct sge_fl *fl)
 565{
 566        refill_fl(adap, fl, min(MAX_RX_REFILL, fl->size - fl->credits),
 567                  GFP_ATOMIC | __GFP_COMP);
 568}
 569
 570/**
 571 *      recycle_rx_buf - recycle a receive buffer
 572 *      @adap: the adapter
 573 *      @q: the SGE free list
 574 *      @idx: index of buffer to recycle
 575 *
 576 *      Recycles the specified buffer on the given free list by adding it at
 577 *      the next available slot on the list.
 578 */
 579static void recycle_rx_buf(struct adapter *adap, struct sge_fl *q,
 580                           unsigned int idx)
 581{
 582        struct rx_desc *from = &q->desc[idx];
 583        struct rx_desc *to = &q->desc[q->pidx];
 584
 585        q->sdesc[q->pidx] = q->sdesc[idx];
 586        to->addr_lo = from->addr_lo;    /* already big endian */
 587        to->addr_hi = from->addr_hi;    /* likewise */
 588        dma_wmb();
 589        to->len_gen = cpu_to_be32(V_FLD_GEN1(q->gen));
 590        to->gen2 = cpu_to_be32(V_FLD_GEN2(q->gen));
 591
 592        if (++q->pidx == q->size) {
 593                q->pidx = 0;
 594                q->gen ^= 1;
 595        }
 596
 597        q->credits++;
 598        q->pend_cred++;
 599        ring_fl_db(adap, q);
 600}
 601
 602/**
 603 *      alloc_ring - allocate resources for an SGE descriptor ring
 604 *      @pdev: the PCI device
 605 *      @nelem: the number of descriptors
 606 *      @elem_size: the size of each descriptor
 607 *      @sw_size: the size of the SW state associated with each ring element
 608 *      @phys: the physical address of the allocated ring
 609 *      @metadata: address of the array holding the SW state for the ring
 610 *
 611 *      Allocates resources for an SGE descriptor ring, such as Tx queues,
 612 *      free buffer lists, or response queues.  Each SGE ring requires
 613 *      space for its HW descriptors plus, optionally, space for the SW state
 614 *      associated with each HW entry (the metadata).  The function returns
 615 *      three values: the virtual address for the HW ring (the return value
 616 *      of the function), the physical address of the HW ring, and the address
 617 *      of the SW ring.
 618 */
 619static void *alloc_ring(struct pci_dev *pdev, size_t nelem, size_t elem_size,
 620                        size_t sw_size, dma_addr_t * phys, void *metadata)
 621{
 622        size_t len = nelem * elem_size;
 623        void *s = NULL;
 624        void *p = dma_alloc_coherent(&pdev->dev, len, phys, GFP_KERNEL);
 625
 626        if (!p)
 627                return NULL;
 628        if (sw_size && metadata) {
 629                s = kcalloc(nelem, sw_size, GFP_KERNEL);
 630
 631                if (!s) {
 632                        dma_free_coherent(&pdev->dev, len, p, *phys);
 633                        return NULL;
 634                }
 635                *(void **)metadata = s;
 636        }
 637        return p;
 638}
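/*
 * A sketch of the calling convention, mirroring how the queue-set setup code
 * later in this file allocates its rings (the local variable names here are
 * illustrative only):
 *
 *	struct rx_desc *ring;
 *	struct rx_sw_desc *sdesc;
 *	dma_addr_t phys;
 *
 *	ring = alloc_ring(adap->pdev, nelem, sizeof(struct rx_desc),
 *			  sizeof(struct rx_sw_desc), &phys, &sdesc);
 *	if (!ring)
 *		return -ENOMEM;
 *
 * On success, @sdesc points at a zeroed array of per-entry SW state.
 */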
 639
 640/**
 641 *      t3_reset_qset - reset an SGE qset
 642 *      @q: the queue set
 643 *
 644 *      Resets the qset structure.  The NAPI structure is preserved in
 645 *      the event of the qset's reincarnation, for example during EEH
 646 *      recovery.
 647 */
 648static void t3_reset_qset(struct sge_qset *q)
 649{
 650        if (q->adap &&
 651            !(q->adap->flags & NAPI_INIT)) {
 652                memset(q, 0, sizeof(*q));
 653                return;
 654        }
 655
 656        q->adap = NULL;
 657        memset(&q->rspq, 0, sizeof(q->rspq));
 658        memset(q->fl, 0, sizeof(struct sge_fl) * SGE_RXQ_PER_SET);
 659        memset(q->txq, 0, sizeof(struct sge_txq) * SGE_TXQ_PER_SET);
 660        q->txq_stopped = 0;
 661        q->tx_reclaim_timer.function = NULL; /* for t3_stop_sge_timers() */
 662        q->rx_reclaim_timer.function = NULL;
 663        q->nomem = 0;
 664        napi_free_frags(&q->napi);
 665}
 666
 667
 668/**
 669 *      t3_free_qset - free the resources of an SGE queue set
 670 *      @adapter: the adapter owning the queue set
 671 *      @q: the queue set
 672 *
 673 *      Release the HW and SW resources associated with an SGE queue set, such
 674 *      as HW contexts, packet buffers, and descriptor rings.  Traffic to the
 675 *      queue set must be quiesced prior to calling this.
 676 */
 677static void t3_free_qset(struct adapter *adapter, struct sge_qset *q)
 678{
 679        int i;
 680        struct pci_dev *pdev = adapter->pdev;
 681
 682        for (i = 0; i < SGE_RXQ_PER_SET; ++i)
 683                if (q->fl[i].desc) {
 684                        spin_lock_irq(&adapter->sge.reg_lock);
 685                        t3_sge_disable_fl(adapter, q->fl[i].cntxt_id);
 686                        spin_unlock_irq(&adapter->sge.reg_lock);
 687                        free_rx_bufs(pdev, &q->fl[i]);
 688                        kfree(q->fl[i].sdesc);
 689                        dma_free_coherent(&pdev->dev,
 690                                          q->fl[i].size *
 691                                          sizeof(struct rx_desc), q->fl[i].desc,
 692                                          q->fl[i].phys_addr);
 693                }
 694
 695        for (i = 0; i < SGE_TXQ_PER_SET; ++i)
 696                if (q->txq[i].desc) {
 697                        spin_lock_irq(&adapter->sge.reg_lock);
 698                        t3_sge_enable_ecntxt(adapter, q->txq[i].cntxt_id, 0);
 699                        spin_unlock_irq(&adapter->sge.reg_lock);
 700                        if (q->txq[i].sdesc) {
 701                                free_tx_desc(adapter, &q->txq[i],
 702                                             q->txq[i].in_use);
 703                                kfree(q->txq[i].sdesc);
 704                        }
 705                        dma_free_coherent(&pdev->dev,
 706                                          q->txq[i].size *
 707                                          sizeof(struct tx_desc),
 708                                          q->txq[i].desc, q->txq[i].phys_addr);
 709                        __skb_queue_purge(&q->txq[i].sendq);
 710                }
 711
 712        if (q->rspq.desc) {
 713                spin_lock_irq(&adapter->sge.reg_lock);
 714                t3_sge_disable_rspcntxt(adapter, q->rspq.cntxt_id);
 715                spin_unlock_irq(&adapter->sge.reg_lock);
 716                dma_free_coherent(&pdev->dev,
 717                                  q->rspq.size * sizeof(struct rsp_desc),
 718                                  q->rspq.desc, q->rspq.phys_addr);
 719        }
 720
 721        t3_reset_qset(q);
 722}
 723
 724/**
 725 *      init_qset_cntxt - initialize an SGE queue set context info
 726 *      @qs: the queue set
 727 *      @id: the queue set id
 728 *
 729 *      Initializes the TIDs and context ids for the queues of a queue set.
 730 */
 731static void init_qset_cntxt(struct sge_qset *qs, unsigned int id)
 732{
 733        qs->rspq.cntxt_id = id;
 734        qs->fl[0].cntxt_id = 2 * id;
 735        qs->fl[1].cntxt_id = 2 * id + 1;
 736        qs->txq[TXQ_ETH].cntxt_id = FW_TUNNEL_SGEEC_START + id;
 737        qs->txq[TXQ_ETH].token = FW_TUNNEL_TID_START + id;
 738        qs->txq[TXQ_OFLD].cntxt_id = FW_OFLD_SGEEC_START + id;
 739        qs->txq[TXQ_CTRL].cntxt_id = FW_CTRL_SGEEC_START + id;
 740        qs->txq[TXQ_CTRL].token = FW_CTRL_TID_START + id;
 741}
 742
 743/**
 744 *      sgl_len - calculates the size of an SGL of the given capacity
 745 *      @n: the number of SGL entries
 746 *
 747 *      Calculates the number of flits needed for a scatter/gather list that
 748 *      can hold the given number of entries.
 749 */
 750static inline unsigned int sgl_len(unsigned int n)
 751{
 752        /* alternatively: 3 * (n / 2) + 2 * (n & 1) */
 753        return (3 * n) / 2 + (n & 1);
 754}
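/*
 * Each sg_ent packs two buffer addresses and two lengths into three flits
 * (2 x 8-byte address + 2 x 4-byte length), so a pair of entries costs 3
 * flits and a trailing odd entry costs 2.  For example,
 * sgl_len(3) = (3 * 3) / 2 + 1 = 5 flits.
 */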
 755
 756/**
 757 *      flits_to_desc - returns the num of Tx descriptors for the given flits
 758 *      @n: the number of flits
 759 *
 760 *      Calculates the number of Tx descriptors needed for the supplied number
 761 *      of flits.
 762 */
 763static inline unsigned int flits_to_desc(unsigned int n)
 764{
 765        BUG_ON(n >= ARRAY_SIZE(flit_desc_map));
 766        return flit_desc_map[n];
 767}
 768
 769/**
 770 *      get_packet - return the next ingress packet buffer from a free list
 771 *      @adap: the adapter that received the packet
 772 *      @fl: the SGE free list holding the packet
 773 *      @len: the packet length including any SGE padding
 774 *      @drop_thres: # of remaining buffers before we start dropping packets
 775 *
 776 *      Get the next packet from a free list and complete setup of the
 777 *      sk_buff.  If the packet is small we make a copy and recycle the
 778 *      original buffer, otherwise we use the original buffer itself.  If a
 779 *      positive drop threshold is supplied packets are dropped and their
 780 *      buffers recycled if (a) the number of remaining buffers is under the
 781 *      threshold and the packet is too big to copy, or (b) the packet should
 782 *      be copied but there is no memory for the copy.
 783 */
 784static struct sk_buff *get_packet(struct adapter *adap, struct sge_fl *fl,
 785                                  unsigned int len, unsigned int drop_thres)
 786{
 787        struct sk_buff *skb = NULL;
 788        struct rx_sw_desc *sd = &fl->sdesc[fl->cidx];
 789
 790        prefetch(sd->skb->data);
 791        fl->credits--;
 792
 793        if (len <= SGE_RX_COPY_THRES) {
 794                skb = alloc_skb(len, GFP_ATOMIC);
 795                if (likely(skb != NULL)) {
 796                        __skb_put(skb, len);
 797                        dma_sync_single_for_cpu(&adap->pdev->dev,
 798                                                dma_unmap_addr(sd, dma_addr),
 799                                                len, DMA_FROM_DEVICE);
 800                        memcpy(skb->data, sd->skb->data, len);
 801                        dma_sync_single_for_device(&adap->pdev->dev,
 802                                                   dma_unmap_addr(sd, dma_addr),
 803                                                   len, DMA_FROM_DEVICE);
 804                } else if (!drop_thres)
 805                        goto use_orig_buf;
 806recycle:
 807                recycle_rx_buf(adap, fl, fl->cidx);
 808                return skb;
 809        }
 810
 811        if (unlikely(fl->credits < drop_thres) &&
 812            refill_fl(adap, fl, min(MAX_RX_REFILL, fl->size - fl->credits - 1),
 813                      GFP_ATOMIC | __GFP_COMP) == 0)
 814                goto recycle;
 815
 816use_orig_buf:
 817        dma_unmap_single(&adap->pdev->dev, dma_unmap_addr(sd, dma_addr),
 818                         fl->buf_size, DMA_FROM_DEVICE);
 819        skb = sd->skb;
 820        skb_put(skb, len);
 821        __refill_fl(adap, fl);
 822        return skb;
 823}
 824
 825/**
 826 *      get_packet_pg - return the next ingress packet buffer from a free list
 827 *      @adap: the adapter that received the packet
 828 *      @fl: the SGE free list holding the packet
 829 *      @q: the response queue
 830 *      @len: the packet length including any SGE padding
 831 *      @drop_thres: # of remaining buffers before we start dropping packets
 832 *
 833 *      Get the next packet from a free list populated with page chunks.
 834 *      If the packet is small we make a copy and recycle the original buffer,
 835 *      otherwise we attach the original buffer as a page fragment to a fresh
 836 *      sk_buff.  If a positive drop threshold is supplied packets are dropped
 837 *      and their buffers recycled if (a) the number of remaining buffers is
 838 *      under the threshold and the packet is too big to copy, or (b) there's
 839 *      no system memory.
 840 *
 841 *      Note: this function is similar to @get_packet but deals with Rx buffers
 842 *      that are page chunks rather than sk_buffs.
 843 */
 844static struct sk_buff *get_packet_pg(struct adapter *adap, struct sge_fl *fl,
 845                                     struct sge_rspq *q, unsigned int len,
 846                                     unsigned int drop_thres)
 847{
 848        struct sk_buff *newskb, *skb;
 849        struct rx_sw_desc *sd = &fl->sdesc[fl->cidx];
 850
 851        dma_addr_t dma_addr = dma_unmap_addr(sd, dma_addr);
 852
 853        newskb = skb = q->pg_skb;
 854        if (!skb && (len <= SGE_RX_COPY_THRES)) {
 855                newskb = alloc_skb(len, GFP_ATOMIC);
 856                if (likely(newskb != NULL)) {
 857                        __skb_put(newskb, len);
 858                        dma_sync_single_for_cpu(&adap->pdev->dev, dma_addr,
 859                                                len, DMA_FROM_DEVICE);
 860                        memcpy(newskb->data, sd->pg_chunk.va, len);
 861                        dma_sync_single_for_device(&adap->pdev->dev, dma_addr,
 862                                                   len, DMA_FROM_DEVICE);
 863                } else if (!drop_thres)
 864                        return NULL;
 865recycle:
 866                fl->credits--;
 867                recycle_rx_buf(adap, fl, fl->cidx);
 868                q->rx_recycle_buf++;
 869                return newskb;
 870        }
 871
 872        if (unlikely(q->rx_recycle_buf || (!skb && fl->credits <= drop_thres)))
 873                goto recycle;
 874
 875        prefetch(sd->pg_chunk.p_cnt);
 876
 877        if (!skb)
 878                newskb = alloc_skb(SGE_RX_PULL_LEN, GFP_ATOMIC);
 879
 880        if (unlikely(!newskb)) {
 881                if (!drop_thres)
 882                        return NULL;
 883                goto recycle;
 884        }
 885
 886        dma_sync_single_for_cpu(&adap->pdev->dev, dma_addr, len,
 887                                DMA_FROM_DEVICE);
 888        (*sd->pg_chunk.p_cnt)--;
 889        if (!*sd->pg_chunk.p_cnt && sd->pg_chunk.page != fl->pg_chunk.page)
 890                dma_unmap_page(&adap->pdev->dev, sd->pg_chunk.mapping,
 891                               fl->alloc_size, DMA_FROM_DEVICE);
 892        if (!skb) {
 893                __skb_put(newskb, SGE_RX_PULL_LEN);
 894                memcpy(newskb->data, sd->pg_chunk.va, SGE_RX_PULL_LEN);
 895                skb_fill_page_desc(newskb, 0, sd->pg_chunk.page,
 896                                   sd->pg_chunk.offset + SGE_RX_PULL_LEN,
 897                                   len - SGE_RX_PULL_LEN);
 898                newskb->len = len;
 899                newskb->data_len = len - SGE_RX_PULL_LEN;
 900                newskb->truesize += newskb->data_len;
 901        } else {
 902                skb_fill_page_desc(newskb, skb_shinfo(newskb)->nr_frags,
 903                                   sd->pg_chunk.page,
 904                                   sd->pg_chunk.offset, len);
 905                newskb->len += len;
 906                newskb->data_len += len;
 907                newskb->truesize += len;
 908        }
 909
 910        fl->credits--;
 911        /*
 912         * We do not refill FLs here, we let the caller do it to overlap a
 913         * prefetch.
 914         */
 915        return newskb;
 916}
 917
 918/**
 919 *      get_imm_packet - return the next ingress packet buffer from a response
 920 *      @resp: the response descriptor containing the packet data
 921 *
 922 *      Return a packet containing the immediate data of the given response.
 923 */
 924static inline struct sk_buff *get_imm_packet(const struct rsp_desc *resp)
 925{
 926        struct sk_buff *skb = alloc_skb(IMMED_PKT_SIZE, GFP_ATOMIC);
 927
 928        if (skb) {
 929                __skb_put(skb, IMMED_PKT_SIZE);
 930                BUILD_BUG_ON(IMMED_PKT_SIZE != sizeof(resp->immediate));
 931                skb_copy_to_linear_data(skb, &resp->immediate, IMMED_PKT_SIZE);
 932        }
 933        return skb;
 934}
 935
 936/**
 937 *      calc_tx_descs - calculate the number of Tx descriptors for a packet
 938 *      @skb: the packet
 939 *
 940 *      Returns the number of Tx descriptors needed for the given Ethernet
 941 *      packet.  Ethernet packets require addition of WR and CPL headers.
 942 */
 943static inline unsigned int calc_tx_descs(const struct sk_buff *skb)
 944{
 945        unsigned int flits;
 946
 947        if (skb->len <= WR_LEN - sizeof(struct cpl_tx_pkt))
 948                return 1;
 949
 950        flits = sgl_len(skb_shinfo(skb)->nr_frags + 1) + 2;
 951        if (skb_shinfo(skb)->gso_size)
 952                flits++;
 953        return flits_to_desc(flits);
 954}
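/*
 * A worked example: a non-TSO packet too big for immediate data, with linear
 * data plus two page fragments, uses nr_frags + 1 = 3 buffers.  That is
 * sgl_len(3) = 5 SGL flits plus 2 flits of WR/CPL header, i.e. 7 flits,
 * which flits_to_desc() maps to a single Tx descriptor.
 */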
 955
 956/*      map_skb - map a packet main body and its page fragments
 957 *      @pdev: the PCI device
 958 *      @skb: the packet
 959 *      @addr: array in which to save the mapped DMA addresses
 960 *
 961 *      Map the main body of an sk_buff and its page fragments, if any.
 962 */
 963static int map_skb(struct pci_dev *pdev, const struct sk_buff *skb,
 964                   dma_addr_t *addr)
 965{
 966        const skb_frag_t *fp, *end;
 967        const struct skb_shared_info *si;
 968
 969        if (skb_headlen(skb)) {
 970                *addr = dma_map_single(&pdev->dev, skb->data,
 971                                       skb_headlen(skb), DMA_TO_DEVICE);
 972                if (dma_mapping_error(&pdev->dev, *addr))
 973                        goto out_err;
 974                addr++;
 975        }
 976
 977        si = skb_shinfo(skb);
 978        end = &si->frags[si->nr_frags];
 979
 980        for (fp = si->frags; fp < end; fp++) {
 981                *addr = skb_frag_dma_map(&pdev->dev, fp, 0, skb_frag_size(fp),
 982                                         DMA_TO_DEVICE);
 983                if (dma_mapping_error(&pdev->dev, *addr))
 984                        goto unwind;
 985                addr++;
 986        }
 987        return 0;
 988
 989unwind:
 990        while (fp-- > si->frags)
 991                dma_unmap_page(&pdev->dev, *--addr, skb_frag_size(fp),
 992                               DMA_TO_DEVICE);
 993
 994        dma_unmap_single(&pdev->dev, addr[-1], skb_headlen(skb),
 995                         DMA_TO_DEVICE);
 996out_err:
 997        return -ENOMEM;
 998}
 999
1000/**
1001 *      write_sgl - populate a scatter/gather list for a packet
1002 *      @skb: the packet
1003 *      @sgp: the SGL to populate
1004 *      @start: start address of skb main body data to include in the SGL
1005 *      @len: length of skb main body data to include in the SGL
1006 *      @addr: the list of the mapped addresses
1007 *
1008 *      Copies the scatter/gather list for the buffers that make up a packet
1009 *      and returns the SGL size in 8-byte words.  The caller must size the SGL
1010 *      appropriately.
1011 */
1012static inline unsigned int write_sgl(const struct sk_buff *skb,
1013                                     struct sg_ent *sgp, unsigned char *start,
1014                                     unsigned int len, const dma_addr_t *addr)
1015{
1016        unsigned int i, j = 0, k = 0, nfrags;
1017
1018        if (len) {
1019                sgp->len[0] = cpu_to_be32(len);
1020                sgp->addr[j++] = cpu_to_be64(addr[k++]);
1021        }
1022
1023        nfrags = skb_shinfo(skb)->nr_frags;
1024        for (i = 0; i < nfrags; i++) {
1025                const skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
1026
1027                sgp->len[j] = cpu_to_be32(skb_frag_size(frag));
1028                sgp->addr[j] = cpu_to_be64(addr[k++]);
1029                j ^= 1;
1030                if (j == 0)
1031                        ++sgp;
1032        }
1033        if (j)
1034                sgp->len[j] = 0;
1035        return ((nfrags + (len != 0)) * 3) / 2 + j;
1036}
1037
1038/**
1039 *      check_ring_tx_db - check and potentially ring a Tx queue's doorbell
1040 *      @adap: the adapter
1041 *      @q: the Tx queue
1042 *
1043 *      Ring the doorbell if a Tx queue is asleep.  There is a natural race
1044 *      where the HW may go to sleep just after we check; in that case the
1045 *      interrupt handler will detect the outstanding Tx packet and ring the
1046 *      doorbell for us.
1047 *
1048 *      When GTS is disabled we unconditionally ring the doorbell.
1049 */
1050static inline void check_ring_tx_db(struct adapter *adap, struct sge_txq *q)
1051{
1052#if USE_GTS
1053        clear_bit(TXQ_LAST_PKT_DB, &q->flags);
1054        if (test_and_set_bit(TXQ_RUNNING, &q->flags) == 0) {
1055                set_bit(TXQ_LAST_PKT_DB, &q->flags);
1056                t3_write_reg(adap, A_SG_KDOORBELL,
1057                             F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
1058        }
1059#else
1060        wmb();                  /* write descriptors before telling HW */
1061        t3_write_reg(adap, A_SG_KDOORBELL,
1062                     F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
1063#endif
1064}
1065
1066static inline void wr_gen2(struct tx_desc *d, unsigned int gen)
1067{
1068#if SGE_NUM_GENBITS == 2
1069        d->flit[TX_DESC_FLITS - 1] = cpu_to_be64(gen);
1070#endif
1071}
1072
1073/**
1074 *      write_wr_hdr_sgl - write a WR header and, optionally, SGL
1075 *      @ndesc: number of Tx descriptors spanned by the SGL
1076 *      @skb: the packet corresponding to the WR
1077 *      @d: first Tx descriptor to be written
1078 *      @pidx: index of above descriptors
1079 *      @q: the SGE Tx queue
1080 *      @sgl: the SGL
1081 *      @flits: number of flits to the start of the SGL in the first descriptor
1082 *      @sgl_flits: the SGL size in flits
1083 *      @gen: the Tx descriptor generation
1084 *      @wr_hi: top 32 bits of WR header based on WR type (big endian)
1085 *      @wr_lo: low 32 bits of WR header based on WR type (big endian)
1086 *
1087 *      Write a work request header and an associated SGL.  If the SGL is
1088 *      small enough to fit into one Tx descriptor it has already been written
1089 *      and we just need to write the WR header.  Otherwise we distribute the
1090 *      SGL across the number of descriptors it spans.
1091 */
1092static void write_wr_hdr_sgl(unsigned int ndesc, struct sk_buff *skb,
1093                             struct tx_desc *d, unsigned int pidx,
1094                             const struct sge_txq *q,
1095                             const struct sg_ent *sgl,
1096                             unsigned int flits, unsigned int sgl_flits,
1097                             unsigned int gen, __be32 wr_hi,
1098                             __be32 wr_lo)
1099{
1100        struct work_request_hdr *wrp = (struct work_request_hdr *)d;
1101        struct tx_sw_desc *sd = &q->sdesc[pidx];
1102
1103        sd->skb = skb;
1104        if (need_skb_unmap()) {
1105                sd->fragidx = 0;
1106                sd->addr_idx = 0;
1107                sd->sflit = flits;
1108        }
1109
1110        if (likely(ndesc == 1)) {
1111                sd->eop = 1;
1112                wrp->wr_hi = htonl(F_WR_SOP | F_WR_EOP | V_WR_DATATYPE(1) |
1113                                   V_WR_SGLSFLT(flits)) | wr_hi;
1114                dma_wmb();
1115                wrp->wr_lo = htonl(V_WR_LEN(flits + sgl_flits) |
1116                                   V_WR_GEN(gen)) | wr_lo;
1117                wr_gen2(d, gen);
1118        } else {
1119                unsigned int ogen = gen;
1120                const u64 *fp = (const u64 *)sgl;
1121                struct work_request_hdr *wp = wrp;
1122
1123                wrp->wr_hi = htonl(F_WR_SOP | V_WR_DATATYPE(1) |
1124                                   V_WR_SGLSFLT(flits)) | wr_hi;
1125
1126                while (sgl_flits) {
1127                        unsigned int avail = WR_FLITS - flits;
1128
1129                        if (avail > sgl_flits)
1130                                avail = sgl_flits;
1131                        memcpy(&d->flit[flits], fp, avail * sizeof(*fp));
1132                        sgl_flits -= avail;
1133                        ndesc--;
1134                        if (!sgl_flits)
1135                                break;
1136
1137                        fp += avail;
1138                        d++;
1139                        sd->eop = 0;
1140                        sd++;
1141                        if (++pidx == q->size) {
1142                                pidx = 0;
1143                                gen ^= 1;
1144                                d = q->desc;
1145                                sd = q->sdesc;
1146                        }
1147
1148                        sd->skb = skb;
1149                        wrp = (struct work_request_hdr *)d;
1150                        wrp->wr_hi = htonl(V_WR_DATATYPE(1) |
1151                                           V_WR_SGLSFLT(1)) | wr_hi;
1152                        wrp->wr_lo = htonl(V_WR_LEN(min(WR_FLITS,
1153                                                        sgl_flits + 1)) |
1154                                           V_WR_GEN(gen)) | wr_lo;
1155                        wr_gen2(d, gen);
1156                        flits = 1;
1157                }
1158                sd->eop = 1;
1159                wrp->wr_hi |= htonl(F_WR_EOP);
1160                dma_wmb();
1161                wp->wr_lo = htonl(V_WR_LEN(WR_FLITS) | V_WR_GEN(ogen)) | wr_lo;
1162                wr_gen2((struct tx_desc *)wp, ogen);
1163                WARN_ON(ndesc != 0);
1164        }
1165}
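/*
 * Note: for multi-descriptor WRs the wr_lo of the *first* descriptor (with
 * its original generation value, ogen) is written last, after the dma_wmb().
 * Since the SGE keys on the first descriptor's generation bits, the whole WR
 * becomes visible to the HW only once every continuation descriptor has been
 * filled in.
 */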
1166
1167/**
1168 *      write_tx_pkt_wr - write a TX_PKT work request
1169 *      @adap: the adapter
1170 *      @skb: the packet to send
1171 *      @pi: the egress interface
1172 *      @pidx: index of the first Tx descriptor to write
1173 *      @gen: the generation value to use
1174 *      @q: the Tx queue
1175 *      @ndesc: number of descriptors the packet will occupy
1176 *      @compl: the value of the COMPL bit to use
1177 *      @addr: the DMA addresses of the packet's data buffers
1178 *
1179 *      Generate a TX_PKT work request to send the supplied packet.
1180 */
1181static void write_tx_pkt_wr(struct adapter *adap, struct sk_buff *skb,
1182                            const struct port_info *pi,
1183                            unsigned int pidx, unsigned int gen,
1184                            struct sge_txq *q, unsigned int ndesc,
1185                            unsigned int compl, const dma_addr_t *addr)
1186{
1187        unsigned int flits, sgl_flits, cntrl, tso_info;
1188        struct sg_ent *sgp, sgl[MAX_SKB_FRAGS / 2 + 1];
1189        struct tx_desc *d = &q->desc[pidx];
1190        struct cpl_tx_pkt *cpl = (struct cpl_tx_pkt *)d;
1191
1192        cpl->len = htonl(skb->len);
1193        cntrl = V_TXPKT_INTF(pi->port_id);
1194
1195        if (skb_vlan_tag_present(skb))
1196                cntrl |= F_TXPKT_VLAN_VLD | V_TXPKT_VLAN(skb_vlan_tag_get(skb));
1197
1198        tso_info = V_LSO_MSS(skb_shinfo(skb)->gso_size);
1199        if (tso_info) {
1200                int eth_type;
1201                struct cpl_tx_pkt_lso *hdr = (struct cpl_tx_pkt_lso *)cpl;
1202
1203                d->flit[2] = 0;
1204                cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT_LSO);
1205                hdr->cntrl = htonl(cntrl);
1206                eth_type = skb_network_offset(skb) == ETH_HLEN ?
1207                    CPL_ETH_II : CPL_ETH_II_VLAN;
1208                tso_info |= V_LSO_ETH_TYPE(eth_type) |
1209                    V_LSO_IPHDR_WORDS(ip_hdr(skb)->ihl) |
1210                    V_LSO_TCPHDR_WORDS(tcp_hdr(skb)->doff);
1211                hdr->lso_info = htonl(tso_info);
1212                flits = 3;
1213        } else {
1214                cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT);
1215                cntrl |= F_TXPKT_IPCSUM_DIS;    /* SW calculates IP csum */
1216                cntrl |= V_TXPKT_L4CSUM_DIS(skb->ip_summed != CHECKSUM_PARTIAL);
1217                cpl->cntrl = htonl(cntrl);
1218
1219                if (skb->len <= WR_LEN - sizeof(*cpl)) {
1220                        q->sdesc[pidx].skb = NULL;
1221                        if (!skb->data_len)
1222                                skb_copy_from_linear_data(skb, &d->flit[2],
1223                                                          skb->len);
1224                        else
1225                                skb_copy_bits(skb, 0, &d->flit[2], skb->len);
1226
1227                        flits = (skb->len + 7) / 8 + 2;
1228                        cpl->wr.wr_hi = htonl(V_WR_BCNTLFLT(skb->len & 7) |
1229                                              V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT)
1230                                              | F_WR_SOP | F_WR_EOP | compl);
1231                        dma_wmb();
1232                        cpl->wr.wr_lo = htonl(V_WR_LEN(flits) | V_WR_GEN(gen) |
1233                                              V_WR_TID(q->token));
1234                        wr_gen2(d, gen);
1235                        dev_consume_skb_any(skb);
1236                        return;
1237                }
1238
1239                flits = 2;
1240        }
1241
1242        sgp = ndesc == 1 ? (struct sg_ent *)&d->flit[flits] : sgl;
1243        sgl_flits = write_sgl(skb, sgp, skb->data, skb_headlen(skb), addr);
1244
1245        write_wr_hdr_sgl(ndesc, skb, d, pidx, q, sgl, flits, sgl_flits, gen,
1246                         htonl(V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) | compl),
1247                         htonl(V_WR_TID(q->token)));
1248}
1249
1250static inline void t3_stop_tx_queue(struct netdev_queue *txq,
1251                                    struct sge_qset *qs, struct sge_txq *q)
1252{
1253        netif_tx_stop_queue(txq);
1254        set_bit(TXQ_ETH, &qs->txq_stopped);
1255        q->stops++;
1256}
1257
1258/**
1259 *      t3_eth_xmit - add a packet to the Ethernet Tx queue
1260 *      @skb: the packet
1261 *      @dev: the egress net device
1262 *
1263 *      Add a packet to an SGE Tx queue.  Runs with softirqs disabled.
1264 */
1265netdev_tx_t t3_eth_xmit(struct sk_buff *skb, struct net_device *dev)
1266{
1267        int qidx;
1268        unsigned int ndesc, pidx, credits, gen, compl;
1269        const struct port_info *pi = netdev_priv(dev);
1270        struct adapter *adap = pi->adapter;
1271        struct netdev_queue *txq;
1272        struct sge_qset *qs;
1273        struct sge_txq *q;
1274        dma_addr_t addr[MAX_SKB_FRAGS + 1];
1275
1276        /*
1277         * The chip min packet length is 9 octets but play safe and reject
1278         * anything shorter than an Ethernet header.
1279         */
1280        if (unlikely(skb->len < ETH_HLEN)) {
1281                dev_kfree_skb_any(skb);
1282                return NETDEV_TX_OK;
1283        }
1284
1285        qidx = skb_get_queue_mapping(skb);
1286        qs = &pi->qs[qidx];
1287        q = &qs->txq[TXQ_ETH];
1288        txq = netdev_get_tx_queue(dev, qidx);
1289
1290        reclaim_completed_tx(adap, q, TX_RECLAIM_CHUNK);
1291
1292        credits = q->size - q->in_use;
1293        ndesc = calc_tx_descs(skb);
1294
1295        if (unlikely(credits < ndesc)) {
1296                t3_stop_tx_queue(txq, qs, q);
1297                dev_err(&adap->pdev->dev,
1298                        "%s: Tx ring %u full while queue awake!\n",
1299                        dev->name, q->cntxt_id & 7);
1300                return NETDEV_TX_BUSY;
1301        }
1302
1303        /* Map the buffers unless the packet fits as immediate data */
1304        if (skb->len > (WR_LEN - sizeof(struct cpl_tx_pkt))) {
1305                if (unlikely(map_skb(adap->pdev, skb, addr) < 0)) {
1306                        dev_kfree_skb(skb);
1307                        return NETDEV_TX_OK;
1308                }
1309        }
1310
1311        q->in_use += ndesc;
1312        if (unlikely(credits - ndesc < q->stop_thres)) {
1313                t3_stop_tx_queue(txq, qs, q);
1314
1315                if (should_restart_tx(q) &&
1316                    test_and_clear_bit(TXQ_ETH, &qs->txq_stopped)) {
1317                        q->restarts++;
1318                        netif_tx_start_queue(txq);
1319                }
1320        }
1321
1322        gen = q->gen;
1323        q->unacked += ndesc;
1324        compl = (q->unacked & 8) << (S_WR_COMPL - 3);
1325        q->unacked &= 7;
1326        pidx = q->pidx;
1327        q->pidx += ndesc;
1328        if (q->pidx >= q->size) {
1329                q->pidx -= q->size;
1330                q->gen ^= 1;
1331        }
1332
1333        /* update port statistics */
1334        if (skb->ip_summed == CHECKSUM_PARTIAL)
1335                qs->port_stats[SGE_PSTAT_TX_CSUM]++;
1336        if (skb_shinfo(skb)->gso_size)
1337                qs->port_stats[SGE_PSTAT_TSO]++;
1338        if (skb_vlan_tag_present(skb))
1339                qs->port_stats[SGE_PSTAT_VLANINS]++;
1340
1341        /*
1342         * We do not use Tx completion interrupts to free DMAd Tx packets.
1343         * This is good for performance but means that we rely on new Tx
1344         * packets arriving to run the destructors of completed packets,
1345         * which open up space in their sockets' send queues.  Sometimes
1346         * we do not get such new packets causing Tx to stall.  A single
1347         * UDP transmitter is a good example of this situation.  We have
1348         * a clean up timer that periodically reclaims completed packets
1349         * but it doesn't run often enough (nor do we want it to) to prevent
1350         * lengthy stalls.  A solution to this problem is to run the
1351         * destructor early, after the packet is queued but before it's DMAd.
1352         * A drawback is that we lie to socket memory accounting, but the amount
1353         * of extra memory is reasonable (limited by the number of Tx
1354         * descriptors), the packets do actually get freed quickly by new
1355         * packets almost always, and for protocols like TCP that wait for
1356         * acks to really free up the data the extra memory is even less.
1357         * On the positive side we run the destructors on the sending CPU
1358         * rather than on a potentially different completing CPU, usually a
1359         * good thing.  We also run them without holding our Tx queue lock,
1360         * unlike what reclaim_completed_tx() would otherwise do.
1361         *
1362         * Run the destructor before telling the DMA engine about the packet
1363         * to make sure it doesn't complete and get freed prematurely.
1364         */
1365        if (likely(!skb_shared(skb)))
1366                skb_orphan(skb);
1367
1368        write_tx_pkt_wr(adap, skb, pi, pidx, gen, q, ndesc, compl, addr);
1369        check_ring_tx_db(adap, q);
1370        return NETDEV_TX_OK;
1371}
1372
1373/**
1374 *      write_imm - write a packet into a Tx descriptor as immediate data
1375 *      @d: the Tx descriptor to write
1376 *      @skb: the packet
1377 *      @len: the length of packet data to write as immediate data
1378 *      @gen: the generation bit value to write
1379 *
1380 *      Writes a packet as immediate data into a Tx descriptor.  The packet
1381 *      contains a work request at its beginning.  We must write the packet
1382 *      carefully so the SGE doesn't read it accidentally before it's written
1383 *      in its entirety.
1384 */
1385static inline void write_imm(struct tx_desc *d, struct sk_buff *skb,
1386                             unsigned int len, unsigned int gen)
1387{
1388        struct work_request_hdr *from = (struct work_request_hdr *)skb->data;
1389        struct work_request_hdr *to = (struct work_request_hdr *)d;
1390
1391        if (likely(!skb->data_len))
1392                memcpy(&to[1], &from[1], len - sizeof(*from));
1393        else
1394                skb_copy_bits(skb, sizeof(*from), &to[1], len - sizeof(*from));
1395
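        /*
         * wr_lo carries the generation bit that tells the SGE this
         * descriptor is valid, so the body copied above and wr_hi must be
         * visible (hence the dma_wmb()) before wr_lo is written.
         */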
1396        to->wr_hi = from->wr_hi | htonl(F_WR_SOP | F_WR_EOP |
1397                                        V_WR_BCNTLFLT(len & 7));
1398        dma_wmb();
1399        to->wr_lo = from->wr_lo | htonl(V_WR_GEN(gen) |
1400                                        V_WR_LEN((len + 7) / 8));
1401        wr_gen2(d, gen);
1402        kfree_skb(skb);
1403}
1404
1405/**
1406 *      check_desc_avail - check descriptor availability on a send queue
1407 *      @adap: the adapter
1408 *      @q: the send queue
1409 *      @skb: the packet needing the descriptors
1410 *      @ndesc: the number of Tx descriptors needed
1411 *      @qid: the Tx queue number in its queue set (TXQ_OFLD or TXQ_CTRL)
1412 *
1413 *      Checks if the requested number of Tx descriptors is available on an
1414 *      SGE send queue.  If the queue is already suspended or not enough
1415 *      descriptors are available the packet is queued for later transmission.
1416 *      Must be called with the Tx queue locked.
1417 *
1418 *      Returns 0 if enough descriptors are available, 1 if there aren't
1419 *      enough descriptors and the packet has been queued, and 2 if the caller
1420 *      needs to retry because there weren't enough descriptors at the
1421 *      beginning of the call but some freed up in the meantime.
1422 */
1423static inline int check_desc_avail(struct adapter *adap, struct sge_txq *q,
1424                                   struct sk_buff *skb, unsigned int ndesc,
1425                                   unsigned int qid)
1426{
1427        if (unlikely(!skb_queue_empty(&q->sendq))) {
1428              addq_exit:__skb_queue_tail(&q->sendq, skb);
1429                return 1;
1430        }
1431        if (unlikely(q->size - q->in_use < ndesc)) {
1432                struct sge_qset *qs = txq_to_qset(q, qid);
1433
1434                set_bit(qid, &qs->txq_stopped);
1435                smp_mb__after_atomic();
1436
1437                if (should_restart_tx(q) &&
1438                    test_and_clear_bit(qid, &qs->txq_stopped))
1439                        return 2;
1440
1441                q->stops++;
1442                goto addq_exit;
1443        }
1444        return 0;
1445}
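
/*
 * Typical caller pattern for check_desc_avail(), as used by ctrl_xmit()
 * and ofld_xmit() below (sketch only, with the Tx queue lock held):
 *
 *	again:	reclaim_completed_tx_imm(q);
 *		ret = check_desc_avail(adap, q, skb, ndesc, qid);
 *		if (ret == 1)		// queued on q->sendq, stop here
 *			return NET_XMIT_CN;
 *		if (ret == 2)		// descriptors freed up, retry
 *			goto again;
 *		// ret == 0: write the work request and advance pidx/gen
 */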
1446
1447/**
1448 *      reclaim_completed_tx_imm - reclaim completed control-queue Tx descs
1449 *      @q: the SGE control Tx queue
1450 *
1451 *      This is a variant of reclaim_completed_tx() that is used for Tx queues
1452 *      that send only immediate data (presently just the control queues) and
1453 *      thus do not have any sk_buffs to release.
1454 */
1455static inline void reclaim_completed_tx_imm(struct sge_txq *q)
1456{
1457        unsigned int reclaim = q->processed - q->cleaned;
1458
1459        q->in_use -= reclaim;
1460        q->cleaned += reclaim;
1461}
1462
1463static inline int immediate(const struct sk_buff *skb)
1464{
1465        return skb->len <= WR_LEN;
1466}
1467
1468/**
1469 *      ctrl_xmit - send a packet through an SGE control Tx queue
1470 *      @adap: the adapter
1471 *      @q: the control queue
1472 *      @skb: the packet
1473 *
1474 *      Send a packet through an SGE control Tx queue.  Packets sent through
1475 *      a control queue must fit entirely as immediate data in a single Tx
1476 *      descriptor and have no page fragments.
1477 */
1478static int ctrl_xmit(struct adapter *adap, struct sge_txq *q,
1479                     struct sk_buff *skb)
1480{
1481        int ret;
1482        struct work_request_hdr *wrp = (struct work_request_hdr *)skb->data;
1483
1484        if (unlikely(!immediate(skb))) {
1485                WARN_ON(1);
1486                dev_kfree_skb(skb);
1487                return NET_XMIT_SUCCESS;
1488        }
1489
1490        wrp->wr_hi |= htonl(F_WR_SOP | F_WR_EOP);
1491        wrp->wr_lo = htonl(V_WR_TID(q->token));
1492
1493        spin_lock(&q->lock);
1494      again:reclaim_completed_tx_imm(q);
1495
1496        ret = check_desc_avail(adap, q, skb, 1, TXQ_CTRL);
1497        if (unlikely(ret)) {
1498                if (ret == 1) {
1499                        spin_unlock(&q->lock);
1500                        return NET_XMIT_CN;
1501                }
1502                goto again;
1503        }
1504
1505        write_imm(&q->desc[q->pidx], skb, skb->len, q->gen);
1506
1507        q->in_use++;
1508        if (++q->pidx >= q->size) {
1509                q->pidx = 0;
1510                q->gen ^= 1;
1511        }
1512        spin_unlock(&q->lock);
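        /*
         * Make the descriptor update visible before ringing the doorbell,
         * which tells the SGE that new descriptors are available in this
         * egress context.
         */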
1513        wmb();
1514        t3_write_reg(adap, A_SG_KDOORBELL,
1515                     F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
1516        return NET_XMIT_SUCCESS;
1517}
1518
1519/**
1520 *      restart_ctrlq - restart a suspended control queue
1521 *      @w: pointer to the work associated with this handler
1522 *
1523 *      Resumes transmission on a suspended Tx control queue.
1524 */
1525static void restart_ctrlq(struct work_struct *w)
1526{
1527        struct sk_buff *skb;
1528        struct sge_qset *qs = container_of(w, struct sge_qset,
1529                                           txq[TXQ_CTRL].qresume_task);
1530        struct sge_txq *q = &qs->txq[TXQ_CTRL];
1531
1532        spin_lock(&q->lock);
1533      again:reclaim_completed_tx_imm(q);
1534
1535        while (q->in_use < q->size &&
1536               (skb = __skb_dequeue(&q->sendq)) != NULL) {
1537
1538                write_imm(&q->desc[q->pidx], skb, skb->len, q->gen);
1539
1540                if (++q->pidx >= q->size) {
1541                        q->pidx = 0;
1542                        q->gen ^= 1;
1543                }
1544                q->in_use++;
1545        }
1546
1547        if (!skb_queue_empty(&q->sendq)) {
1548                set_bit(TXQ_CTRL, &qs->txq_stopped);
1549                smp_mb__after_atomic();
1550
1551                if (should_restart_tx(q) &&
1552                    test_and_clear_bit(TXQ_CTRL, &qs->txq_stopped))
1553                        goto again;
1554                q->stops++;
1555        }
1556
1557        spin_unlock(&q->lock);
1558        wmb();
1559        t3_write_reg(qs->adap, A_SG_KDOORBELL,
1560                     F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
1561}
1562
1563/*
1564 * Send a management message through control queue 0
1565 */
1566int t3_mgmt_tx(struct adapter *adap, struct sk_buff *skb)
1567{
1568        int ret;
1569        local_bh_disable();
1570        ret = ctrl_xmit(adap, &adap->sge.qs[0].txq[TXQ_CTRL], skb);
1571        local_bh_enable();
1572
1573        return ret;
1574}
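
/*
 * Illustrative usage sketch (not part of the driver): a hypothetical caller
 * builds a management work request as immediate data and hands it to
 * t3_mgmt_tx(); ctrl_xmit() fills in wr_lo and the SOP/EOP flags.  The
 * opcode below is a placeholder.
 *
 *	struct work_request_hdr *wr;
 *	struct sk_buff *skb = alloc_skb(sizeof(*wr), GFP_KERNEL);
 *
 *	if (!skb)
 *		return -ENOMEM;
 *	wr = __skb_put_zero(skb, sizeof(*wr));
 *	wr->wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));  // placeholder opcode
 *	return t3_mgmt_tx(adap, skb);
 */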
1575
1576/**
1577 *      deferred_unmap_destructor - unmap a packet when it is freed
1578 *      @skb: the packet
1579 *
1580 *      This is the packet destructor used for Tx packets that need to remain
1581 *      mapped until they are freed rather than until their Tx descriptors are
1582 *      freed.
1583 */
1584static void deferred_unmap_destructor(struct sk_buff *skb)
1585{
1586        int i;
1587        const dma_addr_t *p;
1588        const struct skb_shared_info *si;
1589        const struct deferred_unmap_info *dui;
1590
1591        dui = (struct deferred_unmap_info *)skb->head;
1592        p = dui->addr;
1593
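        /*
         * dui->addr holds the DMA addresses in the order they were mapped:
         * first the linear part beyond the transport header (if any),
         * followed by one address per page fragment.
         */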
1594        if (skb_tail_pointer(skb) - skb_transport_header(skb))
1595                dma_unmap_single(&dui->pdev->dev, *p++,
1596                                 skb_tail_pointer(skb) - skb_transport_header(skb),
1597                                 DMA_TO_DEVICE);
1598
1599        si = skb_shinfo(skb);
1600        for (i = 0; i < si->nr_frags; i++)
1601                dma_unmap_page(&dui->pdev->dev, *p++,
1602                               skb_frag_size(&si->frags[i]), DMA_TO_DEVICE);
1603}
1604
1605static void setup_deferred_unmapping(struct sk_buff *skb, struct pci_dev *pdev,
1606                                     const struct sg_ent *sgl, int sgl_flits)
1607{
1608        dma_addr_t *p;
1609        struct deferred_unmap_info *dui;
1610
1611        dui = (struct deferred_unmap_info *)skb->head;
1612        dui->pdev = pdev;
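        /*
         * Each sg_ent covers two buffers in three flits, so walk the SGL in
         * steps of three flits; a one- or two-flit remainder means the last
         * entry holds a single address.
         */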
1613        for (p = dui->addr; sgl_flits >= 3; sgl++, sgl_flits -= 3) {
1614                *p++ = be64_to_cpu(sgl->addr[0]);
1615                *p++ = be64_to_cpu(sgl->addr[1]);
1616        }
1617        if (sgl_flits)
1618                *p = be64_to_cpu(sgl->addr[0]);
1619}
1620
1621/**
1622 *      write_ofld_wr - write an offload work request
1623 *      @adap: the adapter
1624 *      @skb: the packet to send
1625 *      @q: the Tx queue
1626 *      @pidx: index of the first Tx descriptor to write
1627 *      @gen: the generation value to use
1628 *      @ndesc: number of descriptors the packet will occupy
1629 *      @addr: DMA addresses of the packet's data
1630 *
1631 *      Write an offload work request to send the supplied packet.  The packet
1632 *      data already carry the work request with most fields populated.
1633 */
1634static void write_ofld_wr(struct adapter *adap, struct sk_buff *skb,
1635                          struct sge_txq *q, unsigned int pidx,
1636                          unsigned int gen, unsigned int ndesc,
1637                          const dma_addr_t *addr)
1638{
1639        unsigned int sgl_flits, flits;
1640        struct work_request_hdr *from;
1641        struct sg_ent *sgp, sgl[MAX_SKB_FRAGS / 2 + 1];
1642        struct tx_desc *d = &q->desc[pidx];
1643
1644        if (immediate(skb)) {
1645                q->sdesc[pidx].skb = NULL;
1646                write_imm(d, skb, skb->len, gen);
1647                return;
1648        }
1649
1650        /* Only TX_DATA builds SGLs */
1651
1652        from = (struct work_request_hdr *)skb->data;
1653        memcpy(&d->flit[1], &from[1],
1654               skb_transport_offset(skb) - sizeof(*from));
1655
1656        flits = skb_transport_offset(skb) / 8;
1657        sgp = ndesc == 1 ? (struct sg_ent *)&d->flit[flits] : sgl;
1658        sgl_flits = write_sgl(skb, sgp, skb_transport_header(skb),
1659                              skb_tail_pointer(skb) - skb_transport_header(skb),
1660                              addr);
1661        if (need_skb_unmap()) {
1662                setup_deferred_unmapping(skb, adap->pdev, sgp, sgl_flits);
1663                skb->destructor = deferred_unmap_destructor;
1664        }
1665
1666        write_wr_hdr_sgl(ndesc, skb, d, pidx, q, sgl, flits, sgl_flits,
1667                         gen, from->wr_hi, from->wr_lo);
1668}
1669
1670/**
1671 *      calc_tx_descs_ofld - calculate # of Tx descriptors for an offload packet
1672 *      @skb: the packet
1673 *
1674 *      Returns the number of Tx descriptors needed for the given offload
1675 *      packet.  These packets are already fully constructed.
1676 */
1677static inline unsigned int calc_tx_descs_ofld(const struct sk_buff *skb)
1678{
1679        unsigned int flits, cnt;
1680
1681        if (skb->len <= WR_LEN)
1682                return 1;       /* packet fits as immediate data */
1683
1684        flits = skb_transport_offset(skb) / 8;  /* headers */
1685        cnt = skb_shinfo(skb)->nr_frags;
1686        if (skb_tail_pointer(skb) != skb_transport_header(skb))
1687                cnt++;
1688        return flits_to_desc(flits + sgl_len(cnt));
1689}
1690
1691/**
1692 *      ofld_xmit - send a packet through an offload queue
1693 *      @adap: the adapter
1694 *      @q: the Tx offload queue
1695 *      @skb: the packet
1696 *
1697 *      Send an offload packet through an SGE offload queue.
1698 */
1699static int ofld_xmit(struct adapter *adap, struct sge_txq *q,
1700                     struct sk_buff *skb)
1701{
1702        int ret;
1703        unsigned int ndesc = calc_tx_descs_ofld(skb), pidx, gen;
1704
1705        spin_lock(&q->lock);
1706again:  reclaim_completed_tx(adap, q, TX_RECLAIM_CHUNK);
1707
1708        ret = check_desc_avail(adap, q, skb, ndesc, TXQ_OFLD);
1709        if (unlikely(ret)) {
1710                if (ret == 1) {
1711                        skb->priority = ndesc;  /* save for restart */
1712                        spin_unlock(&q->lock);
1713                        return NET_XMIT_CN;
1714                }
1715                goto again;
1716        }
1717
1718        if (!immediate(skb) &&
1719            map_skb(adap->pdev, skb, (dma_addr_t *)skb->head)) {
1720                spin_unlock(&q->lock);
1721                return NET_XMIT_SUCCESS;
1722        }
1723
1724        gen = q->gen;
1725        q->in_use += ndesc;
1726        pidx = q->pidx;
1727        q->pidx += ndesc;
1728        if (q->pidx >= q->size) {
1729                q->pidx -= q->size;
1730                q->gen ^= 1;
1731        }
1732        spin_unlock(&q->lock);
1733
1734        write_ofld_wr(adap, skb, q, pidx, gen, ndesc, (dma_addr_t *)skb->head);
1735        check_ring_tx_db(adap, q);
1736        return NET_XMIT_SUCCESS;
1737}
1738
1739/**
1740 *      restart_offloadq - restart a suspended offload queue
1741 *      @w: pointer to the work associated with this handler
1742 *
1743 *      Resumes transmission on a suspended Tx offload queue.
1744 */
1745static void restart_offloadq(struct work_struct *w)
1746{
1747        struct sk_buff *skb;
1748        struct sge_qset *qs = container_of(w, struct sge_qset,
1749                                           txq[TXQ_OFLD].qresume_task);
1750        struct sge_txq *q = &qs->txq[TXQ_OFLD];
1751        const struct port_info *pi = netdev_priv(qs->netdev);
1752        struct adapter *adap = pi->adapter;
1753        unsigned int written = 0;
1754
1755        spin_lock(&q->lock);
1756again:  reclaim_completed_tx(adap, q, TX_RECLAIM_CHUNK);
1757
1758        while ((skb = skb_peek(&q->sendq)) != NULL) {
1759                unsigned int gen, pidx;
1760                unsigned int ndesc = skb->priority;
1761
1762                if (unlikely(q->size - q->in_use < ndesc)) {
1763                        set_bit(TXQ_OFLD, &qs->txq_stopped);
1764                        smp_mb__after_atomic();
1765
1766                        if (should_restart_tx(q) &&
1767                            test_and_clear_bit(TXQ_OFLD, &qs->txq_stopped))
1768                                goto again;
1769                        q->stops++;
1770                        break;
1771                }
1772
1773                if (!immediate(skb) &&
1774                    map_skb(adap->pdev, skb, (dma_addr_t *)skb->head))
1775                        break;
1776
1777                gen = q->gen;
1778                q->in_use += ndesc;
1779                pidx = q->pidx;
1780                q->pidx += ndesc;
1781                written += ndesc;
1782                if (q->pidx >= q->size) {
1783                        q->pidx -= q->size;
1784                        q->gen ^= 1;
1785                }
1786                __skb_unlink(skb, &q->sendq);
1787                spin_unlock(&q->lock);
1788
1789                write_ofld_wr(adap, skb, q, pidx, gen, ndesc,
1790                              (dma_addr_t *)skb->head);
1791                spin_lock(&q->lock);
1792        }
1793        spin_unlock(&q->lock);
1794
1795#if USE_GTS
1796        set_bit(TXQ_RUNNING, &q->flags);
1797        set_bit(TXQ_LAST_PKT_DB, &q->flags);
1798#endif
1799        wmb();
1800        if (likely(written))
1801                t3_write_reg(adap, A_SG_KDOORBELL,
1802                             F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
1803}
1804
1805/**
1806 *      queue_set - return the queue set a packet should use
1807 *      @skb: the packet
1808 *
1809 *      Maps a packet to the SGE queue set it should use.  The desired queue
1810 *      set is carried in bits 1-3 in the packet's priority.
1811 *      set is carried in bits 1-3 of the packet's priority.
1812static inline int queue_set(const struct sk_buff *skb)
1813{
1814        return skb->priority >> 1;
1815}
1816
1817/**
1818 *      is_ctrl_pkt - return whether an offload packet is a control packet
1819 *      @skb: the packet
1820 *
1821 *      Determines whether an offload packet should use an OFLD or a CTRL
1822 *      Tx queue.  This is indicated by bit 0 in the packet's priority.
1823 */
1824static inline int is_ctrl_pkt(const struct sk_buff *skb)
1825{
1826        return skb->priority & 1;
1827}
1828
1829/**
1830 *      t3_offload_tx - send an offload packet
1831 *      @tdev: the offload device to send to
1832 *      @skb: the packet
1833 *
1834 *      Sends an offload packet.  We use the packet priority to select the
1835 *      appropriate Tx queue as follows: bit 0 indicates whether the packet
1836 *      should be sent as regular or control, bits 1-3 select the queue set.
1837 */
1838int t3_offload_tx(struct t3cdev *tdev, struct sk_buff *skb)
1839{
1840        struct adapter *adap = tdev2adap(tdev);
1841        struct sge_qset *qs = &adap->sge.qs[queue_set(skb)];
1842
1843        if (unlikely(is_ctrl_pkt(skb)))
1844                return ctrl_xmit(adap, &qs->txq[TXQ_CTRL], skb);
1845
1846        return ofld_xmit(adap, &qs->txq[TXQ_OFLD], skb);
1847}
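
/*
 * Illustrative sketch (not part of the driver): an offload caller encodes
 * its queue choice in skb->priority before calling t3_offload_tx().
 * qset_idx and want_ctrl are hypothetical variables.
 *
 *	skb->priority = (qset_idx << 1) | (want_ctrl ? 1 : 0);
 *	t3_offload_tx(tdev, skb);
 */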
1848
1849/**
1850 *      offload_enqueue - add an offload packet to an SGE offload receive queue
1851 *      @q: the SGE response queue
1852 *      @skb: the packet
1853 *
1854 *      Add a new offload packet to an SGE response queue's offload packet
1855 *      queue.  If the packet is the first on the queue it schedules the RX
1856 *      softirq to process the queue.
1857 */
1858static inline void offload_enqueue(struct sge_rspq *q, struct sk_buff *skb)
1859{
1860        int was_empty = skb_queue_empty(&q->rx_queue);
1861
1862        __skb_queue_tail(&q->rx_queue, skb);
1863
1864        if (was_empty) {
1865                struct sge_qset *qs = rspq_to_qset(q);
1866
1867                napi_schedule(&qs->napi);
1868        }
1869}
1870
1871/**
1872 *      deliver_partial_bundle - deliver a (partial) bundle of Rx offload pkts
1873 *      @tdev: the offload device that will be receiving the packets
1874 *      @q: the SGE response queue that assembled the bundle
1875 *      @skbs: the partial bundle
1876 *      @n: the number of packets in the bundle
1877 *
1878 *      Delivers a (partial) bundle of Rx offload packets to an offload device.
1879 */
1880static inline void deliver_partial_bundle(struct t3cdev *tdev,
1881                                          struct sge_rspq *q,
1882                                          struct sk_buff *skbs[], int n)
1883{
1884        if (n) {
1885                q->offload_bundles++;
1886                tdev->recv(tdev, skbs, n);
1887        }
1888}
1889
1890/**
1891 *      ofld_poll - NAPI handler for offload packets in interrupt mode
1892 *      @napi: the network device doing the polling
1893 *      @budget: polling budget
1894 *
1895 *      The NAPI handler for offload packets when a response queue is serviced
1896 *      by the hard interrupt handler, i.e., when it's operating in non-polling
1897 *      mode.  Creates small packet batches and sends them through the offload
1898 *      receive handler.  Batches need to be of modest size as we do prefetches
1899 *      on the packets in each.
1900 */
1901static int ofld_poll(struct napi_struct *napi, int budget)
1902{
1903        struct sge_qset *qs = container_of(napi, struct sge_qset, napi);
1904        struct sge_rspq *q = &qs->rspq;
1905        struct adapter *adapter = qs->adap;
1906        int work_done = 0;
1907
1908        while (work_done < budget) {
1909                struct sk_buff *skb, *tmp, *skbs[RX_BUNDLE_SIZE];
1910                struct sk_buff_head queue;
1911                int ngathered;
1912
1913                spin_lock_irq(&q->lock);
1914                __skb_queue_head_init(&queue);
1915                skb_queue_splice_init(&q->rx_queue, &queue);
1916                if (skb_queue_empty(&queue)) {
1917                        napi_complete_done(napi, work_done);
1918                        spin_unlock_irq(&q->lock);
1919                        return work_done;
1920                }
1921                spin_unlock_irq(&q->lock);
1922
1923                ngathered = 0;
1924                skb_queue_walk_safe(&queue, skb, tmp) {
1925                        if (work_done >= budget)
1926                                break;
1927                        work_done++;
1928
1929                        __skb_unlink(skb, &queue);
1930                        prefetch(skb->data);
1931                        skbs[ngathered] = skb;
1932                        if (++ngathered == RX_BUNDLE_SIZE) {
1933                                q->offload_bundles++;
1934                                adapter->tdev.recv(&adapter->tdev, skbs,
1935                                                   ngathered);
1936                                ngathered = 0;
1937                        }
1938                }
1939                if (!skb_queue_empty(&queue)) {
1940                        /* splice remaining packets back onto Rx queue */
1941                        spin_lock_irq(&q->lock);
1942                        skb_queue_splice(&queue, &q->rx_queue);
1943                        spin_unlock_irq(&q->lock);
1944                }
1945                deliver_partial_bundle(&adapter->tdev, q, skbs, ngathered);
1946        }
1947
1948        return work_done;
1949}
1950
1951/**
1952 *      rx_offload - process a received offload packet
1953 *      @tdev: the offload device receiving the packet
1954 *      @rq: the response queue that received the packet
1955 *      @skb: the packet
1956 *      @rx_gather: a gather list of packets if we are building a bundle
1957 *      @gather_idx: index of the next available slot in the bundle
1958 *
1959 *      Process an ingress offload packet and add it to the offload ingress
1960 *      queue.  Returns the index of the next available slot in the bundle.
1961 */
1962static inline int rx_offload(struct t3cdev *tdev, struct sge_rspq *rq,
1963                             struct sk_buff *skb, struct sk_buff *rx_gather[],
1964                             unsigned int gather_idx)
1965{
1966        skb_reset_mac_header(skb);
1967        skb_reset_network_header(skb);
1968        skb_reset_transport_header(skb);
1969
1970        if (rq->polling) {
1971                rx_gather[gather_idx++] = skb;
1972                if (gather_idx == RX_BUNDLE_SIZE) {
1973                        tdev->recv(tdev, rx_gather, RX_BUNDLE_SIZE);
1974                        gather_idx = 0;
1975                        rq->offload_bundles++;
1976                }
1977        } else
1978                offload_enqueue(rq, skb);
1979
1980        return gather_idx;
1981}
1982
1983/**
1984 *      restart_tx - check whether to restart suspended Tx queues
1985 *      @qs: the queue set to resume
1986 *
1987 *      Restarts suspended Tx queues of an SGE queue set if they have enough
1988 *      free resources to resume operation.
1989 */
1990static void restart_tx(struct sge_qset *qs)
1991{
1992        if (test_bit(TXQ_ETH, &qs->txq_stopped) &&
1993            should_restart_tx(&qs->txq[TXQ_ETH]) &&
1994            test_and_clear_bit(TXQ_ETH, &qs->txq_stopped)) {
1995                qs->txq[TXQ_ETH].restarts++;
1996                if (netif_running(qs->netdev))
1997                        netif_tx_wake_queue(qs->tx_q);
1998        }
1999
2000        if (test_bit(TXQ_OFLD, &qs->txq_stopped) &&
2001            should_restart_tx(&qs->txq[TXQ_OFLD]) &&
2002            test_and_clear_bit(TXQ_OFLD, &qs->txq_stopped)) {
2003                qs->txq[TXQ_OFLD].restarts++;
2004
2005                /* The work can be quite lengthy so we use the driver's own workqueue */
2006                queue_work(cxgb3_wq, &qs->txq[TXQ_OFLD].qresume_task);
2007        }
2008        if (test_bit(TXQ_CTRL, &qs->txq_stopped) &&
2009            should_restart_tx(&qs->txq[TXQ_CTRL]) &&
2010            test_and_clear_bit(TXQ_CTRL, &qs->txq_stopped)) {
2011                qs->txq[TXQ_CTRL].restarts++;
2012
2013                /* The work can be quite lengthy so we use the driver's own workqueue */
2014                queue_work(cxgb3_wq, &qs->txq[TXQ_CTRL].qresume_task);
2015        }
2016}
2017
2018/**
2019 *      cxgb3_arp_process - process an ARP request probing a private IP address
2020 *      @pi: the port info
2021 *      @skb: the skbuff containing the ARP request
2022 *
2023 *      Check if the ARP request is probing the private IP address
2024 *      dedicated to iSCSI, generate an ARP reply if so.
2025 */
2026static void cxgb3_arp_process(struct port_info *pi, struct sk_buff *skb)
2027{
2028        struct net_device *dev = skb->dev;
2029        struct arphdr *arp;
2030        unsigned char *arp_ptr;
2031        unsigned char *sha;
2032        __be32 sip, tip;
2033
2034        if (!dev)
2035                return;
2036
2037        skb_reset_network_header(skb);
2038        arp = arp_hdr(skb);
2039
2040        if (arp->ar_op != htons(ARPOP_REQUEST))
2041                return;
2042
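        /*
         * The ARP payload follows the header as sender hw address, sender
         * IP, target hw address, target IP; we only need sha, sip and tip.
         */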
2043        arp_ptr = (unsigned char *)(arp + 1);
2044        sha = arp_ptr;
2045        arp_ptr += dev->addr_len;
2046        memcpy(&sip, arp_ptr, sizeof(sip));
2047        arp_ptr += sizeof(sip);
2048        arp_ptr += dev->addr_len;
2049        memcpy(&tip, arp_ptr, sizeof(tip));
2050
2051        if (tip != pi->iscsi_ipv4addr)
2052                return;
2053
2054        arp_send(ARPOP_REPLY, ETH_P_ARP, sip, dev, tip, sha,
2055                 pi->iscsic.mac_addr, sha);
2056
2057}
2058
2059static inline int is_arp(struct sk_buff *skb)
2060{
2061        return skb->protocol == htons(ETH_P_ARP);
2062}
2063
2064static void cxgb3_process_iscsi_prov_pack(struct port_info *pi,
2065                                        struct sk_buff *skb)
2066{
2067        if (is_arp(skb)) {
2068                cxgb3_arp_process(pi, skb);
2069                return;
2070        }
2071
2072        if (pi->iscsic.recv)
2073                pi->iscsic.recv(pi, skb);
2074
2075}
2076
2077/**
2078 *      rx_eth - process an ingress ethernet packet
2079 *      @adap: the adapter
2080 *      @rq: the response queue that received the packet
2081 *      @skb: the packet
2082 *      @pad: number of padding bytes preceding the CPL header
2083 *      @lro: whether the packet is a candidate for GRO/LRO aggregation
2084 *
2085 *      Process an ingress ethernet packet and deliver it to the stack.
2086 *      The padding is 2 if the packet was delivered in an Rx buffer and 0
2087 *      if it was immediate data in a response.
2088 */
2089static void rx_eth(struct adapter *adap, struct sge_rspq *rq,
2090                   struct sk_buff *skb, int pad, int lro)
2091{
2092        struct cpl_rx_pkt *p = (struct cpl_rx_pkt *)(skb->data + pad);
2093        struct sge_qset *qs = rspq_to_qset(rq);
2094        struct port_info *pi;
2095
2096        skb_pull(skb, sizeof(*p) + pad);
2097        skb->protocol = eth_type_trans(skb, adap->port[p->iff]);
2098        pi = netdev_priv(skb->dev);
2099        if ((skb->dev->features & NETIF_F_RXCSUM) && p->csum_valid &&
2100            p->csum == htons(0xffff) && !p->fragment) {
2101                qs->port_stats[SGE_PSTAT_RX_CSUM_GOOD]++;
2102                skb->ip_summed = CHECKSUM_UNNECESSARY;
2103        } else
2104                skb_checksum_none_assert(skb);
2105        skb_record_rx_queue(skb, qs - &adap->sge.qs[pi->first_qset]);
2106
2107        if (p->vlan_valid) {
2108                qs->port_stats[SGE_PSTAT_VLANEX]++;
2109                __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), ntohs(p->vlan));
2110        }
2111        if (rq->polling) {
2112                if (lro)
2113                        napi_gro_receive(&qs->napi, skb);
2114                else {
2115                        if (unlikely(pi->iscsic.flags))
2116                                cxgb3_process_iscsi_prov_pack(pi, skb);
2117                        netif_receive_skb(skb);
2118                }
2119        } else
2120                netif_rx(skb);
2121}
2122
2123static inline int is_eth_tcp(u32 rss)
2124{
2125        return G_HASHTYPE(ntohl(rss)) == RSS_HASH_4_TUPLE;
2126}
2127
2128/**
2129 *      lro_add_page - add a page chunk to an LRO session
2130 *      @adap: the adapter
2131 *      @qs: the associated queue set
2132 *      @fl: the free list containing the page chunk to add
2133 *      @len: packet length
2134 *      @complete: indicates the last fragment of a frame
2135 *
2136 *      Add a received packet contained in a page chunk to an existing LRO
2137 *      session.
2138 */
2139static void lro_add_page(struct adapter *adap, struct sge_qset *qs,
2140                         struct sge_fl *fl, int len, int complete)
2141{
2142        struct rx_sw_desc *sd = &fl->sdesc[fl->cidx];
2143        struct port_info *pi = netdev_priv(qs->netdev);
2144        struct sk_buff *skb = NULL;
2145        struct cpl_rx_pkt *cpl;
2146        skb_frag_t *rx_frag;
2147        int nr_frags;
2148        int offset = 0;
2149
2150        if (!qs->nomem) {
2151                skb = napi_get_frags(&qs->napi);
2152                qs->nomem = !skb;
2153        }
2154
2155        fl->credits--;
2156
2157        dma_sync_single_for_cpu(&adap->pdev->dev,
2158                                dma_unmap_addr(sd, dma_addr),
2159                                fl->buf_size - SGE_PG_RSVD, DMA_FROM_DEVICE);
2160
2161        (*sd->pg_chunk.p_cnt)--;
2162        if (!*sd->pg_chunk.p_cnt && sd->pg_chunk.page != fl->pg_chunk.page)
2163                dma_unmap_page(&adap->pdev->dev, sd->pg_chunk.mapping,
2164                               fl->alloc_size, DMA_FROM_DEVICE);
2165
2166        if (!skb) {
2167                put_page(sd->pg_chunk.page);
2168                if (complete)
2169                        qs->nomem = 0;
2170                return;
2171        }
2172
2173        rx_frag = skb_shinfo(skb)->frags;
2174        nr_frags = skb_shinfo(skb)->nr_frags;
2175
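        /*
         * The first chunk of an aggregated frame starts with 2 bytes of
         * padding followed by the CPL header; skip them and remember the
         * CPL so the checksum/VLAN info can be applied when the frame
         * completes.
         */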
2176        if (!nr_frags) {
2177                offset = 2 + sizeof(struct cpl_rx_pkt);
2178                cpl = qs->lro_va = sd->pg_chunk.va + 2;
2179
2180                if ((qs->netdev->features & NETIF_F_RXCSUM) &&
2181                     cpl->csum_valid && cpl->csum == htons(0xffff)) {
2182                        skb->ip_summed = CHECKSUM_UNNECESSARY;
2183                        qs->port_stats[SGE_PSTAT_RX_CSUM_GOOD]++;
2184                } else
2185                        skb->ip_summed = CHECKSUM_NONE;
2186        } else
2187                cpl = qs->lro_va;
2188
2189        len -= offset;
2190
2191        rx_frag += nr_frags;
2192        __skb_frag_set_page(rx_frag, sd->pg_chunk.page);
2193        skb_frag_off_set(rx_frag, sd->pg_chunk.offset + offset);
2194        skb_frag_size_set(rx_frag, len);
2195
2196        skb->len += len;
2197        skb->data_len += len;
2198        skb->truesize += len;
2199        skb_shinfo(skb)->nr_frags++;
2200
2201        if (!complete)
2202                return;
2203
2204        skb_record_rx_queue(skb, qs - &adap->sge.qs[pi->first_qset]);
2205
2206        if (cpl->vlan_valid) {
2207                qs->port_stats[SGE_PSTAT_VLANEX]++;
2208                __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), ntohs(cpl->vlan));
2209        }
2210        napi_gro_frags(&qs->napi);
2211}
2212
2213/**
2214 *      handle_rsp_cntrl_info - handles control information in a response
2215 *      @qs: the queue set corresponding to the response
2216 *      @flags: the response control flags
2217 *
2218 *      Handles the control information of an SGE response, such as GTS
2219 *      indications and completion credits for the queue set's Tx queues.
2220 *      HW coalesces credits; we don't do any extra SW coalescing.
2221 */
2222static inline void handle_rsp_cntrl_info(struct sge_qset *qs, u32 flags)
2223{
2224        unsigned int credits;
2225
2226#if USE_GTS
2227        if (flags & F_RSPD_TXQ0_GTS)
2228                clear_bit(TXQ_RUNNING, &qs->txq[TXQ_ETH].flags);
2229#endif
2230
2231        credits = G_RSPD_TXQ0_CR(flags);
2232        if (credits)
2233                qs->txq[TXQ_ETH].processed += credits;
2234
2235        credits = G_RSPD_TXQ2_CR(flags);
2236        if (credits)
2237                qs->txq[TXQ_CTRL].processed += credits;
2238
2239# if USE_GTS
2240        if (flags & F_RSPD_TXQ1_GTS)
2241                clear_bit(TXQ_RUNNING, &qs->txq[TXQ_OFLD].flags);
2242# endif
2243        credits = G_RSPD_TXQ1_CR(flags);
2244        if (credits)
2245                qs->txq[TXQ_OFLD].processed += credits;
2246}
2247
2248/**
2249 *      check_ring_db - check if we need to ring any doorbells
2250 *      @adap: the adapter
2251 *      @qs: the queue set whose Tx queues are to be examined
2252 *      @sleeping: indicates which Tx queue sent GTS
2253 *
2254 *      Checks if some of a queue set's Tx queues need to ring their doorbells
2255 *      to resume transmission after idling while they still have unprocessed
2256 *      descriptors.
2257 */
2258static void check_ring_db(struct adapter *adap, struct sge_qset *qs,
2259                          unsigned int sleeping)
2260{
2261        if (sleeping & F_RSPD_TXQ0_GTS) {
2262                struct sge_txq *txq = &qs->txq[TXQ_ETH];
2263
2264                if (txq->cleaned + txq->in_use != txq->processed &&
2265                    !test_and_set_bit(TXQ_LAST_PKT_DB, &txq->flags)) {
2266                        set_bit(TXQ_RUNNING, &txq->flags);
2267                        t3_write_reg(adap, A_SG_KDOORBELL, F_SELEGRCNTX |
2268                                     V_EGRCNTX(txq->cntxt_id));
2269                }
2270        }
2271
2272        if (sleeping & F_RSPD_TXQ1_GTS) {
2273                struct sge_txq *txq = &qs->txq[TXQ_OFLD];
2274
2275                if (txq->cleaned + txq->in_use != txq->processed &&
2276                    !test_and_set_bit(TXQ_LAST_PKT_DB, &txq->flags)) {
2277                        set_bit(TXQ_RUNNING, &txq->flags);
2278                        t3_write_reg(adap, A_SG_KDOORBELL, F_SELEGRCNTX |
2279                                     V_EGRCNTX(txq->cntxt_id));
2280                }
2281        }
2282}
2283
2284/**
2285 *      is_new_response - check if a response is newly written
2286 *      @r: the response descriptor
2287 *      @q: the response queue
2288 *
2289 *      Returns true if a response descriptor contains a yet unprocessed
2290 *      response.
2291 */
2292static inline int is_new_response(const struct rsp_desc *r,
2293                                  const struct sge_rspq *q)
2294{
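        /*
         * The hardware flips the descriptor's gen bit on every pass over
         * the ring, and the driver toggles q->gen each time cidx wraps, so
         * a match means the descriptor was written since we last consumed
         * this slot.
         */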
2295        return (r->intr_gen & F_RSPD_GEN2) == q->gen;
2296}
2297
2298static inline void clear_rspq_bufstate(struct sge_rspq * const q)
2299{
2300        q->pg_skb = NULL;
2301        q->rx_recycle_buf = 0;
2302}
2303
2304#define RSPD_GTS_MASK  (F_RSPD_TXQ0_GTS | F_RSPD_TXQ1_GTS)
2305#define RSPD_CTRL_MASK (RSPD_GTS_MASK | \
2306                        V_RSPD_TXQ0_CR(M_RSPD_TXQ0_CR) | \
2307                        V_RSPD_TXQ1_CR(M_RSPD_TXQ1_CR) | \
2308                        V_RSPD_TXQ2_CR(M_RSPD_TXQ2_CR))
2309
2310/* How long to delay the next interrupt in case of memory shortage, in units of 0.1 us. */
2311#define NOMEM_INTR_DELAY 2500
2312
2313/**
2314 *      process_responses - process responses from an SGE response queue
2315 *      @adap: the adapter
2316 *      @qs: the queue set to which the response queue belongs
2317 *      @budget: how many responses can be processed in this round
2318 *
2319 *      Process responses from an SGE response queue up to the supplied budget.
2320 *      Responses include received packets as well as credits and other events
2321 *      for the queues that belong to the response queue's queue set.
2322 *      A negative budget is effectively unlimited.
2323 *
2324 *      Additionally choose the interrupt holdoff time for the next interrupt
2325 *      on this queue.  If the system is under memory shortage use a fairly
2326 *      long delay to help recovery.
2327 */
2328static int process_responses(struct adapter *adap, struct sge_qset *qs,
2329                             int budget)
2330{
2331        struct sge_rspq *q = &qs->rspq;
2332        struct rsp_desc *r = &q->desc[q->cidx];
2333        int budget_left = budget;
2334        unsigned int sleeping = 0;
2335        struct sk_buff *offload_skbs[RX_BUNDLE_SIZE];
2336        int ngathered = 0;
2337
2338        q->next_holdoff = q->holdoff_tmr;
2339
2340        while (likely(budget_left && is_new_response(r, q))) {
2341                int packet_complete, eth, ethpad = 2;
2342                int lro = !!(qs->netdev->features & NETIF_F_GRO);
2343                struct sk_buff *skb = NULL;
2344                u32 len, flags;
2345                __be32 rss_hi, rss_lo;
2346
2347                dma_rmb();
2348                eth = r->rss_hdr.opcode == CPL_RX_PKT;
2349                rss_hi = *(const __be32 *)r;
2350                rss_lo = r->rss_hdr.rss_hash_val;
2351                flags = ntohl(r->flags);
2352
2353                if (unlikely(flags & F_RSPD_ASYNC_NOTIF)) {
2354                        skb = alloc_skb(AN_PKT_SIZE, GFP_ATOMIC);
2355                        if (!skb)
2356                                goto no_mem;
2357
2358                        __skb_put_data(skb, r, AN_PKT_SIZE);
2359                        skb->data[0] = CPL_ASYNC_NOTIF;
2360                        rss_hi = htonl(CPL_ASYNC_NOTIF << 24);
2361                        q->async_notif++;
2362                } else if (flags & F_RSPD_IMM_DATA_VALID) {
2363                        skb = get_imm_packet(r);
2364                        if (unlikely(!skb)) {
2365no_mem:
2366                                q->next_holdoff = NOMEM_INTR_DELAY;
2367                                q->nomem++;
2368                                /* consume one credit since we tried */
2369                                budget_left--;
2370                                break;
2371                        }
2372                        q->imm_data++;
2373                        ethpad = 0;
2374                } else if ((len = ntohl(r->len_cq)) != 0) {
2375                        struct sge_fl *fl;
2376
2377                        lro &= eth && is_eth_tcp(rss_hi);
2378
2379                        fl = (len & F_RSPD_FLQ) ? &qs->fl[1] : &qs->fl[0];
2380                        if (fl->use_pages) {
2381                                void *addr = fl->sdesc[fl->cidx].pg_chunk.va;
2382
2383                                net_prefetch(addr);
2384                                __refill_fl(adap, fl);
2385                                if (lro > 0) {
2386                                        lro_add_page(adap, qs, fl,
2387                                                     G_RSPD_LEN(len),
2388                                                     flags & F_RSPD_EOP);
2389                                        goto next_fl;
2390                                }
2391
2392                                skb = get_packet_pg(adap, fl, q,
2393                                                    G_RSPD_LEN(len),
2394                                                    eth ?
2395                                                    SGE_RX_DROP_THRES : 0);
2396                                q->pg_skb = skb;
2397                        } else
2398                                skb = get_packet(adap, fl, G_RSPD_LEN(len),
2399                                                 eth ? SGE_RX_DROP_THRES : 0);
2400                        if (unlikely(!skb)) {
2401                                if (!eth)
2402                                        goto no_mem;
2403                                q->rx_drops++;
2404                        } else if (unlikely(r->rss_hdr.opcode == CPL_TRACE_PKT))
2405                                __skb_pull(skb, 2);
2406next_fl:
2407                        if (++fl->cidx == fl->size)
2408                                fl->cidx = 0;
2409                } else
2410                        q->pure_rsps++;
2411
2412                if (flags & RSPD_CTRL_MASK) {
2413                        sleeping |= flags & RSPD_GTS_MASK;
2414                        handle_rsp_cntrl_info(qs, flags);
2415                }
2416
2417                r++;
2418                if (unlikely(++q->cidx == q->size)) {
2419                        q->cidx = 0;
2420                        q->gen ^= 1;
2421                        r = q->desc;
2422                }
2423                prefetch(r);
2424
2425                if (++q->credits >= (q->size / 4)) {
2426                        refill_rspq(adap, q, q->credits);
2427                        q->credits = 0;
2428                }
2429
2430                packet_complete = flags &
2431                                  (F_RSPD_EOP | F_RSPD_IMM_DATA_VALID |
2432                                   F_RSPD_ASYNC_NOTIF);
2433
2434                if (skb != NULL && packet_complete) {
2435                        if (eth)
2436                                rx_eth(adap, q, skb, ethpad, lro);
2437                        else {
2438                                q->offload_pkts++;
2439                                /* Preserve the RSS info in csum & priority */
2440                                skb->csum = rss_hi;
2441                                skb->priority = rss_lo;
2442                                ngathered = rx_offload(&adap->tdev, q, skb,
2443                                                       offload_skbs,
2444                                                       ngathered);
2445                        }
2446
2447                        if (flags & F_RSPD_EOP)
2448                                clear_rspq_bufstate(q);
2449                }
2450                --budget_left;
2451        }
2452
2453        deliver_partial_bundle(&adap->tdev, q, offload_skbs, ngathered);
2454
2455        if (sleeping)
2456                check_ring_db(adap, qs, sleeping);
2457
2458        smp_mb();               /* commit Tx queue .processed updates */
2459        if (unlikely(qs->txq_stopped != 0))
2460                restart_tx(qs);
2461
2462        budget -= budget_left;
2463        return budget;
2464}
2465
2466static inline int is_pure_response(const struct rsp_desc *r)
2467{
2468        __be32 n = r->flags & htonl(F_RSPD_ASYNC_NOTIF | F_RSPD_IMM_DATA_VALID);
2469
2470        return (n | r->len_cq) == 0;
2471}
2472
2473/**
2474 *      napi_rx_handler - the NAPI handler for Rx processing
2475 *      @napi: the napi instance
2476 *      @budget: how many packets we can process in this round
2477 *
2478 *      Handler for new data events when using NAPI.
2479 */
2480static int napi_rx_handler(struct napi_struct *napi, int budget)
2481{
2482        struct sge_qset *qs = container_of(napi, struct sge_qset, napi);
2483        struct adapter *adap = qs->adap;
2484        int work_done = process_responses(adap, qs, budget);
2485
2486        if (likely(work_done < budget)) {
2487                napi_complete_done(napi, work_done);
2488
2489                /*
2490                 * Because we don't atomically flush the following
2491                 * write it is possible that in very rare cases it can
2492                 * reach the device in a way that races with a new
2493                 * response being written plus an error interrupt
2494                 * causing the NAPI interrupt handler below to return
2495                 * unhandled status to the OS.  To protect against
2496                 * this would require flushing the write and doing
2497                 * both the write and the flush with interrupts off.
2498                 * Way too expensive and unjustifiable given the
2499                 * rarity of the race.
2500                 *
2501                 * The race cannot happen at all with MSI-X.
2502                 */
2503                t3_write_reg(adap, A_SG_GTS, V_RSPQ(qs->rspq.cntxt_id) |
2504                             V_NEWTIMER(qs->rspq.next_holdoff) |
2505                             V_NEWINDEX(qs->rspq.cidx));
2506        }
2507        return work_done;
2508}
2509
2510/*
2511 * Returns true if the device is already scheduled for polling.
2512 */
2513static inline int napi_is_scheduled(struct napi_struct *napi)
2514{
2515        return test_bit(NAPI_STATE_SCHED, &napi->state);
2516}
2517
2518/**
2519 *      process_pure_responses - process pure responses from a response queue
2520 *      @adap: the adapter
2521 *      @qs: the queue set owning the response queue
2522 *      @r: the first pure response to process
2523 *
2524 *      A simpler version of process_responses() that handles only pure (i.e.,
2525 *      non data-carrying) responses.  Such responses are too lightweight to
2526 *      justify calling a softirq under NAPI, so we handle them specially in
2527 *      the interrupt handler.  The function is called with a pointer to a
2528 *      response, which the caller must ensure is a valid pure response.
2529 *
2530 *      Returns 1 if it encounters a valid data-carrying response, 0 otherwise.
2531 */
2532static int process_pure_responses(struct adapter *adap, struct sge_qset *qs,
2533                                  struct rsp_desc *r)
2534{
2535        struct sge_rspq *q = &qs->rspq;
2536        unsigned int sleeping = 0;
2537
2538        do {
2539                u32 flags = ntohl(r->flags);
2540
2541                r++;
2542                if (unlikely(++q->cidx == q->size)) {
2543                        q->cidx = 0;
2544                        q->gen ^= 1;
2545                        r = q->desc;
2546                }
2547                prefetch(r);
2548
2549                if (flags & RSPD_CTRL_MASK) {
2550                        sleeping |= flags & RSPD_GTS_MASK;
2551                        handle_rsp_cntrl_info(qs, flags);
2552                }
2553
2554                q->pure_rsps++;
2555                if (++q->credits >= (q->size / 4)) {
2556                        refill_rspq(adap, q, q->credits);
2557                        q->credits = 0;
2558                }
2559                if (!is_new_response(r, q))
2560                        break;
2561                dma_rmb();
2562        } while (is_pure_response(r));
2563
2564        if (sleeping)
2565                check_ring_db(adap, qs, sleeping);
2566
2567        smp_mb();               /* commit Tx queue .processed updates */
2568        if (unlikely(qs->txq_stopped != 0))
2569                restart_tx(qs);
2570
2571        return is_new_response(r, q);
2572}
2573
2574/**
2575 *      handle_responses - decide what to do with new responses in NAPI mode
2576 *      @adap: the adapter
2577 *      @q: the response queue
2578 *
2579 *      This is used by the NAPI interrupt handlers to decide what to do with
2580 *      new SGE responses.  If there are no new responses it returns -1.  If
2581 *      there are new responses and they are pure (i.e., non-data carrying)
2582 *      it handles them straight in hard interrupt context as they are very
2583 *      cheap and don't deliver any packets.  Finally, if there are any data
2584 *      signaling responses it schedules the NAPI handler.  Returns 1 if it
2585 *      schedules NAPI, 0 if all new responses were pure.
2586 *
2587 *      The caller must ascertain NAPI is not already running.
2588 */
2589static inline int handle_responses(struct adapter *adap, struct sge_rspq *q)
2590{
2591        struct sge_qset *qs = rspq_to_qset(q);
2592        struct rsp_desc *r = &q->desc[q->cidx];
2593
2594        if (!is_new_response(r, q))
2595                return -1;
2596        dma_rmb();
2597        if (is_pure_response(r) && process_pure_responses(adap, qs, r) == 0) {
2598                t3_write_reg(adap, A_SG_GTS, V_RSPQ(q->cntxt_id) |
2599                             V_NEWTIMER(q->holdoff_tmr) | V_NEWINDEX(q->cidx));
2600                return 0;
2601        }
2602        napi_schedule(&qs->napi);
2603        return 1;
2604}
2605
2606/*
2607 * The MSI-X interrupt handler for an SGE response queue for the non-NAPI case
2608 * (i.e., response queue serviced in hard interrupt).
2609 */
2610static irqreturn_t t3_sge_intr_msix(int irq, void *cookie)
2611{
2612        struct sge_qset *qs = cookie;
2613        struct adapter *adap = qs->adap;
2614        struct sge_rspq *q = &qs->rspq;
2615
2616        spin_lock(&q->lock);
2617        if (process_responses(adap, qs, -1) == 0)
2618                q->unhandled_irqs++;
2619        t3_write_reg(adap, A_SG_GTS, V_RSPQ(q->cntxt_id) |
2620                     V_NEWTIMER(q->next_holdoff) | V_NEWINDEX(q->cidx));
2621        spin_unlock(&q->lock);
2622        return IRQ_HANDLED;
2623}
2624
2625/*
2626 * The MSI-X interrupt handler for an SGE response queue for the NAPI case
2627 * (i.e., response queue serviced by NAPI polling).
2628 */
2629static irqreturn_t t3_sge_intr_msix_napi(int irq, void *cookie)
2630{
2631        struct sge_qset *qs = cookie;
2632        struct sge_rspq *q = &qs->rspq;
2633
2634        spin_lock(&q->lock);
2635
2636        if (handle_responses(qs->adap, q) < 0)
2637                q->unhandled_irqs++;
2638        spin_unlock(&q->lock);
2639        return IRQ_HANDLED;
2640}
2641
2642/*
2643 * The non-NAPI MSI interrupt handler.  This needs to handle data events from
2644 * SGE response queues as well as error and other async events as they all use
2645 * the same MSI vector.  We use one SGE response queue per port in this mode
2646 * and protect all response queues with queue 0's lock.
2647 */
2648static irqreturn_t t3_intr_msi(int irq, void *cookie)
2649{
2650        int new_packets = 0;
2651        struct adapter *adap = cookie;
2652        struct sge_rspq *q = &adap->sge.qs[0].rspq;
2653
2654        spin_lock(&q->lock);
2655
2656        if (process_responses(adap, &adap->sge.qs[0], -1)) {
2657                t3_write_reg(adap, A_SG_GTS, V_RSPQ(q->cntxt_id) |
2658                             V_NEWTIMER(q->next_holdoff) | V_NEWINDEX(q->cidx));
2659                new_packets = 1;
2660        }
2661
2662        if (adap->params.nports == 2 &&
2663            process_responses(adap, &adap->sge.qs[1], -1)) {
2664                struct sge_rspq *q1 = &adap->sge.qs[1].rspq;
2665
2666                t3_write_reg(adap, A_SG_GTS, V_RSPQ(q1->cntxt_id) |
2667                             V_NEWTIMER(q1->next_holdoff) |
2668                             V_NEWINDEX(q1->cidx));
2669                new_packets = 1;
2670        }
2671
2672        if (!new_packets && t3_slow_intr_handler(adap) == 0)
2673                q->unhandled_irqs++;
2674
2675        spin_unlock(&q->lock);
2676        return IRQ_HANDLED;
2677}
2678
2679static int rspq_check_napi(struct sge_qset *qs)
2680{
2681        struct sge_rspq *q = &qs->rspq;
2682
2683        if (!napi_is_scheduled(&qs->napi) &&
2684            is_new_response(&q->desc[q->cidx], q)) {
2685                napi_schedule(&qs->napi);
2686                return 1;
2687        }
2688        return 0;
2689}
2690
2691/*
2692 * The MSI interrupt handler for the NAPI case (i.e., response queues serviced
2693 * by NAPI polling).  Handles data events from SGE response queues as well as
2694 * error and other async events as they all use the same MSI vector.  We use
2695 * one SGE response queue per port in this mode and protect all response
2696 * queues with queue 0's lock.
2697 */
2698static irqreturn_t t3_intr_msi_napi(int irq, void *cookie)
2699{
2700        int new_packets;
2701        struct adapter *adap = cookie;
2702        struct sge_rspq *q = &adap->sge.qs[0].rspq;
2703
2704        spin_lock(&q->lock);
2705
2706        new_packets = rspq_check_napi(&adap->sge.qs[0]);
2707        if (adap->params.nports == 2)
2708                new_packets += rspq_check_napi(&adap->sge.qs[1]);
2709        if (!new_packets && t3_slow_intr_handler(adap) == 0)
2710                q->unhandled_irqs++;
2711
2712        spin_unlock(&q->lock);
2713        return IRQ_HANDLED;
2714}
2715
2716/*
2717 * A helper function that processes responses and issues GTS.
2718 */
2719static inline int process_responses_gts(struct adapter *adap,
2720                                        struct sge_rspq *rq)
2721{
2722        int work;
2723
2724        work = process_responses(adap, rspq_to_qset(rq), -1);
2725        t3_write_reg(adap, A_SG_GTS, V_RSPQ(rq->cntxt_id) |
2726                     V_NEWTIMER(rq->next_holdoff) | V_NEWINDEX(rq->cidx));
2727        return work;
2728}
2729
2730/*
2731 * The legacy INTx interrupt handler.  This needs to handle data events from
2732 * SGE response queues as well as error and other async events as they all use
2733 * the same interrupt pin.  We use one SGE response queue per port in this mode
2734 * and protect all response queues with queue 0's lock.
2735 */
2736static irqreturn_t t3_intr(int irq, void *cookie)
2737{
2738        int work_done, w0, w1;
2739        struct adapter *adap = cookie;
2740        struct sge_rspq *q0 = &adap->sge.qs[0].rspq;
2741        struct sge_rspq *q1 = &adap->sge.qs[1].rspq;
2742
2743        spin_lock(&q0->lock);
2744
2745        w0 = is_new_response(&q0->desc[q0->cidx], q0);
2746        w1 = adap->params.nports == 2 &&
2747            is_new_response(&q1->desc[q1->cidx], q1);
2748
2749        if (likely(w0 | w1)) {
2750                t3_write_reg(adap, A_PL_CLI, 0);
2751                t3_read_reg(adap, A_PL_CLI);    /* flush */
2752
2753                if (likely(w0))
2754                        process_responses_gts(adap, q0);
2755
2756                if (w1)
2757                        process_responses_gts(adap, q1);
2758
2759                work_done = w0 | w1;
2760        } else
2761                work_done = t3_slow_intr_handler(adap);
2762
2763        spin_unlock(&q0->lock);
2764        return IRQ_RETVAL(work_done != 0);
2765}
2766
2767/*
2768 * Interrupt handler for legacy INTx interrupts for T3B-based cards.
2769 * Handles data events from SGE response queues as well as error and other
2770 * async events as they all use the same interrupt pin.  We use one SGE
2771 * response queue per port in this mode and protect all response queues with
2772 * queue 0's lock.
2773 */
2774static irqreturn_t t3b_intr(int irq, void *cookie)
2775{
2776        u32 map;
2777        struct adapter *adap = cookie;
2778        struct sge_rspq *q0 = &adap->sge.qs[0].rspq;
2779
2780        t3_write_reg(adap, A_PL_CLI, 0);
2781        map = t3_read_reg(adap, A_SG_DATA_INTR);
2782
2783        if (unlikely(!map))     /* shared interrupt, most likely */
2784                return IRQ_NONE;
2785
2786        spin_lock(&q0->lock);
2787
2788        if (unlikely(map & F_ERRINTR))
2789                t3_slow_intr_handler(adap);
2790
2791        if (likely(map & 1))
2792                process_responses_gts(adap, q0);
2793
2794        if (map & 2)
2795                process_responses_gts(adap, &adap->sge.qs[1].rspq);
2796
2797        spin_unlock(&q0->lock);
2798        return IRQ_HANDLED;
2799}
2800
2801/*
2802 * NAPI interrupt handler for legacy INTx interrupts for T3B-based cards.
2803 * Handles data events from SGE response queues as well as error and other
2804 * async events as they all use the same interrupt pin.  We use one SGE
2805 * response queue per port in this mode and protect all response queues with
2806 * queue 0's lock.
2807 */
2808static irqreturn_t t3b_intr_napi(int irq, void *cookie)
2809{
2810        u32 map;
2811        struct adapter *adap = cookie;
2812        struct sge_qset *qs0 = &adap->sge.qs[0];
2813        struct sge_rspq *q0 = &qs0->rspq;
2814
2815        t3_write_reg(adap, A_PL_CLI, 0);
2816        map = t3_read_reg(adap, A_SG_DATA_INTR);
2817
2818        if (unlikely(!map))     /* shared interrupt, most likely */
2819                return IRQ_NONE;
2820
2821        spin_lock(&q0->lock);
2822
2823        if (unlikely(map & F_ERRINTR))
2824                t3_slow_intr_handler(adap);
2825
2826        if (likely(map & 1))
2827                napi_schedule(&qs0->napi);
2828
2829        if (map & 2)
2830                napi_schedule(&adap->sge.qs[1].napi);
2831
2832        spin_unlock(&q0->lock);
2833        return IRQ_HANDLED;
2834}
2835
2836/**
2837 *      t3_intr_handler - select the top-level interrupt handler
2838 *      @adap: the adapter
2839 *      @polling: whether using NAPI to service response queues
2840 *
2841 *      Selects the top-level interrupt handler based on the type of interrupts
2842 *      (MSI-X, MSI, or legacy) and whether NAPI will be used to service the
2843 *      response queues.
2844 */
2845irq_handler_t t3_intr_handler(struct adapter *adap, int polling)
2846{
2847        if (adap->flags & USING_MSIX)
2848                return polling ? t3_sge_intr_msix_napi : t3_sge_intr_msix;
2849        if (adap->flags & USING_MSI)
2850                return polling ? t3_intr_msi_napi : t3_intr_msi;
2851        if (adap->params.rev > 0)
2852                return polling ? t3b_intr_napi : t3b_intr;
2853        return t3_intr;
2854}
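
/*
 * Illustrative sketch (not part of this file): the main driver requests
 * its interrupt with the handler selected above, roughly as follows
 * (irq_name is a hypothetical buffer):
 *
 *	err = request_irq(adap->pdev->irq,
 *			  t3_intr_handler(adap, adap->sge.qs[0].rspq.polling),
 *			  (adap->flags & USING_MSI) ? 0 : IRQF_SHARED,
 *			  irq_name, adap);
 */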
2855
2856#define SGE_PARERR (F_CPPARITYERROR | F_OCPARITYERROR | F_RCPARITYERROR | \
2857                    F_IRPARITYERROR | V_ITPARITYERROR(M_ITPARITYERROR) | \
2858                    V_FLPARITYERROR(M_FLPARITYERROR) | F_LODRBPARITYERROR | \
2859                    F_HIDRBPARITYERROR | F_LORCQPARITYERROR | \
2860                    F_HIRCQPARITYERROR)
2861#define SGE_FRAMINGERR (F_UC_REQ_FRAMINGERROR | F_R_REQ_FRAMINGERROR)
2862#define SGE_FATALERR (SGE_PARERR | SGE_FRAMINGERR | F_RSPQCREDITOVERFOW | \
2863                      F_RSPQDISABLED)
2864
2865/**
2866 *      t3_sge_err_intr_handler - SGE async event interrupt handler
2867 *      @adapter: the adapter
2868 *
2869 *      Interrupt handler for SGE asynchronous (non-data) events.
2870 */
2871void t3_sge_err_intr_handler(struct adapter *adapter)
2872{
2873        unsigned int v, status = t3_read_reg(adapter, A_SG_INT_CAUSE) &
2874                                 ~F_FLEMPTY;
2875
2876        if (status & SGE_PARERR)
2877                CH_ALERT(adapter, "SGE parity error (0x%x)\n",
2878                         status & SGE_PARERR);
2879        if (status & SGE_FRAMINGERR)
2880                CH_ALERT(adapter, "SGE framing error (0x%x)\n",
2881                         status & SGE_FRAMINGERR);
2882
2883        if (status & F_RSPQCREDITOVERFOW)
2884                CH_ALERT(adapter, "SGE response queue credit overflow\n");
2885
2886        if (status & F_RSPQDISABLED) {
2887                v = t3_read_reg(adapter, A_SG_RSPQ_FL_STATUS);
2888
2889                CH_ALERT(adapter,
2890                         "packet delivered to disabled response queue "
2891                         "(0x%x)\n", (v >> S_RSPQ0DISABLED) & 0xff);
2892        }
2893
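        /*
         * Doorbell drop/full/empty conditions are not handled here in
         * interrupt context; they are deferred to the adapter's workqueue
         * tasks scheduled below.
         */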
2894        if (status & (F_HIPIODRBDROPERR | F_LOPIODRBDROPERR))
2895                queue_work(cxgb3_wq, &adapter->db_drop_task);
2896
2897        if (status & (F_HIPRIORITYDBFULL | F_LOPRIORITYDBFULL))
2898                queue_work(cxgb3_wq, &adapter->db_full_task);
2899
2900        if (status & (F_HIPRIORITYDBEMPTY | F_LOPRIORITYDBEMPTY))
2901                queue_work(cxgb3_wq, &adapter->db_empty_task);
2902
2903        t3_write_reg(adapter, A_SG_INT_CAUSE, status);
2904        if (status &  SGE_FATALERR)
2905                t3_fatal_err(adapter);
2906}
2907
2908/**
2909 *      sge_timer_tx - perform periodic maintenance of an SGE qset
2910 *      @t: a timer list containing the SGE queue set to maintain
2911 *
 *      Runs periodically from a timer to perform maintenance of an SGE queue
 *      set.  It cleans up any completed Tx descriptors that may still be
 *      pending.  Normal descriptor cleanup happens when new packets are added
 *      to a Tx queue, so this timer is relatively infrequent and does any
 *      cleanup only if the Tx queue has not seen any new packets in a while.
 *      We make a best-effort attempt to reclaim descriptors, in that we don't
 *      wait around if we cannot get a queue's lock (which is most likely
 *      because someone else is queueing new packets and so will also handle
 *      the cleanup).  Since control queues use immediate data exclusively we
 *      don't bother cleaning them up here.
2925 */
2926static void sge_timer_tx(struct timer_list *t)
2927{
2928        struct sge_qset *qs = from_timer(qs, t, tx_reclaim_timer);
2929        struct port_info *pi = netdev_priv(qs->netdev);
2930        struct adapter *adap = pi->adapter;
2931        unsigned int tbd[SGE_TXQ_PER_SET] = {0, 0};
2932        unsigned long next_period;
2933
2934        if (__netif_tx_trylock(qs->tx_q)) {
2935                tbd[TXQ_ETH] = reclaim_completed_tx(adap, &qs->txq[TXQ_ETH],
2936                                                     TX_RECLAIM_TIMER_CHUNK);
2937                __netif_tx_unlock(qs->tx_q);
2938        }
2939
2940        if (spin_trylock(&qs->txq[TXQ_OFLD].lock)) {
2941                tbd[TXQ_OFLD] = reclaim_completed_tx(adap, &qs->txq[TXQ_OFLD],
2942                                                     TX_RECLAIM_TIMER_CHUNK);
2943                spin_unlock(&qs->txq[TXQ_OFLD].lock);
2944        }
2945
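        /*
         * Adapt the reclaim period: if a full TX_RECLAIM_TIMER_CHUNK worth of
         * descriptors was reclaimed the queues are busy, so halve the period
         * (the right shift below) before the next run.
         */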
2946        next_period = TX_RECLAIM_PERIOD >>
2947                      (max(tbd[TXQ_ETH], tbd[TXQ_OFLD]) /
2948                      TX_RECLAIM_TIMER_CHUNK);
2949        mod_timer(&qs->tx_reclaim_timer, jiffies + next_period);
2950}
2951
2952/**
2953 *      sge_timer_rx - perform periodic maintenance of an SGE qset
2954 *      @t: the timer list containing the SGE queue set to maintain
2955 *
2956 *      a) Replenishes Rx queues that have run out due to memory shortage.
2957 *      Normally new Rx buffers are added when existing ones are consumed but
2958 *      when out of memory a queue can become empty.  We try to add only a few
2959 *      buffers here, the queue will be replenished fully as these new buffers
2960 *      are used up if memory shortage has subsided.
2961 *
 *      b) Returns coalesced response queue credits in case a response queue
 *      is starved.
2964 *
2965 */
2966static void sge_timer_rx(struct timer_list *t)
2967{
2968        spinlock_t *lock;
2969        struct sge_qset *qs = from_timer(qs, t, rx_reclaim_timer);
2970        struct port_info *pi = netdev_priv(qs->netdev);
2971        struct adapter *adap = pi->adapter;
2972        u32 status;
2973
2974        lock = adap->params.rev > 0 ?
2975               &qs->rspq.lock : &adap->sge.qs[0].rspq.lock;
2976
2977        if (!spin_trylock_irq(lock))
2978                goto out;
2979
2980        if (napi_is_scheduled(&qs->napi))
2981                goto unlock;
2982
2983        if (adap->params.rev < 4) {
2984                status = t3_read_reg(adap, A_SG_RSPQ_FL_STATUS);
2985
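                /*
                 * The status bit for this context is set when the response
                 * queue has run out of credits (starved).  Return one of the
                 * credits we have been holding back to restart it, then clear
                 * the status bit.
                 */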
2986                if (status & (1 << qs->rspq.cntxt_id)) {
2987                        qs->rspq.starved++;
2988                        if (qs->rspq.credits) {
2989                                qs->rspq.credits--;
2990                                refill_rspq(adap, &qs->rspq, 1);
2991                                qs->rspq.restarted++;
2992                                t3_write_reg(adap, A_SG_RSPQ_FL_STATUS,
2993                                             1 << qs->rspq.cntxt_id);
2994                        }
2995                }
2996        }
2997
2998        if (qs->fl[0].credits < qs->fl[0].size)
2999                __refill_fl(adap, &qs->fl[0]);
3000        if (qs->fl[1].credits < qs->fl[1].size)
3001                __refill_fl(adap, &qs->fl[1]);
3002
3003unlock:
3004        spin_unlock_irq(lock);
3005out:
3006        mod_timer(&qs->rx_reclaim_timer, jiffies + RX_RECLAIM_PERIOD);
3007}
3008
3009/**
3010 *      t3_update_qset_coalesce - update coalescing settings for a queue set
3011 *      @qs: the SGE queue set
3012 *      @p: new queue set parameters
3013 *
3014 *      Update the coalescing settings for an SGE queue set.  Nothing is done
3015 *      if the queue set is not initialized yet.
3016 */
3017void t3_update_qset_coalesce(struct sge_qset *qs, const struct qset_params *p)
3018{
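        /*
         * The SGE holdoff timer counts in ~100 ns units (t3_sge_init()
         * programs A_SG_TIMER_TICK to core_ticks_per_usec() / 10), so the
         * requested microseconds are converted to timer ticks here.
         */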
3019        qs->rspq.holdoff_tmr = max(p->coalesce_usecs * 10, 1U);/* can't be 0 */
3020        qs->rspq.polling = p->polling;
3021        qs->napi.poll = p->polling ? napi_rx_handler : ofld_poll;
3022}
3023
3024/**
3025 *      t3_sge_alloc_qset - initialize an SGE queue set
3026 *      @adapter: the adapter
3027 *      @id: the queue set id
3028 *      @nports: how many Ethernet ports will be using this queue set
3029 *      @irq_vec_idx: the IRQ vector index for response queue interrupts
3030 *      @p: configuration parameters for this queue set
3031 *      @ntxq: number of Tx queues for the queue set
3032 *      @dev: net device associated with this queue set
3033 *      @netdevq: net device TX queue associated with this queue set
3034 *
3035 *      Allocate resources and initialize an SGE queue set.  A queue set
3036 *      comprises a response queue, two Rx free-buffer queues, and up to 3
3037 *      Tx queues.  The Tx queues are assigned roles in the order Ethernet
3038 *      queue, offload queue, and control queue.
3039 */
3040int t3_sge_alloc_qset(struct adapter *adapter, unsigned int id, int nports,
3041                      int irq_vec_idx, const struct qset_params *p,
3042                      int ntxq, struct net_device *dev,
3043                      struct netdev_queue *netdevq)
3044{
3045        int i, avail, ret = -ENOMEM;
3046        struct sge_qset *q = &adapter->sge.qs[id];
3047
3048        init_qset_cntxt(q, id);
3049        timer_setup(&q->tx_reclaim_timer, sge_timer_tx, 0);
3050        timer_setup(&q->rx_reclaim_timer, sge_timer_rx, 0);
3051
3052        q->fl[0].desc = alloc_ring(adapter->pdev, p->fl_size,
3053                                   sizeof(struct rx_desc),
3054                                   sizeof(struct rx_sw_desc),
3055                                   &q->fl[0].phys_addr, &q->fl[0].sdesc);
3056        if (!q->fl[0].desc)
3057                goto err;
3058
3059        q->fl[1].desc = alloc_ring(adapter->pdev, p->jumbo_size,
3060                                   sizeof(struct rx_desc),
3061                                   sizeof(struct rx_sw_desc),
3062                                   &q->fl[1].phys_addr, &q->fl[1].sdesc);
3063        if (!q->fl[1].desc)
3064                goto err;
3065
3066        q->rspq.desc = alloc_ring(adapter->pdev, p->rspq_size,
3067                                  sizeof(struct rsp_desc), 0,
3068                                  &q->rspq.phys_addr, NULL);
3069        if (!q->rspq.desc)
3070                goto err;
3071
3072        for (i = 0; i < ntxq; ++i) {
3073                /*
3074                 * The control queue always uses immediate data so does not
3075                 * need to keep track of any sk_buffs.
3076                 */
3077                size_t sz = i == TXQ_CTRL ? 0 : sizeof(struct tx_sw_desc);
3078
3079                q->txq[i].desc = alloc_ring(adapter->pdev, p->txq_size[i],
3080                                            sizeof(struct tx_desc), sz,
3081                                            &q->txq[i].phys_addr,
3082                                            &q->txq[i].sdesc);
3083                if (!q->txq[i].desc)
3084                        goto err;
3085
3086                q->txq[i].gen = 1;
3087                q->txq[i].size = p->txq_size[i];
3088                spin_lock_init(&q->txq[i].lock);
3089                skb_queue_head_init(&q->txq[i].sendq);
3090        }
3091
3092        INIT_WORK(&q->txq[TXQ_OFLD].qresume_task, restart_offloadq);
3093        INIT_WORK(&q->txq[TXQ_CTRL].qresume_task, restart_ctrlq);
3094
3095        q->fl[0].gen = q->fl[1].gen = 1;
3096        q->fl[0].size = p->fl_size;
3097        q->fl[1].size = p->jumbo_size;
3098
3099        q->rspq.gen = 1;
3100        q->rspq.size = p->rspq_size;
3101        spin_lock_init(&q->rspq.lock);
3102        skb_queue_head_init(&q->rspq.rx_queue);
3103
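        /*
         * Stop the Ethernet Tx queue while fewer free descriptors remain than
         * are needed for one maximally fragmented packet per port sharing
         * this queue set.
         */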
3104        q->txq[TXQ_ETH].stop_thres = nports *
3105            flits_to_desc(sgl_len(MAX_SKB_FRAGS + 1) + 3);
3106
3107#if FL0_PG_CHUNK_SIZE > 0
3108        q->fl[0].buf_size = FL0_PG_CHUNK_SIZE;
3109#else
3110        q->fl[0].buf_size = SGE_RX_SM_BUF_SIZE + sizeof(struct cpl_rx_data);
3111#endif
3112#if FL1_PG_CHUNK_SIZE > 0
3113        q->fl[1].buf_size = FL1_PG_CHUNK_SIZE;
3114#else
3115        q->fl[1].buf_size = is_offload(adapter) ?
3116                (16 * 1024) - SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) :
3117                MAX_FRAME_SIZE + 2 + sizeof(struct cpl_rx_pkt);
3118#endif
3119
3120        q->fl[0].use_pages = FL0_PG_CHUNK_SIZE > 0;
3121        q->fl[1].use_pages = FL1_PG_CHUNK_SIZE > 0;
3122        q->fl[0].order = FL0_PG_ORDER;
3123        q->fl[1].order = FL1_PG_ORDER;
3124        q->fl[0].alloc_size = FL0_PG_ALLOC_SIZE;
3125        q->fl[1].alloc_size = FL1_PG_ALLOC_SIZE;
3126
3127        spin_lock_irq(&adapter->sge.reg_lock);
3128
3129        /* FL threshold comparison uses < */
3130        ret = t3_sge_init_rspcntxt(adapter, q->rspq.cntxt_id, irq_vec_idx,
3131                                   q->rspq.phys_addr, q->rspq.size,
3132                                   q->fl[0].buf_size - SGE_PG_RSVD, 1, 0);
3133        if (ret)
3134                goto err_unlock;
3135
3136        for (i = 0; i < SGE_RXQ_PER_SET; ++i) {
3137                ret = t3_sge_init_flcntxt(adapter, q->fl[i].cntxt_id, 0,
3138                                          q->fl[i].phys_addr, q->fl[i].size,
3139                                          q->fl[i].buf_size - SGE_PG_RSVD,
3140                                          p->cong_thres, 1, 0);
3141                if (ret)
3142                        goto err_unlock;
3143        }
3144
3145        ret = t3_sge_init_ecntxt(adapter, q->txq[TXQ_ETH].cntxt_id, USE_GTS,
3146                                 SGE_CNTXT_ETH, id, q->txq[TXQ_ETH].phys_addr,
3147                                 q->txq[TXQ_ETH].size, q->txq[TXQ_ETH].token,
3148                                 1, 0);
3149        if (ret)
3150                goto err_unlock;
3151
3152        if (ntxq > 1) {
3153                ret = t3_sge_init_ecntxt(adapter, q->txq[TXQ_OFLD].cntxt_id,
3154                                         USE_GTS, SGE_CNTXT_OFLD, id,
3155                                         q->txq[TXQ_OFLD].phys_addr,
3156                                         q->txq[TXQ_OFLD].size, 0, 1, 0);
3157                if (ret)
3158                        goto err_unlock;
3159        }
3160
3161        if (ntxq > 2) {
3162                ret = t3_sge_init_ecntxt(adapter, q->txq[TXQ_CTRL].cntxt_id, 0,
3163                                         SGE_CNTXT_CTRL, id,
3164                                         q->txq[TXQ_CTRL].phys_addr,
3165                                         q->txq[TXQ_CTRL].size,
3166                                         q->txq[TXQ_CTRL].token, 1, 0);
3167                if (ret)
3168                        goto err_unlock;
3169        }
3170
3171        spin_unlock_irq(&adapter->sge.reg_lock);
3172
3173        q->adap = adapter;
3174        q->netdev = dev;
3175        q->tx_q = netdevq;
3176        t3_update_qset_coalesce(q, p);
3177
3178        avail = refill_fl(adapter, &q->fl[0], q->fl[0].size,
3179                          GFP_KERNEL | __GFP_COMP);
3180        if (!avail) {
3181                CH_ALERT(adapter, "free list queue 0 initialization failed\n");
3182                ret = -ENOMEM;
3183                goto err;
3184        }
3185        if (avail < q->fl[0].size)
3186                CH_WARN(adapter, "free list queue 0 enabled with %d credits\n",
3187                        avail);
3188
3189        avail = refill_fl(adapter, &q->fl[1], q->fl[1].size,
3190                          GFP_KERNEL | __GFP_COMP);
3191        if (avail < q->fl[1].size)
3192                CH_WARN(adapter, "free list queue 1 enabled with %d credits\n",
3193                        avail);
3194        refill_rspq(adapter, &q->rspq, q->rspq.size - 1);
3195
3196        t3_write_reg(adapter, A_SG_GTS, V_RSPQ(q->rspq.cntxt_id) |
3197                     V_NEWTIMER(q->rspq.holdoff_tmr));
3198
3199        return 0;
3200
3201err_unlock:
3202        spin_unlock_irq(&adapter->sge.reg_lock);
3203err:
3204        t3_free_qset(adapter, q);
3205        return ret;
3206}
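
/*
 * Usage sketch, not part of the driver: roughly how the upper layer brings
 * up one queue set per port.  The real code is setup_sge_qsets() in
 * cxgb3_main.c; the parameter choices here (one port per queue set, queue
 * set index + 1 as the MSI-X vector index, all SGE_TXQ_PER_SET Tx queues)
 * are illustrative assumptions only.
 */
#if 0
static int example_alloc_one_qset(struct adapter *adap, int qidx,
                                  struct net_device *dev)
{
        int irq_vec_idx = (adap->flags & USING_MSIX) ? qidx + 1 : 0;

        return t3_sge_alloc_qset(adap, qidx, 1, irq_vec_idx,
                                 &adap->params.sge.qset[qidx],
                                 SGE_TXQ_PER_SET, dev,
                                 netdev_get_tx_queue(dev, 0));
}
#endif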
3207
3208/**
 *      t3_start_sge_timers - start SGE timer callbacks
 *      @adap: the adapter
 *
 *      Starts each SGE queue set's timer callbacks
3213 */
3214void t3_start_sge_timers(struct adapter *adap)
3215{
3216        int i;
3217
3218        for (i = 0; i < SGE_QSETS; ++i) {
3219                struct sge_qset *q = &adap->sge.qs[i];
3220
3221                if (q->tx_reclaim_timer.function)
3222                        mod_timer(&q->tx_reclaim_timer,
3223                                  jiffies + TX_RECLAIM_PERIOD);
3224
3225                if (q->rx_reclaim_timer.function)
3226                        mod_timer(&q->rx_reclaim_timer,
3227                                  jiffies + RX_RECLAIM_PERIOD);
3228        }
3229}
3230
3231/**
 *      t3_stop_sge_timers - stop SGE timer callbacks
 *      @adap: the adapter
 *
 *      Stops each SGE queue set's timer callbacks
3236 */
3237void t3_stop_sge_timers(struct adapter *adap)
3238{
3239        int i;
3240
3241        for (i = 0; i < SGE_QSETS; ++i) {
3242                struct sge_qset *q = &adap->sge.qs[i];
3243
3244                if (q->tx_reclaim_timer.function)
3245                        del_timer_sync(&q->tx_reclaim_timer);
3246                if (q->rx_reclaim_timer.function)
3247                        del_timer_sync(&q->rx_reclaim_timer);
3248        }
3249}
3250
3251/**
3252 *      t3_free_sge_resources - free SGE resources
3253 *      @adap: the adapter
3254 *
3255 *      Frees resources used by the SGE queue sets.
3256 */
3257void t3_free_sge_resources(struct adapter *adap)
3258{
3259        int i;
3260
3261        for (i = 0; i < SGE_QSETS; ++i)
3262                t3_free_qset(adap, &adap->sge.qs[i]);
3263}
3264
3265/**
3266 *      t3_sge_start - enable SGE
3267 *      @adap: the adapter
3268 *
3269 *      Enables the SGE for DMAs.  This is the last step in starting packet
3270 *      transfers.
3271 */
3272void t3_sge_start(struct adapter *adap)
3273{
3274        t3_set_reg_field(adap, A_SG_CONTROL, F_GLOBALENABLE, F_GLOBALENABLE);
3275}
3276
3277/**
3278 *      t3_sge_stop_dma - Disable SGE DMA engine operation
3279 *      @adap: the adapter
3280 *
 *      Can be invoked from interrupt context, e.g. from the error handler.
 *
 *      Note that this function cannot cancel the queue restart work items,
 *      as it cannot wait if called from interrupt context; however, the
 *      work items will have no effect since the doorbells are disabled.  The
 *      driver will call t3_sge_stop() later from process context, at which
 *      time the work items will be cancelled if they are still pending.
3288 */
3289void t3_sge_stop_dma(struct adapter *adap)
3290{
3291        t3_set_reg_field(adap, A_SG_CONTROL, F_GLOBALENABLE, 0);
3292}
3293
3294/**
 *      t3_sge_stop - disable SGE operation completely
 *      @adap: the adapter
 *
 *      Called from process context.  Disables the DMA engine and cancels any
 *      pending queue restart work items.
3300 */
3301void t3_sge_stop(struct adapter *adap)
3302{
3303        int i;
3304
3305        t3_sge_stop_dma(adap);
3306
3307        /* workqueues aren't initialized otherwise */
3308        if (!(adap->flags & FULL_INIT_DONE))
3309                return;
3310        for (i = 0; i < SGE_QSETS; ++i) {
3311                struct sge_qset *qs = &adap->sge.qs[i];
3312
3313                cancel_work_sync(&qs->txq[TXQ_OFLD].qresume_task);
3314                cancel_work_sync(&qs->txq[TXQ_CTRL].qresume_task);
3315        }
3316}
3317
3318/**
3319 *      t3_sge_init - initialize SGE
3320 *      @adap: the adapter
3321 *      @p: the SGE parameters
3322 *
3323 *      Performs SGE initialization needed every time after a chip reset.
 *      We do not initialize any of the queue sets here; instead the driver's
 *      top level must request those individually.  We also do not enable DMA
 *      here; that should be done after the queues have been set up.
3327 */
3328void t3_sge_init(struct adapter *adap, struct sge_params *p)
3329{
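        /*
         * ups is derived from the size of PCI BAR 2 in 4 KB pages: for a
         * power-of-two sized BAR, ffs() yields log2(pages) + 1, which is
         * encoded into V_USERSPACESIZE() below.
         */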
3330        unsigned int ctrl, ups = ffs(pci_resource_len(adap->pdev, 2) >> 12);
3331
3332        ctrl = F_DROPPKT | V_PKTSHIFT(2) | F_FLMODE | F_AVOIDCQOVFL |
3333            F_CQCRDTCTRL | F_CONGMODE | F_TNLFLMODE | F_FATLPERREN |
3334            V_HOSTPAGESIZE(PAGE_SHIFT - 11) | F_BIGENDIANINGRESS |
3335            V_USERSPACESIZE(ups ? ups - 1 : 0) | F_ISCSICOALESCING;
3336#if SGE_NUM_GENBITS == 1
3337        ctrl |= F_EGRGENCTRL;
3338#endif
3339        if (adap->params.rev > 0) {
3340                if (!(adap->flags & (USING_MSIX | USING_MSI)))
3341                        ctrl |= F_ONEINTMULTQ | F_OPTONEINTMULTQ;
3342        }
3343        t3_write_reg(adap, A_SG_CONTROL, ctrl);
3344        t3_write_reg(adap, A_SG_EGR_RCQ_DRB_THRSH, V_HIRCQDRBTHRSH(512) |
3345                     V_LORCQDRBTHRSH(512));
3346        t3_write_reg(adap, A_SG_TIMER_TICK, core_ticks_per_usec(adap) / 10);
3347        t3_write_reg(adap, A_SG_CMDQ_CREDIT_TH, V_THRESHOLD(32) |
3348                     V_TIMEOUT(200 * core_ticks_per_usec(adap)));
3349        t3_write_reg(adap, A_SG_HI_DRB_HI_THRSH,
3350                     adap->params.rev < T3_REV_C ? 1000 : 500);
3351        t3_write_reg(adap, A_SG_HI_DRB_LO_THRSH, 256);
3352        t3_write_reg(adap, A_SG_LO_DRB_HI_THRSH, 1000);
3353        t3_write_reg(adap, A_SG_LO_DRB_LO_THRSH, 256);
3354        t3_write_reg(adap, A_SG_OCO_BASE, V_BASE1(0xfff));
3355        t3_write_reg(adap, A_SG_DRB_PRI_THRESH, 63 * 1024);
3356}
3357
3358/**
3359 *      t3_sge_prep - one-time SGE initialization
3360 *      @adap: the associated adapter
3361 *      @p: SGE parameters
3362 *
 *      Performs one-time initialization of the SGE software state, including
 *      determining defaults for the assorted SGE parameters.  These defaults
 *      can still be changed (e.g. by the administrator) until they are used
 *      to initialize the SGE.
3366 */
3367void t3_sge_prep(struct adapter *adap, struct sge_params *p)
3368{
3369        int i;
3370
3371        p->max_pkt_size = (16 * 1024) - sizeof(struct cpl_rx_data) -
3372            SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
3373
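        /*
         * Per-queue-set defaults.  These are only defaults; they may still be
         * changed (e.g. via the ethtool ring/coalesce handlers in
         * cxgb3_main.c) until t3_sge_alloc_qset() consumes them.
         */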
3374        for (i = 0; i < SGE_QSETS; ++i) {
3375                struct qset_params *q = p->qset + i;
3376
3377                q->polling = adap->params.rev > 0;
3378                q->coalesce_usecs = 5;
3379                q->rspq_size = 1024;
3380                q->fl_size = 1024;
3381                q->jumbo_size = 512;
3382                q->txq_size[TXQ_ETH] = 1024;
3383                q->txq_size[TXQ_OFLD] = 1024;
3384                q->txq_size[TXQ_CTRL] = 256;
3385                q->cong_thres = 0;
3386        }
3387
3388        spin_lock_init(&adap->sge.reg_lock);
3389}
3390