linux/drivers/net/cxgb3/sge.c
   1/*
   2 * Copyright (c) 2005-2008 Chelsio, Inc. All rights reserved.
   3 *
   4 * This software is available to you under a choice of one of two
   5 * licenses.  You may choose to be licensed under the terms of the GNU
   6 * General Public License (GPL) Version 2, available from the file
   7 * COPYING in the main directory of this source tree, or the
   8 * OpenIB.org BSD license below:
   9 *
  10 *     Redistribution and use in source and binary forms, with or
  11 *     without modification, are permitted provided that the following
  12 *     conditions are met:
  13 *
  14 *      - Redistributions of source code must retain the above
  15 *        copyright notice, this list of conditions and the following
  16 *        disclaimer.
  17 *
  18 *      - Redistributions in binary form must reproduce the above
  19 *        copyright notice, this list of conditions and the following
  20 *        disclaimer in the documentation and/or other materials
  21 *        provided with the distribution.
  22 *
  23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
  27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
  28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  30 * SOFTWARE.
  31 */
  32#include <linux/skbuff.h>
  33#include <linux/netdevice.h>
  34#include <linux/etherdevice.h>
  35#include <linux/if_vlan.h>
  36#include <linux/ip.h>
  37#include <linux/tcp.h>
  38#include <linux/dma-mapping.h>
  39#include <net/arp.h>
  40#include "common.h"
  41#include "regs.h"
  42#include "sge_defs.h"
  43#include "t3_cpl.h"
  44#include "firmware_exports.h"
  45
  46#define USE_GTS 0
  47
  48#define SGE_RX_SM_BUF_SIZE 1536
  49
  50#define SGE_RX_COPY_THRES  256
  51#define SGE_RX_PULL_LEN    128
  52
  53#define SGE_PG_RSVD SMP_CACHE_BYTES
  54/*
  55 * Page chunk size for FL0 buffers if FL0 is to be populated with page chunks.
  56 * It must be a divisor of PAGE_SIZE.  If set to 0 FL0 will use sk_buffs
  57 * directly.
  58 */
  59#define FL0_PG_CHUNK_SIZE  2048
  60#define FL0_PG_ORDER 0
  61#define FL0_PG_ALLOC_SIZE (PAGE_SIZE << FL0_PG_ORDER)
  62#define FL1_PG_CHUNK_SIZE (PAGE_SIZE > 8192 ? 16384 : 8192)
  63#define FL1_PG_ORDER (PAGE_SIZE > 8192 ? 0 : 1)
  64#define FL1_PG_ALLOC_SIZE (PAGE_SIZE << FL1_PG_ORDER)
  65
  66#define SGE_RX_DROP_THRES 16
  67#define RX_RECLAIM_PERIOD (HZ/4)
  68
  69/*
  70 * Max number of Rx buffers we replenish at a time.
  71 */
  72#define MAX_RX_REFILL 16U
  73/*
  74 * Period of the Tx buffer reclaim timer.  This timer does not need to run
  75 * frequently as Tx buffers are usually reclaimed by new Tx packets.
  76 */
  77#define TX_RECLAIM_PERIOD (HZ / 4)
  78#define TX_RECLAIM_TIMER_CHUNK 64U
  79#define TX_RECLAIM_CHUNK 16U
  80
  81/* WR size in bytes */
  82#define WR_LEN (WR_FLITS * 8)
  83
  84/*
  85 * Types of Tx queues in each queue set.  Order here matters, do not change.
  86 */
  87enum { TXQ_ETH, TXQ_OFLD, TXQ_CTRL };
  88
  89/* Values for sge_txq.flags */
  90enum {
  91        TXQ_RUNNING = 1 << 0,   /* fetch engine is running */
  92        TXQ_LAST_PKT_DB = 1 << 1,       /* last packet rang the doorbell */
  93};
  94
  95struct tx_desc {
  96        __be64 flit[TX_DESC_FLITS];
  97};
  98
  99struct rx_desc {
 100        __be32 addr_lo;
 101        __be32 len_gen;
 102        __be32 gen2;
 103        __be32 addr_hi;
 104};
 105
 106struct tx_sw_desc {             /* SW state per Tx descriptor */
 107        struct sk_buff *skb;
 108        u8 eop;       /* set if last descriptor for packet */
 109        u8 addr_idx;  /* buffer index of first SGL entry in descriptor */
 110        u8 fragidx;   /* first page fragment associated with descriptor */
 111        s8 sflit;     /* start flit of first SGL entry in descriptor */
 112};
 113
 114struct rx_sw_desc {                /* SW state per Rx descriptor */
 115        union {
 116                struct sk_buff *skb;
 117                struct fl_pg_chunk pg_chunk;
 118        };
 119        DECLARE_PCI_UNMAP_ADDR(dma_addr);
 120};
 121
 122struct rsp_desc {               /* response queue descriptor */
 123        struct rss_header rss_hdr;
 124        __be32 flags;
 125        __be32 len_cq;
 126        u8 imm_data[47];
 127        u8 intr_gen;
 128};
 129
 130/*
 131 * Holds unmapping information for Tx packets that need deferred unmapping.
 132 * This structure lives at skb->head and must be allocated by callers.
 133 */
 134struct deferred_unmap_info {
 135        struct pci_dev *pdev;
 136        dma_addr_t addr[MAX_SKB_FRAGS + 1];
 137};
 138
 139/*
 140 * Maps a number of flits to the number of Tx descriptors that can hold them.
 141 * The formula is
 142 *
 143 * desc = 1 + (flits - 2) / (WR_FLITS - 1).
 144 *
 145 * HW allows up to 4 descriptors to be combined into a WR.
 146 */
 147static u8 flit_desc_map[] = {
 148        0,
 149#if SGE_NUM_GENBITS == 1
 150        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
 151        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
 152        3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
 153        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4
 154#elif SGE_NUM_GENBITS == 2
 155        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
 156        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
 157        3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
 158        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
 159#else
 160# error "SGE_NUM_GENBITS must be 1 or 2"
 161#endif
 162};
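
/*
 * Worked example, assuming WR_FLITS == 15 as the 2-genbit table above
 * implies: a WR of 16 flits needs 1 + (16 - 2) / (15 - 1) = 2 descriptors,
 * which matches flit_desc_map[16] == 2.
 */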
 163
 164static inline struct sge_qset *fl_to_qset(const struct sge_fl *q, int qidx)
 165{
 166        return container_of(q, struct sge_qset, fl[qidx]);
 167}
 168
 169static inline struct sge_qset *rspq_to_qset(const struct sge_rspq *q)
 170{
 171        return container_of(q, struct sge_qset, rspq);
 172}
 173
 174static inline struct sge_qset *txq_to_qset(const struct sge_txq *q, int qidx)
 175{
 176        return container_of(q, struct sge_qset, txq[qidx]);
 177}
 178
 179/**
 180 *      refill_rspq - replenish an SGE response queue
 181 *      @adapter: the adapter
 182 *      @q: the response queue to replenish
 183 *      @credits: how many new responses to make available
 184 *
 185 *      Replenishes a response queue by making the supplied number of responses
 186 *      available to HW.
 187 */
 188static inline void refill_rspq(struct adapter *adapter,
 189                               const struct sge_rspq *q, unsigned int credits)
 190{
 191        rmb();
 192        t3_write_reg(adapter, A_SG_RSPQ_CREDIT_RETURN,
 193                     V_RSPQ(q->cntxt_id) | V_CREDITS(credits));
 194}
 195
 196/**
 197 *      need_skb_unmap - does the platform need unmapping of sk_buffs?
 198 *
 199 * Returns true if the platform needs sk_buff unmapping.  The result is a
 200 * compile-time constant, so unnecessary unmapping code is optimized away.
 201 */
 202static inline int need_skb_unmap(void)
 203{
 204        /*
 205         * This structure is used to tell if the platform needs buffer
 206         * unmapping by checking if DECLARE_PCI_UNMAP_ADDR defines anything.
 207         */
 208        struct dummy {
 209                DECLARE_PCI_UNMAP_ADDR(addr);
 210        };
 211
 212        return sizeof(struct dummy) != 0;
 213}
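
/*
 * Sketch of the intended use (see free_tx_desc() below): the result is a
 * compile-time constant, so on platforms that do not need unmapping the
 * branch and the unmap call are optimized away entirely:
 *
 *      const int need_unmap = need_skb_unmap() &&
 *                             q->cntxt_id >= FW_TUNNEL_SGEEC_START;
 *      ...
 *      if (need_unmap)
 *              unmap_skb(d->skb, q, cidx, pdev);
 */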
 214
 215/**
 216 *      unmap_skb - unmap a packet main body and its page fragments
 217 *      @skb: the packet
 218 *      @q: the Tx queue containing Tx descriptors for the packet
 219 *      @cidx: index of Tx descriptor
 220 *      @pdev: the PCI device
 221 *
 222 *      Unmap the main body of an sk_buff and its page fragments, if any.
 223 *      Because of the fairly complicated structure of our SGLs and the desire
 224 *      to conserve space for metadata, the information necessary to unmap an
 225 *      sk_buff is spread across the sk_buff itself (buffer lengths), the HW Tx
 226 *      descriptors (the physical addresses of the various data buffers), and
 227 *      the SW descriptor state (assorted indices).  The send functions
 228 *      initialize the indices for the first packet descriptor so we can unmap
 229 *      the buffers held in the first Tx descriptor here, and we have enough
 230 *      information at this point to set the state for the next Tx descriptor.
 231 *
 232 *      Note that it is possible to clean up the first descriptor of a packet
 233 *      before the send routines have written the next descriptors, but this
 234 *      race does not cause any problem.  We just end up writing the unmapping
 235 *      info for the descriptor first.
 236 */
 237static inline void unmap_skb(struct sk_buff *skb, struct sge_txq *q,
 238                             unsigned int cidx, struct pci_dev *pdev)
 239{
 240        const struct sg_ent *sgp;
 241        struct tx_sw_desc *d = &q->sdesc[cidx];
 242        int nfrags, frag_idx, curflit, j = d->addr_idx;
 243
 244        sgp = (struct sg_ent *)&q->desc[cidx].flit[d->sflit];
 245        frag_idx = d->fragidx;
 246
 247        if (frag_idx == 0 && skb_headlen(skb)) {
 248                pci_unmap_single(pdev, be64_to_cpu(sgp->addr[0]),
 249                                 skb_headlen(skb), PCI_DMA_TODEVICE);
 250                j = 1;
 251        }
 252
 253        curflit = d->sflit + 1 + j;
 254        nfrags = skb_shinfo(skb)->nr_frags;
 255
 256        while (frag_idx < nfrags && curflit < WR_FLITS) {
 257                pci_unmap_page(pdev, be64_to_cpu(sgp->addr[j]),
 258                               skb_shinfo(skb)->frags[frag_idx].size,
 259                               PCI_DMA_TODEVICE);
 260                j ^= 1;
 261                if (j == 0) {
 262                        sgp++;
 263                        curflit++;
 264                }
 265                curflit++;
 266                frag_idx++;
 267        }
 268
 269        if (frag_idx < nfrags) {   /* SGL continues into next Tx descriptor */
 270                d = cidx + 1 == q->size ? q->sdesc : d + 1;
 271                d->fragidx = frag_idx;
 272                d->addr_idx = j;
 273                d->sflit = curflit - WR_FLITS - j; /* sflit can be -1 */
 274        }
 275}
 276
 277/**
 278 *      free_tx_desc - reclaims Tx descriptors and their buffers
 279 *      @adapter: the adapter
 280 *      @q: the Tx queue to reclaim descriptors from
 281 *      @n: the number of descriptors to reclaim
 282 *
 283 *      Reclaims Tx descriptors from an SGE Tx queue and frees the associated
 284 *      Tx buffers.  Called with the Tx queue lock held.
 285 */
 286static void free_tx_desc(struct adapter *adapter, struct sge_txq *q,
 287                         unsigned int n)
 288{
 289        struct tx_sw_desc *d;
 290        struct pci_dev *pdev = adapter->pdev;
 291        unsigned int cidx = q->cidx;
 292
 293        const int need_unmap = need_skb_unmap() &&
 294                               q->cntxt_id >= FW_TUNNEL_SGEEC_START;
 295
 296        d = &q->sdesc[cidx];
 297        while (n--) {
 298                if (d->skb) {   /* an SGL is present */
 299                        if (need_unmap)
 300                                unmap_skb(d->skb, q, cidx, pdev);
 301                        if (d->eop)
 302                                kfree_skb(d->skb);
 303                }
 304                ++d;
 305                if (++cidx == q->size) {
 306                        cidx = 0;
 307                        d = q->sdesc;
 308                }
 309        }
 310        q->cidx = cidx;
 311}
 312
 313/**
 314 *      reclaim_completed_tx - reclaims completed Tx descriptors
 315 *      @adapter: the adapter
 316 *      @q: the Tx queue to reclaim completed descriptors from
 317 *      @chunk: maximum number of descriptors to reclaim
 318 *
 319 *      Reclaims Tx descriptors that the SGE has indicated it has processed,
 320 *      and frees the associated buffers if possible.  Called with the Tx
 321 *      queue's lock held.
 322 */
 323static inline unsigned int reclaim_completed_tx(struct adapter *adapter,
 324                                                struct sge_txq *q,
 325                                                unsigned int chunk)
 326{
 327        unsigned int reclaim = q->processed - q->cleaned;
 328
 329        reclaim = min(chunk, reclaim);
 330        if (reclaim) {
 331                free_tx_desc(adapter, q, reclaim);
 332                q->cleaned += reclaim;
 333                q->in_use -= reclaim;
 334        }
 335        return q->processed - q->cleaned;
 336}
 337
 338/**
 339 *      should_restart_tx - are there enough resources to restart a Tx queue?
 340 *      @q: the Tx queue
 341 *
 342 *      Checks if there are enough descriptors to restart a suspended Tx queue.
 343 */
 344static inline int should_restart_tx(const struct sge_txq *q)
 345{
 346        unsigned int r = q->processed - q->cleaned;
 347
 348        return q->in_use - r < (q->size >> 1);
 349}
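
/*
 * Example: with a 1024-descriptor ring a suspended queue is considered
 * restartable once fewer than 512 descriptors would remain in use after
 * reclaiming everything the SGE has already processed.
 */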
 350
 351static void clear_rx_desc(struct pci_dev *pdev, const struct sge_fl *q,
 352                          struct rx_sw_desc *d)
 353{
 354        if (q->use_pages && d->pg_chunk.page) {
 355                (*d->pg_chunk.p_cnt)--;
 356                if (!*d->pg_chunk.p_cnt)
 357                        pci_unmap_page(pdev,
 358                                       d->pg_chunk.mapping,
 359                                       q->alloc_size, PCI_DMA_FROMDEVICE);
 360
 361                put_page(d->pg_chunk.page);
 362                d->pg_chunk.page = NULL;
 363        } else {
 364                pci_unmap_single(pdev, pci_unmap_addr(d, dma_addr),
 365                                 q->buf_size, PCI_DMA_FROMDEVICE);
 366                kfree_skb(d->skb);
 367                d->skb = NULL;
 368        }
 369}
 370
 371/**
 372 *      free_rx_bufs - free the Rx buffers on an SGE free list
 373 *      @pdev: the PCI device associated with the adapter
 374 *      @q: the SGE free list to clean up
 375 *
 376 *      Release the buffers on an SGE free-buffer Rx queue.  HW fetching from
 377 *      this queue should be stopped before calling this function.
 378 */
 379static void free_rx_bufs(struct pci_dev *pdev, struct sge_fl *q)
 380{
 381        unsigned int cidx = q->cidx;
 382
 383        while (q->credits--) {
 384                struct rx_sw_desc *d = &q->sdesc[cidx];
 385
 386
 387                clear_rx_desc(pdev, q, d);
 388                if (++cidx == q->size)
 389                        cidx = 0;
 390        }
 391
 392        if (q->pg_chunk.page) {
 393                __free_pages(q->pg_chunk.page, q->order);
 394                q->pg_chunk.page = NULL;
 395        }
 396}
 397
 398/**
 399 *      add_one_rx_buf - add a packet buffer to a free-buffer list
 400 *      @va:  buffer start VA
 401 *      @len: the buffer length
 402 *      @d: the HW Rx descriptor to write
 403 *      @sd: the SW Rx descriptor to write
 404 *      @gen: the generation bit value
 405 *      @pdev: the PCI device associated with the adapter
 406 *
 407 *      Add a buffer of the given length to the supplied HW and SW Rx
 408 *      descriptors.
 409 */
 410static inline int add_one_rx_buf(void *va, unsigned int len,
 411                                 struct rx_desc *d, struct rx_sw_desc *sd,
 412                                 unsigned int gen, struct pci_dev *pdev)
 413{
 414        dma_addr_t mapping;
 415
 416        mapping = pci_map_single(pdev, va, len, PCI_DMA_FROMDEVICE);
 417        if (unlikely(pci_dma_mapping_error(pdev, mapping)))
 418                return -ENOMEM;
 419
 420        pci_unmap_addr_set(sd, dma_addr, mapping);
 421
 422        d->addr_lo = cpu_to_be32(mapping);
 423        d->addr_hi = cpu_to_be32((u64) mapping >> 32);
 424        wmb();
 425        d->len_gen = cpu_to_be32(V_FLD_GEN1(gen));
 426        d->gen2 = cpu_to_be32(V_FLD_GEN2(gen));
 427        return 0;
 428}
 429
 430static inline int add_one_rx_chunk(dma_addr_t mapping, struct rx_desc *d,
 431                                   unsigned int gen)
 432{
 433        d->addr_lo = cpu_to_be32(mapping);
 434        d->addr_hi = cpu_to_be32((u64) mapping >> 32);
 435        wmb();
 436        d->len_gen = cpu_to_be32(V_FLD_GEN1(gen));
 437        d->gen2 = cpu_to_be32(V_FLD_GEN2(gen));
 438        return 0;
 439}
 440
 441static int alloc_pg_chunk(struct adapter *adapter, struct sge_fl *q,
 442                          struct rx_sw_desc *sd, gfp_t gfp,
 443                          unsigned int order)
 444{
 445        if (!q->pg_chunk.page) {
 446                dma_addr_t mapping;
 447
 448                q->pg_chunk.page = alloc_pages(gfp, order);
 449                if (unlikely(!q->pg_chunk.page))
 450                        return -ENOMEM;
 451                q->pg_chunk.va = page_address(q->pg_chunk.page);
 452                q->pg_chunk.p_cnt = q->pg_chunk.va + (PAGE_SIZE << order) -
 453                                    SGE_PG_RSVD;
 454                q->pg_chunk.offset = 0;
 455                mapping = pci_map_page(adapter->pdev, q->pg_chunk.page,
 456                                       0, q->alloc_size, PCI_DMA_FROMDEVICE);
 457                q->pg_chunk.mapping = mapping;
 458        }
 459        sd->pg_chunk = q->pg_chunk;
 460
 461        prefetch(sd->pg_chunk.p_cnt);
 462
 463        q->pg_chunk.offset += q->buf_size;
 464        if (q->pg_chunk.offset == (PAGE_SIZE << order))
 465                q->pg_chunk.page = NULL;
 466        else {
 467                q->pg_chunk.va += q->buf_size;
 468                get_page(q->pg_chunk.page);
 469        }
 470
 471        if (sd->pg_chunk.offset == 0)
 472                *sd->pg_chunk.p_cnt = 1;
 473        else
 474                *sd->pg_chunk.p_cnt += 1;
 475
 476        return 0;
 477}
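
/*
 * Reference-count sketch: the shared counter (*p_cnt) lives in the last
 * SGE_PG_RSVD bytes of the page allocation and tracks how many chunks from
 * that page are outstanding.  clear_rx_desc() and get_packet_pg() decrement
 * it, and the page is only unmapped once the count drops to zero, so a
 * single pci_map_page() covers every chunk carved from the page.
 */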
 478
 479static inline void ring_fl_db(struct adapter *adap, struct sge_fl *q)
 480{
 481        if (q->pend_cred >= q->credits / 4) {
 482                q->pend_cred = 0;
 483                t3_write_reg(adap, A_SG_KDOORBELL, V_EGRCNTX(q->cntxt_id));
 484        }
 485}
 486
 487/**
 488 *      refill_fl - refill an SGE free-buffer list
 489 *      @adap: the adapter
 490 *      @q: the free-list to refill
 491 *      @n: the number of new buffers to allocate
 492 *      @gfp: the gfp flags for allocating new buffers
 493 *
 494 *      (Re)populate an SGE free-buffer list with up to @n new packet buffers,
 495 *      allocated with the supplied gfp flags.  The caller must ensure that
 496 *      @n does not exceed the queue's capacity.
 497 */
 498static int refill_fl(struct adapter *adap, struct sge_fl *q, int n, gfp_t gfp)
 499{
 500        struct rx_sw_desc *sd = &q->sdesc[q->pidx];
 501        struct rx_desc *d = &q->desc[q->pidx];
 502        unsigned int count = 0;
 503
 504        while (n--) {
 505                dma_addr_t mapping;
 506                int err;
 507
 508                if (q->use_pages) {
 509                        if (unlikely(alloc_pg_chunk(adap, q, sd, gfp,
 510                                                    q->order))) {
 511nomem:                          q->alloc_failed++;
 512                                break;
 513                        }
 514                        mapping = sd->pg_chunk.mapping + sd->pg_chunk.offset;
 515                        pci_unmap_addr_set(sd, dma_addr, mapping);
 516
 517                        add_one_rx_chunk(mapping, d, q->gen);
 518                        pci_dma_sync_single_for_device(adap->pdev, mapping,
 519                                                q->buf_size - SGE_PG_RSVD,
 520                                                PCI_DMA_FROMDEVICE);
 521                } else {
 522                        void *buf_start;
 523
 524                        struct sk_buff *skb = alloc_skb(q->buf_size, gfp);
 525                        if (!skb)
 526                                goto nomem;
 527
 528                        sd->skb = skb;
 529                        buf_start = skb->data;
 530                        err = add_one_rx_buf(buf_start, q->buf_size, d, sd,
 531                                             q->gen, adap->pdev);
 532                        if (unlikely(err)) {
 533                                clear_rx_desc(adap->pdev, q, sd);
 534                                break;
 535                        }
 536                }
 537
 538                d++;
 539                sd++;
 540                if (++q->pidx == q->size) {
 541                        q->pidx = 0;
 542                        q->gen ^= 1;
 543                        sd = q->sdesc;
 544                        d = q->desc;
 545                }
 546                count++;
 547        }
 548
 549        q->credits += count;
 550        q->pend_cred += count;
 551        ring_fl_db(adap, q);
 552
 553        return count;
 554}
 555
 556static inline void __refill_fl(struct adapter *adap, struct sge_fl *fl)
 557{
 558        refill_fl(adap, fl, min(MAX_RX_REFILL, fl->size - fl->credits),
 559                  GFP_ATOMIC | __GFP_COMP);
 560}
 561
 562/**
 563 *      recycle_rx_buf - recycle a receive buffer
 564 *      @adap: the adapter
 565 *      @q: the SGE free list
 566 *      @idx: index of buffer to recycle
 567 *
 568 *      Recycles the specified buffer on the given free list by adding it at
 569 *      the next available slot on the list.
 570 */
 571static void recycle_rx_buf(struct adapter *adap, struct sge_fl *q,
 572                           unsigned int idx)
 573{
 574        struct rx_desc *from = &q->desc[idx];
 575        struct rx_desc *to = &q->desc[q->pidx];
 576
 577        q->sdesc[q->pidx] = q->sdesc[idx];
 578        to->addr_lo = from->addr_lo;    /* already big endian */
 579        to->addr_hi = from->addr_hi;    /* likewise */
 580        wmb();
 581        to->len_gen = cpu_to_be32(V_FLD_GEN1(q->gen));
 582        to->gen2 = cpu_to_be32(V_FLD_GEN2(q->gen));
 583
 584        if (++q->pidx == q->size) {
 585                q->pidx = 0;
 586                q->gen ^= 1;
 587        }
 588
 589        q->credits++;
 590        q->pend_cred++;
 591        ring_fl_db(adap, q);
 592}
 593
 594/**
 595 *      alloc_ring - allocate resources for an SGE descriptor ring
 596 *      @pdev: the PCI device
 597 *      @nelem: the number of descriptors
 598 *      @elem_size: the size of each descriptor
 599 *      @sw_size: the size of the SW state associated with each ring element
 600 *      @phys: the physical address of the allocated ring
 601 *      @metadata: address of the array holding the SW state for the ring
 602 *
 603 *      Allocates resources for an SGE descriptor ring, such as Tx queues,
 604 *      free buffer lists, or response queues.  Each SGE ring requires
 605 *      space for its HW descriptors plus, optionally, space for the SW state
 606 *      associated with each HW entry (the metadata).  The function returns
 607 *      three values: the virtual address for the HW ring (the return value
 608 *      of the function), the physical address of the HW ring, and the address
 609 *      of the SW ring.
 610 */
 611static void *alloc_ring(struct pci_dev *pdev, size_t nelem, size_t elem_size,
 612                        size_t sw_size, dma_addr_t * phys, void *metadata)
 613{
 614        size_t len = nelem * elem_size;
 615        void *s = NULL;
 616        void *p = dma_alloc_coherent(&pdev->dev, len, phys, GFP_KERNEL);
 617
 618        if (!p)
 619                return NULL;
 620        if (sw_size && metadata) {
 621                s = kcalloc(nelem, sw_size, GFP_KERNEL);
 622
 623                if (!s) {
 624                        dma_free_coherent(&pdev->dev, len, p, *phys);
 625                        return NULL;
 626                }
 627                *(void **)metadata = s;
 628        }
 629        memset(p, 0, len);
 630        return p;
 631}
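
/*
 * Usage sketch (a hypothetical caller; the queue-set setup code is expected
 * to look much like this when allocating a free list and its SW state):
 *
 *      fl->desc = alloc_ring(adap->pdev, fl->size, sizeof(struct rx_desc),
 *                            sizeof(struct rx_sw_desc), &fl->phys_addr,
 *                            &fl->sdesc);
 *      if (!fl->desc)
 *              return -ENOMEM;
 */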
 632
 633/**
 634 *      t3_reset_qset - reset an SGE qset
 635 *      @q: the queue set
 636 *
 637 *      Resets the qset structure.  The NAPI structure is preserved in
 638 *      the event of the qset's reincarnation, for example during EEH
 639 *      recovery.
 640 */
 641static void t3_reset_qset(struct sge_qset *q)
 642{
 643        if (q->adap &&
 644            !(q->adap->flags & NAPI_INIT)) {
 645                memset(q, 0, sizeof(*q));
 646                return;
 647        }
 648
 649        q->adap = NULL;
 650        memset(&q->rspq, 0, sizeof(q->rspq));
 651        memset(q->fl, 0, sizeof(struct sge_fl) * SGE_RXQ_PER_SET);
 652        memset(q->txq, 0, sizeof(struct sge_txq) * SGE_TXQ_PER_SET);
 653        q->txq_stopped = 0;
 654        q->tx_reclaim_timer.function = NULL; /* for t3_stop_sge_timers() */
 655        q->rx_reclaim_timer.function = NULL;
 656        q->nomem = 0;
 657        napi_free_frags(&q->napi);
 658}
 659
 660
 661/**
 662 *      t3_free_qset - free the resources of an SGE queue set
 663 *      @adapter: the adapter owning the queue set
 664 *      @q: the queue set
 665 *
 666 *      Release the HW and SW resources associated with an SGE queue set, such
 667 *      as HW contexts, packet buffers, and descriptor rings.  Traffic to the
 668 *      queue set must be quiesced prior to calling this.
 669 */
 670static void t3_free_qset(struct adapter *adapter, struct sge_qset *q)
 671{
 672        int i;
 673        struct pci_dev *pdev = adapter->pdev;
 674
 675        for (i = 0; i < SGE_RXQ_PER_SET; ++i)
 676                if (q->fl[i].desc) {
 677                        spin_lock_irq(&adapter->sge.reg_lock);
 678                        t3_sge_disable_fl(adapter, q->fl[i].cntxt_id);
 679                        spin_unlock_irq(&adapter->sge.reg_lock);
 680                        free_rx_bufs(pdev, &q->fl[i]);
 681                        kfree(q->fl[i].sdesc);
 682                        dma_free_coherent(&pdev->dev,
 683                                          q->fl[i].size *
 684                                          sizeof(struct rx_desc), q->fl[i].desc,
 685                                          q->fl[i].phys_addr);
 686                }
 687
 688        for (i = 0; i < SGE_TXQ_PER_SET; ++i)
 689                if (q->txq[i].desc) {
 690                        spin_lock_irq(&adapter->sge.reg_lock);
 691                        t3_sge_enable_ecntxt(adapter, q->txq[i].cntxt_id, 0);
 692                        spin_unlock_irq(&adapter->sge.reg_lock);
 693                        if (q->txq[i].sdesc) {
 694                                free_tx_desc(adapter, &q->txq[i],
 695                                             q->txq[i].in_use);
 696                                kfree(q->txq[i].sdesc);
 697                        }
 698                        dma_free_coherent(&pdev->dev,
 699                                          q->txq[i].size *
 700                                          sizeof(struct tx_desc),
 701                                          q->txq[i].desc, q->txq[i].phys_addr);
 702                        __skb_queue_purge(&q->txq[i].sendq);
 703                }
 704
 705        if (q->rspq.desc) {
 706                spin_lock_irq(&adapter->sge.reg_lock);
 707                t3_sge_disable_rspcntxt(adapter, q->rspq.cntxt_id);
 708                spin_unlock_irq(&adapter->sge.reg_lock);
 709                dma_free_coherent(&pdev->dev,
 710                                  q->rspq.size * sizeof(struct rsp_desc),
 711                                  q->rspq.desc, q->rspq.phys_addr);
 712        }
 713
 714        t3_reset_qset(q);
 715}
 716
 717/**
 718 *      init_qset_cntxt - initialize an SGE queue set context info
 719 *      @qs: the queue set
 720 *      @id: the queue set id
 721 *
 722 *      Initializes the TIDs and context ids for the queues of a queue set.
 723 */
 724static void init_qset_cntxt(struct sge_qset *qs, unsigned int id)
 725{
 726        qs->rspq.cntxt_id = id;
 727        qs->fl[0].cntxt_id = 2 * id;
 728        qs->fl[1].cntxt_id = 2 * id + 1;
 729        qs->txq[TXQ_ETH].cntxt_id = FW_TUNNEL_SGEEC_START + id;
 730        qs->txq[TXQ_ETH].token = FW_TUNNEL_TID_START + id;
 731        qs->txq[TXQ_OFLD].cntxt_id = FW_OFLD_SGEEC_START + id;
 732        qs->txq[TXQ_CTRL].cntxt_id = FW_CTRL_SGEEC_START + id;
 733        qs->txq[TXQ_CTRL].token = FW_CTRL_TID_START + id;
 734}
 735
 736/**
 737 *      sgl_len - calculates the size of an SGL of the given capacity
 738 *      @n: the number of SGL entries
 739 *
 740 *      Calculates the number of flits needed for a scatter/gather list that
 741 *      can hold the given number of entries.
 742 */
 743static inline unsigned int sgl_len(unsigned int n)
 744{
 745        /* alternatively: 3 * (n / 2) + 2 * (n & 1) */
 746        return (3 * n) / 2 + (n & 1);
 747}
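
/*
 * Each sg_ent packs two address/length pairs into 3 flits, so for example
 * sgl_len(2) = 3, sgl_len(3) = 4 + 1 = 5 and sgl_len(4) = 6.
 */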
 748
 749/**
 750 *      flits_to_desc - returns the num of Tx descriptors for the given flits
 751 *      @n: the number of flits
 752 *
 753 *      Calculates the number of Tx descriptors needed for the supplied number
 754 *      of flits.
 755 */
 756static inline unsigned int flits_to_desc(unsigned int n)
 757{
 758        BUG_ON(n >= ARRAY_SIZE(flit_desc_map));
 759        return flit_desc_map[n];
 760}
 761
 762/**
 763 *      get_packet - return the next ingress packet buffer from a free list
 764 *      @adap: the adapter that received the packet
 765 *      @fl: the SGE free list holding the packet
 766 *      @len: the packet length including any SGE padding
 767 *      @drop_thres: # of remaining buffers before we start dropping packets
 768 *
 769 *      Get the next packet from a free list and complete setup of the
 770 *      sk_buff.  If the packet is small we make a copy and recycle the
 771 *      original buffer, otherwise we use the original buffer itself.  If a
 772 *      positive drop threshold is supplied packets are dropped and their
 773 *      buffers recycled if (a) the number of remaining buffers is under the
 774 *      threshold and the packet is too big to copy, or (b) the packet should
 775 *      be copied but there is no memory for the copy.
 776 */
 777static struct sk_buff *get_packet(struct adapter *adap, struct sge_fl *fl,
 778                                  unsigned int len, unsigned int drop_thres)
 779{
 780        struct sk_buff *skb = NULL;
 781        struct rx_sw_desc *sd = &fl->sdesc[fl->cidx];
 782
 783        prefetch(sd->skb->data);
 784        fl->credits--;
 785
 786        if (len <= SGE_RX_COPY_THRES) {
 787                skb = alloc_skb(len, GFP_ATOMIC);
 788                if (likely(skb != NULL)) {
 789                        __skb_put(skb, len);
 790                        pci_dma_sync_single_for_cpu(adap->pdev,
 791                                            pci_unmap_addr(sd, dma_addr), len,
 792                                            PCI_DMA_FROMDEVICE);
 793                        memcpy(skb->data, sd->skb->data, len);
 794                        pci_dma_sync_single_for_device(adap->pdev,
 795                                            pci_unmap_addr(sd, dma_addr), len,
 796                                            PCI_DMA_FROMDEVICE);
 797                } else if (!drop_thres)
 798                        goto use_orig_buf;
 799recycle:
 800                recycle_rx_buf(adap, fl, fl->cidx);
 801                return skb;
 802        }
 803
 804        if (unlikely(fl->credits < drop_thres) &&
 805            refill_fl(adap, fl, min(MAX_RX_REFILL, fl->size - fl->credits - 1),
 806                      GFP_ATOMIC | __GFP_COMP) == 0)
 807                goto recycle;
 808
 809use_orig_buf:
 810        pci_unmap_single(adap->pdev, pci_unmap_addr(sd, dma_addr),
 811                         fl->buf_size, PCI_DMA_FROMDEVICE);
 812        skb = sd->skb;
 813        skb_put(skb, len);
 814        __refill_fl(adap, fl);
 815        return skb;
 816}
 817
 818/**
 819 *      get_packet_pg - return the next ingress packet buffer from a free list
 820 *      @adap: the adapter that received the packet
 821 *      @fl: the SGE free list holding the packet
 822 *      @len: the packet length including any SGE padding
 823 *      @drop_thres: # of remaining buffers before we start dropping packets
 824 *
 825 *      Get the next packet from a free list populated with page chunks.
 826 *      If the packet is small we make a copy and recycle the original buffer,
 827 *      otherwise we attach the original buffer as a page fragment to a fresh
 828 *      sk_buff.  If a positive drop threshold is supplied packets are dropped
 829 *      and their buffers recycled if (a) the number of remaining buffers is
 830 *      under the threshold and the packet is too big to copy, or (b) there's
 831 *      no system memory.
 832 *
 833 *      Note: this function is similar to @get_packet but deals with Rx buffers
 834 *      that are page chunks rather than sk_buffs.
 835 */
 836static struct sk_buff *get_packet_pg(struct adapter *adap, struct sge_fl *fl,
 837                                     struct sge_rspq *q, unsigned int len,
 838                                     unsigned int drop_thres)
 839{
 840        struct sk_buff *newskb, *skb;
 841        struct rx_sw_desc *sd = &fl->sdesc[fl->cidx];
 842
 843        dma_addr_t dma_addr = pci_unmap_addr(sd, dma_addr);
 844
 845        newskb = skb = q->pg_skb;
 846        if (!skb && (len <= SGE_RX_COPY_THRES)) {
 847                newskb = alloc_skb(len, GFP_ATOMIC);
 848                if (likely(newskb != NULL)) {
 849                        __skb_put(newskb, len);
 850                        pci_dma_sync_single_for_cpu(adap->pdev, dma_addr, len,
 851                                            PCI_DMA_FROMDEVICE);
 852                        memcpy(newskb->data, sd->pg_chunk.va, len);
 853                        pci_dma_sync_single_for_device(adap->pdev, dma_addr,
 854                                                       len,
 855                                                       PCI_DMA_FROMDEVICE);
 856                } else if (!drop_thres)
 857                        return NULL;
 858recycle:
 859                fl->credits--;
 860                recycle_rx_buf(adap, fl, fl->cidx);
 861                q->rx_recycle_buf++;
 862                return newskb;
 863        }
 864
 865        if (unlikely(q->rx_recycle_buf || (!skb && fl->credits <= drop_thres)))
 866                goto recycle;
 867
 868        prefetch(sd->pg_chunk.p_cnt);
 869
 870        if (!skb)
 871                newskb = alloc_skb(SGE_RX_PULL_LEN, GFP_ATOMIC);
 872
 873        if (unlikely(!newskb)) {
 874                if (!drop_thres)
 875                        return NULL;
 876                goto recycle;
 877        }
 878
 879        pci_dma_sync_single_for_cpu(adap->pdev, dma_addr, len,
 880                                    PCI_DMA_FROMDEVICE);
 881        (*sd->pg_chunk.p_cnt)--;
 882        if (!*sd->pg_chunk.p_cnt && sd->pg_chunk.page != fl->pg_chunk.page)
 883                pci_unmap_page(adap->pdev,
 884                               sd->pg_chunk.mapping,
 885                               fl->alloc_size,
 886                               PCI_DMA_FROMDEVICE);
 887        if (!skb) {
 888                __skb_put(newskb, SGE_RX_PULL_LEN);
 889                memcpy(newskb->data, sd->pg_chunk.va, SGE_RX_PULL_LEN);
 890                skb_fill_page_desc(newskb, 0, sd->pg_chunk.page,
 891                                   sd->pg_chunk.offset + SGE_RX_PULL_LEN,
 892                                   len - SGE_RX_PULL_LEN);
 893                newskb->len = len;
 894                newskb->data_len = len - SGE_RX_PULL_LEN;
 895                newskb->truesize += newskb->data_len;
 896        } else {
 897                skb_fill_page_desc(newskb, skb_shinfo(newskb)->nr_frags,
 898                                   sd->pg_chunk.page,
 899                                   sd->pg_chunk.offset, len);
 900                newskb->len += len;
 901                newskb->data_len += len;
 902                newskb->truesize += len;
 903        }
 904
 905        fl->credits--;
 906        /*
 907         * We do not refill FLs here, we let the caller do it to overlap a
 908         * prefetch.
 909         */
 910        return newskb;
 911}
 912
 913/**
 914 *      get_imm_packet - return the next ingress packet buffer from a response
 915 *      @resp: the response descriptor containing the packet data
 916 *
 917 *      Return a packet containing the immediate data of the given response.
 918 */
 919static inline struct sk_buff *get_imm_packet(const struct rsp_desc *resp)
 920{
 921        struct sk_buff *skb = alloc_skb(IMMED_PKT_SIZE, GFP_ATOMIC);
 922
 923        if (skb) {
 924                __skb_put(skb, IMMED_PKT_SIZE);
 925                skb_copy_to_linear_data(skb, resp->imm_data, IMMED_PKT_SIZE);
 926        }
 927        return skb;
 928}
 929
 930/**
 931 *      calc_tx_descs - calculate the number of Tx descriptors for a packet
 932 *      @skb: the packet
 933 *
 934 *      Returns the number of Tx descriptors needed for the given Ethernet
 935 *      packet.  Ethernet packets require addition of WR and CPL headers.
 936 */
 937static inline unsigned int calc_tx_descs(const struct sk_buff *skb)
 938{
 939        unsigned int flits;
 940
 941        if (skb->len <= WR_LEN - sizeof(struct cpl_tx_pkt))
 942                return 1;
 943
 944        flits = sgl_len(skb_shinfo(skb)->nr_frags + 1) + 2;
 945        if (skb_shinfo(skb)->gso_size)
 946                flits++;
 947        return flits_to_desc(flits);
 948}
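
/*
 * Example: a non-TSO packet with linear data plus 3 page fragments needs
 * sgl_len(3 + 1) + 2 = 8 flits, and flits_to_desc(8) == 1 descriptor; the
 * extra LSO flit of a TSO packet makes that 9 flits, still one descriptor.
 */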
 949
 950/**
 951 *      make_sgl - populate a scatter/gather list for a packet
 952 *      @skb: the packet
 953 *      @sgp: the SGL to populate
 954 *      @start: start address of skb main body data to include in the SGL
 955 *      @len: length of skb main body data to include in the SGL
 956 *      @pdev: the PCI device
 957 *
 958 *      Generates a scatter/gather list for the buffers that make up a packet
 959 *      and returns the SGL size in 8-byte words.  The caller must size the SGL
 960 *      appropriately.
 961 */
 962static inline unsigned int make_sgl(const struct sk_buff *skb,
 963                                    struct sg_ent *sgp, unsigned char *start,
 964                                    unsigned int len, struct pci_dev *pdev)
 965{
 966        dma_addr_t mapping;
 967        unsigned int i, j = 0, nfrags;
 968
 969        if (len) {
 970                mapping = pci_map_single(pdev, start, len, PCI_DMA_TODEVICE);
 971                sgp->len[0] = cpu_to_be32(len);
 972                sgp->addr[0] = cpu_to_be64(mapping);
 973                j = 1;
 974        }
 975
 976        nfrags = skb_shinfo(skb)->nr_frags;
 977        for (i = 0; i < nfrags; i++) {
 978                skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
 979
 980                mapping = pci_map_page(pdev, frag->page, frag->page_offset,
 981                                       frag->size, PCI_DMA_TODEVICE);
 982                sgp->len[j] = cpu_to_be32(frag->size);
 983                sgp->addr[j] = cpu_to_be64(mapping);
 984                j ^= 1;
 985                if (j == 0)
 986                        ++sgp;
 987        }
 988        if (j)
 989                sgp->len[j] = 0;
 990        return ((nfrags + (len != 0)) * 3) / 2 + j;
 991}
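
/*
 * The return value is the SGL size in flits: each pair of buffers costs 3
 * flits and a trailing odd buffer costs 2.  For example, linear data plus
 * two page fragments gives ((2 + 1) * 3) / 2 + 1 = 5 flits, matching
 * sgl_len(3).
 */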
 992
 993/**
 994 *      check_ring_tx_db - check and potentially ring a Tx queue's doorbell
 995 *      @adap: the adapter
 996 *      @q: the Tx queue
 997 *
 998 *      Ring the doorbell if a Tx queue is asleep.  There is a natural race
 999 *      where the HW may go to sleep just after we check; in that case the
1000 *      interrupt handler will detect the outstanding TX packet and ring the
1001 *      doorbell for us.
1002 *
1003 *      When GTS is disabled we unconditionally ring the doorbell.
1004 */
1005static inline void check_ring_tx_db(struct adapter *adap, struct sge_txq *q)
1006{
1007#if USE_GTS
1008        clear_bit(TXQ_LAST_PKT_DB, &q->flags);
1009        if (test_and_set_bit(TXQ_RUNNING, &q->flags) == 0) {
1010                set_bit(TXQ_LAST_PKT_DB, &q->flags);
1011                t3_write_reg(adap, A_SG_KDOORBELL,
1012                             F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
1013        }
1014#else
1015        wmb();                  /* write descriptors before telling HW */
1016        t3_write_reg(adap, A_SG_KDOORBELL,
1017                     F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
1018#endif
1019}
1020
1021static inline void wr_gen2(struct tx_desc *d, unsigned int gen)
1022{
1023#if SGE_NUM_GENBITS == 2
1024        d->flit[TX_DESC_FLITS - 1] = cpu_to_be64(gen);
1025#endif
1026}
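
/*
 * Sketch of the intent: with two generation bits the final flit of each
 * descriptor carries a second copy of the generation value (the WR header
 * carries the other via V_WR_GEN), so the HW can tell whether a descriptor
 * has been written in its entirety after the producer index wraps.
 */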
1027
1028/**
1029 *      write_wr_hdr_sgl - write a WR header and, optionally, SGL
1030 *      @ndesc: number of Tx descriptors spanned by the SGL
1031 *      @skb: the packet corresponding to the WR
1032 *      @d: first Tx descriptor to be written
1033 *      @pidx: index of above descriptors
1034 *      @q: the SGE Tx queue
1035 *      @sgl: the SGL
1036 *      @flits: number of flits to the start of the SGL in the first descriptor
1037 *      @sgl_flits: the SGL size in flits
1038 *      @gen: the Tx descriptor generation
1039 *      @wr_hi: top 32 bits of WR header based on WR type (big endian)
1040 *      @wr_lo: low 32 bits of WR header based on WR type (big endian)
1041 *
1042 *      Write a work request header and an associated SGL.  If the SGL is
1043 *      small enough to fit into one Tx descriptor it has already been written
1044 *      and we just need to write the WR header.  Otherwise we distribute the
1045 *      SGL across the number of descriptors it spans.
1046 */
1047static void write_wr_hdr_sgl(unsigned int ndesc, struct sk_buff *skb,
1048                             struct tx_desc *d, unsigned int pidx,
1049                             const struct sge_txq *q,
1050                             const struct sg_ent *sgl,
1051                             unsigned int flits, unsigned int sgl_flits,
1052                             unsigned int gen, __be32 wr_hi,
1053                             __be32 wr_lo)
1054{
1055        struct work_request_hdr *wrp = (struct work_request_hdr *)d;
1056        struct tx_sw_desc *sd = &q->sdesc[pidx];
1057
1058        sd->skb = skb;
1059        if (need_skb_unmap()) {
1060                sd->fragidx = 0;
1061                sd->addr_idx = 0;
1062                sd->sflit = flits;
1063        }
1064
1065        if (likely(ndesc == 1)) {
1066                sd->eop = 1;
1067                wrp->wr_hi = htonl(F_WR_SOP | F_WR_EOP | V_WR_DATATYPE(1) |
1068                                   V_WR_SGLSFLT(flits)) | wr_hi;
1069                wmb();
1070                wrp->wr_lo = htonl(V_WR_LEN(flits + sgl_flits) |
1071                                   V_WR_GEN(gen)) | wr_lo;
1072                wr_gen2(d, gen);
1073        } else {
1074                unsigned int ogen = gen;
1075                const u64 *fp = (const u64 *)sgl;
1076                struct work_request_hdr *wp = wrp;
1077
1078                wrp->wr_hi = htonl(F_WR_SOP | V_WR_DATATYPE(1) |
1079                                   V_WR_SGLSFLT(flits)) | wr_hi;
1080
1081                while (sgl_flits) {
1082                        unsigned int avail = WR_FLITS - flits;
1083
1084                        if (avail > sgl_flits)
1085                                avail = sgl_flits;
1086                        memcpy(&d->flit[flits], fp, avail * sizeof(*fp));
1087                        sgl_flits -= avail;
1088                        ndesc--;
1089                        if (!sgl_flits)
1090                                break;
1091
1092                        fp += avail;
1093                        d++;
1094                        sd->eop = 0;
1095                        sd++;
1096                        if (++pidx == q->size) {
1097                                pidx = 0;
1098                                gen ^= 1;
1099                                d = q->desc;
1100                                sd = q->sdesc;
1101                        }
1102
1103                        sd->skb = skb;
1104                        wrp = (struct work_request_hdr *)d;
1105                        wrp->wr_hi = htonl(V_WR_DATATYPE(1) |
1106                                           V_WR_SGLSFLT(1)) | wr_hi;
1107                        wrp->wr_lo = htonl(V_WR_LEN(min(WR_FLITS,
1108                                                        sgl_flits + 1)) |
1109                                           V_WR_GEN(gen)) | wr_lo;
1110                        wr_gen2(d, gen);
1111                        flits = 1;
1112                }
1113                sd->eop = 1;
1114                wrp->wr_hi |= htonl(F_WR_EOP);
1115                wmb();
1116                wp->wr_lo = htonl(V_WR_LEN(WR_FLITS) | V_WR_GEN(ogen)) | wr_lo;
1117                wr_gen2((struct tx_desc *)wp, ogen);
1118                WARN_ON(ndesc != 0);
1119        }
1120}
1121
1122/**
1123 *      write_tx_pkt_wr - write a TX_PKT work request
1124 *      @adap: the adapter
1125 *      @skb: the packet to send
1126 *      @pi: the egress interface
1127 *      @pidx: index of the first Tx descriptor to write
1128 *      @gen: the generation value to use
1129 *      @q: the Tx queue
1130 *      @ndesc: number of descriptors the packet will occupy
1131 *      @compl: the value of the COMPL bit to use
1132 *
1133 *      Generate a TX_PKT work request to send the supplied packet.
1134 */
1135static void write_tx_pkt_wr(struct adapter *adap, struct sk_buff *skb,
1136                            const struct port_info *pi,
1137                            unsigned int pidx, unsigned int gen,
1138                            struct sge_txq *q, unsigned int ndesc,
1139                            unsigned int compl)
1140{
1141        unsigned int flits, sgl_flits, cntrl, tso_info;
1142        struct sg_ent *sgp, sgl[MAX_SKB_FRAGS / 2 + 1];
1143        struct tx_desc *d = &q->desc[pidx];
1144        struct cpl_tx_pkt *cpl = (struct cpl_tx_pkt *)d;
1145
1146        cpl->len = htonl(skb->len);
1147        cntrl = V_TXPKT_INTF(pi->port_id);
1148
1149        if (vlan_tx_tag_present(skb) && pi->vlan_grp)
1150                cntrl |= F_TXPKT_VLAN_VLD | V_TXPKT_VLAN(vlan_tx_tag_get(skb));
1151
1152        tso_info = V_LSO_MSS(skb_shinfo(skb)->gso_size);
1153        if (tso_info) {
1154                int eth_type;
1155                struct cpl_tx_pkt_lso *hdr = (struct cpl_tx_pkt_lso *)cpl;
1156
1157                d->flit[2] = 0;
1158                cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT_LSO);
1159                hdr->cntrl = htonl(cntrl);
1160                eth_type = skb_network_offset(skb) == ETH_HLEN ?
1161                    CPL_ETH_II : CPL_ETH_II_VLAN;
1162                tso_info |= V_LSO_ETH_TYPE(eth_type) |
1163                    V_LSO_IPHDR_WORDS(ip_hdr(skb)->ihl) |
1164                    V_LSO_TCPHDR_WORDS(tcp_hdr(skb)->doff);
1165                hdr->lso_info = htonl(tso_info);
1166                flits = 3;
1167        } else {
1168                cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT);
1169                cntrl |= F_TXPKT_IPCSUM_DIS;    /* SW calculates IP csum */
1170                cntrl |= V_TXPKT_L4CSUM_DIS(skb->ip_summed != CHECKSUM_PARTIAL);
1171                cpl->cntrl = htonl(cntrl);
1172
1173                if (skb->len <= WR_LEN - sizeof(*cpl)) {
1174                        q->sdesc[pidx].skb = NULL;
1175                        if (!skb->data_len)
1176                                skb_copy_from_linear_data(skb, &d->flit[2],
1177                                                          skb->len);
1178                        else
1179                                skb_copy_bits(skb, 0, &d->flit[2], skb->len);
1180
1181                        flits = (skb->len + 7) / 8 + 2;
1182                        cpl->wr.wr_hi = htonl(V_WR_BCNTLFLT(skb->len & 7) |
1183                                              V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT)
1184                                              | F_WR_SOP | F_WR_EOP | compl);
1185                        wmb();
1186                        cpl->wr.wr_lo = htonl(V_WR_LEN(flits) | V_WR_GEN(gen) |
1187                                              V_WR_TID(q->token));
1188                        wr_gen2(d, gen);
1189                        kfree_skb(skb);
1190                        return;
1191                }
1192
1193                flits = 2;
1194        }
1195
1196        sgp = ndesc == 1 ? (struct sg_ent *)&d->flit[flits] : sgl;
1197        sgl_flits = make_sgl(skb, sgp, skb->data, skb_headlen(skb), adap->pdev);
1198
1199        write_wr_hdr_sgl(ndesc, skb, d, pidx, q, sgl, flits, sgl_flits, gen,
1200                         htonl(V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) | compl),
1201                         htonl(V_WR_TID(q->token)));
1202}
1203
1204static inline void t3_stop_tx_queue(struct netdev_queue *txq,
1205                                    struct sge_qset *qs, struct sge_txq *q)
1206{
1207        netif_tx_stop_queue(txq);
1208        set_bit(TXQ_ETH, &qs->txq_stopped);
1209        q->stops++;
1210}
1211
1212/**
1213 *      t3_eth_xmit - add a packet to the Ethernet Tx queue
1214 *      @skb: the packet
1215 *      @dev: the egress net device
1216 *
1217 *      Add a packet to an SGE Tx queue.  Runs with softirqs disabled.
1218 */
1219netdev_tx_t t3_eth_xmit(struct sk_buff *skb, struct net_device *dev)
1220{
1221        int qidx;
1222        unsigned int ndesc, pidx, credits, gen, compl;
1223        const struct port_info *pi = netdev_priv(dev);
1224        struct adapter *adap = pi->adapter;
1225        struct netdev_queue *txq;
1226        struct sge_qset *qs;
1227        struct sge_txq *q;
1228
1229        /*
1230         * The chip min packet length is 9 octets but play safe and reject
1231         * anything shorter than an Ethernet header.
1232         */
1233        if (unlikely(skb->len < ETH_HLEN)) {
1234                dev_kfree_skb(skb);
1235                return NETDEV_TX_OK;
1236        }
1237
1238        qidx = skb_get_queue_mapping(skb);
1239        qs = &pi->qs[qidx];
1240        q = &qs->txq[TXQ_ETH];
1241        txq = netdev_get_tx_queue(dev, qidx);
1242
1243        reclaim_completed_tx(adap, q, TX_RECLAIM_CHUNK);
1244
1245        credits = q->size - q->in_use;
1246        ndesc = calc_tx_descs(skb);
1247
1248        if (unlikely(credits < ndesc)) {
1249                t3_stop_tx_queue(txq, qs, q);
1250                dev_err(&adap->pdev->dev,
1251                        "%s: Tx ring %u full while queue awake!\n",
1252                        dev->name, q->cntxt_id & 7);
1253                return NETDEV_TX_BUSY;
1254        }
1255
1256        q->in_use += ndesc;
1257        if (unlikely(credits - ndesc < q->stop_thres)) {
1258                t3_stop_tx_queue(txq, qs, q);
1259
1260                if (should_restart_tx(q) &&
1261                    test_and_clear_bit(TXQ_ETH, &qs->txq_stopped)) {
1262                        q->restarts++;
1263                        netif_tx_wake_queue(txq);
1264                }
1265        }
1266
1267        gen = q->gen;
1268        q->unacked += ndesc;
1269        compl = (q->unacked & 8) << (S_WR_COMPL - 3);
1270        q->unacked &= 7;
1271        pidx = q->pidx;
1272        q->pidx += ndesc;
1273        if (q->pidx >= q->size) {
1274                q->pidx -= q->size;
1275                q->gen ^= 1;
1276        }
1277
1278        /* update port statistics */
1279        if (skb->ip_summed == CHECKSUM_COMPLETE)
1280                qs->port_stats[SGE_PSTAT_TX_CSUM]++;
1281        if (skb_shinfo(skb)->gso_size)
1282                qs->port_stats[SGE_PSTAT_TSO]++;
1283        if (vlan_tx_tag_present(skb) && pi->vlan_grp)
1284                qs->port_stats[SGE_PSTAT_VLANINS]++;
1285
1286        /*
1287         * We do not use Tx completion interrupts to free DMAd Tx packets.
1288         * This is good for performance but means that we rely on new Tx
1289         * packets arriving to run the destructors of completed packets,
1290         * which open up space in their sockets' send queues.  Sometimes
1291         * we do not get such new packets, causing Tx to stall.  A single
1292         * UDP transmitter is a good example of this situation.  We have
1293         * a clean up timer that periodically reclaims completed packets
1294         * but it doesn't run often enough (nor do we want it to) to prevent
1295         * lengthy stalls.  A solution to this problem is to run the
1296         * destructor early, after the packet is queued but before it's DMAd.
1297         * A drawback is that we lie to socket memory accounting, but the amount
1298         * of extra memory is reasonable (limited by the number of Tx
1299         * descriptors), the packets do actually get freed quickly by new
1300         * packets almost always, and for protocols like TCP that wait for
1301         * acks to really free up the data the extra memory is even less.
1302         * On the positive side we run the destructors on the sending CPU
1303         * rather than on a potentially different completing CPU, usually a
1304         * good thing.  We also run them without holding our Tx queue lock,
1305         * unlike what reclaim_completed_tx() would otherwise do.
1306         *
1307         * Run the destructor before telling the DMA engine about the packet
1308         * to make sure it doesn't complete and get freed prematurely.
1309         */
1310        if (likely(!skb_shared(skb)))
1311                skb_orphan(skb);
1312
1313        write_tx_pkt_wr(adap, skb, pi, pidx, gen, q, ndesc, compl);
1314        check_ring_tx_db(adap, q);
1315        return NETDEV_TX_OK;
1316}
1317
1318/**
1319 *      write_imm - write a packet into a Tx descriptor as immediate data
1320 *      @d: the Tx descriptor to write
1321 *      @skb: the packet
1322 *      @len: the length of packet data to write as immediate data
1323 *      @gen: the generation bit value to write
1324 *
1325 *      Writes a packet as immediate data into a Tx descriptor.  The packet
1326 *      contains a work request at its beginning.  We must write the packet
1327 *      carefully so the SGE doesn't read it accidentally before it's written
1328 *      in its entirety.
1329 */
1330static inline void write_imm(struct tx_desc *d, struct sk_buff *skb,
1331                             unsigned int len, unsigned int gen)
1332{
1333        struct work_request_hdr *from = (struct work_request_hdr *)skb->data;
1334        struct work_request_hdr *to = (struct work_request_hdr *)d;
1335
1336        if (likely(!skb->data_len))
1337                memcpy(&to[1], &from[1], len - sizeof(*from));
1338        else
1339                skb_copy_bits(skb, sizeof(*from), &to[1], len - sizeof(*from));
1340
1341        to->wr_hi = from->wr_hi | htonl(F_WR_SOP | F_WR_EOP |
1342                                        V_WR_BCNTLFLT(len & 7));
1343        wmb();
1344        to->wr_lo = from->wr_lo | htonl(V_WR_GEN(gen) |
1345                                        V_WR_LEN((len + 7) / 8));
1346        wr_gen2(d, gen);
1347        kfree_skb(skb);
1348}
1349
1350/**
1351 *      check_desc_avail - check descriptor availability on a send queue
1352 *      @adap: the adapter
1353 *      @q: the send queue
1354 *      @skb: the packet needing the descriptors
1355 *      @ndesc: the number of Tx descriptors needed
1356 *      @qid: the Tx queue number in its queue set (TXQ_OFLD or TXQ_CTRL)
1357 *
1358 *      Checks if the requested number of Tx descriptors is available on an
1359 *      SGE send queue.  If the queue is already suspended or not enough
1360 *      descriptors are available, the packet is queued for later transmission.
1361 *      Must be called with the Tx queue locked.
1362 *
1363 *      Returns 0 if enough descriptors are available, 1 if there aren't
1364 *      enough descriptors and the packet has been queued, and 2 if the caller
1365 *      needs to retry because there weren't enough descriptors at the
1366 *      beginning of the call but some freed up in the meantime.
1367 */
1368static inline int check_desc_avail(struct adapter *adap, struct sge_txq *q,
1369                                   struct sk_buff *skb, unsigned int ndesc,
1370                                   unsigned int qid)
1371{
1372        if (unlikely(!skb_queue_empty(&q->sendq))) {
1373              addq_exit:__skb_queue_tail(&q->sendq, skb);
1374                return 1;
1375        }
1376        if (unlikely(q->size - q->in_use < ndesc)) {
1377                struct sge_qset *qs = txq_to_qset(q, qid);
1378
1379                set_bit(qid, &qs->txq_stopped);
1380                smp_mb__after_clear_bit();
1381
1382                if (should_restart_tx(q) &&
1383                    test_and_clear_bit(qid, &qs->txq_stopped))
1384                        return 2;
1385
1386                q->stops++;
1387                goto addq_exit;
1388        }
1389        return 0;
1390}
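
/*
 * Illustrative sketch of the calling convention above; it mirrors the
 * pattern used by ctrl_xmit() and ofld_xmit() below, and 'adap', 'q',
 * 'skb', 'ret' and 'ndesc' are assumed to be set up by the caller.
 */
#if 0
        spin_lock(&q->lock);
again:  reclaim_completed_tx(adap, q, TX_RECLAIM_CHUNK);

        ret = check_desc_avail(adap, q, skb, ndesc, TXQ_OFLD);
        if (unlikely(ret)) {
                if (ret == 1) {                 /* queued on q->sendq */
                        spin_unlock(&q->lock);
                        return NET_XMIT_CN;
                }
                goto again;                     /* ret == 2: space freed up, retry */
        }
        /* ret == 0: enough descriptors, write the work request */
#endif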
1391
1392/**
1393 *      reclaim_completed_tx_imm - reclaim completed control-queue Tx descs
1394 *      @q: the SGE control Tx queue
1395 *
1396 *      This is a variant of reclaim_completed_tx() that is used for Tx queues
1397 *      that send only immediate data (presently just the control queues) and
1398 *      thus do not have any sk_buffs to release.
1399 */
1400static inline void reclaim_completed_tx_imm(struct sge_txq *q)
1401{
1402        unsigned int reclaim = q->processed - q->cleaned;
1403
1404        q->in_use -= reclaim;
1405        q->cleaned += reclaim;
1406}
1407
1408static inline int immediate(const struct sk_buff *skb)
1409{
1410        return skb->len <= WR_LEN;
1411}
1412
1413/**
1414 *      ctrl_xmit - send a packet through an SGE control Tx queue
1415 *      @adap: the adapter
1416 *      @q: the control queue
1417 *      @skb: the packet
1418 *
1419 *      Send a packet through an SGE control Tx queue.  Packets sent through
1420 *      a control queue must fit entirely as immediate data in a single Tx
1421 *      descriptor and have no page fragments.
1422 */
1423static int ctrl_xmit(struct adapter *adap, struct sge_txq *q,
1424                     struct sk_buff *skb)
1425{
1426        int ret;
1427        struct work_request_hdr *wrp = (struct work_request_hdr *)skb->data;
1428
1429        if (unlikely(!immediate(skb))) {
1430                WARN_ON(1);
1431                dev_kfree_skb(skb);
1432                return NET_XMIT_SUCCESS;
1433        }
1434
1435        wrp->wr_hi |= htonl(F_WR_SOP | F_WR_EOP);
1436        wrp->wr_lo = htonl(V_WR_TID(q->token));
1437
1438        spin_lock(&q->lock);
1439      again:reclaim_completed_tx_imm(q);
1440
1441        ret = check_desc_avail(adap, q, skb, 1, TXQ_CTRL);
1442        if (unlikely(ret)) {
1443                if (ret == 1) {
1444                        spin_unlock(&q->lock);
1445                        return NET_XMIT_CN;
1446                }
1447                goto again;
1448        }
1449
1450        write_imm(&q->desc[q->pidx], skb, skb->len, q->gen);
1451
1452        q->in_use++;
1453        if (++q->pidx >= q->size) {
1454                q->pidx = 0;
1455                q->gen ^= 1;
1456        }
1457        spin_unlock(&q->lock);
1458        wmb();
1459        t3_write_reg(adap, A_SG_KDOORBELL,
1460                     F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
1461        return NET_XMIT_SUCCESS;
1462}
1463
1464/**
1465 *      restart_ctrlq - restart a suspended control queue
1466 *      @qs: the queue set containing the control queue
1467 *
1468 *      Resumes transmission on a suspended Tx control queue.
1469 */
1470static void restart_ctrlq(unsigned long data)
1471{
1472        struct sk_buff *skb;
1473        struct sge_qset *qs = (struct sge_qset *)data;
1474        struct sge_txq *q = &qs->txq[TXQ_CTRL];
1475
1476        spin_lock(&q->lock);
1477      again:reclaim_completed_tx_imm(q);
1478
1479        while (q->in_use < q->size &&
1480               (skb = __skb_dequeue(&q->sendq)) != NULL) {
1481
1482                write_imm(&q->desc[q->pidx], skb, skb->len, q->gen);
1483
1484                if (++q->pidx >= q->size) {
1485                        q->pidx = 0;
1486                        q->gen ^= 1;
1487                }
1488                q->in_use++;
1489        }
1490
1491        if (!skb_queue_empty(&q->sendq)) {
1492                set_bit(TXQ_CTRL, &qs->txq_stopped);
1493                smp_mb__after_clear_bit();
1494
1495                if (should_restart_tx(q) &&
1496                    test_and_clear_bit(TXQ_CTRL, &qs->txq_stopped))
1497                        goto again;
1498                q->stops++;
1499        }
1500
1501        spin_unlock(&q->lock);
1502        wmb();
1503        t3_write_reg(qs->adap, A_SG_KDOORBELL,
1504                     F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
1505}
1506
1507/*
1508 * Send a management message through control queue 0
1509 */
1510int t3_mgmt_tx(struct adapter *adap, struct sk_buff *skb)
1511{
1512        int ret;
1513        local_bh_disable();
1514        ret = ctrl_xmit(adap, &adap->sge.qs[0].txq[TXQ_CTRL], skb);
1515        local_bh_enable();
1516
1517        return ret;
1518}
1519
1520/**
1521 *      deferred_unmap_destructor - unmap a packet when it is freed
1522 *      @skb: the packet
1523 *
1524 *      This is the packet destructor used for Tx packets that need to remain
1525 *      mapped until they are freed rather than until their Tx descriptors are
1526 *      freed.
1527 */
1528static void deferred_unmap_destructor(struct sk_buff *skb)
1529{
1530        int i;
1531        const dma_addr_t *p;
1532        const struct skb_shared_info *si;
1533        const struct deferred_unmap_info *dui;
1534
1535        dui = (struct deferred_unmap_info *)skb->head;
1536        p = dui->addr;
1537
1538        if (skb->tail - skb->transport_header)
1539                pci_unmap_single(dui->pdev, *p++,
1540                                 skb->tail - skb->transport_header,
1541                                 PCI_DMA_TODEVICE);
1542
1543        si = skb_shinfo(skb);
1544        for (i = 0; i < si->nr_frags; i++)
1545                pci_unmap_page(dui->pdev, *p++, si->frags[i].size,
1546                               PCI_DMA_TODEVICE);
1547}
1548
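/**
 *      setup_deferred_unmapping - record DMA addresses for deferred unmapping
 *      @skb: the packet whose mappings are to be recorded
 *      @pdev: the PCI device that owns the DMA mappings
 *      @sgl: the SGL written for the packet
 *      @sgl_flits: number of flits occupied by the SGL
 *
 *      Saves the DMA addresses of an SGL into the deferred_unmap_info area at
 *      the start of the packet so deferred_unmap_destructor() can unmap them
 *      when the packet is freed.  Each SGL entry spans 3 flits and carries two
 *      address/length pairs, so two addresses are saved per full entry and a
 *      trailing partial entry contributes at most one more.
 */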
1549static void setup_deferred_unmapping(struct sk_buff *skb, struct pci_dev *pdev,
1550                                     const struct sg_ent *sgl, int sgl_flits)
1551{
1552        dma_addr_t *p;
1553        struct deferred_unmap_info *dui;
1554
1555        dui = (struct deferred_unmap_info *)skb->head;
1556        dui->pdev = pdev;
1557        for (p = dui->addr; sgl_flits >= 3; sgl++, sgl_flits -= 3) {
1558                *p++ = be64_to_cpu(sgl->addr[0]);
1559                *p++ = be64_to_cpu(sgl->addr[1]);
1560        }
1561        if (sgl_flits)
1562                *p = be64_to_cpu(sgl->addr[0]);
1563}
1564
1565/**
1566 *      write_ofld_wr - write an offload work request
1567 *      @adap: the adapter
1568 *      @skb: the packet to send
1569 *      @q: the Tx queue
1570 *      @pidx: index of the first Tx descriptor to write
1571 *      @gen: the generation value to use
1572 *      @ndesc: number of descriptors the packet will occupy
1573 *
1574 *      Write an offload work request to send the supplied packet.  The packet
1575 *      data already carry the work request with most fields populated.
1576 */
1577static void write_ofld_wr(struct adapter *adap, struct sk_buff *skb,
1578                          struct sge_txq *q, unsigned int pidx,
1579                          unsigned int gen, unsigned int ndesc)
1580{
1581        unsigned int sgl_flits, flits;
1582        struct work_request_hdr *from;
1583        struct sg_ent *sgp, sgl[MAX_SKB_FRAGS / 2 + 1];
1584        struct tx_desc *d = &q->desc[pidx];
1585
1586        if (immediate(skb)) {
1587                q->sdesc[pidx].skb = NULL;
1588                write_imm(d, skb, skb->len, gen);
1589                return;
1590        }
1591
1592        /* Only TX_DATA builds SGLs */
1593
1594        from = (struct work_request_hdr *)skb->data;
1595        memcpy(&d->flit[1], &from[1],
1596               skb_transport_offset(skb) - sizeof(*from));
1597
1598        flits = skb_transport_offset(skb) / 8;
1599        sgp = ndesc == 1 ? (struct sg_ent *)&d->flit[flits] : sgl;
1600        sgl_flits = make_sgl(skb, sgp, skb_transport_header(skb),
1601                             skb->tail - skb->transport_header,
1602                             adap->pdev);
1603        if (need_skb_unmap()) {
1604                setup_deferred_unmapping(skb, adap->pdev, sgp, sgl_flits);
1605                skb->destructor = deferred_unmap_destructor;
1606        }
1607
1608        write_wr_hdr_sgl(ndesc, skb, d, pidx, q, sgl, flits, sgl_flits,
1609                         gen, from->wr_hi, from->wr_lo);
1610}
1611
1612/**
1613 *      calc_tx_descs_ofld - calculate # of Tx descriptors for an offload packet
1614 *      @skb: the packet
1615 *
1616 *      Returns the number of Tx descriptors needed for the given offload
1617 *      packet.  These packets are already fully constructed.
1618 */
1619static inline unsigned int calc_tx_descs_ofld(const struct sk_buff *skb)
1620{
1621        unsigned int flits, cnt;
1622
1623        if (skb->len <= WR_LEN)
1624                return 1;       /* packet fits as immediate data */
1625
1626        flits = skb_transport_offset(skb) / 8;  /* headers */
1627        cnt = skb_shinfo(skb)->nr_frags;
1628        if (skb->tail != skb->transport_header)
1629                cnt++;
1630        return flits_to_desc(flits + sgl_len(cnt));
1631}
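
/*
 * Worked example for the calculation above (a sketch only; the exact value
 * depends on sgl_len(), which charges roughly 1.5 flits per address/length
 * pair, and on flits_to_desc()):
 */
#if 0
        /* hypothetical offload WR: 16 bytes of WR + headers, 3 SGL segments */
        unsigned int flits = 16 / 8;                    /* 2 header flits */
        unsigned int cnt = 3;                           /* linear tail + 2 page frags */
        unsigned int ndesc = flits_to_desc(flits + sgl_len(cnt));      /* ~7 flits */
#endif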
1632
1633/**
1634 *      ofld_xmit - send a packet through an offload queue
1635 *      @adap: the adapter
1636 *      @q: the Tx offload queue
1637 *      @skb: the packet
1638 *
1639 *      Send an offload packet through an SGE offload queue.
1640 */
1641static int ofld_xmit(struct adapter *adap, struct sge_txq *q,
1642                     struct sk_buff *skb)
1643{
1644        int ret;
1645        unsigned int ndesc = calc_tx_descs_ofld(skb), pidx, gen;
1646
1647        spin_lock(&q->lock);
1648again:  reclaim_completed_tx(adap, q, TX_RECLAIM_CHUNK);
1649
1650        ret = check_desc_avail(adap, q, skb, ndesc, TXQ_OFLD);
1651        if (unlikely(ret)) {
1652                if (ret == 1) {
1653                        skb->priority = ndesc;  /* save for restart */
1654                        spin_unlock(&q->lock);
1655                        return NET_XMIT_CN;
1656                }
1657                goto again;
1658        }
1659
1660        gen = q->gen;
1661        q->in_use += ndesc;
1662        pidx = q->pidx;
1663        q->pidx += ndesc;
1664        if (q->pidx >= q->size) {
1665                q->pidx -= q->size;
1666                q->gen ^= 1;
1667        }
1668        spin_unlock(&q->lock);
1669
1670        write_ofld_wr(adap, skb, q, pidx, gen, ndesc);
1671        check_ring_tx_db(adap, q);
1672        return NET_XMIT_SUCCESS;
1673}
1674
1675/**
1676 *      restart_offloadq - restart a suspended offload queue
1677 *      @qs: the queue set containing the offload queue
1678 *
1679 *      Resumes transmission on a suspended Tx offload queue.
1680 */
1681static void restart_offloadq(unsigned long data)
1682{
1683        struct sk_buff *skb;
1684        struct sge_qset *qs = (struct sge_qset *)data;
1685        struct sge_txq *q = &qs->txq[TXQ_OFLD];
1686        const struct port_info *pi = netdev_priv(qs->netdev);
1687        struct adapter *adap = pi->adapter;
1688
1689        spin_lock(&q->lock);
1690again:  reclaim_completed_tx(adap, q, TX_RECLAIM_CHUNK);
1691
1692        while ((skb = skb_peek(&q->sendq)) != NULL) {
1693                unsigned int gen, pidx;
1694                unsigned int ndesc = skb->priority;
1695
1696                if (unlikely(q->size - q->in_use < ndesc)) {
1697                        set_bit(TXQ_OFLD, &qs->txq_stopped);
1698                        smp_mb__after_clear_bit();
1699
1700                        if (should_restart_tx(q) &&
1701                            test_and_clear_bit(TXQ_OFLD, &qs->txq_stopped))
1702                                goto again;
1703                        q->stops++;
1704                        break;
1705                }
1706
1707                gen = q->gen;
1708                q->in_use += ndesc;
1709                pidx = q->pidx;
1710                q->pidx += ndesc;
1711                if (q->pidx >= q->size) {
1712                        q->pidx -= q->size;
1713                        q->gen ^= 1;
1714                }
1715                __skb_unlink(skb, &q->sendq);
1716                spin_unlock(&q->lock);
1717
1718                write_ofld_wr(adap, skb, q, pidx, gen, ndesc);
1719                spin_lock(&q->lock);
1720        }
1721        spin_unlock(&q->lock);
1722
1723#if USE_GTS
1724        set_bit(TXQ_RUNNING, &q->flags);
1725        set_bit(TXQ_LAST_PKT_DB, &q->flags);
1726#endif
1727        wmb();
1728        t3_write_reg(adap, A_SG_KDOORBELL,
1729                     F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
1730}
1731
1732/**
1733 *      queue_set - return the queue set a packet should use
1734 *      @skb: the packet
1735 *
1736 *      Maps a packet to the SGE queue set it should use.  The desired queue
1737 *      set is carried in bits 1-3 in the packet's priority.
1738 */
1739static inline int queue_set(const struct sk_buff *skb)
1740{
1741        return skb->priority >> 1;
1742}
1743
1744/**
1745 *      is_ctrl_pkt - return whether an offload packet is a control packet
1746 *      @skb: the packet
1747 *
1748 *      Determines whether an offload packet should use an OFLD or a CTRL
1749 *      Tx queue.  This is indicated by bit 0 in the packet's priority.
1750 */
1751static inline int is_ctrl_pkt(const struct sk_buff *skb)
1752{
1753        return skb->priority & 1;
1754}
1755
1756/**
1757 *      t3_offload_tx - send an offload packet
1758 *      @tdev: the offload device to send to
1759 *      @skb: the packet
1760 *
1761 *      Sends an offload packet.  We use the packet priority to select the
1762 *      appropriate Tx queue as follows: bit 0 indicates whether the packet
1763 *      should be sent as regular or control, bits 1-3 select the queue set.
1764 */
1765int t3_offload_tx(struct t3cdev *tdev, struct sk_buff *skb)
1766{
1767        struct adapter *adap = tdev2adap(tdev);
1768        struct sge_qset *qs = &adap->sge.qs[queue_set(skb)];
1769
1770        if (unlikely(is_ctrl_pkt(skb)))
1771                return ctrl_xmit(adap, &qs->txq[TXQ_CTRL], skb);
1772
1773        return ofld_xmit(adap, &qs->txq[TXQ_OFLD], skb);
1774}
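
/*
 * Sketch of the priority encoding described above from the point of view of
 * a hypothetical offload client; 'qset' and 'use_ctrl' are placeholders.
 */
#if 0
        skb->priority = (qset << 1) | (use_ctrl ? 1 : 0);
        t3_offload_tx(tdev, skb);
#endif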
1775
1776/**
1777 *      offload_enqueue - add an offload packet to an SGE offload receive queue
1778 *      @q: the SGE response queue
1779 *      @skb: the packet
1780 *
1781 *      Add a new offload packet to an SGE response queue's offload packet
1782 *      queue.  If the packet is the first on the queue it schedules the RX
1783 *      softirq to process the queue.
1784 */
1785static inline void offload_enqueue(struct sge_rspq *q, struct sk_buff *skb)
1786{
1787        int was_empty = skb_queue_empty(&q->rx_queue);
1788
1789        __skb_queue_tail(&q->rx_queue, skb);
1790
1791        if (was_empty) {
1792                struct sge_qset *qs = rspq_to_qset(q);
1793
1794                napi_schedule(&qs->napi);
1795        }
1796}
1797
1798/**
1799 *      deliver_partial_bundle - deliver a (partial) bundle of Rx offload pkts
1800 *      @tdev: the offload device that will be receiving the packets
1801 *      @q: the SGE response queue that assembled the bundle
1802 *      @skbs: the partial bundle
1803 *      @n: the number of packets in the bundle
1804 *
1805 *      Delivers a (partial) bundle of Rx offload packets to an offload device.
1806 */
1807static inline void deliver_partial_bundle(struct t3cdev *tdev,
1808                                          struct sge_rspq *q,
1809                                          struct sk_buff *skbs[], int n)
1810{
1811        if (n) {
1812                q->offload_bundles++;
1813                tdev->recv(tdev, skbs, n);
1814        }
1815}
1816
1817/**
1818 *      ofld_poll - NAPI handler for offload packets in interrupt mode
1819 *      @dev: the network device doing the polling
1820 *      @budget: polling budget
1821 *
1822 *      The NAPI handler for offload packets when a response queue is serviced
1823 *      by the hard interrupt handler, i.e., when it's operating in non-polling
1824 *      mode.  Creates small packet batches and sends them through the offload
1825 *      receive handler.  Batches need to be of modest size as we do prefetches
1826 *      on the packets in each.
1827 */
1828static int ofld_poll(struct napi_struct *napi, int budget)
1829{
1830        struct sge_qset *qs = container_of(napi, struct sge_qset, napi);
1831        struct sge_rspq *q = &qs->rspq;
1832        struct adapter *adapter = qs->adap;
1833        int work_done = 0;
1834
1835        while (work_done < budget) {
1836                struct sk_buff *skb, *tmp, *skbs[RX_BUNDLE_SIZE];
1837                struct sk_buff_head queue;
1838                int ngathered;
1839
1840                spin_lock_irq(&q->lock);
1841                __skb_queue_head_init(&queue);
1842                skb_queue_splice_init(&q->rx_queue, &queue);
1843                if (skb_queue_empty(&queue)) {
1844                        napi_complete(napi);
1845                        spin_unlock_irq(&q->lock);
1846                        return work_done;
1847                }
1848                spin_unlock_irq(&q->lock);
1849
1850                ngathered = 0;
1851                skb_queue_walk_safe(&queue, skb, tmp) {
1852                        if (work_done >= budget)
1853                                break;
1854                        work_done++;
1855
1856                        __skb_unlink(skb, &queue);
1857                        prefetch(skb->data);
1858                        skbs[ngathered] = skb;
1859                        if (++ngathered == RX_BUNDLE_SIZE) {
1860                                q->offload_bundles++;
1861                                adapter->tdev.recv(&adapter->tdev, skbs,
1862                                                   ngathered);
1863                                ngathered = 0;
1864                        }
1865                }
1866                if (!skb_queue_empty(&queue)) {
1867                        /* splice remaining packets back onto Rx queue */
1868                        spin_lock_irq(&q->lock);
1869                        skb_queue_splice(&queue, &q->rx_queue);
1870                        spin_unlock_irq(&q->lock);
1871                }
1872                deliver_partial_bundle(&adapter->tdev, q, skbs, ngathered);
1873        }
1874
1875        return work_done;
1876}
1877
1878/**
1879 *      rx_offload - process a received offload packet
1880 *      @tdev: the offload device receiving the packet
1881 *      @rq: the response queue that received the packet
1882 *      @skb: the packet
1883 *      @rx_gather: a gather list of packets if we are building a bundle
1884 *      @gather_idx: index of the next available slot in the bundle
1885 *
1886 *      Process an ingress offload packet and add it to the offload ingress
1887 *      queue.  Returns the index of the next available slot in the bundle.
1888 */
1889static inline int rx_offload(struct t3cdev *tdev, struct sge_rspq *rq,
1890                             struct sk_buff *skb, struct sk_buff *rx_gather[],
1891                             unsigned int gather_idx)
1892{
1893        skb_reset_mac_header(skb);
1894        skb_reset_network_header(skb);
1895        skb_reset_transport_header(skb);
1896
1897        if (rq->polling) {
1898                rx_gather[gather_idx++] = skb;
1899                if (gather_idx == RX_BUNDLE_SIZE) {
1900                        tdev->recv(tdev, rx_gather, RX_BUNDLE_SIZE);
1901                        gather_idx = 0;
1902                        rq->offload_bundles++;
1903                }
1904        } else
1905                offload_enqueue(rq, skb);
1906
1907        return gather_idx;
1908}
1909
1910/**
1911 *      restart_tx - check whether to restart suspended Tx queues
1912 *      @qs: the queue set to resume
1913 *
1914 *      Restarts suspended Tx queues of an SGE queue set if they have enough
1915 *      free resources to resume operation.
1916 */
1917static void restart_tx(struct sge_qset *qs)
1918{
1919        if (test_bit(TXQ_ETH, &qs->txq_stopped) &&
1920            should_restart_tx(&qs->txq[TXQ_ETH]) &&
1921            test_and_clear_bit(TXQ_ETH, &qs->txq_stopped)) {
1922                qs->txq[TXQ_ETH].restarts++;
1923                if (netif_running(qs->netdev))
1924                        netif_tx_wake_queue(qs->tx_q);
1925        }
1926
1927        if (test_bit(TXQ_OFLD, &qs->txq_stopped) &&
1928            should_restart_tx(&qs->txq[TXQ_OFLD]) &&
1929            test_and_clear_bit(TXQ_OFLD, &qs->txq_stopped)) {
1930                qs->txq[TXQ_OFLD].restarts++;
1931                tasklet_schedule(&qs->txq[TXQ_OFLD].qresume_tsk);
1932        }
1933        if (test_bit(TXQ_CTRL, &qs->txq_stopped) &&
1934            should_restart_tx(&qs->txq[TXQ_CTRL]) &&
1935            test_and_clear_bit(TXQ_CTRL, &qs->txq_stopped)) {
1936                qs->txq[TXQ_CTRL].restarts++;
1937                tasklet_schedule(&qs->txq[TXQ_CTRL].qresume_tsk);
1938        }
1939}
1940
1941/**
1942 *      cxgb3_arp_process - process an ARP request probing a private IP address
1943 *      @adapter: the adapter
1944 *      @skb: the skbuff containing the ARP request
1945 *
1946 *      Check if the ARP request is probing the private IP address
1947 *      dedicated to iSCSI, generate an ARP reply if so.
1948 */
1949static void cxgb3_arp_process(struct adapter *adapter, struct sk_buff *skb)
1950{
1951        struct net_device *dev = skb->dev;
1952        struct port_info *pi;
1953        struct arphdr *arp;
1954        unsigned char *arp_ptr;
1955        unsigned char *sha;
1956        __be32 sip, tip;
1957
1958        if (!dev)
1959                return;
1960
1961        skb_reset_network_header(skb);
1962        arp = arp_hdr(skb);
1963
1964        if (arp->ar_op != htons(ARPOP_REQUEST))
1965                return;
1966
1967        arp_ptr = (unsigned char *)(arp + 1);
1968        sha = arp_ptr;
1969        arp_ptr += dev->addr_len;
1970        memcpy(&sip, arp_ptr, sizeof(sip));
1971        arp_ptr += sizeof(sip);
1972        arp_ptr += dev->addr_len;
1973        memcpy(&tip, arp_ptr, sizeof(tip));
1974
1975        pi = netdev_priv(dev);
1976        if (tip != pi->iscsi_ipv4addr)
1977                return;
1978
1979        arp_send(ARPOP_REPLY, ETH_P_ARP, sip, dev, tip, sha,
1980                 dev->dev_addr, sha);
1981
1982}
1983
1984static inline int is_arp(struct sk_buff *skb)
1985{
1986        return skb->protocol == htons(ETH_P_ARP);
1987}
1988
1989/**
1990 *      rx_eth - process an ingress ethernet packet
1991 *      @adap: the adapter
1992 *      @rq: the response queue that received the packet
1993 *      @skb: the packet
1994 *      @pad: amount of padding at the start of the buffer
1995 *
1996 *      Process an ingress ethernet packet and deliver it to the stack.
1997 *      The padding is 2 if the packet was delivered in an Rx buffer and 0
1998 *      if it was immediate data in a response.
1999 */
2000static void rx_eth(struct adapter *adap, struct sge_rspq *rq,
2001                   struct sk_buff *skb, int pad, int lro)
2002{
2003        struct cpl_rx_pkt *p = (struct cpl_rx_pkt *)(skb->data + pad);
2004        struct sge_qset *qs = rspq_to_qset(rq);
2005        struct port_info *pi;
2006
2007        skb_pull(skb, sizeof(*p) + pad);
2008        skb->protocol = eth_type_trans(skb, adap->port[p->iff]);
2009        pi = netdev_priv(skb->dev);
2010        if ((pi->rx_offload & T3_RX_CSUM) && p->csum_valid &&
2011            p->csum == htons(0xffff) && !p->fragment) {
2012                qs->port_stats[SGE_PSTAT_RX_CSUM_GOOD]++;
2013                skb->ip_summed = CHECKSUM_UNNECESSARY;
2014        } else
2015                skb->ip_summed = CHECKSUM_NONE;
2016        skb_record_rx_queue(skb, qs - &adap->sge.qs[0]);
2017
2018        if (unlikely(p->vlan_valid)) {
2019                struct vlan_group *grp = pi->vlan_grp;
2020
2021                qs->port_stats[SGE_PSTAT_VLANEX]++;
2022                if (likely(grp)) {
2023                        if (lro)
2024                                vlan_gro_receive(&qs->napi, grp,
2025                                                 ntohs(p->vlan), skb);
2026                        else {
2027                                if (unlikely(pi->iscsi_ipv4addr &&
2028                                    is_arp(skb))) {
2029                                        unsigned short vtag = ntohs(p->vlan) &
2030                                                                VLAN_VID_MASK;
2031                                        skb->dev = vlan_group_get_device(grp,
2032                                                                         vtag);
2033                                        cxgb3_arp_process(adap, skb);
2034                                }
2035                                __vlan_hwaccel_rx(skb, grp, ntohs(p->vlan),
2036                                                  rq->polling);
2037                        }
2038                } else
2039                        dev_kfree_skb_any(skb);
2040        } else if (rq->polling) {
2041                if (lro)
2042                        napi_gro_receive(&qs->napi, skb);
2043                else {
2044                        if (unlikely(pi->iscsi_ipv4addr && is_arp(skb)))
2045                                cxgb3_arp_process(adap, skb);
2046                        netif_receive_skb(skb);
2047                }
2048        } else
2049                netif_rx(skb);
2050}
2051
2052static inline int is_eth_tcp(u32 rss)
2053{
2054        return G_HASHTYPE(ntohl(rss)) == RSS_HASH_4_TUPLE;
2055}
2056
2057/**
2058 *      lro_add_page - add a page chunk to an LRO session
2059 *      @adap: the adapter
2060 *      @qs: the associated queue set
2061 *      @fl: the free list containing the page chunk to add
2062 *      @len: packet length
2063 *      @complete: Indicates the last fragment of a frame
2064 *
2065 *      Add a received packet contained in a page chunk to an existing LRO
2066 *      session.
2067 */
2068static void lro_add_page(struct adapter *adap, struct sge_qset *qs,
2069                         struct sge_fl *fl, int len, int complete)
2070{
2071        struct rx_sw_desc *sd = &fl->sdesc[fl->cidx];
2072        struct sk_buff *skb = NULL;
2073        struct cpl_rx_pkt *cpl;
2074        struct skb_frag_struct *rx_frag;
2075        int nr_frags;
2076        int offset = 0;
2077
2078        if (!qs->nomem) {
2079                skb = napi_get_frags(&qs->napi);
2080                qs->nomem = !skb;
2081        }
2082
2083        fl->credits--;
2084
2085        pci_dma_sync_single_for_cpu(adap->pdev,
2086                                    pci_unmap_addr(sd, dma_addr),
2087                                    fl->buf_size - SGE_PG_RSVD,
2088                                    PCI_DMA_FROMDEVICE);
2089
2090        (*sd->pg_chunk.p_cnt)--;
2091        if (!*sd->pg_chunk.p_cnt && sd->pg_chunk.page != fl->pg_chunk.page)
2092                pci_unmap_page(adap->pdev,
2093                               sd->pg_chunk.mapping,
2094                               fl->alloc_size,
2095                               PCI_DMA_FROMDEVICE);
2096
2097        if (!skb) {
2098                put_page(sd->pg_chunk.page);
2099                if (complete)
2100                        qs->nomem = 0;
2101                return;
2102        }
2103
2104        rx_frag = skb_shinfo(skb)->frags;
2105        nr_frags = skb_shinfo(skb)->nr_frags;
2106
2107        if (!nr_frags) {
2108                offset = 2 + sizeof(struct cpl_rx_pkt);
2109                qs->lro_va = sd->pg_chunk.va + 2;
2110        }
2111        len -= offset;
2112
2113        prefetch(qs->lro_va);
2114
2115        rx_frag += nr_frags;
2116        rx_frag->page = sd->pg_chunk.page;
2117        rx_frag->page_offset = sd->pg_chunk.offset + offset;
2118        rx_frag->size = len;
2119
2120        skb->len += len;
2121        skb->data_len += len;
2122        skb->truesize += len;
2123        skb_shinfo(skb)->nr_frags++;
2124
2125        if (!complete)
2126                return;
2127
2128        skb->ip_summed = CHECKSUM_UNNECESSARY;
2129        cpl = qs->lro_va;
2130
2131        if (unlikely(cpl->vlan_valid)) {
2132                struct net_device *dev = qs->netdev;
2133                struct port_info *pi = netdev_priv(dev);
2134                struct vlan_group *grp = pi->vlan_grp;
2135
2136                if (likely(grp != NULL)) {
2137                        vlan_gro_frags(&qs->napi, grp, ntohs(cpl->vlan));
2138                        return;
2139                }
2140        }
2141        napi_gro_frags(&qs->napi);
2142}
2143
2144/**
2145 *      handle_rsp_cntrl_info - handles control information in a response
2146 *      @qs: the queue set corresponding to the response
2147 *      @flags: the response control flags
2148 *
2149 *      Handles the control information of an SGE response, such as GTS
2150 *      indications and completion credits for the queue set's Tx queues.
2151 *      HW coalesces credits; we don't do any extra SW coalescing.
2152 */
2153static inline void handle_rsp_cntrl_info(struct sge_qset *qs, u32 flags)
2154{
2155        unsigned int credits;
2156
2157#if USE_GTS
2158        if (flags & F_RSPD_TXQ0_GTS)
2159                clear_bit(TXQ_RUNNING, &qs->txq[TXQ_ETH].flags);
2160#endif
2161
2162        credits = G_RSPD_TXQ0_CR(flags);
2163        if (credits)
2164                qs->txq[TXQ_ETH].processed += credits;
2165
2166        credits = G_RSPD_TXQ2_CR(flags);
2167        if (credits)
2168                qs->txq[TXQ_CTRL].processed += credits;
2169
2170# if USE_GTS
2171        if (flags & F_RSPD_TXQ1_GTS)
2172                clear_bit(TXQ_RUNNING, &qs->txq[TXQ_OFLD].flags);
2173# endif
2174        credits = G_RSPD_TXQ1_CR(flags);
2175        if (credits)
2176                qs->txq[TXQ_OFLD].processed += credits;
2177}
2178
2179/**
2180 *      check_ring_db - check if we need to ring any doorbells
2181 *      @adapter: the adapter
2182 *      @qs: the queue set whose Tx queues are to be examined
2183 *      @sleeping: indicates which Tx queue sent GTS
2184 *
2185 *      Checks if some of a queue set's Tx queues need to ring their doorbells
2186 *      to resume transmission after idling while they still have unprocessed
2187 *      descriptors.
2188 */
2189static void check_ring_db(struct adapter *adap, struct sge_qset *qs,
2190                          unsigned int sleeping)
2191{
2192        if (sleeping & F_RSPD_TXQ0_GTS) {
2193                struct sge_txq *txq = &qs->txq[TXQ_ETH];
2194
2195                if (txq->cleaned + txq->in_use != txq->processed &&
2196                    !test_and_set_bit(TXQ_LAST_PKT_DB, &txq->flags)) {
2197                        set_bit(TXQ_RUNNING, &txq->flags);
2198                        t3_write_reg(adap, A_SG_KDOORBELL, F_SELEGRCNTX |
2199                                     V_EGRCNTX(txq->cntxt_id));
2200                }
2201        }
2202
2203        if (sleeping & F_RSPD_TXQ1_GTS) {
2204                struct sge_txq *txq = &qs->txq[TXQ_OFLD];
2205
2206                if (txq->cleaned + txq->in_use != txq->processed &&
2207                    !test_and_set_bit(TXQ_LAST_PKT_DB, &txq->flags)) {
2208                        set_bit(TXQ_RUNNING, &txq->flags);
2209                        t3_write_reg(adap, A_SG_KDOORBELL, F_SELEGRCNTX |
2210                                     V_EGRCNTX(txq->cntxt_id));
2211                }
2212        }
2213}
2214
2215/**
2216 *      is_new_response - check if a response is newly written
2217 *      @r: the response descriptor
2218 *      @q: the response queue
2219 *
2220 *      Returns true if a response descriptor contains a yet unprocessed
2221 *      response.
2222 */
2223static inline int is_new_response(const struct rsp_desc *r,
2224                                  const struct sge_rspq *q)
2225{
2226        return (r->intr_gen & F_RSPD_GEN2) == q->gen;
2227}
2228
2229static inline void clear_rspq_bufstate(struct sge_rspq * const q)
2230{
2231        q->pg_skb = NULL;
2232        q->rx_recycle_buf = 0;
2233}
2234
2235#define RSPD_GTS_MASK  (F_RSPD_TXQ0_GTS | F_RSPD_TXQ1_GTS)
2236#define RSPD_CTRL_MASK (RSPD_GTS_MASK | \
2237                        V_RSPD_TXQ0_CR(M_RSPD_TXQ0_CR) | \
2238                        V_RSPD_TXQ1_CR(M_RSPD_TXQ1_CR) | \
2239                        V_RSPD_TXQ2_CR(M_RSPD_TXQ2_CR))
2240
2241/* How long to delay the next interrupt in case of memory shortage, in units of 0.1 us. */
2242#define NOMEM_INTR_DELAY 2500
2243
2244/**
2245 *      process_responses - process responses from an SGE response queue
2246 *      @adap: the adapter
2247 *      @qs: the queue set to which the response queue belongs
2248 *      @budget: how many responses can be processed in this round
2249 *
2250 *      Process responses from an SGE response queue up to the supplied budget.
2251 *      Responses include received packets as well as credits and other events
2252 *      for the queues that belong to the response queue's queue set.
2253 *      A negative budget is effectively unlimited.
2254 *
2255 *      Additionally choose the interrupt holdoff time for the next interrupt
2256 *      on this queue.  If the system is under memory shortage use a fairly
2257 *      long delay to help recovery.
2258 */
2259static int process_responses(struct adapter *adap, struct sge_qset *qs,
2260                             int budget)
2261{
2262        struct sge_rspq *q = &qs->rspq;
2263        struct rsp_desc *r = &q->desc[q->cidx];
2264        int budget_left = budget;
2265        unsigned int sleeping = 0;
2266        struct sk_buff *offload_skbs[RX_BUNDLE_SIZE];
2267        int ngathered = 0;
2268
2269        q->next_holdoff = q->holdoff_tmr;
2270
2271        while (likely(budget_left && is_new_response(r, q))) {
2272                int packet_complete, eth, ethpad = 2, lro = qs->lro_enabled;
2273                struct sk_buff *skb = NULL;
2274                u32 len, flags = ntohl(r->flags);
2275                __be32 rss_hi = *(const __be32 *)r,
2276                       rss_lo = r->rss_hdr.rss_hash_val;
2277
2278                eth = r->rss_hdr.opcode == CPL_RX_PKT;
2279
2280                if (unlikely(flags & F_RSPD_ASYNC_NOTIF)) {
2281                        skb = alloc_skb(AN_PKT_SIZE, GFP_ATOMIC);
2282                        if (!skb)
2283                                goto no_mem;
2284
2285                        memcpy(__skb_put(skb, AN_PKT_SIZE), r, AN_PKT_SIZE);
2286                        skb->data[0] = CPL_ASYNC_NOTIF;
2287                        rss_hi = htonl(CPL_ASYNC_NOTIF << 24);
2288                        q->async_notif++;
2289                } else if (flags & F_RSPD_IMM_DATA_VALID) {
2290                        skb = get_imm_packet(r);
2291                        if (unlikely(!skb)) {
2292no_mem:
2293                                q->next_holdoff = NOMEM_INTR_DELAY;
2294                                q->nomem++;
2295                                /* consume one credit since we tried */
2296                                budget_left--;
2297                                break;
2298                        }
2299                        q->imm_data++;
2300                        ethpad = 0;
2301                } else if ((len = ntohl(r->len_cq)) != 0) {
2302                        struct sge_fl *fl;
2303
2304                        lro &= eth && is_eth_tcp(rss_hi);
2305
2306                        fl = (len & F_RSPD_FLQ) ? &qs->fl[1] : &qs->fl[0];
2307                        if (fl->use_pages) {
2308                                void *addr = fl->sdesc[fl->cidx].pg_chunk.va;
2309
2310                                prefetch(addr);
2311#if L1_CACHE_BYTES < 128
2312                                prefetch(addr + L1_CACHE_BYTES);
2313#endif
2314                                __refill_fl(adap, fl);
2315                                if (lro > 0) {
2316                                        lro_add_page(adap, qs, fl,
2317                                                     G_RSPD_LEN(len),
2318                                                     flags & F_RSPD_EOP);
2319                                        goto next_fl;
2320                                }
2321
2322                                skb = get_packet_pg(adap, fl, q,
2323                                                    G_RSPD_LEN(len),
2324                                                    eth ?
2325                                                    SGE_RX_DROP_THRES : 0);
2326                                q->pg_skb = skb;
2327                        } else
2328                                skb = get_packet(adap, fl, G_RSPD_LEN(len),
2329                                                 eth ? SGE_RX_DROP_THRES : 0);
2330                        if (unlikely(!skb)) {
2331                                if (!eth)
2332                                        goto no_mem;
2333                                q->rx_drops++;
2334                        } else if (unlikely(r->rss_hdr.opcode == CPL_TRACE_PKT))
2335                                __skb_pull(skb, 2);
2336next_fl:
2337                        if (++fl->cidx == fl->size)
2338                                fl->cidx = 0;
2339                } else
2340                        q->pure_rsps++;
2341
2342                if (flags & RSPD_CTRL_MASK) {
2343                        sleeping |= flags & RSPD_GTS_MASK;
2344                        handle_rsp_cntrl_info(qs, flags);
2345                }
2346
2347                r++;
2348                if (unlikely(++q->cidx == q->size)) {
2349                        q->cidx = 0;
2350                        q->gen ^= 1;
2351                        r = q->desc;
2352                }
2353                prefetch(r);
2354
2355                if (++q->credits >= (q->size / 4)) {
2356                        refill_rspq(adap, q, q->credits);
2357                        q->credits = 0;
2358                }
2359
2360                packet_complete = flags &
2361                                  (F_RSPD_EOP | F_RSPD_IMM_DATA_VALID |
2362                                   F_RSPD_ASYNC_NOTIF);
2363
2364                if (skb != NULL && packet_complete) {
2365                        if (eth)
2366                                rx_eth(adap, q, skb, ethpad, lro);
2367                        else {
2368                                q->offload_pkts++;
2369                                /* Preserve the RSS info in csum & priority */
2370                                skb->csum = rss_hi;
2371                                skb->priority = rss_lo;
2372                                ngathered = rx_offload(&adap->tdev, q, skb,
2373                                                       offload_skbs,
2374                                                       ngathered);
2375                        }
2376
2377                        if (flags & F_RSPD_EOP)
2378                                clear_rspq_bufstate(q);
2379                }
2380                --budget_left;
2381        }
2382
2383        deliver_partial_bundle(&adap->tdev, q, offload_skbs, ngathered);
2384
2385        if (sleeping)
2386                check_ring_db(adap, qs, sleeping);
2387
2388        smp_mb();               /* commit Tx queue .processed updates */
2389        if (unlikely(qs->txq_stopped != 0))
2390                restart_tx(qs);
2391
2392        budget -= budget_left;
2393        return budget;
2394}
2395
2396static inline int is_pure_response(const struct rsp_desc *r)
2397{
2398        __be32 n = r->flags & htonl(F_RSPD_ASYNC_NOTIF | F_RSPD_IMM_DATA_VALID);
2399
2400        return (n | r->len_cq) == 0;
2401}
2402
2403/**
2404 *      napi_rx_handler - the NAPI handler for Rx processing
2405 *      @napi: the napi instance
2406 *      @budget: how many packets we can process in this round
2407 *
2408 *      Handler for new data events when using NAPI.
2409 */
2410static int napi_rx_handler(struct napi_struct *napi, int budget)
2411{
2412        struct sge_qset *qs = container_of(napi, struct sge_qset, napi);
2413        struct adapter *adap = qs->adap;
2414        int work_done = process_responses(adap, qs, budget);
2415
2416        if (likely(work_done < budget)) {
2417                napi_complete(napi);
2418
2419                /*
2420                 * Because we don't atomically flush the following
2421                 * write it is possible that in very rare cases it can
2422                 * reach the device in a way that races with a new
2423                 * response being written plus an error interrupt
2424                 * causing the NAPI interrupt handler below to return
2425                 * unhandled status to the OS.  To protect against
2426                 * this would require flushing the write and doing
2427                 * both the write and the flush with interrupts off.
2428                 * Way too expensive and unjustifiable given the
2429                 * rarity of the race.
2430                 *
2431                 * The race cannot happen at all with MSI-X.
2432                 */
2433                t3_write_reg(adap, A_SG_GTS, V_RSPQ(qs->rspq.cntxt_id) |
2434                             V_NEWTIMER(qs->rspq.next_holdoff) |
2435                             V_NEWINDEX(qs->rspq.cidx));
2436        }
2437        return work_done;
2438}
2439
2440/*
2441 * Returns true if the device is already scheduled for polling.
2442 */
2443static inline int napi_is_scheduled(struct napi_struct *napi)
2444{
2445        return test_bit(NAPI_STATE_SCHED, &napi->state);
2446}
2447
2448/**
2449 *      process_pure_responses - process pure responses from a response queue
2450 *      @adap: the adapter
2451 *      @qs: the queue set owning the response queue
2452 *      @r: the first pure response to process
2453 *
2454 *      A simpler version of process_responses() that handles only pure (i.e.,
2455 *      non-data-carrying) responses.  Such responses are too lightweight to
2456 *      justify calling a softirq under NAPI, so we handle them specially in
2457 *      the interrupt handler.  The function is called with a pointer to a
2458 *      response, which the caller must ensure is a valid pure response.
2459 *
2460 *      Returns 1 if it encounters a valid data-carrying response, 0 otherwise.
2461 */
2462static int process_pure_responses(struct adapter *adap, struct sge_qset *qs,
2463                                  struct rsp_desc *r)
2464{
2465        struct sge_rspq *q = &qs->rspq;
2466        unsigned int sleeping = 0;
2467
2468        do {
2469                u32 flags = ntohl(r->flags);
2470
2471                r++;
2472                if (unlikely(++q->cidx == q->size)) {
2473                        q->cidx = 0;
2474                        q->gen ^= 1;
2475                        r = q->desc;
2476                }
2477                prefetch(r);
2478
2479                if (flags & RSPD_CTRL_MASK) {
2480                        sleeping |= flags & RSPD_GTS_MASK;
2481                        handle_rsp_cntrl_info(qs, flags);
2482                }
2483
2484                q->pure_rsps++;
2485                if (++q->credits >= (q->size / 4)) {
2486                        refill_rspq(adap, q, q->credits);
2487                        q->credits = 0;
2488                }
2489        } while (is_new_response(r, q) && is_pure_response(r));
2490
2491        if (sleeping)
2492                check_ring_db(adap, qs, sleeping);
2493
2494        smp_mb();               /* commit Tx queue .processed updates */
2495        if (unlikely(qs->txq_stopped != 0))
2496                restart_tx(qs);
2497
2498        return is_new_response(r, q);
2499}
2500
2501/**
2502 *      handle_responses - decide what to do with new responses in NAPI mode
2503 *      @adap: the adapter
2504 *      @q: the response queue
2505 *
2506 *      This is used by the NAPI interrupt handlers to decide what to do with
2507 *      new SGE responses.  If there are no new responses it returns -1.  If
2508 *      there are new responses and they are pure (i.e., non-data-carrying)
2509 *      it handles them straight in hard interrupt context as they are very
2510 *      cheap and don't deliver any packets.  Finally, if there are any data
2511 *      signaling responses it schedules the NAPI handler.  Returns 1 if it
2512 *      schedules NAPI, 0 if all new responses were pure.
2513 *
2514 *      The caller must ascertain NAPI is not already running.
2515 */
2516static inline int handle_responses(struct adapter *adap, struct sge_rspq *q)
2517{
2518        struct sge_qset *qs = rspq_to_qset(q);
2519        struct rsp_desc *r = &q->desc[q->cidx];
2520
2521        if (!is_new_response(r, q))
2522                return -1;
2523        if (is_pure_response(r) && process_pure_responses(adap, qs, r) == 0) {
2524                t3_write_reg(adap, A_SG_GTS, V_RSPQ(q->cntxt_id) |
2525                             V_NEWTIMER(q->holdoff_tmr) | V_NEWINDEX(q->cidx));
2526                return 0;
2527        }
2528        napi_schedule(&qs->napi);
2529        return 1;
2530}
2531
2532/*
2533 * The MSI-X interrupt handler for an SGE response queue for the non-NAPI case
2534 * (i.e., response queue serviced in hard interrupt).
2535 */
2536irqreturn_t t3_sge_intr_msix(int irq, void *cookie)
2537{
2538        struct sge_qset *qs = cookie;
2539        struct adapter *adap = qs->adap;
2540        struct sge_rspq *q = &qs->rspq;
2541
2542        spin_lock(&q->lock);
2543        if (process_responses(adap, qs, -1) == 0)
2544                q->unhandled_irqs++;
2545        t3_write_reg(adap, A_SG_GTS, V_RSPQ(q->cntxt_id) |
2546                     V_NEWTIMER(q->next_holdoff) | V_NEWINDEX(q->cidx));
2547        spin_unlock(&q->lock);
2548        return IRQ_HANDLED;
2549}
2550
2551/*
2552 * The MSI-X interrupt handler for an SGE response queue for the NAPI case
2553 * (i.e., response queue serviced by NAPI polling).
2554 */
2555static irqreturn_t t3_sge_intr_msix_napi(int irq, void *cookie)
2556{
2557        struct sge_qset *qs = cookie;
2558        struct sge_rspq *q = &qs->rspq;
2559
2560        spin_lock(&q->lock);
2561
2562        if (handle_responses(qs->adap, q) < 0)
2563                q->unhandled_irqs++;
2564        spin_unlock(&q->lock);
2565        return IRQ_HANDLED;
2566}
2567
2568/*
2569 * The non-NAPI MSI interrupt handler.  This needs to handle data events from
2570 * SGE response queues as well as error and other async events as they all use
2571 * the same MSI vector.  We use one SGE response queue per port in this mode
2572 * and protect all response queues with queue 0's lock.
2573 */
2574static irqreturn_t t3_intr_msi(int irq, void *cookie)
2575{
2576        int new_packets = 0;
2577        struct adapter *adap = cookie;
2578        struct sge_rspq *q = &adap->sge.qs[0].rspq;
2579
2580        spin_lock(&q->lock);
2581
2582        if (process_responses(adap, &adap->sge.qs[0], -1)) {
2583                t3_write_reg(adap, A_SG_GTS, V_RSPQ(q->cntxt_id) |
2584                             V_NEWTIMER(q->next_holdoff) | V_NEWINDEX(q->cidx));
2585                new_packets = 1;
2586        }
2587
2588        if (adap->params.nports == 2 &&
2589            process_responses(adap, &adap->sge.qs[1], -1)) {
2590                struct sge_rspq *q1 = &adap->sge.qs[1].rspq;
2591
2592                t3_write_reg(adap, A_SG_GTS, V_RSPQ(q1->cntxt_id) |
2593                             V_NEWTIMER(q1->next_holdoff) |
2594                             V_NEWINDEX(q1->cidx));
2595                new_packets = 1;
2596        }
2597
2598        if (!new_packets && t3_slow_intr_handler(adap) == 0)
2599                q->unhandled_irqs++;
2600
2601        spin_unlock(&q->lock);
2602        return IRQ_HANDLED;
2603}
2604
2605static int rspq_check_napi(struct sge_qset *qs)
2606{
2607        struct sge_rspq *q = &qs->rspq;
2608
2609        if (!napi_is_scheduled(&qs->napi) &&
2610            is_new_response(&q->desc[q->cidx], q)) {
2611                napi_schedule(&qs->napi);
2612                return 1;
2613        }
2614        return 0;
2615}
2616
2617/*
2618 * The MSI interrupt handler for the NAPI case (i.e., response queues serviced
2619 * by NAPI polling).  Handles data events from SGE response queues as well as
2620 * error and other async events as they all use the same MSI vector.  We use
2621 * one SGE response queue per port in this mode and protect all response
2622 * queues with queue 0's lock.
2623 */
2624static irqreturn_t t3_intr_msi_napi(int irq, void *cookie)
2625{
2626        int new_packets;
2627        struct adapter *adap = cookie;
2628        struct sge_rspq *q = &adap->sge.qs[0].rspq;
2629
2630        spin_lock(&q->lock);
2631
2632        new_packets = rspq_check_napi(&adap->sge.qs[0]);
2633        if (adap->params.nports == 2)
2634                new_packets += rspq_check_napi(&adap->sge.qs[1]);
2635        if (!new_packets && t3_slow_intr_handler(adap) == 0)
2636                q->unhandled_irqs++;
2637
2638        spin_unlock(&q->lock);
2639        return IRQ_HANDLED;
2640}
2641
2642/*
2643 * A helper function that processes responses and issues GTS.
2644 */
2645static inline int process_responses_gts(struct adapter *adap,
2646                                        struct sge_rspq *rq)
2647{
2648        int work;
2649
2650        work = process_responses(adap, rspq_to_qset(rq), -1);
2651        t3_write_reg(adap, A_SG_GTS, V_RSPQ(rq->cntxt_id) |
2652                     V_NEWTIMER(rq->next_holdoff) | V_NEWINDEX(rq->cidx));
2653        return work;
2654}
2655
2656/*
2657 * The legacy INTx interrupt handler.  This needs to handle data events from
2658 * SGE response queues as well as error and other async events as they all use
2659 * the same interrupt pin.  We use one SGE response queue per port in this mode
2660 * and protect all response queues with queue 0's lock.
2661 */
2662static irqreturn_t t3_intr(int irq, void *cookie)
2663{
2664        int work_done, w0, w1;
2665        struct adapter *adap = cookie;
2666        struct sge_rspq *q0 = &adap->sge.qs[0].rspq;
2667        struct sge_rspq *q1 = &adap->sge.qs[1].rspq;
2668
2669        spin_lock(&q0->lock);
2670
2671        w0 = is_new_response(&q0->desc[q0->cidx], q0);
2672        w1 = adap->params.nports == 2 &&
2673            is_new_response(&q1->desc[q1->cidx], q1);
2674
2675        if (likely(w0 | w1)) {
2676                t3_write_reg(adap, A_PL_CLI, 0);
2677                t3_read_reg(adap, A_PL_CLI);    /* flush */
2678
2679                if (likely(w0))
2680                        process_responses_gts(adap, q0);
2681
2682                if (w1)
2683                        process_responses_gts(adap, q1);
2684
2685                work_done = w0 | w1;
2686        } else
2687                work_done = t3_slow_intr_handler(adap);
2688
2689        spin_unlock(&q0->lock);
2690        return IRQ_RETVAL(work_done != 0);
2691}
2692
2693/*
2694 * Interrupt handler for legacy INTx interrupts for T3B-based cards.
2695 * Handles data events from SGE response queues as well as error and other
2696 * async events as they all use the same interrupt pin.  We use one SGE
2697 * response queue per port in this mode and protect all response queues with
2698 * queue 0's lock.
2699 */
2700static irqreturn_t t3b_intr(int irq, void *cookie)
2701{
2702        u32 map;
2703        struct adapter *adap = cookie;
2704        struct sge_rspq *q0 = &adap->sge.qs[0].rspq;
2705
2706        t3_write_reg(adap, A_PL_CLI, 0);
2707        map = t3_read_reg(adap, A_SG_DATA_INTR);
2708
2709        if (unlikely(!map))     /* shared interrupt, most likely */
2710                return IRQ_NONE;
2711
2712        spin_lock(&q0->lock);
2713
2714        if (unlikely(map & F_ERRINTR))
2715                t3_slow_intr_handler(adap);
2716
2717        if (likely(map & 1))
2718                process_responses_gts(adap, q0);
2719
2720        if (map & 2)
2721                process_responses_gts(adap, &adap->sge.qs[1].rspq);
2722
2723        spin_unlock(&q0->lock);
2724        return IRQ_HANDLED;
2725}
2726
2727/*
2728 * NAPI interrupt handler for legacy INTx interrupts for T3B-based cards.
2729 * Handles data events from SGE response queues as well as error and other
2730 * async events as they all use the same interrupt pin.  We use one SGE
2731 * response queue per port in this mode and protect all response queues with
2732 * queue 0's lock.
2733 */
2734static irqreturn_t t3b_intr_napi(int irq, void *cookie)
2735{
2736        u32 map;
2737        struct adapter *adap = cookie;
2738        struct sge_qset *qs0 = &adap->sge.qs[0];
2739        struct sge_rspq *q0 = &qs0->rspq;
2740
2741        t3_write_reg(adap, A_PL_CLI, 0);
2742        map = t3_read_reg(adap, A_SG_DATA_INTR);
2743
2744        if (unlikely(!map))     /* shared interrupt, most likely */
2745                return IRQ_NONE;
2746
2747        spin_lock(&q0->lock);
2748
2749        if (unlikely(map & F_ERRINTR))
2750                t3_slow_intr_handler(adap);
2751
2752        if (likely(map & 1))
2753                napi_schedule(&qs0->napi);
2754
2755        if (map & 2)
2756                napi_schedule(&adap->sge.qs[1].napi);
2757
2758        spin_unlock(&q0->lock);
2759        return IRQ_HANDLED;
2760}
2761
2762/**
2763 *      t3_intr_handler - select the top-level interrupt handler
2764 *      @adap: the adapter
2765 *      @polling: whether using NAPI to service response queues
2766 *
2767 *      Selects the top-level interrupt handler based on the type of interrupts
2768 *      (MSI-X, MSI, or legacy) and whether NAPI will be used to service the
2769 *      response queues.
2770 */
2771irq_handler_t t3_intr_handler(struct adapter *adap, int polling)
2772{
2773        if (adap->flags & USING_MSIX)
2774                return polling ? t3_sge_intr_msix_napi : t3_sge_intr_msix;
2775        if (adap->flags & USING_MSI)
2776                return polling ? t3_intr_msi_napi : t3_intr_msi;
2777        if (adap->params.rev > 0)
2778                return polling ? t3b_intr_napi : t3b_intr;
2779        return t3_intr;
2780}
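
/*
 * Sketch of how probe code might install the selected handler for the
 * MSI/legacy case (MSI-X vectors are requested per queue set, with the
 * queue set as the cookie); the IRQ name and 'polling' flag are
 * placeholders.
 */
#if 0
        int err = request_irq(adap->pdev->irq, t3_intr_handler(adap, polling),
                              (adap->flags & USING_MSI) ? 0 : IRQF_SHARED,
                              "cxgb3", adap);
#endif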
2781
2782#define SGE_PARERR (F_CPPARITYERROR | F_OCPARITYERROR | F_RCPARITYERROR | \
2783                    F_IRPARITYERROR | V_ITPARITYERROR(M_ITPARITYERROR) | \
2784                    V_FLPARITYERROR(M_FLPARITYERROR) | F_LODRBPARITYERROR | \
2785                    F_HIDRBPARITYERROR | F_LORCQPARITYERROR | \
2786                    F_HIRCQPARITYERROR)
2787#define SGE_FRAMINGERR (F_UC_REQ_FRAMINGERROR | F_R_REQ_FRAMINGERROR)
2788#define SGE_FATALERR (SGE_PARERR | SGE_FRAMINGERR | F_RSPQCREDITOVERFOW | \
2789                      F_RSPQDISABLED)
2790
2791/**
2792 *      t3_sge_err_intr_handler - SGE async event interrupt handler
2793 *      @adapter: the adapter
2794 *
2795 *      Interrupt handler for SGE asynchronous (non-data) events.
2796 */
2797void t3_sge_err_intr_handler(struct adapter *adapter)
2798{
2799        unsigned int v, status = t3_read_reg(adapter, A_SG_INT_CAUSE) &
2800                                 ~F_FLEMPTY;
2801
2802        if (status & SGE_PARERR)
2803                CH_ALERT(adapter, "SGE parity error (0x%x)\n",
2804                         status & SGE_PARERR);
2805        if (status & SGE_FRAMINGERR)
2806                CH_ALERT(adapter, "SGE framing error (0x%x)\n",
2807                         status & SGE_FRAMINGERR);
2808
2809        if (status & F_RSPQCREDITOVERFOW)
2810                CH_ALERT(adapter, "SGE response queue credit overflow\n");
2811
2812        if (status & F_RSPQDISABLED) {
2813                v = t3_read_reg(adapter, A_SG_RSPQ_FL_STATUS);
2814
2815                CH_ALERT(adapter,
2816                         "packet delivered to disabled response queue "
2817                         "(0x%x)\n", (v >> S_RSPQ0DISABLED) & 0xff);
2818        }
2819
2820        if (status & (F_HIPIODRBDROPERR | F_LOPIODRBDROPERR))
2821                CH_ALERT(adapter, "SGE dropped %s priority doorbell\n",
2822                         status & F_HIPIODRBDROPERR ? "high" : "low");
2823
2824        t3_write_reg(adapter, A_SG_INT_CAUSE, status);
2825        if (status &  SGE_FATALERR)
2826                t3_fatal_err(adapter);
2827}
2828
2829/**
2830 *      sge_timer_tx - perform periodic maintenance of an SGE qset
2831 *      @data: the SGE queue set to maintain
2832 *
2833 *      Runs periodically from a timer to perform maintenance of an SGE queue
2834 *      set.  It performs a single task:
2835 *
2836 *      Cleans up any completed Tx descriptors that may still be pending.
2837 *      Normal descriptor cleanup happens when new packets are added to a Tx
2838 *      queue so this timer is relatively infrequent and does any cleanup only
2839 *      if the Tx queue has not seen any new packets in a while.  We make a
2840 *      best effort attempt to reclaim descriptors, in that we don't wait
2841 *      around if we cannot get a queue's lock (which most likely is because
2842 *      someone else is queueing new packets and so will also handle the clean
2843 *      up).  Since control queues use immediate data exclusively we don't
2844 *      bother cleaning them up here.
2845 *
2846 */
2847static void sge_timer_tx(unsigned long data)
2848{
2849        struct sge_qset *qs = (struct sge_qset *)data;
2850        struct port_info *pi = netdev_priv(qs->netdev);
2851        struct adapter *adap = pi->adapter;
2852        unsigned int tbd[SGE_TXQ_PER_SET] = {0, 0};
2853        unsigned long next_period;
2854
2855        if (__netif_tx_trylock(qs->tx_q)) {
2856                tbd[TXQ_ETH] = reclaim_completed_tx(adap, &qs->txq[TXQ_ETH],
2857                                                     TX_RECLAIM_TIMER_CHUNK);
2858                __netif_tx_unlock(qs->tx_q);
2859        }
2860
2861        if (spin_trylock(&qs->txq[TXQ_OFLD].lock)) {
2862                tbd[TXQ_OFLD] = reclaim_completed_tx(adap, &qs->txq[TXQ_OFLD],
2863                                                     TX_RECLAIM_TIMER_CHUNK);
2864                spin_unlock(&qs->txq[TXQ_OFLD].lock);
2865        }
2866
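        /*
         * Adapt the timer period to the amount of work just done: the period
         * is right-shifted once for every full TX_RECLAIM_TIMER_CHUNK of
         * descriptors reclaimed above, so recently busy queues are revisited
         * sooner.
         */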
2867        next_period = TX_RECLAIM_PERIOD >>
2868                      (max(tbd[TXQ_ETH], tbd[TXQ_OFLD]) /
2869                      TX_RECLAIM_TIMER_CHUNK);
2870        mod_timer(&qs->tx_reclaim_timer, jiffies + next_period);
2871}
2872
2873/**
2874 *      sge_timer_rx - perform periodic maintenance of an SGE qset
2875 *      @data: the SGE queue set to maintain
2876 *
2877 *      a) Replenishes Rx queues that have run out due to memory shortage.
2878 *      Normally new Rx buffers are added when existing ones are consumed but
2879 *      when out of memory a queue can become empty.  We try to add only a few
2880 *      buffers here, the queue will be replenished fully as these new buffers
2881 *      are used up if memory shortage has subsided.
2882 *
2883 *      b) Returns coalesced response queue credits in case a response queue is
2884 *      starved.
2885 *
2886 */
2887static void sge_timer_rx(unsigned long data)
2888{
2889        spinlock_t *lock;
2890        struct sge_qset *qs = (struct sge_qset *)data;
2891        struct port_info *pi = netdev_priv(qs->netdev);
2892        struct adapter *adap = pi->adapter;
2893        u32 status;
2894
2895        lock = adap->params.rev > 0 ?
2896               &qs->rspq.lock : &adap->sge.qs[0].rspq.lock;
2897
2898        if (!spin_trylock_irq(lock))
2899                goto out;
2900
2901        if (napi_is_scheduled(&qs->napi))
2902                goto unlock;
2903
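        /*
         * On pre-rev-4 parts the driver treats the low bits of
         * A_SG_RSPQ_FL_STATUS as one "out of credits" flag per response
         * queue.  If our queue is flagged, hand back one of the credits we
         * have been coalescing so it can make progress again, then clear the
         * flag.
         */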
2904        if (adap->params.rev < 4) {
2905                status = t3_read_reg(adap, A_SG_RSPQ_FL_STATUS);
2906
2907                if (status & (1 << qs->rspq.cntxt_id)) {
2908                        qs->rspq.starved++;
2909                        if (qs->rspq.credits) {
2910                                qs->rspq.credits--;
2911                                refill_rspq(adap, &qs->rspq, 1);
2912                                qs->rspq.restarted++;
2913                                t3_write_reg(adap, A_SG_RSPQ_FL_STATUS,
2914                                             1 << qs->rspq.cntxt_id);
2915                        }
2916                }
2917        }
2918
2919        if (qs->fl[0].credits < qs->fl[0].size)
2920                __refill_fl(adap, &qs->fl[0]);
2921        if (qs->fl[1].credits < qs->fl[1].size)
2922                __refill_fl(adap, &qs->fl[1]);
2923
2924unlock:
2925        spin_unlock_irq(lock);
2926out:
2927        mod_timer(&qs->rx_reclaim_timer, jiffies + RX_RECLAIM_PERIOD);
2928}
2929
2930/**
2931 *      t3_update_qset_coalesce - update coalescing settings for a queue set
2932 *      @qs: the SGE queue set
2933 *      @p: new queue set parameters
2934 *
2935 *      Update the coalescing settings for an SGE queue set.  Nothing is done
2936 *      if the queue set is not initialized yet.
2937 */
2938void t3_update_qset_coalesce(struct sge_qset *qs, const struct qset_params *p)
2939{
2940        qs->rspq.holdoff_tmr = max(p->coalesce_usecs * 10, 1U); /* can't be 0 */
2941        qs->rspq.polling = p->polling;
2942        qs->napi.poll = p->polling ? napi_rx_handler : ofld_poll;
2943}
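
/*
 * Usage sketch (illustrative only): how an ethtool-style set_coalesce path
 * might apply a new rx-usecs value to queue set 0.  The location of the
 * queue set parameters (adap->params.sge.qset[]) and the rx_usecs local are
 * assumptions about the surrounding driver, not something defined here.
 *
 *      struct qset_params *qsp = &adap->params.sge.qset[0];
 *
 *      qsp->coalesce_usecs = rx_usecs;
 *      t3_update_qset_coalesce(&adap->sge.qs[0], qsp);
 */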
2944
2945/**
2946 *      t3_sge_alloc_qset - initialize an SGE queue set
2947 *      @adapter: the adapter
2948 *      @id: the queue set id
2949 *      @nports: how many Ethernet ports will be using this queue set
2950 *      @irq_vec_idx: the IRQ vector index for response queue interrupts
2951 *      @p: configuration parameters for this queue set
2952 *      @ntxq: number of Tx queues for the queue set
2953 *      @dev: net device associated with this queue set
2954 *      @netdevq: net device TX queue associated with this queue set
2955 *
2956 *      Allocate resources and initialize an SGE queue set.  A queue set
2957 *      comprises a response queue, two Rx free-buffer queues, and up to 3
2958 *      Tx queues.  The Tx queues are assigned roles in the order Ethernet
2959 *      queue, offload queue, and control queue.
2960 */
2961int t3_sge_alloc_qset(struct adapter *adapter, unsigned int id, int nports,
2962                      int irq_vec_idx, const struct qset_params *p,
2963                      int ntxq, struct net_device *dev,
2964                      struct netdev_queue *netdevq)
2965{
2966        int i, avail, ret = -ENOMEM;
2967        struct sge_qset *q = &adapter->sge.qs[id];
2968
2969        init_qset_cntxt(q, id);
2970        setup_timer(&q->tx_reclaim_timer, sge_timer_tx, (unsigned long)q);
2971        setup_timer(&q->rx_reclaim_timer, sge_timer_rx, (unsigned long)q);
2972
2973        q->fl[0].desc = alloc_ring(adapter->pdev, p->fl_size,
2974                                   sizeof(struct rx_desc),
2975                                   sizeof(struct rx_sw_desc),
2976                                   &q->fl[0].phys_addr, &q->fl[0].sdesc);
2977        if (!q->fl[0].desc)
2978                goto err;
2979
2980        q->fl[1].desc = alloc_ring(adapter->pdev, p->jumbo_size,
2981                                   sizeof(struct rx_desc),
2982                                   sizeof(struct rx_sw_desc),
2983                                   &q->fl[1].phys_addr, &q->fl[1].sdesc);
2984        if (!q->fl[1].desc)
2985                goto err;
2986
2987        q->rspq.desc = alloc_ring(adapter->pdev, p->rspq_size,
2988                                  sizeof(struct rsp_desc), 0,
2989                                  &q->rspq.phys_addr, NULL);
2990        if (!q->rspq.desc)
2991                goto err;
2992
2993        for (i = 0; i < ntxq; ++i) {
2994                /*
2995                 * The control queue always uses immediate data so does not
2996                 * need to keep track of any sk_buffs.
2997                 */
2998                size_t sz = i == TXQ_CTRL ? 0 : sizeof(struct tx_sw_desc);
2999
3000                q->txq[i].desc = alloc_ring(adapter->pdev, p->txq_size[i],
3001                                            sizeof(struct tx_desc), sz,
3002                                            &q->txq[i].phys_addr,
3003                                            &q->txq[i].sdesc);
3004                if (!q->txq[i].desc)
3005                        goto err;
3006
3007                q->txq[i].gen = 1;
3008                q->txq[i].size = p->txq_size[i];
3009                spin_lock_init(&q->txq[i].lock);
3010                skb_queue_head_init(&q->txq[i].sendq);
3011        }
3012
3013        tasklet_init(&q->txq[TXQ_OFLD].qresume_tsk, restart_offloadq,
3014                     (unsigned long)q);
3015        tasklet_init(&q->txq[TXQ_CTRL].qresume_tsk, restart_ctrlq,
3016                     (unsigned long)q);
3017
3018        q->fl[0].gen = q->fl[1].gen = 1;
3019        q->fl[0].size = p->fl_size;
3020        q->fl[1].size = p->jumbo_size;
3021
3022        q->rspq.gen = 1;
3023        q->rspq.size = p->rspq_size;
3024        spin_lock_init(&q->rspq.lock);
3025        skb_queue_head_init(&q->rspq.rx_queue);
3026
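        /*
         * Stop the Ethernet Tx queue when fewer descriptors remain than the
         * worst case a single packet can consume: a scatter-gather list
         * covering MAX_SKB_FRAGS + 1 buffers plus a few header flits, scaled
         * by the number of ports sharing this queue set.
         */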
3027        q->txq[TXQ_ETH].stop_thres = nports *
3028            flits_to_desc(sgl_len(MAX_SKB_FRAGS + 1) + 3);
3029
3030#if FL0_PG_CHUNK_SIZE > 0
3031        q->fl[0].buf_size = FL0_PG_CHUNK_SIZE;
3032#else
3033        q->fl[0].buf_size = SGE_RX_SM_BUF_SIZE + sizeof(struct cpl_rx_data);
3034#endif
3035#if FL1_PG_CHUNK_SIZE > 0
3036        q->fl[1].buf_size = FL1_PG_CHUNK_SIZE;
3037#else
3038        q->fl[1].buf_size = is_offload(adapter) ?
3039                (16 * 1024) - SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) :
3040                MAX_FRAME_SIZE + 2 + sizeof(struct cpl_rx_pkt);
3041#endif
3042
3043        q->fl[0].use_pages = FL0_PG_CHUNK_SIZE > 0;
3044        q->fl[1].use_pages = FL1_PG_CHUNK_SIZE > 0;
3045        q->fl[0].order = FL0_PG_ORDER;
3046        q->fl[1].order = FL1_PG_ORDER;
3047        q->fl[0].alloc_size = FL0_PG_ALLOC_SIZE;
3048        q->fl[1].alloc_size = FL1_PG_ALLOC_SIZE;
3049
3050        spin_lock_irq(&adapter->sge.reg_lock);
3051
3052        /* FL threshold comparison uses < */
3053        ret = t3_sge_init_rspcntxt(adapter, q->rspq.cntxt_id, irq_vec_idx,
3054                                   q->rspq.phys_addr, q->rspq.size,
3055                                   q->fl[0].buf_size - SGE_PG_RSVD, 1, 0);
3056        if (ret)
3057                goto err_unlock;
3058
3059        for (i = 0; i < SGE_RXQ_PER_SET; ++i) {
3060                ret = t3_sge_init_flcntxt(adapter, q->fl[i].cntxt_id, 0,
3061                                          q->fl[i].phys_addr, q->fl[i].size,
3062                                          q->fl[i].buf_size - SGE_PG_RSVD,
3063                                          p->cong_thres, 1, 0);
3064                if (ret)
3065                        goto err_unlock;
3066        }
3067
3068        ret = t3_sge_init_ecntxt(adapter, q->txq[TXQ_ETH].cntxt_id, USE_GTS,
3069                                 SGE_CNTXT_ETH, id, q->txq[TXQ_ETH].phys_addr,
3070                                 q->txq[TXQ_ETH].size, q->txq[TXQ_ETH].token,
3071                                 1, 0);
3072        if (ret)
3073                goto err_unlock;
3074
3075        if (ntxq > 1) {
3076                ret = t3_sge_init_ecntxt(adapter, q->txq[TXQ_OFLD].cntxt_id,
3077                                         USE_GTS, SGE_CNTXT_OFLD, id,
3078                                         q->txq[TXQ_OFLD].phys_addr,
3079                                         q->txq[TXQ_OFLD].size, 0, 1, 0);
3080                if (ret)
3081                        goto err_unlock;
3082        }
3083
3084        if (ntxq > 2) {
3085                ret = t3_sge_init_ecntxt(adapter, q->txq[TXQ_CTRL].cntxt_id, 0,
3086                                         SGE_CNTXT_CTRL, id,
3087                                         q->txq[TXQ_CTRL].phys_addr,
3088                                         q->txq[TXQ_CTRL].size,
3089                                         q->txq[TXQ_CTRL].token, 1, 0);
3090                if (ret)
3091                        goto err_unlock;
3092        }
3093
3094        spin_unlock_irq(&adapter->sge.reg_lock);
3095
3096        q->adap = adapter;
3097        q->netdev = dev;
3098        q->tx_q = netdevq;
3099        t3_update_qset_coalesce(q, p);
3100
3101        avail = refill_fl(adapter, &q->fl[0], q->fl[0].size,
3102                          GFP_KERNEL | __GFP_COMP);
3103        if (!avail) {
3104                CH_ALERT(adapter, "free list queue 0 initialization failed\n");
3105                goto err;
3106        }
3107        if (avail < q->fl[0].size)
3108                CH_WARN(adapter, "free list queue 0 enabled with %d credits\n",
3109                        avail);
3110
3111        avail = refill_fl(adapter, &q->fl[1], q->fl[1].size,
3112                          GFP_KERNEL | __GFP_COMP);
3113        if (avail < q->fl[1].size)
3114                CH_WARN(adapter, "free list queue 1 enabled with %d credits\n",
3115                        avail);
3116        refill_rspq(adapter, &q->rspq, q->rspq.size - 1);
3117
3118        t3_write_reg(adapter, A_SG_GTS, V_RSPQ(q->rspq.cntxt_id) |
3119                     V_NEWTIMER(q->rspq.holdoff_tmr));
3120
3121        return 0;
3122
3123err_unlock:
3124        spin_unlock_irq(&adapter->sge.reg_lock);
3125err:
3126        t3_free_qset(adapter, q);
3127        return ret;
3128}
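
/*
 * Usage sketch (illustrative only): a bring-up path would allocate one queue
 * set per port along these lines.  The parameter source
 * (adap->params.sge.qset[]), the IRQ vector index, the use of all
 * SGE_TXQ_PER_SET Tx queues and the i/err locals are assumptions for
 * illustration; the real caller in cxgb3_main.c differs in detail.
 *
 *      for_each_port(adap, i) {
 *              struct net_device *dev = adap->port[i];
 *
 *              err = t3_sge_alloc_qset(adap, i, 1, i,
 *                                      &adap->params.sge.qset[i],
 *                                      SGE_TXQ_PER_SET, dev,
 *                                      netdev_get_tx_queue(dev, 0));
 *              if (err)
 *                      break;
 *      }
 */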
3129
3130/**
3131 *      t3_start_sge_timers - start SGE timer callbacks
3132 *      @adap: the adapter
3133 *
3134 *      Starts each SGE queue set's timer callback
3135 */
3136void t3_start_sge_timers(struct adapter *adap)
3137{
3138        int i;
3139
3140        for (i = 0; i < SGE_QSETS; ++i) {
3141                struct sge_qset *q = &adap->sge.qs[i];
3142
3143                if (q->tx_reclaim_timer.function)
3144                        mod_timer(&q->tx_reclaim_timer, jiffies + TX_RECLAIM_PERIOD);
3145
3146                if (q->rx_reclaim_timer.function)
3147                        mod_timer(&q->rx_reclaim_timer, jiffies + RX_RECLAIM_PERIOD);
3148        }
3149}
3150
3151/**
3152 *      t3_stop_sge_timers - stop SGE timer callbacks
3153 *      @adap: the adapter
3154 *
3155 *      Stops each SGE queue set's timer callback
3156 */
3157void t3_stop_sge_timers(struct adapter *adap)
3158{
3159        int i;
3160
3161        for (i = 0; i < SGE_QSETS; ++i) {
3162                struct sge_qset *q = &adap->sge.qs[i];
3163
3164                if (q->tx_reclaim_timer.function)
3165                        del_timer_sync(&q->tx_reclaim_timer);
3166                if (q->rx_reclaim_timer.function)
3167                        del_timer_sync(&q->rx_reclaim_timer);
3168        }
3169}
3170
3171/**
3172 *      t3_free_sge_resources - free SGE resources
3173 *      @adap: the adapter
3174 *
3175 *      Frees resources used by the SGE queue sets.
3176 */
3177void t3_free_sge_resources(struct adapter *adap)
3178{
3179        int i;
3180
3181        for (i = 0; i < SGE_QSETS; ++i)
3182                t3_free_qset(adap, &adap->sge.qs[i]);
3183}
3184
3185/**
3186 *      t3_sge_start - enable SGE
3187 *      @adap: the adapter
3188 *
3189 *      Enables the SGE for DMAs.  This is the last step in starting packet
3190 *      transfers.
3191 */
3192void t3_sge_start(struct adapter *adap)
3193{
3194        t3_set_reg_field(adap, A_SG_CONTROL, F_GLOBALENABLE, F_GLOBALENABLE);
3195}
3196
3197/**
3198 *      t3_sge_stop - disable SGE operation
3199 *      @adap: the adapter
3200 *
3201 *      Disables the DMA engine.  This can be called in emergencies (e.g.,
3202 *      from error interrupts) or from normal process context.  In the latter
3203 *      case it also disables any pending queue restart tasklets.  Note that
3204 *      if it is called in interrupt context it cannot disable the restart
3205 *      tasklets as it cannot wait; however, the tasklets will have no effect
3206 *      since the doorbells are disabled and the driver will call this again
3207 *      later from process context, at which time the tasklets will be stopped
3208 *      if they are still running.
3209 */
3210void t3_sge_stop(struct adapter *adap)
3211{
3212        t3_set_reg_field(adap, A_SG_CONTROL, F_GLOBALENABLE, 0);
3213        if (!in_interrupt()) {
3214                int i;
3215
3216                for (i = 0; i < SGE_QSETS; ++i) {
3217                        struct sge_qset *qs = &adap->sge.qs[i];
3218
3219                        tasklet_kill(&qs->txq[TXQ_OFLD].qresume_tsk);
3220                        tasklet_kill(&qs->txq[TXQ_CTRL].qresume_tsk);
3221                }
3222        }
3223}
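
/*
 * Usage sketch (illustrative only): a process-context teardown would
 * typically quiesce the SGE in roughly this order; the ordering is an
 * assumption based on the comments above, not copied from cxgb3_main.c.
 *
 *      t3_sge_stop(adap);           - also kills the restart tasklets
 *      t3_stop_sge_timers(adap);    - stops the per-qset reclaim timers
 *      t3_free_sge_resources(adap); - releases rings and Rx buffers
 */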
3224
3225/**
3226 *      t3_sge_init - initialize SGE
3227 *      @adap: the adapter
3228 *      @p: the SGE parameters
3229 *
3230 *      Performs SGE initialization needed every time after a chip reset.
3231 *      We do not initialize any of the queue sets here, instead the driver
3232 *      top-level must request those individually.  We also do not enable DMA
3233 *      here, that should be done after the queues have been set up.
3234 */
3235void t3_sge_init(struct adapter *adap, struct sge_params *p)
3236{
3237        unsigned int ctrl, ups = ffs(pci_resource_len(adap->pdev, 2) >> 12);
3238
3239        ctrl = F_DROPPKT | V_PKTSHIFT(2) | F_FLMODE | F_AVOIDCQOVFL |
3240            F_CQCRDTCTRL | F_CONGMODE | F_TNLFLMODE | F_FATLPERREN |
3241            V_HOSTPAGESIZE(PAGE_SHIFT - 11) | F_BIGENDIANINGRESS |
3242            V_USERSPACESIZE(ups ? ups - 1 : 0) | F_ISCSICOALESCING;
3243#if SGE_NUM_GENBITS == 1
3244        ctrl |= F_EGRGENCTRL;
3245#endif
3246        if (adap->params.rev > 0) {
3247                if (!(adap->flags & (USING_MSIX | USING_MSI)))
3248                        ctrl |= F_ONEINTMULTQ | F_OPTONEINTMULTQ;
3249        }
3250        t3_write_reg(adap, A_SG_CONTROL, ctrl);
3251        t3_write_reg(adap, A_SG_EGR_RCQ_DRB_THRSH, V_HIRCQDRBTHRSH(512) |
3252                     V_LORCQDRBTHRSH(512));
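        /*
         * The timer tick is programmed as core_ticks_per_usec / 10 core
         * clocks, i.e. roughly a 0.1 us tick (assuming the register counts
         * core clocks per tick).  That is what lets t3_update_qset_coalesce()
         * express the holdoff as coalesce_usecs * 10.
         */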
3253        t3_write_reg(adap, A_SG_TIMER_TICK, core_ticks_per_usec(adap) / 10);
3254        t3_write_reg(adap, A_SG_CMDQ_CREDIT_TH, V_THRESHOLD(32) |
3255                     V_TIMEOUT(200 * core_ticks_per_usec(adap)));
3256        t3_write_reg(adap, A_SG_HI_DRB_HI_THRSH,
3257                     adap->params.rev < T3_REV_C ? 1000 : 500);
3258        t3_write_reg(adap, A_SG_HI_DRB_LO_THRSH, 256);
3259        t3_write_reg(adap, A_SG_LO_DRB_HI_THRSH, 1000);
3260        t3_write_reg(adap, A_SG_LO_DRB_LO_THRSH, 256);
3261        t3_write_reg(adap, A_SG_OCO_BASE, V_BASE1(0xfff));
3262        t3_write_reg(adap, A_SG_DRB_PRI_THRESH, 63 * 1024);
3263}
3264
3265/**
3266 *      t3_sge_prep - one-time SGE initialization
3267 *      @adap: the associated adapter
3268 *      @p: SGE parameters
3269 *
3270 *      Performs one-time initialization of SGE SW state.  Includes determining
3271 *      defaults for the assorted SGE parameters, which admins can change until
3272 *      they are used to initialize the SGE.
3273 */
3274void t3_sge_prep(struct adapter *adap, struct sge_params *p)
3275{
3276        int i;
3277
3278        p->max_pkt_size = (16 * 1024) - sizeof(struct cpl_rx_data) -
3279            SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
3280
3281        for (i = 0; i < SGE_QSETS; ++i) {
3282                struct qset_params *q = p->qset + i;
3283
3284                q->polling = adap->params.rev > 0;
3285                q->coalesce_usecs = 5;
3286                q->rspq_size = 1024;
3287                q->fl_size = 1024;
3288                q->jumbo_size = 512;
3289                q->txq_size[TXQ_ETH] = 1024;
3290                q->txq_size[TXQ_OFLD] = 1024;
3291                q->txq_size[TXQ_CTRL] = 256;
3292                q->cong_thres = 0;
3293        }
3294
3295        spin_lock_init(&adap->sge.reg_lock);
3296}
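
/*
 * Usage sketch (illustrative only): the defaults set above are consumed only
 * when a queue set is initialized, so a caller may adjust them in between.
 * The adap->params.sge location is an assumption about the surrounding
 * driver; the field names are those used by qset_params in this file.
 *
 *      t3_sge_prep(adap, &adap->params.sge);
 *      adap->params.sge.qset[0].coalesce_usecs = 10;
 *      adap->params.sge.qset[0].jumbo_size = 256;
 */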
3297
3298/**
3299 *      t3_get_desc - dump an SGE descriptor for debugging purposes
3300 *      @qs: the queue set
3301 *      @qnum: identifies the specific queue (0..2: Tx, 3:response, 4..5: Rx)
3302 *      @idx: the descriptor index in the queue
3303 *      @data: where to dump the descriptor contents
3304 *
3305 *      Dumps the contents of a HW descriptor of an SGE queue.  Returns the
3306 *      size of the descriptor.
3307 */
3308int t3_get_desc(const struct sge_qset *qs, unsigned int qnum, unsigned int idx,
3309                unsigned char *data)
3310{
3311        if (qnum >= 6)
3312                return -EINVAL;
3313
3314        if (qnum < 3) {
3315                if (!qs->txq[qnum].desc || idx >= qs->txq[qnum].size)
3316                        return -EINVAL;
3317                memcpy(data, &qs->txq[qnum].desc[idx], sizeof(struct tx_desc));
3318                return sizeof(struct tx_desc);
3319        }
3320
3321        if (qnum == 3) {
3322                if (!qs->rspq.desc || idx >= qs->rspq.size)
3323                        return -EINVAL;
3324                memcpy(data, &qs->rspq.desc[idx], sizeof(struct rsp_desc));
3325                return sizeof(struct rsp_desc);
3326        }
3327
3328        qnum -= 4;
3329        if (!qs->fl[qnum].desc || idx >= qs->fl[qnum].size)
3330                return -EINVAL;
3331        memcpy(data, &qs->fl[qnum].desc[idx], sizeof(struct rx_desc));
3332        return sizeof(struct rx_desc);
3333}
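
/*
 * Usage sketch (illustrative only): dump the first descriptor of queue set
 * 0's Ethernet Tx queue into a local buffer.
 *
 *      unsigned char buf[sizeof(struct tx_desc)];
 *      int len = t3_get_desc(&adap->sge.qs[0], TXQ_ETH, 0, buf);
 */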
3334