linux/drivers/net/ethernet/chelsio/cxgb4/sge.c
   1/*
   2 * This file is part of the Chelsio T4 Ethernet driver for Linux.
   3 *
   4 * Copyright (c) 2003-2014 Chelsio Communications, Inc. All rights reserved.
   5 *
   6 * This software is available to you under a choice of one of two
   7 * licenses.  You may choose to be licensed under the terms of the GNU
   8 * General Public License (GPL) Version 2, available from the file
   9 * COPYING in the main directory of this source tree, or the
  10 * OpenIB.org BSD license below:
  11 *
  12 *     Redistribution and use in source and binary forms, with or
  13 *     without modification, are permitted provided that the following
  14 *     conditions are met:
  15 *
  16 *      - Redistributions of source code must retain the above
  17 *        copyright notice, this list of conditions and the following
  18 *        disclaimer.
  19 *
  20 *      - Redistributions in binary form must reproduce the above
  21 *        copyright notice, this list of conditions and the following
  22 *        disclaimer in the documentation and/or other materials
  23 *        provided with the distribution.
  24 *
  25 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  26 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  27 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  28 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
  29 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
  30 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  31 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  32 * SOFTWARE.
  33 */
  34
  35#include <linux/skbuff.h>
  36#include <linux/netdevice.h>
  37#include <linux/etherdevice.h>
  38#include <linux/if_vlan.h>
  39#include <linux/ip.h>
  40#include <linux/dma-mapping.h>
  41#include <linux/jiffies.h>
  42#include <linux/prefetch.h>
  43#include <linux/export.h>
  44#include <net/xfrm.h>
  45#include <net/ipv6.h>
  46#include <net/tcp.h>
  47#include <net/busy_poll.h>
  48#ifdef CONFIG_CHELSIO_T4_FCOE
  49#include <scsi/fc/fc_fcoe.h>
  50#endif /* CONFIG_CHELSIO_T4_FCOE */
  51#include "cxgb4.h"
  52#include "t4_regs.h"
  53#include "t4_values.h"
  54#include "t4_msg.h"
  55#include "t4fw_api.h"
  56#include "cxgb4_ptp.h"
  57#include "cxgb4_uld.h"
  58
  59/*
  60 * Rx buffer size.  We use largish buffers if possible but settle for single
  61 * pages under memory shortage.
  62 */
  63#if PAGE_SHIFT >= 16
  64# define FL_PG_ORDER 0
  65#else
  66# define FL_PG_ORDER (16 - PAGE_SHIFT)
  67#endif
  68
  69/* RX_PULL_LEN should be <= RX_COPY_THRES */
  70#define RX_COPY_THRES    256
  71#define RX_PULL_LEN      128
  72
  73/*
  74 * Main body length for sk_buffs used for Rx Ethernet packets with fragments.
  75 * Should be >= RX_PULL_LEN but possibly bigger to give pskb_may_pull some room.
  76 */
  77#define RX_PKT_SKB_LEN   512
  78
  79/*
  80 * Max number of Tx descriptors we clean up at a time.  Should be modest as
  81 * freeing skbs isn't cheap and it happens while holding locks.  We just need
   82 * to free packets faster than they arrive; then we eventually catch up and keep
  83 * the amortized cost reasonable.  Must be >= 2 * TXQ_STOP_THRES.  It should
  84 * also match the CIDX Flush Threshold.
  85 */
  86#define MAX_TX_RECLAIM 32
  87
  88/*
  89 * Max number of Rx buffers we replenish at a time.  Again keep this modest,
  90 * allocating buffers isn't cheap either.
  91 */
  92#define MAX_RX_REFILL 16U
  93
  94/*
  95 * Period of the Rx queue check timer.  This timer is infrequent as it has
  96 * something to do only when the system experiences severe memory shortage.
  97 */
  98#define RX_QCHECK_PERIOD (HZ / 2)
  99
 100/*
 101 * Period of the Tx queue check timer.
 102 */
 103#define TX_QCHECK_PERIOD (HZ / 2)
 104
 105/*
 106 * Max number of Tx descriptors to be reclaimed by the Tx timer.
 107 */
 108#define MAX_TIMER_TX_RECLAIM 100
 109
 110/*
 111 * Timer index used when backing off due to memory shortage.
 112 */
 113#define NOMEM_TMR_IDX (SGE_NTIMERS - 1)
 114
 115/*
 116 * Suspension threshold for non-Ethernet Tx queues.  We require enough room
 117 * for a full sized WR.
 118 */
 119#define TXQ_STOP_THRES (SGE_MAX_WR_LEN / sizeof(struct tx_desc))
 120
 121/*
 122 * Max Tx descriptor space we allow for an Ethernet packet to be inlined
 123 * into a WR.
 124 */
 125#define MAX_IMM_TX_PKT_LEN 256
 126
 127/*
 128 * Max size of a WR sent through a control Tx queue.
 129 */
 130#define MAX_CTRL_WR_LEN SGE_MAX_WR_LEN
 131
 132struct rx_sw_desc {                /* SW state per Rx descriptor */
 133        struct page *page;
 134        dma_addr_t dma_addr;
 135};
 136
 137/*
  138 * Rx buffer sizes for "useskbs" Free List buffers (one ingress packet per skb
 139 * buffer).  We currently only support two sizes for 1500- and 9000-byte MTUs.
 140 * We could easily support more but there doesn't seem to be much need for
 141 * that ...
 142 */
 143#define FL_MTU_SMALL 1500
 144#define FL_MTU_LARGE 9000
 145
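/*
 * Return the Free List buffer size needed to hold an ingress packet of the
 * given MTU: the SGE packet-shift padding plus Ethernet and VLAN headers plus
 * the MTU itself, rounded up to the Free List alignment.
 */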
 146static inline unsigned int fl_mtu_bufsize(struct adapter *adapter,
 147                                          unsigned int mtu)
 148{
 149        struct sge *s = &adapter->sge;
 150
 151        return ALIGN(s->pktshift + ETH_HLEN + VLAN_HLEN + mtu, s->fl_align);
 152}
 153
 154#define FL_MTU_SMALL_BUFSIZE(adapter) fl_mtu_bufsize(adapter, FL_MTU_SMALL)
 155#define FL_MTU_LARGE_BUFSIZE(adapter) fl_mtu_bufsize(adapter, FL_MTU_LARGE)
 156
 157/*
 158 * Bits 0..3 of rx_sw_desc.dma_addr have special meaning.  The hardware uses
 159 * these to specify the buffer size as an index into the SGE Free List Buffer
  160 * Size register array.  We also use bit 4 to record when a buffer has been unmapped
 161 * for DMA, but this is of course never sent to the hardware and is only used
 162 * to prevent double unmappings.  All of the above requires that the Free List
 163 * Buffers which we allocate have the bottom 5 bits free (0) -- i.e. are
  164 * 32-byte or a power of 2 greater in alignment.  Since the SGE's minimal
 165 * Free List Buffer alignment is 32 bytes, this works out for us ...
 166 */
 167enum {
 168        RX_BUF_FLAGS     = 0x1f,   /* bottom five bits are special */
  169        RX_BUF_SIZE      = 0x0f,   /* bottom four bits are for buf sizes */
 170        RX_UNMAPPED_BUF  = 0x10,   /* buffer is not mapped */
 171
 172        /*
 173         * XXX We shouldn't depend on being able to use these indices.
 174         * XXX Especially when some other Master PF has initialized the
 175         * XXX adapter or we use the Firmware Configuration File.  We
 176         * XXX should really search through the Host Buffer Size register
 177         * XXX array for the appropriately sized buffer indices.
 178         */
 179        RX_SMALL_PG_BUF  = 0x0,   /* small (PAGE_SIZE) page buffer */
  180        RX_LARGE_PG_BUF  = 0x1,   /* large (FL_PG_ORDER) page buffer */
 181
 182        RX_SMALL_MTU_BUF = 0x2,   /* small MTU buffer */
 183        RX_LARGE_MTU_BUF = 0x3,   /* large MTU buffer */
 184};
 185
 186static int timer_pkt_quota[] = {1, 1, 2, 3, 4, 5};
 187#define MIN_NAPI_WORK  1
 188
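/* Return the bus address of a Free List buffer with the low-order state bits
 * (buffer size index and unmapped flag) masked off.
 */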
 189static inline dma_addr_t get_buf_addr(const struct rx_sw_desc *d)
 190{
 191        return d->dma_addr & ~(dma_addr_t)RX_BUF_FLAGS;
 192}
 193
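/* Return true if the buffer is still DMA-mapped, i.e. RX_UNMAPPED_BUF is not
 * set in its dma_addr.
 */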
 194static inline bool is_buf_mapped(const struct rx_sw_desc *d)
 195{
 196        return !(d->dma_addr & RX_UNMAPPED_BUF);
 197}
 198
 199/**
 200 *      txq_avail - return the number of available slots in a Tx queue
 201 *      @q: the Tx queue
 202 *
 203 *      Returns the number of descriptors in a Tx queue available to write new
 204 *      packets.
 205 */
 206static inline unsigned int txq_avail(const struct sge_txq *q)
 207{
 208        return q->size - 1 - q->in_use;
 209}
 210
 211/**
 212 *      fl_cap - return the capacity of a free-buffer list
 213 *      @fl: the FL
 214 *
 215 *      Returns the capacity of a free-buffer list.  The capacity is less than
 216 *      the size because one descriptor needs to be left unpopulated, otherwise
 217 *      HW will think the FL is empty.
 218 */
 219static inline unsigned int fl_cap(const struct sge_fl *fl)
 220{
 221        return fl->size - 8;   /* 1 descriptor = 8 buffers */
 222}
 223
 224/**
 225 *      fl_starving - return whether a Free List is starving.
 226 *      @adapter: pointer to the adapter
 227 *      @fl: the Free List
 228 *
 229 *      Tests specified Free List to see whether the number of buffers
  230 *      available to the hardware has fallen below our "starvation"
 231 *      threshold.
 232 */
 233static inline bool fl_starving(const struct adapter *adapter,
 234                               const struct sge_fl *fl)
 235{
 236        const struct sge *s = &adapter->sge;
 237
 238        return fl->avail - fl->pend_cred <= s->fl_starve_thres;
 239}
 240
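/**
 *      cxgb4_map_skb - map an sk_buff for DMA to the device
 *      @dev: the device to map for
 *      @skb: the packet
 *      @addr: array that receives the bus addresses
 *
 *      Maps the packet's linear data and each of its page fragments for DMA.
 *      addr[0] receives the address of the linear data and the following
 *      entries those of the fragments.  On failure any mappings already made
 *      are undone and -ENOMEM is returned.
 */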
 241int cxgb4_map_skb(struct device *dev, const struct sk_buff *skb,
 242                  dma_addr_t *addr)
 243{
 244        const skb_frag_t *fp, *end;
 245        const struct skb_shared_info *si;
 246
 247        *addr = dma_map_single(dev, skb->data, skb_headlen(skb), DMA_TO_DEVICE);
 248        if (dma_mapping_error(dev, *addr))
 249                goto out_err;
 250
 251        si = skb_shinfo(skb);
 252        end = &si->frags[si->nr_frags];
 253
 254        for (fp = si->frags; fp < end; fp++) {
 255                *++addr = skb_frag_dma_map(dev, fp, 0, skb_frag_size(fp),
 256                                           DMA_TO_DEVICE);
 257                if (dma_mapping_error(dev, *addr))
 258                        goto unwind;
 259        }
 260        return 0;
 261
 262unwind:
 263        while (fp-- > si->frags)
 264                dma_unmap_page(dev, *--addr, skb_frag_size(fp), DMA_TO_DEVICE);
 265
 266        dma_unmap_single(dev, addr[-1], skb_headlen(skb), DMA_TO_DEVICE);
 267out_err:
 268        return -ENOMEM;
 269}
 270EXPORT_SYMBOL(cxgb4_map_skb);
 271
 272#ifdef CONFIG_NEED_DMA_MAP_STATE
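/* Undo the DMA mappings created by cxgb4_map_skb(), using the bus addresses
 * saved in @addr (linear data first, then the page fragments).
 */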
 273static void unmap_skb(struct device *dev, const struct sk_buff *skb,
 274                      const dma_addr_t *addr)
 275{
 276        const skb_frag_t *fp, *end;
 277        const struct skb_shared_info *si;
 278
 279        dma_unmap_single(dev, *addr++, skb_headlen(skb), DMA_TO_DEVICE);
 280
 281        si = skb_shinfo(skb);
 282        end = &si->frags[si->nr_frags];
 283        for (fp = si->frags; fp < end; fp++)
 284                dma_unmap_page(dev, *addr++, skb_frag_size(fp), DMA_TO_DEVICE);
 285}
 286
 287/**
 288 *      deferred_unmap_destructor - unmap a packet when it is freed
 289 *      @skb: the packet
 290 *
 291 *      This is the packet destructor used for Tx packets that need to remain
 292 *      mapped until they are freed rather than until their Tx descriptors are
 293 *      freed.
 294 */
 295static void deferred_unmap_destructor(struct sk_buff *skb)
 296{
 297        unmap_skb(skb->dev->dev.parent, skb, (dma_addr_t *)skb->head);
 298}
 299#endif
 300
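/* DMA-unmap the packet buffers referenced by the ULPTX SGL that was written
 * into Tx queue @q for @skb, handling the case where the SGL wraps past the
 * end of the descriptor ring (q->stat) back to the start of the queue.
 */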
 301static void unmap_sgl(struct device *dev, const struct sk_buff *skb,
 302                      const struct ulptx_sgl *sgl, const struct sge_txq *q)
 303{
 304        const struct ulptx_sge_pair *p;
 305        unsigned int nfrags = skb_shinfo(skb)->nr_frags;
 306
 307        if (likely(skb_headlen(skb)))
 308                dma_unmap_single(dev, be64_to_cpu(sgl->addr0), ntohl(sgl->len0),
 309                                 DMA_TO_DEVICE);
 310        else {
 311                dma_unmap_page(dev, be64_to_cpu(sgl->addr0), ntohl(sgl->len0),
 312                               DMA_TO_DEVICE);
 313                nfrags--;
 314        }
 315
 316        /*
 317         * the complexity below is because of the possibility of a wrap-around
 318         * in the middle of an SGL
 319         */
 320        for (p = sgl->sge; nfrags >= 2; nfrags -= 2) {
 321                if (likely((u8 *)(p + 1) <= (u8 *)q->stat)) {
 322unmap:                  dma_unmap_page(dev, be64_to_cpu(p->addr[0]),
 323                                       ntohl(p->len[0]), DMA_TO_DEVICE);
 324                        dma_unmap_page(dev, be64_to_cpu(p->addr[1]),
 325                                       ntohl(p->len[1]), DMA_TO_DEVICE);
 326                        p++;
 327                } else if ((u8 *)p == (u8 *)q->stat) {
 328                        p = (const struct ulptx_sge_pair *)q->desc;
 329                        goto unmap;
 330                } else if ((u8 *)p + 8 == (u8 *)q->stat) {
 331                        const __be64 *addr = (const __be64 *)q->desc;
 332
 333                        dma_unmap_page(dev, be64_to_cpu(addr[0]),
 334                                       ntohl(p->len[0]), DMA_TO_DEVICE);
 335                        dma_unmap_page(dev, be64_to_cpu(addr[1]),
 336                                       ntohl(p->len[1]), DMA_TO_DEVICE);
 337                        p = (const struct ulptx_sge_pair *)&addr[2];
 338                } else {
 339                        const __be64 *addr = (const __be64 *)q->desc;
 340
 341                        dma_unmap_page(dev, be64_to_cpu(p->addr[0]),
 342                                       ntohl(p->len[0]), DMA_TO_DEVICE);
 343                        dma_unmap_page(dev, be64_to_cpu(addr[0]),
 344                                       ntohl(p->len[1]), DMA_TO_DEVICE);
 345                        p = (const struct ulptx_sge_pair *)&addr[1];
 346                }
 347        }
 348        if (nfrags) {
 349                __be64 addr;
 350
 351                if ((u8 *)p == (u8 *)q->stat)
 352                        p = (const struct ulptx_sge_pair *)q->desc;
 353                addr = (u8 *)p + 16 <= (u8 *)q->stat ? p->addr[0] :
 354                                                       *(const __be64 *)q->desc;
 355                dma_unmap_page(dev, be64_to_cpu(addr), ntohl(p->len[0]),
 356                               DMA_TO_DEVICE);
 357        }
 358}
 359
 360/**
 361 *      free_tx_desc - reclaims Tx descriptors and their buffers
 362 *      @adapter: the adapter
 363 *      @q: the Tx queue to reclaim descriptors from
 364 *      @n: the number of descriptors to reclaim
 365 *      @unmap: whether the buffers should be unmapped for DMA
 366 *
 367 *      Reclaims Tx descriptors from an SGE Tx queue and frees the associated
 368 *      Tx buffers.  Called with the Tx queue lock held.
 369 */
 370void free_tx_desc(struct adapter *adap, struct sge_txq *q,
 371                  unsigned int n, bool unmap)
 372{
 373        struct tx_sw_desc *d;
 374        unsigned int cidx = q->cidx;
 375        struct device *dev = adap->pdev_dev;
 376
 377        d = &q->sdesc[cidx];
 378        while (n--) {
 379                if (d->skb) {                       /* an SGL is present */
 380                        if (unmap)
 381                                unmap_sgl(dev, d->skb, d->sgl, q);
 382                        dev_consume_skb_any(d->skb);
 383                        d->skb = NULL;
 384                }
 385                ++d;
 386                if (++cidx == q->size) {
 387                        cidx = 0;
 388                        d = q->sdesc;
 389                }
 390        }
 391        q->cidx = cidx;
 392}
 393
 394/*
 395 * Return the number of reclaimable descriptors in a Tx queue.
 396 */
 397static inline int reclaimable(const struct sge_txq *q)
 398{
 399        int hw_cidx = ntohs(READ_ONCE(q->stat->cidx));
 400        hw_cidx -= q->cidx;
 401        return hw_cidx < 0 ? hw_cidx + q->size : hw_cidx;
 402}
 403
 404/**
 405 *      reclaim_completed_tx - reclaims completed TX Descriptors
 406 *      @adap: the adapter
 407 *      @q: the Tx queue to reclaim completed descriptors from
 408 *      @maxreclaim: the maximum number of TX Descriptors to reclaim or -1
 409 *      @unmap: whether the buffers should be unmapped for DMA
 410 *
 411 *      Reclaims Tx Descriptors that the SGE has indicated it has processed,
  412 *      and frees the associated buffers if possible.  If @maxreclaim == -1, then
  413 *      we'll use a default maximum.  Called with the TX Queue locked.
 414 */
 415static inline int reclaim_completed_tx(struct adapter *adap, struct sge_txq *q,
 416                                       int maxreclaim, bool unmap)
 417{
 418        int reclaim = reclaimable(q);
 419
 420        if (reclaim) {
 421                /*
 422                 * Limit the amount of clean up work we do at a time to keep
 423                 * the Tx lock hold time O(1).
 424                 */
 425                if (maxreclaim < 0)
 426                        maxreclaim = MAX_TX_RECLAIM;
 427                if (reclaim > maxreclaim)
 428                        reclaim = maxreclaim;
 429
 430                free_tx_desc(adap, q, reclaim, unmap);
 431                q->in_use -= reclaim;
 432        }
 433
 434        return reclaim;
 435}
 436
 437/**
 438 *      cxgb4_reclaim_completed_tx - reclaims completed Tx descriptors
 439 *      @adap: the adapter
 440 *      @q: the Tx queue to reclaim completed descriptors from
 441 *      @unmap: whether the buffers should be unmapped for DMA
 442 *
 443 *      Reclaims Tx descriptors that the SGE has indicated it has processed,
 444 *      and frees the associated buffers if possible.  Called with the Tx
 445 *      queue locked.
 446 */
 447void cxgb4_reclaim_completed_tx(struct adapter *adap, struct sge_txq *q,
 448                                bool unmap)
 449{
 450        (void)reclaim_completed_tx(adap, q, -1, unmap);
 451}
 452EXPORT_SYMBOL(cxgb4_reclaim_completed_tx);
 453
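/* Return the size of the Rx buffer described by @d, decoded from the buffer
 * size index stored in the low bits of its DMA address.
 */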
 454static inline int get_buf_size(struct adapter *adapter,
 455                               const struct rx_sw_desc *d)
 456{
 457        struct sge *s = &adapter->sge;
 458        unsigned int rx_buf_size_idx = d->dma_addr & RX_BUF_SIZE;
 459        int buf_size;
 460
 461        switch (rx_buf_size_idx) {
 462        case RX_SMALL_PG_BUF:
 463                buf_size = PAGE_SIZE;
 464                break;
 465
 466        case RX_LARGE_PG_BUF:
 467                buf_size = PAGE_SIZE << s->fl_pg_order;
 468                break;
 469
 470        case RX_SMALL_MTU_BUF:
 471                buf_size = FL_MTU_SMALL_BUFSIZE(adapter);
 472                break;
 473
 474        case RX_LARGE_MTU_BUF:
 475                buf_size = FL_MTU_LARGE_BUFSIZE(adapter);
 476                break;
 477
 478        default:
 479                BUG();
 480        }
 481
 482        return buf_size;
 483}
 484
 485/**
 486 *      free_rx_bufs - free the Rx buffers on an SGE free list
 487 *      @adap: the adapter
 488 *      @q: the SGE free list to free buffers from
 489 *      @n: how many buffers to free
 490 *
 491 *      Release the next @n buffers on an SGE free-buffer Rx queue.   The
 492 *      buffers must be made inaccessible to HW before calling this function.
 493 */
 494static void free_rx_bufs(struct adapter *adap, struct sge_fl *q, int n)
 495{
 496        while (n--) {
 497                struct rx_sw_desc *d = &q->sdesc[q->cidx];
 498
 499                if (is_buf_mapped(d))
 500                        dma_unmap_page(adap->pdev_dev, get_buf_addr(d),
 501                                       get_buf_size(adap, d),
 502                                       PCI_DMA_FROMDEVICE);
 503                put_page(d->page);
 504                d->page = NULL;
 505                if (++q->cidx == q->size)
 506                        q->cidx = 0;
 507                q->avail--;
 508        }
 509}
 510
 511/**
 512 *      unmap_rx_buf - unmap the current Rx buffer on an SGE free list
 513 *      @adap: the adapter
 514 *      @q: the SGE free list
 515 *
 516 *      Unmap the current buffer on an SGE free-buffer Rx queue.   The
 517 *      buffer must be made inaccessible to HW before calling this function.
 518 *
 519 *      This is similar to @free_rx_bufs above but does not free the buffer.
 520 *      Do note that the FL still loses any further access to the buffer.
 521 */
 522static void unmap_rx_buf(struct adapter *adap, struct sge_fl *q)
 523{
 524        struct rx_sw_desc *d = &q->sdesc[q->cidx];
 525
 526        if (is_buf_mapped(d))
 527                dma_unmap_page(adap->pdev_dev, get_buf_addr(d),
 528                               get_buf_size(adap, d), PCI_DMA_FROMDEVICE);
 529        d->page = NULL;
 530        if (++q->cidx == q->size)
 531                q->cidx = 0;
 532        q->avail--;
 533}
 534
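/* Tell the hardware about Free List buffers we've added since the last
 * doorbell.  Credits are posted in units of 8 buffers, via the BAR2 user
 * doorbell when available or the kernel doorbell register otherwise; any
 * remainder below 8 stays in q->pend_cred for the next call.
 */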
 535static inline void ring_fl_db(struct adapter *adap, struct sge_fl *q)
 536{
 537        if (q->pend_cred >= 8) {
 538                u32 val = adap->params.arch.sge_fl_db;
 539
 540                if (is_t4(adap->params.chip))
 541                        val |= PIDX_V(q->pend_cred / 8);
 542                else
 543                        val |= PIDX_T5_V(q->pend_cred / 8);
 544
 545                /* Make sure all memory writes to the Free List queue are
 546                 * committed before we tell the hardware about them.
 547                 */
 548                wmb();
 549
 550                /* If we don't have access to the new User Doorbell (T5+), use
 551                 * the old doorbell mechanism; otherwise use the new BAR2
 552                 * mechanism.
 553                 */
 554                if (unlikely(q->bar2_addr == NULL)) {
 555                        t4_write_reg(adap, MYPF_REG(SGE_PF_KDOORBELL_A),
 556                                     val | QID_V(q->cntxt_id));
 557                } else {
 558                        writel(val | QID_V(q->bar2_qid),
 559                               q->bar2_addr + SGE_UDB_KDOORBELL);
 560
 561                        /* This Write memory Barrier will force the write to
 562                         * the User Doorbell area to be flushed.
 563                         */
 564                        wmb();
 565                }
 566                q->pend_cred &= 7;
 567        }
 568}
 569
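/* Record a freshly allocated page and its state-bit-encoded bus address in
 * the software Rx descriptor.
 */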
 570static inline void set_rx_sw_desc(struct rx_sw_desc *sd, struct page *pg,
 571                                  dma_addr_t mapping)
 572{
 573        sd->page = pg;
 574        sd->dma_addr = mapping;      /* includes size low bits */
 575}
 576
 577/**
 578 *      refill_fl - refill an SGE Rx buffer ring
 579 *      @adap: the adapter
 580 *      @q: the ring to refill
 581 *      @n: the number of new buffers to allocate
 582 *      @gfp: the gfp flags for the allocations
 583 *
 584 *      (Re)populate an SGE free-buffer queue with up to @n new packet buffers,
 585 *      allocated with the supplied gfp flags.  The caller must assure that
 586 *      @n does not exceed the queue's capacity.  If afterwards the queue is
 587 *      found critically low mark it as starving in the bitmap of starving FLs.
 588 *
 589 *      Returns the number of buffers allocated.
 590 */
 591static unsigned int refill_fl(struct adapter *adap, struct sge_fl *q, int n,
 592                              gfp_t gfp)
 593{
 594        struct sge *s = &adap->sge;
 595        struct page *pg;
 596        dma_addr_t mapping;
 597        unsigned int cred = q->avail;
 598        __be64 *d = &q->desc[q->pidx];
 599        struct rx_sw_desc *sd = &q->sdesc[q->pidx];
 600        int node;
 601
 602#ifdef CONFIG_DEBUG_FS
 603        if (test_bit(q->cntxt_id - adap->sge.egr_start, adap->sge.blocked_fl))
 604                goto out;
 605#endif
 606
 607        gfp |= __GFP_NOWARN;
 608        node = dev_to_node(adap->pdev_dev);
 609
 610        if (s->fl_pg_order == 0)
 611                goto alloc_small_pages;
 612
 613        /*
 614         * Prefer large buffers
 615         */
 616        while (n) {
 617                pg = alloc_pages_node(node, gfp | __GFP_COMP, s->fl_pg_order);
 618                if (unlikely(!pg)) {
 619                        q->large_alloc_failed++;
 620                        break;       /* fall back to single pages */
 621                }
 622
 623                mapping = dma_map_page(adap->pdev_dev, pg, 0,
 624                                       PAGE_SIZE << s->fl_pg_order,
 625                                       PCI_DMA_FROMDEVICE);
 626                if (unlikely(dma_mapping_error(adap->pdev_dev, mapping))) {
 627                        __free_pages(pg, s->fl_pg_order);
 628                        q->mapping_err++;
 629                        goto out;   /* do not try small pages for this error */
 630                }
 631                mapping |= RX_LARGE_PG_BUF;
 632                *d++ = cpu_to_be64(mapping);
 633
 634                set_rx_sw_desc(sd, pg, mapping);
 635                sd++;
 636
 637                q->avail++;
 638                if (++q->pidx == q->size) {
 639                        q->pidx = 0;
 640                        sd = q->sdesc;
 641                        d = q->desc;
 642                }
 643                n--;
 644        }
 645
 646alloc_small_pages:
 647        while (n--) {
 648                pg = alloc_pages_node(node, gfp, 0);
 649                if (unlikely(!pg)) {
 650                        q->alloc_failed++;
 651                        break;
 652                }
 653
 654                mapping = dma_map_page(adap->pdev_dev, pg, 0, PAGE_SIZE,
 655                                       PCI_DMA_FROMDEVICE);
 656                if (unlikely(dma_mapping_error(adap->pdev_dev, mapping))) {
 657                        put_page(pg);
 658                        q->mapping_err++;
 659                        goto out;
 660                }
 661                *d++ = cpu_to_be64(mapping);
 662
 663                set_rx_sw_desc(sd, pg, mapping);
 664                sd++;
 665
 666                q->avail++;
 667                if (++q->pidx == q->size) {
 668                        q->pidx = 0;
 669                        sd = q->sdesc;
 670                        d = q->desc;
 671                }
 672        }
 673
 674out:    cred = q->avail - cred;
 675        q->pend_cred += cred;
 676        ring_fl_db(adap, q);
 677
 678        if (unlikely(fl_starving(adap, q))) {
 679                smp_wmb();
 680                q->low++;
 681                set_bit(q->cntxt_id - adap->sge.egr_start,
 682                        adap->sge.starving_fl);
 683        }
 684
 685        return cred;
 686}
 687
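/* Top up a Free List from the Rx path: refill with at most MAX_RX_REFILL
 * buffers (fewer if the list is nearly full) using atomic allocations.
 */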
 688static inline void __refill_fl(struct adapter *adap, struct sge_fl *fl)
 689{
 690        refill_fl(adap, fl, min(MAX_RX_REFILL, fl_cap(fl) - fl->avail),
 691                  GFP_ATOMIC);
 692}
 693
 694/**
 695 *      alloc_ring - allocate resources for an SGE descriptor ring
 696 *      @dev: the PCI device's core device
 697 *      @nelem: the number of descriptors
 698 *      @elem_size: the size of each descriptor
 699 *      @sw_size: the size of the SW state associated with each ring element
 700 *      @phys: the physical address of the allocated ring
 701 *      @metadata: address of the array holding the SW state for the ring
 702 *      @stat_size: extra space in HW ring for status information
 703 *      @node: preferred node for memory allocations
 704 *
 705 *      Allocates resources for an SGE descriptor ring, such as Tx queues,
 706 *      free buffer lists, or response queues.  Each SGE ring requires
 707 *      space for its HW descriptors plus, optionally, space for the SW state
 708 *      associated with each HW entry (the metadata).  The function returns
 709 *      three values: the virtual address for the HW ring (the return value
 710 *      of the function), the bus address of the HW ring, and the address
 711 *      of the SW ring.
 712 */
 713static void *alloc_ring(struct device *dev, size_t nelem, size_t elem_size,
 714                        size_t sw_size, dma_addr_t *phys, void *metadata,
 715                        size_t stat_size, int node)
 716{
 717        size_t len = nelem * elem_size + stat_size;
 718        void *s = NULL;
 719        void *p = dma_alloc_coherent(dev, len, phys, GFP_KERNEL);
 720
 721        if (!p)
 722                return NULL;
 723        if (sw_size) {
 724                s = kcalloc_node(sw_size, nelem, GFP_KERNEL, node);
 725
 726                if (!s) {
 727                        dma_free_coherent(dev, len, p, *phys);
 728                        return NULL;
 729                }
 730        }
 731        if (metadata)
 732                *(void **)metadata = s;
 733        return p;
 734}
 735
 736/**
 737 *      sgl_len - calculates the size of an SGL of the given capacity
 738 *      @n: the number of SGL entries
 739 *
 740 *      Calculates the number of flits needed for a scatter/gather list that
 741 *      can hold the given number of entries.
 742 */
 743static inline unsigned int sgl_len(unsigned int n)
 744{
 745        /* A Direct Scatter Gather List uses 32-bit lengths and 64-bit PCI DMA
 746         * addresses.  The DSGL Work Request starts off with a 32-bit DSGL
 747         * ULPTX header, then Length0, then Address0, then, for 1 <= i <= N,
 748         * repeated sequences of { Length[i], Length[i+1], Address[i],
 749         * Address[i+1] } (this ensures that all addresses are on 64-bit
 750         * boundaries).  If N is even, then Length[N+1] should be set to 0 and
 751         * Address[N+1] is omitted.
 752         *
 753         * The following calculation incorporates all of the above.  It's
 754         * somewhat hard to follow but, briefly: the "+2" accounts for the
 755         * first two flits which include the DSGL header, Length0 and
 756         * Address0; the "(3*(n-1))/2" covers the main body of list entries (3
 757         * flits for every pair of the remaining N) +1 if (n-1) is odd; and
 758         * finally the "+((n-1)&1)" adds the one remaining flit needed if
 759         * (n-1) is odd ...
 760         */
 761        n--;
 762        return (3 * n) / 2 + (n & 1) + 2;
 763}
 764
 765/**
 766 *      flits_to_desc - returns the num of Tx descriptors for the given flits
 767 *      @n: the number of flits
 768 *
 769 *      Returns the number of Tx descriptors needed for the supplied number
 770 *      of flits.
 771 */
 772static inline unsigned int flits_to_desc(unsigned int n)
 773{
 774        BUG_ON(n > SGE_MAX_WR_LEN / 8);
 775        return DIV_ROUND_UP(n, 8);
 776}
 777
 778/**
 779 *      is_eth_imm - can an Ethernet packet be sent as immediate data?
 780 *      @skb: the packet
 781 *
 782 *      Returns whether an Ethernet packet is small enough to fit as
 783 *      immediate data. Return value corresponds to headroom required.
 784 */
 785static inline int is_eth_imm(const struct sk_buff *skb, unsigned int chip_ver)
 786{
 787        int hdrlen = 0;
 788
 789        if (skb->encapsulation && skb_shinfo(skb)->gso_size &&
 790            chip_ver > CHELSIO_T5) {
 791                hdrlen = sizeof(struct cpl_tx_tnl_lso);
 792                hdrlen += sizeof(struct cpl_tx_pkt_core);
 793        } else {
 794                hdrlen = skb_shinfo(skb)->gso_size ?
 795                         sizeof(struct cpl_tx_pkt_lso_core) : 0;
 796                hdrlen += sizeof(struct cpl_tx_pkt);
 797        }
 798        if (skb->len <= MAX_IMM_TX_PKT_LEN - hdrlen)
 799                return hdrlen;
 800        return 0;
 801}
 802
 803/**
 804 *      calc_tx_flits - calculate the number of flits for a packet Tx WR
 805 *      @skb: the packet
 806 *
 807 *      Returns the number of flits needed for a Tx WR for the given Ethernet
 808 *      packet, including the needed WR and CPL headers.
 809 */
 810static inline unsigned int calc_tx_flits(const struct sk_buff *skb,
 811                                         unsigned int chip_ver)
 812{
 813        unsigned int flits;
 814        int hdrlen = is_eth_imm(skb, chip_ver);
 815
 816        /* If the skb is small enough, we can pump it out as a work request
 817         * with only immediate data.  In that case we just have to have the
 818         * TX Packet header plus the skb data in the Work Request.
 819         */
 820
 821        if (hdrlen)
 822                return DIV_ROUND_UP(skb->len + hdrlen, sizeof(__be64));
 823
 824        /* Otherwise, we're going to have to construct a Scatter gather list
 825         * of the skb body and fragments.  We also include the flits necessary
 826         * for the TX Packet Work Request and CPL.  We always have a firmware
 827         * Write Header (incorporated as part of the cpl_tx_pkt_lso and
 828         * cpl_tx_pkt structures), followed by either a TX Packet Write CPL
 829         * message or, if we're doing a Large Send Offload, an LSO CPL message
 830         * with an embedded TX Packet Write CPL message.
 831         */
 832        flits = sgl_len(skb_shinfo(skb)->nr_frags + 1);
 833        if (skb_shinfo(skb)->gso_size) {
 834                if (skb->encapsulation && chip_ver > CHELSIO_T5)
 835                        hdrlen = sizeof(struct fw_eth_tx_pkt_wr) +
 836                                 sizeof(struct cpl_tx_tnl_lso);
 837                else
 838                        hdrlen = sizeof(struct fw_eth_tx_pkt_wr) +
 839                                 sizeof(struct cpl_tx_pkt_lso_core);
 840
 841                hdrlen += sizeof(struct cpl_tx_pkt_core);
 842                flits += (hdrlen / sizeof(__be64));
 843        } else {
 844                flits += (sizeof(struct fw_eth_tx_pkt_wr) +
 845                          sizeof(struct cpl_tx_pkt_core)) / sizeof(__be64);
 846        }
 847        return flits;
 848}
 849
 850/**
 851 *      calc_tx_descs - calculate the number of Tx descriptors for a packet
 852 *      @skb: the packet
 853 *
 854 *      Returns the number of Tx descriptors needed for the given Ethernet
 855 *      packet, including the needed WR and CPL headers.
 856 */
 857static inline unsigned int calc_tx_descs(const struct sk_buff *skb,
 858                                         unsigned int chip_ver)
 859{
 860        return flits_to_desc(calc_tx_flits(skb, chip_ver));
 861}
 862
 863/**
 864 *      cxgb4_write_sgl - populate a scatter/gather list for a packet
 865 *      @skb: the packet
 866 *      @q: the Tx queue we are writing into
 867 *      @sgl: starting location for writing the SGL
 868 *      @end: points right after the end of the SGL
 869 *      @start: start offset into skb main-body data to include in the SGL
 870 *      @addr: the list of bus addresses for the SGL elements
 871 *
 872 *      Generates a gather list for the buffers that make up a packet.
 873 *      The caller must provide adequate space for the SGL that will be written.
 874 *      The SGL includes all of the packet's page fragments and the data in its
 875 *      main body except for the first @start bytes.  @sgl must be 16-byte
 876 *      aligned and within a Tx descriptor with available space.  @end points
 877 *      right after the end of the SGL but does not account for any potential
 878 *      wrap around, i.e., @end > @sgl.
 879 */
 880void cxgb4_write_sgl(const struct sk_buff *skb, struct sge_txq *q,
 881                     struct ulptx_sgl *sgl, u64 *end, unsigned int start,
 882                     const dma_addr_t *addr)
 883{
 884        unsigned int i, len;
 885        struct ulptx_sge_pair *to;
 886        const struct skb_shared_info *si = skb_shinfo(skb);
 887        unsigned int nfrags = si->nr_frags;
 888        struct ulptx_sge_pair buf[MAX_SKB_FRAGS / 2 + 1];
 889
 890        len = skb_headlen(skb) - start;
 891        if (likely(len)) {
 892                sgl->len0 = htonl(len);
 893                sgl->addr0 = cpu_to_be64(addr[0] + start);
 894                nfrags++;
 895        } else {
 896                sgl->len0 = htonl(skb_frag_size(&si->frags[0]));
 897                sgl->addr0 = cpu_to_be64(addr[1]);
 898        }
 899
 900        sgl->cmd_nsge = htonl(ULPTX_CMD_V(ULP_TX_SC_DSGL) |
 901                              ULPTX_NSGE_V(nfrags));
 902        if (likely(--nfrags == 0))
 903                return;
 904        /*
 905         * Most of the complexity below deals with the possibility we hit the
 906         * end of the queue in the middle of writing the SGL.  For this case
 907         * only we create the SGL in a temporary buffer and then copy it.
 908         */
 909        to = (u8 *)end > (u8 *)q->stat ? buf : sgl->sge;
 910
 911        for (i = (nfrags != si->nr_frags); nfrags >= 2; nfrags -= 2, to++) {
 912                to->len[0] = cpu_to_be32(skb_frag_size(&si->frags[i]));
 913                to->len[1] = cpu_to_be32(skb_frag_size(&si->frags[++i]));
 914                to->addr[0] = cpu_to_be64(addr[i]);
 915                to->addr[1] = cpu_to_be64(addr[++i]);
 916        }
 917        if (nfrags) {
 918                to->len[0] = cpu_to_be32(skb_frag_size(&si->frags[i]));
 919                to->len[1] = cpu_to_be32(0);
 920                to->addr[0] = cpu_to_be64(addr[i + 1]);
 921        }
 922        if (unlikely((u8 *)end > (u8 *)q->stat)) {
 923                unsigned int part0 = (u8 *)q->stat - (u8 *)sgl->sge, part1;
 924
 925                if (likely(part0))
 926                        memcpy(sgl->sge, buf, part0);
 927                part1 = (u8 *)end - (u8 *)q->stat;
 928                memcpy(q->desc, (u8 *)buf + part0, part1);
 929                end = (void *)q->desc + part1;
 930        }
 931        if ((uintptr_t)end & 8)           /* 0-pad to multiple of 16 */
 932                *end = 0;
 933}
 934EXPORT_SYMBOL(cxgb4_write_sgl);
 935
  936/* This function copies a 64-byte coalesced Work Request into the
  937 * memory-mapped BAR2 space.  For a coalesced WR, the SGE fetches the
  938 * data from its FIFO instead of from host memory.
  939 */
 940static void cxgb_pio_copy(u64 __iomem *dst, u64 *src)
 941{
 942        int count = 8;
 943
 944        while (count) {
 945                writeq(*src, dst);
 946                src++;
 947                dst++;
 948                count--;
 949        }
 950}
 951
 952/**
 953 *      cxgb4_ring_tx_db - check and potentially ring a Tx queue's doorbell
 954 *      @adap: the adapter
 955 *      @q: the Tx queue
 956 *      @n: number of new descriptors to give to HW
 957 *
  958 *      Ring the doorbell for a Tx queue.
 959 */
 960inline void cxgb4_ring_tx_db(struct adapter *adap, struct sge_txq *q, int n)
 961{
 962        /* Make sure that all writes to the TX Descriptors are committed
 963         * before we tell the hardware about them.
 964         */
 965        wmb();
 966
 967        /* If we don't have access to the new User Doorbell (T5+), use the old
 968         * doorbell mechanism; otherwise use the new BAR2 mechanism.
 969         */
 970        if (unlikely(q->bar2_addr == NULL)) {
 971                u32 val = PIDX_V(n);
 972                unsigned long flags;
 973
 974                /* For T4 we need to participate in the Doorbell Recovery
 975                 * mechanism.
 976                 */
 977                spin_lock_irqsave(&q->db_lock, flags);
 978                if (!q->db_disabled)
 979                        t4_write_reg(adap, MYPF_REG(SGE_PF_KDOORBELL_A),
 980                                     QID_V(q->cntxt_id) | val);
 981                else
 982                        q->db_pidx_inc += n;
 983                q->db_pidx = q->pidx;
 984                spin_unlock_irqrestore(&q->db_lock, flags);
 985        } else {
 986                u32 val = PIDX_T5_V(n);
 987
 988                /* T4 and later chips share the same PIDX field offset within
 989                 * the doorbell, but T5 and later shrank the field in order to
 990                 * gain a bit for Doorbell Priority.  The field was absurdly
 991                 * large in the first place (14 bits) so we just use the T5
 992                 * and later limits and warn if a Queue ID is too large.
 993                 */
 994                WARN_ON(val & DBPRIO_F);
 995
 996                /* If we're only writing a single TX Descriptor and we can use
 997                 * Inferred QID registers, we can use the Write Combining
 998                 * Gather Buffer; otherwise we use the simple doorbell.
 999                 */
1000                if (n == 1 && q->bar2_qid == 0) {
1001                        int index = (q->pidx
1002                                     ? (q->pidx - 1)
1003                                     : (q->size - 1));
1004                        u64 *wr = (u64 *)&q->desc[index];
1005
1006                        cxgb_pio_copy((u64 __iomem *)
1007                                      (q->bar2_addr + SGE_UDB_WCDOORBELL),
1008                                      wr);
1009                } else {
1010                        writel(val | QID_V(q->bar2_qid),
1011                               q->bar2_addr + SGE_UDB_KDOORBELL);
1012                }
1013
1014                /* This Write Memory Barrier will force the write to the User
1015                 * Doorbell area to be flushed.  This is needed to prevent
1016                 * writes on different CPUs for the same queue from hitting
1017                 * the adapter out of order.  This is required when some Work
1018                 * Requests take the Write Combine Gather Buffer path (user
1019                 * doorbell area offset [SGE_UDB_WCDOORBELL..+63]) and some
1020                 * take the traditional path where we simply increment the
1021                 * PIDX (User Doorbell area SGE_UDB_KDOORBELL) and have the
1022                 * hardware DMA read the actual Work Request.
1023                 */
1024                wmb();
1025        }
1026}
1027EXPORT_SYMBOL(cxgb4_ring_tx_db);
1028
1029/**
1030 *      cxgb4_inline_tx_skb - inline a packet's data into Tx descriptors
1031 *      @skb: the packet
1032 *      @q: the Tx queue where the packet will be inlined
1033 *      @pos: starting position in the Tx queue where to inline the packet
1034 *
1035 *      Inline a packet's contents directly into Tx descriptors, starting at
1036 *      the given position within the Tx DMA ring.
1037 *      Most of the complexity of this operation is dealing with wrap arounds
1038 *      in the middle of the packet we want to inline.
1039 */
1040void cxgb4_inline_tx_skb(const struct sk_buff *skb,
1041                         const struct sge_txq *q, void *pos)
1042{
1043        int left = (void *)q->stat - pos;
1044        u64 *p;
1045
1046        if (likely(skb->len <= left)) {
1047                if (likely(!skb->data_len))
1048                        skb_copy_from_linear_data(skb, pos, skb->len);
1049                else
1050                        skb_copy_bits(skb, 0, pos, skb->len);
1051                pos += skb->len;
1052        } else {
1053                skb_copy_bits(skb, 0, pos, left);
1054                skb_copy_bits(skb, left, q->desc, skb->len - left);
1055                pos = (void *)q->desc + (skb->len - left);
1056        }
1057
1058        /* 0-pad to multiple of 16 */
1059        p = PTR_ALIGN(pos, 8);
1060        if ((uintptr_t)p & 8)
1061                *p = 0;
1062}
1063EXPORT_SYMBOL(cxgb4_inline_tx_skb);
1064
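/* Copy the first @length bytes of @skb's header into the Tx descriptor ring
 * at @pos, wrapping past the end of the queue if necessary, zero-pad to a
 * multiple of 16 bytes and return the 16-byte-aligned position just past the
 * copied (and padded) data.
 */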
1065static void *inline_tx_skb_header(const struct sk_buff *skb,
1066                                  const struct sge_txq *q,  void *pos,
1067                                  int length)
1068{
1069        u64 *p;
1070        int left = (void *)q->stat - pos;
1071
1072        if (likely(length <= left)) {
1073                memcpy(pos, skb->data, length);
1074                pos += length;
1075        } else {
1076                memcpy(pos, skb->data, left);
1077                memcpy(q->desc, skb->data + left, length - left);
1078                pos = (void *)q->desc + (length - left);
1079        }
1080        /* 0-pad to multiple of 16 */
1081        p = PTR_ALIGN(pos, 8);
1082        if ((uintptr_t)p & 8) {
1083                *p = 0;
1084                return p + 1;
1085        }
1086        return p;
1087}
1088
1089/*
1090 * Figure out what HW csum a packet wants and return the appropriate control
1091 * bits.
1092 */
1093static u64 hwcsum(enum chip_type chip, const struct sk_buff *skb)
1094{
1095        int csum_type;
1096        bool inner_hdr_csum = false;
1097        u16 proto, ver;
1098
1099        if (skb->encapsulation &&
1100            (CHELSIO_CHIP_VERSION(chip) > CHELSIO_T5))
1101                inner_hdr_csum = true;
1102
1103        if (inner_hdr_csum) {
1104                ver = inner_ip_hdr(skb)->version;
1105                proto = (ver == 4) ? inner_ip_hdr(skb)->protocol :
1106                        inner_ipv6_hdr(skb)->nexthdr;
1107        } else {
1108                ver = ip_hdr(skb)->version;
1109                proto = (ver == 4) ? ip_hdr(skb)->protocol :
1110                        ipv6_hdr(skb)->nexthdr;
1111        }
1112
1113        if (ver == 4) {
1114                if (proto == IPPROTO_TCP)
1115                        csum_type = TX_CSUM_TCPIP;
1116                else if (proto == IPPROTO_UDP)
1117                        csum_type = TX_CSUM_UDPIP;
1118                else {
1119nocsum:                 /*
1120                         * unknown protocol, disable HW csum
1121                         * and hope a bad packet is detected
1122                         */
1123                        return TXPKT_L4CSUM_DIS_F;
1124                }
1125        } else {
1126                /*
1127                 * this doesn't work with extension headers
1128                 */
1129                if (proto == IPPROTO_TCP)
1130                        csum_type = TX_CSUM_TCPIP6;
1131                else if (proto == IPPROTO_UDP)
1132                        csum_type = TX_CSUM_UDPIP6;
1133                else
1134                        goto nocsum;
1135        }
1136
1137        if (likely(csum_type >= TX_CSUM_TCPIP)) {
1138                int eth_hdr_len, l4_len;
1139                u64 hdr_len;
1140
1141                if (inner_hdr_csum) {
1142                        /* This allows checksum offload for all encapsulated
1143                         * packets like GRE etc..
1144                         */
1145                        l4_len = skb_inner_network_header_len(skb);
1146                        eth_hdr_len = skb_inner_network_offset(skb) - ETH_HLEN;
1147                } else {
1148                        l4_len = skb_network_header_len(skb);
1149                        eth_hdr_len = skb_network_offset(skb) - ETH_HLEN;
1150                }
1151                hdr_len = TXPKT_IPHDR_LEN_V(l4_len);
1152
1153                if (CHELSIO_CHIP_VERSION(chip) <= CHELSIO_T5)
1154                        hdr_len |= TXPKT_ETHHDR_LEN_V(eth_hdr_len);
1155                else
1156                        hdr_len |= T6_TXPKT_ETHHDR_LEN_V(eth_hdr_len);
1157                return TXPKT_CSUM_TYPE_V(csum_type) | hdr_len;
1158        } else {
1159                int start = skb_transport_offset(skb);
1160
1161                return TXPKT_CSUM_TYPE_V(csum_type) |
1162                        TXPKT_CSUM_START_V(start) |
1163                        TXPKT_CSUM_LOC_V(start + skb->csum_offset);
1164        }
1165}
1166
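/* Stop an Ethernet Tx queue and account for the stop in the queue stats. */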
1167static void eth_txq_stop(struct sge_eth_txq *q)
1168{
1169        netif_tx_stop_queue(q->txq);
1170        q->q.stops++;
1171}
1172
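/* Advance a Tx queue's producer index and in-use count by @n descriptors,
 * wrapping the index at the end of the ring.
 */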
1173static inline void txq_advance(struct sge_txq *q, unsigned int n)
1174{
1175        q->in_use += n;
1176        q->pidx += n;
1177        if (q->pidx >= q->size)
1178                q->pidx -= q->size;
1179}
1180
1181#ifdef CONFIG_CHELSIO_T4_FCOE
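/* If FCoE offload is enabled on the port and @skb is an FCoE frame, set up
 * its header offsets and return in @cntrl the checksum control bits that
 * enable FC CRC offload.  Returns -ENOTSUPP if the frame's SOF/EOF is not
 * supported, 0 otherwise.
 */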
1182static inline int
1183cxgb_fcoe_offload(struct sk_buff *skb, struct adapter *adap,
1184                  const struct port_info *pi, u64 *cntrl)
1185{
1186        const struct cxgb_fcoe *fcoe = &pi->fcoe;
1187
1188        if (!(fcoe->flags & CXGB_FCOE_ENABLED))
1189                return 0;
1190
1191        if (skb->protocol != htons(ETH_P_FCOE))
1192                return 0;
1193
1194        skb_reset_mac_header(skb);
1195        skb->mac_len = sizeof(struct ethhdr);
1196
1197        skb_set_network_header(skb, skb->mac_len);
1198        skb_set_transport_header(skb, skb->mac_len + sizeof(struct fcoe_hdr));
1199
1200        if (!cxgb_fcoe_sof_eof_supported(adap, skb))
1201                return -ENOTSUPP;
1202
1203        /* FC CRC offload */
1204        *cntrl = TXPKT_CSUM_TYPE_V(TX_CSUM_FCOE) |
1205                     TXPKT_L4CSUM_DIS_F | TXPKT_IPCSUM_DIS_F |
1206                     TXPKT_CSUM_START_V(CXGB_FCOE_TXPKT_CSUM_START) |
1207                     TXPKT_CSUM_END_V(CXGB_FCOE_TXPKT_CSUM_END) |
1208                     TXPKT_CSUM_LOC_V(CXGB_FCOE_TXPKT_CSUM_END);
1209        return 0;
1210}
1211#endif /* CONFIG_CHELSIO_T4_FCOE */
1212
 1213/* Returns the tunnel type if the hardware supports offloading it.
1214 * It is called only for T5 and onwards.
1215 */
1216enum cpl_tx_tnl_lso_type cxgb_encap_offload_supported(struct sk_buff *skb)
1217{
1218        u8 l4_hdr = 0;
1219        enum cpl_tx_tnl_lso_type tnl_type = TX_TNL_TYPE_OPAQUE;
1220        struct port_info *pi = netdev_priv(skb->dev);
1221        struct adapter *adapter = pi->adapter;
1222
1223        if (skb->inner_protocol_type != ENCAP_TYPE_ETHER ||
1224            skb->inner_protocol != htons(ETH_P_TEB))
1225                return tnl_type;
1226
1227        switch (vlan_get_protocol(skb)) {
1228        case htons(ETH_P_IP):
1229                l4_hdr = ip_hdr(skb)->protocol;
1230                break;
1231        case htons(ETH_P_IPV6):
1232                l4_hdr = ipv6_hdr(skb)->nexthdr;
1233                break;
1234        default:
1235                return tnl_type;
1236        }
1237
1238        switch (l4_hdr) {
1239        case IPPROTO_UDP:
1240                if (adapter->vxlan_port == udp_hdr(skb)->dest)
1241                        tnl_type = TX_TNL_TYPE_VXLAN;
1242                else if (adapter->geneve_port == udp_hdr(skb)->dest)
1243                        tnl_type = TX_TNL_TYPE_GENEVE;
1244                break;
1245        default:
1246                return tnl_type;
1247        }
1248
1249        return tnl_type;
1250}
1251
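/* Fill in a CPL_TX_TNL_LSO descriptor for an encapsulated (VXLAN/GENEVE) TSO
 * packet: outer Ethernet/IP header lengths, tunnel header length, inner
 * header lengths and the MSS, so the hardware can segment the tunnelled
 * packet.
 */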
1252static inline void t6_fill_tnl_lso(struct sk_buff *skb,
1253                                   struct cpl_tx_tnl_lso *tnl_lso,
1254                                   enum cpl_tx_tnl_lso_type tnl_type)
1255{
1256        u32 val;
1257        int in_eth_xtra_len;
1258        int l3hdr_len = skb_network_header_len(skb);
1259        int eth_xtra_len = skb_network_offset(skb) - ETH_HLEN;
1260        const struct skb_shared_info *ssi = skb_shinfo(skb);
1261        bool v6 = (ip_hdr(skb)->version == 6);
1262
1263        val = CPL_TX_TNL_LSO_OPCODE_V(CPL_TX_TNL_LSO) |
1264              CPL_TX_TNL_LSO_FIRST_F |
1265              CPL_TX_TNL_LSO_LAST_F |
1266              (v6 ? CPL_TX_TNL_LSO_IPV6OUT_F : 0) |
1267              CPL_TX_TNL_LSO_ETHHDRLENOUT_V(eth_xtra_len / 4) |
1268              CPL_TX_TNL_LSO_IPHDRLENOUT_V(l3hdr_len / 4) |
1269              (v6 ? 0 : CPL_TX_TNL_LSO_IPHDRCHKOUT_F) |
1270              CPL_TX_TNL_LSO_IPLENSETOUT_F |
1271              (v6 ? 0 : CPL_TX_TNL_LSO_IPIDINCOUT_F);
1272        tnl_lso->op_to_IpIdSplitOut = htonl(val);
1273
1274        tnl_lso->IpIdOffsetOut = 0;
1275
1276        /* Get the tunnel header length */
1277        val = skb_inner_mac_header(skb) - skb_mac_header(skb);
1278        in_eth_xtra_len = skb_inner_network_header(skb) -
1279                          skb_inner_mac_header(skb) - ETH_HLEN;
1280
1281        switch (tnl_type) {
1282        case TX_TNL_TYPE_VXLAN:
1283        case TX_TNL_TYPE_GENEVE:
1284                tnl_lso->UdpLenSetOut_to_TnlHdrLen =
1285                        htons(CPL_TX_TNL_LSO_UDPCHKCLROUT_F |
1286                        CPL_TX_TNL_LSO_UDPLENSETOUT_F);
1287                break;
1288        default:
1289                tnl_lso->UdpLenSetOut_to_TnlHdrLen = 0;
1290                break;
1291        }
1292
1293        tnl_lso->UdpLenSetOut_to_TnlHdrLen |=
1294                 htons(CPL_TX_TNL_LSO_TNLHDRLEN_V(val) |
1295                       CPL_TX_TNL_LSO_TNLTYPE_V(tnl_type));
1296
1297        tnl_lso->r1 = 0;
1298
1299        val = CPL_TX_TNL_LSO_ETHHDRLEN_V(in_eth_xtra_len / 4) |
1300              CPL_TX_TNL_LSO_IPV6_V(inner_ip_hdr(skb)->version == 6) |
1301              CPL_TX_TNL_LSO_IPHDRLEN_V(skb_inner_network_header_len(skb) / 4) |
1302              CPL_TX_TNL_LSO_TCPHDRLEN_V(inner_tcp_hdrlen(skb) / 4);
1303        tnl_lso->Flow_to_TcpHdrLen = htonl(val);
1304
1305        tnl_lso->IpIdOffset = htons(0);
1306
1307        tnl_lso->IpIdSplit_to_Mss = htons(CPL_TX_TNL_LSO_MSS_V(ssi->gso_size));
1308        tnl_lso->TCPSeqOffset = htonl(0);
1309        tnl_lso->EthLenOffset_Size = htonl(CPL_TX_TNL_LSO_SIZE_V(skb->len));
1310}
1311
1312/**
1313 *      t4_sge_eth_txq_egress_update - handle Ethernet TX Queue update
1314 *      @adap: the adapter
1315 *      @eq: the Ethernet TX Queue
1316 *      @maxreclaim: the maximum number of TX Descriptors to reclaim or -1
1317 *
1318 *      We're typically called here to update the state of an Ethernet TX
1319 *      Queue with respect to the hardware's progress in consuming the TX
1320 *      Work Requests that we've put on that Egress Queue.  This happens
1321 *      when we get Egress Queue Update messages and also prophylactically
1322 *      in regular timer-based Ethernet TX Queue maintenance.
1323 */
1324int t4_sge_eth_txq_egress_update(struct adapter *adap, struct sge_eth_txq *eq,
1325                                 int maxreclaim)
1326{
1327        struct sge_txq *q = &eq->q;
1328        unsigned int reclaimed;
1329
1330        if (!q->in_use || !__netif_tx_trylock(eq->txq))
1331                return 0;
1332
1333        /* Reclaim pending completed TX Descriptors. */
1334        reclaimed = reclaim_completed_tx(adap, &eq->q, maxreclaim, true);
1335
1336        /* If the TX Queue is currently stopped and there's now more than half
1337         * the queue available, restart it.  Otherwise bail out since the rest
 1338         * of what we want to do here involves the possibility of shipping any
1339         * currently buffered Coalesced TX Work Request.
1340         */
1341        if (netif_tx_queue_stopped(eq->txq) && txq_avail(q) > (q->size / 2)) {
1342                netif_tx_wake_queue(eq->txq);
1343                eq->q.restarts++;
1344        }
1345
1346        __netif_tx_unlock(eq->txq);
1347        return reclaimed;
1348}
1349
1350/**
1351 *      cxgb4_eth_xmit - add a packet to an Ethernet Tx queue
1352 *      @skb: the packet
1353 *      @dev: the egress net device
1354 *
1355 *      Add a packet to an SGE Ethernet Tx queue.  Runs with softirqs disabled.
1356 */
1357static netdev_tx_t cxgb4_eth_xmit(struct sk_buff *skb, struct net_device *dev)
1358{
1359        u32 wr_mid, ctrl0, op;
1360        u64 cntrl, *end, *sgl;
1361        int qidx, credits;
1362        unsigned int flits, ndesc;
1363        struct adapter *adap;
1364        struct sge_eth_txq *q;
1365        const struct port_info *pi;
1366        struct fw_eth_tx_pkt_wr *wr;
1367        struct cpl_tx_pkt_core *cpl;
1368        const struct skb_shared_info *ssi;
1369        dma_addr_t addr[MAX_SKB_FRAGS + 1];
1370        bool immediate = false;
1371        int len, max_pkt_len;
1372        bool ptp_enabled = is_ptp_enabled(skb, dev);
1373        unsigned int chip_ver;
1374        enum cpl_tx_tnl_lso_type tnl_type = TX_TNL_TYPE_OPAQUE;
1375
1376#ifdef CONFIG_CHELSIO_T4_FCOE
1377        int err;
1378#endif /* CONFIG_CHELSIO_T4_FCOE */
1379
1380        /*
1381         * The chip min packet length is 10 octets but play safe and reject
1382         * anything shorter than an Ethernet header.
1383         */
1384        if (unlikely(skb->len < ETH_HLEN)) {
1385out_free:       dev_kfree_skb_any(skb);
1386                return NETDEV_TX_OK;
1387        }
1388
1389        /* Discard the packet if the length is greater than mtu */
1390        max_pkt_len = ETH_HLEN + dev->mtu;
1391        if (skb_vlan_tagged(skb))
1392                max_pkt_len += VLAN_HLEN;
1393        if (!skb_shinfo(skb)->gso_size && (unlikely(skb->len > max_pkt_len)))
1394                goto out_free;
1395
1396        pi = netdev_priv(dev);
1397        adap = pi->adapter;
1398        ssi = skb_shinfo(skb);
1399#ifdef CONFIG_CHELSIO_IPSEC_INLINE
1400        if (xfrm_offload(skb) && !ssi->gso_size)
1401                return adap->uld[CXGB4_ULD_CRYPTO].tx_handler(skb, dev);
1402#endif /* CHELSIO_IPSEC_INLINE */
1403
1404        qidx = skb_get_queue_mapping(skb);
1405        if (ptp_enabled) {
1406                spin_lock(&adap->ptp_lock);
1407                if (!(adap->ptp_tx_skb)) {
1408                        skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
1409                        adap->ptp_tx_skb = skb_get(skb);
1410                } else {
1411                        spin_unlock(&adap->ptp_lock);
1412                        goto out_free;
1413                }
1414                q = &adap->sge.ptptxq;
1415        } else {
1416                q = &adap->sge.ethtxq[qidx + pi->first_qset];
1417        }
1418        skb_tx_timestamp(skb);
1419
1420        reclaim_completed_tx(adap, &q->q, -1, true);
1421        cntrl = TXPKT_L4CSUM_DIS_F | TXPKT_IPCSUM_DIS_F;
1422
1423#ifdef CONFIG_CHELSIO_T4_FCOE
1424        err = cxgb_fcoe_offload(skb, adap, pi, &cntrl);
1425        if (unlikely(err == -ENOTSUPP)) {
1426                if (ptp_enabled)
1427                        spin_unlock(&adap->ptp_lock);
1428                goto out_free;
1429        }
1430#endif /* CONFIG_CHELSIO_T4_FCOE */
1431
1432        chip_ver = CHELSIO_CHIP_VERSION(adap->params.chip);
1433        flits = calc_tx_flits(skb, chip_ver);
1434        ndesc = flits_to_desc(flits);
1435        credits = txq_avail(&q->q) - ndesc;
1436
1437        if (unlikely(credits < 0)) {
1438                eth_txq_stop(q);
1439                dev_err(adap->pdev_dev,
1440                        "%s: Tx ring %u full while queue awake!\n",
1441                        dev->name, qidx);
1442                if (ptp_enabled)
1443                        spin_unlock(&adap->ptp_lock);
1444                return NETDEV_TX_BUSY;
1445        }
1446
1447        if (is_eth_imm(skb, chip_ver))
1448                immediate = true;
1449
1450        if (skb->encapsulation && chip_ver > CHELSIO_T5)
1451                tnl_type = cxgb_encap_offload_supported(skb);
1452
1453        if (!immediate &&
1454            unlikely(cxgb4_map_skb(adap->pdev_dev, skb, addr) < 0)) {
1455                q->mapping_err++;
1456                if (ptp_enabled)
1457                        spin_unlock(&adap->ptp_lock);
1458                goto out_free;
1459        }
1460
1461        wr_mid = FW_WR_LEN16_V(DIV_ROUND_UP(flits, 2));
1462        if (unlikely(credits < ETHTXQ_STOP_THRES)) {
1463                /* After we're done injecting the Work Request for this
1464                 * packet, we'll be below our "stop threshold" so stop the TX
1465                 * Queue now and schedule a request for an SGE Egress Queue
1466                 * Update message. The queue will get started later on when
1467                 * the firmware processes this Work Request and sends us an
1468                 * Egress Queue Status Update message indicating that space
1469                 * has opened up.
1470                 */
1471                eth_txq_stop(q);
1472
1473                /* If we're using the SGE Doorbell Queue Timer facility, we
1474                 * don't need to ask the Firmware to send us Egress Queue CIDX
1475                 * Updates: the Hardware will do this automatically.  And
1476                 * since we send the Ingress Queue CIDX Updates to the
1477                 * corresponding Ethernet Response Queue, we'll get them very
1478                 * quickly.
1479                 */
1480                if (!q->dbqt)
1481                        wr_mid |= FW_WR_EQUEQ_F | FW_WR_EQUIQ_F;
1482        }
1483
1484        wr = (void *)&q->q.desc[q->q.pidx];
1485        wr->equiq_to_len16 = htonl(wr_mid);
1486        wr->r3 = cpu_to_be64(0);
1487        end = (u64 *)wr + flits;
1488
1489        len = immediate ? skb->len : 0;
1490        len += sizeof(*cpl);
1491        if (ssi->gso_size) {
1492                struct cpl_tx_pkt_lso_core *lso = (void *)(wr + 1);
1493                bool v6 = (ssi->gso_type & SKB_GSO_TCPV6) != 0;
1494                int l3hdr_len = skb_network_header_len(skb);
1495                int eth_xtra_len = skb_network_offset(skb) - ETH_HLEN;
1496                struct cpl_tx_tnl_lso *tnl_lso = (void *)(wr + 1);
1497
1498                if (tnl_type)
1499                        len += sizeof(*tnl_lso);
1500                else
1501                        len += sizeof(*lso);
1502
1503                wr->op_immdlen = htonl(FW_WR_OP_V(FW_ETH_TX_PKT_WR) |
1504                                       FW_WR_IMMDLEN_V(len));
1505                if (tnl_type) {
1506                        struct iphdr *iph = ip_hdr(skb);
1507
1508                        t6_fill_tnl_lso(skb, tnl_lso, tnl_type);
1509                        cpl = (void *)(tnl_lso + 1);
1510                        /* The driver is expected to compute a partial checksum
1511                         * that does not include the IP Total Length.
1512                         */
1513                        if (iph->version == 4) {
1514                                iph->check = 0;
1515                                iph->tot_len = 0;
1516                                iph->check = (u16)(~ip_fast_csum((u8 *)iph,
1517                                                                 iph->ihl));
1518                        }
1519                        if (skb->ip_summed == CHECKSUM_PARTIAL)
1520                                cntrl = hwcsum(adap->params.chip, skb);
1521                } else {
1522                        lso->lso_ctrl = htonl(LSO_OPCODE_V(CPL_TX_PKT_LSO) |
1523                                        LSO_FIRST_SLICE_F | LSO_LAST_SLICE_F |
1524                                        LSO_IPV6_V(v6) |
1525                                        LSO_ETHHDR_LEN_V(eth_xtra_len / 4) |
1526                                        LSO_IPHDR_LEN_V(l3hdr_len / 4) |
1527                                        LSO_TCPHDR_LEN_V(tcp_hdr(skb)->doff));
1528                        lso->ipid_ofst = htons(0);
1529                        lso->mss = htons(ssi->gso_size);
1530                        lso->seqno_offset = htonl(0);
1531                        if (is_t4(adap->params.chip))
1532                                lso->len = htonl(skb->len);
1533                        else
1534                                lso->len = htonl(LSO_T5_XFER_SIZE_V(skb->len));
1535                        cpl = (void *)(lso + 1);
1536
1537                        if (CHELSIO_CHIP_VERSION(adap->params.chip)
1538                            <= CHELSIO_T5)
1539                                cntrl = TXPKT_ETHHDR_LEN_V(eth_xtra_len);
1540                        else
1541                                cntrl = T6_TXPKT_ETHHDR_LEN_V(eth_xtra_len);
1542
1543                        cntrl |= TXPKT_CSUM_TYPE_V(v6 ?
1544                                 TX_CSUM_TCPIP6 : TX_CSUM_TCPIP) |
1545                                 TXPKT_IPHDR_LEN_V(l3hdr_len);
1546                }
1547                sgl = (u64 *)(cpl + 1); /* the SGL starts here */
1548                if (unlikely((u8 *)sgl >= (u8 *)q->q.stat)) {
1549                        /* If the current position is already at the end of
1550                         * the txq, reset it to point to the start of the queue
1551                         * and update the end pointer as well.
1552                         */
1553                        if (sgl == (u64 *)q->q.stat) {
1554                                int left = (u8 *)end - (u8 *)q->q.stat;
1555
1556                                end = (void *)q->q.desc + left;
1557                                sgl = (void *)q->q.desc;
1558                        }
1559                }
1560                q->tso++;
1561                q->tx_cso += ssi->gso_segs;
1562        } else {
1563                if (ptp_enabled)
1564                        op = FW_PTP_TX_PKT_WR;
1565                else
1566                        op = FW_ETH_TX_PKT_WR;
1567                wr->op_immdlen = htonl(FW_WR_OP_V(op) |
1568                                       FW_WR_IMMDLEN_V(len));
1569                cpl = (void *)(wr + 1);
1570                sgl = (u64 *)(cpl + 1);
1571                if (skb->ip_summed == CHECKSUM_PARTIAL) {
1572                        cntrl = hwcsum(adap->params.chip, skb) |
1573                                TXPKT_IPCSUM_DIS_F;
1574                        q->tx_cso++;
1575                }
1576        }
1577
1578        if (skb_vlan_tag_present(skb)) {
1579                q->vlan_ins++;
1580                cntrl |= TXPKT_VLAN_VLD_F | TXPKT_VLAN_V(skb_vlan_tag_get(skb));
1581#ifdef CONFIG_CHELSIO_T4_FCOE
1582                if (skb->protocol == htons(ETH_P_FCOE))
1583                        cntrl |= TXPKT_VLAN_V(
1584                                 ((skb->priority & 0x7) << VLAN_PRIO_SHIFT));
1585#endif /* CONFIG_CHELSIO_T4_FCOE */
1586        }
1587
1588        ctrl0 = TXPKT_OPCODE_V(CPL_TX_PKT_XT) | TXPKT_INTF_V(pi->tx_chan) |
1589                TXPKT_PF_V(adap->pf);
1590        if (ptp_enabled)
1591                ctrl0 |= TXPKT_TSTAMP_F;
1592#ifdef CONFIG_CHELSIO_T4_DCB
1593        if (is_t4(adap->params.chip))
1594                ctrl0 |= TXPKT_OVLAN_IDX_V(q->dcb_prio);
1595        else
1596                ctrl0 |= TXPKT_T5_OVLAN_IDX_V(q->dcb_prio);
1597#endif
1598        cpl->ctrl0 = htonl(ctrl0);
1599        cpl->pack = htons(0);
1600        cpl->len = htons(skb->len);
1601        cpl->ctrl1 = cpu_to_be64(cntrl);
1602
1603        if (immediate) {
1604                cxgb4_inline_tx_skb(skb, &q->q, sgl);
1605                dev_consume_skb_any(skb);
1606        } else {
1607                int last_desc;
1608
1609                cxgb4_write_sgl(skb, &q->q, (void *)sgl, end, 0, addr);
1610                skb_orphan(skb);
1611
1612                last_desc = q->q.pidx + ndesc - 1;
1613                if (last_desc >= q->q.size)
1614                        last_desc -= q->q.size;
1615                q->q.sdesc[last_desc].skb = skb;
1616                q->q.sdesc[last_desc].sgl = (struct ulptx_sgl *)sgl;
1617        }
1618
1619        txq_advance(&q->q, ndesc);
1620
1621        cxgb4_ring_tx_db(adap, &q->q, ndesc);
1622        if (ptp_enabled)
1623                spin_unlock(&adap->ptp_lock);
1624        return NETDEV_TX_OK;
1625}
1626
1627/* Constants ... */
1628enum {
1629        /* Egress Queue sizes, producer and consumer indices are all in units
1630         * of Egress Context Units (bytes).  Note that as far as the hardware is
1631         * concerned, the free list is an Egress Queue (the host produces free
1632         * buffers which the hardware consumes) and free list entries are
1633         * 64-bit PCI DMA addresses.
1634         */
1635        EQ_UNIT = SGE_EQ_IDXSIZE,
1636        FL_PER_EQ_UNIT = EQ_UNIT / sizeof(__be64),
1637        TXD_PER_EQ_UNIT = EQ_UNIT / sizeof(__be64),
1638
1639        T4VF_ETHTXQ_MAX_HDR = (sizeof(struct fw_eth_tx_pkt_vm_wr) +
1640                               sizeof(struct cpl_tx_pkt_lso_core) +
1641                               sizeof(struct cpl_tx_pkt_core)) / sizeof(__be64),
1642};
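
/* As a concrete reading of the constants above (assuming SGE_EQ_IDXSIZE is
 * the usual 64-byte Egress Context Unit on these chips): FL_PER_EQ_UNIT and
 * TXD_PER_EQ_UNIT both work out to 64 / 8 = 8, and T4VF_ETHTXQ_MAX_HDR is
 * simply the combined WR + LSO CPL + TX Packet CPL header size expressed in
 * 8-byte flits.
 */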
1643
1644/**
1645 *      t4vf_is_eth_imm - can an Ethernet packet be sent as immediate data?
1646 *      @skb: the packet
1647 *
1648 *      Returns whether an Ethernet packet is small enough to fit completely as
1649 *      immediate data.
1650 */
1651static inline int t4vf_is_eth_imm(const struct sk_buff *skb)
1652{
1653        /* The VF Driver uses the FW_ETH_TX_PKT_VM_WR firmware Work Request
1654         * which does not accommodate immediate data.  We could dike out all
1655         * of the support code for immediate data but that would tie our hands
1656         * too much if we ever want to enhance the firmware.  It would also
1657         * create more differences between the PF and VF Drivers.
1658         */
1659        return false;
1660}
1661
1662/**
1663 *      t4vf_calc_tx_flits - calculate the number of flits for a packet TX WR
1664 *      @skb: the packet
1665 *
1666 *      Returns the number of flits needed for a TX Work Request for the
1667 *      given Ethernet packet, including the needed WR and CPL headers.
1668 */
1669static inline unsigned int t4vf_calc_tx_flits(const struct sk_buff *skb)
1670{
1671        unsigned int flits;
1672
1673        /* If the skb is small enough, we can pump it out as a work request
1674         * with only immediate data.  In that case we just have to have the
1675         * TX Packet header plus the skb data in the Work Request.
1676         */
1677        if (t4vf_is_eth_imm(skb))
1678                return DIV_ROUND_UP(skb->len + sizeof(struct cpl_tx_pkt),
1679                                    sizeof(__be64));
1680
1681        /* Otherwise, we're going to have to construct a Scatter/Gather List
1682         * of the skb body and fragments.  We also include the flits necessary
1683         * for the TX Packet Work Request and CPL.  We always have a firmware
1684         * Write Header (incorporated as part of the cpl_tx_pkt_lso and
1685         * cpl_tx_pkt structures), followed by either a TX Packet Write CPL
1686         * message or, if we're doing a Large Send Offload, an LSO CPL message
1687         * with an embedded TX Packet Write CPL message.
1688         */
1689        flits = sgl_len(skb_shinfo(skb)->nr_frags + 1);
1690        if (skb_shinfo(skb)->gso_size)
1691                flits += (sizeof(struct fw_eth_tx_pkt_vm_wr) +
1692                          sizeof(struct cpl_tx_pkt_lso_core) +
1693                          sizeof(struct cpl_tx_pkt_core)) / sizeof(__be64);
1694        else
1695                flits += (sizeof(struct fw_eth_tx_pkt_vm_wr) +
1696                          sizeof(struct cpl_tx_pkt_core)) / sizeof(__be64);
1697        return flits;
1698}
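
/* A quick worked instance of the calculation above (illustrative only): a
 * non-GSO skb with three page fragments plus its linear data needs
 * sgl_len(3 + 1) flits for the Scatter/Gather List (the "+ 1" covers the
 * linear part), plus the fixed
 * (sizeof(fw_eth_tx_pkt_vm_wr) + sizeof(cpl_tx_pkt_core)) / 8 header flits;
 * flits_to_desc() then rounds the total up to whole TX descriptors.
 */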
1699
1700/**
1701 *      cxgb4_vf_eth_xmit - add a packet to an Ethernet TX queue
1702 *      @skb: the packet
1703 *      @dev: the egress net device
1704 *
1705 *      Add a packet to an SGE Ethernet TX queue.  Runs with softirqs disabled.
1706 */
1707static netdev_tx_t cxgb4_vf_eth_xmit(struct sk_buff *skb,
1708                                     struct net_device *dev)
1709{
1710        dma_addr_t addr[MAX_SKB_FRAGS + 1];
1711        const struct skb_shared_info *ssi;
1712        struct fw_eth_tx_pkt_vm_wr *wr;
1713        int qidx, credits, max_pkt_len;
1714        struct cpl_tx_pkt_core *cpl;
1715        const struct port_info *pi;
1716        unsigned int flits, ndesc;
1717        struct sge_eth_txq *txq;
1718        struct adapter *adapter;
1719        u64 cntrl, *end;
1720        u32 wr_mid;
1721        const size_t fw_hdr_copy_len = sizeof(wr->ethmacdst) +
1722                                       sizeof(wr->ethmacsrc) +
1723                                       sizeof(wr->ethtype) +
1724                                       sizeof(wr->vlantci);
1725
1726        /* The chip minimum packet length is 10 octets but the firmware
1727         * command that we are using requires that we copy the Ethernet header
1728         * (including the VLAN tag) into the Work Request header, so we reject
1729         * anything smaller than that ...
1730         */
1731        if (unlikely(skb->len < fw_hdr_copy_len))
1732                goto out_free;
1733
1734        /* Discard the packet if the length is greater than the MTU */
1735        max_pkt_len = ETH_HLEN + dev->mtu;
1736        if (skb_vlan_tag_present(skb))
1737                max_pkt_len += VLAN_HLEN;
1738        if (!skb_shinfo(skb)->gso_size && (unlikely(skb->len > max_pkt_len)))
1739                goto out_free;
1740
1741        /* Figure out which TX Queue we're going to use. */
1742        pi = netdev_priv(dev);
1743        adapter = pi->adapter;
1744        qidx = skb_get_queue_mapping(skb);
1745        WARN_ON(qidx >= pi->nqsets);
1746        txq = &adapter->sge.ethtxq[pi->first_qset + qidx];
1747
1748        /* Take this opportunity to reclaim any TX Descriptors whose DMA
1749         * transfers have completed.
1750         */
1751        reclaim_completed_tx(adapter, &txq->q, -1, true);
1752
1753        /* Calculate the number of flits and TX Descriptors we're going to
1754         * need along with how many TX Descriptors will be left over after
1755         * we inject our Work Request.
1756         */
1757        flits = t4vf_calc_tx_flits(skb);
1758        ndesc = flits_to_desc(flits);
1759        credits = txq_avail(&txq->q) - ndesc;
1760
1761        if (unlikely(credits < 0)) {
1762                /* Not enough room for this packet's Work Request.  Stop the
1763                 * TX Queue and return a "busy" condition.  The queue will get
1764                 * started later on when the firmware informs us that space
1765                 * has opened up.
1766                 */
1767                eth_txq_stop(txq);
1768                dev_err(adapter->pdev_dev,
1769                        "%s: TX ring %u full while queue awake!\n",
1770                        dev->name, qidx);
1771                return NETDEV_TX_BUSY;
1772        }
1773
1774        if (!t4vf_is_eth_imm(skb) &&
1775            unlikely(cxgb4_map_skb(adapter->pdev_dev, skb, addr) < 0)) {
1776                /* We need to map the skb into PCI DMA space (because it can't
1777                 * be in-lined directly into the Work Request) and the mapping
1778                 * operation failed.  Record the error and drop the packet.
1779                 */
1780                txq->mapping_err++;
1781                goto out_free;
1782        }
1783
1784        wr_mid = FW_WR_LEN16_V(DIV_ROUND_UP(flits, 2));
1785        if (unlikely(credits < ETHTXQ_STOP_THRES)) {
1786                /* After we're done injecting the Work Request for this
1787                 * packet, we'll be below our "stop threshold" so stop the TX
1788                 * Queue now and schedule a request for an SGE Egress Queue
1789                 * Update message.  The queue will get started later on when
1790                 * the firmware processes this Work Request and sends us an
1791                 * Egress Queue Status Update message indicating that space
1792                 * has opened up.
1793                 */
1794                eth_txq_stop(txq);
1795
1796                /* If we're using the SGE Doorbell Queue Timer facility, we
1797                 * don't need to ask the Firmware to send us Egress Queue CIDX
1798                 * Updates: the Hardware will do this automatically.  And
1799                 * since we send the Ingress Queue CIDX Updates to the
1800                 * corresponding Ethernet Response Queue, we'll get them very
1801                 * quickly.
1802                 */
1803                if (!txq->dbqt)
1804                        wr_mid |= FW_WR_EQUEQ_F | FW_WR_EQUIQ_F;
1805        }
1806
1807        /* Start filling in our Work Request.  Note that we do _not_ handle
1808         * the WR Header wrapping around the TX Descriptor Ring.  If our
1809         * maximum header size ever exceeds one TX Descriptor, we'll need to
1810         * do something else here.
1811         */
1812        WARN_ON(DIV_ROUND_UP(T4VF_ETHTXQ_MAX_HDR, TXD_PER_EQ_UNIT) > 1);
1813        wr = (void *)&txq->q.desc[txq->q.pidx];
1814        wr->equiq_to_len16 = cpu_to_be32(wr_mid);
1815        wr->r3[0] = cpu_to_be32(0);
1816        wr->r3[1] = cpu_to_be32(0);
1817        skb_copy_from_linear_data(skb, (void *)wr->ethmacdst, fw_hdr_copy_len);
1818        end = (u64 *)wr + flits;
1819
1820        /* If this is a Large Send Offload packet we'll put in an LSO CPL
1821         * message with an encapsulated TX Packet CPL message.  Otherwise we
1822         * just use a TX Packet CPL message.
1823         */
1824        ssi = skb_shinfo(skb);
1825        if (ssi->gso_size) {
1826                struct cpl_tx_pkt_lso_core *lso = (void *)(wr + 1);
1827                bool v6 = (ssi->gso_type & SKB_GSO_TCPV6) != 0;
1828                int l3hdr_len = skb_network_header_len(skb);
1829                int eth_xtra_len = skb_network_offset(skb) - ETH_HLEN;
1830
1831                wr->op_immdlen =
1832                        cpu_to_be32(FW_WR_OP_V(FW_ETH_TX_PKT_VM_WR) |
1833                                    FW_WR_IMMDLEN_V(sizeof(*lso) +
1834                                                    sizeof(*cpl)));
1835                /* Fill in the LSO CPL message. */
1836                lso->lso_ctrl =
1837                        cpu_to_be32(LSO_OPCODE_V(CPL_TX_PKT_LSO) |
1838                                    LSO_FIRST_SLICE_F |
1839                                    LSO_LAST_SLICE_F |
1840                                    LSO_IPV6_V(v6) |
1841                                    LSO_ETHHDR_LEN_V(eth_xtra_len / 4) |
1842                                    LSO_IPHDR_LEN_V(l3hdr_len / 4) |
1843                                    LSO_TCPHDR_LEN_V(tcp_hdr(skb)->doff));
1844                lso->ipid_ofst = cpu_to_be16(0);
1845                lso->mss = cpu_to_be16(ssi->gso_size);
1846                lso->seqno_offset = cpu_to_be32(0);
1847                if (is_t4(adapter->params.chip))
1848                        lso->len = cpu_to_be32(skb->len);
1849                else
1850                        lso->len = cpu_to_be32(LSO_T5_XFER_SIZE_V(skb->len));
1851
1852                /* Set up TX Packet CPL pointer, control word and perform
1853                 * accounting.
1854                 */
1855                cpl = (void *)(lso + 1);
1856
1857                if (CHELSIO_CHIP_VERSION(adapter->params.chip) <= CHELSIO_T5)
1858                        cntrl = TXPKT_ETHHDR_LEN_V(eth_xtra_len);
1859                else
1860                        cntrl = T6_TXPKT_ETHHDR_LEN_V(eth_xtra_len);
1861
1862                cntrl |= TXPKT_CSUM_TYPE_V(v6 ?
1863                                           TX_CSUM_TCPIP6 : TX_CSUM_TCPIP) |
1864                         TXPKT_IPHDR_LEN_V(l3hdr_len);
1865                txq->tso++;
1866                txq->tx_cso += ssi->gso_segs;
1867        } else {
1868                int len;
1869
1870                len = (t4vf_is_eth_imm(skb)
1871                       ? skb->len + sizeof(*cpl)
1872                       : sizeof(*cpl));
1873                wr->op_immdlen =
1874                        cpu_to_be32(FW_WR_OP_V(FW_ETH_TX_PKT_VM_WR) |
1875                                    FW_WR_IMMDLEN_V(len));
1876
1877                /* Set up TX Packet CPL pointer, control word and perform
1878                 * accounting.
1879                 */
1880                cpl = (void *)(wr + 1);
1881                if (skb->ip_summed == CHECKSUM_PARTIAL) {
1882                        cntrl = hwcsum(adapter->params.chip, skb) |
1883                                TXPKT_IPCSUM_DIS_F;
1884                        txq->tx_cso++;
1885                } else {
1886                        cntrl = TXPKT_L4CSUM_DIS_F | TXPKT_IPCSUM_DIS_F;
1887                }
1888        }
1889
1890        /* If there's a VLAN tag present, add that to the list of things to
1891         * do in this Work Request.
1892         */
1893        if (skb_vlan_tag_present(skb)) {
1894                txq->vlan_ins++;
1895                cntrl |= TXPKT_VLAN_VLD_F | TXPKT_VLAN_V(skb_vlan_tag_get(skb));
1896        }
1897
1898        /* Fill in the TX Packet CPL message header. */
1899        cpl->ctrl0 = cpu_to_be32(TXPKT_OPCODE_V(CPL_TX_PKT_XT) |
1900                                 TXPKT_INTF_V(pi->port_id) |
1901                                 TXPKT_PF_V(0));
1902        cpl->pack = cpu_to_be16(0);
1903        cpl->len = cpu_to_be16(skb->len);
1904        cpl->ctrl1 = cpu_to_be64(cntrl);
1905
1906        /* Fill in the body of the TX Packet CPL message with either in-lined
1907         * data or a Scatter/Gather List.
1908         */
1909        if (t4vf_is_eth_imm(skb)) {
1910                /* In-line the packet's data and free the skb since we don't
1911                 * need it any longer.
1912                 */
1913                cxgb4_inline_tx_skb(skb, &txq->q, cpl + 1);
1914                dev_consume_skb_any(skb);
1915        } else {
1916                /* Write the skb's Scatter/Gather list into the TX Packet CPL
1917                 * message and retain a pointer to the skb so we can free it
1918                 * later when its DMA completes.  (We store the skb pointer
1919                 * in the Software Descriptor corresponding to the last TX
1920                 * Descriptor used by the Work Request.)
1921                 *
1922                 * The retained skb will be freed when the corresponding TX
1923                 * Descriptors are reclaimed after their DMAs complete.
1924                 * However, this could take quite a while since, in general,
1925                 * the hardware is set up to be lazy about sending DMA
1926                 * completion notifications to us and we mostly perform TX
1927                 * reclaims in the transmit routine.
1928                 *
1929                 * This is good for performance but means that we rely on new
1930                 * TX packets arriving to run the destructors of completed
1931                 * packets, which open up space in their sockets' send queues.
1932                 * Sometimes we do not get such new packets causing TX to
1933                 * stall.  A single UDP transmitter is a good example of this
1934                 * situation.  We have a clean up timer that periodically
1935                 * reclaims completed packets but it doesn't run often enough
1936                 * (nor do we want it to) to prevent lengthy stalls.  A
1937                 * solution to this problem is to run the destructor early,
1938                 * after the packet is queued but before it's DMAd.  A con is
1939                 * that we lie to socket memory accounting, but the amount of
1940                 * extra memory is reasonable (limited by the number of TX
1941                 * descriptors), the packets do actually get freed quickly by
1942                 * new packets almost always, and for protocols like TCP that
1943                 * wait for acks to really free up the data the extra memory
1944                 * is even less.  On the positive side we run the destructors
1945                 * on the sending CPU rather than on a potentially different
1946                 * completing CPU, usually a good thing.
1947                 *
1948                 * Run the destructor before telling the DMA engine about the
1949                 * packet to make sure it doesn't complete and get freed
1950                 * prematurely.
1951                 */
1952                struct ulptx_sgl *sgl = (struct ulptx_sgl *)(cpl + 1);
1953                struct sge_txq *tq = &txq->q;
1954                int last_desc;
1955
1956                /* If the Work Request header was an exact multiple of our TX
1957                 * Descriptor length, then it's possible that the starting SGL
1958                 * pointer lines up exactly with the end of our TX Descriptor
1959                 * ring.  If that's the case, wrap around to the beginning
1960                 * here ...
1961                 */
1962                if (unlikely((void *)sgl == (void *)tq->stat)) {
1963                        sgl = (void *)tq->desc;
1964                        end = (void *)((void *)tq->desc +
1965                                       ((void *)end - (void *)tq->stat));
1966                }
1967
1968                cxgb4_write_sgl(skb, tq, sgl, end, 0, addr);
1969                skb_orphan(skb);
1970
1971                last_desc = tq->pidx + ndesc - 1;
1972                if (last_desc >= tq->size)
1973                        last_desc -= tq->size;
1974                tq->sdesc[last_desc].skb = skb;
1975                tq->sdesc[last_desc].sgl = sgl;
1976        }
1977
1978        /* Advance our internal TX Queue state, tell the hardware about
1979         * the new TX descriptors and return success.
1980         */
1981        txq_advance(&txq->q, ndesc);
1982
1983        cxgb4_ring_tx_db(adapter, &txq->q, ndesc);
1984        return NETDEV_TX_OK;
1985
1986out_free:
1987        /* An error of some sort happened.  Free the TX skb and tell the
1988         * OS that we've "dealt" with the packet ...
1989         */
1990        dev_kfree_skb_any(skb);
1991        return NETDEV_TX_OK;
1992}
1993
1994netdev_tx_t t4_start_xmit(struct sk_buff *skb, struct net_device *dev)
1995{
1996        struct port_info *pi = netdev_priv(dev);
1997
1998        if (unlikely(pi->eth_flags & PRIV_FLAG_PORT_TX_VM))
1999                return cxgb4_vf_eth_xmit(skb, dev);
2000
2001        return cxgb4_eth_xmit(skb, dev);
2002}
2003
2004/**
2005 *      reclaim_completed_tx_imm - reclaim completed control-queue Tx descs
2006 *      @q: the SGE control Tx queue
2007 *
2008 *      This is a variant of cxgb4_reclaim_completed_tx() that is used
2009 *      for Tx queues that send only immediate data (presently just
2010 *      the control queues) and thus do not have any sk_buffs to release.
2011 */
2012static inline void reclaim_completed_tx_imm(struct sge_txq *q)
2013{
2014        int hw_cidx = ntohs(READ_ONCE(q->stat->cidx));
2015        int reclaim = hw_cidx - q->cidx;
2016
2017        if (reclaim < 0)
2018                reclaim += q->size;
2019
2020        q->in_use -= reclaim;
2021        q->cidx = hw_cidx;
2022}
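
/* Worked example of the arithmetic above (values purely illustrative): with
 * a 1024-entry ring, q->cidx == 1019 and a hardware cidx of 5 read from the
 * status page, reclaim = 5 - 1019 = -1014, which wraps to -1014 + 1024 = 10
 * descriptors reclaimed.
 */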
2023
2024/**
2025 *      is_imm - check whether a packet can be sent as immediate data
2026 *      @skb: the packet
2027 *
2028 *      Returns true if a packet can be sent as a WR with immediate data.
2029 */
2030static inline int is_imm(const struct sk_buff *skb)
2031{
2032        return skb->len <= MAX_CTRL_WR_LEN;
2033}
2034
2035/**
2036 *      ctrlq_check_stop - check if a control queue is full and should stop
2037 *      @q: the queue
2038 *      @wr: most recent WR written to the queue
2039 *
2040 *      Check if a control queue has become full and should be stopped.
2041 *      We clean up control queue descriptors very lazily, only when we run out.
2042 *      If the queue is still full after reclaiming any completed descriptors
2043 *      we suspend it and have the last WR wake it up.
2044 */
2045static void ctrlq_check_stop(struct sge_ctrl_txq *q, struct fw_wr_hdr *wr)
2046{
2047        reclaim_completed_tx_imm(&q->q);
2048        if (unlikely(txq_avail(&q->q) < TXQ_STOP_THRES)) {
2049                wr->lo |= htonl(FW_WR_EQUEQ_F | FW_WR_EQUIQ_F);
2050                q->q.stops++;
2051                q->full = 1;
2052        }
2053}
2054
2055/**
2056 *      ctrl_xmit - send a packet through an SGE control Tx queue
2057 *      @q: the control queue
2058 *      @skb: the packet
2059 *
2060 *      Send a packet through an SGE control Tx queue.  Packets sent through
2061 *      a control queue must fit entirely as immediate data.
2062 */
2063static int ctrl_xmit(struct sge_ctrl_txq *q, struct sk_buff *skb)
2064{
2065        unsigned int ndesc;
2066        struct fw_wr_hdr *wr;
2067
2068        if (unlikely(!is_imm(skb))) {
2069                WARN_ON(1);
2070                dev_kfree_skb(skb);
2071                return NET_XMIT_DROP;
2072        }
2073
2074        ndesc = DIV_ROUND_UP(skb->len, sizeof(struct tx_desc));
2075        spin_lock(&q->sendq.lock);
2076
2077        if (unlikely(q->full)) {
2078                skb->priority = ndesc;                  /* save for restart */
2079                __skb_queue_tail(&q->sendq, skb);
2080                spin_unlock(&q->sendq.lock);
2081                return NET_XMIT_CN;
2082        }
2083
2084        wr = (struct fw_wr_hdr *)&q->q.desc[q->q.pidx];
2085        cxgb4_inline_tx_skb(skb, &q->q, wr);
2086
2087        txq_advance(&q->q, ndesc);
2088        if (unlikely(txq_avail(&q->q) < TXQ_STOP_THRES))
2089                ctrlq_check_stop(q, wr);
2090
2091        cxgb4_ring_tx_db(q->adap, &q->q, ndesc);
2092        spin_unlock(&q->sendq.lock);
2093
2094        kfree_skb(skb);
2095        return NET_XMIT_SUCCESS;
2096}
2097
2098/**
2099 *      restart_ctrlq - restart a suspended control queue
2100 *      @data: the control queue to restart
2101 *
2102 *      Resumes transmission on a suspended Tx control queue.
2103 */
2104static void restart_ctrlq(unsigned long data)
2105{
2106        struct sk_buff *skb;
2107        unsigned int written = 0;
2108        struct sge_ctrl_txq *q = (struct sge_ctrl_txq *)data;
2109
2110        spin_lock(&q->sendq.lock);
2111        reclaim_completed_tx_imm(&q->q);
2112        BUG_ON(txq_avail(&q->q) < TXQ_STOP_THRES);  /* q should be empty */
2113
2114        while ((skb = __skb_dequeue(&q->sendq)) != NULL) {
2115                struct fw_wr_hdr *wr;
2116                unsigned int ndesc = skb->priority;     /* previously saved */
2117
2118                written += ndesc;
2119                /* Write descriptors and free skbs outside the lock to limit
2120                 * wait times.  q->full is still set so new skbs will be queued.
2121                 */
2122                wr = (struct fw_wr_hdr *)&q->q.desc[q->q.pidx];
2123                txq_advance(&q->q, ndesc);
2124                spin_unlock(&q->sendq.lock);
2125
2126                cxgb4_inline_tx_skb(skb, &q->q, wr);
2127                kfree_skb(skb);
2128
2129                if (unlikely(txq_avail(&q->q) < TXQ_STOP_THRES)) {
2130                        unsigned long old = q->q.stops;
2131
2132                        ctrlq_check_stop(q, wr);
2133                        if (q->q.stops != old) {          /* suspended anew */
2134                                spin_lock(&q->sendq.lock);
2135                                goto ringdb;
2136                        }
2137                }
2138                if (written > 16) {
2139                        cxgb4_ring_tx_db(q->adap, &q->q, written);
2140                        written = 0;
2141                }
2142                spin_lock(&q->sendq.lock);
2143        }
2144        q->full = 0;
2145ringdb:
2146        if (written)
2147                cxgb4_ring_tx_db(q->adap, &q->q, written);
2148        spin_unlock(&q->sendq.lock);
2149}
2150
2151/**
2152 *      t4_mgmt_tx - send a management message
2153 *      @adap: the adapter
2154 *      @skb: the packet containing the management message
2155 *
2156 *      Send a management message through control queue 0.
2157 */
2158int t4_mgmt_tx(struct adapter *adap, struct sk_buff *skb)
2159{
2160        int ret;
2161
2162        local_bh_disable();
2163        ret = ctrl_xmit(&adap->sge.ctrlq[0], skb);
2164        local_bh_enable();
2165        return ret;
2166}
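
/* Sketch of how a caller might use t4_mgmt_tx() (illustrative only; the
 * firmware command type and its fields below are hypothetical):
 *
 *	struct sk_buff *skb = alloc_skb(sizeof(struct fw_foo_cmd), GFP_KERNEL);
 *	struct fw_foo_cmd *cmd;
 *
 *	if (!skb)
 *		return -ENOMEM;
 *	cmd = __skb_put_zero(skb, sizeof(*cmd));
 *	... fill in *cmd ...
 *	t4_mgmt_tx(adap, skb);
 *
 * The skb must contain nothing but the work request itself, since control
 * queues only accept immediate data (see is_imm() above).
 */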
2167
2168/**
2169 *      is_ofld_imm - check whether a packet can be sent as immediate data
2170 *      @skb: the packet
2171 *
2172 *      Returns true if a packet can be sent as an offload WR with immediate
2173 *      data.  We currently use the same limit as for Ethernet packets.
2174 */
2175static inline int is_ofld_imm(const struct sk_buff *skb)
2176{
2177        struct work_request_hdr *req = (struct work_request_hdr *)skb->data;
2178        unsigned long opcode = FW_WR_OP_G(ntohl(req->wr_hi));
2179
2180        if (opcode == FW_CRYPTO_LOOKASIDE_WR)
2181                return skb->len <= SGE_MAX_WR_LEN;
2182        else
2183                return skb->len <= MAX_IMM_TX_PKT_LEN;
2184}
2185
2186/**
2187 *      calc_tx_flits_ofld - calculate # of flits for an offload packet
2188 *      @skb: the packet
2189 *
2190 *      Returns the number of flits needed for the given offload packet.
2191 *      These packets are already fully constructed and no additional headers
2192 *      will be added.
2193 */
2194static inline unsigned int calc_tx_flits_ofld(const struct sk_buff *skb)
2195{
2196        unsigned int flits, cnt;
2197
2198        if (is_ofld_imm(skb))
2199                return DIV_ROUND_UP(skb->len, 8);
2200
2201        flits = skb_transport_offset(skb) / 8U;   /* headers */
2202        cnt = skb_shinfo(skb)->nr_frags;
2203        if (skb_tail_pointer(skb) != skb_transport_header(skb))
2204                cnt++;
2205        return flits + sgl_len(cnt);
2206}
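
/* Example of the above (illustrative): an immediate-data WR of 56 bytes
 * needs DIV_ROUND_UP(56, 8) = 7 flits, while a non-immediate skb with a
 * 40-byte WR/CPL header, a linear tail and two page fragments needs
 * 40 / 8 = 5 header flits plus sgl_len(3) flits for its Scatter/Gather
 * List.
 */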
2207
2208/**
2209 *      txq_stop_maperr - stop a Tx queue due to I/O MMU exhaustion
2210 *      @q: the queue to stop
2212 *
2213 *      Mark a Tx queue stopped due to I/O MMU exhaustion and resulting
2214 *      inability to map packets.  A periodic timer attempts to restart
2215 *      queues so marked.
2216 */
2217static void txq_stop_maperr(struct sge_uld_txq *q)
2218{
2219        q->mapping_err++;
2220        q->q.stops++;
2221        set_bit(q->q.cntxt_id - q->adap->sge.egr_start,
2222                q->adap->sge.txq_maperr);
2223}
2224
2225/**
2226 *      ofldtxq_stop - stop an offload Tx queue that has become full
2227 *      @q: the queue to stop
2228 *      @wr: the Work Request causing the queue to become full
2229 *
2230 *      Stops an offload Tx queue that has become full and modifies the packet
2231 *      being written to request a wakeup.
2232 */
2233static void ofldtxq_stop(struct sge_uld_txq *q, struct fw_wr_hdr *wr)
2234{
2235        wr->lo |= htonl(FW_WR_EQUEQ_F | FW_WR_EQUIQ_F);
2236        q->q.stops++;
2237        q->full = 1;
2238}
2239
2240/**
2241 *      service_ofldq - service/restart a suspended offload queue
2242 *      @q: the offload queue
2243 *
2244 *      Services an offload Tx queue by moving packets from its Pending Send
2245 *      Queue to the Hardware TX ring.  The function starts and ends with the
2246 *      Send Queue locked, but drops the lock while putting the skb at the
2247 *      head of the Send Queue onto the Hardware TX Ring.  Dropping the lock
2248 *      allows more skbs to be added to the Send Queue by other threads.
2249 *      The packet being processed at the head of the Pending Send Queue is
2250 *      left on the queue in case we experience DMA Mapping errors, etc.
2251 *      and need to give up and restart later.
2252 *
2253 *      service_ofldq() can be thought of as a task which opportunistically
2254 *      uses other threads' execution contexts.  We use the Offload Queue
2255 *      boolean "service_ofldq_running" to make sure that only one instance
2256 *      is ever running at a time ...
2257 */
2258static void service_ofldq(struct sge_uld_txq *q)
2259{
2260        u64 *pos, *before, *end;
2261        int credits;
2262        struct sk_buff *skb;
2263        struct sge_txq *txq;
2264        unsigned int left;
2265        unsigned int written = 0;
2266        unsigned int flits, ndesc;
2267
2268        /* If another thread is currently in service_ofldq() processing the
2269         * Pending Send Queue then there's nothing to do. Otherwise, flag
2270         * that we're doing the work and continue.  Examining/modifying
2271         * the Offload Queue boolean "service_ofldq_running" must be done
2272         * while holding the Pending Send Queue Lock.
2273         */
2274        if (q->service_ofldq_running)
2275                return;
2276        q->service_ofldq_running = true;
2277
2278        while ((skb = skb_peek(&q->sendq)) != NULL && !q->full) {
2279                /* We drop the lock while we're working with the skb at the
2280                 * head of the Pending Send Queue.  This allows more skbs to
2281                 * be added to the Pending Send Queue while we're working on
2282                 * this one.  We don't need to lock to guard the TX Ring
2283                 * updates because only one thread of execution is ever
2284                 * allowed into service_ofldq() at a time.
2285                 */
2286                spin_unlock(&q->sendq.lock);
2287
2288                cxgb4_reclaim_completed_tx(q->adap, &q->q, false);
2289
2290                flits = skb->priority;                /* previously saved */
2291                ndesc = flits_to_desc(flits);
2292                credits = txq_avail(&q->q) - ndesc;
2293                BUG_ON(credits < 0);
2294                if (unlikely(credits < TXQ_STOP_THRES))
2295                        ofldtxq_stop(q, (struct fw_wr_hdr *)skb->data);
2296
2297                pos = (u64 *)&q->q.desc[q->q.pidx];
2298                if (is_ofld_imm(skb))
2299                        cxgb4_inline_tx_skb(skb, &q->q, pos);
2300                else if (cxgb4_map_skb(q->adap->pdev_dev, skb,
2301                                       (dma_addr_t *)skb->head)) {
2302                        txq_stop_maperr(q);
2303                        spin_lock(&q->sendq.lock);
2304                        break;
2305                } else {
2306                        int last_desc, hdr_len = skb_transport_offset(skb);
2307
2308                        /* The WR headers may not fit within one descriptor.
2309                         * So we need to deal with wrap-around here.
2310                         */
2311                        before = (u64 *)pos;
2312                        end = (u64 *)pos + flits;
2313                        txq = &q->q;
2314                        pos = (void *)inline_tx_skb_header(skb, &q->q,
2315                                                           (void *)pos,
2316                                                           hdr_len);
2317                        if (before > (u64 *)pos) {
2318                                left = (u8 *)end - (u8 *)txq->stat;
2319                                end = (void *)txq->desc + left;
2320                        }
2321
2322                        /* If the current position is already at the end of
2323                         * the ofld queue, reset it to point to the start of
2324                         * the queue and update the end pointer as well.
2325                         */
2326                        if (pos == (u64 *)txq->stat) {
2327                                left = (u8 *)end - (u8 *)txq->stat;
2328                                end = (void *)txq->desc + left;
2329                                pos = (void *)txq->desc;
2330                        }
2331
2332                        cxgb4_write_sgl(skb, &q->q, (void *)pos,
2333                                        end, hdr_len,
2334                                        (dma_addr_t *)skb->head);
2335#ifdef CONFIG_NEED_DMA_MAP_STATE
2336                        skb->dev = q->adap->port[0];
2337                        skb->destructor = deferred_unmap_destructor;
2338#endif
2339                        last_desc = q->q.pidx + ndesc - 1;
2340                        if (last_desc >= q->q.size)
2341                                last_desc -= q->q.size;
2342                        q->q.sdesc[last_desc].skb = skb;
2343                }
2344
2345                txq_advance(&q->q, ndesc);
2346                written += ndesc;
2347                if (unlikely(written > 32)) {
2348                        cxgb4_ring_tx_db(q->adap, &q->q, written);
2349                        written = 0;
2350                }
2351
2352                /* Reacquire the Pending Send Queue Lock so we can unlink the
2353                 * skb we've just successfully transferred to the TX Ring and
2354                 * loop for the next skb which may be at the head of the
2355                 * Pending Send Queue.
2356                 */
2357                spin_lock(&q->sendq.lock);
2358                __skb_unlink(skb, &q->sendq);
2359                if (is_ofld_imm(skb))
2360                        kfree_skb(skb);
2361        }
2362        if (likely(written))
2363                cxgb4_ring_tx_db(q->adap, &q->q, written);
2364
2365        /* Indicate that no thread is currently processing the Pending Send
2366         * Queue.
2367         */
2368        q->service_ofldq_running = false;
2369}
2370
2371/**
2372 *      ofld_xmit - send a packet through an offload queue
2373 *      @q: the Tx offload queue
2374 *      @skb: the packet
2375 *
2376 *      Send an offload packet through an SGE offload queue.
2377 */
2378static int ofld_xmit(struct sge_uld_txq *q, struct sk_buff *skb)
2379{
2380        skb->priority = calc_tx_flits_ofld(skb);       /* save for restart */
2381        spin_lock(&q->sendq.lock);
2382
2383        /* Queue the new skb onto the Offload Queue's Pending Send Queue.  If
2384         * that results in this new skb being the only one on the queue, start
2385         * servicing it.  If there are other skbs already on the list, then
2386         * either the queue is currently being processed or it's been stopped
2387         * for some reason and it'll be restarted at a later time.  Restart
2388         * paths are triggered by events like experiencing a DMA Mapping Error
2389         * or filling the Hardware TX Ring.
2390         */
2391        __skb_queue_tail(&q->sendq, skb);
2392        if (q->sendq.qlen == 1)
2393                service_ofldq(q);
2394
2395        spin_unlock(&q->sendq.lock);
2396        return NET_XMIT_SUCCESS;
2397}
2398
2399/**
2400 *      restart_ofldq - restart a suspended offload queue
2401 *      @data: the offload queue to restart
2402 *
2403 *      Resumes transmission on a suspended Tx offload queue.
2404 */
2405static void restart_ofldq(unsigned long data)
2406{
2407        struct sge_uld_txq *q = (struct sge_uld_txq *)data;
2408
2409        spin_lock(&q->sendq.lock);
2410        q->full = 0;            /* the queue actually is completely empty now */
2411        service_ofldq(q);
2412        spin_unlock(&q->sendq.lock);
2413}
2414
2415/**
2416 *      skb_txq - return the Tx queue an offload packet should use
2417 *      @skb: the packet
2418 *
2419 *      Returns the Tx queue an offload packet should use as indicated by bits
2420 *      1-15 in the packet's queue_mapping.
2421 */
2422static inline unsigned int skb_txq(const struct sk_buff *skb)
2423{
2424        return skb->queue_mapping >> 1;
2425}
2426
2427/**
2428 *      is_ctrl_pkt - return whether an offload packet is a control packet
2429 *      @skb: the packet
2430 *
2431 *      Returns whether an offload packet should use an OFLD or a CTRL
2432 *      Tx queue as indicated by bit 0 in the packet's queue_mapping.
2433 */
2434static inline unsigned int is_ctrl_pkt(const struct sk_buff *skb)
2435{
2436        return skb->queue_mapping & 1;
2437}
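
/* Putting the two helpers above together: a ULD encodes its choice into
 * skb->queue_mapping as (queue << 1) | is_ctrl before handing the skb to
 * t4_ofld_send()/t4_crypto_send(), e.g. (illustrative):
 *
 *	skb_set_queue_mapping(skb, txq_idx << 1);	- offload queue txq_idx
 *	skb_set_queue_mapping(skb, (0 << 1) | 1);	- control queue 0
 *
 * (The set_wr_txq() helper in cxgb4.h wraps exactly this encoding.)
 */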
2438
2439static inline int uld_send(struct adapter *adap, struct sk_buff *skb,
2440                           unsigned int tx_uld_type)
2441{
2442        struct sge_uld_txq_info *txq_info;
2443        struct sge_uld_txq *txq;
2444        unsigned int idx = skb_txq(skb);
2445
2446        if (unlikely(is_ctrl_pkt(skb))) {
2447                /* Single ctrl queue is a requirement for LE workaround path */
2448                if (adap->tids.nsftids)
2449                        idx = 0;
2450                return ctrl_xmit(&adap->sge.ctrlq[idx], skb);
2451        }
2452
2453        txq_info = adap->sge.uld_txq_info[tx_uld_type];
2454        if (unlikely(!txq_info)) {
2455                WARN_ON(true);
2456                return NET_XMIT_DROP;
2457        }
2458
2459        txq = &txq_info->uldtxq[idx];
2460        return ofld_xmit(txq, skb);
2461}
2462
2463/**
2464 *      t4_ofld_send - send an offload packet
2465 *      @adap: the adapter
2466 *      @skb: the packet
2467 *
2468 *      Sends an offload packet.  We use the packet queue_mapping to select the
2469 *      appropriate Tx queue as follows: bit 0 indicates whether the packet
2470 *      should be sent as regular or control, bits 1-15 select the queue.
2471 */
2472int t4_ofld_send(struct adapter *adap, struct sk_buff *skb)
2473{
2474        int ret;
2475
2476        local_bh_disable();
2477        ret = uld_send(adap, skb, CXGB4_TX_OFLD);
2478        local_bh_enable();
2479        return ret;
2480}
2481
2482/**
2483 *      cxgb4_ofld_send - send an offload packet
2484 *      @dev: the net device
2485 *      @skb: the packet
2486 *
2487 *      Sends an offload packet.  This is an exported version of t4_ofld_send(),
2488 *      intended for ULDs.
2489 */
2490int cxgb4_ofld_send(struct net_device *dev, struct sk_buff *skb)
2491{
2492        return t4_ofld_send(netdev2adap(dev), skb);
2493}
2494EXPORT_SYMBOL(cxgb4_ofld_send);
2495
2496static void *inline_tx_header(const void *src,
2497                              const struct sge_txq *q,
2498                              void *pos, int length)
2499{
2500        int left = (void *)q->stat - pos;
2501        u64 *p;
2502
2503        if (likely(length <= left)) {
2504                memcpy(pos, src, length);
2505                pos += length;
2506        } else {
2507                memcpy(pos, src, left);
2508                memcpy(q->desc, src + left, length - left);
2509                pos = (void *)q->desc + (length - left);
2510        }
2511        /* 0-pad to multiple of 16 */
2512        p = PTR_ALIGN(pos, 8);
2513        if ((uintptr_t)p & 8) {
2514                *p = 0;
2515                return p + 1;
2516        }
2517        return p;
2518}
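
/* Worked example of the wrap-around above (sizes illustrative): if only 40
 * bytes remain between pos and the queue's status page (q->stat) and the
 * header is 56 bytes long, the first 40 bytes land at pos, the remaining 16
 * wrap to the start of the descriptor ring (q->desc), and the returned
 * pointer is then zero-padded up so the caller continues on a 16-byte
 * boundary.
 */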
2519
2520/**
2521 *      ofld_xmit_direct - copy a WR into offload queue
2522 *      @q: the Tx offload queue
2523 *      @src: location of WR
2524 *      @len: WR length
2525 *
2526 *      Copy an immediate WR into an uncontended SGE offload queue.
2527 */
2528static int ofld_xmit_direct(struct sge_uld_txq *q, const void *src,
2529                            unsigned int len)
2530{
2531        unsigned int ndesc;
2532        int credits;
2533        u64 *pos;
2534
2535        /* Use the lower limit as the cut-off */
2536        if (len > MAX_IMM_OFLD_TX_DATA_WR_LEN) {
2537                WARN_ON(1);
2538                return NET_XMIT_DROP;
2539        }
2540
2541        /* Don't return NET_XMIT_CN here as the current implementation
2542         * doesn't queue the request using an skb when the following
2543         * conditions are not met.
2544         */
2545        if (!spin_trylock(&q->sendq.lock))
2546                return NET_XMIT_DROP;
2547
2548        if (q->full || !skb_queue_empty(&q->sendq) ||
2549            q->service_ofldq_running) {
2550                spin_unlock(&q->sendq.lock);
2551                return NET_XMIT_DROP;
2552        }
2553        ndesc = flits_to_desc(DIV_ROUND_UP(len, 8));
2554        credits = txq_avail(&q->q) - ndesc;
2555        pos = (u64 *)&q->q.desc[q->q.pidx];
2556
2557        /* ofldtxq_stop modifies WR header in-situ */
2558        inline_tx_header(src, &q->q, pos, len);
2559        if (unlikely(credits < TXQ_STOP_THRES))
2560                ofldtxq_stop(q, (struct fw_wr_hdr *)pos);
2561        txq_advance(&q->q, ndesc);
2562        cxgb4_ring_tx_db(q->adap, &q->q, ndesc);
2563
2564        spin_unlock(&q->sendq.lock);
2565        return NET_XMIT_SUCCESS;
2566}
2567
2568int cxgb4_immdata_send(struct net_device *dev, unsigned int idx,
2569                       const void *src, unsigned int len)
2570{
2571        struct sge_uld_txq_info *txq_info;
2572        struct sge_uld_txq *txq;
2573        struct adapter *adap;
2574        int ret;
2575
2576        adap = netdev2adap(dev);
2577
2578        local_bh_disable();
2579        txq_info = adap->sge.uld_txq_info[CXGB4_TX_OFLD];
2580        if (unlikely(!txq_info)) {
2581                WARN_ON(true);
2582                local_bh_enable();
2583                return NET_XMIT_DROP;
2584        }
2585        txq = &txq_info->uldtxq[idx];
2586
2587        ret = ofld_xmit_direct(txq, src, len);
2588        local_bh_enable();
2589        return net_xmit_eval(ret);
2590}
2591EXPORT_SYMBOL(cxgb4_immdata_send);
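
/* Illustrative ULD-side use of cxgb4_immdata_send() (the work request type
 * here is hypothetical; real callers live outside this file):
 *
 *	struct fw_foo_wr wr;
 *
 *	memset(&wr, 0, sizeof(wr));
 *	... fill in the work request ...
 *	err = cxgb4_immdata_send(netdev, txq_idx, &wr, sizeof(wr));
 *
 * The WR must fit within MAX_IMM_OFLD_TX_DATA_WR_LEN bytes and the target
 * queue must be idle; otherwise the request is dropped rather than queued,
 * as there is no skb to park on the Pending Send Queue on this path.
 */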
2592
2593/**
2594 *      t4_crypto_send - send crypto packet
2595 *      @adap: the adapter
2596 *      @skb: the packet
2597 *
2598 *      Sends a crypto packet.  We use the packet queue_mapping to select the
2599 *      appropriate Tx queue as follows: bit 0 indicates whether the packet
2600 *      should be sent as regular or control, bits 1-15 select the queue.
2601 */
2602static int t4_crypto_send(struct adapter *adap, struct sk_buff *skb)
2603{
2604        int ret;
2605
2606        local_bh_disable();
2607        ret = uld_send(adap, skb, CXGB4_TX_CRYPTO);
2608        local_bh_enable();
2609        return ret;
2610}
2611
2612/**
2613 *      cxgb4_crypto_send - send crypto packet
2614 *      @dev: the net device
2615 *      @skb: the packet
2616 *
2617 *      Sends a crypto packet.  This is an exported version of t4_crypto_send(),
2618 *      intended for ULDs.
2619 */
2620int cxgb4_crypto_send(struct net_device *dev, struct sk_buff *skb)
2621{
2622        return t4_crypto_send(netdev2adap(dev), skb);
2623}
2624EXPORT_SYMBOL(cxgb4_crypto_send);
2625
2626static inline void copy_frags(struct sk_buff *skb,
2627                              const struct pkt_gl *gl, unsigned int offset)
2628{
2629        int i;
2630
2631        /* usually there's just one frag */
2632        __skb_fill_page_desc(skb, 0, gl->frags[0].page,
2633                             gl->frags[0].offset + offset,
2634                             gl->frags[0].size - offset);
2635        skb_shinfo(skb)->nr_frags = gl->nfrags;
2636        for (i = 1; i < gl->nfrags; i++)
2637                __skb_fill_page_desc(skb, i, gl->frags[i].page,
2638                                     gl->frags[i].offset,
2639                                     gl->frags[i].size);
2640
2641        /* get a reference to the last page, we don't own it */
2642        get_page(gl->frags[gl->nfrags - 1].page);
2643}
2644
2645/**
2646 *      cxgb4_pktgl_to_skb - build an sk_buff from a packet gather list
2647 *      @gl: the gather list
2648 *      @skb_len: size of sk_buff main body if it carries fragments
2649 *      @pull_len: amount of data to move to the sk_buff's main body
2650 *
2651 *      Builds an sk_buff from the given packet gather list.  Returns the
2652 *      sk_buff or %NULL if sk_buff allocation failed.
2653 */
2654struct sk_buff *cxgb4_pktgl_to_skb(const struct pkt_gl *gl,
2655                                   unsigned int skb_len, unsigned int pull_len)
2656{
2657        struct sk_buff *skb;
2658
2659        /*
2660         * Below we rely on RX_COPY_THRES being less than the smallest Rx buffer
2661         * size, which is expected since buffers are at least PAGE_SIZEd.
2662         * In this case packets up to RX_COPY_THRES have only one fragment.
2663         */
2664        if (gl->tot_len <= RX_COPY_THRES) {
2665                skb = dev_alloc_skb(gl->tot_len);
2666                if (unlikely(!skb))
2667                        goto out;
2668                __skb_put(skb, gl->tot_len);
2669                skb_copy_to_linear_data(skb, gl->va, gl->tot_len);
2670        } else {
2671                skb = dev_alloc_skb(skb_len);
2672                if (unlikely(!skb))
2673                        goto out;
2674                __skb_put(skb, pull_len);
2675                skb_copy_to_linear_data(skb, gl->va, pull_len);
2676
2677                copy_frags(skb, gl, pull_len);
2678                skb->len = gl->tot_len;
2679                skb->data_len = skb->len - pull_len;
2680                skb->truesize += skb->data_len;
2681        }
2682out:    return skb;
2683}
2684EXPORT_SYMBOL(cxgb4_pktgl_to_skb);
2685
2686/**
2687 *      t4_pktgl_free - free a packet gather list
2688 *      @gl: the gather list
2689 *
2690 *      Releases the pages of a packet gather list.  We do not own the last
2691 *      page on the list and do not free it.
2692 */
2693static void t4_pktgl_free(const struct pkt_gl *gl)
2694{
2695        int n;
2696        const struct page_frag *p;
2697
2698        for (p = gl->frags, n = gl->nfrags - 1; n--; p++)
2699                put_page(p->page);
2700}
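
/* Note how this pairs with copy_frags() above: copy_frags() hands the gather
 * list's existing page references over to the skb and only takes an extra
 * reference on the final page (which the free list still owns), so the drop
 * path here conversely releases every page except that final one.
 */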
2701
2702/*
2703 * Process an MPS trace packet.  Give it an unused protocol number so it won't
2704 * be delivered to anyone and send it to the stack for capture.
2705 */
2706static noinline int handle_trace_pkt(struct adapter *adap,
2707                                     const struct pkt_gl *gl)
2708{
2709        struct sk_buff *skb;
2710
2711        skb = cxgb4_pktgl_to_skb(gl, RX_PULL_LEN, RX_PULL_LEN);
2712        if (unlikely(!skb)) {
2713                t4_pktgl_free(gl);
2714                return 0;
2715        }
2716
2717        if (is_t4(adap->params.chip))
2718                __skb_pull(skb, sizeof(struct cpl_trace_pkt));
2719        else
2720                __skb_pull(skb, sizeof(struct cpl_t5_trace_pkt));
2721
2722        skb_reset_mac_header(skb);
2723        skb->protocol = htons(0xffff);
2724        skb->dev = adap->port[0];
2725        netif_receive_skb(skb);
2726        return 0;
2727}
2728
2729/**
2730 * cxgb4_sgetim_to_hwtstamp - convert sge time stamp to hw time stamp
2731 * @adap: the adapter
2732 * @hwtstamps: time stamp structure to update
2733 * @sgetstamp: 60-bit IQE timestamp
2734 *
2735 * Every ingress queue entry carries a 60-bit timestamp in Core Clock ticks;
2736 * convert it to ktime_t and store it in @hwtstamps.
2737 **/
2738static void cxgb4_sgetim_to_hwtstamp(struct adapter *adap,
2739                                     struct skb_shared_hwtstamps *hwtstamps,
2740                                     u64 sgetstamp)
2741{
2742        u64 ns;
2743        u64 tmp = (sgetstamp * 1000 * 1000 + adap->params.vpd.cclk / 2);
2744
2745        ns = div_u64(tmp, adap->params.vpd.cclk);
2746
2747        memset(hwtstamps, 0, sizeof(*hwtstamps));
2748        hwtstamps->hwtstamp = ns_to_ktime(ns);
2749}
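
/* The conversion above assumes adap->params.vpd.cclk is the Core Clock in
 * kHz, so one tick is 10^6 / cclk nanoseconds and the "+ cclk / 2" term
 * rounds to the nearest nanosecond.  E.g. with a (hypothetical) 62500 kHz
 * core clock one tick is 16 ns, and sgetstamp == 1000 converts to 16000 ns.
 */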
2750
2751static void do_gro(struct sge_eth_rxq *rxq, const struct pkt_gl *gl,
2752                   const struct cpl_rx_pkt *pkt, unsigned long tnl_hdr_len)
2753{
2754        struct adapter *adapter = rxq->rspq.adap;
2755        struct sge *s = &adapter->sge;
2756        struct port_info *pi;
2757        int ret;
2758        struct sk_buff *skb;
2759
2760        skb = napi_get_frags(&rxq->rspq.napi);
2761        if (unlikely(!skb)) {
2762                t4_pktgl_free(gl);
2763                rxq->stats.rx_drops++;
2764                return;
2765        }
2766
2767        copy_frags(skb, gl, s->pktshift);
2768        if (tnl_hdr_len)
2769                skb->csum_level = 1;
2770        skb->len = gl->tot_len - s->pktshift;
2771        skb->data_len = skb->len;
2772        skb->truesize += skb->data_len;
2773        skb->ip_summed = CHECKSUM_UNNECESSARY;
2774        skb_record_rx_queue(skb, rxq->rspq.idx);
2775        pi = netdev_priv(skb->dev);
2776        if (pi->rxtstamp)
2777                cxgb4_sgetim_to_hwtstamp(adapter, skb_hwtstamps(skb),
2778                                         gl->sgetstamp);
2779        if (rxq->rspq.netdev->features & NETIF_F_RXHASH)
2780                skb_set_hash(skb, (__force u32)pkt->rsshdr.hash_val,
2781                             PKT_HASH_TYPE_L3);
2782
2783        if (unlikely(pkt->vlan_ex)) {
2784                __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), ntohs(pkt->vlan));
2785                rxq->stats.vlan_ex++;
2786        }
2787        ret = napi_gro_frags(&rxq->rspq.napi);
2788        if (ret == GRO_HELD)
2789                rxq->stats.lro_pkts++;
2790        else if (ret == GRO_MERGED || ret == GRO_MERGED_FREE)
2791                rxq->stats.lro_merged++;
2792        rxq->stats.pkts++;
2793        rxq->stats.rx_cso++;
2794}
2795
2796enum {
2797        RX_NON_PTP_PKT = 0,
2798        RX_PTP_PKT_SUC = 1,
2799        RX_PTP_PKT_ERR = 2
2800};
2801
2802/**
2803 *     t4_systim_to_hwstamp - read hardware time stamp
2804 *     @adapter: the adapter
2805 *     @skb: the packet
2806 *
2807 *     Read the timestamp from the MPS packet and insert it into the skb,
2808 *     which is then forwarded to the PTP application.
2809 */
2810static noinline int t4_systim_to_hwstamp(struct adapter *adapter,
2811                                         struct sk_buff *skb)
2812{
2813        struct skb_shared_hwtstamps *hwtstamps;
2814        struct cpl_rx_mps_pkt *cpl = NULL;
2815        unsigned char *data;
2816        int offset;
2817
2818        cpl = (struct cpl_rx_mps_pkt *)skb->data;
2819        if (!(CPL_RX_MPS_PKT_TYPE_G(ntohl(cpl->op_to_r1_hi)) &
2820             X_CPL_RX_MPS_PKT_TYPE_PTP))
2821                return RX_PTP_PKT_ERR;
2822
2823        data = skb->data + sizeof(*cpl);
2824        skb_pull(skb, 2 * sizeof(u64) + sizeof(struct cpl_rx_mps_pkt));
2825        offset = ETH_HLEN + IPV4_HLEN(skb->data) + UDP_HLEN;
2826        if (skb->len < offset + OFF_PTP_SEQUENCE_ID + sizeof(short))
2827                return RX_PTP_PKT_ERR;
2828
2829        hwtstamps = skb_hwtstamps(skb);
2830        memset(hwtstamps, 0, sizeof(*hwtstamps));
2831        hwtstamps->hwtstamp = ns_to_ktime(be64_to_cpu(*((u64 *)data)));
2832
2833        return RX_PTP_PKT_SUC;
2834}
2835
2836/**
2837 *     t4_rx_hststamp - Recv PTP Event Message
2838 *     @adapter: the adapter
2839 *     @rsp: the response queue descriptor holding the RX_PKT message
2840 *     @skb: the packet
2841 *
2842 *     If PTP is enabled and this is an MPS packet, read the HW timestamp.
2843 */
2844static int t4_rx_hststamp(struct adapter *adapter, const __be64 *rsp,
2845                          struct sge_eth_rxq *rxq, struct sk_buff *skb)
2846{
2847        int ret;
2848
2849        if (unlikely((*(u8 *)rsp == CPL_RX_MPS_PKT) &&
2850                     !is_t4(adapter->params.chip))) {
2851                ret = t4_systim_to_hwstamp(adapter, skb);
2852                if (ret == RX_PTP_PKT_ERR) {
2853                        kfree_skb(skb);
2854                        rxq->stats.rx_drops++;
2855                }
2856                return ret;
2857        }
2858        return RX_NON_PTP_PKT;
2859}
2860
2861/**
2862 *      t4_tx_hststamp - Loopback PTP Transmit Event Message
2863 *      @adapter: the adapter
2864 *      @skb: the packet
2865 *      @dev: the ingress net device
2866 *
2867 *      Read hardware timestamp for the loopback PTP Tx event message
2868 */
2869static int t4_tx_hststamp(struct adapter *adapter, struct sk_buff *skb,
2870                          struct net_device *dev)
2871{
2872        struct port_info *pi = netdev_priv(dev);
2873
2874        if (!is_t4(adapter->params.chip) && adapter->ptp_tx_skb) {
2875                cxgb4_ptp_read_hwstamp(adapter, pi);
2876                kfree_skb(skb);
2877                return 0;
2878        }
2879        return 1;
2880}
2881
2882/**
2883 *      t4_tx_completion_handler - handle CPL_SGE_EGR_UPDATE messages
2884 *      @rspq: Ethernet RX Response Queue associated with Ethernet TX Queue
2885 *      @rsp: Response Entry pointer into Response Queue
2886 *      @gl: Gather List pointer
2887 *
2888 *      For adapters which support the SGE Doorbell Queue Timer facility,
2889 *      we configure the Ethernet TX Queues to send CIDX Updates to the
2890 *      Associated Ethernet RX Response Queue with CPL_SGE_EGR_UPDATE
2891 *      messages.  This adds a small load to PCIe Link RX bandwidth and,
2892 *      potentially, higher CPU Interrupt load, but allows us to respond
2893 *      much more quickly to the CIDX Updates.  This is important for
2894 *      Upper Layer Software which isn't willing to have a large amount
2895 *      of TX Data outstanding before receiving DMA Completions.
2896 */
2897static void t4_tx_completion_handler(struct sge_rspq *rspq,
2898                                     const __be64 *rsp,
2899                                     const struct pkt_gl *gl)
2900{
2901        u8 opcode = ((const struct rss_header *)rsp)->opcode;
2902        struct port_info *pi = netdev_priv(rspq->netdev);
2903        struct adapter *adapter = rspq->adap;
2904        struct sge *s = &adapter->sge;
2905        struct sge_eth_txq *txq;
2906
2907        /* skip RSS header */
2908        rsp++;
2909
2910        /* FW can send EGR_UPDATEs encapsulated in a CPL_FW4_MSG.
2911         */
2912        if (unlikely(opcode == CPL_FW4_MSG &&
2913                     ((const struct cpl_fw4_msg *)rsp)->type ==
2914                                                        FW_TYPE_RSSCPL)) {
2915                rsp++;
2916                opcode = ((const struct rss_header *)rsp)->opcode;
2917                rsp++;
2918        }
2919
2920        if (unlikely(opcode != CPL_SGE_EGR_UPDATE)) {
2921                pr_info("%s: unexpected FW4/CPL %#x on Rx queue\n",
2922                        __func__, opcode);
2923                return;
2924        }
2925
2926        txq = &s->ethtxq[pi->first_qset + rspq->idx];
2927
2928        /* We've got the Hardware Consumer Index Update in the Egress Update
2929         * message.  If we're using the SGE Doorbell Queue Timer mechanism,
2930         * these Egress Update messages will be our sole CIDX Updates we get
2931         * since we don't want to chew up PCIe bandwidth for both Ingress
2932         * Messages and Status Page writes.  However, the code which manages
2933         * reclaiming successfully DMA'ed TX Work Requests uses the CIDX value
2934         * stored in the Status Page at the end of the TX Queue.  It's easiest
2935         * to simply copy the CIDX Update value from the Egress Update message
2936         * to the Status Page.  Also note that no Endian issues need to be
2937         * considered here since both are Big Endian and we're just copying
2938         * bytes consistently ...
2939         */
2940        if (txq->dbqt) {
2941                struct cpl_sge_egr_update *egr;
2942
2943                egr = (struct cpl_sge_egr_update *)rsp;
2944                WRITE_ONCE(txq->q.stat->cidx, egr->cidx);
2945        }
2946
2947        t4_sge_eth_txq_egress_update(adapter, txq, -1);
2948}
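
/* Sketch of what the CIDX Update copied above feeds into: the egress-update
 * path reclaims the TX descriptors sitting between the software consumer
 * index and the hardware consumer index now stored in the Status Page.  On
 * a ring of 'size' entries that count is just the wrapped difference,
 * roughly what the driver's reclaimable() helper computes.  Hypothetical
 * helper, for illustration only.
 */
static inline unsigned int example_tx_reclaimable(unsigned int hw_cidx,
                                                  unsigned int sw_cidx,
                                                  unsigned int size)
{
        int delta = hw_cidx - sw_cidx;

        return delta >= 0 ? delta : delta + size;
}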
2949
2950/**
2951 *      t4_ethrx_handler - process an ingress ethernet packet
2952 *      @q: the response queue that received the packet
2953 *      @rsp: the response queue descriptor holding the RX_PKT message
2954 *      @si: the gather list of packet fragments
2955 *
2956 *      Process an ingress ethernet packet and deliver it to the stack.
2957 */
2958int t4_ethrx_handler(struct sge_rspq *q, const __be64 *rsp,
2959                     const struct pkt_gl *si)
2960{
2961        bool csum_ok;
2962        struct sk_buff *skb;
2963        const struct cpl_rx_pkt *pkt;
2964        struct sge_eth_rxq *rxq = container_of(q, struct sge_eth_rxq, rspq);
2965        struct adapter *adapter = q->adap;
2966        struct sge *s = &q->adap->sge;
2967        int cpl_trace_pkt = is_t4(q->adap->params.chip) ?
2968                            CPL_TRACE_PKT : CPL_TRACE_PKT_T5;
2969        u16 err_vec, tnl_hdr_len = 0;
2970        struct port_info *pi;
2971        int ret = 0;
2972
2973        /* If we're looking at TX Queue CIDX Update, handle that separately
2974         * and return.
2975         */
2976        if (unlikely((*(u8 *)rsp == CPL_FW4_MSG) ||
2977                     (*(u8 *)rsp == CPL_SGE_EGR_UPDATE))) {
2978                t4_tx_completion_handler(q, rsp, si);
2979                return 0;
2980        }
2981
2982        if (unlikely(*(u8 *)rsp == cpl_trace_pkt))
2983                return handle_trace_pkt(q->adap, si);
2984
2985        pkt = (const struct cpl_rx_pkt *)rsp;
2986        /* Compressed error vector is enabled for T6 only */
2987        if (q->adap->params.tp.rx_pkt_encap) {
2988                err_vec = T6_COMPR_RXERR_VEC_G(be16_to_cpu(pkt->err_vec));
2989                tnl_hdr_len = T6_RX_TNLHDR_LEN_G(ntohs(pkt->err_vec));
2990        } else {
2991                err_vec = be16_to_cpu(pkt->err_vec);
2992        }
2993
2994        csum_ok = pkt->csum_calc && !err_vec &&
2995                  (q->netdev->features & NETIF_F_RXCSUM);
2996
2997        if (err_vec)
2998                rxq->stats.bad_rx_pkts++;
2999
3000        if (((pkt->l2info & htonl(RXF_TCP_F)) ||
3001             tnl_hdr_len) &&
3002            (q->netdev->features & NETIF_F_GRO) && csum_ok && !pkt->ip_frag) {
3003                do_gro(rxq, si, pkt, tnl_hdr_len);
3004                return 0;
3005        }
3006
3007        skb = cxgb4_pktgl_to_skb(si, RX_PKT_SKB_LEN, RX_PULL_LEN);
3008        if (unlikely(!skb)) {
3009                t4_pktgl_free(si);
3010                rxq->stats.rx_drops++;
3011                return 0;
3012        }
3013        pi = netdev_priv(q->netdev);
3014
3015        /* Handle PTP Event Rx packet */
3016        if (unlikely(pi->ptp_enable)) {
3017                ret = t4_rx_hststamp(adapter, rsp, rxq, skb);
3018                if (ret == RX_PTP_PKT_ERR)
3019                        return 0;
3020        }
3021        if (likely(!ret))
3022                __skb_pull(skb, s->pktshift); /* remove ethernet header pad */
3023
3024        /* Handle the PTP Event Tx Loopback packet */
3025        if (unlikely(pi->ptp_enable && !ret &&
3026                     (pkt->l2info & htonl(RXF_UDP_F)) &&
3027                     cxgb4_ptp_is_ptp_rx(skb))) {
3028                if (!t4_tx_hststamp(adapter, skb, q->netdev))
3029                        return 0;
3030        }
3031
3032        skb->protocol = eth_type_trans(skb, q->netdev);
3033        skb_record_rx_queue(skb, q->idx);
3034        if (skb->dev->features & NETIF_F_RXHASH)
3035                skb_set_hash(skb, (__force u32)pkt->rsshdr.hash_val,
3036                             PKT_HASH_TYPE_L3);
3037
3038        rxq->stats.pkts++;
3039
3040        if (pi->rxtstamp)
3041                cxgb4_sgetim_to_hwtstamp(q->adap, skb_hwtstamps(skb),
3042                                         si->sgetstamp);
3043        if (csum_ok && (pkt->l2info & htonl(RXF_UDP_F | RXF_TCP_F))) {
3044                if (!pkt->ip_frag) {
3045                        skb->ip_summed = CHECKSUM_UNNECESSARY;
3046                        rxq->stats.rx_cso++;
3047                } else if (pkt->l2info & htonl(RXF_IP_F)) {
3048                        __sum16 c = (__force __sum16)pkt->csum;
3049                        skb->csum = csum_unfold(c);
3050
3051                        if (tnl_hdr_len) {
3052                                skb->ip_summed = CHECKSUM_UNNECESSARY;
3053                                skb->csum_level = 1;
3054                        } else {
3055                                skb->ip_summed = CHECKSUM_COMPLETE;
3056                        }
3057                        rxq->stats.rx_cso++;
3058                }
3059        } else {
3060                skb_checksum_none_assert(skb);
3061#ifdef CONFIG_CHELSIO_T4_FCOE
3062#define CPL_RX_PKT_FLAGS (RXF_PSH_F | RXF_SYN_F | RXF_UDP_F | \
3063                          RXF_TCP_F | RXF_IP_F | RXF_IP6_F | RXF_LRO_F)
3064
3065                if (!(pkt->l2info & cpu_to_be32(CPL_RX_PKT_FLAGS))) {
3066                        if ((pkt->l2info & cpu_to_be32(RXF_FCOE_F)) &&
3067                            (pi->fcoe.flags & CXGB_FCOE_ENABLED)) {
3068                                if (q->adap->params.tp.rx_pkt_encap)
3069                                        csum_ok = err_vec &
3070                                                  T6_COMPR_RXERR_SUM_F;
3071                                else
3072                                        csum_ok = err_vec & RXERR_CSUM_F;
3073                                if (!csum_ok)
3074                                        skb->ip_summed = CHECKSUM_UNNECESSARY;
3075                        }
3076                }
3077
3078#undef CPL_RX_PKT_FLAGS
3079#endif /* CONFIG_CHELSIO_T4_FCOE */
3080        }
3081
3082        if (unlikely(pkt->vlan_ex)) {
3083                __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), ntohs(pkt->vlan));
3084                rxq->stats.vlan_ex++;
3085        }
3086        skb_mark_napi_id(skb, &q->napi);
3087        netif_receive_skb(skb);
3088        return 0;
3089}
3090
3091/**
3092 *      restore_rx_bufs - put back a packet's Rx buffers
3093 *      @si: the packet gather list
3094 *      @q: the SGE free list
3095 *      @frags: number of FL buffers to restore
3096 *
3097 *      Puts back on an FL the Rx buffers associated with @si.  The buffers
3098 *      have already been unmapped and are left unmapped; we mark them as such
3099 *      to prevent further unmapping attempts.
3100 *
3101 *      This function undoes a series of @unmap_rx_buf calls when we find out
3102 *      that the current packet can't be processed right away after all and we
3103 *      need to come back to it later.  This is a very rare event and there's
3104 *      no effort to make this particularly efficient.
3105 */
3106static void restore_rx_bufs(const struct pkt_gl *si, struct sge_fl *q,
3107                            int frags)
3108{
3109        struct rx_sw_desc *d;
3110
3111        while (frags--) {
3112                if (q->cidx == 0)
3113                        q->cidx = q->size - 1;
3114                else
3115                        q->cidx--;
3116                d = &q->sdesc[q->cidx];
3117                d->page = si->frags[frags].page;
3118                d->dma_addr |= RX_UNMAPPED_BUF;
3119                q->avail++;
3120        }
3121}
3122
3123/**
3124 *      is_new_response - check if a response is newly written
3125 *      @r: the response descriptor
3126 *      @q: the response queue
3127 *
3128 *      Returns true if a response descriptor contains a yet unprocessed
3129 *      response.
3130 */
3131static inline bool is_new_response(const struct rsp_ctrl *r,
3132                                   const struct sge_rspq *q)
3133{
3134        return (r->type_gen >> RSPD_GEN_S) == q->gen;
3135}
3136
3137/**
3138 *      rspq_next - advance to the next entry in a response queue
3139 *      @q: the queue
3140 *
3141 *      Updates the state of a response queue to advance it to the next entry.
3142 */
3143static inline void rspq_next(struct sge_rspq *q)
3144{
3145        q->cur_desc = (void *)q->cur_desc + q->iqe_len;
3146        if (unlikely(++q->cidx == q->size)) {
3147                q->cidx = 0;
3148                q->gen ^= 1;
3149                q->cur_desc = q->desc;
3150        }
3151}
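
/* Together, is_new_response() and rspq_next() implement the SGE response
 * queue ownership handshake: hardware writes each response descriptor with
 * the queue's current generation bit, and the driver flips the generation
 * it expects every time it wraps the ring, so descriptors left over from
 * the previous lap never look "new".  A minimal consumer loop built on that
 * convention might look like this (a sketch, not the driver's code):
 *
 *      rc = (void *)q->cur_desc + (q->iqe_len - sizeof(*rc));
 *      while (is_new_response(rc, q)) {
 *              dma_rmb();      // read the payload only after the gen check
 *              // ... consume the response at q->cur_desc ...
 *              rspq_next(q);   // advances cidx; may wrap and flip q->gen
 *              rc = (void *)q->cur_desc + (q->iqe_len - sizeof(*rc));
 *      }
 */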
3152
3153/**
3154 *      process_responses - process responses from an SGE response queue
3155 *      @q: the ingress queue to process
3156 *      @budget: how many responses can be processed in this round
3157 *
3158 *      Process responses from an SGE response queue up to the supplied budget.
3159 *      Responses include received packets as well as control messages from FW
3160 *      or HW.
3161 *
3162 *      Additionally choose the interrupt holdoff time for the next interrupt
3163 *      on this queue.  If the system is under memory shortage use a fairly
3164 *      long delay to help recovery.
3165 */
3166static int process_responses(struct sge_rspq *q, int budget)
3167{
3168        int ret, rsp_type;
3169        int budget_left = budget;
3170        const struct rsp_ctrl *rc;
3171        struct sge_eth_rxq *rxq = container_of(q, struct sge_eth_rxq, rspq);
3172        struct adapter *adapter = q->adap;
3173        struct sge *s = &adapter->sge;
3174
3175        while (likely(budget_left)) {
3176                rc = (void *)q->cur_desc + (q->iqe_len - sizeof(*rc));
3177                if (!is_new_response(rc, q)) {
3178                        if (q->flush_handler)
3179                                q->flush_handler(q);
3180                        break;
3181                }
3182
3183                dma_rmb();
3184                rsp_type = RSPD_TYPE_G(rc->type_gen);
3185                if (likely(rsp_type == RSPD_TYPE_FLBUF_X)) {
3186                        struct page_frag *fp;
3187                        struct pkt_gl si;
3188                        const struct rx_sw_desc *rsd;
3189                        u32 len = ntohl(rc->pldbuflen_qid), bufsz, frags;
3190
3191                        if (len & RSPD_NEWBUF_F) {
3192                                if (likely(q->offset > 0)) {
3193                                        free_rx_bufs(q->adap, &rxq->fl, 1);
3194                                        q->offset = 0;
3195                                }
3196                                len = RSPD_LEN_G(len);
3197                        }
3198                        si.tot_len = len;
3199
3200                        /* gather packet fragments */
3201                        for (frags = 0, fp = si.frags; ; frags++, fp++) {
3202                                rsd = &rxq->fl.sdesc[rxq->fl.cidx];
3203                                bufsz = get_buf_size(adapter, rsd);
3204                                fp->page = rsd->page;
3205                                fp->offset = q->offset;
3206                                fp->size = min(bufsz, len);
3207                                len -= fp->size;
3208                                if (!len)
3209                                        break;
3210                                unmap_rx_buf(q->adap, &rxq->fl);
3211                        }
3212
3213                        si.sgetstamp = SGE_TIMESTAMP_G(
3214                                        be64_to_cpu(rc->last_flit));
3215                        /*
3216                         * Last buffer remains mapped so explicitly make it
3217                         * coherent for CPU access.
3218                         */
3219                        dma_sync_single_for_cpu(q->adap->pdev_dev,
3220                                                get_buf_addr(rsd),
3221                                                fp->size, DMA_FROM_DEVICE);
3222
3223                        si.va = page_address(si.frags[0].page) +
3224                                si.frags[0].offset;
3225                        prefetch(si.va);
3226
3227                        si.nfrags = frags + 1;
3228                        ret = q->handler(q, q->cur_desc, &si);
3229                        if (likely(ret == 0))
3230                                q->offset += ALIGN(fp->size, s->fl_align);
3231                        else
3232                                restore_rx_bufs(&si, &rxq->fl, frags);
3233                } else if (likely(rsp_type == RSPD_TYPE_CPL_X)) {
3234                        ret = q->handler(q, q->cur_desc, NULL);
3235                } else {
3236                        ret = q->handler(q, (const __be64 *)rc, CXGB4_MSG_AN);
3237                }
3238
3239                if (unlikely(ret)) {
3240                        /* couldn't process descriptor, back off for recovery */
3241                        q->next_intr_params = QINTR_TIMER_IDX_V(NOMEM_TMR_IDX);
3242                        break;
3243                }
3244
3245                rspq_next(q);
3246                budget_left--;
3247        }
3248
3249        if (q->offset >= 0 && fl_cap(&rxq->fl) - rxq->fl.avail >= 16)
3250                __refill_fl(q->adap, &rxq->fl);
3251        return budget - budget_left;
3252}
3253
3254/**
3255 *      napi_rx_handler - the NAPI handler for Rx processing
3256 *      @napi: the napi instance
3257 *      @budget: how many packets we can process in this round
3258 *
3259 *      Handler for new data events when using NAPI.  This does not need any
3260 *      locking or protection from interrupts as data interrupts are off at
3261 *      this point and other adapter interrupts do not interfere (the latter
3262 *      is not a concern at all with MSI-X as non-data interrupts then have
3263 *      a separate handler).
3264 */
3265static int napi_rx_handler(struct napi_struct *napi, int budget)
3266{
3267        unsigned int params;
3268        struct sge_rspq *q = container_of(napi, struct sge_rspq, napi);
3269        int work_done;
3270        u32 val;
3271
3272        work_done = process_responses(q, budget);
3273        if (likely(work_done < budget)) {
3274                int timer_index;
3275
3276                napi_complete_done(napi, work_done);
3277                timer_index = QINTR_TIMER_IDX_G(q->next_intr_params);
3278
3279                if (q->adaptive_rx) {
3280                        if (work_done > max(timer_pkt_quota[timer_index],
3281                                            MIN_NAPI_WORK))
3282                                timer_index = (timer_index + 1);
3283                        else
3284                                timer_index = timer_index - 1;
3285
3286                        timer_index = clamp(timer_index, 0, SGE_TIMERREGS - 1);
3287                        q->next_intr_params =
3288                                        QINTR_TIMER_IDX_V(timer_index) |
3289                                        QINTR_CNT_EN_V(0);
3290                        params = q->next_intr_params;
3291                } else {
3292                        params = q->next_intr_params;
3293                        q->next_intr_params = q->intr_params;
3294                }
3295        } else
3296                params = QINTR_TIMER_IDX_V(7);
3297
3298        val = CIDXINC_V(work_done) | SEINTARM_V(params);
3299
3300        /* If we don't have access to the new User GTS (T5+), use the old
3301         * doorbell mechanism; otherwise use the new BAR2 mechanism.
3302         */
3303        if (unlikely(q->bar2_addr == NULL)) {
3304                t4_write_reg(q->adap, MYPF_REG(SGE_PF_GTS_A),
3305                             val | INGRESSQID_V((u32)q->cntxt_id));
3306        } else {
3307                writel(val | INGRESSQID_V(q->bar2_qid),
3308                       q->bar2_addr + SGE_UDB_GTS);
3309                wmb();
3310        }
3311        return work_done;
3312}
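
/* Sketch of the adaptive holdoff step above: when a NAPI poll consumes more
 * packets than the quota associated with the current SGE timer index, move
 * to the next (longer) holdoff, otherwise move to a shorter one, clamping
 * to the valid range of SGE timer registers.  'quota' stands in for the
 * driver's timer_pkt_quota[] table; hypothetical helper, for illustration
 * only.
 */
static inline int example_next_timer_idx(int idx, int work_done,
                                         const u16 *quota, int nregs,
                                         int min_work)
{
        if (work_done > max_t(int, quota[idx], min_work))
                idx++;
        else
                idx--;

        return clamp(idx, 0, nregs - 1);
}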
3313
3314/*
3315 * The MSI-X interrupt handler for an SGE response queue.
3316 */
3317irqreturn_t t4_sge_intr_msix(int irq, void *cookie)
3318{
3319        struct sge_rspq *q = cookie;
3320
3321        napi_schedule(&q->napi);
3322        return IRQ_HANDLED;
3323}
3324
3325/*
3326 * Process the indirect interrupt entries in the interrupt queue and kick off
3327 * NAPI for each queue that has generated an entry.
3328 */
3329static unsigned int process_intrq(struct adapter *adap)
3330{
3331        unsigned int credits;
3332        const struct rsp_ctrl *rc;
3333        struct sge_rspq *q = &adap->sge.intrq;
3334        u32 val;
3335
3336        spin_lock(&adap->sge.intrq_lock);
3337        for (credits = 0; ; credits++) {
3338                rc = (void *)q->cur_desc + (q->iqe_len - sizeof(*rc));
3339                if (!is_new_response(rc, q))
3340                        break;
3341
3342                dma_rmb();
3343                if (RSPD_TYPE_G(rc->type_gen) == RSPD_TYPE_INTR_X) {
3344                        unsigned int qid = ntohl(rc->pldbuflen_qid);
3345
3346                        qid -= adap->sge.ingr_start;
3347                        napi_schedule(&adap->sge.ingr_map[qid]->napi);
3348                }
3349
3350                rspq_next(q);
3351        }
3352
3353        val =  CIDXINC_V(credits) | SEINTARM_V(q->intr_params);
3354
3355        /* If we don't have access to the new User GTS (T5+), use the old
3356         * doorbell mechanism; otherwise use the new BAR2 mechanism.
3357         */
3358        if (unlikely(q->bar2_addr == NULL)) {
3359                t4_write_reg(adap, MYPF_REG(SGE_PF_GTS_A),
3360                             val | INGRESSQID_V(q->cntxt_id));
3361        } else {
3362                writel(val | INGRESSQID_V(q->bar2_qid),
3363                       q->bar2_addr + SGE_UDB_GTS);
3364                wmb();
3365        }
3366        spin_unlock(&adap->sge.intrq_lock);
3367        return credits;
3368}
3369
3370/*
3371 * The MSI interrupt handler, which handles data events from SGE response queues
3372 * as well as error and other async events, as they all use the same MSI vector.
3373 */
3374static irqreturn_t t4_intr_msi(int irq, void *cookie)
3375{
3376        struct adapter *adap = cookie;
3377
3378        if (adap->flags & CXGB4_MASTER_PF)
3379                t4_slow_intr_handler(adap);
3380        process_intrq(adap);
3381        return IRQ_HANDLED;
3382}
3383
3384/*
3385 * Interrupt handler for legacy INTx interrupts.
3386 * Handles data events from SGE response queues as well as error and other
3387 * async events as they all use the same interrupt line.
3388 */
3389static irqreturn_t t4_intr_intx(int irq, void *cookie)
3390{
3391        struct adapter *adap = cookie;
3392
3393        t4_write_reg(adap, MYPF_REG(PCIE_PF_CLI_A), 0);
3394        if (((adap->flags & CXGB4_MASTER_PF) && t4_slow_intr_handler(adap)) |
3395            process_intrq(adap))
3396                return IRQ_HANDLED;
3397        return IRQ_NONE;             /* probably shared interrupt */
3398}
3399
3400/**
3401 *      t4_intr_handler - select the top-level interrupt handler
3402 *      @adap: the adapter
3403 *
3404 *      Selects the top-level interrupt handler based on the type of interrupts
3405 *      (MSI-X, MSI, or INTx).
3406 */
3407irq_handler_t t4_intr_handler(struct adapter *adap)
3408{
3409        if (adap->flags & CXGB4_USING_MSIX)
3410                return t4_sge_intr_msix;
3411        if (adap->flags & CXGB4_USING_MSI)
3412                return t4_intr_msi;
3413        return t4_intr_intx;
3414}
3415
3416static void sge_rx_timer_cb(struct timer_list *t)
3417{
3418        unsigned long m;
3419        unsigned int i;
3420        struct adapter *adap = from_timer(adap, t, sge.rx_timer);
3421        struct sge *s = &adap->sge;
3422
3423        for (i = 0; i < BITS_TO_LONGS(s->egr_sz); i++)
3424                for (m = s->starving_fl[i]; m; m &= m - 1) {
3425                        struct sge_eth_rxq *rxq;
3426                        unsigned int id = __ffs(m) + i * BITS_PER_LONG;
3427                        struct sge_fl *fl = s->egr_map[id];
3428
3429                        clear_bit(id, s->starving_fl);
3430                        smp_mb__after_atomic();
3431
3432                        if (fl_starving(adap, fl)) {
3433                                rxq = container_of(fl, struct sge_eth_rxq, fl);
3434                                if (napi_reschedule(&rxq->rspq.napi))
3435                                        fl->starving++;
3436                                else
3437                                        set_bit(id, s->starving_fl);
3438                        }
3439                }
3440        /* The remainder of the SGE RX Timer Callback routine is dedicated to
3441         * global Master PF activities like checking for chip ingress stalls,
3442         * etc.
3443         */
3444        if (!(adap->flags & CXGB4_MASTER_PF))
3445                goto done;
3446
3447        t4_idma_monitor(adap, &s->idma_monitor, HZ, RX_QCHECK_PERIOD);
3448
3449done:
3450        mod_timer(&s->rx_timer, jiffies + RX_QCHECK_PERIOD);
3451}
3452
3453static void sge_tx_timer_cb(struct timer_list *t)
3454{
3455        struct adapter *adap = from_timer(adap, t, sge.tx_timer);
3456        struct sge *s = &adap->sge;
3457        unsigned long m, period;
3458        unsigned int i, budget;
3459
3460        for (i = 0; i < BITS_TO_LONGS(s->egr_sz); i++)
3461                for (m = s->txq_maperr[i]; m; m &= m - 1) {
3462                        unsigned long id = __ffs(m) + i * BITS_PER_LONG;
3463                        struct sge_uld_txq *txq = s->egr_map[id];
3464
3465                        clear_bit(id, s->txq_maperr);
3466                        tasklet_schedule(&txq->qresume_tsk);
3467                }
3468
3469        if (!is_t4(adap->params.chip)) {
3470                struct sge_eth_txq *q = &s->ptptxq;
3471                int avail;
3472
3473                spin_lock(&adap->ptp_lock);
3474                avail = reclaimable(&q->q);
3475
3476                if (avail) {
3477                        free_tx_desc(adap, &q->q, avail, false);
3478                        q->q.in_use -= avail;
3479                }
3480                spin_unlock(&adap->ptp_lock);
3481        }
3482
3483        budget = MAX_TIMER_TX_RECLAIM;
3484        i = s->ethtxq_rover;
3485        do {
3486                budget -= t4_sge_eth_txq_egress_update(adap, &s->ethtxq[i],
3487                                                       budget);
3488                if (!budget)
3489                        break;
3490
3491                if (++i >= s->ethqsets)
3492                        i = 0;
3493        } while (i != s->ethtxq_rover);
3494        s->ethtxq_rover = i;
3495
3496        if (budget == 0) {
3497                /* If we found too many reclaimable packets, schedule a timer
3498                 * in the near future to continue where we left off.
3499                 */
3500                period = 2;
3501        } else {
3502                /* We reclaimed all reclaimable TX Descriptors, so reschedule
3503                 * at the normal period.
3504                 */
3505                period = TX_QCHECK_PERIOD;
3506        }
3507
3508        mod_timer(&s->tx_timer, jiffies + period);
3509}
3510
3511/**
3512 *      bar2_address - return the BAR2 address for an SGE Queue's Registers
3513 *      @adapter: the adapter
3514 *      @qid: the SGE Queue ID
3515 *      @qtype: the SGE Queue Type (Egress or Ingress)
3516 *      @pbar2_qid: BAR2 Queue ID or 0 for Queue ID inferred SGE Queues
3517 *
3518 *      Returns the BAR2 address for the SGE Queue Registers associated with
3519 *      @qid.  If BAR2 SGE Registers aren't available, returns NULL.  Also
3520 *      returns the BAR2 Queue ID to be used with writes to the BAR2 SGE
3521 *      Queue Registers.  If the BAR2 Queue ID is 0, then "Inferred Queue ID"
3522 *      Registers are supported (e.g. the Write Combining Doorbell Buffer).
3523 */
3524static void __iomem *bar2_address(struct adapter *adapter,
3525                                  unsigned int qid,
3526                                  enum t4_bar2_qtype qtype,
3527                                  unsigned int *pbar2_qid)
3528{
3529        u64 bar2_qoffset;
3530        int ret;
3531
3532        ret = t4_bar2_sge_qregs(adapter, qid, qtype, 0,
3533                                &bar2_qoffset, pbar2_qid);
3534        if (ret)
3535                return NULL;
3536
3537        return adapter->bar2 + bar2_qoffset;
3538}
3539
3540/* @intr_idx: MSI/MSI-X vector if >=0, -(absolute qid + 1) if < 0
3541 * @cong: < 0 -> no congestion feedback, >= 0 -> congestion channel map
3542 */
3543int t4_sge_alloc_rxq(struct adapter *adap, struct sge_rspq *iq, bool fwevtq,
3544                     struct net_device *dev, int intr_idx,
3545                     struct sge_fl *fl, rspq_handler_t hnd,
3546                     rspq_flush_handler_t flush_hnd, int cong)
3547{
3548        int ret, flsz = 0;
3549        struct fw_iq_cmd c;
3550        struct sge *s = &adap->sge;
3551        struct port_info *pi = netdev_priv(dev);
3552        int relaxed = !(adap->flags & CXGB4_ROOT_NO_RELAXED_ORDERING);
3553
3554        /* Size needs to be a multiple of 16, including the status entry. */
3555        iq->size = roundup(iq->size, 16);
3556
3557        iq->desc = alloc_ring(adap->pdev_dev, iq->size, iq->iqe_len, 0,
3558                              &iq->phys_addr, NULL, 0,
3559                              dev_to_node(adap->pdev_dev));
3560        if (!iq->desc)
3561                return -ENOMEM;
3562
3563        memset(&c, 0, sizeof(c));
3564        c.op_to_vfn = htonl(FW_CMD_OP_V(FW_IQ_CMD) | FW_CMD_REQUEST_F |
3565                            FW_CMD_WRITE_F | FW_CMD_EXEC_F |
3566                            FW_IQ_CMD_PFN_V(adap->pf) | FW_IQ_CMD_VFN_V(0));
3567        c.alloc_to_len16 = htonl(FW_IQ_CMD_ALLOC_F | FW_IQ_CMD_IQSTART_F |
3568                                 FW_LEN16(c));
3569        c.type_to_iqandstindex = htonl(FW_IQ_CMD_TYPE_V(FW_IQ_TYPE_FL_INT_CAP) |
3570                FW_IQ_CMD_IQASYNCH_V(fwevtq) | FW_IQ_CMD_VIID_V(pi->viid) |
3571                FW_IQ_CMD_IQANDST_V(intr_idx < 0) |
3572                FW_IQ_CMD_IQANUD_V(UPDATEDELIVERY_INTERRUPT_X) |
3573                FW_IQ_CMD_IQANDSTINDEX_V(intr_idx >= 0 ? intr_idx :
3574                                                        -intr_idx - 1));
3575        c.iqdroprss_to_iqesize = htons(FW_IQ_CMD_IQPCIECH_V(pi->tx_chan) |
3576                FW_IQ_CMD_IQGTSMODE_F |
3577                FW_IQ_CMD_IQINTCNTTHRESH_V(iq->pktcnt_idx) |
3578                FW_IQ_CMD_IQESIZE_V(ilog2(iq->iqe_len) - 4));
3579        c.iqsize = htons(iq->size);
3580        c.iqaddr = cpu_to_be64(iq->phys_addr);
3581        if (cong >= 0)
3582                c.iqns_to_fl0congen = htonl(FW_IQ_CMD_IQFLINTCONGEN_F |
3583                                FW_IQ_CMD_IQTYPE_V(cong ? FW_IQ_IQTYPE_NIC
3584                                                        :  FW_IQ_IQTYPE_OFLD));
3585
3586        if (fl) {
3587                unsigned int chip_ver =
3588                        CHELSIO_CHIP_VERSION(adap->params.chip);
3589
3590                /* Allocate the ring for the hardware free list (with space
3591                 * for its status page) along with the associated software
3592                 * descriptor ring.  The free list size needs to be a multiple
3593                 * of the Egress Queue Unit and at least 2 Egress Units larger
3594         * than the SGE's Egress Congestion Threshold
3595                 * (fl_starve_thres - 1).
3596                 */
3597                if (fl->size < s->fl_starve_thres - 1 + 2 * 8)
3598                        fl->size = s->fl_starve_thres - 1 + 2 * 8;
3599                fl->size = roundup(fl->size, 8);
3600                fl->desc = alloc_ring(adap->pdev_dev, fl->size, sizeof(__be64),
3601                                      sizeof(struct rx_sw_desc), &fl->addr,
3602                                      &fl->sdesc, s->stat_len,
3603                                      dev_to_node(adap->pdev_dev));
3604                if (!fl->desc)
3605                        goto fl_nomem;
3606
3607                flsz = fl->size / 8 + s->stat_len / sizeof(struct tx_desc);
3608                c.iqns_to_fl0congen |= htonl(FW_IQ_CMD_FL0PACKEN_F |
3609                                             FW_IQ_CMD_FL0FETCHRO_V(relaxed) |
3610                                             FW_IQ_CMD_FL0DATARO_V(relaxed) |
3611                                             FW_IQ_CMD_FL0PADEN_F);
3612                if (cong >= 0)
3613                        c.iqns_to_fl0congen |=
3614                                htonl(FW_IQ_CMD_FL0CNGCHMAP_V(cong) |
3615                                      FW_IQ_CMD_FL0CONGCIF_F |
3616                                      FW_IQ_CMD_FL0CONGEN_F);
3617                /* In T6, for egress queue type FL there is internal overhead
3618                 * of 16B for header going into FLM module.  Hence the maximum
3619                 * allowed burst size is 448 bytes.  For T4/T5, the hardware
3620                 * doesn't coalesce fetch requests if more than 64 bytes of
3621                 * Free List pointers are provided, so we use a 128-byte Fetch
3622                 * Burst Minimum there (T6 implements coalescing so we can use
3623                 * the smaller 64-byte value there).
3624                 */
3625                c.fl0dcaen_to_fl0cidxfthresh =
3626                        htons(FW_IQ_CMD_FL0FBMIN_V(chip_ver <= CHELSIO_T5 ?
3627                                                   FETCHBURSTMIN_128B_X :
3628                                                   FETCHBURSTMIN_64B_T6_X) |
3629                              FW_IQ_CMD_FL0FBMAX_V((chip_ver <= CHELSIO_T5) ?
3630                                                   FETCHBURSTMAX_512B_X :
3631                                                   FETCHBURSTMAX_256B_X));
3632                c.fl0size = htons(flsz);
3633                c.fl0addr = cpu_to_be64(fl->addr);
3634        }
3635
3636        ret = t4_wr_mbox(adap, adap->mbox, &c, sizeof(c), &c);
3637        if (ret)
3638                goto err;
3639
3640        netif_napi_add(dev, &iq->napi, napi_rx_handler, 64);
3641        iq->cur_desc = iq->desc;
3642        iq->cidx = 0;
3643        iq->gen = 1;
3644        iq->next_intr_params = iq->intr_params;
3645        iq->cntxt_id = ntohs(c.iqid);
3646        iq->abs_id = ntohs(c.physiqid);
3647        iq->bar2_addr = bar2_address(adap,
3648                                     iq->cntxt_id,
3649                                     T4_BAR2_QTYPE_INGRESS,
3650                                     &iq->bar2_qid);
3651        iq->size--;                           /* subtract status entry */
3652        iq->netdev = dev;
3653        iq->handler = hnd;
3654        iq->flush_handler = flush_hnd;
3655
3656        memset(&iq->lro_mgr, 0, sizeof(struct t4_lro_mgr));
3657        skb_queue_head_init(&iq->lro_mgr.lroq);
3658
3659        /* set offset to -1 to distinguish ingress queues without FL */
3660        iq->offset = fl ? 0 : -1;
3661
3662        adap->sge.ingr_map[iq->cntxt_id - adap->sge.ingr_start] = iq;
3663
3664        if (fl) {
3665                fl->cntxt_id = ntohs(c.fl0id);
3666                fl->avail = fl->pend_cred = 0;
3667                fl->pidx = fl->cidx = 0;
3668                fl->alloc_failed = fl->large_alloc_failed = fl->starving = 0;
3669                adap->sge.egr_map[fl->cntxt_id - adap->sge.egr_start] = fl;
3670
3671                /* Note, we must initialize the BAR2 Free List User Doorbell
3672                 * information before refilling the Free List!
3673                 */
3674                fl->bar2_addr = bar2_address(adap,
3675                                             fl->cntxt_id,
3676                                             T4_BAR2_QTYPE_EGRESS,
3677                                             &fl->bar2_qid);
3678                refill_fl(adap, fl, fl_cap(fl), GFP_KERNEL);
3679        }
3680
3681        /* For T5 and later we attempt to set up the Congestion Manager values
3682         * of the new RX Ethernet Queue.  This should really be handled by
3683         * firmware because it's more complex than any host driver wants to
3684         * get involved with, it's different per chip, and what we do here is
3685         * almost certainly wrong.  Firmware would be wrong as well, but it would be
3686         * a lot easier to fix in one place ...  For now we do something very
3687         * simple (and hopefully less wrong).
3688         */
3689        if (!is_t4(adap->params.chip) && cong >= 0) {
3690                u32 param, val, ch_map = 0;
3691                int i;
3692                u16 cng_ch_bits_log = adap->params.arch.cng_ch_bits_log;
3693
3694                param = (FW_PARAMS_MNEM_V(FW_PARAMS_MNEM_DMAQ) |
3695                         FW_PARAMS_PARAM_X_V(FW_PARAMS_PARAM_DMAQ_CONM_CTXT) |
3696                         FW_PARAMS_PARAM_YZ_V(iq->cntxt_id));
3697                if (cong == 0) {
3698                        val = CONMCTXT_CNGTPMODE_V(CONMCTXT_CNGTPMODE_QUEUE_X);
3699                } else {
3700                        val =
3701                            CONMCTXT_CNGTPMODE_V(CONMCTXT_CNGTPMODE_CHANNEL_X);
3702                        for (i = 0; i < 4; i++) {
3703                                if (cong & (1 << i))
3704                                        ch_map |= 1 << (i << cng_ch_bits_log);
3705                        }
3706                        val |= CONMCTXT_CNGCHMAP_V(ch_map);
3707                }
3708                ret = t4_set_params(adap, adap->mbox, adap->pf, 0, 1,
3709                                    &param, &val);
3710                if (ret)
3711                        dev_warn(adap->pdev_dev,
3712                                 "Failed to set Congestion Manager Context for Ingress Queue %d: %d\n",
3713                                 iq->cntxt_id, -ret);
3714        }
3715
3716        return 0;
3717
3718fl_nomem:
3719        ret = -ENOMEM;
3720err:
3721        if (iq->desc) {
3722                dma_free_coherent(adap->pdev_dev, iq->size * iq->iqe_len,
3723                                  iq->desc, iq->phys_addr);
3724                iq->desc = NULL;
3725        }
3726        if (fl && fl->desc) {
3727                kfree(fl->sdesc);
3728                fl->sdesc = NULL;
3729                dma_free_coherent(adap->pdev_dev, flsz * sizeof(struct tx_desc),
3730                                  fl->desc, fl->addr);
3731                fl->desc = NULL;
3732        }
3733        return ret;
3734}
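
/* Worked example of the congestion channel map built above: bit i of 'cong'
 * selects ingress channel i, and each channel's field in the CONM context
 * appears to be 2^cng_ch_bits_log bits wide, so channel i lands at bit
 * position i << cng_ch_bits_log.  With cong = 0x5 (channels 0 and 2) and
 * cng_ch_bits_log = 2, the loop produces
 * ch_map = (1 << 0) | (1 << 8) = 0x101.  Hypothetical helper reproducing
 * that loop, for illustration only.
 */
static inline u32 example_conm_ch_map(unsigned int cong, u16 cng_ch_bits_log)
{
        u32 ch_map = 0;
        int i;

        for (i = 0; i < 4; i++)
                if (cong & (1 << i))
                        ch_map |= 1 << (i << cng_ch_bits_log);

        return ch_map;
}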
3735
3736static void init_txq(struct adapter *adap, struct sge_txq *q, unsigned int id)
3737{
3738        q->cntxt_id = id;
3739        q->bar2_addr = bar2_address(adap,
3740                                    q->cntxt_id,
3741                                    T4_BAR2_QTYPE_EGRESS,
3742                                    &q->bar2_qid);
3743        q->in_use = 0;
3744        q->cidx = q->pidx = 0;
3745        q->stops = q->restarts = 0;
3746        q->stat = (void *)&q->desc[q->size];
3747        spin_lock_init(&q->db_lock);
3748        adap->sge.egr_map[id - adap->sge.egr_start] = q;
3749}
3750
3751/**
3752 *      t4_sge_alloc_eth_txq - allocate an Ethernet TX Queue
3753 *      @adap: the adapter
3754 *      @txq: the SGE Ethernet TX Queue to initialize
3755 *      @dev: the Linux Network Device
3756 *      @netdevq: the corresponding Linux TX Queue
3757 *      @iqid: the Ingress Queue to which to deliver CIDX Update messages
3758 *      @dbqt: whether this TX Queue will use the SGE Doorbell Queue Timers
3759 */
3760int t4_sge_alloc_eth_txq(struct adapter *adap, struct sge_eth_txq *txq,
3761                         struct net_device *dev, struct netdev_queue *netdevq,
3762                         unsigned int iqid, u8 dbqt)
3763{
3764        unsigned int chip_ver = CHELSIO_CHIP_VERSION(adap->params.chip);
3765        struct port_info *pi = netdev_priv(dev);
3766        struct sge *s = &adap->sge;
3767        struct fw_eq_eth_cmd c;
3768        int ret, nentries;
3769
3770        /* Add status entries */
3771        nentries = txq->q.size + s->stat_len / sizeof(struct tx_desc);
3772
3773        txq->q.desc = alloc_ring(adap->pdev_dev, txq->q.size,
3774                        sizeof(struct tx_desc), sizeof(struct tx_sw_desc),
3775                        &txq->q.phys_addr, &txq->q.sdesc, s->stat_len,
3776                        netdev_queue_numa_node_read(netdevq));
3777        if (!txq->q.desc)
3778                return -ENOMEM;
3779
3780        memset(&c, 0, sizeof(c));
3781        c.op_to_vfn = htonl(FW_CMD_OP_V(FW_EQ_ETH_CMD) | FW_CMD_REQUEST_F |
3782                            FW_CMD_WRITE_F | FW_CMD_EXEC_F |
3783                            FW_EQ_ETH_CMD_PFN_V(adap->pf) |
3784                            FW_EQ_ETH_CMD_VFN_V(0));
3785        c.alloc_to_len16 = htonl(FW_EQ_ETH_CMD_ALLOC_F |
3786                                 FW_EQ_ETH_CMD_EQSTART_F | FW_LEN16(c));
3787
3788        /* For TX Ethernet Queues using the SGE Doorbell Queue Timer
3789         * mechanism, we use Ingress Queue messages for Hardware Consumer
3790         * Index Updates on the TX Queue.  Otherwise we have the Hardware
3791         * write the CIDX Updates into the Status Page at the end of the
3792         * TX Queue.
3793         */
3794        c.autoequiqe_to_viid = htonl(FW_EQ_ETH_CMD_AUTOEQUEQE_F |
3795                                     FW_EQ_ETH_CMD_VIID_V(pi->viid));
3796
3797        c.fetchszm_to_iqid =
3798                htonl(FW_EQ_ETH_CMD_HOSTFCMODE_V(HOSTFCMODE_STATUS_PAGE_X) |
3799                      FW_EQ_ETH_CMD_PCIECHN_V(pi->tx_chan) |
3800                      FW_EQ_ETH_CMD_FETCHRO_F | FW_EQ_ETH_CMD_IQID_V(iqid));
3801
3802        /* Note that the CIDX Flush Threshold should match MAX_TX_RECLAIM. */
3803        c.dcaen_to_eqsize =
3804                htonl(FW_EQ_ETH_CMD_FBMIN_V(chip_ver <= CHELSIO_T5
3805                                            ? FETCHBURSTMIN_64B_X
3806                                            : FETCHBURSTMIN_64B_T6_X) |
3807                      FW_EQ_ETH_CMD_FBMAX_V(FETCHBURSTMAX_512B_X) |
3808                      FW_EQ_ETH_CMD_CIDXFTHRESH_V(CIDXFLUSHTHRESH_32_X) |
3809                      FW_EQ_ETH_CMD_EQSIZE_V(nentries));
3810
3811        c.eqaddr = cpu_to_be64(txq->q.phys_addr);
3812
3813        /* If we're using the SGE Doorbell Queue Timer mechanism, pass in the
3814         * currently configured Timer Index.  This can be changed later via an
3815         * ethtool -C tx-usecs {Timer Val} command.  Note that the SGE
3816         * Doorbell Queue mode is currently automatically enabled in the
3817         * Firmware by setting either AUTOEQUEQE or AUTOEQUIQE ...
3818         */
3819        if (dbqt)
3820                c.timeren_timerix =
3821                        cpu_to_be32(FW_EQ_ETH_CMD_TIMEREN_F |
3822                                    FW_EQ_ETH_CMD_TIMERIX_V(txq->dbqtimerix));
3823
3824        ret = t4_wr_mbox(adap, adap->mbox, &c, sizeof(c), &c);
3825        if (ret) {
3826                kfree(txq->q.sdesc);
3827                txq->q.sdesc = NULL;
3828                dma_free_coherent(adap->pdev_dev,
3829                                  nentries * sizeof(struct tx_desc),
3830                                  txq->q.desc, txq->q.phys_addr);
3831                txq->q.desc = NULL;
3832                return ret;
3833        }
3834
3835        txq->q.q_type = CXGB4_TXQ_ETH;
3836        init_txq(adap, &txq->q, FW_EQ_ETH_CMD_EQID_G(ntohl(c.eqid_pkd)));
3837        txq->txq = netdevq;
3838        txq->tso = txq->tx_cso = txq->vlan_ins = 0;
3839        txq->mapping_err = 0;
3840        txq->dbqt = dbqt;
3841
3842        return 0;
3843}
3844
3845int t4_sge_alloc_ctrl_txq(struct adapter *adap, struct sge_ctrl_txq *txq,
3846                          struct net_device *dev, unsigned int iqid,
3847                          unsigned int cmplqid)
3848{
3849        unsigned int chip_ver = CHELSIO_CHIP_VERSION(adap->params.chip);
3850        struct port_info *pi = netdev_priv(dev);
3851        struct sge *s = &adap->sge;
3852        struct fw_eq_ctrl_cmd c;
3853        int ret, nentries;
3854
3855        /* Add status entries */
3856        nentries = txq->q.size + s->stat_len / sizeof(struct tx_desc);
3857
3858        txq->q.desc = alloc_ring(adap->pdev_dev, nentries,
3859                                 sizeof(struct tx_desc), 0, &txq->q.phys_addr,
3860                                 NULL, 0, dev_to_node(adap->pdev_dev));
3861        if (!txq->q.desc)
3862                return -ENOMEM;
3863
3864        c.op_to_vfn = htonl(FW_CMD_OP_V(FW_EQ_CTRL_CMD) | FW_CMD_REQUEST_F |
3865                            FW_CMD_WRITE_F | FW_CMD_EXEC_F |
3866                            FW_EQ_CTRL_CMD_PFN_V(adap->pf) |
3867                            FW_EQ_CTRL_CMD_VFN_V(0));
3868        c.alloc_to_len16 = htonl(FW_EQ_CTRL_CMD_ALLOC_F |
3869                                 FW_EQ_CTRL_CMD_EQSTART_F | FW_LEN16(c));
3870        c.cmpliqid_eqid = htonl(FW_EQ_CTRL_CMD_CMPLIQID_V(cmplqid));
3871        c.physeqid_pkd = htonl(0);
3872        c.fetchszm_to_iqid =
3873                htonl(FW_EQ_CTRL_CMD_HOSTFCMODE_V(HOSTFCMODE_STATUS_PAGE_X) |
3874                      FW_EQ_CTRL_CMD_PCIECHN_V(pi->tx_chan) |
3875                      FW_EQ_CTRL_CMD_FETCHRO_F | FW_EQ_CTRL_CMD_IQID_V(iqid));
3876        c.dcaen_to_eqsize =
3877                htonl(FW_EQ_CTRL_CMD_FBMIN_V(chip_ver <= CHELSIO_T5
3878                                             ? FETCHBURSTMIN_64B_X
3879                                             : FETCHBURSTMIN_64B_T6_X) |
3880                      FW_EQ_CTRL_CMD_FBMAX_V(FETCHBURSTMAX_512B_X) |
3881                      FW_EQ_CTRL_CMD_CIDXFTHRESH_V(CIDXFLUSHTHRESH_32_X) |
3882                      FW_EQ_CTRL_CMD_EQSIZE_V(nentries));
3883        c.eqaddr = cpu_to_be64(txq->q.phys_addr);
3884
3885        ret = t4_wr_mbox(adap, adap->mbox, &c, sizeof(c), &c);
3886        if (ret) {
3887                dma_free_coherent(adap->pdev_dev,
3888                                  nentries * sizeof(struct tx_desc),
3889                                  txq->q.desc, txq->q.phys_addr);
3890                txq->q.desc = NULL;
3891                return ret;
3892        }
3893
3894        txq->q.q_type = CXGB4_TXQ_CTRL;
3895        init_txq(adap, &txq->q, FW_EQ_CTRL_CMD_EQID_G(ntohl(c.cmpliqid_eqid)));
3896        txq->adap = adap;
3897        skb_queue_head_init(&txq->sendq);
3898        tasklet_init(&txq->qresume_tsk, restart_ctrlq, (unsigned long)txq);
3899        txq->full = 0;
3900        return 0;
3901}
3902
3903int t4_sge_mod_ctrl_txq(struct adapter *adap, unsigned int eqid,
3904                        unsigned int cmplqid)
3905{
3906        u32 param, val;
3907
3908        param = (FW_PARAMS_MNEM_V(FW_PARAMS_MNEM_DMAQ) |
3909                 FW_PARAMS_PARAM_X_V(FW_PARAMS_PARAM_DMAQ_EQ_CMPLIQID_CTRL) |
3910                 FW_PARAMS_PARAM_YZ_V(eqid));
3911        val = cmplqid;
3912        return t4_set_params(adap, adap->mbox, adap->pf, 0, 1, &param, &val);
3913}
3914
3915int t4_sge_alloc_uld_txq(struct adapter *adap, struct sge_uld_txq *txq,
3916                         struct net_device *dev, unsigned int iqid,
3917                         unsigned int uld_type)
3918{
3919        unsigned int chip_ver = CHELSIO_CHIP_VERSION(adap->params.chip);
3920        int ret, nentries;
3921        struct fw_eq_ofld_cmd c;
3922        struct sge *s = &adap->sge;
3923        struct port_info *pi = netdev_priv(dev);
3924        int cmd = FW_EQ_OFLD_CMD;
3925
3926        /* Add status entries */
3927        nentries = txq->q.size + s->stat_len / sizeof(struct tx_desc);
3928
3929        txq->q.desc = alloc_ring(adap->pdev_dev, txq->q.size,
3930                        sizeof(struct tx_desc), sizeof(struct tx_sw_desc),
3931                        &txq->q.phys_addr, &txq->q.sdesc, s->stat_len,
3932                        NUMA_NO_NODE);
3933        if (!txq->q.desc)
3934                return -ENOMEM;
3935
3936        memset(&c, 0, sizeof(c));
3937        if (unlikely(uld_type == CXGB4_TX_CRYPTO))
3938                cmd = FW_EQ_CTRL_CMD;
3939        c.op_to_vfn = htonl(FW_CMD_OP_V(cmd) | FW_CMD_REQUEST_F |
3940                            FW_CMD_WRITE_F | FW_CMD_EXEC_F |
3941                            FW_EQ_OFLD_CMD_PFN_V(adap->pf) |
3942                            FW_EQ_OFLD_CMD_VFN_V(0));
3943        c.alloc_to_len16 = htonl(FW_EQ_OFLD_CMD_ALLOC_F |
3944                                 FW_EQ_OFLD_CMD_EQSTART_F | FW_LEN16(c));
3945        c.fetchszm_to_iqid =
3946                htonl(FW_EQ_OFLD_CMD_HOSTFCMODE_V(HOSTFCMODE_STATUS_PAGE_X) |
3947                      FW_EQ_OFLD_CMD_PCIECHN_V(pi->tx_chan) |
3948                      FW_EQ_OFLD_CMD_FETCHRO_F | FW_EQ_OFLD_CMD_IQID_V(iqid));
3949        c.dcaen_to_eqsize =
3950                htonl(FW_EQ_OFLD_CMD_FBMIN_V(chip_ver <= CHELSIO_T5
3951                                             ? FETCHBURSTMIN_64B_X
3952                                             : FETCHBURSTMIN_64B_T6_X) |
3953                      FW_EQ_OFLD_CMD_FBMAX_V(FETCHBURSTMAX_512B_X) |
3954                      FW_EQ_OFLD_CMD_CIDXFTHRESH_V(CIDXFLUSHTHRESH_32_X) |
3955                      FW_EQ_OFLD_CMD_EQSIZE_V(nentries));
3956        c.eqaddr = cpu_to_be64(txq->q.phys_addr);
3957
3958        ret = t4_wr_mbox(adap, adap->mbox, &c, sizeof(c), &c);
3959        if (ret) {
3960                kfree(txq->q.sdesc);
3961                txq->q.sdesc = NULL;
3962                dma_free_coherent(adap->pdev_dev,
3963                                  nentries * sizeof(struct tx_desc),
3964                                  txq->q.desc, txq->q.phys_addr);
3965                txq->q.desc = NULL;
3966                return ret;
3967        }
3968
3969        txq->q.q_type = CXGB4_TXQ_ULD;
3970        init_txq(adap, &txq->q, FW_EQ_OFLD_CMD_EQID_G(ntohl(c.eqid_pkd)));
3971        txq->adap = adap;
3972        skb_queue_head_init(&txq->sendq);
3973        tasklet_init(&txq->qresume_tsk, restart_ofldq, (unsigned long)txq);
3974        txq->full = 0;
3975        txq->mapping_err = 0;
3976        return 0;
3977}
3978
3979void free_txq(struct adapter *adap, struct sge_txq *q)
3980{
3981        struct sge *s = &adap->sge;
3982
3983        dma_free_coherent(adap->pdev_dev,
3984                          q->size * sizeof(struct tx_desc) + s->stat_len,
3985                          q->desc, q->phys_addr);
3986        q->cntxt_id = 0;
3987        q->sdesc = NULL;
3988        q->desc = NULL;
3989}
3990
3991void free_rspq_fl(struct adapter *adap, struct sge_rspq *rq,
3992                  struct sge_fl *fl)
3993{
3994        struct sge *s = &adap->sge;
3995        unsigned int fl_id = fl ? fl->cntxt_id : 0xffff;
3996
3997        adap->sge.ingr_map[rq->cntxt_id - adap->sge.ingr_start] = NULL;
3998        t4_iq_free(adap, adap->mbox, adap->pf, 0, FW_IQ_TYPE_FL_INT_CAP,
3999                   rq->cntxt_id, fl_id, 0xffff);
4000        dma_free_coherent(adap->pdev_dev, (rq->size + 1) * rq->iqe_len,
4001                          rq->desc, rq->phys_addr);
4002        netif_napi_del(&rq->napi);
4003        rq->netdev = NULL;
4004        rq->cntxt_id = rq->abs_id = 0;
4005        rq->desc = NULL;
4006
4007        if (fl) {
4008                free_rx_bufs(adap, fl, fl->avail);
4009                dma_free_coherent(adap->pdev_dev, fl->size * 8 + s->stat_len,
4010                                  fl->desc, fl->addr);
4011                kfree(fl->sdesc);
4012                fl->sdesc = NULL;
4013                fl->cntxt_id = 0;
4014                fl->desc = NULL;
4015        }
4016}
4017
4018/**
4019 *      t4_free_ofld_rxqs - free a block of consecutive Rx queues
4020 *      @adap: the adapter
4021 *      @n: number of queues
4022 *      @q: pointer to first queue
4023 *
4024 *      Release the resources of a consecutive block of offload Rx queues.
4025 */
4026void t4_free_ofld_rxqs(struct adapter *adap, int n, struct sge_ofld_rxq *q)
4027{
4028        for ( ; n; n--, q++)
4029                if (q->rspq.desc)
4030                        free_rspq_fl(adap, &q->rspq,
4031                                     q->fl.size ? &q->fl : NULL);
4032}
4033
4034/**
4035 *      t4_free_sge_resources - free SGE resources
4036 *      @adap: the adapter
4037 *
4038 *      Frees resources used by the SGE queue sets.
4039 */
4040void t4_free_sge_resources(struct adapter *adap)
4041{
4042        int i;
4043        struct sge_eth_rxq *eq;
4044        struct sge_eth_txq *etq;
4045
4046        /* stop all Rx queues in order to start them draining */
4047        for (i = 0; i < adap->sge.ethqsets; i++) {
4048                eq = &adap->sge.ethrxq[i];
4049                if (eq->rspq.desc)
4050                        t4_iq_stop(adap, adap->mbox, adap->pf, 0,
4051                                   FW_IQ_TYPE_FL_INT_CAP,
4052                                   eq->rspq.cntxt_id,
4053                                   eq->fl.size ? eq->fl.cntxt_id : 0xffff,
4054                                   0xffff);
4055        }
4056
4057        /* clean up Ethernet Tx/Rx queues */
4058        for (i = 0; i < adap->sge.ethqsets; i++) {
4059                eq = &adap->sge.ethrxq[i];
4060                if (eq->rspq.desc)
4061                        free_rspq_fl(adap, &eq->rspq,
4062                                     eq->fl.size ? &eq->fl : NULL);
4063
4064                etq = &adap->sge.ethtxq[i];
4065                if (etq->q.desc) {
4066                        t4_eth_eq_free(adap, adap->mbox, adap->pf, 0,
4067                                       etq->q.cntxt_id);
4068                        __netif_tx_lock_bh(etq->txq);
4069                        free_tx_desc(adap, &etq->q, etq->q.in_use, true);
4070                        __netif_tx_unlock_bh(etq->txq);
4071                        kfree(etq->q.sdesc);
4072                        free_txq(adap, &etq->q);
4073                }
4074        }
4075
4076        /* clean up control Tx queues */
4077        for (i = 0; i < ARRAY_SIZE(adap->sge.ctrlq); i++) {
4078                struct sge_ctrl_txq *cq = &adap->sge.ctrlq[i];
4079
4080                if (cq->q.desc) {
4081                        tasklet_kill(&cq->qresume_tsk);
4082                        t4_ctrl_eq_free(adap, adap->mbox, adap->pf, 0,
4083                                        cq->q.cntxt_id);
4084                        __skb_queue_purge(&cq->sendq);
4085                        free_txq(adap, &cq->q);
4086                }
4087        }
4088
4089        if (adap->sge.fw_evtq.desc)
4090                free_rspq_fl(adap, &adap->sge.fw_evtq, NULL);
4091
4092        if (adap->sge.intrq.desc)
4093                free_rspq_fl(adap, &adap->sge.intrq, NULL);
4094
4095        if (!is_t4(adap->params.chip)) {
4096                etq = &adap->sge.ptptxq;
4097                if (etq->q.desc) {
4098                        t4_eth_eq_free(adap, adap->mbox, adap->pf, 0,
4099                                       etq->q.cntxt_id);
4100                        spin_lock_bh(&adap->ptp_lock);
4101                        free_tx_desc(adap, &etq->q, etq->q.in_use, true);
4102                        spin_unlock_bh(&adap->ptp_lock);
4103                        kfree(etq->q.sdesc);
4104                        free_txq(adap, &etq->q);
4105                }
4106        }
4107
4108        /* clear the reverse egress queue map */
4109        memset(adap->sge.egr_map, 0,
4110               adap->sge.egr_sz * sizeof(*adap->sge.egr_map));
4111}
4112
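/**
 *      t4_sge_start - enable SGE operation
 *      @adap: the adapter
 *
 *      Resets the Tx queue rover and (re)arms the periodic Rx and Tx timers
 *      used for background queue maintenance such as Free List
 *      replenishment and Tx descriptor reclamation.
 */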
4113void t4_sge_start(struct adapter *adap)
4114{
4115        adap->sge.ethtxq_rover = 0;
4116        mod_timer(&adap->sge.rx_timer, jiffies + RX_QCHECK_PERIOD);
4117        mod_timer(&adap->sge.tx_timer, jiffies + TX_QCHECK_PERIOD);
4118}
4119
4120/**
4121 *      t4_sge_stop - disable SGE operation
4122 *      @adap: the adapter
4123 *
4124 *      Stop tasklets and timers associated with the DMA engine.  Note that
4125 *      this is effective only if measures have been taken to disable any HW
4126 *      events that may restart them.
4127 */
4128void t4_sge_stop(struct adapter *adap)
4129{
4130        int i;
4131        struct sge *s = &adap->sge;
4132
4133        if (in_interrupt())  /* actions below require waiting */
4134                return;
4135
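        /* The periodic timers are only set up once t4_sge_init() has run;
         * the .function checks below avoid calling del_timer_sync() on
         * timers that were never initialized.
         */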
4136        if (s->rx_timer.function)
4137                del_timer_sync(&s->rx_timer);
4138        if (s->tx_timer.function)
4139                del_timer_sync(&s->tx_timer);
4140
4141        if (is_offload(adap)) {
4142                struct sge_uld_txq_info *txq_info;
4143
4144                txq_info = adap->sge.uld_txq_info[CXGB4_TX_OFLD];
4145                if (txq_info) {
4146                        struct sge_uld_txq *txq = txq_info->uldtxq;
4147
4148                        for_each_ofldtxq(&adap->sge, i) {
4149                                if (txq->q.desc)
4150                                        tasklet_kill(&txq->qresume_tsk);
4151                        }
4152                }
4153        }
4154
4155        if (is_pci_uld(adap)) {
4156                struct sge_uld_txq_info *txq_info;
4157
4158                txq_info = adap->sge.uld_txq_info[CXGB4_TX_CRYPTO];
4159                if (txq_info) {
4160                        struct sge_uld_txq *txq = txq_info->uldtxq;
4161
4162                        for_each_ofldtxq(&adap->sge, i) {
4163                                if (txq->q.desc)
4164                                        tasklet_kill(&txq->qresume_tsk);
4165                        }
4166                }
4167        }
4168
4169        for (i = 0; i < ARRAY_SIZE(s->ctrlq); i++) {
4170                struct sge_ctrl_txq *cq = &s->ctrlq[i];
4171
4172                if (cq->q.desc)
4173                        tasklet_kill(&cq->qresume_tsk);
4174        }
4175}
4176
4177/**
4178 *      t4_sge_init_soft - grab core SGE values needed by SGE code
4179 *      @adap: the adapter
4180 *
4181 *      Read the SGE operating parameters that we depend on to do our job
4182 *      and make sure we can live with them.
4183 */
4184
4185static int t4_sge_init_soft(struct adapter *adap)
4186{
4187        struct sge *s = &adap->sge;
4188        u32 fl_small_pg, fl_large_pg, fl_small_mtu, fl_large_mtu;
4189        u32 timer_value_0_and_1, timer_value_2_and_3, timer_value_4_and_5;
4190        u32 ingress_rx_threshold;
4191
4192        /*
4193         * Verify that CPL messages are going to the Ingress Queue for
4194         * process_responses() and that only packet data is going to the
4195         * Free Lists.
4196         */
4197        if ((t4_read_reg(adap, SGE_CONTROL_A) & RXPKTCPLMODE_F) !=
4198            RXPKTCPLMODE_V(RXPKTCPLMODE_SPLIT_X)) {
4199                dev_err(adap->pdev_dev, "bad SGE CPL MODE\n");
4200                return -EINVAL;
4201        }
4202
4203        /*
4204         * Validate the Host Buffer Register Array indices that we want to
4205         * use ...
4206         *
4207         * XXX Note that we should really read through the Host Buffer Size
4208         * XXX register array and find the indices of the Buffer Sizes which
4209         * XXX meet our needs!
4210         */
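        /* READ_FL_BUF(x) returns the x'th entry of the Host Buffer Size
         * register array; the RX_*_BUF indices used below select the Page
         * Size, Large Page, small MTU and large MTU buffer sizes.
         */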
4211        #define READ_FL_BUF(x) \
4212                t4_read_reg(adap, SGE_FL_BUFFER_SIZE0_A+(x)*sizeof(u32))
4213
4214        fl_small_pg = READ_FL_BUF(RX_SMALL_PG_BUF);
4215        fl_large_pg = READ_FL_BUF(RX_LARGE_PG_BUF);
4216        fl_small_mtu = READ_FL_BUF(RX_SMALL_MTU_BUF);
4217        fl_large_mtu = READ_FL_BUF(RX_LARGE_MTU_BUF);
4218
4219        /* We only bother using the Large Page logic if the Large Page Buffer
4220         * is larger than our Page Size Buffer.
4221         */
4222        if (fl_large_pg <= fl_small_pg)
4223                fl_large_pg = 0;
4224
4225        #undef READ_FL_BUF
4226
4227        /* The Page Size Buffer must be exactly equal to our Page Size and the
4228         * Large Page Size Buffer should be 0 (per above) or a power of 2.
4229         */
4230        if (fl_small_pg != PAGE_SIZE ||
4231            (fl_large_pg & (fl_large_pg-1)) != 0) {
4232                dev_err(adap->pdev_dev, "bad SGE FL page buffer sizes [%d, %d]\n",
4233                        fl_small_pg, fl_large_pg);
4234                return -EINVAL;
4235        }
4236        if (fl_large_pg)
4237                s->fl_pg_order = ilog2(fl_large_pg) - PAGE_SHIFT;
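        /* Illustrative arithmetic (example values, not read from the
         * hardware): a 64KB Large Page Buffer with a 4KB PAGE_SIZE gives
         * fl_pg_order = ilog2(65536) - 12 = 4, i.e. order-4 page
         * allocations for large Free List buffers.
         */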
4238
4239        if (fl_small_mtu < FL_MTU_SMALL_BUFSIZE(adap) ||
4240            fl_large_mtu < FL_MTU_LARGE_BUFSIZE(adap)) {
4241                dev_err(adap->pdev_dev, "bad SGE FL MTU sizes [%d, %d]\n",
4242                        fl_small_mtu, fl_large_mtu);
4243                return -EINVAL;
4244        }
4245
4246        /*
4247         * Retrieve our RX interrupt holdoff timer values and counter
4248         * threshold values from the SGE parameters.
4249         */
4250        timer_value_0_and_1 = t4_read_reg(adap, SGE_TIMER_VALUE_0_AND_1_A);
4251        timer_value_2_and_3 = t4_read_reg(adap, SGE_TIMER_VALUE_2_AND_3_A);
4252        timer_value_4_and_5 = t4_read_reg(adap, SGE_TIMER_VALUE_4_AND_5_A);
4253        s->timer_val[0] = core_ticks_to_us(adap,
4254                TIMERVALUE0_G(timer_value_0_and_1));
4255        s->timer_val[1] = core_ticks_to_us(adap,
4256                TIMERVALUE1_G(timer_value_0_and_1));
4257        s->timer_val[2] = core_ticks_to_us(adap,
4258                TIMERVALUE2_G(timer_value_2_and_3));
4259        s->timer_val[3] = core_ticks_to_us(adap,
4260                TIMERVALUE3_G(timer_value_2_and_3));
4261        s->timer_val[4] = core_ticks_to_us(adap,
4262                TIMERVALUE4_G(timer_value_4_and_5));
4263        s->timer_val[5] = core_ticks_to_us(adap,
4264                TIMERVALUE5_G(timer_value_4_and_5));
4265
4266        ingress_rx_threshold = t4_read_reg(adap, SGE_INGRESS_RX_THRESHOLD_A);
4267        s->counter_val[0] = THRESHOLD_0_G(ingress_rx_threshold);
4268        s->counter_val[1] = THRESHOLD_1_G(ingress_rx_threshold);
4269        s->counter_val[2] = THRESHOLD_2_G(ingress_rx_threshold);
4270        s->counter_val[3] = THRESHOLD_3_G(ingress_rx_threshold);
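        /* The six holdoff timers (converted to microseconds above) and the
         * four packet-count thresholds form the menu of interrupt
         * coalescing settings an ingress queue can pick from when it is
         * created; the driver's per-queue coalescing (ethtool
         * rx-usecs/rx-frames) parameters are expressed in terms of these.
         */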
4271
4272        return 0;
4273}
4274
4275/**
4276 *      t4_sge_init - initialize SGE
4277 *      @adap: the adapter
4278 *
4279 *      Perform low-level SGE code initialization needed every time after a
4280 *      chip reset.
4281 */
4282int t4_sge_init(struct adapter *adap)
4283{
4284        struct sge *s = &adap->sge;
4285        u32 sge_control, sge_conm_ctrl;
4286        int ret, egress_threshold;
4287
4288        /*
4289         * Ingress Padding Boundary and Egress Status Page Size are set up by
4290         * t4_fixup_host_params().
4291         */
4292        sge_control = t4_read_reg(adap, SGE_CONTROL_A);
4293        s->pktshift = PKTSHIFT_G(sge_control);
4294        s->stat_len = (sge_control & EGRSTATUSPAGESIZE_F) ? 128 : 64;
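        /* pktshift is the padding the chip inserts ahead of each ingress
         * packet; stat_len is the size of the status page appended to every
         * egress queue (64 or 128 bytes), which is why it is added to the
         * ring size in free_txq() above.
         */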
4295
4296        s->fl_align = t4_fl_pkt_align(adap);
4297        ret = t4_sge_init_soft(adap);
4298        if (ret < 0)
4299                return ret;
4300
4301        /*
4302         * A FL with <= fl_starve_thres buffers is starving and a periodic
4303         * timer will attempt to refill it.  This needs to be larger than the
4304         * SGE's Egress Congestion Threshold.  If it isn't, then we can get
4305         * stuck waiting for new packets while the SGE is waiting for us to
4306         * give it more Free List entries.  (Note that the SGE's Egress
4307         * Congestion Threshold is in units of 2 Free List pointers.) For T4,
4308         * there was only a single field to control this.  For T5 there's the
4309         * original field which now only applies to Unpacked Mode Free List
4310         * buffers and a new field which only applies to Packed Mode Free List
4311         * buffers.  T6 uses its own layout of the Packed Mode field.
4312         */
4313        sge_conm_ctrl = t4_read_reg(adap, SGE_CONM_CTRL_A);
4314        switch (CHELSIO_CHIP_VERSION(adap->params.chip)) {
4315        case CHELSIO_T4:
4316                egress_threshold = EGRTHRESHOLD_G(sge_conm_ctrl);
4317                break;
4318        case CHELSIO_T5:
4319                egress_threshold = EGRTHRESHOLDPACKING_G(sge_conm_ctrl);
4320                break;
4321        case CHELSIO_T6:
4322                egress_threshold = T6_EGRTHRESHOLDPACKING_G(sge_conm_ctrl);
4323                break;
4324        default:
4325                dev_err(adap->pdev_dev, "Unsupported Chip version %d\n",
4326                        CHELSIO_CHIP_VERSION(adap->params.chip));
4327                return -EINVAL;
4328        }
4329        s->fl_starve_thres = 2 * egress_threshold + 1;
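        /* Worked example (threshold value is illustrative): an egress
         * congestion threshold of 32 corresponds to 64 Free List pointers,
         * so fl_starve_thres becomes 2 * 32 + 1 = 65 buffers; a Free List
         * at or below this level is considered starving and is refilled
         * from the periodic Rx timer.
         */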
4330
4331        t4_idma_monitor_init(adap, &s->idma_monitor);
4332
4333        /* Set up timers used for recurring callbacks to process RX and TX
4334         * administrative tasks.
4335         */
4336        timer_setup(&s->rx_timer, sge_rx_timer_cb, 0);
4337        timer_setup(&s->tx_timer, sge_tx_timer_cb, 0);
4338
4339        spin_lock_init(&s->intrq_lock);
4340
4341        return 0;
4342}
4343