dpdk/drivers/net/ixgbe/ixgbe_rxtx.c
/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2010-2016 Intel Corporation.
 * Copyright 2014 6WIND S.A.
 */

#include <sys/queue.h>

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <stdint.h>
#include <stdarg.h>
#include <unistd.h>
#include <inttypes.h>

#include <rte_byteorder.h>
#include <rte_common.h>
#include <rte_cycles.h>
#include <rte_log.h>
#include <rte_debug.h>
#include <rte_interrupts.h>
#include <rte_pci.h>
#include <rte_memory.h>
#include <rte_memzone.h>
#include <rte_launch.h>
#include <rte_eal.h>
#include <rte_per_lcore.h>
#include <rte_lcore.h>
#include <rte_atomic.h>
#include <rte_branch_prediction.h>
#include <rte_mempool.h>
#include <rte_malloc.h>
#include <rte_mbuf.h>
#include <rte_ether.h>
#include <ethdev_driver.h>
#include <rte_security_driver.h>
#include <rte_prefetch.h>
#include <rte_udp.h>
#include <rte_tcp.h>
#include <rte_sctp.h>
#include <rte_string_fns.h>
#include <rte_errno.h>
#include <rte_ip.h>
#include <rte_net.h>
#include <rte_vect.h>

#include "ixgbe_logs.h"
#include "base/ixgbe_api.h"
#include "base/ixgbe_vf.h"
#include "ixgbe_ethdev.h"
#include "base/ixgbe_dcb.h"
#include "base/ixgbe_common.h"
#include "ixgbe_rxtx.h"

#ifdef RTE_LIBRTE_IEEE1588
#define IXGBE_TX_IEEE1588_TMST PKT_TX_IEEE1588_TMST
#else
#define IXGBE_TX_IEEE1588_TMST 0
#endif
/* Bit mask to indicate which bits are required for building a TX context */
#define IXGBE_TX_OFFLOAD_MASK (                  \
                PKT_TX_OUTER_IPV6 |              \
                PKT_TX_OUTER_IPV4 |              \
                PKT_TX_IPV6 |                    \
                PKT_TX_IPV4 |                    \
                PKT_TX_VLAN_PKT |                \
                PKT_TX_IP_CKSUM |                \
                PKT_TX_L4_MASK |                 \
                PKT_TX_TCP_SEG |                 \
                PKT_TX_MACSEC |                  \
                PKT_TX_OUTER_IP_CKSUM |          \
                PKT_TX_SEC_OFFLOAD |             \
                IXGBE_TX_IEEE1588_TMST)

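/*
 * Offload flags that may be requested in an mbuf but are not handled by this
 * driver: the complement of IXGBE_TX_OFFLOAD_MASK within PKT_TX_OFFLOAD_MASK.
 */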
#define IXGBE_TX_OFFLOAD_NOTSUP_MASK \
                (PKT_TX_OFFLOAD_MASK ^ IXGBE_TX_OFFLOAD_MASK)

#if 1
#define RTE_PMD_USE_PREFETCH
#endif

#ifdef RTE_PMD_USE_PREFETCH
/*
 * Prefetch a cache line into all cache levels.
 */
#define rte_ixgbe_prefetch(p)   rte_prefetch0(p)
#else
#define rte_ixgbe_prefetch(p)   do {} while (0)
#endif

/*********************************************************************
 *
 *  TX functions
 *
 **********************************************************************/

/*
 * Check for descriptors with their DD bit set and free mbufs.
 * Return the total number of buffers freed.
 */
static __rte_always_inline int
ixgbe_tx_free_bufs(struct ixgbe_tx_queue *txq)
{
        struct ixgbe_tx_entry *txep;
        uint32_t status;
        int i, nb_free = 0;
        struct rte_mbuf *m, *free[RTE_IXGBE_TX_MAX_FREE_BUF_SZ];

        /* check DD bit on threshold descriptor */
        status = txq->tx_ring[txq->tx_next_dd].wb.status;
        if (!(status & rte_cpu_to_le_32(IXGBE_ADVTXD_STAT_DD)))
                return 0;

        /*
         * first buffer to free from S/W ring is at index
         * tx_next_dd - (tx_rs_thresh-1)
         */
        txep = &(txq->sw_ring[txq->tx_next_dd - (txq->tx_rs_thresh - 1)]);

        for (i = 0; i < txq->tx_rs_thresh; ++i, ++txep) {
                /* free buffers one at a time */
                m = rte_pktmbuf_prefree_seg(txep->mbuf);
                txep->mbuf = NULL;

                if (unlikely(m == NULL))
                        continue;

                if (nb_free >= RTE_IXGBE_TX_MAX_FREE_BUF_SZ ||
                    (nb_free > 0 && m->pool != free[0]->pool)) {
                        rte_mempool_put_bulk(free[0]->pool,
                                             (void **)free, nb_free);
                        nb_free = 0;
                }

                free[nb_free++] = m;
        }

        if (nb_free > 0)
                rte_mempool_put_bulk(free[0]->pool, (void **)free, nb_free);

        /* buffers were freed, update counters */
        txq->nb_tx_free = (uint16_t)(txq->nb_tx_free + txq->tx_rs_thresh);
        txq->tx_next_dd = (uint16_t)(txq->tx_next_dd + txq->tx_rs_thresh);
        if (txq->tx_next_dd >= txq->nb_tx_desc)
                txq->tx_next_dd = (uint16_t)(txq->tx_rs_thresh - 1);

        return txq->tx_rs_thresh;
}

/* Populate 4 descriptors with data from 4 mbufs */
static inline void
tx4(volatile union ixgbe_adv_tx_desc *txdp, struct rte_mbuf **pkts)
{
        uint64_t buf_dma_addr;
        uint32_t pkt_len;
        int i;

        for (i = 0; i < 4; ++i, ++txdp, ++pkts) {
                buf_dma_addr = rte_mbuf_data_iova(*pkts);
                pkt_len = (*pkts)->data_len;

                /* write data to descriptor */
                txdp->read.buffer_addr = rte_cpu_to_le_64(buf_dma_addr);

                txdp->read.cmd_type_len =
                        rte_cpu_to_le_32((uint32_t)DCMD_DTYP_FLAGS | pkt_len);

                txdp->read.olinfo_status =
                        rte_cpu_to_le_32(pkt_len << IXGBE_ADVTXD_PAYLEN_SHIFT);

                rte_prefetch0(&(*pkts)->pool);
        }
}

/* Populate 1 descriptor with data from 1 mbuf */
static inline void
tx1(volatile union ixgbe_adv_tx_desc *txdp, struct rte_mbuf **pkts)
{
        uint64_t buf_dma_addr;
        uint32_t pkt_len;

        buf_dma_addr = rte_mbuf_data_iova(*pkts);
        pkt_len = (*pkts)->data_len;

        /* write data to descriptor */
        txdp->read.buffer_addr = rte_cpu_to_le_64(buf_dma_addr);
        txdp->read.cmd_type_len =
                        rte_cpu_to_le_32((uint32_t)DCMD_DTYP_FLAGS | pkt_len);
        txdp->read.olinfo_status =
                        rte_cpu_to_le_32(pkt_len << IXGBE_ADVTXD_PAYLEN_SHIFT);
        rte_prefetch0(&(*pkts)->pool);
}

/*
 * Fill H/W descriptor ring with mbuf data.
 * Copy mbuf pointers to the S/W ring.
 */
static inline void
ixgbe_tx_fill_hw_ring(struct ixgbe_tx_queue *txq, struct rte_mbuf **pkts,
                      uint16_t nb_pkts)
{
        volatile union ixgbe_adv_tx_desc *txdp = &(txq->tx_ring[txq->tx_tail]);
        struct ixgbe_tx_entry *txep = &(txq->sw_ring[txq->tx_tail]);
        const int N_PER_LOOP = 4;
        const int N_PER_LOOP_MASK = N_PER_LOOP-1;
        int mainpart, leftover;
        int i, j;

        /*
         * Process most of the packets in chunks of N pkts.  Any
         * leftover packets will get processed one at a time.
         */
        mainpart = (nb_pkts & ((uint32_t) ~N_PER_LOOP_MASK));
        leftover = (nb_pkts & ((uint32_t)  N_PER_LOOP_MASK));
        for (i = 0; i < mainpart; i += N_PER_LOOP) {
                /* Copy N mbuf pointers to the S/W ring */
                for (j = 0; j < N_PER_LOOP; ++j) {
                        (txep + i + j)->mbuf = *(pkts + i + j);
                }
                tx4(txdp + i, pkts + i);
        }

        if (unlikely(leftover > 0)) {
                for (i = 0; i < leftover; ++i) {
                        (txep + mainpart + i)->mbuf = *(pkts + mainpart + i);
                        tx1(txdp + mainpart + i, pkts + mainpart + i);
                }
        }
}

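/*
 * Simple transmit path: assumes single-segment mbufs and no offloads that
 * would require a new context descriptor.
 */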
static inline uint16_t
tx_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
             uint16_t nb_pkts)
{
        struct ixgbe_tx_queue *txq = (struct ixgbe_tx_queue *)tx_queue;
        volatile union ixgbe_adv_tx_desc *tx_r = txq->tx_ring;
        uint16_t n = 0;

        /*
         * Begin scanning the H/W ring for done descriptors when the
         * number of available descriptors drops below tx_free_thresh.  For
         * each done descriptor, free the associated buffer.
         */
        if (txq->nb_tx_free < txq->tx_free_thresh)
                ixgbe_tx_free_bufs(txq);

        /* Only use descriptors that are available */
        nb_pkts = (uint16_t)RTE_MIN(txq->nb_tx_free, nb_pkts);
        if (unlikely(nb_pkts == 0))
                return 0;

        /* Use exactly nb_pkts descriptors */
        txq->nb_tx_free = (uint16_t)(txq->nb_tx_free - nb_pkts);

        /*
         * At this point, we know there are enough descriptors in the
         * ring to transmit all the packets.  This assumes that each
         * mbuf contains a single segment, and that no new offloads
         * are expected, which would require a new context descriptor.
         */

        /*
         * See if we're going to wrap-around. If so, handle the top
         * of the descriptor ring first, then do the bottom.  If not,
         * the processing looks just like the "bottom" part anyway...
         */
        if ((txq->tx_tail + nb_pkts) > txq->nb_tx_desc) {
                n = (uint16_t)(txq->nb_tx_desc - txq->tx_tail);
                ixgbe_tx_fill_hw_ring(txq, tx_pkts, n);

                /*
                 * We know that the last descriptor in the ring will need to
                 * have its RS bit set because tx_rs_thresh has to be
                 * a divisor of the ring size
                 */
                tx_r[txq->tx_next_rs].read.cmd_type_len |=
                        rte_cpu_to_le_32(IXGBE_ADVTXD_DCMD_RS);
                txq->tx_next_rs = (uint16_t)(txq->tx_rs_thresh - 1);

                txq->tx_tail = 0;
        }

        /* Fill H/W descriptor ring with mbuf data */
        ixgbe_tx_fill_hw_ring(txq, tx_pkts + n, (uint16_t)(nb_pkts - n));
        txq->tx_tail = (uint16_t)(txq->tx_tail + (nb_pkts - n));

        /*
         * Determine if RS bit should be set
         * This is what we actually want:
         *   if ((txq->tx_tail - 1) >= txq->tx_next_rs)
         * but instead of subtracting 1 and doing >=, we can just do
         * greater than without subtracting.
         */
        if (txq->tx_tail > txq->tx_next_rs) {
                tx_r[txq->tx_next_rs].read.cmd_type_len |=
                        rte_cpu_to_le_32(IXGBE_ADVTXD_DCMD_RS);
                txq->tx_next_rs = (uint16_t)(txq->tx_next_rs +
                                                txq->tx_rs_thresh);
                if (txq->tx_next_rs >= txq->nb_tx_desc)
                        txq->tx_next_rs = (uint16_t)(txq->tx_rs_thresh - 1);
        }

        /*
         * Check for wrap-around. This would only happen if we used
         * up to the last descriptor in the ring, no more, no less.
         */
        if (txq->tx_tail >= txq->nb_tx_desc)
                txq->tx_tail = 0;

        /* update tail pointer */
        rte_wmb();
        IXGBE_PCI_REG_WC_WRITE_RELAXED(txq->tdt_reg_addr, txq->tx_tail);

        return nb_pkts;
}

uint16_t
ixgbe_xmit_pkts_simple(void *tx_queue, struct rte_mbuf **tx_pkts,
                       uint16_t nb_pkts)
{
        uint16_t nb_tx;

        /* Try to transmit the whole burst at once if it fits within TX_MAX_BURST */
        if (likely(nb_pkts <= RTE_PMD_IXGBE_TX_MAX_BURST))
                return tx_xmit_pkts(tx_queue, tx_pkts, nb_pkts);

        /* transmit more than the max burst, in chunks of TX_MAX_BURST */
        nb_tx = 0;
        while (nb_pkts) {
                uint16_t ret, n;

                n = (uint16_t)RTE_MIN(nb_pkts, RTE_PMD_IXGBE_TX_MAX_BURST);
                ret = tx_xmit_pkts(tx_queue, &(tx_pkts[nb_tx]), n);
                nb_tx = (uint16_t)(nb_tx + ret);
                nb_pkts = (uint16_t)(nb_pkts - ret);
                if (ret < n)
                        break;
        }

        return nb_tx;
}

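/* Vector transmit path: send the burst in chunks of at most tx_rs_thresh packets. */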
static uint16_t
ixgbe_xmit_pkts_vec(void *tx_queue, struct rte_mbuf **tx_pkts,
                    uint16_t nb_pkts)
{
        uint16_t nb_tx = 0;
        struct ixgbe_tx_queue *txq = (struct ixgbe_tx_queue *)tx_queue;

        while (nb_pkts) {
                uint16_t ret, num;

                num = (uint16_t)RTE_MIN(nb_pkts, txq->tx_rs_thresh);
                ret = ixgbe_xmit_fixed_burst_vec(tx_queue, &tx_pkts[nb_tx],
                                                 num);
                nb_tx += ret;
                nb_pkts -= ret;
                if (ret < num)
                        break;
        }

        return nb_tx;
}

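/*
 * Build the advanced context descriptor for the requested offloads and
 * cache the result in the queue so it can be reused by later packets.
 */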
static inline void
ixgbe_set_xmit_ctx(struct ixgbe_tx_queue *txq,
                volatile struct ixgbe_adv_tx_context_desc *ctx_txd,
                uint64_t ol_flags, union ixgbe_tx_offload tx_offload,
                __rte_unused uint64_t *mdata)
{
        uint32_t type_tucmd_mlhl;
        uint32_t mss_l4len_idx = 0;
        uint32_t ctx_idx;
        uint32_t vlan_macip_lens;
        union ixgbe_tx_offload tx_offload_mask;
        uint32_t seqnum_seed = 0;

        ctx_idx = txq->ctx_curr;
        tx_offload_mask.data[0] = 0;
        tx_offload_mask.data[1] = 0;
        type_tucmd_mlhl = 0;

        /* Specify which HW CTX to upload. */
        mss_l4len_idx |= (ctx_idx << IXGBE_ADVTXD_IDX_SHIFT);

        if (ol_flags & PKT_TX_VLAN_PKT) {
                tx_offload_mask.vlan_tci |= ~0;
        }

        /* check if TCP segmentation is required for this packet */
        if (ol_flags & PKT_TX_TCP_SEG) {
                /* implies IP cksum in IPv4 */
                if (ol_flags & PKT_TX_IP_CKSUM)
                        type_tucmd_mlhl = IXGBE_ADVTXD_TUCMD_IPV4 |
                                IXGBE_ADVTXD_TUCMD_L4T_TCP |
                                IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
                else
                        type_tucmd_mlhl = IXGBE_ADVTXD_TUCMD_IPV6 |
                                IXGBE_ADVTXD_TUCMD_L4T_TCP |
                                IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;

                tx_offload_mask.l2_len |= ~0;
                tx_offload_mask.l3_len |= ~0;
                tx_offload_mask.l4_len |= ~0;
                tx_offload_mask.tso_segsz |= ~0;
                mss_l4len_idx |= tx_offload.tso_segsz << IXGBE_ADVTXD_MSS_SHIFT;
                mss_l4len_idx |= tx_offload.l4_len << IXGBE_ADVTXD_L4LEN_SHIFT;
        } else { /* no TSO, check if hardware checksum is needed */
                if (ol_flags & PKT_TX_IP_CKSUM) {
                        type_tucmd_mlhl = IXGBE_ADVTXD_TUCMD_IPV4;
                        tx_offload_mask.l2_len |= ~0;
                        tx_offload_mask.l3_len |= ~0;
                }

                switch (ol_flags & PKT_TX_L4_MASK) {
                case PKT_TX_UDP_CKSUM:
                        type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_UDP |
                                IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
                        mss_l4len_idx |= sizeof(struct rte_udp_hdr)
                                << IXGBE_ADVTXD_L4LEN_SHIFT;
                        tx_offload_mask.l2_len |= ~0;
                        tx_offload_mask.l3_len |= ~0;
                        break;
                case PKT_TX_TCP_CKSUM:
                        type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP |
                                IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
                        mss_l4len_idx |= sizeof(struct rte_tcp_hdr)
                                << IXGBE_ADVTXD_L4LEN_SHIFT;
                        tx_offload_mask.l2_len |= ~0;
                        tx_offload_mask.l3_len |= ~0;
                        break;
                case PKT_TX_SCTP_CKSUM:
                        type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_SCTP |
                                IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
                        mss_l4len_idx |= sizeof(struct rte_sctp_hdr)
                                << IXGBE_ADVTXD_L4LEN_SHIFT;
                        tx_offload_mask.l2_len |= ~0;
                        tx_offload_mask.l3_len |= ~0;
                        break;
                default:
                        type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_RSV |
                                IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
                        break;
                }
        }

        if (ol_flags & PKT_TX_OUTER_IP_CKSUM) {
                tx_offload_mask.outer_l2_len |= ~0;
                tx_offload_mask.outer_l3_len |= ~0;
                tx_offload_mask.l2_len |= ~0;
                seqnum_seed |= tx_offload.outer_l3_len
                               << IXGBE_ADVTXD_OUTER_IPLEN;
                seqnum_seed |= tx_offload.l2_len
                               << IXGBE_ADVTXD_TUNNEL_LEN;
        }
#ifdef RTE_LIB_SECURITY
        if (ol_flags & PKT_TX_SEC_OFFLOAD) {
                union ixgbe_crypto_tx_desc_md *md =
                                (union ixgbe_crypto_tx_desc_md *)mdata;
                seqnum_seed |=
                        (IXGBE_ADVTXD_IPSEC_SA_INDEX_MASK & md->sa_idx);
                type_tucmd_mlhl |= md->enc ?
                                (IXGBE_ADVTXD_TUCMD_IPSEC_TYPE_ESP |
                                IXGBE_ADVTXD_TUCMD_IPSEC_ENCRYPT_EN) : 0;
                type_tucmd_mlhl |=
                        (md->pad_len & IXGBE_ADVTXD_IPSEC_ESP_LEN_MASK);
                tx_offload_mask.sa_idx |= ~0;
                tx_offload_mask.sec_pad_len |= ~0;
        }
#endif

        txq->ctx_cache[ctx_idx].flags = ol_flags;
        txq->ctx_cache[ctx_idx].tx_offload.data[0]  =
                tx_offload_mask.data[0] & tx_offload.data[0];
        txq->ctx_cache[ctx_idx].tx_offload.data[1]  =
                tx_offload_mask.data[1] & tx_offload.data[1];
        txq->ctx_cache[ctx_idx].tx_offload_mask    = tx_offload_mask;

        ctx_txd->type_tucmd_mlhl = rte_cpu_to_le_32(type_tucmd_mlhl);
        vlan_macip_lens = tx_offload.l3_len;
        if (ol_flags & PKT_TX_OUTER_IP_CKSUM)
                vlan_macip_lens |= (tx_offload.outer_l2_len <<
                                    IXGBE_ADVTXD_MACLEN_SHIFT);
        else
                vlan_macip_lens |= (tx_offload.l2_len <<
                                    IXGBE_ADVTXD_MACLEN_SHIFT);
        vlan_macip_lens |= ((uint32_t)tx_offload.vlan_tci << IXGBE_ADVTXD_VLAN_SHIFT);
        ctx_txd->vlan_macip_lens = rte_cpu_to_le_32(vlan_macip_lens);
        ctx_txd->mss_l4len_idx   = rte_cpu_to_le_32(mss_l4len_idx);
        ctx_txd->seqnum_seed     = seqnum_seed;
}

/*
 * Check which hardware context can be used. Use the existing match
 * or create a new context descriptor.
 */
static inline uint32_t
what_advctx_update(struct ixgbe_tx_queue *txq, uint64_t flags,
                   union ixgbe_tx_offload tx_offload)
{
        /* If it matches the context currently in use */
        if (likely((txq->ctx_cache[txq->ctx_curr].flags == flags) &&
                   (txq->ctx_cache[txq->ctx_curr].tx_offload.data[0] ==
                    (txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data[0]
                     & tx_offload.data[0])) &&
                   (txq->ctx_cache[txq->ctx_curr].tx_offload.data[1] ==
                    (txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data[1]
                     & tx_offload.data[1]))))
                return txq->ctx_curr;

        /* Check whether it matches the other cached context */
        txq->ctx_curr ^= 1;
        if (likely((txq->ctx_cache[txq->ctx_curr].flags == flags) &&
                   (txq->ctx_cache[txq->ctx_curr].tx_offload.data[0] ==
                    (txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data[0]
                     & tx_offload.data[0])) &&
                   (txq->ctx_cache[txq->ctx_curr].tx_offload.data[1] ==
                    (txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data[1]
                     & tx_offload.data[1]))))
                return txq->ctx_curr;

        /* No match: a new context descriptor must be built */
        return IXGBE_CTX_NUM;
}

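/* Translate mbuf offload flags into the POPTS checksum bits of olinfo_status. */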
static inline uint32_t
tx_desc_cksum_flags_to_olinfo(uint64_t ol_flags)
{
        uint32_t tmp = 0;

        if ((ol_flags & PKT_TX_L4_MASK) != PKT_TX_L4_NO_CKSUM)
                tmp |= IXGBE_ADVTXD_POPTS_TXSM;
        if (ol_flags & PKT_TX_IP_CKSUM)
                tmp |= IXGBE_ADVTXD_POPTS_IXSM;
        if (ol_flags & PKT_TX_TCP_SEG)
                tmp |= IXGBE_ADVTXD_POPTS_TXSM;
        return tmp;
}

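/* Translate mbuf offload flags into the DCMD bits of cmd_type_len. */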
static inline uint32_t
tx_desc_ol_flags_to_cmdtype(uint64_t ol_flags)
{
        uint32_t cmdtype = 0;

        if (ol_flags & PKT_TX_VLAN_PKT)
                cmdtype |= IXGBE_ADVTXD_DCMD_VLE;
        if (ol_flags & PKT_TX_TCP_SEG)
                cmdtype |= IXGBE_ADVTXD_DCMD_TSE;
        if (ol_flags & PKT_TX_OUTER_IP_CKSUM)
                cmdtype |= (1 << IXGBE_ADVTXD_OUTERIPCS_SHIFT);
        if (ol_flags & PKT_TX_MACSEC)
                cmdtype |= IXGBE_ADVTXD_MAC_LINKSEC;
        return cmdtype;
}

/* Default RS bit threshold values */
#ifndef DEFAULT_TX_RS_THRESH
#define DEFAULT_TX_RS_THRESH   32
#endif
#ifndef DEFAULT_TX_FREE_THRESH
#define DEFAULT_TX_FREE_THRESH 32
#endif

/* Reset transmit descriptors after they have been used */
static inline int
ixgbe_xmit_cleanup(struct ixgbe_tx_queue *txq)
{
        struct ixgbe_tx_entry *sw_ring = txq->sw_ring;
        volatile union ixgbe_adv_tx_desc *txr = txq->tx_ring;
        uint16_t last_desc_cleaned = txq->last_desc_cleaned;
        uint16_t nb_tx_desc = txq->nb_tx_desc;
        uint16_t desc_to_clean_to;
        uint16_t nb_tx_to_clean;
        uint32_t status;

        /* Determine the last descriptor needing to be cleaned */
        desc_to_clean_to = (uint16_t)(last_desc_cleaned + txq->tx_rs_thresh);
        if (desc_to_clean_to >= nb_tx_desc)
                desc_to_clean_to = (uint16_t)(desc_to_clean_to - nb_tx_desc);

        /* Check to make sure the last descriptor to clean is done */
        desc_to_clean_to = sw_ring[desc_to_clean_to].last_id;
        status = txr[desc_to_clean_to].wb.status;
        if (!(status & rte_cpu_to_le_32(IXGBE_TXD_STAT_DD))) {
                PMD_TX_LOG(DEBUG,
                           "TX descriptor %4u is not done "
                           "(port=%d queue=%d)",
                           desc_to_clean_to,
                           txq->port_id, txq->queue_id);
                /* Failed to clean any descriptors, better luck next time */
                return -(1);
        }

        /* Figure out how many descriptors will be cleaned */
        if (last_desc_cleaned > desc_to_clean_to)
                nb_tx_to_clean = (uint16_t)((nb_tx_desc - last_desc_cleaned) +
                                                        desc_to_clean_to);
        else
                nb_tx_to_clean = (uint16_t)(desc_to_clean_to -
                                                last_desc_cleaned);

        PMD_TX_LOG(DEBUG,
                   "Cleaning %4u TX descriptors: %4u to %4u "
                   "(port=%d queue=%d)",
                   nb_tx_to_clean, last_desc_cleaned, desc_to_clean_to,
                   txq->port_id, txq->queue_id);

        /*
         * The last descriptor to clean is done, so that means all the
         * descriptors from the last descriptor that was cleaned
         * up to the last descriptor with the RS bit set
         * are done. Only reset the threshold descriptor.
         */
        txr[desc_to_clean_to].wb.status = 0;

        /* Update the txq to reflect the last descriptor that was cleaned */
        txq->last_desc_cleaned = desc_to_clean_to;
        txq->nb_tx_free = (uint16_t)(txq->nb_tx_free + nb_tx_to_clean);

        /* No Error */
        return 0;
}

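/*
 * Full-featured transmit path: handles multi-segment packets and builds
 * context descriptors for checksum, VLAN, TSO and security offloads.
 */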
uint16_t
ixgbe_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
                uint16_t nb_pkts)
{
        struct ixgbe_tx_queue *txq;
        struct ixgbe_tx_entry *sw_ring;
        struct ixgbe_tx_entry *txe, *txn;
        volatile union ixgbe_adv_tx_desc *txr;
        volatile union ixgbe_adv_tx_desc *txd, *txp;
        struct rte_mbuf     *tx_pkt;
        struct rte_mbuf     *m_seg;
        uint64_t buf_dma_addr;
        uint32_t olinfo_status;
        uint32_t cmd_type_len;
        uint32_t pkt_len;
        uint16_t slen;
        uint64_t ol_flags;
        uint16_t tx_id;
        uint16_t tx_last;
        uint16_t nb_tx;
        uint16_t nb_used;
        uint64_t tx_ol_req;
        uint32_t ctx = 0;
        uint32_t new_ctx;
        union ixgbe_tx_offload tx_offload;
#ifdef RTE_LIB_SECURITY
        uint8_t use_ipsec;
#endif

        tx_offload.data[0] = 0;
        tx_offload.data[1] = 0;
        txq = tx_queue;
        sw_ring = txq->sw_ring;
        txr     = txq->tx_ring;
        tx_id   = txq->tx_tail;
        txe = &sw_ring[tx_id];
        txp = NULL;

        /* Determine if the descriptor ring needs to be cleaned. */
        if (txq->nb_tx_free < txq->tx_free_thresh)
                ixgbe_xmit_cleanup(txq);

        rte_prefetch0(&txe->mbuf->pool);

        /* TX loop */
        for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
                new_ctx = 0;
                tx_pkt = *tx_pkts++;
                pkt_len = tx_pkt->pkt_len;

                /*
                 * Determine how many (if any) context descriptors
                 * are needed for offload functionality.
                 */
                ol_flags = tx_pkt->ol_flags;
#ifdef RTE_LIB_SECURITY
                use_ipsec = txq->using_ipsec && (ol_flags & PKT_TX_SEC_OFFLOAD);
#endif

                /* If hardware offload required */
                tx_ol_req = ol_flags & IXGBE_TX_OFFLOAD_MASK;
                if (tx_ol_req) {
                        tx_offload.l2_len = tx_pkt->l2_len;
                        tx_offload.l3_len = tx_pkt->l3_len;
                        tx_offload.l4_len = tx_pkt->l4_len;
                        tx_offload.vlan_tci = tx_pkt->vlan_tci;
                        tx_offload.tso_segsz = tx_pkt->tso_segsz;
                        tx_offload.outer_l2_len = tx_pkt->outer_l2_len;
                        tx_offload.outer_l3_len = tx_pkt->outer_l3_len;
#ifdef RTE_LIB_SECURITY
                        if (use_ipsec) {
                                union ixgbe_crypto_tx_desc_md *ipsec_mdata =
                                        (union ixgbe_crypto_tx_desc_md *)
                                                rte_security_dynfield(tx_pkt);
                                tx_offload.sa_idx = ipsec_mdata->sa_idx;
                                tx_offload.sec_pad_len = ipsec_mdata->pad_len;
                        }
#endif

                        /* Decide whether a new context descriptor must be
                         * built or an existing one can be reused.
                         */
                        ctx = what_advctx_update(txq, tx_ol_req,
                                tx_offload);
                        /* Only allocate a context descriptor if required */
                        new_ctx = (ctx == IXGBE_CTX_NUM);
                        ctx = txq->ctx_curr;
                }

                /*
                 * Keep track of how many descriptors are used in this loop.
                 * This is always the number of segments plus the number of
                 * context descriptors required to transmit the packet.
                 */
                nb_used = (uint16_t)(tx_pkt->nb_segs + new_ctx);

                if (txp != NULL &&
                                nb_used + txq->nb_tx_used >= txq->tx_rs_thresh)
                        /* set RS on the previous packet in the burst */
                        txp->read.cmd_type_len |=
                                rte_cpu_to_le_32(IXGBE_TXD_CMD_RS);

                /*
                 * The number of descriptors that must be allocated for a
                 * packet is the number of segments of that packet, plus 1
                 * Context Descriptor for the hardware offload, if any.
                 * Determine the last TX descriptor to allocate in the TX ring
                 * for the packet, starting from the current position (tx_id)
                 * in the ring.
                 */
                tx_last = (uint16_t) (tx_id + nb_used - 1);

                /* Circular ring */
                if (tx_last >= txq->nb_tx_desc)
                        tx_last = (uint16_t) (tx_last - txq->nb_tx_desc);

                PMD_TX_LOG(DEBUG, "port_id=%u queue_id=%u pktlen=%u"
                           " tx_first=%u tx_last=%u",
                           (unsigned) txq->port_id,
                           (unsigned) txq->queue_id,
                           (unsigned) pkt_len,
                           (unsigned) tx_id,
                           (unsigned) tx_last);

                /*
                 * Make sure there are enough TX descriptors available to
                 * transmit the entire packet.
                 * nb_used better be less than or equal to txq->tx_rs_thresh
                 */
                if (nb_used > txq->nb_tx_free) {
                        PMD_TX_LOG(DEBUG,
                                   "Not enough free TX descriptors "
                                   "nb_used=%4u nb_free=%4u "
                                   "(port=%d queue=%d)",
                                   nb_used, txq->nb_tx_free,
                                   txq->port_id, txq->queue_id);

                        if (ixgbe_xmit_cleanup(txq) != 0) {
                                /* Could not clean any descriptors */
                                if (nb_tx == 0)
                                        return 0;
                                goto end_of_tx;
                        }

                        /* nb_used better be <= txq->tx_rs_thresh */
                        if (unlikely(nb_used > txq->tx_rs_thresh)) {
                                PMD_TX_LOG(DEBUG,
                                           "The number of descriptors needed to "
                                           "transmit the packet exceeds the "
                                           "RS bit threshold. This will impact "
                                           "performance. "
                                           "nb_used=%4u nb_free=%4u "
                                           "tx_rs_thresh=%4u. "
                                           "(port=%d queue=%d)",
                                           nb_used, txq->nb_tx_free,
                                           txq->tx_rs_thresh,
                                           txq->port_id, txq->queue_id);
                                /*
                                 * Loop here until there are enough TX
                                 * descriptors or until the ring cannot be
                                 * cleaned.
                                 */
                                while (nb_used > txq->nb_tx_free) {
                                        if (ixgbe_xmit_cleanup(txq) != 0) {
                                                /*
                                                 * Could not clean any
                                                 * descriptors
                                                 */
                                                if (nb_tx == 0)
                                                        return 0;
                                                goto end_of_tx;
                                        }
                                }
                        }
                }

                /*
                 * By now there are enough free TX descriptors to transmit
                 * the packet.
                 */

                /*
                 * Set common flags of all TX Data Descriptors.
                 *
                 * The following bits must be set in all Data Descriptors:
                 *   - IXGBE_ADVTXD_DTYP_DATA
                 *   - IXGBE_ADVTXD_DCMD_DEXT
                 *
                 * The following bits must be set in the first Data Descriptor
                 * and are ignored in the other ones:
                 *   - IXGBE_ADVTXD_DCMD_IFCS
                 *   - IXGBE_ADVTXD_MAC_1588
                 *   - IXGBE_ADVTXD_DCMD_VLE
                 *
                 * The following bits must only be set in the last Data
                 * Descriptor:
                 *   - IXGBE_TXD_CMD_EOP
                 *
                 * The following bits can be set in any Data Descriptor, but
                 * are only set in the last Data Descriptor:
                 *   - IXGBE_TXD_CMD_RS
                 */
                cmd_type_len = IXGBE_ADVTXD_DTYP_DATA |
                        IXGBE_ADVTXD_DCMD_IFCS | IXGBE_ADVTXD_DCMD_DEXT;

#ifdef RTE_LIBRTE_IEEE1588
                if (ol_flags & PKT_TX_IEEE1588_TMST)
                        cmd_type_len |= IXGBE_ADVTXD_MAC_1588;
#endif

                olinfo_status = 0;
                if (tx_ol_req) {

                        if (ol_flags & PKT_TX_TCP_SEG) {
                                /* when TSO is on, the paylen in the descriptor
                                 * is not the packet len but the TCP payload len */
                                pkt_len -= (tx_offload.l2_len +
                                        tx_offload.l3_len + tx_offload.l4_len);
                        }

                        /*
                         * Setup the TX Advanced Context Descriptor if required
                         */
                        if (new_ctx) {
                                volatile struct ixgbe_adv_tx_context_desc *
                                    ctx_txd;

                                ctx_txd = (volatile struct
                                    ixgbe_adv_tx_context_desc *)
                                    &txr[tx_id];

                                txn = &sw_ring[txe->next_id];
                                rte_prefetch0(&txn->mbuf->pool);

                                if (txe->mbuf != NULL) {
                                        rte_pktmbuf_free_seg(txe->mbuf);
                                        txe->mbuf = NULL;
                                }

                                ixgbe_set_xmit_ctx(txq, ctx_txd, tx_ol_req,
                                        tx_offload,
                                        rte_security_dynfield(tx_pkt));

                                txe->last_id = tx_last;
                                tx_id = txe->next_id;
                                txe = txn;
                        }

                        /*
                         * Set up the TX Advanced Data Descriptor.
                         * This path is taken whether a new context
                         * descriptor was built or an existing one is reused.
                         */
                        cmd_type_len  |= tx_desc_ol_flags_to_cmdtype(ol_flags);
                        olinfo_status |= tx_desc_cksum_flags_to_olinfo(ol_flags);
                        olinfo_status |= ctx << IXGBE_ADVTXD_IDX_SHIFT;
                }

                olinfo_status |= (pkt_len << IXGBE_ADVTXD_PAYLEN_SHIFT);
#ifdef RTE_LIB_SECURITY
                if (use_ipsec)
                        olinfo_status |= IXGBE_ADVTXD_POPTS_IPSEC;
#endif

                m_seg = tx_pkt;
                do {
                        txd = &txr[tx_id];
                        txn = &sw_ring[txe->next_id];
                        rte_prefetch0(&txn->mbuf->pool);

                        if (txe->mbuf != NULL)
                                rte_pktmbuf_free_seg(txe->mbuf);
                        txe->mbuf = m_seg;

                        /*
                         * Set up Transmit Data Descriptor.
                         */
                        slen = m_seg->data_len;
                        buf_dma_addr = rte_mbuf_data_iova(m_seg);
                        txd->read.buffer_addr =
                                rte_cpu_to_le_64(buf_dma_addr);
                        txd->read.cmd_type_len =
                                rte_cpu_to_le_32(cmd_type_len | slen);
                        txd->read.olinfo_status =
                                rte_cpu_to_le_32(olinfo_status);
                        txe->last_id = tx_last;
                        tx_id = txe->next_id;
                        txe = txn;
                        m_seg = m_seg->next;
                } while (m_seg != NULL);

                /*
                 * The last packet data descriptor needs End Of Packet (EOP)
                 */
                cmd_type_len |= IXGBE_TXD_CMD_EOP;
                txq->nb_tx_used = (uint16_t)(txq->nb_tx_used + nb_used);
                txq->nb_tx_free = (uint16_t)(txq->nb_tx_free - nb_used);

                /* Set RS bit only on threshold packets' last descriptor */
                if (txq->nb_tx_used >= txq->tx_rs_thresh) {
                        PMD_TX_LOG(DEBUG,
                                   "Setting RS bit on TXD id="
                                   "%4u (port=%d queue=%d)",
                                   tx_last, txq->port_id, txq->queue_id);

                        cmd_type_len |= IXGBE_TXD_CMD_RS;

                        /* Update txq RS bit counters */
                        txq->nb_tx_used = 0;
                        txp = NULL;
                } else
                        txp = txd;

                txd->read.cmd_type_len |= rte_cpu_to_le_32(cmd_type_len);
        }

end_of_tx:
        /* set RS on last packet in the burst */
        if (txp != NULL)
                txp->read.cmd_type_len |= rte_cpu_to_le_32(IXGBE_TXD_CMD_RS);

        rte_wmb();

        /*
         * Set the Transmit Descriptor Tail (TDT)
         */
        PMD_TX_LOG(DEBUG, "port_id=%u queue_id=%u tx_tail=%u nb_tx=%u",
                   (unsigned) txq->port_id, (unsigned) txq->queue_id,
                   (unsigned) tx_id, (unsigned) nb_tx);
        IXGBE_PCI_REG_WC_WRITE_RELAXED(txq->tdt_reg_addr, tx_id);
        txq->tx_tail = tx_id;

        return nb_tx;
}

/*********************************************************************
 *
 *  TX prep functions
 *
 **********************************************************************/
uint16_t
ixgbe_prep_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
{
        int i, ret;
        uint64_t ol_flags;
        struct rte_mbuf *m;
        struct ixgbe_tx_queue *txq = (struct ixgbe_tx_queue *)tx_queue;

        for (i = 0; i < nb_pkts; i++) {
                m = tx_pkts[i];
                ol_flags = m->ol_flags;

                /**
                 * Check if packet meets requirements for number of segments
                 *
                 * NOTE: for ixgbe it's always (40 - WTHRESH) for both TSO and
                 *       non-TSO
                 */

                if (m->nb_segs > IXGBE_TX_MAX_SEG - txq->wthresh) {
                        rte_errno = EINVAL;
                        return i;
                }

                if (ol_flags & IXGBE_TX_OFFLOAD_NOTSUP_MASK) {
                        rte_errno = ENOTSUP;
                        return i;
                }

                /* check the size of packet */
                if (m->pkt_len < IXGBE_TX_MIN_PKT_LEN) {
                        rte_errno = EINVAL;
                        return i;
                }

#ifdef RTE_ETHDEV_DEBUG_TX
                ret = rte_validate_tx_offload(m);
                if (ret != 0) {
                        rte_errno = -ret;
                        return i;
                }
#endif
                ret = rte_net_intel_cksum_prepare(m);
                if (ret != 0) {
                        rte_errno = -ret;
                        return i;
                }
        }

        return i;
}

/*********************************************************************
 *
 *  RX functions
 *
 **********************************************************************/

#define IXGBE_PACKET_TYPE_ETHER                         0X00
#define IXGBE_PACKET_TYPE_IPV4                          0X01
#define IXGBE_PACKET_TYPE_IPV4_TCP                      0X11
#define IXGBE_PACKET_TYPE_IPV4_UDP                      0X21
#define IXGBE_PACKET_TYPE_IPV4_SCTP                     0X41
#define IXGBE_PACKET_TYPE_IPV4_EXT                      0X03
#define IXGBE_PACKET_TYPE_IPV4_EXT_TCP                  0X13
#define IXGBE_PACKET_TYPE_IPV4_EXT_UDP                  0X23
#define IXGBE_PACKET_TYPE_IPV4_EXT_SCTP                 0X43
#define IXGBE_PACKET_TYPE_IPV6                          0X04
#define IXGBE_PACKET_TYPE_IPV6_TCP                      0X14
#define IXGBE_PACKET_TYPE_IPV6_UDP                      0X24
#define IXGBE_PACKET_TYPE_IPV6_SCTP                     0X44
#define IXGBE_PACKET_TYPE_IPV6_EXT                      0X0C
#define IXGBE_PACKET_TYPE_IPV6_EXT_TCP                  0X1C
#define IXGBE_PACKET_TYPE_IPV6_EXT_UDP                  0X2C
#define IXGBE_PACKET_TYPE_IPV6_EXT_SCTP                 0X4C
#define IXGBE_PACKET_TYPE_IPV4_IPV6                     0X05
#define IXGBE_PACKET_TYPE_IPV4_IPV6_TCP                 0X15
#define IXGBE_PACKET_TYPE_IPV4_IPV6_UDP                 0X25
#define IXGBE_PACKET_TYPE_IPV4_IPV6_SCTP                0X45
#define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6                 0X07
#define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_TCP             0X17
#define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_UDP             0X27
#define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_SCTP            0X47
#define IXGBE_PACKET_TYPE_IPV4_IPV6_EXT                 0X0D
#define IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_TCP             0X1D
#define IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_UDP             0X2D
#define IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_SCTP            0X4D
#define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT             0X0F
#define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_TCP         0X1F
#define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_UDP         0X2F
#define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_SCTP        0X4F

#define IXGBE_PACKET_TYPE_NVGRE                   0X00
#define IXGBE_PACKET_TYPE_NVGRE_IPV4              0X01
#define IXGBE_PACKET_TYPE_NVGRE_IPV4_TCP          0X11
#define IXGBE_PACKET_TYPE_NVGRE_IPV4_UDP          0X21
#define IXGBE_PACKET_TYPE_NVGRE_IPV4_SCTP         0X41
#define IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT          0X03
#define IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_TCP      0X13
#define IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_UDP      0X23
#define IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_SCTP     0X43
#define IXGBE_PACKET_TYPE_NVGRE_IPV6              0X04
#define IXGBE_PACKET_TYPE_NVGRE_IPV6_TCP          0X14
#define IXGBE_PACKET_TYPE_NVGRE_IPV6_UDP          0X24
#define IXGBE_PACKET_TYPE_NVGRE_IPV6_SCTP         0X44
#define IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT          0X0C
#define IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_TCP      0X1C
#define IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_UDP      0X2C
#define IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_SCTP     0X4C
#define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6         0X05
#define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_TCP     0X15
#define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_UDP     0X25
#define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT     0X0D
#define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT_TCP 0X1D
#define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT_UDP 0X2D

#define IXGBE_PACKET_TYPE_VXLAN                   0X80
#define IXGBE_PACKET_TYPE_VXLAN_IPV4              0X81
#define IXGBE_PACKET_TYPE_VXLAN_IPV4_TCP          0x91
#define IXGBE_PACKET_TYPE_VXLAN_IPV4_UDP          0xA1
#define IXGBE_PACKET_TYPE_VXLAN_IPV4_SCTP         0xC1
#define IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT          0x83
#define IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_TCP      0X93
#define IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_UDP      0XA3
#define IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_SCTP     0XC3
#define IXGBE_PACKET_TYPE_VXLAN_IPV6              0X84
#define IXGBE_PACKET_TYPE_VXLAN_IPV6_TCP          0X94
#define IXGBE_PACKET_TYPE_VXLAN_IPV6_UDP          0XA4
#define IXGBE_PACKET_TYPE_VXLAN_IPV6_SCTP         0XC4
#define IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT          0X8C
#define IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_TCP      0X9C
#define IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_UDP      0XAC
#define IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_SCTP     0XCC
#define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6         0X85
#define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_TCP     0X95
#define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_UDP     0XA5
#define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT     0X8D
#define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT_TCP 0X9D
#define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT_UDP 0XAD

/**
 * Use two different tables, one for normal packets and one for tunnel
 * packets, to save space.
 */
const uint32_t
        ptype_table[IXGBE_PACKET_TYPE_MAX] __rte_cache_aligned = {
        [IXGBE_PACKET_TYPE_ETHER] = RTE_PTYPE_L2_ETHER,
        [IXGBE_PACKET_TYPE_IPV4] = RTE_PTYPE_L2_ETHER |
                RTE_PTYPE_L3_IPV4,
        [IXGBE_PACKET_TYPE_IPV4_TCP] = RTE_PTYPE_L2_ETHER |
                RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_TCP,
        [IXGBE_PACKET_TYPE_IPV4_UDP] = RTE_PTYPE_L2_ETHER |
                RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_UDP,
        [IXGBE_PACKET_TYPE_IPV4_SCTP] = RTE_PTYPE_L2_ETHER |
                RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_SCTP,
        [IXGBE_PACKET_TYPE_IPV4_EXT] = RTE_PTYPE_L2_ETHER |
                RTE_PTYPE_L3_IPV4_EXT,
        [IXGBE_PACKET_TYPE_IPV4_EXT_TCP] = RTE_PTYPE_L2_ETHER |
                RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_L4_TCP,
        [IXGBE_PACKET_TYPE_IPV4_EXT_UDP] = RTE_PTYPE_L2_ETHER |
                RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_L4_UDP,
        [IXGBE_PACKET_TYPE_IPV4_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
                RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_L4_SCTP,
        [IXGBE_PACKET_TYPE_IPV6] = RTE_PTYPE_L2_ETHER |
                RTE_PTYPE_L3_IPV6,
        [IXGBE_PACKET_TYPE_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
                RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L4_TCP,
        [IXGBE_PACKET_TYPE_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
                RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L4_UDP,
        [IXGBE_PACKET_TYPE_IPV6_SCTP] = RTE_PTYPE_L2_ETHER |
                RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L4_SCTP,
        [IXGBE_PACKET_TYPE_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
                RTE_PTYPE_L3_IPV6_EXT,
        [IXGBE_PACKET_TYPE_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
                RTE_PTYPE_L3_IPV6_EXT | RTE_PTYPE_L4_TCP,
        [IXGBE_PACKET_TYPE_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
                RTE_PTYPE_L3_IPV6_EXT | RTE_PTYPE_L4_UDP,
        [IXGBE_PACKET_TYPE_IPV6_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
                RTE_PTYPE_L3_IPV6_EXT | RTE_PTYPE_L4_SCTP,
        [IXGBE_PACKET_TYPE_IPV4_IPV6] = RTE_PTYPE_L2_ETHER |
                RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
                RTE_PTYPE_INNER_L3_IPV6,
        [IXGBE_PACKET_TYPE_IPV4_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
                RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
                RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_TCP,
        [IXGBE_PACKET_TYPE_IPV4_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
                RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
                RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_UDP,
        [IXGBE_PACKET_TYPE_IPV4_IPV6_SCTP] = RTE_PTYPE_L2_ETHER |
                RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
                RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_SCTP,
        [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6] = RTE_PTYPE_L2_ETHER |
                RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
                RTE_PTYPE_INNER_L3_IPV6,
        [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
                RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
                RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_TCP,
        [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
                RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
                RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_UDP,
        [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_SCTP] = RTE_PTYPE_L2_ETHER |
                RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
                RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_SCTP,
        [IXGBE_PACKET_TYPE_IPV4_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
                RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
                RTE_PTYPE_INNER_L3_IPV6_EXT,
        [IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
                RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
                RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_TCP,
        [IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
                RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
                RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_UDP,
        [IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
                RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
                RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_SCTP,
        [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
                RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
                RTE_PTYPE_INNER_L3_IPV6_EXT,
        [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
                RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
                RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_TCP,
        [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
                RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
                RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_UDP,
        [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_SCTP] =
                RTE_PTYPE_L2_ETHER |
                RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
                RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_SCTP,
};

const uint32_t
        ptype_table_tn[IXGBE_PACKET_TYPE_TN_MAX] __rte_cache_aligned = {
        [IXGBE_PACKET_TYPE_NVGRE] = RTE_PTYPE_L2_ETHER |
                RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
                RTE_PTYPE_INNER_L2_ETHER,
        [IXGBE_PACKET_TYPE_NVGRE_IPV4] = RTE_PTYPE_L2_ETHER |
                RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
                RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
        [IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT] = RTE_PTYPE_L2_ETHER |
                RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
                RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4_EXT,
        [IXGBE_PACKET_TYPE_NVGRE_IPV6] = RTE_PTYPE_L2_ETHER |
                RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
                RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6,
        [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6] = RTE_PTYPE_L2_ETHER |
                RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
                RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
        [IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
                RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
                RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6_EXT,
        [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
                RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
                RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
        [IXGBE_PACKET_TYPE_NVGRE_IPV4_TCP] = RTE_PTYPE_L2_ETHER |
                RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
                RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4 |
                RTE_PTYPE_INNER_L4_TCP,
        [IXGBE_PACKET_TYPE_NVGRE_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
                RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
                RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6 |
                RTE_PTYPE_INNER_L4_TCP,
        [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
                RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
                RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
        [IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
                RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
                RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6_EXT |
                RTE_PTYPE_INNER_L4_TCP,
        [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT_TCP] =
                RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
                RTE_PTYPE_TUNNEL_GRE | RTE_PTYPE_INNER_L2_ETHER |
                RTE_PTYPE_INNER_L3_IPV4,
        [IXGBE_PACKET_TYPE_NVGRE_IPV4_UDP] = RTE_PTYPE_L2_ETHER |
                RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
                RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4 |
                RTE_PTYPE_INNER_L4_UDP,
        [IXGBE_PACKET_TYPE_NVGRE_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
                RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
                RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6 |
                RTE_PTYPE_INNER_L4_UDP,
        [IXGBE_PACKET_TYPE_NVGRE_IPV6_SCTP] = RTE_PTYPE_L2_ETHER |
                RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
                RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6 |
                RTE_PTYPE_INNER_L4_SCTP,
        [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
                RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
                RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
        [IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
                RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
                RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6_EXT |
                RTE_PTYPE_INNER_L4_UDP,
        [IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
                RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
                RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6_EXT |
                RTE_PTYPE_INNER_L4_SCTP,
        [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT_UDP] =
                RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
                RTE_PTYPE_TUNNEL_GRE | RTE_PTYPE_INNER_L2_ETHER |
                RTE_PTYPE_INNER_L3_IPV4,
        [IXGBE_PACKET_TYPE_NVGRE_IPV4_SCTP] = RTE_PTYPE_L2_ETHER |
                RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
                RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4 |
                RTE_PTYPE_INNER_L4_SCTP,
        [IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
                RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
                RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4_EXT |
                RTE_PTYPE_INNER_L4_SCTP,
        [IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_TCP] = RTE_PTYPE_L2_ETHER |
                RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
                RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4_EXT |
                RTE_PTYPE_INNER_L4_TCP,
        [IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_UDP] = RTE_PTYPE_L2_ETHER |
                RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
                RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4_EXT |
                RTE_PTYPE_INNER_L4_UDP,

        [IXGBE_PACKET_TYPE_VXLAN] = RTE_PTYPE_L2_ETHER |
                RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
                RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER,
        [IXGBE_PACKET_TYPE_VXLAN_IPV4] = RTE_PTYPE_L2_ETHER |
                RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
                RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
                RTE_PTYPE_INNER_L3_IPV4,
        [IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT] = RTE_PTYPE_L2_ETHER |
                RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
                RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
                RTE_PTYPE_INNER_L3_IPV4_EXT,
        [IXGBE_PACKET_TYPE_VXLAN_IPV6] = RTE_PTYPE_L2_ETHER |
                RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
                RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
                RTE_PTYPE_INNER_L3_IPV6,
        [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6] = RTE_PTYPE_L2_ETHER |
                RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1296                RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1297                RTE_PTYPE_INNER_L3_IPV4,
1298        [IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1299                RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1300                RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1301                RTE_PTYPE_INNER_L3_IPV6_EXT,
1302        [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1303                RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1304                RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1305                RTE_PTYPE_INNER_L3_IPV4,
1306        [IXGBE_PACKET_TYPE_VXLAN_IPV4_TCP] = RTE_PTYPE_L2_ETHER |
1307                RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1308                RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1309                RTE_PTYPE_INNER_L3_IPV4 | RTE_PTYPE_INNER_L4_TCP,
1310        [IXGBE_PACKET_TYPE_VXLAN_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1311                RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1312                RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1313                RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_TCP,
1314        [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1315                RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1316                RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1317                RTE_PTYPE_INNER_L3_IPV4,
1318        [IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1319                RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1320                RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1321                RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_TCP,
1322        [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT_TCP] =
1323                RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
1324                RTE_PTYPE_L4_UDP | RTE_PTYPE_TUNNEL_VXLAN |
1325                RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1326        [IXGBE_PACKET_TYPE_VXLAN_IPV4_UDP] = RTE_PTYPE_L2_ETHER |
1327                RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1328                RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1329                RTE_PTYPE_INNER_L3_IPV4 | RTE_PTYPE_INNER_L4_UDP,
1330        [IXGBE_PACKET_TYPE_VXLAN_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1331                RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1332                RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1333                RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_UDP,
1334        [IXGBE_PACKET_TYPE_VXLAN_IPV6_SCTP] = RTE_PTYPE_L2_ETHER |
1335                RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1336                RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1337                RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_SCTP,
1338        [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1339                RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1340                RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1341                RTE_PTYPE_INNER_L3_IPV4,
1342        [IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1343                RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1344                RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1345                RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_UDP,
1346        [IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1347                RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1348                RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1349                RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_SCTP,
1350        [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT_UDP] =
1351                RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
1352                RTE_PTYPE_L4_UDP | RTE_PTYPE_TUNNEL_VXLAN |
1353                RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1354        [IXGBE_PACKET_TYPE_VXLAN_IPV4_SCTP] = RTE_PTYPE_L2_ETHER |
1355                RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1356                RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1357                RTE_PTYPE_INNER_L3_IPV4 | RTE_PTYPE_INNER_L4_SCTP,
1358        [IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1359                RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1360                RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1361                RTE_PTYPE_INNER_L3_IPV4_EXT | RTE_PTYPE_INNER_L4_SCTP,
1362        [IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1363                RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1364                RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1365                RTE_PTYPE_INNER_L3_IPV4_EXT | RTE_PTYPE_INNER_L4_TCP,
1366        [IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1367                RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1368                RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1369                RTE_PTYPE_INNER_L3_IPV4_EXT | RTE_PTYPE_INNER_L4_UDP,
1370};
1371
1372static int
1373ixgbe_monitor_callback(const uint64_t value,
1374                const uint64_t arg[RTE_POWER_MONITOR_OPAQUE_SZ] __rte_unused)
1375{
1376        const uint64_t m = rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD);
1377        /*
1378         * we expect the DD bit to be set to 1 if this descriptor was already
1379         * written to.
1380         */
1381        return (value & m) == m ? -1 : 0;
1382}
1383
1384int
1385ixgbe_get_monitor_addr(void *rx_queue, struct rte_power_monitor_cond *pmc)
1386{
1387        volatile union ixgbe_adv_rx_desc *rxdp;
1388        struct ixgbe_rx_queue *rxq = rx_queue;
1389        uint16_t desc;
1390
1391        desc = rxq->rx_tail;
1392        rxdp = &rxq->rx_ring[desc];
1393        /* watch for changes in status bit */
1394        pmc->addr = &rxdp->wb.upper.status_error;
1395
1396        /* comparison callback */
1397        pmc->fn = ixgbe_monitor_callback;
1398
1399        /* the registers are 32-bit */
1400        pmc->size = sizeof(uint32_t);
1401
1402        return 0;
1403}
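
/*
 * Editorial example (not part of the driver): a minimal sketch of how an
 * application could reach the monitor address exported above through the
 * generic ethdev/power APIs. "port_id", "queue_id" and "deadline_cycles"
 * are hypothetical application-side values.
 *
 *   struct rte_power_monitor_cond pmc;
 *
 *   if (rte_eth_get_monitor_addr(port_id, queue_id, &pmc) == 0)
 *       rte_power_monitor(&pmc, rte_get_tsc_cycles() + deadline_cycles);
 *
 * The core does not sleep if the descriptor was already written back (the
 * callback above returns -1 in that case); otherwise it wakes on a write to
 * the watched status word or when the TSC deadline expires.
 */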
1404
1405/* @note: fix ixgbe_dev_supported_ptypes_get() if anything changes here. */
1406static inline uint32_t
1407ixgbe_rxd_pkt_info_to_pkt_type(uint32_t pkt_info, uint16_t ptype_mask)
1408{
1409
1410        if (unlikely(pkt_info & IXGBE_RXDADV_PKTTYPE_ETQF))
1411                return RTE_PTYPE_UNKNOWN;
1412
1413        pkt_info = (pkt_info >> IXGBE_PACKET_TYPE_SHIFT) & ptype_mask;
1414
1415        /* For tunnel packet */
1416        if (pkt_info & IXGBE_PACKET_TYPE_TUNNEL_BIT) {
1417                /* Remove the tunnel bit to save space. */
1418                pkt_info &= IXGBE_PACKET_TYPE_MASK_TUNNEL;
1419                return ptype_table_tn[pkt_info];
1420        }
1421
1422        /**
1423         * For x550, if it's not a tunnel packet,
1424         * the tunnel type bit should be set to 0.
1425         * Reuse the 82599 mask.
1426         */
1427        pkt_info &= IXGBE_PACKET_TYPE_MASK_82599;
1428
1429        return ptype_table[pkt_info];
1430}
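
/*
 * Editorial example: a sketch of how a receiver could consume the
 * packet_type value produced above ("m" is a hypothetical received mbuf).
 *
 *   uint32_t ptype = m->packet_type;
 *
 *   if (RTE_ETH_IS_TUNNEL_PKT(ptype)) {
 *       // parse the NVGRE/VXLAN inner headers
 *   } else if (RTE_ETH_IS_IPV4_HDR(ptype)) {
 *       // plain (possibly extended) IPv4
 *   }
 */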
1431
1432static inline uint64_t
1433ixgbe_rxd_pkt_info_to_pkt_flags(uint16_t pkt_info)
1434{
1435        static uint64_t ip_rss_types_map[16] __rte_cache_aligned = {
1436                0, PKT_RX_RSS_HASH, PKT_RX_RSS_HASH, PKT_RX_RSS_HASH,
1437                0, PKT_RX_RSS_HASH, 0, PKT_RX_RSS_HASH,
1438                PKT_RX_RSS_HASH, 0, 0, 0,
1439                0, 0, 0,  PKT_RX_FDIR,
1440        };
1441#ifdef RTE_LIBRTE_IEEE1588
1442        static uint64_t ip_pkt_etqf_map[8] = {
1443                0, 0, 0, PKT_RX_IEEE1588_PTP,
1444                0, 0, 0, 0,
1445        };
1446
1447        if (likely(pkt_info & IXGBE_RXDADV_PKTTYPE_ETQF))
1448                return ip_pkt_etqf_map[(pkt_info >> 4) & 0X07] |
1449                                ip_rss_types_map[pkt_info & 0XF];
1450        else
1451                return ip_rss_types_map[pkt_info & 0XF];
1452#else
1453        return ip_rss_types_map[pkt_info & 0XF];
1454#endif
1455}
1456
1457static inline uint64_t
1458rx_desc_status_to_pkt_flags(uint32_t rx_status, uint64_t vlan_flags)
1459{
1460        uint64_t pkt_flags;
1461
1462        /*
1463         * Check only whether a VLAN is present.
1464         * Do not check whether the L3/L4 rx checksum was done by the NIC;
1465         * that can be found from the rte_eth_rxmode.offloads flag.
1466         */
1467        pkt_flags = (rx_status & IXGBE_RXD_STAT_VP) ?  vlan_flags : 0;
1468
1469#ifdef RTE_LIBRTE_IEEE1588
1470        if (rx_status & IXGBE_RXD_STAT_TMST)
1471                pkt_flags = pkt_flags | PKT_RX_IEEE1588_TMST;
1472#endif
1473        return pkt_flags;
1474}
1475
1476static inline uint64_t
1477rx_desc_error_to_pkt_flags(uint32_t rx_status, uint16_t pkt_info,
1478                           uint8_t rx_udp_csum_zero_err)
1479{
1480        uint64_t pkt_flags;
1481
1482        /*
1483         * Bit 31: IPE, IPv4 checksum error
1484         * Bit 30: L4I, L4 integrity error
1485         */
1486        static uint64_t error_to_pkt_flags_map[4] = {
1487                PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_GOOD,
1488                PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_BAD,
1489                PKT_RX_IP_CKSUM_BAD | PKT_RX_L4_CKSUM_GOOD,
1490                PKT_RX_IP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD
1491        };
1492        pkt_flags = error_to_pkt_flags_map[(rx_status >>
1493                IXGBE_RXDADV_ERR_CKSUM_BIT) & IXGBE_RXDADV_ERR_CKSUM_MSK];
1494
1495        /* Mask out the bad UDP checksum error if the hardware has the UDP
1496         * zero-checksum error issue; the software application will then
1497         * have to recompute the checksum itself if needed.
1498         */
1499        if ((rx_status & IXGBE_RXDADV_ERR_TCPE) &&
1500            (pkt_info & IXGBE_RXDADV_PKTTYPE_UDP) &&
1501            rx_udp_csum_zero_err)
1502                pkt_flags &= ~PKT_RX_L4_CKSUM_BAD;
1503
1504        if ((rx_status & IXGBE_RXD_STAT_OUTERIPCS) &&
1505            (rx_status & IXGBE_RXDADV_ERR_OUTERIPER)) {
1506                pkt_flags |= PKT_RX_OUTER_IP_CKSUM_BAD;
1507        }
1508
1509#ifdef RTE_LIB_SECURITY
1510        if (rx_status & IXGBE_RXD_STAT_SECP) {
1511                pkt_flags |= PKT_RX_SEC_OFFLOAD;
1512                if (rx_status & IXGBE_RXDADV_LNKSEC_ERROR_BAD_SIG)
1513                        pkt_flags |= PKT_RX_SEC_OFFLOAD_FAILED;
1514        }
1515#endif
1516
1517        return pkt_flags;
1518}
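
/*
 * Editorial example: typical application-side handling of the checksum
 * flags composed above (sketch only; "m" is a hypothetical received mbuf).
 *
 *   if ((m->ol_flags & PKT_RX_IP_CKSUM_MASK) == PKT_RX_IP_CKSUM_BAD) {
 *       // bad IPv4 header checksum reported by the NIC
 *   }
 *   if ((m->ol_flags & PKT_RX_L4_CKSUM_MASK) == PKT_RX_L4_CKSUM_BAD) {
 *       // bad L4 checksum; note the UDP zero-checksum case above, where
 *       // this flag is deliberately cleared and software must recompute
 *   }
 */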
1519
1520/*
1521 * LOOK_AHEAD defines how many desc statuses to check beyond the
1522 * current descriptor.
1523 * It must be a preprocessor #define for optimal performance.
1524 * Do not change the value of LOOK_AHEAD, as the ixgbe_rx_scan_hw_ring
1525 * function only works with LOOK_AHEAD=8.
1526 */
1527#define LOOK_AHEAD 8
1528#if (LOOK_AHEAD != 8)
1529#error "PMD IXGBE: LOOK_AHEAD must be 8\n"
1530#endif
1531static inline int
1532ixgbe_rx_scan_hw_ring(struct ixgbe_rx_queue *rxq)
1533{
1534        volatile union ixgbe_adv_rx_desc *rxdp;
1535        struct ixgbe_rx_entry *rxep;
1536        struct rte_mbuf *mb;
1537        uint16_t pkt_len;
1538        uint64_t pkt_flags;
1539        int nb_dd;
1540        uint32_t s[LOOK_AHEAD];
1541        uint32_t pkt_info[LOOK_AHEAD];
1542        int i, j, nb_rx = 0;
1543        uint32_t status;
1544        uint64_t vlan_flags = rxq->vlan_flags;
1545
1546        /* get references to current descriptor and S/W ring entry */
1547        rxdp = &rxq->rx_ring[rxq->rx_tail];
1548        rxep = &rxq->sw_ring[rxq->rx_tail];
1549
1550        status = rxdp->wb.upper.status_error;
1551        /* check to make sure there is at least 1 packet to receive */
1552        if (!(status & rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD)))
1553                return 0;
1554
1555        /*
1556         * Scan LOOK_AHEAD descriptors at a time to determine which descriptors
1557         * reference packets that are ready to be received.
1558         */
1559        for (i = 0; i < RTE_PMD_IXGBE_RX_MAX_BURST;
1560             i += LOOK_AHEAD, rxdp += LOOK_AHEAD, rxep += LOOK_AHEAD) {
1561                /* Read desc statuses backwards to avoid race condition */
1562                for (j = 0; j < LOOK_AHEAD; j++)
1563                        s[j] = rte_le_to_cpu_32(rxdp[j].wb.upper.status_error);
1564
1565                rte_smp_rmb();
1566
1567                /* Compute how many contiguous DD status bits are set */
1568                for (nb_dd = 0; nb_dd < LOOK_AHEAD &&
1569                                (s[nb_dd] & IXGBE_RXDADV_STAT_DD); nb_dd++)
1570                        ;
1571
1572                for (j = 0; j < nb_dd; j++)
1573                        pkt_info[j] = rte_le_to_cpu_32(rxdp[j].wb.lower.
1574                                                       lo_dword.data);
1575
1576                nb_rx += nb_dd;
1577
1578                /* Translate descriptor info to mbuf format */
1579                for (j = 0; j < nb_dd; ++j) {
1580                        mb = rxep[j].mbuf;
1581                        pkt_len = rte_le_to_cpu_16(rxdp[j].wb.upper.length) -
1582                                  rxq->crc_len;
1583                        mb->data_len = pkt_len;
1584                        mb->pkt_len = pkt_len;
1585                        mb->vlan_tci = rte_le_to_cpu_16(rxdp[j].wb.upper.vlan);
1586
1587                        /* convert descriptor fields to rte mbuf flags */
1588                        pkt_flags = rx_desc_status_to_pkt_flags(s[j],
1589                                vlan_flags);
1590                        pkt_flags |= rx_desc_error_to_pkt_flags(s[j],
1591                                        (uint16_t)pkt_info[j],
1592                                        rxq->rx_udp_csum_zero_err);
1593                        pkt_flags |= ixgbe_rxd_pkt_info_to_pkt_flags
1594                                        ((uint16_t)pkt_info[j]);
1595                        mb->ol_flags = pkt_flags;
1596                        mb->packet_type =
1597                                ixgbe_rxd_pkt_info_to_pkt_type
1598                                        (pkt_info[j], rxq->pkt_type_mask);
1599
1600                        if (likely(pkt_flags & PKT_RX_RSS_HASH))
1601                                mb->hash.rss = rte_le_to_cpu_32(
1602                                    rxdp[j].wb.lower.hi_dword.rss);
1603                        else if (pkt_flags & PKT_RX_FDIR) {
1604                                mb->hash.fdir.hash = rte_le_to_cpu_16(
1605                                    rxdp[j].wb.lower.hi_dword.csum_ip.csum) &
1606                                    IXGBE_ATR_HASH_MASK;
1607                                mb->hash.fdir.id = rte_le_to_cpu_16(
1608                                    rxdp[j].wb.lower.hi_dword.csum_ip.ip_id);
1609                        }
1610                }
1611
1612                /* Move mbuf pointers from the S/W ring to the stage */
1613                for (j = 0; j < LOOK_AHEAD; ++j) {
1614                        rxq->rx_stage[i + j] = rxep[j].mbuf;
1615                }
1616
1617                /* stop if all requested packets could not be received */
1618                if (nb_dd != LOOK_AHEAD)
1619                        break;
1620        }
1621
1622        /* clear software ring entries so we can cleanup correctly */
1623        for (i = 0; i < nb_rx; ++i) {
1624                rxq->sw_ring[rxq->rx_tail + i].mbuf = NULL;
1625        }
1626
1627
1628        return nb_rx;
1629}
1630
1631static inline int
1632ixgbe_rx_alloc_bufs(struct ixgbe_rx_queue *rxq, bool reset_mbuf)
1633{
1634        volatile union ixgbe_adv_rx_desc *rxdp;
1635        struct ixgbe_rx_entry *rxep;
1636        struct rte_mbuf *mb;
1637        uint16_t alloc_idx;
1638        __le64 dma_addr;
1639        int diag, i;
1640
1641        /* allocate buffers in bulk directly into the S/W ring */
1642        alloc_idx = rxq->rx_free_trigger - (rxq->rx_free_thresh - 1);
1643        rxep = &rxq->sw_ring[alloc_idx];
1644        diag = rte_mempool_get_bulk(rxq->mb_pool, (void *)rxep,
1645                                    rxq->rx_free_thresh);
1646        if (unlikely(diag != 0))
1647                return -ENOMEM;
1648
1649        rxdp = &rxq->rx_ring[alloc_idx];
1650        for (i = 0; i < rxq->rx_free_thresh; ++i) {
1651                /* populate the static rte mbuf fields */
1652                mb = rxep[i].mbuf;
1653                if (reset_mbuf) {
1654                        mb->port = rxq->port_id;
1655                }
1656
1657                rte_mbuf_refcnt_set(mb, 1);
1658                mb->data_off = RTE_PKTMBUF_HEADROOM;
1659
1660                /* populate the descriptors */
1661                dma_addr = rte_cpu_to_le_64(rte_mbuf_data_iova_default(mb));
1662                rxdp[i].read.hdr_addr = 0;
1663                rxdp[i].read.pkt_addr = dma_addr;
1664        }
1665
1666        /* update state of internal queue structure */
1667        rxq->rx_free_trigger = rxq->rx_free_trigger + rxq->rx_free_thresh;
1668        if (rxq->rx_free_trigger >= rxq->nb_rx_desc)
1669                rxq->rx_free_trigger = rxq->rx_free_thresh - 1;
1670
1671        /* no errors */
1672        return 0;
1673}
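
/*
 * Editorial note on the trigger arithmetic above (illustrative values):
 * with nb_rx_desc = 128 and rx_free_thresh = 32, rx_free_trigger moves
 * 31 -> 63 -> 95 -> 127 and then wraps back to 31, so successive refills
 * start at alloc_idx = rx_free_trigger - (rx_free_thresh - 1),
 * i.e. 0, 32, 64 and 96.
 */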
1674
1675static inline uint16_t
1676ixgbe_rx_fill_from_stage(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_pkts,
1677                         uint16_t nb_pkts)
1678{
1679        struct rte_mbuf **stage = &rxq->rx_stage[rxq->rx_next_avail];
1680        int i;
1681
1682        /* how many packets are ready to return? */
1683        nb_pkts = (uint16_t)RTE_MIN(nb_pkts, rxq->rx_nb_avail);
1684
1685        /* copy mbuf pointers to the application's packet list */
1686        for (i = 0; i < nb_pkts; ++i)
1687                rx_pkts[i] = stage[i];
1688
1689        /* update internal queue state */
1690        rxq->rx_nb_avail = (uint16_t)(rxq->rx_nb_avail - nb_pkts);
1691        rxq->rx_next_avail = (uint16_t)(rxq->rx_next_avail + nb_pkts);
1692
1693        return nb_pkts;
1694}
1695
1696static inline uint16_t
1697rx_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
1698             uint16_t nb_pkts)
1699{
1700        struct ixgbe_rx_queue *rxq = (struct ixgbe_rx_queue *)rx_queue;
1701        uint16_t nb_rx = 0;
1702
1703        /* Any previously recv'd pkts will be returned from the Rx stage */
1704        if (rxq->rx_nb_avail)
1705                return ixgbe_rx_fill_from_stage(rxq, rx_pkts, nb_pkts);
1706
1707        /* Scan the H/W ring for packets to receive */
1708        nb_rx = (uint16_t)ixgbe_rx_scan_hw_ring(rxq);
1709
1710        /* update internal queue state */
1711        rxq->rx_next_avail = 0;
1712        rxq->rx_nb_avail = nb_rx;
1713        rxq->rx_tail = (uint16_t)(rxq->rx_tail + nb_rx);
1714
1715        /* if required, allocate new buffers to replenish descriptors */
1716        if (rxq->rx_tail > rxq->rx_free_trigger) {
1717                uint16_t cur_free_trigger = rxq->rx_free_trigger;
1718
1719                if (ixgbe_rx_alloc_bufs(rxq, true) != 0) {
1720                        int i, j;
1721
1722                        PMD_RX_LOG(DEBUG, "RX mbuf alloc failed port_id=%u "
1723                                   "queue_id=%u", (unsigned) rxq->port_id,
1724                                   (unsigned) rxq->queue_id);
1725
1726                        rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed +=
1727                                rxq->rx_free_thresh;
1728
1729                        /*
1730                         * Need to rewind any previous receives if we cannot
1731                         * allocate new buffers to replenish the old ones.
1732                         */
1733                        rxq->rx_nb_avail = 0;
1734                        rxq->rx_tail = (uint16_t)(rxq->rx_tail - nb_rx);
1735                        for (i = 0, j = rxq->rx_tail; i < nb_rx; ++i, ++j)
1736                                rxq->sw_ring[j].mbuf = rxq->rx_stage[i];
1737
1738                        return 0;
1739                }
1740
1741                /* update tail pointer */
1742                rte_wmb();
1743                IXGBE_PCI_REG_WC_WRITE_RELAXED(rxq->rdt_reg_addr,
1744                                            cur_free_trigger);
1745        }
1746
1747        if (rxq->rx_tail >= rxq->nb_rx_desc)
1748                rxq->rx_tail = 0;
1749
1750        /* received any packets this loop? */
1751        if (rxq->rx_nb_avail)
1752                return ixgbe_rx_fill_from_stage(rxq, rx_pkts, nb_pkts);
1753
1754        return 0;
1755}
1756
1757/* split requests into chunks of size RTE_PMD_IXGBE_RX_MAX_BURST */
1758uint16_t
1759ixgbe_recv_pkts_bulk_alloc(void *rx_queue, struct rte_mbuf **rx_pkts,
1760                           uint16_t nb_pkts)
1761{
1762        uint16_t nb_rx;
1763
1764        if (unlikely(nb_pkts == 0))
1765                return 0;
1766
1767        if (likely(nb_pkts <= RTE_PMD_IXGBE_RX_MAX_BURST))
1768                return rx_recv_pkts(rx_queue, rx_pkts, nb_pkts);
1769
1770        /* request is relatively large, chunk it up */
1771        nb_rx = 0;
1772        while (nb_pkts) {
1773                uint16_t ret, n;
1774
1775                n = (uint16_t)RTE_MIN(nb_pkts, RTE_PMD_IXGBE_RX_MAX_BURST);
1776                ret = rx_recv_pkts(rx_queue, &rx_pkts[nb_rx], n);
1777                nb_rx = (uint16_t)(nb_rx + ret);
1778                nb_pkts = (uint16_t)(nb_pkts - ret);
1779                if (ret < n)
1780                        break;
1781        }
1782
1783        return nb_rx;
1784}
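
/*
 * Editorial example: with RTE_PMD_IXGBE_RX_MAX_BURST of 32, a request for
 * nb_pkts = 100 is split above into chunks of 32, 32, 32 and 4; the loop
 * stops early as soon as a chunk returns fewer packets than requested.
 */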
1785
1786uint16_t
1787ixgbe_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
1788                uint16_t nb_pkts)
1789{
1790        struct ixgbe_rx_queue *rxq;
1791        volatile union ixgbe_adv_rx_desc *rx_ring;
1792        volatile union ixgbe_adv_rx_desc *rxdp;
1793        struct ixgbe_rx_entry *sw_ring;
1794        struct ixgbe_rx_entry *rxe;
1795        struct rte_mbuf *rxm;
1796        struct rte_mbuf *nmb;
1797        union ixgbe_adv_rx_desc rxd;
1798        uint64_t dma_addr;
1799        uint32_t staterr;
1800        uint32_t pkt_info;
1801        uint16_t pkt_len;
1802        uint16_t rx_id;
1803        uint16_t nb_rx;
1804        uint16_t nb_hold;
1805        uint64_t pkt_flags;
1806        uint64_t vlan_flags;
1807
1808        nb_rx = 0;
1809        nb_hold = 0;
1810        rxq = rx_queue;
1811        rx_id = rxq->rx_tail;
1812        rx_ring = rxq->rx_ring;
1813        sw_ring = rxq->sw_ring;
1814        vlan_flags = rxq->vlan_flags;
1815        while (nb_rx < nb_pkts) {
1816                /*
1817                 * The order of operations here is important as the DD status
1818                 * bit must not be read after any other descriptor fields.
1819                 * rx_ring and rxdp are pointing to volatile data so the order
1820                 * of accesses cannot be reordered by the compiler. If they were
1821                 * not volatile, they could be reordered which could lead to
1822                 * using invalid descriptor fields when read from rxd.
1823                 */
1824                rxdp = &rx_ring[rx_id];
1825                staterr = rxdp->wb.upper.status_error;
1826                if (!(staterr & rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD)))
1827                        break;
1828                rxd = *rxdp;
1829
1830                /*
1831                 * End of packet.
1832                 *
1833                 * If the IXGBE_RXDADV_STAT_EOP flag is not set, the RX packet
1834                 * is likely to be invalid and to be dropped by the various
1835                 * validation checks performed by the network stack.
1836                 *
1837                 * Allocate a new mbuf to replenish the RX ring descriptor.
1838                 * If the allocation fails:
1839                 *    - arrange for that RX descriptor to be the first one
1840                 *      being parsed the next time the receive function is
1841                 *      invoked [on the same queue].
1842                 *
1843                 *    - Stop parsing the RX ring and return immediately.
1844                 *
1845                 * This policy does not drop the packet received in the RX
1846                 * descriptor for which the allocation of a new mbuf failed.
1847                 * Thus, it allows that packet to be retrieved later, once
1848                 * mbufs have been freed in the meantime.
1849                 * As a side effect, holding RX descriptors instead of
1850                 * systematically giving them back to the NIC may lead to
1851                 * RX ring exhaustion situations.
1852                 * However, the NIC can gracefully prevent such situations
1853                 * from happening by sending specific "back-pressure" flow
1854                 * control frames to its peer(s).
1855                 */
1856                PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_id=%u "
1857                           "ext_err_stat=0x%08x pkt_len=%u",
1858                           (unsigned) rxq->port_id, (unsigned) rxq->queue_id,
1859                           (unsigned) rx_id, (unsigned) staterr,
1860                           (unsigned) rte_le_to_cpu_16(rxd.wb.upper.length));
1861
1862                nmb = rte_mbuf_raw_alloc(rxq->mb_pool);
1863                if (nmb == NULL) {
1864                        PMD_RX_LOG(DEBUG, "RX mbuf alloc failed port_id=%u "
1865                                   "queue_id=%u", (unsigned) rxq->port_id,
1866                                   (unsigned) rxq->queue_id);
1867                        rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed++;
1868                        break;
1869                }
1870
1871                nb_hold++;
1872                rxe = &sw_ring[rx_id];
1873                rx_id++;
1874                if (rx_id == rxq->nb_rx_desc)
1875                        rx_id = 0;
1876
1877                /* Prefetch next mbuf while processing current one. */
1878                rte_ixgbe_prefetch(sw_ring[rx_id].mbuf);
1879
1880                /*
1881                 * When next RX descriptor is on a cache-line boundary,
1882                 * prefetch the next 4 RX descriptors and the next 8 pointers
1883                 * to mbufs.
1884                 */
1885                if ((rx_id & 0x3) == 0) {
1886                        rte_ixgbe_prefetch(&rx_ring[rx_id]);
1887                        rte_ixgbe_prefetch(&sw_ring[rx_id]);
1888                }
1889
1890                rxm = rxe->mbuf;
1891                rxe->mbuf = nmb;
1892                dma_addr =
1893                        rte_cpu_to_le_64(rte_mbuf_data_iova_default(nmb));
1894                rxdp->read.hdr_addr = 0;
1895                rxdp->read.pkt_addr = dma_addr;
1896
1897                /*
1898                 * Initialize the returned mbuf.
1899                 * 1) setup generic mbuf fields:
1900                 *    - number of segments,
1901                 *    - next segment,
1902                 *    - packet length,
1903                 *    - RX port identifier.
1904                 * 2) integrate hardware offload data, if any:
1905                 *    - RSS flag & hash,
1906                 *    - IP checksum flag,
1907                 *    - VLAN TCI, if any,
1908                 *    - error flags.
1909                 */
1910                pkt_len = (uint16_t) (rte_le_to_cpu_16(rxd.wb.upper.length) -
1911                                      rxq->crc_len);
1912                rxm->data_off = RTE_PKTMBUF_HEADROOM;
1913                rte_packet_prefetch((char *)rxm->buf_addr + rxm->data_off);
1914                rxm->nb_segs = 1;
1915                rxm->next = NULL;
1916                rxm->pkt_len = pkt_len;
1917                rxm->data_len = pkt_len;
1918                rxm->port = rxq->port_id;
1919
1920                pkt_info = rte_le_to_cpu_32(rxd.wb.lower.lo_dword.data);
1921                /* Only valid if PKT_RX_VLAN set in pkt_flags */
1922                rxm->vlan_tci = rte_le_to_cpu_16(rxd.wb.upper.vlan);
1923
1924                pkt_flags = rx_desc_status_to_pkt_flags(staterr, vlan_flags);
1925                pkt_flags = pkt_flags |
1926                        rx_desc_error_to_pkt_flags(staterr, (uint16_t)pkt_info,
1927                                                   rxq->rx_udp_csum_zero_err);
1928                pkt_flags = pkt_flags |
1929                        ixgbe_rxd_pkt_info_to_pkt_flags((uint16_t)pkt_info);
1930                rxm->ol_flags = pkt_flags;
1931                rxm->packet_type =
1932                        ixgbe_rxd_pkt_info_to_pkt_type(pkt_info,
1933                                                       rxq->pkt_type_mask);
1934
1935                if (likely(pkt_flags & PKT_RX_RSS_HASH))
1936                        rxm->hash.rss = rte_le_to_cpu_32(
1937                                                rxd.wb.lower.hi_dword.rss);
1938                else if (pkt_flags & PKT_RX_FDIR) {
1939                        rxm->hash.fdir.hash = rte_le_to_cpu_16(
1940                                        rxd.wb.lower.hi_dword.csum_ip.csum) &
1941                                        IXGBE_ATR_HASH_MASK;
1942                        rxm->hash.fdir.id = rte_le_to_cpu_16(
1943                                        rxd.wb.lower.hi_dword.csum_ip.ip_id);
1944                }
1945                /*
1946                 * Store the mbuf address into the next entry of the array
1947                 * of returned packets.
1948                 */
1949                rx_pkts[nb_rx++] = rxm;
1950        }
1951        rxq->rx_tail = rx_id;
1952
1953        /*
1954         * If the number of free RX descriptors is greater than the RX free
1955         * threshold of the queue, advance the Receive Descriptor Tail (RDT)
1956         * register.
1957         * Update the RDT with the value of the last processed RX descriptor
1958         * minus 1, to guarantee that the RDT register is never equal to the
1959         * RDH register, which creates a "full" ring situation from the
1960         * hardware point of view...
1961         */
1962        nb_hold = (uint16_t) (nb_hold + rxq->nb_rx_hold);
1963        if (nb_hold > rxq->rx_free_thresh) {
1964                PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_tail=%u "
1965                           "nb_hold=%u nb_rx=%u",
1966                           (unsigned) rxq->port_id, (unsigned) rxq->queue_id,
1967                           (unsigned) rx_id, (unsigned) nb_hold,
1968                           (unsigned) nb_rx);
1969                rx_id = (uint16_t) ((rx_id == 0) ?
1970                                     (rxq->nb_rx_desc - 1) : (rx_id - 1));
1971                IXGBE_PCI_REG_WC_WRITE(rxq->rdt_reg_addr, rx_id);
1972                nb_hold = 0;
1973        }
1974        rxq->nb_rx_hold = nb_hold;
1975        return nb_rx;
1976}
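
/*
 * Editorial example of the RDT update above (illustrative values): with
 * nb_rx_desc = 128, if processing stopped with rx_id back at 0, the value
 * written to RDT is 127 (i.e. rx_id - 1 modulo the ring size), keeping RDT
 * one descriptor behind RDH so the hardware never sees a "full" ring.
 */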
1977
1978/**
1979 * Detect an RSC descriptor.
1980 */
1981static inline uint32_t
1982ixgbe_rsc_count(union ixgbe_adv_rx_desc *rx)
1983{
1984        return (rte_le_to_cpu_32(rx->wb.lower.lo_dword.data) &
1985                IXGBE_RXDADV_RSCCNT_MASK) >> IXGBE_RXDADV_RSCCNT_SHIFT;
1986}
1987
1988/**
1989 * ixgbe_fill_cluster_head_buf - fill the first mbuf of the returned packet
1990 *
1991 * Fill the following info in the HEAD buffer of the Rx cluster:
1992 *    - RX port identifier
1993 *    - hardware offload data, if any:
1994 *      - RSS flag & hash
1995 *      - IP checksum flag
1996 *      - VLAN TCI, if any
1997 *      - error flags
1998 * @head HEAD of the packet cluster
1999 * @desc HW descriptor to get data from
2000 * @rxq Pointer to the Rx queue
2001 */
2002static inline void
2003ixgbe_fill_cluster_head_buf(
2004        struct rte_mbuf *head,
2005        union ixgbe_adv_rx_desc *desc,
2006        struct ixgbe_rx_queue *rxq,
2007        uint32_t staterr)
2008{
2009        uint32_t pkt_info;
2010        uint64_t pkt_flags;
2011
2012        head->port = rxq->port_id;
2013
2014        /* The vlan_tci field is only valid when PKT_RX_VLAN is
2015         * set in the pkt_flags field.
2016         */
2017        head->vlan_tci = rte_le_to_cpu_16(desc->wb.upper.vlan);
2018        pkt_info = rte_le_to_cpu_32(desc->wb.lower.lo_dword.data);
2019        pkt_flags = rx_desc_status_to_pkt_flags(staterr, rxq->vlan_flags);
2020        pkt_flags |= rx_desc_error_to_pkt_flags(staterr, (uint16_t)pkt_info,
2021                                                rxq->rx_udp_csum_zero_err);
2022        pkt_flags |= ixgbe_rxd_pkt_info_to_pkt_flags((uint16_t)pkt_info);
2023        head->ol_flags = pkt_flags;
2024        head->packet_type =
2025                ixgbe_rxd_pkt_info_to_pkt_type(pkt_info, rxq->pkt_type_mask);
2026
2027        if (likely(pkt_flags & PKT_RX_RSS_HASH))
2028                head->hash.rss = rte_le_to_cpu_32(desc->wb.lower.hi_dword.rss);
2029        else if (pkt_flags & PKT_RX_FDIR) {
2030                head->hash.fdir.hash =
2031                        rte_le_to_cpu_16(desc->wb.lower.hi_dword.csum_ip.csum)
2032                                                          & IXGBE_ATR_HASH_MASK;
2033                head->hash.fdir.id =
2034                        rte_le_to_cpu_16(desc->wb.lower.hi_dword.csum_ip.ip_id);
2035        }
2036}
2037
2038/**
2039 * ixgbe_recv_pkts_lro - receive handler for the LRO case.
2040 *
2041 * @rx_queue Rx queue handle
2042 * @rx_pkts table of received packets
2043 * @nb_pkts size of rx_pkts table
2044 * @bulk_alloc if TRUE, bulk allocation is used for HW ring refilling
2045 *
2046 * Handles the Rx HW ring completions when the RSC feature is configured. Uses an
2047 * additional ring of ixgbe_rsc_entry's that will hold the relevant RSC info.
2048 *
2049 * We use the same logic as in Linux and in FreeBSD ixgbe drivers:
2050 * 1) When non-EOP RSC completion arrives:
2051 *    a) Update the HEAD of the current RSC aggregation cluster with the new
2052 *       segment's data length.
2053 *    b) Set the "next" pointer of the current segment to point to the segment
2054 *       at the NEXTP index.
2055 *    c) Pass the HEAD of RSC aggregation cluster on to the next NEXTP entry
2056 *       in the sw_rsc_ring.
2057 * 2) When EOP arrives we just update the cluster's total length and offload
2058 *    flags and deliver the cluster up to the upper layers. In our case - put it
2059 *    in the rx_pkts table.
2060 *
2061 * Returns the number of received packets/clusters (according to the "bulk
2062 * receive" interface).
2063 */
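/*
 * Editorial illustration of the scheme above (descriptor indices are made
 * up): if an RSC cluster spans descriptors 5 -> 9 -> 12, then while
 * processing descriptor 5 its mbuf becomes the HEAD, its "next" pointer is
 * set to the mbuf of descriptor 9 and the HEAD is stored in
 * sw_sc_ring[9].fbuf; the same happens from 9 to 12; when the EOP segment
 * at descriptor 12 completes, the HEAD is finalized by
 * ixgbe_fill_cluster_head_buf() and placed in rx_pkts[].
 */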
2064static inline uint16_t
2065ixgbe_recv_pkts_lro(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts,
2066                    bool bulk_alloc)
2067{
2068        struct ixgbe_rx_queue *rxq = rx_queue;
2069        volatile union ixgbe_adv_rx_desc *rx_ring = rxq->rx_ring;
2070        struct ixgbe_rx_entry *sw_ring = rxq->sw_ring;
2071        struct ixgbe_scattered_rx_entry *sw_sc_ring = rxq->sw_sc_ring;
2072        uint16_t rx_id = rxq->rx_tail;
2073        uint16_t nb_rx = 0;
2074        uint16_t nb_hold = rxq->nb_rx_hold;
2075        uint16_t prev_id = rxq->rx_tail;
2076
2077        while (nb_rx < nb_pkts) {
2078                bool eop;
2079                struct ixgbe_rx_entry *rxe;
2080                struct ixgbe_scattered_rx_entry *sc_entry;
2081                struct ixgbe_scattered_rx_entry *next_sc_entry = NULL;
2082                struct ixgbe_rx_entry *next_rxe = NULL;
2083                struct rte_mbuf *first_seg;
2084                struct rte_mbuf *rxm;
2085                struct rte_mbuf *nmb = NULL;
2086                union ixgbe_adv_rx_desc rxd;
2087                uint16_t data_len;
2088                uint16_t next_id;
2089                volatile union ixgbe_adv_rx_desc *rxdp;
2090                uint32_t staterr;
2091
2092next_desc:
2093                /*
2094                 * The code in this whole file uses the volatile pointer to
2095                 * ensure the read ordering of the status and the rest of the
2096                 * descriptor fields (on the compiler level only!!!). This is so
2097                 * UGLY - why not just use the compiler barrier instead? DPDK
2098                 * even has the rte_compiler_barrier() for that.
2099                 *
2100                 * But most importantly this is just wrong because this doesn't
2101                 * ensure memory ordering in a general case at all. For
2102                 * instance, DPDK is supposed to work on Power CPUs where
2103                 * compiler barrier may just not be enough!
2104                 *
2105                 * I tried to write only this function properly to have a
2106                 * starting point (as a part of an LRO/RSC series) but the
2107                 * compiler cursed at me when I tried to cast away the
2108                 * "volatile" from rx_ring (yes, it's volatile too!!!). So, I'm
2109                 * keeping it the way it is for now.
2110                 *
2111                 * The code in this file is broken in so many other places and
2112                 * will just not work on a big endian CPU anyway therefore the
2113                 * lines below will have to be revisited together with the rest
2114                 * of the ixgbe PMD.
2115                 *
2116                 * TODO:
2117                 *    - Get rid of "volatile" and let the compiler do its job.
2118                 *    - Use the proper memory barrier (rte_rmb()) to ensure the
2119                 *      memory ordering below.
2120                 */
2121                rxdp = &rx_ring[rx_id];
2122                staterr = rte_le_to_cpu_32(rxdp->wb.upper.status_error);
2123
2124                if (!(staterr & IXGBE_RXDADV_STAT_DD))
2125                        break;
2126
2127                rxd = *rxdp;
2128
2129                PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_id=%u "
2130                                  "staterr=0x%x data_len=%u",
2131                           rxq->port_id, rxq->queue_id, rx_id, staterr,
2132                           rte_le_to_cpu_16(rxd.wb.upper.length));
2133
2134                if (!bulk_alloc) {
2135                        nmb = rte_mbuf_raw_alloc(rxq->mb_pool);
2136                        if (nmb == NULL) {
2137                                PMD_RX_LOG(DEBUG, "RX mbuf alloc failed "
2138                                                  "port_id=%u queue_id=%u",
2139                                           rxq->port_id, rxq->queue_id);
2140
2141                                rte_eth_devices[rxq->port_id].data->
2142                                                        rx_mbuf_alloc_failed++;
2143                                break;
2144                        }
2145                } else if (nb_hold > rxq->rx_free_thresh) {
2146                        uint16_t next_rdt = rxq->rx_free_trigger;
2147
2148                        if (!ixgbe_rx_alloc_bufs(rxq, false)) {
2149                                rte_wmb();
2150                                IXGBE_PCI_REG_WC_WRITE_RELAXED(
2151                                                        rxq->rdt_reg_addr,
2152                                                        next_rdt);
2153                                nb_hold -= rxq->rx_free_thresh;
2154                        } else {
2155                                PMD_RX_LOG(DEBUG, "RX bulk alloc failed "
2156                                                  "port_id=%u queue_id=%u",
2157                                           rxq->port_id, rxq->queue_id);
2158
2159                                rte_eth_devices[rxq->port_id].data->
2160                                                        rx_mbuf_alloc_failed++;
2161                                break;
2162                        }
2163                }
2164
2165                nb_hold++;
2166                rxe = &sw_ring[rx_id];
2167                eop = staterr & IXGBE_RXDADV_STAT_EOP;
2168
2169                next_id = rx_id + 1;
2170                if (next_id == rxq->nb_rx_desc)
2171                        next_id = 0;
2172
2173                /* Prefetch next mbuf while processing current one. */
2174                rte_ixgbe_prefetch(sw_ring[next_id].mbuf);
2175
2176                /*
2177                 * When next RX descriptor is on a cache-line boundary,
2178                 * prefetch the next 4 RX descriptors and the next 4 pointers
2179                 * to mbufs.
2180                 */
2181                if ((next_id & 0x3) == 0) {
2182                        rte_ixgbe_prefetch(&rx_ring[next_id]);
2183                        rte_ixgbe_prefetch(&sw_ring[next_id]);
2184                }
2185
2186                rxm = rxe->mbuf;
2187
2188                if (!bulk_alloc) {
2189                        __le64 dma =
2190                          rte_cpu_to_le_64(rte_mbuf_data_iova_default(nmb));
2191                        /*
2192                         * Update RX descriptor with the physical address of the
2193                         * new data buffer of the new allocated mbuf.
2194                         * new data buffer of the newly allocated mbuf.
2195                        rxe->mbuf = nmb;
2196
2197                        rxm->data_off = RTE_PKTMBUF_HEADROOM;
2198                        rxdp->read.hdr_addr = 0;
2199                        rxdp->read.pkt_addr = dma;
2200                } else
2201                        rxe->mbuf = NULL;
2202
2203                /*
2204                 * Set data length & data buffer address of mbuf.
2205                 */
2206                data_len = rte_le_to_cpu_16(rxd.wb.upper.length);
2207                rxm->data_len = data_len;
2208
2209                if (!eop) {
2210                        uint16_t nextp_id;
2211                        /*
2212                         * Get next descriptor index:
2213                         *  - For RSC it's in the NEXTP field.
2214                         *  - For a scattered packet - it's just a following
2215                         *    descriptor.
2216                         */
2217                        if (ixgbe_rsc_count(&rxd))
2218                                nextp_id =
2219                                        (staterr & IXGBE_RXDADV_NEXTP_MASK) >>
2220                                                       IXGBE_RXDADV_NEXTP_SHIFT;
2221                        else
2222                                nextp_id = next_id;
2223
2224                        next_sc_entry = &sw_sc_ring[nextp_id];
2225                        next_rxe = &sw_ring[nextp_id];
2226                        rte_ixgbe_prefetch(next_rxe);
2227                }
2228
2229                sc_entry = &sw_sc_ring[rx_id];
2230                first_seg = sc_entry->fbuf;
2231                sc_entry->fbuf = NULL;
2232
2233                /*
2234                 * If this is the first buffer of the received packet,
2235                 * set the pointer to the first mbuf of the packet and
2236                 * initialize its context.
2237                 * Otherwise, update the total length and the number of segments
2238                 * of the current scattered packet, and update the pointer to
2239                 * the last mbuf of the current packet.
2240                 */
2241                if (first_seg == NULL) {
2242                        first_seg = rxm;
2243                        first_seg->pkt_len = data_len;
2244                        first_seg->nb_segs = 1;
2245                } else {
2246                        first_seg->pkt_len += data_len;
2247                        first_seg->nb_segs++;
2248                }
2249
2250                prev_id = rx_id;
2251                rx_id = next_id;
2252
2253                /*
2254                 * If this is not the last buffer of the received packet, update
2255                 * the pointer to the first mbuf at the NEXTP entry in the
2256                 * sw_sc_ring and continue to parse the RX ring.
2257                 */
2258                if (!eop && next_rxe) {
2259                        rxm->next = next_rxe->mbuf;
2260                        next_sc_entry->fbuf = first_seg;
2261                        goto next_desc;
2262                }
2263
2264                /* Initialize the first mbuf of the returned packet */
2265                ixgbe_fill_cluster_head_buf(first_seg, &rxd, rxq, staterr);
2266
2267                /*
2268                 * Deal with the case when HW CRC strip is disabled.
2269                 * That can't happen when LRO is enabled, but still could
2270                 * happen for scattered RX mode.
2271                 */
2272                first_seg->pkt_len -= rxq->crc_len;
2273                if (unlikely(rxm->data_len <= rxq->crc_len)) {
2274                        struct rte_mbuf *lp;
2275
2276                        for (lp = first_seg; lp->next != rxm; lp = lp->next)
2277                                ;
2278
2279                        first_seg->nb_segs--;
2280                        lp->data_len -= rxq->crc_len - rxm->data_len;
2281                        lp->next = NULL;
2282                        rte_pktmbuf_free_seg(rxm);
2283                } else
2284                        rxm->data_len -= rxq->crc_len;
2285
2286                /* Prefetch data of first segment, if configured to do so. */
2287                rte_packet_prefetch((char *)first_seg->buf_addr +
2288                        first_seg->data_off);
2289
2290                /*
2291                 * Store the mbuf address into the next entry of the array
2292                 * of returned packets.
2293                 */
2294                rx_pkts[nb_rx++] = first_seg;
2295        }
2296
2297        /*
2298         * Record index of the next RX descriptor to probe.
2299         */
2300        rxq->rx_tail = rx_id;
2301
2302        /*
2303         * If the number of free RX descriptors is greater than the RX free
2304         * threshold of the queue, advance the Receive Descriptor Tail (RDT)
2305         * register.
2306         * Update the RDT with the value of the last processed RX descriptor
2307         * minus 1, to guarantee that the RDT register is never equal to the
2308         * RDH register, which creates a "full" ring situation from the
2309         * hardware point of view...
2310         */
2311        if (!bulk_alloc && nb_hold > rxq->rx_free_thresh) {
2312                PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_tail=%u "
2313                           "nb_hold=%u nb_rx=%u",
2314                           rxq->port_id, rxq->queue_id, rx_id, nb_hold, nb_rx);
2315
2316                rte_wmb();
2317                IXGBE_PCI_REG_WC_WRITE_RELAXED(rxq->rdt_reg_addr, prev_id);
2318                nb_hold = 0;
2319        }
2320
2321        rxq->nb_rx_hold = nb_hold;
2322        return nb_rx;
2323}
2324
2325uint16_t
2326ixgbe_recv_pkts_lro_single_alloc(void *rx_queue, struct rte_mbuf **rx_pkts,
2327                                 uint16_t nb_pkts)
2328{
2329        return ixgbe_recv_pkts_lro(rx_queue, rx_pkts, nb_pkts, false);
2330}
2331
2332uint16_t
2333ixgbe_recv_pkts_lro_bulk_alloc(void *rx_queue, struct rte_mbuf **rx_pkts,
2334                               uint16_t nb_pkts)
2335{
2336        return ixgbe_recv_pkts_lro(rx_queue, rx_pkts, nb_pkts, true);
2337}
2338
2339/*********************************************************************
2340 *
2341 *  Queue management functions
2342 *
2343 **********************************************************************/
2344
2345static void __rte_cold
2346ixgbe_tx_queue_release_mbufs(struct ixgbe_tx_queue *txq)
2347{
2348        unsigned i;
2349
2350        if (txq->sw_ring != NULL) {
2351                for (i = 0; i < txq->nb_tx_desc; i++) {
2352                        if (txq->sw_ring[i].mbuf != NULL) {
2353                                rte_pktmbuf_free_seg(txq->sw_ring[i].mbuf);
2354                                txq->sw_ring[i].mbuf = NULL;
2355                        }
2356                }
2357        }
2358}
2359
2360static int
2361ixgbe_tx_done_cleanup_full(struct ixgbe_tx_queue *txq, uint32_t free_cnt)
2362{
2363        struct ixgbe_tx_entry *swr_ring = txq->sw_ring;
2364        uint16_t i, tx_last, tx_id;
2365        uint16_t nb_tx_free_last;
2366        uint16_t nb_tx_to_clean;
2367        uint32_t pkt_cnt;
2368
2369        /* Start freeing mbufs from the entry following tx_tail */
2370        tx_last = txq->tx_tail;
2371        tx_id  = swr_ring[tx_last].next_id;
2372
2373        if (txq->nb_tx_free == 0 && ixgbe_xmit_cleanup(txq))
2374                return 0;
2375
2376        nb_tx_to_clean = txq->nb_tx_free;
2377        nb_tx_free_last = txq->nb_tx_free;
2378        if (!free_cnt)
2379                free_cnt = txq->nb_tx_desc;
2380
2381        /* Loop through swr_ring to count the number of
2382         * freeable mbufs and packets.
2383         */
2384        for (pkt_cnt = 0; pkt_cnt < free_cnt; ) {
2385                for (i = 0; i < nb_tx_to_clean &&
2386                        pkt_cnt < free_cnt &&
2387                        tx_id != tx_last; i++) {
2388                        if (swr_ring[tx_id].mbuf != NULL) {
2389                                rte_pktmbuf_free_seg(swr_ring[tx_id].mbuf);
2390                                swr_ring[tx_id].mbuf = NULL;
2391
2392                                /*
2393                                 * last segment in the packet,
2394                                 * increment packet count
2395                                 */
2396                                pkt_cnt += (swr_ring[tx_id].last_id == tx_id);
2397                        }
2398
2399                        tx_id = swr_ring[tx_id].next_id;
2400                }
2401
2402                if (txq->tx_rs_thresh > txq->nb_tx_desc -
2403                        txq->nb_tx_free || tx_id == tx_last)
2404                        break;
2405
2406                if (pkt_cnt < free_cnt) {
2407                        if (ixgbe_xmit_cleanup(txq))
2408                                break;
2409
2410                        nb_tx_to_clean = txq->nb_tx_free - nb_tx_free_last;
2411                        nb_tx_free_last = txq->nb_tx_free;
2412                }
2413        }
2414
2415        return (int)pkt_cnt;
2416}
2417
2418static int
2419ixgbe_tx_done_cleanup_simple(struct ixgbe_tx_queue *txq,
2420                        uint32_t free_cnt)
2421{
2422        int i, n, cnt;
2423
2424        if (free_cnt == 0 || free_cnt > txq->nb_tx_desc)
2425                free_cnt = txq->nb_tx_desc;
2426
2427        cnt = free_cnt - free_cnt % txq->tx_rs_thresh;
2428
2429        for (i = 0; i < cnt; i += n) {
2430                if (txq->nb_tx_desc - txq->nb_tx_free < txq->tx_rs_thresh)
2431                        break;
2432
2433                n = ixgbe_tx_free_bufs(txq);
2434
2435                if (n == 0)
2436                        break;
2437        }
2438
2439        return i;
2440}
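
/*
 * Editorial example of the rounding above (illustrative values): with
 * tx_rs_thresh = 32 and free_cnt = 100, cnt becomes 96, so at most three
 * batches of 32 descriptors are freed by ixgbe_tx_free_bufs().
 */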
2441
2442static int
2443ixgbe_tx_done_cleanup_vec(struct ixgbe_tx_queue *txq __rte_unused,
2444                        uint32_t free_cnt __rte_unused)
2445{
2446        return -ENOTSUP;
2447}
2448
2449int
2450ixgbe_dev_tx_done_cleanup(void *tx_queue, uint32_t free_cnt)
2451{
2452        struct ixgbe_tx_queue *txq = (struct ixgbe_tx_queue *)tx_queue;
2453        if (txq->offloads == 0 &&
2454#ifdef RTE_LIB_SECURITY
2455                        !(txq->using_ipsec) &&
2456#endif
2457                        txq->tx_rs_thresh >= RTE_PMD_IXGBE_TX_MAX_BURST) {
2458                if (txq->tx_rs_thresh <= RTE_IXGBE_TX_MAX_FREE_BUF_SZ &&
2459                                rte_vect_get_max_simd_bitwidth() >= RTE_VECT_SIMD_128 &&
2460                                (rte_eal_process_type() != RTE_PROC_PRIMARY ||
2461                                        txq->sw_ring_v != NULL)) {
2462                        return ixgbe_tx_done_cleanup_vec(txq, free_cnt);
2463                } else {
2464                        return ixgbe_tx_done_cleanup_simple(txq, free_cnt);
2465                }
2466        }
2467
2468        return ixgbe_tx_done_cleanup_full(txq, free_cnt);
2469}
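
/*
 * Editorial example: applications reach this handler through the generic
 * ethdev call (sketch; "port_id" and "queue_id" are hypothetical):
 *
 *   int n = rte_eth_tx_done_cleanup(port_id, queue_id, 0);
 *
 * A free_cnt of 0 means "free as many transmitted mbufs as possible";
 * n is the number of freed packets, or a negative errno such as the
 * -ENOTSUP returned above for the vector Tx path.
 */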
2470
2471static void __rte_cold
2472ixgbe_tx_free_swring(struct ixgbe_tx_queue *txq)
2473{
2474        if (txq != NULL &&
2475            txq->sw_ring != NULL)
2476                rte_free(txq->sw_ring);
2477}
2478
2479static void __rte_cold
2480ixgbe_tx_queue_release(struct ixgbe_tx_queue *txq)
2481{
2482        if (txq != NULL && txq->ops != NULL) {
2483                txq->ops->release_mbufs(txq);
2484                txq->ops->free_swring(txq);
2485                rte_free(txq);
2486        }
2487}
2488
2489void __rte_cold
2490ixgbe_dev_tx_queue_release(void *txq)
2491{
2492        ixgbe_tx_queue_release(txq);
2493}
2494
2495/* (Re)set dynamic ixgbe_tx_queue fields to defaults */
2496static void __rte_cold
2497ixgbe_reset_tx_queue(struct ixgbe_tx_queue *txq)
2498{
2499        static const union ixgbe_adv_tx_desc zeroed_desc = {{0}};
2500        struct ixgbe_tx_entry *txe = txq->sw_ring;
2501        uint16_t prev, i;
2502
2503        /* Zero out HW ring memory */
2504        for (i = 0; i < txq->nb_tx_desc; i++) {
2505                txq->tx_ring[i] = zeroed_desc;
2506        }
2507
2508        /* Initialize SW ring entries */
2509        prev = (uint16_t) (txq->nb_tx_desc - 1);
2510        for (i = 0; i < txq->nb_tx_desc; i++) {
2511                volatile union ixgbe_adv_tx_desc *txd = &txq->tx_ring[i];
2512
2513                txd->wb.status = rte_cpu_to_le_32(IXGBE_TXD_STAT_DD);
2514                txe[i].mbuf = NULL;
2515                txe[i].last_id = i;
2516                txe[prev].next_id = i;
2517                prev = i;
2518        }
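        /*
         * Example: with nb_tx_desc = 4 the loop above links the software ring
         * into a circle (next_id = {1, 2, 3, 0}) and pre-sets the DD bit in
         * every descriptor so that the first transmission sees a clean ring.
         */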
2519
2520        txq->tx_next_dd = (uint16_t)(txq->tx_rs_thresh - 1);
2521        txq->tx_next_rs = (uint16_t)(txq->tx_rs_thresh - 1);
2522
2523        txq->tx_tail = 0;
2524        txq->nb_tx_used = 0;
2525        /*
2526         * Always allow 1 descriptor to be un-allocated to avoid
2527         * a H/W race condition
2528         */
2529        txq->last_desc_cleaned = (uint16_t)(txq->nb_tx_desc - 1);
2530        txq->nb_tx_free = (uint16_t)(txq->nb_tx_desc - 1);
2531        txq->ctx_curr = 0;
2532        memset((void *)&txq->ctx_cache, 0,
2533                IXGBE_CTX_NUM * sizeof(struct ixgbe_advctx_info));
2534}
2535
2536static const struct ixgbe_txq_ops def_txq_ops = {
2537        .release_mbufs = ixgbe_tx_queue_release_mbufs,
2538        .free_swring = ixgbe_tx_free_swring,
2539        .reset = ixgbe_reset_tx_queue,
2540};
2541
2542/* Takes an ethdev and a queue and sets up the tx function to be used based on
2543 * the queue parameters. Used in tx_queue_setup by primary process and then
2544 * in dev_init by secondary process when attaching to an existing ethdev.
2545 */
2546void __rte_cold
2547ixgbe_set_tx_function(struct rte_eth_dev *dev, struct ixgbe_tx_queue *txq)
2548{
2549        /* Use a simple Tx queue (no offloads, no multi segs) if possible */
2550        if ((txq->offloads == 0) &&
2551#ifdef RTE_LIB_SECURITY
2552                        !(txq->using_ipsec) &&
2553#endif
2554                        (txq->tx_rs_thresh >= RTE_PMD_IXGBE_TX_MAX_BURST)) {
2555                PMD_INIT_LOG(DEBUG, "Using simple tx code path");
2556                dev->tx_pkt_prepare = NULL;
2557                if (txq->tx_rs_thresh <= RTE_IXGBE_TX_MAX_FREE_BUF_SZ &&
2558                                rte_vect_get_max_simd_bitwidth() >= RTE_VECT_SIMD_128 &&
2559                                (rte_eal_process_type() != RTE_PROC_PRIMARY ||
2560                                        ixgbe_txq_vec_setup(txq) == 0)) {
2561                        PMD_INIT_LOG(DEBUG, "Vector tx enabled.");
2562                        dev->tx_pkt_burst = ixgbe_xmit_pkts_vec;
2563                } else
2564                        dev->tx_pkt_burst = ixgbe_xmit_pkts_simple;
2565        } else {
2566                PMD_INIT_LOG(DEBUG, "Using full-featured tx code path");
2567                PMD_INIT_LOG(DEBUG,
2568                                " - offloads = 0x%" PRIx64,
2569                                txq->offloads);
2570                PMD_INIT_LOG(DEBUG,
2571                                " - tx_rs_thresh = %lu " "[RTE_PMD_IXGBE_TX_MAX_BURST=%lu]",
2572                                (unsigned long)txq->tx_rs_thresh,
2573                                (unsigned long)RTE_PMD_IXGBE_TX_MAX_BURST);
2574                dev->tx_pkt_burst = ixgbe_xmit_pkts;
2575                dev->tx_pkt_prepare = ixgbe_prep_pkts;
2576        }
2577}
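/*
 * Illustrative summary of the selection above (no additional requirements
 * implied): a queue with offloads == 0, no inline IPsec and
 * tx_rs_thresh >= RTE_PMD_IXGBE_TX_MAX_BURST takes the simple path; within
 * that path, tx_rs_thresh <= RTE_IXGBE_TX_MAX_FREE_BUF_SZ together with
 * rte_vect_get_max_simd_bitwidth() >= RTE_VECT_SIMD_128 (and a successful
 * ixgbe_txq_vec_setup()) selects ixgbe_xmit_pkts_vec(), otherwise
 * ixgbe_xmit_pkts_simple(). Any Tx offload, e.g. DEV_TX_OFFLOAD_TCP_TSO,
 * selects the full-featured ixgbe_xmit_pkts()/ixgbe_prep_pkts() pair.
 */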
2578
2579uint64_t
2580ixgbe_get_tx_queue_offloads(struct rte_eth_dev *dev)
2581{
2582        RTE_SET_USED(dev);
2583
2584        return 0;
2585}
2586
2587uint64_t
2588ixgbe_get_tx_port_offloads(struct rte_eth_dev *dev)
2589{
2590        uint64_t tx_offload_capa;
2591        struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2592
2593        tx_offload_capa =
2594                DEV_TX_OFFLOAD_VLAN_INSERT |
2595                DEV_TX_OFFLOAD_IPV4_CKSUM  |
2596                DEV_TX_OFFLOAD_UDP_CKSUM   |
2597                DEV_TX_OFFLOAD_TCP_CKSUM   |
2598                DEV_TX_OFFLOAD_SCTP_CKSUM  |
2599                DEV_TX_OFFLOAD_TCP_TSO     |
2600                DEV_TX_OFFLOAD_MULTI_SEGS;
2601
2602        if (hw->mac.type == ixgbe_mac_82599EB ||
2603            hw->mac.type == ixgbe_mac_X540)
2604                tx_offload_capa |= DEV_TX_OFFLOAD_MACSEC_INSERT;
2605
2606        if (hw->mac.type == ixgbe_mac_X550 ||
2607            hw->mac.type == ixgbe_mac_X550EM_x ||
2608            hw->mac.type == ixgbe_mac_X550EM_a)
2609                tx_offload_capa |= DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM;
2610
2611#ifdef RTE_LIB_SECURITY
2612        if (dev->security_ctx)
2613                tx_offload_capa |= DEV_TX_OFFLOAD_SECURITY;
2614#endif
2615        return tx_offload_capa;
2616}
2617
2618int __rte_cold
2619ixgbe_dev_tx_queue_setup(struct rte_eth_dev *dev,
2620                         uint16_t queue_idx,
2621                         uint16_t nb_desc,
2622                         unsigned int socket_id,
2623                         const struct rte_eth_txconf *tx_conf)
2624{
2625        const struct rte_memzone *tz;
2626        struct ixgbe_tx_queue *txq;
2627        struct ixgbe_hw     *hw;
2628        uint16_t tx_rs_thresh, tx_free_thresh;
2629        uint64_t offloads;
2630
2631        PMD_INIT_FUNC_TRACE();
2632        hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2633
2634        offloads = tx_conf->offloads | dev->data->dev_conf.txmode.offloads;
2635
2636        /*
2637         * Validate number of transmit descriptors.
2638         * It must not exceed hardware maximum, and must be multiple
2639         * of IXGBE_ALIGN.
2640         */
2641        if (nb_desc % IXGBE_TXD_ALIGN != 0 ||
2642                        (nb_desc > IXGBE_MAX_RING_DESC) ||
2643                        (nb_desc < IXGBE_MIN_RING_DESC)) {
2644                return -EINVAL;
2645        }
2646
2647        /*
2648         * The following two parameters control the setting of the RS bit on
2649         * transmit descriptors.
2650         * TX descriptors will have their RS bit set after txq->tx_rs_thresh
2651         * descriptors have been used.
2652         * The TX descriptor ring will be cleaned after txq->tx_free_thresh
2653         * descriptors are used or if the number of descriptors required
2654         * to transmit a packet is greater than the number of free TX
2655         * descriptors.
2656         * The following constraints must be satisfied:
2657         *  tx_rs_thresh must be greater than 0.
2658         *  tx_rs_thresh must be less than the size of the ring minus 2.
2659         *  tx_rs_thresh must be less than or equal to tx_free_thresh.
2660         *  tx_rs_thresh must be a divisor of the ring size.
2661         *  tx_free_thresh must be greater than 0.
2662         *  tx_free_thresh must be less than the size of the ring minus 3.
2663         *  tx_free_thresh + tx_rs_thresh must not exceed nb_desc.
2664         * One descriptor in the TX ring is used as a sentinel to avoid a
2665         * H/W race condition, hence the maximum threshold constraints.
2666         * When set to zero use default values.
2667         */
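        /*
         * Worked example (numbers chosen for illustration only): nb_desc = 512,
         * tx_free_thresh = 32 and tx_rs_thresh = 32 satisfy every rule above:
         * 32 > 0, 32 < 510, 32 < 509, 32 <= 32, 512 % 32 == 0 and
         * 32 + 32 <= 512, so the checks below all pass.
         */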
2668        tx_free_thresh = (uint16_t)((tx_conf->tx_free_thresh) ?
2669                        tx_conf->tx_free_thresh : DEFAULT_TX_FREE_THRESH);
2670        /* force tx_rs_thresh to adapt to an aggressive tx_free_thresh */
2671        tx_rs_thresh = (DEFAULT_TX_RS_THRESH + tx_free_thresh > nb_desc) ?
2672                        nb_desc - tx_free_thresh : DEFAULT_TX_RS_THRESH;
2673        if (tx_conf->tx_rs_thresh > 0)
2674                tx_rs_thresh = tx_conf->tx_rs_thresh;
2675        if (tx_rs_thresh + tx_free_thresh > nb_desc) {
2676                PMD_INIT_LOG(ERR, "tx_rs_thresh + tx_free_thresh must not "
2677                             "exceed nb_desc. (tx_rs_thresh=%u "
2678                             "tx_free_thresh=%u nb_desc=%u port = %d queue=%d)",
2679                             (unsigned int)tx_rs_thresh,
2680                             (unsigned int)tx_free_thresh,
2681                             (unsigned int)nb_desc,
2682                             (int)dev->data->port_id,
2683                             (int)queue_idx);
2684                return -(EINVAL);
2685        }
2686        if (tx_rs_thresh >= (nb_desc - 2)) {
2687                PMD_INIT_LOG(ERR, "tx_rs_thresh must be less than the number "
2688                        "of TX descriptors minus 2. (tx_rs_thresh=%u "
2689                        "port=%d queue=%d)", (unsigned int)tx_rs_thresh,
2690                        (int)dev->data->port_id, (int)queue_idx);
2691                return -(EINVAL);
2692        }
2693        if (tx_rs_thresh > DEFAULT_TX_RS_THRESH) {
2694                PMD_INIT_LOG(ERR, "tx_rs_thresh must be less than or equal to %u. "
2695                        "(tx_rs_thresh=%u port=%d queue=%d)",
2696                        DEFAULT_TX_RS_THRESH, (unsigned int)tx_rs_thresh,
2697                        (int)dev->data->port_id, (int)queue_idx);
2698                return -(EINVAL);
2699        }
2700        if (tx_free_thresh >= (nb_desc - 3)) {
2701                PMD_INIT_LOG(ERR, "tx_free_thresh must be less than the "
2702                             "number of TX descriptors minus 3. "
2703                             "(tx_free_thresh=%u "
2704                             "port=%d queue=%d)",
2705                             (unsigned int)tx_free_thresh,
2706                             (int)dev->data->port_id, (int)queue_idx);
2707                return -(EINVAL);
2708        }
2709        if (tx_rs_thresh > tx_free_thresh) {
2710                PMD_INIT_LOG(ERR, "tx_rs_thresh must be less than or equal to "
2711                             "tx_free_thresh. (tx_free_thresh=%u "
2712                             "tx_rs_thresh=%u port=%d queue=%d)",
2713                             (unsigned int)tx_free_thresh,
2714                             (unsigned int)tx_rs_thresh,
2715                             (int)dev->data->port_id,
2716                             (int)queue_idx);
2717                return -(EINVAL);
2718        }
2719        if ((nb_desc % tx_rs_thresh) != 0) {
2720                PMD_INIT_LOG(ERR, "tx_rs_thresh must be a divisor of the "
2721                             "number of TX descriptors. (tx_rs_thresh=%u "
2722                             "port=%d queue=%d)", (unsigned int)tx_rs_thresh,
2723                             (int)dev->data->port_id, (int)queue_idx);
2724                return -(EINVAL);
2725        }
2726
2727        /*
2728         * If tx_rs_thresh is greater than 1, then TX WTHRESH should be
2729         * set to 0. If WTHRESH is greater than zero, the RS bit is ignored
2730         * by the NIC and all descriptors are written back after the NIC
2731         * accumulates WTHRESH descriptors.
2732         */
2733        if ((tx_rs_thresh > 1) && (tx_conf->tx_thresh.wthresh != 0)) {
2734                PMD_INIT_LOG(ERR, "TX WTHRESH must be set to 0 if "
2735                             "tx_rs_thresh is greater than 1. (tx_rs_thresh=%u "
2736                             "port=%d queue=%d)", (unsigned int)tx_rs_thresh,
2737                             (int)dev->data->port_id, (int)queue_idx);
2738                return -(EINVAL);
2739        }
2740
2741        /* Free memory prior to re-allocation if needed... */
2742        if (dev->data->tx_queues[queue_idx] != NULL) {
2743                ixgbe_tx_queue_release(dev->data->tx_queues[queue_idx]);
2744                dev->data->tx_queues[queue_idx] = NULL;
2745        }
2746
2747        /* First allocate the tx queue data structure */
2748        txq = rte_zmalloc_socket("ethdev TX queue", sizeof(struct ixgbe_tx_queue),
2749                                 RTE_CACHE_LINE_SIZE, socket_id);
2750        if (txq == NULL)
2751                return -ENOMEM;
2752
2753        /*
2754         * Allocate TX ring hardware descriptors. A memzone large enough to
2755         * handle the maximum ring size is allocated in order to allow for
2756         * resizing in later calls to the queue setup function.
2757         */
2758        tz = rte_eth_dma_zone_reserve(dev, "tx_ring", queue_idx,
2759                        sizeof(union ixgbe_adv_tx_desc) * IXGBE_MAX_RING_DESC,
2760                        IXGBE_ALIGN, socket_id);
2761        if (tz == NULL) {
2762                ixgbe_tx_queue_release(txq);
2763                return -ENOMEM;
2764        }
2765
2766        txq->nb_tx_desc = nb_desc;
2767        txq->tx_rs_thresh = tx_rs_thresh;
2768        txq->tx_free_thresh = tx_free_thresh;
2769        txq->pthresh = tx_conf->tx_thresh.pthresh;
2770        txq->hthresh = tx_conf->tx_thresh.hthresh;
2771        txq->wthresh = tx_conf->tx_thresh.wthresh;
2772        txq->queue_id = queue_idx;
2773        txq->reg_idx = (uint16_t)((RTE_ETH_DEV_SRIOV(dev).active == 0) ?
2774                queue_idx : RTE_ETH_DEV_SRIOV(dev).def_pool_q_idx + queue_idx);
2775        txq->port_id = dev->data->port_id;
2776        txq->offloads = offloads;
2777        txq->ops = &def_txq_ops;
2778        txq->tx_deferred_start = tx_conf->tx_deferred_start;
2779#ifdef RTE_LIB_SECURITY
2780        txq->using_ipsec = !!(dev->data->dev_conf.txmode.offloads &
2781                        DEV_TX_OFFLOAD_SECURITY);
2782#endif
2783
2784        /*
2785         * Use the VF Tx tail register (VFTDT) when running as a virtual function
2786         */
2787        if (hw->mac.type == ixgbe_mac_82599_vf ||
2788            hw->mac.type == ixgbe_mac_X540_vf ||
2789            hw->mac.type == ixgbe_mac_X550_vf ||
2790            hw->mac.type == ixgbe_mac_X550EM_x_vf ||
2791            hw->mac.type == ixgbe_mac_X550EM_a_vf)
2792                txq->tdt_reg_addr = IXGBE_PCI_REG_ADDR(hw, IXGBE_VFTDT(queue_idx));
2793        else
2794                txq->tdt_reg_addr = IXGBE_PCI_REG_ADDR(hw, IXGBE_TDT(txq->reg_idx));
2795
2796        txq->tx_ring_phys_addr = tz->iova;
2797        txq->tx_ring = (union ixgbe_adv_tx_desc *) tz->addr;
2798
2799        /* Allocate software ring */
2800        txq->sw_ring = rte_zmalloc_socket("txq->sw_ring",
2801                                sizeof(struct ixgbe_tx_entry) * nb_desc,
2802                                RTE_CACHE_LINE_SIZE, socket_id);
2803        if (txq->sw_ring == NULL) {
2804                ixgbe_tx_queue_release(txq);
2805                return -ENOMEM;
2806        }
2807        PMD_INIT_LOG(DEBUG, "sw_ring=%p hw_ring=%p dma_addr=0x%"PRIx64,
2808                     txq->sw_ring, txq->tx_ring, txq->tx_ring_phys_addr);
2809
2810        /* set up vector or scalar TX function as appropriate */
2811        ixgbe_set_tx_function(dev, txq);
2812
2813        txq->ops->reset(txq);
2814
2815        dev->data->tx_queues[queue_idx] = txq;
2816
2818        return 0;
2819}
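/*
 * Caller's view (sketch with assumed values): this function is reached
 * through the ethdev API, e.g.
 *
 *     struct rte_eth_txconf txconf = {
 *             .tx_rs_thresh = 32,
 *             .tx_free_thresh = 32,
 *     };
 *     ret = rte_eth_tx_queue_setup(port_id, 0, 512, rte_socket_id(), &txconf);
 *
 * where port_id refers to an ixgbe port; a negative return value propagates
 * the -EINVAL/-ENOMEM results produced above.
 */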
2820
2821/**
2822 * ixgbe_free_sc_cluster - free the not-yet-completed scattered cluster
2823 *
2824 * The "next" pointer of the last segment of (not-yet-completed) RSC clusters
2825 * in the sw_rsc_ring is not set to NULL but rather points to the next
2826 * mbuf of this RSC aggregation (that has not been completed yet and still
2827 * resides on the HW ring). So, instead of calling rte_pktmbuf_free(), we
2828 * just free the first "nb_segs" segments of the cluster explicitly by calling
2829 * rte_pktmbuf_free_seg() on each of them.
2830 *
2831 * @m scattered cluster head
2832 */
2833static void __rte_cold
2834ixgbe_free_sc_cluster(struct rte_mbuf *m)
2835{
2836        uint16_t i, nb_segs = m->nb_segs;
2837        struct rte_mbuf *next_seg;
2838
2839        for (i = 0; i < nb_segs; i++) {
2840                next_seg = m->next;
2841                rte_pktmbuf_free_seg(m);
2842                m = next_seg;
2843        }
2844}
2845
2846static void __rte_cold
2847ixgbe_rx_queue_release_mbufs(struct ixgbe_rx_queue *rxq)
2848{
2849        unsigned i;
2850
2851        /* SSE Vector driver has a different way of releasing mbufs. */
2852        if (rxq->rx_using_sse) {
2853                ixgbe_rx_queue_release_mbufs_vec(rxq);
2854                return;
2855        }
2856
2857        if (rxq->sw_ring != NULL) {
2858                for (i = 0; i < rxq->nb_rx_desc; i++) {
2859                        if (rxq->sw_ring[i].mbuf != NULL) {
2860                                rte_pktmbuf_free_seg(rxq->sw_ring[i].mbuf);
2861                                rxq->sw_ring[i].mbuf = NULL;
2862                        }
2863                }
2864                if (rxq->rx_nb_avail) {
2865                        for (i = 0; i < rxq->rx_nb_avail; ++i) {
2866                                struct rte_mbuf *mb;
2867
2868                                mb = rxq->rx_stage[rxq->rx_next_avail + i];
2869                                rte_pktmbuf_free_seg(mb);
2870                        }
2871                        rxq->rx_nb_avail = 0;
2872                }
2873        }
2874
2875        if (rxq->sw_sc_ring)
2876                for (i = 0; i < rxq->nb_rx_desc; i++)
2877                        if (rxq->sw_sc_ring[i].fbuf) {
2878                                ixgbe_free_sc_cluster(rxq->sw_sc_ring[i].fbuf);
2879                                rxq->sw_sc_ring[i].fbuf = NULL;
2880                        }
2881}
2882
2883static void __rte_cold
2884ixgbe_rx_queue_release(struct ixgbe_rx_queue *rxq)
2885{
2886        if (rxq != NULL) {
2887                ixgbe_rx_queue_release_mbufs(rxq);
2888                rte_free(rxq->sw_ring);
2889                rte_free(rxq->sw_sc_ring);
2890                rte_free(rxq);
2891        }
2892}
2893
2894void __rte_cold
2895ixgbe_dev_rx_queue_release(void *rxq)
2896{
2897        ixgbe_rx_queue_release(rxq);
2898}
2899
2900/*
2901 * Check if Rx Burst Bulk Alloc function can be used.
2902 * Return
2903 *        0: the preconditions are satisfied and the bulk allocation function
2904 *           can be used.
2905 *  -EINVAL: the preconditions are NOT satisfied and the default Rx burst
2906 *           function must be used.
2907 */
2908static inline int __rte_cold
2909check_rx_burst_bulk_alloc_preconditions(struct ixgbe_rx_queue *rxq)
2910{
2911        int ret = 0;
2912
2913        /*
2914         * Make sure the following pre-conditions are satisfied:
2915         *   rxq->rx_free_thresh >= RTE_PMD_IXGBE_RX_MAX_BURST
2916         *   rxq->rx_free_thresh < rxq->nb_rx_desc
2917         *   (rxq->nb_rx_desc % rxq->rx_free_thresh) == 0
2918         * Scattered packets are not supported.  This should be checked
2919         * outside of this function.
2920         */
2921        if (!(rxq->rx_free_thresh >= RTE_PMD_IXGBE_RX_MAX_BURST)) {
2922                PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions: "
2923                             "rxq->rx_free_thresh=%d, "
2924                             "RTE_PMD_IXGBE_RX_MAX_BURST=%d",
2925                             rxq->rx_free_thresh, RTE_PMD_IXGBE_RX_MAX_BURST);
2926                ret = -EINVAL;
2927        } else if (!(rxq->rx_free_thresh < rxq->nb_rx_desc)) {
2928                PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions: "
2929                             "rxq->rx_free_thresh=%d, "
2930                             "rxq->nb_rx_desc=%d",
2931                             rxq->rx_free_thresh, rxq->nb_rx_desc);
2932                ret = -EINVAL;
2933        } else if (!((rxq->nb_rx_desc % rxq->rx_free_thresh) == 0)) {
2934                PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions: "
2935                             "rxq->nb_rx_desc=%d, "
2936                             "rxq->rx_free_thresh=%d",
2937                             rxq->nb_rx_desc, rxq->rx_free_thresh);
2938                ret = -EINVAL;
2939        }
2940
2941        return ret;
2942}
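/*
 * Example (illustrative values): a queue created with nb_rx_desc = 512 and
 * rx_free_thresh = 64 passes all three checks above, assuming
 * RTE_PMD_IXGBE_RX_MAX_BURST is not larger than 64. With rx_free_thresh = 48
 * the divisor check fails (512 % 48 != 0) and bulk allocation is disabled
 * for the whole port.
 */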
2943
2944/* Reset dynamic ixgbe_rx_queue fields back to defaults */
2945static void __rte_cold
2946ixgbe_reset_rx_queue(struct ixgbe_adapter *adapter, struct ixgbe_rx_queue *rxq)
2947{
2948        static const union ixgbe_adv_rx_desc zeroed_desc = {{0}};
2949        unsigned i;
2950        uint16_t len = rxq->nb_rx_desc;
2951
2952        /*
2953         * By default, the Rx queue setup function allocates enough memory for
2954         * IXGBE_MAX_RING_DESC.  The Rx Burst bulk allocation function requires
2955         * extra memory at the end of the descriptor ring to be zero'd out.
2956         */
2957        if (adapter->rx_bulk_alloc_allowed)
2958                /* zero out extra memory */
2959                len += RTE_PMD_IXGBE_RX_MAX_BURST;
2960
2961        /*
2962         * Zero out HW ring memory. Zero out extra memory at the end of
2963         * the H/W ring so look-ahead logic in Rx Burst bulk alloc function
2964         * reads extra memory as zeros.
2965         */
2966        for (i = 0; i < len; i++) {
2967                rxq->rx_ring[i] = zeroed_desc;
2968        }
2969
2970        /*
2971         * initialize extra software ring entries. Space for these extra
2972         * entries is always allocated
2973         */
2974        memset(&rxq->fake_mbuf, 0x0, sizeof(rxq->fake_mbuf));
2975        for (i = rxq->nb_rx_desc; i < len; ++i) {
2976                rxq->sw_ring[i].mbuf = &rxq->fake_mbuf;
2977        }
2978
2979        rxq->rx_nb_avail = 0;
2980        rxq->rx_next_avail = 0;
2981        rxq->rx_free_trigger = (uint16_t)(rxq->rx_free_thresh - 1);
2982        rxq->rx_tail = 0;
2983        rxq->nb_rx_hold = 0;
2984        rxq->pkt_first_seg = NULL;
2985        rxq->pkt_last_seg = NULL;
2986
2987#if defined(RTE_ARCH_X86) || defined(RTE_ARCH_ARM64)
2988        rxq->rxrearm_start = 0;
2989        rxq->rxrearm_nb = 0;
2990#endif
2991}
2992
2993static int
2994ixgbe_is_vf(struct rte_eth_dev *dev)
2995{
2996        struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2997
2998        switch (hw->mac.type) {
2999        case ixgbe_mac_82599_vf:
3000        case ixgbe_mac_X540_vf:
3001        case ixgbe_mac_X550_vf:
3002        case ixgbe_mac_X550EM_x_vf:
3003        case ixgbe_mac_X550EM_a_vf:
3004                return 1;
3005        default:
3006                return 0;
3007        }
3008}
3009
3010uint64_t
3011ixgbe_get_rx_queue_offloads(struct rte_eth_dev *dev)
3012{
3013        uint64_t offloads = 0;
3014        struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3015
3016        if (hw->mac.type != ixgbe_mac_82598EB)
3017                offloads |= DEV_RX_OFFLOAD_VLAN_STRIP;
3018
3019        return offloads;
3020}
3021
3022uint64_t
3023ixgbe_get_rx_port_offloads(struct rte_eth_dev *dev)
3024{
3025        uint64_t offloads;
3026        struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3027
3028        offloads = DEV_RX_OFFLOAD_IPV4_CKSUM  |
3029                   DEV_RX_OFFLOAD_UDP_CKSUM   |
3030                   DEV_RX_OFFLOAD_TCP_CKSUM   |
3031                   DEV_RX_OFFLOAD_KEEP_CRC    |
3032                   DEV_RX_OFFLOAD_JUMBO_FRAME |
3033                   DEV_RX_OFFLOAD_VLAN_FILTER |
3034                   DEV_RX_OFFLOAD_SCATTER |
3035                   DEV_RX_OFFLOAD_RSS_HASH;
3036
3037        if (hw->mac.type == ixgbe_mac_82598EB)
3038                offloads |= DEV_RX_OFFLOAD_VLAN_STRIP;
3039
3040        if (ixgbe_is_vf(dev) == 0)
3041                offloads |= DEV_RX_OFFLOAD_VLAN_EXTEND;
3042
3043        /*
3044         * RSC is only supported by 82599, X540 and X550 PF devices in a
3045         * non-SR-IOV mode.
3046         */
3047        if ((hw->mac.type == ixgbe_mac_82599EB ||
3048             hw->mac.type == ixgbe_mac_X540 ||
3049             hw->mac.type == ixgbe_mac_X550) &&
3050            !RTE_ETH_DEV_SRIOV(dev).active)
3051                offloads |= DEV_RX_OFFLOAD_TCP_LRO;
3052
3053        if (hw->mac.type == ixgbe_mac_82599EB ||
3054            hw->mac.type == ixgbe_mac_X540)
3055                offloads |= DEV_RX_OFFLOAD_MACSEC_STRIP;
3056
3057        if (hw->mac.type == ixgbe_mac_X550 ||
3058            hw->mac.type == ixgbe_mac_X550EM_x ||
3059            hw->mac.type == ixgbe_mac_X550EM_a)
3060                offloads |= DEV_RX_OFFLOAD_OUTER_IPV4_CKSUM;
3061
3062#ifdef RTE_LIB_SECURITY
3063        if (dev->security_ctx)
3064                offloads |= DEV_RX_OFFLOAD_SECURITY;
3065#endif
3066
3067        return offloads;
3068}
3069
3070int __rte_cold
3071ixgbe_dev_rx_queue_setup(struct rte_eth_dev *dev,
3072                         uint16_t queue_idx,
3073                         uint16_t nb_desc,
3074                         unsigned int socket_id,
3075                         const struct rte_eth_rxconf *rx_conf,
3076                         struct rte_mempool *mp)
3077{
3078        const struct rte_memzone *rz;
3079        struct ixgbe_rx_queue *rxq;
3080        struct ixgbe_hw     *hw;
3081        uint16_t len;
3082        struct ixgbe_adapter *adapter = dev->data->dev_private;
3083        uint64_t offloads;
3084
3085        PMD_INIT_FUNC_TRACE();
3086        hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3087
3088        offloads = rx_conf->offloads | dev->data->dev_conf.rxmode.offloads;
3089
3090        /*
3091         * Validate number of receive descriptors.
3092         * It must not exceed hardware maximum, and must be multiple
3093         * of IXGBE_ALIGN.
3094         */
3095        if (nb_desc % IXGBE_RXD_ALIGN != 0 ||
3096                        (nb_desc > IXGBE_MAX_RING_DESC) ||
3097                        (nb_desc < IXGBE_MIN_RING_DESC)) {
3098                return -EINVAL;
3099        }
3100
3101        /* Free memory prior to re-allocation if needed... */
3102        if (dev->data->rx_queues[queue_idx] != NULL) {
3103                ixgbe_rx_queue_release(dev->data->rx_queues[queue_idx]);
3104                dev->data->rx_queues[queue_idx] = NULL;
3105        }
3106
3107        /* First allocate the rx queue data structure */
3108        rxq = rte_zmalloc_socket("ethdev RX queue", sizeof(struct ixgbe_rx_queue),
3109                                 RTE_CACHE_LINE_SIZE, socket_id);
3110        if (rxq == NULL)
3111                return -ENOMEM;
3112        rxq->mb_pool = mp;
3113        rxq->nb_rx_desc = nb_desc;
3114        rxq->rx_free_thresh = rx_conf->rx_free_thresh;
3115        rxq->queue_id = queue_idx;
3116        rxq->reg_idx = (uint16_t)((RTE_ETH_DEV_SRIOV(dev).active == 0) ?
3117                queue_idx : RTE_ETH_DEV_SRIOV(dev).def_pool_q_idx + queue_idx);
3118        rxq->port_id = dev->data->port_id;
3119        if (dev->data->dev_conf.rxmode.offloads & DEV_RX_OFFLOAD_KEEP_CRC)
3120                rxq->crc_len = RTE_ETHER_CRC_LEN;
3121        else
3122                rxq->crc_len = 0;
3123        rxq->drop_en = rx_conf->rx_drop_en;
3124        rxq->rx_deferred_start = rx_conf->rx_deferred_start;
3125        rxq->offloads = offloads;
3126
3127        /*
3128         * The packet type in the RX descriptor is different for different NICs.
3129         * Some bits are used by X550 but reserved for other NICs,
3130         * so set different masks for different NICs.
3131         */
3132        if (hw->mac.type == ixgbe_mac_X550 ||
3133            hw->mac.type == ixgbe_mac_X550EM_x ||
3134            hw->mac.type == ixgbe_mac_X550EM_a ||
3135            hw->mac.type == ixgbe_mac_X550_vf ||
3136            hw->mac.type == ixgbe_mac_X550EM_x_vf ||
3137            hw->mac.type == ixgbe_mac_X550EM_a_vf)
3138                rxq->pkt_type_mask = IXGBE_PACKET_TYPE_MASK_X550;
3139        else
3140                rxq->pkt_type_mask = IXGBE_PACKET_TYPE_MASK_82599;
3141
3142        /*
3143         * 82599 errata, UDP frames with a 0 checksum can be marked as checksum
3144         * errors.
3145         */
3146        if (hw->mac.type == ixgbe_mac_82599EB)
3147                rxq->rx_udp_csum_zero_err = 1;
3148
3149        /*
3150         * Allocate RX ring hardware descriptors. A memzone large enough to
3151         * handle the maximum ring size is allocated in order to allow for
3152         * resizing in later calls to the queue setup function.
3153         */
3154        rz = rte_eth_dma_zone_reserve(dev, "rx_ring", queue_idx,
3155                                      RX_RING_SZ, IXGBE_ALIGN, socket_id);
3156        if (rz == NULL) {
3157                ixgbe_rx_queue_release(rxq);
3158                return -ENOMEM;
3159        }
3160
3161        /*
3162         * Zero init all the descriptors in the ring.
3163         */
3164        memset(rz->addr, 0, RX_RING_SZ);
3165
3166        /*
3167         * Use the VF Rx tail/head registers (VFRDT/VFRDH) when running as a VF
3168         */
3169        if (hw->mac.type == ixgbe_mac_82599_vf ||
3170            hw->mac.type == ixgbe_mac_X540_vf ||
3171            hw->mac.type == ixgbe_mac_X550_vf ||
3172            hw->mac.type == ixgbe_mac_X550EM_x_vf ||
3173            hw->mac.type == ixgbe_mac_X550EM_a_vf) {
3174                rxq->rdt_reg_addr =
3175                        IXGBE_PCI_REG_ADDR(hw, IXGBE_VFRDT(queue_idx));
3176                rxq->rdh_reg_addr =
3177                        IXGBE_PCI_REG_ADDR(hw, IXGBE_VFRDH(queue_idx));
3178        } else {
3179                rxq->rdt_reg_addr =
3180                        IXGBE_PCI_REG_ADDR(hw, IXGBE_RDT(rxq->reg_idx));
3181                rxq->rdh_reg_addr =
3182                        IXGBE_PCI_REG_ADDR(hw, IXGBE_RDH(rxq->reg_idx));
3183        }
3184
3185        rxq->rx_ring_phys_addr = rz->iova;
3186        rxq->rx_ring = (union ixgbe_adv_rx_desc *) rz->addr;
3187
3188        /*
3189         * Certain constraints must be met in order to use the bulk buffer
3190         * allocation Rx burst function. If any of the Rx queues doesn't meet
3191         * them, the feature is disabled for the whole port.
3192         */
3193        if (check_rx_burst_bulk_alloc_preconditions(rxq)) {
3194                PMD_INIT_LOG(DEBUG, "queue[%d] doesn't meet Rx Bulk Alloc "
3195                                    "preconditions - canceling the feature for "
3196                                    "the whole port[%d]",
3197                             rxq->queue_id, rxq->port_id);
3198                adapter->rx_bulk_alloc_allowed = false;
3199        }
3200
3201        /*
3202         * Allocate software ring. Allow for space at the end of the
3203         * S/W ring to make sure look-ahead logic in bulk alloc Rx burst
3204         * function does not access an invalid memory region.
3205         */
3206        len = nb_desc;
3207        if (adapter->rx_bulk_alloc_allowed)
3208                len += RTE_PMD_IXGBE_RX_MAX_BURST;
3209
3210        rxq->sw_ring = rte_zmalloc_socket("rxq->sw_ring",
3211                                          sizeof(struct ixgbe_rx_entry) * len,
3212                                          RTE_CACHE_LINE_SIZE, socket_id);
3213        if (!rxq->sw_ring) {
3214                ixgbe_rx_queue_release(rxq);
3215                return -ENOMEM;
3216        }
3217
3218        /*
3219         * Always allocate even if it's not going to be needed in order to
3220         * simplify the code.
3221         *
3222         * This ring is used in LRO and Scattered Rx cases and Scattered Rx may
3223         * be requested in ixgbe_dev_rx_init(), which is called later from
3224         * dev_start() flow.
3225         */
3226        rxq->sw_sc_ring =
3227                rte_zmalloc_socket("rxq->sw_sc_ring",
3228                                   sizeof(struct ixgbe_scattered_rx_entry) * len,
3229                                   RTE_CACHE_LINE_SIZE, socket_id);
3230        if (!rxq->sw_sc_ring) {
3231                ixgbe_rx_queue_release(rxq);
3232                return -ENOMEM;
3233        }
3234
3235        PMD_INIT_LOG(DEBUG, "sw_ring=%p sw_sc_ring=%p hw_ring=%p "
3236                            "dma_addr=0x%"PRIx64,
3237                     rxq->sw_ring, rxq->sw_sc_ring, rxq->rx_ring,
3238                     rxq->rx_ring_phys_addr);
3239
3240        if (!rte_is_power_of_2(nb_desc)) {
3241                PMD_INIT_LOG(DEBUG, "queue[%d] doesn't meet Vector Rx "
3242                                    "preconditions - canceling the feature for "
3243                                    "the whole port[%d]",
3244                             rxq->queue_id, rxq->port_id);
3245                adapter->rx_vec_allowed = false;
3246        } else
3247                ixgbe_rxq_vec_setup(rxq);
3248
3249        dev->data->rx_queues[queue_idx] = rxq;
3250
3251        ixgbe_reset_rx_queue(adapter, rxq);
3252
3253        return 0;
3254}
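/*
 * Caller's view (sketch with assumed values): the routine above is invoked
 * through the ethdev API, e.g.
 *
 *     ret = rte_eth_rx_queue_setup(port_id, 0, 512, rte_socket_id(),
 *                                  NULL, mbuf_pool);
 *
 * where a NULL rte_eth_rxconf selects the defaults reported by
 * rte_eth_dev_info_get() and mbuf_pool is an rte_mempool of rte_mbufs
 * created by the application.
 */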
3255
3256uint32_t
3257ixgbe_dev_rx_queue_count(struct rte_eth_dev *dev, uint16_t rx_queue_id)
3258{
3259#define IXGBE_RXQ_SCAN_INTERVAL 4
3260        volatile union ixgbe_adv_rx_desc *rxdp;
3261        struct ixgbe_rx_queue *rxq;
3262        uint32_t desc = 0;
3263
3264        rxq = dev->data->rx_queues[rx_queue_id];
3265        rxdp = &(rxq->rx_ring[rxq->rx_tail]);
3266
3267        while ((desc < rxq->nb_rx_desc) &&
3268                (rxdp->wb.upper.status_error &
3269                        rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD))) {
3270                desc += IXGBE_RXQ_SCAN_INTERVAL;
3271                rxdp += IXGBE_RXQ_SCAN_INTERVAL;
3272                if (rxq->rx_tail + desc >= rxq->nb_rx_desc)
3273                        rxdp = &(rxq->rx_ring[rxq->rx_tail +
3274                                desc - rxq->nb_rx_desc]);
3275        }
3276
3277        return desc;
3278}
3279
3280int
3281ixgbe_dev_rx_descriptor_done(void *rx_queue, uint16_t offset)
3282{
3283        volatile union ixgbe_adv_rx_desc *rxdp;
3284        struct ixgbe_rx_queue *rxq = rx_queue;
3285        uint32_t desc;
3286
3287        if (unlikely(offset >= rxq->nb_rx_desc))
3288                return 0;
3289        desc = rxq->rx_tail + offset;
3290        if (desc >= rxq->nb_rx_desc)
3291                desc -= rxq->nb_rx_desc;
3292
3293        rxdp = &rxq->rx_ring[desc];
3294        return !!(rxdp->wb.upper.status_error &
3295                        rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD));
3296}
3297
3298int
3299ixgbe_dev_rx_descriptor_status(void *rx_queue, uint16_t offset)
3300{
3301        struct ixgbe_rx_queue *rxq = rx_queue;
3302        volatile uint32_t *status;
3303        uint32_t nb_hold, desc;
3304
3305        if (unlikely(offset >= rxq->nb_rx_desc))
3306                return -EINVAL;
3307
3308#if defined(RTE_ARCH_X86) || defined(RTE_ARCH_ARM64)
3309        if (rxq->rx_using_sse)
3310                nb_hold = rxq->rxrearm_nb;
3311        else
3312#endif
3313                nb_hold = rxq->nb_rx_hold;
3314        if (offset >= rxq->nb_rx_desc - nb_hold)
3315                return RTE_ETH_RX_DESC_UNAVAIL;
3316
3317        desc = rxq->rx_tail + offset;
3318        if (desc >= rxq->nb_rx_desc)
3319                desc -= rxq->nb_rx_desc;
3320
3321        status = &rxq->rx_ring[desc].wb.upper.status_error;
3322        if (*status & rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD))
3323                return RTE_ETH_RX_DESC_DONE;
3324
3325        return RTE_ETH_RX_DESC_AVAIL;
3326}
3327
3328int
3329ixgbe_dev_tx_descriptor_status(void *tx_queue, uint16_t offset)
3330{
3331        struct ixgbe_tx_queue *txq = tx_queue;
3332        volatile uint32_t *status;
3333        uint32_t desc;
3334
3335        if (unlikely(offset >= txq->nb_tx_desc))
3336                return -EINVAL;
3337
3338        desc = txq->tx_tail + offset;
3339        /* go to next desc that has the RS bit */
3340        desc = ((desc + txq->tx_rs_thresh - 1) / txq->tx_rs_thresh) *
3341                txq->tx_rs_thresh;
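        /*
         * Example (illustrative numbers): with tx_tail = 10, offset = 5 and
         * tx_rs_thresh = 32, desc = 15 is rounded up to 32, the next
         * descriptor in which the hardware reports completion via DD.
         */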
3342        if (desc >= txq->nb_tx_desc) {
3343                desc -= txq->nb_tx_desc;
3344                if (desc >= txq->nb_tx_desc)
3345                        desc -= txq->nb_tx_desc;
3346        }
3347
3348        status = &txq->tx_ring[desc].wb.status;
3349        if (*status & rte_cpu_to_le_32(IXGBE_ADVTXD_STAT_DD))
3350                return RTE_ETH_TX_DESC_DONE;
3351
3352        return RTE_ETH_TX_DESC_FULL;
3353}
3354
3355/*
3356 * Set up link loopback for X540/X550 mode Tx->Rx.
3357 */
3358static inline void __rte_cold
3359ixgbe_setup_loopback_link_x540_x550(struct ixgbe_hw *hw, bool enable)
3360{
3361        uint32_t macc;
3362        PMD_INIT_FUNC_TRACE();
3363
3364        u16 autoneg_reg = IXGBE_MII_AUTONEG_REG;
3365
3366        hw->phy.ops.read_reg(hw, IXGBE_MDIO_AUTO_NEG_CONTROL,
3367                             IXGBE_MDIO_AUTO_NEG_DEV_TYPE, &autoneg_reg);
3368        macc = IXGBE_READ_REG(hw, IXGBE_MACC);
3369
3370        if (enable) {
3371                /* datasheet 15.2.1: disable AUTONEG (PHY Bit 7.0.C) */
3372                autoneg_reg |= IXGBE_MII_AUTONEG_ENABLE;
3373                /* datasheet 15.2.1: MACC.FLU = 1 (force link up) */
3374                macc |= IXGBE_MACC_FLU;
3375        } else {
3376                autoneg_reg &= ~IXGBE_MII_AUTONEG_ENABLE;
3377                macc &= ~IXGBE_MACC_FLU;
3378        }
3379
3380        hw->phy.ops.write_reg(hw, IXGBE_MDIO_AUTO_NEG_CONTROL,
3381                              IXGBE_MDIO_AUTO_NEG_DEV_TYPE, autoneg_reg);
3382
3383        IXGBE_WRITE_REG(hw, IXGBE_MACC, macc);
3384}
3385
3386void __rte_cold
3387ixgbe_dev_clear_queues(struct rte_eth_dev *dev)
3388{
3389        unsigned i;
3390        struct ixgbe_adapter *adapter = dev->data->dev_private;
3391        struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3392
3393        PMD_INIT_FUNC_TRACE();
3394
3395        for (i = 0; i < dev->data->nb_tx_queues; i++) {
3396                struct ixgbe_tx_queue *txq = dev->data->tx_queues[i];
3397
3398                if (txq != NULL) {
3399                        txq->ops->release_mbufs(txq);
3400                        txq->ops->reset(txq);
3401                }
3402        }
3403
3404        for (i = 0; i < dev->data->nb_rx_queues; i++) {
3405                struct ixgbe_rx_queue *rxq = dev->data->rx_queues[i];
3406
3407                if (rxq != NULL) {
3408                        ixgbe_rx_queue_release_mbufs(rxq);
3409                        ixgbe_reset_rx_queue(adapter, rxq);
3410                }
3411        }
3412        /* If loopback mode was enabled, reconfigure the link accordingly */
3413        if (dev->data->dev_conf.lpbk_mode != 0) {
3414                if (hw->mac.type == ixgbe_mac_X540 ||
3415                     hw->mac.type == ixgbe_mac_X550 ||
3416                     hw->mac.type == ixgbe_mac_X550EM_x ||
3417                     hw->mac.type == ixgbe_mac_X550EM_a)
3418                        ixgbe_setup_loopback_link_x540_x550(hw, false);
3419        }
3420}
3421
3422void
3423ixgbe_dev_free_queues(struct rte_eth_dev *dev)
3424{
3425        unsigned i;
3426
3427        PMD_INIT_FUNC_TRACE();
3428
3429        for (i = 0; i < dev->data->nb_rx_queues; i++) {
3430                ixgbe_dev_rx_queue_release(dev->data->rx_queues[i]);
3431                dev->data->rx_queues[i] = NULL;
3432                rte_eth_dma_zone_free(dev, "rx_ring", i);
3433        }
3434        dev->data->nb_rx_queues = 0;
3435
3436        for (i = 0; i < dev->data->nb_tx_queues; i++) {
3437                ixgbe_dev_tx_queue_release(dev->data->tx_queues[i]);
3438                dev->data->tx_queues[i] = NULL;
3439                rte_eth_dma_zone_free(dev, "tx_ring", i);
3440        }
3441        dev->data->nb_tx_queues = 0;
3442}
3443
3444/*********************************************************************
3445 *
3446 *  Device RX/TX init functions
3447 *
3448 **********************************************************************/
3449
3450/**
3451 * Receive Side Scaling (RSS)
3452 * See section 7.1.2.8 in the following document:
3453 *     "Intel 82599 10 GbE Controller Datasheet" - Revision 2.1 October 2009
3454 *
3455 * Principles:
3456 * The source and destination IP addresses of the IP header and the source
3457 * and destination ports of TCP/UDP headers, if any, of received packets are
3458 * hashed against a configurable random key to compute a 32-bit RSS hash result.
3459 * The seven (7) LSBs of the 32-bit hash result are used as an index into a
3460 * 128-entry redirection table (RETA).  Each entry of the RETA provides a 4-bit
3461 * RSS output index which is used as the RX queue index in which to store the
3462 * received packets.
3463 * The following output is supplied in the RX write-back descriptor:
3464 *     - 32-bit result of the Microsoft RSS hash function,
3465 *     - 4-bit RSS type field.
3466 */
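/*
 * Illustrative steering example (hash value made up): for a packet whose
 * 32-bit RSS hash is 0x1234ABCD the hardware conceptually performs
 *
 *     reta_idx = 0x1234ABCD & 0x7F;   // = 0x4D = 77
 *     rx_queue = reta[reta_idx];      // entry programmed by the PF driver
 *
 * and reports the hash itself in the Rx write-back descriptor, from where
 * the Rx path copies it into mbuf->hash.rss.
 */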
3467
3468/*
3469 * RSS random key supplied in section 7.1.2.8.3 of the Intel 82599 datasheet.
3470 * Used as the default key.
3471 */
3472static uint8_t rss_intel_key[40] = {
3473        0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2,
3474        0x41, 0x67, 0x25, 0x3D, 0x43, 0xA3, 0x8F, 0xB0,
3475        0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
3476        0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C,
3477        0x6A, 0x42, 0xB7, 0x3B, 0xBE, 0xAC, 0x01, 0xFA,
3478};
3479
3480static void
3481ixgbe_rss_disable(struct rte_eth_dev *dev)
3482{
3483        struct ixgbe_hw *hw;
3484        uint32_t mrqc;
3485        uint32_t mrqc_reg;
3486
3487        hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3488        mrqc_reg = ixgbe_mrqc_reg_get(hw->mac.type);
3489        mrqc = IXGBE_READ_REG(hw, mrqc_reg);
3490        mrqc &= ~IXGBE_MRQC_RSSEN;
3491        IXGBE_WRITE_REG(hw, mrqc_reg, mrqc);
3492}
3493
3494static void
3495ixgbe_hw_rss_hash_set(struct ixgbe_hw *hw, struct rte_eth_rss_conf *rss_conf)
3496{
3497        uint8_t  *hash_key;
3498        uint32_t mrqc;
3499        uint32_t rss_key;
3500        uint64_t rss_hf;
3501        uint16_t i;
3502        uint32_t mrqc_reg;
3503        uint32_t rssrk_reg;
3504
3505        mrqc_reg = ixgbe_mrqc_reg_get(hw->mac.type);
3506        rssrk_reg = ixgbe_rssrk_reg_get(hw->mac.type, 0);
3507
3508        hash_key = rss_conf->rss_key;
3509        if (hash_key != NULL) {
3510                /* Fill in RSS hash key */
3511                for (i = 0; i < 10; i++) {
3512                        rss_key  = hash_key[(i * 4)];
3513                        rss_key |= hash_key[(i * 4) + 1] << 8;
3514                        rss_key |= hash_key[(i * 4) + 2] << 16;
3515                        rss_key |= hash_key[(i * 4) + 3] << 24;
3516                        IXGBE_WRITE_REG_ARRAY(hw, rssrk_reg, i, rss_key);
3517                }
3518        }
3519
3520        /* Set configured hashing protocols in MRQC register */
3521        rss_hf = rss_conf->rss_hf;
3522        mrqc = IXGBE_MRQC_RSSEN; /* Enable RSS */
3523        if (rss_hf & ETH_RSS_IPV4)
3524                mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4;
3525        if (rss_hf & ETH_RSS_NONFRAG_IPV4_TCP)
3526                mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4_TCP;
3527        if (rss_hf & ETH_RSS_IPV6)
3528                mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6;
3529        if (rss_hf & ETH_RSS_IPV6_EX)
3530                mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_EX;
3531        if (rss_hf & ETH_RSS_NONFRAG_IPV6_TCP)
3532                mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_TCP;
3533        if (rss_hf & ETH_RSS_IPV6_TCP_EX)
3534                mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_EX_TCP;
3535        if (rss_hf & ETH_RSS_NONFRAG_IPV4_UDP)
3536                mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4_UDP;
3537        if (rss_hf & ETH_RSS_NONFRAG_IPV6_UDP)
3538                mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_UDP;
3539        if (rss_hf & ETH_RSS_IPV6_UDP_EX)
3540                mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_EX_UDP;
3541        IXGBE_WRITE_REG(hw, mrqc_reg, mrqc);
3542}
3543
3544int
3545ixgbe_dev_rss_hash_update(struct rte_eth_dev *dev,
3546                          struct rte_eth_rss_conf *rss_conf)
3547{
3548        struct ixgbe_hw *hw;
3549        uint32_t mrqc;
3550        uint64_t rss_hf;
3551        uint32_t mrqc_reg;
3552
3553        hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3554
3555        if (!ixgbe_rss_update_sp(hw->mac.type)) {
3556                PMD_DRV_LOG(ERR, "RSS hash update is not supported on this "
3557                        "NIC.");
3558                return -ENOTSUP;
3559        }
3560        mrqc_reg = ixgbe_mrqc_reg_get(hw->mac.type);
3561
3562        /*
3563         * Excerpt from section 7.1.2.8 Receive-Side Scaling (RSS):
3564         *     "RSS enabling cannot be done dynamically while it must be
3565         *      preceded by a software reset"
3566         * Before changing anything, first check that the update RSS operation
3567         * does not attempt to disable RSS, if RSS was enabled at
3568         * initialization time, or does not attempt to enable RSS, if RSS was
3569         * disabled at initialization time.
3570         */
3571        rss_hf = rss_conf->rss_hf & IXGBE_RSS_OFFLOAD_ALL;
3572        mrqc = IXGBE_READ_REG(hw, mrqc_reg);
3573        if (!(mrqc & IXGBE_MRQC_RSSEN)) { /* RSS disabled */
3574                if (rss_hf != 0) /* Enable RSS */
3575                        return -(EINVAL);
3576                return 0; /* Nothing to do */
3577        }
3578        /* RSS enabled */
3579        if (rss_hf == 0) /* Disable RSS */
3580                return -(EINVAL);
3581        ixgbe_hw_rss_hash_set(hw, rss_conf);
3582        return 0;
3583}
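/*
 * Usage sketch (application side, values assumed): the update path above is
 * reached through rte_eth_dev_rss_hash_update(), e.g.
 *
 *     struct rte_eth_rss_conf conf = {
 *             .rss_key = NULL,        // keep the current key
 *             .rss_hf = ETH_RSS_IPV4 | ETH_RSS_NONFRAG_IPV4_TCP,
 *     };
 *     ret = rte_eth_dev_rss_hash_update(port_id, &conf);
 *
 * which fails with -EINVAL if it would toggle RSS on or off relative to the
 * state chosen at initialization time.
 */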
3584
3585int
3586ixgbe_dev_rss_hash_conf_get(struct rte_eth_dev *dev,
3587                            struct rte_eth_rss_conf *rss_conf)
3588{
3589        struct ixgbe_hw *hw;
3590        uint8_t *hash_key;
3591        uint32_t mrqc;
3592        uint32_t rss_key;
3593        uint64_t rss_hf;
3594        uint16_t i;
3595        uint32_t mrqc_reg;
3596        uint32_t rssrk_reg;
3597
3598        hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3599        mrqc_reg = ixgbe_mrqc_reg_get(hw->mac.type);
3600        rssrk_reg = ixgbe_rssrk_reg_get(hw->mac.type, 0);
3601        hash_key = rss_conf->rss_key;
3602        if (hash_key != NULL) {
3603                /* Return RSS hash key */
3604                for (i = 0; i < 10; i++) {
3605                        rss_key = IXGBE_READ_REG_ARRAY(hw, rssrk_reg, i);
3606                        hash_key[(i * 4)] = rss_key & 0x000000FF;
3607                        hash_key[(i * 4) + 1] = (rss_key >> 8) & 0x000000FF;
3608                        hash_key[(i * 4) + 2] = (rss_key >> 16) & 0x000000FF;
3609                        hash_key[(i * 4) + 3] = (rss_key >> 24) & 0x000000FF;
3610                }
3611        }
3612
3613        /* Get RSS functions configured in MRQC register */
3614        mrqc = IXGBE_READ_REG(hw, mrqc_reg);
3615        if ((mrqc & IXGBE_MRQC_RSSEN) == 0) { /* RSS is disabled */
3616                rss_conf->rss_hf = 0;
3617                return 0;
3618        }
3619        rss_hf = 0;
3620        if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV4)
3621                rss_hf |= ETH_RSS_IPV4;
3622        if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV4_TCP)
3623                rss_hf |= ETH_RSS_NONFRAG_IPV4_TCP;
3624        if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6)
3625                rss_hf |= ETH_RSS_IPV6;
3626        if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_EX)
3627                rss_hf |= ETH_RSS_IPV6_EX;
3628        if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_TCP)
3629                rss_hf |= ETH_RSS_NONFRAG_IPV6_TCP;
3630        if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_EX_TCP)
3631                rss_hf |= ETH_RSS_IPV6_TCP_EX;
3632        if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV4_UDP)
3633                rss_hf |= ETH_RSS_NONFRAG_IPV4_UDP;
3634        if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_UDP)
3635                rss_hf |= ETH_RSS_NONFRAG_IPV6_UDP;
3636        if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_EX_UDP)
3637                rss_hf |= ETH_RSS_IPV6_UDP_EX;
3638        rss_conf->rss_hf = rss_hf;
3639        return 0;
3640}
3641
3642static void
3643ixgbe_rss_configure(struct rte_eth_dev *dev)
3644{
3645        struct rte_eth_rss_conf rss_conf;
3646        struct ixgbe_adapter *adapter;
3647        struct ixgbe_hw *hw;
3648        uint32_t reta;
3649        uint16_t i;
3650        uint16_t j;
3651        uint16_t sp_reta_size;
3652        uint32_t reta_reg;
3653
3654        PMD_INIT_FUNC_TRACE();
3655        adapter = dev->data->dev_private;
3656        hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3657
3658        sp_reta_size = ixgbe_reta_size_get(hw->mac.type);
3659
3660        /*
3661         * Fill in redirection table
3662         * The byte-swap is needed because NIC registers are in
3663         * little-endian order.
3664         */
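        /*
         * Example (4 Rx queues assumed): entries are packed four per 32-bit
         * register, so the first register holds entries {0, 1, 2, 3}. The
         * loop accumulates reta = 0x00010203 and writes
         * rte_bswap32(reta) = 0x03020100, i.e. entry 0 lands in the
         * least-significant byte as the hardware expects.
         */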
3665        if (adapter->rss_reta_updated == 0) {
3666                reta = 0;
3667                for (i = 0, j = 0; i < sp_reta_size; i++, j++) {
3668                        reta_reg = ixgbe_reta_reg_get(hw->mac.type, i);
3669
3670                        if (j == dev->data->nb_rx_queues)
3671                                j = 0;
3672                        reta = (reta << 8) | j;
3673                        if ((i & 3) == 3)
3674                                IXGBE_WRITE_REG(hw, reta_reg,
3675                                                rte_bswap32(reta));
3676                }
3677        }
3678
3679        /*
3680         * Configure the RSS key and the RSS protocols used to compute
3681         * the RSS hash of input packets.
3682         */
3683        rss_conf = dev->data->dev_conf.rx_adv_conf.rss_conf;
3684        if ((rss_conf.rss_hf & IXGBE_RSS_OFFLOAD_ALL) == 0) {
3685                ixgbe_rss_disable(dev);
3686                return;
3687        }
3688        if (rss_conf.rss_key == NULL)
3689                rss_conf.rss_key = rss_intel_key; /* Default hash key */
3690        ixgbe_hw_rss_hash_set(hw, &rss_conf);
3691}
3692
3693#define NUM_VFTA_REGISTERS 128
3694#define NIC_RX_BUFFER_SIZE 0x200
3695#define X550_RX_BUFFER_SIZE 0x180
3696
3697static void
3698ixgbe_vmdq_dcb_configure(struct rte_eth_dev *dev)
3699{
3700        struct rte_eth_vmdq_dcb_conf *cfg;
3701        struct ixgbe_hw *hw;
3702        enum rte_eth_nb_pools num_pools;
3703        uint32_t mrqc, vt_ctl, queue_mapping, vlanctrl;
3704        uint16_t pbsize;
3705        uint8_t nb_tcs; /* number of traffic classes */
3706        int i;
3707
3708        PMD_INIT_FUNC_TRACE();
3709        hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3710        cfg = &dev->data->dev_conf.rx_adv_conf.vmdq_dcb_conf;
3711        num_pools = cfg->nb_queue_pools;
3712        /* Check we have a valid number of pools */
3713        if (num_pools != ETH_16_POOLS && num_pools != ETH_32_POOLS) {
3714                ixgbe_rss_disable(dev);
3715                return;
3716        }
3717        /* 16 pools -> 8 traffic classes, 32 pools -> 4 traffic classes */
3718        nb_tcs = (uint8_t)(ETH_VMDQ_DCB_NUM_QUEUES / (int)num_pools);
3719
3720        /*
3721         * RXPBSIZE
3722         * split rx buffer up into sections, each for 1 traffic class
3723         */
3724        switch (hw->mac.type) {
3725        case ixgbe_mac_X550:
3726        case ixgbe_mac_X550EM_x:
3727        case ixgbe_mac_X550EM_a:
3728                pbsize = (uint16_t)(X550_RX_BUFFER_SIZE / nb_tcs);
3729                break;
3730        default:
3731                pbsize = (uint16_t)(NIC_RX_BUFFER_SIZE / nb_tcs);
3732                break;
3733        }
3734        for (i = 0; i < nb_tcs; i++) {
3735                uint32_t rxpbsize = IXGBE_READ_REG(hw, IXGBE_RXPBSIZE(i));
3736
3737                rxpbsize &= (~(0x3FF << IXGBE_RXPBSIZE_SHIFT));
3738                /* clear 10 bits. */
3739                rxpbsize |= (pbsize << IXGBE_RXPBSIZE_SHIFT); /* set value */
3740                IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), rxpbsize);
3741        }
3742        /* zero alloc all unused TCs */
3743        for (i = nb_tcs; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3744                uint32_t rxpbsize = IXGBE_READ_REG(hw, IXGBE_RXPBSIZE(i));
3745
3746                rxpbsize &= (~(0x3FF << IXGBE_RXPBSIZE_SHIFT));
3747                /* clear 10 bits. */
3748                IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), rxpbsize);
3749        }
3750
3751        /* MRQC: enable vmdq and dcb */
3752        mrqc = (num_pools == ETH_16_POOLS) ?
3753                IXGBE_MRQC_VMDQRT8TCEN : IXGBE_MRQC_VMDQRT4TCEN;
3754        IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
3755
3756        /* PFVTCTL: turn on virtualisation and set the default pool */
3757        vt_ctl = IXGBE_VT_CTL_VT_ENABLE | IXGBE_VT_CTL_REPLEN;
3758        if (cfg->enable_default_pool) {
3759                vt_ctl |= (cfg->default_pool << IXGBE_VT_CTL_POOL_SHIFT);
3760        } else {
3761                vt_ctl |= IXGBE_VT_CTL_DIS_DEFPL;
3762        }
3763
3764        IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, vt_ctl);
3765
3766        /* RTRUP2TC: mapping user priorities to traffic classes (TCs) */
3767        queue_mapping = 0;
3768        for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++)
3769                /*
3770                 * mapping is done with 3 bits per priority,
3771                 * so shift by i*3 each time
3772                 */
3773                queue_mapping |= ((cfg->dcb_tc[i] & 0x07) << (i * 3));
3774
3775        IXGBE_WRITE_REG(hw, IXGBE_RTRUP2TC, queue_mapping);
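        /*
         * Example (mapping assumed): dcb_tc = {0, 0, 0, 0, 1, 1, 1, 1}, i.e.
         * user priorities 0-3 on TC0 and 4-7 on TC1, yields
         * queue_mapping = 0x249000 with three bits per priority.
         */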
3776
3777        /* RTRPCS: DCB related */
3778        IXGBE_WRITE_REG(hw, IXGBE_RTRPCS, IXGBE_RMCS_RRM);
3779
3780        /* VLNCTRL: enable vlan filtering and allow all vlan tags through */
3781        vlanctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
3782        vlanctrl |= IXGBE_VLNCTRL_VFE; /* enable vlan filters */
3783        IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlanctrl);
3784
3785        /* VFTA - enable all vlan filters */
3786        for (i = 0; i < NUM_VFTA_REGISTERS; i++) {
3787                IXGBE_WRITE_REG(hw, IXGBE_VFTA(i), 0xFFFFFFFF);
3788        }
3789
3790        /* VFRE: pool enabling for receive - 16 or 32 */
3791        IXGBE_WRITE_REG(hw, IXGBE_VFRE(0),
3792                        num_pools == ETH_16_POOLS ? 0xFFFF : 0xFFFFFFFF);
3793
3794        /*
3795         * MPSAR - allow pools to read specific mac addresses
3796         * In this case, all pools should be able to read from mac addr 0
3797         */
3798        IXGBE_WRITE_REG(hw, IXGBE_MPSAR_LO(0), 0xFFFFFFFF);
3799        IXGBE_WRITE_REG(hw, IXGBE_MPSAR_HI(0), 0xFFFFFFFF);
3800
3801        /* PFVLVF, PFVLVFB: set up filters for vlan tags as configured */
3802        for (i = 0; i < cfg->nb_pool_maps; i++) {
3803                /* set vlan id in VF register and set the valid bit */
3804                IXGBE_WRITE_REG(hw, IXGBE_VLVF(i), (IXGBE_VLVF_VIEN |
3805                                (cfg->pool_map[i].vlan_id & 0xFFF)));
3806                /*
3807                 * Put the allowed pools in VFB reg. As we only have 16 or 32
3808                 * pools, we only need to use the first half of the register
3809                 * i.e. bits 0-31
3810                 */
3811                IXGBE_WRITE_REG(hw, IXGBE_VLVFB(i*2), cfg->pool_map[i].pools);
3812        }
3813}
3814
3815/**
3816 * ixgbe_dcb_config_tx_hw_config - Configure general DCB TX parameters
3817 * @dev: pointer to eth_dev structure
3818 * @dcb_config: pointer to ixgbe_dcb_config structure
3819 */
3820static void
3821ixgbe_dcb_tx_hw_config(struct rte_eth_dev *dev,
3822                       struct ixgbe_dcb_config *dcb_config)
3823{
3824        uint32_t reg;
3825        struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3826
3827        PMD_INIT_FUNC_TRACE();
3828        if (hw->mac.type != ixgbe_mac_82598EB) {
3829                /* Disable the Tx desc arbiter so that MTQC can be changed */
3830                reg = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
3831                reg |= IXGBE_RTTDCS_ARBDIS;
3832                IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, reg);
3833
3834                /* Enable DCB for Tx with 8 TCs */
3835                if (dcb_config->num_tcs.pg_tcs == 8) {
3836                        reg = IXGBE_MTQC_RT_ENA | IXGBE_MTQC_8TC_8TQ;
3837                } else {
3838                        reg = IXGBE_MTQC_RT_ENA | IXGBE_MTQC_4TC_4TQ;
3839                }
3840                if (dcb_config->vt_mode)
3841                        reg |= IXGBE_MTQC_VT_ENA;
3842                IXGBE_WRITE_REG(hw, IXGBE_MTQC, reg);
3843
3844                /* Enable the Tx desc arbiter */
3845                reg = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
3846                reg &= ~IXGBE_RTTDCS_ARBDIS;
3847                IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, reg);
3848
3849                /* Enable Security TX Buffer IFG for DCB */
3850                reg = IXGBE_READ_REG(hw, IXGBE_SECTXMINIFG);
3851                reg |= IXGBE_SECTX_DCB;
3852                IXGBE_WRITE_REG(hw, IXGBE_SECTXMINIFG, reg);
3853        }
3854}
3855
3856/**
3857 * ixgbe_vmdq_dcb_hw_tx_config - Configure general VMDQ+DCB TX parameters
3858 * @dev: pointer to rte_eth_dev structure
3859 * @dcb_config: pointer to ixgbe_dcb_config structure
3860 */
3861static void
3862ixgbe_vmdq_dcb_hw_tx_config(struct rte_eth_dev *dev,
3863                        struct ixgbe_dcb_config *dcb_config)
3864{
3865        struct rte_eth_vmdq_dcb_tx_conf *vmdq_tx_conf =
3866                        &dev->data->dev_conf.tx_adv_conf.vmdq_dcb_tx_conf;
3867        struct ixgbe_hw *hw =
3868                        IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3869
3870        PMD_INIT_FUNC_TRACE();
3871        if (hw->mac.type != ixgbe_mac_82598EB)
3872                /*PF VF Transmit Enable*/
3873                IXGBE_WRITE_REG(hw, IXGBE_VFTE(0),
3874                        vmdq_tx_conf->nb_queue_pools == ETH_16_POOLS ? 0xFFFF : 0xFFFFFFFF);
3875
3876        /*Configure general DCB TX parameters*/
3877        ixgbe_dcb_tx_hw_config(dev, dcb_config);
3878}
3879
3880static void
3881ixgbe_vmdq_dcb_rx_config(struct rte_eth_dev *dev,
3882                        struct ixgbe_dcb_config *dcb_config)
3883{
3884        struct rte_eth_vmdq_dcb_conf *vmdq_rx_conf =
3885                        &dev->data->dev_conf.rx_adv_conf.vmdq_dcb_conf;
3886        struct ixgbe_dcb_tc_config *tc;
3887        uint8_t i, j;
3888
3889        /* convert rte_eth_conf.rx_adv_conf to struct ixgbe_dcb_config */
3890        if (vmdq_rx_conf->nb_queue_pools == ETH_16_POOLS) {
3891                dcb_config->num_tcs.pg_tcs = ETH_8_TCS;
3892                dcb_config->num_tcs.pfc_tcs = ETH_8_TCS;
3893        } else {
3894                dcb_config->num_tcs.pg_tcs = ETH_4_TCS;
3895                dcb_config->num_tcs.pfc_tcs = ETH_4_TCS;
3896        }
3897
3898        /* Initialize User Priority to Traffic Class mapping */
3899        for (j = 0; j < IXGBE_DCB_MAX_TRAFFIC_CLASS; j++) {
3900                tc = &dcb_config->tc_config[j];
3901                tc->path[IXGBE_DCB_RX_CONFIG].up_to_tc_bitmap = 0;
3902        }
3903
3904        /* User Priority to Traffic Class mapping */
3905        for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3906                j = vmdq_rx_conf->dcb_tc[i];
3907                tc = &dcb_config->tc_config[j];
3908                tc->path[IXGBE_DCB_RX_CONFIG].up_to_tc_bitmap |=
3909                                                (uint8_t)(1 << i);
3910        }
3911}
3912
3913static void
3914ixgbe_dcb_vt_tx_config(struct rte_eth_dev *dev,
3915                        struct ixgbe_dcb_config *dcb_config)
3916{
3917        struct rte_eth_vmdq_dcb_tx_conf *vmdq_tx_conf =
3918                        &dev->data->dev_conf.tx_adv_conf.vmdq_dcb_tx_conf;
3919        struct ixgbe_dcb_tc_config *tc;
3920        uint8_t i, j;
3921
3922        /* convert rte_eth_conf.tx_adv_conf to struct ixgbe_dcb_config */
3923        if (vmdq_tx_conf->nb_queue_pools == ETH_16_POOLS) {
3924                dcb_config->num_tcs.pg_tcs = ETH_8_TCS;
3925                dcb_config->num_tcs.pfc_tcs = ETH_8_TCS;
3926        } else {
3927                dcb_config->num_tcs.pg_tcs = ETH_4_TCS;
3928                dcb_config->num_tcs.pfc_tcs = ETH_4_TCS;
3929        }
3930
3931        /* Initialize User Priority to Traffic Class mapping */
3932        for (j = 0; j < IXGBE_DCB_MAX_TRAFFIC_CLASS; j++) {
3933                tc = &dcb_config->tc_config[j];
3934                tc->path[IXGBE_DCB_TX_CONFIG].up_to_tc_bitmap = 0;
3935        }
3936
3937        /* User Priority to Traffic Class mapping */
3938        for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3939                j = vmdq_tx_conf->dcb_tc[i];
3940                tc = &dcb_config->tc_config[j];
3941                tc->path[IXGBE_DCB_TX_CONFIG].up_to_tc_bitmap |=
3942                                                (uint8_t)(1 << i);
3943        }
3944}
3945
3946static void
3947ixgbe_dcb_rx_config(struct rte_eth_dev *dev,
3948                struct ixgbe_dcb_config *dcb_config)
3949{
3950        struct rte_eth_dcb_rx_conf *rx_conf =
3951                        &dev->data->dev_conf.rx_adv_conf.dcb_rx_conf;
3952        struct ixgbe_dcb_tc_config *tc;
3953        uint8_t i, j;
3954
3955        dcb_config->num_tcs.pg_tcs = (uint8_t)rx_conf->nb_tcs;
3956        dcb_config->num_tcs.pfc_tcs = (uint8_t)rx_conf->nb_tcs;
3957
3958        /* Initialize User Priority to Traffic Class mapping */
3959        for (j = 0; j < IXGBE_DCB_MAX_TRAFFIC_CLASS; j++) {
3960                tc = &dcb_config->tc_config[j];
3961                tc->path[IXGBE_DCB_RX_CONFIG].up_to_tc_bitmap = 0;
3962        }
3963
3964        /* User Priority to Traffic Class mapping */
3965        for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3966                j = rx_conf->dcb_tc[i];
3967                tc = &dcb_config->tc_config[j];
3968                tc->path[IXGBE_DCB_RX_CONFIG].up_to_tc_bitmap |=
3969                                                (uint8_t)(1 << i);
3970        }
3971}
3972
3973static void
3974ixgbe_dcb_tx_config(struct rte_eth_dev *dev,
3975                struct ixgbe_dcb_config *dcb_config)
3976{
3977        struct rte_eth_dcb_tx_conf *tx_conf =
3978                        &dev->data->dev_conf.tx_adv_conf.dcb_tx_conf;
3979        struct ixgbe_dcb_tc_config *tc;
3980        uint8_t i, j;
3981
3982        dcb_config->num_tcs.pg_tcs = (uint8_t)tx_conf->nb_tcs;
3983        dcb_config->num_tcs.pfc_tcs = (uint8_t)tx_conf->nb_tcs;
3984
3985        /* Initialize User Priority to Traffic Class mapping */
3986        for (j = 0; j < IXGBE_DCB_MAX_TRAFFIC_CLASS; j++) {
3987                tc = &dcb_config->tc_config[j];
3988                tc->path[IXGBE_DCB_TX_CONFIG].up_to_tc_bitmap = 0;
3989        }
3990
3991        /* User Priority to Traffic Class mapping */
3992        for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3993                j = tx_conf->dcb_tc[i];
3994                tc = &dcb_config->tc_config[j];
3995                tc->path[IXGBE_DCB_TX_CONFIG].up_to_tc_bitmap |=
3996                                                (uint8_t)(1 << i);
3997        }
3998}
3999
4000/**
4001 * ixgbe_dcb_rx_hw_config - Configure general DCB RX HW parameters
4002 * @dev: pointer to eth_dev structure
4003 * @dcb_config: pointer to ixgbe_dcb_config structure
4004 */
4005static void
4006ixgbe_dcb_rx_hw_config(struct rte_eth_dev *dev,
4007                       struct ixgbe_dcb_config *dcb_config)
4008{
4009        uint32_t reg;
4010        uint32_t vlanctrl;
4011        uint8_t i;
4012        uint32_t q;
4013        struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4014
4015        PMD_INIT_FUNC_TRACE();
4016        /*
4017         * Disable the arbiter before changing parameters
4018         * (always enable recycle mode; WSP)
4019         */
4020        reg = IXGBE_RTRPCS_RRM | IXGBE_RTRPCS_RAC | IXGBE_RTRPCS_ARBDIS;
4021        IXGBE_WRITE_REG(hw, IXGBE_RTRPCS, reg);
4022
4023        if (hw->mac.type != ixgbe_mac_82598EB) {
4024                reg = IXGBE_READ_REG(hw, IXGBE_MRQC);
4025                if (dcb_config->num_tcs.pg_tcs == 4) {
4026                        if (dcb_config->vt_mode)
4027                                reg = (reg & ~IXGBE_MRQC_MRQE_MASK) |
4028                                        IXGBE_MRQC_VMDQRT4TCEN;
4029                        else {
4030                                /* Whether the mode is DCB or DCB_RSS, just
4031                                 * set the MRQE field to RSSXTCEN; RSS itself
4032                                 * is controlled by the RSS_FIELD bits.
4033                                 */
4034                                IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, 0);
4035                                reg = (reg & ~IXGBE_MRQC_MRQE_MASK) |
4036                                        IXGBE_MRQC_RTRSS4TCEN;
4037                        }
4038                }
4039                if (dcb_config->num_tcs.pg_tcs == 8) {
4040                        if (dcb_config->vt_mode)
4041                                reg = (reg & ~IXGBE_MRQC_MRQE_MASK) |
4042                                        IXGBE_MRQC_VMDQRT8TCEN;
4043                        else {
4044                                IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, 0);
4045                                reg = (reg & ~IXGBE_MRQC_MRQE_MASK) |
4046                                        IXGBE_MRQC_RTRSS8TCEN;
4047                        }
4048                }
4049
4050                IXGBE_WRITE_REG(hw, IXGBE_MRQC, reg);
4051
4052                if (RTE_ETH_DEV_SRIOV(dev).active == 0) {
4053                        /* Disable drop for all queues in VMDQ mode*/
4054                        for (q = 0; q < IXGBE_MAX_RX_QUEUE_NUM; q++)
4055                                IXGBE_WRITE_REG(hw, IXGBE_QDE,
4056                                                (IXGBE_QDE_WRITE |
4057                                                 (q << IXGBE_QDE_IDX_SHIFT)));
4058                } else {
4059                        /* Enable drop for all queues in SRIOV mode */
4060                        for (q = 0; q < IXGBE_MAX_RX_QUEUE_NUM; q++)
4061                                IXGBE_WRITE_REG(hw, IXGBE_QDE,
4062                                                (IXGBE_QDE_WRITE |
4063                                                 (q << IXGBE_QDE_IDX_SHIFT) |
4064                                                 IXGBE_QDE_ENABLE));
4065                }
4066        }
4067
4068        /* VLNCTRL: enable vlan filtering and allow all vlan tags through */
4069        vlanctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
4070        vlanctrl |= IXGBE_VLNCTRL_VFE; /* enable vlan filters */
4071        IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlanctrl);
4072
4073        /* VFTA - enable all vlan filters */
4074        for (i = 0; i < NUM_VFTA_REGISTERS; i++) {
4075                IXGBE_WRITE_REG(hw, IXGBE_VFTA(i), 0xFFFFFFFF);
4076        }
4077
4078        /*
4079         * Configure Rx packet plane (recycle mode; WSP) and
4080         * enable arbiter
4081         */
4082        reg = IXGBE_RTRPCS_RRM | IXGBE_RTRPCS_RAC;
4083        IXGBE_WRITE_REG(hw, IXGBE_RTRPCS, reg);
4084}
4085
4086static void
4087ixgbe_dcb_hw_arbite_rx_config(struct ixgbe_hw *hw, uint16_t *refill,
4088                        uint16_t *max, uint8_t *bwg_id, uint8_t *tsa, uint8_t *map)
4089{
4090        switch (hw->mac.type) {
4091        case ixgbe_mac_82598EB:
4092                ixgbe_dcb_config_rx_arbiter_82598(hw, refill, max, tsa);
4093                break;
4094        case ixgbe_mac_82599EB:
4095        case ixgbe_mac_X540:
4096        case ixgbe_mac_X550:
4097        case ixgbe_mac_X550EM_x:
4098        case ixgbe_mac_X550EM_a:
4099                ixgbe_dcb_config_rx_arbiter_82599(hw, refill, max, bwg_id,
4100                                                  tsa, map);
4101                break;
4102        default:
4103                break;
4104        }
4105}
4106
4107static void
4108ixgbe_dcb_hw_arbite_tx_config(struct ixgbe_hw *hw, uint16_t *refill, uint16_t *max,
4109                            uint8_t *bwg_id, uint8_t *tsa, uint8_t *map)
4110{
4111        switch (hw->mac.type) {
4112        case ixgbe_mac_82598EB:
4113                ixgbe_dcb_config_tx_desc_arbiter_82598(hw, refill, max, bwg_id, tsa);
4114                ixgbe_dcb_config_tx_data_arbiter_82598(hw, refill, max, bwg_id, tsa);
4115                break;
4116        case ixgbe_mac_82599EB:
4117        case ixgbe_mac_X540:
4118        case ixgbe_mac_X550:
4119        case ixgbe_mac_X550EM_x:
4120        case ixgbe_mac_X550EM_a:
4121                ixgbe_dcb_config_tx_desc_arbiter_82599(hw, refill, max, bwg_id, tsa);
4122                ixgbe_dcb_config_tx_data_arbiter_82599(hw, refill, max, bwg_id, tsa, map);
4123                break;
4124        default:
4125                break;
4126        }
4127}
4128
4129#define DCB_RX_CONFIG  1
4130#define DCB_TX_CONFIG  1
4131#define DCB_TX_PB      1024
4132/**
4133 * ixgbe_dcb_hw_configure - Enable DCB and configure
4134 * general DCB in VT mode and non-VT mode parameters
4135 * @dev: pointer to rte_eth_dev structure
4136 * @dcb_config: pointer to ixgbe_dcb_config structure
4137 */
4138static int
4139ixgbe_dcb_hw_configure(struct rte_eth_dev *dev,
4140                        struct ixgbe_dcb_config *dcb_config)
4141{
4142        int     ret = 0;
4143        uint8_t i, pfc_en, nb_tcs;
4144        uint16_t pbsize, rx_buffer_size;
4145        uint8_t config_dcb_rx = 0;
4146        uint8_t config_dcb_tx = 0;
4147        uint8_t tsa[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
4148        uint8_t bwgid[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
4149        uint16_t refill[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
4150        uint16_t max[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
4151        uint8_t map[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
4152        struct ixgbe_dcb_tc_config *tc;
4153        uint32_t max_frame = dev->data->mtu + RTE_ETHER_HDR_LEN +
4154                RTE_ETHER_CRC_LEN;
4155        struct ixgbe_hw *hw =
4156                        IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4157        struct ixgbe_bw_conf *bw_conf =
4158                IXGBE_DEV_PRIVATE_TO_BW_CONF(dev->data->dev_private);
4159
4160        switch (dev->data->dev_conf.rxmode.mq_mode) {
4161        case ETH_MQ_RX_VMDQ_DCB:
4162                dcb_config->vt_mode = true;
4163                if (hw->mac.type != ixgbe_mac_82598EB) {
4164                        config_dcb_rx = DCB_RX_CONFIG;
4165                        /*
4166                         *get dcb and VT rx configuration parameters
4167                         *from rte_eth_conf
4168                         */
4169                        ixgbe_vmdq_dcb_rx_config(dev, dcb_config);
4170                        /*Configure general VMDQ and DCB RX parameters*/
4171                        ixgbe_vmdq_dcb_configure(dev);
4172                }
4173                break;
4174        case ETH_MQ_RX_DCB:
4175        case ETH_MQ_RX_DCB_RSS:
4176                dcb_config->vt_mode = false;
4177                config_dcb_rx = DCB_RX_CONFIG;
4178                /* Get DCB RX configuration parameters from rte_eth_conf */
4179                ixgbe_dcb_rx_config(dev, dcb_config);
4180                /*Configure general DCB RX parameters*/
4181                ixgbe_dcb_rx_hw_config(dev, dcb_config);
4182                break;
4183        default:
4184                PMD_INIT_LOG(ERR, "Incorrect DCB RX mode configuration");
4185                break;
4186        }
4187        switch (dev->data->dev_conf.txmode.mq_mode) {
4188        case ETH_MQ_TX_VMDQ_DCB:
4189                dcb_config->vt_mode = true;
4190                config_dcb_tx = DCB_TX_CONFIG;
4191                /* get DCB and VT TX configuration parameters
4192                 * from rte_eth_conf
4193                 */
4194                ixgbe_dcb_vt_tx_config(dev, dcb_config);
4195                /*Configure general VMDQ and DCB TX parameters*/
4196                ixgbe_vmdq_dcb_hw_tx_config(dev, dcb_config);
4197                break;
4198
4199        case ETH_MQ_TX_DCB:
4200                dcb_config->vt_mode = false;
4201                config_dcb_tx = DCB_TX_CONFIG;
4202                /*get DCB TX configuration parameters from rte_eth_conf*/
4203                ixgbe_dcb_tx_config(dev, dcb_config);
4204                /*Configure general DCB TX parameters*/
4205                ixgbe_dcb_tx_hw_config(dev, dcb_config);
4206                break;
4207        default:
4208                PMD_INIT_LOG(ERR, "Incorrect DCB TX mode configuration");
4209                break;
4210        }
4211
4212        nb_tcs = dcb_config->num_tcs.pfc_tcs;
4213        /* Unpack map */
4214        ixgbe_dcb_unpack_map_cee(dcb_config, IXGBE_DCB_RX_CONFIG, map);
4215        if (nb_tcs == ETH_4_TCS) {
4216                /* Avoid un-configured priority mapping to TC0 */
4217                uint8_t j = 4;
4218                uint8_t mask = 0xFF;
4219
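                /*
                 * mask starts with all eight TC bits set; clear the bit of
                 * every TC already used by priorities 0-3, then assign the
                 * remaining TCs, lowest index first, to priorities 4-7 so
                 * no priority is left implicitly mapped to TC0.
                 */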
4220                for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES - 4; i++)
4221                        mask = (uint8_t)(mask & (~(1 << map[i])));
4222                for (i = 0; mask && (i < IXGBE_DCB_MAX_TRAFFIC_CLASS); i++) {
4223                        if ((mask & 0x1) && (j < ETH_DCB_NUM_USER_PRIORITIES))
4224                                map[j++] = i;
4225                        mask >>= 1;
4226                }
4227                /* Re-configure 4 TCs BW */
4228                for (i = 0; i < nb_tcs; i++) {
4229                        tc = &dcb_config->tc_config[i];
4230                        if (bw_conf->tc_num != nb_tcs)
4231                                tc->path[IXGBE_DCB_TX_CONFIG].bwg_percent =
4232                                        (uint8_t)(100 / nb_tcs);
4233                        tc->path[IXGBE_DCB_RX_CONFIG].bwg_percent =
4234                                                (uint8_t)(100 / nb_tcs);
4235                }
4236                for (; i < IXGBE_DCB_MAX_TRAFFIC_CLASS; i++) {
4237                        tc = &dcb_config->tc_config[i];
4238                        tc->path[IXGBE_DCB_TX_CONFIG].bwg_percent = 0;
4239                        tc->path[IXGBE_DCB_RX_CONFIG].bwg_percent = 0;
4240                }
4241        } else {
4242                /* Re-configure 8 TCs BW */
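                /*
                 * 100 / 8 leaves a remainder of 4, so the (i & 1) term gives
                 * the odd-numbered TCs one extra percent and the shares sum
                 * to 100.
                 */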
4243                for (i = 0; i < nb_tcs; i++) {
4244                        tc = &dcb_config->tc_config[i];
4245                        if (bw_conf->tc_num != nb_tcs)
4246                                tc->path[IXGBE_DCB_TX_CONFIG].bwg_percent =
4247                                        (uint8_t)(100 / nb_tcs + (i & 1));
4248                        tc->path[IXGBE_DCB_RX_CONFIG].bwg_percent =
4249                                (uint8_t)(100 / nb_tcs + (i & 1));
4250                }
4251        }
4252
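        /*
         * The Rx packet buffer size differs on the X550 family, so pick the
         * per-MAC value before splitting it among the TCs.
         */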
4253        switch (hw->mac.type) {
4254        case ixgbe_mac_X550:
4255        case ixgbe_mac_X550EM_x:
4256        case ixgbe_mac_X550EM_a:
4257                rx_buffer_size = X550_RX_BUFFER_SIZE;
4258                break;
4259        default:
4260                rx_buffer_size = NIC_RX_BUFFER_SIZE;
4261                break;
4262        }
4263
4264        if (config_dcb_rx) {
4265                /* Set RX buffer size */
4266                pbsize = (uint16_t)(rx_buffer_size / nb_tcs);
4267                uint32_t rxpbsize = pbsize << IXGBE_RXPBSIZE_SHIFT;
4268
4269                for (i = 0; i < nb_tcs; i++) {
4270                        IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), rxpbsize);
4271                }
4272                /* zero alloc all unused TCs */
4273                for (; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
4274                        IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), 0);
4275                }
4276        }
4277        if (config_dcb_tx) {
4278                /* Only support an equally distributed
4279                 *  Tx packet buffer strategy.
4280                 */
4281                uint32_t txpktsize = IXGBE_TXPBSIZE_MAX / nb_tcs;
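                /*
                 * txpbthresh is in KB units (DCB_TX_PB is 1024): the per-TC
                 * buffer size in KB minus IXGBE_TXPKT_SIZE_MAX, leaving room
                 * below the threshold for a maximum-size packet.
                 */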
4282                uint32_t txpbthresh = (txpktsize / DCB_TX_PB) - IXGBE_TXPKT_SIZE_MAX;
4283
4284                for (i = 0; i < nb_tcs; i++) {
4285                        IXGBE_WRITE_REG(hw, IXGBE_TXPBSIZE(i), txpktsize);
4286                        IXGBE_WRITE_REG(hw, IXGBE_TXPBTHRESH(i), txpbthresh);
4287                }
4288                /* Clear unused TCs, if any, to zero buffer size*/
4289                for (; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
4290                        IXGBE_WRITE_REG(hw, IXGBE_TXPBSIZE(i), 0);
4291                        IXGBE_WRITE_REG(hw, IXGBE_TXPBTHRESH(i), 0);
4292                }
4293        }
4294
4295        /*Calculates traffic class credits*/
4296        ixgbe_dcb_calculate_tc_credits_cee(hw, dcb_config, max_frame,
4297                                IXGBE_DCB_TX_CONFIG);
4298        ixgbe_dcb_calculate_tc_credits_cee(hw, dcb_config, max_frame,
4299                                IXGBE_DCB_RX_CONFIG);
4300
4301        if (config_dcb_rx) {
4302                /* Unpack CEE standard containers */
4303                ixgbe_dcb_unpack_refill_cee(dcb_config, IXGBE_DCB_RX_CONFIG, refill);
4304                ixgbe_dcb_unpack_max_cee(dcb_config, max);
4305                ixgbe_dcb_unpack_bwgid_cee(dcb_config, IXGBE_DCB_RX_CONFIG, bwgid);
4306                ixgbe_dcb_unpack_tsa_cee(dcb_config, IXGBE_DCB_RX_CONFIG, tsa);
4307                /* Configure PG(ETS) RX */
4308                ixgbe_dcb_hw_arbite_rx_config(hw, refill, max, bwgid, tsa, map);
4309        }
4310
4311        if (config_dcb_tx) {
4312                /* Unpack CEE standard containers */
4313                ixgbe_dcb_unpack_refill_cee(dcb_config, IXGBE_DCB_TX_CONFIG, refill);
4314                ixgbe_dcb_unpack_max_cee(dcb_config, max);
4315                ixgbe_dcb_unpack_bwgid_cee(dcb_config, IXGBE_DCB_TX_CONFIG, bwgid);
4316                ixgbe_dcb_unpack_tsa_cee(dcb_config, IXGBE_DCB_TX_CONFIG, tsa);
4317                /* Configure PG(ETS) TX */
4318                ixgbe_dcb_hw_arbite_tx_config(hw, refill, max, bwgid, tsa, map);
4319        }
4320
4321        /*Configure queue statistics registers*/
4322        ixgbe_dcb_config_tc_stats_82599(hw, dcb_config);
4323
4324        /* Check if the PFC is supported */
4325        if (dev->data->dev_conf.dcb_capability_en & ETH_DCB_PFC_SUPPORT) {
4326                pbsize = (uint16_t)(rx_buffer_size / nb_tcs);
4327                for (i = 0; i < nb_tcs; i++) {
4328                        /*
4329                         * E.g. with a TC count of 8, the default high_water
4330                         * is 48 and the default low_water is 16.
4331                         */
4332                        hw->fc.high_water[i] = (pbsize * 3) / 4;
4333                        hw->fc.low_water[i] = pbsize / 4;
4334                        /* Enable pfc for this TC */
4335                        tc = &dcb_config->tc_config[i];
4336                        tc->pfc = ixgbe_dcb_pfc_enabled;
4337                }
4338                ixgbe_dcb_unpack_pfc_cee(dcb_config, map, &pfc_en);
4339                if (dcb_config->num_tcs.pfc_tcs == ETH_4_TCS)
4340                        pfc_en &= 0x0F;
4341                ret = ixgbe_dcb_config_pfc(hw, pfc_en, map);
4342        }
4343
4344        return ret;
4345}
4346
4347/**
4348 * ixgbe_configure_dcb - Configure DCB  Hardware
4349 * @dev: pointer to rte_eth_dev
4350 */
4351void ixgbe_configure_dcb(struct rte_eth_dev *dev)
4352{
4353        struct ixgbe_dcb_config *dcb_cfg =
4354                        IXGBE_DEV_PRIVATE_TO_DCB_CFG(dev->data->dev_private);
4355        struct rte_eth_conf *dev_conf = &(dev->data->dev_conf);
4356
4357        PMD_INIT_FUNC_TRACE();
4358
4359        /* check support mq_mode for DCB */
4360        if ((dev_conf->rxmode.mq_mode != ETH_MQ_RX_VMDQ_DCB) &&
4361            (dev_conf->rxmode.mq_mode != ETH_MQ_RX_DCB) &&
4362            (dev_conf->rxmode.mq_mode != ETH_MQ_RX_DCB_RSS))
4363                return;
4364
4365        if (dev->data->nb_rx_queues > ETH_DCB_NUM_QUEUES)
4366                return;
4367
4368        /** Configure DCB hardware **/
4369        ixgbe_dcb_hw_configure(dev, dcb_cfg);
4370}
4371
4372/*
4373 * VMDq is only supported on 10 GbE NICs.
4374 */
4375static void
4376ixgbe_vmdq_rx_hw_configure(struct rte_eth_dev *dev)
4377{
4378        struct rte_eth_vmdq_rx_conf *cfg;
4379        struct ixgbe_hw *hw;
4380        enum rte_eth_nb_pools num_pools;
4381        uint32_t mrqc, vt_ctl, vlanctrl;
4382        uint32_t vmolr = 0;
4383        int i;
4384
4385        PMD_INIT_FUNC_TRACE();
4386        hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4387        cfg = &dev->data->dev_conf.rx_adv_conf.vmdq_rx_conf;
4388        num_pools = cfg->nb_queue_pools;
4389
4390        ixgbe_rss_disable(dev);
4391
4392        /* MRQC: enable vmdq */
4393        mrqc = IXGBE_MRQC_VMDQEN;
4394        IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
4395
4396        /* PFVTCTL: turn on virtualisation and set the default pool */
4397        vt_ctl = IXGBE_VT_CTL_VT_ENABLE | IXGBE_VT_CTL_REPLEN;
4398        if (cfg->enable_default_pool)
4399                vt_ctl |= (cfg->default_pool << IXGBE_VT_CTL_POOL_SHIFT);
4400        else
4401                vt_ctl |= IXGBE_VT_CTL_DIS_DEFPL;
4402
4403        IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, vt_ctl);
4404
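        /* VMOLR: convert the requested rx_mode flags into per-pool register values */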
4405        for (i = 0; i < (int)num_pools; i++) {
4406                vmolr = ixgbe_convert_vm_rx_mask_to_val(cfg->rx_mode, vmolr);
4407                IXGBE_WRITE_REG(hw, IXGBE_VMOLR(i), vmolr);
4408        }
4409
4410        /* VLNCTRL: enable vlan filtering and allow all vlan tags through */
4411        vlanctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
4412        vlanctrl |= IXGBE_VLNCTRL_VFE; /* enable vlan filters */
4413        IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlanctrl);
4414
4415        /* VFTA - enable all vlan filters */
4416        for (i = 0; i < NUM_VFTA_REGISTERS; i++)
4417                IXGBE_WRITE_REG(hw, IXGBE_VFTA(i), UINT32_MAX);
4418
4419        /* VFRE: pool enabling for receive - 64 */
4420        IXGBE_WRITE_REG(hw, IXGBE_VFRE(0), UINT32_MAX);
4421        if (num_pools == ETH_64_POOLS)
4422                IXGBE_WRITE_REG(hw, IXGBE_VFRE(1), UINT32_MAX);
4423
4424        /*
4425         * MPSAR - allow pools to read specific mac addresses
4426         * In this case, all pools should be able to read from mac addr 0
4427         */
4428        IXGBE_WRITE_REG(hw, IXGBE_MPSAR_LO(0), UINT32_MAX);
4429        IXGBE_WRITE_REG(hw, IXGBE_MPSAR_HI(0), UINT32_MAX);
4430
4431        /* PFVLVF, PFVLVFB: set up filters for vlan tags as configured */
4432        for (i = 0; i < cfg->nb_pool_maps; i++) {
4433                /* set vlan id in VF register and set the valid bit */
4434                IXGBE_WRITE_REG(hw, IXGBE_VLVF(i), (IXGBE_VLVF_VIEN |
4435                                (cfg->pool_map[i].vlan_id & IXGBE_RXD_VLAN_ID_MASK)));
4436                /*
4437                 * Put the allowed pools in the VLVFB registers. Each VLAN
4438                 * filter has two pool-enable registers: pools 0-31 go into
4439                 * VLVFB(2*i) and pools 32-63 into VLVFB(2*i + 1).
4440                 */
4441                if (((cfg->pool_map[i].pools >> 32) & UINT32_MAX) == 0)
4442                        IXGBE_WRITE_REG(hw, IXGBE_VLVFB(i * 2),
4443                                        (cfg->pool_map[i].pools & UINT32_MAX));
4444                else
4445                        IXGBE_WRITE_REG(hw, IXGBE_VLVFB((i * 2 + 1)),
4446                                        ((cfg->pool_map[i].pools >> 32) & UINT32_MAX));
4447
4448        }
4449
4450        /* PFDMA Tx General Switch Control Enables VMDQ loopback */
4451        if (cfg->enable_loop_back) {
4452                IXGBE_WRITE_REG(hw, IXGBE_PFDTXGSWC, IXGBE_PFDTXGSWC_VT_LBEN);
4453                for (i = 0; i < RTE_IXGBE_VMTXSW_REGISTER_COUNT; i++)
4454                        IXGBE_WRITE_REG(hw, IXGBE_VMTXSW(i), UINT32_MAX);
4455        }
4456
4457        IXGBE_WRITE_FLUSH(hw);
4458}
4459
4460/*
4461 * ixgbe_vmdq_tx_hw_configure - Configure general VMDq TX parameters
4462 * @hw: pointer to hardware structure
4463 */
4464static void
4465ixgbe_vmdq_tx_hw_configure(struct ixgbe_hw *hw)
4466{
4467        uint32_t reg;
4468        uint32_t q;
4469
4470        PMD_INIT_FUNC_TRACE();
4471        /*PF VF Transmit Enable*/
4472        IXGBE_WRITE_REG(hw, IXGBE_VFTE(0), UINT32_MAX);
4473        IXGBE_WRITE_REG(hw, IXGBE_VFTE(1), UINT32_MAX);
4474
4475        /* Disable the Tx desc arbiter so that MTQC can be changed */
4476        reg = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
4477        reg |= IXGBE_RTTDCS_ARBDIS;
4478        IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, reg);
4479
4480        reg = IXGBE_MTQC_VT_ENA | IXGBE_MTQC_64VF;
4481        IXGBE_WRITE_REG(hw, IXGBE_MTQC, reg);
4482
4483        /* Disable drop for all queues */
4484        for (q = 0; q < IXGBE_MAX_RX_QUEUE_NUM; q++)
4485                IXGBE_WRITE_REG(hw, IXGBE_QDE,
4486                  (IXGBE_QDE_WRITE | (q << IXGBE_QDE_IDX_SHIFT)));
4487
4488        /* Enable the Tx desc arbiter */
4489        reg = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
4490        reg &= ~IXGBE_RTTDCS_ARBDIS;
4491        IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, reg);
4492
4493        IXGBE_WRITE_FLUSH(hw);
4494}
4495
4496static int __rte_cold
4497ixgbe_alloc_rx_queue_mbufs(struct ixgbe_rx_queue *rxq)
4498{
4499        struct ixgbe_rx_entry *rxe = rxq->sw_ring;
4500        uint64_t dma_addr;
4501        unsigned int i;
4502
4503        /* Initialize software ring entries */
4504        for (i = 0; i < rxq->nb_rx_desc; i++) {
4505                volatile union ixgbe_adv_rx_desc *rxd;
4506                struct rte_mbuf *mbuf = rte_mbuf_raw_alloc(rxq->mb_pool);
4507
4508                if (mbuf == NULL) {
4509                        PMD_INIT_LOG(ERR, "RX mbuf alloc failed queue_id=%u",
4510                                     (unsigned) rxq->queue_id);
4511                        return -ENOMEM;
4512                }
4513
4514                mbuf->data_off = RTE_PKTMBUF_HEADROOM;
4515                mbuf->port = rxq->port_id;
4516
4517                dma_addr =
4518                        rte_cpu_to_le_64(rte_mbuf_data_iova_default(mbuf));
4519                rxd = &rxq->rx_ring[i];
4520                rxd->read.hdr_addr = 0;
4521                rxd->read.pkt_addr = dma_addr;
4522                rxe[i].mbuf = mbuf;
4523        }
4524
4525        return 0;
4526}
4527
4528static int
4529ixgbe_config_vf_rss(struct rte_eth_dev *dev)
4530{
4531        struct ixgbe_hw *hw;
4532        uint32_t mrqc;
4533
4534        ixgbe_rss_configure(dev);
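        /*
         * ixgbe_rss_configure() has programmed the hash key and redirection
         * table; the rest of this function only switches MRQC to the
         * VMDq+RSS mode matching the number of active pools.
         */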
4535
4536        hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4537
4538        /* MRQC: enable VF RSS */
4539        mrqc = IXGBE_READ_REG(hw, IXGBE_MRQC);
4540        mrqc &= ~IXGBE_MRQC_MRQE_MASK;
4541        switch (RTE_ETH_DEV_SRIOV(dev).active) {
4542        case ETH_64_POOLS:
4543                mrqc |= IXGBE_MRQC_VMDQRSS64EN;
4544                break;
4545
4546        case ETH_32_POOLS:
4547                mrqc |= IXGBE_MRQC_VMDQRSS32EN;
4548                break;
4549
4550        default:
4551                PMD_INIT_LOG(ERR, "Invalid pool number in IOV mode with VMDQ RSS");
4552                return -EINVAL;
4553        }
4554
4555        IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
4556
4557        return 0;
4558}
4559
4560static int
4561ixgbe_config_vf_default(struct rte_eth_dev *dev)
4562{
4563        struct ixgbe_hw *hw =
4564                IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4565
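        /*
         * Default (non-RSS) case: select the VMDq MRQC mode matching the
         * SR-IOV pool count (64 pools = VMDq only, 32 = VMDq + 4 TCs,
         * 16 = VMDq + 8 TCs).
         */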
4566        switch (RTE_ETH_DEV_SRIOV(dev).active) {
4567        case ETH_64_POOLS:
4568                IXGBE_WRITE_REG(hw, IXGBE_MRQC,
4569                        IXGBE_MRQC_VMDQEN);
4570                break;
4571
4572        case ETH_32_POOLS:
4573                IXGBE_WRITE_REG(hw, IXGBE_MRQC,
4574                        IXGBE_MRQC_VMDQRT4TCEN);
4575                break;
4576
4577        case ETH_16_POOLS:
4578                IXGBE_WRITE_REG(hw, IXGBE_MRQC,
4579                        IXGBE_MRQC_VMDQRT8TCEN);
4580                break;
4581        default:
4582                PMD_INIT_LOG(ERR,
4583                        "invalid pool number in IOV mode");
4584                break;
4585        }
4586        return 0;
4587}
4588
4589static int
4590ixgbe_dev_mq_rx_configure(struct rte_eth_dev *dev)
4591{
4592        struct ixgbe_hw *hw =
4593                IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4594
4595        if (hw->mac.type == ixgbe_mac_82598EB)
4596                return 0;
4597
4598        if (RTE_ETH_DEV_SRIOV(dev).active == 0) {
4599                /*
4600                 * SRIOV inactive scheme
4601                 * any DCB/RSS w/o VMDq multi-queue setting
4602                 */
4603                switch (dev->data->dev_conf.rxmode.mq_mode) {
4604                case ETH_MQ_RX_RSS:
4605                case ETH_MQ_RX_DCB_RSS:
4606                case ETH_MQ_RX_VMDQ_RSS:
4607                        ixgbe_rss_configure(dev);
4608                        break;
4609
4610                case ETH_MQ_RX_VMDQ_DCB:
4611                        ixgbe_vmdq_dcb_configure(dev);
4612                        break;
4613
4614                case ETH_MQ_RX_VMDQ_ONLY:
4615                        ixgbe_vmdq_rx_hw_configure(dev);
4616                        break;
4617
4618                case ETH_MQ_RX_NONE:
4619                default:
4620                        /* if mq_mode is none, disable rss mode.*/
4621                        ixgbe_rss_disable(dev);
4622                        break;
4623                }
4624        } else {
4625                /* SRIOV active scheme
4626                 * Support RSS together with SRIOV.
4627                 */
4628                switch (dev->data->dev_conf.rxmode.mq_mode) {
4629                case ETH_MQ_RX_RSS:
4630                case ETH_MQ_RX_VMDQ_RSS:
4631                        ixgbe_config_vf_rss(dev);
4632                        break;
4633                case ETH_MQ_RX_VMDQ_DCB:
4634                case ETH_MQ_RX_DCB:
4635                /* In SRIOV, the configuration is the same as VMDq case */
4636                        ixgbe_vmdq_dcb_configure(dev);
4637                        break;
4638                /* DCB/RSS together with SRIOV is not supported */
4639                case ETH_MQ_RX_VMDQ_DCB_RSS:
4640                case ETH_MQ_RX_DCB_RSS:
4641                        PMD_INIT_LOG(ERR,
4642                                "Could not support DCB/RSS with VMDq & SRIOV");
4643                        return -1;
4644                default:
4645                        ixgbe_config_vf_default(dev);
4646                        break;
4647                }
4648        }
4649
4650        return 0;
4651}
4652
4653static int
4654ixgbe_dev_mq_tx_configure(struct rte_eth_dev *dev)
4655{
4656        struct ixgbe_hw *hw =
4657                IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4658        uint32_t mtqc;
4659        uint32_t rttdcs;
4660
4661        if (hw->mac.type == ixgbe_mac_82598EB)
4662                return 0;
4663
4664        /* disable arbiter before setting MTQC */
4665        rttdcs = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
4666        rttdcs |= IXGBE_RTTDCS_ARBDIS;
4667        IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs);
4668
4669        if (RTE_ETH_DEV_SRIOV(dev).active == 0) {
4670                /*
4671                 * SRIOV inactive scheme
4672                 * any DCB w/o VMDq multi-queue setting
4673                 */
4674                if (dev->data->dev_conf.txmode.mq_mode == ETH_MQ_TX_VMDQ_ONLY)
4675                        ixgbe_vmdq_tx_hw_configure(hw);
4676                else {
4677                        mtqc = IXGBE_MTQC_64Q_1PB;
4678                        IXGBE_WRITE_REG(hw, IXGBE_MTQC, mtqc);
4679                }
4680        } else {
4681                switch (RTE_ETH_DEV_SRIOV(dev).active) {
4682
4683                /*
4684                 * SRIOV active scheme
4685                 * FIXME if support DCB together with VMDq & SRIOV
4686                 */
4687                case ETH_64_POOLS:
4688                        mtqc = IXGBE_MTQC_VT_ENA | IXGBE_MTQC_64VF;
4689                        break;
4690                case ETH_32_POOLS:
4691                        mtqc = IXGBE_MTQC_VT_ENA | IXGBE_MTQC_32VF;
4692                        break;
4693                case ETH_16_POOLS:
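                        /*
                         * 16 pools leaves 8 Tx queues per pool, so the
                         * RT-enabled 8 TC / 8 TQ layout is used.
                         */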
4694                        mtqc = IXGBE_MTQC_VT_ENA | IXGBE_MTQC_RT_ENA |
4695                                IXGBE_MTQC_8TC_8TQ;
4696                        break;
4697                default:
4698                        mtqc = IXGBE_MTQC_64Q_1PB;
4699                        PMD_INIT_LOG(ERR, "invalid pool number in IOV mode");
4700                }
4701                IXGBE_WRITE_REG(hw, IXGBE_MTQC, mtqc);
4702        }
4703
4704        /* re-enable arbiter */
4705        rttdcs &= ~IXGBE_RTTDCS_ARBDIS;
4706        IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs);
4707
4708        return 0;
4709}
4710
4711/**
4712 * ixgbe_get_rscctl_maxdesc - Calculate the RSCCTL[n].MAXDESC for PF
4713 *
4714 * Return the RSCCTL[n].MAXDESC for 82599 and x540 PF devices according to the
4715 * spec rev. 3.0 chapter 8.2.3.8.13.
4716 *
4717 * @pool Memory pool of the Rx queue
4718 */
4719static inline uint32_t
4720ixgbe_get_rscctl_maxdesc(struct rte_mempool *pool)
4721{
4722        struct rte_pktmbuf_pool_private *mp_priv = rte_mempool_get_priv(pool);
4723
4724        /* MAXDESC * SRRCTL.BSIZEPKT must not exceed 64 KB minus one */
4725        uint16_t maxdesc =
4726                RTE_IPV4_MAX_PKT_LEN /
4727                        (mp_priv->mbuf_data_room_size - RTE_PKTMBUF_HEADROOM);
4728
4729        if (maxdesc >= 16)
4730                return IXGBE_RSCCTL_MAXDESC_16;
4731        else if (maxdesc >= 8)
4732                return IXGBE_RSCCTL_MAXDESC_8;
4733        else if (maxdesc >= 4)
4734                return IXGBE_RSCCTL_MAXDESC_4;
4735        else
4736                return IXGBE_RSCCTL_MAXDESC_1;
4737}
4738
4739/**
4740 * ixgbe_set_ivar - Setup the correct IVAR register for a particular MSIX
4741 * interrupt
4742 *
4743 * (Taken from FreeBSD tree)
4744 * (yes this is all very magic and confusing :)
4745 *
4746 * @dev port handle
4747 * @entry the register array entry
4748 * @vector the MSIX vector for this queue
4749 * @type RX/TX/MISC
4750 */
4751static void
4752ixgbe_set_ivar(struct rte_eth_dev *dev, u8 entry, u8 vector, s8 type)
4753{
4754        struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4755        u32 ivar, index;
4756
4757        vector |= IXGBE_IVAR_ALLOC_VAL;
4758
4759        switch (hw->mac.type) {
4760
4761        case ixgbe_mac_82598EB:
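                /*
                 * 82598: Rx entries use indices 0-63 and Tx entries 64-127.
                 * Each IVAR register holds four 8-bit vector entries, so
                 * entry >> 2 selects the register and entry & 3 the byte.
                 */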
4762                if (type == -1)
4763                        entry = IXGBE_IVAR_OTHER_CAUSES_INDEX;
4764                else
4765                        entry += (type * 64);
4766                index = (entry >> 2) & 0x1F;
4767                ivar = IXGBE_READ_REG(hw, IXGBE_IVAR(index));
4768                ivar &= ~(0xFF << (8 * (entry & 0x3)));
4769                ivar |= (vector << (8 * (entry & 0x3)));
4770                IXGBE_WRITE_REG(hw, IXGBE_IVAR(index), ivar);
4771                break;
4772
4773        case ixgbe_mac_82599EB:
4774        case ixgbe_mac_X540:
4775                if (type == -1) { /* MISC IVAR */
4776                        index = (entry & 1) * 8;
4777                        ivar = IXGBE_READ_REG(hw, IXGBE_IVAR_MISC);
4778                        ivar &= ~(0xFF << index);
4779                        ivar |= (vector << index);
4780                        IXGBE_WRITE_REG(hw, IXGBE_IVAR_MISC, ivar);
4781                } else {        /* RX/TX IVARS */
4782                        index = (16 * (entry & 1)) + (8 * type);
4783                        ivar = IXGBE_READ_REG(hw, IXGBE_IVAR(entry >> 1));
4784                        ivar &= ~(0xFF << index);
4785                        ivar |= (vector << index);
4786                        IXGBE_WRITE_REG(hw, IXGBE_IVAR(entry >> 1), ivar);
4787                }
4788
4789                break;
4790
4791        default:
4792                break;
4793        }
4794}
4795
4796void __rte_cold
4797ixgbe_set_rx_function(struct rte_eth_dev *dev)
4798{
4799        uint16_t i, rx_using_sse;
4800        struct ixgbe_adapter *adapter = dev->data->dev_private;
4801
4802        /*
4803         * Vector Rx may only be used if a few configuration conditions
4804         * are met and Rx Bulk Allocation is allowed.
4805         */
4806        if (ixgbe_rx_vec_dev_conf_condition_check(dev) ||
4807            !adapter->rx_bulk_alloc_allowed ||
4808                        rte_vect_get_max_simd_bitwidth() < RTE_VECT_SIMD_128) {
4809                PMD_INIT_LOG(DEBUG, "Port[%d] doesn't meet Vector Rx "
4810                                    "preconditions",
4811                             dev->data->port_id);
4812
4813                adapter->rx_vec_allowed = false;
4814        }
4815
4816        /*
4817         * Initialize the appropriate LRO callback.
4818         *
4819         * If all queues satisfy the bulk allocation preconditions
4820         * (adapter->rx_bulk_alloc_allowed is TRUE) then we may use bulk allocation.
4821         * Otherwise use a single allocation version.
4822         */
4823        if (dev->data->lro) {
4824                if (adapter->rx_bulk_alloc_allowed) {
4825                        PMD_INIT_LOG(DEBUG, "LRO is requested. Using a bulk "
4826                                           "allocation version");
4827                        dev->rx_pkt_burst = ixgbe_recv_pkts_lro_bulk_alloc;
4828                } else {
4829                        PMD_INIT_LOG(DEBUG, "LRO is requested. Using a single "
4830                                           "allocation version");
4831                        dev->rx_pkt_burst = ixgbe_recv_pkts_lro_single_alloc;
4832                }
4833        } else if (dev->data->scattered_rx) {
4834                /*
4835                 * Set the non-LRO scattered callback: there are Vector and
4836                 * single allocation versions.
4837                 */
4838                if (adapter->rx_vec_allowed) {
4839                        PMD_INIT_LOG(DEBUG, "Using Vector Scattered Rx "
4840                                            "callback (port=%d).",
4841                                     dev->data->port_id);
4842
4843                        dev->rx_pkt_burst = ixgbe_recv_scattered_pkts_vec;
4844                } else if (adapter->rx_bulk_alloc_allowed) {
4845                        PMD_INIT_LOG(DEBUG, "Using a Scattered with bulk "
4846                                           "allocation callback (port=%d).",
4847                                     dev->data->port_id);
4848                        dev->rx_pkt_burst = ixgbe_recv_pkts_lro_bulk_alloc;
4849                } else {
4850                        PMD_INIT_LOG(DEBUG, "Using Regualr (non-vector, "
4851                                            "single allocation) "
4852                                            "Scattered Rx callback "
4853                                            "(port=%d).",
4854                                     dev->data->port_id);
4855
4856                        dev->rx_pkt_burst = ixgbe_recv_pkts_lro_single_alloc;
4857                }
4858        /*
4859         * Below we set "simple" callbacks according to port/queues parameters.
4860         * If parameters allow we are going to choose between the following
4861         * callbacks:
4862         *    - Vector
4863         *    - Bulk Allocation
4864         *    - Single buffer allocation (the simplest one)
4865         */
4866        } else if (adapter->rx_vec_allowed) {
4867                PMD_INIT_LOG(DEBUG, "Vector rx enabled, please make sure RX "
4868                                    "burst size no less than %d (port=%d).",
4869                             RTE_IXGBE_DESCS_PER_LOOP,
4870                             dev->data->port_id);
4871
4872                dev->rx_pkt_burst = ixgbe_recv_pkts_vec;
4873        } else if (adapter->rx_bulk_alloc_allowed) {
4874                PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions are "
4875                                    "satisfied. Rx Burst Bulk Alloc function "
4876                                    "will be used on port=%d.",
4877                             dev->data->port_id);
4878
4879                dev->rx_pkt_burst = ixgbe_recv_pkts_bulk_alloc;
4880        } else {
4881                PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions are not "
4882                                    "satisfied, or Scattered Rx is requested "
4883                                    "(port=%d).",
4884                             dev->data->port_id);
4885
4886                dev->rx_pkt_burst = ixgbe_recv_pkts;
4887        }
4888
4889        /* Propagate information about RX function choice through all queues. */
4890
4891        rx_using_sse =
4892                (dev->rx_pkt_burst == ixgbe_recv_scattered_pkts_vec ||
4893                dev->rx_pkt_burst == ixgbe_recv_pkts_vec);
4894
4895        for (i = 0; i < dev->data->nb_rx_queues; i++) {
4896                struct ixgbe_rx_queue *rxq = dev->data->rx_queues[i];
4897
4898                rxq->rx_using_sse = rx_using_sse;
4899#ifdef RTE_LIB_SECURITY
4900                rxq->using_ipsec = !!(dev->data->dev_conf.rxmode.offloads &
4901                                DEV_RX_OFFLOAD_SECURITY);
4902#endif
4903        }
4904}
4905
4906/**
4907 * ixgbe_set_rsc - configure RSC related port HW registers
4908 *
4909 * Configures the port's RSC related registers according to chapter 4.6.7.2
4910 * of 82599 Spec (x540 configuration is virtually the same).
4911 *
4912 * @dev port handle
4913 *
4914 * Returns 0 in case of success or a non-zero error code
4915 */
4916static int
4917ixgbe_set_rsc(struct rte_eth_dev *dev)
4918{
4919        struct rte_eth_rxmode *rx_conf = &dev->data->dev_conf.rxmode;
4920        struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4921        struct rte_eth_dev_info dev_info = { 0 };
4922        bool rsc_capable = false;
4923        uint16_t i;
4924        uint32_t rdrxctl;
4925        uint32_t rfctl;
4926
4927        /* Sanity check */
4928        dev->dev_ops->dev_infos_get(dev, &dev_info);
4929        if (dev_info.rx_offload_capa & DEV_RX_OFFLOAD_TCP_LRO)
4930                rsc_capable = true;
4931
4932        if (!rsc_capable && (rx_conf->offloads & DEV_RX_OFFLOAD_TCP_LRO)) {
4933                PMD_INIT_LOG(CRIT, "LRO is requested on HW that doesn't "
4934                                   "support it");
4935                return -EINVAL;
4936        }
4937
4938        /* RSC global configuration (chapter 4.6.7.2.1 of 82599 Spec) */
4939
4940        if ((rx_conf->offloads & DEV_RX_OFFLOAD_KEEP_CRC) &&
4941             (rx_conf->offloads & DEV_RX_OFFLOAD_TCP_LRO)) {
4942                /*
4943                 * According to chapter 4.6.7.2.1 of the Spec Rev.
4944                 * 3.0 RSC configuration requires HW CRC stripping being
4945                 * enabled. If user requested both HW CRC stripping off
4946                 * and RSC on - return an error.
4947                 */
4948                PMD_INIT_LOG(CRIT, "LRO can't be enabled when HW CRC "
4949                                    "is disabled");
4950                return -EINVAL;
4951        }
4952
4953        /* RFCTL configuration  */
4954        rfctl = IXGBE_READ_REG(hw, IXGBE_RFCTL);
4955        if ((rsc_capable) && (rx_conf->offloads & DEV_RX_OFFLOAD_TCP_LRO))
4956                rfctl &= ~IXGBE_RFCTL_RSC_DIS;
4957        else
4958                rfctl |= IXGBE_RFCTL_RSC_DIS;
4959        /* disable NFS filtering */
4960        rfctl |= IXGBE_RFCTL_NFSW_DIS | IXGBE_RFCTL_NFSR_DIS;
4961        IXGBE_WRITE_REG(hw, IXGBE_RFCTL, rfctl);
4962
4963        /* If LRO hasn't been requested - we are done here. */
4964        if (!(rx_conf->offloads & DEV_RX_OFFLOAD_TCP_LRO))
4965                return 0;
4966
4967        /* Set RDRXCTL.RSCACKC bit */
4968        rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
4969        rdrxctl |= IXGBE_RDRXCTL_RSCACKC;
4970        IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
4971
4972        /* Per-queue RSC configuration (chapter 4.6.7.2.2 of 82599 Spec) */
4973        for (i = 0; i < dev->data->nb_rx_queues; i++) {
4974                struct ixgbe_rx_queue *rxq = dev->data->rx_queues[i];
4975                uint32_t srrctl =
4976                        IXGBE_READ_REG(hw, IXGBE_SRRCTL(rxq->reg_idx));
4977                uint32_t rscctl =
4978                        IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxq->reg_idx));
4979                uint32_t psrtype =
4980                        IXGBE_READ_REG(hw, IXGBE_PSRTYPE(rxq->reg_idx));
4981                uint32_t eitr =
4982                        IXGBE_READ_REG(hw, IXGBE_EITR(rxq->reg_idx));
4983
4984                /*
4985                 * ixgbe PMD doesn't support header-split at the moment.
4986                 *
4987                 * Following the 4.6.7.2.1 chapter of the 82599/x540
4988                 * Spec if RSC is enabled the SRRCTL[n].BSIZEHEADER
4989                 * should be configured even if header split is not
4990                 * enabled. We will configure it to 128 bytes, following the
4991                 * recommendation in the spec.
4992                 */
4993                srrctl &= ~IXGBE_SRRCTL_BSIZEHDR_MASK;
4994                srrctl |= (128 << IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT) &
4995                                            IXGBE_SRRCTL_BSIZEHDR_MASK;
4996
4997                /*
4998                 * TODO: Consider setting the Receive Descriptor Minimum
4999                 * Threshold Size for an RSC case. This is not an obviously
5000                 * beneficial option, but one worth considering...
5001                 */
5002
5003                rscctl |= IXGBE_RSCCTL_RSCEN;
5004                rscctl |= ixgbe_get_rscctl_maxdesc(rxq->mb_pool);
5005                psrtype |= IXGBE_PSRTYPE_TCPHDR;
5006
5007                /*
5008                 * RSC: Set ITR interval corresponding to 2K ints/s.
5009                 *
5010                 * Full-sized RSC aggregations for a 10Gb/s link will
5011                 * arrive at about 20K aggregation/s rate.
5012                 *
5013                 * A 2K ints/s rate will cause only about 10% of the
5014                 * aggregations to be closed due to interrupt timer
5015                 * expiration when streaming at wire speed.
5016                 *
5017                 * For a sparse streaming case this setting will yield
5018                 * at most 500us latency for a single RSC aggregation.
5019                 */
5020                eitr &= ~IXGBE_EITR_ITR_INT_MASK;
5021                eitr |= IXGBE_EITR_INTERVAL_US(IXGBE_QUEUE_ITR_INTERVAL_DEFAULT);
5022                eitr |= IXGBE_EITR_CNT_WDIS;
5023
5024                IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(rxq->reg_idx), srrctl);
5025                IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxq->reg_idx), rscctl);
5026                IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(rxq->reg_idx), psrtype);
5027                IXGBE_WRITE_REG(hw, IXGBE_EITR(rxq->reg_idx), eitr);
5028
5029                /*
5030                 * RSC requires the mapping of the queue to the
5031                 * interrupt vector.
5032                 */
5033                ixgbe_set_ivar(dev, rxq->reg_idx, i, 0);
5034        }
5035
5036        dev->data->lro = 1;
5037
5038        PMD_INIT_LOG(DEBUG, "enabling LRO mode");
5039
5040        return 0;
5041}
5042
5043/*
5044 * Initializes Receive Unit.
5045 */
5046int __rte_cold
5047ixgbe_dev_rx_init(struct rte_eth_dev *dev)
5048{
5049        struct ixgbe_hw     *hw;
5050        struct ixgbe_rx_queue *rxq;
5051        uint64_t bus_addr;
5052        uint32_t rxctrl;
5053        uint32_t fctrl;
5054        uint32_t hlreg0;
5055        uint32_t maxfrs;
5056        uint32_t srrctl;
5057        uint32_t rdrxctl;
5058        uint32_t rxcsum;
5059        uint16_t buf_size;
5060        uint16_t i;
5061        struct rte_eth_rxmode *rx_conf = &dev->data->dev_conf.rxmode;
5062        int rc;
5063
5064        PMD_INIT_FUNC_TRACE();
5065        hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5066
5067        /*
5068         * Make sure receives are disabled while setting
5069         * up the RX context (registers, descriptor rings, etc.).
5070         */
5071        rxctrl = IXGBE_READ_REG(hw, IXGBE_RXCTRL);
5072        IXGBE_WRITE_REG(hw, IXGBE_RXCTRL, rxctrl & ~IXGBE_RXCTRL_RXEN);
5073
5074        /* Enable receipt of broadcast frames */
5075        fctrl = IXGBE_READ_REG(hw, IXGBE_FCTRL);
5076        fctrl |= IXGBE_FCTRL_BAM;
5077        fctrl |= IXGBE_FCTRL_DPF;
5078        fctrl |= IXGBE_FCTRL_PMCF;
5079        IXGBE_WRITE_REG(hw, IXGBE_FCTRL, fctrl);
5080
5081        /*
5082         * Configure CRC stripping, if any.
5083         */
5084        hlreg0 = IXGBE_READ_REG(hw, IXGBE_HLREG0);
5085        if (rx_conf->offloads & DEV_RX_OFFLOAD_KEEP_CRC)
5086                hlreg0 &= ~IXGBE_HLREG0_RXCRCSTRP;
5087        else
5088                hlreg0 |= IXGBE_HLREG0_RXCRCSTRP;
5089
5090        /*
5091         * Configure jumbo frame support, if any.
5092         */
5093        if (rx_conf->offloads & DEV_RX_OFFLOAD_JUMBO_FRAME) {
5094                hlreg0 |= IXGBE_HLREG0_JUMBOEN;
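                /* The max frame size (MFS) field occupies bits 31:16 of MAXFRS */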
5095                maxfrs = IXGBE_READ_REG(hw, IXGBE_MAXFRS);
5096                maxfrs &= 0x0000FFFF;
5097                maxfrs |= (rx_conf->max_rx_pkt_len << 16);
5098                IXGBE_WRITE_REG(hw, IXGBE_MAXFRS, maxfrs);
5099        } else
5100                hlreg0 &= ~IXGBE_HLREG0_JUMBOEN;
5101
5102        /*
5103         * If loopback mode is configured, set LPBK bit.
5104         */
5105        if (dev->data->dev_conf.lpbk_mode != 0) {
5106                rc = ixgbe_check_supported_loopback_mode(dev);
5107                if (rc < 0) {
5108                        PMD_INIT_LOG(ERR, "Unsupported loopback mode");
5109                        return rc;
5110                }
5111                hlreg0 |= IXGBE_HLREG0_LPBK;
5112        } else {
5113                hlreg0 &= ~IXGBE_HLREG0_LPBK;
5114        }
5115
5116        IXGBE_WRITE_REG(hw, IXGBE_HLREG0, hlreg0);
5117
5118        /*
5119         * Assume no header split and no VLAN strip support
5120         * on any Rx queue first.
5121         */
5122        rx_conf->offloads &= ~DEV_RX_OFFLOAD_VLAN_STRIP;
5123        /* Setup RX queues */
5124        for (i = 0; i < dev->data->nb_rx_queues; i++) {
5125                rxq = dev->data->rx_queues[i];
5126
5127                /*
5128                 * Reset crc_len in case it was changed after queue setup by a
5129                 * call to configure.
5130                 */
5131                if (rx_conf->offloads & DEV_RX_OFFLOAD_KEEP_CRC)
5132                        rxq->crc_len = RTE_ETHER_CRC_LEN;
5133                else
5134                        rxq->crc_len = 0;
5135
5136                /* Setup the Base and Length of the Rx Descriptor Rings */
5137                bus_addr = rxq->rx_ring_phys_addr;
5138                IXGBE_WRITE_REG(hw, IXGBE_RDBAL(rxq->reg_idx),
5139                                (uint32_t)(bus_addr & 0x00000000ffffffffULL));
5140                IXGBE_WRITE_REG(hw, IXGBE_RDBAH(rxq->reg_idx),
5141                                (uint32_t)(bus_addr >> 32));
5142                IXGBE_WRITE_REG(hw, IXGBE_RDLEN(rxq->reg_idx),
5143                                rxq->nb_rx_desc * sizeof(union ixgbe_adv_rx_desc));
5144                IXGBE_WRITE_REG(hw, IXGBE_RDH(rxq->reg_idx), 0);
5145                IXGBE_WRITE_REG(hw, IXGBE_RDT(rxq->reg_idx), 0);
5146
5147                /* Configure the SRRCTL register */
5148                srrctl = IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF;
5149
5150                /* Set if packets are dropped when no descriptors available */
5151                if (rxq->drop_en)
5152                        srrctl |= IXGBE_SRRCTL_DROP_EN;
5153
5154                /*
5155                 * Configure the RX buffer size in the BSIZEPACKET field of
5156                 * the SRRCTL register of the queue.
5157                 * The value is in 1 KB resolution. Valid values can be from
5158                 * 1 KB to 16 KB.
5159                 */
5160                buf_size = (uint16_t)(rte_pktmbuf_data_room_size(rxq->mb_pool) -
5161                        RTE_PKTMBUF_HEADROOM);
5162                srrctl |= ((buf_size >> IXGBE_SRRCTL_BSIZEPKT_SHIFT) &
5163                           IXGBE_SRRCTL_BSIZEPKT_MASK);
5164
5165                IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(rxq->reg_idx), srrctl);
5166
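                /*
                 * Recompute buf_size as the 1 KB-granular value that was
                 * actually programmed into SRRCTL above.
                 */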
5167                buf_size = (uint16_t) ((srrctl & IXGBE_SRRCTL_BSIZEPKT_MASK) <<
5168                                       IXGBE_SRRCTL_BSIZEPKT_SHIFT);
5169
5170                /* Allow for two VLAN tags (QinQ) when checking whether the frame fits */
5171                if (dev->data->dev_conf.rxmode.max_rx_pkt_len +
5172                                            2 * IXGBE_VLAN_TAG_SIZE > buf_size)
5173                        dev->data->scattered_rx = 1;
5174                if (rxq->offloads & DEV_RX_OFFLOAD_VLAN_STRIP)
5175                        rx_conf->offloads |= DEV_RX_OFFLOAD_VLAN_STRIP;
5176        }
5177
5178        if (rx_conf->offloads & DEV_RX_OFFLOAD_SCATTER)
5179                dev->data->scattered_rx = 1;
5180
5181        /*
5182         * Device configured with multiple RX queues.
5183         */
5184        ixgbe_dev_mq_rx_configure(dev);
5185
5186        /*
5187         * Setup the Checksum Register.
5188         * Disable Full-Packet Checksum which is mutually exclusive with RSS.
5189         * Enable IP/L4 checksum computation by hardware if requested to do so.
5190         */
5191        rxcsum = IXGBE_READ_REG(hw, IXGBE_RXCSUM);
5192        rxcsum |= IXGBE_RXCSUM_PCSD;
5193        if (rx_conf->offloads & DEV_RX_OFFLOAD_CHECKSUM)
5194                rxcsum |= IXGBE_RXCSUM_IPPCSE;
5195        else
5196                rxcsum &= ~IXGBE_RXCSUM_IPPCSE;
5197
5198        IXGBE_WRITE_REG(hw, IXGBE_RXCSUM, rxcsum);
5199
5200        if (hw->mac.type == ixgbe_mac_82599EB ||
5201            hw->mac.type == ixgbe_mac_X540) {
5202                rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
5203                if (rx_conf->offloads & DEV_RX_OFFLOAD_KEEP_CRC)
5204                        rdrxctl &= ~IXGBE_RDRXCTL_CRCSTRIP;
5205                else
5206                        rdrxctl |= IXGBE_RDRXCTL_CRCSTRIP;
5207                rdrxctl &= ~IXGBE_RDRXCTL_RSCFRSTSIZE;
5208                IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
5209        }
5210
5211        rc = ixgbe_set_rsc(dev);
5212        if (rc)
5213                return rc;
5214
5215        ixgbe_set_rx_function(dev);
5216
5217        return 0;
5218}
5219
5220/*
5221 * Initializes Transmit Unit.
5222 */
5223void __rte_cold
5224ixgbe_dev_tx_init(struct rte_eth_dev *dev)
5225{
5226        struct ixgbe_hw     *hw;
5227        struct ixgbe_tx_queue *txq;
5228        uint64_t bus_addr;
5229        uint32_t hlreg0;
5230        uint32_t txctrl;
5231        uint16_t i;
5232
5233        PMD_INIT_FUNC_TRACE();
5234        hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5235
5236        /* Enable TX CRC (checksum offload requirement) and hw padding
5237         * (TSO requirement)
5238         */
5239        hlreg0 = IXGBE_READ_REG(hw, IXGBE_HLREG0);
5240        hlreg0 |= (IXGBE_HLREG0_TXCRCEN | IXGBE_HLREG0_TXPADEN);
5241        IXGBE_WRITE_REG(hw, IXGBE_HLREG0, hlreg0);
5242
5243        /* Setup the Base and Length of the Tx Descriptor Rings */
5244        for (i = 0; i < dev->data->nb_tx_queues; i++) {
5245                txq = dev->data->tx_queues[i];
5246
5247                bus_addr = txq->tx_ring_phys_addr;
5248                IXGBE_WRITE_REG(hw, IXGBE_TDBAL(txq->reg_idx),
5249                                (uint32_t)(bus_addr & 0x00000000ffffffffULL));
5250                IXGBE_WRITE_REG(hw, IXGBE_TDBAH(txq->reg_idx),
5251                                (uint32_t)(bus_addr >> 32));
5252                IXGBE_WRITE_REG(hw, IXGBE_TDLEN(txq->reg_idx),
5253                                txq->nb_tx_desc * sizeof(union ixgbe_adv_tx_desc));
5254                /* Setup the HW Tx Head and TX Tail descriptor pointers */
5255                IXGBE_WRITE_REG(hw, IXGBE_TDH(txq->reg_idx), 0);
5256                IXGBE_WRITE_REG(hw, IXGBE_TDT(txq->reg_idx), 0);
5257
5258                /*
5259                 * Disable the Tx Head Writeback RO bit, since it breaks
5260                 * descriptor bookkeeping if write-backs arrive out of order.
5261                 */
5262                switch (hw->mac.type) {
5263                case ixgbe_mac_82598EB:
5264                        txctrl = IXGBE_READ_REG(hw,
5265                                                IXGBE_DCA_TXCTRL(txq->reg_idx));
5266                        txctrl &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN;
5267                        IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL(txq->reg_idx),
5268                                        txctrl);
5269                        break;
5270
5271                case ixgbe_mac_82599EB:
5272                case ixgbe_mac_X540:
5273                case ixgbe_mac_X550:
5274                case ixgbe_mac_X550EM_x:
5275                case ixgbe_mac_X550EM_a:
5276                default:
5277                        txctrl = IXGBE_READ_REG(hw,
5278                                                IXGBE_DCA_TXCTRL_82599(txq->reg_idx));
5279                        txctrl &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN;
5280                        IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL_82599(txq->reg_idx),
5281                                        txctrl);
5282                        break;
5283                }
5284        }
5285
5286        /* Configure the device for operation with multiple TX queues. */
5287        ixgbe_dev_mq_tx_configure(dev);
5288}
5289
5290/*
5291 * Check if requested loopback mode is supported
5292 */
5293int
5294ixgbe_check_supported_loopback_mode(struct rte_eth_dev *dev)
5295{
5296        struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5297
5298        if (dev->data->dev_conf.lpbk_mode == IXGBE_LPBK_TX_RX)
5299                if (hw->mac.type == ixgbe_mac_82599EB ||
5300                     hw->mac.type == ixgbe_mac_X540 ||
5301                     hw->mac.type == ixgbe_mac_X550 ||
5302                     hw->mac.type == ixgbe_mac_X550EM_x ||
5303                     hw->mac.type == ixgbe_mac_X550EM_a)
5304                        return 0;
5305
5306        return -ENOTSUP;
5307}
5308
5309/*
5310 * Set up link for 82599 loopback mode Tx->Rx.
5311 */
5312static inline void __rte_cold
5313ixgbe_setup_loopback_link_82599(struct ixgbe_hw *hw)
5314{
5315        PMD_INIT_FUNC_TRACE();
5316
5317        if (ixgbe_verify_lesm_fw_enabled_82599(hw)) {
5318                if (hw->mac.ops.acquire_swfw_sync(hw, IXGBE_GSSR_MAC_CSR_SM) !=
5319                                IXGBE_SUCCESS) {
5320                        PMD_INIT_LOG(ERR, "Could not enable loopback mode");
5321                        /* ignore error */
5322                        return;
5323                }
5324        }
5325
5326        /* Restart link */
5327        IXGBE_WRITE_REG(hw,
5328                        IXGBE_AUTOC,
5329                        IXGBE_AUTOC_LMS_10G_LINK_NO_AN | IXGBE_AUTOC_FLU);
5330        ixgbe_reset_pipeline_82599(hw);
5331
5332        hw->mac.ops.release_swfw_sync(hw, IXGBE_GSSR_MAC_CSR_SM);
5333        msec_delay(50);
5334}
5335
5336
5337/*
5338 * Start Transmit and Receive Units.
5339 */
5340int __rte_cold
5341ixgbe_dev_rxtx_start(struct rte_eth_dev *dev)
5342{
5343        struct ixgbe_hw     *hw;
5344        struct ixgbe_tx_queue *txq;
5345        struct ixgbe_rx_queue *rxq;
5346        uint32_t txdctl;
5347        uint32_t dmatxctl;
5348        uint32_t rxctrl;
5349        uint16_t i;
5350        int ret = 0;
5351
5352        PMD_INIT_FUNC_TRACE();
5353        hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5354
5355        for (i = 0; i < dev->data->nb_tx_queues; i++) {
5356                txq = dev->data->tx_queues[i];
5357                /* Setup Transmit Threshold Registers */
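                /*
                 * PTHRESH occupies TXDCTL bits 6:0, HTHRESH bits 14:8 and
                 * WTHRESH bits 22:16; the commonly used 32/0/0 defaults,
                 * for instance, contribute just 0x20 to the register.
                 */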
5358                txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(txq->reg_idx));
5359                txdctl |= txq->pthresh & 0x7F;
5360                txdctl |= ((txq->hthresh & 0x7F) << 8);
5361                txdctl |= ((txq->wthresh & 0x7F) << 16);
5362                IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(txq->reg_idx), txdctl);
5363        }
5364
5365        if (hw->mac.type != ixgbe_mac_82598EB) {
5366                dmatxctl = IXGBE_READ_REG(hw, IXGBE_DMATXCTL);
5367                dmatxctl |= IXGBE_DMATXCTL_TE;
5368                IXGBE_WRITE_REG(hw, IXGBE_DMATXCTL, dmatxctl);
5369        }
5370
5371        for (i = 0; i < dev->data->nb_tx_queues; i++) {
5372                txq = dev->data->tx_queues[i];
5373                if (!txq->tx_deferred_start) {
5374                        ret = ixgbe_dev_tx_queue_start(dev, i);
5375                        if (ret < 0)
5376                                return ret;
5377                }
5378        }
5379
5380        for (i = 0; i < dev->data->nb_rx_queues; i++) {
5381                rxq = dev->data->rx_queues[i];
5382                if (!rxq->rx_deferred_start) {
5383                        ret = ixgbe_dev_rx_queue_start(dev, i);
5384                        if (ret < 0)
5385                                return ret;
5386                }
5387        }
5388
5389        /* Enable Receive engine */
5390        rxctrl = IXGBE_READ_REG(hw, IXGBE_RXCTRL);
5391        if (hw->mac.type == ixgbe_mac_82598EB)
5392                rxctrl |= IXGBE_RXCTRL_DMBYPS;
5393        rxctrl |= IXGBE_RXCTRL_RXEN;
5394        hw->mac.ops.enable_rx_dma(hw, rxctrl);
5395
5396        /* If loopback mode is enabled, set up the link accordingly */
5397        if (dev->data->dev_conf.lpbk_mode != 0) {
5398                if (hw->mac.type == ixgbe_mac_82599EB)
5399                        ixgbe_setup_loopback_link_82599(hw);
5400                else if (hw->mac.type == ixgbe_mac_X540 ||
5401                     hw->mac.type == ixgbe_mac_X550 ||
5402                     hw->mac.type == ixgbe_mac_X550EM_x ||
5403                     hw->mac.type == ixgbe_mac_X550EM_a)
5404                        ixgbe_setup_loopback_link_x540_x550(hw, true);
5405        }
5406
5407#ifdef RTE_LIB_SECURITY
5408        if ((dev->data->dev_conf.rxmode.offloads &
5409                        DEV_RX_OFFLOAD_SECURITY) ||
5410                (dev->data->dev_conf.txmode.offloads &
5411                        DEV_TX_OFFLOAD_SECURITY)) {
5412                ret = ixgbe_crypto_enable_ipsec(dev);
5413                if (ret != 0) {
5414                        PMD_DRV_LOG(ERR,
5415                                    "ixgbe_crypto_enable_ipsec fails with %d.",
5416                                    ret);
5417                        return ret;
5418                }
5419        }
5420#endif
5421
5422        return 0;
5423}
5424
5425/*
5426 * Start Receive Units for specified queue.
5427 */
5428int __rte_cold
5429ixgbe_dev_rx_queue_start(struct rte_eth_dev *dev, uint16_t rx_queue_id)
5430{
5431        struct ixgbe_hw     *hw;
5432        struct ixgbe_rx_queue *rxq;
5433        uint32_t rxdctl;
5434        int poll_ms;
5435
5436        PMD_INIT_FUNC_TRACE();
5437        hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5438
5439        rxq = dev->data->rx_queues[rx_queue_id];
5440
5441        /* Allocate buffers for descriptor rings */
5442        if (ixgbe_alloc_rx_queue_mbufs(rxq) != 0) {
5443                PMD_INIT_LOG(ERR, "Could not alloc mbuf for queue:%d",
5444                             rx_queue_id);
5445                return -1;
5446        }
5447        rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxq->reg_idx));
5448        rxdctl |= IXGBE_RXDCTL_ENABLE;
5449        IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(rxq->reg_idx), rxdctl);
5450
5451        /* Wait until RX Enable ready */
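        /*
         * Roughly a 10 ms budget, assuming RTE_IXGBE_REGISTER_POLL_WAIT_10_MS
         * is 10 and each iteration below sleeps for 1 ms.
         */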
5452        poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
5453        do {
5454                rte_delay_ms(1);
5455                rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxq->reg_idx));
5456        } while (--poll_ms && !(rxdctl & IXGBE_RXDCTL_ENABLE));
5457        if (!poll_ms)
5458                PMD_INIT_LOG(ERR, "Could not enable Rx Queue %d", rx_queue_id);
5459        rte_wmb();
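        /*
         * Head starts at 0 and the tail is published at nb_rx_desc - 1,
         * handing all but one descriptor to the hardware; keeping one slot
         * unused lets a full ring stay distinguishable from an empty one.
         */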
5460        IXGBE_WRITE_REG(hw, IXGBE_RDH(rxq->reg_idx), 0);
5461        IXGBE_WRITE_REG(hw, IXGBE_RDT(rxq->reg_idx), rxq->nb_rx_desc - 1);
5462        dev->data->rx_queue_state[rx_queue_id] = RTE_ETH_QUEUE_STATE_STARTED;
5463
5464        return 0;
5465}
5466
5467/*
5468 * Stop Receive Units for specified queue.
5469 */
5470int __rte_cold
5471ixgbe_dev_rx_queue_stop(struct rte_eth_dev *dev, uint16_t rx_queue_id)
5472{
5473        struct ixgbe_hw     *hw;
5474        struct ixgbe_adapter *adapter = dev->data->dev_private;
5475        struct ixgbe_rx_queue *rxq;
5476        uint32_t rxdctl;
5477        int poll_ms;
5478
5479        PMD_INIT_FUNC_TRACE();
5480        hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5481
5482        rxq = dev->data->rx_queues[rx_queue_id];
5483
5484        rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxq->reg_idx));
5485        rxdctl &= ~IXGBE_RXDCTL_ENABLE;
5486        IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(rxq->reg_idx), rxdctl);
5487
5488        /* Wait until the RX Enable bit is cleared */
5489        poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
5490        do {
5491                rte_delay_ms(1);
5492                rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxq->reg_idx));
5493        } while (--poll_ms && (rxdctl & IXGBE_RXDCTL_ENABLE));
5494        if (!poll_ms)
5495                PMD_INIT_LOG(ERR, "Could not disable Rx Queue %d", rx_queue_id);
5496
5497        rte_delay_us(RTE_IXGBE_WAIT_100_US);
5498
5499        ixgbe_rx_queue_release_mbufs(rxq);
5500        ixgbe_reset_rx_queue(adapter, rxq);
5501        dev->data->rx_queue_state[rx_queue_id] = RTE_ETH_QUEUE_STATE_STOPPED;
5502
5503        return 0;
5504}
5505
5506
5507/*
5508 * Start Transmit Units for specified queue.
5509 */
5510int __rte_cold
5511ixgbe_dev_tx_queue_start(struct rte_eth_dev *dev, uint16_t tx_queue_id)
5512{
5513        struct ixgbe_hw     *hw;
5514        struct ixgbe_tx_queue *txq;
5515        uint32_t txdctl;
5516        int poll_ms;
5517
5518        PMD_INIT_FUNC_TRACE();
5519        hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5520
5521        txq = dev->data->tx_queues[tx_queue_id];
5522        IXGBE_WRITE_REG(hw, IXGBE_TDH(txq->reg_idx), 0);
5523        txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(txq->reg_idx));
5524        txdctl |= IXGBE_TXDCTL_ENABLE;
5525        IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(txq->reg_idx), txdctl);
5526
5527        /* Wait until TX Enable ready */
5528        if (hw->mac.type == ixgbe_mac_82599EB) {
5529                poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
5530                do {
5531                        rte_delay_ms(1);
5532                        txdctl = IXGBE_READ_REG(hw,
5533                                IXGBE_TXDCTL(txq->reg_idx));
5534                } while (--poll_ms && !(txdctl & IXGBE_TXDCTL_ENABLE));
5535                if (!poll_ms)
5536                        PMD_INIT_LOG(ERR, "Could not enable Tx Queue %d",
5537                                tx_queue_id);
5538        }
5539        rte_wmb();
5540        IXGBE_WRITE_REG(hw, IXGBE_TDT(txq->reg_idx), 0);
5541        dev->data->tx_queue_state[tx_queue_id] = RTE_ETH_QUEUE_STATE_STARTED;
5542
5543        return 0;
5544}
5545
5546/*
5547 * Stop Transmit Units for specified queue.
5548 */
5549int __rte_cold
5550ixgbe_dev_tx_queue_stop(struct rte_eth_dev *dev, uint16_t tx_queue_id)
5551{
5552        struct ixgbe_hw     *hw;
5553        struct ixgbe_tx_queue *txq;
5554        uint32_t txdctl;
5555        uint32_t txtdh, txtdt;
5556        int poll_ms;
5557
5558        PMD_INIT_FUNC_TRACE();
5559        hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5560
5561        txq = dev->data->tx_queues[tx_queue_id];
5562
5563        /* Wait until TX queue is empty */
5564        if (hw->mac.type == ixgbe_mac_82599EB) {
5565                poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
5566                do {
5567                        rte_delay_us(RTE_IXGBE_WAIT_100_US);
5568                        txtdh = IXGBE_READ_REG(hw,
5569                                               IXGBE_TDH(txq->reg_idx));
5570                        txtdt = IXGBE_READ_REG(hw,
5571                                               IXGBE_TDT(txq->reg_idx));
5572                } while (--poll_ms && (txtdh != txtdt));
5573                if (!poll_ms)
5574                        PMD_INIT_LOG(ERR,
5575                                "Tx Queue %d is not empty when stopping.",
5576                                tx_queue_id);
5577        }
5578
5579        txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(txq->reg_idx));
5580        txdctl &= ~IXGBE_TXDCTL_ENABLE;
5581        IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(txq->reg_idx), txdctl);
5582
5583        /* Wait until the TX Enable bit is cleared */
5584        if (hw->mac.type == ixgbe_mac_82599EB) {
5585                poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
5586                do {
5587                        rte_delay_ms(1);
5588                        txdctl = IXGBE_READ_REG(hw,
5589                                                IXGBE_TXDCTL(txq->reg_idx));
5590                } while (--poll_ms && (txdctl & IXGBE_TXDCTL_ENABLE));
5591                if (!poll_ms)
5592                        PMD_INIT_LOG(ERR, "Could not disable Tx Queue %d",
5593                                tx_queue_id);
5594        }
5595
5596        if (txq->ops != NULL) {
5597                txq->ops->release_mbufs(txq);
5598                txq->ops->reset(txq);
5599        }
5600        dev->data->tx_queue_state[tx_queue_id] = RTE_ETH_QUEUE_STATE_STOPPED;
5601
5602        return 0;
5603}
5604
5605void
5606ixgbe_rxq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
5607        struct rte_eth_rxq_info *qinfo)
5608{
5609        struct ixgbe_rx_queue *rxq;
5610
5611        rxq = dev->data->rx_queues[queue_id];
5612
5613        qinfo->mp = rxq->mb_pool;
5614        qinfo->scattered_rx = dev->data->scattered_rx;
5615        qinfo->nb_desc = rxq->nb_rx_desc;
5616
5617        qinfo->conf.rx_free_thresh = rxq->rx_free_thresh;
5618        qinfo->conf.rx_drop_en = rxq->drop_en;
5619        qinfo->conf.rx_deferred_start = rxq->rx_deferred_start;
5620        qinfo->conf.offloads = rxq->offloads;
5621}
5622
5623void
5624ixgbe_txq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
5625        struct rte_eth_txq_info *qinfo)
5626{
5627        struct ixgbe_tx_queue *txq;
5628
5629        txq = dev->data->tx_queues[queue_id];
5630
5631        qinfo->nb_desc = txq->nb_tx_desc;
5632
5633        qinfo->conf.tx_thresh.pthresh = txq->pthresh;
5634        qinfo->conf.tx_thresh.hthresh = txq->hthresh;
5635        qinfo->conf.tx_thresh.wthresh = txq->wthresh;
5636
5637        qinfo->conf.tx_free_thresh = txq->tx_free_thresh;
5638        qinfo->conf.tx_rs_thresh = txq->tx_rs_thresh;
5639        qinfo->conf.offloads = txq->offloads;
5640        qinfo->conf.tx_deferred_start = txq->tx_deferred_start;
5641}
5642
5643/*
5644 * [VF] Initializes Receive Unit.
5645 */
5646int __rte_cold
5647ixgbevf_dev_rx_init(struct rte_eth_dev *dev)
5648{
5649        struct ixgbe_hw     *hw;
5650        struct ixgbe_rx_queue *rxq;
5651        struct rte_eth_rxmode *rxmode = &dev->data->dev_conf.rxmode;
5652        uint64_t bus_addr;
5653        uint32_t srrctl, psrtype = 0;
5654        uint16_t buf_size;
5655        uint16_t i;
5656        int ret;
5657
5658        PMD_INIT_FUNC_TRACE();
5659        hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5660
5661        if (rte_is_power_of_2(dev->data->nb_rx_queues) == 0) {
5662                PMD_INIT_LOG(ERR, "The number of Rx queues is invalid, "
5663                        "it must be a power of 2");
5664                return -1;
5665        }
5666
5667        if (dev->data->nb_rx_queues > hw->mac.max_rx_queues) {
5668                PMD_INIT_LOG(ERR, "The number of Rx queues is invalid, "
5669                        "it must be less than or equal to %d",
5670                        hw->mac.max_rx_queues);
5671                return -1;
5672        }
5673
5674        /*
5675         * When the VF driver issues an IXGBE_VF_RESET request, the PF driver
5676         * disables VF packet reception if the PF MTU is > 1500.
5677         * This is done to deal with the 82599 limitation that forces
5678         * the PF and all VFs to share the same MTU.
5679         * The PF driver then re-enables VF packet reception when the VF
5680         * driver issues an IXGBE_VF_SET_LPE request.
5681         * In the meantime, the VF device cannot be used, even if the VF driver
5682         * and the Guest VM network stack are ready to accept packets with a
5683         * size up to the PF MTU.
5684         * As a work-around to this PF behaviour, force the call to
5685         * ixgbevf_rlpml_set_vf even if jumbo frames are not used. This way,
5686         * VF packet reception works in all cases.
5687         */
5688        if (ixgbevf_rlpml_set_vf(hw,
5689            (uint16_t)dev->data->dev_conf.rxmode.max_rx_pkt_len)) {
5690                PMD_INIT_LOG(ERR, "Failed to set max packet length to %d.",
5691                             dev->data->dev_conf.rxmode.max_rx_pkt_len);
5692                return -EINVAL;
5693        }
5694
5695        /*
5696         * Assume no header split and no VLAN stripping support
5697         * on any Rx queue to start with.
5698         */
5699        rxmode->offloads &= ~DEV_RX_OFFLOAD_VLAN_STRIP;
5700        /* Setup RX queues */
5701        for (i = 0; i < dev->data->nb_rx_queues; i++) {
5702                rxq = dev->data->rx_queues[i];
5703
5704                /* Allocate buffers for descriptor rings */
5705                ret = ixgbe_alloc_rx_queue_mbufs(rxq);
5706                if (ret)
5707                        return ret;
5708
5709                /* Setup the Base and Length of the Rx Descriptor Rings */
5710                bus_addr = rxq->rx_ring_phys_addr;
5711
5712                IXGBE_WRITE_REG(hw, IXGBE_VFRDBAL(i),
5713                                (uint32_t)(bus_addr & 0x00000000ffffffffULL));
5714                IXGBE_WRITE_REG(hw, IXGBE_VFRDBAH(i),
5715                                (uint32_t)(bus_addr >> 32));
5716                IXGBE_WRITE_REG(hw, IXGBE_VFRDLEN(i),
5717                                rxq->nb_rx_desc * sizeof(union ixgbe_adv_rx_desc));
5718                IXGBE_WRITE_REG(hw, IXGBE_VFRDH(i), 0);
5719                IXGBE_WRITE_REG(hw, IXGBE_VFRDT(i), 0);
5720
5721
5722                /* Configure the SRRCTL register */
5723                srrctl = IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF;
5724
5725                /* Set whether packets are dropped when no descriptors are available */
5726                if (rxq->drop_en)
5727                        srrctl |= IXGBE_SRRCTL_DROP_EN;
5728
5729                /*
5730                 * Configure the RX buffer size in the BSIZEPACKET field of
5731                 * the SRRCTL register of the queue.
5732                 * The value is in 1 KB resolution. Valid values can be from
5733                 * 1 KB to 16 KB.
5734                 */
5735                buf_size = (uint16_t)(rte_pktmbuf_data_room_size(rxq->mb_pool) -
5736                        RTE_PKTMBUF_HEADROOM);
5737                srrctl |= ((buf_size >> IXGBE_SRRCTL_BSIZEPKT_SHIFT) &
5738                           IXGBE_SRRCTL_BSIZEPKT_MASK);
5739
5740                /*
5741                 * VF variant: write the virtual function SRRCTL register.
5742                 */
5743                IXGBE_WRITE_REG(hw, IXGBE_VFSRRCTL(i), srrctl);
5744
5745                buf_size = (uint16_t) ((srrctl & IXGBE_SRRCTL_BSIZEPKT_MASK) <<
5746                                       IXGBE_SRRCTL_BSIZEPKT_SHIFT);
5747
5748                if (rxmode->offloads & DEV_RX_OFFLOAD_SCATTER ||
5749                    /* Add dual VLAN tag length to account for double VLAN (QinQ) frames */
5750                    (rxmode->max_rx_pkt_len +
5751                                2 * IXGBE_VLAN_TAG_SIZE) > buf_size) {
5752                        if (!dev->data->scattered_rx)
5753                                PMD_INIT_LOG(DEBUG, "forcing scatter mode");
5754                        dev->data->scattered_rx = 1;
5755                }
5756
5757                if (rxq->offloads & DEV_RX_OFFLOAD_VLAN_STRIP)
5758                        rxmode->offloads |= DEV_RX_OFFLOAD_VLAN_STRIP;
5759        }
5760
5761        /* Set RQPL for VF RSS according to the maximum number of Rx queues */
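        /*
         * For example, four VF Rx queues program RQPL = 2 below
         * (nb_rx_queues >> 1), which selects RSS spreading over
         * four queues per pool under the PSRTYPE RQPL encoding.
         */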
5762        psrtype |= (dev->data->nb_rx_queues >> 1) <<
5763                IXGBE_PSRTYPE_RQPL_SHIFT;
5764        IXGBE_WRITE_REG(hw, IXGBE_VFPSRTYPE, psrtype);
5765
5766        ixgbe_set_rx_function(dev);
5767
5768        return 0;
5769}
5770
5771/*
5772 * [VF] Initializes Transmit Unit.
5773 */
5774void __rte_cold
5775ixgbevf_dev_tx_init(struct rte_eth_dev *dev)
5776{
5777        struct ixgbe_hw     *hw;
5778        struct ixgbe_tx_queue *txq;
5779        uint64_t bus_addr;
5780        uint32_t txctrl;
5781        uint16_t i;
5782
5783        PMD_INIT_FUNC_TRACE();
5784        hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5785
5786        /* Setup the Base and Length of the Tx Descriptor Rings */
5787        for (i = 0; i < dev->data->nb_tx_queues; i++) {
5788                txq = dev->data->tx_queues[i];
5789                bus_addr = txq->tx_ring_phys_addr;
5790                IXGBE_WRITE_REG(hw, IXGBE_VFTDBAL(i),
5791                                (uint32_t)(bus_addr & 0x00000000ffffffffULL));
5792                IXGBE_WRITE_REG(hw, IXGBE_VFTDBAH(i),
5793                                (uint32_t)(bus_addr >> 32));
5794                IXGBE_WRITE_REG(hw, IXGBE_VFTDLEN(i),
5795                                txq->nb_tx_desc * sizeof(union ixgbe_adv_tx_desc));
5796                /* Setup the HW Tx Head and TX Tail descriptor pointers */
5797                IXGBE_WRITE_REG(hw, IXGBE_VFTDH(i), 0);
5798                IXGBE_WRITE_REG(hw, IXGBE_VFTDT(i), 0);
5799
5800                /*
5801                 * Disable the Tx Head Writeback RO bit, since it breaks
5802                 * descriptor bookkeeping if write-backs arrive out of order.
5803                 */
5804                txctrl = IXGBE_READ_REG(hw,
5805                                IXGBE_VFDCA_TXCTRL(i));
5806                txctrl &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN;
5807                IXGBE_WRITE_REG(hw, IXGBE_VFDCA_TXCTRL(i),
5808                                txctrl);
5809        }
5810}
5811
5812/*
5813 * [VF] Start Transmit and Receive Units.
5814 */
5815void __rte_cold
5816ixgbevf_dev_rxtx_start(struct rte_eth_dev *dev)
5817{
5818        struct ixgbe_hw     *hw;
5819        struct ixgbe_tx_queue *txq;
5820        struct ixgbe_rx_queue *rxq;
5821        uint32_t txdctl;
5822        uint32_t rxdctl;
5823        uint16_t i;
5824        int poll_ms;
5825
5826        PMD_INIT_FUNC_TRACE();
5827        hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5828
5829        for (i = 0; i < dev->data->nb_tx_queues; i++) {
5830                txq = dev->data->tx_queues[i];
5831                /* Setup Transmit Threshold Registers */
5832                txdctl = IXGBE_READ_REG(hw, IXGBE_VFTXDCTL(i));
5833                txdctl |= txq->pthresh & 0x7F;
5834                txdctl |= ((txq->hthresh & 0x7F) << 8);
5835                txdctl |= ((txq->wthresh & 0x7F) << 16);
5836                IXGBE_WRITE_REG(hw, IXGBE_VFTXDCTL(i), txdctl);
5837        }
5838
5839        for (i = 0; i < dev->data->nb_tx_queues; i++) {
5840
5841                txdctl = IXGBE_READ_REG(hw, IXGBE_VFTXDCTL(i));
5842                txdctl |= IXGBE_TXDCTL_ENABLE;
5843                IXGBE_WRITE_REG(hw, IXGBE_VFTXDCTL(i), txdctl);
5844
5845                poll_ms = 10;
5846                /* Wait until TX Enable ready */
5847                do {
5848                        rte_delay_ms(1);
5849                        txdctl = IXGBE_READ_REG(hw, IXGBE_VFTXDCTL(i));
5850                } while (--poll_ms && !(txdctl & IXGBE_TXDCTL_ENABLE));
5851                if (!poll_ms)
5852                        PMD_INIT_LOG(ERR, "Could not enable Tx Queue %d", i);
5853        }
5854        for (i = 0; i < dev->data->nb_rx_queues; i++) {
5855
5856                rxq = dev->data->rx_queues[i];
5857
5858                rxdctl = IXGBE_READ_REG(hw, IXGBE_VFRXDCTL(i));
5859                rxdctl |= IXGBE_RXDCTL_ENABLE;
5860                IXGBE_WRITE_REG(hw, IXGBE_VFRXDCTL(i), rxdctl);
5861
5862                /* Wait until RX Enable ready */
5863                poll_ms = 10;
5864                do {
5865                        rte_delay_ms(1);
5866                        rxdctl = IXGBE_READ_REG(hw, IXGBE_VFRXDCTL(i));
5867                } while (--poll_ms && !(rxdctl & IXGBE_RXDCTL_ENABLE));
5868                if (!poll_ms)
5869                        PMD_INIT_LOG(ERR, "Could not enable Rx Queue %d", i);
5870                rte_wmb();
5871                IXGBE_WRITE_REG(hw, IXGBE_VFRDT(i), rxq->nb_rx_desc - 1);
5872
5873        }
5874}
5875
5876int
5877ixgbe_rss_conf_init(struct ixgbe_rte_flow_rss_conf *out,
5878                    const struct rte_flow_action_rss *in)
5879{
5880        if (in->key_len > RTE_DIM(out->key) ||
5881            in->queue_num > RTE_DIM(out->queue))
5882                return -EINVAL;
5883        out->conf = (struct rte_flow_action_rss){
5884                .func = in->func,
5885                .level = in->level,
5886                .types = in->types,
5887                .key_len = in->key_len,
5888                .queue_num = in->queue_num,
5889                .key = memcpy(out->key, in->key, in->key_len),
5890                .queue = memcpy(out->queue, in->queue,
5891                                sizeof(*in->queue) * in->queue_num),
5892        };
5893        return 0;
5894}
5895
5896int
5897ixgbe_action_rss_same(const struct rte_flow_action_rss *comp,
5898                      const struct rte_flow_action_rss *with)
5899{
5900        return (comp->func == with->func &&
5901                comp->level == with->level &&
5902                comp->types == with->types &&
5903                comp->key_len == with->key_len &&
5904                comp->queue_num == with->queue_num &&
5905                !memcmp(comp->key, with->key, with->key_len) &&
5906                !memcmp(comp->queue, with->queue,
5907                        sizeof(*with->queue) * with->queue_num));
5908}
5909
5910int
5911ixgbe_config_rss_filter(struct rte_eth_dev *dev,
5912                struct ixgbe_rte_flow_rss_conf *conf, bool add)
5913{
5914        struct ixgbe_hw *hw;
5915        uint32_t reta;
5916        uint16_t i;
5917        uint16_t j;
5918        uint16_t sp_reta_size;
5919        uint32_t reta_reg;
5920        struct rte_eth_rss_conf rss_conf = {
5921                .rss_key = conf->conf.key_len ?
5922                        (void *)(uintptr_t)conf->conf.key : NULL,
5923                .rss_key_len = conf->conf.key_len,
5924                .rss_hf = conf->conf.types,
5925        };
5926        struct ixgbe_filter_info *filter_info =
5927                IXGBE_DEV_PRIVATE_TO_FILTER_INFO(dev->data->dev_private);
5928
5929        PMD_INIT_FUNC_TRACE();
5930        hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5931
5932        sp_reta_size = ixgbe_reta_size_get(hw->mac.type);
5933
5934        if (!add) {
5935                if (ixgbe_action_rss_same(&filter_info->rss_info.conf,
5936                                          &conf->conf)) {
5937                        ixgbe_rss_disable(dev);
5938                        memset(&filter_info->rss_info, 0,
5939                                sizeof(struct ixgbe_rte_flow_rss_conf));
5940                        return 0;
5941                }
5942                return -EINVAL;
5943        }
5944
5945        if (filter_info->rss_info.conf.queue_num)
5946                return -EINVAL;
5947        /* Fill in the redirection table.
5948         * The byte-swap is needed because NIC registers are in
5949         * little-endian order.
5950         */
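        /*
         * For example, an RSS action spreading over queues {0, 1} packs each
         * 4-entry register as 0x00010001 before the swap (entries 0,1,0,1),
         * so 0x01000100 is what actually lands in each RETA register.
         */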
5951        reta = 0;
5952        for (i = 0, j = 0; i < sp_reta_size; i++, j++) {
5953                reta_reg = ixgbe_reta_reg_get(hw->mac.type, i);
5954
5955                if (j == conf->conf.queue_num)
5956                        j = 0;
5957                reta = (reta << 8) | conf->conf.queue[j];
5958                if ((i & 3) == 3)
5959                        IXGBE_WRITE_REG(hw, reta_reg,
5960                                        rte_bswap32(reta));
5961        }
5962
5963        /* Configure the RSS key and the RSS protocols used to compute
5964         * the RSS hash of input packets.
5965         */
5966        if ((rss_conf.rss_hf & IXGBE_RSS_OFFLOAD_ALL) == 0) {
5967                ixgbe_rss_disable(dev);
5968                return 0;
5969        }
5970        if (rss_conf.rss_key == NULL)
5971                rss_conf.rss_key = rss_intel_key; /* Default hash key */
5972        ixgbe_hw_rss_hash_set(hw, &rss_conf);
5973
5974        if (ixgbe_rss_conf_init(&filter_info->rss_info, &conf->conf))
5975                return -EINVAL;
5976
5977        return 0;
5978}
5979
5980/* Stubs needed for linkage when RTE_ARCH_PPC_64 is set */
5981#if defined(RTE_ARCH_PPC_64)
5982int
5983ixgbe_rx_vec_dev_conf_condition_check(struct rte_eth_dev __rte_unused *dev)
5984{
5985        return -1;
5986}
5987
5988uint16_t
5989ixgbe_recv_pkts_vec(
5990        void __rte_unused *rx_queue,
5991        struct rte_mbuf __rte_unused **rx_pkts,
5992        uint16_t __rte_unused nb_pkts)
5993{
5994        return 0;
5995}
5996
5997uint16_t
5998ixgbe_recv_scattered_pkts_vec(
5999        void __rte_unused *rx_queue,
6000        struct rte_mbuf __rte_unused **rx_pkts,
6001        uint16_t __rte_unused nb_pkts)
6002{
6003        return 0;
6004}
6005
6006int
6007ixgbe_rxq_vec_setup(struct ixgbe_rx_queue __rte_unused *rxq)
6008{
6009        return -1;
6010}
6011
6012uint16_t
6013ixgbe_xmit_fixed_burst_vec(void __rte_unused *tx_queue,
6014                struct rte_mbuf __rte_unused **tx_pkts,
6015                uint16_t __rte_unused nb_pkts)
6016{
6017        return 0;
6018}
6019
6020int
6021ixgbe_txq_vec_setup(struct ixgbe_tx_queue __rte_unused *txq)
6022{
6023        return -1;
6024}
6025
6026void
6027ixgbe_rx_queue_release_mbufs_vec(struct ixgbe_rx_queue __rte_unused *rxq)
6028{
6029        return;
6030}
6031#endif
6032