dpdk/drivers/net/ixgbe/ixgbe_rxtx.c
   1/* SPDX-License-Identifier: BSD-3-Clause
   2 * Copyright(c) 2010-2016 Intel Corporation.
   3 * Copyright 2014 6WIND S.A.
   4 */
   5
   6#include <sys/queue.h>
   7
   8#include <stdio.h>
   9#include <stdlib.h>
  10#include <string.h>
  11#include <errno.h>
  12#include <stdint.h>
  13#include <stdarg.h>
  14#include <unistd.h>
  15#include <inttypes.h>
  16
  17#include <rte_byteorder.h>
  18#include <rte_common.h>
  19#include <rte_cycles.h>
  20#include <rte_log.h>
  21#include <rte_debug.h>
  22#include <rte_interrupts.h>
  23#include <rte_pci.h>
  24#include <rte_memory.h>
  25#include <rte_memzone.h>
  26#include <rte_launch.h>
  27#include <rte_eal.h>
  28#include <rte_per_lcore.h>
  29#include <rte_lcore.h>
  30#include <rte_atomic.h>
  31#include <rte_branch_prediction.h>
  32#include <rte_mempool.h>
  33#include <rte_malloc.h>
  34#include <rte_mbuf.h>
  35#include <rte_ether.h>
  36#include <rte_ethdev_driver.h>
  37#include <rte_prefetch.h>
  38#include <rte_udp.h>
  39#include <rte_tcp.h>
  40#include <rte_sctp.h>
  41#include <rte_string_fns.h>
  42#include <rte_errno.h>
  43#include <rte_ip.h>
  44#include <rte_net.h>
  45
  46#include "ixgbe_logs.h"
  47#include "base/ixgbe_api.h"
  48#include "base/ixgbe_vf.h"
  49#include "ixgbe_ethdev.h"
  50#include "base/ixgbe_dcb.h"
  51#include "base/ixgbe_common.h"
  52#include "ixgbe_rxtx.h"
  53
  54#ifdef RTE_LIBRTE_IEEE1588
  55#define IXGBE_TX_IEEE1588_TMST PKT_TX_IEEE1588_TMST
  56#else
  57#define IXGBE_TX_IEEE1588_TMST 0
  58#endif
  59/* Bit mask indicating which bits are required for building the TX context */
  60#define IXGBE_TX_OFFLOAD_MASK (                  \
  61                PKT_TX_OUTER_IPV6 |              \
  62                PKT_TX_OUTER_IPV4 |              \
  63                PKT_TX_IPV6 |                    \
  64                PKT_TX_IPV4 |                    \
  65                PKT_TX_VLAN_PKT |                \
  66                PKT_TX_IP_CKSUM |                \
  67                PKT_TX_L4_MASK |                 \
  68                PKT_TX_TCP_SEG |                 \
  69                PKT_TX_MACSEC |                  \
  70                PKT_TX_OUTER_IP_CKSUM |          \
  71                PKT_TX_SEC_OFFLOAD |     \
  72                IXGBE_TX_IEEE1588_TMST)
  73
  74#define IXGBE_TX_OFFLOAD_NOTSUP_MASK \
  75                (PKT_TX_OFFLOAD_MASK ^ IXGBE_TX_OFFLOAD_MASK)
  76
  77#if 1
  78#define RTE_PMD_USE_PREFETCH
  79#endif
  80
  81#ifdef RTE_PMD_USE_PREFETCH
  82/*
  83 * Prefetch a cache line into all cache levels.
  84 */
  85#define rte_ixgbe_prefetch(p)   rte_prefetch0(p)
  86#else
  87#define rte_ixgbe_prefetch(p)   do {} while (0)
  88#endif
  89
  90/*********************************************************************
  91 *
  92 *  TX functions
  93 *
  94 **********************************************************************/
  95
  96/*
  97 * Check for descriptors with their DD bit set and free mbufs.
  98 * Return the total number of buffers freed.
  99 */
 100static __rte_always_inline int
 101ixgbe_tx_free_bufs(struct ixgbe_tx_queue *txq)
 102{
 103        struct ixgbe_tx_entry *txep;
 104        uint32_t status;
 105        int i, nb_free = 0;
 106        struct rte_mbuf *m, *free[RTE_IXGBE_TX_MAX_FREE_BUF_SZ];
 107
 108        /* check DD bit on threshold descriptor */
 109        status = txq->tx_ring[txq->tx_next_dd].wb.status;
 110        if (!(status & rte_cpu_to_le_32(IXGBE_ADVTXD_STAT_DD)))
 111                return 0;
 112
 113        /*
 114         * first buffer to free from S/W ring is at index
 115         * tx_next_dd - (tx_rs_thresh-1)
 116         */
 117        txep = &(txq->sw_ring[txq->tx_next_dd - (txq->tx_rs_thresh - 1)]);
 118
 119        for (i = 0; i < txq->tx_rs_thresh; ++i, ++txep) {
 120                /* free buffers one at a time */
 121                m = rte_pktmbuf_prefree_seg(txep->mbuf);
 122                txep->mbuf = NULL;
 123
 124                if (unlikely(m == NULL))
 125                        continue;
 126
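                    /*
                     * Flush the local free[] cache once it is full or when the
                     * next mbuf comes from a different mempool: a bulk put can
                     * only return buffers to a single pool at a time.
                     */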
 127                if (nb_free >= RTE_IXGBE_TX_MAX_FREE_BUF_SZ ||
 128                    (nb_free > 0 && m->pool != free[0]->pool)) {
 129                        rte_mempool_put_bulk(free[0]->pool,
 130                                             (void **)free, nb_free);
 131                        nb_free = 0;
 132                }
 133
 134                free[nb_free++] = m;
 135        }
 136
 137        if (nb_free > 0)
 138                rte_mempool_put_bulk(free[0]->pool, (void **)free, nb_free);
 139
 140        /* buffers were freed, update counters */
 141        txq->nb_tx_free = (uint16_t)(txq->nb_tx_free + txq->tx_rs_thresh);
 142        txq->tx_next_dd = (uint16_t)(txq->tx_next_dd + txq->tx_rs_thresh);
 143        if (txq->tx_next_dd >= txq->nb_tx_desc)
 144                txq->tx_next_dd = (uint16_t)(txq->tx_rs_thresh - 1);
 145
 146        return txq->tx_rs_thresh;
 147}
 148
 149/* Populate 4 descriptors with data from 4 mbufs */
 150static inline void
 151tx4(volatile union ixgbe_adv_tx_desc *txdp, struct rte_mbuf **pkts)
 152{
 153        uint64_t buf_dma_addr;
 154        uint32_t pkt_len;
 155        int i;
 156
 157        for (i = 0; i < 4; ++i, ++txdp, ++pkts) {
 158                buf_dma_addr = rte_mbuf_data_iova(*pkts);
 159                pkt_len = (*pkts)->data_len;
 160
 161                /* write data to descriptor */
 162                txdp->read.buffer_addr = rte_cpu_to_le_64(buf_dma_addr);
 163
 164                txdp->read.cmd_type_len =
 165                        rte_cpu_to_le_32((uint32_t)DCMD_DTYP_FLAGS | pkt_len);
 166
 167                txdp->read.olinfo_status =
 168                        rte_cpu_to_le_32(pkt_len << IXGBE_ADVTXD_PAYLEN_SHIFT);
 169
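                    /*
                     * Prefetch the cache line holding the mbuf's pool pointer;
                     * it is read again when the mbuf is recycled by
                     * ixgbe_tx_free_bufs().
                     */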
 170                rte_prefetch0(&(*pkts)->pool);
 171        }
 172}
 173
 174/* Populate 1 descriptor with data from 1 mbuf */
 175static inline void
 176tx1(volatile union ixgbe_adv_tx_desc *txdp, struct rte_mbuf **pkts)
 177{
 178        uint64_t buf_dma_addr;
 179        uint32_t pkt_len;
 180
 181        buf_dma_addr = rte_mbuf_data_iova(*pkts);
 182        pkt_len = (*pkts)->data_len;
 183
 184        /* write data to descriptor */
 185        txdp->read.buffer_addr = rte_cpu_to_le_64(buf_dma_addr);
 186        txdp->read.cmd_type_len =
 187                        rte_cpu_to_le_32((uint32_t)DCMD_DTYP_FLAGS | pkt_len);
 188        txdp->read.olinfo_status =
 189                        rte_cpu_to_le_32(pkt_len << IXGBE_ADVTXD_PAYLEN_SHIFT);
 190        rte_prefetch0(&(*pkts)->pool);
 191}
 192
 193/*
 194 * Fill H/W descriptor ring with mbuf data.
 195 * Copy mbuf pointers to the S/W ring.
 196 */
 197static inline void
 198ixgbe_tx_fill_hw_ring(struct ixgbe_tx_queue *txq, struct rte_mbuf **pkts,
 199                      uint16_t nb_pkts)
 200{
 201        volatile union ixgbe_adv_tx_desc *txdp = &(txq->tx_ring[txq->tx_tail]);
 202        struct ixgbe_tx_entry *txep = &(txq->sw_ring[txq->tx_tail]);
 203        const int N_PER_LOOP = 4;
 204        const int N_PER_LOOP_MASK = N_PER_LOOP-1;
 205        int mainpart, leftover;
 206        int i, j;
 207
 208        /*
 209         * Process most of the packets in chunks of N pkts.  Any
 210         * leftover packets will get processed one at a time.
 211         */
 212        mainpart = (nb_pkts & ((uint32_t) ~N_PER_LOOP_MASK));
 213        leftover = (nb_pkts & ((uint32_t)  N_PER_LOOP_MASK));
 214        for (i = 0; i < mainpart; i += N_PER_LOOP) {
 215                /* Copy N mbuf pointers to the S/W ring */
 216                for (j = 0; j < N_PER_LOOP; ++j) {
 217                        (txep + i + j)->mbuf = *(pkts + i + j);
 218                }
 219                tx4(txdp + i, pkts + i);
 220        }
 221
 222        if (unlikely(leftover > 0)) {
 223                for (i = 0; i < leftover; ++i) {
 224                        (txep + mainpart + i)->mbuf = *(pkts + mainpart + i);
 225                        tx1(txdp + mainpart + i, pkts + mainpart + i);
 226                }
 227        }
 228}
 229
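    /*
     * Simple transmit path: assumes every mbuf is a single segment and that
     * no offloads requiring a context descriptor are in use. Descriptors are
     * filled in bulk and recycled in groups of tx_rs_thresh.
     */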
 230static inline uint16_t
 231tx_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
 232             uint16_t nb_pkts)
 233{
 234        struct ixgbe_tx_queue *txq = (struct ixgbe_tx_queue *)tx_queue;
 235        volatile union ixgbe_adv_tx_desc *tx_r = txq->tx_ring;
 236        uint16_t n = 0;
 237
 238        /*
 239         * Begin scanning the H/W ring for done descriptors when the
 240         * number of available descriptors drops below tx_free_thresh.  For
 241         * each done descriptor, free the associated buffer.
 242         */
 243        if (txq->nb_tx_free < txq->tx_free_thresh)
 244                ixgbe_tx_free_bufs(txq);
 245
 246        /* Only use descriptors that are available */
 247        nb_pkts = (uint16_t)RTE_MIN(txq->nb_tx_free, nb_pkts);
 248        if (unlikely(nb_pkts == 0))
 249                return 0;
 250
 251        /* Use exactly nb_pkts descriptors */
 252        txq->nb_tx_free = (uint16_t)(txq->nb_tx_free - nb_pkts);
 253
 254        /*
 255         * At this point, we know there are enough descriptors in the
 256         * ring to transmit all the packets.  This assumes that each
 257         * mbuf contains a single segment, and that no new offloads
 258         * are expected, which would require a new context descriptor.
 259         */
 260
 261        /*
 262         * See if we're going to wrap-around. If so, handle the top
 263         * of the descriptor ring first, then do the bottom.  If not,
 264         * the processing looks just like the "bottom" part anyway...
 265         */
 266        if ((txq->tx_tail + nb_pkts) > txq->nb_tx_desc) {
 267                n = (uint16_t)(txq->nb_tx_desc - txq->tx_tail);
 268                ixgbe_tx_fill_hw_ring(txq, tx_pkts, n);
 269
 270                /*
 271                 * We know that the last descriptor in the ring will need to
 272                 * have its RS bit set because tx_rs_thresh has to be
 273                 * a divisor of the ring size
 274                 */
 275                tx_r[txq->tx_next_rs].read.cmd_type_len |=
 276                        rte_cpu_to_le_32(IXGBE_ADVTXD_DCMD_RS);
 277                txq->tx_next_rs = (uint16_t)(txq->tx_rs_thresh - 1);
 278
 279                txq->tx_tail = 0;
 280        }
 281
 282        /* Fill H/W descriptor ring with mbuf data */
 283        ixgbe_tx_fill_hw_ring(txq, tx_pkts + n, (uint16_t)(nb_pkts - n));
 284        txq->tx_tail = (uint16_t)(txq->tx_tail + (nb_pkts - n));
 285
 286        /*
 287         * Determine if RS bit should be set
 288         * This is what we actually want:
 289         *   if ((txq->tx_tail - 1) >= txq->tx_next_rs)
 290         * but instead of subtracting 1 and doing >=, we can just do
 291         * greater than without subtracting.
 292         */
 293        if (txq->tx_tail > txq->tx_next_rs) {
 294                tx_r[txq->tx_next_rs].read.cmd_type_len |=
 295                        rte_cpu_to_le_32(IXGBE_ADVTXD_DCMD_RS);
 296                txq->tx_next_rs = (uint16_t)(txq->tx_next_rs +
 297                                                txq->tx_rs_thresh);
 298                if (txq->tx_next_rs >= txq->nb_tx_desc)
 299                        txq->tx_next_rs = (uint16_t)(txq->tx_rs_thresh - 1);
 300        }
 301
 302        /*
 303         * Check for wrap-around. This would only happen if we used
 304         * up to the last descriptor in the ring, no more, no less.
 305         */
 306        if (txq->tx_tail >= txq->nb_tx_desc)
 307                txq->tx_tail = 0;
 308
 309        /* update tail pointer */
 310        rte_wmb();
 311        IXGBE_PCI_REG_WRITE_RELAXED(txq->tdt_reg_addr, txq->tx_tail);
 312
 313        return nb_pkts;
 314}
 315
 316uint16_t
 317ixgbe_xmit_pkts_simple(void *tx_queue, struct rte_mbuf **tx_pkts,
 318                       uint16_t nb_pkts)
 319{
 320        uint16_t nb_tx;
 321
 322        /* Fast path: the whole burst fits within TX_MAX_BURST packets */
 323        if (likely(nb_pkts <= RTE_PMD_IXGBE_TX_MAX_BURST))
 324                return tx_xmit_pkts(tx_queue, tx_pkts, nb_pkts);
 325
 326        /* transmit more than the max burst, in chunks of TX_MAX_BURST */
 327        nb_tx = 0;
 328        while (nb_pkts) {
 329                uint16_t ret, n;
 330
 331                n = (uint16_t)RTE_MIN(nb_pkts, RTE_PMD_IXGBE_TX_MAX_BURST);
 332                ret = tx_xmit_pkts(tx_queue, &(tx_pkts[nb_tx]), n);
 333                nb_tx = (uint16_t)(nb_tx + ret);
 334                nb_pkts = (uint16_t)(nb_pkts - ret);
 335                if (ret < n)
 336                        break;
 337        }
 338
 339        return nb_tx;
 340}
 341
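    /*
     * Vector transmit entry point: split the burst into chunks of at most
     * tx_rs_thresh packets and hand each chunk to the architecture-specific
     * fixed-burst routine.
     */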
 342static uint16_t
 343ixgbe_xmit_pkts_vec(void *tx_queue, struct rte_mbuf **tx_pkts,
 344                    uint16_t nb_pkts)
 345{
 346        uint16_t nb_tx = 0;
 347        struct ixgbe_tx_queue *txq = (struct ixgbe_tx_queue *)tx_queue;
 348
 349        while (nb_pkts) {
 350                uint16_t ret, num;
 351
 352                num = (uint16_t)RTE_MIN(nb_pkts, txq->tx_rs_thresh);
 353                ret = ixgbe_xmit_fixed_burst_vec(tx_queue, &tx_pkts[nb_tx],
 354                                                 num);
 355                nb_tx += ret;
 356                nb_pkts -= ret;
 357                if (ret < num)
 358                        break;
 359        }
 360
 361        return nb_tx;
 362}
 363
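    /*
     * Build an advanced TX context descriptor from the offload flags and
     * header lengths, and cache the result so that subsequent packets with
     * the same offload requirements can reuse it.
     */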
 364static inline void
 365ixgbe_set_xmit_ctx(struct ixgbe_tx_queue *txq,
 366                volatile struct ixgbe_adv_tx_context_desc *ctx_txd,
 367                uint64_t ol_flags, union ixgbe_tx_offload tx_offload,
 368                __rte_unused uint64_t *mdata)
 369{
 370        uint32_t type_tucmd_mlhl;
 371        uint32_t mss_l4len_idx = 0;
 372        uint32_t ctx_idx;
 373        uint32_t vlan_macip_lens;
 374        union ixgbe_tx_offload tx_offload_mask;
 375        uint32_t seqnum_seed = 0;
 376
 377        ctx_idx = txq->ctx_curr;
 378        tx_offload_mask.data[0] = 0;
 379        tx_offload_mask.data[1] = 0;
 380        type_tucmd_mlhl = 0;
 381
 382        /* Specify which HW CTX to upload. */
 383        mss_l4len_idx |= (ctx_idx << IXGBE_ADVTXD_IDX_SHIFT);
 384
 385        if (ol_flags & PKT_TX_VLAN_PKT) {
 386                tx_offload_mask.vlan_tci |= ~0;
 387        }
 388
 389        /* check if TCP segmentation is required for this packet */
 390        if (ol_flags & PKT_TX_TCP_SEG) {
 391                /* implies IP cksum in IPv4 */
 392                if (ol_flags & PKT_TX_IP_CKSUM)
 393                        type_tucmd_mlhl = IXGBE_ADVTXD_TUCMD_IPV4 |
 394                                IXGBE_ADVTXD_TUCMD_L4T_TCP |
 395                                IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
 396                else
 397                        type_tucmd_mlhl = IXGBE_ADVTXD_TUCMD_IPV6 |
 398                                IXGBE_ADVTXD_TUCMD_L4T_TCP |
 399                                IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
 400
 401                tx_offload_mask.l2_len |= ~0;
 402                tx_offload_mask.l3_len |= ~0;
 403                tx_offload_mask.l4_len |= ~0;
 404                tx_offload_mask.tso_segsz |= ~0;
 405                mss_l4len_idx |= tx_offload.tso_segsz << IXGBE_ADVTXD_MSS_SHIFT;
 406                mss_l4len_idx |= tx_offload.l4_len << IXGBE_ADVTXD_L4LEN_SHIFT;
 407        } else { /* no TSO, check if hardware checksum is needed */
 408                if (ol_flags & PKT_TX_IP_CKSUM) {
 409                        type_tucmd_mlhl = IXGBE_ADVTXD_TUCMD_IPV4;
 410                        tx_offload_mask.l2_len |= ~0;
 411                        tx_offload_mask.l3_len |= ~0;
 412                }
 413
 414                switch (ol_flags & PKT_TX_L4_MASK) {
 415                case PKT_TX_UDP_CKSUM:
 416                        type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_UDP |
 417                                IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
 418                        mss_l4len_idx |= sizeof(struct rte_udp_hdr)
 419                                << IXGBE_ADVTXD_L4LEN_SHIFT;
 420                        tx_offload_mask.l2_len |= ~0;
 421                        tx_offload_mask.l3_len |= ~0;
 422                        break;
 423                case PKT_TX_TCP_CKSUM:
 424                        type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP |
 425                                IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
 426                        mss_l4len_idx |= sizeof(struct rte_tcp_hdr)
 427                                << IXGBE_ADVTXD_L4LEN_SHIFT;
 428                        tx_offload_mask.l2_len |= ~0;
 429                        tx_offload_mask.l3_len |= ~0;
 430                        break;
 431                case PKT_TX_SCTP_CKSUM:
 432                        type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_SCTP |
 433                                IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
 434                        mss_l4len_idx |= sizeof(struct rte_sctp_hdr)
 435                                << IXGBE_ADVTXD_L4LEN_SHIFT;
 436                        tx_offload_mask.l2_len |= ~0;
 437                        tx_offload_mask.l3_len |= ~0;
 438                        break;
 439                default:
 440                        type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_RSV |
 441                                IXGBE_ADVTXD_DTYP_CTXT | IXGBE_ADVTXD_DCMD_DEXT;
 442                        break;
 443                }
 444        }
 445
 446        if (ol_flags & PKT_TX_OUTER_IP_CKSUM) {
 447                tx_offload_mask.outer_l2_len |= ~0;
 448                tx_offload_mask.outer_l3_len |= ~0;
 449                tx_offload_mask.l2_len |= ~0;
 450                seqnum_seed |= tx_offload.outer_l3_len
 451                               << IXGBE_ADVTXD_OUTER_IPLEN;
 452                seqnum_seed |= tx_offload.l2_len
 453                               << IXGBE_ADVTXD_TUNNEL_LEN;
 454        }
 455#ifdef RTE_LIBRTE_SECURITY
 456        if (ol_flags & PKT_TX_SEC_OFFLOAD) {
 457                union ixgbe_crypto_tx_desc_md *md =
 458                                (union ixgbe_crypto_tx_desc_md *)mdata;
 459                seqnum_seed |=
 460                        (IXGBE_ADVTXD_IPSEC_SA_INDEX_MASK & md->sa_idx);
 461                type_tucmd_mlhl |= md->enc ?
 462                                (IXGBE_ADVTXD_TUCMD_IPSEC_TYPE_ESP |
 463                                IXGBE_ADVTXD_TUCMD_IPSEC_ENCRYPT_EN) : 0;
 464                type_tucmd_mlhl |=
 465                        (md->pad_len & IXGBE_ADVTXD_IPSEC_ESP_LEN_MASK);
 466                tx_offload_mask.sa_idx |= ~0;
 467                tx_offload_mask.sec_pad_len |= ~0;
 468        }
 469#endif
 470
 471        txq->ctx_cache[ctx_idx].flags = ol_flags;
 472        txq->ctx_cache[ctx_idx].tx_offload.data[0]  =
 473                tx_offload_mask.data[0] & tx_offload.data[0];
 474        txq->ctx_cache[ctx_idx].tx_offload.data[1]  =
 475                tx_offload_mask.data[1] & tx_offload.data[1];
 476        txq->ctx_cache[ctx_idx].tx_offload_mask    = tx_offload_mask;
 477
 478        ctx_txd->type_tucmd_mlhl = rte_cpu_to_le_32(type_tucmd_mlhl);
 479        vlan_macip_lens = tx_offload.l3_len;
 480        if (ol_flags & PKT_TX_OUTER_IP_CKSUM)
 481                vlan_macip_lens |= (tx_offload.outer_l2_len <<
 482                                    IXGBE_ADVTXD_MACLEN_SHIFT);
 483        else
 484                vlan_macip_lens |= (tx_offload.l2_len <<
 485                                    IXGBE_ADVTXD_MACLEN_SHIFT);
 486        vlan_macip_lens |= ((uint32_t)tx_offload.vlan_tci << IXGBE_ADVTXD_VLAN_SHIFT);
 487        ctx_txd->vlan_macip_lens = rte_cpu_to_le_32(vlan_macip_lens);
 488        ctx_txd->mss_l4len_idx   = rte_cpu_to_le_32(mss_l4len_idx);
 489        ctx_txd->seqnum_seed     = seqnum_seed;
 490}
 491
 492/*
 493 * Check which hardware context can be used. Use the existing match
 494 * or create a new context descriptor.
 495 */
 496static inline uint32_t
 497what_advctx_update(struct ixgbe_tx_queue *txq, uint64_t flags,
 498                   union ixgbe_tx_offload tx_offload)
 499{
 500        /* Check for a match with the currently used context */
 501        if (likely((txq->ctx_cache[txq->ctx_curr].flags == flags) &&
 502                   (txq->ctx_cache[txq->ctx_curr].tx_offload.data[0] ==
 503                    (txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data[0]
 504                     & tx_offload.data[0])) &&
 505                   (txq->ctx_cache[txq->ctx_curr].tx_offload.data[1] ==
 506                    (txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data[1]
 507                     & tx_offload.data[1]))))
 508                return txq->ctx_curr;
 509
 510        /* Otherwise, check whether the other (next) context matches */
 511        txq->ctx_curr ^= 1;
 512        if (likely((txq->ctx_cache[txq->ctx_curr].flags == flags) &&
 513                   (txq->ctx_cache[txq->ctx_curr].tx_offload.data[0] ==
 514                    (txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data[0]
 515                     & tx_offload.data[0])) &&
 516                   (txq->ctx_cache[txq->ctx_curr].tx_offload.data[1] ==
 517                    (txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data[1]
 518                     & tx_offload.data[1]))))
 519                return txq->ctx_curr;
 520
 521        /* Neither cached context matches: a new context descriptor is needed */
 522        return IXGBE_CTX_NUM;
 523}
 524
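    /*
     * Translate mbuf checksum/TSO offload flags into the POPTS bits carried
     * in the data descriptor's olinfo_status field.
     */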
 525static inline uint32_t
 526tx_desc_cksum_flags_to_olinfo(uint64_t ol_flags)
 527{
 528        uint32_t tmp = 0;
 529
 530        if ((ol_flags & PKT_TX_L4_MASK) != PKT_TX_L4_NO_CKSUM)
 531                tmp |= IXGBE_ADVTXD_POPTS_TXSM;
 532        if (ol_flags & PKT_TX_IP_CKSUM)
 533                tmp |= IXGBE_ADVTXD_POPTS_IXSM;
 534        if (ol_flags & PKT_TX_TCP_SEG)
 535                tmp |= IXGBE_ADVTXD_POPTS_TXSM;
 536        return tmp;
 537}
 538
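    /*
     * Translate mbuf offload flags into command bits for the data
     * descriptor's cmd_type_len field.
     */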
 539static inline uint32_t
 540tx_desc_ol_flags_to_cmdtype(uint64_t ol_flags)
 541{
 542        uint32_t cmdtype = 0;
 543
 544        if (ol_flags & PKT_TX_VLAN_PKT)
 545                cmdtype |= IXGBE_ADVTXD_DCMD_VLE;
 546        if (ol_flags & PKT_TX_TCP_SEG)
 547                cmdtype |= IXGBE_ADVTXD_DCMD_TSE;
 548        if (ol_flags & PKT_TX_OUTER_IP_CKSUM)
 549                cmdtype |= (1 << IXGBE_ADVTXD_OUTERIPCS_SHIFT);
 550        if (ol_flags & PKT_TX_MACSEC)
 551                cmdtype |= IXGBE_ADVTXD_MAC_LINKSEC;
 552        return cmdtype;
 553}
 554
 555/* Default RS bit threshold values */
 556#ifndef DEFAULT_TX_RS_THRESH
 557#define DEFAULT_TX_RS_THRESH   32
 558#endif
 559#ifndef DEFAULT_TX_FREE_THRESH
 560#define DEFAULT_TX_FREE_THRESH 32
 561#endif
 562
 563/* Reset transmit descriptors after they have been used */
 564static inline int
 565ixgbe_xmit_cleanup(struct ixgbe_tx_queue *txq)
 566{
 567        struct ixgbe_tx_entry *sw_ring = txq->sw_ring;
 568        volatile union ixgbe_adv_tx_desc *txr = txq->tx_ring;
 569        uint16_t last_desc_cleaned = txq->last_desc_cleaned;
 570        uint16_t nb_tx_desc = txq->nb_tx_desc;
 571        uint16_t desc_to_clean_to;
 572        uint16_t nb_tx_to_clean;
 573        uint32_t status;
 574
 575        /* Determine the last descriptor needing to be cleaned */
 576        desc_to_clean_to = (uint16_t)(last_desc_cleaned + txq->tx_rs_thresh);
 577        if (desc_to_clean_to >= nb_tx_desc)
 578                desc_to_clean_to = (uint16_t)(desc_to_clean_to - nb_tx_desc);
 579
 580        /* Check to make sure the last descriptor to clean is done */
 581        desc_to_clean_to = sw_ring[desc_to_clean_to].last_id;
 582        status = txr[desc_to_clean_to].wb.status;
 583        if (!(status & rte_cpu_to_le_32(IXGBE_TXD_STAT_DD))) {
 584                PMD_TX_FREE_LOG(DEBUG,
  585                                "TX descriptor %4u is not done "
 586                                "(port=%d queue=%d)",
 587                                desc_to_clean_to,
 588                                txq->port_id, txq->queue_id);
 589                /* Failed to clean any descriptors, better luck next time */
 590                return -(1);
 591        }
 592
 593        /* Figure out how many descriptors will be cleaned */
 594        if (last_desc_cleaned > desc_to_clean_to)
 595                nb_tx_to_clean = (uint16_t)((nb_tx_desc - last_desc_cleaned) +
 596                                                        desc_to_clean_to);
 597        else
 598                nb_tx_to_clean = (uint16_t)(desc_to_clean_to -
 599                                                last_desc_cleaned);
 600
 601        PMD_TX_FREE_LOG(DEBUG,
 602                        "Cleaning %4u TX descriptors: %4u to %4u "
 603                        "(port=%d queue=%d)",
 604                        nb_tx_to_clean, last_desc_cleaned, desc_to_clean_to,
 605                        txq->port_id, txq->queue_id);
 606
 607        /*
 608         * The last descriptor to clean is done, so that means all the
 609         * descriptors from the last descriptor that was cleaned
 610         * up to the last descriptor with the RS bit set
 611         * are done. Only reset the threshold descriptor.
 612         */
 613        txr[desc_to_clean_to].wb.status = 0;
 614
 615        /* Update the txq to reflect the last descriptor that was cleaned */
 616        txq->last_desc_cleaned = desc_to_clean_to;
 617        txq->nb_tx_free = (uint16_t)(txq->nb_tx_free + nb_tx_to_clean);
 618
 619        /* No Error */
 620        return 0;
 621}
 622
 623uint16_t
 624ixgbe_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
 625                uint16_t nb_pkts)
 626{
 627        struct ixgbe_tx_queue *txq;
 628        struct ixgbe_tx_entry *sw_ring;
 629        struct ixgbe_tx_entry *txe, *txn;
 630        volatile union ixgbe_adv_tx_desc *txr;
 631        volatile union ixgbe_adv_tx_desc *txd, *txp;
 632        struct rte_mbuf     *tx_pkt;
 633        struct rte_mbuf     *m_seg;
 634        uint64_t buf_dma_addr;
 635        uint32_t olinfo_status;
 636        uint32_t cmd_type_len;
 637        uint32_t pkt_len;
 638        uint16_t slen;
 639        uint64_t ol_flags;
 640        uint16_t tx_id;
 641        uint16_t tx_last;
 642        uint16_t nb_tx;
 643        uint16_t nb_used;
 644        uint64_t tx_ol_req;
 645        uint32_t ctx = 0;
 646        uint32_t new_ctx;
 647        union ixgbe_tx_offload tx_offload;
 648#ifdef RTE_LIBRTE_SECURITY
 649        uint8_t use_ipsec;
 650#endif
 651
 652        tx_offload.data[0] = 0;
 653        tx_offload.data[1] = 0;
 654        txq = tx_queue;
 655        sw_ring = txq->sw_ring;
 656        txr     = txq->tx_ring;
 657        tx_id   = txq->tx_tail;
 658        txe = &sw_ring[tx_id];
 659        txp = NULL;
 660
 661        /* Determine if the descriptor ring needs to be cleaned. */
 662        if (txq->nb_tx_free < txq->tx_free_thresh)
 663                ixgbe_xmit_cleanup(txq);
 664
 665        rte_prefetch0(&txe->mbuf->pool);
 666
 667        /* TX loop */
 668        for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
 669                new_ctx = 0;
 670                tx_pkt = *tx_pkts++;
 671                pkt_len = tx_pkt->pkt_len;
 672
 673                /*
 674                 * Determine how many (if any) context descriptors
 675                 * are needed for offload functionality.
 676                 */
 677                ol_flags = tx_pkt->ol_flags;
 678#ifdef RTE_LIBRTE_SECURITY
 679                use_ipsec = txq->using_ipsec && (ol_flags & PKT_TX_SEC_OFFLOAD);
 680#endif
 681
 682                /* If hardware offload required */
 683                tx_ol_req = ol_flags & IXGBE_TX_OFFLOAD_MASK;
 684                if (tx_ol_req) {
 685                        tx_offload.l2_len = tx_pkt->l2_len;
 686                        tx_offload.l3_len = tx_pkt->l3_len;
 687                        tx_offload.l4_len = tx_pkt->l4_len;
 688                        tx_offload.vlan_tci = tx_pkt->vlan_tci;
 689                        tx_offload.tso_segsz = tx_pkt->tso_segsz;
 690                        tx_offload.outer_l2_len = tx_pkt->outer_l2_len;
 691                        tx_offload.outer_l3_len = tx_pkt->outer_l3_len;
 692#ifdef RTE_LIBRTE_SECURITY
 693                        if (use_ipsec) {
 694                                union ixgbe_crypto_tx_desc_md *ipsec_mdata =
 695                                        (union ixgbe_crypto_tx_desc_md *)
 696                                                        &tx_pkt->udata64;
 697                                tx_offload.sa_idx = ipsec_mdata->sa_idx;
 698                                tx_offload.sec_pad_len = ipsec_mdata->pad_len;
 699                        }
 700#endif
 701
 702                        /* Decide whether a new context must be built or an existing one reused. */
 703                        ctx = what_advctx_update(txq, tx_ol_req,
 704                                tx_offload);
 705                        /* Only allocate a context descriptor if required */
 706                        new_ctx = (ctx == IXGBE_CTX_NUM);
 707                        ctx = txq->ctx_curr;
 708                }
 709
 710                /*
 711                 * Keep track of how many descriptors are used in this loop.
 712                 * This will always be the number of segments plus the number of
 713                 * context descriptors required to transmit the packet.
 714                 */
 715                nb_used = (uint16_t)(tx_pkt->nb_segs + new_ctx);
 716
 717                if (txp != NULL &&
 718                                nb_used + txq->nb_tx_used >= txq->tx_rs_thresh)
 719                        /* set RS on the previous packet in the burst */
 720                        txp->read.cmd_type_len |=
 721                                rte_cpu_to_le_32(IXGBE_TXD_CMD_RS);
 722
 723                /*
 724                 * The number of descriptors that must be allocated for a
 725                 * packet is the number of segments of that packet, plus 1
 726                 * Context Descriptor for the hardware offload, if any.
 727                 * Determine the last TX descriptor to allocate in the TX ring
 728                 * for the packet, starting from the current position (tx_id)
 729                 * in the ring.
 730                 */
 731                tx_last = (uint16_t) (tx_id + nb_used - 1);
 732
 733                /* Circular ring */
 734                if (tx_last >= txq->nb_tx_desc)
 735                        tx_last = (uint16_t) (tx_last - txq->nb_tx_desc);
 736
 737                PMD_TX_LOG(DEBUG, "port_id=%u queue_id=%u pktlen=%u"
 738                           " tx_first=%u tx_last=%u",
 739                           (unsigned) txq->port_id,
 740                           (unsigned) txq->queue_id,
 741                           (unsigned) pkt_len,
 742                           (unsigned) tx_id,
 743                           (unsigned) tx_last);
 744
 745                /*
 746                 * Make sure there are enough TX descriptors available to
 747                 * transmit the entire packet.
 748                 * nb_used better be less than or equal to txq->tx_rs_thresh
 749                 */
 750                if (nb_used > txq->nb_tx_free) {
 751                        PMD_TX_FREE_LOG(DEBUG,
 752                                        "Not enough free TX descriptors "
 753                                        "nb_used=%4u nb_free=%4u "
 754                                        "(port=%d queue=%d)",
 755                                        nb_used, txq->nb_tx_free,
 756                                        txq->port_id, txq->queue_id);
 757
 758                        if (ixgbe_xmit_cleanup(txq) != 0) {
 759                                /* Could not clean any descriptors */
 760                                if (nb_tx == 0)
 761                                        return 0;
 762                                goto end_of_tx;
 763                        }
 764
 765                        /* nb_used better be <= txq->tx_rs_thresh */
 766                        if (unlikely(nb_used > txq->tx_rs_thresh)) {
 767                                PMD_TX_FREE_LOG(DEBUG,
 768                                        "The number of descriptors needed to "
 769                                        "transmit the packet exceeds the "
 770                                        "RS bit threshold. This will impact "
 771                                        "performance. "
 772                                        "nb_used=%4u nb_free=%4u "
 773                                        "tx_rs_thresh=%4u. "
 774                                        "(port=%d queue=%d)",
 775                                        nb_used, txq->nb_tx_free,
 776                                        txq->tx_rs_thresh,
 777                                        txq->port_id, txq->queue_id);
 778                                /*
 779                                 * Loop here until there are enough TX
 780                                 * descriptors or until the ring cannot be
 781                                 * cleaned.
 782                                 */
 783                                while (nb_used > txq->nb_tx_free) {
 784                                        if (ixgbe_xmit_cleanup(txq) != 0) {
 785                                                /*
 786                                                 * Could not clean any
 787                                                 * descriptors
 788                                                 */
 789                                                if (nb_tx == 0)
 790                                                        return 0;
 791                                                goto end_of_tx;
 792                                        }
 793                                }
 794                        }
 795                }
 796
 797                /*
 798                 * By now there are enough free TX descriptors to transmit
 799                 * the packet.
 800                 */
 801
 802                /*
 803                 * Set common flags of all TX Data Descriptors.
 804                 *
 805                 * The following bits must be set in all Data Descriptors:
 806                 *   - IXGBE_ADVTXD_DTYP_DATA
 807                 *   - IXGBE_ADVTXD_DCMD_DEXT
 808                 *
 809                 * The following bits must be set in the first Data Descriptor
 810                 * and are ignored in the other ones:
 811                 *   - IXGBE_ADVTXD_DCMD_IFCS
 812                 *   - IXGBE_ADVTXD_MAC_1588
 813                 *   - IXGBE_ADVTXD_DCMD_VLE
 814                 *
 815                 * The following bits must only be set in the last Data
 816                 * Descriptor:
 817                 *   - IXGBE_TXD_CMD_EOP
 818                 *
 819                 * The following bits can be set in any Data Descriptor, but
 820                 * are only set in the last Data Descriptor:
 821                 *   - IXGBE_TXD_CMD_RS
 822                 */
 823                cmd_type_len = IXGBE_ADVTXD_DTYP_DATA |
 824                        IXGBE_ADVTXD_DCMD_IFCS | IXGBE_ADVTXD_DCMD_DEXT;
 825
 826#ifdef RTE_LIBRTE_IEEE1588
 827                if (ol_flags & PKT_TX_IEEE1588_TMST)
 828                        cmd_type_len |= IXGBE_ADVTXD_MAC_1588;
 829#endif
 830
 831                olinfo_status = 0;
 832                if (tx_ol_req) {
 833
 834                        if (ol_flags & PKT_TX_TCP_SEG) {
 835                                /* when TSO is on, the paylen in the descriptor
 836                                 * is not the packet len but the TCP payload len */
 837                                pkt_len -= (tx_offload.l2_len +
 838                                        tx_offload.l3_len + tx_offload.l4_len);
 839                        }
 840
 841                        /*
 842                         * Setup the TX Advanced Context Descriptor if required
 843                         */
 844                        if (new_ctx) {
 845                                volatile struct ixgbe_adv_tx_context_desc *
 846                                    ctx_txd;
 847
 848                                ctx_txd = (volatile struct
 849                                    ixgbe_adv_tx_context_desc *)
 850                                    &txr[tx_id];
 851
 852                                txn = &sw_ring[txe->next_id];
 853                                rte_prefetch0(&txn->mbuf->pool);
 854
 855                                if (txe->mbuf != NULL) {
 856                                        rte_pktmbuf_free_seg(txe->mbuf);
 857                                        txe->mbuf = NULL;
 858                                }
 859
 860                                ixgbe_set_xmit_ctx(txq, ctx_txd, tx_ol_req,
 861                                        tx_offload, &tx_pkt->udata64);
 862
 863                                txe->last_id = tx_last;
 864                                tx_id = txe->next_id;
 865                                txe = txn;
 866                        }
 867
 868                        /*
 869                         * Set up the TX Advanced Data Descriptor.
 870                         * This path is taken whether a new context descriptor
 871                         * was built or an existing one is being reused.
 872                         */
 873                        cmd_type_len  |= tx_desc_ol_flags_to_cmdtype(ol_flags);
 874                        olinfo_status |= tx_desc_cksum_flags_to_olinfo(ol_flags);
 875                        olinfo_status |= ctx << IXGBE_ADVTXD_IDX_SHIFT;
 876                }
 877
 878                olinfo_status |= (pkt_len << IXGBE_ADVTXD_PAYLEN_SHIFT);
 879#ifdef RTE_LIBRTE_SECURITY
 880                if (use_ipsec)
 881                        olinfo_status |= IXGBE_ADVTXD_POPTS_IPSEC;
 882#endif
 883
 884                m_seg = tx_pkt;
 885                do {
 886                        txd = &txr[tx_id];
 887                        txn = &sw_ring[txe->next_id];
 888                        rte_prefetch0(&txn->mbuf->pool);
 889
 890                        if (txe->mbuf != NULL)
 891                                rte_pktmbuf_free_seg(txe->mbuf);
 892                        txe->mbuf = m_seg;
 893
 894                        /*
 895                         * Set up Transmit Data Descriptor.
 896                         */
 897                        slen = m_seg->data_len;
 898                        buf_dma_addr = rte_mbuf_data_iova(m_seg);
 899                        txd->read.buffer_addr =
 900                                rte_cpu_to_le_64(buf_dma_addr);
 901                        txd->read.cmd_type_len =
 902                                rte_cpu_to_le_32(cmd_type_len | slen);
 903                        txd->read.olinfo_status =
 904                                rte_cpu_to_le_32(olinfo_status);
 905                        txe->last_id = tx_last;
 906                        tx_id = txe->next_id;
 907                        txe = txn;
 908                        m_seg = m_seg->next;
 909                } while (m_seg != NULL);
 910
 911                /*
 912                 * The last packet data descriptor needs End Of Packet (EOP)
 913                 */
 914                cmd_type_len |= IXGBE_TXD_CMD_EOP;
 915                txq->nb_tx_used = (uint16_t)(txq->nb_tx_used + nb_used);
 916                txq->nb_tx_free = (uint16_t)(txq->nb_tx_free - nb_used);
 917
 918                /* Set RS bit only on threshold packets' last descriptor */
 919                if (txq->nb_tx_used >= txq->tx_rs_thresh) {
 920                        PMD_TX_FREE_LOG(DEBUG,
 921                                        "Setting RS bit on TXD id="
 922                                        "%4u (port=%d queue=%d)",
 923                                        tx_last, txq->port_id, txq->queue_id);
 924
 925                        cmd_type_len |= IXGBE_TXD_CMD_RS;
 926
 927                        /* Update txq RS bit counters */
 928                        txq->nb_tx_used = 0;
 929                        txp = NULL;
 930                } else
 931                        txp = txd;
 932
 933                txd->read.cmd_type_len |= rte_cpu_to_le_32(cmd_type_len);
 934        }
 935
 936end_of_tx:
 937        /* set RS on last packet in the burst */
 938        if (txp != NULL)
 939                txp->read.cmd_type_len |= rte_cpu_to_le_32(IXGBE_TXD_CMD_RS);
 940
 941        rte_wmb();
 942
 943        /*
 944         * Set the Transmit Descriptor Tail (TDT)
 945         */
 946        PMD_TX_LOG(DEBUG, "port_id=%u queue_id=%u tx_tail=%u nb_tx=%u",
 947                   (unsigned) txq->port_id, (unsigned) txq->queue_id,
 948                   (unsigned) tx_id, (unsigned) nb_tx);
 949        IXGBE_PCI_REG_WRITE_RELAXED(txq->tdt_reg_addr, tx_id);
 950        txq->tx_tail = tx_id;
 951
 952        return nb_tx;
 953}
 954
 955/*********************************************************************
 956 *
 957 *  TX prep functions
 958 *
 959 **********************************************************************/
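    /*
     * Validate a burst before transmission: reject packets that exceed the
     * segment-count limit, carry unsupported offload flags, or are shorter
     * than the minimum frame length, and prepare checksum fields (such as
     * pseudo-header checksums) in software where the hardware expects them.
     */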
 960uint16_t
 961ixgbe_prep_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
 962{
 963        int i, ret;
 964        uint64_t ol_flags;
 965        struct rte_mbuf *m;
 966        struct ixgbe_tx_queue *txq = (struct ixgbe_tx_queue *)tx_queue;
 967
 968        for (i = 0; i < nb_pkts; i++) {
 969                m = tx_pkts[i];
 970                ol_flags = m->ol_flags;
 971
 972                /**
 973                 * Check whether the packet meets the limit on the number of segments
 974                 *
 975                 * NOTE: for ixgbe it's always (40 - WTHRESH) for both TSO and
 976                 *       non-TSO
 977                 */
 978
 979                if (m->nb_segs > IXGBE_TX_MAX_SEG - txq->wthresh) {
 980                        rte_errno = EINVAL;
 981                        return i;
 982                }
 983
 984                if (ol_flags & IXGBE_TX_OFFLOAD_NOTSUP_MASK) {
 985                        rte_errno = ENOTSUP;
 986                        return i;
 987                }
 988
 989                /* check the size of packet */
 990                if (m->pkt_len < IXGBE_TX_MIN_PKT_LEN) {
 991                        rte_errno = EINVAL;
 992                        return i;
 993                }
 994
 995#ifdef RTE_LIBRTE_ETHDEV_DEBUG
 996                ret = rte_validate_tx_offload(m);
 997                if (ret != 0) {
 998                        rte_errno = -ret;
 999                        return i;
1000                }
1001#endif
1002                ret = rte_net_intel_cksum_prepare(m);
1003                if (ret != 0) {
1004                        rte_errno = -ret;
1005                        return i;
1006                }
1007        }
1008
1009        return i;
1010}
1011
1012/*********************************************************************
1013 *
1014 *  RX functions
1015 *
1016 **********************************************************************/
1017
1018#define IXGBE_PACKET_TYPE_ETHER                         0X00
1019#define IXGBE_PACKET_TYPE_IPV4                          0X01
1020#define IXGBE_PACKET_TYPE_IPV4_TCP                      0X11
1021#define IXGBE_PACKET_TYPE_IPV4_UDP                      0X21
1022#define IXGBE_PACKET_TYPE_IPV4_SCTP                     0X41
1023#define IXGBE_PACKET_TYPE_IPV4_EXT                      0X03
1024#define IXGBE_PACKET_TYPE_IPV4_EXT_TCP                  0X13
1025#define IXGBE_PACKET_TYPE_IPV4_EXT_UDP                  0X23
1026#define IXGBE_PACKET_TYPE_IPV4_EXT_SCTP                 0X43
1027#define IXGBE_PACKET_TYPE_IPV6                          0X04
1028#define IXGBE_PACKET_TYPE_IPV6_TCP                      0X14
1029#define IXGBE_PACKET_TYPE_IPV6_UDP                      0X24
1030#define IXGBE_PACKET_TYPE_IPV6_SCTP                     0X44
1031#define IXGBE_PACKET_TYPE_IPV6_EXT                      0X0C
1032#define IXGBE_PACKET_TYPE_IPV6_EXT_TCP                  0X1C
1033#define IXGBE_PACKET_TYPE_IPV6_EXT_UDP                  0X2C
1034#define IXGBE_PACKET_TYPE_IPV6_EXT_SCTP                 0X4C
1035#define IXGBE_PACKET_TYPE_IPV4_IPV6                     0X05
1036#define IXGBE_PACKET_TYPE_IPV4_IPV6_TCP                 0X15
1037#define IXGBE_PACKET_TYPE_IPV4_IPV6_UDP                 0X25
1038#define IXGBE_PACKET_TYPE_IPV4_IPV6_SCTP                0X45
1039#define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6                 0X07
1040#define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_TCP             0X17
1041#define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_UDP             0X27
1042#define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_SCTP            0X47
1043#define IXGBE_PACKET_TYPE_IPV4_IPV6_EXT                 0X0D
1044#define IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_TCP             0X1D
1045#define IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_UDP             0X2D
1046#define IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_SCTP            0X4D
1047#define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT             0X0F
1048#define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_TCP         0X1F
1049#define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_UDP         0X2F
1050#define IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_SCTP        0X4F
1051
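    /*
     * These values mirror the packet-type encoding reported by the hardware
     * in the RX descriptor and are used as indexes into the lookup tables
     * defined below.
     */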
1052#define IXGBE_PACKET_TYPE_NVGRE                   0X00
1053#define IXGBE_PACKET_TYPE_NVGRE_IPV4              0X01
1054#define IXGBE_PACKET_TYPE_NVGRE_IPV4_TCP          0X11
1055#define IXGBE_PACKET_TYPE_NVGRE_IPV4_UDP          0X21
1056#define IXGBE_PACKET_TYPE_NVGRE_IPV4_SCTP         0X41
1057#define IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT          0X03
1058#define IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_TCP      0X13
1059#define IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_UDP      0X23
1060#define IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_SCTP     0X43
1061#define IXGBE_PACKET_TYPE_NVGRE_IPV6              0X04
1062#define IXGBE_PACKET_TYPE_NVGRE_IPV6_TCP          0X14
1063#define IXGBE_PACKET_TYPE_NVGRE_IPV6_UDP          0X24
1064#define IXGBE_PACKET_TYPE_NVGRE_IPV6_SCTP         0X44
1065#define IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT          0X0C
1066#define IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_TCP      0X1C
1067#define IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_UDP      0X2C
1068#define IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_SCTP     0X4C
1069#define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6         0X05
1070#define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_TCP     0X15
1071#define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_UDP     0X25
1072#define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT     0X0D
1073#define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT_TCP 0X1D
1074#define IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT_UDP 0X2D
1075
1076#define IXGBE_PACKET_TYPE_VXLAN                   0X80
1077#define IXGBE_PACKET_TYPE_VXLAN_IPV4              0X81
1078#define IXGBE_PACKET_TYPE_VXLAN_IPV4_TCP          0x91
1079#define IXGBE_PACKET_TYPE_VXLAN_IPV4_UDP          0xA1
1080#define IXGBE_PACKET_TYPE_VXLAN_IPV4_SCTP         0xC1
1081#define IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT          0x83
1082#define IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_TCP      0X93
1083#define IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_UDP      0XA3
1084#define IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_SCTP     0XC3
1085#define IXGBE_PACKET_TYPE_VXLAN_IPV6              0X84
1086#define IXGBE_PACKET_TYPE_VXLAN_IPV6_TCP          0X94
1087#define IXGBE_PACKET_TYPE_VXLAN_IPV6_UDP          0XA4
1088#define IXGBE_PACKET_TYPE_VXLAN_IPV6_SCTP         0XC4
1089#define IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT          0X8C
1090#define IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_TCP      0X9C
1091#define IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_UDP      0XAC
1092#define IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_SCTP     0XCC
1093#define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6         0X85
1094#define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_TCP     0X95
1095#define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_UDP     0XA5
1096#define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT     0X8D
1097#define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT_TCP 0X9D
1098#define IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT_UDP 0XAD
1099
1100/**
1101 * Use two different tables, one for normal packets and one for tunnel
1102 * packets, to save space.
1103 */
1104const uint32_t
1105        ptype_table[IXGBE_PACKET_TYPE_MAX] __rte_cache_aligned = {
1106        [IXGBE_PACKET_TYPE_ETHER] = RTE_PTYPE_L2_ETHER,
1107        [IXGBE_PACKET_TYPE_IPV4] = RTE_PTYPE_L2_ETHER |
1108                RTE_PTYPE_L3_IPV4,
1109        [IXGBE_PACKET_TYPE_IPV4_TCP] = RTE_PTYPE_L2_ETHER |
1110                RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_TCP,
1111        [IXGBE_PACKET_TYPE_IPV4_UDP] = RTE_PTYPE_L2_ETHER |
1112                RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_UDP,
1113        [IXGBE_PACKET_TYPE_IPV4_SCTP] = RTE_PTYPE_L2_ETHER |
1114                RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_SCTP,
1115        [IXGBE_PACKET_TYPE_IPV4_EXT] = RTE_PTYPE_L2_ETHER |
1116                RTE_PTYPE_L3_IPV4_EXT,
1117        [IXGBE_PACKET_TYPE_IPV4_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1118                RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_L4_TCP,
1119        [IXGBE_PACKET_TYPE_IPV4_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1120                RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_L4_UDP,
1121        [IXGBE_PACKET_TYPE_IPV4_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1122                RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_L4_SCTP,
1123        [IXGBE_PACKET_TYPE_IPV6] = RTE_PTYPE_L2_ETHER |
1124                RTE_PTYPE_L3_IPV6,
1125        [IXGBE_PACKET_TYPE_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1126                RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L4_TCP,
1127        [IXGBE_PACKET_TYPE_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1128                RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L4_UDP,
1129        [IXGBE_PACKET_TYPE_IPV6_SCTP] = RTE_PTYPE_L2_ETHER |
1130                RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L4_SCTP,
1131        [IXGBE_PACKET_TYPE_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1132                RTE_PTYPE_L3_IPV6_EXT,
1133        [IXGBE_PACKET_TYPE_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1134                RTE_PTYPE_L3_IPV6_EXT | RTE_PTYPE_L4_TCP,
1135        [IXGBE_PACKET_TYPE_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1136                RTE_PTYPE_L3_IPV6_EXT | RTE_PTYPE_L4_UDP,
1137        [IXGBE_PACKET_TYPE_IPV6_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1138                RTE_PTYPE_L3_IPV6_EXT | RTE_PTYPE_L4_SCTP,
1139        [IXGBE_PACKET_TYPE_IPV4_IPV6] = RTE_PTYPE_L2_ETHER |
1140                RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1141                RTE_PTYPE_INNER_L3_IPV6,
1142        [IXGBE_PACKET_TYPE_IPV4_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1143                RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1144                RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_TCP,
1145        [IXGBE_PACKET_TYPE_IPV4_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1146                RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1147                RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_UDP,
1148        [IXGBE_PACKET_TYPE_IPV4_IPV6_SCTP] = RTE_PTYPE_L2_ETHER |
1149                RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1150                RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_SCTP,
1151        [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6] = RTE_PTYPE_L2_ETHER |
1152                RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1153                RTE_PTYPE_INNER_L3_IPV6,
1154        [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1155                RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1156                RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_TCP,
1157        [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1158                RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1159                RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_UDP,
1160        [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_SCTP] = RTE_PTYPE_L2_ETHER |
1161                RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1162                RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_SCTP,
1163        [IXGBE_PACKET_TYPE_IPV4_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1164                RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1165                RTE_PTYPE_INNER_L3_IPV6_EXT,
1166        [IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1167                RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1168                RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_TCP,
1169        [IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1170                RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1171                RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_UDP,
1172        [IXGBE_PACKET_TYPE_IPV4_IPV6_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1173                RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
1174                RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_SCTP,
1175        [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1176                RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1177                RTE_PTYPE_INNER_L3_IPV6_EXT,
1178        [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1179                RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1180                RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_TCP,
1181        [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1182                RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1183                RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_UDP,
1184        [IXGBE_PACKET_TYPE_IPV4_EXT_IPV6_EXT_SCTP] =
1185                RTE_PTYPE_L2_ETHER |
1186                RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_TUNNEL_IP |
1187                RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_SCTP,
1188};
1189
1190const uint32_t
1191        ptype_table_tn[IXGBE_PACKET_TYPE_TN_MAX] __rte_cache_aligned = {
1192        [IXGBE_PACKET_TYPE_NVGRE] = RTE_PTYPE_L2_ETHER |
1193                RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1194                RTE_PTYPE_INNER_L2_ETHER,
1195        [IXGBE_PACKET_TYPE_NVGRE_IPV4] = RTE_PTYPE_L2_ETHER |
1196                RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1197                RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1198        [IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT] = RTE_PTYPE_L2_ETHER |
1199                RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1200                RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4_EXT,
1201        [IXGBE_PACKET_TYPE_NVGRE_IPV6] = RTE_PTYPE_L2_ETHER |
1202                RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1203                RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6,
1204        [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6] = RTE_PTYPE_L2_ETHER |
1205                RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1206                RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1207        [IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1208                RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1209                RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6_EXT,
1210        [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1211                RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1212                RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1213        [IXGBE_PACKET_TYPE_NVGRE_IPV4_TCP] = RTE_PTYPE_L2_ETHER |
1214                RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1215                RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4 |
1216                RTE_PTYPE_INNER_L4_TCP,
1217        [IXGBE_PACKET_TYPE_NVGRE_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1218                RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1219                RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6 |
1220                RTE_PTYPE_INNER_L4_TCP,
1221        [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1222                RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1223                RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1224        [IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1225                RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1226                RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6_EXT |
1227                RTE_PTYPE_INNER_L4_TCP,
1228        [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT_TCP] =
1229                RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
1230                RTE_PTYPE_TUNNEL_GRE | RTE_PTYPE_INNER_L2_ETHER |
1231                RTE_PTYPE_INNER_L3_IPV4,
1232        [IXGBE_PACKET_TYPE_NVGRE_IPV4_UDP] = RTE_PTYPE_L2_ETHER |
1233                RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1234                RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4 |
1235                RTE_PTYPE_INNER_L4_UDP,
1236        [IXGBE_PACKET_TYPE_NVGRE_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1237                RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1238                RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6 |
1239                RTE_PTYPE_INNER_L4_UDP,
1240        [IXGBE_PACKET_TYPE_NVGRE_IPV6_SCTP] = RTE_PTYPE_L2_ETHER |
1241                RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1242                RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6 |
1243                RTE_PTYPE_INNER_L4_SCTP,
1244        [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1245                RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1246                RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1247        [IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1248                RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1249                RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6_EXT |
1250                RTE_PTYPE_INNER_L4_UDP,
1251        [IXGBE_PACKET_TYPE_NVGRE_IPV6_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1252                RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1253                RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV6_EXT |
1254                RTE_PTYPE_INNER_L4_SCTP,
1255        [IXGBE_PACKET_TYPE_NVGRE_IPV4_IPV6_EXT_UDP] =
1256                RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
1257                RTE_PTYPE_TUNNEL_GRE | RTE_PTYPE_INNER_L2_ETHER |
1258                RTE_PTYPE_INNER_L3_IPV4,
1259        [IXGBE_PACKET_TYPE_NVGRE_IPV4_SCTP] = RTE_PTYPE_L2_ETHER |
1260                RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1261                RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4 |
1262                RTE_PTYPE_INNER_L4_SCTP,
1263        [IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1264                RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1265                RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4_EXT |
1266                RTE_PTYPE_INNER_L4_SCTP,
1267        [IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1268                RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1269                RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4_EXT |
1270                RTE_PTYPE_INNER_L4_TCP,
1271        [IXGBE_PACKET_TYPE_NVGRE_IPV4_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1272                RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_TUNNEL_GRE |
1273                RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4_EXT |
1274                RTE_PTYPE_INNER_L4_UDP,
1275
1276        [IXGBE_PACKET_TYPE_VXLAN] = RTE_PTYPE_L2_ETHER |
1277                RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1278                RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER,
1279        [IXGBE_PACKET_TYPE_VXLAN_IPV4] = RTE_PTYPE_L2_ETHER |
1280                RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1281                RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1282                RTE_PTYPE_INNER_L3_IPV4,
1283        [IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT] = RTE_PTYPE_L2_ETHER |
1284                RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1285                RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1286                RTE_PTYPE_INNER_L3_IPV4_EXT,
1287        [IXGBE_PACKET_TYPE_VXLAN_IPV6] = RTE_PTYPE_L2_ETHER |
1288                RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1289                RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1290                RTE_PTYPE_INNER_L3_IPV6,
1291        [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6] = RTE_PTYPE_L2_ETHER |
1292                RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1293                RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1294                RTE_PTYPE_INNER_L3_IPV4,
1295        [IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1296                RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1297                RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1298                RTE_PTYPE_INNER_L3_IPV6_EXT,
1299        [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
1300                RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1301                RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1302                RTE_PTYPE_INNER_L3_IPV4,
1303        [IXGBE_PACKET_TYPE_VXLAN_IPV4_TCP] = RTE_PTYPE_L2_ETHER |
1304                RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1305                RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1306                RTE_PTYPE_INNER_L3_IPV4 | RTE_PTYPE_INNER_L4_TCP,
1307        [IXGBE_PACKET_TYPE_VXLAN_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1308                RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1309                RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1310                RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_TCP,
1311        [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
1312                RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1313                RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1314                RTE_PTYPE_INNER_L3_IPV4,
1315        [IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1316                RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1317                RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1318                RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_TCP,
1319        [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT_TCP] =
1320                RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
1321                RTE_PTYPE_L4_UDP | RTE_PTYPE_TUNNEL_VXLAN |
1322                RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1323        [IXGBE_PACKET_TYPE_VXLAN_IPV4_UDP] = RTE_PTYPE_L2_ETHER |
1324                RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1325                RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1326                RTE_PTYPE_INNER_L3_IPV4 | RTE_PTYPE_INNER_L4_UDP,
1327        [IXGBE_PACKET_TYPE_VXLAN_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1328                RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1329                RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1330                RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_UDP,
1331        [IXGBE_PACKET_TYPE_VXLAN_IPV6_SCTP] = RTE_PTYPE_L2_ETHER |
1332                RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1333                RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1334                RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_SCTP,
1335        [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
1336                RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1337                RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1338                RTE_PTYPE_INNER_L3_IPV4,
1339        [IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1340                RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1341                RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1342                RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_UDP,
1343        [IXGBE_PACKET_TYPE_VXLAN_IPV6_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1344                RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1345                RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1346                RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_SCTP,
1347        [IXGBE_PACKET_TYPE_VXLAN_IPV4_IPV6_EXT_UDP] =
1348                RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
1349                RTE_PTYPE_L4_UDP | RTE_PTYPE_TUNNEL_VXLAN |
1350                RTE_PTYPE_INNER_L2_ETHER | RTE_PTYPE_INNER_L3_IPV4,
1351        [IXGBE_PACKET_TYPE_VXLAN_IPV4_SCTP] = RTE_PTYPE_L2_ETHER |
1352                RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1353                RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1354                RTE_PTYPE_INNER_L3_IPV4 | RTE_PTYPE_INNER_L4_SCTP,
1355        [IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
1356                RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1357                RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1358                RTE_PTYPE_INNER_L3_IPV4_EXT | RTE_PTYPE_INNER_L4_SCTP,
1359        [IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_TCP] = RTE_PTYPE_L2_ETHER |
1360                RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1361                RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1362                RTE_PTYPE_INNER_L3_IPV4_EXT | RTE_PTYPE_INNER_L4_TCP,
1363        [IXGBE_PACKET_TYPE_VXLAN_IPV4_EXT_UDP] = RTE_PTYPE_L2_ETHER |
1364                RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
1365                RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_INNER_L2_ETHER |
1366                RTE_PTYPE_INNER_L3_IPV4_EXT | RTE_PTYPE_INNER_L4_UDP,
1367};
1368
1369/* @note: update ixgbe_dev_supported_ptypes_get() if anything changes here. */
1370static inline uint32_t
1371ixgbe_rxd_pkt_info_to_pkt_type(uint32_t pkt_info, uint16_t ptype_mask)
1372{
1373
1374        if (unlikely(pkt_info & IXGBE_RXDADV_PKTTYPE_ETQF))
1375                return RTE_PTYPE_UNKNOWN;
1376
1377        pkt_info = (pkt_info >> IXGBE_PACKET_TYPE_SHIFT) & ptype_mask;
1378
1379        /* For tunnel packets */
1380        if (pkt_info & IXGBE_PACKET_TYPE_TUNNEL_BIT) {
1381                /* Remove the tunnel bit to save table space. */
1382                pkt_info &= IXGBE_PACKET_TYPE_MASK_TUNNEL;
1383                return ptype_table_tn[pkt_info];
1384        }
1385
1386        /**
1387         * For x550, if the packet is not tunneled,
1388         * the tunnel type bit should already be 0,
1389         * so reuse the 82599 mask.
1390         */
1391        pkt_info &= IXGBE_PACKET_TYPE_MASK_82599;
1392
1393        return ptype_table[pkt_info];
1394}
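
/*
 * The packet type resolved above ends up in mbuf->packet_type, which
 * applications can decode with the RTE_ETH_IS_* helpers from rte_ethdev.h.
 * A minimal application-side sketch, assuming a received mbuf "m" and
 * hypothetical handle_*() helpers:
 *
 *     if (RTE_ETH_IS_TUNNEL_PKT(m->packet_type))
 *             handle_tunnel(m);
 *     else if (RTE_ETH_IS_IPV4_HDR(m->packet_type))
 *             handle_ipv4(m);
 *     else if (RTE_ETH_IS_IPV6_HDR(m->packet_type))
 *             handle_ipv6(m);
 *     else
 *             rte_pktmbuf_free(m);
 */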
1395
1396static inline uint64_t
1397ixgbe_rxd_pkt_info_to_pkt_flags(uint16_t pkt_info)
1398{
1399        static uint64_t ip_rss_types_map[16] __rte_cache_aligned = {
1400                0, PKT_RX_RSS_HASH, PKT_RX_RSS_HASH, PKT_RX_RSS_HASH,
1401                0, PKT_RX_RSS_HASH, 0, PKT_RX_RSS_HASH,
1402                PKT_RX_RSS_HASH, 0, 0, 0,
1403                0, 0, 0,  PKT_RX_FDIR,
1404        };
1405#ifdef RTE_LIBRTE_IEEE1588
1406        static uint64_t ip_pkt_etqf_map[8] = {
1407                0, 0, 0, PKT_RX_IEEE1588_PTP,
1408                0, 0, 0, 0,
1409        };
1410
1411        if (likely(pkt_info & IXGBE_RXDADV_PKTTYPE_ETQF))
1412                return ip_pkt_etqf_map[(pkt_info >> 4) & 0x07] |
1413                                ip_rss_types_map[pkt_info & 0xF];
1414        else
1415                return ip_rss_types_map[pkt_info & 0xF];
1416#else
1417        return ip_rss_types_map[pkt_info & 0xF];
1418#endif
1419}
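
/*
 * The PKT_RX_RSS_HASH and PKT_RX_FDIR flags returned here tell the receive
 * paths below (and ultimately the application) which member of mbuf->hash
 * is valid. A minimal consumption sketch, assuming a received mbuf "m":
 *
 *     uint32_t hash = 0;
 *
 *     if (m->ol_flags & PKT_RX_RSS_HASH)
 *             hash = m->hash.rss;             /+ RSS hash computed by the NIC
 *     else if (m->ol_flags & PKT_RX_FDIR)
 *             hash = m->hash.fdir.hash;       /+ flow director hash
 */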
1420
1421static inline uint64_t
1422rx_desc_status_to_pkt_flags(uint32_t rx_status, uint64_t vlan_flags)
1423{
1424        uint64_t pkt_flags;
1425
1426        /*
1427         * Check only whether a VLAN tag is present.
1428         * Do not check whether the L3/L4 Rx checksum was done by the NIC;
1429         * that can be found from the rte_eth_rxmode.offloads flag.
1430         */
1431        pkt_flags = (rx_status & IXGBE_RXD_STAT_VP) ?  vlan_flags : 0;
1432
1433#ifdef RTE_LIBRTE_IEEE1588
1434        if (rx_status & IXGBE_RXD_STAT_TMST)
1435                pkt_flags = pkt_flags | PKT_RX_IEEE1588_TMST;
1436#endif
1437        return pkt_flags;
1438}
1439
1440static inline uint64_t
1441rx_desc_error_to_pkt_flags(uint32_t rx_status)
1442{
1443        uint64_t pkt_flags;
1444
1445        /*
1446         * Bit 31: IPE, IPv4 checksum error
1447         * Bit 30: L4I, L4 integrity error
1448         */
1449        static uint64_t error_to_pkt_flags_map[4] = {
1450                PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_GOOD,
1451                PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_BAD,
1452                PKT_RX_IP_CKSUM_BAD | PKT_RX_L4_CKSUM_GOOD,
1453                PKT_RX_IP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD
1454        };
1455        pkt_flags = error_to_pkt_flags_map[(rx_status >>
1456                IXGBE_RXDADV_ERR_CKSUM_BIT) & IXGBE_RXDADV_ERR_CKSUM_MSK];
1457
1458        if ((rx_status & IXGBE_RXD_STAT_OUTERIPCS) &&
1459            (rx_status & IXGBE_RXDADV_ERR_OUTERIPER)) {
1460                pkt_flags |= PKT_RX_EIP_CKSUM_BAD;
1461        }
1462
1463#ifdef RTE_LIBRTE_SECURITY
1464        if (rx_status & IXGBE_RXD_STAT_SECP) {
1465                pkt_flags |= PKT_RX_SEC_OFFLOAD;
1466                if (rx_status & IXGBE_RXDADV_LNKSEC_ERROR_BAD_SIG)
1467                        pkt_flags |= PKT_RX_SEC_OFFLOAD_FAILED;
1468        }
1469#endif
1470
1471        return pkt_flags;
1472}
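
/*
 * The checksum status decoded above is reported per packet in
 * mbuf->ol_flags. A minimal application-side sketch, assuming a received
 * mbuf "m", that drops packets whose IP or L4 checksum was flagged bad:
 *
 *     if (m->ol_flags & (PKT_RX_IP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD)) {
 *             rte_pktmbuf_free(m);
 *             return;
 *     }
 */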
1473
1474/*
1475 * LOOK_AHEAD defines how many descriptor statuses to check beyond the
1476 * current descriptor.
1477 * It must be a preprocessor #define for optimal performance.
1478 * Do not change the value of LOOK_AHEAD, as the ixgbe_rx_scan_hw_ring
1479 * function only works with LOOK_AHEAD=8.
1480 */
1481#define LOOK_AHEAD 8
1482#if (LOOK_AHEAD != 8)
1483#error "PMD IXGBE: LOOK_AHEAD must be 8\n"
1484#endif
1485static inline int
1486ixgbe_rx_scan_hw_ring(struct ixgbe_rx_queue *rxq)
1487{
1488        volatile union ixgbe_adv_rx_desc *rxdp;
1489        struct ixgbe_rx_entry *rxep;
1490        struct rte_mbuf *mb;
1491        uint16_t pkt_len;
1492        uint64_t pkt_flags;
1493        int nb_dd;
1494        uint32_t s[LOOK_AHEAD];
1495        uint32_t pkt_info[LOOK_AHEAD];
1496        int i, j, nb_rx = 0;
1497        uint32_t status;
1498        uint64_t vlan_flags = rxq->vlan_flags;
1499
1500        /* get references to current descriptor and S/W ring entry */
1501        rxdp = &rxq->rx_ring[rxq->rx_tail];
1502        rxep = &rxq->sw_ring[rxq->rx_tail];
1503
1504        status = rxdp->wb.upper.status_error;
1505        /* check to make sure there is at least 1 packet to receive */
1506        if (!(status & rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD)))
1507                return 0;
1508
1509        /*
1510         * Scan LOOK_AHEAD descriptors at a time to determine which descriptors
1511         * reference packets that are ready to be received.
1512         */
1513        for (i = 0; i < RTE_PMD_IXGBE_RX_MAX_BURST;
1514             i += LOOK_AHEAD, rxdp += LOOK_AHEAD, rxep += LOOK_AHEAD) {
1515                /* Read desc statuses; the rte_smp_rmb() below orders them before the data reads */
1516                for (j = 0; j < LOOK_AHEAD; j++)
1517                        s[j] = rte_le_to_cpu_32(rxdp[j].wb.upper.status_error);
1518
1519                rte_smp_rmb();
1520
1521                /* Compute how many status bits were set */
1522                for (nb_dd = 0; nb_dd < LOOK_AHEAD &&
1523                                (s[nb_dd] & IXGBE_RXDADV_STAT_DD); nb_dd++)
1524                        ;
1525
1526                for (j = 0; j < nb_dd; j++)
1527                        pkt_info[j] = rte_le_to_cpu_32(rxdp[j].wb.lower.
1528                                                       lo_dword.data);
1529
1530                nb_rx += nb_dd;
1531
1532                /* Translate descriptor info to mbuf format */
1533                for (j = 0; j < nb_dd; ++j) {
1534                        mb = rxep[j].mbuf;
1535                        pkt_len = rte_le_to_cpu_16(rxdp[j].wb.upper.length) -
1536                                  rxq->crc_len;
1537                        mb->data_len = pkt_len;
1538                        mb->pkt_len = pkt_len;
1539                        mb->vlan_tci = rte_le_to_cpu_16(rxdp[j].wb.upper.vlan);
1540
1541                        /* convert descriptor fields to rte mbuf flags */
1542                        pkt_flags = rx_desc_status_to_pkt_flags(s[j],
1543                                vlan_flags);
1544                        pkt_flags |= rx_desc_error_to_pkt_flags(s[j]);
1545                        pkt_flags |= ixgbe_rxd_pkt_info_to_pkt_flags
1546                                        ((uint16_t)pkt_info[j]);
1547                        mb->ol_flags = pkt_flags;
1548                        mb->packet_type =
1549                                ixgbe_rxd_pkt_info_to_pkt_type
1550                                        (pkt_info[j], rxq->pkt_type_mask);
1551
1552                        if (likely(pkt_flags & PKT_RX_RSS_HASH))
1553                                mb->hash.rss = rte_le_to_cpu_32(
1554                                    rxdp[j].wb.lower.hi_dword.rss);
1555                        else if (pkt_flags & PKT_RX_FDIR) {
1556                                mb->hash.fdir.hash = rte_le_to_cpu_16(
1557                                    rxdp[j].wb.lower.hi_dword.csum_ip.csum) &
1558                                    IXGBE_ATR_HASH_MASK;
1559                                mb->hash.fdir.id = rte_le_to_cpu_16(
1560                                    rxdp[j].wb.lower.hi_dword.csum_ip.ip_id);
1561                        }
1562                }
1563
1564                /* Move mbuf pointers from the S/W ring to the stage */
1565                for (j = 0; j < LOOK_AHEAD; ++j) {
1566                        rxq->rx_stage[i + j] = rxep[j].mbuf;
1567                }
1568
1569                /* stop scanning if this group of descriptors was not all done */
1570                if (nb_dd != LOOK_AHEAD)
1571                        break;
1572        }
1573
1574        /* clear software ring entries so we can cleanup correctly */
1575        for (i = 0; i < nb_rx; ++i) {
1576                rxq->sw_ring[rxq->rx_tail + i].mbuf = NULL;
1577        }
1578
1579
1580        return nb_rx;
1581}
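
/*
 * Worked example of the scan above, assuming RTE_PMD_IXGBE_RX_MAX_BURST
 * is 32: descriptors are checked in groups of LOOK_AHEAD (8). If only the
 * first 5 descriptors of a group have DD set, nb_dd is 5, those 5 mbufs are
 * staged and the loop breaks, so the call returns
 * (number of full groups) * 8 + 5 packets, never more than 32.
 */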
1582
1583static inline int
1584ixgbe_rx_alloc_bufs(struct ixgbe_rx_queue *rxq, bool reset_mbuf)
1585{
1586        volatile union ixgbe_adv_rx_desc *rxdp;
1587        struct ixgbe_rx_entry *rxep;
1588        struct rte_mbuf *mb;
1589        uint16_t alloc_idx;
1590        __le64 dma_addr;
1591        int diag, i;
1592
1593        /* allocate buffers in bulk directly into the S/W ring */
1594        alloc_idx = rxq->rx_free_trigger - (rxq->rx_free_thresh - 1);
1595        rxep = &rxq->sw_ring[alloc_idx];
1596        diag = rte_mempool_get_bulk(rxq->mb_pool, (void *)rxep,
1597                                    rxq->rx_free_thresh);
1598        if (unlikely(diag != 0))
1599                return -ENOMEM;
1600
1601        rxdp = &rxq->rx_ring[alloc_idx];
1602        for (i = 0; i < rxq->rx_free_thresh; ++i) {
1603                /* populate the static rte mbuf fields */
1604                mb = rxep[i].mbuf;
1605                if (reset_mbuf) {
1606                        mb->port = rxq->port_id;
1607                }
1608
1609                rte_mbuf_refcnt_set(mb, 1);
1610                mb->data_off = RTE_PKTMBUF_HEADROOM;
1611
1612                /* populate the descriptors */
1613                dma_addr = rte_cpu_to_le_64(rte_mbuf_data_iova_default(mb));
1614                rxdp[i].read.hdr_addr = 0;
1615                rxdp[i].read.pkt_addr = dma_addr;
1616        }
1617
1618        /* update state of internal queue structure */
1619        rxq->rx_free_trigger = rxq->rx_free_trigger + rxq->rx_free_thresh;
1620        if (rxq->rx_free_trigger >= rxq->nb_rx_desc)
1621                rxq->rx_free_trigger = rxq->rx_free_thresh - 1;
1622
1623        /* no errors */
1624        return 0;
1625}
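
/*
 * Worked example of the refill bookkeeping above, assuming nb_rx_desc = 128
 * and rx_free_thresh = 32: rx_free_trigger starts at 31, so the first refill
 * uses alloc_idx = 31 - 31 = 0 and fills sw_ring[0..31], advancing the
 * trigger to 63; the following refills cover 32..63, 64..95 and 96..127,
 * after which the trigger wraps back to 31.
 */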
1626
1627static inline uint16_t
1628ixgbe_rx_fill_from_stage(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_pkts,
1629                         uint16_t nb_pkts)
1630{
1631        struct rte_mbuf **stage = &rxq->rx_stage[rxq->rx_next_avail];
1632        int i;
1633
1634        /* how many packets are ready to return? */
1635        nb_pkts = (uint16_t)RTE_MIN(nb_pkts, rxq->rx_nb_avail);
1636
1637        /* copy mbuf pointers to the application's packet list */
1638        for (i = 0; i < nb_pkts; ++i)
1639                rx_pkts[i] = stage[i];
1640
1641        /* update internal queue state */
1642        rxq->rx_nb_avail = (uint16_t)(rxq->rx_nb_avail - nb_pkts);
1643        rxq->rx_next_avail = (uint16_t)(rxq->rx_next_avail + nb_pkts);
1644
1645        return nb_pkts;
1646}
1647
1648static inline uint16_t
1649rx_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
1650             uint16_t nb_pkts)
1651{
1652        struct ixgbe_rx_queue *rxq = (struct ixgbe_rx_queue *)rx_queue;
1653        uint16_t nb_rx = 0;
1654
1655        /* Any previously recv'd pkts will be returned from the Rx stage */
1656        if (rxq->rx_nb_avail)
1657                return ixgbe_rx_fill_from_stage(rxq, rx_pkts, nb_pkts);
1658
1659        /* Scan the H/W ring for packets to receive */
1660        nb_rx = (uint16_t)ixgbe_rx_scan_hw_ring(rxq);
1661
1662        /* update internal queue state */
1663        rxq->rx_next_avail = 0;
1664        rxq->rx_nb_avail = nb_rx;
1665        rxq->rx_tail = (uint16_t)(rxq->rx_tail + nb_rx);
1666
1667        /* if required, allocate new buffers to replenish descriptors */
1668        if (rxq->rx_tail > rxq->rx_free_trigger) {
1669                uint16_t cur_free_trigger = rxq->rx_free_trigger;
1670
1671                if (ixgbe_rx_alloc_bufs(rxq, true) != 0) {
1672                        int i, j;
1673
1674                        PMD_RX_LOG(DEBUG, "RX mbuf alloc failed port_id=%u "
1675                                   "queue_id=%u", (unsigned) rxq->port_id,
1676                                   (unsigned) rxq->queue_id);
1677
1678                        rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed +=
1679                                rxq->rx_free_thresh;
1680
1681                        /*
1682                         * Need to rewind any previous receives if we cannot
1683                         * allocate new buffers to replenish the old ones.
1684                         */
1685                        rxq->rx_nb_avail = 0;
1686                        rxq->rx_tail = (uint16_t)(rxq->rx_tail - nb_rx);
1687                        for (i = 0, j = rxq->rx_tail; i < nb_rx; ++i, ++j)
1688                                rxq->sw_ring[j].mbuf = rxq->rx_stage[i];
1689
1690                        return 0;
1691                }
1692
1693                /* update tail pointer */
1694                rte_wmb();
1695                IXGBE_PCI_REG_WRITE_RELAXED(rxq->rdt_reg_addr,
1696                                            cur_free_trigger);
1697        }
1698
1699        if (rxq->rx_tail >= rxq->nb_rx_desc)
1700                rxq->rx_tail = 0;
1701
1702        /* received any packets this loop? */
1703        if (rxq->rx_nb_avail)
1704                return ixgbe_rx_fill_from_stage(rxq, rx_pkts, nb_pkts);
1705
1706        return 0;
1707}
1708
1709/* split requests into chunks of size RTE_PMD_IXGBE_RX_MAX_BURST */
1710uint16_t
1711ixgbe_recv_pkts_bulk_alloc(void *rx_queue, struct rte_mbuf **rx_pkts,
1712                           uint16_t nb_pkts)
1713{
1714        uint16_t nb_rx;
1715
1716        if (unlikely(nb_pkts == 0))
1717                return 0;
1718
1719        if (likely(nb_pkts <= RTE_PMD_IXGBE_RX_MAX_BURST))
1720                return rx_recv_pkts(rx_queue, rx_pkts, nb_pkts);
1721
1722        /* request is relatively large, chunk it up */
1723        nb_rx = 0;
1724        while (nb_pkts) {
1725                uint16_t ret, n;
1726
1727                n = (uint16_t)RTE_MIN(nb_pkts, RTE_PMD_IXGBE_RX_MAX_BURST);
1728                ret = rx_recv_pkts(rx_queue, &rx_pkts[nb_rx], n);
1729                nb_rx = (uint16_t)(nb_rx + ret);
1730                nb_pkts = (uint16_t)(nb_pkts - ret);
1731                if (ret < n)
1732                        break;
1733        }
1734
1735        return nb_rx;
1736}
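
/*
 * This handler is reached through the generic burst API once it has been
 * installed as the port's Rx burst function. A minimal polling sketch,
 * assuming an initialized port "port_id", queue 0 and a hypothetical
 * process_packet() helper:
 *
 *     struct rte_mbuf *pkts[32];
 *     uint16_t i, nb;
 *
 *     nb = rte_eth_rx_burst(port_id, 0, pkts, 32);
 *     for (i = 0; i < nb; i++) {
 *             process_packet(pkts[i]);
 *             rte_pktmbuf_free(pkts[i]);
 *     }
 */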
1737
1738uint16_t
1739ixgbe_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
1740                uint16_t nb_pkts)
1741{
1742        struct ixgbe_rx_queue *rxq;
1743        volatile union ixgbe_adv_rx_desc *rx_ring;
1744        volatile union ixgbe_adv_rx_desc *rxdp;
1745        struct ixgbe_rx_entry *sw_ring;
1746        struct ixgbe_rx_entry *rxe;
1747        struct rte_mbuf *rxm;
1748        struct rte_mbuf *nmb;
1749        union ixgbe_adv_rx_desc rxd;
1750        uint64_t dma_addr;
1751        uint32_t staterr;
1752        uint32_t pkt_info;
1753        uint16_t pkt_len;
1754        uint16_t rx_id;
1755        uint16_t nb_rx;
1756        uint16_t nb_hold;
1757        uint64_t pkt_flags;
1758        uint64_t vlan_flags;
1759
1760        nb_rx = 0;
1761        nb_hold = 0;
1762        rxq = rx_queue;
1763        rx_id = rxq->rx_tail;
1764        rx_ring = rxq->rx_ring;
1765        sw_ring = rxq->sw_ring;
1766        vlan_flags = rxq->vlan_flags;
1767        while (nb_rx < nb_pkts) {
1768                /*
1769                 * The order of operations here is important as the DD status
1770                 * bit must not be read after any other descriptor fields.
1771                 * rx_ring and rxdp point to volatile data, so the accesses
1772                 * cannot be reordered by the compiler. If they were not
1773                 * volatile, they could be reordered, which could lead to
1774                 * using invalid descriptor fields when read from rxd.
1775                 */
1776                rxdp = &rx_ring[rx_id];
1777                staterr = rxdp->wb.upper.status_error;
1778                if (!(staterr & rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD)))
1779                        break;
1780                rxd = *rxdp;
1781
1782                /*
1783                 * End of packet.
1784                 *
1785                 * If the IXGBE_RXDADV_STAT_EOP flag is not set, the RX packet
1786                 * is likely to be invalid and to be dropped by the various
1787                 * validation checks performed by the network stack.
1788                 *
1789                 * Allocate a new mbuf to replenish the RX ring descriptor.
1790                 * If the allocation fails:
1791                 *    - arrange for that RX descriptor to be the first one
1792                 *      being parsed the next time the receive function is
1793                 *      invoked [on the same queue].
1794                 *
1795                 *    - Stop parsing the RX ring and return immediately.
1796                 *
1797                 * This policy does not drop the packet received in the RX
1798                 * descriptor for which the allocation of a new mbuf failed.
1799                 * Thus, it allows that packet to be retrieved later if
1800                 * mbufs have been freed in the meantime.
1801                 * As a side effect, holding RX descriptors instead of
1802                 * systematically giving them back to the NIC may lead to
1803                 * RX ring exhaustion situations.
1804                 * However, the NIC can gracefully prevent such situations
1805                 * from happening by sending specific "back-pressure" flow
1806                 * control frames to its peer(s).
1807                 */
1808                PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_id=%u "
1809                           "ext_err_stat=0x%08x pkt_len=%u",
1810                           (unsigned) rxq->port_id, (unsigned) rxq->queue_id,
1811                           (unsigned) rx_id, (unsigned) staterr,
1812                           (unsigned) rte_le_to_cpu_16(rxd.wb.upper.length));
1813
1814                nmb = rte_mbuf_raw_alloc(rxq->mb_pool);
1815                if (nmb == NULL) {
1816                        PMD_RX_LOG(DEBUG, "RX mbuf alloc failed port_id=%u "
1817                                   "queue_id=%u", (unsigned) rxq->port_id,
1818                                   (unsigned) rxq->queue_id);
1819                        rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed++;
1820                        break;
1821                }
1822
1823                nb_hold++;
1824                rxe = &sw_ring[rx_id];
1825                rx_id++;
1826                if (rx_id == rxq->nb_rx_desc)
1827                        rx_id = 0;
1828
1829                /* Prefetch next mbuf while processing current one. */
1830                rte_ixgbe_prefetch(sw_ring[rx_id].mbuf);
1831
1832                /*
1833                 * When next RX descriptor is on a cache-line boundary,
1834                 * prefetch the next 4 RX descriptors and the next 8 pointers
1835                 * to mbufs.
1836                 */
1837                if ((rx_id & 0x3) == 0) {
1838                        rte_ixgbe_prefetch(&rx_ring[rx_id]);
1839                        rte_ixgbe_prefetch(&sw_ring[rx_id]);
1840                }
1841
1842                rxm = rxe->mbuf;
1843                rxe->mbuf = nmb;
1844                dma_addr =
1845                        rte_cpu_to_le_64(rte_mbuf_data_iova_default(nmb));
1846                rxdp->read.hdr_addr = 0;
1847                rxdp->read.pkt_addr = dma_addr;
1848
1849                /*
1850                 * Initialize the returned mbuf.
1851                 * 1) setup generic mbuf fields:
1852                 *    - number of segments,
1853                 *    - next segment,
1854                 *    - packet length,
1855                 *    - RX port identifier.
1856                 * 2) integrate hardware offload data, if any:
1857                 *    - RSS flag & hash,
1858                 *    - IP checksum flag,
1859                 *    - VLAN TCI, if any,
1860                 *    - error flags.
1861                 */
1862                pkt_len = (uint16_t) (rte_le_to_cpu_16(rxd.wb.upper.length) -
1863                                      rxq->crc_len);
1864                rxm->data_off = RTE_PKTMBUF_HEADROOM;
1865                rte_packet_prefetch((char *)rxm->buf_addr + rxm->data_off);
1866                rxm->nb_segs = 1;
1867                rxm->next = NULL;
1868                rxm->pkt_len = pkt_len;
1869                rxm->data_len = pkt_len;
1870                rxm->port = rxq->port_id;
1871
1872                pkt_info = rte_le_to_cpu_32(rxd.wb.lower.lo_dword.data);
1873                /* Only valid if PKT_RX_VLAN set in pkt_flags */
1874                rxm->vlan_tci = rte_le_to_cpu_16(rxd.wb.upper.vlan);
1875
1876                pkt_flags = rx_desc_status_to_pkt_flags(staterr, vlan_flags);
1877                pkt_flags = pkt_flags | rx_desc_error_to_pkt_flags(staterr);
1878                pkt_flags = pkt_flags |
1879                        ixgbe_rxd_pkt_info_to_pkt_flags((uint16_t)pkt_info);
1880                rxm->ol_flags = pkt_flags;
1881                rxm->packet_type =
1882                        ixgbe_rxd_pkt_info_to_pkt_type(pkt_info,
1883                                                       rxq->pkt_type_mask);
1884
1885                if (likely(pkt_flags & PKT_RX_RSS_HASH))
1886                        rxm->hash.rss = rte_le_to_cpu_32(
1887                                                rxd.wb.lower.hi_dword.rss);
1888                else if (pkt_flags & PKT_RX_FDIR) {
1889                        rxm->hash.fdir.hash = rte_le_to_cpu_16(
1890                                        rxd.wb.lower.hi_dword.csum_ip.csum) &
1891                                        IXGBE_ATR_HASH_MASK;
1892                        rxm->hash.fdir.id = rte_le_to_cpu_16(
1893                                        rxd.wb.lower.hi_dword.csum_ip.ip_id);
1894                }
1895                /*
1896                 * Store the mbuf address into the next entry of the array
1897                 * of returned packets.
1898                 */
1899                rx_pkts[nb_rx++] = rxm;
1900        }
1901        rxq->rx_tail = rx_id;
1902
1903        /*
1904         * If the number of free RX descriptors is greater than the RX free
1905         * threshold of the queue, advance the Receive Descriptor Tail (RDT)
1906         * register.
1907         * Update the RDT with the value of the last processed RX descriptor
1908         * minus 1, to guarantee that the RDT register is never equal to the
1909         * RDH register, which creates a "full" ring situation from the
1910         * hardware point of view...
1911         */
1912        nb_hold = (uint16_t) (nb_hold + rxq->nb_rx_hold);
1913        if (nb_hold > rxq->rx_free_thresh) {
1914                PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_tail=%u "
1915                           "nb_hold=%u nb_rx=%u",
1916                           (unsigned) rxq->port_id, (unsigned) rxq->queue_id,
1917                           (unsigned) rx_id, (unsigned) nb_hold,
1918                           (unsigned) nb_rx);
1919                rx_id = (uint16_t) ((rx_id == 0) ?
1920                                     (rxq->nb_rx_desc - 1) : (rx_id - 1));
1921                IXGBE_PCI_REG_WRITE(rxq->rdt_reg_addr, rx_id);
1922                nb_hold = 0;
1923        }
1924        rxq->nb_rx_hold = nb_hold;
1925        return nb_rx;
1926}
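
/*
 * Worked example of the RDT update above, assuming nb_rx_desc = 512 and
 * rx_free_thresh = 32: once more than 32 descriptors have been held since
 * the last update, the tail register is written with one less than the
 * index of the next descriptor to process (wrapping to 511 when that index
 * is 0), so the RDT register is never equal to the RDH register, which the
 * hardware would treat as a "full" ring.
 */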
1927
1928/**
1929 * Detect an RSC descriptor.
1930 */
1931static inline uint32_t
1932ixgbe_rsc_count(union ixgbe_adv_rx_desc *rx)
1933{
1934        return (rte_le_to_cpu_32(rx->wb.lower.lo_dword.data) &
1935                IXGBE_RXDADV_RSCCNT_MASK) >> IXGBE_RXDADV_RSCCNT_SHIFT;
1936}
1937
1938/**
1939 * ixgbe_fill_cluster_head_buf - fill the first mbuf of the returned packet
1940 *
1941 * Fill the following info in the HEAD buffer of the Rx cluster:
1942 *    - RX port identifier
1943 *    - hardware offload data, if any:
1944 *      - RSS flag & hash
1945 *      - IP checksum flag
1946 *      - VLAN TCI, if any
1947 *      - error flags
1948 * @head HEAD of the packet cluster
1949 * @desc HW descriptor to get data from
1950 * @rxq Pointer to the Rx queue
1951 */
1952static inline void
1953ixgbe_fill_cluster_head_buf(
1954        struct rte_mbuf *head,
1955        union ixgbe_adv_rx_desc *desc,
1956        struct ixgbe_rx_queue *rxq,
1957        uint32_t staterr)
1958{
1959        uint32_t pkt_info;
1960        uint64_t pkt_flags;
1961
1962        head->port = rxq->port_id;
1963
1964        /* The vlan_tci field is only valid when PKT_RX_VLAN is
1965         * set in the pkt_flags field.
1966         */
1967        head->vlan_tci = rte_le_to_cpu_16(desc->wb.upper.vlan);
1968        pkt_info = rte_le_to_cpu_32(desc->wb.lower.lo_dword.data);
1969        pkt_flags = rx_desc_status_to_pkt_flags(staterr, rxq->vlan_flags);
1970        pkt_flags |= rx_desc_error_to_pkt_flags(staterr);
1971        pkt_flags |= ixgbe_rxd_pkt_info_to_pkt_flags((uint16_t)pkt_info);
1972        head->ol_flags = pkt_flags;
1973        head->packet_type =
1974                ixgbe_rxd_pkt_info_to_pkt_type(pkt_info, rxq->pkt_type_mask);
1975
1976        if (likely(pkt_flags & PKT_RX_RSS_HASH))
1977                head->hash.rss = rte_le_to_cpu_32(desc->wb.lower.hi_dword.rss);
1978        else if (pkt_flags & PKT_RX_FDIR) {
1979                head->hash.fdir.hash =
1980                        rte_le_to_cpu_16(desc->wb.lower.hi_dword.csum_ip.csum)
1981                                                          & IXGBE_ATR_HASH_MASK;
1982                head->hash.fdir.id =
1983                        rte_le_to_cpu_16(desc->wb.lower.hi_dword.csum_ip.ip_id);
1984        }
1985}
1986
1987/**
1988 * ixgbe_recv_pkts_lro - receive handler for the LRO case.
1989 *
1990 * @rx_queue Rx queue handle
1991 * @rx_pkts table of received packets
1992 * @nb_pkts size of rx_pkts table
1993 * @bulk_alloc if TRUE, bulk allocation is used for HW ring refilling
1994 *
1995 * Handles the Rx HW ring completions when RSC feature is configured. Uses an
1996 * additional ring of ixgbe_rsc_entry's that will hold the relevant RSC info.
1997 *
1998 * We use the same logic as in Linux and in FreeBSD ixgbe drivers:
1999 * 1) When non-EOP RSC completion arrives:
2000 *    a) Update the HEAD of the current RSC aggregation cluster with the new
2001 *       segment's data length.
2002 *    b) Set the "next" pointer of the current segment to point to the segment
2003 *       at the NEXTP index.
2004 *    c) Pass the HEAD of RSC aggregation cluster on to the next NEXTP entry
2005 *       in the sw_rsc_ring.
2006 * 2) When EOP arrives we just update the cluster's total length and offload
2007 *    flags and deliver the cluster up to the upper layers. In our case - put it
2008 *    in the rx_pkts table.
2009 *
2010 * Returns the number of received packets/clusters (according to the "bulk
2011 * receive" interface).
2012 */
2013static inline uint16_t
2014ixgbe_recv_pkts_lro(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts,
2015                    bool bulk_alloc)
2016{
2017        struct ixgbe_rx_queue *rxq = rx_queue;
2018        volatile union ixgbe_adv_rx_desc *rx_ring = rxq->rx_ring;
2019        struct ixgbe_rx_entry *sw_ring = rxq->sw_ring;
2020        struct ixgbe_scattered_rx_entry *sw_sc_ring = rxq->sw_sc_ring;
2021        uint16_t rx_id = rxq->rx_tail;
2022        uint16_t nb_rx = 0;
2023        uint16_t nb_hold = rxq->nb_rx_hold;
2024        uint16_t prev_id = rxq->rx_tail;
2025
2026        while (nb_rx < nb_pkts) {
2027                bool eop;
2028                struct ixgbe_rx_entry *rxe;
2029                struct ixgbe_scattered_rx_entry *sc_entry;
2030                struct ixgbe_scattered_rx_entry *next_sc_entry = NULL;
2031                struct ixgbe_rx_entry *next_rxe = NULL;
2032                struct rte_mbuf *first_seg;
2033                struct rte_mbuf *rxm;
2034                struct rte_mbuf *nmb = NULL;
2035                union ixgbe_adv_rx_desc rxd;
2036                uint16_t data_len;
2037                uint16_t next_id;
2038                volatile union ixgbe_adv_rx_desc *rxdp;
2039                uint32_t staterr;
2040
2041next_desc:
2042                /*
2043                 * The code in this whole file uses the volatile pointer to
2044                 * ensure the read ordering of the status and the rest of the
2045                 * descriptor fields (on the compiler level only!!!). This is so
2046                 * UGLY - why not just use a compiler barrier instead? DPDK
2047                 * even has the rte_compiler_barrier() for that.
2048                 *
2049                 * But most importantly this is just wrong because it doesn't
2050                 * ensure memory ordering in the general case at all. For
2051                 * instance, DPDK is supposed to work on Power CPUs, where
2052                 * a compiler barrier may just not be enough!
2053                 *
2054                 * I tried to write only this function properly to have a
2055                 * starting point (as a part of an LRO/RSC series) but the
2056                 * compiler cursed at me when I tried to cast away the
2057                 * "volatile" from rx_ring (yes, it's volatile too!!!). So, I'm
2058                 * keeping it the way it is for now.
2059                 *
2060                 * The code in this file is broken in so many other places and
2061                 * will just not work on a big endian CPU anyway therefore the
2062                 * lines below will have to be revisited together with the rest
2063                 * of the ixgbe PMD.
2064                 *
2065                 * TODO:
2066                 *    - Get rid of "volatile" and let the compiler do its job.
2067                 *    - Use the proper memory barrier (rte_rmb()) to ensure the
2068                 *      memory ordering below.
2069                 */
2070                rxdp = &rx_ring[rx_id];
2071                staterr = rte_le_to_cpu_32(rxdp->wb.upper.status_error);
2072
2073                if (!(staterr & IXGBE_RXDADV_STAT_DD))
2074                        break;
2075
2076                rxd = *rxdp;
2077
2078                PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_id=%u "
2079                                  "staterr=0x%x data_len=%u",
2080                           rxq->port_id, rxq->queue_id, rx_id, staterr,
2081                           rte_le_to_cpu_16(rxd.wb.upper.length));
2082
2083                if (!bulk_alloc) {
2084                        nmb = rte_mbuf_raw_alloc(rxq->mb_pool);
2085                        if (nmb == NULL) {
2086                                PMD_RX_LOG(DEBUG, "RX mbuf alloc failed "
2087                                                  "port_id=%u queue_id=%u",
2088                                           rxq->port_id, rxq->queue_id);
2089
2090                                rte_eth_devices[rxq->port_id].data->
2091                                                        rx_mbuf_alloc_failed++;
2092                                break;
2093                        }
2094                } else if (nb_hold > rxq->rx_free_thresh) {
2095                        uint16_t next_rdt = rxq->rx_free_trigger;
2096
2097                        if (!ixgbe_rx_alloc_bufs(rxq, false)) {
2098                                rte_wmb();
2099                                IXGBE_PCI_REG_WRITE_RELAXED(rxq->rdt_reg_addr,
2100                                                            next_rdt);
2101                                nb_hold -= rxq->rx_free_thresh;
2102                        } else {
2103                                PMD_RX_LOG(DEBUG, "RX bulk alloc failed "
2104                                                  "port_id=%u queue_id=%u",
2105                                           rxq->port_id, rxq->queue_id);
2106
2107                                rte_eth_devices[rxq->port_id].data->
2108                                                        rx_mbuf_alloc_failed++;
2109                                break;
2110                        }
2111                }
2112
2113                nb_hold++;
2114                rxe = &sw_ring[rx_id];
2115                eop = staterr & IXGBE_RXDADV_STAT_EOP;
2116
2117                next_id = rx_id + 1;
2118                if (next_id == rxq->nb_rx_desc)
2119                        next_id = 0;
2120
2121                /* Prefetch next mbuf while processing current one. */
2122                rte_ixgbe_prefetch(sw_ring[next_id].mbuf);
2123
2124                /*
2125                 * When next RX descriptor is on a cache-line boundary,
2126                 * prefetch the next 4 RX descriptors and the next 4 pointers
2127                 * to mbufs.
2128                 */
2129                if ((next_id & 0x3) == 0) {
2130                        rte_ixgbe_prefetch(&rx_ring[next_id]);
2131                        rte_ixgbe_prefetch(&sw_ring[next_id]);
2132                }
2133
2134                rxm = rxe->mbuf;
2135
2136                if (!bulk_alloc) {
2137                        __le64 dma =
2138                          rte_cpu_to_le_64(rte_mbuf_data_iova_default(nmb));
2139                        /*
2140                         * Update RX descriptor with the physical address of the
2141                         * new data buffer of the newly allocated mbuf.
2142                         */
2143                        rxe->mbuf = nmb;
2144
2145                        rxm->data_off = RTE_PKTMBUF_HEADROOM;
2146                        rxdp->read.hdr_addr = 0;
2147                        rxdp->read.pkt_addr = dma;
2148                } else
2149                        rxe->mbuf = NULL;
2150
2151                /*
2152                 * Set data length & data buffer address of mbuf.
2153                 */
2154                data_len = rte_le_to_cpu_16(rxd.wb.upper.length);
2155                rxm->data_len = data_len;
2156
2157                if (!eop) {
2158                        uint16_t nextp_id;
2159                        /*
2160                         * Get next descriptor index:
2161                         *  - For RSC it's in the NEXTP field.
2162                         *  - For a scattered packet - it's just the following
2163                         *    descriptor.
2164                         */
2165                        if (ixgbe_rsc_count(&rxd))
2166                                nextp_id =
2167                                        (staterr & IXGBE_RXDADV_NEXTP_MASK) >>
2168                                                       IXGBE_RXDADV_NEXTP_SHIFT;
2169                        else
2170                                nextp_id = next_id;
2171
2172                        next_sc_entry = &sw_sc_ring[nextp_id];
2173                        next_rxe = &sw_ring[nextp_id];
2174                        rte_ixgbe_prefetch(next_rxe);
2175                }
2176
2177                sc_entry = &sw_sc_ring[rx_id];
2178                first_seg = sc_entry->fbuf;
2179                sc_entry->fbuf = NULL;
2180
2181                /*
2182                 * If this is the first buffer of the received packet,
2183                 * set the pointer to the first mbuf of the packet and
2184                 * initialize its context.
2185                 * Otherwise, update the total length and the number of segments
2186                 * of the current scattered packet, and update the pointer to
2187                 * the last mbuf of the current packet.
2188                 */
2189                if (first_seg == NULL) {
2190                        first_seg = rxm;
2191                        first_seg->pkt_len = data_len;
2192                        first_seg->nb_segs = 1;
2193                } else {
2194                        first_seg->pkt_len += data_len;
2195                        first_seg->nb_segs++;
2196                }
2197
2198                prev_id = rx_id;
2199                rx_id = next_id;
2200
2201                /*
2202                 * If this is not the last buffer of the received packet, update
2203                 * the pointer to the first mbuf at the NEXTP entry in the
2204                 * sw_sc_ring and continue to parse the RX ring.
2205                 */
2206                if (!eop && next_rxe) {
2207                        rxm->next = next_rxe->mbuf;
2208                        next_sc_entry->fbuf = first_seg;
2209                        goto next_desc;
2210                }
2211
2212                /* Initialize the first mbuf of the returned packet */
2213                ixgbe_fill_cluster_head_buf(first_seg, &rxd, rxq, staterr);
2214
2215                /*
2216                 * Deal with the case when HW CRC stripping is disabled.
2217                 * That can't happen when LRO is enabled, but still could
2218                 * happen for scattered RX mode.
2219                 */
2220                first_seg->pkt_len -= rxq->crc_len;
2221                if (unlikely(rxm->data_len <= rxq->crc_len)) {
2222                        struct rte_mbuf *lp;
2223
2224                        for (lp = first_seg; lp->next != rxm; lp = lp->next)
2225                                ;
2226
2227                        first_seg->nb_segs--;
2228                        lp->data_len -= rxq->crc_len - rxm->data_len;
2229                        lp->next = NULL;
2230                        rte_pktmbuf_free_seg(rxm);
2231                } else
2232                        rxm->data_len -= rxq->crc_len;
2233
2234                /* Prefetch data of first segment, if configured to do so. */
2235                rte_packet_prefetch((char *)first_seg->buf_addr +
2236                        first_seg->data_off);
2237
2238                /*
2239                 * Store the mbuf address into the next entry of the array
2240                 * of returned packets.
2241                 */
2242                rx_pkts[nb_rx++] = first_seg;
2243        }
2244
2245        /*
2246         * Record index of the next RX descriptor to probe.
2247         */
2248        rxq->rx_tail = rx_id;
2249
2250        /*
2251         * If the number of free RX descriptors is greater than the RX free
2252         * threshold of the queue, advance the Receive Descriptor Tail (RDT)
2253         * register.
2254         * Update the RDT with the value of the last processed RX descriptor
2255         * minus 1, to guarantee that the RDT register is never equal to the
2256         * RDH register, which creates a "full" ring situation from the
2257         * hardware point of view...
2258         */
2259        if (!bulk_alloc && nb_hold > rxq->rx_free_thresh) {
2260                PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_tail=%u "
2261                           "nb_hold=%u nb_rx=%u",
2262                           rxq->port_id, rxq->queue_id, rx_id, nb_hold, nb_rx);
2263
2264                rte_wmb();
2265                IXGBE_PCI_REG_WRITE_RELAXED(rxq->rdt_reg_addr, prev_id);
2266                nb_hold = 0;
2267        }
2268
2269        rxq->nb_rx_hold = nb_hold;
2270        return nb_rx;
2271}
2272
2273uint16_t
2274ixgbe_recv_pkts_lro_single_alloc(void *rx_queue, struct rte_mbuf **rx_pkts,
2275                                 uint16_t nb_pkts)
2276{
2277        return ixgbe_recv_pkts_lro(rx_queue, rx_pkts, nb_pkts, false);
2278}
2279
2280uint16_t
2281ixgbe_recv_pkts_lro_bulk_alloc(void *rx_queue, struct rte_mbuf **rx_pkts,
2282                               uint16_t nb_pkts)
2283{
2284        return ixgbe_recv_pkts_lro(rx_queue, rx_pkts, nb_pkts, true);
2285}
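
/*
 * Packets returned by the LRO/scattered handlers above may span several
 * mbuf segments chained through mbuf->next, with the totals kept in the
 * first segment. A minimal sketch of walking and releasing such a cluster,
 * assuming a received head mbuf "m":
 *
 *     struct rte_mbuf *seg;
 *     uint32_t bytes = 0;
 *
 *     for (seg = m; seg != NULL; seg = seg->next)
 *             bytes += seg->data_len;         /+ sums to m->pkt_len
 *     rte_pktmbuf_free(m);                    /+ frees the whole chain
 */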
2286
2287/*********************************************************************
2288 *
2289 *  Queue management functions
2290 *
2291 **********************************************************************/
2292
2293static void __rte_cold
2294ixgbe_tx_queue_release_mbufs(struct ixgbe_tx_queue *txq)
2295{
2296        unsigned i;
2297
2298        if (txq->sw_ring != NULL) {
2299                for (i = 0; i < txq->nb_tx_desc; i++) {
2300                        if (txq->sw_ring[i].mbuf != NULL) {
2301                                rte_pktmbuf_free_seg(txq->sw_ring[i].mbuf);
2302                                txq->sw_ring[i].mbuf = NULL;
2303                        }
2304                }
2305        }
2306}
2307
2308static int
2309ixgbe_tx_done_cleanup_full(struct ixgbe_tx_queue *txq, uint32_t free_cnt)
2310{
2311        struct ixgbe_tx_entry *swr_ring = txq->sw_ring;
2312        uint16_t i, tx_last, tx_id;
2313        uint16_t nb_tx_free_last;
2314        uint16_t nb_tx_to_clean;
2315        uint32_t pkt_cnt;
2316
2317        /* Start freeing mbufs from the entry after tx_tail */
2318        tx_last = txq->tx_tail;
2319        tx_id  = swr_ring[tx_last].next_id;
2320
2321        if (txq->nb_tx_free == 0 && ixgbe_xmit_cleanup(txq))
2322                return 0;
2323
2324        nb_tx_to_clean = txq->nb_tx_free;
2325        nb_tx_free_last = txq->nb_tx_free;
2326        if (!free_cnt)
2327                free_cnt = txq->nb_tx_desc;
2328
2329        /* Loop through swr_ring to count the number of
2330         * freeable mbufs and packets.
2331         */
2332        for (pkt_cnt = 0; pkt_cnt < free_cnt; ) {
2333                for (i = 0; i < nb_tx_to_clean &&
2334                        pkt_cnt < free_cnt &&
2335                        tx_id != tx_last; i++) {
2336                        if (swr_ring[tx_id].mbuf != NULL) {
2337                                rte_pktmbuf_free_seg(swr_ring[tx_id].mbuf);
2338                                swr_ring[tx_id].mbuf = NULL;
2339
2340                                /*
2341                                 * last segment in the packet,
2342                                 * increment packet count
2343                                 */
2344                                pkt_cnt += (swr_ring[tx_id].last_id == tx_id);
2345                        }
2346
2347                        tx_id = swr_ring[tx_id].next_id;
2348                }
2349
2350                if (txq->tx_rs_thresh > txq->nb_tx_desc -
2351                        txq->nb_tx_free || tx_id == tx_last)
2352                        break;
2353
2354                if (pkt_cnt < free_cnt) {
2355                        if (ixgbe_xmit_cleanup(txq))
2356                                break;
2357
2358                        nb_tx_to_clean = txq->nb_tx_free - nb_tx_free_last;
2359                        nb_tx_free_last = txq->nb_tx_free;
2360                }
2361        }
2362
2363        return (int)pkt_cnt;
2364}
2365
2366static int
2367ixgbe_tx_done_cleanup_simple(struct ixgbe_tx_queue *txq,
2368                        uint32_t free_cnt)
2369{
2370        int i, n, cnt;
2371
2372        if (free_cnt == 0 || free_cnt > txq->nb_tx_desc)
2373                free_cnt = txq->nb_tx_desc;
2374
2375        cnt = free_cnt - free_cnt % txq->tx_rs_thresh;
2376
2377        for (i = 0; i < cnt; i += n) {
2378                if (txq->nb_tx_desc - txq->nb_tx_free < txq->tx_rs_thresh)
2379                        break;
2380
2381                n = ixgbe_tx_free_bufs(txq);
2382
2383                if (n == 0)
2384                        break;
2385        }
2386
2387        return i;
2388}
2389
2390static int
2391ixgbe_tx_done_cleanup_vec(struct ixgbe_tx_queue *txq __rte_unused,
2392                        uint32_t free_cnt __rte_unused)
2393{
2394        return -ENOTSUP;
2395}
2396
2397int
2398ixgbe_dev_tx_done_cleanup(void *tx_queue, uint32_t free_cnt)
2399{
2400        struct ixgbe_tx_queue *txq = (struct ixgbe_tx_queue *)tx_queue;
2401        if (txq->offloads == 0 &&
2402#ifdef RTE_LIBRTE_SECURITY
2403                        !(txq->using_ipsec) &&
2404#endif
2405                        txq->tx_rs_thresh >= RTE_PMD_IXGBE_TX_MAX_BURST) {
2406                if (txq->tx_rs_thresh <= RTE_IXGBE_TX_MAX_FREE_BUF_SZ &&
2407                                (rte_eal_process_type() != RTE_PROC_PRIMARY ||
2408                                        txq->sw_ring_v != NULL)) {
2409                        return ixgbe_tx_done_cleanup_vec(txq, free_cnt);
2410                } else {
2411                        return ixgbe_tx_done_cleanup_simple(txq, free_cnt);
2412                }
2413        }
2414
2415        return ixgbe_tx_done_cleanup_full(txq, free_cnt);
2416}
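
/*
 * This callback backs the generic rte_eth_tx_done_cleanup() API. A minimal
 * application-side sketch, assuming port "port_id", Tx queue 0 and a
 * hypothetical handle_error() helper, that asks the driver to free up to 64
 * already-transmitted packets:
 *
 *     int n = rte_eth_tx_done_cleanup(port_id, 0, 64);
 *
 *     if (n < 0)
 *             handle_error(n);
 */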
2417
2418static void __rte_cold
2419ixgbe_tx_free_swring(struct ixgbe_tx_queue *txq)
2420{
2421        if (txq != NULL &&
2422            txq->sw_ring != NULL)
2423                rte_free(txq->sw_ring);
2424}
2425
2426static void __rte_cold
2427ixgbe_tx_queue_release(struct ixgbe_tx_queue *txq)
2428{
2429        if (txq != NULL && txq->ops != NULL) {
2430                txq->ops->release_mbufs(txq);
2431                txq->ops->free_swring(txq);
2432                rte_free(txq);
2433        }
2434}
2435
2436void __rte_cold
2437ixgbe_dev_tx_queue_release(void *txq)
2438{
2439        ixgbe_tx_queue_release(txq);
2440}
2441
2442/* (Re)set dynamic ixgbe_tx_queue fields to defaults */
2443static void __rte_cold
2444ixgbe_reset_tx_queue(struct ixgbe_tx_queue *txq)
2445{
2446        static const union ixgbe_adv_tx_desc zeroed_desc = {{0}};
2447        struct ixgbe_tx_entry *txe = txq->sw_ring;
2448        uint16_t prev, i;
2449
2450        /* Zero out HW ring memory */
2451        for (i = 0; i < txq->nb_tx_desc; i++) {
2452                txq->tx_ring[i] = zeroed_desc;
2453        }
2454
2455        /* Initialize SW ring entries */
2456        prev = (uint16_t) (txq->nb_tx_desc - 1);
2457        for (i = 0; i < txq->nb_tx_desc; i++) {
2458                volatile union ixgbe_adv_tx_desc *txd = &txq->tx_ring[i];
2459
2460                txd->wb.status = rte_cpu_to_le_32(IXGBE_TXD_STAT_DD);
2461                txe[i].mbuf = NULL;
2462                txe[i].last_id = i;
2463                txe[prev].next_id = i;
2464                prev = i;
2465        }
2466
2467        txq->tx_next_dd = (uint16_t)(txq->tx_rs_thresh - 1);
2468        txq->tx_next_rs = (uint16_t)(txq->tx_rs_thresh - 1);
2469
2470        txq->tx_tail = 0;
2471        txq->nb_tx_used = 0;
2472        /*
2473         * Always keep one descriptor unallocated to avoid
2474         * a H/W race condition.
2475         */
2476        txq->last_desc_cleaned = (uint16_t)(txq->nb_tx_desc - 1);
2477        txq->nb_tx_free = (uint16_t)(txq->nb_tx_desc - 1);
2478        txq->ctx_curr = 0;
2479        memset((void *)&txq->ctx_cache, 0,
2480                IXGBE_CTX_NUM * sizeof(struct ixgbe_advctx_info));
2481}
2482
2483static const struct ixgbe_txq_ops def_txq_ops = {
2484        .release_mbufs = ixgbe_tx_queue_release_mbufs,
2485        .free_swring = ixgbe_tx_free_swring,
2486        .reset = ixgbe_reset_tx_queue,
2487};
2488
2489/* Takes an ethdev and a queue and sets up the tx function to be used based on
2490 * the queue parameters. Used in tx_queue_setup by primary process and then
2491 * in dev_init by secondary process when attaching to an existing ethdev.
2492 */
2493void __rte_cold
2494ixgbe_set_tx_function(struct rte_eth_dev *dev, struct ixgbe_tx_queue *txq)
2495{
2496        /* Use a simple Tx queue (no offloads, no multi segs) if possible */
2497        if ((txq->offloads == 0) &&
2498#ifdef RTE_LIBRTE_SECURITY
2499                        !(txq->using_ipsec) &&
2500#endif
2501                        (txq->tx_rs_thresh >= RTE_PMD_IXGBE_TX_MAX_BURST)) {
2502                PMD_INIT_LOG(DEBUG, "Using simple tx code path");
2503                dev->tx_pkt_prepare = NULL;
2504                if (txq->tx_rs_thresh <= RTE_IXGBE_TX_MAX_FREE_BUF_SZ &&
2505                                (rte_eal_process_type() != RTE_PROC_PRIMARY ||
2506                                        ixgbe_txq_vec_setup(txq) == 0)) {
2507                        PMD_INIT_LOG(DEBUG, "Vector tx enabled.");
2508                        dev->tx_pkt_burst = ixgbe_xmit_pkts_vec;
2509                } else
2510                        dev->tx_pkt_burst = ixgbe_xmit_pkts_simple;
2511        } else {
2512                PMD_INIT_LOG(DEBUG, "Using full-featured tx code path");
2513                PMD_INIT_LOG(DEBUG,
2514                                " - offloads = 0x%" PRIx64,
2515                                txq->offloads);
2516                PMD_INIT_LOG(DEBUG,
2517                                " - tx_rs_thresh = %lu " "[RTE_PMD_IXGBE_TX_MAX_BURST=%lu]",
2518                                (unsigned long)txq->tx_rs_thresh,
2519                                (unsigned long)RTE_PMD_IXGBE_TX_MAX_BURST);
2520                dev->tx_pkt_burst = ixgbe_xmit_pkts;
2521                dev->tx_pkt_prepare = ixgbe_prep_pkts;
2522        }
2523}
2524
2525uint64_t
2526ixgbe_get_tx_queue_offloads(struct rte_eth_dev *dev)
2527{
2528        RTE_SET_USED(dev);
2529
2530        return 0;
2531}
2532
2533uint64_t
2534ixgbe_get_tx_port_offloads(struct rte_eth_dev *dev)
2535{
2536        uint64_t tx_offload_capa;
2537        struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2538
2539        tx_offload_capa =
2540                DEV_TX_OFFLOAD_VLAN_INSERT |
2541                DEV_TX_OFFLOAD_IPV4_CKSUM  |
2542                DEV_TX_OFFLOAD_UDP_CKSUM   |
2543                DEV_TX_OFFLOAD_TCP_CKSUM   |
2544                DEV_TX_OFFLOAD_SCTP_CKSUM  |
2545                DEV_TX_OFFLOAD_TCP_TSO     |
2546                DEV_TX_OFFLOAD_MULTI_SEGS;
2547
2548        if (hw->mac.type == ixgbe_mac_82599EB ||
2549            hw->mac.type == ixgbe_mac_X540)
2550                tx_offload_capa |= DEV_TX_OFFLOAD_MACSEC_INSERT;
2551
2552        if (hw->mac.type == ixgbe_mac_X550 ||
2553            hw->mac.type == ixgbe_mac_X550EM_x ||
2554            hw->mac.type == ixgbe_mac_X550EM_a)
2555                tx_offload_capa |= DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM;
2556
2557#ifdef RTE_LIBRTE_SECURITY
2558        if (dev->security_ctx)
2559                tx_offload_capa |= DEV_TX_OFFLOAD_SECURITY;
2560#endif
2561        return tx_offload_capa;
2562}
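
/*
 * Editorial sketch, not part of the driver: an application would normally
 * consult the capabilities reported by the function above (exposed through
 * rte_eth_dev_info_get()) before requesting an offload such as TSO. The
 * helper name and the use of port-wide configuration are illustrative
 * assumptions only.
 */
static int
example_request_tx_tso(uint16_t port_id, struct rte_eth_conf *eth_conf)
{
        struct rte_eth_dev_info dev_info;
        int ret;

        ret = rte_eth_dev_info_get(port_id, &dev_info);
        if (ret != 0)
                return ret;

        /* Enable TSO only when the port advertises it. */
        if (dev_info.tx_offload_capa & DEV_TX_OFFLOAD_TCP_TSO)
                eth_conf->txmode.offloads |= DEV_TX_OFFLOAD_TCP_TSO;

        return 0;
}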
2563
2564int __rte_cold
2565ixgbe_dev_tx_queue_setup(struct rte_eth_dev *dev,
2566                         uint16_t queue_idx,
2567                         uint16_t nb_desc,
2568                         unsigned int socket_id,
2569                         const struct rte_eth_txconf *tx_conf)
2570{
2571        const struct rte_memzone *tz;
2572        struct ixgbe_tx_queue *txq;
2573        struct ixgbe_hw     *hw;
2574        uint16_t tx_rs_thresh, tx_free_thresh;
2575        uint64_t offloads;
2576
2577        PMD_INIT_FUNC_TRACE();
2578        hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2579
2580        offloads = tx_conf->offloads | dev->data->dev_conf.txmode.offloads;
2581
2582        /*
2583         * Validate number of transmit descriptors.
2584         * It must not exceed hardware maximum, and must be multiple
2585         * of IXGBE_ALIGN.
2586         */
2587        if (nb_desc % IXGBE_TXD_ALIGN != 0 ||
2588                        (nb_desc > IXGBE_MAX_RING_DESC) ||
2589                        (nb_desc < IXGBE_MIN_RING_DESC)) {
2590                return -EINVAL;
2591        }
2592
2593        /*
2594         * The following two parameters control the setting of the RS bit on
2595         * transmit descriptors.
2596         * TX descriptors will have their RS bit set after txq->tx_rs_thresh
2597         * descriptors have been used.
2598         * The TX descriptor ring will be cleaned after txq->tx_free_thresh
2599         * descriptors are used or if the number of descriptors required
2600         * to transmit a packet is greater than the number of free TX
2601         * descriptors.
2602         * The following constraints must be satisfied:
2603         *  tx_rs_thresh must be greater than 0.
2604         *  tx_rs_thresh must be less than the size of the ring minus 2.
2605         *  tx_rs_thresh must be less than or equal to tx_free_thresh.
2606         *  tx_rs_thresh must be a divisor of the ring size.
2607         *  tx_free_thresh must be greater than 0.
2608         *  tx_free_thresh must be less than the size of the ring minus 3.
2609         *  tx_free_thresh + tx_rs_thresh must not exceed nb_desc.
2610         * One descriptor in the TX ring is used as a sentinel to avoid a
2611         * H/W race condition, hence the maximum threshold constraints.
2612         * When set to zero use default values.
2613         */
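        /*
         * Worked example (editorial note): with nb_desc = 512, a
         * tx_free_thresh of 64 and a tx_rs_thresh of 32 satisfy every
         * constraint above: 32 > 0, 32 < 510, 32 <= 64, 512 % 32 == 0,
         * 64 > 0, 64 < 509 and 32 + 64 <= 512.
         */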
2614        tx_free_thresh = (uint16_t)((tx_conf->tx_free_thresh) ?
2615                        tx_conf->tx_free_thresh : DEFAULT_TX_FREE_THRESH);
2616        /* force tx_rs_thresh to adapt to an aggressive tx_free_thresh */
2617        tx_rs_thresh = (DEFAULT_TX_RS_THRESH + tx_free_thresh > nb_desc) ?
2618                        nb_desc - tx_free_thresh : DEFAULT_TX_RS_THRESH;
2619        if (tx_conf->tx_rs_thresh > 0)
2620                tx_rs_thresh = tx_conf->tx_rs_thresh;
2621        if (tx_rs_thresh + tx_free_thresh > nb_desc) {
2622                PMD_INIT_LOG(ERR, "tx_rs_thresh + tx_free_thresh must not "
2623                             "exceed nb_desc. (tx_rs_thresh=%u "
2624                             "tx_free_thresh=%u nb_desc=%u port = %d queue=%d)",
2625                             (unsigned int)tx_rs_thresh,
2626                             (unsigned int)tx_free_thresh,
2627                             (unsigned int)nb_desc,
2628                             (int)dev->data->port_id,
2629                             (int)queue_idx);
2630                return -(EINVAL);
2631        }
2632        if (tx_rs_thresh >= (nb_desc - 2)) {
2633                PMD_INIT_LOG(ERR, "tx_rs_thresh must be less than the number "
2634                        "of TX descriptors minus 2. (tx_rs_thresh=%u "
2635                        "port=%d queue=%d)", (unsigned int)tx_rs_thresh,
2636                        (int)dev->data->port_id, (int)queue_idx);
2637                return -(EINVAL);
2638        }
2639        if (tx_rs_thresh > DEFAULT_TX_RS_THRESH) {
2640                PMD_INIT_LOG(ERR, "tx_rs_thresh must be less than or equal to %u. "
2641                        "(tx_rs_thresh=%u port=%d queue=%d)",
2642                        DEFAULT_TX_RS_THRESH, (unsigned int)tx_rs_thresh,
2643                        (int)dev->data->port_id, (int)queue_idx);
2644                return -(EINVAL);
2645        }
2646        if (tx_free_thresh >= (nb_desc - 3)) {
2647                PMD_INIT_LOG(ERR, "tx_free_thresh must be less than the "
2648                             "number of TX descriptors minus 3. "
2649                             "(tx_free_thresh=%u "
2650                             "port=%d queue=%d)",
2651                             (unsigned int)tx_free_thresh,
2652                             (int)dev->data->port_id, (int)queue_idx);
2653                return -(EINVAL);
2654        }
2655        if (tx_rs_thresh > tx_free_thresh) {
2656                PMD_INIT_LOG(ERR, "tx_rs_thresh must be less than or equal to "
2657                             "tx_free_thresh. (tx_free_thresh=%u "
2658                             "tx_rs_thresh=%u port=%d queue=%d)",
2659                             (unsigned int)tx_free_thresh,
2660                             (unsigned int)tx_rs_thresh,
2661                             (int)dev->data->port_id,
2662                             (int)queue_idx);
2663                return -(EINVAL);
2664        }
2665        if ((nb_desc % tx_rs_thresh) != 0) {
2666                PMD_INIT_LOG(ERR, "tx_rs_thresh must be a divisor of the "
2667                             "number of TX descriptors. (tx_rs_thresh=%u "
2668                             "port=%d queue=%d)", (unsigned int)tx_rs_thresh,
2669                             (int)dev->data->port_id, (int)queue_idx);
2670                return -(EINVAL);
2671        }
2672
2673        /*
2674         * If tx_rs_thresh is greater than 1, then TX WTHRESH should be
2675         * set to 0. If WTHRESH is greater than zero, the RS bit is ignored
2676         * by the NIC and all descriptors are written back after the NIC
2677         * accumulates WTHRESH descriptors.
2678         */
2679        if ((tx_rs_thresh > 1) && (tx_conf->tx_thresh.wthresh != 0)) {
2680                PMD_INIT_LOG(ERR, "TX WTHRESH must be set to 0 if "
2681                             "tx_rs_thresh is greater than 1. (tx_rs_thresh=%u "
2682                             "port=%d queue=%d)", (unsigned int)tx_rs_thresh,
2683                             (int)dev->data->port_id, (int)queue_idx);
2684                return -(EINVAL);
2685        }
2686
2687        /* Free memory prior to re-allocation if needed... */
2688        if (dev->data->tx_queues[queue_idx] != NULL) {
2689                ixgbe_tx_queue_release(dev->data->tx_queues[queue_idx]);
2690                dev->data->tx_queues[queue_idx] = NULL;
2691        }
2692
2693        /* First allocate the tx queue data structure */
2694        txq = rte_zmalloc_socket("ethdev TX queue", sizeof(struct ixgbe_tx_queue),
2695                                 RTE_CACHE_LINE_SIZE, socket_id);
2696        if (txq == NULL)
2697                return -ENOMEM;
2698
2699        /*
2700         * Allocate TX ring hardware descriptors. A memzone large enough to
2701         * handle the maximum ring size is allocated in order to allow for
2702         * resizing in later calls to the queue setup function.
2703         */
2704        tz = rte_eth_dma_zone_reserve(dev, "tx_ring", queue_idx,
2705                        sizeof(union ixgbe_adv_tx_desc) * IXGBE_MAX_RING_DESC,
2706                        IXGBE_ALIGN, socket_id);
2707        if (tz == NULL) {
2708                ixgbe_tx_queue_release(txq);
2709                return -ENOMEM;
2710        }
2711
2712        txq->nb_tx_desc = nb_desc;
2713        txq->tx_rs_thresh = tx_rs_thresh;
2714        txq->tx_free_thresh = tx_free_thresh;
2715        txq->pthresh = tx_conf->tx_thresh.pthresh;
2716        txq->hthresh = tx_conf->tx_thresh.hthresh;
2717        txq->wthresh = tx_conf->tx_thresh.wthresh;
2718        txq->queue_id = queue_idx;
2719        txq->reg_idx = (uint16_t)((RTE_ETH_DEV_SRIOV(dev).active == 0) ?
2720                queue_idx : RTE_ETH_DEV_SRIOV(dev).def_pool_q_idx + queue_idx);
2721        txq->port_id = dev->data->port_id;
2722        txq->offloads = offloads;
2723        txq->ops = &def_txq_ops;
2724        txq->tx_deferred_start = tx_conf->tx_deferred_start;
2725#ifdef RTE_LIBRTE_SECURITY
2726        txq->using_ipsec = !!(dev->data->dev_conf.txmode.offloads &
2727                        DEV_TX_OFFLOAD_SECURITY);
2728#endif
2729
2730        /*
2731         * Use the VF tail register (VFTDT) when running as a virtual function
2732         */
2733        if (hw->mac.type == ixgbe_mac_82599_vf ||
2734            hw->mac.type == ixgbe_mac_X540_vf ||
2735            hw->mac.type == ixgbe_mac_X550_vf ||
2736            hw->mac.type == ixgbe_mac_X550EM_x_vf ||
2737            hw->mac.type == ixgbe_mac_X550EM_a_vf)
2738                txq->tdt_reg_addr = IXGBE_PCI_REG_ADDR(hw, IXGBE_VFTDT(queue_idx));
2739        else
2740                txq->tdt_reg_addr = IXGBE_PCI_REG_ADDR(hw, IXGBE_TDT(txq->reg_idx));
2741
2742        txq->tx_ring_phys_addr = tz->iova;
2743        txq->tx_ring = (union ixgbe_adv_tx_desc *) tz->addr;
2744
2745        /* Allocate software ring */
2746        txq->sw_ring = rte_zmalloc_socket("txq->sw_ring",
2747                                sizeof(struct ixgbe_tx_entry) * nb_desc,
2748                                RTE_CACHE_LINE_SIZE, socket_id);
2749        if (txq->sw_ring == NULL) {
2750                ixgbe_tx_queue_release(txq);
2751                return -ENOMEM;
2752        }
2753        PMD_INIT_LOG(DEBUG, "sw_ring=%p hw_ring=%p dma_addr=0x%"PRIx64,
2754                     txq->sw_ring, txq->tx_ring, txq->tx_ring_phys_addr);
2755
2756        /* set up vector or scalar TX function as appropriate */
2757        ixgbe_set_tx_function(dev, txq);
2758
2759        txq->ops->reset(txq);
2760
2761        dev->data->tx_queues[queue_idx] = txq;
2762
2763
2764        return 0;
2765}
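
/*
 * Editorial sketch, not part of the driver: the application-side call that
 * reaches ixgbe_dev_tx_queue_setup() above, with thresholds chosen to meet
 * the checks performed there (and, with zero offloads and a tx_rs_thresh of
 * at least RTE_PMD_IXGBE_TX_MAX_BURST, to select the simple Tx path in
 * ixgbe_set_tx_function()). Ring size, thresholds and the helper name are
 * illustrative assumptions.
 */
static int
example_setup_tx_queue(uint16_t port_id, uint16_t queue_id)
{
        struct rte_eth_txconf txconf = {
                .tx_rs_thresh = 32,     /* divides 512 and <= tx_free_thresh */
                .tx_free_thresh = 64,   /* < 512 - 3 */
                .offloads = 0,          /* no per-queue offloads */
        };                              /* tx_thresh left zeroed => WTHRESH == 0 */

        return rte_eth_tx_queue_setup(port_id, queue_id, 512,
                                      rte_eth_dev_socket_id(port_id), &txconf);
}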
2766
2767/**
2768 * ixgbe_free_sc_cluster - free the not-yet-completed scattered cluster
2769 *
2770 * The "next" pointer of the last segment of (not-yet-completed) RSC clusters
2771 * in the sw_rsc_ring is not set to NULL but rather points to the next
2772 * mbuf of this RSC aggregation (that has not been completed yet and still
2773 * resides on the HW ring). So, instead of calling rte_pktmbuf_free() we
2774 * just free the first "nb_segs" segments of the cluster explicitly by calling
2775 * rte_pktmbuf_free_seg() on each of them.
2776 *
2777 * @m scattered cluster head
2778 */
2779static void __rte_cold
2780ixgbe_free_sc_cluster(struct rte_mbuf *m)
2781{
2782        uint16_t i, nb_segs = m->nb_segs;
2783        struct rte_mbuf *next_seg;
2784
2785        for (i = 0; i < nb_segs; i++) {
2786                next_seg = m->next;
2787                rte_pktmbuf_free_seg(m);
2788                m = next_seg;
2789        }
2790}
2791
2792static void __rte_cold
2793ixgbe_rx_queue_release_mbufs(struct ixgbe_rx_queue *rxq)
2794{
2795        unsigned i;
2796
2797        /* SSE Vector driver has a different way of releasing mbufs. */
2798        if (rxq->rx_using_sse) {
2799                ixgbe_rx_queue_release_mbufs_vec(rxq);
2800                return;
2801        }
2802
2803        if (rxq->sw_ring != NULL) {
2804                for (i = 0; i < rxq->nb_rx_desc; i++) {
2805                        if (rxq->sw_ring[i].mbuf != NULL) {
2806                                rte_pktmbuf_free_seg(rxq->sw_ring[i].mbuf);
2807                                rxq->sw_ring[i].mbuf = NULL;
2808                        }
2809                }
2810                if (rxq->rx_nb_avail) {
2811                        for (i = 0; i < rxq->rx_nb_avail; ++i) {
2812                                struct rte_mbuf *mb;
2813
2814                                mb = rxq->rx_stage[rxq->rx_next_avail + i];
2815                                rte_pktmbuf_free_seg(mb);
2816                        }
2817                        rxq->rx_nb_avail = 0;
2818                }
2819        }
2820
2821        if (rxq->sw_sc_ring)
2822                for (i = 0; i < rxq->nb_rx_desc; i++)
2823                        if (rxq->sw_sc_ring[i].fbuf) {
2824                                ixgbe_free_sc_cluster(rxq->sw_sc_ring[i].fbuf);
2825                                rxq->sw_sc_ring[i].fbuf = NULL;
2826                        }
2827}
2828
2829static void __rte_cold
2830ixgbe_rx_queue_release(struct ixgbe_rx_queue *rxq)
2831{
2832        if (rxq != NULL) {
2833                ixgbe_rx_queue_release_mbufs(rxq);
2834                rte_free(rxq->sw_ring);
2835                rte_free(rxq->sw_sc_ring);
2836                rte_free(rxq);
2837        }
2838}
2839
2840void __rte_cold
2841ixgbe_dev_rx_queue_release(void *rxq)
2842{
2843        ixgbe_rx_queue_release(rxq);
2844}
2845
2846/*
2847 * Check if Rx Burst Bulk Alloc function can be used.
2848 * Return
2849 *        0: the preconditions are satisfied and the bulk allocation function
2850 *           can be used.
2851 *  -EINVAL: the preconditions are NOT satisfied and the default Rx burst
2852 *           function must be used.
2853 */
2854static inline int __rte_cold
2855check_rx_burst_bulk_alloc_preconditions(struct ixgbe_rx_queue *rxq)
2856{
2857        int ret = 0;
2858
2859        /*
2860         * Make sure the following pre-conditions are satisfied:
2861         *   rxq->rx_free_thresh >= RTE_PMD_IXGBE_RX_MAX_BURST
2862         *   rxq->rx_free_thresh < rxq->nb_rx_desc
2863         *   (rxq->nb_rx_desc % rxq->rx_free_thresh) == 0
2864         * Scattered packets are not supported.  This should be checked
2865         * outside of this function.
2866         */
2867        if (!(rxq->rx_free_thresh >= RTE_PMD_IXGBE_RX_MAX_BURST)) {
2868                PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions: "
2869                             "rxq->rx_free_thresh=%d, "
2870                             "RTE_PMD_IXGBE_RX_MAX_BURST=%d",
2871                             rxq->rx_free_thresh, RTE_PMD_IXGBE_RX_MAX_BURST);
2872                ret = -EINVAL;
2873        } else if (!(rxq->rx_free_thresh < rxq->nb_rx_desc)) {
2874                PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions: "
2875                             "rxq->rx_free_thresh=%d, "
2876                             "rxq->nb_rx_desc=%d",
2877                             rxq->rx_free_thresh, rxq->nb_rx_desc);
2878                ret = -EINVAL;
2879        } else if (!((rxq->nb_rx_desc % rxq->rx_free_thresh) == 0)) {
2880                PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions: "
2881                             "rxq->nb_rx_desc=%d, "
2882                             "rxq->rx_free_thresh=%d",
2883                             rxq->nb_rx_desc, rxq->rx_free_thresh);
2884                ret = -EINVAL;
2885        }
2886
2887        return ret;
2888}
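
/*
 * Editorial note: assuming RTE_PMD_IXGBE_RX_MAX_BURST is 32, a queue with
 * nb_rx_desc = 512 and rx_free_thresh = 64 passes all three checks above
 * (64 >= 32, 64 < 512, 512 % 64 == 0), whereas rx_free_thresh = 16 would
 * fail the first check and disable bulk allocation for the whole port.
 */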
2889
2890/* Reset dynamic ixgbe_rx_queue fields back to defaults */
2891static void __rte_cold
2892ixgbe_reset_rx_queue(struct ixgbe_adapter *adapter, struct ixgbe_rx_queue *rxq)
2893{
2894        static const union ixgbe_adv_rx_desc zeroed_desc = {{0}};
2895        unsigned i;
2896        uint16_t len = rxq->nb_rx_desc;
2897
2898        /*
2899         * By default, the Rx queue setup function allocates enough memory for
2900         * IXGBE_MAX_RING_DESC.  The Rx Burst bulk allocation function requires
2901         * extra memory at the end of the descriptor ring to be zero'd out.
2902         */
2903        if (adapter->rx_bulk_alloc_allowed)
2904                /* zero out extra memory */
2905                len += RTE_PMD_IXGBE_RX_MAX_BURST;
2906
2907        /*
2908         * Zero out HW ring memory. Zero out extra memory at the end of
2909         * the H/W ring so look-ahead logic in Rx Burst bulk alloc function
2910         * reads extra memory as zeros.
2911         */
2912        for (i = 0; i < len; i++) {
2913                rxq->rx_ring[i] = zeroed_desc;
2914        }
2915
2916        /*
2917         * initialize extra software ring entries. Space for these extra
2918         * entries is always allocated
2919         */
2920        memset(&rxq->fake_mbuf, 0x0, sizeof(rxq->fake_mbuf));
2921        for (i = rxq->nb_rx_desc; i < len; ++i) {
2922                rxq->sw_ring[i].mbuf = &rxq->fake_mbuf;
2923        }
2924
2925        rxq->rx_nb_avail = 0;
2926        rxq->rx_next_avail = 0;
2927        rxq->rx_free_trigger = (uint16_t)(rxq->rx_free_thresh - 1);
2928        rxq->rx_tail = 0;
2929        rxq->nb_rx_hold = 0;
2930        rxq->pkt_first_seg = NULL;
2931        rxq->pkt_last_seg = NULL;
2932
2933#if defined(RTE_ARCH_X86) || defined(RTE_ARCH_ARM64)
2934        rxq->rxrearm_start = 0;
2935        rxq->rxrearm_nb = 0;
2936#endif
2937}
2938
2939static int
2940ixgbe_is_vf(struct rte_eth_dev *dev)
2941{
2942        struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2943
2944        switch (hw->mac.type) {
2945        case ixgbe_mac_82599_vf:
2946        case ixgbe_mac_X540_vf:
2947        case ixgbe_mac_X550_vf:
2948        case ixgbe_mac_X550EM_x_vf:
2949        case ixgbe_mac_X550EM_a_vf:
2950                return 1;
2951        default:
2952                return 0;
2953        }
2954}
2955
2956uint64_t
2957ixgbe_get_rx_queue_offloads(struct rte_eth_dev *dev)
2958{
2959        uint64_t offloads = 0;
2960        struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2961
2962        if (hw->mac.type != ixgbe_mac_82598EB)
2963                offloads |= DEV_RX_OFFLOAD_VLAN_STRIP;
2964
2965        return offloads;
2966}
2967
2968uint64_t
2969ixgbe_get_rx_port_offloads(struct rte_eth_dev *dev)
2970{
2971        uint64_t offloads;
2972        struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2973
2974        offloads = DEV_RX_OFFLOAD_IPV4_CKSUM  |
2975                   DEV_RX_OFFLOAD_UDP_CKSUM   |
2976                   DEV_RX_OFFLOAD_TCP_CKSUM   |
2977                   DEV_RX_OFFLOAD_KEEP_CRC    |
2978                   DEV_RX_OFFLOAD_JUMBO_FRAME |
2979                   DEV_RX_OFFLOAD_VLAN_FILTER |
2980                   DEV_RX_OFFLOAD_SCATTER |
2981                   DEV_RX_OFFLOAD_RSS_HASH;
2982
2983        if (hw->mac.type == ixgbe_mac_82598EB)
2984                offloads |= DEV_RX_OFFLOAD_VLAN_STRIP;
2985
2986        if (ixgbe_is_vf(dev) == 0)
2987                offloads |= DEV_RX_OFFLOAD_VLAN_EXTEND;
2988
2989        /*
2990         * RSC is only supported by 82599, X540 and X550 PF devices in a
2991         * non-SR-IOV mode.
2992         */
2993        if ((hw->mac.type == ixgbe_mac_82599EB ||
2994             hw->mac.type == ixgbe_mac_X540 ||
2995             hw->mac.type == ixgbe_mac_X550) &&
2996            !RTE_ETH_DEV_SRIOV(dev).active)
2997                offloads |= DEV_RX_OFFLOAD_TCP_LRO;
2998
2999        if (hw->mac.type == ixgbe_mac_82599EB ||
3000            hw->mac.type == ixgbe_mac_X540)
3001                offloads |= DEV_RX_OFFLOAD_MACSEC_STRIP;
3002
3003        if (hw->mac.type == ixgbe_mac_X550 ||
3004            hw->mac.type == ixgbe_mac_X550EM_x ||
3005            hw->mac.type == ixgbe_mac_X550EM_a)
3006                offloads |= DEV_RX_OFFLOAD_OUTER_IPV4_CKSUM;
3007
3008#ifdef RTE_LIBRTE_SECURITY
3009        if (dev->security_ctx)
3010                offloads |= DEV_RX_OFFLOAD_SECURITY;
3011#endif
3012
3013        return offloads;
3014}
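
/*
 * Editorial sketch, not part of the driver: LRO is only advertised for the
 * PF devices listed above, so an application should verify rx_offload_capa
 * before requesting it. The helper name and error handling are illustrative
 * assumptions.
 */
static int
example_request_lro(uint16_t port_id, struct rte_eth_conf *eth_conf)
{
        struct rte_eth_dev_info dev_info;
        int ret;

        ret = rte_eth_dev_info_get(port_id, &dev_info);
        if (ret != 0)
                return ret;

        if ((dev_info.rx_offload_capa & DEV_RX_OFFLOAD_TCP_LRO) == 0)
                return -ENOTSUP;

        eth_conf->rxmode.offloads |= DEV_RX_OFFLOAD_TCP_LRO;
        return 0;
}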
3015
3016int __rte_cold
3017ixgbe_dev_rx_queue_setup(struct rte_eth_dev *dev,
3018                         uint16_t queue_idx,
3019                         uint16_t nb_desc,
3020                         unsigned int socket_id,
3021                         const struct rte_eth_rxconf *rx_conf,
3022                         struct rte_mempool *mp)
3023{
3024        const struct rte_memzone *rz;
3025        struct ixgbe_rx_queue *rxq;
3026        struct ixgbe_hw     *hw;
3027        uint16_t len;
3028        struct ixgbe_adapter *adapter = dev->data->dev_private;
3029        uint64_t offloads;
3030
3031        PMD_INIT_FUNC_TRACE();
3032        hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3033
3034        offloads = rx_conf->offloads | dev->data->dev_conf.rxmode.offloads;
3035
3036        /*
3037         * Validate number of receive descriptors.
3038         * It must not exceed hardware maximum, and must be multiple
3039         * of IXGBE_ALIGN.
3040         */
3041        if (nb_desc % IXGBE_RXD_ALIGN != 0 ||
3042                        (nb_desc > IXGBE_MAX_RING_DESC) ||
3043                        (nb_desc < IXGBE_MIN_RING_DESC)) {
3044                return -EINVAL;
3045        }
3046
3047        /* Free memory prior to re-allocation if needed... */
3048        if (dev->data->rx_queues[queue_idx] != NULL) {
3049                ixgbe_rx_queue_release(dev->data->rx_queues[queue_idx]);
3050                dev->data->rx_queues[queue_idx] = NULL;
3051        }
3052
3053        /* First allocate the rx queue data structure */
3054        rxq = rte_zmalloc_socket("ethdev RX queue", sizeof(struct ixgbe_rx_queue),
3055                                 RTE_CACHE_LINE_SIZE, socket_id);
3056        if (rxq == NULL)
3057                return -ENOMEM;
3058        rxq->mb_pool = mp;
3059        rxq->nb_rx_desc = nb_desc;
3060        rxq->rx_free_thresh = rx_conf->rx_free_thresh;
3061        rxq->queue_id = queue_idx;
3062        rxq->reg_idx = (uint16_t)((RTE_ETH_DEV_SRIOV(dev).active == 0) ?
3063                queue_idx : RTE_ETH_DEV_SRIOV(dev).def_pool_q_idx + queue_idx);
3064        rxq->port_id = dev->data->port_id;
3065        if (dev->data->dev_conf.rxmode.offloads & DEV_RX_OFFLOAD_KEEP_CRC)
3066                rxq->crc_len = RTE_ETHER_CRC_LEN;
3067        else
3068                rxq->crc_len = 0;
3069        rxq->drop_en = rx_conf->rx_drop_en;
3070        rxq->rx_deferred_start = rx_conf->rx_deferred_start;
3071        rxq->offloads = offloads;
3072
3073        /*
3074         * The packet type in RX descriptor is different for different NICs.
3075         * Some bits are used for x550 but reserved for other NICs.
3076         * So set different masks for different NICs.
3077         */
3078        if (hw->mac.type == ixgbe_mac_X550 ||
3079            hw->mac.type == ixgbe_mac_X550EM_x ||
3080            hw->mac.type == ixgbe_mac_X550EM_a ||
3081            hw->mac.type == ixgbe_mac_X550_vf ||
3082            hw->mac.type == ixgbe_mac_X550EM_x_vf ||
3083            hw->mac.type == ixgbe_mac_X550EM_a_vf)
3084                rxq->pkt_type_mask = IXGBE_PACKET_TYPE_MASK_X550;
3085        else
3086                rxq->pkt_type_mask = IXGBE_PACKET_TYPE_MASK_82599;
3087
3088        /*
3089         * Allocate RX ring hardware descriptors. A memzone large enough to
3090         * handle the maximum ring size is allocated in order to allow for
3091         * resizing in later calls to the queue setup function.
3092         */
3093        rz = rte_eth_dma_zone_reserve(dev, "rx_ring", queue_idx,
3094                                      RX_RING_SZ, IXGBE_ALIGN, socket_id);
3095        if (rz == NULL) {
3096                ixgbe_rx_queue_release(rxq);
3097                return -ENOMEM;
3098        }
3099
3100        /*
3101         * Zero init all the descriptors in the ring.
3102         */
3103        memset(rz->addr, 0, RX_RING_SZ);
3104
3105        /*
3106         * Use the VF ring registers (VFRDT/VFRDH) when running as a virtual function
3107         */
3108        if (hw->mac.type == ixgbe_mac_82599_vf ||
3109            hw->mac.type == ixgbe_mac_X540_vf ||
3110            hw->mac.type == ixgbe_mac_X550_vf ||
3111            hw->mac.type == ixgbe_mac_X550EM_x_vf ||
3112            hw->mac.type == ixgbe_mac_X550EM_a_vf) {
3113                rxq->rdt_reg_addr =
3114                        IXGBE_PCI_REG_ADDR(hw, IXGBE_VFRDT(queue_idx));
3115                rxq->rdh_reg_addr =
3116                        IXGBE_PCI_REG_ADDR(hw, IXGBE_VFRDH(queue_idx));
3117        } else {
3118                rxq->rdt_reg_addr =
3119                        IXGBE_PCI_REG_ADDR(hw, IXGBE_RDT(rxq->reg_idx));
3120                rxq->rdh_reg_addr =
3121                        IXGBE_PCI_REG_ADDR(hw, IXGBE_RDH(rxq->reg_idx));
3122        }
3123
3124        rxq->rx_ring_phys_addr = rz->iova;
3125        rxq->rx_ring = (union ixgbe_adv_rx_desc *) rz->addr;
3126
3127        /*
3128         * Certain constraints must be met in order to use the bulk buffer
3129         * allocation Rx burst function. If any of the Rx queues doesn't meet them
3130         * the feature should be disabled for the whole port.
3131         */
3132        if (check_rx_burst_bulk_alloc_preconditions(rxq)) {
3133                PMD_INIT_LOG(DEBUG, "queue[%d] doesn't meet Rx Bulk Alloc "
3134                                    "preconditions - canceling the feature for "
3135                                    "the whole port[%d]",
3136                             rxq->queue_id, rxq->port_id);
3137                adapter->rx_bulk_alloc_allowed = false;
3138        }
3139
3140        /*
3141         * Allocate software ring. Allow for space at the end of the
3142         * S/W ring to make sure look-ahead logic in bulk alloc Rx burst
3143         * function does not access an invalid memory region.
3144         */
3145        len = nb_desc;
3146        if (adapter->rx_bulk_alloc_allowed)
3147                len += RTE_PMD_IXGBE_RX_MAX_BURST;
3148
3149        rxq->sw_ring = rte_zmalloc_socket("rxq->sw_ring",
3150                                          sizeof(struct ixgbe_rx_entry) * len,
3151                                          RTE_CACHE_LINE_SIZE, socket_id);
3152        if (!rxq->sw_ring) {
3153                ixgbe_rx_queue_release(rxq);
3154                return -ENOMEM;
3155        }
3156
3157        /*
3158         * Always allocate even if it's not going to be needed in order to
3159         * simplify the code.
3160         *
3161         * This ring is used in LRO and Scattered Rx cases and Scattered Rx may
3162         * be requested in ixgbe_dev_rx_init(), which is called later from
3163         * dev_start() flow.
3164         */
3165        rxq->sw_sc_ring =
3166                rte_zmalloc_socket("rxq->sw_sc_ring",
3167                                   sizeof(struct ixgbe_scattered_rx_entry) * len,
3168                                   RTE_CACHE_LINE_SIZE, socket_id);
3169        if (!rxq->sw_sc_ring) {
3170                ixgbe_rx_queue_release(rxq);
3171                return -ENOMEM;
3172        }
3173
3174        PMD_INIT_LOG(DEBUG, "sw_ring=%p sw_sc_ring=%p hw_ring=%p "
3175                            "dma_addr=0x%"PRIx64,
3176                     rxq->sw_ring, rxq->sw_sc_ring, rxq->rx_ring,
3177                     rxq->rx_ring_phys_addr);
3178
3179        if (!rte_is_power_of_2(nb_desc)) {
3180                PMD_INIT_LOG(DEBUG, "queue[%d] doesn't meet Vector Rx "
3181                                    "preconditions - canceling the feature for "
3182                                    "the whole port[%d]",
3183                             rxq->queue_id, rxq->port_id);
3184                adapter->rx_vec_allowed = false;
3185        } else
3186                ixgbe_rxq_vec_setup(rxq);
3187
3188        dev->data->rx_queues[queue_idx] = rxq;
3189
3190        ixgbe_reset_rx_queue(adapter, rxq);
3191
3192        return 0;
3193}
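
/*
 * Editorial sketch, not part of the driver: the application-side call that
 * reaches ixgbe_dev_rx_queue_setup() above. The pool name, pool sizing and
 * ring size are illustrative assumptions; passing a NULL rx_conf makes the
 * ethdev layer apply the PMD's default rx_free_thresh and drop settings.
 */
static int
example_setup_rx_queue(uint16_t port_id, uint16_t queue_id)
{
        struct rte_mempool *mp;

        mp = rte_pktmbuf_pool_create("example_rx_pool", 4096, 256, 0,
                                     RTE_MBUF_DEFAULT_BUF_SIZE,
                                     rte_eth_dev_socket_id(port_id));
        if (mp == NULL)
                return -ENOMEM;

        return rte_eth_rx_queue_setup(port_id, queue_id, 512,
                                      rte_eth_dev_socket_id(port_id),
                                      NULL, mp);
}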
3194
3195uint32_t
3196ixgbe_dev_rx_queue_count(struct rte_eth_dev *dev, uint16_t rx_queue_id)
3197{
3198#define IXGBE_RXQ_SCAN_INTERVAL 4
3199        volatile union ixgbe_adv_rx_desc *rxdp;
3200        struct ixgbe_rx_queue *rxq;
3201        uint32_t desc = 0;
3202
3203        rxq = dev->data->rx_queues[rx_queue_id];
3204        rxdp = &(rxq->rx_ring[rxq->rx_tail]);
3205
3206        while ((desc < rxq->nb_rx_desc) &&
3207                (rxdp->wb.upper.status_error &
3208                        rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD))) {
3209                desc += IXGBE_RXQ_SCAN_INTERVAL;
3210                rxdp += IXGBE_RXQ_SCAN_INTERVAL;
3211                if (rxq->rx_tail + desc >= rxq->nb_rx_desc)
3212                        rxdp = &(rxq->rx_ring[rxq->rx_tail +
3213                                desc - rxq->nb_rx_desc]);
3214        }
3215
3216        return desc;
3217}
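
/*
 * Editorial sketch, not part of the driver: the function above backs
 * rte_eth_rx_queue_count(), which an application can use to estimate the
 * backlog on a queue. The 64-descriptor threshold is an arbitrary example.
 */
static int
example_rx_queue_is_busy(uint16_t port_id, uint16_t queue_id)
{
        int used = rte_eth_rx_queue_count(port_id, queue_id);

        if (used < 0)
                return used;    /* negative errno, e.g. -ENOTSUP */
        return used > 64;
}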
3218
3219int
3220ixgbe_dev_rx_descriptor_done(void *rx_queue, uint16_t offset)
3221{
3222        volatile union ixgbe_adv_rx_desc *rxdp;
3223        struct ixgbe_rx_queue *rxq = rx_queue;
3224        uint32_t desc;
3225
3226        if (unlikely(offset >= rxq->nb_rx_desc))
3227                return 0;
3228        desc = rxq->rx_tail + offset;
3229        if (desc >= rxq->nb_rx_desc)
3230                desc -= rxq->nb_rx_desc;
3231
3232        rxdp = &rxq->rx_ring[desc];
3233        return !!(rxdp->wb.upper.status_error &
3234                        rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD));
3235}
3236
3237int
3238ixgbe_dev_rx_descriptor_status(void *rx_queue, uint16_t offset)
3239{
3240        struct ixgbe_rx_queue *rxq = rx_queue;
3241        volatile uint32_t *status;
3242        uint32_t nb_hold, desc;
3243
3244        if (unlikely(offset >= rxq->nb_rx_desc))
3245                return -EINVAL;
3246
3247#if defined(RTE_ARCH_X86) || defined(RTE_ARCH_ARM64)
3248        if (rxq->rx_using_sse)
3249                nb_hold = rxq->rxrearm_nb;
3250        else
3251#endif
3252                nb_hold = rxq->nb_rx_hold;
3253        if (offset >= rxq->nb_rx_desc - nb_hold)
3254                return RTE_ETH_RX_DESC_UNAVAIL;
3255
3256        desc = rxq->rx_tail + offset;
3257        if (desc >= rxq->nb_rx_desc)
3258                desc -= rxq->nb_rx_desc;
3259
3260        status = &rxq->rx_ring[desc].wb.upper.status_error;
3261        if (*status & rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD))
3262                return RTE_ETH_RX_DESC_DONE;
3263
3264        return RTE_ETH_RX_DESC_AVAIL;
3265}
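
/*
 * Editorial sketch, not part of the driver: the function above backs
 * rte_eth_rx_descriptor_status(). Checking the descriptor "burst - 1" slots
 * ahead of the tail tells whether a full burst is already waiting. The
 * helper name is an illustrative assumption.
 */
static int
example_full_burst_ready(uint16_t port_id, uint16_t queue_id, uint16_t burst)
{
        return rte_eth_rx_descriptor_status(port_id, queue_id, burst - 1) ==
                        RTE_ETH_RX_DESC_DONE;
}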
3266
3267int
3268ixgbe_dev_tx_descriptor_status(void *tx_queue, uint16_t offset)
3269{
3270        struct ixgbe_tx_queue *txq = tx_queue;
3271        volatile uint32_t *status;
3272        uint32_t desc;
3273
3274        if (unlikely(offset >= txq->nb_tx_desc))
3275                return -EINVAL;
3276
3277        desc = txq->tx_tail + offset;
3278        /* go to next desc that has the RS bit */
3279        desc = ((desc + txq->tx_rs_thresh - 1) / txq->tx_rs_thresh) *
3280                txq->tx_rs_thresh;
3281        if (desc >= txq->nb_tx_desc) {
3282                desc -= txq->nb_tx_desc;
3283                if (desc >= txq->nb_tx_desc)
3284                        desc -= txq->nb_tx_desc;
3285        }
3286
3287        status = &txq->tx_ring[desc].wb.status;
3288        if (*status & rte_cpu_to_le_32(IXGBE_ADVTXD_STAT_DD))
3289                return RTE_ETH_TX_DESC_DONE;
3290
3291        return RTE_ETH_TX_DESC_FULL;
3292}
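
/*
 * Editorial sketch, not part of the driver: the function above backs
 * rte_eth_tx_descriptor_status(), which lets an application check whether
 * the descriptor a given distance ahead has completed before it enqueues
 * another large burst. The offset of 128 is an arbitrary example.
 */
static int
example_tx_slot_completed(uint16_t port_id, uint16_t queue_id)
{
        return rte_eth_tx_descriptor_status(port_id, queue_id, 128) ==
                        RTE_ETH_TX_DESC_DONE;
}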
3293
3294/*
3295 * Set up link loopback for X540/X550 mode Tx->Rx.
3296 */
3297static inline void __rte_cold
3298ixgbe_setup_loopback_link_x540_x550(struct ixgbe_hw *hw, bool enable)
3299{
3300        uint32_t macc;
3301        PMD_INIT_FUNC_TRACE();
3302
3303        u16 autoneg_reg = IXGBE_MII_AUTONEG_REG;
3304
3305        hw->phy.ops.read_reg(hw, IXGBE_MDIO_AUTO_NEG_CONTROL,
3306                             IXGBE_MDIO_AUTO_NEG_DEV_TYPE, &autoneg_reg);
3307        macc = IXGBE_READ_REG(hw, IXGBE_MACC);
3308
3309        if (enable) {
3310                /* datasheet 15.2.1: disable AUTONEG (PHY Bit 7.0.C) */
3311                autoneg_reg |= IXGBE_MII_AUTONEG_ENABLE;
3312                /* datasheet 15.2.1: MACC.FLU = 1 (force link up) */
3313                macc |= IXGBE_MACC_FLU;
3314        } else {
3315                autoneg_reg &= ~IXGBE_MII_AUTONEG_ENABLE;
3316                macc &= ~IXGBE_MACC_FLU;
3317        }
3318
3319        hw->phy.ops.write_reg(hw, IXGBE_MDIO_AUTO_NEG_CONTROL,
3320                              IXGBE_MDIO_AUTO_NEG_DEV_TYPE, autoneg_reg);
3321
3322        IXGBE_WRITE_REG(hw, IXGBE_MACC, macc);
3323}
3324
3325void __rte_cold
3326ixgbe_dev_clear_queues(struct rte_eth_dev *dev)
3327{
3328        unsigned i;
3329        struct ixgbe_adapter *adapter = dev->data->dev_private;
3330        struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3331
3332        PMD_INIT_FUNC_TRACE();
3333
3334        for (i = 0; i < dev->data->nb_tx_queues; i++) {
3335                struct ixgbe_tx_queue *txq = dev->data->tx_queues[i];
3336
3337                if (txq != NULL) {
3338                        txq->ops->release_mbufs(txq);
3339                        txq->ops->reset(txq);
3340                }
3341        }
3342
3343        for (i = 0; i < dev->data->nb_rx_queues; i++) {
3344                struct ixgbe_rx_queue *rxq = dev->data->rx_queues[i];
3345
3346                if (rxq != NULL) {
3347                        ixgbe_rx_queue_release_mbufs(rxq);
3348                        ixgbe_reset_rx_queue(adapter, rxq);
3349                }
3350        }
3351        /* If loopback mode was enabled, reconfigure the link accordingly */
3352        if (dev->data->dev_conf.lpbk_mode != 0) {
3353                if (hw->mac.type == ixgbe_mac_X540 ||
3354                     hw->mac.type == ixgbe_mac_X550 ||
3355                     hw->mac.type == ixgbe_mac_X550EM_x ||
3356                     hw->mac.type == ixgbe_mac_X550EM_a)
3357                        ixgbe_setup_loopback_link_x540_x550(hw, false);
3358        }
3359}
3360
3361void
3362ixgbe_dev_free_queues(struct rte_eth_dev *dev)
3363{
3364        unsigned i;
3365
3366        PMD_INIT_FUNC_TRACE();
3367
3368        for (i = 0; i < dev->data->nb_rx_queues; i++) {
3369                ixgbe_dev_rx_queue_release(dev->data->rx_queues[i]);
3370                dev->data->rx_queues[i] = NULL;
3371                rte_eth_dma_zone_free(dev, "rx_ring", i);
3372        }
3373        dev->data->nb_rx_queues = 0;
3374
3375        for (i = 0; i < dev->data->nb_tx_queues; i++) {
3376                ixgbe_dev_tx_queue_release(dev->data->tx_queues[i]);
3377                dev->data->tx_queues[i] = NULL;
3378                rte_eth_dma_zone_free(dev, "tx_ring", i);
3379        }
3380        dev->data->nb_tx_queues = 0;
3381}
3382
3383/*********************************************************************
3384 *
3385 *  Device RX/TX init functions
3386 *
3387 **********************************************************************/
3388
3389/**
3390 * Receive Side Scaling (RSS)
3391 * See section 7.1.2.8 in the following document:
3392 *     "Intel 82599 10 GbE Controller Datasheet" - Revision 2.1 October 2009
3393 *
3394 * Principles:
3395 * The source and destination IP addresses of the IP header and the source
3396 * and destination ports of TCP/UDP headers, if any, of received packets are
3397 * hashed against a configurable random key to compute a 32-bit RSS hash result.
3398 * The seven (7) LSBs of the 32-bit hash result are used as an index into a
3399 * 128-entry redirection table (RETA).  Each entry of the RETA provides a 3-bit
3400 * RSS output index which is used as the RX queue index where to store the
3401 * received packets.
3402 * The following output is supplied in the RX write-back descriptor:
3403 *     - 32-bit result of the Microsoft RSS hash function,
3404 *     - 4-bit RSS type field.
3405 */
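
/*
 * Editorial sketch of the lookup described above: the 7 LSBs of the 32-bit
 * hash select one of the 128 RETA entries and that entry names the Rx
 * queue. The plain array argument is used purely for illustration, it is
 * not a driver structure.
 */
static inline uint8_t
example_rss_queue_from_hash(uint32_t rss_hash, const uint8_t reta[128])
{
        return reta[rss_hash & 0x7F];   /* 0x7F keeps the seven LSBs */
}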
3406
3407/*
3408 * RSS random key supplied in section 7.1.2.8.3 of the Intel 82599 datasheet.
3409 * Used as the default key.
3410 */
3411static uint8_t rss_intel_key[40] = {
3412        0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2,
3413        0x41, 0x67, 0x25, 0x3D, 0x43, 0xA3, 0x8F, 0xB0,
3414        0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
3415        0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C,
3416        0x6A, 0x42, 0xB7, 0x3B, 0xBE, 0xAC, 0x01, 0xFA,
3417};
3418
3419static void
3420ixgbe_rss_disable(struct rte_eth_dev *dev)
3421{
3422        struct ixgbe_hw *hw;
3423        uint32_t mrqc;
3424        uint32_t mrqc_reg;
3425
3426        hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3427        mrqc_reg = ixgbe_mrqc_reg_get(hw->mac.type);
3428        mrqc = IXGBE_READ_REG(hw, mrqc_reg);
3429        mrqc &= ~IXGBE_MRQC_RSSEN;
3430        IXGBE_WRITE_REG(hw, mrqc_reg, mrqc);
3431}
3432
3433static void
3434ixgbe_hw_rss_hash_set(struct ixgbe_hw *hw, struct rte_eth_rss_conf *rss_conf)
3435{
3436        uint8_t  *hash_key;
3437        uint32_t mrqc;
3438        uint32_t rss_key;
3439        uint64_t rss_hf;
3440        uint16_t i;
3441        uint32_t mrqc_reg;
3442        uint32_t rssrk_reg;
3443
3444        mrqc_reg = ixgbe_mrqc_reg_get(hw->mac.type);
3445        rssrk_reg = ixgbe_rssrk_reg_get(hw->mac.type, 0);
3446
3447        hash_key = rss_conf->rss_key;
3448        if (hash_key != NULL) {
3449                /* Fill in RSS hash key */
3450                for (i = 0; i < 10; i++) {
3451                        rss_key  = hash_key[(i * 4)];
3452                        rss_key |= hash_key[(i * 4) + 1] << 8;
3453                        rss_key |= hash_key[(i * 4) + 2] << 16;
3454                        rss_key |= hash_key[(i * 4) + 3] << 24;
3455                        IXGBE_WRITE_REG_ARRAY(hw, rssrk_reg, i, rss_key);
3456                }
3457        }
3458
3459        /* Set configured hashing protocols in MRQC register */
3460        rss_hf = rss_conf->rss_hf;
3461        mrqc = IXGBE_MRQC_RSSEN; /* Enable RSS */
3462        if (rss_hf & ETH_RSS_IPV4)
3463                mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4;
3464        if (rss_hf & ETH_RSS_NONFRAG_IPV4_TCP)
3465                mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4_TCP;
3466        if (rss_hf & ETH_RSS_IPV6)
3467                mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6;
3468        if (rss_hf & ETH_RSS_IPV6_EX)
3469                mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_EX;
3470        if (rss_hf & ETH_RSS_NONFRAG_IPV6_TCP)
3471                mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_TCP;
3472        if (rss_hf & ETH_RSS_IPV6_TCP_EX)
3473                mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_EX_TCP;
3474        if (rss_hf & ETH_RSS_NONFRAG_IPV4_UDP)
3475                mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4_UDP;
3476        if (rss_hf & ETH_RSS_NONFRAG_IPV6_UDP)
3477                mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_UDP;
3478        if (rss_hf & ETH_RSS_IPV6_UDP_EX)
3479                mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_EX_UDP;
3480        IXGBE_WRITE_REG(hw, mrqc_reg, mrqc);
3481}
3482
3483int
3484ixgbe_dev_rss_hash_update(struct rte_eth_dev *dev,
3485                          struct rte_eth_rss_conf *rss_conf)
3486{
3487        struct ixgbe_hw *hw;
3488        uint32_t mrqc;
3489        uint64_t rss_hf;
3490        uint32_t mrqc_reg;
3491
3492        hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3493
3494        if (!ixgbe_rss_update_sp(hw->mac.type)) {
3495                PMD_DRV_LOG(ERR, "RSS hash update is not supported on this "
3496                        "NIC.");
3497                return -ENOTSUP;
3498        }
3499        mrqc_reg = ixgbe_mrqc_reg_get(hw->mac.type);
3500
3501        /*
3502         * Excerpt from section 7.1.2.8 Receive-Side Scaling (RSS):
3503         *     "RSS enabling cannot be done dynamically while it must be
3504         *      preceded by a software reset"
3505         * Before changing anything, first check that the update RSS operation
3506         * does not attempt to disable RSS, if RSS was enabled at
3507         * initialization time, or does not attempt to enable RSS, if RSS was
3508         * disabled at initialization time.
3509         */
3510        rss_hf = rss_conf->rss_hf & IXGBE_RSS_OFFLOAD_ALL;
3511        mrqc = IXGBE_READ_REG(hw, mrqc_reg);
3512        if (!(mrqc & IXGBE_MRQC_RSSEN)) { /* RSS disabled */
3513                if (rss_hf != 0) /* Enable RSS */
3514                        return -(EINVAL);
3515                return 0; /* Nothing to do */
3516        }
3517        /* RSS enabled */
3518        if (rss_hf == 0) /* Disable RSS */
3519                return -(EINVAL);
3520        ixgbe_hw_rss_hash_set(hw, rss_conf);
3521        return 0;
3522}
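
/*
 * Editorial sketch, not part of the driver: updating the enabled hash
 * functions at runtime through the ethdev API served by the function above.
 * As checked above, on ixgbe this can only adjust an RSS configuration that
 * was already enabled at initialization. The chosen rss_hf set is an
 * arbitrary example; a NULL key keeps the currently programmed one.
 */
static int
example_update_rss_hf(uint16_t port_id)
{
        struct rte_eth_rss_conf rss_conf = {
                .rss_key = NULL,
                .rss_hf = ETH_RSS_IPV4 | ETH_RSS_NONFRAG_IPV4_TCP |
                          ETH_RSS_NONFRAG_IPV4_UDP,
        };

        return rte_eth_dev_rss_hash_update(port_id, &rss_conf);
}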
3523
3524int
3525ixgbe_dev_rss_hash_conf_get(struct rte_eth_dev *dev,
3526                            struct rte_eth_rss_conf *rss_conf)
3527{
3528        struct ixgbe_hw *hw;
3529        uint8_t *hash_key;
3530        uint32_t mrqc;
3531        uint32_t rss_key;
3532        uint64_t rss_hf;
3533        uint16_t i;
3534        uint32_t mrqc_reg;
3535        uint32_t rssrk_reg;
3536
3537        hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3538        mrqc_reg = ixgbe_mrqc_reg_get(hw->mac.type);
3539        rssrk_reg = ixgbe_rssrk_reg_get(hw->mac.type, 0);
3540        hash_key = rss_conf->rss_key;
3541        if (hash_key != NULL) {
3542                /* Return RSS hash key */
3543                for (i = 0; i < 10; i++) {
3544                        rss_key = IXGBE_READ_REG_ARRAY(hw, rssrk_reg, i);
3545                        hash_key[(i * 4)] = rss_key & 0x000000FF;
3546                        hash_key[(i * 4) + 1] = (rss_key >> 8) & 0x000000FF;
3547                        hash_key[(i * 4) + 2] = (rss_key >> 16) & 0x000000FF;
3548                        hash_key[(i * 4) + 3] = (rss_key >> 24) & 0x000000FF;
3549                }
3550        }
3551
3552        /* Get RSS functions configured in MRQC register */
3553        mrqc = IXGBE_READ_REG(hw, mrqc_reg);
3554        if ((mrqc & IXGBE_MRQC_RSSEN) == 0) { /* RSS is disabled */
3555                rss_conf->rss_hf = 0;
3556                return 0;
3557        }
3558        rss_hf = 0;
3559        if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV4)
3560                rss_hf |= ETH_RSS_IPV4;
3561        if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV4_TCP)
3562                rss_hf |= ETH_RSS_NONFRAG_IPV4_TCP;
3563        if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6)
3564                rss_hf |= ETH_RSS_IPV6;
3565        if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_EX)
3566                rss_hf |= ETH_RSS_IPV6_EX;
3567        if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_TCP)
3568                rss_hf |= ETH_RSS_NONFRAG_IPV6_TCP;
3569        if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_EX_TCP)
3570                rss_hf |= ETH_RSS_IPV6_TCP_EX;
3571        if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV4_UDP)
3572                rss_hf |= ETH_RSS_NONFRAG_IPV4_UDP;
3573        if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_UDP)
3574                rss_hf |= ETH_RSS_NONFRAG_IPV6_UDP;
3575        if (mrqc & IXGBE_MRQC_RSS_FIELD_IPV6_EX_UDP)
3576                rss_hf |= ETH_RSS_IPV6_UDP_EX;
3577        rss_conf->rss_hf = rss_hf;
3578        return 0;
3579}
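
/*
 * Editorial sketch, not part of the driver: reading the programmed key and
 * hash functions back through the ethdev API served by the function above.
 * The 40-byte buffer matches the default key size used in this file.
 */
static int
example_dump_rss_conf(uint16_t port_id)
{
        uint8_t key[40];
        struct rte_eth_rss_conf rss_conf = {
                .rss_key = key,
                .rss_key_len = sizeof(key),
        };
        int ret;

        ret = rte_eth_dev_rss_hash_conf_get(port_id, &rss_conf);
        if (ret != 0)
                return ret;

        PMD_DRV_LOG(DEBUG, "rss_hf=0x%" PRIx64, rss_conf.rss_hf);
        return 0;
}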
3580
3581static void
3582ixgbe_rss_configure(struct rte_eth_dev *dev)
3583{
3584        struct rte_eth_rss_conf rss_conf;
3585        struct ixgbe_adapter *adapter;
3586        struct ixgbe_hw *hw;
3587        uint32_t reta;
3588        uint16_t i;
3589        uint16_t j;
3590        uint16_t sp_reta_size;
3591        uint32_t reta_reg;
3592
3593        PMD_INIT_FUNC_TRACE();
3594        adapter = dev->data->dev_private;
3595        hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3596
3597        sp_reta_size = ixgbe_reta_size_get(hw->mac.type);
3598
3599        /*
3600         * Fill in redirection table
3601         * The byte-swap is needed because NIC registers are in
3602         * little-endian order.
3603         */
3604        if (adapter->rss_reta_updated == 0) {
3605                reta = 0;
3606                for (i = 0, j = 0; i < sp_reta_size; i++, j++) {
3607                        reta_reg = ixgbe_reta_reg_get(hw->mac.type, i);
3608
3609                        if (j == dev->data->nb_rx_queues)
3610                                j = 0;
3611                        reta = (reta << 8) | j;
3612                        if ((i & 3) == 3)
3613                                IXGBE_WRITE_REG(hw, reta_reg,
3614                                                rte_bswap32(reta));
3615                }
3616        }
3617
3618        /*
3619         * Configure the RSS key and the RSS protocols used to compute
3620         * the RSS hash of input packets.
3621         */
3622        rss_conf = dev->data->dev_conf.rx_adv_conf.rss_conf;
3623        if ((rss_conf.rss_hf & IXGBE_RSS_OFFLOAD_ALL) == 0) {
3624                ixgbe_rss_disable(dev);
3625                return;
3626        }
3627        if (rss_conf.rss_key == NULL)
3628                rss_conf.rss_key = rss_intel_key; /* Default hash key */
3629        ixgbe_hw_rss_hash_set(hw, &rss_conf);
3630}
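
/*
 * Editorial sketch, not part of the driver: an application that wants a
 * different spread than the round-robin RETA programmed above can overwrite
 * it with rte_eth_dev_rss_reta_update(); the driver then remembers that a
 * user-supplied table is in place and skips the default fill-in. The
 * 128-entry size is the 82599/X540 RETA size; the real value should be read
 * from dev_info.reta_size. Steering everything to queue 0 is an arbitrary
 * example.
 */
static int
example_steer_all_to_queue0(uint16_t port_id)
{
        struct rte_eth_rss_reta_entry64 reta_conf[2];
        unsigned int i, j;

        for (i = 0; i < RTE_DIM(reta_conf); i++) {
                reta_conf[i].mask = UINT64_MAX; /* update all 64 entries */
                for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
                        reta_conf[i].reta[j] = 0;
        }

        return rte_eth_dev_rss_reta_update(port_id, reta_conf, 128);
}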
3631
3632#define NUM_VFTA_REGISTERS 128
3633#define NIC_RX_BUFFER_SIZE 0x200
3634#define X550_RX_BUFFER_SIZE 0x180
3635
3636static void
3637ixgbe_vmdq_dcb_configure(struct rte_eth_dev *dev)
3638{
3639        struct rte_eth_vmdq_dcb_conf *cfg;
3640        struct ixgbe_hw *hw;
3641        enum rte_eth_nb_pools num_pools;
3642        uint32_t mrqc, vt_ctl, queue_mapping, vlanctrl;
3643        uint16_t pbsize;
3644        uint8_t nb_tcs; /* number of traffic classes */
3645        int i;
3646
3647        PMD_INIT_FUNC_TRACE();
3648        hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3649        cfg = &dev->data->dev_conf.rx_adv_conf.vmdq_dcb_conf;
3650        num_pools = cfg->nb_queue_pools;
3651        /* Check we have a valid number of pools */
3652        if (num_pools != ETH_16_POOLS && num_pools != ETH_32_POOLS) {
3653                ixgbe_rss_disable(dev);
3654                return;
3655        }
3656        /* 16 pools -> 8 traffic classes, 32 pools -> 4 traffic classes */
3657        nb_tcs = (uint8_t)(ETH_VMDQ_DCB_NUM_QUEUES / (int)num_pools);
3658
3659        /*
3660         * RXPBSIZE
3661         * split rx buffer up into sections, each for 1 traffic class
3662         */
3663        switch (hw->mac.type) {
3664        case ixgbe_mac_X550:
3665        case ixgbe_mac_X550EM_x:
3666        case ixgbe_mac_X550EM_a:
3667                pbsize = (uint16_t)(X550_RX_BUFFER_SIZE / nb_tcs);
3668                break;
3669        default:
3670                pbsize = (uint16_t)(NIC_RX_BUFFER_SIZE / nb_tcs);
3671                break;
3672        }
3673        for (i = 0; i < nb_tcs; i++) {
3674                uint32_t rxpbsize = IXGBE_READ_REG(hw, IXGBE_RXPBSIZE(i));
3675
3676                /* clear the 10-bit RXPBSIZE field, then set the new value */
3677                rxpbsize &= (~(0x3FF << IXGBE_RXPBSIZE_SHIFT));
3678                rxpbsize |= (pbsize << IXGBE_RXPBSIZE_SHIFT);
3679                IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), rxpbsize);
3680        }
3681        /* zero alloc all unused TCs */
3682        for (i = nb_tcs; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3683                uint32_t rxpbsize = IXGBE_READ_REG(hw, IXGBE_RXPBSIZE(i));
3684
3685                /* clear the 10-bit RXPBSIZE field (no buffer for unused TCs) */
3686                rxpbsize &= (~(0x3FF << IXGBE_RXPBSIZE_SHIFT));
3687                IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), rxpbsize);
3688        }
3689
3690        /* MRQC: enable vmdq and dcb */
3691        mrqc = (num_pools == ETH_16_POOLS) ?
3692                IXGBE_MRQC_VMDQRT8TCEN : IXGBE_MRQC_VMDQRT4TCEN;
3693        IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
3694
3695        /* PFVTCTL: turn on virtualisation and set the default pool */
3696        vt_ctl = IXGBE_VT_CTL_VT_ENABLE | IXGBE_VT_CTL_REPLEN;
3697        if (cfg->enable_default_pool) {
3698                vt_ctl |= (cfg->default_pool << IXGBE_VT_CTL_POOL_SHIFT);
3699        } else {
3700                vt_ctl |= IXGBE_VT_CTL_DIS_DEFPL;
3701        }
3702
3703        IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, vt_ctl);
3704
3705        /* RTRUP2TC: mapping user priorities to traffic classes (TCs) */
3706        queue_mapping = 0;
3707        for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++)
3708                /*
3709                 * mapping is done with 3 bits per priority,
3710                 * so shift by i*3 each time
3711                 */
3712                queue_mapping |= ((cfg->dcb_tc[i] & 0x07) << (i * 3));
3713
3714        IXGBE_WRITE_REG(hw, IXGBE_RTRUP2TC, queue_mapping);
3715
3716        /* RTRPCS: DCB related */
3717        IXGBE_WRITE_REG(hw, IXGBE_RTRPCS, IXGBE_RMCS_RRM);
3718
3719        /* VLNCTRL: enable vlan filtering and allow all vlan tags through */
3720        vlanctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
3721        vlanctrl |= IXGBE_VLNCTRL_VFE; /* enable vlan filters */
3722        IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlanctrl);
3723
3724        /* VFTA - enable all vlan filters */
3725        for (i = 0; i < NUM_VFTA_REGISTERS; i++) {
3726                IXGBE_WRITE_REG(hw, IXGBE_VFTA(i), 0xFFFFFFFF);
3727        }
3728
3729        /* VFRE: pool enabling for receive - 16 or 32 */
3730        IXGBE_WRITE_REG(hw, IXGBE_VFRE(0),
3731                        num_pools == ETH_16_POOLS ? 0xFFFF : 0xFFFFFFFF);
3732
3733        /*
3734         * MPSAR - allow pools to read specific mac addresses
3735         * In this case, all pools should be able to read from mac addr 0
3736         */
3737        IXGBE_WRITE_REG(hw, IXGBE_MPSAR_LO(0), 0xFFFFFFFF);
3738        IXGBE_WRITE_REG(hw, IXGBE_MPSAR_HI(0), 0xFFFFFFFF);
3739
3740        /* PFVLVF, PFVLVFB: set up filters for vlan tags as configured */
3741        for (i = 0; i < cfg->nb_pool_maps; i++) {
3742                /* set vlan id in VF register and set the valid bit */
3743                IXGBE_WRITE_REG(hw, IXGBE_VLVF(i), (IXGBE_VLVF_VIEN |
3744                                (cfg->pool_map[i].vlan_id & 0xFFF)));
3745                /*
3746                 * Put the allowed pools in VFB reg. As we only have 16 or 32
3747                 * pools, we only need to use the first half of the register
3748                 * i.e. bits 0-31
3749                 */
3750                IXGBE_WRITE_REG(hw, IXGBE_VLVFB(i*2), cfg->pool_map[i].pools);
3751        }
3752}
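
/*
 * Editorial sketch, not part of the driver: the kind of application-side
 * configuration consumed by the function above, here 16 pools with 8
 * traffic classes and a single VLAN-to-pool mapping. All field values are
 * illustrative assumptions.
 */
static void
example_fill_vmdq_dcb_conf(struct rte_eth_conf *eth_conf)
{
        struct rte_eth_vmdq_dcb_conf *cfg =
                &eth_conf->rx_adv_conf.vmdq_dcb_conf;
        int i;

        eth_conf->rxmode.mq_mode = ETH_MQ_RX_VMDQ_DCB;
        cfg->nb_queue_pools = ETH_16_POOLS;
        cfg->enable_default_pool = 0;
        cfg->default_pool = 0;
        cfg->nb_pool_maps = 1;
        cfg->pool_map[0].vlan_id = 100;         /* steer VLAN 100 ... */
        cfg->pool_map[0].pools = 1ULL << 0;     /* ... to pool 0 only */
        for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++)
                cfg->dcb_tc[i] = i;             /* identity UP -> TC map */
}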
3753
3754/**
3755 * ixgbe_dcb_tx_hw_config - Configure general DCB TX parameters
3756 * @dev: pointer to eth_dev structure
3757 * @dcb_config: pointer to ixgbe_dcb_config structure
3758 */
3759static void
3760ixgbe_dcb_tx_hw_config(struct rte_eth_dev *dev,
3761                       struct ixgbe_dcb_config *dcb_config)
3762{
3763        uint32_t reg;
3764        struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3765
3766        PMD_INIT_FUNC_TRACE();
3767        if (hw->mac.type != ixgbe_mac_82598EB) {
3768                /* Disable the Tx desc arbiter so that MTQC can be changed */
3769                reg = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
3770                reg |= IXGBE_RTTDCS_ARBDIS;
3771                IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, reg);
3772
3773                /* Enable DCB for Tx with 8 TCs */
3774                if (dcb_config->num_tcs.pg_tcs == 8) {
3775                        reg = IXGBE_MTQC_RT_ENA | IXGBE_MTQC_8TC_8TQ;
3776                } else {
3777                        reg = IXGBE_MTQC_RT_ENA | IXGBE_MTQC_4TC_4TQ;
3778                }
3779                if (dcb_config->vt_mode)
3780                        reg |= IXGBE_MTQC_VT_ENA;
3781                IXGBE_WRITE_REG(hw, IXGBE_MTQC, reg);
3782
3783                /* Enable the Tx desc arbiter */
3784                reg = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
3785                reg &= ~IXGBE_RTTDCS_ARBDIS;
3786                IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, reg);
3787
3788                /* Enable Security TX Buffer IFG for DCB */
3789                reg = IXGBE_READ_REG(hw, IXGBE_SECTXMINIFG);
3790                reg |= IXGBE_SECTX_DCB;
3791                IXGBE_WRITE_REG(hw, IXGBE_SECTXMINIFG, reg);
3792        }
3793}
3794
3795/**
3796 * ixgbe_vmdq_dcb_hw_tx_config - Configure general VMDQ+DCB TX parameters
3797 * @dev: pointer to rte_eth_dev structure
3798 * @dcb_config: pointer to ixgbe_dcb_config structure
3799 */
3800static void
3801ixgbe_vmdq_dcb_hw_tx_config(struct rte_eth_dev *dev,
3802                        struct ixgbe_dcb_config *dcb_config)
3803{
3804        struct rte_eth_vmdq_dcb_tx_conf *vmdq_tx_conf =
3805                        &dev->data->dev_conf.tx_adv_conf.vmdq_dcb_tx_conf;
3806        struct ixgbe_hw *hw =
3807                        IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3808
3809        PMD_INIT_FUNC_TRACE();
3810        if (hw->mac.type != ixgbe_mac_82598EB)
3811                /* PF VF Transmit Enable */
3812                IXGBE_WRITE_REG(hw, IXGBE_VFTE(0),
3813                        vmdq_tx_conf->nb_queue_pools == ETH_16_POOLS ? 0xFFFF : 0xFFFFFFFF);
3814
3815        /* Configure general DCB TX parameters */
3816        ixgbe_dcb_tx_hw_config(dev, dcb_config);
3817}
3818
3819static void
3820ixgbe_vmdq_dcb_rx_config(struct rte_eth_dev *dev,
3821                        struct ixgbe_dcb_config *dcb_config)
3822{
3823        struct rte_eth_vmdq_dcb_conf *vmdq_rx_conf =
3824                        &dev->data->dev_conf.rx_adv_conf.vmdq_dcb_conf;
3825        struct ixgbe_dcb_tc_config *tc;
3826        uint8_t i, j;
3827
3828        /* convert rte_eth_conf.rx_adv_conf to struct ixgbe_dcb_config */
3829        if (vmdq_rx_conf->nb_queue_pools == ETH_16_POOLS) {
3830                dcb_config->num_tcs.pg_tcs = ETH_8_TCS;
3831                dcb_config->num_tcs.pfc_tcs = ETH_8_TCS;
3832        } else {
3833                dcb_config->num_tcs.pg_tcs = ETH_4_TCS;
3834                dcb_config->num_tcs.pfc_tcs = ETH_4_TCS;
3835        }
3836
3837        /* Initialize User Priority to Traffic Class mapping */
3838        for (j = 0; j < IXGBE_DCB_MAX_TRAFFIC_CLASS; j++) {
3839                tc = &dcb_config->tc_config[j];
3840                tc->path[IXGBE_DCB_RX_CONFIG].up_to_tc_bitmap = 0;
3841        }
3842
3843        /* User Priority to Traffic Class mapping */
3844        for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3845                j = vmdq_rx_conf->dcb_tc[i];
3846                tc = &dcb_config->tc_config[j];
3847                tc->path[IXGBE_DCB_RX_CONFIG].up_to_tc_bitmap |=
3848                                                (uint8_t)(1 << i);
3849        }
3850}
3851
3852static void
3853ixgbe_dcb_vt_tx_config(struct rte_eth_dev *dev,
3854                        struct ixgbe_dcb_config *dcb_config)
3855{
3856        struct rte_eth_vmdq_dcb_tx_conf *vmdq_tx_conf =
3857                        &dev->data->dev_conf.tx_adv_conf.vmdq_dcb_tx_conf;
3858        struct ixgbe_dcb_tc_config *tc;
3859        uint8_t i, j;
3860
3861        /* convert rte_eth_conf.tx_adv_conf to struct ixgbe_dcb_config */
3862        if (vmdq_tx_conf->nb_queue_pools == ETH_16_POOLS) {
3863                dcb_config->num_tcs.pg_tcs = ETH_8_TCS;
3864                dcb_config->num_tcs.pfc_tcs = ETH_8_TCS;
3865        } else {
3866                dcb_config->num_tcs.pg_tcs = ETH_4_TCS;
3867                dcb_config->num_tcs.pfc_tcs = ETH_4_TCS;
3868        }
3869
3870        /* Initialize User Priority to Traffic Class mapping */
3871        for (j = 0; j < IXGBE_DCB_MAX_TRAFFIC_CLASS; j++) {
3872                tc = &dcb_config->tc_config[j];
3873                tc->path[IXGBE_DCB_TX_CONFIG].up_to_tc_bitmap = 0;
3874        }
3875
3876        /* User Priority to Traffic Class mapping */
3877        for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3878                j = vmdq_tx_conf->dcb_tc[i];
3879                tc = &dcb_config->tc_config[j];
3880                tc->path[IXGBE_DCB_TX_CONFIG].up_to_tc_bitmap |=
3881                                                (uint8_t)(1 << i);
3882        }
3883}
3884
3885static void
3886ixgbe_dcb_rx_config(struct rte_eth_dev *dev,
3887                struct ixgbe_dcb_config *dcb_config)
3888{
3889        struct rte_eth_dcb_rx_conf *rx_conf =
3890                        &dev->data->dev_conf.rx_adv_conf.dcb_rx_conf;
3891        struct ixgbe_dcb_tc_config *tc;
3892        uint8_t i, j;
3893
3894        dcb_config->num_tcs.pg_tcs = (uint8_t)rx_conf->nb_tcs;
3895        dcb_config->num_tcs.pfc_tcs = (uint8_t)rx_conf->nb_tcs;
3896
3897        /* Initialize User Priority to Traffic Class mapping */
3898        for (j = 0; j < IXGBE_DCB_MAX_TRAFFIC_CLASS; j++) {
3899                tc = &dcb_config->tc_config[j];
3900                tc->path[IXGBE_DCB_RX_CONFIG].up_to_tc_bitmap = 0;
3901        }
3902
3903        /* User Priority to Traffic Class mapping */
3904        for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3905                j = rx_conf->dcb_tc[i];
3906                tc = &dcb_config->tc_config[j];
3907                tc->path[IXGBE_DCB_RX_CONFIG].up_to_tc_bitmap |=
3908                                                (uint8_t)(1 << i);
3909        }
3910}
3911
3912static void
3913ixgbe_dcb_tx_config(struct rte_eth_dev *dev,
3914                struct ixgbe_dcb_config *dcb_config)
3915{
3916        struct rte_eth_dcb_tx_conf *tx_conf =
3917                        &dev->data->dev_conf.tx_adv_conf.dcb_tx_conf;
3918        struct ixgbe_dcb_tc_config *tc;
3919        uint8_t i, j;
3920
3921        dcb_config->num_tcs.pg_tcs = (uint8_t)tx_conf->nb_tcs;
3922        dcb_config->num_tcs.pfc_tcs = (uint8_t)tx_conf->nb_tcs;
3923
3924        /* Initialize User Priority to Traffic Class mapping */
3925        for (j = 0; j < IXGBE_DCB_MAX_TRAFFIC_CLASS; j++) {
3926                tc = &dcb_config->tc_config[j];
3927                tc->path[IXGBE_DCB_TX_CONFIG].up_to_tc_bitmap = 0;
3928        }
3929
3930        /* User Priority to Traffic Class mapping */
3931        for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
3932                j = tx_conf->dcb_tc[i];
3933                tc = &dcb_config->tc_config[j];
3934                tc->path[IXGBE_DCB_TX_CONFIG].up_to_tc_bitmap |=
3935                                                (uint8_t)(1 << i);
3936        }
3937}
3938
3939/**
3940 * ixgbe_dcb_rx_hw_config - Configure general DCB RX HW parameters
3941 * @dev: pointer to eth_dev structure
3942 * @dcb_config: pointer to ixgbe_dcb_config structure
3943 */
3944static void
3945ixgbe_dcb_rx_hw_config(struct rte_eth_dev *dev,
3946                       struct ixgbe_dcb_config *dcb_config)
3947{
3948        uint32_t reg;
3949        uint32_t vlanctrl;
3950        uint8_t i;
3951        uint32_t q;
3952        struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3953
3954        PMD_INIT_FUNC_TRACE();
3955        /*
3956         * Disable the arbiter before changing parameters
3957         * (always enable recycle mode; WSP)
3958         */
3959        reg = IXGBE_RTRPCS_RRM | IXGBE_RTRPCS_RAC | IXGBE_RTRPCS_ARBDIS;
3960        IXGBE_WRITE_REG(hw, IXGBE_RTRPCS, reg);
3961
3962        if (hw->mac.type != ixgbe_mac_82598EB) {
3963                reg = IXGBE_READ_REG(hw, IXGBE_MRQC);
3964                if (dcb_config->num_tcs.pg_tcs == 4) {
3965                        if (dcb_config->vt_mode)
3966                                reg = (reg & ~IXGBE_MRQC_MRQE_MASK) |
3967                                        IXGBE_MRQC_VMDQRT4TCEN;
3968                        else {
3969                                /* Whether the mode is DCB or DCB_RSS, just
3970                                 * set the MRQE to RSSXTCEN; RSS itself is
3971                                 * controlled by RSS_FIELD.
3972                                 */
3973                                IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, 0);
3974                                reg = (reg & ~IXGBE_MRQC_MRQE_MASK) |
3975                                        IXGBE_MRQC_RTRSS4TCEN;
3976                        }
3977                }
3978                if (dcb_config->num_tcs.pg_tcs == 8) {
3979                        if (dcb_config->vt_mode)
3980                                reg = (reg & ~IXGBE_MRQC_MRQE_MASK) |
3981                                        IXGBE_MRQC_VMDQRT8TCEN;
3982                        else {
3983                                IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, 0);
3984                                reg = (reg & ~IXGBE_MRQC_MRQE_MASK) |
3985                                        IXGBE_MRQC_RTRSS8TCEN;
3986                        }
3987                }
3988
3989                IXGBE_WRITE_REG(hw, IXGBE_MRQC, reg);
3990
3991                if (RTE_ETH_DEV_SRIOV(dev).active == 0) {
3992                        /* Disable drop for all queues in VMDQ mode*/
3993                        for (q = 0; q < IXGBE_MAX_RX_QUEUE_NUM; q++)
3994                                IXGBE_WRITE_REG(hw, IXGBE_QDE,
3995                                                (IXGBE_QDE_WRITE |
3996                                                 (q << IXGBE_QDE_IDX_SHIFT)));
3997                } else {
3998                        /* Enable drop for all queues in SRIOV mode */
3999                        for (q = 0; q < IXGBE_MAX_RX_QUEUE_NUM; q++)
4000                                IXGBE_WRITE_REG(hw, IXGBE_QDE,
4001                                                (IXGBE_QDE_WRITE |
4002                                                 (q << IXGBE_QDE_IDX_SHIFT) |
4003                                                 IXGBE_QDE_ENABLE));
4004                }
4005        }
4006
4007        /* VLNCTRL: enable vlan filtering and allow all vlan tags through */
4008        vlanctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
4009        vlanctrl |= IXGBE_VLNCTRL_VFE; /* enable vlan filters */
4010        IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlanctrl);
4011
4012        /* VFTA - enable all vlan filters */
4013        for (i = 0; i < NUM_VFTA_REGISTERS; i++) {
4014                IXGBE_WRITE_REG(hw, IXGBE_VFTA(i), 0xFFFFFFFF);
4015        }
4016
4017        /*
4018         * Configure Rx packet plane (recycle mode; WSP) and
4019         * enable arbiter
4020         */
4021        reg = IXGBE_RTRPCS_RRM | IXGBE_RTRPCS_RAC;
4022        IXGBE_WRITE_REG(hw, IXGBE_RTRPCS, reg);
4023}
4024
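/*
 * Program the Rx packet-plane arbiter with the unpacked CEE parameters,
 * dispatching to the 82598 or 82599-class helper based on the MAC type.
 * Unknown MAC types are silently ignored.
 */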
4025static void
4026ixgbe_dcb_hw_arbite_rx_config(struct ixgbe_hw *hw, uint16_t *refill,
4027                        uint16_t *max, uint8_t *bwg_id, uint8_t *tsa, uint8_t *map)
4028{
4029        switch (hw->mac.type) {
4030        case ixgbe_mac_82598EB:
4031                ixgbe_dcb_config_rx_arbiter_82598(hw, refill, max, tsa);
4032                break;
4033        case ixgbe_mac_82599EB:
4034        case ixgbe_mac_X540:
4035        case ixgbe_mac_X550:
4036        case ixgbe_mac_X550EM_x:
4037        case ixgbe_mac_X550EM_a:
4038                ixgbe_dcb_config_rx_arbiter_82599(hw, refill, max, bwg_id,
4039                                                  tsa, map);
4040                break;
4041        default:
4042                break;
4043        }
4044}
4045
4046static void
4047ixgbe_dcb_hw_arbite_tx_config(struct ixgbe_hw *hw, uint16_t *refill, uint16_t *max,
4048                            uint8_t *bwg_id, uint8_t *tsa, uint8_t *map)
4049{
4050        switch (hw->mac.type) {
4051        case ixgbe_mac_82598EB:
4052                ixgbe_dcb_config_tx_desc_arbiter_82598(hw, refill, max, bwg_id, tsa);
4053                ixgbe_dcb_config_tx_data_arbiter_82598(hw, refill, max, bwg_id, tsa);
4054                break;
4055        case ixgbe_mac_82599EB:
4056        case ixgbe_mac_X540:
4057        case ixgbe_mac_X550:
4058        case ixgbe_mac_X550EM_x:
4059        case ixgbe_mac_X550EM_a:
4060                ixgbe_dcb_config_tx_desc_arbiter_82599(hw, refill, max, bwg_id, tsa);
4061                ixgbe_dcb_config_tx_data_arbiter_82599(hw, refill, max, bwg_id, tsa, map);
4062                break;
4063        default:
4064                break;
4065        }
4066}
4067
4068#define DCB_RX_CONFIG  1
4069#define DCB_TX_CONFIG  1
4070#define DCB_TX_PB      1024
4071/**
4072 * ixgbe_dcb_hw_configure - Enable DCB and configure
4073 * general DCB parameters in both VT and non-VT mode
4074 * @dev: pointer to rte_eth_dev structure
4075 * @dcb_config: pointer to ixgbe_dcb_config structure
4076 */
4077static int
4078ixgbe_dcb_hw_configure(struct rte_eth_dev *dev,
4079                        struct ixgbe_dcb_config *dcb_config)
4080{
4081        int     ret = 0;
4082        uint8_t i, pfc_en, nb_tcs;
4083        uint16_t pbsize, rx_buffer_size;
4084        uint8_t config_dcb_rx = 0;
4085        uint8_t config_dcb_tx = 0;
4086        uint8_t tsa[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
4087        uint8_t bwgid[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
4088        uint16_t refill[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
4089        uint16_t max[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
4090        uint8_t map[IXGBE_DCB_MAX_TRAFFIC_CLASS] = {0};
4091        struct ixgbe_dcb_tc_config *tc;
4092        uint32_t max_frame = dev->data->mtu + RTE_ETHER_HDR_LEN +
4093                RTE_ETHER_CRC_LEN;
4094        struct ixgbe_hw *hw =
4095                        IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4096        struct ixgbe_bw_conf *bw_conf =
4097                IXGBE_DEV_PRIVATE_TO_BW_CONF(dev->data->dev_private);
4098
4099        switch (dev->data->dev_conf.rxmode.mq_mode) {
4100        case ETH_MQ_RX_VMDQ_DCB:
4101                dcb_config->vt_mode = true;
4102                if (hw->mac.type != ixgbe_mac_82598EB) {
4103                        config_dcb_rx = DCB_RX_CONFIG;
4104                        /*
4105                         * Get DCB and VT Rx configuration parameters
4106                         * from rte_eth_conf.
4107                         */
4108                        ixgbe_vmdq_dcb_rx_config(dev, dcb_config);
4109                        /*Configure general VMDQ and DCB RX parameters*/
4110                        ixgbe_vmdq_dcb_configure(dev);
4111                }
4112                break;
4113        case ETH_MQ_RX_DCB:
4114        case ETH_MQ_RX_DCB_RSS:
4115                dcb_config->vt_mode = false;
4116                config_dcb_rx = DCB_RX_CONFIG;
4117                /* Get DCB RX configuration parameters from rte_eth_conf */
4118                ixgbe_dcb_rx_config(dev, dcb_config);
4119                /*Configure general DCB RX parameters*/
4120                ixgbe_dcb_rx_hw_config(dev, dcb_config);
4121                break;
4122        default:
4123                PMD_INIT_LOG(ERR, "Incorrect DCB RX mode configuration");
4124                break;
4125        }
4126        switch (dev->data->dev_conf.txmode.mq_mode) {
4127        case ETH_MQ_TX_VMDQ_DCB:
4128                dcb_config->vt_mode = true;
4129                config_dcb_tx = DCB_TX_CONFIG;
4130                /* get DCB and VT TX configuration parameters
4131                 * from rte_eth_conf
4132                 */
4133                ixgbe_dcb_vt_tx_config(dev, dcb_config);
4134                /*Configure general VMDQ and DCB TX parameters*/
4135                ixgbe_vmdq_dcb_hw_tx_config(dev, dcb_config);
4136                break;
4137
4138        case ETH_MQ_TX_DCB:
4139                dcb_config->vt_mode = false;
4140                config_dcb_tx = DCB_TX_CONFIG;
4141                /*get DCB TX configuration parameters from rte_eth_conf*/
4142                ixgbe_dcb_tx_config(dev, dcb_config);
4143                /*Configure general DCB TX parameters*/
4144                ixgbe_dcb_tx_hw_config(dev, dcb_config);
4145                break;
4146        default:
4147                PMD_INIT_LOG(ERR, "Incorrect DCB TX mode configuration");
4148                break;
4149        }
4150
4151        nb_tcs = dcb_config->num_tcs.pfc_tcs;
4152        /* Unpack map */
4153        ixgbe_dcb_unpack_map_cee(dcb_config, IXGBE_DCB_RX_CONFIG, map);
4154        if (nb_tcs == ETH_4_TCS) {
4155                /* Avoid un-configured priority mapping to TC0 */
4156                uint8_t j = 4;
4157                uint8_t mask = 0xFF;
4158
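                /*
                 * First pass: clear from the mask every TC already referenced
                 * by user priorities 0-3. Second pass: assign priorities 4-7
                 * to the TCs left in the mask so that no priority implicitly
                 * falls back to TC0.
                 */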
4159                for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES - 4; i++)
4160                        mask = (uint8_t)(mask & (~(1 << map[i])));
4161                for (i = 0; mask && (i < IXGBE_DCB_MAX_TRAFFIC_CLASS); i++) {
4162                        if ((mask & 0x1) && (j < ETH_DCB_NUM_USER_PRIORITIES))
4163                                map[j++] = i;
4164                        mask >>= 1;
4165                }
4166                /* Re-configure 4 TCs BW */
4167                for (i = 0; i < nb_tcs; i++) {
4168                        tc = &dcb_config->tc_config[i];
4169                        if (bw_conf->tc_num != nb_tcs)
4170                                tc->path[IXGBE_DCB_TX_CONFIG].bwg_percent =
4171                                        (uint8_t)(100 / nb_tcs);
4172                        tc->path[IXGBE_DCB_RX_CONFIG].bwg_percent =
4173                                                (uint8_t)(100 / nb_tcs);
4174                }
4175                for (; i < IXGBE_DCB_MAX_TRAFFIC_CLASS; i++) {
4176                        tc = &dcb_config->tc_config[i];
4177                        tc->path[IXGBE_DCB_TX_CONFIG].bwg_percent = 0;
4178                        tc->path[IXGBE_DCB_RX_CONFIG].bwg_percent = 0;
4179                }
4180        } else {
4181                /* Re-configure 8 TCs BW */
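                /*
                 * With nb_tcs == 8, 100 / 8 == 12 and the (i & 1) term adds
                 * 1% to each odd TC: four TCs get 12% and four get 13%,
                 * summing to 100%.
                 */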
4182                for (i = 0; i < nb_tcs; i++) {
4183                        tc = &dcb_config->tc_config[i];
4184                        if (bw_conf->tc_num != nb_tcs)
4185                                tc->path[IXGBE_DCB_TX_CONFIG].bwg_percent =
4186                                        (uint8_t)(100 / nb_tcs + (i & 1));
4187                        tc->path[IXGBE_DCB_RX_CONFIG].bwg_percent =
4188                                (uint8_t)(100 / nb_tcs + (i & 1));
4189                }
4190        }
4191
4192        switch (hw->mac.type) {
4193        case ixgbe_mac_X550:
4194        case ixgbe_mac_X550EM_x:
4195        case ixgbe_mac_X550EM_a:
4196                rx_buffer_size = X550_RX_BUFFER_SIZE;
4197                break;
4198        default:
4199                rx_buffer_size = NIC_RX_BUFFER_SIZE;
4200                break;
4201        }
4202
4203        if (config_dcb_rx) {
4204                /* Set RX buffer size */
4205                pbsize = (uint16_t)(rx_buffer_size / nb_tcs);
4206                uint32_t rxpbsize = pbsize << IXGBE_RXPBSIZE_SHIFT;
4207
4208                for (i = 0; i < nb_tcs; i++) {
4209                        IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), rxpbsize);
4210                }
4211                /* zero alloc all unused TCs */
4212                for (; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
4213                        IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), 0);
4214                }
4215        }
4216        if (config_dcb_tx) {
4217                /* Only an equally distributed Tx packet
4218                 * buffer strategy is supported.
4219                 */
4220                uint32_t txpktsize = IXGBE_TXPBSIZE_MAX / nb_tcs;
4221                uint32_t txpbthresh = (txpktsize / DCB_TX_PB) - IXGBE_TXPKT_SIZE_MAX;
4222
4223                for (i = 0; i < nb_tcs; i++) {
4224                        IXGBE_WRITE_REG(hw, IXGBE_TXPBSIZE(i), txpktsize);
4225                        IXGBE_WRITE_REG(hw, IXGBE_TXPBTHRESH(i), txpbthresh);
4226                }
4227                /* Clear unused TCs, if any, to zero buffer size*/
4228                for (; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
4229                        IXGBE_WRITE_REG(hw, IXGBE_TXPBSIZE(i), 0);
4230                        IXGBE_WRITE_REG(hw, IXGBE_TXPBTHRESH(i), 0);
4231                }
4232        }
4233
4234        /*Calculates traffic class credits*/
4235        ixgbe_dcb_calculate_tc_credits_cee(hw, dcb_config, max_frame,
4236                                IXGBE_DCB_TX_CONFIG);
4237        ixgbe_dcb_calculate_tc_credits_cee(hw, dcb_config, max_frame,
4238                                IXGBE_DCB_RX_CONFIG);
4239
4240        if (config_dcb_rx) {
4241                /* Unpack CEE standard containers */
4242                ixgbe_dcb_unpack_refill_cee(dcb_config, IXGBE_DCB_RX_CONFIG, refill);
4243                ixgbe_dcb_unpack_max_cee(dcb_config, max);
4244                ixgbe_dcb_unpack_bwgid_cee(dcb_config, IXGBE_DCB_RX_CONFIG, bwgid);
4245                ixgbe_dcb_unpack_tsa_cee(dcb_config, IXGBE_DCB_RX_CONFIG, tsa);
4246                /* Configure PG(ETS) RX */
4247                ixgbe_dcb_hw_arbite_rx_config(hw, refill, max, bwgid, tsa, map);
4248        }
4249
4250        if (config_dcb_tx) {
4251                /* Unpack CEE standard containers */
4252                ixgbe_dcb_unpack_refill_cee(dcb_config, IXGBE_DCB_TX_CONFIG, refill);
4253                ixgbe_dcb_unpack_max_cee(dcb_config, max);
4254                ixgbe_dcb_unpack_bwgid_cee(dcb_config, IXGBE_DCB_TX_CONFIG, bwgid);
4255                ixgbe_dcb_unpack_tsa_cee(dcb_config, IXGBE_DCB_TX_CONFIG, tsa);
4256                /* Configure PG(ETS) TX */
4257                ixgbe_dcb_hw_arbite_tx_config(hw, refill, max, bwgid, tsa, map);
4258        }
4259
4260        /*Configure queue statistics registers*/
4261        ixgbe_dcb_config_tc_stats_82599(hw, dcb_config);
4262
4263        /* Check if the PFC is supported */
4264        if (dev->data->dev_conf.dcb_capability_en & ETH_DCB_PFC_SUPPORT) {
4265                pbsize = (uint16_t)(rx_buffer_size / nb_tcs);
4266                for (i = 0; i < nb_tcs; i++) {
4267                        /*
4268                         * E.g. with 8 TCs, high_water (3/4 of pbsize) defaults
4269                         * to 48 and low_water (1/4 of pbsize) to 16.
4270                         */
4271                        hw->fc.high_water[i] = (pbsize * 3) / 4;
4272                        hw->fc.low_water[i] = pbsize / 4;
4273                        /* Enable pfc for this TC */
4274                        tc = &dcb_config->tc_config[i];
4275                        tc->pfc = ixgbe_dcb_pfc_enabled;
4276                }
4277                ixgbe_dcb_unpack_pfc_cee(dcb_config, map, &pfc_en);
4278                if (dcb_config->num_tcs.pfc_tcs == ETH_4_TCS)
4279                        pfc_en &= 0x0F;
4280                ret = ixgbe_dcb_config_pfc(hw, pfc_en, map);
4281        }
4282
4283        return ret;
4284}
4285
4286/**
4287 * ixgbe_configure_dcb - Configure DCB hardware
4288 * @dev: pointer to rte_eth_dev
4289 */
4290void ixgbe_configure_dcb(struct rte_eth_dev *dev)
4291{
4292        struct ixgbe_dcb_config *dcb_cfg =
4293                        IXGBE_DEV_PRIVATE_TO_DCB_CFG(dev->data->dev_private);
4294        struct rte_eth_conf *dev_conf = &(dev->data->dev_conf);
4295
4296        PMD_INIT_FUNC_TRACE();
4297
4298        /* check support mq_mode for DCB */
4299        if ((dev_conf->rxmode.mq_mode != ETH_MQ_RX_VMDQ_DCB) &&
4300            (dev_conf->rxmode.mq_mode != ETH_MQ_RX_DCB) &&
4301            (dev_conf->rxmode.mq_mode != ETH_MQ_RX_DCB_RSS))
4302                return;
4303
4304        if (dev->data->nb_rx_queues > ETH_DCB_NUM_QUEUES)
4305                return;
4306
4307        /** Configure DCB hardware **/
4308        ixgbe_dcb_hw_configure(dev, dcb_cfg);
4309}
4310
4311/*
4312 * VMDq is only supported on 10 GbE NICs.
4313 */
4314static void
4315ixgbe_vmdq_rx_hw_configure(struct rte_eth_dev *dev)
4316{
4317        struct rte_eth_vmdq_rx_conf *cfg;
4318        struct ixgbe_hw *hw;
4319        enum rte_eth_nb_pools num_pools;
4320        uint32_t mrqc, vt_ctl, vlanctrl;
4321        uint32_t vmolr = 0;
4322        int i;
4323
4324        PMD_INIT_FUNC_TRACE();
4325        hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4326        cfg = &dev->data->dev_conf.rx_adv_conf.vmdq_rx_conf;
4327        num_pools = cfg->nb_queue_pools;
4328
4329        ixgbe_rss_disable(dev);
4330
4331        /* MRQC: enable vmdq */
4332        mrqc = IXGBE_MRQC_VMDQEN;
4333        IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
4334
4335        /* PFVTCTL: turn on virtualisation and set the default pool */
4336        vt_ctl = IXGBE_VT_CTL_VT_ENABLE | IXGBE_VT_CTL_REPLEN;
4337        if (cfg->enable_default_pool)
4338                vt_ctl |= (cfg->default_pool << IXGBE_VT_CTL_POOL_SHIFT);
4339        else
4340                vt_ctl |= IXGBE_VT_CTL_DIS_DEFPL;
4341
4342        IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, vt_ctl);
4343
4344        for (i = 0; i < (int)num_pools; i++) {
4345                vmolr = ixgbe_convert_vm_rx_mask_to_val(cfg->rx_mode, vmolr);
4346                IXGBE_WRITE_REG(hw, IXGBE_VMOLR(i), vmolr);
4347        }
4348
4349        /* VLNCTRL: enable vlan filtering and allow all vlan tags through */
4350        vlanctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
4351        vlanctrl |= IXGBE_VLNCTRL_VFE; /* enable vlan filters */
4352        IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlanctrl);
4353
4354        /* VFTA - enable all vlan filters */
4355        for (i = 0; i < NUM_VFTA_REGISTERS; i++)
4356                IXGBE_WRITE_REG(hw, IXGBE_VFTA(i), UINT32_MAX);
4357
4358        /* VFRE: pool enabling for receive - 64 */
4359        IXGBE_WRITE_REG(hw, IXGBE_VFRE(0), UINT32_MAX);
4360        if (num_pools == ETH_64_POOLS)
4361                IXGBE_WRITE_REG(hw, IXGBE_VFRE(1), UINT32_MAX);
4362
4363        /*
4364         * MPSAR - allow pools to read specific mac addresses
4365         * In this case, all pools should be able to read from mac addr 0
4366         */
4367        IXGBE_WRITE_REG(hw, IXGBE_MPSAR_LO(0), UINT32_MAX);
4368        IXGBE_WRITE_REG(hw, IXGBE_MPSAR_HI(0), UINT32_MAX);
4369
4370        /* PFVLVF, PFVLVFB: set up filters for vlan tags as configured */
4371        for (i = 0; i < cfg->nb_pool_maps; i++) {
4372                /* set vlan id in VF register and set the valid bit */
4373                IXGBE_WRITE_REG(hw, IXGBE_VLVF(i), (IXGBE_VLVF_VIEN |
4374                                (cfg->pool_map[i].vlan_id & IXGBE_RXD_VLAN_ID_MASK)));
4375                /*
4376                 * Put the allowed pools in VFB reg. As we only have 16 or 64
4377                 * pools, we only need to use the first half of the register
4378                 * i.e. bits 0-31
4379                 */
4380                if (((cfg->pool_map[i].pools >> 32) & UINT32_MAX) == 0)
4381                        IXGBE_WRITE_REG(hw, IXGBE_VLVFB(i * 2),
4382                                        (cfg->pool_map[i].pools & UINT32_MAX));
4383                else
4384                        IXGBE_WRITE_REG(hw, IXGBE_VLVFB((i * 2 + 1)),
4385                                        ((cfg->pool_map[i].pools >> 32) & UINT32_MAX));
4386
4387        }
4388
4389        /* PFDMA Tx General Switch Control Enables VMDQ loopback */
4390        if (cfg->enable_loop_back) {
4391                IXGBE_WRITE_REG(hw, IXGBE_PFDTXGSWC, IXGBE_PFDTXGSWC_VT_LBEN);
4392                for (i = 0; i < RTE_IXGBE_VMTXSW_REGISTER_COUNT; i++)
4393                        IXGBE_WRITE_REG(hw, IXGBE_VMTXSW(i), UINT32_MAX);
4394        }
4395
4396        IXGBE_WRITE_FLUSH(hw);
4397}
4398
4399/*
4400 * ixgbe_vmdq_tx_hw_configure - Configure general VMDq TX parameters
4401 * @hw: pointer to hardware structure
4402 */
4403static void
4404ixgbe_vmdq_tx_hw_configure(struct ixgbe_hw *hw)
4405{
4406        uint32_t reg;
4407        uint32_t q;
4408
4409        PMD_INIT_FUNC_TRACE();
4410        /*PF VF Transmit Enable*/
4411        IXGBE_WRITE_REG(hw, IXGBE_VFTE(0), UINT32_MAX);
4412        IXGBE_WRITE_REG(hw, IXGBE_VFTE(1), UINT32_MAX);
4413
4414        /* Disable the Tx desc arbiter so that MTQC can be changed */
4415        reg = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
4416        reg |= IXGBE_RTTDCS_ARBDIS;
4417        IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, reg);
4418
4419        reg = IXGBE_MTQC_VT_ENA | IXGBE_MTQC_64VF;
4420        IXGBE_WRITE_REG(hw, IXGBE_MTQC, reg);
4421
4422        /* Disable drop for all queues */
4423        for (q = 0; q < IXGBE_MAX_RX_QUEUE_NUM; q++)
4424                IXGBE_WRITE_REG(hw, IXGBE_QDE,
4425                  (IXGBE_QDE_WRITE | (q << IXGBE_QDE_IDX_SHIFT)));
4426
4427        /* Enable the Tx desc arbiter */
4428        reg = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
4429        reg &= ~IXGBE_RTTDCS_ARBDIS;
4430        IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, reg);
4431
4432        IXGBE_WRITE_FLUSH(hw);
4433}
4434
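/**
 * ixgbe_alloc_rx_queue_mbufs - Allocate one mbuf per Rx descriptor
 * @rxq: pointer to the Rx queue being initialized
 *
 * Populates the software ring and writes each mbuf's DMA address into the
 * corresponding descriptor. Returns 0 on success or -ENOMEM if the mempool
 * cannot supply enough mbufs.
 */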
4435static int __rte_cold
4436ixgbe_alloc_rx_queue_mbufs(struct ixgbe_rx_queue *rxq)
4437{
4438        struct ixgbe_rx_entry *rxe = rxq->sw_ring;
4439        uint64_t dma_addr;
4440        unsigned int i;
4441
4442        /* Initialize software ring entries */
4443        for (i = 0; i < rxq->nb_rx_desc; i++) {
4444                volatile union ixgbe_adv_rx_desc *rxd;
4445                struct rte_mbuf *mbuf = rte_mbuf_raw_alloc(rxq->mb_pool);
4446
4447                if (mbuf == NULL) {
4448                        PMD_INIT_LOG(ERR, "RX mbuf alloc failed queue_id=%u",
4449                                     (unsigned) rxq->queue_id);
4450                        return -ENOMEM;
4451                }
4452
4453                mbuf->data_off = RTE_PKTMBUF_HEADROOM;
4454                mbuf->port = rxq->port_id;
4455
4456                dma_addr =
4457                        rte_cpu_to_le_64(rte_mbuf_data_iova_default(mbuf));
4458                rxd = &rxq->rx_ring[i];
4459                rxd->read.hdr_addr = 0;
4460                rxd->read.pkt_addr = dma_addr;
4461                rxe[i].mbuf = mbuf;
4462        }
4463
4464        return 0;
4465}
4466
4467static int
4468ixgbe_config_vf_rss(struct rte_eth_dev *dev)
4469{
4470        struct ixgbe_hw *hw;
4471        uint32_t mrqc;
4472
4473        ixgbe_rss_configure(dev);
4474
4475        hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4476
4477        /* MRQC: enable VF RSS */
4478        mrqc = IXGBE_READ_REG(hw, IXGBE_MRQC);
4479        mrqc &= ~IXGBE_MRQC_MRQE_MASK;
4480        switch (RTE_ETH_DEV_SRIOV(dev).active) {
4481        case ETH_64_POOLS:
4482                mrqc |= IXGBE_MRQC_VMDQRSS64EN;
4483                break;
4484
4485        case ETH_32_POOLS:
4486                mrqc |= IXGBE_MRQC_VMDQRSS32EN;
4487                break;
4488
4489        default:
4490                PMD_INIT_LOG(ERR, "Invalid pool number in IOV mode with VMDQ RSS");
4491                return -EINVAL;
4492        }
4493
4494        IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
4495
4496        return 0;
4497}
4498
4499static int
4500ixgbe_config_vf_default(struct rte_eth_dev *dev)
4501{
4502        struct ixgbe_hw *hw =
4503                IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4504
4505        switch (RTE_ETH_DEV_SRIOV(dev).active) {
4506        case ETH_64_POOLS:
4507                IXGBE_WRITE_REG(hw, IXGBE_MRQC,
4508                        IXGBE_MRQC_VMDQEN);
4509                break;
4510
4511        case ETH_32_POOLS:
4512                IXGBE_WRITE_REG(hw, IXGBE_MRQC,
4513                        IXGBE_MRQC_VMDQRT4TCEN);
4514                break;
4515
4516        case ETH_16_POOLS:
4517                IXGBE_WRITE_REG(hw, IXGBE_MRQC,
4518                        IXGBE_MRQC_VMDQRT8TCEN);
4519                break;
4520        default:
4521                PMD_INIT_LOG(ERR,
4522                        "invalid pool number in IOV mode");
4523                break;
4524        }
4525        return 0;
4526}
4527
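/*
 * Select and program the multi-queue Rx scheme (RSS, VMDq, DCB or a
 * combination) according to rxmode.mq_mode. When SRIOV is active the
 * VF-aware variants are used instead; 82598 devices return immediately.
 */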
4528static int
4529ixgbe_dev_mq_rx_configure(struct rte_eth_dev *dev)
4530{
4531        struct ixgbe_hw *hw =
4532                IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4533
4534        if (hw->mac.type == ixgbe_mac_82598EB)
4535                return 0;
4536
4537        if (RTE_ETH_DEV_SRIOV(dev).active == 0) {
4538                /*
4539                 * SRIOV inactive scheme
4540                 * any DCB/RSS w/o VMDq multi-queue setting
4541                 */
4542                switch (dev->data->dev_conf.rxmode.mq_mode) {
4543                case ETH_MQ_RX_RSS:
4544                case ETH_MQ_RX_DCB_RSS:
4545                case ETH_MQ_RX_VMDQ_RSS:
4546                        ixgbe_rss_configure(dev);
4547                        break;
4548
4549                case ETH_MQ_RX_VMDQ_DCB:
4550                        ixgbe_vmdq_dcb_configure(dev);
4551                        break;
4552
4553                case ETH_MQ_RX_VMDQ_ONLY:
4554                        ixgbe_vmdq_rx_hw_configure(dev);
4555                        break;
4556
4557                case ETH_MQ_RX_NONE:
4558                default:
4559                        /* if mq_mode is none, disable rss mode.*/
4560                        ixgbe_rss_disable(dev);
4561                        break;
4562                }
4563        } else {
4564                /* SRIOV active scheme
4565                 * Support RSS together with SRIOV.
4566                 */
4567                switch (dev->data->dev_conf.rxmode.mq_mode) {
4568                case ETH_MQ_RX_RSS:
4569                case ETH_MQ_RX_VMDQ_RSS:
4570                        ixgbe_config_vf_rss(dev);
4571                        break;
4572                case ETH_MQ_RX_VMDQ_DCB:
4573                case ETH_MQ_RX_DCB:
4574                /* In SRIOV, the configuration is the same as VMDq case */
4575                        ixgbe_vmdq_dcb_configure(dev);
4576                        break;
4577                /* DCB/RSS together with SRIOV is not supported */
4578                case ETH_MQ_RX_VMDQ_DCB_RSS:
4579                case ETH_MQ_RX_DCB_RSS:
4580                        PMD_INIT_LOG(ERR,
4581                                "Could not support DCB/RSS with VMDq & SRIOV");
4582                        return -1;
4583                default:
4584                        ixgbe_config_vf_default(dev);
4585                        break;
4586                }
4587        }
4588
4589        return 0;
4590}
4591
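/*
 * Program MTQC for the configured multi-queue Tx scheme. The Tx descriptor
 * arbiter is disabled while MTQC is changed, as required, and re-enabled
 * afterwards; 82598 devices return immediately.
 */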
4592static int
4593ixgbe_dev_mq_tx_configure(struct rte_eth_dev *dev)
4594{
4595        struct ixgbe_hw *hw =
4596                IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4597        uint32_t mtqc;
4598        uint32_t rttdcs;
4599
4600        if (hw->mac.type == ixgbe_mac_82598EB)
4601                return 0;
4602
4603        /* disable arbiter before setting MTQC */
4604        rttdcs = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
4605        rttdcs |= IXGBE_RTTDCS_ARBDIS;
4606        IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs);
4607
4608        if (RTE_ETH_DEV_SRIOV(dev).active == 0) {
4609                /*
4610                 * SRIOV inactive scheme
4611                 * any DCB w/o VMDq multi-queue setting
4612                 */
4613                if (dev->data->dev_conf.txmode.mq_mode == ETH_MQ_TX_VMDQ_ONLY)
4614                        ixgbe_vmdq_tx_hw_configure(hw);
4615                else {
4616                        mtqc = IXGBE_MTQC_64Q_1PB;
4617                        IXGBE_WRITE_REG(hw, IXGBE_MTQC, mtqc);
4618                }
4619        } else {
4620                switch (RTE_ETH_DEV_SRIOV(dev).active) {
4621
4622                /*
4623                 * SRIOV active scheme
4624                 * FIXME if support DCB together with VMDq & SRIOV
4625                 */
4626                case ETH_64_POOLS:
4627                        mtqc = IXGBE_MTQC_VT_ENA | IXGBE_MTQC_64VF;
4628                        break;
4629                case ETH_32_POOLS:
4630                        mtqc = IXGBE_MTQC_VT_ENA | IXGBE_MTQC_32VF;
4631                        break;
4632                case ETH_16_POOLS:
4633                        mtqc = IXGBE_MTQC_VT_ENA | IXGBE_MTQC_RT_ENA |
4634                                IXGBE_MTQC_8TC_8TQ;
4635                        break;
4636                default:
4637                        mtqc = IXGBE_MTQC_64Q_1PB;
4638                        PMD_INIT_LOG(ERR, "invalid pool number in IOV mode");
4639                }
4640                IXGBE_WRITE_REG(hw, IXGBE_MTQC, mtqc);
4641        }
4642
4643        /* re-enable arbiter */
4644        rttdcs &= ~IXGBE_RTTDCS_ARBDIS;
4645        IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs);
4646
4647        return 0;
4648}
4649
4650/**
4651 * ixgbe_get_rscctl_maxdesc - Calculate the RSCCTL[n].MAXDESC for PF
4652 *
4653 * Return the RSCCTL[n].MAXDESC for 82599 and x540 PF devices according to the
4654 * spec rev. 3.0 chapter 8.2.3.8.13.
4655 *
4656 * @pool Memory pool of the Rx queue
4657 */
4658static inline uint32_t
4659ixgbe_get_rscctl_maxdesc(struct rte_mempool *pool)
4660{
4661        struct rte_pktmbuf_pool_private *mp_priv = rte_mempool_get_priv(pool);
4662
4663        /* MAXDESC * SRRCTL.BSIZEPKT must not exceed 64 KB minus one */
4664        uint16_t maxdesc =
4665                RTE_IPV4_MAX_PKT_LEN /
4666                        (mp_priv->mbuf_data_room_size - RTE_PKTMBUF_HEADROOM);
4667
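        /*
         * Example (assuming the common 2 KB data room left after the
         * 128-byte headroom): 65535 / 2048 = 31, which selects
         * MAXDESC_16 below.
         */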
4668        if (maxdesc >= 16)
4669                return IXGBE_RSCCTL_MAXDESC_16;
4670        else if (maxdesc >= 8)
4671                return IXGBE_RSCCTL_MAXDESC_8;
4672        else if (maxdesc >= 4)
4673                return IXGBE_RSCCTL_MAXDESC_4;
4674        else
4675                return IXGBE_RSCCTL_MAXDESC_1;
4676}
4677
4678/**
4679 * ixgbe_set_ivar - Setup the correct IVAR register for a particular MSIX
4680 * interrupt
4681 *
4682 * (Taken from FreeBSD tree)
4683 * (yes this is all very magic and confusing :)
4684 *
4685 * @dev port handle
4686 * @entry the register array entry
4687 * @vector the MSIX vector for this queue
4688 * @type RX/TX/MISC
4689 */
4690static void
4691ixgbe_set_ivar(struct rte_eth_dev *dev, u8 entry, u8 vector, s8 type)
4692{
4693        struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4694        u32 ivar, index;
4695
4696        vector |= IXGBE_IVAR_ALLOC_VAL;
4697
4698        switch (hw->mac.type) {
4699
4700        case ixgbe_mac_82598EB:
4701                if (type == -1)
4702                        entry = IXGBE_IVAR_OTHER_CAUSES_INDEX;
4703                else
4704                        entry += (type * 64);
4705                index = (entry >> 2) & 0x1F;
4706                ivar = IXGBE_READ_REG(hw, IXGBE_IVAR(index));
4707                ivar &= ~(0xFF << (8 * (entry & 0x3)));
4708                ivar |= (vector << (8 * (entry & 0x3)));
4709                IXGBE_WRITE_REG(hw, IXGBE_IVAR(index), ivar);
4710                break;
4711
4712        case ixgbe_mac_82599EB:
4713        case ixgbe_mac_X540:
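                /*
                 * On 82599/X540 each IVAR register covers two queues; the
                 * byte offset within it is 16 * (entry & 1) + 8 * type
                 * (this file passes type 0 for Rx, see ixgbe_set_rsc()).
                 */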
4714                if (type == -1) { /* MISC IVAR */
4715                        index = (entry & 1) * 8;
4716                        ivar = IXGBE_READ_REG(hw, IXGBE_IVAR_MISC);
4717                        ivar &= ~(0xFF << index);
4718                        ivar |= (vector << index);
4719                        IXGBE_WRITE_REG(hw, IXGBE_IVAR_MISC, ivar);
4720                } else {        /* RX/TX IVARS */
4721                        index = (16 * (entry & 1)) + (8 * type);
4722                        ivar = IXGBE_READ_REG(hw, IXGBE_IVAR(entry >> 1));
4723                        ivar &= ~(0xFF << index);
4724                        ivar |= (vector << index);
4725                        IXGBE_WRITE_REG(hw, IXGBE_IVAR(entry >> 1), ivar);
4726                }
4727
4728                break;
4729
4730        default:
4731                break;
4732        }
4733}
4734
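/*
 * Select the Rx burst callback for the port. Order of preference: LRO
 * (bulk or single allocation), scattered Rx (vector, bulk or single
 * allocation), plain vector Rx, bulk allocation, and finally the
 * single-buffer ixgbe_recv_pkts() fallback. The choice is then recorded
 * in each queue's rx_using_sse flag.
 */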
4735void __rte_cold
4736ixgbe_set_rx_function(struct rte_eth_dev *dev)
4737{
4738        uint16_t i, rx_using_sse;
4739        struct ixgbe_adapter *adapter = dev->data->dev_private;
4740
4741        /*
4742         * In order to allow Vector Rx there are a few configuration
4743         * conditions to be met and Rx Bulk Allocation should be allowed.
4744         */
4745        if (ixgbe_rx_vec_dev_conf_condition_check(dev) ||
4746            !adapter->rx_bulk_alloc_allowed) {
4747                PMD_INIT_LOG(DEBUG, "Port[%d] doesn't meet Vector Rx "
4748                                    "preconditions",
4749                             dev->data->port_id);
4750
4751                adapter->rx_vec_allowed = false;
4752        }
4753
4754        /*
4755         * Initialize the appropriate LRO callback.
4756         *
4757         * If all queues satisfy the bulk allocation preconditions
4758         * (adapter->rx_bulk_alloc_allowed is TRUE) then we may use bulk allocation.
4759         * Otherwise use a single allocation version.
4760         */
4761        if (dev->data->lro) {
4762                if (adapter->rx_bulk_alloc_allowed) {
4763                        PMD_INIT_LOG(DEBUG, "LRO is requested. Using a bulk "
4764                                           "allocation version");
4765                        dev->rx_pkt_burst = ixgbe_recv_pkts_lro_bulk_alloc;
4766                } else {
4767                        PMD_INIT_LOG(DEBUG, "LRO is requested. Using a single "
4768                                           "allocation version");
4769                        dev->rx_pkt_burst = ixgbe_recv_pkts_lro_single_alloc;
4770                }
4771        } else if (dev->data->scattered_rx) {
4772                /*
4773                 * Set the non-LRO scattered callback: there are Vector and
4774                 * single allocation versions.
4775                 */
4776                if (adapter->rx_vec_allowed) {
4777                        PMD_INIT_LOG(DEBUG, "Using Vector Scattered Rx "
4778                                            "callback (port=%d).",
4779                                     dev->data->port_id);
4780
4781                        dev->rx_pkt_burst = ixgbe_recv_scattered_pkts_vec;
4782                } else if (adapter->rx_bulk_alloc_allowed) {
4783                        PMD_INIT_LOG(DEBUG, "Using a Scattered with bulk "
4784                                           "allocation callback (port=%d).",
4785                                     dev->data->port_id);
4786                        dev->rx_pkt_burst = ixgbe_recv_pkts_lro_bulk_alloc;
4787                } else {
4788                        PMD_INIT_LOG(DEBUG, "Using Regular (non-vector, "
4789                                            "single allocation) "
4790                                            "Scattered Rx callback "
4791                                            "(port=%d).",
4792                                     dev->data->port_id);
4793
4794                        dev->rx_pkt_burst = ixgbe_recv_pkts_lro_single_alloc;
4795                }
4796        /*
4797         * Below we set "simple" callbacks according to port/queues parameters.
4798         * If parameters allow we are going to choose between the following
4799         * callbacks:
4800         *    - Vector
4801         *    - Bulk Allocation
4802         *    - Single buffer allocation (the simplest one)
4803         */
4804        } else if (adapter->rx_vec_allowed) {
4805                PMD_INIT_LOG(DEBUG, "Vector rx enabled, please make sure RX "
4806                                    "burst size is no less than %d (port=%d).",
4807                             RTE_IXGBE_DESCS_PER_LOOP,
4808                             dev->data->port_id);
4809
4810                dev->rx_pkt_burst = ixgbe_recv_pkts_vec;
4811        } else if (adapter->rx_bulk_alloc_allowed) {
4812                PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions are "
4813                                    "satisfied. Rx Burst Bulk Alloc function "
4814                                    "will be used on port=%d.",
4815                             dev->data->port_id);
4816
4817                dev->rx_pkt_burst = ixgbe_recv_pkts_bulk_alloc;
4818        } else {
4819                PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions are not "
4820                                    "satisfied, or Scattered Rx is requested "
4821                                    "(port=%d).",
4822                             dev->data->port_id);
4823
4824                dev->rx_pkt_burst = ixgbe_recv_pkts;
4825        }
4826
4827        /* Propagate information about RX function choice through all queues. */
4828
4829        rx_using_sse =
4830                (dev->rx_pkt_burst == ixgbe_recv_scattered_pkts_vec ||
4831                dev->rx_pkt_burst == ixgbe_recv_pkts_vec);
4832
4833        for (i = 0; i < dev->data->nb_rx_queues; i++) {
4834                struct ixgbe_rx_queue *rxq = dev->data->rx_queues[i];
4835
4836                rxq->rx_using_sse = rx_using_sse;
4837#ifdef RTE_LIBRTE_SECURITY
4838                rxq->using_ipsec = !!(dev->data->dev_conf.rxmode.offloads &
4839                                DEV_RX_OFFLOAD_SECURITY);
4840#endif
4841        }
4842}
4843
4844/**
4845 * ixgbe_set_rsc - configure RSC related port HW registers
4846 *
4847 * Configures the port's RSC related registers according to the 4.6.7.2 chapter
4848 * of 82599 Spec (x540 configuration is virtually the same).
4849 *
4850 * @dev port handle
4851 *
4852 * Returns 0 in case of success or a non-zero error code
4853 */
4854static int
4855ixgbe_set_rsc(struct rte_eth_dev *dev)
4856{
4857        struct rte_eth_rxmode *rx_conf = &dev->data->dev_conf.rxmode;
4858        struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
4859        struct rte_eth_dev_info dev_info = { 0 };
4860        bool rsc_capable = false;
4861        uint16_t i;
4862        uint32_t rdrxctl;
4863        uint32_t rfctl;
4864
4865        /* Sanity check */
4866        dev->dev_ops->dev_infos_get(dev, &dev_info);
4867        if (dev_info.rx_offload_capa & DEV_RX_OFFLOAD_TCP_LRO)
4868                rsc_capable = true;
4869
4870        if (!rsc_capable && (rx_conf->offloads & DEV_RX_OFFLOAD_TCP_LRO)) {
4871                PMD_INIT_LOG(CRIT, "LRO is requested on HW that doesn't "
4872                                   "support it");
4873                return -EINVAL;
4874        }
4875
4876        /* RSC global configuration (chapter 4.6.7.2.1 of 82599 Spec) */
4877
4878        if ((rx_conf->offloads & DEV_RX_OFFLOAD_KEEP_CRC) &&
4879             (rx_conf->offloads & DEV_RX_OFFLOAD_TCP_LRO)) {
4880                /*
4881                 * According to chapter 4.6.7.2.1 of the Spec Rev.
4882                 * 3.0 RSC configuration requires HW CRC stripping being
4883                 * enabled. If user requested both HW CRC stripping off
4884                 * and RSC on - return an error.
4885                 */
4886                PMD_INIT_LOG(CRIT, "LRO can't be enabled when HW CRC "
4887                                    "is disabled");
4888                return -EINVAL;
4889        }
4890
4891        /* RFCTL configuration  */
4892        rfctl = IXGBE_READ_REG(hw, IXGBE_RFCTL);
4893        if ((rsc_capable) && (rx_conf->offloads & DEV_RX_OFFLOAD_TCP_LRO))
4894                /*
4895                 * Since NFS packet coalescing is not supported, clear
4896                 * RFCTL.NFSW_DIS and RFCTL.NFSR_DIS when RSC is
4897                 * enabled.
4898                 */
4899                rfctl &= ~(IXGBE_RFCTL_RSC_DIS | IXGBE_RFCTL_NFSW_DIS |
4900                           IXGBE_RFCTL_NFSR_DIS);
4901        else
4902                rfctl |= IXGBE_RFCTL_RSC_DIS;
4903        IXGBE_WRITE_REG(hw, IXGBE_RFCTL, rfctl);
4904
4905        /* If LRO hasn't been requested - we are done here. */
4906        if (!(rx_conf->offloads & DEV_RX_OFFLOAD_TCP_LRO))
4907                return 0;
4908
4909        /* Set RDRXCTL.RSCACKC bit */
4910        rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
4911        rdrxctl |= IXGBE_RDRXCTL_RSCACKC;
4912        IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
4913
4914        /* Per-queue RSC configuration (chapter 4.6.7.2.2 of 82599 Spec) */
4915        for (i = 0; i < dev->data->nb_rx_queues; i++) {
4916                struct ixgbe_rx_queue *rxq = dev->data->rx_queues[i];
4917                uint32_t srrctl =
4918                        IXGBE_READ_REG(hw, IXGBE_SRRCTL(rxq->reg_idx));
4919                uint32_t rscctl =
4920                        IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxq->reg_idx));
4921                uint32_t psrtype =
4922                        IXGBE_READ_REG(hw, IXGBE_PSRTYPE(rxq->reg_idx));
4923                uint32_t eitr =
4924                        IXGBE_READ_REG(hw, IXGBE_EITR(rxq->reg_idx));
4925
4926                /*
4927                 * ixgbe PMD doesn't support header-split at the moment.
4928                 *
4929                 * Following the 4.6.7.2.1 chapter of the 82599/x540
4930                 * Spec if RSC is enabled the SRRCTL[n].BSIZEHEADER
4931                 * should be configured even if header split is not
4932                 * enabled. We will configure it to 128 bytes, following the
4933                 * recommendation in the spec.
4934                 */
4935                srrctl &= ~IXGBE_SRRCTL_BSIZEHDR_MASK;
4936                srrctl |= (128 << IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT) &
4937                                            IXGBE_SRRCTL_BSIZEHDR_MASK;
4938
4939                /*
4940                 * TODO: Consider setting the Receive Descriptor Minimum
4941                 * Threshold Size for an RSC case. This is not an obviously
4942                 * beneficial option, but one worth considering...
4943                 */
4944
4945                rscctl |= IXGBE_RSCCTL_RSCEN;
4946                rscctl |= ixgbe_get_rscctl_maxdesc(rxq->mb_pool);
4947                psrtype |= IXGBE_PSRTYPE_TCPHDR;
4948
4949                /*
4950                 * RSC: Set ITR interval corresponding to 2K ints/s.
4951                 *
4952                 * Full-sized RSC aggregations for a 10Gb/s link will
4953                 * arrive at a rate of about 20K aggregations/s.
4954                 *
4955                 * A 2K ints/s rate therefore causes only about 10% of
4956                 * the aggregations to be closed by interrupt timer
4957                 * expiration when streaming at wire speed.
4958                 *
4959                 * For a sparse streaming case this setting will yield
4960                 * at most 500us latency for a single RSC aggregation.
4961                 */
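                /* 2K ints/s corresponds to an ITR interval of
                 * 1,000,000 us / 2000 = 500 us.
                 */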
4962                eitr &= ~IXGBE_EITR_ITR_INT_MASK;
4963                eitr |= IXGBE_EITR_INTERVAL_US(IXGBE_QUEUE_ITR_INTERVAL_DEFAULT);
4964                eitr |= IXGBE_EITR_CNT_WDIS;
4965
4966                IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(rxq->reg_idx), srrctl);
4967                IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxq->reg_idx), rscctl);
4968                IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(rxq->reg_idx), psrtype);
4969                IXGBE_WRITE_REG(hw, IXGBE_EITR(rxq->reg_idx), eitr);
4970
4971                /*
4972                 * RSC requires the mapping of the queue to the
4973                 * interrupt vector.
4974                 */
4975                ixgbe_set_ivar(dev, rxq->reg_idx, i, 0);
4976        }
4977
4978        dev->data->lro = 1;
4979
4980        PMD_INIT_LOG(DEBUG, "enabling LRO mode");
4981
4982        return 0;
4983}
4984
4985/*
4986 * Initializes Receive Unit.
4987 */
4988int __rte_cold
4989ixgbe_dev_rx_init(struct rte_eth_dev *dev)
4990{
4991        struct ixgbe_hw     *hw;
4992        struct ixgbe_rx_queue *rxq;
4993        uint64_t bus_addr;
4994        uint32_t rxctrl;
4995        uint32_t fctrl;
4996        uint32_t hlreg0;
4997        uint32_t maxfrs;
4998        uint32_t srrctl;
4999        uint32_t rdrxctl;
5000        uint32_t rxcsum;
5001        uint16_t buf_size;
5002        uint16_t i;
5003        struct rte_eth_rxmode *rx_conf = &dev->data->dev_conf.rxmode;
5004        int rc;
5005
5006        PMD_INIT_FUNC_TRACE();
5007        hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5008
5009        /*
5010         * Make sure receives are disabled while setting
5011         * up the RX context (registers, descriptor rings, etc.).
5012         */
5013        rxctrl = IXGBE_READ_REG(hw, IXGBE_RXCTRL);
5014        IXGBE_WRITE_REG(hw, IXGBE_RXCTRL, rxctrl & ~IXGBE_RXCTRL_RXEN);
5015
5016        /* Enable receipt of broadcast frames */
5017        fctrl = IXGBE_READ_REG(hw, IXGBE_FCTRL);
5018        fctrl |= IXGBE_FCTRL_BAM;
5019        fctrl |= IXGBE_FCTRL_DPF;
5020        fctrl |= IXGBE_FCTRL_PMCF;
5021        IXGBE_WRITE_REG(hw, IXGBE_FCTRL, fctrl);
5022
5023        /*
5024         * Configure CRC stripping, if any.
5025         */
5026        hlreg0 = IXGBE_READ_REG(hw, IXGBE_HLREG0);
5027        if (rx_conf->offloads & DEV_RX_OFFLOAD_KEEP_CRC)
5028                hlreg0 &= ~IXGBE_HLREG0_RXCRCSTRP;
5029        else
5030                hlreg0 |= IXGBE_HLREG0_RXCRCSTRP;
5031
5032        /*
5033         * Configure jumbo frame support, if any.
5034         */
5035        if (rx_conf->offloads & DEV_RX_OFFLOAD_JUMBO_FRAME) {
5036                hlreg0 |= IXGBE_HLREG0_JUMBOEN;
5037                maxfrs = IXGBE_READ_REG(hw, IXGBE_MAXFRS);
5038                maxfrs &= 0x0000FFFF;
5039                maxfrs |= (rx_conf->max_rx_pkt_len << 16);
5040                IXGBE_WRITE_REG(hw, IXGBE_MAXFRS, maxfrs);
5041        } else
5042                hlreg0 &= ~IXGBE_HLREG0_JUMBOEN;
5043
5044        /*
5045         * If loopback mode is configured, set LPBK bit.
5046         */
5047        if (dev->data->dev_conf.lpbk_mode != 0) {
5048                rc = ixgbe_check_supported_loopback_mode(dev);
5049                if (rc < 0) {
5050                        PMD_INIT_LOG(ERR, "Unsupported loopback mode");
5051                        return rc;
5052                }
5053                hlreg0 |= IXGBE_HLREG0_LPBK;
5054        } else {
5055                hlreg0 &= ~IXGBE_HLREG0_LPBK;
5056        }
5057
5058        IXGBE_WRITE_REG(hw, IXGBE_HLREG0, hlreg0);
5059
5060        /*
5061         * Assume no header split and no VLAN strip support
5062         * on any Rx queue first.
5063         */
5064        rx_conf->offloads &= ~DEV_RX_OFFLOAD_VLAN_STRIP;
5065        /* Setup RX queues */
5066        for (i = 0; i < dev->data->nb_rx_queues; i++) {
5067                rxq = dev->data->rx_queues[i];
5068
5069                /*
5070                 * Reset crc_len in case it was changed after queue setup by a
5071                 * call to configure.
5072                 */
5073                if (rx_conf->offloads & DEV_RX_OFFLOAD_KEEP_CRC)
5074                        rxq->crc_len = RTE_ETHER_CRC_LEN;
5075                else
5076                        rxq->crc_len = 0;
5077
5078                /* Setup the Base and Length of the Rx Descriptor Rings */
5079                bus_addr = rxq->rx_ring_phys_addr;
5080                IXGBE_WRITE_REG(hw, IXGBE_RDBAL(rxq->reg_idx),
5081                                (uint32_t)(bus_addr & 0x00000000ffffffffULL));
5082                IXGBE_WRITE_REG(hw, IXGBE_RDBAH(rxq->reg_idx),
5083                                (uint32_t)(bus_addr >> 32));
5084                IXGBE_WRITE_REG(hw, IXGBE_RDLEN(rxq->reg_idx),
5085                                rxq->nb_rx_desc * sizeof(union ixgbe_adv_rx_desc));
5086                IXGBE_WRITE_REG(hw, IXGBE_RDH(rxq->reg_idx), 0);
5087                IXGBE_WRITE_REG(hw, IXGBE_RDT(rxq->reg_idx), 0);
5088
5089                /* Configure the SRRCTL register */
5090                srrctl = IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF;
5091
5092                /* Set if packets are dropped when no descriptors available */
5093                if (rxq->drop_en)
5094                        srrctl |= IXGBE_SRRCTL_DROP_EN;
5095
5096                /*
5097                 * Configure the RX buffer size in the BSIZEPACKET field of
5098                 * the SRRCTL register of the queue.
5099                 * The value is in 1 KB resolution. Valid values can be from
5100                 * 1 KB to 16 KB.
5101                 */
5102                buf_size = (uint16_t)(rte_pktmbuf_data_room_size(rxq->mb_pool) -
5103                        RTE_PKTMBUF_HEADROOM);
5104                srrctl |= ((buf_size >> IXGBE_SRRCTL_BSIZEPKT_SHIFT) &
5105                           IXGBE_SRRCTL_BSIZEPKT_MASK);
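                /*
                 * Example (assuming a 2176-byte data room and 128-byte
                 * headroom): buf_size = 2048, and 2048 >> 10 = 2, i.e. a
                 * 2 KB hardware buffer per descriptor.
                 */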
5106
5107                IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(rxq->reg_idx), srrctl);
5108
5109                buf_size = (uint16_t) ((srrctl & IXGBE_SRRCTL_BSIZEPKT_MASK) <<
5110                                       IXGBE_SRRCTL_BSIZEPKT_SHIFT);
5111
5112                /* Add dual VLAN tag length to support dual VLAN frames */
5113                if (dev->data->dev_conf.rxmode.max_rx_pkt_len +
5114                                            2 * IXGBE_VLAN_TAG_SIZE > buf_size)
5115                        dev->data->scattered_rx = 1;
5116                if (rxq->offloads & DEV_RX_OFFLOAD_VLAN_STRIP)
5117                        rx_conf->offloads |= DEV_RX_OFFLOAD_VLAN_STRIP;
5118        }
5119
5120        if (rx_conf->offloads & DEV_RX_OFFLOAD_SCATTER)
5121                dev->data->scattered_rx = 1;
5122
5123        /*
5124         * Device configured with multiple RX queues.
5125         */
5126        ixgbe_dev_mq_rx_configure(dev);
5127
5128        /*
5129         * Setup the Checksum Register.
5130         * Disable Full-Packet Checksum which is mutually exclusive with RSS.
5131         * Enable IP/L4 checksum computation by hardware if requested to do so.
5132         */
5133        rxcsum = IXGBE_READ_REG(hw, IXGBE_RXCSUM);
5134        rxcsum |= IXGBE_RXCSUM_PCSD;
5135        if (rx_conf->offloads & DEV_RX_OFFLOAD_CHECKSUM)
5136                rxcsum |= IXGBE_RXCSUM_IPPCSE;
5137        else
5138                rxcsum &= ~IXGBE_RXCSUM_IPPCSE;
5139
5140        IXGBE_WRITE_REG(hw, IXGBE_RXCSUM, rxcsum);
5141
5142        if (hw->mac.type == ixgbe_mac_82599EB ||
5143            hw->mac.type == ixgbe_mac_X540) {
5144                rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
5145                if (rx_conf->offloads & DEV_RX_OFFLOAD_KEEP_CRC)
5146                        rdrxctl &= ~IXGBE_RDRXCTL_CRCSTRIP;
5147                else
5148                        rdrxctl |= IXGBE_RDRXCTL_CRCSTRIP;
5149                rdrxctl &= ~IXGBE_RDRXCTL_RSCFRSTSIZE;
5150                IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
5151        }
5152
5153        rc = ixgbe_set_rsc(dev);
5154        if (rc)
5155                return rc;
5156
5157        ixgbe_set_rx_function(dev);
5158
5159        return 0;
5160}
5161
5162/*
5163 * Initializes Transmit Unit.
5164 */
5165void __rte_cold
5166ixgbe_dev_tx_init(struct rte_eth_dev *dev)
5167{
5168        struct ixgbe_hw     *hw;
5169        struct ixgbe_tx_queue *txq;
5170        uint64_t bus_addr;
5171        uint32_t hlreg0;
5172        uint32_t txctrl;
5173        uint16_t i;
5174
5175        PMD_INIT_FUNC_TRACE();
5176        hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5177
5178        /* Enable TX CRC (checksum offload requirement) and hw padding
5179         * (TSO requirement)
5180         */
5181        hlreg0 = IXGBE_READ_REG(hw, IXGBE_HLREG0);
5182        hlreg0 |= (IXGBE_HLREG0_TXCRCEN | IXGBE_HLREG0_TXPADEN);
5183        IXGBE_WRITE_REG(hw, IXGBE_HLREG0, hlreg0);
5184
5185        /* Setup the Base and Length of the Tx Descriptor Rings */
5186        for (i = 0; i < dev->data->nb_tx_queues; i++) {
5187                txq = dev->data->tx_queues[i];
5188
5189                bus_addr = txq->tx_ring_phys_addr;
5190                IXGBE_WRITE_REG(hw, IXGBE_TDBAL(txq->reg_idx),
5191                                (uint32_t)(bus_addr & 0x00000000ffffffffULL));
5192                IXGBE_WRITE_REG(hw, IXGBE_TDBAH(txq->reg_idx),
5193                                (uint32_t)(bus_addr >> 32));
5194                IXGBE_WRITE_REG(hw, IXGBE_TDLEN(txq->reg_idx),
5195                                txq->nb_tx_desc * sizeof(union ixgbe_adv_tx_desc));
5196                /* Setup the HW Tx Head and TX Tail descriptor pointers */
5197                IXGBE_WRITE_REG(hw, IXGBE_TDH(txq->reg_idx), 0);
5198                IXGBE_WRITE_REG(hw, IXGBE_TDT(txq->reg_idx), 0);
5199
5200                /*
5201                 * Disable Tx Head Writeback RO bit, since this hoses
5202                 * bookkeeping if things aren't delivered in order.
5203                 */
5204                switch (hw->mac.type) {
5205                case ixgbe_mac_82598EB:
5206                        txctrl = IXGBE_READ_REG(hw,
5207                                                IXGBE_DCA_TXCTRL(txq->reg_idx));
5208                        txctrl &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN;
5209                        IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL(txq->reg_idx),
5210                                        txctrl);
5211                        break;
5212
5213                case ixgbe_mac_82599EB:
5214                case ixgbe_mac_X540:
5215                case ixgbe_mac_X550:
5216                case ixgbe_mac_X550EM_x:
5217                case ixgbe_mac_X550EM_a:
5218                default:
5219                        txctrl = IXGBE_READ_REG(hw,
5220                                                IXGBE_DCA_TXCTRL_82599(txq->reg_idx));
5221                        txctrl &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN;
5222                        IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL_82599(txq->reg_idx),
5223                                        txctrl);
5224                        break;
5225                }
5226        }
5227
5228        /* Device configured with multiple TX queues. */
5229        ixgbe_dev_mq_tx_configure(dev);
5230}
5231
5232/*
5233 * Check if requested loopback mode is supported
5234 */
5235int
5236ixgbe_check_supported_loopback_mode(struct rte_eth_dev *dev)
5237{
5238        struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5239
5240        if (dev->data->dev_conf.lpbk_mode == IXGBE_LPBK_TX_RX)
5241                if (hw->mac.type == ixgbe_mac_82599EB ||
5242                     hw->mac.type == ixgbe_mac_X540 ||
5243                     hw->mac.type == ixgbe_mac_X550 ||
5244                     hw->mac.type == ixgbe_mac_X550EM_x ||
5245                     hw->mac.type == ixgbe_mac_X550EM_a)
5246                        return 0;
5247
5248        return -ENOTSUP;
5249}
5250
5251/*
5252 * Set up link for 82599 loopback mode Tx->Rx.
5253 */
5254static inline void __rte_cold
5255ixgbe_setup_loopback_link_82599(struct ixgbe_hw *hw)
5256{
5257        PMD_INIT_FUNC_TRACE();
5258
5259        if (ixgbe_verify_lesm_fw_enabled_82599(hw)) {
5260                if (hw->mac.ops.acquire_swfw_sync(hw, IXGBE_GSSR_MAC_CSR_SM) !=
5261                                IXGBE_SUCCESS) {
5262                        PMD_INIT_LOG(ERR, "Could not enable loopback mode");
5263                        /* ignore error */
5264                        return;
5265                }
5266        }
5267
5268        /* Restart link */
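            /*
             * Judging from the constant names, AUTOC is programmed to force
             * link up (FLU) in the 10G, no-auto-negotiation link mode, so
             * Tx->Rx loopback comes up without a link partner.
             */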
5269        IXGBE_WRITE_REG(hw,
5270                        IXGBE_AUTOC,
5271                        IXGBE_AUTOC_LMS_10G_LINK_NO_AN | IXGBE_AUTOC_FLU);
5272        ixgbe_reset_pipeline_82599(hw);
5273
5274        hw->mac.ops.release_swfw_sync(hw, IXGBE_GSSR_MAC_CSR_SM);
5275        msec_delay(50);
5276}
5277
5278
5279/*
5280 * Start Transmit and Receive Units.
5281 */
5282int __rte_cold
5283ixgbe_dev_rxtx_start(struct rte_eth_dev *dev)
5284{
5285        struct ixgbe_hw     *hw;
5286        struct ixgbe_tx_queue *txq;
5287        struct ixgbe_rx_queue *rxq;
5288        uint32_t txdctl;
5289        uint32_t dmatxctl;
5290        uint32_t rxctrl;
5291        uint16_t i;
5292        int ret = 0;
5293
5294        PMD_INIT_FUNC_TRACE();
5295        hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5296
5297        for (i = 0; i < dev->data->nb_tx_queues; i++) {
5298                txq = dev->data->tx_queues[i];
5299                /* Setup Transmit Threshold Registers */
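                    /*
                     * Derived from the masks and shifts below: the prefetch,
                     * host and write-back thresholds land in TXDCTL bits 6:0,
                     * 14:8 and 22:16 respectively. For example, pthresh = 32,
                     * hthresh = 1, wthresh = 0 sets txdctl |= 0x00000120.
                     */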
5300                txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(txq->reg_idx));
5301                txdctl |= txq->pthresh & 0x7F;
5302                txdctl |= ((txq->hthresh & 0x7F) << 8);
5303                txdctl |= ((txq->wthresh & 0x7F) << 16);
5304                IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(txq->reg_idx), txdctl);
5305        }
5306
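            /*
             * On MAC types other than the 82598, the global DMA transmit
             * enable (DMATXCTL.TE) must be set in addition to the per-queue
             * TXDCTL enables; the 82598 is excluded from this step (inferred
             * from the MAC-type check and the constant names).
             */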
5307        if (hw->mac.type != ixgbe_mac_82598EB) {
5308                dmatxctl = IXGBE_READ_REG(hw, IXGBE_DMATXCTL);
5309                dmatxctl |= IXGBE_DMATXCTL_TE;
5310                IXGBE_WRITE_REG(hw, IXGBE_DMATXCTL, dmatxctl);
5311        }
5312
5313        for (i = 0; i < dev->data->nb_tx_queues; i++) {
5314                txq = dev->data->tx_queues[i];
5315                if (!txq->tx_deferred_start) {
5316                        ret = ixgbe_dev_tx_queue_start(dev, i);
5317                        if (ret < 0)
5318                                return ret;
5319                }
5320        }
5321
5322        for (i = 0; i < dev->data->nb_rx_queues; i++) {
5323                rxq = dev->data->rx_queues[i];
5324                if (!rxq->rx_deferred_start) {
5325                        ret = ixgbe_dev_rx_queue_start(dev, i);
5326                        if (ret < 0)
5327                                return ret;
5328                }
5329        }
5330
5331        /* Enable Receive engine */
5332        rxctrl = IXGBE_READ_REG(hw, IXGBE_RXCTRL);
5333        if (hw->mac.type == ixgbe_mac_82598EB)
5334                rxctrl |= IXGBE_RXCTRL_DMBYPS;
5335        rxctrl |= IXGBE_RXCTRL_RXEN;
5336        hw->mac.ops.enable_rx_dma(hw, rxctrl);
5337
5338        /* If loopback mode is enabled, set up the link accordingly */
5339        if (dev->data->dev_conf.lpbk_mode != 0) {
5340                if (hw->mac.type == ixgbe_mac_82599EB)
5341                        ixgbe_setup_loopback_link_82599(hw);
5342                else if (hw->mac.type == ixgbe_mac_X540 ||
5343                     hw->mac.type == ixgbe_mac_X550 ||
5344                     hw->mac.type == ixgbe_mac_X550EM_x ||
5345                     hw->mac.type == ixgbe_mac_X550EM_a)
5346                        ixgbe_setup_loopback_link_x540_x550(hw, true);
5347        }
5348
5349#ifdef RTE_LIBRTE_SECURITY
5350        if ((dev->data->dev_conf.rxmode.offloads &
5351                        DEV_RX_OFFLOAD_SECURITY) ||
5352                (dev->data->dev_conf.txmode.offloads &
5353                        DEV_TX_OFFLOAD_SECURITY)) {
5354                ret = ixgbe_crypto_enable_ipsec(dev);
5355                if (ret != 0) {
5356                        PMD_DRV_LOG(ERR,
5357                                    "ixgbe_crypto_enable_ipsec fails with %d.",
5358                                    ret);
5359                        return ret;
5360                }
5361        }
5362#endif
5363
5364        return 0;
5365}
5366
5367/*
5368 * Start Receive Units for specified queue.
5369 */
5370int __rte_cold
5371ixgbe_dev_rx_queue_start(struct rte_eth_dev *dev, uint16_t rx_queue_id)
5372{
5373        struct ixgbe_hw     *hw;
5374        struct ixgbe_rx_queue *rxq;
5375        uint32_t rxdctl;
5376        int poll_ms;
5377
5378        PMD_INIT_FUNC_TRACE();
5379        hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5380
5381        rxq = dev->data->rx_queues[rx_queue_id];
5382
5383        /* Allocate buffers for descriptor rings */
5384        if (ixgbe_alloc_rx_queue_mbufs(rxq) != 0) {
5385                PMD_INIT_LOG(ERR, "Could not alloc mbuf for queue:%d",
5386                             rx_queue_id);
5387                return -1;
5388        }
5389        rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxq->reg_idx));
5390        rxdctl |= IXGBE_RXDCTL_ENABLE;
5391        IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(rxq->reg_idx), rxdctl);
5392
5393        /* Wait until RX Enable ready */
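            /*
             * Poll pattern used throughout this file: sleep 1 ms per
             * iteration and count poll_ms down from
             * RTE_IXGBE_REGISTER_POLL_WAIT_10_MS; if it reaches zero the
             * enable bit never latched, an error is logged and the function
             * continues regardless.
             */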
5394        poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
5395        do {
5396                rte_delay_ms(1);
5397                rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxq->reg_idx));
5398        } while (--poll_ms && !(rxdctl & IXGBE_RXDCTL_ENABLE));
5399        if (!poll_ms)
5400                PMD_INIT_LOG(ERR, "Could not enable Rx Queue %d", rx_queue_id);
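            /*
             * The write barrier ensures the descriptor ring updates made in
             * ixgbe_alloc_rx_queue_mbufs() are globally visible before the
             * head/tail writes below hand the descriptors to the hardware
             * (head = 0, tail = last descriptor index).
             */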
5401        rte_wmb();
5402        IXGBE_WRITE_REG(hw, IXGBE_RDH(rxq->reg_idx), 0);
5403        IXGBE_WRITE_REG(hw, IXGBE_RDT(rxq->reg_idx), rxq->nb_rx_desc - 1);
5404        dev->data->rx_queue_state[rx_queue_id] = RTE_ETH_QUEUE_STATE_STARTED;
5405
5406        return 0;
5407}
5408
5409/*
5410 * Stop Receive Units for specified queue.
5411 */
5412int __rte_cold
5413ixgbe_dev_rx_queue_stop(struct rte_eth_dev *dev, uint16_t rx_queue_id)
5414{
5415        struct ixgbe_hw     *hw;
5416        struct ixgbe_adapter *adapter = dev->data->dev_private;
5417        struct ixgbe_rx_queue *rxq;
5418        uint32_t rxdctl;
5419        int poll_ms;
5420
5421        PMD_INIT_FUNC_TRACE();
5422        hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5423
5424        rxq = dev->data->rx_queues[rx_queue_id];
5425
5426        rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxq->reg_idx));
5427        rxdctl &= ~IXGBE_RXDCTL_ENABLE;
5428        IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(rxq->reg_idx), rxdctl);
5429
5430        /* Wait until RX Enable bit clear */
5431        poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
5432        do {
5433                rte_delay_ms(1);
5434                rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxq->reg_idx));
5435        } while (--poll_ms && (rxdctl & IXGBE_RXDCTL_ENABLE));
5436        if (!poll_ms)
5437                PMD_INIT_LOG(ERR, "Could not disable Rx Queue %d", rx_queue_id);
5438
5439        rte_delay_us(RTE_IXGBE_WAIT_100_US);
5440
5441        ixgbe_rx_queue_release_mbufs(rxq);
5442        ixgbe_reset_rx_queue(adapter, rxq);
5443        dev->data->rx_queue_state[rx_queue_id] = RTE_ETH_QUEUE_STATE_STOPPED;
5444
5445        return 0;
5446}
5447
5448
5449/*
5450 * Start Transmit Units for specified queue.
5451 */
5452int __rte_cold
5453ixgbe_dev_tx_queue_start(struct rte_eth_dev *dev, uint16_t tx_queue_id)
5454{
5455        struct ixgbe_hw     *hw;
5456        struct ixgbe_tx_queue *txq;
5457        uint32_t txdctl;
5458        int poll_ms;
5459
5460        PMD_INIT_FUNC_TRACE();
5461        hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5462
5463        txq = dev->data->tx_queues[tx_queue_id];
5464        IXGBE_WRITE_REG(hw, IXGBE_TDH(txq->reg_idx), 0);
5465        txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(txq->reg_idx));
5466        txdctl |= IXGBE_TXDCTL_ENABLE;
5467        IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(txq->reg_idx), txdctl);
5468
5469        /* Wait until TX Enable ready */
5470        if (hw->mac.type == ixgbe_mac_82599EB) {
5471                poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
5472                do {
5473                        rte_delay_ms(1);
5474                        txdctl = IXGBE_READ_REG(hw,
5475                                IXGBE_TXDCTL(txq->reg_idx));
5476                } while (--poll_ms && !(txdctl & IXGBE_TXDCTL_ENABLE));
5477                if (!poll_ms)
5478                        PMD_INIT_LOG(ERR, "Could not enable Tx Queue %d",
5479                                tx_queue_id);
5480        }
5481        rte_wmb();
5482        IXGBE_WRITE_REG(hw, IXGBE_TDT(txq->reg_idx), 0);
5483        dev->data->tx_queue_state[tx_queue_id] = RTE_ETH_QUEUE_STATE_STARTED;
5484
5485        return 0;
5486}
5487
5488/*
5489 * Stop Transmit Units for specified queue.
5490 */
5491int __rte_cold
5492ixgbe_dev_tx_queue_stop(struct rte_eth_dev *dev, uint16_t tx_queue_id)
5493{
5494        struct ixgbe_hw     *hw;
5495        struct ixgbe_tx_queue *txq;
5496        uint32_t txdctl;
5497        uint32_t txtdh, txtdt;
5498        int poll_ms;
5499
5500        PMD_INIT_FUNC_TRACE();
5501        hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5502
5503        txq = dev->data->tx_queues[tx_queue_id];
5504
5505        /* Wait until TX queue is empty */
5506        if (hw->mac.type == ixgbe_mac_82599EB) {
5507                poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
5508                do {
5509                        rte_delay_us(RTE_IXGBE_WAIT_100_US);
5510                        txtdh = IXGBE_READ_REG(hw,
5511                                               IXGBE_TDH(txq->reg_idx));
5512                        txtdt = IXGBE_READ_REG(hw,
5513                                               IXGBE_TDT(txq->reg_idx));
5514                } while (--poll_ms && (txtdh != txtdt));
5515                if (!poll_ms)
5516                        PMD_INIT_LOG(ERR,
5517                                "Tx Queue %d is not empty when stopping.",
5518                                tx_queue_id);
5519        }
5520
5521        txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(txq->reg_idx));
5522        txdctl &= ~IXGBE_TXDCTL_ENABLE;
5523        IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(txq->reg_idx), txdctl);
5524
5525        /* Wait until TX Enable bit clear */
5526        if (hw->mac.type == ixgbe_mac_82599EB) {
5527                poll_ms = RTE_IXGBE_REGISTER_POLL_WAIT_10_MS;
5528                do {
5529                        rte_delay_ms(1);
5530                        txdctl = IXGBE_READ_REG(hw,
5531                                                IXGBE_TXDCTL(txq->reg_idx));
5532                } while (--poll_ms && (txdctl & IXGBE_TXDCTL_ENABLE));
5533                if (!poll_ms)
5534                        PMD_INIT_LOG(ERR, "Could not disable Tx Queue %d",
5535                                tx_queue_id);
5536        }
5537
5538        if (txq->ops != NULL) {
5539                txq->ops->release_mbufs(txq);
5540                txq->ops->reset(txq);
5541        }
5542        dev->data->tx_queue_state[tx_queue_id] = RTE_ETH_QUEUE_STATE_STOPPED;
5543
5544        return 0;
5545}
5546
5547void
5548ixgbe_rxq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
5549        struct rte_eth_rxq_info *qinfo)
5550{
5551        struct ixgbe_rx_queue *rxq;
5552
5553        rxq = dev->data->rx_queues[queue_id];
5554
5555        qinfo->mp = rxq->mb_pool;
5556        qinfo->scattered_rx = dev->data->scattered_rx;
5557        qinfo->nb_desc = rxq->nb_rx_desc;
5558
5559        qinfo->conf.rx_free_thresh = rxq->rx_free_thresh;
5560        qinfo->conf.rx_drop_en = rxq->drop_en;
5561        qinfo->conf.rx_deferred_start = rxq->rx_deferred_start;
5562        qinfo->conf.offloads = rxq->offloads;
5563}
5564
5565void
5566ixgbe_txq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
5567        struct rte_eth_txq_info *qinfo)
5568{
5569        struct ixgbe_tx_queue *txq;
5570
5571        txq = dev->data->tx_queues[queue_id];
5572
5573        qinfo->nb_desc = txq->nb_tx_desc;
5574
5575        qinfo->conf.tx_thresh.pthresh = txq->pthresh;
5576        qinfo->conf.tx_thresh.hthresh = txq->hthresh;
5577        qinfo->conf.tx_thresh.wthresh = txq->wthresh;
5578
5579        qinfo->conf.tx_free_thresh = txq->tx_free_thresh;
5580        qinfo->conf.tx_rs_thresh = txq->tx_rs_thresh;
5581        qinfo->conf.offloads = txq->offloads;
5582        qinfo->conf.tx_deferred_start = txq->tx_deferred_start;
5583}
5584
5585/*
5586 * [VF] Initializes Receive Unit.
5587 */
5588int __rte_cold
5589ixgbevf_dev_rx_init(struct rte_eth_dev *dev)
5590{
5591        struct ixgbe_hw     *hw;
5592        struct ixgbe_rx_queue *rxq;
5593        struct rte_eth_rxmode *rxmode = &dev->data->dev_conf.rxmode;
5594        uint64_t bus_addr;
5595        uint32_t srrctl, psrtype = 0;
5596        uint16_t buf_size;
5597        uint16_t i;
5598        int ret;
5599
5600        PMD_INIT_FUNC_TRACE();
5601        hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5602
5603        if (rte_is_power_of_2(dev->data->nb_rx_queues) == 0) {
5604                PMD_INIT_LOG(ERR, "The number of Rx queues is invalid; "
5605                        "it must be a power of 2");
5606                return -1;
5607        }
5608
5609        if (dev->data->nb_rx_queues > hw->mac.max_rx_queues) {
5610                PMD_INIT_LOG(ERR, "The number of Rx queues is invalid; "
5611                        "it must be less than or equal to %d",
5612                        hw->mac.max_rx_queues);
5613                return -1;
5614        }
5615
5616        /*
5617          * When the VF driver issues an IXGBE_VF_RESET request, the PF driver
5618          * disables VF packet reception if the PF MTU is > 1500.
5619          * This is done to deal with an 82599 limitation that forces
5620          * the PF and all VFs to share the same MTU.
5621          * The PF driver then re-enables VF packet reception when the
5622          * VF driver issues an IXGBE_VF_SET_LPE request.
5623          * In the meantime, the VF device cannot be used, even if the VF driver
5624          * and the Guest VM network stack are ready to accept packets with a
5625          * size up to the PF MTU.
5626          * As a workaround for this PF behaviour, force the call to
5627          * ixgbevf_rlpml_set_vf even if jumbo frames are not used. This way,
5628          * VF packet reception works in all cases.
5629         */
5630        ixgbevf_rlpml_set_vf(hw,
5631                (uint16_t)dev->data->dev_conf.rxmode.max_rx_pkt_len);
5632
5633        /*
5634          * Initially assume no header split and no VLAN strip support
5635          * on any Rx queue.
5636         */
5637        rxmode->offloads &= ~DEV_RX_OFFLOAD_VLAN_STRIP;
5638        /* Setup RX queues */
5639        for (i = 0; i < dev->data->nb_rx_queues; i++) {
5640                rxq = dev->data->rx_queues[i];
5641
5642                /* Allocate buffers for descriptor rings */
5643                ret = ixgbe_alloc_rx_queue_mbufs(rxq);
5644                if (ret)
5645                        return ret;
5646
5647                /* Setup the Base and Length of the Rx Descriptor Rings */
5648                bus_addr = rxq->rx_ring_phys_addr;
5649
5650                IXGBE_WRITE_REG(hw, IXGBE_VFRDBAL(i),
5651                                (uint32_t)(bus_addr & 0x00000000ffffffffULL));
5652                IXGBE_WRITE_REG(hw, IXGBE_VFRDBAH(i),
5653                                (uint32_t)(bus_addr >> 32));
5654                IXGBE_WRITE_REG(hw, IXGBE_VFRDLEN(i),
5655                                rxq->nb_rx_desc * sizeof(union ixgbe_adv_rx_desc));
5656                IXGBE_WRITE_REG(hw, IXGBE_VFRDH(i), 0);
5657                IXGBE_WRITE_REG(hw, IXGBE_VFRDT(i), 0);
5658
5659
5660                /* Configure the SRRCTL register */
5661                srrctl = IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF;
5662
5663                /* Set whether packets are dropped when no descriptors are available */
5664                if (rxq->drop_en)
5665                        srrctl |= IXGBE_SRRCTL_DROP_EN;
5666
5667                /*
5668                 * Configure the RX buffer size in the BSIZEPACKET field of
5669                 * the SRRCTL register of the queue.
5670                 * The value is in 1 KB resolution. Valid values can be from
5671                 * 1 KB to 16 KB.
5672                 */
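                    /*
                     * Illustrative arithmetic, assuming a mempool created with
                     * a data room of 2048 bytes + RTE_PKTMBUF_HEADROOM:
                     * buf_size is then 2048, and 2048 >>
                     * IXGBE_SRRCTL_BSIZEPKT_SHIFT gives 2, i.e. a 2 KB
                     * BSIZEPACKET setting.
                     */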
5673                buf_size = (uint16_t)(rte_pktmbuf_data_room_size(rxq->mb_pool) -
5674                        RTE_PKTMBUF_HEADROOM);
5675                srrctl |= ((buf_size >> IXGBE_SRRCTL_BSIZEPKT_SHIFT) &
5676                           IXGBE_SRRCTL_BSIZEPKT_MASK);
5677
5678                /*
5679                 * VF modification to write virtual function SRRCTL register
5680                 */
5681                IXGBE_WRITE_REG(hw, IXGBE_VFSRRCTL(i), srrctl);
5682
5683                buf_size = (uint16_t) ((srrctl & IXGBE_SRRCTL_BSIZEPKT_MASK) <<
5684                                       IXGBE_SRRCTL_BSIZEPKT_SHIFT);
5685
5686                if (rxmode->offloads & DEV_RX_OFFLOAD_SCATTER ||
5687                    /* Add dual VLAN tag length to support double-tagged frames */
5688                    (rxmode->max_rx_pkt_len +
5689                                2 * IXGBE_VLAN_TAG_SIZE) > buf_size) {
5690                        if (!dev->data->scattered_rx)
5691                                PMD_INIT_LOG(DEBUG, "forcing scatter mode");
5692                        dev->data->scattered_rx = 1;
5693                }
5694
5695                if (rxq->offloads & DEV_RX_OFFLOAD_VLAN_STRIP)
5696                        rxmode->offloads |= DEV_RX_OFFLOAD_VLAN_STRIP;
5697        }
5698
5699        /* Set RQPL for VF RSS according to the number of Rx queues */
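            /*
             * The shift maps 1, 2 or 4 configured Rx queues to RQPL values
             * of 0, 1 or 2 respectively (plain arithmetic on
             * nb_rx_queues >> 1; the exact field encoding is defined by the
             * hardware).
             */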
5700        psrtype |= (dev->data->nb_rx_queues >> 1) <<
5701                IXGBE_PSRTYPE_RQPL_SHIFT;
5702        IXGBE_WRITE_REG(hw, IXGBE_VFPSRTYPE, psrtype);
5703
5704        ixgbe_set_rx_function(dev);
5705
5706        return 0;
5707}
5708
5709/*
5710 * [VF] Initializes Transmit Unit.
5711 */
5712void __rte_cold
5713ixgbevf_dev_tx_init(struct rte_eth_dev *dev)
5714{
5715        struct ixgbe_hw     *hw;
5716        struct ixgbe_tx_queue *txq;
5717        uint64_t bus_addr;
5718        uint32_t txctrl;
5719        uint16_t i;
5720
5721        PMD_INIT_FUNC_TRACE();
5722        hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5723
5724        /* Setup the Base and Length of the Tx Descriptor Rings */
5725        for (i = 0; i < dev->data->nb_tx_queues; i++) {
5726                txq = dev->data->tx_queues[i];
5727                bus_addr = txq->tx_ring_phys_addr;
5728                IXGBE_WRITE_REG(hw, IXGBE_VFTDBAL(i),
5729                                (uint32_t)(bus_addr & 0x00000000ffffffffULL));
5730                IXGBE_WRITE_REG(hw, IXGBE_VFTDBAH(i),
5731                                (uint32_t)(bus_addr >> 32));
5732                IXGBE_WRITE_REG(hw, IXGBE_VFTDLEN(i),
5733                                txq->nb_tx_desc * sizeof(union ixgbe_adv_tx_desc));
5734                /* Setup the HW Tx Head and TX Tail descriptor pointers */
5735                IXGBE_WRITE_REG(hw, IXGBE_VFTDH(i), 0);
5736                IXGBE_WRITE_REG(hw, IXGBE_VFTDT(i), 0);
5737
5738                /*
5739                 * Disable Tx Head Writeback RO bit, since this hoses
5740                 * bookkeeping if things aren't delivered in order.
5741                 */
5742                txctrl = IXGBE_READ_REG(hw,
5743                                IXGBE_VFDCA_TXCTRL(i));
5744                txctrl &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN;
5745                IXGBE_WRITE_REG(hw, IXGBE_VFDCA_TXCTRL(i),
5746                                txctrl);
5747        }
5748}
5749
5750/*
5751 * [VF] Start Transmit and Receive Units.
5752 */
5753void __rte_cold
5754ixgbevf_dev_rxtx_start(struct rte_eth_dev *dev)
5755{
5756        struct ixgbe_hw     *hw;
5757        struct ixgbe_tx_queue *txq;
5758        struct ixgbe_rx_queue *rxq;
5759        uint32_t txdctl;
5760        uint32_t rxdctl;
5761        uint16_t i;
5762        int poll_ms;
5763
5764        PMD_INIT_FUNC_TRACE();
5765        hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5766
5767        for (i = 0; i < dev->data->nb_tx_queues; i++) {
5768                txq = dev->data->tx_queues[i];
5769                /* Setup Transmit Threshold Registers */
5770                txdctl = IXGBE_READ_REG(hw, IXGBE_VFTXDCTL(i));
5771                txdctl |= txq->pthresh & 0x7F;
5772                txdctl |= ((txq->hthresh & 0x7F) << 8);
5773                txdctl |= ((txq->wthresh & 0x7F) << 16);
5774                IXGBE_WRITE_REG(hw, IXGBE_VFTXDCTL(i), txdctl);
5775        }
5776
5777        for (i = 0; i < dev->data->nb_tx_queues; i++) {
5778
5779                txdctl = IXGBE_READ_REG(hw, IXGBE_VFTXDCTL(i));
5780                txdctl |= IXGBE_TXDCTL_ENABLE;
5781                IXGBE_WRITE_REG(hw, IXGBE_VFTXDCTL(i), txdctl);
5782
5783                poll_ms = 10;
5784                /* Wait until TX Enable ready */
5785                do {
5786                        rte_delay_ms(1);
5787                        txdctl = IXGBE_READ_REG(hw, IXGBE_VFTXDCTL(i));
5788                } while (--poll_ms && !(txdctl & IXGBE_TXDCTL_ENABLE));
5789                if (!poll_ms)
5790                        PMD_INIT_LOG(ERR, "Could not enable Tx Queue %d", i);
5791        }
5792        for (i = 0; i < dev->data->nb_rx_queues; i++) {
5793
5794                rxq = dev->data->rx_queues[i];
5795
5796                rxdctl = IXGBE_READ_REG(hw, IXGBE_VFRXDCTL(i));
5797                rxdctl |= IXGBE_RXDCTL_ENABLE;
5798                IXGBE_WRITE_REG(hw, IXGBE_VFRXDCTL(i), rxdctl);
5799
5800                /* Wait until RX Enable ready */
5801                poll_ms = 10;
5802                do {
5803                        rte_delay_ms(1);
5804                        rxdctl = IXGBE_READ_REG(hw, IXGBE_VFRXDCTL(i));
5805                } while (--poll_ms && !(rxdctl & IXGBE_RXDCTL_ENABLE));
5806                if (!poll_ms)
5807                        PMD_INIT_LOG(ERR, "Could not enable Rx Queue %d", i);
5808                rte_wmb();
5809                IXGBE_WRITE_REG(hw, IXGBE_VFRDT(i), rxq->nb_rx_desc - 1);
5810
5811        }
5812}
5813
5814int
5815ixgbe_rss_conf_init(struct ixgbe_rte_flow_rss_conf *out,
5816                    const struct rte_flow_action_rss *in)
5817{
5818        if (in->key_len > RTE_DIM(out->key) ||
5819            in->queue_num > RTE_DIM(out->queue))
5820                return -EINVAL;
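            /*
             * memcpy() returns its destination, so the .key and .queue
             * members of the compound literal below end up pointing at the
             * storage embedded in *out rather than at the caller-owned
             * buffers in *in.
             */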
5821        out->conf = (struct rte_flow_action_rss){
5822                .func = in->func,
5823                .level = in->level,
5824                .types = in->types,
5825                .key_len = in->key_len,
5826                .queue_num = in->queue_num,
5827                .key = memcpy(out->key, in->key, in->key_len),
5828                .queue = memcpy(out->queue, in->queue,
5829                                sizeof(*in->queue) * in->queue_num),
5830        };
5831        return 0;
5832}
5833
5834int
5835ixgbe_action_rss_same(const struct rte_flow_action_rss *comp,
5836                      const struct rte_flow_action_rss *with)
5837{
5838        return (comp->func == with->func &&
5839                comp->level == with->level &&
5840                comp->types == with->types &&
5841                comp->key_len == with->key_len &&
5842                comp->queue_num == with->queue_num &&
5843                !memcmp(comp->key, with->key, with->key_len) &&
5844                !memcmp(comp->queue, with->queue,
5845                        sizeof(*with->queue) * with->queue_num));
5846}
5847
5848int
5849ixgbe_config_rss_filter(struct rte_eth_dev *dev,
5850                struct ixgbe_rte_flow_rss_conf *conf, bool add)
5851{
5852        struct ixgbe_hw *hw;
5853        uint32_t reta;
5854        uint16_t i;
5855        uint16_t j;
5856        uint16_t sp_reta_size;
5857        uint32_t reta_reg;
5858        struct rte_eth_rss_conf rss_conf = {
5859                .rss_key = conf->conf.key_len ?
5860                        (void *)(uintptr_t)conf->conf.key : NULL,
5861                .rss_key_len = conf->conf.key_len,
5862                .rss_hf = conf->conf.types,
5863        };
5864        struct ixgbe_filter_info *filter_info =
5865                IXGBE_DEV_PRIVATE_TO_FILTER_INFO(dev->data->dev_private);
5866
5867        PMD_INIT_FUNC_TRACE();
5868        hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
5869
5870        sp_reta_size = ixgbe_reta_size_get(hw->mac.type);
5871
5872        if (!add) {
5873                if (ixgbe_action_rss_same(&filter_info->rss_info.conf,
5874                                          &conf->conf)) {
5875                        ixgbe_rss_disable(dev);
5876                        memset(&filter_info->rss_info, 0,
5877                                sizeof(struct ixgbe_rte_flow_rss_conf));
5878                        return 0;
5879                }
5880                return -EINVAL;
5881        }
5882
5883        if (filter_info->rss_info.conf.queue_num)
5884                return -EINVAL;
5885        /* Fill in the redirection table.
5886         * The byte-swap is needed because NIC registers are in
5887         * little-endian order.
5888         */
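            /*
             * Each 32-bit RETA register packs four 8-bit queue indices. As an
             * illustration, with conf->conf.queue = {0, 1} the first four
             * entries are 0, 1, 0, 1: reta accumulates to 0x00010001 and
             * rte_bswap32() turns that into the 0x01000100 value actually
             * written.
             */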
5889        reta = 0;
5890        for (i = 0, j = 0; i < sp_reta_size; i++, j++) {
5891                reta_reg = ixgbe_reta_reg_get(hw->mac.type, i);
5892
5893                if (j == conf->conf.queue_num)
5894                        j = 0;
5895                reta = (reta << 8) | conf->conf.queue[j];
5896                if ((i & 3) == 3)
5897                        IXGBE_WRITE_REG(hw, reta_reg,
5898                                        rte_bswap32(reta));
5899        }
5900
5901        /* Configure the RSS key and the RSS protocols used to compute
5902         * the RSS hash of input packets.
5903         */
5904        if ((rss_conf.rss_hf & IXGBE_RSS_OFFLOAD_ALL) == 0) {
5905                ixgbe_rss_disable(dev);
5906                return 0;
5907        }
5908        if (rss_conf.rss_key == NULL)
5909                rss_conf.rss_key = rss_intel_key; /* Default hash key */
5910        ixgbe_hw_rss_hash_set(hw, &rss_conf);
5911
5912        if (ixgbe_rss_conf_init(&filter_info->rss_info, &conf->conf))
5913                return -EINVAL;
5914
5915        return 0;
5916}
5917
5918/* Stubs needed for linkage when CONFIG_RTE_ARCH_PPC_64 is set */
5919#if defined(RTE_ARCH_PPC_64)
5920int
5921ixgbe_rx_vec_dev_conf_condition_check(struct rte_eth_dev __rte_unused *dev)
5922{
5923        return -1;
5924}
5925
5926uint16_t
5927ixgbe_recv_pkts_vec(
5928        void __rte_unused *rx_queue,
5929        struct rte_mbuf __rte_unused **rx_pkts,
5930        uint16_t __rte_unused nb_pkts)
5931{
5932        return 0;
5933}
5934
5935uint16_t
5936ixgbe_recv_scattered_pkts_vec(
5937        void __rte_unused *rx_queue,
5938        struct rte_mbuf __rte_unused **rx_pkts,
5939        uint16_t __rte_unused nb_pkts)
5940{
5941        return 0;
5942}
5943
5944int
5945ixgbe_rxq_vec_setup(struct ixgbe_rx_queue __rte_unused *rxq)
5946{
5947        return -1;
5948}
5949
5950uint16_t
5951ixgbe_xmit_fixed_burst_vec(void __rte_unused *tx_queue,
5952                struct rte_mbuf __rte_unused **tx_pkts,
5953                uint16_t __rte_unused nb_pkts)
5954{
5955        return 0;
5956}
5957
5958int
5959ixgbe_txq_vec_setup(struct ixgbe_tx_queue __rte_unused *txq)
5960{
5961        return -1;
5962}
5963
5964void
5965ixgbe_rx_queue_release_mbufs_vec(struct ixgbe_rx_queue __rte_unused *rxq)
5966{
5967        return;
5968}
5969#endif
5970