dpdk/drivers/net/i40e/i40e_rxtx.c
   1/* SPDX-License-Identifier: BSD-3-Clause
   2 * Copyright(c) 2010-2016 Intel Corporation
   3 */
   4
   5#include <stdio.h>
   6#include <stdlib.h>
   7#include <string.h>
   8#include <errno.h>
   9#include <stdint.h>
  10#include <stdarg.h>
  11#include <unistd.h>
  12#include <inttypes.h>
  13#include <sys/queue.h>
  14
  15#include <rte_string_fns.h>
  16#include <rte_memzone.h>
  17#include <rte_mbuf.h>
  18#include <rte_malloc.h>
  19#include <rte_ether.h>
  20#include <ethdev_driver.h>
  21#include <rte_tcp.h>
  22#include <rte_sctp.h>
  23#include <rte_udp.h>
  24#include <rte_ip.h>
  25#include <rte_net.h>
  26#include <rte_vect.h>
  27
  28#include "i40e_logs.h"
  29#include "base/i40e_prototype.h"
  30#include "base/i40e_type.h"
  31#include "i40e_ethdev.h"
  32#include "i40e_rxtx.h"
  33
  34#define DEFAULT_TX_RS_THRESH   32
  35#define DEFAULT_TX_FREE_THRESH 32
  36
  37#define I40E_TX_MAX_BURST  32
  38
  39#define I40E_DMA_MEM_ALIGN 4096
  40
  41/* Base address of the HW descriptor ring should be 128B aligned. */
  42#define I40E_RING_BASE_ALIGN    128
  43
  44#define I40E_TXD_CMD (I40E_TX_DESC_CMD_EOP | I40E_TX_DESC_CMD_RS)
  45
  46#ifdef RTE_LIBRTE_IEEE1588
  47#define I40E_TX_IEEE1588_TMST PKT_TX_IEEE1588_TMST
  48#else
  49#define I40E_TX_IEEE1588_TMST 0
  50#endif
  51
  52#define I40E_TX_CKSUM_OFFLOAD_MASK (             \
  53                PKT_TX_IP_CKSUM |                \
  54                PKT_TX_L4_MASK |                 \
  55                PKT_TX_TCP_SEG |                 \
  56                PKT_TX_OUTER_IP_CKSUM)
  57
  58#define I40E_TX_OFFLOAD_MASK (  \
  59                PKT_TX_OUTER_IPV4 |     \
  60                PKT_TX_OUTER_IPV6 |     \
  61                PKT_TX_IPV4 |           \
  62                PKT_TX_IPV6 |           \
  63                PKT_TX_IP_CKSUM |       \
  64                PKT_TX_L4_MASK |        \
  65                PKT_TX_OUTER_IP_CKSUM | \
  66                PKT_TX_TCP_SEG |        \
  67                PKT_TX_QINQ_PKT |       \
  68                PKT_TX_VLAN_PKT |       \
  69                PKT_TX_TUNNEL_MASK |    \
  70                I40E_TX_IEEE1588_TMST)
  71
  72#define I40E_TX_OFFLOAD_NOTSUP_MASK \
  73                (PKT_TX_OFFLOAD_MASK ^ I40E_TX_OFFLOAD_MASK)
  74
  75#define I40E_TX_OFFLOAD_SIMPLE_SUP_MASK ( \
  76                PKT_TX_IPV4 | \
  77                PKT_TX_IPV6 | \
  78                PKT_TX_OUTER_IPV4 | \
  79                PKT_TX_OUTER_IPV6)
  80
  81#define I40E_TX_OFFLOAD_SIMPLE_NOTSUP_MASK \
  82                (PKT_TX_OFFLOAD_MASK ^ I40E_TX_OFFLOAD_SIMPLE_SUP_MASK)
  83
  84static int
  85i40e_monitor_callback(const uint64_t value,
  86                const uint64_t arg[RTE_POWER_MONITOR_OPAQUE_SZ] __rte_unused)
  87{
  88        const uint64_t m = rte_cpu_to_le_64(1 << I40E_RX_DESC_STATUS_DD_SHIFT);
  89        /*
  90         * we expect the DD bit to be set to 1 if this descriptor was already
  91         * written to.
  92         */
  93        return (value & m) == m ? -1 : 0;
  94}
  95
  96int
  97i40e_get_monitor_addr(void *rx_queue, struct rte_power_monitor_cond *pmc)
  98{
  99        struct i40e_rx_queue *rxq = rx_queue;
 100        volatile union i40e_rx_desc *rxdp;
 101        uint16_t desc;
 102
 103        desc = rxq->rx_tail;
 104        rxdp = &rxq->rx_ring[desc];
 105        /* watch for changes in status bit */
 106        pmc->addr = &rxdp->wb.qword1.status_error_len;
 107
 108        /* comparison callback */
 109        pmc->fn = i40e_monitor_callback;
 110
 111        /* registers are 64-bit */
 112        pmc->size = sizeof(uint64_t);
 113
 114        return 0;
 115}
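/*
 * Illustrative sketch (editorial, not part of the driver): how the monitor
 * condition filled in above is typically consumed. An application reaches
 * this code through the ethdev layer and the power-management intrinsics;
 * the exact API shape may differ across DPDK releases, and the function
 * below is a hypothetical example only.
 */
#if 0
#include <rte_power_intrinsics.h>	/* rte_power_monitor() */

static void
example_wait_for_rx(uint16_t port_id, uint16_t queue_id)
{
        struct rte_power_monitor_cond pmc;

        /* ethdev dispatches to i40e_get_monitor_addr() for i40e ports */
        if (rte_eth_get_monitor_addr(port_id, queue_id, &pmc) == 0)
                /* sleep until the DD bit of the next descriptor flips */
                rte_power_monitor(&pmc, UINT64_MAX);
}
#endif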
 116
 117static inline void
 118i40e_rxd_to_vlan_tci(struct rte_mbuf *mb, volatile union i40e_rx_desc *rxdp)
 119{
 120        if (rte_le_to_cpu_64(rxdp->wb.qword1.status_error_len) &
 121                (1 << I40E_RX_DESC_STATUS_L2TAG1P_SHIFT)) {
 122                mb->ol_flags |= PKT_RX_VLAN | PKT_RX_VLAN_STRIPPED;
 123                mb->vlan_tci =
 124                        rte_le_to_cpu_16(rxdp->wb.qword0.lo_dword.l2tag1);
 125                PMD_RX_LOG(DEBUG, "Descriptor l2tag1: %u",
 126                           rte_le_to_cpu_16(rxdp->wb.qword0.lo_dword.l2tag1));
 127        } else {
 128                mb->vlan_tci = 0;
 129        }
 130#ifndef RTE_LIBRTE_I40E_16BYTE_RX_DESC
 131        if (rte_le_to_cpu_16(rxdp->wb.qword2.ext_status) &
 132                (1 << I40E_RX_DESC_EXT_STATUS_L2TAG2P_SHIFT)) {
 133                mb->ol_flags |= PKT_RX_QINQ_STRIPPED | PKT_RX_QINQ |
 134                        PKT_RX_VLAN_STRIPPED | PKT_RX_VLAN;
 135                mb->vlan_tci_outer = mb->vlan_tci;
 136                mb->vlan_tci = rte_le_to_cpu_16(rxdp->wb.qword2.l2tag2_2);
 137                PMD_RX_LOG(DEBUG, "Descriptor l2tag2_1: %u, l2tag2_2: %u",
 138                           rte_le_to_cpu_16(rxdp->wb.qword2.l2tag2_1),
 139                           rte_le_to_cpu_16(rxdp->wb.qword2.l2tag2_2));
 140        } else {
 141                mb->vlan_tci_outer = 0;
 142        }
 143#endif
 144        PMD_RX_LOG(DEBUG, "Mbuf vlan_tci: %u, vlan_tci_outer: %u",
 145                   mb->vlan_tci, mb->vlan_tci_outer);
 146}
 147
 148/* Translate the rx descriptor status to pkt flags */
 149static inline uint64_t
 150i40e_rxd_status_to_pkt_flags(uint64_t qword)
 151{
 152        uint64_t flags;
 153
 154        /* Check if RSS_HASH */
 155        flags = (((qword >> I40E_RX_DESC_STATUS_FLTSTAT_SHIFT) &
 156                                        I40E_RX_DESC_FLTSTAT_RSS_HASH) ==
 157                        I40E_RX_DESC_FLTSTAT_RSS_HASH) ? PKT_RX_RSS_HASH : 0;
 158
 159        /* Check if FDIR Match */
 160        flags |= (qword & (1 << I40E_RX_DESC_STATUS_FLM_SHIFT) ?
 161                                                        PKT_RX_FDIR : 0);
 162
 163        return flags;
 164}
 165
 166static inline uint64_t
 167i40e_rxd_error_to_pkt_flags(uint64_t qword)
 168{
 169        uint64_t flags = 0;
 170        uint64_t error_bits = (qword >> I40E_RXD_QW1_ERROR_SHIFT);
 171
 172#define I40E_RX_ERR_BITS 0x3f
 173        if (likely((error_bits & I40E_RX_ERR_BITS) == 0)) {
 174                flags |= (PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_GOOD);
 175                return flags;
 176        }
 177
 178        if (unlikely(error_bits & (1 << I40E_RX_DESC_ERROR_IPE_SHIFT)))
 179                flags |= PKT_RX_IP_CKSUM_BAD;
 180        else
 181                flags |= PKT_RX_IP_CKSUM_GOOD;
 182
 183        if (unlikely(error_bits & (1 << I40E_RX_DESC_ERROR_L4E_SHIFT)))
 184                flags |= PKT_RX_L4_CKSUM_BAD;
 185        else
 186                flags |= PKT_RX_L4_CKSUM_GOOD;
 187
 188        if (unlikely(error_bits & (1 << I40E_RX_DESC_ERROR_EIPE_SHIFT)))
 189                flags |= PKT_RX_OUTER_IP_CKSUM_BAD;
 190
 191        return flags;
 192}
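/*
 * Illustrative sketch (editorial, not part of the driver): how an
 * application might act on the checksum flags derived above after a
 * receive burst. The function name is hypothetical.
 */
#if 0
static void
example_check_rx_csum(struct rte_mbuf **pkts, uint16_t nb)
{
        uint16_t i;

        for (i = 0; i < nb; i++) {
                if (pkts[i]->ol_flags & (PKT_RX_IP_CKSUM_BAD |
                                         PKT_RX_L4_CKSUM_BAD))
                        rte_pktmbuf_free(pkts[i]);  /* drop bad packets */
                /* else hand the packet to the application */
        }
}
#endif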
 193
 194/* Function to check and set the ieee1588 timesync index and get the
 195 * appropriate flags.
 196 */
 197#ifdef RTE_LIBRTE_IEEE1588
 198static inline uint64_t
 199i40e_get_iee15888_flags(struct rte_mbuf *mb, uint64_t qword)
 200{
 201        uint64_t pkt_flags = 0;
 202        uint16_t tsyn = (qword & (I40E_RXD_QW1_STATUS_TSYNVALID_MASK
 203                                  | I40E_RXD_QW1_STATUS_TSYNINDX_MASK))
 204                                    >> I40E_RX_DESC_STATUS_TSYNINDX_SHIFT;
 205
 206        if ((mb->packet_type & RTE_PTYPE_L2_MASK)
 207                        == RTE_PTYPE_L2_ETHER_TIMESYNC)
 208                pkt_flags = PKT_RX_IEEE1588_PTP;
 209        if (tsyn & 0x04) {
 210                pkt_flags |= PKT_RX_IEEE1588_TMST;
 211                mb->timesync = tsyn & 0x03;
 212        }
 213
 214        return pkt_flags;
 215}
 216#endif
 217
 218static inline uint64_t
 219i40e_rxd_build_fdir(volatile union i40e_rx_desc *rxdp, struct rte_mbuf *mb)
 220{
 221        uint64_t flags = 0;
 222#ifndef RTE_LIBRTE_I40E_16BYTE_RX_DESC
 223        uint16_t flexbh, flexbl;
 224
 225        flexbh = (rte_le_to_cpu_32(rxdp->wb.qword2.ext_status) >>
 226                I40E_RX_DESC_EXT_STATUS_FLEXBH_SHIFT) &
 227                I40E_RX_DESC_EXT_STATUS_FLEXBH_MASK;
 228        flexbl = (rte_le_to_cpu_32(rxdp->wb.qword2.ext_status) >>
 229                I40E_RX_DESC_EXT_STATUS_FLEXBL_SHIFT) &
 230                I40E_RX_DESC_EXT_STATUS_FLEXBL_MASK;
 231
 232
 233        if (flexbh == I40E_RX_DESC_EXT_STATUS_FLEXBH_FD_ID) {
 234                mb->hash.fdir.hi =
 235                        rte_le_to_cpu_32(rxdp->wb.qword3.hi_dword.fd_id);
 236                flags |= PKT_RX_FDIR_ID;
 237        } else if (flexbh == I40E_RX_DESC_EXT_STATUS_FLEXBH_FLEX) {
 238                mb->hash.fdir.hi =
 239                        rte_le_to_cpu_32(rxdp->wb.qword3.hi_dword.flex_bytes_hi);
 240                flags |= PKT_RX_FDIR_FLX;
 241        }
 242        if (flexbl == I40E_RX_DESC_EXT_STATUS_FLEXBL_FLEX) {
 243                mb->hash.fdir.lo =
 244                        rte_le_to_cpu_32(rxdp->wb.qword3.lo_dword.flex_bytes_lo);
 245                flags |= PKT_RX_FDIR_FLX;
 246        }
 247#else
 248        mb->hash.fdir.hi =
 249                rte_le_to_cpu_32(rxdp->wb.qword0.hi_dword.fd_id);
 250        flags |= PKT_RX_FDIR_ID;
 251#endif
 252        return flags;
 253}
 254
 255static inline void
 256i40e_parse_tunneling_params(uint64_t ol_flags,
 257                            union i40e_tx_offload tx_offload,
 258                            uint32_t *cd_tunneling)
 259{
 260        /* EIPT: External (outer) IP header type */
 261        if (ol_flags & PKT_TX_OUTER_IP_CKSUM)
 262                *cd_tunneling |= I40E_TX_CTX_EXT_IP_IPV4;
 263        else if (ol_flags & PKT_TX_OUTER_IPV4)
 264                *cd_tunneling |= I40E_TX_CTX_EXT_IP_IPV4_NO_CSUM;
 265        else if (ol_flags & PKT_TX_OUTER_IPV6)
 266                *cd_tunneling |= I40E_TX_CTX_EXT_IP_IPV6;
 267
 268        /* EIPLEN: External (outer) IP header length, in DWords */
 269        *cd_tunneling |= (tx_offload.outer_l3_len >> 2) <<
 270                I40E_TXD_CTX_QW0_EXT_IPLEN_SHIFT;
 271
 272        /* L4TUNT: L4 Tunneling Type */
 273        switch (ol_flags & PKT_TX_TUNNEL_MASK) {
 274        case PKT_TX_TUNNEL_IPIP:
 275                /* for non UDP / GRE tunneling, set to 00b */
 276                break;
 277        case PKT_TX_TUNNEL_VXLAN:
 278        case PKT_TX_TUNNEL_GENEVE:
 279                *cd_tunneling |= I40E_TXD_CTX_UDP_TUNNELING;
 280                break;
 281        case PKT_TX_TUNNEL_GRE:
 282                *cd_tunneling |= I40E_TXD_CTX_GRE_TUNNELING;
 283                break;
 284        default:
 285                PMD_TX_LOG(ERR, "Tunnel type not supported");
 286                return;
 287        }
 288
 289        /* L4TUNLEN: L4 Tunneling Length, in Words
 290         *
 291         * We depend on app to set rte_mbuf.l2_len correctly.
 292         * For IP in GRE it should be set to the length of the GRE
 293         * header;
 294         * for MAC in GRE or MAC in UDP it should be set to the length
  295         * of the GRE or UDP headers plus the inner MAC up to and including
 296         * its last Ethertype.
 297         */
 298        *cd_tunneling |= (tx_offload.l2_len >> 1) <<
 299                I40E_TXD_CTX_QW0_NATLEN_SHIFT;
 300}
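/*
 * Illustrative sketch (editorial, not part of the driver): the mbuf metadata
 * an application is expected to fill for a VXLAN-encapsulated TCP packet so
 * that the tunneling parameters above are computed correctly. Lengths assume
 * plain IPv4 headers without options; the function name is hypothetical.
 */
#if 0
static void
example_prepare_vxlan_tx(struct rte_mbuf *m)
{
        m->outer_l2_len = 14;           /* outer Ethernet */
        m->outer_l3_len = 20;           /* outer IPv4 */
        m->l2_len = 8 + 8 + 14;         /* UDP + VXLAN + inner Ethernet */
        m->l3_len = 20;                 /* inner IPv4 */
        m->l4_len = 20;                 /* inner TCP */
        m->ol_flags |= PKT_TX_OUTER_IPV4 | PKT_TX_OUTER_IP_CKSUM |
                       PKT_TX_TUNNEL_VXLAN | PKT_TX_IPV4 |
                       PKT_TX_IP_CKSUM | PKT_TX_TCP_CKSUM;
}
#endif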
 301
 302static inline void
 303i40e_txd_enable_checksum(uint64_t ol_flags,
 304                        uint32_t *td_cmd,
 305                        uint32_t *td_offset,
 306                        union i40e_tx_offload tx_offload)
 307{
 308        /* Set MACLEN */
 309        if (ol_flags & PKT_TX_TUNNEL_MASK)
 310                *td_offset |= (tx_offload.outer_l2_len >> 1)
 311                                << I40E_TX_DESC_LENGTH_MACLEN_SHIFT;
 312        else
 313                *td_offset |= (tx_offload.l2_len >> 1)
 314                        << I40E_TX_DESC_LENGTH_MACLEN_SHIFT;
 315
 316        /* Enable L3 checksum offloads */
 317        if (ol_flags & PKT_TX_IP_CKSUM) {
 318                *td_cmd |= I40E_TX_DESC_CMD_IIPT_IPV4_CSUM;
 319                *td_offset |= (tx_offload.l3_len >> 2)
 320                                << I40E_TX_DESC_LENGTH_IPLEN_SHIFT;
 321        } else if (ol_flags & PKT_TX_IPV4) {
 322                *td_cmd |= I40E_TX_DESC_CMD_IIPT_IPV4;
 323                *td_offset |= (tx_offload.l3_len >> 2)
 324                                << I40E_TX_DESC_LENGTH_IPLEN_SHIFT;
 325        } else if (ol_flags & PKT_TX_IPV6) {
 326                *td_cmd |= I40E_TX_DESC_CMD_IIPT_IPV6;
 327                *td_offset |= (tx_offload.l3_len >> 2)
 328                                << I40E_TX_DESC_LENGTH_IPLEN_SHIFT;
 329        }
 330
 331        if (ol_flags & PKT_TX_TCP_SEG) {
 332                *td_cmd |= I40E_TX_DESC_CMD_L4T_EOFT_TCP;
 333                *td_offset |= (tx_offload.l4_len >> 2)
 334                        << I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
 335                return;
 336        }
 337
 338        /* Enable L4 checksum offloads */
 339        switch (ol_flags & PKT_TX_L4_MASK) {
 340        case PKT_TX_TCP_CKSUM:
 341                *td_cmd |= I40E_TX_DESC_CMD_L4T_EOFT_TCP;
 342                *td_offset |= (sizeof(struct rte_tcp_hdr) >> 2) <<
 343                                I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
 344                break;
 345        case PKT_TX_SCTP_CKSUM:
 346                *td_cmd |= I40E_TX_DESC_CMD_L4T_EOFT_SCTP;
 347                *td_offset |= (sizeof(struct rte_sctp_hdr) >> 2) <<
 348                                I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
 349                break;
 350        case PKT_TX_UDP_CKSUM:
 351                *td_cmd |= I40E_TX_DESC_CMD_L4T_EOFT_UDP;
 352                *td_offset |= (sizeof(struct rte_udp_hdr) >> 2) <<
 353                                I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
 354                break;
 355        default:
 356                break;
 357        }
 358}
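/*
 * Worked example (editorial, not part of the driver): for an untunneled
 * IPv4/TCP packet with a 14-byte Ethernet header, a 20-byte IP header and a
 * 20-byte TCP header, the offsets programmed above are expressed in hardware
 * units of 2-byte words (MACLEN) and 4-byte dwords (IPLEN, L4LEN), matching
 * the >> 1 and >> 2 shifts in the function.
 */
#if 0
static uint32_t
example_td_offset(void)
{
        uint32_t td_offset = 0;

        td_offset |= (14 >> 1) << I40E_TX_DESC_LENGTH_MACLEN_SHIFT;   /* 7 words */
        td_offset |= (20 >> 2) << I40E_TX_DESC_LENGTH_IPLEN_SHIFT;    /* 5 dwords */
        td_offset |= (20 >> 2) << I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;/* 5 dwords */
        return td_offset;
}
#endif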
 359
 360/* Construct the tx flags */
 361static inline uint64_t
 362i40e_build_ctob(uint32_t td_cmd,
 363                uint32_t td_offset,
 364                unsigned int size,
 365                uint32_t td_tag)
 366{
 367        return rte_cpu_to_le_64(I40E_TX_DESC_DTYPE_DATA |
 368                        ((uint64_t)td_cmd  << I40E_TXD_QW1_CMD_SHIFT) |
 369                        ((uint64_t)td_offset << I40E_TXD_QW1_OFFSET_SHIFT) |
 370                        ((uint64_t)size  << I40E_TXD_QW1_TX_BUF_SZ_SHIFT) |
 371                        ((uint64_t)td_tag  << I40E_TXD_QW1_L2TAG1_SHIFT));
 372}
 373
 374static inline int
 375i40e_xmit_cleanup(struct i40e_tx_queue *txq)
 376{
 377        struct i40e_tx_entry *sw_ring = txq->sw_ring;
 378        volatile struct i40e_tx_desc *txd = txq->tx_ring;
 379        uint16_t last_desc_cleaned = txq->last_desc_cleaned;
 380        uint16_t nb_tx_desc = txq->nb_tx_desc;
 381        uint16_t desc_to_clean_to;
 382        uint16_t nb_tx_to_clean;
 383
 384        desc_to_clean_to = (uint16_t)(last_desc_cleaned + txq->tx_rs_thresh);
 385        if (desc_to_clean_to >= nb_tx_desc)
 386                desc_to_clean_to = (uint16_t)(desc_to_clean_to - nb_tx_desc);
 387
 388        desc_to_clean_to = sw_ring[desc_to_clean_to].last_id;
 389        if ((txd[desc_to_clean_to].cmd_type_offset_bsz &
 390                        rte_cpu_to_le_64(I40E_TXD_QW1_DTYPE_MASK)) !=
 391                        rte_cpu_to_le_64(I40E_TX_DESC_DTYPE_DESC_DONE)) {
 392                PMD_TX_LOG(DEBUG, "TX descriptor %4u is not done "
 393                           "(port=%d queue=%d)", desc_to_clean_to,
 394                           txq->port_id, txq->queue_id);
 395                return -1;
 396        }
 397
 398        if (last_desc_cleaned > desc_to_clean_to)
 399                nb_tx_to_clean = (uint16_t)((nb_tx_desc - last_desc_cleaned) +
 400                                                        desc_to_clean_to);
 401        else
 402                nb_tx_to_clean = (uint16_t)(desc_to_clean_to -
 403                                        last_desc_cleaned);
 404
 405        txd[desc_to_clean_to].cmd_type_offset_bsz = 0;
 406
 407        txq->last_desc_cleaned = desc_to_clean_to;
 408        txq->nb_tx_free = (uint16_t)(txq->nb_tx_free + nb_tx_to_clean);
 409
 410        return 0;
 411}
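/*
 * Illustrative sketch (editorial, not part of the driver): tx_rs_thresh
 * controls how often the RS bit is set and therefore how many descriptors
 * the cleanup above reclaims per call; tx_free_thresh decides when cleanup
 * is attempted. Both are chosen at queue setup time; the values below are
 * examples, not requirements.
 */
#if 0
static int
example_setup_tx_queue(uint16_t port_id, uint16_t queue_id, unsigned int socket)
{
        struct rte_eth_txconf txconf = {
                .tx_rs_thresh = 32,   /* request write-back every 32 descriptors */
                .tx_free_thresh = 32, /* clean when fewer than 32 are free */
        };

        return rte_eth_tx_queue_setup(port_id, queue_id, 1024, socket, &txconf);
}
#endif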
 412
 413static inline int
 414#ifdef RTE_LIBRTE_I40E_RX_ALLOW_BULK_ALLOC
 415check_rx_burst_bulk_alloc_preconditions(struct i40e_rx_queue *rxq)
 416#else
 417check_rx_burst_bulk_alloc_preconditions(__rte_unused struct i40e_rx_queue *rxq)
 418#endif
 419{
 420        int ret = 0;
 421
 422#ifdef RTE_LIBRTE_I40E_RX_ALLOW_BULK_ALLOC
 423        if (!(rxq->rx_free_thresh >= RTE_PMD_I40E_RX_MAX_BURST)) {
 424                PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions: "
 425                             "rxq->rx_free_thresh=%d, "
 426                             "RTE_PMD_I40E_RX_MAX_BURST=%d",
 427                             rxq->rx_free_thresh, RTE_PMD_I40E_RX_MAX_BURST);
 428                ret = -EINVAL;
 429        } else if (!(rxq->rx_free_thresh < rxq->nb_rx_desc)) {
 430                PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions: "
 431                             "rxq->rx_free_thresh=%d, "
 432                             "rxq->nb_rx_desc=%d",
 433                             rxq->rx_free_thresh, rxq->nb_rx_desc);
 434                ret = -EINVAL;
 435        } else if (rxq->nb_rx_desc % rxq->rx_free_thresh != 0) {
 436                PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions: "
 437                             "rxq->nb_rx_desc=%d, "
 438                             "rxq->rx_free_thresh=%d",
 439                             rxq->nb_rx_desc, rxq->rx_free_thresh);
 440                ret = -EINVAL;
 441        }
 442#else
 443        ret = -EINVAL;
 444#endif
 445
 446        return ret;
 447}
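/*
 * Illustrative sketch (editorial, not part of the driver): an RX queue
 * configuration that satisfies the bulk-alloc preconditions above
 * (rx_free_thresh at least RTE_PMD_I40E_RX_MAX_BURST, smaller than and
 * evenly dividing the ring size). The concrete numbers are examples only.
 */
#if 0
static int
example_setup_rx_queue(uint16_t port_id, uint16_t queue_id,
                       unsigned int socket, struct rte_mempool *mp)
{
        struct rte_eth_rxconf rxconf = {
                .rx_free_thresh = 32, /* >= RTE_PMD_I40E_RX_MAX_BURST */
        };

        /* 1024 % 32 == 0 and 32 < 1024, so the precondition check passes */
        return rte_eth_rx_queue_setup(port_id, queue_id, 1024, socket,
                                      &rxconf, mp);
}
#endif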
 448
 449#ifdef RTE_LIBRTE_I40E_RX_ALLOW_BULK_ALLOC
 450#define I40E_LOOK_AHEAD 8
 451#if (I40E_LOOK_AHEAD != 8)
 452#error "PMD I40E: I40E_LOOK_AHEAD must be 8\n"
 453#endif
 454static inline int
 455i40e_rx_scan_hw_ring(struct i40e_rx_queue *rxq)
 456{
 457        volatile union i40e_rx_desc *rxdp;
 458        struct i40e_rx_entry *rxep;
 459        struct rte_mbuf *mb;
 460        uint16_t pkt_len;
 461        uint64_t qword1;
 462        uint32_t rx_status;
 463        int32_t s[I40E_LOOK_AHEAD], var, nb_dd;
 464        int32_t i, j, nb_rx = 0;
 465        uint64_t pkt_flags;
 466        uint32_t *ptype_tbl = rxq->vsi->adapter->ptype_tbl;
 467
 468        rxdp = &rxq->rx_ring[rxq->rx_tail];
 469        rxep = &rxq->sw_ring[rxq->rx_tail];
 470
 471        qword1 = rte_le_to_cpu_64(rxdp->wb.qword1.status_error_len);
 472        rx_status = (qword1 & I40E_RXD_QW1_STATUS_MASK) >>
 473                                I40E_RXD_QW1_STATUS_SHIFT;
 474
 475        /* Make sure there is at least 1 packet to receive */
 476        if (!(rx_status & (1 << I40E_RX_DESC_STATUS_DD_SHIFT)))
 477                return 0;
 478
 479        /**
 480         * Scan LOOK_AHEAD descriptors at a time to determine which
 481         * descriptors reference packets that are ready to be received.
 482         */
  483        for (i = 0; i < RTE_PMD_I40E_RX_MAX_BURST; i += I40E_LOOK_AHEAD,
 484                        rxdp += I40E_LOOK_AHEAD, rxep += I40E_LOOK_AHEAD) {
 485                /* Read desc statuses backwards to avoid race condition */
 486                for (j = I40E_LOOK_AHEAD - 1; j >= 0; j--) {
 487                        qword1 = rte_le_to_cpu_64(\
 488                                rxdp[j].wb.qword1.status_error_len);
 489                        s[j] = (qword1 & I40E_RXD_QW1_STATUS_MASK) >>
 490                                        I40E_RXD_QW1_STATUS_SHIFT;
 491                }
 492
 493                /* This barrier is to order loads of different words in the descriptor */
 494                rte_atomic_thread_fence(__ATOMIC_ACQUIRE);
 495
 496                /* Compute how many status bits were set */
 497                for (j = 0, nb_dd = 0; j < I40E_LOOK_AHEAD; j++) {
 498                        var = s[j] & (1 << I40E_RX_DESC_STATUS_DD_SHIFT);
 499#ifdef RTE_ARCH_ARM
 500                        /* For Arm platforms, only compute continuous status bits */
 501                        if (var)
 502                                nb_dd += 1;
 503                        else
 504                                break;
 505#else
 506                        nb_dd += var;
 507#endif
 508                }
 509
 510                nb_rx += nb_dd;
 511
 512                /* Translate descriptor info to mbuf parameters */
 513                for (j = 0; j < nb_dd; j++) {
 514                        mb = rxep[j].mbuf;
 515                        qword1 = rte_le_to_cpu_64(\
 516                                rxdp[j].wb.qword1.status_error_len);
 517                        pkt_len = ((qword1 & I40E_RXD_QW1_LENGTH_PBUF_MASK) >>
 518                                I40E_RXD_QW1_LENGTH_PBUF_SHIFT) - rxq->crc_len;
 519                        mb->data_len = pkt_len;
 520                        mb->pkt_len = pkt_len;
 521                        mb->ol_flags = 0;
 522                        i40e_rxd_to_vlan_tci(mb, &rxdp[j]);
 523                        pkt_flags = i40e_rxd_status_to_pkt_flags(qword1);
 524                        pkt_flags |= i40e_rxd_error_to_pkt_flags(qword1);
 525                        mb->packet_type =
 526                                ptype_tbl[(uint8_t)((qword1 &
 527                                I40E_RXD_QW1_PTYPE_MASK) >>
 528                                I40E_RXD_QW1_PTYPE_SHIFT)];
 529                        if (pkt_flags & PKT_RX_RSS_HASH)
 530                                mb->hash.rss = rte_le_to_cpu_32(\
 531                                        rxdp[j].wb.qword0.hi_dword.rss);
 532                        if (pkt_flags & PKT_RX_FDIR)
 533                                pkt_flags |= i40e_rxd_build_fdir(&rxdp[j], mb);
 534
 535#ifdef RTE_LIBRTE_IEEE1588
 536                        pkt_flags |= i40e_get_iee15888_flags(mb, qword1);
 537#endif
 538                        mb->ol_flags |= pkt_flags;
 539
 540                }
 541
 542                for (j = 0; j < I40E_LOOK_AHEAD; j++)
 543                        rxq->rx_stage[i + j] = rxep[j].mbuf;
 544
 545                if (nb_dd != I40E_LOOK_AHEAD)
 546                        break;
 547        }
 548
 549        /* Clear software ring entries */
 550        for (i = 0; i < nb_rx; i++)
 551                rxq->sw_ring[rxq->rx_tail + i].mbuf = NULL;
 552
 553        return nb_rx;
 554}
 555
 556static inline uint16_t
 557i40e_rx_fill_from_stage(struct i40e_rx_queue *rxq,
 558                        struct rte_mbuf **rx_pkts,
 559                        uint16_t nb_pkts)
 560{
 561        uint16_t i;
 562        struct rte_mbuf **stage = &rxq->rx_stage[rxq->rx_next_avail];
 563
 564        nb_pkts = (uint16_t)RTE_MIN(nb_pkts, rxq->rx_nb_avail);
 565
 566        for (i = 0; i < nb_pkts; i++)
 567                rx_pkts[i] = stage[i];
 568
 569        rxq->rx_nb_avail = (uint16_t)(rxq->rx_nb_avail - nb_pkts);
 570        rxq->rx_next_avail = (uint16_t)(rxq->rx_next_avail + nb_pkts);
 571
 572        return nb_pkts;
 573}
 574
 575static inline int
 576i40e_rx_alloc_bufs(struct i40e_rx_queue *rxq)
 577{
 578        volatile union i40e_rx_desc *rxdp;
 579        struct i40e_rx_entry *rxep;
 580        struct rte_mbuf *mb;
 581        uint16_t alloc_idx, i;
 582        uint64_t dma_addr;
 583        int diag;
 584
 585        /* Allocate buffers in bulk */
 586        alloc_idx = (uint16_t)(rxq->rx_free_trigger -
 587                                (rxq->rx_free_thresh - 1));
 588        rxep = &(rxq->sw_ring[alloc_idx]);
 589        diag = rte_mempool_get_bulk(rxq->mp, (void *)rxep,
 590                                        rxq->rx_free_thresh);
 591        if (unlikely(diag != 0)) {
 592                PMD_DRV_LOG(ERR, "Failed to get mbufs in bulk");
 593                return -ENOMEM;
 594        }
 595
 596        rxdp = &rxq->rx_ring[alloc_idx];
 597        for (i = 0; i < rxq->rx_free_thresh; i++) {
 598                if (likely(i < (rxq->rx_free_thresh - 1)))
 599                        /* Prefetch next mbuf */
 600                        rte_prefetch0(rxep[i + 1].mbuf);
 601
 602                mb = rxep[i].mbuf;
 603                rte_mbuf_refcnt_set(mb, 1);
 604                mb->next = NULL;
 605                mb->data_off = RTE_PKTMBUF_HEADROOM;
 606                mb->nb_segs = 1;
 607                mb->port = rxq->port_id;
 608                dma_addr = rte_cpu_to_le_64(\
 609                        rte_mbuf_data_iova_default(mb));
 610                rxdp[i].read.hdr_addr = 0;
 611                rxdp[i].read.pkt_addr = dma_addr;
 612        }
 613
  614        /* Update rx tail register */
 615        I40E_PCI_REG_WRITE(rxq->qrx_tail, rxq->rx_free_trigger);
 616
 617        rxq->rx_free_trigger =
 618                (uint16_t)(rxq->rx_free_trigger + rxq->rx_free_thresh);
 619        if (rxq->rx_free_trigger >= rxq->nb_rx_desc)
 620                rxq->rx_free_trigger = (uint16_t)(rxq->rx_free_thresh - 1);
 621
 622        return 0;
 623}
 624
 625static inline uint16_t
 626rx_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
 627{
 628        struct i40e_rx_queue *rxq = (struct i40e_rx_queue *)rx_queue;
 629        struct rte_eth_dev *dev;
 630        uint16_t nb_rx = 0;
 631
 632        if (!nb_pkts)
 633                return 0;
 634
 635        if (rxq->rx_nb_avail)
 636                return i40e_rx_fill_from_stage(rxq, rx_pkts, nb_pkts);
 637
 638        nb_rx = (uint16_t)i40e_rx_scan_hw_ring(rxq);
 639        rxq->rx_next_avail = 0;
 640        rxq->rx_nb_avail = nb_rx;
 641        rxq->rx_tail = (uint16_t)(rxq->rx_tail + nb_rx);
 642
 643        if (rxq->rx_tail > rxq->rx_free_trigger) {
 644                if (i40e_rx_alloc_bufs(rxq) != 0) {
 645                        uint16_t i, j;
 646
 647                        dev = I40E_VSI_TO_ETH_DEV(rxq->vsi);
 648                        dev->data->rx_mbuf_alloc_failed +=
 649                                rxq->rx_free_thresh;
 650
 651                        rxq->rx_nb_avail = 0;
 652                        rxq->rx_tail = (uint16_t)(rxq->rx_tail - nb_rx);
 653                        for (i = 0, j = rxq->rx_tail; i < nb_rx; i++, j++)
 654                                rxq->sw_ring[j].mbuf = rxq->rx_stage[i];
 655
 656                        return 0;
 657                }
 658        }
 659
 660        if (rxq->rx_tail >= rxq->nb_rx_desc)
 661                rxq->rx_tail = 0;
 662
 663        if (rxq->rx_nb_avail)
 664                return i40e_rx_fill_from_stage(rxq, rx_pkts, nb_pkts);
 665
 666        return 0;
 667}
 668
 669static uint16_t
 670i40e_recv_pkts_bulk_alloc(void *rx_queue,
 671                          struct rte_mbuf **rx_pkts,
 672                          uint16_t nb_pkts)
 673{
 674        uint16_t nb_rx = 0, n, count;
 675
 676        if (unlikely(nb_pkts == 0))
 677                return 0;
 678
 679        if (likely(nb_pkts <= RTE_PMD_I40E_RX_MAX_BURST))
 680                return rx_recv_pkts(rx_queue, rx_pkts, nb_pkts);
 681
 682        while (nb_pkts) {
 683                n = RTE_MIN(nb_pkts, RTE_PMD_I40E_RX_MAX_BURST);
 684                count = rx_recv_pkts(rx_queue, &rx_pkts[nb_rx], n);
 685                nb_rx = (uint16_t)(nb_rx + count);
 686                nb_pkts = (uint16_t)(nb_pkts - count);
 687                if (count < n)
 688                        break;
 689        }
 690
 691        return nb_rx;
 692}
 693#else
 694static uint16_t
 695i40e_recv_pkts_bulk_alloc(void __rte_unused *rx_queue,
 696                          struct rte_mbuf __rte_unused **rx_pkts,
 697                          uint16_t __rte_unused nb_pkts)
 698{
 699        return 0;
 700}
 701#endif /* RTE_LIBRTE_I40E_RX_ALLOW_BULK_ALLOC */
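/*
 * Illustrative sketch (editorial, not part of the driver): the bulk-alloc
 * receive path above is reached through the generic burst API; requests
 * larger than RTE_PMD_I40E_RX_MAX_BURST are split internally. The function
 * name is hypothetical.
 */
#if 0
static void
example_rx_poll(uint16_t port_id, uint16_t queue_id)
{
        struct rte_mbuf *pkts[64];
        uint16_t nb, i;

        nb = rte_eth_rx_burst(port_id, queue_id, pkts, 64);
        for (i = 0; i < nb; i++)
                rte_pktmbuf_free(pkts[i]);  /* application processing here */
}
#endif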
 702
 703uint16_t
 704i40e_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
 705{
 706        struct i40e_rx_queue *rxq;
 707        volatile union i40e_rx_desc *rx_ring;
 708        volatile union i40e_rx_desc *rxdp;
 709        union i40e_rx_desc rxd;
 710        struct i40e_rx_entry *sw_ring;
 711        struct i40e_rx_entry *rxe;
 712        struct rte_eth_dev *dev;
 713        struct rte_mbuf *rxm;
 714        struct rte_mbuf *nmb;
 715        uint16_t nb_rx;
 716        uint32_t rx_status;
 717        uint64_t qword1;
 718        uint16_t rx_packet_len;
 719        uint16_t rx_id, nb_hold;
 720        uint64_t dma_addr;
 721        uint64_t pkt_flags;
 722        uint32_t *ptype_tbl;
 723
 724        nb_rx = 0;
 725        nb_hold = 0;
 726        rxq = rx_queue;
 727        rx_id = rxq->rx_tail;
 728        rx_ring = rxq->rx_ring;
 729        sw_ring = rxq->sw_ring;
 730        ptype_tbl = rxq->vsi->adapter->ptype_tbl;
 731
 732        while (nb_rx < nb_pkts) {
 733                rxdp = &rx_ring[rx_id];
 734                qword1 = rte_le_to_cpu_64(rxdp->wb.qword1.status_error_len);
 735                rx_status = (qword1 & I40E_RXD_QW1_STATUS_MASK)
 736                                >> I40E_RXD_QW1_STATUS_SHIFT;
 737
 738                /* Check the DD bit first */
 739                if (!(rx_status & (1 << I40E_RX_DESC_STATUS_DD_SHIFT)))
 740                        break;
 741
 742                nmb = rte_mbuf_raw_alloc(rxq->mp);
 743                if (unlikely(!nmb)) {
 744                        dev = I40E_VSI_TO_ETH_DEV(rxq->vsi);
 745                        dev->data->rx_mbuf_alloc_failed++;
 746                        break;
 747                }
 748
 749                rxd = *rxdp;
 750                nb_hold++;
 751                rxe = &sw_ring[rx_id];
 752                rx_id++;
 753                if (unlikely(rx_id == rxq->nb_rx_desc))
 754                        rx_id = 0;
 755
 756                /* Prefetch next mbuf */
 757                rte_prefetch0(sw_ring[rx_id].mbuf);
 758
 759                /**
 760                 * When next RX descriptor is on a cache line boundary,
 761                 * prefetch the next 4 RX descriptors and next 8 pointers
 762                 * to mbufs.
 763                 */
 764                if ((rx_id & 0x3) == 0) {
 765                        rte_prefetch0(&rx_ring[rx_id]);
 766                        rte_prefetch0(&sw_ring[rx_id]);
 767                }
 768                rxm = rxe->mbuf;
 769                rxe->mbuf = nmb;
 770                dma_addr =
 771                        rte_cpu_to_le_64(rte_mbuf_data_iova_default(nmb));
 772                rxdp->read.hdr_addr = 0;
 773                rxdp->read.pkt_addr = dma_addr;
 774
 775                rx_packet_len = ((qword1 & I40E_RXD_QW1_LENGTH_PBUF_MASK) >>
 776                                I40E_RXD_QW1_LENGTH_PBUF_SHIFT) - rxq->crc_len;
 777
 778                rxm->data_off = RTE_PKTMBUF_HEADROOM;
 779                rte_prefetch0(RTE_PTR_ADD(rxm->buf_addr, RTE_PKTMBUF_HEADROOM));
 780                rxm->nb_segs = 1;
 781                rxm->next = NULL;
 782                rxm->pkt_len = rx_packet_len;
 783                rxm->data_len = rx_packet_len;
 784                rxm->port = rxq->port_id;
 785                rxm->ol_flags = 0;
 786                i40e_rxd_to_vlan_tci(rxm, &rxd);
 787                pkt_flags = i40e_rxd_status_to_pkt_flags(qword1);
 788                pkt_flags |= i40e_rxd_error_to_pkt_flags(qword1);
 789                rxm->packet_type =
 790                        ptype_tbl[(uint8_t)((qword1 &
 791                        I40E_RXD_QW1_PTYPE_MASK) >> I40E_RXD_QW1_PTYPE_SHIFT)];
 792                if (pkt_flags & PKT_RX_RSS_HASH)
 793                        rxm->hash.rss =
 794                                rte_le_to_cpu_32(rxd.wb.qword0.hi_dword.rss);
 795                if (pkt_flags & PKT_RX_FDIR)
 796                        pkt_flags |= i40e_rxd_build_fdir(&rxd, rxm);
 797
 798#ifdef RTE_LIBRTE_IEEE1588
 799                pkt_flags |= i40e_get_iee15888_flags(rxm, qword1);
 800#endif
 801                rxm->ol_flags |= pkt_flags;
 802
 803                rx_pkts[nb_rx++] = rxm;
 804        }
 805        rxq->rx_tail = rx_id;
 806
 807        /**
 808         * If the number of free RX descriptors is greater than the RX free
  809         * threshold of the queue, advance the queue's receive tail register.
 810         * Update that register with the value of the last processed RX
 811         * descriptor minus 1.
 812         */
 813        nb_hold = (uint16_t)(nb_hold + rxq->nb_rx_hold);
 814        if (nb_hold > rxq->rx_free_thresh) {
 815                rx_id = (uint16_t) ((rx_id == 0) ?
 816                        (rxq->nb_rx_desc - 1) : (rx_id - 1));
 817                I40E_PCI_REG_WC_WRITE(rxq->qrx_tail, rx_id);
 818                nb_hold = 0;
 819        }
 820        rxq->nb_rx_hold = nb_hold;
 821
 822        return nb_rx;
 823}
 824
 825uint16_t
 826i40e_recv_scattered_pkts(void *rx_queue,
 827                         struct rte_mbuf **rx_pkts,
 828                         uint16_t nb_pkts)
 829{
 830        struct i40e_rx_queue *rxq = rx_queue;
 831        volatile union i40e_rx_desc *rx_ring = rxq->rx_ring;
 832        volatile union i40e_rx_desc *rxdp;
 833        union i40e_rx_desc rxd;
 834        struct i40e_rx_entry *sw_ring = rxq->sw_ring;
 835        struct i40e_rx_entry *rxe;
 836        struct rte_mbuf *first_seg = rxq->pkt_first_seg;
 837        struct rte_mbuf *last_seg = rxq->pkt_last_seg;
 838        struct rte_mbuf *nmb, *rxm;
 839        uint16_t rx_id = rxq->rx_tail;
 840        uint16_t nb_rx = 0, nb_hold = 0, rx_packet_len;
 841        struct rte_eth_dev *dev;
 842        uint32_t rx_status;
 843        uint64_t qword1;
 844        uint64_t dma_addr;
 845        uint64_t pkt_flags;
 846        uint32_t *ptype_tbl = rxq->vsi->adapter->ptype_tbl;
 847
 848        while (nb_rx < nb_pkts) {
 849                rxdp = &rx_ring[rx_id];
 850                qword1 = rte_le_to_cpu_64(rxdp->wb.qword1.status_error_len);
 851                rx_status = (qword1 & I40E_RXD_QW1_STATUS_MASK) >>
 852                                        I40E_RXD_QW1_STATUS_SHIFT;
 853
 854                /* Check the DD bit */
 855                if (!(rx_status & (1 << I40E_RX_DESC_STATUS_DD_SHIFT)))
 856                        break;
 857
 858                nmb = rte_mbuf_raw_alloc(rxq->mp);
 859                if (unlikely(!nmb)) {
 860                        dev = I40E_VSI_TO_ETH_DEV(rxq->vsi);
 861                        dev->data->rx_mbuf_alloc_failed++;
 862                        break;
 863                }
 864
 865                rxd = *rxdp;
 866                nb_hold++;
 867                rxe = &sw_ring[rx_id];
 868                rx_id++;
 869                if (rx_id == rxq->nb_rx_desc)
 870                        rx_id = 0;
 871
 872                /* Prefetch next mbuf */
 873                rte_prefetch0(sw_ring[rx_id].mbuf);
 874
 875                /**
 876                 * When next RX descriptor is on a cache line boundary,
 877                 * prefetch the next 4 RX descriptors and next 8 pointers
 878                 * to mbufs.
 879                 */
 880                if ((rx_id & 0x3) == 0) {
 881                        rte_prefetch0(&rx_ring[rx_id]);
 882                        rte_prefetch0(&sw_ring[rx_id]);
 883                }
 884
 885                rxm = rxe->mbuf;
 886                rxe->mbuf = nmb;
 887                dma_addr =
 888                        rte_cpu_to_le_64(rte_mbuf_data_iova_default(nmb));
 889
 890                /* Set data buffer address and data length of the mbuf */
 891                rxdp->read.hdr_addr = 0;
 892                rxdp->read.pkt_addr = dma_addr;
 893                rx_packet_len = (qword1 & I40E_RXD_QW1_LENGTH_PBUF_MASK) >>
 894                                        I40E_RXD_QW1_LENGTH_PBUF_SHIFT;
 895                rxm->data_len = rx_packet_len;
 896                rxm->data_off = RTE_PKTMBUF_HEADROOM;
 897
 898                /**
 899                 * If this is the first buffer of the received packet, set the
 900                 * pointer to the first mbuf of the packet and initialize its
 901                 * context. Otherwise, update the total length and the number
 902                 * of segments of the current scattered packet, and update the
 903                 * pointer to the last mbuf of the current packet.
 904                 */
 905                if (!first_seg) {
 906                        first_seg = rxm;
 907                        first_seg->nb_segs = 1;
 908                        first_seg->pkt_len = rx_packet_len;
 909                } else {
 910                        first_seg->pkt_len =
 911                                (uint16_t)(first_seg->pkt_len +
 912                                                rx_packet_len);
 913                        first_seg->nb_segs++;
 914                        last_seg->next = rxm;
 915                }
 916
 917                /**
 918                 * If this is not the last buffer of the received packet,
 919                 * update the pointer to the last mbuf of the current scattered
 920                 * packet and continue to parse the RX ring.
 921                 */
 922                if (!(rx_status & (1 << I40E_RX_DESC_STATUS_EOF_SHIFT))) {
 923                        last_seg = rxm;
 924                        continue;
 925                }
 926
 927                /**
 928                 * This is the last buffer of the received packet. If the CRC
 929                 * is not stripped by the hardware:
 930                 *  - Subtract the CRC length from the total packet length.
 931                 *  - If the last buffer only contains the whole CRC or a part
 932                 *  of it, free the mbuf associated to the last buffer. If part
 933                 *  of the CRC is also contained in the previous mbuf, subtract
 934                 *  the length of that CRC part from the data length of the
 935                 *  previous mbuf.
 936                 */
 937                rxm->next = NULL;
 938                if (unlikely(rxq->crc_len > 0)) {
 939                        first_seg->pkt_len -= RTE_ETHER_CRC_LEN;
 940                        if (rx_packet_len <= RTE_ETHER_CRC_LEN) {
 941                                rte_pktmbuf_free_seg(rxm);
 942                                first_seg->nb_segs--;
 943                                last_seg->data_len =
 944                                        (uint16_t)(last_seg->data_len -
 945                                        (RTE_ETHER_CRC_LEN - rx_packet_len));
 946                                last_seg->next = NULL;
 947                        } else
 948                                rxm->data_len = (uint16_t)(rx_packet_len -
 949                                                        RTE_ETHER_CRC_LEN);
 950                }
 951
 952                first_seg->port = rxq->port_id;
 953                first_seg->ol_flags = 0;
 954                i40e_rxd_to_vlan_tci(first_seg, &rxd);
 955                pkt_flags = i40e_rxd_status_to_pkt_flags(qword1);
 956                pkt_flags |= i40e_rxd_error_to_pkt_flags(qword1);
 957                first_seg->packet_type =
 958                        ptype_tbl[(uint8_t)((qword1 &
 959                        I40E_RXD_QW1_PTYPE_MASK) >> I40E_RXD_QW1_PTYPE_SHIFT)];
 960                if (pkt_flags & PKT_RX_RSS_HASH)
 961                        first_seg->hash.rss =
 962                                rte_le_to_cpu_32(rxd.wb.qword0.hi_dword.rss);
 963                if (pkt_flags & PKT_RX_FDIR)
 964                        pkt_flags |= i40e_rxd_build_fdir(&rxd, first_seg);
 965
 966#ifdef RTE_LIBRTE_IEEE1588
 967                pkt_flags |= i40e_get_iee15888_flags(first_seg, qword1);
 968#endif
 969                first_seg->ol_flags |= pkt_flags;
 970
 971                /* Prefetch data of first segment, if configured to do so. */
 972                rte_prefetch0(RTE_PTR_ADD(first_seg->buf_addr,
 973                        first_seg->data_off));
 974                rx_pkts[nb_rx++] = first_seg;
 975                first_seg = NULL;
 976        }
 977
 978        /* Record index of the next RX descriptor to probe. */
 979        rxq->rx_tail = rx_id;
 980        rxq->pkt_first_seg = first_seg;
 981        rxq->pkt_last_seg = last_seg;
 982
 983        /**
 984         * If the number of free RX descriptors is greater than the RX free
 985         * threshold of the queue, advance the Receive Descriptor Tail (RDT)
 986         * register. Update the RDT with the value of the last processed RX
 987         * descriptor minus 1, to guarantee that the RDT register is never
  988         * equal to the RDH register, which creates a "full" ring situation
 989         * from the hardware point of view.
 990         */
 991        nb_hold = (uint16_t)(nb_hold + rxq->nb_rx_hold);
 992        if (nb_hold > rxq->rx_free_thresh) {
 993                rx_id = (uint16_t)(rx_id == 0 ?
 994                        (rxq->nb_rx_desc - 1) : (rx_id - 1));
 995                I40E_PCI_REG_WC_WRITE(rxq->qrx_tail, rx_id);
 996                nb_hold = 0;
 997        }
 998        rxq->nb_rx_hold = nb_hold;
 999
1000        return nb_rx;
1001}
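/*
 * Illustrative sketch (editorial, not part of the driver): the scattered
 * receive path above is used when the DEV_RX_OFFLOAD_SCATTER offload is
 * enabled (the PMD also selects it automatically when the configured frame
 * size exceeds a single mbuf data buffer). Field names follow the ethdev
 * API of this DPDK era.
 */
#if 0
static void
example_enable_scatter(struct rte_eth_conf *port_conf)
{
        port_conf->rxmode.offloads |= DEV_RX_OFFLOAD_SCATTER;
}
#endif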
1002
1003/* Check if the context descriptor is needed for TX offloading */
1004static inline uint16_t
1005i40e_calc_context_desc(uint64_t flags)
1006{
1007        static uint64_t mask = PKT_TX_OUTER_IP_CKSUM |
1008                PKT_TX_TCP_SEG |
1009                PKT_TX_QINQ_PKT |
1010                PKT_TX_TUNNEL_MASK;
1011
1012#ifdef RTE_LIBRTE_IEEE1588
1013        mask |= PKT_TX_IEEE1588_TMST;
1014#endif
1015
1016        return (flags & mask) ? 1 : 0;
1017}
1018
1019/* set i40e TSO context descriptor */
1020static inline uint64_t
1021i40e_set_tso_ctx(struct rte_mbuf *mbuf, union i40e_tx_offload tx_offload)
1022{
1023        uint64_t ctx_desc = 0;
1024        uint32_t cd_cmd, hdr_len, cd_tso_len;
1025
1026        if (!tx_offload.l4_len) {
1027                PMD_DRV_LOG(DEBUG, "L4 length set to 0");
1028                return ctx_desc;
1029        }
1030
1031        hdr_len = tx_offload.l2_len + tx_offload.l3_len + tx_offload.l4_len;
1032        hdr_len += (mbuf->ol_flags & PKT_TX_TUNNEL_MASK) ?
1033                   tx_offload.outer_l2_len + tx_offload.outer_l3_len : 0;
1034
1035        cd_cmd = I40E_TX_CTX_DESC_TSO;
1036        cd_tso_len = mbuf->pkt_len - hdr_len;
1037        ctx_desc |= ((uint64_t)cd_cmd << I40E_TXD_CTX_QW1_CMD_SHIFT) |
1038                ((uint64_t)cd_tso_len <<
1039                 I40E_TXD_CTX_QW1_TSO_LEN_SHIFT) |
1040                ((uint64_t)mbuf->tso_segsz <<
1041                 I40E_TXD_CTX_QW1_MSS_SHIFT);
1042
1043        return ctx_desc;
1044}
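/*
 * Illustrative sketch (editorial, not part of the driver): the mbuf metadata
 * an application fills so the TSO context descriptor above gets built.
 * Lengths assume IPv4/TCP without options; 1460 is an example MSS.
 */
#if 0
static void
example_prepare_tso(struct rte_mbuf *m)
{
        m->l2_len = 14;          /* Ethernet */
        m->l3_len = 20;          /* IPv4 */
        m->l4_len = 20;          /* TCP */
        m->tso_segsz = 1460;     /* MSS of the resulting segments */
        m->ol_flags |= PKT_TX_TCP_SEG | PKT_TX_IPV4 |
                       PKT_TX_IP_CKSUM | PKT_TX_TCP_CKSUM;
}
#endif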
1045
1046/* HW requires that Tx buffer size ranges from 1B up to (16K-1)B. */
1047#define I40E_MAX_DATA_PER_TXD \
1048        (I40E_TXD_QW1_TX_BUF_SZ_MASK >> I40E_TXD_QW1_TX_BUF_SZ_SHIFT)
1049/* Calculate the number of TX descriptors needed for each pkt */
1050static inline uint16_t
1051i40e_calc_pkt_desc(struct rte_mbuf *tx_pkt)
1052{
1053        struct rte_mbuf *txd = tx_pkt;
1054        uint16_t count = 0;
1055
1056        while (txd != NULL) {
1057                count += DIV_ROUND_UP(txd->data_len, I40E_MAX_DATA_PER_TXD);
1058                txd = txd->next;
1059        }
1060
1061        return count;
1062}
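/*
 * Worked example (editorial, not part of the driver): with
 * I40E_MAX_DATA_PER_TXD = 16K - 1 = 16383 bytes, a TSO mbuf chain of a
 * 40000-byte segment followed by a 1000-byte segment needs
 * DIV_ROUND_UP(40000, 16383) + DIV_ROUND_UP(1000, 16383) = 3 + 1 = 4
 * data descriptors.
 */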
1063
1064uint16_t
1065i40e_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
1066{
1067        struct i40e_tx_queue *txq;
1068        struct i40e_tx_entry *sw_ring;
1069        struct i40e_tx_entry *txe, *txn;
1070        volatile struct i40e_tx_desc *txd;
1071        volatile struct i40e_tx_desc *txr;
1072        struct rte_mbuf *tx_pkt;
1073        struct rte_mbuf *m_seg;
1074        uint32_t cd_tunneling_params;
1075        uint16_t tx_id;
1076        uint16_t nb_tx;
1077        uint32_t td_cmd;
1078        uint32_t td_offset;
1079        uint32_t td_tag;
1080        uint64_t ol_flags;
1081        uint16_t nb_used;
1082        uint16_t nb_ctx;
1083        uint16_t tx_last;
1084        uint16_t slen;
1085        uint64_t buf_dma_addr;
1086        union i40e_tx_offload tx_offload = {0};
1087
1088        txq = tx_queue;
1089        sw_ring = txq->sw_ring;
1090        txr = txq->tx_ring;
1091        tx_id = txq->tx_tail;
1092        txe = &sw_ring[tx_id];
1093
1094        /* Check if the descriptor ring needs to be cleaned. */
1095        if (txq->nb_tx_free < txq->tx_free_thresh)
1096                (void)i40e_xmit_cleanup(txq);
1097
1098        for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
1099                td_cmd = 0;
1100                td_tag = 0;
1101                td_offset = 0;
1102
1103                tx_pkt = *tx_pkts++;
1104                RTE_MBUF_PREFETCH_TO_FREE(txe->mbuf);
1105
1106                ol_flags = tx_pkt->ol_flags;
1107                tx_offload.l2_len = tx_pkt->l2_len;
1108                tx_offload.l3_len = tx_pkt->l3_len;
1109                tx_offload.outer_l2_len = tx_pkt->outer_l2_len;
1110                tx_offload.outer_l3_len = tx_pkt->outer_l3_len;
1111                tx_offload.l4_len = tx_pkt->l4_len;
1112                tx_offload.tso_segsz = tx_pkt->tso_segsz;
1113
1114                /* Calculate the number of context descriptors needed. */
1115                nb_ctx = i40e_calc_context_desc(ol_flags);
1116
1117                /**
 1118                 * The number of descriptors that must be allocated for
 1119                 * a packet equals the number of segments of that packet,
 1120                 * plus 1 context descriptor if needed.
 1121                 * Recalculate the needed tx descriptors when TSO is enabled,
 1122                 * in case the mbuf data size exceeds the max data size that
 1123                 * the hardware allows per tx descriptor.
1124                 */
1125                if (ol_flags & PKT_TX_TCP_SEG)
1126                        nb_used = (uint16_t)(i40e_calc_pkt_desc(tx_pkt) +
1127                                             nb_ctx);
1128                else
1129                        nb_used = (uint16_t)(tx_pkt->nb_segs + nb_ctx);
1130                tx_last = (uint16_t)(tx_id + nb_used - 1);
1131
1132                /* Circular ring */
1133                if (tx_last >= txq->nb_tx_desc)
1134                        tx_last = (uint16_t)(tx_last - txq->nb_tx_desc);
1135
1136                if (nb_used > txq->nb_tx_free) {
1137                        if (i40e_xmit_cleanup(txq) != 0) {
1138                                if (nb_tx == 0)
1139                                        return 0;
1140                                goto end_of_tx;
1141                        }
1142                        if (unlikely(nb_used > txq->tx_rs_thresh)) {
1143                                while (nb_used > txq->nb_tx_free) {
1144                                        if (i40e_xmit_cleanup(txq) != 0) {
1145                                                if (nb_tx == 0)
1146                                                        return 0;
1147                                                goto end_of_tx;
1148                                        }
1149                                }
1150                        }
1151                }
1152
1153                /* Descriptor based VLAN insertion */
1154                if (ol_flags & (PKT_TX_VLAN_PKT | PKT_TX_QINQ_PKT)) {
1155                        td_cmd |= I40E_TX_DESC_CMD_IL2TAG1;
1156                        td_tag = tx_pkt->vlan_tci;
1157                }
1158
1159                /* Always enable CRC offload insertion */
1160                td_cmd |= I40E_TX_DESC_CMD_ICRC;
1161
1162                /* Fill in tunneling parameters if necessary */
1163                cd_tunneling_params = 0;
1164                if (ol_flags & PKT_TX_TUNNEL_MASK)
1165                        i40e_parse_tunneling_params(ol_flags, tx_offload,
1166                                                    &cd_tunneling_params);
1167                /* Enable checksum offloading */
1168                if (ol_flags & I40E_TX_CKSUM_OFFLOAD_MASK)
1169                        i40e_txd_enable_checksum(ol_flags, &td_cmd,
1170                                                 &td_offset, tx_offload);
1171
1172                if (nb_ctx) {
1173                        /* Setup TX context descriptor if required */
1174                        volatile struct i40e_tx_context_desc *ctx_txd =
1175                                (volatile struct i40e_tx_context_desc *)\
1176                                                        &txr[tx_id];
1177                        uint16_t cd_l2tag2 = 0;
1178                        uint64_t cd_type_cmd_tso_mss =
1179                                I40E_TX_DESC_DTYPE_CONTEXT;
1180
1181                        txn = &sw_ring[txe->next_id];
1182                        RTE_MBUF_PREFETCH_TO_FREE(txn->mbuf);
1183                        if (txe->mbuf != NULL) {
1184                                rte_pktmbuf_free_seg(txe->mbuf);
1185                                txe->mbuf = NULL;
1186                        }
1187
1188                        /* TSO enabled means no timestamp */
1189                        if (ol_flags & PKT_TX_TCP_SEG)
1190                                cd_type_cmd_tso_mss |=
1191                                        i40e_set_tso_ctx(tx_pkt, tx_offload);
1192                        else {
1193#ifdef RTE_LIBRTE_IEEE1588
1194                                if (ol_flags & PKT_TX_IEEE1588_TMST)
1195                                        cd_type_cmd_tso_mss |=
1196                                                ((uint64_t)I40E_TX_CTX_DESC_TSYN <<
1197                                                 I40E_TXD_CTX_QW1_CMD_SHIFT);
1198#endif
1199                        }
1200
1201                        ctx_txd->tunneling_params =
1202                                rte_cpu_to_le_32(cd_tunneling_params);
1203                        if (ol_flags & PKT_TX_QINQ_PKT) {
1204                                cd_l2tag2 = tx_pkt->vlan_tci_outer;
1205                                cd_type_cmd_tso_mss |=
1206                                        ((uint64_t)I40E_TX_CTX_DESC_IL2TAG2 <<
1207                                                I40E_TXD_CTX_QW1_CMD_SHIFT);
1208                        }
1209                        ctx_txd->l2tag2 = rte_cpu_to_le_16(cd_l2tag2);
1210                        ctx_txd->type_cmd_tso_mss =
1211                                rte_cpu_to_le_64(cd_type_cmd_tso_mss);
1212
1213                        PMD_TX_LOG(DEBUG, "mbuf: %p, TCD[%u]:\n"
1214                                "tunneling_params: %#x;\n"
1215                                "l2tag2: %#hx;\n"
1216                                "rsvd: %#hx;\n"
1217                                "type_cmd_tso_mss: %#"PRIx64";\n",
1218                                tx_pkt, tx_id,
1219                                ctx_txd->tunneling_params,
1220                                ctx_txd->l2tag2,
1221                                ctx_txd->rsvd,
1222                                ctx_txd->type_cmd_tso_mss);
1223
1224                        txe->last_id = tx_last;
1225                        tx_id = txe->next_id;
1226                        txe = txn;
1227                }
1228
1229                m_seg = tx_pkt;
1230                do {
1231                        txd = &txr[tx_id];
1232                        txn = &sw_ring[txe->next_id];
1233
1234                        if (txe->mbuf)
1235                                rte_pktmbuf_free_seg(txe->mbuf);
1236                        txe->mbuf = m_seg;
1237
1238                        /* Setup TX Descriptor */
1239                        slen = m_seg->data_len;
1240                        buf_dma_addr = rte_mbuf_data_iova(m_seg);
1241
1242                        while ((ol_flags & PKT_TX_TCP_SEG) &&
1243                                unlikely(slen > I40E_MAX_DATA_PER_TXD)) {
1244                                txd->buffer_addr =
1245                                        rte_cpu_to_le_64(buf_dma_addr);
1246                                txd->cmd_type_offset_bsz =
1247                                        i40e_build_ctob(td_cmd,
1248                                        td_offset, I40E_MAX_DATA_PER_TXD,
1249                                        td_tag);
1250
1251                                buf_dma_addr += I40E_MAX_DATA_PER_TXD;
1252                                slen -= I40E_MAX_DATA_PER_TXD;
1253
1254                                txe->last_id = tx_last;
1255                                tx_id = txe->next_id;
1256                                txe = txn;
1257                                txd = &txr[tx_id];
1258                                txn = &sw_ring[txe->next_id];
1259                        }
1260                        PMD_TX_LOG(DEBUG, "mbuf: %p, TDD[%u]:\n"
1261                                "buf_dma_addr: %#"PRIx64";\n"
1262                                "td_cmd: %#x;\n"
1263                                "td_offset: %#x;\n"
1264                                "td_len: %u;\n"
1265                                "td_tag: %#x;\n",
1266                                tx_pkt, tx_id, buf_dma_addr,
1267                                td_cmd, td_offset, slen, td_tag);
1268
1269                        txd->buffer_addr = rte_cpu_to_le_64(buf_dma_addr);
1270                        txd->cmd_type_offset_bsz = i40e_build_ctob(td_cmd,
1271                                                td_offset, slen, td_tag);
1272                        txe->last_id = tx_last;
1273                        tx_id = txe->next_id;
1274                        txe = txn;
1275                        m_seg = m_seg->next;
1276                } while (m_seg != NULL);
1277
1278                /* The last packet data descriptor needs End Of Packet (EOP) */
1279                td_cmd |= I40E_TX_DESC_CMD_EOP;
1280                txq->nb_tx_used = (uint16_t)(txq->nb_tx_used + nb_used);
1281                txq->nb_tx_free = (uint16_t)(txq->nb_tx_free - nb_used);
1282
1283                if (txq->nb_tx_used >= txq->tx_rs_thresh) {
1284                        PMD_TX_LOG(DEBUG,
1285                                   "Setting RS bit on TXD id="
1286                                   "%4u (port=%d queue=%d)",
1287                                   tx_last, txq->port_id, txq->queue_id);
1288
1289                        td_cmd |= I40E_TX_DESC_CMD_RS;
1290
1291                        /* Update txq RS bit counters */
1292                        txq->nb_tx_used = 0;
1293                }
1294
1295                txd->cmd_type_offset_bsz |=
1296                        rte_cpu_to_le_64(((uint64_t)td_cmd) <<
1297                                        I40E_TXD_QW1_CMD_SHIFT);
1298        }
1299
1300end_of_tx:
1301        PMD_TX_LOG(DEBUG, "port_id=%u queue_id=%u tx_tail=%u nb_tx=%u",
1302                   (unsigned) txq->port_id, (unsigned) txq->queue_id,
1303                   (unsigned) tx_id, (unsigned) nb_tx);
1304
1305        rte_io_wmb();
1306        I40E_PCI_REG_WC_WRITE_RELAXED(txq->qtx_tail, tx_id);
1307        txq->tx_tail = tx_id;
1308
1309        return nb_tx;
1310}
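/*
 * Illustrative sketch (editorial, not part of the driver): typical use of
 * the burst transmit path above, retrying until the whole burst has been
 * queued. A real application would bound the retries or drop on failure.
 */
#if 0
static void
example_tx_burst(uint16_t port_id, uint16_t queue_id,
                 struct rte_mbuf **pkts, uint16_t nb)
{
        uint16_t sent = 0;

        while (sent < nb)
                sent += rte_eth_tx_burst(port_id, queue_id,
                                         &pkts[sent], nb - sent);
}
#endif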
1311
1312static __rte_always_inline int
1313i40e_tx_free_bufs(struct i40e_tx_queue *txq)
1314{
1315        struct i40e_tx_entry *txep;
1316        uint16_t tx_rs_thresh = txq->tx_rs_thresh;
1317        uint16_t i = 0, j = 0;
1318        struct rte_mbuf *free[RTE_I40E_TX_MAX_FREE_BUF_SZ];
1319        const uint16_t k = RTE_ALIGN_FLOOR(tx_rs_thresh, RTE_I40E_TX_MAX_FREE_BUF_SZ);
1320        const uint16_t m = tx_rs_thresh % RTE_I40E_TX_MAX_FREE_BUF_SZ;
1321
1322        if ((txq->tx_ring[txq->tx_next_dd].cmd_type_offset_bsz &
1323                        rte_cpu_to_le_64(I40E_TXD_QW1_DTYPE_MASK)) !=
1324                        rte_cpu_to_le_64(I40E_TX_DESC_DTYPE_DESC_DONE))
1325                return 0;
1326
1327        txep = &txq->sw_ring[txq->tx_next_dd - (tx_rs_thresh - 1)];
1328
1329        for (i = 0; i < tx_rs_thresh; i++)
1330                rte_prefetch0((txep + i)->mbuf);
1331
1332        if (txq->offloads & DEV_TX_OFFLOAD_MBUF_FAST_FREE) {
1333                if (k) {
1334                        for (j = 0; j != k; j += RTE_I40E_TX_MAX_FREE_BUF_SZ) {
1335                                for (i = 0; i < RTE_I40E_TX_MAX_FREE_BUF_SZ; ++i, ++txep) {
1336                                        free[i] = txep->mbuf;
1337                                        txep->mbuf = NULL;
1338                                }
1339                                rte_mempool_put_bulk(free[0]->pool, (void **)free,
1340                                                RTE_I40E_TX_MAX_FREE_BUF_SZ);
1341                        }
1342                }
1343
1344                if (m) {
1345                        for (i = 0; i < m; ++i, ++txep) {
1346                                free[i] = txep->mbuf;
1347                                txep->mbuf = NULL;
1348                        }
1349                        rte_mempool_put_bulk(free[0]->pool, (void **)free, m);
1350                }
1351        } else {
1352                for (i = 0; i < txq->tx_rs_thresh; ++i, ++txep) {
1353                        rte_pktmbuf_free_seg(txep->mbuf);
1354                        txep->mbuf = NULL;
1355                }
1356        }
1357
1358        txq->nb_tx_free = (uint16_t)(txq->nb_tx_free + txq->tx_rs_thresh);
1359        txq->tx_next_dd = (uint16_t)(txq->tx_next_dd + txq->tx_rs_thresh);
1360        if (txq->tx_next_dd >= txq->nb_tx_desc)
1361                txq->tx_next_dd = (uint16_t)(txq->tx_rs_thresh - 1);
1362
1363        return txq->tx_rs_thresh;
1364}
1365
1366/* Populate 4 descriptors with data from 4 mbufs */
1367static inline void
1368tx4(volatile struct i40e_tx_desc *txdp, struct rte_mbuf **pkts)
1369{
1370        uint64_t dma_addr;
1371        uint32_t i;
1372
1373        for (i = 0; i < 4; i++, txdp++, pkts++) {
1374                dma_addr = rte_mbuf_data_iova(*pkts);
1375                txdp->buffer_addr = rte_cpu_to_le_64(dma_addr);
1376                txdp->cmd_type_offset_bsz =
1377                        i40e_build_ctob((uint32_t)I40E_TD_CMD, 0,
1378                                        (*pkts)->data_len, 0);
1379        }
1380}
1381
1382/* Populate 1 descriptor with data from 1 mbuf */
1383static inline void
1384tx1(volatile struct i40e_tx_desc *txdp, struct rte_mbuf **pkts)
1385{
1386        uint64_t dma_addr;
1387
1388        dma_addr = rte_mbuf_data_iova(*pkts);
1389        txdp->buffer_addr = rte_cpu_to_le_64(dma_addr);
1390        txdp->cmd_type_offset_bsz =
1391                i40e_build_ctob((uint32_t)I40E_TD_CMD, 0,
1392                                (*pkts)->data_len, 0);
1393}
1394
1395/* Fill hardware descriptor ring with mbuf data */
1396static inline void
1397i40e_tx_fill_hw_ring(struct i40e_tx_queue *txq,
1398                     struct rte_mbuf **pkts,
1399                     uint16_t nb_pkts)
1400{
1401        volatile struct i40e_tx_desc *txdp = &(txq->tx_ring[txq->tx_tail]);
1402        struct i40e_tx_entry *txep = &(txq->sw_ring[txq->tx_tail]);
1403        const int N_PER_LOOP = 4;
1404        const int N_PER_LOOP_MASK = N_PER_LOOP - 1;
1405        int mainpart, leftover;
1406        int i, j;
1407
1408        mainpart = (nb_pkts & ((uint32_t) ~N_PER_LOOP_MASK));
1409        leftover = (nb_pkts & ((uint32_t)  N_PER_LOOP_MASK));
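            /*
             * Worked example (illustrative only): nb_pkts = 10 gives
             * mainpart = 8 and leftover = 2, i.e. two tx4() calls below
             * followed by two tx1() calls.
             */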
1410        for (i = 0; i < mainpart; i += N_PER_LOOP) {
1411                for (j = 0; j < N_PER_LOOP; ++j) {
1412                        (txep + i + j)->mbuf = *(pkts + i + j);
1413                }
1414                tx4(txdp + i, pkts + i);
1415        }
1416        if (unlikely(leftover > 0)) {
1417                for (i = 0; i < leftover; ++i) {
1418                        (txep + mainpart + i)->mbuf = *(pkts + mainpart + i);
1419                        tx1(txdp + mainpart + i, pkts + mainpart + i);
1420                }
1421        }
1422}
1423
1424static inline uint16_t
1425tx_xmit_pkts(struct i40e_tx_queue *txq,
1426             struct rte_mbuf **tx_pkts,
1427             uint16_t nb_pkts)
1428{
1429        volatile struct i40e_tx_desc *txr = txq->tx_ring;
1430        uint16_t n = 0;
1431
1432        /**
1433         * Begin scanning the H/W ring for done descriptors when the number
1434         * of available descriptors drops below tx_free_thresh. For each done
1435         * descriptor, free the associated buffer.
1436         */
1437        if (txq->nb_tx_free < txq->tx_free_thresh)
1438                i40e_tx_free_bufs(txq);
1439
1440        /* Use only the available descriptors */
1441        nb_pkts = (uint16_t)RTE_MIN(txq->nb_tx_free, nb_pkts);
1442        if (unlikely(!nb_pkts))
1443                return 0;
1444
1445        txq->nb_tx_free = (uint16_t)(txq->nb_tx_free - nb_pkts);
1446        if ((txq->tx_tail + nb_pkts) > txq->nb_tx_desc) {
1447                n = (uint16_t)(txq->nb_tx_desc - txq->tx_tail);
1448                i40e_tx_fill_hw_ring(txq, tx_pkts, n);
1449                txr[txq->tx_next_rs].cmd_type_offset_bsz |=
1450                        rte_cpu_to_le_64(((uint64_t)I40E_TX_DESC_CMD_RS) <<
1451                                                I40E_TXD_QW1_CMD_SHIFT);
1452                txq->tx_next_rs = (uint16_t)(txq->tx_rs_thresh - 1);
1453                txq->tx_tail = 0;
1454        }
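            /*
             * Hypothetical example of the wrap-around branch above (values are
             * illustrative): with nb_tx_desc = 512, tx_tail = 508 and
             * nb_pkts = 8, n = 4 descriptors are filled at the end of the ring,
             * tx_tail wraps to 0, and the remaining 4 are filled below.
             */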
1455
1456        /* Fill hardware descriptor ring with mbuf data */
1457        i40e_tx_fill_hw_ring(txq, tx_pkts + n, (uint16_t)(nb_pkts - n));
1458        txq->tx_tail = (uint16_t)(txq->tx_tail + (nb_pkts - n));
1459
1460        /* Determine if the RS bit needs to be set */
1461        if (txq->tx_tail > txq->tx_next_rs) {
1462                txr[txq->tx_next_rs].cmd_type_offset_bsz |=
1463                        rte_cpu_to_le_64(((uint64_t)I40E_TX_DESC_CMD_RS) <<
1464                                                I40E_TXD_QW1_CMD_SHIFT);
1465                txq->tx_next_rs =
1466                        (uint16_t)(txq->tx_next_rs + txq->tx_rs_thresh);
1467                if (txq->tx_next_rs >= txq->nb_tx_desc)
1468                        txq->tx_next_rs = (uint16_t)(txq->tx_rs_thresh - 1);
1469        }
1470
1471        if (txq->tx_tail >= txq->nb_tx_desc)
1472                txq->tx_tail = 0;
1473
1474        /* Update the tx tail register */
1475        I40E_PCI_REG_WC_WRITE(txq->qtx_tail, txq->tx_tail);
1476
1477        return nb_pkts;
1478}
1479
1480static uint16_t
1481i40e_xmit_pkts_simple(void *tx_queue,
1482                      struct rte_mbuf **tx_pkts,
1483                      uint16_t nb_pkts)
1484{
1485        uint16_t nb_tx = 0;
1486
1487        if (likely(nb_pkts <= I40E_TX_MAX_BURST))
1488                return tx_xmit_pkts((struct i40e_tx_queue *)tx_queue,
1489                                                tx_pkts, nb_pkts);
1490
1491        while (nb_pkts) {
1492                uint16_t ret, num = (uint16_t)RTE_MIN(nb_pkts,
1493                                                I40E_TX_MAX_BURST);
1494
1495                ret = tx_xmit_pkts((struct i40e_tx_queue *)tx_queue,
1496                                                &tx_pkts[nb_tx], num);
1497                nb_tx = (uint16_t)(nb_tx + ret);
1498                nb_pkts = (uint16_t)(nb_pkts - ret);
1499                if (ret < num)
1500                        break;
1501        }
1502
1503        return nb_tx;
1504}
1505
1506static uint16_t
1507i40e_xmit_pkts_vec(void *tx_queue, struct rte_mbuf **tx_pkts,
1508                   uint16_t nb_pkts)
1509{
1510        uint16_t nb_tx = 0;
1511        struct i40e_tx_queue *txq = (struct i40e_tx_queue *)tx_queue;
1512
1513        while (nb_pkts) {
1514                uint16_t ret, num;
1515
1516                num = (uint16_t)RTE_MIN(nb_pkts, txq->tx_rs_thresh);
1517                ret = i40e_xmit_fixed_burst_vec(tx_queue, &tx_pkts[nb_tx],
1518                                                num);
1519                nb_tx += ret;
1520                nb_pkts -= ret;
1521                if (ret < num)
1522                        break;
1523        }
1524
1525        return nb_tx;
1526}
1527
1528/*********************************************************************
1529 *
1530 *  TX simple prep functions
1531 *
1532 **********************************************************************/
1533uint16_t
1534i40e_simple_prep_pkts(__rte_unused void *tx_queue, struct rte_mbuf **tx_pkts,
1535                      uint16_t nb_pkts)
1536{
1537        int i;
1538        uint64_t ol_flags;
1539        struct rte_mbuf *m;
1540
1541        for (i = 0; i < nb_pkts; i++) {
1542                m = tx_pkts[i];
1543                ol_flags = m->ol_flags;
1544
1545                if (m->nb_segs != 1) {
1546                        rte_errno = EINVAL;
1547                        return i;
1548                }
1549
1550                if (ol_flags & I40E_TX_OFFLOAD_SIMPLE_NOTSUP_MASK) {
1551                        rte_errno = ENOTSUP;
1552                        return i;
1553                }
1554
1555                /* check the size of the packet */
1556                if (m->pkt_len < I40E_TX_MIN_PKT_LEN ||
1557                    m->pkt_len > I40E_FRAME_SIZE_MAX) {
1558                        rte_errno = EINVAL;
1559                        return i;
1560                }
1561        }
1562        return i;
1563}
1564
1565/*********************************************************************
1566 *
1567 *  TX prep functions
1568 *
1569 **********************************************************************/
1570uint16_t
1571i40e_prep_pkts(__rte_unused void *tx_queue, struct rte_mbuf **tx_pkts,
1572                uint16_t nb_pkts)
1573{
1574        int i, ret;
1575        uint64_t ol_flags;
1576        struct rte_mbuf *m;
1577
1578        for (i = 0; i < nb_pkts; i++) {
1579                m = tx_pkts[i];
1580                ol_flags = m->ol_flags;
1581
1582                /* Check that m->nb_segs does not exceed the limits. */
1583                if (!(ol_flags & PKT_TX_TCP_SEG)) {
1584                        if (m->nb_segs > I40E_TX_MAX_MTU_SEG ||
1585                            m->pkt_len > I40E_FRAME_SIZE_MAX) {
1586                                rte_errno = EINVAL;
1587                                return i;
1588                        }
1589                } else if (m->nb_segs > I40E_TX_MAX_SEG ||
1590                           m->tso_segsz < I40E_MIN_TSO_MSS ||
1591                           m->tso_segsz > I40E_MAX_TSO_MSS ||
1592                           m->pkt_len > I40E_TSO_FRAME_SIZE_MAX) {
1593                        /* An MSS outside the range (256B - 9674B) is
1594                         * considered malicious
1595                         */
1596                        rte_errno = EINVAL;
1597                        return i;
1598                }
1599
1600                if (ol_flags & I40E_TX_OFFLOAD_NOTSUP_MASK) {
1601                        rte_errno = ENOTSUP;
1602                        return i;
1603                }
1604
1605                /* check the size of the packet */
1606                if (m->pkt_len < I40E_TX_MIN_PKT_LEN) {
1607                        rte_errno = EINVAL;
1608                        return i;
1609                }
1610
1611#ifdef RTE_ETHDEV_DEBUG_TX
1612                ret = rte_validate_tx_offload(m);
1613                if (ret != 0) {
1614                        rte_errno = -ret;
1615                        return i;
1616                }
1617#endif
1618                ret = rte_net_intel_cksum_prepare(m);
1619                if (ret != 0) {
1620                        rte_errno = -ret;
1621                        return i;
1622                }
1623        }
1624        return i;
1625}
1626
1627/*
1628 * Find the VSI the queue belongs to. 'queue_idx' is the queue index the
1629 * application uses, which assumes sequential numbering. From the driver's
1630 * perspective the layout is different. For example, q0 belongs to the FDIR
1631 * VSI, q1-q64 to the MAIN VSI, q65-q96 to SRIOV VSIs and q97-q128 to VMDQ
1632 * VSIs. An application running on the host can use q1-q64 and q97-q128,
1633 * 96 queues in total, and accesses them with queue_idx 0 to 95, while the
1634 * real queue index is different. This function performs the queue mapping
1635 * to find the VSI the queue belongs to.
1636 */
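    /*
     * Worked example of the mapping (hypothetical values, not a real
     * configuration): with main_vsi->nb_qps = 64, vmdq_nb_qps = 16 and at
     * least one VMDQ VSI configured, an application queue_idx of 70 falls
     * past the MAIN VSI range, so i40e_pf_get_vsi_by_qindex() returns
     * pf->vmdq[(70 - 64) / 16].vsi, i.e. VMDQ VSI 0, and
     * i40e_get_queue_offset_by_qindex() returns (70 - 64) % 16 = 6 as the
     * queue offset inside that VSI.
     */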
1637static struct i40e_vsi*
1638i40e_pf_get_vsi_by_qindex(struct i40e_pf *pf, uint16_t queue_idx)
1639{
1640        /* the queue in MAIN VSI range */
1641        if (queue_idx < pf->main_vsi->nb_qps)
1642                return pf->main_vsi;
1643
1644        queue_idx -= pf->main_vsi->nb_qps;
1645
1646        /* queue_idx is beyond the VMDQ VSIs range */
1647        if (queue_idx > pf->nb_cfg_vmdq_vsi * pf->vmdq_nb_qps - 1) {
1648                PMD_INIT_LOG(ERR, "queue_idx out of range. VMDQ configured?");
1649                return NULL;
1650        }
1651
1652        return pf->vmdq[queue_idx / pf->vmdq_nb_qps].vsi;
1653}
1654
1655static uint16_t
1656i40e_get_queue_offset_by_qindex(struct i40e_pf *pf, uint16_t queue_idx)
1657{
1658        /* the queue in MAIN VSI range */
1659        if (queue_idx < pf->main_vsi->nb_qps)
1660                return queue_idx;
1661
1662        /* It's VMDQ queues */
1663        queue_idx -= pf->main_vsi->nb_qps;
1664
1665        if (pf->nb_cfg_vmdq_vsi)
1666                return queue_idx % pf->vmdq_nb_qps;
1667        else {
1668                PMD_INIT_LOG(ERR, "Failed to get queue offset");
1669                return (uint16_t)(-1);
1670        }
1671}
1672
1673int
1674i40e_dev_rx_queue_start(struct rte_eth_dev *dev, uint16_t rx_queue_id)
1675{
1676        struct i40e_rx_queue *rxq;
1677        int err;
1678        struct i40e_hw *hw = I40E_DEV_PRIVATE_TO_HW(dev->data->dev_private);
1679
1680        PMD_INIT_FUNC_TRACE();
1681
1682        rxq = dev->data->rx_queues[rx_queue_id];
1683        if (!rxq || !rxq->q_set) {
1684                PMD_DRV_LOG(ERR, "RX queue %u is not available or not set up",
1685                            rx_queue_id);
1686                return -EINVAL;
1687        }
1688
1689        if (rxq->rx_deferred_start)
1690                PMD_DRV_LOG(WARNING, "RX queue %u is deferred start",
1691                            rx_queue_id);
1692
1693        err = i40e_alloc_rx_queue_mbufs(rxq);
1694        if (err) {
1695                PMD_DRV_LOG(ERR, "Failed to allocate RX queue mbuf");
1696                return err;
1697        }
1698
1699        /* Init the RX tail register. */
1700        I40E_PCI_REG_WRITE(rxq->qrx_tail, rxq->nb_rx_desc - 1);
1701
1702        err = i40e_switch_rx_queue(hw, rxq->reg_idx, TRUE);
1703        if (err) {
1704                PMD_DRV_LOG(ERR, "Failed to switch RX queue %u on",
1705                            rx_queue_id);
1706
1707                i40e_rx_queue_release_mbufs(rxq);
1708                i40e_reset_rx_queue(rxq);
1709                return err;
1710        }
1711        dev->data->rx_queue_state[rx_queue_id] = RTE_ETH_QUEUE_STATE_STARTED;
1712
1713        return 0;
1714}
1715
1716int
1717i40e_dev_rx_queue_stop(struct rte_eth_dev *dev, uint16_t rx_queue_id)
1718{
1719        struct i40e_rx_queue *rxq;
1720        int err;
1721        struct i40e_hw *hw = I40E_DEV_PRIVATE_TO_HW(dev->data->dev_private);
1722
1723        rxq = dev->data->rx_queues[rx_queue_id];
1724        if (!rxq || !rxq->q_set) {
1725                PMD_DRV_LOG(ERR, "RX queue %u is not available or not set up",
1726                                rx_queue_id);
1727                return -EINVAL;
1728        }
1729
1730        /*
1731         * rx_queue_id is the queue id the application refers to, while
1732         * rxq->reg_idx is the real queue index.
1733         */
1734        err = i40e_switch_rx_queue(hw, rxq->reg_idx, FALSE);
1735        if (err) {
1736                PMD_DRV_LOG(ERR, "Failed to switch RX queue %u off",
1737                            rx_queue_id);
1738                return err;
1739        }
1740        i40e_rx_queue_release_mbufs(rxq);
1741        i40e_reset_rx_queue(rxq);
1742        dev->data->rx_queue_state[rx_queue_id] = RTE_ETH_QUEUE_STATE_STOPPED;
1743
1744        return 0;
1745}
1746
1747int
1748i40e_dev_tx_queue_start(struct rte_eth_dev *dev, uint16_t tx_queue_id)
1749{
1750        int err;
1751        struct i40e_tx_queue *txq;
1752        struct i40e_hw *hw = I40E_DEV_PRIVATE_TO_HW(dev->data->dev_private);
1753
1754        PMD_INIT_FUNC_TRACE();
1755
1756        txq = dev->data->tx_queues[tx_queue_id];
1757        if (!txq || !txq->q_set) {
1758                PMD_DRV_LOG(ERR, "TX queue %u is not available or not set up",
1759                            tx_queue_id);
1760                return -EINVAL;
1761        }
1762
1763        if (txq->tx_deferred_start)
1764                PMD_DRV_LOG(WARNING, "TX queue %u is deferred start",
1765                            tx_queue_id);
1766
1767        /*
1768         * tx_queue_id is the queue id the application refers to, while
1769         * txq->reg_idx is the real queue index.
1770         */
1771        err = i40e_switch_tx_queue(hw, txq->reg_idx, TRUE);
1772        if (err) {
1773                PMD_DRV_LOG(ERR, "Failed to switch TX queue %u on",
1774                            tx_queue_id);
1775                return err;
1776        }
1777        dev->data->tx_queue_state[tx_queue_id] = RTE_ETH_QUEUE_STATE_STARTED;
1778
1779        return 0;
1780}
1781
1782int
1783i40e_dev_tx_queue_stop(struct rte_eth_dev *dev, uint16_t tx_queue_id)
1784{
1785        struct i40e_tx_queue *txq;
1786        int err;
1787        struct i40e_hw *hw = I40E_DEV_PRIVATE_TO_HW(dev->data->dev_private);
1788
1789        txq = dev->data->tx_queues[tx_queue_id];
1790        if (!txq || !txq->q_set) {
1791                PMD_DRV_LOG(ERR, "TX queue %u is not available or not set up",
1792                        tx_queue_id);
1793                return -EINVAL;
1794        }
1795
1796        /*
1797         * tx_queue_id is the queue id the application refers to, while
1798         * txq->reg_idx is the real queue index.
1799         */
1800        err = i40e_switch_tx_queue(hw, txq->reg_idx, FALSE);
1801        if (err) {
1802                PMD_DRV_LOG(ERR, "Failed to switch TX queue %u off",
1803                            tx_queue_id);
1804                return err;
1805        }
1806
1807        i40e_tx_queue_release_mbufs(txq);
1808        i40e_reset_tx_queue(txq);
1809        dev->data->tx_queue_state[tx_queue_id] = RTE_ETH_QUEUE_STATE_STOPPED;
1810
1811        return 0;
1812}
1813
1814const uint32_t *
1815i40e_dev_supported_ptypes_get(struct rte_eth_dev *dev)
1816{
1817        static const uint32_t ptypes[] = {
1818                /* refers to i40e_rxd_pkt_type_mapping() */
1819                RTE_PTYPE_L2_ETHER,
1820                RTE_PTYPE_L2_ETHER_TIMESYNC,
1821                RTE_PTYPE_L2_ETHER_LLDP,
1822                RTE_PTYPE_L2_ETHER_ARP,
1823                RTE_PTYPE_L3_IPV4_EXT_UNKNOWN,
1824                RTE_PTYPE_L3_IPV6_EXT_UNKNOWN,
1825                RTE_PTYPE_L4_FRAG,
1826                RTE_PTYPE_L4_ICMP,
1827                RTE_PTYPE_L4_NONFRAG,
1828                RTE_PTYPE_L4_SCTP,
1829                RTE_PTYPE_L4_TCP,
1830                RTE_PTYPE_L4_UDP,
1831                RTE_PTYPE_TUNNEL_GRENAT,
1832                RTE_PTYPE_TUNNEL_IP,
1833                RTE_PTYPE_INNER_L2_ETHER,
1834                RTE_PTYPE_INNER_L2_ETHER_VLAN,
1835                RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN,
1836                RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN,
1837                RTE_PTYPE_INNER_L4_FRAG,
1838                RTE_PTYPE_INNER_L4_ICMP,
1839                RTE_PTYPE_INNER_L4_NONFRAG,
1840                RTE_PTYPE_INNER_L4_SCTP,
1841                RTE_PTYPE_INNER_L4_TCP,
1842                RTE_PTYPE_INNER_L4_UDP,
1843                RTE_PTYPE_UNKNOWN
1844        };
1845
1846        if (dev->rx_pkt_burst == i40e_recv_pkts ||
1847#ifdef RTE_LIBRTE_I40E_RX_ALLOW_BULK_ALLOC
1848            dev->rx_pkt_burst == i40e_recv_pkts_bulk_alloc ||
1849#endif
1850            dev->rx_pkt_burst == i40e_recv_scattered_pkts ||
1851            dev->rx_pkt_burst == i40e_recv_scattered_pkts_vec ||
1852            dev->rx_pkt_burst == i40e_recv_pkts_vec ||
1853#ifdef CC_AVX512_SUPPORT
1854            dev->rx_pkt_burst == i40e_recv_scattered_pkts_vec_avx512 ||
1855            dev->rx_pkt_burst == i40e_recv_pkts_vec_avx512 ||
1856#endif
1857            dev->rx_pkt_burst == i40e_recv_scattered_pkts_vec_avx2 ||
1858            dev->rx_pkt_burst == i40e_recv_pkts_vec_avx2)
1859                return ptypes;
1860        return NULL;
1861}
1862
1863static int
1864i40e_dev_first_queue(uint16_t idx, void **queues, int num)
1865{
1866        uint16_t i;
1867
1868        for (i = 0; i < num; i++) {
1869                if (i != idx && queues[i])
1870                        return 0;
1871        }
1872
1873        return 1;
1874}
1875
1876static int
1877i40e_dev_rx_queue_setup_runtime(struct rte_eth_dev *dev,
1878                                struct i40e_rx_queue *rxq)
1879{
1880        struct i40e_adapter *ad =
1881                I40E_DEV_PRIVATE_TO_ADAPTER(dev->data->dev_private);
1882        int use_def_burst_func =
1883                check_rx_burst_bulk_alloc_preconditions(rxq);
1884        uint16_t buf_size =
1885                (uint16_t)(rte_pktmbuf_data_room_size(rxq->mp) -
1886                           RTE_PKTMBUF_HEADROOM);
1887        int use_scattered_rx =
1888                (rxq->max_pkt_len > buf_size);
1889
1890        if (i40e_rx_queue_init(rxq) != I40E_SUCCESS) {
1891                PMD_DRV_LOG(ERR,
1892                            "Failed to do RX queue initialization");
1893                return -EINVAL;
1894        }
1895
1896        if (i40e_dev_first_queue(rxq->queue_id,
1897                                 dev->data->rx_queues,
1898                                 dev->data->nb_rx_queues)) {
1899                /**
1900                 * If it is the first queue to set up,
1901                 * set all flags to default and call
1902                 * i40e_set_rx_function.
1903                 */
1904                ad->rx_bulk_alloc_allowed = true;
1905                ad->rx_vec_allowed = true;
1906                dev->data->scattered_rx = use_scattered_rx;
1907                if (use_def_burst_func)
1908                        ad->rx_bulk_alloc_allowed = false;
1909                i40e_set_rx_function(dev);
1910                return 0;
1911        } else if (ad->rx_vec_allowed && !rte_is_power_of_2(rxq->nb_rx_desc)) {
1912                PMD_DRV_LOG(ERR, "Vector mode is allowed, but descriptor"
1913                            " number %d of queue %d isn't a power of 2",
1914                            rxq->nb_rx_desc, rxq->queue_id);
1915                return -EINVAL;
1916        }
1917
1918        /* check bulk alloc conflict */
1919        if (ad->rx_bulk_alloc_allowed && use_def_burst_func) {
1920                PMD_DRV_LOG(ERR, "Can't use default burst.");
1921                return -EINVAL;
1922        }
1923        /* check scattered conflict */
1924        if (!dev->data->scattered_rx && use_scattered_rx) {
1925                PMD_DRV_LOG(ERR, "Scattered rx is required.");
1926                return -EINVAL;
1927        }
1928        /* check vector conflict */
1929        if (ad->rx_vec_allowed && i40e_rxq_vec_setup(rxq)) {
1930                PMD_DRV_LOG(ERR, "Failed vector rx setup.");
1931                return -EINVAL;
1932        }
1933
1934        return 0;
1935}
1936
1937int
1938i40e_dev_rx_queue_setup(struct rte_eth_dev *dev,
1939                        uint16_t queue_idx,
1940                        uint16_t nb_desc,
1941                        unsigned int socket_id,
1942                        const struct rte_eth_rxconf *rx_conf,
1943                        struct rte_mempool *mp)
1944{
1945        struct i40e_hw *hw = I40E_DEV_PRIVATE_TO_HW(dev->data->dev_private);
1946        struct i40e_adapter *ad =
1947                I40E_DEV_PRIVATE_TO_ADAPTER(dev->data->dev_private);
1948        struct i40e_vsi *vsi;
1949        struct i40e_pf *pf = NULL;
1950        struct i40e_vf *vf = NULL;
1951        struct i40e_rx_queue *rxq;
1952        const struct rte_memzone *rz;
1953        uint32_t ring_size;
1954        uint16_t len, i;
1955        uint16_t reg_idx, base, bsf, tc_mapping;
1956        int q_offset, use_def_burst_func = 1;
1957        uint64_t offloads;
1958
1959        offloads = rx_conf->offloads | dev->data->dev_conf.rxmode.offloads;
1960
1961        if (hw->mac.type == I40E_MAC_VF || hw->mac.type == I40E_MAC_X722_VF) {
1962                vf = I40EVF_DEV_PRIVATE_TO_VF(dev->data->dev_private);
1963                vsi = &vf->vsi;
1964                if (!vsi)
1965                        return -EINVAL;
1966                reg_idx = queue_idx;
1967        } else {
1968                pf = I40E_DEV_PRIVATE_TO_PF(dev->data->dev_private);
1969                vsi = i40e_pf_get_vsi_by_qindex(pf, queue_idx);
1970                if (!vsi)
1971                        return -EINVAL;
1972                q_offset = i40e_get_queue_offset_by_qindex(pf, queue_idx);
1973                if (q_offset < 0)
1974                        return -EINVAL;
1975                reg_idx = vsi->base_queue + q_offset;
1976        }
1977
1978        if (nb_desc % I40E_ALIGN_RING_DESC != 0 ||
1979            (nb_desc > I40E_MAX_RING_DESC) ||
1980            (nb_desc < I40E_MIN_RING_DESC)) {
1981                PMD_DRV_LOG(ERR, "Number (%u) of receive descriptors is "
1982                            "invalid", nb_desc);
1983                return -EINVAL;
1984        }
1985
1986        /* Free memory if needed */
1987        if (dev->data->rx_queues[queue_idx]) {
1988                i40e_dev_rx_queue_release(dev->data->rx_queues[queue_idx]);
1989                dev->data->rx_queues[queue_idx] = NULL;
1990        }
1991
1992        /* Allocate the rx queue data structure */
1993        rxq = rte_zmalloc_socket("i40e rx queue",
1994                                 sizeof(struct i40e_rx_queue),
1995                                 RTE_CACHE_LINE_SIZE,
1996                                 socket_id);
1997        if (!rxq) {
1998                PMD_DRV_LOG(ERR, "Failed to allocate memory for "
1999                            "rx queue data structure");
2000                return -ENOMEM;
2001        }
2002        rxq->mp = mp;
2003        rxq->nb_rx_desc = nb_desc;
2004        rxq->rx_free_thresh = rx_conf->rx_free_thresh;
2005        rxq->queue_id = queue_idx;
2006        rxq->reg_idx = reg_idx;
2007        rxq->port_id = dev->data->port_id;
2008        if (dev->data->dev_conf.rxmode.offloads & DEV_RX_OFFLOAD_KEEP_CRC)
2009                rxq->crc_len = RTE_ETHER_CRC_LEN;
2010        else
2011                rxq->crc_len = 0;
2012        rxq->drop_en = rx_conf->rx_drop_en;
2013        rxq->vsi = vsi;
2014        rxq->rx_deferred_start = rx_conf->rx_deferred_start;
2015        rxq->offloads = offloads;
2016
2017        /* Allocate the maximum number of RX ring hardware descriptors. */
2018        len = I40E_MAX_RING_DESC;
2019
2020        /**
2021         * Allocating a little more memory because vectorized/bulk_alloc Rx
2022         * functions don't check boundaries each time.
2023         */
2024        len += RTE_PMD_I40E_RX_MAX_BURST;
2025
2026        ring_size = RTE_ALIGN(len * sizeof(union i40e_rx_desc),
2027                              I40E_DMA_MEM_ALIGN);
2028
2029        rz = rte_eth_dma_zone_reserve(dev, "rx_ring", queue_idx,
2030                              ring_size, I40E_RING_BASE_ALIGN, socket_id);
2031        if (!rz) {
2032                i40e_dev_rx_queue_release(rxq);
2033                PMD_DRV_LOG(ERR, "Failed to reserve DMA memory for RX");
2034                return -ENOMEM;
2035        }
2036
2037        /* Zero all the descriptors in the ring. */
2038        memset(rz->addr, 0, ring_size);
2039
2040        rxq->rx_ring_phys_addr = rz->iova;
2041        rxq->rx_ring = (union i40e_rx_desc *)rz->addr;
2042
2043        len = (uint16_t)(nb_desc + RTE_PMD_I40E_RX_MAX_BURST);
2044
2045        /* Allocate the software ring. */
2046        rxq->sw_ring =
2047                rte_zmalloc_socket("i40e rx sw ring",
2048                                   sizeof(struct i40e_rx_entry) * len,
2049                                   RTE_CACHE_LINE_SIZE,
2050                                   socket_id);
2051        if (!rxq->sw_ring) {
2052                i40e_dev_rx_queue_release(rxq);
2053                PMD_DRV_LOG(ERR, "Failed to allocate memory for SW ring");
2054                return -ENOMEM;
2055        }
2056
2057        i40e_reset_rx_queue(rxq);
2058        rxq->q_set = TRUE;
2059
2060        for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++) {
2061                if (!(vsi->enabled_tc & (1 << i)))
2062                        continue;
2063                tc_mapping = rte_le_to_cpu_16(vsi->info.tc_mapping[i]);
2064                base = (tc_mapping & I40E_AQ_VSI_TC_QUE_OFFSET_MASK) >>
2065                        I40E_AQ_VSI_TC_QUE_OFFSET_SHIFT;
2066                bsf = (tc_mapping & I40E_AQ_VSI_TC_QUE_NUMBER_MASK) >>
2067                        I40E_AQ_VSI_TC_QUE_NUMBER_SHIFT;
2068
2069                if (queue_idx >= base && queue_idx < (base + BIT(bsf)))
2070                        rxq->dcb_tc = i;
2071        }
2072
2073        if (dev->data->dev_started) {
2074                if (i40e_dev_rx_queue_setup_runtime(dev, rxq)) {
2075                        i40e_dev_rx_queue_release(rxq);
2076                        return -EINVAL;
2077                }
2078        } else {
2079                use_def_burst_func =
2080                        check_rx_burst_bulk_alloc_preconditions(rxq);
2081                if (!use_def_burst_func) {
2082#ifdef RTE_LIBRTE_I40E_RX_ALLOW_BULK_ALLOC
2083                        PMD_INIT_LOG(DEBUG,
2084                          "Rx Burst Bulk Alloc Preconditions are "
2085                          "satisfied. Rx Burst Bulk Alloc function will be "
2086                          "used on port=%d, queue=%d.",
2087                          rxq->port_id, rxq->queue_id);
2088#endif /* RTE_LIBRTE_I40E_RX_ALLOW_BULK_ALLOC */
2089                } else {
2090                        PMD_INIT_LOG(DEBUG,
2091                          "Rx Burst Bulk Alloc Preconditions are "
2092                          "not satisfied, Scattered Rx is requested, "
2093                          "or RTE_LIBRTE_I40E_RX_ALLOW_BULK_ALLOC is "
2094                          "not enabled on port=%d, queue=%d.",
2095                          rxq->port_id, rxq->queue_id);
2096                        ad->rx_bulk_alloc_allowed = false;
2097                }
2098        }
2099
2100        dev->data->rx_queues[queue_idx] = rxq;
2101        return 0;
2102}
2103
2104void
2105i40e_dev_rx_queue_release(void *rxq)
2106{
2107        struct i40e_rx_queue *q = (struct i40e_rx_queue *)rxq;
2108
2109        if (!q) {
2110                PMD_DRV_LOG(DEBUG, "Pointer to rxq is NULL");
2111                return;
2112        }
2113
2114        i40e_rx_queue_release_mbufs(q);
2115        rte_free(q->sw_ring);
2116        rte_free(q);
2117}
2118
2119uint32_t
2120i40e_dev_rx_queue_count(struct rte_eth_dev *dev, uint16_t rx_queue_id)
2121{
2122#define I40E_RXQ_SCAN_INTERVAL 4
2123        volatile union i40e_rx_desc *rxdp;
2124        struct i40e_rx_queue *rxq;
2125        uint16_t desc = 0;
2126
2127        rxq = dev->data->rx_queues[rx_queue_id];
2128        rxdp = &(rxq->rx_ring[rxq->rx_tail]);
2129        while ((desc < rxq->nb_rx_desc) &&
2130                ((rte_le_to_cpu_64(rxdp->wb.qword1.status_error_len) &
2131                I40E_RXD_QW1_STATUS_MASK) >> I40E_RXD_QW1_STATUS_SHIFT) &
2132                                (1 << I40E_RX_DESC_STATUS_DD_SHIFT)) {
2133                /**
2134                 * Check the DD bit of every 4th RX descriptor in the group
2135                 * to avoid checking too frequently and degrading performance
2136                 * too much.
2137                 */
2138                desc += I40E_RXQ_SCAN_INTERVAL;
2139                rxdp += I40E_RXQ_SCAN_INTERVAL;
2140                if (rxq->rx_tail + desc >= rxq->nb_rx_desc)
2141                        rxdp = &(rxq->rx_ring[rxq->rx_tail +
2142                                        desc - rxq->nb_rx_desc]);
2143        }
2144
2145        return desc;
2146}
2147
2148int
2149i40e_dev_rx_descriptor_done(void *rx_queue, uint16_t offset)
2150{
2151        volatile union i40e_rx_desc *rxdp;
2152        struct i40e_rx_queue *rxq = rx_queue;
2153        uint16_t desc;
2154        int ret;
2155
2156        if (unlikely(offset >= rxq->nb_rx_desc)) {
2157                PMD_DRV_LOG(ERR, "Invalid RX descriptor id %u", offset);
2158                return 0;
2159        }
2160
2161        desc = rxq->rx_tail + offset;
2162        if (desc >= rxq->nb_rx_desc)
2163                desc -= rxq->nb_rx_desc;
2164
2165        rxdp = &(rxq->rx_ring[desc]);
2166
2167        ret = !!(((rte_le_to_cpu_64(rxdp->wb.qword1.status_error_len) &
2168                I40E_RXD_QW1_STATUS_MASK) >> I40E_RXD_QW1_STATUS_SHIFT) &
2169                                (1 << I40E_RX_DESC_STATUS_DD_SHIFT));
2170
2171        return ret;
2172}
2173
2174int
2175i40e_dev_rx_descriptor_status(void *rx_queue, uint16_t offset)
2176{
2177        struct i40e_rx_queue *rxq = rx_queue;
2178        volatile uint64_t *status;
2179        uint64_t mask;
2180        uint32_t desc;
2181
2182        if (unlikely(offset >= rxq->nb_rx_desc))
2183                return -EINVAL;
2184
2185        if (offset >= rxq->nb_rx_desc - rxq->nb_rx_hold)
2186                return RTE_ETH_RX_DESC_UNAVAIL;
2187
2188        desc = rxq->rx_tail + offset;
2189        if (desc >= rxq->nb_rx_desc)
2190                desc -= rxq->nb_rx_desc;
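            /*
             * Example (illustrative values only): rx_tail = 500, offset = 20
             * and nb_rx_desc = 512 give desc = 520, which wraps to 8 above.
             */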
2191
2192        status = &rxq->rx_ring[desc].wb.qword1.status_error_len;
2193        mask = rte_le_to_cpu_64((1ULL << I40E_RX_DESC_STATUS_DD_SHIFT)
2194                << I40E_RXD_QW1_STATUS_SHIFT);
2195        if (*status & mask)
2196                return RTE_ETH_RX_DESC_DONE;
2197
2198        return RTE_ETH_RX_DESC_AVAIL;
2199}
2200
2201int
2202i40e_dev_tx_descriptor_status(void *tx_queue, uint16_t offset)
2203{
2204        struct i40e_tx_queue *txq = tx_queue;
2205        volatile uint64_t *status;
2206        uint64_t mask, expect;
2207        uint32_t desc;
2208
2209        if (unlikely(offset >= txq->nb_tx_desc))
2210                return -EINVAL;
2211
2212        desc = txq->tx_tail + offset;
2213        /* go to the next descriptor that has the RS bit set */
2214        desc = ((desc + txq->tx_rs_thresh - 1) / txq->tx_rs_thresh) *
2215                txq->tx_rs_thresh;
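            /*
             * Illustrative example: with tx_rs_thresh = 32 (hypothetical value)
             * and desc = 70, the rounding above yields desc = 96, the next
             * descriptor on which the RS bit was requested.
             */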
2216        if (desc >= txq->nb_tx_desc) {
2217                desc -= txq->nb_tx_desc;
2218                if (desc >= txq->nb_tx_desc)
2219                        desc -= txq->nb_tx_desc;
2220        }
2221
2222        status = &txq->tx_ring[desc].cmd_type_offset_bsz;
2223        mask = rte_le_to_cpu_64(I40E_TXD_QW1_DTYPE_MASK);
2224        expect = rte_cpu_to_le_64(
2225                I40E_TX_DESC_DTYPE_DESC_DONE << I40E_TXD_QW1_DTYPE_SHIFT);
2226        if ((*status & mask) == expect)
2227                return RTE_ETH_TX_DESC_DONE;
2228
2229        return RTE_ETH_TX_DESC_FULL;
2230}
2231
2232static int
2233i40e_dev_tx_queue_setup_runtime(struct rte_eth_dev *dev,
2234                                struct i40e_tx_queue *txq)
2235{
2236        struct i40e_adapter *ad =
2237                I40E_DEV_PRIVATE_TO_ADAPTER(dev->data->dev_private);
2238
2239        if (i40e_tx_queue_init(txq) != I40E_SUCCESS) {
2240                PMD_DRV_LOG(ERR,
2241                            "Failed to do TX queue initialization");
2242                return -EINVAL;
2243        }
2244
2245        if (i40e_dev_first_queue(txq->queue_id,
2246                                 dev->data->tx_queues,
2247                                 dev->data->nb_tx_queues)) {
2248                /**
2249                 * If it is the first queue to set up,
2250                 * set all flags and call
2251                 * i40e_set_tx_function.
2252                 */
2253                i40e_set_tx_function_flag(dev, txq);
2254                i40e_set_tx_function(dev);
2255                return 0;
2256        }
2257
2258        /* check vector conflict */
2259        if (ad->tx_vec_allowed) {
2260                if (txq->tx_rs_thresh > RTE_I40E_TX_MAX_FREE_BUF_SZ ||
2261                    i40e_txq_vec_setup(txq)) {
2262                        PMD_DRV_LOG(ERR, "Failed vector tx setup.");
2263                        return -EINVAL;
2264                }
2265        }
2266        /* check simple tx conflict */
2267        if (ad->tx_simple_allowed) {
2268                if ((txq->offloads & ~DEV_TX_OFFLOAD_MBUF_FAST_FREE) != 0 ||
2269                                txq->tx_rs_thresh < RTE_PMD_I40E_TX_MAX_BURST) {
2270                        PMD_DRV_LOG(ERR, "Non-simple TX is required.");
2271                        return -EINVAL;
2272                }
2273        }
2274
2275        return 0;
2276}
2277
2278int
2279i40e_dev_tx_queue_setup(struct rte_eth_dev *dev,
2280                        uint16_t queue_idx,
2281                        uint16_t nb_desc,
2282                        unsigned int socket_id,
2283                        const struct rte_eth_txconf *tx_conf)
2284{
2285        struct i40e_hw *hw = I40E_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2286        struct i40e_vsi *vsi;
2287        struct i40e_pf *pf = NULL;
2288        struct i40e_vf *vf = NULL;
2289        struct i40e_tx_queue *txq;
2290        const struct rte_memzone *tz;
2291        uint32_t ring_size;
2292        uint16_t tx_rs_thresh, tx_free_thresh;
2293        uint16_t reg_idx, i, base, bsf, tc_mapping;
2294        int q_offset;
2295        uint64_t offloads;
2296
2297        offloads = tx_conf->offloads | dev->data->dev_conf.txmode.offloads;
2298
2299        if (hw->mac.type == I40E_MAC_VF || hw->mac.type == I40E_MAC_X722_VF) {
2300                vf = I40EVF_DEV_PRIVATE_TO_VF(dev->data->dev_private);
2301                vsi = &vf->vsi;
2302                reg_idx = queue_idx;
2303        } else {
2304                pf = I40E_DEV_PRIVATE_TO_PF(dev->data->dev_private);
2305                vsi = i40e_pf_get_vsi_by_qindex(pf, queue_idx);
2306                if (!vsi)
2307                        return -EINVAL;
2308                q_offset = i40e_get_queue_offset_by_qindex(pf, queue_idx);
2309                if (q_offset < 0)
2310                        return -EINVAL;
2311                reg_idx = vsi->base_queue + q_offset;
2312        }
2313
2314        if (nb_desc % I40E_ALIGN_RING_DESC != 0 ||
2315            (nb_desc > I40E_MAX_RING_DESC) ||
2316            (nb_desc < I40E_MIN_RING_DESC)) {
2317                PMD_DRV_LOG(ERR, "Number (%u) of transmit descriptors is "
2318                            "invalid", nb_desc);
2319                return -EINVAL;
2320        }
2321
2322        /**
2323         * The following two parameters control the setting of the RS bit on
2324         * transmit descriptors. TX descriptors will have their RS bit set
2325         * after txq->tx_rs_thresh descriptors have been used. The TX
2326         * descriptor ring will be cleaned after txq->tx_free_thresh
2327         * descriptors are used or if the number of descriptors required to
2328         * transmit a packet is greater than the number of free TX descriptors.
2329         *
2330         * The following constraints must be satisfied:
2331         *  - tx_rs_thresh must be greater than 0.
2332         *  - tx_rs_thresh must be less than the size of the ring minus 2.
2333         *  - tx_rs_thresh must be less than or equal to tx_free_thresh.
2334         *  - tx_rs_thresh must be a divisor of the ring size.
2335         *  - tx_free_thresh must be greater than 0.
2336         *  - tx_free_thresh must be less than the size of the ring minus 3.
2337         *  - tx_free_thresh + tx_rs_thresh must not exceed nb_desc.
2338         *
2339         * One descriptor in the TX ring is used as a sentinel to avoid a H/W
2340         * race condition, hence the maximum threshold constraints. When set
2341         * to zero use default values.
2342         */
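            /*
             * One configuration that satisfies all of the constraints above
             * (illustrative values only): nb_desc = 1024, tx_rs_thresh = 32 and
             * tx_free_thresh = 32. Then tx_rs_thresh > 0, 32 < 1024 - 2,
             * 32 <= 32, 1024 % 32 == 0, tx_free_thresh < 1024 - 3 and
             * 32 + 32 <= 1024.
             */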
2343        tx_free_thresh = (uint16_t)((tx_conf->tx_free_thresh) ?
2344                tx_conf->tx_free_thresh : DEFAULT_TX_FREE_THRESH);
2345        /* force tx_rs_thresh to adapt to an aggressive tx_free_thresh */
2346        tx_rs_thresh = (DEFAULT_TX_RS_THRESH + tx_free_thresh > nb_desc) ?
2347                nb_desc - tx_free_thresh : DEFAULT_TX_RS_THRESH;
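            /*
             * E.g. (illustrative values, assuming DEFAULT_TX_RS_THRESH is 32):
             * nb_desc = 64 with tx_free_thresh = 48 shrinks tx_rs_thresh to
             * 64 - 48 = 16 so that both thresholds still fit in the ring.
             */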
2348        if (tx_conf->tx_rs_thresh > 0)
2349                tx_rs_thresh = tx_conf->tx_rs_thresh;
2350        if (tx_rs_thresh + tx_free_thresh > nb_desc) {
2351                PMD_INIT_LOG(ERR, "tx_rs_thresh + tx_free_thresh must not "
2352                                "exceed nb_desc. (tx_rs_thresh=%u "
2353                                "tx_free_thresh=%u nb_desc=%u port=%d queue=%d)",
2354                                (unsigned int)tx_rs_thresh,
2355                                (unsigned int)tx_free_thresh,
2356                                (unsigned int)nb_desc,
2357                                (int)dev->data->port_id,
2358                                (int)queue_idx);
2359                return I40E_ERR_PARAM;
2360        }
2361        if (tx_rs_thresh >= (nb_desc - 2)) {
2362                PMD_INIT_LOG(ERR, "tx_rs_thresh must be less than the "
2363                             "number of TX descriptors minus 2. "
2364                             "(tx_rs_thresh=%u port=%d queue=%d)",
2365                             (unsigned int)tx_rs_thresh,
2366                             (int)dev->data->port_id,
2367                             (int)queue_idx);
2368                return I40E_ERR_PARAM;
2369        }
2370        if (tx_free_thresh >= (nb_desc - 3)) {
2371                PMD_INIT_LOG(ERR, "tx_free_thresh must be less than the "
2372                             "number of TX descriptors minus 3. "
2373                             "(tx_free_thresh=%u port=%d queue=%d)",
2374                             (unsigned int)tx_free_thresh,
2375                             (int)dev->data->port_id,
2376                             (int)queue_idx);
2377                return I40E_ERR_PARAM;
2378        }
2379        if (tx_rs_thresh > tx_free_thresh) {
2380                PMD_INIT_LOG(ERR, "tx_rs_thresh must be less than or "
2381                             "equal to tx_free_thresh. (tx_free_thresh=%u"
2382                             " tx_rs_thresh=%u port=%d queue=%d)",
2383                             (unsigned int)tx_free_thresh,
2384                             (unsigned int)tx_rs_thresh,
2385                             (int)dev->data->port_id,
2386                             (int)queue_idx);
2387                return I40E_ERR_PARAM;
2388        }
2389        if ((nb_desc % tx_rs_thresh) != 0) {
2390                PMD_INIT_LOG(ERR, "tx_rs_thresh must be a divisor of the "
2391                             "number of TX descriptors. (tx_rs_thresh=%u"
2392                             " port=%d queue=%d)",
2393                             (unsigned int)tx_rs_thresh,
2394                             (int)dev->data->port_id,
2395                             (int)queue_idx);
2396                return I40E_ERR_PARAM;
2397        }
2398        if ((tx_rs_thresh > 1) && (tx_conf->tx_thresh.wthresh != 0)) {
2399                PMD_INIT_LOG(ERR, "TX WTHRESH must be set to 0 if "
2400                             "tx_rs_thresh is greater than 1. "
2401                             "(tx_rs_thresh=%u port=%d queue=%d)",
2402                             (unsigned int)tx_rs_thresh,
2403                             (int)dev->data->port_id,
2404                             (int)queue_idx);
2405                return I40E_ERR_PARAM;
2406        }
2407
2408        /* Free memory if needed. */
2409        if (dev->data->tx_queues[queue_idx]) {
2410                i40e_dev_tx_queue_release(dev->data->tx_queues[queue_idx]);
2411                dev->data->tx_queues[queue_idx] = NULL;
2412        }
2413
2414        /* Allocate the TX queue data structure. */
2415        txq = rte_zmalloc_socket("i40e tx queue",
2416                                  sizeof(struct i40e_tx_queue),
2417                                  RTE_CACHE_LINE_SIZE,
2418                                  socket_id);
2419        if (!txq) {
2420                PMD_DRV_LOG(ERR, "Failed to allocate memory for "
2421                            "tx queue structure");
2422                return -ENOMEM;
2423        }
2424
2425        /* Allocate TX hardware ring descriptors. */
2426        ring_size = sizeof(struct i40e_tx_desc) * I40E_MAX_RING_DESC;
2427        ring_size = RTE_ALIGN(ring_size, I40E_DMA_MEM_ALIGN);
2428        tz = rte_eth_dma_zone_reserve(dev, "tx_ring", queue_idx,
2429                              ring_size, I40E_RING_BASE_ALIGN, socket_id);
2430        if (!tz) {
2431                i40e_dev_tx_queue_release(txq);
2432                PMD_DRV_LOG(ERR, "Failed to reserve DMA memory for TX");
2433                return -ENOMEM;
2434        }
2435
2436        txq->nb_tx_desc = nb_desc;
2437        txq->tx_rs_thresh = tx_rs_thresh;
2438        txq->tx_free_thresh = tx_free_thresh;
2439        txq->pthresh = tx_conf->tx_thresh.pthresh;
2440        txq->hthresh = tx_conf->tx_thresh.hthresh;
2441        txq->wthresh = tx_conf->tx_thresh.wthresh;
2442        txq->queue_id = queue_idx;
2443        txq->reg_idx = reg_idx;
2444        txq->port_id = dev->data->port_id;
2445        txq->offloads = offloads;
2446        txq->vsi = vsi;
2447        txq->tx_deferred_start = tx_conf->tx_deferred_start;
2448
2449        txq->tx_ring_phys_addr = tz->iova;
2450        txq->tx_ring = (struct i40e_tx_desc *)tz->addr;
2451
2452        /* Allocate software ring */
2453        txq->sw_ring =
2454                rte_zmalloc_socket("i40e tx sw ring",
2455                                   sizeof(struct i40e_tx_entry) * nb_desc,
2456                                   RTE_CACHE_LINE_SIZE,
2457                                   socket_id);
2458        if (!txq->sw_ring) {
2459                i40e_dev_tx_queue_release(txq);
2460                PMD_DRV_LOG(ERR, "Failed to allocate memory for SW TX ring");
2461                return -ENOMEM;
2462        }
2463
2464        i40e_reset_tx_queue(txq);
2465        txq->q_set = TRUE;
2466
2467        for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++) {
2468                if (!(vsi->enabled_tc & (1 << i)))
2469                        continue;
2470                tc_mapping = rte_le_to_cpu_16(vsi->info.tc_mapping[i]);
2471                base = (tc_mapping & I40E_AQ_VSI_TC_QUE_OFFSET_MASK) >>
2472                        I40E_AQ_VSI_TC_QUE_OFFSET_SHIFT;
2473                bsf = (tc_mapping & I40E_AQ_VSI_TC_QUE_NUMBER_MASK) >>
2474                        I40E_AQ_VSI_TC_QUE_NUMBER_SHIFT;
2475
2476                if (queue_idx >= base && queue_idx < (base + BIT(bsf)))
2477                        txq->dcb_tc = i;
2478        }
2479
2480        if (dev->data->dev_started) {
2481                if (i40e_dev_tx_queue_setup_runtime(dev, txq)) {
2482                        i40e_dev_tx_queue_release(txq);
2483                        return -EINVAL;
2484                }
2485        } else {
2486                /**
2487                 * Use a simple TX queue without offloads or
2488                 * multi-segment packets if possible
2489                 */
2490                i40e_set_tx_function_flag(dev, txq);
2491        }
2492        dev->data->tx_queues[queue_idx] = txq;
2493
2494        return 0;
2495}
2496
2497void
2498i40e_dev_tx_queue_release(void *txq)
2499{
2500        struct i40e_tx_queue *q = (struct i40e_tx_queue *)txq;
2501
2502        if (!q) {
2503                PMD_DRV_LOG(DEBUG, "Pointer to TX queue is NULL");
2504                return;
2505        }
2506
2507        i40e_tx_queue_release_mbufs(q);
2508        rte_free(q->sw_ring);
2509        rte_free(q);
2510}
2511
2512const struct rte_memzone *
2513i40e_memzone_reserve(const char *name, uint32_t len, int socket_id)
2514{
2515        const struct rte_memzone *mz;
2516
2517        mz = rte_memzone_lookup(name);
2518        if (mz)
2519                return mz;
2520
2521        mz = rte_memzone_reserve_aligned(name, len, socket_id,
2522                        RTE_MEMZONE_IOVA_CONTIG, I40E_RING_BASE_ALIGN);
2523        return mz;
2524}
2525
2526void
2527i40e_rx_queue_release_mbufs(struct i40e_rx_queue *rxq)
2528{
2529        uint16_t i;
2530
2531        /* SSE Vector driver has a different way of releasing mbufs. */
2532        if (rxq->rx_using_sse) {
2533                i40e_rx_queue_release_mbufs_vec(rxq);
2534                return;
2535        }
2536
2537        if (!rxq->sw_ring) {
2538                PMD_DRV_LOG(DEBUG, "Pointer to sw_ring is NULL");
2539                return;
2540        }
2541
2542        for (i = 0; i < rxq->nb_rx_desc; i++) {
2543                if (rxq->sw_ring[i].mbuf) {
2544                        rte_pktmbuf_free_seg(rxq->sw_ring[i].mbuf);
2545                        rxq->sw_ring[i].mbuf = NULL;
2546                }
2547        }
2548#ifdef RTE_LIBRTE_I40E_RX_ALLOW_BULK_ALLOC
2549        if (rxq->rx_nb_avail == 0)
2550                return;
2551        for (i = 0; i < rxq->rx_nb_avail; i++) {
2552                struct rte_mbuf *mbuf;
2553
2554                mbuf = rxq->rx_stage[rxq->rx_next_avail + i];
2555                rte_pktmbuf_free_seg(mbuf);
2556        }
2557        rxq->rx_nb_avail = 0;
2558#endif /* RTE_LIBRTE_I40E_RX_ALLOW_BULK_ALLOC */
2559}
2560
2561void
2562i40e_reset_rx_queue(struct i40e_rx_queue *rxq)
2563{
2564        unsigned i;
2565        uint16_t len;
2566
2567        if (!rxq) {
2568                PMD_DRV_LOG(DEBUG, "Pointer to rxq is NULL");
2569                return;
2570        }
2571
2572#ifdef RTE_LIBRTE_I40E_RX_ALLOW_BULK_ALLOC
2573        if (check_rx_burst_bulk_alloc_preconditions(rxq) == 0)
2574                len = (uint16_t)(rxq->nb_rx_desc + RTE_PMD_I40E_RX_MAX_BURST);
2575        else
2576#endif /* RTE_LIBRTE_I40E_RX_ALLOW_BULK_ALLOC */
2577                len = rxq->nb_rx_desc;
2578
2579        for (i = 0; i < len * sizeof(union i40e_rx_desc); i++)
2580                ((volatile char *)rxq->rx_ring)[i] = 0;
2581
2582        memset(&rxq->fake_mbuf, 0x0, sizeof(rxq->fake_mbuf));
2583        for (i = 0; i < RTE_PMD_I40E_RX_MAX_BURST; ++i)
2584                rxq->sw_ring[rxq->nb_rx_desc + i].mbuf = &rxq->fake_mbuf;
2585
2586#ifdef RTE_LIBRTE_I40E_RX_ALLOW_BULK_ALLOC
2587        rxq->rx_nb_avail = 0;
2588        rxq->rx_next_avail = 0;
2589        rxq->rx_free_trigger = (uint16_t)(rxq->rx_free_thresh - 1);
2590#endif /* RTE_LIBRTE_I40E_RX_ALLOW_BULK_ALLOC */
2591        rxq->rx_tail = 0;
2592        rxq->nb_rx_hold = 0;
2593        rxq->pkt_first_seg = NULL;
2594        rxq->pkt_last_seg = NULL;
2595
2596        rxq->rxrearm_start = 0;
2597        rxq->rxrearm_nb = 0;
2598}
2599
2600void
2601i40e_tx_queue_release_mbufs(struct i40e_tx_queue *txq)
2602{
2603        struct rte_eth_dev *dev;
2604        uint16_t i;
2605
2606        if (!txq || !txq->sw_ring) {
2607                PMD_DRV_LOG(DEBUG, "Pointer to txq or sw_ring is NULL");
2608                return;
2609        }
2610
2611        dev = &rte_eth_devices[txq->port_id];
2612
2613        /**
2614         *  vPMD TX will not set the sw_ring's mbuf pointers to NULL after
2615         *  freeing, so the remaining mbufs need to be freed more carefully.
2616         */
2617#ifdef CC_AVX512_SUPPORT
2618        if (dev->tx_pkt_burst == i40e_xmit_pkts_vec_avx512) {
2619                struct i40e_vec_tx_entry *swr = (void *)txq->sw_ring;
2620
2621                i = txq->tx_next_dd - txq->tx_rs_thresh + 1;
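                    /*
                     * i is the first sw_ring entry that may still hold an
                     * mbuf; if it lies beyond tx_tail the occupied range wraps
                     * around the end of the ring, hence the two loops below.
                     */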
2622                if (txq->tx_tail < i) {
2623                        for (; i < txq->nb_tx_desc; i++) {
2624                                rte_pktmbuf_free_seg(swr[i].mbuf);
2625                                swr[i].mbuf = NULL;
2626                        }
2627                        i = 0;
2628                }
2629                for (; i < txq->tx_tail; i++) {
2630                        rte_pktmbuf_free_seg(swr[i].mbuf);
2631                        swr[i].mbuf = NULL;
2632                }
2633                return;
2634        }
2635#endif
2636        if (dev->tx_pkt_burst == i40e_xmit_pkts_vec_avx2 ||
2637                        dev->tx_pkt_burst == i40e_xmit_pkts_vec) {
2638                i = txq->tx_next_dd - txq->tx_rs_thresh + 1;
2639                if (txq->tx_tail < i) {
2640                        for (; i < txq->nb_tx_desc; i++) {
2641                                rte_pktmbuf_free_seg(txq->sw_ring[i].mbuf);
2642                                txq->sw_ring[i].mbuf = NULL;
2643                        }
2644                        i = 0;
2645                }
2646                for (; i < txq->tx_tail; i++) {
2647                        rte_pktmbuf_free_seg(txq->sw_ring[i].mbuf);
2648                        txq->sw_ring[i].mbuf = NULL;
2649                }
2650        } else {
2651                for (i = 0; i < txq->nb_tx_desc; i++) {
2652                        if (txq->sw_ring[i].mbuf) {
2653                                rte_pktmbuf_free_seg(txq->sw_ring[i].mbuf);
2654                                txq->sw_ring[i].mbuf = NULL;
2655                        }
2656                }
2657        }
2658}
2659
2660static int
2661i40e_tx_done_cleanup_full(struct i40e_tx_queue *txq,
2662                        uint32_t free_cnt)
2663{
2664        struct i40e_tx_entry *swr_ring = txq->sw_ring;
2665        uint16_t i, tx_last, tx_id;
2666        uint16_t nb_tx_free_last;
2667        uint16_t nb_tx_to_clean;
2668        uint32_t pkt_cnt;
2669
2670        /* Start freeing mbufs from the entry following tx_tail */
2671        tx_last = txq->tx_tail;
2672        tx_id  = swr_ring[tx_last].next_id;
2673
2674        if (txq->nb_tx_free == 0 && i40e_xmit_cleanup(txq))
2675                return 0;
2676
2677        nb_tx_to_clean = txq->nb_tx_free;
2678        nb_tx_free_last = txq->nb_tx_free;
2679        if (!free_cnt)
2680                free_cnt = txq->nb_tx_desc;
2681
2682        /* Loop through swr_ring to count the number of
2683         * freeable mbufs and packets.
2684         */
2685        for (pkt_cnt = 0; pkt_cnt < free_cnt; ) {
2686                for (i = 0; i < nb_tx_to_clean &&
2687                        pkt_cnt < free_cnt &&
2688                        tx_id != tx_last; i++) {
2689                        if (swr_ring[tx_id].mbuf != NULL) {
2690                                rte_pktmbuf_free_seg(swr_ring[tx_id].mbuf);
2691                                swr_ring[tx_id].mbuf = NULL;
2692
2693                                /*
2694                                 * last segment in the packet,
2695                                 * increment packet count
2696                                 */
2697                                pkt_cnt += (swr_ring[tx_id].last_id == tx_id);
2698                        }
2699
2700                        tx_id = swr_ring[tx_id].next_id;
2701                }
2702
2703                if (txq->tx_rs_thresh > txq->nb_tx_desc -
2704                        txq->nb_tx_free || tx_id == tx_last)
2705                        break;
2706
2707                if (pkt_cnt < free_cnt) {
2708                        if (i40e_xmit_cleanup(txq))
2709                                break;
2710
2711                        nb_tx_to_clean = txq->nb_tx_free - nb_tx_free_last;
2712                        nb_tx_free_last = txq->nb_tx_free;
2713                }
2714        }
2715
2716        return (int)pkt_cnt;
2717}
2718
2719static int
2720i40e_tx_done_cleanup_simple(struct i40e_tx_queue *txq,
2721                        uint32_t free_cnt)
2722{
2723        int i, n, cnt;
2724
2725        if (free_cnt == 0 || free_cnt > txq->nb_tx_desc)
2726                free_cnt = txq->nb_tx_desc;
2727
2728        cnt = free_cnt - free_cnt % txq->tx_rs_thresh;
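            /*
             * free_cnt is rounded down to a multiple of tx_rs_thresh because
             * i40e_tx_free_bufs() frees descriptors in tx_rs_thresh-sized
             * batches, e.g. free_cnt = 100 with tx_rs_thresh = 32
             * (illustrative values) gives cnt = 96.
             */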
2729
2730        for (i = 0; i < cnt; i += n) {
2731                if (txq->nb_tx_desc - txq->nb_tx_free < txq->tx_rs_thresh)
2732                        break;
2733
2734                n = i40e_tx_free_bufs(txq);
2735
2736                if (n == 0)
2737                        break;
2738        }
2739
2740        return i;
2741}
2742
2743static int
2744i40e_tx_done_cleanup_vec(struct i40e_tx_queue *txq __rte_unused,
2745                        uint32_t free_cnt __rte_unused)
2746{
2747        return -ENOTSUP;
2748}
2749int
2750i40e_tx_done_cleanup(void *txq, uint32_t free_cnt)
2751{
2752        struct i40e_tx_queue *q = (struct i40e_tx_queue *)txq;
2753        struct rte_eth_dev *dev = &rte_eth_devices[q->port_id];
2754        struct i40e_adapter *ad =
2755                I40E_DEV_PRIVATE_TO_ADAPTER(dev->data->dev_private);
2756
2757        if (ad->tx_simple_allowed) {
2758                if (ad->tx_vec_allowed)
2759                        return i40e_tx_done_cleanup_vec(q, free_cnt);
2760                else
2761                        return i40e_tx_done_cleanup_simple(q, free_cnt);
2762        } else {
2763                return i40e_tx_done_cleanup_full(q, free_cnt);
2764        }
2765}
2766
2767void
2768i40e_reset_tx_queue(struct i40e_tx_queue *txq)
2769{
2770        struct i40e_tx_entry *txe;
2771        uint16_t i, prev, size;
2772
2773        if (!txq) {
2774                PMD_DRV_LOG(DEBUG, "Pointer to txq is NULL");
2775                return;
2776        }
2777
2778        txe = txq->sw_ring;
2779        size = sizeof(struct i40e_tx_desc) * txq->nb_tx_desc;
2780        for (i = 0; i < size; i++)
2781                ((volatile char *)txq->tx_ring)[i] = 0;
2782
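        /*
         * Link the software ring into a circular list (the last entry's
         * next_id wraps back to 0) and mark every descriptor as DESC_DONE so
         * the first cleanup pass sees a fully completed ring.
         */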
2783        prev = (uint16_t)(txq->nb_tx_desc - 1);
2784        for (i = 0; i < txq->nb_tx_desc; i++) {
2785                volatile struct i40e_tx_desc *txd = &txq->tx_ring[i];
2786
2787                txd->cmd_type_offset_bsz =
2788                        rte_cpu_to_le_64(I40E_TX_DESC_DTYPE_DESC_DONE);
2789                txe[i].mbuf = NULL;
2790                txe[i].last_id = i;
2791                txe[prev].next_id = i;
2792                prev = i;
2793        }
2794
2795        txq->tx_next_dd = (uint16_t)(txq->tx_rs_thresh - 1);
2796        txq->tx_next_rs = (uint16_t)(txq->tx_rs_thresh - 1);
2797
2798        txq->tx_tail = 0;
2799        txq->nb_tx_used = 0;
2800
2801        txq->last_desc_cleaned = (uint16_t)(txq->nb_tx_desc - 1);
2802        txq->nb_tx_free = (uint16_t)(txq->nb_tx_desc - 1);
2803}
2804
2805/* Init the TX queue in hardware */
2806int
2807i40e_tx_queue_init(struct i40e_tx_queue *txq)
2808{
2809        enum i40e_status_code err = I40E_SUCCESS;
2810        struct i40e_vsi *vsi = txq->vsi;
2811        struct i40e_hw *hw = I40E_VSI_TO_HW(vsi);
2812        uint16_t pf_q = txq->reg_idx;
2813        struct i40e_hmc_obj_txq tx_ctx;
2814        uint32_t qtx_ctl;
2815
2816        /* clear the context structure first */
2817        memset(&tx_ctx, 0, sizeof(tx_ctx));
2818        tx_ctx.new_context = 1;
2819        tx_ctx.base = txq->tx_ring_phys_addr / I40E_QUEUE_BASE_ADDR_UNIT;
2820        tx_ctx.qlen = txq->nb_tx_desc;
2821
2822#ifdef RTE_LIBRTE_IEEE1588
2823        tx_ctx.timesync_ena = 1;
2824#endif
2825        tx_ctx.rdylist = rte_le_to_cpu_16(vsi->info.qs_handle[txq->dcb_tc]);
2826        if (vsi->type == I40E_VSI_FDIR)
2827                tx_ctx.fd_ena = TRUE;
2828
2829        err = i40e_clear_lan_tx_queue_context(hw, pf_q);
2830        if (err != I40E_SUCCESS) {
2831                PMD_DRV_LOG(ERR, "Failed to clear LAN TX queue context");
2832                return err;
2833        }
2834
2835        err = i40e_set_lan_tx_queue_context(hw, pf_q, &tx_ctx);
2836        if (err != I40E_SUCCESS) {
2837                PMD_DRV_LOG(ERR, "Failed to set LAN TX queue context");
2838                return err;
2839        }
2840
2841        /* Now associate this queue with this PCI function */
2842        qtx_ctl = I40E_QTX_CTL_PF_QUEUE;
2843        qtx_ctl |= ((hw->pf_id << I40E_QTX_CTL_PF_INDX_SHIFT) &
2844                                        I40E_QTX_CTL_PF_INDX_MASK);
2845        I40E_WRITE_REG(hw, I40E_QTX_CTL(pf_q), qtx_ctl);
2846        I40E_WRITE_FLUSH(hw);
2847
2848        txq->qtx_tail = hw->hw_addr + I40E_QTX_TAIL(pf_q);
2849
2850        return err;
2851}
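
/*
 * Note on the context programmed above: tx_ctx.base is expressed in units of
 * I40E_QUEUE_BASE_ADDR_UNIT (128 bytes). As an illustrative example with an
 * assumed address, a ring whose IOVA is 0x1f400000 would be programmed as
 * base = 0x1f400000 / 128 = 0x3e8000.
 */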
2852
2853int
2854i40e_alloc_rx_queue_mbufs(struct i40e_rx_queue *rxq)
2855{
2856        struct i40e_rx_entry *rxe = rxq->sw_ring;
2857        uint64_t dma_addr;
2858        uint16_t i;
2859
2860        for (i = 0; i < rxq->nb_rx_desc; i++) {
2861                volatile union i40e_rx_desc *rxd;
2862                struct rte_mbuf *mbuf = rte_mbuf_raw_alloc(rxq->mp);
2863
2864                if (unlikely(!mbuf)) {
2865                        PMD_DRV_LOG(ERR, "Failed to allocate mbuf for RX");
2866                        return -ENOMEM;
2867                }
2868
2869                rte_mbuf_refcnt_set(mbuf, 1);
2870                mbuf->next = NULL;
2871                mbuf->data_off = RTE_PKTMBUF_HEADROOM;
2872                mbuf->nb_segs = 1;
2873                mbuf->port = rxq->port_id;
2874
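                /*
                 * Point the descriptor at the start of the mbuf data area:
                 * rte_mbuf_data_iova_default() returns buf_iova plus the
                 * default headroom, converted to little endian for hardware.
                 */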
2875                dma_addr =
2876                        rte_cpu_to_le_64(rte_mbuf_data_iova_default(mbuf));
2877
2878                rxd = &rxq->rx_ring[i];
2879                rxd->read.pkt_addr = dma_addr;
2880                rxd->read.hdr_addr = 0;
2881#ifndef RTE_LIBRTE_I40E_16BYTE_RX_DESC
2882                rxd->read.rsvd1 = 0;
2883                rxd->read.rsvd2 = 0;
2884#endif /* RTE_LIBRTE_I40E_16BYTE_RX_DESC */
2885
2886                rxe[i].mbuf = mbuf;
2887        }
2888
2889        return 0;
2890}
2891
2892/*
2893 * Calculate the Rx buffer length and validate the maximum
2894 * packet length against the jumbo frame configuration.
2895 */
2896static int
2897i40e_rx_queue_config(struct i40e_rx_queue *rxq)
2898{
2899        struct i40e_pf *pf = I40E_VSI_TO_PF(rxq->vsi);
2900        struct i40e_hw *hw = I40E_VSI_TO_HW(rxq->vsi);
2901        struct rte_eth_dev_data *data = pf->dev_data;
2902        uint16_t buf_size;
2903
2904        buf_size = (uint16_t)(rte_pktmbuf_data_room_size(rxq->mp) -
2905                RTE_PKTMBUF_HEADROOM);
2906
2907        switch (pf->flags & (I40E_FLAG_HEADER_SPLIT_DISABLED |
2908                        I40E_FLAG_HEADER_SPLIT_ENABLED)) {
2909        case I40E_FLAG_HEADER_SPLIT_ENABLED: /* Not supported */
2910                rxq->rx_hdr_len = RTE_ALIGN(I40E_RXBUF_SZ_1024,
2911                                (1 << I40E_RXQ_CTX_HBUFF_SHIFT));
2912                rxq->rx_buf_len = RTE_ALIGN(I40E_RXBUF_SZ_2048,
2913                                (1 << I40E_RXQ_CTX_DBUFF_SHIFT));
2914                rxq->hs_mode = i40e_header_split_enabled;
2915                break;
2916        case I40E_FLAG_HEADER_SPLIT_DISABLED:
2917        default:
2918                rxq->rx_hdr_len = 0;
2919                rxq->rx_buf_len = RTE_ALIGN_FLOOR(buf_size,
2920                        (1 << I40E_RXQ_CTX_DBUFF_SHIFT));
2921                rxq->hs_mode = i40e_header_split_none;
2922                break;
2923        }
2924
2925        rxq->max_pkt_len =
2926                RTE_MIN((uint32_t)(hw->func_caps.rx_buf_chain_len *
2927                        rxq->rx_buf_len), data->dev_conf.rxmode.max_rx_pkt_len);
2928        if (data->dev_conf.rxmode.offloads & DEV_RX_OFFLOAD_JUMBO_FRAME) {
2929                if (rxq->max_pkt_len <= I40E_ETH_MAX_LEN ||
2930                        rxq->max_pkt_len > I40E_FRAME_SIZE_MAX) {
2931                        PMD_DRV_LOG(ERR, "maximum packet length must "
2932                                    "be larger than %u and smaller than %u, "
2933                                    "as jumbo frame is enabled",
2934                                    (uint32_t)I40E_ETH_MAX_LEN,
2935                                    (uint32_t)I40E_FRAME_SIZE_MAX);
2936                        return I40E_ERR_CONFIG;
2937                }
2938        } else {
2939                if (rxq->max_pkt_len < RTE_ETHER_MIN_LEN ||
2940                        rxq->max_pkt_len > I40E_ETH_MAX_LEN) {
2941                        PMD_DRV_LOG(ERR, "maximum packet length must be "
2942                                    "larger than %u and smaller than %u, "
2943                                    "as jumbo frame is disabled",
2944                                    (uint32_t)RTE_ETHER_MIN_LEN,
2945                                    (uint32_t)I40E_ETH_MAX_LEN);
2946                        return I40E_ERR_CONFIG;
2947                }
2948        }
2949
2950        return 0;
2951}
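
/*
 * Worked example for the sizing above (illustrative values only, assuming
 * the common 128-byte RTE_PKTMBUF_HEADROOM and a mempool created with
 * RTE_MBUF_DEFAULT_BUF_SIZE, i.e. a 2176-byte data room):
 *
 *	buf_size    = 2176 - 128 = 2048
 *	rx_buf_len  = 2048 rounded down to the (1 << I40E_RXQ_CTX_DBUFF_SHIFT)
 *	              granularity, which with the usual 128-byte unit is
 *	              still 2048
 *	max_pkt_len = min(rx_buf_chain_len * 2048, max_rx_pkt_len)
 *
 * A 9000-byte jumbo frame therefore relies on buffer chaining (scattered Rx).
 */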
2952
2953/* Init the RX queue in hardware */
2954int
2955i40e_rx_queue_init(struct i40e_rx_queue *rxq)
2956{
2957        int err = I40E_SUCCESS;
2958        struct i40e_hw *hw = I40E_VSI_TO_HW(rxq->vsi);
2959        struct rte_eth_dev_data *dev_data = I40E_VSI_TO_DEV_DATA(rxq->vsi);
2960        uint16_t pf_q = rxq->reg_idx;
2961        uint16_t buf_size;
2962        struct i40e_hmc_obj_rxq rx_ctx;
2963
2964        err = i40e_rx_queue_config(rxq);
2965        if (err < 0) {
2966                PMD_DRV_LOG(ERR, "Failed to config RX queue");
2967                return err;
2968        }
2969
2970        /* Clear the context structure first */
2971        memset(&rx_ctx, 0, sizeof(struct i40e_hmc_obj_rxq));
2972        rx_ctx.dbuff = rxq->rx_buf_len >> I40E_RXQ_CTX_DBUFF_SHIFT;
2973        rx_ctx.hbuff = rxq->rx_hdr_len >> I40E_RXQ_CTX_HBUFF_SHIFT;
2974
2975        rx_ctx.base = rxq->rx_ring_phys_addr / I40E_QUEUE_BASE_ADDR_UNIT;
2976        rx_ctx.qlen = rxq->nb_rx_desc;
2977#ifndef RTE_LIBRTE_I40E_16BYTE_RX_DESC
2978        rx_ctx.dsize = 1;
2979#endif
2980        rx_ctx.dtype = rxq->hs_mode;
2981        if (rxq->hs_mode)
2982                rx_ctx.hsplit_0 = I40E_HEADER_SPLIT_ALL;
2983        else
2984                rx_ctx.hsplit_0 = I40E_HEADER_SPLIT_NONE;
2985        rx_ctx.rxmax = rxq->max_pkt_len;
2986        rx_ctx.tphrdesc_ena = 1;
2987        rx_ctx.tphwdesc_ena = 1;
2988        rx_ctx.tphdata_ena = 1;
2989        rx_ctx.tphhead_ena = 1;
2990        rx_ctx.lrxqthresh = 2;
2991        rx_ctx.crcstrip = (rxq->crc_len == 0) ? 1 : 0;
2992        rx_ctx.l2tsel = 1;
2993        /* showiv indicates whether the inner VLAN is stripped inside a
2994         * tunnel packet. When set to 1, VLAN information is stripped from
2995         * the inner header, but the hardware does not put it in the
2996         * descriptor, so leave it set to zero by default.
2997         */
2998        rx_ctx.showiv = 0;
2999        rx_ctx.prefena = 1;
3000
3001        err = i40e_clear_lan_rx_queue_context(hw, pf_q);
3002        if (err != I40E_SUCCESS) {
3003                PMD_DRV_LOG(ERR, "Failed to clear LAN RX queue context");
3004                return err;
3005        }
3006        err = i40e_set_lan_rx_queue_context(hw, pf_q, &rx_ctx);
3007        if (err != I40E_SUCCESS) {
3008                PMD_DRV_LOG(ERR, "Failed to set LAN RX queue context");
3009                return err;
3010        }
3011
3012        rxq->qrx_tail = hw->hw_addr + I40E_QRX_TAIL(pf_q);
3013
3014        buf_size = (uint16_t)(rte_pktmbuf_data_room_size(rxq->mp) -
3015                RTE_PKTMBUF_HEADROOM);
3016
3017        /* Check if scattered RX needs to be used. */
3018        if (rxq->max_pkt_len > buf_size)
3019                dev_data->scattered_rx = 1;
3020
3021        /* Init the RX tail register. */
3022        I40E_PCI_REG_WRITE(rxq->qrx_tail, rxq->nb_rx_desc - 1);
3023
3024        return 0;
3025}
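
/*
 * With the example numbers above (2048-byte buffers), any max_pkt_len larger
 * than a single buffer, e.g. a 9000-byte jumbo configuration, sets
 * dev_data->scattered_rx so that a scatter-capable receive function is
 * selected later in i40e_set_rx_function().
 */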
3026
3027void
3028i40e_dev_clear_queues(struct rte_eth_dev *dev)
3029{
3030        uint16_t i;
3031
3032        PMD_INIT_FUNC_TRACE();
3033
3034        for (i = 0; i < dev->data->nb_tx_queues; i++) {
3035                if (!dev->data->tx_queues[i])
3036                        continue;
3037                i40e_tx_queue_release_mbufs(dev->data->tx_queues[i]);
3038                i40e_reset_tx_queue(dev->data->tx_queues[i]);
3039        }
3040
3041        for (i = 0; i < dev->data->nb_rx_queues; i++) {
3042                if (!dev->data->rx_queues[i])
3043                        continue;
3044                i40e_rx_queue_release_mbufs(dev->data->rx_queues[i]);
3045                i40e_reset_rx_queue(dev->data->rx_queues[i]);
3046        }
3047}
3048
3049void
3050i40e_dev_free_queues(struct rte_eth_dev *dev)
3051{
3052        uint16_t i;
3053
3054        PMD_INIT_FUNC_TRACE();
3055
3056        for (i = 0; i < dev->data->nb_rx_queues; i++) {
3057                if (!dev->data->rx_queues[i])
3058                        continue;
3059                i40e_dev_rx_queue_release(dev->data->rx_queues[i]);
3060                dev->data->rx_queues[i] = NULL;
3061                rte_eth_dma_zone_free(dev, "rx_ring", i);
3062        }
3063
3064        for (i = 0; i < dev->data->nb_tx_queues; i++) {
3065                if (!dev->data->tx_queues[i])
3066                        continue;
3067                i40e_dev_tx_queue_release(dev->data->tx_queues[i]);
3068                dev->data->tx_queues[i] = NULL;
3069                rte_eth_dma_zone_free(dev, "tx_ring", i);
3070        }
3071}
3072
3073enum i40e_status_code
3074i40e_fdir_setup_tx_resources(struct i40e_pf *pf)
3075{
3076        struct i40e_tx_queue *txq;
3077        const struct rte_memzone *tz = NULL;
3078        struct rte_eth_dev *dev;
3079        uint32_t ring_size;
3080
3081        if (!pf) {
3082                PMD_DRV_LOG(ERR, "PF is not available");
3083                return I40E_ERR_BAD_PTR;
3084        }
3085
3086        dev = &rte_eth_devices[pf->dev_data->port_id];
3087
3088        /* Allocate the TX queue data structure. */
3089        txq = rte_zmalloc_socket("i40e fdir tx queue",
3090                                  sizeof(struct i40e_tx_queue),
3091                                  RTE_CACHE_LINE_SIZE,
3092                                  SOCKET_ID_ANY);
3093        if (!txq) {
3094                PMD_DRV_LOG(ERR, "Failed to allocate memory for "
3095                                        "tx queue structure.");
3096                return I40E_ERR_NO_MEMORY;
3097        }
3098
3099        /* Allocate TX hardware ring descriptors. */
3100        ring_size = sizeof(struct i40e_tx_desc) * I40E_FDIR_NUM_TX_DESC;
3101        ring_size = RTE_ALIGN(ring_size, I40E_DMA_MEM_ALIGN);
3102
3103        tz = rte_eth_dma_zone_reserve(dev, "fdir_tx_ring",
3104                                      I40E_FDIR_QUEUE_ID, ring_size,
3105                                      I40E_RING_BASE_ALIGN, SOCKET_ID_ANY);
3106        if (!tz) {
3107                i40e_dev_tx_queue_release(txq);
3108                PMD_DRV_LOG(ERR, "Failed to reserve DMA memory for TX.");
3109                return I40E_ERR_NO_MEMORY;
3110        }
3111
3112        txq->nb_tx_desc = I40E_FDIR_NUM_TX_DESC;
3113        txq->queue_id = I40E_FDIR_QUEUE_ID;
3114        txq->reg_idx = pf->fdir.fdir_vsi->base_queue;
3115        txq->vsi = pf->fdir.fdir_vsi;
3116
3117        txq->tx_ring_phys_addr = tz->iova;
3118        txq->tx_ring = (struct i40e_tx_desc *)tz->addr;
3119
3120        /*
3121         * No need to allocate a software ring or to reset the FDIR
3122         * program queue; just mark the queue as configured.
3123         */
3124        txq->q_set = TRUE;
3125        pf->fdir.txq = txq;
3126        pf->fdir.txq_available_buf_count = I40E_FDIR_PRG_PKT_CNT;
3127
3128        return I40E_SUCCESS;
3129}
3130
3131enum i40e_status_code
3132i40e_fdir_setup_rx_resources(struct i40e_pf *pf)
3133{
3134        struct i40e_rx_queue *rxq;
3135        const struct rte_memzone *rz = NULL;
3136        uint32_t ring_size;
3137        struct rte_eth_dev *dev;
3138
3139        if (!pf) {
3140                PMD_DRV_LOG(ERR, "PF is not available");
3141                return I40E_ERR_BAD_PTR;
3142        }
3143
3144        dev = &rte_eth_devices[pf->dev_data->port_id];
3145
3146        /* Allocate the RX queue data structure. */
3147        rxq = rte_zmalloc_socket("i40e fdir rx queue",
3148                                  sizeof(struct i40e_rx_queue),
3149                                  RTE_CACHE_LINE_SIZE,
3150                                  SOCKET_ID_ANY);
3151        if (!rxq) {
3152                PMD_DRV_LOG(ERR, "Failed to allocate memory for "
3153                                        "rx queue structure.");
3154                return I40E_ERR_NO_MEMORY;
3155        }
3156
3157        /* Allocate RX hardware ring descriptors. */
3158        ring_size = sizeof(union i40e_rx_desc) * I40E_FDIR_NUM_RX_DESC;
3159        ring_size = RTE_ALIGN(ring_size, I40E_DMA_MEM_ALIGN);
3160
3161        rz = rte_eth_dma_zone_reserve(dev, "fdir_rx_ring",
3162                                      I40E_FDIR_QUEUE_ID, ring_size,
3163                                      I40E_RING_BASE_ALIGN, SOCKET_ID_ANY);
3164        if (!rz) {
3165                i40e_dev_rx_queue_release(rxq);
3166                PMD_DRV_LOG(ERR, "Failed to reserve DMA memory for RX.");
3167                return I40E_ERR_NO_MEMORY;
3168        }
3169
3170        rxq->nb_rx_desc = I40E_FDIR_NUM_RX_DESC;
3171        rxq->queue_id = I40E_FDIR_QUEUE_ID;
3172        rxq->reg_idx = pf->fdir.fdir_vsi->base_queue;
3173        rxq->vsi = pf->fdir.fdir_vsi;
3174
3175        rxq->rx_ring_phys_addr = rz->iova;
3176        memset(rz->addr, 0, I40E_FDIR_NUM_RX_DESC * sizeof(union i40e_rx_desc));
3177        rxq->rx_ring = (union i40e_rx_desc *)rz->addr;
3178
3179        /*
3180         * No need to allocate a software ring or to reset the FDIR
3181         * RX queue; just mark the queue as configured.
3182         */
3183        rxq->q_set = TRUE;
3184        pf->fdir.rxq = rxq;
3185
3186        return I40E_SUCCESS;
3187}
3188
3189void
3190i40e_rxq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
3191        struct rte_eth_rxq_info *qinfo)
3192{
3193        struct i40e_rx_queue *rxq;
3194
3195        rxq = dev->data->rx_queues[queue_id];
3196
3197        qinfo->mp = rxq->mp;
3198        qinfo->scattered_rx = dev->data->scattered_rx;
3199        qinfo->nb_desc = rxq->nb_rx_desc;
3200
3201        qinfo->conf.rx_free_thresh = rxq->rx_free_thresh;
3202        qinfo->conf.rx_drop_en = rxq->drop_en;
3203        qinfo->conf.rx_deferred_start = rxq->rx_deferred_start;
3204        qinfo->conf.offloads = rxq->offloads;
3205}
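
/*
 * A minimal sketch of retrieving this information from an application via
 * the generic ethdev layer (names are placeholders):
 *
 *	struct rte_eth_rxq_info qinfo;
 *
 *	if (rte_eth_rx_queue_info_get(port_id, queue_id, &qinfo) == 0)
 *		printf("rxq has %u descriptors\n", (unsigned int)qinfo.nb_desc);
 *
 * rte_eth_tx_queue_info_get() works the same way for the Tx counterpart
 * below.
 */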
3206
3207void
3208i40e_txq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
3209        struct rte_eth_txq_info *qinfo)
3210{
3211        struct i40e_tx_queue *txq;
3212
3213        txq = dev->data->tx_queues[queue_id];
3214
3215        qinfo->nb_desc = txq->nb_tx_desc;
3216
3217        qinfo->conf.tx_thresh.pthresh = txq->pthresh;
3218        qinfo->conf.tx_thresh.hthresh = txq->hthresh;
3219        qinfo->conf.tx_thresh.wthresh = txq->wthresh;
3220
3221        qinfo->conf.tx_free_thresh = txq->tx_free_thresh;
3222        qinfo->conf.tx_rs_thresh = txq->tx_rs_thresh;
3223        qinfo->conf.tx_deferred_start = txq->tx_deferred_start;
3224        qinfo->conf.offloads = txq->offloads;
3225}
3226
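/*
 * Helper for the Rx/Tx path selection below: returns true only when the
 * requested AVX2/AVX512 level is permitted by the EAL max SIMD bitwidth,
 * reported by the CPU at runtime and compiled in (CC_AVX2_SUPPORT /
 * CC_AVX512_SUPPORT); otherwise the caller falls back to a narrower vector
 * path or to scalar code.
 */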
3227static inline bool
3228get_avx_supported(bool request_avx512)
3229{
3230#ifdef RTE_ARCH_X86
3231        if (request_avx512) {
3232                if (rte_vect_get_max_simd_bitwidth() >= RTE_VECT_SIMD_512 &&
3233                rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F) == 1 &&
3234                rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512BW) == 1)
3235#ifdef CC_AVX512_SUPPORT
3236                        return true;
3237#else
3238                PMD_DRV_LOG(NOTICE,
3239                        "AVX512 is not supported in build env");
3240                return false;
3241#endif
3242        } else {
3243                if (rte_vect_get_max_simd_bitwidth() >= RTE_VECT_SIMD_256 &&
3244                rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2) == 1 &&
3245                rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F) == 1)
3246#ifdef CC_AVX2_SUPPORT
3247                        return true;
3248#else
3249                PMD_DRV_LOG(NOTICE,
3250                        "AVX2 is not supported in build env");
3251                return false;
3252#endif
3253        }
3254#else
3255        RTE_SET_USED(request_avx512);
3256#endif /* RTE_ARCH_X86 */
3257
3258        return false;
3259}
3260
3261
3262void __rte_cold
3263i40e_set_rx_function(struct rte_eth_dev *dev)
3264{
3265        struct i40e_adapter *ad =
3266                I40E_DEV_PRIVATE_TO_ADAPTER(dev->data->dev_private);
3267        uint16_t rx_using_sse, i;
3268        /* Vector Rx can only be used when a few configuration conditions
3269         * are met and Rx Bulk Allocation is allowed.
3270         */
3271        if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
3272#ifdef RTE_ARCH_X86
3273                ad->rx_use_avx512 = false;
3274                ad->rx_use_avx2 = false;
3275#endif
3276                if (i40e_rx_vec_dev_conf_condition_check(dev) ||
3277                    !ad->rx_bulk_alloc_allowed) {
3278                        PMD_INIT_LOG(DEBUG, "Port[%d] doesn't meet"
3279                                     " Vector Rx preconditions",
3280                                     dev->data->port_id);
3281
3282                        ad->rx_vec_allowed = false;
3283                }
3284                if (ad->rx_vec_allowed) {
3285                        for (i = 0; i < dev->data->nb_rx_queues; i++) {
3286                                struct i40e_rx_queue *rxq =
3287                                        dev->data->rx_queues[i];
3288
3289                                if (rxq && i40e_rxq_vec_setup(rxq)) {
3290                                        ad->rx_vec_allowed = false;
3291                                        break;
3292                                }
3293                        }
3294#ifdef RTE_ARCH_X86
3295                        ad->rx_use_avx512 = get_avx_supported(1);
3296
3297                        if (!ad->rx_use_avx512)
3298                                ad->rx_use_avx2 = get_avx_supported(0);
3299#endif
3300                }
3301        }
3302
3303        if (ad->rx_vec_allowed  &&
3304            rte_vect_get_max_simd_bitwidth() >= RTE_VECT_SIMD_128) {
3305#ifdef RTE_ARCH_X86
3306                if (dev->data->scattered_rx) {
3307                        if (ad->rx_use_avx512) {
3308#ifdef CC_AVX512_SUPPORT
3309                                PMD_DRV_LOG(NOTICE,
3310                                        "Using AVX512 Vector Scattered Rx (port %d).",
3311                                        dev->data->port_id);
3312                                dev->rx_pkt_burst =
3313                                        i40e_recv_scattered_pkts_vec_avx512;
3314#endif
3315                        } else {
3316                                PMD_INIT_LOG(DEBUG,
3317                                        "Using %sVector Scattered Rx (port %d).",
3318                                        ad->rx_use_avx2 ? "avx2 " : "",
3319                                        dev->data->port_id);
3320                                dev->rx_pkt_burst = ad->rx_use_avx2 ?
3321                                        i40e_recv_scattered_pkts_vec_avx2 :
3322                                        i40e_recv_scattered_pkts_vec;
3323                        }
3324                } else {
3325                        if (ad->rx_use_avx512) {
3326#ifdef CC_AVX512_SUPPORT
3327                                PMD_DRV_LOG(NOTICE,
3328                                        "Using AVX512 Vector Rx (port %d).",
3329                                        dev->data->port_id);
3330                                dev->rx_pkt_burst =
3331                                        i40e_recv_pkts_vec_avx512;
3332#endif
3333                        } else {
3334                                PMD_INIT_LOG(DEBUG,
3335                                        "Using %sVector Rx (port %d).",
3336                                        ad->rx_use_avx2 ? "avx2 " : "",
3337                                        dev->data->port_id);
3338                                dev->rx_pkt_burst = ad->rx_use_avx2 ?
3339                                        i40e_recv_pkts_vec_avx2 :
3340                                        i40e_recv_pkts_vec;
3341                        }
3342                }
3343#else /* RTE_ARCH_X86 */
3344                if (dev->data->scattered_rx) {
3345                        PMD_INIT_LOG(DEBUG,
3346                                     "Using Vector Scattered Rx (port %d).",
3347                                     dev->data->port_id);
3348                        dev->rx_pkt_burst = i40e_recv_scattered_pkts_vec;
3349                } else {
3350                        PMD_INIT_LOG(DEBUG, "Using Vector Rx (port %d).",
3351                                     dev->data->port_id);
3352                        dev->rx_pkt_burst = i40e_recv_pkts_vec;
3353                }
3354#endif /* RTE_ARCH_X86 */
3355        } else if (!dev->data->scattered_rx && ad->rx_bulk_alloc_allowed) {
3356                PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions are "
3357                                    "satisfied. Rx Burst Bulk Alloc function "
3358                                    "will be used on port=%d.",
3359                             dev->data->port_id);
3360
3361                dev->rx_pkt_burst = i40e_recv_pkts_bulk_alloc;
3362        } else {
3363                /* Simple Rx Path. */
3364                PMD_INIT_LOG(DEBUG, "Simple Rx path will be used on port=%d.",
3365                             dev->data->port_id);
3366                dev->rx_pkt_burst = dev->data->scattered_rx ?
3367                                        i40e_recv_scattered_pkts :
3368                                        i40e_recv_pkts;
3369        }
3370
3371        /* Propagate information about RX function choice through all queues. */
3372        if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
3373                rx_using_sse =
3374                        (dev->rx_pkt_burst == i40e_recv_scattered_pkts_vec ||
3375                         dev->rx_pkt_burst == i40e_recv_pkts_vec ||
3376#ifdef CC_AVX512_SUPPORT
3377                         dev->rx_pkt_burst == i40e_recv_scattered_pkts_vec_avx512 ||
3378                         dev->rx_pkt_burst == i40e_recv_pkts_vec_avx512 ||
3379#endif
3380                         dev->rx_pkt_burst == i40e_recv_scattered_pkts_vec_avx2 ||
3381                         dev->rx_pkt_burst == i40e_recv_pkts_vec_avx2);
3382
3383                for (i = 0; i < dev->data->nb_rx_queues; i++) {
3384                        struct i40e_rx_queue *rxq = dev->data->rx_queues[i];
3385
3386                        if (rxq)
3387                                rxq->rx_using_sse = rx_using_sse;
3388                }
3389        }
3390}
3391
3392static const struct {
3393        eth_rx_burst_t pkt_burst;
3394        const char *info;
3395} i40e_rx_burst_infos[] = {
3396        { i40e_recv_scattered_pkts,          "Scalar Scattered" },
3397        { i40e_recv_pkts_bulk_alloc,         "Scalar Bulk Alloc" },
3398        { i40e_recv_pkts,                    "Scalar" },
3399#ifdef RTE_ARCH_X86
3400#ifdef CC_AVX512_SUPPORT
3401        { i40e_recv_scattered_pkts_vec_avx512, "Vector AVX512 Scattered" },
3402        { i40e_recv_pkts_vec_avx512,           "Vector AVX512" },
3403#endif
3404        { i40e_recv_scattered_pkts_vec_avx2, "Vector AVX2 Scattered" },
3405        { i40e_recv_pkts_vec_avx2,           "Vector AVX2" },
3406        { i40e_recv_scattered_pkts_vec,      "Vector SSE Scattered" },
3407        { i40e_recv_pkts_vec,                "Vector SSE" },
3408#elif defined(RTE_ARCH_ARM64)
3409        { i40e_recv_scattered_pkts_vec,      "Vector Neon Scattered" },
3410        { i40e_recv_pkts_vec,                "Vector Neon" },
3411#elif defined(RTE_ARCH_PPC_64)
3412        { i40e_recv_scattered_pkts_vec,      "Vector AltiVec Scattered" },
3413        { i40e_recv_pkts_vec,                "Vector AltiVec" },
3414#endif
3415};
3416
3417int
3418i40e_rx_burst_mode_get(struct rte_eth_dev *dev, __rte_unused uint16_t queue_id,
3419                       struct rte_eth_burst_mode *mode)
3420{
3421        eth_rx_burst_t pkt_burst = dev->rx_pkt_burst;
3422        int ret = -EINVAL;
3423        unsigned int i;
3424
3425        for (i = 0; i < RTE_DIM(i40e_rx_burst_infos); ++i) {
3426                if (pkt_burst == i40e_rx_burst_infos[i].pkt_burst) {
3427                        snprintf(mode->info, sizeof(mode->info), "%s",
3428                                 i40e_rx_burst_infos[i].info);
3429                        ret = 0;
3430                        break;
3431                }
3432        }
3433
3434        return ret;
3435}
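
/*
 * A minimal sketch of querying the burst mode reported above from an
 * application (placeholder names):
 *
 *	struct rte_eth_burst_mode mode;
 *
 *	if (rte_eth_rx_burst_mode_get(port_id, queue_id, &mode) == 0)
 *		printf("Rx burst mode: %s\n", mode.info);
 *
 * The string is one of the i40e_rx_burst_infos entries, e.g. "Vector AVX2"
 * or "Scalar Bulk Alloc".
 */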
3436
3437void __rte_cold
3438i40e_set_tx_function_flag(struct rte_eth_dev *dev, struct i40e_tx_queue *txq)
3439{
3440        struct i40e_adapter *ad =
3441                I40E_DEV_PRIVATE_TO_ADAPTER(dev->data->dev_private);
3442
3443        /* Use a simple Tx queue if possible (only fast free is allowed) */
3444        ad->tx_simple_allowed =
3445                (txq->offloads ==
3446                 (txq->offloads & DEV_TX_OFFLOAD_MBUF_FAST_FREE) &&
3447                 txq->tx_rs_thresh >= RTE_PMD_I40E_TX_MAX_BURST);
3448        ad->tx_vec_allowed = (ad->tx_simple_allowed &&
3449                        txq->tx_rs_thresh <= RTE_I40E_TX_MAX_FREE_BUF_SZ);
3450
3451        if (ad->tx_vec_allowed)
3452                PMD_INIT_LOG(DEBUG, "Vector Tx can be enabled on Tx queue %u.",
3453                                txq->queue_id);
3454        else if (ad->tx_simple_allowed)
3455                PMD_INIT_LOG(DEBUG, "Simple Tx can be enabled on Tx queue %u.",
3456                                txq->queue_id);
3457        else
3458                PMD_INIT_LOG(DEBUG,
3459                                "Neither simple nor vector Tx enabled on Tx queue %u",
3460                                txq->queue_id);
3461}
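
/*
 * Illustrative example for the checks above (constants assumed to keep their
 * usual values, RTE_PMD_I40E_TX_MAX_BURST == 32 and
 * RTE_I40E_TX_MAX_FREE_BUF_SZ == 64): a queue configured with
 * tx_rs_thresh = 32 and only DEV_TX_OFFLOAD_MBUF_FAST_FREE qualifies for
 * both the simple and the vector Tx path, while requesting e.g. checksum
 * offload forces the full-featured i40e_xmit_pkts() path.
 */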
3462
3463void __rte_cold
3464i40e_set_tx_function(struct rte_eth_dev *dev)
3465{
3466        struct i40e_adapter *ad =
3467                I40E_DEV_PRIVATE_TO_ADAPTER(dev->data->dev_private);
3468        int i;
3469
3470        if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
3471#ifdef RTE_ARCH_X86
3472                ad->tx_use_avx2 = false;
3473                ad->tx_use_avx512 = false;
3474#endif
3475                if (ad->tx_vec_allowed) {
3476                        for (i = 0; i < dev->data->nb_tx_queues; i++) {
3477                                struct i40e_tx_queue *txq =
3478                                        dev->data->tx_queues[i];
3479
3480                                if (txq && i40e_txq_vec_setup(txq)) {
3481                                        ad->tx_vec_allowed = false;
3482                                        break;
3483                                }
3484                        }
3485#ifdef RTE_ARCH_X86
3486                        ad->tx_use_avx512 = get_avx_supported(1);
3487
3488                        if (!ad->tx_use_avx512)
3489                                ad->tx_use_avx2 = get_avx_supported(0);
3490#endif
3491                }
3492        }
3493
3494        if (ad->tx_simple_allowed) {
3495                if (ad->tx_vec_allowed &&
3496                    rte_vect_get_max_simd_bitwidth() >= RTE_VECT_SIMD_128) {
3497#ifdef RTE_ARCH_X86
3498                        if (ad->tx_use_avx512) {
3499#ifdef CC_AVX512_SUPPORT
3500                                PMD_DRV_LOG(NOTICE, "Using AVX512 Vector Tx (port %d).",
3501                                            dev->data->port_id);
3502                                dev->tx_pkt_burst = i40e_xmit_pkts_vec_avx512;
3503#endif
3504                        } else {
3505                                PMD_INIT_LOG(DEBUG, "Using %sVector Tx (port %d).",
3506                                             ad->tx_use_avx2 ? "avx2 " : "",
3507                                             dev->data->port_id);
3508                                dev->tx_pkt_burst = ad->tx_use_avx2 ?
3509                                                    i40e_xmit_pkts_vec_avx2 :
3510                                                    i40e_xmit_pkts_vec;
3511                        }
3512#else /* RTE_ARCH_X86 */
3513                        PMD_INIT_LOG(DEBUG, "Using Vector Tx (port %d).",
3514                                     dev->data->port_id);
3515                        dev->tx_pkt_burst = i40e_xmit_pkts_vec;
3516#endif /* RTE_ARCH_X86 */
3517                } else {
3518                        PMD_INIT_LOG(DEBUG, "Simple Tx path will be used.");
3519                        dev->tx_pkt_burst = i40e_xmit_pkts_simple;
3520                }
3521                dev->tx_pkt_prepare = i40e_simple_prep_pkts;
3522        } else {
3523                PMD_INIT_LOG(DEBUG, "Full-featured Tx path will be used.");
3524                dev->tx_pkt_burst = i40e_xmit_pkts;
3525                dev->tx_pkt_prepare = i40e_prep_pkts;
3526        }
3527}
3528
3529static const struct {
3530        eth_tx_burst_t pkt_burst;
3531        const char *info;
3532} i40e_tx_burst_infos[] = {
3533        { i40e_xmit_pkts_simple,   "Scalar Simple" },
3534        { i40e_xmit_pkts,          "Scalar" },
3535#ifdef RTE_ARCH_X86
3536#ifdef CC_AVX512_SUPPORT
3537        { i40e_xmit_pkts_vec_avx512, "Vector AVX512" },
3538#endif
3539        { i40e_xmit_pkts_vec_avx2, "Vector AVX2" },
3540        { i40e_xmit_pkts_vec,      "Vector SSE" },
3541#elif defined(RTE_ARCH_ARM64)
3542        { i40e_xmit_pkts_vec,      "Vector Neon" },
3543#elif defined(RTE_ARCH_PPC_64)
3544        { i40e_xmit_pkts_vec,      "Vector AltiVec" },
3545#endif
3546};
3547
3548int
3549i40e_tx_burst_mode_get(struct rte_eth_dev *dev, __rte_unused uint16_t queue_id,
3550                       struct rte_eth_burst_mode *mode)
3551{
3552        eth_tx_burst_t pkt_burst = dev->tx_pkt_burst;
3553        int ret = -EINVAL;
3554        unsigned int i;
3555
3556        for (i = 0; i < RTE_DIM(i40e_tx_burst_infos); ++i) {
3557                if (pkt_burst == i40e_tx_burst_infos[i].pkt_burst) {
3558                        snprintf(mode->info, sizeof(mode->info), "%s",
3559                                 i40e_tx_burst_infos[i].info);
3560                        ret = 0;
3561                        break;
3562                }
3563        }
3564
3565        return ret;
3566}
3567
3568void __rte_cold
3569i40e_set_default_ptype_table(struct rte_eth_dev *dev)
3570{
3571        struct i40e_adapter *ad =
3572                I40E_DEV_PRIVATE_TO_ADAPTER(dev->data->dev_private);
3573        int i;
3574
3575        for (i = 0; i < I40E_MAX_PKT_TYPE; i++)
3576                ad->ptype_tbl[i] = i40e_get_default_pkt_type(i);
3577}
3578
3579void __rte_cold
3580i40e_set_default_pctype_table(struct rte_eth_dev *dev)
3581{
3582        struct i40e_adapter *ad =
3583                        I40E_DEV_PRIVATE_TO_ADAPTER(dev->data->dev_private);
3584        struct i40e_hw *hw = I40E_DEV_PRIVATE_TO_HW(dev->data->dev_private);
3585        int i;
3586
3587        for (i = 0; i < I40E_FLOW_TYPE_MAX; i++)
3588                ad->pctypes_tbl[i] = 0ULL;
3589        ad->flow_types_mask = 0ULL;
3590        ad->pctypes_mask = 0ULL;
3591
3592        ad->pctypes_tbl[RTE_ETH_FLOW_FRAG_IPV4] =
3593                                (1ULL << I40E_FILTER_PCTYPE_FRAG_IPV4);
3594        ad->pctypes_tbl[RTE_ETH_FLOW_NONFRAG_IPV4_UDP] =
3595                                (1ULL << I40E_FILTER_PCTYPE_NONF_IPV4_UDP);
3596        ad->pctypes_tbl[RTE_ETH_FLOW_NONFRAG_IPV4_TCP] =
3597                                (1ULL << I40E_FILTER_PCTYPE_NONF_IPV4_TCP);
3598        ad->pctypes_tbl[RTE_ETH_FLOW_NONFRAG_IPV4_SCTP] =
3599                                (1ULL << I40E_FILTER_PCTYPE_NONF_IPV4_SCTP);
3600        ad->pctypes_tbl[RTE_ETH_FLOW_NONFRAG_IPV4_OTHER] =
3601                                (1ULL << I40E_FILTER_PCTYPE_NONF_IPV4_OTHER);
3602        ad->pctypes_tbl[RTE_ETH_FLOW_FRAG_IPV6] =
3603                                (1ULL << I40E_FILTER_PCTYPE_FRAG_IPV6);
3604        ad->pctypes_tbl[RTE_ETH_FLOW_NONFRAG_IPV6_UDP] =
3605                                (1ULL << I40E_FILTER_PCTYPE_NONF_IPV6_UDP);
3606        ad->pctypes_tbl[RTE_ETH_FLOW_NONFRAG_IPV6_TCP] =
3607                                (1ULL << I40E_FILTER_PCTYPE_NONF_IPV6_TCP);
3608        ad->pctypes_tbl[RTE_ETH_FLOW_NONFRAG_IPV6_SCTP] =
3609                                (1ULL << I40E_FILTER_PCTYPE_NONF_IPV6_SCTP);
3610        ad->pctypes_tbl[RTE_ETH_FLOW_NONFRAG_IPV6_OTHER] =
3611                                (1ULL << I40E_FILTER_PCTYPE_NONF_IPV6_OTHER);
3612        ad->pctypes_tbl[RTE_ETH_FLOW_L2_PAYLOAD] =
3613                                (1ULL << I40E_FILTER_PCTYPE_L2_PAYLOAD);
3614
3615        if (hw->mac.type == I40E_MAC_X722 ||
3616                hw->mac.type == I40E_MAC_X722_VF) {
3617                ad->pctypes_tbl[RTE_ETH_FLOW_NONFRAG_IPV4_UDP] |=
3618                        (1ULL << I40E_FILTER_PCTYPE_NONF_UNICAST_IPV4_UDP);
3619                ad->pctypes_tbl[RTE_ETH_FLOW_NONFRAG_IPV4_UDP] |=
3620                        (1ULL << I40E_FILTER_PCTYPE_NONF_MULTICAST_IPV4_UDP);
3621                ad->pctypes_tbl[RTE_ETH_FLOW_NONFRAG_IPV4_TCP] |=
3622                        (1ULL << I40E_FILTER_PCTYPE_NONF_IPV4_TCP_SYN_NO_ACK);
3623                ad->pctypes_tbl[RTE_ETH_FLOW_NONFRAG_IPV6_UDP] |=
3624                        (1ULL << I40E_FILTER_PCTYPE_NONF_UNICAST_IPV6_UDP);
3625                ad->pctypes_tbl[RTE_ETH_FLOW_NONFRAG_IPV6_UDP] |=
3626                        (1ULL << I40E_FILTER_PCTYPE_NONF_MULTICAST_IPV6_UDP);
3627                ad->pctypes_tbl[RTE_ETH_FLOW_NONFRAG_IPV6_TCP] |=
3628                        (1ULL << I40E_FILTER_PCTYPE_NONF_IPV6_TCP_SYN_NO_ACK);
3629        }
3630
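        /*
         * Derive the aggregate masks: a flow type is advertised in
         * flow_types_mask only if at least one packet classification type
         * (PCTYPE) was mapped to it above, and pctypes_mask is the union of
         * all mapped PCTYPE bits.
         */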
3631        for (i = 0; i < I40E_FLOW_TYPE_MAX; i++) {
3632                if (ad->pctypes_tbl[i])
3633                        ad->flow_types_mask |= (1ULL << i);
3634                ad->pctypes_mask |= ad->pctypes_tbl[i];
3635        }
3636}
3637
3638#ifndef CC_AVX2_SUPPORT
3639uint16_t
3640i40e_recv_pkts_vec_avx2(void __rte_unused *rx_queue,
3641                        struct rte_mbuf __rte_unused **rx_pkts,
3642                        uint16_t __rte_unused nb_pkts)
3643{
3644        return 0;
3645}
3646
3647uint16_t
3648i40e_recv_scattered_pkts_vec_avx2(void __rte_unused *rx_queue,
3649                        struct rte_mbuf __rte_unused **rx_pkts,
3650                        uint16_t __rte_unused nb_pkts)
3651{
3652        return 0;
3653}
3654
3655uint16_t
3656i40e_xmit_pkts_vec_avx2(void __rte_unused * tx_queue,
3657                          struct rte_mbuf __rte_unused **tx_pkts,
3658                          uint16_t __rte_unused nb_pkts)
3659{
3660        return 0;
3661}
3662#endif /* ifndef CC_AVX2_SUPPORT */
3663