dpdk/drivers/net/e1000/igb_rxtx.c
   1/* SPDX-License-Identifier: BSD-3-Clause
   2 * Copyright(c) 2010-2016 Intel Corporation
   3 */
   4
   5#include <sys/queue.h>
   6
   7#include <stdio.h>
   8#include <stdlib.h>
   9#include <string.h>
  10#include <errno.h>
  11#include <stdint.h>
  12#include <stdarg.h>
  13#include <inttypes.h>
  14
  15#include <rte_interrupts.h>
  16#include <rte_byteorder.h>
  17#include <rte_common.h>
  18#include <rte_log.h>
  19#include <rte_debug.h>
  20#include <rte_pci.h>
  21#include <rte_memory.h>
  22#include <rte_memcpy.h>
  23#include <rte_memzone.h>
  24#include <rte_launch.h>
  25#include <rte_eal.h>
  26#include <rte_per_lcore.h>
  27#include <rte_lcore.h>
  28#include <rte_atomic.h>
  29#include <rte_branch_prediction.h>
  30#include <rte_mempool.h>
  31#include <rte_malloc.h>
  32#include <rte_mbuf.h>
  33#include <rte_ether.h>
  34#include <ethdev_driver.h>
  35#include <rte_prefetch.h>
  36#include <rte_udp.h>
  37#include <rte_tcp.h>
  38#include <rte_sctp.h>
  39#include <rte_net.h>
  40#include <rte_string_fns.h>
  41
  42#include "e1000_logs.h"
  43#include "base/e1000_api.h"
  44#include "e1000_ethdev.h"
  45
  46#ifdef RTE_LIBRTE_IEEE1588
  47#define IGB_TX_IEEE1588_TMST RTE_MBUF_F_TX_IEEE1588_TMST
  48#else
  49#define IGB_TX_IEEE1588_TMST 0
  50#endif
   51/* Bit mask to indicate which bits are required for building the TX context */
  52#define IGB_TX_OFFLOAD_MASK (RTE_MBUF_F_TX_OUTER_IPV6 |  \
  53                RTE_MBUF_F_TX_OUTER_IPV4 |       \
  54                RTE_MBUF_F_TX_IPV6 |             \
  55                RTE_MBUF_F_TX_IPV4 |             \
  56                RTE_MBUF_F_TX_VLAN |             \
  57                RTE_MBUF_F_TX_IP_CKSUM |                 \
  58                RTE_MBUF_F_TX_L4_MASK |          \
  59                RTE_MBUF_F_TX_TCP_SEG |          \
  60                IGB_TX_IEEE1588_TMST)
  61
  62#define IGB_TX_OFFLOAD_NOTSUP_MASK \
  63                (RTE_MBUF_F_TX_OFFLOAD_MASK ^ IGB_TX_OFFLOAD_MASK)
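/*
 * Any ol_flags bit outside IGB_TX_OFFLOAD_MASK is not supported by this
 * driver; eth_igb_prep_pkts() rejects such packets with rte_errno = ENOTSUP.
 */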
  64
  65/**
  66 * Structure associated with each descriptor of the RX ring of a RX queue.
  67 */
  68struct igb_rx_entry {
  69        struct rte_mbuf *mbuf; /**< mbuf associated with RX descriptor. */
  70};
  71
  72/**
  73 * Structure associated with each descriptor of the TX ring of a TX queue.
  74 */
  75struct igb_tx_entry {
  76        struct rte_mbuf *mbuf; /**< mbuf associated with TX desc, if any. */
  77        uint16_t next_id; /**< Index of next descriptor in ring. */
  78        uint16_t last_id; /**< Index of last scattered descriptor. */
  79};
  80
  81/**
  82 * rx queue flags
  83 */
  84enum igb_rxq_flags {
  85        IGB_RXQ_FLAG_LB_BSWAP_VLAN = 0x01,
  86};
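/*
 * IGB_RXQ_FLAG_LB_BSWAP_VLAN: on queues where locally looped-back packets
 * (E1000_RXDEXT_STATERR_LB set in the descriptor) carry the VLAN tag in
 * network byte order, the receive functions byte-swap vlan_tci before
 * storing it in the mbuf (see the vlan_tci handling in the RX paths below).
 */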
  87
  88/**
  89 * Structure associated with each RX queue.
  90 */
  91struct igb_rx_queue {
  92        struct rte_mempool  *mb_pool;   /**< mbuf pool to populate RX ring. */
  93        volatile union e1000_adv_rx_desc *rx_ring; /**< RX ring virtual address. */
  94        uint64_t            rx_ring_phys_addr; /**< RX ring DMA address. */
  95        volatile uint32_t   *rdt_reg_addr; /**< RDT register address. */
  96        volatile uint32_t   *rdh_reg_addr; /**< RDH register address. */
  97        struct igb_rx_entry *sw_ring;   /**< address of RX software ring. */
  98        struct rte_mbuf *pkt_first_seg; /**< First segment of current packet. */
  99        struct rte_mbuf *pkt_last_seg;  /**< Last segment of current packet. */
 100        uint16_t            nb_rx_desc; /**< number of RX descriptors. */
 101        uint16_t            rx_tail;    /**< current value of RDT register. */
 102        uint16_t            nb_rx_hold; /**< number of held free RX desc. */
 103        uint16_t            rx_free_thresh; /**< max free RX desc to hold. */
 104        uint16_t            queue_id;   /**< RX queue index. */
 105        uint16_t            reg_idx;    /**< RX queue register index. */
 106        uint16_t            port_id;    /**< Device port identifier. */
 107        uint8_t             pthresh;    /**< Prefetch threshold register. */
 108        uint8_t             hthresh;    /**< Host threshold register. */
 109        uint8_t             wthresh;    /**< Write-back threshold register. */
 110        uint8_t             crc_len;    /**< 0 if CRC stripped, 4 otherwise. */
 111        uint8_t             drop_en;  /**< If not 0, set SRRCTL.Drop_En. */
 112        uint32_t            flags;      /**< RX flags. */
 113        uint64_t            offloads;   /**< offloads of RTE_ETH_RX_OFFLOAD_* */
 114        const struct rte_memzone *mz;
 115};
 116
 117/**
 118 * Hardware context number
 119 */
 120enum igb_advctx_num {
 121        IGB_CTX_0    = 0, /**< CTX0    */
 122        IGB_CTX_1    = 1, /**< CTX1    */
 123        IGB_CTX_NUM  = 2, /**< CTX_NUM */
 124};
 125
 126/** Offload features */
 127union igb_tx_offload {
 128        uint64_t data;
 129        struct {
 130                uint64_t l3_len:9; /**< L3 (IP) Header Length. */
 131                uint64_t l2_len:7; /**< L2 (MAC) Header Length. */
 132                uint64_t vlan_tci:16;  /**< VLAN Tag Control Identifier(CPU order). */
 133                uint64_t l4_len:8; /**< L4 (TCP/UDP) Header Length. */
 134                uint64_t tso_segsz:16; /**< TCP TSO segment size. */
 135
 136                /* uint64_t unused:8; */
 137        };
 138};
 139
  140/*
  141 * Compare masks for igb_tx_offload.data; they must be kept in sync
  142 * with the igb_tx_offload layout above.
  143 */
 144#define TX_MACIP_LEN_CMP_MASK   0x000000000000FFFFULL /**< L2L3 header mask. */
 145#define TX_VLAN_CMP_MASK                0x00000000FFFF0000ULL /**< Vlan mask. */
 146#define TX_TCP_LEN_CMP_MASK             0x000000FF00000000ULL /**< TCP header mask. */
 147#define TX_TSO_MSS_CMP_MASK             0x00FFFF0000000000ULL /**< TSO segsz mask. */
 148/** Mac + IP + TCP + Mss mask. */
 149#define TX_TSO_CMP_MASK \
 150        (TX_MACIP_LEN_CMP_MASK | TX_TCP_LEN_CMP_MASK | TX_TSO_MSS_CMP_MASK)
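/*
 * The masks above follow directly from the igb_tx_offload bit layout:
 * l3_len(9) + l2_len(7) occupy bits 0-15   -> TX_MACIP_LEN_CMP_MASK,
 * vlan_tci(16) occupies bits 16-31         -> TX_VLAN_CMP_MASK,
 * l4_len(8) occupies bits 32-39            -> TX_TCP_LEN_CMP_MASK,
 * tso_segsz(16) occupies bits 40-55        -> TX_TSO_MSS_CMP_MASK.
 */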
 151
 152/**
 153 * Structure to check if new context need be built
 154 */
 155struct igb_advctx_info {
 156        uint64_t flags;           /**< ol_flags related to context build. */
 157        /** tx offload: vlan, tso, l2-l3-l4 lengths. */
 158        union igb_tx_offload tx_offload;
 159        /** compare mask for tx offload. */
 160        union igb_tx_offload tx_offload_mask;
 161};
 162
 163/**
 164 * Structure associated with each TX queue.
 165 */
 166struct igb_tx_queue {
 167        volatile union e1000_adv_tx_desc *tx_ring; /**< TX ring address */
 168        uint64_t               tx_ring_phys_addr; /**< TX ring DMA address. */
 169        struct igb_tx_entry    *sw_ring; /**< virtual address of SW ring. */
 170        volatile uint32_t      *tdt_reg_addr; /**< Address of TDT register. */
 171        uint32_t               txd_type;      /**< Device-specific TXD type */
 172        uint16_t               nb_tx_desc;    /**< number of TX descriptors. */
 173        uint16_t               tx_tail; /**< Current value of TDT register. */
 174        uint16_t               tx_head;
 175        /**< Index of first used TX descriptor. */
 176        uint16_t               queue_id; /**< TX queue index. */
 177        uint16_t               reg_idx;  /**< TX queue register index. */
 178        uint16_t               port_id;  /**< Device port identifier. */
 179        uint8_t                pthresh;  /**< Prefetch threshold register. */
 180        uint8_t                hthresh;  /**< Host threshold register. */
 181        uint8_t                wthresh;  /**< Write-back threshold register. */
 182        uint32_t               ctx_curr;
 183        /**< Current used hardware descriptor. */
 184        uint32_t               ctx_start;
 185        /**< Start context position for transmit queue. */
 186        struct igb_advctx_info ctx_cache[IGB_CTX_NUM];
 187        /**< Hardware context history.*/
 188        uint64_t               offloads; /**< offloads of RTE_ETH_TX_OFFLOAD_* */
 189        const struct rte_memzone *mz;
 190};
 191
 192#if 1
 193#define RTE_PMD_USE_PREFETCH
 194#endif
 195
 196#ifdef RTE_PMD_USE_PREFETCH
 197#define rte_igb_prefetch(p)     rte_prefetch0(p)
 198#else
  199#define rte_igb_prefetch(p)     do {} while (0)
 200#endif
 201
 202#ifdef RTE_PMD_PACKET_PREFETCH
 203#define rte_packet_prefetch(p) rte_prefetch1(p)
 204#else
  205#define rte_packet_prefetch(p)  do {} while (0)
 206#endif
 207
  208/*
  209 * Macros for the VMDq feature and the TSO limits of 1 GbE NICs.
  210 */
 211#define E1000_VMOLR_SIZE                        (8)
 212#define IGB_TSO_MAX_HDRLEN                      (512)
 213#define IGB_TSO_MAX_MSS                         (9216)
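/*
 * IGB_TSO_MAX_HDRLEN and IGB_TSO_MAX_MSS are expressed in bytes; they bound
 * the total L2+L3+L4 header length and the MSS accepted for TSO, as checked
 * by check_tso_para() and eth_igb_prep_pkts().
 */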
 214
 215/*********************************************************************
 216 *
 217 *  TX function
 218 *
 219 **********************************************************************/
 220
  221/*
  222 * The hardware has some limitations for TCP segmentation offload, so
  223 * check whether the TSO parameters are valid.
  224 */
 225static inline uint64_t
 226check_tso_para(uint64_t ol_req, union igb_tx_offload ol_para)
 227{
 228        if (!(ol_req & RTE_MBUF_F_TX_TCP_SEG))
 229                return ol_req;
 230        if ((ol_para.tso_segsz > IGB_TSO_MAX_MSS) || (ol_para.l2_len +
 231                        ol_para.l3_len + ol_para.l4_len > IGB_TSO_MAX_HDRLEN)) {
 232                ol_req &= ~RTE_MBUF_F_TX_TCP_SEG;
 233                ol_req |= RTE_MBUF_F_TX_TCP_CKSUM;
 234        }
 235        return ol_req;
 236}
 237
  238/*
  239 * Advanced context descriptors are almost the same between igb and ixgbe.
  240 * This is kept as a separate function, leaving room for optimization here;
  241 * rework is required to go with the pre-defined values.
  242 */
 243
 244static inline void
 245igbe_set_xmit_ctx(struct igb_tx_queue* txq,
 246                volatile struct e1000_adv_tx_context_desc *ctx_txd,
 247                uint64_t ol_flags, union igb_tx_offload tx_offload)
 248{
 249        uint32_t type_tucmd_mlhl;
 250        uint32_t mss_l4len_idx;
 251        uint32_t ctx_idx, ctx_curr;
 252        uint32_t vlan_macip_lens;
 253        union igb_tx_offload tx_offload_mask;
 254
 255        ctx_curr = txq->ctx_curr;
 256        ctx_idx = ctx_curr + txq->ctx_start;
 257
 258        tx_offload_mask.data = 0;
 259        type_tucmd_mlhl = 0;
 260
 261        /* Specify which HW CTX to upload. */
 262        mss_l4len_idx = (ctx_idx << E1000_ADVTXD_IDX_SHIFT);
 263
 264        if (ol_flags & RTE_MBUF_F_TX_VLAN)
 265                tx_offload_mask.data |= TX_VLAN_CMP_MASK;
 266
  267        /* check if TCP segmentation is required for this packet */
 268        if (ol_flags & RTE_MBUF_F_TX_TCP_SEG) {
 269                /* implies IP cksum in IPv4 */
 270                if (ol_flags & RTE_MBUF_F_TX_IP_CKSUM)
 271                        type_tucmd_mlhl = E1000_ADVTXD_TUCMD_IPV4 |
 272                                E1000_ADVTXD_TUCMD_L4T_TCP |
 273                                E1000_ADVTXD_DTYP_CTXT | E1000_ADVTXD_DCMD_DEXT;
 274                else
 275                        type_tucmd_mlhl = E1000_ADVTXD_TUCMD_IPV6 |
 276                                E1000_ADVTXD_TUCMD_L4T_TCP |
 277                                E1000_ADVTXD_DTYP_CTXT | E1000_ADVTXD_DCMD_DEXT;
 278
 279                tx_offload_mask.data |= TX_TSO_CMP_MASK;
 280                mss_l4len_idx |= tx_offload.tso_segsz << E1000_ADVTXD_MSS_SHIFT;
 281                mss_l4len_idx |= tx_offload.l4_len << E1000_ADVTXD_L4LEN_SHIFT;
 282        } else { /* no TSO, check if hardware checksum is needed */
 283                if (ol_flags & (RTE_MBUF_F_TX_IP_CKSUM | RTE_MBUF_F_TX_L4_MASK))
 284                        tx_offload_mask.data |= TX_MACIP_LEN_CMP_MASK;
 285
 286                if (ol_flags & RTE_MBUF_F_TX_IP_CKSUM)
 287                        type_tucmd_mlhl = E1000_ADVTXD_TUCMD_IPV4;
 288
 289                switch (ol_flags & RTE_MBUF_F_TX_L4_MASK) {
 290                case RTE_MBUF_F_TX_UDP_CKSUM:
 291                        type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_UDP |
 292                                E1000_ADVTXD_DTYP_CTXT | E1000_ADVTXD_DCMD_DEXT;
 293                        mss_l4len_idx |= sizeof(struct rte_udp_hdr)
 294                                << E1000_ADVTXD_L4LEN_SHIFT;
 295                        break;
 296                case RTE_MBUF_F_TX_TCP_CKSUM:
 297                        type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP |
 298                                E1000_ADVTXD_DTYP_CTXT | E1000_ADVTXD_DCMD_DEXT;
 299                        mss_l4len_idx |= sizeof(struct rte_tcp_hdr)
 300                                << E1000_ADVTXD_L4LEN_SHIFT;
 301                        break;
 302                case RTE_MBUF_F_TX_SCTP_CKSUM:
 303                        type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_SCTP |
 304                                E1000_ADVTXD_DTYP_CTXT | E1000_ADVTXD_DCMD_DEXT;
 305                        mss_l4len_idx |= sizeof(struct rte_sctp_hdr)
 306                                << E1000_ADVTXD_L4LEN_SHIFT;
 307                        break;
 308                default:
 309                        type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_RSV |
 310                                E1000_ADVTXD_DTYP_CTXT | E1000_ADVTXD_DCMD_DEXT;
 311                        break;
 312                }
 313        }
 314
 315        txq->ctx_cache[ctx_curr].flags = ol_flags;
 316        txq->ctx_cache[ctx_curr].tx_offload.data =
 317                tx_offload_mask.data & tx_offload.data;
 318        txq->ctx_cache[ctx_curr].tx_offload_mask = tx_offload_mask;
 319
 320        ctx_txd->type_tucmd_mlhl = rte_cpu_to_le_32(type_tucmd_mlhl);
 321        vlan_macip_lens = (uint32_t)tx_offload.data;
 322        ctx_txd->vlan_macip_lens = rte_cpu_to_le_32(vlan_macip_lens);
 323        ctx_txd->mss_l4len_idx = rte_cpu_to_le_32(mss_l4len_idx);
 324        ctx_txd->u.seqnum_seed = 0;
 325}
 326
 327/*
 328 * Check which hardware context can be used. Use the existing match
 329 * or create a new context descriptor.
 330 */
 331static inline uint32_t
 332what_advctx_update(struct igb_tx_queue *txq, uint64_t flags,
 333                union igb_tx_offload tx_offload)
 334{
  335        /* Check if it matches the current context */
 336        if (likely((txq->ctx_cache[txq->ctx_curr].flags == flags) &&
 337                (txq->ctx_cache[txq->ctx_curr].tx_offload.data ==
 338                (txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data & tx_offload.data)))) {
 339                        return txq->ctx_curr;
 340        }
 341
  342        /* Otherwise, check if it matches the other context */
 343        txq->ctx_curr ^= 1;
 344        if (likely((txq->ctx_cache[txq->ctx_curr].flags == flags) &&
 345                (txq->ctx_cache[txq->ctx_curr].tx_offload.data ==
 346                (txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data & tx_offload.data)))) {
 347                        return txq->ctx_curr;
 348        }
 349
  350        /* Mismatch: a new context descriptor must be built */
 351        return IGB_CTX_NUM;
 352}
 353
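/*
 * Translate checksum-related ol_flags into the olinfo_status POPTS bits.
 * The two-entry lookup tables turn the flag tests into branchless selects:
 * index 0 adds no bit, index 1 adds the matching IXSM/TXSM bit.
 */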
 354static inline uint32_t
 355tx_desc_cksum_flags_to_olinfo(uint64_t ol_flags)
 356{
 357        static const uint32_t l4_olinfo[2] = {0, E1000_ADVTXD_POPTS_TXSM};
 358        static const uint32_t l3_olinfo[2] = {0, E1000_ADVTXD_POPTS_IXSM};
 359        uint32_t tmp;
 360
 361        tmp  = l4_olinfo[(ol_flags & RTE_MBUF_F_TX_L4_MASK)  != RTE_MBUF_F_TX_L4_NO_CKSUM];
 362        tmp |= l3_olinfo[(ol_flags & RTE_MBUF_F_TX_IP_CKSUM) != 0];
 363        tmp |= l4_olinfo[(ol_flags & RTE_MBUF_F_TX_TCP_SEG) != 0];
 364        return tmp;
 365}
 366
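/*
 * Translate VLAN/TSO ol_flags into the VLE/TSE bits of the descriptor
 * command field, using the same branchless table-lookup pattern as above.
 */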
 367static inline uint32_t
 368tx_desc_vlan_flags_to_cmdtype(uint64_t ol_flags)
 369{
 370        uint32_t cmdtype;
 371        static uint32_t vlan_cmd[2] = {0, E1000_ADVTXD_DCMD_VLE};
 372        static uint32_t tso_cmd[2] = {0, E1000_ADVTXD_DCMD_TSE};
 373        cmdtype = vlan_cmd[(ol_flags & RTE_MBUF_F_TX_VLAN) != 0];
 374        cmdtype |= tso_cmd[(ol_flags & RTE_MBUF_F_TX_TCP_SEG) != 0];
 375        return cmdtype;
 376}
 377
 378uint16_t
 379eth_igb_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
 380               uint16_t nb_pkts)
 381{
 382        struct igb_tx_queue *txq;
 383        struct igb_tx_entry *sw_ring;
 384        struct igb_tx_entry *txe, *txn;
 385        volatile union e1000_adv_tx_desc *txr;
 386        volatile union e1000_adv_tx_desc *txd;
 387        struct rte_mbuf     *tx_pkt;
 388        struct rte_mbuf     *m_seg;
 389        uint64_t buf_dma_addr;
 390        uint32_t olinfo_status;
 391        uint32_t cmd_type_len;
 392        uint32_t pkt_len;
 393        uint16_t slen;
 394        uint64_t ol_flags;
 395        uint16_t tx_end;
 396        uint16_t tx_id;
 397        uint16_t tx_last;
 398        uint16_t nb_tx;
 399        uint64_t tx_ol_req;
 400        uint32_t new_ctx = 0;
 401        uint32_t ctx = 0;
 402        union igb_tx_offload tx_offload = {0};
 403
 404        txq = tx_queue;
 405        sw_ring = txq->sw_ring;
 406        txr     = txq->tx_ring;
 407        tx_id   = txq->tx_tail;
 408        txe = &sw_ring[tx_id];
 409
 410        for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
 411                tx_pkt = *tx_pkts++;
 412                pkt_len = tx_pkt->pkt_len;
 413
 414                RTE_MBUF_PREFETCH_TO_FREE(txe->mbuf);
 415
  416                /*
  417                 * The number of descriptors that must be allocated for a
  418                 * packet is the number of segments of that packet, plus one
  419                 * context descriptor if a new offload context must be set up.
  420                 * Determine the last TX descriptor to allocate in the TX ring
  421                 * for the packet, starting from the current position (tx_id)
  422                 * in the ring.
  423                 */
 424                tx_last = (uint16_t) (tx_id + tx_pkt->nb_segs - 1);
 425
 426                ol_flags = tx_pkt->ol_flags;
 427                tx_ol_req = ol_flags & IGB_TX_OFFLOAD_MASK;
 428
  429                /* Check whether a context descriptor needs to be built. */
 430                if (tx_ol_req) {
 431                        tx_offload.l2_len = tx_pkt->l2_len;
 432                        tx_offload.l3_len = tx_pkt->l3_len;
 433                        tx_offload.l4_len = tx_pkt->l4_len;
 434                        tx_offload.vlan_tci = tx_pkt->vlan_tci;
 435                        tx_offload.tso_segsz = tx_pkt->tso_segsz;
 436                        tx_ol_req = check_tso_para(tx_ol_req, tx_offload);
 437
 438                        ctx = what_advctx_update(txq, tx_ol_req, tx_offload);
  439                        /* Only allocate a context descriptor if required. */
 440                        new_ctx = (ctx == IGB_CTX_NUM);
 441                        ctx = txq->ctx_curr + txq->ctx_start;
 442                        tx_last = (uint16_t) (tx_last + new_ctx);
 443                }
 444                if (tx_last >= txq->nb_tx_desc)
 445                        tx_last = (uint16_t) (tx_last - txq->nb_tx_desc);
 446
 447                PMD_TX_LOG(DEBUG, "port_id=%u queue_id=%u pktlen=%u"
 448                           " tx_first=%u tx_last=%u",
 449                           (unsigned) txq->port_id,
 450                           (unsigned) txq->queue_id,
 451                           (unsigned) pkt_len,
 452                           (unsigned) tx_id,
 453                           (unsigned) tx_last);
 454
 455                /*
 456                 * Check if there are enough free descriptors in the TX ring
 457                 * to transmit the next packet.
  458                 * This operation is based on the following two rules:
 459                 *
 460                 *   1- Only check that the last needed TX descriptor can be
 461                 *      allocated (by construction, if that descriptor is free,
 462                 *      all intermediate ones are also free).
 463                 *
 464                 *      For this purpose, the index of the last TX descriptor
 465                 *      used for a packet (the "last descriptor" of a packet)
 466                 *      is recorded in the TX entries (the last one included)
 467                 *      that are associated with all TX descriptors allocated
 468                 *      for that packet.
 469                 *
  470                 *   2- Avoid allocating the last free TX descriptor of the
  471                 *      ring, so that the TDT register is never set to the
  472                 *      same value stored in parallel by the NIC in the TDH
  473                 *      register, which would make the TX engine of the NIC
  474                 *      enter a deadlock situation.
  475                 *
  476                 *      By extension, avoid allocating a free descriptor that
  477                 *      belongs to the last set of free descriptors allocated
  478                 *      to the same packet previously transmitted.
 479                 */
 480
  481                /*
  482                 * The "last descriptor" of the packet that previously used
  483                 * the descriptor we want to allocate last (tx_last), if any.
  484                 */
 485                tx_end = sw_ring[tx_last].last_id;
 486
 487                /*
 488                 * The next descriptor following that "last descriptor" in the
 489                 * ring.
 490                 */
 491                tx_end = sw_ring[tx_end].next_id;
 492
 493                /*
 494                 * The "last descriptor" associated with that next descriptor.
 495                 */
 496                tx_end = sw_ring[tx_end].last_id;
 497
 498                /*
 499                 * Check that this descriptor is free.
 500                 */
 501                if (! (txr[tx_end].wb.status & E1000_TXD_STAT_DD)) {
 502                        if (nb_tx == 0)
 503                                return 0;
 504                        goto end_of_tx;
 505                }
 506
 507                /*
 508                 * Set common flags of all TX Data Descriptors.
 509                 *
 510                 * The following bits must be set in all Data Descriptors:
 511                 *   - E1000_ADVTXD_DTYP_DATA
 512                 *   - E1000_ADVTXD_DCMD_DEXT
 513                 *
 514                 * The following bits must be set in the first Data Descriptor
 515                 * and are ignored in the other ones:
 516                 *   - E1000_ADVTXD_DCMD_IFCS
 517                 *   - E1000_ADVTXD_MAC_1588
 518                 *   - E1000_ADVTXD_DCMD_VLE
 519                 *
 520                 * The following bits must only be set in the last Data
 521                 * Descriptor:
 522                 *   - E1000_TXD_CMD_EOP
 523                 *
 524                 * The following bits can be set in any Data Descriptor, but
 525                 * are only set in the last Data Descriptor:
 526                 *   - E1000_TXD_CMD_RS
 527                 */
 528                cmd_type_len = txq->txd_type |
 529                        E1000_ADVTXD_DCMD_IFCS | E1000_ADVTXD_DCMD_DEXT;
 530                if (tx_ol_req & RTE_MBUF_F_TX_TCP_SEG)
 531                        pkt_len -= (tx_pkt->l2_len + tx_pkt->l3_len + tx_pkt->l4_len);
 532                olinfo_status = (pkt_len << E1000_ADVTXD_PAYLEN_SHIFT);
 533#if defined(RTE_LIBRTE_IEEE1588)
 534                if (ol_flags & RTE_MBUF_F_TX_IEEE1588_TMST)
 535                        cmd_type_len |= E1000_ADVTXD_MAC_TSTAMP;
 536#endif
 537                if (tx_ol_req) {
 538                        /* Setup TX Advanced context descriptor if required */
 539                        if (new_ctx) {
 540                                volatile struct e1000_adv_tx_context_desc *
 541                                    ctx_txd;
 542
 543                                ctx_txd = (volatile struct
 544                                    e1000_adv_tx_context_desc *)
 545                                    &txr[tx_id];
 546
 547                                txn = &sw_ring[txe->next_id];
 548                                RTE_MBUF_PREFETCH_TO_FREE(txn->mbuf);
 549
 550                                if (txe->mbuf != NULL) {
 551                                        rte_pktmbuf_free_seg(txe->mbuf);
 552                                        txe->mbuf = NULL;
 553                                }
 554
 555                                igbe_set_xmit_ctx(txq, ctx_txd, tx_ol_req, tx_offload);
 556
 557                                txe->last_id = tx_last;
 558                                tx_id = txe->next_id;
 559                                txe = txn;
 560                        }
 561
 562                        /* Setup the TX Advanced Data Descriptor */
 563                        cmd_type_len  |= tx_desc_vlan_flags_to_cmdtype(tx_ol_req);
 564                        olinfo_status |= tx_desc_cksum_flags_to_olinfo(tx_ol_req);
 565                        olinfo_status |= (ctx << E1000_ADVTXD_IDX_SHIFT);
 566                }
 567
 568                m_seg = tx_pkt;
 569                do {
 570                        txn = &sw_ring[txe->next_id];
 571                        txd = &txr[tx_id];
 572
 573                        if (txe->mbuf != NULL)
 574                                rte_pktmbuf_free_seg(txe->mbuf);
 575                        txe->mbuf = m_seg;
 576
 577                        /*
 578                         * Set up transmit descriptor.
 579                         */
 580                        slen = (uint16_t) m_seg->data_len;
 581                        buf_dma_addr = rte_mbuf_data_iova(m_seg);
 582                        txd->read.buffer_addr =
 583                                rte_cpu_to_le_64(buf_dma_addr);
 584                        txd->read.cmd_type_len =
 585                                rte_cpu_to_le_32(cmd_type_len | slen);
 586                        txd->read.olinfo_status =
 587                                rte_cpu_to_le_32(olinfo_status);
 588                        txe->last_id = tx_last;
 589                        tx_id = txe->next_id;
 590                        txe = txn;
 591                        m_seg = m_seg->next;
 592                } while (m_seg != NULL);
 593
 594                /*
 595                 * The last packet data descriptor needs End Of Packet (EOP)
 596                 * and Report Status (RS).
 597                 */
 598                txd->read.cmd_type_len |=
 599                        rte_cpu_to_le_32(E1000_TXD_CMD_EOP | E1000_TXD_CMD_RS);
 600        }
 601 end_of_tx:
 602        rte_wmb();
 603
 604        /*
 605         * Set the Transmit Descriptor Tail (TDT).
 606         */
 607        E1000_PCI_REG_WRITE_RELAXED(txq->tdt_reg_addr, tx_id);
 608        PMD_TX_LOG(DEBUG, "port_id=%u queue_id=%u tx_tail=%u nb_tx=%u",
 609                   (unsigned) txq->port_id, (unsigned) txq->queue_id,
 610                   (unsigned) tx_id, (unsigned) nb_tx);
 611        txq->tx_tail = tx_id;
 612
 613        return nb_tx;
 614}
 615
 616/*********************************************************************
 617 *
 618 *  TX prep functions
 619 *
 620 **********************************************************************/
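/*
 * Validate a burst of packets before transmission: returns the number of
 * leading packets that can be sent safely and sets rte_errno for the first
 * offending one. It is intended to be reached through rte_eth_tx_prepare(),
 * e.g. (illustrative application-side sketch, not part of the driver):
 *
 *     uint16_t n = rte_eth_tx_prepare(port_id, queue_id, pkts, nb_pkts);
 *     n = rte_eth_tx_burst(port_id, queue_id, pkts, n);
 */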
 621uint16_t
 622eth_igb_prep_pkts(__rte_unused void *tx_queue, struct rte_mbuf **tx_pkts,
 623                uint16_t nb_pkts)
 624{
 625        int i, ret;
 626        struct rte_mbuf *m;
 627
 628        for (i = 0; i < nb_pkts; i++) {
 629                m = tx_pkts[i];
 630
 631                /* Check some limitations for TSO in hardware */
 632                if (m->ol_flags & RTE_MBUF_F_TX_TCP_SEG)
 633                        if ((m->tso_segsz > IGB_TSO_MAX_MSS) ||
 634                                        (m->l2_len + m->l3_len + m->l4_len >
 635                                        IGB_TSO_MAX_HDRLEN)) {
 636                                rte_errno = EINVAL;
 637                                return i;
 638                        }
 639
 640                if (m->ol_flags & IGB_TX_OFFLOAD_NOTSUP_MASK) {
 641                        rte_errno = ENOTSUP;
 642                        return i;
 643                }
 644
 645#ifdef RTE_ETHDEV_DEBUG_TX
 646                ret = rte_validate_tx_offload(m);
 647                if (ret != 0) {
 648                        rte_errno = -ret;
 649                        return i;
 650                }
 651#endif
 652                ret = rte_net_intel_cksum_prepare(m);
 653                if (ret != 0) {
 654                        rte_errno = -ret;
 655                        return i;
 656                }
 657        }
 658
 659        return i;
 660}
 661
 662/*********************************************************************
 663 *
 664 *  RX functions
 665 *
 666 **********************************************************************/
 667#define IGB_PACKET_TYPE_IPV4              0X01
 668#define IGB_PACKET_TYPE_IPV4_TCP          0X11
 669#define IGB_PACKET_TYPE_IPV4_UDP          0X21
 670#define IGB_PACKET_TYPE_IPV4_SCTP         0X41
 671#define IGB_PACKET_TYPE_IPV4_EXT          0X03
 672#define IGB_PACKET_TYPE_IPV4_EXT_SCTP     0X43
 673#define IGB_PACKET_TYPE_IPV6              0X04
 674#define IGB_PACKET_TYPE_IPV6_TCP          0X14
 675#define IGB_PACKET_TYPE_IPV6_UDP          0X24
 676#define IGB_PACKET_TYPE_IPV6_EXT          0X0C
 677#define IGB_PACKET_TYPE_IPV6_EXT_TCP      0X1C
 678#define IGB_PACKET_TYPE_IPV6_EXT_UDP      0X2C
 679#define IGB_PACKET_TYPE_IPV4_IPV6         0X05
 680#define IGB_PACKET_TYPE_IPV4_IPV6_TCP     0X15
 681#define IGB_PACKET_TYPE_IPV4_IPV6_UDP     0X25
 682#define IGB_PACKET_TYPE_IPV4_IPV6_EXT     0X0D
 683#define IGB_PACKET_TYPE_IPV4_IPV6_EXT_TCP 0X1D
 684#define IGB_PACKET_TYPE_IPV4_IPV6_EXT_UDP 0X2D
 685#define IGB_PACKET_TYPE_MAX               0X80
 686#define IGB_PACKET_TYPE_MASK              0X7F
 687#define IGB_PACKET_TYPE_SHIFT             0X04
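/*
 * The IGB_PACKET_TYPE_* values above encode the packet-type bits reported by
 * the advanced RX descriptor (after the 4-bit shift applied below): bit 0
 * IPv4, bit 1 IPv4 extension headers, bit 2 IPv6, bit 3 IPv6 extension
 * headers, bit 4 TCP, bit 5 UDP, bit 6 SCTP. Combinations of an IPv4 and an
 * IPv6 bit denote IP-in-IP tunnels, as reflected in the table below.
 */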
 688static inline uint32_t
 689igb_rxd_pkt_info_to_pkt_type(uint16_t pkt_info)
 690{
 691        static const uint32_t
 692                ptype_table[IGB_PACKET_TYPE_MAX] __rte_cache_aligned = {
 693                [IGB_PACKET_TYPE_IPV4] = RTE_PTYPE_L2_ETHER |
 694                        RTE_PTYPE_L3_IPV4,
 695                [IGB_PACKET_TYPE_IPV4_EXT] = RTE_PTYPE_L2_ETHER |
 696                        RTE_PTYPE_L3_IPV4_EXT,
 697                [IGB_PACKET_TYPE_IPV6] = RTE_PTYPE_L2_ETHER |
 698                        RTE_PTYPE_L3_IPV6,
 699                [IGB_PACKET_TYPE_IPV4_IPV6] = RTE_PTYPE_L2_ETHER |
 700                        RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
 701                        RTE_PTYPE_INNER_L3_IPV6,
 702                [IGB_PACKET_TYPE_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
 703                        RTE_PTYPE_L3_IPV6_EXT,
 704                [IGB_PACKET_TYPE_IPV4_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
 705                        RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
 706                        RTE_PTYPE_INNER_L3_IPV6_EXT,
 707                [IGB_PACKET_TYPE_IPV4_TCP] = RTE_PTYPE_L2_ETHER |
 708                        RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_TCP,
 709                [IGB_PACKET_TYPE_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
 710                        RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L4_TCP,
 711                [IGB_PACKET_TYPE_IPV4_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
 712                        RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
 713                        RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_TCP,
 714                [IGB_PACKET_TYPE_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
 715                        RTE_PTYPE_L3_IPV6_EXT | RTE_PTYPE_L4_TCP,
 716                [IGB_PACKET_TYPE_IPV4_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
 717                        RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
 718                        RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_TCP,
 719                [IGB_PACKET_TYPE_IPV4_UDP] = RTE_PTYPE_L2_ETHER |
 720                        RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_UDP,
 721                [IGB_PACKET_TYPE_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
 722                        RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L4_UDP,
 723                [IGB_PACKET_TYPE_IPV4_IPV6_UDP] =  RTE_PTYPE_L2_ETHER |
 724                        RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
 725                        RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_UDP,
 726                [IGB_PACKET_TYPE_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
 727                        RTE_PTYPE_L3_IPV6_EXT | RTE_PTYPE_L4_UDP,
 728                [IGB_PACKET_TYPE_IPV4_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
 729                        RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
 730                        RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_UDP,
 731                [IGB_PACKET_TYPE_IPV4_SCTP] = RTE_PTYPE_L2_ETHER |
 732                        RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_SCTP,
 733                [IGB_PACKET_TYPE_IPV4_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
 734                        RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_L4_SCTP,
 735        };
 736        if (unlikely(pkt_info & E1000_RXDADV_PKTTYPE_ETQF))
 737                return RTE_PTYPE_UNKNOWN;
 738
 739        pkt_info = (pkt_info >> IGB_PACKET_TYPE_SHIFT) & IGB_PACKET_TYPE_MASK;
 740
 741        return ptype_table[pkt_info];
 742}
 743
 744static inline uint64_t
 745rx_desc_hlen_type_rss_to_pkt_flags(struct igb_rx_queue *rxq, uint32_t hl_tp_rs)
 746{
 747        uint64_t pkt_flags = ((hl_tp_rs & 0x0F) == 0) ?  0 : RTE_MBUF_F_RX_RSS_HASH;
 748
 749#if defined(RTE_LIBRTE_IEEE1588)
 750        static uint32_t ip_pkt_etqf_map[8] = {
 751                0, 0, 0, RTE_MBUF_F_RX_IEEE1588_PTP,
 752                0, 0, 0, 0,
 753        };
 754
 755        struct rte_eth_dev dev = rte_eth_devices[rxq->port_id];
 756        struct e1000_hw *hw = E1000_DEV_PRIVATE_TO_HW(dev.data->dev_private);
 757
 758        /* EtherType is in bits 8:10 in Packet Type, and not in the default 0:2 */
 759        if (hw->mac.type == e1000_i210)
 760                pkt_flags |= ip_pkt_etqf_map[(hl_tp_rs >> 12) & 0x07];
 761        else
 762                pkt_flags |= ip_pkt_etqf_map[(hl_tp_rs >> 4) & 0x07];
 763#else
 764        RTE_SET_USED(rxq);
 765#endif
 766
 767        return pkt_flags;
 768}
 769
 770static inline uint64_t
 771rx_desc_status_to_pkt_flags(uint32_t rx_status)
 772{
 773        uint64_t pkt_flags;
 774
 775        /* Check if VLAN present */
 776        pkt_flags = ((rx_status & E1000_RXD_STAT_VP) ?
 777                RTE_MBUF_F_RX_VLAN | RTE_MBUF_F_RX_VLAN_STRIPPED : 0);
 778
 779#if defined(RTE_LIBRTE_IEEE1588)
 780        if (rx_status & E1000_RXD_STAT_TMST)
 781                pkt_flags = pkt_flags | RTE_MBUF_F_RX_IEEE1588_TMST;
 782#endif
 783        return pkt_flags;
 784}
 785
 786static inline uint64_t
 787rx_desc_error_to_pkt_flags(uint32_t rx_status)
 788{
 789        /*
 790         * Bit 30: IPE, IPv4 checksum error
  791         * Bit 29: L4I, L4 integrity error
 792         */
 793
 794        static uint64_t error_to_pkt_flags_map[4] = {
 795                RTE_MBUF_F_RX_IP_CKSUM_GOOD | RTE_MBUF_F_RX_L4_CKSUM_GOOD,
 796                RTE_MBUF_F_RX_IP_CKSUM_GOOD | RTE_MBUF_F_RX_L4_CKSUM_BAD,
 797                RTE_MBUF_F_RX_IP_CKSUM_BAD | RTE_MBUF_F_RX_L4_CKSUM_GOOD,
 798                RTE_MBUF_F_RX_IP_CKSUM_BAD | RTE_MBUF_F_RX_L4_CKSUM_BAD
 799        };
 800        return error_to_pkt_flags_map[(rx_status >>
 801                E1000_RXD_ERR_CKSUM_BIT) & E1000_RXD_ERR_CKSUM_MSK];
 802}
 803
 804uint16_t
 805eth_igb_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
 806               uint16_t nb_pkts)
 807{
 808        struct igb_rx_queue *rxq;
 809        volatile union e1000_adv_rx_desc *rx_ring;
 810        volatile union e1000_adv_rx_desc *rxdp;
 811        struct igb_rx_entry *sw_ring;
 812        struct igb_rx_entry *rxe;
 813        struct rte_mbuf *rxm;
 814        struct rte_mbuf *nmb;
 815        union e1000_adv_rx_desc rxd;
 816        uint64_t dma_addr;
 817        uint32_t staterr;
 818        uint32_t hlen_type_rss;
 819        uint16_t pkt_len;
 820        uint16_t rx_id;
 821        uint16_t nb_rx;
 822        uint16_t nb_hold;
 823        uint64_t pkt_flags;
 824
 825        nb_rx = 0;
 826        nb_hold = 0;
 827        rxq = rx_queue;
 828        rx_id = rxq->rx_tail;
 829        rx_ring = rxq->rx_ring;
 830        sw_ring = rxq->sw_ring;
 831        while (nb_rx < nb_pkts) {
 832                /*
 833                 * The order of operations here is important as the DD status
 834                 * bit must not be read after any other descriptor fields.
 835                 * rx_ring and rxdp are pointing to volatile data so the order
 836                 * of accesses cannot be reordered by the compiler. If they were
 837                 * not volatile, they could be reordered which could lead to
 838                 * using invalid descriptor fields when read from rxd.
 839                 */
 840                rxdp = &rx_ring[rx_id];
 841                staterr = rxdp->wb.upper.status_error;
 842                if (! (staterr & rte_cpu_to_le_32(E1000_RXD_STAT_DD)))
 843                        break;
 844                rxd = *rxdp;
 845
 846                /*
 847                 * End of packet.
 848                 *
 849                 * If the E1000_RXD_STAT_EOP flag is not set, the RX packet is
 850                 * likely to be invalid and to be dropped by the various
 851                 * validation checks performed by the network stack.
 852                 *
 853                 * Allocate a new mbuf to replenish the RX ring descriptor.
 854                 * If the allocation fails:
 855                 *    - arrange for that RX descriptor to be the first one
 856                 *      being parsed the next time the receive function is
 857                 *      invoked [on the same queue].
 858                 *
 859                 *    - Stop parsing the RX ring and return immediately.
 860                 *
  861                 * This policy does not drop the packet received in the RX
  862                 * descriptor for which the allocation of a new mbuf failed.
  863                 * Thus, it allows that packet to be retrieved later if
  864                 * mbufs have been freed in the meantime.
  865                 * As a side effect, holding RX descriptors instead of
  866                 * systematically giving them back to the NIC may lead to
  867                 * RX ring exhaustion situations.
  868                 * However, the NIC can gracefully prevent such situations
  869                 * from happening by sending specific "back-pressure" flow
  870                 * control frames to its peer(s).
 871                 */
 872                PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_id=%u "
 873                           "staterr=0x%x pkt_len=%u",
 874                           (unsigned) rxq->port_id, (unsigned) rxq->queue_id,
 875                           (unsigned) rx_id, (unsigned) staterr,
 876                           (unsigned) rte_le_to_cpu_16(rxd.wb.upper.length));
 877
 878                nmb = rte_mbuf_raw_alloc(rxq->mb_pool);
 879                if (nmb == NULL) {
 880                        PMD_RX_LOG(DEBUG, "RX mbuf alloc failed port_id=%u "
 881                                   "queue_id=%u", (unsigned) rxq->port_id,
 882                                   (unsigned) rxq->queue_id);
 883                        rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed++;
 884                        break;
 885                }
 886
 887                nb_hold++;
 888                rxe = &sw_ring[rx_id];
 889                rx_id++;
 890                if (rx_id == rxq->nb_rx_desc)
 891                        rx_id = 0;
 892
 893                /* Prefetch next mbuf while processing current one. */
 894                rte_igb_prefetch(sw_ring[rx_id].mbuf);
 895
 896                /*
 897                 * When next RX descriptor is on a cache-line boundary,
 898                 * prefetch the next 4 RX descriptors and the next 8 pointers
 899                 * to mbufs.
 900                 */
 901                if ((rx_id & 0x3) == 0) {
 902                        rte_igb_prefetch(&rx_ring[rx_id]);
 903                        rte_igb_prefetch(&sw_ring[rx_id]);
 904                }
 905
 906                rxm = rxe->mbuf;
 907                rxe->mbuf = nmb;
 908                dma_addr =
 909                        rte_cpu_to_le_64(rte_mbuf_data_iova_default(nmb));
 910                rxdp->read.hdr_addr = 0;
 911                rxdp->read.pkt_addr = dma_addr;
 912
 913                /*
 914                 * Initialize the returned mbuf.
 915                 * 1) setup generic mbuf fields:
 916                 *    - number of segments,
 917                 *    - next segment,
 918                 *    - packet length,
 919                 *    - RX port identifier.
 920                 * 2) integrate hardware offload data, if any:
 921                 *    - RSS flag & hash,
 922                 *    - IP checksum flag,
 923                 *    - VLAN TCI, if any,
 924                 *    - error flags.
 925                 */
 926                pkt_len = (uint16_t) (rte_le_to_cpu_16(rxd.wb.upper.length) -
 927                                      rxq->crc_len);
 928                rxm->data_off = RTE_PKTMBUF_HEADROOM;
 929                rte_packet_prefetch((char *)rxm->buf_addr + rxm->data_off);
 930                rxm->nb_segs = 1;
 931                rxm->next = NULL;
 932                rxm->pkt_len = pkt_len;
 933                rxm->data_len = pkt_len;
 934                rxm->port = rxq->port_id;
 935
 936                rxm->hash.rss = rxd.wb.lower.hi_dword.rss;
 937                hlen_type_rss = rte_le_to_cpu_32(rxd.wb.lower.lo_dword.data);
 938
 939                /*
 940                 * The vlan_tci field is only valid when RTE_MBUF_F_RX_VLAN is
 941                 * set in the pkt_flags field and must be in CPU byte order.
 942                 */
 943                if ((staterr & rte_cpu_to_le_32(E1000_RXDEXT_STATERR_LB)) &&
 944                                (rxq->flags & IGB_RXQ_FLAG_LB_BSWAP_VLAN)) {
 945                        rxm->vlan_tci = rte_be_to_cpu_16(rxd.wb.upper.vlan);
 946                } else {
 947                        rxm->vlan_tci = rte_le_to_cpu_16(rxd.wb.upper.vlan);
 948                }
 949                pkt_flags = rx_desc_hlen_type_rss_to_pkt_flags(rxq, hlen_type_rss);
 950                pkt_flags = pkt_flags | rx_desc_status_to_pkt_flags(staterr);
 951                pkt_flags = pkt_flags | rx_desc_error_to_pkt_flags(staterr);
 952                rxm->ol_flags = pkt_flags;
 953                rxm->packet_type = igb_rxd_pkt_info_to_pkt_type(rxd.wb.lower.
 954                                                lo_dword.hs_rss.pkt_info);
 955
 956                /*
 957                 * Store the mbuf address into the next entry of the array
 958                 * of returned packets.
 959                 */
 960                rx_pkts[nb_rx++] = rxm;
 961        }
 962        rxq->rx_tail = rx_id;
 963
 964        /*
 965         * If the number of free RX descriptors is greater than the RX free
 966         * threshold of the queue, advance the Receive Descriptor Tail (RDT)
 967         * register.
 968         * Update the RDT with the value of the last processed RX descriptor
 969         * minus 1, to guarantee that the RDT register is never equal to the
 970         * RDH register, which creates a "full" ring situation from the
 971         * hardware point of view...
 972         */
 973        nb_hold = (uint16_t) (nb_hold + rxq->nb_rx_hold);
 974        if (nb_hold > rxq->rx_free_thresh) {
 975                PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_tail=%u "
 976                           "nb_hold=%u nb_rx=%u",
 977                           (unsigned) rxq->port_id, (unsigned) rxq->queue_id,
 978                           (unsigned) rx_id, (unsigned) nb_hold,
 979                           (unsigned) nb_rx);
 980                rx_id = (uint16_t) ((rx_id == 0) ?
 981                                     (rxq->nb_rx_desc - 1) : (rx_id - 1));
 982                E1000_PCI_REG_WRITE(rxq->rdt_reg_addr, rx_id);
 983                nb_hold = 0;
 984        }
 985        rxq->nb_rx_hold = nb_hold;
 986        return nb_rx;
 987}
 988
 989uint16_t
 990eth_igb_recv_scattered_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
 991                         uint16_t nb_pkts)
 992{
 993        struct igb_rx_queue *rxq;
 994        volatile union e1000_adv_rx_desc *rx_ring;
 995        volatile union e1000_adv_rx_desc *rxdp;
 996        struct igb_rx_entry *sw_ring;
 997        struct igb_rx_entry *rxe;
 998        struct rte_mbuf *first_seg;
 999        struct rte_mbuf *last_seg;
1000        struct rte_mbuf *rxm;
1001        struct rte_mbuf *nmb;
1002        union e1000_adv_rx_desc rxd;
1003        uint64_t dma; /* Physical address of mbuf data buffer */
1004        uint32_t staterr;
1005        uint32_t hlen_type_rss;
1006        uint16_t rx_id;
1007        uint16_t nb_rx;
1008        uint16_t nb_hold;
1009        uint16_t data_len;
1010        uint64_t pkt_flags;
1011
1012        nb_rx = 0;
1013        nb_hold = 0;
1014        rxq = rx_queue;
1015        rx_id = rxq->rx_tail;
1016        rx_ring = rxq->rx_ring;
1017        sw_ring = rxq->sw_ring;
1018
1019        /*
1020         * Retrieve RX context of current packet, if any.
1021         */
1022        first_seg = rxq->pkt_first_seg;
1023        last_seg = rxq->pkt_last_seg;
1024
1025        while (nb_rx < nb_pkts) {
1026        next_desc:
1027                /*
1028                 * The order of operations here is important as the DD status
1029                 * bit must not be read after any other descriptor fields.
1030                 * rx_ring and rxdp are pointing to volatile data so the order
1031                 * of accesses cannot be reordered by the compiler. If they were
1032                 * not volatile, they could be reordered which could lead to
1033                 * using invalid descriptor fields when read from rxd.
1034                 */
1035                rxdp = &rx_ring[rx_id];
1036                staterr = rxdp->wb.upper.status_error;
1037                if (! (staterr & rte_cpu_to_le_32(E1000_RXD_STAT_DD)))
1038                        break;
1039                rxd = *rxdp;
1040
1041                /*
1042                 * Descriptor done.
1043                 *
1044                 * Allocate a new mbuf to replenish the RX ring descriptor.
1045                 * If the allocation fails:
1046                 *    - arrange for that RX descriptor to be the first one
1047                 *      being parsed the next time the receive function is
1048                 *      invoked [on the same queue].
1049                 *
1050                 *    - Stop parsing the RX ring and return immediately.
1051                 *
1052                 * This policy does not drop the packet received in the RX
1053                 * descriptor for which the allocation of a new mbuf failed.
 1054                 * Thus, it allows that packet to be retrieved later if
 1055                 * mbufs have been freed in the meantime.
 1056                 * As a side effect, holding RX descriptors instead of
 1057                 * systematically giving them back to the NIC may lead to
 1058                 * RX ring exhaustion situations.
 1059                 * However, the NIC can gracefully prevent such situations
 1060                 * from happening by sending specific "back-pressure" flow
 1061                 * control frames to its peer(s).
1062                 */
1063                PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_id=%u "
1064                           "staterr=0x%x data_len=%u",
1065                           (unsigned) rxq->port_id, (unsigned) rxq->queue_id,
1066                           (unsigned) rx_id, (unsigned) staterr,
1067                           (unsigned) rte_le_to_cpu_16(rxd.wb.upper.length));
1068
1069                nmb = rte_mbuf_raw_alloc(rxq->mb_pool);
1070                if (nmb == NULL) {
1071                        PMD_RX_LOG(DEBUG, "RX mbuf alloc failed port_id=%u "
1072                                   "queue_id=%u", (unsigned) rxq->port_id,
1073                                   (unsigned) rxq->queue_id);
1074                        rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed++;
1075                        break;
1076                }
1077
1078                nb_hold++;
1079                rxe = &sw_ring[rx_id];
1080                rx_id++;
1081                if (rx_id == rxq->nb_rx_desc)
1082                        rx_id = 0;
1083
1084                /* Prefetch next mbuf while processing current one. */
1085                rte_igb_prefetch(sw_ring[rx_id].mbuf);
1086
1087                /*
1088                 * When next RX descriptor is on a cache-line boundary,
1089                 * prefetch the next 4 RX descriptors and the next 8 pointers
1090                 * to mbufs.
1091                 */
1092                if ((rx_id & 0x3) == 0) {
1093                        rte_igb_prefetch(&rx_ring[rx_id]);
1094                        rte_igb_prefetch(&sw_ring[rx_id]);
1095                }
1096
1097                /*
1098                 * Update RX descriptor with the physical address of the new
 1099                 * data buffer of the newly allocated mbuf.
1100                 */
1101                rxm = rxe->mbuf;
1102                rxe->mbuf = nmb;
1103                dma = rte_cpu_to_le_64(rte_mbuf_data_iova_default(nmb));
1104                rxdp->read.pkt_addr = dma;
1105                rxdp->read.hdr_addr = 0;
1106
1107                /*
1108                 * Set data length & data buffer address of mbuf.
1109                 */
1110                data_len = rte_le_to_cpu_16(rxd.wb.upper.length);
1111                rxm->data_len = data_len;
1112                rxm->data_off = RTE_PKTMBUF_HEADROOM;
1113
1114                /*
1115                 * If this is the first buffer of the received packet,
1116                 * set the pointer to the first mbuf of the packet and
1117                 * initialize its context.
1118                 * Otherwise, update the total length and the number of segments
1119                 * of the current scattered packet, and update the pointer to
1120                 * the last mbuf of the current packet.
1121                 */
1122                if (first_seg == NULL) {
1123                        first_seg = rxm;
1124                        first_seg->pkt_len = data_len;
1125                        first_seg->nb_segs = 1;
1126                } else {
1127                        first_seg->pkt_len += data_len;
1128                        first_seg->nb_segs++;
1129                        last_seg->next = rxm;
1130                }
1131
1132                /*
1133                 * If this is not the last buffer of the received packet,
1134                 * update the pointer to the last mbuf of the current scattered
1135                 * packet and continue to parse the RX ring.
1136                 */
1137                if (! (staterr & E1000_RXD_STAT_EOP)) {
1138                        last_seg = rxm;
1139                        goto next_desc;
1140                }
1141
1142                /*
1143                 * This is the last buffer of the received packet.
1144                 * If the CRC is not stripped by the hardware:
1145                 *   - Subtract the CRC length from the total packet length.
1146                 *   - If the last buffer only contains the whole CRC or a part
 1147                 *     of it, free the mbuf associated with the last buffer.
1148                 *     If part of the CRC is also contained in the previous
1149                 *     mbuf, subtract the length of that CRC part from the
1150                 *     data length of the previous mbuf.
1151                 */
1152                rxm->next = NULL;
1153                if (unlikely(rxq->crc_len > 0)) {
1154                        first_seg->pkt_len -= RTE_ETHER_CRC_LEN;
1155                        if (data_len <= RTE_ETHER_CRC_LEN) {
1156                                rte_pktmbuf_free_seg(rxm);
1157                                first_seg->nb_segs--;
1158                                last_seg->data_len = (uint16_t)
1159                                        (last_seg->data_len -
1160                                         (RTE_ETHER_CRC_LEN - data_len));
1161                                last_seg->next = NULL;
1162                        } else
1163                                rxm->data_len = (uint16_t)
1164                                        (data_len - RTE_ETHER_CRC_LEN);
1165                }
1166
1167                /*
1168                 * Initialize the first mbuf of the returned packet:
1169                 *    - RX port identifier,
1170                 *    - hardware offload data, if any:
1171                 *      - RSS flag & hash,
1172                 *      - IP checksum flag,
1173                 *      - VLAN TCI, if any,
1174                 *      - error flags.
1175                 */
1176                first_seg->port = rxq->port_id;
1177                first_seg->hash.rss = rxd.wb.lower.hi_dword.rss;
1178
1179                /*
1180                 * The vlan_tci field is only valid when RTE_MBUF_F_RX_VLAN is
1181                 * set in the pkt_flags field and must be in CPU byte order.
1182                 */
1183                if ((staterr & rte_cpu_to_le_32(E1000_RXDEXT_STATERR_LB)) &&
1184                                (rxq->flags & IGB_RXQ_FLAG_LB_BSWAP_VLAN)) {
1185                        first_seg->vlan_tci =
1186                                rte_be_to_cpu_16(rxd.wb.upper.vlan);
1187                } else {
1188                        first_seg->vlan_tci =
1189                                rte_le_to_cpu_16(rxd.wb.upper.vlan);
1190                }
1191                hlen_type_rss = rte_le_to_cpu_32(rxd.wb.lower.lo_dword.data);
1192                pkt_flags = rx_desc_hlen_type_rss_to_pkt_flags(rxq, hlen_type_rss);
1193                pkt_flags = pkt_flags | rx_desc_status_to_pkt_flags(staterr);
1194                pkt_flags = pkt_flags | rx_desc_error_to_pkt_flags(staterr);
1195                first_seg->ol_flags = pkt_flags;
1196                first_seg->packet_type = igb_rxd_pkt_info_to_pkt_type(rxd.wb.
1197                                        lower.lo_dword.hs_rss.pkt_info);
1198
1199                /* Prefetch data of first segment, if configured to do so. */
1200                rte_packet_prefetch((char *)first_seg->buf_addr +
1201                        first_seg->data_off);
1202
1203                /*
1204                 * Store the mbuf address into the next entry of the array
1205                 * of returned packets.
1206                 */
1207                rx_pkts[nb_rx++] = first_seg;
1208
1209                /*
 1210                 * Set up the receive context for a new packet.
1211                 */
1212                first_seg = NULL;
1213        }
1214
1215        /*
1216         * Record index of the next RX descriptor to probe.
1217         */
1218        rxq->rx_tail = rx_id;
1219
1220        /*
1221         * Save receive context.
1222         */
1223        rxq->pkt_first_seg = first_seg;
1224        rxq->pkt_last_seg = last_seg;
1225
1226        /*
1227         * If the number of free RX descriptors is greater than the RX free
1228         * threshold of the queue, advance the Receive Descriptor Tail (RDT)
1229         * register.
1230         * Update the RDT with the value of the last processed RX descriptor
1231         * minus 1, to guarantee that the RDT register is never equal to the
1232         * RDH register, which creates a "full" ring situation from the
1233         * hardware point of view...
1234         */
1235        nb_hold = (uint16_t) (nb_hold + rxq->nb_rx_hold);
1236        if (nb_hold > rxq->rx_free_thresh) {
1237                PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_tail=%u "
1238                           "nb_hold=%u nb_rx=%u",
1239                           (unsigned) rxq->port_id, (unsigned) rxq->queue_id,
1240                           (unsigned) rx_id, (unsigned) nb_hold,
1241                           (unsigned) nb_rx);
1242                rx_id = (uint16_t) ((rx_id == 0) ?
1243                                     (rxq->nb_rx_desc - 1) : (rx_id - 1));
1244                E1000_PCI_REG_WRITE(rxq->rdt_reg_addr, rx_id);
1245                nb_hold = 0;
1246        }
1247        rxq->nb_rx_hold = nb_hold;
1248        return nb_rx;
1249}
1250
1251/*
1252 * Maximum number of Ring Descriptors.
1253 *
1254 * Since RDLEN/TDLEN should be a multiple of 128 bytes, the number of ring
1255 * descriptors should meet the following condition:
1256 *      (num_ring_desc * sizeof(struct e1000_rx/tx_desc)) % 128 == 0
1257 */
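
/*
 * Worked example (illustrative, not part of the original driver): the
 * advanced descriptors used in this file are 16 bytes, so the condition
 * above reduces to requiring a multiple of 128 / 16 = 8 descriptors, which
 * is what the IGB_RXD_ALIGN / IGB_TXD_ALIGN checks in the queue setup
 * functions below enforce. A build-time sanity check could look like this
 * (placed inside any function):
 *
 *     RTE_BUILD_BUG_ON((E1000_MAX_RING_DESC *
 *                       sizeof(union e1000_adv_rx_desc)) % 128 != 0);
 */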
1258
1259static void
1260igb_tx_queue_release_mbufs(struct igb_tx_queue *txq)
1261{
1262        unsigned i;
1263
1264        if (txq->sw_ring != NULL) {
1265                for (i = 0; i < txq->nb_tx_desc; i++) {
1266                        if (txq->sw_ring[i].mbuf != NULL) {
1267                                rte_pktmbuf_free_seg(txq->sw_ring[i].mbuf);
1268                                txq->sw_ring[i].mbuf = NULL;
1269                        }
1270                }
1271        }
1272}
1273
1274static void
1275igb_tx_queue_release(struct igb_tx_queue *txq)
1276{
1277        if (txq != NULL) {
1278                igb_tx_queue_release_mbufs(txq);
1279                rte_free(txq->sw_ring);
1280                rte_memzone_free(txq->mz);
1281                rte_free(txq);
1282        }
1283}
1284
1285void
1286eth_igb_tx_queue_release(struct rte_eth_dev *dev, uint16_t qid)
1287{
1288        igb_tx_queue_release(dev->data->tx_queues[qid]);
1289}
1290
1291static int
1292igb_tx_done_cleanup(struct igb_tx_queue *txq, uint32_t free_cnt)
1293{
1294        struct igb_tx_entry *sw_ring;
1295        volatile union e1000_adv_tx_desc *txr;
1296        uint16_t tx_first; /* First segment analyzed. */
1297        uint16_t tx_id;    /* Current segment being processed. */
1298        uint16_t tx_last;  /* Last segment in the current packet. */
1299        uint16_t tx_next;  /* First segment of the next packet. */
1300        int count = 0;
1301
1302        if (!txq)
1303                return -ENODEV;
1304
1305        sw_ring = txq->sw_ring;
1306        txr = txq->tx_ring;
1307
1308        /* tx_tail points at the most recently queued packet on the sw_ring.
1309         * Go to the end of that packet (the last segment in its chain);
1310         * the segment after it is the oldest segment in the sw_ring and
1311         * is the start of the first packet that will be attempted to be
1312         * freed.
1313         */
1314
1315        /* Get last segment in most recently added packet. */
1316        tx_first = sw_ring[txq->tx_tail].last_id;
1317
1318        /* Get the next segment, which is the oldest segment in ring. */
1319        tx_first = sw_ring[tx_first].next_id;
1320
1321        /* Set the current index to the first. */
1322        tx_id = tx_first;
1323
1324        /* Loop through each packet. For each packet, verify that an
1325         * mbuf exists and that the last segment is free. If so, free
1326         * it and move on.
1327         */
1328        while (1) {
1329                tx_last = sw_ring[tx_id].last_id;
1330
1331                if (sw_ring[tx_last].mbuf) {
1332                        if (txr[tx_last].wb.status &
1333                            rte_cpu_to_le_32(E1000_TXD_STAT_DD)) {
1334                                /* Increment the number of packets
1335                                 * freed.
1336                                 */
1337                                count++;
1338
1339                                /* Get the start of the next packet. */
1340                                tx_next = sw_ring[tx_last].next_id;
1341
1342                                /* Loop through all segments in a
1343                                 * packet.
1344                                 */
1345                                do {
1346                                        if (sw_ring[tx_id].mbuf) {
1347                                                rte_pktmbuf_free_seg(
1348                                                        sw_ring[tx_id].mbuf);
1349                                                sw_ring[tx_id].mbuf = NULL;
1350                                                sw_ring[tx_id].last_id = tx_id;
1351                                        }
1352
1353                                        /* Move to next segment. */
1354                                        tx_id = sw_ring[tx_id].next_id;
1355
1356                                } while (tx_id != tx_next);
1357
1358                                if (unlikely(count == (int)free_cnt))
1359                                        break;
1360                        } else {
1361                                /* mbuf still in use, nothing left to
1362                                 * free.
1363                                 */
1364                                break;
1365                        }
1366                } else {
1367                        /* There are multiple reasons to be here:
1368                         * 1) All the packets on the ring have been
1369                         *    freed - tx_id is equal to tx_first
1370                         *    and some packets have been freed.
1371                         *    - Done, exit
1372                         * 2) The interface has not sent a ring's worth of
1373                         *    packets yet, so the segment after the tail is
1374                         *    still empty. Or a previous call to this
1375                         *    function freed some of the segments but
1376                         *    not all, so there is a hole in the list.
1377                         *    Hopefully this is a rare case.
1378                         *    - Walk the list and find the next mbuf. If
1379                         *      there isn't one, then done.
1380                         */
1381                        if (likely(tx_id == tx_first && count != 0))
1382                                break;
1383
1384                        /* Walk the list and find the next mbuf, if any. */
1385                        do {
1386                                /* Move to next segment. */
1387                                tx_id = sw_ring[tx_id].next_id;
1388
1389                                if (sw_ring[tx_id].mbuf)
1390                                        break;
1391
1392                        } while (tx_id != tx_first);
1393
1394                        /* Determine why the previous loop bailed. If
1395                         * there is no mbuf, we are done.
1396                         */
1397                        if (!sw_ring[tx_id].mbuf)
1398                                break;
1399                }
1400        }
1401
1402        return count;
1403}
1404
1405int
1406eth_igb_tx_done_cleanup(void *txq, uint32_t free_cnt)
1407{
1408        return igb_tx_done_cleanup(txq, free_cnt);
1409}
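
/*
 * Usage sketch (illustrative, not driver code): applications normally reach
 * igb_tx_done_cleanup() through the generic ethdev API, e.g.
 *
 *     int freed = rte_eth_tx_done_cleanup(port_id, queue_id, 0);
 *
 * where a free_cnt of 0 asks the PMD to free as many transmitted mbufs as
 * possible and a negative return value reports an error.
 */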
1410
1411static void
1412igb_reset_tx_queue_stat(struct igb_tx_queue *txq)
1413{
1414        txq->tx_head = 0;
1415        txq->tx_tail = 0;
1416        txq->ctx_curr = 0;
1417        memset((void *)&txq->ctx_cache, 0,
1418                IGB_CTX_NUM * sizeof(struct igb_advctx_info));
1419}
1420
1421static void
1422igb_reset_tx_queue(struct igb_tx_queue *txq, struct rte_eth_dev *dev)
1423{
1424        static const union e1000_adv_tx_desc zeroed_desc = {{0}};
1425        struct igb_tx_entry *txe = txq->sw_ring;
1426        uint16_t i, prev;
1427        struct e1000_hw *hw;
1428
1429        hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
1430        /* Zero out HW ring memory */
1431        for (i = 0; i < txq->nb_tx_desc; i++) {
1432                txq->tx_ring[i] = zeroed_desc;
1433        }
1434
1435        /* Initialize ring entries */
1436        prev = (uint16_t)(txq->nb_tx_desc - 1);
1437        for (i = 0; i < txq->nb_tx_desc; i++) {
1438                volatile union e1000_adv_tx_desc *txd = &(txq->tx_ring[i]);
1439
1440                txd->wb.status = E1000_TXD_STAT_DD;
1441                txe[i].mbuf = NULL;
1442                txe[i].last_id = i;
1443                txe[prev].next_id = i;
1444                prev = i;
1445        }
1446
1447        txq->txd_type = E1000_ADVTXD_DTYP_DATA;
1448        /* 82575 specific, each tx queue will use 2 hw contexts */
1449        if (hw->mac.type == e1000_82575)
1450                txq->ctx_start = txq->queue_id * IGB_CTX_NUM;
1451
1452        igb_reset_tx_queue_stat(txq);
1453}
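
/*
 * Illustrative note (restating the reset loop above): after this reset the
 * software ring forms a circular list, i.e. for every index i
 *
 *     txe[i].next_id == (i + 1) % nb_tx_desc
 *     txe[i].last_id == i
 *
 * which is the linkage the transmit and cleanup paths rely on when they
 * follow next_id/last_id through a packet's segment chain.
 */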
1454
1455uint64_t
1456igb_get_tx_port_offloads_capa(struct rte_eth_dev *dev)
1457{
1458        uint64_t tx_offload_capa;
1459
1460        RTE_SET_USED(dev);
1461        tx_offload_capa = RTE_ETH_TX_OFFLOAD_VLAN_INSERT |
1462                          RTE_ETH_TX_OFFLOAD_IPV4_CKSUM  |
1463                          RTE_ETH_TX_OFFLOAD_UDP_CKSUM   |
1464                          RTE_ETH_TX_OFFLOAD_TCP_CKSUM   |
1465                          RTE_ETH_TX_OFFLOAD_SCTP_CKSUM  |
1466                          RTE_ETH_TX_OFFLOAD_TCP_TSO     |
1467                          RTE_ETH_TX_OFFLOAD_MULTI_SEGS;
1468
1469        return tx_offload_capa;
1470}
1471
1472uint64_t
1473igb_get_tx_queue_offloads_capa(struct rte_eth_dev *dev)
1474{
1475        uint64_t tx_queue_offload_capa;
1476
1477        tx_queue_offload_capa = igb_get_tx_port_offloads_capa(dev);
1478
1479        return tx_queue_offload_capa;
1480}
1481
1482int
1483eth_igb_tx_queue_setup(struct rte_eth_dev *dev,
1484                         uint16_t queue_idx,
1485                         uint16_t nb_desc,
1486                         unsigned int socket_id,
1487                         const struct rte_eth_txconf *tx_conf)
1488{
1489        const struct rte_memzone *tz;
1490        struct igb_tx_queue *txq;
1491        struct e1000_hw     *hw;
1492        uint32_t size;
1493        uint64_t offloads;
1494
1495        offloads = tx_conf->offloads | dev->data->dev_conf.txmode.offloads;
1496
1497        hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
1498
1499        /*
1500         * Validate number of transmit descriptors.
1501         * It must not exceed the hardware maximum and must be a multiple
1502         * of IGB_TXD_ALIGN, so the ring size is a multiple of E1000_ALIGN.
1503         */
1504        if (nb_desc % IGB_TXD_ALIGN != 0 ||
1505                        (nb_desc > E1000_MAX_RING_DESC) ||
1506                        (nb_desc < E1000_MIN_RING_DESC)) {
1507                return -EINVAL;
1508        }
1509
1510        /*
1511         * The tx_free_thresh and tx_rs_thresh values are not used in the 1G
1512         * driver.
1513         */
1514        if (tx_conf->tx_free_thresh != 0)
1515                PMD_INIT_LOG(INFO, "The tx_free_thresh parameter is not "
1516                             "used for the 1G driver.");
1517        if (tx_conf->tx_rs_thresh != 0)
1518                PMD_INIT_LOG(INFO, "The tx_rs_thresh parameter is not "
1519                             "used for the 1G driver.");
1520        if (tx_conf->tx_thresh.wthresh == 0 && hw->mac.type != e1000_82576)
1521                PMD_INIT_LOG(INFO, "To improve 1G driver performance, "
1522                             "consider setting the TX WTHRESH value to 4, 8, "
1523                             "or 16.");
1524
1525        /* Free memory prior to re-allocation if needed */
1526        if (dev->data->tx_queues[queue_idx] != NULL) {
1527                igb_tx_queue_release(dev->data->tx_queues[queue_idx]);
1528                dev->data->tx_queues[queue_idx] = NULL;
1529        }
1530
1531        /* First allocate the tx queue data structure */
1532        txq = rte_zmalloc("ethdev TX queue", sizeof(struct igb_tx_queue),
1533                                                        RTE_CACHE_LINE_SIZE);
1534        if (txq == NULL)
1535                return -ENOMEM;
1536
1537        /*
1538         * Allocate TX ring hardware descriptors. A memzone large enough to
1539         * handle the maximum ring size is allocated in order to allow for
1540         * resizing in later calls to the queue setup function.
1541         */
1542        size = sizeof(union e1000_adv_tx_desc) * E1000_MAX_RING_DESC;
1543        tz = rte_eth_dma_zone_reserve(dev, "tx_ring", queue_idx, size,
1544                                      E1000_ALIGN, socket_id);
1545        if (tz == NULL) {
1546                igb_tx_queue_release(txq);
1547                return -ENOMEM;
1548        }
1549
1550        txq->mz = tz;
1551        txq->nb_tx_desc = nb_desc;
1552        txq->pthresh = tx_conf->tx_thresh.pthresh;
1553        txq->hthresh = tx_conf->tx_thresh.hthresh;
1554        txq->wthresh = tx_conf->tx_thresh.wthresh;
1555        if (txq->wthresh > 0 && hw->mac.type == e1000_82576)
1556                txq->wthresh = 1;
1557        txq->queue_id = queue_idx;
1558        txq->reg_idx = (uint16_t)((RTE_ETH_DEV_SRIOV(dev).active == 0) ?
1559                queue_idx : RTE_ETH_DEV_SRIOV(dev).def_pool_q_idx + queue_idx);
1560        txq->port_id = dev->data->port_id;
1561
1562        txq->tdt_reg_addr = E1000_PCI_REG_ADDR(hw, E1000_TDT(txq->reg_idx));
1563        txq->tx_ring_phys_addr = tz->iova;
1564
1565        txq->tx_ring = (union e1000_adv_tx_desc *) tz->addr;
1566        /* Allocate software ring */
1567        txq->sw_ring = rte_zmalloc("txq->sw_ring",
1568                                   sizeof(struct igb_tx_entry) * nb_desc,
1569                                   RTE_CACHE_LINE_SIZE);
1570        if (txq->sw_ring == NULL) {
1571                igb_tx_queue_release(txq);
1572                return -ENOMEM;
1573        }
1574        PMD_INIT_LOG(DEBUG, "sw_ring=%p hw_ring=%p dma_addr=0x%"PRIx64,
1575                     txq->sw_ring, txq->tx_ring, txq->tx_ring_phys_addr);
1576
1577        igb_reset_tx_queue(txq, dev);
1578        dev->tx_pkt_burst = eth_igb_xmit_pkts;
1579        dev->tx_pkt_prepare = eth_igb_prep_pkts;
1580        dev->data->tx_queues[queue_idx] = txq;
1581        txq->offloads = offloads;
1582
1583        return 0;
1584}
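
/*
 * Usage sketch (illustrative; "port_id" and "socket" are assumed to come
 * from the application): this setup routine is reached through the generic
 * ethdev call, e.g.
 *
 *     struct rte_eth_txconf txconf = {
 *             .offloads = RTE_ETH_TX_OFFLOAD_IPV4_CKSUM |
 *                         RTE_ETH_TX_OFFLOAD_TCP_CKSUM,
 *     };
 *     ret = rte_eth_tx_queue_setup(port_id, 0, 512, socket, &txconf);
 *
 * where 512 descriptors satisfy the alignment, minimum and maximum checks
 * above.
 */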
1585
1586static void
1587igb_rx_queue_release_mbufs(struct igb_rx_queue *rxq)
1588{
1589        unsigned i;
1590
1591        if (rxq->sw_ring != NULL) {
1592                for (i = 0; i < rxq->nb_rx_desc; i++) {
1593                        if (rxq->sw_ring[i].mbuf != NULL) {
1594                                rte_pktmbuf_free_seg(rxq->sw_ring[i].mbuf);
1595                                rxq->sw_ring[i].mbuf = NULL;
1596                        }
1597                }
1598        }
1599}
1600
1601static void
1602igb_rx_queue_release(struct igb_rx_queue *rxq)
1603{
1604        if (rxq != NULL) {
1605                igb_rx_queue_release_mbufs(rxq);
1606                rte_free(rxq->sw_ring);
1607                rte_memzone_free(rxq->mz);
1608                rte_free(rxq);
1609        }
1610}
1611
1612void
1613eth_igb_rx_queue_release(struct rte_eth_dev *dev, uint16_t qid)
1614{
1615        igb_rx_queue_release(dev->data->rx_queues[qid]);
1616}
1617
1618static void
1619igb_reset_rx_queue(struct igb_rx_queue *rxq)
1620{
1621        static const union e1000_adv_rx_desc zeroed_desc = {{0}};
1622        unsigned i;
1623
1624        /* Zero out HW ring memory */
1625        for (i = 0; i < rxq->nb_rx_desc; i++) {
1626                rxq->rx_ring[i] = zeroed_desc;
1627        }
1628
1629        rxq->rx_tail = 0;
1630        rxq->pkt_first_seg = NULL;
1631        rxq->pkt_last_seg = NULL;
1632}
1633
1634uint64_t
1635igb_get_rx_port_offloads_capa(struct rte_eth_dev *dev)
1636{
1637        uint64_t rx_offload_capa;
1638        struct e1000_hw *hw;
1639
1640        hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
1641
1642        rx_offload_capa = RTE_ETH_RX_OFFLOAD_VLAN_STRIP  |
1643                          RTE_ETH_RX_OFFLOAD_VLAN_FILTER |
1644                          RTE_ETH_RX_OFFLOAD_IPV4_CKSUM  |
1645                          RTE_ETH_RX_OFFLOAD_UDP_CKSUM   |
1646                          RTE_ETH_RX_OFFLOAD_TCP_CKSUM   |
1647                          RTE_ETH_RX_OFFLOAD_KEEP_CRC    |
1648                          RTE_ETH_RX_OFFLOAD_SCATTER     |
1649                          RTE_ETH_RX_OFFLOAD_RSS_HASH;
1650
1651        if (hw->mac.type == e1000_i350 ||
1652            hw->mac.type == e1000_i210 ||
1653            hw->mac.type == e1000_i211)
1654                rx_offload_capa |= RTE_ETH_RX_OFFLOAD_VLAN_EXTEND;
1655
1656        return rx_offload_capa;
1657}
1658
1659uint64_t
1660igb_get_rx_queue_offloads_capa(struct rte_eth_dev *dev)
1661{
1662        struct e1000_hw *hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
1663        uint64_t rx_queue_offload_capa;
1664
1665        switch (hw->mac.type) {
1666        case e1000_vfadapt_i350:
1667                /*
1668                 * As only one Rx queue can be used, report the per-queue
1669                 * offload capability as identical to the per-port capability
1670                 * for convenience.
1671                 */
1672                rx_queue_offload_capa = igb_get_rx_port_offloads_capa(dev);
1673                break;
1674        default:
1675                rx_queue_offload_capa = 0;
1676        }
1677        return rx_queue_offload_capa;
1678}
1679
1680int
1681eth_igb_rx_queue_setup(struct rte_eth_dev *dev,
1682                         uint16_t queue_idx,
1683                         uint16_t nb_desc,
1684                         unsigned int socket_id,
1685                         const struct rte_eth_rxconf *rx_conf,
1686                         struct rte_mempool *mp)
1687{
1688        const struct rte_memzone *rz;
1689        struct igb_rx_queue *rxq;
1690        struct e1000_hw     *hw;
1691        unsigned int size;
1692        uint64_t offloads;
1693
1694        offloads = rx_conf->offloads | dev->data->dev_conf.rxmode.offloads;
1695
1696        hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
1697
1698        /*
1699         * Validate number of receive descriptors.
1700         * It must not exceed the hardware maximum and must be a multiple
1701         * of IGB_RXD_ALIGN, so the ring size is a multiple of E1000_ALIGN.
1702         */
1703        if (nb_desc % IGB_RXD_ALIGN != 0 ||
1704                        (nb_desc > E1000_MAX_RING_DESC) ||
1705                        (nb_desc < E1000_MIN_RING_DESC)) {
1706                return -EINVAL;
1707        }
1708
1709        /* Free memory prior to re-allocation if needed */
1710        if (dev->data->rx_queues[queue_idx] != NULL) {
1711                igb_rx_queue_release(dev->data->rx_queues[queue_idx]);
1712                dev->data->rx_queues[queue_idx] = NULL;
1713        }
1714
1715        /* First allocate the RX queue data structure. */
1716        rxq = rte_zmalloc("ethdev RX queue", sizeof(struct igb_rx_queue),
1717                          RTE_CACHE_LINE_SIZE);
1718        if (rxq == NULL)
1719                return -ENOMEM;
1720        rxq->offloads = offloads;
1721        rxq->mb_pool = mp;
1722        rxq->nb_rx_desc = nb_desc;
1723        rxq->pthresh = rx_conf->rx_thresh.pthresh;
1724        rxq->hthresh = rx_conf->rx_thresh.hthresh;
1725        rxq->wthresh = rx_conf->rx_thresh.wthresh;
1726        if (rxq->wthresh > 0 &&
1727            (hw->mac.type == e1000_82576 || hw->mac.type == e1000_vfadapt_i350))
1728                rxq->wthresh = 1;
1729        rxq->drop_en = rx_conf->rx_drop_en;
1730        rxq->rx_free_thresh = rx_conf->rx_free_thresh;
1731        rxq->queue_id = queue_idx;
1732        rxq->reg_idx = (uint16_t)((RTE_ETH_DEV_SRIOV(dev).active == 0) ?
1733                queue_idx : RTE_ETH_DEV_SRIOV(dev).def_pool_q_idx + queue_idx);
1734        rxq->port_id = dev->data->port_id;
1735        if (dev->data->dev_conf.rxmode.offloads & RTE_ETH_RX_OFFLOAD_KEEP_CRC)
1736                rxq->crc_len = RTE_ETHER_CRC_LEN;
1737        else
1738                rxq->crc_len = 0;
1739
1740        /*
1741         *  Allocate RX ring hardware descriptors. A memzone large enough to
1742         *  handle the maximum ring size is allocated in order to allow for
1743         *  resizing in later calls to the queue setup function.
1744         */
1745        size = sizeof(union e1000_adv_rx_desc) * E1000_MAX_RING_DESC;
1746        rz = rte_eth_dma_zone_reserve(dev, "rx_ring", queue_idx, size,
1747                                      E1000_ALIGN, socket_id);
1748        if (rz == NULL) {
1749                igb_rx_queue_release(rxq);
1750                return -ENOMEM;
1751        }
1752
1753        rxq->mz = rz;
1754        rxq->rdt_reg_addr = E1000_PCI_REG_ADDR(hw, E1000_RDT(rxq->reg_idx));
1755        rxq->rdh_reg_addr = E1000_PCI_REG_ADDR(hw, E1000_RDH(rxq->reg_idx));
1756        rxq->rx_ring_phys_addr = rz->iova;
1757        rxq->rx_ring = (union e1000_adv_rx_desc *) rz->addr;
1758
1759        /* Allocate software ring. */
1760        rxq->sw_ring = rte_zmalloc("rxq->sw_ring",
1761                                   sizeof(struct igb_rx_entry) * nb_desc,
1762                                   RTE_CACHE_LINE_SIZE);
1763        if (rxq->sw_ring == NULL) {
1764                igb_rx_queue_release(rxq);
1765                return -ENOMEM;
1766        }
1767        PMD_INIT_LOG(DEBUG, "sw_ring=%p hw_ring=%p dma_addr=0x%"PRIx64,
1768                     rxq->sw_ring, rxq->rx_ring, rxq->rx_ring_phys_addr);
1769
1770        dev->data->rx_queues[queue_idx] = rxq;
1771        igb_reset_rx_queue(rxq);
1772
1773        return 0;
1774}
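
/*
 * Usage sketch (illustrative; "port_id", "socket" and "mb_pool" are assumed
 * to come from the application): the RX counterpart of the TX setup above is
 * reached through
 *
 *     ret = rte_eth_rx_queue_setup(port_id, 0, 512, socket, NULL, mb_pool);
 *
 * where a NULL rte_eth_rxconf selects the device defaults and 512
 * descriptors satisfy the alignment, minimum and maximum checks above.
 */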
1775
1776uint32_t
1777eth_igb_rx_queue_count(void *rx_queue)
1778{
1779#define IGB_RXQ_SCAN_INTERVAL 4
1780        volatile union e1000_adv_rx_desc *rxdp;
1781        struct igb_rx_queue *rxq;
1782        uint32_t desc = 0;
1783
1784        rxq = rx_queue;
1785        rxdp = &(rxq->rx_ring[rxq->rx_tail]);
1786
1787        while ((desc < rxq->nb_rx_desc) &&
1788                (rxdp->wb.upper.status_error & E1000_RXD_STAT_DD)) {
1789                desc += IGB_RXQ_SCAN_INTERVAL;
1790                rxdp += IGB_RXQ_SCAN_INTERVAL;
1791                if (rxq->rx_tail + desc >= rxq->nb_rx_desc)
1792                        rxdp = &(rxq->rx_ring[rxq->rx_tail +
1793                                desc - rxq->nb_rx_desc]);
1794        }
1795
1796        return desc;
1797}
1798
1799int
1800eth_igb_rx_descriptor_status(void *rx_queue, uint16_t offset)
1801{
1802        struct igb_rx_queue *rxq = rx_queue;
1803        volatile uint32_t *status;
1804        uint32_t desc;
1805
1806        if (unlikely(offset >= rxq->nb_rx_desc))
1807                return -EINVAL;
1808
1809        if (offset >= rxq->nb_rx_desc - rxq->nb_rx_hold)
1810                return RTE_ETH_RX_DESC_UNAVAIL;
1811
1812        desc = rxq->rx_tail + offset;
1813        if (desc >= rxq->nb_rx_desc)
1814                desc -= rxq->nb_rx_desc;
1815
1816        status = &rxq->rx_ring[desc].wb.upper.status_error;
1817        if (*status & rte_cpu_to_le_32(E1000_RXD_STAT_DD))
1818                return RTE_ETH_RX_DESC_DONE;
1819
1820        return RTE_ETH_RX_DESC_AVAIL;
1821}
1822
1823int
1824eth_igb_tx_descriptor_status(void *tx_queue, uint16_t offset)
1825{
1826        struct igb_tx_queue *txq = tx_queue;
1827        volatile uint32_t *status;
1828        uint32_t desc;
1829
1830        if (unlikely(offset >= txq->nb_tx_desc))
1831                return -EINVAL;
1832
1833        desc = txq->tx_tail + offset;
1834        if (desc >= txq->nb_tx_desc)
1835                desc -= txq->nb_tx_desc;
1836
1837        status = &txq->tx_ring[desc].wb.status;
1838        if (*status & rte_cpu_to_le_32(E1000_TXD_STAT_DD))
1839                return RTE_ETH_TX_DESC_DONE;
1840
1841        return RTE_ETH_TX_DESC_FULL;
1842}
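
/*
 * Usage sketch (illustrative): the two descriptor-status helpers above back
 * the generic ethdev calls, e.g.
 *
 *     if (rte_eth_rx_descriptor_status(port_id, queue_id, offset) ==
 *                     RTE_ETH_RX_DESC_DONE)
 *             ... a packet is ready "offset" descriptors past the next one
 *                 to be polled ...
 *
 * and similarly rte_eth_tx_descriptor_status() for the transmit side.
 */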
1843
1844void
1845igb_dev_clear_queues(struct rte_eth_dev *dev)
1846{
1847        uint16_t i;
1848        struct igb_tx_queue *txq;
1849        struct igb_rx_queue *rxq;
1850
1851        for (i = 0; i < dev->data->nb_tx_queues; i++) {
1852                txq = dev->data->tx_queues[i];
1853                if (txq != NULL) {
1854                        igb_tx_queue_release_mbufs(txq);
1855                        igb_reset_tx_queue(txq, dev);
1856                }
1857        }
1858
1859        for (i = 0; i < dev->data->nb_rx_queues; i++) {
1860                rxq = dev->data->rx_queues[i];
1861                if (rxq != NULL) {
1862                        igb_rx_queue_release_mbufs(rxq);
1863                        igb_reset_rx_queue(rxq);
1864                }
1865        }
1866}
1867
1868void
1869igb_dev_free_queues(struct rte_eth_dev *dev)
1870{
1871        uint16_t i;
1872
1873        for (i = 0; i < dev->data->nb_rx_queues; i++) {
1874                eth_igb_rx_queue_release(dev, i);
1875                dev->data->rx_queues[i] = NULL;
1876        }
1877        dev->data->nb_rx_queues = 0;
1878
1879        for (i = 0; i < dev->data->nb_tx_queues; i++) {
1880                eth_igb_tx_queue_release(dev, i);
1881                dev->data->tx_queues[i] = NULL;
1882        }
1883        dev->data->nb_tx_queues = 0;
1884}
1885
1886/**
1887 * Receive Side Scaling (RSS).
1888 * See section 7.1.1.7 in the following document:
1889 *     "Intel 82576 GbE Controller Datasheet" - Revision 2.45 October 2009
1890 *
1891 * Principles:
1892 * The source and destination IP addresses of the IP header and the source and
1893 * destination ports of TCP/UDP headers, if any, of received packets are hashed
1894 * against a configurable random key to compute a 32-bit RSS hash result.
1895 * The seven (7) LSBs of the 32-bit hash result are used as an index into a
1896 * 128-entry redirection table (RETA).  Each entry of the RETA provides a 3-bit
1897 * RSS output index which is used as the RX queue index where to store the
1898 * received packets.
1899 * The following output is supplied in the RX write-back descriptor:
1900 *     - 32-bit result of the Microsoft RSS hash function,
1901 *     - 4-bit RSS type field.
1902 */
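
/*
 * Illustrative sketch (restating the description above, not driver code):
 * the RETA lookup performed by the hardware amounts to
 *
 *     uint8_t rx_queue = reta[rss_hash & 0x7F];
 *
 * i.e. the 7 LSBs select one of the 128 entries, and each entry holds a
 * 3-bit queue index, so RSS can spread traffic over at most 8 RX queues.
 */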
1903
1904/*
1905 * RSS random key supplied in section 7.1.1.7.3 of the Intel 82576 datasheet.
1906 * Used as the default key.
1907 */
1908static uint8_t rss_intel_key[40] = {
1909        0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2,
1910        0x41, 0x67, 0x25, 0x3D, 0x43, 0xA3, 0x8F, 0xB0,
1911        0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
1912        0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C,
1913        0x6A, 0x42, 0xB7, 0x3B, 0xBE, 0xAC, 0x01, 0xFA,
1914};
1915
1916static void
1917igb_rss_disable(struct rte_eth_dev *dev)
1918{
1919        struct e1000_hw *hw;
1920        uint32_t mrqc;
1921
1922        hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
1923        mrqc = E1000_READ_REG(hw, E1000_MRQC);
1924        mrqc &= ~E1000_MRQC_ENABLE_MASK;
1925        E1000_WRITE_REG(hw, E1000_MRQC, mrqc);
1926}
1927
1928static void
1929igb_hw_rss_hash_set(struct e1000_hw *hw, struct rte_eth_rss_conf *rss_conf)
1930{
1931        uint8_t  *hash_key;
1932        uint32_t rss_key;
1933        uint32_t mrqc;
1934        uint64_t rss_hf;
1935        uint16_t i;
1936
1937        hash_key = rss_conf->rss_key;
1938        if (hash_key != NULL) {
1939                /* Fill in RSS hash key */
1940                for (i = 0; i < 10; i++) {
1941                        rss_key  = hash_key[(i * 4)];
1942                        rss_key |= hash_key[(i * 4) + 1] << 8;
1943                        rss_key |= hash_key[(i * 4) + 2] << 16;
1944                        rss_key |= hash_key[(i * 4) + 3] << 24;
1945                        E1000_WRITE_REG_ARRAY(hw, E1000_RSSRK(0), i, rss_key);
1946                }
1947        }
1948
1949        /* Set configured hashing protocols in MRQC register */
1950        rss_hf = rss_conf->rss_hf;
1951        mrqc = E1000_MRQC_ENABLE_RSS_4Q; /* RSS enabled. */
1952        if (rss_hf & RTE_ETH_RSS_IPV4)
1953                mrqc |= E1000_MRQC_RSS_FIELD_IPV4;
1954        if (rss_hf & RTE_ETH_RSS_NONFRAG_IPV4_TCP)
1955                mrqc |= E1000_MRQC_RSS_FIELD_IPV4_TCP;
1956        if (rss_hf & RTE_ETH_RSS_IPV6)
1957                mrqc |= E1000_MRQC_RSS_FIELD_IPV6;
1958        if (rss_hf & RTE_ETH_RSS_IPV6_EX)
1959                mrqc |= E1000_MRQC_RSS_FIELD_IPV6_EX;
1960        if (rss_hf & RTE_ETH_RSS_NONFRAG_IPV6_TCP)
1961                mrqc |= E1000_MRQC_RSS_FIELD_IPV6_TCP;
1962        if (rss_hf & RTE_ETH_RSS_IPV6_TCP_EX)
1963                mrqc |= E1000_MRQC_RSS_FIELD_IPV6_TCP_EX;
1964        if (rss_hf & RTE_ETH_RSS_NONFRAG_IPV4_UDP)
1965                mrqc |= E1000_MRQC_RSS_FIELD_IPV4_UDP;
1966        if (rss_hf & RTE_ETH_RSS_NONFRAG_IPV6_UDP)
1967                mrqc |= E1000_MRQC_RSS_FIELD_IPV6_UDP;
1968        if (rss_hf & RTE_ETH_RSS_IPV6_UDP_EX)
1969                mrqc |= E1000_MRQC_RSS_FIELD_IPV6_UDP_EX;
1970        E1000_WRITE_REG(hw, E1000_MRQC, mrqc);
1971}
1972
1973int
1974eth_igb_rss_hash_update(struct rte_eth_dev *dev,
1975                        struct rte_eth_rss_conf *rss_conf)
1976{
1977        struct e1000_hw *hw;
1978        uint32_t mrqc;
1979        uint64_t rss_hf;
1980
1981        hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
1982
1983        /*
1984         * Before changing anything, first check that the update RSS operation
1985         * does not attempt to disable RSS, if RSS was enabled at
1986         * initialization time, or does not attempt to enable RSS, if RSS was
1987         * disabled at initialization time.
1988         */
1989        rss_hf = rss_conf->rss_hf & IGB_RSS_OFFLOAD_ALL;
1990        mrqc = E1000_READ_REG(hw, E1000_MRQC);
1991        if (!(mrqc & E1000_MRQC_ENABLE_MASK)) { /* RSS disabled */
1992                if (rss_hf != 0) /* Enable RSS */
1993                        return -(EINVAL);
1994                return 0; /* Nothing to do */
1995        }
1996        /* RSS enabled */
1997        if (rss_hf == 0) /* Disable RSS */
1998                return -(EINVAL);
1999        igb_hw_rss_hash_set(hw, rss_conf);
2000        return 0;
2001}
2002
2003int eth_igb_rss_hash_conf_get(struct rte_eth_dev *dev,
2004                              struct rte_eth_rss_conf *rss_conf)
2005{
2006        struct e1000_hw *hw;
2007        uint8_t *hash_key;
2008        uint32_t rss_key;
2009        uint32_t mrqc;
2010        uint64_t rss_hf;
2011        uint16_t i;
2012
2013        hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2014        hash_key = rss_conf->rss_key;
2015        if (hash_key != NULL) {
2016                /* Return RSS hash key */
2017                for (i = 0; i < 10; i++) {
2018                        rss_key = E1000_READ_REG_ARRAY(hw, E1000_RSSRK(0), i);
2019                        hash_key[(i * 4)] = rss_key & 0x000000FF;
2020                        hash_key[(i * 4) + 1] = (rss_key >> 8) & 0x000000FF;
2021                        hash_key[(i * 4) + 2] = (rss_key >> 16) & 0x000000FF;
2022                        hash_key[(i * 4) + 3] = (rss_key >> 24) & 0x000000FF;
2023                }
2024        }
2025
2026        /* Get RSS functions configured in MRQC register */
2027        mrqc = E1000_READ_REG(hw, E1000_MRQC);
2028        if ((mrqc & E1000_MRQC_ENABLE_RSS_4Q) == 0) { /* RSS is disabled */
2029                rss_conf->rss_hf = 0;
2030                return 0;
2031        }
2032        rss_hf = 0;
2033        if (mrqc & E1000_MRQC_RSS_FIELD_IPV4)
2034                rss_hf |= RTE_ETH_RSS_IPV4;
2035        if (mrqc & E1000_MRQC_RSS_FIELD_IPV4_TCP)
2036                rss_hf |= RTE_ETH_RSS_NONFRAG_IPV4_TCP;
2037        if (mrqc & E1000_MRQC_RSS_FIELD_IPV6)
2038                rss_hf |= RTE_ETH_RSS_IPV6;
2039        if (mrqc & E1000_MRQC_RSS_FIELD_IPV6_EX)
2040                rss_hf |= RTE_ETH_RSS_IPV6_EX;
2041        if (mrqc & E1000_MRQC_RSS_FIELD_IPV6_TCP)
2042                rss_hf |= RTE_ETH_RSS_NONFRAG_IPV6_TCP;
2043        if (mrqc & E1000_MRQC_RSS_FIELD_IPV6_TCP_EX)
2044                rss_hf |= RTE_ETH_RSS_IPV6_TCP_EX;
2045        if (mrqc & E1000_MRQC_RSS_FIELD_IPV4_UDP)
2046                rss_hf |= RTE_ETH_RSS_NONFRAG_IPV4_UDP;
2047        if (mrqc & E1000_MRQC_RSS_FIELD_IPV6_UDP)
2048                rss_hf |= RTE_ETH_RSS_NONFRAG_IPV6_UDP;
2049        if (mrqc & E1000_MRQC_RSS_FIELD_IPV6_UDP_EX)
2050                rss_hf |= RTE_ETH_RSS_IPV6_UDP_EX;
2051        rss_conf->rss_hf = rss_hf;
2052        return 0;
2053}
2054
2055static void
2056igb_rss_configure(struct rte_eth_dev *dev)
2057{
2058        struct rte_eth_rss_conf rss_conf;
2059        struct e1000_hw *hw;
2060        uint32_t shift;
2061        uint16_t i;
2062
2063        hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2064
2065        /* Fill in redirection table. */
2066        shift = (hw->mac.type == e1000_82575) ? 6 : 0;
2067        for (i = 0; i < 128; i++) {
2068                union e1000_reta {
2069                        uint32_t dword;
2070                        uint8_t  bytes[4];
2071                } reta;
2072                uint8_t q_idx;
2073
2074                q_idx = (uint8_t) ((dev->data->nb_rx_queues > 1) ?
2075                                   i % dev->data->nb_rx_queues : 0);
2076                reta.bytes[i & 3] = (uint8_t) (q_idx << shift);
2077                if ((i & 3) == 3)
2078                        E1000_WRITE_REG(hw, E1000_RETA(i >> 2), reta.dword);
2079        }
2080
2081        /*
2082         * Configure the RSS key and the RSS protocols used to compute
2083         * the RSS hash of input packets.
2084         */
2085        rss_conf = dev->data->dev_conf.rx_adv_conf.rss_conf;
2086        if ((rss_conf.rss_hf & IGB_RSS_OFFLOAD_ALL) == 0) {
2087                igb_rss_disable(dev);
2088                return;
2089        }
2090        if (rss_conf.rss_key == NULL)
2091                rss_conf.rss_key = rss_intel_key; /* Default hash key */
2092        igb_hw_rss_hash_set(hw, &rss_conf);
2093}
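
/*
 * Illustrative application-side sketch (not driver code): the routine above
 * is driven by the port configuration, e.g.
 *
 *     struct rte_eth_conf port_conf = {
 *             .rxmode = { .mq_mode = RTE_ETH_MQ_RX_RSS },
 *             .rx_adv_conf.rss_conf = {
 *                     .rss_key = NULL,
 *                     .rss_hf = RTE_ETH_RSS_IPV4 |
 *                               RTE_ETH_RSS_NONFRAG_IPV4_TCP,
 *             },
 *     };
 *
 * A NULL rss_key selects the default rss_intel_key above; an rss_hf that
 * matches none of the supported hash types makes igb_rss_configure()
 * disable RSS instead.
 */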
2094
2095/*
2096 * Check whether the MAC type supports VMDq.
2097 * Return 1 if it does, otherwise return 0.
2098 */
2099static int
2100igb_is_vmdq_supported(const struct rte_eth_dev *dev)
2101{
2102        const struct e1000_hw *hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2103
2104        switch (hw->mac.type) {
2105        case e1000_82576:
2106        case e1000_82580:
2107        case e1000_i350:
2108                return 1;
2109        case e1000_82540:
2110        case e1000_82541:
2111        case e1000_82542:
2112        case e1000_82543:
2113        case e1000_82544:
2114        case e1000_82545:
2115        case e1000_82546:
2116        case e1000_82547:
2117        case e1000_82571:
2118        case e1000_82572:
2119        case e1000_82573:
2120        case e1000_82574:
2121        case e1000_82583:
2122        case e1000_i210:
2123        case e1000_i211:
2124        default:
2125                PMD_INIT_LOG(ERR, "Cannot support VMDq feature");
2126                return 0;
2127        }
2128}
2129
2130static int
2131igb_vmdq_rx_hw_configure(struct rte_eth_dev *dev)
2132{
2133        struct rte_eth_vmdq_rx_conf *cfg;
2134        struct e1000_hw *hw;
2135        uint32_t mrqc, vt_ctl, vmolr, rctl;
2136        int i;
2137
2138        PMD_INIT_FUNC_TRACE();
2139
2140        hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2141        cfg = &dev->data->dev_conf.rx_adv_conf.vmdq_rx_conf;
2142
2143        /* Check if the MAC type supports VMDq; a return value of 0 means it does not */
2144        if (igb_is_vmdq_supported(dev) == 0)
2145                return -1;
2146
2147        igb_rss_disable(dev);
2148
2149        /* RCTL: enable VLAN filter */
2150        rctl = E1000_READ_REG(hw, E1000_RCTL);
2151        rctl |= E1000_RCTL_VFE;
2152        E1000_WRITE_REG(hw, E1000_RCTL, rctl);
2153
2154        /* MRQC: enable vmdq */
2155        mrqc = E1000_READ_REG(hw, E1000_MRQC);
2156        mrqc |= E1000_MRQC_ENABLE_VMDQ;
2157        E1000_WRITE_REG(hw, E1000_MRQC, mrqc);
2158
2159        /* VTCTL:  pool selection according to VLAN tag */
2160        vt_ctl = E1000_READ_REG(hw, E1000_VT_CTL);
2161        if (cfg->enable_default_pool)
2162                vt_ctl |= (cfg->default_pool << E1000_VT_CTL_DEFAULT_POOL_SHIFT);
2163        vt_ctl |= E1000_VT_CTL_IGNORE_MAC;
2164        E1000_WRITE_REG(hw, E1000_VT_CTL, vt_ctl);
2165
2166        for (i = 0; i < E1000_VMOLR_SIZE; i++) {
2167                vmolr = E1000_READ_REG(hw, E1000_VMOLR(i));
2168                vmolr &= ~(E1000_VMOLR_AUPE | E1000_VMOLR_ROMPE |
2169                        E1000_VMOLR_ROPE | E1000_VMOLR_BAM |
2170                        E1000_VMOLR_MPME);
2171
2172                if (cfg->rx_mode & RTE_ETH_VMDQ_ACCEPT_UNTAG)
2173                        vmolr |= E1000_VMOLR_AUPE;
2174                if (cfg->rx_mode & RTE_ETH_VMDQ_ACCEPT_HASH_MC)
2175                        vmolr |= E1000_VMOLR_ROMPE;
2176                if (cfg->rx_mode & RTE_ETH_VMDQ_ACCEPT_HASH_UC)
2177                        vmolr |= E1000_VMOLR_ROPE;
2178                if (cfg->rx_mode & RTE_ETH_VMDQ_ACCEPT_BROADCAST)
2179                        vmolr |= E1000_VMOLR_BAM;
2180                if (cfg->rx_mode & RTE_ETH_VMDQ_ACCEPT_MULTICAST)
2181                        vmolr |= E1000_VMOLR_MPME;
2182
2183                E1000_WRITE_REG(hw, E1000_VMOLR(i), vmolr);
2184        }
2185
2186        /*
2187         * VMOLR: set STRVLAN to 1 if IGMAC in VTCTL is set to 1.
2188         * Both 82576 and 82580 support it.
2189         */
2190        if (hw->mac.type != e1000_i350) {
2191                for (i = 0; i < E1000_VMOLR_SIZE; i++) {
2192                        vmolr = E1000_READ_REG(hw, E1000_VMOLR(i));
2193                        vmolr |= E1000_VMOLR_STRVLAN;
2194                        E1000_WRITE_REG(hw, E1000_VMOLR(i), vmolr);
2195                }
2196        }
2197
2198        /* VFTA - enable all vlan filters */
2199        for (i = 0; i < IGB_VFTA_SIZE; i++)
2200                E1000_WRITE_REG(hw, (E1000_VFTA+(i*4)), UINT32_MAX);
2201
2202        /* VFRE: enable RX for all 8 pools; both 82576 and i350 support it */
2203        if (hw->mac.type != e1000_82580)
2204                E1000_WRITE_REG(hw, E1000_VFRE, E1000_MBVFICR_VFREQ_MASK);
2205
2206        /*
2207         * RAH/RAL - allow pools to read specific mac addresses
2208         * In this case, all pools should be able to read from mac addr 0
2209         */
2210        E1000_WRITE_REG(hw, E1000_RAH(0), (E1000_RAH_AV | UINT16_MAX));
2211        E1000_WRITE_REG(hw, E1000_RAL(0), UINT32_MAX);
2212
2213        /* VLVF: set up filters for vlan tags as configured */
2214        for (i = 0; i < cfg->nb_pool_maps; i++) {
2215                /* set vlan id in VF register and set the valid bit */
2216                E1000_WRITE_REG(hw, E1000_VLVF(i), (E1000_VLVF_VLANID_ENABLE |
2217                        (cfg->pool_map[i].vlan_id & RTE_ETH_VLAN_ID_MAX) |
2218                        ((cfg->pool_map[i].pools << E1000_VLVF_POOLSEL_SHIFT) &
2219                        E1000_VLVF_POOLSEL_MASK)));
2220        }
2221
2222        E1000_WRITE_FLUSH(hw);
2223
2224        return 0;
2225}
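
/*
 * Illustrative application-side sketch (field values are assumptions, not
 * taken from the original code): the VMDq programming above is driven by a
 * configuration such as
 *
 *     struct rte_eth_conf conf = {
 *             .rxmode = { .mq_mode = RTE_ETH_MQ_RX_VMDQ_ONLY },
 *             .rx_adv_conf.vmdq_rx_conf = {
 *                     .nb_queue_pools = RTE_ETH_8_POOLS,
 *                     .rx_mode = RTE_ETH_VMDQ_ACCEPT_UNTAG |
 *                                RTE_ETH_VMDQ_ACCEPT_BROADCAST,
 *                     .nb_pool_maps = 1,
 *                     .pool_map = { { .vlan_id = 100, .pools = 1ULL << 0 } },
 *             },
 *     };
 *
 * which maps VLAN 100 to pool 0 via the VLVF programming at the end of
 * igb_vmdq_rx_hw_configure().
 */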
2226
2227
2228/*********************************************************************
2229 *
2230 *  Enable receive unit.
2231 *
2232 **********************************************************************/
2233
2234static int
2235igb_alloc_rx_queue_mbufs(struct igb_rx_queue *rxq)
2236{
2237        struct igb_rx_entry *rxe = rxq->sw_ring;
2238        uint64_t dma_addr;
2239        unsigned i;
2240
2241        /* Initialize software ring entries. */
2242        for (i = 0; i < rxq->nb_rx_desc; i++) {
2243                volatile union e1000_adv_rx_desc *rxd;
2244                struct rte_mbuf *mbuf = rte_mbuf_raw_alloc(rxq->mb_pool);
2245
2246                if (mbuf == NULL) {
2247                        PMD_INIT_LOG(ERR, "RX mbuf alloc failed "
2248                                     "queue_id=%hu", rxq->queue_id);
2249                        return -ENOMEM;
2250                }
2251                dma_addr =
2252                        rte_cpu_to_le_64(rte_mbuf_data_iova_default(mbuf));
2253                rxd = &rxq->rx_ring[i];
2254                rxd->read.hdr_addr = 0;
2255                rxd->read.pkt_addr = dma_addr;
2256                rxe[i].mbuf = mbuf;
2257        }
2258
2259        return 0;
2260}
2261
2262#define E1000_MRQC_DEF_Q_SHIFT               (3)
2263static int
2264igb_dev_mq_rx_configure(struct rte_eth_dev *dev)
2265{
2266        struct e1000_hw *hw =
2267                E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2268        uint32_t mrqc;
2269
2270        if (RTE_ETH_DEV_SRIOV(dev).active == RTE_ETH_8_POOLS) {
2271                /*
2272                 * SRIOV active scheme.
2273                 * FIXME: add support for RSS together with VMDq & SRIOV.
2274                 */
2275                mrqc = E1000_MRQC_ENABLE_VMDQ;
2276                /* 011b Def_Q ignore, according to VT_CTL.DEF_PL */
2277                mrqc |= 0x3 << E1000_MRQC_DEF_Q_SHIFT;
2278                E1000_WRITE_REG(hw, E1000_MRQC, mrqc);
2279        } else if (RTE_ETH_DEV_SRIOV(dev).active == 0) {
2280                /*
2281                 * SRIOV inactive scheme
2282                 */
2283                switch (dev->data->dev_conf.rxmode.mq_mode) {
2284                        case RTE_ETH_MQ_RX_RSS:
2285                                igb_rss_configure(dev);
2286                                break;
2287                        case RTE_ETH_MQ_RX_VMDQ_ONLY:
2288                                /*Configure general VMDQ only RX parameters*/
2289                                igb_vmdq_rx_hw_configure(dev);
2290                                break;
2291                        case RTE_ETH_MQ_RX_NONE:
2292                                /* if mq_mode is none, disable RSS mode. */
2293                        default:
2294                                igb_rss_disable(dev);
2295                                break;
2296                }
2297        }
2298
2299        return 0;
2300}
2301
2302int
2303eth_igb_rx_init(struct rte_eth_dev *dev)
2304{
2305        struct rte_eth_rxmode *rxmode;
2306        struct e1000_hw     *hw;
2307        struct igb_rx_queue *rxq;
2308        uint32_t rctl;
2309        uint32_t rxcsum;
2310        uint32_t srrctl;
2311        uint16_t buf_size;
2312        uint16_t rctl_bsize;
2313        uint32_t max_len;
2314        uint16_t i;
2315        int ret;
2316
2317        hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2318        srrctl = 0;
2319
2320        /*
2321         * Make sure receives are disabled while setting
2322         * up the descriptor ring.
2323         */
2324        rctl = E1000_READ_REG(hw, E1000_RCTL);
2325        E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
2326
2327        rxmode = &dev->data->dev_conf.rxmode;
2328
2329        /*
2330         * Configure support of jumbo frames, if any.
2331         */
2332        max_len = dev->data->mtu + E1000_ETH_OVERHEAD;
2333        if (dev->data->mtu > RTE_ETHER_MTU) {
2334                rctl |= E1000_RCTL_LPE;
2335
2336                /*
2337                 * Set maximum packet length by default, and might be updated
2338                 * together with enabling/disabling dual VLAN.
2339                 */
2340                if (rxmode->offloads & RTE_ETH_RX_OFFLOAD_VLAN_EXTEND)
2341                        max_len += VLAN_TAG_SIZE;
2342
2343                E1000_WRITE_REG(hw, E1000_RLPML, max_len);
2344        } else
2345                rctl &= ~E1000_RCTL_LPE;
2346
2347        /* Configure and enable each RX queue. */
2348        rctl_bsize = 0;
2349        dev->rx_pkt_burst = eth_igb_recv_pkts;
2350        for (i = 0; i < dev->data->nb_rx_queues; i++) {
2351                uint64_t bus_addr;
2352                uint32_t rxdctl;
2353
2354                rxq = dev->data->rx_queues[i];
2355
2356                rxq->flags = 0;
2357                /*
2358                 * i350 and i354 loopback VLAN packets have byte-swapped VLAN tags.
2359                 */
2360                if (hw->mac.type == e1000_i350 || hw->mac.type == e1000_i354) {
2361                        rxq->flags |= IGB_RXQ_FLAG_LB_BSWAP_VLAN;
2362                        PMD_INIT_LOG(DEBUG, "IGB rx vlan bswap required");
2363                } else {
2364                        PMD_INIT_LOG(DEBUG, "IGB rx vlan bswap not required");
2365                }
2366
2367                /* Allocate buffers for descriptor rings and set up queue */
2368                ret = igb_alloc_rx_queue_mbufs(rxq);
2369                if (ret)
2370                        return ret;
2371
2372                /*
2373                 * Reset crc_len in case it was changed after queue setup by a
2374                 *  call to configure
2375                 */
2376                if (dev->data->dev_conf.rxmode.offloads & RTE_ETH_RX_OFFLOAD_KEEP_CRC)
2377                        rxq->crc_len = RTE_ETHER_CRC_LEN;
2378                else
2379                        rxq->crc_len = 0;
2380
2381                bus_addr = rxq->rx_ring_phys_addr;
2382                E1000_WRITE_REG(hw, E1000_RDLEN(rxq->reg_idx),
2383                                rxq->nb_rx_desc *
2384                                sizeof(union e1000_adv_rx_desc));
2385                E1000_WRITE_REG(hw, E1000_RDBAH(rxq->reg_idx),
2386                                (uint32_t)(bus_addr >> 32));
2387                E1000_WRITE_REG(hw, E1000_RDBAL(rxq->reg_idx), (uint32_t)bus_addr);
2388
2389                srrctl = E1000_SRRCTL_DESCTYPE_ADV_ONEBUF;
2390
2391                /*
2392                 * Configure RX buffer size.
2393                 */
2394                buf_size = (uint16_t)(rte_pktmbuf_data_room_size(rxq->mb_pool) -
2395                        RTE_PKTMBUF_HEADROOM);
2396                if (buf_size >= 1024) {
2397                        /*
2398                         * Configure the BSIZEPACKET field of the SRRCTL
2399                         * register of the queue.
2400                         * Value is in 1 KB resolution, from 1 KB to 127 KB.
2401                         * If this field is equal to 0b, then RCTL.BSIZE
2402                         * determines the RX packet buffer size.
2403                         */
2404                        srrctl |= ((buf_size >> E1000_SRRCTL_BSIZEPKT_SHIFT) &
2405                                   E1000_SRRCTL_BSIZEPKT_MASK);
2406                        buf_size = (uint16_t) ((srrctl &
2407                                                E1000_SRRCTL_BSIZEPKT_MASK) <<
2408                                               E1000_SRRCTL_BSIZEPKT_SHIFT);
2409
2410                        /* Add dual VLAN length to support dual VLAN */
2411                        if ((max_len + 2 * VLAN_TAG_SIZE) > buf_size) {
2412                                if (!dev->data->scattered_rx)
2413                                        PMD_INIT_LOG(DEBUG,
2414                                                     "forcing scatter mode");
2415                                dev->rx_pkt_burst = eth_igb_recv_scattered_pkts;
2416                                dev->data->scattered_rx = 1;
2417                        }
2418                } else {
2419                        /*
2420                         * Use BSIZE field of the device RCTL register.
2421                         */
2422                        if ((rctl_bsize == 0) || (rctl_bsize > buf_size))
2423                                rctl_bsize = buf_size;
2424                        if (!dev->data->scattered_rx)
2425                                PMD_INIT_LOG(DEBUG, "forcing scatter mode");
2426                        dev->rx_pkt_burst = eth_igb_recv_scattered_pkts;
2427                        dev->data->scattered_rx = 1;
2428                }
2429
2430                /* If configured, drop packets when no RX descriptors are available */
2431                if (rxq->drop_en)
2432                        srrctl |= E1000_SRRCTL_DROP_EN;
2433
2434                E1000_WRITE_REG(hw, E1000_SRRCTL(rxq->reg_idx), srrctl);
2435
2436                /* Enable this RX queue. */
2437                rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(rxq->reg_idx));
2438                rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
2439                rxdctl &= 0xFFF00000;
2440                rxdctl |= (rxq->pthresh & 0x1F);
2441                rxdctl |= ((rxq->hthresh & 0x1F) << 8);
2442                rxdctl |= ((rxq->wthresh & 0x1F) << 16);
2443                E1000_WRITE_REG(hw, E1000_RXDCTL(rxq->reg_idx), rxdctl);
2444        }
2445
2446        if (dev->data->dev_conf.rxmode.offloads & RTE_ETH_RX_OFFLOAD_SCATTER) {
2447                if (!dev->data->scattered_rx)
2448                        PMD_INIT_LOG(DEBUG, "forcing scatter mode");
2449                dev->rx_pkt_burst = eth_igb_recv_scattered_pkts;
2450                dev->data->scattered_rx = 1;
2451        }
2452
2453        /*
2454         * Setup BSIZE field of RCTL register, if needed.
2455         * Buffer sizes >= 1024 are not [supposed to be] set up in the RCTL
2456         * register, since the code above configures the SRRCTL register of
2457         * the RX queue in such a case.
2458         * All configurable sizes are:
2459         * 16384: rctl |= (E1000_RCTL_SZ_16384 | E1000_RCTL_BSEX);
2460         *  8192: rctl |= (E1000_RCTL_SZ_8192  | E1000_RCTL_BSEX);
2461         *  4096: rctl |= (E1000_RCTL_SZ_4096  | E1000_RCTL_BSEX);
2462         *  2048: rctl |= E1000_RCTL_SZ_2048;
2463         *  1024: rctl |= E1000_RCTL_SZ_1024;
2464         *   512: rctl |= E1000_RCTL_SZ_512;
2465         *   256: rctl |= E1000_RCTL_SZ_256;
2466         */
2467        if (rctl_bsize > 0) {
2468                if (rctl_bsize >= 512) /* 512 <= buf_size < 1024 - use 512 */
2469                        rctl |= E1000_RCTL_SZ_512;
2470                else /* 256 <= buf_size < 512 - use 256 */
2471                        rctl |= E1000_RCTL_SZ_256;
2472        }
2473
2474        /*
2475         * Configure RSS if device configured with multiple RX queues.
2476         */
2477        igb_dev_mq_rx_configure(dev);
2478
2479        /* Update the rctl since igb_dev_mq_rx_configure may change its value */
2480        rctl |= E1000_READ_REG(hw, E1000_RCTL);
2481
2482        /*
2483         * Setup the Checksum Register.
2484         * Receive Full-Packet Checksum Offload is mutually exclusive with RSS.
2485         */
2486        rxcsum = E1000_READ_REG(hw, E1000_RXCSUM);
2487        rxcsum |= E1000_RXCSUM_PCSD;
2488
2489        /* Enable both L3/L4 rx checksum offload */
2490        if (rxmode->offloads & RTE_ETH_RX_OFFLOAD_IPV4_CKSUM)
2491                rxcsum |= E1000_RXCSUM_IPOFL;
2492        else
2493                rxcsum &= ~E1000_RXCSUM_IPOFL;
2494        if (rxmode->offloads &
2495                (RTE_ETH_RX_OFFLOAD_TCP_CKSUM | RTE_ETH_RX_OFFLOAD_UDP_CKSUM))
2496                rxcsum |= E1000_RXCSUM_TUOFL;
2497        else
2498                rxcsum &= ~E1000_RXCSUM_TUOFL;
2499        if (rxmode->offloads & RTE_ETH_RX_OFFLOAD_CHECKSUM)
2500                rxcsum |= E1000_RXCSUM_CRCOFL;
2501        else
2502                rxcsum &= ~E1000_RXCSUM_CRCOFL;
2503
2504        E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum);
2505
2506        /* Setup the Receive Control Register. */
2507        if (dev->data->dev_conf.rxmode.offloads & RTE_ETH_RX_OFFLOAD_KEEP_CRC) {
2508                rctl &= ~E1000_RCTL_SECRC; /* Do not Strip Ethernet CRC. */
2509
2510                /* clear STRCRC bit in all queues */
2511                if (hw->mac.type == e1000_i350 ||
2512                    hw->mac.type == e1000_i210 ||
2513                    hw->mac.type == e1000_i211 ||
2514                    hw->mac.type == e1000_i354) {
2515                        for (i = 0; i < dev->data->nb_rx_queues; i++) {
2516                                rxq = dev->data->rx_queues[i];
2517                                uint32_t dvmolr = E1000_READ_REG(hw,
2518                                        E1000_DVMOLR(rxq->reg_idx));
2519                                dvmolr &= ~E1000_DVMOLR_STRCRC;
2520                                E1000_WRITE_REG(hw, E1000_DVMOLR(rxq->reg_idx), dvmolr);
2521                        }
2522                }
2523        } else {
2524                rctl |= E1000_RCTL_SECRC; /* Strip Ethernet CRC. */
2525
2526                /* set STRCRC bit in all queues */
2527                if (hw->mac.type == e1000_i350 ||
2528                    hw->mac.type == e1000_i210 ||
2529                    hw->mac.type == e1000_i211 ||
2530                    hw->mac.type == e1000_i354) {
2531                        for (i = 0; i < dev->data->nb_rx_queues; i++) {
2532                                rxq = dev->data->rx_queues[i];
2533                                uint32_t dvmolr = E1000_READ_REG(hw,
2534                                        E1000_DVMOLR(rxq->reg_idx));
2535                                dvmolr |= E1000_DVMOLR_STRCRC;
2536                                E1000_WRITE_REG(hw, E1000_DVMOLR(rxq->reg_idx), dvmolr);
2537                        }
2538                }
2539        }
2540
2541        rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
2542        rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_LBM_NO |
2543                E1000_RCTL_RDMTS_HALF |
2544                (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
2545
2546        /* Make sure VLAN Filters are off. */
2547        if (dev->data->dev_conf.rxmode.mq_mode != RTE_ETH_MQ_RX_VMDQ_ONLY)
2548                rctl &= ~E1000_RCTL_VFE;
2549        /* Don't store bad packets. */
2550        rctl &= ~E1000_RCTL_SBP;
2551
2552        /* Enable Receives. */
2553        E1000_WRITE_REG(hw, E1000_RCTL, rctl);
2554
2555        /*
2556         * Setup the HW Rx Head and Tail Descriptor Pointers.
2557         * This needs to be done after enable.
2558         */
2559        for (i = 0; i < dev->data->nb_rx_queues; i++) {
2560                rxq = dev->data->rx_queues[i];
2561                E1000_WRITE_REG(hw, E1000_RDH(rxq->reg_idx), 0);
2562                E1000_WRITE_REG(hw, E1000_RDT(rxq->reg_idx), rxq->nb_rx_desc - 1);
2563        }
2564
2565        return 0;
2566}
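
/*
 * Worked example (illustrative): with a mempool created with the common
 * RTE_MBUF_DEFAULT_BUF_SIZE, buf_size in the loop above works out to 2048
 * once RTE_PKTMBUF_HEADROOM is subtracted, so the SRRCTL BSIZEPACKET field
 * becomes
 *
 *     2048 >> E1000_SRRCTL_BSIZEPKT_SHIFT == 2    (1 KB granularity)
 *
 * i.e. a 2 KB per-descriptor packet buffer; scattered RX is then forced
 * only when the maximum frame length plus two VLAN tags exceeds 2 KB.
 */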
2567
2568/*********************************************************************
2569 *
2570 *  Enable transmit unit.
2571 *
2572 **********************************************************************/
2573void
2574eth_igb_tx_init(struct rte_eth_dev *dev)
2575{
2576        struct e1000_hw     *hw;
2577        struct igb_tx_queue *txq;
2578        uint32_t tctl;
2579        uint32_t txdctl;
2580        uint16_t i;
2581
2582        hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2583
2584        /* Setup the Base and Length of the Tx Descriptor Rings. */
2585        for (i = 0; i < dev->data->nb_tx_queues; i++) {
2586                uint64_t bus_addr;
2587                txq = dev->data->tx_queues[i];
2588                bus_addr = txq->tx_ring_phys_addr;
2589
2590                E1000_WRITE_REG(hw, E1000_TDLEN(txq->reg_idx),
2591                                txq->nb_tx_desc *
2592                                sizeof(union e1000_adv_tx_desc));
2593                E1000_WRITE_REG(hw, E1000_TDBAH(txq->reg_idx),
2594                                (uint32_t)(bus_addr >> 32));
2595                E1000_WRITE_REG(hw, E1000_TDBAL(txq->reg_idx), (uint32_t)bus_addr);
2596
2597                /* Setup the HW Tx Head and Tail descriptor pointers. */
2598                E1000_WRITE_REG(hw, E1000_TDT(txq->reg_idx), 0);
2599                E1000_WRITE_REG(hw, E1000_TDH(txq->reg_idx), 0);
2600
2601                /* Setup Transmit threshold registers. */
2602                txdctl = E1000_READ_REG(hw, E1000_TXDCTL(txq->reg_idx));
2603                txdctl |= txq->pthresh & 0x1F;
2604                txdctl |= ((txq->hthresh & 0x1F) << 8);
2605                txdctl |= ((txq->wthresh & 0x1F) << 16);
2606                txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
2607                E1000_WRITE_REG(hw, E1000_TXDCTL(txq->reg_idx), txdctl);
2608        }
2609
2610        /* Program the Transmit Control Register. */
2611        tctl = E1000_READ_REG(hw, E1000_TCTL);
2612        tctl &= ~E1000_TCTL_CT;
2613        tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN |
2614                 (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT));
2615
2616        e1000_config_collision_dist(hw);
2617
2618        /* This write will effectively turn on the transmit unit. */
2619        E1000_WRITE_REG(hw, E1000_TCTL, tctl);
2620}
2621
2622/*********************************************************************
2623 *
2624 *  Enable VF receive unit.
2625 *
2626 **********************************************************************/
2627int
2628eth_igbvf_rx_init(struct rte_eth_dev *dev)
2629{
2630        struct e1000_hw     *hw;
2631        struct igb_rx_queue *rxq;
2632        uint32_t srrctl;
2633        uint16_t buf_size;
2634        uint16_t rctl_bsize;
2635        uint32_t max_len;
2636        uint16_t i;
2637        int ret;
2638
2639        hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2640
2641        /* setup MTU */
2642        max_len = dev->data->mtu + E1000_ETH_OVERHEAD;
2643        e1000_rlpml_set_vf(hw, (uint16_t)(max_len + VLAN_TAG_SIZE));
2644
2645        /* Configure and enable each RX queue. */
2646        rctl_bsize = 0;
2647        dev->rx_pkt_burst = eth_igb_recv_pkts;
2648        for (i = 0; i < dev->data->nb_rx_queues; i++) {
2649                uint64_t bus_addr;
2650                uint32_t rxdctl;
2651
2652                rxq = dev->data->rx_queues[i];
2653
2654                rxq->flags = 0;
2655                /*
2656                 * i350 VF loopback VLAN packets have their VLAN tags byte-swapped.
2657                 */
2658                if (hw->mac.type == e1000_vfadapt_i350) {
2659                        rxq->flags |= IGB_RXQ_FLAG_LB_BSWAP_VLAN;
2660                        PMD_INIT_LOG(DEBUG, "IGB rx vlan bswap required");
2661                } else {
2662                        PMD_INIT_LOG(DEBUG, "IGB rx vlan bswap not required");
2663                }
2664
2665                /* Allocate buffers for descriptor rings and set up queue */
2666                ret = igb_alloc_rx_queue_mbufs(rxq);
2667                if (ret)
2668                        return ret;
2669
2670                bus_addr = rxq->rx_ring_phys_addr;
2671                E1000_WRITE_REG(hw, E1000_RDLEN(i),
2672                                rxq->nb_rx_desc *
2673                                sizeof(union e1000_adv_rx_desc));
2674                E1000_WRITE_REG(hw, E1000_RDBAH(i),
2675                                (uint32_t)(bus_addr >> 32));
2676                E1000_WRITE_REG(hw, E1000_RDBAL(i), (uint32_t)bus_addr);
2677
2678                srrctl = E1000_SRRCTL_DESCTYPE_ADV_ONEBUF;
2679
2680                /*
2681                 * Configure RX buffer size.
2682                 */
2683                buf_size = (uint16_t)(rte_pktmbuf_data_room_size(rxq->mb_pool) -
2684                        RTE_PKTMBUF_HEADROOM);
2685                if (buf_size >= 1024) {
2686                        /*
2687                         * Configure the BSIZEPACKET field of the SRRCTL
2688                         * register of the queue.
2689                         * Value is in 1 KB resolution, from 1 KB to 127 KB.
2690                         * If this field is equal to 0b, then RCTL.BSIZE
2691                         * determines the RX packet buffer size.
2692                         */
2693                        srrctl |= ((buf_size >> E1000_SRRCTL_BSIZEPKT_SHIFT) &
2694                                   E1000_SRRCTL_BSIZEPKT_MASK);
2695                        buf_size = (uint16_t) ((srrctl &
2696                                                E1000_SRRCTL_BSIZEPKT_MASK) <<
2697                                               E1000_SRRCTL_BSIZEPKT_SHIFT);
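                            /*
                             * Example: if 2048 bytes remain after the headroom,
                             * BSIZEPACKET becomes 2 (1 KB units) and converting
                             * it back gives an effective 2048-byte hardware
                             * buffer.
                             */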
2698
2699                        /* Account for a second VLAN tag (QinQ) when checking the buffer size. */
2700                        if ((max_len + 2 * VLAN_TAG_SIZE) > buf_size) {
2701                                if (!dev->data->scattered_rx)
2702                                        PMD_INIT_LOG(DEBUG,
2703                                                     "forcing scatter mode");
2704                                dev->rx_pkt_burst = eth_igb_recv_scattered_pkts;
2705                                dev->data->scattered_rx = 1;
2706                        }
2707                } else {
2708                        /*
2709                         * Use BSIZE field of the device RCTL register.
2710                         */
2711                        if ((rctl_bsize == 0) || (rctl_bsize > buf_size))
2712                                rctl_bsize = buf_size;
2713                        if (!dev->data->scattered_rx)
2714                                PMD_INIT_LOG(DEBUG, "forcing scatter mode");
2715                        dev->rx_pkt_burst = eth_igb_recv_scattered_pkts;
2716                        dev->data->scattered_rx = 1;
2717                }
2718
2719                /* Set DROP_EN if packets should be dropped when no descriptors are available. */
2720                if (rxq->drop_en)
2721                        srrctl |= E1000_SRRCTL_DROP_EN;
2722
2723                E1000_WRITE_REG(hw, E1000_SRRCTL(i), srrctl);
2724
2725                /* Enable this RX queue. */
2726                rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(i));
2727                rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
2728                rxdctl &= 0xFFF00000;
2729                rxdctl |= (rxq->pthresh & 0x1F);
2730                rxdctl |= ((rxq->hthresh & 0x1F) << 8);
2731                if (hw->mac.type == e1000_vfadapt) {
2732                        /*
2733                         * Workaround for an 82576 VF erratum: force WTHRESH
2734                         * to 1 to avoid descriptor write-back sometimes not
2735                         * being triggered.
2736                         */
2737                        rxdctl |= 0x10000;
2738                        PMD_INIT_LOG(DEBUG, "Force set RX WTHRESH to 1!");
2739                } else {
2740                        rxdctl |= ((rxq->wthresh & 0x1F) << 16);
2741                }
2742                E1000_WRITE_REG(hw, E1000_RXDCTL(i), rxdctl);
2743        }
2744
2745        if (dev->data->dev_conf.rxmode.offloads & RTE_ETH_RX_OFFLOAD_SCATTER) {
2746                if (!dev->data->scattered_rx)
2747                        PMD_INIT_LOG(DEBUG, "forcing scatter mode");
2748                dev->rx_pkt_burst = eth_igb_recv_scattered_pkts;
2749                dev->data->scattered_rx = 1;
2750        }
2751
2752        /*
2753         * Setup the HW Rx Head and Tail Descriptor Pointers.
2754         * This needs to be done after the queues have been enabled.
2755         */
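            /*
             * Head starts at 0 and tail at nb_rx_desc - 1: all but one
             * descriptor is handed to the hardware, and the spare one keeps
             * a full ring distinguishable from an empty one.
             */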
2756        for (i = 0; i < dev->data->nb_rx_queues; i++) {
2757                rxq = dev->data->rx_queues[i];
2758                E1000_WRITE_REG(hw, E1000_RDH(i), 0);
2759                E1000_WRITE_REG(hw, E1000_RDT(i), rxq->nb_rx_desc - 1);
2760        }
2761
2762        return 0;
2763}
2764
2765/*********************************************************************
2766 *
2767 *  Enable VF transmit unit.
2768 *
2769 **********************************************************************/
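    /*
     * Same per-queue programming as eth_igb_tx_init(), but TCTL is a
     * PF-owned register, so for the VF the per-queue TXDCTL enable is
     * all that is needed to start transmitting.
     */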
2770void
2771eth_igbvf_tx_init(struct rte_eth_dev *dev)
2772{
2773        struct e1000_hw     *hw;
2774        struct igb_tx_queue *txq;
2775        uint32_t txdctl;
2776        uint16_t i;
2777
2778        hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2779
2780        /* Setup the Base and Length of the Tx Descriptor Rings. */
2781        for (i = 0; i < dev->data->nb_tx_queues; i++) {
2782                uint64_t bus_addr;
2783
2784                txq = dev->data->tx_queues[i];
2785                bus_addr = txq->tx_ring_phys_addr;
2786                E1000_WRITE_REG(hw, E1000_TDLEN(i),
2787                                txq->nb_tx_desc *
2788                                sizeof(union e1000_adv_tx_desc));
2789                E1000_WRITE_REG(hw, E1000_TDBAH(i),
2790                                (uint32_t)(bus_addr >> 32));
2791                E1000_WRITE_REG(hw, E1000_TDBAL(i), (uint32_t)bus_addr);
2792
2793                /* Setup the HW Tx Head and Tail descriptor pointers. */
2794                E1000_WRITE_REG(hw, E1000_TDT(i), 0);
2795                E1000_WRITE_REG(hw, E1000_TDH(i), 0);
2796
2797                /* Setup Transmit threshold registers. */
2798                txdctl = E1000_READ_REG(hw, E1000_TXDCTL(i));
2799                txdctl |= txq->pthresh & 0x1F;
2800                txdctl |= ((txq->hthresh & 0x1F) << 8);
2801                if (hw->mac.type == e1000_82576) {
2802                        /*
2803                         * Workaround for an 82576 VF erratum: force WTHRESH
2804                         * to 1 to avoid descriptor write-back sometimes not
2805                         * being triggered.
2806                         */
2807                        txdctl |= 0x10000;
2808                        PMD_INIT_LOG(DEBUG, "Force set TX WTHRESH to 1!");
2809                } else {
2810                        txdctl |= ((txq->wthresh & 0x1F) << 16);
2811                }
2812                txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
2813                E1000_WRITE_REG(hw, E1000_TXDCTL(i), txdctl);
2814        }
2816}
2817
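    /*
     * Exposed through the ethdev layer; applications reach it via
     * rte_eth_rx_queue_info_get(), roughly as in this illustrative
     * snippet:
     *
     *     struct rte_eth_rxq_info info;
     *
     *     if (rte_eth_rx_queue_info_get(port_id, 0, &info) == 0)
     *         printf("rxq 0: %u descriptors\n", info.nb_desc);
     */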
2818void
2819igb_rxq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
2820        struct rte_eth_rxq_info *qinfo)
2821{
2822        struct igb_rx_queue *rxq;
2823
2824        rxq = dev->data->rx_queues[queue_id];
2825
2826        qinfo->mp = rxq->mb_pool;
2827        qinfo->scattered_rx = dev->data->scattered_rx;
2828        qinfo->nb_desc = rxq->nb_rx_desc;
2829
2830        qinfo->conf.rx_free_thresh = rxq->rx_free_thresh;
2831        qinfo->conf.rx_drop_en = rxq->drop_en;
2832        qinfo->conf.offloads = rxq->offloads;
2833}
2834
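    /* TX-side counterpart, reached via rte_eth_tx_queue_info_get(). */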
2835void
2836igb_txq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
2837        struct rte_eth_txq_info *qinfo)
2838{
2839        struct igb_tx_queue *txq;
2840
2841        txq = dev->data->tx_queues[queue_id];
2842
2843        qinfo->nb_desc = txq->nb_tx_desc;
2844
2845        qinfo->conf.tx_thresh.pthresh = txq->pthresh;
2846        qinfo->conf.tx_thresh.hthresh = txq->hthresh;
2847        qinfo->conf.tx_thresh.wthresh = txq->wthresh;
2848        qinfo->conf.offloads = txq->offloads;
2849}
2850
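    /*
     * Validate an rte_flow RSS action against the per-MAC queue limits and
     * deep-copy it (hash key and queue list included) into driver-owned
     * storage so that it can outlive the caller's buffers.
     */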
2851int
2852igb_rss_conf_init(struct rte_eth_dev *dev,
2853                  struct igb_rte_flow_rss_conf *out,
2854                  const struct rte_flow_action_rss *in)
2855{
2856        struct e1000_hw *hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2857
2858        if (in->key_len > RTE_DIM(out->key) ||
2859            ((hw->mac.type == e1000_82576) &&
2860             (in->queue_num > IGB_MAX_RX_QUEUE_NUM_82576)) ||
2861            ((hw->mac.type != e1000_82576) &&
2862             (in->queue_num > IGB_MAX_RX_QUEUE_NUM)))
2863                return -EINVAL;
2864        out->conf = (struct rte_flow_action_rss){
2865                .func = in->func,
2866                .level = in->level,
2867                .types = in->types,
2868                .key_len = in->key_len,
2869                .queue_num = in->queue_num,
2870                .key = memcpy(out->key, in->key, in->key_len),
2871                .queue = memcpy(out->queue, in->queue,
2872                                sizeof(*in->queue) * in->queue_num),
2873        };
2874        return 0;
2875}
2876
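    /*
     * Field-by-field comparison of two RSS actions, including the hash key
     * bytes and the queue list.
     */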
2877int
2878igb_action_rss_same(const struct rte_flow_action_rss *comp,
2879                    const struct rte_flow_action_rss *with)
2880{
2881        return (comp->func == with->func &&
2882                comp->level == with->level &&
2883                comp->types == with->types &&
2884                comp->key_len == with->key_len &&
2885                comp->queue_num == with->queue_num &&
2886                !memcmp(comp->key, with->key, with->key_len) &&
2887                !memcmp(comp->queue, with->queue,
2888                        sizeof(*with->queue) * with->queue_num));
2889}
2890
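    /*
     * Apply (add == true) or tear down (add == false) an rte_flow RSS rule:
     * fill the 128-entry redirection table from the queue list, program the
     * hash key and the enabled hash types, and remember the configuration.
     * Only one RSS rule may be active at a time.
     */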
2891int
2892igb_config_rss_filter(struct rte_eth_dev *dev,
2893                struct igb_rte_flow_rss_conf *conf, bool add)
2894{
2895        uint32_t shift;
2896        uint16_t i, j;
2897        struct rte_eth_rss_conf rss_conf = {
2898                .rss_key = conf->conf.key_len ?
2899                        (void *)(uintptr_t)conf->conf.key : NULL,
2900                .rss_key_len = conf->conf.key_len,
2901                .rss_hf = conf->conf.types,
2902        };
2903        struct e1000_filter_info *filter_info =
2904                E1000_DEV_PRIVATE_TO_FILTER_INFO(dev->data->dev_private);
2905        struct e1000_hw *hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2906
2909        if (!add) {
2910                if (igb_action_rss_same(&filter_info->rss_info.conf,
2911                                        &conf->conf)) {
2912                        igb_rss_disable(dev);
2913                        memset(&filter_info->rss_info, 0,
2914                                sizeof(struct igb_rte_flow_rss_conf));
2915                        return 0;
2916                }
2917                return -EINVAL;
2918        }
2919
2920        if (filter_info->rss_info.conf.queue_num)
2921                return -EINVAL;
2922
2923        /* Fill in redirection table. */
2924        shift = (hw->mac.type == e1000_82575) ? 6 : 0;
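            /*
             * Each 32-bit RETA register holds four one-byte entries, so the
             * dword built below is flushed on every fourth index; with two
             * queues, for example, the 128 entries simply alternate
             * 0, 1, 0, 1, ...  The 82575 additionally expects the queue index
             * shifted into the upper bits of each entry.
             */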
2925        for (i = 0, j = 0; i < 128; i++, j++) {
2926                union e1000_reta {
2927                        uint32_t dword;
2928                        uint8_t  bytes[4];
2929                } reta;
2930                uint8_t q_idx;
2931
2932                if (j == conf->conf.queue_num)
2933                        j = 0;
2934                q_idx = conf->conf.queue[j];
2935                reta.bytes[i & 3] = (uint8_t)(q_idx << shift);
2936                if ((i & 3) == 3)
2937                        E1000_WRITE_REG(hw, E1000_RETA(i >> 2), reta.dword);
2938        }
2939
2940        /* Configure the RSS key and the RSS protocols used to compute
2941         * the RSS hash of input packets.
2942         */
2943        if ((rss_conf.rss_hf & IGB_RSS_OFFLOAD_ALL) == 0) {
2944                igb_rss_disable(dev);
2945                return 0;
2946        }
2947        if (rss_conf.rss_key == NULL)
2948                rss_conf.rss_key = rss_intel_key; /* Default hash key */
2949        igb_hw_rss_hash_set(hw, &rss_conf);
2950
2951        if (igb_rss_conf_init(dev, &filter_info->rss_info, &conf->conf))
2952                return -EINVAL;
2953
2954        return 0;
2955}
2956