dpdk/drivers/net/iavf/iavf_rxtx_vec_common.h
/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2017 Intel Corporation
 */

#ifndef _IAVF_RXTX_VEC_COMMON_H_
#define _IAVF_RXTX_VEC_COMMON_H_
#include <stdint.h>
#include <ethdev_driver.h>
#include <rte_malloc.h>

#include "iavf.h"
#include "iavf_rxtx.h"

#ifndef __INTEL_COMPILER
#pragma GCC diagnostic ignored "-Wcast-qual"
#endif

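/*
 * Stitch the buffers returned by a scattered vector Rx burst back into
 * complete mbuf chains. split_flags[i] is non-zero while buffer i is a
 * middle segment; a chain is closed on the first buffer whose flag is zero.
 * Completed packets are compacted back into rx_bufs and their count is
 * returned; an unfinished chain is saved in rxq->pkt_first_seg/pkt_last_seg
 * for the next burst.
 */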
static __rte_always_inline uint16_t
reassemble_packets(struct iavf_rx_queue *rxq, struct rte_mbuf **rx_bufs,
                   uint16_t nb_bufs, uint8_t *split_flags)
{
        struct rte_mbuf *pkts[IAVF_VPMD_RX_MAX_BURST];
        struct rte_mbuf *start = rxq->pkt_first_seg;
        struct rte_mbuf *end = rxq->pkt_last_seg;
        unsigned int pkt_idx, buf_idx;

        for (buf_idx = 0, pkt_idx = 0; buf_idx < nb_bufs; buf_idx++) {
                if (end) {
                        /* processing a split packet */
                        end->next = rx_bufs[buf_idx];
                        rx_bufs[buf_idx]->data_len += rxq->crc_len;

                        start->nb_segs++;
                        start->pkt_len += rx_bufs[buf_idx]->data_len;
                        end = end->next;

                        if (!split_flags[buf_idx]) {
                                /* it's the last packet of the set */
                                start->hash = end->hash;
                                start->vlan_tci = end->vlan_tci;
                                start->ol_flags = end->ol_flags;
                                /* we need to strip crc for the whole packet */
                                start->pkt_len -= rxq->crc_len;
                                if (end->data_len > rxq->crc_len) {
                                        end->data_len -= rxq->crc_len;
                                } else {
                                        /* free up last mbuf */
                                        struct rte_mbuf *secondlast = start;

                                        start->nb_segs--;
                                        while (secondlast->next != end)
                                                secondlast = secondlast->next;
                                        secondlast->data_len -= (rxq->crc_len -
                                                        end->data_len);
                                        secondlast->next = NULL;
                                        rte_pktmbuf_free_seg(end);
                                }
                                pkts[pkt_idx++] = start;
                                start = NULL;
                                end = NULL;
                        }
                } else {
                        /* not processing a split packet */
                        if (!split_flags[buf_idx]) {
                                /* not a split packet, save and skip */
                                pkts[pkt_idx++] = rx_bufs[buf_idx];
                                continue;
                        }
                        end = start = rx_bufs[buf_idx];
                        rx_bufs[buf_idx]->data_len += rxq->crc_len;
                        rx_bufs[buf_idx]->pkt_len += rxq->crc_len;
                }
        }

        /* save the partial packet for next time */
        rxq->pkt_first_seg = start;
        rxq->pkt_last_seg = end;
        memcpy(rx_bufs, pkts, pkt_idx * sizeof(*pkts));
        return pkt_idx;
}

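/*
 * Bulk-free transmitted mbufs once the descriptor at next_dd reports
 * "descriptor done". Frees rs_thresh entries from the software ring,
 * returning them to their mempool in bulk runs that share one pool, and
 * returns the number of freed buffers (0 if the DD bit is not yet set).
 */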
static __rte_always_inline int
iavf_tx_free_bufs(struct iavf_tx_queue *txq)
{
        struct iavf_tx_entry *txep;
        uint32_t n;
        uint32_t i;
        int nb_free = 0;
        struct rte_mbuf *m, *free[IAVF_VPMD_TX_MAX_FREE_BUF];

        /* check DD bits on threshold descriptor */
        if ((txq->tx_ring[txq->next_dd].cmd_type_offset_bsz &
                        rte_cpu_to_le_64(IAVF_TXD_QW1_DTYPE_MASK)) !=
                        rte_cpu_to_le_64(IAVF_TX_DESC_DTYPE_DESC_DONE))
                return 0;

        n = txq->rs_thresh;

        /* first buffer to free from the S/W ring is at index
         * next_dd - (rs_thresh - 1)
         */
        txep = &txq->sw_ring[txq->next_dd - (n - 1)];
        m = rte_pktmbuf_prefree_seg(txep[0].mbuf);
        if (likely(m != NULL)) {
                free[0] = m;
                nb_free = 1;
                for (i = 1; i < n; i++) {
                        m = rte_pktmbuf_prefree_seg(txep[i].mbuf);
                        if (likely(m != NULL)) {
                                if (likely(m->pool == free[0]->pool)) {
                                        free[nb_free++] = m;
                                } else {
                                        rte_mempool_put_bulk(free[0]->pool,
                                                             (void *)free,
                                                             nb_free);
                                        free[0] = m;
                                        nb_free = 1;
                                }
                        }
                }
                rte_mempool_put_bulk(free[0]->pool, (void **)free, nb_free);
        } else {
                for (i = 1; i < n; i++) {
                        m = rte_pktmbuf_prefree_seg(txep[i].mbuf);
                        if (m)
                                rte_mempool_put(m->pool, m);
                }
        }

        /* buffers were freed, update counters */
        txq->nb_free = (uint16_t)(txq->nb_free + txq->rs_thresh);
        txq->next_dd = (uint16_t)(txq->next_dd + txq->rs_thresh);
        if (txq->next_dd >= txq->nb_tx_desc)
                txq->next_dd = (uint16_t)(txq->rs_thresh - 1);

        return txq->rs_thresh;
}

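/*
 * Record the mbufs being transmitted in the software ring so they can be
 * freed once their descriptors complete.
 */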
static __rte_always_inline void
tx_backlog_entry(struct iavf_tx_entry *txep,
                 struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
{
        int i;

        for (i = 0; i < (int)nb_pkts; ++i)
                txep[i].mbuf = tx_pkts[i];
}

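/*
 * Release all mbufs still held by a vector Rx queue: every entry when the
 * ring is fully armed (rxrearm_nb == 0), otherwise only the window between
 * rx_tail and rxrearm_start that still owns buffers.
 */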
static inline void
_iavf_rx_queue_release_mbufs_vec(struct iavf_rx_queue *rxq)
{
        const unsigned int mask = rxq->nb_rx_desc - 1;
        unsigned int i;

        if (!rxq->sw_ring || rxq->rxrearm_nb >= rxq->nb_rx_desc)
                return;

        /* free all mbufs that are valid in the ring */
        if (rxq->rxrearm_nb == 0) {
                for (i = 0; i < rxq->nb_rx_desc; i++) {
                        if (rxq->sw_ring[i])
                                rte_pktmbuf_free_seg(rxq->sw_ring[i]);
                }
        } else {
                for (i = rxq->rx_tail;
                     i != rxq->rxrearm_start;
                     i = (i + 1) & mask) {
                        if (rxq->sw_ring[i])
                                rte_pktmbuf_free_seg(rxq->sw_ring[i]);
                }
        }

        rxq->rxrearm_nb = rxq->nb_rx_desc;

        /* set all entries to NULL */
        memset(rxq->sw_ring, 0, sizeof(rxq->sw_ring[0]) * rxq->nb_rx_desc);
}

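/*
 * Release the mbufs still held by a vector Tx queue, walking the software
 * ring from the entry after the last freed descriptor up to tx_tail,
 * wrapping at the end of the ring.
 */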
static inline void
_iavf_tx_queue_release_mbufs_vec(struct iavf_tx_queue *txq)
{
        unsigned int i;
        const uint16_t max_desc = (uint16_t)(txq->nb_tx_desc - 1);

        if (!txq->sw_ring || txq->nb_free == max_desc)
                return;

        i = txq->next_dd - txq->rs_thresh + 1;
        if (txq->tx_tail < i) {
                for (; i < txq->nb_tx_desc; i++) {
                        rte_pktmbuf_free_seg(txq->sw_ring[i].mbuf);
                        txq->sw_ring[i].mbuf = NULL;
                }
                i = 0;
        }
        /* free the remaining mbufs up to, but not including, tx_tail */
        for (; i < txq->tx_tail; i++) {
                rte_pktmbuf_free_seg(txq->sw_ring[i].mbuf);
                txq->sw_ring[i].mbuf = NULL;
        }
}

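/*
 * Build the 64-bit mbuf_initializer used by the vector Rx path to reset the
 * rearm_data region of each new mbuf (the fields set below: data_off,
 * refcnt, nb_segs and port) with a single store.
 */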
static inline int
iavf_rxq_vec_setup_default(struct iavf_rx_queue *rxq)
{
        uintptr_t p;
        struct rte_mbuf mb_def = { .buf_addr = 0 }; /* zeroed mbuf */

        mb_def.nb_segs = 1;
        mb_def.data_off = RTE_PKTMBUF_HEADROOM;
        mb_def.port = rxq->port_id;
        rte_mbuf_refcnt_set(&mb_def, 1);

        /* prevent compiler reordering: rearm_data covers previous fields */
        rte_compiler_barrier();
        p = (uintptr_t)&mb_def.rearm_data;
        rxq->mbuf_initializer = *(uint64_t *)p;
        return 0;
}

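/*
 * Check whether an Rx queue can use a vector path. Returns -1 if it cannot,
 * IAVF_VECTOR_OFFLOAD_PATH if any IAVF_RX_VECTOR_OFFLOAD flag is enabled,
 * and IAVF_VECTOR_PATH otherwise.
 */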
static inline int
iavf_rx_vec_queue_default(struct iavf_rx_queue *rxq)
{
        if (!rxq)
                return -1;

        if (!rte_is_power_of_2(rxq->nb_rx_desc))
                return -1;

        if (rxq->rx_free_thresh < IAVF_VPMD_RX_MAX_BURST)
                return -1;

        if (rxq->nb_rx_desc % rxq->rx_free_thresh)
                return -1;

        if (rxq->proto_xtr != IAVF_PROTO_XTR_NONE)
                return -1;

        if (rxq->offloads & RTE_ETH_RX_OFFLOAD_TIMESTAMP)
                return -1;

        if (rxq->offloads & IAVF_RX_VECTOR_OFFLOAD)
                return IAVF_VECTOR_OFFLOAD_PATH;

        return IAVF_VECTOR_PATH;
}

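/*
 * Check whether a Tx queue can use a vector path. Returns -1 if it cannot,
 * IAVF_VECTOR_OFFLOAD_PATH if any IAVF_TX_VECTOR_OFFLOAD flag is enabled,
 * and IAVF_VECTOR_PATH otherwise.
 */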
static inline int
iavf_tx_vec_queue_default(struct iavf_tx_queue *txq)
{
        if (!txq)
                return -1;

        if (txq->rs_thresh < IAVF_VPMD_TX_MAX_BURST ||
            txq->rs_thresh > IAVF_VPMD_TX_MAX_FREE_BUF)
                return -1;

        if (txq->offloads & IAVF_TX_NO_VECTOR_FLAGS)
                return -1;

        if (txq->offloads & IAVF_TX_VECTOR_OFFLOAD)
                return IAVF_VECTOR_OFFLOAD_PATH;

        return IAVF_VECTOR_PATH;
}

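/*
 * Device-level Rx check: every queue must support a vector path; the most
 * demanding per-queue result (offload vs. plain vector path) is returned,
 * or -1 if any queue disqualifies the device.
 */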
static inline int
iavf_rx_vec_dev_check_default(struct rte_eth_dev *dev)
{
        int i;
        struct iavf_rx_queue *rxq;
        int ret;
        int result = 0;

        for (i = 0; i < dev->data->nb_rx_queues; i++) {
                rxq = dev->data->rx_queues[i];
                ret = iavf_rx_vec_queue_default(rxq);

                if (ret < 0)
                        return -1;
                if (ret > result)
                        result = ret;
        }

        return result;
}

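/*
 * Device-level Tx check: every queue must support a vector path; the most
 * demanding per-queue result is returned, or -1 if any queue disqualifies
 * the device.
 */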
static inline int
iavf_tx_vec_dev_check_default(struct rte_eth_dev *dev)
{
        int i;
        struct iavf_tx_queue *txq;
        int ret;
        int result = 0;

        for (i = 0; i < dev->data->nb_tx_queues; i++) {
                txq = dev->data->tx_queues[i];
                ret = iavf_tx_vec_queue_default(txq);

                if (ret < 0)
                        return -1;
                if (ret > result)
                        result = ret;
        }

        return result;
}

/******************************************************************************
 * If the application is known not to enable a particular offload, the
 * corresponding macro below can be commented out to skip that work in the
 * fast path. The Tx path currently supports two features:
 * 1. checksum offload
 * 2. VLAN/QINQ insertion
 ******************************************************************************/
#define IAVF_TX_CSUM_OFFLOAD
#define IAVF_TX_VLAN_QINQ_OFFLOAD

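/*
 * Translate the mbuf's offload requests (ol_flags, l2_len/l3_len, vlan_tci)
 * into the command and offset fields of the high quadword of a Tx data
 * descriptor, accumulating them into *txd_hi.
 */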
static __rte_always_inline void
iavf_txd_enable_offload(__rte_unused struct rte_mbuf *tx_pkt,
                        uint64_t *txd_hi)
{
#if defined(IAVF_TX_CSUM_OFFLOAD) || defined(IAVF_TX_VLAN_QINQ_OFFLOAD)
        uint64_t ol_flags = tx_pkt->ol_flags;
#endif
        uint32_t td_cmd = 0;
#ifdef IAVF_TX_CSUM_OFFLOAD
        uint32_t td_offset = 0;
#endif

#ifdef IAVF_TX_CSUM_OFFLOAD
        /* Set MACLEN */
        td_offset |= (tx_pkt->l2_len >> 1) <<
                     IAVF_TX_DESC_LENGTH_MACLEN_SHIFT;

        /* Enable L3 checksum offloads */
        if (ol_flags & RTE_MBUF_F_TX_IP_CKSUM) {
                td_cmd |= IAVF_TX_DESC_CMD_IIPT_IPV4_CSUM;
                td_offset |= (tx_pkt->l3_len >> 2) <<
                             IAVF_TX_DESC_LENGTH_IPLEN_SHIFT;
        } else if (ol_flags & RTE_MBUF_F_TX_IPV4) {
                td_cmd |= IAVF_TX_DESC_CMD_IIPT_IPV4;
                td_offset |= (tx_pkt->l3_len >> 2) <<
                             IAVF_TX_DESC_LENGTH_IPLEN_SHIFT;
        } else if (ol_flags & RTE_MBUF_F_TX_IPV6) {
                td_cmd |= IAVF_TX_DESC_CMD_IIPT_IPV6;
                td_offset |= (tx_pkt->l3_len >> 2) <<
                             IAVF_TX_DESC_LENGTH_IPLEN_SHIFT;
        }

        /* Enable L4 checksum offloads */
        switch (ol_flags & RTE_MBUF_F_TX_L4_MASK) {
        case RTE_MBUF_F_TX_TCP_CKSUM:
                td_cmd |= IAVF_TX_DESC_CMD_L4T_EOFT_TCP;
                td_offset |= (sizeof(struct rte_tcp_hdr) >> 2) <<
                             IAVF_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
                break;
        case RTE_MBUF_F_TX_SCTP_CKSUM:
                td_cmd |= IAVF_TX_DESC_CMD_L4T_EOFT_SCTP;
                td_offset |= (sizeof(struct rte_sctp_hdr) >> 2) <<
                             IAVF_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
                break;
        case RTE_MBUF_F_TX_UDP_CKSUM:
                td_cmd |= IAVF_TX_DESC_CMD_L4T_EOFT_UDP;
                td_offset |= (sizeof(struct rte_udp_hdr) >> 2) <<
                             IAVF_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
                break;
        default:
                break;
        }

        *txd_hi |= ((uint64_t)td_offset) << IAVF_TXD_QW1_OFFSET_SHIFT;
#endif

#ifdef IAVF_TX_VLAN_QINQ_OFFLOAD
        if (ol_flags & (RTE_MBUF_F_TX_VLAN | RTE_MBUF_F_TX_QINQ)) {
                td_cmd |= IAVF_TX_DESC_CMD_IL2TAG1;
                *txd_hi |= ((uint64_t)tx_pkt->vlan_tci <<
                            IAVF_TXD_QW1_L2TAG1_SHIFT);
        }
#endif

        *txd_hi |= ((uint64_t)td_cmd) << IAVF_TXD_QW1_CMD_SHIFT;
}

#ifdef CC_AVX2_SUPPORT
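/*
 * Refill IAVF_RXQ_REARM_THRESH Rx descriptors with fresh mbufs from the
 * queue's mempool and update the hardware tail register. With 32-byte
 * descriptors, two mbufs are programmed per loop with 128-bit stores; with
 * 16-byte descriptors, the AVX512 variant programs eight descriptors per
 * loop and the AVX2 variant four. If the bulk allocation fails, the next
 * IAVF_VPMD_DESCS_PER_LOOP descriptors may be pointed at the queue's fake
 * mbuf and rx_mbuf_alloc_failed is incremented instead.
 */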
static __rte_always_inline void
iavf_rxq_rearm_common(struct iavf_rx_queue *rxq, __rte_unused bool avx512)
{
        int i;
        uint16_t rx_id;
        volatile union iavf_rx_desc *rxdp;
        struct rte_mbuf **rxp = &rxq->sw_ring[rxq->rxrearm_start];

        rxdp = rxq->rx_ring + rxq->rxrearm_start;

        /* Pull 'n' more MBUFs into the software ring */
        if (rte_mempool_get_bulk(rxq->mp,
                                 (void *)rxp,
                                 IAVF_RXQ_REARM_THRESH) < 0) {
                if (rxq->rxrearm_nb + IAVF_RXQ_REARM_THRESH >=
                    rxq->nb_rx_desc) {
                        __m128i dma_addr0;

                        dma_addr0 = _mm_setzero_si128();
                        for (i = 0; i < IAVF_VPMD_DESCS_PER_LOOP; i++) {
                                rxp[i] = &rxq->fake_mbuf;
                                _mm_store_si128((__m128i *)&rxdp[i].read,
                                                dma_addr0);
                        }
                }
                rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed +=
                        IAVF_RXQ_REARM_THRESH;
                return;
        }

#ifndef RTE_LIBRTE_IAVF_16BYTE_RX_DESC
        struct rte_mbuf *mb0, *mb1;
        __m128i dma_addr0, dma_addr1;
        __m128i hdr_room = _mm_set_epi64x(RTE_PKTMBUF_HEADROOM,
                        RTE_PKTMBUF_HEADROOM);
        /* Initialize the mbufs in vector, process 2 mbufs in one loop */
        for (i = 0; i < IAVF_RXQ_REARM_THRESH; i += 2, rxp += 2) {
                __m128i vaddr0, vaddr1;

                mb0 = rxp[0];
                mb1 = rxp[1];

                /* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */
                RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) !=
                                offsetof(struct rte_mbuf, buf_addr) + 8);
                vaddr0 = _mm_loadu_si128((__m128i *)&mb0->buf_addr);
                vaddr1 = _mm_loadu_si128((__m128i *)&mb1->buf_addr);

                /* convert pa to dma_addr hdr/data */
                dma_addr0 = _mm_unpackhi_epi64(vaddr0, vaddr0);
                dma_addr1 = _mm_unpackhi_epi64(vaddr1, vaddr1);

                /* add headroom to pa values */
                dma_addr0 = _mm_add_epi64(dma_addr0, hdr_room);
                dma_addr1 = _mm_add_epi64(dma_addr1, hdr_room);

                /* flush desc with pa dma_addr */
                _mm_store_si128((__m128i *)&rxdp++->read, dma_addr0);
                _mm_store_si128((__m128i *)&rxdp++->read, dma_addr1);
        }
#else
#ifdef CC_AVX512_SUPPORT
        if (avx512) {
                struct rte_mbuf *mb0, *mb1, *mb2, *mb3;
                struct rte_mbuf *mb4, *mb5, *mb6, *mb7;
                __m512i dma_addr0_3, dma_addr4_7;
                __m512i hdr_room = _mm512_set1_epi64(RTE_PKTMBUF_HEADROOM);
                /* Initialize the mbufs in vector, process 8 mbufs in one loop */
                for (i = 0; i < IAVF_RXQ_REARM_THRESH;
                                i += 8, rxp += 8, rxdp += 8) {
                        __m128i vaddr0, vaddr1, vaddr2, vaddr3;
                        __m128i vaddr4, vaddr5, vaddr6, vaddr7;
                        __m256i vaddr0_1, vaddr2_3;
                        __m256i vaddr4_5, vaddr6_7;
                        __m512i vaddr0_3, vaddr4_7;

                        mb0 = rxp[0];
                        mb1 = rxp[1];
                        mb2 = rxp[2];
                        mb3 = rxp[3];
                        mb4 = rxp[4];
                        mb5 = rxp[5];
                        mb6 = rxp[6];
                        mb7 = rxp[7];

                        /* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */
                        RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) !=
                                        offsetof(struct rte_mbuf, buf_addr) + 8);
                        vaddr0 = _mm_loadu_si128((__m128i *)&mb0->buf_addr);
                        vaddr1 = _mm_loadu_si128((__m128i *)&mb1->buf_addr);
                        vaddr2 = _mm_loadu_si128((__m128i *)&mb2->buf_addr);
                        vaddr3 = _mm_loadu_si128((__m128i *)&mb3->buf_addr);
                        vaddr4 = _mm_loadu_si128((__m128i *)&mb4->buf_addr);
                        vaddr5 = _mm_loadu_si128((__m128i *)&mb5->buf_addr);
                        vaddr6 = _mm_loadu_si128((__m128i *)&mb6->buf_addr);
                        vaddr7 = _mm_loadu_si128((__m128i *)&mb7->buf_addr);

                        /**
                         * merge 0 & 1, by casting 0 to 256-bit and inserting 1
                         * into the high lanes. Similarly for 2 & 3, and so on.
                         */
                        vaddr0_1 =
                                _mm256_inserti128_si256(_mm256_castsi128_si256(vaddr0),
                                                        vaddr1, 1);
                        vaddr2_3 =
                                _mm256_inserti128_si256(_mm256_castsi128_si256(vaddr2),
                                                        vaddr3, 1);
                        vaddr4_5 =
                                _mm256_inserti128_si256(_mm256_castsi128_si256(vaddr4),
                                                        vaddr5, 1);
                        vaddr6_7 =
                                _mm256_inserti128_si256(_mm256_castsi128_si256(vaddr6),
                                                        vaddr7, 1);
                        vaddr0_3 =
                                _mm512_inserti64x4(_mm512_castsi256_si512(vaddr0_1),
                                                        vaddr2_3, 1);
                        vaddr4_7 =
                                _mm512_inserti64x4(_mm512_castsi256_si512(vaddr4_5),
                                                        vaddr6_7, 1);

                        /* convert pa to dma_addr hdr/data */
                        dma_addr0_3 = _mm512_unpackhi_epi64(vaddr0_3, vaddr0_3);
                        dma_addr4_7 = _mm512_unpackhi_epi64(vaddr4_7, vaddr4_7);

                        /* add headroom to pa values */
                        dma_addr0_3 = _mm512_add_epi64(dma_addr0_3, hdr_room);
                        dma_addr4_7 = _mm512_add_epi64(dma_addr4_7, hdr_room);

                        /* flush desc with pa dma_addr */
                        _mm512_store_si512((__m512i *)&rxdp->read, dma_addr0_3);
                        _mm512_store_si512((__m512i *)&(rxdp + 4)->read, dma_addr4_7);
                }
        } else
#endif
        {
                struct rte_mbuf *mb0, *mb1, *mb2, *mb3;
                __m256i dma_addr0_1, dma_addr2_3;
                __m256i hdr_room = _mm256_set1_epi64x(RTE_PKTMBUF_HEADROOM);
                /* Initialize the mbufs in vector, process 4 mbufs in one loop */
                for (i = 0; i < IAVF_RXQ_REARM_THRESH;
                                i += 4, rxp += 4, rxdp += 4) {
                        __m128i vaddr0, vaddr1, vaddr2, vaddr3;
                        __m256i vaddr0_1, vaddr2_3;

                        mb0 = rxp[0];
                        mb1 = rxp[1];
                        mb2 = rxp[2];
                        mb3 = rxp[3];

                        /* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */
                        RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) !=
                                        offsetof(struct rte_mbuf, buf_addr) + 8);
                        vaddr0 = _mm_loadu_si128((__m128i *)&mb0->buf_addr);
                        vaddr1 = _mm_loadu_si128((__m128i *)&mb1->buf_addr);
                        vaddr2 = _mm_loadu_si128((__m128i *)&mb2->buf_addr);
                        vaddr3 = _mm_loadu_si128((__m128i *)&mb3->buf_addr);

                        /**
                         * merge 0 & 1, by casting 0 to 256-bit and inserting 1
                         * into the high lanes. Similarly for 2 & 3
                         */
                        vaddr0_1 =
                                _mm256_inserti128_si256(_mm256_castsi128_si256(vaddr0),
                                                        vaddr1, 1);
                        vaddr2_3 =
                                _mm256_inserti128_si256(_mm256_castsi128_si256(vaddr2),
                                                        vaddr3, 1);

                        /* convert pa to dma_addr hdr/data */
                        dma_addr0_1 = _mm256_unpackhi_epi64(vaddr0_1, vaddr0_1);
                        dma_addr2_3 = _mm256_unpackhi_epi64(vaddr2_3, vaddr2_3);

                        /* add headroom to pa values */
                        dma_addr0_1 = _mm256_add_epi64(dma_addr0_1, hdr_room);
                        dma_addr2_3 = _mm256_add_epi64(dma_addr2_3, hdr_room);

                        /* flush desc with pa dma_addr */
                        _mm256_store_si256((__m256i *)&rxdp->read, dma_addr0_1);
                        _mm256_store_si256((__m256i *)&(rxdp + 2)->read, dma_addr2_3);
                }
        }

#endif

        rxq->rxrearm_start += IAVF_RXQ_REARM_THRESH;
        if (rxq->rxrearm_start >= rxq->nb_rx_desc)
                rxq->rxrearm_start = 0;

        rxq->rxrearm_nb -= IAVF_RXQ_REARM_THRESH;

        rx_id = (uint16_t)((rxq->rxrearm_start == 0) ?
                             (rxq->nb_rx_desc - 1) : (rxq->rxrearm_start - 1));

        /* Update the tail pointer on the NIC */
        IAVF_PCI_REG_WC_WRITE(rxq->qrx_tail, rx_id);
}
#endif

#endif