dpdk/drivers/net/mlx5/mlx5_rxtx_vec_altivec.h
<<
>>
Prefs
   1/* SPDX-License-Identifier: BSD-3-Clause
   2 * Copyright 2017 6WIND S.A.
   3 * Copyright 2017 Mellanox Technologies, Ltd
   4 */
   5
   6#ifndef RTE_PMD_MLX5_RXTX_VEC_ALTIVEC_H_
   7#define RTE_PMD_MLX5_RXTX_VEC_ALTIVEC_H_
   8
   9#include <stdint.h>
  10#include <string.h>
  11#include <stdlib.h>
  12
  13#include <rte_altivec.h>
  14
  15#include <rte_mbuf.h>
  16#include <rte_mempool.h>
  17#include <rte_prefetch.h>
  18
  19#include <mlx5_prm.h>
  20
  21#include "mlx5_defs.h"
  22#include "mlx5.h"
  23#include "mlx5_utils.h"
  24#include "mlx5_rxtx.h"
  25#include "mlx5_rxtx_vec.h"
  26#include "mlx5_autoconf.h"
  27
  28#ifndef __INTEL_COMPILER
  29#pragma GCC diagnostic ignored "-Wcast-qual"
  30#pragma GCC diagnostic ignored "-Wstrict-aliasing"
  31#endif
  32
  33/**
  34 * Store free buffers to RX SW ring.
  35 *
  36 * @param elts
  37 *   Pointer to SW ring to be filled.
  38 * @param pkts
  39 *   Pointer to array of packets to be stored.
  40 * @param pkts_n
  41 *   Number of packets to be stored.
  42 */
  43static inline void
  44rxq_copy_mbuf_v(struct rte_mbuf **elts, struct rte_mbuf **pkts, uint16_t n)
  45{
  46        unsigned int pos;
  47        uint16_t p = n & -2;
  48
  49        for (pos = 0; pos < p; pos += 2) {
  50                vector unsigned char mbp;
  51
  52                mbp = (vector unsigned char)vec_vsx_ld(0,
  53                                (signed int const *)&elts[pos]);
  54                *(vector unsigned char *)&pkts[pos] = mbp;
  55        }
  56        if (n & 1)
  57                pkts[pos] = elts[pos];
  58}
  59
  60/**
  61 * Decompress a compressed completion and fill in mbufs in RX SW ring with data
  62 * extracted from the title completion descriptor.
  63 *
  64 * @param rxq
  65 *   Pointer to RX queue structure.
  66 * @param cq
  67 *   Pointer to completion array having a compressed completion at first.
  68 * @param elts
  69 *   Pointer to SW ring to be filled. The first mbuf has to be pre-built from
  70 *   the title completion descriptor to be copied to the rest of mbufs.
  71 *
  72 * @return
  73 *   Number of mini-CQEs successfully decompressed.
  74 */
  75static inline uint16_t
  76rxq_cq_decompress_v(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cq,
  77                    struct rte_mbuf **elts)
  78{
  79        volatile struct mlx5_mini_cqe8 *mcq = (void *)&(cq + 1)->pkt_info;
  80        struct rte_mbuf *t_pkt = elts[0]; /* Title packet is pre-built. */
  81        const vector unsigned char zero = (vector unsigned char){0};
  82        /* Mask to shuffle from extracted mini CQE to mbuf. */
  83        const vector unsigned char shuf_mask1 = (vector unsigned char){
  84                        -1, -1, -1, -1,   /* skip packet_type */
  85                         7,  6, -1, -1,   /* bswap16, pkt_len */
  86                         7,  6,           /* bswap16, data_len */
  87                        -1, -1,           /* skip vlan_tci */
  88                         3,  2,  1,  0};  /* bswap32, rss */
  89        const vector unsigned char shuf_mask2 = (vector unsigned char){
  90                        -1, -1, -1, -1,   /* skip packet_type */
  91                        15, 14, -1, -1,   /* bswap16, pkt_len */
  92                        15, 14,           /* data_len, bswap16 */
  93                        -1, -1,           /* skip vlan_tci */
  94                        11, 10,  9,  8};  /* bswap32, rss */
  95        /* Restore the compressed count. Must be 16 bits. */
  96        const uint16_t mcqe_n = t_pkt->data_len +
  97                (rxq->crc_present * RTE_ETHER_CRC_LEN);
  98        const vector unsigned char rearm =
  99                (vector unsigned char)vec_vsx_ld(0,
 100                (signed int const *)&t_pkt->rearm_data);
 101        const vector unsigned char rxdf =
 102                (vector unsigned char)vec_vsx_ld(0,
 103                (signed int const *)&t_pkt->rx_descriptor_fields1);
 104        const vector unsigned char crc_adj =
 105                (vector unsigned char)(vector unsigned short){
 106                        0, 0, rxq->crc_present * RTE_ETHER_CRC_LEN, 0,
 107                        rxq->crc_present * RTE_ETHER_CRC_LEN, 0, 0, 0};
 108        const vector unsigned short rxdf_sel_mask =
 109                (vector unsigned short){
 110                        0xffff, 0xffff, 0, 0, 0, 0xffff, 0, 0};
 111        vector unsigned char ol_flags = (vector unsigned char){0};
 112        vector unsigned char ol_flags_mask = (vector unsigned char){0};
 113        unsigned int pos;
 114        unsigned int i;
 115        unsigned int inv = 0;
 116
 117#ifdef MLX5_PMD_SOFT_COUNTERS
 118        const vector unsigned char ones = vec_splat_u8(-1);
 119        uint32_t rcvd_byte = 0;
 120        /* Mask to shuffle byte_cnt to add up stats. Do bswap16 for all. */
 121        const vector unsigned char len_shuf_mask = (vector unsigned char){
 122                 3,  2, 11, 10,
 123                 7,  6, 15, 14,
 124                -1, -1, -1, -1,
 125                -1, -1, -1, -1};
 126#endif
 127
 128        /*
 129         * A. load mCQEs into a 128bit register.
 130         * B. store rearm data to mbuf.
 131         * C. combine data from mCQEs with rx_descriptor_fields1.
 132         * D. store rx_descriptor_fields1.
 133         * E. store flow tag (rte_flow mark).
 134         */
 135        for (pos = 0; pos < mcqe_n; ) {
 136                vector unsigned char mcqe1, mcqe2;
 137                vector unsigned char rxdf1, rxdf2;
 138#ifdef MLX5_PMD_SOFT_COUNTERS
 139                const vector unsigned short mcqe_sel_mask =
 140                        (vector unsigned short){0, 0, 0xffff, 0xffff,
 141                        0, 0, 0xfff, 0xffff};
 142                const vector unsigned char lower_half = {
 143                        0, 1, 4, 5, 8, 9, 12, 13, 16,
 144                        17, 20, 21, 24, 25, 28, 29};
 145                const vector unsigned char upper_half = {
 146                        2, 3, 6, 7, 10, 11, 14, 15,
 147                        18, 19, 22, 23, 26, 27, 30, 31};
 148                vector unsigned short left, right;
 149                vector unsigned char byte_cnt, invalid_mask;
 150                vector unsigned long lshift;
 151                __attribute__((altivec(vector__)))
 152                        __attribute__((altivec(bool__)))
 153                        unsigned long long shmask;
 154                const vector unsigned long shmax = {64, 64};
 155#endif
 156
 157                for (i = 0; i < MLX5_VPMD_DESCS_PER_LOOP; ++i)
 158                        if (likely(pos + i < mcqe_n))
 159                                rte_prefetch0((void *)(cq + pos + i));
 160                /* A.1 load mCQEs into a 128bit register. */
 161                mcqe1 = (vector unsigned char)vec_vsx_ld(0,
 162                        (signed int const *)&mcq[pos % 8]);
 163                mcqe2 = (vector unsigned char)vec_vsx_ld(0,
 164                        (signed int const *)&mcq[pos % 8 + 2]);
 165
 166                /* B.1 store rearm data to mbuf. */
 167                *(vector unsigned char *)
 168                        &elts[pos]->rearm_data = rearm;
 169                *(vector unsigned char *)
 170                        &elts[pos + 1]->rearm_data = rearm;
 171
 172                /* C.1 combine data from mCQEs with rx_descriptor_fields1. */
 173                rxdf1 = vec_perm(mcqe1, zero, shuf_mask1);
 174                rxdf2 = vec_perm(mcqe1, zero, shuf_mask2);
 175                rxdf1 = (vector unsigned char)
 176                        ((vector unsigned short)rxdf1 -
 177                        (vector unsigned short)crc_adj);
 178                rxdf2 = (vector unsigned char)
 179                        ((vector unsigned short)rxdf2 -
 180                        (vector unsigned short)crc_adj);
 181                rxdf1 = (vector unsigned char)
 182                        vec_sel((vector unsigned short)rxdf1,
 183                        (vector unsigned short)rxdf, rxdf_sel_mask);
 184                rxdf2 = (vector unsigned char)
 185                        vec_sel((vector unsigned short)rxdf2,
 186                        (vector unsigned short)rxdf, rxdf_sel_mask);
 187
 188                /* D.1 store rx_descriptor_fields1. */
 189                *(vector unsigned char *)
 190                        &elts[pos]->rx_descriptor_fields1 = rxdf1;
 191                *(vector unsigned char *)
 192                        &elts[pos + 1]->rx_descriptor_fields1 = rxdf2;
 193
 194                /* B.1 store rearm data to mbuf. */
 195                *(vector unsigned char *)
 196                        &elts[pos + 2]->rearm_data = rearm;
 197                *(vector unsigned char *)
 198                        &elts[pos + 3]->rearm_data = rearm;
 199
 200                /* C.1 combine data from mCQEs with rx_descriptor_fields1. */
 201                rxdf1 = vec_perm(mcqe2, zero, shuf_mask1);
 202                rxdf2 = vec_perm(mcqe2, zero, shuf_mask2);
 203                rxdf1 = (vector unsigned char)
 204                        ((vector unsigned short)rxdf1 -
 205                        (vector unsigned short)crc_adj);
 206                rxdf2 = (vector unsigned char)
 207                        ((vector unsigned short)rxdf2 -
 208                        (vector unsigned short)crc_adj);
 209                rxdf1 = (vector unsigned char)
 210                        vec_sel((vector unsigned short)rxdf1,
 211                        (vector unsigned short)rxdf, rxdf_sel_mask);
 212                rxdf2 = (vector unsigned char)
 213                        vec_sel((vector unsigned short)rxdf2,
 214                        (vector unsigned short)rxdf, rxdf_sel_mask);
 215
 216                /* D.1 store rx_descriptor_fields1. */
 217                *(vector unsigned char *)
 218                        &elts[pos + 2]->rx_descriptor_fields1 = rxdf1;
 219                *(vector unsigned char *)
 220                        &elts[pos + 3]->rx_descriptor_fields1 = rxdf2;
 221
 222#ifdef MLX5_PMD_SOFT_COUNTERS
 223                invalid_mask = (vector unsigned char)(vector unsigned long){
 224                        (mcqe_n - pos) * sizeof(uint16_t) * 8, 0};
 225
 226                lshift =
 227                        vec_splat((vector unsigned long)invalid_mask, 0);
 228                shmask = vec_cmpgt(shmax, lshift);
 229                invalid_mask = (vector unsigned char)
 230                        vec_sl((vector unsigned long)ones, lshift);
 231                invalid_mask = (vector unsigned char)
 232                        vec_sel((vector unsigned long)shmask,
 233                        (vector unsigned long)invalid_mask, shmask);
 234
 235                byte_cnt = (vector unsigned char)
 236                        vec_sel((vector unsigned short)
 237                        vec_sro((vector unsigned short)mcqe1,
 238                        (vector unsigned char){32}),
 239                        (vector unsigned short)mcqe2, mcqe_sel_mask);
 240                byte_cnt = vec_perm(byte_cnt, zero, len_shuf_mask);
 241                byte_cnt = (vector unsigned char)
 242                        vec_andc((vector unsigned long)byte_cnt,
 243                        (vector unsigned long)invalid_mask);
 244                left = vec_perm((vector unsigned short)byte_cnt,
 245                        (vector unsigned short)zero, lower_half);
 246                right = vec_perm((vector unsigned short)byte_cnt,
 247                        (vector unsigned short)zero, upper_half);
 248                byte_cnt = (vector unsigned char)vec_add(left, right);
 249                left = vec_perm((vector unsigned short)byte_cnt,
 250                        (vector unsigned short)zero, lower_half);
 251                right = vec_perm((vector unsigned short)byte_cnt,
 252                        (vector unsigned short)zero, upper_half);
 253                byte_cnt = (vector unsigned char)vec_add(left, right);
 254                rcvd_byte += ((vector unsigned long)byte_cnt)[0];
 255#endif
 256
 257                if (rxq->mark) {
 258                        if (rxq->mcqe_format !=
 259                            MLX5_CQE_RESP_FORMAT_FTAG_STRIDX) {
 260                                const uint32_t flow_tag = t_pkt->hash.fdir.hi;
 261
 262                                /* E.1 store flow tag (rte_flow mark). */
 263                                elts[pos]->hash.fdir.hi = flow_tag;
 264                                elts[pos + 1]->hash.fdir.hi = flow_tag;
 265                                elts[pos + 2]->hash.fdir.hi = flow_tag;
 266                                elts[pos + 3]->hash.fdir.hi = flow_tag;
 267                        } else {
 268                                const vector unsigned char flow_mark_adj =
 269                                        (vector unsigned char)
 270                                        (vector unsigned int){
 271                                        -1, -1, -1, -1};
 272                                const vector unsigned char flow_mark_shuf =
 273                                        (vector unsigned char){
 274                                        -1, -1, -1, -1,
 275                                        -1, -1, -1, -1,
 276                                        12,  8,  9, -1,
 277                                         4,  0,  1,  -1};
 278                                const vector unsigned char ft_mask =
 279                                        (vector unsigned char)
 280                                        (vector unsigned int){
 281                                        0xffffff00, 0xffffff00,
 282                                        0xffffff00, 0xffffff00};
 283                                const vector unsigned char fdir_flags =
 284                                        (vector unsigned char)
 285                                        (vector unsigned int){
 286                                        RTE_MBUF_F_RX_FDIR, RTE_MBUF_F_RX_FDIR,
 287                                        RTE_MBUF_F_RX_FDIR, RTE_MBUF_F_RX_FDIR};
 288                                const vector unsigned char fdir_all_flags =
 289                                        (vector unsigned char)
 290                                        (vector unsigned int){
 291                                        RTE_MBUF_F_RX_FDIR | RTE_MBUF_F_RX_FDIR_ID,
 292                                        RTE_MBUF_F_RX_FDIR | RTE_MBUF_F_RX_FDIR_ID,
 293                                        RTE_MBUF_F_RX_FDIR | RTE_MBUF_F_RX_FDIR_ID,
 294                                        RTE_MBUF_F_RX_FDIR | RTE_MBUF_F_RX_FDIR_ID};
 295                                vector unsigned char fdir_id_flags =
 296                                        (vector unsigned char)
 297                                        (vector unsigned int){
 298                                        RTE_MBUF_F_RX_FDIR_ID, RTE_MBUF_F_RX_FDIR_ID,
 299                                        RTE_MBUF_F_RX_FDIR_ID, RTE_MBUF_F_RX_FDIR_ID};
 300                                /* Extract flow_tag field. */
 301                                vector unsigned char ftag0 = vec_perm(mcqe1,
 302                                                        zero, flow_mark_shuf);
 303                                vector unsigned char ftag1 = vec_perm(mcqe2,
 304                                                        zero, flow_mark_shuf);
 305                                vector unsigned char ftag =
 306                                        (vector unsigned char)
 307                                        vec_mergel((vector unsigned int)ftag0,
 308                                        (vector unsigned int)ftag1);
 309                                vector unsigned char invalid_mask =
 310                                        (vector unsigned char)
 311                                        vec_cmpeq((vector unsigned int)ftag,
 312                                        (vector unsigned int)zero);
 313
 314                                ol_flags_mask = (vector unsigned char)
 315                                        vec_or((vector unsigned long)
 316                                        ol_flags_mask,
 317                                        (vector unsigned long)fdir_all_flags);
 318
 319                                /* Set RTE_MBUF_F_RX_FDIR if flow tag is non-zero. */
 320                                invalid_mask = (vector unsigned char)
 321                                        vec_cmpeq((vector unsigned int)ftag,
 322                                        (vector unsigned int)zero);
 323                                ol_flags = (vector unsigned char)
 324                                        vec_or((vector unsigned long)ol_flags,
 325                                        (vector unsigned long)
 326                                        vec_andc((vector unsigned long)
 327                                        fdir_flags,
 328                                        (vector unsigned long)invalid_mask));
 329                                ol_flags_mask = (vector unsigned char)
 330                                        vec_or((vector unsigned long)
 331                                        ol_flags_mask,
 332                                        (vector unsigned long)fdir_flags);
 333
 334                                /* Mask out invalid entries. */
 335                                fdir_id_flags = (vector unsigned char)
 336                                        vec_andc((vector unsigned long)
 337                                        fdir_id_flags,
 338                                        (vector unsigned long)invalid_mask);
 339
 340                                /* Check if flow tag MLX5_FLOW_MARK_DEFAULT. */
 341                                ol_flags = (vector unsigned char)
 342                                        vec_or((vector unsigned long)ol_flags,
 343                                        (vector unsigned long)
 344                                        vec_andc((vector unsigned long)
 345                                        fdir_id_flags,
 346                                        (vector unsigned long)
 347                                        vec_cmpeq((vector unsigned int)ftag,
 348                                        (vector unsigned int)ft_mask)));
 349
 350                                ftag = (vector unsigned char)
 351                                        ((vector unsigned int)ftag +
 352                                        (vector unsigned int)flow_mark_adj);
 353                                elts[pos]->hash.fdir.hi =
 354                                        ((vector unsigned int)ftag)[0];
 355                                elts[pos + 1]->hash.fdir.hi =
 356                                        ((vector unsigned int)ftag)[1];
 357                                elts[pos + 2]->hash.fdir.hi =
 358                                        ((vector unsigned int)ftag)[2];
 359                                elts[pos + 3]->hash.fdir.hi =
 360                                        ((vector unsigned int)ftag)[3];
 361                        }
 362                }
 363                if (unlikely(rxq->mcqe_format != MLX5_CQE_RESP_FORMAT_HASH)) {
 364                        if (rxq->mcqe_format ==
 365                            MLX5_CQE_RESP_FORMAT_L34H_STRIDX) {
 366                                const uint8_t pkt_info =
 367                                        (cq->pkt_info & 0x3) << 6;
 368                                const uint8_t pkt_hdr0 =
 369                                        mcq[pos % 8].hdr_type;
 370                                const uint8_t pkt_hdr1 =
 371                                        mcq[pos % 8 + 1].hdr_type;
 372                                const uint8_t pkt_hdr2 =
 373                                        mcq[pos % 8 + 2].hdr_type;
 374                                const uint8_t pkt_hdr3 =
 375                                        mcq[pos % 8 + 3].hdr_type;
 376                                const vector unsigned char vlan_mask =
 377                                        (vector unsigned char)
 378                                        (vector unsigned int) {
 379                                        (RTE_MBUF_F_RX_VLAN | RTE_MBUF_F_RX_VLAN_STRIPPED),
 380                                        (RTE_MBUF_F_RX_VLAN | RTE_MBUF_F_RX_VLAN_STRIPPED),
 381                                        (RTE_MBUF_F_RX_VLAN | RTE_MBUF_F_RX_VLAN_STRIPPED),
 382                                        (RTE_MBUF_F_RX_VLAN | RTE_MBUF_F_RX_VLAN_STRIPPED)};
 383                                const vector unsigned char cv_mask =
 384                                        (vector unsigned char)
 385                                        (vector unsigned int) {
 386                                        MLX5_CQE_VLAN_STRIPPED,
 387                                        MLX5_CQE_VLAN_STRIPPED,
 388                                        MLX5_CQE_VLAN_STRIPPED,
 389                                        MLX5_CQE_VLAN_STRIPPED};
 390                                vector unsigned char pkt_cv =
 391                                        (vector unsigned char)
 392                                        (vector unsigned int) {
 393                                        pkt_hdr0 & 0x1, pkt_hdr1 & 0x1,
 394                                        pkt_hdr2 & 0x1, pkt_hdr3 & 0x1};
 395
 396                                ol_flags_mask = (vector unsigned char)
 397                                        vec_or((vector unsigned long)
 398                                        ol_flags_mask,
 399                                        (vector unsigned long)vlan_mask);
 400                                ol_flags = (vector unsigned char)
 401                                        vec_or((vector unsigned long)ol_flags,
 402                                        (vector unsigned long)
 403                                        vec_and((vector unsigned long)vlan_mask,
 404                                        (vector unsigned long)
 405                                        vec_cmpeq((vector unsigned int)pkt_cv,
 406                                        (vector unsigned int)cv_mask)));
 407                                elts[pos]->packet_type =
 408                                        mlx5_ptype_table[(pkt_hdr0 >> 2) |
 409                                                         pkt_info];
 410                                elts[pos + 1]->packet_type =
 411                                        mlx5_ptype_table[(pkt_hdr1 >> 2) |
 412                                                         pkt_info];
 413                                elts[pos + 2]->packet_type =
 414                                        mlx5_ptype_table[(pkt_hdr2 >> 2) |
 415                                                         pkt_info];
 416                                elts[pos + 3]->packet_type =
 417                                        mlx5_ptype_table[(pkt_hdr3 >> 2) |
 418                                                         pkt_info];
 419                                if (rxq->tunnel) {
 420                                        elts[pos]->packet_type |=
 421                                                !!(((pkt_hdr0 >> 2) |
 422                                                pkt_info) & (1 << 6));
 423                                        elts[pos + 1]->packet_type |=
 424                                                !!(((pkt_hdr1 >> 2) |
 425                                                pkt_info) & (1 << 6));
 426                                        elts[pos + 2]->packet_type |=
 427                                                !!(((pkt_hdr2 >> 2) |
 428                                                pkt_info) & (1 << 6));
 429                                        elts[pos + 3]->packet_type |=
 430                                                !!(((pkt_hdr3 >> 2) |
 431                                                pkt_info) & (1 << 6));
 432                                }
 433                        }
 434                        const vector unsigned char hash_mask =
 435                                (vector unsigned char)(vector unsigned int) {
 436                                        RTE_MBUF_F_RX_RSS_HASH,
 437                                        RTE_MBUF_F_RX_RSS_HASH,
 438                                        RTE_MBUF_F_RX_RSS_HASH,
 439                                        RTE_MBUF_F_RX_RSS_HASH};
 440                        const vector unsigned char rearm_flags =
 441                                (vector unsigned char)(vector unsigned int) {
 442                                (uint32_t)t_pkt->ol_flags,
 443                                (uint32_t)t_pkt->ol_flags,
 444                                (uint32_t)t_pkt->ol_flags,
 445                                (uint32_t)t_pkt->ol_flags};
 446
 447                        ol_flags_mask = (vector unsigned char)
 448                                vec_or((vector unsigned long)ol_flags_mask,
 449                                (vector unsigned long)hash_mask);
 450                        ol_flags = (vector unsigned char)
 451                                vec_or((vector unsigned long)ol_flags,
 452                                (vector unsigned long)
 453                                vec_andc((vector unsigned long)rearm_flags,
 454                                (vector unsigned long)ol_flags_mask));
 455
 456                        elts[pos]->ol_flags =
 457                                ((vector unsigned int)ol_flags)[0];
 458                        elts[pos + 1]->ol_flags =
 459                                ((vector unsigned int)ol_flags)[1];
 460                        elts[pos + 2]->ol_flags =
 461                                ((vector unsigned int)ol_flags)[2];
 462                        elts[pos + 3]->ol_flags =
 463                                ((vector unsigned int)ol_flags)[3];
 464                        elts[pos]->hash.rss = 0;
 465                        elts[pos + 1]->hash.rss = 0;
 466                        elts[pos + 2]->hash.rss = 0;
 467                        elts[pos + 3]->hash.rss = 0;
 468                }
 469                if (rxq->dynf_meta) {
 470                        int32_t offs = rxq->flow_meta_offset;
 471                        const uint32_t meta =
 472                                *RTE_MBUF_DYNFIELD(t_pkt, offs, uint32_t *);
 473
 474                        /* Check if title packet has valid metadata. */
 475                        if (meta) {
 476                                MLX5_ASSERT(t_pkt->ol_flags &
 477                                            rxq->flow_meta_mask);
 478                                *RTE_MBUF_DYNFIELD(elts[pos], offs,
 479                                                        uint32_t *) = meta;
 480                                *RTE_MBUF_DYNFIELD(elts[pos + 1], offs,
 481                                                        uint32_t *) = meta;
 482                                *RTE_MBUF_DYNFIELD(elts[pos + 2], offs,
 483                                                        uint32_t *) = meta;
 484                                *RTE_MBUF_DYNFIELD(elts[pos + 3], offs,
 485                                                        uint32_t *) = meta;
 486                        }
 487                }
 488
 489                pos += MLX5_VPMD_DESCS_PER_LOOP;
 490                /* Move to next CQE and invalidate consumed CQEs. */
 491                if (!(pos & 0x7) && pos < mcqe_n) {
 492                        if (pos + 8 < mcqe_n)
 493                                rte_prefetch0((void *)(cq + pos + 8));
 494                        mcq = (void *)&(cq + pos)->pkt_info;
 495                        for (i = 0; i < 8; ++i)
 496                                cq[inv++].op_own = MLX5_CQE_INVALIDATE;
 497                }
 498        }
 499
 500        /* Invalidate the rest of CQEs. */
 501        for (; inv < mcqe_n; ++inv)
 502                cq[inv].op_own = MLX5_CQE_INVALIDATE;
 503
 504#ifdef MLX5_PMD_SOFT_COUNTERS
 505        rxq->stats.ipackets += mcqe_n;
 506        rxq->stats.ibytes += rcvd_byte;
 507#endif
 508
 509        return mcqe_n;
 510}
 511
 512/**
 513 * Calculate packet type and offload flag for mbuf and store it.
 514 *
 515 * @param rxq
 516 *   Pointer to RX queue structure.
 517 * @param cqes[4]
 518 *   Array of four 16bytes completions extracted from the original completion
 519 *   descriptor.
 520 * @param op_err
 521 *   Opcode vector having responder error status. Each field is 4B.
 522 * @param pkts
 523 *   Pointer to array of packets to be filled.
 524 */
 525static inline void
 526rxq_cq_to_ptype_oflags_v(struct mlx5_rxq_data *rxq,
 527                vector unsigned char cqes[4], vector unsigned char op_err,
 528                struct rte_mbuf **pkts)
 529{
 530        vector unsigned char pinfo0, pinfo1;
 531        vector unsigned char pinfo, ptype;
 532        vector unsigned char ol_flags = (vector unsigned char)
 533                (vector unsigned int){
 534                        rxq->rss_hash * RTE_MBUF_F_RX_RSS_HASH |
 535                                rxq->hw_timestamp * rxq->timestamp_rx_flag,
 536                        rxq->rss_hash * RTE_MBUF_F_RX_RSS_HASH |
 537                                rxq->hw_timestamp * rxq->timestamp_rx_flag,
 538                        rxq->rss_hash * RTE_MBUF_F_RX_RSS_HASH |
 539                                rxq->hw_timestamp * rxq->timestamp_rx_flag,
 540                        rxq->rss_hash * RTE_MBUF_F_RX_RSS_HASH |
 541                                rxq->hw_timestamp * rxq->timestamp_rx_flag};
 542        vector unsigned char cv_flags;
 543        const vector unsigned char zero = (vector unsigned char){0};
 544        const vector unsigned char ptype_mask =
 545                (vector unsigned char)(vector unsigned int){
 546                0x0000fd06, 0x0000fd06, 0x0000fd06, 0x0000fd06};
 547        const vector unsigned char ptype_ol_mask =
 548                (vector unsigned char)(vector unsigned int){
 549                0x00000106, 0x00000106, 0x00000106, 0x00000106};
 550        const vector unsigned char pinfo_mask =
 551                (vector unsigned char)(vector unsigned int){
 552                0x00000003, 0x00000003, 0x00000003, 0x00000003};
 553        const vector unsigned char cv_flag_sel = (vector unsigned char){
 554                0, (uint8_t)(RTE_MBUF_F_RX_VLAN | RTE_MBUF_F_RX_VLAN_STRIPPED),
 555                (uint8_t)(RTE_MBUF_F_RX_IP_CKSUM_GOOD >> 1), 0,
 556                (uint8_t)(RTE_MBUF_F_RX_L4_CKSUM_GOOD >> 1), 0,
 557                (uint8_t)((RTE_MBUF_F_RX_IP_CKSUM_GOOD | RTE_MBUF_F_RX_L4_CKSUM_GOOD) >> 1),
 558                0, 0, 0, 0, 0, 0, 0, 0, 0};
 559        const vector unsigned char cv_mask =
 560                (vector unsigned char)(vector unsigned int){
 561                RTE_MBUF_F_RX_IP_CKSUM_GOOD | RTE_MBUF_F_RX_L4_CKSUM_GOOD |
 562                RTE_MBUF_F_RX_VLAN | RTE_MBUF_F_RX_VLAN_STRIPPED,
 563                RTE_MBUF_F_RX_IP_CKSUM_GOOD | RTE_MBUF_F_RX_L4_CKSUM_GOOD |
 564                RTE_MBUF_F_RX_VLAN | RTE_MBUF_F_RX_VLAN_STRIPPED,
 565                RTE_MBUF_F_RX_IP_CKSUM_GOOD | RTE_MBUF_F_RX_L4_CKSUM_GOOD |
 566                RTE_MBUF_F_RX_VLAN | RTE_MBUF_F_RX_VLAN_STRIPPED,
 567                RTE_MBUF_F_RX_IP_CKSUM_GOOD | RTE_MBUF_F_RX_L4_CKSUM_GOOD |
 568                RTE_MBUF_F_RX_VLAN | RTE_MBUF_F_RX_VLAN_STRIPPED};
 569        const vector unsigned char mbuf_init =
 570                (vector unsigned char)vec_vsx_ld
 571                        (0, (vector unsigned char *)&rxq->mbuf_initializer);
 572        const vector unsigned short rearm_sel_mask =
 573                (vector unsigned short){0, 0, 0, 0, 0xffff, 0xffff, 0, 0};
 574        vector unsigned char rearm0, rearm1, rearm2, rearm3;
 575        uint8_t pt_idx0, pt_idx1, pt_idx2, pt_idx3;
 576
 577        /* Extract pkt_info field. */
 578        pinfo0 = (vector unsigned char)
 579                vec_mergeh((vector unsigned int)cqes[0],
 580                (vector unsigned int)cqes[1]);
 581        pinfo1 = (vector unsigned char)
 582                vec_mergeh((vector unsigned int)cqes[2],
 583                (vector unsigned int)cqes[3]);
 584        pinfo = (vector unsigned char)
 585                vec_mergeh((vector unsigned long)pinfo0,
 586                (vector unsigned long)pinfo1);
 587
 588        /* Extract hdr_type_etc field. */
 589        pinfo0 = (vector unsigned char)
 590                vec_mergel((vector unsigned int)cqes[0],
 591                (vector unsigned int)cqes[1]);
 592        pinfo1 = (vector unsigned char)
 593                vec_mergel((vector unsigned int)cqes[2],
 594                (vector unsigned int)cqes[3]);
 595        ptype = (vector unsigned char)
 596                vec_mergeh((vector unsigned long)pinfo0,
 597                (vector unsigned long)pinfo1);
 598
 599        if (rxq->mark) {
 600                const vector unsigned char pinfo_ft_mask =
 601                        (vector unsigned char)(vector unsigned int){
 602                        0xffffff00, 0xffffff00, 0xffffff00, 0xffffff00};
 603                const vector unsigned char fdir_flags =
 604                        (vector unsigned char)(vector unsigned int){
 605                        RTE_MBUF_F_RX_FDIR, RTE_MBUF_F_RX_FDIR,
 606                        RTE_MBUF_F_RX_FDIR, RTE_MBUF_F_RX_FDIR};
 607                vector unsigned char fdir_id_flags =
 608                        (vector unsigned char)(vector unsigned int){
 609                        RTE_MBUF_F_RX_FDIR_ID, RTE_MBUF_F_RX_FDIR_ID,
 610                        RTE_MBUF_F_RX_FDIR_ID, RTE_MBUF_F_RX_FDIR_ID};
 611                vector unsigned char flow_tag, invalid_mask;
 612
 613                flow_tag = (vector unsigned char)
 614                        vec_and((vector unsigned long)pinfo,
 615                        (vector unsigned long)pinfo_ft_mask);
 616
 617                /* Check if flow tag is non-zero then set RTE_MBUF_F_RX_FDIR. */
 618                invalid_mask = (vector unsigned char)
 619                        vec_cmpeq((vector unsigned int)flow_tag,
 620                        (vector unsigned int)zero);
 621                ol_flags = (vector unsigned char)
 622                        vec_or((vector unsigned long)ol_flags,
 623                        (vector unsigned long)
 624                        vec_andc((vector unsigned long)fdir_flags,
 625                        (vector unsigned long)invalid_mask));
 626
 627                /* Mask out invalid entries. */
 628                fdir_id_flags = (vector unsigned char)
 629                        vec_andc((vector unsigned long)fdir_id_flags,
 630                        (vector unsigned long)invalid_mask);
 631
 632                /* Check if flow tag MLX5_FLOW_MARK_DEFAULT. */
 633                ol_flags = (vector unsigned char)
 634                        vec_or((vector unsigned long)ol_flags,
 635                        (vector unsigned long)
 636                        vec_andc((vector unsigned long)fdir_id_flags,
 637                        (vector unsigned long)
 638                        vec_cmpeq((vector unsigned int)flow_tag,
 639                        (vector unsigned int)pinfo_ft_mask)));
 640        }
 641        /*
 642         * Merge the two fields to generate the following:
 643         * bit[1]     = l3_ok
 644         * bit[2]     = l4_ok
 645         * bit[8]     = cv
 646         * bit[11:10] = l3_hdr_type
 647         * bit[14:12] = l4_hdr_type
 648         * bit[15]    = ip_frag
 649         * bit[16]    = tunneled
 650         * bit[17]    = outer_l3_type
 651         */
 652        ptype = (vector unsigned char)
 653                vec_and((vector unsigned long)ptype,
 654                (vector unsigned long)ptype_mask);
 655        pinfo = (vector unsigned char)
 656                vec_and((vector unsigned long)pinfo,
 657                (vector unsigned long)pinfo_mask);
 658        pinfo = (vector unsigned char)
 659                vec_sl((vector unsigned int)pinfo,
 660                (vector unsigned int){16, 16, 16, 16});
 661
 662        /* Make pinfo has merged fields for ol_flags calculation. */
 663        pinfo = (vector unsigned char)
 664                vec_or((vector unsigned long)ptype,
 665                (vector unsigned long)pinfo);
 666        ptype = (vector unsigned char)
 667                vec_sr((vector unsigned int)pinfo,
 668                (vector unsigned int){10, 10, 10, 10});
 669        ptype = (vector unsigned char)
 670                vec_packs((vector unsigned int)ptype,
 671                (vector unsigned int)zero);
 672
 673        /* Errored packets will have RTE_PTYPE_ALL_MASK. */
 674        op_err = (vector unsigned char)
 675                vec_sr((vector unsigned short)op_err,
 676                (vector unsigned short){8, 8, 8, 8, 8, 8, 8, 8});
 677        ptype = (vector unsigned char)
 678                vec_or((vector unsigned long)ptype,
 679                (vector unsigned long)op_err);
 680
 681        pt_idx0 = (uint8_t)((vector unsigned char)ptype)[0];
 682        pt_idx1 = (uint8_t)((vector unsigned char)ptype)[2];
 683        pt_idx2 = (uint8_t)((vector unsigned char)ptype)[4];
 684        pt_idx3 = (uint8_t)((vector unsigned char)ptype)[6];
 685
 686        pkts[0]->packet_type = mlx5_ptype_table[pt_idx0] |
 687                !!(pt_idx0 & (1 << 6)) * rxq->tunnel;
 688        pkts[1]->packet_type = mlx5_ptype_table[pt_idx1] |
 689                !!(pt_idx1 & (1 << 6)) * rxq->tunnel;
 690        pkts[2]->packet_type = mlx5_ptype_table[pt_idx2] |
 691                !!(pt_idx2 & (1 << 6)) * rxq->tunnel;
 692        pkts[3]->packet_type = mlx5_ptype_table[pt_idx3] |
 693                !!(pt_idx3 & (1 << 6)) * rxq->tunnel;
 694
 695        /* Fill flags for checksum and VLAN. */
 696        pinfo = (vector unsigned char)
 697                vec_and((vector unsigned long)pinfo,
 698                (vector unsigned long)ptype_ol_mask);
 699        pinfo = vec_perm(cv_flag_sel, zero, pinfo);
 700
 701        /* Locate checksum flags at byte[2:1] and merge with VLAN flags. */
 702        cv_flags = (vector unsigned char)
 703                vec_sl((vector unsigned int)pinfo,
 704                (vector unsigned int){9, 9, 9, 9});
 705        cv_flags = (vector unsigned char)
 706                vec_or((vector unsigned long)pinfo,
 707                (vector unsigned long)cv_flags);
 708
 709        /* Move back flags to start from byte[0]. */
 710        cv_flags = (vector unsigned char)
 711                vec_sr((vector unsigned int)cv_flags,
 712                (vector unsigned int){8, 8, 8, 8});
 713
 714        /* Mask out garbage bits. */
 715        cv_flags = (vector unsigned char)
 716                vec_and((vector unsigned long)cv_flags,
 717                (vector unsigned long)cv_mask);
 718
 719        /* Merge to ol_flags. */
 720        ol_flags = (vector unsigned char)
 721                vec_or((vector unsigned long)ol_flags,
 722                (vector unsigned long)cv_flags);
 723
 724        /* Merge mbuf_init and ol_flags. */
 725        rearm0 = (vector unsigned char)
 726                vec_sel((vector unsigned short)mbuf_init,
 727                (vector unsigned short)
 728                vec_slo((vector unsigned short)ol_flags,
 729                (vector unsigned char){64}), rearm_sel_mask);
 730        rearm1 = (vector unsigned char)
 731                vec_sel((vector unsigned short)mbuf_init,
 732                (vector unsigned short)
 733                vec_slo((vector unsigned short)ol_flags,
 734                (vector unsigned char){32}), rearm_sel_mask);
 735        rearm2 = (vector unsigned char)
 736                vec_sel((vector unsigned short)mbuf_init,
 737                (vector unsigned short)ol_flags, rearm_sel_mask);
 738        rearm3 = (vector unsigned char)
 739                vec_sel((vector unsigned short)mbuf_init,
 740                (vector unsigned short)
 741                vec_sro((vector unsigned short)ol_flags,
 742                (vector unsigned char){32}), rearm_sel_mask);
 743
 744        /* Write 8B rearm_data and 8B ol_flags. */
 745        vec_vsx_st(rearm0, 0,
 746                (vector unsigned char *)&pkts[0]->rearm_data);
 747        vec_vsx_st(rearm1, 0,
 748                (vector unsigned char *)&pkts[1]->rearm_data);
 749        vec_vsx_st(rearm2, 0,
 750                (vector unsigned char *)&pkts[2]->rearm_data);
 751        vec_vsx_st(rearm3, 0,
 752                (vector unsigned char *)&pkts[3]->rearm_data);
 753}
 754
 755/**
 756 * Process a non-compressed completion and fill in mbufs in RX SW ring
 757 * with data extracted from the title completion descriptor.
 758 *
 759 * @param rxq
 760 *   Pointer to RX queue structure.
 761 * @param cq
 762 *   Pointer to completion array having a non-compressed completion at first.
 763 * @param elts
 764 *   Pointer to SW ring to be filled. The first mbuf has to be pre-built from
 765 *   the title completion descriptor to be copied to the rest of mbufs.
 766 * @param[out] pkts
 767 *   Array to store received packets.
 768 * @param pkts_n
 769 *   Maximum number of packets in array.
 770 * @param[out] err
 771 *   Pointer to a flag. Set non-zero value if pkts array has at least one error
 772 *   packet to handle.
 773 * @param[out] comp
 774 *   Pointer to an index. Set it to the first compressed completion if any.
 775 *
 776 * @return
 777 *   Number of CQEs successfully processed.
 778 */
 779static inline uint16_t
 780rxq_cq_process_v(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cq,
 781                 struct rte_mbuf **elts, struct rte_mbuf **pkts,
 782                 uint16_t pkts_n, uint64_t *err, uint64_t *comp)
 783{
 784        const uint16_t q_n = 1 << rxq->cqe_n;
 785        const uint16_t q_mask = q_n - 1;
 786        unsigned int pos;
 787        uint64_t n = 0;
 788        uint64_t comp_idx = MLX5_VPMD_DESCS_PER_LOOP;
 789        uint16_t nocmp_n = 0;
 790        unsigned int ownership = !!(rxq->cq_ci & (q_mask + 1));
 791        const vector unsigned char zero = (vector unsigned char){0};
 792        const vector unsigned char ones = vec_splat_u8(-1);
 793        const vector unsigned char owner_check =
 794                (vector unsigned char)(vector unsigned long){
 795                0x0100000001000000LL, 0x0100000001000000LL};
 796        const vector unsigned char opcode_check =
 797                (vector unsigned char)(vector unsigned long){
 798                0xf0000000f0000000LL, 0xf0000000f0000000LL};
 799        const vector unsigned char format_check =
 800                (vector unsigned char)(vector unsigned long){
 801                0x0c0000000c000000LL, 0x0c0000000c000000LL};
 802        const vector unsigned char resp_err_check =
 803                (vector unsigned char)(vector unsigned long){
 804                0xe0000000e0000000LL, 0xe0000000e0000000LL};
 805#ifdef MLX5_PMD_SOFT_COUNTERS
 806        uint32_t rcvd_byte = 0;
 807        /* Mask to shuffle byte_cnt to add up stats. Do bswap16 for all. */
 808        const vector unsigned char len_shuf_mask = (vector unsigned char){
 809                 1,  0,  5,  4,
 810                 9,  8, 13, 12,
 811                -1, -1, -1, -1,
 812                -1, -1, -1, -1};
 813#endif
 814        /* Mask to shuffle from extracted CQE to mbuf. */
 815        const vector unsigned char shuf_mask = (vector unsigned char){
 816                 5,  4,           /* bswap16, pkt_len */
 817                -1, -1,           /* zero out 2nd half of pkt_len */
 818                 5,  4,           /* bswap16, data_len */
 819                11, 10,           /* bswap16, vlan+tci */
 820                15, 14, 13, 12,   /* bswap32, rss */
 821                 1,  2,  3, -1};  /* fdir.hi */
 822        /* Mask to blend from the last Qword to the first DQword. */
 823        /* Mask to blend from the last Qword to the first DQword. */
 824        const vector unsigned char blend_mask = (vector unsigned char){
 825                -1,  0,  0,  0,
 826                 0,  0,  0,  0,
 827                -1, -1, -1, -1,
 828                -1, -1, -1, -1};
 829        const vector unsigned char crc_adj =
 830                (vector unsigned char)(vector unsigned short){
 831                rxq->crc_present * RTE_ETHER_CRC_LEN, 0,
 832                rxq->crc_present * RTE_ETHER_CRC_LEN, 0, 0, 0, 0, 0};
 833        const vector unsigned char flow_mark_adj =
 834                (vector unsigned char)(vector unsigned int){
 835                0, 0, 0, rxq->mark * (-1)};
 836        const vector unsigned short cqe_sel_mask1 =
 837                (vector unsigned short){0, 0, 0, 0, 0xffff, 0xffff, 0, 0};
 838        const vector unsigned short cqe_sel_mask2 =
 839                (vector unsigned short){0, 0, 0xffff, 0, 0, 0, 0, 0};
 840
 841        /*
 842         * A. load first Qword (8bytes) in one loop.
 843         * B. copy 4 mbuf pointers from elts ring to returning pkts.
 844         * C. load remaining CQE data and extract necessary fields.
 845         *    Final 16bytes cqes[] extracted from original 64bytes CQE has the
 846         *    following structure:
 847         *        struct {
 848         *          uint8_t  pkt_info;
 849         *          uint8_t  flow_tag[3];
 850         *          uint16_t byte_cnt;
 851         *          uint8_t  rsvd4;
 852         *          uint8_t  op_own;
 853         *          uint16_t hdr_type_etc;
 854         *          uint16_t vlan_info;
 855         *          uint32_t rx_has_res;
 856         *        } c;
 857         * D. fill in mbuf.
 858         * E. get valid CQEs.
 859         * F. find compressed CQE.
 860         */
 861        for (pos = 0;
 862             pos < pkts_n;
 863             pos += MLX5_VPMD_DESCS_PER_LOOP) {
 864                vector unsigned char cqes[MLX5_VPMD_DESCS_PER_LOOP];
 865                vector unsigned char cqe_tmp1, cqe_tmp2;
 866                vector unsigned char pkt_mb0, pkt_mb1, pkt_mb2, pkt_mb3;
 867                vector unsigned char op_own, op_own_tmp1, op_own_tmp2;
 868                vector unsigned char opcode, owner_mask, invalid_mask;
 869                vector unsigned char comp_mask;
 870                vector unsigned char mask;
 871#ifdef MLX5_PMD_SOFT_COUNTERS
 872                const vector unsigned char lower_half = {
 873                        0, 1, 4, 5, 8, 9, 12, 13,
 874                        16, 17, 20, 21, 24, 25, 28, 29};
 875                const vector unsigned char upper_half = {
 876                        2, 3, 6, 7, 10, 11, 14, 15,
 877                        18, 19, 22, 23, 26, 27, 30, 31};
 878                const vector unsigned long shmax = {64, 64};
 879                vector unsigned char byte_cnt;
 880                vector unsigned short left, right;
 881                vector unsigned long lshift;
 882                vector __attribute__((altivec(bool__)))
 883                        unsigned long shmask;
 884#endif
 885                vector unsigned char mbp1, mbp2;
 886                vector unsigned char p =
 887                        (vector unsigned char)(vector unsigned short){
 888                                0, 1, 2, 3, 0, 0, 0, 0};
 889                unsigned int p1, p2, p3;
 890
 891                /* Prefetch next 4 CQEs. */
 892                if (pkts_n - pos >= 2 * MLX5_VPMD_DESCS_PER_LOOP) {
 893                        rte_prefetch0(&cq[pos + MLX5_VPMD_DESCS_PER_LOOP]);
 894                        rte_prefetch0(&cq[pos + MLX5_VPMD_DESCS_PER_LOOP + 1]);
 895                        rte_prefetch0(&cq[pos + MLX5_VPMD_DESCS_PER_LOOP + 2]);
 896                        rte_prefetch0(&cq[pos + MLX5_VPMD_DESCS_PER_LOOP + 3]);
 897                }
 898
 899                /* A.0 do not cross the end of CQ. */
 900                mask = (vector unsigned char)(vector unsigned long){
 901                        (pkts_n - pos) * sizeof(uint16_t) * 8, 0};
 902
 903                {
 904                        vector unsigned long lshift;
 905                        vector __attribute__((altivec(bool__)))
 906                                unsigned long shmask;
 907                        const vector unsigned long shmax = {64, 64};
 908
 909                        lshift = vec_splat((vector unsigned long)mask, 0);
 910                        shmask = vec_cmpgt(shmax, lshift);
 911                        mask = (vector unsigned char)
 912                                vec_sl((vector unsigned long)ones, lshift);
 913                        mask = (vector unsigned char)
 914                                vec_sel((vector unsigned long)shmask,
 915                                (vector unsigned long)mask, shmask);
 916                }
 917
 918                p = (vector unsigned char)
 919                        vec_andc((vector unsigned long)p,
 920                        (vector unsigned long)mask);
 921
 922                /* A.1 load cqes. */
 923                p3 = (unsigned int)((vector unsigned short)p)[3];
 924                cqes[3] = (vector unsigned char)(vector unsigned long){
 925                        *(__rte_aligned(8) unsigned long *)
 926                        &cq[pos + p3].sop_drop_qpn, 0LL};
 927                rte_compiler_barrier();
 928
 929                p2 = (unsigned int)((vector unsigned short)p)[2];
 930                cqes[2] = (vector unsigned char)(vector unsigned long){
 931                        *(__rte_aligned(8) unsigned long *)
 932                        &cq[pos + p2].sop_drop_qpn, 0LL};
 933                rte_compiler_barrier();
 934
 935                /* B.1 load mbuf pointers. */
 936                mbp1 = (vector unsigned char)vec_vsx_ld(0,
 937                        (signed int const *)&elts[pos]);
 938                mbp2 = (vector unsigned char)vec_vsx_ld(0,
 939                        (signed int const *)&elts[pos + 2]);
 940
 941                /* A.1 load a block having op_own. */
 942                p1 = (unsigned int)((vector unsigned short)p)[1];
 943                cqes[1] = (vector unsigned char)(vector unsigned long){
 944                        *(__rte_aligned(8) unsigned long *)
 945                        &cq[pos + p1].sop_drop_qpn, 0LL};
 946                rte_compiler_barrier();
 947
 948                cqes[0] = (vector unsigned char)(vector unsigned long){
 949                        *(__rte_aligned(8) unsigned long *)
 950                        &cq[pos].sop_drop_qpn, 0LL};
 951                rte_compiler_barrier();
 952
 953                /* B.2 copy mbuf pointers. */
 954                *(vector unsigned char *)&pkts[pos] = mbp1;
 955                *(vector unsigned char *)&pkts[pos + 2] = mbp2;
 956                rte_io_rmb();
 957
 958                /* C.1 load remaining CQE data and extract necessary fields. */
 959                cqe_tmp2 = *(vector unsigned char *)
 960                        &cq[pos + p3].pkt_info;
 961                cqe_tmp1 = *(vector unsigned char *)
 962                        &cq[pos + p2].pkt_info;
 963                cqes[3] = vec_sel(cqes[3], cqe_tmp2, blend_mask);
 964                cqes[2] = vec_sel(cqes[2], cqe_tmp1, blend_mask);
 965                cqe_tmp2 = (vector unsigned char)vec_vsx_ld(0,
 966                        (signed int const *)&cq[pos + p3].csum);
 967                cqe_tmp1 = (vector unsigned char)vec_vsx_ld(0,
 968                        (signed int const *)&cq[pos + p2].csum);
 969                cqes[3] = (vector unsigned char)
 970                        vec_sel((vector unsigned short)cqes[3],
 971                        (vector unsigned short)cqe_tmp2, cqe_sel_mask1);
 972                cqes[2] = (vector unsigned char)
 973                        vec_sel((vector unsigned short)cqes[2],
 974                        (vector unsigned short)cqe_tmp1, cqe_sel_mask1);
 975                cqe_tmp2 = (vector unsigned char)(vector unsigned long){
 976                        *(__rte_aligned(8) unsigned long *)
 977                        &cq[pos + p3].rsvd4[2], 0LL};
 978                cqe_tmp1 = (vector unsigned char)(vector unsigned long){
 979                        *(__rte_aligned(8) unsigned long *)
 980                        &cq[pos + p2].rsvd4[2], 0LL};
 981                cqes[3] = (vector unsigned char)
 982                        vec_sel((vector unsigned short)cqes[3],
 983                        (vector unsigned short)cqe_tmp2,
 984                        (vector unsigned short)cqe_sel_mask2);
 985                cqes[2] = (vector unsigned char)
 986                        vec_sel((vector unsigned short)cqes[2],
 987                        (vector unsigned short)cqe_tmp1,
 988                        (vector unsigned short)cqe_sel_mask2);
 989
 990                /* C.2 generate final structure for mbuf with swapping bytes. */
 991                pkt_mb3 = vec_perm(cqes[3], zero, shuf_mask);
 992                pkt_mb2 = vec_perm(cqes[2], zero, shuf_mask);
 993
 994                /* C.3 adjust CRC length. */
 995                pkt_mb3 = (vector unsigned char)
 996                        ((vector unsigned short)pkt_mb3 -
 997                        (vector unsigned short)crc_adj);
 998                pkt_mb2 = (vector unsigned char)
 999                        ((vector unsigned short)pkt_mb2 -
1000                        (vector unsigned short)crc_adj);
1001
1002                /* C.4 adjust flow mark. */
1003                pkt_mb3 = (vector unsigned char)
1004                        ((vector unsigned int)pkt_mb3 +
1005                        (vector unsigned int)flow_mark_adj);
1006                pkt_mb2 = (vector unsigned char)
1007                        ((vector unsigned int)pkt_mb2 +
1008                        (vector unsigned int)flow_mark_adj);
1009
1010                /* D.1 fill in mbuf - rx_descriptor_fields1. */
1011                *(vector unsigned char *)
1012                        &pkts[pos + 3]->pkt_len = pkt_mb3;
1013                *(vector unsigned char *)
1014                        &pkts[pos + 2]->pkt_len = pkt_mb2;
1015
1016                /* E.1 extract op_own field. */
1017                op_own_tmp2 = (vector unsigned char)
1018                        vec_mergeh((vector unsigned int)cqes[2],
1019                        (vector unsigned int)cqes[3]);
1020
1021                /* C.1 load remaining CQE data and extract necessary fields. */
1022                cqe_tmp2 = *(vector unsigned char *)
1023                        &cq[pos + p1].pkt_info;
1024                cqe_tmp1 = *(vector unsigned char *)
1025                        &cq[pos].pkt_info;
1026                cqes[1] = vec_sel(cqes[1], cqe_tmp2, blend_mask);
1027                cqes[0] = vec_sel(cqes[0], cqe_tmp2, blend_mask);
1028                cqe_tmp2 = (vector unsigned char)vec_vsx_ld(0,
1029                        (signed int const *)&cq[pos + p1].csum);
1030                cqe_tmp1 = (vector unsigned char)vec_vsx_ld(0,
1031                        (signed int const *)&cq[pos].csum);
1032                cqes[1] = (vector unsigned char)
1033                        vec_sel((vector unsigned short)cqes[1],
1034                        (vector unsigned short)cqe_tmp2, cqe_sel_mask1);
1035                cqes[0] = (vector unsigned char)
1036                        vec_sel((vector unsigned short)cqes[0],
1037                        (vector unsigned short)cqe_tmp1, cqe_sel_mask1);
1038                cqe_tmp2 = (vector unsigned char)(vector unsigned long){
1039                        *(__rte_aligned(8) unsigned long *)
1040                        &cq[pos + p1].rsvd4[2], 0LL};
1041                cqe_tmp1 = (vector unsigned char)(vector unsigned long){
1042                        *(__rte_aligned(8) unsigned long *)
1043                        &cq[pos].rsvd4[2], 0LL};
1044                cqes[1] = (vector unsigned char)
1045                        vec_sel((vector unsigned short)cqes[1],
1046                        (vector unsigned short)cqe_tmp2, cqe_sel_mask2);
1047                cqes[0] = (vector unsigned char)
1048                        vec_sel((vector unsigned short)cqes[0],
1049                        (vector unsigned short)cqe_tmp1, cqe_sel_mask2);
1050
1051                /* C.2 generate final structure for mbuf with swapping bytes. */
1052                pkt_mb1 = vec_perm(cqes[1], zero, shuf_mask);
1053                pkt_mb0 = vec_perm(cqes[0], zero, shuf_mask);
1054
1055                /* C.3 adjust CRC length. */
1056                pkt_mb1 = (vector unsigned char)
1057                        ((vector unsigned short)pkt_mb1 -
1058                        (vector unsigned short)crc_adj);
1059                pkt_mb0 = (vector unsigned char)
1060                        ((vector unsigned short)pkt_mb0 -
1061                        (vector unsigned short)crc_adj);
1062
1063                /* C.4 adjust flow mark. */
1064                pkt_mb1 = (vector unsigned char)
1065                        ((vector unsigned int)pkt_mb1 +
1066                        (vector unsigned int)flow_mark_adj);
1067                pkt_mb0 = (vector unsigned char)
1068                        ((vector unsigned int)pkt_mb0 +
1069                        (vector unsigned int)flow_mark_adj);
1070
1071                /* E.1 extract op_own byte. */
1072                op_own_tmp1 = (vector unsigned char)
1073                        vec_mergeh((vector unsigned int)cqes[0],
1074                        (vector unsigned int)cqes[1]);
1075                op_own = (vector unsigned char)
1076                        vec_mergel((vector unsigned long)op_own_tmp1,
1077                        (vector unsigned long)op_own_tmp2);
1078
1079                /* D.1 fill in mbuf - rx_descriptor_fields1. */
1080                *(vector unsigned char *)
1081                        &pkts[pos + 1]->pkt_len = pkt_mb1;
1082                *(vector unsigned char *)
1083                        &pkts[pos]->pkt_len = pkt_mb0;
1084
1085                /* E.2 flip owner bit to mark CQEs from last round. */
1086                owner_mask = (vector unsigned char)
1087                        vec_and((vector unsigned long)op_own,
1088                        (vector unsigned long)owner_check);
1089                if (ownership)
1090                        owner_mask = (vector unsigned char)
1091                                vec_xor((vector unsigned long)owner_mask,
1092                                (vector unsigned long)owner_check);
1093                owner_mask = (vector unsigned char)
1094                        vec_cmpeq((vector unsigned int)owner_mask,
1095                        (vector unsigned int)owner_check);
1096                owner_mask = (vector unsigned char)
1097                        vec_packs((vector unsigned int)owner_mask,
1098                        (vector unsigned int)zero);
1099
1100                /* E.3 get mask for invalidated CQEs. */
1101                opcode = (vector unsigned char)
1102                        vec_and((vector unsigned long)op_own,
1103                        (vector unsigned long)opcode_check);
1104                invalid_mask = (vector unsigned char)
1105                        vec_cmpeq((vector unsigned int)opcode_check,
1106                        (vector unsigned int)opcode);
1107                invalid_mask = (vector unsigned char)
1108                        vec_packs((vector unsigned int)invalid_mask,
1109                        (vector unsigned int)zero);
1110
1111                /* E.4 mask out beyond boundary. */
1112                invalid_mask = (vector unsigned char)
1113                        vec_or((vector unsigned long)invalid_mask,
1114                        (vector unsigned long)mask);
1115
1116                /* E.5 merge invalid_mask with invalid owner. */
1117                invalid_mask = (vector unsigned char)
1118                        vec_or((vector unsigned long)invalid_mask,
1119                        (vector unsigned long)owner_mask);
1120
1121                /* F.1 find compressed CQE format. */
1122                comp_mask = (vector unsigned char)
1123                        vec_and((vector unsigned long)op_own,
1124                        (vector unsigned long)format_check);
1125                comp_mask = (vector unsigned char)
1126                        vec_cmpeq((vector unsigned int)comp_mask,
1127                        (vector unsigned int)format_check);
1128                comp_mask = (vector unsigned char)
1129                        vec_packs((vector unsigned int)comp_mask,
1130                        (vector unsigned int)zero);
1131
1132                /* F.2 mask out invalid entries. */
1133                comp_mask = (vector unsigned char)
1134                        vec_andc((vector unsigned long)comp_mask,
1135                        (vector unsigned long)invalid_mask);
1136                comp_idx = ((vector unsigned long)comp_mask)[0];
1137
1138                /* F.3 get the first compressed CQE. */
1139                comp_idx = comp_idx ? __builtin_ctzll(comp_idx) /
1140                        (sizeof(uint16_t) * 8) : MLX5_VPMD_DESCS_PER_LOOP;
1141
1142                /* E.6 mask out entries after the compressed CQE. */
1143                mask = (vector unsigned char)(vector unsigned long){
1144                        (comp_idx * sizeof(uint16_t) * 8), 0};
1145                lshift = vec_splat((vector unsigned long)mask, 0);
1146                shmask = vec_cmpgt(shmax, lshift);
1147                mask = (vector unsigned char)
1148                        vec_sl((vector unsigned long)ones, lshift);
1149                mask = (vector unsigned char)
1150                        vec_sel((vector unsigned long)shmask,
1151                        (vector unsigned long)mask, shmask);
1152                invalid_mask = (vector unsigned char)
1153                        vec_or((vector unsigned long)invalid_mask,
1154                        (vector unsigned long)mask);
1155
1156                /* E.7 count non-compressed valid CQEs. */
1157                n = ((vector unsigned long)invalid_mask)[0];
1158                n = n ? __builtin_ctzll(n) / (sizeof(uint16_t) * 8) :
1159                        MLX5_VPMD_DESCS_PER_LOOP;
1160                nocmp_n += n;
1161
1162                /* D.2 get the final invalid mask. */
1163                mask = (vector unsigned char)(vector unsigned long){
1164                        (n * sizeof(uint16_t) * 8), 0};
1165                lshift = vec_splat((vector unsigned long)mask, 0);
1166                shmask = vec_cmpgt(shmax, lshift);
1167                mask = (vector unsigned char)
1168                        vec_sl((vector unsigned long)ones, lshift);
1169                mask = (vector unsigned char)
1170                        vec_sel((vector unsigned long)shmask,
1171                        (vector unsigned long)mask, shmask);
1172                invalid_mask = (vector unsigned char)
1173                        vec_or((vector unsigned long)invalid_mask,
1174                        (vector unsigned long)mask);
1175
1176                /* D.3 check error in opcode. */
1177                opcode = (vector unsigned char)
1178                        vec_cmpeq((vector unsigned int)resp_err_check,
1179                        (vector unsigned int)opcode);
1180                opcode = (vector unsigned char)
1181                        vec_packs((vector unsigned int)opcode,
1182                        (vector unsigned int)zero);
1183                opcode = (vector unsigned char)
1184                        vec_andc((vector unsigned long)opcode,
1185                        (vector unsigned long)invalid_mask);
1186
1187                /* D.4 mark if any error is set */
1188                *err |= ((vector unsigned long)opcode)[0];
1189
1190                /* D.5 fill in mbuf - rearm_data and packet_type. */
1191                rxq_cq_to_ptype_oflags_v(rxq, cqes, opcode, &pkts[pos]);
1192                if (unlikely(rxq->shared)) {
1193                        pkts[pos]->port = cq[pos].user_index_low;
1194                        pkts[pos + p1]->port = cq[pos + p1].user_index_low;
1195                        pkts[pos + p2]->port = cq[pos + p2].user_index_low;
1196                        pkts[pos + p3]->port = cq[pos + p3].user_index_low;
1197                }
1198                if (rxq->hw_timestamp) {
1199                        int offset = rxq->timestamp_offset;
1200                        if (rxq->rt_timestamp) {
1201                                struct mlx5_dev_ctx_shared *sh = rxq->sh;
1202                                uint64_t ts;
1203
1204                                ts = rte_be_to_cpu_64(cq[pos].timestamp);
1205                                mlx5_timestamp_set(pkts[pos], offset,
1206                                        mlx5_txpp_convert_rx_ts(sh, ts));
1207                                ts = rte_be_to_cpu_64(cq[pos + p1].timestamp);
1208                                mlx5_timestamp_set(pkts[pos + 1], offset,
1209                                        mlx5_txpp_convert_rx_ts(sh, ts));
1210                                ts = rte_be_to_cpu_64(cq[pos + p2].timestamp);
1211                                mlx5_timestamp_set(pkts[pos + 2], offset,
1212                                        mlx5_txpp_convert_rx_ts(sh, ts));
1213                                ts = rte_be_to_cpu_64(cq[pos + p3].timestamp);
1214                                mlx5_timestamp_set(pkts[pos + 3], offset,
1215                                        mlx5_txpp_convert_rx_ts(sh, ts));
1216                        } else {
1217                                mlx5_timestamp_set(pkts[pos], offset,
1218                                        rte_be_to_cpu_64(cq[pos].timestamp));
1219                                mlx5_timestamp_set(pkts[pos + 1], offset,
1220                                        rte_be_to_cpu_64(cq[pos + p1].timestamp));
1221                                mlx5_timestamp_set(pkts[pos + 2], offset,
1222                                        rte_be_to_cpu_64(cq[pos + p2].timestamp));
1223                                mlx5_timestamp_set(pkts[pos + 3], offset,
1224                                        rte_be_to_cpu_64(cq[pos + p3].timestamp));
1225                        }
1226                }
1227                if (rxq->dynf_meta) {
1228                        uint64_t flag = rxq->flow_meta_mask;
1229                        int32_t offs = rxq->flow_meta_offset;
1230                        uint32_t mask = rxq->flow_meta_port_mask;
1231                        uint32_t metadata;
1232
1233                        /* This code is subject to further optimization. */
1234                        metadata = rte_be_to_cpu_32
1235                                (cq[pos].flow_table_metadata) & mask;
1236                        *RTE_MBUF_DYNFIELD(pkts[pos], offs, uint32_t *) =
1237                                                                metadata;
1238                        pkts[pos]->ol_flags |= metadata ? flag : 0ULL;
1239                        metadata = rte_be_to_cpu_32
1240                                (cq[pos + 1].flow_table_metadata) & mask;
1241                        *RTE_MBUF_DYNFIELD(pkts[pos + 1], offs, uint32_t *) =
1242                                                                metadata;
1243                        pkts[pos + 1]->ol_flags |= metadata ? flag : 0ULL;
1244                        metadata = rte_be_to_cpu_32
1245                                (cq[pos + 2].flow_table_metadata) &     mask;
1246                        *RTE_MBUF_DYNFIELD(pkts[pos + 2], offs, uint32_t *) =
1247                                                                metadata;
1248                        pkts[pos + 2]->ol_flags |= metadata ? flag : 0ULL;
1249                        metadata = rte_be_to_cpu_32
1250                                (cq[pos + 3].flow_table_metadata) &     mask;
1251                        *RTE_MBUF_DYNFIELD(pkts[pos + 3], offs, uint32_t *) =
1252                                                                metadata;
1253                        pkts[pos + 3]->ol_flags |= metadata ? flag : 0ULL;
1254                }
1255#ifdef MLX5_PMD_SOFT_COUNTERS
1256                /* Add up received bytes count. */
1257                byte_cnt = vec_perm(op_own, zero, len_shuf_mask);
1258                byte_cnt = (vector unsigned char)
1259                        vec_andc((vector unsigned long)byte_cnt,
1260                        (vector unsigned long)invalid_mask);
1261                left = vec_perm((vector unsigned short)byte_cnt,
1262                        (vector unsigned short)zero, lower_half);
1263                right = vec_perm((vector unsigned short)byte_cnt,
1264                        (vector unsigned short)zero, upper_half);
1265                byte_cnt = (vector unsigned char)vec_add(left, right);
1266                left = vec_perm((vector unsigned short)byte_cnt,
1267                        (vector unsigned short)zero, lower_half);
1268                right = vec_perm((vector unsigned short)byte_cnt,
1269                        (vector unsigned short)zero, upper_half);
1270                byte_cnt = (vector unsigned char)vec_add(left, right);
1271                rcvd_byte += ((vector unsigned long)byte_cnt)[0];
1272#endif
1273
1274                /*
1275                 * Break the loop unless more valid CQE is expected, or if
1276                 * there's a compressed CQE.
1277                 */
1278                if (n != MLX5_VPMD_DESCS_PER_LOOP)
1279                        break;
1280        }
1281#ifdef MLX5_PMD_SOFT_COUNTERS
1282        rxq->stats.ipackets += nocmp_n;
1283        rxq->stats.ibytes += rcvd_byte;
1284#endif
1285        if (comp_idx == n)
1286                *comp = comp_idx;
1287        return nocmp_n;
1288}
1289
1290#endif /* RTE_PMD_MLX5_RXTX_VEC_ALTIVEC_H_ */
1291