linux/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
/*
 * Copyright (c) 2015, Mellanox Technologies. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <linux/ip.h>
#include <linux/ipv6.h>
#include <linux/tcp.h>
#include <net/busy_poll.h>
#include "en.h"
#include "en_tc.h"
#include "eswitch.h"

static inline bool mlx5e_rx_hw_stamp(struct mlx5e_tstamp *tstamp)
{
	return tstamp->hwtstamp_config.rx_filter == HWTSTAMP_FILTER_ALL;
}

static inline void mlx5e_read_cqe_slot(struct mlx5e_cq *cq, u32 cqcc,
				       void *data)
{
	u32 ci = cqcc & cq->wq.sz_m1;

	memcpy(data, mlx5_cqwq_get_wqe(&cq->wq, ci), sizeof(struct mlx5_cqe64));
}

static inline void mlx5e_read_title_slot(struct mlx5e_rq *rq,
					 struct mlx5e_cq *cq, u32 cqcc)
{
	mlx5e_read_cqe_slot(cq, cqcc, &cq->title);
	cq->decmprs_left        = be32_to_cpu(cq->title.byte_cnt);
	cq->decmprs_wqe_counter = be16_to_cpu(cq->title.wqe_counter);
	rq->stats.cqe_compress_blks++;
}

static inline void mlx5e_read_mini_arr_slot(struct mlx5e_cq *cq, u32 cqcc)
{
	mlx5e_read_cqe_slot(cq, cqcc, cq->mini_arr);
	cq->mini_arr_idx = 0;
}

static inline void mlx5e_cqes_update_owner(struct mlx5e_cq *cq, u32 cqcc, int n)
{
	u8 op_own = (cqcc >> cq->wq.log_sz) & 1;
	u32 wq_sz = 1 << cq->wq.log_sz;
	u32 ci = cqcc & cq->wq.sz_m1;
	u32 ci_top = min_t(u32, wq_sz, ci + n);

	for (; ci < ci_top; ci++, n--) {
		struct mlx5_cqe64 *cqe = mlx5_cqwq_get_wqe(&cq->wq, ci);

		cqe->op_own = op_own;
	}

	if (unlikely(ci == wq_sz)) {
		op_own = !op_own;
		for (ci = 0; ci < n; ci++) {
			struct mlx5_cqe64 *cqe = mlx5_cqwq_get_wqe(&cq->wq, ci);

			cqe->op_own = op_own;
		}
	}
}

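/* With RX CQE compression the device writes one "title" CQE followed by a
 * block of mini CQEs that carry only per-packet fields such as the byte
 * count and checksum.  Decompression rebuilds a full CQE in cq->title from
 * the current mini CQE so the regular handlers can consume it unchanged.
 */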
static inline void mlx5e_decompress_cqe(struct mlx5e_rq *rq,
					struct mlx5e_cq *cq, u32 cqcc)
{
	u16 wqe_cnt_step;

	cq->title.byte_cnt     = cq->mini_arr[cq->mini_arr_idx].byte_cnt;
	cq->title.check_sum    = cq->mini_arr[cq->mini_arr_idx].checksum;
	cq->title.op_own      &= 0xf0;
	cq->title.op_own      |= 0x01 & (cqcc >> cq->wq.log_sz);
	cq->title.wqe_counter  = cpu_to_be16(cq->decmprs_wqe_counter);

	wqe_cnt_step =
		rq->wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ ?
		mpwrq_get_cqe_consumed_strides(&cq->title) : 1;
	cq->decmprs_wqe_counter =
		(cq->decmprs_wqe_counter + wqe_cnt_step) & rq->wq.sz_m1;
}

static inline void mlx5e_decompress_cqe_no_hash(struct mlx5e_rq *rq,
						struct mlx5e_cq *cq, u32 cqcc)
{
	mlx5e_decompress_cqe(rq, cq, cqcc);
	cq->title.rss_hash_type   = 0;
	cq->title.rss_hash_result = 0;
}

static inline u32 mlx5e_decompress_cqes_cont(struct mlx5e_rq *rq,
					     struct mlx5e_cq *cq,
					     int update_owner_only,
					     int budget_rem)
{
	u32 cqcc = cq->wq.cc + update_owner_only;
	u32 cqe_count;
	u32 i;

	cqe_count = min_t(u32, cq->decmprs_left, budget_rem);

	for (i = update_owner_only; i < cqe_count;
	     i++, cq->mini_arr_idx++, cqcc++) {
		if (cq->mini_arr_idx == MLX5_MINI_CQE_ARRAY_SIZE)
			mlx5e_read_mini_arr_slot(cq, cqcc);

		mlx5e_decompress_cqe_no_hash(rq, cq, cqcc);
		rq->handle_rx_cqe(rq, &cq->title);
	}
	mlx5e_cqes_update_owner(cq, cq->wq.cc, cqcc - cq->wq.cc);
	cq->wq.cc = cqcc;
	cq->decmprs_left -= cqe_count;
	rq->stats.cqe_compress_pkts += cqe_count;

	return cqe_count;
}

static inline u32 mlx5e_decompress_cqes_start(struct mlx5e_rq *rq,
					      struct mlx5e_cq *cq,
					      int budget_rem)
{
	mlx5e_read_title_slot(rq, cq, cq->wq.cc);
	mlx5e_read_mini_arr_slot(cq, cq->wq.cc + 1);
	mlx5e_decompress_cqe(rq, cq, cq->wq.cc);
	rq->handle_rx_cqe(rq, &cq->title);
	cq->mini_arr_idx++;

	return mlx5e_decompress_cqes_cont(rq, cq, 1, budget_rem) - 1;
}

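/* The new CQE compression setting takes effect when the channels are
 * created, so if the netdev is currently up it is closed and re-opened
 * under the state lock.
 */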
void mlx5e_modify_rx_cqe_compression(struct mlx5e_priv *priv, bool val)
{
	bool was_opened;

	if (!MLX5_CAP_GEN(priv->mdev, cqe_compression))
		return;

	mutex_lock(&priv->state_lock);

	if (priv->params.rx_cqe_compress == val)
		goto unlock;

	was_opened = test_bit(MLX5E_STATE_OPENED, &priv->state);
	if (was_opened)
		mlx5e_close_locked(priv->netdev);

	priv->params.rx_cqe_compress = val;

	if (was_opened)
		mlx5e_open_locked(priv->netdev);

unlock:
	mutex_unlock(&priv->state_lock);
}

#define RQ_PAGE_SIZE(rq) ((1 << rq->buff.page_order) << PAGE_SHIFT)

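/* The RQ keeps a small ring (page_cache) of recently released pages.  A page
 * is only handed back out when the stack has dropped all of its references,
 * i.e. its refcount is back to one.
 */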
static inline bool mlx5e_rx_cache_put(struct mlx5e_rq *rq,
				      struct mlx5e_dma_info *dma_info)
{
	struct mlx5e_page_cache *cache = &rq->page_cache;
	u32 tail_next = (cache->tail + 1) & (MLX5E_CACHE_SIZE - 1);

	if (tail_next == cache->head) {
		rq->stats.cache_full++;
		return false;
	}

	cache->page_cache[cache->tail] = *dma_info;
	cache->tail = tail_next;
	return true;
}

static inline bool mlx5e_rx_cache_get(struct mlx5e_rq *rq,
				      struct mlx5e_dma_info *dma_info)
{
	struct mlx5e_page_cache *cache = &rq->page_cache;

	if (unlikely(cache->head == cache->tail)) {
		rq->stats.cache_empty++;
		return false;
	}

	if (page_ref_count(cache->page_cache[cache->head].page) != 1) {
		rq->stats.cache_busy++;
		return false;
	}

	*dma_info = cache->page_cache[cache->head];
	cache->head = (cache->head + 1) & (MLX5E_CACHE_SIZE - 1);
	rq->stats.cache_reuse++;

	dma_sync_single_for_device(rq->pdev, dma_info->addr,
				   RQ_PAGE_SIZE(rq),
				   DMA_FROM_DEVICE);
	return true;
}

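/* Provide a DMA-mapped page for the RQ: try the local page cache first and
 * fall back to the page allocator, mapping the fresh page for device access.
 */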
static inline int mlx5e_page_alloc_mapped(struct mlx5e_rq *rq,
					  struct mlx5e_dma_info *dma_info)
{
	struct page *page;

	if (mlx5e_rx_cache_get(rq, dma_info))
		return 0;

	page = dev_alloc_pages(rq->buff.page_order);
	if (unlikely(!page))
		return -ENOMEM;

	dma_info->page = page;
	dma_info->addr = dma_map_page(rq->pdev, page, 0,
				      RQ_PAGE_SIZE(rq), rq->buff.map_dir);
	if (unlikely(dma_mapping_error(rq->pdev, dma_info->addr))) {
		put_page(page);
		return -ENOMEM;
	}

	return 0;
}

void mlx5e_page_release(struct mlx5e_rq *rq, struct mlx5e_dma_info *dma_info,
			bool recycle)
{
	if (likely(recycle) && mlx5e_rx_cache_put(rq, dma_info))
		return;

	dma_unmap_page(rq->pdev, dma_info->addr, RQ_PAGE_SIZE(rq),
		       rq->buff.map_dir);
	put_page(dma_info->page);
}

int mlx5e_alloc_rx_wqe(struct mlx5e_rq *rq, struct mlx5e_rx_wqe *wqe, u16 ix)
{
	struct mlx5e_dma_info *di = &rq->dma_info[ix];

	if (unlikely(mlx5e_page_alloc_mapped(rq, di)))
		return -ENOMEM;

	wqe->data.addr = cpu_to_be64(di->addr + MLX5_RX_HEADROOM);
	return 0;
}

void mlx5e_dealloc_rx_wqe(struct mlx5e_rq *rq, u16 ix)
{
	struct mlx5e_dma_info *di = &rq->dma_info[ix];

	mlx5e_page_release(rq, di, true);
}

static inline int mlx5e_mpwqe_strides_per_page(struct mlx5e_rq *rq)
{
	return rq->mpwqe_num_strides >> MLX5_MPWRQ_WQE_PAGE_ORDER;
}

static inline void mlx5e_add_skb_frag_mpwqe(struct mlx5e_rq *rq,
					    struct sk_buff *skb,
					    struct mlx5e_mpw_info *wi,
					    u32 page_idx, u32 frag_offset,
					    u32 len)
{
	unsigned int truesize = ALIGN(len, rq->mpwqe_stride_sz);

	dma_sync_single_for_cpu(rq->pdev,
				wi->umr.dma_info[page_idx].addr + frag_offset,
				len, DMA_FROM_DEVICE);
	wi->skbs_frags[page_idx]++;
	skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags,
			wi->umr.dma_info[page_idx].page, frag_offset,
			len, truesize);
}

static inline void
mlx5e_copy_skb_header_mpwqe(struct device *pdev,
			    struct sk_buff *skb,
			    struct mlx5e_mpw_info *wi,
			    u32 page_idx, u32 offset,
			    u32 headlen)
{
	u16 headlen_pg = min_t(u32, headlen, PAGE_SIZE - offset);
	struct mlx5e_dma_info *dma_info = &wi->umr.dma_info[page_idx];
	unsigned int len;

	/* Aligning len to sizeof(long) optimizes memcpy performance */
	len = ALIGN(headlen_pg, sizeof(long));
	dma_sync_single_for_cpu(pdev, dma_info->addr + offset, len,
				DMA_FROM_DEVICE);
	skb_copy_to_linear_data_offset(skb, 0,
				       page_address(dma_info->page) + offset,
				       len);
	if (unlikely(offset + headlen > PAGE_SIZE)) {
		dma_info++;
		headlen_pg = len;
		len = ALIGN(headlen - headlen_pg, sizeof(long));
		dma_sync_single_for_cpu(pdev, dma_info->addr, len,
					DMA_FROM_DEVICE);
		skb_copy_to_linear_data_offset(skb, headlen_pg,
					       page_address(dma_info->page),
					       len);
	}
}

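/* Post a UMR WQE on the channel's ICO SQ so the device updates the MTT
 * entries that back this multi-packet WQE before the RQ starts using it.
 */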
static inline void mlx5e_post_umr_wqe(struct mlx5e_rq *rq, u16 ix)
{
	struct mlx5e_mpw_info *wi = &rq->mpwqe.info[ix];
	struct mlx5e_sq *sq = &rq->channel->icosq;
	struct mlx5_wq_cyc *wq = &sq->wq;
	struct mlx5e_umr_wqe *wqe;
	u8 num_wqebbs = DIV_ROUND_UP(sizeof(*wqe), MLX5_SEND_WQE_BB);
	u16 pi;

	/* fill sq edge with nops to avoid wqe wrap around */
	while ((pi = (sq->pc & wq->sz_m1)) > sq->edge) {
		sq->db.ico_wqe[pi].opcode = MLX5_OPCODE_NOP;
		sq->db.ico_wqe[pi].num_wqebbs = 1;
		mlx5e_send_nop(sq, false);
	}

	wqe = mlx5_wq_cyc_get_wqe(wq, pi);
	memcpy(wqe, &wi->umr.wqe, sizeof(*wqe));
	wqe->ctrl.opmod_idx_opcode =
		cpu_to_be32((sq->pc << MLX5_WQE_CTRL_WQE_INDEX_SHIFT) |
			    MLX5_OPCODE_UMR);

	sq->db.ico_wqe[pi].opcode = MLX5_OPCODE_UMR;
	sq->db.ico_wqe[pi].num_wqebbs = num_wqebbs;
	sq->pc += num_wqebbs;
	mlx5e_tx_notify_hw(sq, &wqe->ctrl, 0);
}

static int mlx5e_alloc_rx_umr_mpwqe(struct mlx5e_rq *rq,
				    struct mlx5e_rx_wqe *wqe,
				    u16 ix)
{
	struct mlx5e_mpw_info *wi = &rq->mpwqe.info[ix];
	u64 dma_offset = (u64)mlx5e_get_wqe_mtt_offset(rq, ix) << PAGE_SHIFT;
	int pg_strides = mlx5e_mpwqe_strides_per_page(rq);
	int err;
	int i;

	for (i = 0; i < MLX5_MPWRQ_PAGES_PER_WQE; i++) {
		struct mlx5e_dma_info *dma_info = &wi->umr.dma_info[i];

		err = mlx5e_page_alloc_mapped(rq, dma_info);
		if (unlikely(err))
			goto err_unmap;
		wi->umr.mtt[i] = cpu_to_be64(dma_info->addr | MLX5_EN_WR);
		page_ref_add(dma_info->page, pg_strides);
		wi->skbs_frags[i] = 0;
	}

	wi->consumed_strides = 0;
	wqe->data.addr = cpu_to_be64(dma_offset);

	return 0;

err_unmap:
	while (--i >= 0) {
		struct mlx5e_dma_info *dma_info = &wi->umr.dma_info[i];

		page_ref_sub(dma_info->page, pg_strides);
		mlx5e_page_release(rq, dma_info, true);
	}

	return err;
}

void mlx5e_free_rx_mpwqe(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi)
{
	int pg_strides = mlx5e_mpwqe_strides_per_page(rq);
	int i;

	for (i = 0; i < MLX5_MPWRQ_PAGES_PER_WQE; i++) {
		struct mlx5e_dma_info *dma_info = &wi->umr.dma_info[i];

		page_ref_sub(dma_info->page, pg_strides - wi->skbs_frags[i]);
		mlx5e_page_release(rq, dma_info, true);
	}
}

void mlx5e_post_rx_mpwqe(struct mlx5e_rq *rq)
{
	struct mlx5_wq_ll *wq = &rq->wq;
	struct mlx5e_rx_wqe *wqe = mlx5_wq_ll_get_wqe(wq, wq->head);

	clear_bit(MLX5E_RQ_STATE_UMR_WQE_IN_PROGRESS, &rq->state);

	if (unlikely(!test_bit(MLX5E_RQ_STATE_ENABLED, &rq->state))) {
		mlx5e_free_rx_mpwqe(rq, &rq->mpwqe.info[wq->head]);
		return;
	}

	mlx5_wq_ll_push(wq, be16_to_cpu(wqe->next.next_wqe_index));

	/* ensure wqes are visible to device before updating doorbell record */
	dma_wmb();

	mlx5_wq_ll_update_db_record(wq);
}

int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, struct mlx5e_rx_wqe *wqe, u16 ix)
{
	int err;

	err = mlx5e_alloc_rx_umr_mpwqe(rq, wqe, ix);
	if (unlikely(err))
		return err;
	set_bit(MLX5E_RQ_STATE_UMR_WQE_IN_PROGRESS, &rq->state);
	mlx5e_post_umr_wqe(rq, ix);
	return -EBUSY;
}

void mlx5e_dealloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix)
{
	struct mlx5e_mpw_info *wi = &rq->mpwqe.info[ix];

	mlx5e_free_rx_mpwqe(rq, wi);
}

#define RQ_CANNOT_POST(rq) \
	(!test_bit(MLX5E_RQ_STATE_ENABLED, &rq->state) || \
	 test_bit(MLX5E_RQ_STATE_UMR_WQE_IN_PROGRESS, &rq->state))

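/* Refill the RQ until it is full.  For striding RQ the allocation path
 * returns -EBUSY after kicking off an asynchronous UMR WQE; the WQE is
 * pushed later from mlx5e_post_rx_mpwqe() once that UMR completes.
 */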
bool mlx5e_post_rx_wqes(struct mlx5e_rq *rq)
{
	struct mlx5_wq_ll *wq = &rq->wq;

	if (unlikely(RQ_CANNOT_POST(rq)))
		return false;

	while (!mlx5_wq_ll_is_full(wq)) {
		struct mlx5e_rx_wqe *wqe = mlx5_wq_ll_get_wqe(wq, wq->head);
		int err;

		err = rq->alloc_wqe(rq, wqe, wq->head);
		if (err == -EBUSY)
			return true;
		if (unlikely(err)) {
			rq->stats.buff_alloc_err++;
			break;
		}

		mlx5_wq_ll_push(wq, be16_to_cpu(wqe->next.next_wqe_index));
	}

	/* ensure wqes are visible to device before updating doorbell record */
	dma_wmb();

	mlx5_wq_ll_update_db_record(wq);

	return !mlx5_wq_ll_is_full(wq);
}

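/* For an LRO aggregate the device coalesces several TCP segments into one
 * buffer but leaves the original headers in place, so rewrite the IPv4
 * total length and checksum (or IPv6 payload length) and the TCP PSH/ACK/
 * window fields to describe the merged packet.
 */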
static void mlx5e_lro_update_hdr(struct sk_buff *skb, struct mlx5_cqe64 *cqe,
				 u32 cqe_bcnt)
{
	struct ethhdr	*eth = (struct ethhdr *)(skb->data);
	struct iphdr	*ipv4;
	struct ipv6hdr	*ipv6;
	struct tcphdr	*tcp;
	int network_depth = 0;
	__be16 proto;
	u16 tot_len;

	u8 l4_hdr_type = get_cqe_l4_hdr_type(cqe);
	int tcp_ack = ((CQE_L4_HDR_TYPE_TCP_ACK_NO_DATA  == l4_hdr_type) ||
		       (CQE_L4_HDR_TYPE_TCP_ACK_AND_DATA == l4_hdr_type));

	skb->mac_len = ETH_HLEN;
	proto = __vlan_get_protocol(skb, eth->h_proto, &network_depth);

	ipv4 = (struct iphdr *)(skb->data + network_depth);
	ipv6 = (struct ipv6hdr *)(skb->data + network_depth);
	tot_len = cqe_bcnt - network_depth;

	if (proto == htons(ETH_P_IP)) {
		tcp = (struct tcphdr *)(skb->data + network_depth +
					sizeof(struct iphdr));
		ipv6 = NULL;
		skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;
	} else {
		tcp = (struct tcphdr *)(skb->data + network_depth +
					sizeof(struct ipv6hdr));
		ipv4 = NULL;
		skb_shinfo(skb)->gso_type = SKB_GSO_TCPV6;
	}

	if (get_cqe_lro_tcppsh(cqe))
		tcp->psh                = 1;

	if (tcp_ack) {
		tcp->ack                = 1;
		tcp->ack_seq            = cqe->lro_ack_seq_num;
		tcp->window             = cqe->lro_tcp_win;
	}

	if (ipv4) {
		ipv4->ttl               = cqe->lro_min_ttl;
		ipv4->tot_len           = cpu_to_be16(tot_len);
		ipv4->check             = 0;
		ipv4->check             = ip_fast_csum((unsigned char *)ipv4,
						       ipv4->ihl);
	} else {
		ipv6->hop_limit         = cqe->lro_min_ttl;
		ipv6->payload_len       = cpu_to_be16(tot_len -
						      sizeof(struct ipv6hdr));
	}
}

static inline void mlx5e_skb_set_hash(struct mlx5_cqe64 *cqe,
				      struct sk_buff *skb)
{
	u8 cht = cqe->rss_hash_type;
	int ht = (cht & CQE_RSS_HTYPE_L4) ? PKT_HASH_TYPE_L4 :
		 (cht & CQE_RSS_HTYPE_IP) ? PKT_HASH_TYPE_L3 :
					    PKT_HASH_TYPE_NONE;
	skb_set_hash(skb, be32_to_cpu(cqe->rss_hash_result), ht);
}

static inline bool is_first_ethertype_ip(struct sk_buff *skb)
{
	__be16 ethertype = ((struct ethhdr *)skb->data)->h_proto;

	return (ethertype == htons(ETH_P_IP) || ethertype == htons(ETH_P_IPV6));
}

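/* Checksum strategy: LRO aggregates are trusted (CHECKSUM_UNNECESSARY),
 * plain IP packets get CHECKSUM_COMPLETE with the CQE checksum, and
 * everything else relies on the device's L3/L4 OK bits, marking the inner
 * checksum level for tunneled traffic.
 */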
static inline void mlx5e_handle_csum(struct net_device *netdev,
				     struct mlx5_cqe64 *cqe,
				     struct mlx5e_rq *rq,
				     struct sk_buff *skb,
				     bool   lro)
{
	if (unlikely(!(netdev->features & NETIF_F_RXCSUM)))
		goto csum_none;

	if (lro) {
		skb->ip_summed = CHECKSUM_UNNECESSARY;
		return;
	}

	if (is_first_ethertype_ip(skb)) {
		skb->ip_summed = CHECKSUM_COMPLETE;
		skb->csum = csum_unfold((__force __sum16)cqe->check_sum);
		rq->stats.csum_complete++;
		return;
	}

	if (likely((cqe->hds_ip_ext & CQE_L3_OK) &&
		   (cqe->hds_ip_ext & CQE_L4_OK))) {
		skb->ip_summed = CHECKSUM_UNNECESSARY;
		if (cqe_is_tunneled(cqe)) {
			skb->csum_level = 1;
			skb->encapsulation = 1;
			rq->stats.csum_unnecessary_inner++;
		}
		return;
	}
csum_none:
	skb->ip_summed = CHECKSUM_NONE;
	rq->stats.csum_none++;
}

static inline void mlx5e_build_rx_skb(struct mlx5_cqe64 *cqe,
				      u32 cqe_bcnt,
				      struct mlx5e_rq *rq,
				      struct sk_buff *skb)
{
	struct net_device *netdev = rq->netdev;
	struct mlx5e_tstamp *tstamp = rq->tstamp;
	int lro_num_seg;

	lro_num_seg = be32_to_cpu(cqe->srqn) >> 24;
	if (lro_num_seg > 1) {
		mlx5e_lro_update_hdr(skb, cqe, cqe_bcnt);
		skb_shinfo(skb)->gso_size = DIV_ROUND_UP(cqe_bcnt, lro_num_seg);
		rq->stats.lro_packets++;
		rq->stats.lro_bytes += cqe_bcnt;
	}

	if (unlikely(mlx5e_rx_hw_stamp(tstamp)))
		mlx5e_fill_hwstamp(tstamp, get_cqe_ts(cqe), skb_hwtstamps(skb));

	skb_record_rx_queue(skb, rq->ix);

	if (likely(netdev->features & NETIF_F_RXHASH))
		mlx5e_skb_set_hash(cqe, skb);

	if (cqe_has_vlan(cqe))
		__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q),
				       be16_to_cpu(cqe->vlan_info));

	skb->mark = be32_to_cpu(cqe->sop_drop_qpn) & MLX5E_TC_FLOW_ID_MASK;

	mlx5e_handle_csum(netdev, cqe, rq, skb, !!lro_num_seg);
	skb->protocol = eth_type_trans(skb, netdev);
}

static inline void mlx5e_complete_rx_cqe(struct mlx5e_rq *rq,
					 struct mlx5_cqe64 *cqe,
					 u32 cqe_bcnt,
					 struct sk_buff *skb)
{
	rq->stats.packets++;
	rq->stats.bytes += cqe_bcnt;
	mlx5e_build_rx_skb(cqe, cqe_bcnt, rq, skb);
}

static inline void mlx5e_xmit_xdp_doorbell(struct mlx5e_sq *sq)
{
	struct mlx5_wq_cyc *wq = &sq->wq;
	struct mlx5e_tx_wqe *wqe;
	u16 pi = (sq->pc - MLX5E_XDP_TX_WQEBBS) & wq->sz_m1; /* last pi */

	wqe  = mlx5_wq_cyc_get_wqe(wq, pi);

	wqe->ctrl.fm_ce_se = MLX5_WQE_CTRL_CQ_UPDATE;
	mlx5e_tx_notify_hw(sq, &wqe->ctrl, 0);
}

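/* XDP_TX: transmit the packet back out on the channel's dedicated XDP SQ.
 * The first MLX5E_XDP_MIN_INLINE bytes are copied into the WQE's inline
 * header and the rest is sent by reference to the RX page; doorbells are
 * batched and rung once per NAPI poll (or when the SQ fills up).
 */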
static inline void mlx5e_xmit_xdp_frame(struct mlx5e_rq *rq,
					struct mlx5e_dma_info *di,
					unsigned int data_offset,
					int len)
{
	struct mlx5e_sq          *sq   = &rq->channel->xdp_sq;
	struct mlx5_wq_cyc       *wq   = &sq->wq;
	u16                      pi    = sq->pc & wq->sz_m1;
	struct mlx5e_tx_wqe      *wqe  = mlx5_wq_cyc_get_wqe(wq, pi);
	struct mlx5e_sq_wqe_info *wi   = &sq->db.xdp.wqe_info[pi];

	struct mlx5_wqe_ctrl_seg *cseg = &wqe->ctrl;
	struct mlx5_wqe_eth_seg  *eseg = &wqe->eth;
	struct mlx5_wqe_data_seg *dseg;

	dma_addr_t dma_addr  = di->addr + data_offset + MLX5E_XDP_MIN_INLINE;
	unsigned int dma_len = len - MLX5E_XDP_MIN_INLINE;
	void *data           = page_address(di->page) + data_offset;

	if (unlikely(!mlx5e_sq_has_room_for(sq, MLX5E_XDP_TX_WQEBBS))) {
		if (sq->db.xdp.doorbell) {
			/* SQ is full, ring doorbell */
			mlx5e_xmit_xdp_doorbell(sq);
			sq->db.xdp.doorbell = false;
		}
		rq->stats.xdp_tx_full++;
		mlx5e_page_release(rq, di, true);
		return;
	}

	dma_sync_single_for_device(sq->pdev, dma_addr, dma_len,
				   PCI_DMA_TODEVICE);

	memset(wqe, 0, sizeof(*wqe));

	/* copy the inline part */
	memcpy(eseg->inline_hdr_start, data, MLX5E_XDP_MIN_INLINE);
	eseg->inline_hdr_sz = cpu_to_be16(MLX5E_XDP_MIN_INLINE);

	dseg = (struct mlx5_wqe_data_seg *)cseg + (MLX5E_XDP_TX_DS_COUNT - 1);

	/* write the dma part */
	dseg->addr       = cpu_to_be64(dma_addr);
	dseg->byte_count = cpu_to_be32(dma_len);
	dseg->lkey       = sq->mkey_be;

	cseg->opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | MLX5_OPCODE_SEND);
	cseg->qpn_ds = cpu_to_be32((sq->sqn << 8) | MLX5E_XDP_TX_DS_COUNT);

	sq->db.xdp.di[pi] = *di;
	wi->opcode     = MLX5_OPCODE_SEND;
	wi->num_wqebbs = MLX5E_XDP_TX_WQEBBS;
	sq->pc += MLX5E_XDP_TX_WQEBBS;

	sq->db.xdp.doorbell = true;
	rq->stats.xdp_tx++;
}

/* returns true if packet was consumed by xdp */
static inline bool mlx5e_xdp_handle(struct mlx5e_rq *rq,
				    const struct bpf_prog *prog,
				    struct mlx5e_dma_info *di,
				    void *data, u16 len)
{
	struct xdp_buff xdp;
	u32 act;

	if (!prog)
		return false;

	xdp.data = data;
	xdp.data_end = xdp.data + len;
	act = bpf_prog_run_xdp(prog, &xdp);
	switch (act) {
	case XDP_PASS:
		return false;
	case XDP_TX:
		mlx5e_xmit_xdp_frame(rq, di, MLX5_RX_HEADROOM, len);
		return true;
	default:
		bpf_warn_invalid_xdp_action(act);
	case XDP_ABORTED:
	case XDP_DROP:
		rq->stats.xdp_drop++;
		mlx5e_page_release(rq, di, true);
		return true;
	}
}

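/* Linked-list RQ path: run XDP on the raw buffer first, then wrap the page
 * in an skb with build_skb().  An extra page reference is taken so the page
 * can be returned to the RQ page cache while the skb still points into it.
 */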
static inline
struct sk_buff *skb_from_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe,
			     u16 wqe_counter, u32 cqe_bcnt)
{
	struct bpf_prog *xdp_prog = READ_ONCE(rq->xdp_prog);
	struct mlx5e_dma_info *di;
	struct sk_buff *skb;
	void *va, *data;

	di             = &rq->dma_info[wqe_counter];
	va             = page_address(di->page);
	data           = va + MLX5_RX_HEADROOM;

	dma_sync_single_range_for_cpu(rq->pdev,
				      di->addr,
				      MLX5_RX_HEADROOM,
				      rq->buff.wqe_sz,
				      DMA_FROM_DEVICE);
	prefetch(data);

	if (unlikely((cqe->op_own >> 4) != MLX5_CQE_RESP_SEND)) {
		rq->stats.wqe_err++;
		mlx5e_page_release(rq, di, true);
		return NULL;
	}

	if (mlx5e_xdp_handle(rq, xdp_prog, di, data, cqe_bcnt))
		return NULL; /* page/packet was consumed by XDP */

	skb = build_skb(va, RQ_PAGE_SIZE(rq));
	if (unlikely(!skb)) {
		rq->stats.buff_alloc_err++;
		mlx5e_page_release(rq, di, true);
		return NULL;
	}

	/* queue up for recycling .. */
	page_ref_inc(di->page);
	mlx5e_page_release(rq, di, true);

	skb_reserve(skb, MLX5_RX_HEADROOM);
	skb_put(skb, cqe_bcnt);

	return skb;
}

void mlx5e_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe)
{
	struct mlx5e_rx_wqe *wqe;
	__be16 wqe_counter_be;
	struct sk_buff *skb;
	u16 wqe_counter;
	u32 cqe_bcnt;

	wqe_counter_be = cqe->wqe_counter;
	wqe_counter    = be16_to_cpu(wqe_counter_be);
	wqe            = mlx5_wq_ll_get_wqe(&rq->wq, wqe_counter);
	cqe_bcnt       = be32_to_cpu(cqe->byte_cnt);

	skb = skb_from_cqe(rq, cqe, wqe_counter, cqe_bcnt);
	if (!skb)
		goto wq_ll_pop;

	mlx5e_complete_rx_cqe(rq, cqe, cqe_bcnt, skb);
	napi_gro_receive(rq->cq.napi, skb);

wq_ll_pop:
	mlx5_wq_ll_pop(&rq->wq, wqe_counter_be,
		       &wqe->next.next_wqe_index);
}

void mlx5e_handle_rx_cqe_rep(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe)
{
	struct net_device *netdev = rq->netdev;
	struct mlx5e_priv *priv = netdev_priv(netdev);
	struct mlx5_eswitch_rep *rep = priv->ppriv;
	struct mlx5e_rx_wqe *wqe;
	struct sk_buff *skb;
	__be16 wqe_counter_be;
	u16 wqe_counter;
	u32 cqe_bcnt;

	wqe_counter_be = cqe->wqe_counter;
	wqe_counter    = be16_to_cpu(wqe_counter_be);
	wqe            = mlx5_wq_ll_get_wqe(&rq->wq, wqe_counter);
	cqe_bcnt       = be32_to_cpu(cqe->byte_cnt);

	skb = skb_from_cqe(rq, cqe, wqe_counter, cqe_bcnt);
	if (!skb)
		goto wq_ll_pop;

	mlx5e_complete_rx_cqe(rq, cqe, cqe_bcnt, skb);

	if (rep->vlan && skb_vlan_tag_present(skb))
		skb_vlan_pop(skb);

	napi_gro_receive(rq->cq.napi, skb);

wq_ll_pop:
	mlx5_wq_ll_pop(&rq->wq, wqe_counter_be,
		       &wqe->next.next_wqe_index);
}

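/* Striding RQ path: copy up to MLX5_MPWRQ_SMALL_PACKET_THRESHOLD bytes of
 * headers into the skb's linear part and attach the remaining strides as
 * page fragments, crossing page boundaries within the MPWQE as needed.
 */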
static inline void mlx5e_mpwqe_fill_rx_skb(struct mlx5e_rq *rq,
					   struct mlx5_cqe64 *cqe,
					   struct mlx5e_mpw_info *wi,
					   u32 cqe_bcnt,
					   struct sk_buff *skb)
{
	u16 stride_ix      = mpwrq_get_cqe_stride_index(cqe);
	u32 wqe_offset     = stride_ix * rq->mpwqe_stride_sz;
	u32 head_offset    = wqe_offset & (PAGE_SIZE - 1);
	u32 page_idx       = wqe_offset >> PAGE_SHIFT;
	u32 head_page_idx  = page_idx;
	u16 headlen = min_t(u16, MLX5_MPWRQ_SMALL_PACKET_THRESHOLD, cqe_bcnt);
	u32 frag_offset    = head_offset + headlen;
	u16 byte_cnt       = cqe_bcnt - headlen;

	if (unlikely(frag_offset >= PAGE_SIZE)) {
		page_idx++;
		frag_offset -= PAGE_SIZE;
	}

	while (byte_cnt) {
		u32 pg_consumed_bytes =
			min_t(u32, PAGE_SIZE - frag_offset, byte_cnt);

		mlx5e_add_skb_frag_mpwqe(rq, skb, wi, page_idx, frag_offset,
					 pg_consumed_bytes);
		byte_cnt -= pg_consumed_bytes;
		frag_offset = 0;
		page_idx++;
	}
	/* copy header */
	mlx5e_copy_skb_header_mpwqe(rq->pdev, skb, wi, head_page_idx,
				    head_offset, headlen);
	/* skb linear part was allocated with headlen and aligned to long */
	skb->tail += headlen;
	skb->len  += headlen;
}

void mlx5e_handle_rx_cqe_mpwrq(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe)
{
	u16 cstrides       = mpwrq_get_cqe_consumed_strides(cqe);
	u16 wqe_id         = be16_to_cpu(cqe->wqe_id);
	struct mlx5e_mpw_info *wi = &rq->mpwqe.info[wqe_id];
	struct mlx5e_rx_wqe  *wqe = mlx5_wq_ll_get_wqe(&rq->wq, wqe_id);
	struct sk_buff *skb;
	u16 cqe_bcnt;

	wi->consumed_strides += cstrides;

	if (unlikely((cqe->op_own >> 4) != MLX5_CQE_RESP_SEND)) {
		rq->stats.wqe_err++;
		goto mpwrq_cqe_out;
	}

	if (unlikely(mpwrq_is_filler_cqe(cqe))) {
		rq->stats.mpwqe_filler++;
		goto mpwrq_cqe_out;
	}

	skb = napi_alloc_skb(rq->cq.napi,
			     ALIGN(MLX5_MPWRQ_SMALL_PACKET_THRESHOLD,
				   sizeof(long)));
	if (unlikely(!skb)) {
		rq->stats.buff_alloc_err++;
		goto mpwrq_cqe_out;
	}

	prefetch(skb->data);
	cqe_bcnt = mpwrq_get_cqe_byte_cnt(cqe);

	mlx5e_mpwqe_fill_rx_skb(rq, cqe, wi, cqe_bcnt, skb);
	mlx5e_complete_rx_cqe(rq, cqe, cqe_bcnt, skb);
	napi_gro_receive(rq->cq.napi, skb);

mpwrq_cqe_out:
	if (likely(wi->consumed_strides < rq->mpwqe_num_strides))
		return;

	mlx5e_free_rx_mpwqe(rq, wi);
	mlx5_wq_ll_pop(&rq->wq, cqe->wqe_id, &wqe->next.next_wqe_index);
}

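/* NAPI RX poll: finish any CQE decompression left over from the previous
 * poll, then consume up to @budget completions, ring the pending XDP TX
 * doorbell and update the CQ doorbell record.
 */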
int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget)
{
	struct mlx5e_rq *rq = container_of(cq, struct mlx5e_rq, cq);
	struct mlx5e_sq *xdp_sq = &rq->channel->xdp_sq;
	int work_done = 0;

	if (unlikely(!test_bit(MLX5E_RQ_STATE_ENABLED, &rq->state)))
		return 0;

	if (cq->decmprs_left)
		work_done += mlx5e_decompress_cqes_cont(rq, cq, 0, budget);

	for (; work_done < budget; work_done++) {
		struct mlx5_cqe64 *cqe = mlx5e_get_cqe(cq);

		if (!cqe)
			break;

		if (mlx5_get_cqe_format(cqe) == MLX5_COMPRESSED) {
			work_done +=
				mlx5e_decompress_cqes_start(rq, cq,
							    budget - work_done);
			continue;
		}

		mlx5_cqwq_pop(&cq->wq);

		rq->handle_rx_cqe(rq, cqe);
	}

	if (xdp_sq->db.xdp.doorbell) {
		mlx5e_xmit_xdp_doorbell(xdp_sq);
		xdp_sq->db.xdp.doorbell = false;
	}

	mlx5_cqwq_update_db_record(&cq->wq);

	/* ensure cq space is freed before enabling more cqes */
	wmb();

	return work_done;
}