linux/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
   1/*
   2 * Copyright (c) 2015-2016, Mellanox Technologies. All rights reserved.
   3 *
   4 * This software is available to you under a choice of one of two
   5 * licenses.  You may choose to be licensed under the terms of the GNU
   6 * General Public License (GPL) Version 2, available from the file
   7 * COPYING in the main directory of this source tree, or the
   8 * OpenIB.org BSD license below:
   9 *
  10 *     Redistribution and use in source and binary forms, with or
  11 *     without modification, are permitted provided that the following
  12 *     conditions are met:
  13 *
  14 *      - Redistributions of source code must retain the above
  15 *        copyright notice, this list of conditions and the following
  16 *        disclaimer.
  17 *
  18 *      - Redistributions in binary form must reproduce the above
  19 *        copyright notice, this list of conditions and the following
  20 *        disclaimer in the documentation and/or other materials
  21 *        provided with the distribution.
  22 *
  23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
  27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
  28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  30 * SOFTWARE.
  31 */
  32
  33#include <net/tc_act/tc_gact.h>
  34#include <net/pkt_cls.h>
  35#include <linux/mlx5/fs.h>
  36#include <net/vxlan.h>
  37#include <net/geneve.h>
  38#include <linux/bpf.h>
  39#include <linux/if_bridge.h>
  40#include <net/page_pool.h>
  41#include <net/xdp_sock_drv.h>
  42#include "eswitch.h"
  43#include "en.h"
  44#include "en/txrx.h"
  45#include "en_tc.h"
  46#include "en_rep.h"
  47#include "en_accel/ipsec.h"
  48#include "en_accel/en_accel.h"
  49#include "en_accel/tls.h"
  50#include "accel/ipsec.h"
  51#include "accel/tls.h"
  52#include "lib/vxlan.h"
  53#include "lib/clock.h"
  54#include "en/port.h"
  55#include "en/xdp.h"
  56#include "lib/eq.h"
  57#include "en/monitor_stats.h"
  58#include "en/health.h"
  59#include "en/params.h"
  60#include "en/xsk/pool.h"
  61#include "en/xsk/setup.h"
  62#include "en/xsk/rx.h"
  63#include "en/xsk/tx.h"
  64#include "en/hv_vhca_stats.h"
  65#include "en/devlink.h"
  66#include "lib/mlx5.h"
  67#include "en/ptp.h"
  68#include "qos.h"
  69#include "en/trap.h"
  70#include "fpga/ipsec.h"
  71
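     /* Striding RQ is usable only when the device advertises striding_rq,
      * umr_ptr_rlky and reg_umr_sq, and when an inline UMR WQE fits within
      * the max SQ WQE size reported by the device.
      */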
  72bool mlx5e_check_fragmented_striding_rq_cap(struct mlx5_core_dev *mdev)
  73{
  74        bool striding_rq_umr = MLX5_CAP_GEN(mdev, striding_rq) &&
  75                MLX5_CAP_GEN(mdev, umr_ptr_rlky) &&
  76                MLX5_CAP_ETH(mdev, reg_umr_sq);
  77        u16 max_wqe_sz_cap = MLX5_CAP_GEN(mdev, max_wqe_sz_sq);
  78        bool inline_umr = MLX5E_UMR_WQE_INLINE_SZ <= max_wqe_sz_cap;
  79
  80        if (!striding_rq_umr)
  81                return false;
  82        if (!inline_umr) {
  83                mlx5_core_warn(mdev, "Cannot support Striding RQ: UMR WQE size (%d) exceeds maximum supported (%d).\n",
  84                               (int)MLX5E_UMR_WQE_INLINE_SZ, max_wqe_sz_cap);
  85                return false;
  86        }
  87        return true;
  88}
  89
  90void mlx5e_update_carrier(struct mlx5e_priv *priv)
  91{
  92        struct mlx5_core_dev *mdev = priv->mdev;
  93        u8 port_state;
  94        bool up;
  95
  96        port_state = mlx5_query_vport_state(mdev,
  97                                            MLX5_VPORT_STATE_OP_MOD_VNIC_VPORT,
  98                                            0);
  99
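             /* When the queried state equals the current carrier state,
              * netif_carrier_event() still bumps the carrier change counters
              * so that short link flaps are not lost.
              */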
 100        up = port_state == VPORT_STATE_UP;
 101        if (up == netif_carrier_ok(priv->netdev))
 102                netif_carrier_event(priv->netdev);
 103        if (up) {
 104                netdev_info(priv->netdev, "Link up\n");
 105                netif_carrier_on(priv->netdev);
 106        } else {
 107                netdev_info(priv->netdev, "Link down\n");
 108                netif_carrier_off(priv->netdev);
 109        }
 110}
 111
 112static void mlx5e_update_carrier_work(struct work_struct *work)
 113{
 114        struct mlx5e_priv *priv = container_of(work, struct mlx5e_priv,
 115                                               update_carrier_work);
 116
 117        mutex_lock(&priv->state_lock);
 118        if (test_bit(MLX5E_STATE_OPENED, &priv->state))
 119                if (priv->profile->update_carrier)
 120                        priv->profile->update_carrier(priv);
 121        mutex_unlock(&priv->state_lock);
 122}
 123
 124static void mlx5e_update_stats_work(struct work_struct *work)
 125{
 126        struct mlx5e_priv *priv = container_of(work, struct mlx5e_priv,
 127                                               update_stats_work);
 128
 129        mutex_lock(&priv->state_lock);
 130        priv->profile->update_stats(priv);
 131        mutex_unlock(&priv->state_lock);
 132}
 133
 134void mlx5e_queue_update_stats(struct mlx5e_priv *priv)
 135{
 136        if (!priv->profile->update_stats)
 137                return;
 138
 139        if (unlikely(test_bit(MLX5E_STATE_DESTROYING, &priv->state)))
 140                return;
 141
 142        queue_work(priv->wq, &priv->update_stats_work);
 143}
 144
 145static int async_event(struct notifier_block *nb, unsigned long event, void *data)
 146{
 147        struct mlx5e_priv *priv = container_of(nb, struct mlx5e_priv, events_nb);
 148        struct mlx5_eqe   *eqe = data;
 149
 150        if (event != MLX5_EVENT_TYPE_PORT_CHANGE)
 151                return NOTIFY_DONE;
 152
 153        switch (eqe->sub_type) {
 154        case MLX5_PORT_CHANGE_SUBTYPE_DOWN:
 155        case MLX5_PORT_CHANGE_SUBTYPE_ACTIVE:
 156                queue_work(priv->wq, &priv->update_carrier_work);
 157                break;
 158        default:
 159                return NOTIFY_DONE;
 160        }
 161
 162        return NOTIFY_OK;
 163}
 164
 165static void mlx5e_enable_async_events(struct mlx5e_priv *priv)
 166{
 167        priv->events_nb.notifier_call = async_event;
 168        mlx5_notifier_register(priv->mdev, &priv->events_nb);
 169}
 170
 171static void mlx5e_disable_async_events(struct mlx5e_priv *priv)
 172{
 173        mlx5_notifier_unregister(priv->mdev, &priv->events_nb);
 174}
 175
 176static int blocking_event(struct notifier_block *nb, unsigned long event, void *data)
 177{
 178        struct mlx5e_priv *priv = container_of(nb, struct mlx5e_priv, blocking_events_nb);
 179        int err;
 180
 181        switch (event) {
 182        case MLX5_DRIVER_EVENT_TYPE_TRAP:
 183                err = mlx5e_handle_trap_event(priv, data);
 184                break;
 185        default:
 186                netdev_warn(priv->netdev, "Sync event: Unknown event %ld\n", event);
 187                err = -EINVAL;
 188        }
 189        return err;
 190}
 191
 192static void mlx5e_enable_blocking_events(struct mlx5e_priv *priv)
 193{
 194        priv->blocking_events_nb.notifier_call = blocking_event;
 195        mlx5_blocking_notifier_register(priv->mdev, &priv->blocking_events_nb);
 196}
 197
 198static void mlx5e_disable_blocking_events(struct mlx5e_priv *priv)
 199{
 200        mlx5_blocking_notifier_unregister(priv->mdev, &priv->blocking_events_nb);
 201}
 202
 203static inline void mlx5e_build_umr_wqe(struct mlx5e_rq *rq,
 204                                       struct mlx5e_icosq *sq,
 205                                       struct mlx5e_umr_wqe *wqe)
 206{
 207        struct mlx5_wqe_ctrl_seg      *cseg = &wqe->ctrl;
 208        struct mlx5_wqe_umr_ctrl_seg *ucseg = &wqe->uctrl;
 209        u8 ds_cnt = DIV_ROUND_UP(MLX5E_UMR_WQE_INLINE_SZ, MLX5_SEND_WQE_DS);
 210
 211        cseg->qpn_ds    = cpu_to_be32((sq->sqn << MLX5_WQE_CTRL_QPN_SHIFT) |
 212                                      ds_cnt);
 213        cseg->umr_mkey  = rq->mkey_be;
 214
 215        ucseg->flags = MLX5_UMR_TRANSLATION_OFFSET_EN | MLX5_UMR_INLINE;
 216        ucseg->xlt_octowords =
 217                cpu_to_be16(MLX5_MTT_OCTW(MLX5_MPWRQ_PAGES_PER_WQE));
 218        ucseg->mkey_mask     = cpu_to_be64(MLX5_MKEY_MASK_FREE);
 219}
 220
 221static int mlx5e_rq_alloc_mpwqe_info(struct mlx5e_rq *rq, int node)
 222{
 223        int wq_sz = mlx5_wq_ll_get_size(&rq->mpwqe.wq);
 224
 225        rq->mpwqe.info = kvzalloc_node(array_size(wq_sz,
 226                                                  sizeof(*rq->mpwqe.info)),
 227                                       GFP_KERNEL, node);
 228        if (!rq->mpwqe.info)
 229                return -ENOMEM;
 230
 231        mlx5e_build_umr_wqe(rq, rq->icosq, &rq->mpwqe.umr_wqe);
 232
 233        return 0;
 234}
 235
 236static int mlx5e_create_umr_mkey(struct mlx5_core_dev *mdev,
 237                                 u64 npages, u8 page_shift,
 238                                 struct mlx5_core_mkey *umr_mkey,
 239                                 dma_addr_t filler_addr)
 240{
 241        struct mlx5_mtt *mtt;
 242        int inlen;
 243        void *mkc;
 244        u32 *in;
 245        int err;
 246        int i;
 247
 248        inlen = MLX5_ST_SZ_BYTES(create_mkey_in) + sizeof(*mtt) * npages;
 249
 250        in = kvzalloc(inlen, GFP_KERNEL);
 251        if (!in)
 252                return -ENOMEM;
 253
 254        mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
 255
 256        MLX5_SET(mkc, mkc, free, 1);
 257        MLX5_SET(mkc, mkc, umr_en, 1);
 258        MLX5_SET(mkc, mkc, lw, 1);
 259        MLX5_SET(mkc, mkc, lr, 1);
 260        MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_MTT);
 261        mlx5e_mkey_set_relaxed_ordering(mdev, mkc);
 262        MLX5_SET(mkc, mkc, qpn, 0xffffff);
 263        MLX5_SET(mkc, mkc, pd, mdev->mlx5e_res.hw_objs.pdn);
 264        MLX5_SET64(mkc, mkc, len, npages << page_shift);
 265        MLX5_SET(mkc, mkc, translations_octword_size,
 266                 MLX5_MTT_OCTW(npages));
 267        MLX5_SET(mkc, mkc, log_page_size, page_shift);
 268        MLX5_SET(create_mkey_in, in, translations_octword_actual_size,
 269                 MLX5_MTT_OCTW(npages));
 270
 271        /* Initialize the mkey with all MTTs pointing to a default
 272         * page (filler_addr). When the channels are activated, UMR
 273         * WQEs will redirect the RX WQEs to the actual memory from
 274         * the RQ's pool, while the gaps (wqe_overflow) remain mapped
 275         * to the default page.
 276         */
 277        mtt = MLX5_ADDR_OF(create_mkey_in, in, klm_pas_mtt);
  278        for (i = 0; i < npages; i++)
 279                mtt[i].ptag = cpu_to_be64(filler_addr);
 280
 281        err = mlx5_core_create_mkey(mdev, umr_mkey, in, inlen);
 282
 283        kvfree(in);
 284        return err;
 285}
 286
 287static int mlx5e_create_rq_umr_mkey(struct mlx5_core_dev *mdev, struct mlx5e_rq *rq)
 288{
 289        u64 num_mtts = MLX5E_REQUIRED_MTTS(mlx5_wq_ll_get_size(&rq->mpwqe.wq));
 290
 291        return mlx5e_create_umr_mkey(mdev, num_mtts, PAGE_SHIFT, &rq->umr_mkey,
 292                                     rq->wqe_overflow.addr);
 293}
 294
 295static u64 mlx5e_get_mpwqe_offset(u16 wqe_ix)
 296{
 297        return MLX5E_REQUIRED_MTTS(wqe_ix) << PAGE_SHIFT;
 298}
 299
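     /* Partition the cyclic RQ's fragments over pages: fragments are packed
      * into the current page until the next frag_stride would cross
      * PAGE_SIZE, then the partition advances to the next dma_info entry and
      * the previous fragment is marked last_in_page.
      */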
 300static void mlx5e_init_frags_partition(struct mlx5e_rq *rq)
 301{
 302        struct mlx5e_wqe_frag_info next_frag = {};
 303        struct mlx5e_wqe_frag_info *prev = NULL;
 304        int i;
 305
 306        next_frag.di = &rq->wqe.di[0];
 307
 308        for (i = 0; i < mlx5_wq_cyc_get_size(&rq->wqe.wq); i++) {
 309                struct mlx5e_rq_frag_info *frag_info = &rq->wqe.info.arr[0];
 310                struct mlx5e_wqe_frag_info *frag =
 311                        &rq->wqe.frags[i << rq->wqe.info.log_num_frags];
 312                int f;
 313
 314                for (f = 0; f < rq->wqe.info.num_frags; f++, frag++) {
 315                        if (next_frag.offset + frag_info[f].frag_stride > PAGE_SIZE) {
 316                                next_frag.di++;
 317                                next_frag.offset = 0;
 318                                if (prev)
 319                                        prev->last_in_page = true;
 320                        }
 321                        *frag = next_frag;
 322
 323                        /* prepare next */
 324                        next_frag.offset += frag_info[f].frag_stride;
 325                        prev = frag;
 326                }
 327        }
 328
 329        if (prev)
 330                prev->last_in_page = true;
 331}
 332
 333int mlx5e_init_di_list(struct mlx5e_rq *rq, int wq_sz, int node)
 334{
 335        int len = wq_sz << rq->wqe.info.log_num_frags;
 336
 337        rq->wqe.di = kvzalloc_node(array_size(len, sizeof(*rq->wqe.di)), GFP_KERNEL, node);
 338        if (!rq->wqe.di)
 339                return -ENOMEM;
 340
 341        mlx5e_init_frags_partition(rq);
 342
 343        return 0;
 344}
 345
 346void mlx5e_free_di_list(struct mlx5e_rq *rq)
 347{
 348        kvfree(rq->wqe.di);
 349}
 350
 351static void mlx5e_rq_err_cqe_work(struct work_struct *recover_work)
 352{
 353        struct mlx5e_rq *rq = container_of(recover_work, struct mlx5e_rq, recover_work);
 354
 355        mlx5e_reporter_rq_cqe_err(rq);
 356}
 357
 358static int mlx5e_alloc_mpwqe_rq_drop_page(struct mlx5e_rq *rq)
 359{
 360        rq->wqe_overflow.page = alloc_page(GFP_KERNEL);
 361        if (!rq->wqe_overflow.page)
 362                return -ENOMEM;
 363
 364        rq->wqe_overflow.addr = dma_map_page(rq->pdev, rq->wqe_overflow.page, 0,
 365                                             PAGE_SIZE, rq->buff.map_dir);
 366        if (dma_mapping_error(rq->pdev, rq->wqe_overflow.addr)) {
 367                __free_page(rq->wqe_overflow.page);
 368                return -ENOMEM;
 369        }
 370        return 0;
 371}
 372
 373static void mlx5e_free_mpwqe_rq_drop_page(struct mlx5e_rq *rq)
 374{
  375        dma_unmap_page(rq->pdev, rq->wqe_overflow.addr, PAGE_SIZE,
  376                       rq->buff.map_dir);
  377        __free_page(rq->wqe_overflow.page);
 378}
 379
 380static int mlx5e_init_rxq_rq(struct mlx5e_channel *c, struct mlx5e_params *params,
 381                             struct mlx5e_rq *rq)
 382{
 383        struct mlx5_core_dev *mdev = c->mdev;
 384        int err;
 385
 386        rq->wq_type      = params->rq_wq_type;
 387        rq->pdev         = c->pdev;
 388        rq->netdev       = c->netdev;
 389        rq->priv         = c->priv;
 390        rq->tstamp       = c->tstamp;
 391        rq->clock        = &mdev->clock;
 392        rq->icosq        = &c->icosq;
 393        rq->ix           = c->ix;
 394        rq->mdev         = mdev;
 395        rq->hw_mtu       = MLX5E_SW2HW_MTU(params, params->sw_mtu);
 396        rq->xdpsq        = &c->rq_xdpsq;
 397        rq->stats        = &c->priv->channel_stats[c->ix].rq;
 398        rq->ptp_cyc2time = mlx5_rq_ts_translator(mdev);
 399        err = mlx5e_rq_set_handlers(rq, params, NULL);
 400        if (err)
 401                return err;
 402
 403        return xdp_rxq_info_reg(&rq->xdp_rxq, rq->netdev, rq->ix, 0);
 404}
 405
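     /* Allocate the RQ's software state: take a reference on the XDP program,
      * create the striding (linked-list) or cyclic WQ, set up the UMR mkey or
      * fragment arrays, register a page_pool or the XSK memory model with the
      * XDP RxQ info, and pre-fill the RX WQE data segments. The error labels
      * are placed inside the switch so unwinding matches the WQ type built.
      */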
 406static int mlx5e_alloc_rq(struct mlx5e_params *params,
 407                          struct mlx5e_xsk_param *xsk,
 408                          struct mlx5e_rq_param *rqp,
 409                          int node, struct mlx5e_rq *rq)
 410{
 411        struct page_pool_params pp_params = { 0 };
 412        struct mlx5_core_dev *mdev = rq->mdev;
 413        void *rqc = rqp->rqc;
 414        void *rqc_wq = MLX5_ADDR_OF(rqc, rqc, wq);
 415        u32 pool_size;
 416        int wq_sz;
 417        int err;
 418        int i;
 419
 420        rqp->wq.db_numa_node = node;
 421        INIT_WORK(&rq->recover_work, mlx5e_rq_err_cqe_work);
 422
 423        if (params->xdp_prog)
 424                bpf_prog_inc(params->xdp_prog);
 425        RCU_INIT_POINTER(rq->xdp_prog, params->xdp_prog);
 426
 427        rq->buff.map_dir = params->xdp_prog ? DMA_BIDIRECTIONAL : DMA_FROM_DEVICE;
 428        rq->buff.headroom = mlx5e_get_rq_headroom(mdev, params, xsk);
 429        pool_size = 1 << params->log_rq_mtu_frames;
 430
 431        switch (rq->wq_type) {
 432        case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
 433                err = mlx5_wq_ll_create(mdev, &rqp->wq, rqc_wq, &rq->mpwqe.wq,
 434                                        &rq->wq_ctrl);
 435                if (err)
 436                        goto err_rq_xdp_prog;
 437
 438                err = mlx5e_alloc_mpwqe_rq_drop_page(rq);
 439                if (err)
 440                        goto err_rq_wq_destroy;
 441
 442                rq->mpwqe.wq.db = &rq->mpwqe.wq.db[MLX5_RCV_DBR];
 443
 444                wq_sz = mlx5_wq_ll_get_size(&rq->mpwqe.wq);
 445
 446                pool_size = MLX5_MPWRQ_PAGES_PER_WQE <<
 447                        mlx5e_mpwqe_get_log_rq_size(params, xsk);
 448
 449                rq->mpwqe.log_stride_sz = mlx5e_mpwqe_get_log_stride_size(mdev, params, xsk);
 450                rq->mpwqe.num_strides =
 451                        BIT(mlx5e_mpwqe_get_log_num_strides(mdev, params, xsk));
 452
 453                rq->buff.frame0_sz = (1 << rq->mpwqe.log_stride_sz);
 454
 455                err = mlx5e_create_rq_umr_mkey(mdev, rq);
 456                if (err)
 457                        goto err_rq_drop_page;
 458                rq->mkey_be = cpu_to_be32(rq->umr_mkey.key);
 459
 460                err = mlx5e_rq_alloc_mpwqe_info(rq, node);
 461                if (err)
 462                        goto err_rq_mkey;
 463                break;
 464        default: /* MLX5_WQ_TYPE_CYCLIC */
 465                err = mlx5_wq_cyc_create(mdev, &rqp->wq, rqc_wq, &rq->wqe.wq,
 466                                         &rq->wq_ctrl);
 467                if (err)
 468                        goto err_rq_xdp_prog;
 469
 470                rq->wqe.wq.db = &rq->wqe.wq.db[MLX5_RCV_DBR];
 471
 472                wq_sz = mlx5_wq_cyc_get_size(&rq->wqe.wq);
 473
 474                rq->wqe.info = rqp->frags_info;
 475                rq->buff.frame0_sz = rq->wqe.info.arr[0].frag_stride;
 476
 477                rq->wqe.frags =
 478                        kvzalloc_node(array_size(sizeof(*rq->wqe.frags),
 479                                        (wq_sz << rq->wqe.info.log_num_frags)),
 480                                      GFP_KERNEL, node);
 481                if (!rq->wqe.frags) {
 482                        err = -ENOMEM;
 483                        goto err_rq_wq_destroy;
 484                }
 485
 486                err = mlx5e_init_di_list(rq, wq_sz, node);
 487                if (err)
 488                        goto err_rq_frags;
 489
 490                rq->mkey_be = cpu_to_be32(mdev->mlx5e_res.hw_objs.mkey.key);
 491        }
 492
 493        if (xsk) {
 494                err = xdp_rxq_info_reg_mem_model(&rq->xdp_rxq,
 495                                                 MEM_TYPE_XSK_BUFF_POOL, NULL);
 496                xsk_pool_set_rxq_info(rq->xsk_pool, &rq->xdp_rxq);
 497        } else {
 498                /* Create a page_pool and register it with rxq */
 499                pp_params.order     = 0;
  500                pp_params.flags     = 0; /* No internal DMA mapping in page_pool */
 501                pp_params.pool_size = pool_size;
 502                pp_params.nid       = node;
 503                pp_params.dev       = rq->pdev;
 504                pp_params.dma_dir   = rq->buff.map_dir;
 505
  506                /* page_pool can be used even when there is no rq->xdp_prog:
  507                 * since page_pool does not handle DMA mapping here, there is
  508                 * no state that must be cleared. page_pool also gracefully
  509                 * handles an elevated refcnt.
  510                 */
 511                rq->page_pool = page_pool_create(&pp_params);
 512                if (IS_ERR(rq->page_pool)) {
 513                        err = PTR_ERR(rq->page_pool);
 514                        rq->page_pool = NULL;
 515                        goto err_free_by_rq_type;
 516                }
 517                if (xdp_rxq_info_is_reg(&rq->xdp_rxq))
 518                        err = xdp_rxq_info_reg_mem_model(&rq->xdp_rxq,
 519                                                         MEM_TYPE_PAGE_POOL, rq->page_pool);
 520        }
 521        if (err)
 522                goto err_free_by_rq_type;
 523
 524        for (i = 0; i < wq_sz; i++) {
 525                if (rq->wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) {
 526                        struct mlx5e_rx_wqe_ll *wqe =
 527                                mlx5_wq_ll_get_wqe(&rq->mpwqe.wq, i);
 528                        u32 byte_count =
 529                                rq->mpwqe.num_strides << rq->mpwqe.log_stride_sz;
 530                        u64 dma_offset = mlx5e_get_mpwqe_offset(i);
 531
 532                        wqe->data[0].addr = cpu_to_be64(dma_offset + rq->buff.headroom);
 533                        wqe->data[0].byte_count = cpu_to_be32(byte_count);
 534                        wqe->data[0].lkey = rq->mkey_be;
 535                } else {
 536                        struct mlx5e_rx_wqe_cyc *wqe =
 537                                mlx5_wq_cyc_get_wqe(&rq->wqe.wq, i);
 538                        int f;
 539
 540                        for (f = 0; f < rq->wqe.info.num_frags; f++) {
 541                                u32 frag_size = rq->wqe.info.arr[f].frag_size |
 542                                        MLX5_HW_START_PADDING;
 543
 544                                wqe->data[f].byte_count = cpu_to_be32(frag_size);
 545                                wqe->data[f].lkey = rq->mkey_be;
 546                        }
  547                        /* If num_frags is not a power of two, terminate with an empty data segment */
 548                        if (rq->wqe.info.num_frags < (1 << rq->wqe.info.log_num_frags)) {
 549                                wqe->data[f].byte_count = 0;
 550                                wqe->data[f].lkey = cpu_to_be32(MLX5_INVALID_LKEY);
 551                                wqe->data[f].addr = 0;
 552                        }
 553                }
 554        }
 555
 556        INIT_WORK(&rq->dim.work, mlx5e_rx_dim_work);
 557
 558        switch (params->rx_cq_moderation.cq_period_mode) {
 559        case MLX5_CQ_PERIOD_MODE_START_FROM_CQE:
 560                rq->dim.mode = DIM_CQ_PERIOD_MODE_START_FROM_CQE;
 561                break;
 562        case MLX5_CQ_PERIOD_MODE_START_FROM_EQE:
 563        default:
 564                rq->dim.mode = DIM_CQ_PERIOD_MODE_START_FROM_EQE;
 565        }
 566
 567        rq->page_cache.head = 0;
 568        rq->page_cache.tail = 0;
 569
 570        return 0;
 571
 572err_free_by_rq_type:
 573        switch (rq->wq_type) {
 574        case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
 575                kvfree(rq->mpwqe.info);
 576err_rq_mkey:
 577                mlx5_core_destroy_mkey(mdev, &rq->umr_mkey);
 578err_rq_drop_page:
 579                mlx5e_free_mpwqe_rq_drop_page(rq);
 580                break;
 581        default: /* MLX5_WQ_TYPE_CYCLIC */
 582                mlx5e_free_di_list(rq);
 583err_rq_frags:
 584                kvfree(rq->wqe.frags);
 585        }
 586err_rq_wq_destroy:
 587        mlx5_wq_destroy(&rq->wq_ctrl);
 588err_rq_xdp_prog:
 589        if (params->xdp_prog)
 590                bpf_prog_put(params->xdp_prog);
 591
 592        return err;
 593}
 594
 595static void mlx5e_free_rq(struct mlx5e_rq *rq)
 596{
 597        struct bpf_prog *old_prog;
 598        int i;
 599
 600        if (xdp_rxq_info_is_reg(&rq->xdp_rxq)) {
 601                old_prog = rcu_dereference_protected(rq->xdp_prog,
 602                                                     lockdep_is_held(&rq->priv->state_lock));
 603                if (old_prog)
 604                        bpf_prog_put(old_prog);
 605        }
 606
 607        switch (rq->wq_type) {
 608        case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
 609                kvfree(rq->mpwqe.info);
 610                mlx5_core_destroy_mkey(rq->mdev, &rq->umr_mkey);
 611                mlx5e_free_mpwqe_rq_drop_page(rq);
 612                break;
 613        default: /* MLX5_WQ_TYPE_CYCLIC */
 614                kvfree(rq->wqe.frags);
 615                mlx5e_free_di_list(rq);
 616        }
 617
 618        for (i = rq->page_cache.head; i != rq->page_cache.tail;
 619             i = (i + 1) & (MLX5E_CACHE_SIZE - 1)) {
 620                struct mlx5e_dma_info *dma_info = &rq->page_cache.page_cache[i];
 621
 622                /* With AF_XDP, page_cache is not used, so this loop is not
 623                 * entered, and it's safe to call mlx5e_page_release_dynamic
 624                 * directly.
 625                 */
 626                mlx5e_page_release_dynamic(rq, dma_info, false);
 627        }
 628
 629        xdp_rxq_info_unreg(&rq->xdp_rxq);
 630        page_pool_destroy(rq->page_pool);
 631        mlx5_wq_destroy(&rq->wq_ctrl);
 632}
 633
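     /* Issue the CREATE_RQ command: copy the prepared RQ context from the
      * params, point it at the CQ, request the RST state, select the
      * timestamp format, and pass the WQ page list and doorbell address.
      */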
 634int mlx5e_create_rq(struct mlx5e_rq *rq, struct mlx5e_rq_param *param)
 635{
 636        struct mlx5_core_dev *mdev = rq->mdev;
 637        u8 ts_format;
 638        void *in;
 639        void *rqc;
 640        void *wq;
 641        int inlen;
 642        int err;
 643
 644        inlen = MLX5_ST_SZ_BYTES(create_rq_in) +
 645                sizeof(u64) * rq->wq_ctrl.buf.npages;
 646        in = kvzalloc(inlen, GFP_KERNEL);
 647        if (!in)
 648                return -ENOMEM;
 649
 650        ts_format = mlx5_is_real_time_rq(mdev) ?
 651                            MLX5_TIMESTAMP_FORMAT_REAL_TIME :
 652                            MLX5_TIMESTAMP_FORMAT_FREE_RUNNING;
 653        rqc = MLX5_ADDR_OF(create_rq_in, in, ctx);
 654        wq  = MLX5_ADDR_OF(rqc, rqc, wq);
 655
 656        memcpy(rqc, param->rqc, sizeof(param->rqc));
 657
 658        MLX5_SET(rqc,  rqc, cqn,                rq->cq.mcq.cqn);
 659        MLX5_SET(rqc,  rqc, state,              MLX5_RQC_STATE_RST);
 660        MLX5_SET(rqc,  rqc, ts_format,          ts_format);
 661        MLX5_SET(wq,   wq,  log_wq_pg_sz,       rq->wq_ctrl.buf.page_shift -
 662                                                MLX5_ADAPTER_PAGE_SHIFT);
 663        MLX5_SET64(wq, wq,  dbr_addr,           rq->wq_ctrl.db.dma);
 664
 665        mlx5_fill_page_frag_array(&rq->wq_ctrl.buf,
 666                                  (__be64 *)MLX5_ADDR_OF(wq, wq, pas));
 667
 668        err = mlx5_core_create_rq(mdev, in, inlen, &rq->rqn);
 669
 670        kvfree(in);
 671
 672        return err;
 673}
 674
 675int mlx5e_modify_rq_state(struct mlx5e_rq *rq, int curr_state, int next_state)
 676{
 677        struct mlx5_core_dev *mdev = rq->mdev;
 678
 679        void *in;
 680        void *rqc;
 681        int inlen;
 682        int err;
 683
 684        inlen = MLX5_ST_SZ_BYTES(modify_rq_in);
 685        in = kvzalloc(inlen, GFP_KERNEL);
 686        if (!in)
 687                return -ENOMEM;
 688
 689        if (curr_state == MLX5_RQC_STATE_RST && next_state == MLX5_RQC_STATE_RDY)
 690                mlx5e_rqwq_reset(rq);
 691
 692        rqc = MLX5_ADDR_OF(modify_rq_in, in, ctx);
 693
 694        MLX5_SET(modify_rq_in, in, rq_state, curr_state);
 695        MLX5_SET(rqc, rqc, state, next_state);
 696
 697        err = mlx5_core_modify_rq(mdev, rq->rqn, in);
 698
 699        kvfree(in);
 700
 701        return err;
 702}
 703
 704static int mlx5e_modify_rq_scatter_fcs(struct mlx5e_rq *rq, bool enable)
 705{
 706        struct mlx5_core_dev *mdev = rq->mdev;
 707
 708        void *in;
 709        void *rqc;
 710        int inlen;
 711        int err;
 712
 713        inlen = MLX5_ST_SZ_BYTES(modify_rq_in);
 714        in = kvzalloc(inlen, GFP_KERNEL);
 715        if (!in)
 716                return -ENOMEM;
 717
 718        rqc = MLX5_ADDR_OF(modify_rq_in, in, ctx);
 719
 720        MLX5_SET(modify_rq_in, in, rq_state, MLX5_RQC_STATE_RDY);
 721        MLX5_SET64(modify_rq_in, in, modify_bitmask,
 722                   MLX5_MODIFY_RQ_IN_MODIFY_BITMASK_SCATTER_FCS);
 723        MLX5_SET(rqc, rqc, scatter_fcs, enable);
 724        MLX5_SET(rqc, rqc, state, MLX5_RQC_STATE_RDY);
 725
 726        err = mlx5_core_modify_rq(mdev, rq->rqn, in);
 727
 728        kvfree(in);
 729
 730        return err;
 731}
 732
 733static int mlx5e_modify_rq_vsd(struct mlx5e_rq *rq, bool vsd)
 734{
 735        struct mlx5_core_dev *mdev = rq->mdev;
 736        void *in;
 737        void *rqc;
 738        int inlen;
 739        int err;
 740
 741        inlen = MLX5_ST_SZ_BYTES(modify_rq_in);
 742        in = kvzalloc(inlen, GFP_KERNEL);
 743        if (!in)
 744                return -ENOMEM;
 745
 746        rqc = MLX5_ADDR_OF(modify_rq_in, in, ctx);
 747
 748        MLX5_SET(modify_rq_in, in, rq_state, MLX5_RQC_STATE_RDY);
 749        MLX5_SET64(modify_rq_in, in, modify_bitmask,
 750                   MLX5_MODIFY_RQ_IN_MODIFY_BITMASK_VSD);
 751        MLX5_SET(rqc, rqc, vsd, vsd);
 752        MLX5_SET(rqc, rqc, state, MLX5_RQC_STATE_RDY);
 753
 754        err = mlx5_core_modify_rq(mdev, rq->rqn, in);
 755
 756        kvfree(in);
 757
 758        return err;
 759}
 760
 761void mlx5e_destroy_rq(struct mlx5e_rq *rq)
 762{
 763        mlx5_core_destroy_rq(rq->mdev, rq->rqn);
 764}
 765
 766int mlx5e_wait_for_min_rx_wqes(struct mlx5e_rq *rq, int wait_time)
 767{
 768        unsigned long exp_time = jiffies + msecs_to_jiffies(wait_time);
 769
 770        u16 min_wqes = mlx5_min_rx_wqes(rq->wq_type, mlx5e_rqwq_get_size(rq));
 771
 772        do {
 773                if (mlx5e_rqwq_get_cur_sz(rq) >= min_wqes)
 774                        return 0;
 775
 776                msleep(20);
 777        } while (time_before(jiffies, exp_time));
 778
 779        netdev_warn(rq->netdev, "Failed to get min RX wqes on Channel[%d] RQN[0x%x] wq cur_sz(%d) min_rx_wqes(%d)\n",
 780                    rq->ix, rq->rqn, mlx5e_rqwq_get_cur_sz(rq), min_wqes);
 781
 782        mlx5e_reporter_rx_timeout(rq);
 783        return -ETIMEDOUT;
 784}
 785
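     /* Drop striding-RQ WQEs whose UMR posting is still in flight: walk the
      * linked list starting at wq->head, deallocate each entry, and reset the
      * UMR in-progress/completed bookkeeping.
      */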
 786void mlx5e_free_rx_in_progress_descs(struct mlx5e_rq *rq)
 787{
 788        struct mlx5_wq_ll *wq;
 789        u16 head;
 790        int i;
 791
 792        if (rq->wq_type != MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ)
 793                return;
 794
 795        wq = &rq->mpwqe.wq;
 796        head = wq->head;
 797
 798        /* Outstanding UMR WQEs (in progress) start at wq->head */
 799        for (i = 0; i < rq->mpwqe.umr_in_progress; i++) {
 800                rq->dealloc_wqe(rq, head);
 801                head = mlx5_wq_ll_get_wqe_next_ix(wq, head);
 802        }
 803
 804        rq->mpwqe.actual_wq_head = wq->head;
 805        rq->mpwqe.umr_in_progress = 0;
 806        rq->mpwqe.umr_completed = 0;
 807}
 808
 809void mlx5e_free_rx_descs(struct mlx5e_rq *rq)
 810{
 811        __be16 wqe_ix_be;
 812        u16 wqe_ix;
 813
 814        if (rq->wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) {
 815                struct mlx5_wq_ll *wq = &rq->mpwqe.wq;
 816
 817                mlx5e_free_rx_in_progress_descs(rq);
 818
 819                while (!mlx5_wq_ll_is_empty(wq)) {
 820                        struct mlx5e_rx_wqe_ll *wqe;
 821
 822                        wqe_ix_be = *wq->tail_next;
 823                        wqe_ix    = be16_to_cpu(wqe_ix_be);
 824                        wqe       = mlx5_wq_ll_get_wqe(wq, wqe_ix);
 825                        rq->dealloc_wqe(rq, wqe_ix);
 826                        mlx5_wq_ll_pop(wq, wqe_ix_be,
 827                                       &wqe->next.next_wqe_index);
 828                }
 829        } else {
 830                struct mlx5_wq_cyc *wq = &rq->wqe.wq;
 831
 832                while (!mlx5_wq_cyc_is_empty(wq)) {
 833                        wqe_ix = mlx5_wq_cyc_get_tail(wq);
 834                        rq->dealloc_wqe(rq, wqe_ix);
 835                        mlx5_wq_cyc_pop(wq);
 836                }
 837        }
 838
 839}
 840
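     /* Full RQ bring-up: allocate the software RQ, create the hardware RQ,
      * move it from RST to RDY, and set the RQ state bits (FPGA TLS, full
      * checksum, DIM, no-csum-complete with XDP, HW stride index) according
      * to device capabilities and channel parameters.
      */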
 841int mlx5e_open_rq(struct mlx5e_params *params, struct mlx5e_rq_param *param,
 842                  struct mlx5e_xsk_param *xsk, int node,
 843                  struct mlx5e_rq *rq)
 844{
 845        struct mlx5_core_dev *mdev = rq->mdev;
 846        int err;
 847
 848        err = mlx5e_alloc_rq(params, xsk, param, node, rq);
 849        if (err)
 850                return err;
 851
 852        err = mlx5e_create_rq(rq, param);
 853        if (err)
 854                goto err_free_rq;
 855
 856        err = mlx5e_modify_rq_state(rq, MLX5_RQC_STATE_RST, MLX5_RQC_STATE_RDY);
 857        if (err)
 858                goto err_destroy_rq;
 859
 860        if (mlx5e_is_tls_on(rq->priv) && !mlx5e_accel_is_ktls_device(mdev))
 861                __set_bit(MLX5E_RQ_STATE_FPGA_TLS, &rq->state); /* must be FPGA */
 862
 863        if (MLX5_CAP_ETH(mdev, cqe_checksum_full))
 864                __set_bit(MLX5E_RQ_STATE_CSUM_FULL, &rq->state);
 865
 866        if (params->rx_dim_enabled)
 867                __set_bit(MLX5E_RQ_STATE_AM, &rq->state);
 868
 869        /* We disable csum_complete when XDP is enabled since
 870         * XDP programs might manipulate packets which will render
 871         * skb->checksum incorrect.
 872         */
 873        if (MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_NO_CSUM_COMPLETE) || params->xdp_prog)
 874                __set_bit(MLX5E_RQ_STATE_NO_CSUM_COMPLETE, &rq->state);
 875
 876        /* For CQE compression on striding RQ, use stride index provided by
 877         * HW if capability is supported.
 878         */
 879        if (MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_STRIDING_RQ) &&
 880            MLX5_CAP_GEN(mdev, mini_cqe_resp_stride_index))
 881                __set_bit(MLX5E_RQ_STATE_MINI_CQE_HW_STRIDX, &rq->state);
 882
 883        return 0;
 884
 885err_destroy_rq:
 886        mlx5e_destroy_rq(rq);
 887err_free_rq:
 888        mlx5e_free_rq(rq);
 889
 890        return err;
 891}
 892
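     /* Mark the RQ enabled and kick NAPI so RX buffers get posted: either by
      * triggering an interrupt through the associated ICOSQ or, when the RQ
      * has no ICOSQ, by scheduling NAPI directly.
      */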
 893void mlx5e_activate_rq(struct mlx5e_rq *rq)
 894{
 895        set_bit(MLX5E_RQ_STATE_ENABLED, &rq->state);
 896        if (rq->icosq) {
 897                mlx5e_trigger_irq(rq->icosq);
 898        } else {
 899                local_bh_disable();
 900                napi_schedule(rq->cq.napi);
 901                local_bh_enable();
 902        }
 903}
 904
 905void mlx5e_deactivate_rq(struct mlx5e_rq *rq)
 906{
 907        clear_bit(MLX5E_RQ_STATE_ENABLED, &rq->state);
 908        synchronize_net(); /* Sync with NAPI to prevent mlx5e_post_rx_wqes. */
 909}
 910
 911void mlx5e_close_rq(struct mlx5e_rq *rq)
 912{
 913        cancel_work_sync(&rq->dim.work);
 914        if (rq->icosq)
 915                cancel_work_sync(&rq->icosq->recover_work);
 916        cancel_work_sync(&rq->recover_work);
 917        mlx5e_destroy_rq(rq);
 918        mlx5e_free_rx_descs(rq);
 919        mlx5e_free_rq(rq);
 920}
 921
 922static void mlx5e_free_xdpsq_db(struct mlx5e_xdpsq *sq)
 923{
 924        kvfree(sq->db.xdpi_fifo.xi);
 925        kvfree(sq->db.wqe_info);
 926}
 927
 928static int mlx5e_alloc_xdpsq_fifo(struct mlx5e_xdpsq *sq, int numa)
 929{
 930        struct mlx5e_xdp_info_fifo *xdpi_fifo = &sq->db.xdpi_fifo;
 931        int wq_sz        = mlx5_wq_cyc_get_size(&sq->wq);
 932        int dsegs_per_wq = wq_sz * MLX5_SEND_WQEBB_NUM_DS;
 933
 934        xdpi_fifo->xi = kvzalloc_node(sizeof(*xdpi_fifo->xi) * dsegs_per_wq,
 935                                      GFP_KERNEL, numa);
 936        if (!xdpi_fifo->xi)
 937                return -ENOMEM;
 938
 939        xdpi_fifo->pc   = &sq->xdpi_fifo_pc;
 940        xdpi_fifo->cc   = &sq->xdpi_fifo_cc;
 941        xdpi_fifo->mask = dsegs_per_wq - 1;
 942
 943        return 0;
 944}
 945
 946static int mlx5e_alloc_xdpsq_db(struct mlx5e_xdpsq *sq, int numa)
 947{
 948        int wq_sz = mlx5_wq_cyc_get_size(&sq->wq);
 949        int err;
 950
 951        sq->db.wqe_info = kvzalloc_node(sizeof(*sq->db.wqe_info) * wq_sz,
 952                                        GFP_KERNEL, numa);
 953        if (!sq->db.wqe_info)
 954                return -ENOMEM;
 955
 956        err = mlx5e_alloc_xdpsq_fifo(sq, numa);
 957        if (err) {
 958                mlx5e_free_xdpsq_db(sq);
 959                return err;
 960        }
 961
 962        return 0;
 963}
 964
 965static int mlx5e_alloc_xdpsq(struct mlx5e_channel *c,
 966                             struct mlx5e_params *params,
 967                             struct xsk_buff_pool *xsk_pool,
 968                             struct mlx5e_sq_param *param,
 969                             struct mlx5e_xdpsq *sq,
 970                             bool is_redirect)
 971{
 972        void *sqc_wq               = MLX5_ADDR_OF(sqc, param->sqc, wq);
 973        struct mlx5_core_dev *mdev = c->mdev;
 974        struct mlx5_wq_cyc *wq = &sq->wq;
 975        int err;
 976
 977        sq->pdev      = c->pdev;
 978        sq->mkey_be   = c->mkey_be;
 979        sq->channel   = c;
 980        sq->uar_map   = mdev->mlx5e_res.hw_objs.bfreg.map;
 981        sq->min_inline_mode = params->tx_min_inline_mode;
 982        sq->hw_mtu    = MLX5E_SW2HW_MTU(params, params->sw_mtu);
 983        sq->xsk_pool  = xsk_pool;
 984
 985        sq->stats = sq->xsk_pool ?
 986                &c->priv->channel_stats[c->ix].xsksq :
 987                is_redirect ?
 988                        &c->priv->channel_stats[c->ix].xdpsq :
 989                        &c->priv->channel_stats[c->ix].rq_xdpsq;
 990
 991        param->wq.db_numa_node = cpu_to_node(c->cpu);
 992        err = mlx5_wq_cyc_create(mdev, &param->wq, sqc_wq, wq, &sq->wq_ctrl);
 993        if (err)
 994                return err;
 995        wq->db = &wq->db[MLX5_SND_DBR];
 996
 997        err = mlx5e_alloc_xdpsq_db(sq, cpu_to_node(c->cpu));
 998        if (err)
 999                goto err_sq_wq_destroy;
1000
1001        return 0;
1002
1003err_sq_wq_destroy:
1004        mlx5_wq_destroy(&sq->wq_ctrl);
1005
1006        return err;
1007}
1008
1009static void mlx5e_free_xdpsq(struct mlx5e_xdpsq *sq)
1010{
1011        mlx5e_free_xdpsq_db(sq);
1012        mlx5_wq_destroy(&sq->wq_ctrl);
1013}
1014
1015static void mlx5e_free_icosq_db(struct mlx5e_icosq *sq)
1016{
1017        kvfree(sq->db.wqe_info);
1018}
1019
1020static int mlx5e_alloc_icosq_db(struct mlx5e_icosq *sq, int numa)
1021{
1022        int wq_sz = mlx5_wq_cyc_get_size(&sq->wq);
1023        size_t size;
1024
1025        size = array_size(wq_sz, sizeof(*sq->db.wqe_info));
1026        sq->db.wqe_info = kvzalloc_node(size, GFP_KERNEL, numa);
1027        if (!sq->db.wqe_info)
1028                return -ENOMEM;
1029
1030        return 0;
1031}
1032
1033static void mlx5e_icosq_err_cqe_work(struct work_struct *recover_work)
1034{
1035        struct mlx5e_icosq *sq = container_of(recover_work, struct mlx5e_icosq,
1036                                              recover_work);
1037
1038        mlx5e_reporter_icosq_cqe_err(sq);
1039}
1040
1041static int mlx5e_alloc_icosq(struct mlx5e_channel *c,
1042                             struct mlx5e_sq_param *param,
1043                             struct mlx5e_icosq *sq)
1044{
1045        void *sqc_wq               = MLX5_ADDR_OF(sqc, param->sqc, wq);
1046        struct mlx5_core_dev *mdev = c->mdev;
1047        struct mlx5_wq_cyc *wq = &sq->wq;
1048        int err;
1049
1050        sq->channel   = c;
1051        sq->uar_map   = mdev->mlx5e_res.hw_objs.bfreg.map;
1052        sq->reserved_room = param->stop_room;
1053
1054        param->wq.db_numa_node = cpu_to_node(c->cpu);
1055        err = mlx5_wq_cyc_create(mdev, &param->wq, sqc_wq, wq, &sq->wq_ctrl);
1056        if (err)
1057                return err;
1058        wq->db = &wq->db[MLX5_SND_DBR];
1059
1060        err = mlx5e_alloc_icosq_db(sq, cpu_to_node(c->cpu));
1061        if (err)
1062                goto err_sq_wq_destroy;
1063
1064        INIT_WORK(&sq->recover_work, mlx5e_icosq_err_cqe_work);
1065
1066        return 0;
1067
1068err_sq_wq_destroy:
1069        mlx5_wq_destroy(&sq->wq_ctrl);
1070
1071        return err;
1072}
1073
1074static void mlx5e_free_icosq(struct mlx5e_icosq *sq)
1075{
1076        mlx5e_free_icosq_db(sq);
1077        mlx5_wq_destroy(&sq->wq_ctrl);
1078}
1079
1080void mlx5e_free_txqsq_db(struct mlx5e_txqsq *sq)
1081{
1082        kvfree(sq->db.wqe_info);
1083        kvfree(sq->db.skb_fifo.fifo);
1084        kvfree(sq->db.dma_fifo);
1085}
1086
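     /* The DMA and SKB FIFOs are sized for the worst case of
      * MLX5_SEND_WQEBB_NUM_DS descriptors per WQEBB, while wqe_info is sized
      * per WQEBB; all arrays are allocated on the SQ's NUMA node.
      */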
1087int mlx5e_alloc_txqsq_db(struct mlx5e_txqsq *sq, int numa)
1088{
1089        int wq_sz = mlx5_wq_cyc_get_size(&sq->wq);
1090        int df_sz = wq_sz * MLX5_SEND_WQEBB_NUM_DS;
1091
1092        sq->db.dma_fifo = kvzalloc_node(array_size(df_sz,
1093                                                   sizeof(*sq->db.dma_fifo)),
1094                                        GFP_KERNEL, numa);
1095        sq->db.skb_fifo.fifo = kvzalloc_node(array_size(df_sz,
1096                                                        sizeof(*sq->db.skb_fifo.fifo)),
1097                                        GFP_KERNEL, numa);
1098        sq->db.wqe_info = kvzalloc_node(array_size(wq_sz,
1099                                                   sizeof(*sq->db.wqe_info)),
1100                                        GFP_KERNEL, numa);
1101        if (!sq->db.dma_fifo || !sq->db.skb_fifo.fifo || !sq->db.wqe_info) {
1102                mlx5e_free_txqsq_db(sq);
1103                return -ENOMEM;
1104        }
1105
1106        sq->dma_fifo_mask = df_sz - 1;
1107
1108        sq->db.skb_fifo.pc   = &sq->skb_fifo_pc;
1109        sq->db.skb_fifo.cc   = &sq->skb_fifo_cc;
1110        sq->db.skb_fifo.mask = df_sz - 1;
1111
1112        return 0;
1113}
1114
1115static int mlx5e_alloc_txqsq(struct mlx5e_channel *c,
1116                             int txq_ix,
1117                             struct mlx5e_params *params,
1118                             struct mlx5e_sq_param *param,
1119                             struct mlx5e_txqsq *sq,
1120                             int tc)
1121{
1122        void *sqc_wq               = MLX5_ADDR_OF(sqc, param->sqc, wq);
1123        struct mlx5_core_dev *mdev = c->mdev;
1124        struct mlx5_wq_cyc *wq = &sq->wq;
1125        int err;
1126
1127        sq->pdev      = c->pdev;
1128        sq->tstamp    = c->tstamp;
1129        sq->clock     = &mdev->clock;
1130        sq->mkey_be   = c->mkey_be;
1131        sq->netdev    = c->netdev;
1132        sq->mdev      = c->mdev;
1133        sq->priv      = c->priv;
1134        sq->ch_ix     = c->ix;
1135        sq->txq_ix    = txq_ix;
1136        sq->uar_map   = mdev->mlx5e_res.hw_objs.bfreg.map;
1137        sq->min_inline_mode = params->tx_min_inline_mode;
1138        sq->hw_mtu    = MLX5E_SW2HW_MTU(params, params->sw_mtu);
1139        INIT_WORK(&sq->recover_work, mlx5e_tx_err_cqe_work);
1140        if (!MLX5_CAP_ETH(mdev, wqe_vlan_insert))
1141                set_bit(MLX5E_SQ_STATE_VLAN_NEED_L2_INLINE, &sq->state);
1142        if (MLX5_IPSEC_DEV(c->priv->mdev))
1143                set_bit(MLX5E_SQ_STATE_IPSEC, &sq->state);
1144        if (param->is_mpw)
1145                set_bit(MLX5E_SQ_STATE_MPWQE, &sq->state);
1146        sq->stop_room = param->stop_room;
1147        sq->ptp_cyc2time = mlx5_sq_ts_translator(mdev);
1148
1149        param->wq.db_numa_node = cpu_to_node(c->cpu);
1150        err = mlx5_wq_cyc_create(mdev, &param->wq, sqc_wq, wq, &sq->wq_ctrl);
1151        if (err)
1152                return err;
1153        wq->db    = &wq->db[MLX5_SND_DBR];
1154
1155        err = mlx5e_alloc_txqsq_db(sq, cpu_to_node(c->cpu));
1156        if (err)
1157                goto err_sq_wq_destroy;
1158
1159        INIT_WORK(&sq->dim.work, mlx5e_tx_dim_work);
1160        sq->dim.mode = params->tx_cq_moderation.cq_period_mode;
1161
1162        return 0;
1163
1164err_sq_wq_destroy:
1165        mlx5_wq_destroy(&sq->wq_ctrl);
1166
1167        return err;
1168}
1169
1170void mlx5e_free_txqsq(struct mlx5e_txqsq *sq)
1171{
1172        mlx5e_free_txqsq_db(sq);
1173        mlx5_wq_destroy(&sq->wq_ctrl);
1174}
1175
1176static int mlx5e_create_sq(struct mlx5_core_dev *mdev,
1177                           struct mlx5e_sq_param *param,
1178                           struct mlx5e_create_sq_param *csp,
1179                           u32 *sqn)
1180{
1181        u8 ts_format;
1182        void *in;
1183        void *sqc;
1184        void *wq;
1185        int inlen;
1186        int err;
1187
1188        inlen = MLX5_ST_SZ_BYTES(create_sq_in) +
1189                sizeof(u64) * csp->wq_ctrl->buf.npages;
1190        in = kvzalloc(inlen, GFP_KERNEL);
1191        if (!in)
1192                return -ENOMEM;
1193
1194        ts_format = mlx5_is_real_time_sq(mdev) ?
1195                            MLX5_TIMESTAMP_FORMAT_REAL_TIME :
1196                            MLX5_TIMESTAMP_FORMAT_FREE_RUNNING;
1197        sqc = MLX5_ADDR_OF(create_sq_in, in, ctx);
1198        wq = MLX5_ADDR_OF(sqc, sqc, wq);
1199
1200        memcpy(sqc, param->sqc, sizeof(param->sqc));
1201        MLX5_SET(sqc,  sqc, tis_lst_sz, csp->tis_lst_sz);
1202        MLX5_SET(sqc,  sqc, tis_num_0, csp->tisn);
1203        MLX5_SET(sqc,  sqc, cqn, csp->cqn);
1204        MLX5_SET(sqc,  sqc, ts_cqe_to_dest_cqn, csp->ts_cqe_to_dest_cqn);
1205        MLX5_SET(sqc,  sqc, ts_format, ts_format);
1206
1207
1208        if (MLX5_CAP_ETH(mdev, wqe_inline_mode) == MLX5_CAP_INLINE_MODE_VPORT_CONTEXT)
1209                MLX5_SET(sqc,  sqc, min_wqe_inline_mode, csp->min_inline_mode);
1210
1211        MLX5_SET(sqc,  sqc, state, MLX5_SQC_STATE_RST);
1212        MLX5_SET(sqc,  sqc, flush_in_error_en, 1);
1213
1214        MLX5_SET(wq,   wq, wq_type,       MLX5_WQ_TYPE_CYCLIC);
1215        MLX5_SET(wq,   wq, uar_page,      mdev->mlx5e_res.hw_objs.bfreg.index);
1216        MLX5_SET(wq,   wq, log_wq_pg_sz,  csp->wq_ctrl->buf.page_shift -
1217                                          MLX5_ADAPTER_PAGE_SHIFT);
1218        MLX5_SET64(wq, wq, dbr_addr,      csp->wq_ctrl->db.dma);
1219
1220        mlx5_fill_page_frag_array(&csp->wq_ctrl->buf,
1221                                  (__be64 *)MLX5_ADDR_OF(wq, wq, pas));
1222
1223        err = mlx5_core_create_sq(mdev, in, inlen, sqn);
1224
1225        kvfree(in);
1226
1227        return err;
1228}
1229
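     /* MODIFY_SQ state transition. The modify bitmask selects the optional
      * fields: bit 0 carries a new packet-pacing rate-limit index and bit 2 a
      * new QoS queue group id; both are applied only on transitions to RDY.
      */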
1230int mlx5e_modify_sq(struct mlx5_core_dev *mdev, u32 sqn,
1231                    struct mlx5e_modify_sq_param *p)
1232{
1233        u64 bitmask = 0;
1234        void *in;
1235        void *sqc;
1236        int inlen;
1237        int err;
1238
1239        inlen = MLX5_ST_SZ_BYTES(modify_sq_in);
1240        in = kvzalloc(inlen, GFP_KERNEL);
1241        if (!in)
1242                return -ENOMEM;
1243
1244        sqc = MLX5_ADDR_OF(modify_sq_in, in, ctx);
1245
1246        MLX5_SET(modify_sq_in, in, sq_state, p->curr_state);
1247        MLX5_SET(sqc, sqc, state, p->next_state);
1248        if (p->rl_update && p->next_state == MLX5_SQC_STATE_RDY) {
1249                bitmask |= 1;
1250                MLX5_SET(sqc, sqc, packet_pacing_rate_limit_index, p->rl_index);
1251        }
1252        if (p->qos_update && p->next_state == MLX5_SQC_STATE_RDY) {
1253                bitmask |= 1 << 2;
1254                MLX5_SET(sqc, sqc, qos_queue_group_id, p->qos_queue_group_id);
1255        }
1256        MLX5_SET64(modify_sq_in, in, modify_bitmask, bitmask);
1257
1258        err = mlx5_core_modify_sq(mdev, sqn, in);
1259
1260        kvfree(in);
1261
1262        return err;
1263}
1264
1265static void mlx5e_destroy_sq(struct mlx5_core_dev *mdev, u32 sqn)
1266{
1267        mlx5_core_destroy_sq(mdev, sqn);
1268}
1269
1270int mlx5e_create_sq_rdy(struct mlx5_core_dev *mdev,
1271                        struct mlx5e_sq_param *param,
1272                        struct mlx5e_create_sq_param *csp,
1273                        u16 qos_queue_group_id,
1274                        u32 *sqn)
1275{
1276        struct mlx5e_modify_sq_param msp = {0};
1277        int err;
1278
1279        err = mlx5e_create_sq(mdev, param, csp, sqn);
1280        if (err)
1281                return err;
1282
1283        msp.curr_state = MLX5_SQC_STATE_RST;
1284        msp.next_state = MLX5_SQC_STATE_RDY;
1285        if (qos_queue_group_id) {
1286                msp.qos_update = true;
1287                msp.qos_queue_group_id = qos_queue_group_id;
1288        }
1289        err = mlx5e_modify_sq(mdev, *sqn, &msp);
1290        if (err)
1291                mlx5e_destroy_sq(mdev, *sqn);
1292
1293        return err;
1294}
1295
1296static int mlx5e_set_sq_maxrate(struct net_device *dev,
1297                                struct mlx5e_txqsq *sq, u32 rate);
1298
1299int mlx5e_open_txqsq(struct mlx5e_channel *c, u32 tisn, int txq_ix,
1300                     struct mlx5e_params *params, struct mlx5e_sq_param *param,
1301                     struct mlx5e_txqsq *sq, int tc, u16 qos_queue_group_id, u16 qos_qid)
1302{
1303        struct mlx5e_create_sq_param csp = {};
1304        u32 tx_rate;
1305        int err;
1306
1307        err = mlx5e_alloc_txqsq(c, txq_ix, params, param, sq, tc);
1308        if (err)
1309                return err;
1310
1311        if (qos_queue_group_id)
1312                sq->stats = c->priv->htb.qos_sq_stats[qos_qid];
1313        else
1314                sq->stats = &c->priv->channel_stats[c->ix].sq[tc];
1315
1316        csp.tisn            = tisn;
1317        csp.tis_lst_sz      = 1;
1318        csp.cqn             = sq->cq.mcq.cqn;
1319        csp.wq_ctrl         = &sq->wq_ctrl;
1320        csp.min_inline_mode = sq->min_inline_mode;
1321        err = mlx5e_create_sq_rdy(c->mdev, param, &csp, qos_queue_group_id, &sq->sqn);
1322        if (err)
1323                goto err_free_txqsq;
1324
1325        tx_rate = c->priv->tx_rates[sq->txq_ix];
1326        if (tx_rate)
1327                mlx5e_set_sq_maxrate(c->netdev, sq, tx_rate);
1328
1329        if (params->tx_dim_enabled)
1330                sq->state |= BIT(MLX5E_SQ_STATE_AM);
1331
1332        return 0;
1333
1334err_free_txqsq:
1335        mlx5e_free_txqsq(sq);
1336
1337        return err;
1338}
1339
1340void mlx5e_activate_txqsq(struct mlx5e_txqsq *sq)
1341{
1342        sq->txq = netdev_get_tx_queue(sq->netdev, sq->txq_ix);
1343        set_bit(MLX5E_SQ_STATE_ENABLED, &sq->state);
1344        netdev_tx_reset_queue(sq->txq);
1345        netif_tx_start_queue(sq->txq);
1346}
1347
1348void mlx5e_tx_disable_queue(struct netdev_queue *txq)
1349{
1350        __netif_tx_lock_bh(txq);
1351        netif_tx_stop_queue(txq);
1352        __netif_tx_unlock_bh(txq);
1353}
1354
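     /* Stop the TXQ SQ: clear the enabled bit, synchronize with NAPI so no
      * late wake-up races in, stop the netdev TX queue, and post a final NOP
      * so the last doorbell covers any WQEs written but not yet rung.
      */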
1355void mlx5e_deactivate_txqsq(struct mlx5e_txqsq *sq)
1356{
1357        struct mlx5_wq_cyc *wq = &sq->wq;
1358
1359        clear_bit(MLX5E_SQ_STATE_ENABLED, &sq->state);
1360        synchronize_net(); /* Sync with NAPI to prevent netif_tx_wake_queue. */
1361
1362        mlx5e_tx_disable_queue(sq->txq);
1363
1364        /* last doorbell out, godspeed .. */
1365        if (mlx5e_wqc_has_room_for(wq, sq->cc, sq->pc, 1)) {
1366                u16 pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
1367                struct mlx5e_tx_wqe *nop;
1368
1369                sq->db.wqe_info[pi] = (struct mlx5e_tx_wqe_info) {
1370                        .num_wqebbs = 1,
1371                };
1372
1373                nop = mlx5e_post_nop(wq, sq->sqn, &sq->pc);
1374                mlx5e_notify_hw(wq, sq->pc, sq->uar_map, &nop->ctrl);
1375        }
1376}
1377
1378void mlx5e_close_txqsq(struct mlx5e_txqsq *sq)
1379{
1380        struct mlx5_core_dev *mdev = sq->mdev;
1381        struct mlx5_rate_limit rl = {0};
1382
1383        cancel_work_sync(&sq->dim.work);
1384        cancel_work_sync(&sq->recover_work);
1385        mlx5e_destroy_sq(mdev, sq->sqn);
1386        if (sq->rate_limit) {
1387                rl.rate = sq->rate_limit;
1388                mlx5_rl_remove_rate(mdev, &rl);
1389        }
1390        mlx5e_free_txqsq_descs(sq);
1391        mlx5e_free_txqsq(sq);
1392}
1393
1394void mlx5e_tx_err_cqe_work(struct work_struct *recover_work)
1395{
1396        struct mlx5e_txqsq *sq = container_of(recover_work, struct mlx5e_txqsq,
1397                                              recover_work);
1398
1399        mlx5e_reporter_tx_err_cqe(sq);
1400}
1401
1402int mlx5e_open_icosq(struct mlx5e_channel *c, struct mlx5e_params *params,
1403                     struct mlx5e_sq_param *param, struct mlx5e_icosq *sq)
1404{
1405        struct mlx5e_create_sq_param csp = {};
1406        int err;
1407
1408        err = mlx5e_alloc_icosq(c, param, sq);
1409        if (err)
1410                return err;
1411
1412        csp.cqn             = sq->cq.mcq.cqn;
1413        csp.wq_ctrl         = &sq->wq_ctrl;
1414        csp.min_inline_mode = params->tx_min_inline_mode;
1415        err = mlx5e_create_sq_rdy(c->mdev, param, &csp, 0, &sq->sqn);
1416        if (err)
1417                goto err_free_icosq;
1418
1419        if (param->is_tls) {
1420                sq->ktls_resync = mlx5e_ktls_rx_resync_create_resp_list();
1421                if (IS_ERR(sq->ktls_resync)) {
1422                        err = PTR_ERR(sq->ktls_resync);
1423                        goto err_destroy_icosq;
1424                }
1425        }
1426        return 0;
1427
1428err_destroy_icosq:
1429        mlx5e_destroy_sq(c->mdev, sq->sqn);
1430err_free_icosq:
1431        mlx5e_free_icosq(sq);
1432
1433        return err;
1434}
1435
1436void mlx5e_activate_icosq(struct mlx5e_icosq *icosq)
1437{
1438        set_bit(MLX5E_SQ_STATE_ENABLED, &icosq->state);
1439}
1440
1441void mlx5e_deactivate_icosq(struct mlx5e_icosq *icosq)
1442{
1443        clear_bit(MLX5E_SQ_STATE_ENABLED, &icosq->state);
1444        synchronize_net(); /* Sync with NAPI. */
1445}
1446
1447void mlx5e_close_icosq(struct mlx5e_icosq *sq)
1448{
1449        struct mlx5e_channel *c = sq->channel;
1450
1451        if (sq->ktls_resync)
1452                mlx5e_ktls_rx_resync_destroy_resp_list(sq->ktls_resync);
1453        mlx5e_destroy_sq(c->mdev, sq->sqn);
1454        mlx5e_free_icosq_descs(sq);
1455        mlx5e_free_icosq(sq);
1456}
1457
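     /* Open an XDP SQ. In non-MPWQE mode every descriptor has a fixed shape,
      * so the control, eth and data segment fields that never change
      * (DS count, inline header size, lkey) are written once here and only
      * the variable parts are filled on the datapath.
      */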
1458int mlx5e_open_xdpsq(struct mlx5e_channel *c, struct mlx5e_params *params,
1459                     struct mlx5e_sq_param *param, struct xsk_buff_pool *xsk_pool,
1460                     struct mlx5e_xdpsq *sq, bool is_redirect)
1461{
1462        struct mlx5e_create_sq_param csp = {};
1463        int err;
1464
1465        err = mlx5e_alloc_xdpsq(c, params, xsk_pool, param, sq, is_redirect);
1466        if (err)
1467                return err;
1468
1469        csp.tis_lst_sz      = 1;
1470        csp.tisn            = c->priv->tisn[c->lag_port][0]; /* tc = 0 */
1471        csp.cqn             = sq->cq.mcq.cqn;
1472        csp.wq_ctrl         = &sq->wq_ctrl;
1473        csp.min_inline_mode = sq->min_inline_mode;
1474        set_bit(MLX5E_SQ_STATE_ENABLED, &sq->state);
1475        err = mlx5e_create_sq_rdy(c->mdev, param, &csp, 0, &sq->sqn);
1476        if (err)
1477                goto err_free_xdpsq;
1478
1479        mlx5e_set_xmit_fp(sq, param->is_mpw);
1480
1481        if (!param->is_mpw) {
1482                unsigned int ds_cnt = MLX5E_XDP_TX_DS_COUNT;
1483                unsigned int inline_hdr_sz = 0;
1484                int i;
1485
1486                if (sq->min_inline_mode != MLX5_INLINE_MODE_NONE) {
1487                        inline_hdr_sz = MLX5E_XDP_MIN_INLINE;
1488                        ds_cnt++;
1489                }
1490
 1491                /* Pre-initialize fixed WQE fields */
1492                for (i = 0; i < mlx5_wq_cyc_get_size(&sq->wq); i++) {
1493                        struct mlx5e_tx_wqe      *wqe  = mlx5_wq_cyc_get_wqe(&sq->wq, i);
1494                        struct mlx5_wqe_ctrl_seg *cseg = &wqe->ctrl;
1495                        struct mlx5_wqe_eth_seg  *eseg = &wqe->eth;
1496                        struct mlx5_wqe_data_seg *dseg;
1497
1498                        sq->db.wqe_info[i] = (struct mlx5e_xdp_wqe_info) {
1499                                .num_wqebbs = 1,
1500                                .num_pkts   = 1,
1501                        };
1502
1503                        cseg->qpn_ds = cpu_to_be32((sq->sqn << 8) | ds_cnt);
1504                        eseg->inline_hdr.sz = cpu_to_be16(inline_hdr_sz);
1505
1506                        dseg = (struct mlx5_wqe_data_seg *)cseg + (ds_cnt - 1);
1507                        dseg->lkey = sq->mkey_be;
1508                }
1509        }
1510
1511        return 0;
1512
1513err_free_xdpsq:
1514        clear_bit(MLX5E_SQ_STATE_ENABLED, &sq->state);
1515        mlx5e_free_xdpsq(sq);
1516
1517        return err;
1518}
1519
1520void mlx5e_close_xdpsq(struct mlx5e_xdpsq *sq)
1521{
1522        struct mlx5e_channel *c = sq->channel;
1523
1524        clear_bit(MLX5E_SQ_STATE_ENABLED, &sq->state);
1525        synchronize_net(); /* Sync with NAPI. */
1526
1527        mlx5e_destroy_sq(c->mdev, sq->sqn);
1528        mlx5e_free_xdpsq_descs(sq);
1529        mlx5e_free_xdpsq(sq);
1530}
1531
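    /* Common CQ allocation: create the CQ work queue, wire up the doorbell
     * records and the completion/error handlers, and preset every CQE's
     * op_own to 0xf1 (invalid opcode, HW ownership) so that no entry looks
     * like a valid completion before HW has written it.
     */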
1532static int mlx5e_alloc_cq_common(struct mlx5e_priv *priv,
1533                                 struct mlx5e_cq_param *param,
1534                                 struct mlx5e_cq *cq)
1535{
1536        struct mlx5_core_dev *mdev = priv->mdev;
1537        struct mlx5_core_cq *mcq = &cq->mcq;
1538        int err;
1539        u32 i;
1540
1541        err = mlx5_cqwq_create(mdev, &param->wq, param->cqc, &cq->wq,
1542                               &cq->wq_ctrl);
1543        if (err)
1544                return err;
1545
1546        mcq->cqe_sz     = 64;
1547        mcq->set_ci_db  = cq->wq_ctrl.db.db;
1548        mcq->arm_db     = cq->wq_ctrl.db.db + 1;
1549        *mcq->set_ci_db = 0;
1550        *mcq->arm_db    = 0;
1551        mcq->vector     = param->eq_ix;
1552        mcq->comp       = mlx5e_completion_event;
1553        mcq->event      = mlx5e_cq_error_event;
1554
1555        for (i = 0; i < mlx5_cqwq_get_size(&cq->wq); i++) {
1556                struct mlx5_cqe64 *cqe = mlx5_cqwq_get_wqe(&cq->wq, i);
1557
1558                cqe->op_own = 0xf1;
1559        }
1560
1561        cq->mdev = mdev;
1562        cq->netdev = priv->netdev;
1563        cq->priv = priv;
1564
1565        return 0;
1566}
1567
1568static int mlx5e_alloc_cq(struct mlx5e_priv *priv,
1569                          struct mlx5e_cq_param *param,
1570                          struct mlx5e_create_cq_param *ccp,
1571                          struct mlx5e_cq *cq)
1572{
1573        int err;
1574
1575        param->wq.buf_numa_node = ccp->node;
1576        param->wq.db_numa_node  = ccp->node;
1577        param->eq_ix            = ccp->ix;
1578
1579        err = mlx5e_alloc_cq_common(priv, param, cq);
1580
1581        cq->napi     = ccp->napi;
1582        cq->ch_stats = ccp->ch_stats;
1583
1584        return err;
1585}
1586
1587static void mlx5e_free_cq(struct mlx5e_cq *cq)
1588{
1589        mlx5_wq_destroy(&cq->wq_ctrl);
1590}
1591
1592static int mlx5e_create_cq(struct mlx5e_cq *cq, struct mlx5e_cq_param *param)
1593{
1594        u32 out[MLX5_ST_SZ_DW(create_cq_out)];
1595        struct mlx5_core_dev *mdev = cq->mdev;
1596        struct mlx5_core_cq *mcq = &cq->mcq;
1597
1598        void *in;
1599        void *cqc;
1600        int inlen;
1601        int eqn;
1602        int err;
1603
1604        err = mlx5_vector2eqn(mdev, param->eq_ix, &eqn);
1605        if (err)
1606                return err;
1607
1608        inlen = MLX5_ST_SZ_BYTES(create_cq_in) +
1609                sizeof(u64) * cq->wq_ctrl.buf.npages;
1610        in = kvzalloc(inlen, GFP_KERNEL);
1611        if (!in)
1612                return -ENOMEM;
1613
1614        cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context);
1615
1616        memcpy(cqc, param->cqc, sizeof(param->cqc));
1617
1618        mlx5_fill_page_frag_array(&cq->wq_ctrl.buf,
1619                                  (__be64 *)MLX5_ADDR_OF(create_cq_in, in, pas));
1620
1621        MLX5_SET(cqc,   cqc, cq_period_mode, param->cq_period_mode);
1622        MLX5_SET(cqc,   cqc, c_eqn_or_apu_element, eqn);
1623        MLX5_SET(cqc,   cqc, uar_page,      mdev->priv.uar->index);
1624        MLX5_SET(cqc,   cqc, log_page_size, cq->wq_ctrl.buf.page_shift -
1625                                            MLX5_ADAPTER_PAGE_SHIFT);
1626        MLX5_SET64(cqc, cqc, dbr_addr,      cq->wq_ctrl.db.dma);
1627
1628        err = mlx5_core_create_cq(mdev, mcq, in, inlen, out, sizeof(out));
1629
1630        kvfree(in);
1631
1632        if (err)
1633                return err;
1634
1635        mlx5e_cq_arm(cq);
1636
1637        return 0;
1638}
1639
1640static void mlx5e_destroy_cq(struct mlx5e_cq *cq)
1641{
1642        mlx5_core_destroy_cq(cq->mdev, &cq->mcq);
1643}
1644
1645int mlx5e_open_cq(struct mlx5e_priv *priv, struct dim_cq_moder moder,
1646                  struct mlx5e_cq_param *param, struct mlx5e_create_cq_param *ccp,
1647                  struct mlx5e_cq *cq)
1648{
1649        struct mlx5_core_dev *mdev = priv->mdev;
1650        int err;
1651
1652        err = mlx5e_alloc_cq(priv, param, ccp, cq);
1653        if (err)
1654                return err;
1655
1656        err = mlx5e_create_cq(cq, param);
1657        if (err)
1658                goto err_free_cq;
1659
1660        if (MLX5_CAP_GEN(mdev, cq_moderation))
1661                mlx5_core_modify_cq_moderation(mdev, &cq->mcq, moder.usec, moder.pkts);
1662        return 0;
1663
1664err_free_cq:
1665        mlx5e_free_cq(cq);
1666
1667        return err;
1668}
1669
1670void mlx5e_close_cq(struct mlx5e_cq *cq)
1671{
1672        mlx5e_destroy_cq(cq);
1673        mlx5e_free_cq(cq);
1674}
1675
1676static int mlx5e_open_tx_cqs(struct mlx5e_channel *c,
1677                             struct mlx5e_params *params,
1678                             struct mlx5e_create_cq_param *ccp,
1679                             struct mlx5e_channel_param *cparam)
1680{
1681        int err;
1682        int tc;
1683
1684        for (tc = 0; tc < c->num_tc; tc++) {
1685                err = mlx5e_open_cq(c->priv, params->tx_cq_moderation, &cparam->txq_sq.cqp,
1686                                    ccp, &c->sq[tc].cq);
1687                if (err)
1688                        goto err_close_tx_cqs;
1689        }
1690
1691        return 0;
1692
1693err_close_tx_cqs:
1694        for (tc--; tc >= 0; tc--)
1695                mlx5e_close_cq(&c->sq[tc].cq);
1696
1697        return err;
1698}
1699
1700static void mlx5e_close_tx_cqs(struct mlx5e_channel *c)
1701{
1702        int tc;
1703
1704        for (tc = 0; tc < c->num_tc; tc++)
1705                mlx5e_close_cq(&c->sq[tc].cq);
1706}
1707
1708static int mlx5e_open_sqs(struct mlx5e_channel *c,
1709                          struct mlx5e_params *params,
1710                          struct mlx5e_channel_param *cparam)
1711{
1712        int err, tc;
1713
1714        for (tc = 0; tc < mlx5e_get_dcb_num_tc(params); tc++) {
1715                int txq_ix = c->ix + tc * params->num_channels;
1716
1717                err = mlx5e_open_txqsq(c, c->priv->tisn[c->lag_port][tc], txq_ix,
1718                                       params, &cparam->txq_sq, &c->sq[tc], tc, 0, 0);
1719                if (err)
1720                        goto err_close_sqs;
1721        }
1722
1723        return 0;
1724
1725err_close_sqs:
1726        for (tc--; tc >= 0; tc--)
1727                mlx5e_close_txqsq(&c->sq[tc]);
1728
1729        return err;
1730}
1731
1732static void mlx5e_close_sqs(struct mlx5e_channel *c)
1733{
1734        int tc;
1735
1736        for (tc = 0; tc < c->num_tc; tc++)
1737                mlx5e_close_txqsq(&c->sq[tc]);
1738}
1739
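    /* Apply a per-SQ TX rate limit: drop the SQ's current entry from the
     * device rate table, add the new rate to obtain an rl_index, and attach
     * that index to the SQ with a RDY->RDY modify. On failure the freshly
     * added rate is removed again.
     */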
1740static int mlx5e_set_sq_maxrate(struct net_device *dev,
1741                                struct mlx5e_txqsq *sq, u32 rate)
1742{
1743        struct mlx5e_priv *priv = netdev_priv(dev);
1744        struct mlx5_core_dev *mdev = priv->mdev;
1745        struct mlx5e_modify_sq_param msp = {0};
1746        struct mlx5_rate_limit rl = {0};
1747        u16 rl_index = 0;
1748        int err;
1749
1750        if (rate == sq->rate_limit)
1751                /* nothing to do */
1752                return 0;
1753
1754        if (sq->rate_limit) {
1755                rl.rate = sq->rate_limit;
1756                /* remove the current rl index to free space for the next ones */
1757                mlx5_rl_remove_rate(mdev, &rl);
1758        }
1759
1760        sq->rate_limit = 0;
1761
1762        if (rate) {
1763                rl.rate = rate;
1764                err = mlx5_rl_add_rate(mdev, &rl_index, &rl);
1765                if (err) {
1766                        netdev_err(dev, "Failed configuring rate %u: %d\n",
1767                                   rate, err);
1768                        return err;
1769                }
1770        }
1771
1772        msp.curr_state = MLX5_SQC_STATE_RDY;
1773        msp.next_state = MLX5_SQC_STATE_RDY;
1774        msp.rl_index   = rl_index;
1775        msp.rl_update  = true;
1776        err = mlx5e_modify_sq(mdev, sq->sqn, &msp);
1777        if (err) {
1778                netdev_err(dev, "Failed configuring rate %u: %d\n",
1779                           rate, err);
1780                /* remove the rate from the table */
1781                if (rate)
1782                        mlx5_rl_remove_rate(mdev, &rl);
1783                return err;
1784        }
1785
1786        sq->rate_limit = rate;
1787        return 0;
1788}
1789
1790static int mlx5e_set_tx_maxrate(struct net_device *dev, int index, u32 rate)
1791{
1792        struct mlx5e_priv *priv = netdev_priv(dev);
1793        struct mlx5_core_dev *mdev = priv->mdev;
1794        struct mlx5e_txqsq *sq = priv->txq2sq[index];
1795        int err = 0;
1796
1797        if (!mlx5_rl_is_supported(mdev)) {
1798                netdev_err(dev, "Rate limiting is not supported on this device\n");
1799                return -EINVAL;
1800        }
1801
1802        /* rate is given in Mb/sec, HW config is in Kb/sec; the left shift by 10 scales by 1024 */
1803        rate = rate << 10;
1804
1805        /* Check whether the rate is in a valid range; 0 is always valid */
1806        if (rate && !mlx5_rl_is_in_range(mdev, rate)) {
1807                netdev_err(dev, "TX rate %u is not in range\n", rate);
1808                return -ERANGE;
1809        }
1810
1811        mutex_lock(&priv->state_lock);
1812        if (test_bit(MLX5E_STATE_OPENED, &priv->state))
1813                err = mlx5e_set_sq_maxrate(dev, sq, rate);
1814        if (!err)
1815                priv->tx_rates[index] = rate;
1816        mutex_unlock(&priv->state_lock);
1817
1818        return err;
1819}
1820
1821static int mlx5e_open_rxq_rq(struct mlx5e_channel *c, struct mlx5e_params *params,
1822                             struct mlx5e_rq_param *rq_params)
1823{
1824        int err;
1825
1826        err = mlx5e_init_rxq_rq(c, params, &c->rq);
1827        if (err)
1828                return err;
1829
1830        return mlx5e_open_rq(params, rq_params, NULL, cpu_to_node(c->cpu), &c->rq);
1831}
1832
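    /* Open all per-channel resources in dependency order: the CQs first,
     * then the ICOSQs, the per-TC TX SQs, the RQ and finally the XDP SQs.
     * The error labels below unwind in exact reverse order.
     */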
1833static int mlx5e_open_queues(struct mlx5e_channel *c,
1834                             struct mlx5e_params *params,
1835                             struct mlx5e_channel_param *cparam)
1836{
1837        struct dim_cq_moder icocq_moder = {0, 0};
1838        struct mlx5e_create_cq_param ccp;
1839        int err;
1840
1841        mlx5e_build_create_cq_param(&ccp, c);
1842
1843        err = mlx5e_open_cq(c->priv, icocq_moder, &cparam->async_icosq.cqp, &ccp,
1844                            &c->async_icosq.cq);
1845        if (err)
1846                return err;
1847
1848        err = mlx5e_open_cq(c->priv, icocq_moder, &cparam->icosq.cqp, &ccp,
1849                            &c->icosq.cq);
1850        if (err)
1851                goto err_close_async_icosq_cq;
1852
1853        err = mlx5e_open_tx_cqs(c, params, &ccp, cparam);
1854        if (err)
1855                goto err_close_icosq_cq;
1856
1857        err = mlx5e_open_cq(c->priv, params->tx_cq_moderation, &cparam->xdp_sq.cqp, &ccp,
1858                            &c->xdpsq.cq);
1859        if (err)
1860                goto err_close_tx_cqs;
1861
1862        err = mlx5e_open_cq(c->priv, params->rx_cq_moderation, &cparam->rq.cqp, &ccp,
1863                            &c->rq.cq);
1864        if (err)
1865                goto err_close_xdp_tx_cqs;
1866
1867        err = c->xdp ? mlx5e_open_cq(c->priv, params->tx_cq_moderation, &cparam->xdp_sq.cqp,
1868                                     &ccp, &c->rq_xdpsq.cq) : 0;
1869        if (err)
1870                goto err_close_rx_cq;
1871
1872        spin_lock_init(&c->async_icosq_lock);
1873
1874        err = mlx5e_open_icosq(c, params, &cparam->async_icosq, &c->async_icosq);
1875        if (err)
1876                goto err_close_xdpsq_cq;
1877
1878        err = mlx5e_open_icosq(c, params, &cparam->icosq, &c->icosq);
1879        if (err)
1880                goto err_close_async_icosq;
1881
1882        err = mlx5e_open_sqs(c, params, cparam);
1883        if (err)
1884                goto err_close_icosq;
1885
1886        err = mlx5e_open_rxq_rq(c, params, &cparam->rq);
1887        if (err)
1888                goto err_close_sqs;
1889
1890        if (c->xdp) {
1891                err = mlx5e_open_xdpsq(c, params, &cparam->xdp_sq, NULL,
1892                                       &c->rq_xdpsq, false);
1893                if (err)
1894                        goto err_close_rq;
1895        }
1896
1897        err = mlx5e_open_xdpsq(c, params, &cparam->xdp_sq, NULL, &c->xdpsq, true);
1898        if (err)
1899                goto err_close_xdp_sq;
1900
1901        return 0;
1902
1903err_close_xdp_sq:
1904        if (c->xdp)
1905                mlx5e_close_xdpsq(&c->rq_xdpsq);
1906
1907err_close_rq:
1908        mlx5e_close_rq(&c->rq);
1909
1910err_close_sqs:
1911        mlx5e_close_sqs(c);
1912
1913err_close_icosq:
1914        mlx5e_close_icosq(&c->icosq);
1915
1916err_close_async_icosq:
1917        mlx5e_close_icosq(&c->async_icosq);
1918
1919err_close_xdpsq_cq:
1920        if (c->xdp)
1921                mlx5e_close_cq(&c->rq_xdpsq.cq);
1922
1923err_close_rx_cq:
1924        mlx5e_close_cq(&c->rq.cq);
1925
1926err_close_xdp_tx_cqs:
1927        mlx5e_close_cq(&c->xdpsq.cq);
1928
1929err_close_tx_cqs:
1930        mlx5e_close_tx_cqs(c);
1931
1932err_close_icosq_cq:
1933        mlx5e_close_cq(&c->icosq.cq);
1934
1935err_close_async_icosq_cq:
1936        mlx5e_close_cq(&c->async_icosq.cq);
1937
1938        return err;
1939}
1940
1941static void mlx5e_close_queues(struct mlx5e_channel *c)
1942{
1943        mlx5e_close_xdpsq(&c->xdpsq);
1944        if (c->xdp)
1945                mlx5e_close_xdpsq(&c->rq_xdpsq);
1946        mlx5e_close_rq(&c->rq);
1947        mlx5e_close_sqs(c);
1948        mlx5e_close_icosq(&c->icosq);
1949        mlx5e_close_icosq(&c->async_icosq);
1950        if (c->xdp)
1951                mlx5e_close_cq(&c->rq_xdpsq.cq);
1952        mlx5e_close_cq(&c->rq.cq);
1953        mlx5e_close_cq(&c->xdpsq.cq);
1954        mlx5e_close_tx_cqs(c);
1955        mlx5e_close_cq(&c->icosq.cq);
1956        mlx5e_close_cq(&c->async_icosq.cq);
1957}
1958
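    /* Spread channels across LAG ports round-robin. Non-PF functions bias
     * the starting port by their vhca_id, presumably so that different
     * functions do not all start from the same port.
     */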
1959static u8 mlx5e_enumerate_lag_port(struct mlx5_core_dev *mdev, int ix)
1960{
1961        u16 port_aff_bias = mlx5_core_is_pf(mdev) ? 0 : MLX5_CAP_GEN(mdev, vhca_id);
1962
1963        return (ix + port_aff_bias) % mlx5e_get_num_lag_ports(mdev);
1964}
1965
1966static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix,
1967                              struct mlx5e_params *params,
1968                              struct mlx5e_channel_param *cparam,
1969                              struct xsk_buff_pool *xsk_pool,
1970                              struct mlx5e_channel **cp)
1971{
1972        int cpu = cpumask_first(mlx5_comp_irq_get_affinity_mask(priv->mdev, ix));
1973        struct net_device *netdev = priv->netdev;
1974        struct mlx5e_xsk_param xsk;
1975        struct mlx5e_channel *c;
1976        unsigned int irq;
1977        int err;
1978
1979        err = mlx5_vector2irqn(priv->mdev, ix, &irq);
1980        if (err)
1981                return err;
1982
1983        c = kvzalloc_node(sizeof(*c), GFP_KERNEL, cpu_to_node(cpu));
1984        if (!c)
1985                return -ENOMEM;
1986
1987        c->priv     = priv;
1988        c->mdev     = priv->mdev;
1989        c->tstamp   = &priv->tstamp;
1990        c->ix       = ix;
1991        c->cpu      = cpu;
1992        c->pdev     = mlx5_core_dma_dev(priv->mdev);
1993        c->netdev   = priv->netdev;
1994        c->mkey_be  = cpu_to_be32(priv->mdev->mlx5e_res.hw_objs.mkey.key);
1995        c->num_tc   = mlx5e_get_dcb_num_tc(params);
1996        c->xdp      = !!params->xdp_prog;
1997        c->stats    = &priv->channel_stats[ix].ch;
1998        c->aff_mask = irq_get_effective_affinity_mask(irq);
1999        c->lag_port = mlx5e_enumerate_lag_port(priv->mdev, ix);
2000
2001        netif_napi_add(netdev, &c->napi, mlx5e_napi_poll, 64);
2002
2003        err = mlx5e_open_queues(c, params, cparam);
2004        if (unlikely(err))
2005                goto err_napi_del;
2006
2007        if (xsk_pool) {
2008                mlx5e_build_xsk_param(xsk_pool, &xsk);
2009                err = mlx5e_open_xsk(priv, params, &xsk, xsk_pool, c);
2010                if (unlikely(err))
2011                        goto err_close_queues;
2012        }
2013
2014        *cp = c;
2015
2016        return 0;
2017
2018err_close_queues:
2019        mlx5e_close_queues(c);
2020
2021err_napi_del:
2022        netif_napi_del(&c->napi);
2023
2024        kvfree(c);
2025
2026        return err;
2027}
2028
2029static void mlx5e_activate_channel(struct mlx5e_channel *c)
2030{
2031        int tc;
2032
2033        napi_enable(&c->napi);
2034
2035        for (tc = 0; tc < c->num_tc; tc++)
2036                mlx5e_activate_txqsq(&c->sq[tc]);
2037        mlx5e_activate_icosq(&c->icosq);
2038        mlx5e_activate_icosq(&c->async_icosq);
2039        mlx5e_activate_rq(&c->rq);
2040
2041        if (test_bit(MLX5E_CHANNEL_STATE_XSK, c->state))
2042                mlx5e_activate_xsk(c);
2043}
2044
2045static void mlx5e_deactivate_channel(struct mlx5e_channel *c)
2046{
2047        int tc;
2048
2049        if (test_bit(MLX5E_CHANNEL_STATE_XSK, c->state))
2050                mlx5e_deactivate_xsk(c);
2051
2052        mlx5e_deactivate_rq(&c->rq);
2053        mlx5e_deactivate_icosq(&c->async_icosq);
2054        mlx5e_deactivate_icosq(&c->icosq);
2055        for (tc = 0; tc < c->num_tc; tc++)
2056                mlx5e_deactivate_txqsq(&c->sq[tc]);
2057        mlx5e_qos_deactivate_queues(c);
2058
2059        napi_disable(&c->napi);
2060}
2061
2062static void mlx5e_close_channel(struct mlx5e_channel *c)
2063{
2064        if (test_bit(MLX5E_CHANNEL_STATE_XSK, c->state))
2065                mlx5e_close_xsk(c);
2066        mlx5e_close_queues(c);
2067        mlx5e_qos_close_queues(c);
2068        netif_napi_del(&c->napi);
2069
2070        kvfree(c);
2071}
2072
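    /* Open the full channel set: build the channel parameters once, open
     * every channel (passing its XSK pool when an XDP program is attached),
     * then the optional PTP channel and the QoS queues. Any failure unwinds
     * everything opened so far.
     */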
2073int mlx5e_open_channels(struct mlx5e_priv *priv,
2074                        struct mlx5e_channels *chs)
2075{
2076        struct mlx5e_channel_param *cparam;
2077        int err = -ENOMEM;
2078        int i;
2079
2080        chs->num = chs->params.num_channels;
2081
2082        chs->c = kcalloc(chs->num, sizeof(struct mlx5e_channel *), GFP_KERNEL);
2083        cparam = kvzalloc(sizeof(struct mlx5e_channel_param), GFP_KERNEL);
2084        if (!chs->c || !cparam)
2085                goto err_free;
2086
2087        err = mlx5e_build_channel_param(priv->mdev, &chs->params, priv->q_counter, cparam);
2088        if (err)
2089                goto err_free;
2090
2091        for (i = 0; i < chs->num; i++) {
2092                struct xsk_buff_pool *xsk_pool = NULL;
2093
2094                if (chs->params.xdp_prog)
2095                        xsk_pool = mlx5e_xsk_get_pool(&chs->params, chs->params.xsk, i);
2096
2097                err = mlx5e_open_channel(priv, i, &chs->params, cparam, xsk_pool, &chs->c[i]);
2098                if (err)
2099                        goto err_close_channels;
2100        }
2101
2102        if (MLX5E_GET_PFLAG(&chs->params, MLX5E_PFLAG_TX_PORT_TS) || chs->params.ptp_rx) {
2103                err = mlx5e_ptp_open(priv, &chs->params, chs->c[0]->lag_port, &chs->ptp);
2104                if (err)
2105                        goto err_close_channels;
2106        }
2107
2108        err = mlx5e_qos_open_queues(priv, chs);
2109        if (err)
2110                goto err_close_ptp;
2111
2112        mlx5e_health_channels_update(priv);
2113        kvfree(cparam);
2114        return 0;
2115
2116err_close_ptp:
2117        if (chs->ptp)
2118                mlx5e_ptp_close(chs->ptp);
2119
2120err_close_channels:
2121        for (i--; i >= 0; i--)
2122                mlx5e_close_channel(chs->c[i]);
2123
2124err_free:
2125        kfree(chs->c);
2126        kvfree(cparam);
2127        chs->num = 0;
2128        return err;
2129}
2130
2131static void mlx5e_activate_channels(struct mlx5e_channels *chs)
2132{
2133        int i;
2134
2135        for (i = 0; i < chs->num; i++)
2136                mlx5e_activate_channel(chs->c[i]);
2137
2138        if (chs->ptp)
2139                mlx5e_ptp_activate_channel(chs->ptp);
2140}
2141
2142#define MLX5E_RQ_WQES_TIMEOUT 20000 /* msecs */
2143
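    /* Wait until every regular RQ has a minimal number of posted RX WQEs.
     * Once one channel times out, the remaining ones are checked with a
     * zero timeout so the total wait stays bounded.
     */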
2144static int mlx5e_wait_channels_min_rx_wqes(struct mlx5e_channels *chs)
2145{
2146        int err = 0;
2147        int i;
2148
2149        for (i = 0; i < chs->num; i++) {
2150                int timeout = err ? 0 : MLX5E_RQ_WQES_TIMEOUT;
2151
2152                err |= mlx5e_wait_for_min_rx_wqes(&chs->c[i]->rq, timeout);
2153
2154                /* Don't wait on the XSK RQ, because the newer xdpsock sample
2155                 * doesn't provide any Fill Ring entries at the setup stage.
2156                 */
2157        }
2158
2159        return err ? -ETIMEDOUT : 0;
2160}
2161
2162static void mlx5e_deactivate_channels(struct mlx5e_channels *chs)
2163{
2164        int i;
2165
2166        if (chs->ptp)
2167                mlx5e_ptp_deactivate_channel(chs->ptp);
2168
2169        for (i = 0; i < chs->num; i++)
2170                mlx5e_deactivate_channel(chs->c[i]);
2171}
2172
2173void mlx5e_close_channels(struct mlx5e_channels *chs)
2174{
2175        int i;
2176
2177        if (chs->ptp) {
2178                mlx5e_ptp_close(chs->ptp);
2179                chs->ptp = NULL;
2180        }
2181        for (i = 0; i < chs->num; i++)
2182                mlx5e_close_channel(chs->c[i]);
2183
2184        kfree(chs->c);
2185        chs->num = 0;
2186}
2187
2188static int mlx5e_modify_tirs_lro(struct mlx5e_priv *priv)
2189{
2190        struct mlx5e_rx_res *res = priv->rx_res;
2191        struct mlx5e_lro_param lro_param;
2192
2193        lro_param = mlx5e_get_lro_param(&priv->channels.params);
2194
2195        return mlx5e_rx_res_lro_set_param(res, &lro_param);
2196}
2197
2198static MLX5E_DEFINE_PREACTIVATE_WRAPPER_CTX(mlx5e_modify_tirs_lro);
2199
2200static int mlx5e_set_mtu(struct mlx5_core_dev *mdev,
2201                         struct mlx5e_params *params, u16 mtu)
2202{
2203        u16 hw_mtu = MLX5E_SW2HW_MTU(params, mtu);
2204        int err;
2205
2206        err = mlx5_set_port_mtu(mdev, hw_mtu, 1);
2207        if (err)
2208                return err;
2209
2210        /* Update vport context MTU */
2211        mlx5_modify_nic_vport_mtu(mdev, hw_mtu);
2212        return 0;
2213}
2214
2215static void mlx5e_query_mtu(struct mlx5_core_dev *mdev,
2216                            struct mlx5e_params *params, u16 *mtu)
2217{
2218        u16 hw_mtu = 0;
2219        int err;
2220
2221        err = mlx5_query_nic_vport_mtu(mdev, &hw_mtu);
2222        if (err || !hw_mtu) /* fall back to port oper mtu */
2223                mlx5_query_port_oper_mtu(mdev, &hw_mtu, 1);
2224
2225        *mtu = MLX5E_HW2SW_MTU(params, hw_mtu);
2226}
2227
2228int mlx5e_set_dev_port_mtu(struct mlx5e_priv *priv)
2229{
2230        struct mlx5e_params *params = &priv->channels.params;
2231        struct net_device *netdev = priv->netdev;
2232        struct mlx5_core_dev *mdev = priv->mdev;
2233        u16 mtu;
2234        int err;
2235
2236        err = mlx5e_set_mtu(mdev, params, params->sw_mtu);
2237        if (err)
2238                return err;
2239
2240        mlx5e_query_mtu(mdev, params, &mtu);
2241        if (mtu != params->sw_mtu)
2242                netdev_warn(netdev, "%s: VPort MTU %d is different than netdev mtu %d\n",
2243                            __func__, mtu, params->sw_mtu);
2244
2245        params->sw_mtu = mtu;
2246        return 0;
2247}
2248
2249MLX5E_DEFINE_PREACTIVATE_WRAPPER_CTX(mlx5e_set_dev_port_mtu);
2250
2251void mlx5e_set_netdev_mtu_boundaries(struct mlx5e_priv *priv)
2252{
2253        struct mlx5e_params *params = &priv->channels.params;
2254        struct net_device *netdev   = priv->netdev;
2255        struct mlx5_core_dev *mdev  = priv->mdev;
2256        u16 max_mtu;
2257
2258        /* MTU range: 68 - hw-specific max */
2259        netdev->min_mtu = ETH_MIN_MTU;
2260
2261        mlx5_query_port_max_mtu(mdev, &max_mtu, 1);
2262        netdev->max_mtu = min_t(unsigned int, MLX5E_HW2SW_MTU(params, max_mtu),
2263                                ETH_MAX_MTU);
2264}
2265
2266static int mlx5e_netdev_set_tcs(struct net_device *netdev, u16 nch, u8 ntc,
2267                                struct netdev_tc_txq *tc_to_txq)
2268{
2269        int tc, err;
2270
2271        netdev_reset_tc(netdev);
2272
2273        if (ntc == 1)
2274                return 0;
2275
2276        err = netdev_set_num_tc(netdev, ntc);
2277        if (err) {
2278                netdev_WARN(netdev, "netdev_set_num_tc failed (%d), ntc = %d\n", err, ntc);
2279                return err;
2280        }
2281
2282        for (tc = 0; tc < ntc; tc++) {
2283                u16 count, offset;
2284
2285                count = tc_to_txq[tc].count;
2286                offset = tc_to_txq[tc].offset;
2287                netdev_set_tc_queue(netdev, tc, count, offset);
2288        }
2289
2290        return 0;
2291}
2292
2293int mlx5e_update_tx_netdev_queues(struct mlx5e_priv *priv)
2294{
2295        int qos_queues, nch, ntc, num_txqs, err;
2296
2297        qos_queues = mlx5e_qos_cur_leaf_nodes(priv);
2298
2299        nch = priv->channels.params.num_channels;
2300        ntc = mlx5e_get_dcb_num_tc(&priv->channels.params);
2301        num_txqs = nch * ntc + qos_queues;
2302        if (MLX5E_GET_PFLAG(&priv->channels.params, MLX5E_PFLAG_TX_PORT_TS))
2303                num_txqs += ntc;
2304
2305        mlx5e_dbg(DRV, priv, "Setting num_txqs %d\n", num_txqs);
2306        err = netif_set_real_num_tx_queues(priv->netdev, num_txqs);
2307        if (err)
2308                netdev_warn(priv->netdev, "netif_set_real_num_tx_queues failed, %d\n", err);
2309
2310        return err;
2311}
2312
2313static int mlx5e_update_netdev_queues(struct mlx5e_priv *priv)
2314{
2315        struct netdev_tc_txq old_tc_to_txq[TC_MAX_QUEUE], *tc_to_txq;
2316        struct net_device *netdev = priv->netdev;
2317        int old_num_txqs, old_ntc;
2318        int num_rxqs, nch, ntc;
2319        int err;
2320        int i;
2321
2322        old_num_txqs = netdev->real_num_tx_queues;
2323        old_ntc = netdev->num_tc ? : 1;
2324        for (i = 0; i < ARRAY_SIZE(old_tc_to_txq); i++)
2325                old_tc_to_txq[i] = netdev->tc_to_txq[i];
2326
2327        nch = priv->channels.params.num_channels;
2328        ntc = priv->channels.params.mqprio.num_tc;
2329        num_rxqs = nch * priv->profile->rq_groups;
2330        tc_to_txq = priv->channels.params.mqprio.tc_to_txq;
2331
2332        err = mlx5e_netdev_set_tcs(netdev, nch, ntc, tc_to_txq);
2333        if (err)
2334                goto err_out;
2335        err = mlx5e_update_tx_netdev_queues(priv);
2336        if (err)
2337                goto err_tcs;
2338        err = netif_set_real_num_rx_queues(netdev, num_rxqs);
2339        if (err) {
2340                netdev_warn(netdev, "netif_set_real_num_rx_queues failed, %d\n", err);
2341                goto err_txqs;
2342        }
2343
2344        return 0;
2345
2346err_txqs:
2347        /* netif_set_real_num_rx_queues can fail only when nch increases. Only
2348         * one of nch and ntc is changed in this function, so the call
2349         * to netif_set_real_num_tx_queues below should not fail, because it
2350         * decreases the number of TX queues.
2351         */
2352        WARN_ON_ONCE(netif_set_real_num_tx_queues(netdev, old_num_txqs));
2353
2354err_tcs:
2355        WARN_ON_ONCE(mlx5e_netdev_set_tcs(netdev, old_num_txqs / old_ntc, old_ntc,
2356                                          old_tc_to_txq));
2357err_out:
2358        return err;
2359}
2360
2361static MLX5E_DEFINE_PREACTIVATE_WRAPPER_CTX(mlx5e_update_netdev_queues);
2362
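    /* Build a default XPS cpumask for each channel from the CPUs of the
     * completion vectors that map onto it (vectors ix, ix + num_channels,
     * ...), so TX queue selection follows the IRQ affinity.
     */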
2363static void mlx5e_set_default_xps_cpumasks(struct mlx5e_priv *priv,
2364                                           struct mlx5e_params *params)
2365{
2366        struct mlx5_core_dev *mdev = priv->mdev;
2367        int num_comp_vectors, ix, irq;
2368
2369        num_comp_vectors = mlx5_comp_vectors_count(mdev);
2370
2371        for (ix = 0; ix < params->num_channels; ix++) {
2372                cpumask_clear(priv->scratchpad.cpumask);
2373
2374                for (irq = ix; irq < num_comp_vectors; irq += params->num_channels) {
2375                        int cpu = cpumask_first(mlx5_comp_irq_get_affinity_mask(mdev, irq));
2376
2377                        cpumask_set_cpu(cpu, priv->scratchpad.cpumask);
2378                }
2379
2380                netif_set_xps_queue(priv->netdev, priv->scratchpad.cpumask, ix);
2381        }
2382}
2383
2384int mlx5e_num_channels_changed(struct mlx5e_priv *priv)
2385{
2386        u16 count = priv->channels.params.num_channels;
2387        int err;
2388
2389        err = mlx5e_update_netdev_queues(priv);
2390        if (err)
2391                return err;
2392
2393        mlx5e_set_default_xps_cpumasks(priv, &priv->channels.params);
2394
2395        /* This function may be called on attach, before priv->rx_res is created. */
2396        if (!netif_is_rxfh_configured(priv->netdev) && priv->rx_res)
2397                mlx5e_rx_res_rss_set_indir_uniform(priv->rx_res, count);
2398
2399        return 0;
2400}
2401
2402MLX5E_DEFINE_PREACTIVATE_WRAPPER_CTX(mlx5e_num_channels_changed);
2403
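    /* Rebuild the txq index -> SQ lookup used on transmit, plus the
     * channel/TC -> real txq maps, covering the regular channels and, when
     * enabled, the PTP SQs.
     */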
2404static void mlx5e_build_txq_maps(struct mlx5e_priv *priv)
2405{
2406        int i, ch, tc, num_tc;
2407
2408        ch = priv->channels.num;
2409        num_tc = mlx5e_get_dcb_num_tc(&priv->channels.params);
2410
2411        for (i = 0; i < ch; i++) {
2412                for (tc = 0; tc < num_tc; tc++) {
2413                        struct mlx5e_channel *c = priv->channels.c[i];
2414                        struct mlx5e_txqsq *sq = &c->sq[tc];
2415
2416                        priv->txq2sq[sq->txq_ix] = sq;
2417                        priv->channel_tc2realtxq[i][tc] = i + tc * ch;
2418                }
2419        }
2420
2421        if (!priv->channels.ptp)
2422                return;
2423
2424        if (!test_bit(MLX5E_PTP_STATE_TX, priv->channels.ptp->state))
2425                return;
2426
2427        for (tc = 0; tc < num_tc; tc++) {
2428                struct mlx5e_ptp *c = priv->channels.ptp;
2429                struct mlx5e_txqsq *sq = &c->ptpsq[tc].txqsq;
2430
2431                priv->txq2sq[sq->txq_ix] = sq;
2432                priv->port_ptp_tc2realtxq[tc] = priv->num_tc_x_num_ch + tc;
2433        }
2434}
2435
2436static void mlx5e_update_num_tc_x_num_ch(struct mlx5e_priv *priv)
2437{
2438        /* Sync with mlx5e_select_queue. */
2439        WRITE_ONCE(priv->num_tc_x_num_ch,
2440                   mlx5e_get_dcb_num_tc(&priv->channels.params) * priv->channels.num);
2441}
2442
2443void mlx5e_activate_priv_channels(struct mlx5e_priv *priv)
2444{
2445        mlx5e_update_num_tc_x_num_ch(priv);
2446        mlx5e_build_txq_maps(priv);
2447        mlx5e_activate_channels(&priv->channels);
2448        mlx5e_qos_activate_queues(priv);
2449        mlx5e_xdp_tx_enable(priv);
2450        netif_tx_start_all_queues(priv->netdev);
2451
2452        if (mlx5e_is_vport_rep(priv))
2453                mlx5e_add_sqs_fwd_rules(priv);
2454
2455        mlx5e_wait_channels_min_rx_wqes(&priv->channels);
2456
2457        if (priv->rx_res)
2458                mlx5e_rx_res_channels_activate(priv->rx_res, &priv->channels);
2459}
2460
2461void mlx5e_deactivate_priv_channels(struct mlx5e_priv *priv)
2462{
2463        if (priv->rx_res)
2464                mlx5e_rx_res_channels_deactivate(priv->rx_res);
2465
2466        if (mlx5e_is_vport_rep(priv))
2467                mlx5e_remove_sqs_fwd_rules(priv);
2468
2469        /* FIXME: This is a W/A only for the tx timeout watchdog false alarm when
2470         * polling for inactive tx queues.
2471         */
2472        netif_tx_stop_all_queues(priv->netdev);
2473        netif_tx_disable(priv->netdev);
2474        mlx5e_xdp_tx_disable(priv);
2475        mlx5e_deactivate_channels(&priv->channels);
2476}
2477
2478static int mlx5e_switch_priv_params(struct mlx5e_priv *priv,
2479                                    struct mlx5e_params *new_params,
2480                                    mlx5e_fp_preactivate preactivate,
2481                                    void *context)
2482{
2483        struct mlx5e_params old_params;
2484
2485        old_params = priv->channels.params;
2486        priv->channels.params = *new_params;
2487
2488        if (preactivate) {
2489                int err;
2490
2491                err = preactivate(priv, context);
2492                if (err) {
2493                        priv->channels.params = old_params;
2494                        return err;
2495                }
2496        }
2497
2498        return 0;
2499}
2500
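    /* Swap in an already-opened channel set: turn the carrier off,
     * deactivate the old channels, install the new ones, run the optional
     * preactivate hook (restoring the old set if it fails), close the old
     * channels, refresh RX via the profile's update_rx() and reactivate.
     * The carrier is restored at the end if it was up before.
     */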
2501static int mlx5e_switch_priv_channels(struct mlx5e_priv *priv,
2502                                      struct mlx5e_channels *new_chs,
2503                                      mlx5e_fp_preactivate preactivate,
2504                                      void *context)
2505{
2506        struct net_device *netdev = priv->netdev;
2507        struct mlx5e_channels old_chs;
2508        int carrier_ok;
2509        int err = 0;
2510
2511        carrier_ok = netif_carrier_ok(netdev);
2512        netif_carrier_off(netdev);
2513
2514        mlx5e_deactivate_priv_channels(priv);
2515
2516        old_chs = priv->channels;
2517        priv->channels = *new_chs;
2518
2519        /* New channels are ready to roll; call the preactivate hook if needed
2520         * to modify HW settings or update kernel parameters.
2521         */
2522        if (preactivate) {
2523                err = preactivate(priv, context);
2524                if (err) {
2525                        priv->channels = old_chs;
2526                        goto out;
2527                }
2528        }
2529
2530        mlx5e_close_channels(&old_chs);
2531        priv->profile->update_rx(priv);
2532
2533out:
2534        mlx5e_activate_priv_channels(priv);
2535
2536        /* restore the carrier if needed */
2537        if (carrier_ok)
2538                netif_carrier_on(netdev);
2539
2540        return err;
2541}
2542
2543int mlx5e_safe_switch_params(struct mlx5e_priv *priv,
2544                             struct mlx5e_params *params,
2545                             mlx5e_fp_preactivate preactivate,
2546                             void *context, bool reset)
2547{
2548        struct mlx5e_channels new_chs = {};
2549        int err;
2550
2551        reset &= test_bit(MLX5E_STATE_OPENED, &priv->state);
2552        if (!reset)
2553                return mlx5e_switch_priv_params(priv, params, preactivate, context);
2554
2555        new_chs.params = *params;
2556        err = mlx5e_open_channels(priv, &new_chs);
2557        if (err)
2558                return err;
2559        err = mlx5e_switch_priv_channels(priv, &new_chs, preactivate, context);
2560        if (err)
2561                mlx5e_close_channels(&new_chs);
2562
2563        return err;
2564}
2565
2566int mlx5e_safe_reopen_channels(struct mlx5e_priv *priv)
2567{
2568        return mlx5e_safe_switch_params(priv, &priv->channels.params, NULL, NULL, true);
2569}
2570
2571void mlx5e_timestamp_init(struct mlx5e_priv *priv)
2572{
2573        priv->tstamp.tx_type   = HWTSTAMP_TX_OFF;
2574        priv->tstamp.rx_filter = HWTSTAMP_FILTER_NONE;
2575}
2576
2577static void mlx5e_modify_admin_state(struct mlx5_core_dev *mdev,
2578                                     enum mlx5_port_status state)
2579{
2580        struct mlx5_eswitch *esw = mdev->priv.eswitch;
2581        int vport_admin_state;
2582
2583        mlx5_set_port_admin_status(mdev, state);
2584
2585        if (mlx5_eswitch_mode(mdev) == MLX5_ESWITCH_OFFLOADS ||
2586            !MLX5_CAP_GEN(mdev, uplink_follow))
2587                return;
2588
2589        if (state == MLX5_PORT_UP)
2590                vport_admin_state = MLX5_VPORT_ADMIN_STATE_AUTO;
2591        else
2592                vport_admin_state = MLX5_VPORT_ADMIN_STATE_DOWN;
2593
2594        mlx5_eswitch_set_vport_state(esw, MLX5_VPORT_UPLINK, vport_admin_state);
2595}
2596
2597int mlx5e_open_locked(struct net_device *netdev)
2598{
2599        struct mlx5e_priv *priv = netdev_priv(netdev);
2600        int err;
2601
2602        set_bit(MLX5E_STATE_OPENED, &priv->state);
2603
2604        err = mlx5e_open_channels(priv, &priv->channels);
2605        if (err)
2606                goto err_clear_state_opened_flag;
2607
2608        priv->profile->update_rx(priv);
2609        mlx5e_activate_priv_channels(priv);
2610        mlx5e_apply_traps(priv, true);
2611        if (priv->profile->update_carrier)
2612                priv->profile->update_carrier(priv);
2613
2614        mlx5e_queue_update_stats(priv);
2615        return 0;
2616
2617err_clear_state_opened_flag:
2618        clear_bit(MLX5E_STATE_OPENED, &priv->state);
2619        return err;
2620}
2621
2622int mlx5e_open(struct net_device *netdev)
2623{
2624        struct mlx5e_priv *priv = netdev_priv(netdev);
2625        int err;
2626
2627        mutex_lock(&priv->state_lock);
2628        err = mlx5e_open_locked(netdev);
2629        if (!err)
2630                mlx5e_modify_admin_state(priv->mdev, MLX5_PORT_UP);
2631        mutex_unlock(&priv->state_lock);
2632
2633        return err;
2634}
2635
2636int mlx5e_close_locked(struct net_device *netdev)
2637{
2638        struct mlx5e_priv *priv = netdev_priv(netdev);
2639
2640        /* May already be CLOSED if a previous configuration operation
2641         * (e.g. an RX/TX queue size change) that involves close & open failed.
2642         */
2643        if (!test_bit(MLX5E_STATE_OPENED, &priv->state))
2644                return 0;
2645
2646        mlx5e_apply_traps(priv, false);
2647        clear_bit(MLX5E_STATE_OPENED, &priv->state);
2648
2649        netif_carrier_off(priv->netdev);
2650        mlx5e_deactivate_priv_channels(priv);
2651        mlx5e_close_channels(&priv->channels);
2652
2653        return 0;
2654}
2655
2656int mlx5e_close(struct net_device *netdev)
2657{
2658        struct mlx5e_priv *priv = netdev_priv(netdev);
2659        int err;
2660
2661        if (!netif_device_present(netdev))
2662                return -ENODEV;
2663
2664        mutex_lock(&priv->state_lock);
2665        mlx5e_modify_admin_state(priv->mdev, MLX5_PORT_DOWN);
2666        err = mlx5e_close_locked(netdev);
2667        mutex_unlock(&priv->state_lock);
2668
2669        return err;
2670}
2671
2672static void mlx5e_free_drop_rq(struct mlx5e_rq *rq)
2673{
2674        mlx5_wq_destroy(&rq->wq_ctrl);
2675}
2676
2677static int mlx5e_alloc_drop_rq(struct mlx5_core_dev *mdev,
2678                               struct mlx5e_rq *rq,
2679                               struct mlx5e_rq_param *param)
2680{
2681        void *rqc = param->rqc;
2682        void *rqc_wq = MLX5_ADDR_OF(rqc, rqc, wq);
2683        int err;
2684
2685        param->wq.db_numa_node = param->wq.buf_numa_node;
2686
2687        err = mlx5_wq_cyc_create(mdev, &param->wq, rqc_wq, &rq->wqe.wq,
2688                                 &rq->wq_ctrl);
2689        if (err)
2690                return err;
2691
2692        /* Mark as unused given that "Drop-RQ" packets never reach XDP */
2693        xdp_rxq_info_unused(&rq->xdp_rxq);
2694
2695        rq->mdev = mdev;
2696
2697        return 0;
2698}
2699
2700static int mlx5e_alloc_drop_cq(struct mlx5e_priv *priv,
2701                               struct mlx5e_cq *cq,
2702                               struct mlx5e_cq_param *param)
2703{
2704        struct mlx5_core_dev *mdev = priv->mdev;
2705
2706        param->wq.buf_numa_node = dev_to_node(mlx5_core_dma_dev(mdev));
2707        param->wq.db_numa_node  = dev_to_node(mlx5_core_dma_dev(mdev));
2708
2709        return mlx5e_alloc_cq_common(priv, param, cq);
2710}
2711
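    /* The drop RQ is a minimal RQ used essentially as a packet sink while
     * the regular RQs are unavailable. It gets its own CQ and is moved to
     * ready; if that last step fails, traffic hitting it simply is not
     * accounted in rx_if_down_packets (see the warning below).
     */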
2712int mlx5e_open_drop_rq(struct mlx5e_priv *priv,
2713                       struct mlx5e_rq *drop_rq)
2714{
2715        struct mlx5_core_dev *mdev = priv->mdev;
2716        struct mlx5e_cq_param cq_param = {};
2717        struct mlx5e_rq_param rq_param = {};
2718        struct mlx5e_cq *cq = &drop_rq->cq;
2719        int err;
2720
2721        mlx5e_build_drop_rq_param(mdev, priv->drop_rq_q_counter, &rq_param);
2722
2723        err = mlx5e_alloc_drop_cq(priv, cq, &cq_param);
2724        if (err)
2725                return err;
2726
2727        err = mlx5e_create_cq(cq, &cq_param);
2728        if (err)
2729                goto err_free_cq;
2730
2731        err = mlx5e_alloc_drop_rq(mdev, drop_rq, &rq_param);
2732        if (err)
2733                goto err_destroy_cq;
2734
2735        err = mlx5e_create_rq(drop_rq, &rq_param);
2736        if (err)
2737                goto err_free_rq;
2738
2739        err = mlx5e_modify_rq_state(drop_rq, MLX5_RQC_STATE_RST, MLX5_RQC_STATE_RDY);
2740        if (err)
2741                mlx5_core_warn(priv->mdev, "modify_rq_state failed, rx_if_down_packets won't be counted %d\n", err);
2742
2743        return 0;
2744
2745err_free_rq:
2746        mlx5e_free_drop_rq(drop_rq);
2747
2748err_destroy_cq:
2749        mlx5e_destroy_cq(cq);
2750
2751err_free_cq:
2752        mlx5e_free_cq(cq);
2753
2754        return err;
2755}
2756
2757void mlx5e_close_drop_rq(struct mlx5e_rq *drop_rq)
2758{
2759        mlx5e_destroy_rq(drop_rq);
2760        mlx5e_free_drop_rq(drop_rq);
2761        mlx5e_destroy_cq(&drop_rq->cq);
2762        mlx5e_free_cq(&drop_rq->cq);
2763}
2764
2765int mlx5e_create_tis(struct mlx5_core_dev *mdev, void *in, u32 *tisn)
2766{
2767        void *tisc = MLX5_ADDR_OF(create_tis_in, in, ctx);
2768
2769        MLX5_SET(tisc, tisc, transport_domain, mdev->mlx5e_res.hw_objs.td.tdn);
2770
2771        if (MLX5_GET(tisc, tisc, tls_en))
2772                MLX5_SET(tisc, tisc, pd, mdev->mlx5e_res.hw_objs.pdn);
2773
2774        if (mlx5_lag_is_lacp_owner(mdev))
2775                MLX5_SET(tisc, tisc, strict_lag_tx_port_affinity, 1);
2776
2777        return mlx5_core_create_tis(mdev, in, tisn);
2778}
2779
2780void mlx5e_destroy_tis(struct mlx5_core_dev *mdev, u32 tisn)
2781{
2782        mlx5_core_destroy_tis(mdev, tisn);
2783}
2784
2785void mlx5e_destroy_tises(struct mlx5e_priv *priv)
2786{
2787        int tc, i;
2788
2789        for (i = 0; i < mlx5e_get_num_lag_ports(priv->mdev); i++)
2790                for (tc = 0; tc < priv->profile->max_tc; tc++)
2791                        mlx5e_destroy_tis(priv->mdev, priv->tisn[i][tc]);
2792}
2793
2794static bool mlx5e_lag_should_assign_affinity(struct mlx5_core_dev *mdev)
2795{
2796        return MLX5_CAP_GEN(mdev, lag_tx_port_affinity) && mlx5e_get_num_lag_ports(mdev) > 1;
2797}
2798
2799int mlx5e_create_tises(struct mlx5e_priv *priv)
2800{
2801        int tc, i;
2802        int err;
2803
2804        for (i = 0; i < mlx5e_get_num_lag_ports(priv->mdev); i++) {
2805                for (tc = 0; tc < priv->profile->max_tc; tc++) {
2806                        u32 in[MLX5_ST_SZ_DW(create_tis_in)] = {};
2807                        void *tisc;
2808
2809                        tisc = MLX5_ADDR_OF(create_tis_in, in, ctx);
2810
2811                        MLX5_SET(tisc, tisc, prio, tc << 1);
2812
2813                        if (mlx5e_lag_should_assign_affinity(priv->mdev))
2814                                MLX5_SET(tisc, tisc, lag_tx_port_affinity, i + 1);
2815
2816                        err = mlx5e_create_tis(priv->mdev, in, &priv->tisn[i][tc]);
2817                        if (err)
2818                                goto err_close_tises;
2819                }
2820        }
2821
2822        return 0;
2823
2824err_close_tises:
2825        for (; i >= 0; i--) {
2826                for (tc--; tc >= 0; tc--)
2827                        mlx5e_destroy_tis(priv->mdev, priv->tisn[i][tc]);
2828                tc = priv->profile->max_tc;
2829        }
2830
2831        return err;
2832}
2833
2834static void mlx5e_cleanup_nic_tx(struct mlx5e_priv *priv)
2835{
2836        mlx5e_destroy_tises(priv);
2837}
2838
2839static int mlx5e_modify_channels_scatter_fcs(struct mlx5e_channels *chs, bool enable)
2840{
2841        int err = 0;
2842        int i;
2843
2844        for (i = 0; i < chs->num; i++) {
2845                err = mlx5e_modify_rq_scatter_fcs(&chs->c[i]->rq, enable);
2846                if (err)
2847                        return err;
2848        }
2849
2850        return 0;
2851}
2852
2853static int mlx5e_modify_channels_vsd(struct mlx5e_channels *chs, bool vsd)
2854{
2855        int err;
2856        int i;
2857
2858        for (i = 0; i < chs->num; i++) {
2859                err = mlx5e_modify_rq_vsd(&chs->c[i]->rq, vsd);
2860                if (err)
2861                        return err;
2862        }
2863        if (chs->ptp && test_bit(MLX5E_PTP_STATE_RX, chs->ptp->state))
2864                return mlx5e_modify_rq_vsd(&chs->ptp->rq, vsd);
2865
2866        return 0;
2867}
2868
2869static void mlx5e_mqprio_build_default_tc_to_txq(struct netdev_tc_txq *tc_to_txq,
2870                                                 int ntc, int nch)
2871{
2872        int tc;
2873
2874        memset(tc_to_txq, 0, sizeof(*tc_to_txq) * TC_MAX_QUEUE);
2875
2876        /* Map netdev TCs to offset 0.
2877         * We have our own UP-to-TXQ mapping for the DCB mode of QoS.
2878         */
2879        for (tc = 0; tc < ntc; tc++) {
2880                tc_to_txq[tc] = (struct netdev_tc_txq) {
2881                        .count = nch,
2882                        .offset = 0,
2883                };
2884        }
2885}
2886
2887static void mlx5e_mqprio_build_tc_to_txq(struct netdev_tc_txq *tc_to_txq,
2888                                         struct tc_mqprio_qopt *qopt)
2889{
2890        int tc;
2891
2892        for (tc = 0; tc < TC_MAX_QUEUE; tc++) {
2893                tc_to_txq[tc] = (struct netdev_tc_txq) {
2894                        .count = qopt->count[tc],
2895                        .offset = qopt->offset[tc],
2896                };
2897        }
2898}
2899
2900static void mlx5e_params_mqprio_dcb_set(struct mlx5e_params *params, u8 num_tc)
2901{
2902        params->mqprio.mode = TC_MQPRIO_MODE_DCB;
2903        params->mqprio.num_tc = num_tc;
2904        mlx5e_mqprio_build_default_tc_to_txq(params->mqprio.tc_to_txq, num_tc,
2905                                             params->num_channels);
2906}
2907
2908static void mlx5e_params_mqprio_channel_set(struct mlx5e_params *params,
2909                                            struct tc_mqprio_qopt *qopt)
2910{
2911        params->mqprio.mode = TC_MQPRIO_MODE_CHANNEL;
2912        params->mqprio.num_tc = qopt->num_tc;
2913        mlx5e_mqprio_build_tc_to_txq(params->mqprio.tc_to_txq, qopt);
2914}
2915
2916static void mlx5e_params_mqprio_reset(struct mlx5e_params *params)
2917{
2918        mlx5e_params_mqprio_dcb_set(params, 1);
2919}
2920
2921static int mlx5e_setup_tc_mqprio_dcb(struct mlx5e_priv *priv,
2922                                     struct tc_mqprio_qopt *mqprio)
2923{
2924        struct mlx5e_params new_params;
2925        u8 tc = mqprio->num_tc;
2926        int err;
2927
2928        mqprio->hw = TC_MQPRIO_HW_OFFLOAD_TCS;
2929
2930        if (tc && tc != MLX5E_MAX_NUM_TC)
2931                return -EINVAL;
2932
2933        new_params = priv->channels.params;
2934        mlx5e_params_mqprio_dcb_set(&new_params, tc ? tc : 1);
2935
2936        err = mlx5e_safe_switch_params(priv, &new_params,
2937                                       mlx5e_num_channels_changed_ctx, NULL, true);
2938
2939        priv->max_opened_tc = max_t(u8, priv->max_opened_tc,
2940                                    mlx5e_get_dcb_num_tc(&priv->channels.params));
2941        return err;
2942}
2943
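    /* Validate channel-mode MQPRIO: it conflicts with TX port timestamping,
     * every TC's queue group must be non-empty and contiguous (each offset
     * must equal the running count), per-TC min/max rates are not offloaded,
     * and the groups together must cover exactly the configured channels.
     */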
2944static int mlx5e_mqprio_channel_validate(struct mlx5e_priv *priv,
2945                                         struct tc_mqprio_qopt_offload *mqprio)
2946{
2947        struct net_device *netdev = priv->netdev;
2948        struct mlx5e_ptp *ptp_channel;
2949        int agg_count = 0;
2950        int i;
2951
2952        ptp_channel = priv->channels.ptp;
2953        if (ptp_channel && test_bit(MLX5E_PTP_STATE_TX, ptp_channel->state)) {
2954                netdev_err(netdev,
2955                           "Cannot activate MQPRIO mode channel since it conflicts with TX port TS\n");
2956                return -EINVAL;
2957        }
2958
2959        if (mqprio->qopt.offset[0] != 0 || mqprio->qopt.num_tc < 1 ||
2960            mqprio->qopt.num_tc > MLX5E_MAX_NUM_MQPRIO_CH_TC)
2961                return -EINVAL;
2962
2963        for (i = 0; i < mqprio->qopt.num_tc; i++) {
2964                if (!mqprio->qopt.count[i]) {
2965                        netdev_err(netdev, "Zero size for queue-group (%d) is not supported\n", i);
2966                        return -EINVAL;
2967                }
2968                if (mqprio->min_rate[i]) {
2969                        netdev_err(netdev, "Min tx rate is not supported\n");
2970                        return -EINVAL;
2971                }
2972                if (mqprio->max_rate[i]) {
2973                        netdev_err(netdev, "Max tx rate is not supported\n");
2974                        return -EINVAL;
2975                }
2976
2977                if (mqprio->qopt.offset[i] != agg_count) {
2978                        netdev_err(netdev, "Discontinuous queues config is not supported\n");
2979                        return -EINVAL;
2980                }
2981                agg_count += mqprio->qopt.count[i];
2982        }
2983
2984        if (priv->channels.params.num_channels != agg_count) {
2985                netdev_err(netdev, "Num of queues (%d) does not match available (%d)\n",
2986                           agg_count, priv->channels.params.num_channels);
2987                return -EINVAL;
2988        }
2989
2990        return 0;
2991}
2992
2993static int mlx5e_setup_tc_mqprio_channel(struct mlx5e_priv *priv,
2994                                         struct tc_mqprio_qopt_offload *mqprio)
2995{
2996        mlx5e_fp_preactivate preactivate;
2997        struct mlx5e_params new_params;
2998        bool nch_changed;
2999        int err;
3000
3001        err = mlx5e_mqprio_channel_validate(priv, mqprio);
3002        if (err)
3003                return err;
3004
3005        new_params = priv->channels.params;
3006        mlx5e_params_mqprio_channel_set(&new_params, &mqprio->qopt);
3007
3008        nch_changed = mlx5e_get_dcb_num_tc(&priv->channels.params) > 1;
3009        preactivate = nch_changed ? mlx5e_num_channels_changed_ctx :
3010                mlx5e_update_netdev_queues_ctx;
3011        return mlx5e_safe_switch_params(priv, &new_params, preactivate, NULL, true);
3012}
3013
3014static int mlx5e_setup_tc_mqprio(struct mlx5e_priv *priv,
3015                                 struct tc_mqprio_qopt_offload *mqprio)
3016{
3017        /* MQPRIO is another top-level qdisc that can't be attached
3018         * simultaneously with the offloaded HTB.
3019         */
3020        if (WARN_ON(priv->htb.maj_id))
3021                return -EINVAL;
3022
3023        switch (mqprio->mode) {
3024        case TC_MQPRIO_MODE_DCB:
3025                return mlx5e_setup_tc_mqprio_dcb(priv, &mqprio->qopt);
3026        case TC_MQPRIO_MODE_CHANNEL:
3027                return mlx5e_setup_tc_mqprio_channel(priv, mqprio);
3028        default:
3029                return -EOPNOTSUPP;
3030        }
3031}
3032
3033static int mlx5e_setup_tc_htb(struct mlx5e_priv *priv, struct tc_htb_qopt_offload *htb)
3034{
3035        int res;
3036
3037        switch (htb->command) {
3038        case TC_HTB_CREATE:
3039                return mlx5e_htb_root_add(priv, htb->parent_classid, htb->classid,
3040                                          htb->extack);
3041        case TC_HTB_DESTROY:
3042                return mlx5e_htb_root_del(priv);
3043        case TC_HTB_LEAF_ALLOC_QUEUE:
3044                res = mlx5e_htb_leaf_alloc_queue(priv, htb->classid, htb->parent_classid,
3045                                                 htb->rate, htb->ceil, htb->extack);
3046                if (res < 0)
3047                        return res;
3048                htb->qid = res;
3049                return 0;
3050        case TC_HTB_LEAF_TO_INNER:
3051                return mlx5e_htb_leaf_to_inner(priv, htb->parent_classid, htb->classid,
3052                                               htb->rate, htb->ceil, htb->extack);
3053        case TC_HTB_LEAF_DEL:
3054                return mlx5e_htb_leaf_del(priv, &htb->classid, htb->extack);
3055        case TC_HTB_LEAF_DEL_LAST:
3056        case TC_HTB_LEAF_DEL_LAST_FORCE:
3057                return mlx5e_htb_leaf_del_last(priv, htb->classid,
3058                                               htb->command == TC_HTB_LEAF_DEL_LAST_FORCE,
3059                                               htb->extack);
3060        case TC_HTB_NODE_MODIFY:
3061                return mlx5e_htb_node_modify(priv, htb->classid, htb->rate, htb->ceil,
3062                                             htb->extack);
3063        case TC_HTB_LEAF_QUERY_QUEUE:
3064                res = mlx5e_get_txq_by_classid(priv, htb->classid);
3065                if (res < 0)
3066                        return res;
3067                htb->qid = res;
3068                return 0;
3069        default:
3070                return -EOPNOTSUPP;
3071        }
3072}
3073
3074static LIST_HEAD(mlx5e_block_cb_list);
3075
3076static int mlx5e_setup_tc(struct net_device *dev, enum tc_setup_type type,
3077                          void *type_data)
3078{
3079        struct mlx5e_priv *priv = netdev_priv(dev);
3080        bool tc_unbind = false;
3081        int err;
3082
3083        if (type == TC_SETUP_BLOCK &&
3084            ((struct flow_block_offload *)type_data)->command == FLOW_BLOCK_UNBIND)
3085                tc_unbind = true;
3086
3087        if (!netif_device_present(dev) && !tc_unbind)
3088                return -ENODEV;
3089
3090        switch (type) {
3091        case TC_SETUP_BLOCK: {
3092                struct flow_block_offload *f = type_data;
3093
3094                f->unlocked_driver_cb = true;
3095                return flow_block_cb_setup_simple(type_data,
3096                                                  &mlx5e_block_cb_list,
3097                                                  mlx5e_setup_tc_block_cb,
3098                                                  priv, priv, true);
3099        }
3100        case TC_SETUP_QDISC_MQPRIO:
3101                mutex_lock(&priv->state_lock);
3102                err = mlx5e_setup_tc_mqprio(priv, type_data);
3103                mutex_unlock(&priv->state_lock);
3104                return err;
3105        case TC_SETUP_QDISC_HTB:
3106                mutex_lock(&priv->state_lock);
3107                err = mlx5e_setup_tc_htb(priv, type_data);
3108                mutex_unlock(&priv->state_lock);
3109                return err;
3110        default:
3111                return -EOPNOTSUPP;
3112        }
3113}
3114
3115void mlx5e_fold_sw_stats64(struct mlx5e_priv *priv, struct rtnl_link_stats64 *s)
3116{
3117        int i;
3118
3119        for (i = 0; i < priv->stats_nch; i++) {
3120                struct mlx5e_channel_stats *channel_stats = &priv->channel_stats[i];
3121                struct mlx5e_rq_stats *xskrq_stats = &channel_stats->xskrq;
3122                struct mlx5e_rq_stats *rq_stats = &channel_stats->rq;
3123                int j;
3124
3125                s->rx_packets   += rq_stats->packets + xskrq_stats->packets;
3126                s->rx_bytes     += rq_stats->bytes + xskrq_stats->bytes;
3127                s->multicast    += rq_stats->mcast_packets + xskrq_stats->mcast_packets;
3128
3129                for (j = 0; j < priv->max_opened_tc; j++) {
3130                        struct mlx5e_sq_stats *sq_stats = &channel_stats->sq[j];
3131
3132                        s->tx_packets    += sq_stats->packets;
3133                        s->tx_bytes      += sq_stats->bytes;
3134                        s->tx_dropped    += sq_stats->dropped;
3135                }
3136        }
3137        if (priv->tx_ptp_opened) {
3138                for (i = 0; i < priv->max_opened_tc; i++) {
3139                        struct mlx5e_sq_stats *sq_stats = &priv->ptp_stats.sq[i];
3140
3141                        s->tx_packets    += sq_stats->packets;
3142                        s->tx_bytes      += sq_stats->bytes;
3143                        s->tx_dropped    += sq_stats->dropped;
3144                }
3145        }
3146        if (priv->rx_ptp_opened) {
3147                struct mlx5e_rq_stats *rq_stats = &priv->ptp_stats.rq;
3148
3149                s->rx_packets   += rq_stats->packets;
3150                s->rx_bytes     += rq_stats->bytes;
3151                s->multicast    += rq_stats->mcast_packets;
3152        }
3153}
3154
3155void
3156mlx5e_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats)
3157{
3158        struct mlx5e_priv *priv = netdev_priv(dev);
3159        struct mlx5e_pport_stats *pstats = &priv->stats.pport;
3160
3161        if (!netif_device_present(dev))
3162                return;
3163
3164        /* In switchdev mode, the monitor counters don't track the
3165         * 802_3 rx/tx stats, so the update-stats mechanism should
3166         * keep the 802_3 layout counters updated.
3167         */
3168        if (!mlx5e_monitor_counter_supported(priv) ||
3169            mlx5e_is_uplink_rep(priv)) {
3170                /* update HW stats in background for next time */
3171                mlx5e_queue_update_stats(priv);
3172        }
3173
3174        if (mlx5e_is_uplink_rep(priv)) {
3175                struct mlx5e_vport_stats *vstats = &priv->stats.vport;
3176
3177                stats->rx_packets = PPORT_802_3_GET(pstats, a_frames_received_ok);
3178                stats->rx_bytes   = PPORT_802_3_GET(pstats, a_octets_received_ok);
3179                stats->tx_packets = PPORT_802_3_GET(pstats, a_frames_transmitted_ok);
3180                stats->tx_bytes   = PPORT_802_3_GET(pstats, a_octets_transmitted_ok);
3181
3182                /* vport multicast also counts packets that are dropped due to steering
3183                 * or rx out of buffer
3184                 */
3185                stats->multicast = VPORT_COUNTER_GET(vstats, received_eth_multicast.packets);
3186        } else {
3187                mlx5e_fold_sw_stats64(priv, stats);
3188        }
3189
3190        stats->rx_dropped = priv->stats.qcnt.rx_out_of_buffer;
3191
3192        stats->rx_length_errors =
3193                PPORT_802_3_GET(pstats, a_in_range_length_errors) +
3194                PPORT_802_3_GET(pstats, a_out_of_range_length_field) +
3195                PPORT_802_3_GET(pstats, a_frame_too_long_errors);
3196        stats->rx_crc_errors =
3197                PPORT_802_3_GET(pstats, a_frame_check_sequence_errors);
3198        stats->rx_frame_errors = PPORT_802_3_GET(pstats, a_alignment_errors);
3199        stats->tx_aborted_errors = PPORT_2863_GET(pstats, if_out_discards);
3200        stats->rx_errors = stats->rx_length_errors + stats->rx_crc_errors +
3201                           stats->rx_frame_errors;
3202        stats->tx_errors = stats->tx_aborted_errors + stats->tx_carrier_errors;
3203}
3204
3205static void mlx5e_nic_set_rx_mode(struct mlx5e_priv *priv)
3206{
3207        if (mlx5e_is_uplink_rep(priv))
3208                return; /* no rx mode for uplink rep */
3209
3210        queue_work(priv->wq, &priv->set_rx_mode_work);
3211}
3212
3213static void mlx5e_set_rx_mode(struct net_device *dev)
3214{
3215        struct mlx5e_priv *priv = netdev_priv(dev);
3216
3217        mlx5e_nic_set_rx_mode(priv);
3218}
3219
3220static int mlx5e_set_mac(struct net_device *netdev, void *addr)
3221{
3222        struct mlx5e_priv *priv = netdev_priv(netdev);
3223        struct sockaddr *saddr = addr;
3224
3225        if (!is_valid_ether_addr(saddr->sa_data))
3226                return -EADDRNOTAVAIL;
3227
3228        netif_addr_lock_bh(netdev);
3229        ether_addr_copy(netdev->dev_addr, saddr->sa_data);
3230        netif_addr_unlock_bh(netdev);
3231
3232        mlx5e_nic_set_rx_mode(priv);
3233
3234        return 0;
3235}
3236
3237#define MLX5E_SET_FEATURE(features, feature, enable)    \
3238        do {                                            \
3239                if (enable)                             \
3240                        *features |= feature;           \
3241                else                                    \
3242                        *features &= ~feature;          \
3243        } while (0)
3244
3245typedef int (*mlx5e_feature_handler)(struct net_device *netdev, bool enable);
3246
3247static int set_feature_lro(struct net_device *netdev, bool enable)
3248{
3249        struct mlx5e_priv *priv = netdev_priv(netdev);
3250        struct mlx5_core_dev *mdev = priv->mdev;
3251        struct mlx5e_params *cur_params;
3252        struct mlx5e_params new_params;
3253        bool reset = true;
3254        int err = 0;
3255
3256        mutex_lock(&priv->state_lock);
3257
3258        if (enable && priv->xsk.refcnt) {
3259                netdev_warn(netdev, "LRO is incompatible with AF_XDP (%u XSKs are active)\n",
3260                            priv->xsk.refcnt);
3261                err = -EINVAL;
3262                goto out;
3263        }
3264
3265        cur_params = &priv->channels.params;
3266        if (enable && !MLX5E_GET_PFLAG(cur_params, MLX5E_PFLAG_RX_STRIDING_RQ)) {
3267                netdev_warn(netdev, "can't set LRO with legacy RQ\n");
3268                err = -EINVAL;
3269                goto out;
3270        }
3271
3272        new_params = *cur_params;
3273        new_params.lro_en = enable;
3274
3275        if (cur_params->rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) {
3276                if (mlx5e_rx_mpwqe_is_linear_skb(mdev, cur_params, NULL) ==
3277                    mlx5e_rx_mpwqe_is_linear_skb(mdev, &new_params, NULL))
3278                        reset = false;
3279        }
3280
3281        err = mlx5e_safe_switch_params(priv, &new_params,
3282                                       mlx5e_modify_tirs_lro_ctx, NULL, reset);
3283out:
3284        mutex_unlock(&priv->state_lock);
3285        return err;
3286}
3287
3288static int set_feature_cvlan_filter(struct net_device *netdev, bool enable)
3289{
3290        struct mlx5e_priv *priv = netdev_priv(netdev);
3291
3292        if (enable)
3293                mlx5e_enable_cvlan_filter(priv);
3294        else
3295                mlx5e_disable_cvlan_filter(priv);
3296
3297        return 0;
3298}
3299
3300static int set_feature_hw_tc(struct net_device *netdev, bool enable)
3301{
3302        struct mlx5e_priv *priv = netdev_priv(netdev);
3303
3304#if IS_ENABLED(CONFIG_MLX5_CLS_ACT)
3305        if (!enable && mlx5e_tc_num_filters(priv, MLX5_TC_FLAG(NIC_OFFLOAD))) {
3306                netdev_err(netdev,
3307                           "Active offloaded tc filters, can't turn hw_tc_offload off\n");
3308                return -EINVAL;
3309        }
3310#endif
3311
3312        if (!enable && priv->htb.maj_id) {
3313                netdev_err(netdev, "Active HTB offload, can't turn hw_tc_offload off\n");
3314                return -EINVAL;
3315        }
3316
3317        return 0;
3318}
3319
3320static int set_feature_rx_all(struct net_device *netdev, bool enable)
3321{
3322        struct mlx5e_priv *priv = netdev_priv(netdev);
3323        struct mlx5_core_dev *mdev = priv->mdev;
3324
3325        return mlx5_set_port_fcs(mdev, !enable);
3326}
3327
3328static int mlx5e_set_rx_port_ts(struct mlx5_core_dev *mdev, bool enable)
3329{
3330        u32 in[MLX5_ST_SZ_DW(pcmr_reg)] = {};
3331        bool supported, curr_state;
3332        int err;
3333
3334        if (!MLX5_CAP_GEN(mdev, ports_check))
3335                return 0;
3336
3337        err = mlx5_query_ports_check(mdev, in, sizeof(in));
3338        if (err)
3339                return err;
3340
3341        supported = MLX5_GET(pcmr_reg, in, rx_ts_over_crc_cap);
3342        curr_state = MLX5_GET(pcmr_reg, in, rx_ts_over_crc);
3343
3344        if (!supported || enable == curr_state)
3345                return 0;
3346
3347        MLX5_SET(pcmr_reg, in, local_port, 1);
3348        MLX5_SET(pcmr_reg, in, rx_ts_over_crc, enable);
3349
3350        return mlx5_set_ports_check(mdev, in, sizeof(in));
3351}
3352
3353static int set_feature_rx_fcs(struct net_device *netdev, bool enable)
3354{
3355        struct mlx5e_priv *priv = netdev_priv(netdev);
3356        struct mlx5e_channels *chs = &priv->channels;
3357        struct mlx5_core_dev *mdev = priv->mdev;
3358        int err;
3359
3360        mutex_lock(&priv->state_lock);
3361
3362        if (enable) {
3363                err = mlx5e_set_rx_port_ts(mdev, false);
3364                if (err)
3365                        goto out;
3366
3367                chs->params.scatter_fcs_en = true;
3368                err = mlx5e_modify_channels_scatter_fcs(chs, true);
3369                if (err) {
3370                        chs->params.scatter_fcs_en = false;
3371                        mlx5e_set_rx_port_ts(mdev, true);
3372                }
3373        } else {
3374                chs->params.scatter_fcs_en = false;
3375                err = mlx5e_modify_channels_scatter_fcs(chs, false);
3376                if (err) {
3377                        chs->params.scatter_fcs_en = true;
3378                        goto out;
3379                }
3380                err = mlx5e_set_rx_port_ts(mdev, true);
3381                if (err) {
3382                        mlx5_core_warn(mdev, "Failed to set RX port timestamp %d\n", err);
3383                        err = 0;
3384                }
3385        }
3386
3387out:
3388        mutex_unlock(&priv->state_lock);
3389        return err;
3390}
3391
3392static int set_feature_rx_vlan(struct net_device *netdev, bool enable)
3393{
3394        struct mlx5e_priv *priv = netdev_priv(netdev);
3395        int err = 0;
3396
3397        mutex_lock(&priv->state_lock);
3398
3399        priv->channels.params.vlan_strip_disable = !enable;
3400        if (!test_bit(MLX5E_STATE_OPENED, &priv->state))
3401                goto unlock;
3402
3403        err = mlx5e_modify_channels_vsd(&priv->channels, !enable);
3404        if (err)
3405                priv->channels.params.vlan_strip_disable = enable;
3406
3407unlock:
3408        mutex_unlock(&priv->state_lock);
3409
3410        return err;
3411}
3412
3413#ifdef CONFIG_MLX5_EN_ARFS
3414static int set_feature_arfs(struct net_device *netdev, bool enable)
3415{
3416        struct mlx5e_priv *priv = netdev_priv(netdev);
3417        int err;
3418
3419        if (enable)
3420                err = mlx5e_arfs_enable(priv);
3421        else
3422                err = mlx5e_arfs_disable(priv);
3423
3424        return err;
3425}
3426#endif
3427
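/* Summary comment added for clarity. Apply a single feature toggle: the
 * handler is invoked only if the bit in @wanted_features differs from the
 * current netdev state, and on success the bit is mirrored into @features so
 * the caller can report the resulting operational set back to the stack.
 */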
3428static int mlx5e_handle_feature(struct net_device *netdev,
3429                                netdev_features_t *features,
3430                                netdev_features_t wanted_features,
3431                                netdev_features_t feature,
3432                                mlx5e_feature_handler feature_handler)
3433{
3434        netdev_features_t changes = wanted_features ^ netdev->features;
3435        bool enable = !!(wanted_features & feature);
3436        int err;
3437
3438        if (!(changes & feature))
3439                return 0;
3440
3441        err = feature_handler(netdev, enable);
3442        if (err) {
3443                netdev_err(netdev, "%s feature %pNF failed, err %d\n",
3444                           enable ? "Enable" : "Disable", &feature, err);
3445                return err;
3446        }
3447
3448        MLX5E_SET_FEATURE(features, feature, enable);
3449        return 0;
3450}
3451
3452int mlx5e_set_features(struct net_device *netdev, netdev_features_t features)
3453{
3454        netdev_features_t oper_features = netdev->features;
3455        int err = 0;
3456
3457#define MLX5E_HANDLE_FEATURE(feature, handler) \
3458        mlx5e_handle_feature(netdev, &oper_features, features, feature, handler)
3459
3460        err |= MLX5E_HANDLE_FEATURE(NETIF_F_LRO, set_feature_lro);
3461        err |= MLX5E_HANDLE_FEATURE(NETIF_F_HW_VLAN_CTAG_FILTER,
3462                                    set_feature_cvlan_filter);
3463        err |= MLX5E_HANDLE_FEATURE(NETIF_F_HW_TC, set_feature_hw_tc);
3464        err |= MLX5E_HANDLE_FEATURE(NETIF_F_RXALL, set_feature_rx_all);
3465        err |= MLX5E_HANDLE_FEATURE(NETIF_F_RXFCS, set_feature_rx_fcs);
3466        err |= MLX5E_HANDLE_FEATURE(NETIF_F_HW_VLAN_CTAG_RX, set_feature_rx_vlan);
3467#ifdef CONFIG_MLX5_EN_ARFS
3468        err |= MLX5E_HANDLE_FEATURE(NETIF_F_NTUPLE, set_feature_arfs);
3469#endif
3470        err |= MLX5E_HANDLE_FEATURE(NETIF_F_HW_TLS_RX, mlx5e_ktls_set_feature_rx);
3471
3472        if (err) {
3473                netdev->features = oper_features;
3474                return -EINVAL;
3475        }
3476
3477        return 0;
3478}
3479
3480static netdev_features_t mlx5e_fix_uplink_rep_features(struct net_device *netdev,
3481                                                       netdev_features_t features)
3482{
3483        features &= ~NETIF_F_HW_TLS_RX;
3484        if (netdev->features & NETIF_F_HW_TLS_RX)
3485                netdev_warn(netdev, "Disabling hw_tls_rx, not supported in switchdev mode\n");
3486
3487        features &= ~NETIF_F_HW_TLS_TX;
3488        if (netdev->features & NETIF_F_HW_TLS_TX)
3489                netdev_warn(netdev, "Disabling hw_tls_tx, not supported in switchdev mode\n");
3490
3491        features &= ~NETIF_F_NTUPLE;
3492        if (netdev->features & NETIF_F_NTUPLE)
3493                netdev_warn(netdev, "Disabling ntuple, not supported in switchdev mode\n");
3494
3495        return features;
3496}
3497
3498static netdev_features_t mlx5e_fix_features(struct net_device *netdev,
3499                                            netdev_features_t features)
3500{
3501        struct mlx5e_priv *priv = netdev_priv(netdev);
3502        struct mlx5e_params *params;
3503
3504        mutex_lock(&priv->state_lock);
3505        params = &priv->channels.params;
3506        if (!priv->fs.vlan ||
3507            !bitmap_empty(mlx5e_vlan_get_active_svlans(priv->fs.vlan), VLAN_N_VID)) {
3508                /* HW strips the outer C-tag header; this is a problem
3509                 * for S-tag traffic.
3510                 */
3511                features &= ~NETIF_F_HW_VLAN_CTAG_RX;
3512                if (!params->vlan_strip_disable)
3513                        netdev_warn(netdev, "Dropping C-tag vlan stripping offload due to S-tag vlan\n");
3514        }
3515
3516        if (!MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_STRIDING_RQ)) {
3517                if (features & NETIF_F_LRO) {
3518                        netdev_warn(netdev, "Disabling LRO, not supported in legacy RQ\n");
3519                        features &= ~NETIF_F_LRO;
3520                }
3521        }
3522
3523        if (MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS)) {
3524                features &= ~NETIF_F_RXHASH;
3525                if (netdev->features & NETIF_F_RXHASH)
3526                        netdev_warn(netdev, "Disabling rxhash, not supported when CQE compress is active\n");
3527        }
3528
3529        if (mlx5e_is_uplink_rep(priv))
3530                features = mlx5e_fix_uplink_rep_features(netdev, features);
3531
3532        mutex_unlock(&priv->state_lock);
3533
3534        return features;
3535}
3536
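/* Summary comment added for clarity. Check the new MTU against every active
 * XSK pool: each XSK RQ must still fit the HW MTU plus headroom into the XSK
 * frame and keep XDP_PASS SKBs within a page; otherwise the offending channel
 * and the maximum allowed MTU are logged and the check fails.
 */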
3537static bool mlx5e_xsk_validate_mtu(struct net_device *netdev,
3538                                   struct mlx5e_channels *chs,
3539                                   struct mlx5e_params *new_params,
3540                                   struct mlx5_core_dev *mdev)
3541{
3542        u16 ix;
3543
3544        for (ix = 0; ix < chs->params.num_channels; ix++) {
3545                struct xsk_buff_pool *xsk_pool =
3546                        mlx5e_xsk_get_pool(&chs->params, chs->params.xsk, ix);
3547                struct mlx5e_xsk_param xsk;
3548
3549                if (!xsk_pool)
3550                        continue;
3551
3552                mlx5e_build_xsk_param(xsk_pool, &xsk);
3553
3554                if (!mlx5e_validate_xsk_param(new_params, &xsk, mdev)) {
3555                        u32 hr = mlx5e_get_linear_rq_headroom(new_params, &xsk);
3556                        int max_mtu_frame, max_mtu_page, max_mtu;
3557
3558                        /* Two criteria must be met:
3559                         * 1. HW MTU + all headrooms <= XSK frame size.
3560                         * 2. Size of SKBs allocated on XDP_PASS <= PAGE_SIZE.
3561                         */
3562                        max_mtu_frame = MLX5E_HW2SW_MTU(new_params, xsk.chunk_size - hr);
3563                        max_mtu_page = mlx5e_xdp_max_mtu(new_params, &xsk);
3564                        max_mtu = min(max_mtu_frame, max_mtu_page);
3565
3566                        netdev_err(netdev, "MTU %d is too big for an XSK running on channel %u. Try MTU <= %d\n",
3567                                   new_params->sw_mtu, ix, max_mtu);
3568                        return false;
3569                }
3570        }
3571
3572        return true;
3573}
3574
3575int mlx5e_change_mtu(struct net_device *netdev, int new_mtu,
3576                     mlx5e_fp_preactivate preactivate)
3577{
3578        struct mlx5e_priv *priv = netdev_priv(netdev);
3579        struct mlx5e_params new_params;
3580        struct mlx5e_params *params;
3581        bool reset = true;
3582        int err = 0;
3583
3584        mutex_lock(&priv->state_lock);
3585
3586        params = &priv->channels.params;
3587
3588        new_params = *params;
3589        new_params.sw_mtu = new_mtu;
3590        err = mlx5e_validate_params(priv->mdev, &new_params);
3591        if (err)
3592                goto out;
3593
3594        if (params->xdp_prog &&
3595            !mlx5e_rx_is_linear_skb(&new_params, NULL)) {
3596                netdev_err(netdev, "MTU(%d) > %d is not allowed while XDP enabled\n",
3597                           new_mtu, mlx5e_xdp_max_mtu(params, NULL));
3598                err = -EINVAL;
3599                goto out;
3600        }
3601
3602        if (priv->xsk.refcnt &&
3603            !mlx5e_xsk_validate_mtu(netdev, &priv->channels,
3604                                    &new_params, priv->mdev)) {
3605                err = -EINVAL;
3606                goto out;
3607        }
3608
3609        if (params->lro_en)
3610                reset = false;
3611
3612        if (params->rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) {
3613                bool is_linear_old = mlx5e_rx_mpwqe_is_linear_skb(priv->mdev, params, NULL);
3614                bool is_linear_new = mlx5e_rx_mpwqe_is_linear_skb(priv->mdev,
3615                                                                  &new_params, NULL);
3616                u8 ppw_old = mlx5e_mpwqe_log_pkts_per_wqe(params, NULL);
3617                u8 ppw_new = mlx5e_mpwqe_log_pkts_per_wqe(&new_params, NULL);
3618
3619                /* Always reset in linear mode - hw_mtu is used in data path.
3620                 * Check that the mode was non-linear and didn't change.
3621                 * If XSK is active, XSK RQs are linear.
3622                 */
3623                if (!is_linear_old && !is_linear_new && !priv->xsk.refcnt &&
3624                    ppw_old == ppw_new)
3625                        reset = false;
3626        }
3627
3628        err = mlx5e_safe_switch_params(priv, &new_params, preactivate, NULL, reset);
3629
3630out:
3631        netdev->mtu = params->sw_mtu;
3632        mutex_unlock(&priv->state_lock);
3633        return err;
3634}
3635
3636static int mlx5e_change_nic_mtu(struct net_device *netdev, int new_mtu)
3637{
3638        return mlx5e_change_mtu(netdev, new_mtu, mlx5e_set_dev_port_mtu_ctx);
3639}
3640
3641int mlx5e_ptp_rx_manage_fs_ctx(struct mlx5e_priv *priv, void *ctx)
3642{
3643        bool set  = *(bool *)ctx;
3644
3645        return mlx5e_ptp_rx_manage_fs(priv, set);
3646}
3647
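/* Summary comment added for clarity. HW timestamping path for profiles without
 * PTP-RQ support: when an RX filter is requested, CQE compression is turned
 * off (compressed CQEs presumably lose per-packet timestamps); when the filter
 * is cleared, compression reverts to the admin default.
 */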
3648static int mlx5e_hwstamp_config_no_ptp_rx(struct mlx5e_priv *priv, bool rx_filter)
3649{
3650        bool rx_cqe_compress_def = priv->channels.params.rx_cqe_compress_def;
3651        int err;
3652
3653        if (!rx_filter)
3654                /* Reset CQE compression to Admin default */
3655                return mlx5e_modify_rx_cqe_compression_locked(priv, rx_cqe_compress_def, false);
3656
3657        if (!MLX5E_GET_PFLAG(&priv->channels.params, MLX5E_PFLAG_RX_CQE_COMPRESS))
3658                return 0;
3659
3660        /* Disable CQE compression */
3661        netdev_warn(priv->netdev, "Disabling RX cqe compression\n");
3662        err = mlx5e_modify_rx_cqe_compression_locked(priv, false, true);
3663        if (err)
3664                netdev_err(priv->netdev, "Failed disabling cqe compression err=%d\n", err);
3665
3666        return err;
3667}
3668
3669static int mlx5e_hwstamp_config_ptp_rx(struct mlx5e_priv *priv, bool ptp_rx)
3670{
3671        struct mlx5e_params new_params;
3672
3673        if (ptp_rx == priv->channels.params.ptp_rx)
3674                return 0;
3675
3676        new_params = priv->channels.params;
3677        new_params.ptp_rx = ptp_rx;
3678        return mlx5e_safe_switch_params(priv, &new_params, mlx5e_ptp_rx_manage_fs_ctx,
3679                                        &new_params.ptp_rx, true);
3680}
3681
3682int mlx5e_hwstamp_set(struct mlx5e_priv *priv, struct ifreq *ifr)
3683{
3684        struct hwtstamp_config config;
3685        bool rx_cqe_compress_def;
3686        bool ptp_rx;
3687        int err;
3688
3689        if (!MLX5_CAP_GEN(priv->mdev, device_frequency_khz) ||
3690            (mlx5_clock_get_ptp_index(priv->mdev) == -1))
3691                return -EOPNOTSUPP;
3692
3693        if (copy_from_user(&config, ifr->ifr_data, sizeof(config)))
3694                return -EFAULT;
3695
3696        /* TX HW timestamp */
3697        switch (config.tx_type) {
3698        case HWTSTAMP_TX_OFF:
3699        case HWTSTAMP_TX_ON:
3700                break;
3701        default:
3702                return -ERANGE;
3703        }
3704
3705        mutex_lock(&priv->state_lock);
3706        rx_cqe_compress_def = priv->channels.params.rx_cqe_compress_def;
3707
3708        /* RX HW timestamp */
3709        switch (config.rx_filter) {
3710        case HWTSTAMP_FILTER_NONE:
3711                ptp_rx = false;
3712                break;
3713        case HWTSTAMP_FILTER_ALL:
3714        case HWTSTAMP_FILTER_SOME:
3715        case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
3716        case HWTSTAMP_FILTER_PTP_V1_L4_SYNC:
3717        case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ:
3718        case HWTSTAMP_FILTER_PTP_V2_L4_EVENT:
3719        case HWTSTAMP_FILTER_PTP_V2_L4_SYNC:
3720        case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ:
3721        case HWTSTAMP_FILTER_PTP_V2_L2_EVENT:
3722        case HWTSTAMP_FILTER_PTP_V2_L2_SYNC:
3723        case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ:
3724        case HWTSTAMP_FILTER_PTP_V2_EVENT:
3725        case HWTSTAMP_FILTER_PTP_V2_SYNC:
3726        case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
3727        case HWTSTAMP_FILTER_NTP_ALL:
3728                config.rx_filter = HWTSTAMP_FILTER_ALL;
3729                /* ptp_rx is enabled only when both HW timestamping is
3730                 * requested and CQE compression is enabled.
3731                 */
3732                ptp_rx = rx_cqe_compress_def;
3733                break;
3734        default:
3735                err = -ERANGE;
3736                goto err_unlock;
3737        }
3738
3739        if (!priv->profile->rx_ptp_support)
3740                err = mlx5e_hwstamp_config_no_ptp_rx(priv,
3741                                                     config.rx_filter != HWTSTAMP_FILTER_NONE);
3742        else
3743                err = mlx5e_hwstamp_config_ptp_rx(priv, ptp_rx);
3744        if (err)
3745                goto err_unlock;
3746
3747        memcpy(&priv->tstamp, &config, sizeof(config));
3748        mutex_unlock(&priv->state_lock);
3749
3750        /* might need to fix some features */
3751        netdev_update_features(priv->netdev);
3752
3753        return copy_to_user(ifr->ifr_data, &config,
3754                            sizeof(config)) ? -EFAULT : 0;
3755err_unlock:
3756        mutex_unlock(&priv->state_lock);
3757        return err;
3758}
3759
3760int mlx5e_hwstamp_get(struct mlx5e_priv *priv, struct ifreq *ifr)
3761{
3762        struct hwtstamp_config *cfg = &priv->tstamp;
3763
3764        if (!MLX5_CAP_GEN(priv->mdev, device_frequency_khz))
3765                return -EOPNOTSUPP;
3766
3767        return copy_to_user(ifr->ifr_data, cfg, sizeof(*cfg)) ? -EFAULT : 0;
3768}
3769
3770static int mlx5e_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
3771{
3772        struct mlx5e_priv *priv = netdev_priv(dev);
3773
3774        switch (cmd) {
3775        case SIOCSHWTSTAMP:
3776                return mlx5e_hwstamp_set(priv, ifr);
3777        case SIOCGHWTSTAMP:
3778                return mlx5e_hwstamp_get(priv, ifr);
3779        default:
3780                return -EOPNOTSUPP;
3781        }
3782}
3783
3784#ifdef CONFIG_MLX5_ESWITCH
3785int mlx5e_set_vf_mac(struct net_device *dev, int vf, u8 *mac)
3786{
3787        struct mlx5e_priv *priv = netdev_priv(dev);
3788        struct mlx5_core_dev *mdev = priv->mdev;
3789
3790        return mlx5_eswitch_set_vport_mac(mdev->priv.eswitch, vf + 1, mac);
3791}
3792
3793static int mlx5e_set_vf_vlan(struct net_device *dev, int vf, u16 vlan, u8 qos,
3794                             __be16 vlan_proto)
3795{
3796        struct mlx5e_priv *priv = netdev_priv(dev);
3797        struct mlx5_core_dev *mdev = priv->mdev;
3798
3799        if (vlan_proto != htons(ETH_P_8021Q))
3800                return -EPROTONOSUPPORT;
3801
3802        return mlx5_eswitch_set_vport_vlan(mdev->priv.eswitch, vf + 1,
3803                                           vlan, qos);
3804}
3805
3806static int mlx5e_set_vf_spoofchk(struct net_device *dev, int vf, bool setting)
3807{
3808        struct mlx5e_priv *priv = netdev_priv(dev);
3809        struct mlx5_core_dev *mdev = priv->mdev;
3810
3811        return mlx5_eswitch_set_vport_spoofchk(mdev->priv.eswitch, vf + 1, setting);
3812}
3813
3814static int mlx5e_set_vf_trust(struct net_device *dev, int vf, bool setting)
3815{
3816        struct mlx5e_priv *priv = netdev_priv(dev);
3817        struct mlx5_core_dev *mdev = priv->mdev;
3818
3819        return mlx5_eswitch_set_vport_trust(mdev->priv.eswitch, vf + 1, setting);
3820}
3821
3822int mlx5e_set_vf_rate(struct net_device *dev, int vf, int min_tx_rate,
3823                      int max_tx_rate)
3824{
3825        struct mlx5e_priv *priv = netdev_priv(dev);
3826        struct mlx5_core_dev *mdev = priv->mdev;
3827
3828        return mlx5_eswitch_set_vport_rate(mdev->priv.eswitch, vf + 1,
3829                                           max_tx_rate, min_tx_rate);
3830}
3831
3832static int mlx5_vport_link2ifla(u8 esw_link)
3833{
3834        switch (esw_link) {
3835        case MLX5_VPORT_ADMIN_STATE_DOWN:
3836                return IFLA_VF_LINK_STATE_DISABLE;
3837        case MLX5_VPORT_ADMIN_STATE_UP:
3838                return IFLA_VF_LINK_STATE_ENABLE;
3839        }
3840        return IFLA_VF_LINK_STATE_AUTO;
3841}
3842
3843static int mlx5_ifla_link2vport(u8 ifla_link)
3844{
3845        switch (ifla_link) {
3846        case IFLA_VF_LINK_STATE_DISABLE:
3847                return MLX5_VPORT_ADMIN_STATE_DOWN;
3848        case IFLA_VF_LINK_STATE_ENABLE:
3849                return MLX5_VPORT_ADMIN_STATE_UP;
3850        }
3851        return MLX5_VPORT_ADMIN_STATE_AUTO;
3852}
3853
3854static int mlx5e_set_vf_link_state(struct net_device *dev, int vf,
3855                                   int link_state)
3856{
3857        struct mlx5e_priv *priv = netdev_priv(dev);
3858        struct mlx5_core_dev *mdev = priv->mdev;
3859
3860        if (mlx5e_is_uplink_rep(priv))
3861                return -EOPNOTSUPP;
3862
3863        return mlx5_eswitch_set_vport_state(mdev->priv.eswitch, vf + 1,
3864                                            mlx5_ifla_link2vport(link_state));
3865}
3866
3867int mlx5e_get_vf_config(struct net_device *dev,
3868                        int vf, struct ifla_vf_info *ivi)
3869{
3870        struct mlx5e_priv *priv = netdev_priv(dev);
3871        struct mlx5_core_dev *mdev = priv->mdev;
3872        int err;
3873
3874        if (!netif_device_present(dev))
3875                return -EOPNOTSUPP;
3876
3877        err = mlx5_eswitch_get_vport_config(mdev->priv.eswitch, vf + 1, ivi);
3878        if (err)
3879                return err;
3880        ivi->linkstate = mlx5_vport_link2ifla(ivi->linkstate);
3881        return 0;
3882}
3883
3884int mlx5e_get_vf_stats(struct net_device *dev,
3885                       int vf, struct ifla_vf_stats *vf_stats)
3886{
3887        struct mlx5e_priv *priv = netdev_priv(dev);
3888        struct mlx5_core_dev *mdev = priv->mdev;
3889
3890        return mlx5_eswitch_get_vport_stats(mdev->priv.eswitch, vf + 1,
3891                                            vf_stats);
3892}
3893
3894static bool
3895mlx5e_has_offload_stats(const struct net_device *dev, int attr_id)
3896{
3897        struct mlx5e_priv *priv = netdev_priv(dev);
3898
3899        if (!netif_device_present(dev))
3900                return false;
3901
3902        if (!mlx5e_is_uplink_rep(priv))
3903                return false;
3904
3905        return mlx5e_rep_has_offload_stats(dev, attr_id);
3906}
3907
3908static int
3909mlx5e_get_offload_stats(int attr_id, const struct net_device *dev,
3910                        void *sp)
3911{
3912        struct mlx5e_priv *priv = netdev_priv(dev);
3913
3914        if (!mlx5e_is_uplink_rep(priv))
3915                return -EOPNOTSUPP;
3916
3917        return mlx5e_rep_get_offload_stats(attr_id, dev, sp);
3918}
3919#endif
3920
3921static bool mlx5e_tunnel_proto_supported_tx(struct mlx5_core_dev *mdev, u8 proto_type)
3922{
3923        switch (proto_type) {
3924        case IPPROTO_GRE:
3925                return MLX5_CAP_ETH(mdev, tunnel_stateless_gre);
3926        case IPPROTO_IPIP:
3927        case IPPROTO_IPV6:
3928                return (MLX5_CAP_ETH(mdev, tunnel_stateless_ip_over_ip) ||
3929                        MLX5_CAP_ETH(mdev, tunnel_stateless_ip_over_ip_tx));
3930        default:
3931                return false;
3932        }
3933}
3934
3935static bool mlx5e_gre_tunnel_inner_proto_offload_supported(struct mlx5_core_dev *mdev,
3936                                                           struct sk_buff *skb)
3937{
3938        switch (skb->inner_protocol) {
3939        case htons(ETH_P_IP):
3940        case htons(ETH_P_IPV6):
3941        case htons(ETH_P_TEB):
3942                return true;
3943        case htons(ETH_P_MPLS_UC):
3944        case htons(ETH_P_MPLS_MC):
3945                return MLX5_CAP_ETH(mdev, tunnel_stateless_mpls_over_gre);
3946        }
3947        return false;
3948}
3949
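/* Summary comment added for clarity. For encapsulated TX packets, keep the
 * checksum/GSO features only for tunnel types the device can offload (GRE with
 * a supported inner protocol, IP-in-IP, VXLAN/GENEVE on an offloaded UDP port,
 * or ESP via the IPsec feature check); otherwise strip them.
 */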
3950static netdev_features_t mlx5e_tunnel_features_check(struct mlx5e_priv *priv,
3951                                                     struct sk_buff *skb,
3952                                                     netdev_features_t features)
3953{
3954        unsigned int offset = 0;
3955        struct udphdr *udph;
3956        u8 proto;
3957        u16 port;
3958
3959        switch (vlan_get_protocol(skb)) {
3960        case htons(ETH_P_IP):
3961                proto = ip_hdr(skb)->protocol;
3962                break;
3963        case htons(ETH_P_IPV6):
3964                proto = ipv6_find_hdr(skb, &offset, -1, NULL, NULL);
3965                break;
3966        default:
3967                goto out;
3968        }
3969
3970        switch (proto) {
3971        case IPPROTO_GRE:
3972                if (mlx5e_gre_tunnel_inner_proto_offload_supported(priv->mdev, skb))
3973                        return features;
3974                break;
3975        case IPPROTO_IPIP:
3976        case IPPROTO_IPV6:
3977                if (mlx5e_tunnel_proto_supported_tx(priv->mdev, IPPROTO_IPIP))
3978                        return features;
3979                break;
3980        case IPPROTO_UDP:
3981                udph = udp_hdr(skb);
3982                port = be16_to_cpu(udph->dest);
3983
3984                /* Verify if UDP port is being offloaded by HW */
3985                if (mlx5_vxlan_lookup_port(priv->mdev->vxlan, port))
3986                        return features;
3987
3988#if IS_ENABLED(CONFIG_GENEVE)
3989                /* Support Geneve offload for default UDP port */
3990                if (port == GENEVE_UDP_PORT && mlx5_geneve_tx_allowed(priv->mdev))
3991                        return features;
3992#endif
3993                break;
3994#ifdef CONFIG_MLX5_EN_IPSEC
3995        case IPPROTO_ESP:
3996                return mlx5e_ipsec_feature_check(skb, features);
3997#endif
3998        }
3999
4000out:
4001        /* Disable CSUM and GSO if the udp dport is not offloaded by HW */
4002        return features & ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK);
4003}
4004
4005netdev_features_t mlx5e_features_check(struct sk_buff *skb,
4006                                       struct net_device *netdev,
4007                                       netdev_features_t features)
4008{
4009        struct mlx5e_priv *priv = netdev_priv(netdev);
4010
4011        features = vlan_features_check(skb, features);
4012        features = vxlan_features_check(skb, features);
4013
4014        /* Validate if the tunneled packet is being offloaded by HW */
4015        if (skb->encapsulation &&
4016            (features & NETIF_F_CSUM_MASK || features & NETIF_F_GSO_MASK))
4017                return mlx5e_tunnel_features_check(priv, skb, features);
4018
4019        return features;
4020}
4021
4022static void mlx5e_tx_timeout_work(struct work_struct *work)
4023{
4024        struct mlx5e_priv *priv = container_of(work, struct mlx5e_priv,
4025                                               tx_timeout_work);
4026        struct net_device *netdev = priv->netdev;
4027        int i;
4028
4029        rtnl_lock();
4030        mutex_lock(&priv->state_lock);
4031
4032        if (!test_bit(MLX5E_STATE_OPENED, &priv->state))
4033                goto unlock;
4034
4035        for (i = 0; i < netdev->real_num_tx_queues; i++) {
4036                struct netdev_queue *dev_queue =
4037                        netdev_get_tx_queue(netdev, i);
4038                struct mlx5e_txqsq *sq = priv->txq2sq[i];
4039
4040                if (!netif_xmit_stopped(dev_queue))
4041                        continue;
4042
4043                if (mlx5e_reporter_tx_timeout(sq))
4044                        /* break if the reporter tried to reopen the channels */
4045                        break;
4046        }
4047
4048unlock:
4049        mutex_unlock(&priv->state_lock);
4050        rtnl_unlock();
4051}
4052
4053static void mlx5e_tx_timeout(struct net_device *dev, unsigned int txqueue)
4054{
4055        struct mlx5e_priv *priv = netdev_priv(dev);
4056
4057        netdev_err(dev, "TX timeout detected\n");
4058        queue_work(priv->wq, &priv->tx_timeout_work);
4059}
4060
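/* Summary comment added for clarity. Validate that an XDP program may be
 * attached: LRO must be off, the device must not be an Innova card with IPsec
 * support, and the current MTU must still allow linear (single-fragment) RX
 * SKBs.
 */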
4061static int mlx5e_xdp_allowed(struct mlx5e_priv *priv, struct bpf_prog *prog)
4062{
4063        struct net_device *netdev = priv->netdev;
4064        struct mlx5e_params new_params;
4065
4066        if (priv->channels.params.lro_en) {
4067                netdev_warn(netdev, "can't set XDP while LRO is on, disable LRO first\n");
4068                return -EINVAL;
4069        }
4070
4071        if (mlx5_fpga_is_ipsec_device(priv->mdev)) {
4072                netdev_warn(netdev,
4073                            "XDP is not available on Innova cards with IPsec support\n");
4074                return -EINVAL;
4075        }
4076
4077        new_params = priv->channels.params;
4078        new_params.xdp_prog = prog;
4079
4080        /* No XSK params: AF_XDP can't be enabled yet at the point of setting
4081         * the XDP program.
4082         */
4083        if (!mlx5e_rx_is_linear_skb(&new_params, NULL)) {
4084                netdev_warn(netdev, "XDP is not allowed with MTU(%d) > %d\n",
4085                            new_params.sw_mtu,
4086                            mlx5e_xdp_max_mtu(&new_params, NULL));
4087                return -EINVAL;
4088        }
4089
4090        return 0;
4091}
4092
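/* Summary comment added for clarity. Swap the RQ's XDP program under
 * priv->state_lock (see the lockdep annotation) and release the reference on
 * the old program, if any.
 */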
4093static void mlx5e_rq_replace_xdp_prog(struct mlx5e_rq *rq, struct bpf_prog *prog)
4094{
4095        struct bpf_prog *old_prog;
4096
4097        old_prog = rcu_replace_pointer(rq->xdp_prog, prog,
4098                                       lockdep_is_held(&rq->priv->state_lock));
4099        if (old_prog)
4100                bpf_prog_put(old_prog);
4101}
4102
4103static int mlx5e_xdp_set(struct net_device *netdev, struct bpf_prog *prog)
4104{
4105        struct mlx5e_priv *priv = netdev_priv(netdev);
4106        struct mlx5e_params new_params;
4107        struct bpf_prog *old_prog;
4108        int err = 0;
4109        bool reset;
4110        int i;
4111
4112        mutex_lock(&priv->state_lock);
4113
4114        if (prog) {
4115                err = mlx5e_xdp_allowed(priv, prog);
4116                if (err)
4117                        goto unlock;
4118        }
4119
4120        /* no need for full reset when exchanging programs */
4121        reset = (!priv->channels.params.xdp_prog || !prog);
4122
4123        new_params = priv->channels.params;
4124        new_params.xdp_prog = prog;
4125        if (reset)
4126                mlx5e_set_rq_type(priv->mdev, &new_params);
4127        old_prog = priv->channels.params.xdp_prog;
4128
4129        err = mlx5e_safe_switch_params(priv, &new_params, NULL, NULL, reset);
4130        if (err)
4131                goto unlock;
4132
4133        if (old_prog)
4134                bpf_prog_put(old_prog);
4135
4136        if (!test_bit(MLX5E_STATE_OPENED, &priv->state) || reset)
4137                goto unlock;
4138
4139        /* When exchanging programs without a reset, update the ref counts
4140         * on behalf of the channels' RQs here.
4141         */
4142        bpf_prog_add(prog, priv->channels.num);
4143        for (i = 0; i < priv->channels.num; i++) {
4144                struct mlx5e_channel *c = priv->channels.c[i];
4145
4146                mlx5e_rq_replace_xdp_prog(&c->rq, prog);
4147                if (test_bit(MLX5E_CHANNEL_STATE_XSK, c->state)) {
4148                        bpf_prog_inc(prog);
4149                        mlx5e_rq_replace_xdp_prog(&c->xskrq, prog);
4150                }
4151        }
4152
4153unlock:
4154        mutex_unlock(&priv->state_lock);
4155        return err;
4156}
4157
4158static int mlx5e_xdp(struct net_device *dev, struct netdev_bpf *xdp)
4159{
4160        switch (xdp->command) {
4161        case XDP_SETUP_PROG:
4162                return mlx5e_xdp_set(dev, xdp->prog);
4163        case XDP_SETUP_XSK_POOL:
4164                return mlx5e_xsk_setup_pool(dev, xdp->xsk.pool,
4165                                            xdp->xsk.queue_id);
4166        default:
4167                return -EINVAL;
4168        }
4169}
4170
4171#ifdef CONFIG_MLX5_ESWITCH
4172static int mlx5e_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq,
4173                                struct net_device *dev, u32 filter_mask,
4174                                int nlflags)
4175{
4176        struct mlx5e_priv *priv = netdev_priv(dev);
4177        struct mlx5_core_dev *mdev = priv->mdev;
4178        u8 mode, setting;
4179        int err;
4180
4181        err = mlx5_eswitch_get_vepa(mdev->priv.eswitch, &setting);
4182        if (err)
4183                return err;
4184        mode = setting ? BRIDGE_MODE_VEPA : BRIDGE_MODE_VEB;
4185        return ndo_dflt_bridge_getlink(skb, pid, seq, dev,
4186                                       mode,
4187                                       0, 0, nlflags, filter_mask, NULL);
4188}
4189
4190static int mlx5e_bridge_setlink(struct net_device *dev, struct nlmsghdr *nlh,
4191                                u16 flags, struct netlink_ext_ack *extack)
4192{
4193        struct mlx5e_priv *priv = netdev_priv(dev);
4194        struct mlx5_core_dev *mdev = priv->mdev;
4195        struct nlattr *attr, *br_spec;
4196        u16 mode = BRIDGE_MODE_UNDEF;
4197        u8 setting;
4198        int rem;
4199
4200        br_spec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC);
4201        if (!br_spec)
4202                return -EINVAL;
4203
4204        nla_for_each_nested(attr, br_spec, rem) {
4205                if (nla_type(attr) != IFLA_BRIDGE_MODE)
4206                        continue;
4207
4208                if (nla_len(attr) < sizeof(mode))
4209                        return -EINVAL;
4210
4211                mode = nla_get_u16(attr);
4212                if (mode > BRIDGE_MODE_VEPA)
4213                        return -EINVAL;
4214
4215                break;
4216        }
4217
4218        if (mode == BRIDGE_MODE_UNDEF)
4219                return -EINVAL;
4220
4221        setting = (mode == BRIDGE_MODE_VEPA) ? 1 : 0;
4222        return mlx5_eswitch_set_vepa(mdev->priv.eswitch, setting);
4223}
4224#endif
4225
4226const struct net_device_ops mlx5e_netdev_ops = {
4227        .ndo_open                = mlx5e_open,
4228        .ndo_stop                = mlx5e_close,
4229        .ndo_start_xmit          = mlx5e_xmit,
4230        .ndo_setup_tc            = mlx5e_setup_tc,
4231        .ndo_select_queue        = mlx5e_select_queue,
4232        .ndo_get_stats64         = mlx5e_get_stats,
4233        .ndo_set_rx_mode         = mlx5e_set_rx_mode,
4234        .ndo_set_mac_address     = mlx5e_set_mac,
4235        .ndo_vlan_rx_add_vid     = mlx5e_vlan_rx_add_vid,
4236        .ndo_vlan_rx_kill_vid    = mlx5e_vlan_rx_kill_vid,
4237        .ndo_set_features        = mlx5e_set_features,
4238        .ndo_fix_features        = mlx5e_fix_features,
4239        .ndo_change_mtu          = mlx5e_change_nic_mtu,
4240        .ndo_eth_ioctl           = mlx5e_ioctl,
4241        .ndo_set_tx_maxrate      = mlx5e_set_tx_maxrate,
4242        .ndo_features_check      = mlx5e_features_check,
4243        .ndo_tx_timeout          = mlx5e_tx_timeout,
4244        .ndo_bpf                 = mlx5e_xdp,
4245        .ndo_xdp_xmit            = mlx5e_xdp_xmit,
4246        .ndo_xsk_wakeup          = mlx5e_xsk_wakeup,
4247#ifdef CONFIG_MLX5_EN_ARFS
4248        .ndo_rx_flow_steer       = mlx5e_rx_flow_steer,
4249#endif
4250#ifdef CONFIG_MLX5_ESWITCH
4251        .ndo_bridge_setlink      = mlx5e_bridge_setlink,
4252        .ndo_bridge_getlink      = mlx5e_bridge_getlink,
4253
4254        /* SRIOV E-Switch NDOs */
4255        .ndo_set_vf_mac          = mlx5e_set_vf_mac,
4256        .ndo_set_vf_vlan         = mlx5e_set_vf_vlan,
4257        .ndo_set_vf_spoofchk     = mlx5e_set_vf_spoofchk,
4258        .ndo_set_vf_trust        = mlx5e_set_vf_trust,
4259        .ndo_set_vf_rate         = mlx5e_set_vf_rate,
4260        .ndo_get_vf_config       = mlx5e_get_vf_config,
4261        .ndo_set_vf_link_state   = mlx5e_set_vf_link_state,
4262        .ndo_get_vf_stats        = mlx5e_get_vf_stats,
4263        .ndo_has_offload_stats   = mlx5e_has_offload_stats,
4264        .ndo_get_offload_stats   = mlx5e_get_offload_stats,
4265#endif
4266        .ndo_get_devlink_port    = mlx5e_get_devlink_port,
4267};
4268
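/* Summary comment added for clarity. Pick the smallest supported LRO timer
 * period that is >= @wanted_timeout, falling back to the largest supported
 * period if none qualifies (the capability array is in ascending order).
 */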
4269static u32 mlx5e_choose_lro_timeout(struct mlx5_core_dev *mdev, u32 wanted_timeout)
4270{
4271        int i;
4272
4273        /* The supported periods are organized in ascending order */
4274        for (i = 0; i < MLX5E_LRO_TIMEOUT_ARR_SIZE - 1; i++)
4275                if (MLX5_CAP_ETH(mdev, lro_timer_supported_periods[i]) >= wanted_timeout)
4276                        break;
4277
4278        return MLX5_CAP_ETH(mdev, lro_timer_supported_periods[i]);
4279}
4280
4281void mlx5e_build_nic_params(struct mlx5e_priv *priv, struct mlx5e_xsk *xsk, u16 mtu)
4282{
4283        struct mlx5e_params *params = &priv->channels.params;
4284        struct mlx5_core_dev *mdev = priv->mdev;
4285        u8 rx_cq_period_mode;
4286
4287        params->sw_mtu = mtu;
4288        params->hard_mtu = MLX5E_ETH_HARD_MTU;
4289        params->num_channels = min_t(unsigned int, MLX5E_MAX_NUM_CHANNELS / 2,
4290                                     priv->max_nch);
4291        mlx5e_params_mqprio_reset(params);
4292
4293        /* Set an initial non-zero value, so that mlx5e_select_queue won't
4294         * divide by zero if called before first activating channels.
4295         */
4296        priv->num_tc_x_num_ch = params->num_channels * params->mqprio.num_tc;
4297
4298        /* SQ */
4299        params->log_sq_size = is_kdump_kernel() ?
4300                MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE :
4301                MLX5E_PARAMS_DEFAULT_LOG_SQ_SIZE;
4302        MLX5E_SET_PFLAG(params, MLX5E_PFLAG_SKB_TX_MPWQE, mlx5e_tx_mpwqe_supported(mdev));
4303
4304        /* XDP SQ */
4305        MLX5E_SET_PFLAG(params, MLX5E_PFLAG_XDP_TX_MPWQE, mlx5e_tx_mpwqe_supported(mdev));
4306
4307        /* set CQE compression */
4308        params->rx_cqe_compress_def = false;
4309        if (MLX5_CAP_GEN(mdev, cqe_compression) &&
4310            MLX5_CAP_GEN(mdev, vport_group_manager))
4311                params->rx_cqe_compress_def = slow_pci_heuristic(mdev);
4312
4313        MLX5E_SET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS, params->rx_cqe_compress_def);
4314        MLX5E_SET_PFLAG(params, MLX5E_PFLAG_RX_NO_CSUM_COMPLETE, false);
4315
4316        /* RQ */
4317        mlx5e_build_rq_params(mdev, params);
4318
4319        /* HW LRO */
4320        if (MLX5_CAP_ETH(mdev, lro_cap) &&
4321            params->rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) {
4322                /* No XSK params: checking the availability of striding RQ in general. */
4323                if (!mlx5e_rx_mpwqe_is_linear_skb(mdev, params, NULL))
4324                        params->lro_en = !slow_pci_heuristic(mdev);
4325        }
4326        params->lro_timeout = mlx5e_choose_lro_timeout(mdev, MLX5E_DEFAULT_LRO_TIMEOUT);
4327
4328        /* CQ moderation params */
4329        rx_cq_period_mode = MLX5_CAP_GEN(mdev, cq_period_start_from_cqe) ?
4330                        MLX5_CQ_PERIOD_MODE_START_FROM_CQE :
4331                        MLX5_CQ_PERIOD_MODE_START_FROM_EQE;
4332        params->rx_dim_enabled = MLX5_CAP_GEN(mdev, cq_moderation);
4333        params->tx_dim_enabled = MLX5_CAP_GEN(mdev, cq_moderation);
4334        mlx5e_set_rx_cq_mode_params(params, rx_cq_period_mode);
4335        mlx5e_set_tx_cq_mode_params(params, MLX5_CQ_PERIOD_MODE_START_FROM_EQE);
4336
4337        /* TX inline */
4338        mlx5_query_min_inline(mdev, &params->tx_min_inline_mode);
4339
4340        params->tunneled_offload_en = mlx5_tunnel_inner_ft_supported(mdev);
4341
4342        /* AF_XDP */
4343        params->xsk = xsk;
4344
4345        /* Do not update netdev->features directly in here;
4346         * mlx5e_update_features() is called from mlx5e_attach_netdev().
4347         * To change netdev->features, modify mlx5e_fix_features() instead.
4348         */
4349}
4350
4351static void mlx5e_set_netdev_dev_addr(struct net_device *netdev)
4352{
4353        struct mlx5e_priv *priv = netdev_priv(netdev);
4354
4355        mlx5_query_mac_address(priv->mdev, netdev->dev_addr);
4356        if (is_zero_ether_addr(netdev->dev_addr) &&
4357            !MLX5_CAP_GEN(priv->mdev, vport_group_manager)) {
4358                eth_hw_addr_random(netdev);
4359                mlx5_core_info(priv->mdev, "Assigned random MAC address %pM\n", netdev->dev_addr);
4360        }
4361}
4362
4363static int mlx5e_vxlan_set_port(struct net_device *netdev, unsigned int table,
4364                                unsigned int entry, struct udp_tunnel_info *ti)
4365{
4366        struct mlx5e_priv *priv = netdev_priv(netdev);
4367
4368        return mlx5_vxlan_add_port(priv->mdev->vxlan, ntohs(ti->port));
4369}
4370
4371static int mlx5e_vxlan_unset_port(struct net_device *netdev, unsigned int table,
4372                                  unsigned int entry, struct udp_tunnel_info *ti)
4373{
4374        struct mlx5e_priv *priv = netdev_priv(netdev);
4375
4376        return mlx5_vxlan_del_port(priv->mdev->vxlan, ntohs(ti->port));
4377}
4378
4379void mlx5e_vxlan_set_netdev_info(struct mlx5e_priv *priv)
4380{
4381        if (!mlx5_vxlan_allowed(priv->mdev->vxlan))
4382                return;
4383
4384        priv->nic_info.set_port = mlx5e_vxlan_set_port;
4385        priv->nic_info.unset_port = mlx5e_vxlan_unset_port;
4386        priv->nic_info.flags = UDP_TUNNEL_NIC_INFO_MAY_SLEEP |
4387                                UDP_TUNNEL_NIC_INFO_STATIC_IANA_VXLAN;
4388        priv->nic_info.tables[0].tunnel_types = UDP_TUNNEL_TYPE_VXLAN;
4389        /* Don't count the space hard-coded to the IANA port */
4390        priv->nic_info.tables[0].n_entries =
4391                mlx5_vxlan_max_udp_ports(priv->mdev) - 1;
4392
4393        priv->netdev->udp_tunnel_nic_info = &priv->nic_info;
4394}
4395
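/* Summary comment added for clarity. True if the device can offload TX for at
 * least one tunnel type: any of the stateless tunnel protocols, or
 * VXLAN/GENEVE.
 */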
4396static bool mlx5e_tunnel_any_tx_proto_supported(struct mlx5_core_dev *mdev)
4397{
4398        int tt;
4399
4400        for (tt = 0; tt < MLX5_NUM_TUNNEL_TT; tt++) {
4401                if (mlx5e_tunnel_proto_supported_tx(mdev, mlx5_get_proto_by_tunnel_type(tt)))
4402                        return true;
4403        }
4404        return (mlx5_vxlan_allowed(mdev->vxlan) || mlx5_geneve_tx_allowed(mdev));
4405}
4406
4407static void mlx5e_build_nic_netdev(struct net_device *netdev)
4408{
4409        struct mlx5e_priv *priv = netdev_priv(netdev);
4410        struct mlx5_core_dev *mdev = priv->mdev;
4411        bool fcs_supported;
4412        bool fcs_enabled;
4413
4414        SET_NETDEV_DEV(netdev, mdev->device);
4415
4416        netdev->netdev_ops = &mlx5e_netdev_ops;
4417
4418        mlx5e_dcbnl_build_netdev(netdev);
4419
4420        netdev->watchdog_timeo    = 15 * HZ;
4421
4422        netdev->ethtool_ops       = &mlx5e_ethtool_ops;
4423
4424        netdev->vlan_features    |= NETIF_F_SG;
4425        netdev->vlan_features    |= NETIF_F_HW_CSUM;
4426        netdev->vlan_features    |= NETIF_F_GRO;
4427        netdev->vlan_features    |= NETIF_F_TSO;
4428        netdev->vlan_features    |= NETIF_F_TSO6;
4429        netdev->vlan_features    |= NETIF_F_RXCSUM;
4430        netdev->vlan_features    |= NETIF_F_RXHASH;
4431
4432        netdev->mpls_features    |= NETIF_F_SG;
4433        netdev->mpls_features    |= NETIF_F_HW_CSUM;
4434        netdev->mpls_features    |= NETIF_F_TSO;
4435        netdev->mpls_features    |= NETIF_F_TSO6;
4436
4437        netdev->hw_enc_features  |= NETIF_F_HW_VLAN_CTAG_TX;
4438        netdev->hw_enc_features  |= NETIF_F_HW_VLAN_CTAG_RX;
4439
4440        /* Tunneled LRO is not supported in the driver, and the same RQs are
4441         * shared between inner and outer TIRs, so the driver can't disable LRO
4442         * for inner TIRs while having it enabled for outer TIRs. Due to this,
4443         * block LRO altogether if the firmware declares tunneled LRO support.
4444         */
4445        if (!!MLX5_CAP_ETH(mdev, lro_cap) &&
4446            !MLX5_CAP_ETH(mdev, tunnel_lro_vxlan) &&
4447            !MLX5_CAP_ETH(mdev, tunnel_lro_gre) &&
4448            mlx5e_check_fragmented_striding_rq_cap(mdev))
4449                netdev->vlan_features    |= NETIF_F_LRO;
4450
4451        netdev->hw_features       = netdev->vlan_features;
4452        netdev->hw_features      |= NETIF_F_HW_VLAN_CTAG_TX;
4453        netdev->hw_features      |= NETIF_F_HW_VLAN_CTAG_RX;
4454        netdev->hw_features      |= NETIF_F_HW_VLAN_CTAG_FILTER;
4455        netdev->hw_features      |= NETIF_F_HW_VLAN_STAG_TX;
4456
4457        if (mlx5e_tunnel_any_tx_proto_supported(mdev)) {
4458                netdev->hw_enc_features |= NETIF_F_HW_CSUM;
4459                netdev->hw_enc_features |= NETIF_F_TSO;
4460                netdev->hw_enc_features |= NETIF_F_TSO6;
4461                netdev->hw_enc_features |= NETIF_F_GSO_PARTIAL;
4462        }
4463
4464        if (mlx5_vxlan_allowed(mdev->vxlan) || mlx5_geneve_tx_allowed(mdev)) {
4465                netdev->hw_features     |= NETIF_F_GSO_UDP_TUNNEL;
4466                netdev->hw_enc_features |= NETIF_F_GSO_UDP_TUNNEL;
4467                netdev->vlan_features |= NETIF_F_GSO_UDP_TUNNEL;
4468        }
4469
4470        if (mlx5e_tunnel_proto_supported_tx(mdev, IPPROTO_GRE)) {
4471                netdev->hw_features     |= NETIF_F_GSO_GRE;
4472                netdev->hw_enc_features |= NETIF_F_GSO_GRE;
4473                netdev->gso_partial_features |= NETIF_F_GSO_GRE;
4474        }
4475
4476        if (mlx5e_tunnel_proto_supported_tx(mdev, IPPROTO_IPIP)) {
4477                netdev->hw_features |= NETIF_F_GSO_IPXIP4 |
4478                                       NETIF_F_GSO_IPXIP6;
4479                netdev->hw_enc_features |= NETIF_F_GSO_IPXIP4 |
4480                                           NETIF_F_GSO_IPXIP6;
4481                netdev->gso_partial_features |= NETIF_F_GSO_IPXIP4 |
4482                                                NETIF_F_GSO_IPXIP6;
4483        }
4484
4485        netdev->hw_features                      |= NETIF_F_GSO_PARTIAL;
4486        netdev->gso_partial_features             |= NETIF_F_GSO_UDP_L4;
4487        netdev->hw_features                      |= NETIF_F_GSO_UDP_L4;
4488        netdev->features                         |= NETIF_F_GSO_UDP_L4;
4489
4490        mlx5_query_port_fcs(mdev, &fcs_supported, &fcs_enabled);
4491
4492        if (fcs_supported)
4493                netdev->hw_features |= NETIF_F_RXALL;
4494
4495        if (MLX5_CAP_ETH(mdev, scatter_fcs))
4496                netdev->hw_features |= NETIF_F_RXFCS;
4497
4498        if (mlx5_qos_is_supported(mdev))
4499                netdev->hw_features |= NETIF_F_HW_TC;
4500
4501        netdev->features          = netdev->hw_features;
4502
4503        /* Defaults */
4504        if (fcs_enabled)
4505                netdev->features  &= ~NETIF_F_RXALL;
4506        netdev->features  &= ~NETIF_F_LRO;
4507        netdev->features  &= ~NETIF_F_RXFCS;
4508
4509#define FT_CAP(f) MLX5_CAP_FLOWTABLE(mdev, flow_table_properties_nic_receive.f)
4510        if (FT_CAP(flow_modify_en) &&
4511            FT_CAP(modify_root) &&
4512            FT_CAP(identified_miss_table_mode) &&
4513            FT_CAP(flow_table_modify)) {
4514#if IS_ENABLED(CONFIG_MLX5_CLS_ACT)
4515                netdev->hw_features      |= NETIF_F_HW_TC;
4516#endif
4517#ifdef CONFIG_MLX5_EN_ARFS
4518                netdev->hw_features      |= NETIF_F_NTUPLE;
4519#endif
4520        }
4521
4522        netdev->features         |= NETIF_F_HIGHDMA;
4523        netdev->features         |= NETIF_F_HW_VLAN_STAG_FILTER;
4524
4525        netdev->priv_flags       |= IFF_UNICAST_FLT;
4526
4527        mlx5e_set_netdev_dev_addr(netdev);
4528        mlx5e_ipsec_build_netdev(priv);
4529        mlx5e_tls_build_netdev(priv);
4530}
4531
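/* Summary comment added for clarity. Allocate the two queue counters (general
 * and drop-RQ). Failures are tolerated: a counter that could not be allocated
 * stays at its zero-initialized value and is skipped on cleanup.
 */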
4532void mlx5e_create_q_counters(struct mlx5e_priv *priv)
4533{
4534        u32 out[MLX5_ST_SZ_DW(alloc_q_counter_out)] = {};
4535        u32 in[MLX5_ST_SZ_DW(alloc_q_counter_in)] = {};
4536        struct mlx5_core_dev *mdev = priv->mdev;
4537        int err;
4538
4539        MLX5_SET(alloc_q_counter_in, in, opcode, MLX5_CMD_OP_ALLOC_Q_COUNTER);
4540        err = mlx5_cmd_exec_inout(mdev, alloc_q_counter, in, out);
4541        if (!err)
4542                priv->q_counter =
4543                        MLX5_GET(alloc_q_counter_out, out, counter_set_id);
4544
4545        err = mlx5_cmd_exec_inout(mdev, alloc_q_counter, in, out);
4546        if (!err)
4547                priv->drop_rq_q_counter =
4548                        MLX5_GET(alloc_q_counter_out, out, counter_set_id);
4549}
4550
4551void mlx5e_destroy_q_counters(struct mlx5e_priv *priv)
4552{
4553        u32 in[MLX5_ST_SZ_DW(dealloc_q_counter_in)] = {};
4554
4555        MLX5_SET(dealloc_q_counter_in, in, opcode,
4556                 MLX5_CMD_OP_DEALLOC_Q_COUNTER);
4557        if (priv->q_counter) {
4558                MLX5_SET(dealloc_q_counter_in, in, counter_set_id,
4559                         priv->q_counter);
4560                mlx5_cmd_exec_in(priv->mdev, dealloc_q_counter, in);
4561        }
4562
4563        if (priv->drop_rq_q_counter) {
4564                MLX5_SET(dealloc_q_counter_in, in, counter_set_id,
4565                         priv->drop_rq_q_counter);
4566                mlx5_cmd_exec_in(priv->mdev, dealloc_q_counter, in);
4567        }
4568}
4569
4570static int mlx5e_nic_init(struct mlx5_core_dev *mdev,
4571                          struct net_device *netdev)
4572{
4573        struct mlx5e_priv *priv = netdev_priv(netdev);
4574        int err;
4575
4576        mlx5e_build_nic_params(priv, &priv->xsk, netdev->mtu);
4577        mlx5e_vxlan_set_netdev_info(priv);
4578
4579        mlx5e_timestamp_init(priv);
4580
4581        err = mlx5e_fs_init(priv);
4582        if (err) {
4583                mlx5_core_err(mdev, "FS initialization failed, %d\n", err);
4584                return err;
4585        }
4586
4587        err = mlx5e_ipsec_init(priv);
4588        if (err)
4589                mlx5_core_err(mdev, "IPSec initialization failed, %d\n", err);
4590
4591        err = mlx5e_tls_init(priv);
4592        if (err)
4593                mlx5_core_err(mdev, "TLS initialization failed, %d\n", err);
4594
4595        mlx5e_health_create_reporters(priv);
4596        return 0;
4597}
4598
4599static void mlx5e_nic_cleanup(struct mlx5e_priv *priv)
4600{
4601        mlx5e_health_destroy_reporters(priv);
4602        mlx5e_tls_cleanup(priv);
4603        mlx5e_ipsec_cleanup(priv);
4604        mlx5e_fs_cleanup(priv);
4605}
4606
4607static int mlx5e_init_nic_rx(struct mlx5e_priv *priv)
4608{
4609        struct mlx5_core_dev *mdev = priv->mdev;
4610        enum mlx5e_rx_res_features features;
4611        struct mlx5e_lro_param lro_param;
4612        int err;
4613
4614        priv->rx_res = mlx5e_rx_res_alloc();
4615        if (!priv->rx_res)
4616                return -ENOMEM;
4617
4618        mlx5e_create_q_counters(priv);
4619
4620        err = mlx5e_open_drop_rq(priv, &priv->drop_rq);
4621        if (err) {
4622                mlx5_core_err(mdev, "open drop rq failed, %d\n", err);
4623                goto err_destroy_q_counters;
4624        }
4625
4626        features = MLX5E_RX_RES_FEATURE_XSK | MLX5E_RX_RES_FEATURE_PTP;
4627        if (priv->channels.params.tunneled_offload_en)
4628                features |= MLX5E_RX_RES_FEATURE_INNER_FT;
4629        lro_param = mlx5e_get_lro_param(&priv->channels.params);
4630        err = mlx5e_rx_res_init(priv->rx_res, priv->mdev, features,
4631                                priv->max_nch, priv->drop_rq.rqn, &lro_param,
4632                                priv->channels.params.num_channels);
4633        if (err)
4634                goto err_close_drop_rq;
4635
4636        err = mlx5e_create_flow_steering(priv);
4637        if (err) {
4638                mlx5_core_warn(mdev, "create flow steering failed, %d\n", err);
4639                goto err_destroy_rx_res;
4640        }
4641
4642        err = mlx5e_tc_nic_init(priv);
4643        if (err)
4644                goto err_destroy_flow_steering;
4645
4646        err = mlx5e_accel_init_rx(priv);
4647        if (err)
4648                goto err_tc_nic_cleanup;
4649
4650#ifdef CONFIG_MLX5_EN_ARFS
4651        priv->netdev->rx_cpu_rmap = mlx5_eq_table_get_rmap(priv->mdev);
4652#endif
4653
4654        return 0;
4655
4656err_tc_nic_cleanup:
4657        mlx5e_tc_nic_cleanup(priv);
4658err_destroy_flow_steering:
4659        mlx5e_destroy_flow_steering(priv);
4660err_destroy_rx_res:
4661        mlx5e_rx_res_destroy(priv->rx_res);
4662err_close_drop_rq:
4663        mlx5e_close_drop_rq(&priv->drop_rq);
4664err_destroy_q_counters:
4665        mlx5e_destroy_q_counters(priv);
4666        mlx5e_rx_res_free(priv->rx_res);
4667        priv->rx_res = NULL;
4668        return err;
4669}
4670
4671static void mlx5e_cleanup_nic_rx(struct mlx5e_priv *priv)
4672{
4673        mlx5e_accel_cleanup_rx(priv);
4674        mlx5e_tc_nic_cleanup(priv);
4675        mlx5e_destroy_flow_steering(priv);
4676        mlx5e_rx_res_destroy(priv->rx_res);
4677        mlx5e_close_drop_rq(&priv->drop_rq);
4678        mlx5e_destroy_q_counters(priv);
4679        mlx5e_rx_res_free(priv->rx_res);
4680        priv->rx_res = NULL;
4681}
4682
4683static int mlx5e_init_nic_tx(struct mlx5e_priv *priv)
4684{
4685        int err;
4686
4687        err = mlx5e_create_tises(priv);
4688        if (err) {
4689                mlx5_core_warn(priv->mdev, "create tises failed, %d\n", err);
4690                return err;
4691        }
4692
4693        mlx5e_dcbnl_initialize(priv);
4694        return 0;
4695}
4696
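/* .enable: called once the device objects exist. Sync the HW MAC and MTU
 * state, join the LAG, arm async/blocking event handlers and the monitor
 * counters, and - if the netdev is already registered - restore the DCB app
 * entries, RX mode and the interface's running state under rtnl.
 */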
4697static void mlx5e_nic_enable(struct mlx5e_priv *priv)
4698{
4699        struct net_device *netdev = priv->netdev;
4700        struct mlx5_core_dev *mdev = priv->mdev;
4701
4702        mlx5e_init_l2_addr(priv);
4703
4704        /* Mark the link as currently not needed by the driver */
4705        if (!netif_running(netdev))
4706                mlx5e_modify_admin_state(mdev, MLX5_PORT_DOWN);
4707
4708        mlx5e_set_netdev_mtu_boundaries(priv);
4709        mlx5e_set_dev_port_mtu(priv);
4710
4711        mlx5_lag_add_netdev(mdev, netdev);
4712
4713        mlx5e_enable_async_events(priv);
4714        mlx5e_enable_blocking_events(priv);
4715        if (mlx5e_monitor_counter_supported(priv))
4716                mlx5e_monitor_counter_init(priv);
4717
4718        mlx5e_hv_vhca_stats_create(priv);
4719        if (netdev->reg_state != NETREG_REGISTERED)
4720                return;
4721        mlx5e_dcbnl_init_app(priv);
4722
4723        mlx5e_nic_set_rx_mode(priv);
4724
4725        rtnl_lock();
4726        if (netif_running(netdev))
4727                mlx5e_open(netdev);
4728        udp_tunnel_nic_reset_ntf(priv->netdev);
4729        netif_device_attach(netdev);
4730        rtnl_unlock();
4731}
4732
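/* .disable: mirror of mlx5e_nic_enable - detach the netdev, quiesce the RX
 * mode, tear down the monitor counters, traps and event handlers, and leave
 * the LAG before the RX/TX objects are destroyed.
 */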
4733static void mlx5e_nic_disable(struct mlx5e_priv *priv)
4734{
4735        struct mlx5_core_dev *mdev = priv->mdev;
4736
4737        if (priv->netdev->reg_state == NETREG_REGISTERED)
4738                mlx5e_dcbnl_delete_app(priv);
4739
4740        rtnl_lock();
4741        if (netif_running(priv->netdev))
4742                mlx5e_close(priv->netdev);
4743        netif_device_detach(priv->netdev);
4744        rtnl_unlock();
4745
4746        mlx5e_nic_set_rx_mode(priv);
4747
4748        mlx5e_hv_vhca_stats_destroy(priv);
4749        if (mlx5e_monitor_counter_supported(priv))
4750                mlx5e_monitor_counter_cleanup(priv);
4751
4752        mlx5e_disable_blocking_events(priv);
4753        if (priv->en_trap) {
4754                mlx5e_deactivate_trap(priv);
4755                mlx5e_close_trap(priv->en_trap);
4756                priv->en_trap = NULL;
4757        }
4758        mlx5e_disable_async_events(priv);
4759        mlx5_lag_remove_netdev(mdev, priv->netdev);
4760        mlx5_vxlan_reset_to_default(mdev->vxlan);
4761}
4762
4763int mlx5e_update_nic_rx(struct mlx5e_priv *priv)
4764{
4765        return mlx5e_refresh_tirs(priv, false, false);
4766}
4767
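/* The default NIC netdev profile: binds the callbacks above to the stats
 * groups, RX handlers and channel layout (max TCs, RQ groups including XSK)
 * used by the generic netdev management code below.
 */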
4768static const struct mlx5e_profile mlx5e_nic_profile = {
4769        .init              = mlx5e_nic_init,
4770        .cleanup           = mlx5e_nic_cleanup,
4771        .init_rx           = mlx5e_init_nic_rx,
4772        .cleanup_rx        = mlx5e_cleanup_nic_rx,
4773        .init_tx           = mlx5e_init_nic_tx,
4774        .cleanup_tx        = mlx5e_cleanup_nic_tx,
4775        .enable            = mlx5e_nic_enable,
4776        .disable           = mlx5e_nic_disable,
4777        .update_rx         = mlx5e_update_nic_rx,
4778        .update_stats      = mlx5e_stats_update_ndo_stats,
4779        .update_carrier    = mlx5e_update_carrier,
4780        .rx_handlers       = &mlx5e_rx_handlers_nic,
4781        .max_tc            = MLX5E_MAX_NUM_TC,
4782        .rq_groups         = MLX5E_NUM_RQ_GROUPS(XSK),
4783        .stats_grps        = mlx5e_nic_stats_grps,
4784        .stats_grps_num    = mlx5e_nic_stats_grps_num,
4785        .rx_ptp_support    = true,
4786};
4787
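/* Upper-bound the channel count by the device capability, by the number of
 * netdev RX queues available per RQ group, and by the netdev TX queues left
 * per TC after reserving the PTP and HTB (QoS) send queues. For example, if
 * the device supports 24 channels but the netdev was allocated with only
 * 16 RX queues per RQ group, at most 16 channels are used.
 */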
4788static unsigned int
4789mlx5e_calc_max_nch(struct mlx5_core_dev *mdev, struct net_device *netdev,
4790                   const struct mlx5e_profile *profile)
4791
4792{
4793        unsigned int max_nch, tmp;
4794
4795        /* core resources */
4796        max_nch = mlx5e_get_max_num_channels(mdev);
4797
4798        /* netdev rx queues */
4799        tmp = netdev->num_rx_queues / max_t(u8, profile->rq_groups, 1);
4800        max_nch = min_t(unsigned int, max_nch, tmp);
4801
4802        /* netdev tx queues */
4803        tmp = netdev->num_tx_queues;
4804        if (mlx5_qos_is_supported(mdev))
4805                tmp -= mlx5e_qos_max_leaf_nodes(mdev);
4806        if (MLX5_CAP_GEN(mdev, ts_cqe_to_dest_cqn))
4807                tmp -= profile->max_tc;
4808        tmp = tmp / profile->max_tc;
4809        max_nch = min_t(unsigned int, max_nch, tmp);
4810
4811        return max_nch;
4812}
4813
4814/* mlx5e generic netdev management API (move to en_common.c) */
4815int mlx5e_priv_init(struct mlx5e_priv *priv,
4816                    const struct mlx5e_profile *profile,
4817                    struct net_device *netdev,
4818                    struct mlx5_core_dev *mdev)
4819{
4820        /* priv init */
4821        priv->mdev        = mdev;
4822        priv->netdev      = netdev;
4823        priv->msglevel    = MLX5E_MSG_LEVEL;
4824        priv->max_nch     = mlx5e_calc_max_nch(mdev, netdev, profile);
4825        priv->stats_nch   = priv->max_nch;
4826        priv->max_opened_tc = 1;
4827
4828        if (!alloc_cpumask_var(&priv->scratchpad.cpumask, GFP_KERNEL))
4829                return -ENOMEM;
4830
4831        mutex_init(&priv->state_lock);
4832        hash_init(priv->htb.qos_tc2node);
4833        INIT_WORK(&priv->update_carrier_work, mlx5e_update_carrier_work);
4834        INIT_WORK(&priv->set_rx_mode_work, mlx5e_set_rx_mode_work);
4835        INIT_WORK(&priv->tx_timeout_work, mlx5e_tx_timeout_work);
4836        INIT_WORK(&priv->update_stats_work, mlx5e_update_stats_work);
4837
4838        priv->wq = create_singlethread_workqueue("mlx5e");
4839        if (!priv->wq)
4840                goto err_free_cpumask;
4841
4842        return 0;
4843
4844err_free_cpumask:
4845        free_cpumask_var(priv->scratchpad.cpumask);
4846
4847        return -ENOMEM;
4848}
4849
4850void mlx5e_priv_cleanup(struct mlx5e_priv *priv)
4851{
4852        int i;
4853
4854        /* Bail if the profile change failed and the rollback also failed */
4855        if (!priv->mdev)
4856                return;
4857
4858        destroy_workqueue(priv->wq);
4859        free_cpumask_var(priv->scratchpad.cpumask);
4860
4861        for (i = 0; i < priv->htb.max_qos_sqs; i++)
4862                kfree(priv->htb.qos_sq_stats[i]);
4863        kvfree(priv->htb.qos_sq_stats);
4864
4865        memset(priv, 0, sizeof(*priv));
4866}
4867
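/* Allocate a multi-queue ethernet netdev with an mlx5e_priv private area and
 * initialize that private area; the caller still has to build the netdev ops
 * and features and register it.
 */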
4868struct net_device *
4869mlx5e_create_netdev(struct mlx5_core_dev *mdev, const struct mlx5e_profile *profile,
4870                    unsigned int txqs, unsigned int rxqs)
4871{
4872        struct net_device *netdev;
4873        int err;
4874
4875        netdev = alloc_etherdev_mqs(sizeof(struct mlx5e_priv), txqs, rxqs);
4876        if (!netdev) {
4877                mlx5_core_err(mdev, "alloc_etherdev_mqs() failed\n");
4878                return NULL;
4879        }
4880
4881        err = mlx5e_priv_init(netdev_priv(netdev), profile, netdev, mdev);
4882        if (err) {
4883                mlx5_core_err(mdev, "mlx5e_priv_init failed, err=%d\n", err);
4884                goto err_free_netdev;
4885        }
4886
4887        netif_carrier_off(netdev);
4888        dev_net_set(netdev, mlx5_core_net(mdev));
4889
4890        return netdev;
4891
4892err_free_netdev:
4893        free_netdev(netdev);
4894
4895        return NULL;
4896}
4897
4898static void mlx5e_update_features(struct net_device *netdev)
4899{
4900        if (netdev->reg_state != NETREG_REGISTERED)
4901                return; /* features will be updated on netdev registration */
4902
4903        rtnl_lock();
4904        netdev_update_features(netdev);
4905        rtnl_unlock();
4906}
4907
4908static void mlx5e_reset_channels(struct net_device *netdev)
4909{
4910        netdev_reset_tc(netdev);
4911}
4912
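/* Bring the profile up on the netdev: re-clamp the channel count (the device
 * limits may have changed across reload/resume), then run the profile's
 * init_tx/init_rx/enable callbacks and refresh the netdev feature set.
 */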
4913int mlx5e_attach_netdev(struct mlx5e_priv *priv)
4914{
4915        const bool take_rtnl = priv->netdev->reg_state == NETREG_REGISTERED;
4916        const struct mlx5e_profile *profile = priv->profile;
4917        int max_nch;
4918        int err;
4919
4920        clear_bit(MLX5E_STATE_DESTROYING, &priv->state);
4921
4922        /* max number of channels may have changed */
4923        max_nch = mlx5e_calc_max_nch(priv->mdev, priv->netdev, profile);
4924        if (priv->channels.params.num_channels > max_nch) {
4925                mlx5_core_warn(priv->mdev, "MLX5E: Reducing number of channels to %d\n", max_nch);
4926                /* Reducing the number of channels - RXFH has to be reset, and
4927                 * mlx5e_num_channels_changed below will build the RQT.
4928                 */
4929                priv->netdev->priv_flags &= ~IFF_RXFH_CONFIGURED;
4930                priv->channels.params.num_channels = max_nch;
4931                if (priv->channels.params.mqprio.mode == TC_MQPRIO_MODE_CHANNEL) {
4932                        mlx5_core_warn(priv->mdev, "MLX5E: Disabling MQPRIO channel mode\n");
4933                        mlx5e_params_mqprio_reset(&priv->channels.params);
4934                }
4935        }
4936        if (max_nch != priv->max_nch) {
4937                mlx5_core_warn(priv->mdev,
4938                               "MLX5E: Updating max number of channels from %u to %u\n",
4939                               priv->max_nch, max_nch);
4940                priv->max_nch = max_nch;
4941        }
4942
4943        /* 1. Set the real number of queues in the kernel the first time.
4944         * 2. Set our default XPS cpumask.
4945         * 3. Build the RQT.
4946         *
4947         * rtnl_lock is required by netif_set_real_num_*_queues in case the
4948         * netdev has been registered by this point (if this function was called
4949         * in the reload or resume flow).
4950         */
4951        if (take_rtnl)
4952                rtnl_lock();
4953        err = mlx5e_num_channels_changed(priv);
4954        if (take_rtnl)
4955                rtnl_unlock();
4956        if (err)
4957                goto out;
4958
4959        err = profile->init_tx(priv);
4960        if (err)
4961                goto out;
4962
4963        err = profile->init_rx(priv);
4964        if (err)
4965                goto err_cleanup_tx;
4966
4967        if (profile->enable)
4968                profile->enable(priv);
4969
4970        mlx5e_update_features(priv->netdev);
4971
4972        return 0;
4973
4974err_cleanup_tx:
4975        profile->cleanup_tx(priv);
4976
4977out:
4978        mlx5e_reset_channels(priv->netdev);
4979        set_bit(MLX5E_STATE_DESTROYING, &priv->state);
4980        cancel_work_sync(&priv->update_stats_work);
4981        return err;
4982}
4983
4984void mlx5e_detach_netdev(struct mlx5e_priv *priv)
4985{
4986        const struct mlx5e_profile *profile = priv->profile;
4987
4988        set_bit(MLX5E_STATE_DESTROYING, &priv->state);
4989
4990        if (profile->disable)
4991                profile->disable(priv);
4992        flush_workqueue(priv->wq);
4993
4994        profile->cleanup_rx(priv);
4995        profile->cleanup_tx(priv);
4996        mlx5e_reset_channels(priv->netdev);
4997        cancel_work_sync(&priv->update_stats_work);
4998}
4999
5000static int
5001mlx5e_netdev_attach_profile(struct net_device *netdev, struct mlx5_core_dev *mdev,
5002                            const struct mlx5e_profile *new_profile, void *new_ppriv)
5003{
5004        struct mlx5e_priv *priv = netdev_priv(netdev);
5005        int err;
5006
5007        err = mlx5e_priv_init(priv, new_profile, netdev, mdev);
5008        if (err) {
5009                mlx5_core_err(mdev, "mlx5e_priv_init failed, err=%d\n", err);
5010                return err;
5011        }
5012        netif_carrier_off(netdev);
5013        priv->profile = new_profile;
5014        priv->ppriv = new_ppriv;
5015        err = new_profile->init(priv->mdev, priv->netdev);
5016        if (err)
5017                goto priv_cleanup;
5018        err = mlx5e_attach_netdev(priv);
5019        if (err)
5020                goto profile_cleanup;
5021        return err;
5022
5023profile_cleanup:
5024        new_profile->cleanup(priv);
5025priv_cleanup:
5026        mlx5e_priv_cleanup(priv);
5027        return err;
5028}
5029
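/* Swap the profile bound to an existing netdev: detach and clean up the
 * current profile, then init and attach the new one; on failure, try to roll
 * back to the original profile.
 */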
5030int mlx5e_netdev_change_profile(struct mlx5e_priv *priv,
5031                                const struct mlx5e_profile *new_profile, void *new_ppriv)
5032{
5033        const struct mlx5e_profile *orig_profile = priv->profile;
5034        struct net_device *netdev = priv->netdev;
5035        struct mlx5_core_dev *mdev = priv->mdev;
5036        void *orig_ppriv = priv->ppriv;
5037        int err, rollback_err;
5038
5039        /* cleanup old profile */
5040        mlx5e_detach_netdev(priv);
5041        priv->profile->cleanup(priv);
5042        mlx5e_priv_cleanup(priv);
5043
5044        err = mlx5e_netdev_attach_profile(netdev, mdev, new_profile, new_ppriv);
5045        if (err) { /* roll back to original profile */
5046                netdev_warn(netdev, "%s: new profile init failed, %d\n", __func__, err);
5047                goto rollback;
5048        }
5049
5050        return 0;
5051
5052rollback:
5053        rollback_err = mlx5e_netdev_attach_profile(netdev, mdev, orig_profile, orig_ppriv);
5054        if (rollback_err)
5055                netdev_err(netdev, "%s: failed to rollback to orig profile, %d\n",
5056                           __func__, rollback_err);
5057        return err;
5058}
5059
5060void mlx5e_netdev_attach_nic_profile(struct mlx5e_priv *priv)
5061{
5062        mlx5e_netdev_change_profile(priv, &mlx5e_nic_profile, NULL);
5063}
5064
5065void mlx5e_destroy_netdev(struct mlx5e_priv *priv)
5066{
5067        struct net_device *netdev = priv->netdev;
5068
5069        mlx5e_priv_cleanup(priv);
5070        free_netdev(netdev);
5071}
5072
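/* Auxiliary-device resume: recreate the mdev-level resources and re-attach
 * the netdev profile. A no-op if the netdev is still present.
 */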
5073static int mlx5e_resume(struct auxiliary_device *adev)
5074{
5075        struct mlx5_adev *edev = container_of(adev, struct mlx5_adev, adev);
5076        struct mlx5e_priv *priv = dev_get_drvdata(&adev->dev);
5077        struct net_device *netdev = priv->netdev;
5078        struct mlx5_core_dev *mdev = edev->mdev;
5079        int err;
5080
5081        if (netif_device_present(netdev))
5082                return 0;
5083
5084        err = mlx5e_create_mdev_resources(mdev);
5085        if (err)
5086                return err;
5087
5088        err = mlx5e_attach_netdev(priv);
5089        if (err) {
5090                mlx5e_destroy_mdev_resources(mdev);
5091                return err;
5092        }
5093
5094        return 0;
5095}
5096
5097static int mlx5e_suspend(struct auxiliary_device *adev, pm_message_t state)
5098{
5099        struct mlx5e_priv *priv = dev_get_drvdata(&adev->dev);
5100        struct net_device *netdev = priv->netdev;
5101        struct mlx5_core_dev *mdev = priv->mdev;
5102
5103        if (!netif_device_present(netdev))
5104                return -ENODEV;
5105
5106        mlx5e_detach_netdev(priv);
5107        mlx5e_destroy_mdev_resources(mdev);
5108        return 0;
5109}
5110
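/* Auxiliary-device probe for the "eth" (NIC netdev) personality: size the
 * netdev TX/RX queue arrays from the channel count, the TC count and the
 * optional PTP and HTB (QoS) send queues, then register the devlink port,
 * initialize the NIC profile and register the netdev.
 */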
5111static int mlx5e_probe(struct auxiliary_device *adev,
5112                       const struct auxiliary_device_id *id)
5113{
5114        struct mlx5_adev *edev = container_of(adev, struct mlx5_adev, adev);
5115        const struct mlx5e_profile *profile = &mlx5e_nic_profile;
5116        struct mlx5_core_dev *mdev = edev->mdev;
5117        struct net_device *netdev;
5118        pm_message_t state = {};
5119        unsigned int txqs, rxqs, ptp_txqs = 0;
5120        struct mlx5e_priv *priv;
5121        int qos_sqs = 0;
5122        int err;
5123        int nch;
5124
5125        if (MLX5_CAP_GEN(mdev, ts_cqe_to_dest_cqn))
5126                ptp_txqs = profile->max_tc;
5127
5128        if (mlx5_qos_is_supported(mdev))
5129                qos_sqs = mlx5e_qos_max_leaf_nodes(mdev);
5130
5131        nch = mlx5e_get_max_num_channels(mdev);
5132        txqs = nch * profile->max_tc + ptp_txqs + qos_sqs;
5133        rxqs = nch * profile->rq_groups;
5134        netdev = mlx5e_create_netdev(mdev, profile, txqs, rxqs);
5135        if (!netdev) {
5136                mlx5_core_err(mdev, "mlx5e_create_netdev failed\n");
5137                return -ENOMEM;
5138        }
5139
5140        mlx5e_build_nic_netdev(netdev);
5141
5142        priv = netdev_priv(netdev);
5143        dev_set_drvdata(&adev->dev, priv);
5144
5145        priv->profile = profile;
5146        priv->ppriv = NULL;
5147
5148        err = mlx5e_devlink_port_register(priv);
5149        if (err) {
5150                mlx5_core_err(mdev, "mlx5e_devlink_port_register failed, %d\n", err);
5151                goto err_destroy_netdev;
5152        }
5153
5154        err = profile->init(mdev, netdev);
5155        if (err) {
5156                mlx5_core_err(mdev, "mlx5e_nic_profile init failed, %d\n", err);
5157                goto err_devlink_cleanup;
5158        }
5159
5160        err = mlx5e_resume(adev);
5161        if (err) {
5162                mlx5_core_err(mdev, "mlx5e_resume failed, %d\n", err);
5163                goto err_profile_cleanup;
5164        }
5165
5166        err = register_netdev(netdev);
5167        if (err) {
5168                mlx5_core_err(mdev, "register_netdev failed, %d\n", err);
5169                goto err_resume;
5170        }
5171
5172        mlx5e_devlink_port_type_eth_set(priv);
5173
5174        mlx5e_dcbnl_init_app(priv);
5175        mlx5_uplink_netdev_set(mdev, netdev);
5176        return 0;
5177
5178err_resume:
5179        mlx5e_suspend(adev, state);
5180err_profile_cleanup:
5181        profile->cleanup(priv);
5182err_devlink_cleanup:
5183        mlx5e_devlink_port_unregister(priv);
5184err_destroy_netdev:
5185        mlx5e_destroy_netdev(priv);
5186        return err;
5187}
5188
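/* Tear down in roughly the reverse order of mlx5e_probe(). */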
5189static void mlx5e_remove(struct auxiliary_device *adev)
5190{
5191        struct mlx5e_priv *priv = dev_get_drvdata(&adev->dev);
5192        pm_message_t state = {};
5193
5194        mlx5e_dcbnl_delete_app(priv);
5195        unregister_netdev(priv->netdev);
5196        mlx5e_suspend(adev, state);
5197        priv->profile->cleanup(priv);
5198        mlx5e_devlink_port_unregister(priv);
5199        mlx5e_destroy_netdev(priv);
5200}
5201
5202static const struct auxiliary_device_id mlx5e_id_table[] = {
5203        { .name = MLX5_ADEV_NAME ".eth", },
5204        {},
5205};
5206
5207MODULE_DEVICE_TABLE(auxiliary, mlx5e_id_table);
5208
5209static struct auxiliary_driver mlx5e_driver = {
5210        .name = "eth",
5211        .probe = mlx5e_probe,
5212        .remove = mlx5e_remove,
5213        .suspend = mlx5e_suspend,
5214        .resume = mlx5e_resume,
5215        .id_table = mlx5e_id_table,
5216};
5217
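/* Module init: register the auxiliary "eth" driver and the representor
 * support; the aux driver is unregistered again if representor init fails.
 */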
5218int mlx5e_init(void)
5219{
5220        int ret;
5221
5222        mlx5e_ipsec_build_inverse_table();
5223        mlx5e_build_ptys2ethtool_map();
5224        ret = auxiliary_driver_register(&mlx5e_driver);
5225        if (ret)
5226                return ret;
5227
5228        ret = mlx5e_rep_init();
5229        if (ret)
5230                auxiliary_driver_unregister(&mlx5e_driver);
5231        return ret;
5232}
5233
5234void mlx5e_cleanup(void)
5235{
5236        mlx5e_rep_cleanup();
5237        auxiliary_driver_unregister(&mlx5e_driver);
5238}
5239