linux/drivers/net/ethernet/mellanox/mlx5/core/eq.c
// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/*
 * Copyright (c) 2013-2021, Mellanox Technologies inc.  All rights reserved.
 */

#include <linux/interrupt.h>
#include <linux/notifier.h>
#include <linux/module.h>
#include <linux/mlx5/driver.h>
#include <linux/mlx5/vport.h>
#include <linux/mlx5/eq.h>
#ifdef CONFIG_RFS_ACCEL
#include <linux/cpu_rmap.h>
#endif
#include "mlx5_core.h"
#include "lib/eq.h"
#include "fpga/core.h"
#include "eswitch.h"
#include "lib/clock.h"
#include "diag/fw_tracer.h"
#include "mlx5_irq.h"

enum {
        MLX5_EQE_OWNER_INIT_VAL = 0x1,
};

enum {
        MLX5_EQ_STATE_ARMED             = 0x9,
        MLX5_EQ_STATE_FIRED             = 0xa,
        MLX5_EQ_STATE_ALWAYS_ARMED      = 0xb,
};

enum {
        MLX5_EQ_DOORBEL_OFFSET  = 0x40,
};

/* The polling budget must be smaller than MLX5_NUM_SPARE_EQE to guarantee
 * that we update the ci before we have polled all the entries in the EQ.
 * MLX5_NUM_SPARE_EQE is also factored into the EQ size, so the budget is
 * smaller than the EQ size as well.
 */
enum {
        MLX5_EQ_POLLING_BUDGET  = 128,
};

static_assert(MLX5_EQ_POLLING_BUDGET <= MLX5_NUM_SPARE_EQE);

struct mlx5_eq_table {
        struct list_head        comp_eqs_list;
        struct mlx5_eq_async    pages_eq;
        struct mlx5_eq_async    cmd_eq;
        struct mlx5_eq_async    async_eq;

        struct atomic_notifier_head nh[MLX5_EVENT_TYPE_MAX];

        /* Since CQ DB is stored in async_eq */
        struct mlx5_nb          cq_err_nb;

        struct mutex            lock; /* sync async eqs creations */
        int                     num_comp_eqs;
        struct mlx5_irq_table   *irq_table;
#ifdef CONFIG_RFS_ACCEL
        struct cpu_rmap         *rmap;
#endif
};

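/* Base set of async events that are always subscribed on the async EQ;
 * gather_async_events_mask() extends this mask with capability-dependent
 * events before the EQ is created.
 */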
#define MLX5_ASYNC_EVENT_MASK ((1ull << MLX5_EVENT_TYPE_PATH_MIG)           | \
                               (1ull << MLX5_EVENT_TYPE_COMM_EST)           | \
                               (1ull << MLX5_EVENT_TYPE_SQ_DRAINED)         | \
                               (1ull << MLX5_EVENT_TYPE_CQ_ERROR)           | \
                               (1ull << MLX5_EVENT_TYPE_WQ_CATAS_ERROR)     | \
                               (1ull << MLX5_EVENT_TYPE_PATH_MIG_FAILED)    | \
                               (1ull << MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR) | \
                               (1ull << MLX5_EVENT_TYPE_WQ_ACCESS_ERROR)    | \
                               (1ull << MLX5_EVENT_TYPE_PORT_CHANGE)        | \
                               (1ull << MLX5_EVENT_TYPE_SRQ_CATAS_ERROR)    | \
                               (1ull << MLX5_EVENT_TYPE_SRQ_LAST_WQE)       | \
                               (1ull << MLX5_EVENT_TYPE_SRQ_RQ_LIMIT))

static int mlx5_cmd_destroy_eq(struct mlx5_core_dev *dev, u8 eqn)
{
        u32 in[MLX5_ST_SZ_DW(destroy_eq_in)] = {};

        MLX5_SET(destroy_eq_in, in, opcode, MLX5_CMD_OP_DESTROY_EQ);
        MLX5_SET(destroy_eq_in, in, eq_number, eqn);
        return mlx5_cmd_exec_in(dev, destroy_eq, in);
}

/* caller must eventually call mlx5_cq_put on the returned cq */
static struct mlx5_core_cq *mlx5_eq_cq_get(struct mlx5_eq *eq, u32 cqn)
{
        struct mlx5_cq_table *table = &eq->cq_table;
        struct mlx5_core_cq *cq = NULL;

        rcu_read_lock();
        cq = radix_tree_lookup(&table->tree, cqn);
        if (likely(cq))
                mlx5_cq_hold(cq);
        rcu_read_unlock();

        return cq;
}

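/* Completion EQ interrupt notifier: polls up to MLX5_EQ_POLLING_BUDGET EQEs,
 * dispatches each completion to its CQ via cq->comp(), then updates the
 * consumer index and re-arms the EQ. The CQ tasklet is scheduled only if at
 * least one EQE was consumed.
 */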
static int mlx5_eq_comp_int(struct notifier_block *nb,
                            __always_unused unsigned long action,
                            __always_unused void *data)
{
        struct mlx5_eq_comp *eq_comp =
                container_of(nb, struct mlx5_eq_comp, irq_nb);
        struct mlx5_eq *eq = &eq_comp->core;
        struct mlx5_eqe *eqe;
        int num_eqes = 0;
        u32 cqn = -1;

        eqe = next_eqe_sw(eq);
        if (!eqe)
                goto out;

        do {
                struct mlx5_core_cq *cq;

                /* Make sure we read EQ entry contents after we've
                 * checked the ownership bit.
                 */
                dma_rmb();
                /* Assume (eqe->type) is always MLX5_EVENT_TYPE_COMP */
                cqn = be32_to_cpu(eqe->data.comp.cqn) & 0xffffff;

                cq = mlx5_eq_cq_get(eq, cqn);
                if (likely(cq)) {
                        ++cq->arm_sn;
                        cq->comp(cq, eqe);
                        mlx5_cq_put(cq);
                } else {
                        dev_dbg_ratelimited(eq->dev->device,
                                            "Completion event for bogus CQ 0x%x\n", cqn);
                }

                ++eq->cons_index;

        } while ((++num_eqes < MLX5_EQ_POLLING_BUDGET) && (eqe = next_eqe_sw(eq)));

out:
        eq_update_ci(eq, 1);

        if (cqn != -1)
                tasklet_schedule(&eq_comp->tasklet_ctx.task);

        return 0;
}

/* Some architectures don't latch interrupts when they are disabled, so using
 * mlx5_eq_poll_irq_disabled could end up losing interrupts while trying to
 * avoid losing them.  It is not recommended to use it, unless this is the last
 * resort.
 */
u32 mlx5_eq_poll_irq_disabled(struct mlx5_eq_comp *eq)
{
        u32 count_eqe;

        disable_irq(eq->core.irqn);
        count_eqe = eq->core.cons_index;
        mlx5_eq_comp_int(&eq->irq_nb, 0, NULL);
        count_eqe = eq->core.cons_index - count_eqe;
        enable_irq(eq->core.irqn);

        return count_eqe;
}

static void mlx5_eq_async_int_lock(struct mlx5_eq_async *eq, bool recovery,
                                   unsigned long *flags)
        __acquires(&eq->lock)
{
        if (!recovery)
                spin_lock(&eq->lock);
        else
                spin_lock_irqsave(&eq->lock, *flags);
}

static void mlx5_eq_async_int_unlock(struct mlx5_eq_async *eq, bool recovery,
                                     unsigned long *flags)
        __releases(&eq->lock)
{
        if (!recovery)
                spin_unlock(&eq->lock);
        else
                spin_unlock_irqrestore(&eq->lock, *flags);
}

enum async_eq_nb_action {
        ASYNC_EQ_IRQ_HANDLER = 0,
        ASYNC_EQ_RECOVER = 1,
};

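/* Async EQ notifier: consumes up to MLX5_EQ_POLLING_BUDGET EQEs and forwards
 * each one to the per-event-type notifier chain as well as to the
 * MLX5_EVENT_TYPE_NOTIFY_ANY chain. When invoked with ASYNC_EQ_RECOVER (from
 * mlx5_cmd_eq_recover()), it takes the EQ lock with IRQs disabled and returns
 * the number of EQEs polled instead of 0.
 */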
static int mlx5_eq_async_int(struct notifier_block *nb,
                             unsigned long action, void *data)
{
        struct mlx5_eq_async *eq_async =
                container_of(nb, struct mlx5_eq_async, irq_nb);
        struct mlx5_eq *eq = &eq_async->core;
        struct mlx5_eq_table *eqt;
        struct mlx5_core_dev *dev;
        struct mlx5_eqe *eqe;
        unsigned long flags;
        int num_eqes = 0;
        bool recovery;

        dev = eq->dev;
        eqt = dev->priv.eq_table;

        recovery = action == ASYNC_EQ_RECOVER;
        mlx5_eq_async_int_lock(eq_async, recovery, &flags);

        eqe = next_eqe_sw(eq);
        if (!eqe)
                goto out;

        do {
                /*
                 * Make sure we read EQ entry contents after we've
                 * checked the ownership bit.
                 */
                dma_rmb();

                atomic_notifier_call_chain(&eqt->nh[eqe->type], eqe->type, eqe);
                atomic_notifier_call_chain(&eqt->nh[MLX5_EVENT_TYPE_NOTIFY_ANY], eqe->type, eqe);

                ++eq->cons_index;

        } while ((++num_eqes < MLX5_EQ_POLLING_BUDGET) && (eqe = next_eqe_sw(eq)));

out:
        eq_update_ci(eq, 1);
        mlx5_eq_async_int_unlock(eq_async, recovery, &flags);

        return unlikely(recovery) ? num_eqes : 0;
}

void mlx5_cmd_eq_recover(struct mlx5_core_dev *dev)
{
        struct mlx5_eq_async *eq = &dev->priv.eq_table->cmd_eq;
        int eqes;

        eqes = mlx5_eq_async_int(&eq->irq_nb, ASYNC_EQ_RECOVER, NULL);
        if (eqes)
                mlx5_core_warn(dev, "Recovered %d EQEs on cmd_eq\n", eqes);
}

static void init_eq_buf(struct mlx5_eq *eq)
{
        struct mlx5_eqe *eqe;
        int i;

        for (i = 0; i < eq_get_size(eq); i++) {
                eqe = get_eqe(eq, i);
                eqe->owner = MLX5_EQE_OWNER_INIT_VAL;
        }
}

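/* Allocate the EQ buffer, request an IRQ for the given vector and create the
 * EQ in firmware with MLX5_CMD_OP_CREATE_EQ. On success the EQ is mapped to
 * its doorbell page but is not yet armed; see mlx5_eq_enable().
 */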
static int
create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq,
              struct mlx5_eq_param *param)
{
        u8 log_eq_size = order_base_2(param->nent + MLX5_NUM_SPARE_EQE);
        struct mlx5_cq_table *cq_table = &eq->cq_table;
        u32 out[MLX5_ST_SZ_DW(create_eq_out)] = {0};
        u8 log_eq_stride = ilog2(MLX5_EQE_SIZE);
        struct mlx5_priv *priv = &dev->priv;
        u16 vecidx = param->irq_index;
        __be64 *pas;
        void *eqc;
        int inlen;
        u32 *in;
        int err;
        int i;

        /* Init CQ table */
        memset(cq_table, 0, sizeof(*cq_table));
        spin_lock_init(&cq_table->lock);
        INIT_RADIX_TREE(&cq_table->tree, GFP_ATOMIC);

        eq->cons_index = 0;

        err = mlx5_frag_buf_alloc_node(dev, wq_get_byte_sz(log_eq_size, log_eq_stride),
                                       &eq->frag_buf, dev->priv.numa_node);
        if (err)
                return err;

        mlx5_init_fbc(eq->frag_buf.frags, log_eq_stride, log_eq_size, &eq->fbc);
        init_eq_buf(eq);

        eq->irq = mlx5_irq_request(dev, vecidx, param->affinity);
        if (IS_ERR(eq->irq)) {
                err = PTR_ERR(eq->irq);
                goto err_buf;
        }

        vecidx = mlx5_irq_get_index(eq->irq);
        inlen = MLX5_ST_SZ_BYTES(create_eq_in) +
                MLX5_FLD_SZ_BYTES(create_eq_in, pas[0]) * eq->frag_buf.npages;

        in = kvzalloc(inlen, GFP_KERNEL);
        if (!in) {
                err = -ENOMEM;
                goto err_irq;
        }

        pas = (__be64 *)MLX5_ADDR_OF(create_eq_in, in, pas);
        mlx5_fill_page_frag_array(&eq->frag_buf, pas);

        MLX5_SET(create_eq_in, in, opcode, MLX5_CMD_OP_CREATE_EQ);
        if (!param->mask[0] && MLX5_CAP_GEN(dev, log_max_uctx))
                MLX5_SET(create_eq_in, in, uid, MLX5_SHARED_RESOURCE_UID);

        for (i = 0; i < 4; i++)
                MLX5_ARRAY_SET64(create_eq_in, in, event_bitmask, i,
                                 param->mask[i]);

        eqc = MLX5_ADDR_OF(create_eq_in, in, eq_context_entry);
        MLX5_SET(eqc, eqc, log_eq_size, eq->fbc.log_sz);
        MLX5_SET(eqc, eqc, uar_page, priv->uar->index);
        MLX5_SET(eqc, eqc, intr, vecidx);
        MLX5_SET(eqc, eqc, log_page_size,
                 eq->frag_buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT);

        err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out));
        if (err)
                goto err_in;

        eq->vecidx = vecidx;
        eq->eqn = MLX5_GET(create_eq_out, out, eq_number);
        eq->irqn = pci_irq_vector(dev->pdev, vecidx);
        eq->dev = dev;
        eq->doorbell = priv->uar->map + MLX5_EQ_DOORBEL_OFFSET;

        err = mlx5_debug_eq_add(dev, eq);
        if (err)
                goto err_eq;

        kvfree(in);
        return 0;

err_eq:
        mlx5_cmd_destroy_eq(dev, eq->eqn);

err_in:
        kvfree(in);

err_irq:
        mlx5_irq_release(eq->irq);
err_buf:
        mlx5_frag_buf_free(dev, &eq->frag_buf);
        return err;
}

/**
 * mlx5_eq_enable - Enable EQ for receiving EQEs
 * @dev: Device which owns the eq
 * @eq: EQ to enable
 * @nb: Notifier call block
 *
 * Must be called after EQ is created in device.
 *
 * Return: 0 if no error
 */
int mlx5_eq_enable(struct mlx5_core_dev *dev, struct mlx5_eq *eq,
                   struct notifier_block *nb)
{
        int err;

        err = mlx5_irq_attach_nb(eq->irq, nb);
        if (!err)
                eq_update_ci(eq, 1);

        return err;
}
EXPORT_SYMBOL(mlx5_eq_enable);

/**
 * mlx5_eq_disable - Disable EQ for receiving EQEs
 * @dev: Device which owns the eq
 * @eq: EQ to disable
 * @nb: Notifier call block
 *
 * Must be called before EQ is destroyed.
 */
void mlx5_eq_disable(struct mlx5_core_dev *dev, struct mlx5_eq *eq,
                     struct notifier_block *nb)
{
        mlx5_irq_detach_nb(eq->irq, nb);
}
EXPORT_SYMBOL(mlx5_eq_disable);

static int destroy_unmap_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq)
{
        int err;

        mlx5_debug_eq_remove(dev, eq);

        err = mlx5_cmd_destroy_eq(dev, eq->eqn);
        if (err)
                mlx5_core_warn(dev, "failed to destroy a previously created eq: eqn %d\n",
                               eq->eqn);
        mlx5_irq_release(eq->irq);

        mlx5_frag_buf_free(dev, &eq->frag_buf);
        return err;
}

int mlx5_eq_add_cq(struct mlx5_eq *eq, struct mlx5_core_cq *cq)
{
        struct mlx5_cq_table *table = &eq->cq_table;
        int err;

        spin_lock(&table->lock);
        err = radix_tree_insert(&table->tree, cq->cqn, cq);
        spin_unlock(&table->lock);

        return err;
}

void mlx5_eq_del_cq(struct mlx5_eq *eq, struct mlx5_core_cq *cq)
{
        struct mlx5_cq_table *table = &eq->cq_table;
        struct mlx5_core_cq *tmp;

        spin_lock(&table->lock);
        tmp = radix_tree_delete(&table->tree, cq->cqn);
        spin_unlock(&table->lock);

        if (!tmp) {
                mlx5_core_dbg(eq->dev, "cq 0x%x not found in eq 0x%x tree\n",
                              cq->cqn, eq->eqn);
                return;
        }

        if (tmp != cq)
                mlx5_core_dbg(eq->dev, "corruption on cqn 0x%x in eq 0x%x\n",
                              cq->cqn, eq->eqn);
}

int mlx5_eq_table_init(struct mlx5_core_dev *dev)
{
        struct mlx5_eq_table *eq_table;
        int i;

        eq_table = kvzalloc(sizeof(*eq_table), GFP_KERNEL);
        if (!eq_table)
                return -ENOMEM;

        dev->priv.eq_table = eq_table;

        mlx5_eq_debugfs_init(dev);

        mutex_init(&eq_table->lock);
        for (i = 0; i < MLX5_EVENT_TYPE_MAX; i++)
                ATOMIC_INIT_NOTIFIER_HEAD(&eq_table->nh[i]);

        eq_table->irq_table = mlx5_irq_table_get(dev);
        return 0;
}

void mlx5_eq_table_cleanup(struct mlx5_core_dev *dev)
{
        mlx5_eq_debugfs_cleanup(dev);
        kvfree(dev->priv.eq_table);
}

/* Async EQs */

static int create_async_eq(struct mlx5_core_dev *dev,
                           struct mlx5_eq *eq, struct mlx5_eq_param *param)
{
        struct mlx5_eq_table *eq_table = dev->priv.eq_table;
        int err;

        mutex_lock(&eq_table->lock);
        err = create_map_eq(dev, eq, param);
        mutex_unlock(&eq_table->lock);
        return err;
}

static int destroy_async_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq)
{
        struct mlx5_eq_table *eq_table = dev->priv.eq_table;
        int err;

        mutex_lock(&eq_table->lock);
        err = destroy_unmap_eq(dev, eq);
        mutex_unlock(&eq_table->lock);
        return err;
}

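/* Notifier for MLX5_EVENT_TYPE_CQ_ERROR on the async EQ: looks up the
 * affected CQ in the async EQ's CQ table and forwards the error to its
 * cq->event() callback, if one is registered.
 */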
static int cq_err_event_notifier(struct notifier_block *nb,
                                 unsigned long type, void *data)
{
        struct mlx5_eq_table *eqt;
        struct mlx5_core_cq *cq;
        struct mlx5_eqe *eqe;
        struct mlx5_eq *eq;
        u32 cqn;

        /* type == MLX5_EVENT_TYPE_CQ_ERROR */

        eqt = mlx5_nb_cof(nb, struct mlx5_eq_table, cq_err_nb);
        eq  = &eqt->async_eq.core;
        eqe = data;

        cqn = be32_to_cpu(eqe->data.cq_err.cqn) & 0xffffff;
        mlx5_core_warn(eq->dev, "CQ error on CQN 0x%x, syndrome 0x%x\n",
                       cqn, eqe->data.cq_err.syndrome);

        cq = mlx5_eq_cq_get(eq, cqn);
        if (unlikely(!cq)) {
                mlx5_core_warn(eq->dev, "Async event for bogus CQ 0x%x\n", cqn);
                return NOTIFY_OK;
        }

        if (cq->event)
                cq->event(cq, type);

        mlx5_cq_put(cq);

        return NOTIFY_OK;
}

static void gather_user_async_events(struct mlx5_core_dev *dev, u64 mask[4])
{
        __be64 *user_unaffiliated_events;
        __be64 *user_affiliated_events;
        int i;

        user_affiliated_events =
                MLX5_CAP_DEV_EVENT(dev, user_affiliated_events);
        user_unaffiliated_events =
                MLX5_CAP_DEV_EVENT(dev, user_unaffiliated_events);

        for (i = 0; i < 4; i++)
                mask[i] |= be64_to_cpu(user_affiliated_events[i] |
                                       user_unaffiliated_events[i]);
}

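/* Build the 256-bit event bitmask for the async EQ: start from
 * MLX5_ASYNC_EVENT_MASK and add events that the device advertises support
 * for, plus user-requested events when event_cap is set.
 */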
static void gather_async_events_mask(struct mlx5_core_dev *dev, u64 mask[4])
{
        u64 async_event_mask = MLX5_ASYNC_EVENT_MASK;

        if (MLX5_VPORT_MANAGER(dev))
                async_event_mask |= (1ull << MLX5_EVENT_TYPE_NIC_VPORT_CHANGE);

        if (MLX5_CAP_GEN(dev, general_notification_event))
                async_event_mask |= (1ull << MLX5_EVENT_TYPE_GENERAL_EVENT);

        if (MLX5_CAP_GEN(dev, port_module_event))
                async_event_mask |= (1ull << MLX5_EVENT_TYPE_PORT_MODULE_EVENT);
        else
                mlx5_core_dbg(dev, "port_module_event is not set\n");

        if (MLX5_PPS_CAP(dev))
                async_event_mask |= (1ull << MLX5_EVENT_TYPE_PPS_EVENT);

        if (MLX5_CAP_GEN(dev, fpga))
                async_event_mask |= (1ull << MLX5_EVENT_TYPE_FPGA_ERROR) |
                                    (1ull << MLX5_EVENT_TYPE_FPGA_QP_ERROR);
        if (MLX5_CAP_GEN_MAX(dev, dct))
                async_event_mask |= (1ull << MLX5_EVENT_TYPE_DCT_DRAINED);

        if (MLX5_CAP_GEN(dev, temp_warn_event))
                async_event_mask |= (1ull << MLX5_EVENT_TYPE_TEMP_WARN_EVENT);

        if (MLX5_CAP_MCAM_REG(dev, tracer_registers))
                async_event_mask |= (1ull << MLX5_EVENT_TYPE_DEVICE_TRACER);

        if (MLX5_CAP_GEN(dev, max_num_of_monitor_counters))
                async_event_mask |= (1ull << MLX5_EVENT_TYPE_MONITOR_COUNTER);

        if (mlx5_eswitch_is_funcs_handler(dev))
                async_event_mask |=
                        (1ull << MLX5_EVENT_TYPE_ESW_FUNCTIONS_CHANGED);

        if (MLX5_CAP_GEN_MAX(dev, vhca_state))
                async_event_mask |= (1ull << MLX5_EVENT_TYPE_VHCA_STATE_CHANGE);

        mask[0] = async_event_mask;

        if (MLX5_CAP_GEN(dev, event_cap))
                gather_user_async_events(dev, mask);
}

static int
setup_async_eq(struct mlx5_core_dev *dev, struct mlx5_eq_async *eq,
               struct mlx5_eq_param *param, const char *name)
{
        int err;

        eq->irq_nb.notifier_call = mlx5_eq_async_int;
        spin_lock_init(&eq->lock);
        if (!zalloc_cpumask_var(&param->affinity, GFP_KERNEL))
                return -ENOMEM;

        err = create_async_eq(dev, &eq->core, param);
        free_cpumask_var(param->affinity);
        if (err) {
                mlx5_core_warn(dev, "failed to create %s EQ %d\n", name, err);
                return err;
        }
        err = mlx5_eq_enable(dev, &eq->core, &eq->irq_nb);
        if (err) {
                mlx5_core_warn(dev, "failed to enable %s EQ %d\n", name, err);
                destroy_async_eq(dev, &eq->core);
        }
        return err;
}

static void cleanup_async_eq(struct mlx5_core_dev *dev,
                             struct mlx5_eq_async *eq, const char *name)
{
        int err;

        mlx5_eq_disable(dev, &eq->core, &eq->irq_nb);
        err = destroy_async_eq(dev, &eq->core);
        if (err)
                mlx5_core_err(dev, "failed to destroy %s eq, err(%d)\n",
                              name, err);
}

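/* Create the three async EQs in dependency order: the command EQ first (so
 * the command interface can switch from polling to event mode), then the
 * async EQ, and finally the pages EQ used for firmware page requests.
 */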
static int create_async_eqs(struct mlx5_core_dev *dev)
{
        struct mlx5_eq_table *table = dev->priv.eq_table;
        struct mlx5_eq_param param = {};
        int err;

        MLX5_NB_INIT(&table->cq_err_nb, cq_err_event_notifier, CQ_ERROR);
        mlx5_eq_notifier_register(dev, &table->cq_err_nb);

        param = (struct mlx5_eq_param) {
                .nent = MLX5_NUM_CMD_EQE,
                .mask[0] = 1ull << MLX5_EVENT_TYPE_CMD,
        };
        mlx5_cmd_allowed_opcode(dev, MLX5_CMD_OP_CREATE_EQ);
        err = setup_async_eq(dev, &table->cmd_eq, &param, "cmd");
        if (err)
                goto err1;

        mlx5_cmd_use_events(dev);
        mlx5_cmd_allowed_opcode(dev, CMD_ALLOWED_OPCODE_ALL);

        param = (struct mlx5_eq_param) {
                .nent = MLX5_NUM_ASYNC_EQE,
        };

        gather_async_events_mask(dev, param.mask);
        err = setup_async_eq(dev, &table->async_eq, &param, "async");
        if (err)
                goto err2;

        param = (struct mlx5_eq_param) {
                .nent = /* TODO: sriov max_vf + */ 1,
                .mask[0] = 1ull << MLX5_EVENT_TYPE_PAGE_REQUEST,
        };

        err = setup_async_eq(dev, &table->pages_eq, &param, "pages");
        if (err)
                goto err3;

        return 0;

err3:
        cleanup_async_eq(dev, &table->async_eq, "async");
err2:
        mlx5_cmd_use_polling(dev);
        cleanup_async_eq(dev, &table->cmd_eq, "cmd");
err1:
        mlx5_cmd_allowed_opcode(dev, CMD_ALLOWED_OPCODE_ALL);
        mlx5_eq_notifier_unregister(dev, &table->cq_err_nb);
        return err;
}

static void destroy_async_eqs(struct mlx5_core_dev *dev)
{
        struct mlx5_eq_table *table = dev->priv.eq_table;

        cleanup_async_eq(dev, &table->pages_eq, "pages");
        cleanup_async_eq(dev, &table->async_eq, "async");
        mlx5_cmd_allowed_opcode(dev, MLX5_CMD_OP_DESTROY_EQ);
        mlx5_cmd_use_polling(dev);
        cleanup_async_eq(dev, &table->cmd_eq, "cmd");
        mlx5_cmd_allowed_opcode(dev, CMD_ALLOWED_OPCODE_ALL);
        mlx5_eq_notifier_unregister(dev, &table->cq_err_nb);
}

struct mlx5_eq *mlx5_get_async_eq(struct mlx5_core_dev *dev)
{
        return &dev->priv.eq_table->async_eq.core;
}

void mlx5_eq_synchronize_async_irq(struct mlx5_core_dev *dev)
{
        synchronize_irq(dev->priv.eq_table->async_eq.core.irqn);
}

void mlx5_eq_synchronize_cmd_irq(struct mlx5_core_dev *dev)
{
        synchronize_irq(dev->priv.eq_table->cmd_eq.core.irqn);
}

/* Generic EQ API for mlx5_core consumers
 * Needed for RDMA ODP EQ for now
 */
struct mlx5_eq *
mlx5_eq_create_generic(struct mlx5_core_dev *dev,
                       struct mlx5_eq_param *param)
{
        struct mlx5_eq *eq;
        int err;

        if (!cpumask_available(param->affinity))
                return ERR_PTR(-EINVAL);

        eq = kvzalloc(sizeof(*eq), GFP_KERNEL);
        if (!eq)
                return ERR_PTR(-ENOMEM);

        err = create_async_eq(dev, eq, param);
        if (err) {
                kvfree(eq);
                eq = ERR_PTR(err);
        }

        return eq;
}
EXPORT_SYMBOL(mlx5_eq_create_generic);
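/* Illustrative sketch of how a consumer (e.g. RDMA ODP) might use the generic
 * EQ API; the EQE count, event mask and notifier name below are examples
 * only, not part of this driver:
 *
 *	struct mlx5_eq_param param = {
 *		.nent = 64,
 *		.mask[0] = 1ull << MLX5_EVENT_TYPE_PAGE_FAULT,
 *	};
 *	struct mlx5_eq *eq;
 *	int err;
 *
 *	if (!zalloc_cpumask_var(&param.affinity, GFP_KERNEL))
 *		return -ENOMEM;
 *	cpumask_set_cpu(cpumask_first(cpu_online_mask), param.affinity);
 *	eq = mlx5_eq_create_generic(dev, &param);
 *	free_cpumask_var(param.affinity);
 *	if (IS_ERR(eq))
 *		return PTR_ERR(eq);
 *	err = mlx5_eq_enable(dev, eq, &my_irq_nb);
 */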

int mlx5_eq_destroy_generic(struct mlx5_core_dev *dev, struct mlx5_eq *eq)
{
        int err;

        if (IS_ERR(eq))
                return -EINVAL;

        err = destroy_async_eq(dev, eq);
        if (err)
                goto out;

        kvfree(eq);
out:
        return err;
}
EXPORT_SYMBOL(mlx5_eq_destroy_generic);

struct mlx5_eqe *mlx5_eq_get_eqe(struct mlx5_eq *eq, u32 cc)
{
        u32 ci = eq->cons_index + cc;
        u32 nent = eq_get_size(eq);
        struct mlx5_eqe *eqe;

        eqe = get_eqe(eq, ci & (nent - 1));
        eqe = ((eqe->owner & 1) ^ !!(ci & nent)) ? NULL : eqe;
        /* Make sure we read EQ entry contents after we've
         * checked the ownership bit.
         */
        if (eqe)
                dma_rmb();

        return eqe;
}
EXPORT_SYMBOL(mlx5_eq_get_eqe);

void mlx5_eq_update_ci(struct mlx5_eq *eq, u32 cc, bool arm)
{
        __be32 __iomem *addr = eq->doorbell + (arm ? 0 : 2);
        u32 val;

        eq->cons_index += cc;
        val = (eq->cons_index & 0xffffff) | (eq->eqn << 24);

        __raw_writel((__force u32)cpu_to_be32(val), addr);
        /* We still want ordering, just not swabbing, so add a barrier */
        wmb();
}
EXPORT_SYMBOL(mlx5_eq_update_ci);
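/* Illustrative sketch of the polling pattern these two helpers support; a
 * generic EQ consumer would typically do something like the following from
 * its notifier callback, where process_eqe() stands for consumer-specific
 * handling (example only):
 *
 *	struct mlx5_eqe *eqe;
 *	int cc = 0;
 *
 *	while ((eqe = mlx5_eq_get_eqe(eq, cc))) {
 *		process_eqe(eqe);
 *		cc++;
 *	}
 *	mlx5_eq_update_ci(eq, cc, true);
 */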

static void destroy_comp_eqs(struct mlx5_core_dev *dev)
{
        struct mlx5_eq_table *table = dev->priv.eq_table;
        struct mlx5_eq_comp *eq, *n;

        list_for_each_entry_safe(eq, n, &table->comp_eqs_list, list) {
                list_del(&eq->list);
                mlx5_eq_disable(dev, &eq->core, &eq->irq_nb);
                if (destroy_unmap_eq(dev, &eq->core))
                        mlx5_core_warn(dev, "failed to destroy comp EQ 0x%x\n",
                                       eq->core.eqn);
                tasklet_disable(&eq->tasklet_ctx.task);
                kfree(eq);
        }
}

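/* Create one completion EQ per completion vector: each EQ gets a tasklet
 * context for CQ completions and an IRQ affinity hint spread over CPUs local
 * to the device's NUMA node, and is appended to comp_eqs_list in vector
 * order.
 */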
static int create_comp_eqs(struct mlx5_core_dev *dev)
{
        struct mlx5_eq_table *table = dev->priv.eq_table;
        struct mlx5_eq_comp *eq;
        int ncomp_eqs;
        int nent;
        int err;
        int i;

        INIT_LIST_HEAD(&table->comp_eqs_list);
        ncomp_eqs = table->num_comp_eqs;
        nent = MLX5_COMP_EQ_SIZE;
        for (i = 0; i < ncomp_eqs; i++) {
                int vecidx = i + MLX5_IRQ_VEC_COMP_BASE;
                struct mlx5_eq_param param = {};

                eq = kzalloc(sizeof(*eq), GFP_KERNEL);
                if (!eq) {
                        err = -ENOMEM;
                        goto clean;
                }

                INIT_LIST_HEAD(&eq->tasklet_ctx.list);
                INIT_LIST_HEAD(&eq->tasklet_ctx.process_list);
                spin_lock_init(&eq->tasklet_ctx.lock);
                tasklet_setup(&eq->tasklet_ctx.task, mlx5_cq_tasklet_cb);

                eq->irq_nb.notifier_call = mlx5_eq_comp_int;
                param = (struct mlx5_eq_param) {
                        .irq_index = vecidx,
                        .nent = nent,
                };

                if (!zalloc_cpumask_var(&param.affinity, GFP_KERNEL)) {
                        err = -ENOMEM;
                        goto clean_eq;
                }
                cpumask_set_cpu(cpumask_local_spread(i, dev->priv.numa_node),
                                param.affinity);
                err = create_map_eq(dev, &eq->core, &param);
                free_cpumask_var(param.affinity);
                if (err)
                        goto clean_eq;
                err = mlx5_eq_enable(dev, &eq->core, &eq->irq_nb);
                if (err) {
                        destroy_unmap_eq(dev, &eq->core);
                        goto clean_eq;
                }

                mlx5_core_dbg(dev, "allocated completion EQN %d\n", eq->core.eqn);
                /* add tail, to keep the list ordered, for mlx5_vector2eqn to work */
                list_add_tail(&eq->list, &table->comp_eqs_list);
        }

        return 0;
clean_eq:
        kfree(eq);
clean:
        destroy_comp_eqs(dev);
        return err;
}

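/* Translate a completion vector index into its EQ number and/or IRQ number by
 * walking comp_eqs_list, which create_comp_eqs() keeps in vector order.
 */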
static int vector2eqnirqn(struct mlx5_core_dev *dev, int vector, int *eqn,
                          unsigned int *irqn)
{
        struct mlx5_eq_table *table = dev->priv.eq_table;
        struct mlx5_eq_comp *eq, *n;
        int err = -ENOENT;
        int i = 0;

        list_for_each_entry_safe(eq, n, &table->comp_eqs_list, list) {
                if (i++ == vector) {
                        if (irqn)
                                *irqn = eq->core.irqn;
                        if (eqn)
                                *eqn = eq->core.eqn;
                        err = 0;
                        break;
                }
        }

        return err;
}

int mlx5_vector2eqn(struct mlx5_core_dev *dev, int vector, int *eqn)
{
        return vector2eqnirqn(dev, vector, eqn, NULL);
}
EXPORT_SYMBOL(mlx5_vector2eqn);

int mlx5_vector2irqn(struct mlx5_core_dev *dev, int vector, unsigned int *irqn)
{
        return vector2eqnirqn(dev, vector, NULL, irqn);
}

unsigned int mlx5_comp_vectors_count(struct mlx5_core_dev *dev)
{
        return dev->priv.eq_table->num_comp_eqs;
}
EXPORT_SYMBOL(mlx5_comp_vectors_count);

struct cpumask *
mlx5_comp_irq_get_affinity_mask(struct mlx5_core_dev *dev, int vector)
{
        struct mlx5_eq_table *table = dev->priv.eq_table;
        struct mlx5_eq_comp *eq, *n;
        int i = 0;

        list_for_each_entry_safe(eq, n, &table->comp_eqs_list, list) {
                if (i++ == vector)
                        break;
        }

        return mlx5_irq_get_affinity_mask(eq->core.irq);
}
EXPORT_SYMBOL(mlx5_comp_irq_get_affinity_mask);

#ifdef CONFIG_RFS_ACCEL
struct cpu_rmap *mlx5_eq_table_get_rmap(struct mlx5_core_dev *dev)
{
        return dev->priv.eq_table->rmap;
}
#endif

struct mlx5_eq_comp *mlx5_eqn2comp_eq(struct mlx5_core_dev *dev, int eqn)
{
        struct mlx5_eq_table *table = dev->priv.eq_table;
        struct mlx5_eq_comp *eq;

        list_for_each_entry(eq, &table->comp_eqs_list, list) {
                if (eq->core.eqn == eqn)
                        return eq;
        }

        return ERR_PTR(-ENOENT);
}

static void clear_rmap(struct mlx5_core_dev *dev)
{
#ifdef CONFIG_RFS_ACCEL
        struct mlx5_eq_table *eq_table = dev->priv.eq_table;

        free_irq_cpu_rmap(eq_table->rmap);
#endif
}

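/* With CONFIG_RFS_ACCEL, build the cpu_rmap that lets the network stack steer
 * flows to the CPU handling a given completion IRQ; without it this is a
 * no-op that returns 0.
 */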
static int set_rmap(struct mlx5_core_dev *mdev)
{
        int err = 0;
#ifdef CONFIG_RFS_ACCEL
        struct mlx5_eq_table *eq_table = mdev->priv.eq_table;
        int vecidx;

        eq_table->rmap = alloc_irq_cpu_rmap(eq_table->num_comp_eqs);
        if (!eq_table->rmap) {
                err = -ENOMEM;
                mlx5_core_err(mdev, "Failed to allocate cpu_rmap. err %d", err);
                goto err_out;
        }

        vecidx = MLX5_IRQ_VEC_COMP_BASE;
        for (; vecidx < eq_table->num_comp_eqs + MLX5_IRQ_VEC_COMP_BASE;
             vecidx++) {
                err = irq_cpu_rmap_add(eq_table->rmap,
                                       pci_irq_vector(mdev->pdev, vecidx));
                if (err) {
                        mlx5_core_err(mdev, "irq_cpu_rmap_add failed. err %d",
                                      err);
                        goto err_irq_cpu_rmap_add;
                }
        }
        return 0;

err_irq_cpu_rmap_add:
        clear_rmap(mdev);
err_out:
#endif
        return err;
}

/* This function should only be called after mlx5_cmd_force_teardown_hca */
void mlx5_core_eq_free_irqs(struct mlx5_core_dev *dev)
{
        struct mlx5_eq_table *table = dev->priv.eq_table;

        mutex_lock(&table->lock); /* sync with create/destroy_async_eq */
        if (!mlx5_core_is_sf(dev))
                clear_rmap(dev);
        mlx5_irq_table_destroy(dev);
        mutex_unlock(&table->lock);
}

#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
#define MLX5_MAX_ASYNC_EQS 4
#else
#define MLX5_MAX_ASYNC_EQS 3
#endif

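/* Create the device EQs: the number of completion EQs is bounded by the IRQs
 * available for completions and by the device EQ limit minus
 * MLX5_MAX_ASYNC_EQS, and additionally by MLX5_COMP_EQS_PER_SF for SFs.
 */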
int mlx5_eq_table_create(struct mlx5_core_dev *dev)
{
        struct mlx5_eq_table *eq_table = dev->priv.eq_table;
        int num_eqs = MLX5_CAP_GEN(dev, max_num_eqs) ?
                      MLX5_CAP_GEN(dev, max_num_eqs) :
                      1 << MLX5_CAP_GEN(dev, log_max_eq);
        int max_eqs_sf;
        int err;

        eq_table->num_comp_eqs =
                min_t(int,
                      mlx5_irq_table_get_num_comp(eq_table->irq_table),
                      num_eqs - MLX5_MAX_ASYNC_EQS);
        if (mlx5_core_is_sf(dev)) {
                max_eqs_sf = min_t(int, MLX5_COMP_EQS_PER_SF,
                                   mlx5_irq_table_get_sfs_vec(eq_table->irq_table));
                eq_table->num_comp_eqs = min_t(int, eq_table->num_comp_eqs,
                                               max_eqs_sf);
        }

        err = create_async_eqs(dev);
        if (err) {
                mlx5_core_err(dev, "Failed to create async EQs\n");
                goto err_async_eqs;
        }

        if (!mlx5_core_is_sf(dev)) {
                /* rmap is a mapping between IRQ number and queue number.
                 * Each IRQ can be assigned to only a single rmap.
                 * Since SFs share IRQs, rmap mapping cannot function correctly
                 * for IRQs that are shared between different core/netdev RX rings.
                 * Hence we don't allow netdev rmap for SFs.
                 */
                err = set_rmap(dev);
                if (err)
                        goto err_rmap;
        }

        err = create_comp_eqs(dev);
        if (err) {
                mlx5_core_err(dev, "Failed to create completion EQs\n");
                goto err_comp_eqs;
        }

        return 0;
err_comp_eqs:
        if (!mlx5_core_is_sf(dev))
                clear_rmap(dev);
err_rmap:
        destroy_async_eqs(dev);
err_async_eqs:
        return err;
}

void mlx5_eq_table_destroy(struct mlx5_core_dev *dev)
{
        if (!mlx5_core_is_sf(dev))
                clear_rmap(dev);
        destroy_comp_eqs(dev);
        destroy_async_eqs(dev);
}

int mlx5_eq_notifier_register(struct mlx5_core_dev *dev, struct mlx5_nb *nb)
{
        struct mlx5_eq_table *eqt = dev->priv.eq_table;

        return atomic_notifier_chain_register(&eqt->nh[nb->event_type], &nb->nb);
}
EXPORT_SYMBOL(mlx5_eq_notifier_register);

int mlx5_eq_notifier_unregister(struct mlx5_core_dev *dev, struct mlx5_nb *nb)
{
        struct mlx5_eq_table *eqt = dev->priv.eq_table;

        return atomic_notifier_chain_unregister(&eqt->nh[nb->event_type], &nb->nb);
}
EXPORT_SYMBOL(mlx5_eq_notifier_unregister);
1071