linux/drivers/net/ethernet/mellanox/mlx5/core/main.c
/*
 * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <linux/highmem.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/errno.h>
#include <linux/pci.h>
#include <linux/dma-mapping.h>
#include <linux/slab.h>
#include <linux/io-mapping.h>
#include <linux/interrupt.h>
#include <linux/delay.h>
#include <linux/mlx5/driver.h>
#include <linux/mlx5/cq.h>
#include <linux/mlx5/qp.h>
#include <linux/mlx5/srq.h>
#include <linux/debugfs.h>
#include <linux/kmod.h>
#include <linux/mlx5/mlx5_ifc.h>
#ifdef CONFIG_RFS_ACCEL
#include <linux/cpu_rmap.h>
#endif
#include <net/devlink.h>
#include "mlx5_core.h"
#include "fs_core.h"
#ifdef CONFIG_MLX5_CORE_EN
#include "eswitch.h"
#endif
#include "lib/mlx5.h"
#include "fpga/core.h"
#include "accel/ipsec.h"

MODULE_AUTHOR("Eli Cohen <eli@mellanox.com>");
MODULE_DESCRIPTION("Mellanox Connect-IB, ConnectX-4 core driver");
MODULE_LICENSE("Dual BSD/GPL");
MODULE_VERSION(DRIVER_VERSION);

unsigned int mlx5_core_debug_mask;
module_param_named(debug_mask, mlx5_core_debug_mask, uint, 0644);
MODULE_PARM_DESC(debug_mask, "debug mask: 1 = dump cmd data, 2 = dump cmd exec time, 3 = both. Default=0");

#define MLX5_DEFAULT_PROF       2
static unsigned int prof_sel = MLX5_DEFAULT_PROF;
module_param_named(prof_sel, prof_sel, uint, 0444);
MODULE_PARM_DESC(prof_sel, "profile selector. Valid range 0 - 2");

enum {
        MLX5_ATOMIC_REQ_MODE_BE = 0x0,
        MLX5_ATOMIC_REQ_MODE_HOST_ENDIANNESS = 0x1,
};

static struct mlx5_profile profile[] = {
        [0] = {
                .mask           = 0,
        },
        [1] = {
                .mask           = MLX5_PROF_MASK_QP_SIZE,
                .log_max_qp     = 12,
        },
        [2] = {
                .mask           = MLX5_PROF_MASK_QP_SIZE |
                                  MLX5_PROF_MASK_MR_CACHE,
                .log_max_qp     = 18,
                .mr_cache[0]    = {
                        .size   = 500,
                        .limit  = 250
                },
                .mr_cache[1]    = {
                        .size   = 500,
                        .limit  = 250
                },
                .mr_cache[2]    = {
                        .size   = 500,
                        .limit  = 250
                },
                .mr_cache[3]    = {
                        .size   = 500,
                        .limit  = 250
                },
                .mr_cache[4]    = {
                        .size   = 500,
                        .limit  = 250
                },
                .mr_cache[5]    = {
                        .size   = 500,
                        .limit  = 250
                },
                .mr_cache[6]    = {
                        .size   = 500,
                        .limit  = 250
                },
                .mr_cache[7]    = {
                        .size   = 500,
                        .limit  = 250
                },
                .mr_cache[8]    = {
                        .size   = 500,
                        .limit  = 250
                },
                .mr_cache[9]    = {
                        .size   = 500,
                        .limit  = 250
                },
                .mr_cache[10]   = {
                        .size   = 500,
                        .limit  = 250
                },
                .mr_cache[11]   = {
                        .size   = 500,
                        .limit  = 250
                },
                .mr_cache[12]   = {
                        .size   = 64,
                        .limit  = 32
                },
                .mr_cache[13]   = {
                        .size   = 32,
                        .limit  = 16
                },
                .mr_cache[14]   = {
                        .size   = 16,
                        .limit  = 8
                },
                .mr_cache[15]   = {
                        .size   = 8,
                        .limit  = 4
                },
                .mr_cache[16]   = {
                        .size   = 8,
                        .limit  = 4
                },
                .mr_cache[17]   = {
                        .size   = 8,
                        .limit  = 4
                },
                .mr_cache[18]   = {
                        .size   = 8,
                        .limit  = 4
                },
                .mr_cache[19]   = {
                        .size   = 4,
                        .limit  = 2
                },
                .mr_cache[20]   = {
                        .size   = 4,
                        .limit  = 2
                },
        },
};

#define FW_INIT_TIMEOUT_MILI            2000
#define FW_INIT_WAIT_MS                 2
#define FW_PRE_INIT_TIMEOUT_MILI        10000

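/* Poll the initializing bit in the firmware's initialization segment until
 * the firmware clears it or max_wait_mili milliseconds elapse.
 */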
static int wait_fw_init(struct mlx5_core_dev *dev, u32 max_wait_mili)
{
        unsigned long end = jiffies + msecs_to_jiffies(max_wait_mili);
        int err = 0;

        while (fw_initializing(dev)) {
                if (time_after(jiffies, end)) {
                        err = -EBUSY;
                        break;
                }
                msleep(FW_INIT_WAIT_MS);
        }

        return err;
}

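/* Report the running driver as "Linux,<driver name>,<driver version>" to the
 * firmware via the SET_DRIVER_VERSION command, when the HCA advertises
 * support for it.
 */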
static void mlx5_set_driver_version(struct mlx5_core_dev *dev)
{
        int driver_ver_sz = MLX5_FLD_SZ_BYTES(set_driver_version_in,
                                              driver_version);
        u8 in[MLX5_ST_SZ_BYTES(set_driver_version_in)] = {0};
        u8 out[MLX5_ST_SZ_BYTES(set_driver_version_out)] = {0};
        int remaining_size = driver_ver_sz;
        char *string;

        if (!MLX5_CAP_GEN(dev, driver_version))
                return;

        string = MLX5_ADDR_OF(set_driver_version_in, in, driver_version);

        strncpy(string, "Linux", remaining_size);

        remaining_size = max_t(int, 0, driver_ver_sz - strlen(string));
        strncat(string, ",", remaining_size);

        remaining_size = max_t(int, 0, driver_ver_sz - strlen(string));
        strncat(string, DRIVER_NAME, remaining_size);

        remaining_size = max_t(int, 0, driver_ver_sz - strlen(string));
        strncat(string, ",", remaining_size);

        remaining_size = max_t(int, 0, driver_ver_sz - strlen(string));
        strncat(string, DRIVER_VERSION, remaining_size);

        /* Send the command */
        MLX5_SET(set_driver_version_in, in, opcode,
                 MLX5_CMD_OP_SET_DRIVER_VERSION);

        mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
}

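/* Prefer 64-bit streaming and coherent DMA masks, falling back to 32-bit,
 * and cap DMA segments at 2GB.
 */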
static int set_dma_caps(struct pci_dev *pdev)
{
        int err;

        err = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
        if (err) {
                dev_warn(&pdev->dev, "Warning: couldn't set 64-bit PCI DMA mask\n");
                err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
                if (err) {
                        dev_err(&pdev->dev, "Can't set PCI DMA mask, aborting\n");
                        return err;
                }
        }

        err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
        if (err) {
                dev_warn(&pdev->dev,
                         "Warning: couldn't set 64-bit consistent PCI DMA mask\n");
                err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
                if (err) {
                        dev_err(&pdev->dev,
                                "Can't set consistent PCI DMA mask, aborting\n");
                        return err;
                }
        }

        dma_set_max_seg_size(&pdev->dev, 2u * 1024 * 1024 * 1024);
        return err;
}

static int mlx5_pci_enable_device(struct mlx5_core_dev *dev)
{
        struct pci_dev *pdev = dev->pdev;
        int err = 0;

        mutex_lock(&dev->pci_status_mutex);
        if (dev->pci_status == MLX5_PCI_STATUS_DISABLED) {
                err = pci_enable_device(pdev);
                if (!err)
                        dev->pci_status = MLX5_PCI_STATUS_ENABLED;
        }
        mutex_unlock(&dev->pci_status_mutex);

        return err;
}

static void mlx5_pci_disable_device(struct mlx5_core_dev *dev)
{
        struct pci_dev *pdev = dev->pdev;

        mutex_lock(&dev->pci_status_mutex);
        if (dev->pci_status == MLX5_PCI_STATUS_ENABLED) {
                pci_disable_device(pdev);
                dev->pci_status = MLX5_PCI_STATUS_DISABLED;
        }
        mutex_unlock(&dev->pci_status_mutex);
}

static int request_bar(struct pci_dev *pdev)
{
        int err = 0;

        if (!(pci_resource_flags(pdev, 0) & IORESOURCE_MEM)) {
                dev_err(&pdev->dev, "Missing registers BAR, aborting\n");
                return -ENODEV;
        }

        err = pci_request_regions(pdev, DRIVER_NAME);
        if (err)
                dev_err(&pdev->dev, "Couldn't get PCI resources, aborting\n");

        return err;
}

static void release_bar(struct pci_dev *pdev)
{
        pci_release_regions(pdev);
}

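/* Request one completion vector per online CPU and per port, on top of the
 * MLX5_EQ_VEC_COMP_BASE control vectors, bounded by the number of EQs the
 * device exposes (log_max_eq).
 */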
static int mlx5_enable_msix(struct mlx5_core_dev *dev)
{
        struct mlx5_priv *priv = &dev->priv;
        struct mlx5_eq_table *table = &priv->eq_table;
        int num_eqs = 1 << MLX5_CAP_GEN(dev, log_max_eq);
        int nvec;
        int i;

        nvec = MLX5_CAP_GEN(dev, num_ports) * num_online_cpus() +
               MLX5_EQ_VEC_COMP_BASE;
        nvec = min_t(int, nvec, num_eqs);
        if (nvec <= MLX5_EQ_VEC_COMP_BASE)
                return -ENOMEM;

        priv->msix_arr = kcalloc(nvec, sizeof(*priv->msix_arr), GFP_KERNEL);

        priv->irq_info = kcalloc(nvec, sizeof(*priv->irq_info), GFP_KERNEL);
        if (!priv->msix_arr || !priv->irq_info)
                goto err_free_msix;

        for (i = 0; i < nvec; i++)
                priv->msix_arr[i].entry = i;

        nvec = pci_enable_msix_range(dev->pdev, priv->msix_arr,
                                     MLX5_EQ_VEC_COMP_BASE + 1, nvec);
        if (nvec < 0) {
                /* don't leak the vector tables on failure */
                kfree(priv->irq_info);
                kfree(priv->msix_arr);
                return nvec;
        }

        table->num_comp_vectors = nvec - MLX5_EQ_VEC_COMP_BASE;

        return 0;

err_free_msix:
        kfree(priv->irq_info);
        kfree(priv->msix_arr);
        return -ENOMEM;
}

static void mlx5_disable_msix(struct mlx5_core_dev *dev)
{
        struct mlx5_priv *priv = &dev->priv;

        pci_disable_msix(dev->pdev);
        kfree(priv->irq_info);
        kfree(priv->msix_arr);
}

struct mlx5_reg_host_endianness {
        u8      he;
        u8      rsvd[15];
};

#define CAP_MASK(pos, size) ((u64)((1 << (size)) - 1) << (pos))

enum {
        MLX5_CAP_BITS_RW_MASK = CAP_MASK(MLX5_CAP_OFF_CMDIF_CSUM, 2) |
                                MLX5_DEV_CAP_FLAG_DCT,
};

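/* Translate a pkey table size in entries to the firmware encoding:
 * 0 for 128 entries, up to 5 for 4096.
 */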
static u16 to_fw_pkey_sz(struct mlx5_core_dev *dev, u32 size)
{
        switch (size) {
        case 128:
                return 0;
        case 256:
                return 1;
        case 512:
                return 2;
        case 1024:
                return 3;
        case 2048:
                return 4;
        case 4096:
                return 5;
        default:
                mlx5_core_warn(dev, "invalid pkey table size %d\n", size);
                return 0;
        }
}

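/* Query HCA capabilities of the given type, in either maximum (supported)
 * or current mode, and cache the result in dev->caps.
 */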
static int mlx5_core_get_caps_mode(struct mlx5_core_dev *dev,
                                   enum mlx5_cap_type cap_type,
                                   enum mlx5_cap_mode cap_mode)
{
        u8 in[MLX5_ST_SZ_BYTES(query_hca_cap_in)];
        int out_sz = MLX5_ST_SZ_BYTES(query_hca_cap_out);
        void *out, *hca_caps;
        u16 opmod = (cap_type << 1) | (cap_mode & 0x01);
        int err;

        memset(in, 0, sizeof(in));
        out = kzalloc(out_sz, GFP_KERNEL);
        if (!out)
                return -ENOMEM;

        MLX5_SET(query_hca_cap_in, in, opcode, MLX5_CMD_OP_QUERY_HCA_CAP);
        MLX5_SET(query_hca_cap_in, in, op_mod, opmod);
        err = mlx5_cmd_exec(dev, in, sizeof(in), out, out_sz);
        if (err) {
                mlx5_core_warn(dev,
                               "QUERY_HCA_CAP : type(%x) opmode(%x) Failed(%d)\n",
                               cap_type, cap_mode, err);
                goto query_ex;
        }

        hca_caps = MLX5_ADDR_OF(query_hca_cap_out, out, capability);

        switch (cap_mode) {
        case HCA_CAP_OPMOD_GET_MAX:
                memcpy(dev->caps.hca_max[cap_type], hca_caps,
                       MLX5_UN_SZ_BYTES(hca_cap_union));
                break;
        case HCA_CAP_OPMOD_GET_CUR:
                memcpy(dev->caps.hca_cur[cap_type], hca_caps,
                       MLX5_UN_SZ_BYTES(hca_cap_union));
                break;
        default:
                mlx5_core_warn(dev,
                               "Tried to query dev cap type(%x) with wrong opmode(%x)\n",
                               cap_type, cap_mode);
                err = -EINVAL;
                break;
        }
query_ex:
        kfree(out);
        return err;
}

int mlx5_core_get_caps(struct mlx5_core_dev *dev, enum mlx5_cap_type cap_type)
{
        int ret;

        ret = mlx5_core_get_caps_mode(dev, cap_type, HCA_CAP_OPMOD_GET_CUR);
        if (ret)
                return ret;
        return mlx5_core_get_caps_mode(dev, cap_type, HCA_CAP_OPMOD_GET_MAX);
}

static int set_caps(struct mlx5_core_dev *dev, void *in, int in_sz, int opmod)
{
        u32 out[MLX5_ST_SZ_DW(set_hca_cap_out)] = {0};

        MLX5_SET(set_hca_cap_in, in, opcode, MLX5_CMD_OP_SET_HCA_CAP);
        MLX5_SET(set_hca_cap_in, in, op_mod, opmod << 1);
        return mlx5_cmd_exec(dev, in, in_sz, out, sizeof(out));
}

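/* When the firmware reports support for host-endianness mode for 8-byte
 * atomic requesters, select that mode via SET_HCA_CAP; otherwise leave the
 * default (big-endian) mode in place.
 */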
static int handle_hca_cap_atomic(struct mlx5_core_dev *dev)
{
        void *set_ctx;
        void *set_hca_cap;
        int set_sz = MLX5_ST_SZ_BYTES(set_hca_cap_in);
        int req_endianness;
        int err;

        if (MLX5_CAP_GEN(dev, atomic)) {
                err = mlx5_core_get_caps(dev, MLX5_CAP_ATOMIC);
                if (err)
                        return err;
        } else {
                return 0;
        }

        req_endianness =
                MLX5_CAP_ATOMIC(dev,
                                supported_atomic_req_8B_endianness_mode_1);

        if (req_endianness != MLX5_ATOMIC_REQ_MODE_HOST_ENDIANNESS)
                return 0;

        set_ctx = kzalloc(set_sz, GFP_KERNEL);
        if (!set_ctx)
                return -ENOMEM;

        set_hca_cap = MLX5_ADDR_OF(set_hca_cap_in, set_ctx, capability);

        /* Set requestor to host endianness */
        MLX5_SET(atomic_caps, set_hca_cap, atomic_req_8B_endianness_mode,
                 MLX5_ATOMIC_REQ_MODE_HOST_ENDIANNESS);

        err = set_caps(dev, set_ctx, set_sz, MLX5_SET_HCA_CAP_OP_MOD_ATOMIC);

        kfree(set_ctx);
        return err;
}

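/* Adjust the current general device capabilities: limit the pkey table to
 * 128 entries, apply the selected profile's log_max_qp (clamped to the HCA
 * maximum), disable cmdif checksum, and configure UAR page size and cache
 * line parameters.
 */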
static int handle_hca_cap(struct mlx5_core_dev *dev)
{
        void *set_ctx = NULL;
        struct mlx5_profile *prof = dev->profile;
        int err = -ENOMEM;
        int set_sz = MLX5_ST_SZ_BYTES(set_hca_cap_in);
        void *set_hca_cap;

        set_ctx = kzalloc(set_sz, GFP_KERNEL);
        if (!set_ctx)
                goto query_ex;

        err = mlx5_core_get_caps(dev, MLX5_CAP_GENERAL);
        if (err)
                goto query_ex;

        set_hca_cap = MLX5_ADDR_OF(set_hca_cap_in, set_ctx,
                                   capability);
        memcpy(set_hca_cap, dev->caps.hca_cur[MLX5_CAP_GENERAL],
               MLX5_ST_SZ_BYTES(cmd_hca_cap));

        mlx5_core_dbg(dev, "Current Pkey table size %d Setting new size %d\n",
                      mlx5_to_sw_pkey_sz(MLX5_CAP_GEN(dev, pkey_table_size)),
                      128);
        /* we limit the size of the pkey table to 128 entries for now */
        MLX5_SET(cmd_hca_cap, set_hca_cap, pkey_table_size,
                 to_fw_pkey_sz(dev, 128));

        /* Check log_max_qp from HCA caps to set in current profile */
        if (MLX5_CAP_GEN_MAX(dev, log_max_qp) < profile[prof_sel].log_max_qp) {
                mlx5_core_warn(dev, "log_max_qp value in current profile is %d, changing it to HCA capability limit (%d)\n",
                               profile[prof_sel].log_max_qp,
                               MLX5_CAP_GEN_MAX(dev, log_max_qp));
                profile[prof_sel].log_max_qp = MLX5_CAP_GEN_MAX(dev, log_max_qp);
        }
        if (prof->mask & MLX5_PROF_MASK_QP_SIZE)
                MLX5_SET(cmd_hca_cap, set_hca_cap, log_max_qp,
                         prof->log_max_qp);

        /* disable cmdif checksum */
        MLX5_SET(cmd_hca_cap, set_hca_cap, cmdif_checksum, 0);

        /* Enable 4K UAR only when HCA supports it and page size is bigger
         * than 4K.
         */
        if (MLX5_CAP_GEN_MAX(dev, uar_4k) && PAGE_SIZE > 4096)
                MLX5_SET(cmd_hca_cap, set_hca_cap, uar_4k, 1);

        MLX5_SET(cmd_hca_cap, set_hca_cap, log_uar_page_sz, PAGE_SHIFT - 12);

        if (MLX5_CAP_GEN_MAX(dev, cache_line_128byte))
                MLX5_SET(cmd_hca_cap,
                         set_hca_cap,
                         cache_line_128byte,
                         cache_line_size() == 128 ? 1 : 0);

        err = set_caps(dev, set_ctx, set_sz,
                       MLX5_SET_HCA_CAP_OP_MOD_GENERAL_DEVICE);

query_ex:
        kfree(set_ctx);
        return err;
}

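/* Tell the device the host endianness via the HOST_ENDIANNESS access
 * register; only the PF is allowed to do this.
 */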
static int set_hca_ctrl(struct mlx5_core_dev *dev)
{
        struct mlx5_reg_host_endianness he_in;
        struct mlx5_reg_host_endianness he_out;
        int err;

        if (!mlx5_core_is_pf(dev))
                return 0;

        memset(&he_in, 0, sizeof(he_in));
        he_in.he = MLX5_SET_HOST_ENDIANNESS;
        err = mlx5_core_access_reg(dev, &he_in,  sizeof(he_in),
                                        &he_out, sizeof(he_out),
                                        MLX5_REG_HOST_ENDIANNESS, 0, 1);
        return err;
}

int mlx5_core_enable_hca(struct mlx5_core_dev *dev, u16 func_id)
{
        u32 out[MLX5_ST_SZ_DW(enable_hca_out)] = {0};
        u32 in[MLX5_ST_SZ_DW(enable_hca_in)]   = {0};

        MLX5_SET(enable_hca_in, in, opcode, MLX5_CMD_OP_ENABLE_HCA);
        MLX5_SET(enable_hca_in, in, function_id, func_id);
        return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
}

int mlx5_core_disable_hca(struct mlx5_core_dev *dev, u16 func_id)
{
        u32 out[MLX5_ST_SZ_DW(disable_hca_out)] = {0};
        u32 in[MLX5_ST_SZ_DW(disable_hca_in)]   = {0};

        MLX5_SET(disable_hca_in, in, opcode, MLX5_CMD_OP_DISABLE_HCA);
        MLX5_SET(disable_hca_in, in, function_id, func_id);
        return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
}

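/* Read the 64-bit internal timer as two 32-bit reads from the
 * initialization segment, re-reading the low word if the high word
 * wrapped in between.
 */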
u64 mlx5_read_internal_timer(struct mlx5_core_dev *dev)
{
        u32 timer_h, timer_h1, timer_l;

        timer_h = ioread32be(&dev->iseg->internal_timer_h);
        timer_l = ioread32be(&dev->iseg->internal_timer_l);
        timer_h1 = ioread32be(&dev->iseg->internal_timer_h);
        if (timer_h != timer_h1) /* wrap around */
                timer_l = ioread32be(&dev->iseg->internal_timer_l);

        return (u64)timer_l | (u64)timer_h1 << 32;
}

static int mlx5_irq_set_affinity_hint(struct mlx5_core_dev *mdev, int i)
{
        struct mlx5_priv *priv  = &mdev->priv;
        struct msix_entry *msix = priv->msix_arr;
        int irq                 = msix[i + MLX5_EQ_VEC_COMP_BASE].vector;

        if (!zalloc_cpumask_var(&priv->irq_info[i].mask, GFP_KERNEL)) {
                mlx5_core_warn(mdev, "zalloc_cpumask_var failed");
                return -ENOMEM;
        }

        cpumask_set_cpu(cpumask_local_spread(i, priv->numa_node),
                        priv->irq_info[i].mask);

        if (IS_ENABLED(CONFIG_SMP) &&
            irq_set_affinity_hint(irq, priv->irq_info[i].mask))
                mlx5_core_warn(mdev, "irq_set_affinity_hint failed, irq 0x%.4x", irq);

        return 0;
}

static void mlx5_irq_clear_affinity_hint(struct mlx5_core_dev *mdev, int i)
{
        struct mlx5_priv *priv  = &mdev->priv;
        struct msix_entry *msix = priv->msix_arr;
        int irq                 = msix[i + MLX5_EQ_VEC_COMP_BASE].vector;

        irq_set_affinity_hint(irq, NULL);
        free_cpumask_var(priv->irq_info[i].mask);
}

static int mlx5_irq_set_affinity_hints(struct mlx5_core_dev *mdev)
{
        int err;
        int i;

        for (i = 0; i < mdev->priv.eq_table.num_comp_vectors; i++) {
                err = mlx5_irq_set_affinity_hint(mdev, i);
                if (err)
                        goto err_out;
        }

        return 0;

err_out:
        for (i--; i >= 0; i--)
                mlx5_irq_clear_affinity_hint(mdev, i);

        return err;
}

static void mlx5_irq_clear_affinity_hints(struct mlx5_core_dev *mdev)
{
        int i;

        for (i = 0; i < mdev->priv.eq_table.num_comp_vectors; i++)
                mlx5_irq_clear_affinity_hint(mdev, i);
}

int mlx5_vector2eqn(struct mlx5_core_dev *dev, int vector, int *eqn,
                    unsigned int *irqn)
{
        struct mlx5_eq_table *table = &dev->priv.eq_table;
        struct mlx5_eq *eq, *n;
        int err = -ENOENT;

        spin_lock(&table->lock);
        list_for_each_entry_safe(eq, n, &table->comp_eqs_list, list) {
                if (eq->index == vector) {
                        *eqn = eq->eqn;
                        *irqn = eq->irqn;
                        err = 0;
                        break;
                }
        }
        spin_unlock(&table->lock);

        return err;
}
EXPORT_SYMBOL(mlx5_vector2eqn);

struct mlx5_eq *mlx5_eqn2eq(struct mlx5_core_dev *dev, int eqn)
{
        struct mlx5_eq_table *table = &dev->priv.eq_table;
        struct mlx5_eq *eq;

        spin_lock(&table->lock);
        list_for_each_entry(eq, &table->comp_eqs_list, list)
                if (eq->eqn == eqn) {
                        spin_unlock(&table->lock);
                        return eq;
                }

        spin_unlock(&table->lock);

        return ERR_PTR(-ENOENT);
}

static void free_comp_eqs(struct mlx5_core_dev *dev)
{
        struct mlx5_eq_table *table = &dev->priv.eq_table;
        struct mlx5_eq *eq, *n;

#ifdef CONFIG_RFS_ACCEL
        if (dev->rmap) {
                free_irq_cpu_rmap(dev->rmap);
                dev->rmap = NULL;
        }
#endif
        spin_lock(&table->lock);
        list_for_each_entry_safe(eq, n, &table->comp_eqs_list, list) {
                list_del(&eq->list);
                spin_unlock(&table->lock);
                if (mlx5_destroy_unmap_eq(dev, eq))
                        mlx5_core_warn(dev, "failed to destroy EQ 0x%x\n",
                                       eq->eqn);
                kfree(eq);
                spin_lock(&table->lock);
        }
        spin_unlock(&table->lock);
}

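/* Create one completion EQ per completion vector and, when RFS
 * acceleration is enabled, add each vector's IRQ to the CPU reverse map.
 */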
static int alloc_comp_eqs(struct mlx5_core_dev *dev)
{
        struct mlx5_eq_table *table = &dev->priv.eq_table;
        char name[MLX5_MAX_IRQ_NAME];
        struct mlx5_eq *eq;
        int ncomp_vec;
        int nent;
        int err;
        int i;

        INIT_LIST_HEAD(&table->comp_eqs_list);
        ncomp_vec = table->num_comp_vectors;
        nent = MLX5_COMP_EQ_SIZE;
#ifdef CONFIG_RFS_ACCEL
        dev->rmap = alloc_irq_cpu_rmap(ncomp_vec);
        if (!dev->rmap)
                return -ENOMEM;
#endif
        for (i = 0; i < ncomp_vec; i++) {
                eq = kzalloc(sizeof(*eq), GFP_KERNEL);
                if (!eq) {
                        err = -ENOMEM;
                        goto clean;
                }

#ifdef CONFIG_RFS_ACCEL
                irq_cpu_rmap_add(dev->rmap,
                                 dev->priv.msix_arr[i + MLX5_EQ_VEC_COMP_BASE].vector);
#endif
                snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_comp%d", i);
                err = mlx5_create_map_eq(dev, eq,
                                         i + MLX5_EQ_VEC_COMP_BASE, nent, 0,
                                         name, MLX5_EQ_TYPE_COMP);
                if (err) {
                        kfree(eq);
                        goto clean;
                }
                mlx5_core_dbg(dev, "allocated completion EQN %d\n", eq->eqn);
                eq->index = i;
                spin_lock(&table->lock);
                list_add_tail(&eq->list, &table->comp_eqs_list);
                spin_unlock(&table->lock);
        }

        return 0;

clean:
        free_comp_eqs(dev);
        return err;
}

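/* Negotiate the ISSI (Interface Step Sequence ID) with the firmware:
 * prefer ISSI 1 when supported, fall back to ISSI 0, and treat a firmware
 * that does not recognize the query at all as ISSI 0.
 */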
static int mlx5_core_set_issi(struct mlx5_core_dev *dev)
{
        u32 query_in[MLX5_ST_SZ_DW(query_issi_in)]   = {0};
        u32 query_out[MLX5_ST_SZ_DW(query_issi_out)] = {0};
        u32 sup_issi;
        int err;

        MLX5_SET(query_issi_in, query_in, opcode, MLX5_CMD_OP_QUERY_ISSI);
        err = mlx5_cmd_exec(dev, query_in, sizeof(query_in),
                            query_out, sizeof(query_out));
        if (err) {
                u32 syndrome;
                u8 status;

                mlx5_cmd_mbox_status(query_out, &status, &syndrome);
                if (!status || syndrome == MLX5_DRIVER_SYND) {
                        mlx5_core_err(dev, "Failed to query ISSI err(%d) status(%d) synd(%d)\n",
                                      err, status, syndrome);
                        return err;
                }

                mlx5_core_warn(dev, "Query ISSI is not supported by FW, ISSI is 0\n");
                dev->issi = 0;
                return 0;
        }

        sup_issi = MLX5_GET(query_issi_out, query_out, supported_issi_dw0);

        if (sup_issi & (1 << 1)) {
                u32 set_in[MLX5_ST_SZ_DW(set_issi_in)]   = {0};
                u32 set_out[MLX5_ST_SZ_DW(set_issi_out)] = {0};

                MLX5_SET(set_issi_in, set_in, opcode, MLX5_CMD_OP_SET_ISSI);
                MLX5_SET(set_issi_in, set_in, current_issi, 1);
                err = mlx5_cmd_exec(dev, set_in, sizeof(set_in),
                                    set_out, sizeof(set_out));
                if (err) {
                        mlx5_core_err(dev, "Failed to set ISSI to 1 err(%d)\n",
                                      err);
                        return err;
                }

                dev->issi = 1;

                return 0;
        } else if (sup_issi & (1 << 0) || !sup_issi) {
                return 0;
        }

        return -EOPNOTSUPP;
}

static int mlx5_pci_init(struct mlx5_core_dev *dev, struct mlx5_priv *priv)
{
        struct pci_dev *pdev = dev->pdev;
        int err = 0;

        pci_set_drvdata(dev->pdev, dev);
        strncpy(priv->name, dev_name(&pdev->dev), MLX5_MAX_NAME_LEN);
        priv->name[MLX5_MAX_NAME_LEN - 1] = 0;

        mutex_init(&priv->pgdir_mutex);
        INIT_LIST_HEAD(&priv->pgdir_list);
        spin_lock_init(&priv->mkey_lock);

        mutex_init(&priv->alloc_mutex);

        priv->numa_node = dev_to_node(&dev->pdev->dev);

        priv->dbg_root = debugfs_create_dir(dev_name(&pdev->dev), mlx5_debugfs_root);
        if (!priv->dbg_root)
                return -ENOMEM;

        err = mlx5_pci_enable_device(dev);
        if (err) {
                dev_err(&pdev->dev, "Cannot enable PCI device, aborting\n");
                goto err_dbg;
        }

        err = request_bar(pdev);
        if (err) {
                dev_err(&pdev->dev, "error requesting BARs, aborting\n");
                goto err_disable;
        }

        pci_set_master(pdev);

        err = set_dma_caps(pdev);
        if (err) {
                dev_err(&pdev->dev, "Failed setting DMA capabilities mask, aborting\n");
                goto err_clr_master;
        }

        dev->iseg_base = pci_resource_start(dev->pdev, 0);
        dev->iseg = ioremap(dev->iseg_base, sizeof(*dev->iseg));
        if (!dev->iseg) {
                err = -ENOMEM;
                dev_err(&pdev->dev, "Failed mapping initialization segment, aborting\n");
                goto err_clr_master;
        }

        return 0;

err_clr_master:
        pci_clear_master(dev->pdev);
        release_bar(dev->pdev);
err_disable:
        mlx5_pci_disable_device(dev);

err_dbg:
        debugfs_remove(priv->dbg_root);
        return err;
}

static void mlx5_pci_close(struct mlx5_core_dev *dev, struct mlx5_priv *priv)
{
        iounmap(dev->iseg);
        pci_clear_master(dev->pdev);
        release_bar(dev->pdev);
        mlx5_pci_disable_device(dev);
        debugfs_remove(priv->dbg_root);
}

static int mlx5_init_once(struct mlx5_core_dev *dev, struct mlx5_priv *priv)
{
        struct pci_dev *pdev = dev->pdev;
        int err;

        err = mlx5_query_board_id(dev);
        if (err) {
                dev_err(&pdev->dev, "query board id failed\n");
                goto out;
        }

        err = mlx5_eq_init(dev);
        if (err) {
                dev_err(&pdev->dev, "failed to initialize eq\n");
                goto out;
        }

        err = mlx5_init_cq_table(dev);
        if (err) {
                dev_err(&pdev->dev, "failed to initialize cq table\n");
                goto err_eq_cleanup;
        }

        mlx5_init_qp_table(dev);

        mlx5_init_srq_table(dev);

        mlx5_init_mkey_table(dev);

        mlx5_init_reserved_gids(dev);

        err = mlx5_init_rl_table(dev);
        if (err) {
                dev_err(&pdev->dev, "Failed to init rate limiting\n");
                goto err_tables_cleanup;
        }

#ifdef CONFIG_MLX5_CORE_EN
        err = mlx5_eswitch_init(dev);
        if (err) {
                dev_err(&pdev->dev, "Failed to init eswitch %d\n", err);
                goto err_rl_cleanup;
        }
#endif

        err = mlx5_sriov_init(dev);
        if (err) {
                dev_err(&pdev->dev, "Failed to init sriov %d\n", err);
                goto err_eswitch_cleanup;
        }

        err = mlx5_fpga_init(dev);
        if (err) {
                dev_err(&pdev->dev, "Failed to init fpga device %d\n", err);
                goto err_sriov_cleanup;
        }

        return 0;

err_sriov_cleanup:
        mlx5_sriov_cleanup(dev);
err_eswitch_cleanup:
#ifdef CONFIG_MLX5_CORE_EN
        mlx5_eswitch_cleanup(dev->priv.eswitch);

err_rl_cleanup:
#endif
        mlx5_cleanup_rl_table(dev);

err_tables_cleanup:
        mlx5_cleanup_mkey_table(dev);
        mlx5_cleanup_srq_table(dev);
        mlx5_cleanup_qp_table(dev);
        mlx5_cleanup_cq_table(dev);

err_eq_cleanup:
        mlx5_eq_cleanup(dev);

out:
        return err;
}

static void mlx5_cleanup_once(struct mlx5_core_dev *dev)
{
        mlx5_fpga_cleanup(dev);
        mlx5_sriov_cleanup(dev);
#ifdef CONFIG_MLX5_CORE_EN
        mlx5_eswitch_cleanup(dev->priv.eswitch);
#endif
        mlx5_cleanup_rl_table(dev);
        mlx5_cleanup_reserved_gids(dev);
        mlx5_cleanup_mkey_table(dev);
        mlx5_cleanup_srq_table(dev);
        mlx5_cleanup_qp_table(dev);
        mlx5_cleanup_cq_table(dev);
        mlx5_eq_cleanup(dev);
}

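/* Bring the device up: synchronize with firmware initialization, set up the
 * command interface, negotiate ISSI, capabilities and pages, start EQs,
 * flow steering, eswitch, SR-IOV and FPGA/IPSec, and finally register or
 * re-attach the device interfaces. Called at probe time (boot == true) and
 * again on recovery paths.
 */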
static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv,
                         bool boot)
{
        struct pci_dev *pdev = dev->pdev;
        int err;

        mutex_lock(&dev->intf_state_mutex);
        if (test_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state)) {
                dev_warn(&dev->pdev->dev, "%s: interface is up, NOP\n",
                         __func__);
                goto out;
        }

        dev_info(&pdev->dev, "firmware version: %d.%d.%d\n", fw_rev_maj(dev),
                 fw_rev_min(dev), fw_rev_sub(dev));

        /* On load, remove any previous indication of internal error; the
         * device is up.
         */
        dev->state = MLX5_DEVICE_STATE_UP;

        /* Wait for firmware to accept the initialization segment
         * configuration.
         */
        err = wait_fw_init(dev, FW_PRE_INIT_TIMEOUT_MILI);
        if (err) {
                dev_err(&dev->pdev->dev, "Firmware over %d MS in pre-initializing state, aborting\n",
                        FW_PRE_INIT_TIMEOUT_MILI);
                goto out_err;
        }

        err = mlx5_cmd_init(dev);
        if (err) {
                dev_err(&pdev->dev, "Failed initializing command interface, aborting\n");
                goto out_err;
        }

        err = wait_fw_init(dev, FW_INIT_TIMEOUT_MILI);
        if (err) {
                dev_err(&dev->pdev->dev, "Firmware over %d MS in initializing state, aborting\n",
                        FW_INIT_TIMEOUT_MILI);
                goto err_cmd_cleanup;
        }

        err = mlx5_core_enable_hca(dev, 0);
        if (err) {
                dev_err(&pdev->dev, "enable hca failed\n");
                goto err_cmd_cleanup;
        }

        err = mlx5_core_set_issi(dev);
        if (err) {
                dev_err(&pdev->dev, "failed to set issi\n");
                goto err_disable_hca;
        }

        err = mlx5_satisfy_startup_pages(dev, 1);
        if (err) {
                dev_err(&pdev->dev, "failed to allocate boot pages\n");
                goto err_disable_hca;
        }

        err = set_hca_ctrl(dev);
        if (err) {
                dev_err(&pdev->dev, "set_hca_ctrl failed\n");
                goto reclaim_boot_pages;
        }

        err = handle_hca_cap(dev);
        if (err) {
                dev_err(&pdev->dev, "handle_hca_cap failed\n");
                goto reclaim_boot_pages;
        }

        err = handle_hca_cap_atomic(dev);
        if (err) {
                dev_err(&pdev->dev, "handle_hca_cap_atomic failed\n");
                goto reclaim_boot_pages;
        }

        err = mlx5_satisfy_startup_pages(dev, 0);
        if (err) {
                dev_err(&pdev->dev, "failed to allocate init pages\n");
                goto reclaim_boot_pages;
        }

        err = mlx5_pagealloc_start(dev);
        if (err) {
                dev_err(&pdev->dev, "mlx5_pagealloc_start failed\n");
                goto reclaim_boot_pages;
        }

        err = mlx5_cmd_init_hca(dev);
        if (err) {
                dev_err(&pdev->dev, "init hca failed\n");
                goto err_pagealloc_stop;
        }

        mlx5_set_driver_version(dev);

        mlx5_start_health_poll(dev);

        err = mlx5_query_hca_caps(dev);
        if (err) {
                dev_err(&pdev->dev, "query hca failed\n");
                goto err_stop_poll;
        }

        if (boot) {
                /* propagate the real error instead of silently returning 0 */
                err = mlx5_init_once(dev, priv);
                if (err) {
                        dev_err(&pdev->dev, "sw objs init failed\n");
                        goto err_stop_poll;
                }
        }

        err = mlx5_enable_msix(dev);
        if (err) {
                dev_err(&pdev->dev, "enable msix failed\n");
                goto err_cleanup_once;
        }

        dev->priv.uar = mlx5_get_uars_page(dev);
        if (!dev->priv.uar) {
                dev_err(&pdev->dev, "Failed allocating uar, aborting\n");
                err = -ENOMEM;
                goto err_disable_msix;
        }

        err = mlx5_start_eqs(dev);
        if (err) {
                dev_err(&pdev->dev, "Failed to start pages and async EQs\n");
                goto err_put_uars;
        }

        err = alloc_comp_eqs(dev);
        if (err) {
                dev_err(&pdev->dev, "Failed to alloc completion EQs\n");
                goto err_stop_eqs;
        }

        err = mlx5_irq_set_affinity_hints(dev);
        if (err) {
                dev_err(&pdev->dev, "Failed to alloc affinity hint cpumask\n");
                goto err_affinity_hints;
        }

        err = mlx5_init_fs(dev);
        if (err) {
                dev_err(&pdev->dev, "Failed to init flow steering\n");
                goto err_fs;
        }

#ifdef CONFIG_MLX5_CORE_EN
        mlx5_eswitch_attach(dev->priv.eswitch);
#endif

        err = mlx5_sriov_attach(dev);
        if (err) {
                dev_err(&pdev->dev, "sriov init failed %d\n", err);
                goto err_sriov;
        }

        err = mlx5_fpga_device_start(dev);
        if (err) {
                dev_err(&pdev->dev, "fpga device start failed %d\n", err);
                goto err_fpga_start;
        }
        err = mlx5_accel_ipsec_init(dev);
        if (err) {
                dev_err(&pdev->dev, "IPSec device start failed %d\n", err);
                goto err_ipsec_start;
        }

        if (mlx5_device_registered(dev)) {
                mlx5_attach_device(dev);
        } else {
                err = mlx5_register_device(dev);
                if (err) {
                        dev_err(&pdev->dev, "mlx5_register_device failed %d\n", err);
                        goto err_reg_dev;
                }
        }

        set_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state);
out:
        mutex_unlock(&dev->intf_state_mutex);

        return 0;

err_reg_dev:
        mlx5_accel_ipsec_cleanup(dev);
err_ipsec_start:
        mlx5_fpga_device_stop(dev);

err_fpga_start:
        mlx5_sriov_detach(dev);

err_sriov:
#ifdef CONFIG_MLX5_CORE_EN
        mlx5_eswitch_detach(dev->priv.eswitch);
#endif
        mlx5_cleanup_fs(dev);

err_fs:
        mlx5_irq_clear_affinity_hints(dev);

err_affinity_hints:
        free_comp_eqs(dev);

err_stop_eqs:
        mlx5_stop_eqs(dev);

err_put_uars:
        mlx5_put_uars_page(dev, priv->uar);

err_disable_msix:
        mlx5_disable_msix(dev);

err_cleanup_once:
        if (boot)
                mlx5_cleanup_once(dev);

err_stop_poll:
        mlx5_stop_health_poll(dev);
        if (mlx5_cmd_teardown_hca(dev)) {
                dev_err(&dev->pdev->dev, "tear_down_hca failed, skip cleanup\n");
                goto out_err;
        }

err_pagealloc_stop:
        mlx5_pagealloc_stop(dev);

reclaim_boot_pages:
        mlx5_reclaim_startup_pages(dev);

err_disable_hca:
        mlx5_core_disable_hca(dev, 0);

err_cmd_cleanup:
        mlx5_cmd_cleanup(dev);

out_err:
        dev->state = MLX5_DEVICE_STATE_INTERNAL_ERROR;
        mutex_unlock(&dev->intf_state_mutex);

        return err;
}

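/* Tear the device down in reverse order of mlx5_load_one(). When cleanup is
 * set, the software objects created at boot time are destroyed as well.
 */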
static int mlx5_unload_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv,
                           bool cleanup)
{
        int err = 0;

        if (cleanup)
                mlx5_drain_health_recovery(dev);

        mutex_lock(&dev->intf_state_mutex);
        if (!test_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state)) {
                dev_warn(&dev->pdev->dev, "%s: interface is down, NOP\n",
                         __func__);
                if (cleanup)
                        mlx5_cleanup_once(dev);
                goto out;
        }

        clear_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state);

        if (mlx5_device_registered(dev))
                mlx5_detach_device(dev);

        mlx5_accel_ipsec_cleanup(dev);
        mlx5_fpga_device_stop(dev);

        mlx5_sriov_detach(dev);
#ifdef CONFIG_MLX5_CORE_EN
        mlx5_eswitch_detach(dev->priv.eswitch);
#endif
        mlx5_cleanup_fs(dev);
        mlx5_irq_clear_affinity_hints(dev);
        free_comp_eqs(dev);
        mlx5_stop_eqs(dev);
        mlx5_put_uars_page(dev, priv->uar);
        mlx5_disable_msix(dev);
        if (cleanup)
                mlx5_cleanup_once(dev);
        mlx5_stop_health_poll(dev);
        err = mlx5_cmd_teardown_hca(dev);
        if (err) {
                dev_err(&dev->pdev->dev, "tear_down_hca failed, skip cleanup\n");
                goto out;
        }
        mlx5_pagealloc_stop(dev);
        mlx5_reclaim_startup_pages(dev);
        mlx5_core_disable_hca(dev, 0);
        mlx5_cmd_cleanup(dev);

out:
        mutex_unlock(&dev->intf_state_mutex);
        return err;
}

struct mlx5_core_event_handler {
        void (*event)(struct mlx5_core_dev *dev,
                      enum mlx5_dev_event event,
                      void *data);
};

static const struct devlink_ops mlx5_devlink_ops = {
#ifdef CONFIG_MLX5_CORE_EN
        .eswitch_mode_set = mlx5_devlink_eswitch_mode_set,
        .eswitch_mode_get = mlx5_devlink_eswitch_mode_get,
        .eswitch_inline_mode_set = mlx5_devlink_eswitch_inline_mode_set,
        .eswitch_inline_mode_get = mlx5_devlink_eswitch_inline_mode_get,
        .eswitch_encap_mode_set = mlx5_devlink_eswitch_encap_mode_set,
        .eswitch_encap_mode_get = mlx5_devlink_eswitch_encap_mode_get,
#endif
};

#define MLX5_IB_MOD "mlx5_ib"
static int init_one(struct pci_dev *pdev,
                    const struct pci_device_id *id)
{
        struct mlx5_core_dev *dev;
        struct devlink *devlink;
        struct mlx5_priv *priv;
        int err;

        devlink = devlink_alloc(&mlx5_devlink_ops, sizeof(*dev));
        if (!devlink) {
                dev_err(&pdev->dev, "kzalloc failed\n");
                return -ENOMEM;
        }

        dev = devlink_priv(devlink);
        priv = &dev->priv;
        priv->pci_dev_data = id->driver_data;

        pci_set_drvdata(pdev, dev);

        dev->pdev = pdev;
        dev->event = mlx5_core_event;
        dev->profile = &profile[prof_sel];

        INIT_LIST_HEAD(&priv->ctx_list);
        spin_lock_init(&priv->ctx_lock);
        mutex_init(&dev->pci_status_mutex);
        mutex_init(&dev->intf_state_mutex);

#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
        err = init_srcu_struct(&priv->pfault_srcu);
        if (err) {
                dev_err(&pdev->dev, "init_srcu_struct failed with error code %d\n",
                        err);
                goto clean_dev;
        }
#endif
        mutex_init(&priv->bfregs.reg_head.lock);
        mutex_init(&priv->bfregs.wc_head.lock);
        INIT_LIST_HEAD(&priv->bfregs.reg_head.list);
        INIT_LIST_HEAD(&priv->bfregs.wc_head.list);

        err = mlx5_pci_init(dev, priv);
        if (err) {
                dev_err(&pdev->dev, "mlx5_pci_init failed with error code %d\n", err);
                goto clean_srcu;
        }

        err = mlx5_health_init(dev);
        if (err) {
                dev_err(&pdev->dev, "mlx5_health_init failed with error code %d\n", err);
                goto close_pci;
        }

        mlx5_pagealloc_init(dev);

        err = mlx5_load_one(dev, priv, true);
        if (err) {
                dev_err(&pdev->dev, "mlx5_load_one failed with error code %d\n", err);
                goto clean_health;
        }

        request_module_nowait(MLX5_IB_MOD);

        err = devlink_register(devlink, &pdev->dev);
        if (err)
                goto clean_load;

        pci_save_state(pdev);
        return 0;

clean_load:
        mlx5_unload_one(dev, priv, true);
clean_health:
        mlx5_pagealloc_cleanup(dev);
        mlx5_health_cleanup(dev);
close_pci:
        mlx5_pci_close(dev, priv);
clean_srcu:
#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
        cleanup_srcu_struct(&priv->pfault_srcu);
clean_dev:
#endif
        pci_set_drvdata(pdev, NULL);
        devlink_free(devlink);

        return err;
}

static void remove_one(struct pci_dev *pdev)
{
        struct mlx5_core_dev *dev  = pci_get_drvdata(pdev);
        struct devlink *devlink = priv_to_devlink(dev);
        struct mlx5_priv *priv = &dev->priv;

        devlink_unregister(devlink);
        mlx5_unregister_device(dev);

        if (mlx5_unload_one(dev, priv, true)) {
                dev_err(&dev->pdev->dev, "mlx5_unload_one failed\n");
                mlx5_health_cleanup(dev);
                return;
        }

        mlx5_pagealloc_cleanup(dev);
        mlx5_health_cleanup(dev);
        mlx5_pci_close(dev, priv);
#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
        cleanup_srcu_struct(&priv->pfault_srcu);
#endif
        pci_set_drvdata(pdev, NULL);
        devlink_free(devlink);
}

static pci_ers_result_t mlx5_pci_err_detected(struct pci_dev *pdev,
                                              pci_channel_state_t state)
{
        struct mlx5_core_dev *dev = pci_get_drvdata(pdev);
        struct mlx5_priv *priv = &dev->priv;

        dev_info(&pdev->dev, "%s was called\n", __func__);

        mlx5_enter_error_state(dev, false);
        mlx5_unload_one(dev, priv, false);
        /* In case of kernel call, drain the health wq */
        if (state) {
                mlx5_drain_health_wq(dev);
                mlx5_pci_disable_device(dev);
        }

        return state == pci_channel_io_perm_failure ?
                PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_NEED_RESET;
}


/* wait for the device to show vital signs by waiting
 * for the health counter to start counting.
 */
static int wait_vital(struct pci_dev *pdev)
{
        struct mlx5_core_dev *dev = pci_get_drvdata(pdev);
        struct mlx5_core_health *health = &dev->priv.health;
        const int niter = 100;
        u32 last_count = 0;
        u32 count;
        int i;

        for (i = 0; i < niter; i++) {
                count = ioread32be(health->health_counter);
                if (count && count != 0xffffffff) {
                        if (last_count && last_count != count) {
                                dev_info(&pdev->dev, "Counter value 0x%x after %d iterations\n", count, i);
                                return 0;
                        }
                        last_count = count;
                }
                msleep(50);
        }

        return -ETIMEDOUT;
}

static pci_ers_result_t mlx5_pci_slot_reset(struct pci_dev *pdev)
{
        struct mlx5_core_dev *dev = pci_get_drvdata(pdev);
        int err;

        dev_info(&pdev->dev, "%s was called\n", __func__);

        err = mlx5_pci_enable_device(dev);
        if (err) {
                dev_err(&pdev->dev, "%s: mlx5_pci_enable_device failed with error code: %d\n"
                        , __func__, err);
                return PCI_ERS_RESULT_DISCONNECT;
        }

        pci_set_master(pdev);
        pci_restore_state(pdev);
        pci_save_state(pdev);

        if (wait_vital(pdev)) {
                dev_err(&pdev->dev, "%s: wait_vital timed out\n", __func__);
                return PCI_ERS_RESULT_DISCONNECT;
        }

        return PCI_ERS_RESULT_RECOVERED;
}

static void mlx5_pci_resume(struct pci_dev *pdev)
{
        struct mlx5_core_dev *dev = pci_get_drvdata(pdev);
        struct mlx5_priv *priv = &dev->priv;
        int err;

        dev_info(&pdev->dev, "%s was called\n", __func__);

        err = mlx5_load_one(dev, priv, false);
        if (err)
                dev_err(&pdev->dev, "%s: mlx5_load_one failed with error code: %d\n"
                        , __func__, err);
        else
                dev_info(&pdev->dev, "%s: device recovered\n", __func__);
}

static const struct pci_error_handlers mlx5_err_handler = {
        .error_detected = mlx5_pci_err_detected,
        .slot_reset     = mlx5_pci_slot_reset,
        .resume         = mlx5_pci_resume
};

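/* Attempt a firmware-assisted fast teardown instead of a full unload; only
 * possible when the force_teardown capability is present and the device is
 * not already in an internal error state.
 */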
static int mlx5_try_fast_unload(struct mlx5_core_dev *dev)
{
        int ret;

        if (!MLX5_CAP_GEN(dev, force_teardown)) {
                mlx5_core_dbg(dev, "force teardown is not supported in the firmware\n");
                return -EOPNOTSUPP;
        }

        if (dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) {
                mlx5_core_dbg(dev, "Device in internal error state, giving up\n");
                return -EAGAIN;
        }

        ret = mlx5_cmd_force_teardown_hca(dev);
        if (ret) {
                mlx5_core_dbg(dev, "Firmware couldn't do fast unload error: %d\n", ret);
                return ret;
        }

        mlx5_enter_error_state(dev, true);

        return 0;
}

static void shutdown(struct pci_dev *pdev)
{
        struct mlx5_core_dev *dev  = pci_get_drvdata(pdev);
        struct mlx5_priv *priv = &dev->priv;
        int err;

        dev_info(&pdev->dev, "Shutdown was called\n");
        err = mlx5_try_fast_unload(dev);
        if (err)
                mlx5_unload_one(dev, priv, false);
        mlx5_pci_disable_device(dev);
}

static const struct pci_device_id mlx5_core_pci_table[] = {
        { PCI_VDEVICE(MELLANOX, PCI_DEVICE_ID_MELLANOX_CONNECTIB) },
        { PCI_VDEVICE(MELLANOX, 0x1012), MLX5_PCI_DEV_IS_VF},   /* Connect-IB VF */
        { PCI_VDEVICE(MELLANOX, PCI_DEVICE_ID_MELLANOX_CONNECTX4) },
        { PCI_VDEVICE(MELLANOX, 0x1014), MLX5_PCI_DEV_IS_VF},   /* ConnectX-4 VF */
        { PCI_VDEVICE(MELLANOX, PCI_DEVICE_ID_MELLANOX_CONNECTX4_LX) },
        { PCI_VDEVICE(MELLANOX, 0x1016), MLX5_PCI_DEV_IS_VF},   /* ConnectX-4LX VF */
        { PCI_VDEVICE(MELLANOX, 0x1017) },                      /* ConnectX-5, PCIe 3.0 */
        { PCI_VDEVICE(MELLANOX, 0x1018), MLX5_PCI_DEV_IS_VF},   /* ConnectX-5 VF */
        { PCI_VDEVICE(MELLANOX, 0x1019) },                      /* ConnectX-5 Ex */
        { PCI_VDEVICE(MELLANOX, 0x101a), MLX5_PCI_DEV_IS_VF},   /* ConnectX-5 Ex VF */
        { PCI_VDEVICE(MELLANOX, 0x101b) },                      /* ConnectX-6 */
        { PCI_VDEVICE(MELLANOX, 0x101c), MLX5_PCI_DEV_IS_VF},   /* ConnectX-6 VF */
        { PCI_VDEVICE(MELLANOX, 0xa2d2) },                      /* BlueField integrated ConnectX-5 network controller */
        { PCI_VDEVICE(MELLANOX, 0xa2d3), MLX5_PCI_DEV_IS_VF},   /* BlueField integrated ConnectX-5 network controller VF */
        { 0, }
};

MODULE_DEVICE_TABLE(pci, mlx5_core_pci_table);

void mlx5_disable_device(struct mlx5_core_dev *dev)
{
        mlx5_pci_err_detected(dev->pdev, 0);
}

void mlx5_recover_device(struct mlx5_core_dev *dev)
{
        mlx5_pci_disable_device(dev);
        if (mlx5_pci_slot_reset(dev->pdev) == PCI_ERS_RESULT_RECOVERED)
                mlx5_pci_resume(dev->pdev);
}

static struct pci_driver mlx5_core_driver = {
        .name           = DRIVER_NAME,
        .id_table       = mlx5_core_pci_table,
        .probe          = init_one,
        .remove         = remove_one,
        .shutdown       = shutdown,
        .err_handler    = &mlx5_err_handler,
        .sriov_configure   = mlx5_core_sriov_configure,
};

static void mlx5_core_verify_params(void)
{
        if (prof_sel >= ARRAY_SIZE(profile)) {
                pr_warn("mlx5_core: WARNING: Invalid module parameter prof_sel %d, valid range 0-%zu, changing back to default(%d)\n",
                        prof_sel,
                        ARRAY_SIZE(profile) - 1,
                        MLX5_DEFAULT_PROF);
                prof_sel = MLX5_DEFAULT_PROF;
        }
}

static int __init init(void)
{
        int err;

        mlx5_core_verify_params();
        mlx5_register_debugfs();

        err = pci_register_driver(&mlx5_core_driver);
        if (err)
                goto err_debug;

#ifdef CONFIG_MLX5_CORE_EN
        mlx5e_init();
#endif

        return 0;

err_debug:
        mlx5_unregister_debugfs();
        return err;
}

static void __exit cleanup(void)
{
#ifdef CONFIG_MLX5_CORE_EN
        mlx5e_cleanup();
#endif
        pci_unregister_driver(&mlx5_core_driver);
        mlx5_unregister_debugfs();
}

module_init(init);
module_exit(cleanup);