linux/drivers/net/ethernet/mellanox/mlx5/core/main.c
/*
 * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <linux/highmem.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/errno.h>
#include <linux/pci.h>
#include <linux/dma-mapping.h>
#include <linux/slab.h>
#include <linux/io-mapping.h>
#include <linux/interrupt.h>
#include <linux/delay.h>
#include <linux/mlx5/driver.h>
#include <linux/mlx5/cq.h>
#include <linux/mlx5/qp.h>
#include <linux/debugfs.h>
#include <linux/kmod.h>
#include <linux/mlx5/mlx5_ifc.h>
#include <linux/mlx5/vport.h>
#ifdef CONFIG_RFS_ACCEL
#include <linux/cpu_rmap.h>
#endif
#include <net/devlink.h>
#include "mlx5_core.h"
#include "lib/eq.h"
#include "fs_core.h"
#include "lib/mpfs.h"
#include "eswitch.h"
#include "devlink.h"
#include "lib/mlx5.h"
#include "fpga/core.h"
#include "fpga/ipsec.h"
#include "accel/ipsec.h"
#include "accel/tls.h"
#include "lib/clock.h"
#include "lib/vxlan.h"
#include "lib/geneve.h"
#include "lib/devcom.h"
#include "lib/pci_vsc.h"
#include "diag/fw_tracer.h"
#include "ecpf.h"
#include "lib/hv_vhca.h"
#include "diag/rsc_dump.h"

MODULE_AUTHOR("Eli Cohen <eli@mellanox.com>");
MODULE_DESCRIPTION("Mellanox 5th generation network adapters (ConnectX series) core driver");
MODULE_LICENSE("Dual BSD/GPL");
MODULE_VERSION(DRIVER_VERSION);

unsigned int mlx5_core_debug_mask;
module_param_named(debug_mask, mlx5_core_debug_mask, uint, 0644);
MODULE_PARM_DESC(debug_mask, "debug mask: 1 = dump cmd data, 2 = dump cmd exec time, 3 = both. Default=0");

#define MLX5_DEFAULT_PROF       2
static unsigned int prof_sel = MLX5_DEFAULT_PROF;
module_param_named(prof_sel, prof_sel, uint, 0444);
MODULE_PARM_DESC(prof_sel, "profile selector. Valid range 0 - 2");

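/* Random software owner ID, generated once at module load (see init()
 * below) and passed to INIT_HCA via mlx5_cmd_init_hca(), presumably so
 * firmware can associate functions controlled by the same driver
 * instance on this host.
 */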
static u32 sw_owner_id[4];

enum {
        MLX5_ATOMIC_REQ_MODE_BE = 0x0,
        MLX5_ATOMIC_REQ_MODE_HOST_ENDIANNESS = 0x1,
};

static struct mlx5_profile profile[] = {
        [0] = {
                .mask           = 0,
        },
        [1] = {
                .mask           = MLX5_PROF_MASK_QP_SIZE,
                .log_max_qp     = 12,
        },
        [2] = {
                .mask           = MLX5_PROF_MASK_QP_SIZE |
                                  MLX5_PROF_MASK_MR_CACHE,
                .log_max_qp     = 18,
                .mr_cache[0]    = {
                        .size   = 500,
                        .limit  = 250
                },
                .mr_cache[1]    = {
                        .size   = 500,
                        .limit  = 250
                },
                .mr_cache[2]    = {
                        .size   = 500,
                        .limit  = 250
                },
                .mr_cache[3]    = {
                        .size   = 500,
                        .limit  = 250
                },
                .mr_cache[4]    = {
                        .size   = 500,
                        .limit  = 250
                },
                .mr_cache[5]    = {
                        .size   = 500,
                        .limit  = 250
                },
                .mr_cache[6]    = {
                        .size   = 500,
                        .limit  = 250
                },
                .mr_cache[7]    = {
                        .size   = 500,
                        .limit  = 250
                },
                .mr_cache[8]    = {
                        .size   = 500,
                        .limit  = 250
                },
                .mr_cache[9]    = {
                        .size   = 500,
                        .limit  = 250
                },
                .mr_cache[10]   = {
                        .size   = 500,
                        .limit  = 250
                },
                .mr_cache[11]   = {
                        .size   = 500,
                        .limit  = 250
                },
                .mr_cache[12]   = {
                        .size   = 64,
                        .limit  = 32
                },
                .mr_cache[13]   = {
                        .size   = 32,
                        .limit  = 16
                },
                .mr_cache[14]   = {
                        .size   = 16,
                        .limit  = 8
                },
                .mr_cache[15]   = {
                        .size   = 8,
                        .limit  = 4
                },
        },
};
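
/* Profile 0 keeps the firmware defaults, profile 1 only caps log_max_qp
 * at 2^12, and profile 2 (the default) additionally pre-populates the MR
 * cache with the sizes above. A profile is selected at load time, e.g.:
 *
 *   modprobe mlx5_core prof_sel=1
 */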

#define FW_INIT_TIMEOUT_MILI            2000
#define FW_INIT_WAIT_MS                 2
#define FW_PRE_INIT_TIMEOUT_MILI        120000
#define FW_INIT_WARN_MESSAGE_INTERVAL   20000

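/* The 'initializing' field of the initialization segment has its top bit
 * set while firmware is still booting; wait_fw_init() below polls it in
 * FW_INIT_WAIT_MS steps until the bit clears or max_wait_mili expires,
 * warning every warn_time_mili (when non-zero).
 */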
static int fw_initializing(struct mlx5_core_dev *dev)
{
        return ioread32be(&dev->iseg->initializing) >> 31;
}

static int wait_fw_init(struct mlx5_core_dev *dev, u32 max_wait_mili,
                        u32 warn_time_mili)
{
        unsigned long warn = jiffies + msecs_to_jiffies(warn_time_mili);
        unsigned long end = jiffies + msecs_to_jiffies(max_wait_mili);
        int err = 0;

        BUILD_BUG_ON(FW_PRE_INIT_TIMEOUT_MILI < FW_INIT_WARN_MESSAGE_INTERVAL);

        while (fw_initializing(dev)) {
                if (time_after(jiffies, end)) {
                        err = -EBUSY;
                        break;
                }
                if (warn_time_mili && time_after(jiffies, warn)) {
                        mlx5_core_warn(dev, "Waiting for FW initialization, timeout abort in %ds\n",
                                       jiffies_to_msecs(end - warn) / 1000);
                        warn = jiffies + msecs_to_jiffies(warn_time_mili);
                }
                msleep(FW_INIT_WAIT_MS);
        }

        return err;
}

static void mlx5_set_driver_version(struct mlx5_core_dev *dev)
{
        int driver_ver_sz = MLX5_FLD_SZ_BYTES(set_driver_version_in,
                                              driver_version);
        u8 in[MLX5_ST_SZ_BYTES(set_driver_version_in)] = {};
        int remaining_size = driver_ver_sz;
        char *string;

        if (!MLX5_CAP_GEN(dev, driver_version))
                return;

        string = MLX5_ADDR_OF(set_driver_version_in, in, driver_version);

        strncpy(string, "Linux", remaining_size);

        remaining_size = max_t(int, 0, driver_ver_sz - strlen(string));
        strncat(string, ",", remaining_size);

        remaining_size = max_t(int, 0, driver_ver_sz - strlen(string));
        strncat(string, DRIVER_NAME, remaining_size);

        remaining_size = max_t(int, 0, driver_ver_sz - strlen(string));
        strncat(string, ",", remaining_size);

        remaining_size = max_t(int, 0, driver_ver_sz - strlen(string));
        strncat(string, DRIVER_VERSION, remaining_size);

        /* Send the command */
        MLX5_SET(set_driver_version_in, in, opcode,
                 MLX5_CMD_OP_SET_DRIVER_VERSION);

        mlx5_cmd_exec_in(dev, set_driver_version, in);
}

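/* Prefer 64-bit streaming and coherent DMA masks, falling back to 32-bit.
 * Note: pci_set_dma_mask()/pci_set_consistent_dma_mask() are the legacy
 * PCI wrappers around dma_set_mask()/dma_set_coherent_mask(); newer
 * kernels call dma_set_mask_and_coherent() directly.
 */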
static int set_dma_caps(struct pci_dev *pdev)
{
        int err;

        err = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
        if (err) {
                dev_warn(&pdev->dev, "Warning: couldn't set 64-bit PCI DMA mask\n");
                err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
                if (err) {
                        dev_err(&pdev->dev, "Can't set PCI DMA mask, aborting\n");
                        return err;
                }
        }

        err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
        if (err) {
                dev_warn(&pdev->dev,
                         "Warning: couldn't set 64-bit consistent PCI DMA mask\n");
                err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
                if (err) {
                        dev_err(&pdev->dev,
                                "Can't set consistent PCI DMA mask, aborting\n");
                        return err;
                }
        }

        dma_set_max_seg_size(&pdev->dev, 2u * 1024 * 1024 * 1024);
        return err;
}

static int mlx5_pci_enable_device(struct mlx5_core_dev *dev)
{
        struct pci_dev *pdev = dev->pdev;
        int err = 0;

        mutex_lock(&dev->pci_status_mutex);
        if (dev->pci_status == MLX5_PCI_STATUS_DISABLED) {
                err = pci_enable_device(pdev);
                if (!err)
                        dev->pci_status = MLX5_PCI_STATUS_ENABLED;
        }
        mutex_unlock(&dev->pci_status_mutex);

        return err;
}

static void mlx5_pci_disable_device(struct mlx5_core_dev *dev)
{
        struct pci_dev *pdev = dev->pdev;

        mutex_lock(&dev->pci_status_mutex);
        if (dev->pci_status == MLX5_PCI_STATUS_ENABLED) {
                pci_disable_device(pdev);
                dev->pci_status = MLX5_PCI_STATUS_DISABLED;
        }
        mutex_unlock(&dev->pci_status_mutex);
}

static int request_bar(struct pci_dev *pdev)
{
        int err = 0;

        if (!(pci_resource_flags(pdev, 0) & IORESOURCE_MEM)) {
                dev_err(&pdev->dev, "Missing registers BAR, aborting\n");
                return -ENODEV;
        }

        err = pci_request_regions(pdev, DRIVER_NAME);
        if (err)
                dev_err(&pdev->dev, "Couldn't get PCI resources, aborting\n");

        return err;
}

static void release_bar(struct pci_dev *pdev)
{
        pci_release_regions(pdev);
}

struct mlx5_reg_host_endianness {
        u8      he;
        u8      rsvd[15];
};

#define CAP_MASK(pos, size) ((u64)((1 << (size)) - 1) << (pos))

enum {
        MLX5_CAP_BITS_RW_MASK = CAP_MASK(MLX5_CAP_OFF_CMDIF_CSUM, 2) |
                                MLX5_DEV_CAP_FLAG_DCT,
};

static u16 to_fw_pkey_sz(struct mlx5_core_dev *dev, u32 size)
{
        switch (size) {
        case 128:
                return 0;
        case 256:
                return 1;
        case 512:
                return 2;
        case 1024:
                return 3;
        case 2048:
                return 4;
        case 4096:
                return 5;
        default:
                mlx5_core_warn(dev, "invalid pkey table size %d\n", size);
                return 0;
        }
}

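/* QUERY_HCA_CAP op_mod encodes the capability type in its upper bits and
 * the mode (current vs. max supported values) in bit 0, hence the
 * (cap_type << 1) | (cap_mode & 0x01) construction below.
 */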
static int mlx5_core_get_caps_mode(struct mlx5_core_dev *dev,
                                   enum mlx5_cap_type cap_type,
                                   enum mlx5_cap_mode cap_mode)
{
        u8 in[MLX5_ST_SZ_BYTES(query_hca_cap_in)];
        int out_sz = MLX5_ST_SZ_BYTES(query_hca_cap_out);
        void *out, *hca_caps;
        u16 opmod = (cap_type << 1) | (cap_mode & 0x01);
        int err;

        memset(in, 0, sizeof(in));
        out = kzalloc(out_sz, GFP_KERNEL);
        if (!out)
                return -ENOMEM;

        MLX5_SET(query_hca_cap_in, in, opcode, MLX5_CMD_OP_QUERY_HCA_CAP);
        MLX5_SET(query_hca_cap_in, in, op_mod, opmod);
        err = mlx5_cmd_exec_inout(dev, query_hca_cap, in, out);
        if (err) {
                mlx5_core_warn(dev,
                               "QUERY_HCA_CAP : type(%x) opmode(%x) Failed(%d)\n",
                               cap_type, cap_mode, err);
                goto query_ex;
        }

        hca_caps = MLX5_ADDR_OF(query_hca_cap_out, out, capability);

        switch (cap_mode) {
        case HCA_CAP_OPMOD_GET_MAX:
                memcpy(dev->caps.hca_max[cap_type], hca_caps,
                       MLX5_UN_SZ_BYTES(hca_cap_union));
                break;
        case HCA_CAP_OPMOD_GET_CUR:
                memcpy(dev->caps.hca_cur[cap_type], hca_caps,
                       MLX5_UN_SZ_BYTES(hca_cap_union));
                break;
        default:
                mlx5_core_warn(dev,
                               "Tried to query dev cap type(%x) with wrong opmode(%x)\n",
                               cap_type, cap_mode);
                err = -EINVAL;
                break;
        }
query_ex:
        kfree(out);
        return err;
}

int mlx5_core_get_caps(struct mlx5_core_dev *dev, enum mlx5_cap_type cap_type)
{
        int ret;

        ret = mlx5_core_get_caps_mode(dev, cap_type, HCA_CAP_OPMOD_GET_CUR);
        if (ret)
                return ret;
        return mlx5_core_get_caps_mode(dev, cap_type, HCA_CAP_OPMOD_GET_MAX);
}

static int set_caps(struct mlx5_core_dev *dev, void *in, int opmod)
{
        MLX5_SET(set_hca_cap_in, in, opcode, MLX5_CMD_OP_SET_HCA_CAP);
        MLX5_SET(set_hca_cap_in, in, op_mod, opmod << 1);
        return mlx5_cmd_exec_in(dev, set_hca_cap, in);
}

static int handle_hca_cap_atomic(struct mlx5_core_dev *dev, void *set_ctx)
{
        void *set_hca_cap;
        int req_endianness;
        int err;

        if (!MLX5_CAP_GEN(dev, atomic))
                return 0;

        err = mlx5_core_get_caps(dev, MLX5_CAP_ATOMIC);
        if (err)
                return err;

        req_endianness =
                MLX5_CAP_ATOMIC(dev,
                                supported_atomic_req_8B_endianness_mode_1);

        if (req_endianness != MLX5_ATOMIC_REQ_MODE_HOST_ENDIANNESS)
                return 0;

        set_hca_cap = MLX5_ADDR_OF(set_hca_cap_in, set_ctx, capability);

        /* Set requestor to host endianness */
        MLX5_SET(atomic_caps, set_hca_cap, atomic_req_8B_endianness_mode,
                 MLX5_ATOMIC_REQ_MODE_HOST_ENDIANNESS);

        return set_caps(dev, set_ctx, MLX5_SET_HCA_CAP_OP_MOD_ATOMIC);
}

static int handle_hca_cap_odp(struct mlx5_core_dev *dev, void *set_ctx)
{
        void *set_hca_cap;
        bool do_set = false;
        int err;

        if (!IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING) ||
            !MLX5_CAP_GEN(dev, pg))
                return 0;

        err = mlx5_core_get_caps(dev, MLX5_CAP_ODP);
        if (err)
                return err;

        set_hca_cap = MLX5_ADDR_OF(set_hca_cap_in, set_ctx, capability);
        memcpy(set_hca_cap, dev->caps.hca_cur[MLX5_CAP_ODP],
               MLX5_ST_SZ_BYTES(odp_cap));

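/* For each ODP capability that firmware reports as supported in the MAX
 * capability set, enable it in the CURRENT set; do_set tracks whether any
 * field actually changed so the SET_HCA_CAP command can be skipped.
 */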
#define ODP_CAP_SET_MAX(dev, field)                                            \
        do {                                                                   \
                u32 _res = MLX5_CAP_ODP_MAX(dev, field);                       \
                if (_res) {                                                    \
                        do_set = true;                                         \
                        MLX5_SET(odp_cap, set_hca_cap, field, _res);           \
                }                                                              \
        } while (0)

        ODP_CAP_SET_MAX(dev, ud_odp_caps.srq_receive);
        ODP_CAP_SET_MAX(dev, rc_odp_caps.srq_receive);
        ODP_CAP_SET_MAX(dev, xrc_odp_caps.srq_receive);
        ODP_CAP_SET_MAX(dev, xrc_odp_caps.send);
        ODP_CAP_SET_MAX(dev, xrc_odp_caps.receive);
        ODP_CAP_SET_MAX(dev, xrc_odp_caps.write);
        ODP_CAP_SET_MAX(dev, xrc_odp_caps.read);
        ODP_CAP_SET_MAX(dev, xrc_odp_caps.atomic);
        ODP_CAP_SET_MAX(dev, dc_odp_caps.srq_receive);
        ODP_CAP_SET_MAX(dev, dc_odp_caps.send);
        ODP_CAP_SET_MAX(dev, dc_odp_caps.receive);
        ODP_CAP_SET_MAX(dev, dc_odp_caps.write);
        ODP_CAP_SET_MAX(dev, dc_odp_caps.read);
        ODP_CAP_SET_MAX(dev, dc_odp_caps.atomic);

        if (!do_set)
                return 0;

        return set_caps(dev, set_ctx, MLX5_SET_HCA_CAP_OP_MOD_ODP);
}

static int handle_hca_cap(struct mlx5_core_dev *dev, void *set_ctx)
{
        struct mlx5_profile *prof = dev->profile;
        void *set_hca_cap;
        int err;

        err = mlx5_core_get_caps(dev, MLX5_CAP_GENERAL);
        if (err)
                return err;

        set_hca_cap = MLX5_ADDR_OF(set_hca_cap_in, set_ctx,
                                   capability);
        memcpy(set_hca_cap, dev->caps.hca_cur[MLX5_CAP_GENERAL],
               MLX5_ST_SZ_BYTES(cmd_hca_cap));

        mlx5_core_dbg(dev, "Current Pkey table size %d Setting new size %d\n",
                      mlx5_to_sw_pkey_sz(MLX5_CAP_GEN(dev, pkey_table_size)),
                      128);
        /* we limit the size of the pkey table to 128 entries for now */
        MLX5_SET(cmd_hca_cap, set_hca_cap, pkey_table_size,
                 to_fw_pkey_sz(dev, 128));

        /* Check log_max_qp from HCA caps to set in current profile */
        if (MLX5_CAP_GEN_MAX(dev, log_max_qp) < profile[prof_sel].log_max_qp) {
                mlx5_core_warn(dev, "log_max_qp value in current profile is %d, changing it to HCA capability limit (%d)\n",
                               profile[prof_sel].log_max_qp,
                               MLX5_CAP_GEN_MAX(dev, log_max_qp));
                profile[prof_sel].log_max_qp = MLX5_CAP_GEN_MAX(dev, log_max_qp);
        }
        if (prof->mask & MLX5_PROF_MASK_QP_SIZE)
                MLX5_SET(cmd_hca_cap, set_hca_cap, log_max_qp,
                         prof->log_max_qp);

        /* disable cmdif checksum */
        MLX5_SET(cmd_hca_cap, set_hca_cap, cmdif_checksum, 0);

        /* Enable 4K UAR only when HCA supports it and page size is bigger
         * than 4K.
         */
        if (MLX5_CAP_GEN_MAX(dev, uar_4k) && PAGE_SIZE > 4096)
                MLX5_SET(cmd_hca_cap, set_hca_cap, uar_4k, 1);

        MLX5_SET(cmd_hca_cap, set_hca_cap, log_uar_page_sz, PAGE_SHIFT - 12);

        if (MLX5_CAP_GEN_MAX(dev, cache_line_128byte))
                MLX5_SET(cmd_hca_cap,
                         set_hca_cap,
                         cache_line_128byte,
                         cache_line_size() >= 128 ? 1 : 0);

        if (MLX5_CAP_GEN_MAX(dev, dct))
                MLX5_SET(cmd_hca_cap, set_hca_cap, dct, 1);

        if (MLX5_CAP_GEN_MAX(dev, num_vhca_ports))
                MLX5_SET(cmd_hca_cap,
                         set_hca_cap,
                         num_vhca_ports,
                         MLX5_CAP_GEN_MAX(dev, num_vhca_ports));

        if (MLX5_CAP_GEN_MAX(dev, release_all_pages))
                MLX5_SET(cmd_hca_cap, set_hca_cap, release_all_pages, 1);

        if (MLX5_CAP_GEN_MAX(dev, mkey_by_name))
                MLX5_SET(cmd_hca_cap, set_hca_cap, mkey_by_name, 1);

        return set_caps(dev, set_ctx, MLX5_SET_HCA_CAP_OP_MOD_GENERAL_DEVICE);
}

static int handle_hca_cap_roce(struct mlx5_core_dev *dev, void *set_ctx)
{
        void *set_hca_cap;
        int err;

        if (!MLX5_CAP_GEN(dev, roce))
                return 0;

        err = mlx5_core_get_caps(dev, MLX5_CAP_ROCE);
        if (err)
                return err;

        if (MLX5_CAP_ROCE(dev, sw_r_roce_src_udp_port) ||
            !MLX5_CAP_ROCE_MAX(dev, sw_r_roce_src_udp_port))
                return 0;

        set_hca_cap = MLX5_ADDR_OF(set_hca_cap_in, set_ctx, capability);
        memcpy(set_hca_cap, dev->caps.hca_cur[MLX5_CAP_ROCE],
               MLX5_ST_SZ_BYTES(roce_cap));
        MLX5_SET(roce_cap, set_hca_cap, sw_r_roce_src_udp_port, 1);

        err = set_caps(dev, set_ctx, MLX5_SET_HCA_CAP_OP_MOD_ROCE);
        return err;
}

static int set_hca_cap(struct mlx5_core_dev *dev)
{
        int set_sz = MLX5_ST_SZ_BYTES(set_hca_cap_in);
        void *set_ctx;
        int err;

        set_ctx = kzalloc(set_sz, GFP_KERNEL);
        if (!set_ctx)
                return -ENOMEM;

        err = handle_hca_cap(dev, set_ctx);
        if (err) {
                mlx5_core_err(dev, "handle_hca_cap failed\n");
                goto out;
        }

        memset(set_ctx, 0, set_sz);
        err = handle_hca_cap_atomic(dev, set_ctx);
        if (err) {
                mlx5_core_err(dev, "handle_hca_cap_atomic failed\n");
                goto out;
        }

        memset(set_ctx, 0, set_sz);
        err = handle_hca_cap_odp(dev, set_ctx);
        if (err) {
                mlx5_core_err(dev, "handle_hca_cap_odp failed\n");
                goto out;
        }

        memset(set_ctx, 0, set_sz);
        err = handle_hca_cap_roce(dev, set_ctx);
        if (err) {
                mlx5_core_err(dev, "handle_hca_cap_roce failed\n");
                goto out;
        }

out:
        kfree(set_ctx);
        return err;
}

static int set_hca_ctrl(struct mlx5_core_dev *dev)
{
        struct mlx5_reg_host_endianness he_in;
        struct mlx5_reg_host_endianness he_out;
        int err;

        if (!mlx5_core_is_pf(dev))
                return 0;

        memset(&he_in, 0, sizeof(he_in));
        he_in.he = MLX5_SET_HOST_ENDIANNESS;
        err = mlx5_core_access_reg(dev, &he_in,  sizeof(he_in),
                                        &he_out, sizeof(he_out),
                                        MLX5_REG_HOST_ENDIANNESS, 0, 1);
        return err;
}

static int mlx5_core_set_hca_defaults(struct mlx5_core_dev *dev)
{
        int ret = 0;

        /* Disable local_lb by default */
        if (MLX5_CAP_GEN(dev, port_type) == MLX5_CAP_PORT_TYPE_ETH)
                ret = mlx5_nic_vport_update_local_lb(dev, false);

        return ret;
}

int mlx5_core_enable_hca(struct mlx5_core_dev *dev, u16 func_id)
{
        u32 in[MLX5_ST_SZ_DW(enable_hca_in)] = {};

        MLX5_SET(enable_hca_in, in, opcode, MLX5_CMD_OP_ENABLE_HCA);
        MLX5_SET(enable_hca_in, in, function_id, func_id);
        MLX5_SET(enable_hca_in, in, embedded_cpu_function,
                 dev->caps.embedded_cpu);
        return mlx5_cmd_exec_in(dev, enable_hca, in);
}

int mlx5_core_disable_hca(struct mlx5_core_dev *dev, u16 func_id)
{
        u32 in[MLX5_ST_SZ_DW(disable_hca_in)] = {};

        MLX5_SET(disable_hca_in, in, opcode, MLX5_CMD_OP_DISABLE_HCA);
        MLX5_SET(disable_hca_in, in, function_id, func_id);
        MLX5_SET(disable_hca_in, in, embedded_cpu_function,
                 dev->caps.embedded_cpu);
        return mlx5_cmd_exec_in(dev, disable_hca, in);
}

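/* Negotiate the ISSI (Interface Step Sequence ID) with firmware: query
 * the supported ISSI bitmask and move to ISSI 1 when available, otherwise
 * stay on ISSI 0 (also the fallback when the query command itself is not
 * supported by older firmware).
 */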
static int mlx5_core_set_issi(struct mlx5_core_dev *dev)
{
        u32 query_out[MLX5_ST_SZ_DW(query_issi_out)] = {};
        u32 query_in[MLX5_ST_SZ_DW(query_issi_in)] = {};
        u32 sup_issi;
        int err;

        MLX5_SET(query_issi_in, query_in, opcode, MLX5_CMD_OP_QUERY_ISSI);
        err = mlx5_cmd_exec_inout(dev, query_issi, query_in, query_out);
        if (err) {
                u32 syndrome;
                u8 status;

                mlx5_cmd_mbox_status(query_out, &status, &syndrome);
                if (!status || syndrome == MLX5_DRIVER_SYND) {
                        mlx5_core_err(dev, "Failed to query ISSI err(%d) status(%d) synd(%d)\n",
                                      err, status, syndrome);
                        return err;
                }

                mlx5_core_warn(dev, "Query ISSI is not supported by FW, ISSI is 0\n");
                dev->issi = 0;
                return 0;
        }

        sup_issi = MLX5_GET(query_issi_out, query_out, supported_issi_dw0);

        if (sup_issi & (1 << 1)) {
                u32 set_in[MLX5_ST_SZ_DW(set_issi_in)] = {};

                MLX5_SET(set_issi_in, set_in, opcode, MLX5_CMD_OP_SET_ISSI);
                MLX5_SET(set_issi_in, set_in, current_issi, 1);
                err = mlx5_cmd_exec_in(dev, set_issi, set_in);
                if (err) {
                        mlx5_core_err(dev, "Failed to set ISSI to 1 err(%d)\n",
                                      err);
                        return err;
                }

                dev->issi = 1;

                return 0;
        } else if (sup_issi & (1 << 0) || !sup_issi) {
                return 0;
        }

        return -EOPNOTSUPP;
}

static int mlx5_pci_init(struct mlx5_core_dev *dev, struct pci_dev *pdev,
                         const struct pci_device_id *id)
{
        struct mlx5_priv *priv = &dev->priv;
        int err = 0;

        mutex_init(&dev->pci_status_mutex);
        pci_set_drvdata(dev->pdev, dev);

        dev->bar_addr = pci_resource_start(pdev, 0);
        priv->numa_node = dev_to_node(&dev->pdev->dev);

        err = mlx5_pci_enable_device(dev);
        if (err) {
                mlx5_core_err(dev, "Cannot enable PCI device, aborting\n");
                return err;
        }

        err = request_bar(pdev);
        if (err) {
                mlx5_core_err(dev, "error requesting BARs, aborting\n");
                goto err_disable;
        }

        pci_set_master(pdev);

        err = set_dma_caps(pdev);
        if (err) {
                mlx5_core_err(dev, "Failed setting DMA capabilities mask, aborting\n");
                goto err_clr_master;
        }

        if (pci_enable_atomic_ops_to_root(pdev, PCI_EXP_DEVCAP2_ATOMIC_COMP32) &&
            pci_enable_atomic_ops_to_root(pdev, PCI_EXP_DEVCAP2_ATOMIC_COMP64) &&
            pci_enable_atomic_ops_to_root(pdev, PCI_EXP_DEVCAP2_ATOMIC_COMP128))
                mlx5_core_dbg(dev, "Enabling pci atomics failed\n");

        dev->iseg_base = dev->bar_addr;
        dev->iseg = ioremap(dev->iseg_base, sizeof(*dev->iseg));
        if (!dev->iseg) {
                err = -ENOMEM;
                mlx5_core_err(dev, "Failed mapping initialization segment, aborting\n");
                goto err_clr_master;
        }

        mlx5_pci_vsc_init(dev);
        dev->caps.embedded_cpu = mlx5_read_embedded_cpu(dev);
        return 0;

err_clr_master:
        pci_clear_master(dev->pdev);
        release_bar(dev->pdev);
err_disable:
        mlx5_pci_disable_device(dev);
        return err;
}

static void mlx5_pci_close(struct mlx5_core_dev *dev)
{
        /* The health work might still be active, and it needs the PCI BAR
         * in order to know the NIC state. Therefore, drain the health WQ
         * before removing the PCI BARs.
         */
        mlx5_drain_health_wq(dev);
        iounmap(dev->iseg);
        pci_clear_master(dev->pdev);
        release_bar(dev->pdev);
        mlx5_pci_disable_device(dev);
}

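/* One-time software state initialization (tables, clock, tunnel and
 * steering helpers). This runs only on the boot path of mlx5_load_one();
 * the per-load counterpart is mlx5_load() further down.
 */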
static int mlx5_init_once(struct mlx5_core_dev *dev)
{
        int err;

        dev->priv.devcom = mlx5_devcom_register_device(dev);
        if (IS_ERR(dev->priv.devcom))
                mlx5_core_err(dev, "failed to register with devcom (0x%p)\n",
                              dev->priv.devcom);

        err = mlx5_query_board_id(dev);
        if (err) {
                mlx5_core_err(dev, "query board id failed\n");
                goto err_devcom;
        }

        err = mlx5_irq_table_init(dev);
        if (err) {
                mlx5_core_err(dev, "failed to initialize irq table\n");
                goto err_devcom;
        }

        err = mlx5_eq_table_init(dev);
        if (err) {
                mlx5_core_err(dev, "failed to initialize eq\n");
                goto err_irq_cleanup;
        }

        err = mlx5_events_init(dev);
        if (err) {
                mlx5_core_err(dev, "failed to initialize events\n");
                goto err_eq_cleanup;
        }

        mlx5_cq_debugfs_init(dev);

        mlx5_init_reserved_gids(dev);

        mlx5_init_clock(dev);

        dev->vxlan = mlx5_vxlan_create(dev);
        dev->geneve = mlx5_geneve_create(dev);

        err = mlx5_init_rl_table(dev);
        if (err) {
                mlx5_core_err(dev, "Failed to init rate limiting\n");
                goto err_tables_cleanup;
        }

        err = mlx5_mpfs_init(dev);
        if (err) {
                mlx5_core_err(dev, "Failed to init l2 table %d\n", err);
                goto err_rl_cleanup;
        }

        err = mlx5_sriov_init(dev);
        if (err) {
                mlx5_core_err(dev, "Failed to init sriov %d\n", err);
                goto err_mpfs_cleanup;
        }

        err = mlx5_eswitch_init(dev);
        if (err) {
                mlx5_core_err(dev, "Failed to init eswitch %d\n", err);
                goto err_sriov_cleanup;
        }

        err = mlx5_fpga_init(dev);
        if (err) {
                mlx5_core_err(dev, "Failed to init fpga device %d\n", err);
                goto err_eswitch_cleanup;
        }

        dev->dm = mlx5_dm_create(dev);
        if (IS_ERR(dev->dm))
                mlx5_core_warn(dev, "Failed to init device memory %d\n", err);

        dev->tracer = mlx5_fw_tracer_create(dev);
        dev->hv_vhca = mlx5_hv_vhca_create(dev);
        dev->rsc_dump = mlx5_rsc_dump_create(dev);

        return 0;

err_eswitch_cleanup:
        mlx5_eswitch_cleanup(dev->priv.eswitch);
err_sriov_cleanup:
        mlx5_sriov_cleanup(dev);
err_mpfs_cleanup:
        mlx5_mpfs_cleanup(dev);
err_rl_cleanup:
        mlx5_cleanup_rl_table(dev);
err_tables_cleanup:
        mlx5_geneve_destroy(dev->geneve);
        mlx5_vxlan_destroy(dev->vxlan);
        mlx5_cq_debugfs_cleanup(dev);
        mlx5_events_cleanup(dev);
err_eq_cleanup:
        mlx5_eq_table_cleanup(dev);
err_irq_cleanup:
        mlx5_irq_table_cleanup(dev);
err_devcom:
        mlx5_devcom_unregister_device(dev->priv.devcom);

        return err;
}

static void mlx5_cleanup_once(struct mlx5_core_dev *dev)
{
        mlx5_rsc_dump_destroy(dev);
        mlx5_hv_vhca_destroy(dev->hv_vhca);
        mlx5_fw_tracer_destroy(dev->tracer);
        mlx5_dm_cleanup(dev);
        mlx5_fpga_cleanup(dev);
        mlx5_eswitch_cleanup(dev->priv.eswitch);
        mlx5_sriov_cleanup(dev);
        mlx5_mpfs_cleanup(dev);
        mlx5_cleanup_rl_table(dev);
        mlx5_geneve_destroy(dev->geneve);
        mlx5_vxlan_destroy(dev->vxlan);
        mlx5_cleanup_clock(dev);
        mlx5_cleanup_reserved_gids(dev);
        mlx5_cq_debugfs_cleanup(dev);
        mlx5_events_cleanup(dev);
        mlx5_eq_table_cleanup(dev);
        mlx5_irq_table_cleanup(dev);
        mlx5_devcom_unregister_device(dev->priv.devcom);
}

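/* Bring the function up to the point where the command interface and HCA
 * are usable: wait for FW readiness, init the command interface,
 * ENABLE_HCA, negotiate ISSI, hand boot/init pages to FW, set
 * capabilities, INIT_HCA, then start health polling and query the final
 * capabilities.
 */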
static int mlx5_function_setup(struct mlx5_core_dev *dev, bool boot)
{
        int err;

        mlx5_core_info(dev, "firmware version: %d.%d.%d\n", fw_rev_maj(dev),
                       fw_rev_min(dev), fw_rev_sub(dev));

        /* Only PFs hold the relevant PCIe information for this query */
        if (mlx5_core_is_pf(dev))
                pcie_print_link_status(dev->pdev);

        /* Wait for the firmware to accept the initialization segment
         * configuration.
         */
        err = wait_fw_init(dev, FW_PRE_INIT_TIMEOUT_MILI, FW_INIT_WARN_MESSAGE_INTERVAL);
        if (err) {
                mlx5_core_err(dev, "Firmware over %d MS in pre-initializing state, aborting\n",
                              FW_PRE_INIT_TIMEOUT_MILI);
                return err;
        }

        err = mlx5_cmd_init(dev);
        if (err) {
                mlx5_core_err(dev, "Failed initializing command interface, aborting\n");
                return err;
        }

        err = wait_fw_init(dev, FW_INIT_TIMEOUT_MILI, 0);
        if (err) {
                mlx5_core_err(dev, "Firmware over %d MS in initializing state, aborting\n",
                              FW_INIT_TIMEOUT_MILI);
                goto err_cmd_cleanup;
        }

        mlx5_cmd_set_state(dev, MLX5_CMDIF_STATE_UP);

        err = mlx5_core_enable_hca(dev, 0);
        if (err) {
                mlx5_core_err(dev, "enable hca failed\n");
                goto err_cmd_cleanup;
        }

        err = mlx5_core_set_issi(dev);
        if (err) {
                mlx5_core_err(dev, "failed to set issi\n");
                goto err_disable_hca;
        }

        err = mlx5_satisfy_startup_pages(dev, 1);
        if (err) {
                mlx5_core_err(dev, "failed to allocate boot pages\n");
                goto err_disable_hca;
        }

        err = set_hca_ctrl(dev);
        if (err) {
                mlx5_core_err(dev, "set_hca_ctrl failed\n");
                goto reclaim_boot_pages;
        }

        err = set_hca_cap(dev);
        if (err) {
                mlx5_core_err(dev, "set_hca_cap failed\n");
                goto reclaim_boot_pages;
        }

        err = mlx5_satisfy_startup_pages(dev, 0);
        if (err) {
                mlx5_core_err(dev, "failed to allocate init pages\n");
                goto reclaim_boot_pages;
        }

        err = mlx5_cmd_init_hca(dev, sw_owner_id);
        if (err) {
                mlx5_core_err(dev, "init hca failed\n");
                goto reclaim_boot_pages;
        }

        mlx5_set_driver_version(dev);

        mlx5_start_health_poll(dev);

        err = mlx5_query_hca_caps(dev);
        if (err) {
                mlx5_core_err(dev, "query hca failed\n");
                goto stop_health;
        }

        return 0;

stop_health:
        mlx5_stop_health_poll(dev, boot);
reclaim_boot_pages:
        mlx5_reclaim_startup_pages(dev);
err_disable_hca:
        mlx5_core_disable_hca(dev, 0);
err_cmd_cleanup:
        mlx5_cmd_set_state(dev, MLX5_CMDIF_STATE_DOWN);
        mlx5_cmd_cleanup(dev);

        return err;
}

static int mlx5_function_teardown(struct mlx5_core_dev *dev, bool boot)
{
        int err;

        mlx5_stop_health_poll(dev, boot);
        err = mlx5_cmd_teardown_hca(dev);
        if (err) {
                mlx5_core_err(dev, "tear_down_hca failed, skip cleanup\n");
                return err;
        }
        mlx5_reclaim_startup_pages(dev);
        mlx5_core_disable_hca(dev, 0);
        mlx5_cmd_set_state(dev, MLX5_CMDIF_STATE_DOWN);
        mlx5_cmd_cleanup(dev);

        return 0;
}

static int mlx5_load(struct mlx5_core_dev *dev)
{
        int err;

        dev->priv.uar = mlx5_get_uars_page(dev);
        if (IS_ERR(dev->priv.uar)) {
                mlx5_core_err(dev, "Failed allocating uar, aborting\n");
                err = PTR_ERR(dev->priv.uar);
                return err;
        }

        mlx5_events_start(dev);
        mlx5_pagealloc_start(dev);

        err = mlx5_irq_table_create(dev);
        if (err) {
                mlx5_core_err(dev, "Failed to alloc IRQs\n");
                goto err_irq_table;
        }

        err = mlx5_eq_table_create(dev);
        if (err) {
                mlx5_core_err(dev, "Failed to create EQs\n");
                goto err_eq_table;
        }

        err = mlx5_fw_tracer_init(dev->tracer);
        if (err) {
                mlx5_core_err(dev, "Failed to init FW tracer\n");
                goto err_fw_tracer;
        }

        mlx5_hv_vhca_init(dev->hv_vhca);

        err = mlx5_rsc_dump_init(dev);
        if (err) {
                mlx5_core_err(dev, "Failed to init Resource dump\n");
                goto err_rsc_dump;
        }

        err = mlx5_fpga_device_start(dev);
        if (err) {
                mlx5_core_err(dev, "fpga device start failed %d\n", err);
                goto err_fpga_start;
        }

        mlx5_accel_ipsec_init(dev);

        err = mlx5_accel_tls_init(dev);
        if (err) {
                mlx5_core_err(dev, "TLS device start failed %d\n", err);
                goto err_tls_start;
        }

        err = mlx5_init_fs(dev);
        if (err) {
                mlx5_core_err(dev, "Failed to init flow steering\n");
                goto err_fs;
        }

        err = mlx5_core_set_hca_defaults(dev);
        if (err) {
                mlx5_core_err(dev, "Failed to set hca defaults\n");
                goto err_sriov;
        }

        err = mlx5_sriov_attach(dev);
        if (err) {
                mlx5_core_err(dev, "sriov init failed %d\n", err);
                goto err_sriov;
        }

        err = mlx5_ec_init(dev);
        if (err) {
                mlx5_core_err(dev, "Failed to init embedded CPU\n");
                goto err_ec;
        }

        return 0;

err_ec:
        mlx5_sriov_detach(dev);
err_sriov:
        mlx5_cleanup_fs(dev);
err_fs:
        mlx5_accel_tls_cleanup(dev);
err_tls_start:
        mlx5_accel_ipsec_cleanup(dev);
        mlx5_fpga_device_stop(dev);
err_fpga_start:
        mlx5_rsc_dump_cleanup(dev);
err_rsc_dump:
        mlx5_hv_vhca_cleanup(dev->hv_vhca);
        mlx5_fw_tracer_cleanup(dev->tracer);
err_fw_tracer:
        mlx5_eq_table_destroy(dev);
err_eq_table:
        mlx5_irq_table_destroy(dev);
err_irq_table:
        mlx5_pagealloc_stop(dev);
        mlx5_events_stop(dev);
        mlx5_put_uars_page(dev, dev->priv.uar);
        return err;
}

static void mlx5_unload(struct mlx5_core_dev *dev)
{
        mlx5_ec_cleanup(dev);
        mlx5_sriov_detach(dev);
        mlx5_cleanup_fs(dev);
        mlx5_accel_ipsec_cleanup(dev);
        mlx5_accel_tls_cleanup(dev);
        mlx5_fpga_device_stop(dev);
        mlx5_rsc_dump_cleanup(dev);
        mlx5_hv_vhca_cleanup(dev->hv_vhca);
        mlx5_fw_tracer_cleanup(dev->tracer);
        mlx5_eq_table_destroy(dev);
        mlx5_irq_table_destroy(dev);
        mlx5_pagealloc_stop(dev);
        mlx5_events_stop(dev);
        mlx5_put_uars_page(dev, dev->priv.uar);
}

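/* Top-level load path. With boot=true (probe) this also creates the
 * one-time software state and registers devlink and the device
 * interfaces; with boot=false (recovery/resume) it only re-attaches them.
 */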
int mlx5_load_one(struct mlx5_core_dev *dev, bool boot)
{
        int err = 0;

        mutex_lock(&dev->intf_state_mutex);
        if (test_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state)) {
                mlx5_core_warn(dev, "interface is up, NOP\n");
                goto out;
        }
        /* remove any previous indication of internal error */
        dev->state = MLX5_DEVICE_STATE_UP;

        err = mlx5_function_setup(dev, boot);
        if (err)
                goto err_function;

        if (boot) {
                err = mlx5_init_once(dev);
                if (err) {
                        mlx5_core_err(dev, "sw objs init failed\n");
                        goto function_teardown;
                }
        }

        err = mlx5_load(dev);
        if (err)
                goto err_load;

        set_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state);

        if (boot) {
                err = mlx5_devlink_register(priv_to_devlink(dev), dev->device);
                if (err)
                        goto err_devlink_reg;
                mlx5_register_device(dev);
        } else {
                mlx5_attach_device(dev);
        }

        mutex_unlock(&dev->intf_state_mutex);
        return 0;

err_devlink_reg:
        clear_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state);
        mlx5_unload(dev);
err_load:
        if (boot)
                mlx5_cleanup_once(dev);
function_teardown:
        mlx5_function_teardown(dev, boot);
err_function:
        dev->state = MLX5_DEVICE_STATE_INTERNAL_ERROR;
out:
        mutex_unlock(&dev->intf_state_mutex);
        return err;
}

void mlx5_unload_one(struct mlx5_core_dev *dev, bool cleanup)
{
        mutex_lock(&dev->intf_state_mutex);

        if (cleanup) {
                mlx5_unregister_device(dev);
                mlx5_devlink_unregister(priv_to_devlink(dev));
        } else {
                mlx5_detach_device(dev);
        }

        if (!test_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state)) {
                mlx5_core_warn(dev, "%s: interface is down, NOP\n",
                               __func__);
                if (cleanup)
                        mlx5_cleanup_once(dev);
                goto out;
        }

        clear_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state);

        mlx5_unload(dev);

        if (cleanup)
                mlx5_cleanup_once(dev);

        mlx5_function_teardown(dev, cleanup);
out:
        mutex_unlock(&dev->intf_state_mutex);
}

static int mlx5_mdev_init(struct mlx5_core_dev *dev, int profile_idx)
{
        struct mlx5_priv *priv = &dev->priv;
        int err;

        dev->profile = &profile[profile_idx];

        INIT_LIST_HEAD(&priv->ctx_list);
        spin_lock_init(&priv->ctx_lock);
        mutex_init(&dev->intf_state_mutex);

        mutex_init(&priv->bfregs.reg_head.lock);
        mutex_init(&priv->bfregs.wc_head.lock);
        INIT_LIST_HEAD(&priv->bfregs.reg_head.list);
        INIT_LIST_HEAD(&priv->bfregs.wc_head.list);

        mutex_init(&priv->alloc_mutex);
        mutex_init(&priv->pgdir_mutex);
        INIT_LIST_HEAD(&priv->pgdir_list);

        priv->dbg_root = debugfs_create_dir(dev_name(dev->device),
                                            mlx5_debugfs_root);
        err = mlx5_health_init(dev);
        if (err)
                goto err_health_init;

        err = mlx5_pagealloc_init(dev);
        if (err)
                goto err_pagealloc_init;

        return 0;

err_pagealloc_init:
        mlx5_health_cleanup(dev);
err_health_init:
        debugfs_remove(dev->priv.dbg_root);
        mutex_destroy(&priv->pgdir_mutex);
        mutex_destroy(&priv->alloc_mutex);
        mutex_destroy(&priv->bfregs.wc_head.lock);
        mutex_destroy(&priv->bfregs.reg_head.lock);
        mutex_destroy(&dev->intf_state_mutex);
        return err;
}

static void mlx5_mdev_uninit(struct mlx5_core_dev *dev)
{
        struct mlx5_priv *priv = &dev->priv;

        mlx5_pagealloc_cleanup(dev);
        mlx5_health_cleanup(dev);
        debugfs_remove_recursive(dev->priv.dbg_root);
        mutex_destroy(&priv->pgdir_mutex);
        mutex_destroy(&priv->alloc_mutex);
        mutex_destroy(&priv->bfregs.wc_head.lock);
        mutex_destroy(&priv->bfregs.reg_head.lock);
        mutex_destroy(&dev->intf_state_mutex);
}

#define MLX5_IB_MOD "mlx5_ib"
static int init_one(struct pci_dev *pdev, const struct pci_device_id *id)
{
        struct mlx5_core_dev *dev;
        struct devlink *devlink;
        int err;

        devlink = mlx5_devlink_alloc();
        if (!devlink) {
                dev_err(&pdev->dev, "devlink alloc failed\n");
                return -ENOMEM;
        }

        dev = devlink_priv(devlink);
        dev->device = &pdev->dev;
        dev->pdev = pdev;

        dev->coredev_type = id->driver_data & MLX5_PCI_DEV_IS_VF ?
                         MLX5_COREDEV_VF : MLX5_COREDEV_PF;

        err = mlx5_mdev_init(dev, prof_sel);
        if (err)
                goto mdev_init_err;

        err = mlx5_pci_init(dev, pdev, id);
        if (err) {
                mlx5_core_err(dev, "mlx5_pci_init failed with error code %d\n",
                              err);
                goto pci_init_err;
        }

        err = mlx5_load_one(dev, true);
        if (err) {
                mlx5_core_err(dev, "mlx5_load_one failed with error code %d\n",
                              err);
                goto err_load_one;
        }

        request_module_nowait(MLX5_IB_MOD);

        err = mlx5_crdump_enable(dev);
        if (err)
                dev_err(&pdev->dev, "mlx5_crdump_enable failed with error code %d\n", err);

        pci_save_state(pdev);
        devlink_reload_enable(devlink);
        return 0;

err_load_one:
        mlx5_pci_close(dev);
pci_init_err:
        mlx5_mdev_uninit(dev);
mdev_init_err:
        mlx5_devlink_free(devlink);

        return err;
}

static void remove_one(struct pci_dev *pdev)
{
        struct mlx5_core_dev *dev  = pci_get_drvdata(pdev);
        struct devlink *devlink = priv_to_devlink(dev);

        devlink_reload_disable(devlink);
        mlx5_crdump_disable(dev);
        mlx5_drain_health_wq(dev);
        mlx5_unload_one(dev, true);
        mlx5_pci_close(dev);
        mlx5_mdev_uninit(dev);
        mlx5_devlink_free(devlink);
}

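/* PCI AER recovery flow: error_detected() tears the device down and
 * disables it, slot_reset() re-enables it and waits for the health
 * counter to tick (wait_vital() below), and resume() reloads the driver
 * state.
 */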
static pci_ers_result_t mlx5_pci_err_detected(struct pci_dev *pdev,
                                              pci_channel_state_t state)
{
        struct mlx5_core_dev *dev = pci_get_drvdata(pdev);

        mlx5_core_info(dev, "%s was called\n", __func__);

        mlx5_enter_error_state(dev, false);
        mlx5_error_sw_reset(dev);
        mlx5_unload_one(dev, false);
        mlx5_drain_health_wq(dev);
        mlx5_pci_disable_device(dev);

        return state == pci_channel_io_perm_failure ?
                PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_NEED_RESET;
}

/* Wait for the device to show vital signs: the health counter should
 * start advancing.
 */
static int wait_vital(struct pci_dev *pdev)
{
        struct mlx5_core_dev *dev = pci_get_drvdata(pdev);
        struct mlx5_core_health *health = &dev->priv.health;
        const int niter = 100;
        u32 last_count = 0;
        u32 count;
        int i;

        for (i = 0; i < niter; i++) {
                count = ioread32be(health->health_counter);
                if (count && count != 0xffffffff) {
                        if (last_count && last_count != count) {
                                mlx5_core_info(dev,
                                               "wait vital counter value 0x%x after %d iterations\n",
                                               count, i);
                                return 0;
                        }
                        last_count = count;
                }
                msleep(50);
        }

        return -ETIMEDOUT;
}

static pci_ers_result_t mlx5_pci_slot_reset(struct pci_dev *pdev)
{
        struct mlx5_core_dev *dev = pci_get_drvdata(pdev);
        int err;

        mlx5_core_info(dev, "%s was called\n", __func__);

        err = mlx5_pci_enable_device(dev);
        if (err) {
                mlx5_core_err(dev, "%s: mlx5_pci_enable_device failed with error code: %d\n",
                              __func__, err);
                return PCI_ERS_RESULT_DISCONNECT;
        }

        pci_set_master(pdev);
        pci_restore_state(pdev);
        pci_save_state(pdev);

        if (wait_vital(pdev)) {
                mlx5_core_err(dev, "%s: wait_vital timed out\n", __func__);
                return PCI_ERS_RESULT_DISCONNECT;
        }

        return PCI_ERS_RESULT_RECOVERED;
}

static void mlx5_pci_resume(struct pci_dev *pdev)
{
        struct mlx5_core_dev *dev = pci_get_drvdata(pdev);
        int err;

        mlx5_core_info(dev, "%s was called\n", __func__);

        err = mlx5_load_one(dev, false);
        if (err)
                mlx5_core_err(dev, "%s: mlx5_load_one failed with error code: %d\n",
                              __func__, err);
        else
                mlx5_core_info(dev, "%s: device recovered\n", __func__);
}

static const struct pci_error_handlers mlx5_err_handler = {
        .error_detected = mlx5_pci_err_detected,
        .slot_reset     = mlx5_pci_slot_reset,
        .resume         = mlx5_pci_resume
};

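/* Shutdown fast path: prefer the FAST_TEARDOWN command, fall back to
 * FORCE_TEARDOWN, and if neither is supported (or the device is already
 * in an internal error state) let the caller do a regular unload instead.
 */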
static int mlx5_try_fast_unload(struct mlx5_core_dev *dev)
{
        bool fast_teardown = false, force_teardown = false;
        int ret = 1;

        fast_teardown = MLX5_CAP_GEN(dev, fast_teardown);
        force_teardown = MLX5_CAP_GEN(dev, force_teardown);

        mlx5_core_dbg(dev, "force teardown firmware support=%d\n", force_teardown);
        mlx5_core_dbg(dev, "fast teardown firmware support=%d\n", fast_teardown);

        if (!fast_teardown && !force_teardown)
                return -EOPNOTSUPP;

        if (dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) {
                mlx5_core_dbg(dev, "Device in internal error state, giving up\n");
                return -EAGAIN;
        }

        /* The panic tear-down FW command will stop the PCI bus
         * communication with the HCA, so the health poll is no longer
         * needed.
         */
        mlx5_drain_health_wq(dev);
        mlx5_stop_health_poll(dev, false);

        ret = mlx5_cmd_fast_teardown_hca(dev);
        if (!ret)
                goto succeed;

        ret = mlx5_cmd_force_teardown_hca(dev);
        if (!ret)
                goto succeed;

        mlx5_core_dbg(dev, "Firmware couldn't do fast unload error: %d\n", ret);
        mlx5_start_health_poll(dev);
        return ret;

succeed:
        mlx5_enter_error_state(dev, true);

        /* Some platforms require freeing the IRQs in the shutdown flow. If
         * they aren't freed, they can't be allocated after kexec. There is
         * no need to clean up the mlx5_core software contexts.
         */
        mlx5_core_eq_free_irqs(dev);

        return 0;
}

static void shutdown(struct pci_dev *pdev)
{
        struct mlx5_core_dev *dev  = pci_get_drvdata(pdev);
        int err;

        mlx5_core_info(dev, "Shutdown was called\n");
        err = mlx5_try_fast_unload(dev);
        if (err)
                mlx5_unload_one(dev, false);
        mlx5_pci_disable_device(dev);
}

static int mlx5_suspend(struct pci_dev *pdev, pm_message_t state)
{
        struct mlx5_core_dev *dev = pci_get_drvdata(pdev);

        mlx5_unload_one(dev, false);

        return 0;
}

static int mlx5_resume(struct pci_dev *pdev)
{
        struct mlx5_core_dev *dev = pci_get_drvdata(pdev);

        return mlx5_load_one(dev, false);
}

static const struct pci_device_id mlx5_core_pci_table[] = {
        { PCI_VDEVICE(MELLANOX, PCI_DEVICE_ID_MELLANOX_CONNECTIB) },
        { PCI_VDEVICE(MELLANOX, 0x1012), MLX5_PCI_DEV_IS_VF},   /* Connect-IB VF */
        { PCI_VDEVICE(MELLANOX, PCI_DEVICE_ID_MELLANOX_CONNECTX4) },
        { PCI_VDEVICE(MELLANOX, 0x1014), MLX5_PCI_DEV_IS_VF},   /* ConnectX-4 VF */
        { PCI_VDEVICE(MELLANOX, PCI_DEVICE_ID_MELLANOX_CONNECTX4_LX) },
        { PCI_VDEVICE(MELLANOX, 0x1016), MLX5_PCI_DEV_IS_VF},   /* ConnectX-4LX VF */
        { PCI_VDEVICE(MELLANOX, 0x1017) },                      /* ConnectX-5, PCIe 3.0 */
        { PCI_VDEVICE(MELLANOX, 0x1018), MLX5_PCI_DEV_IS_VF},   /* ConnectX-5 VF */
        { PCI_VDEVICE(MELLANOX, 0x1019) },                      /* ConnectX-5 Ex */
        { PCI_VDEVICE(MELLANOX, 0x101a), MLX5_PCI_DEV_IS_VF},   /* ConnectX-5 Ex VF */
        { PCI_VDEVICE(MELLANOX, 0x101b) },                      /* ConnectX-6 */
        { PCI_VDEVICE(MELLANOX, 0x101c), MLX5_PCI_DEV_IS_VF},   /* ConnectX-6 VF */
        { PCI_VDEVICE(MELLANOX, 0x101d) },                      /* ConnectX-6 Dx */
        { PCI_VDEVICE(MELLANOX, 0x101e), MLX5_PCI_DEV_IS_VF},   /* ConnectX Family mlx5Gen Virtual Function */
        { PCI_VDEVICE(MELLANOX, 0x101f) },                      /* ConnectX-6 LX */
        { PCI_VDEVICE(MELLANOX, 0x1021) },                      /* ConnectX-7 */
        { PCI_VDEVICE(MELLANOX, 0xa2d2) },                      /* BlueField integrated ConnectX-5 network controller */
        { PCI_VDEVICE(MELLANOX, 0xa2d3), MLX5_PCI_DEV_IS_VF},   /* BlueField integrated ConnectX-5 network controller VF */
        { PCI_VDEVICE(MELLANOX, 0xa2d6) },                      /* BlueField-2 integrated ConnectX-6 Dx network controller */
        { 0, }
};

MODULE_DEVICE_TABLE(pci, mlx5_core_pci_table);

void mlx5_disable_device(struct mlx5_core_dev *dev)
{
        mlx5_error_sw_reset(dev);
        mlx5_unload_one(dev, false);
}

void mlx5_recover_device(struct mlx5_core_dev *dev)
{
        mlx5_pci_disable_device(dev);
        if (mlx5_pci_slot_reset(dev->pdev) == PCI_ERS_RESULT_RECOVERED)
                mlx5_pci_resume(dev->pdev);
}

static struct pci_driver mlx5_core_driver = {
        .name           = DRIVER_NAME,
        .id_table       = mlx5_core_pci_table,
        .probe          = init_one,
        .remove         = remove_one,
        .suspend        = mlx5_suspend,
        .resume         = mlx5_resume,
        .shutdown       = shutdown,
        .err_handler    = &mlx5_err_handler,
        .sriov_configure   = mlx5_core_sriov_configure,
};

static void mlx5_core_verify_params(void)
{
        if (prof_sel >= ARRAY_SIZE(profile)) {
                pr_warn("mlx5_core: WARNING: Invalid module parameter prof_sel %d, valid range 0-%zu, changing back to default(%d)\n",
                        prof_sel,
                        ARRAY_SIZE(profile) - 1,
                        MLX5_DEFAULT_PROF);
                prof_sel = MLX5_DEFAULT_PROF;
        }
}

static int __init init(void)
{
        int err;

        get_random_bytes(&sw_owner_id, sizeof(sw_owner_id));

        mlx5_core_verify_params();
        mlx5_fpga_ipsec_build_fs_cmds();
        mlx5_register_debugfs();

        err = pci_register_driver(&mlx5_core_driver);
        if (err)
                goto err_debug;

#ifdef CONFIG_MLX5_CORE_EN
        mlx5e_init();
#endif

        return 0;

err_debug:
        mlx5_unregister_debugfs();
        return err;
}

static void __exit cleanup(void)
{
#ifdef CONFIG_MLX5_CORE_EN
        mlx5e_cleanup();
#endif
        pci_unregister_driver(&mlx5_core_driver);
        mlx5_unregister_debugfs();
}

module_init(init);
module_exit(cleanup);