linux/drivers/net/ethernet/mellanox/mlx4/main.c
/*
 * Copyright (c) 2004, 2005 Topspin Communications.  All rights reserved.
 * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
 * Copyright (c) 2005, 2006, 2007, 2008 Mellanox Technologies. All rights reserved.
 * Copyright (c) 2006, 2007 Cisco Systems, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <linux/module.h>
#include <linux/init.h>
#include <linux/errno.h>
#include <linux/pci.h>
#include <linux/dma-mapping.h>
#include <linux/slab.h>
#include <linux/io-mapping.h>
#include <linux/delay.h>
#include <linux/kmod.h>

#include <linux/mlx4/device.h>
#include <linux/mlx4/doorbell.h>

#include "mlx4.h"
#include "fw.h"
#include "icm.h"

MODULE_AUTHOR("Roland Dreier");
MODULE_DESCRIPTION("Mellanox ConnectX HCA low-level driver");
MODULE_LICENSE("Dual BSD/GPL");
MODULE_VERSION(DRV_VERSION);

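/* Driver-global workqueue; deferred work such as the port-sense poll is
 * queued on it (its users live outside this excerpt, so this summary is
 * inferred rather than definitive).
 */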
struct workqueue_struct *mlx4_wq;

#ifdef CONFIG_MLX4_DEBUG

int mlx4_debug_level = 0;
module_param_named(debug_level, mlx4_debug_level, int, 0644);
MODULE_PARM_DESC(debug_level, "Enable debug tracing if > 0");

#endif /* CONFIG_MLX4_DEBUG */

#ifdef CONFIG_PCI_MSI

static int msi_x = 1;
module_param(msi_x, int, 0444);
MODULE_PARM_DESC(msi_x, "attempt to use MSI-X if nonzero");

#else /* CONFIG_PCI_MSI */

#define msi_x (0)

#endif /* CONFIG_PCI_MSI */

static uint8_t num_vfs[3] = {0, 0, 0};
static int num_vfs_argc;
module_param_array(num_vfs, byte, &num_vfs_argc, 0444);
MODULE_PARM_DESC(num_vfs, "enable #num_vfs functions if num_vfs > 0\n"
			  "num_vfs=port1,port2,port1+2");

static uint8_t probe_vf[3] = {0, 0, 0};
static int probe_vfs_argc;
module_param_array(probe_vf, byte, &probe_vfs_argc, 0444);
MODULE_PARM_DESC(probe_vf, "number of vfs to probe by pf driver (num_vfs > 0)\n"
			   "probe_vf=port1,port2,port1+2");
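/* Per the descriptions above, both arrays take up to three values:
 * e.g. num_vfs=2,2,4 would request 2 VFs on port 1, 2 VFs on port 2
 * and 4 dual-port VFs, and probe_vf=1,1,2 would have the PF itself
 * probe that many of each kind (illustrative values, not defaults).
 */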

int mlx4_log_num_mgm_entry_size = MLX4_DEFAULT_MGM_LOG_ENTRY_SIZE;
module_param_named(log_num_mgm_entry_size,
			mlx4_log_num_mgm_entry_size, int, 0444);
MODULE_PARM_DESC(log_num_mgm_entry_size, "log mgm size, which defines the"
					 " number of QPs per MCG; for example,"
					 " 10 gives 248. Range: 7 <="
					 " log_num_mgm_entry_size <= 12."
					 " To activate device managed"
					 " flow steering when available, set to -1");
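/* The "10 gives 248" example above follows from the per-entry math in
 * slave_adjust_steering_mode() below: a 2^10 = 1024-byte MGM entry has
 * 1024/16 - 2 = 62 sixteen-byte lines left after the two leading
 * control/GID lines, each holding 4 QPNs, so 4 * 62 = 248 QPs per MCG.
 */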

static bool enable_64b_cqe_eqe = true;
module_param(enable_64b_cqe_eqe, bool, 0444);
MODULE_PARM_DESC(enable_64b_cqe_eqe,
		 "Enable 64 byte CQEs/EQEs when the FW supports this (default: True)");

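/* PF context behaviours this driver understands; mlx4_slave_cap() fails
 * the probe if QUERY_FUNC_CAP reports any behaviour bit outside this
 * mask.
 */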
#define PF_CONTEXT_BEHAVIOUR_MASK	(MLX4_FUNC_CAP_64B_EQE_CQE | \
					 MLX4_FUNC_CAP_EQE_CQE_STRIDE | \
					 MLX4_FUNC_CAP_DMFS_A0_STATIC)

#define RESET_PERSIST_MASK_FLAGS	(MLX4_FLAG_SRIOV)

static char mlx4_version[] =
	DRV_NAME ": Mellanox ConnectX core driver v"
	DRV_VERSION " (" DRV_RELDATE ")\n";

static struct mlx4_profile default_profile = {
	.num_qp		= 1 << 18,
	.num_srq	= 1 << 16,
	.rdmarc_per_qp	= 1 << 4,
	.num_cq		= 1 << 16,
	.num_mcg	= 1 << 13,
	.num_mpt	= 1 << 19,
	.num_mtt	= 1 << 20, /* It is really num mtt segments */
};

static struct mlx4_profile low_mem_profile = {
	.num_qp		= 1 << 17,
	.num_srq	= 1 << 6,
	.rdmarc_per_qp	= 1 << 4,
	.num_cq		= 1 << 8,
	.num_mcg	= 1 << 8,
	.num_mpt	= 1 << 9,
	.num_mtt	= 1 << 7,
};

static int log_num_mac = 7;
module_param_named(log_num_mac, log_num_mac, int, 0444);
MODULE_PARM_DESC(log_num_mac, "Log2 max number of MACs per ETH port (1-7)");

static int log_num_vlan;
module_param_named(log_num_vlan, log_num_vlan, int, 0444);
MODULE_PARM_DESC(log_num_vlan, "Log2 max number of VLANs per ETH port (0-7)");
/* Log2 max number of VLANs per ETH port (0-7) */
#define MLX4_LOG_NUM_VLANS 7
#define MLX4_MIN_LOG_NUM_VLANS 0
#define MLX4_MIN_LOG_NUM_MAC 1

static bool use_prio;
module_param_named(use_prio, use_prio, bool, 0444);
MODULE_PARM_DESC(use_prio, "Enable steering by VLAN priority on ETH ports (deprecated)");

int log_mtts_per_seg = ilog2(MLX4_MTT_ENTRY_PER_SEG);
module_param_named(log_mtts_per_seg, log_mtts_per_seg, int, 0444);
MODULE_PARM_DESC(log_mtts_per_seg, "Log2 number of MTT entries per segment (1-7)");

static int port_type_array[2] = {MLX4_PORT_TYPE_NONE, MLX4_PORT_TYPE_NONE};
static int arr_argc = 2;
module_param_array(port_type_array, int, &arr_argc, 0444);
MODULE_PARM_DESC(port_type_array, "Array of port types: HW_DEFAULT (0) is default, "
				"1 for IB, 2 for Ethernet");

struct mlx4_port_config {
	struct list_head list;
	enum mlx4_port_type port_type[MLX4_MAX_PORTS + 1];
	struct pci_dev *pdev;
};

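/* Number of PFs currently going through their own bring-up; VF probing
 * is expected to defer while this is non-zero (inferred from the
 * counter's users later in the driver).
 */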
static atomic_t pf_loading = ATOMIC_INIT(0);

int mlx4_check_port_params(struct mlx4_dev *dev,
			   enum mlx4_port_type *port_type)
{
	int i;

	if (!(dev->caps.flags & MLX4_DEV_CAP_FLAG_DPDP)) {
		for (i = 0; i < dev->caps.num_ports - 1; i++) {
			if (port_type[i] != port_type[i + 1]) {
				mlx4_err(dev, "Only same port types supported on this HCA, aborting\n");
				return -EINVAL;
			}
		}
	}

	for (i = 0; i < dev->caps.num_ports; i++) {
		if (!(port_type[i] & dev->caps.supported_type[i+1])) {
			mlx4_err(dev, "Requested port type for port %d is not supported on this HCA\n",
				 i + 1);
			return -EINVAL;
		}
	}
	return 0;
}

static void mlx4_set_port_mask(struct mlx4_dev *dev)
{
	int i;

	for (i = 1; i <= dev->caps.num_ports; ++i)
		dev->caps.port_mask[i] = dev->caps.port_type[i];
}

enum {
	MLX4_QUERY_FUNC_NUM_SYS_EQS = 1 << 0,
};

static int mlx4_query_func(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
{
	int err = 0;
	struct mlx4_func func;

	if (dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_SYS_EQS) {
		err = mlx4_QUERY_FUNC(dev, &func, 0);
		if (err) {
			mlx4_err(dev, "QUERY_FUNC command failed, aborting.\n");
			return err;
		}
		dev_cap->max_eqs = func.max_eq;
		dev_cap->reserved_eqs = func.rsvd_eqs;
		dev_cap->reserved_uars = func.rsvd_uars;
		err |= MLX4_QUERY_FUNC_NUM_SYS_EQS;
	}
	return err;
}

static void mlx4_enable_cqe_eqe_stride(struct mlx4_dev *dev)
{
	struct mlx4_caps *dev_cap = &dev->caps;

	/* Not supported by FW, or cancelled by the user */
	if (!(dev_cap->flags2 & MLX4_DEV_CAP_FLAG2_EQE_STRIDE) ||
	    !(dev_cap->flags2 & MLX4_DEV_CAP_FLAG2_CQE_STRIDE))
		return;

	/* Must have 64B CQE/EQE enabled by FW to use the bigger stride.
	 * When FW has NCSI it may decide not to report 64B CQE/EQEs.
	 */
	if (!(dev_cap->flags & MLX4_DEV_CAP_FLAG_64B_EQE) ||
	    !(dev_cap->flags & MLX4_DEV_CAP_FLAG_64B_CQE)) {
		dev_cap->flags2 &= ~MLX4_DEV_CAP_FLAG2_CQE_STRIDE;
		dev_cap->flags2 &= ~MLX4_DEV_CAP_FLAG2_EQE_STRIDE;
		return;
	}

	if (cache_line_size() == 128 || cache_line_size() == 256) {
		mlx4_dbg(dev, "Enabling CQE stride, cacheline supported\n");
		/* Changing the real data inside CQE size to 32B */
		dev_cap->flags &= ~MLX4_DEV_CAP_FLAG_64B_CQE;
		dev_cap->flags &= ~MLX4_DEV_CAP_FLAG_64B_EQE;

		if (mlx4_is_master(dev))
			dev_cap->function_caps |= MLX4_FUNC_CAP_EQE_CQE_STRIDE;
	} else {
		if (cache_line_size() != 32 && cache_line_size() != 64)
			mlx4_dbg(dev, "Disabling CQE stride, cacheline size unsupported\n");
		dev_cap->flags2 &= ~MLX4_DEV_CAP_FLAG2_CQE_STRIDE;
		dev_cap->flags2 &= ~MLX4_DEV_CAP_FLAG2_EQE_STRIDE;
	}
}

static int _mlx4_dev_port(struct mlx4_dev *dev, int port,
			  struct mlx4_port_cap *port_cap)
{
	dev->caps.vl_cap[port]	    = port_cap->max_vl;
	dev->caps.ib_mtu_cap[port]	    = port_cap->ib_mtu;
	dev->phys_caps.gid_phys_table_len[port]  = port_cap->max_gids;
	dev->phys_caps.pkey_phys_table_len[port] = port_cap->max_pkeys;
	/* set gid and pkey table operating lengths by default
	 * to non-sriov values
	 */
	dev->caps.gid_table_len[port]  = port_cap->max_gids;
	dev->caps.pkey_table_len[port] = port_cap->max_pkeys;
	dev->caps.port_width_cap[port] = port_cap->max_port_width;
	dev->caps.eth_mtu_cap[port]    = port_cap->eth_mtu;
	dev->caps.def_mac[port]        = port_cap->def_mac;
	dev->caps.supported_type[port] = port_cap->supported_port_types;
	dev->caps.suggested_type[port] = port_cap->suggested_type;
	dev->caps.default_sense[port] = port_cap->default_sense;
	dev->caps.trans_type[port]	    = port_cap->trans_type;
	dev->caps.vendor_oui[port]     = port_cap->vendor_oui;
	dev->caps.wavelength[port]     = port_cap->wavelength;
	dev->caps.trans_code[port]     = port_cap->trans_code;

	return 0;
}

static int mlx4_dev_port(struct mlx4_dev *dev, int port,
			 struct mlx4_port_cap *port_cap)
{
	int err = 0;

	err = mlx4_QUERY_PORT(dev, port, port_cap);

	if (err)
		mlx4_err(dev, "QUERY_PORT command failed.\n");

	return err;
}

static inline void mlx4_enable_ignore_fcs(struct mlx4_dev *dev)
{
	if (!(dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_IGNORE_FCS))
		return;

	if (mlx4_is_mfunc(dev)) {
		mlx4_dbg(dev, "SRIOV mode - Disabling Ignore FCS\n");
		dev->caps.flags2 &= ~MLX4_DEV_CAP_FLAG2_IGNORE_FCS;
		return;
	}

	if (!(dev->caps.flags & MLX4_DEV_CAP_FLAG_FCS_KEEP)) {
		mlx4_dbg(dev,
			 "Keep FCS is not supported - Disabling Ignore FCS\n");
		dev->caps.flags2 &= ~MLX4_DEV_CAP_FLAG2_IGNORE_FCS;
		return;
	}
}

#define MLX4_A0_STEERING_TABLE_SIZE	256
static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
{
	int err;
	int i;

	err = mlx4_QUERY_DEV_CAP(dev, dev_cap);
	if (err) {
		mlx4_err(dev, "QUERY_DEV_CAP command failed, aborting\n");
		return err;
	}
	mlx4_dev_cap_dump(dev, dev_cap);

	if (dev_cap->min_page_sz > PAGE_SIZE) {
		mlx4_err(dev, "HCA minimum page size of %d bigger than kernel PAGE_SIZE of %ld, aborting\n",
			 dev_cap->min_page_sz, PAGE_SIZE);
		return -ENODEV;
	}
	if (dev_cap->num_ports > MLX4_MAX_PORTS) {
		mlx4_err(dev, "HCA has %d ports, but we only support %d, aborting\n",
			 dev_cap->num_ports, MLX4_MAX_PORTS);
		return -ENODEV;
	}

	if (dev_cap->uar_size > pci_resource_len(dev->persist->pdev, 2)) {
		mlx4_err(dev, "HCA reported UAR size of 0x%x bigger than PCI resource 2 size of 0x%llx, aborting\n",
			 dev_cap->uar_size,
			 (unsigned long long)
			 pci_resource_len(dev->persist->pdev, 2));
		return -ENODEV;
	}

	dev->caps.num_ports	     = dev_cap->num_ports;
	dev->caps.num_sys_eqs = dev_cap->num_sys_eqs;
	dev->phys_caps.num_phys_eqs = dev_cap->flags2 & MLX4_DEV_CAP_FLAG2_SYS_EQS ?
				      dev->caps.num_sys_eqs :
				      MLX4_MAX_EQ_NUM;
	for (i = 1; i <= dev->caps.num_ports; ++i) {
		err = _mlx4_dev_port(dev, i, dev_cap->port_cap + i);
		if (err) {
			mlx4_err(dev, "QUERY_PORT command failed, aborting\n");
			return err;
		}
	}

	dev->caps.uar_page_size	     = PAGE_SIZE;
	dev->caps.num_uars	     = dev_cap->uar_size / PAGE_SIZE;
	dev->caps.local_ca_ack_delay = dev_cap->local_ca_ack_delay;
	dev->caps.bf_reg_size	     = dev_cap->bf_reg_size;
	dev->caps.bf_regs_per_page   = dev_cap->bf_regs_per_page;
	dev->caps.max_sq_sg	     = dev_cap->max_sq_sg;
	dev->caps.max_rq_sg	     = dev_cap->max_rq_sg;
	dev->caps.max_wqes	     = dev_cap->max_qp_sz;
	dev->caps.max_qp_init_rdma   = dev_cap->max_requester_per_qp;
	dev->caps.max_srq_wqes	     = dev_cap->max_srq_sz;
	dev->caps.max_srq_sge	     = dev_cap->max_rq_sg - 1;
	dev->caps.reserved_srqs	     = dev_cap->reserved_srqs;
	dev->caps.max_sq_desc_sz     = dev_cap->max_sq_desc_sz;
	dev->caps.max_rq_desc_sz     = dev_cap->max_rq_desc_sz;
	/*
	 * Subtract 1 from the limit because we need to allocate a
	 * spare CQE so the HCA HW can tell the difference between an
	 * empty CQ and a full CQ.
	 */
	dev->caps.max_cqes	     = dev_cap->max_cq_sz - 1;
	dev->caps.reserved_cqs	     = dev_cap->reserved_cqs;
	dev->caps.reserved_eqs	     = dev_cap->reserved_eqs;
	dev->caps.reserved_mtts	     = dev_cap->reserved_mtts;
	dev->caps.reserved_mrws	     = dev_cap->reserved_mrws;

	/* The first 128 UARs are used for EQ doorbells */
	dev->caps.reserved_uars	     = max_t(int, 128, dev_cap->reserved_uars);
	dev->caps.reserved_pds	     = dev_cap->reserved_pds;
	dev->caps.reserved_xrcds     = (dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC) ?
					dev_cap->reserved_xrcds : 0;
	dev->caps.max_xrcds	     = (dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC) ?
					dev_cap->max_xrcds : 0;
	dev->caps.mtt_entry_sz	     = dev_cap->mtt_entry_sz;

	dev->caps.max_msg_sz	     = dev_cap->max_msg_sz;
	dev->caps.page_size_cap	     = ~(u32) (dev_cap->min_page_sz - 1);
	dev->caps.flags		     = dev_cap->flags;
	dev->caps.flags2	     = dev_cap->flags2;
	dev->caps.bmme_flags	     = dev_cap->bmme_flags;
	dev->caps.reserved_lkey	     = dev_cap->reserved_lkey;
	dev->caps.stat_rate_support  = dev_cap->stat_rate_support;
	dev->caps.max_gso_sz	     = dev_cap->max_gso_sz;
	dev->caps.max_rss_tbl_sz     = dev_cap->max_rss_tbl_sz;

	/* Sense port always allowed on supported devices for ConnectX-1 and -2 */
	if (mlx4_priv(dev)->pci_dev_data & MLX4_PCI_DEV_FORCE_SENSE_PORT)
		dev->caps.flags |= MLX4_DEV_CAP_FLAG_SENSE_SUPPORT;
	/* Don't do sense port on multifunction devices (for now at least) */
	if (mlx4_is_mfunc(dev))
		dev->caps.flags &= ~MLX4_DEV_CAP_FLAG_SENSE_SUPPORT;

	if (mlx4_low_memory_profile()) {
		dev->caps.log_num_macs	= MLX4_MIN_LOG_NUM_MAC;
		dev->caps.log_num_vlans = MLX4_MIN_LOG_NUM_VLANS;
	} else {
		dev->caps.log_num_macs	= log_num_mac;
		dev->caps.log_num_vlans = MLX4_LOG_NUM_VLANS;
	}

	for (i = 1; i <= dev->caps.num_ports; ++i) {
		dev->caps.port_type[i] = MLX4_PORT_TYPE_NONE;
		if (dev->caps.supported_type[i]) {
			/* if only ETH is supported - assign ETH */
			if (dev->caps.supported_type[i] == MLX4_PORT_TYPE_ETH)
				dev->caps.port_type[i] = MLX4_PORT_TYPE_ETH;
			/* if only IB is supported, assign IB */
			else if (dev->caps.supported_type[i] ==
				 MLX4_PORT_TYPE_IB)
				dev->caps.port_type[i] = MLX4_PORT_TYPE_IB;
			else {
				/* if IB and ETH are supported, we set the port
				 * type according to user selection of port type;
				 * if user selected none, take the FW hint */
				if (port_type_array[i - 1] == MLX4_PORT_TYPE_NONE)
					dev->caps.port_type[i] = dev->caps.suggested_type[i] ?
						MLX4_PORT_TYPE_ETH : MLX4_PORT_TYPE_IB;
				else
					dev->caps.port_type[i] = port_type_array[i - 1];
			}
		}
		/*
		 * Link sensing is allowed on the port if 3 conditions are true:
		 * 1. Both protocols are supported on the port.
		 * 2. Different types are supported on the port.
		 * 3. FW declared that it supports link sensing.
		 */
		mlx4_priv(dev)->sense.sense_allowed[i] =
			((dev->caps.supported_type[i] == MLX4_PORT_TYPE_AUTO) &&
			 (dev->caps.flags & MLX4_DEV_CAP_FLAG_DPDP) &&
			 (dev->caps.flags & MLX4_DEV_CAP_FLAG_SENSE_SUPPORT));

		/*
		 * If "default_sense" bit is set, we move the port to "AUTO" mode
		 * and perform sense_port FW command to try and set the correct
		 * port type from beginning
		 */
		if (mlx4_priv(dev)->sense.sense_allowed[i] && dev->caps.default_sense[i]) {
			enum mlx4_port_type sensed_port = MLX4_PORT_TYPE_NONE;
			dev->caps.possible_type[i] = MLX4_PORT_TYPE_AUTO;
			mlx4_SENSE_PORT(dev, i, &sensed_port);
			if (sensed_port != MLX4_PORT_TYPE_NONE)
				dev->caps.port_type[i] = sensed_port;
		} else {
			dev->caps.possible_type[i] = dev->caps.port_type[i];
		}

		if (dev->caps.log_num_macs > dev_cap->port_cap[i].log_max_macs) {
			dev->caps.log_num_macs = dev_cap->port_cap[i].log_max_macs;
			mlx4_warn(dev, "Requested number of MACs is too much for port %d, reducing to %d\n",
				  i, 1 << dev->caps.log_num_macs);
		}
		if (dev->caps.log_num_vlans > dev_cap->port_cap[i].log_max_vlans) {
			dev->caps.log_num_vlans = dev_cap->port_cap[i].log_max_vlans;
			mlx4_warn(dev, "Requested number of VLANs is too much for port %d, reducing to %d\n",
				  i, 1 << dev->caps.log_num_vlans);
		}
	}

	if (mlx4_is_master(dev) && (dev->caps.num_ports == 2) &&
	    (port_type_array[0] == MLX4_PORT_TYPE_IB) &&
	    (port_type_array[1] == MLX4_PORT_TYPE_ETH)) {
		mlx4_warn(dev,
			  "Granular QoS per VF not supported with IB/Eth configuration\n");
		dev->caps.flags2 &= ~MLX4_DEV_CAP_FLAG2_QOS_VPP;
	}

	dev->caps.max_counters = dev_cap->max_counters;

	dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW] = dev_cap->reserved_qps;
	dev->caps.reserved_qps_cnt[MLX4_QP_REGION_ETH_ADDR] =
		dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FC_ADDR] =
		(1 << dev->caps.log_num_macs) *
		(1 << dev->caps.log_num_vlans) *
		dev->caps.num_ports;
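	/* e.g. with the default log_num_macs = 7, log_num_vlans = 7 and a
	 * two-port device, each of the ETH_ADDR and FC_ADDR regions above
	 * reserves 128 * 128 * 2 = 32768 QPs (illustrative numbers).
	 */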
	dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FC_EXCH] = MLX4_NUM_FEXCH;

	if (dev_cap->dmfs_high_rate_qpn_base > 0 &&
	    dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_FS_EN)
		dev->caps.dmfs_high_rate_qpn_base = dev_cap->dmfs_high_rate_qpn_base;
	else
		dev->caps.dmfs_high_rate_qpn_base =
			dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW];

	if (dev_cap->dmfs_high_rate_qpn_range > 0 &&
	    dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_FS_EN) {
		dev->caps.dmfs_high_rate_qpn_range = dev_cap->dmfs_high_rate_qpn_range;
		dev->caps.dmfs_high_steer_mode = MLX4_STEERING_DMFS_A0_DEFAULT;
		dev->caps.flags2 |= MLX4_DEV_CAP_FLAG2_FS_A0;
	} else {
		dev->caps.dmfs_high_steer_mode = MLX4_STEERING_DMFS_A0_NOT_SUPPORTED;
		dev->caps.dmfs_high_rate_qpn_base =
			dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW];
		dev->caps.dmfs_high_rate_qpn_range = MLX4_A0_STEERING_TABLE_SIZE;
	}

	dev->caps.rl_caps = dev_cap->rl_caps;

	dev->caps.reserved_qps_cnt[MLX4_QP_REGION_RSS_RAW_ETH] =
		dev->caps.dmfs_high_rate_qpn_range;

	dev->caps.reserved_qps = dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW] +
		dev->caps.reserved_qps_cnt[MLX4_QP_REGION_ETH_ADDR] +
		dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FC_ADDR] +
		dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FC_EXCH];

	dev->caps.sqp_demux = (mlx4_is_master(dev)) ? MLX4_MAX_NUM_SLAVES : 0;

	if (!enable_64b_cqe_eqe && !mlx4_is_slave(dev)) {
		if (dev_cap->flags &
		    (MLX4_DEV_CAP_FLAG_64B_CQE | MLX4_DEV_CAP_FLAG_64B_EQE)) {
			mlx4_warn(dev, "64B EQEs/CQEs supported by the device but not enabled\n");
			dev->caps.flags &= ~MLX4_DEV_CAP_FLAG_64B_CQE;
			dev->caps.flags &= ~MLX4_DEV_CAP_FLAG_64B_EQE;
		}

		if (dev_cap->flags2 &
		    (MLX4_DEV_CAP_FLAG2_CQE_STRIDE |
		     MLX4_DEV_CAP_FLAG2_EQE_STRIDE)) {
			mlx4_warn(dev, "Disabling EQE/CQE stride per user request\n");
			dev_cap->flags2 &= ~MLX4_DEV_CAP_FLAG2_CQE_STRIDE;
			dev_cap->flags2 &= ~MLX4_DEV_CAP_FLAG2_EQE_STRIDE;
		}
	}

	if ((dev->caps.flags &
	    (MLX4_DEV_CAP_FLAG_64B_CQE | MLX4_DEV_CAP_FLAG_64B_EQE)) &&
	    mlx4_is_master(dev))
		dev->caps.function_caps |= MLX4_FUNC_CAP_64B_EQE_CQE;

	if (!mlx4_is_slave(dev)) {
		mlx4_enable_cqe_eqe_stride(dev);
		dev->caps.alloc_res_qp_mask =
			(dev->caps.bf_reg_size ? MLX4_RESERVE_ETH_BF_QP : 0) |
			MLX4_RESERVE_A0_QP;

		if (!(dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_ETS_CFG) &&
		    dev->caps.flags & MLX4_DEV_CAP_FLAG_SET_ETH_SCHED) {
			mlx4_warn(dev, "Old device ETS support detected\n");
			mlx4_warn(dev, "Consider upgrading device FW.\n");
			dev->caps.flags2 |= MLX4_DEV_CAP_FLAG2_ETS_CFG;
		}

	} else {
		dev->caps.alloc_res_qp_mask = 0;
	}

	mlx4_enable_ignore_fcs(dev);

	return 0;
}

static int mlx4_get_pcie_dev_link_caps(struct mlx4_dev *dev,
				       enum pci_bus_speed *speed,
				       enum pcie_link_width *width)
{
	u32 lnkcap1, lnkcap2;
	int err1, err2;

#define  PCIE_MLW_CAP_SHIFT 4	/* start of MLW mask in link capabilities */

	*speed = PCI_SPEED_UNKNOWN;
	*width = PCIE_LNK_WIDTH_UNKNOWN;

	err1 = pcie_capability_read_dword(dev->persist->pdev, PCI_EXP_LNKCAP,
					  &lnkcap1);
	err2 = pcie_capability_read_dword(dev->persist->pdev, PCI_EXP_LNKCAP2,
					  &lnkcap2);
	if (!err2 && lnkcap2) { /* PCIe r3.0-compliant */
		if (lnkcap2 & PCI_EXP_LNKCAP2_SLS_8_0GB)
			*speed = PCIE_SPEED_8_0GT;
		else if (lnkcap2 & PCI_EXP_LNKCAP2_SLS_5_0GB)
			*speed = PCIE_SPEED_5_0GT;
		else if (lnkcap2 & PCI_EXP_LNKCAP2_SLS_2_5GB)
			*speed = PCIE_SPEED_2_5GT;
	}
	if (!err1) {
		*width = (lnkcap1 & PCI_EXP_LNKCAP_MLW) >> PCIE_MLW_CAP_SHIFT;
		if (!lnkcap2) { /* pre-r3.0 */
			if (lnkcap1 & PCI_EXP_LNKCAP_SLS_5_0GB)
				*speed = PCIE_SPEED_5_0GT;
			else if (lnkcap1 & PCI_EXP_LNKCAP_SLS_2_5GB)
				*speed = PCIE_SPEED_2_5GT;
		}
	}

	if (*speed == PCI_SPEED_UNKNOWN || *width == PCIE_LNK_WIDTH_UNKNOWN) {
		return err1 ? err1 :
			err2 ? err2 : -EINVAL;
	}
	return 0;
}

static void mlx4_check_pcie_caps(struct mlx4_dev *dev)
{
	enum pcie_link_width width, width_cap;
	enum pci_bus_speed speed, speed_cap;
	int err;

#define PCIE_SPEED_STR(speed) \
	(speed == PCIE_SPEED_8_0GT ? "8.0GT/s" : \
	 speed == PCIE_SPEED_5_0GT ? "5.0GT/s" : \
	 speed == PCIE_SPEED_2_5GT ? "2.5GT/s" : \
	 "Unknown")

	err = mlx4_get_pcie_dev_link_caps(dev, &speed_cap, &width_cap);
	if (err) {
		mlx4_warn(dev,
			  "Unable to determine PCIe device BW capabilities\n");
		return;
	}

	err = pcie_get_minimum_link(dev->persist->pdev, &speed, &width);
	if (err || speed == PCI_SPEED_UNKNOWN ||
	    width == PCIE_LNK_WIDTH_UNKNOWN) {
		mlx4_warn(dev,
			  "Unable to determine PCI device chain minimum BW\n");
		return;
	}

	if (width != width_cap || speed != speed_cap)
		mlx4_warn(dev,
			  "PCIe BW is different than device's capability\n");

	mlx4_info(dev, "PCIe link speed is %s, device supports %s\n",
		  PCIE_SPEED_STR(speed), PCIE_SPEED_STR(speed_cap));
	mlx4_info(dev, "PCIe link width is x%d, device supports x%d\n",
		  width, width_cap);
	return;
}

/* Check whether any VFs are still live; return how many are. */
static int mlx4_how_many_lives_vf(struct mlx4_dev *dev)
{
	struct mlx4_priv *priv = mlx4_priv(dev);
	struct mlx4_slave_state *s_state;
	int i;
	int ret = 0;

	for (i = 1 /* slave 0 is the PPF itself */; i < dev->num_slaves; ++i) {
		s_state = &priv->mfunc.master.slave_state[i];
		if (s_state->active && s_state->last_cmd !=
		    MLX4_COMM_CMD_RESET) {
			mlx4_warn(dev, "%s: slave: %d is still active\n",
				  __func__, i);
			ret++;
		}
	}
	return ret;
}

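/* mlx4_get_parav_qkey() below hands out qkeys from a reserved window:
 * a QPN inside the proxy or tunnel SQP ranges maps to
 * MLX4_RESERVED_QKEY_BASE plus its offset within its own range, and
 * anything outside those ranges is rejected with -EINVAL.
 */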
int mlx4_get_parav_qkey(struct mlx4_dev *dev, u32 qpn, u32 *qkey)
{
	u32 qk = MLX4_RESERVED_QKEY_BASE;

	if (qpn >= dev->phys_caps.base_tunnel_sqpn + 8 * MLX4_MFUNC_MAX ||
	    qpn < dev->phys_caps.base_proxy_sqpn)
		return -EINVAL;

	if (qpn >= dev->phys_caps.base_tunnel_sqpn)
		/* tunnel qp */
		qk += qpn - dev->phys_caps.base_tunnel_sqpn;
	else
		qk += qpn - dev->phys_caps.base_proxy_sqpn;
	*qkey = qk;
	return 0;
}
EXPORT_SYMBOL(mlx4_get_parav_qkey);

void mlx4_sync_pkey_table(struct mlx4_dev *dev, int slave, int port, int i, int val)
{
	struct mlx4_priv *priv = container_of(dev, struct mlx4_priv, dev);

	if (!mlx4_is_master(dev))
		return;

	priv->virt2phys_pkey[slave][port - 1][i] = val;
}
EXPORT_SYMBOL(mlx4_sync_pkey_table);

void mlx4_put_slave_node_guid(struct mlx4_dev *dev, int slave, __be64 guid)
{
	struct mlx4_priv *priv = container_of(dev, struct mlx4_priv, dev);

	if (!mlx4_is_master(dev))
		return;

	priv->slave_node_guids[slave] = guid;
}
EXPORT_SYMBOL(mlx4_put_slave_node_guid);

__be64 mlx4_get_slave_node_guid(struct mlx4_dev *dev, int slave)
{
	struct mlx4_priv *priv = container_of(dev, struct mlx4_priv, dev);

	if (!mlx4_is_master(dev))
		return 0;

	return priv->slave_node_guids[slave];
}
EXPORT_SYMBOL(mlx4_get_slave_node_guid);

int mlx4_is_slave_active(struct mlx4_dev *dev, int slave)
{
	struct mlx4_priv *priv = mlx4_priv(dev);
	struct mlx4_slave_state *s_slave;

	if (!mlx4_is_master(dev))
		return 0;

	s_slave = &priv->mfunc.master.slave_state[slave];
	return !!s_slave->active;
}
EXPORT_SYMBOL(mlx4_is_slave_active);

static void slave_adjust_steering_mode(struct mlx4_dev *dev,
				       struct mlx4_dev_cap *dev_cap,
				       struct mlx4_init_hca_param *hca_param)
{
	dev->caps.steering_mode = hca_param->steering_mode;
	if (dev->caps.steering_mode == MLX4_STEERING_MODE_DEVICE_MANAGED) {
		dev->caps.num_qp_per_mgm = dev_cap->fs_max_num_qp_per_entry;
		dev->caps.fs_log_max_ucast_qp_range_size =
			dev_cap->fs_log_max_ucast_qp_range_size;
	} else
		dev->caps.num_qp_per_mgm =
			4 * ((1 << hca_param->log_mc_entry_sz)/16 - 2);

	mlx4_dbg(dev, "Steering mode is: %s\n",
		 mlx4_steering_mode_str(dev->caps.steering_mode));
}

static int mlx4_slave_cap(struct mlx4_dev *dev)
{
	int			   err;
	u32			   page_size;
	struct mlx4_dev_cap	   dev_cap;
	struct mlx4_func_cap	   func_cap;
	struct mlx4_init_hca_param hca_param;
	u8			   i;

	memset(&hca_param, 0, sizeof(hca_param));
	err = mlx4_QUERY_HCA(dev, &hca_param);
	if (err) {
		mlx4_err(dev, "QUERY_HCA command failed, aborting\n");
		return err;
	}

	/* fail if the HCA has an unknown global capability;
	 * at this time, global_caps should always be zero
	 */
	if (hca_param.global_caps) {
		mlx4_err(dev, "Unknown hca global capabilities\n");
		return -ENOSYS;
	}

	mlx4_log_num_mgm_entry_size = hca_param.log_mc_entry_sz;

	dev->caps.hca_core_clock = hca_param.hca_core_clock;

	memset(&dev_cap, 0, sizeof(dev_cap));
	dev->caps.max_qp_dest_rdma = 1 << hca_param.log_rd_per_qp;
	err = mlx4_dev_cap(dev, &dev_cap);
	if (err) {
		mlx4_err(dev, "QUERY_DEV_CAP command failed, aborting\n");
		return err;
	}

	err = mlx4_QUERY_FW(dev);
	if (err)
		mlx4_err(dev, "QUERY_FW command failed: could not get FW version\n");

	page_size = ~dev->caps.page_size_cap + 1;
	mlx4_warn(dev, "HCA minimum page size: %d\n", page_size);
	if (page_size > PAGE_SIZE) {
		mlx4_err(dev, "HCA minimum page size of %d bigger than kernel PAGE_SIZE of %ld, aborting\n",
			 page_size, PAGE_SIZE);
		return -ENODEV;
	}

	/* slave gets its UAR page size from the QUERY_HCA fw command */
	dev->caps.uar_page_size = 1 << (hca_param.uar_page_sz + 12);

	/* TODO: relax this assumption */
	if (dev->caps.uar_page_size != PAGE_SIZE) {
		mlx4_err(dev, "UAR size:%d != kernel PAGE_SIZE of %ld\n",
			 dev->caps.uar_page_size, PAGE_SIZE);
		return -ENODEV;
	}

	memset(&func_cap, 0, sizeof(func_cap));
	err = mlx4_QUERY_FUNC_CAP(dev, 0, &func_cap);
	if (err) {
		mlx4_err(dev, "QUERY_FUNC_CAP general command failed, aborting (%d)\n",
			 err);
		return err;
	}

	if ((func_cap.pf_context_behaviour | PF_CONTEXT_BEHAVIOUR_MASK) !=
	    PF_CONTEXT_BEHAVIOUR_MASK) {
		mlx4_err(dev, "Unknown pf context behaviour %x known flags %x\n",
			 func_cap.pf_context_behaviour, PF_CONTEXT_BEHAVIOUR_MASK);
		return -ENOSYS;
	}

	dev->caps.num_ports		= func_cap.num_ports;
	dev->quotas.qp			= func_cap.qp_quota;
	dev->quotas.srq			= func_cap.srq_quota;
	dev->quotas.cq			= func_cap.cq_quota;
	dev->quotas.mpt			= func_cap.mpt_quota;
	dev->quotas.mtt			= func_cap.mtt_quota;
	dev->caps.num_qps		= 1 << hca_param.log_num_qps;
	dev->caps.num_srqs		= 1 << hca_param.log_num_srqs;
	dev->caps.num_cqs		= 1 << hca_param.log_num_cqs;
	dev->caps.num_mpts		= 1 << hca_param.log_mpt_sz;
	dev->caps.num_eqs		= func_cap.max_eq;
	dev->caps.reserved_eqs		= func_cap.reserved_eq;
	dev->caps.reserved_lkey		= func_cap.reserved_lkey;
	dev->caps.num_pds		= MLX4_NUM_PDS;
	dev->caps.num_mgms		= 0;
	dev->caps.num_amgms		= 0;

	if (dev->caps.num_ports > MLX4_MAX_PORTS) {
		mlx4_err(dev, "HCA has %d ports, but we only support %d, aborting\n",
			 dev->caps.num_ports, MLX4_MAX_PORTS);
		return -ENODEV;
	}

	dev->caps.qp0_qkey = kcalloc(dev->caps.num_ports, sizeof(u32), GFP_KERNEL);
	dev->caps.qp0_tunnel = kcalloc(dev->caps.num_ports, sizeof(u32), GFP_KERNEL);
	dev->caps.qp0_proxy = kcalloc(dev->caps.num_ports, sizeof(u32), GFP_KERNEL);
	dev->caps.qp1_tunnel = kcalloc(dev->caps.num_ports, sizeof(u32), GFP_KERNEL);
	dev->caps.qp1_proxy = kcalloc(dev->caps.num_ports, sizeof(u32), GFP_KERNEL);

	if (!dev->caps.qp0_tunnel || !dev->caps.qp0_proxy ||
	    !dev->caps.qp1_tunnel || !dev->caps.qp1_proxy ||
	    !dev->caps.qp0_qkey) {
		err = -ENOMEM;
		goto err_mem;
	}

	for (i = 1; i <= dev->caps.num_ports; ++i) {
		err = mlx4_QUERY_FUNC_CAP(dev, i, &func_cap);
		if (err) {
			mlx4_err(dev, "QUERY_FUNC_CAP port command failed for port %d, aborting (%d)\n",
				 i, err);
			goto err_mem;
		}
		dev->caps.qp0_qkey[i - 1] = func_cap.qp0_qkey;
		dev->caps.qp0_tunnel[i - 1] = func_cap.qp0_tunnel_qpn;
		dev->caps.qp0_proxy[i - 1] = func_cap.qp0_proxy_qpn;
		dev->caps.qp1_tunnel[i - 1] = func_cap.qp1_tunnel_qpn;
		dev->caps.qp1_proxy[i - 1] = func_cap.qp1_proxy_qpn;
		dev->caps.port_mask[i] = dev->caps.port_type[i];
		dev->caps.phys_port_id[i] = func_cap.phys_port_id;
		if (mlx4_get_slave_pkey_gid_tbl_len(dev, i,
						    &dev->caps.gid_table_len[i],
						    &dev->caps.pkey_table_len[i])) {
			/* err was 0 here; set it so the failure is reported */
			err = -ENODEV;
			goto err_mem;
		}
	}

	if (dev->caps.uar_page_size * (dev->caps.num_uars -
				       dev->caps.reserved_uars) >
				       pci_resource_len(dev->persist->pdev,
							2)) {
		mlx4_err(dev, "HCA reported UAR region size of 0x%x bigger than PCI resource 2 size of 0x%llx, aborting\n",
			 dev->caps.uar_page_size * dev->caps.num_uars,
			 (unsigned long long)
			 pci_resource_len(dev->persist->pdev, 2));
		err = -ENODEV;
		goto err_mem;
	}

	if (hca_param.dev_cap_enabled & MLX4_DEV_CAP_64B_EQE_ENABLED) {
		dev->caps.eqe_size   = 64;
		dev->caps.eqe_factor = 1;
	} else {
		dev->caps.eqe_size   = 32;
		dev->caps.eqe_factor = 0;
	}

	if (hca_param.dev_cap_enabled & MLX4_DEV_CAP_64B_CQE_ENABLED) {
		dev->caps.cqe_size   = 64;
		dev->caps.userspace_caps |= MLX4_USER_DEV_CAP_LARGE_CQE;
	} else {
		dev->caps.cqe_size   = 32;
	}

	if (hca_param.dev_cap_enabled & MLX4_DEV_CAP_EQE_STRIDE_ENABLED) {
		dev->caps.eqe_size = hca_param.eqe_size;
		dev->caps.eqe_factor = 0;
	}

	if (hca_param.dev_cap_enabled & MLX4_DEV_CAP_CQE_STRIDE_ENABLED) {
		dev->caps.cqe_size = hca_param.cqe_size;
		/* User still needs to know when CQE > 32B */
		dev->caps.userspace_caps |= MLX4_USER_DEV_CAP_LARGE_CQE;
	}

	dev->caps.flags2 &= ~MLX4_DEV_CAP_FLAG2_TS;
	mlx4_warn(dev, "Timestamping is not supported in slave mode\n");

	slave_adjust_steering_mode(dev, &dev_cap, &hca_param);
	mlx4_dbg(dev, "RSS support for IP fragments is %s\n",
		 hca_param.rss_ip_frags ? "on" : "off");

	if (func_cap.extra_flags & MLX4_QUERY_FUNC_FLAGS_BF_RES_QP &&
	    dev->caps.bf_reg_size)
		dev->caps.alloc_res_qp_mask |= MLX4_RESERVE_ETH_BF_QP;

	if (func_cap.extra_flags & MLX4_QUERY_FUNC_FLAGS_A0_RES_QP)
		dev->caps.alloc_res_qp_mask |= MLX4_RESERVE_A0_QP;

	return 0;

err_mem:
	kfree(dev->caps.qp0_qkey);
	kfree(dev->caps.qp0_tunnel);
	kfree(dev->caps.qp0_proxy);
	kfree(dev->caps.qp1_tunnel);
	kfree(dev->caps.qp1_proxy);
	dev->caps.qp0_qkey = NULL;
	dev->caps.qp0_tunnel = NULL;
	dev->caps.qp0_proxy = NULL;
	dev->caps.qp1_tunnel = NULL;
	dev->caps.qp1_proxy = NULL;

	return err;
}

static void mlx4_request_modules(struct mlx4_dev *dev)
{
	int port;
	int has_ib_port = false;
	int has_eth_port = false;
#define EN_DRV_NAME	"mlx4_en"
#define IB_DRV_NAME	"mlx4_ib"

	for (port = 1; port <= dev->caps.num_ports; port++) {
		if (dev->caps.port_type[port] == MLX4_PORT_TYPE_IB)
			has_ib_port = true;
		else if (dev->caps.port_type[port] == MLX4_PORT_TYPE_ETH)
			has_eth_port = true;
	}

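	/* mlx4_ib is wanted even without IB ports: RoCE-capable devices
	 * (MLX4_DEV_CAP_FLAG_IBOE) need it for their Ethernet ports too.
	 */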
	if (has_eth_port)
		request_module_nowait(EN_DRV_NAME);
	if (has_ib_port || (dev->caps.flags & MLX4_DEV_CAP_FLAG_IBOE))
		request_module_nowait(IB_DRV_NAME);
}

/*
 * Change the port configuration of the device.
 * Every user of this function must hold the port mutex.
 */
int mlx4_change_port_types(struct mlx4_dev *dev,
			   enum mlx4_port_type *port_types)
{
	int err = 0;
	int change = 0;
	int port;

	for (port = 0; port < dev->caps.num_ports; port++) {
		/* Change the port type only if the new type is different
		 * from the current, and not set to Auto */
		if (port_types[port] != dev->caps.port_type[port + 1])
			change = 1;
	}
	if (change) {
		mlx4_unregister_device(dev);
		for (port = 1; port <= dev->caps.num_ports; port++) {
			mlx4_CLOSE_PORT(dev, port);
			dev->caps.port_type[port] = port_types[port - 1];
			err = mlx4_SET_PORT(dev, port, -1);
			if (err) {
				mlx4_err(dev, "Failed to set port %d, aborting\n",
					 port);
				goto out;
			}
		}
		mlx4_set_port_mask(dev);
		err = mlx4_register_device(dev);
		if (err) {
			mlx4_err(dev, "Failed to register device\n");
			goto out;
		}
		mlx4_request_modules(dev);
	}

out:
	return err;
}

static ssize_t show_port_type(struct device *dev,
			      struct device_attribute *attr,
			      char *buf)
{
	struct mlx4_port_info *info = container_of(attr, struct mlx4_port_info,
						   port_attr);
	struct mlx4_dev *mdev = info->dev;
	char type[8];

	sprintf(type, "%s",
		(mdev->caps.port_type[info->port] == MLX4_PORT_TYPE_IB) ?
		"ib" : "eth");
	if (mdev->caps.possible_type[info->port] == MLX4_PORT_TYPE_AUTO)
		sprintf(buf, "auto (%s)\n", type);
	else
		sprintf(buf, "%s\n", type);

	return strlen(buf);
}

static ssize_t set_port_type(struct device *dev,
			     struct device_attribute *attr,
			     const char *buf, size_t count)
{
	struct mlx4_port_info *info = container_of(attr, struct mlx4_port_info,
						   port_attr);
	struct mlx4_dev *mdev = info->dev;
	struct mlx4_priv *priv = mlx4_priv(mdev);
	enum mlx4_port_type types[MLX4_MAX_PORTS];
	enum mlx4_port_type new_types[MLX4_MAX_PORTS];
	static DEFINE_MUTEX(set_port_type_mutex);
	int i;
	int err = 0;

	mutex_lock(&set_port_type_mutex);

	if (!strcmp(buf, "ib\n"))
		info->tmp_type = MLX4_PORT_TYPE_IB;
	else if (!strcmp(buf, "eth\n"))
		info->tmp_type = MLX4_PORT_TYPE_ETH;
	else if (!strcmp(buf, "auto\n"))
		info->tmp_type = MLX4_PORT_TYPE_AUTO;
	else {
		mlx4_err(mdev, "%s is not a supported port type\n", buf);
		err = -EINVAL;
		goto err_out;
	}

	mlx4_stop_sense(mdev);
	mutex_lock(&priv->port_mutex);
	/* Possible type is always the one that was delivered */
	mdev->caps.possible_type[info->port] = info->tmp_type;

	for (i = 0; i < mdev->caps.num_ports; i++) {
		types[i] = priv->port[i+1].tmp_type ? priv->port[i+1].tmp_type :
					mdev->caps.possible_type[i+1];
		if (types[i] == MLX4_PORT_TYPE_AUTO)
			types[i] = mdev->caps.port_type[i+1];
	}

	if (!(mdev->caps.flags & MLX4_DEV_CAP_FLAG_DPDP) &&
	    !(mdev->caps.flags & MLX4_DEV_CAP_FLAG_SENSE_SUPPORT)) {
		for (i = 1; i <= mdev->caps.num_ports; i++) {
			if (mdev->caps.possible_type[i] == MLX4_PORT_TYPE_AUTO) {
				mdev->caps.possible_type[i] = mdev->caps.port_type[i];
				err = -EINVAL;
			}
		}
	}
	if (err) {
		mlx4_err(mdev, "Auto sensing is not supported on this HCA. Set only 'eth' or 'ib' for both ports (should be the same)\n");
		goto out;
	}

	mlx4_do_sense_ports(mdev, new_types, types);

	err = mlx4_check_port_params(mdev, new_types);
	if (err)
		goto out;

	/* We are about to apply the changes after the configuration
	 * was verified, no need to remember the temporary types
	 * any more */
	for (i = 0; i < mdev->caps.num_ports; i++)
		priv->port[i + 1].tmp_type = 0;

	err = mlx4_change_port_types(mdev, new_types);

out:
	mlx4_start_sense(mdev);
	mutex_unlock(&priv->port_mutex);
err_out:
	mutex_unlock(&set_port_type_mutex);

	return err ? err : count;
}

enum ibta_mtu {
	IB_MTU_256  = 1,
	IB_MTU_512  = 2,
	IB_MTU_1024 = 3,
	IB_MTU_2048 = 4,
	IB_MTU_4096 = 5
};

static inline int int_to_ibta_mtu(int mtu)
{
	switch (mtu) {
	case 256:  return IB_MTU_256;
	case 512:  return IB_MTU_512;
	case 1024: return IB_MTU_1024;
	case 2048: return IB_MTU_2048;
	case 4096: return IB_MTU_4096;
	default: return -1;
	}
}

static inline int ibta_mtu_to_int(enum ibta_mtu mtu)
{
	switch (mtu) {
	case IB_MTU_256:  return  256;
	case IB_MTU_512:  return  512;
	case IB_MTU_1024: return 1024;
	case IB_MTU_2048: return 2048;
	case IB_MTU_4096: return 4096;
	default: return -1;
	}
}

static ssize_t show_port_ib_mtu(struct device *dev,
			     struct device_attribute *attr,
			     char *buf)
{
	struct mlx4_port_info *info = container_of(attr, struct mlx4_port_info,
						   port_mtu_attr);
	struct mlx4_dev *mdev = info->dev;

	if (mdev->caps.port_type[info->port] == MLX4_PORT_TYPE_ETH)
		mlx4_warn(mdev, "port level mtu is only used for IB ports\n");

	sprintf(buf, "%d\n",
			ibta_mtu_to_int(mdev->caps.port_ib_mtu[info->port]));
	return strlen(buf);
}

static ssize_t set_port_ib_mtu(struct device *dev,
			     struct device_attribute *attr,
			     const char *buf, size_t count)
{
	struct mlx4_port_info *info = container_of(attr, struct mlx4_port_info,
						   port_mtu_attr);
	struct mlx4_dev *mdev = info->dev;
	struct mlx4_priv *priv = mlx4_priv(mdev);
	int err, port, mtu, ibta_mtu = -1;

	if (mdev->caps.port_type[info->port] == MLX4_PORT_TYPE_ETH) {
		mlx4_warn(mdev, "port level mtu is only used for IB ports\n");
		return -EINVAL;
	}

	err = kstrtoint(buf, 0, &mtu);
	if (!err)
		ibta_mtu = int_to_ibta_mtu(mtu);

	if (err || ibta_mtu < 0) {
		mlx4_err(mdev, "%s is not a valid IBTA MTU\n", buf);
		return -EINVAL;
	}

	mdev->caps.port_ib_mtu[info->port] = ibta_mtu;

	mlx4_stop_sense(mdev);
	mutex_lock(&priv->port_mutex);
	mlx4_unregister_device(mdev);
	for (port = 1; port <= mdev->caps.num_ports; port++) {
		mlx4_CLOSE_PORT(mdev, port);
		err = mlx4_SET_PORT(mdev, port, -1);
		if (err) {
			mlx4_err(mdev, "Failed to set port %d, aborting\n",
				 port);
			goto err_set_port;
		}
	}
	err = mlx4_register_device(mdev);
err_set_port:
	mutex_unlock(&priv->port_mutex);
	mlx4_start_sense(mdev);
	return err ? err : count;
}

int mlx4_bond(struct mlx4_dev *dev)
{
	int ret = 0;
	struct mlx4_priv *priv = mlx4_priv(dev);

	mutex_lock(&priv->bond_mutex);

	if (!mlx4_is_bonded(dev))
		ret = mlx4_do_bond(dev, true);
	else
		ret = 0;

	mutex_unlock(&priv->bond_mutex);
	if (ret)
		mlx4_err(dev, "Failed to bond device: %d\n", ret);
	else
		mlx4_dbg(dev, "Device is bonded\n");
	return ret;
}
EXPORT_SYMBOL_GPL(mlx4_bond);

int mlx4_unbond(struct mlx4_dev *dev)
{
	int ret = 0;
	struct mlx4_priv *priv = mlx4_priv(dev);

	mutex_lock(&priv->bond_mutex);

	if (mlx4_is_bonded(dev))
		ret = mlx4_do_bond(dev, false);

	mutex_unlock(&priv->bond_mutex);
	if (ret)
		mlx4_err(dev, "Failed to unbond device: %d\n", ret);
	else
		mlx4_dbg(dev, "Device is unbonded\n");
	return ret;
}
EXPORT_SYMBOL_GPL(mlx4_unbond);

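/* Set the virtual-to-physical port mapping used while bonded. A zero in
 * either field of *v2p means "keep the current mapping for that port";
 * a full cross mapping (port1 -> 2 together with port2 -> 1) is
 * rejected below.
 */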
1246int mlx4_port_map_set(struct mlx4_dev *dev, struct mlx4_port_map *v2p)
1247{
1248        u8 port1 = v2p->port1;
1249        u8 port2 = v2p->port2;
1250        struct mlx4_priv *priv = mlx4_priv(dev);
1251        int err;
1252
1253        if (!(dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_PORT_REMAP))
1254                return -ENOTSUPP;
1255
1256        mutex_lock(&priv->bond_mutex);
1257
1258        /* zero means keep current mapping for this port */
1259        if (port1 == 0)
1260                port1 = priv->v2p.port1;
1261        if (port2 == 0)
1262                port2 = priv->v2p.port2;
1263
1264        if ((port1 < 1) || (port1 > MLX4_MAX_PORTS) ||
1265            (port2 < 1) || (port2 > MLX4_MAX_PORTS) ||
1266            (port1 == 2 && port2 == 1)) {
1267                /* besides boundary checks cross mapping makes
1268                 * no sense and therefore not allowed */
1269                err = -EINVAL;
1270        } else if ((port1 == priv->v2p.port1) &&
1271                 (port2 == priv->v2p.port2)) {
1272                err = 0;
1273        } else {
1274                err = mlx4_virt2phy_port_map(dev, port1, port2);
1275                if (!err) {
1276                        mlx4_dbg(dev, "port map changed: [%d][%d]\n",
1277                                 port1, port2);
1278                        priv->v2p.port1 = port1;
1279                        priv->v2p.port2 = port2;
1280                } else {
1281                        mlx4_err(dev, "Failed to change port mape: %d\n", err);
1282                }
1283        }
1284
1285        mutex_unlock(&priv->bond_mutex);
1286        return err;
1287}
1288EXPORT_SYMBOL_GPL(mlx4_port_map_set);
1289
1290static int mlx4_load_fw(struct mlx4_dev *dev)
1291{
1292        struct mlx4_priv *priv = mlx4_priv(dev);
1293        int err;
1294
1295        priv->fw.fw_icm = mlx4_alloc_icm(dev, priv->fw.fw_pages,
1296                                         GFP_HIGHUSER | __GFP_NOWARN, 0);
1297        if (!priv->fw.fw_icm) {
1298                mlx4_err(dev, "Couldn't allocate FW area, aborting\n");
1299                return -ENOMEM;
1300        }
1301
1302        err = mlx4_MAP_FA(dev, priv->fw.fw_icm);
1303        if (err) {
1304                mlx4_err(dev, "MAP_FA command failed, aborting\n");
1305                goto err_free;
1306        }
1307
1308        err = mlx4_RUN_FW(dev);
1309        if (err) {
1310                mlx4_err(dev, "RUN_FW command failed, aborting\n");
1311                goto err_unmap_fa;
1312        }
1313
1314        return 0;
1315
1316err_unmap_fa:
1317        mlx4_UNMAP_FA(dev);
1318
1319err_free:
1320        mlx4_free_icm(dev, priv->fw.fw_icm, 0);
1321        return err;
1322}
1323
1324static int mlx4_init_cmpt_table(struct mlx4_dev *dev, u64 cmpt_base,
1325                                int cmpt_entry_sz)
1326{
1327        struct mlx4_priv *priv = mlx4_priv(dev);
1328        int err;
1329        int num_eqs;
1330
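            /* The cMPT area is split into one table per context type
             * (QP, SRQ, CQ, EQ): each type's table starts at
             * cmpt_base + type * cmpt_entry_sz * 2^MLX4_CMPT_SHIFT, so
             * every sub-table has room for 2^MLX4_CMPT_SHIFT entries.
             */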
1331        err = mlx4_init_icm_table(dev, &priv->qp_table.cmpt_table,
1332                                  cmpt_base +
1333                                  ((u64) (MLX4_CMPT_TYPE_QP *
1334                                          cmpt_entry_sz) << MLX4_CMPT_SHIFT),
1335                                  cmpt_entry_sz, dev->caps.num_qps,
1336                                  dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW],
1337                                  0, 0);
1338        if (err)
1339                goto err;
1340
1341        err = mlx4_init_icm_table(dev, &priv->srq_table.cmpt_table,
1342                                  cmpt_base +
1343                                  ((u64) (MLX4_CMPT_TYPE_SRQ *
1344                                          cmpt_entry_sz) << MLX4_CMPT_SHIFT),
1345                                  cmpt_entry_sz, dev->caps.num_srqs,
1346                                  dev->caps.reserved_srqs, 0, 0);
1347        if (err)
1348                goto err_qp;
1349
1350        err = mlx4_init_icm_table(dev, &priv->cq_table.cmpt_table,
1351                                  cmpt_base +
1352                                  ((u64) (MLX4_CMPT_TYPE_CQ *
1353                                          cmpt_entry_sz) << MLX4_CMPT_SHIFT),
1354                                  cmpt_entry_sz, dev->caps.num_cqs,
1355                                  dev->caps.reserved_cqs, 0, 0);
1356        if (err)
1357                goto err_srq;
1358
1359        num_eqs = dev->phys_caps.num_phys_eqs;
1360        err = mlx4_init_icm_table(dev, &priv->eq_table.cmpt_table,
1361                                  cmpt_base +
1362                                  ((u64) (MLX4_CMPT_TYPE_EQ *
1363                                          cmpt_entry_sz) << MLX4_CMPT_SHIFT),
1364                                  cmpt_entry_sz, num_eqs, num_eqs, 0, 0);
1365        if (err)
1366                goto err_cq;
1367
1368        return 0;
1369
1370err_cq:
1371        mlx4_cleanup_icm_table(dev, &priv->cq_table.cmpt_table);
1372
1373err_srq:
1374        mlx4_cleanup_icm_table(dev, &priv->srq_table.cmpt_table);
1375
1376err_qp:
1377        mlx4_cleanup_icm_table(dev, &priv->qp_table.cmpt_table);
1378
1379err:
1380        return err;
1381}
1382
1383static int mlx4_init_icm(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap,
1384                         struct mlx4_init_hca_param *init_hca, u64 icm_size)
1385{
1386        struct mlx4_priv *priv = mlx4_priv(dev);
1387        u64 aux_pages;
1388        int num_eqs;
1389        int err;
1390
1391        err = mlx4_SET_ICM_SIZE(dev, icm_size, &aux_pages);
1392        if (err) {
1393                mlx4_err(dev, "SET_ICM_SIZE command failed, aborting\n");
1394                return err;
1395        }
1396
1397        mlx4_dbg(dev, "%lld KB of HCA context requires %lld KB aux memory\n",
1398                 (unsigned long long) icm_size >> 10,
1399                 (unsigned long long) aux_pages << 2);
1400
1401        priv->fw.aux_icm = mlx4_alloc_icm(dev, aux_pages,
1402                                          GFP_HIGHUSER | __GFP_NOWARN, 0);
1403        if (!priv->fw.aux_icm) {
1404                mlx4_err(dev, "Couldn't allocate aux memory, aborting\n");
1405                return -ENOMEM;
1406        }
1407
1408        err = mlx4_MAP_ICM_AUX(dev, priv->fw.aux_icm);
1409        if (err) {
1410                mlx4_err(dev, "MAP_ICM_AUX command failed, aborting\n");
1411                goto err_free_aux;
1412        }
1413
1414        err = mlx4_init_cmpt_table(dev, init_hca->cmpt_base, dev_cap->cmpt_entry_sz);
1415        if (err) {
1416                mlx4_err(dev, "Failed to map cMPT context memory, aborting\n");
1417                goto err_unmap_aux;
1418        }
1419
1421        num_eqs = dev->phys_caps.num_phys_eqs;
1422        err = mlx4_init_icm_table(dev, &priv->eq_table.table,
1423                                  init_hca->eqc_base, dev_cap->eqc_entry_sz,
1424                                  num_eqs, num_eqs, 0, 0);
1425        if (err) {
1426                mlx4_err(dev, "Failed to map EQ context memory, aborting\n");
1427                goto err_unmap_cmpt;
1428        }
1429
1430        /*
1431         * Reserved MTT entries must be aligned up to a cacheline
1432         * boundary, since the FW will write to them, while the driver
1433         * writes to all other MTT entries. (The variable
1434         * dev->caps.mtt_entry_sz below is really the MTT segment
1435         * size, not the raw entry size)
1436         */
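            /* E.g. (hypothetical numbers): with an 8-byte MTT entry and
             * a 64-byte cacheline, 100 reserved entries become
             * ALIGN(100 * 8, 64) / 8 = 104 entries.
             */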
1437        dev->caps.reserved_mtts =
1438                ALIGN(dev->caps.reserved_mtts * dev->caps.mtt_entry_sz,
1439                      dma_get_cache_alignment()) / dev->caps.mtt_entry_sz;
1440
1441        err = mlx4_init_icm_table(dev, &priv->mr_table.mtt_table,
1442                                  init_hca->mtt_base,
1443                                  dev->caps.mtt_entry_sz,
1444                                  dev->caps.num_mtts,
1445                                  dev->caps.reserved_mtts, 1, 0);
1446        if (err) {
1447                mlx4_err(dev, "Failed to map MTT context memory, aborting\n");
1448                goto err_unmap_eq;
1449        }
1450
1451        err = mlx4_init_icm_table(dev, &priv->mr_table.dmpt_table,
1452                                  init_hca->dmpt_base,
1453                                  dev_cap->dmpt_entry_sz,
1454                                  dev->caps.num_mpts,
1455                                  dev->caps.reserved_mrws, 1, 1);
1456        if (err) {
1457                mlx4_err(dev, "Failed to map dMPT context memory, aborting\n");
1458                goto err_unmap_mtt;
1459        }
1460
1461        err = mlx4_init_icm_table(dev, &priv->qp_table.qp_table,
1462                                  init_hca->qpc_base,
1463                                  dev_cap->qpc_entry_sz,
1464                                  dev->caps.num_qps,
1465                                  dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW],
1466                                  0, 0);
1467        if (err) {
1468                mlx4_err(dev, "Failed to map QP context memory, aborting\n");
1469                goto err_unmap_dmpt;
1470        }
1471
1472        err = mlx4_init_icm_table(dev, &priv->qp_table.auxc_table,
1473                                  init_hca->auxc_base,
1474                                  dev_cap->aux_entry_sz,
1475                                  dev->caps.num_qps,
1476                                  dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW],
1477                                  0, 0);
1478        if (err) {
1479                mlx4_err(dev, "Failed to map AUXC context memory, aborting\n");
1480                goto err_unmap_qp;
1481        }
1482
1483        err = mlx4_init_icm_table(dev, &priv->qp_table.altc_table,
1484                                  init_hca->altc_base,
1485                                  dev_cap->altc_entry_sz,
1486                                  dev->caps.num_qps,
1487                                  dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW],
1488                                  0, 0);
1489        if (err) {
1490                mlx4_err(dev, "Failed to map ALTC context memory, aborting\n");
1491                goto err_unmap_auxc;
1492        }
1493
1494        err = mlx4_init_icm_table(dev, &priv->qp_table.rdmarc_table,
1495                                  init_hca->rdmarc_base,
1496                                  dev_cap->rdmarc_entry_sz << priv->qp_table.rdmarc_shift,
1497                                  dev->caps.num_qps,
1498                                  dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW],
1499                                  0, 0);
1500        if (err) {
1501                mlx4_err(dev, "Failed to map RDMARC context memory, aborting\n");
1502                goto err_unmap_altc;
1503        }
1504
1505        err = mlx4_init_icm_table(dev, &priv->cq_table.table,
1506                                  init_hca->cqc_base,
1507                                  dev_cap->cqc_entry_sz,
1508                                  dev->caps.num_cqs,
1509                                  dev->caps.reserved_cqs, 0, 0);
1510        if (err) {
1511                mlx4_err(dev, "Failed to map CQ context memory, aborting\n");
1512                goto err_unmap_rdmarc;
1513        }
1514
1515        err = mlx4_init_icm_table(dev, &priv->srq_table.table,
1516                                  init_hca->srqc_base,
1517                                  dev_cap->srq_entry_sz,
1518                                  dev->caps.num_srqs,
1519                                  dev->caps.reserved_srqs, 0, 0);
1520        if (err) {
1521                mlx4_err(dev, "Failed to map SRQ context memory, aborting\n");
1522                goto err_unmap_cq;
1523        }
1524
1525        /*
1526         * For flow steering device managed mode it is required to use
1527         * mlx4_init_icm_table. For B0 steering mode it's not strictly
1528         * required, but for simplicity just map the whole multicast
1529         * group table now.  The table isn't very big and it's a lot
1530         * easier than trying to track ref counts.
1531         */
1532        err = mlx4_init_icm_table(dev, &priv->mcg_table.table,
1533                                  init_hca->mc_base,
1534                                  mlx4_get_mgm_entry_size(dev),
1535                                  dev->caps.num_mgms + dev->caps.num_amgms,
1536                                  dev->caps.num_mgms + dev->caps.num_amgms,
1537                                  0, 0);
1538        if (err) {
1539                mlx4_err(dev, "Failed to map MCG context memory, aborting\n");
1540                goto err_unmap_srq;
1541        }
1542
1543        return 0;
1544
1545err_unmap_srq:
1546        mlx4_cleanup_icm_table(dev, &priv->srq_table.table);
1547
1548err_unmap_cq:
1549        mlx4_cleanup_icm_table(dev, &priv->cq_table.table);
1550
1551err_unmap_rdmarc:
1552        mlx4_cleanup_icm_table(dev, &priv->qp_table.rdmarc_table);
1553
1554err_unmap_altc:
1555        mlx4_cleanup_icm_table(dev, &priv->qp_table.altc_table);
1556
1557err_unmap_auxc:
1558        mlx4_cleanup_icm_table(dev, &priv->qp_table.auxc_table);
1559
1560err_unmap_qp:
1561        mlx4_cleanup_icm_table(dev, &priv->qp_table.qp_table);
1562
1563err_unmap_dmpt:
1564        mlx4_cleanup_icm_table(dev, &priv->mr_table.dmpt_table);
1565
1566err_unmap_mtt:
1567        mlx4_cleanup_icm_table(dev, &priv->mr_table.mtt_table);
1568
1569err_unmap_eq:
1570        mlx4_cleanup_icm_table(dev, &priv->eq_table.table);
1571
1572err_unmap_cmpt:
1573        mlx4_cleanup_icm_table(dev, &priv->eq_table.cmpt_table);
1574        mlx4_cleanup_icm_table(dev, &priv->cq_table.cmpt_table);
1575        mlx4_cleanup_icm_table(dev, &priv->srq_table.cmpt_table);
1576        mlx4_cleanup_icm_table(dev, &priv->qp_table.cmpt_table);
1577
1578err_unmap_aux:
1579        mlx4_UNMAP_ICM_AUX(dev);
1580
1581err_free_aux:
1582        mlx4_free_icm(dev, priv->fw.aux_icm, 0);
1583
1584        return err;
1585}
1586
1587static void mlx4_free_icms(struct mlx4_dev *dev)
1588{
1589        struct mlx4_priv *priv = mlx4_priv(dev);
1590
1591        mlx4_cleanup_icm_table(dev, &priv->mcg_table.table);
1592        mlx4_cleanup_icm_table(dev, &priv->srq_table.table);
1593        mlx4_cleanup_icm_table(dev, &priv->cq_table.table);
1594        mlx4_cleanup_icm_table(dev, &priv->qp_table.rdmarc_table);
1595        mlx4_cleanup_icm_table(dev, &priv->qp_table.altc_table);
1596        mlx4_cleanup_icm_table(dev, &priv->qp_table.auxc_table);
1597        mlx4_cleanup_icm_table(dev, &priv->qp_table.qp_table);
1598        mlx4_cleanup_icm_table(dev, &priv->mr_table.dmpt_table);
1599        mlx4_cleanup_icm_table(dev, &priv->mr_table.mtt_table);
1600        mlx4_cleanup_icm_table(dev, &priv->eq_table.table);
1601        mlx4_cleanup_icm_table(dev, &priv->eq_table.cmpt_table);
1602        mlx4_cleanup_icm_table(dev, &priv->cq_table.cmpt_table);
1603        mlx4_cleanup_icm_table(dev, &priv->srq_table.cmpt_table);
1604        mlx4_cleanup_icm_table(dev, &priv->qp_table.cmpt_table);
1605
1606        mlx4_UNMAP_ICM_AUX(dev);
1607        mlx4_free_icm(dev, priv->fw.aux_icm, 0);
1608}
1609
1610static void mlx4_slave_exit(struct mlx4_dev *dev)
1611{
1612        struct mlx4_priv *priv = mlx4_priv(dev);
1613
1614        mutex_lock(&priv->cmd.slave_cmd_mutex);
1615        if (mlx4_comm_cmd(dev, MLX4_COMM_CMD_RESET, 0, MLX4_COMM_CMD_NA_OP,
1616                          MLX4_COMM_TIME))
1617                mlx4_warn(dev, "Failed to close slave function\n");
1618        mutex_unlock(&priv->cmd.slave_cmd_mutex);
1619}
1620
1621static int map_bf_area(struct mlx4_dev *dev)
1622{
1623        struct mlx4_priv *priv = mlx4_priv(dev);
1624        resource_size_t bf_start;
1625        resource_size_t bf_len;
1626        int err = 0;
1627
1628        if (!dev->caps.bf_reg_size)
1629                return -ENXIO;
1630
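            /* BAR 2 holds one page per UAR followed by the BlueFlame
             * registers; map everything past the UAR pages as a
             * write-combining region.
             */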
1631        bf_start = pci_resource_start(dev->persist->pdev, 2) +
1632                        (dev->caps.num_uars << PAGE_SHIFT);
1633        bf_len = pci_resource_len(dev->persist->pdev, 2) -
1634                        (dev->caps.num_uars << PAGE_SHIFT);
1635        priv->bf_mapping = io_mapping_create_wc(bf_start, bf_len);
1636        if (!priv->bf_mapping)
1637                err = -ENOMEM;
1638
1639        return err;
1640}
1641
1642static void unmap_bf_area(struct mlx4_dev *dev)
1643{
1644        if (mlx4_priv(dev)->bf_mapping)
1645                io_mapping_free(mlx4_priv(dev)->bf_mapping);
1646}
1647
1648cycle_t mlx4_read_clock(struct mlx4_dev *dev)
1649{
1650        u32 clockhi, clocklo, clockhi1;
1651        cycle_t cycles;
1652        int i;
1653        struct mlx4_priv *priv = mlx4_priv(dev);
1654
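            /* The 64-bit cycle counter has to be read as two 32-bit MMIO
             * words.  Read hi/lo/hi and retry if the high word changed
             * in between, i.e. the low word wrapped mid-read.
             */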
1655        for (i = 0; i < 10; i++) {
1656                clockhi = swab32(readl(priv->clock_mapping));
1657                clocklo = swab32(readl(priv->clock_mapping + 4));
1658                clockhi1 = swab32(readl(priv->clock_mapping));
1659                if (clockhi == clockhi1)
1660                        break;
1661        }
1662
1663        cycles = (u64) clockhi << 32 | (u64) clocklo;
1664
1665        return cycles;
1666}
1667EXPORT_SYMBOL_GPL(mlx4_read_clock);
1668
1670static int map_internal_clock(struct mlx4_dev *dev)
1671{
1672        struct mlx4_priv *priv = mlx4_priv(dev);
1673
1674        priv->clock_mapping =
1675                ioremap(pci_resource_start(dev->persist->pdev,
1676                                           priv->fw.clock_bar) +
1677                        priv->fw.clock_offset, MLX4_CLOCK_SIZE);
1678
1679        if (!priv->clock_mapping)
1680                return -ENOMEM;
1681
1682        return 0;
1683}
1684
1685int mlx4_get_internal_clock_params(struct mlx4_dev *dev,
1686                                   struct mlx4_clock_params *params)
1687{
1688        struct mlx4_priv *priv = mlx4_priv(dev);
1689
1690        if (mlx4_is_slave(dev))
1691                return -ENOTSUPP;
1692
1693        if (!params)
1694                return -EINVAL;
1695
1696        params->bar = priv->fw.clock_bar;
1697        params->offset = priv->fw.clock_offset;
1698        params->size = MLX4_CLOCK_SIZE;
1699
1700        return 0;
1701}
1702EXPORT_SYMBOL_GPL(mlx4_get_internal_clock_params);
1703
1704static void unmap_internal_clock(struct mlx4_dev *dev)
1705{
1706        struct mlx4_priv *priv = mlx4_priv(dev);
1707
1708        if (priv->clock_mapping)
1709                iounmap(priv->clock_mapping);
1710}
1711
1712static void mlx4_close_hca(struct mlx4_dev *dev)
1713{
1714        unmap_internal_clock(dev);
1715        unmap_bf_area(dev);
1716        if (mlx4_is_slave(dev))
1717                mlx4_slave_exit(dev);
1718        else {
1719                mlx4_CLOSE_HCA(dev, 0);
1720                mlx4_free_icms(dev);
1721        }
1722}
1723
1724static void mlx4_close_fw(struct mlx4_dev *dev)
1725{
1726        if (!mlx4_is_slave(dev)) {
1727                mlx4_UNMAP_FA(dev);
1728                mlx4_free_icm(dev, mlx4_priv(dev)->fw.fw_icm, 0);
1729        }
1730}
1731
1732static int mlx4_comm_check_offline(struct mlx4_dev *dev)
1733{
1734#define COMM_CHAN_OFFLINE_OFFSET 0x09
1735
1736        u32 comm_flags;
1737        u32 offline_bit;
1738        unsigned long end;
1739        struct mlx4_priv *priv = mlx4_priv(dev);
1740
1741        end = msecs_to_jiffies(MLX4_COMM_OFFLINE_TIME_OUT) + jiffies;
1742        while (time_before(jiffies, end)) {
1743                comm_flags = swab32(readl((char __iomem *)priv->mfunc.comm +
1744                                          MLX4_COMM_CHAN_FLAGS));
1745                offline_bit = (comm_flags &
1746                               (u32)(1 << COMM_CHAN_OFFLINE_OFFSET));
1747                if (!offline_bit)
1748                        return 0;
1749                /* There are cases as part of AER/Reset flow that PF needs
1750                 * around 100 msec to load. We therefore sleep for 100 msec
1751                 * to allow other tasks to make use of that CPU during this
1752                 * time interval.
1753                 */
1754                msleep(100);
1755        }
1756        mlx4_err(dev, "Communication channel is offline\n");
1757        return -EIO;
1758}
1759
1760static void mlx4_reset_vf_support(struct mlx4_dev *dev)
1761{
1762#define COMM_CHAN_RST_OFFSET 0x1e
1763
1764        struct mlx4_priv *priv = mlx4_priv(dev);
1765        u32 comm_rst;
1766        u32 comm_caps;
1767
1768        comm_caps = swab32(readl((char __iomem *)priv->mfunc.comm +
1769                                 MLX4_COMM_CHAN_CAPS));
1770        comm_rst = (comm_caps & (u32)(1 << COMM_CHAN_RST_OFFSET));
1771
1772        if (comm_rst)
1773                dev->caps.vf_caps |= MLX4_VF_CAP_FLAG_RESET;
1774}
1775
1776static int mlx4_init_slave(struct mlx4_dev *dev)
1777{
1778        struct mlx4_priv *priv = mlx4_priv(dev);
1779        u64 dma = (u64) priv->mfunc.vhcr_dma;
1780        int ret_from_reset = 0;
1781        u32 slave_read;
1782        u32 cmd_channel_ver;
1783
1784        if (atomic_read(&pf_loading)) {
1785                mlx4_warn(dev, "PF is not ready - Deferring probe\n");
1786                return -EPROBE_DEFER;
1787        }
1788
1789        mutex_lock(&priv->cmd.slave_cmd_mutex);
1790        priv->cmd.max_cmds = 1;
1791        if (mlx4_comm_check_offline(dev)) {
1792                mlx4_err(dev, "PF is not responsive, skipping initialization\n");
1793                goto err_offline;
1794        }
1795
1796        mlx4_reset_vf_support(dev);
1797        mlx4_warn(dev, "Sending reset\n");
1798        ret_from_reset = mlx4_comm_cmd(dev, MLX4_COMM_CMD_RESET, 0,
1799                                       MLX4_COMM_CMD_NA_OP, MLX4_COMM_TIME);
1800        /* If we are in the middle of FLR, the slave will try
1801         * NUM_OF_RESET_RETRIES times before giving up. */
1802        if (ret_from_reset) {
1803                if (ret_from_reset == MLX4_DELAY_RESET_SLAVE) {
1804                        mlx4_warn(dev, "slave is currently in the middle of FLR - Deferring probe\n");
1805                        mutex_unlock(&priv->cmd.slave_cmd_mutex);
1806                        return -EPROBE_DEFER;
1807                }
1808                goto err;
1809        }
1810
1811        /* check the driver version - the slave I/F revision
1812         * must match the master's */
1813        slave_read = swab32(readl(&priv->mfunc.comm->slave_read));
1814        cmd_channel_ver = mlx4_comm_get_version();
1815
1816        if (MLX4_COMM_GET_IF_REV(cmd_channel_ver) !=
1817                MLX4_COMM_GET_IF_REV(slave_read)) {
1818                mlx4_err(dev, "slave driver version is not supported by the master\n");
1819                goto err;
1820        }
1821
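            /* Hand the 64-bit VHCR DMA address to the PF 16 bits at a
             * time through the comm channel: VHCR0 carries bits 63:48,
             * down to VHCR_EN, which carries bits 15:0 and also enables
             * the VHCR.
             */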
1822        mlx4_warn(dev, "Sending vhcr0\n");
1823        if (mlx4_comm_cmd(dev, MLX4_COMM_CMD_VHCR0, dma >> 48,
1824                             MLX4_COMM_CMD_NA_OP, MLX4_COMM_TIME))
1825                goto err;
1826        if (mlx4_comm_cmd(dev, MLX4_COMM_CMD_VHCR1, dma >> 32,
1827                             MLX4_COMM_CMD_NA_OP, MLX4_COMM_TIME))
1828                goto err;
1829        if (mlx4_comm_cmd(dev, MLX4_COMM_CMD_VHCR2, dma >> 16,
1830                             MLX4_COMM_CMD_NA_OP, MLX4_COMM_TIME))
1831                goto err;
1832        if (mlx4_comm_cmd(dev, MLX4_COMM_CMD_VHCR_EN, dma,
1833                          MLX4_COMM_CMD_NA_OP, MLX4_COMM_TIME))
1834                goto err;
1835
1836        mutex_unlock(&priv->cmd.slave_cmd_mutex);
1837        return 0;
1838
1839err:
1840        mlx4_comm_cmd(dev, MLX4_COMM_CMD_RESET, 0, MLX4_COMM_CMD_NA_OP, 0);
1841err_offline:
1842        mutex_unlock(&priv->cmd.slave_cmd_mutex);
1843        return -EIO;
1844}
1845
1846static void mlx4_parav_master_pf_caps(struct mlx4_dev *dev)
1847{
1848        int i;
1849
1850        for (i = 1; i <= dev->caps.num_ports; i++) {
1851                if (dev->caps.port_type[i] == MLX4_PORT_TYPE_ETH)
1852                        dev->caps.gid_table_len[i] =
1853                                mlx4_get_slave_num_gids(dev, 0, i);
1854                else
1855                        dev->caps.gid_table_len[i] = 1;
1856                dev->caps.pkey_table_len[i] =
1857                        dev->phys_caps.pkey_phys_table_len[i] - 1;
1858        }
1859}
1860
1861static int choose_log_fs_mgm_entry_size(int qp_per_entry)
1862{
1863        int i;
1864
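            /* An MGM entry of 2^i bytes starts with a 32-byte header
             * followed by 4-byte QPNs, hence it holds
             * (2^i - 32) / 4 = 4 * ((1 << i) / 16 - 2) QPs.
             */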
1865        for (i = MLX4_MIN_MGM_LOG_ENTRY_SIZE; i <= MLX4_MAX_MGM_LOG_ENTRY_SIZE;
1866              i++) {
1867                if (qp_per_entry <= 4 * ((1 << i) / 16 - 2))
1868                        break;
1869        }
1870
1871        return (i <= MLX4_MAX_MGM_LOG_ENTRY_SIZE) ? i : -1;
1872}
1873
1874static const char *dmfs_high_rate_steering_mode_str(int dmfs_high_steer_mode)
1875{
1876        switch (dmfs_high_steer_mode) {
1877        case MLX4_STEERING_DMFS_A0_DEFAULT:
1878                return "default performance";
1879
1880        case MLX4_STEERING_DMFS_A0_DYNAMIC:
1881                return "dynamic hybrid mode";
1882
1883        case MLX4_STEERING_DMFS_A0_STATIC:
1884                return "performance optimized for limited rule configuration (static)";
1885
1886        case MLX4_STEERING_DMFS_A0_DISABLE:
1887                return "disabled performance optimized steering";
1888
1889        case MLX4_STEERING_DMFS_A0_NOT_SUPPORTED:
1890                return "performance optimized steering not supported";
1891
1892        default:
1893                return "Unrecognized mode";
1894        }
1895}
1896
1897#define MLX4_DMFS_A0_STEERING                   (1UL << 2)
1898
1899static void choose_steering_mode(struct mlx4_dev *dev,
1900                                 struct mlx4_dev_cap *dev_cap)
1901{
1902        if (mlx4_log_num_mgm_entry_size <= 0) {
1903                if ((-mlx4_log_num_mgm_entry_size) & MLX4_DMFS_A0_STEERING) {
1904                        if (dev->caps.dmfs_high_steer_mode ==
1905                            MLX4_STEERING_DMFS_A0_NOT_SUPPORTED)
1906                                mlx4_err(dev, "DMFS high rate mode not supported\n");
1907                        else
1908                                dev->caps.dmfs_high_steer_mode =
1909                                        MLX4_STEERING_DMFS_A0_STATIC;
1910                }
1911        }
1912
1913        if (mlx4_log_num_mgm_entry_size <= 0 &&
1914            dev_cap->flags2 & MLX4_DEV_CAP_FLAG2_FS_EN &&
1915            (!mlx4_is_mfunc(dev) ||
1916             (dev_cap->fs_max_num_qp_per_entry >=
1917             (dev->persist->num_vfs + 1))) &&
1918            choose_log_fs_mgm_entry_size(dev_cap->fs_max_num_qp_per_entry) >=
1919                MLX4_MIN_MGM_LOG_ENTRY_SIZE) {
1920                dev->oper_log_mgm_entry_size =
1921                        choose_log_fs_mgm_entry_size(dev_cap->fs_max_num_qp_per_entry);
1922                dev->caps.steering_mode = MLX4_STEERING_MODE_DEVICE_MANAGED;
1923                dev->caps.num_qp_per_mgm = dev_cap->fs_max_num_qp_per_entry;
1924                dev->caps.fs_log_max_ucast_qp_range_size =
1925                        dev_cap->fs_log_max_ucast_qp_range_size;
1926        } else {
1927                if (dev->caps.dmfs_high_steer_mode !=
1928                    MLX4_STEERING_DMFS_A0_NOT_SUPPORTED)
1929                        dev->caps.dmfs_high_steer_mode = MLX4_STEERING_DMFS_A0_DISABLE;
1930                if (dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_UC_STEER &&
1931                    dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_MC_STEER)
1932                        dev->caps.steering_mode = MLX4_STEERING_MODE_B0;
1933                else {
1934                        dev->caps.steering_mode = MLX4_STEERING_MODE_A0;
1935
1936                        if (dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_UC_STEER ||
1937                            dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_MC_STEER)
1938                                mlx4_warn(dev, "Must have both UC_STEER and MC_STEER flags set to use B0 steering - falling back to A0 steering mode\n");
1939                }
1940                dev->oper_log_mgm_entry_size =
1941                        mlx4_log_num_mgm_entry_size > 0 ?
1942                        mlx4_log_num_mgm_entry_size :
1943                        MLX4_DEFAULT_MGM_LOG_ENTRY_SIZE;
1944                dev->caps.num_qp_per_mgm = mlx4_get_qp_per_mgm(dev);
1945        }
1946        mlx4_dbg(dev, "Steering mode is: %s, oper_log_mgm_entry_size = %d, modparam log_num_mgm_entry_size = %d\n",
1947                 mlx4_steering_mode_str(dev->caps.steering_mode),
1948                 dev->oper_log_mgm_entry_size,
1949                 mlx4_log_num_mgm_entry_size);
1950}
1951
1952static void choose_tunnel_offload_mode(struct mlx4_dev *dev,
1953                                       struct mlx4_dev_cap *dev_cap)
1954{
1955        if (dev->caps.steering_mode == MLX4_STEERING_MODE_DEVICE_MANAGED &&
1956            dev_cap->flags2 & MLX4_DEV_CAP_FLAG2_VXLAN_OFFLOADS)
1957                dev->caps.tunnel_offload_mode = MLX4_TUNNEL_OFFLOAD_MODE_VXLAN;
1958        else
1959                dev->caps.tunnel_offload_mode = MLX4_TUNNEL_OFFLOAD_MODE_NONE;
1960
1961        mlx4_dbg(dev, "Tunneling offload mode is: %s\n",
1962                 (dev->caps.tunnel_offload_mode == MLX4_TUNNEL_OFFLOAD_MODE_VXLAN) ? "vxlan" : "none");
1963}
1964
1965static int mlx4_validate_optimized_steering(struct mlx4_dev *dev)
1966{
1967        int i;
1968        struct mlx4_port_cap port_cap;
1969
1970        if (dev->caps.dmfs_high_steer_mode == MLX4_STEERING_DMFS_A0_NOT_SUPPORTED)
1971                return -EINVAL;
1972
1973        for (i = 1; i <= dev->caps.num_ports; i++) {
1974                if (mlx4_dev_port(dev, i, &port_cap)) {
1975                        mlx4_err(dev,
1976                                 "QUERY_DEV_CAP command failed, can't verify DMFS high rate steering\n");
1977                } else if ((dev->caps.dmfs_high_steer_mode !=
1978                            MLX4_STEERING_DMFS_A0_DEFAULT) &&
1979                           (port_cap.dmfs_optimized_state ==
1980                            !!(dev->caps.dmfs_high_steer_mode ==
1981                            MLX4_STEERING_DMFS_A0_DISABLE))) {
1982                        mlx4_err(dev,
1983                                 "DMFS high rate steer mode mismatch: driver requested %s but FW has it %s\n",
1984                                 dmfs_high_rate_steering_mode_str(
1985                                        dev->caps.dmfs_high_steer_mode),
1986                                 (port_cap.dmfs_optimized_state ?
1987                                        "enabled" : "disabled"));
1988                }
1989        }
1990
1991        return 0;
1992}
1993
1994static int mlx4_init_fw(struct mlx4_dev *dev)
1995{
1996        struct mlx4_mod_stat_cfg   mlx4_cfg;
1997        int err = 0;
1998
1999        if (!mlx4_is_slave(dev)) {
2000                err = mlx4_QUERY_FW(dev);
2001                if (err) {
2002                        if (err == -EACCES)
2003                                mlx4_info(dev, "non-primary physical function, skipping\n");
2004                        else
2005                                mlx4_err(dev, "QUERY_FW command failed, aborting\n");
2006                        return err;
2007                }
2008
2009                err = mlx4_load_fw(dev);
2010                if (err) {
2011                        mlx4_err(dev, "Failed to start FW, aborting\n");
2012                        return err;
2013                }
2014
2015                mlx4_cfg.log_pg_sz_m = 1;
2016                mlx4_cfg.log_pg_sz = 0;
2017                err = mlx4_MOD_STAT_CFG(dev, &mlx4_cfg);
2018                if (err)
2019                        mlx4_warn(dev, "Failed to override log_pg_sz parameter\n");
2020        }
2021
2022        return err;
2023}
2024
2025static int mlx4_init_hca(struct mlx4_dev *dev)
2026{
2027        struct mlx4_priv          *priv = mlx4_priv(dev);
2028        struct mlx4_adapter        adapter;
2029        struct mlx4_dev_cap        dev_cap;
2030        struct mlx4_profile        profile;
2031        struct mlx4_init_hca_param init_hca;
2032        u64 icm_size;
2033        struct mlx4_config_dev_params params;
2034        int err;
2035
2036        if (!mlx4_is_slave(dev)) {
2037                err = mlx4_dev_cap(dev, &dev_cap);
2038                if (err) {
2039                        mlx4_err(dev, "QUERY_DEV_CAP command failed, aborting\n");
2040                        return err;
2041                }
2042
2043                choose_steering_mode(dev, &dev_cap);
2044                choose_tunnel_offload_mode(dev, &dev_cap);
2045
2046                if (dev->caps.dmfs_high_steer_mode == MLX4_STEERING_DMFS_A0_STATIC &&
2047                    mlx4_is_master(dev))
2048                        dev->caps.function_caps |= MLX4_FUNC_CAP_DMFS_A0_STATIC;
2049
2050                err = mlx4_get_phys_port_id(dev);
2051                if (err)
2052                        mlx4_err(dev, "Failed to get physical port id\n");
2053
2054                if (mlx4_is_master(dev))
2055                        mlx4_parav_master_pf_caps(dev);
2056
2057                if (mlx4_low_memory_profile()) {
2058                        mlx4_info(dev, "Running from within kdump kernel. Using low memory profile\n");
2059                        profile = low_mem_profile;
2060                } else {
2061                        profile = default_profile;
2062                }
2063                if (dev->caps.steering_mode ==
2064                    MLX4_STEERING_MODE_DEVICE_MANAGED)
2065                        profile.num_mcg = MLX4_FS_NUM_MCG;
2066
2067                icm_size = mlx4_make_profile(dev, &profile, &dev_cap,
2068                                             &init_hca);
2069                if ((long long) icm_size < 0) {
2070                        err = icm_size;
2071                        return err;
2072                }
2073
2074                dev->caps.max_fmr_maps = (1 << (32 - ilog2(dev->caps.num_mpts))) - 1;
2075
2076                init_hca.log_uar_sz = ilog2(dev->caps.num_uars);
2077                init_hca.uar_page_sz = PAGE_SHIFT - 12;
2078                init_hca.mw_enabled = 0;
2079                if (dev->caps.flags & MLX4_DEV_CAP_FLAG_MEM_WINDOW ||
2080                    dev->caps.bmme_flags & MLX4_BMME_FLAG_TYPE_2_WIN)
2081                        init_hca.mw_enabled = INIT_HCA_TPT_MW_ENABLE;
2082
2083                err = mlx4_init_icm(dev, &dev_cap, &init_hca, icm_size);
2084                if (err)
2085                        return err;
2086
2087                err = mlx4_INIT_HCA(dev, &init_hca);
2088                if (err) {
2089                        mlx4_err(dev, "INIT_HCA command failed, aborting\n");
2090                        goto err_free_icm;
2091                }
2092
2093                if (dev_cap.flags2 & MLX4_DEV_CAP_FLAG2_SYS_EQS) {
2094                        err = mlx4_query_func(dev, &dev_cap);
2095                        if (err < 0) {
2096                                mlx4_err(dev, "QUERY_FUNC command failed, aborting\n");
2097                                goto err_close;
2098                        } else if (err & MLX4_QUERY_FUNC_NUM_SYS_EQS) {
2099                                dev->caps.num_eqs = dev_cap.max_eqs;
2100                                dev->caps.reserved_eqs = dev_cap.reserved_eqs;
2101                                dev->caps.reserved_uars = dev_cap.reserved_uars;
2102                        }
2103                }
2104
2105                /*
2106                 * If timestamping (TS) is supported by the FW, read the
2107                 * HCA core clock frequency via the QUERY_HCA command.
2108                 */
2109                if (dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_TS) {
2110                        memset(&init_hca, 0, sizeof(init_hca));
2111                        err = mlx4_QUERY_HCA(dev, &init_hca);
2112                        if (err) {
2113                                mlx4_err(dev, "QUERY_HCA command failed, disabling timestamping\n");
2114                                dev->caps.flags2 &= ~MLX4_DEV_CAP_FLAG2_TS;
2115                        } else {
2116                                dev->caps.hca_core_clock =
2117                                        init_hca.hca_core_clock;
2118                        }
2119
2120                        /* In case we got HCA frequency 0 - disable timestamping
2121                         * to avoid dividing by zero
2122                         */
2123                        if (!dev->caps.hca_core_clock) {
2124                                dev->caps.flags2 &= ~MLX4_DEV_CAP_FLAG2_TS;
2125                                mlx4_err(dev,
2126                                         "HCA frequency is 0 - timestamping is not supported\n");
2127                        } else if (map_internal_clock(dev)) {
2128                                /*
2129                                 * Failed to map the internal clock;
2130                                 * disable timestamping.
2131                                 */
2132                                dev->caps.flags2 &= ~MLX4_DEV_CAP_FLAG2_TS;
2133                                mlx4_err(dev, "Failed to map internal clock. Timestamping is not supported\n");
2134                        }
2135                }
2136
2137                if (dev->caps.dmfs_high_steer_mode !=
2138                    MLX4_STEERING_DMFS_A0_NOT_SUPPORTED) {
2139                        if (mlx4_validate_optimized_steering(dev))
2140                                mlx4_warn(dev, "Optimized steering validation failed\n");
2141
2142                        if (dev->caps.dmfs_high_steer_mode ==
2143                            MLX4_STEERING_DMFS_A0_DISABLE) {
2144                                dev->caps.dmfs_high_rate_qpn_base =
2145                                        dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW];
2146                                dev->caps.dmfs_high_rate_qpn_range =
2147                                        MLX4_A0_STEERING_TABLE_SIZE;
2148                        }
2149
2150                        mlx4_dbg(dev, "DMFS high rate steer mode is: %s\n",
2151                                 dmfs_high_rate_steering_mode_str(
2152                                        dev->caps.dmfs_high_steer_mode));
2153                }
2154        } else {
2155                err = mlx4_init_slave(dev);
2156                if (err) {
2157                        if (err != -EPROBE_DEFER)
2158                                mlx4_err(dev, "Failed to initialize slave\n");
2159                        return err;
2160                }
2161
2162                err = mlx4_slave_cap(dev);
2163                if (err) {
2164                        mlx4_err(dev, "Failed to obtain slave caps\n");
2165                        goto err_close;
2166                }
2167        }
2168
2169        if (map_bf_area(dev))
2170                mlx4_dbg(dev, "Failed to map blue flame area\n");
2171
2172        /* Only the master sets the ports; all the rest get it from the master. */
2173        if (!mlx4_is_slave(dev))
2174                mlx4_set_port_mask(dev);
2175
2176        err = mlx4_QUERY_ADAPTER(dev, &adapter);
2177        if (err) {
2178                mlx4_err(dev, "QUERY_ADAPTER command failed, aborting\n");
2179                goto unmap_bf;
2180        }
2181
2182        /* Query CONFIG_DEV parameters */
2183        err = mlx4_config_dev_retrieval(dev, &params);
2184        if (err && err != -ENOTSUPP) {
2185                mlx4_err(dev, "Failed to query CONFIG_DEV parameters\n");
2186        } else if (!err) {
2187                dev->caps.rx_checksum_flags_port[1] = params.rx_csum_flags_port_1;
2188                dev->caps.rx_checksum_flags_port[2] = params.rx_csum_flags_port_2;
2189        }
2190        priv->eq_table.inta_pin = adapter.inta_pin;
2191        memcpy(dev->board_id, adapter.board_id, sizeof(dev->board_id));
2192
2193        return 0;
2194
2195unmap_bf:
2196        unmap_internal_clock(dev);
2197        unmap_bf_area(dev);
2198
2199        if (mlx4_is_slave(dev)) {
2200                kfree(dev->caps.qp0_qkey);
2201                kfree(dev->caps.qp0_tunnel);
2202                kfree(dev->caps.qp0_proxy);
2203                kfree(dev->caps.qp1_tunnel);
2204                kfree(dev->caps.qp1_proxy);
2205        }
2206
2207err_close:
2208        if (mlx4_is_slave(dev))
2209                mlx4_slave_exit(dev);
2210        else
2211                mlx4_CLOSE_HCA(dev, 0);
2212
2213err_free_icm:
2214        if (!mlx4_is_slave(dev))
2215                mlx4_free_icms(dev);
2216
2217        return err;
2218}
2219
2220static int mlx4_init_counters_table(struct mlx4_dev *dev)
2221{
2222        struct mlx4_priv *priv = mlx4_priv(dev);
2223        int nent_pow2;
2224
2225        if (!(dev->caps.flags & MLX4_DEV_CAP_FLAG_COUNTERS))
2226                return -ENOENT;
2227
2228        if (!dev->caps.max_counters)
2229                return -ENOSPC;
2230
2231        nent_pow2 = roundup_pow_of_two(dev->caps.max_counters);
2232        /* reserve last counter index for sink counter */
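            /* E.g. max_counters = 127: nent_pow2 = 128, the top
             * 128 - 127 + 1 = 2 indices are reserved, leaving 0..125
             * allocatable with 126 (the sink) and 127 (pow2 padding)
             * never handed out.
             */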
2233        return mlx4_bitmap_init(&priv->counters_bitmap, nent_pow2,
2234                                nent_pow2 - 1, 0,
2235                                nent_pow2 - dev->caps.max_counters + 1);
2236}
2237
2238static void mlx4_cleanup_counters_table(struct mlx4_dev *dev)
2239{
2240        if (!(dev->caps.flags & MLX4_DEV_CAP_FLAG_COUNTERS))
2241                return;
2242
2243        if (!dev->caps.max_counters)
2244                return;
2245
2246        mlx4_bitmap_cleanup(&mlx4_priv(dev)->counters_bitmap);
2247}
2248
2249static void mlx4_cleanup_default_counters(struct mlx4_dev *dev)
2250{
2251        struct mlx4_priv *priv = mlx4_priv(dev);
2252        int port;
2253
2254        for (port = 0; port < dev->caps.num_ports; port++)
2255                if (priv->def_counter[port] != -1)
2256                        mlx4_counter_free(dev, priv->def_counter[port]);
2257}
2258
2259static int mlx4_allocate_default_counters(struct mlx4_dev *dev)
2260{
2261        struct mlx4_priv *priv = mlx4_priv(dev);
2262        int port, err = 0;
2263        u32 idx;
2264
2265        for (port = 0; port < dev->caps.num_ports; port++)
2266                priv->def_counter[port] = -1;
2267
2268        for (port = 0; port < dev->caps.num_ports; port++) {
2269                err = mlx4_counter_alloc(dev, &idx);
2270
2271                if (!err || err == -ENOSPC) {
2272                        priv->def_counter[port] = idx;
2273                } else if (err == -ENOENT) {
2274                        err = 0;
2275                        continue;
2276                } else if (mlx4_is_slave(dev) && err == -EINVAL) {
2277                        priv->def_counter[port] = MLX4_SINK_COUNTER_INDEX(dev);
2278                        mlx4_warn(dev, "can't allocate counter from old PF driver, using index %d\n",
2279                                  MLX4_SINK_COUNTER_INDEX(dev));
2280                        err = 0;
2281                } else {
2282                        mlx4_err(dev, "%s: failed to allocate default counter port %d err %d\n",
2283                                 __func__, port + 1, err);
2284                        mlx4_cleanup_default_counters(dev);
2285                        return err;
2286                }
2287
2288                mlx4_dbg(dev, "%s: default counter index %d for port %d\n",
2289                         __func__, priv->def_counter[port], port + 1);
2290        }
2291
2292        return err;
2293}
2294
2295int __mlx4_counter_alloc(struct mlx4_dev *dev, u32 *idx)
2296{
2297        struct mlx4_priv *priv = mlx4_priv(dev);
2298
2299        if (!(dev->caps.flags & MLX4_DEV_CAP_FLAG_COUNTERS))
2300                return -ENOENT;
2301
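            /* When the bitmap is exhausted, hand back the shared sink
             * counter index so the caller still has a usable counter,
             * but report -ENOSPC.
             */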
2302        *idx = mlx4_bitmap_alloc(&priv->counters_bitmap);
2303        if (*idx == -1) {
2304                *idx = MLX4_SINK_COUNTER_INDEX(dev);
2305                return -ENOSPC;
2306        }
2307
2308        return 0;
2309}
2310
2311int mlx4_counter_alloc(struct mlx4_dev *dev, u32 *idx)
2312{
2313        u64 out_param;
2314        int err;
2315
2316        if (mlx4_is_mfunc(dev)) {
2317                err = mlx4_cmd_imm(dev, 0, &out_param, RES_COUNTER,
2318                                   RES_OP_RESERVE, MLX4_CMD_ALLOC_RES,
2319                                   MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED);
2320                if (!err)
2321                        *idx = get_param_l(&out_param);
2322
2323                return err;
2324        }
2325        return __mlx4_counter_alloc(dev, idx);
2326}
2327EXPORT_SYMBOL_GPL(mlx4_counter_alloc);
2328
2329static int __mlx4_clear_if_stat(struct mlx4_dev *dev,
2330                                u8 counter_index)
2331{
2332        struct mlx4_cmd_mailbox *if_stat_mailbox;
2333        int err;
2334        u32 if_stat_in_mod = (counter_index & 0xff) | MLX4_QUERY_IF_STAT_RESET;
2335
2336        if_stat_mailbox = mlx4_alloc_cmd_mailbox(dev);
2337        if (IS_ERR(if_stat_mailbox))
2338                return PTR_ERR(if_stat_mailbox);
2339
2340        err = mlx4_cmd_box(dev, 0, if_stat_mailbox->dma, if_stat_in_mod, 0,
2341                           MLX4_CMD_QUERY_IF_STAT, MLX4_CMD_TIME_CLASS_C,
2342                           MLX4_CMD_NATIVE);
2343
2344        mlx4_free_cmd_mailbox(dev, if_stat_mailbox);
2345        return err;
2346}
2347
2348void __mlx4_counter_free(struct mlx4_dev *dev, u32 idx)
2349{
2350        if (!(dev->caps.flags & MLX4_DEV_CAP_FLAG_COUNTERS))
2351                return;
2352
2353        if (idx == MLX4_SINK_COUNTER_INDEX(dev))
2354                return;
2355
2356        __mlx4_clear_if_stat(dev, idx);
2357
2358        mlx4_bitmap_free(&mlx4_priv(dev)->counters_bitmap, idx, MLX4_USE_RR);
2360}
2361
2362void mlx4_counter_free(struct mlx4_dev *dev, u32 idx)
2363{
2364        u64 in_param = 0;
2365
2366        if (mlx4_is_mfunc(dev)) {
2367                set_param_l(&in_param, idx);
2368                mlx4_cmd(dev, in_param, RES_COUNTER, RES_OP_RESERVE,
2369                         MLX4_CMD_FREE_RES, MLX4_CMD_TIME_CLASS_A,
2370                         MLX4_CMD_WRAPPED);
2371                return;
2372        }
2373        __mlx4_counter_free(dev, idx);
2374}
2375EXPORT_SYMBOL_GPL(mlx4_counter_free);
2376
2377int mlx4_get_default_counter_index(struct mlx4_dev *dev, int port)
2378{
2379        struct mlx4_priv *priv = mlx4_priv(dev);
2380
2381        return priv->def_counter[port - 1];
2382}
2383EXPORT_SYMBOL_GPL(mlx4_get_default_counter_index);
2384
2385void mlx4_set_admin_guid(struct mlx4_dev *dev, __be64 guid, int entry, int port)
2386{
2387        struct mlx4_priv *priv = mlx4_priv(dev);
2388
2389        priv->mfunc.master.vf_admin[entry].vport[port].guid = guid;
2390}
2391EXPORT_SYMBOL_GPL(mlx4_set_admin_guid);
2392
2393__be64 mlx4_get_admin_guid(struct mlx4_dev *dev, int entry, int port)
2394{
2395        struct mlx4_priv *priv = mlx4_priv(dev);
2396
2397        return priv->mfunc.master.vf_admin[entry].vport[port].guid;
2398}
2399EXPORT_SYMBOL_GPL(mlx4_get_admin_guid);
2400
2401void mlx4_set_random_admin_guid(struct mlx4_dev *dev, int entry, int port)
2402{
2403        struct mlx4_priv *priv = mlx4_priv(dev);
2404        __be64 guid;
2405
2406        /* hw GUID */
2407        if (entry == 0)
2408                return;
2409
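            /* In EUI-64 terms: bit 56 of the big-endian value is the
             * group (multicast) bit of the first byte and bit 57 is the
             * universal/local bit; force a unicast, locally-administered
             * GUID.
             */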
2410        get_random_bytes((char *)&guid, sizeof(guid));
2411        guid &= ~(cpu_to_be64(1ULL << 56));
2412        guid |= cpu_to_be64(1ULL << 57);
2413        priv->mfunc.master.vf_admin[entry].vport[port].guid = guid;
2414}
2415
2416static int mlx4_setup_hca(struct mlx4_dev *dev)
2417{
2418        struct mlx4_priv *priv = mlx4_priv(dev);
2419        int err;
2420        int port;
2421        __be32 ib_port_default_caps;
2422
2423        err = mlx4_init_uar_table(dev);
2424        if (err) {
2425                mlx4_err(dev, "Failed to initialize user access region table, aborting\n");
2426                return err;
2427        }
2428
2429        err = mlx4_uar_alloc(dev, &priv->driver_uar);
2430        if (err) {
2431                mlx4_err(dev, "Failed to allocate driver access region, aborting\n");
2432                goto err_uar_table_free;
2433        }
2434
2435        priv->kar = ioremap((phys_addr_t) priv->driver_uar.pfn << PAGE_SHIFT, PAGE_SIZE);
2436        if (!priv->kar) {
2437                mlx4_err(dev, "Couldn't map kernel access region, aborting\n");
2438                err = -ENOMEM;
2439                goto err_uar_free;
2440        }
2441
2442        err = mlx4_init_pd_table(dev);
2443        if (err) {
2444                mlx4_err(dev, "Failed to initialize protection domain table, aborting\n");
2445                goto err_kar_unmap;
2446        }
2447
2448        err = mlx4_init_xrcd_table(dev);
2449        if (err) {
2450                mlx4_err(dev, "Failed to initialize reliable connection domain table, aborting\n");
2451                goto err_pd_table_free;
2452        }
2453
2454        err = mlx4_init_mr_table(dev);
2455        if (err) {
2456                mlx4_err(dev, "Failed to initialize memory region table, aborting\n");
2457                goto err_xrcd_table_free;
2458        }
2459
2460        if (!mlx4_is_slave(dev)) {
2461                err = mlx4_init_mcg_table(dev);
2462                if (err) {
2463                        mlx4_err(dev, "Failed to initialize multicast group table, aborting\n");
2464                        goto err_mr_table_free;
2465                }
2466                err = mlx4_config_mad_demux(dev);
2467                if (err) {
2468                        mlx4_err(dev, "Failed in config_mad_demux, aborting\n");
2469                        goto err_mcg_table_free;
2470                }
2471        }
2472
2473        err = mlx4_init_eq_table(dev);
2474        if (err) {
2475                mlx4_err(dev, "Failed to initialize event queue table, aborting\n");
2476                goto err_mcg_table_free;
2477        }
2478
2479        err = mlx4_cmd_use_events(dev);
2480        if (err) {
2481                mlx4_err(dev, "Failed to switch to event-driven firmware commands, aborting\n");
2482                goto err_eq_table_free;
2483        }
2484
2485        err = mlx4_NOP(dev);
2486        if (err) {
2487                if (dev->flags & MLX4_FLAG_MSI_X) {
2488                        mlx4_warn(dev, "NOP command failed to generate MSI-X interrupt (IRQ %d)\n",
2489                                  priv->eq_table.eq[MLX4_EQ_ASYNC].irq);
2490                        mlx4_warn(dev, "Trying again without MSI-X\n");
2491                } else {
2492                        mlx4_err(dev, "NOP command failed to generate interrupt (IRQ %d), aborting\n",
2493                                 priv->eq_table.eq[MLX4_EQ_ASYNC].irq);
2494                        mlx4_err(dev, "BIOS or ACPI interrupt routing problem?\n");
2495                }
2496
2497                goto err_cmd_poll;
2498        }
2499
2500        mlx4_dbg(dev, "NOP command IRQ test passed\n");
2501
2502        err = mlx4_init_cq_table(dev);
2503        if (err) {
2504                mlx4_err(dev, "Failed to initialize completion queue table, aborting\n");
2505                goto err_cmd_poll;
2506        }
2507
2508        err = mlx4_init_srq_table(dev);
2509        if (err) {
2510                mlx4_err(dev, "Failed to initialize shared receive queue table, aborting\n");
2511                goto err_cq_table_free;
2512        }
2513
2514        err = mlx4_init_qp_table(dev);
2515        if (err) {
2516                mlx4_err(dev, "Failed to initialize queue pair table, aborting\n");
2517                goto err_srq_table_free;
2518        }
2519
2520        if (!mlx4_is_slave(dev)) {
2521                err = mlx4_init_counters_table(dev);
2522                if (err && err != -ENOENT) {
2523                        mlx4_err(dev, "Failed to initialize counters table, aborting\n");
2524                        goto err_qp_table_free;
2525                }
2526        }
2527
2528        err = mlx4_allocate_default_counters(dev);
2529        if (err) {
2530                mlx4_err(dev, "Failed to allocate default counters, aborting\n");
2531                goto err_counters_table_free;
2532        }
2533
2534        if (!mlx4_is_slave(dev)) {
2535                for (port = 1; port <= dev->caps.num_ports; port++) {
2536                        ib_port_default_caps = 0;
2537                        err = mlx4_get_port_ib_caps(dev, port,
2538                                                    &ib_port_default_caps);
2539                        if (err)
2540                                mlx4_warn(dev, "failed to get port %d default ib capabilities (%d). Continuing with caps = 0\n",
2541                                          port, err);
2542                        dev->caps.ib_port_def_cap[port] = ib_port_default_caps;
2543
2544                        /* initialize per-slave default ib port capabilities */
2545                        if (mlx4_is_master(dev)) {
2546                                int i;
2547                                for (i = 0; i < dev->num_slaves; i++) {
2548                                        if (i == mlx4_master_func_num(dev))
2549                                                continue;
2550                                        priv->mfunc.master.slave_state[i].ib_cap_mask[port] =
2551                                                ib_port_default_caps;
2552                                }
2553                        }
2554
2555                        if (mlx4_is_mfunc(dev))
2556                                dev->caps.port_ib_mtu[port] = IB_MTU_2048;
2557                        else
2558                                dev->caps.port_ib_mtu[port] = IB_MTU_4096;
2559
2560                        err = mlx4_SET_PORT(dev, port, mlx4_is_master(dev) ?
2561                                            dev->caps.pkey_table_len[port] : -1);
2562                        if (err) {
2563                                mlx4_err(dev, "Failed to set port %d, aborting\n",
2564                                         port);
2565                                goto err_default_counters_free;
2566                        }
2567                }
2568        }
2569
2570        return 0;
2571
2572err_default_counters_free:
2573        mlx4_cleanup_default_counters(dev);
2574
2575err_counters_table_free:
2576        if (!mlx4_is_slave(dev))
2577                mlx4_cleanup_counters_table(dev);
2578
2579err_qp_table_free:
2580        mlx4_cleanup_qp_table(dev);
2581
2582err_srq_table_free:
2583        mlx4_cleanup_srq_table(dev);
2584
2585err_cq_table_free:
2586        mlx4_cleanup_cq_table(dev);
2587
2588err_cmd_poll:
2589        mlx4_cmd_use_polling(dev);
2590
2591err_eq_table_free:
2592        mlx4_cleanup_eq_table(dev);
2593
2594err_mcg_table_free:
2595        if (!mlx4_is_slave(dev))
2596                mlx4_cleanup_mcg_table(dev);
2597
2598err_mr_table_free:
2599        mlx4_cleanup_mr_table(dev);
2600
2601err_xrcd_table_free:
2602        mlx4_cleanup_xrcd_table(dev);
2603
2604err_pd_table_free:
2605        mlx4_cleanup_pd_table(dev);
2606
2607err_kar_unmap:
2608        iounmap(priv->kar);
2609
2610err_uar_free:
2611        mlx4_uar_free(dev, &priv->driver_uar);
2612
2613err_uar_table_free:
2614        mlx4_cleanup_uar_table(dev);
2615        return err;
2616}
2617
2618static int mlx4_init_affinity_hint(struct mlx4_dev *dev, int port, int eqn)
2619{
2620        int requested_cpu = 0;
2621        struct mlx4_priv *priv = mlx4_priv(dev);
2622        struct mlx4_eq *eq;
2623        int off = 0;
2624        int i;
2625
2626        if (eqn > dev->caps.num_comp_vectors)
2627                return -EINVAL;
2628
2629        for (i = 1; i < port; i++)
2630                off += mlx4_get_eqs_per_port(dev, i);
2631
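            /* Vector numbering includes the async EQ, so subtract one
             * once eqn is past MLX4_EQ_ASYNC; off skips the vectors
             * owned by lower-numbered ports, mapping this port's n-th
             * completion EQ to CPU n.
             */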
2632        requested_cpu = eqn - off - !!(eqn > MLX4_EQ_ASYNC);
2633
2634        /* Meaning EQs are shared, and this call comes from the second port */
2635        if (requested_cpu < 0)
2636                return 0;
2637
2638        eq = &priv->eq_table.eq[eqn];
2639
2640        if (!zalloc_cpumask_var(&eq->affinity_mask, GFP_KERNEL))
2641                return -ENOMEM;
2642
2643        cpumask_set_cpu(requested_cpu, eq->affinity_mask);
2644
2645        return 0;
2646}
2647
2648static void mlx4_enable_msi_x(struct mlx4_dev *dev)
2649{
2650        struct mlx4_priv *priv = mlx4_priv(dev);
2651        struct msix_entry *entries;
2652        int i;
2653        int port = 0;
2654
2655        if (msi_x) {
2656                int nreq = dev->caps.num_ports * num_online_cpus() + 1;
2657
2658                nreq = min_t(int, dev->caps.num_eqs - dev->caps.reserved_eqs,
2659                             nreq);
2660
2661                entries = kcalloc(nreq, sizeof(*entries), GFP_KERNEL);
2662                if (!entries)
2663                        goto no_msi;
2664
2665                for (i = 0; i < nreq; ++i)
2666                        entries[i].entry = i;
2667
2668                nreq = pci_enable_msix_range(dev->persist->pdev, entries, 2,
2669                                             nreq);
2670
2671                if (nreq < 0 || nreq < MLX4_EQ_ASYNC) {
2672                        kfree(entries);
2673                        goto no_msi;
2674                }
2675                /* 1 is reserved for events (asynchronous EQ) */
2676                dev->caps.num_comp_vectors = nreq - 1;
2677
2678                priv->eq_table.eq[MLX4_EQ_ASYNC].irq = entries[0].vector;
2679                bitmap_zero(priv->eq_table.eq[MLX4_EQ_ASYNC].actv_ports.ports,
2680                            dev->caps.num_ports);
2681
2682                for (i = 0; i < dev->caps.num_comp_vectors + 1; i++) {
2683                        if (i == MLX4_EQ_ASYNC)
2684                                continue;
2685
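                            /* entries[0] is the async EQ's vector, so
                             * completion EQ i takes the next vector,
                             * skipping the async slot once past it.
                             */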
2686                        priv->eq_table.eq[i].irq =
2687                                entries[i + 1 - !!(i > MLX4_EQ_ASYNC)].vector;
2688
2689                        if (MLX4_IS_LEGACY_EQ_MODE(dev->caps)) {
2690                                bitmap_fill(priv->eq_table.eq[i].actv_ports.ports,
2691                                            dev->caps.num_ports);
2692                                /* We don't set affinity hint when there
2693                                 * aren't enough EQs
2694                                 */
2695                        } else {
2696                                set_bit(port,
2697                                        priv->eq_table.eq[i].actv_ports.ports);
2698                                if (mlx4_init_affinity_hint(dev, port + 1, i))
2699                                        mlx4_warn(dev, "Couldn't init hint cpumask for EQ %d\n",
2700                                                  i);
2701                        }
2702                        /* We divide the EQs evenly between the two ports.
2703                         * (dev->caps.num_comp_vectors / dev->caps.num_ports)
2704                         * refers to the number of EQs per port
2705                         * (i.e. eqs_per_port). Theoretically, we would like to
2706                         * write something like (i + 1) % eqs_per_port == 0.
2707                         * However, since there's an asynchronous EQ, we have
2708                         * to skip over it by comparing this condition to
2709                         * !!((i + 1) > MLX4_EQ_ASYNC).
2710                         */
2711                        if ((dev->caps.num_comp_vectors > dev->caps.num_ports) &&
2712                            ((i + 1) %
2713                             (dev->caps.num_comp_vectors / dev->caps.num_ports)) ==
2714                            !!((i + 1) > MLX4_EQ_ASYNC))
2715                                /* If dev->caps.num_comp_vectors < dev->caps.num_ports,
2716                                 * everything is shared anyway.
2717                                 */
2718                                port++;
2719                }
2720
2721                dev->flags |= MLX4_FLAG_MSI_X;
2722
2723                kfree(entries);
2724                return;
2725        }
2726
2727no_msi:
2728        dev->caps.num_comp_vectors = 1;
2729
2730        BUG_ON(MLX4_EQ_ASYNC >= 2);
2731        for (i = 0; i < 2; ++i) {
2732                priv->eq_table.eq[i].irq = dev->persist->pdev->irq;
2733                if (i != MLX4_EQ_ASYNC) {
2734                        bitmap_fill(priv->eq_table.eq[i].actv_ports.ports,
2735                                    dev->caps.num_ports);
2736                }
2737        }
2738}
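/*
 * Worked example of the port rotation in mlx4_enable_msi_x() (assuming
 * two ports and num_comp_vectors == 8, so eqs_per_port == 4): vector 0
 * is the asynchronous EQ; completion vectors i = 1..4 are bound to
 * port 1, and port is advanced whenever (i + 1) % 4 == 1, i.e. after
 * i == 4, so vectors i = 5..8 land on port 2.
 */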
2739
2740static int mlx4_init_port_info(struct mlx4_dev *dev, int port)
2741{
2742        struct mlx4_port_info *info = &mlx4_priv(dev)->port[port];
2743        int err = 0;
2744
2745        info->dev = dev;
2746        info->port = port;
2747        if (!mlx4_is_slave(dev)) {
2748                mlx4_init_mac_table(dev, &info->mac_table);
2749                mlx4_init_vlan_table(dev, &info->vlan_table);
2750                mlx4_init_roce_gid_table(dev, &info->gid_table);
2751                info->base_qpn = mlx4_get_base_qpn(dev, port);
2752        }
2753
2754        sprintf(info->dev_name, "mlx4_port%d", port);
2755        info->port_attr.attr.name = info->dev_name;
2756        if (mlx4_is_mfunc(dev))
2757                info->port_attr.attr.mode = S_IRUGO;
2758        else {
2759                info->port_attr.attr.mode = S_IRUGO | S_IWUSR;
2760                info->port_attr.store     = set_port_type;
2761        }
2762        info->port_attr.show      = show_port_type;
2763        sysfs_attr_init(&info->port_attr.attr);
2764
2765        err = device_create_file(&dev->persist->pdev->dev, &info->port_attr);
2766        if (err) {
2767                mlx4_err(dev, "Failed to create file for port %d\n", port);
2768                info->port = -1;
2769        }
2770
2771        sprintf(info->dev_mtu_name, "mlx4_port%d_mtu", port);
2772        info->port_mtu_attr.attr.name = info->dev_mtu_name;
2773        if (mlx4_is_mfunc(dev))
2774                info->port_mtu_attr.attr.mode = S_IRUGO;
2775        else {
2776                info->port_mtu_attr.attr.mode = S_IRUGO | S_IWUSR;
2777                info->port_mtu_attr.store     = set_port_ib_mtu;
2778        }
2779        info->port_mtu_attr.show      = show_port_ib_mtu;
2780        sysfs_attr_init(&info->port_mtu_attr.attr);
2781
2782        err = device_create_file(&dev->persist->pdev->dev,
2783                                 &info->port_mtu_attr);
2784        if (err) {
2785                mlx4_err(dev, "Failed to create mtu file for port %d\n", port);
2786                device_remove_file(&info->dev->persist->pdev->dev,
2787                                   &info->port_attr);
2788                info->port = -1;
2789        }
2790
2791        return err;
2792}
2793
2794static void mlx4_cleanup_port_info(struct mlx4_port_info *info)
2795{
2796        if (info->port < 0)
2797                return;
2798
2799        device_remove_file(&info->dev->persist->pdev->dev, &info->port_attr);
2800        device_remove_file(&info->dev->persist->pdev->dev,
2801                           &info->port_mtu_attr);
2802#ifdef CONFIG_RFS_ACCEL
2803        free_irq_cpu_rmap(info->rmap);
2804        info->rmap = NULL;
2805#endif
2806}
2807
2808static int mlx4_init_steering(struct mlx4_dev *dev)
2809{
2810        struct mlx4_priv *priv = mlx4_priv(dev);
2811        int num_entries = dev->caps.num_ports;
2812        int i, j;
2813
2814        priv->steer = kcalloc(num_entries, sizeof(struct mlx4_steer), GFP_KERNEL);
2815        if (!priv->steer)
2816                return -ENOMEM;
2817
2818        for (i = 0; i < num_entries; i++)
2819                for (j = 0; j < MLX4_NUM_STEERS; j++) {
2820                        INIT_LIST_HEAD(&priv->steer[i].promisc_qps[j]);
2821                        INIT_LIST_HEAD(&priv->steer[i].steer_entries[j]);
2822                }
2823        return 0;
2824}
2825
2826static void mlx4_clear_steering(struct mlx4_dev *dev)
2827{
2828        struct mlx4_priv *priv = mlx4_priv(dev);
2829        struct mlx4_steer_index *entry, *tmp_entry;
2830        struct mlx4_promisc_qp *pqp, *tmp_pqp;
2831        int num_entries = dev->caps.num_ports;
2832        int i, j;
2833
2834        for (i = 0; i < num_entries; i++) {
2835                for (j = 0; j < MLX4_NUM_STEERS; j++) {
2836                        list_for_each_entry_safe(pqp, tmp_pqp,
2837                                                 &priv->steer[i].promisc_qps[j],
2838                                                 list) {
2839                                list_del(&pqp->list);
2840                                kfree(pqp);
2841                        }
2842                        list_for_each_entry_safe(entry, tmp_entry,
2843                                                 &priv->steer[i].steer_entries[j],
2844                                                 list) {
2845                                list_del(&entry->list);
2846                                list_for_each_entry_safe(pqp, tmp_pqp,
2847                                                         &entry->duplicates,
2848                                                         list) {
2849                                        list_del(&pqp->list);
2850                                        kfree(pqp);
2851                                }
2852                                kfree(entry);
2853                        }
2854                }
2855        }
2856        kfree(priv->steer);
2857}
2858
2859static int extended_func_num(struct pci_dev *pdev)
2860{
2861        return PCI_SLOT(pdev->devfn) * 8 + PCI_FUNC(pdev->devfn);
2862}
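/*
 * PCI_SLOT() is devfn >> 3 and PCI_FUNC() is devfn & 7, so this simply
 * reconstructs the flat devfn as an ARI-style function number.  For
 * example, devfn 0x0a (slot 1, function 2) yields 1 * 8 + 2 = 10; the
 * PF is function 0 and its VFs typically follow at 1, 2, ...
 */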
2863
2864#define MLX4_OWNER_BASE 0x8069c
2865#define MLX4_OWNER_SIZE 4
2866
2867static int mlx4_get_ownership(struct mlx4_dev *dev)
2868{
2869        void __iomem *owner;
2870        u32 ret;
2871
2872        if (pci_channel_offline(dev->persist->pdev))
2873                return -EIO;
2874
2875        owner = ioremap(pci_resource_start(dev->persist->pdev, 0) +
2876                        MLX4_OWNER_BASE,
2877                        MLX4_OWNER_SIZE);
2878        if (!owner) {
2879                mlx4_err(dev, "Failed to obtain ownership bit\n");
2880                return -ENOMEM;
2881        }
2882
2883        ret = readl(owner);
2884        iounmap(owner);
2885        return (int) !!ret;
2886}
2887
2888static void mlx4_free_ownership(struct mlx4_dev *dev)
2889{
2890        void __iomem *owner;
2891
2892        if (pci_channel_offline(dev->persist->pdev))
2893                return;
2894
2895        owner = ioremap(pci_resource_start(dev->persist->pdev, 0) +
2896                        MLX4_OWNER_BASE,
2897                        MLX4_OWNER_SIZE);
2898        if (!owner) {
2899                mlx4_err(dev, "Failed to obtain ownership bit\n");
2900                return;
2901        }
2902        writel(0, owner);
2903        msleep(1000);
2904        iounmap(owner);
2905}
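/*
 * The ownership word in BAR 0 behaves as a hardware semaphore: a read
 * returning zero grants ownership to the reader, while a nonzero read
 * means another function already holds the device.  Writing zero
 * releases it; the msleep() above gives the device time to observe
 * the release before another driver instance tries to claim it.
 */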
2906
2907#define SRIOV_VALID_STATE(flags) (!!((flags) & MLX4_FLAG_SRIOV) ==\
2908                                  !!((flags) & MLX4_FLAG_MASTER))
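/*
 * The only valid combinations are both MLX4_FLAG_SRIOV and
 * MLX4_FLAG_MASTER set (an SR-IOV PF) or both clear (a plain PF or a
 * VF); one flag without the other is an inconsistent state.
 */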
2909
2910static u64 mlx4_enable_sriov(struct mlx4_dev *dev, struct pci_dev *pdev,
2911                             u8 total_vfs, int existing_vfs, int reset_flow)
2912{
2913        u64 dev_flags = dev->flags;
2914        int err = 0;
2915
2916        if (reset_flow) {
2917                dev->dev_vfs = kcalloc(total_vfs, sizeof(*dev->dev_vfs),
2918                                       GFP_KERNEL);
2919                if (!dev->dev_vfs)
2920                        goto free_mem;
2921                return dev_flags;
2922        }
2923
2924        atomic_inc(&pf_loading);
2925        if (dev->flags & MLX4_FLAG_SRIOV) {
2926                if (existing_vfs != total_vfs) {
2927                        mlx4_err(dev, "SR-IOV was already enabled, but with num_vfs (%d) different than requested (%d)\n",
2928                                 existing_vfs, total_vfs);
2929                        total_vfs = existing_vfs;
2930                }
2931        }
2932
2933        dev->dev_vfs = kcalloc(total_vfs, sizeof(*dev->dev_vfs), GFP_KERNEL);
2934        if (!dev->dev_vfs) {
2935                mlx4_err(dev, "Failed to allocate memory for VFs\n");
2936                goto disable_sriov;
2937        }
2938
2939        if (!(dev->flags & MLX4_FLAG_SRIOV)) {
2940                mlx4_warn(dev, "Enabling SR-IOV with %d VFs\n", total_vfs);
2941                err = pci_enable_sriov(pdev, total_vfs);
2942        }
2943        if (err) {
2944                mlx4_err(dev, "Failed to enable SR-IOV, continuing without SR-IOV (err = %d)\n",
2945                         err);
2946                goto disable_sriov;
2947        } else {
2948                mlx4_warn(dev, "Running in master mode\n");
2949                dev_flags |= MLX4_FLAG_SRIOV |
2950                        MLX4_FLAG_MASTER;
2951                dev_flags &= ~MLX4_FLAG_SLAVE;
2952                dev->persist->num_vfs = total_vfs;
2953        }
2954        return dev_flags;
2955
2956disable_sriov:
2957        atomic_dec(&pf_loading);
2958free_mem:
2959        dev->persist->num_vfs = 0;
2960        kfree(dev->dev_vfs);
2961        dev->dev_vfs = NULL;
2962        return dev_flags & ~MLX4_FLAG_MASTER;
2963}
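/*
 * mlx4_enable_sriov() returns the flags the caller should adopt: on
 * success MLX4_FLAG_SRIOV and MLX4_FLAG_MASTER are set and
 * MLX4_FLAG_SLAVE is cleared; on any failure MLX4_FLAG_MASTER is
 * stripped from the returned flags so mlx4_load_one() continues
 * without SR-IOV.
 */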
2964
2965enum {
2966        MLX4_DEV_CAP_CHECK_NUM_VFS_ABOVE_64 = -1,
2967};
2968
2969static int mlx4_check_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap,
2970                              int *nvfs)
2971{
2972        int requested_vfs = nvfs[0] + nvfs[1] + nvfs[2];
2973        /* Checking for 64 VFs as a limitation of CX2 */
2974        if (!(dev_cap->flags2 & MLX4_DEV_CAP_FLAG2_80_VFS) &&
2975            requested_vfs >= 64) {
2976                mlx4_err(dev, "Requested %d VFs, but FW does not support more than 64\n",
2977                         requested_vfs);
2978                return MLX4_DEV_CAP_CHECK_NUM_VFS_ABOVE_64;
2979        }
2980        return 0;
2981}
2982
2983static int mlx4_load_one(struct pci_dev *pdev, int pci_dev_data,
2984                         int total_vfs, int *nvfs, struct mlx4_priv *priv,
2985                         int reset_flow)
2986{
2987        struct mlx4_dev *dev;
2988        unsigned sum = 0;
2989        int err;
2990        int port;
2991        int i;
2992        struct mlx4_dev_cap *dev_cap = NULL;
2993        int existing_vfs = 0;
2994
2995        dev = &priv->dev;
2996
2997        INIT_LIST_HEAD(&priv->ctx_list);
2998        spin_lock_init(&priv->ctx_lock);
2999
3000        mutex_init(&priv->port_mutex);
3001        mutex_init(&priv->bond_mutex);
3002
3003        INIT_LIST_HEAD(&priv->pgdir_list);
3004        mutex_init(&priv->pgdir_mutex);
3005
3006        INIT_LIST_HEAD(&priv->bf_list);
3007        mutex_init(&priv->bf_mutex);
3008
3009        dev->rev_id = pdev->revision;
3010        dev->numa_node = dev_to_node(&pdev->dev);
3011
3012        /* Detect if this device is a virtual function */
3013        if (pci_dev_data & MLX4_PCI_DEV_IS_VF) {
3014                mlx4_warn(dev, "Detected virtual function - running in slave mode\n");
3015                dev->flags |= MLX4_FLAG_SLAVE;
3016        } else {
3017                /* We reset the device and enable SRIOV only for physical
3018                 * devices.  Try to claim ownership on the device;
3019                 * if already taken, skip -- do not allow multiple PFs */
3020                err = mlx4_get_ownership(dev);
3021                if (err) {
3022                        if (err < 0)
3023                                return err;
3024                        else {
3025                                mlx4_warn(dev, "Multiple PFs not yet supported - Skipping PF\n");
3026                                return -EINVAL;
3027                        }
3028                }
3029
3030                atomic_set(&priv->opreq_count, 0);
3031                INIT_WORK(&priv->opreq_task, mlx4_opreq_action);
3032
3033                /*
3034                 * Now reset the HCA before we touch the PCI capabilities or
3035                 * attempt a firmware command, since a boot ROM may have left
3036                 * the HCA in an undefined state.
3037                 */
3038                err = mlx4_reset(dev);
3039                if (err) {
3040                        mlx4_err(dev, "Failed to reset HCA, aborting\n");
3041                        goto err_sriov;
3042                }
3043
3044                if (total_vfs) {
3045                        dev->flags = MLX4_FLAG_MASTER;
3046                        existing_vfs = pci_num_vf(pdev);
3047                        if (existing_vfs)
3048                                dev->flags |= MLX4_FLAG_SRIOV;
3049                        dev->persist->num_vfs = total_vfs;
3050                }
3051        }
3052
3053        /* On load, remove any previous indication of internal error;
3054         * the device is up.
3055         */
3056        dev->persist->state = MLX4_DEVICE_STATE_UP;
3057
3058slave_start:
3059        err = mlx4_cmd_init(dev);
3060        if (err) {
3061                mlx4_err(dev, "Failed to init command interface, aborting\n");
3062                goto err_sriov;
3063        }
3064
3065        /* In slave functions, the communication channel must be initialized
3066         * before posting commands. Also, init num_slaves before calling
3067         * mlx4_init_hca */
3068        if (mlx4_is_mfunc(dev)) {
3069                if (mlx4_is_master(dev)) {
3070                        dev->num_slaves = MLX4_MAX_NUM_SLAVES;
3071
3072                } else {
3073                        dev->num_slaves = 0;
3074                        err = mlx4_multi_func_init(dev);
3075                        if (err) {
3076                                mlx4_err(dev, "Failed to init slave mfunc interface, aborting\n");
3077                                goto err_cmd;
3078                        }
3079                }
3080        }
3081
3082        err = mlx4_init_fw(dev);
3083        if (err) {
3084                mlx4_err(dev, "Failed to init fw, aborting.\n");
3085                goto err_mfunc;
3086        }
3087
3088        if (mlx4_is_master(dev)) {
3089                /* When we hit the goto slave_start below, dev_cap is already initialized */
3090                if (!dev_cap) {
3091                        dev_cap = kzalloc(sizeof(*dev_cap), GFP_KERNEL);
3092
3093                        if (!dev_cap) {
3094                                err = -ENOMEM;
3095                                goto err_fw;
3096                        }
3097
3098                        err = mlx4_QUERY_DEV_CAP(dev, dev_cap);
3099                        if (err) {
3100                                mlx4_err(dev, "QUERY_DEV_CAP command failed, aborting.\n");
3101                                goto err_fw;
3102                        }
3103
3104                        if (mlx4_check_dev_cap(dev, dev_cap, nvfs))
3105                                goto err_fw;
3106
3107                        if (!(dev_cap->flags2 & MLX4_DEV_CAP_FLAG2_SYS_EQS)) {
3108                                u64 dev_flags = mlx4_enable_sriov(dev, pdev,
3109                                                                  total_vfs,
3110                                                                  existing_vfs,
3111                                                                  reset_flow);
3112
3113                                mlx4_close_fw(dev);
3114                                mlx4_cmd_cleanup(dev, MLX4_CMD_CLEANUP_ALL);
3115                                dev->flags = dev_flags;
3116                                if (!SRIOV_VALID_STATE(dev->flags)) {
3117                                        mlx4_err(dev, "Invalid SRIOV state\n");
3118                                        goto err_sriov;
3119                                }
3120                                err = mlx4_reset(dev);
3121                                if (err) {
3122                                        mlx4_err(dev, "Failed to reset HCA, aborting.\n");
3123                                        goto err_sriov;
3124                                }
3125                                goto slave_start;
3126                        }
3127                } else {
3128                        /* Legacy mode FW requires SRIOV to be enabled before
3129                         * doing QUERY_DEV_CAP, since max_eq's value is different if
3130                         * SRIOV is enabled.
3131                         */
3132                        memset(dev_cap, 0, sizeof(*dev_cap));
3133                        err = mlx4_QUERY_DEV_CAP(dev, dev_cap);
3134                        if (err) {
3135                                mlx4_err(dev, "QUERY_DEV_CAP command failed, aborting.\n");
3136                                goto err_fw;
3137                        }
3138
3139                        if (mlx4_check_dev_cap(dev, dev_cap, nvfs))
3140                                goto err_fw;
3141                }
3142        }
3143
3144        err = mlx4_init_hca(dev);
3145        if (err) {
3146                if (err == -EACCES) {
3147                        /* Not the primary physical function;
3148                         * running in slave mode */
3149                        mlx4_cmd_cleanup(dev, MLX4_CMD_CLEANUP_ALL);
3150                        /* We're not a PF */
3151                        if (dev->flags & MLX4_FLAG_SRIOV) {
3152                                if (!existing_vfs)
3153                                        pci_disable_sriov(pdev);
3154                                if (mlx4_is_master(dev) && !reset_flow)
3155                                        atomic_dec(&pf_loading);
3156                                dev->flags &= ~MLX4_FLAG_SRIOV;
3157                        }
3158                        if (!mlx4_is_slave(dev))
3159                                mlx4_free_ownership(dev);
3160                        dev->flags |= MLX4_FLAG_SLAVE;
3161                        dev->flags &= ~MLX4_FLAG_MASTER;
3162                        goto slave_start;
3163                } else
3164                        goto err_fw;
3165        }
3166
3167        if (mlx4_is_master(dev) && (dev_cap->flags2 & MLX4_DEV_CAP_FLAG2_SYS_EQS)) {
3168                u64 dev_flags = mlx4_enable_sriov(dev, pdev, total_vfs,
3169                                                  existing_vfs, reset_flow);
3170
3171                if ((dev->flags ^ dev_flags) & (MLX4_FLAG_MASTER | MLX4_FLAG_SLAVE)) {
3172                        mlx4_cmd_cleanup(dev, MLX4_CMD_CLEANUP_VHCR);
3173                        dev->flags = dev_flags;
3174                        err = mlx4_cmd_init(dev);
3175                        if (err) {
3176                                /* Only VHCR is cleaned up, so could still
3177                                 * send FW commands
3178                                 */
3179                                mlx4_err(dev, "Failed to init VHCR command interface, aborting\n");
3180                                goto err_close;
3181                        }
3182                } else {
3183                        dev->flags = dev_flags;
3184                }
3185
3186                if (!SRIOV_VALID_STATE(dev->flags)) {
3187                        mlx4_err(dev, "Invalid SRIOV state\n");
3188                        goto err_close;
3189                }
3190        }
3191
3192        /* Check whether the device is functioning at its maximum possible
3193         * speed. This call has no return code; it just warns the user if
3194         * the bus under-satisfies the PCI Express device's capabilities.
3195         */
3196        if (!mlx4_is_slave(dev))
3197                mlx4_check_pcie_caps(dev);
3198
3199        /* In master functions, the communication channel must be initialized
3200         * after obtaining its address from fw */
3201        if (mlx4_is_master(dev)) {
3202                if (dev->caps.num_ports < 2 &&
3203                    num_vfs_argc > 1) {
3204                        err = -EINVAL;
3205                        mlx4_err(dev,
3206                                 "Error: Trying to configure VFs on port 2, but HCA has only %d physical ports\n",
3207                                 dev->caps.num_ports);
3208                        goto err_close;
3209                }
3210                memcpy(dev->persist->nvfs, nvfs, sizeof(dev->persist->nvfs));
3211
3212                for (i = 0;
3213                     i < sizeof(dev->persist->nvfs)/
3214                     sizeof(dev->persist->nvfs[0]); i++) {
3215                        unsigned j;
3216
3217                        for (j = 0; j < dev->persist->nvfs[i]; ++sum, ++j) {
3218                                dev->dev_vfs[sum].min_port = i < 2 ? i + 1 : 1;
3219                                dev->dev_vfs[sum].n_ports = i < 2 ? 1 :
3220                                        dev->caps.num_ports;
3221                        }
3222                }
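                /*
                 * Example of the mapping above (assuming nvfs = {1, 0, 2}):
                 * VF 0 comes from the port1-only slot, so it gets
                 * min_port = 1 and n_ports = 1; VFs 1 and 2 come from the
                 * dual-port slot, so they get min_port = 1 and
                 * n_ports = dev->caps.num_ports.
                 */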
3223
3227                err = mlx4_multi_func_init(dev);
3228                if (err) {
3229                        mlx4_err(dev, "Failed to init master mfunc interface, aborting.\n");
3230                        goto err_close;
3231                }
3232        }
3233
3234        err = mlx4_alloc_eq_table(dev);
3235        if (err)
3236                goto err_master_mfunc;
3237
3238        bitmap_zero(priv->msix_ctl.pool_bm, MAX_MSIX);
3239        mutex_init(&priv->msix_ctl.pool_lock);
3240
3241        mlx4_enable_msi_x(dev);
3242        if ((mlx4_is_mfunc(dev)) &&
3243            !(dev->flags & MLX4_FLAG_MSI_X)) {
3244                err = -ENOSYS;
3245                mlx4_err(dev, "INTx is not supported in multi-function mode, aborting\n");
3246                goto err_free_eq;
3247        }
3248
3249        if (!mlx4_is_slave(dev)) {
3250                err = mlx4_init_steering(dev);
3251                if (err)
3252                        goto err_disable_msix;
3253        }
3254
3255        err = mlx4_setup_hca(dev);
3256        if (err == -EBUSY && (dev->flags & MLX4_FLAG_MSI_X) &&
3257            !mlx4_is_mfunc(dev)) {
3258                dev->flags &= ~MLX4_FLAG_MSI_X;
3259                dev->caps.num_comp_vectors = 1;
3260                pci_disable_msix(pdev);
3261                err = mlx4_setup_hca(dev);
3262        }
3263
3264        if (err)
3265                goto err_steer;
3266
3267        mlx4_init_quotas(dev);
3268        /* When the PF's resources are ready, arm its comm channel so it
3269         * can start receiving commands.
3270         */
3271        if (mlx4_is_master(dev)) {
3272                err = mlx4_ARM_COMM_CHANNEL(dev);
3273                if (err) {
3274                        mlx4_err(dev, "Failed to arm comm channel eq: %x\n",
3275                                 err);
3276                        goto err_steer;
3277                }
3278        }
3279
3280        for (port = 1; port <= dev->caps.num_ports; port++) {
3281                err = mlx4_init_port_info(dev, port);
3282                if (err)
3283                        goto err_port;
3284        }
3285
3286        priv->v2p.port1 = 1;
3287        priv->v2p.port2 = 2;
3288
3289        err = mlx4_register_device(dev);
3290        if (err)
3291                goto err_port;
3292
3293        mlx4_request_modules(dev);
3294
3295        mlx4_sense_init(dev);
3296        mlx4_start_sense(dev);
3297
3298        priv->removed = 0;
3299
3300        if (mlx4_is_master(dev) && dev->persist->num_vfs && !reset_flow)
3301                atomic_dec(&pf_loading);
3302
3303        kfree(dev_cap);
3304        return 0;
3305
3306err_port:
3307        for (--port; port >= 1; --port)
3308                mlx4_cleanup_port_info(&priv->port[port]);
3309
3310        mlx4_cleanup_default_counters(dev);
3311        if (!mlx4_is_slave(dev))
3312                mlx4_cleanup_counters_table(dev);
3313        mlx4_cleanup_qp_table(dev);
3314        mlx4_cleanup_srq_table(dev);
3315        mlx4_cleanup_cq_table(dev);
3316        mlx4_cmd_use_polling(dev);
3317        mlx4_cleanup_eq_table(dev);
3318        mlx4_cleanup_mcg_table(dev);
3319        mlx4_cleanup_mr_table(dev);
3320        mlx4_cleanup_xrcd_table(dev);
3321        mlx4_cleanup_pd_table(dev);
3322        mlx4_cleanup_uar_table(dev);
3323
3324err_steer:
3325        if (!mlx4_is_slave(dev))
3326                mlx4_clear_steering(dev);
3327
3328err_disable_msix:
3329        if (dev->flags & MLX4_FLAG_MSI_X)
3330                pci_disable_msix(pdev);
3331
3332err_free_eq:
3333        mlx4_free_eq_table(dev);
3334
3335err_master_mfunc:
3336        if (mlx4_is_master(dev)) {
3337                mlx4_free_resource_tracker(dev, RES_TR_FREE_STRUCTS_ONLY);
3338                mlx4_multi_func_cleanup(dev);
3339        }
3340
3341        if (mlx4_is_slave(dev)) {
3342                kfree(dev->caps.qp0_qkey);
3343                kfree(dev->caps.qp0_tunnel);
3344                kfree(dev->caps.qp0_proxy);
3345                kfree(dev->caps.qp1_tunnel);
3346                kfree(dev->caps.qp1_proxy);
3347        }
3348
3349err_close:
3350        mlx4_close_hca(dev);
3351
3352err_fw:
3353        mlx4_close_fw(dev);
3354
3355err_mfunc:
3356        if (mlx4_is_slave(dev))
3357                mlx4_multi_func_cleanup(dev);
3358
3359err_cmd:
3360        mlx4_cmd_cleanup(dev, MLX4_CMD_CLEANUP_ALL);
3361
3362err_sriov:
3363        if (dev->flags & MLX4_FLAG_SRIOV && !existing_vfs) {
3364                pci_disable_sriov(pdev);
3365                dev->flags &= ~MLX4_FLAG_SRIOV;
3366        }
3367
3368        if (mlx4_is_master(dev) && dev->persist->num_vfs && !reset_flow)
3369                atomic_dec(&pf_loading);
3370
3371        kfree(priv->dev.dev_vfs);
3372
3373        if (!mlx4_is_slave(dev))
3374                mlx4_free_ownership(dev);
3375
3376        kfree(dev_cap);
3377        return err;
3378}
3379
3380static int __mlx4_init_one(struct pci_dev *pdev, int pci_dev_data,
3381                           struct mlx4_priv *priv)
3382{
3383        int err;
3384        int nvfs[MLX4_MAX_PORTS + 1] = {0, 0, 0};
3385        int prb_vf[MLX4_MAX_PORTS + 1] = {0, 0, 0};
3386        const int param_map[MLX4_MAX_PORTS + 1][MLX4_MAX_PORTS + 1] = {
3387                {2, 0, 0}, {0, 1, 2}, {0, 1, 2} };
3388        unsigned total_vfs = 0;
3389        unsigned int i;
3390
3391        pr_info(DRV_NAME ": Initializing %s\n", pci_name(pdev));
3392
3393        err = pci_enable_device(pdev);
3394        if (err) {
3395                dev_err(&pdev->dev, "Cannot enable PCI device, aborting\n");
3396                return err;
3397        }
3398
3399        /* Due to the requirement that all VFs and the PF are *guaranteed*
3400         * two MACs per port, we must limit the number of VFs to 63 (since
3401         * there are 128 MACs)
3402         */
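        /*
         * param_map translates the positional num_vfs/probe_vf arguments
         * into the nvfs slots {port1-only, port2-only, dual-port}.  For
         * example, "num_vfs=4" (one argument) maps through param_map[0]
         * to nvfs[2] = 4, i.e. four dual-port VFs, while "num_vfs=1,2,3"
         * fills nvfs = {1, 2, 3} directly.
         */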
3403        for (i = 0; i < sizeof(nvfs)/sizeof(nvfs[0]) && i < num_vfs_argc;
3404             total_vfs += nvfs[param_map[num_vfs_argc - 1][i]], i++) {
3405                nvfs[param_map[num_vfs_argc - 1][i]] = num_vfs[i];
3406                if (nvfs[i] < 0) {
3407                        dev_err(&pdev->dev, "num_vfs module parameter cannot be negative\n");
3408                        err = -EINVAL;
3409                        goto err_disable_pdev;
3410                }
3411        }
3412        for (i = 0; i < sizeof(prb_vf)/sizeof(prb_vf[0]) && i < probe_vfs_argc;
3413             i++) {
3414                prb_vf[param_map[probe_vfs_argc - 1][i]] = probe_vf[i];
3415                if (prb_vf[i] < 0 || prb_vf[i] > nvfs[i]) {
3416                        dev_err(&pdev->dev, "probe_vf module parameter cannot be negative or greater than num_vfs\n");
3417                        err = -EINVAL;
3418                        goto err_disable_pdev;
3419                }
3420        }
3421        if (total_vfs >= MLX4_MAX_NUM_VF) {
3422                dev_err(&pdev->dev,
3423                        "Requested more VF's (%d) than allowed (%d)\n",
3424                        total_vfs, MLX4_MAX_NUM_VF - 1);
3425                err = -EINVAL;
3426                goto err_disable_pdev;
3427        }
3428
3429        for (i = 0; i < MLX4_MAX_PORTS; i++) {
3430                if (nvfs[i] + nvfs[2] >= MLX4_MAX_NUM_VF_P_PORT) {
3431                        dev_err(&pdev->dev,
3432                                "Requested more VF's (%d) for port (%d) than allowed (%d)\n",
3433                                nvfs[i] + nvfs[2], i + 1,
3434                                MLX4_MAX_NUM_VF_P_PORT - 1);
3435                        err = -EINVAL;
3436                        goto err_disable_pdev;
3437                }
3438        }
3439
3440        /* Check for BARs. */
3441        if (!(pci_dev_data & MLX4_PCI_DEV_IS_VF) &&
3442            !(pci_resource_flags(pdev, 0) & IORESOURCE_MEM)) {
3443                dev_err(&pdev->dev, "Missing DCS, aborting (driver_data: 0x%x, pci_resource_flags(pdev, 0):0x%lx)\n",
3444                        pci_dev_data, pci_resource_flags(pdev, 0));
3445                err = -ENODEV;
3446                goto err_disable_pdev;
3447        }
3448        if (!(pci_resource_flags(pdev, 2) & IORESOURCE_MEM)) {
3449                dev_err(&pdev->dev, "Missing UAR, aborting\n");
3450                err = -ENODEV;
3451                goto err_disable_pdev;
3452        }
3453
3454        err = pci_request_regions(pdev, DRV_NAME);
3455        if (err) {
3456                dev_err(&pdev->dev, "Couldn't get PCI resources, aborting\n");
3457                goto err_disable_pdev;
3458        }
3459
3460        pci_set_master(pdev);
3461
3462        err = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
3463        if (err) {
3464                dev_warn(&pdev->dev, "Warning: couldn't set 64-bit PCI DMA mask\n");
3465                err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
3466                if (err) {
3467                        dev_err(&pdev->dev, "Can't set PCI DMA mask, aborting\n");
3468                        goto err_release_regions;
3469                }
3470        }
3471        err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
3472        if (err) {
3473                dev_warn(&pdev->dev, "Warning: couldn't set 64-bit consistent PCI DMA mask\n");
3474                err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
3475                if (err) {
3476                        dev_err(&pdev->dev, "Can't set consistent PCI DMA mask, aborting\n");
3477                        goto err_release_regions;
3478                }
3479        }
3480
3481        /* Allow large DMA segments, up to the firmware limit of 1 GB */
3482        dma_set_max_seg_size(&pdev->dev, 1024 * 1024 * 1024);
3483        /* Detect if this device is a virtual function */
3484        if (pci_dev_data & MLX4_PCI_DEV_IS_VF) {
3485                /* When acting as PF, we normally skip VFs unless explicitly
3486                 * requested to probe them.
3487                 */
3488                if (total_vfs) {
3489                        unsigned vfs_offset = 0;
3490
3491                        for (i = 0; i < sizeof(nvfs)/sizeof(nvfs[0]) &&
3492                             vfs_offset + nvfs[i] < extended_func_num(pdev);
3493                             vfs_offset += nvfs[i], i++)
3494                                ;
3495                        if (i == sizeof(nvfs)/sizeof(nvfs[0])) {
3496                                err = -ENODEV;
3497                                goto err_release_regions;
3498                        }
3499                        if ((extended_func_num(pdev) - vfs_offset)
3500                            > prb_vf[i]) {
3501                                dev_warn(&pdev->dev, "Skipping virtual function:%d\n",
3502                                         extended_func_num(pdev));
3503                                err = -ENODEV;
3504                                goto err_release_regions;
3505                        }
3506                }
3507        }
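/*
 * Example of the probe filter above (assuming nvfs = {1, 2, 0} and
 * prb_vf = {1, 0, 0}): the VF at extended function number 1 falls in
 * slot 0 and 1 - 0 <= prb_vf[0], so the PF probes it; the VFs at
 * function numbers 2 and 3 fall in slot 1, where prb_vf[1] == 0, so
 * both are skipped and left for a guest to probe.
 */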
3508
3509        err = mlx4_catas_init(&priv->dev);
3510        if (err)
3511                goto err_release_regions;
3512
3513        err = mlx4_load_one(pdev, pci_dev_data, total_vfs, nvfs, priv, 0);
3514        if (err)
3515                goto err_catas;
3516
3517        return 0;
3518
3519err_catas:
3520        mlx4_catas_end(&priv->dev);
3521
3522err_release_regions:
3523        pci_release_regions(pdev);
3524
3525err_disable_pdev:
3526        pci_disable_device(pdev);
3527        pci_set_drvdata(pdev, NULL);
3528        return err;
3529}
3530
3531static int mlx4_init_one(struct pci_dev *pdev, const struct pci_device_id *id)
3532{
3533        struct mlx4_priv *priv;
3534        struct mlx4_dev *dev;
3535        int ret;
3536
3537        printk_once(KERN_INFO "%s", mlx4_version);
3538
3539        priv = kzalloc(sizeof(*priv), GFP_KERNEL);
3540        if (!priv)
3541                return -ENOMEM;
3542
3543        dev       = &priv->dev;
3544        dev->persist = kzalloc(sizeof(*dev->persist), GFP_KERNEL);
3545        if (!dev->persist) {
3546                kfree(priv);
3547                return -ENOMEM;
3548        }
3549        dev->persist->pdev = pdev;
3550        dev->persist->dev = dev;
3551        pci_set_drvdata(pdev, dev->persist);
3552        priv->pci_dev_data = id->driver_data;
3553        mutex_init(&dev->persist->device_state_mutex);
3554        mutex_init(&dev->persist->interface_state_mutex);
3555
3556        ret =  __mlx4_init_one(pdev, id->driver_data, priv);
3557        if (ret) {
3558                kfree(dev->persist);
3559                kfree(priv);
3560        } else {
3561                pci_save_state(pdev);
3562        }
3563
3564        return ret;
3565}
3566
3567static void mlx4_clean_dev(struct mlx4_dev *dev)
3568{
3569        struct mlx4_dev_persistent *persist = dev->persist;
3570        struct mlx4_priv *priv = mlx4_priv(dev);
3571        unsigned long   flags = (dev->flags & RESET_PERSIST_MASK_FLAGS);
3572
3573        memset(priv, 0, sizeof(*priv));
3574        priv->dev.persist = persist;
3575        priv->dev.flags = flags;
3576}
3577
3578static void mlx4_unload_one(struct pci_dev *pdev)
3579{
3580        struct mlx4_dev_persistent *persist = pci_get_drvdata(pdev);
3581        struct mlx4_dev  *dev  = persist->dev;
3582        struct mlx4_priv *priv = mlx4_priv(dev);
3583        int               pci_dev_data;
3584        int p, i;
3585
3586        if (priv->removed)
3587                return;
3588
3589        /* save the current port types for later use */
3590        for (i = 0; i < dev->caps.num_ports; i++) {
3591                dev->persist->curr_port_type[i] = dev->caps.port_type[i + 1];
3592                dev->persist->curr_port_poss_type[i] =
3593                        dev->caps.possible_type[i + 1];
3594        }
3595
3596        pci_dev_data = priv->pci_dev_data;
3597
3598        mlx4_stop_sense(dev);
3599        mlx4_unregister_device(dev);
3600
3601        for (p = 1; p <= dev->caps.num_ports; p++) {
3602                mlx4_cleanup_port_info(&priv->port[p]);
3603                mlx4_CLOSE_PORT(dev, p);
3604        }
3605
3606        if (mlx4_is_master(dev))
3607                mlx4_free_resource_tracker(dev,
3608                                           RES_TR_FREE_SLAVES_ONLY);
3609
3610        mlx4_cleanup_default_counters(dev);
3611        if (!mlx4_is_slave(dev))
3612                mlx4_cleanup_counters_table(dev);
3613        mlx4_cleanup_qp_table(dev);
3614        mlx4_cleanup_srq_table(dev);
3615        mlx4_cleanup_cq_table(dev);
3616        mlx4_cmd_use_polling(dev);
3617        mlx4_cleanup_eq_table(dev);
3618        mlx4_cleanup_mcg_table(dev);
3619        mlx4_cleanup_mr_table(dev);
3620        mlx4_cleanup_xrcd_table(dev);
3621        mlx4_cleanup_pd_table(dev);
3622
3623        if (mlx4_is_master(dev))
3624                mlx4_free_resource_tracker(dev,
3625                                           RES_TR_FREE_STRUCTS_ONLY);
3626
3627        iounmap(priv->kar);
3628        mlx4_uar_free(dev, &priv->driver_uar);
3629        mlx4_cleanup_uar_table(dev);
3630        if (!mlx4_is_slave(dev))
3631                mlx4_clear_steering(dev);
3632        mlx4_free_eq_table(dev);
3633        if (mlx4_is_master(dev))
3634                mlx4_multi_func_cleanup(dev);
3635        mlx4_close_hca(dev);
3636        mlx4_close_fw(dev);
3637        if (mlx4_is_slave(dev))
3638                mlx4_multi_func_cleanup(dev);
3639        mlx4_cmd_cleanup(dev, MLX4_CMD_CLEANUP_ALL);
3640
3641        if (dev->flags & MLX4_FLAG_MSI_X)
3642                pci_disable_msix(pdev);
3643
3644        if (!mlx4_is_slave(dev))
3645                mlx4_free_ownership(dev);
3646
3647        kfree(dev->caps.qp0_qkey);
3648        kfree(dev->caps.qp0_tunnel);
3649        kfree(dev->caps.qp0_proxy);
3650        kfree(dev->caps.qp1_tunnel);
3651        kfree(dev->caps.qp1_proxy);
3652        kfree(dev->dev_vfs);
3653
3654        mlx4_clean_dev(dev);
3655        priv->pci_dev_data = pci_dev_data;
3656        priv->removed = 1;
3657}
3658
3659static void mlx4_remove_one(struct pci_dev *pdev)
3660{
3661        struct mlx4_dev_persistent *persist = pci_get_drvdata(pdev);
3662        struct mlx4_dev  *dev  = persist->dev;
3663        struct mlx4_priv *priv = mlx4_priv(dev);
3664        int active_vfs = 0;
3665
3666        mutex_lock(&persist->interface_state_mutex);
3667        persist->interface_state |= MLX4_INTERFACE_STATE_DELETION;
3668        mutex_unlock(&persist->interface_state_mutex);
3669
3670        /* Disabling SR-IOV is not allowed while there are active VFs */
3671        if (mlx4_is_master(dev) && dev->flags & MLX4_FLAG_SRIOV) {
3672                active_vfs = mlx4_how_many_lives_vf(dev);
3673                if (active_vfs) {
3674                        pr_warn("Removing PF when there are active VF's !!\n");
3675                        pr_warn("Will not disable SR-IOV.\n");
3676                }
3677        }
3678
3679        /* The device is marked for deletion; proceed without the lock,
3680         * letting other tasks terminate.
3681         */
3682        if (persist->interface_state & MLX4_INTERFACE_STATE_UP)
3683                mlx4_unload_one(pdev);
3684        else
3685                mlx4_info(dev, "%s: interface is down\n", __func__);
3686        mlx4_catas_end(dev);
3687        if (dev->flags & MLX4_FLAG_SRIOV && !active_vfs) {
3688                mlx4_warn(dev, "Disabling SR-IOV\n");
3689                pci_disable_sriov(pdev);
3690        }
3691
3692        pci_release_regions(pdev);
3693        pci_disable_device(pdev);
3694        kfree(dev->persist);
3695        kfree(priv);
3696        pci_set_drvdata(pdev, NULL);
3697}
3698
3699static int restore_current_port_types(struct mlx4_dev *dev,
3700                                      enum mlx4_port_type *types,
3701                                      enum mlx4_port_type *poss_types)
3702{
3703        struct mlx4_priv *priv = mlx4_priv(dev);
3704        int err, i;
3705
3706        mlx4_stop_sense(dev);
3707
3708        mutex_lock(&priv->port_mutex);
3709        for (i = 0; i < dev->caps.num_ports; i++)
3710                dev->caps.possible_type[i + 1] = poss_types[i];
3711        err = mlx4_change_port_types(dev, types);
3712        mlx4_start_sense(dev);
3713        mutex_unlock(&priv->port_mutex);
3714
3715        return err;
3716}
3717
3718int mlx4_restart_one(struct pci_dev *pdev)
3719{
3720        struct mlx4_dev_persistent *persist = pci_get_drvdata(pdev);
3721        struct mlx4_dev  *dev  = persist->dev;
3722        struct mlx4_priv *priv = mlx4_priv(dev);
3723        int nvfs[MLX4_MAX_PORTS + 1] = {0, 0, 0};
3724        int pci_dev_data, err, total_vfs;
3725
3726        pci_dev_data = priv->pci_dev_data;
3727        total_vfs = dev->persist->num_vfs;
3728        memcpy(nvfs, dev->persist->nvfs, sizeof(dev->persist->nvfs));
3729
3730        mlx4_unload_one(pdev);
3731        err = mlx4_load_one(pdev, pci_dev_data, total_vfs, nvfs, priv, 1);
3732        if (err) {
3733                mlx4_err(dev, "%s: ERROR: mlx4_load_one failed, pci_name=%s, err=%d\n",
3734                         __func__, pci_name(pdev), err);
3735                return err;
3736        }
3737
3738        err = restore_current_port_types(dev, dev->persist->curr_port_type,
3739                                         dev->persist->curr_port_poss_type);
3740        if (err)
3741                mlx4_err(dev, "could not restore original port types (%d)\n",
3742                         err);
3743
3744        return err;
3745}
3746
3747static const struct pci_device_id mlx4_pci_table[] = {
3748        /* MT25408 "Hermon" SDR */
3749        { PCI_VDEVICE(MELLANOX, 0x6340), MLX4_PCI_DEV_FORCE_SENSE_PORT },
3750        /* MT25408 "Hermon" DDR */
3751        { PCI_VDEVICE(MELLANOX, 0x634a), MLX4_PCI_DEV_FORCE_SENSE_PORT },
3752        /* MT25408 "Hermon" QDR */
3753        { PCI_VDEVICE(MELLANOX, 0x6354), MLX4_PCI_DEV_FORCE_SENSE_PORT },
3754        /* MT25408 "Hermon" DDR PCIe gen2 */
3755        { PCI_VDEVICE(MELLANOX, 0x6732), MLX4_PCI_DEV_FORCE_SENSE_PORT },
3756        /* MT25408 "Hermon" QDR PCIe gen2 */
3757        { PCI_VDEVICE(MELLANOX, 0x673c), MLX4_PCI_DEV_FORCE_SENSE_PORT },
3758        /* MT25408 "Hermon" EN 10GigE */
3759        { PCI_VDEVICE(MELLANOX, 0x6368), MLX4_PCI_DEV_FORCE_SENSE_PORT },
3760        /* MT25408 "Hermon" EN 10GigE PCIe gen2 */
3761        { PCI_VDEVICE(MELLANOX, 0x6750), MLX4_PCI_DEV_FORCE_SENSE_PORT },
3762        /* MT25458 ConnectX EN 10GBASE-T 10GigE */
3763        { PCI_VDEVICE(MELLANOX, 0x6372), MLX4_PCI_DEV_FORCE_SENSE_PORT },
3764        /* MT25458 ConnectX EN 10GBASE-T+Gen2 10GigE */
3765        { PCI_VDEVICE(MELLANOX, 0x675a), MLX4_PCI_DEV_FORCE_SENSE_PORT },
3766        /* MT26468 ConnectX EN 10GigE PCIe gen2 */
3767        { PCI_VDEVICE(MELLANOX, 0x6764), MLX4_PCI_DEV_FORCE_SENSE_PORT },
3768        /* MT26438 ConnectX EN 40GigE PCIe gen2 5GT/s */
3769        { PCI_VDEVICE(MELLANOX, 0x6746), MLX4_PCI_DEV_FORCE_SENSE_PORT },
3770        /* MT26478 ConnectX2 40GigE PCIe gen2 */
3771        { PCI_VDEVICE(MELLANOX, 0x676e), MLX4_PCI_DEV_FORCE_SENSE_PORT },
3772        /* MT25400 Family [ConnectX-2 Virtual Function] */
3773        { PCI_VDEVICE(MELLANOX, 0x1002), MLX4_PCI_DEV_IS_VF },
3774        /* MT27500 Family [ConnectX-3] */
3775        { PCI_VDEVICE(MELLANOX, 0x1003), 0 },
3776        /* MT27500 Family [ConnectX-3 Virtual Function] */
3777        { PCI_VDEVICE(MELLANOX, 0x1004), MLX4_PCI_DEV_IS_VF },
3778        { PCI_VDEVICE(MELLANOX, 0x1005), 0 }, /* MT27510 Family */
3779        { PCI_VDEVICE(MELLANOX, 0x1006), 0 }, /* MT27511 Family */
3780        { PCI_VDEVICE(MELLANOX, 0x1007), 0 }, /* MT27520 Family */
3781        { PCI_VDEVICE(MELLANOX, 0x1008), 0 }, /* MT27521 Family */
3782        { PCI_VDEVICE(MELLANOX, 0x1009), 0 }, /* MT27530 Family */
3783        { PCI_VDEVICE(MELLANOX, 0x100a), 0 }, /* MT27531 Family */
3784        { PCI_VDEVICE(MELLANOX, 0x100b), 0 }, /* MT27540 Family */
3785        { PCI_VDEVICE(MELLANOX, 0x100c), 0 }, /* MT27541 Family */
3786        { PCI_VDEVICE(MELLANOX, 0x100d), 0 }, /* MT27550 Family */
3787        { PCI_VDEVICE(MELLANOX, 0x100e), 0 }, /* MT27551 Family */
3788        { PCI_VDEVICE(MELLANOX, 0x100f), 0 }, /* MT27560 Family */
3789        { PCI_VDEVICE(MELLANOX, 0x1010), 0 }, /* MT27561 Family */
3790        { 0, }
3791};
3792
3793MODULE_DEVICE_TABLE(pci, mlx4_pci_table);
3794
3795static pci_ers_result_t mlx4_pci_err_detected(struct pci_dev *pdev,
3796                                              pci_channel_state_t state)
3797{
3798        struct mlx4_dev_persistent *persist = pci_get_drvdata(pdev);
3799
3800        mlx4_err(persist->dev, "mlx4_pci_err_detected was called\n");
3801        mlx4_enter_error_state(persist);
3802
3803        mutex_lock(&persist->interface_state_mutex);
3804        if (persist->interface_state & MLX4_INTERFACE_STATE_UP)
3805                mlx4_unload_one(pdev);
3806
3807        mutex_unlock(&persist->interface_state_mutex);
3808        if (state == pci_channel_io_perm_failure)
3809                return PCI_ERS_RESULT_DISCONNECT;
3810
3811        pci_disable_device(pdev);
3812        return PCI_ERS_RESULT_NEED_RESET;
3813}
3814
3815static pci_ers_result_t mlx4_pci_slot_reset(struct pci_dev *pdev)
3816{
3817        struct mlx4_dev_persistent *persist = pci_get_drvdata(pdev);
3818        struct mlx4_dev  *dev  = persist->dev;
3819        struct mlx4_priv *priv = mlx4_priv(dev);
3820        int               ret;
3821        int nvfs[MLX4_MAX_PORTS + 1] = {0, 0, 0};
3822        int total_vfs;
3823
3824        mlx4_err(dev, "mlx4_pci_slot_reset was called\n");
3825        ret = pci_enable_device(pdev);
3826        if (ret) {
3827                mlx4_err(dev, "Can not re-enable device, ret=%d\n", ret);
3828                return PCI_ERS_RESULT_DISCONNECT;
3829        }
3830
3831        pci_set_master(pdev);
3832        pci_restore_state(pdev);
3833        pci_save_state(pdev);
3834
3835        total_vfs = dev->persist->num_vfs;
3836        memcpy(nvfs, dev->persist->nvfs, sizeof(dev->persist->nvfs));
3837
3838        mutex_lock(&persist->interface_state_mutex);
3839        if (!(persist->interface_state & MLX4_INTERFACE_STATE_UP)) {
3840                ret = mlx4_load_one(pdev, priv->pci_dev_data, total_vfs, nvfs,
3841                                    priv, 1);
3842                if (ret) {
3843                        mlx4_err(dev, "%s: mlx4_load_one failed, ret=%d\n",
3844                                 __func__,  ret);
3845                        goto end;
3846                }
3847
3848                ret = restore_current_port_types(dev, dev->persist->
3849                                                 curr_port_type, dev->persist->
3850                                                 curr_port_poss_type);
3851                if (ret)
3852                        mlx4_err(dev, "could not restore original port types (%d)\n", ret);
3853        }
3854end:
3855        mutex_unlock(&persist->interface_state_mutex);
3856
3857        return ret ? PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_RECOVERED;
3858}
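/*
 * Together, mlx4_pci_err_detected() and mlx4_pci_slot_reset() implement
 * the PCI error recovery handshake: the former enters the error state,
 * unloads the driver and requests a slot reset; the latter re-enables
 * the device, reloads it via mlx4_load_one() with reset_flow set, and
 * restores the pre-error port types.
 */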
3859
3860static void mlx4_shutdown(struct pci_dev *pdev)
3861{
3862        struct mlx4_dev_persistent *persist = pci_get_drvdata(pdev);
3863
3864        mlx4_info(persist->dev, "mlx4_shutdown was called\n");
3865        mutex_lock(&persist->interface_state_mutex);
3866        if (persist->interface_state & MLX4_INTERFACE_STATE_UP)
3867                mlx4_unload_one(pdev);
3868        mutex_unlock(&persist->interface_state_mutex);
3869}
3870
3871static const struct pci_error_handlers mlx4_err_handler = {
3872        .error_detected = mlx4_pci_err_detected,
3873        .slot_reset     = mlx4_pci_slot_reset,
3874};
3875
3876static struct pci_driver mlx4_driver = {
3877        .name           = DRV_NAME,
3878        .id_table       = mlx4_pci_table,
3879        .probe          = mlx4_init_one,
3880        .shutdown       = mlx4_shutdown,
3881        .remove         = mlx4_remove_one,
3882        .err_handler    = &mlx4_err_handler,
3883};
3884
3885static int __init mlx4_verify_params(void)
3886{
3887        if ((log_num_mac < 0) || (log_num_mac > 7)) {
3888                pr_warn("mlx4_core: bad num_mac: %d\n", log_num_mac);
3889                return -1;
3890        }
3891
3892        if (log_num_vlan != 0)
3893                pr_warn("mlx4_core: log_num_vlan - obsolete module param, using %d\n",
3894                        MLX4_LOG_NUM_VLANS);
3895
3896        if (use_prio != 0)
3897                pr_warn("mlx4_core: use_prio - obsolete module param, ignored\n");
3898
3899        if ((log_mtts_per_seg < 1) || (log_mtts_per_seg > 7)) {
3900                pr_warn("mlx4_core: bad log_mtts_per_seg: %d\n",
3901                        log_mtts_per_seg);
3902                return -1;
3903        }
3904
3905        /* Check that the port type module parameter has a legal combination */
3906        if (port_type_array[0] == false && port_type_array[1] == true) {
3907                pr_warn("Module parameter configuration ETH/IB is not supported. Switching to default configuration IB/IB\n");
3908                port_type_array[0] = true;
3909        }
3910
3911        if (mlx4_log_num_mgm_entry_size < -7 ||
3912            (mlx4_log_num_mgm_entry_size > 0 &&
3913             (mlx4_log_num_mgm_entry_size < MLX4_MIN_MGM_LOG_ENTRY_SIZE ||
3914              mlx4_log_num_mgm_entry_size > MLX4_MAX_MGM_LOG_ENTRY_SIZE))) {
3915                pr_warn("mlx4_core: mlx4_log_num_mgm_entry_size (%d) not in legal range (-7..0 or %d..%d)\n",
3916                        mlx4_log_num_mgm_entry_size,
3917                        MLX4_MIN_MGM_LOG_ENTRY_SIZE,
3918                        MLX4_MAX_MGM_LOG_ENTRY_SIZE);
3919                return -1;
3920        }
3921
3922        return 0;
3923}
3924
3925static int __init mlx4_init(void)
3926{
3927        int ret;
3928
3929        if (mlx4_verify_params())
3930                return -EINVAL;
3931
3933        mlx4_wq = create_singlethread_workqueue("mlx4");
3934        if (!mlx4_wq)
3935                return -ENOMEM;
3936
3937        ret = pci_register_driver(&mlx4_driver);
3938        if (ret < 0)
3939                destroy_workqueue(mlx4_wq);
3940        return ret < 0 ? ret : 0;
3941}
3942
3943static void __exit mlx4_cleanup(void)
3944{
3945        pci_unregister_driver(&mlx4_driver);
3946        destroy_workqueue(mlx4_wq);
3947}
3948
3949module_init(mlx4_init);
3950module_exit(mlx4_cleanup);
3951