linux/drivers/net/ethernet/mellanox/mlx4/main.c
/*
 * Copyright (c) 2004, 2005 Topspin Communications.  All rights reserved.
 * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
 * Copyright (c) 2005, 2006, 2007, 2008 Mellanox Technologies. All rights reserved.
 * Copyright (c) 2006, 2007 Cisco Systems, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <linux/module.h>
#include <linux/init.h>
#include <linux/errno.h>
#include <linux/pci.h>
#include <linux/dma-mapping.h>
#include <linux/slab.h>
#include <linux/io-mapping.h>
#include <linux/delay.h>
#include <linux/netdevice.h>

#include <linux/mlx4/device.h>
#include <linux/mlx4/doorbell.h>

#include "mlx4.h"
#include "fw.h"
#include "icm.h"

MODULE_AUTHOR("Roland Dreier");
MODULE_DESCRIPTION("Mellanox ConnectX HCA low-level driver");
MODULE_LICENSE("Dual BSD/GPL");
MODULE_VERSION(DRV_VERSION);

struct workqueue_struct *mlx4_wq;

#ifdef CONFIG_MLX4_DEBUG

int mlx4_debug_level = 0;
module_param_named(debug_level, mlx4_debug_level, int, 0644);
MODULE_PARM_DESC(debug_level, "Enable debug tracing if > 0");

#endif /* CONFIG_MLX4_DEBUG */

#ifdef CONFIG_PCI_MSI

static int msi_x = 1;
module_param(msi_x, int, 0444);
MODULE_PARM_DESC(msi_x, "attempt to use MSI-X if nonzero");

#else /* CONFIG_PCI_MSI */

#define msi_x (0)

#endif /* CONFIG_PCI_MSI */

static int num_vfs;
module_param(num_vfs, int, 0444);
MODULE_PARM_DESC(num_vfs, "enable #num_vfs functions if num_vfs > 0");

static int probe_vf;
module_param(probe_vf, int, 0644);
MODULE_PARM_DESC(probe_vf, "number of VFs to probe by the PF driver (requires num_vfs > 0)");

int mlx4_log_num_mgm_entry_size = MLX4_DEFAULT_MGM_LOG_ENTRY_SIZE;
module_param_named(log_num_mgm_entry_size,
			mlx4_log_num_mgm_entry_size, int, 0444);
MODULE_PARM_DESC(log_num_mgm_entry_size, "log MGM size, which defines the number"
					 " of QPs per MCG; for example,"
					 " 10 gives 248. Range: 7 <="
					 " log_num_mgm_entry_size <= 12."
					 " To activate device managed"
					 " flow steering when available, set to -1");

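/*
 * Illustrative arithmetic for the parameter above: an MGM entry of
 * log size n holds 4 * ((1 << n) / 16 - 2) QPs (the same formula used
 * in slave_adjust_steering_mode() and choose_log_fs_mgm_entry_size()
 * below), so n = 10 gives 4 * (64 - 2) = 248.
 */
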
static bool enable_64b_cqe_eqe;
module_param(enable_64b_cqe_eqe, bool, 0444);
MODULE_PARM_DESC(enable_64b_cqe_eqe,
		 "Enable 64 byte CQEs/EQEs when the FW supports this");

#define HCA_GLOBAL_CAP_MASK            0

#define PF_CONTEXT_BEHAVIOUR_MASK       MLX4_FUNC_CAP_64B_EQE_CQE
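
/*
 * The two masks above are consumed in mlx4_slave_cap() with the
 * "(caps | MASK) != MASK" idiom: any capability bit reported by the
 * FW that falls outside the mask is unknown to this driver, and
 * initialization is aborted.
 */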

static char mlx4_version[] =
        DRV_NAME ": Mellanox ConnectX core driver v"
        DRV_VERSION " (" DRV_RELDATE ")\n";

static struct mlx4_profile default_profile = {
        .num_qp         = 1 << 18,
        .num_srq        = 1 << 16,
        .rdmarc_per_qp  = 1 << 4,
        .num_cq         = 1 << 16,
        .num_mcg        = 1 << 13,
        .num_mpt        = 1 << 19,
        .num_mtt        = 1 << 20, /* It is really the number of MTT segments */
};

static int log_num_mac = 7;
module_param_named(log_num_mac, log_num_mac, int, 0444);
MODULE_PARM_DESC(log_num_mac, "Log2 max number of MACs per ETH port (1-7)");

static int log_num_vlan;
module_param_named(log_num_vlan, log_num_vlan, int, 0444);
MODULE_PARM_DESC(log_num_vlan, "Log2 max number of VLANs per ETH port (0-7)");
/* Log2 max number of VLANs per ETH port (0-7) */
#define MLX4_LOG_NUM_VLANS 7

static bool use_prio;
module_param_named(use_prio, use_prio, bool, 0444);
MODULE_PARM_DESC(use_prio, "Enable steering by VLAN priority on ETH ports "
		  "(0/1, default 0)");

int log_mtts_per_seg = ilog2(MLX4_MTT_ENTRY_PER_SEG);
module_param_named(log_mtts_per_seg, log_mtts_per_seg, int, 0444);
MODULE_PARM_DESC(log_mtts_per_seg, "Log2 number of MTT entries per segment (1-7)");

static int port_type_array[2] = {MLX4_PORT_TYPE_NONE, MLX4_PORT_TYPE_NONE};
static int arr_argc = 2;
module_param_array(port_type_array, int, &arr_argc, 0444);
MODULE_PARM_DESC(port_type_array, "Array of port types: HW_DEFAULT (0) is default, "
				"1 for IB, 2 for Ethernet");

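/*
 * Usage sketch (hypothetical command line): loading the module with
 * "modprobe mlx4_core port_type_array=2,2" requests Ethernet on both
 * ports; the request is validated by mlx4_check_port_params() below.
 */
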
struct mlx4_port_config {
        struct list_head list;
        enum mlx4_port_type port_type[MLX4_MAX_PORTS + 1];
        struct pci_dev *pdev;
};

int mlx4_check_port_params(struct mlx4_dev *dev,
                           enum mlx4_port_type *port_type)
{
        int i;

        for (i = 0; i < dev->caps.num_ports - 1; i++) {
                if (port_type[i] != port_type[i + 1]) {
                        if (!(dev->caps.flags & MLX4_DEV_CAP_FLAG_DPDP)) {
                                mlx4_err(dev, "Only identical port types are "
                                         "supported on this HCA, aborting.\n");
                                return -EINVAL;
                        }
                }
        }

        for (i = 0; i < dev->caps.num_ports; i++) {
                if (!(port_type[i] & dev->caps.supported_type[i+1])) {
                        mlx4_err(dev, "Requested port type for port %d is not "
                                      "supported on this HCA\n", i + 1);
                        return -EINVAL;
                }
        }
        return 0;
}

static void mlx4_set_port_mask(struct mlx4_dev *dev)
{
        int i;

        for (i = 1; i <= dev->caps.num_ports; ++i)
                dev->caps.port_mask[i] = dev->caps.port_type[i];
}

static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
{
        int err;
        int i;

        err = mlx4_QUERY_DEV_CAP(dev, dev_cap);
        if (err) {
                mlx4_err(dev, "QUERY_DEV_CAP command failed, aborting.\n");
                return err;
        }

        if (dev_cap->min_page_sz > PAGE_SIZE) {
                mlx4_err(dev, "HCA minimum page size of %d bigger than "
                         "kernel PAGE_SIZE of %ld, aborting.\n",
                         dev_cap->min_page_sz, PAGE_SIZE);
                return -ENODEV;
        }
        if (dev_cap->num_ports > MLX4_MAX_PORTS) {
                mlx4_err(dev, "HCA has %d ports, but we only support %d, "
                         "aborting.\n",
                         dev_cap->num_ports, MLX4_MAX_PORTS);
                return -ENODEV;
        }

        if (dev_cap->uar_size > pci_resource_len(dev->pdev, 2)) {
                mlx4_err(dev, "HCA reported UAR size of 0x%x bigger than "
                         "PCI resource 2 size of 0x%llx, aborting.\n",
                         dev_cap->uar_size,
                         (unsigned long long) pci_resource_len(dev->pdev, 2));
                return -ENODEV;
        }

        dev->caps.num_ports          = dev_cap->num_ports;
        dev->phys_caps.num_phys_eqs  = MLX4_MAX_EQ_NUM;
        for (i = 1; i <= dev->caps.num_ports; ++i) {
                dev->caps.vl_cap[i]         = dev_cap->max_vl[i];
                dev->caps.ib_mtu_cap[i]     = dev_cap->ib_mtu[i];
                dev->phys_caps.gid_phys_table_len[i]  = dev_cap->max_gids[i];
                dev->phys_caps.pkey_phys_table_len[i] = dev_cap->max_pkeys[i];
                /* set gid and pkey table operating lengths by default
                 * to non-sriov values */
                dev->caps.gid_table_len[i]  = dev_cap->max_gids[i];
                dev->caps.pkey_table_len[i] = dev_cap->max_pkeys[i];
                dev->caps.port_width_cap[i] = dev_cap->max_port_width[i];
                dev->caps.eth_mtu_cap[i]    = dev_cap->eth_mtu[i];
                dev->caps.def_mac[i]        = dev_cap->def_mac[i];
                dev->caps.supported_type[i] = dev_cap->supported_port_types[i];
                dev->caps.suggested_type[i] = dev_cap->suggested_type[i];
                dev->caps.default_sense[i]  = dev_cap->default_sense[i];
                dev->caps.trans_type[i]     = dev_cap->trans_type[i];
                dev->caps.vendor_oui[i]     = dev_cap->vendor_oui[i];
                dev->caps.wavelength[i]     = dev_cap->wavelength[i];
                dev->caps.trans_code[i]     = dev_cap->trans_code[i];
        }

        dev->caps.uar_page_size      = PAGE_SIZE;
        dev->caps.num_uars           = dev_cap->uar_size / PAGE_SIZE;
        dev->caps.local_ca_ack_delay = dev_cap->local_ca_ack_delay;
        dev->caps.bf_reg_size        = dev_cap->bf_reg_size;
        dev->caps.bf_regs_per_page   = dev_cap->bf_regs_per_page;
        dev->caps.max_sq_sg          = dev_cap->max_sq_sg;
        dev->caps.max_rq_sg          = dev_cap->max_rq_sg;
        dev->caps.max_wqes           = dev_cap->max_qp_sz;
        dev->caps.max_qp_init_rdma   = dev_cap->max_requester_per_qp;
        dev->caps.max_srq_wqes       = dev_cap->max_srq_sz;
        dev->caps.max_srq_sge        = dev_cap->max_rq_sg - 1;
        dev->caps.reserved_srqs      = dev_cap->reserved_srqs;
        dev->caps.max_sq_desc_sz     = dev_cap->max_sq_desc_sz;
        dev->caps.max_rq_desc_sz     = dev_cap->max_rq_desc_sz;
        /*
         * Subtract 1 from the limit because we need to allocate a
         * spare CQE so the HCA HW can tell the difference between an
         * empty CQ and a full CQ.
         */
        dev->caps.max_cqes           = dev_cap->max_cq_sz - 1;
        dev->caps.reserved_cqs       = dev_cap->reserved_cqs;
        dev->caps.reserved_eqs       = dev_cap->reserved_eqs;
        dev->caps.reserved_mtts      = dev_cap->reserved_mtts;
        dev->caps.reserved_mrws      = dev_cap->reserved_mrws;

        /* The first 128 UARs are used for EQ doorbells */
        dev->caps.reserved_uars      = max_t(int, 128, dev_cap->reserved_uars);
        dev->caps.reserved_pds       = dev_cap->reserved_pds;
        dev->caps.reserved_xrcds     = (dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC) ?
                                        dev_cap->reserved_xrcds : 0;
        dev->caps.max_xrcds          = (dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC) ?
                                        dev_cap->max_xrcds : 0;
        dev->caps.mtt_entry_sz       = dev_cap->mtt_entry_sz;

        dev->caps.max_msg_sz         = dev_cap->max_msg_sz;
        dev->caps.page_size_cap      = ~(u32) (dev_cap->min_page_sz - 1);
        dev->caps.flags              = dev_cap->flags;
        dev->caps.flags2             = dev_cap->flags2;
        dev->caps.bmme_flags         = dev_cap->bmme_flags;
        dev->caps.reserved_lkey      = dev_cap->reserved_lkey;
        dev->caps.stat_rate_support  = dev_cap->stat_rate_support;
        dev->caps.max_gso_sz         = dev_cap->max_gso_sz;
        dev->caps.max_rss_tbl_sz     = dev_cap->max_rss_tbl_sz;

        /* Sense port always allowed on supported devices for ConnectX-1 and -2 */
        if (mlx4_priv(dev)->pci_dev_data & MLX4_PCI_DEV_FORCE_SENSE_PORT)
                dev->caps.flags |= MLX4_DEV_CAP_FLAG_SENSE_SUPPORT;
        /* Don't do sense port on multifunction devices (for now at least) */
        if (mlx4_is_mfunc(dev))
                dev->caps.flags &= ~MLX4_DEV_CAP_FLAG_SENSE_SUPPORT;

        dev->caps.log_num_macs  = log_num_mac;
        dev->caps.log_num_vlans = MLX4_LOG_NUM_VLANS;
        dev->caps.log_num_prios = use_prio ? 3 : 0;

        for (i = 1; i <= dev->caps.num_ports; ++i) {
                dev->caps.port_type[i] = MLX4_PORT_TYPE_NONE;
                if (dev->caps.supported_type[i]) {
                        /* if only ETH is supported - assign ETH */
                        if (dev->caps.supported_type[i] == MLX4_PORT_TYPE_ETH)
                                dev->caps.port_type[i] = MLX4_PORT_TYPE_ETH;
                        /* if only IB is supported, assign IB */
                        else if (dev->caps.supported_type[i] ==
                                 MLX4_PORT_TYPE_IB)
                                dev->caps.port_type[i] = MLX4_PORT_TYPE_IB;
                        else {
                                /* if both IB and ETH are supported, set the
                                 * port type according to the user's selection;
                                 * if the user selected none, take the FW hint */
                                if (port_type_array[i - 1] == MLX4_PORT_TYPE_NONE)
                                        dev->caps.port_type[i] = dev->caps.suggested_type[i] ?
                                                MLX4_PORT_TYPE_ETH : MLX4_PORT_TYPE_IB;
                                else
                                        dev->caps.port_type[i] = port_type_array[i - 1];
                        }
                }
                /*
                 * Link sensing is allowed on the port if 3 conditions are true:
                 * 1. Both protocols are supported on the port.
                 * 2. Different port types are supported on the port.
                 * 3. The FW declared that it supports link sensing.
                 */
                mlx4_priv(dev)->sense.sense_allowed[i] =
                        ((dev->caps.supported_type[i] == MLX4_PORT_TYPE_AUTO) &&
                         (dev->caps.flags & MLX4_DEV_CAP_FLAG_DPDP) &&
                         (dev->caps.flags & MLX4_DEV_CAP_FLAG_SENSE_SUPPORT));

                /*
                 * If the "default_sense" bit is set, we move the port to "AUTO"
                 * mode and perform the SENSE_PORT FW command to try to set the
                 * correct port type from the beginning.
                 */
                if (mlx4_priv(dev)->sense.sense_allowed[i] && dev->caps.default_sense[i]) {
                        enum mlx4_port_type sensed_port = MLX4_PORT_TYPE_NONE;
                        dev->caps.possible_type[i] = MLX4_PORT_TYPE_AUTO;
                        mlx4_SENSE_PORT(dev, i, &sensed_port);
                        if (sensed_port != MLX4_PORT_TYPE_NONE)
                                dev->caps.port_type[i] = sensed_port;
                } else {
                        dev->caps.possible_type[i] = dev->caps.port_type[i];
                }

                if (dev->caps.log_num_macs > dev_cap->log_max_macs[i]) {
                        dev->caps.log_num_macs = dev_cap->log_max_macs[i];
                        mlx4_warn(dev, "Requested number of MACs is too large "
                                  "for port %d, reducing to %d.\n",
                                  i, 1 << dev->caps.log_num_macs);
                }
                if (dev->caps.log_num_vlans > dev_cap->log_max_vlans[i]) {
                        dev->caps.log_num_vlans = dev_cap->log_max_vlans[i];
                        mlx4_warn(dev, "Requested number of VLANs is too large "
                                  "for port %d, reducing to %d.\n",
                                  i, 1 << dev->caps.log_num_vlans);
                }
        }

        dev->caps.max_counters = 1 << ilog2(dev_cap->max_counters);

        dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW] = dev_cap->reserved_qps;
        dev->caps.reserved_qps_cnt[MLX4_QP_REGION_ETH_ADDR] =
                dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FC_ADDR] =
                (1 << dev->caps.log_num_macs) *
                (1 << dev->caps.log_num_vlans) *
                (1 << dev->caps.log_num_prios) *
                dev->caps.num_ports;
        dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FC_EXCH] = MLX4_NUM_FEXCH;

        dev->caps.reserved_qps = dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW] +
                dev->caps.reserved_qps_cnt[MLX4_QP_REGION_ETH_ADDR] +
                dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FC_ADDR] +
                dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FC_EXCH];

        dev->caps.sqp_demux = (mlx4_is_master(dev)) ? MLX4_MAX_NUM_SLAVES : 0;

        if (!enable_64b_cqe_eqe) {
                if (dev_cap->flags &
                    (MLX4_DEV_CAP_FLAG_64B_CQE | MLX4_DEV_CAP_FLAG_64B_EQE)) {
                        mlx4_warn(dev, "64B EQEs/CQEs supported by the device but not enabled\n");
                        dev->caps.flags &= ~MLX4_DEV_CAP_FLAG_64B_CQE;
                        dev->caps.flags &= ~MLX4_DEV_CAP_FLAG_64B_EQE;
                }
        }

        if ((dev->caps.flags &
            (MLX4_DEV_CAP_FLAG_64B_CQE | MLX4_DEV_CAP_FLAG_64B_EQE)) &&
            mlx4_is_master(dev))
                dev->caps.function_caps |= MLX4_FUNC_CAP_64B_EQE_CQE;

        return 0;
}

/* Check whether there are live VFs and return how many there are. */
static int mlx4_how_many_lives_vf(struct mlx4_dev *dev)
{
        struct mlx4_priv *priv = mlx4_priv(dev);
        struct mlx4_slave_state *s_state;
        int i;
        int ret = 0;

        for (i = 1 /* the PPF is 0 */; i < dev->num_slaves; ++i) {
                s_state = &priv->mfunc.master.slave_state[i];
                if (s_state->active && s_state->last_cmd !=
                    MLX4_COMM_CMD_RESET) {
                        mlx4_warn(dev, "%s: slave: %d is still active\n",
                                  __func__, i);
                        ret++;
                }
        }
        return ret;
}

int mlx4_get_parav_qkey(struct mlx4_dev *dev, u32 qpn, u32 *qkey)
{
        u32 qk = MLX4_RESERVED_QKEY_BASE;

        if (qpn >= dev->phys_caps.base_tunnel_sqpn + 8 * MLX4_MFUNC_MAX ||
            qpn < dev->phys_caps.base_proxy_sqpn)
                return -EINVAL;

        if (qpn >= dev->phys_caps.base_tunnel_sqpn)
                /* tunnel qp */
                qk += qpn - dev->phys_caps.base_tunnel_sqpn;
        else
                qk += qpn - dev->phys_caps.base_proxy_sqpn;
        *qkey = qk;
        return 0;
}
EXPORT_SYMBOL(mlx4_get_parav_qkey);

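/*
 * Note on mlx4_get_parav_qkey() above: both the proxy and the tunnel
 * SQP ranges are mapped onto qkeys relative to MLX4_RESERVED_QKEY_BASE,
 * each QP offset by its position within its own range.
 */
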
void mlx4_sync_pkey_table(struct mlx4_dev *dev, int slave, int port, int i, int val)
{
        struct mlx4_priv *priv = container_of(dev, struct mlx4_priv, dev);

        if (!mlx4_is_master(dev))
                return;

        priv->virt2phys_pkey[slave][port - 1][i] = val;
}
EXPORT_SYMBOL(mlx4_sync_pkey_table);

void mlx4_put_slave_node_guid(struct mlx4_dev *dev, int slave, __be64 guid)
{
        struct mlx4_priv *priv = container_of(dev, struct mlx4_priv, dev);

        if (!mlx4_is_master(dev))
                return;

        priv->slave_node_guids[slave] = guid;
}
EXPORT_SYMBOL(mlx4_put_slave_node_guid);

__be64 mlx4_get_slave_node_guid(struct mlx4_dev *dev, int slave)
{
        struct mlx4_priv *priv = container_of(dev, struct mlx4_priv, dev);

        if (!mlx4_is_master(dev))
                return 0;

        return priv->slave_node_guids[slave];
}
EXPORT_SYMBOL(mlx4_get_slave_node_guid);

int mlx4_is_slave_active(struct mlx4_dev *dev, int slave)
{
        struct mlx4_priv *priv = mlx4_priv(dev);
        struct mlx4_slave_state *s_slave;

        if (!mlx4_is_master(dev))
                return 0;

        s_slave = &priv->mfunc.master.slave_state[slave];
        return !!s_slave->active;
}
EXPORT_SYMBOL(mlx4_is_slave_active);

static void slave_adjust_steering_mode(struct mlx4_dev *dev,
                                       struct mlx4_dev_cap *dev_cap,
                                       struct mlx4_init_hca_param *hca_param)
{
        dev->caps.steering_mode = hca_param->steering_mode;
        if (dev->caps.steering_mode == MLX4_STEERING_MODE_DEVICE_MANAGED) {
                dev->caps.num_qp_per_mgm = dev_cap->fs_max_num_qp_per_entry;
                dev->caps.fs_log_max_ucast_qp_range_size =
                        dev_cap->fs_log_max_ucast_qp_range_size;
        } else
                dev->caps.num_qp_per_mgm =
                        4 * ((1 << hca_param->log_mc_entry_sz)/16 - 2);

        mlx4_dbg(dev, "Steering mode is: %s\n",
                 mlx4_steering_mode_str(dev->caps.steering_mode));
}

static int mlx4_slave_cap(struct mlx4_dev *dev)
{
        int                        err;
        u32                        page_size;
        struct mlx4_dev_cap        dev_cap;
        struct mlx4_func_cap       func_cap;
        struct mlx4_init_hca_param hca_param;
        int                        i;

        memset(&hca_param, 0, sizeof(hca_param));
        err = mlx4_QUERY_HCA(dev, &hca_param);
        if (err) {
                mlx4_err(dev, "QUERY_HCA command failed, aborting.\n");
                return err;
        }

        /* fail if the HCA has an unknown global capability */
        if ((hca_param.global_caps | HCA_GLOBAL_CAP_MASK) !=
            HCA_GLOBAL_CAP_MASK) {
                mlx4_err(dev, "Unknown HCA global capabilities\n");
                return -ENOSYS;
        }

        mlx4_log_num_mgm_entry_size = hca_param.log_mc_entry_sz;

        memset(&dev_cap, 0, sizeof(dev_cap));
        dev->caps.max_qp_dest_rdma = 1 << hca_param.log_rd_per_qp;
        err = mlx4_dev_cap(dev, &dev_cap);
        if (err) {
                mlx4_err(dev, "QUERY_DEV_CAP command failed, aborting.\n");
                return err;
        }

        err = mlx4_QUERY_FW(dev);
        if (err)
                mlx4_err(dev, "QUERY_FW command failed: could not get FW version.\n");

        page_size = ~dev->caps.page_size_cap + 1;
        mlx4_warn(dev, "HCA minimum page size: %d\n", page_size);
        if (page_size > PAGE_SIZE) {
                mlx4_err(dev, "HCA minimum page size of %d bigger than "
                         "kernel PAGE_SIZE of %ld, aborting.\n",
                         page_size, PAGE_SIZE);
                return -ENODEV;
        }

        /* the slave gets the UAR page size from the QUERY_HCA FW command */
        dev->caps.uar_page_size = 1 << (hca_param.uar_page_sz + 12);

        /* TODO: relax this assumption */
        if (dev->caps.uar_page_size != PAGE_SIZE) {
                mlx4_err(dev, "UAR size:%d != kernel PAGE_SIZE of %ld\n",
                         dev->caps.uar_page_size, PAGE_SIZE);
                return -ENODEV;
        }

        memset(&func_cap, 0, sizeof(func_cap));
        err = mlx4_QUERY_FUNC_CAP(dev, 0, &func_cap);
        if (err) {
                mlx4_err(dev, "QUERY_FUNC_CAP general command failed, aborting (%d).\n",
                          err);
                return err;
        }

        if ((func_cap.pf_context_behaviour | PF_CONTEXT_BEHAVIOUR_MASK) !=
            PF_CONTEXT_BEHAVIOUR_MASK) {
                mlx4_err(dev, "Unknown PF context behaviour\n");
                return -ENOSYS;
        }

        dev->caps.num_ports             = func_cap.num_ports;
        dev->caps.num_qps               = func_cap.qp_quota;
        dev->caps.num_srqs              = func_cap.srq_quota;
        dev->caps.num_cqs               = func_cap.cq_quota;
        dev->caps.num_eqs               = func_cap.max_eq;
        dev->caps.reserved_eqs          = func_cap.reserved_eq;
        dev->caps.num_mpts              = func_cap.mpt_quota;
        dev->caps.num_mtts              = func_cap.mtt_quota;
        dev->caps.num_pds               = MLX4_NUM_PDS;
        dev->caps.num_mgms              = 0;
        dev->caps.num_amgms             = 0;

        if (dev->caps.num_ports > MLX4_MAX_PORTS) {
                mlx4_err(dev, "HCA has %d ports, but we only support %d, "
                         "aborting.\n", dev->caps.num_ports, MLX4_MAX_PORTS);
                return -ENODEV;
        }

        dev->caps.qp0_tunnel = kcalloc(dev->caps.num_ports, sizeof(u32), GFP_KERNEL);
        dev->caps.qp0_proxy = kcalloc(dev->caps.num_ports, sizeof(u32), GFP_KERNEL);
        dev->caps.qp1_tunnel = kcalloc(dev->caps.num_ports, sizeof(u32), GFP_KERNEL);
        dev->caps.qp1_proxy = kcalloc(dev->caps.num_ports, sizeof(u32), GFP_KERNEL);

        if (!dev->caps.qp0_tunnel || !dev->caps.qp0_proxy ||
            !dev->caps.qp1_tunnel || !dev->caps.qp1_proxy) {
                err = -ENOMEM;
                goto err_mem;
        }

        for (i = 1; i <= dev->caps.num_ports; ++i) {
                err = mlx4_QUERY_FUNC_CAP(dev, (u32) i, &func_cap);
                if (err) {
                        mlx4_err(dev, "QUERY_FUNC_CAP port command failed for"
                                 " port %d, aborting (%d).\n", i, err);
                        goto err_mem;
                }
                dev->caps.qp0_tunnel[i - 1] = func_cap.qp0_tunnel_qpn;
                dev->caps.qp0_proxy[i - 1] = func_cap.qp0_proxy_qpn;
                dev->caps.qp1_tunnel[i - 1] = func_cap.qp1_tunnel_qpn;
                dev->caps.qp1_proxy[i - 1] = func_cap.qp1_proxy_qpn;
                dev->caps.port_mask[i] = dev->caps.port_type[i];
                err = mlx4_get_slave_pkey_gid_tbl_len(dev, i,
                                                      &dev->caps.gid_table_len[i],
                                                      &dev->caps.pkey_table_len[i]);
                if (err)
                        goto err_mem;
        }

        if (dev->caps.uar_page_size * (dev->caps.num_uars -
                                       dev->caps.reserved_uars) >
                                       pci_resource_len(dev->pdev, 2)) {
                mlx4_err(dev, "HCA reported UAR region size of 0x%x bigger than "
                         "PCI resource 2 size of 0x%llx, aborting.\n",
                         dev->caps.uar_page_size * dev->caps.num_uars,
                         (unsigned long long) pci_resource_len(dev->pdev, 2));
                err = -ENODEV;
                goto err_mem;
        }

        if (hca_param.dev_cap_enabled & MLX4_DEV_CAP_64B_EQE_ENABLED) {
                dev->caps.eqe_size   = 64;
                dev->caps.eqe_factor = 1;
        } else {
                dev->caps.eqe_size   = 32;
                dev->caps.eqe_factor = 0;
        }

        if (hca_param.dev_cap_enabled & MLX4_DEV_CAP_64B_CQE_ENABLED) {
                dev->caps.cqe_size   = 64;
                dev->caps.userspace_caps |= MLX4_USER_DEV_CAP_64B_CQE;
        } else {
                dev->caps.cqe_size   = 32;
        }

        slave_adjust_steering_mode(dev, &dev_cap, &hca_param);

        return 0;

err_mem:
        kfree(dev->caps.qp0_tunnel);
        kfree(dev->caps.qp0_proxy);
        kfree(dev->caps.qp1_tunnel);
        kfree(dev->caps.qp1_proxy);
        dev->caps.qp0_tunnel = dev->caps.qp0_proxy =
                dev->caps.qp1_tunnel = dev->caps.qp1_proxy = NULL;

        return err;
}

/*
 * Change the port configuration of the device.
 * Every user of this function must hold the port mutex.
 */
int mlx4_change_port_types(struct mlx4_dev *dev,
                           enum mlx4_port_type *port_types)
{
        int err = 0;
        int change = 0;
        int port;

        for (port = 0; port < dev->caps.num_ports; port++) {
                /* Change the port type only if the new type is
                 * different from the current one */
                if (port_types[port] != dev->caps.port_type[port + 1])
                        change = 1;
        }
        if (change) {
                mlx4_unregister_device(dev);
                for (port = 1; port <= dev->caps.num_ports; port++) {
                        mlx4_CLOSE_PORT(dev, port);
                        dev->caps.port_type[port] = port_types[port - 1];
                        err = mlx4_SET_PORT(dev, port, -1);
                        if (err) {
                                mlx4_err(dev, "Failed to set port %d, "
                                              "aborting\n", port);
                                goto out;
                        }
                }
                mlx4_set_port_mask(dev);
                err = mlx4_register_device(dev);
        }

out:
        return err;
}

static ssize_t show_port_type(struct device *dev,
                              struct device_attribute *attr,
                              char *buf)
{
        struct mlx4_port_info *info = container_of(attr, struct mlx4_port_info,
                                                   port_attr);
        struct mlx4_dev *mdev = info->dev;
        char type[8];

        sprintf(type, "%s",
                (mdev->caps.port_type[info->port] == MLX4_PORT_TYPE_IB) ?
                "ib" : "eth");
        if (mdev->caps.possible_type[info->port] == MLX4_PORT_TYPE_AUTO)
                sprintf(buf, "auto (%s)\n", type);
        else
                sprintf(buf, "%s\n", type);

        return strlen(buf);
}

static ssize_t set_port_type(struct device *dev,
                             struct device_attribute *attr,
                             const char *buf, size_t count)
{
        struct mlx4_port_info *info = container_of(attr, struct mlx4_port_info,
                                                   port_attr);
        struct mlx4_dev *mdev = info->dev;
        struct mlx4_priv *priv = mlx4_priv(mdev);
        enum mlx4_port_type types[MLX4_MAX_PORTS];
        enum mlx4_port_type new_types[MLX4_MAX_PORTS];
        int i;
        int err = 0;

        if (!strcmp(buf, "ib\n"))
                info->tmp_type = MLX4_PORT_TYPE_IB;
        else if (!strcmp(buf, "eth\n"))
                info->tmp_type = MLX4_PORT_TYPE_ETH;
        else if (!strcmp(buf, "auto\n"))
                info->tmp_type = MLX4_PORT_TYPE_AUTO;
        else {
                mlx4_err(mdev, "%s is not a supported port type\n", buf);
                return -EINVAL;
        }

        mlx4_stop_sense(mdev);
        mutex_lock(&priv->port_mutex);
        /* Possible type is always the one that was delivered */
        mdev->caps.possible_type[info->port] = info->tmp_type;

        for (i = 0; i < mdev->caps.num_ports; i++) {
                types[i] = priv->port[i+1].tmp_type ? priv->port[i+1].tmp_type :
                                        mdev->caps.possible_type[i+1];
                if (types[i] == MLX4_PORT_TYPE_AUTO)
                        types[i] = mdev->caps.port_type[i+1];
        }

        if (!(mdev->caps.flags & MLX4_DEV_CAP_FLAG_DPDP) &&
            !(mdev->caps.flags & MLX4_DEV_CAP_FLAG_SENSE_SUPPORT)) {
                for (i = 1; i <= mdev->caps.num_ports; i++) {
                        if (mdev->caps.possible_type[i] == MLX4_PORT_TYPE_AUTO) {
                                mdev->caps.possible_type[i] = mdev->caps.port_type[i];
                                err = -EINVAL;
                        }
                }
        }
        if (err) {
                mlx4_err(mdev, "Auto sensing is not supported on this HCA. "
                               "Set only 'eth' or 'ib' for both ports "
                               "(should be the same)\n");
                goto out;
        }

        mlx4_do_sense_ports(mdev, new_types, types);

        err = mlx4_check_port_params(mdev, new_types);
        if (err)
                goto out;

        /* We are about to apply the changes after the configuration
         * was verified; no need to remember the temporary types
         * any more */
        for (i = 0; i < mdev->caps.num_ports; i++)
                priv->port[i + 1].tmp_type = 0;

        err = mlx4_change_port_types(mdev, new_types);

out:
        mlx4_start_sense(mdev);
        mutex_unlock(&priv->port_mutex);
        return err ? err : count;
}

enum ibta_mtu {
        IB_MTU_256  = 1,
        IB_MTU_512  = 2,
        IB_MTU_1024 = 3,
        IB_MTU_2048 = 4,
        IB_MTU_4096 = 5
};

static inline int int_to_ibta_mtu(int mtu)
{
        switch (mtu) {
        case 256:  return IB_MTU_256;
        case 512:  return IB_MTU_512;
        case 1024: return IB_MTU_1024;
        case 2048: return IB_MTU_2048;
        case 4096: return IB_MTU_4096;
        default: return -1;
        }
}

static inline int ibta_mtu_to_int(enum ibta_mtu mtu)
{
        switch (mtu) {
        case IB_MTU_256:  return  256;
        case IB_MTU_512:  return  512;
        case IB_MTU_1024: return 1024;
        case IB_MTU_2048: return 2048;
        case IB_MTU_4096: return 4096;
        default: return -1;
        }
}

static ssize_t show_port_ib_mtu(struct device *dev,
                             struct device_attribute *attr,
                             char *buf)
{
        struct mlx4_port_info *info = container_of(attr, struct mlx4_port_info,
                                                   port_mtu_attr);
        struct mlx4_dev *mdev = info->dev;

        if (mdev->caps.port_type[info->port] == MLX4_PORT_TYPE_ETH)
                mlx4_warn(mdev, "port-level MTU is only used for IB ports\n");

        sprintf(buf, "%d\n",
                        ibta_mtu_to_int(mdev->caps.port_ib_mtu[info->port]));
        return strlen(buf);
}

static ssize_t set_port_ib_mtu(struct device *dev,
                             struct device_attribute *attr,
                             const char *buf, size_t count)
{
        struct mlx4_port_info *info = container_of(attr, struct mlx4_port_info,
                                                   port_mtu_attr);
        struct mlx4_dev *mdev = info->dev;
        struct mlx4_priv *priv = mlx4_priv(mdev);
        int err, port, mtu, ibta_mtu = -1;

        if (mdev->caps.port_type[info->port] == MLX4_PORT_TYPE_ETH) {
                mlx4_warn(mdev, "port-level MTU is only used for IB ports\n");
                return -EINVAL;
        }

        err = sscanf(buf, "%d", &mtu);
        if (err > 0)
                ibta_mtu = int_to_ibta_mtu(mtu);

        if (err <= 0 || ibta_mtu < 0) {
                mlx4_err(mdev, "%s is an invalid IBTA MTU\n", buf);
                return -EINVAL;
        }

        mdev->caps.port_ib_mtu[info->port] = ibta_mtu;

        mlx4_stop_sense(mdev);
        mutex_lock(&priv->port_mutex);
        mlx4_unregister_device(mdev);
        for (port = 1; port <= mdev->caps.num_ports; port++) {
                mlx4_CLOSE_PORT(mdev, port);
                err = mlx4_SET_PORT(mdev, port, -1);
                if (err) {
                        mlx4_err(mdev, "Failed to set port %d, "
                                      "aborting\n", port);
                        goto err_set_port;
                }
        }
        err = mlx4_register_device(mdev);
err_set_port:
        mutex_unlock(&priv->port_mutex);
        mlx4_start_sense(mdev);
        return err ? err : count;
}

static int mlx4_load_fw(struct mlx4_dev *dev)
{
        struct mlx4_priv *priv = mlx4_priv(dev);
        int err;

        priv->fw.fw_icm = mlx4_alloc_icm(dev, priv->fw.fw_pages,
                                         GFP_HIGHUSER | __GFP_NOWARN, 0);
        if (!priv->fw.fw_icm) {
                mlx4_err(dev, "Couldn't allocate FW area, aborting.\n");
                return -ENOMEM;
        }

        err = mlx4_MAP_FA(dev, priv->fw.fw_icm);
        if (err) {
                mlx4_err(dev, "MAP_FA command failed, aborting.\n");
                goto err_free;
        }

        err = mlx4_RUN_FW(dev);
        if (err) {
                mlx4_err(dev, "RUN_FW command failed, aborting.\n");
                goto err_unmap_fa;
        }

        return 0;

err_unmap_fa:
        mlx4_UNMAP_FA(dev);

err_free:
        mlx4_free_icm(dev, priv->fw.fw_icm, 0);
        return err;
}

static int mlx4_init_cmpt_table(struct mlx4_dev *dev, u64 cmpt_base,
                                int cmpt_entry_sz)
{
        struct mlx4_priv *priv = mlx4_priv(dev);
        int err;
        int num_eqs;

        err = mlx4_init_icm_table(dev, &priv->qp_table.cmpt_table,
                                  cmpt_base +
                                  ((u64) (MLX4_CMPT_TYPE_QP *
                                          cmpt_entry_sz) << MLX4_CMPT_SHIFT),
                                  cmpt_entry_sz, dev->caps.num_qps,
                                  dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW],
                                  0, 0);
        if (err)
                goto err;

        err = mlx4_init_icm_table(dev, &priv->srq_table.cmpt_table,
                                  cmpt_base +
                                  ((u64) (MLX4_CMPT_TYPE_SRQ *
                                          cmpt_entry_sz) << MLX4_CMPT_SHIFT),
                                  cmpt_entry_sz, dev->caps.num_srqs,
                                  dev->caps.reserved_srqs, 0, 0);
        if (err)
                goto err_qp;

        err = mlx4_init_icm_table(dev, &priv->cq_table.cmpt_table,
                                  cmpt_base +
                                  ((u64) (MLX4_CMPT_TYPE_CQ *
                                          cmpt_entry_sz) << MLX4_CMPT_SHIFT),
                                  cmpt_entry_sz, dev->caps.num_cqs,
                                  dev->caps.reserved_cqs, 0, 0);
        if (err)
                goto err_srq;

        num_eqs = (mlx4_is_master(dev)) ? dev->phys_caps.num_phys_eqs :
                  dev->caps.num_eqs;
        err = mlx4_init_icm_table(dev, &priv->eq_table.cmpt_table,
                                  cmpt_base +
                                  ((u64) (MLX4_CMPT_TYPE_EQ *
                                          cmpt_entry_sz) << MLX4_CMPT_SHIFT),
                                  cmpt_entry_sz, num_eqs, num_eqs, 0, 0);
        if (err)
                goto err_cq;

        return 0;

err_cq:
        mlx4_cleanup_icm_table(dev, &priv->cq_table.cmpt_table);

err_srq:
        mlx4_cleanup_icm_table(dev, &priv->srq_table.cmpt_table);

err_qp:
        mlx4_cleanup_icm_table(dev, &priv->qp_table.cmpt_table);

err:
        return err;
}

static int mlx4_init_icm(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap,
                         struct mlx4_init_hca_param *init_hca, u64 icm_size)
{
        struct mlx4_priv *priv = mlx4_priv(dev);
        u64 aux_pages;
        int num_eqs;
        int err;

        err = mlx4_SET_ICM_SIZE(dev, icm_size, &aux_pages);
        if (err) {
                mlx4_err(dev, "SET_ICM_SIZE command failed, aborting.\n");
                return err;
        }

        mlx4_dbg(dev, "%lld KB of HCA context requires %lld KB aux memory.\n",
                 (unsigned long long) icm_size >> 10,
                 (unsigned long long) aux_pages << 2);

        priv->fw.aux_icm = mlx4_alloc_icm(dev, aux_pages,
                                          GFP_HIGHUSER | __GFP_NOWARN, 0);
        if (!priv->fw.aux_icm) {
                mlx4_err(dev, "Couldn't allocate aux memory, aborting.\n");
                return -ENOMEM;
        }

        err = mlx4_MAP_ICM_AUX(dev, priv->fw.aux_icm);
        if (err) {
                mlx4_err(dev, "MAP_ICM_AUX command failed, aborting.\n");
                goto err_free_aux;
        }

        err = mlx4_init_cmpt_table(dev, init_hca->cmpt_base, dev_cap->cmpt_entry_sz);
        if (err) {
                mlx4_err(dev, "Failed to map cMPT context memory, aborting.\n");
                goto err_unmap_aux;
        }

        num_eqs = (mlx4_is_master(dev)) ? dev->phys_caps.num_phys_eqs :
                   dev->caps.num_eqs;
        err = mlx4_init_icm_table(dev, &priv->eq_table.table,
                                  init_hca->eqc_base, dev_cap->eqc_entry_sz,
                                  num_eqs, num_eqs, 0, 0);
        if (err) {
                mlx4_err(dev, "Failed to map EQ context memory, aborting.\n");
                goto err_unmap_cmpt;
        }

        /*
         * Reserved MTT entries must be aligned up to a cacheline
         * boundary, since the FW will write to them, while the driver
         * writes to all other MTT entries. (The variable
         * dev->caps.mtt_entry_sz below is really the MTT segment
         * size, not the raw entry size)
         */
        dev->caps.reserved_mtts =
                ALIGN(dev->caps.reserved_mtts * dev->caps.mtt_entry_sz,
                      dma_get_cache_alignment()) / dev->caps.mtt_entry_sz;
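        /*
         * Illustrative arithmetic (hypothetical numbers): with a 64-byte
         * cacheline and a 32-byte MTT segment, 11 reserved segments
         * (352 bytes) are rounded up by ALIGN() to 384 bytes, i.e. 12
         * segments.
         */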

        err = mlx4_init_icm_table(dev, &priv->mr_table.mtt_table,
                                  init_hca->mtt_base,
                                  dev->caps.mtt_entry_sz,
                                  dev->caps.num_mtts,
                                  dev->caps.reserved_mtts, 1, 0);
        if (err) {
                mlx4_err(dev, "Failed to map MTT context memory, aborting.\n");
                goto err_unmap_eq;
        }

        err = mlx4_init_icm_table(dev, &priv->mr_table.dmpt_table,
                                  init_hca->dmpt_base,
                                  dev_cap->dmpt_entry_sz,
                                  dev->caps.num_mpts,
                                  dev->caps.reserved_mrws, 1, 1);
        if (err) {
                mlx4_err(dev, "Failed to map dMPT context memory, aborting.\n");
                goto err_unmap_mtt;
        }

        err = mlx4_init_icm_table(dev, &priv->qp_table.qp_table,
                                  init_hca->qpc_base,
                                  dev_cap->qpc_entry_sz,
                                  dev->caps.num_qps,
                                  dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW],
                                  0, 0);
        if (err) {
                mlx4_err(dev, "Failed to map QP context memory, aborting.\n");
                goto err_unmap_dmpt;
        }

        err = mlx4_init_icm_table(dev, &priv->qp_table.auxc_table,
                                  init_hca->auxc_base,
                                  dev_cap->aux_entry_sz,
                                  dev->caps.num_qps,
                                  dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW],
                                  0, 0);
        if (err) {
                mlx4_err(dev, "Failed to map AUXC context memory, aborting.\n");
                goto err_unmap_qp;
        }

        err = mlx4_init_icm_table(dev, &priv->qp_table.altc_table,
                                  init_hca->altc_base,
                                  dev_cap->altc_entry_sz,
                                  dev->caps.num_qps,
                                  dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW],
                                  0, 0);
        if (err) {
                mlx4_err(dev, "Failed to map ALTC context memory, aborting.\n");
                goto err_unmap_auxc;
        }

        err = mlx4_init_icm_table(dev, &priv->qp_table.rdmarc_table,
                                  init_hca->rdmarc_base,
                                  dev_cap->rdmarc_entry_sz << priv->qp_table.rdmarc_shift,
                                  dev->caps.num_qps,
                                  dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW],
                                  0, 0);
        if (err) {
                mlx4_err(dev, "Failed to map RDMARC context memory, aborting\n");
                goto err_unmap_altc;
        }

        err = mlx4_init_icm_table(dev, &priv->cq_table.table,
                                  init_hca->cqc_base,
                                  dev_cap->cqc_entry_sz,
                                  dev->caps.num_cqs,
                                  dev->caps.reserved_cqs, 0, 0);
        if (err) {
                mlx4_err(dev, "Failed to map CQ context memory, aborting.\n");
                goto err_unmap_rdmarc;
        }

        err = mlx4_init_icm_table(dev, &priv->srq_table.table,
                                  init_hca->srqc_base,
                                  dev_cap->srq_entry_sz,
                                  dev->caps.num_srqs,
                                  dev->caps.reserved_srqs, 0, 0);
        if (err) {
                mlx4_err(dev, "Failed to map SRQ context memory, aborting.\n");
                goto err_unmap_cq;
        }

        /*
         * For flow steering device managed mode it is required to use
         * mlx4_init_icm_table. For B0 steering mode it's not strictly
         * required, but for simplicity just map the whole multicast
         * group table now.  The table isn't very big and it's a lot
         * easier than trying to track ref counts.
         */
        err = mlx4_init_icm_table(dev, &priv->mcg_table.table,
                                  init_hca->mc_base,
                                  mlx4_get_mgm_entry_size(dev),
                                  dev->caps.num_mgms + dev->caps.num_amgms,
                                  dev->caps.num_mgms + dev->caps.num_amgms,
                                  0, 0);
        if (err) {
                mlx4_err(dev, "Failed to map MCG context memory, aborting.\n");
                goto err_unmap_srq;
        }

        return 0;

err_unmap_srq:
        mlx4_cleanup_icm_table(dev, &priv->srq_table.table);

err_unmap_cq:
        mlx4_cleanup_icm_table(dev, &priv->cq_table.table);

err_unmap_rdmarc:
        mlx4_cleanup_icm_table(dev, &priv->qp_table.rdmarc_table);

err_unmap_altc:
        mlx4_cleanup_icm_table(dev, &priv->qp_table.altc_table);

err_unmap_auxc:
        mlx4_cleanup_icm_table(dev, &priv->qp_table.auxc_table);

err_unmap_qp:
        mlx4_cleanup_icm_table(dev, &priv->qp_table.qp_table);

err_unmap_dmpt:
        mlx4_cleanup_icm_table(dev, &priv->mr_table.dmpt_table);

err_unmap_mtt:
        mlx4_cleanup_icm_table(dev, &priv->mr_table.mtt_table);

err_unmap_eq:
        mlx4_cleanup_icm_table(dev, &priv->eq_table.table);

err_unmap_cmpt:
        mlx4_cleanup_icm_table(dev, &priv->eq_table.cmpt_table);
        mlx4_cleanup_icm_table(dev, &priv->cq_table.cmpt_table);
        mlx4_cleanup_icm_table(dev, &priv->srq_table.cmpt_table);
        mlx4_cleanup_icm_table(dev, &priv->qp_table.cmpt_table);

err_unmap_aux:
        mlx4_UNMAP_ICM_AUX(dev);

err_free_aux:
        mlx4_free_icm(dev, priv->fw.aux_icm, 0);

        return err;
}

static void mlx4_free_icms(struct mlx4_dev *dev)
{
        struct mlx4_priv *priv = mlx4_priv(dev);

        mlx4_cleanup_icm_table(dev, &priv->mcg_table.table);
        mlx4_cleanup_icm_table(dev, &priv->srq_table.table);
        mlx4_cleanup_icm_table(dev, &priv->cq_table.table);
        mlx4_cleanup_icm_table(dev, &priv->qp_table.rdmarc_table);
        mlx4_cleanup_icm_table(dev, &priv->qp_table.altc_table);
        mlx4_cleanup_icm_table(dev, &priv->qp_table.auxc_table);
        mlx4_cleanup_icm_table(dev, &priv->qp_table.qp_table);
        mlx4_cleanup_icm_table(dev, &priv->mr_table.dmpt_table);
        mlx4_cleanup_icm_table(dev, &priv->mr_table.mtt_table);
        mlx4_cleanup_icm_table(dev, &priv->eq_table.table);
        mlx4_cleanup_icm_table(dev, &priv->eq_table.cmpt_table);
        mlx4_cleanup_icm_table(dev, &priv->cq_table.cmpt_table);
        mlx4_cleanup_icm_table(dev, &priv->srq_table.cmpt_table);
        mlx4_cleanup_icm_table(dev, &priv->qp_table.cmpt_table);

        mlx4_UNMAP_ICM_AUX(dev);
        mlx4_free_icm(dev, priv->fw.aux_icm, 0);
}

static void mlx4_slave_exit(struct mlx4_dev *dev)
{
        struct mlx4_priv *priv = mlx4_priv(dev);

        mutex_lock(&priv->cmd.slave_cmd_mutex);
        if (mlx4_comm_cmd(dev, MLX4_COMM_CMD_RESET, 0, MLX4_COMM_TIME))
                mlx4_warn(dev, "Failed to close slave function.\n");
        mutex_unlock(&priv->cmd.slave_cmd_mutex);
}

static int map_bf_area(struct mlx4_dev *dev)
{
        struct mlx4_priv *priv = mlx4_priv(dev);
        resource_size_t bf_start;
        resource_size_t bf_len;
        int err = 0;

        if (!dev->caps.bf_reg_size)
                return -ENXIO;

        bf_start = pci_resource_start(dev->pdev, 2) +
                        (dev->caps.num_uars << PAGE_SHIFT);
        bf_len = pci_resource_len(dev->pdev, 2) -
                        (dev->caps.num_uars << PAGE_SHIFT);
        priv->bf_mapping = io_mapping_create_wc(bf_start, bf_len);
        if (!priv->bf_mapping)
                err = -ENOMEM;

        return err;
}

static void unmap_bf_area(struct mlx4_dev *dev)
{
        if (mlx4_priv(dev)->bf_mapping)
                io_mapping_free(mlx4_priv(dev)->bf_mapping);
}

static void mlx4_close_hca(struct mlx4_dev *dev)
{
        unmap_bf_area(dev);
        if (mlx4_is_slave(dev))
                mlx4_slave_exit(dev);
        else {
                mlx4_CLOSE_HCA(dev, 0);
                mlx4_free_icms(dev);
                mlx4_UNMAP_FA(dev);
                mlx4_free_icm(dev, mlx4_priv(dev)->fw.fw_icm, 0);
        }
}

static int mlx4_init_slave(struct mlx4_dev *dev)
{
        struct mlx4_priv *priv = mlx4_priv(dev);
        u64 dma = (u64) priv->mfunc.vhcr_dma;
        int num_of_reset_retries = NUM_OF_RESET_RETRIES;
        int ret_from_reset = 0;
        u32 slave_read;
        u32 cmd_channel_ver;

        mutex_lock(&priv->cmd.slave_cmd_mutex);
        priv->cmd.max_cmds = 1;
        mlx4_warn(dev, "Sending reset\n");
        ret_from_reset = mlx4_comm_cmd(dev, MLX4_COMM_CMD_RESET, 0,
                                       MLX4_COMM_TIME);
        /* if we are in the middle of FLR, the slave will retry
         * NUM_OF_RESET_RETRIES times before giving up */
        if (ret_from_reset) {
                if (MLX4_DELAY_RESET_SLAVE == ret_from_reset) {
                        msleep(SLEEP_TIME_IN_RESET);
                        while (ret_from_reset && num_of_reset_retries) {
                                mlx4_warn(dev, "slave is currently in the "
                                          "middle of FLR, retrying... "
                                          "(try num: %d)\n",
                                          (NUM_OF_RESET_RETRIES -
                                           num_of_reset_retries + 1));
                                ret_from_reset =
                                        mlx4_comm_cmd(dev, MLX4_COMM_CMD_RESET,
                                                      0, MLX4_COMM_TIME);
                                num_of_reset_retries--;
                        }
                } else
                        goto err;
        }

        /* check the driver version - the slave I/F revision
         * must match the master's */
        slave_read = swab32(readl(&priv->mfunc.comm->slave_read));
        cmd_channel_ver = mlx4_comm_get_version();

        if (MLX4_COMM_GET_IF_REV(cmd_channel_ver) !=
                MLX4_COMM_GET_IF_REV(slave_read)) {
                mlx4_err(dev, "slave driver version is not supported"
                         " by the master\n");
                goto err;
        }

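        /*
         * Hand the 64-bit VHCR DMA address to the master 16 bits at a
         * time, most significant word first; MLX4_COMM_CMD_VHCR_EN
         * carries the remaining low bits and enables the channel.
         */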
        mlx4_warn(dev, "Sending vhcr0\n");
        if (mlx4_comm_cmd(dev, MLX4_COMM_CMD_VHCR0, dma >> 48,
                                                    MLX4_COMM_TIME))
                goto err;
        if (mlx4_comm_cmd(dev, MLX4_COMM_CMD_VHCR1, dma >> 32,
                                                    MLX4_COMM_TIME))
                goto err;
        if (mlx4_comm_cmd(dev, MLX4_COMM_CMD_VHCR2, dma >> 16,
                                                    MLX4_COMM_TIME))
                goto err;
        if (mlx4_comm_cmd(dev, MLX4_COMM_CMD_VHCR_EN, dma, MLX4_COMM_TIME))
                goto err;

        mutex_unlock(&priv->cmd.slave_cmd_mutex);
        return 0;

err:
        mlx4_comm_cmd(dev, MLX4_COMM_CMD_RESET, 0, 0);
        mutex_unlock(&priv->cmd.slave_cmd_mutex);
        return -EIO;
}
1309
1310static void mlx4_parav_master_pf_caps(struct mlx4_dev *dev)
1311{
1312        int i;
1313
1314        for (i = 1; i <= dev->caps.num_ports; i++) {
1315                dev->caps.gid_table_len[i] = 1;
1316                dev->caps.pkey_table_len[i] =
1317                        dev->phys_caps.pkey_phys_table_len[i] - 1;
1318        }
1319}
1320
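    /*
     * Pick the smallest log2 MGM entry size that can hold qp_per_entry
     * QPs.  The formula below assumes an MGM entry of 2^i bytes built
     * from 16-byte lines, with the first two lines taken by the
     * header/GID and each remaining line holding four QPNs, i.e.
     * 4 * ((1 << i) / 16 - 2) QPs per entry.  Worked example: i = 9
     * (512-byte entries) gives 4 * (512/16 - 2) = 120 QPs per entry.
     */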
1321static int choose_log_fs_mgm_entry_size(int qp_per_entry)
1322{
1323        int i;
1324
1325        for (i = MLX4_MIN_MGM_LOG_ENTRY_SIZE; i <= MLX4_MAX_MGM_LOG_ENTRY_SIZE;
1326              i++) {
1327                if (qp_per_entry <= 4 * ((1 << i) / 16 - 2))
1328                        break;
1329        }
1330
1331        return (i <= MLX4_MAX_MGM_LOG_ENTRY_SIZE) ? i : -1;
1332}
1333
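    /*
     * Select the steering mode:
     *  - DEVICE_MANAGED (flow steering) when the firmware advertises
     *    MLX4_DEV_CAP_FLAG2_FS_EN, log_num_mgm_entry_size is left at
     *    -1 (auto), and, for multi-function devices, each entry can
     *    hold a QP for every function (num_vfs + 1);
     *  - B0 when both VEP unicast and multicast steering caps are set;
     *  - A0 otherwise, as the legacy fallback.
     */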
1334static void choose_steering_mode(struct mlx4_dev *dev,
1335                                 struct mlx4_dev_cap *dev_cap)
1336{
1337        if (mlx4_log_num_mgm_entry_size == -1 &&
1338            dev_cap->flags2 & MLX4_DEV_CAP_FLAG2_FS_EN &&
1339            (!mlx4_is_mfunc(dev) ||
1340             (dev_cap->fs_max_num_qp_per_entry >= (num_vfs + 1))) &&
1341            choose_log_fs_mgm_entry_size(dev_cap->fs_max_num_qp_per_entry) >=
1342                MLX4_MIN_MGM_LOG_ENTRY_SIZE) {
1343                dev->oper_log_mgm_entry_size =
1344                        choose_log_fs_mgm_entry_size(dev_cap->fs_max_num_qp_per_entry);
1345                dev->caps.steering_mode = MLX4_STEERING_MODE_DEVICE_MANAGED;
1346                dev->caps.num_qp_per_mgm = dev_cap->fs_max_num_qp_per_entry;
1347                dev->caps.fs_log_max_ucast_qp_range_size =
1348                        dev_cap->fs_log_max_ucast_qp_range_size;
1349        } else {
1350                if (dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_UC_STEER &&
1351                    dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_MC_STEER)
1352                        dev->caps.steering_mode = MLX4_STEERING_MODE_B0;
1353                else {
1354                        dev->caps.steering_mode = MLX4_STEERING_MODE_A0;
1355
1356                        if (dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_UC_STEER ||
1357                            dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_MC_STEER)
1358                                mlx4_warn(dev, "Must have both UC_STEER and MC_STEER flags "
1359                                          "set to use B0 steering. Falling back to A0 steering mode.\n");
1360                }
1361                dev->oper_log_mgm_entry_size =
1362                        mlx4_log_num_mgm_entry_size > 0 ?
1363                        mlx4_log_num_mgm_entry_size :
1364                        MLX4_DEFAULT_MGM_LOG_ENTRY_SIZE;
1365                dev->caps.num_qp_per_mgm = mlx4_get_qp_per_mgm(dev);
1366        }
1367        mlx4_dbg(dev, "Steering mode is: %s, oper_log_mgm_entry_size = %d, "
1368                 "modparam log_num_mgm_entry_size = %d\n",
1369                 mlx4_steering_mode_str(dev->caps.steering_mode),
1370                 dev->oper_log_mgm_entry_size,
1371                 mlx4_log_num_mgm_entry_size);
1372}
1373
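    /*
     * Bring the HCA up.  Physical functions take the firmware path:
     * QUERY_FW, load firmware, QUERY_DEV_CAP, build an ICM profile and
     * map ICM, then INIT_HCA.  Slave (VF) functions instead run the
     * comm-channel handshake (mlx4_init_slave) and read their
     * capabilities from the master (mlx4_slave_cap).
     */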
1374static int mlx4_init_hca(struct mlx4_dev *dev)
1375{
1376        struct mlx4_priv          *priv = mlx4_priv(dev);
1377        struct mlx4_adapter        adapter;
1378        struct mlx4_dev_cap        dev_cap;
1379        struct mlx4_mod_stat_cfg   mlx4_cfg;
1380        struct mlx4_profile        profile;
1381        struct mlx4_init_hca_param init_hca;
1382        u64 icm_size;
1383        int err;
1384
1385        if (!mlx4_is_slave(dev)) {
1386                err = mlx4_QUERY_FW(dev);
1387                if (err) {
1388                        if (err == -EACCES)
1389                                mlx4_info(dev, "non-primary physical function, skipping.\n");
1390                        else
1391                                mlx4_err(dev, "QUERY_FW command failed, aborting.\n");
1392                        return err;
1393                }
1394
1395                err = mlx4_load_fw(dev);
1396                if (err) {
1397                        mlx4_err(dev, "Failed to start FW, aborting.\n");
1398                        return err;
1399                }
1400
1401                mlx4_cfg.log_pg_sz_m = 1;
1402                mlx4_cfg.log_pg_sz = 0;
1403                err = mlx4_MOD_STAT_CFG(dev, &mlx4_cfg);
1404                if (err)
1405                        mlx4_warn(dev, "Failed to override log_pg_sz parameter\n");
1406
1407                err = mlx4_dev_cap(dev, &dev_cap);
1408                if (err) {
1409                        mlx4_err(dev, "QUERY_DEV_CAP command failed, aborting.\n");
1410                        goto err_stop_fw;
1411                }
1412
1413                choose_steering_mode(dev, &dev_cap);
1414
1415                if (mlx4_is_master(dev))
1416                        mlx4_parav_master_pf_caps(dev);
1417
1418                priv->fs_hash_mode = MLX4_FS_L2_HASH;
1419
1420                switch (priv->fs_hash_mode) {
1421                case MLX4_FS_L2_HASH:
1422                        init_hca.fs_hash_enable_bits = 0;
1423                        break;
1424
1425                case MLX4_FS_L2_L3_L4_HASH:
1426                        /* Enable flow steering with
1427                         * UDP unicast and TCP unicast
1428                         */
1429                        init_hca.fs_hash_enable_bits =
1430                                MLX4_FS_UDP_UC_EN | MLX4_FS_TCP_UC_EN;
1431                        break;
1432                }
1433
1434                profile = default_profile;
1435                if (dev->caps.steering_mode ==
1436                    MLX4_STEERING_MODE_DEVICE_MANAGED)
1437                        profile.num_mcg = MLX4_FS_NUM_MCG;
1438
1439                icm_size = mlx4_make_profile(dev, &profile, &dev_cap,
1440                                             &init_hca);
1441                if ((long long) icm_size < 0) {
1442                        err = (int) icm_size;
1443                        goto err_stop_fw;
1444                }
1445
1446                dev->caps.max_fmr_maps = (1 << (32 - ilog2(dev->caps.num_mpts))) - 1;
1447
1448                init_hca.log_uar_sz = ilog2(dev->caps.num_uars);
1449                init_hca.uar_page_sz = PAGE_SHIFT - 12;
1450
1451                err = mlx4_init_icm(dev, &dev_cap, &init_hca, icm_size);
1452                if (err)
1453                        goto err_stop_fw;
1454
1455                err = mlx4_INIT_HCA(dev, &init_hca);
1456                if (err) {
1457                        mlx4_err(dev, "INIT_HCA command failed, aborting.\n");
1458                        goto err_free_icm;
1459                }
1460        } else {
1461                err = mlx4_init_slave(dev);
1462                if (err) {
1463                        mlx4_err(dev, "Failed to initialize slave\n");
1464                        return err;
1465                }
1466
1467                err = mlx4_slave_cap(dev);
1468                if (err) {
1469                        mlx4_err(dev, "Failed to obtain slave caps\n");
1470                        goto err_close;
1471                }
1472        }
1473
1474        if (map_bf_area(dev))
1475                mlx4_dbg(dev, "Failed to map blue flame area\n");
1476
1477        /* Only the master sets the ports; all other functions get the setting from it. */
1478        if (!mlx4_is_slave(dev))
1479                mlx4_set_port_mask(dev);
1480
1481        err = mlx4_QUERY_ADAPTER(dev, &adapter);
1482        if (err) {
1483                mlx4_err(dev, "QUERY_ADAPTER command failed, aborting.\n");
1484                goto unmap_bf;
1485        }
1486
1487        priv->eq_table.inta_pin = adapter.inta_pin;
1488        memcpy(dev->board_id, adapter.board_id, sizeof dev->board_id);
1489
1490        return 0;
1491
1492unmap_bf:
1493        unmap_bf_area(dev);
1494
1495err_close:
1496        if (mlx4_is_slave(dev))
1497                mlx4_slave_exit(dev);
1498        else
1499                mlx4_CLOSE_HCA(dev, 0);
1500
1501err_free_icm:
1502        if (!mlx4_is_slave(dev))
1503                mlx4_free_icms(dev);
1504
1505err_stop_fw:
1506        if (!mlx4_is_slave(dev)) {
1507                mlx4_UNMAP_FA(dev);
1508                mlx4_free_icm(dev, priv->fw.fw_icm, 0);
1509        }
1510        return err;
1511}
1512
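    /*
     * The counters table is a plain index bitmap: nent entries with
     * nent - 1 as the allocation mask, so indices 0..nent-1 are handed
     * out.  -ENOENT here just means the device lacks the counters
     * capability; mlx4_setup_hca() treats that as non-fatal.
     */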
1513static int mlx4_init_counters_table(struct mlx4_dev *dev)
1514{
1515        struct mlx4_priv *priv = mlx4_priv(dev);
1516        int nent;
1517
1518        if (!(dev->caps.flags & MLX4_DEV_CAP_FLAG_COUNTERS))
1519                return -ENOENT;
1520
1521        nent = dev->caps.max_counters;
1522        return mlx4_bitmap_init(&priv->counters_bitmap, nent, nent - 1, 0, 0);
1523}
1524
1525static void mlx4_cleanup_counters_table(struct mlx4_dev *dev)
1526{
1527        mlx4_bitmap_cleanup(&mlx4_priv(dev)->counters_bitmap);
1528}
1529
1530int __mlx4_counter_alloc(struct mlx4_dev *dev, u32 *idx)
1531{
1532        struct mlx4_priv *priv = mlx4_priv(dev);
1533
1534        if (!(dev->caps.flags & MLX4_DEV_CAP_FLAG_COUNTERS))
1535                return -ENOENT;
1536
1537        *idx = mlx4_bitmap_alloc(&priv->counters_bitmap);
1538        if (*idx == -1)
1539                return -ENOMEM;
1540
1541        return 0;
1542}
1543
1544int mlx4_counter_alloc(struct mlx4_dev *dev, u32 *idx)
1545{
1546        u64 out_param;
1547        int err;
1548
1549        if (mlx4_is_mfunc(dev)) {
1550                err = mlx4_cmd_imm(dev, 0, &out_param, RES_COUNTER,
1551                                   RES_OP_RESERVE, MLX4_CMD_ALLOC_RES,
1552                                   MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED);
1553                if (!err)
1554                        *idx = get_param_l(&out_param);
1555
1556                return err;
1557        }
1558        return __mlx4_counter_alloc(dev, idx);
1559}
1560EXPORT_SYMBOL_GPL(mlx4_counter_alloc);
1561
1562void __mlx4_counter_free(struct mlx4_dev *dev, u32 idx)
1563{
1564        mlx4_bitmap_free(&mlx4_priv(dev)->counters_bitmap, idx);
1566}
1567
1568void mlx4_counter_free(struct mlx4_dev *dev, u32 idx)
1569{
1570        u64 in_param;
1571
1572        if (mlx4_is_mfunc(dev)) {
1573                set_param_l(&in_param, idx);
1574                mlx4_cmd(dev, in_param, RES_COUNTER, RES_OP_RESERVE,
1575                         MLX4_CMD_FREE_RES, MLX4_CMD_TIME_CLASS_A,
1576                         MLX4_CMD_WRAPPED);
1577                return;
1578        }
1579        __mlx4_counter_free(dev, idx);
1580}
1581EXPORT_SYMBOL_GPL(mlx4_counter_free);
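
    /*
     * Usage sketch for the exported counter API (hypothetical caller,
     * e.g. a protocol driver stacked on mlx4_core):
     *
     *         u32 idx;
     *
     *         if (!mlx4_counter_alloc(dev, &idx)) {
     *                 ... attach idx to a QP context, read stats ...
     *                 mlx4_counter_free(dev, idx);
     *         }
     *
     * On a VF both calls are transparently forwarded to the PF through
     * the wrapped ALLOC_RES/FREE_RES commands above.
     */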
1582
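    /*
     * Initialize the software resource tables in dependency order
     * (UAR, PD, XRCD, MR, EQ, CQ, SRQ, QP, MCG, counters), then
     * configure the ports.  The error path unwinds in reverse order;
     * note the switch back to polling-mode commands before the EQ
     * table is torn down, since event-driven commands need a live EQ.
     */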
1583static int mlx4_setup_hca(struct mlx4_dev *dev)
1584{
1585        struct mlx4_priv *priv = mlx4_priv(dev);
1586        int err;
1587        int port;
1588        __be32 ib_port_default_caps;
1589
1590        err = mlx4_init_uar_table(dev);
1591        if (err) {
1592                mlx4_err(dev, "Failed to initialize "
1593                         "user access region table, aborting.\n");
1594                return err;
1595        }
1596
1597        err = mlx4_uar_alloc(dev, &priv->driver_uar);
1598        if (err) {
1599                mlx4_err(dev, "Failed to allocate driver access region, "
1600                         "aborting.\n");
1601                goto err_uar_table_free;
1602        }
1603
1604        priv->kar = ioremap((phys_addr_t) priv->driver_uar.pfn << PAGE_SHIFT, PAGE_SIZE);
1605        if (!priv->kar) {
1606                mlx4_err(dev, "Couldn't map kernel access region, "
1607                         "aborting.\n");
1608                err = -ENOMEM;
1609                goto err_uar_free;
1610        }
1611
1612        err = mlx4_init_pd_table(dev);
1613        if (err) {
1614                mlx4_err(dev, "Failed to initialize "
1615                         "protection domain table, aborting.\n");
1616                goto err_kar_unmap;
1617        }
1618
1619        err = mlx4_init_xrcd_table(dev);
1620        if (err) {
1621                mlx4_err(dev, "Failed to initialize "
1622                         "reliable connection domain table, aborting.\n");
1623                goto err_pd_table_free;
1624        }
1625
1626        err = mlx4_init_mr_table(dev);
1627        if (err) {
1628                mlx4_err(dev, "Failed to initialize "
1629                         "memory region table, aborting.\n");
1630                goto err_xrcd_table_free;
1631        }
1632
1633        err = mlx4_init_eq_table(dev);
1634        if (err) {
1635                mlx4_err(dev, "Failed to initialize "
1636                         "event queue table, aborting.\n");
1637                goto err_mr_table_free;
1638        }
1639
1640        err = mlx4_cmd_use_events(dev);
1641        if (err) {
1642                mlx4_err(dev, "Failed to switch to event-driven "
1643                         "firmware commands, aborting.\n");
1644                goto err_eq_table_free;
1645        }
1646
1647        err = mlx4_NOP(dev);
1648        if (err) {
1649                if (dev->flags & MLX4_FLAG_MSI_X) {
1650                        mlx4_warn(dev, "NOP command failed to generate MSI-X "
1651                                  "interrupt (IRQ %d).\n",
1652                                  priv->eq_table.eq[dev->caps.num_comp_vectors].irq);
1653                        mlx4_warn(dev, "Trying again without MSI-X.\n");
1654                } else {
1655                        mlx4_err(dev, "NOP command failed to generate interrupt "
1656                                 "(IRQ %d), aborting.\n",
1657                                 priv->eq_table.eq[dev->caps.num_comp_vectors].irq);
1658                        mlx4_err(dev, "BIOS or ACPI interrupt routing problem?\n");
1659                }
1660
1661                goto err_cmd_poll;
1662        }
1663
1664        mlx4_dbg(dev, "NOP command IRQ test passed\n");
1665
1666        err = mlx4_init_cq_table(dev);
1667        if (err) {
1668                mlx4_err(dev, "Failed to initialize "
1669                         "completion queue table, aborting.\n");
1670                goto err_cmd_poll;
1671        }
1672
1673        err = mlx4_init_srq_table(dev);
1674        if (err) {
1675                mlx4_err(dev, "Failed to initialize "
1676                         "shared receive queue table, aborting.\n");
1677                goto err_cq_table_free;
1678        }
1679
1680        err = mlx4_init_qp_table(dev);
1681        if (err) {
1682                mlx4_err(dev, "Failed to initialize "
1683                         "queue pair table, aborting.\n");
1684                goto err_srq_table_free;
1685        }
1686
1687        if (!mlx4_is_slave(dev)) {
1688                err = mlx4_init_mcg_table(dev);
1689                if (err) {
1690                        mlx4_err(dev, "Failed to initialize "
1691                                 "multicast group table, aborting.\n");
1692                        goto err_qp_table_free;
1693                }
1694        }
1695
1696        err = mlx4_init_counters_table(dev);
1697        if (err && err != -ENOENT) {
1698                mlx4_err(dev, "Failed to initialize counters table, aborting.\n");
1699                goto err_mcg_table_free;
1700        }
1701
1702        if (!mlx4_is_slave(dev)) {
1703                for (port = 1; port <= dev->caps.num_ports; port++) {
1704                        ib_port_default_caps = 0;
1705                        err = mlx4_get_port_ib_caps(dev, port,
1706                                                    &ib_port_default_caps);
1707                        if (err)
1708                                mlx4_warn(dev, "failed to get port %d default "
1709                                          "ib capabilities (%d). Continuing "
1710                                          "with caps = 0\n", port, err);
1711                        dev->caps.ib_port_def_cap[port] = ib_port_default_caps;
1712
1713                        /* initialize per-slave default ib port capabilities */
1714                        if (mlx4_is_master(dev)) {
1715                                int i;
1716                                for (i = 0; i < dev->num_slaves; i++) {
1717                                        if (i == mlx4_master_func_num(dev))
1718                                                continue;
1719                                        priv->mfunc.master.slave_state[i].ib_cap_mask[port] =
1720                                                        ib_port_default_caps;
1721                                }
1722                        }
1723
1724                        if (mlx4_is_mfunc(dev))
1725                                dev->caps.port_ib_mtu[port] = IB_MTU_2048;
1726                        else
1727                                dev->caps.port_ib_mtu[port] = IB_MTU_4096;
1728
1729                        err = mlx4_SET_PORT(dev, port, mlx4_is_master(dev) ?
1730                                            dev->caps.pkey_table_len[port] : -1);
1731                        if (err) {
1732                                mlx4_err(dev, "Failed to set port %d, aborting\n",
1733                                        port);
1734                                goto err_counters_table_free;
1735                        }
1736                }
1737        }
1738
1739        return 0;
1740
1741err_counters_table_free:
1742        mlx4_cleanup_counters_table(dev);
1743
1744err_mcg_table_free:
1745        mlx4_cleanup_mcg_table(dev);
1746
1747err_qp_table_free:
1748        mlx4_cleanup_qp_table(dev);
1749
1750err_srq_table_free:
1751        mlx4_cleanup_srq_table(dev);
1752
1753err_cq_table_free:
1754        mlx4_cleanup_cq_table(dev);
1755
1756err_cmd_poll:
1757        mlx4_cmd_use_polling(dev);
1758
1759err_eq_table_free:
1760        mlx4_cleanup_eq_table(dev);
1761
1762err_mr_table_free:
1763        mlx4_cleanup_mr_table(dev);
1764
1765err_xrcd_table_free:
1766        mlx4_cleanup_xrcd_table(dev);
1767
1768err_pd_table_free:
1769        mlx4_cleanup_pd_table(dev);
1770
1771err_kar_unmap:
1772        iounmap(priv->kar);
1773
1774err_uar_free:
1775        mlx4_uar_free(dev, &priv->driver_uar);
1776
1777err_uar_table_free:
1778        mlx4_cleanup_uar_table(dev);
1779        return err;
1780}
1781
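    /*
     * MSI-X vector budget: one completion vector per RSS queue per
     * port (capped at MAX_MSIX_P_PORT) plus MSIX_LEGACY_SZ shared
     * vectors, capped at MAX_MSIX and at the EQs the firmware actually
     * provides.  Illustrative arithmetic (values assumed, not taken
     * from this file): with 2 ports, netif_get_num_default_rss_queues()
     * == 8 and MSIX_LEGACY_SZ == 4, nreq = min(2 * min(8 + 1,
     * MAX_MSIX_P_PORT) + 4, MAX_MSIX) = 22 before the caps kick in.
     */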
1782static void mlx4_enable_msi_x(struct mlx4_dev *dev)
1783{
1784        struct mlx4_priv *priv = mlx4_priv(dev);
1785        struct msix_entry *entries;
1786        int nreq = min_t(int, dev->caps.num_ports *
1787                         min_t(int, netif_get_num_default_rss_queues() + 1,
1788                               MAX_MSIX_P_PORT) + MSIX_LEGACY_SZ, MAX_MSIX);
1789        int err;
1790        int i;
1791
1792        if (msi_x) {
1793                nreq = min_t(int, dev->caps.num_eqs - dev->caps.reserved_eqs,
1794                             nreq);
1795
1796                entries = kcalloc(nreq, sizeof *entries, GFP_KERNEL);
1797                if (!entries)
1798                        goto no_msi;
1799
1800                for (i = 0; i < nreq; ++i)
1801                        entries[i].entry = i;
1802
1803        retry:
1804                err = pci_enable_msix(dev->pdev, entries, nreq);
1805                if (err) {
1806                        /* Try again if at least 2 vectors are available */
1807                        if (err > 1) {
1808                                mlx4_info(dev, "Requested %d vectors, "
1809                                          "but only %d MSI-X vectors available, "
1810                                          "trying again\n", nreq, err);
1811                                nreq = err;
1812                                goto retry;
1813                        }
1814                        kfree(entries);
1815                        goto no_msi;
1816                }
1817
1818                if (nreq <
1819                    MSIX_LEGACY_SZ + dev->caps.num_ports * MIN_MSIX_P_PORT) {
1820                        /* Working in legacy mode: all EQs are shared */
1821                        dev->caps.comp_pool           = 0;
1822                        dev->caps.num_comp_vectors = nreq - 1;
1823                } else {
1824                        dev->caps.comp_pool           = nreq - MSIX_LEGACY_SZ;
1825                        dev->caps.num_comp_vectors = MSIX_LEGACY_SZ - 1;
1826                }
1827                for (i = 0; i < nreq; ++i)
1828                        priv->eq_table.eq[i].irq = entries[i].vector;
1829
1830                dev->flags |= MLX4_FLAG_MSI_X;
1831
1832                kfree(entries);
1833                return;
1834        }
1835
1836no_msi:
1837        dev->caps.num_comp_vectors = 1;
1838        dev->caps.comp_pool        = 0;
1839
1840        for (i = 0; i < 2; ++i)
1841                priv->eq_table.eq[i].irq = dev->pdev->irq;
1842}
1843
1844static int mlx4_init_port_info(struct mlx4_dev *dev, int port)
1845{
1846        struct mlx4_port_info *info = &mlx4_priv(dev)->port[port];
1847        int err = 0;
1848
1849        info->dev = dev;
1850        info->port = port;
1851        if (!mlx4_is_slave(dev)) {
1852                INIT_RADIX_TREE(&info->mac_tree, GFP_KERNEL);
1853                mlx4_init_mac_table(dev, &info->mac_table);
1854                mlx4_init_vlan_table(dev, &info->vlan_table);
1855                info->base_qpn =
1856                        dev->caps.reserved_qps_base[MLX4_QP_REGION_ETH_ADDR] +
1857                        (port - 1) * (1 << log_num_mac);
1858        }
1859
1860        sprintf(info->dev_name, "mlx4_port%d", port);
1861        info->port_attr.attr.name = info->dev_name;
1862        if (mlx4_is_mfunc(dev))
1863                info->port_attr.attr.mode = S_IRUGO;
1864        else {
1865                info->port_attr.attr.mode = S_IRUGO | S_IWUSR;
1866                info->port_attr.store     = set_port_type;
1867        }
1868        info->port_attr.show      = show_port_type;
1869        sysfs_attr_init(&info->port_attr.attr);
1870
1871        err = device_create_file(&dev->pdev->dev, &info->port_attr);
1872        if (err) {
1873                mlx4_err(dev, "Failed to create file for port %d\n", port);
1874                info->port = -1;
1875        }
1876
1877        sprintf(info->dev_mtu_name, "mlx4_port%d_mtu", port);
1878        info->port_mtu_attr.attr.name = info->dev_mtu_name;
1879        if (mlx4_is_mfunc(dev))
1880                info->port_mtu_attr.attr.mode = S_IRUGO;
1881        else {
1882                info->port_mtu_attr.attr.mode = S_IRUGO | S_IWUSR;
1883                info->port_mtu_attr.store     = set_port_ib_mtu;
1884        }
1885        info->port_mtu_attr.show      = show_port_ib_mtu;
1886        sysfs_attr_init(&info->port_mtu_attr.attr);
1887
1888        err = device_create_file(&dev->pdev->dev, &info->port_mtu_attr);
1889        if (err) {
1890                mlx4_err(dev, "Failed to create mtu file for port %d\n", port);
1891                device_remove_file(&info->dev->pdev->dev, &info->port_attr);
1892                info->port = -1;
1893        }
1894
1895        return err;
1896}
1897
1898static void mlx4_cleanup_port_info(struct mlx4_port_info *info)
1899{
1900        if (info->port < 0)
1901                return;
1902
1903        device_remove_file(&info->dev->pdev->dev, &info->port_attr);
1904        device_remove_file(&info->dev->pdev->dev, &info->port_mtu_attr);
1905}
1906
1907static int mlx4_init_steering(struct mlx4_dev *dev)
1908{
1909        struct mlx4_priv *priv = mlx4_priv(dev);
1910        int num_entries = dev->caps.num_ports;
1911        int i, j;
1912
1913        priv->steer = kcalloc(num_entries, sizeof(struct mlx4_steer), GFP_KERNEL);
1914        if (!priv->steer)
1915                return -ENOMEM;
1916
1917        for (i = 0; i < num_entries; i++)
1918                for (j = 0; j < MLX4_NUM_STEERS; j++) {
1919                        INIT_LIST_HEAD(&priv->steer[i].promisc_qps[j]);
1920                        INIT_LIST_HEAD(&priv->steer[i].steer_entries[j]);
1921                }
1922        return 0;
1923}
1924
1925static void mlx4_clear_steering(struct mlx4_dev *dev)
1926{
1927        struct mlx4_priv *priv = mlx4_priv(dev);
1928        struct mlx4_steer_index *entry, *tmp_entry;
1929        struct mlx4_promisc_qp *pqp, *tmp_pqp;
1930        int num_entries = dev->caps.num_ports;
1931        int i, j;
1932
1933        for (i = 0; i < num_entries; i++) {
1934                for (j = 0; j < MLX4_NUM_STEERS; j++) {
1935                        list_for_each_entry_safe(pqp, tmp_pqp,
1936                                                 &priv->steer[i].promisc_qps[j],
1937                                                 list) {
1938                                list_del(&pqp->list);
1939                                kfree(pqp);
1940                        }
1941                        list_for_each_entry_safe(entry, tmp_entry,
1942                                                 &priv->steer[i].steer_entries[j],
1943                                                 list) {
1944                                list_del(&entry->list);
1945                                list_for_each_entry_safe(pqp, tmp_pqp,
1946                                                         &entry->duplicates,
1947                                                         list) {
1948                                        list_del(&pqp->list);
1949                                        kfree(pqp);
1950                                }
1951                                kfree(entry);
1952                        }
1953                }
1954        }
1955        kfree(priv->steer);
1956}
1957
1958static int extended_func_num(struct pci_dev *pdev)
1959{
1960        return PCI_SLOT(pdev->devfn) * 8 + PCI_FUNC(pdev->devfn);
1961}
1962
1963#define MLX4_OWNER_BASE 0x8069c
1964#define MLX4_OWNER_SIZE 4
1965
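    /*
     * Single-PF arbitration uses a 4-byte hardware semaphore at
     * MLX4_OWNER_BASE in BAR 0: the read itself is the acquire attempt
     * (0 means we now own the device, non-zero means another function
     * holds it), and writing 0 releases it.  The msleep() in
     * mlx4_free_ownership() presumably gives other functions time to
     * observe the release before the mapping is torn down.
     */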
1966static int mlx4_get_ownership(struct mlx4_dev *dev)
1967{
1968        void __iomem *owner;
1969        u32 ret;
1970
1971        if (pci_channel_offline(dev->pdev))
1972                return -EIO;
1973
1974        owner = ioremap(pci_resource_start(dev->pdev, 0) + MLX4_OWNER_BASE,
1975                        MLX4_OWNER_SIZE);
1976        if (!owner) {
1977                mlx4_err(dev, "Failed to obtain ownership bit\n");
1978                return -ENOMEM;
1979        }
1980
1981        ret = readl(owner);
1982        iounmap(owner);
1983        return (int) !!ret;
1984}
1985
1986static void mlx4_free_ownership(struct mlx4_dev *dev)
1987{
1988        void __iomem *owner;
1989
1990        if (pci_channel_offline(dev->pdev))
1991                return;
1992
1993        owner = ioremap(pci_resource_start(dev->pdev, 0) + MLX4_OWNER_BASE,
1994                        MLX4_OWNER_SIZE);
1995        if (!owner) {
1996                mlx4_err(dev, "Failed to obtain ownership bit\n");
1997                return;
1998        }
1999        writel(0, owner);
2000        msleep(1000);
2001        iounmap(owner);
2002}
2003
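    /*
     * Main probe path.  Order matters: enable the PCI device and BARs,
     * set the DMA masks, then (PF only) take device ownership,
     * optionally enable SR-IOV, and reset the HCA before the first
     * firmware command.  Slaves skip the reset and jump straight to
     * the command-interface setup at slave_start.
     */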
2004static int __mlx4_init_one(struct pci_dev *pdev, int pci_dev_data)
2005{
2006        struct mlx4_priv *priv;
2007        struct mlx4_dev *dev;
2008        int err;
2009        int port;
2010
2011        pr_info(DRV_NAME ": Initializing %s\n", pci_name(pdev));
2012
2013        err = pci_enable_device(pdev);
2014        if (err) {
2015                dev_err(&pdev->dev, "Cannot enable PCI device, "
2016                        "aborting.\n");
2017                return err;
2018        }
2019        if (num_vfs > MLX4_MAX_NUM_VF) {
2020                dev_err(&pdev->dev, "Requested more VFs (%d) than allowed (%d)\n", num_vfs, MLX4_MAX_NUM_VF);
2021                err = -EINVAL;
2022                goto err_disable_pdev;
2023        }
2024        /*
2025         * Check for BARs.
2026         */
2027        if (!(pci_dev_data & MLX4_PCI_DEV_IS_VF) &&
2028            !(pci_resource_flags(pdev, 0) & IORESOURCE_MEM)) {
2029                dev_err(&pdev->dev, "Missing DCS, aborting "
2030                        "(driver_data: 0x%x, pci_resource_flags(pdev, 0):0x%lx)\n",
2031                        pci_dev_data, pci_resource_flags(pdev, 0));
2032                err = -ENODEV;
2033                goto err_disable_pdev;
2034        }
2035        if (!(pci_resource_flags(pdev, 2) & IORESOURCE_MEM)) {
2036                dev_err(&pdev->dev, "Missing UAR, aborting.\n");
2037                err = -ENODEV;
2038                goto err_disable_pdev;
2039        }
2040
2041        err = pci_request_regions(pdev, DRV_NAME);
2042        if (err) {
2043                dev_err(&pdev->dev, "Couldn't get PCI resources, aborting\n");
2044                goto err_disable_pdev;
2045        }
2046
2047        pci_set_master(pdev);
2048
2049        err = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
2050        if (err) {
2051                dev_warn(&pdev->dev, "Warning: couldn't set 64-bit PCI DMA mask.\n");
2052                err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
2053                if (err) {
2054                        dev_err(&pdev->dev, "Can't set PCI DMA mask, aborting.\n");
2055                        goto err_release_regions;
2056                }
2057        }
2058        err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
2059        if (err) {
2060                dev_warn(&pdev->dev, "Warning: couldn't set 64-bit "
2061                         "consistent PCI DMA mask.\n");
2062                err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
2063                if (err) {
2064                        dev_err(&pdev->dev, "Can't set consistent PCI DMA mask, "
2065                                "aborting.\n");
2066                        goto err_release_regions;
2067                }
2068        }
2069
2070        /* Allow large DMA segments, up to the firmware limit of 1 GB */
2071        dma_set_max_seg_size(&pdev->dev, 1024 * 1024 * 1024);
2072
2073        priv = kzalloc(sizeof *priv, GFP_KERNEL);
2074        if (!priv) {
2075                dev_err(&pdev->dev, "Device struct alloc failed, "
2076                        "aborting.\n");
2077                err = -ENOMEM;
2078                goto err_release_regions;
2079        }
2080
2081        dev       = &priv->dev;
2082        dev->pdev = pdev;
2083        INIT_LIST_HEAD(&priv->ctx_list);
2084        spin_lock_init(&priv->ctx_lock);
2085
2086        mutex_init(&priv->port_mutex);
2087
2088        INIT_LIST_HEAD(&priv->pgdir_list);
2089        mutex_init(&priv->pgdir_mutex);
2090
2091        INIT_LIST_HEAD(&priv->bf_list);
2092        mutex_init(&priv->bf_mutex);
2093
2094        dev->rev_id = pdev->revision;
2095        /* Detect if this device is a virtual function */
2096        if (pci_dev_data & MLX4_PCI_DEV_IS_VF) {
2097                /* When acting as PF, we normally skip VFs unless explicitly
2098                 * requested to probe them. */
2099                if (num_vfs && extended_func_num(pdev) > probe_vf) {
2100                        mlx4_warn(dev, "Skipping virtual function: %d\n",
2101                                                extended_func_num(pdev));
2102                        err = -ENODEV;
2103                        goto err_free_dev;
2104                }
2105                mlx4_warn(dev, "Detected virtual function - running in slave mode\n");
2106                dev->flags |= MLX4_FLAG_SLAVE;
2107        } else {
2108                /* We reset the device and enable SRIOV only for physical
2109                 * devices.  Try to claim ownership on the device;
2110                 * if already taken, skip -- do not allow multiple PFs */
2111                err = mlx4_get_ownership(dev);
2112                if (err) {
2113                        if (err < 0)
2114                                goto err_free_dev;
2115                        else {
2116                                mlx4_warn(dev, "Multiple PFs not yet supported."
2117                                          " Skipping PF.\n");
2118                                err = -EINVAL;
2119                                goto err_free_dev;
2120                        }
2121                }
2122
2123                if (num_vfs) {
2124                        mlx4_warn(dev, "Enabling SR-IOV with %d VFs\n", num_vfs);
2125                        err = pci_enable_sriov(pdev, num_vfs);
2126                        if (err) {
2127                                mlx4_err(dev, "Failed to enable SR-IOV, continuing without SR-IOV (err = %d).\n",
2128                                         err);
2129                                err = 0;
2130                        } else {
2131                                mlx4_warn(dev, "Running in master mode\n");
2132                                dev->flags |= MLX4_FLAG_SRIOV |
2133                                              MLX4_FLAG_MASTER;
2134                                dev->num_vfs = num_vfs;
2135                        }
2136                }
2137
2138                /*
2139                 * Now reset the HCA before we touch the PCI capabilities or
2140                 * attempt a firmware command, since a boot ROM may have left
2141                 * the HCA in an undefined state.
2142                 */
2143                err = mlx4_reset(dev);
2144                if (err) {
2145                        mlx4_err(dev, "Failed to reset HCA, aborting.\n");
2146                        goto err_rel_own;
2147                }
2148        }
2149
2150slave_start:
2151        err = mlx4_cmd_init(dev);
2152        if (err) {
2153                mlx4_err(dev, "Failed to init command interface, aborting.\n");
2154                goto err_sriov;
2155        }
2156
2157        /* In slave functions, the communication channel must be initialized
2158         * before posting commands. Also, init num_slaves before calling
2159         * mlx4_init_hca */
2160        if (mlx4_is_mfunc(dev)) {
2161                if (mlx4_is_master(dev))
2162                        dev->num_slaves = MLX4_MAX_NUM_SLAVES;
2163                else {
2164                        dev->num_slaves = 0;
2165                        err = mlx4_multi_func_init(dev);
2166                        if (err) {
2167                                mlx4_err(dev, "Failed to init slave mfunc interface, aborting.\n");
2168                                goto err_cmd;
2169                        }
2170                }
2171        }
2172
2173        err = mlx4_init_hca(dev);
2174        if (err) {
2175                if (err == -EACCES) {
2176                        /* Not the primary physical function -
2177                         * running in slave mode */
2178                        mlx4_cmd_cleanup(dev);
2179                        dev->flags |= MLX4_FLAG_SLAVE;
2180                        dev->flags &= ~MLX4_FLAG_MASTER;
2181                        goto slave_start;
2182                } else
2183                        goto err_mfunc;
2184        }
2185
2186        /* In master functions, the communication channel must be initialized
2187         * after obtaining its address from fw */
2188        if (mlx4_is_master(dev)) {
2189                err = mlx4_multi_func_init(dev);
2190                if (err) {
2191                        mlx4_err(dev, "Failed to init master mfunc interface, aborting.\n");
2192                        goto err_close;
2193                }
2194        }
2195
2196        err = mlx4_alloc_eq_table(dev);
2197        if (err)
2198                goto err_master_mfunc;
2199
2200        priv->msix_ctl.pool_bm = 0;
2201        mutex_init(&priv->msix_ctl.pool_lock);
2202
2203        mlx4_enable_msi_x(dev);
2204        if (mlx4_is_mfunc(dev) && !(dev->flags & MLX4_FLAG_MSI_X)) {
2205                mlx4_err(dev, "INTx is not supported in multi-function mode, "
2206                         "aborting.\n");
2207                err = -EOPNOTSUPP;
2208                goto err_free_eq;
2209        }
2210
2211        if (!mlx4_is_slave(dev)) {
2212                err = mlx4_init_steering(dev);
2213                if (err)
2214                        goto err_free_eq;
2215        }
2216
2217        err = mlx4_setup_hca(dev);
2218        if (err == -EBUSY && (dev->flags & MLX4_FLAG_MSI_X) &&
2219            !mlx4_is_mfunc(dev)) {
2220                dev->flags &= ~MLX4_FLAG_MSI_X;
2221                dev->caps.num_comp_vectors = 1;
2222                dev->caps.comp_pool        = 0;
2223                pci_disable_msix(pdev);
2224                err = mlx4_setup_hca(dev);
2225        }
2226
2227        if (err)
2228                goto err_steer;
2229
2230        for (port = 1; port <= dev->caps.num_ports; port++) {
2231                err = mlx4_init_port_info(dev, port);
2232                if (err)
2233                        goto err_port;
2234        }
2235
2236        err = mlx4_register_device(dev);
2237        if (err)
2238                goto err_port;
2239
2240        mlx4_sense_init(dev);
2241        mlx4_start_sense(dev);
2242
2243        priv->pci_dev_data = pci_dev_data;
2244        pci_set_drvdata(pdev, dev);
2245
2246        return 0;
2247
2248err_port:
2249        for (--port; port >= 1; --port)
2250                mlx4_cleanup_port_info(&priv->port[port]);
2251
2252        mlx4_cleanup_counters_table(dev);
2253        mlx4_cleanup_mcg_table(dev);
2254        mlx4_cleanup_qp_table(dev);
2255        mlx4_cleanup_srq_table(dev);
2256        mlx4_cleanup_cq_table(dev);
2257        mlx4_cmd_use_polling(dev);
2258        mlx4_cleanup_eq_table(dev);
2259        mlx4_cleanup_mr_table(dev);
2260        mlx4_cleanup_xrcd_table(dev);
2261        mlx4_cleanup_pd_table(dev);
2262        mlx4_cleanup_uar_table(dev);
2263
2264err_steer:
2265        if (!mlx4_is_slave(dev))
2266                mlx4_clear_steering(dev);
2267
2268err_free_eq:
2269        mlx4_free_eq_table(dev);
2270
2271err_master_mfunc:
2272        if (mlx4_is_master(dev))
2273                mlx4_multi_func_cleanup(dev);
2274
2275err_close:
2276        if (dev->flags & MLX4_FLAG_MSI_X)
2277                pci_disable_msix(pdev);
2278
2279        mlx4_close_hca(dev);
2280
2281err_mfunc:
2282        if (mlx4_is_slave(dev))
2283                mlx4_multi_func_cleanup(dev);
2284
2285err_cmd:
2286        mlx4_cmd_cleanup(dev);
2287
2288err_sriov:
2289        if (dev->flags & MLX4_FLAG_SRIOV)
2290                pci_disable_sriov(pdev);
2291
2292err_rel_own:
2293        if (!mlx4_is_slave(dev))
2294                mlx4_free_ownership(dev);
2295
2296err_free_dev:
2297        kfree(priv);
2298
2299err_release_regions:
2300        pci_release_regions(pdev);
2301
2302err_disable_pdev:
2303        pci_disable_device(pdev);
2304        pci_set_drvdata(pdev, NULL);
2305        return err;
2306}
2307
2308static int mlx4_init_one(struct pci_dev *pdev, const struct pci_device_id *id)
2309{
2310        printk_once(KERN_INFO "%s", mlx4_version);
2311
2312        return __mlx4_init_one(pdev, id->driver_data);
2313}
2314
2315static void mlx4_remove_one(struct pci_dev *pdev)
2316{
2317        struct mlx4_dev  *dev  = pci_get_drvdata(pdev);
2318        struct mlx4_priv *priv = mlx4_priv(dev);
2319        int p;
2320
2321        if (dev) {
2322                /* In SR-IOV mode it is not allowed to unload the PF's
2323                 * driver while there are active VFs. */
2324                if (mlx4_is_master(dev)) {
2325                        if (mlx4_how_many_lives_vf(dev))
2326                                printk(KERN_ERR "Removing PF while VFs are still assigned!\n");
2327                }
2328                mlx4_stop_sense(dev);
2329                mlx4_unregister_device(dev);
2330
2331                for (p = 1; p <= dev->caps.num_ports; p++) {
2332                        mlx4_cleanup_port_info(&priv->port[p]);
2333                        mlx4_CLOSE_PORT(dev, p);
2334                }
2335
2336                if (mlx4_is_master(dev))
2337                        mlx4_free_resource_tracker(dev,
2338                                                   RES_TR_FREE_SLAVES_ONLY);
2339
2340                mlx4_cleanup_counters_table(dev);
2341                mlx4_cleanup_mcg_table(dev);
2342                mlx4_cleanup_qp_table(dev);
2343                mlx4_cleanup_srq_table(dev);
2344                mlx4_cleanup_cq_table(dev);
2345                mlx4_cmd_use_polling(dev);
2346                mlx4_cleanup_eq_table(dev);
2347                mlx4_cleanup_mr_table(dev);
2348                mlx4_cleanup_xrcd_table(dev);
2349                mlx4_cleanup_pd_table(dev);
2350
2351                if (mlx4_is_master(dev))
2352                        mlx4_free_resource_tracker(dev,
2353                                                   RES_TR_FREE_STRUCTS_ONLY);
2354
2355                iounmap(priv->kar);
2356                mlx4_uar_free(dev, &priv->driver_uar);
2357                mlx4_cleanup_uar_table(dev);
2358                if (!mlx4_is_slave(dev))
2359                        mlx4_clear_steering(dev);
2360                mlx4_free_eq_table(dev);
2361                if (mlx4_is_master(dev))
2362                        mlx4_multi_func_cleanup(dev);
2363                mlx4_close_hca(dev);
2364                if (mlx4_is_slave(dev))
2365                        mlx4_multi_func_cleanup(dev);
2366                mlx4_cmd_cleanup(dev);
2367
2368                if (dev->flags & MLX4_FLAG_MSI_X)
2369                        pci_disable_msix(pdev);
2370                if (dev->flags & MLX4_FLAG_SRIOV) {
2371                        mlx4_warn(dev, "Disabling SR-IOV\n");
2372                        pci_disable_sriov(pdev);
2373                }
2374
2375                if (!mlx4_is_slave(dev))
2376                        mlx4_free_ownership(dev);
2377
2378                kfree(dev->caps.qp0_tunnel);
2379                kfree(dev->caps.qp0_proxy);
2380                kfree(dev->caps.qp1_tunnel);
2381                kfree(dev->caps.qp1_proxy);
2382
2383                kfree(priv);
2384                pci_release_regions(pdev);
2385                pci_disable_device(pdev);
2386                pci_set_drvdata(pdev, NULL);
2387        }
2388}
2389
2390int mlx4_restart_one(struct pci_dev *pdev)
2391{
2392        struct mlx4_dev  *dev  = pci_get_drvdata(pdev);
2393        struct mlx4_priv *priv = mlx4_priv(dev);
2394        int               pci_dev_data;
2395
2396        pci_dev_data = priv->pci_dev_data;
2397        mlx4_remove_one(pdev);
2398        return __mlx4_init_one(pdev, pci_dev_data);
2399}
2400
2401static DEFINE_PCI_DEVICE_TABLE(mlx4_pci_table) = {
2402        /* MT25408 "Hermon" SDR */
2403        { PCI_VDEVICE(MELLANOX, 0x6340), MLX4_PCI_DEV_FORCE_SENSE_PORT },
2404        /* MT25408 "Hermon" DDR */
2405        { PCI_VDEVICE(MELLANOX, 0x634a), MLX4_PCI_DEV_FORCE_SENSE_PORT },
2406        /* MT25408 "Hermon" QDR */
2407        { PCI_VDEVICE(MELLANOX, 0x6354), MLX4_PCI_DEV_FORCE_SENSE_PORT },
2408        /* MT25408 "Hermon" DDR PCIe gen2 */
2409        { PCI_VDEVICE(MELLANOX, 0x6732), MLX4_PCI_DEV_FORCE_SENSE_PORT },
2410        /* MT25408 "Hermon" QDR PCIe gen2 */
2411        { PCI_VDEVICE(MELLANOX, 0x673c), MLX4_PCI_DEV_FORCE_SENSE_PORT },
2412        /* MT25408 "Hermon" EN 10GigE */
2413        { PCI_VDEVICE(MELLANOX, 0x6368), MLX4_PCI_DEV_FORCE_SENSE_PORT },
2414        /* MT25408 "Hermon" EN 10GigE PCIe gen2 */
2415        { PCI_VDEVICE(MELLANOX, 0x6750), MLX4_PCI_DEV_FORCE_SENSE_PORT },
2416        /* MT25458 ConnectX EN 10GBASE-T 10GigE */
2417        { PCI_VDEVICE(MELLANOX, 0x6372), MLX4_PCI_DEV_FORCE_SENSE_PORT },
2418        /* MT25458 ConnectX EN 10GBASE-T+Gen2 10GigE */
2419        { PCI_VDEVICE(MELLANOX, 0x675a), MLX4_PCI_DEV_FORCE_SENSE_PORT },
2420        /* MT26468 ConnectX EN 10GigE PCIe gen2 */
2421        { PCI_VDEVICE(MELLANOX, 0x6764), MLX4_PCI_DEV_FORCE_SENSE_PORT },
2422        /* MT26438 ConnectX EN 40GigE PCIe gen2 5GT/s */
2423        { PCI_VDEVICE(MELLANOX, 0x6746), MLX4_PCI_DEV_FORCE_SENSE_PORT },
2424        /* MT26478 ConnectX2 40GigE PCIe gen2 */
2425        { PCI_VDEVICE(MELLANOX, 0x676e), MLX4_PCI_DEV_FORCE_SENSE_PORT },
2426        /* MT25400 Family [ConnectX-2 Virtual Function] */
2427        { PCI_VDEVICE(MELLANOX, 0x1002), MLX4_PCI_DEV_IS_VF },
2428        /* MT27500 Family [ConnectX-3] */
2429        { PCI_VDEVICE(MELLANOX, 0x1003), 0 },
2430        /* MT27500 Family [ConnectX-3 Virtual Function] */
2431        { PCI_VDEVICE(MELLANOX, 0x1004), MLX4_PCI_DEV_IS_VF },
2432        { PCI_VDEVICE(MELLANOX, 0x1005), 0 }, /* MT27510 Family */
2433        { PCI_VDEVICE(MELLANOX, 0x1006), 0 }, /* MT27511 Family */
2434        { PCI_VDEVICE(MELLANOX, 0x1007), 0 }, /* MT27520 Family */
2435        { PCI_VDEVICE(MELLANOX, 0x1008), 0 }, /* MT27521 Family */
2436        { PCI_VDEVICE(MELLANOX, 0x1009), 0 }, /* MT27530 Family */
2437        { PCI_VDEVICE(MELLANOX, 0x100a), 0 }, /* MT27531 Family */
2438        { PCI_VDEVICE(MELLANOX, 0x100b), 0 }, /* MT27540 Family */
2439        { PCI_VDEVICE(MELLANOX, 0x100c), 0 }, /* MT27541 Family */
2440        { PCI_VDEVICE(MELLANOX, 0x100d), 0 }, /* MT27550 Family */
2441        { PCI_VDEVICE(MELLANOX, 0x100e), 0 }, /* MT27551 Family */
2442        { PCI_VDEVICE(MELLANOX, 0x100f), 0 }, /* MT27560 Family */
2443        { PCI_VDEVICE(MELLANOX, 0x1010), 0 }, /* MT27561 Family */
2444        { 0, }
2445};
2446
2447MODULE_DEVICE_TABLE(pci, mlx4_pci_table);
2448
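    /*
     * PCI error recovery: on a detected channel error the device is
     * torn down completely (mlx4_remove_one) and, unless the failure
     * is permanent, re-probed from scratch in the slot_reset callback
     * via __mlx4_init_one().
     */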
2449static pci_ers_result_t mlx4_pci_err_detected(struct pci_dev *pdev,
2450                                              pci_channel_state_t state)
2451{
2452        mlx4_remove_one(pdev);
2453
2454        return state == pci_channel_io_perm_failure ?
2455                PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_NEED_RESET;
2456}
2457
2458static pci_ers_result_t mlx4_pci_slot_reset(struct pci_dev *pdev)
2459{
2460        int ret = __mlx4_init_one(pdev, 0);
2461
2462        return ret ? PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_RECOVERED;
2463}
2464
2465static const struct pci_error_handlers mlx4_err_handler = {
2466        .error_detected = mlx4_pci_err_detected,
2467        .slot_reset     = mlx4_pci_slot_reset,
2468};
2469
2470static struct pci_driver mlx4_driver = {
2471        .name           = DRV_NAME,
2472        .id_table       = mlx4_pci_table,
2473        .probe          = mlx4_init_one,
2474        .remove         = mlx4_remove_one,
2475        .err_handler    = &mlx4_err_handler,
2476};
2477
2478static int __init mlx4_verify_params(void)
2479{
2480        if ((log_num_mac < 0) || (log_num_mac > 7)) {
2481                pr_warning("mlx4_core: bad log_num_mac: %d\n", log_num_mac);
2482                return -1;
2483        }
2484
2485        if (log_num_vlan != 0)
2486                pr_warning("mlx4_core: log_num_vlan - obsolete module param, using %d\n",
2487                           MLX4_LOG_NUM_VLANS);
2488
2489        if ((log_mtts_per_seg < 1) || (log_mtts_per_seg > 7)) {
2490                pr_warning("mlx4_core: bad log_mtts_per_seg: %d\n", log_mtts_per_seg);
2491                return -1;
2492        }
2493
2494        /* Check if module param for ports type has legal combination */
2495        if (!port_type_array[0] && port_type_array[1]) {
2496                printk(KERN_WARNING "Module parameter configuration ETH/IB is not supported. Switching to default configuration IB/IB\n");
2497                port_type_array[0] = true;
2498        }
2499
2500        if (mlx4_log_num_mgm_entry_size != -1 &&
2501            (mlx4_log_num_mgm_entry_size < MLX4_MIN_MGM_LOG_ENTRY_SIZE ||
2502             mlx4_log_num_mgm_entry_size > MLX4_MAX_MGM_LOG_ENTRY_SIZE)) {
2503                pr_warning("mlx4_core: mlx4_log_num_mgm_entry_size (%d) not "
2504                           "in legal range (-1 or %d..%d)\n",
2505                           mlx4_log_num_mgm_entry_size,
2506                           MLX4_MIN_MGM_LOG_ENTRY_SIZE,
2507                           MLX4_MAX_MGM_LOG_ENTRY_SIZE);
2508                return -1;
2509        }
2510
2511        return 0;
2512}
2513
2514static int __init mlx4_init(void)
2515{
2516        int ret;
2517
2518        if (mlx4_verify_params())
2519                return -EINVAL;
2520
2521        mlx4_catas_init();
2522
2523        mlx4_wq = create_singlethread_workqueue("mlx4");
2524        if (!mlx4_wq)
2525                return -ENOMEM;
2526
2527        ret = pci_register_driver(&mlx4_driver);
2528        return ret < 0 ? ret : 0;
2529}
2530
2531static void __exit mlx4_cleanup(void)
2532{
2533        pci_unregister_driver(&mlx4_driver);
2534        destroy_workqueue(mlx4_wq);
2535}
2536
2537module_init(mlx4_init);
2538module_exit(mlx4_cleanup);
2539