linux/drivers/infiniband/hw/bnxt_re/main.c
/*
 * Broadcom NetXtreme-E RoCE driver.
 *
 * Copyright (c) 2016 - 2017, Broadcom. All rights reserved.  The term
 * Broadcom refers to Broadcom Limited and/or its subsidiaries.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * BSD license below:
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS''
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
 * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
 * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * Description: Main component of the bnxt_re driver
 */

#include <linux/module.h>
#include <linux/netdevice.h>
#include <linux/ethtool.h>
#include <linux/mutex.h>
#include <linux/list.h>
#include <linux/rculist.h>
#include <linux/spinlock.h>
#include <linux/pci.h>
#include <net/dcbnl.h>
#include <net/ipv6.h>
#include <net/addrconf.h>
#include <linux/if_ether.h>

#include <rdma/ib_verbs.h>
#include <rdma/ib_user_verbs.h>
#include <rdma/ib_umem.h>
#include <rdma/ib_addr.h>

#include "bnxt_ulp.h"
#include "roce_hsi.h"
#include "qplib_res.h"
#include "qplib_sp.h"
#include "qplib_fp.h"
#include "qplib_rcfw.h"
#include "bnxt_re.h"
#include "ib_verbs.h"
#include <rdma/bnxt_re-abi.h>
#include "bnxt.h"
#include "hw_counters.h"

static char version[] =
		BNXT_RE_DESC "\n";

MODULE_AUTHOR("Eddie Wai <eddie.wai@broadcom.com>");
MODULE_DESCRIPTION(BNXT_RE_DESC " Driver");
MODULE_LICENSE("Dual BSD/GPL");

/* globals */
static struct list_head bnxt_re_dev_list = LIST_HEAD_INIT(bnxt_re_dev_list);
/* Mutex to protect the list of bnxt_re devices added */
static DEFINE_MUTEX(bnxt_re_dev_lock);
static struct workqueue_struct *bnxt_re_wq;
static void bnxt_re_remove_device(struct bnxt_re_dev *rdev);
static void bnxt_re_dealloc_driver(struct ib_device *ib_dev);
static void bnxt_re_stop_irq(void *handle);
static void bnxt_re_dev_stop(struct bnxt_re_dev *rdev);

static void bnxt_re_set_drv_mode(struct bnxt_re_dev *rdev, u8 mode)
{
        struct bnxt_qplib_chip_ctx *cctx;

        cctx = rdev->chip_ctx;
        cctx->modes.wqe_mode = bnxt_qplib_is_chip_gen_p5(rdev->chip_ctx) ?
                               mode : BNXT_QPLIB_WQE_MODE_STATIC;
}
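/*
 * Note: bnxt_re_set_drv_mode() above honours the requested WQE mode only
 * on Gen P5 (57500-series) chips; older chips always run with fixed-size
 * (static) WQEs.
 */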

static void bnxt_re_destroy_chip_ctx(struct bnxt_re_dev *rdev)
{
        struct bnxt_qplib_chip_ctx *chip_ctx;

        if (!rdev->chip_ctx)
                return;
        chip_ctx = rdev->chip_ctx;
        rdev->chip_ctx = NULL;
        rdev->rcfw.res = NULL;
        rdev->qplib_res.cctx = NULL;
        rdev->qplib_res.pdev = NULL;
        rdev->qplib_res.netdev = NULL;
        kfree(chip_ctx);
}

static int bnxt_re_setup_chip_ctx(struct bnxt_re_dev *rdev, u8 wqe_mode)
{
        struct bnxt_qplib_chip_ctx *chip_ctx;
        struct bnxt_en_dev *en_dev;
        struct bnxt *bp;

        en_dev = rdev->en_dev;
        bp = netdev_priv(en_dev->net);

        chip_ctx = kzalloc(sizeof(*chip_ctx), GFP_KERNEL);
        if (!chip_ctx)
                return -ENOMEM;
        chip_ctx->chip_num = bp->chip_num;
        chip_ctx->hw_stats_size = bp->hw_ring_stats_size;

        rdev->chip_ctx = chip_ctx;
        /* The rest of the members will be initialized eventually */

        rdev->qplib_res.cctx = rdev->chip_ctx;
        rdev->rcfw.res = &rdev->qplib_res;

        bnxt_re_set_drv_mode(rdev, wqe_mode);
        if (bnxt_qplib_determine_atomics(en_dev->pdev))
                ibdev_info(&rdev->ibdev,
                           "platform doesn't support global atomics.");
        return 0;
}

/* SR-IOV helper functions */

static void bnxt_re_get_sriov_func_type(struct bnxt_re_dev *rdev)
{
        struct bnxt *bp;

        bp = netdev_priv(rdev->en_dev->net);
        if (BNXT_VF(bp))
                rdev->is_virtfn = 1;
}

/* Set the maximum number of each resource that the driver actually wants
 * to allocate. This may be up to the maximum number the firmware has
 * reserved for the function. The driver may choose to allocate fewer
 * resources than the firmware maximum.
 */
static void bnxt_re_limit_pf_res(struct bnxt_re_dev *rdev)
{
        struct bnxt_qplib_dev_attr *attr;
        struct bnxt_qplib_ctx *ctx;
        int i;

        attr = &rdev->dev_attr;
        ctx = &rdev->qplib_ctx;

        ctx->qpc_count = min_t(u32, BNXT_RE_MAX_QPC_COUNT,
                               attr->max_qp);
        ctx->mrw_count = BNXT_RE_MAX_MRW_COUNT_256K;
        /* Use max_mr from fw since max_mrw does not get set */
        ctx->mrw_count = min_t(u32, ctx->mrw_count, attr->max_mr);
        ctx->srqc_count = min_t(u32, BNXT_RE_MAX_SRQC_COUNT,
                                attr->max_srq);
        ctx->cq_count = min_t(u32, BNXT_RE_MAX_CQ_COUNT, attr->max_cq);
        if (!bnxt_qplib_is_chip_gen_p5(rdev->chip_ctx))
                for (i = 0; i < MAX_TQM_ALLOC_REQ; i++)
                        rdev->qplib_ctx.tqm_ctx.qcount[i] =
                        rdev->dev_attr.tqm_alloc_reqs[i];
}

static void bnxt_re_limit_vf_res(struct bnxt_qplib_ctx *qplib_ctx, u32 num_vf)
{
        struct bnxt_qplib_vf_res *vf_res;
        u32 mrws = 0;
        u32 vf_pct;
        u32 nvfs;

        vf_res = &qplib_ctx->vf_res;
        /*
         * Reserve a set of resources for the PF. Divide the remaining
         * resources among the VFs.
         */
        vf_pct = 100 - BNXT_RE_PCT_RSVD_FOR_PF;
        nvfs = num_vf;
        num_vf = 100 * num_vf;
        vf_res->max_qp_per_vf = (qplib_ctx->qpc_count * vf_pct) / num_vf;
        vf_res->max_srq_per_vf = (qplib_ctx->srqc_count * vf_pct) / num_vf;
        vf_res->max_cq_per_vf = (qplib_ctx->cq_count * vf_pct) / num_vf;
        /*
         * The driver allows many more MRs than other resources. If the
         * firmware does also, then reserve a fixed amount for the PF and
         * divide the rest among VFs. VFs may use many MRs for NFS
         * mounts, iSER, NVMe applications, etc. If the firmware severely
         * restricts the number of MRs, then let the PF have half and
         * divide the rest among VFs, as for the other resource types.
         */
        if (qplib_ctx->mrw_count < BNXT_RE_MAX_MRW_COUNT_64K) {
                mrws = qplib_ctx->mrw_count * vf_pct;
                nvfs = num_vf;
        } else {
                mrws = qplib_ctx->mrw_count - BNXT_RE_RESVD_MR_FOR_PF;
        }
        vf_res->max_mrw_per_vf = (mrws / nvfs);
        vf_res->max_gid_per_vf = BNXT_RE_MAX_GID_PER_VF;
}
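/*
 * Illustrative arithmetic for bnxt_re_limit_vf_res() (example values, not
 * the real #defines): if BNXT_RE_PCT_RSVD_FOR_PF reserved, say, 35% for
 * the PF, then vf_pct = 65. Scaling num_vf by 100 keeps the percentage
 * split in integer math:
 *
 *   max_qp_per_vf = (qpc_count * vf_pct) / (100 * nvfs)
 *
 * e.g. qpc_count = 65536 and 8 VFs gives (65536 * 65) / 800 = 5324 QPs
 * per VF.
 */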

static void bnxt_re_set_resource_limits(struct bnxt_re_dev *rdev)
{
        u32 num_vfs;

        memset(&rdev->qplib_ctx.vf_res, 0, sizeof(struct bnxt_qplib_vf_res));
        bnxt_re_limit_pf_res(rdev);

        num_vfs = bnxt_qplib_is_chip_gen_p5(rdev->chip_ctx) ?
                        BNXT_RE_GEN_P5_MAX_VF : rdev->num_vfs;
        if (num_vfs)
                bnxt_re_limit_vf_res(&rdev->qplib_ctx, num_vfs);
}

/* for handling bnxt_en callbacks later */
static void bnxt_re_stop(void *p)
{
        struct bnxt_re_dev *rdev = p;
        struct bnxt *bp;

        if (!rdev)
                return;
        ASSERT_RTNL();

        /* The L2 driver invokes this callback during a device error, crash,
         * or device reset. The current RoCE driver doesn't recover the
         * device on error. Handle the error by dispatching fatal events to
         * all QPs, i.e. by calling bnxt_re_dev_stop, and by releasing the
         * MSI-X vectors, as the L2 driver wants to modify the MSI-X table.
         */
        bp = netdev_priv(rdev->netdev);

        ibdev_info(&rdev->ibdev, "Handle device stop call from L2 driver");
        /* Check the current device state from the L2 structure and move the
         * device to the detached state if FW_FATAL_COND is set.
         * This prevents more commands to HW during clean-up,
         * in case the device is already in error.
         */
        if (test_bit(BNXT_STATE_FW_FATAL_COND, &bp->state))
                set_bit(ERR_DEVICE_DETACHED, &rdev->rcfw.cmdq.flags);

        bnxt_re_dev_stop(rdev);
        bnxt_re_stop_irq(rdev);
        /* Move the device state to detached to avoid sending any more
         * commands to HW
         */
        set_bit(BNXT_RE_FLAG_ERR_DEVICE_DETACHED, &rdev->flags);
        set_bit(ERR_DEVICE_DETACHED, &rdev->rcfw.cmdq.flags);
}

static void bnxt_re_start(void *p)
{
}

static void bnxt_re_sriov_config(void *p, int num_vfs)
{
        struct bnxt_re_dev *rdev = p;

        if (!rdev)
                return;

        if (test_bit(BNXT_RE_FLAG_ERR_DEVICE_DETACHED, &rdev->flags))
                return;
        rdev->num_vfs = num_vfs;
        if (!bnxt_qplib_is_chip_gen_p5(rdev->chip_ctx)) {
                bnxt_re_set_resource_limits(rdev);
                bnxt_qplib_set_func_resources(&rdev->qplib_res, &rdev->rcfw,
                                              &rdev->qplib_ctx);
        }
}

static void bnxt_re_shutdown(void *p)
{
        struct bnxt_re_dev *rdev = p;

        if (!rdev)
                return;
        ASSERT_RTNL();
        /* Release the MSI-X vectors before queuing unregister */
        bnxt_re_stop_irq(rdev);
        ib_unregister_device_queued(&rdev->ibdev);
}

static void bnxt_re_stop_irq(void *handle)
{
        struct bnxt_re_dev *rdev = (struct bnxt_re_dev *)handle;
        struct bnxt_qplib_rcfw *rcfw = &rdev->rcfw;
        struct bnxt_qplib_nq *nq;
        int indx;

        for (indx = BNXT_RE_NQ_IDX; indx < rdev->num_msix; indx++) {
                nq = &rdev->nq[indx - 1];
                bnxt_qplib_nq_stop_irq(nq, false);
        }

        bnxt_qplib_rcfw_stop_irq(rcfw, false);
}

static void bnxt_re_start_irq(void *handle, struct bnxt_msix_entry *ent)
{
        struct bnxt_re_dev *rdev = (struct bnxt_re_dev *)handle;
        struct bnxt_msix_entry *msix_ent = rdev->msix_entries;
        struct bnxt_qplib_rcfw *rcfw = &rdev->rcfw;
        struct bnxt_qplib_nq *nq;
        int indx, rc;

        if (!ent) {
                /* Not setting the f/w timeout bit in rcfw.
                 * During driver unload the first command to f/w
                 * will time out and that will set the timeout bit.
                 */
                ibdev_err(&rdev->ibdev, "Failed to re-start IRQs\n");
                return;
        }

        /* Vectors may change after restart, so update with new vectors
         * in the device structure.
         */
        for (indx = 0; indx < rdev->num_msix; indx++)
                rdev->msix_entries[indx].vector = ent[indx].vector;

        bnxt_qplib_rcfw_start_irq(rcfw, msix_ent[BNXT_RE_AEQ_IDX].vector,
                                  false);
        for (indx = BNXT_RE_NQ_IDX; indx < rdev->num_msix; indx++) {
                nq = &rdev->nq[indx - 1];
                rc = bnxt_qplib_nq_start_irq(nq, indx - 1,
                                             msix_ent[indx].vector, false);
                if (rc)
                        ibdev_warn(&rdev->ibdev, "Failed to reinit NQ index %d\n",
                                   indx - 1);
        }
}
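/*
 * MSI-X layout assumed by bnxt_re_stop_irq()/bnxt_re_start_irq() above:
 * the vector at BNXT_RE_AEQ_IDX serves the RCFW/CREQ channel, and the
 * vectors from BNXT_RE_NQ_IDX onward serve the notification queues,
 * which is why NQ index "indx" maps to rdev->nq[indx - 1].
 */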

static struct bnxt_ulp_ops bnxt_re_ulp_ops = {
        .ulp_async_notifier = NULL,
        .ulp_stop = bnxt_re_stop,
        .ulp_start = bnxt_re_start,
        .ulp_sriov_config = bnxt_re_sriov_config,
        .ulp_shutdown = bnxt_re_shutdown,
        .ulp_irq_stop = bnxt_re_stop_irq,
        .ulp_irq_restart = bnxt_re_start_irq
};

/* RoCE -> Net driver */

/* Driver registration routines used to let the networking driver (bnxt_en)
 * know that the RoCE driver is now installed
 */
static int bnxt_re_unregister_netdev(struct bnxt_re_dev *rdev)
{
        struct bnxt_en_dev *en_dev;
        int rc;

        if (!rdev)
                return -EINVAL;

        en_dev = rdev->en_dev;

        rc = en_dev->en_ops->bnxt_unregister_device(rdev->en_dev,
                                                    BNXT_ROCE_ULP);
        return rc;
}

static int bnxt_re_register_netdev(struct bnxt_re_dev *rdev)
{
        struct bnxt_en_dev *en_dev;
        int rc = 0;

        if (!rdev)
                return -EINVAL;

        en_dev = rdev->en_dev;

        rc = en_dev->en_ops->bnxt_register_device(en_dev, BNXT_ROCE_ULP,
                                                  &bnxt_re_ulp_ops, rdev);
        rdev->qplib_res.pdev = rdev->en_dev->pdev;
        return rc;
}

static int bnxt_re_free_msix(struct bnxt_re_dev *rdev)
{
        struct bnxt_en_dev *en_dev;
        int rc;

        if (!rdev)
                return -EINVAL;

        en_dev = rdev->en_dev;

        rc = en_dev->en_ops->bnxt_free_msix(rdev->en_dev, BNXT_ROCE_ULP);

        return rc;
}

static int bnxt_re_request_msix(struct bnxt_re_dev *rdev)
{
        int rc = 0, num_msix_want = BNXT_RE_MAX_MSIX, num_msix_got;
        struct bnxt_en_dev *en_dev;

        if (!rdev)
                return -EINVAL;

        en_dev = rdev->en_dev;

        num_msix_want = min_t(u32, BNXT_RE_MAX_MSIX, num_online_cpus());

        num_msix_got = en_dev->en_ops->bnxt_request_msix(en_dev, BNXT_ROCE_ULP,
                                                         rdev->msix_entries,
                                                         num_msix_want);
        if (num_msix_got < BNXT_RE_MIN_MSIX) {
                rc = -EINVAL;
                goto done;
        }
        if (num_msix_got != num_msix_want) {
                ibdev_warn(&rdev->ibdev,
                           "Requested %d MSI-X vectors, got %d\n",
                           num_msix_want, num_msix_got);
        }
        rdev->num_msix = num_msix_got;
done:
        return rc;
}

static void bnxt_re_init_hwrm_hdr(struct bnxt_re_dev *rdev, struct input *hdr,
                                  u16 opcd, u16 crid, u16 trid)
{
        hdr->req_type = cpu_to_le16(opcd);
        hdr->cmpl_ring = cpu_to_le16(crid);
        hdr->target_id = cpu_to_le16(trid);
}

static void bnxt_re_fill_fw_msg(struct bnxt_fw_msg *fw_msg, void *msg,
                                int msg_len, void *resp, int resp_max_len,
                                int timeout)
{
        fw_msg->msg = msg;
        fw_msg->msg_len = msg_len;
        fw_msg->resp = resp;
        fw_msg->resp_max_len = resp_max_len;
        fw_msg->timeout = timeout;
}
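/*
 * Every HWRM call in this file follows the same pattern: fill the generic
 * header, describe the request/response buffers, then hand the message to
 * the L2 driver. A sketch (illustrative only; bnxt_re_net_ring_free()
 * below is a real instance):
 *
 *   struct hwrm_ring_free_input req = {0};
 *   struct hwrm_ring_free_output resp = {0};
 *   struct bnxt_fw_msg fw_msg = {0};
 *
 *   bnxt_re_init_hwrm_hdr(rdev, (void *)&req, HWRM_RING_FREE, -1, -1);
 *   bnxt_re_fill_fw_msg(&fw_msg, (void *)&req, sizeof(req), (void *)&resp,
 *                       sizeof(resp), DFLT_HWRM_CMD_TIMEOUT);
 *   rc = en_dev->en_ops->bnxt_send_fw_msg(en_dev, BNXT_ROCE_ULP, &fw_msg);
 */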

static int bnxt_re_net_ring_free(struct bnxt_re_dev *rdev,
                                 u16 fw_ring_id, int type)
{
        struct bnxt_en_dev *en_dev = rdev->en_dev;
        struct hwrm_ring_free_input req = {0};
        struct hwrm_ring_free_output resp;
        struct bnxt_fw_msg fw_msg;
        int rc = -EINVAL;

        if (!en_dev)
                return rc;

        if (test_bit(BNXT_RE_FLAG_ERR_DEVICE_DETACHED, &rdev->flags))
                return 0;

        memset(&fw_msg, 0, sizeof(fw_msg));

        bnxt_re_init_hwrm_hdr(rdev, (void *)&req, HWRM_RING_FREE, -1, -1);
        req.ring_type = type;
        req.ring_id = cpu_to_le16(fw_ring_id);
        bnxt_re_fill_fw_msg(&fw_msg, (void *)&req, sizeof(req), (void *)&resp,
                            sizeof(resp), DFLT_HWRM_CMD_TIMEOUT);
        rc = en_dev->en_ops->bnxt_send_fw_msg(en_dev, BNXT_ROCE_ULP, &fw_msg);
        if (rc)
                ibdev_err(&rdev->ibdev, "Failed to free HW ring:%d :%#x",
                          req.ring_id, rc);
        return rc;
}

static int bnxt_re_net_ring_alloc(struct bnxt_re_dev *rdev,
                                  struct bnxt_re_ring_attr *ring_attr,
                                  u16 *fw_ring_id)
{
        struct bnxt_en_dev *en_dev = rdev->en_dev;
        struct hwrm_ring_alloc_input req = {0};
        struct hwrm_ring_alloc_output resp;
        struct bnxt_fw_msg fw_msg;
        int rc = -EINVAL;

        if (!en_dev)
                return rc;

        memset(&fw_msg, 0, sizeof(fw_msg));
        bnxt_re_init_hwrm_hdr(rdev, (void *)&req, HWRM_RING_ALLOC, -1, -1);
        req.enables = 0;
        req.page_tbl_addr = cpu_to_le64(ring_attr->dma_arr[0]);
        if (ring_attr->pages > 1) {
                /* Page size is in log2 units */
                req.page_size = BNXT_PAGE_SHIFT;
                req.page_tbl_depth = 1;
        }
        req.fbo = 0;
        /* Association of ring index with doorbell index and MSIX number */
        req.logical_id = cpu_to_le16(ring_attr->lrid);
        req.length = cpu_to_le32(ring_attr->depth + 1);
        req.ring_type = ring_attr->type;
        req.int_mode = ring_attr->mode;
        bnxt_re_fill_fw_msg(&fw_msg, (void *)&req, sizeof(req), (void *)&resp,
                            sizeof(resp), DFLT_HWRM_CMD_TIMEOUT);
        rc = en_dev->en_ops->bnxt_send_fw_msg(en_dev, BNXT_ROCE_ULP, &fw_msg);
        if (!rc)
                *fw_ring_id = le16_to_cpu(resp.ring_id);

        return rc;
}
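/*
 * Note on the ring attributes above: callers pass depth as the entry
 * count minus one (e.g. BNXT_QPLIB_NQE_MAX_CNT - 1), so req.length
 * restores the real count via depth + 1. req.page_size is log2 of the
 * page size; e.g. a 4 KiB page corresponds to a BNXT_PAGE_SHIFT of 12.
 */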

static int bnxt_re_net_stats_ctx_free(struct bnxt_re_dev *rdev,
                                      u32 fw_stats_ctx_id)
{
        struct bnxt_en_dev *en_dev = rdev->en_dev;
        struct hwrm_stat_ctx_free_input req = {0};
        struct hwrm_stat_ctx_free_output resp = {0};
        struct bnxt_fw_msg fw_msg;
        int rc = -EINVAL;

        if (!en_dev)
                return rc;

        if (test_bit(BNXT_RE_FLAG_ERR_DEVICE_DETACHED, &rdev->flags))
                return 0;

        memset(&fw_msg, 0, sizeof(fw_msg));

        bnxt_re_init_hwrm_hdr(rdev, (void *)&req, HWRM_STAT_CTX_FREE, -1, -1);
        req.stat_ctx_id = cpu_to_le32(fw_stats_ctx_id);
        /* Use a dedicated response buffer instead of reusing req */
        bnxt_re_fill_fw_msg(&fw_msg, (void *)&req, sizeof(req), (void *)&resp,
                            sizeof(resp), DFLT_HWRM_CMD_TIMEOUT);
        rc = en_dev->en_ops->bnxt_send_fw_msg(en_dev, BNXT_ROCE_ULP, &fw_msg);
        if (rc)
                ibdev_err(&rdev->ibdev, "Failed to free HW stats context %#x",
                          rc);

        return rc;
}

static int bnxt_re_net_stats_ctx_alloc(struct bnxt_re_dev *rdev,
                                       dma_addr_t dma_map,
                                       u32 *fw_stats_ctx_id)
{
        struct bnxt_qplib_chip_ctx *chip_ctx = rdev->chip_ctx;
        struct hwrm_stat_ctx_alloc_output resp = {0};
        struct hwrm_stat_ctx_alloc_input req = {0};
        struct bnxt_en_dev *en_dev = rdev->en_dev;
        struct bnxt_fw_msg fw_msg;
        int rc = -EINVAL;

        *fw_stats_ctx_id = INVALID_STATS_CTX_ID;

        if (!en_dev)
                return rc;

        memset(&fw_msg, 0, sizeof(fw_msg));

        bnxt_re_init_hwrm_hdr(rdev, (void *)&req, HWRM_STAT_CTX_ALLOC, -1, -1);
        req.update_period_ms = cpu_to_le32(1000);
        req.stats_dma_addr = cpu_to_le64(dma_map);
        req.stats_dma_length = cpu_to_le16(chip_ctx->hw_stats_size);
        req.stat_ctx_flags = STAT_CTX_ALLOC_REQ_STAT_CTX_FLAGS_ROCE;
        bnxt_re_fill_fw_msg(&fw_msg, (void *)&req, sizeof(req), (void *)&resp,
                            sizeof(resp), DFLT_HWRM_CMD_TIMEOUT);
        rc = en_dev->en_ops->bnxt_send_fw_msg(en_dev, BNXT_ROCE_ULP, &fw_msg);
        if (!rc)
                *fw_stats_ctx_id = le32_to_cpu(resp.stat_ctx_id);

        return rc;
}

/* Device */

static bool is_bnxt_re_dev(struct net_device *netdev)
{
        struct ethtool_drvinfo drvinfo;

        if (netdev->ethtool_ops && netdev->ethtool_ops->get_drvinfo) {
                memset(&drvinfo, 0, sizeof(drvinfo));
                netdev->ethtool_ops->get_drvinfo(netdev, &drvinfo);

                if (strcmp(drvinfo.driver, "bnxt_en"))
                        return false;
                return true;
        }
        return false;
}

static struct bnxt_re_dev *bnxt_re_from_netdev(struct net_device *netdev)
{
        struct ib_device *ibdev =
                ib_device_get_by_netdev(netdev, RDMA_DRIVER_BNXT_RE);
        if (!ibdev)
                return NULL;

        return container_of(ibdev, struct bnxt_re_dev, ibdev);
}

static struct bnxt_en_dev *bnxt_re_dev_probe(struct net_device *netdev)
{
        struct bnxt_en_dev *en_dev;
        struct pci_dev *pdev;

        en_dev = bnxt_ulp_probe(netdev);
        if (IS_ERR(en_dev))
                return en_dev;

        pdev = en_dev->pdev;
        if (!pdev)
                return ERR_PTR(-EINVAL);

        if (!(en_dev->flags & BNXT_EN_FLAG_ROCE_CAP)) {
                dev_info(&pdev->dev,
                        "%s: probe error: RoCE is not supported on this device",
                        ROCE_DRV_MODULE_NAME);
                return ERR_PTR(-ENODEV);
        }

        dev_hold(netdev);

        return en_dev;
}

static ssize_t hw_rev_show(struct device *device, struct device_attribute *attr,
                           char *buf)
{
        struct bnxt_re_dev *rdev =
                rdma_device_to_drv_device(device, struct bnxt_re_dev, ibdev);

        return sysfs_emit(buf, "0x%x\n", rdev->en_dev->pdev->vendor);
}
static DEVICE_ATTR_RO(hw_rev);

static ssize_t hca_type_show(struct device *device,
                             struct device_attribute *attr, char *buf)
{
        struct bnxt_re_dev *rdev =
                rdma_device_to_drv_device(device, struct bnxt_re_dev, ibdev);

        return sysfs_emit(buf, "%s\n", rdev->ibdev.node_desc);
}
static DEVICE_ATTR_RO(hca_type);

static struct attribute *bnxt_re_attributes[] = {
        &dev_attr_hw_rev.attr,
        &dev_attr_hca_type.attr,
        NULL
};

static const struct attribute_group bnxt_re_dev_attr_group = {
        .attrs = bnxt_re_attributes,
};

static const struct ib_device_ops bnxt_re_dev_ops = {
        .owner = THIS_MODULE,
        .driver_id = RDMA_DRIVER_BNXT_RE,
        .uverbs_abi_ver = BNXT_RE_ABI_VERSION,

        .add_gid = bnxt_re_add_gid,
        .alloc_hw_port_stats = bnxt_re_ib_alloc_hw_port_stats,
        .alloc_mr = bnxt_re_alloc_mr,
        .alloc_pd = bnxt_re_alloc_pd,
        .alloc_ucontext = bnxt_re_alloc_ucontext,
        .create_ah = bnxt_re_create_ah,
        .create_cq = bnxt_re_create_cq,
        .create_qp = bnxt_re_create_qp,
        .create_srq = bnxt_re_create_srq,
        .create_user_ah = bnxt_re_create_ah,
        .dealloc_driver = bnxt_re_dealloc_driver,
        .dealloc_pd = bnxt_re_dealloc_pd,
        .dealloc_ucontext = bnxt_re_dealloc_ucontext,
        .del_gid = bnxt_re_del_gid,
        .dereg_mr = bnxt_re_dereg_mr,
        .destroy_ah = bnxt_re_destroy_ah,
        .destroy_cq = bnxt_re_destroy_cq,
        .destroy_qp = bnxt_re_destroy_qp,
        .destroy_srq = bnxt_re_destroy_srq,
        .device_group = &bnxt_re_dev_attr_group,
        .get_dev_fw_str = bnxt_re_query_fw_str,
        .get_dma_mr = bnxt_re_get_dma_mr,
        .get_hw_stats = bnxt_re_ib_get_hw_stats,
        .get_link_layer = bnxt_re_get_link_layer,
        .get_port_immutable = bnxt_re_get_port_immutable,
        .map_mr_sg = bnxt_re_map_mr_sg,
        .mmap = bnxt_re_mmap,
        .modify_ah = bnxt_re_modify_ah,
        .modify_qp = bnxt_re_modify_qp,
        .modify_srq = bnxt_re_modify_srq,
        .poll_cq = bnxt_re_poll_cq,
        .post_recv = bnxt_re_post_recv,
        .post_send = bnxt_re_post_send,
        .post_srq_recv = bnxt_re_post_srq_recv,
        .query_ah = bnxt_re_query_ah,
        .query_device = bnxt_re_query_device,
        .query_pkey = bnxt_re_query_pkey,
        .query_port = bnxt_re_query_port,
        .query_qp = bnxt_re_query_qp,
        .query_srq = bnxt_re_query_srq,
        .reg_user_mr = bnxt_re_reg_user_mr,
        .req_notify_cq = bnxt_re_req_notify_cq,
        INIT_RDMA_OBJ_SIZE(ib_ah, bnxt_re_ah, ib_ah),
        INIT_RDMA_OBJ_SIZE(ib_cq, bnxt_re_cq, ib_cq),
        INIT_RDMA_OBJ_SIZE(ib_pd, bnxt_re_pd, ib_pd),
        INIT_RDMA_OBJ_SIZE(ib_qp, bnxt_re_qp, ib_qp),
        INIT_RDMA_OBJ_SIZE(ib_srq, bnxt_re_srq, ib_srq),
        INIT_RDMA_OBJ_SIZE(ib_ucontext, bnxt_re_ucontext, ib_uctx),
};

static int bnxt_re_register_ib(struct bnxt_re_dev *rdev)
{
        struct ib_device *ibdev = &rdev->ibdev;
        int ret;

        /* ib device init */
        ibdev->node_type = RDMA_NODE_IB_CA;
        strlcpy(ibdev->node_desc, BNXT_RE_DESC " HCA",
                strlen(BNXT_RE_DESC) + 5);
        ibdev->phys_port_cnt = 1;

        bnxt_qplib_get_guid(rdev->netdev->dev_addr, (u8 *)&ibdev->node_guid);

        ibdev->num_comp_vectors = rdev->num_msix - 1;
        ibdev->dev.parent = &rdev->en_dev->pdev->dev;
        ibdev->local_dma_lkey = BNXT_QPLIB_RSVD_LKEY;

        ib_set_device_ops(ibdev, &bnxt_re_dev_ops);
        ret = ib_device_set_netdev(&rdev->ibdev, rdev->netdev, 1);
        if (ret)
                return ret;

        dma_set_max_seg_size(&rdev->en_dev->pdev->dev, UINT_MAX);
        return ib_register_device(ibdev, "bnxt_re%d", &rdev->en_dev->pdev->dev);
}

static void bnxt_re_dev_remove(struct bnxt_re_dev *rdev)
{
        dev_put(rdev->netdev);
        rdev->netdev = NULL;
        mutex_lock(&bnxt_re_dev_lock);
        list_del_rcu(&rdev->list);
        mutex_unlock(&bnxt_re_dev_lock);

        synchronize_rcu();
}

static struct bnxt_re_dev *bnxt_re_dev_add(struct net_device *netdev,
                                           struct bnxt_en_dev *en_dev)
{
        struct bnxt_re_dev *rdev;

        /* Allocate bnxt_re_dev instance here */
        rdev = ib_alloc_device(bnxt_re_dev, ibdev);
        if (!rdev) {
                ibdev_err(NULL, "%s: bnxt_re_dev allocation failure!",
                          ROCE_DRV_MODULE_NAME);
                return NULL;
        }
        /* Default values */
        rdev->netdev = netdev;
        dev_hold(rdev->netdev);
        rdev->en_dev = en_dev;
        rdev->id = rdev->en_dev->pdev->devfn;
        INIT_LIST_HEAD(&rdev->qp_list);
        mutex_init(&rdev->qp_lock);
        atomic_set(&rdev->qp_count, 0);
        atomic_set(&rdev->cq_count, 0);
        atomic_set(&rdev->srq_count, 0);
        atomic_set(&rdev->mr_count, 0);
        atomic_set(&rdev->mw_count, 0);
        rdev->cosq[0] = 0xFFFF;
        rdev->cosq[1] = 0xFFFF;

        mutex_lock(&bnxt_re_dev_lock);
        list_add_tail_rcu(&rdev->list, &bnxt_re_dev_list);
        mutex_unlock(&bnxt_re_dev_lock);
        return rdev;
}

static int bnxt_re_handle_unaffi_async_event(struct creq_func_event
                                             *unaffi_async)
{
        switch (unaffi_async->event) {
        case CREQ_FUNC_EVENT_EVENT_TX_WQE_ERROR:
                break;
        case CREQ_FUNC_EVENT_EVENT_TX_DATA_ERROR:
                break;
        case CREQ_FUNC_EVENT_EVENT_RX_WQE_ERROR:
                break;
        case CREQ_FUNC_EVENT_EVENT_RX_DATA_ERROR:
                break;
        case CREQ_FUNC_EVENT_EVENT_CQ_ERROR:
                break;
        case CREQ_FUNC_EVENT_EVENT_TQM_ERROR:
                break;
        case CREQ_FUNC_EVENT_EVENT_CFCQ_ERROR:
                break;
        case CREQ_FUNC_EVENT_EVENT_CFCS_ERROR:
                break;
        case CREQ_FUNC_EVENT_EVENT_CFCC_ERROR:
                break;
        case CREQ_FUNC_EVENT_EVENT_CFCM_ERROR:
                break;
        case CREQ_FUNC_EVENT_EVENT_TIM_ERROR:
                break;
        default:
                return -EINVAL;
        }
        return 0;
}

static int bnxt_re_handle_qp_async_event(struct creq_qp_event *qp_event,
                                         struct bnxt_re_qp *qp)
{
        struct ib_event event;
        unsigned int flags;

        if (qp->qplib_qp.state == CMDQ_MODIFY_QP_NEW_STATE_ERR &&
            rdma_is_kernel_res(&qp->ib_qp.res)) {
                flags = bnxt_re_lock_cqs(qp);
                bnxt_qplib_add_flush_qp(&qp->qplib_qp);
                bnxt_re_unlock_cqs(qp, flags);
        }

        memset(&event, 0, sizeof(event));
        if (qp->qplib_qp.srq) {
                event.device = &qp->rdev->ibdev;
                event.element.qp = &qp->ib_qp;
                event.event = IB_EVENT_QP_LAST_WQE_REACHED;
        }

        if (event.device && qp->ib_qp.event_handler)
                qp->ib_qp.event_handler(&event, qp->ib_qp.qp_context);

        return 0;
}

static int bnxt_re_handle_affi_async_event(struct creq_qp_event *affi_async,
                                           void *obj)
{
        int rc = 0;
        u8 event;

        if (!obj)
                return rc; /* QP was already dead, still return success */

        event = affi_async->event;
        if (event == CREQ_QP_EVENT_EVENT_QP_ERROR_NOTIFICATION) {
                struct bnxt_qplib_qp *lib_qp = obj;
                struct bnxt_re_qp *qp = container_of(lib_qp, struct bnxt_re_qp,
                                                     qplib_qp);
                rc = bnxt_re_handle_qp_async_event(affi_async, qp);
        }
        return rc;
}

static int bnxt_re_aeq_handler(struct bnxt_qplib_rcfw *rcfw,
                               void *aeqe, void *obj)
{
        struct creq_qp_event *affi_async;
        struct creq_func_event *unaffi_async;
        u8 type;
        int rc;

        type = ((struct creq_base *)aeqe)->type;
        if (type == CREQ_BASE_TYPE_FUNC_EVENT) {
                unaffi_async = aeqe;
                rc = bnxt_re_handle_unaffi_async_event(unaffi_async);
        } else {
                affi_async = aeqe;
                rc = bnxt_re_handle_affi_async_event(affi_async, obj);
        }

        return rc;
}

static int bnxt_re_srqn_handler(struct bnxt_qplib_nq *nq,
                                struct bnxt_qplib_srq *handle, u8 event)
{
        struct bnxt_re_srq *srq = container_of(handle, struct bnxt_re_srq,
                                               qplib_srq);
        struct ib_event ib_event;
        int rc = 0;

        ib_event.device = &srq->rdev->ibdev;
        ib_event.element.srq = &srq->ib_srq;
        if (event == NQ_SRQ_EVENT_EVENT_SRQ_THRESHOLD_EVENT)
                ib_event.event = IB_EVENT_SRQ_LIMIT_REACHED;
        else
                ib_event.event = IB_EVENT_SRQ_ERR;

        if (srq->ib_srq.event_handler) {
                /* Lock event_handler? */
                (*srq->ib_srq.event_handler)(&ib_event,
                                             srq->ib_srq.srq_context);
        }
        return rc;
}

static int bnxt_re_cqn_handler(struct bnxt_qplib_nq *nq,
                               struct bnxt_qplib_cq *handle)
{
        struct bnxt_re_cq *cq = container_of(handle, struct bnxt_re_cq,
                                             qplib_cq);

        if (cq->ib_cq.comp_handler) {
                /* Lock comp_handler? */
                (*cq->ib_cq.comp_handler)(&cq->ib_cq, cq->ib_cq.cq_context);
        }

        return 0;
}

#define BNXT_RE_GEN_P5_PF_NQ_DB         0x10000
#define BNXT_RE_GEN_P5_VF_NQ_DB         0x4000
static u32 bnxt_re_get_nqdb_offset(struct bnxt_re_dev *rdev, u16 indx)
{
        return bnxt_qplib_is_chip_gen_p5(rdev->chip_ctx) ?
                (rdev->is_virtfn ? BNXT_RE_GEN_P5_VF_NQ_DB :
                                   BNXT_RE_GEN_P5_PF_NQ_DB) :
                                   rdev->msix_entries[indx].db_offset;
}

static void bnxt_re_cleanup_res(struct bnxt_re_dev *rdev)
{
        int i;

        for (i = 1; i < rdev->num_msix; i++)
                bnxt_qplib_disable_nq(&rdev->nq[i - 1]);

        if (rdev->qplib_res.rcfw)
                bnxt_qplib_cleanup_res(&rdev->qplib_res);
}

static int bnxt_re_init_res(struct bnxt_re_dev *rdev)
{
        int num_vec_enabled = 0;
        int rc = 0, i;
        u32 db_offt;

        bnxt_qplib_init_res(&rdev->qplib_res);

        for (i = 1; i < rdev->num_msix; i++) {
                db_offt = bnxt_re_get_nqdb_offset(rdev, i);
                rc = bnxt_qplib_enable_nq(rdev->en_dev->pdev, &rdev->nq[i - 1],
                                          i - 1, rdev->msix_entries[i].vector,
                                          db_offt, &bnxt_re_cqn_handler,
                                          &bnxt_re_srqn_handler);
                if (rc) {
                        ibdev_err(&rdev->ibdev,
                                  "Failed to enable NQ with rc = 0x%x", rc);
                        goto fail;
                }
                num_vec_enabled++;
        }
        return 0;
fail:
        /* Only NQs 0..num_vec_enabled - 1 were enabled; disable just those */
        for (i = num_vec_enabled - 1; i >= 0; i--)
                bnxt_qplib_disable_nq(&rdev->nq[i]);
        return rc;
}

static void bnxt_re_free_nq_res(struct bnxt_re_dev *rdev)
{
        u8 type;
        int i;

        for (i = 0; i < rdev->num_msix - 1; i++) {
                type = bnxt_qplib_get_ring_type(rdev->chip_ctx);
                bnxt_re_net_ring_free(rdev, rdev->nq[i].ring_id, type);
                bnxt_qplib_free_nq(&rdev->nq[i]);
                rdev->nq[i].res = NULL;
        }
}

static void bnxt_re_free_res(struct bnxt_re_dev *rdev)
{
        bnxt_re_free_nq_res(rdev);

        if (rdev->qplib_res.dpi_tbl.max) {
                bnxt_qplib_dealloc_dpi(&rdev->qplib_res,
                                       &rdev->qplib_res.dpi_tbl,
                                       &rdev->dpi_privileged);
        }
        if (rdev->qplib_res.rcfw) {
                bnxt_qplib_free_res(&rdev->qplib_res);
                rdev->qplib_res.rcfw = NULL;
        }
}

static int bnxt_re_alloc_res(struct bnxt_re_dev *rdev)
{
        struct bnxt_re_ring_attr rattr = {};
        int num_vec_created = 0;
        int rc = 0, i;
        u8 type;

        /* Configure and allocate resources for qplib */
        rdev->qplib_res.rcfw = &rdev->rcfw;
        rc = bnxt_qplib_get_dev_attr(&rdev->rcfw, &rdev->dev_attr,
                                     rdev->is_virtfn);
        if (rc)
                goto fail;

        rc = bnxt_qplib_alloc_res(&rdev->qplib_res, rdev->en_dev->pdev,
                                  rdev->netdev, &rdev->dev_attr);
        if (rc)
                goto fail;

        rc = bnxt_qplib_alloc_dpi(&rdev->qplib_res.dpi_tbl,
                                  &rdev->dpi_privileged,
                                  rdev);
        if (rc)
                goto dealloc_res;

        for (i = 0; i < rdev->num_msix - 1; i++) {
                struct bnxt_qplib_nq *nq;

                nq = &rdev->nq[i];
                nq->hwq.max_elements = BNXT_QPLIB_NQE_MAX_CNT;
                rc = bnxt_qplib_alloc_nq(&rdev->qplib_res, &rdev->nq[i]);
                if (rc) {
                        ibdev_err(&rdev->ibdev, "Alloc Failed NQ%d rc:%#x",
                                  i, rc);
                        goto free_nq;
                }
                type = bnxt_qplib_get_ring_type(rdev->chip_ctx);
                rattr.dma_arr = nq->hwq.pbl[PBL_LVL_0].pg_map_arr;
                rattr.pages = nq->hwq.pbl[rdev->nq[i].hwq.level].pg_count;
                rattr.type = type;
                rattr.mode = RING_ALLOC_REQ_INT_MODE_MSIX;
                rattr.depth = BNXT_QPLIB_NQE_MAX_CNT - 1;
                rattr.lrid = rdev->msix_entries[i + 1].ring_idx;
                rc = bnxt_re_net_ring_alloc(rdev, &rattr, &nq->ring_id);
                if (rc) {
                        ibdev_err(&rdev->ibdev,
                                  "Failed to allocate NQ fw id with rc = 0x%x",
                                  rc);
                        bnxt_qplib_free_nq(&rdev->nq[i]);
                        goto free_nq;
                }
                num_vec_created++;
        }
        return 0;
free_nq:
        for (i = num_vec_created - 1; i >= 0; i--) {
                type = bnxt_qplib_get_ring_type(rdev->chip_ctx);
                bnxt_re_net_ring_free(rdev, rdev->nq[i].ring_id, type);
                bnxt_qplib_free_nq(&rdev->nq[i]);
        }
        bnxt_qplib_dealloc_dpi(&rdev->qplib_res,
                               &rdev->qplib_res.dpi_tbl,
                               &rdev->dpi_privileged);
dealloc_res:
        bnxt_qplib_free_res(&rdev->qplib_res);

fail:
        rdev->qplib_res.rcfw = NULL;
        return rc;
}

static void bnxt_re_dispatch_event(struct ib_device *ibdev, struct ib_qp *qp,
                                   u8 port_num, enum ib_event_type event)
{
        struct ib_event ib_event;

        ib_event.device = ibdev;
        if (qp) {
                ib_event.element.qp = qp;
                ib_event.event = event;
                if (qp->event_handler)
                        qp->event_handler(&ib_event, qp->qp_context);

        } else {
                ib_event.element.port_num = port_num;
                ib_event.event = event;
                ib_dispatch_event(&ib_event);
        }
}

#define HWRM_QUEUE_PRI2COS_QCFG_INPUT_FLAGS_IVLAN      0x02
static int bnxt_re_query_hwrm_pri2cos(struct bnxt_re_dev *rdev, u8 dir,
                                      u64 *cid_map)
{
        struct hwrm_queue_pri2cos_qcfg_input req = {0};
        struct bnxt *bp = netdev_priv(rdev->netdev);
        struct hwrm_queue_pri2cos_qcfg_output resp;
        struct bnxt_en_dev *en_dev = rdev->en_dev;
        struct bnxt_fw_msg fw_msg;
        u32 flags = 0;
        u8 *qcfgmap, *tmp_map;
        int rc = 0, i;

        if (!cid_map)
                return -EINVAL;

        memset(&fw_msg, 0, sizeof(fw_msg));
        bnxt_re_init_hwrm_hdr(rdev, (void *)&req,
                              HWRM_QUEUE_PRI2COS_QCFG, -1, -1);
        flags |= (dir & 0x01);
        flags |= HWRM_QUEUE_PRI2COS_QCFG_INPUT_FLAGS_IVLAN;
        req.flags = cpu_to_le32(flags);
        req.port_id = bp->pf.port_id;

        bnxt_re_fill_fw_msg(&fw_msg, (void *)&req, sizeof(req), (void *)&resp,
                            sizeof(resp), DFLT_HWRM_CMD_TIMEOUT);
        rc = en_dev->en_ops->bnxt_send_fw_msg(en_dev, BNXT_ROCE_ULP, &fw_msg);
        if (rc)
                return rc;

        if (resp.queue_cfg_info)
                ibdev_warn(&rdev->ibdev,
                           "Asymmetric cos queue configuration detected on device, QoS may not be fully functional\n");
        qcfgmap = &resp.pri0_cos_queue_id;
        tmp_map = (u8 *)cid_map;
        for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++)
                tmp_map[i] = qcfgmap[i];

        return rc;
}

static bool bnxt_re_is_qp1_or_shadow_qp(struct bnxt_re_dev *rdev,
                                        struct bnxt_re_qp *qp)
{
        return (qp->ib_qp.qp_type == IB_QPT_GSI) ||
               (qp == rdev->gsi_ctx.gsi_sqp);
}

static void bnxt_re_dev_stop(struct bnxt_re_dev *rdev)
{
        int mask = IB_QP_STATE;
        struct ib_qp_attr qp_attr;
        struct bnxt_re_qp *qp;

        qp_attr.qp_state = IB_QPS_ERR;
        mutex_lock(&rdev->qp_lock);
        list_for_each_entry(qp, &rdev->qp_list, list) {
                /* Modify the state of all QPs except QP1/Shadow QP */
                if (!bnxt_re_is_qp1_or_shadow_qp(rdev, qp)) {
                        if (qp->qplib_qp.state !=
                            CMDQ_MODIFY_QP_NEW_STATE_RESET &&
                            qp->qplib_qp.state !=
                            CMDQ_MODIFY_QP_NEW_STATE_ERR) {
                                bnxt_re_dispatch_event(&rdev->ibdev, &qp->ib_qp,
                                                       1, IB_EVENT_QP_FATAL);
                                bnxt_re_modify_qp(&qp->ib_qp, &qp_attr, mask,
                                                  NULL);
                        }
                }
        }
        mutex_unlock(&rdev->qp_lock);
}

static int bnxt_re_update_gid(struct bnxt_re_dev *rdev)
{
        struct bnxt_qplib_sgid_tbl *sgid_tbl = &rdev->qplib_res.sgid_tbl;
        struct bnxt_qplib_gid gid;
        u16 gid_idx, index;
        int rc = 0;

        if (!ib_device_try_get(&rdev->ibdev))
                return 0;

        if (!sgid_tbl) {
                ibdev_err(&rdev->ibdev, "QPLIB: SGID table not allocated");
                rc = -EINVAL;
                goto out;
        }

        for (index = 0; index < sgid_tbl->active; index++) {
                gid_idx = sgid_tbl->hw_id[index];

                if (!memcmp(&sgid_tbl->tbl[index], &bnxt_qplib_gid_zero,
                            sizeof(bnxt_qplib_gid_zero)))
                        continue;
                /* Only the VLAN-enable setting of non-VLAN GIDs needs to be
                 * modified; for VLAN GIDs it is set when the GID is added.
                 */
                if (sgid_tbl->vlan[index])
                        continue;

                memcpy(&gid, &sgid_tbl->tbl[index], sizeof(gid));

                rc = bnxt_qplib_update_sgid(sgid_tbl, &gid, gid_idx,
                                            rdev->qplib_res.netdev->dev_addr);
        }
out:
        ib_device_put(&rdev->ibdev);
        return rc;
}

static u32 bnxt_re_get_priority_mask(struct bnxt_re_dev *rdev)
{
        u32 prio_map = 0, tmp_map = 0;
        struct net_device *netdev;
        struct dcb_app app;

        netdev = rdev->netdev;

        memset(&app, 0, sizeof(app));
        app.selector = IEEE_8021QAZ_APP_SEL_ETHERTYPE;
        app.protocol = ETH_P_IBOE;
        tmp_map = dcb_ieee_getapp_mask(netdev, &app);
        prio_map = tmp_map;

        app.selector = IEEE_8021QAZ_APP_SEL_DGRAM;
        app.protocol = ROCE_V2_UDP_DPORT;
        tmp_map = dcb_ieee_getapp_mask(netdev, &app);
        prio_map |= tmp_map;

        return prio_map;
}
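/*
 * bnxt_re_get_priority_mask() above merges the DCB application-priority
 * entries for RoCE v1 (ethertype 0x8915, ETH_P_IBOE) and RoCE v2 (UDP
 * destination port 4791, ROCE_V2_UDP_DPORT) into one bitmask; bit n set
 * means 802.1p priority n carries RoCE traffic.
 */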

static void bnxt_re_parse_cid_map(u8 prio_map, u8 *cid_map, u16 *cosq)
{
        u16 prio;
        u8 id;

        for (prio = 0, id = 0; prio < 8; prio++) {
                if (prio_map & (1 << prio)) {
                        cosq[id] = cid_map[prio];
                        id++;
                        if (id == 2) /* Max 2 tcs supported */
                                break;
                }
        }
}
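/*
 * Worked example (values are illustrative): with prio_map = 0x28
 * (priorities 3 and 5 set) and a firmware map of
 * cid_map[] = {0, 1, 2, 4, 5, 6, 7, 8}, the loop above yields
 * cosq[0] = cid_map[3] = 4 and cosq[1] = cid_map[5] = 6; at most two
 * traffic classes are consumed.
 */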

static int bnxt_re_setup_qos(struct bnxt_re_dev *rdev)
{
        u8 prio_map = 0;
        u64 cid_map;
        int rc;

        /* Get priority for roce */
        prio_map = bnxt_re_get_priority_mask(rdev);

        if (prio_map == rdev->cur_prio_map)
                return 0;
        rdev->cur_prio_map = prio_map;
        /* Get cosq id for this priority */
        rc = bnxt_re_query_hwrm_pri2cos(rdev, 0, &cid_map);
        if (rc) {
                ibdev_warn(&rdev->ibdev, "no cos for p_mask %x\n", prio_map);
                return rc;
        }
        /* Parse CoS IDs for app priority */
        bnxt_re_parse_cid_map(prio_map, (u8 *)&cid_map, rdev->cosq);

        /* Config BONO. */
        rc = bnxt_qplib_map_tc2cos(&rdev->qplib_res, rdev->cosq);
        if (rc) {
                ibdev_warn(&rdev->ibdev, "no tc for cos{%x, %x}\n",
                           rdev->cosq[0], rdev->cosq[1]);
                return rc;
        }

        /* Actual priorities are not programmed, as they are already
         * programmed by the L2 driver; just enable or disable priority
         * VLAN tagging.
         */
        if ((prio_map == 0 && rdev->qplib_res.prio) ||
            (prio_map != 0 && !rdev->qplib_res.prio)) {
                rdev->qplib_res.prio = prio_map ? true : false;

                bnxt_re_update_gid(rdev);
        }

        return 0;
}
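/*
 * Summary of the QoS flow above: read the RoCE priority mask from DCB,
 * ask firmware for the priority-to-CoS-queue map, pick up to two CoS
 * queues for RoCE, program them via bnxt_qplib_map_tc2cos(), and finally
 * toggle priority VLAN tagging on the non-VLAN GIDs whenever the mask
 * flips between zero and non-zero.
 */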

static void bnxt_re_query_hwrm_intf_version(struct bnxt_re_dev *rdev)
{
        struct bnxt_en_dev *en_dev = rdev->en_dev;
        struct hwrm_ver_get_output resp = {0};
        struct hwrm_ver_get_input req = {0};
        struct bnxt_fw_msg fw_msg;
        int rc = 0;

        memset(&fw_msg, 0, sizeof(fw_msg));
        bnxt_re_init_hwrm_hdr(rdev, (void *)&req,
                              HWRM_VER_GET, -1, -1);
        req.hwrm_intf_maj = HWRM_VERSION_MAJOR;
        req.hwrm_intf_min = HWRM_VERSION_MINOR;
        req.hwrm_intf_upd = HWRM_VERSION_UPDATE;
        bnxt_re_fill_fw_msg(&fw_msg, (void *)&req, sizeof(req), (void *)&resp,
                            sizeof(resp), DFLT_HWRM_CMD_TIMEOUT);
        rc = en_dev->en_ops->bnxt_send_fw_msg(en_dev, BNXT_ROCE_ULP, &fw_msg);
        if (rc) {
                ibdev_err(&rdev->ibdev, "Failed to query HW version, rc = 0x%x",
                          rc);
                return;
        }
        rdev->qplib_ctx.hwrm_intf_ver =
                (u64)le16_to_cpu(resp.hwrm_intf_major) << 48 |
                (u64)le16_to_cpu(resp.hwrm_intf_minor) << 32 |
                (u64)le16_to_cpu(resp.hwrm_intf_build) << 16 |
                le16_to_cpu(resp.hwrm_intf_patch);
}
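/*
 * The packing above concatenates the four 16-bit version fields, e.g.
 * HWRM interface 1.10.2.95 becomes
 * (1 << 48) | (10 << 32) | (2 << 16) | 95 = 0x0001000a0002005f,
 * so version comparisons reduce to a single 64-bit compare.
 */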
1311
1312static int bnxt_re_ib_init(struct bnxt_re_dev *rdev)
1313{
1314        int rc = 0;
1315        u32 event;
1316
1317        /* Register ib dev */
1318        rc = bnxt_re_register_ib(rdev);
1319        if (rc) {
1320                pr_err("Failed to register with IB: %#x\n", rc);
1321                return rc;
1322        }
1323        dev_info(rdev_to_dev(rdev), "Device registered successfully");
1324        ib_get_eth_speed(&rdev->ibdev, 1, &rdev->active_speed,
1325                         &rdev->active_width);
1326        set_bit(BNXT_RE_FLAG_ISSUE_ROCE_STATS, &rdev->flags);
1327
1328        event = netif_running(rdev->netdev) && netif_carrier_ok(rdev->netdev) ?
1329                IB_EVENT_PORT_ACTIVE : IB_EVENT_PORT_ERR;
1330
1331        bnxt_re_dispatch_event(&rdev->ibdev, NULL, 1, event);
1332
1333        return rc;
1334}
1335
1336static void bnxt_re_dev_uninit(struct bnxt_re_dev *rdev)
1337{
1338        u8 type;
1339        int rc;
1340
1341        if (test_and_clear_bit(BNXT_RE_FLAG_QOS_WORK_REG, &rdev->flags))
1342                cancel_delayed_work_sync(&rdev->worker);
1343
1344        if (test_and_clear_bit(BNXT_RE_FLAG_RESOURCES_INITIALIZED,
1345                               &rdev->flags))
1346                bnxt_re_cleanup_res(rdev);
1347        if (test_and_clear_bit(BNXT_RE_FLAG_RESOURCES_ALLOCATED, &rdev->flags))
1348                bnxt_re_free_res(rdev);
1349
1350        if (test_and_clear_bit(BNXT_RE_FLAG_RCFW_CHANNEL_EN, &rdev->flags)) {
1351                rc = bnxt_qplib_deinit_rcfw(&rdev->rcfw);
1352                if (rc)
1353                        ibdev_warn(&rdev->ibdev,
1354                                   "Failed to deinitialize RCFW: %#x", rc);
1355                bnxt_re_net_stats_ctx_free(rdev, rdev->qplib_ctx.stats.fw_id);
1356                bnxt_qplib_free_ctx(&rdev->qplib_res, &rdev->qplib_ctx);
1357                bnxt_qplib_disable_rcfw_channel(&rdev->rcfw);
1358                type = bnxt_qplib_get_ring_type(rdev->chip_ctx);
1359                bnxt_re_net_ring_free(rdev, rdev->rcfw.creq.ring_id, type);
1360                bnxt_qplib_free_rcfw_channel(&rdev->rcfw);
1361        }
1362        if (test_and_clear_bit(BNXT_RE_FLAG_GOT_MSIX, &rdev->flags)) {
1363                rc = bnxt_re_free_msix(rdev);
1364                if (rc)
1365                        ibdev_warn(&rdev->ibdev,
1366                                   "Failed to free MSI-X vectors: %#x", rc);
1367        }
1368
1369        bnxt_re_destroy_chip_ctx(rdev);
1370        if (test_and_clear_bit(BNXT_RE_FLAG_NETDEV_REGISTERED, &rdev->flags)) {
1371                rc = bnxt_re_unregister_netdev(rdev);
1372                if (rc)
1373                        ibdev_warn(&rdev->ibdev,
1374                                   "Failed to unregister with netdev: %#x", rc);
1375        }
1376}
1377
1378/* worker thread for polling periodic events. Now used for QoS programming*/
1379static void bnxt_re_worker(struct work_struct *work)
1380{
1381        struct bnxt_re_dev *rdev = container_of(work, struct bnxt_re_dev,
1382                                                worker.work);
1383
1384        bnxt_re_setup_qos(rdev);
1385        schedule_delayed_work(&rdev->worker, msecs_to_jiffies(30000));
1386}
1387
1388static int bnxt_re_dev_init(struct bnxt_re_dev *rdev, u8 wqe_mode)
1389{
1390        struct bnxt_qplib_creq_ctx *creq;
1391        struct bnxt_re_ring_attr rattr;
1392        u32 db_offt;
1393        int vid;
1394        u8 type;
1395        int rc;
1396
1397        /* Registered a new RoCE device instance to netdev */
1398        memset(&rattr, 0, sizeof(rattr));
1399        rc = bnxt_re_register_netdev(rdev);
1400        if (rc) {
1401                ibdev_err(&rdev->ibdev,
1402                          "Failed to register with netedev: %#x\n", rc);
1403                return -EINVAL;
1404        }
1405        set_bit(BNXT_RE_FLAG_NETDEV_REGISTERED, &rdev->flags);
1406
1407        rc = bnxt_re_setup_chip_ctx(rdev, wqe_mode);
1408        if (rc) {
1409                ibdev_err(&rdev->ibdev, "Failed to get chip context\n");
1410                return -EINVAL;
1411        }
1412
1413        /* Check whether VF or PF */
1414        bnxt_re_get_sriov_func_type(rdev);
1415
1416        rc = bnxt_re_request_msix(rdev);
1417        if (rc) {
1418                ibdev_err(&rdev->ibdev,
1419                          "Failed to get MSI-X vectors: %#x\n", rc);
1420                rc = -EINVAL;
1421                goto fail;
1422        }
1423        set_bit(BNXT_RE_FLAG_GOT_MSIX, &rdev->flags);
1424
1425        bnxt_re_query_hwrm_intf_version(rdev);
1426
1427        /* Establish RCFW Communication Channel to initialize the context
1428         * memory for the function and all child VFs
1429         */
1430        rc = bnxt_qplib_alloc_rcfw_channel(&rdev->qplib_res, &rdev->rcfw,
1431                                           &rdev->qplib_ctx,
1432                                           BNXT_RE_MAX_QPC_COUNT);
1433        if (rc) {
1434                ibdev_err(&rdev->ibdev,
1435                          "Failed to allocate RCFW Channel: %#x\n", rc);
1436                goto fail;
1437        }
1438
1439        type = bnxt_qplib_get_ring_type(rdev->chip_ctx);
1440        creq = &rdev->rcfw.creq;
1441        rattr.dma_arr = creq->hwq.pbl[PBL_LVL_0].pg_map_arr;
1442        rattr.pages = creq->hwq.pbl[creq->hwq.level].pg_count;
1443        rattr.type = type;
1444        rattr.mode = RING_ALLOC_REQ_INT_MODE_MSIX;
1445        rattr.depth = BNXT_QPLIB_CREQE_MAX_CNT - 1;
1446        rattr.lrid = rdev->msix_entries[BNXT_RE_AEQ_IDX].ring_idx;
1447        rc = bnxt_re_net_ring_alloc(rdev, &rattr, &creq->ring_id);
1448        if (rc) {
1449                ibdev_err(&rdev->ibdev, "Failed to allocate CREQ: %#x\n", rc);
1450                goto free_rcfw;
1451        }
1452        db_offt = bnxt_re_get_nqdb_offset(rdev, BNXT_RE_AEQ_IDX);
1453        vid = rdev->msix_entries[BNXT_RE_AEQ_IDX].vector;
1454        rc = bnxt_qplib_enable_rcfw_channel(&rdev->rcfw,
1455                                            vid, db_offt, rdev->is_virtfn,
1456                                            &bnxt_re_aeq_handler);
1457        if (rc) {
1458                ibdev_err(&rdev->ibdev, "Failed to enable RCFW channel: %#x\n",
1459                          rc);
1460                goto free_ring;
1461        }
1462
1463        rc = bnxt_qplib_get_dev_attr(&rdev->rcfw, &rdev->dev_attr,
1464                                     rdev->is_virtfn);
1465        if (rc)
1466                goto disable_rcfw;
1467
1468        bnxt_re_set_resource_limits(rdev);
1469
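            /* Allocate the host memory backing the per-function QPLIB context */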
1470        rc = bnxt_qplib_alloc_ctx(&rdev->qplib_res, &rdev->qplib_ctx, 0,
1471                                  bnxt_qplib_is_chip_gen_p5(rdev->chip_ctx));
1472        if (rc) {
1473                ibdev_err(&rdev->ibdev,
1474                          "Failed to allocate QPLIB context: %#x\n", rc);
1475                goto disable_rcfw;
1476        }
1477        rc = bnxt_re_net_stats_ctx_alloc(rdev,
1478                                         rdev->qplib_ctx.stats.dma_map,
1479                                         &rdev->qplib_ctx.stats.fw_id);
1480        if (rc) {
1481                ibdev_err(&rdev->ibdev,
1482                          "Failed to allocate stats context: %#x\n", rc);
1483                goto free_ctx;
1484        }
1485
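            /* Hand the context memory over to firmware and initialize the RCFW */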
1486        rc = bnxt_qplib_init_rcfw(&rdev->rcfw, &rdev->qplib_ctx,
1487                                  rdev->is_virtfn);
1488        if (rc) {
1489                ibdev_err(&rdev->ibdev,
1490                          "Failed to initialize RCFW: %#x\n", rc);
1491                goto free_sctx;
1492        }
1493        set_bit(BNXT_RE_FLAG_RCFW_CHANNEL_EN, &rdev->flags);
1494
1495        /* Allocate resources based on the device caps queried above */
1496        rc = bnxt_re_alloc_res(rdev);
1497        if (rc) {
1498                ibdev_err(&rdev->ibdev,
1499                          "Failed to allocate resources: %#x\n", rc);
1500                goto fail;
1501        }
1502        set_bit(BNXT_RE_FLAG_RESOURCES_ALLOCATED, &rdev->flags);
1503        rc = bnxt_re_init_res(rdev);
1504        if (rc) {
1505                ibdev_err(&rdev->ibdev,
1506                          "Failed to initialize resources: %#x\n", rc);
1507                goto fail;
1508        }
1509
1510        set_bit(BNXT_RE_FLAG_RESOURCES_INITIALIZED, &rdev->flags);
1511
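            /* PF only: program RoCE QoS now and keep refreshing it from
             * the delayed worker every 30 seconds.
             */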
1512        if (!rdev->is_virtfn) {
1513                rc = bnxt_re_setup_qos(rdev);
1514                if (rc)
1515                        ibdev_info(&rdev->ibdev,
1516                                   "RoCE priority not yet configured\n");
1517
1518                INIT_DELAYED_WORK(&rdev->worker, bnxt_re_worker);
1519                set_bit(BNXT_RE_FLAG_QOS_WORK_REG, &rdev->flags);
1520                schedule_delayed_work(&rdev->worker, msecs_to_jiffies(30000));
1521        }
1522
1523        return 0;
1524free_sctx:
1525        bnxt_re_net_stats_ctx_free(rdev, rdev->qplib_ctx.stats.fw_id);
1526free_ctx:
1527        bnxt_qplib_free_ctx(&rdev->qplib_res, &rdev->qplib_ctx);
1528disable_rcfw:
1529        bnxt_qplib_disable_rcfw_channel(&rdev->rcfw);
1530free_ring:
1531        type = bnxt_qplib_get_ring_type(rdev->chip_ctx);
1532        bnxt_re_net_ring_free(rdev, rdev->rcfw.creq.ring_id, type);
1533free_rcfw:
1534        bnxt_qplib_free_rcfw_channel(&rdev->rcfw);
1535fail:
1536        bnxt_re_dev_uninit(rdev);
1537
1538        return rc;
1539}
1540
1541static void bnxt_re_dev_unreg(struct bnxt_re_dev *rdev)
1542{
1543        struct net_device *netdev = rdev->netdev;
1544
1545        bnxt_re_dev_remove(rdev);
1546
1547        if (netdev)
1548                dev_put(netdev);
1549}
1550
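    /* Probe the bnxt_en device behind @netdev and, on success, create the
     * RoCE device instance for it.
     */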
1551static int bnxt_re_dev_reg(struct bnxt_re_dev **rdev, struct net_device *netdev)
1552{
1553        struct bnxt_en_dev *en_dev;
1554        int rc = 0;
1555
1556        if (!is_bnxt_re_dev(netdev))
1557                return -ENODEV;
1558
1559        en_dev = bnxt_re_dev_probe(netdev);
1560        if (IS_ERR(en_dev)) {
1561                if (en_dev != ERR_PTR(-ENODEV))
1562                        pr_err("%s: Failed to probe\n",
1563                               ROCE_DRV_MODULE_NAME);
1564                rc = PTR_ERR(en_dev);
1565                goto exit;
1566        }
1567        *rdev = bnxt_re_dev_add(netdev, en_dev);
1568        if (!*rdev) {
1569                rc = -ENOMEM;
1570                dev_put(netdev);
1571                goto exit;
1572        }
1573exit:
1574        return rc;
1575}
1576
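    /* Tear down the device and drop the PCI and netdev references taken
     * when the device was added.
     */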
1577static void bnxt_re_remove_device(struct bnxt_re_dev *rdev)
1578{
1579        bnxt_re_dev_uninit(rdev);
1580        pci_dev_put(rdev->en_dev->pdev);
1581        bnxt_re_dev_unreg(rdev);
1582}
1583
1584static int bnxt_re_add_device(struct bnxt_re_dev **rdev,
1585                              struct net_device *netdev, u8 wqe_mode)
1586{
1587        int rc;
1588
1589        rc = bnxt_re_dev_reg(rdev, netdev);
1590        if (rc == -ENODEV)
1591                return rc;
1592        if (rc) {
1593                pr_err("Failed to register with the device %s: %#x\n",
1594                       netdev->name, rc);
1595                return rc;
1596        }
1597
1598        pci_dev_get((*rdev)->en_dev->pdev);
1599        rc = bnxt_re_dev_init(*rdev, wqe_mode);
1600        if (rc) {
1601                pci_dev_put((*rdev)->en_dev->pdev);
1602                bnxt_re_dev_unreg(*rdev);
1603        }
1604
1605        return rc;
1606}
1607
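    /* ib_device dealloc_driver callback; removes the device under rtnl */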
1608static void bnxt_re_dealloc_driver(struct ib_device *ib_dev)
1609{
1610        struct bnxt_re_dev *rdev =
1611                container_of(ib_dev, struct bnxt_re_dev, ibdev);
1612
1613        dev_info(rdev_to_dev(rdev), "Unregistering Device\n");
1614
1615        rtnl_lock();
1616        bnxt_re_remove_device(rdev);
1617        rtnl_unlock();
1618}
1619
1620/* Handle all netdev events deferred from the notifier */
1621static void bnxt_re_task(struct work_struct *work)
1622{
1623        struct bnxt_re_work *re_work;
1624        struct bnxt_re_dev *rdev;
1625        int rc = 0;
1626
1627        re_work = container_of(work, struct bnxt_re_work, work);
1628        rdev = re_work->rdev;
1629
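            /* NETDEV_REGISTER: finish IB registration outside the
             * notifier; on failure tear the device down again.
             */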
1630        if (re_work->event == NETDEV_REGISTER) {
1631                rc = bnxt_re_ib_init(rdev);
1632                if (rc) {
1633                        ibdev_err(&rdev->ibdev,
1634                                  "Failed to register with IB: %#x\n", rc);
1635                        rtnl_lock();
1636                        bnxt_re_remove_device(rdev);
1637                        rtnl_unlock();
1638                        goto exit;
1639                }
1640                goto exit;
1641        }
1642
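            /* Skip the event if the IB device is already being unregistered */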
1643        if (!ib_device_try_get(&rdev->ibdev))
1644                goto exit;
1645
1646        switch (re_work->event) {
1647        case NETDEV_UP:
1648                bnxt_re_dispatch_event(&rdev->ibdev, NULL, 1,
1649                                       IB_EVENT_PORT_ACTIVE);
1650                break;
1651        case NETDEV_DOWN:
1652                bnxt_re_dev_stop(rdev);
1653                break;
1654        case NETDEV_CHANGE:
1655                if (!netif_carrier_ok(rdev->netdev))
1656                        bnxt_re_dev_stop(rdev);
1657                else
1658                        bnxt_re_dispatch_event(&rdev->ibdev, NULL, 1,
1659                                               IB_EVENT_PORT_ACTIVE);
1660                ib_get_eth_speed(&rdev->ibdev, 1, &rdev->active_speed,
1661                                 &rdev->active_width);
1662                break;
1663        default:
1664                break;
1665        }
1666        ib_device_put(&rdev->ibdev);
1667exit:
1668        put_device(&rdev->ibdev.dev);
1669        kfree(re_work);
1670}
1671
1672/*
1673 * "Notifier chain callback can be invoked for the same chain from
1674 * different CPUs at the same time".
1675 *
1676 * For cases when the netdev is already present, our call to the
1677 * register_netdevice_notifier() will actually get the rtnl_lock()
1678 * before sending NETDEV_REGISTER and (if up) NETDEV_UP
1679 * events.
1680 *
1681 * But when the netdev is not already present, the notifier
1682 * chain may be invoked from different CPUs simultaneously.
1683 *
1684 * This is protected by the netdev_mutex.
1685 */
1686static int bnxt_re_netdev_event(struct notifier_block *notifier,
1687                                unsigned long event, void *ptr)
1688{
1689        struct net_device *real_dev, *netdev = netdev_notifier_info_to_dev(ptr);
1690        struct bnxt_re_work *re_work;
1691        struct bnxt_re_dev *rdev;
1692        int rc = 0;
1693        bool sch_work = false;
1694        bool release = true;
1695
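            /* Events on a VLAN device are handled through its real device */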
1696        real_dev = rdma_vlan_dev_real_dev(netdev);
1697        if (!real_dev)
1698                real_dev = netdev;
1699
1700        rdev = bnxt_re_from_netdev(real_dev);
1701        if (!rdev && event != NETDEV_REGISTER)
1702                return NOTIFY_OK;
1703
1704        if (real_dev != netdev)
1705                goto exit;
1706
1707        switch (event) {
1708        case NETDEV_REGISTER:
1709                if (rdev)
1710                        break;
1711                rc = bnxt_re_add_device(&rdev, real_dev,
1712                                        BNXT_QPLIB_WQE_MODE_STATIC);
1713                if (!rc)
1714                        sch_work = true;
1715                release = false;
1716                break;
1717
1718        case NETDEV_UNREGISTER:
1719                ib_unregister_device_queued(&rdev->ibdev);
1720                break;
1721
1722        default:
1723                sch_work = true;
1724                break;
1725        }
1726        if (sch_work) {
1727                /* Allocate for the deferred task */
1728                re_work = kzalloc(sizeof(*re_work), GFP_ATOMIC);
1729                if (re_work) {
1730                        get_device(&rdev->ibdev.dev);
1731                        re_work->rdev = rdev;
1732                        re_work->event = event;
1733                        re_work->vlan_dev = (real_dev == netdev ?
1734                                             NULL : netdev);
1735                        INIT_WORK(&re_work->work, bnxt_re_task);
1736                        queue_work(bnxt_re_wq, &re_work->work);
1737                }
1738        }
1739
1740exit:
1741        if (rdev && release)
1742                ib_device_put(&rdev->ibdev);
1743        return NOTIFY_DONE;
1744}
1745
1746static struct notifier_block bnxt_re_netdev_notifier = {
1747        .notifier_call = bnxt_re_netdev_event
1748};
1749
1750static int __init bnxt_re_mod_init(void)
1751{
1752        int rc = 0;
1753
1754        pr_info("%s: %s", ROCE_DRV_MODULE_NAME, version);
1755
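    /* Single-threaded workqueue keeps deferred netdev events ordered */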
1756        bnxt_re_wq = create_singlethread_workqueue("bnxt_re");
1757        if (!bnxt_re_wq)
1758                return -ENOMEM;
1759
1760        INIT_LIST_HEAD(&bnxt_re_dev_list);
1761
1762        rc = register_netdevice_notifier(&bnxt_re_netdev_notifier);
1763        if (rc) {
1764                pr_err("%s: Cannot register netdevice notifier\n",
1765                       ROCE_DRV_MODULE_NAME);
1766                goto err_netdev;
1767        }
1768        return 0;
1769
1770err_netdev:
1771        destroy_workqueue(bnxt_re_wq);
1772
1773        return rc;
1774}
1775
1776static void __exit bnxt_re_mod_exit(void)
1777{
1778        struct bnxt_re_dev *rdev;
1779
1780        unregister_netdevice_notifier(&bnxt_re_netdev_notifier);
1781        if (bnxt_re_wq)
1782                destroy_workqueue(bnxt_re_wq);
1783        list_for_each_entry(rdev, &bnxt_re_dev_list, list) {
1784        /* VF device removal must happen before PF device removal.
1785         * Queue the VF unregistrations first so that the VFs are
1786         * torn down before the PF during the call to
1787         * ib_unregister_driver() below.
1788         */
1789                if (rdev->is_virtfn)
1790                        ib_unregister_device(&rdev->ibdev);
1791        }
1792        ib_unregister_driver(RDMA_DRIVER_BNXT_RE);
1793}
1794
1795module_init(bnxt_re_mod_init);
1796module_exit(bnxt_re_mod_exit);
1797