linux/drivers/vdpa/mlx5/net/mlx5_vnet.c
// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/* Copyright (c) 2020 Mellanox Technologies Ltd. */

#include <linux/module.h>
#include <linux/vdpa.h>
#include <linux/vringh.h>
#include <uapi/linux/virtio_net.h>
#include <uapi/linux/virtio_ids.h>
#include <linux/virtio_config.h>
#include <linux/auxiliary_bus.h>
#include <linux/mlx5/cq.h>
#include <linux/mlx5/qp.h>
#include <linux/mlx5/device.h>
#include <linux/mlx5/driver.h>
#include <linux/mlx5/vport.h>
#include <linux/mlx5/fs.h>
#include <linux/mlx5/mlx5_ifc_vdpa.h>
#include <linux/mlx5/mpfs.h>
#include "mlx5_vdpa.h"

MODULE_AUTHOR("Eli Cohen <eli@mellanox.com>");
MODULE_DESCRIPTION("Mellanox VDPA driver");
MODULE_LICENSE("Dual BSD/GPL");

#define to_mlx5_vdpa_ndev(__mvdev)                                             \
        container_of(__mvdev, struct mlx5_vdpa_net, mvdev)
#define to_mvdev(__vdev) container_of((__vdev), struct mlx5_vdpa_dev, vdev)

#define VALID_FEATURES_MASK                                                                        \
        (BIT_ULL(VIRTIO_NET_F_CSUM) | BIT_ULL(VIRTIO_NET_F_GUEST_CSUM) |                                   \
         BIT_ULL(VIRTIO_NET_F_CTRL_GUEST_OFFLOADS) | BIT_ULL(VIRTIO_NET_F_MTU) | BIT_ULL(VIRTIO_NET_F_MAC) |   \
         BIT_ULL(VIRTIO_NET_F_GUEST_TSO4) | BIT_ULL(VIRTIO_NET_F_GUEST_TSO6) |                             \
         BIT_ULL(VIRTIO_NET_F_GUEST_ECN) | BIT_ULL(VIRTIO_NET_F_GUEST_UFO) | BIT_ULL(VIRTIO_NET_F_HOST_TSO4) | \
         BIT_ULL(VIRTIO_NET_F_HOST_TSO6) | BIT_ULL(VIRTIO_NET_F_HOST_ECN) | BIT_ULL(VIRTIO_NET_F_HOST_UFO) |   \
         BIT_ULL(VIRTIO_NET_F_MRG_RXBUF) | BIT_ULL(VIRTIO_NET_F_STATUS) | BIT_ULL(VIRTIO_NET_F_CTRL_VQ) |      \
         BIT_ULL(VIRTIO_NET_F_CTRL_RX) | BIT_ULL(VIRTIO_NET_F_CTRL_VLAN) |                                 \
         BIT_ULL(VIRTIO_NET_F_CTRL_RX_EXTRA) | BIT_ULL(VIRTIO_NET_F_GUEST_ANNOUNCE) |                      \
         BIT_ULL(VIRTIO_NET_F_MQ) | BIT_ULL(VIRTIO_NET_F_CTRL_MAC_ADDR) | BIT_ULL(VIRTIO_NET_F_HASH_REPORT) |  \
         BIT_ULL(VIRTIO_NET_F_RSS) | BIT_ULL(VIRTIO_NET_F_RSC_EXT) | BIT_ULL(VIRTIO_NET_F_STANDBY) |           \
         BIT_ULL(VIRTIO_NET_F_SPEED_DUPLEX) | BIT_ULL(VIRTIO_F_NOTIFY_ON_EMPTY) |                          \
         BIT_ULL(VIRTIO_F_ANY_LAYOUT) | BIT_ULL(VIRTIO_F_VERSION_1) | BIT_ULL(VIRTIO_F_ACCESS_PLATFORM) |      \
         BIT_ULL(VIRTIO_F_RING_PACKED) | BIT_ULL(VIRTIO_F_ORDER_PLATFORM) | BIT_ULL(VIRTIO_F_SR_IOV))

#define VALID_STATUS_MASK                                                                          \
        (VIRTIO_CONFIG_S_ACKNOWLEDGE | VIRTIO_CONFIG_S_DRIVER | VIRTIO_CONFIG_S_DRIVER_OK |        \
         VIRTIO_CONFIG_S_FEATURES_OK | VIRTIO_CONFIG_S_NEEDS_RESET | VIRTIO_CONFIG_S_FAILED)

#define MLX5_FEATURE(_mvdev, _feature) (!!((_mvdev)->actual_features & BIT_ULL(_feature)))

struct mlx5_vdpa_net_resources {
        u32 tisn;
        u32 tdn;
        u32 tirn;
        u32 rqtn;
        bool valid;
};

struct mlx5_vdpa_cq_buf {
        struct mlx5_frag_buf_ctrl fbc;
        struct mlx5_frag_buf frag_buf;
        int cqe_size;
        int nent;
};

struct mlx5_vdpa_cq {
        struct mlx5_core_cq mcq;
        struct mlx5_vdpa_cq_buf buf;
        struct mlx5_db db;
        int cqe;
};

struct mlx5_vdpa_umem {
        struct mlx5_frag_buf_ctrl fbc;
        struct mlx5_frag_buf frag_buf;
        int size;
        u32 id;
};

struct mlx5_vdpa_qp {
        struct mlx5_core_qp mqp;
        struct mlx5_frag_buf frag_buf;
        struct mlx5_db db;
        u16 head;
        bool fw;
};

struct mlx5_vq_restore_info {
        u32 num_ent;
        u64 desc_addr;
        u64 device_addr;
        u64 driver_addr;
        u16 avail_index;
        u16 used_index;
        bool ready;
        bool restore;
};

struct mlx5_vdpa_virtqueue {
        bool ready;
        u64 desc_addr;
        u64 device_addr;
        u64 driver_addr;
        u32 num_ent;

        /* Resources for implementing the notification channel from the device
         * to the driver. fwqp is the firmware end of an RC connection; the
         * other end is vqqp, used by the driver. cq is where completions are
         * reported.
         */
        struct mlx5_vdpa_cq cq;
        struct mlx5_vdpa_qp fwqp;
        struct mlx5_vdpa_qp vqqp;

        /* Umem resources are required for virtqueue operation. Their use is
         * internal to the device and they must be provided by the driver.
         */
        struct mlx5_vdpa_umem umem1;
        struct mlx5_vdpa_umem umem2;
        struct mlx5_vdpa_umem umem3;

        bool initialized;
        int index;
        u32 virtq_id;
        struct mlx5_vdpa_net *ndev;
        u16 avail_idx;
        u16 used_idx;
        int fw_state;

        /* keep last in the struct */
        struct mlx5_vq_restore_info ri;
};
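
/* Notification flow, for orientation: the firmware end of the RC connection
 * (fwqp) sends a message towards vqqp whenever the virtqueue needs the
 * driver's attention. The resulting completion on cq is handled by
 * mlx5_vdpa_cq_comp(), which consumes the CQEs, replenishes vqqp's receive
 * ring via rx_post() and invokes the vdpa event callback registered for
 * this virtqueue.
 */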

/* We will remove this limitation once mlx5_vdpa_alloc_resources()
 * provides for driver space allocation
 */
#define MLX5_MAX_SUPPORTED_VQS 16

static bool is_index_valid(struct mlx5_vdpa_dev *mvdev, u16 idx)
{
        if (unlikely(idx > mvdev->max_idx))
                return false;

        return true;
}

struct mlx5_vdpa_net {
        struct mlx5_vdpa_dev mvdev;
        struct mlx5_vdpa_net_resources res;
        struct virtio_net_config config;
        struct mlx5_vdpa_virtqueue vqs[MLX5_MAX_SUPPORTED_VQS];
        struct vdpa_callback event_cbs[MLX5_MAX_SUPPORTED_VQS + 1];

        /* Serialize vq resources creation and destruction. This is required
         * since the memory map might change and we need to destroy and
         * re-create resources while the driver is operational.
         */
        struct mutex reslock;
        struct mlx5_flow_table *rxft;
        struct mlx5_fc *rx_counter;
        struct mlx5_flow_handle *rx_rule;
        bool setup;
        u16 mtu;
        u32 cur_num_vqs;
};

static void free_resources(struct mlx5_vdpa_net *ndev);
static void init_mvqs(struct mlx5_vdpa_net *ndev);
static int setup_driver(struct mlx5_vdpa_dev *mvdev);
static void teardown_driver(struct mlx5_vdpa_net *ndev);

static bool mlx5_vdpa_debug;

#define MLX5_CVQ_MAX_ENT 16

#define MLX5_LOG_VIO_FLAG(_feature)                                                                \
        do {                                                                                       \
                if (features & BIT_ULL(_feature))                                                  \
                        mlx5_vdpa_info(mvdev, "%s\n", #_feature);                                  \
        } while (0)

#define MLX5_LOG_VIO_STAT(_status)                                                                 \
        do {                                                                                       \
                if (status & (_status))                                                            \
                        mlx5_vdpa_info(mvdev, "%s\n", #_status);                                   \
        } while (0)

/* TODO: cross-endian support */
static inline bool mlx5_vdpa_is_little_endian(struct mlx5_vdpa_dev *mvdev)
{
        return virtio_legacy_is_little_endian() ||
                (mvdev->actual_features & BIT_ULL(VIRTIO_F_VERSION_1));
}

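/* Convert 16-bit virtio fields (e.g. the virtqueue_pairs field of struct
 * virtio_net_ctrl_mq read from the control VQ) between virtio and CPU
 * endianness, honoring whether VIRTIO_F_VERSION_1 was negotiated.
 */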
static u16 mlx5vdpa16_to_cpu(struct mlx5_vdpa_dev *mvdev, __virtio16 val)
{
        return __virtio16_to_cpu(mlx5_vdpa_is_little_endian(mvdev), val);
}

static __virtio16 cpu_to_mlx5vdpa16(struct mlx5_vdpa_dev *mvdev, u16 val)
{
        return __cpu_to_virtio16(mlx5_vdpa_is_little_endian(mvdev), val);
}

static inline u32 mlx5_vdpa_max_qps(int max_vqs)
{
        return max_vqs / 2;
}

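/* Virtqueue index layout (illustrative): with VIRTIO_NET_F_MQ negotiated and
 * N = mlx5_vdpa_max_qps(max_vqs) queue pairs,
 *
 *     0, 1, ..., 2N - 2, 2N - 1    RX/TX data virtqueues
 *     2N                           control virtqueue
 *
 * Without MQ only a single pair is used, so the control VQ index is 2.
 */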
static u16 ctrl_vq_idx(struct mlx5_vdpa_dev *mvdev)
{
        if (!(mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_MQ)))
                return 2;

        return 2 * mlx5_vdpa_max_qps(mvdev->max_vqs);
}

static bool is_ctrl_vq_idx(struct mlx5_vdpa_dev *mvdev, u16 idx)
{
        return idx == ctrl_vq_idx(mvdev);
}

static void print_status(struct mlx5_vdpa_dev *mvdev, u8 status, bool set)
{
        if (status & ~VALID_STATUS_MASK)
                mlx5_vdpa_warn(mvdev, "Warning: there are invalid status bits 0x%x\n",
                               status & ~VALID_STATUS_MASK);

        if (!mlx5_vdpa_debug)
                return;

        mlx5_vdpa_info(mvdev, "driver status %s\n", set ? "set" : "get");
        if (set && !status) {
                mlx5_vdpa_info(mvdev, "driver resets the device\n");
                return;
        }

        MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_ACKNOWLEDGE);
        MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_DRIVER);
        MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_DRIVER_OK);
        MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_FEATURES_OK);
        MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_NEEDS_RESET);
        MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_FAILED);
}

static void print_features(struct mlx5_vdpa_dev *mvdev, u64 features, bool set)
{
        if (features & ~VALID_FEATURES_MASK)
                mlx5_vdpa_warn(mvdev, "There are invalid feature bits 0x%llx\n",
                               features & ~VALID_FEATURES_MASK);

        if (!mlx5_vdpa_debug)
                return;

        mlx5_vdpa_info(mvdev, "driver %s feature bits:\n", set ? "sets" : "reads");
        if (!features)
                mlx5_vdpa_info(mvdev, "all feature bits are cleared\n");

        MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CSUM);
        MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_CSUM);
        MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_GUEST_OFFLOADS);
        MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_MTU);
        MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_MAC);
        MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_TSO4);
        MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_TSO6);
        MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_ECN);
        MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_UFO);
        MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HOST_TSO4);
        MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HOST_TSO6);
        MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HOST_ECN);
        MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HOST_UFO);
        MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_MRG_RXBUF);
        MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_STATUS);
        MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_VQ);
        MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_RX);
        MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_VLAN);
        MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_RX_EXTRA);
        MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_ANNOUNCE);
        MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_MQ);
        MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_MAC_ADDR);
        MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HASH_REPORT);
        MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_RSS);
        MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_RSC_EXT);
        MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_STANDBY);
        MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_SPEED_DUPLEX);
        MLX5_LOG_VIO_FLAG(VIRTIO_F_NOTIFY_ON_EMPTY);
        MLX5_LOG_VIO_FLAG(VIRTIO_F_ANY_LAYOUT);
        MLX5_LOG_VIO_FLAG(VIRTIO_F_VERSION_1);
        MLX5_LOG_VIO_FLAG(VIRTIO_F_ACCESS_PLATFORM);
        MLX5_LOG_VIO_FLAG(VIRTIO_F_RING_PACKED);
        MLX5_LOG_VIO_FLAG(VIRTIO_F_ORDER_PLATFORM);
        MLX5_LOG_VIO_FLAG(VIRTIO_F_SR_IOV);
}

static int create_tis(struct mlx5_vdpa_net *ndev)
{
        struct mlx5_vdpa_dev *mvdev = &ndev->mvdev;
        u32 in[MLX5_ST_SZ_DW(create_tis_in)] = {};
        void *tisc;
        int err;

        tisc = MLX5_ADDR_OF(create_tis_in, in, ctx);
        MLX5_SET(tisc, tisc, transport_domain, ndev->res.tdn);
        err = mlx5_vdpa_create_tis(mvdev, in, &ndev->res.tisn);
        if (err)
                mlx5_vdpa_warn(mvdev, "create TIS (%d)\n", err);

        return err;
}

static void destroy_tis(struct mlx5_vdpa_net *ndev)
{
        mlx5_vdpa_destroy_tis(&ndev->mvdev, ndev->res.tisn);
}

#define MLX5_VDPA_CQE_SIZE 64
#define MLX5_VDPA_LOG_CQE_SIZE ilog2(MLX5_VDPA_CQE_SIZE)

static int cq_frag_buf_alloc(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_cq_buf *buf, int nent)
{
        struct mlx5_frag_buf *frag_buf = &buf->frag_buf;
        u8 log_wq_stride = MLX5_VDPA_LOG_CQE_SIZE;
        u8 log_wq_sz = MLX5_VDPA_LOG_CQE_SIZE;
        int err;

        err = mlx5_frag_buf_alloc_node(ndev->mvdev.mdev, nent * MLX5_VDPA_CQE_SIZE, frag_buf,
                                       ndev->mvdev.mdev->priv.numa_node);
        if (err)
                return err;

        mlx5_init_fbc(frag_buf->frags, log_wq_stride, log_wq_sz, &buf->fbc);

        buf->cqe_size = MLX5_VDPA_CQE_SIZE;
        buf->nent = nent;

        return 0;
}

static int umem_frag_buf_alloc(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_umem *umem, int size)
{
        struct mlx5_frag_buf *frag_buf = &umem->frag_buf;

        return mlx5_frag_buf_alloc_node(ndev->mvdev.mdev, size, frag_buf,
                                        ndev->mvdev.mdev->priv.numa_node);
}

static void cq_frag_buf_free(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_cq_buf *buf)
{
        mlx5_frag_buf_free(ndev->mvdev.mdev, &buf->frag_buf);
}

static void *get_cqe(struct mlx5_vdpa_cq *vcq, int n)
{
        return mlx5_frag_buf_get_wqe(&vcq->buf.fbc, n);
}

static void cq_frag_buf_init(struct mlx5_vdpa_cq *vcq, struct mlx5_vdpa_cq_buf *buf)
{
        struct mlx5_cqe64 *cqe64;
        void *cqe;
        int i;

        for (i = 0; i < buf->nent; i++) {
                cqe = get_cqe(vcq, i);
                cqe64 = cqe;
                cqe64->op_own = MLX5_CQE_INVALID << 4;
        }
}

static void *get_sw_cqe(struct mlx5_vdpa_cq *cq, int n)
{
        struct mlx5_cqe64 *cqe64 = get_cqe(cq, n & (cq->cqe - 1));

        if (likely(get_cqe_opcode(cqe64) != MLX5_CQE_INVALID) &&
            !((cqe64->op_own & MLX5_CQE_OWNER_MASK) ^ !!(n & cq->cqe)))
                return cqe64;

        return NULL;
}

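/* Post n receive WQEs on the driver-side QP by advancing the head counter
 * and publishing it through the doorbell record. The QP is used here purely
 * as a notification channel (see the comment in struct mlx5_vdpa_virtqueue),
 * so nothing is done with the received data itself.
 */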
static void rx_post(struct mlx5_vdpa_qp *vqp, int n)
{
        vqp->head += n;
        vqp->db.db[0] = cpu_to_be32(vqp->head);
}

static void qp_prepare(struct mlx5_vdpa_net *ndev, bool fw, void *in,
                       struct mlx5_vdpa_virtqueue *mvq, u32 num_ent)
{
        struct mlx5_vdpa_qp *vqp;
        __be64 *pas;
        void *qpc;

        vqp = fw ? &mvq->fwqp : &mvq->vqqp;
        MLX5_SET(create_qp_in, in, uid, ndev->mvdev.res.uid);
        qpc = MLX5_ADDR_OF(create_qp_in, in, qpc);
        if (vqp->fw) {
                /* The firmware QP is allocated by the driver for the
                 * firmware's use, so we can skip some of the params as they
                 * will be chosen by the firmware.
                 */
                qpc = MLX5_ADDR_OF(create_qp_in, in, qpc);
                MLX5_SET(qpc, qpc, rq_type, MLX5_ZERO_LEN_RQ);
                MLX5_SET(qpc, qpc, no_sq, 1);
                return;
        }

        MLX5_SET(qpc, qpc, st, MLX5_QP_ST_RC);
        MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED);
        MLX5_SET(qpc, qpc, pd, ndev->mvdev.res.pdn);
        MLX5_SET(qpc, qpc, mtu, MLX5_QPC_MTU_256_BYTES);
        MLX5_SET(qpc, qpc, uar_page, ndev->mvdev.res.uar->index);
        MLX5_SET(qpc, qpc, log_page_size, vqp->frag_buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT);
        MLX5_SET(qpc, qpc, no_sq, 1);
        MLX5_SET(qpc, qpc, cqn_rcv, mvq->cq.mcq.cqn);
        MLX5_SET(qpc, qpc, log_rq_size, ilog2(num_ent));
        MLX5_SET(qpc, qpc, rq_type, MLX5_NON_ZERO_RQ);
        pas = (__be64 *)MLX5_ADDR_OF(create_qp_in, in, pas);
        mlx5_fill_page_frag_array(&vqp->frag_buf, pas);
}

static int rq_buf_alloc(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_qp *vqp, u32 num_ent)
{
        return mlx5_frag_buf_alloc_node(ndev->mvdev.mdev,
                                        num_ent * sizeof(struct mlx5_wqe_data_seg), &vqp->frag_buf,
                                        ndev->mvdev.mdev->priv.numa_node);
}

static void rq_buf_free(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_qp *vqp)
{
        mlx5_frag_buf_free(ndev->mvdev.mdev, &vqp->frag_buf);
}

static int qp_create(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq,
                     struct mlx5_vdpa_qp *vqp)
{
        struct mlx5_core_dev *mdev = ndev->mvdev.mdev;
        int inlen = MLX5_ST_SZ_BYTES(create_qp_in);
        u32 out[MLX5_ST_SZ_DW(create_qp_out)] = {};
        void *qpc;
        void *in;
        int err;

        if (!vqp->fw) {
                vqp = &mvq->vqqp;
                err = rq_buf_alloc(ndev, vqp, mvq->num_ent);
                if (err)
                        return err;

                err = mlx5_db_alloc(ndev->mvdev.mdev, &vqp->db);
                if (err)
                        goto err_db;
                inlen += vqp->frag_buf.npages * sizeof(__be64);
        }

        in = kzalloc(inlen, GFP_KERNEL);
        if (!in) {
                err = -ENOMEM;
                goto err_kzalloc;
        }

        qp_prepare(ndev, vqp->fw, in, mvq, mvq->num_ent);
        qpc = MLX5_ADDR_OF(create_qp_in, in, qpc);
        MLX5_SET(qpc, qpc, st, MLX5_QP_ST_RC);
        MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED);
        MLX5_SET(qpc, qpc, pd, ndev->mvdev.res.pdn);
        MLX5_SET(qpc, qpc, mtu, MLX5_QPC_MTU_256_BYTES);
        if (!vqp->fw)
                MLX5_SET64(qpc, qpc, dbr_addr, vqp->db.dma);
        MLX5_SET(create_qp_in, in, opcode, MLX5_CMD_OP_CREATE_QP);
        err = mlx5_cmd_exec(mdev, in, inlen, out, sizeof(out));
        kfree(in);
        if (err)
                goto err_kzalloc;

        vqp->mqp.uid = ndev->mvdev.res.uid;
        vqp->mqp.qpn = MLX5_GET(create_qp_out, out, qpn);

        if (!vqp->fw)
                rx_post(vqp, mvq->num_ent);

        return 0;

err_kzalloc:
        if (!vqp->fw)
                mlx5_db_free(ndev->mvdev.mdev, &vqp->db);
err_db:
        if (!vqp->fw)
                rq_buf_free(ndev, vqp);

        return err;
}

static void qp_destroy(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_qp *vqp)
{
        u32 in[MLX5_ST_SZ_DW(destroy_qp_in)] = {};

        MLX5_SET(destroy_qp_in, in, opcode, MLX5_CMD_OP_DESTROY_QP);
        MLX5_SET(destroy_qp_in, in, qpn, vqp->mqp.qpn);
        MLX5_SET(destroy_qp_in, in, uid, ndev->mvdev.res.uid);
        if (mlx5_cmd_exec_in(ndev->mvdev.mdev, destroy_qp, in))
                mlx5_vdpa_warn(&ndev->mvdev, "destroy qp 0x%x\n", vqp->mqp.qpn);
        if (!vqp->fw) {
                mlx5_db_free(ndev->mvdev.mdev, &vqp->db);
                rq_buf_free(ndev, vqp);
        }
}

static void *next_cqe_sw(struct mlx5_vdpa_cq *cq)
{
        return get_sw_cqe(cq, cq->mcq.cons_index);
}

static int mlx5_vdpa_poll_one(struct mlx5_vdpa_cq *vcq)
{
        struct mlx5_cqe64 *cqe64;

        cqe64 = next_cqe_sw(vcq);
        if (!cqe64)
                return -EAGAIN;

        vcq->mcq.cons_index++;
        return 0;
}

static void mlx5_vdpa_handle_completions(struct mlx5_vdpa_virtqueue *mvq, int num)
{
        struct mlx5_vdpa_net *ndev = mvq->ndev;
        struct vdpa_callback *event_cb;

        event_cb = &ndev->event_cbs[mvq->index];
        mlx5_cq_set_ci(&mvq->cq.mcq);

        /* Make sure the CQ consumer index update is visible to the hardware
         * before updating the RX doorbell record.
         */
        dma_wmb();
        rx_post(&mvq->vqqp, num);
        if (event_cb->callback)
                event_cb->callback(event_cb->private);
}

static void mlx5_vdpa_cq_comp(struct mlx5_core_cq *mcq, struct mlx5_eqe *eqe)
{
        struct mlx5_vdpa_virtqueue *mvq = container_of(mcq, struct mlx5_vdpa_virtqueue, cq.mcq);
        struct mlx5_vdpa_net *ndev = mvq->ndev;
        void __iomem *uar_page = ndev->mvdev.res.uar->map;
        int num = 0;

        while (!mlx5_vdpa_poll_one(&mvq->cq)) {
                num++;
                if (num > mvq->num_ent / 2) {
                        /* If completions keep coming while we poll, we want
                         * to let the hardware know that we consumed them by
                         * updating the doorbell record. We also let the vdpa
                         * core know about this so it can pass it on to the
                         * virtio driver in the guest.
                         */
                        mlx5_vdpa_handle_completions(mvq, num);
                        num = 0;
                }
        }

        if (num)
                mlx5_vdpa_handle_completions(mvq, num);

        mlx5_cq_arm(&mvq->cq.mcq, MLX5_CQ_DB_REQ_NOT, uar_page, mvq->cq.mcq.cons_index);
}

static int cq_create(struct mlx5_vdpa_net *ndev, u16 idx, u32 num_ent)
{
        struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx];
        struct mlx5_core_dev *mdev = ndev->mvdev.mdev;
        void __iomem *uar_page = ndev->mvdev.res.uar->map;
        u32 out[MLX5_ST_SZ_DW(create_cq_out)];
        struct mlx5_vdpa_cq *vcq = &mvq->cq;
        __be64 *pas;
        int inlen;
        void *cqc;
        void *in;
        int err;
        int eqn;

        err = mlx5_db_alloc(mdev, &vcq->db);
        if (err)
                return err;

        vcq->mcq.set_ci_db = vcq->db.db;
        vcq->mcq.arm_db = vcq->db.db + 1;
        vcq->mcq.cqe_sz = 64;

        err = cq_frag_buf_alloc(ndev, &vcq->buf, num_ent);
        if (err)
                goto err_db;

        cq_frag_buf_init(vcq, &vcq->buf);

        inlen = MLX5_ST_SZ_BYTES(create_cq_in) +
                MLX5_FLD_SZ_BYTES(create_cq_in, pas[0]) * vcq->buf.frag_buf.npages;
        in = kzalloc(inlen, GFP_KERNEL);
        if (!in) {
                err = -ENOMEM;
                goto err_vzalloc;
        }

        MLX5_SET(create_cq_in, in, uid, ndev->mvdev.res.uid);
        pas = (__be64 *)MLX5_ADDR_OF(create_cq_in, in, pas);
        mlx5_fill_page_frag_array(&vcq->buf.frag_buf, pas);

        cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context);
        MLX5_SET(cqc, cqc, log_page_size, vcq->buf.frag_buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT);

        /* Use vector 0 by default. Consider adding code to choose the least
         * used vector.
         */
        err = mlx5_vector2eqn(mdev, 0, &eqn);
        if (err)
                goto err_vec;

        cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context);
        MLX5_SET(cqc, cqc, log_cq_size, ilog2(num_ent));
        MLX5_SET(cqc, cqc, uar_page, ndev->mvdev.res.uar->index);
        MLX5_SET(cqc, cqc, c_eqn_or_apu_element, eqn);
        MLX5_SET64(cqc, cqc, dbr_addr, vcq->db.dma);

        err = mlx5_core_create_cq(mdev, &vcq->mcq, in, inlen, out, sizeof(out));
        if (err)
                goto err_vec;

        vcq->mcq.comp = mlx5_vdpa_cq_comp;
        vcq->cqe = num_ent;
        vcq->mcq.set_ci_db = vcq->db.db;
        vcq->mcq.arm_db = vcq->db.db + 1;
        mlx5_cq_arm(&mvq->cq.mcq, MLX5_CQ_DB_REQ_NOT, uar_page, mvq->cq.mcq.cons_index);
        kfree(in);
        return 0;

err_vec:
        kfree(in);
err_vzalloc:
        cq_frag_buf_free(ndev, &vcq->buf);
err_db:
        mlx5_db_free(ndev->mvdev.mdev, &vcq->db);
        return err;
}

static void cq_destroy(struct mlx5_vdpa_net *ndev, u16 idx)
{
        struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx];
        struct mlx5_core_dev *mdev = ndev->mvdev.mdev;
        struct mlx5_vdpa_cq *vcq = &mvq->cq;

        if (mlx5_core_destroy_cq(mdev, &vcq->mcq)) {
                mlx5_vdpa_warn(&ndev->mvdev, "destroy CQ 0x%x\n", vcq->mcq.cqn);
                return;
        }
        cq_frag_buf_free(ndev, &vcq->buf);
        mlx5_db_free(ndev->mvdev.mdev, &vcq->db);
}

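/* The size of each umem is a linear function of the queue size, with the
 * coefficients taken from firmware capabilities:
 *
 *     size = umem_<n>_buffer_param_a * num_ent + umem_<n>_buffer_param_b
 *
 * For example (hypothetical capability values), with param_a = 128,
 * param_b = 4096 and a 256-entry virtqueue,
 * size = 128 * 256 + 4096 = 36864 bytes.
 */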
static void set_umem_size(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, int num,
                          struct mlx5_vdpa_umem **umemp)
{
        struct mlx5_core_dev *mdev = ndev->mvdev.mdev;
        int p_a;
        int p_b;

        switch (num) {
        case 1:
                p_a = MLX5_CAP_DEV_VDPA_EMULATION(mdev, umem_1_buffer_param_a);
                p_b = MLX5_CAP_DEV_VDPA_EMULATION(mdev, umem_1_buffer_param_b);
                *umemp = &mvq->umem1;
                break;
        case 2:
                p_a = MLX5_CAP_DEV_VDPA_EMULATION(mdev, umem_2_buffer_param_a);
                p_b = MLX5_CAP_DEV_VDPA_EMULATION(mdev, umem_2_buffer_param_b);
                *umemp = &mvq->umem2;
                break;
        case 3:
                p_a = MLX5_CAP_DEV_VDPA_EMULATION(mdev, umem_3_buffer_param_a);
                p_b = MLX5_CAP_DEV_VDPA_EMULATION(mdev, umem_3_buffer_param_b);
                *umemp = &mvq->umem3;
                break;
        }
        (*umemp)->size = p_a * mvq->num_ent + p_b;
}

static void umem_frag_buf_free(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_umem *umem)
{
        mlx5_frag_buf_free(ndev->mvdev.mdev, &umem->frag_buf);
}

static int create_umem(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, int num)
{
        int inlen;
        u32 out[MLX5_ST_SZ_DW(create_umem_out)] = {};
        void *um;
        void *in;
        int err;
        __be64 *pas;
        struct mlx5_vdpa_umem *umem;

        set_umem_size(ndev, mvq, num, &umem);
        err = umem_frag_buf_alloc(ndev, umem, umem->size);
        if (err)
                return err;

        inlen = MLX5_ST_SZ_BYTES(create_umem_in) + MLX5_ST_SZ_BYTES(mtt) * umem->frag_buf.npages;

        in = kzalloc(inlen, GFP_KERNEL);
        if (!in) {
                err = -ENOMEM;
                goto err_in;
        }

        MLX5_SET(create_umem_in, in, opcode, MLX5_CMD_OP_CREATE_UMEM);
        MLX5_SET(create_umem_in, in, uid, ndev->mvdev.res.uid);
        um = MLX5_ADDR_OF(create_umem_in, in, umem);
        MLX5_SET(umem, um, log_page_size, umem->frag_buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT);
        MLX5_SET64(umem, um, num_of_mtt, umem->frag_buf.npages);

        pas = (__be64 *)MLX5_ADDR_OF(umem, um, mtt[0]);
        mlx5_fill_page_frag_array_perm(&umem->frag_buf, pas, MLX5_MTT_PERM_RW);

        err = mlx5_cmd_exec(ndev->mvdev.mdev, in, inlen, out, sizeof(out));
        if (err) {
                mlx5_vdpa_warn(&ndev->mvdev, "create umem(%d)\n", err);
                goto err_cmd;
        }

        kfree(in);
        umem->id = MLX5_GET(create_umem_out, out, umem_id);

        return 0;

err_cmd:
        kfree(in);
err_in:
        umem_frag_buf_free(ndev, umem);
        return err;
}

static void umem_destroy(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, int num)
{
        u32 in[MLX5_ST_SZ_DW(destroy_umem_in)] = {};
        u32 out[MLX5_ST_SZ_DW(destroy_umem_out)] = {};
        struct mlx5_vdpa_umem *umem;

        switch (num) {
        case 1:
                umem = &mvq->umem1;
                break;
        case 2:
                umem = &mvq->umem2;
                break;
        case 3:
                umem = &mvq->umem3;
                break;
        }

        MLX5_SET(destroy_umem_in, in, opcode, MLX5_CMD_OP_DESTROY_UMEM);
        MLX5_SET(destroy_umem_in, in, umem_id, umem->id);
        if (mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, sizeof(out)))
                return;

        umem_frag_buf_free(ndev, umem);
}

static int umems_create(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
{
        int num;
        int err;

        for (num = 1; num <= 3; num++) {
                err = create_umem(ndev, mvq, num);
                if (err)
                        goto err_umem;
        }
        return 0;

err_umem:
        for (num--; num > 0; num--)
                umem_destroy(ndev, mvq, num);

        return err;
}

static void umems_destroy(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
{
        int num;

        for (num = 3; num > 0; num--)
                umem_destroy(ndev, mvq, num);
}

static int get_queue_type(struct mlx5_vdpa_net *ndev)
{
        u32 type_mask;

        type_mask = MLX5_CAP_DEV_VDPA_EMULATION(ndev->mvdev.mdev, virtio_queue_type);

        /* prefer split queue */
        if (type_mask & MLX5_VIRTIO_EMULATION_CAP_VIRTIO_QUEUE_TYPE_SPLIT)
                return MLX5_VIRTIO_EMULATION_VIRTIO_QUEUE_TYPE_SPLIT;

        WARN_ON(!(type_mask & MLX5_VIRTIO_EMULATION_CAP_VIRTIO_QUEUE_TYPE_PACKED));

        return MLX5_VIRTIO_EMULATION_VIRTIO_QUEUE_TYPE_PACKED;
}

static bool vq_is_tx(u16 idx)
{
        return idx % 2;
}

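/* Pack the features the firmware expects in bits 12:3 of the virtqueue
 * object. Worked example: if the negotiated features include
 * VIRTIO_NET_F_CSUM and VIRTIO_NET_F_HOST_TSO4, the returned mask is
 * (1 << 7) | (1 << 9) = 0x280.
 */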
static u16 get_features_12_3(u64 features)
{
        return (!!(features & BIT_ULL(VIRTIO_NET_F_HOST_TSO4)) << 9) |
               (!!(features & BIT_ULL(VIRTIO_NET_F_HOST_TSO6)) << 8) |
               (!!(features & BIT_ULL(VIRTIO_NET_F_CSUM)) << 7) |
               (!!(features & BIT_ULL(VIRTIO_NET_F_GUEST_CSUM)) << 6);
}

static int create_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
{
        int inlen = MLX5_ST_SZ_BYTES(create_virtio_net_q_in);
        u32 out[MLX5_ST_SZ_DW(create_virtio_net_q_out)] = {};
        void *obj_context;
        void *cmd_hdr;
        void *vq_ctx;
        void *in;
        int err;

        err = umems_create(ndev, mvq);
        if (err)
                return err;

        in = kzalloc(inlen, GFP_KERNEL);
        if (!in) {
                err = -ENOMEM;
                goto err_alloc;
        }

        cmd_hdr = MLX5_ADDR_OF(create_virtio_net_q_in, in, general_obj_in_cmd_hdr);

        MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_CREATE_GENERAL_OBJECT);
        MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_VIRTIO_NET_Q);
        MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, uid, ndev->mvdev.res.uid);

        obj_context = MLX5_ADDR_OF(create_virtio_net_q_in, in, obj_context);
        MLX5_SET(virtio_net_q_object, obj_context, hw_available_index, mvq->avail_idx);
        MLX5_SET(virtio_net_q_object, obj_context, hw_used_index, mvq->used_idx);
        MLX5_SET(virtio_net_q_object, obj_context, queue_feature_bit_mask_12_3,
                 get_features_12_3(ndev->mvdev.actual_features));
        vq_ctx = MLX5_ADDR_OF(virtio_net_q_object, obj_context, virtio_q_context);
        MLX5_SET(virtio_q, vq_ctx, virtio_q_type, get_queue_type(ndev));

        if (vq_is_tx(mvq->index))
                MLX5_SET(virtio_net_q_object, obj_context, tisn_or_qpn, ndev->res.tisn);

        MLX5_SET(virtio_q, vq_ctx, event_mode, MLX5_VIRTIO_Q_EVENT_MODE_QP_MODE);
        MLX5_SET(virtio_q, vq_ctx, queue_index, mvq->index);
        MLX5_SET(virtio_q, vq_ctx, event_qpn_or_msix, mvq->fwqp.mqp.qpn);
        MLX5_SET(virtio_q, vq_ctx, queue_size, mvq->num_ent);
        MLX5_SET(virtio_q, vq_ctx, virtio_version_1_0,
                 !!(ndev->mvdev.actual_features & BIT_ULL(VIRTIO_F_VERSION_1)));
        MLX5_SET64(virtio_q, vq_ctx, desc_addr, mvq->desc_addr);
        MLX5_SET64(virtio_q, vq_ctx, used_addr, mvq->device_addr);
        MLX5_SET64(virtio_q, vq_ctx, available_addr, mvq->driver_addr);
        MLX5_SET(virtio_q, vq_ctx, virtio_q_mkey, ndev->mvdev.mr.mkey.key);
        MLX5_SET(virtio_q, vq_ctx, umem_1_id, mvq->umem1.id);
        MLX5_SET(virtio_q, vq_ctx, umem_1_size, mvq->umem1.size);
        MLX5_SET(virtio_q, vq_ctx, umem_2_id, mvq->umem2.id);
        MLX5_SET(virtio_q, vq_ctx, umem_2_size, mvq->umem2.size);
        MLX5_SET(virtio_q, vq_ctx, umem_3_id, mvq->umem3.id);
        MLX5_SET(virtio_q, vq_ctx, umem_3_size, mvq->umem3.size);
        MLX5_SET(virtio_q, vq_ctx, pd, ndev->mvdev.res.pdn);
        if (MLX5_CAP_DEV_VDPA_EMULATION(ndev->mvdev.mdev, eth_frame_offload_type))
                MLX5_SET(virtio_q, vq_ctx, virtio_version_1_0, 1);

        err = mlx5_cmd_exec(ndev->mvdev.mdev, in, inlen, out, sizeof(out));
        if (err)
                goto err_cmd;

        kfree(in);
        mvq->virtq_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id);

        return 0;

err_cmd:
        kfree(in);
err_alloc:
        umems_destroy(ndev, mvq);
        return err;
}

static void destroy_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
{
        u32 in[MLX5_ST_SZ_DW(destroy_virtio_net_q_in)] = {};
        u32 out[MLX5_ST_SZ_DW(destroy_virtio_net_q_out)] = {};

        MLX5_SET(destroy_virtio_net_q_in, in, general_obj_out_cmd_hdr.opcode,
                 MLX5_CMD_OP_DESTROY_GENERAL_OBJECT);
        MLX5_SET(destroy_virtio_net_q_in, in, general_obj_out_cmd_hdr.obj_id, mvq->virtq_id);
        MLX5_SET(destroy_virtio_net_q_in, in, general_obj_out_cmd_hdr.uid, ndev->mvdev.res.uid);
        MLX5_SET(destroy_virtio_net_q_in, in, general_obj_out_cmd_hdr.obj_type,
                 MLX5_OBJ_TYPE_VIRTIO_NET_Q);
        if (mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, sizeof(out))) {
                mlx5_vdpa_warn(&ndev->mvdev, "destroy virtqueue 0x%x\n", mvq->virtq_id);
                return;
        }
        umems_destroy(ndev, mvq);
}

static u32 get_rqpn(struct mlx5_vdpa_virtqueue *mvq, bool fw)
{
        return fw ? mvq->vqqp.mqp.qpn : mvq->fwqp.mqp.qpn;
}

static u32 get_qpn(struct mlx5_vdpa_virtqueue *mvq, bool fw)
{
        return fw ? mvq->fwqp.mqp.qpn : mvq->vqqp.mqp.qpn;
}

static void alloc_inout(struct mlx5_vdpa_net *ndev, int cmd, void **in, int *inlen, void **out,
                        int *outlen, u32 qpn, u32 rqpn)
{
        void *qpc;
        void *pp;

        switch (cmd) {
        case MLX5_CMD_OP_2RST_QP:
                *inlen = MLX5_ST_SZ_BYTES(qp_2rst_in);
                *outlen = MLX5_ST_SZ_BYTES(qp_2rst_out);
                *in = kzalloc(*inlen, GFP_KERNEL);
                *out = kzalloc(*outlen, GFP_KERNEL);
                if (!*in || !*out)
                        goto outerr;

                MLX5_SET(qp_2rst_in, *in, opcode, cmd);
                MLX5_SET(qp_2rst_in, *in, uid, ndev->mvdev.res.uid);
                MLX5_SET(qp_2rst_in, *in, qpn, qpn);
                break;
        case MLX5_CMD_OP_RST2INIT_QP:
                *inlen = MLX5_ST_SZ_BYTES(rst2init_qp_in);
                *outlen = MLX5_ST_SZ_BYTES(rst2init_qp_out);
                *in = kzalloc(*inlen, GFP_KERNEL);
                *out = kzalloc(MLX5_ST_SZ_BYTES(rst2init_qp_out), GFP_KERNEL);
                if (!*in || !*out)
                        goto outerr;

                MLX5_SET(rst2init_qp_in, *in, opcode, cmd);
                MLX5_SET(rst2init_qp_in, *in, uid, ndev->mvdev.res.uid);
                MLX5_SET(rst2init_qp_in, *in, qpn, qpn);
                qpc = MLX5_ADDR_OF(rst2init_qp_in, *in, qpc);
                MLX5_SET(qpc, qpc, remote_qpn, rqpn);
                MLX5_SET(qpc, qpc, rwe, 1);
                pp = MLX5_ADDR_OF(qpc, qpc, primary_address_path);
                MLX5_SET(ads, pp, vhca_port_num, 1);
                break;
        case MLX5_CMD_OP_INIT2RTR_QP:
                *inlen = MLX5_ST_SZ_BYTES(init2rtr_qp_in);
                *outlen = MLX5_ST_SZ_BYTES(init2rtr_qp_out);
                *in = kzalloc(*inlen, GFP_KERNEL);
                *out = kzalloc(MLX5_ST_SZ_BYTES(init2rtr_qp_out), GFP_KERNEL);
                if (!*in || !*out)
                        goto outerr;

                MLX5_SET(init2rtr_qp_in, *in, opcode, cmd);
                MLX5_SET(init2rtr_qp_in, *in, uid, ndev->mvdev.res.uid);
                MLX5_SET(init2rtr_qp_in, *in, qpn, qpn);
                qpc = MLX5_ADDR_OF(rst2init_qp_in, *in, qpc);
                MLX5_SET(qpc, qpc, mtu, MLX5_QPC_MTU_256_BYTES);
                MLX5_SET(qpc, qpc, log_msg_max, 30);
                MLX5_SET(qpc, qpc, remote_qpn, rqpn);
                pp = MLX5_ADDR_OF(qpc, qpc, primary_address_path);
                MLX5_SET(ads, pp, fl, 1);
                break;
        case MLX5_CMD_OP_RTR2RTS_QP:
                *inlen = MLX5_ST_SZ_BYTES(rtr2rts_qp_in);
                *outlen = MLX5_ST_SZ_BYTES(rtr2rts_qp_out);
                *in = kzalloc(*inlen, GFP_KERNEL);
                *out = kzalloc(MLX5_ST_SZ_BYTES(rtr2rts_qp_out), GFP_KERNEL);
                if (!*in || !*out)
                        goto outerr;

                MLX5_SET(rtr2rts_qp_in, *in, opcode, cmd);
                MLX5_SET(rtr2rts_qp_in, *in, uid, ndev->mvdev.res.uid);
                MLX5_SET(rtr2rts_qp_in, *in, qpn, qpn);
                qpc = MLX5_ADDR_OF(rst2init_qp_in, *in, qpc);
                pp = MLX5_ADDR_OF(qpc, qpc, primary_address_path);
                MLX5_SET(ads, pp, ack_timeout, 14);
                MLX5_SET(qpc, qpc, retry_count, 7);
                MLX5_SET(qpc, qpc, rnr_retry, 7);
                break;
        default:
                goto outerr_nullify;
        }

        return;

outerr:
        kfree(*in);
        kfree(*out);
outerr_nullify:
        *in = NULL;
        *out = NULL;
}

static void free_inout(void *in, void *out)
{
        kfree(in);
        kfree(out);
}

/* Two QPs are used by each virtqueue. One is used by the driver and one by
 * the firmware. The fw argument indicates whether the QP in question is the
 * one used by the firmware.
 */
static int modify_qp(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, bool fw, int cmd)
{
        int outlen;
        int inlen;
        void *out;
        void *in;
        int err;

        alloc_inout(ndev, cmd, &in, &inlen, &out, &outlen, get_qpn(mvq, fw), get_rqpn(mvq, fw));
        if (!in || !out)
                return -ENOMEM;

        err = mlx5_cmd_exec(ndev->mvdev.mdev, in, inlen, out, outlen);
        free_inout(in, out);
        return err;
}

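/* Walk both QPs through the regular RC state machine:
 * RESET -> INIT -> RTR for each side, then RTS for the firmware QP only.
 * Notifications flow from the firmware to the driver, and the driver-side
 * QP is created with no send queue, so it never needs to reach RTS.
 */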
static int connect_qps(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
{
        int err;

        err = modify_qp(ndev, mvq, true, MLX5_CMD_OP_2RST_QP);
        if (err)
                return err;

        err = modify_qp(ndev, mvq, false, MLX5_CMD_OP_2RST_QP);
        if (err)
                return err;

        err = modify_qp(ndev, mvq, true, MLX5_CMD_OP_RST2INIT_QP);
        if (err)
                return err;

        err = modify_qp(ndev, mvq, false, MLX5_CMD_OP_RST2INIT_QP);
        if (err)
                return err;

        err = modify_qp(ndev, mvq, true, MLX5_CMD_OP_INIT2RTR_QP);
        if (err)
                return err;

        err = modify_qp(ndev, mvq, false, MLX5_CMD_OP_INIT2RTR_QP);
        if (err)
                return err;

        return modify_qp(ndev, mvq, true, MLX5_CMD_OP_RTR2RTS_QP);
}

struct mlx5_virtq_attr {
        u8 state;
        u16 available_index;
        u16 used_index;
};

static int query_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq,
                           struct mlx5_virtq_attr *attr)
{
        int outlen = MLX5_ST_SZ_BYTES(query_virtio_net_q_out);
        u32 in[MLX5_ST_SZ_DW(query_virtio_net_q_in)] = {};
        void *out;
        void *obj_context;
        void *cmd_hdr;
        int err;

        out = kzalloc(outlen, GFP_KERNEL);
        if (!out)
                return -ENOMEM;

        cmd_hdr = MLX5_ADDR_OF(query_virtio_net_q_in, in, general_obj_in_cmd_hdr);

        MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_QUERY_GENERAL_OBJECT);
        MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_VIRTIO_NET_Q);
        MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_id, mvq->virtq_id);
        MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, uid, ndev->mvdev.res.uid);
        err = mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, outlen);
        if (err)
                goto err_cmd;

        obj_context = MLX5_ADDR_OF(query_virtio_net_q_out, out, obj_context);
        memset(attr, 0, sizeof(*attr));
        attr->state = MLX5_GET(virtio_net_q_object, obj_context, state);
        attr->available_index = MLX5_GET(virtio_net_q_object, obj_context, hw_available_index);
        attr->used_index = MLX5_GET(virtio_net_q_object, obj_context, hw_used_index);
        kfree(out);
        return 0;

err_cmd:
        kfree(out);
        return err;
}

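/* Only MLX5_VIRTQ_MODIFY_MASK_STATE is set in modify_field_select, so this
 * command changes just the virtqueue object state (e.g. RDY or SUSPEND)
 * and leaves all other attributes untouched.
 */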
static int modify_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, int state)
{
        int inlen = MLX5_ST_SZ_BYTES(modify_virtio_net_q_in);
        u32 out[MLX5_ST_SZ_DW(modify_virtio_net_q_out)] = {};
        void *obj_context;
        void *cmd_hdr;
        void *in;
        int err;

        in = kzalloc(inlen, GFP_KERNEL);
        if (!in)
                return -ENOMEM;

        cmd_hdr = MLX5_ADDR_OF(modify_virtio_net_q_in, in, general_obj_in_cmd_hdr);

        MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_MODIFY_GENERAL_OBJECT);
        MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_VIRTIO_NET_Q);
        MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_id, mvq->virtq_id);
        MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, uid, ndev->mvdev.res.uid);

        obj_context = MLX5_ADDR_OF(modify_virtio_net_q_in, in, obj_context);
        MLX5_SET64(virtio_net_q_object, obj_context, modify_field_select,
                   MLX5_VIRTQ_MODIFY_MASK_STATE);
        MLX5_SET(virtio_net_q_object, obj_context, state, state);
        err = mlx5_cmd_exec(ndev->mvdev.mdev, in, inlen, out, sizeof(out));
        kfree(in);
        if (!err)
                mvq->fw_state = state;

        return err;
}

static int setup_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
{
        u16 idx = mvq->index;
        int err;

        if (!mvq->num_ent)
                return 0;

        if (mvq->initialized)
                return 0;

        err = cq_create(ndev, idx, mvq->num_ent);
        if (err)
                return err;

        err = qp_create(ndev, mvq, &mvq->fwqp);
        if (err)
                goto err_fwqp;

        err = qp_create(ndev, mvq, &mvq->vqqp);
        if (err)
                goto err_vqqp;

        err = connect_qps(ndev, mvq);
        if (err)
                goto err_connect;

        err = create_virtqueue(ndev, mvq);
        if (err)
                goto err_connect;

        if (mvq->ready) {
                err = modify_virtqueue(ndev, mvq, MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY);
                if (err) {
                        mlx5_vdpa_warn(&ndev->mvdev, "failed to modify to ready vq idx %d(%d)\n",
                                       idx, err);
                        goto err_connect;
                }
        }

        mvq->initialized = true;
        return 0;

err_connect:
        qp_destroy(ndev, &mvq->vqqp);
err_vqqp:
        qp_destroy(ndev, &mvq->fwqp);
err_fwqp:
        cq_destroy(ndev, idx);
        return err;
}

static void suspend_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
{
        struct mlx5_virtq_attr attr;

        if (!mvq->initialized)
                return;

        if (mvq->fw_state != MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY)
                return;

        if (modify_virtqueue(ndev, mvq, MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND))
                mlx5_vdpa_warn(&ndev->mvdev, "modify to suspend failed\n");

        if (query_virtqueue(ndev, mvq, &attr)) {
                mlx5_vdpa_warn(&ndev->mvdev, "failed to query virtqueue\n");
                return;
        }
        mvq->avail_idx = attr.available_index;
        mvq->used_idx = attr.used_index;
}

static void suspend_vqs(struct mlx5_vdpa_net *ndev)
{
        int i;

        for (i = 0; i < MLX5_MAX_SUPPORTED_VQS; i++)
                suspend_vq(ndev, &ndev->vqs[i]);
}

static void teardown_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
{
        if (!mvq->initialized)
                return;

        suspend_vq(ndev, mvq);
        destroy_virtqueue(ndev, mvq);
        qp_destroy(ndev, &mvq->vqqp);
        qp_destroy(ndev, &mvq->fwqp);
        cq_destroy(ndev, mvq->index);
        mvq->initialized = false;
}

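/* The RQT (receive queue table) backing RSS lists only the RX virtqueues,
 * i.e. the even-indexed ones; TX queues are skipped by the vq_is_tx()
 * check below.
 */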
1235static int create_rqt(struct mlx5_vdpa_net *ndev)
1236{
1237        __be32 *list;
1238        int max_rqt;
1239        void *rqtc;
1240        int inlen;
1241        void *in;
1242        int i, j;
1243        int err;
1244
1245        max_rqt = min_t(int, MLX5_MAX_SUPPORTED_VQS / 2,
1246                        1 << MLX5_CAP_GEN(ndev->mvdev.mdev, log_max_rqt_size));
1247        if (max_rqt < 1)
1248                return -EOPNOTSUPP;
1249
1250        inlen = MLX5_ST_SZ_BYTES(create_rqt_in) + max_rqt * MLX5_ST_SZ_BYTES(rq_num);
1251        in = kzalloc(inlen, GFP_KERNEL);
1252        if (!in)
1253                return -ENOMEM;
1254
1255        MLX5_SET(create_rqt_in, in, uid, ndev->mvdev.res.uid);
1256        rqtc = MLX5_ADDR_OF(create_rqt_in, in, rqt_context);
1257
1258        MLX5_SET(rqtc, rqtc, list_q_type, MLX5_RQTC_LIST_Q_TYPE_VIRTIO_NET_Q);
1259        MLX5_SET(rqtc, rqtc, rqt_max_size, max_rqt);
1260        list = MLX5_ADDR_OF(rqtc, rqtc, rq_num[0]);
1261        for (i = 0, j = 0; j < max_rqt; j++) {
1262                if (!ndev->vqs[j].initialized)
1263                        continue;
1264
1265                if (!vq_is_tx(ndev->vqs[j].index)) {
1266                        list[i] = cpu_to_be32(ndev->vqs[j].virtq_id);
1267                        i++;
1268                }
1269        }
1270        MLX5_SET(rqtc, rqtc, rqt_actual_size, i);
1271
1272        err = mlx5_vdpa_create_rqt(&ndev->mvdev, in, inlen, &ndev->res.rqtn);
1273        kfree(in);
1274        if (err)
1275                return err;
1276
1277        return 0;
1278}
1279
1280#define MLX5_MODIFY_RQT_NUM_RQS ((u64)1)
1281
1282static int modify_rqt(struct mlx5_vdpa_net *ndev, int num)
1283{
1284        __be32 *list;
1285        int max_rqt;
1286        void *rqtc;
1287        int inlen;
1288        void *in;
1289        int i, j;
1290        int err;
1291
1292        max_rqt = min_t(int, ndev->cur_num_vqs / 2,
1293                        1 << MLX5_CAP_GEN(ndev->mvdev.mdev, log_max_rqt_size));
1294        if (max_rqt < 1)
1295                return -EOPNOTSUPP;
1296
1297        inlen = MLX5_ST_SZ_BYTES(modify_rqt_in) + max_rqt * MLX5_ST_SZ_BYTES(rq_num);
1298        in = kzalloc(inlen, GFP_KERNEL);
1299        if (!in)
1300                return -ENOMEM;
1301
1302        MLX5_SET(modify_rqt_in, in, uid, ndev->mvdev.res.uid);
1303        MLX5_SET64(modify_rqt_in, in, bitmask, MLX5_MODIFY_RQT_NUM_RQS);
1304        rqtc = MLX5_ADDR_OF(modify_rqt_in, in, ctx);
1305        MLX5_SET(rqtc, rqtc, list_q_type, MLX5_RQTC_LIST_Q_TYPE_VIRTIO_NET_Q);
1306
1307        list = MLX5_ADDR_OF(rqtc, rqtc, rq_num[0]);
1308        for (i = 0, j = 0; j < num; j++) {
1309                if (!ndev->vqs[j].initialized)
1310                        continue;
1311
1312                if (!vq_is_tx(ndev->vqs[j].index)) {
1313                        list[i] = cpu_to_be32(ndev->vqs[j].virtq_id);
1314                        i++;
1315                }
1316        }
1317        MLX5_SET(rqtc, rqtc, rqt_actual_size, i);
1318        err = mlx5_vdpa_modify_rqt(&ndev->mvdev, in, inlen, ndev->res.rqtn);
1319        kfree(in);
1320        if (err)
1321                return err;
1322
1323        return 0;
1324}
1325
1326static void destroy_rqt(struct mlx5_vdpa_net *ndev)
1327{
1328        mlx5_vdpa_destroy_rqt(&ndev->mvdev, ndev->res.rqtn);
1329}
1330
1331static int create_tir(struct mlx5_vdpa_net *ndev)
1332{
1333#define HASH_IP_L4PORTS                                                                            \
1334        (MLX5_HASH_FIELD_SEL_SRC_IP | MLX5_HASH_FIELD_SEL_DST_IP | MLX5_HASH_FIELD_SEL_L4_SPORT |  \
1335         MLX5_HASH_FIELD_SEL_L4_DPORT)
1336        static const u8 rx_hash_toeplitz_key[] = { 0x2c, 0xc6, 0x81, 0xd1, 0x5b, 0xdb, 0xf4, 0xf7,
1337                                                   0xfc, 0xa2, 0x83, 0x19, 0xdb, 0x1a, 0x3e, 0x94,
1338                                                   0x6b, 0x9e, 0x38, 0xd9, 0x2c, 0x9c, 0x03, 0xd1,
1339                                                   0xad, 0x99, 0x44, 0xa7, 0xd9, 0x56, 0x3d, 0x59,
1340                                                   0x06, 0x3c, 0x25, 0xf3, 0xfc, 0x1f, 0xdc, 0x2a };
1341        void *rss_key;
1342        void *outer;
1343        void *tirc;
1344        void *in;
1345        int err;
1346
1347        in = kzalloc(MLX5_ST_SZ_BYTES(create_tir_in), GFP_KERNEL);
1348        if (!in)
1349                return -ENOMEM;
1350
1351        MLX5_SET(create_tir_in, in, uid, ndev->mvdev.res.uid);
1352        tirc = MLX5_ADDR_OF(create_tir_in, in, ctx);
1353        MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_INDIRECT);
1354
1355        MLX5_SET(tirc, tirc, rx_hash_symmetric, 1);
1356        MLX5_SET(tirc, tirc, rx_hash_fn, MLX5_RX_HASH_FN_TOEPLITZ);
1357        rss_key = MLX5_ADDR_OF(tirc, tirc, rx_hash_toeplitz_key);
1358        memcpy(rss_key, rx_hash_toeplitz_key, sizeof(rx_hash_toeplitz_key));
1359
1360        outer = MLX5_ADDR_OF(tirc, tirc, rx_hash_field_selector_outer);
1361        MLX5_SET(rx_hash_field_select, outer, l3_prot_type, MLX5_L3_PROT_TYPE_IPV4);
1362        MLX5_SET(rx_hash_field_select, outer, l4_prot_type, MLX5_L4_PROT_TYPE_TCP);
1363        MLX5_SET(rx_hash_field_select, outer, selected_fields, HASH_IP_L4PORTS);
1364
1365        MLX5_SET(tirc, tirc, indirect_table, ndev->res.rqtn);
1366        MLX5_SET(tirc, tirc, transport_domain, ndev->res.tdn);
1367
1368        err = mlx5_vdpa_create_tir(&ndev->mvdev, in, &ndev->res.tirn);
1369        kfree(in);
1370        return err;
1371}
1372
1373static void destroy_tir(struct mlx5_vdpa_net *ndev)
1374{
1375        mlx5_vdpa_destroy_tir(&ndev->mvdev, ndev->res.tirn);
1376}
1377
1378static int add_fwd_to_tir(struct mlx5_vdpa_net *ndev)
1379{
1380        struct mlx5_flow_destination dest[2] = {};
1381        struct mlx5_flow_table_attr ft_attr = {};
1382        struct mlx5_flow_act flow_act = {};
1383        struct mlx5_flow_namespace *ns;
1384        int err;
1385
1386        /* for now, one entry, match all, forward to tir */
1387        ft_attr.max_fte = 1;
1388        ft_attr.autogroup.max_num_groups = 1;
1389
1390        ns = mlx5_get_flow_namespace(ndev->mvdev.mdev, MLX5_FLOW_NAMESPACE_BYPASS);
1391        if (!ns) {
1392                mlx5_vdpa_warn(&ndev->mvdev, "get flow namespace\n");
1393                return -EOPNOTSUPP;
1394        }
1395
1396        ndev->rxft = mlx5_create_auto_grouped_flow_table(ns, &ft_attr);
1397        if (IS_ERR(ndev->rxft))
1398                return PTR_ERR(ndev->rxft);
1399
1400        ndev->rx_counter = mlx5_fc_create(ndev->mvdev.mdev, false);
1401        if (IS_ERR(ndev->rx_counter)) {
1402                err = PTR_ERR(ndev->rx_counter);
1403                goto err_fc;
1404        }
1405
1406        flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | MLX5_FLOW_CONTEXT_ACTION_COUNT;
1407        dest[0].type = MLX5_FLOW_DESTINATION_TYPE_TIR;
1408        dest[0].tir_num = ndev->res.tirn;
1409        dest[1].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
1410        dest[1].counter_id = mlx5_fc_id(ndev->rx_counter);
1411        ndev->rx_rule = mlx5_add_flow_rules(ndev->rxft, NULL, &flow_act, dest, 2);
1412        if (IS_ERR(ndev->rx_rule)) {
1413                err = PTR_ERR(ndev->rx_rule);
1414                ndev->rx_rule = NULL;
1415                goto err_rule;
1416        }
1417
1418        return 0;
1419
1420err_rule:
1421        mlx5_fc_destroy(ndev->mvdev.mdev, ndev->rx_counter);
1422err_fc:
1423        mlx5_destroy_flow_table(ndev->rxft);
1424        return err;
1425}
1426
1427static void remove_fwd_to_tir(struct mlx5_vdpa_net *ndev)
1428{
1429        if (!ndev->rx_rule)
1430                return;
1431
1432        mlx5_del_flow_rules(ndev->rx_rule);
1433        mlx5_fc_destroy(ndev->mvdev.mdev, ndev->rx_counter);
1434        mlx5_destroy_flow_table(ndev->rxft);
1435
1436        ndev->rx_rule = NULL;
1437}
1438
1439static virtio_net_ctrl_ack handle_ctrl_mac(struct mlx5_vdpa_dev *mvdev, u8 cmd)
1440{
1441        struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
1442        struct mlx5_control_vq *cvq = &mvdev->cvq;
1443        virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
1444        struct mlx5_core_dev *pfmdev;
1445        size_t read;
1446        u8 mac[ETH_ALEN];
1447
1448        pfmdev = pci_get_drvdata(pci_physfn(mvdev->mdev->pdev));
1449        switch (cmd) {
1450        case VIRTIO_NET_CTRL_MAC_ADDR_SET:
1451                read = vringh_iov_pull_iotlb(&cvq->vring, &cvq->riov, (void *)mac, ETH_ALEN);
1452                if (read != ETH_ALEN)
1453                        break;
1454
1455                if (!memcmp(ndev->config.mac, mac, 6)) {
1456                        status = VIRTIO_NET_OK;
1457                        break;
1458                }
1459
1460                if (!is_zero_ether_addr(ndev->config.mac)) {
1461                        if (mlx5_mpfs_del_mac(pfmdev, ndev->config.mac)) {
1462                                mlx5_vdpa_warn(mvdev, "failed to delete old MAC %pM from MPFS table\n",
1463                                               ndev->config.mac);
1464                                break;
1465                        }
1466                }
1467
1468                if (mlx5_mpfs_add_mac(pfmdev, mac)) {
1469                        mlx5_vdpa_warn(mvdev, "failed to insert new MAC %pM into MPFS table\n",
1470                                       mac);
1471                        break;
1472                }
1473
1474                memcpy(ndev->config.mac, mac, ETH_ALEN);
1475                status = VIRTIO_NET_OK;
1476                break;
1477
1478        default:
1479                break;
1480        }
1481
1482        return status;
1483}
1484
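    /* Change the number of active queue pairs. When shrinking, the RQT is
     * modified first so no traffic is steered at the virtqueues about to be
     * torn down; when growing, the new virtqueues are set up before the RQT
     * is pointed at them.
     */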
1485static int change_num_qps(struct mlx5_vdpa_dev *mvdev, int newqps)
1486{
1487        struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
1488        int cur_qps = ndev->cur_num_vqs / 2;
1489        int err;
1490        int i;
1491
1492        if (cur_qps > newqps) {
1493                err = modify_rqt(ndev, 2 * newqps);
1494                if (err)
1495                        return err;
1496
1497                for (i = ndev->cur_num_vqs - 1; i >= 2 * newqps; i--)
1498                        teardown_vq(ndev, &ndev->vqs[i]);
1499
1500                ndev->cur_num_vqs = 2 * newqps;
1501        } else {
1502                ndev->cur_num_vqs = 2 * newqps;
1503                for (i = cur_qps * 2; i < 2 * newqps; i++) {
1504                        err = setup_vq(ndev, &ndev->vqs[i]);
1505                        if (err)
1506                                goto clean_added;
1507                }
1508                err = modify_rqt(ndev, 2 * newqps);
1509                if (err)
1510                        goto clean_added;
1511        }
1512        return 0;
1513
1514clean_added:
1515        for (--i; i >= 2 * cur_qps; --i)
1516                teardown_vq(ndev, &ndev->vqs[i]);
1517        /* only the newly added VQs were torn down; restore the old count */
1518        ndev->cur_num_vqs = 2 * cur_qps;
        return err;
1519}
1520
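    /* Handle VIRTIO_NET_CTRL_MQ commands. The requested number of queue
     * pairs must be a power of two and within the device's supported range.
     */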
1521static virtio_net_ctrl_ack handle_ctrl_mq(struct mlx5_vdpa_dev *mvdev, u8 cmd)
1522{
1523        struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
1524        virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
1525        struct mlx5_control_vq *cvq = &mvdev->cvq;
1526        struct virtio_net_ctrl_mq mq;
1527        size_t read;
1528        u16 newqps;
1529
1530        switch (cmd) {
1531        case VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET:
1532                read = vringh_iov_pull_iotlb(&cvq->vring, &cvq->riov, (void *)&mq, sizeof(mq));
1533                if (read != sizeof(mq))
1534                        break;
1535
1536                newqps = mlx5vdpa16_to_cpu(mvdev, mq.virtqueue_pairs);
1537                if (ndev->cur_num_vqs == 2 * newqps) {
1538                        status = VIRTIO_NET_OK;
1539                        break;
1540                }
1541
1542                if (!newqps || (newqps & (newqps - 1)) || newqps > mlx5_vdpa_max_qps(mvdev->max_vqs))
1543                        break;
1544
1545                if (!change_num_qps(mvdev, newqps))
1546                        status = VIRTIO_NET_OK;
1547
1548                break;
1549        default:
1550                break;
1551        }
1552
1553        return status;
1554}
1555
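    /* Workqueue handler for control VQ kicks. The control VQ is emulated in
     * software: descriptors are fetched through the vringh iotlb API, each
     * command is dispatched by class, and the status byte is written back
     * before the used index is advanced.
     */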
1556static void mlx5_cvq_kick_handler(struct work_struct *work)
1557{
1558        virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
1559        struct virtio_net_ctrl_hdr ctrl;
1560        struct mlx5_ctrl_wq_ent *wqent;
1561        struct mlx5_vdpa_dev *mvdev;
1562        struct mlx5_control_vq *cvq;
1563        struct mlx5_vdpa_net *ndev;
1564        size_t read, write;
1565        int err;
1566
1567        wqent = container_of(work, struct mlx5_ctrl_wq_ent, work);
1568        mvdev = wqent->mvdev;
1569        ndev = to_mlx5_vdpa_ndev(mvdev);
1570        cvq = &mvdev->cvq;
1571        if (!(ndev->mvdev.actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ)))
1572                goto out;
1573
1574        if (!cvq->ready)
1575                goto out;
1576
1577        while (true) {
1578                err = vringh_getdesc_iotlb(&cvq->vring, &cvq->riov, &cvq->wiov, &cvq->head,
1579                                           GFP_ATOMIC);
1580                if (err <= 0)
1581                        break;
1582
1583                read = vringh_iov_pull_iotlb(&cvq->vring, &cvq->riov, &ctrl, sizeof(ctrl));
1584                if (read != sizeof(ctrl))
1585                        break;
1586
1587                switch (ctrl.class) {
1588                case VIRTIO_NET_CTRL_MAC:
1589                        status = handle_ctrl_mac(mvdev, ctrl.cmd);
1590                        break;
1591                case VIRTIO_NET_CTRL_MQ:
1592                        status = handle_ctrl_mq(mvdev, ctrl.cmd);
1593                        break;
1594
1595                default:
1596                        break;
1597                }
1598
1599                /* Make sure data is written before advancing index */
1600                smp_wmb();
1601
1602                write = vringh_iov_push_iotlb(&cvq->vring, &cvq->wiov, &status, sizeof(status));
1603                vringh_complete_iotlb(&cvq->vring, cvq->head, write);
1604                vringh_kiov_cleanup(&cvq->riov);
1605                vringh_kiov_cleanup(&cvq->wiov);
1606
1607                if (vringh_need_notify_iotlb(&cvq->vring))
1608                        vringh_notify(&cvq->vring);
1609        }
1610out:
1611        kfree(wqent);
1612}
1613
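    /* Kicks on data virtqueues are forwarded to hardware through the
     * doorbell page; kicks on the control virtqueue are deferred to a
     * workqueue, since CVQ processing may sleep.
     */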
1614static void mlx5_vdpa_kick_vq(struct vdpa_device *vdev, u16 idx)
1615{
1616        struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
1617        struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
1618        struct mlx5_vdpa_virtqueue *mvq;
1619        struct mlx5_ctrl_wq_ent *wqent;
1620
1621        if (!is_index_valid(mvdev, idx))
1622                return;
1623
1624        if (unlikely(is_ctrl_vq_idx(mvdev, idx))) {
1625                if (!mvdev->cvq.ready)
1626                        return;
1627
1628                wqent = kzalloc(sizeof(*wqent), GFP_ATOMIC);
1629                if (!wqent)
1630                        return;
1631
1632                wqent->mvdev = mvdev;
1633                INIT_WORK(&wqent->work, mlx5_cvq_kick_handler);
1634                queue_work(mvdev->wq, &wqent->work);
1635                return;
1636        }
1637
1638        mvq = &ndev->vqs[idx];
1639        if (unlikely(!mvq->ready))
1640                return;
1641
1642        iowrite16(idx, ndev->mvdev.res.kick_addr);
1643}
1644
1645static int mlx5_vdpa_set_vq_address(struct vdpa_device *vdev, u16 idx, u64 desc_area,
1646                                    u64 driver_area, u64 device_area)
1647{
1648        struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
1649        struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
1650        struct mlx5_vdpa_virtqueue *mvq;
1651
1652        if (!is_index_valid(mvdev, idx))
1653                return -EINVAL;
1654
1655        if (is_ctrl_vq_idx(mvdev, idx)) {
1656                mvdev->cvq.desc_addr = desc_area;
1657                mvdev->cvq.device_addr = device_area;
1658                mvdev->cvq.driver_addr = driver_area;
1659                return 0;
1660        }
1661
1662        mvq = &ndev->vqs[idx];
1663        mvq->desc_addr = desc_area;
1664        mvq->device_addr = device_area;
1665        mvq->driver_addr = driver_area;
1666        return 0;
1667}
1668
1669static void mlx5_vdpa_set_vq_num(struct vdpa_device *vdev, u16 idx, u32 num)
1670{
1671        struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
1672        struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
1673        struct mlx5_vdpa_virtqueue *mvq;
1674
1675        if (!is_index_valid(mvdev, idx) || is_ctrl_vq_idx(mvdev, idx))
1676                return;
1677
1678        mvq = &ndev->vqs[idx];
1679        mvq->num_ent = num;
1680}
1681
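    /* Record the completion callback for a virtqueue; the event_cbs array
     * is indexed by virtqueue, including the control VQ's index.
     */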
1682static void mlx5_vdpa_set_vq_cb(struct vdpa_device *vdev, u16 idx, struct vdpa_callback *cb)
1683{
1684        struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
1685        struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
1686
1687        ndev->event_cbs[idx] = *cb;
1688}
1689
1690static void mlx5_cvq_notify(struct vringh *vring)
1691{
1692        struct mlx5_control_vq *cvq = container_of(vring, struct mlx5_control_vq, vring);
1693
1694        if (!cvq->event_cb.callback)
1695                return;
1696
1697        cvq->event_cb.callback(cvq->event_cb.private);
1698}
1699
1700static void set_cvq_ready(struct mlx5_vdpa_dev *mvdev, bool ready)
1701{
1702        struct mlx5_control_vq *cvq = &mvdev->cvq;
1703
1704        cvq->ready = ready;
1705        if (!ready)
1706                return;
1707
1708        cvq->vring.notify = mlx5_cvq_notify;
1709}
1710
1711static void mlx5_vdpa_set_vq_ready(struct vdpa_device *vdev, u16 idx, bool ready)
1712{
1713        struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
1714        struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
1715        struct mlx5_vdpa_virtqueue *mvq;
1716
1717        if (!mvdev->actual_features)
1718                return;
1719
1720        if (!is_index_valid(mvdev, idx))
1721                return;
1722
1723        if (is_ctrl_vq_idx(mvdev, idx)) {
1724                set_cvq_ready(mvdev, ready);
1725                return;
1726        }
1727
1728        mvq = &ndev->vqs[idx];
1729        if (!ready)
1730                suspend_vq(ndev, mvq);
1731
1732        mvq->ready = ready;
1733}
1734
1735static bool mlx5_vdpa_get_vq_ready(struct vdpa_device *vdev, u16 idx)
1736{
1737        struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
1738        struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
1739
1740        if (!is_index_valid(mvdev, idx))
1741                return false;
1742
1743        if (is_ctrl_vq_idx(mvdev, idx))
1744                return mvdev->cvq.ready;
1745
1746        return ndev->vqs[idx].ready;
1747}
1748
1749static int mlx5_vdpa_set_vq_state(struct vdpa_device *vdev, u16 idx,
1750                                  const struct vdpa_vq_state *state)
1751{
1752        struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
1753        struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
1754        struct mlx5_vdpa_virtqueue *mvq;
1755
1756        if (!is_index_valid(mvdev, idx))
1757                return -EINVAL;
1758
1759        if (is_ctrl_vq_idx(mvdev, idx)) {
1760                mvdev->cvq.vring.last_avail_idx = state->split.avail_index;
1761                return 0;
1762        }
1763
1764        mvq = &ndev->vqs[idx];
1765        if (mvq->fw_state == MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY) {
1766                mlx5_vdpa_warn(mvdev, "can't modify available index\n");
1767                return -EINVAL;
1768        }
1769
1770        mvq->used_idx = state->split.avail_index;
1771        mvq->avail_idx = state->split.avail_index;
1772        return 0;
1773}
1774
1775static int mlx5_vdpa_get_vq_state(struct vdpa_device *vdev, u16 idx, struct vdpa_vq_state *state)
1776{
1777        struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
1778        struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
1779        struct mlx5_vdpa_virtqueue *mvq;
1780        struct mlx5_virtq_attr attr;
1781        int err;
1782
1783        if (!is_index_valid(mvdev, idx))
1784                return -EINVAL;
1785
1786        if (is_ctrl_vq_idx(mvdev, idx)) {
1787                state->split.avail_index = mvdev->cvq.vring.last_avail_idx;
1788                return 0;
1789        }
1790
1791        mvq = &ndev->vqs[idx];
1792        /* If the virtq object was destroyed, use the value saved at
1793         * the last minute of suspend_vq. This caters for userspace
1794         * that cares about emulating the index after vq is stopped.
1795         */
1796        if (!mvq->initialized) {
1797                /* Firmware returns a wrong value for the available index.
1798                 * Since both values should be identical, we take the value of
1799                 * used_idx which is reported correctly.
1800                 */
1801                state->split.avail_index = mvq->used_idx;
1802                return 0;
1803        }
1804
1805        err = query_virtqueue(ndev, mvq, &attr);
1806        if (err) {
1807                mlx5_vdpa_warn(mvdev, "failed to query virtqueue\n");
1808                return err;
1809        }
1810        state->split.avail_index = attr.used_index;
1811        return 0;
1812}
1813
1814static u32 mlx5_vdpa_get_vq_align(struct vdpa_device *vdev)
1815{
1816        return PAGE_SIZE;
1817}
1818
1819enum {
        MLX5_VIRTIO_NET_F_GUEST_CSUM = 1 << 9,
1820        MLX5_VIRTIO_NET_F_CSUM = 1 << 10,
1821        MLX5_VIRTIO_NET_F_HOST_TSO6 = 1 << 11,
1822        MLX5_VIRTIO_NET_F_HOST_TSO4 = 1 << 12,
1823};
1824
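    /* Translate device feature bits reported in the vdpa emulation
     * capability into their virtio-net feature bit equivalents.
     */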
1825static u64 mlx_to_virtio_features(u16 dev_features)
1826{
1827        u64 result = 0;
1828
1829        if (dev_features & MLX5_VIRTIO_NET_F_GUEST_CSUM)
1830                result |= BIT_ULL(VIRTIO_NET_F_GUEST_CSUM);
1831        if (dev_features & MLX5_VIRTIO_NET_F_CSUM)
1832                result |= BIT_ULL(VIRTIO_NET_F_CSUM);
1833        if (dev_features & MLX5_VIRTIO_NET_F_HOST_TSO6)
1834                result |= BIT_ULL(VIRTIO_NET_F_HOST_TSO6);
1835        if (dev_features & MLX5_VIRTIO_NET_F_HOST_TSO4)
1836                result |= BIT_ULL(VIRTIO_NET_F_HOST_TSO4);
1837
1838        return result;
1839}
1840
1841static u64 mlx5_vdpa_get_features(struct vdpa_device *vdev)
1842{
1843        struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
1844        struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
1845        u16 dev_features;
1846
1847        dev_features = MLX5_CAP_DEV_VDPA_EMULATION(mvdev->mdev, device_features_bits_mask);
1848        ndev->mvdev.mlx_features |= mlx_to_virtio_features(dev_features);
1849        if (MLX5_CAP_DEV_VDPA_EMULATION(mvdev->mdev, virtio_version_1_0))
1850                ndev->mvdev.mlx_features |= BIT_ULL(VIRTIO_F_VERSION_1);
1851        ndev->mvdev.mlx_features |= BIT_ULL(VIRTIO_F_ACCESS_PLATFORM);
1852        ndev->mvdev.mlx_features |= BIT_ULL(VIRTIO_NET_F_CTRL_VQ);
1853        ndev->mvdev.mlx_features |= BIT_ULL(VIRTIO_NET_F_CTRL_MAC_ADDR);
1854        ndev->mvdev.mlx_features |= BIT_ULL(VIRTIO_NET_F_MQ);
1855
1856        print_features(mvdev, ndev->mvdev.mlx_features, false);
1857        return ndev->mvdev.mlx_features;
1858}
1859
1860static int verify_min_features(struct mlx5_vdpa_dev *mvdev, u64 features)
1861{
1862        if (!(features & BIT_ULL(VIRTIO_F_ACCESS_PLATFORM)))
1863                return -EOPNOTSUPP;
1864
1865        return 0;
1866}
1867
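    /* Create the hardware objects backing all data virtqueues and, if
     * VIRTIO_NET_F_CTRL_VQ was negotiated, initialize the software vringh
     * for the control VQ from the guest-provided ring addresses.
     */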
1868static int setup_virtqueues(struct mlx5_vdpa_dev *mvdev)
1869{
1870        struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
1871        struct mlx5_control_vq *cvq = &mvdev->cvq;
1872        int err;
1873        int i;
1874
1875        for (i = 0; i < 2 * mlx5_vdpa_max_qps(mvdev->max_vqs); i++) {
1876                err = setup_vq(ndev, &ndev->vqs[i]);
1877                if (err)
1878                        goto err_vq;
1879        }
1880
1881        if (mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ)) {
1882                err = vringh_init_iotlb(&cvq->vring, mvdev->actual_features,
1883                                        MLX5_CVQ_MAX_ENT, false,
1884                                        (struct vring_desc *)(uintptr_t)cvq->desc_addr,
1885                                        (struct vring_avail *)(uintptr_t)cvq->driver_addr,
1886                                        (struct vring_used *)(uintptr_t)cvq->device_addr);
1887                if (err)
1888                        goto err_vq;
1889        }
1890
1891        return 0;
1892
1893err_vq:
1894        for (--i; i >= 0; i--)
1895                teardown_vq(ndev, &ndev->vqs[i]);
1896
1897        return err;
1898}
1899
1900static void teardown_virtqueues(struct mlx5_vdpa_net *ndev)
1901{
1902        struct mlx5_vdpa_virtqueue *mvq;
1903        int i;
1904
1905        for (i = ndev->mvdev.max_vqs - 1; i >= 0; i--) {
1906                mvq = &ndev->vqs[i];
1907                if (!mvq->initialized)
1908                        continue;
1909
1910                teardown_vq(ndev, mvq);
1911        }
1912}
1913
1914static void update_cvq_info(struct mlx5_vdpa_dev *mvdev)
1915{
1916        if (MLX5_FEATURE(mvdev, VIRTIO_NET_F_CTRL_VQ)) {
1917                if (MLX5_FEATURE(mvdev, VIRTIO_NET_F_MQ)) {
1918                        /* MQ supported. CVQ index is right above the last data virtqueue's */
1919                        mvdev->max_idx = mvdev->max_vqs;
1920                } else {
1921                        /* Only CVQ supported. Data virtqueues occupy indices 0 and 1;
1922                         * the CVQ gets index 2.
1923                         */
1924                        mvdev->max_idx = 2;
1925                }
1926        } else {
1927                /* Two data virtqueues only: one for rx and one for tx */
1928                mvdev->max_idx = 1;
1929        }
1930}
1931
1932static int mlx5_vdpa_set_features(struct vdpa_device *vdev, u64 features)
1933{
1934        struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
1935        struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
1936        int err;
1937
1938        print_features(mvdev, features, true);
1939
1940        err = verify_min_features(mvdev, features);
1941        if (err)
1942                return err;
1943
1944        ndev->mvdev.actual_features = features & ndev->mvdev.mlx_features;
1945        ndev->config.mtu = cpu_to_mlx5vdpa16(mvdev, ndev->mtu);
1946        ndev->config.status |= cpu_to_mlx5vdpa16(mvdev, VIRTIO_NET_S_LINK_UP);
1947        update_cvq_info(mvdev);
1948        return err;
1949}
1950
1951static void mlx5_vdpa_set_config_cb(struct vdpa_device *vdev, struct vdpa_callback *cb)
1952{
1953        /* not implemented */
1954        mlx5_vdpa_warn(to_mvdev(vdev), "set config callback not supported\n");
1955}
1956
1957#define MLX5_VDPA_MAX_VQ_ENTRIES 256
1958static u16 mlx5_vdpa_get_vq_num_max(struct vdpa_device *vdev)
1959{
1960        return MLX5_VDPA_MAX_VQ_ENTRIES;
1961}
1962
1963static u32 mlx5_vdpa_get_device_id(struct vdpa_device *vdev)
1964{
1965        return VIRTIO_ID_NET;
1966}
1967
1968static u32 mlx5_vdpa_get_vendor_id(struct vdpa_device *vdev)
1969{
1970        return PCI_VENDOR_ID_MELLANOX;
1971}
1972
1973static u8 mlx5_vdpa_get_status(struct vdpa_device *vdev)
1974{
1975        struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
1976        struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
1977
1978        print_status(mvdev, ndev->mvdev.status, false);
1979        return ndev->mvdev.status;
1980}
1981
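    /* Snapshot a virtqueue's state so it can be re-created with the same
     * indices and addresses after a memory map change.
     */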
1982static int save_channel_info(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
1983{
1984        struct mlx5_vq_restore_info *ri = &mvq->ri;
1985        struct mlx5_virtq_attr attr = {};
1986        int err;
1987
1988        if (mvq->initialized) {
1989                err = query_virtqueue(ndev, mvq, &attr);
1990                if (err)
1991                        return err;
1992        }
1993
1994        ri->avail_index = attr.available_index;
1995        ri->used_index = attr.used_index;
1996        ri->ready = mvq->ready;
1997        ri->num_ent = mvq->num_ent;
1998        ri->desc_addr = mvq->desc_addr;
1999        ri->device_addr = mvq->device_addr;
2000        ri->driver_addr = mvq->driver_addr;
2001        ri->restore = true;
2002        return 0;
2003}
2004
2005static int save_channels_info(struct mlx5_vdpa_net *ndev)
2006{
2007        int i;
2008
2009        for (i = 0; i < ndev->mvdev.max_vqs; i++) {
2010                memset(&ndev->vqs[i].ri, 0, sizeof(ndev->vqs[i].ri));
2011                save_channel_info(ndev, &ndev->vqs[i]);
2012        }
2013        return 0;
2014}
2015
2016static void mlx5_clear_vqs(struct mlx5_vdpa_net *ndev)
2017{
2018        int i;
2019
2020        for (i = 0; i < ndev->mvdev.max_vqs; i++)
2021                memset(&ndev->vqs[i], 0, offsetof(struct mlx5_vdpa_virtqueue, ri));
2022}
2023
2024static void restore_channels_info(struct mlx5_vdpa_net *ndev)
2025{
2026        struct mlx5_vdpa_virtqueue *mvq;
2027        struct mlx5_vq_restore_info *ri;
2028        int i;
2029
2030        mlx5_clear_vqs(ndev);
2031        init_mvqs(ndev);
2032        for (i = 0; i < ndev->mvdev.max_vqs; i++) {
2033                mvq = &ndev->vqs[i];
2034                ri = &mvq->ri;
2035                if (!ri->restore)
2036                        continue;
2037
2038                mvq->avail_idx = ri->avail_index;
2039                mvq->used_idx = ri->used_index;
2040                mvq->ready = ri->ready;
2041                mvq->num_ent = ri->num_ent;
2042                mvq->desc_addr = ri->desc_addr;
2043                mvq->device_addr = ri->device_addr;
2044                mvq->driver_addr = ri->driver_addr;
2045        }
2046}
2047
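    /* A changed memory map invalidates the MR: suspend the virtqueues, save
     * their state, destroy and re-create the MR, and, if the driver was
     * already running (DRIVER_OK), restore the virtqueue state and rebuild
     * the datapath objects.
     */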
2048static int mlx5_vdpa_change_map(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb)
2049{
2050        struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2051        int err;
2052
2053        suspend_vqs(ndev);
2054        err = save_channels_info(ndev);
2055        if (err)
2056                goto err_mr;
2057
2058        teardown_driver(ndev);
2059        mlx5_vdpa_destroy_mr(mvdev);
2060        err = mlx5_vdpa_create_mr(mvdev, iotlb);
2061        if (err)
2062                goto err_mr;
2063
2064        if (!(mvdev->status & VIRTIO_CONFIG_S_DRIVER_OK))
2065                return 0;
2066
2067        restore_channels_info(ndev);
2068        err = setup_driver(mvdev);
2069        if (err)
2070                goto err_setup;
2071
2072        return 0;
2073
2074err_setup:
2075        mlx5_vdpa_destroy_mr(mvdev);
2076err_mr:
2077        return err;
2078}
2079
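    /* Bring up the datapath in dependency order: virtqueues, RQT, TIR and
     * finally the RX steering rule. Serialized against teardown by reslock.
     */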
2080static int setup_driver(struct mlx5_vdpa_dev *mvdev)
2081{
2082        struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2083        int err;
2084
2085        mutex_lock(&ndev->reslock);
2086        if (ndev->setup) {
2087                mlx5_vdpa_warn(mvdev, "setup_driver called but the driver is already set up\n");
2088                err = 0;
2089                goto out;
2090        }
2091        err = setup_virtqueues(mvdev);
2092        if (err) {
2093                mlx5_vdpa_warn(mvdev, "setup_virtqueues failed\n");
2094                goto out;
2095        }
2096
2097        err = create_rqt(ndev);
2098        if (err) {
2099                mlx5_vdpa_warn(mvdev, "create_rqt failed\n");
2100                goto err_rqt;
2101        }
2102
2103        err = create_tir(ndev);
2104        if (err) {
2105                mlx5_vdpa_warn(mvdev, "create_tir failed\n");
2106                goto err_tir;
2107        }
2108
2109        err = add_fwd_to_tir(ndev);
2110        if (err) {
2111                mlx5_vdpa_warn(mvdev, "add_fwd_to_tir failed\n");
2112                goto err_fwd;
2113        }
2114        ndev->setup = true;
2115        mutex_unlock(&ndev->reslock);
2116
2117        return 0;
2118
2119err_fwd:
2120        destroy_tir(ndev);
2121err_tir:
2122        destroy_rqt(ndev);
2123err_rqt:
2124        teardown_virtqueues(ndev);
2125out:
2126        mutex_unlock(&ndev->reslock);
2127        return err;
2128}
2129
2130static void teardown_driver(struct mlx5_vdpa_net *ndev)
2131{
2132        mutex_lock(&ndev->reslock);
2133        if (!ndev->setup)
2134                goto out;
2135
2136        remove_fwd_to_tir(ndev);
2137        destroy_tir(ndev);
2138        destroy_rqt(ndev);
2139        teardown_virtqueues(ndev);
2140        ndev->setup = false;
2141out:
2142        mutex_unlock(&ndev->reslock);
2143}
2144
2145static void clear_vqs_ready(struct mlx5_vdpa_net *ndev)
2146{
2147        int i;
2148
2149        for (i = 0; i < ndev->mvdev.max_vqs; i++)
2150                ndev->vqs[i].ready = false;
2151
2152        ndev->mvdev.cvq.ready = false;
2153}
2154
2155static void mlx5_vdpa_set_status(struct vdpa_device *vdev, u8 status)
2156{
2157        struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2158        struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2159        int err;
2160
2161        print_status(mvdev, status, true);
2162
2163        if ((status ^ ndev->mvdev.status) & VIRTIO_CONFIG_S_DRIVER_OK) {
2164                if (status & VIRTIO_CONFIG_S_DRIVER_OK) {
2165                        err = setup_driver(mvdev);
2166                        if (err) {
2167                                mlx5_vdpa_warn(mvdev, "failed to setup driver\n");
2168                                goto err_setup;
2169                        }
2170                } else {
2171                        mlx5_vdpa_warn(mvdev, "did not expect DRIVER_OK to be cleared\n");
2172                        return;
2173                }
2174        }
2175
2176        ndev->mvdev.status = status;
2177        return;
2178
2179err_setup:
2180        mlx5_vdpa_destroy_mr(&ndev->mvdev);
2181        ndev->mvdev.status |= VIRTIO_CONFIG_S_FAILED;
2182}
2183
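    /* Device reset: tear down the datapath, clear all negotiated state and
     * bump the config generation counter.
     */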
2184static int mlx5_vdpa_reset(struct vdpa_device *vdev)
2185{
2186        struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2187        struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2188
2189        print_status(mvdev, 0, true);
2190        mlx5_vdpa_info(mvdev, "performing device reset\n");
2191        teardown_driver(ndev);
2192        clear_vqs_ready(ndev);
2193        mlx5_vdpa_destroy_mr(&ndev->mvdev);
2194        ndev->mvdev.status = 0;
2195        ndev->mvdev.mlx_features = 0;
2196        memset(ndev->event_cbs, 0, sizeof(ndev->event_cbs));
2197        ndev->mvdev.actual_features = 0;
2198        ++mvdev->generation;
2199        if (MLX5_CAP_GEN(mvdev->mdev, umem_uid_0)) {
2200                if (mlx5_vdpa_create_mr(mvdev, NULL))
2201                        mlx5_vdpa_warn(mvdev, "create MR failed\n");
2202        }
2203
2204        return 0;
2205}
2206
2207static size_t mlx5_vdpa_get_config_size(struct vdpa_device *vdev)
2208{
2209        return sizeof(struct virtio_net_config);
2210}
2211
2212static void mlx5_vdpa_get_config(struct vdpa_device *vdev, unsigned int offset, void *buf,
2213                                 unsigned int len)
2214{
2215        struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2216        struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2217
2218        if (offset + len <= sizeof(struct virtio_net_config))
2219                memcpy(buf, (u8 *)&ndev->config + offset, len);
2220}
2221
2222static void mlx5_vdpa_set_config(struct vdpa_device *vdev, unsigned int offset, const void *buf,
2223                                 unsigned int len)
2224{
2225        /* not supported */
2226}
2227
2228static u32 mlx5_vdpa_get_generation(struct vdpa_device *vdev)
2229{
2230        struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2231
2232        return mvdev->generation;
2233}
2234
2235static int mlx5_vdpa_set_map(struct vdpa_device *vdev, struct vhost_iotlb *iotlb)
2236{
2237        struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2238        bool change_map;
2239        int err;
2240
2241        err = mlx5_vdpa_handle_set_map(mvdev, iotlb, &change_map);
2242        if (err) {
2243                mlx5_vdpa_warn(mvdev, "set map failed(%d)\n", err);
2244                return err;
2245        }
2246
2247        if (change_map)
2248                return mlx5_vdpa_change_map(mvdev, iotlb);
2249
2250        return 0;
2251}
2252
2253static void mlx5_vdpa_free(struct vdpa_device *vdev)
2254{
2255        struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2256        struct mlx5_core_dev *pfmdev;
2257        struct mlx5_vdpa_net *ndev;
2258
2259        ndev = to_mlx5_vdpa_ndev(mvdev);
2260
2261        free_resources(ndev);
2262        mlx5_vdpa_destroy_mr(mvdev);
2263        if (!is_zero_ether_addr(ndev->config.mac)) {
2264                pfmdev = pci_get_drvdata(pci_physfn(mvdev->mdev->pdev));
2265                mlx5_mpfs_del_mac(pfmdev, ndev->config.mac);
2266        }
2267        mlx5_vdpa_free_resources(&ndev->mvdev);
2268        mutex_destroy(&ndev->reslock);
2269}
2270
2271static struct vdpa_notification_area mlx5_get_vq_notification(struct vdpa_device *vdev, u16 idx)
2272{
2273        struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2274        struct vdpa_notification_area ret = {};
2275        struct mlx5_vdpa_net *ndev;
2276        phys_addr_t addr;
2277
2278        if (!is_index_valid(mvdev, idx) || is_ctrl_vq_idx(mvdev, idx))
2279                return ret;
2280
2281        /* If the SF BAR size is smaller than PAGE_SIZE, do not use direct
2282         * notification, to avoid the risk of mapping pages that contain the
2283         * BARs of more than one SF.
2284         */
2285        if (MLX5_CAP_GEN(mvdev->mdev, log_min_sf_size) + 12 < PAGE_SHIFT)
2286                return ret;
2287
2288        ndev = to_mlx5_vdpa_ndev(mvdev);
2289        addr = (phys_addr_t)ndev->mvdev.res.phys_kick_addr;
2290        ret.addr = addr;
2291        ret.size = PAGE_SIZE;
2292        return ret;
2293}
2294
2295static int mlx5_get_vq_irq(struct vdpa_device *vdev, u16 idx)
2296{
2297        return -EOPNOTSUPP;
2298}
2299
2300static const struct vdpa_config_ops mlx5_vdpa_ops = {
2301        .set_vq_address = mlx5_vdpa_set_vq_address,
2302        .set_vq_num = mlx5_vdpa_set_vq_num,
2303        .kick_vq = mlx5_vdpa_kick_vq,
2304        .set_vq_cb = mlx5_vdpa_set_vq_cb,
2305        .set_vq_ready = mlx5_vdpa_set_vq_ready,
2306        .get_vq_ready = mlx5_vdpa_get_vq_ready,
2307        .set_vq_state = mlx5_vdpa_set_vq_state,
2308        .get_vq_state = mlx5_vdpa_get_vq_state,
2309        .get_vq_notification = mlx5_get_vq_notification,
2310        .get_vq_irq = mlx5_get_vq_irq,
2311        .get_vq_align = mlx5_vdpa_get_vq_align,
2312        .get_features = mlx5_vdpa_get_features,
2313        .set_features = mlx5_vdpa_set_features,
2314        .set_config_cb = mlx5_vdpa_set_config_cb,
2315        .get_vq_num_max = mlx5_vdpa_get_vq_num_max,
2316        .get_device_id = mlx5_vdpa_get_device_id,
2317        .get_vendor_id = mlx5_vdpa_get_vendor_id,
2318        .get_status = mlx5_vdpa_get_status,
2319        .set_status = mlx5_vdpa_set_status,
2320        .reset = mlx5_vdpa_reset,
2321        .get_config_size = mlx5_vdpa_get_config_size,
2322        .get_config = mlx5_vdpa_get_config,
2323        .set_config = mlx5_vdpa_set_config,
2324        .get_generation = mlx5_vdpa_get_generation,
2325        .set_map = mlx5_vdpa_set_map,
2326        .free = mlx5_vdpa_free,
2327};
2328
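    /* Report the usable MTU: the NIC vport MTU minus the Ethernet hard
     * header overhead accounted by MLX5V_ETH_HARD_MTU.
     */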
2329static int query_mtu(struct mlx5_core_dev *mdev, u16 *mtu)
2330{
2331        u16 hw_mtu;
2332        int err;
2333
2334        err = mlx5_query_nic_vport_mtu(mdev, &hw_mtu);
2335        if (err)
2336                return err;
2337
2338        *mtu = hw_mtu - MLX5V_ETH_HARD_MTU;
2339        return 0;
2340}
2341
2342static int alloc_resources(struct mlx5_vdpa_net *ndev)
2343{
2344        struct mlx5_vdpa_net_resources *res = &ndev->res;
2345        int err;
2346
2347        if (res->valid) {
2348                mlx5_vdpa_warn(&ndev->mvdev, "resources already allocated\n");
2349                return -EEXIST;
2350        }
2351
2352        err = mlx5_vdpa_alloc_transport_domain(&ndev->mvdev, &res->tdn);
2353        if (err)
2354                return err;
2355
2356        err = create_tis(ndev);
2357        if (err)
2358                goto err_tis;
2359
2360        res->valid = true;
2361
2362        return 0;
2363
2364err_tis:
2365        mlx5_vdpa_dealloc_transport_domain(&ndev->mvdev, res->tdn);
2366        return err;
2367}
2368
2369static void free_resources(struct mlx5_vdpa_net *ndev)
2370{
2371        struct mlx5_vdpa_net_resources *res = &ndev->res;
2372
2373        if (!res->valid)
2374                return;
2375
2376        destroy_tis(ndev);
2377        mlx5_vdpa_dealloc_transport_domain(&ndev->mvdev, res->tdn);
2378        res->valid = false;
2379}
2380
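    /* Initialize virtqueue bookkeeping. The first 2 * max_qps entries are
     * the usable data virtqueues, whose companion QPs are firmware-owned;
     * the remainder are initialized but left unused.
     */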
2381static void init_mvqs(struct mlx5_vdpa_net *ndev)
2382{
2383        struct mlx5_vdpa_virtqueue *mvq;
2384        int i;
2385
2386        for (i = 0; i < 2 * mlx5_vdpa_max_qps(ndev->mvdev.max_vqs); ++i) {
2387                mvq = &ndev->vqs[i];
2388                memset(mvq, 0, offsetof(struct mlx5_vdpa_virtqueue, ri));
2389                mvq->index = i;
2390                mvq->ndev = ndev;
2391                mvq->fwqp.fw = true;
2392        }
2393        for (; i < ndev->mvdev.max_vqs; i++) {
2394                mvq = &ndev->vqs[i];
2395                memset(mvq, 0, offsetof(struct mlx5_vdpa_virtqueue, ri));
2396                mvq->index = i;
2397                mvq->ndev = ndev;
2398        }
2399}
2400
2401struct mlx5_vdpa_mgmtdev {
2402        struct vdpa_mgmt_dev mgtdev;
2403        struct mlx5_adev *madev;
2404        struct mlx5_vdpa_net *ndev;
2405};
2406
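    /* Management device .dev_add callback: verifies split-VQ support,
     * allocates the network device, queries MTU and MAC from the NIC vport,
     * sets up resources and registers the vdpa device.
     */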
2407static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name)
2408{
2409        struct mlx5_vdpa_mgmtdev *mgtdev = container_of(v_mdev, struct mlx5_vdpa_mgmtdev, mgtdev);
2410        struct virtio_net_config *config;
2411        struct mlx5_core_dev *pfmdev;
2412        struct mlx5_vdpa_dev *mvdev;
2413        struct mlx5_vdpa_net *ndev;
2414        struct mlx5_core_dev *mdev;
2415        u32 max_vqs;
2416        int err;
2417
2418        if (mgtdev->ndev)
2419                return -ENOSPC;
2420
2421        mdev = mgtdev->madev->mdev;
2422        if (!(MLX5_CAP_DEV_VDPA_EMULATION(mdev, virtio_queue_type) &
2423            MLX5_VIRTIO_EMULATION_CAP_VIRTIO_QUEUE_TYPE_SPLIT)) {
2424                dev_warn(mdev->device, "missing support for split virtqueues\n");
2425                return -EOPNOTSUPP;
2426        }
2427
2428        /* We reserve one virtqueue for the control virtqueue, should we require it. */
2429        max_vqs = MLX5_CAP_DEV_VDPA_EMULATION(mdev, max_num_virtio_queues);
2430        max_vqs = min_t(u32, max_vqs, MLX5_MAX_SUPPORTED_VQS);
2431
2432        ndev = vdpa_alloc_device(struct mlx5_vdpa_net, mvdev.vdev, mdev->device, &mlx5_vdpa_ops,
2433                                 name, false);
2434        if (IS_ERR(ndev))
2435                return PTR_ERR(ndev);
2436
2437        ndev->mvdev.max_vqs = max_vqs;
2438        mvdev = &ndev->mvdev;
2439        mvdev->mdev = mdev;
2440        init_mvqs(ndev);
2441        mutex_init(&ndev->reslock);
2442        config = &ndev->config;
2443        err = query_mtu(mdev, &ndev->mtu);
2444        if (err)
2445                goto err_mtu;
2446
2447        err = mlx5_query_nic_vport_mac_address(mdev, 0, 0, config->mac);
2448        if (err)
2449                goto err_mtu;
2450
2451        if (!is_zero_ether_addr(config->mac)) {
2452                pfmdev = pci_get_drvdata(pci_physfn(mdev->pdev));
2453                err = mlx5_mpfs_add_mac(pfmdev, config->mac);
2454                if (err)
2455                        goto err_mtu;
2456
2457                ndev->mvdev.mlx_features |= BIT_ULL(VIRTIO_NET_F_MAC);
2458        }
2459
2460        config->max_virtqueue_pairs = cpu_to_mlx5vdpa16(mvdev, mlx5_vdpa_max_qps(max_vqs));
2461        mvdev->vdev.dma_dev = &mdev->pdev->dev;
2462        err = mlx5_vdpa_alloc_resources(&ndev->mvdev);
2463        if (err)
2464                goto err_mpfs;
2465
2466        if (MLX5_CAP_GEN(mvdev->mdev, umem_uid_0)) {
2467                err = mlx5_vdpa_create_mr(mvdev, NULL);
2468                if (err)
2469                        goto err_res;
2470        }
2471
2472        err = alloc_resources(ndev);
2473        if (err)
2474                goto err_mr;
2475
2476        mvdev->wq = create_singlethread_workqueue("mlx5_vdpa_ctrl_wq");
2477        if (!mvdev->wq) {
2478                err = -ENOMEM;
2479                goto err_res2;
2480        }
2481
2482        ndev->cur_num_vqs = 2 * mlx5_vdpa_max_qps(max_vqs);
2483        mvdev->vdev.mdev = &mgtdev->mgtdev;
2484        err = _vdpa_register_device(&mvdev->vdev, ndev->cur_num_vqs + 1);
2485        if (err)
2486                goto err_reg;
2487
2488        mgtdev->ndev = ndev;
2489        return 0;
2490
2491err_reg:
2492        destroy_workqueue(mvdev->wq);
2493err_res2:
2494        free_resources(ndev);
2495err_mr:
2496        mlx5_vdpa_destroy_mr(mvdev);
2497err_res:
2498        mlx5_vdpa_free_resources(&ndev->mvdev);
2499err_mpfs:
2500        if (!is_zero_ether_addr(config->mac))
2501                mlx5_mpfs_del_mac(pfmdev, config->mac);
2502err_mtu:
2503        mutex_destroy(&ndev->reslock);
2504        put_device(&mvdev->vdev.dev);
2505        return err;
2506}
2507
2508static void mlx5_vdpa_dev_del(struct vdpa_mgmt_dev *v_mdev, struct vdpa_device *dev)
2509{
2510        struct mlx5_vdpa_mgmtdev *mgtdev = container_of(v_mdev, struct mlx5_vdpa_mgmtdev, mgtdev);
2511        struct mlx5_vdpa_dev *mvdev = to_mvdev(dev);
2512
2513        destroy_workqueue(mvdev->wq);
2514        _vdpa_unregister_device(dev);
2515        mgtdev->ndev = NULL;
2516}
2517
2518static const struct vdpa_mgmtdev_ops mdev_ops = {
2519        .dev_add = mlx5_vdpa_dev_add,
2520        .dev_del = mlx5_vdpa_dev_del,
2521};
2522
2523static struct virtio_device_id id_table[] = {
2524        { VIRTIO_ID_NET, VIRTIO_DEV_ANY_ID },
2525        { 0 },
2526};
2527
2528static int mlx5v_probe(struct auxiliary_device *adev,
2529                       const struct auxiliary_device_id *id)
2531{
2532        struct mlx5_adev *madev = container_of(adev, struct mlx5_adev, adev);
2533        struct mlx5_core_dev *mdev = madev->mdev;
2534        struct mlx5_vdpa_mgmtdev *mgtdev;
2535        int err;
2536
2537        mgtdev = kzalloc(sizeof(*mgtdev), GFP_KERNEL);
2538        if (!mgtdev)
2539                return -ENOMEM;
2540
2541        mgtdev->mgtdev.ops = &mdev_ops;
2542        mgtdev->mgtdev.device = mdev->device;
2543        mgtdev->mgtdev.id_table = id_table;
2544        mgtdev->madev = madev;
2545
2546        err = vdpa_mgmtdev_register(&mgtdev->mgtdev);
2547        if (err)
2548                goto reg_err;
2549
2550        dev_set_drvdata(&adev->dev, mgtdev);
2551
2552        return 0;
2553
2554reg_err:
2555        kfree(mgtdev);
2556        return err;
2557}
2558
2559static void mlx5v_remove(struct auxiliary_device *adev)
2560{
2561        struct mlx5_vdpa_mgmtdev *mgtdev;
2562
2563        mgtdev = dev_get_drvdata(&adev->dev);
2564        vdpa_mgmtdev_unregister(&mgtdev->mgtdev);
2565        kfree(mgtdev);
2566}
2567
2568static const struct auxiliary_device_id mlx5v_id_table[] = {
2569        { .name = MLX5_ADEV_NAME ".vnet", },
2570        {},
2571};
2572
2573MODULE_DEVICE_TABLE(auxiliary, mlx5v_id_table);
2574
2575static struct auxiliary_driver mlx5v_driver = {
2576        .name = "vnet",
2577        .probe = mlx5v_probe,
2578        .remove = mlx5v_remove,
2579        .id_table = mlx5v_id_table,
2580};
2581
2582module_auxiliary_driver(mlx5v_driver);
2583