linux/drivers/vdpa/mlx5/net/mlx5_vnet.c
   1// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
   2/* Copyright (c) 2020 Mellanox Technologies Ltd. */
   3
   4#include <linux/vdpa.h>
   5#include <uapi/linux/virtio_ids.h>
   6#include <linux/virtio_config.h>
   7#include <linux/mlx5/qp.h>
   8#include <linux/mlx5/device.h>
   9#include <linux/mlx5/vport.h>
  10#include <linux/mlx5/fs.h>
  12#include "mlx5_vnet.h"
  13#include "mlx5_vdpa_ifc.h"
  14#include "mlx5_vdpa.h"
  15
  16#define to_mvdev(__vdev) container_of((__vdev), struct mlx5_vdpa_dev, vdev)
  17
  18#define VALID_FEATURES_MASK                                                                        \
  19        (BIT_ULL(VIRTIO_NET_F_CSUM) | BIT_ULL(VIRTIO_NET_F_GUEST_CSUM) |                                   \
  20         BIT_ULL(VIRTIO_NET_F_CTRL_GUEST_OFFLOADS) | BIT_ULL(VIRTIO_NET_F_MTU) | BIT_ULL(VIRTIO_NET_F_MAC) |   \
  21         BIT_ULL(VIRTIO_NET_F_GUEST_TSO4) | BIT_ULL(VIRTIO_NET_F_GUEST_TSO6) |                             \
  22         BIT_ULL(VIRTIO_NET_F_GUEST_ECN) | BIT_ULL(VIRTIO_NET_F_GUEST_UFO) | BIT_ULL(VIRTIO_NET_F_HOST_TSO4) | \
  23         BIT_ULL(VIRTIO_NET_F_HOST_TSO6) | BIT_ULL(VIRTIO_NET_F_HOST_ECN) | BIT_ULL(VIRTIO_NET_F_HOST_UFO) |   \
  24         BIT_ULL(VIRTIO_NET_F_MRG_RXBUF) | BIT_ULL(VIRTIO_NET_F_STATUS) | BIT_ULL(VIRTIO_NET_F_CTRL_VQ) |      \
  25         BIT_ULL(VIRTIO_NET_F_CTRL_RX) | BIT_ULL(VIRTIO_NET_F_CTRL_VLAN) |                                 \
  26         BIT_ULL(VIRTIO_NET_F_CTRL_RX_EXTRA) | BIT_ULL(VIRTIO_NET_F_GUEST_ANNOUNCE) |                      \
  27         BIT_ULL(VIRTIO_NET_F_MQ) | BIT_ULL(VIRTIO_NET_F_CTRL_MAC_ADDR) | BIT_ULL(VIRTIO_NET_F_HASH_REPORT) |  \
  28         BIT_ULL(VIRTIO_NET_F_RSS) | BIT_ULL(VIRTIO_NET_F_RSC_EXT) | BIT_ULL(VIRTIO_NET_F_STANDBY) |           \
  29         BIT_ULL(VIRTIO_NET_F_SPEED_DUPLEX) | BIT_ULL(VIRTIO_F_NOTIFY_ON_EMPTY) |                          \
  30         BIT_ULL(VIRTIO_F_ANY_LAYOUT) | BIT_ULL(VIRTIO_F_VERSION_1) | BIT_ULL(VIRTIO_F_ACCESS_PLATFORM) |      \
  31         BIT_ULL(VIRTIO_F_RING_PACKED) | BIT_ULL(VIRTIO_F_ORDER_PLATFORM) | BIT_ULL(VIRTIO_F_SR_IOV))
  32
  33#define VALID_STATUS_MASK                                                                          \
  34        (VIRTIO_CONFIG_S_ACKNOWLEDGE | VIRTIO_CONFIG_S_DRIVER | VIRTIO_CONFIG_S_DRIVER_OK |        \
  35         VIRTIO_CONFIG_S_FEATURES_OK | VIRTIO_CONFIG_S_NEEDS_RESET | VIRTIO_CONFIG_S_FAILED)
  36
  37struct mlx5_vdpa_net_resources {
  38        u32 tisn;
  39        u32 tdn;
  40        u32 tirn;
  41        u32 rqtn;
  42        bool valid;
  43};
  44
  45struct mlx5_vdpa_cq_buf {
  46        struct mlx5_frag_buf_ctrl fbc;
  47        struct mlx5_frag_buf frag_buf;
  48        int cqe_size;
  49        int nent;
  50};
  51
  52struct mlx5_vdpa_cq {
  53        struct mlx5_core_cq mcq;
  54        struct mlx5_vdpa_cq_buf buf;
  55        struct mlx5_db db;
  56        int cqe;
  57};
  58
  59struct mlx5_vdpa_umem {
  60        struct mlx5_frag_buf_ctrl fbc;
  61        struct mlx5_frag_buf frag_buf;
  62        int size;
  63        u32 id;
  64};
  65
  66struct mlx5_vdpa_qp {
  67        struct mlx5_core_qp mqp;
  68        struct mlx5_frag_buf frag_buf;
  69        struct mlx5_db db;
  70        u16 head;
  71        bool fw;
  72};
  73
  74struct mlx5_vq_restore_info {
  75        u32 num_ent;
  76        u64 desc_addr;
  77        u64 device_addr;
  78        u64 driver_addr;
  79        u16 avail_index;
  80        bool ready;
  81        struct vdpa_callback cb;
  82        bool restore;
  83};
  84
  85struct mlx5_vdpa_virtqueue {
  86        bool ready;
  87        u64 desc_addr;
  88        u64 device_addr;
  89        u64 driver_addr;
  90        u32 num_ent;
  91        struct vdpa_callback event_cb;
  92
  93        /* Resources for implementing the notification channel from the device
  94         * to the driver. fwqp is the firmware end of an RC connection; the
  95         * other end is vqqp used by the driver. cq is where completions are
  96         * reported.
  97         */
  98        struct mlx5_vdpa_cq cq;
  99        struct mlx5_vdpa_qp fwqp;
 100        struct mlx5_vdpa_qp vqqp;
 101
 102        /* umem resources are required for the virtqueue operation. Their use
 103         * is internal and they must be provided by the driver.
 104         */
 105        struct mlx5_vdpa_umem umem1;
 106        struct mlx5_vdpa_umem umem2;
 107        struct mlx5_vdpa_umem umem3;
 108
 109        bool initialized;
 110        int index;
 111        u32 virtq_id;
 112        struct mlx5_vdpa_net *ndev;
 113        u16 avail_idx;
 114        int fw_state;
 115
 116        /* keep last in the struct */
 117        struct mlx5_vq_restore_info ri;
 118};
 119
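/* A sketch (not part of the original file) of the notification path the
 * struct above implements: the firmware end (fwqp) sends on the RC
 * connection, the message lands on the driver end (vqqp), and the
 * resulting CQE on cq is forwarded to the vdpa core:
 *
 *   fwqp --RC--> vqqp --> CQE on cq.mcq
 *     -> mlx5_vdpa_cq_comp() -> mlx5_vdpa_handle_completions()
 *     -> mvq->event_cb.callback(mvq->event_cb.private)
 */
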
 120/* We will remove this limitation once mlx5_vdpa_alloc_resources()
 121 * provides for driver space allocation
 122 */
 123#define MLX5_MAX_SUPPORTED_VQS 16
 124
 125struct mlx5_vdpa_net {
 126        struct mlx5_vdpa_dev mvdev;
 127        struct mlx5_vdpa_net_resources res;
 128        struct virtio_net_config config;
 129        struct mlx5_vdpa_virtqueue vqs[MLX5_MAX_SUPPORTED_VQS];
 130
 131        /* Serialize vq resource creation and destruction. This is required
 132         * since the memory map might change and we need to destroy and create
 133         * resources while the driver is operational.
 134         */
 135        struct mutex reslock;
 136        struct mlx5_flow_table *rxft;
 137        struct mlx5_fc *rx_counter;
 138        struct mlx5_flow_handle *rx_rule;
 139        bool setup;
 140        u16 mtu;
 141};
 142
 143static void free_resources(struct mlx5_vdpa_net *ndev);
 144static void init_mvqs(struct mlx5_vdpa_net *ndev);
 145static int setup_driver(struct mlx5_vdpa_net *ndev);
 146static void teardown_driver(struct mlx5_vdpa_net *ndev);
 147
 148static bool mlx5_vdpa_debug;
 149
 150#define MLX5_LOG_VIO_FLAG(_feature)                                                                \
 151        do {                                                                                       \
 152                if (features & BIT_ULL(_feature))                                                  \
 153                        mlx5_vdpa_info(mvdev, "%s\n", #_feature);                                  \
 154        } while (0)
 155
 156#define MLX5_LOG_VIO_STAT(_status)                                                                 \
 157        do {                                                                                       \
 158                if (status & (_status))                                                            \
 159                        mlx5_vdpa_info(mvdev, "%s\n", #_status);                                   \
 160        } while (0)
 161
 162static void print_status(struct mlx5_vdpa_dev *mvdev, u8 status, bool set)
 163{
 164        if (status & ~VALID_STATUS_MASK)
 165                mlx5_vdpa_warn(mvdev, "Warning: there are invalid status bits 0x%x\n",
 166                               status & ~VALID_STATUS_MASK);
 167
 168        if (!mlx5_vdpa_debug)
 169                return;
 170
 171        mlx5_vdpa_info(mvdev, "driver status %s\n", set ? "set" : "get");
 172        if (set && !status) {
 173                mlx5_vdpa_info(mvdev, "driver resets the device\n");
 174                return;
 175        }
 176
 177        MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_ACKNOWLEDGE);
 178        MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_DRIVER);
 179        MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_DRIVER_OK);
 180        MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_FEATURES_OK);
 181        MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_NEEDS_RESET);
 182        MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_FAILED);
 183}
 184
 185static void print_features(struct mlx5_vdpa_dev *mvdev, u64 features, bool set)
 186{
 187        if (features & ~VALID_FEATURES_MASK)
 188                mlx5_vdpa_warn(mvdev, "There are invalid feature bits 0x%llx\n",
 189                               features & ~VALID_FEATURES_MASK);
 190
 191        if (!mlx5_vdpa_debug)
 192                return;
 193
 194        mlx5_vdpa_info(mvdev, "driver %s feature bits:\n", set ? "sets" : "reads");
 195        if (!features)
 196                mlx5_vdpa_info(mvdev, "all feature bits are cleared\n");
 197
 198        MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CSUM);
 199        MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_CSUM);
 200        MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_GUEST_OFFLOADS);
 201        MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_MTU);
 202        MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_MAC);
 203        MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_TSO4);
 204        MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_TSO6);
 205        MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_ECN);
 206        MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_UFO);
 207        MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HOST_TSO4);
 208        MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HOST_TSO6);
 209        MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HOST_ECN);
 210        MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HOST_UFO);
 211        MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_MRG_RXBUF);
 212        MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_STATUS);
 213        MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_VQ);
 214        MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_RX);
 215        MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_VLAN);
 216        MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_RX_EXTRA);
 217        MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_ANNOUNCE);
 218        MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_MQ);
 219        MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_MAC_ADDR);
 220        MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HASH_REPORT);
 221        MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_RSS);
 222        MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_RSC_EXT);
 223        MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_STANDBY);
 224        MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_SPEED_DUPLEX);
 225        MLX5_LOG_VIO_FLAG(VIRTIO_F_NOTIFY_ON_EMPTY);
 226        MLX5_LOG_VIO_FLAG(VIRTIO_F_ANY_LAYOUT);
 227        MLX5_LOG_VIO_FLAG(VIRTIO_F_VERSION_1);
 228        MLX5_LOG_VIO_FLAG(VIRTIO_F_ACCESS_PLATFORM);
 229        MLX5_LOG_VIO_FLAG(VIRTIO_F_RING_PACKED);
 230        MLX5_LOG_VIO_FLAG(VIRTIO_F_ORDER_PLATFORM);
 231        MLX5_LOG_VIO_FLAG(VIRTIO_F_SR_IOV);
 232}
 233
 234static int create_tis(struct mlx5_vdpa_net *ndev)
 235{
 236        struct mlx5_vdpa_dev *mvdev = &ndev->mvdev;
 237        u32 in[MLX5_ST_SZ_DW(create_tis_in)] = {};
 238        void *tisc;
 239        int err;
 240
 241        tisc = MLX5_ADDR_OF(create_tis_in, in, ctx);
 242        MLX5_SET(tisc, tisc, transport_domain, ndev->res.tdn);
 243        err = mlx5_vdpa_create_tis(mvdev, in, &ndev->res.tisn);
 244        if (err)
 245                mlx5_vdpa_warn(mvdev, "create TIS (%d)\n", err);
 246
 247        return err;
 248}
 249
 250static void destroy_tis(struct mlx5_vdpa_net *ndev)
 251{
 252        mlx5_vdpa_destroy_tis(&ndev->mvdev, ndev->res.tisn);
 253}
 254
 255#define MLX5_VDPA_CQE_SIZE 64
 256#define MLX5_VDPA_LOG_CQE_SIZE ilog2(MLX5_VDPA_CQE_SIZE)
 257
 258static int cq_frag_buf_alloc(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_cq_buf *buf, int nent)
 259{
 260        struct mlx5_frag_buf *frag_buf = &buf->frag_buf;
 261        u8 log_wq_stride = MLX5_VDPA_LOG_CQE_SIZE;
 262        u8 log_wq_sz = MLX5_VDPA_LOG_CQE_SIZE;
 263        int err;
 264
 265        err = mlx5_frag_buf_alloc_node(ndev->mvdev.mdev, nent * MLX5_VDPA_CQE_SIZE, frag_buf,
 266                                       ndev->mvdev.mdev->priv.numa_node);
 267        if (err)
 268                return err;
 269
 270        mlx5_init_fbc(frag_buf->frags, log_wq_stride, log_wq_sz, &buf->fbc);
 271
 272        buf->cqe_size = MLX5_VDPA_CQE_SIZE;
 273        buf->nent = nent;
 274
 275        return 0;
 276}
 277
 278static int umem_frag_buf_alloc(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_umem *umem, int size)
 279{
 280        struct mlx5_frag_buf *frag_buf = &umem->frag_buf;
 281
 282        return mlx5_frag_buf_alloc_node(ndev->mvdev.mdev, size, frag_buf,
 283                                        ndev->mvdev.mdev->priv.numa_node);
 284}
 285
 286static void cq_frag_buf_free(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_cq_buf *buf)
 287{
 288        mlx5_frag_buf_free(ndev->mvdev.mdev, &buf->frag_buf);
 289}
 290
 291static void *get_cqe(struct mlx5_vdpa_cq *vcq, int n)
 292{
 293        return mlx5_frag_buf_get_wqe(&vcq->buf.fbc, n);
 294}
 295
 296static void cq_frag_buf_init(struct mlx5_vdpa_cq *vcq, struct mlx5_vdpa_cq_buf *buf)
 297{
 298        struct mlx5_cqe64 *cqe64;
 299        void *cqe;
 300        int i;
 301
 302        for (i = 0; i < buf->nent; i++) {
 303                cqe = get_cqe(vcq, i);
 304                cqe64 = cqe;
 305                cqe64->op_own = MLX5_CQE_INVALID << 4;
 306        }
 307}
 308
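/* CQE ownership: hardware toggles the owner bit in op_own each time it
 * wraps around the CQ, so a CQE belongs to software when that bit matches
 * the parity of the consumer index. A sketch of the check performed by
 * get_sw_cqe() below, assuming cq->cqe is a power of two:
 *
 *   sw_own = !!(n & cq->cqe);                      // flips on every wrap
 *   hw_own = cqe64->op_own & MLX5_CQE_OWNER_MASK;
 *   valid  = get_cqe_opcode(cqe64) != MLX5_CQE_INVALID && hw_own == sw_own;
 */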
 309static void *get_sw_cqe(struct mlx5_vdpa_cq *cq, int n)
 310{
 311        struct mlx5_cqe64 *cqe64 = get_cqe(cq, n & (cq->cqe - 1));
 312
 313        if (likely(get_cqe_opcode(cqe64) != MLX5_CQE_INVALID) &&
 314            !((cqe64->op_own & MLX5_CQE_OWNER_MASK) ^ !!(n & cq->cqe)))
 315                return cqe64;
 316
 317        return NULL;
 318}
 319
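/* Account for n newly posted receive entries and publish the new head to
 * the hardware-visible doorbell record.
 */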
 320static void rx_post(struct mlx5_vdpa_qp *vqp, int n)
 321{
 322        vqp->head += n;
 323        vqp->db.db[0] = cpu_to_be32(vqp->head);
 324}
 325
 326static void qp_prepare(struct mlx5_vdpa_net *ndev, bool fw, void *in,
 327                       struct mlx5_vdpa_virtqueue *mvq, u32 num_ent)
 328{
 329        struct mlx5_vdpa_qp *vqp;
 330        __be64 *pas;
 331        void *qpc;
 332
 333        vqp = fw ? &mvq->fwqp : &mvq->vqqp;
 334        MLX5_SET(create_qp_in, in, uid, ndev->mvdev.res.uid);
 335        qpc = MLX5_ADDR_OF(create_qp_in, in, qpc);
 336        if (vqp->fw) {
 337                /* Firmware QP is allocated by the driver for the firmware's
 338                 * use so we can skip part of the params as they will be chosen by firmware
 339                 */
 340                qpc = MLX5_ADDR_OF(create_qp_in, in, qpc);
 341                MLX5_SET(qpc, qpc, rq_type, MLX5_ZERO_LEN_RQ);
 342                MLX5_SET(qpc, qpc, no_sq, 1);
 343                return;
 344        }
 345
 346        MLX5_SET(qpc, qpc, st, MLX5_QP_ST_RC);
 347        MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED);
 348        MLX5_SET(qpc, qpc, pd, ndev->mvdev.res.pdn);
 349        MLX5_SET(qpc, qpc, mtu, MLX5_QPC_MTU_256_BYTES);
 350        MLX5_SET(qpc, qpc, uar_page, ndev->mvdev.res.uar->index);
 351        MLX5_SET(qpc, qpc, log_page_size, vqp->frag_buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT);
 352        MLX5_SET(qpc, qpc, no_sq, 1);
 353        MLX5_SET(qpc, qpc, cqn_rcv, mvq->cq.mcq.cqn);
 354        MLX5_SET(qpc, qpc, log_rq_size, ilog2(num_ent));
 355        MLX5_SET(qpc, qpc, rq_type, MLX5_NON_ZERO_RQ);
 356        pas = (__be64 *)MLX5_ADDR_OF(create_qp_in, in, pas);
 357        mlx5_fill_page_frag_array(&vqp->frag_buf, pas);
 358}
 359
 360static int rq_buf_alloc(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_qp *vqp, u32 num_ent)
 361{
 362        return mlx5_frag_buf_alloc_node(ndev->mvdev.mdev,
 363                                        num_ent * sizeof(struct mlx5_wqe_data_seg), &vqp->frag_buf,
 364                                        ndev->mvdev.mdev->priv.numa_node);
 365}
 366
 367static void rq_buf_free(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_qp *vqp)
 368{
 369        mlx5_frag_buf_free(ndev->mvdev.mdev, &vqp->frag_buf);
 370}
 371
 372static int qp_create(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq,
 373                     struct mlx5_vdpa_qp *vqp)
 374{
 375        struct mlx5_core_dev *mdev = ndev->mvdev.mdev;
 376        int inlen = MLX5_ST_SZ_BYTES(create_qp_in);
 377        u32 out[MLX5_ST_SZ_DW(create_qp_out)] = {};
 378        void *qpc;
 379        void *in;
 380        int err;
 381
 382        if (!vqp->fw) {
 383                vqp = &mvq->vqqp;
 384                err = rq_buf_alloc(ndev, vqp, mvq->num_ent);
 385                if (err)
 386                        return err;
 387
 388                err = mlx5_db_alloc(ndev->mvdev.mdev, &vqp->db);
 389                if (err)
 390                        goto err_db;
 391                inlen += vqp->frag_buf.npages * sizeof(__be64);
 392        }
 393
 394        in = kzalloc(inlen, GFP_KERNEL);
 395        if (!in) {
 396                err = -ENOMEM;
 397                goto err_kzalloc;
 398        }
 399
 400        qp_prepare(ndev, vqp->fw, in, mvq, mvq->num_ent);
 401        qpc = MLX5_ADDR_OF(create_qp_in, in, qpc);
 402        MLX5_SET(qpc, qpc, st, MLX5_QP_ST_RC);
 403        MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED);
 404        MLX5_SET(qpc, qpc, pd, ndev->mvdev.res.pdn);
 405        MLX5_SET(qpc, qpc, mtu, MLX5_QPC_MTU_256_BYTES);
 406        if (!vqp->fw)
 407                MLX5_SET64(qpc, qpc, dbr_addr, vqp->db.dma);
 408        MLX5_SET(create_qp_in, in, opcode, MLX5_CMD_OP_CREATE_QP);
 409        err = mlx5_cmd_exec(mdev, in, inlen, out, sizeof(out));
 410        kfree(in);
 411        if (err)
 412                goto err_kzalloc;
 413
 414        vqp->mqp.uid = ndev->mvdev.res.uid;
 415        vqp->mqp.qpn = MLX5_GET(create_qp_out, out, qpn);
 416
 417        if (!vqp->fw)
 418                rx_post(vqp, mvq->num_ent);
 419
 420        return 0;
 421
 422err_kzalloc:
 423        if (!vqp->fw)
 424                mlx5_db_free(ndev->mvdev.mdev, &vqp->db);
 425err_db:
 426        if (!vqp->fw)
 427                rq_buf_free(ndev, vqp);
 428
 429        return err;
 430}
 431
 432static void qp_destroy(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_qp *vqp)
 433{
 434        u32 in[MLX5_ST_SZ_DW(destroy_qp_in)] = {};
 435
 436        MLX5_SET(destroy_qp_in, in, opcode, MLX5_CMD_OP_DESTROY_QP);
 437        MLX5_SET(destroy_qp_in, in, qpn, vqp->mqp.qpn);
 438        MLX5_SET(destroy_qp_in, in, uid, ndev->mvdev.res.uid);
 439        if (mlx5_cmd_exec_in(ndev->mvdev.mdev, destroy_qp, in))
 440                mlx5_vdpa_warn(&ndev->mvdev, "destroy qp 0x%x\n", vqp->mqp.qpn);
 441        if (!vqp->fw) {
 442                mlx5_db_free(ndev->mvdev.mdev, &vqp->db);
 443                rq_buf_free(ndev, vqp);
 444        }
 445}
 446
 447static void *next_cqe_sw(struct mlx5_vdpa_cq *cq)
 448{
 449        return get_sw_cqe(cq, cq->mcq.cons_index);
 450}
 451
 452static int mlx5_vdpa_poll_one(struct mlx5_vdpa_cq *vcq)
 453{
 454        struct mlx5_cqe64 *cqe64;
 455
 456        cqe64 = next_cqe_sw(vcq);
 457        if (!cqe64)
 458                return -EAGAIN;
 459
 460        vcq->mcq.cons_index++;
 461        return 0;
 462}
 463
 464static void mlx5_vdpa_handle_completions(struct mlx5_vdpa_virtqueue *mvq, int num)
 465{
 466        mlx5_cq_set_ci(&mvq->cq.mcq);
 467        rx_post(&mvq->vqqp, num);
 468        if (mvq->event_cb.callback)
 469                mvq->event_cb.callback(mvq->event_cb.private);
 470}
 471
 472static void mlx5_vdpa_cq_comp(struct mlx5_core_cq *mcq, struct mlx5_eqe *eqe)
 473{
 474        struct mlx5_vdpa_virtqueue *mvq = container_of(mcq, struct mlx5_vdpa_virtqueue, cq.mcq);
 475        struct mlx5_vdpa_net *ndev = mvq->ndev;
 476        void __iomem *uar_page = ndev->mvdev.res.uar->map;
 477        int num = 0;
 478
 479        while (!mlx5_vdpa_poll_one(&mvq->cq)) {
 480                num++;
 481                if (num > mvq->num_ent / 2) {
 482                        /* If completions keep coming while we poll, we want to
 483                         * let the hardware know that we consumed them by
 484                         * updating the doorbell record.  We also let the vdpa core
 485                         * know about this so it can pass it on to the virtio
 486                         * driver in the guest.
 487                         */
 488                        mlx5_vdpa_handle_completions(mvq, num);
 489                        num = 0;
 490                }
 491        }
 492
 493        if (num)
 494                mlx5_vdpa_handle_completions(mvq, num);
 495
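        /* Re-arm the CQ so the device generates an event for the next completion */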
 496        mlx5_cq_arm(&mvq->cq.mcq, MLX5_CQ_DB_REQ_NOT, uar_page, mvq->cq.mcq.cons_index);
 497}
 498
 499static int cq_create(struct mlx5_vdpa_net *ndev, u16 idx, u32 num_ent)
 500{
 501        struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx];
 502        struct mlx5_core_dev *mdev = ndev->mvdev.mdev;
 503        void __iomem *uar_page = ndev->mvdev.res.uar->map;
 504        u32 out[MLX5_ST_SZ_DW(create_cq_out)];
 505        struct mlx5_vdpa_cq *vcq = &mvq->cq;
 506        unsigned int irqn;
 507        __be64 *pas;
 508        int inlen;
 509        void *cqc;
 510        void *in;
 511        int err;
 512        int eqn;
 513
 514        err = mlx5_db_alloc(mdev, &vcq->db);
 515        if (err)
 516                return err;
 517
 518        vcq->mcq.set_ci_db = vcq->db.db;
 519        vcq->mcq.arm_db = vcq->db.db + 1;
 520        vcq->mcq.cqe_sz = 64;
 521
 522        err = cq_frag_buf_alloc(ndev, &vcq->buf, num_ent);
 523        if (err)
 524                goto err_db;
 525
 526        cq_frag_buf_init(vcq, &vcq->buf);
 527
 528        inlen = MLX5_ST_SZ_BYTES(create_cq_in) +
 529                MLX5_FLD_SZ_BYTES(create_cq_in, pas[0]) * vcq->buf.frag_buf.npages;
 530        in = kzalloc(inlen, GFP_KERNEL);
 531        if (!in) {
 532                err = -ENOMEM;
 533                goto err_vzalloc;
 534        }
 535
 536        MLX5_SET(create_cq_in, in, uid, ndev->mvdev.res.uid);
 537        pas = (__be64 *)MLX5_ADDR_OF(create_cq_in, in, pas);
 538        mlx5_fill_page_frag_array(&vcq->buf.frag_buf, pas);
 539
 540        cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context);
 541        MLX5_SET(cqc, cqc, log_page_size, vcq->buf.frag_buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT);
 542
 543        /* Use vector 0 by default. Consider adding code to choose least used
 544         * vector.
 545         */
 546        err = mlx5_vector2eqn(mdev, 0, &eqn, &irqn);
 547        if (err)
 548                goto err_vec;
 549
 550        cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context);
 551        MLX5_SET(cqc, cqc, log_cq_size, ilog2(num_ent));
 552        MLX5_SET(cqc, cqc, uar_page, ndev->mvdev.res.uar->index);
 553        MLX5_SET(cqc, cqc, c_eqn, eqn);
 554        MLX5_SET64(cqc, cqc, dbr_addr, vcq->db.dma);
 555
 556        err = mlx5_core_create_cq(mdev, &vcq->mcq, in, inlen, out, sizeof(out));
 557        if (err)
 558                goto err_vec;
 559
 560        vcq->mcq.comp = mlx5_vdpa_cq_comp;
 561        vcq->cqe = num_ent;
 562        vcq->mcq.set_ci_db = vcq->db.db;
 563        vcq->mcq.arm_db = vcq->db.db + 1;
 564        mlx5_cq_arm(&mvq->cq.mcq, MLX5_CQ_DB_REQ_NOT, uar_page, mvq->cq.mcq.cons_index);
 565        kfree(in);
 566        return 0;
 567
 568err_vec:
 569        kfree(in);
 570err_vzalloc:
 571        cq_frag_buf_free(ndev, &vcq->buf);
 572err_db:
 573        mlx5_db_free(ndev->mvdev.mdev, &vcq->db);
 574        return err;
 575}
 576
 577static void cq_destroy(struct mlx5_vdpa_net *ndev, u16 idx)
 578{
 579        struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx];
 580        struct mlx5_core_dev *mdev = ndev->mvdev.mdev;
 581        struct mlx5_vdpa_cq *vcq = &mvq->cq;
 582
 583        if (mlx5_core_destroy_cq(mdev, &vcq->mcq)) {
 584                mlx5_vdpa_warn(&ndev->mvdev, "destroy CQ 0x%x\n", vcq->mcq.cqn);
 585                return;
 586        }
 587        cq_frag_buf_free(ndev, &vcq->buf);
 588        mlx5_db_free(ndev->mvdev.mdev, &vcq->db);
 589}
 590
 591static int umem_size(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, int num,
 592                     struct mlx5_vdpa_umem **umemp)
 593{
 594        struct mlx5_core_dev *mdev = ndev->mvdev.mdev;
 595        int p_a;
 596        int p_b;
 597
 598        switch (num) {
 599        case 1:
 600                p_a = MLX5_CAP_DEV_VDPA_EMULATION(mdev, umem_1_buffer_param_a);
 601                p_b = MLX5_CAP_DEV_VDPA_EMULATION(mdev, umem_1_buffer_param_b);
 602                *umemp = &mvq->umem1;
 603                break;
 604        case 2:
 605                p_a = MLX5_CAP_DEV_VDPA_EMULATION(mdev, umem_2_buffer_param_a);
 606                p_b = MLX5_CAP_DEV_VDPA_EMULATION(mdev, umem_2_buffer_param_b);
 607                *umemp = &mvq->umem2;
 608                break;
 609        case 3:
 610                p_a = MLX5_CAP_DEV_VDPA_EMULATION(mdev, umem_3_buffer_param_a);
 611                p_b = MLX5_CAP_DEV_VDPA_EMULATION(mdev, umem_3_buffer_param_b);
 612                *umemp = &mvq->umem3;
 613                break;
 614        }
 615        return p_a * mvq->num_ent + p_b;
 616}
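
/* The required umem size is a linear function of the queue size, with
 * per-umem coefficients taken from the device capabilities:
 *
 *   size_N = umem_N_buffer_param_a * num_ent + umem_N_buffer_param_b
 *
 * e.g. with illustrative (not device-reported) values param_a = 128 and
 * param_b = 4096, a 256-entry queue needs 128 * 256 + 4096 = 36864 bytes.
 */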
 617
 618static void umem_frag_buf_free(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_umem *umem)
 619{
 620        mlx5_frag_buf_free(ndev->mvdev.mdev, &umem->frag_buf);
 621}
 622
 623static int create_umem(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, int num)
 624{
 625        int inlen;
 626        u32 out[MLX5_ST_SZ_DW(create_umem_out)] = {};
 627        void *um;
 628        void *in;
 629        int err;
 630        __be64 *pas;
 631        int size;
 632        struct mlx5_vdpa_umem *umem;
 633
 634        size = umem_size(ndev, mvq, num, &umem);
 635        if (size < 0)
 636                return size;
 637
 638        umem->size = size;
 639        err = umem_frag_buf_alloc(ndev, umem, size);
 640        if (err)
 641                return err;
 642
 643        inlen = MLX5_ST_SZ_BYTES(create_umem_in) + MLX5_ST_SZ_BYTES(mtt) * umem->frag_buf.npages;
 644
 645        in = kzalloc(inlen, GFP_KERNEL);
 646        if (!in) {
 647                err = -ENOMEM;
 648                goto err_in;
 649        }
 650
 651        MLX5_SET(create_umem_in, in, opcode, MLX5_CMD_OP_CREATE_UMEM);
 652        MLX5_SET(create_umem_in, in, uid, ndev->mvdev.res.uid);
 653        um = MLX5_ADDR_OF(create_umem_in, in, umem);
 654        MLX5_SET(umem, um, log_page_size, umem->frag_buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT);
 655        MLX5_SET64(umem, um, num_of_mtt, umem->frag_buf.npages);
 656
 657        pas = (__be64 *)MLX5_ADDR_OF(umem, um, mtt[0]);
 658        mlx5_fill_page_frag_array_perm(&umem->frag_buf, pas, MLX5_MTT_PERM_RW);
 659
 660        err = mlx5_cmd_exec(ndev->mvdev.mdev, in, inlen, out, sizeof(out));
 661        if (err) {
 662                mlx5_vdpa_warn(&ndev->mvdev, "create umem(%d)\n", err);
 663                goto err_cmd;
 664        }
 665
 666        kfree(in);
 667        umem->id = MLX5_GET(create_umem_out, out, umem_id);
 668
 669        return 0;
 670
 671err_cmd:
 672        kfree(in);
 673err_in:
 674        umem_frag_buf_free(ndev, umem);
 675        return err;
 676}
 677
 678static void umem_destroy(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, int num)
 679{
 680        u32 in[MLX5_ST_SZ_DW(destroy_umem_in)] = {};
 681        u32 out[MLX5_ST_SZ_DW(destroy_umem_out)] = {};
 682        struct mlx5_vdpa_umem *umem;
 683
 684        switch (num) {
 685        case 1:
 686                umem = &mvq->umem1;
 687                break;
 688        case 2:
 689                umem = &mvq->umem2;
 690                break;
 691        case 3:
 692                umem = &mvq->umem3;
 693                break;
 694        }
 695
 696        MLX5_SET(destroy_umem_in, in, opcode, MLX5_CMD_OP_DESTROY_UMEM);
 697        MLX5_SET(destroy_umem_in, in, umem_id, umem->id);
 698        if (mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, sizeof(out)))
 699                return;
 700
 701        umem_frag_buf_free(ndev, umem);
 702}
 703
 704static int umems_create(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
 705{
 706        int num;
 707        int err;
 708
 709        for (num = 1; num <= 3; num++) {
 710                err = create_umem(ndev, mvq, num);
 711                if (err)
 712                        goto err_umem;
 713        }
 714        return 0;
 715
 716err_umem:
 717        for (num--; num > 0; num--)
 718                umem_destroy(ndev, mvq, num);
 719
 720        return err;
 721}
 722
 723static void umems_destroy(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
 724{
 725        int num;
 726
 727        for (num = 3; num > 0; num--)
 728                umem_destroy(ndev, mvq, num);
 729}
 730
 731static int get_queue_type(struct mlx5_vdpa_net *ndev)
 732{
 733        u32 type_mask;
 734
 735        type_mask = MLX5_CAP_DEV_VDPA_EMULATION(ndev->mvdev.mdev, virtio_queue_type);
 736
 737        /* prefer split queue */
 738        if (type_mask & MLX5_VIRTIO_EMULATION_CAP_VIRTIO_QUEUE_TYPE_SPLIT)
 739                return MLX5_VIRTIO_EMULATION_VIRTIO_QUEUE_TYPE_SPLIT;
 740
 741        WARN_ON(!(type_mask & MLX5_VIRTIO_EMULATION_CAP_VIRTIO_QUEUE_TYPE_PACKED));
 742
 743        return MLX5_VIRTIO_EMULATION_VIRTIO_QUEUE_TYPE_PACKED;
 744}
 745
 746static bool vq_is_tx(u16 idx)
 747{
 748        return idx % 2;
 749}
 750
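/* Pack the offload-related virtio-net feature bits into the layout the
 * device expects in the queue_feature_bit_mask_12_3 field of the virtqueue
 * object (see create_virtqueue()). For example (a sketch, not from the
 * original file):
 *
 *   get_features_12_3(BIT_ULL(VIRTIO_NET_F_CSUM) |
 *                     BIT_ULL(VIRTIO_NET_F_HOST_TSO4)) == (1 << 7) | (1 << 9)
 */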
 751static u16 get_features_12_3(u64 features)
 752{
 753        return (!!(features & BIT_ULL(VIRTIO_NET_F_HOST_TSO4)) << 9) |
 754               (!!(features & BIT_ULL(VIRTIO_NET_F_HOST_TSO6)) << 8) |
 755               (!!(features & BIT_ULL(VIRTIO_NET_F_CSUM)) << 7) |
 756               (!!(features & BIT_ULL(VIRTIO_NET_F_GUEST_CSUM)) << 6);
 757}
 758
 759static int create_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
 760{
 761        int inlen = MLX5_ST_SZ_BYTES(create_virtio_net_q_in);
 762        u32 out[MLX5_ST_SZ_DW(create_virtio_net_q_out)] = {};
 763        void *obj_context;
 764        void *cmd_hdr;
 765        void *vq_ctx;
 766        void *in;
 767        int err;
 768
 769        err = umems_create(ndev, mvq);
 770        if (err)
 771                return err;
 772
 773        in = kzalloc(inlen, GFP_KERNEL);
 774        if (!in) {
 775                err = -ENOMEM;
 776                goto err_alloc;
 777        }
 778
 779        cmd_hdr = MLX5_ADDR_OF(create_virtio_net_q_in, in, general_obj_in_cmd_hdr);
 780
 781        MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_CREATE_GENERAL_OBJECT);
 782        MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_VIRTIO_NET_Q);
 783        MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, uid, ndev->mvdev.res.uid);
 784
 785        obj_context = MLX5_ADDR_OF(create_virtio_net_q_in, in, obj_context);
 786        MLX5_SET(virtio_net_q_object, obj_context, hw_available_index, mvq->avail_idx);
 787        MLX5_SET(virtio_net_q_object, obj_context, queue_feature_bit_mask_12_3,
 788                 get_features_12_3(ndev->mvdev.actual_features));
 789        vq_ctx = MLX5_ADDR_OF(virtio_net_q_object, obj_context, virtio_q_context);
 790        MLX5_SET(virtio_q, vq_ctx, virtio_q_type, get_queue_type(ndev));
 791
 792        if (vq_is_tx(mvq->index))
 793                MLX5_SET(virtio_net_q_object, obj_context, tisn_or_qpn, ndev->res.tisn);
 794
 795        MLX5_SET(virtio_q, vq_ctx, event_mode, MLX5_VIRTIO_Q_EVENT_MODE_QP_MODE);
 796        MLX5_SET(virtio_q, vq_ctx, queue_index, mvq->index);
 797        MLX5_SET(virtio_q, vq_ctx, event_qpn_or_msix, mvq->fwqp.mqp.qpn);
 798        MLX5_SET(virtio_q, vq_ctx, queue_size, mvq->num_ent);
 799        MLX5_SET(virtio_q, vq_ctx, virtio_version_1_0,
 800                 !!(ndev->mvdev.actual_features & BIT_ULL(VIRTIO_F_VERSION_1)));
 801        MLX5_SET64(virtio_q, vq_ctx, desc_addr, mvq->desc_addr);
 802        MLX5_SET64(virtio_q, vq_ctx, used_addr, mvq->device_addr);
 803        MLX5_SET64(virtio_q, vq_ctx, available_addr, mvq->driver_addr);
 804        MLX5_SET(virtio_q, vq_ctx, virtio_q_mkey, ndev->mvdev.mr.mkey.key);
 805        MLX5_SET(virtio_q, vq_ctx, umem_1_id, mvq->umem1.id);
 806        MLX5_SET(virtio_q, vq_ctx, umem_1_size, mvq->umem1.size);
 807        MLX5_SET(virtio_q, vq_ctx, umem_2_id, mvq->umem2.id);
 808        MLX5_SET(virtio_q, vq_ctx, umem_2_size, mvq->umem2.size);
 809        MLX5_SET(virtio_q, vq_ctx, umem_3_id, mvq->umem3.id);
 810        MLX5_SET(virtio_q, vq_ctx, umem_3_size, mvq->umem3.size);
 811        MLX5_SET(virtio_q, vq_ctx, pd, ndev->mvdev.res.pdn);
 812        if (MLX5_CAP_DEV_VDPA_EMULATION(ndev->mvdev.mdev, eth_frame_offload_type))
 813                MLX5_SET(virtio_q, vq_ctx, virtio_version_1_0, 1);
 814
 815        err = mlx5_cmd_exec(ndev->mvdev.mdev, in, inlen, out, sizeof(out));
 816        if (err)
 817                goto err_cmd;
 818
 819        kfree(in);
 820        mvq->virtq_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id);
 821
 822        return 0;
 823
 824err_cmd:
 825        kfree(in);
 826err_alloc:
 827        umems_destroy(ndev, mvq);
 828        return err;
 829}
 830
 831static void destroy_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
 832{
 833        u32 in[MLX5_ST_SZ_DW(destroy_virtio_net_q_in)] = {};
 834        u32 out[MLX5_ST_SZ_DW(destroy_virtio_net_q_out)] = {};
 835
 836        MLX5_SET(destroy_virtio_net_q_in, in, general_obj_out_cmd_hdr.opcode,
 837                 MLX5_CMD_OP_DESTROY_GENERAL_OBJECT);
 838        MLX5_SET(destroy_virtio_net_q_in, in, general_obj_out_cmd_hdr.obj_id, mvq->virtq_id);
 839        MLX5_SET(destroy_virtio_net_q_in, in, general_obj_out_cmd_hdr.uid, ndev->mvdev.res.uid);
 840        MLX5_SET(destroy_virtio_net_q_in, in, general_obj_out_cmd_hdr.obj_type,
 841                 MLX5_OBJ_TYPE_VIRTIO_NET_Q);
 842        if (mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, sizeof(out))) {
 843                mlx5_vdpa_warn(&ndev->mvdev, "destroy virtqueue 0x%x\n", mvq->virtq_id);
 844                return;
 845        }
 846        umems_destroy(ndev, mvq);
 847}
 848
 849static u32 get_rqpn(struct mlx5_vdpa_virtqueue *mvq, bool fw)
 850{
 851        return fw ? mvq->vqqp.mqp.qpn : mvq->fwqp.mqp.qpn;
 852}
 853
 854static u32 get_qpn(struct mlx5_vdpa_virtqueue *mvq, bool fw)
 855{
 856        return fw ? mvq->fwqp.mqp.qpn : mvq->vqqp.mqp.qpn;
 857}
 858
 859static void alloc_inout(struct mlx5_vdpa_net *ndev, int cmd, void **in, int *inlen, void **out,
 860                        int *outlen, u32 qpn, u32 rqpn)
 861{
 862        void *qpc;
 863        void *pp;
 864
 865        switch (cmd) {
 866        case MLX5_CMD_OP_2RST_QP:
 867                *inlen = MLX5_ST_SZ_BYTES(qp_2rst_in);
 868                *outlen = MLX5_ST_SZ_BYTES(qp_2rst_out);
 869                *in = kzalloc(*inlen, GFP_KERNEL);
 870                *out = kzalloc(*outlen, GFP_KERNEL);
 871                if (!*in || !*out)
 872                        goto outerr;
 873
 874                MLX5_SET(qp_2rst_in, *in, opcode, cmd);
 875                MLX5_SET(qp_2rst_in, *in, uid, ndev->mvdev.res.uid);
 876                MLX5_SET(qp_2rst_in, *in, qpn, qpn);
 877                break;
 878        case MLX5_CMD_OP_RST2INIT_QP:
 879                *inlen = MLX5_ST_SZ_BYTES(rst2init_qp_in);
 880                *outlen = MLX5_ST_SZ_BYTES(rst2init_qp_out);
 881                *in = kzalloc(*inlen, GFP_KERNEL);
 882                *out = kzalloc(MLX5_ST_SZ_BYTES(rst2init_qp_out), GFP_KERNEL);
 883                if (!*in || !*out)
 884                        goto outerr;
 885
 886                MLX5_SET(rst2init_qp_in, *in, opcode, cmd);
 887                MLX5_SET(rst2init_qp_in, *in, uid, ndev->mvdev.res.uid);
 888                MLX5_SET(rst2init_qp_in, *in, qpn, qpn);
 889                qpc = MLX5_ADDR_OF(rst2init_qp_in, *in, qpc);
 890                MLX5_SET(qpc, qpc, remote_qpn, rqpn);
 891                MLX5_SET(qpc, qpc, rwe, 1);
 892                pp = MLX5_ADDR_OF(qpc, qpc, primary_address_path);
 893                MLX5_SET(ads, pp, vhca_port_num, 1);
 894                break;
 895        case MLX5_CMD_OP_INIT2RTR_QP:
 896                *inlen = MLX5_ST_SZ_BYTES(init2rtr_qp_in);
 897                *outlen = MLX5_ST_SZ_BYTES(init2rtr_qp_out);
 898                *in = kzalloc(*inlen, GFP_KERNEL);
 899                *out = kzalloc(MLX5_ST_SZ_BYTES(init2rtr_qp_out), GFP_KERNEL);
 900                if (!*in || !*out)
 901                        goto outerr;
 902
 903                MLX5_SET(init2rtr_qp_in, *in, opcode, cmd);
 904                MLX5_SET(init2rtr_qp_in, *in, uid, ndev->mvdev.res.uid);
 905                MLX5_SET(init2rtr_qp_in, *in, qpn, qpn);
 906        qpc = MLX5_ADDR_OF(init2rtr_qp_in, *in, qpc);
 907                MLX5_SET(qpc, qpc, mtu, MLX5_QPC_MTU_256_BYTES);
 908                MLX5_SET(qpc, qpc, log_msg_max, 30);
 909                MLX5_SET(qpc, qpc, remote_qpn, rqpn);
 910                pp = MLX5_ADDR_OF(qpc, qpc, primary_address_path);
 911                MLX5_SET(ads, pp, fl, 1);
 912                break;
 913        case MLX5_CMD_OP_RTR2RTS_QP:
 914                *inlen = MLX5_ST_SZ_BYTES(rtr2rts_qp_in);
 915                *outlen = MLX5_ST_SZ_BYTES(rtr2rts_qp_out);
 916                *in = kzalloc(*inlen, GFP_KERNEL);
 917                *out = kzalloc(MLX5_ST_SZ_BYTES(rtr2rts_qp_out), GFP_KERNEL);
 918                if (!*in || !*out)
 919                        goto outerr;
 920
 921                MLX5_SET(rtr2rts_qp_in, *in, opcode, cmd);
 922                MLX5_SET(rtr2rts_qp_in, *in, uid, ndev->mvdev.res.uid);
 923                MLX5_SET(rtr2rts_qp_in, *in, qpn, qpn);
 924        qpc = MLX5_ADDR_OF(rtr2rts_qp_in, *in, qpc);
 925                pp = MLX5_ADDR_OF(qpc, qpc, primary_address_path);
 926                MLX5_SET(ads, pp, ack_timeout, 14);
 927                MLX5_SET(qpc, qpc, retry_count, 7);
 928                MLX5_SET(qpc, qpc, rnr_retry, 7);
 929                break;
 930        default:
 931                goto outerr_nullify;
 932        }
 933
 934        return;
 935
 936outerr:
 937        kfree(*in);
 938        kfree(*out);
 939outerr_nullify:
 940        *in = NULL;
 941        *out = NULL;
 942}
 943
 944static void free_inout(void *in, void *out)
 945{
 946        kfree(in);
 947        kfree(out);
 948}
 949
 950/* Two QPs are used by each virtqueue. One is used by the driver and one by
 951 * firmware. The fw argument indicates whether the QP being modified is the
 952 * one used by firmware.
 953 */
 954static int modify_qp(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, bool fw, int cmd)
 955{
 956        int outlen;
 957        int inlen;
 958        void *out;
 959        void *in;
 960        int err;
 961
 962        alloc_inout(ndev, cmd, &in, &inlen, &out, &outlen, get_qpn(mvq, fw), get_rqpn(mvq, fw));
 963        if (!in || !out)
 964                return -ENOMEM;
 965
 966        err = mlx5_cmd_exec(ndev->mvdev.mdev, in, inlen, out, outlen);
 967        free_inout(in, out);
 968        return err;
 969}
 970
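/* Drive both QPs of the notification channel through the standard RC
 * connection sequence: 2RST, RST2INIT and INIT2RTR on each side, and
 * finally RTR2RTS on the firmware QP only. RTS is required for sending
 * while RTR suffices for receiving, which is presumably why the driver QP,
 * the receiving end, is left in RTR.
 */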
 971static int connect_qps(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
 972{
 973        int err;
 974
 975        err = modify_qp(ndev, mvq, true, MLX5_CMD_OP_2RST_QP);
 976        if (err)
 977                return err;
 978
 979        err = modify_qp(ndev, mvq, false, MLX5_CMD_OP_2RST_QP);
 980        if (err)
 981                return err;
 982
 983        err = modify_qp(ndev, mvq, true, MLX5_CMD_OP_RST2INIT_QP);
 984        if (err)
 985                return err;
 986
 987        err = modify_qp(ndev, mvq, false, MLX5_CMD_OP_RST2INIT_QP);
 988        if (err)
 989                return err;
 990
 991        err = modify_qp(ndev, mvq, true, MLX5_CMD_OP_INIT2RTR_QP);
 992        if (err)
 993                return err;
 994
 995        err = modify_qp(ndev, mvq, false, MLX5_CMD_OP_INIT2RTR_QP);
 996        if (err)
 997                return err;
 998
 999        return modify_qp(ndev, mvq, true, MLX5_CMD_OP_RTR2RTS_QP);
1000}
1001
1002struct mlx5_virtq_attr {
1003        u8 state;
1004        u16 available_index;
1005};
1006
1007static int query_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq,
1008                           struct mlx5_virtq_attr *attr)
1009{
1010        int outlen = MLX5_ST_SZ_BYTES(query_virtio_net_q_out);
1011        u32 in[MLX5_ST_SZ_DW(query_virtio_net_q_in)] = {};
1012        void *out;
1013        void *obj_context;
1014        void *cmd_hdr;
1015        int err;
1016
1017        out = kzalloc(outlen, GFP_KERNEL);
1018        if (!out)
1019                return -ENOMEM;
1020
1021        cmd_hdr = MLX5_ADDR_OF(query_virtio_net_q_in, in, general_obj_in_cmd_hdr);
1022
1023        MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_QUERY_GENERAL_OBJECT);
1024        MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_VIRTIO_NET_Q);
1025        MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_id, mvq->virtq_id);
1026        MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, uid, ndev->mvdev.res.uid);
1027        err = mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, outlen);
1028        if (err)
1029                goto err_cmd;
1030
1031        obj_context = MLX5_ADDR_OF(query_virtio_net_q_out, out, obj_context);
1032        memset(attr, 0, sizeof(*attr));
1033        attr->state = MLX5_GET(virtio_net_q_object, obj_context, state);
1034        attr->available_index = MLX5_GET(virtio_net_q_object, obj_context, hw_available_index);
1035        kfree(out);
1036        return 0;
1037
1038err_cmd:
1039        kfree(out);
1040        return err;
1041}
1042
1043static int modify_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, int state)
1044{
1045        int inlen = MLX5_ST_SZ_BYTES(modify_virtio_net_q_in);
1046        u32 out[MLX5_ST_SZ_DW(modify_virtio_net_q_out)] = {};
1047        void *obj_context;
1048        void *cmd_hdr;
1049        void *in;
1050        int err;
1051
1052        in = kzalloc(inlen, GFP_KERNEL);
1053        if (!in)
1054                return -ENOMEM;
1055
1056        cmd_hdr = MLX5_ADDR_OF(modify_virtio_net_q_in, in, general_obj_in_cmd_hdr);
1057
1058        MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_MODIFY_GENERAL_OBJECT);
1059        MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_VIRTIO_NET_Q);
1060        MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_id, mvq->virtq_id);
1061        MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, uid, ndev->mvdev.res.uid);
1062
1063        obj_context = MLX5_ADDR_OF(modify_virtio_net_q_in, in, obj_context);
1064        MLX5_SET64(virtio_net_q_object, obj_context, modify_field_select,
1065                   MLX5_VIRTQ_MODIFY_MASK_STATE);
1066        MLX5_SET(virtio_net_q_object, obj_context, state, state);
1067        err = mlx5_cmd_exec(ndev->mvdev.mdev, in, inlen, out, sizeof(out));
1068        kfree(in);
1069        if (!err)
1070                mvq->fw_state = state;
1071
1072        return err;
1073}
1074
1075static int setup_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
1076{
1077        u16 idx = mvq->index;
1078        int err;
1079
1080        if (!mvq->num_ent)
1081                return 0;
1082
1083        if (mvq->initialized) {
1084                mlx5_vdpa_warn(&ndev->mvdev, "attempt to re-initialize\n");
1085                return -EINVAL;
1086        }
1087
1088        err = cq_create(ndev, idx, mvq->num_ent);
1089        if (err)
1090                return err;
1091
1092        err = qp_create(ndev, mvq, &mvq->fwqp);
1093        if (err)
1094                goto err_fwqp;
1095
1096        err = qp_create(ndev, mvq, &mvq->vqqp);
1097        if (err)
1098                goto err_vqqp;
1099
1100        err = connect_qps(ndev, mvq);
1101        if (err)
1102                goto err_connect;
1103
1104        err = create_virtqueue(ndev, mvq);
1105        if (err)
1106                goto err_connect;
1107
1108        if (mvq->ready) {
1109                err = modify_virtqueue(ndev, mvq, MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY);
1110                if (err) {
1111                        mlx5_vdpa_warn(&ndev->mvdev, "failed to modify to ready vq idx %d(%d)\n",
1112                                       idx, err);
1113                        goto err_connect;
1114                }
1115        }
1116
1117        mvq->initialized = true;
1118        return 0;
1119
1120err_connect:
1121        qp_destroy(ndev, &mvq->vqqp);
1122err_vqqp:
1123        qp_destroy(ndev, &mvq->fwqp);
1124err_fwqp:
1125        cq_destroy(ndev, idx);
1126        return err;
1127}
1128
1129static void suspend_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
1130{
1131        struct mlx5_virtq_attr attr;
1132
1133        if (!mvq->initialized)
1134                return;
1135
1136        if (mvq->fw_state != MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY)
1137                return;
1138
1139        if (modify_virtqueue(ndev, mvq, MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND))
1140                mlx5_vdpa_warn(&ndev->mvdev, "modify to suspend failed\n");
1141
1142        if (query_virtqueue(ndev, mvq, &attr)) {
1143                mlx5_vdpa_warn(&ndev->mvdev, "failed to query virtqueue\n");
1144                return;
1145        }
1146        mvq->avail_idx = attr.available_index;
1147}
1148
1149static void suspend_vqs(struct mlx5_vdpa_net *ndev)
1150{
1151        int i;
1152
1153        for (i = 0; i < MLX5_MAX_SUPPORTED_VQS; i++)
1154                suspend_vq(ndev, &ndev->vqs[i]);
1155}
1156
1157static void teardown_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
1158{
1159        if (!mvq->initialized)
1160                return;
1161
1162        suspend_vq(ndev, mvq);
1163        destroy_virtqueue(ndev, mvq);
1164        qp_destroy(ndev, &mvq->vqqp);
1165        qp_destroy(ndev, &mvq->fwqp);
1166        cq_destroy(ndev, mvq->index);
1167        mvq->initialized = false;
1168}
1169
1170static int create_rqt(struct mlx5_vdpa_net *ndev)
1171{
1172        int log_max_rqt;
1173        __be32 *list;
1174        void *rqtc;
1175        int inlen;
1176        void *in;
1177        int i, j;
1178        int err;
1179
1180        log_max_rqt = min_t(int, 1, MLX5_CAP_GEN(ndev->mvdev.mdev, log_max_rqt_size));
1181        if (log_max_rqt < 1)
1182                return -EOPNOTSUPP;
1183
1184        inlen = MLX5_ST_SZ_BYTES(create_rqt_in) + (1 << log_max_rqt) * MLX5_ST_SZ_BYTES(rq_num);
1185        in = kzalloc(inlen, GFP_KERNEL);
1186        if (!in)
1187                return -ENOMEM;
1188
1189        MLX5_SET(create_rqt_in, in, uid, ndev->mvdev.res.uid);
1190        rqtc = MLX5_ADDR_OF(create_rqt_in, in, rqt_context);
1191
1192        MLX5_SET(rqtc, rqtc, list_q_type, MLX5_RQTC_LIST_Q_TYPE_VIRTIO_NET_Q);
1193        MLX5_SET(rqtc, rqtc, rqt_max_size, 1 << log_max_rqt);
1194        MLX5_SET(rqtc, rqtc, rqt_actual_size, 1);
1195        list = MLX5_ADDR_OF(rqtc, rqtc, rq_num[0]);
1196        for (i = 0, j = 0; j < ndev->mvdev.max_vqs; j++) {
1197                if (!ndev->vqs[j].initialized)
1198                        continue;
1199
1200                if (!vq_is_tx(ndev->vqs[j].index)) {
1201                        list[i] = cpu_to_be32(ndev->vqs[j].virtq_id);
1202                        i++;
1203                }
1204        }
1205
1206        err = mlx5_vdpa_create_rqt(&ndev->mvdev, in, inlen, &ndev->res.rqtn);
1207        kfree(in);
1208        if (err)
1209                return err;
1210
1211        return 0;
1212}
1213
1214static void destroy_rqt(struct mlx5_vdpa_net *ndev)
1215{
1216        mlx5_vdpa_destroy_rqt(&ndev->mvdev, ndev->res.rqtn);
1217}
1218
1219static int create_tir(struct mlx5_vdpa_net *ndev)
1220{
1221#define HASH_IP_L4PORTS                                                                            \
1222        (MLX5_HASH_FIELD_SEL_SRC_IP | MLX5_HASH_FIELD_SEL_DST_IP | MLX5_HASH_FIELD_SEL_L4_SPORT |  \
1223         MLX5_HASH_FIELD_SEL_L4_DPORT)
1224        static const u8 rx_hash_toeplitz_key[] = { 0x2c, 0xc6, 0x81, 0xd1, 0x5b, 0xdb, 0xf4, 0xf7,
1225                                                   0xfc, 0xa2, 0x83, 0x19, 0xdb, 0x1a, 0x3e, 0x94,
1226                                                   0x6b, 0x9e, 0x38, 0xd9, 0x2c, 0x9c, 0x03, 0xd1,
1227                                                   0xad, 0x99, 0x44, 0xa7, 0xd9, 0x56, 0x3d, 0x59,
1228                                                   0x06, 0x3c, 0x25, 0xf3, 0xfc, 0x1f, 0xdc, 0x2a };
1229        void *rss_key;
1230        void *outer;
1231        void *tirc;
1232        void *in;
1233        int err;
1234
1235        in = kzalloc(MLX5_ST_SZ_BYTES(create_tir_in), GFP_KERNEL);
1236        if (!in)
1237                return -ENOMEM;
1238
1239        MLX5_SET(create_tir_in, in, uid, ndev->mvdev.res.uid);
1240        tirc = MLX5_ADDR_OF(create_tir_in, in, ctx);
1241        MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_INDIRECT);
1242
1243        MLX5_SET(tirc, tirc, rx_hash_symmetric, 1);
1244        MLX5_SET(tirc, tirc, rx_hash_fn, MLX5_RX_HASH_FN_TOEPLITZ);
1245        rss_key = MLX5_ADDR_OF(tirc, tirc, rx_hash_toeplitz_key);
1246        memcpy(rss_key, rx_hash_toeplitz_key, sizeof(rx_hash_toeplitz_key));
1247
1248        outer = MLX5_ADDR_OF(tirc, tirc, rx_hash_field_selector_outer);
1249        MLX5_SET(rx_hash_field_select, outer, l3_prot_type, MLX5_L3_PROT_TYPE_IPV4);
1250        MLX5_SET(rx_hash_field_select, outer, l4_prot_type, MLX5_L4_PROT_TYPE_TCP);
1251        MLX5_SET(rx_hash_field_select, outer, selected_fields, HASH_IP_L4PORTS);
1252
1253        MLX5_SET(tirc, tirc, indirect_table, ndev->res.rqtn);
1254        MLX5_SET(tirc, tirc, transport_domain, ndev->res.tdn);
1255
1256        err = mlx5_vdpa_create_tir(&ndev->mvdev, in, &ndev->res.tirn);
1257        kfree(in);
1258        return err;
1259}
1260
1261static void destroy_tir(struct mlx5_vdpa_net *ndev)
1262{
1263        mlx5_vdpa_destroy_tir(&ndev->mvdev, ndev->res.tirn);
1264}
1265
1266static int add_fwd_to_tir(struct mlx5_vdpa_net *ndev)
1267{
1268        struct mlx5_flow_destination dest[2] = {};
1269        struct mlx5_flow_table_attr ft_attr = {};
1270        struct mlx5_flow_act flow_act = {};
1271        struct mlx5_flow_namespace *ns;
1272        int err;
1273
1274        /* for now, one entry, match all, forward to tir */
1275        ft_attr.max_fte = 1;
1276        ft_attr.autogroup.max_num_groups = 1;
1277
1278        ns = mlx5_get_flow_namespace(ndev->mvdev.mdev, MLX5_FLOW_NAMESPACE_BYPASS);
1279        if (!ns) {
1280                mlx5_vdpa_warn(&ndev->mvdev, "get flow namespace\n");
1281                return -EOPNOTSUPP;
1282        }
1283
1284        ndev->rxft = mlx5_create_auto_grouped_flow_table(ns, &ft_attr);
1285        if (IS_ERR(ndev->rxft))
1286                return PTR_ERR(ndev->rxft);
1287
1288        ndev->rx_counter = mlx5_fc_create(ndev->mvdev.mdev, false);
1289        if (IS_ERR(ndev->rx_counter)) {
1290                err = PTR_ERR(ndev->rx_counter);
1291                goto err_fc;
1292        }
1293
1294        flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | MLX5_FLOW_CONTEXT_ACTION_COUNT;
1295        dest[0].type = MLX5_FLOW_DESTINATION_TYPE_TIR;
1296        dest[0].tir_num = ndev->res.tirn;
1297        dest[1].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
1298        dest[1].counter_id = mlx5_fc_id(ndev->rx_counter);
1299        ndev->rx_rule = mlx5_add_flow_rules(ndev->rxft, NULL, &flow_act, dest, 2);
1300        if (IS_ERR(ndev->rx_rule)) {
1301                err = PTR_ERR(ndev->rx_rule);
1302                ndev->rx_rule = NULL;
1303                goto err_rule;
1304        }
1305
1306        return 0;
1307
1308err_rule:
1309        mlx5_fc_destroy(ndev->mvdev.mdev, ndev->rx_counter);
1310err_fc:
1311        mlx5_destroy_flow_table(ndev->rxft);
1312        return err;
1313}
1314
1315static void remove_fwd_to_tir(struct mlx5_vdpa_net *ndev)
1316{
1317        if (!ndev->rx_rule)
1318                return;
1319
1320        mlx5_del_flow_rules(ndev->rx_rule);
1321        mlx5_fc_destroy(ndev->mvdev.mdev, ndev->rx_counter);
1322        mlx5_destroy_flow_table(ndev->rxft);
1323
1324        ndev->rx_rule = NULL;
1325}
1326
1327static void mlx5_vdpa_kick_vq(struct vdpa_device *vdev, u16 idx)
1328{
1329        struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
1330        struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
1331        struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx];
1332
1333        if (unlikely(!mvq->ready))
1334                return;
1335
1336        iowrite16(idx, ndev->mvdev.res.kick_addr);
1337}
1338
1339static int mlx5_vdpa_set_vq_address(struct vdpa_device *vdev, u16 idx, u64 desc_area,
1340                                    u64 driver_area, u64 device_area)
1341{
1342        struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
1343        struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
1344        struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx];
1345
1346        mvq->desc_addr = desc_area;
1347        mvq->device_addr = device_area;
1348        mvq->driver_addr = driver_area;
1349        return 0;
1350}
1351
1352static void mlx5_vdpa_set_vq_num(struct vdpa_device *vdev, u16 idx, u32 num)
1353{
1354        struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
1355        struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
1356        struct mlx5_vdpa_virtqueue *mvq;
1357
1358        mvq = &ndev->vqs[idx];
1359        mvq->num_ent = num;
1360}
1361
1362static void mlx5_vdpa_set_vq_cb(struct vdpa_device *vdev, u16 idx, struct vdpa_callback *cb)
1363{
1364        struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
1365        struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
1366        struct mlx5_vdpa_virtqueue *vq = &ndev->vqs[idx];
1367
1368        vq->event_cb = *cb;
1369}
1370
1371static void mlx5_vdpa_set_vq_ready(struct vdpa_device *vdev, u16 idx, bool ready)
1372{
1373        struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
1374        struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
1375        struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx];
1376
1377        if (!ready)
1378                suspend_vq(ndev, mvq);
1379
1380        mvq->ready = ready;
1381}
1382
1383static bool mlx5_vdpa_get_vq_ready(struct vdpa_device *vdev, u16 idx)
1384{
1385        struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
1386        struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
1387        struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx];
1388
1389        return mvq->ready;
1390}
1391
1392static int mlx5_vdpa_set_vq_state(struct vdpa_device *vdev, u16 idx,
1393                                  const struct vdpa_vq_state *state)
1394{
1395        struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
1396        struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
1397        struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx];
1398
1399        if (mvq->fw_state == MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY) {
1400                mlx5_vdpa_warn(mvdev, "can't modify available index\n");
1401                return -EINVAL;
1402        }
1403
1404        mvq->avail_idx = state->avail_index;
1405        return 0;
1406}
1407
1408static int mlx5_vdpa_get_vq_state(struct vdpa_device *vdev, u16 idx, struct vdpa_vq_state *state)
1409{
1410        struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
1411        struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
1412        struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx];
1413        struct mlx5_virtq_attr attr;
1414        int err;
1415
1416        /* If the virtq object was destroyed, use the value saved at
1417         * the last minute of suspend_vq. This caters for userspace
1418         * that cares about emulating the index after vq is stopped.
1419         */
1420        if (!mvq->initialized) {
1421                state->avail_index = mvq->avail_idx;
1422                return 0;
1423        }
1424
1425        err = query_virtqueue(ndev, mvq, &attr);
1426        if (err) {
1427                mlx5_vdpa_warn(mvdev, "failed to query virtqueue\n");
1428                return err;
1429        }
1430        state->avail_index = attr.available_index;
1431        return 0;
1432}
1433
1434static u32 mlx5_vdpa_get_vq_align(struct vdpa_device *vdev)
1435{
1436        return PAGE_SIZE;
1437}
1438
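/* Device-native feature bit positions, as reported in
 * MLX5_CAP_DEV_VDPA_EMULATION(mdev, device_features_bits_mask); they are
 * translated to virtio feature bits by mlx_to_virtio_features() below.
 */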
1439enum { MLX5_VIRTIO_NET_F_GUEST_CSUM = 1 << 9,
1440        MLX5_VIRTIO_NET_F_CSUM = 1 << 10,
1441        MLX5_VIRTIO_NET_F_HOST_TSO6 = 1 << 11,
1442        MLX5_VIRTIO_NET_F_HOST_TSO4 = 1 << 12,
1443};
1444
1445static u64 mlx_to_virtio_features(u16 dev_features)
1446{
1447        u64 result = 0;
1448
1449        if (dev_features & MLX5_VIRTIO_NET_F_GUEST_CSUM)
1450                result |= BIT_ULL(VIRTIO_NET_F_GUEST_CSUM);
1451        if (dev_features & MLX5_VIRTIO_NET_F_CSUM)
1452                result |= BIT_ULL(VIRTIO_NET_F_CSUM);
1453        if (dev_features & MLX5_VIRTIO_NET_F_HOST_TSO6)
1454                result |= BIT_ULL(VIRTIO_NET_F_HOST_TSO6);
1455        if (dev_features & MLX5_VIRTIO_NET_F_HOST_TSO4)
1456                result |= BIT_ULL(VIRTIO_NET_F_HOST_TSO4);
1457
1458        return result;
1459}
1460
1461static u64 mlx5_vdpa_get_features(struct vdpa_device *vdev)
1462{
1463        struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
1464        struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
1465        u16 dev_features;
1466
1467        dev_features = MLX5_CAP_DEV_VDPA_EMULATION(mvdev->mdev, device_features_bits_mask);
1468        ndev->mvdev.mlx_features = mlx_to_virtio_features(dev_features);
1469        if (MLX5_CAP_DEV_VDPA_EMULATION(mvdev->mdev, virtio_version_1_0))
1470                ndev->mvdev.mlx_features |= BIT_ULL(VIRTIO_F_VERSION_1);
1471        ndev->mvdev.mlx_features |= BIT_ULL(VIRTIO_F_ACCESS_PLATFORM);
1472        print_features(mvdev, ndev->mvdev.mlx_features, false);
1473        return ndev->mvdev.mlx_features;
1474}
1475
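/* VIRTIO_F_ACCESS_PLATFORM is mandatory for this device: guest memory
 * is reached through a memory key built from the IOTLB mappings (see
 * mlx5_vdpa_create_mr() in the set_map path), so the feature cannot be
 * negotiated away.
 */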
1476static int verify_min_features(struct mlx5_vdpa_dev *mvdev, u64 features)
1477{
1478        if (!(features & BIT_ULL(VIRTIO_F_ACCESS_PLATFORM)))
1479                return -EOPNOTSUPP;
1480
1481        return 0;
1482}
1483
1484static int setup_virtqueues(struct mlx5_vdpa_net *ndev)
1485{
1486        int err;
1487        int i;
1488
1489        for (i = 0; i < 2 * mlx5_vdpa_max_qps(ndev->mvdev.max_vqs); i++) {
1490                err = setup_vq(ndev, &ndev->vqs[i]);
1491                if (err)
1492                        goto err_vq;
1493        }
1494
1495        return 0;
1496
1497err_vq:
1498        for (--i; i >= 0; i--)
1499                teardown_vq(ndev, &ndev->vqs[i]);
1500
1501        return err;
1502}
1503
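/* Tear down all virtqueues that completed setup, walking the array in
 * reverse order.
 */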
1504static void teardown_virtqueues(struct mlx5_vdpa_net *ndev)
1505{
1506        struct mlx5_vdpa_virtqueue *mvq;
1507        int i;
1508
1509        for (i = ndev->mvdev.max_vqs - 1; i >= 0; i--) {
1510                mvq = &ndev->vqs[i];
1511                if (!mvq->initialized)
1512                        continue;
1513
1514                teardown_vq(ndev, mvq);
1515        }
1516}
1517
1518/* TODO: cross-endian support */
1519static inline bool mlx5_vdpa_is_little_endian(struct mlx5_vdpa_dev *mvdev)
1520{
1521        return virtio_legacy_is_little_endian() ||
1522                (mvdev->actual_features & (1ULL << VIRTIO_F_VERSION_1));
1523}
1524
1525static __virtio16 cpu_to_mlx5vdpa16(struct mlx5_vdpa_dev *mvdev, u16 val)
1526{
1527        return __cpu_to_virtio16(mlx5_vdpa_is_little_endian(mvdev), val);
1528}
1529
1530static int mlx5_vdpa_set_features(struct vdpa_device *vdev, u64 features)
1531{
1532        struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
1533        struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
1534        int err;
1535
1536        print_features(mvdev, features, true);
1537
1538        err = verify_min_features(mvdev, features);
1539        if (err)
1540                return err;
1541
1542        ndev->mvdev.actual_features = features & ndev->mvdev.mlx_features;
1543        ndev->config.mtu = cpu_to_mlx5vdpa16(mvdev, ndev->mtu);
1544        ndev->config.status |= cpu_to_mlx5vdpa16(mvdev, VIRTIO_NET_S_LINK_UP);
1545        return 0;
1546}
1547
1548static void mlx5_vdpa_set_config_cb(struct vdpa_device *vdev, struct vdpa_callback *cb)
1549{
1550        /* not implemented */
1551        mlx5_vdpa_warn(to_mvdev(vdev), "set config callback not supported\n");
1552}
1553
1554#define MLX5_VDPA_MAX_VQ_ENTRIES 256
1555static u16 mlx5_vdpa_get_vq_num_max(struct vdpa_device *vdev)
1556{
1557        return MLX5_VDPA_MAX_VQ_ENTRIES;
1558}
1559
1560static u32 mlx5_vdpa_get_device_id(struct vdpa_device *vdev)
1561{
1562        return VIRTIO_ID_NET;
1563}
1564
1565static u32 mlx5_vdpa_get_vendor_id(struct vdpa_device *vdev)
1566{
1567        return PCI_VENDOR_ID_MELLANOX;
1568}
1569
1570static u8 mlx5_vdpa_get_status(struct vdpa_device *vdev)
1571{
1572        struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
1573        struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
1574
1575        print_status(mvdev, ndev->mvdev.status, false);
1576        return ndev->mvdev.status;
1577}
1578
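/* Snapshot the current virtqueue attributes into its restore info so
 * the queue can be recreated with identical state after a memory map
 * change; see restore_channels_info().
 */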
1579static int save_channel_info(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
1580{
1581        struct mlx5_vq_restore_info *ri = &mvq->ri;
1582        struct mlx5_virtq_attr attr;
1583        int err;
1584
1585        if (!mvq->initialized)
1586                return 0;
1587
1588        err = query_virtqueue(ndev, mvq, &attr);
1589        if (err)
1590                return err;
1591
1592        ri->avail_index = attr.available_index;
1593        ri->ready = mvq->ready;
1594        ri->num_ent = mvq->num_ent;
1595        ri->desc_addr = mvq->desc_addr;
1596        ri->device_addr = mvq->device_addr;
1597        ri->driver_addr = mvq->driver_addr;
1598        ri->cb = mvq->event_cb;
1599        ri->restore = true;
1600        return 0;
1601}
1602
1603static int save_channels_info(struct mlx5_vdpa_net *ndev)
1604{
1605        int i;
1606
1607        for (i = 0; i < ndev->mvdev.max_vqs; i++) {
1608                memset(&ndev->vqs[i].ri, 0, sizeof(ndev->vqs[i].ri));
1609                save_channel_info(ndev, &ndev->vqs[i]);
1610        }
1611        return 0;
1612}
1613
1614static void mlx5_clear_vqs(struct mlx5_vdpa_net *ndev)
1615{
1616        int i;
1617
1618        for (i = 0; i < ndev->mvdev.max_vqs; i++)
1619                memset(&ndev->vqs[i], 0, offsetof(struct mlx5_vdpa_virtqueue, ri));
1620}
1621
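/* Reinitialize the virtqueue array, then replay the state captured by
 * save_channels_info() into every virtqueue marked for restore.
 */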
1622static void restore_channels_info(struct mlx5_vdpa_net *ndev)
1623{
1624        struct mlx5_vdpa_virtqueue *mvq;
1625        struct mlx5_vq_restore_info *ri;
1626        int i;
1627
1628        mlx5_clear_vqs(ndev);
1629        init_mvqs(ndev);
1630        for (i = 0; i < ndev->mvdev.max_vqs; i++) {
1631                mvq = &ndev->vqs[i];
1632                ri = &mvq->ri;
1633                if (!ri->restore)
1634                        continue;
1635
1636                mvq->avail_idx = ri->avail_index;
1637                mvq->ready = ri->ready;
1638                mvq->num_ent = ri->num_ent;
1639                mvq->desc_addr = ri->desc_addr;
1640                mvq->device_addr = ri->device_addr;
1641                mvq->driver_addr = ri->driver_addr;
1642                mvq->event_cb = ri->cb;
1643        }
1644}
1645
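/* Handle a change of the memory map: suspend the virtqueues, save their
 * state and tear down the driver resources, then rebuild the memory key
 * from the new IOTLB. If the device was in DRIVER_OK, the virtqueues
 * are recreated and their saved state restored.
 */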
1646static int mlx5_vdpa_change_map(struct mlx5_vdpa_net *ndev, struct vhost_iotlb *iotlb)
1647{
1648        int err;
1649
1650        suspend_vqs(ndev);
1651        err = save_channels_info(ndev);
1652        if (err)
1653                goto err_mr;
1654
1655        teardown_driver(ndev);
1656        mlx5_vdpa_destroy_mr(&ndev->mvdev);
1657        err = mlx5_vdpa_create_mr(&ndev->mvdev, iotlb);
1658        if (err)
1659                goto err_mr;
1660
1661        if (!(ndev->mvdev.status & VIRTIO_CONFIG_S_DRIVER_OK))
1662                return 0;
1663
1664        restore_channels_info(ndev);
1665        err = setup_driver(ndev);
1666        if (err)
1667                goto err_setup;
1668
1669        return 0;
1670
1671err_setup:
1672        mlx5_vdpa_destroy_mr(&ndev->mvdev);
1673err_mr:
1674        return err;
1675}
1676
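/* Create the data path objects under reslock: the virtqueue objects, a
 * RQT spreading receive traffic across them, a TIR on top of the RQT
 * and a steering rule forwarding traffic into the TIR.
 */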
1677static int setup_driver(struct mlx5_vdpa_net *ndev)
1678{
1679        int err;
1680
1681        mutex_lock(&ndev->reslock);
1682        if (ndev->setup) {
1683                mlx5_vdpa_warn(&ndev->mvdev, "setup_driver called but driver is already set up\n");
1684                err = 0;
1685                goto out;
1686        }
1687        err = setup_virtqueues(ndev);
1688        if (err) {
1689                mlx5_vdpa_warn(&ndev->mvdev, "setup_virtqueues failed\n");
1690                goto out;
1691        }
1692
1693        err = create_rqt(ndev);
1694        if (err) {
1695                mlx5_vdpa_warn(&ndev->mvdev, "create_rqt failed\n");
1696                goto err_rqt;
1697        }
1698
1699        err = create_tir(ndev);
1700        if (err) {
1701                mlx5_vdpa_warn(&ndev->mvdev, "create_tir failed\n");
1702                goto err_tir;
1703        }
1704
1705        err = add_fwd_to_tir(ndev);
1706        if (err) {
1707                mlx5_vdpa_warn(&ndev->mvdev, "add_fwd_to_tir failed\n");
1708                goto err_fwd;
1709        }
1710        ndev->setup = true;
1711        mutex_unlock(&ndev->reslock);
1712
1713        return 0;
1714
1715err_fwd:
1716        destroy_tir(ndev);
1717err_tir:
1718        destroy_rqt(ndev);
1719err_rqt:
1720        teardown_virtqueues(ndev);
1721out:
1722        mutex_unlock(&ndev->reslock);
1723        return err;
1724}
1725
1726static void teardown_driver(struct mlx5_vdpa_net *ndev)
1727{
1728        mutex_lock(&ndev->reslock);
1729        if (!ndev->setup)
1730                goto out;
1731
1732        remove_fwd_to_tir(ndev);
1733        destroy_tir(ndev);
1734        destroy_rqt(ndev);
1735        teardown_virtqueues(ndev);
1736        ndev->setup = false;
1737out:
1738        mutex_unlock(&ndev->reslock);
1739}
1740
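/* Writing a zero status is a device reset per the virtio spec: driver
 * resources and the memory key are destroyed and the generation counter
 * is bumped so userspace can detect it. Setting DRIVER_OK triggers
 * creation of the data path resources.
 */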
1741static void mlx5_vdpa_set_status(struct vdpa_device *vdev, u8 status)
1742{
1743        struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
1744        struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
1745        int err;
1746
1747        print_status(mvdev, status, true);
1748        if (!status) {
1749                mlx5_vdpa_info(mvdev, "performing device reset\n");
1750                teardown_driver(ndev);
1751                mlx5_vdpa_destroy_mr(&ndev->mvdev);
1752                ndev->mvdev.status = 0;
1753                ndev->mvdev.mlx_features = 0;
1754                ++mvdev->generation;
1755                return;
1756        }
1757
1758        if ((status ^ ndev->mvdev.status) & VIRTIO_CONFIG_S_DRIVER_OK) {
1759                if (status & VIRTIO_CONFIG_S_DRIVER_OK) {
1760                        err = setup_driver(ndev);
1761                        if (err) {
1762                                mlx5_vdpa_warn(mvdev, "failed to setup driver\n");
1763                                goto err_setup;
1764                        }
1765                } else {
1766                        mlx5_vdpa_warn(mvdev, "did not expect DRIVER_OK to be cleared\n");
1767                        return;
1768                }
1769        }
1770
1771        ndev->mvdev.status = status;
1772        return;
1773
1774err_setup:
1775        mlx5_vdpa_destroy_mr(&ndev->mvdev);
1776        ndev->mvdev.status |= VIRTIO_CONFIG_S_FAILED;
1777}
1778
1779static void mlx5_vdpa_get_config(struct vdpa_device *vdev, unsigned int offset, void *buf,
1780                                 unsigned int len)
1781{
1782        struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
1783        struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
1784
1785        if (offset + len <= sizeof(struct virtio_net_config))
1786                memcpy(buf, (u8 *)&ndev->config + offset, len);
1787}
1788
1789static void mlx5_vdpa_set_config(struct vdpa_device *vdev, unsigned int offset, const void *buf,
1790                                 unsigned int len)
1791{
1792        /* not supported */
1793}
1794
1795static u32 mlx5_vdpa_get_generation(struct vdpa_device *vdev)
1796{
1797        struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
1798
1799        return mvdev->generation;
1800}
1801
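/* .set_map callback: update the memory key from the new IOTLB. If the
 * mappings actually changed, the full suspend/save/rebuild/restore
 * cycle of mlx5_vdpa_change_map() is required.
 */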
1802static int mlx5_vdpa_set_map(struct vdpa_device *vdev, struct vhost_iotlb *iotlb)
1803{
1804        struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
1805        struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
1806        bool change_map;
1807        int err;
1808
1809        err = mlx5_vdpa_handle_set_map(mvdev, iotlb, &change_map);
1810        if (err) {
1811                mlx5_vdpa_warn(mvdev, "set map failed(%d)\n", err);
1812                return err;
1813        }
1814
1815        if (change_map)
1816                return mlx5_vdpa_change_map(ndev, iotlb);
1817
1818        return 0;
1819}
1820
1821static void mlx5_vdpa_free(struct vdpa_device *vdev)
1822{
1823        struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
1824        struct mlx5_vdpa_net *ndev;
1825
1826        ndev = to_mlx5_vdpa_ndev(mvdev);
1827
1828        free_resources(ndev);
1829        mlx5_vdpa_free_resources(&ndev->mvdev);
1830        mutex_destroy(&ndev->reslock);
1831}
1832
1833static struct vdpa_notification_area mlx5_get_vq_notification(struct vdpa_device *vdev, u16 idx)
1834{
1835        struct vdpa_notification_area ret = {};
1836
1837        return ret;
1838}
1839
1840static int mlx5_get_vq_irq(struct vdpa_device *vdev, u16 idx)
1841{
1842        return -EOPNOTSUPP;
1843}
1844
1845static const struct vdpa_config_ops mlx5_vdpa_ops = {
1846        .set_vq_address = mlx5_vdpa_set_vq_address,
1847        .set_vq_num = mlx5_vdpa_set_vq_num,
1848        .kick_vq = mlx5_vdpa_kick_vq,
1849        .set_vq_cb = mlx5_vdpa_set_vq_cb,
1850        .set_vq_ready = mlx5_vdpa_set_vq_ready,
1851        .get_vq_ready = mlx5_vdpa_get_vq_ready,
1852        .set_vq_state = mlx5_vdpa_set_vq_state,
1853        .get_vq_state = mlx5_vdpa_get_vq_state,
1854        .get_vq_notification = mlx5_get_vq_notification,
1855        .get_vq_irq = mlx5_get_vq_irq,
1856        .get_vq_align = mlx5_vdpa_get_vq_align,
1857        .get_features = mlx5_vdpa_get_features,
1858        .set_features = mlx5_vdpa_set_features,
1859        .set_config_cb = mlx5_vdpa_set_config_cb,
1860        .get_vq_num_max = mlx5_vdpa_get_vq_num_max,
1861        .get_device_id = mlx5_vdpa_get_device_id,
1862        .get_vendor_id = mlx5_vdpa_get_vendor_id,
1863        .get_status = mlx5_vdpa_get_status,
1864        .set_status = mlx5_vdpa_set_status,
1865        .get_config = mlx5_vdpa_get_config,
1866        .set_config = mlx5_vdpa_set_config,
1867        .get_generation = mlx5_vdpa_get_generation,
1868        .set_map = mlx5_vdpa_set_map,
1869        .free = mlx5_vdpa_free,
1870};
1871
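/* Allocate the networking resources that live for the whole lifetime of
 * the device, independent of DRIVER_OK: a transport domain and a TIS.
 */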
1872static int alloc_resources(struct mlx5_vdpa_net *ndev)
1873{
1874        struct mlx5_vdpa_net_resources *res = &ndev->res;
1875        int err;
1876
1877        if (res->valid) {
1878                mlx5_vdpa_warn(&ndev->mvdev, "resources already allocated\n");
1879                return -EEXIST;
1880        }
1881
1882        err = mlx5_vdpa_alloc_transport_domain(&ndev->mvdev, &res->tdn);
1883        if (err)
1884                return err;
1885
1886        err = create_tis(ndev);
1887        if (err)
1888                goto err_tis;
1889
1890        res->valid = true;
1891
1892        return 0;
1893
1894err_tis:
1895        mlx5_vdpa_dealloc_transport_domain(&ndev->mvdev, res->tdn);
1896        return err;
1897}
1898
1899static void free_resources(struct mlx5_vdpa_net *ndev)
1900{
1901        struct mlx5_vdpa_net_resources *res = &ndev->res;
1902
1903        if (!res->valid)
1904                return;
1905
1906        destroy_tis(ndev);
1907        mlx5_vdpa_dealloc_transport_domain(&ndev->mvdev, res->tdn);
1908        res->valid = false;
1909}
1910
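/* Initialize the virtqueue array. The first 2 * max_qps entries are
 * usable data virtqueues whose forward QP is firmware owned (fwqp.fw);
 * only the fields up to ri are zeroed so saved restore info survives.
 */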
1911static void init_mvqs(struct mlx5_vdpa_net *ndev)
1912{
1913        struct mlx5_vdpa_virtqueue *mvq;
1914        int i;
1915
1916        for (i = 0; i < 2 * mlx5_vdpa_max_qps(ndev->mvdev.max_vqs); ++i) {
1917                mvq = &ndev->vqs[i];
1918                memset(mvq, 0, offsetof(struct mlx5_vdpa_virtqueue, ri));
1919                mvq->index = i;
1920                mvq->ndev = ndev;
1921                mvq->fwqp.fw = true;
1922        }
1923        for (; i < ndev->mvdev.max_vqs; i++) {
1924                mvq = &ndev->vqs[i];
1925                memset(mvq, 0, offsetof(struct mlx5_vdpa_virtqueue, ri));
1926                mvq->index = i;
1927                mvq->ndev = ndev;
1928        }
1929}
1930
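/* Device add path: size the virtqueue array from firmware capabilities,
 * read the MTU and MAC address into the virtio config space, allocate
 * the common and networking resources and register with the vdpa bus.
 */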
1931void *mlx5_vdpa_add_dev(struct mlx5_core_dev *mdev)
1932{
1933        struct virtio_net_config *config;
1934        struct mlx5_vdpa_dev *mvdev;
1935        struct mlx5_vdpa_net *ndev;
1936        u32 max_vqs;
1937        int err;
1938
1939        /* reserve one virtqueue for the control virtqueue, should we require it */
1940        max_vqs = MLX5_CAP_DEV_VDPA_EMULATION(mdev, max_num_virtio_queues);
1941        max_vqs = min_t(u32, max_vqs, MLX5_MAX_SUPPORTED_VQS);
1942
1943        ndev = vdpa_alloc_device(struct mlx5_vdpa_net, mvdev.vdev, mdev->device, &mlx5_vdpa_ops,
1944                                 2 * mlx5_vdpa_max_qps(max_vqs));
1945        if (IS_ERR(ndev))
1946                return ndev;
1947
1948        ndev->mvdev.max_vqs = max_vqs;
1949        mvdev = &ndev->mvdev;
1950        mvdev->mdev = mdev;
1951        init_mvqs(ndev);
1952        mutex_init(&ndev->reslock);
1953        config = &ndev->config;
1954        err = mlx5_query_nic_vport_mtu(mdev, &ndev->mtu);
1955        if (err)
1956                goto err_mtu;
1957
1958        err = mlx5_query_nic_vport_mac_address(mdev, 0, 0, config->mac);
1959        if (err)
1960                goto err_mtu;
1961
1962        mvdev->vdev.dma_dev = mdev->device;
1963        err = mlx5_vdpa_alloc_resources(&ndev->mvdev);
1964        if (err)
1965                goto err_mtu;
1966
1967        err = alloc_resources(ndev);
1968        if (err)
1969                goto err_res;
1970
1971        err = vdpa_register_device(&mvdev->vdev);
1972        if (err)
1973                goto err_reg;
1974
1975        return ndev;
1976
1977err_reg:
1978        free_resources(ndev);
1979err_res:
1980        mlx5_vdpa_free_resources(&ndev->mvdev);
1981err_mtu:
1982        mutex_destroy(&ndev->reslock);
1983        put_device(&mvdev->vdev.dev);
1984        return ERR_PTR(err);
1985}
1986
1987void mlx5_vdpa_remove_dev(struct mlx5_vdpa_dev *mvdev)
1988{
1989        vdpa_unregister_device(&mvdev->vdev);
1990}
1991