qemu/hw/rdma/vmw/pvrdma_main.c
/*
 * QEMU paravirtual RDMA
 *
 * Copyright (C) 2018 Oracle
 * Copyright (C) 2018 Red Hat Inc
 *
 * Authors:
 *     Yuval Shaia <yuval.shaia@oracle.com>
 *     Marcel Apfelbaum <marcel@redhat.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 *
 */

#include "qemu/osdep.h"
#include "qapi/error.h"
#include "qemu/module.h"
#include "hw/pci/pci.h"
#include "hw/pci/pci_ids.h"
#include "hw/pci/msi.h"
#include "hw/pci/msix.h"
#include "hw/qdev-properties.h"
#include "cpu.h"
#include "trace.h"
#include "monitor/monitor.h"
#include "hw/rdma/rdma.h"

#include "../rdma_rm.h"
#include "../rdma_backend.h"
#include "../rdma_utils.h"

#include <infiniband/verbs.h>
#include "pvrdma.h"
#include "standard-headers/rdma/vmw_pvrdma-abi.h"
#include "sysemu/runstate.h"
#include "standard-headers/drivers/infiniband/hw/vmw_pvrdma/pvrdma_dev_api.h"
#include "pvrdma_qp_ops.h"

static Property pvrdma_dev_properties[] = {
    DEFINE_PROP_STRING("netdev", PVRDMADev, backend_eth_device_name),
    DEFINE_PROP_STRING("ibdev", PVRDMADev, backend_device_name),
    DEFINE_PROP_UINT8("ibport", PVRDMADev, backend_port_num, 1),
    DEFINE_PROP_UINT64("dev-caps-max-mr-size", PVRDMADev, dev_attr.max_mr_size,
                       MAX_MR_SIZE),
    DEFINE_PROP_INT32("dev-caps-max-qp", PVRDMADev, dev_attr.max_qp, MAX_QP),
    DEFINE_PROP_INT32("dev-caps-max-cq", PVRDMADev, dev_attr.max_cq, MAX_CQ),
    DEFINE_PROP_INT32("dev-caps-max-mr", PVRDMADev, dev_attr.max_mr, MAX_MR),
    DEFINE_PROP_INT32("dev-caps-max-pd", PVRDMADev, dev_attr.max_pd, MAX_PD),
    DEFINE_PROP_INT32("dev-caps-qp-rd-atom", PVRDMADev, dev_attr.max_qp_rd_atom,
                      MAX_QP_RD_ATOM),
    DEFINE_PROP_INT32("dev-caps-max-qp-init-rd-atom", PVRDMADev,
                      dev_attr.max_qp_init_rd_atom, MAX_QP_INIT_RD_ATOM),
    DEFINE_PROP_INT32("dev-caps-max-ah", PVRDMADev, dev_attr.max_ah, MAX_AH),
    DEFINE_PROP_INT32("dev-caps-max-srq", PVRDMADev, dev_attr.max_srq, MAX_SRQ),
    DEFINE_PROP_CHR("mad-chardev", PVRDMADev, mad_chr),
    DEFINE_PROP_END_OF_LIST(),
};

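/*
 * "info rdma" statistics callback, reached through the RdmaProvider
 * interface hook registered in pvrdma_class_init() below.
 */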
static void pvrdma_print_statistics(Monitor *mon, RdmaProvider *obj)
{
    PVRDMADev *dev = PVRDMA_DEV(obj);
    PCIDevice *pdev = PCI_DEVICE(dev);

    monitor_printf(mon, "%s, %x.%x\n", pdev->name, PCI_SLOT(pdev->devfn),
                   PCI_FUNC(pdev->devfn));
    monitor_printf(mon, "\tcommands         : %" PRId64 "\n",
                   dev->stats.commands);
    monitor_printf(mon, "\tregs_reads       : %" PRId64 "\n",
                   dev->stats.regs_reads);
    monitor_printf(mon, "\tregs_writes      : %" PRId64 "\n",
                   dev->stats.regs_writes);
    monitor_printf(mon, "\tuar_writes       : %" PRId64 "\n",
                   dev->stats.uar_writes);
    monitor_printf(mon, "\tinterrupts       : %" PRId64 "\n",
                   dev->stats.interrupts);
    rdma_dump_device_counters(mon, &dev->rdma_dev_res);
}

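/*
 * Device-level rings (async events and CQ notifications) are described by
 * a two-level structure in guest memory: a page directory whose first
 * entry points to a page table; the first page listed in that table holds
 * the ring state and the remaining pages hold the ring buffer itself.
 */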
static void free_dev_ring(PCIDevice *pci_dev, PvrdmaRing *ring,
                          void *ring_state)
{
    pvrdma_ring_free(ring);
    rdma_pci_dma_unmap(pci_dev, ring_state, TARGET_PAGE_SIZE);
}

static int init_dev_ring(PvrdmaRing *ring, struct pvrdma_ring **ring_state,
                         const char *name, PCIDevice *pci_dev,
                         dma_addr_t dir_addr, uint32_t num_pages)
{
    uint64_t *dir, *tbl;
    int rc = 0;

    dir = rdma_pci_dma_map(pci_dev, dir_addr, TARGET_PAGE_SIZE);
    if (!dir) {
        rdma_error_report("Failed to map to page directory (ring %s)", name);
        rc = -ENOMEM;
        goto out;
    }
    tbl = rdma_pci_dma_map(pci_dev, dir[0], TARGET_PAGE_SIZE);
    if (!tbl) {
        rdma_error_report("Failed to map to page table (ring %s)", name);
        rc = -ENOMEM;
        goto out_free_dir;
    }

    *ring_state = rdma_pci_dma_map(pci_dev, tbl[0], TARGET_PAGE_SIZE);
    if (!*ring_state) {
        rdma_error_report("Failed to map to ring state (ring %s)", name);
        rc = -ENOMEM;
        goto out_free_tbl;
    }
    /* RX ring is the second */
    (*ring_state)++;
    rc = pvrdma_ring_init(ring, name, pci_dev,
                          (struct pvrdma_ring *)*ring_state,
                          (num_pages - 1) * TARGET_PAGE_SIZE /
                          sizeof(struct pvrdma_cqne),
                          sizeof(struct pvrdma_cqne),
                          (dma_addr_t *)&tbl[1], (dma_addr_t)num_pages - 1);
    if (rc) {
        rc = -ENOMEM;
        goto out_free_ring_state;
    }

    /* The directory and table mappings are only needed during init */
    goto out_free_tbl;

out_free_ring_state:
    /* Undo the increment above so the page start is unmapped */
    rdma_pci_dma_unmap(pci_dev, *ring_state - 1, TARGET_PAGE_SIZE);

out_free_tbl:
    rdma_pci_dma_unmap(pci_dev, tbl, TARGET_PAGE_SIZE);

out_free_dir:
    rdma_pci_dma_unmap(pci_dev, dir, TARGET_PAGE_SIZE);

out:
    return rc;
}

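/*
 * The Device Shared Region (DSR) is guest memory shared with the device.
 * It holds the command and response slots and the page directories of the
 * device's async-event and CQ-notification rings.
 */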
static void free_dsr(PVRDMADev *dev)
{
    PCIDevice *pci_dev = PCI_DEVICE(dev);

    if (!dev->dsr_info.dsr) {
        return;
    }

    free_dev_ring(pci_dev, &dev->dsr_info.async,
                  dev->dsr_info.async_ring_state);

    free_dev_ring(pci_dev, &dev->dsr_info.cq, dev->dsr_info.cq_ring_state);

    rdma_pci_dma_unmap(pci_dev, dev->dsr_info.req,
                       sizeof(union pvrdma_cmd_req));

    rdma_pci_dma_unmap(pci_dev, dev->dsr_info.rsp,
                       sizeof(union pvrdma_cmd_resp));

    rdma_pci_dma_unmap(pci_dev, dev->dsr_info.dsr,
                       sizeof(struct pvrdma_device_shared_region));

    dev->dsr_info.dsr = NULL;
}

static int load_dsr(PVRDMADev *dev)
{
    int rc = 0;
    PCIDevice *pci_dev = PCI_DEVICE(dev);
    DSRInfo *dsr_info;
    struct pvrdma_device_shared_region *dsr;

    free_dsr(dev);

    /* Map to DSR */
    dev->dsr_info.dsr = rdma_pci_dma_map(pci_dev, dev->dsr_info.dma,
                              sizeof(struct pvrdma_device_shared_region));
    if (!dev->dsr_info.dsr) {
        rdma_error_report("Failed to map to DSR");
        rc = -ENOMEM;
        goto out;
    }

    /* Shortcuts */
    dsr_info = &dev->dsr_info;
    dsr = dsr_info->dsr;

    /* Map to command slot */
    dsr_info->req = rdma_pci_dma_map(pci_dev, dsr->cmd_slot_dma,
                                     sizeof(union pvrdma_cmd_req));
    if (!dsr_info->req) {
        rdma_error_report("Failed to map to command slot address");
        rc = -ENOMEM;
        goto out_free_dsr;
    }

    /* Map to response slot */
    dsr_info->rsp = rdma_pci_dma_map(pci_dev, dsr->resp_slot_dma,
                                     sizeof(union pvrdma_cmd_resp));
    if (!dsr_info->rsp) {
        rdma_error_report("Failed to map to response slot address");
        rc = -ENOMEM;
        goto out_free_req;
    }

    /* Map to CQ notification ring */
    rc = init_dev_ring(&dsr_info->cq, &dsr_info->cq_ring_state, "dev_cq",
                       pci_dev, dsr->cq_ring_pages.pdir_dma,
                       dsr->cq_ring_pages.num_pages);
    if (rc) {
        rc = -ENOMEM;
        goto out_free_rsp;
    }

    /* Map to event notification ring */
    rc = init_dev_ring(&dsr_info->async, &dsr_info->async_ring_state,
                       "dev_async", pci_dev, dsr->async_ring_pages.pdir_dma,
                       dsr->async_ring_pages.num_pages);
    if (rc) {
        rc = -ENOMEM;
        goto out_free_cq_ring;
    }

    goto out;

out_free_cq_ring:
    free_dev_ring(pci_dev, &dsr_info->cq, dsr_info->cq_ring_state);

out_free_rsp:
    rdma_pci_dma_unmap(pci_dev, dsr_info->rsp, sizeof(union pvrdma_cmd_resp));

out_free_req:
    rdma_pci_dma_unmap(pci_dev, dsr_info->req, sizeof(union pvrdma_cmd_req));

out_free_dsr:
    rdma_pci_dma_unmap(pci_dev, dsr_info->dsr,
                       sizeof(struct pvrdma_device_shared_region));
    dsr_info->dsr = NULL;

out:
    return rc;
}

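/*
 * Publish the device capabilities to the guest through the DSR. Most
 * values come from dev_attr, which is seeded from the dev-caps-*
 * properties above and passed to the backend during init.
 */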
static void init_dsr_dev_caps(PVRDMADev *dev)
{
    struct pvrdma_device_shared_region *dsr;

    if (dev->dsr_info.dsr == NULL) {
        rdma_error_report("Can't initialize DSR");
        return;
    }

    dsr = dev->dsr_info.dsr;
    dsr->caps.fw_ver = PVRDMA_FW_VERSION;
    dsr->caps.mode = PVRDMA_DEVICE_MODE_ROCE;
    dsr->caps.gid_types |= PVRDMA_GID_TYPE_FLAG_ROCE_V1;
    dsr->caps.max_uar = RDMA_BAR2_UAR_SIZE;
    dsr->caps.max_mr_size = dev->dev_attr.max_mr_size;
    dsr->caps.max_qp = dev->dev_attr.max_qp;
    dsr->caps.max_qp_wr = dev->dev_attr.max_qp_wr;
    dsr->caps.max_sge = dev->dev_attr.max_sge;
    dsr->caps.max_cq = dev->dev_attr.max_cq;
    dsr->caps.max_cqe = dev->dev_attr.max_cqe;
    dsr->caps.max_mr = dev->dev_attr.max_mr;
    dsr->caps.max_pd = dev->dev_attr.max_pd;
    dsr->caps.max_ah = dev->dev_attr.max_ah;
    dsr->caps.max_srq = dev->dev_attr.max_srq;
    dsr->caps.max_srq_wr = dev->dev_attr.max_srq_wr;
    dsr->caps.max_srq_sge = dev->dev_attr.max_srq_sge;
    dsr->caps.gid_tbl_len = MAX_GIDS;
    dsr->caps.sys_image_guid = 0;
    dsr->caps.node_guid = dev->node_guid;
    dsr->caps.phys_port_cnt = MAX_PORTS;
    dsr->caps.max_pkeys = MAX_PKEYS;
}

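/*
 * The device exposes RDMA_MAX_INTRS MSI-X vectors; both the vector table
 * and the PBA live in the BAR 0 memory region registered in init_bars().
 */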
static void uninit_msix(PCIDevice *pdev, int used_vectors)
{
    PVRDMADev *dev = PVRDMA_DEV(pdev);
    int i;

    for (i = 0; i < used_vectors; i++) {
        msix_vector_unuse(pdev, i);
    }

    msix_uninit(pdev, &dev->msix, &dev->msix);
}

static int init_msix(PCIDevice *pdev)
{
    PVRDMADev *dev = PVRDMA_DEV(pdev);
    int i;
    int rc;

    rc = msix_init(pdev, RDMA_MAX_INTRS, &dev->msix, RDMA_MSIX_BAR_IDX,
                   RDMA_MSIX_TABLE, &dev->msix, RDMA_MSIX_BAR_IDX,
                   RDMA_MSIX_PBA, 0, NULL);
    if (rc < 0) {
        rdma_error_report("Failed to initialize MSI-X");
        return rc;
    }

    for (i = 0; i < RDMA_MAX_INTRS; i++) {
        rc = msix_vector_use(PCI_DEVICE(dev), i);
        if (rc < 0) {
            rdma_error_report("Failed to mark MSI-X vector %d as used", i);
            uninit_msix(pdev, i);
            return rc;
        }
    }

    return 0;
}

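/*
 * Tear the device down in reverse order of initialization; reached both
 * from the shutdown notifier and from a failed realize.
 */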
static void pvrdma_fini(PCIDevice *pdev)
{
    PVRDMADev *dev = PVRDMA_DEV(pdev);

    notifier_remove(&dev->shutdown_notifier);

    pvrdma_qp_ops_fini();

    rdma_backend_stop(&dev->backend_dev);

    rdma_rm_fini(&dev->rdma_dev_res, &dev->backend_dev,
                 dev->backend_eth_device_name);

    rdma_backend_fini(&dev->backend_dev);

    free_dsr(dev);

    if (msix_enabled(pdev)) {
        uninit_msix(pdev, RDMA_MAX_INTRS);
    }

    rdma_info_report("Device %s %x.%x is down", pdev->name,
                     PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));
}

static void pvrdma_stop(PVRDMADev *dev)
{
    rdma_backend_stop(&dev->backend_dev);
}

static void pvrdma_start(PVRDMADev *dev)
{
    rdma_backend_start(&dev->backend_dev);
}

static void activate_device(PVRDMADev *dev)
{
    pvrdma_start(dev);
    set_reg_val(dev, PVRDMA_REG_ERR, 0);
}

static int unquiesce_device(PVRDMADev *dev)
{
    return 0;
}

static void reset_device(PVRDMADev *dev)
{
    pvrdma_stop(dev);
}

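/*
 * BAR 1 register accessors. The guest driver programs the DSR address with
 * two 32-bit writes, DSRLOW then DSRHIGH; the DSRHIGH write triggers the
 * mapping of the shared region. Writing 0 to PVRDMA_REG_REQUEST executes
 * the command currently staged in the DSR command slot.
 */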
static uint64_t pvrdma_regs_read(void *opaque, hwaddr addr, unsigned size)
{
    PVRDMADev *dev = opaque;
    uint32_t val;

    dev->stats.regs_reads++;

    if (get_reg_val(dev, addr, &val)) {
        rdma_error_report("Failed to read REG value from address 0x%x",
                          (uint32_t)addr);
        return -EINVAL;
    }

    trace_pvrdma_regs_read(addr, val);

    return val;
}

static void pvrdma_regs_write(void *opaque, hwaddr addr, uint64_t val,
                              unsigned size)
{
    PVRDMADev *dev = opaque;

    dev->stats.regs_writes++;

    if (set_reg_val(dev, addr, val)) {
        rdma_error_report("Failed to set REG value, addr=0x%"PRIx64 ", val=0x%"PRIx64,
                          addr, val);
        return;
    }

    switch (addr) {
    case PVRDMA_REG_DSRLOW:
        trace_pvrdma_regs_write(addr, val, "DSRLOW", "");
        dev->dsr_info.dma = val;
        break;
    case PVRDMA_REG_DSRHIGH:
        trace_pvrdma_regs_write(addr, val, "DSRHIGH", "");
        dev->dsr_info.dma |= val << 32;
        load_dsr(dev);
        init_dsr_dev_caps(dev);
        break;
    case PVRDMA_REG_CTL:
        switch (val) {
        case PVRDMA_DEVICE_CTL_ACTIVATE:
            trace_pvrdma_regs_write(addr, val, "CTL", "ACTIVATE");
            activate_device(dev);
            break;
        case PVRDMA_DEVICE_CTL_UNQUIESCE:
            trace_pvrdma_regs_write(addr, val, "CTL", "UNQUIESCE");
            unquiesce_device(dev);
            break;
        case PVRDMA_DEVICE_CTL_RESET:
            trace_pvrdma_regs_write(addr, val, "CTL", "RESET");
            reset_device(dev);
            break;
        }
        break;
    case PVRDMA_REG_IMR:
        trace_pvrdma_regs_write(addr, val, "INTR_MASK", "");
        dev->interrupt_mask = val;
        break;
    case PVRDMA_REG_REQUEST:
        if (val == 0) {
            trace_pvrdma_regs_write(addr, val, "REQUEST", "");
            pvrdma_exec_cmd(dev);
        }
        break;
    default:
        break;
    }
}

static const MemoryRegionOps regs_ops = {
    .read = pvrdma_regs_read,
    .write = pvrdma_regs_write,
    .endianness = DEVICE_LITTLE_ENDIAN,
    .impl = {
        .min_access_size = sizeof(uint32_t),
        .max_access_size = sizeof(uint32_t),
    },
};

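/*
 * BAR 2 User Access Region (UAR) doorbells. Each user context owns one
 * page of the region; the low 12 bits of the offset select the doorbell
 * type and the written value carries the QP/CQ/SRQ handle plus flags.
 */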
static uint64_t pvrdma_uar_read(void *opaque, hwaddr addr, unsigned size)
{
    return 0xffffffff;
}

static void pvrdma_uar_write(void *opaque, hwaddr addr, uint64_t val,
                             unsigned size)
{
    PVRDMADev *dev = opaque;

    dev->stats.uar_writes++;

    switch (addr & 0xFFF) { /* Mask with 0xFFF as each user context gets a page */
    case PVRDMA_UAR_QP_OFFSET:
        if (val & PVRDMA_UAR_QP_SEND) {
            trace_pvrdma_uar_write(addr, val, "QP", "SEND",
                                   val & PVRDMA_UAR_HANDLE_MASK, 0);
            pvrdma_qp_send(dev, val & PVRDMA_UAR_HANDLE_MASK);
        }
        if (val & PVRDMA_UAR_QP_RECV) {
            trace_pvrdma_uar_write(addr, val, "QP", "RECV",
                                   val & PVRDMA_UAR_HANDLE_MASK, 0);
            pvrdma_qp_recv(dev, val & PVRDMA_UAR_HANDLE_MASK);
        }
        break;
    case PVRDMA_UAR_CQ_OFFSET:
        if (val & PVRDMA_UAR_CQ_ARM) {
            trace_pvrdma_uar_write(addr, val, "CQ", "ARM",
                                   val & PVRDMA_UAR_HANDLE_MASK,
                                   !!(val & PVRDMA_UAR_CQ_ARM_SOL));
            rdma_rm_req_notify_cq(&dev->rdma_dev_res,
                                  val & PVRDMA_UAR_HANDLE_MASK,
                                  !!(val & PVRDMA_UAR_CQ_ARM_SOL));
        }
        if (val & PVRDMA_UAR_CQ_ARM_SOL) {
            trace_pvrdma_uar_write(addr, val, "CQ", "ARMSOL - not supported", 0,
                                   0);
        }
        if (val & PVRDMA_UAR_CQ_POLL) {
            trace_pvrdma_uar_write(addr, val, "CQ", "POLL",
                                   val & PVRDMA_UAR_HANDLE_MASK, 0);
            pvrdma_cq_poll(&dev->rdma_dev_res, val & PVRDMA_UAR_HANDLE_MASK);
        }
        break;
    case PVRDMA_UAR_SRQ_OFFSET:
        if (val & PVRDMA_UAR_SRQ_RECV) {
            trace_pvrdma_uar_write(addr, val, "SRQ", "RECV",
                                   val & PVRDMA_UAR_HANDLE_MASK, 0);
            pvrdma_srq_recv(dev, val & PVRDMA_UAR_HANDLE_MASK);
        }
        break;
    default:
        rdma_error_report("Unsupported command, addr=0x%"PRIx64", val=0x%"PRIx64,
                          addr, val);
        break;
    }
}

static const MemoryRegionOps uar_ops = {
    .read = pvrdma_uar_read,
    .write = pvrdma_uar_write,
    .endianness = DEVICE_LITTLE_ENDIAN,
    .impl = {
        .min_access_size = sizeof(uint32_t),
        .max_access_size = sizeof(uint32_t),
    },
};

static void init_pci_config(PCIDevice *pdev)
{
    pdev->config[PCI_INTERRUPT_PIN] = 1;
}

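/*
 * PCI BAR layout: BAR 0 holds the MSI-X table and PBA, BAR 1 the device
 * registers and BAR 2 the UAR doorbell pages.
 */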
static void init_bars(PCIDevice *pdev)
{
    PVRDMADev *dev = PVRDMA_DEV(pdev);

    /* BAR 0 - MSI-X */
    memory_region_init(&dev->msix, OBJECT(dev), "pvrdma-msix",
                       RDMA_BAR0_MSIX_SIZE);
    pci_register_bar(pdev, RDMA_MSIX_BAR_IDX, PCI_BASE_ADDRESS_SPACE_MEMORY,
                     &dev->msix);

    /* BAR 1 - Registers */
    memset(&dev->regs_data, 0, sizeof(dev->regs_data));
    memory_region_init_io(&dev->regs, OBJECT(dev), &regs_ops, dev,
                          "pvrdma-regs", sizeof(dev->regs_data));
    pci_register_bar(pdev, RDMA_REG_BAR_IDX, PCI_BASE_ADDRESS_SPACE_MEMORY,
                     &dev->regs);

    /* BAR 2 - UAR */
    memset(&dev->uar_data, 0, sizeof(dev->uar_data));
    memory_region_init_io(&dev->uar, OBJECT(dev), &uar_ops, dev, "rdma-uar",
                          sizeof(dev->uar_data));
    pci_register_bar(pdev, RDMA_UAR_BAR_IDX, PCI_BASE_ADDRESS_SPACE_MEMORY,
                     &dev->uar);
}

static void init_regs(PCIDevice *pdev)
{
    PVRDMADev *dev = PVRDMA_DEV(pdev);

    set_reg_val(dev, PVRDMA_REG_VERSION, PVRDMA_HW_VERSION);
    set_reg_val(dev, PVRDMA_REG_ERR, 0xFFFF);
}

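/*
 * Derive ring-size limits from the capacity of a single page table (one
 * directory entry): TARGET_PAGE_SIZE / sizeof(uint64_t) entries, each
 * pointing to one page, minus room lost to the ring-state page.
 */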
static void init_dev_caps(PVRDMADev *dev)
{
    size_t pg_tbl_bytes = TARGET_PAGE_SIZE *
                          (TARGET_PAGE_SIZE / sizeof(uint64_t));
    size_t wr_sz = MAX(sizeof(struct pvrdma_sq_wqe_hdr),
                       sizeof(struct pvrdma_rq_wqe_hdr));

    dev->dev_attr.max_qp_wr = pg_tbl_bytes /
                              (wr_sz + sizeof(struct pvrdma_sge) *
                              dev->dev_attr.max_sge) - TARGET_PAGE_SIZE;
                              /* First page is ring state  ^^^^ */

    dev->dev_attr.max_cqe = pg_tbl_bytes / sizeof(struct pvrdma_cqe) -
                            TARGET_PAGE_SIZE; /* First page is ring state */

    dev->dev_attr.max_srq_wr = pg_tbl_bytes /
                                ((sizeof(struct pvrdma_rq_wqe_hdr) +
                                sizeof(struct pvrdma_sge)) *
                                dev->dev_attr.max_sge) - TARGET_PAGE_SIZE;
}

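/*
 * pvrdma requires guest RAM to be backed by a shared memory object; the
 * helper below walks /objects at realize time to verify this. An
 * illustrative invocation (ids and sizes are examples, not mandated):
 *
 *   -object memory-backend-ram,id=mb1,size=1G,share=on
 *   -numa node,memdev=mb1
 */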
static int pvrdma_check_ram_shared(Object *obj, void *opaque)
{
    bool *shared = opaque;

    if (object_dynamic_cast(obj, "memory-backend-ram")) {
        *shared = object_property_get_bool(obj, "share", NULL);
    }

    return 0;
}

static void pvrdma_shutdown_notifier(Notifier *n, void *opaque)
{
    PVRDMADev *dev = container_of(n, PVRDMADev, shutdown_notifier);
    PCIDevice *pci_dev = PCI_DEVICE(dev);

    pvrdma_fini(pci_dev);
}

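/*
 * Realize the device: verify the environment (page size, vmxnet3 as
 * function 0, shared RAM), then bring up BARs, MSI-X, the RDMA backend and
 * the resource manager, in that order. Failures unwind via pvrdma_fini().
 */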
static void pvrdma_realize(PCIDevice *pdev, Error **errp)
{
    int rc = 0;
    PVRDMADev *dev = PVRDMA_DEV(pdev);
    Object *memdev_root;
    bool ram_shared = false;
    PCIDevice *func0;

    rdma_info_report("Initializing device %s %x.%x", pdev->name,
                     PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));

    if (TARGET_PAGE_SIZE != qemu_real_host_page_size) {
        error_setg(errp, "Target page size must be the same as host page size");
        return;
    }

    func0 = pci_get_function_0(pdev);
    /* Fail if the function 0 device in our slot is not a vmxnet3 NIC */
    if (strcmp(object_get_typename(OBJECT(func0)), TYPE_VMXNET3)) {
        error_setg(errp, "Device on %x.0 must be %s", PCI_SLOT(pdev->devfn),
                   TYPE_VMXNET3);
        return;
    }
    dev->func0 = VMXNET3(func0);

    /* Derive the node GUID from the paired NIC's MAC address */
    addrconf_addr_eui48((unsigned char *)&dev->node_guid,
                        (const char *)&dev->func0->conf.macaddr.a);

    memdev_root = object_resolve_path("/objects", NULL);
    if (memdev_root) {
        object_child_foreach(memdev_root, pvrdma_check_ram_shared, &ram_shared);
    }
    if (!ram_shared) {
        error_setg(errp, "Only shared-memory backed RAM is supported");
        return;
    }

    dev->dsr_info.dsr = NULL;

    init_pci_config(pdev);

    init_bars(pdev);

    init_regs(pdev);

    rc = init_msix(pdev);
    if (rc) {
        goto out;
    }

    rc = rdma_backend_init(&dev->backend_dev, pdev, &dev->rdma_dev_res,
                           dev->backend_device_name, dev->backend_port_num,
                           &dev->dev_attr, &dev->mad_chr);
    if (rc) {
        goto out;
    }

    init_dev_caps(dev);

    rc = rdma_rm_init(&dev->rdma_dev_res, &dev->dev_attr);
    if (rc) {
        goto out;
    }

    rc = pvrdma_qp_ops_init();
    if (rc) {
        goto out;
    }

    memset(&dev->stats, 0, sizeof(dev->stats));

    dev->shutdown_notifier.notify = pvrdma_shutdown_notifier;
    qemu_register_shutdown_notifier(&dev->shutdown_notifier);

#ifdef LEGACY_RDMA_REG_MR
    rdma_info_report("Using legacy reg_mr");
#else
    rdma_info_report("Using iova reg_mr");
#endif

out:
    if (rc) {
        pvrdma_fini(pdev);
        error_append_hint(errp, "Device failed to load\n");
    }
}

static void pvrdma_class_init(ObjectClass *klass, void *data)
{
    DeviceClass *dc = DEVICE_CLASS(klass);
    PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);
    RdmaProviderClass *ir = RDMA_PROVIDER_CLASS(klass);

    k->realize = pvrdma_realize;
    k->vendor_id = PCI_VENDOR_ID_VMWARE;
    k->device_id = PCI_DEVICE_ID_VMWARE_PVRDMA;
    k->revision = 0x00;
    k->class_id = PCI_CLASS_NETWORK_OTHER;

    dc->desc = "RDMA Device";
    device_class_set_props(dc, pvrdma_dev_properties);
    set_bit(DEVICE_CATEGORY_NETWORK, dc->categories);

    ir->print_statistics = pvrdma_print_statistics;
}

static const TypeInfo pvrdma_info = {
    .name = PVRDMA_HW_NAME,
    .parent = TYPE_PCI_DEVICE,
    .instance_size = sizeof(PVRDMADev),
    .class_init = pvrdma_class_init,
    .interfaces = (InterfaceInfo[]) {
        { INTERFACE_CONVENTIONAL_PCI_DEVICE },
        { INTERFACE_RDMA_PROVIDER },
        { }
    }
};

static void register_types(void)
{
    type_register_static(&pvrdma_info);
}

type_init(register_types)