/*
 * QEMU paravirtual RDMA - Resource Manager Implementation
 *
 * Copyright (C) 2018 Oracle
 * Copyright (C) 2018 Red Hat Inc
 *
 * Authors:
 *     Yuval Shaia <yuval.shaia@oracle.com>
 *     Marcel Apfelbaum <marcel@redhat.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 *
 */

#include "qemu/osdep.h"
#include "qapi/error.h"
#include "cpu.h"

#include "rdma_utils.h"
#include "rdma_backend.h"
#include "rdma_rm.h"

/* Page directory and page tables */
#define PG_DIR_SZ (TARGET_PAGE_SIZE / sizeof(__u64))
#define PG_TBL_SZ (TARGET_PAGE_SIZE / sizeof(__u64))

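/*
 * Generic resource tables: each table is a flat array of fixed-size
 * entries plus an allocation bitmap, protected by a mutex. A handle is
 * just an index into the array, so lookup is O(1) and allocation is a
 * bitmap scan for the first free slot.
 */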
static inline void res_tbl_init(const char *name, RdmaRmResTbl *tbl,
                                uint32_t tbl_sz, uint32_t res_sz)
{
    tbl->tbl = g_malloc(tbl_sz * res_sz);

    strncpy(tbl->name, name, MAX_RM_TBL_NAME);
    tbl->name[MAX_RM_TBL_NAME - 1] = 0;

    tbl->bitmap = bitmap_new(tbl_sz);
    tbl->tbl_sz = tbl_sz;
    tbl->res_sz = res_sz;
    qemu_mutex_init(&tbl->lock);
}

static inline void res_tbl_free(RdmaRmResTbl *tbl)
{
    qemu_mutex_destroy(&tbl->lock);
    g_free(tbl->tbl);
    /* bitmap_new() allocates with g_malloc, so release with g_free */
    g_free(tbl->bitmap);
}

static inline void *res_tbl_get(RdmaRmResTbl *tbl, uint32_t handle)
{
    pr_dbg("%s, handle=%d\n", tbl->name, handle);

    if ((handle < tbl->tbl_sz) && (test_bit(handle, tbl->bitmap))) {
        return tbl->tbl + handle * tbl->res_sz;
    } else {
        pr_dbg("Invalid handle %d\n", handle);
        return NULL;
    }
}

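/*
 * Allocate a free slot: find the first clear bit, mark it used and
 * return the zeroed entry, or NULL when the table is full. Only the
 * bitmap scan and set need the lock; the entry itself can be zeroed
 * after unlocking because the slot is already reserved.
 */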
static inline void *res_tbl_alloc(RdmaRmResTbl *tbl, uint32_t *handle)
{
    qemu_mutex_lock(&tbl->lock);

    *handle = find_first_zero_bit(tbl->bitmap, tbl->tbl_sz);
    /* find_first_zero_bit returns tbl_sz when no bit is clear */
    if (*handle >= tbl->tbl_sz) {
        pr_dbg("Failed to alloc, bitmap is full\n");
        qemu_mutex_unlock(&tbl->lock);
        return NULL;
    }

    set_bit(*handle, tbl->bitmap);

    qemu_mutex_unlock(&tbl->lock);

    memset(tbl->tbl + *handle * tbl->res_sz, 0, tbl->res_sz);

    pr_dbg("%s, handle=%d\n", tbl->name, *handle);

    return tbl->tbl + *handle * tbl->res_sz;
}

static inline void res_tbl_dealloc(RdmaRmResTbl *tbl, uint32_t handle)
{
    pr_dbg("%s, handle=%d\n", tbl->name, handle);

    qemu_mutex_lock(&tbl->lock);

    if (handle < tbl->tbl_sz) {
        clear_bit(handle, tbl->bitmap);
    }

    qemu_mutex_unlock(&tbl->lock);
}

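/*
 * Resource allocation follows the same two-step pattern throughout
 * this file: take a table slot first, then create the matching backend
 * object, releasing the slot again if the backend call fails.
 */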
int rdma_rm_alloc_pd(RdmaDeviceResources *dev_res, RdmaBackendDev *backend_dev,
                     uint32_t *pd_handle, uint32_t ctx_handle)
{
    RdmaRmPD *pd;
    int ret = -ENOMEM;

    pd = res_tbl_alloc(&dev_res->pd_tbl, pd_handle);
    if (!pd) {
        goto out;
    }

    ret = rdma_backend_create_pd(backend_dev, &pd->backend_pd);
    if (ret) {
        ret = -EIO;
        goto out_tbl_dealloc;
    }

    pd->ctx_handle = ctx_handle;

    return 0;

out_tbl_dealloc:
    res_tbl_dealloc(&dev_res->pd_tbl, *pd_handle);

out:
    return ret;
}

RdmaRmPD *rdma_rm_get_pd(RdmaDeviceResources *dev_res, uint32_t pd_handle)
{
    return res_tbl_get(&dev_res->pd_tbl, pd_handle);
}

void rdma_rm_dealloc_pd(RdmaDeviceResources *dev_res, uint32_t pd_handle)
{
    RdmaRmPD *pd = rdma_rm_get_pd(dev_res, pd_handle);

    if (pd) {
        rdma_backend_destroy_pd(&pd->backend_pd);
        res_tbl_dealloc(&dev_res->pd_tbl, pd_handle);
    }
}

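/*
 * Register a memory region. host_virt, when set, is the page-aligned
 * host mapping of the guest range; the offset of guest_start within
 * its page is added back so mr->virt points at the exact first byte.
 * The returned lkey is simply the table handle (see the comment below)
 * and rkey is set to -1, i.e. remote keys are not handed out here.
 */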
int rdma_rm_alloc_mr(RdmaDeviceResources *dev_res, uint32_t pd_handle,
                     uint64_t guest_start, size_t guest_length, void *host_virt,
                     int access_flags, uint32_t *mr_handle, uint32_t *lkey,
                     uint32_t *rkey)
{
    RdmaRmMR *mr;
    int ret = 0;
    RdmaRmPD *pd;

    pd = rdma_rm_get_pd(dev_res, pd_handle);
    if (!pd) {
        pr_dbg("Invalid PD\n");
        return -EINVAL;
    }

    mr = res_tbl_alloc(&dev_res->mr_tbl, mr_handle);
    if (!mr) {
        pr_dbg("Failed to allocate obj in table\n");
        return -ENOMEM;
    }
    pr_dbg("mr_handle=%d\n", *mr_handle);

    pr_dbg("host_virt=0x%p\n", host_virt);
    pr_dbg("guest_start=0x%" PRIx64 "\n", guest_start);
    pr_dbg("length=%zu\n", guest_length);

    if (host_virt) {
        mr->virt = host_virt;
        mr->start = guest_start;
        mr->length = guest_length;
        mr->virt += (mr->start & (TARGET_PAGE_SIZE - 1));

        ret = rdma_backend_create_mr(&mr->backend_mr, &pd->backend_pd, mr->virt,
                                     mr->length, access_flags);
        if (ret) {
            pr_dbg("Fail in rdma_backend_create_mr, err=%d\n", ret);
            ret = -EIO;
            goto out_dealloc_mr;
        }
    }
    /* We keep mr_handle in lkey so send and recv can get the MR pointer */
    *lkey = *mr_handle;
    *rkey = -1;

    mr->pd_handle = pd_handle;

    return 0;

out_dealloc_mr:
    res_tbl_dealloc(&dev_res->mr_tbl, *mr_handle);

    return ret;
}

RdmaRmMR *rdma_rm_get_mr(RdmaDeviceResources *dev_res, uint32_t mr_handle)
{
    return res_tbl_get(&dev_res->mr_tbl, mr_handle);
}

void rdma_rm_dealloc_mr(RdmaDeviceResources *dev_res, uint32_t mr_handle)
{
    RdmaRmMR *mr = rdma_rm_get_mr(dev_res, mr_handle);

    if (mr) {
        rdma_backend_destroy_mr(&mr->backend_mr);
        pr_dbg("start=0x%" PRIx64 "\n", mr->start);
        if (mr->start) {
            mr->virt -= (mr->start & (TARGET_PAGE_SIZE - 1));
            munmap(mr->virt, mr->length);
        }
        res_tbl_dealloc(&dev_res->mr_tbl, mr_handle);
    }
}

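/*
 * User contexts: only the table slot is tracked here. Note that the
 * pfn range check below is still commented out as a TODO.
 */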
int rdma_rm_alloc_uc(RdmaDeviceResources *dev_res, uint32_t pfn,
                     uint32_t *uc_handle)
{
    RdmaRmUC *uc;

    /* TODO: Need to make sure pfn is between the BAR start address and
     * that address + RDMA_BAR2_UAR_SIZE
    if (pfn > RDMA_BAR2_UAR_SIZE) {
        pr_err("pfn out of range (%d > %d)\n", pfn, RDMA_BAR2_UAR_SIZE);
        return -ENOMEM;
    }
    */

    uc = res_tbl_alloc(&dev_res->uc_tbl, uc_handle);
    if (!uc) {
        return -ENOMEM;
    }

    return 0;
}

RdmaRmUC *rdma_rm_get_uc(RdmaDeviceResources *dev_res, uint32_t uc_handle)
{
    return res_tbl_get(&dev_res->uc_tbl, uc_handle);
}

void rdma_rm_dealloc_uc(RdmaDeviceResources *dev_res, uint32_t uc_handle)
{
    RdmaRmUC *uc = rdma_rm_get_uc(dev_res, uc_handle);

    if (uc) {
        res_tbl_dealloc(&dev_res->uc_tbl, uc_handle);
    }
}

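/*
 * Completion queues. cq->notify records whether the guest armed the CQ
 * for a notification event (rdma_rm_req_notify_cq below); the flag is
 * left for the completion path, outside this file, to consume.
 */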
RdmaRmCQ *rdma_rm_get_cq(RdmaDeviceResources *dev_res, uint32_t cq_handle)
{
    return res_tbl_get(&dev_res->cq_tbl, cq_handle);
}

int rdma_rm_alloc_cq(RdmaDeviceResources *dev_res, RdmaBackendDev *backend_dev,
                     uint32_t cqe, uint32_t *cq_handle, void *opaque)
{
    int rc;
    RdmaRmCQ *cq;

    cq = res_tbl_alloc(&dev_res->cq_tbl, cq_handle);
    if (!cq) {
        return -ENOMEM;
    }

    cq->opaque = opaque;
    cq->notify = false;

    rc = rdma_backend_create_cq(backend_dev, &cq->backend_cq, cqe);
    if (rc) {
        rc = -EIO;
        goto out_dealloc_cq;
    }

    return 0;

out_dealloc_cq:
    rdma_rm_dealloc_cq(dev_res, *cq_handle);

    return rc;
}

void rdma_rm_req_notify_cq(RdmaDeviceResources *dev_res, uint32_t cq_handle,
                           bool notify)
{
    RdmaRmCQ *cq;

    pr_dbg("cq_handle=%d, notify=0x%x\n", cq_handle, notify);

    cq = rdma_rm_get_cq(dev_res, cq_handle);
    if (!cq) {
        return;
    }

    cq->notify = notify;
    pr_dbg("notify=%d\n", cq->notify);
}

void rdma_rm_dealloc_cq(RdmaDeviceResources *dev_res, uint32_t cq_handle)
{
    RdmaRmCQ *cq;

    cq = rdma_rm_get_cq(dev_res, cq_handle);
    if (!cq) {
        return;
    }

    rdma_backend_destroy_cq(&cq->backend_cq);

    res_tbl_dealloc(&dev_res->cq_tbl, cq_handle);
}

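/*
 * QPs are indexed twice: qp_tbl by the internal handle (rm_qpn) and
 * qp_hash by the backend QPN, serialized into a GBytes key, because
 * the lookup, modify and dealloc paths below all receive the backend
 * number rather than the table handle.
 */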
RdmaRmQP *rdma_rm_get_qp(RdmaDeviceResources *dev_res, uint32_t qpn)
{
    GBytes *key = g_bytes_new(&qpn, sizeof(qpn));

    RdmaRmQP *qp = g_hash_table_lookup(dev_res->qp_hash, key);

    g_bytes_unref(key);

    return qp;
}

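/*
 * Create a QP: validate the PD and both CQs, take a table slot, then
 * create the backend QP. The number returned to the caller (*qpn) is
 * the backend QPN, not the internal table handle.
 */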
int rdma_rm_alloc_qp(RdmaDeviceResources *dev_res, uint32_t pd_handle,
                     uint8_t qp_type, uint32_t max_send_wr,
                     uint32_t max_send_sge, uint32_t send_cq_handle,
                     uint32_t max_recv_wr, uint32_t max_recv_sge,
                     uint32_t recv_cq_handle, void *opaque, uint32_t *qpn)
{
    int rc;
    RdmaRmQP *qp;
    RdmaRmCQ *scq, *rcq;
    RdmaRmPD *pd;
    uint32_t rm_qpn;

    pr_dbg("qp_type=%d\n", qp_type);

    pd = rdma_rm_get_pd(dev_res, pd_handle);
    if (!pd) {
        pr_err("Invalid pd handle (%d)\n", pd_handle);
        return -EINVAL;
    }

    scq = rdma_rm_get_cq(dev_res, send_cq_handle);
    rcq = rdma_rm_get_cq(dev_res, recv_cq_handle);

    if (!scq || !rcq) {
        pr_err("Invalid send_cqn or recv_cqn (%d, %d)\n",
               send_cq_handle, recv_cq_handle);
        return -EINVAL;
    }

    qp = res_tbl_alloc(&dev_res->qp_tbl, &rm_qpn);
    if (!qp) {
        return -ENOMEM;
    }
    pr_dbg("rm_qpn=%d\n", rm_qpn);

    qp->qpn = rm_qpn;
    qp->qp_state = IBV_QPS_RESET;
    qp->qp_type = qp_type;
    qp->send_cq_handle = send_cq_handle;
    qp->recv_cq_handle = recv_cq_handle;
    qp->opaque = opaque;

    rc = rdma_backend_create_qp(&qp->backend_qp, qp_type, &pd->backend_pd,
                                &scq->backend_cq, &rcq->backend_cq, max_send_wr,
                                max_recv_wr, max_send_sge, max_recv_sge);
    if (rc) {
        rc = -EIO;
        goto out_dealloc_qp;
    }

    *qpn = rdma_backend_qpn(&qp->backend_qp);
    pr_dbg("rm_qpn=%d, backend_qpn=0x%x\n", rm_qpn, *qpn);
    g_hash_table_insert(dev_res->qp_hash, g_bytes_new(qpn, sizeof(*qpn)), qp);

    return 0;

out_dealloc_qp:
    res_tbl_dealloc(&dev_res->qp_tbl, qp->qpn);

    return rc;
}

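/*
 * Apply a guest-requested state transition. QP0 (SMI) is rejected and
 * QP1 (GSI) is accepted as a no-op; ordinary QPs are driven through
 * the usual RESET -> INIT -> RTR -> RTS ladder, one step per call, as
 * selected by qp_state when IBV_QP_STATE is set in attr_mask.
 */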
int rdma_rm_modify_qp(RdmaDeviceResources *dev_res, RdmaBackendDev *backend_dev,
                      uint32_t qp_handle, uint32_t attr_mask,
                      union ibv_gid *dgid, uint32_t dqpn,
                      enum ibv_qp_state qp_state, uint32_t qkey,
                      uint32_t rq_psn, uint32_t sq_psn)
{
    RdmaRmQP *qp;
    int ret;

    pr_dbg("qpn=0x%x\n", qp_handle);

    qp = rdma_rm_get_qp(dev_res, qp_handle);
    if (!qp) {
        return -EINVAL;
    }

    pr_dbg("qp_type=%d\n", qp->qp_type);
    pr_dbg("attr_mask=0x%x\n", attr_mask);

    if (qp->qp_type == IBV_QPT_SMI) {
        pr_dbg("QP0 unsupported\n");
        return -EPERM;
    } else if (qp->qp_type == IBV_QPT_GSI) {
        pr_dbg("QP1\n");
        return 0;
    }

    if (attr_mask & IBV_QP_STATE) {
        qp->qp_state = qp_state;
        pr_dbg("qp_state=%d\n", qp->qp_state);

        if (qp->qp_state == IBV_QPS_INIT) {
            ret = rdma_backend_qp_state_init(backend_dev, &qp->backend_qp,
                                             qp->qp_type, qkey);
            if (ret) {
                return -EIO;
            }
        }

        if (qp->qp_state == IBV_QPS_RTR) {
            ret = rdma_backend_qp_state_rtr(backend_dev, &qp->backend_qp,
                                            qp->qp_type, dgid, dqpn, rq_psn,
                                            qkey, attr_mask & IBV_QP_QKEY);
            if (ret) {
                return -EIO;
            }
        }

        if (qp->qp_state == IBV_QPS_RTS) {
            ret = rdma_backend_qp_state_rts(&qp->backend_qp, qp->qp_type,
                                            sq_psn, qkey,
                                            attr_mask & IBV_QP_QKEY);
            if (ret) {
                return -EIO;
            }
        }
    }

    return 0;
}

int rdma_rm_query_qp(RdmaDeviceResources *dev_res, RdmaBackendDev *backend_dev,
                     uint32_t qp_handle, struct ibv_qp_attr *attr,
                     int attr_mask, struct ibv_qp_init_attr *init_attr)
{
    RdmaRmQP *qp;

    pr_dbg("qpn=0x%x\n", qp_handle);

    qp = rdma_rm_get_qp(dev_res, qp_handle);
    if (!qp) {
        return -EINVAL;
    }

    pr_dbg("qp_type=%d\n", qp->qp_type);

    return rdma_backend_query_qp(&qp->backend_qp, attr, attr_mask, init_attr);
}

void rdma_rm_dealloc_qp(RdmaDeviceResources *dev_res, uint32_t qp_handle)
{
    RdmaRmQP *qp;
    GBytes *key;

    key = g_bytes_new(&qp_handle, sizeof(qp_handle));
    qp = g_hash_table_lookup(dev_res->qp_hash, key);
    g_hash_table_remove(dev_res->qp_hash, key);
    g_bytes_unref(key);

    if (!qp) {
        return;
    }

    rdma_backend_destroy_qp(&qp->backend_qp);

    res_tbl_dealloc(&dev_res->qp_tbl, qp->qpn);
}

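/*
 * CQE context table: maps a 32-bit id back to the opaque pointer
 * registered with it, letting the completion path (which only sees the
 * id) recover the caller's per-work-request state.
 */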
void *rdma_rm_get_cqe_ctx(RdmaDeviceResources *dev_res, uint32_t cqe_ctx_id)
{
    void **cqe_ctx;

    cqe_ctx = res_tbl_get(&dev_res->cqe_ctx_tbl, cqe_ctx_id);
    if (!cqe_ctx) {
        return NULL;
    }

    pr_dbg("ctx=%p\n", *cqe_ctx);

    return *cqe_ctx;
}

int rdma_rm_alloc_cqe_ctx(RdmaDeviceResources *dev_res, uint32_t *cqe_ctx_id,
                          void *ctx)
{
    void **cqe_ctx;

    cqe_ctx = res_tbl_alloc(&dev_res->cqe_ctx_tbl, cqe_ctx_id);
    if (!cqe_ctx) {
        return -ENOMEM;
    }

    pr_dbg("ctx=%p\n", ctx);
    *cqe_ctx = ctx;

    return 0;
}

void rdma_rm_dealloc_cqe_ctx(RdmaDeviceResources *dev_res, uint32_t cqe_ctx_id)
{
    res_tbl_dealloc(&dev_res->cqe_ctx_tbl, cqe_ctx_id);
}

static void destroy_qp_hash_key(gpointer data)
{
    g_bytes_unref(data);
}

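/*
 * Initialize all resource tables. Sizes come from the backend device
 * attributes, so the guest can never hold more objects than the host
 * device supports; the CQE context table is sized for the worst case
 * of every QP having all its work requests in flight. A rough usage
 * sketch (error handling elided; how attr is filled is assumed to be
 * whatever the caller queried from the backend):
 *
 *     struct ibv_device_attr attr;    // filled in by the backend
 *     RdmaDeviceResources res;
 *     rdma_rm_init(&res, &attr, errp);
 *     ... rdma_rm_alloc_pd(), rdma_rm_alloc_qp(), ...
 *     rdma_rm_fini(&res);
 */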
int rdma_rm_init(RdmaDeviceResources *dev_res, struct ibv_device_attr *dev_attr,
                 Error **errp)
{
    dev_res->qp_hash = g_hash_table_new_full(g_bytes_hash, g_bytes_equal,
                                             destroy_qp_hash_key, NULL);
    if (!dev_res->qp_hash) {
        return -ENOMEM;
    }

    res_tbl_init("PD", &dev_res->pd_tbl, dev_attr->max_pd, sizeof(RdmaRmPD));
    res_tbl_init("CQ", &dev_res->cq_tbl, dev_attr->max_cq, sizeof(RdmaRmCQ));
    res_tbl_init("MR", &dev_res->mr_tbl, dev_attr->max_mr, sizeof(RdmaRmMR));
    res_tbl_init("QP", &dev_res->qp_tbl, dev_attr->max_qp, sizeof(RdmaRmQP));
    res_tbl_init("CQE_CTX", &dev_res->cqe_ctx_tbl, dev_attr->max_qp *
                       dev_attr->max_qp_wr, sizeof(void *));
    res_tbl_init("UC", &dev_res->uc_tbl, MAX_UCS, sizeof(RdmaRmUC));

    return 0;
}

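/*
 * Teardown: free the tables in reverse order of creation, then drop
 * the QPN hash; its keys are released by destroy_qp_hash_key.
 */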
void rdma_rm_fini(RdmaDeviceResources *dev_res)
{
    res_tbl_free(&dev_res->uc_tbl);
    res_tbl_free(&dev_res->cqe_ctx_tbl);
    res_tbl_free(&dev_res->qp_tbl);
    res_tbl_free(&dev_res->mr_tbl);
    res_tbl_free(&dev_res->cq_tbl);
    res_tbl_free(&dev_res->pd_tbl);

    g_hash_table_destroy(dev_res->qp_hash);
}