qemu/hw/rdma/rdma_rm.c
<<
>>
Prefs
   1/*
   2 * QEMU paravirtual RDMA - Resource Manager Implementation
   3 *
   4 * Copyright (C) 2018 Oracle
   5 * Copyright (C) 2018 Red Hat Inc
   6 *
   7 * Authors:
   8 *     Yuval Shaia <yuval.shaia@oracle.com>
   9 *     Marcel Apfelbaum <marcel@redhat.com>
  10 *
  11 * This work is licensed under the terms of the GNU GPL, version 2 or later.
  12 * See the COPYING file in the top-level directory.
  13 *
  14 */
  15
  16#include "qemu/osdep.h"
  17#include "qapi/error.h"
  18#include "cpu.h"
  19
  20#include "rdma_utils.h"
  21#include "rdma_backend.h"
  22#include "rdma_rm.h"
  23
  24#define MAX_RM_TBL_NAME 16
  25
  26/* Page directory and page tables */
  27#define PG_DIR_SZ { TARGET_PAGE_SIZE / sizeof(__u64) }
  28#define PG_TBL_SZ { TARGET_PAGE_SIZE / sizeof(__u64) }
  29
  30static inline void res_tbl_init(const char *name, RdmaRmResTbl *tbl,
  31                                uint32_t tbl_sz, uint32_t res_sz)
  32{
  33    tbl->tbl = g_malloc(tbl_sz * res_sz);
  34
  35    strncpy(tbl->name, name, MAX_RM_TBL_NAME);
  36    tbl->name[MAX_RM_TBL_NAME - 1] = 0;
  37
  38    tbl->bitmap = bitmap_new(tbl_sz);
  39    tbl->tbl_sz = tbl_sz;
  40    tbl->res_sz = res_sz;
  41    qemu_mutex_init(&tbl->lock);
  42}
  43
  44static inline void res_tbl_free(RdmaRmResTbl *tbl)
  45{
  46    qemu_mutex_destroy(&tbl->lock);
  47    g_free(tbl->tbl);
  48    bitmap_zero_extend(tbl->bitmap, tbl->tbl_sz, 0);
  49}
  50
  51static inline void *res_tbl_get(RdmaRmResTbl *tbl, uint32_t handle)
  52{
  53    pr_dbg("%s, handle=%d\n", tbl->name, handle);
  54
  55    if ((handle < tbl->tbl_sz) && (test_bit(handle, tbl->bitmap))) {
  56        return tbl->tbl + handle * tbl->res_sz;
  57    } else {
  58        pr_dbg("Invalid handle %d\n", handle);
  59        return NULL;
  60    }
  61}
  62
  63static inline void *res_tbl_alloc(RdmaRmResTbl *tbl, uint32_t *handle)
  64{
  65    qemu_mutex_lock(&tbl->lock);
  66
  67    *handle = find_first_zero_bit(tbl->bitmap, tbl->tbl_sz);
  68    if (*handle > tbl->tbl_sz) {
  69        pr_dbg("Failed to alloc, bitmap is full\n");
  70        qemu_mutex_unlock(&tbl->lock);
  71        return NULL;
  72    }
  73
  74    set_bit(*handle, tbl->bitmap);
  75
  76    qemu_mutex_unlock(&tbl->lock);
  77
  78    memset(tbl->tbl + *handle * tbl->res_sz, 0, tbl->res_sz);
  79
  80    pr_dbg("%s, handle=%d\n", tbl->name, *handle);
  81
  82    return tbl->tbl + *handle * tbl->res_sz;
  83}
  84
  85static inline void res_tbl_dealloc(RdmaRmResTbl *tbl, uint32_t handle)
  86{
  87    pr_dbg("%s, handle=%d\n", tbl->name, handle);
  88
  89    qemu_mutex_lock(&tbl->lock);
  90
  91    if (handle < tbl->tbl_sz) {
  92        clear_bit(handle, tbl->bitmap);
  93    }
  94
  95    qemu_mutex_unlock(&tbl->lock);
  96}
  97
  98int rdma_rm_alloc_pd(RdmaDeviceResources *dev_res, RdmaBackendDev *backend_dev,
  99                     uint32_t *pd_handle, uint32_t ctx_handle)
 100{
 101    RdmaRmPD *pd;
 102    int ret = -ENOMEM;
 103
 104    pd = res_tbl_alloc(&dev_res->pd_tbl, pd_handle);
 105    if (!pd) {
 106        goto out;
 107    }
 108
 109    ret = rdma_backend_create_pd(backend_dev, &pd->backend_pd);
 110    if (ret) {
 111        ret = -EIO;
 112        goto out_tbl_dealloc;
 113    }
 114
 115    pd->ctx_handle = ctx_handle;
 116
 117    return 0;
 118
 119out_tbl_dealloc:
 120    res_tbl_dealloc(&dev_res->pd_tbl, *pd_handle);
 121
 122out:
 123    return ret;
 124}
 125
 126RdmaRmPD *rdma_rm_get_pd(RdmaDeviceResources *dev_res, uint32_t pd_handle)
 127{
 128    return res_tbl_get(&dev_res->pd_tbl, pd_handle);
 129}
 130
 131void rdma_rm_dealloc_pd(RdmaDeviceResources *dev_res, uint32_t pd_handle)
 132{
 133    RdmaRmPD *pd = rdma_rm_get_pd(dev_res, pd_handle);
 134
 135    if (pd) {
 136        rdma_backend_destroy_pd(&pd->backend_pd);
 137        res_tbl_dealloc(&dev_res->pd_tbl, pd_handle);
 138    }
 139}
 140
 141int rdma_rm_alloc_mr(RdmaDeviceResources *dev_res, uint32_t pd_handle,
 142                     uint64_t guest_start, size_t guest_length, void *host_virt,
 143                     int access_flags, uint32_t *mr_handle, uint32_t *lkey,
 144                     uint32_t *rkey)
 145{
 146    RdmaRmMR *mr;
 147    int ret = 0;
 148    RdmaRmPD *pd;
 149    void *addr;
 150    size_t length;
 151
 152    pd = rdma_rm_get_pd(dev_res, pd_handle);
 153    if (!pd) {
 154        pr_dbg("Invalid PD\n");
 155        return -EINVAL;
 156    }
 157
 158    mr = res_tbl_alloc(&dev_res->mr_tbl, mr_handle);
 159    if (!mr) {
 160        pr_dbg("Failed to allocate obj in table\n");
 161        return -ENOMEM;
 162    }
 163
 164    if (!host_virt) {
 165        /* TODO: This is my guess but not so sure that this needs to be
 166         * done */
 167        length = TARGET_PAGE_SIZE;
 168        addr = g_malloc(length);
 169    } else {
 170        mr->user_mr.host_virt = host_virt;
 171        pr_dbg("host_virt=0x%p\n", mr->user_mr.host_virt);
 172        mr->user_mr.length = guest_length;
 173        pr_dbg("length=%zu\n", guest_length);
 174        mr->user_mr.guest_start = guest_start;
 175        pr_dbg("guest_start=0x%" PRIx64 "\n", mr->user_mr.guest_start);
 176
 177        length = mr->user_mr.length;
 178        addr = mr->user_mr.host_virt;
 179    }
 180
 181    ret = rdma_backend_create_mr(&mr->backend_mr, &pd->backend_pd, addr, length,
 182                                 access_flags);
 183    if (ret) {
 184        pr_dbg("Fail in rdma_backend_create_mr, err=%d\n", ret);
 185        ret = -EIO;
 186        goto out_dealloc_mr;
 187    }
 188
 189    if (!host_virt) {
 190        *lkey = mr->lkey = rdma_backend_mr_lkey(&mr->backend_mr);
 191        *rkey = mr->rkey = rdma_backend_mr_rkey(&mr->backend_mr);
 192    } else {
 193        /* We keep mr_handle in lkey so send and recv get get mr ptr */
 194        *lkey = *mr_handle;
 195        *rkey = -1;
 196    }
 197
 198    mr->pd_handle = pd_handle;
 199
 200    return 0;
 201
 202out_dealloc_mr:
 203    res_tbl_dealloc(&dev_res->mr_tbl, *mr_handle);
 204
 205    return ret;
 206}
 207
 208RdmaRmMR *rdma_rm_get_mr(RdmaDeviceResources *dev_res, uint32_t mr_handle)
 209{
 210    return res_tbl_get(&dev_res->mr_tbl, mr_handle);
 211}
 212
 213void rdma_rm_dealloc_mr(RdmaDeviceResources *dev_res, uint32_t mr_handle)
 214{
 215    RdmaRmMR *mr = rdma_rm_get_mr(dev_res, mr_handle);
 216
 217    if (mr) {
 218        rdma_backend_destroy_mr(&mr->backend_mr);
 219        munmap(mr->user_mr.host_virt, mr->user_mr.length);
 220        res_tbl_dealloc(&dev_res->mr_tbl, mr_handle);
 221    }
 222}
 223
 224int rdma_rm_alloc_uc(RdmaDeviceResources *dev_res, uint32_t pfn,
 225                     uint32_t *uc_handle)
 226{
 227    RdmaRmUC *uc;
 228
 229    /* TODO: Need to make sure pfn is between bar start address and
 230     * bsd+RDMA_BAR2_UAR_SIZE
 231    if (pfn > RDMA_BAR2_UAR_SIZE) {
 232        pr_err("pfn out of range (%d > %d)\n", pfn, RDMA_BAR2_UAR_SIZE);
 233        return -ENOMEM;
 234    }
 235    */
 236
 237    uc = res_tbl_alloc(&dev_res->uc_tbl, uc_handle);
 238    if (!uc) {
 239        return -ENOMEM;
 240    }
 241
 242    return 0;
 243}
 244
 245RdmaRmUC *rdma_rm_get_uc(RdmaDeviceResources *dev_res, uint32_t uc_handle)
 246{
 247    return res_tbl_get(&dev_res->uc_tbl, uc_handle);
 248}
 249
 250void rdma_rm_dealloc_uc(RdmaDeviceResources *dev_res, uint32_t uc_handle)
 251{
 252    RdmaRmUC *uc = rdma_rm_get_uc(dev_res, uc_handle);
 253
 254    if (uc) {
 255        res_tbl_dealloc(&dev_res->uc_tbl, uc_handle);
 256    }
 257}
 258
 259RdmaRmCQ *rdma_rm_get_cq(RdmaDeviceResources *dev_res, uint32_t cq_handle)
 260{
 261    return res_tbl_get(&dev_res->cq_tbl, cq_handle);
 262}
 263
 264int rdma_rm_alloc_cq(RdmaDeviceResources *dev_res, RdmaBackendDev *backend_dev,
 265                     uint32_t cqe, uint32_t *cq_handle, void *opaque)
 266{
 267    int rc;
 268    RdmaRmCQ *cq;
 269
 270    cq = res_tbl_alloc(&dev_res->cq_tbl, cq_handle);
 271    if (!cq) {
 272        return -ENOMEM;
 273    }
 274
 275    cq->opaque = opaque;
 276    cq->notify = false;
 277
 278    rc = rdma_backend_create_cq(backend_dev, &cq->backend_cq, cqe);
 279    if (rc) {
 280        rc = -EIO;
 281        goto out_dealloc_cq;
 282    }
 283
 284    return 0;
 285
 286out_dealloc_cq:
 287    rdma_rm_dealloc_cq(dev_res, *cq_handle);
 288
 289    return rc;
 290}
 291
 292void rdma_rm_req_notify_cq(RdmaDeviceResources *dev_res, uint32_t cq_handle,
 293                           bool notify)
 294{
 295    RdmaRmCQ *cq;
 296
 297    pr_dbg("cq_handle=%d, notify=0x%x\n", cq_handle, notify);
 298
 299    cq = rdma_rm_get_cq(dev_res, cq_handle);
 300    if (!cq) {
 301        return;
 302    }
 303
 304    cq->notify = notify;
 305    pr_dbg("notify=%d\n", cq->notify);
 306}
 307
 308void rdma_rm_dealloc_cq(RdmaDeviceResources *dev_res, uint32_t cq_handle)
 309{
 310    RdmaRmCQ *cq;
 311
 312    cq = rdma_rm_get_cq(dev_res, cq_handle);
 313    if (!cq) {
 314        return;
 315    }
 316
 317    rdma_backend_destroy_cq(&cq->backend_cq);
 318
 319    res_tbl_dealloc(&dev_res->cq_tbl, cq_handle);
 320}
 321
 322RdmaRmQP *rdma_rm_get_qp(RdmaDeviceResources *dev_res, uint32_t qpn)
 323{
 324    GBytes *key = g_bytes_new(&qpn, sizeof(qpn));
 325
 326    RdmaRmQP *qp = g_hash_table_lookup(dev_res->qp_hash, key);
 327
 328    g_bytes_unref(key);
 329
 330    return qp;
 331}
 332
 333int rdma_rm_alloc_qp(RdmaDeviceResources *dev_res, uint32_t pd_handle,
 334                     uint8_t qp_type, uint32_t max_send_wr,
 335                     uint32_t max_send_sge, uint32_t send_cq_handle,
 336                     uint32_t max_recv_wr, uint32_t max_recv_sge,
 337                     uint32_t recv_cq_handle, void *opaque, uint32_t *qpn)
 338{
 339    int rc;
 340    RdmaRmQP *qp;
 341    RdmaRmCQ *scq, *rcq;
 342    RdmaRmPD *pd;
 343    uint32_t rm_qpn;
 344
 345    pr_dbg("qp_type=%d\n", qp_type);
 346
 347    pd = rdma_rm_get_pd(dev_res, pd_handle);
 348    if (!pd) {
 349        pr_err("Invalid pd handle (%d)\n", pd_handle);
 350        return -EINVAL;
 351    }
 352
 353    scq = rdma_rm_get_cq(dev_res, send_cq_handle);
 354    rcq = rdma_rm_get_cq(dev_res, recv_cq_handle);
 355
 356    if (!scq || !rcq) {
 357        pr_err("Invalid send_cqn or recv_cqn (%d, %d)\n",
 358               send_cq_handle, recv_cq_handle);
 359        return -EINVAL;
 360    }
 361
 362    qp = res_tbl_alloc(&dev_res->qp_tbl, &rm_qpn);
 363    if (!qp) {
 364        return -ENOMEM;
 365    }
 366    pr_dbg("rm_qpn=%d\n", rm_qpn);
 367
 368    qp->qpn = rm_qpn;
 369    qp->qp_state = IBV_QPS_RESET;
 370    qp->qp_type = qp_type;
 371    qp->send_cq_handle = send_cq_handle;
 372    qp->recv_cq_handle = recv_cq_handle;
 373    qp->opaque = opaque;
 374
 375    rc = rdma_backend_create_qp(&qp->backend_qp, qp_type, &pd->backend_pd,
 376                                &scq->backend_cq, &rcq->backend_cq, max_send_wr,
 377                                max_recv_wr, max_send_sge, max_recv_sge);
 378    if (rc) {
 379        rc = -EIO;
 380        goto out_dealloc_qp;
 381    }
 382
 383    *qpn = rdma_backend_qpn(&qp->backend_qp);
 384    pr_dbg("rm_qpn=%d, backend_qpn=0x%x\n", rm_qpn, *qpn);
 385    g_hash_table_insert(dev_res->qp_hash, g_bytes_new(qpn, sizeof(*qpn)), qp);
 386
 387    return 0;
 388
 389out_dealloc_qp:
 390    res_tbl_dealloc(&dev_res->qp_tbl, qp->qpn);
 391
 392    return rc;
 393}
 394
 395int rdma_rm_modify_qp(RdmaDeviceResources *dev_res, RdmaBackendDev *backend_dev,
 396                      uint32_t qp_handle, uint32_t attr_mask,
 397                      union ibv_gid *dgid, uint32_t dqpn,
 398                      enum ibv_qp_state qp_state, uint32_t qkey,
 399                      uint32_t rq_psn, uint32_t sq_psn)
 400{
 401    RdmaRmQP *qp;
 402    int ret;
 403
 404    pr_dbg("qpn=%d\n", qp_handle);
 405
 406    qp = rdma_rm_get_qp(dev_res, qp_handle);
 407    if (!qp) {
 408        return -EINVAL;
 409    }
 410
 411    pr_dbg("qp_type=%d\n", qp->qp_type);
 412    pr_dbg("attr_mask=0x%x\n", attr_mask);
 413
 414    if (qp->qp_type == IBV_QPT_SMI) {
 415        pr_dbg("QP0 unsupported\n");
 416        return -EPERM;
 417    } else if (qp->qp_type == IBV_QPT_GSI) {
 418        pr_dbg("QP1\n");
 419        return 0;
 420    }
 421
 422    if (attr_mask & IBV_QP_STATE) {
 423        qp->qp_state = qp_state;
 424        pr_dbg("qp_state=%d\n", qp->qp_state);
 425
 426        if (qp->qp_state == IBV_QPS_INIT) {
 427            ret = rdma_backend_qp_state_init(backend_dev, &qp->backend_qp,
 428                                             qp->qp_type, qkey);
 429            if (ret) {
 430                return -EIO;
 431            }
 432        }
 433
 434        if (qp->qp_state == IBV_QPS_RTR) {
 435            ret = rdma_backend_qp_state_rtr(backend_dev, &qp->backend_qp,
 436                                            qp->qp_type, dgid, dqpn, rq_psn,
 437                                            qkey, attr_mask & IBV_QP_QKEY);
 438            if (ret) {
 439                return -EIO;
 440            }
 441        }
 442
 443        if (qp->qp_state == IBV_QPS_RTS) {
 444            ret = rdma_backend_qp_state_rts(&qp->backend_qp, qp->qp_type,
 445                                            sq_psn, qkey,
 446                                            attr_mask & IBV_QP_QKEY);
 447            if (ret) {
 448                return -EIO;
 449            }
 450        }
 451    }
 452
 453    return 0;
 454}
 455
 456int rdma_rm_query_qp(RdmaDeviceResources *dev_res, RdmaBackendDev *backend_dev,
 457                     uint32_t qp_handle, struct ibv_qp_attr *attr,
 458                     int attr_mask, struct ibv_qp_init_attr *init_attr)
 459{
 460    RdmaRmQP *qp;
 461
 462    pr_dbg("qpn=%d\n", qp_handle);
 463
 464    qp = rdma_rm_get_qp(dev_res, qp_handle);
 465    if (!qp) {
 466        return -EINVAL;
 467    }
 468
 469    pr_dbg("qp_type=%d\n", qp->qp_type);
 470
 471    return rdma_backend_query_qp(&qp->backend_qp, attr, attr_mask, init_attr);
 472}
 473
 474void rdma_rm_dealloc_qp(RdmaDeviceResources *dev_res, uint32_t qp_handle)
 475{
 476    RdmaRmQP *qp;
 477    GBytes *key;
 478
 479    key = g_bytes_new(&qp_handle, sizeof(qp_handle));
 480    qp = g_hash_table_lookup(dev_res->qp_hash, key);
 481    g_hash_table_remove(dev_res->qp_hash, key);
 482    g_bytes_unref(key);
 483
 484    if (!qp) {
 485        return;
 486    }
 487
 488    rdma_backend_destroy_qp(&qp->backend_qp);
 489
 490    res_tbl_dealloc(&dev_res->qp_tbl, qp->qpn);
 491}
 492
 493void *rdma_rm_get_cqe_ctx(RdmaDeviceResources *dev_res, uint32_t cqe_ctx_id)
 494{
 495    void **cqe_ctx;
 496
 497    cqe_ctx = res_tbl_get(&dev_res->cqe_ctx_tbl, cqe_ctx_id);
 498    if (!cqe_ctx) {
 499        return NULL;
 500    }
 501
 502    pr_dbg("ctx=%p\n", *cqe_ctx);
 503
 504    return *cqe_ctx;
 505}
 506
 507int rdma_rm_alloc_cqe_ctx(RdmaDeviceResources *dev_res, uint32_t *cqe_ctx_id,
 508                          void *ctx)
 509{
 510    void **cqe_ctx;
 511
 512    cqe_ctx = res_tbl_alloc(&dev_res->cqe_ctx_tbl, cqe_ctx_id);
 513    if (!cqe_ctx) {
 514        return -ENOMEM;
 515    }
 516
 517    pr_dbg("ctx=%p\n", ctx);
 518    *cqe_ctx = ctx;
 519
 520    return 0;
 521}
 522
 523void rdma_rm_dealloc_cqe_ctx(RdmaDeviceResources *dev_res, uint32_t cqe_ctx_id)
 524{
 525    res_tbl_dealloc(&dev_res->cqe_ctx_tbl, cqe_ctx_id);
 526}
 527
 528static void destroy_qp_hash_key(gpointer data)
 529{
 530    g_bytes_unref(data);
 531}
 532
 533int rdma_rm_init(RdmaDeviceResources *dev_res, struct ibv_device_attr *dev_attr,
 534                 Error **errp)
 535{
 536    dev_res->qp_hash = g_hash_table_new_full(g_bytes_hash, g_bytes_equal,
 537                                             destroy_qp_hash_key, NULL);
 538    if (!dev_res->qp_hash) {
 539        return -ENOMEM;
 540    }
 541
 542    res_tbl_init("PD", &dev_res->pd_tbl, dev_attr->max_pd, sizeof(RdmaRmPD));
 543    res_tbl_init("CQ", &dev_res->cq_tbl, dev_attr->max_cq, sizeof(RdmaRmCQ));
 544    res_tbl_init("MR", &dev_res->mr_tbl, dev_attr->max_mr, sizeof(RdmaRmMR));
 545    res_tbl_init("QP", &dev_res->qp_tbl, dev_attr->max_qp, sizeof(RdmaRmQP));
 546    res_tbl_init("CQE_CTX", &dev_res->cqe_ctx_tbl, dev_attr->max_qp *
 547                       dev_attr->max_qp_wr, sizeof(void *));
 548    res_tbl_init("UC", &dev_res->uc_tbl, MAX_UCS, sizeof(RdmaRmUC));
 549
 550    return 0;
 551}
 552
 553void rdma_rm_fini(RdmaDeviceResources *dev_res)
 554{
 555    res_tbl_free(&dev_res->uc_tbl);
 556    res_tbl_free(&dev_res->cqe_ctx_tbl);
 557    res_tbl_free(&dev_res->qp_tbl);
 558    res_tbl_free(&dev_res->cq_tbl);
 559    res_tbl_free(&dev_res->mr_tbl);
 560    res_tbl_free(&dev_res->pd_tbl);
 561    g_hash_table_destroy(dev_res->qp_hash);
 562}
 563