/*
 * QEMU paravirtual RDMA - QP implementation
 *
 * Copyright (C) 2018 Oracle
 * Copyright (C) 2018 Red Hat Inc
 *
 * Authors:
 *     Yuval Shaia <yuval.shaia@oracle.com>
 *     Marcel Apfelbaum <marcel@redhat.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 *
 */

#include "qemu/osdep.h"

#include "../rdma_utils.h"
#include "../rdma_rm.h"
#include "../rdma_backend.h"

#include "trace.h"

#include "pvrdma.h"
#include "standard-headers/rdma/vmw_pvrdma-abi.h"
#include "pvrdma_qp_ops.h"

  28typedef struct CompHandlerCtx {
  29    PVRDMADev *dev;
  30    uint32_t cq_handle;
  31    struct pvrdma_cqe cqe;
  32} CompHandlerCtx;
  33
  34/* Send Queue WQE */
  35typedef struct PvrdmaSqWqe {
  36    struct pvrdma_sq_wqe_hdr hdr;
  37    struct pvrdma_sge sge[];
  38} PvrdmaSqWqe;
  39
  40/* Recv Queue WQE */
  41typedef struct PvrdmaRqWqe {
  42    struct pvrdma_rq_wqe_hdr hdr;
  43    struct pvrdma_sge sge[];
  44} PvrdmaRqWqe;
  45
  46/*
  47 * 1. Put CQE on send CQ ring
  48 * 2. Put CQ number on dsr completion ring
  49 * 3. Interrupt host
  50 */
  51static int pvrdma_post_cqe(PVRDMADev *dev, uint32_t cq_handle,
  52                           struct pvrdma_cqe *cqe, struct ibv_wc *wc)
  53{
  54    struct pvrdma_cqe *cqe1;
  55    struct pvrdma_cqne *cqne;
  56    PvrdmaRing *ring;
  57    RdmaRmCQ *cq = rdma_rm_get_cq(&dev->rdma_dev_res, cq_handle);
  58
  59    if (unlikely(!cq)) {
  60        return -EINVAL;
  61    }
  62
  63    ring = (PvrdmaRing *)cq->opaque;
  64
  65    /* Step #1: Put CQE on CQ ring */
  66    cqe1 = pvrdma_ring_next_elem_write(ring);
  67    if (unlikely(!cqe1)) {
  68        return -EINVAL;
  69    }
  70
  71    memset(cqe1, 0, sizeof(*cqe1));
  72    cqe1->wr_id = cqe->wr_id;
  73    cqe1->qp = cqe->qp ? cqe->qp : wc->qp_num;
  74    cqe1->opcode = cqe->opcode;
  75    cqe1->status = wc->status;
  76    cqe1->byte_len = wc->byte_len;
  77    cqe1->src_qp = wc->src_qp;
  78    cqe1->wc_flags = wc->wc_flags;
  79    cqe1->vendor_err = wc->vendor_err;
  80
  81    trace_pvrdma_post_cqe(cq_handle, cq->notify, cqe1->wr_id, cqe1->qp,
  82                          cqe1->opcode, cqe1->status, cqe1->byte_len,
  83                          cqe1->src_qp, cqe1->wc_flags, cqe1->vendor_err);
  84
  85    pvrdma_ring_write_inc(ring);
  86
  87    /* Step #2: Put CQ number on dsr completion ring */
  88    cqne = pvrdma_ring_next_elem_write(&dev->dsr_info.cq);
  89    if (unlikely(!cqne)) {
  90        return -EINVAL;
  91    }
  92
  93    cqne->info = cq_handle;
  94    pvrdma_ring_write_inc(&dev->dsr_info.cq);
  95
  96    if (cq->notify != CNT_CLEAR) {
  97        if (cq->notify == CNT_ARM) {
  98            cq->notify = CNT_CLEAR;
  99        }
 100        post_interrupt(dev, INTR_VEC_CMD_COMPLETION_Q);
 101    }
 102
 103    return 0;
 104}
 105
 106static void pvrdma_qp_ops_comp_handler(void *ctx, struct ibv_wc *wc)
 107{
 108    CompHandlerCtx *comp_ctx = (CompHandlerCtx *)ctx;
 109
 110    pvrdma_post_cqe(comp_ctx->dev, comp_ctx->cq_handle, &comp_ctx->cqe, wc);
 111
 112    g_free(ctx);
 113}
 114
 115static void complete_with_error(uint32_t vendor_err, void *ctx)
 116{
 117    struct ibv_wc wc = {};
 118
 119    wc.status = IBV_WC_GENERAL_ERR;
 120    wc.vendor_err = vendor_err;
 121
 122    pvrdma_qp_ops_comp_handler(ctx, &wc);
 123}
 124
 125void pvrdma_qp_ops_fini(void)
 126{
 127    rdma_backend_unregister_comp_handler();
 128}
 129
 130int pvrdma_qp_ops_init(void)
 131{
 132    rdma_backend_register_comp_handler(pvrdma_qp_ops_comp_handler);
 133
 134    return 0;
 135}
 136
 137void pvrdma_qp_send(PVRDMADev *dev, uint32_t qp_handle)
 138{
 139    RdmaRmQP *qp;
 140    PvrdmaSqWqe *wqe;
 141    PvrdmaRing *ring;
 142    int sgid_idx;
 143    union ibv_gid *sgid;
 144
 145    qp = rdma_rm_get_qp(&dev->rdma_dev_res, qp_handle);
 146    if (unlikely(!qp)) {
 147        return;
 148    }
 149
 150    ring = (PvrdmaRing *)qp->opaque;
 151
 152    wqe = (struct PvrdmaSqWqe *)pvrdma_ring_next_elem_read(ring);
 153    while (wqe) {
 154        CompHandlerCtx *comp_ctx;
 155
 156        /* Prepare CQE */
 157        comp_ctx = g_malloc(sizeof(CompHandlerCtx));
 158        comp_ctx->dev = dev;
 159        comp_ctx->cq_handle = qp->send_cq_handle;
 160        comp_ctx->cqe.wr_id = wqe->hdr.wr_id;
 161        comp_ctx->cqe.qp = qp_handle;
 162        comp_ctx->cqe.opcode = IBV_WC_SEND;
 163
 164        sgid = rdma_rm_get_gid(&dev->rdma_dev_res, wqe->hdr.wr.ud.av.gid_index);
 165        if (!sgid) {
 166            rdma_error_report("Failed to get gid for idx %d",
 167                              wqe->hdr.wr.ud.av.gid_index);
 168            complete_with_error(VENDOR_ERR_INV_GID_IDX, comp_ctx);
 169            continue;
 170        }
 171
 172        sgid_idx = rdma_rm_get_backend_gid_index(&dev->rdma_dev_res,
 173                                                 &dev->backend_dev,
 174                                                 wqe->hdr.wr.ud.av.gid_index);
 175        if (sgid_idx <= 0) {
 176            rdma_error_report("Failed to get bk sgid_idx for sgid_idx %d",
 177                              wqe->hdr.wr.ud.av.gid_index);
 178            complete_with_error(VENDOR_ERR_INV_GID_IDX, comp_ctx);
 179            continue;
 180        }
 181
 182        if (wqe->hdr.num_sge > dev->dev_attr.max_sge) {
 183            rdma_error_report("Invalid num_sge=%d (max %d)", wqe->hdr.num_sge,
 184                              dev->dev_attr.max_sge);
 185            complete_with_error(VENDOR_ERR_INV_NUM_SGE, comp_ctx);
 186            continue;
 187        }
 188
 189        rdma_backend_post_send(&dev->backend_dev, &qp->backend_qp, qp->qp_type,
 190                               (struct ibv_sge *)&wqe->sge[0], wqe->hdr.num_sge,
 191                               sgid_idx, sgid,
 192                               (union ibv_gid *)wqe->hdr.wr.ud.av.dgid,
 193                               wqe->hdr.wr.ud.remote_qpn,
 194                               wqe->hdr.wr.ud.remote_qkey, comp_ctx);
 195
 196        pvrdma_ring_read_inc(ring);
 197
 198        wqe = pvrdma_ring_next_elem_read(ring);
 199    }
 200}
 201
 202void pvrdma_qp_recv(PVRDMADev *dev, uint32_t qp_handle)
 203{
 204    RdmaRmQP *qp;
 205    PvrdmaRqWqe *wqe;
 206    PvrdmaRing *ring;
 207
 208    qp = rdma_rm_get_qp(&dev->rdma_dev_res, qp_handle);
 209    if (unlikely(!qp)) {
 210        return;
 211    }
 212
 213    ring = &((PvrdmaRing *)qp->opaque)[1];
 214
 215    wqe = (struct PvrdmaRqWqe *)pvrdma_ring_next_elem_read(ring);
 216    while (wqe) {
 217        CompHandlerCtx *comp_ctx;
 218
 219        /* Prepare CQE */
 220        comp_ctx = g_malloc(sizeof(CompHandlerCtx));
 221        comp_ctx->dev = dev;
 222        comp_ctx->cq_handle = qp->recv_cq_handle;
 223        comp_ctx->cqe.wr_id = wqe->hdr.wr_id;
 224        comp_ctx->cqe.qp = qp_handle;
 225        comp_ctx->cqe.opcode = IBV_WC_RECV;
 226
 227        if (wqe->hdr.num_sge > dev->dev_attr.max_sge) {
 228            rdma_error_report("Invalid num_sge=%d (max %d)", wqe->hdr.num_sge,
 229                              dev->dev_attr.max_sge);
 230            complete_with_error(VENDOR_ERR_INV_NUM_SGE, comp_ctx);
 231            continue;
 232        }
 233
 234        rdma_backend_post_recv(&dev->backend_dev, &qp->backend_qp, qp->qp_type,
 235                               (struct ibv_sge *)&wqe->sge[0], wqe->hdr.num_sge,
 236                               comp_ctx);
 237
 238        pvrdma_ring_read_inc(ring);
 239
 240        wqe = pvrdma_ring_next_elem_read(ring);
 241    }
 242}
 243
 244void pvrdma_srq_recv(PVRDMADev *dev, uint32_t srq_handle)
 245{
 246    RdmaRmSRQ *srq;
 247    PvrdmaRqWqe *wqe;
 248    PvrdmaRing *ring;
 249
 250    srq = rdma_rm_get_srq(&dev->rdma_dev_res, srq_handle);
 251    if (unlikely(!srq)) {
 252        return;
 253    }
 254
 255    ring = (PvrdmaRing *)srq->opaque;
 256
 257    wqe = (struct PvrdmaRqWqe *)pvrdma_ring_next_elem_read(ring);
 258    while (wqe) {
 259        CompHandlerCtx *comp_ctx;
 260
 261        /* Prepare CQE */
 262        comp_ctx = g_malloc(sizeof(CompHandlerCtx));
 263        comp_ctx->dev = dev;
 264        comp_ctx->cq_handle = srq->recv_cq_handle;
 265        comp_ctx->cqe.wr_id = wqe->hdr.wr_id;
 266        comp_ctx->cqe.qp = 0;
 267        comp_ctx->cqe.opcode = IBV_WC_RECV;
 268
 269        if (wqe->hdr.num_sge > dev->dev_attr.max_sge) {
 270            rdma_error_report("Invalid num_sge=%d (max %d)", wqe->hdr.num_sge,
 271                              dev->dev_attr.max_sge);
 272            complete_with_error(VENDOR_ERR_INV_NUM_SGE, comp_ctx);
 273            continue;
 274        }
 275
 276        rdma_backend_post_srq_recv(&dev->backend_dev, &srq->backend_srq,
 277                                   (struct ibv_sge *)&wqe->sge[0],
 278                                   wqe->hdr.num_sge,
 279                                   comp_ctx);
 280
 281        pvrdma_ring_read_inc(ring);
 282
 283        wqe = pvrdma_ring_next_elem_read(ring);
 284    }
 285
 286}
 287
 288void pvrdma_cq_poll(RdmaDeviceResources *dev_res, uint32_t cq_handle)
 289{
 290    RdmaRmCQ *cq;
 291
 292    cq = rdma_rm_get_cq(dev_res, cq_handle);
 293    if (!cq) {
 294        return;
 295    }
 296
 297    rdma_backend_poll_cq(dev_res, &cq->backend_cq);
 298}
 299