linux/drivers/infiniband/core/rw.c
   1// SPDX-License-Identifier: GPL-2.0-only
   2/*
   3 * Copyright (c) 2016 HGST, a Western Digital Company.
   4 */
   5#include <linux/moduleparam.h>
   6#include <linux/slab.h>
   7#include <linux/pci-p2pdma.h>
   8#include <rdma/mr_pool.h>
   9#include <rdma/rw.h>
  10
  11enum {
  12        RDMA_RW_SINGLE_WR,
  13        RDMA_RW_MULTI_WR,
  14        RDMA_RW_MR,
  15        RDMA_RW_SIG_MR,
  16};
  17
  18static bool rdma_rw_force_mr;
  19module_param_named(force_mr, rdma_rw_force_mr, bool, 0);
  20MODULE_PARM_DESC(force_mr, "Force usage of MRs for RDMA READ/WRITE operations");
  21
  22/*
  23 * Report whether memory registration should be used. Memory registration must
  24 * be used for iWARP devices, which require RDMA READ sink buffers to be in a
  25 * remotely writable MR. It is also enabled if registering memory might yield
  26 * better performance than using multiple SGEs, see rdma_rw_io_needs_mr().
  27 */
  28static inline bool rdma_rw_can_use_mr(struct ib_device *dev, u32 port_num)
  29{
  30        if (rdma_protocol_iwarp(dev, port_num))
  31                return true;
  32        if (dev->attrs.max_sgl_rd)
  33                return true;
  34        if (unlikely(rdma_rw_force_mr))
  35                return true;
  36        return false;
  37}
  38
  39/*
  40 * Check if the device will use memory registration for this RW operation.
  41 * For RDMA READs we must use MRs on iWARP, and use them elsewhere when the
  42 * scatterlist is longer than the device's max_sgl_rd limit.  Additionally we
  43 * have a debug option to force usage of MRs to help test this code path.
  44 */
  45static inline bool rdma_rw_io_needs_mr(struct ib_device *dev, u32 port_num,
  46                enum dma_data_direction dir, int dma_nents)
  47{
  48        if (dir == DMA_FROM_DEVICE) {
  49                if (rdma_protocol_iwarp(dev, port_num))
  50                        return true;
  51                if (dev->attrs.max_sgl_rd && dma_nents > dev->attrs.max_sgl_rd)
  52                        return true;
  53        }
  54        if (unlikely(rdma_rw_force_mr))
  55                return true;
  56        return false;
  57}
  58
  59static inline u32 rdma_rw_fr_page_list_len(struct ib_device *dev,
  60                                           bool pi_support)
  61{
  62        u32 max_pages;
  63
  64        if (pi_support)
  65                max_pages = dev->attrs.max_pi_fast_reg_page_list_len;
  66        else
  67                max_pages = dev->attrs.max_fast_reg_page_list_len;
  68
  69        /* arbitrary limit to avoid allocating gigantic resources */
  70        return min_t(u32, max_pages, 256);
  71}
  72
  73static inline int rdma_rw_inv_key(struct rdma_rw_reg_ctx *reg)
  74{
  75        int count = 0;
  76
  77        if (reg->mr->need_inval) {
  78                reg->inv_wr.opcode = IB_WR_LOCAL_INV;
  79                reg->inv_wr.ex.invalidate_rkey = reg->mr->lkey;
  80                reg->inv_wr.next = &reg->reg_wr.wr;
  81                count++;
  82        } else {
  83                reg->inv_wr.next = NULL;
  84        }
  85
  86        return count;
  87}
  88
  89/* Caller must have zero-initialized *reg. */
  90static int rdma_rw_init_one_mr(struct ib_qp *qp, u32 port_num,
  91                struct rdma_rw_reg_ctx *reg, struct scatterlist *sg,
  92                u32 sg_cnt, u32 offset)
  93{
  94        u32 pages_per_mr = rdma_rw_fr_page_list_len(qp->pd->device,
  95                                                    qp->integrity_en);
  96        u32 nents = min(sg_cnt, pages_per_mr);
  97        int count = 0, ret;
  98
  99        reg->mr = ib_mr_pool_get(qp, &qp->rdma_mrs);
 100        if (!reg->mr)
 101                return -EAGAIN;
 102
 103        count += rdma_rw_inv_key(reg);
 104
 105        ret = ib_map_mr_sg(reg->mr, sg, nents, &offset, PAGE_SIZE);
 106        if (ret < 0 || ret < nents) {
 107                ib_mr_pool_put(qp, &qp->rdma_mrs, reg->mr);
 108                return -EINVAL;
 109        }
 110
 111        reg->reg_wr.wr.opcode = IB_WR_REG_MR;
 112        reg->reg_wr.mr = reg->mr;
 113        reg->reg_wr.access = IB_ACCESS_LOCAL_WRITE;
 114        if (rdma_protocol_iwarp(qp->device, port_num))
 115                reg->reg_wr.access |= IB_ACCESS_REMOTE_WRITE;
 116        count++;
 117
 118        reg->sge.addr = reg->mr->iova;
 119        reg->sge.length = reg->mr->length;
 120        return count;
 121}
 122
 123static int rdma_rw_init_mr_wrs(struct rdma_rw_ctx *ctx, struct ib_qp *qp,
 124                u32 port_num, struct scatterlist *sg, u32 sg_cnt, u32 offset,
 125                u64 remote_addr, u32 rkey, enum dma_data_direction dir)
 126{
 127        struct rdma_rw_reg_ctx *prev = NULL;
 128        u32 pages_per_mr = rdma_rw_fr_page_list_len(qp->pd->device,
 129                                                    qp->integrity_en);
 130        int i, j, ret = 0, count = 0;
 131
 132        ctx->nr_ops = DIV_ROUND_UP(sg_cnt, pages_per_mr);
 133        ctx->reg = kcalloc(ctx->nr_ops, sizeof(*ctx->reg), GFP_KERNEL);
 134        if (!ctx->reg) {
 135                ret = -ENOMEM;
 136                goto out;
 137        }
 138
 139        for (i = 0; i < ctx->nr_ops; i++) {
 140                struct rdma_rw_reg_ctx *reg = &ctx->reg[i];
 141                u32 nents = min(sg_cnt, pages_per_mr);
 142
 143                ret = rdma_rw_init_one_mr(qp, port_num, reg, sg, sg_cnt,
 144                                offset);
 145                if (ret < 0)
 146                        goto out_free;
 147                count += ret;
 148
 149                if (prev) {
 150                        if (reg->mr->need_inval)
 151                                prev->wr.wr.next = &reg->inv_wr;
 152                        else
 153                                prev->wr.wr.next = &reg->reg_wr.wr;
 154                }
 155
 156                reg->reg_wr.wr.next = &reg->wr.wr;
 157
 158                reg->wr.wr.sg_list = &reg->sge;
 159                reg->wr.wr.num_sge = 1;
 160                reg->wr.remote_addr = remote_addr;
 161                reg->wr.rkey = rkey;
 162                if (dir == DMA_TO_DEVICE) {
 163                        reg->wr.wr.opcode = IB_WR_RDMA_WRITE;
 164                } else if (!rdma_cap_read_inv(qp->device, port_num)) {
 165                        reg->wr.wr.opcode = IB_WR_RDMA_READ;
 166                } else {
 167                        reg->wr.wr.opcode = IB_WR_RDMA_READ_WITH_INV;
 168                        reg->wr.wr.ex.invalidate_rkey = reg->mr->lkey;
 169                }
 170                count++;
 171
 172                remote_addr += reg->sge.length;
 173                sg_cnt -= nents;
 174                for (j = 0; j < nents; j++)
 175                        sg = sg_next(sg);
 176                prev = reg;
 177                offset = 0;
 178        }
 179
 180        if (prev)
 181                prev->wr.wr.next = NULL;
 182
 183        ctx->type = RDMA_RW_MR;
 184        return count;
 185
 186out_free:
 187        while (--i >= 0)
 188                ib_mr_pool_put(qp, &qp->rdma_mrs, ctx->reg[i].mr);
 189        kfree(ctx->reg);
 190out:
 191        return ret;
 192}
 193
 194static int rdma_rw_init_map_wrs(struct rdma_rw_ctx *ctx, struct ib_qp *qp,
 195                struct scatterlist *sg, u32 sg_cnt, u32 offset,
 196                u64 remote_addr, u32 rkey, enum dma_data_direction dir)
 197{
 198        u32 max_sge = dir == DMA_TO_DEVICE ? qp->max_write_sge :
 199                      qp->max_read_sge;
 200        struct ib_sge *sge;
 201        u32 total_len = 0, i, j;
 202
 203        ctx->nr_ops = DIV_ROUND_UP(sg_cnt, max_sge);
 204
 205        ctx->map.sges = sge = kcalloc(sg_cnt, sizeof(*sge), GFP_KERNEL);
 206        if (!ctx->map.sges)
 207                goto out;
 208
 209        ctx->map.wrs = kcalloc(ctx->nr_ops, sizeof(*ctx->map.wrs), GFP_KERNEL);
 210        if (!ctx->map.wrs)
 211                goto out_free_sges;
 212
 213        for (i = 0; i < ctx->nr_ops; i++) {
 214                struct ib_rdma_wr *rdma_wr = &ctx->map.wrs[i];
 215                u32 nr_sge = min(sg_cnt, max_sge);
 216
 217                if (dir == DMA_TO_DEVICE)
 218                        rdma_wr->wr.opcode = IB_WR_RDMA_WRITE;
 219                else
 220                        rdma_wr->wr.opcode = IB_WR_RDMA_READ;
 221                rdma_wr->remote_addr = remote_addr + total_len;
 222                rdma_wr->rkey = rkey;
 223                rdma_wr->wr.num_sge = nr_sge;
 224                rdma_wr->wr.sg_list = sge;
 225
 226                for (j = 0; j < nr_sge; j++, sg = sg_next(sg)) {
 227                        sge->addr = sg_dma_address(sg) + offset;
 228                        sge->length = sg_dma_len(sg) - offset;
 229                        sge->lkey = qp->pd->local_dma_lkey;
 230
 231                        total_len += sge->length;
 232                        sge++;
 233                        sg_cnt--;
 234                        offset = 0;
 235                }
 236
 237                rdma_wr->wr.next = i + 1 < ctx->nr_ops ?
 238                        &ctx->map.wrs[i + 1].wr : NULL;
 239        }
 240
 241        ctx->type = RDMA_RW_MULTI_WR;
 242        return ctx->nr_ops;
 243
 244out_free_sges:
 245        kfree(ctx->map.sges);
 246out:
 247        return -ENOMEM;
 248}
 249
 250static int rdma_rw_init_single_wr(struct rdma_rw_ctx *ctx, struct ib_qp *qp,
 251                struct scatterlist *sg, u32 offset, u64 remote_addr, u32 rkey,
 252                enum dma_data_direction dir)
 253{
 254        struct ib_rdma_wr *rdma_wr = &ctx->single.wr;
 255
 256        ctx->nr_ops = 1;
 257
 258        ctx->single.sge.lkey = qp->pd->local_dma_lkey;
 259        ctx->single.sge.addr = sg_dma_address(sg) + offset;
 260        ctx->single.sge.length = sg_dma_len(sg) - offset;
 261
 262        memset(rdma_wr, 0, sizeof(*rdma_wr));
 263        if (dir == DMA_TO_DEVICE)
 264                rdma_wr->wr.opcode = IB_WR_RDMA_WRITE;
 265        else
 266                rdma_wr->wr.opcode = IB_WR_RDMA_READ;
 267        rdma_wr->wr.sg_list = &ctx->single.sge;
 268        rdma_wr->wr.num_sge = 1;
 269        rdma_wr->remote_addr = remote_addr;
 270        rdma_wr->rkey = rkey;
 271
 272        ctx->type = RDMA_RW_SINGLE_WR;
 273        return 1;
 274}
 275
 276static void rdma_rw_unmap_sg(struct ib_device *dev, struct scatterlist *sg,
 277                             u32 sg_cnt, enum dma_data_direction dir)
 278{
 279        if (is_pci_p2pdma_page(sg_page(sg)))
 280                pci_p2pdma_unmap_sg(dev->dma_device, sg, sg_cnt, dir);
 281        else
 282                ib_dma_unmap_sg(dev, sg, sg_cnt, dir);
 283}
 284
 285static int rdma_rw_map_sgtable(struct ib_device *dev, struct sg_table *sgt,
 286                               enum dma_data_direction dir)
 287{
 288        int nents;
 289
 290        if (is_pci_p2pdma_page(sg_page(sgt->sgl))) {
 291                if (WARN_ON_ONCE(ib_uses_virt_dma(dev)))
 292                        return 0;
 293                nents = pci_p2pdma_map_sg(dev->dma_device, sgt->sgl,
 294                                          sgt->orig_nents, dir);
 295                if (!nents)
 296                        return -EIO;
 297                sgt->nents = nents;
 298                return 0;
 299        }
 300        return ib_dma_map_sgtable_attrs(dev, sgt, dir, 0);
 301}
 302
 303/**
 304 * rdma_rw_ctx_init - initialize an RDMA READ/WRITE context
 305 * @ctx:        context to initialize
 306 * @qp:         queue pair to operate on
 307 * @port_num:   port num to which the connection is bound
 308 * @sg:         scatterlist to READ/WRITE from/to
 309 * @sg_cnt:     number of entries in @sg
 310 * @sg_offset:  current byte offset into @sg
 311 * @remote_addr:remote address to read/write (relative to @rkey)
 312 * @rkey:       remote key to operate on
 313 * @dir:        %DMA_TO_DEVICE for RDMA WRITE, %DMA_FROM_DEVICE for RDMA READ
 314 *
 315 * Returns the number of WQEs that will be needed on the send queue if
 316 * successful, or a negative error code; a usage sketch follows the function.
 317 */
 318int rdma_rw_ctx_init(struct rdma_rw_ctx *ctx, struct ib_qp *qp, u32 port_num,
 319                struct scatterlist *sg, u32 sg_cnt, u32 sg_offset,
 320                u64 remote_addr, u32 rkey, enum dma_data_direction dir)
 321{
 322        struct ib_device *dev = qp->pd->device;
 323        struct sg_table sgt = {
 324                .sgl = sg,
 325                .orig_nents = sg_cnt,
 326        };
 327        int ret;
 328
 329        ret = rdma_rw_map_sgtable(dev, &sgt, dir);
 330        if (ret)
 331                return ret;
 332        sg_cnt = sgt.nents;
 333
 334        /*
 335         * Skip to the S/G entry that sg_offset falls into:
 336         */
 337        for (;;) {
 338                u32 len = sg_dma_len(sg);
 339
 340                if (sg_offset < len)
 341                        break;
 342
 343                sg = sg_next(sg);
 344                sg_offset -= len;
 345                sg_cnt--;
 346        }
 347
 348        ret = -EIO;
 349        if (WARN_ON_ONCE(sg_cnt == 0))
 350                goto out_unmap_sg;
 351
 352        if (rdma_rw_io_needs_mr(qp->device, port_num, dir, sg_cnt)) {
 353                ret = rdma_rw_init_mr_wrs(ctx, qp, port_num, sg, sg_cnt,
 354                                sg_offset, remote_addr, rkey, dir);
 355        } else if (sg_cnt > 1) {
 356                ret = rdma_rw_init_map_wrs(ctx, qp, sg, sg_cnt, sg_offset,
 357                                remote_addr, rkey, dir);
 358        } else {
 359                ret = rdma_rw_init_single_wr(ctx, qp, sg, sg_offset,
 360                                remote_addr, rkey, dir);
 361        }
 362
 363        if (ret < 0)
 364                goto out_unmap_sg;
 365        return ret;
 366
 367out_unmap_sg:
 368        rdma_rw_unmap_sg(dev, sgt.sgl, sgt.orig_nents, dir);
 369        return ret;
 370}
 371EXPORT_SYMBOL(rdma_rw_ctx_init);
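
/*
 * Usage sketch (illustrative only): a typical caller drives one rdma_rw_ctx
 * through init -> post -> completion -> destroy.  The struct my_io container,
 * my_read_done(), my_issue_read() and the use of port 1 are hypothetical;
 * only the rdma_rw_* and ib_* interfaces are real.  Error unwinding and
 * wc->status checking are omitted for brevity.
 *
 *	struct my_io {
 *		struct rdma_rw_ctx rw;
 *		struct scatterlist *sgl;	// unmapped; rdma_rw_ctx_init() maps it
 *		u32 sg_cnt;
 *		struct ib_cqe cqe;
 *	};
 *
 *	static void my_read_done(struct ib_cq *cq, struct ib_wc *wc)
 *	{
 *		struct my_io *io = container_of(wc->wr_cqe, struct my_io, cqe);
 *
 *		rdma_rw_ctx_destroy(&io->rw, wc->qp, 1, io->sgl, io->sg_cnt,
 *				    DMA_FROM_DEVICE);
 *		// the data read from the peer now sits in io->sgl
 *	}
 *
 *	static int my_issue_read(struct ib_qp *qp, struct my_io *io,
 *				 u64 remote_addr, u32 rkey)
 *	{
 *		int ret;
 *
 *		io->cqe.done = my_read_done;
 *		ret = rdma_rw_ctx_init(&io->rw, qp, 1, io->sgl, io->sg_cnt, 0,
 *				       remote_addr, rkey, DMA_FROM_DEVICE);
 *		if (ret < 0)
 *			return ret;
 *		return rdma_rw_ctx_post(&io->rw, qp, 1, &io->cqe, NULL);
 *	}
 */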
 372
 373/**
 374 * rdma_rw_ctx_signature_init - initialize a RW context with signature offload
 375 * @ctx:        context to initialize
 376 * @qp:         queue pair to operate on
 377 * @port_num:   port num to which the connection is bound
 378 * @sg:         scatterlist to READ/WRITE from/to
 379 * @sg_cnt:     number of entries in @sg
 380 * @prot_sg:    scatterlist to READ/WRITE protection information from/to
 381 * @prot_sg_cnt: number of entries in @prot_sg
 382 * @sig_attrs:  signature offloading algorithms
 383 * @remote_addr:remote address to read/write (relative to @rkey)
 384 * @rkey:       remote key to operate on
 385 * @dir:        %DMA_TO_DEVICE for RDMA WRITE, %DMA_FROM_DEVICE for RDMA READ
 386 *
 387 * Returns the number of WQEs that will be needed on the send queue if
 388 * successful, or a negative error code; a usage sketch follows the function.
 389 */
 390int rdma_rw_ctx_signature_init(struct rdma_rw_ctx *ctx, struct ib_qp *qp,
 391                u32 port_num, struct scatterlist *sg, u32 sg_cnt,
 392                struct scatterlist *prot_sg, u32 prot_sg_cnt,
 393                struct ib_sig_attrs *sig_attrs,
 394                u64 remote_addr, u32 rkey, enum dma_data_direction dir)
 395{
 396        struct ib_device *dev = qp->pd->device;
 397        u32 pages_per_mr = rdma_rw_fr_page_list_len(qp->pd->device,
 398                                                    qp->integrity_en);
 399        struct sg_table sgt = {
 400                .sgl = sg,
 401                .orig_nents = sg_cnt,
 402        };
 403        struct sg_table prot_sgt = {
 404                .sgl = prot_sg,
 405                .orig_nents = prot_sg_cnt,
 406        };
 407        struct ib_rdma_wr *rdma_wr;
 408        int count = 0, ret;
 409
 410        if (sg_cnt > pages_per_mr || prot_sg_cnt > pages_per_mr) {
 411                pr_err("SG count too large: sg_cnt=%u, prot_sg_cnt=%u, pages_per_mr=%u\n",
 412                       sg_cnt, prot_sg_cnt, pages_per_mr);
 413                return -EINVAL;
 414        }
 415
 416        ret = rdma_rw_map_sgtable(dev, &sgt, dir);
 417        if (ret)
 418                return ret;
 419
 420        if (prot_sg_cnt) {
 421                ret = rdma_rw_map_sgtable(dev, &prot_sgt, dir);
 422                if (ret)
 423                        goto out_unmap_sg;
 424        }
 425
 426        ctx->type = RDMA_RW_SIG_MR;
 427        ctx->nr_ops = 1;
 428        ctx->reg = kzalloc(sizeof(*ctx->reg), GFP_KERNEL);
 429        if (!ctx->reg) {
 430                ret = -ENOMEM;
 431                goto out_unmap_prot_sg;
 432        }
 433
 434        ctx->reg->mr = ib_mr_pool_get(qp, &qp->sig_mrs);
 435        if (!ctx->reg->mr) {
 436                ret = -EAGAIN;
 437                goto out_free_ctx;
 438        }
 439
 440        count += rdma_rw_inv_key(ctx->reg);
 441
 442        memcpy(ctx->reg->mr->sig_attrs, sig_attrs, sizeof(struct ib_sig_attrs));
 443
 444        ret = ib_map_mr_sg_pi(ctx->reg->mr, sg, sgt.nents, NULL, prot_sg,
 445                              prot_sgt.nents, NULL, SZ_4K);
 446        if (unlikely(ret)) {
 447                pr_err("failed to map PI sg (%u)\n",
 448                       sgt.nents + prot_sgt.nents);
 449                goto out_destroy_sig_mr;
 450        }
 451
 452        ctx->reg->reg_wr.wr.opcode = IB_WR_REG_MR_INTEGRITY;
 453        ctx->reg->reg_wr.wr.wr_cqe = NULL;
 454        ctx->reg->reg_wr.wr.num_sge = 0;
 455        ctx->reg->reg_wr.wr.send_flags = 0;
 456        ctx->reg->reg_wr.access = IB_ACCESS_LOCAL_WRITE;
 457        if (rdma_protocol_iwarp(qp->device, port_num))
 458                ctx->reg->reg_wr.access |= IB_ACCESS_REMOTE_WRITE;
 459        ctx->reg->reg_wr.mr = ctx->reg->mr;
 460        ctx->reg->reg_wr.key = ctx->reg->mr->lkey;
 461        count++;
 462
 463        ctx->reg->sge.addr = ctx->reg->mr->iova;
 464        ctx->reg->sge.length = ctx->reg->mr->length;
 465        if (sig_attrs->wire.sig_type == IB_SIG_TYPE_NONE)
 466                ctx->reg->sge.length -= ctx->reg->mr->sig_attrs->meta_length;
 467
 468        rdma_wr = &ctx->reg->wr;
 469        rdma_wr->wr.sg_list = &ctx->reg->sge;
 470        rdma_wr->wr.num_sge = 1;
 471        rdma_wr->remote_addr = remote_addr;
 472        rdma_wr->rkey = rkey;
 473        if (dir == DMA_TO_DEVICE)
 474                rdma_wr->wr.opcode = IB_WR_RDMA_WRITE;
 475        else
 476                rdma_wr->wr.opcode = IB_WR_RDMA_READ;
 477        ctx->reg->reg_wr.wr.next = &rdma_wr->wr;
 478        count++;
 479
 480        return count;
 481
 482out_destroy_sig_mr:
 483        ib_mr_pool_put(qp, &qp->sig_mrs, ctx->reg->mr);
 484out_free_ctx:
 485        kfree(ctx->reg);
 486out_unmap_prot_sg:
 487        if (prot_sgt.nents)
 488                rdma_rw_unmap_sg(dev, prot_sgt.sgl, prot_sgt.orig_nents, dir);
 489out_unmap_sg:
 490        rdma_rw_unmap_sg(dev, sgt.sgl, sgt.orig_nents, dir);
 491        return ret;
 492}
 493EXPORT_SYMBOL(rdma_rw_ctx_signature_init);
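
/*
 * Usage sketch (illustrative only): the signature variant follows the same
 * init -> post -> completion -> destroy pattern, but takes a second
 * scatterlist carrying the protection information and an ib_sig_attrs that
 * the caller has filled in beforehand.  The my_* fields and port 1 are
 * hypothetical.
 *
 *	ret = rdma_rw_ctx_signature_init(&io->rw, qp, 1, io->sgl, io->sg_cnt,
 *					 io->prot_sgl, io->prot_sg_cnt,
 *					 &io->sig_attrs, remote_addr, rkey,
 *					 DMA_FROM_DEVICE);
 *	if (ret < 0)
 *		return ret;
 *	ret = rdma_rw_ctx_post(&io->rw, qp, 1, &io->cqe, NULL);
 *
 * In the completion handler the signature result can be inspected, e.g. with
 * ib_check_mr_status(io->rw.reg->mr, IB_MR_CHECK_SIG_STATUS, &mr_status),
 * before the context is released with:
 *
 *	rdma_rw_ctx_destroy_signature(&io->rw, qp, 1, io->sgl, io->sg_cnt,
 *				      io->prot_sgl, io->prot_sg_cnt,
 *				      DMA_FROM_DEVICE);
 */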
 494
 495/*
 496 * Now that we are going to post the WRs we can update the lkey and need_inval
 497 * state on the MRs.  If we were doing this at init time, we would get double
 498 * or missing invalidations if a context was initialized but not actually
 499 * posted.
 500 */
 501static void rdma_rw_update_lkey(struct rdma_rw_reg_ctx *reg, bool need_inval)
 502{
 503        reg->mr->need_inval = need_inval;
 504        ib_update_fast_reg_key(reg->mr, ib_inc_rkey(reg->mr->lkey));
 505        reg->reg_wr.key = reg->mr->lkey;
 506        reg->sge.lkey = reg->mr->lkey;
 507}
 508
 509/**
 510 * rdma_rw_ctx_wrs - return chain of WRs for an RDMA READ or WRITE operation
 511 * @ctx:        context to operate on
 512 * @qp:         queue pair to operate on
 513 * @port_num:   port num to which the connection is bound
 514 * @cqe:        completion queue entry for the last WR
 515 * @chain_wr:   WR to append to the posted chain
 516 *
 517 * Return the WR chain for the set of RDMA READ/WRITE operations described by
 518 * @ctx, as well as any memory registration operations needed.  If @chain_wr
 519 * is non-NULL the WR it points to will be appended to the returned chain.
 520 * If @chain_wr is not set, @cqe must be set so that the caller gets a
 521 * completion notification; see the chaining sketch below the function.
 522 */
 523struct ib_send_wr *rdma_rw_ctx_wrs(struct rdma_rw_ctx *ctx, struct ib_qp *qp,
 524                u32 port_num, struct ib_cqe *cqe, struct ib_send_wr *chain_wr)
 525{
 526        struct ib_send_wr *first_wr, *last_wr;
 527        int i;
 528
 529        switch (ctx->type) {
 530        case RDMA_RW_SIG_MR:
 531        case RDMA_RW_MR:
 532                for (i = 0; i < ctx->nr_ops; i++) {
 533                        rdma_rw_update_lkey(&ctx->reg[i],
 534                                ctx->reg[i].wr.wr.opcode !=
 535                                        IB_WR_RDMA_READ_WITH_INV);
 536                }
 537
 538                if (ctx->reg[0].inv_wr.next)
 539                        first_wr = &ctx->reg[0].inv_wr;
 540                else
 541                        first_wr = &ctx->reg[0].reg_wr.wr;
 542                last_wr = &ctx->reg[ctx->nr_ops - 1].wr.wr;
 543                break;
 544        case RDMA_RW_MULTI_WR:
 545                first_wr = &ctx->map.wrs[0].wr;
 546                last_wr = &ctx->map.wrs[ctx->nr_ops - 1].wr;
 547                break;
 548        case RDMA_RW_SINGLE_WR:
 549                first_wr = &ctx->single.wr.wr;
 550                last_wr = &ctx->single.wr.wr;
 551                break;
 552        default:
 553                BUG();
 554        }
 555
 556        if (chain_wr) {
 557                last_wr->next = chain_wr;
 558        } else {
 559                last_wr->wr_cqe = cqe;
 560                last_wr->send_flags |= IB_SEND_SIGNALED;
 561        }
 562
 563        return first_wr;
 564}
 565EXPORT_SYMBOL(rdma_rw_ctx_wrs);
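
/*
 * Chaining sketch (illustrative only): a target that transfers data with RDMA
 * WRITE and then tells the initiator it is done can append its SEND behind the
 * R/W chain and post everything with a single ib_post_send() call.  The my_*
 * names, the response SGE and port 1 are hypothetical; the SEND carries the
 * completion, so no @cqe is passed to rdma_rw_ctx_wrs().
 *
 *	struct ib_send_wr *first_wr;
 *
 *	io->send_wr.opcode = IB_WR_SEND;
 *	io->send_wr.sg_list = &io->rsp_sge;
 *	io->send_wr.num_sge = 1;
 *	io->send_wr.wr_cqe = &io->cqe;
 *	io->send_wr.send_flags = IB_SEND_SIGNALED;
 *
 *	first_wr = rdma_rw_ctx_wrs(&io->rw, qp, 1, NULL, &io->send_wr);
 *	ret = ib_post_send(qp, first_wr, NULL);
 *
 * Once the SEND completes, the completion handler calls rdma_rw_ctx_destroy()
 * as in the earlier sketch.
 */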
 566
 567/**
 568 * rdma_rw_ctx_post - post an RDMA READ or RDMA WRITE operation
 569 * @ctx:        context to operate on
 570 * @qp:         queue pair to operate on
 571 * @port_num:   port num to which the connection is bound
 572 * @cqe:        completion queue entry for the last WR
 573 * @chain_wr:   WR to append to the posted chain
 574 *
 575 * Post the set of RDMA READ/WRITE operations described by @ctx, as well as
 576 * any memory registration operations needed.  If @chain_wr is non-NULL the
 577 * WR it points to will be appended to the chain of WRs posted.  If @chain_wr
 578 * is not set @cqe must be set so that the caller gets a completion
 579 * notification.
 580 */
 581int rdma_rw_ctx_post(struct rdma_rw_ctx *ctx, struct ib_qp *qp, u32 port_num,
 582                struct ib_cqe *cqe, struct ib_send_wr *chain_wr)
 583{
 584        struct ib_send_wr *first_wr;
 585
 586        first_wr = rdma_rw_ctx_wrs(ctx, qp, port_num, cqe, chain_wr);
 587        return ib_post_send(qp, first_wr, NULL);
 588}
 589EXPORT_SYMBOL(rdma_rw_ctx_post);
 590
 591/**
 592 * rdma_rw_ctx_destroy - release all resources allocated by rdma_rw_ctx_init
 593 * @ctx:        context to release
 594 * @qp:         queue pair to operate on
 595 * @port_num:   port num to which the connection is bound
 596 * @sg:         scatterlist that was used for the READ/WRITE
 597 * @sg_cnt:     number of entries in @sg
 598 * @dir:        %DMA_TO_DEVICE for RDMA WRITE, %DMA_FROM_DEVICE for RDMA READ
 599 */
 600void rdma_rw_ctx_destroy(struct rdma_rw_ctx *ctx, struct ib_qp *qp,
 601                         u32 port_num, struct scatterlist *sg, u32 sg_cnt,
 602                         enum dma_data_direction dir)
 603{
 604        int i;
 605
 606        switch (ctx->type) {
 607        case RDMA_RW_MR:
 608                for (i = 0; i < ctx->nr_ops; i++)
 609                        ib_mr_pool_put(qp, &qp->rdma_mrs, ctx->reg[i].mr);
 610                kfree(ctx->reg);
 611                break;
 612        case RDMA_RW_MULTI_WR:
 613                kfree(ctx->map.wrs);
 614                kfree(ctx->map.sges);
 615                break;
 616        case RDMA_RW_SINGLE_WR:
 617                break;
 618        default:
 619                BUG();
 620                break;
 621        }
 622
 623        rdma_rw_unmap_sg(qp->pd->device, sg, sg_cnt, dir);
 624}
 625EXPORT_SYMBOL(rdma_rw_ctx_destroy);
 626
 627/**
 628 * rdma_rw_ctx_destroy_signature - release all resources allocated by
 629 *      rdma_rw_ctx_signature_init
 630 * @ctx:        context to release
 631 * @qp:         queue pair to operate on
 632 * @port_num:   port num to which the connection is bound
 633 * @sg:         scatterlist that was used for the READ/WRITE
 634 * @sg_cnt:     number of entries in @sg
 635 * @prot_sg:    scatterlist that was used for the READ/WRITE of the PI
 636 * @prot_sg_cnt: number of entries in @prot_sg
 637 * @dir:        %DMA_TO_DEVICE for RDMA WRITE, %DMA_FROM_DEVICE for RDMA READ
 638 */
 639void rdma_rw_ctx_destroy_signature(struct rdma_rw_ctx *ctx, struct ib_qp *qp,
 640                u32 port_num, struct scatterlist *sg, u32 sg_cnt,
 641                struct scatterlist *prot_sg, u32 prot_sg_cnt,
 642                enum dma_data_direction dir)
 643{
 644        if (WARN_ON_ONCE(ctx->type != RDMA_RW_SIG_MR))
 645                return;
 646
 647        ib_mr_pool_put(qp, &qp->sig_mrs, ctx->reg->mr);
 648        kfree(ctx->reg);
 649
 650        if (prot_sg_cnt)
 651                rdma_rw_unmap_sg(qp->pd->device, prot_sg, prot_sg_cnt, dir);
 652        rdma_rw_unmap_sg(qp->pd->device, sg, sg_cnt, dir);
 653}
 654EXPORT_SYMBOL(rdma_rw_ctx_destroy_signature);
 655
 656/**
 657 * rdma_rw_mr_factor - return number of MRs required for a payload
 658 * @device:     device handling the connection
 659 * @port_num:   port num to which the connection is bound
 660 * @maxpages:   maximum payload pages per rdma_rw_ctx
 661 *
 662 * Returns the number of MRs the device requires to move a payload of
 663 * @maxpages pages. The returned value is used during transport creation to
 664 * compute max_rdma_ctxs and the size of the transport's Send and
 665 * Send Completion Queues; a sizing sketch follows the function.
 666 */
 667unsigned int rdma_rw_mr_factor(struct ib_device *device, u32 port_num,
 668                               unsigned int maxpages)
 669{
 670        unsigned int mr_pages;
 671
 672        if (rdma_rw_can_use_mr(device, port_num))
 673                mr_pages = rdma_rw_fr_page_list_len(device, false);
 674        else
 675                mr_pages = device->attrs.max_sge_rd;
 676        return DIV_ROUND_UP(maxpages, mr_pages);
 677}
 678EXPORT_SYMBOL(rdma_rw_mr_factor);
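
/*
 * Sizing sketch (illustrative only): a transport that wants queue_depth
 * concurrent R/W contexts of at most max_pages pages each can use the factor
 * to size max_rdma_ctxs before creating the QP; queue_depth, max_pages and
 * qp_attr are hypothetical names.
 *
 *	factor = rdma_rw_mr_factor(dev, port_num, max_pages);
 *	qp_attr.cap.max_rdma_ctxs = queue_depth * factor;
 *
 * rdma_rw_init_qp() below then grows max_send_wr based on max_rdma_ctxs.
 */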
 679
 680void rdma_rw_init_qp(struct ib_device *dev, struct ib_qp_init_attr *attr)
 681{
 682        u32 factor;
 683
 684        WARN_ON_ONCE(attr->port_num == 0);
 685
 686        /*
 687         * Each context needs at least one RDMA READ or WRITE WR.
 688         *
 689         * For some hardware we might need more; eventually we should ask the
 690         * HCA driver for a multiplier here.
 691         */
 692        factor = 1;
 693
 694        /*
 695         * If the device needs MRs to perform RDMA READ or WRITE operations,
 696         * we'll need two additional WRs per context for the registration
 697         * and the invalidation (see the worked example after this function).
 698         */
 699        if (attr->create_flags & IB_QP_CREATE_INTEGRITY_EN ||
 700            rdma_rw_can_use_mr(dev, attr->port_num))
 701                factor += 2;    /* inv + reg */
 702
 703        attr->cap.max_send_wr += factor * attr->cap.max_rdma_ctxs;
 704
 705        /*
 706         * The device may not support as many send WRs as we just asked for,
 707         * so clamp to its limit and live with what we get.
 708         */
 709        attr->cap.max_send_wr =
 710                min_t(u32, attr->cap.max_send_wr, dev->attrs.max_qp_wr);
 711}
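
/*
 * Worked example (illustrative numbers): with attr->cap.max_rdma_ctxs = 64 on
 * a device that takes the MR path, factor = 1 + 2 = 3, so max_send_wr grows by
 * 3 * 64 = 192 WRs (one READ/WRITE WR plus one REG_MR and one LOCAL_INV WR per
 * context) before being clamped to the device's max_qp_wr.
 */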
 712
 713int rdma_rw_init_mrs(struct ib_qp *qp, struct ib_qp_init_attr *attr)
 714{
 715        struct ib_device *dev = qp->pd->device;
 716        u32 nr_mrs = 0, nr_sig_mrs = 0, max_num_sg = 0;
 717        int ret = 0;
 718
 719        if (attr->create_flags & IB_QP_CREATE_INTEGRITY_EN) {
 720                nr_sig_mrs = attr->cap.max_rdma_ctxs;
 721                nr_mrs = attr->cap.max_rdma_ctxs;
 722                max_num_sg = rdma_rw_fr_page_list_len(dev, true);
 723        } else if (rdma_rw_can_use_mr(dev, attr->port_num)) {
 724                nr_mrs = attr->cap.max_rdma_ctxs;
 725                max_num_sg = rdma_rw_fr_page_list_len(dev, false);
 726        }
 727
 728        if (nr_mrs) {
 729                ret = ib_mr_pool_init(qp, &qp->rdma_mrs, nr_mrs,
 730                                IB_MR_TYPE_MEM_REG,
 731                                max_num_sg, 0);
 732                if (ret) {
 733                        pr_err("%s: failed to allocate %u MRs\n",
 734                                __func__, nr_mrs);
 735                        return ret;
 736                }
 737        }
 738
 739        if (nr_sig_mrs) {
 740                ret = ib_mr_pool_init(qp, &qp->sig_mrs, nr_sig_mrs,
 741                                IB_MR_TYPE_INTEGRITY, max_num_sg, max_num_sg);
 742                if (ret) {
 743                        pr_err("%s: failed to allocate %u SIG MRs\n",
 744                                __func__, nr_sig_mrs);
 745                        goto out_free_rdma_mrs;
 746                }
 747        }
 748
 749        return 0;
 750
 751out_free_rdma_mrs:
 752        ib_mr_pool_destroy(qp, &qp->rdma_mrs);
 753        return ret;
 754}
 755
 756void rdma_rw_cleanup_mrs(struct ib_qp *qp)
 757{
 758        ib_mr_pool_destroy(qp, &qp->sig_mrs);
 759        ib_mr_pool_destroy(qp, &qp->rdma_mrs);
 760}
 761