linux/drivers/infiniband/core/rw.c
   1// SPDX-License-Identifier: GPL-2.0-only
   2/*
   3 * Copyright (c) 2016 HGST, a Western Digital Company.
   4 */
   5#include <linux/moduleparam.h>
   6#include <linux/slab.h>
   7#include <linux/pci-p2pdma.h>
   8#include <rdma/mr_pool.h>
   9#include <rdma/rw.h>
  10
  11enum {
  12        RDMA_RW_SINGLE_WR,
  13        RDMA_RW_MULTI_WR,
  14        RDMA_RW_MR,
  15        RDMA_RW_SIG_MR,
  16};
  17
  18static bool rdma_rw_force_mr;
  19module_param_named(force_mr, rdma_rw_force_mr, bool, 0);
  20MODULE_PARM_DESC(force_mr, "Force usage of MRs for RDMA READ/WRITE operations");
  21
  22/*
  23 * Report whether memory registration should be used. Memory registration must
  24 * be used for iWARP devices, which require RDMA READ sinks to be registered
  25 * with remote write access. Memory registration is also enabled if it might
  26 * yield better performance than multiple SGE entries, see rdma_rw_io_needs_mr().
  27 */
  28static inline bool rdma_rw_can_use_mr(struct ib_device *dev, u8 port_num)
  29{
  30        if (rdma_protocol_iwarp(dev, port_num))
  31                return true;
  32        if (dev->attrs.max_sgl_rd)
  33                return true;
  34        if (unlikely(rdma_rw_force_mr))
  35                return true;
  36        return false;
  37}
  38
  39/*
  40 * Check if the device will use memory registration for this RW operation.
  41 * For RDMA READs we must use MRs on iWARP, and otherwise use them when the
  42 * S/G list is longer than the device's max_sgl_rd limit.  A debug option
  43 * also allows forcing MR usage to help test this code path.
  44 */
  45static inline bool rdma_rw_io_needs_mr(struct ib_device *dev, u8 port_num,
  46                enum dma_data_direction dir, int dma_nents)
  47{
  48        if (dir == DMA_FROM_DEVICE) {
  49                if (rdma_protocol_iwarp(dev, port_num))
  50                        return true;
  51                if (dev->attrs.max_sgl_rd && dma_nents > dev->attrs.max_sgl_rd)
  52                        return true;
  53        }
  54        if (unlikely(rdma_rw_force_mr))
  55                return true;
  56        return false;
  57}
  58
  59static inline u32 rdma_rw_fr_page_list_len(struct ib_device *dev,
  60                                           bool pi_support)
  61{
  62        u32 max_pages;
  63
  64        if (pi_support)
  65                max_pages = dev->attrs.max_pi_fast_reg_page_list_len;
  66        else
  67                max_pages = dev->attrs.max_fast_reg_page_list_len;
  68
  69        /* arbitrary limit to avoid allocating gigantic resources */
  70        return min_t(u32, max_pages, 256);
  71}
  72
  73static inline int rdma_rw_inv_key(struct rdma_rw_reg_ctx *reg)
  74{
  75        int count = 0;
  76
  77        if (reg->mr->need_inval) {
  78                reg->inv_wr.opcode = IB_WR_LOCAL_INV;
  79                reg->inv_wr.ex.invalidate_rkey = reg->mr->lkey;
  80                reg->inv_wr.next = &reg->reg_wr.wr;
  81                count++;
  82        } else {
  83                reg->inv_wr.next = NULL;
  84        }
  85
  86        return count;
  87}
  88
  89/* Caller must have zero-initialized *reg. */
  90static int rdma_rw_init_one_mr(struct ib_qp *qp, u8 port_num,
  91                struct rdma_rw_reg_ctx *reg, struct scatterlist *sg,
  92                u32 sg_cnt, u32 offset)
  93{
  94        u32 pages_per_mr = rdma_rw_fr_page_list_len(qp->pd->device,
  95                                                    qp->integrity_en);
  96        u32 nents = min(sg_cnt, pages_per_mr);
  97        int count = 0, ret;
  98
  99        reg->mr = ib_mr_pool_get(qp, &qp->rdma_mrs);
 100        if (!reg->mr)
 101                return -EAGAIN;
 102
 103        count += rdma_rw_inv_key(reg);
 104
 105        ret = ib_map_mr_sg(reg->mr, sg, nents, &offset, PAGE_SIZE);
 106        if (ret < 0 || ret < nents) {
 107                ib_mr_pool_put(qp, &qp->rdma_mrs, reg->mr);
 108                return -EINVAL;
 109        }
 110
 111        reg->reg_wr.wr.opcode = IB_WR_REG_MR;
 112        reg->reg_wr.mr = reg->mr;
 113        reg->reg_wr.access = IB_ACCESS_LOCAL_WRITE;
 114        if (rdma_protocol_iwarp(qp->device, port_num))
 115                reg->reg_wr.access |= IB_ACCESS_REMOTE_WRITE;
 116        count++;
 117
 118        reg->sge.addr = reg->mr->iova;
 119        reg->sge.length = reg->mr->length;
 120        return count;
 121}
 122
 123static int rdma_rw_init_mr_wrs(struct rdma_rw_ctx *ctx, struct ib_qp *qp,
 124                u8 port_num, struct scatterlist *sg, u32 sg_cnt, u32 offset,
 125                u64 remote_addr, u32 rkey, enum dma_data_direction dir)
 126{
 127        struct rdma_rw_reg_ctx *prev = NULL;
 128        u32 pages_per_mr = rdma_rw_fr_page_list_len(qp->pd->device,
 129                                                    qp->integrity_en);
 130        int i, j, ret = 0, count = 0;
 131
 132        ctx->nr_ops = DIV_ROUND_UP(sg_cnt, pages_per_mr);
 133        ctx->reg = kcalloc(ctx->nr_ops, sizeof(*ctx->reg), GFP_KERNEL);
 134        if (!ctx->reg) {
 135                ret = -ENOMEM;
 136                goto out;
 137        }
 138
 139        for (i = 0; i < ctx->nr_ops; i++) {
 140                struct rdma_rw_reg_ctx *reg = &ctx->reg[i];
 141                u32 nents = min(sg_cnt, pages_per_mr);
 142
 143                ret = rdma_rw_init_one_mr(qp, port_num, reg, sg, sg_cnt,
 144                                offset);
 145                if (ret < 0)
 146                        goto out_free;
 147                count += ret;
 148
 149                if (prev) {
 150                        if (reg->mr->need_inval)
 151                                prev->wr.wr.next = &reg->inv_wr;
 152                        else
 153                                prev->wr.wr.next = &reg->reg_wr.wr;
 154                }
 155
 156                reg->reg_wr.wr.next = &reg->wr.wr;
 157
 158                reg->wr.wr.sg_list = &reg->sge;
 159                reg->wr.wr.num_sge = 1;
 160                reg->wr.remote_addr = remote_addr;
 161                reg->wr.rkey = rkey;
 162                if (dir == DMA_TO_DEVICE) {
 163                        reg->wr.wr.opcode = IB_WR_RDMA_WRITE;
 164                } else if (!rdma_cap_read_inv(qp->device, port_num)) {
 165                        reg->wr.wr.opcode = IB_WR_RDMA_READ;
 166                } else {
 167                        reg->wr.wr.opcode = IB_WR_RDMA_READ_WITH_INV;
 168                        reg->wr.wr.ex.invalidate_rkey = reg->mr->lkey;
 169                }
 170                count++;
 171
 172                remote_addr += reg->sge.length;
 173                sg_cnt -= nents;
 174                for (j = 0; j < nents; j++)
 175                        sg = sg_next(sg);
 176                prev = reg;
 177                offset = 0;
 178        }
 179
 180        if (prev)
 181                prev->wr.wr.next = NULL;
 182
 183        ctx->type = RDMA_RW_MR;
 184        return count;
 185
 186out_free:
 187        while (--i >= 0)
 188                ib_mr_pool_put(qp, &qp->rdma_mrs, ctx->reg[i].mr);
 189        kfree(ctx->reg);
 190out:
 191        return ret;
 192}
 193
 194static int rdma_rw_init_map_wrs(struct rdma_rw_ctx *ctx, struct ib_qp *qp,
 195                struct scatterlist *sg, u32 sg_cnt, u32 offset,
 196                u64 remote_addr, u32 rkey, enum dma_data_direction dir)
 197{
 198        u32 max_sge = dir == DMA_TO_DEVICE ? qp->max_write_sge :
 199                      qp->max_read_sge;
 200        struct ib_sge *sge;
 201        u32 total_len = 0, i, j;
 202
 203        ctx->nr_ops = DIV_ROUND_UP(sg_cnt, max_sge);
 204
 205        ctx->map.sges = sge = kcalloc(sg_cnt, sizeof(*sge), GFP_KERNEL);
 206        if (!ctx->map.sges)
 207                goto out;
 208
 209        ctx->map.wrs = kcalloc(ctx->nr_ops, sizeof(*ctx->map.wrs), GFP_KERNEL);
 210        if (!ctx->map.wrs)
 211                goto out_free_sges;
 212
 213        for (i = 0; i < ctx->nr_ops; i++) {
 214                struct ib_rdma_wr *rdma_wr = &ctx->map.wrs[i];
 215                u32 nr_sge = min(sg_cnt, max_sge);
 216
 217                if (dir == DMA_TO_DEVICE)
 218                        rdma_wr->wr.opcode = IB_WR_RDMA_WRITE;
 219                else
 220                        rdma_wr->wr.opcode = IB_WR_RDMA_READ;
 221                rdma_wr->remote_addr = remote_addr + total_len;
 222                rdma_wr->rkey = rkey;
 223                rdma_wr->wr.num_sge = nr_sge;
 224                rdma_wr->wr.sg_list = sge;
 225
 226                for (j = 0; j < nr_sge; j++, sg = sg_next(sg)) {
 227                        sge->addr = sg_dma_address(sg) + offset;
 228                        sge->length = sg_dma_len(sg) - offset;
 229                        sge->lkey = qp->pd->local_dma_lkey;
 230
 231                        total_len += sge->length;
 232                        sge++;
 233                        sg_cnt--;
 234                        offset = 0;
 235                }
 236
 237                rdma_wr->wr.next = i + 1 < ctx->nr_ops ?
 238                        &ctx->map.wrs[i + 1].wr : NULL;
 239        }
 240
 241        ctx->type = RDMA_RW_MULTI_WR;
 242        return ctx->nr_ops;
 243
 244out_free_sges:
 245        kfree(ctx->map.sges);
 246out:
 247        return -ENOMEM;
 248}
 249
 250static int rdma_rw_init_single_wr(struct rdma_rw_ctx *ctx, struct ib_qp *qp,
 251                struct scatterlist *sg, u32 offset, u64 remote_addr, u32 rkey,
 252                enum dma_data_direction dir)
 253{
 254        struct ib_rdma_wr *rdma_wr = &ctx->single.wr;
 255
 256        ctx->nr_ops = 1;
 257
 258        ctx->single.sge.lkey = qp->pd->local_dma_lkey;
 259        ctx->single.sge.addr = sg_dma_address(sg) + offset;
 260        ctx->single.sge.length = sg_dma_len(sg) - offset;
 261
 262        memset(rdma_wr, 0, sizeof(*rdma_wr));
 263        if (dir == DMA_TO_DEVICE)
 264                rdma_wr->wr.opcode = IB_WR_RDMA_WRITE;
 265        else
 266                rdma_wr->wr.opcode = IB_WR_RDMA_READ;
 267        rdma_wr->wr.sg_list = &ctx->single.sge;
 268        rdma_wr->wr.num_sge = 1;
 269        rdma_wr->remote_addr = remote_addr;
 270        rdma_wr->rkey = rkey;
 271
 272        ctx->type = RDMA_RW_SINGLE_WR;
 273        return 1;
 274}
 275
 276static void rdma_rw_unmap_sg(struct ib_device *dev, struct scatterlist *sg,
 277                             u32 sg_cnt, enum dma_data_direction dir)
 278{
 279        if (is_pci_p2pdma_page(sg_page(sg)))
 280                pci_p2pdma_unmap_sg(dev->dma_device, sg, sg_cnt, dir);
 281        else
 282                ib_dma_unmap_sg(dev, sg, sg_cnt, dir);
 283}
 284
 285static int rdma_rw_map_sg(struct ib_device *dev, struct scatterlist *sg,
 286                          u32 sg_cnt, enum dma_data_direction dir)
 287{
 288        if (is_pci_p2pdma_page(sg_page(sg))) {
 289                if (WARN_ON_ONCE(ib_uses_virt_dma(dev)))
 290                        return 0;
 291                return pci_p2pdma_map_sg(dev->dma_device, sg, sg_cnt, dir);
 292        }
 293        return ib_dma_map_sg(dev, sg, sg_cnt, dir);
 294}
 295
 296/**
 297 * rdma_rw_ctx_init - initialize a RDMA READ/WRITE context
 298 * @ctx:        context to initialize
 299 * @qp:         queue pair to operate on
 300 * @port_num:   port num to which the connection is bound
 301 * @sg:         scatterlist to READ/WRITE from/to
 302 * @sg_cnt:     number of entries in @sg
 303 * @sg_offset:  current byte offset into @sg
 304 * @remote_addr: remote address to read/write (relative to @rkey)
 305 * @rkey:       remote key to operate on
 306 * @dir:        %DMA_TO_DEVICE for RDMA WRITE, %DMA_FROM_DEVICE for RDMA READ
 307 *
 308 * Returns the number of WQEs that will be needed on the send queue if
 309 * successful, or a negative error code.
 310 */
 311int rdma_rw_ctx_init(struct rdma_rw_ctx *ctx, struct ib_qp *qp, u8 port_num,
 312                struct scatterlist *sg, u32 sg_cnt, u32 sg_offset,
 313                u64 remote_addr, u32 rkey, enum dma_data_direction dir)
 314{
 315        struct ib_device *dev = qp->pd->device;
 316        int ret;
 317
 318        ret = rdma_rw_map_sg(dev, sg, sg_cnt, dir);
 319        if (!ret)
 320                return -ENOMEM;
 321        sg_cnt = ret;
 322
 323        /*
 324         * Skip to the S/G entry that sg_offset falls into:
 325         */
 326        for (;;) {
 327                u32 len = sg_dma_len(sg);
 328
 329                if (sg_offset < len)
 330                        break;
 331
 332                sg = sg_next(sg);
 333                sg_offset -= len;
 334                sg_cnt--;
 335        }
 336
 337        ret = -EIO;
 338        if (WARN_ON_ONCE(sg_cnt == 0))
 339                goto out_unmap_sg;
 340
 341        if (rdma_rw_io_needs_mr(qp->device, port_num, dir, sg_cnt)) {
 342                ret = rdma_rw_init_mr_wrs(ctx, qp, port_num, sg, sg_cnt,
 343                                sg_offset, remote_addr, rkey, dir);
 344        } else if (sg_cnt > 1) {
 345                ret = rdma_rw_init_map_wrs(ctx, qp, sg, sg_cnt, sg_offset,
 346                                remote_addr, rkey, dir);
 347        } else {
 348                ret = rdma_rw_init_single_wr(ctx, qp, sg, sg_offset,
 349                                remote_addr, rkey, dir);
 350        }
 351
 352        if (ret < 0)
 353                goto out_unmap_sg;
 354        return ret;
 355
 356out_unmap_sg:
 357        rdma_rw_unmap_sg(dev, sg, sg_cnt, dir);
 358        return ret;
 359}
 360EXPORT_SYMBOL(rdma_rw_ctx_init);
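
/*
 * Illustrative sketch (not part of this file's API): a typical ULP pairs
 * rdma_rw_ctx_init() with rdma_rw_ctx_post() and, once the I/O completes or
 * posting fails, rdma_rw_ctx_destroy().  The helper name and argument layout
 * below are hypothetical; only the rdma_rw_* calls come from this file.
 */
static int rdma_rw_example_issue_io(struct rdma_rw_ctx *ctx, struct ib_qp *qp,
		u8 port_num, struct scatterlist *sg, u32 sg_cnt,
		u64 remote_addr, u32 rkey, enum dma_data_direction dir,
		struct ib_cqe *done_cqe)
{
	int ret;

	/* Map @sg and build the WR chain; returns the number of WQEs needed. */
	ret = rdma_rw_ctx_init(ctx, qp, port_num, sg, sg_cnt, 0,
			remote_addr, rkey, dir);
	if (ret < 0)
		return ret;

	/* Post the chain; @done_cqe->done runs when the last WR completes. */
	ret = rdma_rw_ctx_post(ctx, qp, port_num, done_cqe, NULL);
	if (ret)
		rdma_rw_ctx_destroy(ctx, qp, port_num, sg, sg_cnt, dir);
	return ret;
}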
 361
 362/**
 363 * rdma_rw_ctx_signature_init - initialize a RW context with signature offload
 364 * @ctx:        context to initialize
 365 * @qp:         queue pair to operate on
 366 * @port_num:   port num to which the connection is bound
 367 * @sg:         scatterlist to READ/WRITE from/to
 368 * @sg_cnt:     number of entries in @sg
 369 * @prot_sg:    scatterlist to READ/WRITE protection information from/to
 370 * @prot_sg_cnt: number of entries in @prot_sg
 371 * @sig_attrs:  signature offloading algorithms
 372 * @remote_addr: remote address to read/write (relative to @rkey)
 373 * @rkey:       remote key to operate on
 374 * @dir:        %DMA_TO_DEVICE for RDMA WRITE, %DMA_FROM_DEVICE for RDMA READ
 375 *
 376 * Returns the number of WQEs that will be needed on the send queue if
 377 * successful, or a negative error code.
 378 */
 379int rdma_rw_ctx_signature_init(struct rdma_rw_ctx *ctx, struct ib_qp *qp,
 380                u8 port_num, struct scatterlist *sg, u32 sg_cnt,
 381                struct scatterlist *prot_sg, u32 prot_sg_cnt,
 382                struct ib_sig_attrs *sig_attrs,
 383                u64 remote_addr, u32 rkey, enum dma_data_direction dir)
 384{
 385        struct ib_device *dev = qp->pd->device;
 386        u32 pages_per_mr = rdma_rw_fr_page_list_len(qp->pd->device,
 387                                                    qp->integrity_en);
 388        struct ib_rdma_wr *rdma_wr;
 389        int count = 0, ret;
 390
 391        if (sg_cnt > pages_per_mr || prot_sg_cnt > pages_per_mr) {
 392                pr_err("SG count too large: sg_cnt=%d, prot_sg_cnt=%d, pages_per_mr=%d\n",
 393                       sg_cnt, prot_sg_cnt, pages_per_mr);
 394                return -EINVAL;
 395        }
 396
 397        ret = rdma_rw_map_sg(dev, sg, sg_cnt, dir);
 398        if (!ret)
 399                return -ENOMEM;
 400        sg_cnt = ret;
 401
 402        if (prot_sg_cnt) {
 403                ret = rdma_rw_map_sg(dev, prot_sg, prot_sg_cnt, dir);
 404                if (!ret) {
 405                        ret = -ENOMEM;
 406                        goto out_unmap_sg;
 407                }
 408                prot_sg_cnt = ret;
 409        }
 410
 411        ctx->type = RDMA_RW_SIG_MR;
 412        ctx->nr_ops = 1;
 413        ctx->reg = kzalloc(sizeof(*ctx->reg), GFP_KERNEL);
 414        if (!ctx->reg) {
 415                ret = -ENOMEM;
 416                goto out_unmap_prot_sg;
 417        }
 418
 419        ctx->reg->mr = ib_mr_pool_get(qp, &qp->sig_mrs);
 420        if (!ctx->reg->mr) {
 421                ret = -EAGAIN;
 422                goto out_free_ctx;
 423        }
 424
 425        count += rdma_rw_inv_key(ctx->reg);
 426
 427        memcpy(ctx->reg->mr->sig_attrs, sig_attrs, sizeof(struct ib_sig_attrs));
 428
 429        ret = ib_map_mr_sg_pi(ctx->reg->mr, sg, sg_cnt, NULL, prot_sg,
 430                              prot_sg_cnt, NULL, SZ_4K);
 431        if (unlikely(ret)) {
 432                pr_err("failed to map PI sg (%d)\n", sg_cnt + prot_sg_cnt);
 433                goto out_destroy_sig_mr;
 434        }
 435
 436        ctx->reg->reg_wr.wr.opcode = IB_WR_REG_MR_INTEGRITY;
 437        ctx->reg->reg_wr.wr.wr_cqe = NULL;
 438        ctx->reg->reg_wr.wr.num_sge = 0;
 439        ctx->reg->reg_wr.wr.send_flags = 0;
 440        ctx->reg->reg_wr.access = IB_ACCESS_LOCAL_WRITE;
 441        if (rdma_protocol_iwarp(qp->device, port_num))
 442                ctx->reg->reg_wr.access |= IB_ACCESS_REMOTE_WRITE;
 443        ctx->reg->reg_wr.mr = ctx->reg->mr;
 444        ctx->reg->reg_wr.key = ctx->reg->mr->lkey;
 445        count++;
 446
 447        ctx->reg->sge.addr = ctx->reg->mr->iova;
 448        ctx->reg->sge.length = ctx->reg->mr->length;
 449        if (sig_attrs->wire.sig_type == IB_SIG_TYPE_NONE)
 450                ctx->reg->sge.length -= ctx->reg->mr->sig_attrs->meta_length;
 451
 452        rdma_wr = &ctx->reg->wr;
 453        rdma_wr->wr.sg_list = &ctx->reg->sge;
 454        rdma_wr->wr.num_sge = 1;
 455        rdma_wr->remote_addr = remote_addr;
 456        rdma_wr->rkey = rkey;
 457        if (dir == DMA_TO_DEVICE)
 458                rdma_wr->wr.opcode = IB_WR_RDMA_WRITE;
 459        else
 460                rdma_wr->wr.opcode = IB_WR_RDMA_READ;
 461        ctx->reg->reg_wr.wr.next = &rdma_wr->wr;
 462        count++;
 463
 464        return count;
 465
 466out_destroy_sig_mr:
 467        ib_mr_pool_put(qp, &qp->sig_mrs, ctx->reg->mr);
 468out_free_ctx:
 469        kfree(ctx->reg);
 470out_unmap_prot_sg:
 471        if (prot_sg_cnt)
 472                rdma_rw_unmap_sg(dev, prot_sg, prot_sg_cnt, dir);
 473out_unmap_sg:
 474        rdma_rw_unmap_sg(dev, sg, sg_cnt, dir);
 475        return ret;
 476}
 477EXPORT_SYMBOL(rdma_rw_ctx_signature_init);
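
/*
 * Illustrative sketch (not part of this file's API): the signature variant
 * follows the same pattern, pairing rdma_rw_ctx_signature_init() with
 * rdma_rw_ctx_post() and rdma_rw_ctx_destroy_signature().  It assumes the QP
 * was created with IB_QP_CREATE_INTEGRITY_EN; the helper name is hypothetical.
 */
static int rdma_rw_example_issue_pi_io(struct rdma_rw_ctx *ctx,
		struct ib_qp *qp, u8 port_num, struct scatterlist *sg,
		u32 sg_cnt, struct scatterlist *prot_sg, u32 prot_sg_cnt,
		struct ib_sig_attrs *sig_attrs, u64 remote_addr, u32 rkey,
		enum dma_data_direction dir, struct ib_cqe *done_cqe)
{
	int ret;

	ret = rdma_rw_ctx_signature_init(ctx, qp, port_num, sg, sg_cnt,
			prot_sg, prot_sg_cnt, sig_attrs, remote_addr, rkey,
			dir);
	if (ret < 0)
		return ret;

	ret = rdma_rw_ctx_post(ctx, qp, port_num, done_cqe, NULL);
	if (ret)
		rdma_rw_ctx_destroy_signature(ctx, qp, port_num, sg, sg_cnt,
				prot_sg, prot_sg_cnt, dir);
	return ret;
}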
 478
 479/*
 480 * Now that we are going to post the WRs we can update the lkey and need_inval
 481 * state on the MRs.  If we were doing this at init time, we would get double
 482 * or missing invalidations if a context was initialized but not actually
 483 * posted.
 484 */
 485static void rdma_rw_update_lkey(struct rdma_rw_reg_ctx *reg, bool need_inval)
 486{
 487        reg->mr->need_inval = need_inval;
 488        ib_update_fast_reg_key(reg->mr, ib_inc_rkey(reg->mr->lkey));
 489        reg->reg_wr.key = reg->mr->lkey;
 490        reg->sge.lkey = reg->mr->lkey;
 491}
 492
 493/**
 494 * rdma_rw_ctx_wrs - return chain of WRs for a RDMA READ or WRITE operation
 495 * @ctx:        context to operate on
 496 * @qp:         queue pair to operate on
 497 * @port_num:   port num to which the connection is bound
 498 * @cqe:        completion queue entry for the last WR
 499 * @chain_wr:   WR to append to the posted chain
 500 *
 501 * Return the WR chain for the set of RDMA READ/WRITE operations described by
 502 * @ctx, as well as any memory registration operations needed.  If @chain_wr
 503 * is non-NULL the WR it points to will be appended to the chain of WRs posted.
 504 * If @chain_wr is not set @cqe must be set so that the caller gets a
 505 * completion notification.
 506 */
 507struct ib_send_wr *rdma_rw_ctx_wrs(struct rdma_rw_ctx *ctx, struct ib_qp *qp,
 508                u8 port_num, struct ib_cqe *cqe, struct ib_send_wr *chain_wr)
 509{
 510        struct ib_send_wr *first_wr, *last_wr;
 511        int i;
 512
 513        switch (ctx->type) {
 514        case RDMA_RW_SIG_MR:
 515        case RDMA_RW_MR:
 516                for (i = 0; i < ctx->nr_ops; i++) {
 517                        rdma_rw_update_lkey(&ctx->reg[i],
 518                                ctx->reg[i].wr.wr.opcode !=
 519                                        IB_WR_RDMA_READ_WITH_INV);
 520                }
 521
 522                if (ctx->reg[0].inv_wr.next)
 523                        first_wr = &ctx->reg[0].inv_wr;
 524                else
 525                        first_wr = &ctx->reg[0].reg_wr.wr;
 526                last_wr = &ctx->reg[ctx->nr_ops - 1].wr.wr;
 527                break;
 528        case RDMA_RW_MULTI_WR:
 529                first_wr = &ctx->map.wrs[0].wr;
 530                last_wr = &ctx->map.wrs[ctx->nr_ops - 1].wr;
 531                break;
 532        case RDMA_RW_SINGLE_WR:
 533                first_wr = &ctx->single.wr.wr;
 534                last_wr = &ctx->single.wr.wr;
 535                break;
 536        default:
 537                BUG();
 538        }
 539
 540        if (chain_wr) {
 541                last_wr->next = chain_wr;
 542        } else {
 543                last_wr->wr_cqe = cqe;
 544                last_wr->send_flags |= IB_SEND_SIGNALED;
 545        }
 546
 547        return first_wr;
 548}
 549EXPORT_SYMBOL(rdma_rw_ctx_wrs);
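
/*
 * Illustrative sketch (not part of this file's API): a target-side ULP that
 * wants to send a response immediately after an RDMA WRITE can pass the SEND
 * WR as @chain_wr and post the whole chain with a single ib_post_send().
 * The helper name and the response SGE are hypothetical.
 */
static int rdma_rw_example_write_then_send(struct rdma_rw_ctx *ctx,
		struct ib_qp *qp, u8 port_num, struct ib_sge *rsp_sge,
		struct ib_cqe *send_cqe)
{
	struct ib_send_wr send_wr = {
		.wr_cqe	    = send_cqe,
		.sg_list    = rsp_sge,
		.num_sge    = 1,
		.opcode	    = IB_WR_SEND,
		.send_flags = IB_SEND_SIGNALED,
	};
	struct ib_send_wr *first_wr;

	/* Chain the response SEND after the RDMA WRITE (and any MR) WRs. */
	first_wr = rdma_rw_ctx_wrs(ctx, qp, port_num, NULL, &send_wr);

	/* ib_post_send() copies the WRs, so a stack-allocated WR is fine here. */
	return ib_post_send(qp, first_wr, NULL);
}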
 550
 551/**
 552 * rdma_rw_ctx_post - post a RDMA READ or RDMA WRITE operation
 553 * @ctx:        context to operate on
 554 * @qp:         queue pair to operate on
 555 * @port_num:   port num to which the connection is bound
 556 * @cqe:        completion queue entry for the last WR
 557 * @chain_wr:   WR to append to the posted chain
 558 *
 559 * Post the set of RDMA READ/WRITE operations described by @ctx, as well as
 560 * any memory registration operations needed.  If @chain_wr is non-NULL the
 561 * WR it points to will be appended to the chain of WRs posted.  If @chain_wr
 562 * is not set @cqe must be set so that the caller gets a completion
 563 * notification.
 564 */
 565int rdma_rw_ctx_post(struct rdma_rw_ctx *ctx, struct ib_qp *qp, u8 port_num,
 566                struct ib_cqe *cqe, struct ib_send_wr *chain_wr)
 567{
 568        struct ib_send_wr *first_wr;
 569
 570        first_wr = rdma_rw_ctx_wrs(ctx, qp, port_num, cqe, chain_wr);
 571        return ib_post_send(qp, first_wr, NULL);
 572}
 573EXPORT_SYMBOL(rdma_rw_ctx_post);
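
/*
 * Illustrative sketch (not part of this file's API): the @cqe passed to
 * rdma_rw_ctx_post() is typically embedded in a per-I/O structure so that the
 * completion handler can recover its context with container_of() and release
 * the rdma_rw resources.  The structure and both names below are hypothetical.
 */
struct rdma_rw_example_io {
	struct rdma_rw_ctx	ctx;
	struct ib_cqe		cqe;	/* cqe.done = rdma_rw_example_done */
	struct ib_qp		*qp;
	u8			port_num;
	struct scatterlist	*sg;
	u32			sg_cnt;
	enum dma_data_direction	dir;
};

static void rdma_rw_example_done(struct ib_cq *cq, struct ib_wc *wc)
{
	struct rdma_rw_example_io *io =
		container_of(wc->wr_cqe, struct rdma_rw_example_io, cqe);

	/* Always release the context, then look at wc->status for errors. */
	rdma_rw_ctx_destroy(&io->ctx, io->qp, io->port_num,
			    io->sg, io->sg_cnt, io->dir);
}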
 574
 575/**
 576 * rdma_rw_ctx_destroy - release all resources allocated by rdma_rw_ctx_init
 577 * @ctx:        context to release
 578 * @qp:         queue pair to operate on
 579 * @port_num:   port num to which the connection is bound
 580 * @sg:         scatterlist that was used for the READ/WRITE
 581 * @sg_cnt:     number of entries in @sg
 582 * @dir:        %DMA_TO_DEVICE for RDMA WRITE, %DMA_FROM_DEVICE for RDMA READ
 583 */
 584void rdma_rw_ctx_destroy(struct rdma_rw_ctx *ctx, struct ib_qp *qp, u8 port_num,
 585                struct scatterlist *sg, u32 sg_cnt, enum dma_data_direction dir)
 586{
 587        int i;
 588
 589        switch (ctx->type) {
 590        case RDMA_RW_MR:
 591                for (i = 0; i < ctx->nr_ops; i++)
 592                        ib_mr_pool_put(qp, &qp->rdma_mrs, ctx->reg[i].mr);
 593                kfree(ctx->reg);
 594                break;
 595        case RDMA_RW_MULTI_WR:
 596                kfree(ctx->map.wrs);
 597                kfree(ctx->map.sges);
 598                break;
 599        case RDMA_RW_SINGLE_WR:
 600                break;
 601        default:
 602                BUG();
 603                break;
 604        }
 605
 606        rdma_rw_unmap_sg(qp->pd->device, sg, sg_cnt, dir);
 607}
 608EXPORT_SYMBOL(rdma_rw_ctx_destroy);
 609
 610/**
 611 * rdma_rw_ctx_destroy_signature - release all resources allocated by
 612 *      rdma_rw_ctx_signature_init
 613 * @ctx:        context to release
 614 * @qp:         queue pair to operate on
 615 * @port_num:   port num to which the connection is bound
 616 * @sg:         scatterlist that was used for the READ/WRITE
 617 * @sg_cnt:     number of entries in @sg
 618 * @prot_sg:    scatterlist that was used for the READ/WRITE of the PI
 619 * @prot_sg_cnt: number of entries in @prot_sg
 620 * @dir:        %DMA_TO_DEVICE for RDMA WRITE, %DMA_FROM_DEVICE for RDMA READ
 621 */
 622void rdma_rw_ctx_destroy_signature(struct rdma_rw_ctx *ctx, struct ib_qp *qp,
 623                u8 port_num, struct scatterlist *sg, u32 sg_cnt,
 624                struct scatterlist *prot_sg, u32 prot_sg_cnt,
 625                enum dma_data_direction dir)
 626{
 627        if (WARN_ON_ONCE(ctx->type != RDMA_RW_SIG_MR))
 628                return;
 629
 630        ib_mr_pool_put(qp, &qp->sig_mrs, ctx->reg->mr);
 631        kfree(ctx->reg);
 632
 633        if (prot_sg_cnt)
 634                rdma_rw_unmap_sg(qp->pd->device, prot_sg, prot_sg_cnt, dir);
 635        rdma_rw_unmap_sg(qp->pd->device, sg, sg_cnt, dir);
 636}
 637EXPORT_SYMBOL(rdma_rw_ctx_destroy_signature);
 638
 639/**
 640 * rdma_rw_mr_factor - return number of MRs required for a payload
 641 * @device:     device handling the connection
 642 * @port_num:   port num to which the connection is bound
 643 * @maxpages:   maximum payload pages per rdma_rw_ctx
 644 *
 645 * Returns the number of MRs the device requires to move @maxpages
 646 * pages of payload. The returned value is used during transport creation
 647 * to compute max_rdma_ctxs and the size of the transport's Send and
 648 * Send Completion Queues.
 649 */
 650unsigned int rdma_rw_mr_factor(struct ib_device *device, u8 port_num,
 651                               unsigned int maxpages)
 652{
 653        unsigned int mr_pages;
 654
 655        if (rdma_rw_can_use_mr(device, port_num))
 656                mr_pages = rdma_rw_fr_page_list_len(device, false);
 657        else
 658                mr_pages = device->attrs.max_sge_rd;
 659        return DIV_ROUND_UP(maxpages, mr_pages);
 660}
 661EXPORT_SYMBOL(rdma_rw_mr_factor);
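
/*
 * Illustrative sketch (not part of this file's API): a transport driver can
 * use rdma_rw_mr_factor() while sizing its QP so that every in-flight request
 * has enough rdma_rw contexts for its largest payload.  The helper name and
 * parameters are hypothetical.
 */
static void rdma_rw_example_size_qp(struct ib_device *dev, u8 port_num,
		struct ib_qp_init_attr *attr, u32 queue_depth,
		u32 max_pages_per_io)
{
	/*
	 * One rdma_rw context per MR-sized chunk of the largest payload,
	 * for each request that may be in flight at once.
	 */
	attr->cap.max_rdma_ctxs = queue_depth *
		rdma_rw_mr_factor(dev, port_num, max_pages_per_io);

	/* rdma_rw_init_qp() later grows max_send_wr to match max_rdma_ctxs. */
}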
 662
 663void rdma_rw_init_qp(struct ib_device *dev, struct ib_qp_init_attr *attr)
 664{
 665        u32 factor;
 666
 667        WARN_ON_ONCE(attr->port_num == 0);
 668
 669        /*
 670         * Each context needs at least one RDMA READ or WRITE WR.
 671         *
 672         * For some hardware we might need more, eventually we should ask the
 673         * HCA driver for a multiplier here.
 674         */
 675        factor = 1;
 676
 677        /*
 678         * If the device needs MRs to perform RDMA READ or WRITE operations,
 679         * we'll need two additional WRs per context for the registration and
 680         * the invalidation.
 681         */
 682        if (attr->create_flags & IB_QP_CREATE_INTEGRITY_EN ||
 683            rdma_rw_can_use_mr(dev, attr->port_num))
 684                factor += 2;    /* inv + reg */
 685
 686        attr->cap.max_send_wr += factor * attr->cap.max_rdma_ctxs;
 687
 688        /*
 689         * The device may not support as many WRs as we would like, so
 690         * clamp the request to what the hardware can actually provide.
 691         */
 692        attr->cap.max_send_wr =
 693                min_t(u32, attr->cap.max_send_wr, dev->attrs.max_qp_wr);
 694}
 695
 696int rdma_rw_init_mrs(struct ib_qp *qp, struct ib_qp_init_attr *attr)
 697{
 698        struct ib_device *dev = qp->pd->device;
 699        u32 nr_mrs = 0, nr_sig_mrs = 0, max_num_sg = 0;
 700        int ret = 0;
 701
 702        if (attr->create_flags & IB_QP_CREATE_INTEGRITY_EN) {
 703                nr_sig_mrs = attr->cap.max_rdma_ctxs;
 704                nr_mrs = attr->cap.max_rdma_ctxs;
 705                max_num_sg = rdma_rw_fr_page_list_len(dev, true);
 706        } else if (rdma_rw_can_use_mr(dev, attr->port_num)) {
 707                nr_mrs = attr->cap.max_rdma_ctxs;
 708                max_num_sg = rdma_rw_fr_page_list_len(dev, false);
 709        }
 710
 711        if (nr_mrs) {
 712                ret = ib_mr_pool_init(qp, &qp->rdma_mrs, nr_mrs,
 713                                IB_MR_TYPE_MEM_REG,
 714                                max_num_sg, 0);
 715                if (ret) {
 716                        pr_err("%s: failed to allocate %d MRs\n",
 717                                __func__, nr_mrs);
 718                        return ret;
 719                }
 720        }
 721
 722        if (nr_sig_mrs) {
 723                ret = ib_mr_pool_init(qp, &qp->sig_mrs, nr_sig_mrs,
 724                                IB_MR_TYPE_INTEGRITY, max_num_sg, max_num_sg);
 725                if (ret) {
 726                        pr_err("%s: failed to allocate %d SIG MRs\n",
 727                                __func__, nr_sig_mrs);
 728                        goto out_free_rdma_mrs;
 729                }
 730        }
 731
 732        return 0;
 733
 734out_free_rdma_mrs:
 735        ib_mr_pool_destroy(qp, &qp->rdma_mrs);
 736        return ret;
 737}
 738
 739void rdma_rw_cleanup_mrs(struct ib_qp *qp)
 740{
 741        ib_mr_pool_destroy(qp, &qp->sig_mrs);
 742        ib_mr_pool_destroy(qp, &qp->rdma_mrs);
 743}
 744