linux/drivers/infiniband/ulp/iser/iser_memory.c
<<
>>
Prefs
   1/*
   2 * Copyright (c) 2004, 2005, 2006 Voltaire, Inc. All rights reserved.
   3 * Copyright (c) 2013-2014 Mellanox Technologies. All rights reserved.
   4 *
   5 * This software is available to you under a choice of one of two
   6 * licenses.  You may choose to be licensed under the terms of the GNU
   7 * General Public License (GPL) Version 2, available from the file
   8 * COPYING in the main directory of this source tree, or the
   9 * OpenIB.org BSD license below:
  10 *
  11 *     Redistribution and use in source and binary forms, with or
  12 *     without modification, are permitted provided that the following
  13 *     conditions are met:
  14 *
  15 *      - Redistributions of source code must retain the above
  16 *        copyright notice, this list of conditions and the following
  17 *        disclaimer.
  18 *
  19 *      - Redistributions in binary form must reproduce the above
  20 *        copyright notice, this list of conditions and the following
  21 *        disclaimer in the documentation and/or other materials
  22 *        provided with the distribution.
  23 *
  24 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  25 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  26 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  27 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
  28 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
  29 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  31 * SOFTWARE.
  32 */
  33#include <linux/module.h>
  34#include <linux/kernel.h>
  35#include <linux/slab.h>
  36#include <linux/mm.h>
  37#include <linux/highmem.h>
  38#include <linux/scatterlist.h>
  39
  40#include "iscsi_iser.h"
  41static
  42int iser_fast_reg_fmr(struct iscsi_iser_task *iser_task,
  43                      struct iser_data_buf *mem,
  44                      struct iser_reg_resources *rsc,
  45                      struct iser_mem_reg *mem_reg);
  46static
  47int iser_fast_reg_mr(struct iscsi_iser_task *iser_task,
  48                     struct iser_data_buf *mem,
  49                     struct iser_reg_resources *rsc,
  50                     struct iser_mem_reg *mem_reg);
  51
  52static const struct iser_reg_ops fastreg_ops = {
  53        .alloc_reg_res  = iser_alloc_fastreg_pool,
  54        .free_reg_res   = iser_free_fastreg_pool,
  55        .reg_mem        = iser_fast_reg_mr,
  56        .unreg_mem      = iser_unreg_mem_fastreg,
  57        .reg_desc_get   = iser_reg_desc_get_fr,
  58        .reg_desc_put   = iser_reg_desc_put_fr,
  59};
  60
  61static const struct iser_reg_ops fmr_ops = {
  62        .alloc_reg_res  = iser_alloc_fmr_pool,
  63        .free_reg_res   = iser_free_fmr_pool,
  64        .reg_mem        = iser_fast_reg_fmr,
  65        .unreg_mem      = iser_unreg_mem_fmr,
  66        .reg_desc_get   = iser_reg_desc_get_fmr,
  67        .reg_desc_put   = iser_reg_desc_put_fmr,
  68};
  69
  70void iser_reg_comp(struct ib_cq *cq, struct ib_wc *wc)
  71{
  72        iser_err_comp(wc, "memreg");
  73}
  74
  75int iser_assign_reg_ops(struct iser_device *device)
  76{
  77        struct ib_device *ib_dev = device->ib_device;
  78
  79        /* Assign function handles  - based on FMR support */
  80        if (ib_dev->ops.alloc_fmr && ib_dev->ops.dealloc_fmr &&
  81            ib_dev->ops.map_phys_fmr && ib_dev->ops.unmap_fmr) {
  82                iser_info("FMR supported, using FMR for registration\n");
  83                device->reg_ops = &fmr_ops;
  84        } else if (ib_dev->attrs.device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS) {
  85                iser_info("FastReg supported, using FastReg for registration\n");
  86                device->reg_ops = &fastreg_ops;
  87                device->remote_inv_sup = iser_always_reg;
  88        } else {
  89                iser_err("IB device does not support FMRs nor FastRegs, can't register memory\n");
  90                return -1;
  91        }
  92
  93        return 0;
  94}
  95
  96struct iser_fr_desc *
  97iser_reg_desc_get_fr(struct ib_conn *ib_conn)
  98{
  99        struct iser_fr_pool *fr_pool = &ib_conn->fr_pool;
 100        struct iser_fr_desc *desc;
 101        unsigned long flags;
 102
 103        spin_lock_irqsave(&fr_pool->lock, flags);
 104        desc = list_first_entry(&fr_pool->list,
 105                                struct iser_fr_desc, list);
 106        list_del(&desc->list);
 107        spin_unlock_irqrestore(&fr_pool->lock, flags);
 108
 109        return desc;
 110}
 111
 112void
 113iser_reg_desc_put_fr(struct ib_conn *ib_conn,
 114                     struct iser_fr_desc *desc)
 115{
 116        struct iser_fr_pool *fr_pool = &ib_conn->fr_pool;
 117        unsigned long flags;
 118
 119        spin_lock_irqsave(&fr_pool->lock, flags);
 120        list_add(&desc->list, &fr_pool->list);
 121        spin_unlock_irqrestore(&fr_pool->lock, flags);
 122}
 123
 124struct iser_fr_desc *
 125iser_reg_desc_get_fmr(struct ib_conn *ib_conn)
 126{
 127        struct iser_fr_pool *fr_pool = &ib_conn->fr_pool;
 128
 129        return list_first_entry(&fr_pool->list,
 130                                struct iser_fr_desc, list);
 131}
 132
 133void
 134iser_reg_desc_put_fmr(struct ib_conn *ib_conn,
 135                      struct iser_fr_desc *desc)
 136{
 137}
 138
 139static void iser_data_buf_dump(struct iser_data_buf *data,
 140                               struct ib_device *ibdev)
 141{
 142        struct scatterlist *sg;
 143        int i;
 144
 145        for_each_sg(data->sg, sg, data->dma_nents, i)
 146                iser_dbg("sg[%d] dma_addr:0x%lX page:0x%p "
 147                         "off:0x%x sz:0x%x dma_len:0x%x\n",
 148                         i, (unsigned long)sg_dma_address(sg),
 149                         sg_page(sg), sg->offset, sg->length, sg_dma_len(sg));
 150}
 151
 152static void iser_dump_page_vec(struct iser_page_vec *page_vec)
 153{
 154        int i;
 155
 156        iser_err("page vec npages %d data length %lld\n",
 157                 page_vec->npages, page_vec->fake_mr.length);
 158        for (i = 0; i < page_vec->npages; i++)
 159                iser_err("vec[%d]: %llx\n", i, page_vec->pages[i]);
 160}
 161
 162int iser_dma_map_task_data(struct iscsi_iser_task *iser_task,
 163                            struct iser_data_buf *data,
 164                            enum iser_data_dir iser_dir,
 165                            enum dma_data_direction dma_dir)
 166{
 167        struct ib_device *dev;
 168
 169        iser_task->dir[iser_dir] = 1;
 170        dev = iser_task->iser_conn->ib_conn.device->ib_device;
 171
 172        data->dma_nents = ib_dma_map_sg(dev, data->sg, data->size, dma_dir);
 173        if (data->dma_nents == 0) {
 174                iser_err("dma_map_sg failed!!!\n");
 175                return -EINVAL;
 176        }
 177        return 0;
 178}
 179
 180void iser_dma_unmap_task_data(struct iscsi_iser_task *iser_task,
 181                              struct iser_data_buf *data,
 182                              enum dma_data_direction dir)
 183{
 184        struct ib_device *dev;
 185
 186        dev = iser_task->iser_conn->ib_conn.device->ib_device;
 187        ib_dma_unmap_sg(dev, data->sg, data->size, dir);
 188}
 189
 190static int
 191iser_reg_dma(struct iser_device *device, struct iser_data_buf *mem,
 192             struct iser_mem_reg *reg)
 193{
 194        struct scatterlist *sg = mem->sg;
 195
 196        reg->sge.lkey = device->pd->local_dma_lkey;
 197        /*
 198         * FIXME: rework the registration code path to differentiate
 199         * rkey/lkey use cases
 200         */
 201
 202        if (device->pd->flags & IB_PD_UNSAFE_GLOBAL_RKEY)
 203                reg->rkey = device->pd->unsafe_global_rkey;
 204        else
 205                reg->rkey = 0;
 206        reg->sge.addr = sg_dma_address(&sg[0]);
 207        reg->sge.length = sg_dma_len(&sg[0]);
 208
 209        iser_dbg("Single DMA entry: lkey=0x%x, rkey=0x%x, addr=0x%llx,"
 210                 " length=0x%x\n", reg->sge.lkey, reg->rkey,
 211                 reg->sge.addr, reg->sge.length);
 212
 213        return 0;
 214}
 215
 216static int iser_set_page(struct ib_mr *mr, u64 addr)
 217{
 218        struct iser_page_vec *page_vec =
 219                container_of(mr, struct iser_page_vec, fake_mr);
 220
 221        page_vec->pages[page_vec->npages++] = addr;
 222
 223        return 0;
 224}
 225
 226static
 227int iser_fast_reg_fmr(struct iscsi_iser_task *iser_task,
 228                      struct iser_data_buf *mem,
 229                      struct iser_reg_resources *rsc,
 230                      struct iser_mem_reg *reg)
 231{
 232        struct ib_conn *ib_conn = &iser_task->iser_conn->ib_conn;
 233        struct iser_device *device = ib_conn->device;
 234        struct iser_page_vec *page_vec = rsc->page_vec;
 235        struct ib_fmr_pool *fmr_pool = rsc->fmr_pool;
 236        struct ib_pool_fmr *fmr;
 237        int ret, plen;
 238
 239        page_vec->npages = 0;
 240        page_vec->fake_mr.page_size = SIZE_4K;
 241        plen = ib_sg_to_pages(&page_vec->fake_mr, mem->sg,
 242                              mem->dma_nents, NULL, iser_set_page);
 243        if (unlikely(plen < mem->dma_nents)) {
 244                iser_err("page vec too short to hold this SG\n");
 245                iser_data_buf_dump(mem, device->ib_device);
 246                iser_dump_page_vec(page_vec);
 247                return -EINVAL;
 248        }
 249
 250        fmr  = ib_fmr_pool_map_phys(fmr_pool, page_vec->pages,
 251                                    page_vec->npages, page_vec->pages[0]);
 252        if (IS_ERR(fmr)) {
 253                ret = PTR_ERR(fmr);
 254                iser_err("ib_fmr_pool_map_phys failed: %d\n", ret);
 255                return ret;
 256        }
 257
 258        reg->sge.lkey = fmr->fmr->lkey;
 259        reg->rkey = fmr->fmr->rkey;
 260        reg->sge.addr = page_vec->fake_mr.iova;
 261        reg->sge.length = page_vec->fake_mr.length;
 262        reg->mem_h = fmr;
 263
 264        iser_dbg("fmr reg: lkey=0x%x, rkey=0x%x, addr=0x%llx,"
 265                 " length=0x%x\n", reg->sge.lkey, reg->rkey,
 266                 reg->sge.addr, reg->sge.length);
 267
 268        return 0;
 269}
 270
 271/**
 272 * Unregister (previosuly registered using FMR) memory.
 273 * If memory is non-FMR does nothing.
 274 */
 275void iser_unreg_mem_fmr(struct iscsi_iser_task *iser_task,
 276                        enum iser_data_dir cmd_dir)
 277{
 278        struct iser_mem_reg *reg = &iser_task->rdma_reg[cmd_dir];
 279
 280        if (!reg->mem_h)
 281                return;
 282
 283        iser_dbg("PHYSICAL Mem.Unregister mem_h %p\n", reg->mem_h);
 284
 285        ib_fmr_pool_unmap((struct ib_pool_fmr *)reg->mem_h);
 286
 287        reg->mem_h = NULL;
 288}
 289
 290void iser_unreg_mem_fastreg(struct iscsi_iser_task *iser_task,
 291                            enum iser_data_dir cmd_dir)
 292{
 293        struct iser_device *device = iser_task->iser_conn->ib_conn.device;
 294        struct iser_mem_reg *reg = &iser_task->rdma_reg[cmd_dir];
 295
 296        if (!reg->mem_h)
 297                return;
 298
 299        device->reg_ops->reg_desc_put(&iser_task->iser_conn->ib_conn,
 300                                     reg->mem_h);
 301        reg->mem_h = NULL;
 302}
 303
 304static void
 305iser_set_dif_domain(struct scsi_cmnd *sc, struct ib_sig_domain *domain)
 306{
 307        domain->sig_type = IB_SIG_TYPE_T10_DIF;
 308        domain->sig.dif.pi_interval = scsi_prot_interval(sc);
 309        domain->sig.dif.ref_tag = t10_pi_ref_tag(sc->request);
 310        /*
 311         * At the moment we hard code those, but in the future
 312         * we will take them from sc.
 313         */
 314        domain->sig.dif.apptag_check_mask = 0xffff;
 315        domain->sig.dif.app_escape = true;
 316        domain->sig.dif.ref_escape = true;
 317        if (sc->prot_flags & SCSI_PROT_REF_INCREMENT)
 318                domain->sig.dif.ref_remap = true;
 319};
 320
 321static int
 322iser_set_sig_attrs(struct scsi_cmnd *sc, struct ib_sig_attrs *sig_attrs)
 323{
 324        switch (scsi_get_prot_op(sc)) {
 325        case SCSI_PROT_WRITE_INSERT:
 326        case SCSI_PROT_READ_STRIP:
 327                sig_attrs->mem.sig_type = IB_SIG_TYPE_NONE;
 328                iser_set_dif_domain(sc, &sig_attrs->wire);
 329                sig_attrs->wire.sig.dif.bg_type = IB_T10DIF_CRC;
 330                break;
 331        case SCSI_PROT_READ_INSERT:
 332        case SCSI_PROT_WRITE_STRIP:
 333                sig_attrs->wire.sig_type = IB_SIG_TYPE_NONE;
 334                iser_set_dif_domain(sc, &sig_attrs->mem);
 335                sig_attrs->mem.sig.dif.bg_type = sc->prot_flags & SCSI_PROT_IP_CHECKSUM ?
 336                                                IB_T10DIF_CSUM : IB_T10DIF_CRC;
 337                break;
 338        case SCSI_PROT_READ_PASS:
 339        case SCSI_PROT_WRITE_PASS:
 340                iser_set_dif_domain(sc, &sig_attrs->wire);
 341                sig_attrs->wire.sig.dif.bg_type = IB_T10DIF_CRC;
 342                iser_set_dif_domain(sc, &sig_attrs->mem);
 343                sig_attrs->mem.sig.dif.bg_type = sc->prot_flags & SCSI_PROT_IP_CHECKSUM ?
 344                                                IB_T10DIF_CSUM : IB_T10DIF_CRC;
 345                break;
 346        default:
 347                iser_err("Unsupported PI operation %d\n",
 348                         scsi_get_prot_op(sc));
 349                return -EINVAL;
 350        }
 351
 352        return 0;
 353}
 354
 355static inline void
 356iser_set_prot_checks(struct scsi_cmnd *sc, u8 *mask)
 357{
 358        *mask = 0;
 359        if (sc->prot_flags & SCSI_PROT_REF_CHECK)
 360                *mask |= IB_SIG_CHECK_REFTAG;
 361        if (sc->prot_flags & SCSI_PROT_GUARD_CHECK)
 362                *mask |= IB_SIG_CHECK_GUARD;
 363}
 364
 365static inline void
 366iser_inv_rkey(struct ib_send_wr *inv_wr,
 367              struct ib_mr *mr,
 368              struct ib_cqe *cqe,
 369              struct ib_send_wr *next_wr)
 370{
 371        inv_wr->opcode = IB_WR_LOCAL_INV;
 372        inv_wr->wr_cqe = cqe;
 373        inv_wr->ex.invalidate_rkey = mr->rkey;
 374        inv_wr->send_flags = 0;
 375        inv_wr->num_sge = 0;
 376        inv_wr->next = next_wr;
 377}
 378
 379static int
 380iser_reg_sig_mr(struct iscsi_iser_task *iser_task,
 381                struct iser_data_buf *mem,
 382                struct iser_data_buf *sig_mem,
 383                struct iser_reg_resources *rsc,
 384                struct iser_mem_reg *sig_reg)
 385{
 386        struct iser_tx_desc *tx_desc = &iser_task->desc;
 387        struct ib_cqe *cqe = &iser_task->iser_conn->ib_conn.reg_cqe;
 388        struct ib_mr *mr = rsc->sig_mr;
 389        struct ib_sig_attrs *sig_attrs = mr->sig_attrs;
 390        struct ib_reg_wr *wr = &tx_desc->reg_wr;
 391        int ret;
 392
 393        memset(sig_attrs, 0, sizeof(*sig_attrs));
 394        ret = iser_set_sig_attrs(iser_task->sc, sig_attrs);
 395        if (ret)
 396                goto err;
 397
 398        iser_set_prot_checks(iser_task->sc, &sig_attrs->check_mask);
 399
 400        if (rsc->mr_valid)
 401                iser_inv_rkey(&tx_desc->inv_wr, mr, cqe, &wr->wr);
 402
 403        ib_update_fast_reg_key(mr, ib_inc_rkey(mr->rkey));
 404
 405        ret = ib_map_mr_sg_pi(mr, mem->sg, mem->dma_nents, NULL,
 406                              sig_mem->sg, sig_mem->dma_nents, NULL, SZ_4K);
 407        if (unlikely(ret)) {
 408                iser_err("failed to map PI sg (%d)\n",
 409                         mem->dma_nents + sig_mem->dma_nents);
 410                goto err;
 411        }
 412
 413        memset(wr, 0, sizeof(*wr));
 414        wr->wr.next = &tx_desc->send_wr;
 415        wr->wr.opcode = IB_WR_REG_MR_INTEGRITY;
 416        wr->wr.wr_cqe = cqe;
 417        wr->wr.num_sge = 0;
 418        wr->wr.send_flags = 0;
 419        wr->mr = mr;
 420        wr->key = mr->rkey;
 421        wr->access = IB_ACCESS_LOCAL_WRITE |
 422                     IB_ACCESS_REMOTE_READ |
 423                     IB_ACCESS_REMOTE_WRITE;
 424        rsc->mr_valid = 1;
 425
 426        sig_reg->sge.lkey = mr->lkey;
 427        sig_reg->rkey = mr->rkey;
 428        sig_reg->sge.addr = mr->iova;
 429        sig_reg->sge.length = mr->length;
 430
 431        iser_dbg("lkey=0x%x rkey=0x%x addr=0x%llx length=%u\n",
 432                 sig_reg->sge.lkey, sig_reg->rkey, sig_reg->sge.addr,
 433                 sig_reg->sge.length);
 434err:
 435        return ret;
 436}
 437
 438static int iser_fast_reg_mr(struct iscsi_iser_task *iser_task,
 439                            struct iser_data_buf *mem,
 440                            struct iser_reg_resources *rsc,
 441                            struct iser_mem_reg *reg)
 442{
 443        struct iser_tx_desc *tx_desc = &iser_task->desc;
 444        struct ib_cqe *cqe = &iser_task->iser_conn->ib_conn.reg_cqe;
 445        struct ib_mr *mr = rsc->mr;
 446        struct ib_reg_wr *wr = &tx_desc->reg_wr;
 447        int n;
 448
 449        if (rsc->mr_valid)
 450                iser_inv_rkey(&tx_desc->inv_wr, mr, cqe, &wr->wr);
 451
 452        ib_update_fast_reg_key(mr, ib_inc_rkey(mr->rkey));
 453
 454        n = ib_map_mr_sg(mr, mem->sg, mem->dma_nents, NULL, SIZE_4K);
 455        if (unlikely(n != mem->dma_nents)) {
 456                iser_err("failed to map sg (%d/%d)\n",
 457                         n, mem->dma_nents);
 458                return n < 0 ? n : -EINVAL;
 459        }
 460
 461        wr->wr.next = &tx_desc->send_wr;
 462        wr->wr.opcode = IB_WR_REG_MR;
 463        wr->wr.wr_cqe = cqe;
 464        wr->wr.send_flags = 0;
 465        wr->wr.num_sge = 0;
 466        wr->mr = mr;
 467        wr->key = mr->rkey;
 468        wr->access = IB_ACCESS_LOCAL_WRITE  |
 469                     IB_ACCESS_REMOTE_WRITE |
 470                     IB_ACCESS_REMOTE_READ;
 471
 472        rsc->mr_valid = 1;
 473
 474        reg->sge.lkey = mr->lkey;
 475        reg->rkey = mr->rkey;
 476        reg->sge.addr = mr->iova;
 477        reg->sge.length = mr->length;
 478
 479        iser_dbg("lkey=0x%x rkey=0x%x addr=0x%llx length=0x%x\n",
 480                 reg->sge.lkey, reg->rkey, reg->sge.addr, reg->sge.length);
 481
 482        return 0;
 483}
 484
 485static int
 486iser_reg_data_sg(struct iscsi_iser_task *task,
 487                 struct iser_data_buf *mem,
 488                 struct iser_fr_desc *desc,
 489                 bool use_dma_key,
 490                 struct iser_mem_reg *reg)
 491{
 492        struct iser_device *device = task->iser_conn->ib_conn.device;
 493
 494        if (use_dma_key)
 495                return iser_reg_dma(device, mem, reg);
 496
 497        return device->reg_ops->reg_mem(task, mem, &desc->rsc, reg);
 498}
 499
 500int iser_reg_rdma_mem(struct iscsi_iser_task *task,
 501                      enum iser_data_dir dir,
 502                      bool all_imm)
 503{
 504        struct ib_conn *ib_conn = &task->iser_conn->ib_conn;
 505        struct iser_device *device = ib_conn->device;
 506        struct iser_data_buf *mem = &task->data[dir];
 507        struct iser_mem_reg *reg = &task->rdma_reg[dir];
 508        struct iser_fr_desc *desc = NULL;
 509        bool use_dma_key;
 510        int err;
 511
 512        use_dma_key = mem->dma_nents == 1 && (all_imm || !iser_always_reg) &&
 513                      scsi_get_prot_op(task->sc) == SCSI_PROT_NORMAL;
 514
 515        if (!use_dma_key) {
 516                desc = device->reg_ops->reg_desc_get(ib_conn);
 517                reg->mem_h = desc;
 518        }
 519
 520        if (scsi_get_prot_op(task->sc) == SCSI_PROT_NORMAL) {
 521                err = iser_reg_data_sg(task, mem, desc, use_dma_key, reg);
 522                if (unlikely(err))
 523                        goto err_reg;
 524        } else {
 525                err = iser_reg_sig_mr(task, mem, &task->prot[dir],
 526                                      &desc->rsc, reg);
 527                if (unlikely(err))
 528                        goto err_reg;
 529
 530                desc->sig_protected = 1;
 531        }
 532
 533        return 0;
 534
 535err_reg:
 536        if (desc)
 537                device->reg_ops->reg_desc_put(ib_conn, desc);
 538
 539        return err;
 540}
 541
 542void iser_unreg_rdma_mem(struct iscsi_iser_task *task,
 543                         enum iser_data_dir dir)
 544{
 545        struct iser_device *device = task->iser_conn->ib_conn.device;
 546
 547        device->reg_ops->unreg_mem(task, dir);
 548}
 549