linux/drivers/infiniband/sw/rdmavt/mr.c
/*
 * Copyright(c) 2016 Intel Corporation.
 *
 * This file is provided under a dual BSD/GPLv2 license.  When using or
 * redistributing this file, you may do so under either license.
 *
 * GPL LICENSE SUMMARY
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of version 2 of the GNU General Public License as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * BSD LICENSE
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 *  - Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *  - Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *  - Neither the name of Intel Corporation nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 */

#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <rdma/ib_umem.h>
#include <rdma/rdma_vt.h>
#include "vt.h"
#include "mr.h"
#include "trace.h"

/**
 * rvt_driver_mr_init - Init MR resources per driver
 * @rdi: rvt dev struct
 *
 * Do any initialization needed when a driver registers with rdmavt.
 *
 * Return: 0 on success or errno on failure
 */
int rvt_driver_mr_init(struct rvt_dev_info *rdi)
{
        unsigned int lkey_table_size = rdi->dparms.lkey_table_size;
        unsigned int lk_tab_size;
        int i;

        /*
         * The top lkey_table_size bits are used to index the
         * table.  The lower 8 bits can be owned by the user (copied from
         * the LKEY).  The remaining bits act as a generation number or tag.
         */
        if (!lkey_table_size)
                return -EINVAL;

        spin_lock_init(&rdi->lkey_table.lock);

        /* ensure generation is at least 4 bits */
        if (lkey_table_size > RVT_MAX_LKEY_TABLE_BITS) {
                rvt_pr_warn(rdi, "lkey bits %u too large, reduced to %u\n",
                            lkey_table_size, RVT_MAX_LKEY_TABLE_BITS);
                rdi->dparms.lkey_table_size = RVT_MAX_LKEY_TABLE_BITS;
                lkey_table_size = rdi->dparms.lkey_table_size;
        }
        rdi->lkey_table.max = 1 << lkey_table_size;
        rdi->lkey_table.shift = 32 - lkey_table_size;
        lk_tab_size = rdi->lkey_table.max * sizeof(*rdi->lkey_table.table);
        rdi->lkey_table.table = (struct rvt_mregion __rcu **)
                               vmalloc_node(lk_tab_size, rdi->dparms.node);
        if (!rdi->lkey_table.table)
                return -ENOMEM;

        RCU_INIT_POINTER(rdi->dma_mr, NULL);
        for (i = 0; i < rdi->lkey_table.max; i++)
                RCU_INIT_POINTER(rdi->lkey_table.table[i], NULL);

        return 0;
}
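
/*
 * Illustrative sketch (hypothetical helper, not part of rdmavt): how an
 * LKEY built with the layout described above decomposes, assuming
 * dparms.lkey_table_size was set to 16.  Only the field boundaries are
 * the point here.
 */
static inline void example_decode_lkey(u32 lkey, u32 *index, u32 *gen,
                                       u32 *user)
{
        *index = lkey >> (32 - 16);     /* top bits index lkey_table.table */
        *gen = (lkey >> 8) & 0xff;      /* middle bits: generation tag */
        *user = lkey & 0xff;            /* low 8 bits owned by the user */
}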

/**
 * rvt_mr_exit - clean up MR
 * @rdi: rvt dev structure
 *
 * Called when drivers have unregistered or perhaps failed to register with us
 */
void rvt_mr_exit(struct rvt_dev_info *rdi)
{
        if (rdi->dma_mr)
                rvt_pr_err(rdi, "DMA MR not null!\n");

        vfree(rdi->lkey_table.table);
}

static void rvt_deinit_mregion(struct rvt_mregion *mr)
{
        int i = mr->mapsz;

        mr->mapsz = 0;
        while (i)
                kfree(mr->map[--i]);
        percpu_ref_exit(&mr->refcount);
}

static void __rvt_mregion_complete(struct percpu_ref *ref)
{
        struct rvt_mregion *mr = container_of(ref, struct rvt_mregion,
                                              refcount);

        complete(&mr->comp);
}

static int rvt_init_mregion(struct rvt_mregion *mr, struct ib_pd *pd,
                            int count, unsigned int percpu_flags)
{
        int m, i = 0;
        struct rvt_dev_info *dev = ib_to_rvt(pd->device);

        mr->mapsz = 0;
        m = (count + RVT_SEGSZ - 1) / RVT_SEGSZ;
        for (; i < m; i++) {
                mr->map[i] = kzalloc_node(sizeof(*mr->map[0]), GFP_KERNEL,
                                          dev->dparms.node);
                if (!mr->map[i])
                        goto bail;
                mr->mapsz++;
        }
        init_completion(&mr->comp);
        /* count returning the ptr to user */
        if (percpu_ref_init(&mr->refcount, &__rvt_mregion_complete,
                            percpu_flags, GFP_KERNEL))
                goto bail;

        atomic_set(&mr->lkey_invalid, 0);
        mr->pd = pd;
        mr->max_segs = count;
        return 0;
bail:
        rvt_deinit_mregion(mr);
        return -ENOMEM;
}

/**
 * rvt_alloc_lkey - allocate an lkey
 * @mr: memory region that this lkey protects
 * @dma_region: 0->normal key, 1->restricted DMA key
 *
 * Returns 0 if successful, otherwise returns -errno.
 *
 * Increments mr reference count as required.
 *
 * Sets the lkey field of mr for non-DMA regions.
 */
static int rvt_alloc_lkey(struct rvt_mregion *mr, int dma_region)
{
        unsigned long flags;
        u32 r;
        u32 n;
        int ret = 0;
        struct rvt_dev_info *dev = ib_to_rvt(mr->pd->device);
        struct rvt_lkey_table *rkt = &dev->lkey_table;

        rvt_get_mr(mr);
        spin_lock_irqsave(&rkt->lock, flags);

        /* special case for dma_mr lkey == 0 */
        if (dma_region) {
                struct rvt_mregion *tmr;

                tmr = rcu_access_pointer(dev->dma_mr);
                if (!tmr) {
                        rcu_assign_pointer(dev->dma_mr, mr);
                        mr->lkey_published = 1;
                        rvt_get_mr(mr);
                }
                goto success;
        }

        /* Find the next available LKEY */
        r = rkt->next;
        n = r;
        for (;;) {
                if (!rcu_access_pointer(rkt->table[r]))
                        break;
                r = (r + 1) & (rkt->max - 1);
                if (r == n)
                        goto bail;
        }
        rkt->next = (r + 1) & (rkt->max - 1);
        /*
         * Make sure lkey is never zero which is reserved to indicate an
         * unrestricted LKEY.
         */
        rkt->gen++;
        /*
         * bits are capped to ensure enough bits for generation number
         */
        mr->lkey = (r << (32 - dev->dparms.lkey_table_size)) |
                ((((1 << (24 - dev->dparms.lkey_table_size)) - 1) & rkt->gen)
                 << 8);
        if (mr->lkey == 0) {
                mr->lkey |= 1 << 8;
                rkt->gen++;
        }
        rcu_assign_pointer(rkt->table[r], mr);
        mr->lkey_published = 1;
success:
        spin_unlock_irqrestore(&rkt->lock, flags);
out:
        return ret;
bail:
        rvt_put_mr(mr);
        spin_unlock_irqrestore(&rkt->lock, flags);
        ret = -ENOMEM;
        goto out;
}
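
/*
 * Worked example for the key construction above (comment only): with
 * dparms.lkey_table_size = 16, free slot r = 5 and rkt->gen = 3,
 *
 *      lkey = (5 << 16) | (((1 << 8) - 1) & 3) << 8 = 0x00050300
 *
 * i.e. the table slot lands in the top 16 bits, the generation tag in
 * bits 15:8, and the low 8 bits stay clear for the user-owned part.
 */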

/**
 * rvt_free_lkey - free an lkey
 * @mr: mr to free from tables
 */
static void rvt_free_lkey(struct rvt_mregion *mr)
{
        unsigned long flags;
        u32 lkey = mr->lkey;
        u32 r;
        struct rvt_dev_info *dev = ib_to_rvt(mr->pd->device);
        struct rvt_lkey_table *rkt = &dev->lkey_table;
        int freed = 0;

        spin_lock_irqsave(&rkt->lock, flags);
        if (!lkey) {
                if (mr->lkey_published) {
                        RCU_INIT_POINTER(dev->dma_mr, NULL);
                        rvt_put_mr(mr);
                }
        } else {
                if (!mr->lkey_published)
                        goto out;
                r = lkey >> (32 - dev->dparms.lkey_table_size);
                RCU_INIT_POINTER(rkt->table[r], NULL);
        }
        mr->lkey_published = 0;
        freed++;
out:
        spin_unlock_irqrestore(&rkt->lock, flags);
        if (freed) {
                synchronize_rcu();
                percpu_ref_kill(&mr->refcount);
        }
}

static struct rvt_mr *__rvt_alloc_mr(int count, struct ib_pd *pd)
{
        struct rvt_mr *mr;
        int rval = -ENOMEM;
        int m;

        /* Allocate struct plus pointers to first level page tables. */
        m = (count + RVT_SEGSZ - 1) / RVT_SEGSZ;
        mr = kzalloc(sizeof(*mr) + m * sizeof(mr->mr.map[0]), GFP_KERNEL);
        if (!mr)
                goto bail;

        rval = rvt_init_mregion(&mr->mr, pd, count, 0);
        if (rval)
                goto bail;
        /*
         * ib_reg_phys_mr() will initialize mr->ibmr except for
         * lkey and rkey.
         */
        rval = rvt_alloc_lkey(&mr->mr, 0);
        if (rval)
                goto bail_mregion;
        mr->ibmr.lkey = mr->mr.lkey;
        mr->ibmr.rkey = mr->mr.lkey;
done:
        return mr;

bail_mregion:
        rvt_deinit_mregion(&mr->mr);
bail:
        kfree(mr);
        mr = ERR_PTR(rval);
        goto done;
}

static void __rvt_free_mr(struct rvt_mr *mr)
{
        rvt_free_lkey(&mr->mr);
        rvt_deinit_mregion(&mr->mr);
        kfree(mr);
}

/**
 * rvt_get_dma_mr - get a DMA memory region
 * @pd: protection domain for this memory region
 * @acc: access flags
 *
 * Return: the memory region on success, otherwise returns an errno.
 * Note that all DMA addresses should be created via the functions in
 * struct dma_virt_ops.
 */
struct ib_mr *rvt_get_dma_mr(struct ib_pd *pd, int acc)
{
        struct rvt_mr *mr;
        struct ib_mr *ret;
        int rval;

        if (ibpd_to_rvtpd(pd)->user)
                return ERR_PTR(-EPERM);

        mr = kzalloc(sizeof(*mr), GFP_KERNEL);
        if (!mr) {
                ret = ERR_PTR(-ENOMEM);
                goto bail;
        }

        rval = rvt_init_mregion(&mr->mr, pd, 0, 0);
        if (rval) {
                ret = ERR_PTR(rval);
                goto bail;
        }

        rval = rvt_alloc_lkey(&mr->mr, 1);
        if (rval) {
                ret = ERR_PTR(rval);
                goto bail_mregion;
        }

        mr->mr.access_flags = acc;
        ret = &mr->ibmr;
done:
        return ret;

bail_mregion:
        rvt_deinit_mregion(&mr->mr);
bail:
        kfree(mr);
        goto done;
}
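
/*
 * Usage sketch (hypothetical kernel caller; this is normally reached
 * through the ib_core verbs layer rather than called directly):
 *
 *      struct ib_mr *mr = rvt_get_dma_mr(pd, IB_ACCESS_LOCAL_WRITE);
 *
 *      if (IS_ERR(mr))
 *              return PTR_ERR(mr);
 *
 * On success mr->lkey is 0, the unrestricted key used for kernel
 * virtual addresses.
 */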

/**
 * rvt_reg_user_mr - register a userspace memory region
 * @pd: protection domain for this memory region
 * @start: starting userspace address
 * @length: length of region to register
 * @virt_addr: associated virtual address
 * @mr_access_flags: access flags for this memory region
 * @udata: unused by the driver
 *
 * Return: the memory region on success, otherwise returns an errno.
 */
struct ib_mr *rvt_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
                              u64 virt_addr, int mr_access_flags,
                              struct ib_udata *udata)
{
        struct rvt_mr *mr;
        struct ib_umem *umem;
        struct scatterlist *sg;
        int n, m, entry;
        struct ib_mr *ret;

        if (length == 0)
                return ERR_PTR(-EINVAL);

        umem = ib_umem_get(pd->uobject->context, start, length,
                           mr_access_flags, 0);
        if (IS_ERR(umem))
                return (void *)umem;

        n = umem->nmap;

        mr = __rvt_alloc_mr(n, pd);
        if (IS_ERR(mr)) {
                ret = (struct ib_mr *)mr;
                goto bail_umem;
        }

        mr->mr.user_base = start;
        mr->mr.iova = virt_addr;
        mr->mr.length = length;
        mr->mr.offset = ib_umem_offset(umem);
        mr->mr.access_flags = mr_access_flags;
        mr->umem = umem;

        if (is_power_of_2(umem->page_size))
                mr->mr.page_shift = ilog2(umem->page_size);
        m = 0;
        n = 0;
        for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) {
                void *vaddr;

                vaddr = page_address(sg_page(sg));
                if (!vaddr) {
                        ret = ERR_PTR(-EINVAL);
                        goto bail_inval;
                }
                mr->mr.map[m]->segs[n].vaddr = vaddr;
                mr->mr.map[m]->segs[n].length = umem->page_size;
                trace_rvt_mr_user_seg(&mr->mr, m, n, vaddr, umem->page_size);
                n++;
                if (n == RVT_SEGSZ) {
                        m++;
                        n = 0;
                }
        }
        return &mr->ibmr;

bail_inval:
        __rvt_free_mr(mr);

bail_umem:
        ib_umem_release(umem);

        return ret;
}
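
/*
 * Layout sketch for the loop above (assuming RVT_SEGSZ = 512, i.e.
 * 4 KiB pages and 8-byte pointers): page i of the umem lands in
 *
 *      mr->mr.map[i / 512]->segs[i % 512]
 *
 * so a 2 MiB user registration (512 pages) exactly fills map[0].
 */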

/**
 * rvt_dereg_mr - unregister and free a memory region
 * @ibmr: the memory region to free
 *
 * Note that this is called to free MRs created by rvt_get_dma_mr()
 * or rvt_reg_user_mr().
 *
 * Returns 0 on success.
 */
int rvt_dereg_mr(struct ib_mr *ibmr)
{
        struct rvt_mr *mr = to_imr(ibmr);
        struct rvt_dev_info *rdi = ib_to_rvt(ibmr->pd->device);
        int ret = 0;
        unsigned long timeout;

        rvt_free_lkey(&mr->mr);

        rvt_put_mr(&mr->mr); /* will set completion if last */
        timeout = wait_for_completion_timeout(&mr->mr.comp, 5 * HZ);
        if (!timeout) {
                rvt_pr_err(rdi,
                           "rvt_dereg_mr timeout mr %p pd %p\n",
                           mr, mr->mr.pd);
                rvt_get_mr(&mr->mr);
                ret = -EBUSY;
                goto out;
        }
        rvt_deinit_mregion(&mr->mr);
        if (mr->umem)
                ib_umem_release(mr->umem);
        kfree(mr);
out:
        return ret;
}

/**
 * rvt_alloc_mr - Allocate a memory region usable with fast register work requests
 * @pd: protection domain for this memory region
 * @mr_type: mem region type
 * @max_num_sg: Max number of segments allowed
 *
 * Return: the memory region on success, otherwise return an errno.
 */
struct ib_mr *rvt_alloc_mr(struct ib_pd *pd,
                           enum ib_mr_type mr_type,
                           u32 max_num_sg)
{
        struct rvt_mr *mr;

        if (mr_type != IB_MR_TYPE_MEM_REG)
                return ERR_PTR(-EINVAL);

        mr = __rvt_alloc_mr(max_num_sg, pd);
        if (IS_ERR(mr))
                return (struct ib_mr *)mr;

        return &mr->ibmr;
}

/**
 * rvt_set_page - page assignment function called by ib_sg_to_pages
 * @ibmr: memory region
 * @addr: dma address of mapped page
 *
 * Return: 0 on success
 */
static int rvt_set_page(struct ib_mr *ibmr, u64 addr)
{
        struct rvt_mr *mr = to_imr(ibmr);
        u32 ps = 1 << mr->mr.page_shift;
        u32 mapped_segs = mr->mr.length >> mr->mr.page_shift;
        int m, n;

        if (unlikely(mapped_segs == mr->mr.max_segs))
                return -ENOMEM;

        if (mr->mr.length == 0) {
                mr->mr.user_base = addr;
                mr->mr.iova = addr;
        }

        m = mapped_segs / RVT_SEGSZ;
        n = mapped_segs % RVT_SEGSZ;
        mr->mr.map[m]->segs[n].vaddr = (void *)addr;
        mr->mr.map[m]->segs[n].length = ps;
        trace_rvt_mr_page_seg(&mr->mr, m, n, (void *)addr, ps);
        mr->mr.length += ps;

        return 0;
}
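
/*
 * Worked example for the indexing above (comment only): with
 * page_shift = 12 and two pages already mapped (mr.length = 0x2000),
 * mapped_segs = 0x2000 >> 12 = 2, so the next page is stored in
 * map[2 / RVT_SEGSZ]->segs[2 % RVT_SEGSZ] and mr.length grows to
 * 0x3000.
 */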

/**
 * rvt_map_mr_sg - map sg list and set it for the memory region
 * @ibmr: memory region
 * @sg: dma mapped scatterlist
 * @sg_nents: number of entries in sg
 * @sg_offset: offset in bytes into sg
 *
 * Return: number of sg elements mapped to the memory region
 */
int rvt_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg,
                  int sg_nents, unsigned int *sg_offset)
{
        struct rvt_mr *mr = to_imr(ibmr);

        mr->mr.length = 0;
        mr->mr.page_shift = PAGE_SHIFT;
        return ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset,
                              rvt_set_page);
}
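
/*
 * Usage sketch (hypothetical ULP caller): this entry point is normally
 * reached through ib_map_mr_sg(); ib_sg_to_pages() then hands each page
 * to rvt_set_page() above.
 *
 *      int n = ib_map_mr_sg(ibmr, sg, sg_nents, NULL, PAGE_SIZE);
 *
 *      if (unlikely(n < sg_nents))
 *              return -EINVAL;         (fewer pages fit than requested)
 */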

/**
 * rvt_fast_reg_mr - fast register physical MR
 * @qp: the queue pair where the work request comes from
 * @ibmr: the memory region to be registered
 * @key: updated key for this memory region
 * @access: access flags for this memory region
 *
 * Returns 0 on success.
 */
int rvt_fast_reg_mr(struct rvt_qp *qp, struct ib_mr *ibmr, u32 key,
                    int access)
{
        struct rvt_mr *mr = to_imr(ibmr);

        if (qp->ibqp.pd != mr->mr.pd)
                return -EACCES;

        /* not applicable to dma MR or user MR */
        if (!mr->mr.lkey || mr->umem)
                return -EINVAL;

        if ((key & 0xFFFFFF00) != (mr->mr.lkey & 0xFFFFFF00))
                return -EINVAL;

        ibmr->lkey = key;
        ibmr->rkey = key;
        mr->mr.lkey = key;
        mr->mr.access_flags = access;
        atomic_set(&mr->mr.lkey_invalid, 0);

        return 0;
}
EXPORT_SYMBOL(rvt_fast_reg_mr);
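
/*
 * Worked example for the key check above (comment only): only the low
 * 8 bits of the key may change on a fast register.  If mr->mr.lkey is
 * 0x00050300, key = 0x000503ab passes the 0xFFFFFF00 mask comparison,
 * while key = 0x00050400 differs in the index/generation bits and is
 * rejected with -EINVAL.
 */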

/**
 * rvt_invalidate_rkey - invalidate an MR rkey
 * @qp: queue pair associated with the invalidate op
 * @rkey: rkey to invalidate
 *
 * Returns 0 on success.
 */
int rvt_invalidate_rkey(struct rvt_qp *qp, u32 rkey)
{
        struct rvt_dev_info *dev = ib_to_rvt(qp->ibqp.device);
        struct rvt_lkey_table *rkt = &dev->lkey_table;
        struct rvt_mregion *mr;

        if (rkey == 0)
                return -EINVAL;

        rcu_read_lock();
        mr = rcu_dereference(
                rkt->table[(rkey >> (32 - dev->dparms.lkey_table_size))]);
        if (unlikely(!mr || mr->lkey != rkey || qp->ibqp.pd != mr->pd))
                goto bail;

        atomic_set(&mr->lkey_invalid, 1);
        rcu_read_unlock();
        return 0;

bail:
        rcu_read_unlock();
        return -EINVAL;
}
EXPORT_SYMBOL(rvt_invalidate_rkey);
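
/*
 * Call-path sketch (assumed, for orientation): a driver typically calls
 * rvt_invalidate_rkey() while processing a local invalidate work
 * request, e.g.:
 *
 *      if (wqe->wr.opcode == IB_WR_LOCAL_INV)
 *              ret = rvt_invalidate_rkey(qp, wqe->wr.ex.invalidate_rkey);
 */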

/**
 * rvt_alloc_fmr - allocate a fast memory region
 * @pd: the protection domain for this memory region
 * @mr_access_flags: access flags for this memory region
 * @fmr_attr: fast memory region attributes
 *
 * Return: the memory region on success, otherwise returns an errno.
 */
struct ib_fmr *rvt_alloc_fmr(struct ib_pd *pd, int mr_access_flags,
                             struct ib_fmr_attr *fmr_attr)
{
        struct rvt_fmr *fmr;
        int m;
        struct ib_fmr *ret;
        int rval = -ENOMEM;

        /* Allocate struct plus pointers to first level page tables. */
        m = (fmr_attr->max_pages + RVT_SEGSZ - 1) / RVT_SEGSZ;
        fmr = kzalloc(sizeof(*fmr) + m * sizeof(fmr->mr.map[0]), GFP_KERNEL);
        if (!fmr)
                goto bail;

        rval = rvt_init_mregion(&fmr->mr, pd, fmr_attr->max_pages,
                                PERCPU_REF_INIT_ATOMIC);
        if (rval)
                goto bail;

        /*
         * ib_alloc_fmr() will initialize fmr->ibfmr except for lkey &
         * rkey.
         */
        rval = rvt_alloc_lkey(&fmr->mr, 0);
        if (rval)
                goto bail_mregion;
        fmr->ibfmr.rkey = fmr->mr.lkey;
        fmr->ibfmr.lkey = fmr->mr.lkey;
        /*
         * Resources are allocated but no valid mapping (RKEY can't be
         * used).
         */
        fmr->mr.access_flags = mr_access_flags;
        fmr->mr.max_segs = fmr_attr->max_pages;
        fmr->mr.page_shift = fmr_attr->page_shift;

        ret = &fmr->ibfmr;
done:
        return ret;

bail_mregion:
        rvt_deinit_mregion(&fmr->mr);
bail:
        kfree(fmr);
        ret = ERR_PTR(rval);
        goto done;
}

/**
 * rvt_map_phys_fmr - set up a fast memory region
 * @ibfmr: the fast memory region to set up
 * @page_list: the list of pages to associate with the fast memory region
 * @list_len: the number of pages to associate with the fast memory region
 * @iova: the virtual address of the start of the fast memory region
 *
 * This may be called from interrupt context.
 *
 * Return: 0 on success
 */
int rvt_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list,
                     int list_len, u64 iova)
{
        struct rvt_fmr *fmr = to_ifmr(ibfmr);
        struct rvt_lkey_table *rkt;
        unsigned long flags;
        int m, n;
        unsigned long i;
        u32 ps;
        struct rvt_dev_info *rdi = ib_to_rvt(ibfmr->device);

        i = atomic_long_read(&fmr->mr.refcount.count);
        if (i > 2)
                return -EBUSY;

        if (list_len > fmr->mr.max_segs)
                return -EINVAL;

        rkt = &rdi->lkey_table;
        spin_lock_irqsave(&rkt->lock, flags);
        fmr->mr.user_base = iova;
        fmr->mr.iova = iova;
        ps = 1 << fmr->mr.page_shift;
        fmr->mr.length = list_len * ps;
        m = 0;
        n = 0;
        for (i = 0; i < list_len; i++) {
                fmr->mr.map[m]->segs[n].vaddr = (void *)page_list[i];
                fmr->mr.map[m]->segs[n].length = ps;
                trace_rvt_mr_fmr_seg(&fmr->mr, m, n, (void *)page_list[i], ps);
                if (++n == RVT_SEGSZ) {
                        m++;
                        n = 0;
                }
        }
        spin_unlock_irqrestore(&rkt->lock, flags);
        return 0;
}
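
/*
 * Note on the refcount test above (an informal reading, not guaranteed
 * by the percpu_ref API): the reference count starts at 1 on init and
 * rvt_alloc_lkey() takes one more when the key is published, so a count
 * above 2 suggests another agent still holds the mapping and remapping
 * now would be unsafe, hence -EBUSY.
 */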

/**
 * rvt_unmap_fmr - unmap fast memory regions
 * @fmr_list: the list of fast memory regions to unmap
 *
 * Return: 0 on success.
 */
int rvt_unmap_fmr(struct list_head *fmr_list)
{
        struct rvt_fmr *fmr;
        struct rvt_lkey_table *rkt;
        unsigned long flags;
        struct rvt_dev_info *rdi;

        list_for_each_entry(fmr, fmr_list, ibfmr.list) {
                rdi = ib_to_rvt(fmr->ibfmr.device);
                rkt = &rdi->lkey_table;
                spin_lock_irqsave(&rkt->lock, flags);
                fmr->mr.user_base = 0;
                fmr->mr.iova = 0;
                fmr->mr.length = 0;
                spin_unlock_irqrestore(&rkt->lock, flags);
        }
        return 0;
}

/**
 * rvt_dealloc_fmr - deallocate a fast memory region
 * @ibfmr: the fast memory region to deallocate
 *
 * Return: 0 on success.
 */
int rvt_dealloc_fmr(struct ib_fmr *ibfmr)
{
        struct rvt_fmr *fmr = to_ifmr(ibfmr);
        int ret = 0;
        unsigned long timeout;

        rvt_free_lkey(&fmr->mr);
        rvt_put_mr(&fmr->mr); /* will set completion if last */
        timeout = wait_for_completion_timeout(&fmr->mr.comp, 5 * HZ);
        if (!timeout) {
                rvt_get_mr(&fmr->mr);
                ret = -EBUSY;
                goto out;
        }
        rvt_deinit_mregion(&fmr->mr);
        kfree(fmr);
out:
        return ret;
}

/**
 * rvt_lkey_ok - check IB SGE for validity and initialize
 * @rkt: table containing lkey to check SGE against
 * @pd: protection domain
 * @isge: outgoing internal SGE
 * @sge: SGE to check
 * @acc: access flags
 *
 * Check the IB SGE for validity and initialize our internal version
 * of it.
 *
 * Return: 1 if valid and successful, otherwise returns 0.
 *
 * Increments the reference count upon success.
 */
int rvt_lkey_ok(struct rvt_lkey_table *rkt, struct rvt_pd *pd,
                struct rvt_sge *isge, struct ib_sge *sge, int acc)
{
        struct rvt_mregion *mr;
        unsigned int n, m;
        size_t off;

        /*
         * We use LKEY == zero for kernel virtual addresses
         * (see rvt_get_dma_mr() and dma_virt_ops).
         */
        rcu_read_lock();
        if (sge->lkey == 0) {
                struct rvt_dev_info *dev = ib_to_rvt(pd->ibpd.device);

                if (pd->user)
                        goto bail;
                mr = rcu_dereference(dev->dma_mr);
                if (!mr)
                        goto bail;
                rvt_get_mr(mr);
                rcu_read_unlock();

                isge->mr = mr;
                isge->vaddr = (void *)sge->addr;
                isge->length = sge->length;
                isge->sge_length = sge->length;
                isge->m = 0;
                isge->n = 0;
                goto ok;
        }
        mr = rcu_dereference(rkt->table[sge->lkey >> rkt->shift]);
        if (unlikely(!mr || atomic_read(&mr->lkey_invalid) ||
                     mr->lkey != sge->lkey || mr->pd != &pd->ibpd))
                goto bail;

        off = sge->addr - mr->user_base;
        if (unlikely(sge->addr < mr->user_base ||
                     off + sge->length > mr->length ||
                     (mr->access_flags & acc) != acc))
                goto bail;
        rvt_get_mr(mr);
        rcu_read_unlock();

        off += mr->offset;
        if (mr->page_shift) {
                /*
                 * page sizes are uniform power of 2 so no loop is necessary
                 * entries_spanned_by_off is the number of times the loop below
                 * would have executed.
                 */
                size_t entries_spanned_by_off;

                entries_spanned_by_off = off >> mr->page_shift;
                off -= (entries_spanned_by_off << mr->page_shift);
                m = entries_spanned_by_off / RVT_SEGSZ;
                n = entries_spanned_by_off % RVT_SEGSZ;
        } else {
                m = 0;
                n = 0;
                while (off >= mr->map[m]->segs[n].length) {
                        off -= mr->map[m]->segs[n].length;
                        n++;
                        if (n >= RVT_SEGSZ) {
                                m++;
                                n = 0;
                        }
                }
        }
        isge->mr = mr;
        isge->vaddr = mr->map[m]->segs[n].vaddr + off;
        isge->length = mr->map[m]->segs[n].length - off;
        isge->sge_length = sge->length;
        isge->m = m;
        isge->n = n;
ok:
        return 1;
bail:
        rcu_read_unlock();
        return 0;
}
EXPORT_SYMBOL(rvt_lkey_ok);
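
/*
 * Worked example for the page_shift branch above (comment only): with
 * page_shift = 12 and an SGE starting 0x5010 bytes into the region,
 * entries_spanned_by_off = 0x5010 >> 12 = 5 and off becomes 0x10, so
 * with RVT_SEGSZ = 512 the segment is map[0]->segs[5] at offset 0x10.
 */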

/**
 * rvt_rkey_ok - check the IB virtual address, length, and RKEY
 * @qp: qp for validation
 * @sge: SGE state
 * @len: length of data
 * @vaddr: virtual address to place data
 * @rkey: rkey to check
 * @acc: access flags
 *
 * Return: 1 if successful, otherwise 0.
 *
 * Increments the reference count upon success.
 */
int rvt_rkey_ok(struct rvt_qp *qp, struct rvt_sge *sge,
                u32 len, u64 vaddr, u32 rkey, int acc)
{
        struct rvt_dev_info *dev = ib_to_rvt(qp->ibqp.device);
        struct rvt_lkey_table *rkt = &dev->lkey_table;
        struct rvt_mregion *mr;
        unsigned int n, m;
        size_t off;

        /*
         * We use RKEY == zero for kernel virtual addresses
         * (see rvt_get_dma_mr() and dma_virt_ops).
         */
        rcu_read_lock();
        if (rkey == 0) {
                struct rvt_pd *pd = ibpd_to_rvtpd(qp->ibqp.pd);
                struct rvt_dev_info *rdi = ib_to_rvt(pd->ibpd.device);

                if (pd->user)
                        goto bail;
                mr = rcu_dereference(rdi->dma_mr);
                if (!mr)
                        goto bail;
                rvt_get_mr(mr);
                rcu_read_unlock();

                sge->mr = mr;
                sge->vaddr = (void *)vaddr;
                sge->length = len;
                sge->sge_length = len;
                sge->m = 0;
                sge->n = 0;
                goto ok;
        }

        mr = rcu_dereference(rkt->table[rkey >> rkt->shift]);
        if (unlikely(!mr || atomic_read(&mr->lkey_invalid) ||
                     mr->lkey != rkey || qp->ibqp.pd != mr->pd))
                goto bail;

        off = vaddr - mr->iova;
        if (unlikely(vaddr < mr->iova || off + len > mr->length ||
                     (mr->access_flags & acc) == 0))
                goto bail;
        rvt_get_mr(mr);
        rcu_read_unlock();

        off += mr->offset;
        if (mr->page_shift) {
                /*
                 * page sizes are uniform power of 2 so no loop is necessary
                 * entries_spanned_by_off is the number of times the loop below
                 * would have executed.
                 */
                size_t entries_spanned_by_off;

                entries_spanned_by_off = off >> mr->page_shift;
                off -= (entries_spanned_by_off << mr->page_shift);
                m = entries_spanned_by_off / RVT_SEGSZ;
                n = entries_spanned_by_off % RVT_SEGSZ;
        } else {
                m = 0;
                n = 0;
                while (off >= mr->map[m]->segs[n].length) {
                        off -= mr->map[m]->segs[n].length;
                        n++;
                        if (n >= RVT_SEGSZ) {
                                m++;
                                n = 0;
                        }
                }
        }
        sge->mr = mr;
        sge->vaddr = mr->map[m]->segs[n].vaddr + off;
        sge->length = mr->map[m]->segs[n].length - off;
        sge->sge_length = len;
        sge->m = m;
        sge->n = n;
ok:
        return 1;
bail:
        rcu_read_unlock();
        return 0;
}
EXPORT_SYMBOL(rvt_rkey_ok);