linux/drivers/infiniband/sw/rxe/rxe_mr.c
// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/*
 * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
 * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
 */

#include "rxe.h"
#include "rxe_loc.h"

/* Return a random 8-bit key value that is
 * different from last_key. Pass last_key as -1
 * if this is the first key for an MR or MW.
 */
u8 rxe_get_next_key(u32 last_key)
{
        u8 key;

        do {
                get_random_bytes(&key, 1);
        } while (key == last_key);

        return key;
}

int mr_check_range(struct rxe_mr *mr, u64 iova, size_t length)
{
        switch (mr->type) {
        case RXE_MR_TYPE_DMA:
                return 0;

        case RXE_MR_TYPE_MR:
                if (iova < mr->iova || length > mr->length ||
                    iova > mr->iova + mr->length - length)
                        return -EFAULT;
                return 0;

        default:
                return -EFAULT;
        }
}

#define IB_ACCESS_REMOTE        (IB_ACCESS_REMOTE_READ          \
                                | IB_ACCESS_REMOTE_WRITE        \
                                | IB_ACCESS_REMOTE_ATOMIC)

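/* Initialize the generic fields of an MR. The lkey is built from the
 * MR's pool index in the upper 24 bits and a fresh random 8-bit key in
 * the low byte; for example, pool index 0x1234 and key 0xab yield
 * lkey 0x1234ab. The rkey mirrors the lkey only when one of the
 * IB_ACCESS_REMOTE_* flags was requested, otherwise it is left at 0.
 */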
static void rxe_mr_init(int access, struct rxe_mr *mr)
{
        u32 lkey = mr->pelem.index << 8 | rxe_get_next_key(-1);
        u32 rkey = (access & IB_ACCESS_REMOTE) ? lkey : 0;

        mr->ibmr.lkey = lkey;
        mr->ibmr.rkey = rkey;
        mr->state = RXE_MR_STATE_INVALID;
        mr->type = RXE_MR_TYPE_NONE;
        mr->map_shift = ilog2(RXE_BUF_PER_MAP);
}

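/* Allocate the two-level page map for an MR: an array of num_map
 * pointers to struct rxe_map, each holding RXE_BUF_PER_MAP physical
 * buffer descriptors, where num_map = DIV_ROUND_UP(num_buf,
 * RXE_BUF_PER_MAP). Returns 0 on success or -ENOMEM, freeing any
 * partially allocated maps on failure.
 */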
static int rxe_mr_alloc(struct rxe_mr *mr, int num_buf)
{
        int i;
        int num_map;
        struct rxe_map **map = mr->map;

        num_map = (num_buf + RXE_BUF_PER_MAP - 1) / RXE_BUF_PER_MAP;

        mr->map = kmalloc_array(num_map, sizeof(*map), GFP_KERNEL);
        if (!mr->map)
                goto err1;

        for (i = 0; i < num_map; i++) {
                mr->map[i] = kmalloc(sizeof(**map), GFP_KERNEL);
                if (!mr->map[i])
                        goto err2;
        }

        BUILD_BUG_ON(!is_power_of_2(RXE_BUF_PER_MAP));

        mr->map_shift = ilog2(RXE_BUF_PER_MAP);
        mr->map_mask = RXE_BUF_PER_MAP - 1;

        mr->num_buf = num_buf;
        mr->num_map = num_map;
        mr->max_buf = num_map * RXE_BUF_PER_MAP;

        return 0;

err2:
        for (i--; i >= 0; i--)
                kfree(mr->map[i]);

        kfree(mr->map);
err1:
        return -ENOMEM;
}

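/* Initialize an MR that covers the whole DMA address space of the
 * device. No page map is allocated; iova values are later used
 * directly as kernel virtual addresses (see iova_to_vaddr() and the
 * RXE_MR_TYPE_DMA branch of rxe_mr_copy()).
 */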
void rxe_mr_init_dma(struct rxe_pd *pd, int access, struct rxe_mr *mr)
{
        rxe_mr_init(access, mr);

        mr->ibmr.pd = &pd->ibpd;
        mr->access = access;
        mr->state = RXE_MR_STATE_VALID;
        mr->type = RXE_MR_TYPE_DMA;
}

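/* Register a user memory region: pin the pages with ib_umem_get(),
 * allocate the map tables and record the kernel virtual address and
 * size (PAGE_SIZE) of each pinned page in the MR's buffer list.
 */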
int rxe_mr_init_user(struct rxe_pd *pd, u64 start, u64 length, u64 iova,
                     int access, struct ib_udata *udata, struct rxe_mr *mr)
{
        struct rxe_map          **map;
        struct rxe_phys_buf     *buf = NULL;
        struct ib_umem          *umem;
        struct sg_page_iter     sg_iter;
        int                     num_buf;
        void                    *vaddr;
        int err;
        int i;

        umem = ib_umem_get(udata, start, length, access);
        if (IS_ERR(umem)) {
                pr_warn("%s: Unable to pin memory region err = %d\n",
                        __func__, (int)PTR_ERR(umem));
                err = PTR_ERR(umem);
                goto err_out;
        }

        mr->umem = umem;
        num_buf = ib_umem_num_pages(umem);

        rxe_mr_init(access, mr);

        err = rxe_mr_alloc(mr, num_buf);
        if (err) {
                pr_warn("%s: Unable to allocate memory for map\n",
                                __func__);
                goto err_release_umem;
        }

        mr->page_shift = PAGE_SHIFT;
        mr->page_mask = PAGE_SIZE - 1;

        num_buf                 = 0;
        map = mr->map;
        if (length > 0) {
                buf = map[0]->buf;

                for_each_sgtable_page (&umem->sgt_append.sgt, &sg_iter, 0) {
                        if (num_buf >= RXE_BUF_PER_MAP) {
                                map++;
                                buf = map[0]->buf;
                                num_buf = 0;
                        }

                        vaddr = page_address(sg_page_iter_page(&sg_iter));
                        if (!vaddr) {
                                pr_warn("%s: Unable to get virtual address\n",
                                                __func__);
                                err = -ENOMEM;
                                goto err_cleanup_map;
                        }

                        buf->addr = (uintptr_t)vaddr;
                        buf->size = PAGE_SIZE;
                        num_buf++;
                        buf++;
                }
        }

        mr->ibmr.pd = &pd->ibpd;
        mr->umem = umem;
        mr->access = access;
        mr->length = length;
        mr->iova = iova;
        mr->va = start;
        mr->offset = ib_umem_offset(umem);
        mr->state = RXE_MR_STATE_VALID;
        mr->type = RXE_MR_TYPE_MR;

        return 0;

err_cleanup_map:
        for (i = 0; i < mr->num_map; i++)
                kfree(mr->map[i]);
        kfree(mr->map);
err_release_umem:
        ib_umem_release(umem);
err_out:
        return err;
}

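/* Initialize an MR for fast registration (used for memory registration
 * work requests). The map tables are sized for max_pages up front; the
 * buffer list itself is presumably populated later when the MR is
 * mapped, which is why the MR starts in the FREE state with the rkey
 * simply mirroring the lkey.
 */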
int rxe_mr_init_fast(struct rxe_pd *pd, int max_pages, struct rxe_mr *mr)
{
        int err;

        rxe_mr_init(0, mr);

        /* In fastreg, we also set the rkey */
        mr->ibmr.rkey = mr->ibmr.lkey;

        err = rxe_mr_alloc(mr, max_pages);
        if (err)
                goto err1;

        mr->ibmr.pd = &pd->ibpd;
        mr->max_buf = max_pages;
        mr->state = RXE_MR_STATE_FREE;
        mr->type = RXE_MR_TYPE_MR;

        return 0;

err1:
        return err;
}

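/* Translate an iova within the MR into a map index (*m_out), a buffer
 * index within that map (*n_out) and a byte offset into that buffer
 * (*offset_out). When every buffer is a full page (page_shift is set,
 * as rxe_mr_init_user() arranges) this is pure shift/mask arithmetic;
 * otherwise the buffer list is walked linearly. As an illustrative
 * example, assuming 4K pages and RXE_BUF_PER_MAP == 512, an iova
 * 0x5010 bytes past the start of a page-aligned MR resolves to
 * m = 0, n = 5 and an offset of 0x10 into that page.
 */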
static void lookup_iova(struct rxe_mr *mr, u64 iova, int *m_out, int *n_out,
                        size_t *offset_out)
{
        size_t offset = iova - mr->iova + mr->offset;
        int                     map_index;
        int                     buf_index;
        u64                     length;

        if (likely(mr->page_shift)) {
                *offset_out = offset & mr->page_mask;
                offset >>= mr->page_shift;
                *n_out = offset & mr->map_mask;
                *m_out = offset >> mr->map_shift;
        } else {
                map_index = 0;
                buf_index = 0;

                length = mr->map[map_index]->buf[buf_index].size;

                while (offset >= length) {
                        offset -= length;
                        buf_index++;

                        if (buf_index == RXE_BUF_PER_MAP) {
                                map_index++;
                                buf_index = 0;
                        }
                        length = mr->map[map_index]->buf[buf_index].size;
                }

                *m_out = map_index;
                *n_out = buf_index;
                *offset_out = offset;
        }
}

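/* Return the kernel virtual address backing iova within a valid MR, or
 * NULL on error. DMA MRs (no page map) return the iova cast directly;
 * for regular MRs the range must lie within the MR and must not cross
 * a physical buffer boundary.
 */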
void *iova_to_vaddr(struct rxe_mr *mr, u64 iova, int length)
{
        size_t offset;
        int m, n;
        void *addr;

        if (mr->state != RXE_MR_STATE_VALID) {
                pr_warn("mr not in valid state\n");
                addr = NULL;
                goto out;
        }

        if (!mr->map) {
                addr = (void *)(uintptr_t)iova;
                goto out;
        }

        if (mr_check_range(mr, iova, length)) {
                pr_warn("range violation\n");
                addr = NULL;
                goto out;
        }

        lookup_iova(mr, iova, &m, &n, &offset);

        if (offset + length > mr->map[m]->buf[n].size) {
                pr_warn("crosses page boundary\n");
                addr = NULL;
                goto out;
        }

        addr = (void *)(uintptr_t)mr->map[m]->buf[n].addr + offset;

out:
        return addr;
}

/* Copy data between a range (vaddr, vaddr + length - 1) and an MR
 * object starting at iova. Compute the incremental crc32 if crcp is
 * not NULL. The caller must hold a reference to the MR.
 */
int rxe_mr_copy(struct rxe_mr *mr, u64 iova, void *addr, int length,
                enum rxe_mr_copy_dir dir, u32 *crcp)
{
        int                     err;
        int                     bytes;
        u8                      *va;
        struct rxe_map          **map;
        struct rxe_phys_buf     *buf;
        int                     m;
        int                     i;
        size_t                  offset;
        u32                     crc = crcp ? (*crcp) : 0;

        if (length == 0)
                return 0;

        if (mr->type == RXE_MR_TYPE_DMA) {
                u8 *src, *dest;

                src = (dir == RXE_TO_MR_OBJ) ? addr : ((void *)(uintptr_t)iova);

                dest = (dir == RXE_TO_MR_OBJ) ? ((void *)(uintptr_t)iova) : addr;

                memcpy(dest, src, length);

                if (crcp)
                        *crcp = rxe_crc32(to_rdev(mr->ibmr.device), *crcp, dest,
                                          length);

                return 0;
        }

        WARN_ON_ONCE(!mr->map);

        err = mr_check_range(mr, iova, length);
        if (err) {
                err = -EFAULT;
                goto err1;
        }

        lookup_iova(mr, iova, &m, &i, &offset);

        map = mr->map + m;
        buf     = map[0]->buf + i;

        while (length > 0) {
                u8 *src, *dest;

                va      = (u8 *)(uintptr_t)buf->addr + offset;
                src = (dir == RXE_TO_MR_OBJ) ? addr : va;
                dest = (dir == RXE_TO_MR_OBJ) ? va : addr;

                bytes   = buf->size - offset;

                if (bytes > length)
                        bytes = length;

                memcpy(dest, src, bytes);

                if (crcp)
                        crc = rxe_crc32(to_rdev(mr->ibmr.device), crc, dest,
                                        bytes);

                length  -= bytes;
                addr    += bytes;

                offset  = 0;
                buf++;
                i++;

                if (i == RXE_BUF_PER_MAP) {
                        i = 0;
                        map++;
                        buf = map[0]->buf;
                }
        }

        if (crcp)
                *crcp = crc;

        return 0;

err1:
        return err;
}

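/* Direction semantics for rxe_mr_copy(): RXE_TO_MR_OBJ copies length
 * bytes from addr into the MR at iova; otherwise the copy runs from
 * the MR at iova out to addr. As an illustrative sketch (not a call
 * site in this file, and the variable names are made up), a responder
 * placing an incoming payload into a registered buffer would do
 * something like:
 *
 *      err = rxe_mr_copy(mr, wqe_iova, payload, paylen,
 *                        RXE_TO_MR_OBJ, &crc);
 */
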
/* Copy data into or out of a wqe, i.e. an sg list,
 * under the control of a dma descriptor.
 */
int copy_data(
        struct rxe_pd           *pd,
        int                     access,
        struct rxe_dma_info     *dma,
        void                    *addr,
        int                     length,
        enum rxe_mr_copy_dir    dir,
        u32                     *crcp)
{
        int                     bytes;
        struct rxe_sge          *sge    = &dma->sge[dma->cur_sge];
        int                     offset  = dma->sge_offset;
        int                     resid   = dma->resid;
        struct rxe_mr           *mr     = NULL;
        u64                     iova;
        int                     err;

        if (length == 0)
                return 0;

        if (length > resid) {
                err = -EINVAL;
                goto err2;
        }

        if (sge->length && (offset < sge->length)) {
                mr = lookup_mr(pd, access, sge->lkey, RXE_LOOKUP_LOCAL);
                if (!mr) {
                        err = -EINVAL;
                        goto err1;
                }
        }

        while (length > 0) {
                bytes = length;

                if (offset >= sge->length) {
                        if (mr) {
                                rxe_drop_ref(mr);
                                mr = NULL;
                        }
                        sge++;
                        dma->cur_sge++;
                        offset = 0;

                        if (dma->cur_sge >= dma->num_sge) {
                                err = -ENOSPC;
                                goto err2;
                        }

                        if (sge->length) {
                                mr = lookup_mr(pd, access, sge->lkey,
                                               RXE_LOOKUP_LOCAL);
                                if (!mr) {
                                        err = -EINVAL;
                                        goto err1;
                                }
                        } else {
                                continue;
                        }
                }

                if (bytes > sge->length - offset)
                        bytes = sge->length - offset;

                if (bytes > 0) {
                        iova = sge->addr + offset;

                        err = rxe_mr_copy(mr, iova, addr, bytes, dir, crcp);
                        if (err)
                                goto err2;

                        offset  += bytes;
                        resid   -= bytes;
                        length  -= bytes;
                        addr    += bytes;
                }
        }

        dma->sge_offset = offset;
        dma->resid      = resid;

        if (mr)
                rxe_drop_ref(mr);

        return 0;

err2:
        if (mr)
                rxe_drop_ref(mr);
err1:
        return err;
}

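/* Skip over length bytes of the dma descriptor's sg list without
 * copying anything, advancing cur_sge, sge_offset and resid much as a
 * successful copy of the same length would. Returns -ENOSPC if the sg
 * list is exhausted first.
 */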
int advance_dma_data(struct rxe_dma_info *dma, unsigned int length)
{
        struct rxe_sge          *sge    = &dma->sge[dma->cur_sge];
        int                     offset  = dma->sge_offset;
        int                     resid   = dma->resid;

        while (length) {
                unsigned int bytes;

                if (offset >= sge->length) {
                        sge++;
                        dma->cur_sge++;
                        offset = 0;
                        if (dma->cur_sge >= dma->num_sge)
                                return -ENOSPC;
                }

                bytes = length;

                if (bytes > sge->length - offset)
                        bytes = sge->length - offset;

                offset  += bytes;
                resid   -= bytes;
                length  -= bytes;
        }

        dma->sge_offset = offset;
        dma->resid      = resid;

        return 0;
}

/* (1) find the MR corresponding to the lkey/rkey,
 *     depending on lookup_type
 * (2) verify that the (qp) pd matches the mr pd
 * (3) verify that the mr can support the requested access
 * (4) verify that the mr state is valid
 */
struct rxe_mr *lookup_mr(struct rxe_pd *pd, int access, u32 key,
                         enum rxe_mr_lookup_type type)
{
        struct rxe_mr *mr;
        struct rxe_dev *rxe = to_rdev(pd->ibpd.device);
        int index = key >> 8;

        mr = rxe_pool_get_index(&rxe->mr_pool, index);
        if (!mr)
                return NULL;

        if (unlikely((type == RXE_LOOKUP_LOCAL && mr_lkey(mr) != key) ||
                     (type == RXE_LOOKUP_REMOTE && mr_rkey(mr) != key) ||
                     mr_pd(mr) != pd || (access && !(access & mr->access)) ||
                     mr->state != RXE_MR_STATE_VALID)) {
                rxe_drop_ref(mr);
                mr = NULL;
        }

        return mr;
}

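/* Invalidate an MR identified by rkey, e.g. on behalf of a local or
 * remote invalidate operation. Fails if no MR owns the rkey's pool
 * index, if the full rkey does not match, or if memory windows are
 * still bound to the MR; otherwise the MR is moved to the FREE state.
 */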
int rxe_invalidate_mr(struct rxe_qp *qp, u32 rkey)
{
        struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
        struct rxe_mr *mr;
        int ret;

        mr = rxe_pool_get_index(&rxe->mr_pool, rkey >> 8);
        if (!mr) {
                pr_err("%s: No MR for rkey %#x\n", __func__, rkey);
                ret = -EINVAL;
                goto err;
        }

        if (rkey != mr->ibmr.rkey) {
                pr_err("%s: rkey (%#x) doesn't match mr->ibmr.rkey (%#x)\n",
                        __func__, rkey, mr->ibmr.rkey);
                ret = -EINVAL;
                goto err_drop_ref;
        }

        if (atomic_read(&mr->num_mw) > 0) {
                pr_warn("%s: Attempt to invalidate an MR while bound to MWs\n",
                        __func__);
                ret = -EINVAL;
                goto err_drop_ref;
        }

        mr->state = RXE_MR_STATE_FREE;
        ret = 0;

err_drop_ref:
        rxe_drop_ref(mr);
err:
        return ret;
}

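/* Deregister an MR (the ib_dereg_mr verb). Refuses to proceed while
 * memory windows are still bound to the MR; otherwise it marks the MR
 * as a zombie and drops the PD, index and MR references. The actual
 * freeing of the umem and map tables happens in rxe_mr_cleanup() once
 * the last reference is gone.
 */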
int rxe_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata)
{
        struct rxe_mr *mr = to_rmr(ibmr);

        if (atomic_read(&mr->num_mw) > 0) {
                pr_warn("%s: Attempt to deregister an MR while bound to MWs\n",
                        __func__);
                return -EINVAL;
        }

        mr->state = RXE_MR_STATE_ZOMBIE;
        rxe_drop_ref(mr_pd(mr));
        rxe_drop_index(mr);
        rxe_drop_ref(mr);

        return 0;
}

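/* Pool cleanup callback for an MR: releases the pinned umem (if any)
 * and frees the two-level map tables.
 */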
void rxe_mr_cleanup(struct rxe_pool_entry *arg)
{
        struct rxe_mr *mr = container_of(arg, typeof(*mr), pelem);
        int i;

        ib_umem_release(mr->umem);

        if (mr->map) {
                for (i = 0; i < mr->num_map; i++)
                        kfree(mr->map[i]);

                kfree(mr->map);
        }
}