linux/drivers/infiniband/sw/rxe/rxe_mr.c
// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/*
 * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
 * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
 */

#include "rxe.h"
#include "rxe_loc.h"

/* Return a random 8 bit key value that is
 * different from last_key. Pass -1 as last_key
 * if this is the first key for an MR or MW.
 */
u8 rxe_get_next_key(u32 last_key)
{
	u8 key;

	do {
		get_random_bytes(&key, 1);
	} while (key == last_key);

	return key;
}

int mr_check_range(struct rxe_mr *mr, u64 iova, size_t length)
{
	switch (mr->type) {
	case RXE_MR_TYPE_DMA:
		return 0;

	case RXE_MR_TYPE_MR:
		if (iova < mr->iova || length > mr->length ||
		    iova > mr->iova + mr->length - length)
			return -EFAULT;
		return 0;

	default:
		return -EFAULT;
	}
}
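
/* Range-check example (illustrative, not part of the original source):
 * with mr->iova = 0x10000 and mr->length = 0x4000, a request for
 * iova = 0x13000, length = 0x2000 is rejected because
 * 0x13000 > 0x10000 + 0x4000 - 0x2000 = 0x12000, i.e. the request
 * runs 0x1000 bytes past the end of the MR. Comparing against
 * "end - length" rather than computing iova + length avoids
 * wrapping for iova values near the top of the address space.
 */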

#define IB_ACCESS_REMOTE	(IB_ACCESS_REMOTE_READ		\
				| IB_ACCESS_REMOTE_WRITE	\
				| IB_ACCESS_REMOTE_ATOMIC)

static void rxe_mr_init(int access, struct rxe_mr *mr)
{
	u32 lkey = mr->pelem.index << 8 | rxe_get_next_key(-1);
	u32 rkey = (access & IB_ACCESS_REMOTE) ? lkey : 0;

	mr->ibmr.lkey = lkey;
	mr->ibmr.rkey = rkey;
	mr->state = RXE_MR_STATE_INVALID;
	mr->type = RXE_MR_TYPE_NONE;
	mr->map_shift = ilog2(RXE_BUF_PER_MAP);
}
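
/* Key layout note: the lkey/rkey pack the MR's pool index into the
 * upper 24 bits and the random 8 bit key into the low byte, so for
 * example index 5 with key 0xab yields 0x5ab. lookup_mr() below
 * recovers the index with key >> 8 and then checks the full key
 * against the MR, so a stale key with the right index still fails.
 */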

static int rxe_mr_alloc(struct rxe_mr *mr, int num_buf)
{
	int i;
	int num_map;
	struct rxe_map **map = mr->map;

	num_map = (num_buf + RXE_BUF_PER_MAP - 1) / RXE_BUF_PER_MAP;

	mr->map = kmalloc_array(num_map, sizeof(*map), GFP_KERNEL);
	if (!mr->map)
		goto err1;

	for (i = 0; i < num_map; i++) {
		mr->map[i] = kmalloc(sizeof(**map), GFP_KERNEL);
		if (!mr->map[i])
			goto err2;
	}

	BUILD_BUG_ON(!is_power_of_2(RXE_BUF_PER_MAP));

	mr->map_shift = ilog2(RXE_BUF_PER_MAP);
	mr->map_mask = RXE_BUF_PER_MAP - 1;

	mr->num_buf = num_buf;
	mr->num_map = num_map;
	mr->max_buf = num_map * RXE_BUF_PER_MAP;

	return 0;

err2:
	for (i--; i >= 0; i--)
		kfree(mr->map[i]);

	kfree(mr->map);
err1:
	return -ENOMEM;
}
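
/* Sizing example (illustrative, assuming RXE_BUF_PER_MAP is 256):
 * a request for num_buf = 300 buffers rounds up to num_map = 2 map
 * chunks via the usual (n + d - 1) / d idiom, giving max_buf = 512
 * slots; the trailing 212 slots simply go unused. The BUILD_BUG_ON
 * above enforces that RXE_BUF_PER_MAP is a power of two so that
 * lookup_iova() can split a buffer index with a shift and a mask.
 */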

void rxe_mr_init_dma(struct rxe_pd *pd, int access, struct rxe_mr *mr)
{
	rxe_mr_init(access, mr);

	mr->ibmr.pd = &pd->ibpd;
	mr->access = access;
	mr->state = RXE_MR_STATE_VALID;
	mr->type = RXE_MR_TYPE_DMA;
}

int rxe_mr_init_user(struct rxe_pd *pd, u64 start, u64 length, u64 iova,
		     int access, struct rxe_mr *mr)
{
	struct rxe_map		**map;
	struct rxe_phys_buf	*buf = NULL;
	struct ib_umem		*umem;
	struct sg_page_iter	sg_iter;
	int			num_buf;
	void			*vaddr;
	int			err;
	int			i;

	umem = ib_umem_get(pd->ibpd.device, start, length, access);
	if (IS_ERR(umem)) {
		pr_warn("%s: Unable to pin memory region err = %d\n",
			__func__, (int)PTR_ERR(umem));
		err = PTR_ERR(umem);
		goto err_out;
	}

	num_buf = ib_umem_num_pages(umem);

	rxe_mr_init(access, mr);

	err = rxe_mr_alloc(mr, num_buf);
	if (err) {
		pr_warn("%s: Unable to allocate memory for map\n",
			__func__);
		goto err_release_umem;
	}

	mr->page_shift = PAGE_SHIFT;
	mr->page_mask = PAGE_SIZE - 1;

	num_buf = 0;
	map = mr->map;
	if (length > 0) {
		buf = map[0]->buf;

		for_each_sgtable_page(&umem->sgt_append.sgt, &sg_iter, 0) {
			if (num_buf >= RXE_BUF_PER_MAP) {
				map++;
				buf = map[0]->buf;
				num_buf = 0;
			}

			vaddr = page_address(sg_page_iter_page(&sg_iter));
			if (!vaddr) {
				pr_warn("%s: Unable to get virtual address\n",
					__func__);
				err = -ENOMEM;
				goto err_cleanup_map;
			}

			buf->addr = (uintptr_t)vaddr;
			buf->size = PAGE_SIZE;
			num_buf++;
			buf++;
		}
	}

	mr->ibmr.pd = &pd->ibpd;
	mr->umem = umem;
	mr->access = access;
	mr->length = length;
	mr->iova = iova;
	mr->va = start;
	mr->offset = ib_umem_offset(umem);
	mr->state = RXE_MR_STATE_VALID;
	mr->type = RXE_MR_TYPE_MR;

	return 0;

err_cleanup_map:
	for (i = 0; i < mr->num_map; i++)
		kfree(mr->map[i]);
	kfree(mr->map);
err_release_umem:
	ib_umem_release(umem);
err_out:
	return err;
}
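
/* Caller note: this runs under the ib_reg_user_mr() verb via the
 * rxe provider's reg_user_mr hook. Every pinned user page becomes
 * one rxe_phys_buf of exactly PAGE_SIZE, which is what lets
 * lookup_iova() below take its shift-and-mask fast path.
 */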

int rxe_mr_init_fast(struct rxe_pd *pd, int max_pages, struct rxe_mr *mr)
{
	int err;

	rxe_mr_init(0, mr);

	/* In fastreg, we also set the rkey */
	mr->ibmr.rkey = mr->ibmr.lkey;

	err = rxe_mr_alloc(mr, max_pages);
	if (err)
		goto err1;

	mr->ibmr.pd = &pd->ibpd;
	mr->max_buf = max_pages;
	mr->state = RXE_MR_STATE_FREE;
	mr->type = RXE_MR_TYPE_MR;

	return 0;

err1:
	return err;
}
 213
 214static void lookup_iova(struct rxe_mr *mr, u64 iova, int *m_out, int *n_out,
 215                        size_t *offset_out)
 216{
 217        size_t offset = iova - mr->iova + mr->offset;
 218        int                     map_index;
 219        int                     buf_index;
 220        u64                     length;
 221
 222        if (likely(mr->page_shift)) {
 223                *offset_out = offset & mr->page_mask;
 224                offset >>= mr->page_shift;
 225                *n_out = offset & mr->map_mask;
 226                *m_out = offset >> mr->map_shift;
 227        } else {
 228                map_index = 0;
 229                buf_index = 0;
 230
 231                length = mr->map[map_index]->buf[buf_index].size;
 232
 233                while (offset >= length) {
 234                        offset -= length;
 235                        buf_index++;
 236
 237                        if (buf_index == RXE_BUF_PER_MAP) {
 238                                map_index++;
 239                                buf_index = 0;
 240                        }
 241                        length = mr->map[map_index]->buf[buf_index].size;
 242                }
 243
 244                *m_out = map_index;
 245                *n_out = buf_index;
 246                *offset_out = offset;
 247        }
 248}
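
/* Worked example for the fast path (illustrative): for a user MR
 * with 4K pages, RXE_BUF_PER_MAP = 256, mr->iova = 0x10000 and
 * mr->offset = 0, a lookup of iova = 0x112345 gives
 * offset = 0x102345, and then
 *	*offset_out = 0x345	(offset & (PAGE_SIZE - 1))
 *	page index  = 0x102	(offset >> 12)
 *	*n_out      = 0x02	(page index & 0xff)
 *	*m_out      = 0x01	(page index >> 8)
 * The else branch handles MRs whose buffers are not uniform pages
 * by walking the buffer sizes one at a time.
 */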
 249
 250void *iova_to_vaddr(struct rxe_mr *mr, u64 iova, int length)
 251{
 252        size_t offset;
 253        int m, n;
 254        void *addr;
 255
 256        if (mr->state != RXE_MR_STATE_VALID) {
 257                pr_warn("mr not in valid state\n");
 258                addr = NULL;
 259                goto out;
 260        }
 261
 262        if (!mr->map) {
 263                addr = (void *)(uintptr_t)iova;
 264                goto out;
 265        }
 266
 267        if (mr_check_range(mr, iova, length)) {
 268                pr_warn("range violation\n");
 269                addr = NULL;
 270                goto out;
 271        }
 272
 273        lookup_iova(mr, iova, &m, &n, &offset);
 274
 275        if (offset + length > mr->map[m]->buf[n].size) {
 276                pr_warn("crosses page boundary\n");
 277                addr = NULL;
 278                goto out;
 279        }
 280
 281        addr = (void *)(uintptr_t)mr->map[m]->buf[n].addr + offset;
 282
 283out:
 284        return addr;
 285}
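
/* Usage sketch (illustrative; the exact caller-side handling lives
 * in the responder code): callers use iova_to_vaddr() when they need
 * a direct kernel pointer into the MR rather than a copy, e.g. for a
 * 64 bit atomic operation:
 *
 *	u64 *va = iova_to_vaddr(mr, iova, sizeof(u64));
 *	if (!va)
 *		return -EFAULT;	// hypothetical error handling
 *
 * Passing the access length is what lets the page-boundary check
 * above reject a pointer that would run off the end of a buffer.
 */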

/* copy length bytes to or from a buffer at addr and an mr object
 * starting at iova; dir selects the direction of the copy
 */
int rxe_mr_copy(struct rxe_mr *mr, u64 iova, void *addr, int length,
		enum rxe_mr_copy_dir dir)
{
	int			err;
	int			bytes;
	u8			*va;
	struct rxe_map		**map;
	struct rxe_phys_buf	*buf;
	int			m;
	int			i;
	size_t			offset;

	if (length == 0)
		return 0;

	if (mr->type == RXE_MR_TYPE_DMA) {
		u8 *src, *dest;

		src = (dir == RXE_TO_MR_OBJ) ? addr : ((void *)(uintptr_t)iova);
		dest = (dir == RXE_TO_MR_OBJ) ? ((void *)(uintptr_t)iova) : addr;

		memcpy(dest, src, length);

		return 0;
	}

	WARN_ON_ONCE(!mr->map);

	err = mr_check_range(mr, iova, length);
	if (err) {
		err = -EFAULT;
		goto err1;
	}

	lookup_iova(mr, iova, &m, &i, &offset);

	map = mr->map + m;
	buf = map[0]->buf + i;

	while (length > 0) {
		u8 *src, *dest;

		va = (u8 *)(uintptr_t)buf->addr + offset;
		src = (dir == RXE_TO_MR_OBJ) ? addr : va;
		dest = (dir == RXE_TO_MR_OBJ) ? va : addr;

		bytes = buf->size - offset;

		if (bytes > length)
			bytes = length;

		memcpy(dest, src, bytes);

		length -= bytes;
		addr += bytes;

		offset = 0;
		buf++;
		i++;

		if (i == RXE_BUF_PER_MAP) {
			i = 0;
			map++;
			buf = map[0]->buf;
		}
	}

	return 0;

err1:
	return err;
}
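
/* Copy example (illustrative): with 4K buffers, a copy of
 * length = 0x1000 landing at offset = 0xe00 within buffer n takes
 * two passes through the loop above: 0x200 bytes from the tail of
 * buffer n, then 0xe00 bytes from the start of buffer n + 1,
 * stepping into the next map chunk if n was the last buffer in its
 * chunk.
 */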

/* copy data in or out of a wqe, i.e. its sg list,
 * under the control of a dma descriptor
 */
int copy_data(struct rxe_pd *pd, int access, struct rxe_dma_info *dma,
	      void *addr, int length, enum rxe_mr_copy_dir dir)
{
	int			bytes;
	struct rxe_sge		*sge	= &dma->sge[dma->cur_sge];
	int			offset	= dma->sge_offset;
	int			resid	= dma->resid;
	struct rxe_mr		*mr	= NULL;
	u64			iova;
	int			err;

	if (length == 0)
		return 0;

	if (length > resid) {
		err = -EINVAL;
		goto err2;
	}

	if (sge->length && (offset < sge->length)) {
		mr = lookup_mr(pd, access, sge->lkey, RXE_LOOKUP_LOCAL);
		if (!mr) {
			err = -EINVAL;
			goto err1;
		}
	}

	while (length > 0) {
		bytes = length;

		if (offset >= sge->length) {
			if (mr) {
				rxe_drop_ref(mr);
				mr = NULL;
			}
			sge++;
			dma->cur_sge++;
			offset = 0;

			if (dma->cur_sge >= dma->num_sge) {
				err = -ENOSPC;
				goto err2;
			}

			if (sge->length) {
				mr = lookup_mr(pd, access, sge->lkey,
					       RXE_LOOKUP_LOCAL);
				if (!mr) {
					err = -EINVAL;
					goto err1;
				}
			} else {
				continue;
			}
		}

		if (bytes > sge->length - offset)
			bytes = sge->length - offset;

		if (bytes > 0) {
			iova = sge->addr + offset;

			err = rxe_mr_copy(mr, iova, addr, bytes, dir);
			if (err)
				goto err2;

			offset += bytes;
			resid -= bytes;
			length -= bytes;
			addr += bytes;
		}
	}

	dma->sge_offset = offset;
	dma->resid = resid;

	if (mr)
		rxe_drop_ref(mr);

	return 0;

err2:
	if (mr)
		rxe_drop_ref(mr);
err1:
	return err;
}
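
/* State note: copy_data() is resumable. dma->cur_sge, dma->sge_offset
 * and dma->resid persist in the dma descriptor between calls, so a
 * message split across several packets continues from exactly where
 * the previous call stopped rather than rescanning the sg list.
 */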

int advance_dma_data(struct rxe_dma_info *dma, unsigned int length)
{
	struct rxe_sge		*sge	= &dma->sge[dma->cur_sge];
	int			offset	= dma->sge_offset;
	int			resid	= dma->resid;

	while (length) {
		unsigned int bytes;

		if (offset >= sge->length) {
			sge++;
			dma->cur_sge++;
			offset = 0;
			if (dma->cur_sge >= dma->num_sge)
				return -ENOSPC;
		}

		bytes = length;

		if (bytes > sge->length - offset)
			bytes = sge->length - offset;

		offset += bytes;
		resid -= bytes;
		length -= bytes;
	}

	dma->sge_offset = offset;
	dma->resid = resid;

	return 0;
}
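
/* advance_dma_data() walks the same cursor state as copy_data() but
 * never touches memory; it is used where length bytes of the dma
 * descriptor must be consumed without performing a copy.
 */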

/* (1) find the mr corresponding to lkey/rkey
 *     depending on lookup_type
 * (2) verify that the (qp) pd matches the mr pd
 * (3) verify that the mr can support the requested access
 * (4) verify that mr state is valid
 */
struct rxe_mr *lookup_mr(struct rxe_pd *pd, int access, u32 key,
			 enum rxe_mr_lookup_type type)
{
	struct rxe_mr *mr;
	struct rxe_dev *rxe = to_rdev(pd->ibpd.device);
	int index = key >> 8;

	mr = rxe_pool_get_index(&rxe->mr_pool, index);
	if (!mr)
		return NULL;

	if (unlikely((type == RXE_LOOKUP_LOCAL && mr_lkey(mr) != key) ||
		     (type == RXE_LOOKUP_REMOTE && mr_rkey(mr) != key) ||
		     mr_pd(mr) != pd || (access && !(access & mr->access)) ||
		     mr->state != RXE_MR_STATE_VALID)) {
		rxe_drop_ref(mr);
		mr = NULL;
	}

	return mr;
}

int rxe_invalidate_mr(struct rxe_qp *qp, u32 rkey)
{
	struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
	struct rxe_mr *mr;
	int ret;

	mr = rxe_pool_get_index(&rxe->mr_pool, rkey >> 8);
	if (!mr) {
		pr_err("%s: No MR for rkey %#x\n", __func__, rkey);
		ret = -EINVAL;
		goto err;
	}

	if (rkey != mr->ibmr.rkey) {
		pr_err("%s: rkey (%#x) doesn't match mr->ibmr.rkey (%#x)\n",
			__func__, rkey, mr->ibmr.rkey);
		ret = -EINVAL;
		goto err_drop_ref;
	}

	if (atomic_read(&mr->num_mw) > 0) {
		pr_warn("%s: Attempt to invalidate an MR while bound to MWs\n",
			__func__);
		ret = -EINVAL;
		goto err_drop_ref;
	}

	mr->state = RXE_MR_STATE_FREE;
	ret = 0;

err_drop_ref:
	rxe_drop_ref(mr);
err:
	return ret;
}

int rxe_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata)
{
	struct rxe_mr *mr = to_rmr(ibmr);

	if (atomic_read(&mr->num_mw) > 0) {
		pr_warn("%s: Attempt to deregister an MR while bound to MWs\n",
			__func__);
		return -EINVAL;
	}

	mr->state = RXE_MR_STATE_ZOMBIE;
	rxe_drop_ref(mr_pd(mr));
	rxe_drop_index(mr);
	rxe_drop_ref(mr);

	return 0;
}

void rxe_mr_cleanup(struct rxe_pool_entry *arg)
{
	struct rxe_mr *mr = container_of(arg, typeof(*mr), pelem);
	int i;

	/* ib_umem_release() accepts a NULL umem, as left by DMA and
	 * fastreg MRs that never pinned user memory
	 */
	ib_umem_release(mr->umem);

	if (mr->map) {
		for (i = 0; i < mr->num_map; i++)
			kfree(mr->map[i]);

		kfree(mr->map);
	}
}