linux/drivers/infiniband/sw/rxe/rxe_mr.c
/*
 * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
 * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include "rxe.h"
#include "rxe_loc.h"

/*
 * lfsr (linear feedback shift register) with period 255; returns the
 * next non-zero 8-bit key, used as the low byte of new lkey/rkey values
 */
static u8 rxe_get_key(void)
{
        static u32 key = 1;

        key = key << 1;

        key |= (0 != (key & 0x100)) ^ (0 != (key & 0x10))
                ^ (0 != (key & 0x80)) ^ (0 != (key & 0x40));

        key &= 0xff;

        return key;
}

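/* check that the range [iova, iova + length - 1] lies within the
 * registered bounds of mem; DMA regions are not range checked
 */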
int mem_check_range(struct rxe_mem *mem, u64 iova, size_t length)
{
        switch (mem->type) {
        case RXE_MEM_TYPE_DMA:
                return 0;

        case RXE_MEM_TYPE_MR:
        case RXE_MEM_TYPE_FMR:
                if (iova < mem->iova ||
                    length > mem->length ||
                    iova > mem->iova + mem->length - length)
                        return -EFAULT;
                return 0;

        default:
                return -EFAULT;
        }
}

#define IB_ACCESS_REMOTE        (IB_ACCESS_REMOTE_READ          \
                                | IB_ACCESS_REMOTE_WRITE        \
                                | IB_ACCESS_REMOTE_ATOMIC)

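/* initialize the common fields of a new mem object: build the lkey
 * from the pool index and the lfsr key, expose an rkey only when
 * remote access is requested, and start in the INVALID state
 */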
static void rxe_mem_init(int access, struct rxe_mem *mem)
{
        u32 lkey = mem->pelem.index << 8 | rxe_get_key();
        u32 rkey = (access & IB_ACCESS_REMOTE) ? lkey : 0;

        if (mem->pelem.pool->type == RXE_TYPE_MR) {
                mem->ibmr.lkey          = lkey;
                mem->ibmr.rkey          = rkey;
        }

        mem->lkey               = lkey;
        mem->rkey               = rkey;
        mem->state              = RXE_MEM_STATE_INVALID;
        mem->type               = RXE_MEM_TYPE_NONE;
        mem->map_shift          = ilog2(RXE_BUF_PER_MAP);
}

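/* pool cleanup callback: release the pinned user memory (if any) and
 * free the two-level map structure
 */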
void rxe_mem_cleanup(struct rxe_pool_entry *arg)
{
        struct rxe_mem *mem = container_of(arg, typeof(*mem), pelem);
        int i;

        ib_umem_release(mem->umem);

        if (mem->map) {
                for (i = 0; i < mem->num_map; i++)
                        kfree(mem->map[i]);

                kfree(mem->map);
        }
}

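/* allocate the two-level map: an array of map pointers, each map
 * holding RXE_BUF_PER_MAP physical buffer descriptors, enough to
 * describe num_buf buffers
 */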
static int rxe_mem_alloc(struct rxe_mem *mem, int num_buf)
{
        int i;
        int num_map;
        struct rxe_map **map = mem->map;

        num_map = (num_buf + RXE_BUF_PER_MAP - 1) / RXE_BUF_PER_MAP;

        mem->map = kmalloc_array(num_map, sizeof(*map), GFP_KERNEL);
        if (!mem->map)
                goto err1;

        for (i = 0; i < num_map; i++) {
                mem->map[i] = kmalloc(sizeof(**map), GFP_KERNEL);
                if (!mem->map[i])
                        goto err2;
        }

        BUILD_BUG_ON(!is_power_of_2(RXE_BUF_PER_MAP));

        mem->map_shift  = ilog2(RXE_BUF_PER_MAP);
        mem->map_mask   = RXE_BUF_PER_MAP - 1;

        mem->num_buf = num_buf;
        mem->num_map = num_map;
        mem->max_buf = num_map * RXE_BUF_PER_MAP;

        return 0;

err2:
        for (i--; i >= 0; i--)
                kfree(mem->map[i]);

        kfree(mem->map);
        /* clear the dangling pointer so rxe_mem_cleanup() does not
         * attempt to free the maps a second time
         */
        mem->map = NULL;
err1:
        return -ENOMEM;
}

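/* register a DMA memory region; iova values are used directly as
 * kernel virtual addresses, so no page map is needed
 */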
int rxe_mem_init_dma(struct rxe_pd *pd,
                     int access, struct rxe_mem *mem)
{
        rxe_mem_init(access, mem);

        mem->pd                 = pd;
        mem->access             = access;
        mem->state              = RXE_MEM_STATE_VALID;
        mem->type               = RXE_MEM_TYPE_DMA;

        return 0;
}

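/* register a user memory region: pin the user pages with ib_umem_get()
 * and record the kernel virtual address and size of each page in the
 * map structure
 */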
int rxe_mem_init_user(struct rxe_pd *pd, u64 start,
                      u64 length, u64 iova, int access, struct ib_udata *udata,
                      struct rxe_mem *mem)
{
        struct rxe_map          **map;
        struct rxe_phys_buf     *buf = NULL;
        struct ib_umem          *umem;
        struct sg_page_iter     sg_iter;
        int                     num_buf;
        void                    *vaddr;
        int err;

        umem = ib_umem_get(udata, start, length, access, 0);
        if (IS_ERR(umem)) {
                pr_warn("err %d from ib_umem_get\n",
                        (int)PTR_ERR(umem));
                err = -EINVAL;
                goto err1;
        }

        mem->umem = umem;
        num_buf = ib_umem_num_pages(umem);

        rxe_mem_init(access, mem);

        err = rxe_mem_alloc(mem, num_buf);
        if (err) {
                pr_warn("err %d from rxe_mem_alloc\n", err);
                ib_umem_release(umem);
                /* the umem has been released; clear the stale pointer
                 * so rxe_mem_cleanup() does not release it again
                 */
                mem->umem = NULL;
                goto err1;
        }

        mem->page_shift         = PAGE_SHIFT;
        mem->page_mask          = PAGE_SIZE - 1;

        num_buf                 = 0;
        map                     = mem->map;
        if (length > 0) {
                buf = map[0]->buf;

                for_each_sg_page(umem->sg_head.sgl, &sg_iter, umem->nmap, 0) {
                        if (num_buf >= RXE_BUF_PER_MAP) {
                                map++;
                                buf = map[0]->buf;
                                num_buf = 0;
                        }

                        vaddr = page_address(sg_page_iter_page(&sg_iter));
                        if (!vaddr) {
                                pr_warn("null vaddr\n");
                                err = -ENOMEM;
                                goto err1;
                        }

                        buf->addr = (uintptr_t)vaddr;
                        buf->size = PAGE_SIZE;
                        num_buf++;
                        buf++;
                }
        }

        mem->pd                 = pd;
        mem->umem               = umem;
        mem->access             = access;
        mem->length             = length;
        mem->iova               = iova;
        mem->va                 = start;
        mem->offset             = ib_umem_offset(umem);
        mem->state              = RXE_MEM_STATE_VALID;
        mem->type               = RXE_MEM_TYPE_MR;

        return 0;

err1:
        return err;
}

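/* set up a fast-register memory region: generate keys and allocate
 * map space for up to max_pages pages; the region starts in the FREE
 * state and the page list is filled in when the MR is mapped
 */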
int rxe_mem_init_fast(struct rxe_pd *pd,
                      int max_pages, struct rxe_mem *mem)
{
        int err;

        rxe_mem_init(0, mem);

        /* In fastreg, we also set the rkey */
        mem->ibmr.rkey = mem->ibmr.lkey;

        err = rxe_mem_alloc(mem, max_pages);
        if (err)
                goto err1;

        mem->pd                 = pd;
        mem->max_buf            = max_pages;
        mem->state              = RXE_MEM_STATE_FREE;
        mem->type               = RXE_MEM_TYPE_MR;

        return 0;

err1:
        return err;
}

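/* translate an iova within mem into a (map index, buffer index, offset)
 * triple; takes a fast path when all buffers share a power-of-two page
 * size and falls back to a linear walk of the buffers otherwise
 */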
static void lookup_iova(
        struct rxe_mem          *mem,
        u64                     iova,
        int                     *m_out,
        int                     *n_out,
        size_t                  *offset_out)
{
        size_t                  offset = iova - mem->iova + mem->offset;
        int                     map_index;
        int                     buf_index;
        u64                     length;

        if (likely(mem->page_shift)) {
                *offset_out = offset & mem->page_mask;
                offset >>= mem->page_shift;
                *n_out = offset & mem->map_mask;
                *m_out = offset >> mem->map_shift;
        } else {
                map_index = 0;
                buf_index = 0;

                length = mem->map[map_index]->buf[buf_index].size;

                while (offset >= length) {
                        offset -= length;
                        buf_index++;

                        if (buf_index == RXE_BUF_PER_MAP) {
                                map_index++;
                                buf_index = 0;
                        }
                        length = mem->map[map_index]->buf[buf_index].size;
                }

                *m_out = map_index;
                *n_out = buf_index;
                *offset_out = offset;
        }
}

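/* return the kernel virtual address backing iova, or NULL if the
 * region is not valid, the range check fails, or the requested length
 * would cross a buffer boundary; DMA regions return iova unchanged
 */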
void *iova_to_vaddr(struct rxe_mem *mem, u64 iova, int length)
{
        size_t offset;
        int m, n;
        void *addr;

        if (mem->state != RXE_MEM_STATE_VALID) {
                pr_warn("mem not in valid state\n");
                addr = NULL;
                goto out;
        }

        if (!mem->map) {
                addr = (void *)(uintptr_t)iova;
                goto out;
        }

        if (mem_check_range(mem, iova, length)) {
                pr_warn("range violation\n");
                addr = NULL;
                goto out;
        }

        lookup_iova(mem, iova, &m, &n, &offset);

        if (offset + length > mem->map[m]->buf[n].size) {
                pr_warn("crosses page boundary\n");
                addr = NULL;
                goto out;
        }

        addr = (void *)(uintptr_t)mem->map[m]->buf[n].addr + offset;

out:
        return addr;
}

/* copy data between a range (vaddr, vaddr+length-1) and a mem object
 * starting at iova, in the direction given by dir. Compute an
 * incremental crc32 if crcp is not NULL. The caller must hold a
 * reference to mem.
 */
int rxe_mem_copy(struct rxe_mem *mem, u64 iova, void *addr, int length,
                 enum copy_direction dir, u32 *crcp)
{
        int                     err;
        int                     bytes;
        u8                      *va;
        struct rxe_map          **map;
        struct rxe_phys_buf     *buf;
        int                     m;
        int                     i;
        size_t                  offset;
        u32                     crc = crcp ? (*crcp) : 0;

        if (length == 0)
                return 0;

        if (mem->type == RXE_MEM_TYPE_DMA) {
                u8 *src, *dest;

                src  = (dir == to_mem_obj) ?
                        addr : ((void *)(uintptr_t)iova);

                dest = (dir == to_mem_obj) ?
                        ((void *)(uintptr_t)iova) : addr;

                memcpy(dest, src, length);

                if (crcp)
                        *crcp = rxe_crc32(to_rdev(mem->pd->ibpd.device),
                                        *crcp, dest, length);

                return 0;
        }

        WARN_ON_ONCE(!mem->map);

        err = mem_check_range(mem, iova, length);
        if (err) {
                err = -EFAULT;
                goto err1;
        }

        lookup_iova(mem, iova, &m, &i, &offset);

        map     = mem->map + m;
        buf     = map[0]->buf + i;

        while (length > 0) {
                u8 *src, *dest;

                va      = (u8 *)(uintptr_t)buf->addr + offset;
                src  = (dir == to_mem_obj) ? addr : va;
                dest = (dir == to_mem_obj) ? va : addr;

                bytes   = buf->size - offset;

                if (bytes > length)
                        bytes = length;

                memcpy(dest, src, bytes);

                if (crcp)
                        crc = rxe_crc32(to_rdev(mem->pd->ibpd.device),
                                        crc, dest, bytes);

                length  -= bytes;
                addr    += bytes;

                offset  = 0;
                buf++;
                i++;

                if (i == RXE_BUF_PER_MAP) {
                        i = 0;
                        map++;
                        buf = map[0]->buf;
                }
        }

        if (crcp)
                *crcp = crc;

        return 0;

err1:
        return err;
}

/* copy data in or out of a wqe, i.e. sg list
 * under the control of a dma descriptor
 */
int copy_data(
        struct rxe_pd           *pd,
        int                     access,
        struct rxe_dma_info     *dma,
        void                    *addr,
        int                     length,
        enum copy_direction     dir,
        u32                     *crcp)
{
        int                     bytes;
        struct rxe_sge          *sge    = &dma->sge[dma->cur_sge];
        int                     offset  = dma->sge_offset;
        int                     resid   = dma->resid;
        struct rxe_mem          *mem    = NULL;
        u64                     iova;
        int                     err;

        if (length == 0)
                return 0;

        if (length > resid) {
                err = -EINVAL;
                goto err2;
        }

        if (sge->length && (offset < sge->length)) {
                mem = lookup_mem(pd, access, sge->lkey, lookup_local);
                if (!mem) {
                        err = -EINVAL;
                        goto err1;
                }
        }

        while (length > 0) {
                bytes = length;

                if (offset >= sge->length) {
                        if (mem) {
                                rxe_drop_ref(mem);
                                mem = NULL;
                        }
                        sge++;
                        dma->cur_sge++;
                        offset = 0;

                        if (dma->cur_sge >= dma->num_sge) {
                                err = -ENOSPC;
                                goto err2;
                        }

                        if (sge->length) {
                                mem = lookup_mem(pd, access, sge->lkey,
                                                 lookup_local);
                                if (!mem) {
                                        err = -EINVAL;
                                        goto err1;
                                }
                        } else {
                                continue;
                        }
                }

                if (bytes > sge->length - offset)
                        bytes = sge->length - offset;

                if (bytes > 0) {
                        iova = sge->addr + offset;

                        err = rxe_mem_copy(mem, iova, addr, bytes, dir, crcp);
                        if (err)
                                goto err2;

                        offset  += bytes;
                        resid   -= bytes;
                        length  -= bytes;
                        addr    += bytes;
                }
        }

        dma->sge_offset = offset;
        dma->resid      = resid;

        if (mem)
                rxe_drop_ref(mem);

        return 0;

err2:
        if (mem)
                rxe_drop_ref(mem);
err1:
        return err;
}

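/* skip length bytes of the dma descriptor's sg list without copying,
 * advancing cur_sge, sge_offset and resid just as copy_data() would
 */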
int advance_dma_data(struct rxe_dma_info *dma, unsigned int length)
{
        struct rxe_sge          *sge    = &dma->sge[dma->cur_sge];
        int                     offset  = dma->sge_offset;
        int                     resid   = dma->resid;

        while (length) {
                unsigned int bytes;

                if (offset >= sge->length) {
                        sge++;
                        dma->cur_sge++;
                        offset = 0;
                        if (dma->cur_sge >= dma->num_sge)
                                return -ENOSPC;
                }

                bytes = length;

                if (bytes > sge->length - offset)
                        bytes = sge->length - offset;

                offset  += bytes;
                resid   -= bytes;
                length  -= bytes;
        }

        dma->sge_offset = offset;
        dma->resid      = resid;

        return 0;
}

/* (1) find the mem (mr or mw) corresponding to lkey/rkey
 *     depending on lookup_type
 * (2) verify that the (qp) pd matches the mem pd
 * (3) verify that the mem can support the requested access
 * (4) verify that mem state is valid
 */
struct rxe_mem *lookup_mem(struct rxe_pd *pd, int access, u32 key,
                           enum lookup_type type)
{
        struct rxe_mem *mem;
        struct rxe_dev *rxe = to_rdev(pd->ibpd.device);
        int index = key >> 8;

        mem = rxe_pool_get_index(&rxe->mr_pool, index);
        if (!mem)
                return NULL;

        if (unlikely((type == lookup_local && mem->lkey != key) ||
                     (type == lookup_remote && mem->rkey != key) ||
                     mem->pd != pd ||
                     (access && !(access & mem->access)) ||
                     mem->state != RXE_MEM_STATE_VALID)) {
                rxe_drop_ref(mem);
                mem = NULL;
        }

        return mem;
}

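/* load an array of page addresses into a previously allocated map and
 * mark the region valid; fails if num_pages exceeds the space reserved
 * at allocation time
 */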
int rxe_mem_map_pages(struct rxe_dev *rxe, struct rxe_mem *mem,
                      u64 *page, int num_pages, u64 iova)
{
        int i;
        int num_buf;
        int err;
        struct rxe_map **map;
        struct rxe_phys_buf *buf;
        int page_size;

        if (num_pages > mem->max_buf) {
                err = -EINVAL;
                goto err1;
        }

        num_buf         = 0;
        page_size       = 1 << mem->page_shift;
        map             = mem->map;
        buf             = map[0]->buf;

        for (i = 0; i < num_pages; i++) {
                buf->addr = *page++;
                buf->size = page_size;
                buf++;
                num_buf++;

                if (num_buf == RXE_BUF_PER_MAP) {
                        map++;
                        buf = map[0]->buf;
                        num_buf = 0;
                }
        }

        mem->iova       = iova;
        mem->va         = iova;
        mem->length     = num_pages << mem->page_shift;
        mem->state      = RXE_MEM_STATE_VALID;

        return 0;

err1:
        return err;
}