linux/drivers/infiniband/sw/rxe/rxe_mr.c
// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/*
 * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
 * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
 */

#include "rxe.h"
#include "rxe_loc.h"

/*
 * lfsr (linear feedback shift register) with period 255
 */
static u8 rxe_get_key(void)
{
        static u32 key = 1;

        key = key << 1;

        key |= (0 != (key & 0x100)) ^ (0 != (key & 0x10))
                ^ (0 != (key & 0x80)) ^ (0 != (key & 0x40));

        key &= 0xff;

        return key;
}

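/* check that [iova, iova + length) lies inside the registered
 * region; DMA memory regions have no bounds to check
 */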
int mem_check_range(struct rxe_mem *mem, u64 iova, size_t length)
{
        switch (mem->type) {
        case RXE_MEM_TYPE_DMA:
                return 0;

        case RXE_MEM_TYPE_MR:
                if (iova < mem->iova ||
                    length > mem->length ||
                    iova > mem->iova + mem->length - length)
                        return -EFAULT;
                return 0;

        default:
                return -EFAULT;
        }
}

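/* mask of the access flags that grant a remote peer access to the mr */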
#define IB_ACCESS_REMOTE        (IB_ACCESS_REMOTE_READ          \
                                | IB_ACCESS_REMOTE_WRITE        \
                                | IB_ACCESS_REMOTE_ATOMIC)

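/* common mr setup: build the lkey (and the rkey, when remote access
 * is requested) from the pool index and the next lfsr key, and start
 * the mr out invalid and untyped
 */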
static void rxe_mem_init(int access, struct rxe_mem *mem)
{
        u32 lkey = mem->pelem.index << 8 | rxe_get_key();
        u32 rkey = (access & IB_ACCESS_REMOTE) ? lkey : 0;

        mem->ibmr.lkey          = lkey;
        mem->ibmr.rkey          = rkey;
        mem->state              = RXE_MEM_STATE_INVALID;
        mem->type               = RXE_MEM_TYPE_NONE;
        mem->map_shift          = ilog2(RXE_BUF_PER_MAP);
}

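/* pool cleanup callback: release the pinned user pages, if any,
 * and free the two-level translation table
 */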
void rxe_mem_cleanup(struct rxe_pool_entry *arg)
{
        struct rxe_mem *mem = container_of(arg, typeof(*mem), pelem);
        int i;

        ib_umem_release(mem->umem);

        if (mem->map) {
                for (i = 0; i < mem->num_map; i++)
                        kfree(mem->map[i]);

                kfree(mem->map);
        }
}

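/* allocate a two-level table able to describe num_buf physical
 * buffers: an array of num_map pointers to maps holding
 * RXE_BUF_PER_MAP buffer entries each
 */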
static int rxe_mem_alloc(struct rxe_mem *mem, int num_buf)
{
        int i;
        int num_map;
        struct rxe_map **map = mem->map;

        num_map = (num_buf + RXE_BUF_PER_MAP - 1) / RXE_BUF_PER_MAP;

        mem->map = kmalloc_array(num_map, sizeof(*map), GFP_KERNEL);
        if (!mem->map)
                goto err1;

        for (i = 0; i < num_map; i++) {
                mem->map[i] = kmalloc(sizeof(**map), GFP_KERNEL);
                if (!mem->map[i])
                        goto err2;
        }

        BUILD_BUG_ON(!is_power_of_2(RXE_BUF_PER_MAP));

        mem->map_shift  = ilog2(RXE_BUF_PER_MAP);
        mem->map_mask   = RXE_BUF_PER_MAP - 1;

        mem->num_buf = num_buf;
        mem->num_map = num_map;
        mem->max_buf = num_map * RXE_BUF_PER_MAP;

        return 0;

err2:
        for (i--; i >= 0; i--)
                kfree(mem->map[i]);

        kfree(mem->map);
err1:
        return -ENOMEM;
}

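/* set up the mr that covers the full local DMA address space;
 * no translation table is needed
 */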
void rxe_mem_init_dma(struct rxe_pd *pd,
                      int access, struct rxe_mem *mem)
{
        rxe_mem_init(access, mem);

        mem->ibmr.pd            = &pd->ibpd;
        mem->access             = access;
        mem->state              = RXE_MEM_STATE_VALID;
        mem->type               = RXE_MEM_TYPE_DMA;
}

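/* register a user memory region: pin the pages with ib_umem_get()
 * and record the kernel virtual address and size of each page in
 * the translation table
 */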
int rxe_mem_init_user(struct rxe_pd *pd, u64 start,
                      u64 length, u64 iova, int access, struct ib_udata *udata,
                      struct rxe_mem *mem)
{
        struct rxe_map          **map;
        struct rxe_phys_buf     *buf = NULL;
        struct ib_umem          *umem;
        struct sg_page_iter     sg_iter;
        int                     num_buf;
        void                    *vaddr;
        int err;

        umem = ib_umem_get(pd->ibpd.device, start, length, access);
        if (IS_ERR(umem)) {
                pr_warn("err %d from ib_umem_get\n",
                        (int)PTR_ERR(umem));
                err = -EINVAL;
                goto err1;
        }

        mem->umem = umem;
        num_buf = ib_umem_num_pages(umem);

        rxe_mem_init(access, mem);

        err = rxe_mem_alloc(mem, num_buf);
        if (err) {
                pr_warn("err %d from rxe_mem_alloc\n", err);
                ib_umem_release(umem);
                goto err1;
        }

        mem->page_shift         = PAGE_SHIFT;
        mem->page_mask          = PAGE_SIZE - 1;

        num_buf                 = 0;
        map                     = mem->map;
        if (length > 0) {
                buf = map[0]->buf;

                for_each_sg_page(umem->sg_head.sgl, &sg_iter, umem->nmap, 0) {
                        if (num_buf >= RXE_BUF_PER_MAP) {
                                map++;
                                buf = map[0]->buf;
                                num_buf = 0;
                        }

                        vaddr = page_address(sg_page_iter_page(&sg_iter));
                        if (!vaddr) {
                                pr_warn("null vaddr\n");
                                ib_umem_release(umem);
                                err = -ENOMEM;
                                goto err1;
                        }

                        buf->addr = (uintptr_t)vaddr;
                        buf->size = PAGE_SIZE;
                        num_buf++;
                        buf++;
                }
        }

        mem->ibmr.pd            = &pd->ibpd;
        mem->umem               = umem;
        mem->access             = access;
        mem->length             = length;
        mem->iova               = iova;
        mem->va                 = start;
        mem->offset             = ib_umem_offset(umem);
        mem->state              = RXE_MEM_STATE_VALID;
        mem->type               = RXE_MEM_TYPE_MR;

        return 0;

err1:
        return err;
}

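/* set up an mr used for fast registration work requests; the
 * translation table is sized for max_pages but left unpopulated
 * here, and the mr starts in the free state
 */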
int rxe_mem_init_fast(struct rxe_pd *pd,
                      int max_pages, struct rxe_mem *mem)
{
        int err;

        rxe_mem_init(0, mem);

        /* In fastreg, we also set the rkey */
        mem->ibmr.rkey = mem->ibmr.lkey;

        err = rxe_mem_alloc(mem, max_pages);
        if (err)
                goto err1;

        mem->ibmr.pd            = &pd->ibpd;
        mem->max_buf            = max_pages;
        mem->state              = RXE_MEM_STATE_FREE;
        mem->type               = RXE_MEM_TYPE_MR;

        return 0;

err1:
        return err;
}

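/* translate an iova within the mr into a map index (m_out), a
 * buffer index within that map (n_out) and a byte offset within
 * the buffer; use shifts and masks when all buffers are page
 * sized, otherwise walk the buffer list
 */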
static void lookup_iova(
        struct rxe_mem  *mem,
        u64                     iova,
        int                     *m_out,
        int                     *n_out,
        size_t                  *offset_out)
{
        size_t                  offset = iova - mem->iova + mem->offset;
        int                     map_index;
        int                     buf_index;
        u64                     length;

        if (likely(mem->page_shift)) {
                *offset_out = offset & mem->page_mask;
                offset >>= mem->page_shift;
                *n_out = offset & mem->map_mask;
                *m_out = offset >> mem->map_shift;
        } else {
                map_index = 0;
                buf_index = 0;

                length = mem->map[map_index]->buf[buf_index].size;

                while (offset >= length) {
                        offset -= length;
                        buf_index++;

                        if (buf_index == RXE_BUF_PER_MAP) {
                                map_index++;
                                buf_index = 0;
                        }
                        length = mem->map[map_index]->buf[buf_index].size;
                }

                *m_out = map_index;
                *n_out = buf_index;
                *offset_out = offset;
        }
}

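/* return the kernel virtual address backing iova, or NULL if the
 * mr is not in the valid state, the range check fails or the range
 * does not fit within a single buffer; for DMA mrs (no map) the
 * iova is returned directly
 */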
void *iova_to_vaddr(struct rxe_mem *mem, u64 iova, int length)
{
        size_t offset;
        int m, n;
        void *addr;

        if (mem->state != RXE_MEM_STATE_VALID) {
                pr_warn("mem not in valid state\n");
                addr = NULL;
                goto out;
        }

        if (!mem->map) {
                addr = (void *)(uintptr_t)iova;
                goto out;
        }

        if (mem_check_range(mem, iova, length)) {
                pr_warn("range violation\n");
                addr = NULL;
                goto out;
        }

        lookup_iova(mem, iova, &m, &n, &offset);

        if (offset + length > mem->map[m]->buf[n].size) {
                pr_warn("crosses page boundary\n");
                addr = NULL;
                goto out;
        }

        addr = (void *)(uintptr_t)mem->map[m]->buf[n].addr + offset;

out:
        return addr;
}

/* copy data from a range (vaddr, vaddr+length-1) to or from
 * a mem object starting at iova. Compute incremental value of
 * crc32 if crcp is not NULL. caller must hold a reference to mem
 */
int rxe_mem_copy(struct rxe_mem *mem, u64 iova, void *addr, int length,
                 enum copy_direction dir, u32 *crcp)
{
        int                     err;
        int                     bytes;
        u8                      *va;
        struct rxe_map          **map;
        struct rxe_phys_buf     *buf;
        int                     m;
        int                     i;
        size_t                  offset;
        u32                     crc = crcp ? (*crcp) : 0;

        if (length == 0)
                return 0;

        if (mem->type == RXE_MEM_TYPE_DMA) {
                u8 *src, *dest;

                src  = (dir == to_mem_obj) ?
                        addr : ((void *)(uintptr_t)iova);

                dest = (dir == to_mem_obj) ?
                        ((void *)(uintptr_t)iova) : addr;

                memcpy(dest, src, length);

                if (crcp)
                        *crcp = rxe_crc32(to_rdev(mem->ibmr.device),
                                        *crcp, dest, length);

                return 0;
        }

        WARN_ON_ONCE(!mem->map);

        err = mem_check_range(mem, iova, length);
        if (err) {
                err = -EFAULT;
                goto err1;
        }

        lookup_iova(mem, iova, &m, &i, &offset);

        map     = mem->map + m;
        buf     = map[0]->buf + i;

        while (length > 0) {
                u8 *src, *dest;

                va      = (u8 *)(uintptr_t)buf->addr + offset;
                src  = (dir == to_mem_obj) ? addr : va;
                dest = (dir == to_mem_obj) ? va : addr;

                bytes   = buf->size - offset;

                if (bytes > length)
                        bytes = length;

                memcpy(dest, src, bytes);

                if (crcp)
                        crc = rxe_crc32(to_rdev(mem->ibmr.device),
                                        crc, dest, bytes);

                length  -= bytes;
                addr    += bytes;

                offset  = 0;
                buf++;
                i++;

                if (i == RXE_BUF_PER_MAP) {
                        i = 0;
                        map++;
                        buf = map[0]->buf;
                }
        }

        if (crcp)
                *crcp = crc;

        return 0;

err1:
        return err;
}

/* copy data in or out of a wqe, i.e. sg list
 * under the control of a dma descriptor
 */
int copy_data(
        struct rxe_pd           *pd,
        int                     access,
        struct rxe_dma_info     *dma,
        void                    *addr,
        int                     length,
        enum copy_direction     dir,
        u32                     *crcp)
{
        int                     bytes;
        struct rxe_sge          *sge    = &dma->sge[dma->cur_sge];
        int                     offset  = dma->sge_offset;
        int                     resid   = dma->resid;
        struct rxe_mem          *mem    = NULL;
        u64                     iova;
        int                     err;

        if (length == 0)
                return 0;

        if (length > resid) {
                err = -EINVAL;
                goto err2;
        }

        if (sge->length && (offset < sge->length)) {
                mem = lookup_mem(pd, access, sge->lkey, lookup_local);
                if (!mem) {
                        err = -EINVAL;
                        goto err1;
                }
        }

        while (length > 0) {
                bytes = length;

                if (offset >= sge->length) {
                        if (mem) {
                                rxe_drop_ref(mem);
                                mem = NULL;
                        }
                        sge++;
                        dma->cur_sge++;
                        offset = 0;

                        if (dma->cur_sge >= dma->num_sge) {
                                err = -ENOSPC;
                                goto err2;
                        }

                        if (sge->length) {
                                mem = lookup_mem(pd, access, sge->lkey,
                                                 lookup_local);
                                if (!mem) {
                                        err = -EINVAL;
                                        goto err1;
                                }
                        } else {
                                continue;
                        }
                }

                if (bytes > sge->length - offset)
                        bytes = sge->length - offset;

                if (bytes > 0) {
                        iova = sge->addr + offset;

                        err = rxe_mem_copy(mem, iova, addr, bytes, dir, crcp);
                        if (err)
                                goto err2;

                        offset  += bytes;
                        resid   -= bytes;
                        length  -= bytes;
                        addr    += bytes;
                }
        }

        dma->sge_offset = offset;
        dma->resid      = resid;

        if (mem)
                rxe_drop_ref(mem);

        return 0;

err2:
        if (mem)
                rxe_drop_ref(mem);
err1:
        return err;
}

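/* advance the dma descriptor (current sge, offset and residual
 * count) over length bytes of the sg list without copying any data
 */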
int advance_dma_data(struct rxe_dma_info *dma, unsigned int length)
{
        struct rxe_sge          *sge    = &dma->sge[dma->cur_sge];
        int                     offset  = dma->sge_offset;
        int                     resid   = dma->resid;

        while (length) {
                unsigned int bytes;

                if (offset >= sge->length) {
                        sge++;
                        dma->cur_sge++;
                        offset = 0;
                        if (dma->cur_sge >= dma->num_sge)
                                return -ENOSPC;
                }

                bytes = length;

                if (bytes > sge->length - offset)
                        bytes = sge->length - offset;

                offset  += bytes;
                resid   -= bytes;
                length  -= bytes;
        }

        dma->sge_offset = offset;
        dma->resid      = resid;

        return 0;
}

/* (1) find the mem (mr or mw) corresponding to lkey/rkey
 *     depending on lookup_type
 * (2) verify that the (qp) pd matches the mem pd
 * (3) verify that the mem can support the requested access
 * (4) verify that mem state is valid
 */
struct rxe_mem *lookup_mem(struct rxe_pd *pd, int access, u32 key,
                           enum lookup_type type)
{
        struct rxe_mem *mem;
        struct rxe_dev *rxe = to_rdev(pd->ibpd.device);
        int index = key >> 8;

        mem = rxe_pool_get_index(&rxe->mr_pool, index);
        if (!mem)
                return NULL;

        if (unlikely((type == lookup_local && mr_lkey(mem) != key) ||
                     (type == lookup_remote && mr_rkey(mem) != key) ||
                     mr_pd(mem) != pd ||
                     (access && !(access & mem->access)) ||
                     mem->state != RXE_MEM_STATE_VALID)) {
                rxe_drop_ref(mem);
                mem = NULL;
        }

        return mem;
}