linux/drivers/infiniband/sw/siw/siw_mem.c
// SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause

/* Authors: Bernard Metzler <bmt@zurich.ibm.com> */
/* Copyright (c) 2008-2019, IBM Corporation */

#include <linux/gfp.h>
#include <rdma/ib_verbs.h>
#include <linux/dma-mapping.h>
#include <linux/slab.h>
#include <linux/sched/mm.h>
#include <linux/resource.h>

#include "siw.h"
#include "siw_mem.h"

/*
 * STag lookup is based on its index part only (24 bits).
 * The code avoids the special STag of zero and tries to randomize
 * STag values between 1 and SIW_STAG_MAX_INDEX.
 */
int siw_mem_add(struct siw_device *sdev, struct siw_mem *m)
{
        struct xa_limit limit = XA_LIMIT(1, 0x00ffffff);
        u32 id, next;

        get_random_bytes(&next, 4);
        next &= 0x00ffffff;

        if (xa_alloc_cyclic(&sdev->mem_xa, &id, m, limit, &next,
            GFP_KERNEL) < 0)
                return -ENOMEM;

        /* Set the STag index part */
        m->stag = id << 8;

        siw_dbg_mem(m, "new MEM object\n");

        return 0;
}

/*
 * siw_mem_id2obj()
 *
 * Resolves memory from an STag given by its index. Might be called from:
 * o process context, before sending out of an SGL, or
 * o softirq context, when resolving target memory
 */
struct siw_mem *siw_mem_id2obj(struct siw_device *sdev, int stag_index)
{
        struct siw_mem *mem;

        rcu_read_lock();
        mem = xa_load(&sdev->mem_xa, stag_index);
        if (likely(mem && kref_get_unless_zero(&mem->ref))) {
                rcu_read_unlock();
                return mem;
        }
        rcu_read_unlock();

        return NULL;
}

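/*
 * siw_free_plist()
 *
 * Release the pinned user pages of one page chunk. If @dirty is set,
 * pages not yet marked dirty are dirtied before being released.
 */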
static void siw_free_plist(struct siw_page_chunk *chunk, int num_pages,
                           bool dirty)
{
        struct page **p = chunk->plist;

        while (num_pages--) {
                if (!PageDirty(*p) && dirty)
                        put_user_pages_dirty_lock(p, 1);
                else
                        put_user_page(*p);
                p++;
        }
}

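/*
 * siw_umem_release()
 *
 * Release all page chunks of a pinned user memory region, subtract
 * the pages from the owning mm's pinned page accounting and drop
 * the reference on that mm.
 */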
void siw_umem_release(struct siw_umem *umem, bool dirty)
{
        struct mm_struct *mm_s = umem->owning_mm;
        int i, num_pages = umem->num_pages;

        for (i = 0; num_pages; i++) {
                int to_free = min_t(int, PAGES_PER_CHUNK, num_pages);

                siw_free_plist(&umem->page_chunk[i], to_free,
                               umem->writable && dirty);
                kfree(umem->page_chunk[i].plist);
                num_pages -= to_free;
        }
        atomic64_sub(umem->num_pages, &mm_s->pinned_vm);

        mmdrop(mm_s);
        kfree(umem->page_chunk);
        kfree(umem);
}

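/*
 * siw_mr_add_mem()
 *
 * Allocate and initialize the siw_mem object backing a memory region
 * and insert it into the device's mem_xa at a randomized index. The
 * XArray index forms the 24-bit STag index; lkey and rkey are derived
 * from it. The STag is created in the invalid state.
 */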
int siw_mr_add_mem(struct siw_mr *mr, struct ib_pd *pd, void *mem_obj,
                   u64 start, u64 len, int rights)
{
        struct siw_device *sdev = to_siw_dev(pd->device);
        struct siw_mem *mem = kzalloc(sizeof(*mem), GFP_KERNEL);
        struct xa_limit limit = XA_LIMIT(1, 0x00ffffff);
        u32 id, next;

        if (!mem)
                return -ENOMEM;

        mem->mem_obj = mem_obj;
        mem->stag_valid = 0;
        mem->sdev = sdev;
        mem->va = start;
        mem->len = len;
        mem->pd = pd;
        mem->perms = rights & IWARP_ACCESS_MASK;
        kref_init(&mem->ref);

        mr->mem = mem;

        get_random_bytes(&next, 4);
        next &= 0x00ffffff;

        if (xa_alloc_cyclic(&sdev->mem_xa, &id, mem, limit, &next,
            GFP_KERNEL) < 0) {
                kfree(mem);
                return -ENOMEM;
        }
        /* Set the STag index part */
        mem->stag = id << 8;
        mr->base_mr.lkey = mr->base_mr.rkey = mem->stag;

        return 0;
}

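/*
 * siw_mr_drop_mem()
 *
 * Invalidate the memory region's STag, remove the memory object
 * from the device's mem_xa and drop a reference on the object.
 */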
void siw_mr_drop_mem(struct siw_mr *mr)
{
        struct siw_mem *mem = mr->mem, *found;

        mem->stag_valid = 0;

        /* make STag invalid visible asap */
        smp_mb();

        found = xa_erase(&mem->sdev->mem_xa, mem->stag >> 8);
        WARN_ON(found != mem);
        siw_mem_put(mem);
}

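/*
 * siw_free_mem()
 *
 * kref release function for siw_mem objects: frees the pinned user
 * memory or the physical buffer list backing the object, unless it
 * represents a memory window, and finally frees the object itself.
 */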
void siw_free_mem(struct kref *ref)
{
        struct siw_mem *mem = container_of(ref, struct siw_mem, ref);

        siw_dbg_mem(mem, "free mem, pbl: %s\n", mem->is_pbl ? "y" : "n");

        if (!mem->is_mw && mem->mem_obj) {
                if (mem->is_pbl == 0)
                        siw_umem_release(mem->umem, true);
                else
                        kfree(mem->pbl);
        }
        kfree(mem);
}

/*
 * siw_check_mem()
 *
 * Check protection domain, STag state, access permissions and
 * address range for memory object.
 *
 * @pd:         Protection Domain memory should belong to
 * @mem:        memory to be checked
 * @addr:       starting addr of mem
 * @perms:      requested access permissions
 * @len:        len of memory interval to be checked
 *
 */
int siw_check_mem(struct ib_pd *pd, struct siw_mem *mem, u64 addr,
                  enum ib_access_flags perms, int len)
{
        if (!mem->stag_valid) {
                siw_dbg_pd(pd, "STag 0x%08x invalid\n", mem->stag);
                return -E_STAG_INVALID;
        }
        if (mem->pd != pd) {
                siw_dbg_pd(pd, "STag 0x%08x: PD mismatch\n", mem->stag);
                return -E_PD_MISMATCH;
        }
        /*
         * check access permissions
         */
        if ((mem->perms & perms) < perms) {
                siw_dbg_pd(pd, "permissions 0x%08x < 0x%08x\n",
                           mem->perms, perms);
                return -E_ACCESS_PERM;
        }
        /*
         * Check if access falls into valid memory interval.
         */
        if (addr < mem->va || addr + len > mem->va + mem->len) {
                siw_dbg_pd(pd, "MEM interval len %d\n", len);
                siw_dbg_pd(pd, "[0x%pK, 0x%pK] out of bounds\n",
                           (void *)(uintptr_t)addr,
                           (void *)(uintptr_t)(addr + len));
                siw_dbg_pd(pd, "[0x%pK, 0x%pK] STag=0x%08x\n",
                           (void *)(uintptr_t)mem->va,
                           (void *)(uintptr_t)(mem->va + mem->len),
                           mem->stag);

                return -E_BASE_BOUNDS;
        }
        return E_ACCESS_OK;
}

/*
 * siw_check_sge()
 *
 * Check SGE for access rights in given interval
 *
 * @pd:         Protection Domain memory should belong to
 * @sge:        SGE to be checked
 * @mem:        location of memory reference within array
 * @perms:      requested access permissions
 * @off:        starting offset in SGE
 * @len:        len of memory interval to be checked
 *
 * NOTE: Function references SGE's memory object (mem->obj)
 * if not yet done. The new reference is kept if the check succeeded
 * and released if it failed. If mem->obj is already valid, no new
 * lookup is done and mem is not released if the check fails.
 */
int siw_check_sge(struct ib_pd *pd, struct siw_sge *sge, struct siw_mem *mem[],
                  enum ib_access_flags perms, u32 off, int len)
{
        struct siw_device *sdev = to_siw_dev(pd->device);
        struct siw_mem *new = NULL;
        int rv = E_ACCESS_OK;

        if (len + off > sge->length) {
                rv = -E_BASE_BOUNDS;
                goto fail;
        }
        if (*mem == NULL) {
                new = siw_mem_id2obj(sdev, sge->lkey >> 8);
                if (unlikely(!new)) {
                        siw_dbg_pd(pd, "STag unknown: 0x%08x\n", sge->lkey);
                        rv = -E_STAG_INVALID;
                        goto fail;
                }
                *mem = new;
        }
        /* Check if user re-registered with different STag key */
        if (unlikely((*mem)->stag != sge->lkey)) {
                siw_dbg_mem((*mem), "STag mismatch: 0x%08x\n", sge->lkey);
                rv = -E_STAG_INVALID;
                goto fail;
        }
        rv = siw_check_mem(pd, *mem, sge->laddr + off, perms, len);
        if (unlikely(rv))
                goto fail;

        return 0;

fail:
        if (new) {
                *mem = NULL;
                siw_mem_put(new);
        }
        return rv;
}

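/*
 * siw_wqe_put_mem()
 *
 * Drop the memory references held by a work queue element, depending
 * on its opcode. Inline SQ elements as well as STag invalidation and
 * MR registration operations hold no memory references.
 */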
void siw_wqe_put_mem(struct siw_wqe *wqe, enum siw_opcode op)
{
        switch (op) {
        case SIW_OP_SEND:
        case SIW_OP_WRITE:
        case SIW_OP_SEND_WITH_IMM:
        case SIW_OP_SEND_REMOTE_INV:
        case SIW_OP_READ:
        case SIW_OP_READ_LOCAL_INV:
                if (!(wqe->sqe.flags & SIW_WQE_INLINE))
                        siw_unref_mem_sgl(wqe->mem, wqe->sqe.num_sge);
                break;

        case SIW_OP_RECEIVE:
                siw_unref_mem_sgl(wqe->mem, wqe->rqe.num_sge);
                break;

        case SIW_OP_READ_RESPONSE:
                siw_unref_mem_sgl(wqe->mem, 1);
                break;

        default:
                /*
                 * SIW_OP_INVAL_STAG and SIW_OP_REG_MR
                 * do not hold memory references
                 */
                break;
        }
}

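/*
 * siw_invalidate_stag()
 *
 * Invalidate the STag given by @stag. The referenced memory
 * must belong to the protection domain @pd.
 */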
int siw_invalidate_stag(struct ib_pd *pd, u32 stag)
{
        struct siw_device *sdev = to_siw_dev(pd->device);
        struct siw_mem *mem = siw_mem_id2obj(sdev, stag >> 8);
        int rv = 0;

        if (unlikely(!mem)) {
                siw_dbg_pd(pd, "STag 0x%08x unknown\n", stag);
                return -EINVAL;
        }
        if (unlikely(mem->pd != pd)) {
                siw_dbg_pd(pd, "PD mismatch for STag 0x%08x\n", stag);
                rv = -EACCES;
                goto out;
        }
        /*
         * Per RDMA verbs definition, an STag may already be in invalid
         * state if invalidation is requested. So no state check here.
         */
        mem->stag_valid = 0;

        siw_dbg_pd(pd, "STag 0x%08x now invalid\n", stag);
out:
        siw_mem_put(mem);
        return rv;
}

/*
 * Gets physical address backed by PBL element. Address is referenced
 * by linear byte offset into list of variably sized PB elements.
 * Optionally, provides remaining len within current element, and
 * current PBL index for later resume at same element.
 */
dma_addr_t siw_pbl_get_buffer(struct siw_pbl *pbl, u64 off, int *len, int *idx)
{
        int i = idx ? *idx : 0;

        while (i < pbl->num_buf) {
                struct siw_pble *pble = &pbl->pbe[i];

                if (pble->pbl_off + pble->size > off) {
                        u64 pble_off = off - pble->pbl_off;

                        if (len)
                                *len = pble->size - pble_off;
                        if (idx)
                                *idx = i;

                        return pble->addr + pble_off;
                }
                i++;
        }
        if (len)
                *len = 0;
        return 0;
}

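/*
 * siw_pbl_alloc()
 *
 * Allocate a physical buffer list with room for @num_buf
 * page buffer elements.
 */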
struct siw_pbl *siw_pbl_alloc(u32 num_buf)
{
        struct siw_pbl *pbl;
        int buf_size = sizeof(*pbl);

        if (num_buf == 0)
                return ERR_PTR(-EINVAL);

        buf_size += ((num_buf - 1) * sizeof(struct siw_pble));

        pbl = kzalloc(buf_size, GFP_KERNEL);
        if (!pbl)
                return ERR_PTR(-ENOMEM);

        pbl->max_buf = num_buf;

        return pbl;
}

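/*
 * siw_umem_get()
 *
 * Pin the pages covering the user buffer [start, start + len).
 * Pages are pinned in chunks of at most PAGES_PER_CHUNK entries
 * and accounted against the process' RLIMIT_MEMLOCK limit.
 */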
struct siw_umem *siw_umem_get(u64 start, u64 len, bool writable)
{
        struct siw_umem *umem;
        struct mm_struct *mm_s;
        u64 first_page_va;
        unsigned long mlock_limit;
        unsigned int foll_flags = FOLL_WRITE;
        int num_pages, num_chunks, i, rv = 0;

        if (!can_do_mlock())
                return ERR_PTR(-EPERM);

        if (!len)
                return ERR_PTR(-EINVAL);

        first_page_va = start & PAGE_MASK;
        num_pages = PAGE_ALIGN(start + len - first_page_va) >> PAGE_SHIFT;
        num_chunks = (num_pages >> CHUNK_SHIFT) + 1;

        umem = kzalloc(sizeof(*umem), GFP_KERNEL);
        if (!umem)
                return ERR_PTR(-ENOMEM);

        mm_s = current->mm;
        umem->owning_mm = mm_s;
        umem->writable = writable;

        mmgrab(mm_s);

        if (!writable)
                foll_flags |= FOLL_FORCE;

        down_read(&mm_s->mmap_sem);

        mlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;

        if (num_pages + atomic64_read(&mm_s->pinned_vm) > mlock_limit) {
                rv = -ENOMEM;
                goto out_sem_up;
        }
        umem->fp_addr = first_page_va;

        umem->page_chunk =
                kcalloc(num_chunks, sizeof(struct siw_page_chunk), GFP_KERNEL);
        if (!umem->page_chunk) {
                rv = -ENOMEM;
                goto out_sem_up;
        }
        for (i = 0; num_pages; i++) {
                int got, nents = min_t(int, num_pages, PAGES_PER_CHUNK);

                umem->page_chunk[i].plist =
                        kcalloc(nents, sizeof(struct page *), GFP_KERNEL);
                if (!umem->page_chunk[i].plist) {
                        rv = -ENOMEM;
                        goto out_sem_up;
                }
                got = 0;
                while (nents) {
                        struct page **plist = &umem->page_chunk[i].plist[got];

                        rv = get_user_pages(first_page_va, nents,
                                            foll_flags | FOLL_LONGTERM,
                                            plist, NULL);
                        if (rv < 0)
                                goto out_sem_up;

                        umem->num_pages += rv;
                        atomic64_add(rv, &mm_s->pinned_vm);
                        first_page_va += rv * PAGE_SIZE;
                        nents -= rv;
                        got += rv;
                }
                num_pages -= got;
        }
out_sem_up:
        up_read(&mm_s->mmap_sem);

        if (rv > 0)
                return umem;

        siw_umem_release(umem, false);

        return ERR_PTR(rv);
}