linux/drivers/vdpa/mlx5/core/mr.c
// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/* Copyright (c) 2020 Mellanox Technologies Ltd. */

#include <linux/vhost_types.h>
#include <linux/vdpa.h>
#include <linux/gcd.h>
#include <linux/string.h>
#include <linux/mlx5/qp.h>
#include "mlx5_vdpa.h"

/* DIV_ROUND_UP where the divisor is a power of 2, given by its log base 2 value */
#define MLX5_DIV_ROUND_UP_POW2(_n, _s) \
({ \
        u64 __s = _s; \
        u64 _res; \
        _res = (((_n) + (1 << (__s)) - 1) >> (__s)); \
        _res; \
})
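/* For illustration: MLX5_DIV_ROUND_UP_POW2(10, 3) evaluates (10 + 7) >> 3 = 2,
 * the same result as DIV_ROUND_UP(10, 8).
 */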

static int get_octo_len(u64 len, int page_shift)
{
        u64 page_size = 1ULL << page_shift;
        int npages;

        npages = ALIGN(len, page_size) >> page_shift;
        return (npages + 1) / 2;
}
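/* Note on get_octo_len(): each MTT entry is 8 bytes and the device sizes the
 * translation table in 16-byte octwords, so the octword count is the page
 * count divided by two, rounded up.
 */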

static void mlx5_set_access_mode(void *mkc, int mode)
{
        MLX5_SET(mkc, mkc, access_mode_1_0, mode & 0x3);
        MLX5_SET(mkc, mkc, access_mode_4_2, mode >> 2);
}
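/* The mkey context encodes the 5-bit access mode in two separate fields, bits
 * 1:0 and bits 4:2; mlx5_set_access_mode() splits the value accordingly.
 */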

static void populate_mtts(struct mlx5_vdpa_direct_mr *mr, __be64 *mtt)
{
        struct scatterlist *sg;
        int nsg = mr->nsg;
        u64 dma_addr;
        u64 dma_len;
        int j = 0;
        int i;

        for_each_sg(mr->sg_head.sgl, sg, mr->nent, i) {
                for (dma_addr = sg_dma_address(sg), dma_len = sg_dma_len(sg);
                     nsg && dma_len;
                     nsg--, dma_addr += BIT(mr->log_size), dma_len -= BIT(mr->log_size))
                        mtt[j++] = cpu_to_be64(dma_addr);
        }
}
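/* populate_mtts() walks the DMA-mapped scatterlist and emits one big-endian
 * MTT entry per 2^log_size-byte chunk of each segment, up to mr->nsg entries.
 */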

static int create_direct_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_direct_mr *mr)
{
        int inlen;
        void *mkc;
        void *in;
        int err;

        inlen = MLX5_ST_SZ_BYTES(create_mkey_in) + roundup(MLX5_ST_SZ_BYTES(mtt) * mr->nsg, 16);
        in = kvzalloc(inlen, GFP_KERNEL);
        if (!in)
                return -ENOMEM;

        MLX5_SET(create_mkey_in, in, uid, mvdev->res.uid);
        mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
        MLX5_SET(mkc, mkc, lw, !!(mr->perm & VHOST_MAP_WO));
        MLX5_SET(mkc, mkc, lr, !!(mr->perm & VHOST_MAP_RO));
        mlx5_set_access_mode(mkc, MLX5_MKC_ACCESS_MODE_MTT);
        MLX5_SET(mkc, mkc, qpn, 0xffffff);
        MLX5_SET(mkc, mkc, pd, mvdev->res.pdn);
        MLX5_SET64(mkc, mkc, start_addr, mr->offset);
        MLX5_SET64(mkc, mkc, len, mr->end - mr->start);
        MLX5_SET(mkc, mkc, log_page_size, mr->log_size);
        MLX5_SET(mkc, mkc, translations_octword_size,
                 get_octo_len(mr->end - mr->start, mr->log_size));
        MLX5_SET(create_mkey_in, in, translations_octword_actual_size,
                 get_octo_len(mr->end - mr->start, mr->log_size));
        populate_mtts(mr, MLX5_ADDR_OF(create_mkey_in, in, klm_pas_mtt));
        err = mlx5_vdpa_create_mkey(mvdev, &mr->mr, in, inlen);
        kvfree(in);
        if (err) {
                mlx5_vdpa_warn(mvdev, "Failed to create direct MR\n");
                return err;
        }

        return 0;
}
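/* The direct mkey created above covers [mr->start, mr->end) with MTT
 * translations at 2^log_size granularity; it is later referenced by the
 * indirect (KLM) key built in create_indirect_key().
 */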

static void destroy_direct_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_direct_mr *mr)
{
        mlx5_vdpa_destroy_mkey(mvdev, &mr->mr);
}

static u64 map_start(struct vhost_iotlb_map *map, struct mlx5_vdpa_direct_mr *mr)
{
        return max_t(u64, map->start, mr->start);
}

static u64 map_end(struct vhost_iotlb_map *map, struct mlx5_vdpa_direct_mr *mr)
{
        return min_t(u64, map->last + 1, mr->end);
}

static u64 maplen(struct vhost_iotlb_map *map, struct mlx5_vdpa_direct_mr *mr)
{
        return map_end(map, mr) - map_start(map, mr);
}

#define MLX5_VDPA_INVALID_START_ADDR ((u64)-1)
#define MLX5_VDPA_INVALID_LEN ((u64)-1)

static u64 indir_start_addr(struct mlx5_vdpa_mr *mkey)
{
        struct mlx5_vdpa_direct_mr *s;

        s = list_first_entry_or_null(&mkey->head, struct mlx5_vdpa_direct_mr, list);
        if (!s)
                return MLX5_VDPA_INVALID_START_ADDR;

        return s->start;
}

static u64 indir_len(struct mlx5_vdpa_mr *mkey)
{
        struct mlx5_vdpa_direct_mr *s;
        struct mlx5_vdpa_direct_mr *e;

        s = list_first_entry_or_null(&mkey->head, struct mlx5_vdpa_direct_mr, list);
        if (!s)
                return MLX5_VDPA_INVALID_LEN;

        e = list_last_entry(&mkey->head, struct mlx5_vdpa_direct_mr, list);

        return e->end - s->start;
}

#define LOG_MAX_KLM_SIZE 30
#define MAX_KLM_SIZE BIT(LOG_MAX_KLM_SIZE)

static u32 klm_bcount(u64 size)
{
        return (u32)size;
}

static void fill_indir(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_mr *mkey, void *in)
{
        struct mlx5_vdpa_direct_mr *dmr;
        struct mlx5_klm *klmarr;
        struct mlx5_klm *klm;
        bool first = true;
        u64 preve;
        int i;

        klmarr = MLX5_ADDR_OF(create_mkey_in, in, klm_pas_mtt);
        i = 0;
        list_for_each_entry(dmr, &mkey->head, list) {
again:
                klm = &klmarr[i++];
                if (first) {
                        preve = dmr->start;
                        first = false;
                }

                if (preve == dmr->start) {
                        klm->key = cpu_to_be32(dmr->mr.key);
                        klm->bcount = cpu_to_be32(klm_bcount(dmr->end - dmr->start));
                        preve = dmr->end;
                } else {
                        klm->key = cpu_to_be32(mvdev->res.null_mkey);
                        klm->bcount = cpu_to_be32(klm_bcount(dmr->start - preve));
                        preve = dmr->start;
                        goto again;
                }
        }
}
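/* fill_indir() emits one KLM entry per direct mkey; when consecutive direct
 * MRs are not contiguous, the gap is covered by a KLM that points at the null
 * mkey, so the address space stays contiguous from the device's point of view.
 */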

static int klm_byte_size(int nklms)
{
        return 16 * ALIGN(nklms, 4);
}
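/* klm_byte_size(): each KLM entry is 16 bytes and the count is aligned up to a
 * multiple of four entries, so the KLM list size is always a multiple of 64 bytes.
 */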

static int create_indirect_key(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_mr *mr)
{
        int inlen;
        void *mkc;
        void *in;
        int err;
        u64 start;
        u64 len;

        start = indir_start_addr(mr);
        len = indir_len(mr);
        if (start == MLX5_VDPA_INVALID_START_ADDR || len == MLX5_VDPA_INVALID_LEN)
                return -EINVAL;

        inlen = MLX5_ST_SZ_BYTES(create_mkey_in) + klm_byte_size(mr->num_klms);
        in = kzalloc(inlen, GFP_KERNEL);
        if (!in)
                return -ENOMEM;

        MLX5_SET(create_mkey_in, in, uid, mvdev->res.uid);
        mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
        MLX5_SET(mkc, mkc, lw, 1);
        MLX5_SET(mkc, mkc, lr, 1);
        mlx5_set_access_mode(mkc, MLX5_MKC_ACCESS_MODE_KLMS);
        MLX5_SET(mkc, mkc, qpn, 0xffffff);
        MLX5_SET(mkc, mkc, pd, mvdev->res.pdn);
        MLX5_SET64(mkc, mkc, start_addr, start);
        MLX5_SET64(mkc, mkc, len, len);
        MLX5_SET(mkc, mkc, translations_octword_size, klm_byte_size(mr->num_klms) / 16);
        MLX5_SET(create_mkey_in, in, translations_octword_actual_size, mr->num_klms);
        fill_indir(mvdev, mr, in);
        err = mlx5_vdpa_create_mkey(mvdev, &mr->mkey, in, inlen);
        kfree(in);
        return err;
}

static void destroy_indirect_key(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_mr *mkey)
{
        mlx5_vdpa_destroy_mkey(mvdev, &mkey->mkey);
}

static int map_direct_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_direct_mr *mr,
                         struct vhost_iotlb *iotlb)
{
        struct vhost_iotlb_map *map;
        unsigned long lgcd = 0;
        int log_entity_size;
        unsigned long size;
        u64 start = 0;
        int err;
        struct page *pg;
        unsigned int nsg;
        int sglen;
        u64 pa;
        u64 paend;
        struct scatterlist *sg;
        struct device *dma = mvdev->vdev.dma_dev;

        for (map = vhost_iotlb_itree_first(iotlb, mr->start, mr->end - 1);
             map; map = vhost_iotlb_itree_next(map, start, mr->end - 1)) {
                size = maplen(map, mr);
                lgcd = gcd(lgcd, size);
                start += size;
        }
        log_entity_size = ilog2(lgcd);

        sglen = 1 << log_entity_size;
        nsg = MLX5_DIV_ROUND_UP_POW2(mr->end - mr->start, log_entity_size);

        err = sg_alloc_table(&mr->sg_head, nsg, GFP_KERNEL);
        if (err)
                return err;

        sg = mr->sg_head.sgl;
        for (map = vhost_iotlb_itree_first(iotlb, mr->start, mr->end - 1);
             map; map = vhost_iotlb_itree_next(map, mr->start, mr->end - 1)) {
                paend = map->addr + maplen(map, mr);
                for (pa = map->addr; pa < paend; pa += sglen) {
                        pg = pfn_to_page(__phys_to_pfn(pa));
                        if (!sg) {
                                mlx5_vdpa_warn(mvdev, "sg null. start 0x%llx, end 0x%llx\n",
                                               map->start, map->last + 1);
                                err = -ENOMEM;
                                goto err_map;
                        }
                        sg_set_page(sg, pg, sglen, 0);
                        sg = sg_next(sg);
                        if (!sg)
                                goto done;
                }
        }
done:
        mr->log_size = log_entity_size;
        mr->nsg = nsg;
        mr->nent = dma_map_sg_attrs(dma, mr->sg_head.sgl, mr->nsg, DMA_BIDIRECTIONAL, 0);
        if (!mr->nent) {
                err = -ENOMEM;
                goto err_map;
        }

        err = create_direct_mr(mvdev, mr);
        if (err)
                goto err_direct;

        return 0;

err_direct:
        dma_unmap_sg_attrs(dma, mr->sg_head.sgl, mr->nsg, DMA_BIDIRECTIONAL, 0);
err_map:
        sg_free_table(&mr->sg_head);
        return err;
}
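/* map_direct_mr() derives the MTT page size from the gcd of the iotlb fragment
 * lengths (2^ilog2(gcd)), builds a scatterlist at that granularity, DMA-maps
 * it and then creates the direct mkey over the mapped pages.
 */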

static void unmap_direct_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_direct_mr *mr)
{
        struct device *dma = mvdev->vdev.dma_dev;

        destroy_direct_mr(mvdev, mr);
        dma_unmap_sg_attrs(dma, mr->sg_head.sgl, mr->nsg, DMA_BIDIRECTIONAL, 0);
        sg_free_table(&mr->sg_head);
}

static int add_direct_chain(struct mlx5_vdpa_dev *mvdev, u64 start, u64 size, u8 perm,
                            struct vhost_iotlb *iotlb)
{
        struct mlx5_vdpa_mr *mr = &mvdev->mr;
        struct mlx5_vdpa_direct_mr *dmr;
        struct mlx5_vdpa_direct_mr *n;
        LIST_HEAD(tmp);
        u64 st;
        u64 sz;
        int err;
        int i = 0;

        st = start;
        while (size) {
                sz = (u32)min_t(u64, MAX_KLM_SIZE, size);
                dmr = kzalloc(sizeof(*dmr), GFP_KERNEL);
                if (!dmr) {
                        err = -ENOMEM;
                        goto err_alloc;
                }

                dmr->start = st;
                dmr->end = st + sz;
                dmr->perm = perm;
                err = map_direct_mr(mvdev, dmr, iotlb);
                if (err) {
                        kfree(dmr);
                        goto err_alloc;
                }

                list_add_tail(&dmr->list, &tmp);
                size -= sz;
                mr->num_directs++;
                mr->num_klms++;
                st += sz;
                i++;
        }
        list_splice_tail(&tmp, &mr->head);
        return 0;

err_alloc:
        list_for_each_entry_safe(dmr, n, &mr->head, list) {
                list_del_init(&dmr->list);
                unmap_direct_mr(mvdev, dmr);
                kfree(dmr);
        }
        return err;
}
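/* add_direct_chain() splits a contiguous range with uniform permissions into
 * direct MRs of at most MAX_KLM_SIZE (1 GB) each, one KLM entry per direct MR.
 */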

/* The iotlb pointer contains a list of maps. Go over the maps, possibly
 * merging mergeable maps, and create direct memory keys that provide the
 * device access to memory. The direct mkeys are then referred to by the
 * indirect memory key that provides access to the entire address space given
 * by iotlb.
 */
static int create_user_mr(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb)
{
        struct mlx5_vdpa_mr *mr = &mvdev->mr;
        struct mlx5_vdpa_direct_mr *dmr;
        struct mlx5_vdpa_direct_mr *n;
        struct vhost_iotlb_map *map;
        u32 pperm = U16_MAX;
        u64 last = U64_MAX;
        u64 ps = U64_MAX;
        u64 pe = U64_MAX;
        u64 start = 0;
        int err = 0;
        int nnuls;

        INIT_LIST_HEAD(&mr->head);
        for (map = vhost_iotlb_itree_first(iotlb, start, last); map;
             map = vhost_iotlb_itree_next(map, start, last)) {
                start = map->start;
                if (pe == map->start && pperm == map->perm) {
                        pe = map->last + 1;
                } else {
                        if (ps != U64_MAX) {
                                if (pe < map->start) {
                                        /* We have a hole in the map. Check how
                                         * many null keys are required to fill it.
                                         */
                                        nnuls = MLX5_DIV_ROUND_UP_POW2(map->start - pe,
                                                                       LOG_MAX_KLM_SIZE);
                                        mr->num_klms += nnuls;
                                }
                                err = add_direct_chain(mvdev, ps, pe - ps, pperm, iotlb);
                                if (err)
                                        goto err_chain;
                        }
                        ps = map->start;
                        pe = map->last + 1;
                        pperm = map->perm;
                }
        }
        err = add_direct_chain(mvdev, ps, pe - ps, pperm, iotlb);
        if (err)
                goto err_chain;

        /* Create the memory key that defines the guest's address space. This
         * memory key refers to the direct keys that contain the MTT
         * translations.
         */
        err = create_indirect_key(mvdev, mr);
        if (err)
                goto err_chain;

        mr->user_mr = true;
        return 0;

err_chain:
        list_for_each_entry_safe_reverse(dmr, n, &mr->head, list) {
                list_del_init(&dmr->list);
                unmap_direct_mr(mvdev, dmr);
                kfree(dmr);
        }
        return err;
}

static int create_dma_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_mr *mr)
{
        int inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
        void *mkc;
        u32 *in;
        int err;

        in = kzalloc(inlen, GFP_KERNEL);
        if (!in)
                return -ENOMEM;

        mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);

        MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_PA);
        MLX5_SET(mkc, mkc, length64, 1);
        MLX5_SET(mkc, mkc, lw, 1);
        MLX5_SET(mkc, mkc, lr, 1);
        MLX5_SET(mkc, mkc, pd, mvdev->res.pdn);
        MLX5_SET(mkc, mkc, qpn, 0xffffff);

        err = mlx5_vdpa_create_mkey(mvdev, &mr->mkey, in, inlen);
        if (!err)
                mr->user_mr = false;

        kfree(in);
        return err;
}

static void destroy_dma_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_mr *mr)
{
        mlx5_vdpa_destroy_mkey(mvdev, &mr->mkey);
}

static int dup_iotlb(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *src)
{
        struct vhost_iotlb_map *map;
        u64 start = 0, last = ULLONG_MAX;
        int err;

        if (!src) {
                err = vhost_iotlb_add_range(mvdev->cvq.iotlb, start, last, start, VHOST_ACCESS_RW);
                return err;
        }

        for (map = vhost_iotlb_itree_first(src, start, last); map;
                map = vhost_iotlb_itree_next(map, start, last)) {
                err = vhost_iotlb_add_range(mvdev->cvq.iotlb, map->start, map->last,
                                            map->addr, map->perm);
                if (err)
                        return err;
        }
        return 0;
}
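/* dup_iotlb() mirrors the incoming iotlb into the control VQ's private iotlb;
 * with no source iotlb it installs a single 1:1 read/write mapping covering
 * the whole address range.
 */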

static void prune_iotlb(struct mlx5_vdpa_dev *mvdev)
{
        vhost_iotlb_del_range(mvdev->cvq.iotlb, 0, ULLONG_MAX);
}

static void destroy_user_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_mr *mr)
{
        struct mlx5_vdpa_direct_mr *dmr;
        struct mlx5_vdpa_direct_mr *n;

        destroy_indirect_key(mvdev, mr);
        list_for_each_entry_safe_reverse(dmr, n, &mr->head, list) {
                list_del_init(&dmr->list);
                unmap_direct_mr(mvdev, dmr);
                kfree(dmr);
        }
}

void mlx5_vdpa_destroy_mr(struct mlx5_vdpa_dev *mvdev)
{
        struct mlx5_vdpa_mr *mr = &mvdev->mr;

        mutex_lock(&mr->mkey_mtx);
        if (!mr->initialized)
                goto out;

        prune_iotlb(mvdev);
        if (mr->user_mr)
                destroy_user_mr(mvdev, mr);
        else
                destroy_dma_mr(mvdev, mr);

        memset(mr, 0, sizeof(*mr));
        mr->initialized = false;
out:
        mutex_unlock(&mr->mkey_mtx);
}

static int _mlx5_vdpa_create_mr(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb)
{
        struct mlx5_vdpa_mr *mr = &mvdev->mr;
        int err;

        if (mr->initialized)
                return 0;

        if (iotlb)
                err = create_user_mr(mvdev, iotlb);
        else
                err = create_dma_mr(mvdev, mr);

        if (err)
                return err;

        err = dup_iotlb(mvdev, iotlb);
        if (err)
                goto out_err;

        mr->initialized = true;
        return 0;

out_err:
        if (iotlb)
                destroy_user_mr(mvdev, mr);
        else
                destroy_dma_mr(mvdev, mr);

        return err;
}

int mlx5_vdpa_create_mr(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb)
{
        int err;

        mutex_lock(&mvdev->mr.mkey_mtx);
        err = _mlx5_vdpa_create_mr(mvdev, iotlb);
        mutex_unlock(&mvdev->mr.mkey_mtx);
        return err;
}

int mlx5_vdpa_handle_set_map(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb,
                             bool *change_map)
{
        struct mlx5_vdpa_mr *mr = &mvdev->mr;
        int err = 0;

        *change_map = false;
        mutex_lock(&mr->mkey_mtx);
        if (mr->initialized) {
                mlx5_vdpa_info(mvdev, "memory map update\n");
                *change_map = true;
        }
        if (!*change_map)
                err = _mlx5_vdpa_create_mr(mvdev, iotlb);
        mutex_unlock(&mr->mkey_mtx);

        return err;
}
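/* mlx5_vdpa_handle_set_map() reports change_map = true when a memory key
 * already exists, signalling the caller that the existing mapping must be
 * replaced; otherwise it creates the mapping directly under the mkey mutex.
 */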