linux/drivers/infiniband/hw/mlx5/mr.c
   1/*
   2 * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved.
   3 *
   4 * This software is available to you under a choice of one of two
   5 * licenses.  You may choose to be licensed under the terms of the GNU
   6 * General Public License (GPL) Version 2, available from the file
   7 * COPYING in the main directory of this source tree, or the
   8 * OpenIB.org BSD license below:
   9 *
  10 *     Redistribution and use in source and binary forms, with or
  11 *     without modification, are permitted provided that the following
  12 *     conditions are met:
  13 *
  14 *      - Redistributions of source code must retain the above
  15 *        copyright notice, this list of conditions and the following
  16 *        disclaimer.
  17 *
  18 *      - Redistributions in binary form must reproduce the above
  19 *        copyright notice, this list of conditions and the following
  20 *        disclaimer in the documentation and/or other materials
  21 *        provided with the distribution.
  22 *
  23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
  27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
  28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  30 * SOFTWARE.
  31 */
  32
  33
  34#include <linux/kref.h>
  35#include <linux/random.h>
  36#include <linux/debugfs.h>
  37#include <linux/export.h>
  38#include <linux/delay.h>
  39#include <rdma/ib_umem.h>
  40#include <rdma/ib_umem_odp.h>
  41#include <rdma/ib_verbs.h>
  42#include "mlx5_ib.h"
  43
  44enum {
  45        MAX_PENDING_REG_MR = 8,
  46};
  47
  48#define MLX5_UMR_ALIGN 2048
  49
  50static int clean_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr);
  51static int dereg_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr);
  52static int mr_cache_max_order(struct mlx5_ib_dev *dev);
  53static int unreg_umr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr);
  54
  55static int destroy_mkey(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
  56{
  57        int err = mlx5_core_destroy_mkey(dev->mdev, &mr->mmkey);
  58
  59#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
  60        /* Wait until all page fault handlers using the mr complete. */
  61        synchronize_srcu(&dev->mr_srcu);
  62#endif
  63
  64        return err;
  65}
  66
  67static int order2idx(struct mlx5_ib_dev *dev, int order)
  68{
  69        struct mlx5_mr_cache *cache = &dev->cache;
  70
  71        if (order < cache->ent[0].order)
  72                return 0;
  73        else
  74                return order - cache->ent[0].order;
  75}
  76
  77static bool use_umr_mtt_update(struct mlx5_ib_mr *mr, u64 start, u64 length)
  78{
  79        return ((u64)1 << mr->order) * MLX5_ADAPTER_PAGE_SIZE >=
  80                length + (start & (MLX5_ADAPTER_PAGE_SIZE - 1));
  81}
  82
  83#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
  84static void update_odp_mr(struct mlx5_ib_mr *mr)
  85{
  86        if (mr->umem->odp_data) {
  87                /*
   88                 * This barrier prevents the compiler from moving the
   89                 * setting of umem->odp_data->private to point to our
   90                 * MR before reg_umr has finished, to ensure that the
   91                 * MR initialization has finished before we start to
   92                 * handle invalidations.
  93                 */
  94                smp_wmb();
  95                mr->umem->odp_data->private = mr;
  96                /*
  97                 * Make sure we will see the new
  98                 * umem->odp_data->private value in the invalidation
  99                 * routines, before we can get page faults on the
  100                 * MR. Page faults can happen once we put the MR in
  101                 * the tree, below this line. Without the barrier, a
  102                 * page fault could be handled and an invalidation could
  103                 * run before umem->odp_data->private == mr is visible
  104                 * to the invalidation handler.
 105                 */
 106                smp_wmb();
 107        }
 108}
 109#endif
 110
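     /*
      * Completion handler for the asynchronous mkey creation requests
      * issued by add_keys().  On failure the MR is freed and further
      * cache filling is throttled for one second via fill_delay and
      * delay_timer.  On success the new mkey gets a fresh variant key
      * byte, the MR is added to its cache entry, and the mkey is
      * inserted into the device's mkey radix tree.
      */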
 111static void reg_mr_callback(int status, void *context)
 112{
 113        struct mlx5_ib_mr *mr = context;
 114        struct mlx5_ib_dev *dev = mr->dev;
 115        struct mlx5_mr_cache *cache = &dev->cache;
 116        int c = order2idx(dev, mr->order);
 117        struct mlx5_cache_ent *ent = &cache->ent[c];
 118        u8 key;
 119        unsigned long flags;
 120        struct mlx5_mkey_table *table = &dev->mdev->priv.mkey_table;
 121        int err;
 122
 123        spin_lock_irqsave(&ent->lock, flags);
 124        ent->pending--;
 125        spin_unlock_irqrestore(&ent->lock, flags);
 126        if (status) {
 127                mlx5_ib_warn(dev, "async reg mr failed. status %d\n", status);
 128                kfree(mr);
 129                dev->fill_delay = 1;
 130                mod_timer(&dev->delay_timer, jiffies + HZ);
 131                return;
 132        }
 133
 134        mr->mmkey.type = MLX5_MKEY_MR;
 135        spin_lock_irqsave(&dev->mdev->priv.mkey_lock, flags);
 136        key = dev->mdev->priv.mkey_key++;
 137        spin_unlock_irqrestore(&dev->mdev->priv.mkey_lock, flags);
 138        mr->mmkey.key = mlx5_idx_to_mkey(MLX5_GET(create_mkey_out, mr->out, mkey_index)) | key;
 139
 140        cache->last_add = jiffies;
 141
 142        spin_lock_irqsave(&ent->lock, flags);
 143        list_add_tail(&mr->list, &ent->head);
 144        ent->cur++;
 145        ent->size++;
 146        spin_unlock_irqrestore(&ent->lock, flags);
 147
 148        write_lock_irqsave(&table->lock, flags);
 149        err = radix_tree_insert(&table->tree, mlx5_base_mkey(mr->mmkey.key),
 150                                &mr->mmkey);
 151        if (err)
 152                pr_err("Error inserting to mkey tree. 0x%x\n", -err);
 153        write_unlock_irqrestore(&table->lock, flags);
 154
 155        if (!completion_done(&ent->compl))
 156                complete(&ent->compl);
 157}
 158
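     /*
      * Asynchronously create @num mkeys for cache entry @c.  The mkeys
      * are created in the free state with UMR enabled so they can later
      * be pointed at user memory; completions are handled by
      * reg_mr_callback().  At most MAX_PENDING_REG_MR requests may be
      * outstanding per entry, otherwise -EAGAIN is returned.
      */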
 159static int add_keys(struct mlx5_ib_dev *dev, int c, int num)
 160{
 161        struct mlx5_mr_cache *cache = &dev->cache;
 162        struct mlx5_cache_ent *ent = &cache->ent[c];
 163        int inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
 164        struct mlx5_ib_mr *mr;
 165        void *mkc;
 166        u32 *in;
 167        int err = 0;
 168        int i;
 169
 170        in = kzalloc(inlen, GFP_KERNEL);
 171        if (!in)
 172                return -ENOMEM;
 173
 174        mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
 175        for (i = 0; i < num; i++) {
 176                if (ent->pending >= MAX_PENDING_REG_MR) {
 177                        err = -EAGAIN;
 178                        break;
 179                }
 180
 181                mr = kzalloc(sizeof(*mr), GFP_KERNEL);
 182                if (!mr) {
 183                        err = -ENOMEM;
 184                        break;
 185                }
 186                mr->order = ent->order;
 187                mr->allocated_from_cache = 1;
 188                mr->dev = dev;
 189
 190                MLX5_SET(mkc, mkc, free, 1);
 191                MLX5_SET(mkc, mkc, umr_en, 1);
 192                MLX5_SET(mkc, mkc, access_mode, ent->access_mode);
 193
 194                MLX5_SET(mkc, mkc, qpn, 0xffffff);
 195                MLX5_SET(mkc, mkc, translations_octword_size, ent->xlt);
 196                MLX5_SET(mkc, mkc, log_page_size, ent->page);
 197
 198                spin_lock_irq(&ent->lock);
 199                ent->pending++;
 200                spin_unlock_irq(&ent->lock);
 201                err = mlx5_core_create_mkey_cb(dev->mdev, &mr->mmkey,
 202                                               in, inlen,
 203                                               mr->out, sizeof(mr->out),
 204                                               reg_mr_callback, mr);
 205                if (err) {
 206                        spin_lock_irq(&ent->lock);
 207                        ent->pending--;
 208                        spin_unlock_irq(&ent->lock);
 209                        mlx5_ib_warn(dev, "create mkey failed %d\n", err);
 210                        kfree(mr);
 211                        break;
 212                }
 213        }
 214
 215        kfree(in);
 216        return err;
 217}
 218
 219static void remove_keys(struct mlx5_ib_dev *dev, int c, int num)
 220{
 221        struct mlx5_mr_cache *cache = &dev->cache;
 222        struct mlx5_cache_ent *ent = &cache->ent[c];
 223        struct mlx5_ib_mr *mr;
 224        int err;
 225        int i;
 226
 227        for (i = 0; i < num; i++) {
 228                spin_lock_irq(&ent->lock);
 229                if (list_empty(&ent->head)) {
 230                        spin_unlock_irq(&ent->lock);
 231                        return;
 232                }
 233                mr = list_first_entry(&ent->head, struct mlx5_ib_mr, list);
 234                list_del(&mr->list);
 235                ent->cur--;
 236                ent->size--;
 237                spin_unlock_irq(&ent->lock);
 238                err = destroy_mkey(dev, mr);
 239                if (err)
 240                        mlx5_ib_warn(dev, "failed destroy mkey\n");
 241                else
 242                        kfree(mr);
 243        }
 244}
 245
 246static ssize_t size_write(struct file *filp, const char __user *buf,
 247                          size_t count, loff_t *pos)
 248{
 249        struct mlx5_cache_ent *ent = filp->private_data;
 250        struct mlx5_ib_dev *dev = ent->dev;
 251        char lbuf[20];
 252        u32 var;
 253        int err;
 254        int c;
 255
 256        if (copy_from_user(lbuf, buf, sizeof(lbuf)))
 257                return -EFAULT;
 258
 259        c = order2idx(dev, ent->order);
 260        lbuf[sizeof(lbuf) - 1] = 0;
 261
 262        if (sscanf(lbuf, "%u", &var) != 1)
 263                return -EINVAL;
 264
 265        if (var < ent->limit)
 266                return -EINVAL;
 267
 268        if (var > ent->size) {
 269                do {
 270                        err = add_keys(dev, c, var - ent->size);
 271                        if (err && err != -EAGAIN)
 272                                return err;
 273
 274                        usleep_range(3000, 5000);
 275                } while (err);
 276        } else if (var < ent->size) {
 277                remove_keys(dev, c, ent->size - var);
 278        }
 279
 280        return count;
 281}
 282
 283static ssize_t size_read(struct file *filp, char __user *buf, size_t count,
 284                         loff_t *pos)
 285{
 286        struct mlx5_cache_ent *ent = filp->private_data;
 287        char lbuf[20];
 288        int err;
 289
 290        if (*pos)
 291                return 0;
 292
 293        err = snprintf(lbuf, sizeof(lbuf), "%d\n", ent->size);
 294        if (err < 0)
 295                return err;
 296
 297        if (copy_to_user(buf, lbuf, err))
 298                return -EFAULT;
 299
 300        *pos += err;
 301
 302        return err;
 303}
 304
 305static const struct file_operations size_fops = {
 306        .owner  = THIS_MODULE,
 307        .open   = simple_open,
 308        .write  = size_write,
 309        .read   = size_read,
 310};
 311
 312static ssize_t limit_write(struct file *filp, const char __user *buf,
 313                           size_t count, loff_t *pos)
 314{
 315        struct mlx5_cache_ent *ent = filp->private_data;
 316        struct mlx5_ib_dev *dev = ent->dev;
 317        char lbuf[20];
 318        u32 var;
 319        int err;
 320        int c;
 321
 322        if (copy_from_user(lbuf, buf, sizeof(lbuf)))
 323                return -EFAULT;
 324
 325        c = order2idx(dev, ent->order);
 326        lbuf[sizeof(lbuf) - 1] = 0;
 327
 328        if (sscanf(lbuf, "%u", &var) != 1)
 329                return -EINVAL;
 330
 331        if (var > ent->size)
 332                return -EINVAL;
 333
 334        ent->limit = var;
 335
 336        if (ent->cur < ent->limit) {
 337                err = add_keys(dev, c, 2 * ent->limit - ent->cur);
 338                if (err)
 339                        return err;
 340        }
 341
 342        return count;
 343}
 344
 345static ssize_t limit_read(struct file *filp, char __user *buf, size_t count,
 346                          loff_t *pos)
 347{
 348        struct mlx5_cache_ent *ent = filp->private_data;
 349        char lbuf[20];
 350        int err;
 351
 352        if (*pos)
 353                return 0;
 354
 355        err = snprintf(lbuf, sizeof(lbuf), "%d\n", ent->limit);
 356        if (err < 0)
 357                return err;
 358
 359        if (copy_to_user(buf, lbuf, err))
 360                return -EFAULT;
 361
 362        *pos += err;
 363
 364        return err;
 365}
 366
 367static const struct file_operations limit_fops = {
 368        .owner  = THIS_MODULE,
 369        .open   = simple_open,
 370        .write  = limit_write,
 371        .read   = limit_read,
 372};
 373
 374static int someone_adding(struct mlx5_mr_cache *cache)
 375{
 376        int i;
 377
 378        for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) {
 379                if (cache->ent[i].cur < cache->ent[i].limit)
 380                        return 1;
 381        }
 382
 383        return 0;
 384}
 385
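     /*
      * Per-entry background worker: fills the entry up to twice its
      * limit, one mkey per invocation, requeueing itself as needed.
      * When the entry has grown beyond twice the limit it is shrunk
      * lazily, one mkey at a time, but only when no other task needs
      * the CPU, nobody is refilling the cache and no mkey has been
      * added for 300 seconds.
      */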
 386static void __cache_work_func(struct mlx5_cache_ent *ent)
 387{
 388        struct mlx5_ib_dev *dev = ent->dev;
 389        struct mlx5_mr_cache *cache = &dev->cache;
 390        int i = order2idx(dev, ent->order);
 391        int err;
 392
 393        if (cache->stopped)
 394                return;
 395
 396        ent = &dev->cache.ent[i];
 397        if (ent->cur < 2 * ent->limit && !dev->fill_delay) {
 398                err = add_keys(dev, i, 1);
 399                if (ent->cur < 2 * ent->limit) {
 400                        if (err == -EAGAIN) {
 401                                mlx5_ib_dbg(dev, "returned eagain, order %d\n",
 402                                            i + 2);
 403                                queue_delayed_work(cache->wq, &ent->dwork,
 404                                                   msecs_to_jiffies(3));
 405                        } else if (err) {
 406                                mlx5_ib_warn(dev, "command failed order %d, err %d\n",
 407                                             i + 2, err);
 408                                queue_delayed_work(cache->wq, &ent->dwork,
 409                                                   msecs_to_jiffies(1000));
 410                        } else {
 411                                queue_work(cache->wq, &ent->work);
 412                        }
 413                }
 414        } else if (ent->cur > 2 * ent->limit) {
 415                /*
  416                 * The remove_keys() logic is performed as a garbage
  417                 * collection task. Such a task is intended to run when no
  418                 * other active processes are running.
  419                 *
  420                 * need_resched() returns TRUE if there are user tasks to
  421                 * be activated in the near future.
  422                 *
  423                 * In such a case, we don't execute remove_keys() and
  424                 * postpone the garbage collection work to the next cycle,
  425                 * in order to free CPU resources to other tasks.
 426                 */
 427                if (!need_resched() && !someone_adding(cache) &&
 428                    time_after(jiffies, cache->last_add + 300 * HZ)) {
 429                        remove_keys(dev, i, 1);
 430                        if (ent->cur > ent->limit)
 431                                queue_work(cache->wq, &ent->work);
 432                } else {
 433                        queue_delayed_work(cache->wq, &ent->dwork, 300 * HZ);
 434                }
 435        }
 436}
 437
 438static void delayed_cache_work_func(struct work_struct *work)
 439{
 440        struct mlx5_cache_ent *ent;
 441
 442        ent = container_of(work, struct mlx5_cache_ent, dwork.work);
 443        __cache_work_func(ent);
 444}
 445
 446static void cache_work_func(struct work_struct *work)
 447{
 448        struct mlx5_cache_ent *ent;
 449
 450        ent = container_of(work, struct mlx5_cache_ent, work);
 451        __cache_work_func(ent);
 452}
 453
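     /*
      * Take an MR from a specific cache entry.  If the entry is empty,
      * one asynchronous mkey creation is kicked off and the caller
      * sleeps on ent->compl until reg_mr_callback() has refilled the
      * entry.
      */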
 454struct mlx5_ib_mr *mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev, int entry)
 455{
 456        struct mlx5_mr_cache *cache = &dev->cache;
 457        struct mlx5_cache_ent *ent;
 458        struct mlx5_ib_mr *mr;
 459        int err;
 460
 461        if (entry < 0 || entry >= MAX_MR_CACHE_ENTRIES) {
 462                mlx5_ib_err(dev, "cache entry %d is out of range\n", entry);
 463                return NULL;
 464        }
 465
 466        ent = &cache->ent[entry];
 467        while (1) {
 468                spin_lock_irq(&ent->lock);
 469                if (list_empty(&ent->head)) {
 470                        spin_unlock_irq(&ent->lock);
 471
 472                        err = add_keys(dev, entry, 1);
 473                        if (err && err != -EAGAIN)
 474                                return ERR_PTR(err);
 475
 476                        wait_for_completion(&ent->compl);
 477                } else {
 478                        mr = list_first_entry(&ent->head, struct mlx5_ib_mr,
 479                                              list);
 480                        list_del(&mr->list);
 481                        ent->cur--;
 482                        spin_unlock_irq(&ent->lock);
 483                        if (ent->cur < ent->limit)
 484                                queue_work(cache->wq, &ent->work);
 485                        return mr;
 486                }
 487        }
 488}
 489
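     /*
      * Pick a cached MR for a registration of the given order.  The
      * entries are scanned from the matching order upwards; every empty
      * entry that is passed gets its refill work queued.  Returns NULL
      * and bumps the miss counter if no suitable MR is available.
      */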
 490static struct mlx5_ib_mr *alloc_cached_mr(struct mlx5_ib_dev *dev, int order)
 491{
 492        struct mlx5_mr_cache *cache = &dev->cache;
 493        struct mlx5_ib_mr *mr = NULL;
 494        struct mlx5_cache_ent *ent;
 495        int last_umr_cache_entry;
 496        int c;
 497        int i;
 498
 499        c = order2idx(dev, order);
 500        last_umr_cache_entry = order2idx(dev, mr_cache_max_order(dev));
 501        if (c < 0 || c > last_umr_cache_entry) {
 502                mlx5_ib_warn(dev, "order %d, cache index %d\n", order, c);
 503                return NULL;
 504        }
 505
 506        for (i = c; i <= last_umr_cache_entry; i++) {
 507                ent = &cache->ent[i];
 508
 509                mlx5_ib_dbg(dev, "order %d, cache index %d\n", ent->order, i);
 510
 511                spin_lock_irq(&ent->lock);
 512                if (!list_empty(&ent->head)) {
 513                        mr = list_first_entry(&ent->head, struct mlx5_ib_mr,
 514                                              list);
 515                        list_del(&mr->list);
 516                        ent->cur--;
 517                        spin_unlock_irq(&ent->lock);
 518                        if (ent->cur < ent->limit)
 519                                queue_work(cache->wq, &ent->work);
 520                        break;
 521                }
 522                spin_unlock_irq(&ent->lock);
 523
 524                queue_work(cache->wq, &ent->work);
 525        }
 526
 527        if (!mr)
 528                cache->ent[c].miss++;
 529
 530        return mr;
 531}
 532
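     /*
      * Return an MR to its cache entry.  The mkey is first disabled
      * with a UMR operation (unreg_umr) so that it can be reused; if
      * the entry then holds more than twice its limit, shrink work is
      * queued.
      */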
 533void mlx5_mr_cache_free(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
 534{
 535        struct mlx5_mr_cache *cache = &dev->cache;
 536        struct mlx5_cache_ent *ent;
 537        int shrink = 0;
 538        int c;
 539
 540        c = order2idx(dev, mr->order);
 541        if (c < 0 || c >= MAX_MR_CACHE_ENTRIES) {
 542                mlx5_ib_warn(dev, "order %d, cache index %d\n", mr->order, c);
 543                return;
 544        }
 545
 546        if (unreg_umr(dev, mr))
 547                return;
 548
 549        ent = &cache->ent[c];
 550        spin_lock_irq(&ent->lock);
 551        list_add_tail(&mr->list, &ent->head);
 552        ent->cur++;
 553        if (ent->cur > 2 * ent->limit)
 554                shrink = 1;
 555        spin_unlock_irq(&ent->lock);
 556
 557        if (shrink)
 558                queue_work(cache->wq, &ent->work);
 559}
 560
 561static void clean_keys(struct mlx5_ib_dev *dev, int c)
 562{
 563        struct mlx5_mr_cache *cache = &dev->cache;
 564        struct mlx5_cache_ent *ent = &cache->ent[c];
 565        struct mlx5_ib_mr *mr;
 566        int err;
 567
 568        cancel_delayed_work(&ent->dwork);
 569        while (1) {
 570                spin_lock_irq(&ent->lock);
 571                if (list_empty(&ent->head)) {
 572                        spin_unlock_irq(&ent->lock);
 573                        return;
 574                }
 575                mr = list_first_entry(&ent->head, struct mlx5_ib_mr, list);
 576                list_del(&mr->list);
 577                ent->cur--;
 578                ent->size--;
 579                spin_unlock_irq(&ent->lock);
 580                err = destroy_mkey(dev, mr);
 581                if (err)
 582                        mlx5_ib_warn(dev, "failed destroy mkey\n");
 583                else
 584                        kfree(mr);
 585        }
 586}
 587
 588static void mlx5_mr_cache_debugfs_cleanup(struct mlx5_ib_dev *dev)
 589{
 590        if (!mlx5_debugfs_root)
 591                return;
 592
 593        debugfs_remove_recursive(dev->cache.root);
 594        dev->cache.root = NULL;
 595}
 596
 597static int mlx5_mr_cache_debugfs_init(struct mlx5_ib_dev *dev)
 598{
 599        struct mlx5_mr_cache *cache = &dev->cache;
 600        struct mlx5_cache_ent *ent;
 601        int i;
 602
 603        if (!mlx5_debugfs_root)
 604                return 0;
 605
 606        cache->root = debugfs_create_dir("mr_cache", dev->mdev->priv.dbg_root);
 607        if (!cache->root)
 608                return -ENOMEM;
 609
 610        for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) {
 611                ent = &cache->ent[i];
 612                sprintf(ent->name, "%d", ent->order);
 613                ent->dir = debugfs_create_dir(ent->name,  cache->root);
 614                if (!ent->dir)
 615                        goto err;
 616
 617                ent->fsize = debugfs_create_file("size", 0600, ent->dir, ent,
 618                                                 &size_fops);
 619                if (!ent->fsize)
 620                        goto err;
 621
 622                ent->flimit = debugfs_create_file("limit", 0600, ent->dir, ent,
 623                                                  &limit_fops);
 624                if (!ent->flimit)
 625                        goto err;
 626
 627                ent->fcur = debugfs_create_u32("cur", 0400, ent->dir,
 628                                               &ent->cur);
 629                if (!ent->fcur)
 630                        goto err;
 631
 632                ent->fmiss = debugfs_create_u32("miss", 0600, ent->dir,
 633                                                &ent->miss);
 634                if (!ent->fmiss)
 635                        goto err;
 636        }
 637
 638        return 0;
 639err:
 640        mlx5_mr_cache_debugfs_cleanup(dev);
 641
 642        return -ENOMEM;
 643}
 644
 645static void delay_time_func(struct timer_list *t)
 646{
 647        struct mlx5_ib_dev *dev = from_timer(dev, t, delay_timer);
 648
 649        dev->fill_delay = 0;
 650}
 651
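     /*
      * Set up the MR cache: an ordered workqueue, the fill-delay timer
      * and one cache entry per order (order = index + 2).  Entries
      * above MR_CACHE_LAST_STD_ENTRY are initialized for ODP by
      * mlx5_odp_init_mr_cache_entry(); standard entries take their
      * limits from the device profile when running on a PF with
      * MLX5_PROF_MASK_MR_CACHE set.  Debugfs failures are ignored.
      */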
 652int mlx5_mr_cache_init(struct mlx5_ib_dev *dev)
 653{
 654        struct mlx5_mr_cache *cache = &dev->cache;
 655        struct mlx5_cache_ent *ent;
 656        int err;
 657        int i;
 658
 659        mutex_init(&dev->slow_path_mutex);
 660        cache->wq = alloc_ordered_workqueue("mkey_cache", WQ_MEM_RECLAIM);
 661        if (!cache->wq) {
 662                mlx5_ib_warn(dev, "failed to create work queue\n");
 663                return -ENOMEM;
 664        }
 665
 666        timer_setup(&dev->delay_timer, delay_time_func, 0);
 667        for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) {
 668                ent = &cache->ent[i];
 669                INIT_LIST_HEAD(&ent->head);
 670                spin_lock_init(&ent->lock);
 671                ent->order = i + 2;
 672                ent->dev = dev;
 673                ent->limit = 0;
 674
 675                init_completion(&ent->compl);
 676                INIT_WORK(&ent->work, cache_work_func);
 677                INIT_DELAYED_WORK(&ent->dwork, delayed_cache_work_func);
 678                queue_work(cache->wq, &ent->work);
 679
 680                if (i > MR_CACHE_LAST_STD_ENTRY) {
 681                        mlx5_odp_init_mr_cache_entry(ent);
 682                        continue;
 683                }
 684
 685                if (ent->order > mr_cache_max_order(dev))
 686                        continue;
 687
 688                ent->page = PAGE_SHIFT;
 689                ent->xlt = (1 << ent->order) * sizeof(struct mlx5_mtt) /
 690                           MLX5_IB_UMR_OCTOWORD;
 691                ent->access_mode = MLX5_MKC_ACCESS_MODE_MTT;
 692                if ((dev->mdev->profile->mask & MLX5_PROF_MASK_MR_CACHE) &&
 693                    mlx5_core_is_pf(dev->mdev))
 694                        ent->limit = dev->mdev->profile->mr_cache[i].limit;
 695                else
 696                        ent->limit = 0;
 697        }
 698
 699        err = mlx5_mr_cache_debugfs_init(dev);
 700        if (err)
 701                mlx5_ib_warn(dev, "cache debugfs failure\n");
 702
 703        /*
 704         * We don't want to fail driver if debugfs failed to initialize,
 705         * so we are not forwarding error to the user.
 706         */
 707
 708        return 0;
 709}
 710
 711static void wait_for_async_commands(struct mlx5_ib_dev *dev)
 712{
 713        struct mlx5_mr_cache *cache = &dev->cache;
 714        struct mlx5_cache_ent *ent;
 715        int total = 0;
 716        int i;
 717        int j;
 718
 719        for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) {
 720                ent = &cache->ent[i];
 721                for (j = 0 ; j < 1000; j++) {
 722                        if (!ent->pending)
 723                                break;
 724                        msleep(50);
 725                }
 726        }
 727        for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) {
 728                ent = &cache->ent[i];
 729                total += ent->pending;
 730        }
 731
 732        if (total)
 733                mlx5_ib_warn(dev, "aborted while there are %d pending mr requests\n", total);
 734        else
 735                mlx5_ib_warn(dev, "done with all pending requests\n");
 736}
 737
 738int mlx5_mr_cache_cleanup(struct mlx5_ib_dev *dev)
 739{
 740        int i;
 741
 742        if (!dev->cache.wq)
 743                return 0;
 744
 745        dev->cache.stopped = 1;
 746        flush_workqueue(dev->cache.wq);
 747
 748        mlx5_mr_cache_debugfs_cleanup(dev);
 749
 750        for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++)
 751                clean_keys(dev, i);
 752
 753        destroy_workqueue(dev->cache.wq);
 754        wait_for_async_commands(dev);
 755        del_timer_sync(&dev->delay_timer);
 756
 757        return 0;
 758}
 759
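     /*
      * Create the DMA MR for a PD: a physical-address mode mkey with
      * length64 set, i.e. a single key covering the whole address space
      * with the requested access rights.
      */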
 760struct ib_mr *mlx5_ib_get_dma_mr(struct ib_pd *pd, int acc)
 761{
 762        struct mlx5_ib_dev *dev = to_mdev(pd->device);
 763        int inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
 764        struct mlx5_core_dev *mdev = dev->mdev;
 765        struct mlx5_ib_mr *mr;
 766        void *mkc;
 767        u32 *in;
 768        int err;
 769
 770        mr = kzalloc(sizeof(*mr), GFP_KERNEL);
 771        if (!mr)
 772                return ERR_PTR(-ENOMEM);
 773
 774        in = kzalloc(inlen, GFP_KERNEL);
 775        if (!in) {
 776                err = -ENOMEM;
 777                goto err_free;
 778        }
 779
 780        mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
 781
 782        MLX5_SET(mkc, mkc, access_mode, MLX5_MKC_ACCESS_MODE_PA);
 783        MLX5_SET(mkc, mkc, a, !!(acc & IB_ACCESS_REMOTE_ATOMIC));
 784        MLX5_SET(mkc, mkc, rw, !!(acc & IB_ACCESS_REMOTE_WRITE));
 785        MLX5_SET(mkc, mkc, rr, !!(acc & IB_ACCESS_REMOTE_READ));
 786        MLX5_SET(mkc, mkc, lw, !!(acc & IB_ACCESS_LOCAL_WRITE));
 787        MLX5_SET(mkc, mkc, lr, 1);
 788
 789        MLX5_SET(mkc, mkc, length64, 1);
 790        MLX5_SET(mkc, mkc, pd, to_mpd(pd)->pdn);
 791        MLX5_SET(mkc, mkc, qpn, 0xffffff);
 792        MLX5_SET64(mkc, mkc, start_addr, 0);
 793
 794        err = mlx5_core_create_mkey(mdev, &mr->mmkey, in, inlen);
 795        if (err)
 796                goto err_in;
 797
 798        kfree(in);
 799        mr->mmkey.type = MLX5_MKEY_MR;
 800        mr->ibmr.lkey = mr->mmkey.key;
 801        mr->ibmr.rkey = mr->mmkey.key;
 802        mr->umem = NULL;
 803
 804        return &mr->ibmr;
 805
 806err_in:
 807        kfree(in);
 808
 809err_free:
 810        kfree(mr);
 811
 812        return ERR_PTR(err);
 813}
 814
 815static int get_octo_len(u64 addr, u64 len, int page_shift)
 816{
 817        u64 page_size = 1ULL << page_shift;
 818        u64 offset;
 819        int npages;
 820
 821        offset = addr & (page_size - 1);
 822        npages = ALIGN(len + offset, page_size) >> page_shift;
 823        return (npages + 1) / 2;
 824}
 825
 826static int mr_cache_max_order(struct mlx5_ib_dev *dev)
 827{
 828        if (MLX5_CAP_GEN(dev->mdev, umr_extended_translation_offset))
 829                return MR_CACHE_LAST_STD_ENTRY + 2;
 830        return MLX5_MAX_UMR_SHIFT;
 831}
 832
 833static int mr_umem_get(struct ib_pd *pd, u64 start, u64 length,
 834                       int access_flags, struct ib_umem **umem,
 835                       int *npages, int *page_shift, int *ncont,
 836                       int *order)
 837{
 838        struct mlx5_ib_dev *dev = to_mdev(pd->device);
 839        int err;
 840
 841        *umem = ib_umem_get(pd->uobject->context, start, length,
 842                            access_flags, 0);
 843        err = PTR_ERR_OR_ZERO(*umem);
 844        if (err) {
 845                *umem = NULL;
 846                mlx5_ib_err(dev, "umem get failed (%d)\n", err);
 847                return err;
 848        }
 849
 850        mlx5_ib_cont_pages(*umem, start, MLX5_MKEY_PAGE_SHIFT_MASK, npages,
 851                           page_shift, ncont, order);
 852        if (!*npages) {
 853                mlx5_ib_warn(dev, "avoid zero region\n");
 854                ib_umem_release(*umem);
 855                return -EINVAL;
 856        }
 857
 858        mlx5_ib_dbg(dev, "npages %d, ncont %d, order %d, page_shift %d\n",
 859                    *npages, *ncont, *order, *page_shift);
 860
 861        return 0;
 862}
 863
 864static void mlx5_ib_umr_done(struct ib_cq *cq, struct ib_wc *wc)
 865{
 866        struct mlx5_ib_umr_context *context =
 867                container_of(wc->wr_cqe, struct mlx5_ib_umr_context, cqe);
 868
 869        context->status = wc->status;
 870        complete(&context->done);
 871}
 872
 873static inline void mlx5_ib_init_umr_context(struct mlx5_ib_umr_context *context)
 874{
 875        context->cqe.done = mlx5_ib_umr_done;
 876        context->status = -1;
 877        init_completion(&context->done);
 878}
 879
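     /*
      * Post a UMR work request on the driver's dedicated UMR QP and
      * wait for its completion.  Concurrency is throttled by the
      * umrc->sem semaphore; any completion status other than
      * IB_WC_SUCCESS is reported as -EFAULT.
      */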
 880static int mlx5_ib_post_send_wait(struct mlx5_ib_dev *dev,
 881                                  struct mlx5_umr_wr *umrwr)
 882{
 883        struct umr_common *umrc = &dev->umrc;
 884        struct ib_send_wr *bad;
 885        int err;
 886        struct mlx5_ib_umr_context umr_context;
 887
 888        mlx5_ib_init_umr_context(&umr_context);
 889        umrwr->wr.wr_cqe = &umr_context.cqe;
 890
 891        down(&umrc->sem);
 892        err = ib_post_send(umrc->qp, &umrwr->wr, &bad);
 893        if (err) {
 894                mlx5_ib_warn(dev, "UMR post send failed, err %d\n", err);
 895        } else {
 896                wait_for_completion(&umr_context.done);
 897                if (umr_context.status != IB_WC_SUCCESS) {
 898                        mlx5_ib_warn(dev, "reg umr failed (%u)\n",
 899                                     umr_context.status);
 900                        err = -EFAULT;
 901                }
 902        }
 903        up(&umrc->sem);
 904        return err;
 905}
 906
 907static struct mlx5_ib_mr *alloc_mr_from_cache(
 908                                  struct ib_pd *pd, struct ib_umem *umem,
 909                                  u64 virt_addr, u64 len, int npages,
 910                                  int page_shift, int order, int access_flags)
 911{
 912        struct mlx5_ib_dev *dev = to_mdev(pd->device);
 913        struct mlx5_ib_mr *mr;
 914        int err = 0;
 915        int i;
 916
 917        for (i = 0; i < 1; i++) {
 918                mr = alloc_cached_mr(dev, order);
 919                if (mr)
 920                        break;
 921
 922                err = add_keys(dev, order2idx(dev, order), 1);
 923                if (err && err != -EAGAIN) {
 924                        mlx5_ib_warn(dev, "add_keys failed, err %d\n", err);
 925                        break;
 926                }
 927        }
 928
 929        if (!mr)
 930                return ERR_PTR(-EAGAIN);
 931
 932        mr->ibmr.pd = pd;
 933        mr->umem = umem;
 934        mr->access_flags = access_flags;
 935        mr->desc_size = sizeof(struct mlx5_mtt);
 936        mr->mmkey.iova = virt_addr;
 937        mr->mmkey.size = len;
 938        mr->mmkey.pd = to_mpd(pd)->pdn;
 939
 940        return mr;
 941}
 942
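     /*
      * Fill one chunk of the XLT scratch buffer: KLMs for indirect
      * (ODP) keys, otherwise MTT entries taken from the umem starting
      * at @idx, with the tail of the buffer cleared.  With
      * MLX5_IB_UPD_XLT_ZAP nothing is written, so the zero-initialized
      * buffer invalidates the range.  Returns the number of XLT
      * entries covered by this chunk.
      */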
 943static inline int populate_xlt(struct mlx5_ib_mr *mr, int idx, int npages,
 944                               void *xlt, int page_shift, size_t size,
 945                               int flags)
 946{
 947        struct mlx5_ib_dev *dev = mr->dev;
 948        struct ib_umem *umem = mr->umem;
 949        if (flags & MLX5_IB_UPD_XLT_INDIRECT) {
 950                mlx5_odp_populate_klm(xlt, idx, npages, mr, flags);
 951                return npages;
 952        }
 953
 954        npages = min_t(size_t, npages, ib_umem_num_pages(umem) - idx);
 955
 956        if (!(flags & MLX5_IB_UPD_XLT_ZAP)) {
 957                __mlx5_ib_populate_pas(dev, umem, page_shift,
 958                                       idx, npages, xlt,
 959                                       MLX5_IB_MTT_PRESENT);
 960                /* Clear padding after the pages
 961                 * brought from the umem.
 962                 */
 963                memset(xlt + (npages * sizeof(struct mlx5_mtt)), 0,
 964                       size - npages * sizeof(struct mlx5_mtt));
 965        }
 966
 967        return npages;
 968}
 969
 970#define MLX5_MAX_UMR_CHUNK ((1 << (MLX5_MAX_UMR_SHIFT + 4)) - \
 971                            MLX5_UMR_MTT_ALIGNMENT)
 972#define MLX5_SPARE_UMR_CHUNK 0x10000
 973
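     /*
      * Update (part of) an mkey's translation table through UMR.  The
      * XLT entries are staged in a DMA-mapped scratch buffer and posted
      * in chunks of at most MLX5_MAX_UMR_CHUNK bytes; if a large buffer
      * cannot be allocated, the code falls back to MLX5_SPARE_UMR_CHUNK
      * and finally to the per-ucontext emergency page.  The last chunk
      * also carries the enable/PD/access/address update flags when
      * those were requested by the caller.
      */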
 974int mlx5_ib_update_xlt(struct mlx5_ib_mr *mr, u64 idx, int npages,
 975                       int page_shift, int flags)
 976{
 977        struct mlx5_ib_dev *dev = mr->dev;
 978        struct device *ddev = dev->ib_dev.dev.parent;
 979        struct mlx5_ib_ucontext *uctx = NULL;
 980        int size;
 981        void *xlt;
 982        dma_addr_t dma;
 983        struct mlx5_umr_wr wr;
 984        struct ib_sge sg;
 985        int err = 0;
 986        int desc_size = (flags & MLX5_IB_UPD_XLT_INDIRECT)
 987                               ? sizeof(struct mlx5_klm)
 988                               : sizeof(struct mlx5_mtt);
 989        const int page_align = MLX5_UMR_MTT_ALIGNMENT / desc_size;
 990        const int page_mask = page_align - 1;
 991        size_t pages_mapped = 0;
 992        size_t pages_to_map = 0;
 993        size_t pages_iter = 0;
 994        gfp_t gfp;
 995
 996        /* UMR copies MTTs in units of MLX5_UMR_MTT_ALIGNMENT bytes,
 997         * so we need to align the offset and length accordingly
 998         */
 999        if (idx & page_mask) {
1000                npages += idx & page_mask;
1001                idx &= ~page_mask;
1002        }
1003
1004        gfp = flags & MLX5_IB_UPD_XLT_ATOMIC ? GFP_ATOMIC : GFP_KERNEL;
1005        gfp |= __GFP_ZERO | __GFP_NOWARN;
1006
1007        pages_to_map = ALIGN(npages, page_align);
1008        size = desc_size * pages_to_map;
1009        size = min_t(int, size, MLX5_MAX_UMR_CHUNK);
1010
1011        xlt = (void *)__get_free_pages(gfp, get_order(size));
1012        if (!xlt && size > MLX5_SPARE_UMR_CHUNK) {
 1013                mlx5_ib_dbg(dev, "Failed to allocate %d bytes of order %d, falling back to spare UMR allocation of %d bytes\n",
1014                            size, get_order(size), MLX5_SPARE_UMR_CHUNK);
1015
1016                size = MLX5_SPARE_UMR_CHUNK;
1017                xlt = (void *)__get_free_pages(gfp, get_order(size));
1018        }
1019
1020        if (!xlt) {
1021                uctx = to_mucontext(mr->ibmr.pd->uobject->context);
1022                mlx5_ib_warn(dev, "Using XLT emergency buffer\n");
1023                size = PAGE_SIZE;
1024                xlt = (void *)uctx->upd_xlt_page;
1025                mutex_lock(&uctx->upd_xlt_page_mutex);
1026                memset(xlt, 0, size);
1027        }
1028        pages_iter = size / desc_size;
1029        dma = dma_map_single(ddev, xlt, size, DMA_TO_DEVICE);
1030        if (dma_mapping_error(ddev, dma)) {
1031                mlx5_ib_err(dev, "unable to map DMA during XLT update.\n");
1032                err = -ENOMEM;
1033                goto free_xlt;
1034        }
1035
1036        sg.addr = dma;
1037        sg.lkey = dev->umrc.pd->local_dma_lkey;
1038
1039        memset(&wr, 0, sizeof(wr));
1040        wr.wr.send_flags = MLX5_IB_SEND_UMR_UPDATE_XLT;
1041        if (!(flags & MLX5_IB_UPD_XLT_ENABLE))
1042                wr.wr.send_flags |= MLX5_IB_SEND_UMR_FAIL_IF_FREE;
1043        wr.wr.sg_list = &sg;
1044        wr.wr.num_sge = 1;
1045        wr.wr.opcode = MLX5_IB_WR_UMR;
1046
1047        wr.pd = mr->ibmr.pd;
1048        wr.mkey = mr->mmkey.key;
1049        wr.length = mr->mmkey.size;
1050        wr.virt_addr = mr->mmkey.iova;
1051        wr.access_flags = mr->access_flags;
1052        wr.page_shift = page_shift;
1053
1054        for (pages_mapped = 0;
1055             pages_mapped < pages_to_map && !err;
1056             pages_mapped += pages_iter, idx += pages_iter) {
1057                npages = min_t(int, pages_iter, pages_to_map - pages_mapped);
1058                dma_sync_single_for_cpu(ddev, dma, size, DMA_TO_DEVICE);
1059                npages = populate_xlt(mr, idx, npages, xlt,
1060                                      page_shift, size, flags);
1061
1062                dma_sync_single_for_device(ddev, dma, size, DMA_TO_DEVICE);
1063
1064                sg.length = ALIGN(npages * desc_size,
1065                                  MLX5_UMR_MTT_ALIGNMENT);
1066
1067                if (pages_mapped + pages_iter >= pages_to_map) {
1068                        if (flags & MLX5_IB_UPD_XLT_ENABLE)
1069                                wr.wr.send_flags |=
1070                                        MLX5_IB_SEND_UMR_ENABLE_MR |
1071                                        MLX5_IB_SEND_UMR_UPDATE_PD_ACCESS |
1072                                        MLX5_IB_SEND_UMR_UPDATE_TRANSLATION;
1073                        if (flags & MLX5_IB_UPD_XLT_PD ||
1074                            flags & MLX5_IB_UPD_XLT_ACCESS)
1075                                wr.wr.send_flags |=
1076                                        MLX5_IB_SEND_UMR_UPDATE_PD_ACCESS;
1077                        if (flags & MLX5_IB_UPD_XLT_ADDR)
1078                                wr.wr.send_flags |=
1079                                        MLX5_IB_SEND_UMR_UPDATE_TRANSLATION;
1080                }
1081
1082                wr.offset = idx * desc_size;
1083                wr.xlt_size = sg.length;
1084
1085                err = mlx5_ib_post_send_wait(dev, &wr);
1086        }
1087        dma_unmap_single(ddev, dma, size, DMA_TO_DEVICE);
1088
1089free_xlt:
1090        if (uctx)
1091                mutex_unlock(&uctx->upd_xlt_page_mutex);
1092        else
1093                free_pages((unsigned long)xlt, get_order(size));
1094
1095        return err;
1096}
1097
1098/*
1099 * If ibmr is NULL it will be allocated by reg_create.
1100 * Else, the given ibmr will be used.
1101 */
1102static struct mlx5_ib_mr *reg_create(struct ib_mr *ibmr, struct ib_pd *pd,
1103                                     u64 virt_addr, u64 length,
1104                                     struct ib_umem *umem, int npages,
1105                                     int page_shift, int access_flags,
1106                                     bool populate)
1107{
1108        struct mlx5_ib_dev *dev = to_mdev(pd->device);
1109        struct mlx5_ib_mr *mr;
1110        __be64 *pas;
1111        void *mkc;
1112        int inlen;
1113        u32 *in;
1114        int err;
1115        bool pg_cap = !!(MLX5_CAP_GEN(dev->mdev, pg));
1116
1117        mr = ibmr ? to_mmr(ibmr) : kzalloc(sizeof(*mr), GFP_KERNEL);
1118        if (!mr)
1119                return ERR_PTR(-ENOMEM);
1120
1121        mr->ibmr.pd = pd;
1122        mr->access_flags = access_flags;
1123
1124        inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
1125        if (populate)
1126                inlen += sizeof(*pas) * roundup(npages, 2);
1127        in = kvzalloc(inlen, GFP_KERNEL);
1128        if (!in) {
1129                err = -ENOMEM;
1130                goto err_1;
1131        }
1132        pas = (__be64 *)MLX5_ADDR_OF(create_mkey_in, in, klm_pas_mtt);
1133        if (populate && !(access_flags & IB_ACCESS_ON_DEMAND))
1134                mlx5_ib_populate_pas(dev, umem, page_shift, pas,
1135                                     pg_cap ? MLX5_IB_MTT_PRESENT : 0);
1136
1137        /* The pg_access bit allows setting the access flags
1138         * in the page list submitted with the command. */
1139        MLX5_SET(create_mkey_in, in, pg_access, !!(pg_cap));
1140
1141        mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
1142        MLX5_SET(mkc, mkc, free, !populate);
1143        MLX5_SET(mkc, mkc, access_mode, MLX5_MKC_ACCESS_MODE_MTT);
1144        MLX5_SET(mkc, mkc, a, !!(access_flags & IB_ACCESS_REMOTE_ATOMIC));
1145        MLX5_SET(mkc, mkc, rw, !!(access_flags & IB_ACCESS_REMOTE_WRITE));
1146        MLX5_SET(mkc, mkc, rr, !!(access_flags & IB_ACCESS_REMOTE_READ));
1147        MLX5_SET(mkc, mkc, lw, !!(access_flags & IB_ACCESS_LOCAL_WRITE));
1148        MLX5_SET(mkc, mkc, lr, 1);
1149        MLX5_SET(mkc, mkc, umr_en, 1);
1150
1151        MLX5_SET64(mkc, mkc, start_addr, virt_addr);
1152        MLX5_SET64(mkc, mkc, len, length);
1153        MLX5_SET(mkc, mkc, pd, to_mpd(pd)->pdn);
1154        MLX5_SET(mkc, mkc, bsf_octword_size, 0);
1155        MLX5_SET(mkc, mkc, translations_octword_size,
1156                 get_octo_len(virt_addr, length, page_shift));
1157        MLX5_SET(mkc, mkc, log_page_size, page_shift);
1158        MLX5_SET(mkc, mkc, qpn, 0xffffff);
1159        if (populate) {
1160                MLX5_SET(create_mkey_in, in, translations_octword_actual_size,
1161                         get_octo_len(virt_addr, length, page_shift));
1162        }
1163
1164        err = mlx5_core_create_mkey(dev->mdev, &mr->mmkey, in, inlen);
1165        if (err) {
1166                mlx5_ib_warn(dev, "create mkey failed\n");
1167                goto err_2;
1168        }
1169        mr->mmkey.type = MLX5_MKEY_MR;
1170        mr->desc_size = sizeof(struct mlx5_mtt);
1171        mr->dev = dev;
1172        kvfree(in);
1173
1174        mlx5_ib_dbg(dev, "mkey = 0x%x\n", mr->mmkey.key);
1175
1176        return mr;
1177
1178err_2:
1179        kvfree(in);
1180
1181err_1:
1182        if (!ibmr)
1183                kfree(mr);
1184
1185        return ERR_PTR(err);
1186}
1187
 1188static void set_mr_fields(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr,
1189                          int npages, u64 length, int access_flags)
1190{
1191        mr->npages = npages;
1192        atomic_add(npages, &dev->mdev->priv.reg_pages);
1193        mr->ibmr.lkey = mr->mmkey.key;
1194        mr->ibmr.rkey = mr->mmkey.key;
1195        mr->ibmr.length = length;
1196        mr->access_flags = access_flags;
1197}
1198
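     /*
      * Register a user memory region.  A zero start address together
      * with a length of U64_MAX requests an implicit ODP MR.  Otherwise
      * the umem is acquired and, if its order fits the MR cache, a
      * cached mkey is taken and pointed at the pages through UMR;
      * registrations that cannot use UMR go through the reg_create()
      * slow path under slow_path_mutex.
      */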
1199struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
1200                                  u64 virt_addr, int access_flags,
1201                                  struct ib_udata *udata)
1202{
1203        struct mlx5_ib_dev *dev = to_mdev(pd->device);
1204        struct mlx5_ib_mr *mr = NULL;
1205        struct ib_umem *umem;
1206        int page_shift;
1207        int npages;
1208        int ncont;
1209        int order;
1210        int err;
1211        bool use_umr = true;
1212
1213        if (!IS_ENABLED(CONFIG_INFINIBAND_USER_MEM))
1214                return ERR_PTR(-EINVAL);
1215
1216        mlx5_ib_dbg(dev, "start 0x%llx, virt_addr 0x%llx, length 0x%llx, access_flags 0x%x\n",
1217                    start, virt_addr, length, access_flags);
1218
1219#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
1220        if (!start && length == U64_MAX) {
1221                if (!(access_flags & IB_ACCESS_ON_DEMAND) ||
1222                    !(dev->odp_caps.general_caps & IB_ODP_SUPPORT_IMPLICIT))
1223                        return ERR_PTR(-EINVAL);
1224
1225                mr = mlx5_ib_alloc_implicit_mr(to_mpd(pd), access_flags);
1226                return &mr->ibmr;
1227        }
1228#endif
1229
1230        err = mr_umem_get(pd, start, length, access_flags, &umem, &npages,
1231                           &page_shift, &ncont, &order);
1232
1233        if (err < 0)
1234                return ERR_PTR(err);
1235
1236        if (order <= mr_cache_max_order(dev)) {
1237                mr = alloc_mr_from_cache(pd, umem, virt_addr, length, ncont,
1238                                         page_shift, order, access_flags);
1239                if (PTR_ERR(mr) == -EAGAIN) {
1240                        mlx5_ib_dbg(dev, "cache empty for order %d\n", order);
1241                        mr = NULL;
1242                }
1243        } else if (!MLX5_CAP_GEN(dev->mdev, umr_extended_translation_offset)) {
1244                if (access_flags & IB_ACCESS_ON_DEMAND) {
1245                        err = -EINVAL;
1246                        pr_err("Got MR registration for ODP MR > 512MB, not supported for Connect-IB\n");
1247                        goto error;
1248                }
1249                use_umr = false;
1250        }
1251
1252        if (!mr) {
1253                mutex_lock(&dev->slow_path_mutex);
1254                mr = reg_create(NULL, pd, virt_addr, length, umem, ncont,
1255                                page_shift, access_flags, !use_umr);
1256                mutex_unlock(&dev->slow_path_mutex);
1257        }
1258
1259        if (IS_ERR(mr)) {
1260                err = PTR_ERR(mr);
1261                goto error;
1262        }
1263
1264        mlx5_ib_dbg(dev, "mkey 0x%x\n", mr->mmkey.key);
1265
1266        mr->umem = umem;
 1267        set_mr_fields(dev, mr, npages, length, access_flags);
1268
1269#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
1270        update_odp_mr(mr);
1271#endif
1272
1273        if (use_umr) {
1274                int update_xlt_flags = MLX5_IB_UPD_XLT_ENABLE;
1275
1276                if (access_flags & IB_ACCESS_ON_DEMAND)
1277                        update_xlt_flags |= MLX5_IB_UPD_XLT_ZAP;
1278
1279                err = mlx5_ib_update_xlt(mr, 0, ncont, page_shift,
1280                                         update_xlt_flags);
1281
1282                if (err) {
1283                        dereg_mr(dev, mr);
1284                        return ERR_PTR(err);
1285                }
1286        }
1287
1288        mr->live = 1;
1289        return &mr->ibmr;
1290error:
1291        ib_umem_release(umem);
1292        return ERR_PTR(err);
1293}
1294
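     /*
      * Invalidate a cached mkey with a UMR "disable MR" operation so
      * that it can safely be handed out again.  Treated as a no-op when
      * the device is in the internal error state.
      */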
1295static int unreg_umr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
1296{
1297        struct mlx5_core_dev *mdev = dev->mdev;
1298        struct mlx5_umr_wr umrwr = {};
1299
1300        if (mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR)
1301                return 0;
1302
1303        umrwr.wr.send_flags = MLX5_IB_SEND_UMR_DISABLE_MR |
1304                              MLX5_IB_SEND_UMR_FAIL_IF_FREE;
1305        umrwr.wr.opcode = MLX5_IB_WR_UMR;
1306        umrwr.mkey = mr->mmkey.key;
1307
1308        return mlx5_ib_post_send_wait(dev, &umrwr);
1309}
1310
1311static int rereg_umr(struct ib_pd *pd, struct mlx5_ib_mr *mr,
1312                     int access_flags, int flags)
1313{
1314        struct mlx5_ib_dev *dev = to_mdev(pd->device);
1315        struct mlx5_umr_wr umrwr = {};
1316        int err;
1317
1318        umrwr.wr.send_flags = MLX5_IB_SEND_UMR_FAIL_IF_FREE;
1319
1320        umrwr.wr.opcode = MLX5_IB_WR_UMR;
1321        umrwr.mkey = mr->mmkey.key;
1322
1323        if (flags & IB_MR_REREG_PD || flags & IB_MR_REREG_ACCESS) {
1324                umrwr.pd = pd;
1325                umrwr.access_flags = access_flags;
1326                umrwr.wr.send_flags |= MLX5_IB_SEND_UMR_UPDATE_PD_ACCESS;
1327        }
1328
1329        err = mlx5_ib_post_send_wait(dev, &umrwr);
1330
1331        return err;
1332}
1333
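     /*
      * Re-register a user MR.  Unless only the PD is changed, the umem
      * is replaced first.  If the new range still fits the translation
      * space reserved for the existing mkey (use_umr_mtt_update()), a
      * UMR updates the translation, PD and/or access flags in place;
      * otherwise the old mkey is released (destroyed, or just disabled
      * for keys allocated from the cache) and a new one is created
      * through reg_create().
      */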
1334int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start,
1335                          u64 length, u64 virt_addr, int new_access_flags,
1336                          struct ib_pd *new_pd, struct ib_udata *udata)
1337{
1338        struct mlx5_ib_dev *dev = to_mdev(ib_mr->device);
1339        struct mlx5_ib_mr *mr = to_mmr(ib_mr);
1340        struct ib_pd *pd = (flags & IB_MR_REREG_PD) ? new_pd : ib_mr->pd;
1341        int access_flags = flags & IB_MR_REREG_ACCESS ?
1342                            new_access_flags :
1343                            mr->access_flags;
1344        u64 addr = (flags & IB_MR_REREG_TRANS) ? virt_addr : mr->umem->address;
1345        u64 len = (flags & IB_MR_REREG_TRANS) ? length : mr->umem->length;
1346        int page_shift = 0;
1347        int upd_flags = 0;
1348        int npages = 0;
1349        int ncont = 0;
1350        int order = 0;
1351        int err;
1352
1353        mlx5_ib_dbg(dev, "start 0x%llx, virt_addr 0x%llx, length 0x%llx, access_flags 0x%x\n",
1354                    start, virt_addr, length, access_flags);
1355
1356        atomic_sub(mr->npages, &dev->mdev->priv.reg_pages);
1357
1358        if (flags != IB_MR_REREG_PD) {
1359                /*
1360                 * Replace umem. This needs to be done whether or not UMR is
1361                 * used.
1362                 */
1363                flags |= IB_MR_REREG_TRANS;
1364                ib_umem_release(mr->umem);
1365                err = mr_umem_get(pd, addr, len, access_flags, &mr->umem,
1366                                  &npages, &page_shift, &ncont, &order);
1367                if (err < 0) {
1368                        clean_mr(dev, mr);
1369                        return err;
1370                }
1371        }
1372
1373        if (flags & IB_MR_REREG_TRANS && !use_umr_mtt_update(mr, addr, len)) {
1374                /*
1375                 * UMR can't be used - MKey needs to be replaced.
1376                 */
1377                if (mr->allocated_from_cache) {
1378                        err = unreg_umr(dev, mr);
1379                        if (err)
1380                                mlx5_ib_warn(dev, "Failed to unregister MR\n");
1381                } else {
1382                        err = destroy_mkey(dev, mr);
1383                        if (err)
1384                                mlx5_ib_warn(dev, "Failed to destroy MKey\n");
1385                }
1386                if (err)
1387                        return err;
1388
1389                mr = reg_create(ib_mr, pd, addr, len, mr->umem, ncont,
1390                                page_shift, access_flags, true);
1391
1392                if (IS_ERR(mr))
1393                        return PTR_ERR(mr);
1394
1395                mr->allocated_from_cache = 0;
1396                mr->live = 1;
1397        } else {
1398                /*
1399                 * Send a UMR WQE
1400                 */
1401                mr->ibmr.pd = pd;
1402                mr->access_flags = access_flags;
1403                mr->mmkey.iova = addr;
1404                mr->mmkey.size = len;
1405                mr->mmkey.pd = to_mpd(pd)->pdn;
1406
1407                if (flags & IB_MR_REREG_TRANS) {
1408                        upd_flags = MLX5_IB_UPD_XLT_ADDR;
1409                        if (flags & IB_MR_REREG_PD)
1410                                upd_flags |= MLX5_IB_UPD_XLT_PD;
1411                        if (flags & IB_MR_REREG_ACCESS)
1412                                upd_flags |= MLX5_IB_UPD_XLT_ACCESS;
1413                        err = mlx5_ib_update_xlt(mr, 0, npages, page_shift,
1414                                                 upd_flags);
1415                } else {
1416                        err = rereg_umr(pd, mr, access_flags, flags);
1417                }
1418
1419                if (err) {
1420                        mlx5_ib_warn(dev, "Failed to rereg UMR\n");
1421                        ib_umem_release(mr->umem);
1422                        mr->umem = NULL;
1423                        clean_mr(dev, mr);
1424                        return err;
1425                }
1426        }
1427
 1428        set_mr_fields(dev, mr, npages, len, access_flags);
1429
1430#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
1431        update_odp_mr(mr);
1432#endif
1433        return 0;
1434}
1435
1436static int
1437mlx5_alloc_priv_descs(struct ib_device *device,
1438                      struct mlx5_ib_mr *mr,
1439                      int ndescs,
1440                      int desc_size)
1441{
1442        int size = ndescs * desc_size;
1443        int add_size;
1444        int ret;
1445
1446        add_size = max_t(int, MLX5_UMR_ALIGN - ARCH_KMALLOC_MINALIGN, 0);
1447
1448        mr->descs_alloc = kzalloc(size + add_size, GFP_KERNEL);
1449        if (!mr->descs_alloc)
1450                return -ENOMEM;
1451
1452        mr->descs = PTR_ALIGN(mr->descs_alloc, MLX5_UMR_ALIGN);
1453
1454        mr->desc_map = dma_map_single(device->dev.parent, mr->descs,
1455                                      size, DMA_TO_DEVICE);
1456        if (dma_mapping_error(device->dev.parent, mr->desc_map)) {
1457                ret = -ENOMEM;
1458                goto err;
1459        }
1460
1461        return 0;
1462err:
1463        kfree(mr->descs_alloc);
1464
1465        return ret;
1466}
1467
1468static void
1469mlx5_free_priv_descs(struct mlx5_ib_mr *mr)
1470{
1471        if (mr->descs) {
1472                struct ib_device *device = mr->ibmr.device;
1473                int size = mr->max_descs * mr->desc_size;
1474
1475                dma_unmap_single(device->dev.parent, mr->desc_map,
1476                                 size, DMA_TO_DEVICE);
1477                kfree(mr->descs_alloc);
1478                mr->descs = NULL;
1479        }
1480}
1481
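     /*
      * Release the mkey-side resources of an MR: destroy the signature
      * PSVs if present, free the private descriptor buffer and, unless
      * the mkey was allocated from the MR cache, destroy the mkey
      * itself.
      */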
1482static int clean_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
1483{
1484        int allocated_from_cache = mr->allocated_from_cache;
1485        int err;
1486
1487        if (mr->sig) {
1488                if (mlx5_core_destroy_psv(dev->mdev,
1489                                          mr->sig->psv_memory.psv_idx))
1490                        mlx5_ib_warn(dev, "failed to destroy mem psv %d\n",
1491                                     mr->sig->psv_memory.psv_idx);
1492                if (mlx5_core_destroy_psv(dev->mdev,
1493                                          mr->sig->psv_wire.psv_idx))
1494                        mlx5_ib_warn(dev, "failed to destroy wire psv %d\n",
1495                                     mr->sig->psv_wire.psv_idx);
1496                kfree(mr->sig);
1497                mr->sig = NULL;
1498        }
1499
1500        mlx5_free_priv_descs(mr);
1501
1502        if (!allocated_from_cache) {
1503                u32 key = mr->mmkey.key;
1504
1505                err = destroy_mkey(dev, mr);
1506                if (err) {
1507                        mlx5_ib_warn(dev, "failed to destroy mkey 0x%x (%d)\n",
1508                                     key, err);
1509                        return err;
1510                }
1511        }
1512
1513        return 0;
1514}
1515
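     /*
      * Full deregistration.  For ODP MRs the umem is torn down first,
      * stopping new page faults and waiting for running handlers, so no
      * invalidation can still be looking at the MR.  The MR is then
      * either freed or handed back to the MR cache.
      */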
1516static int dereg_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
1517{
1518        int npages = mr->npages;
1519        struct ib_umem *umem = mr->umem;
1520
1521#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
1522        if (umem && umem->odp_data) {
1523                /* Prevent new page faults from succeeding */
1524                mr->live = 0;
1525                /* Wait for all running page-fault handlers to finish. */
1526                synchronize_srcu(&dev->mr_srcu);
1527                /* Destroy all page mappings */
1528                if (umem->odp_data->page_list)
1529                        mlx5_ib_invalidate_range(umem, ib_umem_start(umem),
1530                                                 ib_umem_end(umem));
1531                else
1532                        mlx5_ib_free_implicit_mr(mr);
1533                /*
1534                 * For ODP, release the umem before destroying the MR so
1535                 * that no invalidation handler can still be in flight
1536                 * and looking at the *mr struct.
1537                 */
1538                ib_umem_release(umem);
1539                atomic_sub(npages, &dev->mdev->priv.reg_pages);
1540
1541                /* Avoid double-freeing the umem. */
1542                umem = NULL;
1543        }
1544#endif
1545
1546        clean_mr(dev, mr);
1547
1548        if (umem) {
1549                ib_umem_release(umem);
1550                atomic_sub(npages, &dev->mdev->priv.reg_pages);
1551        }
1552
1553        if (!mr->allocated_from_cache)
1554                kfree(mr);
1555        else
1556                mlx5_mr_cache_free(dev, mr);
1557
1558        return 0;
1559}
1560
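    /* Driver entry point for ib_dereg_mr(): thin wrapper around dereg_mr(). */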
1561int mlx5_ib_dereg_mr(struct ib_mr *ibmr)
1562{
1563        struct mlx5_ib_dev *dev = to_mdev(ibmr->device);
1564        struct mlx5_ib_mr *mr = to_mmr(ibmr);
1565
1566        return dereg_mr(dev, mr);
1567}
1568
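    /*
     * Driver entry point for ib_alloc_mr().  Builds an mkey suitable for
     * fast registration: MTT descriptors for IB_MR_TYPE_MEM_REG, KLMs for
     * IB_MR_TYPE_SG_GAPS, and KLMs plus a pair of PSVs (memory and wire)
     * for IB_MR_TYPE_SIGNATURE.
     */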
1569struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd,
1570                               enum ib_mr_type mr_type,
1571                               u32 max_num_sg)
1572{
1573        struct mlx5_ib_dev *dev = to_mdev(pd->device);
1574        int inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
1575        int ndescs = ALIGN(max_num_sg, 4);
1576        struct mlx5_ib_mr *mr;
1577        void *mkc;
1578        u32 *in;
1579        int err;
1580
1581        mr = kzalloc(sizeof(*mr), GFP_KERNEL);
1582        if (!mr)
1583                return ERR_PTR(-ENOMEM);
1584
1585        in = kzalloc(inlen, GFP_KERNEL);
1586        if (!in) {
1587                err = -ENOMEM;
1588                goto err_free;
1589        }
1590
1591        mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
1592        MLX5_SET(mkc, mkc, free, 1);
1593        MLX5_SET(mkc, mkc, translations_octword_size, ndescs);
1594        MLX5_SET(mkc, mkc, qpn, 0xffffff);
1595        MLX5_SET(mkc, mkc, pd, to_mpd(pd)->pdn);
1596
1597        if (mr_type == IB_MR_TYPE_MEM_REG) {
1598                mr->access_mode = MLX5_MKC_ACCESS_MODE_MTT;
1599                MLX5_SET(mkc, mkc, log_page_size, PAGE_SHIFT);
1600                err = mlx5_alloc_priv_descs(pd->device, mr,
1601                                            ndescs, sizeof(struct mlx5_mtt));
1602                if (err)
1603                        goto err_free_in;
1604
1605                mr->desc_size = sizeof(struct mlx5_mtt);
1606                mr->max_descs = ndescs;
1607        } else if (mr_type == IB_MR_TYPE_SG_GAPS) {
1608                mr->access_mode = MLX5_MKC_ACCESS_MODE_KLMS;
1609
1610                err = mlx5_alloc_priv_descs(pd->device, mr,
1611                                            ndescs, sizeof(struct mlx5_klm));
1612                if (err)
1613                        goto err_free_in;
1614                mr->desc_size = sizeof(struct mlx5_klm);
1615                mr->max_descs = ndescs;
1616        } else if (mr_type == IB_MR_TYPE_SIGNATURE) {
1617                u32 psv_index[2];
1618
1619                MLX5_SET(mkc, mkc, bsf_en, 1);
1620                MLX5_SET(mkc, mkc, bsf_octword_size, MLX5_MKEY_BSF_OCTO_SIZE);
1621                mr->sig = kzalloc(sizeof(*mr->sig), GFP_KERNEL);
1622                if (!mr->sig) {
1623                        err = -ENOMEM;
1624                        goto err_free_in;
1625                }
1626
1627                /* create mem & wire PSVs */
1628                err = mlx5_core_create_psv(dev->mdev, to_mpd(pd)->pdn,
1629                                           2, psv_index);
1630                if (err)
1631                        goto err_free_sig;
1632
1633                mr->access_mode = MLX5_MKC_ACCESS_MODE_KLMS;
1634                mr->sig->psv_memory.psv_idx = psv_index[0];
1635                mr->sig->psv_wire.psv_idx = psv_index[1];
1636
1637                mr->sig->sig_status_checked = true;
1638                mr->sig->sig_err_exists = false;
1639                /* Arm the SIGERR check on the next UMR */
1640                ++mr->sig->sigerr_count;
1641        } else {
1642                mlx5_ib_warn(dev, "Invalid mr type %d\n", mr_type);
1643                err = -EINVAL;
1644                goto err_free_in;
1645        }
1646
1647        MLX5_SET(mkc, mkc, access_mode, mr->access_mode);
1648        MLX5_SET(mkc, mkc, umr_en, 1);
1649
1650        mr->ibmr.device = pd->device;
1651        err = mlx5_core_create_mkey(dev->mdev, &mr->mmkey, in, inlen);
1652        if (err)
1653                goto err_destroy_psv;
1654
1655        mr->mmkey.type = MLX5_MKEY_MR;
1656        mr->ibmr.lkey = mr->mmkey.key;
1657        mr->ibmr.rkey = mr->mmkey.key;
1658        mr->umem = NULL;
1659        kfree(in);
1660
1661        return &mr->ibmr;
1662
1663err_destroy_psv:
1664        if (mr->sig) {
1665                if (mlx5_core_destroy_psv(dev->mdev,
1666                                          mr->sig->psv_memory.psv_idx))
1667                        mlx5_ib_warn(dev, "failed to destroy mem psv %d\n",
1668                                     mr->sig->psv_memory.psv_idx);
1669                if (mlx5_core_destroy_psv(dev->mdev,
1670                                          mr->sig->psv_wire.psv_idx))
1671                        mlx5_ib_warn(dev, "failed to destroy wire psv %d\n",
1672                                     mr->sig->psv_wire.psv_idx);
1673        }
1674        mlx5_free_priv_descs(mr);
1675err_free_sig:
1676        kfree(mr->sig);
1677err_free_in:
1678        kfree(in);
1679err_free:
1680        kfree(mr);
1681        return ERR_PTR(err);
1682}
1683
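    /*
     * Driver entry point for ib_alloc_mw().  Validates the user ABI input,
     * then creates a KLM-based mkey in the free state to back the memory
     * window until it is bound.
     */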
1684struct ib_mw *mlx5_ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type,
1685                               struct ib_udata *udata)
1686{
1687        struct mlx5_ib_dev *dev = to_mdev(pd->device);
1688        int inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
1689        struct mlx5_ib_mw *mw = NULL;
1690        u32 *in = NULL;
1691        void *mkc;
1692        int ndescs;
1693        int err;
1694        struct mlx5_ib_alloc_mw req = {};
1695        struct {
1696                __u32   comp_mask;
1697                __u32   response_length;
1698        } resp = {};
1699
1700        err = ib_copy_from_udata(&req, udata, min(udata->inlen, sizeof(req)));
1701        if (err)
1702                return ERR_PTR(err);
1703
1704        if (req.comp_mask || req.reserved1 || req.reserved2)
1705                return ERR_PTR(-EOPNOTSUPP);
1706
1707        if (udata->inlen > sizeof(req) &&
1708            !ib_is_udata_cleared(udata, sizeof(req),
1709                                 udata->inlen - sizeof(req)))
1710                return ERR_PTR(-EOPNOTSUPP);
1711
1712        ndescs = req.num_klms ? roundup(req.num_klms, 4) : roundup(1, 4);
1713
1714        mw = kzalloc(sizeof(*mw), GFP_KERNEL);
1715        in = kzalloc(inlen, GFP_KERNEL);
1716        if (!mw || !in) {
1717                err = -ENOMEM;
1718                goto free;
1719        }
1720
1721        mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
1722
1723        MLX5_SET(mkc, mkc, free, 1);
1724        MLX5_SET(mkc, mkc, translations_octword_size, ndescs);
1725        MLX5_SET(mkc, mkc, pd, to_mpd(pd)->pdn);
1726        MLX5_SET(mkc, mkc, umr_en, 1);
1727        MLX5_SET(mkc, mkc, lr, 1);
1728        MLX5_SET(mkc, mkc, access_mode, MLX5_MKC_ACCESS_MODE_KLMS);
1729        MLX5_SET(mkc, mkc, en_rinval, !!(type == IB_MW_TYPE_2));
1730        MLX5_SET(mkc, mkc, qpn, 0xffffff);
1731
1732        err = mlx5_core_create_mkey(dev->mdev, &mw->mmkey, in, inlen);
1733        if (err)
1734                goto free;
1735
1736        mw->mmkey.type = MLX5_MKEY_MW;
1737        mw->ibmw.rkey = mw->mmkey.key;
1738        mw->ndescs = ndescs;
1739
1740        resp.response_length = min(offsetof(typeof(resp), response_length) +
1741                                   sizeof(resp.response_length), udata->outlen);
1742        if (resp.response_length) {
1743                err = ib_copy_to_udata(udata, &resp, resp.response_length);
1744                if (err) {
1745                        mlx5_core_destroy_mkey(dev->mdev, &mw->mmkey);
1746                        goto free;
1747                }
1748        }
1749
1750        kfree(in);
1751        return &mw->ibmw;
1752
1753free:
1754        kfree(mw);
1755        kfree(in);
1756        return ERR_PTR(err);
1757}
1758
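    /* Driver entry point for ib_dealloc_mw(): destroy the MW's mkey. */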
1759int mlx5_ib_dealloc_mw(struct ib_mw *mw)
1760{
1761        struct mlx5_ib_mw *mmw = to_mmw(mw);
1762        int err;
1763
1764        err = mlx5_core_destroy_mkey(to_mdev(mw->device)->mdev,
1765                                     &mmw->mmkey);
1766        if (!err)
1767                kfree(mmw);
1768        return err;
1769}
1770
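    /*
     * Driver entry point for ib_check_mr_status().  Only
     * IB_MR_CHECK_SIG_STATUS is supported: report and clear any pending
     * signature error recorded for this MR.
     */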
1771int mlx5_ib_check_mr_status(struct ib_mr *ibmr, u32 check_mask,
1772                            struct ib_mr_status *mr_status)
1773{
1774        struct mlx5_ib_mr *mmr = to_mmr(ibmr);
1775        int ret = 0;
1776
1777        if (check_mask & ~IB_MR_CHECK_SIG_STATUS) {
1778                pr_err("Invalid status check mask\n");
1779                ret = -EINVAL;
1780                goto done;
1781        }
1782
1783        mr_status->fail_status = 0;
1784        if (check_mask & IB_MR_CHECK_SIG_STATUS) {
1785                if (!mmr->sig) {
1786                        ret = -EINVAL;
1787                        pr_err("signature status check requested on a non-signature enabled MR\n");
1788                        goto done;
1789                }
1790
1791                mmr->sig->sig_status_checked = true;
1792                if (!mmr->sig->sig_err_exists)
1793                        goto done;
1794
1795                if (ibmr->lkey == mmr->sig->err_item.key) {
1796                        memcpy(&mr_status->sig_err, &mmr->sig->err_item,
1797                               sizeof(mr_status->sig_err));
1798                } else {
1799                        mr_status->sig_err.err_type = IB_SIG_BAD_GUARD;
1800                        mr_status->sig_err.sig_err_offset = 0;
1801                        mr_status->sig_err.key = mmr->sig->err_item.key;
1802                }
1803
1804                mmr->sig->sig_err_exists = false;
1805                mr_status->fail_status |= IB_MR_CHECK_SIG_STATUS;
1806        }
1807
1808done:
1809        return ret;
1810}
1811
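    /*
     * Translate a scatterlist into KLM descriptors (one KLM per SG entry),
     * honouring an optional offset into the first entry.  Returns the
     * number of entries mapped, which may be less than sg_nents if the MR
     * runs out of descriptors.
     */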
1812static int
1813mlx5_ib_sg_to_klms(struct mlx5_ib_mr *mr,
1814                   struct scatterlist *sgl,
1815                   unsigned short sg_nents,
1816                   unsigned int *sg_offset_p)
1817{
1818        struct scatterlist *sg = sgl;
1819        struct mlx5_klm *klms = mr->descs;
1820        unsigned int sg_offset = sg_offset_p ? *sg_offset_p : 0;
1821        u32 lkey = mr->ibmr.pd->local_dma_lkey;
1822        int i;
1823
1824        mr->ibmr.iova = sg_dma_address(sg) + sg_offset;
1825        mr->ibmr.length = 0;
1826
1827        for_each_sg(sgl, sg, sg_nents, i) {
1828                if (unlikely(i >= mr->max_descs))
1829                        break;
1830                klms[i].va = cpu_to_be64(sg_dma_address(sg) + sg_offset);
1831                klms[i].bcount = cpu_to_be32(sg_dma_len(sg) - sg_offset);
1832                klms[i].key = cpu_to_be32(lkey);
1833                mr->ibmr.length += sg_dma_len(sg) - sg_offset;
1834
1835                sg_offset = 0;
1836        }
1837        mr->ndescs = i;
1838
1839        if (sg_offset_p)
1840                *sg_offset_p = sg_offset;
1841
1842        return i;
1843}
1844
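    /*
     * ib_sg_to_pages() callback: store one page address as an MTT entry
     * with the read/write enable bits set.
     */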
1845static int mlx5_set_page(struct ib_mr *ibmr, u64 addr)
1846{
1847        struct mlx5_ib_mr *mr = to_mmr(ibmr);
1848        __be64 *descs;
1849
1850        if (unlikely(mr->ndescs == mr->max_descs))
1851                return -ENOMEM;
1852
1853        descs = mr->descs;
1854        descs[mr->ndescs++] = cpu_to_be64(addr | MLX5_EN_RD | MLX5_EN_WR);
1855
1856        return 0;
1857}
1858
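    /*
     * Driver entry point for ib_map_mr_sg().  Syncs the descriptor buffer
     * for CPU access, fills it with either KLMs or MTT page entries
     * depending on the MR's access mode, and syncs it back for the device.
     */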
1859int mlx5_ib_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents,
1860                      unsigned int *sg_offset)
1861{
1862        struct mlx5_ib_mr *mr = to_mmr(ibmr);
1863        int n;
1864
1865        mr->ndescs = 0;
1866
1867        ib_dma_sync_single_for_cpu(ibmr->device, mr->desc_map,
1868                                   mr->desc_size * mr->max_descs,
1869                                   DMA_TO_DEVICE);
1870
1871        if (mr->access_mode == MLX5_MKC_ACCESS_MODE_KLMS)
1872                n = mlx5_ib_sg_to_klms(mr, sg, sg_nents, sg_offset);
1873        else
1874                n = ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset,
1875                                   mlx5_set_page);
1876
1877        ib_dma_sync_single_for_device(ibmr->device, mr->desc_map,
1878                                      mr->desc_size * mr->max_descs,
1879                                      DMA_TO_DEVICE);
1880
1881        return n;
1882}
1883