linux/drivers/infiniband/hw/mlx5/mr.c
   1/*
   2 * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved.
   3 *
   4 * This software is available to you under a choice of one of two
   5 * licenses.  You may choose to be licensed under the terms of the GNU
   6 * General Public License (GPL) Version 2, available from the file
   7 * COPYING in the main directory of this source tree, or the
   8 * OpenIB.org BSD license below:
   9 *
  10 *     Redistribution and use in source and binary forms, with or
  11 *     without modification, are permitted provided that the following
  12 *     conditions are met:
  13 *
  14 *      - Redistributions of source code must retain the above
  15 *        copyright notice, this list of conditions and the following
  16 *        disclaimer.
  17 *
  18 *      - Redistributions in binary form must reproduce the above
  19 *        copyright notice, this list of conditions and the following
  20 *        disclaimer in the documentation and/or other materials
  21 *        provided with the distribution.
  22 *
  23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
  27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
  28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  30 * SOFTWARE.
  31 */
  32
  33
  34#include <linux/kref.h>
  35#include <linux/random.h>
  36#include <linux/debugfs.h>
  37#include <linux/export.h>
  38#include <linux/delay.h>
  39#include <rdma/ib_umem.h>
  40#include <rdma/ib_umem_odp.h>
  41#include <rdma/ib_verbs.h>
  42#include "mlx5_ib.h"
  43
  44enum {
  45        MAX_PENDING_REG_MR = 8,
  46};
  47
  48#define MLX5_UMR_ALIGN 2048
  49#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
  50static __be64 mlx5_ib_update_mtt_emergency_buffer[
  51                MLX5_UMR_MTT_MIN_CHUNK_SIZE/sizeof(__be64)]
  52        __aligned(MLX5_UMR_ALIGN);
  53static DEFINE_MUTEX(mlx5_ib_update_mtt_emergency_buffer_mutex);
  54#endif
  55
  56static int clean_mr(struct mlx5_ib_mr *mr);
  57
  58static int destroy_mkey(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
  59{
  60        int err = mlx5_core_destroy_mkey(dev->mdev, &mr->mmr);
  61
  62#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
  63        /* Wait until all page fault handlers using the mr complete. */
  64        synchronize_srcu(&dev->mr_srcu);
  65#endif
  66
  67        return err;
  68}
  69
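/*
 * Map an MR order to its index in the cache entry array.  Orders smaller
 * than the smallest cached order map to entry 0; the result is not checked
 * against MAX_MR_CACHE_ENTRIES, callers are expected to do that.
 */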
  70static int order2idx(struct mlx5_ib_dev *dev, int order)
  71{
  72        struct mlx5_mr_cache *cache = &dev->cache;
  73
  74        if (order < cache->ent[0].order)
  75                return 0;
  76        else
  77                return order - cache->ent[0].order;
  78}
  79
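/*
 * Completion callback for the asynchronous mkey creation issued by
 * add_keys().  On failure the MR is freed and cache refill is throttled by
 * setting fill_delay and arming the delay timer.  On success a fresh
 * variant key is generated, the MR is added to its cache bucket and the
 * mkey is inserted into the driver's MR table radix tree.
 */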
  80static void reg_mr_callback(int status, void *context)
  81{
  82        struct mlx5_ib_mr *mr = context;
  83        struct mlx5_ib_dev *dev = mr->dev;
  84        struct mlx5_mr_cache *cache = &dev->cache;
  85        int c = order2idx(dev, mr->order);
  86        struct mlx5_cache_ent *ent = &cache->ent[c];
  87        u8 key;
  88        unsigned long flags;
  89        struct mlx5_mr_table *table = &dev->mdev->priv.mr_table;
  90        int err;
  91
  92        spin_lock_irqsave(&ent->lock, flags);
  93        ent->pending--;
  94        spin_unlock_irqrestore(&ent->lock, flags);
  95        if (status) {
  96                mlx5_ib_warn(dev, "async reg mr failed. status %d\n", status);
  97                kfree(mr);
  98                dev->fill_delay = 1;
  99                mod_timer(&dev->delay_timer, jiffies + HZ);
 100                return;
 101        }
 102
 103        if (mr->out.hdr.status) {
  104                mlx5_ib_warn(dev, "failed - status %d, syndrome 0x%x\n",
 105                             mr->out.hdr.status,
 106                             be32_to_cpu(mr->out.hdr.syndrome));
 107                kfree(mr);
 108                dev->fill_delay = 1;
 109                mod_timer(&dev->delay_timer, jiffies + HZ);
 110                return;
 111        }
 112
 113        spin_lock_irqsave(&dev->mdev->priv.mkey_lock, flags);
 114        key = dev->mdev->priv.mkey_key++;
 115        spin_unlock_irqrestore(&dev->mdev->priv.mkey_lock, flags);
 116        mr->mmr.key = mlx5_idx_to_mkey(be32_to_cpu(mr->out.mkey) & 0xffffff) | key;
 117
 118        cache->last_add = jiffies;
 119
 120        spin_lock_irqsave(&ent->lock, flags);
 121        list_add_tail(&mr->list, &ent->head);
 122        ent->cur++;
 123        ent->size++;
 124        spin_unlock_irqrestore(&ent->lock, flags);
 125
 126        write_lock_irqsave(&table->lock, flags);
 127        err = radix_tree_insert(&table->tree, mlx5_base_mkey(mr->mmr.key),
 128                                &mr->mmr);
 129        if (err)
 130                pr_err("Error inserting to mr tree. 0x%x\n", -err);
 131        write_unlock_irqrestore(&table->lock, flags);
 132}
 133
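/*
 * Asynchronously create up to @num new MRs for cache bucket @c.  Each mkey
 * is created in the free state with UMR enabled so it can be programmed
 * later; completion is handled in reg_mr_callback().  Returns -EAGAIN once
 * MAX_PENDING_REG_MR creations are already outstanding for this bucket.
 */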
 134static int add_keys(struct mlx5_ib_dev *dev, int c, int num)
 135{
 136        struct mlx5_mr_cache *cache = &dev->cache;
 137        struct mlx5_cache_ent *ent = &cache->ent[c];
 138        struct mlx5_create_mkey_mbox_in *in;
 139        struct mlx5_ib_mr *mr;
 140        int npages = 1 << ent->order;
 141        int err = 0;
 142        int i;
 143
 144        in = kzalloc(sizeof(*in), GFP_KERNEL);
 145        if (!in)
 146                return -ENOMEM;
 147
 148        for (i = 0; i < num; i++) {
 149                if (ent->pending >= MAX_PENDING_REG_MR) {
 150                        err = -EAGAIN;
 151                        break;
 152                }
 153
 154                mr = kzalloc(sizeof(*mr), GFP_KERNEL);
 155                if (!mr) {
 156                        err = -ENOMEM;
 157                        break;
 158                }
 159                mr->order = ent->order;
 160                mr->umred = 1;
 161                mr->dev = dev;
 162                in->seg.status = MLX5_MKEY_STATUS_FREE;
 163                in->seg.xlt_oct_size = cpu_to_be32((npages + 1) / 2);
 164                in->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
 165                in->seg.flags = MLX5_ACCESS_MODE_MTT | MLX5_PERM_UMR_EN;
 166                in->seg.log2_page_size = 12;
 167
 168                spin_lock_irq(&ent->lock);
 169                ent->pending++;
 170                spin_unlock_irq(&ent->lock);
 171                err = mlx5_core_create_mkey(dev->mdev, &mr->mmr, in,
 172                                            sizeof(*in), reg_mr_callback,
 173                                            mr, &mr->out);
 174                if (err) {
 175                        spin_lock_irq(&ent->lock);
 176                        ent->pending--;
 177                        spin_unlock_irq(&ent->lock);
 178                        mlx5_ib_warn(dev, "create mkey failed %d\n", err);
 179                        kfree(mr);
 180                        break;
 181                }
 182        }
 183
 184        kfree(in);
 185        return err;
 186}
 187
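/*
 * Destroy up to @num MRs from the head of cache bucket @c, stopping early
 * if the bucket runs empty.
 */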
 188static void remove_keys(struct mlx5_ib_dev *dev, int c, int num)
 189{
 190        struct mlx5_mr_cache *cache = &dev->cache;
 191        struct mlx5_cache_ent *ent = &cache->ent[c];
 192        struct mlx5_ib_mr *mr;
 193        int err;
 194        int i;
 195
 196        for (i = 0; i < num; i++) {
 197                spin_lock_irq(&ent->lock);
 198                if (list_empty(&ent->head)) {
 199                        spin_unlock_irq(&ent->lock);
 200                        return;
 201                }
 202                mr = list_first_entry(&ent->head, struct mlx5_ib_mr, list);
 203                list_del(&mr->list);
 204                ent->cur--;
 205                ent->size--;
 206                spin_unlock_irq(&ent->lock);
 207                err = destroy_mkey(dev, mr);
 208                if (err)
 209                        mlx5_ib_warn(dev, "failed destroy mkey\n");
 210                else
 211                        kfree(mr);
 212        }
 213}
 214
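/*
 * debugfs interface: writing "size" resizes a cache bucket to the requested
 * number of MRs (never below its limit), growing it with add_keys() or
 * shrinking it with remove_keys(); reading returns the current size.
 */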
 215static ssize_t size_write(struct file *filp, const char __user *buf,
 216                          size_t count, loff_t *pos)
 217{
 218        struct mlx5_cache_ent *ent = filp->private_data;
 219        struct mlx5_ib_dev *dev = ent->dev;
 220        char lbuf[20];
 221        u32 var;
 222        int err;
 223        int c;
 224
 225        if (copy_from_user(lbuf, buf, sizeof(lbuf)))
 226                return -EFAULT;
 227
 228        c = order2idx(dev, ent->order);
 229        lbuf[sizeof(lbuf) - 1] = 0;
 230
 231        if (sscanf(lbuf, "%u", &var) != 1)
 232                return -EINVAL;
 233
 234        if (var < ent->limit)
 235                return -EINVAL;
 236
 237        if (var > ent->size) {
 238                do {
 239                        err = add_keys(dev, c, var - ent->size);
 240                        if (err && err != -EAGAIN)
 241                                return err;
 242
 243                        usleep_range(3000, 5000);
 244                } while (err);
 245        } else if (var < ent->size) {
 246                remove_keys(dev, c, ent->size - var);
 247        }
 248
 249        return count;
 250}
 251
 252static ssize_t size_read(struct file *filp, char __user *buf, size_t count,
 253                         loff_t *pos)
 254{
 255        struct mlx5_cache_ent *ent = filp->private_data;
 256        char lbuf[20];
 257        int err;
 258
 259        if (*pos)
 260                return 0;
 261
 262        err = snprintf(lbuf, sizeof(lbuf), "%d\n", ent->size);
 263        if (err < 0)
 264                return err;
 265
 266        if (copy_to_user(buf, lbuf, err))
 267                return -EFAULT;
 268
 269        *pos += err;
 270
 271        return err;
 272}
 273
 274static const struct file_operations size_fops = {
 275        .owner  = THIS_MODULE,
 276        .open   = simple_open,
 277        .write  = size_write,
 278        .read   = size_read,
 279};
 280
 281static ssize_t limit_write(struct file *filp, const char __user *buf,
 282                           size_t count, loff_t *pos)
 283{
 284        struct mlx5_cache_ent *ent = filp->private_data;
 285        struct mlx5_ib_dev *dev = ent->dev;
 286        char lbuf[20];
 287        u32 var;
 288        int err;
 289        int c;
 290
 291        if (copy_from_user(lbuf, buf, sizeof(lbuf)))
 292                return -EFAULT;
 293
 294        c = order2idx(dev, ent->order);
 295        lbuf[sizeof(lbuf) - 1] = 0;
 296
 297        if (sscanf(lbuf, "%u", &var) != 1)
 298                return -EINVAL;
 299
 300        if (var > ent->size)
 301                return -EINVAL;
 302
 303        ent->limit = var;
 304
 305        if (ent->cur < ent->limit) {
 306                err = add_keys(dev, c, 2 * ent->limit - ent->cur);
 307                if (err)
 308                        return err;
 309        }
 310
 311        return count;
 312}
 313
 314static ssize_t limit_read(struct file *filp, char __user *buf, size_t count,
 315                          loff_t *pos)
 316{
 317        struct mlx5_cache_ent *ent = filp->private_data;
 318        char lbuf[20];
 319        int err;
 320
 321        if (*pos)
 322                return 0;
 323
 324        err = snprintf(lbuf, sizeof(lbuf), "%d\n", ent->limit);
 325        if (err < 0)
 326                return err;
 327
 328        if (copy_to_user(buf, lbuf, err))
 329                return -EFAULT;
 330
 331        *pos += err;
 332
 333        return err;
 334}
 335
 336static const struct file_operations limit_fops = {
 337        .owner  = THIS_MODULE,
 338        .open   = simple_open,
 339        .write  = limit_write,
 340        .read   = limit_read,
 341};
 342
 343static int someone_adding(struct mlx5_mr_cache *cache)
 344{
 345        int i;
 346
 347        for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) {
 348                if (cache->ent[i].cur < cache->ent[i].limit)
 349                        return 1;
 350        }
 351
 352        return 0;
 353}
 354
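/*
 * Shared handler for a bucket's immediate and delayed work items.  It tops
 * the bucket up towards 2 * limit one mkey at a time, backing off briefly
 * on -EAGAIN and for a second on other errors, and trims it back down when
 * it holds more than 2 * limit entries and the cache has been idle long
 * enough (see the garbage collection comment below).
 */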
 355static void __cache_work_func(struct mlx5_cache_ent *ent)
 356{
 357        struct mlx5_ib_dev *dev = ent->dev;
 358        struct mlx5_mr_cache *cache = &dev->cache;
 359        int i = order2idx(dev, ent->order);
 360        int err;
 361
 362        if (cache->stopped)
 363                return;
 364
 365        ent = &dev->cache.ent[i];
 366        if (ent->cur < 2 * ent->limit && !dev->fill_delay) {
 367                err = add_keys(dev, i, 1);
 368                if (ent->cur < 2 * ent->limit) {
 369                        if (err == -EAGAIN) {
 370                                mlx5_ib_dbg(dev, "returned eagain, order %d\n",
 371                                            i + 2);
 372                                queue_delayed_work(cache->wq, &ent->dwork,
 373                                                   msecs_to_jiffies(3));
 374                        } else if (err) {
 375                                mlx5_ib_warn(dev, "command failed order %d, err %d\n",
 376                                             i + 2, err);
 377                                queue_delayed_work(cache->wq, &ent->dwork,
 378                                                   msecs_to_jiffies(1000));
 379                        } else {
 380                                queue_work(cache->wq, &ent->work);
 381                        }
 382                }
 383        } else if (ent->cur > 2 * ent->limit) {
 384                /*
  385                 * The remove_keys() logic is performed as a garbage collection
  386                 * task.  Such a task is intended to run only when no other
  387                 * active processes are running.
  388                 *
  389                 * need_resched() returns TRUE if there are user tasks to be
  390                 * activated in the near future.
  391                 *
  392                 * In that case we do not execute remove_keys() and postpone
  393                 * the garbage collection work to the next cycle, in order to
  394                 * free CPU resources to other tasks.
 395                 */
 396                if (!need_resched() && !someone_adding(cache) &&
 397                    time_after(jiffies, cache->last_add + 300 * HZ)) {
 398                        remove_keys(dev, i, 1);
 399                        if (ent->cur > ent->limit)
 400                                queue_work(cache->wq, &ent->work);
 401                } else {
 402                        queue_delayed_work(cache->wq, &ent->dwork, 300 * HZ);
 403                }
 404        }
 405}
 406
 407static void delayed_cache_work_func(struct work_struct *work)
 408{
 409        struct mlx5_cache_ent *ent;
 410
 411        ent = container_of(work, struct mlx5_cache_ent, dwork.work);
 412        __cache_work_func(ent);
 413}
 414
 415static void cache_work_func(struct work_struct *work)
 416{
 417        struct mlx5_cache_ent *ent;
 418
 419        ent = container_of(work, struct mlx5_cache_ent, work);
 420        __cache_work_func(ent);
 421}
 422
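/*
 * Take an MR from the cache.  Buckets are searched from the one matching
 * @order upwards; whenever a bucket is found empty or drops below its
 * limit, its work item is queued to refill it.  Returns NULL (and counts a
 * miss) if no suitable MR is cached.
 */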
 423static struct mlx5_ib_mr *alloc_cached_mr(struct mlx5_ib_dev *dev, int order)
 424{
 425        struct mlx5_mr_cache *cache = &dev->cache;
 426        struct mlx5_ib_mr *mr = NULL;
 427        struct mlx5_cache_ent *ent;
 428        int c;
 429        int i;
 430
 431        c = order2idx(dev, order);
 432        if (c < 0 || c >= MAX_MR_CACHE_ENTRIES) {
 433                mlx5_ib_warn(dev, "order %d, cache index %d\n", order, c);
 434                return NULL;
 435        }
 436
 437        for (i = c; i < MAX_MR_CACHE_ENTRIES; i++) {
 438                ent = &cache->ent[i];
 439
 440                mlx5_ib_dbg(dev, "order %d, cache index %d\n", ent->order, i);
 441
 442                spin_lock_irq(&ent->lock);
 443                if (!list_empty(&ent->head)) {
 444                        mr = list_first_entry(&ent->head, struct mlx5_ib_mr,
 445                                              list);
 446                        list_del(&mr->list);
 447                        ent->cur--;
 448                        spin_unlock_irq(&ent->lock);
 449                        if (ent->cur < ent->limit)
 450                                queue_work(cache->wq, &ent->work);
 451                        break;
 452                }
 453                spin_unlock_irq(&ent->lock);
 454
 455                queue_work(cache->wq, &ent->work);
 456        }
 457
 458        if (!mr)
 459                cache->ent[c].miss++;
 460
 461        return mr;
 462}
 463
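/*
 * Return an MR to its cache bucket and schedule the shrinker work if the
 * bucket now exceeds 2 * limit entries.
 */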
 464static void free_cached_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
 465{
 466        struct mlx5_mr_cache *cache = &dev->cache;
 467        struct mlx5_cache_ent *ent;
 468        int shrink = 0;
 469        int c;
 470
 471        c = order2idx(dev, mr->order);
 472        if (c < 0 || c >= MAX_MR_CACHE_ENTRIES) {
 473                mlx5_ib_warn(dev, "order %d, cache index %d\n", mr->order, c);
 474                return;
 475        }
 476        ent = &cache->ent[c];
 477        spin_lock_irq(&ent->lock);
 478        list_add_tail(&mr->list, &ent->head);
 479        ent->cur++;
 480        if (ent->cur > 2 * ent->limit)
 481                shrink = 1;
 482        spin_unlock_irq(&ent->lock);
 483
 484        if (shrink)
 485                queue_work(cache->wq, &ent->work);
 486}
 487
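/*
 * Drain cache bucket @c completely, destroying every cached mkey.  Used on
 * cache teardown.
 */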
 488static void clean_keys(struct mlx5_ib_dev *dev, int c)
 489{
 490        struct mlx5_mr_cache *cache = &dev->cache;
 491        struct mlx5_cache_ent *ent = &cache->ent[c];
 492        struct mlx5_ib_mr *mr;
 493        int err;
 494
 495        cancel_delayed_work(&ent->dwork);
 496        while (1) {
 497                spin_lock_irq(&ent->lock);
 498                if (list_empty(&ent->head)) {
 499                        spin_unlock_irq(&ent->lock);
 500                        return;
 501                }
 502                mr = list_first_entry(&ent->head, struct mlx5_ib_mr, list);
 503                list_del(&mr->list);
 504                ent->cur--;
 505                ent->size--;
 506                spin_unlock_irq(&ent->lock);
 507                err = destroy_mkey(dev, mr);
 508                if (err)
 509                        mlx5_ib_warn(dev, "failed destroy mkey\n");
 510                else
 511                        kfree(mr);
 512        }
 513}
 514
 515static int mlx5_mr_cache_debugfs_init(struct mlx5_ib_dev *dev)
 516{
 517        struct mlx5_mr_cache *cache = &dev->cache;
 518        struct mlx5_cache_ent *ent;
 519        int i;
 520
 521        if (!mlx5_debugfs_root)
 522                return 0;
 523
 524        cache->root = debugfs_create_dir("mr_cache", dev->mdev->priv.dbg_root);
 525        if (!cache->root)
 526                return -ENOMEM;
 527
 528        for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) {
 529                ent = &cache->ent[i];
 530                sprintf(ent->name, "%d", ent->order);
 531                ent->dir = debugfs_create_dir(ent->name,  cache->root);
 532                if (!ent->dir)
 533                        return -ENOMEM;
 534
 535                ent->fsize = debugfs_create_file("size", 0600, ent->dir, ent,
 536                                                 &size_fops);
 537                if (!ent->fsize)
 538                        return -ENOMEM;
 539
 540                ent->flimit = debugfs_create_file("limit", 0600, ent->dir, ent,
 541                                                  &limit_fops);
 542                if (!ent->flimit)
 543                        return -ENOMEM;
 544
 545                ent->fcur = debugfs_create_u32("cur", 0400, ent->dir,
 546                                               &ent->cur);
 547                if (!ent->fcur)
 548                        return -ENOMEM;
 549
 550                ent->fmiss = debugfs_create_u32("miss", 0600, ent->dir,
 551                                                &ent->miss);
 552                if (!ent->fmiss)
 553                        return -ENOMEM;
 554        }
 555
 556        return 0;
 557}
 558
 559static void mlx5_mr_cache_debugfs_cleanup(struct mlx5_ib_dev *dev)
 560{
 561        if (!mlx5_debugfs_root)
 562                return;
 563
 564        debugfs_remove_recursive(dev->cache.root);
 565}
 566
 567static void delay_time_func(unsigned long ctx)
 568{
 569        struct mlx5_ib_dev *dev = (struct mlx5_ib_dev *)ctx;
 570
 571        dev->fill_delay = 0;
 572}
 573
 574int mlx5_mr_cache_init(struct mlx5_ib_dev *dev)
 575{
 576        struct mlx5_mr_cache *cache = &dev->cache;
 577        struct mlx5_cache_ent *ent;
 578        int limit;
 579        int err;
 580        int i;
 581
 582        cache->wq = create_singlethread_workqueue("mkey_cache");
 583        if (!cache->wq) {
 584                mlx5_ib_warn(dev, "failed to create work queue\n");
 585                return -ENOMEM;
 586        }
 587
 588        setup_timer(&dev->delay_timer, delay_time_func, (unsigned long)dev);
 589        for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) {
 593                ent = &cache->ent[i];
 594                INIT_LIST_HEAD(&ent->head);
 595                spin_lock_init(&ent->lock);
 596                ent->order = i + 2;
 597                ent->dev = dev;
 598
 599                if (dev->mdev->profile->mask & MLX5_PROF_MASK_MR_CACHE)
 600                        limit = dev->mdev->profile->mr_cache[i].limit;
 601                else
 602                        limit = 0;
 603
 604                INIT_WORK(&ent->work, cache_work_func);
 605                INIT_DELAYED_WORK(&ent->dwork, delayed_cache_work_func);
 606                ent->limit = limit;
 607                queue_work(cache->wq, &ent->work);
 608        }
 609
 610        err = mlx5_mr_cache_debugfs_init(dev);
 611        if (err)
 612                mlx5_ib_warn(dev, "cache debugfs failure\n");
 613
 614        return 0;
 615}
 616
 617int mlx5_mr_cache_cleanup(struct mlx5_ib_dev *dev)
 618{
 619        int i;
 620
 621        dev->cache.stopped = 1;
 622        flush_workqueue(dev->cache.wq);
 623
 624        mlx5_mr_cache_debugfs_cleanup(dev);
 625
 626        for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++)
 627                clean_keys(dev, i);
 628
 629        destroy_workqueue(dev->cache.wq);
 630        del_timer_sync(&dev->delay_timer);
 631
 632        return 0;
 633}
 634
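/*
 * Create a DMA MR: an mkey in PA (pass-through) access mode covering the
 * whole address space (MLX5_MKEY_LEN64), so no page list is needed.
 */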
 635struct ib_mr *mlx5_ib_get_dma_mr(struct ib_pd *pd, int acc)
 636{
 637        struct mlx5_ib_dev *dev = to_mdev(pd->device);
 638        struct mlx5_core_dev *mdev = dev->mdev;
 639        struct mlx5_create_mkey_mbox_in *in;
 640        struct mlx5_mkey_seg *seg;
 641        struct mlx5_ib_mr *mr;
 642        int err;
 643
 644        mr = kzalloc(sizeof(*mr), GFP_KERNEL);
 645        if (!mr)
 646                return ERR_PTR(-ENOMEM);
 647
 648        in = kzalloc(sizeof(*in), GFP_KERNEL);
 649        if (!in) {
 650                err = -ENOMEM;
 651                goto err_free;
 652        }
 653
 654        seg = &in->seg;
 655        seg->flags = convert_access(acc) | MLX5_ACCESS_MODE_PA;
 656        seg->flags_pd = cpu_to_be32(to_mpd(pd)->pdn | MLX5_MKEY_LEN64);
 657        seg->qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
 658        seg->start_addr = 0;
 659
 660        err = mlx5_core_create_mkey(mdev, &mr->mmr, in, sizeof(*in), NULL, NULL,
 661                                    NULL);
 662        if (err)
 663                goto err_in;
 664
 665        kfree(in);
 666        mr->ibmr.lkey = mr->mmr.key;
 667        mr->ibmr.rkey = mr->mmr.key;
 668        mr->umem = NULL;
 669
 670        return &mr->ibmr;
 671
 672err_in:
 673        kfree(in);
 674
 675err_free:
 676        kfree(mr);
 677
 678        return ERR_PTR(err);
 679}
 680
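/*
 * Number of 16-byte octowords needed to hold the page list of a region of
 * @len bytes starting at @addr: each octoword carries two 8-byte entries,
 * hence the (npages + 1) / 2.
 */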
 681static int get_octo_len(u64 addr, u64 len, int page_size)
 682{
 683        u64 offset;
 684        int npages;
 685
 686        offset = addr & (page_size - 1);
 687        npages = ALIGN(len + offset, page_size) >> ilog2(page_size);
 688        return (npages + 1) / 2;
 689}
 690
 691static int use_umr(int order)
 692{
 693        return order <= MLX5_MAX_UMR_SHIFT;
 694}
 695
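/*
 * Build the UMR work request that programs mkey @key with the page list at
 * @dma (@n entries of sizeof(u64), shifted by @page_shift) and the region's
 * virtual address, length, access flags and PD.
 */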
 696static void prep_umr_reg_wqe(struct ib_pd *pd, struct ib_send_wr *wr,
 697                             struct ib_sge *sg, u64 dma, int n, u32 key,
 698                             int page_shift, u64 virt_addr, u64 len,
 699                             int access_flags)
 700{
 701        struct mlx5_ib_dev *dev = to_mdev(pd->device);
 702        struct mlx5_umr_wr *umrwr = umr_wr(wr);
 703
 704        sg->addr = dma;
 705        sg->length = ALIGN(sizeof(u64) * n, 64);
 706        sg->lkey = dev->umrc.pd->local_dma_lkey;
 707
 708        wr->next = NULL;
 709        wr->send_flags = 0;
 710        wr->sg_list = sg;
 711        if (n)
 712                wr->num_sge = 1;
 713        else
 714                wr->num_sge = 0;
 715
 716        wr->opcode = MLX5_IB_WR_UMR;
 717
 718        umrwr->npages = n;
 719        umrwr->page_shift = page_shift;
 720        umrwr->mkey = key;
 721        umrwr->target.virt_addr = virt_addr;
 722        umrwr->length = len;
 723        umrwr->access_flags = access_flags;
 724        umrwr->pd = pd;
 725}
 726
 727static void prep_umr_unreg_wqe(struct mlx5_ib_dev *dev,
 728                               struct ib_send_wr *wr, u32 key)
 729{
 730        struct mlx5_umr_wr *umrwr = umr_wr(wr);
 731
 732        wr->send_flags = MLX5_IB_SEND_UMR_UNREG | MLX5_IB_SEND_UMR_FAIL_IF_FREE;
 733        wr->opcode = MLX5_IB_WR_UMR;
 734        umrwr->mkey = key;
 735}
 736
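/*
 * Completion handler for the UMR CQ: for every completion, record the
 * status in the posting thread's mlx5_ib_umr_context and wake it up, then
 * re-arm the CQ.
 */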
 737void mlx5_umr_cq_handler(struct ib_cq *cq, void *cq_context)
 738{
 739        struct mlx5_ib_umr_context *context;
 740        struct ib_wc wc;
 741        int err;
 742
 743        while (1) {
 744                err = ib_poll_cq(cq, 1, &wc);
 745                if (err < 0) {
 746                        pr_warn("poll cq error %d\n", err);
 747                        return;
 748                }
 749                if (err == 0)
 750                        break;
 751
 752                context = (struct mlx5_ib_umr_context *) (unsigned long) wc.wr_id;
 753                context->status = wc.status;
 754                complete(&context->done);
 755        }
 756        ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
 757}
 758
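/*
 * Register a user memory region through the UMR path: take an MR from the
 * cache (attempting one refill on a miss), copy the page list into a
 * MLX5_UMR_ALIGN-aligned scratch buffer, DMA-map it and post a UMR WQE
 * that programs the cached mkey with the region's translation.
 */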
 759static struct mlx5_ib_mr *reg_umr(struct ib_pd *pd, struct ib_umem *umem,
 760                                  u64 virt_addr, u64 len, int npages,
 761                                  int page_shift, int order, int access_flags)
 762{
 763        struct mlx5_ib_dev *dev = to_mdev(pd->device);
 764        struct device *ddev = dev->ib_dev.dma_device;
 765        struct umr_common *umrc = &dev->umrc;
 766        struct mlx5_ib_umr_context umr_context;
 767        struct mlx5_umr_wr umrwr;
 768        struct ib_send_wr *bad;
 769        struct mlx5_ib_mr *mr;
 770        struct ib_sge sg;
 771        int size;
 772        __be64 *mr_pas;
 773        __be64 *pas;
 774        dma_addr_t dma;
 775        int err = 0;
 776        int i;
 777
 778        for (i = 0; i < 1; i++) {
 779                mr = alloc_cached_mr(dev, order);
 780                if (mr)
 781                        break;
 782
 783                err = add_keys(dev, order2idx(dev, order), 1);
 784                if (err && err != -EAGAIN) {
 785                        mlx5_ib_warn(dev, "add_keys failed, err %d\n", err);
 786                        break;
 787                }
 788        }
 789
 790        if (!mr)
 791                return ERR_PTR(-EAGAIN);
 792
 793        /* UMR copies MTTs in units of MLX5_UMR_MTT_ALIGNMENT bytes.
 794         * To avoid copying garbage after the pas array, we allocate
 795         * a little more. */
 796        size = ALIGN(sizeof(u64) * npages, MLX5_UMR_MTT_ALIGNMENT);
 797        mr_pas = kmalloc(size + MLX5_UMR_ALIGN - 1, GFP_KERNEL);
 798        if (!mr_pas) {
 799                err = -ENOMEM;
 800                goto free_mr;
 801        }
 802
 803        pas = PTR_ALIGN(mr_pas, MLX5_UMR_ALIGN);
 804        mlx5_ib_populate_pas(dev, umem, page_shift, pas, MLX5_IB_MTT_PRESENT);
 805        /* Clear padding after the actual pages. */
 806        memset(pas + npages, 0, size - npages * sizeof(u64));
 807
 808        dma = dma_map_single(ddev, pas, size, DMA_TO_DEVICE);
 809        if (dma_mapping_error(ddev, dma)) {
 810                err = -ENOMEM;
 811                goto free_pas;
 812        }
 813
 814        memset(&umrwr, 0, sizeof(umrwr));
 815        umrwr.wr.wr_id = (u64)(unsigned long)&umr_context;
 816        prep_umr_reg_wqe(pd, &umrwr.wr, &sg, dma, npages, mr->mmr.key,
 817                         page_shift, virt_addr, len, access_flags);
 818
 819        mlx5_ib_init_umr_context(&umr_context);
 820        down(&umrc->sem);
 821        err = ib_post_send(umrc->qp, &umrwr.wr, &bad);
 822        if (err) {
 823                mlx5_ib_warn(dev, "post send failed, err %d\n", err);
 824                goto unmap_dma;
 825        } else {
 826                wait_for_completion(&umr_context.done);
 827                if (umr_context.status != IB_WC_SUCCESS) {
 828                        mlx5_ib_warn(dev, "reg umr failed\n");
 829                        err = -EFAULT;
 830                }
 831        }
 832
 833        mr->mmr.iova = virt_addr;
 834        mr->mmr.size = len;
 835        mr->mmr.pd = to_mpd(pd)->pdn;
 836
 837        mr->live = 1;
 838
 839unmap_dma:
 840        up(&umrc->sem);
 841        dma_unmap_single(ddev, dma, size, DMA_TO_DEVICE);
 842
 843free_pas:
 844        kfree(mr_pas);
 845
 846free_mr:
 847        if (err) {
 848                free_cached_mr(dev, mr);
 849                return ERR_PTR(err);
 850        }
 851
 852        return mr;
 853}
 854
 855#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
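/*
 * Update (or, with @zap, clear) @npages MTT entries of an ODP MR starting
 * at @start_page_index, using UMR UPDATE_MTT posts of at most one page of
 * entries at a time.  Allocation uses GFP_ATOMIC because this can run from
 * an invalidation; if that fails, a static pre-allocated emergency buffer
 * is used under a mutex.
 */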
 856int mlx5_ib_update_mtt(struct mlx5_ib_mr *mr, u64 start_page_index, int npages,
 857                       int zap)
 858{
 859        struct mlx5_ib_dev *dev = mr->dev;
 860        struct device *ddev = dev->ib_dev.dma_device;
 861        struct umr_common *umrc = &dev->umrc;
 862        struct mlx5_ib_umr_context umr_context;
 863        struct ib_umem *umem = mr->umem;
 864        int size;
 865        __be64 *pas;
 866        dma_addr_t dma;
 867        struct ib_send_wr *bad;
 868        struct mlx5_umr_wr wr;
 869        struct ib_sge sg;
 870        int err = 0;
 871        const int page_index_alignment = MLX5_UMR_MTT_ALIGNMENT / sizeof(u64);
 872        const int page_index_mask = page_index_alignment - 1;
 873        size_t pages_mapped = 0;
 874        size_t pages_to_map = 0;
 875        size_t pages_iter = 0;
 876        int use_emergency_buf = 0;
 877
 878        /* UMR copies MTTs in units of MLX5_UMR_MTT_ALIGNMENT bytes,
 879         * so we need to align the offset and length accordingly */
 880        if (start_page_index & page_index_mask) {
 881                npages += start_page_index & page_index_mask;
 882                start_page_index &= ~page_index_mask;
 883        }
 884
 885        pages_to_map = ALIGN(npages, page_index_alignment);
 886
 887        if (start_page_index + pages_to_map > MLX5_MAX_UMR_PAGES)
 888                return -EINVAL;
 889
 890        size = sizeof(u64) * pages_to_map;
 891        size = min_t(int, PAGE_SIZE, size);
 892        /* We allocate with GFP_ATOMIC to avoid recursion into page-reclaim
 893         * code, when we are called from an invalidation. The pas buffer must
 894         * be 2k-aligned for Connect-IB. */
 895        pas = (__be64 *)get_zeroed_page(GFP_ATOMIC);
 896        if (!pas) {
 897                mlx5_ib_warn(dev, "unable to allocate memory during MTT update, falling back to slower chunked mechanism.\n");
 898                pas = mlx5_ib_update_mtt_emergency_buffer;
 899                size = MLX5_UMR_MTT_MIN_CHUNK_SIZE;
 900                use_emergency_buf = 1;
 901                mutex_lock(&mlx5_ib_update_mtt_emergency_buffer_mutex);
 902                memset(pas, 0, size);
 903        }
 904        pages_iter = size / sizeof(u64);
 905        dma = dma_map_single(ddev, pas, size, DMA_TO_DEVICE);
 906        if (dma_mapping_error(ddev, dma)) {
 907                mlx5_ib_err(dev, "unable to map DMA during MTT update.\n");
 908                err = -ENOMEM;
 909                goto free_pas;
 910        }
 911
 912        for (pages_mapped = 0;
 913             pages_mapped < pages_to_map && !err;
 914             pages_mapped += pages_iter, start_page_index += pages_iter) {
 915                dma_sync_single_for_cpu(ddev, dma, size, DMA_TO_DEVICE);
 916
 917                npages = min_t(size_t,
 918                               pages_iter,
 919                               ib_umem_num_pages(umem) - start_page_index);
 920
 921                if (!zap) {
 922                        __mlx5_ib_populate_pas(dev, umem, PAGE_SHIFT,
 923                                               start_page_index, npages, pas,
 924                                               MLX5_IB_MTT_PRESENT);
 925                        /* Clear padding after the pages brought from the
 926                         * umem. */
 927                        memset(pas + npages, 0, size - npages * sizeof(u64));
 928                }
 929
 930                dma_sync_single_for_device(ddev, dma, size, DMA_TO_DEVICE);
 931
 932                memset(&wr, 0, sizeof(wr));
 933                wr.wr.wr_id = (u64)(unsigned long)&umr_context;
 934
 935                sg.addr = dma;
 936                sg.length = ALIGN(npages * sizeof(u64),
 937                                MLX5_UMR_MTT_ALIGNMENT);
 938                sg.lkey = dev->umrc.pd->local_dma_lkey;
 939
 940                wr.wr.send_flags = MLX5_IB_SEND_UMR_FAIL_IF_FREE |
 941                                MLX5_IB_SEND_UMR_UPDATE_MTT;
 942                wr.wr.sg_list = &sg;
 943                wr.wr.num_sge = 1;
 944                wr.wr.opcode = MLX5_IB_WR_UMR;
 945                wr.npages = sg.length / sizeof(u64);
 946                wr.page_shift = PAGE_SHIFT;
 947                wr.mkey = mr->mmr.key;
 948                wr.target.offset = start_page_index;
 949
 950                mlx5_ib_init_umr_context(&umr_context);
 951                down(&umrc->sem);
 952                err = ib_post_send(umrc->qp, &wr.wr, &bad);
 953                if (err) {
 954                        mlx5_ib_err(dev, "UMR post send failed, err %d\n", err);
 955                } else {
 956                        wait_for_completion(&umr_context.done);
 957                        if (umr_context.status != IB_WC_SUCCESS) {
 958                                mlx5_ib_err(dev, "UMR completion failed, code %d\n",
 959                                            umr_context.status);
 960                                err = -EFAULT;
 961                        }
 962                }
 963                up(&umrc->sem);
 964        }
 965        dma_unmap_single(ddev, dma, size, DMA_TO_DEVICE);
 966
 967free_pas:
 968        if (!use_emergency_buf)
 969                free_page((unsigned long)pas);
 970        else
 971                mutex_unlock(&mlx5_ib_update_mtt_emergency_buffer_mutex);
 972
 973        return err;
 974}
 975#endif
 976
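/*
 * Register an MR with a synchronous CREATE_MKEY command carrying the full
 * page list.  Used when the region cannot go through the UMR path (order
 * too large) or when the MR cache is empty.
 */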
 977static struct mlx5_ib_mr *reg_create(struct ib_pd *pd, u64 virt_addr,
 978                                     u64 length, struct ib_umem *umem,
 979                                     int npages, int page_shift,
 980                                     int access_flags)
 981{
 982        struct mlx5_ib_dev *dev = to_mdev(pd->device);
 983        struct mlx5_create_mkey_mbox_in *in;
 984        struct mlx5_ib_mr *mr;
 985        int inlen;
 986        int err;
 987        bool pg_cap = !!(MLX5_CAP_GEN(dev->mdev, pg));
 988
 989        mr = kzalloc(sizeof(*mr), GFP_KERNEL);
 990        if (!mr)
 991                return ERR_PTR(-ENOMEM);
 992
 993        inlen = sizeof(*in) + sizeof(*in->pas) * ((npages + 1) / 2) * 2;
 994        in = mlx5_vzalloc(inlen);
 995        if (!in) {
 996                err = -ENOMEM;
 997                goto err_1;
 998        }
 999        mlx5_ib_populate_pas(dev, umem, page_shift, in->pas,
1000                             pg_cap ? MLX5_IB_MTT_PRESENT : 0);
1001
1002        /* The MLX5_MKEY_INBOX_PG_ACCESS bit allows setting the access flags
1003         * in the page list submitted with the command. */
1004        in->flags = pg_cap ? cpu_to_be32(MLX5_MKEY_INBOX_PG_ACCESS) : 0;
1005        in->seg.flags = convert_access(access_flags) |
1006                MLX5_ACCESS_MODE_MTT;
1007        in->seg.flags_pd = cpu_to_be32(to_mpd(pd)->pdn);
1008        in->seg.start_addr = cpu_to_be64(virt_addr);
1009        in->seg.len = cpu_to_be64(length);
1010        in->seg.bsfs_octo_size = 0;
1011        in->seg.xlt_oct_size = cpu_to_be32(get_octo_len(virt_addr, length, 1 << page_shift));
1012        in->seg.log2_page_size = page_shift;
1013        in->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
1014        in->xlat_oct_act_size = cpu_to_be32(get_octo_len(virt_addr, length,
1015                                                         1 << page_shift));
1016        err = mlx5_core_create_mkey(dev->mdev, &mr->mmr, in, inlen, NULL,
1017                                    NULL, NULL);
1018        if (err) {
1019                mlx5_ib_warn(dev, "create mkey failed\n");
1020                goto err_2;
1021        }
1022        mr->umem = umem;
1023        mr->dev = dev;
1024        mr->live = 1;
1025        kvfree(in);
1026
1027        mlx5_ib_dbg(dev, "mkey = 0x%x\n", mr->mmr.key);
1028
1029        return mr;
1030
1031err_2:
1032        kvfree(in);
1033
1034err_1:
1035        kfree(mr);
1036
1037        return ERR_PTR(err);
1038}
1039
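/*
 * ib_reg_user_mr handler: pin the user pages with ib_umem_get(), then
 * register them either through the UMR/MR-cache path (small enough orders)
 * or with a blocking reg_create().  ODP MRs additionally publish the MR in
 * umem->odp_data->private so the invalidation handlers can find it.
 */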
1040struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
1041                                  u64 virt_addr, int access_flags,
1042                                  struct ib_udata *udata)
1043{
1044        struct mlx5_ib_dev *dev = to_mdev(pd->device);
1045        struct mlx5_ib_mr *mr = NULL;
1046        struct ib_umem *umem;
1047        int page_shift;
1048        int npages;
1049        int ncont;
1050        int order;
1051        int err;
1052
1053        mlx5_ib_dbg(dev, "start 0x%llx, virt_addr 0x%llx, length 0x%llx, access_flags 0x%x\n",
1054                    start, virt_addr, length, access_flags);
1055        umem = ib_umem_get(pd->uobject->context, start, length, access_flags,
1056                           0);
1057        if (IS_ERR(umem)) {
1058                mlx5_ib_dbg(dev, "umem get failed (%ld)\n", PTR_ERR(umem));
 1059                return ERR_CAST(umem);
1060        }
1061
1062        mlx5_ib_cont_pages(umem, start, &npages, &page_shift, &ncont, &order);
1063        if (!npages) {
1064                mlx5_ib_warn(dev, "avoid zero region\n");
1065                err = -EINVAL;
1066                goto error;
1067        }
1068
1069        mlx5_ib_dbg(dev, "npages %d, ncont %d, order %d, page_shift %d\n",
1070                    npages, ncont, order, page_shift);
1071
1072        if (use_umr(order)) {
1073                mr = reg_umr(pd, umem, virt_addr, length, ncont, page_shift,
1074                             order, access_flags);
1075                if (PTR_ERR(mr) == -EAGAIN) {
 1076                        mlx5_ib_dbg(dev, "cache empty for order %d\n", order);
1077                        mr = NULL;
1078                }
1079        } else if (access_flags & IB_ACCESS_ON_DEMAND) {
1080                err = -EINVAL;
 1081                pr_err("Got MR registration for ODP MR > 512MB, not supported for Connect-IB\n");
1082                goto error;
1083        }
1084
1085        if (!mr)
1086                mr = reg_create(pd, virt_addr, length, umem, ncont, page_shift,
1087                                access_flags);
1088
1089        if (IS_ERR(mr)) {
1090                err = PTR_ERR(mr);
1091                goto error;
1092        }
1093
1094        mlx5_ib_dbg(dev, "mkey 0x%x\n", mr->mmr.key);
1095
1096        mr->umem = umem;
1097        mr->npages = npages;
1098        atomic_add(npages, &dev->mdev->priv.reg_pages);
1099        mr->ibmr.lkey = mr->mmr.key;
1100        mr->ibmr.rkey = mr->mmr.key;
1101
1102#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
1103        if (umem->odp_data) {
1104                /*
1105                 * This barrier prevents the compiler from moving the
1106                 * setting of umem->odp_data->private to point to our
1107                 * MR, before reg_umr finished, to ensure that the MR
1108                 * initialization have finished before starting to
1109                 * handle invalidations.
1110                 */
1111                smp_wmb();
1112                mr->umem->odp_data->private = mr;
1113                /*
1114                 * Make sure we will see the new
1115                 * umem->odp_data->private value in the invalidation
1116                 * routines, before we can get page faults on the
1117                 * MR. Page faults can happen once we put the MR in
1118                 * the tree, below this line. Without the barrier,
1119                 * there can be a fault handling and an invalidation
1120                 * before umem->odp_data->private == mr is visible to
1121                 * the invalidation handler.
1122                 */
1123                smp_wmb();
1124        }
1125#endif
1126
1127        return &mr->ibmr;
1128
1129error:
1130        ib_umem_release(umem);
1131        return ERR_PTR(err);
1132}
1133
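/*
 * Invalidate a cache-backed mkey by posting a UMR unreg work request, so
 * the MR can be returned to the cache for reuse.
 */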
1134static int unreg_umr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
1135{
1136        struct umr_common *umrc = &dev->umrc;
1137        struct mlx5_ib_umr_context umr_context;
1138        struct mlx5_umr_wr umrwr;
1139        struct ib_send_wr *bad;
1140        int err;
1141
 1142        memset(&umrwr, 0, sizeof(umrwr));
1143        umrwr.wr.wr_id = (u64)(unsigned long)&umr_context;
1144        prep_umr_unreg_wqe(dev, &umrwr.wr, mr->mmr.key);
1145
1146        mlx5_ib_init_umr_context(&umr_context);
1147        down(&umrc->sem);
1148        err = ib_post_send(umrc->qp, &umrwr.wr, &bad);
1149        if (err) {
1150                up(&umrc->sem);
1151                mlx5_ib_dbg(dev, "err %d\n", err);
1152                goto error;
1153        } else {
1154                wait_for_completion(&umr_context.done);
1155                up(&umrc->sem);
1156        }
1157        if (umr_context.status != IB_WC_SUCCESS) {
1158                mlx5_ib_warn(dev, "unreg umr failed\n");
1159                err = -EFAULT;
1160                goto error;
1161        }
1162        return 0;
1163
1164error:
1165        return err;
1166}
1167
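/*
 * Allocate and DMA-map the private descriptor list used by fast
 * registration MRs.  The buffer is over-allocated so the descriptors can
 * be aligned to MLX5_UMR_ALIGN.
 */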
1168static int
1169mlx5_alloc_priv_descs(struct ib_device *device,
1170                      struct mlx5_ib_mr *mr,
1171                      int ndescs,
1172                      int desc_size)
1173{
1174        int size = ndescs * desc_size;
1175        int add_size;
1176        int ret;
1177
1178        add_size = max_t(int, MLX5_UMR_ALIGN - ARCH_KMALLOC_MINALIGN, 0);
1179
1180        mr->descs_alloc = kzalloc(size + add_size, GFP_KERNEL);
1181        if (!mr->descs_alloc)
1182                return -ENOMEM;
1183
1184        mr->descs = PTR_ALIGN(mr->descs_alloc, MLX5_UMR_ALIGN);
1185
1186        mr->desc_map = dma_map_single(device->dma_device, mr->descs,
1187                                      size, DMA_TO_DEVICE);
1188        if (dma_mapping_error(device->dma_device, mr->desc_map)) {
1189                ret = -ENOMEM;
1190                goto err;
1191        }
1192
1193        return 0;
1194err:
1195        kfree(mr->descs_alloc);
1196
1197        return ret;
1198}
1199
1200static void
1201mlx5_free_priv_descs(struct mlx5_ib_mr *mr)
1202{
1203        if (mr->descs) {
1204                struct ib_device *device = mr->ibmr.device;
1205                int size = mr->max_descs * mr->desc_size;
1206
1207                dma_unmap_single(device->dma_device, mr->desc_map,
1208                                 size, DMA_TO_DEVICE);
1209                kfree(mr->descs_alloc);
1210                mr->descs = NULL;
1211        }
1212}
1213
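/*
 * Release the hardware resources of an MR: destroy its signature PSVs and
 * private descriptors if present, then either destroy the mkey outright
 * or, for cache-backed MRs, invalidate it via UMR and put it back in the
 * cache.
 */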
1214static int clean_mr(struct mlx5_ib_mr *mr)
1215{
1216        struct mlx5_ib_dev *dev = to_mdev(mr->ibmr.device);
1217        int umred = mr->umred;
1218        int err;
1219
1220        if (mr->sig) {
1221                if (mlx5_core_destroy_psv(dev->mdev,
1222                                          mr->sig->psv_memory.psv_idx))
1223                        mlx5_ib_warn(dev, "failed to destroy mem psv %d\n",
1224                                     mr->sig->psv_memory.psv_idx);
1225                if (mlx5_core_destroy_psv(dev->mdev,
1226                                          mr->sig->psv_wire.psv_idx))
1227                        mlx5_ib_warn(dev, "failed to destroy wire psv %d\n",
1228                                     mr->sig->psv_wire.psv_idx);
1229                kfree(mr->sig);
1230                mr->sig = NULL;
1231        }
1232
1233        mlx5_free_priv_descs(mr);
1234
1235        if (!umred) {
1236                err = destroy_mkey(dev, mr);
1237                if (err) {
1238                        mlx5_ib_warn(dev, "failed to destroy mkey 0x%x (%d)\n",
1239                                     mr->mmr.key, err);
1240                        return err;
1241                }
1242        } else {
1243                err = unreg_umr(dev, mr);
1244                if (err) {
1245                        mlx5_ib_warn(dev, "failed unregister\n");
1246                        return err;
1247                }
1248                free_cached_mr(dev, mr);
1249        }
1250
1251        if (!umred)
1252                kfree(mr);
1253
1254        return 0;
1255}
1256
1257int mlx5_ib_dereg_mr(struct ib_mr *ibmr)
1258{
1259        struct mlx5_ib_dev *dev = to_mdev(ibmr->device);
1260        struct mlx5_ib_mr *mr = to_mmr(ibmr);
1261        int npages = mr->npages;
1262        struct ib_umem *umem = mr->umem;
1263
1264#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
1265        if (umem && umem->odp_data) {
1266                /* Prevent new page faults from succeeding */
1267                mr->live = 0;
1268                /* Wait for all running page-fault handlers to finish. */
1269                synchronize_srcu(&dev->mr_srcu);
1270                /* Destroy all page mappings */
1271                mlx5_ib_invalidate_range(umem, ib_umem_start(umem),
1272                                         ib_umem_end(umem));
1273                /*
1274                 * We kill the umem before the MR for ODP,
1275                 * so that there will not be any invalidations in
1276                 * flight, looking at the *mr struct.
1277                 */
1278                ib_umem_release(umem);
1279                atomic_sub(npages, &dev->mdev->priv.reg_pages);
1280
1281                /* Avoid double-freeing the umem. */
1282                umem = NULL;
1283        }
1284#endif
1285
1286        clean_mr(mr);
1287
1288        if (umem) {
1289                ib_umem_release(umem);
1290                atomic_sub(npages, &dev->mdev->priv.reg_pages);
1291        }
1292
1293        return 0;
1294}
1295
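/*
 * Allocate a fast-registration MR (IB_MR_TYPE_MEM_REG, MTT access mode
 * with a private page-list descriptor array) or a signature MR
 * (IB_MR_TYPE_SIGNATURE, KLM access mode with memory and wire PSVs).
 */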
1296struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd,
1297                               enum ib_mr_type mr_type,
1298                               u32 max_num_sg)
1299{
1300        struct mlx5_ib_dev *dev = to_mdev(pd->device);
1301        struct mlx5_create_mkey_mbox_in *in;
1302        struct mlx5_ib_mr *mr;
1303        int access_mode, err;
1304        int ndescs = roundup(max_num_sg, 4);
1305
1306        mr = kzalloc(sizeof(*mr), GFP_KERNEL);
1307        if (!mr)
1308                return ERR_PTR(-ENOMEM);
1309
1310        in = kzalloc(sizeof(*in), GFP_KERNEL);
1311        if (!in) {
1312                err = -ENOMEM;
1313                goto err_free;
1314        }
1315
1316        in->seg.status = MLX5_MKEY_STATUS_FREE;
1317        in->seg.xlt_oct_size = cpu_to_be32(ndescs);
1318        in->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
1319        in->seg.flags_pd = cpu_to_be32(to_mpd(pd)->pdn);
1320
1321        if (mr_type == IB_MR_TYPE_MEM_REG) {
1322                access_mode = MLX5_ACCESS_MODE_MTT;
1323                in->seg.log2_page_size = PAGE_SHIFT;
1324
1325                err = mlx5_alloc_priv_descs(pd->device, mr,
1326                                            ndescs, sizeof(u64));
1327                if (err)
1328                        goto err_free_in;
1329
1330                mr->desc_size = sizeof(u64);
1331                mr->max_descs = ndescs;
1332        } else if (mr_type == IB_MR_TYPE_SIGNATURE) {
1333                u32 psv_index[2];
1334
1335                in->seg.flags_pd = cpu_to_be32(be32_to_cpu(in->seg.flags_pd) |
1336                                                           MLX5_MKEY_BSF_EN);
1337                in->seg.bsfs_octo_size = cpu_to_be32(MLX5_MKEY_BSF_OCTO_SIZE);
1338                mr->sig = kzalloc(sizeof(*mr->sig), GFP_KERNEL);
1339                if (!mr->sig) {
1340                        err = -ENOMEM;
1341                        goto err_free_in;
1342                }
1343
1344                /* create mem & wire PSVs */
1345                err = mlx5_core_create_psv(dev->mdev, to_mpd(pd)->pdn,
1346                                           2, psv_index);
1347                if (err)
1348                        goto err_free_sig;
1349
1350                access_mode = MLX5_ACCESS_MODE_KLM;
1351                mr->sig->psv_memory.psv_idx = psv_index[0];
1352                mr->sig->psv_wire.psv_idx = psv_index[1];
1353
1354                mr->sig->sig_status_checked = true;
1355                mr->sig->sig_err_exists = false;
1356                /* Next UMR, Arm SIGERR */
1357                ++mr->sig->sigerr_count;
1358        } else {
1359                mlx5_ib_warn(dev, "Invalid mr type %d\n", mr_type);
1360                err = -EINVAL;
1361                goto err_free_in;
1362        }
1363
1364        in->seg.flags = MLX5_PERM_UMR_EN | access_mode;
1365        err = mlx5_core_create_mkey(dev->mdev, &mr->mmr, in, sizeof(*in),
1366                                    NULL, NULL, NULL);
1367        if (err)
1368                goto err_destroy_psv;
1369
1370        mr->ibmr.lkey = mr->mmr.key;
1371        mr->ibmr.rkey = mr->mmr.key;
1372        mr->umem = NULL;
1373        kfree(in);
1374
1375        return &mr->ibmr;
1376
1377err_destroy_psv:
1378        if (mr->sig) {
1379                if (mlx5_core_destroy_psv(dev->mdev,
1380                                          mr->sig->psv_memory.psv_idx))
1381                        mlx5_ib_warn(dev, "failed to destroy mem psv %d\n",
1382                                     mr->sig->psv_memory.psv_idx);
1383                if (mlx5_core_destroy_psv(dev->mdev,
1384                                          mr->sig->psv_wire.psv_idx))
1385                        mlx5_ib_warn(dev, "failed to destroy wire psv %d\n",
1386                                     mr->sig->psv_wire.psv_idx);
1387        }
1388        mlx5_free_priv_descs(mr);
1389err_free_sig:
1390        kfree(mr->sig);
1391err_free_in:
1392        kfree(in);
1393err_free:
1394        kfree(mr);
1395        return ERR_PTR(err);
1396}
1397
1398int mlx5_ib_check_mr_status(struct ib_mr *ibmr, u32 check_mask,
1399                            struct ib_mr_status *mr_status)
1400{
1401        struct mlx5_ib_mr *mmr = to_mmr(ibmr);
1402        int ret = 0;
1403
1404        if (check_mask & ~IB_MR_CHECK_SIG_STATUS) {
1405                pr_err("Invalid status check mask\n");
1406                ret = -EINVAL;
1407                goto done;
1408        }
1409
1410        mr_status->fail_status = 0;
1411        if (check_mask & IB_MR_CHECK_SIG_STATUS) {
1412                if (!mmr->sig) {
1413                        ret = -EINVAL;
1414                        pr_err("signature status check requested on a non-signature enabled MR\n");
1415                        goto done;
1416                }
1417
1418                mmr->sig->sig_status_checked = true;
1419                if (!mmr->sig->sig_err_exists)
1420                        goto done;
1421
1422                if (ibmr->lkey == mmr->sig->err_item.key)
1423                        memcpy(&mr_status->sig_err, &mmr->sig->err_item,
1424                               sizeof(mr_status->sig_err));
1425                else {
1426                        mr_status->sig_err.err_type = IB_SIG_BAD_GUARD;
1427                        mr_status->sig_err.sig_err_offset = 0;
1428                        mr_status->sig_err.key = mmr->sig->err_item.key;
1429                }
1430
1431                mmr->sig->sig_err_exists = false;
1432                mr_status->fail_status |= IB_MR_CHECK_SIG_STATUS;
1433        }
1434
1435done:
1436        return ret;
1437}
1438
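/*
 * ib_sg_to_pages() callback: store one page address, tagged with local
 * read/write permission bits, in the MR's private descriptor list.
 */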
1439static int mlx5_set_page(struct ib_mr *ibmr, u64 addr)
1440{
1441        struct mlx5_ib_mr *mr = to_mmr(ibmr);
1442        __be64 *descs;
1443
1444        if (unlikely(mr->ndescs == mr->max_descs))
1445                return -ENOMEM;
1446
1447        descs = mr->descs;
1448        descs[mr->ndescs++] = cpu_to_be64(addr | MLX5_EN_RD | MLX5_EN_WR);
1449
1450        return 0;
1451}
1452
1453int mlx5_ib_map_mr_sg(struct ib_mr *ibmr,
1454                      struct scatterlist *sg,
1455                      int sg_nents)
1456{
1457        struct mlx5_ib_mr *mr = to_mmr(ibmr);
1458        int n;
1459
1460        mr->ndescs = 0;
1461
1462        ib_dma_sync_single_for_cpu(ibmr->device, mr->desc_map,
1463                                   mr->desc_size * mr->max_descs,
1464                                   DMA_TO_DEVICE);
1465
1466        n = ib_sg_to_pages(ibmr, sg, sg_nents, mlx5_set_page);
1467
1468        ib_dma_sync_single_for_device(ibmr->device, mr->desc_map,
1469                                      mr->desc_size * mr->max_descs,
1470                                      DMA_TO_DEVICE);
1471
1472        return n;
1473}
1474