linux/drivers/infiniband/hw/mlx5/mr.c
   1/*
   2 * Copyright (c) 2013, Mellanox Technologies inc.  All rights reserved.
   3 *
   4 * This software is available to you under a choice of one of two
   5 * licenses.  You may choose to be licensed under the terms of the GNU
   6 * General Public License (GPL) Version 2, available from the file
   7 * COPYING in the main directory of this source tree, or the
   8 * OpenIB.org BSD license below:
   9 *
  10 *     Redistribution and use in source and binary forms, with or
  11 *     without modification, are permitted provided that the following
  12 *     conditions are met:
  13 *
  14 *      - Redistributions of source code must retain the above
  15 *        copyright notice, this list of conditions and the following
  16 *        disclaimer.
  17 *
  18 *      - Redistributions in binary form must reproduce the above
  19 *        copyright notice, this list of conditions and the following
  20 *        disclaimer in the documentation and/or other materials
  21 *        provided with the distribution.
  22 *
  23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
  27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
  28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  30 * SOFTWARE.
  31 */
  32
  33
  34#include <linux/kref.h>
  35#include <linux/random.h>
  36#include <linux/debugfs.h>
  37#include <linux/export.h>
  38#include <linux/delay.h>
  39#include <rdma/ib_umem.h>
  40#include <rdma/ib_umem_odp.h>
  41#include <rdma/ib_verbs.h>
  42#include "mlx5_ib.h"
  43
  44enum {
  45        MAX_PENDING_REG_MR = 8,
  46};
  47
  48#define MLX5_UMR_ALIGN 2048
  49#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
  50static __be64 mlx5_ib_update_mtt_emergency_buffer[
  51                MLX5_UMR_MTT_MIN_CHUNK_SIZE/sizeof(__be64)]
  52        __aligned(MLX5_UMR_ALIGN);
  53static DEFINE_MUTEX(mlx5_ib_update_mtt_emergency_buffer_mutex);
  54#endif
  55
  56static int clean_mr(struct mlx5_ib_mr *mr);
  57
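/*
 * Destroy the hardware mkey behind an MR.  With on-demand paging enabled,
 * also wait (via SRCU) for any page-fault handler that may still be using
 * the MR to finish before the caller frees it.
 */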
  58static int destroy_mkey(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
  59{
  60        int err = mlx5_core_destroy_mkey(dev->mdev, &mr->mmr);
  61
  62#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
  63        /* Wait until all page fault handlers using the mr complete. */
  64        synchronize_srcu(&dev->mr_srcu);
  65#endif
  66
  67        return err;
  68}
  69
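/*
 * Map an allocation order to an MR cache bucket index; orders smaller than
 * the smallest cached order map to bucket 0.
 */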
  70static int order2idx(struct mlx5_ib_dev *dev, int order)
  71{
  72        struct mlx5_mr_cache *cache = &dev->cache;
  73
  74        if (order < cache->ent[0].order)
  75                return 0;
  76        else
  77                return order - cache->ent[0].order;
  78}
  79
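/*
 * Completion callback for the asynchronous mkey creation issued by
 * add_keys().  On success the new MR is placed in its cache bucket and
 * inserted into the device's mkey radix tree; on failure the fill-delay
 * timer is armed so the cache backs off for a while.
 */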
  80static void reg_mr_callback(int status, void *context)
  81{
  82        struct mlx5_ib_mr *mr = context;
  83        struct mlx5_ib_dev *dev = mr->dev;
  84        struct mlx5_mr_cache *cache = &dev->cache;
  85        int c = order2idx(dev, mr->order);
  86        struct mlx5_cache_ent *ent = &cache->ent[c];
  87        u8 key;
  88        unsigned long flags;
  89        struct mlx5_mr_table *table = &dev->mdev->priv.mr_table;
  90        int err;
  91
  92        spin_lock_irqsave(&ent->lock, flags);
  93        ent->pending--;
  94        spin_unlock_irqrestore(&ent->lock, flags);
  95        if (status) {
  96                mlx5_ib_warn(dev, "async reg mr failed. status %d\n", status);
  97                kfree(mr);
  98                dev->fill_delay = 1;
  99                mod_timer(&dev->delay_timer, jiffies + HZ);
 100                return;
 101        }
 102
 103        if (mr->out.hdr.status) {
  104                mlx5_ib_warn(dev, "failed - status %d, syndrome 0x%x\n",
 105                             mr->out.hdr.status,
 106                             be32_to_cpu(mr->out.hdr.syndrome));
 107                kfree(mr);
 108                dev->fill_delay = 1;
 109                mod_timer(&dev->delay_timer, jiffies + HZ);
 110                return;
 111        }
 112
 113        spin_lock_irqsave(&dev->mdev->priv.mkey_lock, flags);
 114        key = dev->mdev->priv.mkey_key++;
 115        spin_unlock_irqrestore(&dev->mdev->priv.mkey_lock, flags);
 116        mr->mmr.key = mlx5_idx_to_mkey(be32_to_cpu(mr->out.mkey) & 0xffffff) | key;
 117
 118        cache->last_add = jiffies;
 119
 120        spin_lock_irqsave(&ent->lock, flags);
 121        list_add_tail(&mr->list, &ent->head);
 122        ent->cur++;
 123        ent->size++;
 124        spin_unlock_irqrestore(&ent->lock, flags);
 125
 126        write_lock_irqsave(&table->lock, flags);
 127        err = radix_tree_insert(&table->tree, mlx5_base_mkey(mr->mmr.key),
 128                                &mr->mmr);
 129        if (err)
 130                pr_err("Error inserting to mr tree. 0x%x\n", -err);
 131        write_unlock_irqrestore(&table->lock, flags);
 132}
 133
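/*
 * Ask the firmware, asynchronously, for up to @num new UMR-capable mkeys for
 * cache bucket @c.  Stops with -EAGAIN once MAX_PENDING_REG_MR requests are
 * already outstanding for the bucket.
 */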
 134static int add_keys(struct mlx5_ib_dev *dev, int c, int num)
 135{
 136        struct mlx5_mr_cache *cache = &dev->cache;
 137        struct mlx5_cache_ent *ent = &cache->ent[c];
 138        struct mlx5_create_mkey_mbox_in *in;
 139        struct mlx5_ib_mr *mr;
 140        int npages = 1 << ent->order;
 141        int err = 0;
 142        int i;
 143
 144        in = kzalloc(sizeof(*in), GFP_KERNEL);
 145        if (!in)
 146                return -ENOMEM;
 147
 148        for (i = 0; i < num; i++) {
 149                if (ent->pending >= MAX_PENDING_REG_MR) {
 150                        err = -EAGAIN;
 151                        break;
 152                }
 153
 154                mr = kzalloc(sizeof(*mr), GFP_KERNEL);
 155                if (!mr) {
 156                        err = -ENOMEM;
 157                        break;
 158                }
 159                mr->order = ent->order;
 160                mr->umred = 1;
 161                mr->dev = dev;
 162                in->seg.status = MLX5_MKEY_STATUS_FREE;
 163                in->seg.xlt_oct_size = cpu_to_be32((npages + 1) / 2);
 164                in->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
 165                in->seg.flags = MLX5_ACCESS_MODE_MTT | MLX5_PERM_UMR_EN;
 166                in->seg.log2_page_size = 12;
 167
 168                spin_lock_irq(&ent->lock);
 169                ent->pending++;
 170                spin_unlock_irq(&ent->lock);
 171                err = mlx5_core_create_mkey(dev->mdev, &mr->mmr, in,
 172                                            sizeof(*in), reg_mr_callback,
 173                                            mr, &mr->out);
 174                if (err) {
 175                        spin_lock_irq(&ent->lock);
 176                        ent->pending--;
 177                        spin_unlock_irq(&ent->lock);
 178                        mlx5_ib_warn(dev, "create mkey failed %d\n", err);
 179                        kfree(mr);
 180                        break;
 181                }
 182        }
 183
 184        kfree(in);
 185        return err;
 186}
 187
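/* Remove up to @num MRs from the head of bucket @c and destroy their mkeys. */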
 188static void remove_keys(struct mlx5_ib_dev *dev, int c, int num)
 189{
 190        struct mlx5_mr_cache *cache = &dev->cache;
 191        struct mlx5_cache_ent *ent = &cache->ent[c];
 192        struct mlx5_ib_mr *mr;
 193        int err;
 194        int i;
 195
 196        for (i = 0; i < num; i++) {
 197                spin_lock_irq(&ent->lock);
 198                if (list_empty(&ent->head)) {
 199                        spin_unlock_irq(&ent->lock);
 200                        return;
 201                }
 202                mr = list_first_entry(&ent->head, struct mlx5_ib_mr, list);
 203                list_del(&mr->list);
 204                ent->cur--;
 205                ent->size--;
 206                spin_unlock_irq(&ent->lock);
 207                err = destroy_mkey(dev, mr);
 208                if (err)
 209                        mlx5_ib_warn(dev, "failed destroy mkey\n");
 210                else
 211                        kfree(mr);
 212        }
 213}
 214
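/*
 * debugfs "size" attribute: writing a number resizes the bucket to that many
 * mkeys (never below its limit); reading returns the current size.
 */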
 215static ssize_t size_write(struct file *filp, const char __user *buf,
 216                          size_t count, loff_t *pos)
 217{
 218        struct mlx5_cache_ent *ent = filp->private_data;
 219        struct mlx5_ib_dev *dev = ent->dev;
 220        char lbuf[20];
 221        u32 var;
 222        int err;
 223        int c;
 224
 225        if (copy_from_user(lbuf, buf, sizeof(lbuf)))
 226                return -EFAULT;
 227
 228        c = order2idx(dev, ent->order);
 229        lbuf[sizeof(lbuf) - 1] = 0;
 230
 231        if (sscanf(lbuf, "%u", &var) != 1)
 232                return -EINVAL;
 233
 234        if (var < ent->limit)
 235                return -EINVAL;
 236
 237        if (var > ent->size) {
 238                do {
 239                        err = add_keys(dev, c, var - ent->size);
 240                        if (err && err != -EAGAIN)
 241                                return err;
 242
 243                        usleep_range(3000, 5000);
 244                } while (err);
 245        } else if (var < ent->size) {
 246                remove_keys(dev, c, ent->size - var);
 247        }
 248
 249        return count;
 250}
 251
 252static ssize_t size_read(struct file *filp, char __user *buf, size_t count,
 253                         loff_t *pos)
 254{
 255        struct mlx5_cache_ent *ent = filp->private_data;
 256        char lbuf[20];
 257        int err;
 258
 259        if (*pos)
 260                return 0;
 261
 262        err = snprintf(lbuf, sizeof(lbuf), "%d\n", ent->size);
 263        if (err < 0)
 264                return err;
 265
 266        if (copy_to_user(buf, lbuf, err))
 267                return -EFAULT;
 268
 269        *pos += err;
 270
 271        return err;
 272}
 273
 274static const struct file_operations size_fops = {
 275        .owner  = THIS_MODULE,
 276        .open   = simple_open,
 277        .write  = size_write,
 278        .read   = size_read,
 279};
 280
 281static ssize_t limit_write(struct file *filp, const char __user *buf,
 282                           size_t count, loff_t *pos)
 283{
 284        struct mlx5_cache_ent *ent = filp->private_data;
 285        struct mlx5_ib_dev *dev = ent->dev;
 286        char lbuf[20];
 287        u32 var;
 288        int err;
 289        int c;
 290
 291        if (copy_from_user(lbuf, buf, sizeof(lbuf)))
 292                return -EFAULT;
 293
 294        c = order2idx(dev, ent->order);
 295        lbuf[sizeof(lbuf) - 1] = 0;
 296
 297        if (sscanf(lbuf, "%u", &var) != 1)
 298                return -EINVAL;
 299
 300        if (var > ent->size)
 301                return -EINVAL;
 302
 303        ent->limit = var;
 304
 305        if (ent->cur < ent->limit) {
 306                err = add_keys(dev, c, 2 * ent->limit - ent->cur);
 307                if (err)
 308                        return err;
 309        }
 310
 311        return count;
 312}
 313
 314static ssize_t limit_read(struct file *filp, char __user *buf, size_t count,
 315                          loff_t *pos)
 316{
 317        struct mlx5_cache_ent *ent = filp->private_data;
 318        char lbuf[20];
 319        int err;
 320
 321        if (*pos)
 322                return 0;
 323
 324        err = snprintf(lbuf, sizeof(lbuf), "%d\n", ent->limit);
 325        if (err < 0)
 326                return err;
 327
 328        if (copy_to_user(buf, lbuf, err))
 329                return -EFAULT;
 330
 331        *pos += err;
 332
 333        return err;
 334}
 335
 336static const struct file_operations limit_fops = {
 337        .owner  = THIS_MODULE,
 338        .open   = simple_open,
 339        .write  = limit_write,
 340        .read   = limit_read,
 341};
 342
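/* Return 1 if any cache bucket is still below its configured limit. */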
 343static int someone_adding(struct mlx5_mr_cache *cache)
 344{
 345        int i;
 346
 347        for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) {
 348                if (cache->ent[i].cur < cache->ent[i].limit)
 349                        return 1;
 350        }
 351
 352        return 0;
 353}
 354
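/*
 * Per-bucket maintenance work: grow the bucket one mkey at a time while it
 * holds fewer than 2 * limit entries (unless fill_delay is set), and shrink
 * it once it has stayed above 2 * limit for roughly five minutes with no
 * other bucket needing to grow.
 */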
 355static void __cache_work_func(struct mlx5_cache_ent *ent)
 356{
 357        struct mlx5_ib_dev *dev = ent->dev;
 358        struct mlx5_mr_cache *cache = &dev->cache;
 359        int i = order2idx(dev, ent->order);
 360        int err;
 361
 362        if (cache->stopped)
 363                return;
 364
 365        ent = &dev->cache.ent[i];
 366        if (ent->cur < 2 * ent->limit && !dev->fill_delay) {
 367                err = add_keys(dev, i, 1);
 368                if (ent->cur < 2 * ent->limit) {
 369                        if (err == -EAGAIN) {
 370                                mlx5_ib_dbg(dev, "returned eagain, order %d\n",
 371                                            i + 2);
 372                                queue_delayed_work(cache->wq, &ent->dwork,
 373                                                   msecs_to_jiffies(3));
 374                        } else if (err) {
 375                                mlx5_ib_warn(dev, "command failed order %d, err %d\n",
 376                                             i + 2, err);
 377                                queue_delayed_work(cache->wq, &ent->dwork,
 378                                                   msecs_to_jiffies(1000));
 379                        } else {
 380                                queue_work(cache->wq, &ent->work);
 381                        }
 382                }
 383        } else if (ent->cur > 2 * ent->limit) {
 384                if (!someone_adding(cache) &&
 385                    time_after(jiffies, cache->last_add + 300 * HZ)) {
 386                        remove_keys(dev, i, 1);
 387                        if (ent->cur > ent->limit)
 388                                queue_work(cache->wq, &ent->work);
 389                } else {
 390                        queue_delayed_work(cache->wq, &ent->dwork, 300 * HZ);
 391                }
 392        }
 393}
 394
 395static void delayed_cache_work_func(struct work_struct *work)
 396{
 397        struct mlx5_cache_ent *ent;
 398
 399        ent = container_of(work, struct mlx5_cache_ent, dwork.work);
 400        __cache_work_func(ent);
 401}
 402
 403static void cache_work_func(struct work_struct *work)
 404{
 405        struct mlx5_cache_ent *ent;
 406
 407        ent = container_of(work, struct mlx5_cache_ent, work);
 408        __cache_work_func(ent);
 409}
 410
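/*
 * Take an MR from the smallest cache bucket that can satisfy @order, falling
 * through to larger buckets when the exact one is empty, and kick the
 * background work so the visited buckets get refilled.
 */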
 411static struct mlx5_ib_mr *alloc_cached_mr(struct mlx5_ib_dev *dev, int order)
 412{
 413        struct mlx5_mr_cache *cache = &dev->cache;
 414        struct mlx5_ib_mr *mr = NULL;
 415        struct mlx5_cache_ent *ent;
 416        int c;
 417        int i;
 418
 419        c = order2idx(dev, order);
 420        if (c < 0 || c >= MAX_MR_CACHE_ENTRIES) {
 421                mlx5_ib_warn(dev, "order %d, cache index %d\n", order, c);
 422                return NULL;
 423        }
 424
 425        for (i = c; i < MAX_MR_CACHE_ENTRIES; i++) {
 426                ent = &cache->ent[i];
 427
 428                mlx5_ib_dbg(dev, "order %d, cache index %d\n", ent->order, i);
 429
 430                spin_lock_irq(&ent->lock);
 431                if (!list_empty(&ent->head)) {
 432                        mr = list_first_entry(&ent->head, struct mlx5_ib_mr,
 433                                              list);
 434                        list_del(&mr->list);
 435                        ent->cur--;
 436                        spin_unlock_irq(&ent->lock);
 437                        if (ent->cur < ent->limit)
 438                                queue_work(cache->wq, &ent->work);
 439                        break;
 440                }
 441                spin_unlock_irq(&ent->lock);
 442
 443                queue_work(cache->wq, &ent->work);
 444
 445                if (mr)
 446                        break;
 447        }
 448
 449        if (!mr)
 450                cache->ent[c].miss++;
 451
 452        return mr;
 453}
 454
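/*
 * Return a cache-backed MR to its bucket; if the bucket now holds more than
 * twice its limit, schedule the shrink work.
 */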
 455static void free_cached_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
 456{
 457        struct mlx5_mr_cache *cache = &dev->cache;
 458        struct mlx5_cache_ent *ent;
 459        int shrink = 0;
 460        int c;
 461
 462        c = order2idx(dev, mr->order);
 463        if (c < 0 || c >= MAX_MR_CACHE_ENTRIES) {
 464                mlx5_ib_warn(dev, "order %d, cache index %d\n", mr->order, c);
 465                return;
 466        }
 467        ent = &cache->ent[c];
 468        spin_lock_irq(&ent->lock);
 469        list_add_tail(&mr->list, &ent->head);
 470        ent->cur++;
 471        if (ent->cur > 2 * ent->limit)
 472                shrink = 1;
 473        spin_unlock_irq(&ent->lock);
 474
 475        if (shrink)
 476                queue_work(cache->wq, &ent->work);
 477}
 478
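/* Teardown helper: drain bucket @c and destroy every mkey left in it. */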
 479static void clean_keys(struct mlx5_ib_dev *dev, int c)
 480{
 481        struct mlx5_mr_cache *cache = &dev->cache;
 482        struct mlx5_cache_ent *ent = &cache->ent[c];
 483        struct mlx5_ib_mr *mr;
 484        int err;
 485
 486        cancel_delayed_work(&ent->dwork);
 487        while (1) {
 488                spin_lock_irq(&ent->lock);
 489                if (list_empty(&ent->head)) {
 490                        spin_unlock_irq(&ent->lock);
 491                        return;
 492                }
 493                mr = list_first_entry(&ent->head, struct mlx5_ib_mr, list);
 494                list_del(&mr->list);
 495                ent->cur--;
 496                ent->size--;
 497                spin_unlock_irq(&ent->lock);
 498                err = destroy_mkey(dev, mr);
 499                if (err)
 500                        mlx5_ib_warn(dev, "failed destroy mkey\n");
 501                else
 502                        kfree(mr);
 503        }
 504}
 505
 506static int mlx5_mr_cache_debugfs_init(struct mlx5_ib_dev *dev)
 507{
 508        struct mlx5_mr_cache *cache = &dev->cache;
 509        struct mlx5_cache_ent *ent;
 510        int i;
 511
 512        if (!mlx5_debugfs_root)
 513                return 0;
 514
 515        cache->root = debugfs_create_dir("mr_cache", dev->mdev->priv.dbg_root);
 516        if (!cache->root)
 517                return -ENOMEM;
 518
 519        for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) {
 520                ent = &cache->ent[i];
 521                sprintf(ent->name, "%d", ent->order);
 522                ent->dir = debugfs_create_dir(ent->name,  cache->root);
 523                if (!ent->dir)
 524                        return -ENOMEM;
 525
 526                ent->fsize = debugfs_create_file("size", 0600, ent->dir, ent,
 527                                                 &size_fops);
 528                if (!ent->fsize)
 529                        return -ENOMEM;
 530
 531                ent->flimit = debugfs_create_file("limit", 0600, ent->dir, ent,
 532                                                  &limit_fops);
 533                if (!ent->flimit)
 534                        return -ENOMEM;
 535
 536                ent->fcur = debugfs_create_u32("cur", 0400, ent->dir,
 537                                               &ent->cur);
 538                if (!ent->fcur)
 539                        return -ENOMEM;
 540
 541                ent->fmiss = debugfs_create_u32("miss", 0600, ent->dir,
 542                                                &ent->miss);
 543                if (!ent->fmiss)
 544                        return -ENOMEM;
 545        }
 546
 547        return 0;
 548}
 549
 550static void mlx5_mr_cache_debugfs_cleanup(struct mlx5_ib_dev *dev)
 551{
 552        if (!mlx5_debugfs_root)
 553                return;
 554
 555        debugfs_remove_recursive(dev->cache.root);
 556}
 557
 558static void delay_time_func(unsigned long ctx)
 559{
 560        struct mlx5_ib_dev *dev = (struct mlx5_ib_dev *)ctx;
 561
 562        dev->fill_delay = 0;
 563}
 564
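/*
 * Set up the MR cache: one bucket per order starting at order 2, each with
 * its own maintenance work items, plus the fill-delay timer and the debugfs
 * entries.
 */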
 565int mlx5_mr_cache_init(struct mlx5_ib_dev *dev)
 566{
 567        struct mlx5_mr_cache *cache = &dev->cache;
 568        struct mlx5_cache_ent *ent;
 569        int limit;
 570        int err;
 571        int i;
 572
 573        cache->wq = create_singlethread_workqueue("mkey_cache");
 574        if (!cache->wq) {
 575                mlx5_ib_warn(dev, "failed to create work queue\n");
 576                return -ENOMEM;
 577        }
 578
 579        setup_timer(&dev->delay_timer, delay_time_func, (unsigned long)dev);
 580        for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) {
 584                ent = &cache->ent[i];
 585                INIT_LIST_HEAD(&ent->head);
 586                spin_lock_init(&ent->lock);
 587                ent->order = i + 2;
 588                ent->dev = dev;
 589
 590                if (dev->mdev->profile->mask & MLX5_PROF_MASK_MR_CACHE)
 591                        limit = dev->mdev->profile->mr_cache[i].limit;
 592                else
 593                        limit = 0;
 594
 595                INIT_WORK(&ent->work, cache_work_func);
 596                INIT_DELAYED_WORK(&ent->dwork, delayed_cache_work_func);
 597                ent->limit = limit;
 598                queue_work(cache->wq, &ent->work);
 599        }
 600
 601        err = mlx5_mr_cache_debugfs_init(dev);
 602        if (err)
 603                mlx5_ib_warn(dev, "cache debugfs failure\n");
 604
 605        return 0;
 606}
 607
 608int mlx5_mr_cache_cleanup(struct mlx5_ib_dev *dev)
 609{
 610        int i;
 611
 612        dev->cache.stopped = 1;
 613        flush_workqueue(dev->cache.wq);
 614
 615        mlx5_mr_cache_debugfs_cleanup(dev);
 616
 617        for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++)
 618                clean_keys(dev, i);
 619
 620        destroy_workqueue(dev->cache.wq);
 621        del_timer_sync(&dev->delay_timer);
 622
 623        return 0;
 624}
 625
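/*
 * get_dma_mr verb: create a physical-address mkey covering the whole address
 * space (MLX5_MKEY_LEN64) with the requested access rights.
 */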
 626struct ib_mr *mlx5_ib_get_dma_mr(struct ib_pd *pd, int acc)
 627{
 628        struct mlx5_ib_dev *dev = to_mdev(pd->device);
 629        struct mlx5_core_dev *mdev = dev->mdev;
 630        struct mlx5_create_mkey_mbox_in *in;
 631        struct mlx5_mkey_seg *seg;
 632        struct mlx5_ib_mr *mr;
 633        int err;
 634
 635        mr = kzalloc(sizeof(*mr), GFP_KERNEL);
 636        if (!mr)
 637                return ERR_PTR(-ENOMEM);
 638
 639        in = kzalloc(sizeof(*in), GFP_KERNEL);
 640        if (!in) {
 641                err = -ENOMEM;
 642                goto err_free;
 643        }
 644
 645        seg = &in->seg;
 646        seg->flags = convert_access(acc) | MLX5_ACCESS_MODE_PA;
 647        seg->flags_pd = cpu_to_be32(to_mpd(pd)->pdn | MLX5_MKEY_LEN64);
 648        seg->qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
 649        seg->start_addr = 0;
 650
 651        err = mlx5_core_create_mkey(mdev, &mr->mmr, in, sizeof(*in), NULL, NULL,
 652                                    NULL);
 653        if (err)
 654                goto err_in;
 655
 656        kfree(in);
 657        mr->ibmr.lkey = mr->mmr.key;
 658        mr->ibmr.rkey = mr->mmr.key;
 659        mr->umem = NULL;
 660
 661        return &mr->ibmr;
 662
 663err_in:
 664        kfree(in);
 665
 666err_free:
 667        kfree(mr);
 668
 669        return ERR_PTR(err);
 670}
 671
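/*
 * Number of translation octowords (pairs of 64-bit MTT entries) needed to map
 * @len bytes starting at @addr with the given page size.  For example, a
 * page-aligned 16KB region with 4KB pages needs 4 MTTs, i.e. 2 octowords.
 */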
 672static int get_octo_len(u64 addr, u64 len, int page_size)
 673{
 674        u64 offset;
 675        int npages;
 676
 677        offset = addr & (page_size - 1);
 678        npages = ALIGN(len + offset, page_size) >> ilog2(page_size);
 679        return (npages + 1) / 2;
 680}
 681
 682static int use_umr(int order)
 683{
 684        return order <= MLX5_MAX_UMR_SHIFT;
 685}
 686
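/*
 * Build the UMR work request that programs mkey @key: the single scatter
 * entry points at the DMA-mapped page list, and the mlx5_umr_wr fields carry
 * the mkey attributes (virtual address, length, page shift, access flags, PD).
 */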
 687static void prep_umr_reg_wqe(struct ib_pd *pd, struct ib_send_wr *wr,
 688                             struct ib_sge *sg, u64 dma, int n, u32 key,
 689                             int page_shift, u64 virt_addr, u64 len,
 690                             int access_flags)
 691{
 692        struct mlx5_ib_dev *dev = to_mdev(pd->device);
 693        struct ib_mr *mr = dev->umrc.mr;
 694        struct mlx5_umr_wr *umrwr = (struct mlx5_umr_wr *)&wr->wr.fast_reg;
 695
 696        sg->addr = dma;
 697        sg->length = ALIGN(sizeof(u64) * n, 64);
 698        sg->lkey = mr->lkey;
 699
 700        wr->next = NULL;
 701        wr->send_flags = 0;
 702        wr->sg_list = sg;
 703        if (n)
 704                wr->num_sge = 1;
 705        else
 706                wr->num_sge = 0;
 707
 708        wr->opcode = MLX5_IB_WR_UMR;
 709
 710        umrwr->npages = n;
 711        umrwr->page_shift = page_shift;
 712        umrwr->mkey = key;
 713        umrwr->target.virt_addr = virt_addr;
 714        umrwr->length = len;
 715        umrwr->access_flags = access_flags;
 716        umrwr->pd = pd;
 717}
 718
 719static void prep_umr_unreg_wqe(struct mlx5_ib_dev *dev,
 720                               struct ib_send_wr *wr, u32 key)
 721{
 722        struct mlx5_umr_wr *umrwr = (struct mlx5_umr_wr *)&wr->wr.fast_reg;
 723
 724        wr->send_flags = MLX5_IB_SEND_UMR_UNREG | MLX5_IB_SEND_UMR_FAIL_IF_FREE;
 725        wr->opcode = MLX5_IB_WR_UMR;
 726        umrwr->mkey = key;
 727}
 728
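/*
 * Completion handler for the UMR CQ: drain all available completions, wake
 * the posters waiting on their mlx5_ib_umr_context, then re-arm the CQ.
 */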
 729void mlx5_umr_cq_handler(struct ib_cq *cq, void *cq_context)
 730{
 731        struct mlx5_ib_umr_context *context;
 732        struct ib_wc wc;
 733        int err;
 734
 735        while (1) {
 736                err = ib_poll_cq(cq, 1, &wc);
 737                if (err < 0) {
 738                        pr_warn("poll cq error %d\n", err);
 739                        return;
 740                }
 741                if (err == 0)
 742                        break;
 743
 744                context = (struct mlx5_ib_umr_context *) (unsigned long) wc.wr_id;
 745                context->status = wc.status;
 746                complete(&context->done);
 747        }
 748        ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
 749}
 750
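/*
 * Fast-path registration: take a pre-created mkey from the cache (requesting
 * a refill and returning -EAGAIN if its bucket is empty), build the page list
 * in a 2KB-aligned buffer, and post a UMR WQE that binds the mkey to the user
 * memory.
 */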
 751static struct mlx5_ib_mr *reg_umr(struct ib_pd *pd, struct ib_umem *umem,
 752                                  u64 virt_addr, u64 len, int npages,
 753                                  int page_shift, int order, int access_flags)
 754{
 755        struct mlx5_ib_dev *dev = to_mdev(pd->device);
 756        struct device *ddev = dev->ib_dev.dma_device;
 757        struct umr_common *umrc = &dev->umrc;
 758        struct mlx5_ib_umr_context umr_context;
 759        struct ib_send_wr wr, *bad;
 760        struct mlx5_ib_mr *mr;
 761        struct ib_sge sg;
 762        int size;
 763        __be64 *mr_pas;
 764        __be64 *pas;
 765        dma_addr_t dma;
 766        int err = 0;
 767        int i;
 768
 769        for (i = 0; i < 1; i++) {
 770                mr = alloc_cached_mr(dev, order);
 771                if (mr)
 772                        break;
 773
 774                err = add_keys(dev, order2idx(dev, order), 1);
 775                if (err && err != -EAGAIN) {
 776                        mlx5_ib_warn(dev, "add_keys failed, err %d\n", err);
 777                        break;
 778                }
 779        }
 780
 781        if (!mr)
 782                return ERR_PTR(-EAGAIN);
 783
 784        /* UMR copies MTTs in units of MLX5_UMR_MTT_ALIGNMENT bytes.
 785         * To avoid copying garbage after the pas array, we allocate
 786         * a little more. */
 787        size = ALIGN(sizeof(u64) * npages, MLX5_UMR_MTT_ALIGNMENT);
 788        mr_pas = kmalloc(size + MLX5_UMR_ALIGN - 1, GFP_KERNEL);
 789        if (!mr_pas) {
 790                err = -ENOMEM;
 791                goto free_mr;
 792        }
 793
 794        pas = PTR_ALIGN(mr_pas, MLX5_UMR_ALIGN);
 795        mlx5_ib_populate_pas(dev, umem, page_shift, pas, MLX5_IB_MTT_PRESENT);
 796        /* Clear padding after the actual pages. */
 797        memset(pas + npages, 0, size - npages * sizeof(u64));
 798
 799        dma = dma_map_single(ddev, pas, size, DMA_TO_DEVICE);
 800        if (dma_mapping_error(ddev, dma)) {
 801                err = -ENOMEM;
 802                goto free_pas;
 803        }
 804
 805        memset(&wr, 0, sizeof(wr));
 806        wr.wr_id = (u64)(unsigned long)&umr_context;
 807        prep_umr_reg_wqe(pd, &wr, &sg, dma, npages, mr->mmr.key, page_shift,
 808                         virt_addr, len, access_flags);
 809
 810        mlx5_ib_init_umr_context(&umr_context);
 811        down(&umrc->sem);
 812        err = ib_post_send(umrc->qp, &wr, &bad);
 813        if (err) {
 814                mlx5_ib_warn(dev, "post send failed, err %d\n", err);
 815                goto unmap_dma;
 816        } else {
 817                wait_for_completion(&umr_context.done);
 818                if (umr_context.status != IB_WC_SUCCESS) {
 819                        mlx5_ib_warn(dev, "reg umr failed\n");
 820                        err = -EFAULT;
 821                }
 822        }
 823
 824        mr->mmr.iova = virt_addr;
 825        mr->mmr.size = len;
 826        mr->mmr.pd = to_mpd(pd)->pdn;
 827
 828        mr->live = 1;
 829
 830unmap_dma:
 831        up(&umrc->sem);
 832        dma_unmap_single(ddev, dma, size, DMA_TO_DEVICE);
 833
 834free_pas:
 835        kfree(mr_pas);
 836
 837free_mr:
 838        if (err) {
 839                free_cached_mr(dev, mr);
 840                return ERR_PTR(err);
 841        }
 842
 843        return mr;
 844}
 845
 846#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
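/*
 * ODP: rewrite (or zap) a window of MTT entries of @mr via UMR, in chunks of
 * at most one page of translation entries.  Memory is allocated with
 * GFP_ATOMIC because this may run from an invalidation; if that fails, a
 * static emergency buffer is used under a mutex as a slower fallback.
 */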
 847int mlx5_ib_update_mtt(struct mlx5_ib_mr *mr, u64 start_page_index, int npages,
 848                       int zap)
 849{
 850        struct mlx5_ib_dev *dev = mr->dev;
 851        struct device *ddev = dev->ib_dev.dma_device;
 852        struct umr_common *umrc = &dev->umrc;
 853        struct mlx5_ib_umr_context umr_context;
 854        struct ib_umem *umem = mr->umem;
 855        int size;
 856        __be64 *pas;
 857        dma_addr_t dma;
 858        struct ib_send_wr wr, *bad;
 859        struct mlx5_umr_wr *umrwr = (struct mlx5_umr_wr *)&wr.wr.fast_reg;
 860        struct ib_sge sg;
 861        int err = 0;
 862        const int page_index_alignment = MLX5_UMR_MTT_ALIGNMENT / sizeof(u64);
 863        const int page_index_mask = page_index_alignment - 1;
 864        size_t pages_mapped = 0;
 865        size_t pages_to_map = 0;
 866        size_t pages_iter = 0;
 867        int use_emergency_buf = 0;
 868
 869        /* UMR copies MTTs in units of MLX5_UMR_MTT_ALIGNMENT bytes,
 870         * so we need to align the offset and length accordingly */
 871        if (start_page_index & page_index_mask) {
 872                npages += start_page_index & page_index_mask;
 873                start_page_index &= ~page_index_mask;
 874        }
 875
 876        pages_to_map = ALIGN(npages, page_index_alignment);
 877
 878        if (start_page_index + pages_to_map > MLX5_MAX_UMR_PAGES)
 879                return -EINVAL;
 880
 881        size = sizeof(u64) * pages_to_map;
 882        size = min_t(int, PAGE_SIZE, size);
 883        /* We allocate with GFP_ATOMIC to avoid recursion into page-reclaim
 884         * code, when we are called from an invalidation. The pas buffer must
 885         * be 2k-aligned for Connect-IB. */
 886        pas = (__be64 *)get_zeroed_page(GFP_ATOMIC);
 887        if (!pas) {
 888                mlx5_ib_warn(dev, "unable to allocate memory during MTT update, falling back to slower chunked mechanism.\n");
 889                pas = mlx5_ib_update_mtt_emergency_buffer;
 890                size = MLX5_UMR_MTT_MIN_CHUNK_SIZE;
 891                use_emergency_buf = 1;
 892                mutex_lock(&mlx5_ib_update_mtt_emergency_buffer_mutex);
 893                memset(pas, 0, size);
 894        }
 895        pages_iter = size / sizeof(u64);
 896        dma = dma_map_single(ddev, pas, size, DMA_TO_DEVICE);
 897        if (dma_mapping_error(ddev, dma)) {
 898                mlx5_ib_err(dev, "unable to map DMA during MTT update.\n");
 899                err = -ENOMEM;
 900                goto free_pas;
 901        }
 902
 903        for (pages_mapped = 0;
 904             pages_mapped < pages_to_map && !err;
 905             pages_mapped += pages_iter, start_page_index += pages_iter) {
 906                dma_sync_single_for_cpu(ddev, dma, size, DMA_TO_DEVICE);
 907
 908                npages = min_t(size_t,
 909                               pages_iter,
 910                               ib_umem_num_pages(umem) - start_page_index);
 911
 912                if (!zap) {
 913                        __mlx5_ib_populate_pas(dev, umem, PAGE_SHIFT,
 914                                               start_page_index, npages, pas,
 915                                               MLX5_IB_MTT_PRESENT);
 916                        /* Clear padding after the pages brought from the
 917                         * umem. */
 918                        memset(pas + npages, 0, size - npages * sizeof(u64));
 919                }
 920
 921                dma_sync_single_for_device(ddev, dma, size, DMA_TO_DEVICE);
 922
 923                memset(&wr, 0, sizeof(wr));
 924                wr.wr_id = (u64)(unsigned long)&umr_context;
 925
 926                sg.addr = dma;
 927                sg.length = ALIGN(npages * sizeof(u64),
 928                                MLX5_UMR_MTT_ALIGNMENT);
 929                sg.lkey = dev->umrc.mr->lkey;
 930
 931                wr.send_flags = MLX5_IB_SEND_UMR_FAIL_IF_FREE |
 932                                MLX5_IB_SEND_UMR_UPDATE_MTT;
 933                wr.sg_list = &sg;
 934                wr.num_sge = 1;
 935                wr.opcode = MLX5_IB_WR_UMR;
 936                umrwr->npages = sg.length / sizeof(u64);
 937                umrwr->page_shift = PAGE_SHIFT;
 938                umrwr->mkey = mr->mmr.key;
 939                umrwr->target.offset = start_page_index;
 940
 941                mlx5_ib_init_umr_context(&umr_context);
 942                down(&umrc->sem);
 943                err = ib_post_send(umrc->qp, &wr, &bad);
 944                if (err) {
 945                        mlx5_ib_err(dev, "UMR post send failed, err %d\n", err);
 946                } else {
 947                        wait_for_completion(&umr_context.done);
 948                        if (umr_context.status != IB_WC_SUCCESS) {
 949                                mlx5_ib_err(dev, "UMR completion failed, code %d\n",
 950                                            umr_context.status);
 951                                err = -EFAULT;
 952                        }
 953                }
 954                up(&umrc->sem);
 955        }
 956        dma_unmap_single(ddev, dma, size, DMA_TO_DEVICE);
 957
 958free_pas:
 959        if (!use_emergency_buf)
 960                free_page((unsigned long)pas);
 961        else
 962                mutex_unlock(&mlx5_ib_update_mtt_emergency_buffer_mutex);
 963
 964        return err;
 965}
 966#endif
 967
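/*
 * Slow-path registration: build a complete CREATE_MKEY mailbox, including the
 * page list, and create the mkey synchronously through the command interface.
 * Used when the region cannot go through the UMR/cache path.
 */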
 968static struct mlx5_ib_mr *reg_create(struct ib_pd *pd, u64 virt_addr,
 969                                     u64 length, struct ib_umem *umem,
 970                                     int npages, int page_shift,
 971                                     int access_flags)
 972{
 973        struct mlx5_ib_dev *dev = to_mdev(pd->device);
 974        struct mlx5_create_mkey_mbox_in *in;
 975        struct mlx5_ib_mr *mr;
 976        int inlen;
 977        int err;
 978        bool pg_cap = !!(dev->mdev->caps.gen.flags &
 979                         MLX5_DEV_CAP_FLAG_ON_DMND_PG);
 980
 981        mr = kzalloc(sizeof(*mr), GFP_KERNEL);
 982        if (!mr)
 983                return ERR_PTR(-ENOMEM);
 984
 985        inlen = sizeof(*in) + sizeof(*in->pas) * ((npages + 1) / 2) * 2;
 986        in = mlx5_vzalloc(inlen);
 987        if (!in) {
 988                err = -ENOMEM;
 989                goto err_1;
 990        }
 991        mlx5_ib_populate_pas(dev, umem, page_shift, in->pas,
 992                             pg_cap ? MLX5_IB_MTT_PRESENT : 0);
 993
 994        /* The MLX5_MKEY_INBOX_PG_ACCESS bit allows setting the access flags
 995         * in the page list submitted with the command. */
 996        in->flags = pg_cap ? cpu_to_be32(MLX5_MKEY_INBOX_PG_ACCESS) : 0;
 997        in->seg.flags = convert_access(access_flags) |
 998                MLX5_ACCESS_MODE_MTT;
 999        in->seg.flags_pd = cpu_to_be32(to_mpd(pd)->pdn);
1000        in->seg.start_addr = cpu_to_be64(virt_addr);
1001        in->seg.len = cpu_to_be64(length);
1002        in->seg.bsfs_octo_size = 0;
1003        in->seg.xlt_oct_size = cpu_to_be32(get_octo_len(virt_addr, length, 1 << page_shift));
1004        in->seg.log2_page_size = page_shift;
1005        in->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
1006        in->xlat_oct_act_size = cpu_to_be32(get_octo_len(virt_addr, length,
1007                                                         1 << page_shift));
1008        err = mlx5_core_create_mkey(dev->mdev, &mr->mmr, in, inlen, NULL,
1009                                    NULL, NULL);
1010        if (err) {
1011                mlx5_ib_warn(dev, "create mkey failed\n");
1012                goto err_2;
1013        }
1014        mr->umem = umem;
1015        mr->dev = dev;
1016        mr->live = 1;
1017        kvfree(in);
1018
1019        mlx5_ib_dbg(dev, "mkey = 0x%x\n", mr->mmr.key);
1020
1021        return mr;
1022
1023err_2:
1024        kvfree(in);
1025
1026err_1:
1027        kfree(mr);
1028
1029        return ERR_PTR(err);
1030}
1031
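/*
 * reg_user_mr verb: pin the user memory, compute the best page size, then
 * register through the UMR/cache fast path when the order allows it,
 * otherwise through reg_create().  For ODP umems the MR is published to the
 * invalidation handlers only after it is fully initialized.
 */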
1032struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
1033                                  u64 virt_addr, int access_flags,
1034                                  struct ib_udata *udata)
1035{
1036        struct mlx5_ib_dev *dev = to_mdev(pd->device);
1037        struct mlx5_ib_mr *mr = NULL;
1038        struct ib_umem *umem;
1039        int page_shift;
1040        int npages;
1041        int ncont;
1042        int order;
1043        int err;
1044
1045        mlx5_ib_dbg(dev, "start 0x%llx, virt_addr 0x%llx, length 0x%llx, access_flags 0x%x\n",
1046                    start, virt_addr, length, access_flags);
1047        umem = ib_umem_get(pd->uobject->context, start, length, access_flags,
1048                           0);
1049        if (IS_ERR(umem)) {
1050                mlx5_ib_dbg(dev, "umem get failed (%ld)\n", PTR_ERR(umem));
 1051                return ERR_CAST(umem);
1052        }
1053
1054        mlx5_ib_cont_pages(umem, start, &npages, &page_shift, &ncont, &order);
1055        if (!npages) {
1056                mlx5_ib_warn(dev, "avoid zero region\n");
1057                err = -EINVAL;
1058                goto error;
1059        }
1060
1061        mlx5_ib_dbg(dev, "npages %d, ncont %d, order %d, page_shift %d\n",
1062                    npages, ncont, order, page_shift);
1063
1064        if (use_umr(order)) {
1065                mr = reg_umr(pd, umem, virt_addr, length, ncont, page_shift,
1066                             order, access_flags);
1067                if (PTR_ERR(mr) == -EAGAIN) {
 1068                        mlx5_ib_dbg(dev, "cache empty for order %d\n", order);
1069                        mr = NULL;
1070                }
1071        } else if (access_flags & IB_ACCESS_ON_DEMAND) {
1072                err = -EINVAL;
 1073                pr_err("Got MR registration for ODP MR > 512MB, not supported for Connect-IB\n");
1074                goto error;
1075        }
1076
1077        if (!mr)
1078                mr = reg_create(pd, virt_addr, length, umem, ncont, page_shift,
1079                                access_flags);
1080
1081        if (IS_ERR(mr)) {
1082                err = PTR_ERR(mr);
1083                goto error;
1084        }
1085
1086        mlx5_ib_dbg(dev, "mkey 0x%x\n", mr->mmr.key);
1087
1088        mr->umem = umem;
1089        mr->npages = npages;
1090        atomic_add(npages, &dev->mdev->priv.reg_pages);
1091        mr->ibmr.lkey = mr->mmr.key;
1092        mr->ibmr.rkey = mr->mmr.key;
1093
1094#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
1095        if (umem->odp_data) {
1096                /*
1097                 * This barrier prevents the compiler from moving the
1098                 * setting of umem->odp_data->private to point to our
 1099                 * MR, before reg_umr finishes, to ensure that the MR
 1100                 * initialization has finished before starting to
1101                 * handle invalidations.
1102                 */
1103                smp_wmb();
1104                mr->umem->odp_data->private = mr;
1105                /*
1106                 * Make sure we will see the new
1107                 * umem->odp_data->private value in the invalidation
1108                 * routines, before we can get page faults on the
1109                 * MR. Page faults can happen once we put the MR in
1110                 * the tree, below this line. Without the barrier,
1111                 * there can be a fault handling and an invalidation
1112                 * before umem->odp_data->private == mr is visible to
1113                 * the invalidation handler.
1114                 */
1115                smp_wmb();
1116        }
1117#endif
1118
1119        return &mr->ibmr;
1120
1121error:
1122        /*
1123         * Destroy the umem *before* destroying the MR, to ensure we
1124         * will not have any in-flight notifiers when destroying the
1125         * MR.
1126         *
1127         * As the MR is completely invalid to begin with, and this
1128         * error path is only taken if we can't push the mr entry into
1129         * the pagefault tree, this is safe.
1130         */
1131
1132        ib_umem_release(umem);
 1133        /* Kill the MR, if one was created, and return an error code. */
 1134        if (!IS_ERR_OR_NULL(mr))
                     clean_mr(mr);
1135        return ERR_PTR(err);
1136}
1137
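/*
 * Invalidate a cache-backed mkey with a UMR "unreg" WQE so the MR can be
 * returned to the cache for reuse.
 */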
1138static int unreg_umr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
1139{
1140        struct umr_common *umrc = &dev->umrc;
1141        struct mlx5_ib_umr_context umr_context;
1142        struct ib_send_wr wr, *bad;
1143        int err;
1144
1145        memset(&wr, 0, sizeof(wr));
1146        wr.wr_id = (u64)(unsigned long)&umr_context;
1147        prep_umr_unreg_wqe(dev, &wr, mr->mmr.key);
1148
1149        mlx5_ib_init_umr_context(&umr_context);
1150        down(&umrc->sem);
1151        err = ib_post_send(umrc->qp, &wr, &bad);
1152        if (err) {
1153                up(&umrc->sem);
1154                mlx5_ib_dbg(dev, "err %d\n", err);
1155                goto error;
1156        } else {
1157                wait_for_completion(&umr_context.done);
1158                up(&umrc->sem);
1159        }
1160        if (umr_context.status != IB_WC_SUCCESS) {
1161                mlx5_ib_warn(dev, "unreg umr failed\n");
1162                err = -EFAULT;
1163                goto error;
1164        }
1165        return 0;
1166
1167error:
1168        return err;
1169}
1170
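/*
 * Release the hardware state behind an MR: cache-backed (umred) MRs are
 * invalidated via UMR and handed back to the cache, others have their mkey
 * destroyed and the mlx5_ib_mr structure freed.
 */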
1171static int clean_mr(struct mlx5_ib_mr *mr)
1172{
1173        struct mlx5_ib_dev *dev = to_mdev(mr->ibmr.device);
1174        int umred = mr->umred;
1175        int err;
1176
1177        if (!umred) {
1178                err = destroy_mkey(dev, mr);
1179                if (err) {
1180                        mlx5_ib_warn(dev, "failed to destroy mkey 0x%x (%d)\n",
1181                                     mr->mmr.key, err);
1182                        return err;
1183                }
1184        } else {
1185                err = unreg_umr(dev, mr);
1186                if (err) {
1187                        mlx5_ib_warn(dev, "failed unregister\n");
1188                        return err;
1189                }
1190                free_cached_mr(dev, mr);
1191        }
1192
1193        if (!umred)
1194                kfree(mr);
1195
1196        return 0;
1197}
1198
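/*
 * dereg_mr verb.  For ODP MRs the umem is torn down first, after quiescing
 * the page-fault handlers through SRCU, so no invalidation can race with the
 * MR destruction.
 */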
1199int mlx5_ib_dereg_mr(struct ib_mr *ibmr)
1200{
1201        struct mlx5_ib_dev *dev = to_mdev(ibmr->device);
1202        struct mlx5_ib_mr *mr = to_mmr(ibmr);
1203        int npages = mr->npages;
1204        struct ib_umem *umem = mr->umem;
1205
1206#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
1207        if (umem && umem->odp_data) {
1208                /* Prevent new page faults from succeeding */
1209                mr->live = 0;
1210                /* Wait for all running page-fault handlers to finish. */
1211                synchronize_srcu(&dev->mr_srcu);
1212                /* Destroy all page mappings */
1213                mlx5_ib_invalidate_range(umem, ib_umem_start(umem),
1214                                         ib_umem_end(umem));
1215                /*
1216                 * We kill the umem before the MR for ODP,
1217                 * so that there will not be any invalidations in
1218                 * flight, looking at the *mr struct.
1219                 */
1220                ib_umem_release(umem);
1221                atomic_sub(npages, &dev->mdev->priv.reg_pages);
1222
1223                /* Avoid double-freeing the umem. */
1224                umem = NULL;
1225        }
1226#endif
1227
1228        clean_mr(mr);
1229
1230        if (umem) {
1231                ib_umem_release(umem);
1232                atomic_sub(npages, &dev->mdev->priv.reg_pages);
1233        }
1234
1235        return 0;
1236}
1237
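/*
 * create_mr verb: allocate a free-standing mkey for later registration.
 * With IB_MR_SIGNATURE_EN it is created in KLM mode with BSF enabled and a
 * pair of PSVs for signature offload.
 */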
1238struct ib_mr *mlx5_ib_create_mr(struct ib_pd *pd,
1239                                struct ib_mr_init_attr *mr_init_attr)
1240{
1241        struct mlx5_ib_dev *dev = to_mdev(pd->device);
1242        struct mlx5_create_mkey_mbox_in *in;
1243        struct mlx5_ib_mr *mr;
1244        int access_mode, err;
1245        int ndescs = roundup(mr_init_attr->max_reg_descriptors, 4);
1246
1247        mr = kzalloc(sizeof(*mr), GFP_KERNEL);
1248        if (!mr)
1249                return ERR_PTR(-ENOMEM);
1250
1251        in = kzalloc(sizeof(*in), GFP_KERNEL);
1252        if (!in) {
1253                err = -ENOMEM;
1254                goto err_free;
1255        }
1256
1257        in->seg.status = MLX5_MKEY_STATUS_FREE;
1258        in->seg.xlt_oct_size = cpu_to_be32(ndescs);
1259        in->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
1260        in->seg.flags_pd = cpu_to_be32(to_mpd(pd)->pdn);
1261        access_mode = MLX5_ACCESS_MODE_MTT;
1262
1263        if (mr_init_attr->flags & IB_MR_SIGNATURE_EN) {
1264                u32 psv_index[2];
1265
1266                in->seg.flags_pd = cpu_to_be32(be32_to_cpu(in->seg.flags_pd) |
1267                                                           MLX5_MKEY_BSF_EN);
1268                in->seg.bsfs_octo_size = cpu_to_be32(MLX5_MKEY_BSF_OCTO_SIZE);
1269                mr->sig = kzalloc(sizeof(*mr->sig), GFP_KERNEL);
1270                if (!mr->sig) {
1271                        err = -ENOMEM;
1272                        goto err_free_in;
1273                }
1274
1275                /* create mem & wire PSVs */
1276                err = mlx5_core_create_psv(dev->mdev, to_mpd(pd)->pdn,
1277                                           2, psv_index);
1278                if (err)
1279                        goto err_free_sig;
1280
1281                access_mode = MLX5_ACCESS_MODE_KLM;
1282                mr->sig->psv_memory.psv_idx = psv_index[0];
1283                mr->sig->psv_wire.psv_idx = psv_index[1];
1284
1285                mr->sig->sig_status_checked = true;
1286                mr->sig->sig_err_exists = false;
1287                /* Next UMR, Arm SIGERR */
1288                ++mr->sig->sigerr_count;
1289        }
1290
1291        in->seg.flags = MLX5_PERM_UMR_EN | access_mode;
1292        err = mlx5_core_create_mkey(dev->mdev, &mr->mmr, in, sizeof(*in),
1293                                    NULL, NULL, NULL);
1294        if (err)
1295                goto err_destroy_psv;
1296
1297        mr->ibmr.lkey = mr->mmr.key;
1298        mr->ibmr.rkey = mr->mmr.key;
1299        mr->umem = NULL;
1300        kfree(in);
1301
1302        return &mr->ibmr;
1303
1304err_destroy_psv:
1305        if (mr->sig) {
1306                if (mlx5_core_destroy_psv(dev->mdev,
1307                                          mr->sig->psv_memory.psv_idx))
1308                        mlx5_ib_warn(dev, "failed to destroy mem psv %d\n",
1309                                     mr->sig->psv_memory.psv_idx);
1310                if (mlx5_core_destroy_psv(dev->mdev,
1311                                          mr->sig->psv_wire.psv_idx))
1312                        mlx5_ib_warn(dev, "failed to destroy wire psv %d\n",
1313                                     mr->sig->psv_wire.psv_idx);
1314        }
1315err_free_sig:
1316        kfree(mr->sig);
1317err_free_in:
1318        kfree(in);
1319err_free:
1320        kfree(mr);
1321        return ERR_PTR(err);
1322}
1323
1324int mlx5_ib_destroy_mr(struct ib_mr *ibmr)
1325{
1326        struct mlx5_ib_dev *dev = to_mdev(ibmr->device);
1327        struct mlx5_ib_mr *mr = to_mmr(ibmr);
1328        int err;
1329
1330        if (mr->sig) {
1331                if (mlx5_core_destroy_psv(dev->mdev,
1332                                          mr->sig->psv_memory.psv_idx))
1333                        mlx5_ib_warn(dev, "failed to destroy mem psv %d\n",
1334                                     mr->sig->psv_memory.psv_idx);
1335                if (mlx5_core_destroy_psv(dev->mdev,
1336                                          mr->sig->psv_wire.psv_idx))
1337                        mlx5_ib_warn(dev, "failed to destroy wire psv %d\n",
1338                                     mr->sig->psv_wire.psv_idx);
1339                kfree(mr->sig);
1340        }
1341
1342        err = destroy_mkey(dev, mr);
1343        if (err) {
1344                mlx5_ib_warn(dev, "failed to destroy mkey 0x%x (%d)\n",
1345                             mr->mmr.key, err);
1346                return err;
1347        }
1348
1349        kfree(mr);
1350
1351        return err;
1352}
1353
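/* Allocate an mkey in the free state for use with fast register work requests. */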
1354struct ib_mr *mlx5_ib_alloc_fast_reg_mr(struct ib_pd *pd,
1355                                        int max_page_list_len)
1356{
1357        struct mlx5_ib_dev *dev = to_mdev(pd->device);
1358        struct mlx5_create_mkey_mbox_in *in;
1359        struct mlx5_ib_mr *mr;
1360        int err;
1361
1362        mr = kzalloc(sizeof(*mr), GFP_KERNEL);
1363        if (!mr)
1364                return ERR_PTR(-ENOMEM);
1365
1366        in = kzalloc(sizeof(*in), GFP_KERNEL);
1367        if (!in) {
1368                err = -ENOMEM;
1369                goto err_free;
1370        }
1371
1372        in->seg.status = MLX5_MKEY_STATUS_FREE;
1373        in->seg.xlt_oct_size = cpu_to_be32((max_page_list_len + 1) / 2);
1374        in->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
1375        in->seg.flags = MLX5_PERM_UMR_EN | MLX5_ACCESS_MODE_MTT;
1376        in->seg.flags_pd = cpu_to_be32(to_mpd(pd)->pdn);
1377        /*
1378         * TBD not needed - issue 197292 */
1379        in->seg.log2_page_size = PAGE_SHIFT;
1380
1381        err = mlx5_core_create_mkey(dev->mdev, &mr->mmr, in, sizeof(*in), NULL,
1382                                    NULL, NULL);
1383        kfree(in);
1384        if (err)
1385                goto err_free;
1386
1387        mr->ibmr.lkey = mr->mmr.key;
1388        mr->ibmr.rkey = mr->mmr.key;
1389        mr->umem = NULL;
1390
1391        return &mr->ibmr;
1392
1393err_free:
1394        kfree(mr);
1395        return ERR_PTR(err);
1396}
1397
1398struct ib_fast_reg_page_list *mlx5_ib_alloc_fast_reg_page_list(struct ib_device *ibdev,
1399                                                               int page_list_len)
1400{
1401        struct mlx5_ib_fast_reg_page_list *mfrpl;
1402        int size = page_list_len * sizeof(u64);
1403
1404        mfrpl = kmalloc(sizeof(*mfrpl), GFP_KERNEL);
1405        if (!mfrpl)
1406                return ERR_PTR(-ENOMEM);
1407
1408        mfrpl->ibfrpl.page_list = kmalloc(size, GFP_KERNEL);
1409        if (!mfrpl->ibfrpl.page_list)
1410                goto err_free;
1411
1412        mfrpl->mapped_page_list = dma_alloc_coherent(ibdev->dma_device,
1413                                                     size, &mfrpl->map,
1414                                                     GFP_KERNEL);
1415        if (!mfrpl->mapped_page_list)
1416                goto err_free;
1417
1418        WARN_ON(mfrpl->map & 0x3f);
1419
1420        return &mfrpl->ibfrpl;
1421
1422err_free:
1423        kfree(mfrpl->ibfrpl.page_list);
1424        kfree(mfrpl);
1425        return ERR_PTR(-ENOMEM);
1426}
1427
1428void mlx5_ib_free_fast_reg_page_list(struct ib_fast_reg_page_list *page_list)
1429{
1430        struct mlx5_ib_fast_reg_page_list *mfrpl = to_mfrpl(page_list);
1431        struct mlx5_ib_dev *dev = to_mdev(page_list->device);
1432        int size = page_list->max_page_list_len * sizeof(u64);
1433
1434        dma_free_coherent(&dev->mdev->pdev->dev, size, mfrpl->mapped_page_list,
1435                          mfrpl->map);
1436        kfree(mfrpl->ibfrpl.page_list);
1437        kfree(mfrpl);
1438}
1439
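/*
 * check_mr_status verb: report, and then clear, the latched signature error
 * of a signature-enabled MR.  Only IB_MR_CHECK_SIG_STATUS is accepted in the
 * check mask.
 */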
1440int mlx5_ib_check_mr_status(struct ib_mr *ibmr, u32 check_mask,
1441                            struct ib_mr_status *mr_status)
1442{
1443        struct mlx5_ib_mr *mmr = to_mmr(ibmr);
1444        int ret = 0;
1445
1446        if (check_mask & ~IB_MR_CHECK_SIG_STATUS) {
1447                pr_err("Invalid status check mask\n");
1448                ret = -EINVAL;
1449                goto done;
1450        }
1451
1452        mr_status->fail_status = 0;
1453        if (check_mask & IB_MR_CHECK_SIG_STATUS) {
1454                if (!mmr->sig) {
1455                        ret = -EINVAL;
1456                        pr_err("signature status check requested on a non-signature enabled MR\n");
1457                        goto done;
1458                }
1459
1460                mmr->sig->sig_status_checked = true;
1461                if (!mmr->sig->sig_err_exists)
1462                        goto done;
1463
1464                if (ibmr->lkey == mmr->sig->err_item.key)
1465                        memcpy(&mr_status->sig_err, &mmr->sig->err_item,
1466                               sizeof(mr_status->sig_err));
1467                else {
1468                        mr_status->sig_err.err_type = IB_SIG_BAD_GUARD;
1469                        mr_status->sig_err.sig_err_offset = 0;
1470                        mr_status->sig_err.key = mmr->sig->err_item.key;
1471                }
1472
1473                mmr->sig->sig_err_exists = false;
1474                mr_status->fail_status |= IB_MR_CHECK_SIG_STATUS;
1475        }
1476
1477done:
1478        return ret;
1479}
1480