linux/drivers/infiniband/hw/mlx5/mr.c
   1/*
   2 * Copyright (c) 2013, Mellanox Technologies inc.  All rights reserved.
   3 *
   4 * This software is available to you under a choice of one of two
   5 * licenses.  You may choose to be licensed under the terms of the GNU
   6 * General Public License (GPL) Version 2, available from the file
   7 * COPYING in the main directory of this source tree, or the
   8 * OpenIB.org BSD license below:
   9 *
  10 *     Redistribution and use in source and binary forms, with or
  11 *     without modification, are permitted provided that the following
  12 *     conditions are met:
  13 *
  14 *      - Redistributions of source code must retain the above
  15 *        copyright notice, this list of conditions and the following
  16 *        disclaimer.
  17 *
  18 *      - Redistributions in binary form must reproduce the above
  19 *        copyright notice, this list of conditions and the following
  20 *        disclaimer in the documentation and/or other materials
  21 *        provided with the distribution.
  22 *
  23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
  27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
  28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  30 * SOFTWARE.
  31 */
  32
  33
  34#include <linux/kref.h>
  35#include <linux/random.h>
  36#include <linux/debugfs.h>
  37#include <linux/export.h>
  38#include <linux/delay.h>
  39#include <rdma/ib_umem.h>
  40#include "mlx5_ib.h"
  41
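/*
 * Memory region (MR) support for mlx5: a per-order cache of pre-created,
 * free mkeys that is refilled asynchronously from a workqueue, a UMR-based
 * fast registration path that consumes cached mkeys, and a synchronous
 * slow path (reg_create) for regions the cache cannot serve.
 */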
  42enum {
  43        MAX_PENDING_REG_MR = 8,
  44};
  45
  46enum {
  47        MLX5_UMR_ALIGN  = 2048
  48};
  49
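/* Round ptr up to the next align-byte boundary; align must be a power of two. */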
  50static __be64 *mr_align(__be64 *ptr, int align)
  51{
  52        unsigned long mask = align - 1;
  53
  54        return (__be64 *)(((unsigned long)ptr + mask) & ~mask);
  55}
  56
  57static int order2idx(struct mlx5_ib_dev *dev, int order)
  58{
  59        struct mlx5_mr_cache *cache = &dev->cache;
  60
  61        if (order < cache->ent[0].order)
  62                return 0;
  63        else
  64                return order - cache->ent[0].order;
  65}
  66
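/*
 * Completion callback for the asynchronous mkey creation issued by
 * add_keys().  Failures throttle further cache refills for one second via
 * fill_delay and delay_timer; on success the new mkey is given a variant
 * key, added to its cache bucket and inserted into the mkey radix tree.
 */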
  67static void reg_mr_callback(int status, void *context)
  68{
  69        struct mlx5_ib_mr *mr = context;
  70        struct mlx5_ib_dev *dev = mr->dev;
  71        struct mlx5_mr_cache *cache = &dev->cache;
  72        int c = order2idx(dev, mr->order);
  73        struct mlx5_cache_ent *ent = &cache->ent[c];
  74        u8 key;
  75        unsigned long flags;
  76        struct mlx5_mr_table *table = &dev->mdev->priv.mr_table;
  77        int err;
  78
  79        spin_lock_irqsave(&ent->lock, flags);
  80        ent->pending--;
  81        spin_unlock_irqrestore(&ent->lock, flags);
  82        if (status) {
  83                mlx5_ib_warn(dev, "async reg mr failed. status %d\n", status);
  84                kfree(mr);
  85                dev->fill_delay = 1;
  86                mod_timer(&dev->delay_timer, jiffies + HZ);
  87                return;
  88        }
  89
  90        if (mr->out.hdr.status) {
   91                mlx5_ib_warn(dev, "failed - status %d, syndrome 0x%x\n",
  92                             mr->out.hdr.status,
  93                             be32_to_cpu(mr->out.hdr.syndrome));
  94                kfree(mr);
  95                dev->fill_delay = 1;
  96                mod_timer(&dev->delay_timer, jiffies + HZ);
  97                return;
  98        }
  99
 100        spin_lock_irqsave(&dev->mdev->priv.mkey_lock, flags);
 101        key = dev->mdev->priv.mkey_key++;
 102        spin_unlock_irqrestore(&dev->mdev->priv.mkey_lock, flags);
 103        mr->mmr.key = mlx5_idx_to_mkey(be32_to_cpu(mr->out.mkey) & 0xffffff) | key;
 104
 105        cache->last_add = jiffies;
 106
 107        spin_lock_irqsave(&ent->lock, flags);
 108        list_add_tail(&mr->list, &ent->head);
 109        ent->cur++;
 110        ent->size++;
 111        spin_unlock_irqrestore(&ent->lock, flags);
 112
 113        write_lock_irqsave(&table->lock, flags);
 114        err = radix_tree_insert(&table->tree, mlx5_base_mkey(mr->mmr.key),
 115                                &mr->mmr);
 116        if (err)
 117                pr_err("Error inserting to mr tree. 0x%x\n", -err);
 118        write_unlock_irqrestore(&table->lock, flags);
 119}
 120
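/*
 * Asynchronously create up to num free mkeys for cache bucket c.  At most
 * MAX_PENDING_REG_MR creations may be outstanding per bucket; once that
 * limit is hit the loop stops and -EAGAIN is returned.
 */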
 121static int add_keys(struct mlx5_ib_dev *dev, int c, int num)
 122{
 123        struct mlx5_mr_cache *cache = &dev->cache;
 124        struct mlx5_cache_ent *ent = &cache->ent[c];
 125        struct mlx5_create_mkey_mbox_in *in;
 126        struct mlx5_ib_mr *mr;
 127        int npages = 1 << ent->order;
 128        int err = 0;
 129        int i;
 130
 131        in = kzalloc(sizeof(*in), GFP_KERNEL);
 132        if (!in)
 133                return -ENOMEM;
 134
 135        for (i = 0; i < num; i++) {
 136                if (ent->pending >= MAX_PENDING_REG_MR) {
 137                        err = -EAGAIN;
 138                        break;
 139                }
 140
 141                mr = kzalloc(sizeof(*mr), GFP_KERNEL);
 142                if (!mr) {
 143                        err = -ENOMEM;
 144                        break;
 145                }
 146                mr->order = ent->order;
 147                mr->umred = 1;
 148                mr->dev = dev;
  149                in->seg.status = 1 << 6; /* free */
 150                in->seg.xlt_oct_size = cpu_to_be32((npages + 1) / 2);
 151                in->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
 152                in->seg.flags = MLX5_ACCESS_MODE_MTT | MLX5_PERM_UMR_EN;
 153                in->seg.log2_page_size = 12;
 154
 155                spin_lock_irq(&ent->lock);
 156                ent->pending++;
 157                spin_unlock_irq(&ent->lock);
 158                err = mlx5_core_create_mkey(dev->mdev, &mr->mmr, in,
 159                                            sizeof(*in), reg_mr_callback,
 160                                            mr, &mr->out);
 161                if (err) {
 162                        mlx5_ib_warn(dev, "create mkey failed %d\n", err);
 163                        kfree(mr);
 164                        break;
 165                }
 166        }
 167
 168        kfree(in);
 169        return err;
 170}
 171
 172static void remove_keys(struct mlx5_ib_dev *dev, int c, int num)
 173{
 174        struct mlx5_mr_cache *cache = &dev->cache;
 175        struct mlx5_cache_ent *ent = &cache->ent[c];
 176        struct mlx5_ib_mr *mr;
 177        int err;
 178        int i;
 179
 180        for (i = 0; i < num; i++) {
 181                spin_lock_irq(&ent->lock);
 182                if (list_empty(&ent->head)) {
 183                        spin_unlock_irq(&ent->lock);
 184                        return;
 185                }
 186                mr = list_first_entry(&ent->head, struct mlx5_ib_mr, list);
 187                list_del(&mr->list);
 188                ent->cur--;
 189                ent->size--;
 190                spin_unlock_irq(&ent->lock);
 191                err = mlx5_core_destroy_mkey(dev->mdev, &mr->mmr);
 192                if (err)
 193                        mlx5_ib_warn(dev, "failed destroy mkey\n");
 194                else
 195                        kfree(mr);
 196        }
 197}
 198
 199static ssize_t size_write(struct file *filp, const char __user *buf,
 200                          size_t count, loff_t *pos)
 201{
 202        struct mlx5_cache_ent *ent = filp->private_data;
 203        struct mlx5_ib_dev *dev = ent->dev;
 204        char lbuf[20];
 205        u32 var;
 206        int err;
 207        int c;
 208
  209        if (copy_from_user(lbuf, buf, min(count, sizeof(lbuf) - 1)))
 210                return -EFAULT;
 211
 212        c = order2idx(dev, ent->order);
  213        lbuf[min(count, sizeof(lbuf) - 1)] = 0;
 214
 215        if (sscanf(lbuf, "%u", &var) != 1)
 216                return -EINVAL;
 217
 218        if (var < ent->limit)
 219                return -EINVAL;
 220
 221        if (var > ent->size) {
 222                do {
 223                        err = add_keys(dev, c, var - ent->size);
 224                        if (err && err != -EAGAIN)
 225                                return err;
 226
 227                        usleep_range(3000, 5000);
 228                } while (err);
 229        } else if (var < ent->size) {
 230                remove_keys(dev, c, ent->size - var);
 231        }
 232
 233        return count;
 234}
 235
 236static ssize_t size_read(struct file *filp, char __user *buf, size_t count,
 237                         loff_t *pos)
 238{
 239        struct mlx5_cache_ent *ent = filp->private_data;
 240        char lbuf[20];
 241        int err;
 242
 243        if (*pos)
 244                return 0;
 245
 246        err = snprintf(lbuf, sizeof(lbuf), "%d\n", ent->size);
 247        if (err < 0)
 248                return err;
 249
 250        if (copy_to_user(buf, lbuf, err))
 251                return -EFAULT;
 252
 253        *pos += err;
 254
 255        return err;
 256}
 257
 258static const struct file_operations size_fops = {
 259        .owner  = THIS_MODULE,
 260        .open   = simple_open,
 261        .write  = size_write,
 262        .read   = size_read,
 263};
 264
 265static ssize_t limit_write(struct file *filp, const char __user *buf,
 266                           size_t count, loff_t *pos)
 267{
 268        struct mlx5_cache_ent *ent = filp->private_data;
 269        struct mlx5_ib_dev *dev = ent->dev;
 270        char lbuf[20];
 271        u32 var;
 272        int err;
 273        int c;
 274
  275        if (copy_from_user(lbuf, buf, min(count, sizeof(lbuf) - 1)))
 276                return -EFAULT;
 277
 278        c = order2idx(dev, ent->order);
  279        lbuf[min(count, sizeof(lbuf) - 1)] = 0;
 280
 281        if (sscanf(lbuf, "%u", &var) != 1)
 282                return -EINVAL;
 283
 284        if (var > ent->size)
 285                return -EINVAL;
 286
 287        ent->limit = var;
 288
 289        if (ent->cur < ent->limit) {
 290                err = add_keys(dev, c, 2 * ent->limit - ent->cur);
 291                if (err)
 292                        return err;
 293        }
 294
 295        return count;
 296}
 297
 298static ssize_t limit_read(struct file *filp, char __user *buf, size_t count,
 299                          loff_t *pos)
 300{
 301        struct mlx5_cache_ent *ent = filp->private_data;
 302        char lbuf[20];
 303        int err;
 304
 305        if (*pos)
 306                return 0;
 307
 308        err = snprintf(lbuf, sizeof(lbuf), "%d\n", ent->limit);
 309        if (err < 0)
 310                return err;
 311
 312        if (copy_to_user(buf, lbuf, err))
 313                return -EFAULT;
 314
 315        *pos += err;
 316
 317        return err;
 318}
 319
 320static const struct file_operations limit_fops = {
 321        .owner  = THIS_MODULE,
 322        .open   = simple_open,
 323        .write  = limit_write,
 324        .read   = limit_read,
 325};
 326
 327static int someone_adding(struct mlx5_mr_cache *cache)
 328{
 329        int i;
 330
 331        for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) {
 332                if (cache->ent[i].cur < cache->ent[i].limit)
 333                        return 1;
 334        }
 335
 336        return 0;
 337}
 338
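/*
 * Workqueue maintenance of a single cache bucket: grow it one mkey at a
 * time while it holds fewer than 2 * limit entries (backing off 3ms on
 * -EAGAIN and 1s on other errors), and shrink it again once it has stayed
 * above 2 * limit for about five minutes with no bucket still filling.
 */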
 339static void __cache_work_func(struct mlx5_cache_ent *ent)
 340{
 341        struct mlx5_ib_dev *dev = ent->dev;
 342        struct mlx5_mr_cache *cache = &dev->cache;
 343        int i = order2idx(dev, ent->order);
 344        int err;
 345
 346        if (cache->stopped)
 347                return;
 348
 349        ent = &dev->cache.ent[i];
 350        if (ent->cur < 2 * ent->limit && !dev->fill_delay) {
 351                err = add_keys(dev, i, 1);
 352                if (ent->cur < 2 * ent->limit) {
 353                        if (err == -EAGAIN) {
 354                                mlx5_ib_dbg(dev, "returned eagain, order %d\n",
 355                                            i + 2);
 356                                queue_delayed_work(cache->wq, &ent->dwork,
 357                                                   msecs_to_jiffies(3));
 358                        } else if (err) {
 359                                mlx5_ib_warn(dev, "command failed order %d, err %d\n",
 360                                             i + 2, err);
 361                                queue_delayed_work(cache->wq, &ent->dwork,
 362                                                   msecs_to_jiffies(1000));
 363                        } else {
 364                                queue_work(cache->wq, &ent->work);
 365                        }
 366                }
 367        } else if (ent->cur > 2 * ent->limit) {
 368                if (!someone_adding(cache) &&
 369                    time_after(jiffies, cache->last_add + 300 * HZ)) {
 370                        remove_keys(dev, i, 1);
 371                        if (ent->cur > ent->limit)
 372                                queue_work(cache->wq, &ent->work);
 373                } else {
 374                        queue_delayed_work(cache->wq, &ent->dwork, 300 * HZ);
 375                }
 376        }
 377}
 378
 379static void delayed_cache_work_func(struct work_struct *work)
 380{
 381        struct mlx5_cache_ent *ent;
 382
 383        ent = container_of(work, struct mlx5_cache_ent, dwork.work);
 384        __cache_work_func(ent);
 385}
 386
 387static void cache_work_func(struct work_struct *work)
 388{
 389        struct mlx5_cache_ent *ent;
 390
 391        ent = container_of(work, struct mlx5_cache_ent, work);
 392        __cache_work_func(ent);
 393}
 394
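/*
 * Take an MR from the smallest cache bucket that can serve the requested
 * order.  Empty buckets get their refill work queued, and a cache miss is
 * recorded if no bucket could provide an entry.
 */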
 395static struct mlx5_ib_mr *alloc_cached_mr(struct mlx5_ib_dev *dev, int order)
 396{
 397        struct mlx5_mr_cache *cache = &dev->cache;
 398        struct mlx5_ib_mr *mr = NULL;
 399        struct mlx5_cache_ent *ent;
 400        int c;
 401        int i;
 402
 403        c = order2idx(dev, order);
 404        if (c < 0 || c >= MAX_MR_CACHE_ENTRIES) {
 405                mlx5_ib_warn(dev, "order %d, cache index %d\n", order, c);
 406                return NULL;
 407        }
 408
 409        for (i = c; i < MAX_MR_CACHE_ENTRIES; i++) {
 410                ent = &cache->ent[i];
 411
 412                mlx5_ib_dbg(dev, "order %d, cache index %d\n", ent->order, i);
 413
 414                spin_lock_irq(&ent->lock);
 415                if (!list_empty(&ent->head)) {
 416                        mr = list_first_entry(&ent->head, struct mlx5_ib_mr,
 417                                              list);
 418                        list_del(&mr->list);
 419                        ent->cur--;
 420                        spin_unlock_irq(&ent->lock);
 421                        if (ent->cur < ent->limit)
 422                                queue_work(cache->wq, &ent->work);
 423                        break;
 424                }
 425                spin_unlock_irq(&ent->lock);
 426
 427                queue_work(cache->wq, &ent->work);
 428
 429                if (mr)
 430                        break;
 431        }
 432
 433        if (!mr)
 434                cache->ent[c].miss++;
 435
 436        return mr;
 437}
 438
 439static void free_cached_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
 440{
 441        struct mlx5_mr_cache *cache = &dev->cache;
 442        struct mlx5_cache_ent *ent;
 443        int shrink = 0;
 444        int c;
 445
 446        c = order2idx(dev, mr->order);
 447        if (c < 0 || c >= MAX_MR_CACHE_ENTRIES) {
 448                mlx5_ib_warn(dev, "order %d, cache index %d\n", mr->order, c);
 449                return;
 450        }
 451        ent = &cache->ent[c];
 452        spin_lock_irq(&ent->lock);
 453        list_add_tail(&mr->list, &ent->head);
 454        ent->cur++;
 455        if (ent->cur > 2 * ent->limit)
 456                shrink = 1;
 457        spin_unlock_irq(&ent->lock);
 458
 459        if (shrink)
 460                queue_work(cache->wq, &ent->work);
 461}
 462
 463static void clean_keys(struct mlx5_ib_dev *dev, int c)
 464{
 465        struct mlx5_mr_cache *cache = &dev->cache;
 466        struct mlx5_cache_ent *ent = &cache->ent[c];
 467        struct mlx5_ib_mr *mr;
 468        int err;
 469
 470        cancel_delayed_work(&ent->dwork);
 471        while (1) {
 472                spin_lock_irq(&ent->lock);
 473                if (list_empty(&ent->head)) {
 474                        spin_unlock_irq(&ent->lock);
 475                        return;
 476                }
 477                mr = list_first_entry(&ent->head, struct mlx5_ib_mr, list);
 478                list_del(&mr->list);
 479                ent->cur--;
 480                ent->size--;
 481                spin_unlock_irq(&ent->lock);
 482                err = mlx5_core_destroy_mkey(dev->mdev, &mr->mmr);
 483                if (err)
 484                        mlx5_ib_warn(dev, "failed destroy mkey\n");
 485                else
 486                        kfree(mr);
 487        }
 488}
 489
 490static int mlx5_mr_cache_debugfs_init(struct mlx5_ib_dev *dev)
 491{
 492        struct mlx5_mr_cache *cache = &dev->cache;
 493        struct mlx5_cache_ent *ent;
 494        int i;
 495
 496        if (!mlx5_debugfs_root)
 497                return 0;
 498
 499        cache->root = debugfs_create_dir("mr_cache", dev->mdev->priv.dbg_root);
 500        if (!cache->root)
 501                return -ENOMEM;
 502
 503        for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) {
 504                ent = &cache->ent[i];
 505                sprintf(ent->name, "%d", ent->order);
 506                ent->dir = debugfs_create_dir(ent->name,  cache->root);
 507                if (!ent->dir)
 508                        return -ENOMEM;
 509
 510                ent->fsize = debugfs_create_file("size", 0600, ent->dir, ent,
 511                                                 &size_fops);
 512                if (!ent->fsize)
 513                        return -ENOMEM;
 514
 515                ent->flimit = debugfs_create_file("limit", 0600, ent->dir, ent,
 516                                                  &limit_fops);
 517                if (!ent->flimit)
 518                        return -ENOMEM;
 519
 520                ent->fcur = debugfs_create_u32("cur", 0400, ent->dir,
 521                                               &ent->cur);
 522                if (!ent->fcur)
 523                        return -ENOMEM;
 524
 525                ent->fmiss = debugfs_create_u32("miss", 0600, ent->dir,
 526                                                &ent->miss);
 527                if (!ent->fmiss)
 528                        return -ENOMEM;
 529        }
 530
 531        return 0;
 532}
 533
 534static void mlx5_mr_cache_debugfs_cleanup(struct mlx5_ib_dev *dev)
 535{
 536        if (!mlx5_debugfs_root)
 537                return;
 538
 539        debugfs_remove_recursive(dev->cache.root);
 540}
 541
 542static void delay_time_func(unsigned long ctx)
 543{
 544        struct mlx5_ib_dev *dev = (struct mlx5_ib_dev *)ctx;
 545
 546        dev->fill_delay = 0;
 547}
 548
 549int mlx5_mr_cache_init(struct mlx5_ib_dev *dev)
 550{
 551        struct mlx5_mr_cache *cache = &dev->cache;
 552        struct mlx5_cache_ent *ent;
 553        int limit;
 554        int err;
 555        int i;
 556
 557        cache->wq = create_singlethread_workqueue("mkey_cache");
 558        if (!cache->wq) {
 559                mlx5_ib_warn(dev, "failed to create work queue\n");
 560                return -ENOMEM;
 561        }
 562
 563        setup_timer(&dev->delay_timer, delay_time_func, (unsigned long)dev);
 564        for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) {
 565                INIT_LIST_HEAD(&cache->ent[i].head);
 566                spin_lock_init(&cache->ent[i].lock);
 567
 568                ent = &cache->ent[i];
 569                INIT_LIST_HEAD(&ent->head);
 570                spin_lock_init(&ent->lock);
 571                ent->order = i + 2;
 572                ent->dev = dev;
 573
 574                if (dev->mdev->profile->mask & MLX5_PROF_MASK_MR_CACHE)
 575                        limit = dev->mdev->profile->mr_cache[i].limit;
 576                else
 577                        limit = 0;
 578
 579                INIT_WORK(&ent->work, cache_work_func);
 580                INIT_DELAYED_WORK(&ent->dwork, delayed_cache_work_func);
 581                ent->limit = limit;
 582                queue_work(cache->wq, &ent->work);
 583        }
 584
 585        err = mlx5_mr_cache_debugfs_init(dev);
 586        if (err)
 587                mlx5_ib_warn(dev, "cache debugfs failure\n");
 588
 589        return 0;
 590}
 591
 592int mlx5_mr_cache_cleanup(struct mlx5_ib_dev *dev)
 593{
 594        int i;
 595
 596        dev->cache.stopped = 1;
 597        flush_workqueue(dev->cache.wq);
 598
 599        mlx5_mr_cache_debugfs_cleanup(dev);
 600
 601        for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++)
 602                clean_keys(dev, i);
 603
 604        destroy_workqueue(dev->cache.wq);
 605        del_timer_sync(&dev->delay_timer);
 606
 607        return 0;
 608}
 609
 610struct ib_mr *mlx5_ib_get_dma_mr(struct ib_pd *pd, int acc)
 611{
 612        struct mlx5_ib_dev *dev = to_mdev(pd->device);
 613        struct mlx5_core_dev *mdev = dev->mdev;
 614        struct mlx5_create_mkey_mbox_in *in;
 615        struct mlx5_mkey_seg *seg;
 616        struct mlx5_ib_mr *mr;
 617        int err;
 618
 619        mr = kzalloc(sizeof(*mr), GFP_KERNEL);
 620        if (!mr)
 621                return ERR_PTR(-ENOMEM);
 622
 623        in = kzalloc(sizeof(*in), GFP_KERNEL);
 624        if (!in) {
 625                err = -ENOMEM;
 626                goto err_free;
 627        }
 628
 629        seg = &in->seg;
 630        seg->flags = convert_access(acc) | MLX5_ACCESS_MODE_PA;
 631        seg->flags_pd = cpu_to_be32(to_mpd(pd)->pdn | MLX5_MKEY_LEN64);
 632        seg->qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
 633        seg->start_addr = 0;
 634
 635        err = mlx5_core_create_mkey(mdev, &mr->mmr, in, sizeof(*in), NULL, NULL,
 636                                    NULL);
 637        if (err)
 638                goto err_in;
 639
 640        kfree(in);
 641        mr->ibmr.lkey = mr->mmr.key;
 642        mr->ibmr.rkey = mr->mmr.key;
 643        mr->umem = NULL;
 644
 645        return &mr->ibmr;
 646
 647err_in:
 648        kfree(in);
 649
 650err_free:
 651        kfree(mr);
 652
 653        return ERR_PTR(err);
 654}
 655
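/*
 * Translation table size in 16-byte octowords for a region of len bytes
 * starting at addr: each octoword holds two 8-byte page entries, hence
 * (npages + 1) / 2.
 */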
 656static int get_octo_len(u64 addr, u64 len, int page_size)
 657{
 658        u64 offset;
 659        int npages;
 660
 661        offset = addr & (page_size - 1);
 662        npages = ALIGN(len + offset, page_size) >> ilog2(page_size);
 663        return (npages + 1) / 2;
 664}
 665
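/*
 * Only regions of up to order 17 (2^17 pages) are registered through the
 * UMR/cache path; larger regions fall back to reg_create().
 */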
 666static int use_umr(int order)
 667{
 668        return order <= 17;
 669}
 670
 671static void prep_umr_reg_wqe(struct ib_pd *pd, struct ib_send_wr *wr,
 672                             struct ib_sge *sg, u64 dma, int n, u32 key,
 673                             int page_shift, u64 virt_addr, u64 len,
 674                             int access_flags)
 675{
 676        struct mlx5_ib_dev *dev = to_mdev(pd->device);
 677        struct ib_mr *mr = dev->umrc.mr;
 678
 679        sg->addr = dma;
 680        sg->length = ALIGN(sizeof(u64) * n, 64);
 681        sg->lkey = mr->lkey;
 682
 683        wr->next = NULL;
 684        wr->send_flags = 0;
 685        wr->sg_list = sg;
 686        if (n)
 687                wr->num_sge = 1;
 688        else
 689                wr->num_sge = 0;
 690
 691        wr->opcode = MLX5_IB_WR_UMR;
 692        wr->wr.fast_reg.page_list_len = n;
 693        wr->wr.fast_reg.page_shift = page_shift;
 694        wr->wr.fast_reg.rkey = key;
 695        wr->wr.fast_reg.iova_start = virt_addr;
 696        wr->wr.fast_reg.length = len;
 697        wr->wr.fast_reg.access_flags = access_flags;
 698        wr->wr.fast_reg.page_list = (struct ib_fast_reg_page_list *)pd;
 699}
 700
 701static void prep_umr_unreg_wqe(struct mlx5_ib_dev *dev,
 702                               struct ib_send_wr *wr, u32 key)
 703{
 704        wr->send_flags = MLX5_IB_SEND_UMR_UNREG;
 705        wr->opcode = MLX5_IB_WR_UMR;
 706        wr->wr.fast_reg.rkey = key;
 707}
 708
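/*
 * Completion handler for the UMR CQ: each completion carries a
 * mlx5_ib_umr_context in its wr_id; record the status, wake the waiter
 * and re-arm the CQ.
 */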
 709void mlx5_umr_cq_handler(struct ib_cq *cq, void *cq_context)
 710{
 711        struct mlx5_ib_umr_context *context;
 712        struct ib_wc wc;
 713        int err;
 714
 715        while (1) {
 716                err = ib_poll_cq(cq, 1, &wc);
 717                if (err < 0) {
 718                        pr_warn("poll cq error %d\n", err);
 719                        return;
 720                }
 721                if (err == 0)
 722                        break;
 723
 724                context = (struct mlx5_ib_umr_context *) (unsigned long) wc.wr_id;
 725                context->status = wc.status;
 726                complete(&context->done);
 727        }
 728        ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
 729}
 730
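/*
 * Fast-path registration: take a pre-created mkey from the cache, build
 * the page list (aligned to MLX5_UMR_ALIGN), DMA-map it and post a
 * MLX5_IB_WR_UMR work request on the UMR QP, then wait for completion.
 */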
 731static struct mlx5_ib_mr *reg_umr(struct ib_pd *pd, struct ib_umem *umem,
 732                                  u64 virt_addr, u64 len, int npages,
 733                                  int page_shift, int order, int access_flags)
 734{
 735        struct mlx5_ib_dev *dev = to_mdev(pd->device);
 736        struct device *ddev = dev->ib_dev.dma_device;
 737        struct umr_common *umrc = &dev->umrc;
 738        struct mlx5_ib_umr_context umr_context;
 739        struct ib_send_wr wr, *bad;
 740        struct mlx5_ib_mr *mr;
 741        struct ib_sge sg;
 742        int size = sizeof(u64) * npages;
 743        int err = 0;
 744        int i;
 745
 746        for (i = 0; i < 1; i++) {
 747                mr = alloc_cached_mr(dev, order);
 748                if (mr)
 749                        break;
 750
 751                err = add_keys(dev, order2idx(dev, order), 1);
 752                if (err && err != -EAGAIN) {
 753                        mlx5_ib_warn(dev, "add_keys failed, err %d\n", err);
 754                        break;
 755                }
 756        }
 757
 758        if (!mr)
 759                return ERR_PTR(-EAGAIN);
 760
 761        mr->pas = kmalloc(size + MLX5_UMR_ALIGN - 1, GFP_KERNEL);
 762        if (!mr->pas) {
 763                err = -ENOMEM;
 764                goto free_mr;
 765        }
 766
 767        mlx5_ib_populate_pas(dev, umem, page_shift,
 768                             mr_align(mr->pas, MLX5_UMR_ALIGN), 1);
 769
 770        mr->dma = dma_map_single(ddev, mr_align(mr->pas, MLX5_UMR_ALIGN), size,
 771                                 DMA_TO_DEVICE);
 772        if (dma_mapping_error(ddev, mr->dma)) {
 773                err = -ENOMEM;
 774                goto free_pas;
 775        }
 776
 777        memset(&wr, 0, sizeof(wr));
 778        wr.wr_id = (u64)(unsigned long)&umr_context;
 779        prep_umr_reg_wqe(pd, &wr, &sg, mr->dma, npages, mr->mmr.key, page_shift, virt_addr, len, access_flags);
 780
 781        mlx5_ib_init_umr_context(&umr_context);
 782        down(&umrc->sem);
 783        err = ib_post_send(umrc->qp, &wr, &bad);
 784        if (err) {
 785                mlx5_ib_warn(dev, "post send failed, err %d\n", err);
 786                goto unmap_dma;
 787        } else {
 788                wait_for_completion(&umr_context.done);
 789                if (umr_context.status != IB_WC_SUCCESS) {
 790                        mlx5_ib_warn(dev, "reg umr failed\n");
 791                        err = -EFAULT;
 792                }
 793        }
 794
 795        mr->mmr.iova = virt_addr;
 796        mr->mmr.size = len;
 797        mr->mmr.pd = to_mpd(pd)->pdn;
 798
 799unmap_dma:
 800        up(&umrc->sem);
 801        dma_unmap_single(ddev, mr->dma, size, DMA_TO_DEVICE);
 802
 803free_pas:
 804        kfree(mr->pas);
 805
 806free_mr:
 807        if (err) {
 808                free_cached_mr(dev, mr);
 809                return ERR_PTR(err);
 810        }
 811
 812        return mr;
 813}
 814
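/*
 * Slow-path registration: build a full create_mkey mailbox, including the
 * page list, and create the mkey synchronously without using the cache.
 */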
 815static struct mlx5_ib_mr *reg_create(struct ib_pd *pd, u64 virt_addr,
 816                                     u64 length, struct ib_umem *umem,
 817                                     int npages, int page_shift,
 818                                     int access_flags)
 819{
 820        struct mlx5_ib_dev *dev = to_mdev(pd->device);
 821        struct mlx5_create_mkey_mbox_in *in;
 822        struct mlx5_ib_mr *mr;
 823        int inlen;
 824        int err;
 825
 826        mr = kzalloc(sizeof(*mr), GFP_KERNEL);
 827        if (!mr)
 828                return ERR_PTR(-ENOMEM);
 829
 830        inlen = sizeof(*in) + sizeof(*in->pas) * ((npages + 1) / 2) * 2;
 831        in = mlx5_vzalloc(inlen);
 832        if (!in) {
 833                err = -ENOMEM;
 834                goto err_1;
 835        }
 836        mlx5_ib_populate_pas(dev, umem, page_shift, in->pas, 0);
 837
 838        in->seg.flags = convert_access(access_flags) |
 839                MLX5_ACCESS_MODE_MTT;
 840        in->seg.flags_pd = cpu_to_be32(to_mpd(pd)->pdn);
 841        in->seg.start_addr = cpu_to_be64(virt_addr);
 842        in->seg.len = cpu_to_be64(length);
 843        in->seg.bsfs_octo_size = 0;
 844        in->seg.xlt_oct_size = cpu_to_be32(get_octo_len(virt_addr, length, 1 << page_shift));
 845        in->seg.log2_page_size = page_shift;
 846        in->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
 847        in->xlat_oct_act_size = cpu_to_be32(get_octo_len(virt_addr, length,
 848                                                         1 << page_shift));
 849        err = mlx5_core_create_mkey(dev->mdev, &mr->mmr, in, inlen, NULL,
 850                                    NULL, NULL);
 851        if (err) {
 852                mlx5_ib_warn(dev, "create mkey failed\n");
 853                goto err_2;
 854        }
 855        mr->umem = umem;
 856        mlx5_vfree(in);
 857
 858        mlx5_ib_dbg(dev, "mkey = 0x%x\n", mr->mmr.key);
 859
 860        return mr;
 861
 862err_2:
 863        mlx5_vfree(in);
 864
 865err_1:
 866        kfree(mr);
 867
 868        return ERR_PTR(err);
 869}
 870
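/*
 * ib_reg_user_mr entry point: pin the user memory, try the UMR/cache path
 * for regions it can serve, and fall back to reg_create() when the cache
 * is empty or the region is too large.
 */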
 871struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
 872                                  u64 virt_addr, int access_flags,
 873                                  struct ib_udata *udata)
 874{
 875        struct mlx5_ib_dev *dev = to_mdev(pd->device);
 876        struct mlx5_ib_mr *mr = NULL;
 877        struct ib_umem *umem;
 878        int page_shift;
 879        int npages;
 880        int ncont;
 881        int order;
 882        int err;
 883
 884        mlx5_ib_dbg(dev, "start 0x%llx, virt_addr 0x%llx, length 0x%llx, access_flags 0x%x\n",
 885                    start, virt_addr, length, access_flags);
 886        umem = ib_umem_get(pd->uobject->context, start, length, access_flags,
 887                           0);
 888        if (IS_ERR(umem)) {
 889                mlx5_ib_dbg(dev, "umem get failed (%ld)\n", PTR_ERR(umem));
 890                return (void *)umem;
 891        }
 892
 893        mlx5_ib_cont_pages(umem, start, &npages, &page_shift, &ncont, &order);
 894        if (!npages) {
 895                mlx5_ib_warn(dev, "avoid zero region\n");
 896                err = -EINVAL;
 897                goto error;
 898        }
 899
 900        mlx5_ib_dbg(dev, "npages %d, ncont %d, order %d, page_shift %d\n",
 901                    npages, ncont, order, page_shift);
 902
 903        if (use_umr(order)) {
 904                mr = reg_umr(pd, umem, virt_addr, length, ncont, page_shift,
 905                             order, access_flags);
 906                if (PTR_ERR(mr) == -EAGAIN) {
  907                        mlx5_ib_dbg(dev, "cache empty for order %d\n", order);
 908                        mr = NULL;
 909                }
 910        }
 911
 912        if (!mr)
 913                mr = reg_create(pd, virt_addr, length, umem, ncont, page_shift,
 914                                access_flags);
 915
 916        if (IS_ERR(mr)) {
 917                err = PTR_ERR(mr);
 918                goto error;
 919        }
 920
 921        mlx5_ib_dbg(dev, "mkey 0x%x\n", mr->mmr.key);
 922
 923        mr->umem = umem;
 924        mr->npages = npages;
 925        spin_lock(&dev->mr_lock);
 926        dev->mdev->priv.reg_pages += npages;
 927        spin_unlock(&dev->mr_lock);
 928        mr->ibmr.lkey = mr->mmr.key;
 929        mr->ibmr.rkey = mr->mmr.key;
 930
 931        return &mr->ibmr;
 932
 933error:
 934        ib_umem_release(umem);
 935        return ERR_PTR(err);
 936}
 937
 938static int unreg_umr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
 939{
 940        struct umr_common *umrc = &dev->umrc;
 941        struct mlx5_ib_umr_context umr_context;
 942        struct ib_send_wr wr, *bad;
 943        int err;
 944
 945        memset(&wr, 0, sizeof(wr));
 946        wr.wr_id = (u64)(unsigned long)&umr_context;
 947        prep_umr_unreg_wqe(dev, &wr, mr->mmr.key);
 948
 949        mlx5_ib_init_umr_context(&umr_context);
 950        down(&umrc->sem);
 951        err = ib_post_send(umrc->qp, &wr, &bad);
 952        if (err) {
 953                up(&umrc->sem);
 954                mlx5_ib_dbg(dev, "err %d\n", err);
 955                goto error;
 956        } else {
 957                wait_for_completion(&umr_context.done);
 958                up(&umrc->sem);
 959        }
 960        if (umr_context.status != IB_WC_SUCCESS) {
 961                mlx5_ib_warn(dev, "unreg umr failed\n");
 962                err = -EFAULT;
 963                goto error;
 964        }
 965        return 0;
 966
 967error:
 968        return err;
 969}
 970
 971int mlx5_ib_dereg_mr(struct ib_mr *ibmr)
 972{
 973        struct mlx5_ib_dev *dev = to_mdev(ibmr->device);
 974        struct mlx5_ib_mr *mr = to_mmr(ibmr);
 975        struct ib_umem *umem = mr->umem;
 976        int npages = mr->npages;
 977        int umred = mr->umred;
 978        int err;
 979
 980        if (!umred) {
 981                err = mlx5_core_destroy_mkey(dev->mdev, &mr->mmr);
 982                if (err) {
 983                        mlx5_ib_warn(dev, "failed to destroy mkey 0x%x (%d)\n",
 984                                     mr->mmr.key, err);
 985                        return err;
 986                }
 987        } else {
 988                err = unreg_umr(dev, mr);
 989                if (err) {
 990                        mlx5_ib_warn(dev, "failed unregister\n");
 991                        return err;
 992                }
 993                free_cached_mr(dev, mr);
 994        }
 995
 996        if (umem) {
 997                ib_umem_release(umem);
 998                spin_lock(&dev->mr_lock);
 999                dev->mdev->priv.reg_pages -= npages;
1000                spin_unlock(&dev->mr_lock);
1001        }
1002
1003        if (!umred)
1004                kfree(mr);
1005
1006        return 0;
1007}
1008
1009struct ib_mr *mlx5_ib_create_mr(struct ib_pd *pd,
1010                                struct ib_mr_init_attr *mr_init_attr)
1011{
1012        struct mlx5_ib_dev *dev = to_mdev(pd->device);
1013        struct mlx5_create_mkey_mbox_in *in;
1014        struct mlx5_ib_mr *mr;
1015        int access_mode, err;
1016        int ndescs = roundup(mr_init_attr->max_reg_descriptors, 4);
1017
1018        mr = kzalloc(sizeof(*mr), GFP_KERNEL);
1019        if (!mr)
1020                return ERR_PTR(-ENOMEM);
1021
1022        in = kzalloc(sizeof(*in), GFP_KERNEL);
1023        if (!in) {
1024                err = -ENOMEM;
1025                goto err_free;
1026        }
1027
1028        in->seg.status = 1 << 6; /* free */
1029        in->seg.xlt_oct_size = cpu_to_be32(ndescs);
1030        in->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
1031        in->seg.flags_pd = cpu_to_be32(to_mpd(pd)->pdn);
1032        access_mode = MLX5_ACCESS_MODE_MTT;
1033
1034        if (mr_init_attr->flags & IB_MR_SIGNATURE_EN) {
1035                u32 psv_index[2];
1036
1037                in->seg.flags_pd = cpu_to_be32(be32_to_cpu(in->seg.flags_pd) |
1038                                                           MLX5_MKEY_BSF_EN);
1039                in->seg.bsfs_octo_size = cpu_to_be32(MLX5_MKEY_BSF_OCTO_SIZE);
1040                mr->sig = kzalloc(sizeof(*mr->sig), GFP_KERNEL);
1041                if (!mr->sig) {
1042                        err = -ENOMEM;
1043                        goto err_free_in;
1044                }
1045
1046                /* create mem & wire PSVs */
1047                err = mlx5_core_create_psv(dev->mdev, to_mpd(pd)->pdn,
1048                                           2, psv_index);
1049                if (err)
1050                        goto err_free_sig;
1051
1052                access_mode = MLX5_ACCESS_MODE_KLM;
1053                mr->sig->psv_memory.psv_idx = psv_index[0];
1054                mr->sig->psv_wire.psv_idx = psv_index[1];
1055
1056                mr->sig->sig_status_checked = true;
1057                mr->sig->sig_err_exists = false;
1058                /* Next UMR, Arm SIGERR */
1059                ++mr->sig->sigerr_count;
1060        }
1061
1062        in->seg.flags = MLX5_PERM_UMR_EN | access_mode;
1063        err = mlx5_core_create_mkey(dev->mdev, &mr->mmr, in, sizeof(*in),
1064                                    NULL, NULL, NULL);
1065        if (err)
1066                goto err_destroy_psv;
1067
1068        mr->ibmr.lkey = mr->mmr.key;
1069        mr->ibmr.rkey = mr->mmr.key;
1070        mr->umem = NULL;
1071        kfree(in);
1072
1073        return &mr->ibmr;
1074
1075err_destroy_psv:
1076        if (mr->sig) {
1077                if (mlx5_core_destroy_psv(dev->mdev,
1078                                          mr->sig->psv_memory.psv_idx))
1079                        mlx5_ib_warn(dev, "failed to destroy mem psv %d\n",
1080                                     mr->sig->psv_memory.psv_idx);
1081                if (mlx5_core_destroy_psv(dev->mdev,
1082                                          mr->sig->psv_wire.psv_idx))
1083                        mlx5_ib_warn(dev, "failed to destroy wire psv %d\n",
1084                                     mr->sig->psv_wire.psv_idx);
1085        }
1086err_free_sig:
1087        kfree(mr->sig);
1088err_free_in:
1089        kfree(in);
1090err_free:
1091        kfree(mr);
1092        return ERR_PTR(err);
1093}
1094
1095int mlx5_ib_destroy_mr(struct ib_mr *ibmr)
1096{
1097        struct mlx5_ib_dev *dev = to_mdev(ibmr->device);
1098        struct mlx5_ib_mr *mr = to_mmr(ibmr);
1099        int err;
1100
1101        if (mr->sig) {
1102                if (mlx5_core_destroy_psv(dev->mdev,
1103                                          mr->sig->psv_memory.psv_idx))
1104                        mlx5_ib_warn(dev, "failed to destroy mem psv %d\n",
1105                                     mr->sig->psv_memory.psv_idx);
1106                if (mlx5_core_destroy_psv(dev->mdev,
1107                                          mr->sig->psv_wire.psv_idx))
1108                        mlx5_ib_warn(dev, "failed to destroy wire psv %d\n",
1109                                     mr->sig->psv_wire.psv_idx);
1110                kfree(mr->sig);
1111        }
1112
1113        err = mlx5_core_destroy_mkey(dev->mdev, &mr->mmr);
1114        if (err) {
1115                mlx5_ib_warn(dev, "failed to destroy mkey 0x%x (%d)\n",
1116                             mr->mmr.key, err);
1117                return err;
1118        }
1119
1120        kfree(mr);
1121
1122        return err;
1123}
1124
1125struct ib_mr *mlx5_ib_alloc_fast_reg_mr(struct ib_pd *pd,
1126                                        int max_page_list_len)
1127{
1128        struct mlx5_ib_dev *dev = to_mdev(pd->device);
1129        struct mlx5_create_mkey_mbox_in *in;
1130        struct mlx5_ib_mr *mr;
1131        int err;
1132
1133        mr = kzalloc(sizeof(*mr), GFP_KERNEL);
1134        if (!mr)
1135                return ERR_PTR(-ENOMEM);
1136
1137        in = kzalloc(sizeof(*in), GFP_KERNEL);
1138        if (!in) {
1139                err = -ENOMEM;
1140                goto err_free;
1141        }
1142
1143        in->seg.status = 1 << 6; /* free */
1144        in->seg.xlt_oct_size = cpu_to_be32((max_page_list_len + 1) / 2);
1145        in->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
1146        in->seg.flags = MLX5_PERM_UMR_EN | MLX5_ACCESS_MODE_MTT;
1147        in->seg.flags_pd = cpu_to_be32(to_mpd(pd)->pdn);
1148        /*
1149         * TBD not needed - issue 197292 */
1150        in->seg.log2_page_size = PAGE_SHIFT;
1151
1152        err = mlx5_core_create_mkey(dev->mdev, &mr->mmr, in, sizeof(*in), NULL,
1153                                    NULL, NULL);
1154        kfree(in);
1155        if (err)
1156                goto err_free;
1157
1158        mr->ibmr.lkey = mr->mmr.key;
1159        mr->ibmr.rkey = mr->mmr.key;
1160        mr->umem = NULL;
1161
1162        return &mr->ibmr;
1163
1164err_free:
1165        kfree(mr);
1166        return ERR_PTR(err);
1167}
1168
1169struct ib_fast_reg_page_list *mlx5_ib_alloc_fast_reg_page_list(struct ib_device *ibdev,
1170                                                               int page_list_len)
1171{
1172        struct mlx5_ib_fast_reg_page_list *mfrpl;
1173        int size = page_list_len * sizeof(u64);
1174
1175        mfrpl = kmalloc(sizeof(*mfrpl), GFP_KERNEL);
1176        if (!mfrpl)
1177                return ERR_PTR(-ENOMEM);
1178
1179        mfrpl->ibfrpl.page_list = kmalloc(size, GFP_KERNEL);
1180        if (!mfrpl->ibfrpl.page_list)
1181                goto err_free;
1182
1183        mfrpl->mapped_page_list = dma_alloc_coherent(ibdev->dma_device,
1184                                                     size, &mfrpl->map,
1185                                                     GFP_KERNEL);
1186        if (!mfrpl->mapped_page_list)
1187                goto err_free;
1188
1189        WARN_ON(mfrpl->map & 0x3f);
1190
1191        return &mfrpl->ibfrpl;
1192
1193err_free:
1194        kfree(mfrpl->ibfrpl.page_list);
1195        kfree(mfrpl);
1196        return ERR_PTR(-ENOMEM);
1197}
1198
1199void mlx5_ib_free_fast_reg_page_list(struct ib_fast_reg_page_list *page_list)
1200{
1201        struct mlx5_ib_fast_reg_page_list *mfrpl = to_mfrpl(page_list);
1202        struct mlx5_ib_dev *dev = to_mdev(page_list->device);
1203        int size = page_list->max_page_list_len * sizeof(u64);
1204
1205        dma_free_coherent(&dev->mdev->pdev->dev, size, mfrpl->mapped_page_list,
1206                          mfrpl->map);
1207        kfree(mfrpl->ibfrpl.page_list);
1208        kfree(mfrpl);
1209}
1210
1211int mlx5_ib_check_mr_status(struct ib_mr *ibmr, u32 check_mask,
1212                            struct ib_mr_status *mr_status)
1213{
1214        struct mlx5_ib_mr *mmr = to_mmr(ibmr);
1215        int ret = 0;
1216
1217        if (check_mask & ~IB_MR_CHECK_SIG_STATUS) {
1218                pr_err("Invalid status check mask\n");
1219                ret = -EINVAL;
1220                goto done;
1221        }
1222
1223        mr_status->fail_status = 0;
1224        if (check_mask & IB_MR_CHECK_SIG_STATUS) {
1225                if (!mmr->sig) {
1226                        ret = -EINVAL;
1227                        pr_err("signature status check requested on a non-signature enabled MR\n");
1228                        goto done;
1229                }
1230
1231                mmr->sig->sig_status_checked = true;
1232                if (!mmr->sig->sig_err_exists)
1233                        goto done;
1234
1235                if (ibmr->lkey == mmr->sig->err_item.key)
1236                        memcpy(&mr_status->sig_err, &mmr->sig->err_item,
1237                               sizeof(mr_status->sig_err));
1238                else {
1239                        mr_status->sig_err.err_type = IB_SIG_BAD_GUARD;
1240                        mr_status->sig_err.sig_err_offset = 0;
1241                        mr_status->sig_err.key = mmr->sig->err_item.key;
1242                }
1243
1244                mmr->sig->sig_err_exists = false;
1245                mr_status->fail_status |= IB_MR_CHECK_SIG_STATUS;
1246        }
1247
1248done:
1249        return ret;
1250}
1251