linux/drivers/infiniband/hw/mlx5/mr.c
/*
 * Copyright (c) 2013, Mellanox Technologies inc.  All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */


#include <linux/kref.h>
#include <linux/random.h>
#include <linux/debugfs.h>
#include <linux/export.h>
#include <linux/delay.h>
#include <rdma/ib_umem.h>
#include "mlx5_ib.h"

enum {
        MAX_PENDING_REG_MR = 8,
};

enum {
        MLX5_UMR_ALIGN  = 2048
};

static __be64 *mr_align(__be64 *ptr, int align)
{
        unsigned long mask = align - 1;

        return (__be64 *)(((unsigned long)ptr + mask) & ~mask);
}

static int order2idx(struct mlx5_ib_dev *dev, int order)
{
        struct mlx5_mr_cache *cache = &dev->cache;

        if (order < cache->ent[0].order)
                return 0;
        else
                return order - cache->ent[0].order;
}

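/*
 * Completion handler for the asynchronous mkey creation posted by
 * add_keys().  On success the new mkey is stamped with a rolling
 * variant key and added to its cache bucket; on failure the MR is
 * freed and further cache filling is delayed for one second.
 */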
static void reg_mr_callback(int status, void *context)
{
        struct mlx5_ib_mr *mr = context;
        struct mlx5_ib_dev *dev = mr->dev;
        struct mlx5_mr_cache *cache = &dev->cache;
        int c = order2idx(dev, mr->order);
        struct mlx5_cache_ent *ent = &cache->ent[c];
        u8 key;
        unsigned long flags;

        spin_lock_irqsave(&ent->lock, flags);
        ent->pending--;
        spin_unlock_irqrestore(&ent->lock, flags);
        if (status) {
                mlx5_ib_warn(dev, "async reg mr failed. status %d\n", status);
                kfree(mr);
                dev->fill_delay = 1;
                mod_timer(&dev->delay_timer, jiffies + HZ);
                return;
        }

        if (mr->out.hdr.status) {
                mlx5_ib_warn(dev, "failed - status %d, syndrome 0x%x\n",
                             mr->out.hdr.status,
                             be32_to_cpu(mr->out.hdr.syndrome));
                kfree(mr);
                dev->fill_delay = 1;
                mod_timer(&dev->delay_timer, jiffies + HZ);
                return;
        }

        spin_lock_irqsave(&dev->mdev.priv.mkey_lock, flags);
        key = dev->mdev.priv.mkey_key++;
        spin_unlock_irqrestore(&dev->mdev.priv.mkey_lock, flags);
        mr->mmr.key = mlx5_idx_to_mkey(be32_to_cpu(mr->out.mkey) & 0xffffff) | key;

        cache->last_add = jiffies;

        spin_lock_irqsave(&ent->lock, flags);
        list_add_tail(&mr->list, &ent->head);
        ent->cur++;
        ent->size++;
        spin_unlock_irqrestore(&ent->lock, flags);
}

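/*
 * Post up to @num asynchronous mkey creation commands for cache
 * bucket @c.  At most MAX_PENDING_REG_MR commands may be outstanding
 * per bucket; beyond that -EAGAIN is returned so the caller can try
 * again later.  Completions are handled in reg_mr_callback().
 */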
static int add_keys(struct mlx5_ib_dev *dev, int c, int num)
{
        struct mlx5_mr_cache *cache = &dev->cache;
        struct mlx5_cache_ent *ent = &cache->ent[c];
        struct mlx5_create_mkey_mbox_in *in;
        struct mlx5_ib_mr *mr;
        int npages = 1 << ent->order;
        int err = 0;
        int i;

        in = kzalloc(sizeof(*in), GFP_KERNEL);
        if (!in)
                return -ENOMEM;

        for (i = 0; i < num; i++) {
                if (ent->pending >= MAX_PENDING_REG_MR) {
                        err = -EAGAIN;
                        break;
                }

                mr = kzalloc(sizeof(*mr), GFP_KERNEL);
                if (!mr) {
                        err = -ENOMEM;
                        break;
                }
                mr->order = ent->order;
                mr->umred = 1;
                mr->dev = dev;
                in->seg.status = 1 << 6;
                in->seg.xlt_oct_size = cpu_to_be32((npages + 1) / 2);
                in->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
                in->seg.flags = MLX5_ACCESS_MODE_MTT | MLX5_PERM_UMR_EN;
                in->seg.log2_page_size = 12;

                spin_lock_irq(&ent->lock);
                ent->pending++;
                spin_unlock_irq(&ent->lock);
                mr->start = jiffies;
                err = mlx5_core_create_mkey(&dev->mdev, &mr->mmr, in,
                                            sizeof(*in), reg_mr_callback,
                                            mr, &mr->out);
                if (err) {
                        mlx5_ib_warn(dev, "create mkey failed %d\n", err);
                        kfree(mr);
                        break;
                }
        }

        kfree(in);
        return err;
}

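/*
 * Pop up to @num MRs from the head of cache bucket @c and destroy
 * their mkeys, stopping early if the bucket runs empty.
 */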
static void remove_keys(struct mlx5_ib_dev *dev, int c, int num)
{
        struct mlx5_mr_cache *cache = &dev->cache;
        struct mlx5_cache_ent *ent = &cache->ent[c];
        struct mlx5_ib_mr *mr;
        int err;
        int i;

        for (i = 0; i < num; i++) {
                spin_lock_irq(&ent->lock);
                if (list_empty(&ent->head)) {
                        spin_unlock_irq(&ent->lock);
                        return;
                }
                mr = list_first_entry(&ent->head, struct mlx5_ib_mr, list);
                list_del(&mr->list);
                ent->cur--;
                ent->size--;
                spin_unlock_irq(&ent->lock);
                err = mlx5_core_destroy_mkey(&dev->mdev, &mr->mmr);
                if (err)
                        mlx5_ib_warn(dev, "failed destroy mkey\n");
                else
                        kfree(mr);
        }
}

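/*
 * debugfs interface: each cache bucket exposes writable "size" and
 * "limit" files plus "cur" and "miss" counters under mr_cache/ (see
 * mlx5_mr_cache_debugfs_init()), so the pool can be inspected and
 * resized at run time.
 */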
static ssize_t size_write(struct file *filp, const char __user *buf,
                          size_t count, loff_t *pos)
{
        struct mlx5_cache_ent *ent = filp->private_data;
        struct mlx5_ib_dev *dev = ent->dev;
        char lbuf[20];
        u32 var;
        int err;
        int c;

        if (copy_from_user(lbuf, buf, sizeof(lbuf)))
                return -EFAULT;

        c = order2idx(dev, ent->order);
        lbuf[sizeof(lbuf) - 1] = 0;

        if (sscanf(lbuf, "%u", &var) != 1)
                return -EINVAL;

        if (var < ent->limit)
                return -EINVAL;

        if (var > ent->size) {
                do {
                        err = add_keys(dev, c, var - ent->size);
                        if (err && err != -EAGAIN)
                                return err;

                        usleep_range(3000, 5000);
                } while (err);
        } else if (var < ent->size) {
                remove_keys(dev, c, ent->size - var);
        }

        return count;
}

static ssize_t size_read(struct file *filp, char __user *buf, size_t count,
                         loff_t *pos)
{
        struct mlx5_cache_ent *ent = filp->private_data;
        char lbuf[20];
        int err;

        if (*pos)
                return 0;

        err = snprintf(lbuf, sizeof(lbuf), "%d\n", ent->size);
        if (err < 0)
                return err;

        if (copy_to_user(buf, lbuf, err))
                return -EFAULT;

        *pos += err;

        return err;
}

static const struct file_operations size_fops = {
        .owner  = THIS_MODULE,
        .open   = simple_open,
        .write  = size_write,
        .read   = size_read,
};

static ssize_t limit_write(struct file *filp, const char __user *buf,
                           size_t count, loff_t *pos)
{
        struct mlx5_cache_ent *ent = filp->private_data;
        struct mlx5_ib_dev *dev = ent->dev;
        char lbuf[20];
        u32 var;
        int err;
        int c;

        if (copy_from_user(lbuf, buf, sizeof(lbuf)))
                return -EFAULT;

        c = order2idx(dev, ent->order);
        lbuf[sizeof(lbuf) - 1] = 0;

        if (sscanf(lbuf, "%u", &var) != 1)
                return -EINVAL;

        if (var > ent->size)
                return -EINVAL;

        ent->limit = var;

        if (ent->cur < ent->limit) {
                err = add_keys(dev, c, 2 * ent->limit - ent->cur);
                if (err)
                        return err;
        }

        return count;
}

static ssize_t limit_read(struct file *filp, char __user *buf, size_t count,
                          loff_t *pos)
{
        struct mlx5_cache_ent *ent = filp->private_data;
        char lbuf[20];
        int err;

        if (*pos)
                return 0;

        err = snprintf(lbuf, sizeof(lbuf), "%d\n", ent->limit);
        if (err < 0)
                return err;

        if (copy_to_user(buf, lbuf, err))
                return -EFAULT;

        *pos += err;

        return err;
}

static const struct file_operations limit_fops = {
        .owner  = THIS_MODULE,
        .open   = simple_open,
        .write  = limit_write,
        .read   = limit_read,
};

static int someone_adding(struct mlx5_mr_cache *cache)
{
        int i;

        for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) {
                if (cache->ent[i].cur < cache->ent[i].limit)
                        return 1;
        }

        return 0;
}

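/*
 * Background worker for one cache bucket: tops the bucket up towards
 * 2 * limit (backing off when the firmware is busy or fill_delay is
 * set), and shrinks it again once it exceeds 2 * limit, no bucket is
 * below its limit and nothing has been added for 300 seconds.
 */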
static void __cache_work_func(struct mlx5_cache_ent *ent)
{
        struct mlx5_ib_dev *dev = ent->dev;
        struct mlx5_mr_cache *cache = &dev->cache;
        int i = order2idx(dev, ent->order);
        int err;

        if (cache->stopped)
                return;

        ent = &dev->cache.ent[i];
        if (ent->cur < 2 * ent->limit && !dev->fill_delay) {
                err = add_keys(dev, i, 1);
                if (ent->cur < 2 * ent->limit) {
                        if (err == -EAGAIN) {
                                mlx5_ib_dbg(dev, "returned eagain, order %d\n",
                                            i + 2);
                                queue_delayed_work(cache->wq, &ent->dwork,
                                                   msecs_to_jiffies(3));
                        } else if (err) {
                                mlx5_ib_warn(dev, "command failed order %d, err %d\n",
                                             i + 2, err);
                                queue_delayed_work(cache->wq, &ent->dwork,
                                                   msecs_to_jiffies(1000));
                        } else {
                                queue_work(cache->wq, &ent->work);
                        }
                }
        } else if (ent->cur > 2 * ent->limit) {
                if (!someone_adding(cache) &&
                    time_after(jiffies, cache->last_add + 300 * HZ)) {
                        remove_keys(dev, i, 1);
                        if (ent->cur > ent->limit)
                                queue_work(cache->wq, &ent->work);
                } else {
                        queue_delayed_work(cache->wq, &ent->dwork, 300 * HZ);
                }
        }
}

static void delayed_cache_work_func(struct work_struct *work)
{
        struct mlx5_cache_ent *ent;

        ent = container_of(work, struct mlx5_cache_ent, dwork.work);
        __cache_work_func(ent);
}

static void cache_work_func(struct work_struct *work)
{
        struct mlx5_cache_ent *ent;

        ent = container_of(work, struct mlx5_cache_ent, work);
        __cache_work_func(ent);
}

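/*
 * Take an MR from the cache bucket matching @order, falling back to
 * larger buckets if that one is empty.  Every bucket that is touched
 * has its worker queued so it gets refilled in the background.
 * Returns NULL (and bumps the miss counter) if nothing was available.
 */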
static struct mlx5_ib_mr *alloc_cached_mr(struct mlx5_ib_dev *dev, int order)
{
        struct mlx5_mr_cache *cache = &dev->cache;
        struct mlx5_ib_mr *mr = NULL;
        struct mlx5_cache_ent *ent;
        int c;
        int i;

        c = order2idx(dev, order);
        if (c < 0 || c >= MAX_MR_CACHE_ENTRIES) {
                mlx5_ib_warn(dev, "order %d, cache index %d\n", order, c);
                return NULL;
        }

        for (i = c; i < MAX_MR_CACHE_ENTRIES; i++) {
                ent = &cache->ent[i];

                mlx5_ib_dbg(dev, "order %d, cache index %d\n", ent->order, i);

                spin_lock_irq(&ent->lock);
                if (!list_empty(&ent->head)) {
                        mr = list_first_entry(&ent->head, struct mlx5_ib_mr,
                                              list);
                        list_del(&mr->list);
                        ent->cur--;
                        spin_unlock_irq(&ent->lock);
                        if (ent->cur < ent->limit)
                                queue_work(cache->wq, &ent->work);
                        break;
                }
                spin_unlock_irq(&ent->lock);

                queue_work(cache->wq, &ent->work);

                if (mr)
                        break;
        }

        if (!mr)
                cache->ent[c].miss++;

        return mr;
}

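/*
 * Return an MR to its cache bucket.  If the bucket has grown beyond
 * 2 * limit, kick the worker so it can shrink the pool.
 */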
static void free_cached_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
{
        struct mlx5_mr_cache *cache = &dev->cache;
        struct mlx5_cache_ent *ent;
        int shrink = 0;
        int c;

        c = order2idx(dev, mr->order);
        if (c < 0 || c >= MAX_MR_CACHE_ENTRIES) {
                mlx5_ib_warn(dev, "order %d, cache index %d\n", mr->order, c);
                return;
        }
        ent = &cache->ent[c];
        spin_lock_irq(&ent->lock);
        list_add_tail(&mr->list, &ent->head);
        ent->cur++;
        if (ent->cur > 2 * ent->limit)
                shrink = 1;
        spin_unlock_irq(&ent->lock);

        if (shrink)
                queue_work(cache->wq, &ent->work);
}

static void clean_keys(struct mlx5_ib_dev *dev, int c)
{
        struct mlx5_mr_cache *cache = &dev->cache;
        struct mlx5_cache_ent *ent = &cache->ent[c];
        struct mlx5_ib_mr *mr;
        int err;

        cancel_delayed_work(&ent->dwork);
        while (1) {
                spin_lock_irq(&ent->lock);
                if (list_empty(&ent->head)) {
                        spin_unlock_irq(&ent->lock);
                        return;
                }
                mr = list_first_entry(&ent->head, struct mlx5_ib_mr, list);
                list_del(&mr->list);
                ent->cur--;
                ent->size--;
                spin_unlock_irq(&ent->lock);
                err = mlx5_core_destroy_mkey(&dev->mdev, &mr->mmr);
                if (err)
                        mlx5_ib_warn(dev, "failed destroy mkey\n");
                else
                        kfree(mr);
        }
}

static int mlx5_mr_cache_debugfs_init(struct mlx5_ib_dev *dev)
{
        struct mlx5_mr_cache *cache = &dev->cache;
        struct mlx5_cache_ent *ent;
        int i;

        if (!mlx5_debugfs_root)
                return 0;

        cache->root = debugfs_create_dir("mr_cache", dev->mdev.priv.dbg_root);
        if (!cache->root)
                return -ENOMEM;

        for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) {
                ent = &cache->ent[i];
                sprintf(ent->name, "%d", ent->order);
                ent->dir = debugfs_create_dir(ent->name, cache->root);
                if (!ent->dir)
                        return -ENOMEM;

                ent->fsize = debugfs_create_file("size", 0600, ent->dir, ent,
                                                 &size_fops);
                if (!ent->fsize)
                        return -ENOMEM;

                ent->flimit = debugfs_create_file("limit", 0600, ent->dir, ent,
                                                  &limit_fops);
                if (!ent->flimit)
                        return -ENOMEM;

                ent->fcur = debugfs_create_u32("cur", 0400, ent->dir,
                                               &ent->cur);
                if (!ent->fcur)
                        return -ENOMEM;

                ent->fmiss = debugfs_create_u32("miss", 0600, ent->dir,
                                                &ent->miss);
                if (!ent->fmiss)
                        return -ENOMEM;
        }

        return 0;
}

static void mlx5_mr_cache_debugfs_cleanup(struct mlx5_ib_dev *dev)
{
        if (!mlx5_debugfs_root)
                return;

        debugfs_remove_recursive(dev->cache.root);
}

static void delay_time_func(unsigned long ctx)
{
        struct mlx5_ib_dev *dev = (struct mlx5_ib_dev *)ctx;

        dev->fill_delay = 0;
}

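/*
 * Set up the MR cache: one bucket per order starting at 2, each with
 * its own work items, and a single-threaded workqueue that fills the
 * buckets up to the limits taken from the device profile.
 */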
int mlx5_mr_cache_init(struct mlx5_ib_dev *dev)
{
        struct mlx5_mr_cache *cache = &dev->cache;
        struct mlx5_cache_ent *ent;
        int limit;
        int err;
        int i;

        cache->wq = create_singlethread_workqueue("mkey_cache");
        if (!cache->wq) {
                mlx5_ib_warn(dev, "failed to create work queue\n");
                return -ENOMEM;
        }

        setup_timer(&dev->delay_timer, delay_time_func, (unsigned long)dev);
        for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) {
                INIT_LIST_HEAD(&cache->ent[i].head);
                spin_lock_init(&cache->ent[i].lock);

                ent = &cache->ent[i];
                INIT_LIST_HEAD(&ent->head);
                spin_lock_init(&ent->lock);
                ent->order = i + 2;
                ent->dev = dev;

                if (dev->mdev.profile->mask & MLX5_PROF_MASK_MR_CACHE)
                        limit = dev->mdev.profile->mr_cache[i].limit;
                else
                        limit = 0;

                INIT_WORK(&ent->work, cache_work_func);
                INIT_DELAYED_WORK(&ent->dwork, delayed_cache_work_func);
                ent->limit = limit;
                queue_work(cache->wq, &ent->work);
        }

        err = mlx5_mr_cache_debugfs_init(dev);
        if (err)
                mlx5_ib_warn(dev, "cache debugfs failure\n");

        return 0;
}

int mlx5_mr_cache_cleanup(struct mlx5_ib_dev *dev)
{
        int i;

        dev->cache.stopped = 1;
        flush_workqueue(dev->cache.wq);

        mlx5_mr_cache_debugfs_cleanup(dev);

        for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++)
                clean_keys(dev, i);

        destroy_workqueue(dev->cache.wq);
        del_timer_sync(&dev->delay_timer);

        return 0;
}

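/*
 * Implements ib_get_dma_mr(): create an mkey that covers all of
 * memory using physical addressing (MLX5_ACCESS_MODE_PA together with
 * MLX5_MKEY_LEN64), with access rights taken from @acc.
 */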
struct ib_mr *mlx5_ib_get_dma_mr(struct ib_pd *pd, int acc)
{
        struct mlx5_ib_dev *dev = to_mdev(pd->device);
        struct mlx5_core_dev *mdev = &dev->mdev;
        struct mlx5_create_mkey_mbox_in *in;
        struct mlx5_mkey_seg *seg;
        struct mlx5_ib_mr *mr;
        int err;

        mr = kzalloc(sizeof(*mr), GFP_KERNEL);
        if (!mr)
                return ERR_PTR(-ENOMEM);

        in = kzalloc(sizeof(*in), GFP_KERNEL);
        if (!in) {
                err = -ENOMEM;
                goto err_free;
        }

        seg = &in->seg;
        seg->flags = convert_access(acc) | MLX5_ACCESS_MODE_PA;
        seg->flags_pd = cpu_to_be32(to_mpd(pd)->pdn | MLX5_MKEY_LEN64);
        seg->qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
        seg->start_addr = 0;

        err = mlx5_core_create_mkey(mdev, &mr->mmr, in, sizeof(*in), NULL, NULL,
                                    NULL);
        if (err)
                goto err_in;

        kfree(in);
        mr->ibmr.lkey = mr->mmr.key;
        mr->ibmr.rkey = mr->mmr.key;
        mr->umem = NULL;

        return &mr->ibmr;

err_in:
        kfree(in);

err_free:
        kfree(mr);

        return ERR_PTR(err);
}

static int get_octo_len(u64 addr, u64 len, int page_size)
{
        u64 offset;
        int npages;

        offset = addr & (page_size - 1);
        npages = ALIGN(len + offset, page_size) >> ilog2(page_size);
        return (npages + 1) / 2;
}

static int use_umr(int order)
{
        return order <= 17;
}

static void prep_umr_reg_wqe(struct ib_pd *pd, struct ib_send_wr *wr,
                             struct ib_sge *sg, u64 dma, int n, u32 key,
                             int page_shift, u64 virt_addr, u64 len,
                             int access_flags)
{
        struct mlx5_ib_dev *dev = to_mdev(pd->device);
        struct ib_mr *mr = dev->umrc.mr;

        sg->addr = dma;
        sg->length = ALIGN(sizeof(u64) * n, 64);
        sg->lkey = mr->lkey;

        wr->next = NULL;
        wr->send_flags = 0;
        wr->sg_list = sg;
        if (n)
                wr->num_sge = 1;
        else
                wr->num_sge = 0;

        wr->opcode = MLX5_IB_WR_UMR;
        wr->wr.fast_reg.page_list_len = n;
        wr->wr.fast_reg.page_shift = page_shift;
        wr->wr.fast_reg.rkey = key;
        wr->wr.fast_reg.iova_start = virt_addr;
        wr->wr.fast_reg.length = len;
        wr->wr.fast_reg.access_flags = access_flags;
        wr->wr.fast_reg.page_list = (struct ib_fast_reg_page_list *)pd;
}

static void prep_umr_unreg_wqe(struct mlx5_ib_dev *dev,
                               struct ib_send_wr *wr, u32 key)
{
        wr->send_flags = MLX5_IB_SEND_UMR_UNREG;
        wr->opcode = MLX5_IB_WR_UMR;
        wr->wr.fast_reg.rkey = key;
}

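/*
 * Completion handler for the UMR CQ: for every polled completion,
 * record the work-completion status in the MR and wake up the thread
 * waiting in reg_umr() or unreg_umr().
 */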
void mlx5_umr_cq_handler(struct ib_cq *cq, void *cq_context)
{
        struct mlx5_ib_mr *mr;
        struct ib_wc wc;
        int err;

        while (1) {
                err = ib_poll_cq(cq, 1, &wc);
                if (err < 0) {
                        pr_warn("poll cq error %d\n", err);
                        return;
                }
                if (err == 0)
                        break;

                mr = (struct mlx5_ib_mr *)(unsigned long)wc.wr_id;
                mr->status = wc.status;
                complete(&mr->done);
        }
        ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
}

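/*
 * Register a user memory region through a UMR work request: take a
 * ready mkey from the cache (triggering a refill and failing with
 * -EAGAIN if none is available), DMA-map the page list aligned to
 * MLX5_UMR_ALIGN, post a MLX5_IB_WR_UMR WQE on the driver's UMR QP
 * and wait for its completion.
 */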
static struct mlx5_ib_mr *reg_umr(struct ib_pd *pd, struct ib_umem *umem,
                                  u64 virt_addr, u64 len, int npages,
                                  int page_shift, int order, int access_flags)
{
        struct mlx5_ib_dev *dev = to_mdev(pd->device);
        struct device *ddev = dev->ib_dev.dma_device;
        struct umr_common *umrc = &dev->umrc;
        struct ib_send_wr wr, *bad;
        struct mlx5_ib_mr *mr;
        struct ib_sge sg;
        int size = sizeof(u64) * npages;
        int err;
        int i;

        for (i = 0; i < 1; i++) {
                mr = alloc_cached_mr(dev, order);
                if (mr)
                        break;

                err = add_keys(dev, order2idx(dev, order), 1);
                if (err && err != -EAGAIN) {
                        mlx5_ib_warn(dev, "add_keys failed, err %d\n", err);
                        break;
                }
        }

        if (!mr)
                return ERR_PTR(-EAGAIN);

        mr->pas = kmalloc(size + MLX5_UMR_ALIGN - 1, GFP_KERNEL);
        if (!mr->pas) {
                err = -ENOMEM;
                goto error;
        }

        mlx5_ib_populate_pas(dev, umem, page_shift,
                             mr_align(mr->pas, MLX5_UMR_ALIGN), 1);

        mr->dma = dma_map_single(ddev, mr_align(mr->pas, MLX5_UMR_ALIGN), size,
                                 DMA_TO_DEVICE);
        if (dma_mapping_error(ddev, mr->dma)) {
                kfree(mr->pas);
                err = -ENOMEM;
                goto error;
        }

        memset(&wr, 0, sizeof(wr));
        wr.wr_id = (u64)(unsigned long)mr;
        prep_umr_reg_wqe(pd, &wr, &sg, mr->dma, npages, mr->mmr.key,
                         page_shift, virt_addr, len, access_flags);

        /* We serialize polls so one process does not kidnap another's
         * completion. This is not a problem since wr is completed in
         * around 1 usec
         */
        down(&umrc->sem);
        init_completion(&mr->done);
        err = ib_post_send(umrc->qp, &wr, &bad);
        if (err) {
                mlx5_ib_warn(dev, "post send failed, err %d\n", err);
                up(&umrc->sem);
                goto error;
        }
        wait_for_completion(&mr->done);
        up(&umrc->sem);

        dma_unmap_single(ddev, mr->dma, size, DMA_TO_DEVICE);
        kfree(mr->pas);

        if (mr->status != IB_WC_SUCCESS) {
                mlx5_ib_warn(dev, "reg umr failed\n");
                err = -EFAULT;
                goto error;
        }

        return mr;

error:
        free_cached_mr(dev, mr);
        return ERR_PTR(err);
}

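/*
 * Fallback registration path used when the UMR/cache path is not
 * applicable: build the full page list in the mailbox and create the
 * mkey with a synchronous firmware command.
 */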
static struct mlx5_ib_mr *reg_create(struct ib_pd *pd, u64 virt_addr,
                                     u64 length, struct ib_umem *umem,
                                     int npages, int page_shift,
                                     int access_flags)
{
        struct mlx5_ib_dev *dev = to_mdev(pd->device);
        struct mlx5_create_mkey_mbox_in *in;
        struct mlx5_ib_mr *mr;
        int inlen;
        int err;

        mr = kzalloc(sizeof(*mr), GFP_KERNEL);
        if (!mr)
                return ERR_PTR(-ENOMEM);

        inlen = sizeof(*in) + sizeof(*in->pas) * ((npages + 1) / 2) * 2;
        in = mlx5_vzalloc(inlen);
        if (!in) {
                err = -ENOMEM;
                goto err_1;
        }
        mlx5_ib_populate_pas(dev, umem, page_shift, in->pas, 0);

        in->seg.flags = convert_access(access_flags) |
                MLX5_ACCESS_MODE_MTT;
        in->seg.flags_pd = cpu_to_be32(to_mpd(pd)->pdn);
        in->seg.start_addr = cpu_to_be64(virt_addr);
        in->seg.len = cpu_to_be64(length);
        in->seg.bsfs_octo_size = 0;
        in->seg.xlt_oct_size = cpu_to_be32(get_octo_len(virt_addr, length,
                                                        1 << page_shift));
        in->seg.log2_page_size = page_shift;
        in->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
        in->xlat_oct_act_size = cpu_to_be32(get_octo_len(virt_addr, length,
                                                         1 << page_shift));
        err = mlx5_core_create_mkey(&dev->mdev, &mr->mmr, in, inlen, NULL,
                                    NULL, NULL);
        if (err) {
                mlx5_ib_warn(dev, "create mkey failed\n");
                goto err_2;
        }
        mr->umem = umem;
        mlx5_vfree(in);

        mlx5_ib_dbg(dev, "mkey = 0x%x\n", mr->mmr.key);

        return mr;

err_2:
        mlx5_vfree(in);

err_1:
        kfree(mr);

        return ERR_PTR(err);
}

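/*
 * Implements ib_reg_user_mr(): pin the user pages, then register them
 * via the UMR path for orders handled by the cache (see use_umr()),
 * falling back to reg_create() when the cache is empty or the region
 * is too large.
 */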
struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
                                  u64 virt_addr, int access_flags,
                                  struct ib_udata *udata)
{
        struct mlx5_ib_dev *dev = to_mdev(pd->device);
        struct mlx5_ib_mr *mr = NULL;
        struct ib_umem *umem;
        int page_shift;
        int npages;
        int ncont;
        int order;
        int err;

        mlx5_ib_dbg(dev, "start 0x%llx, virt_addr 0x%llx, length 0x%llx\n",
                    start, virt_addr, length);
        umem = ib_umem_get(pd->uobject->context, start, length, access_flags,
                           0);
        if (IS_ERR(umem)) {
                mlx5_ib_dbg(dev, "umem get failed\n");
                return (void *)umem;
        }

        mlx5_ib_cont_pages(umem, start, &npages, &page_shift, &ncont, &order);
        if (!npages) {
                mlx5_ib_warn(dev, "avoid zero region\n");
                err = -EINVAL;
                goto error;
        }

        mlx5_ib_dbg(dev, "npages %d, ncont %d, order %d, page_shift %d\n",
                    npages, ncont, order, page_shift);

        if (use_umr(order)) {
                mr = reg_umr(pd, umem, virt_addr, length, ncont, page_shift,
                             order, access_flags);
                if (PTR_ERR(mr) == -EAGAIN) {
                        mlx5_ib_dbg(dev, "cache empty for order %d\n", order);
                        mr = NULL;
                }
        }

        if (!mr)
                mr = reg_create(pd, virt_addr, length, umem, ncont, page_shift,
                                access_flags);

        if (IS_ERR(mr)) {
                err = PTR_ERR(mr);
                goto error;
        }

        mlx5_ib_dbg(dev, "mkey 0x%x\n", mr->mmr.key);

        mr->umem = umem;
        mr->npages = npages;
        spin_lock(&dev->mr_lock);
        dev->mdev.priv.reg_pages += npages;
        spin_unlock(&dev->mr_lock);
        mr->ibmr.lkey = mr->mmr.key;
        mr->ibmr.rkey = mr->mmr.key;

        return &mr->ibmr;

error:
        ib_umem_release(umem);
        return ERR_PTR(err);
}

static int unreg_umr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
{
        struct umr_common *umrc = &dev->umrc;
        struct ib_send_wr wr, *bad;
        int err;

        memset(&wr, 0, sizeof(wr));
        wr.wr_id = (u64)(unsigned long)mr;
        prep_umr_unreg_wqe(dev, &wr, mr->mmr.key);

        down(&umrc->sem);
        init_completion(&mr->done);
        err = ib_post_send(umrc->qp, &wr, &bad);
        if (err) {
                up(&umrc->sem);
                mlx5_ib_dbg(dev, "err %d\n", err);
                goto error;
        }
        wait_for_completion(&mr->done);
        up(&umrc->sem);
        if (mr->status != IB_WC_SUCCESS) {
                mlx5_ib_warn(dev, "unreg umr failed\n");
                err = -EFAULT;
                goto error;
        }
        return 0;

error:
        return err;
}

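/*
 * Implements ib_dereg_mr(): cached (UMR-registered) MRs are
 * invalidated with an unreg UMR and returned to the cache, while
 * directly created mkeys are destroyed with a firmware command.  The
 * pinned user memory and the per-device page accounting are released
 * in both cases.
 */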
int mlx5_ib_dereg_mr(struct ib_mr *ibmr)
{
        struct mlx5_ib_dev *dev = to_mdev(ibmr->device);
        struct mlx5_ib_mr *mr = to_mmr(ibmr);
        struct ib_umem *umem = mr->umem;
        int npages = mr->npages;
        int umred = mr->umred;
        int err;

        if (!umred) {
                err = mlx5_core_destroy_mkey(&dev->mdev, &mr->mmr);
                if (err) {
                        mlx5_ib_warn(dev, "failed to destroy mkey 0x%x (%d)\n",
                                     mr->mmr.key, err);
                        return err;
                }
        } else {
                err = unreg_umr(dev, mr);
                if (err) {
                        mlx5_ib_warn(dev, "failed unregister\n");
                        return err;
                }
                free_cached_mr(dev, mr);
        }

        if (umem) {
                ib_umem_release(umem);
                spin_lock(&dev->mr_lock);
                dev->mdev.priv.reg_pages -= npages;
                spin_unlock(&dev->mr_lock);
        }

        if (!umred)
                kfree(mr);

        return 0;
}

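/*
 * Fast-register support: allocate an mkey in the "free" state that a
 * consumer can later bind to a page list with a fast-register work
 * request.  The matching page-list alloc/free helpers follow below.
 */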
struct ib_mr *mlx5_ib_alloc_fast_reg_mr(struct ib_pd *pd,
                                        int max_page_list_len)
{
        struct mlx5_ib_dev *dev = to_mdev(pd->device);
        struct mlx5_create_mkey_mbox_in *in;
        struct mlx5_ib_mr *mr;
        int err;

        mr = kzalloc(sizeof(*mr), GFP_KERNEL);
        if (!mr)
                return ERR_PTR(-ENOMEM);

        in = kzalloc(sizeof(*in), GFP_KERNEL);
        if (!in) {
                err = -ENOMEM;
                goto err_free;
        }

        in->seg.status = 1 << 6; /* free */
        in->seg.xlt_oct_size = cpu_to_be32((max_page_list_len + 1) / 2);
        in->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
        in->seg.flags = MLX5_PERM_UMR_EN | MLX5_ACCESS_MODE_MTT;
        in->seg.flags_pd = cpu_to_be32(to_mpd(pd)->pdn);
        /* TBD not needed - issue 197292 */
        in->seg.log2_page_size = PAGE_SHIFT;

        err = mlx5_core_create_mkey(&dev->mdev, &mr->mmr, in, sizeof(*in), NULL,
                                    NULL, NULL);
        kfree(in);
        if (err)
                goto err_free;

        mr->ibmr.lkey = mr->mmr.key;
        mr->ibmr.rkey = mr->mmr.key;
        mr->umem = NULL;

        return &mr->ibmr;

err_free:
        kfree(mr);
        return ERR_PTR(err);
}

struct ib_fast_reg_page_list *mlx5_ib_alloc_fast_reg_page_list(struct ib_device *ibdev,
                                                               int page_list_len)
{
        struct mlx5_ib_fast_reg_page_list *mfrpl;
        int size = page_list_len * sizeof(u64);

        mfrpl = kmalloc(sizeof(*mfrpl), GFP_KERNEL);
        if (!mfrpl)
                return ERR_PTR(-ENOMEM);

        mfrpl->ibfrpl.page_list = kmalloc(size, GFP_KERNEL);
        if (!mfrpl->ibfrpl.page_list)
                goto err_free;

        mfrpl->mapped_page_list = dma_alloc_coherent(ibdev->dma_device,
                                                     size, &mfrpl->map,
                                                     GFP_KERNEL);
        if (!mfrpl->mapped_page_list)
                goto err_free;

        WARN_ON(mfrpl->map & 0x3f);

        return &mfrpl->ibfrpl;

err_free:
        kfree(mfrpl->ibfrpl.page_list);
        kfree(mfrpl);
        return ERR_PTR(-ENOMEM);
}

void mlx5_ib_free_fast_reg_page_list(struct ib_fast_reg_page_list *page_list)
{
        struct mlx5_ib_fast_reg_page_list *mfrpl = to_mfrpl(page_list);
        struct mlx5_ib_dev *dev = to_mdev(page_list->device);
        int size = page_list->max_page_list_len * sizeof(u64);

        dma_free_coherent(&dev->mdev.pdev->dev, size, mfrpl->mapped_page_list,
                          mfrpl->map);
        kfree(mfrpl->ibfrpl.page_list);
        kfree(mfrpl);
}