linux/drivers/infiniband/hw/mlx5/mr.c
/*
 * Copyright (c) 2013, Mellanox Technologies inc.  All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */


#include <linux/kref.h>
#include <linux/random.h>
#include <linux/debugfs.h>
#include <linux/export.h>
#include <rdma/ib_umem.h>
#include "mlx5_ib.h"

enum {
        DEF_CACHE_SIZE  = 10,
};

static __be64 *mr_align(__be64 *ptr, int align)
{
        unsigned long mask = align - 1;

        return (__be64 *)(((unsigned long)ptr + mask) & ~mask);
}

static int order2idx(struct mlx5_ib_dev *dev, int order)
{
        struct mlx5_mr_cache *cache = &dev->cache;

        if (order < cache->ent[0].order)
                return 0;
        else
                return order - cache->ent[0].order;
}

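/*
 * Add 'num' MRs to cache entry 'c'.  Each MR gets a 64-byte aligned PAS
 * buffer that is DMA mapped towards the device, and an MKey created in
 * the free state with UMR enabled, so it can later be pointed at user
 * pages through a UMR work request.
 */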
static int add_keys(struct mlx5_ib_dev *dev, int c, int num)
{
        struct device *ddev = dev->ib_dev.dma_device;
        struct mlx5_mr_cache *cache = &dev->cache;
        struct mlx5_cache_ent *ent = &cache->ent[c];
        struct mlx5_create_mkey_mbox_in *in;
        struct mlx5_ib_mr *mr;
        int npages = 1 << ent->order;
        int size = sizeof(u64) * npages;
        int err = 0;
        int i;

        in = kzalloc(sizeof(*in), GFP_KERNEL);
        if (!in)
                return -ENOMEM;

        for (i = 0; i < num; i++) {
                mr = kzalloc(sizeof(*mr), GFP_KERNEL);
                if (!mr) {
                        err = -ENOMEM;
                        goto out;
                }
                mr->order = ent->order;
                mr->umred = 1;
                mr->pas = kmalloc(size + 0x3f, GFP_KERNEL);
                if (!mr->pas) {
                        kfree(mr);
                        err = -ENOMEM;
                        goto out;
                }
                mr->dma = dma_map_single(ddev, mr_align(mr->pas, 0x40), size,
                                         DMA_TO_DEVICE);
                if (dma_mapping_error(ddev, mr->dma)) {
                        kfree(mr->pas);
                        kfree(mr);
                        err = -ENOMEM;
                        goto out;
                }

                in->seg.status = 1 << 6;
                in->seg.xlt_oct_size = cpu_to_be32((npages + 1) / 2);
                in->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
                in->seg.flags = MLX5_ACCESS_MODE_MTT | MLX5_PERM_UMR_EN;
                in->seg.log2_page_size = 12;

                err = mlx5_core_create_mkey(&dev->mdev, &mr->mmr, in,
                                            sizeof(*in));
                if (err) {
                        mlx5_ib_warn(dev, "create mkey failed %d\n", err);
                        dma_unmap_single(ddev, mr->dma, size, DMA_TO_DEVICE);
                        kfree(mr->pas);
                        kfree(mr);
                        goto out;
                }
                cache->last_add = jiffies;

                spin_lock(&ent->lock);
                list_add_tail(&mr->list, &ent->head);
                ent->cur++;
                ent->size++;
                spin_unlock(&ent->lock);
        }

out:
        kfree(in);
        return err;
}

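/*
 * Remove up to 'num' MRs from the head of cache entry 'c', destroying
 * their MKeys and freeing the associated PAS buffers.
 */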
static void remove_keys(struct mlx5_ib_dev *dev, int c, int num)
{
        struct device *ddev = dev->ib_dev.dma_device;
        struct mlx5_mr_cache *cache = &dev->cache;
        struct mlx5_cache_ent *ent = &cache->ent[c];
        struct mlx5_ib_mr *mr;
        int size;
        int err;
        int i;

        for (i = 0; i < num; i++) {
                spin_lock(&ent->lock);
                if (list_empty(&ent->head)) {
                        spin_unlock(&ent->lock);
                        return;
                }
                mr = list_first_entry(&ent->head, struct mlx5_ib_mr, list);
                list_del(&mr->list);
                ent->cur--;
                ent->size--;
                spin_unlock(&ent->lock);
                err = mlx5_core_destroy_mkey(&dev->mdev, &mr->mmr);
                if (err) {
                        mlx5_ib_warn(dev, "failed destroy mkey\n");
                } else {
                        /* unmap with the same length passed to
                         * dma_map_single() in add_keys()
                         */
                        size = sizeof(u64) * (1 << mr->order);
                        dma_unmap_single(ddev, mr->dma, size, DMA_TO_DEVICE);
                        kfree(mr->pas);
                        kfree(mr);
                }
        }
}

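/*
 * debugfs knobs: each cache entry gets a directory, named after its
 * order, under "mr_cache" in the device's mlx5 debugfs root, with
 * writable "size" and "limit" files plus "cur" and "miss" counters.
 */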
static ssize_t size_write(struct file *filp, const char __user *buf,
                          size_t count, loff_t *pos)
{
        struct mlx5_cache_ent *ent = filp->private_data;
        struct mlx5_ib_dev *dev = ent->dev;
        char lbuf[20];
        size_t cnt;
        u32 var;
        int err;
        int c;

        /* do not read past the user buffer; leave room for the terminator */
        cnt = min(count, sizeof(lbuf) - 1);
        if (copy_from_user(lbuf, buf, cnt))
                return -EFAULT;

        c = order2idx(dev, ent->order);
        lbuf[cnt] = 0;

        if (sscanf(lbuf, "%u", &var) != 1)
                return -EINVAL;

        if (var < ent->limit)
                return -EINVAL;

        if (var > ent->size) {
                err = add_keys(dev, c, var - ent->size);
                if (err)
                        return err;
        } else if (var < ent->size) {
                remove_keys(dev, c, ent->size - var);
        }

        return count;
}

static ssize_t size_read(struct file *filp, char __user *buf, size_t count,
                         loff_t *pos)
{
        struct mlx5_cache_ent *ent = filp->private_data;
        char lbuf[20];
        int err;

        if (*pos)
                return 0;

        err = snprintf(lbuf, sizeof(lbuf), "%d\n", ent->size);
        if (err < 0)
                return err;

        if (copy_to_user(buf, lbuf, err))
                return -EFAULT;

        *pos += err;

        return err;
}

static const struct file_operations size_fops = {
        .owner  = THIS_MODULE,
        .open   = simple_open,
        .write  = size_write,
        .read   = size_read,
};

static ssize_t limit_write(struct file *filp, const char __user *buf,
                           size_t count, loff_t *pos)
{
        struct mlx5_cache_ent *ent = filp->private_data;
        struct mlx5_ib_dev *dev = ent->dev;
        char lbuf[20];
        size_t cnt;
        u32 var;
        int err;
        int c;

        /* do not read past the user buffer; leave room for the terminator */
        cnt = min(count, sizeof(lbuf) - 1);
        if (copy_from_user(lbuf, buf, cnt))
                return -EFAULT;

        c = order2idx(dev, ent->order);
        lbuf[cnt] = 0;

        if (sscanf(lbuf, "%u", &var) != 1)
                return -EINVAL;

        if (var > ent->size)
                return -EINVAL;

        ent->limit = var;

        if (ent->cur < ent->limit) {
                err = add_keys(dev, c, 2 * ent->limit - ent->cur);
                if (err)
                        return err;
        }

        return count;
}

static ssize_t limit_read(struct file *filp, char __user *buf, size_t count,
                          loff_t *pos)
{
        struct mlx5_cache_ent *ent = filp->private_data;
        char lbuf[20];
        int err;

        if (*pos)
                return 0;

        err = snprintf(lbuf, sizeof(lbuf), "%d\n", ent->limit);
        if (err < 0)
                return err;

        if (copy_to_user(buf, lbuf, err))
                return -EFAULT;

        *pos += err;

        return err;
}

static const struct file_operations limit_fops = {
        .owner  = THIS_MODULE,
        .open   = simple_open,
        .write  = limit_write,
        .read   = limit_read,
};

static int someone_adding(struct mlx5_mr_cache *cache)
{
        int i;

        for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) {
                if (cache->ent[i].cur < cache->ent[i].limit)
                        return 1;
        }

        return 0;
}

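/*
 * Background maintenance for one cache entry: keep roughly 2 * limit
 * MRs available, growing one key at a time, and shrink back towards
 * the limit only when no entry is being filled and the last addition
 * was more than 60 seconds ago.
 */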
static void __cache_work_func(struct mlx5_cache_ent *ent)
{
        struct mlx5_ib_dev *dev = ent->dev;
        struct mlx5_mr_cache *cache = &dev->cache;
        int i = order2idx(dev, ent->order);

        if (cache->stopped)
                return;

        ent = &dev->cache.ent[i];
        if (ent->cur < 2 * ent->limit) {
                add_keys(dev, i, 1);
                if (ent->cur < 2 * ent->limit)
                        queue_work(cache->wq, &ent->work);
        } else if (ent->cur > 2 * ent->limit) {
                if (!someone_adding(cache) &&
                    time_after(jiffies, cache->last_add + 60 * HZ)) {
                        remove_keys(dev, i, 1);
                        if (ent->cur > ent->limit)
                                queue_work(cache->wq, &ent->work);
                } else {
                        queue_delayed_work(cache->wq, &ent->dwork, 60 * HZ);
                }
        }
}

static void delayed_cache_work_func(struct work_struct *work)
{
        struct mlx5_cache_ent *ent;

        ent = container_of(work, struct mlx5_cache_ent, dwork.work);
        __cache_work_func(ent);
}

static void cache_work_func(struct work_struct *work)
{
        struct mlx5_cache_ent *ent;

        ent = container_of(work, struct mlx5_cache_ent, work);
        __cache_work_func(ent);
}

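/*
 * Take an MR from the cache entry matching 'order', falling back to
 * higher order entries if that one is empty.  Every entry found below
 * its limit gets its work item queued so it is refilled in the
 * background; a complete miss is accounted in the original entry.
 */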
static struct mlx5_ib_mr *alloc_cached_mr(struct mlx5_ib_dev *dev, int order)
{
        struct mlx5_mr_cache *cache = &dev->cache;
        struct mlx5_ib_mr *mr = NULL;
        struct mlx5_cache_ent *ent;
        int c;
        int i;

        c = order2idx(dev, order);
        if (c < 0 || c >= MAX_MR_CACHE_ENTRIES) {
                mlx5_ib_warn(dev, "order %d, cache index %d\n", order, c);
                return NULL;
        }

        for (i = c; i < MAX_MR_CACHE_ENTRIES; i++) {
                ent = &cache->ent[i];

                mlx5_ib_dbg(dev, "order %d, cache index %d\n", ent->order, i);

                spin_lock(&ent->lock);
                if (!list_empty(&ent->head)) {
                        mr = list_first_entry(&ent->head, struct mlx5_ib_mr,
                                              list);
                        list_del(&mr->list);
                        ent->cur--;
                        spin_unlock(&ent->lock);
                        if (ent->cur < ent->limit)
                                queue_work(cache->wq, &ent->work);
                        break;
                }
                spin_unlock(&ent->lock);

                queue_work(cache->wq, &ent->work);
        }

        if (!mr)
                cache->ent[c].miss++;

        return mr;
}

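/*
 * Return a cache-originated MR to its entry.  If the entry has grown
 * beyond twice its limit, kick the work item so the excess is trimmed.
 */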
static void free_cached_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
{
        struct mlx5_mr_cache *cache = &dev->cache;
        struct mlx5_cache_ent *ent;
        int shrink = 0;
        int c;

        c = order2idx(dev, mr->order);
        if (c < 0 || c >= MAX_MR_CACHE_ENTRIES) {
                mlx5_ib_warn(dev, "order %d, cache index %d\n", mr->order, c);
                return;
        }
        ent = &cache->ent[c];
        spin_lock(&ent->lock);
        list_add_tail(&mr->list, &ent->head);
        ent->cur++;
        if (ent->cur > 2 * ent->limit)
                shrink = 1;
        spin_unlock(&ent->lock);

        if (shrink)
                queue_work(cache->wq, &ent->work);
}

static void clean_keys(struct mlx5_ib_dev *dev, int c)
{
        struct device *ddev = dev->ib_dev.dma_device;
        struct mlx5_mr_cache *cache = &dev->cache;
        struct mlx5_cache_ent *ent = &cache->ent[c];
        struct mlx5_ib_mr *mr;
        int size;
        int err;

        while (1) {
                spin_lock(&ent->lock);
                if (list_empty(&ent->head)) {
                        spin_unlock(&ent->lock);
                        return;
                }
                mr = list_first_entry(&ent->head, struct mlx5_ib_mr, list);
                list_del(&mr->list);
                ent->cur--;
                ent->size--;
                spin_unlock(&ent->lock);
                err = mlx5_core_destroy_mkey(&dev->mdev, &mr->mmr);
                if (err) {
                        mlx5_ib_warn(dev, "failed destroy mkey\n");
                } else {
                        /* unmap with the same length passed to
                         * dma_map_single() in add_keys()
                         */
                        size = sizeof(u64) * (1 << mr->order);
                        dma_unmap_single(ddev, mr->dma, size, DMA_TO_DEVICE);
                        kfree(mr->pas);
                        kfree(mr);
                }
        }
}

static int mlx5_mr_cache_debugfs_init(struct mlx5_ib_dev *dev)
{
        struct mlx5_mr_cache *cache = &dev->cache;
        struct mlx5_cache_ent *ent;
        int i;

        if (!mlx5_debugfs_root)
                return 0;

        cache->root = debugfs_create_dir("mr_cache", dev->mdev.priv.dbg_root);
        if (!cache->root)
                return -ENOMEM;

        for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) {
                ent = &cache->ent[i];
                sprintf(ent->name, "%d", ent->order);
                ent->dir = debugfs_create_dir(ent->name, cache->root);
                if (!ent->dir)
                        return -ENOMEM;

                ent->fsize = debugfs_create_file("size", 0600, ent->dir, ent,
                                                 &size_fops);
                if (!ent->fsize)
                        return -ENOMEM;

                ent->flimit = debugfs_create_file("limit", 0600, ent->dir, ent,
                                                  &limit_fops);
                if (!ent->flimit)
                        return -ENOMEM;

                ent->fcur = debugfs_create_u32("cur", 0400, ent->dir,
                                               &ent->cur);
                if (!ent->fcur)
                        return -ENOMEM;

                ent->fmiss = debugfs_create_u32("miss", 0600, ent->dir,
                                                &ent->miss);
                if (!ent->fmiss)
                        return -ENOMEM;
        }

        return 0;
}

static void mlx5_mr_cache_debugfs_cleanup(struct mlx5_ib_dev *dev)
{
        if (!mlx5_debugfs_root)
                return;

        debugfs_remove_recursive(dev->cache.root);
}

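/*
 * Create the MKey cache: one entry per order, starting at order 2,
 * each with its own list, lock and (delayed) work item.  Entry limits
 * are taken from the device profile when MLX5_PROF_MASK_MR_CACHE is
 * set and are zero otherwise; the actual filling happens
 * asynchronously on the single threaded "mkey_cache" workqueue.
 */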
int mlx5_mr_cache_init(struct mlx5_ib_dev *dev)
{
        struct mlx5_mr_cache *cache = &dev->cache;
        struct mlx5_cache_ent *ent;
        int limit;
        int size;
        int err;
        int i;

        cache->wq = create_singlethread_workqueue("mkey_cache");
        if (!cache->wq) {
                mlx5_ib_warn(dev, "failed to create work queue\n");
                return -ENOMEM;
        }

        for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) {
                ent = &cache->ent[i];
                INIT_LIST_HEAD(&ent->head);
                spin_lock_init(&ent->lock);
                ent->order = i + 2;
                ent->dev = dev;

                if (dev->mdev.profile->mask & MLX5_PROF_MASK_MR_CACHE) {
                        size = dev->mdev.profile->mr_cache[i].size;
                        limit = dev->mdev.profile->mr_cache[i].limit;
                } else {
                        size = DEF_CACHE_SIZE;
                        limit = 0;
                }
                INIT_WORK(&ent->work, cache_work_func);
                INIT_DELAYED_WORK(&ent->dwork, delayed_cache_work_func);
                ent->limit = limit;
                queue_work(cache->wq, &ent->work);
        }

        err = mlx5_mr_cache_debugfs_init(dev);
        if (err)
                mlx5_ib_warn(dev, "cache debugfs failure\n");

        return 0;
}

int mlx5_mr_cache_cleanup(struct mlx5_ib_dev *dev)
{
        int i;

        dev->cache.stopped = 1;
        destroy_workqueue(dev->cache.wq);

        mlx5_mr_cache_debugfs_cleanup(dev);

        for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++)
                clean_keys(dev, i);

        return 0;
}

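/*
 * Register a DMA MR covering the whole address space: a physical
 * access MKey (MLX5_ACCESS_MODE_PA) with a zero start address and the
 * MLX5_MKEY_LEN64 length flag, so no page list is needed.
 */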
struct ib_mr *mlx5_ib_get_dma_mr(struct ib_pd *pd, int acc)
{
        struct mlx5_ib_dev *dev = to_mdev(pd->device);
        struct mlx5_core_dev *mdev = &dev->mdev;
        struct mlx5_create_mkey_mbox_in *in;
        struct mlx5_mkey_seg *seg;
        struct mlx5_ib_mr *mr;
        int err;

        mr = kzalloc(sizeof(*mr), GFP_KERNEL);
        if (!mr)
                return ERR_PTR(-ENOMEM);

        in = kzalloc(sizeof(*in), GFP_KERNEL);
        if (!in) {
                err = -ENOMEM;
                goto err_free;
        }

        seg = &in->seg;
        seg->flags = convert_access(acc) | MLX5_ACCESS_MODE_PA;
        seg->flags_pd = cpu_to_be32(to_mpd(pd)->pdn | MLX5_MKEY_LEN64);
        seg->qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
        seg->start_addr = 0;

        err = mlx5_core_create_mkey(mdev, &mr->mmr, in, sizeof(*in));
        if (err)
                goto err_in;

        kfree(in);
        mr->ibmr.lkey = mr->mmr.key;
        mr->ibmr.rkey = mr->mmr.key;
        mr->umem = NULL;

        return &mr->ibmr;

err_in:
        kfree(in);

err_free:
        kfree(mr);

        return ERR_PTR(err);
}

static int get_octo_len(u64 addr, u64 len, int page_size)
{
        u64 offset;
        int npages;

        offset = addr & (page_size - 1);
        npages = ALIGN(len + offset, page_size) >> ilog2(page_size);
        return (npages + 1) / 2;
}

static int use_umr(int order)
{
        return order <= 17;
}

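/*
 * Build the UMR work request that points a cached MKey at the pages
 * described by the DMA mapped PAS buffer.  The ib_send_wr fast_reg
 * fields are reused to carry the UMR parameters; in particular the
 * page_list pointer is (ab)used to carry the PD.
 */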
static void prep_umr_reg_wqe(struct ib_pd *pd, struct ib_send_wr *wr,
                             struct ib_sge *sg, u64 dma, int n, u32 key,
                             int page_shift, u64 virt_addr, u64 len,
                             int access_flags)
{
        struct mlx5_ib_dev *dev = to_mdev(pd->device);
        struct ib_mr *mr = dev->umrc.mr;

        sg->addr = dma;
        sg->length = ALIGN(sizeof(u64) * n, 64);
        sg->lkey = mr->lkey;

        wr->next = NULL;
        wr->send_flags = 0;
        wr->sg_list = sg;
        if (n)
                wr->num_sge = 1;
        else
                wr->num_sge = 0;

        wr->opcode = MLX5_IB_WR_UMR;
        wr->wr.fast_reg.page_list_len = n;
        wr->wr.fast_reg.page_shift = page_shift;
        wr->wr.fast_reg.rkey = key;
        wr->wr.fast_reg.iova_start = virt_addr;
        wr->wr.fast_reg.length = len;
        wr->wr.fast_reg.access_flags = access_flags;
        wr->wr.fast_reg.page_list = (struct ib_fast_reg_page_list *)pd;
}

static void prep_umr_unreg_wqe(struct mlx5_ib_dev *dev,
                               struct ib_send_wr *wr, u32 key)
{
        wr->send_flags = MLX5_IB_SEND_UMR_UNREG;
        wr->opcode = MLX5_IB_WR_UMR;
        wr->wr.fast_reg.rkey = key;
}

void mlx5_umr_cq_handler(struct ib_cq *cq, void *cq_context)
{
        struct mlx5_ib_mr *mr;
        struct ib_wc wc;
        int err;

        while (1) {
                err = ib_poll_cq(cq, 1, &wc);
                if (err < 0) {
                        pr_warn("poll cq error %d\n", err);
                        return;
                }
                if (err == 0)
                        break;

                mr = (struct mlx5_ib_mr *)(unsigned long)wc.wr_id;
                mr->status = wc.status;
                complete(&mr->done);
        }
        ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
}

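/*
 * Register user memory through the UMR path: grab a pre-created MKey
 * from the cache (topping the cache up and retrying a few times if it
 * is empty), fill its PAS buffer with the page addresses of the umem,
 * and post a UMR work request on the dedicated UMR QP.  umrc->sem
 * serializes posts so completions are matched to their callers.
 */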
static struct mlx5_ib_mr *reg_umr(struct ib_pd *pd, struct ib_umem *umem,
                                  u64 virt_addr, u64 len, int npages,
                                  int page_shift, int order, int access_flags)
{
        struct mlx5_ib_dev *dev = to_mdev(pd->device);
        struct umr_common *umrc = &dev->umrc;
        struct ib_send_wr wr, *bad;
        struct mlx5_ib_mr *mr;
        struct ib_sge sg;
        int err;
        int i;

        for (i = 0; i < 10; i++) {
                mr = alloc_cached_mr(dev, order);
                if (mr)
                        break;

                err = add_keys(dev, order2idx(dev, order), 1);
                if (err) {
                        mlx5_ib_warn(dev, "add_keys failed\n");
                        break;
                }
        }

        if (!mr)
                return ERR_PTR(-EAGAIN);

        mlx5_ib_populate_pas(dev, umem, page_shift, mr_align(mr->pas, 0x40), 1);

        memset(&wr, 0, sizeof(wr));
        wr.wr_id = (u64)(unsigned long)mr;
        prep_umr_reg_wqe(pd, &wr, &sg, mr->dma, npages, mr->mmr.key,
                         page_shift, virt_addr, len, access_flags);

        /*
         * We serialize polls so one process does not kidnap another's
         * completion.  This is not a problem since a WR is completed in
         * around 1 usec.
         */
        down(&umrc->sem);
        init_completion(&mr->done);
        err = ib_post_send(umrc->qp, &wr, &bad);
        if (err) {
                mlx5_ib_warn(dev, "post send failed, err %d\n", err);
                up(&umrc->sem);
                goto error;
        }
        wait_for_completion(&mr->done);
        up(&umrc->sem);

        if (mr->status != IB_WC_SUCCESS) {
                mlx5_ib_warn(dev, "reg umr failed\n");
                err = -EFAULT;
                goto error;
        }

        return mr;

error:
        free_cached_mr(dev, mr);
        return ERR_PTR(err);
}

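/*
 * Fallback registration path, used when the MR cache cannot serve the
 * request or the region is too large for UMR: build a full
 * create_mkey command with the page list inlined in the mailbox.
 */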
static struct mlx5_ib_mr *reg_create(struct ib_pd *pd, u64 virt_addr,
                                     u64 length, struct ib_umem *umem,
                                     int npages, int page_shift,
                                     int access_flags)
{
        struct mlx5_ib_dev *dev = to_mdev(pd->device);
        struct mlx5_create_mkey_mbox_in *in;
        struct mlx5_ib_mr *mr;
        int inlen;
        int err;

        mr = kzalloc(sizeof(*mr), GFP_KERNEL);
        if (!mr)
                return ERR_PTR(-ENOMEM);

        inlen = sizeof(*in) + sizeof(*in->pas) * ((npages + 1) / 2) * 2;
        in = mlx5_vzalloc(inlen);
        if (!in) {
                err = -ENOMEM;
                goto err_1;
        }
        mlx5_ib_populate_pas(dev, umem, page_shift, in->pas, 0);

        in->seg.flags = convert_access(access_flags) |
                MLX5_ACCESS_MODE_MTT;
        in->seg.flags_pd = cpu_to_be32(to_mpd(pd)->pdn);
        in->seg.start_addr = cpu_to_be64(virt_addr);
        in->seg.len = cpu_to_be64(length);
        in->seg.bsfs_octo_size = 0;
        in->seg.xlt_oct_size = cpu_to_be32(get_octo_len(virt_addr, length,
                                                        1 << page_shift));
        in->seg.log2_page_size = page_shift;
        in->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
        in->xlat_oct_act_size = cpu_to_be32(get_octo_len(virt_addr, length,
                                                         1 << page_shift));
        err = mlx5_core_create_mkey(&dev->mdev, &mr->mmr, in, inlen);
        if (err) {
                mlx5_ib_warn(dev, "create mkey failed\n");
                goto err_2;
        }
        mr->umem = umem;
        mlx5_vfree(in);

        mlx5_ib_dbg(dev, "mkey = 0x%x\n", mr->mmr.key);

        return mr;

err_2:
        mlx5_vfree(in);

err_1:
        kfree(mr);

        return ERR_PTR(err);
}

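/*
 * ib_reg_user_mr handler: pin the user pages, work out the best page
 * size for the region, then register either via the UMR cache
 * (use_umr(), order <= 17) or via a firmware create_mkey command.
 */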
struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
                                  u64 virt_addr, int access_flags,
                                  struct ib_udata *udata)
{
        struct mlx5_ib_dev *dev = to_mdev(pd->device);
        struct mlx5_ib_mr *mr = NULL;
        struct ib_umem *umem;
        int page_shift;
        int npages;
        int ncont;
        int order;
        int err;

        mlx5_ib_dbg(dev, "start 0x%llx, virt_addr 0x%llx, length 0x%llx\n",
                    start, virt_addr, length);
        umem = ib_umem_get(pd->uobject->context, start, length, access_flags,
                           0);
        if (IS_ERR(umem)) {
                mlx5_ib_dbg(dev, "umem get failed\n");
                return (void *)umem;
        }

        mlx5_ib_cont_pages(umem, start, &npages, &page_shift, &ncont, &order);
        if (!npages) {
                mlx5_ib_warn(dev, "avoid zero region\n");
                err = -EINVAL;
                goto error;
        }

        mlx5_ib_dbg(dev, "npages %d, ncont %d, order %d, page_shift %d\n",
                    npages, ncont, order, page_shift);

        if (use_umr(order)) {
                mr = reg_umr(pd, umem, virt_addr, length, ncont, page_shift,
                             order, access_flags);
                if (PTR_ERR(mr) == -EAGAIN) {
                        mlx5_ib_dbg(dev, "cache empty for order %d\n", order);
                        mr = NULL;
                }
        }

        if (!mr)
                mr = reg_create(pd, virt_addr, length, umem, ncont, page_shift,
                                access_flags);

        if (IS_ERR(mr)) {
                err = PTR_ERR(mr);
                goto error;
        }

        mlx5_ib_dbg(dev, "mkey 0x%x\n", mr->mmr.key);

        mr->umem = umem;
        mr->npages = npages;
        spin_lock(&dev->mr_lock);
        dev->mdev.priv.reg_pages += npages;
        spin_unlock(&dev->mr_lock);
        mr->ibmr.lkey = mr->mmr.key;
        mr->ibmr.rkey = mr->mmr.key;

        return &mr->ibmr;

error:
        ib_umem_release(umem);
        return ERR_PTR(err);
}

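/*
 * Invalidate a cache-originated MKey with a UMR unreg work request so
 * it can be returned to the cache for reuse.
 */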
static int unreg_umr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
{
        struct umr_common *umrc = &dev->umrc;
        struct ib_send_wr wr, *bad;
        int err;

        memset(&wr, 0, sizeof(wr));
        wr.wr_id = (u64)(unsigned long)mr;
        prep_umr_unreg_wqe(dev, &wr, mr->mmr.key);

        down(&umrc->sem);
        init_completion(&mr->done);
        err = ib_post_send(umrc->qp, &wr, &bad);
        if (err) {
                up(&umrc->sem);
                mlx5_ib_dbg(dev, "err %d\n", err);
                goto error;
        }
        wait_for_completion(&mr->done);
        up(&umrc->sem);
        if (mr->status != IB_WC_SUCCESS) {
                mlx5_ib_warn(dev, "unreg umr failed\n");
                err = -EFAULT;
                goto error;
        }
        return 0;

error:
        return err;
}

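/*
 * ib_dereg_mr handler: MKeys that came from the cache are invalidated
 * through UMR and put back on their cache list, while directly created
 * MKeys are destroyed by firmware command and freed.
 */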
int mlx5_ib_dereg_mr(struct ib_mr *ibmr)
{
        struct mlx5_ib_dev *dev = to_mdev(ibmr->device);
        struct mlx5_ib_mr *mr = to_mmr(ibmr);
        struct ib_umem *umem = mr->umem;
        int npages = mr->npages;
        int umred = mr->umred;
        int err;

        if (!umred) {
                err = mlx5_core_destroy_mkey(&dev->mdev, &mr->mmr);
                if (err) {
                        mlx5_ib_warn(dev, "failed to destroy mkey 0x%x (%d)\n",
                                     mr->mmr.key, err);
                        return err;
                }
        } else {
                err = unreg_umr(dev, mr);
                if (err) {
                        mlx5_ib_warn(dev, "failed unregister\n");
                        return err;
                }
                free_cached_mr(dev, mr);
        }

        if (umem) {
                ib_umem_release(umem);
                spin_lock(&dev->mr_lock);
                dev->mdev.priv.reg_pages -= npages;
                spin_unlock(&dev->mr_lock);
        }

        if (!umred)
                kfree(mr);

        return 0;
}

struct ib_mr *mlx5_ib_alloc_fast_reg_mr(struct ib_pd *pd,
                                        int max_page_list_len)
{
        struct mlx5_ib_dev *dev = to_mdev(pd->device);
        struct mlx5_create_mkey_mbox_in *in;
        struct mlx5_ib_mr *mr;
        int err;

        mr = kzalloc(sizeof(*mr), GFP_KERNEL);
        if (!mr)
                return ERR_PTR(-ENOMEM);

        in = kzalloc(sizeof(*in), GFP_KERNEL);
        if (!in) {
                err = -ENOMEM;
                goto err_free;
        }

        in->seg.status = 1 << 6; /* free */
        in->seg.xlt_oct_size = cpu_to_be32((max_page_list_len + 1) / 2);
        in->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
        in->seg.flags = MLX5_PERM_UMR_EN | MLX5_ACCESS_MODE_MTT;
        in->seg.flags_pd = cpu_to_be32(to_mpd(pd)->pdn);
        /* TBD not needed - issue 197292 */
        in->seg.log2_page_size = PAGE_SHIFT;

        err = mlx5_core_create_mkey(&dev->mdev, &mr->mmr, in, sizeof(*in));
        kfree(in);
        if (err)
                goto err_free;

        mr->ibmr.lkey = mr->mmr.key;
        mr->ibmr.rkey = mr->mmr.key;
        mr->umem = NULL;

        return &mr->ibmr;

err_free:
        kfree(mr);
        return ERR_PTR(err);
}

struct ib_fast_reg_page_list *mlx5_ib_alloc_fast_reg_page_list(struct ib_device *ibdev,
                                                               int page_list_len)
{
        struct mlx5_ib_fast_reg_page_list *mfrpl;
        int size = page_list_len * sizeof(u64);

        mfrpl = kmalloc(sizeof(*mfrpl), GFP_KERNEL);
        if (!mfrpl)
                return ERR_PTR(-ENOMEM);

        mfrpl->ibfrpl.page_list = kmalloc(size, GFP_KERNEL);
        if (!mfrpl->ibfrpl.page_list)
                goto err_free;

        mfrpl->mapped_page_list = dma_alloc_coherent(ibdev->dma_device,
                                                     size, &mfrpl->map,
                                                     GFP_KERNEL);
        if (!mfrpl->mapped_page_list)
                goto err_free;

        WARN_ON(mfrpl->map & 0x3f);

        return &mfrpl->ibfrpl;

err_free:
        kfree(mfrpl->ibfrpl.page_list);
        kfree(mfrpl);
        return ERR_PTR(-ENOMEM);
}

void mlx5_ib_free_fast_reg_page_list(struct ib_fast_reg_page_list *page_list)
{
        struct mlx5_ib_fast_reg_page_list *mfrpl = to_mfrpl(page_list);
        struct mlx5_ib_dev *dev = to_mdev(page_list->device);
        int size = page_list->max_page_list_len * sizeof(u64);

        dma_free_coherent(&dev->mdev.pdev->dev, size, mfrpl->mapped_page_list,
                          mfrpl->map);
        kfree(mfrpl->ibfrpl.page_list);
        kfree(mfrpl);
}