linux/drivers/infiniband/hw/hns/hns_roce_mr.c
   1/*
   2 * Copyright (c) 2016 Hisilicon Limited.
   3 * Copyright (c) 2007, 2008 Mellanox Technologies. All rights reserved.
   4 *
   5 * This software is available to you under a choice of one of two
   6 * licenses.  You may choose to be licensed under the terms of the GNU
   7 * General Public License (GPL) Version 2, available from the file
   8 * COPYING in the main directory of this source tree, or the
   9 * OpenIB.org BSD license below:
  10 *
  11 *     Redistribution and use in source and binary forms, with or
  12 *     without modification, are permitted provided that the following
  13 *     conditions are met:
  14 *
  15 *      - Redistributions of source code must retain the above
  16 *        copyright notice, this list of conditions and the following
  17 *        disclaimer.
  18 *
  19 *      - Redistributions in binary form must reproduce the above
  20 *        copyright notice, this list of conditions and the following
  21 *        disclaimer in the documentation and/or other materials
  22 *        provided with the distribution.
  23 *
  24 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  25 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  26 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  27 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
  28 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
  29 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  31 * SOFTWARE.
  32 */
  33
  34#include <linux/platform_device.h>
  35#include <linux/vmalloc.h>
  36#include <rdma/ib_umem.h>
  37#include "hns_roce_device.h"
  38#include "hns_roce_cmd.h"
  39#include "hns_roce_hem.h"
  40
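/*
 * MR key <-> hardware index conversion: treating the index as a 32-bit
 * value, hw_index_to_key() and key_to_hw_index() are rotations by 8 bits
 * and exact inverses of each other.  For indexes below 2^24 this reduces
 * to key = index << 8 (e.g. index 0x3 <-> key 0x300), leaving the low
 * byte of the key free as a variable tag, much like the Mellanox key
 * layout credited in the copyright header above.
 */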
  41static u32 hw_index_to_key(unsigned long ind)
  42{
  43        return (u32)(ind >> 24) | (ind << 8);
  44}
  45
  46unsigned long key_to_hw_index(u32 key)
  47{
  48        return (key << 24) | (key >> 8);
  49}
  50
  51static int hns_roce_hw_create_mpt(struct hns_roce_dev *hr_dev,
  52                                  struct hns_roce_cmd_mailbox *mailbox,
  53                                  unsigned long mpt_index)
  54{
  55        return hns_roce_cmd_mbox(hr_dev, mailbox->dma, 0, mpt_index, 0,
  56                                 HNS_ROCE_CMD_CREATE_MPT,
  57                                 HNS_ROCE_CMD_TIMEOUT_MSECS);
  58}
  59
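/*
 * A NULL mailbox is allowed here: the output DMA address then becomes 0
 * and !mailbox is passed as the op modifier, presumably telling the
 * firmware not to copy the MPT context back.  The MR/MW free and rereg
 * paths below destroy their MPTs with this NULL form.
 */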
  60int hns_roce_hw_destroy_mpt(struct hns_roce_dev *hr_dev,
  61                            struct hns_roce_cmd_mailbox *mailbox,
  62                            unsigned long mpt_index)
  63{
  64        return hns_roce_cmd_mbox(hr_dev, 0, mailbox ? mailbox->dma : 0,
  65                                 mpt_index, !mailbox, HNS_ROCE_CMD_DESTROY_MPT,
  66                                 HNS_ROCE_CMD_TIMEOUT_MSECS);
  67}
  68
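/*
 * MTT segments come from a classic buddy allocator: bits[o] marks which
 * blocks of 2^o contiguous segments are free and num_free[o] counts them.
 * hns_roce_buddy_alloc() takes the first free block of the smallest
 * sufficient order, splitting larger blocks and handing the unused halves
 * back to the lower orders, and returns the block's starting segment.
 * hns_roce_buddy_free() merges the freed block with its buddy (seg ^ 1)
 * for as long as the buddy is also free.  For example, with max_order 3
 * and a fully free pool, an order-1 allocation returns segment 0 and
 * leaves one free block each at order 1 and order 2.
 */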
  69static int hns_roce_buddy_alloc(struct hns_roce_buddy *buddy, int order,
  70                                unsigned long *seg)
  71{
  72        int o;
  73        u32 m;
  74
  75        spin_lock(&buddy->lock);
  76
  77        for (o = order; o <= buddy->max_order; ++o) {
  78                if (buddy->num_free[o]) {
  79                        m = 1 << (buddy->max_order - o);
  80                        *seg = find_first_bit(buddy->bits[o], m);
  81                        if (*seg < m)
  82                                goto found;
  83                }
  84        }
  85        spin_unlock(&buddy->lock);
  86        return -EINVAL;
  87
  88 found:
  89        clear_bit(*seg, buddy->bits[o]);
  90        --buddy->num_free[o];
  91
  92        while (o > order) {
  93                --o;
  94                *seg <<= 1;
  95                set_bit(*seg ^ 1, buddy->bits[o]);
  96                ++buddy->num_free[o];
  97        }
  98
  99        spin_unlock(&buddy->lock);
 100
 101        *seg <<= order;
 102        return 0;
 103}
 104
 105static void hns_roce_buddy_free(struct hns_roce_buddy *buddy, unsigned long seg,
 106                                int order)
 107{
 108        seg >>= order;
 109
 110        spin_lock(&buddy->lock);
 111
 112        while (test_bit(seg ^ 1, buddy->bits[order])) {
 113                clear_bit(seg ^ 1, buddy->bits[order]);
 114                --buddy->num_free[order];
 115                seg >>= 1;
 116                ++order;
 117        }
 118
 119        set_bit(seg, buddy->bits[order]);
 120        ++buddy->num_free[order];
 121
 122        spin_unlock(&buddy->lock);
 123}
 124
 125static int hns_roce_buddy_init(struct hns_roce_buddy *buddy, int max_order)
 126{
 127        int i, s;
 128
 129        buddy->max_order = max_order;
 130        spin_lock_init(&buddy->lock);
 131        buddy->bits = kcalloc(buddy->max_order + 1,
 132                              sizeof(*buddy->bits),
 133                              GFP_KERNEL);
 134        buddy->num_free = kcalloc(buddy->max_order + 1,
 135                                  sizeof(*buddy->num_free),
 136                                  GFP_KERNEL);
 137        if (!buddy->bits || !buddy->num_free)
 138                goto err_out;
 139
 140        for (i = 0; i <= buddy->max_order; ++i) {
 141                s = BITS_TO_LONGS(1 << (buddy->max_order - i));
 142                buddy->bits[i] = kcalloc(s, sizeof(long), GFP_KERNEL |
 143                                         __GFP_NOWARN);
 144                if (!buddy->bits[i]) {
 145                        buddy->bits[i] = vzalloc(array_size(s, sizeof(long)));
 146                        if (!buddy->bits[i])
 147                                goto err_out_free;
 148                }
 149        }
 150
 151        set_bit(0, buddy->bits[buddy->max_order]);
 152        buddy->num_free[buddy->max_order] = 1;
 153
 154        return 0;
 155
 156err_out_free:
 157        for (i = 0; i <= buddy->max_order; ++i)
 158                kvfree(buddy->bits[i]);
 159
 160err_out:
 161        kfree(buddy->bits);
 162        kfree(buddy->num_free);
 163        return -ENOMEM;
 164}
 165
 166static void hns_roce_buddy_cleanup(struct hns_roce_buddy *buddy)
 167{
 168        int i;
 169
 170        for (i = 0; i <= buddy->max_order; ++i)
 171                kvfree(buddy->bits[i]);
 172
 173        kfree(buddy->bits);
 174        kfree(buddy->num_free);
 175}
 176
 177static int hns_roce_alloc_mtt_range(struct hns_roce_dev *hr_dev, int order,
 178                                    unsigned long *seg, u32 mtt_type)
 179{
 180        struct hns_roce_mr_table *mr_table = &hr_dev->mr_table;
 181        struct hns_roce_hem_table *table;
 182        struct hns_roce_buddy *buddy;
 183        int ret;
 184
 185        switch (mtt_type) {
 186        case MTT_TYPE_WQE:
 187                buddy = &mr_table->mtt_buddy;
 188                table = &mr_table->mtt_table;
 189                break;
 190        case MTT_TYPE_CQE:
 191                buddy = &mr_table->mtt_cqe_buddy;
 192                table = &mr_table->mtt_cqe_table;
 193                break;
 194        case MTT_TYPE_SRQWQE:
 195                buddy = &mr_table->mtt_srqwqe_buddy;
 196                table = &mr_table->mtt_srqwqe_table;
 197                break;
 198        case MTT_TYPE_IDX:
 199                buddy = &mr_table->mtt_idx_buddy;
 200                table = &mr_table->mtt_idx_table;
 201                break;
 202        default:
  203                dev_err(hr_dev->dev, "Unsupported MTT table type: %d\n",
 204                        mtt_type);
 205                return -EINVAL;
 206        }
 207
 208        ret = hns_roce_buddy_alloc(buddy, order, seg);
 209        if (ret)
 210                return ret;
 211
 212        ret = hns_roce_table_get_range(hr_dev, table, *seg,
 213                                       *seg + (1 << order) - 1);
 214        if (ret) {
 215                hns_roce_buddy_free(buddy, *seg, order);
 216                return ret;
 217        }
 218
 219        return 0;
 220}
 221
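/*
 * The MTT allocation is sized in buddy orders of HNS_ROCE_MTT_ENTRY_PER_SEG
 * entries: mtt->order becomes the smallest value for which
 * HNS_ROCE_MTT_ENTRY_PER_SEG << order covers npages.  For instance, if
 * HNS_ROCE_MTT_ENTRY_PER_SEG is 8, npages = 100 yields order 4
 * (8 << 4 = 128 entries).  npages == 0 marks a DMA memory registration:
 * mtt->order is set to -1 and no MTT range is allocated.
 */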
 222int hns_roce_mtt_init(struct hns_roce_dev *hr_dev, int npages, int page_shift,
 223                      struct hns_roce_mtt *mtt)
 224{
 225        int ret;
 226        int i;
 227
  228        /* Page num is zero: this corresponds to DMA memory registration */
 229        if (!npages) {
 230                mtt->order = -1;
 231                mtt->page_shift = HNS_ROCE_HEM_PAGE_SHIFT;
 232                return 0;
 233        }
 234
  235        /* Note: if page_shift is zero, this is a fast memory registration (FRMR) */
 236        mtt->page_shift = page_shift;
 237
  238        /* Compute the buddy order needed to cover npages MTT entries */
 239        for (mtt->order = 0, i = HNS_ROCE_MTT_ENTRY_PER_SEG; i < npages;
 240             i <<= 1)
 241                ++mtt->order;
 242
 243        /* Allocate MTT entry */
 244        ret = hns_roce_alloc_mtt_range(hr_dev, mtt->order, &mtt->first_seg,
 245                                       mtt->mtt_type);
  246        if (ret)
  247                return ret;
 248
 249        return 0;
 250}
 251
 252void hns_roce_mtt_cleanup(struct hns_roce_dev *hr_dev, struct hns_roce_mtt *mtt)
 253{
 254        struct hns_roce_mr_table *mr_table = &hr_dev->mr_table;
 255
 256        if (mtt->order < 0)
 257                return;
 258
 259        switch (mtt->mtt_type) {
 260        case MTT_TYPE_WQE:
 261                hns_roce_buddy_free(&mr_table->mtt_buddy, mtt->first_seg,
 262                                    mtt->order);
 263                hns_roce_table_put_range(hr_dev, &mr_table->mtt_table,
 264                                        mtt->first_seg,
 265                                        mtt->first_seg + (1 << mtt->order) - 1);
 266                break;
 267        case MTT_TYPE_CQE:
 268                hns_roce_buddy_free(&mr_table->mtt_cqe_buddy, mtt->first_seg,
 269                                    mtt->order);
 270                hns_roce_table_put_range(hr_dev, &mr_table->mtt_cqe_table,
 271                                        mtt->first_seg,
 272                                        mtt->first_seg + (1 << mtt->order) - 1);
 273                break;
 274        case MTT_TYPE_SRQWQE:
 275                hns_roce_buddy_free(&mr_table->mtt_srqwqe_buddy, mtt->first_seg,
 276                                    mtt->order);
 277                hns_roce_table_put_range(hr_dev, &mr_table->mtt_srqwqe_table,
 278                                        mtt->first_seg,
 279                                        mtt->first_seg + (1 << mtt->order) - 1);
 280                break;
 281        case MTT_TYPE_IDX:
 282                hns_roce_buddy_free(&mr_table->mtt_idx_buddy, mtt->first_seg,
 283                                    mtt->order);
 284                hns_roce_table_put_range(hr_dev, &mr_table->mtt_idx_table,
 285                                        mtt->first_seg,
 286                                        mtt->first_seg + (1 << mtt->order) - 1);
 287                break;
 288        default:
 289                dev_err(hr_dev->dev,
  290                        "Unsupported mtt type %d, clean mtt failed\n",
 291                        mtt->mtt_type);
 292                break;
 293        }
 294}
 295
 296static void hns_roce_loop_free(struct hns_roce_dev *hr_dev,
 297                               struct hns_roce_mr *mr, int err_loop_index,
 298                               int loop_i, int loop_j)
 299{
 300        struct device *dev = hr_dev->dev;
 301        u32 mhop_num;
 302        u32 pbl_bt_sz;
 303        u64 bt_idx;
 304        int i, j;
 305
 306        pbl_bt_sz = 1 << (hr_dev->caps.pbl_ba_pg_sz + PAGE_SHIFT);
 307        mhop_num = hr_dev->caps.pbl_hop_num;
 308
 309        i = loop_i;
 310        if (mhop_num == 3 && err_loop_index == 2) {
 311                for (; i >= 0; i--) {
 312                        dma_free_coherent(dev, pbl_bt_sz, mr->pbl_bt_l1[i],
 313                                          mr->pbl_l1_dma_addr[i]);
 314
 315                        for (j = 0; j < pbl_bt_sz / BA_BYTE_LEN; j++) {
 316                                if (i == loop_i && j >= loop_j)
 317                                        break;
 318
 319                                bt_idx = i * pbl_bt_sz / BA_BYTE_LEN + j;
 320                                dma_free_coherent(dev, pbl_bt_sz,
 321                                                  mr->pbl_bt_l2[bt_idx],
 322                                                  mr->pbl_l2_dma_addr[bt_idx]);
 323                        }
 324                }
 325        } else if (mhop_num == 3 && err_loop_index == 1) {
 326                for (i -= 1; i >= 0; i--) {
 327                        dma_free_coherent(dev, pbl_bt_sz, mr->pbl_bt_l1[i],
 328                                          mr->pbl_l1_dma_addr[i]);
 329
 330                        for (j = 0; j < pbl_bt_sz / BA_BYTE_LEN; j++) {
 331                                bt_idx = i * pbl_bt_sz / BA_BYTE_LEN + j;
 332                                dma_free_coherent(dev, pbl_bt_sz,
 333                                                  mr->pbl_bt_l2[bt_idx],
 334                                                  mr->pbl_l2_dma_addr[bt_idx]);
 335                        }
 336                }
 337        } else if (mhop_num == 2 && err_loop_index == 1) {
 338                for (i -= 1; i >= 0; i--)
 339                        dma_free_coherent(dev, pbl_bt_sz, mr->pbl_bt_l1[i],
 340                                          mr->pbl_l1_dma_addr[i]);
 341        } else {
  342                dev_warn(dev, "not supported: mhop_num=%d, err_loop_index=%d.",
 343                         mhop_num, err_loop_index);
 344                return;
 345        }
 346
 347        dma_free_coherent(dev, pbl_bt_sz, mr->pbl_bt_l0, mr->pbl_l0_dma_addr);
 348        mr->pbl_bt_l0 = NULL;
 349        mr->pbl_l0_dma_addr = 0;
 350}
 351static int pbl_1hop_alloc(struct hns_roce_dev *hr_dev, int npages,
 352                               struct hns_roce_mr *mr, u32 pbl_bt_sz)
 353{
 354        struct device *dev = hr_dev->dev;
 355
 356        if (npages > pbl_bt_sz / 8) {
  357                dev_err(dev, "npages %d exceeds one PBL page's capacity!",
 358                        npages);
 359                return -EINVAL;
 360        }
 361        mr->pbl_buf = dma_alloc_coherent(dev, npages * 8,
 362                                         &(mr->pbl_dma_addr),
 363                                         GFP_KERNEL);
 364        if (!mr->pbl_buf)
 365                return -ENOMEM;
 366
 367        mr->pbl_size = npages;
 368        mr->pbl_ba = mr->pbl_dma_addr;
 369        mr->pbl_hop_num = 1;
 370        mr->pbl_ba_pg_sz = hr_dev->caps.pbl_ba_pg_sz;
 371        mr->pbl_buf_pg_sz = hr_dev->caps.pbl_buf_pg_sz;
 372        return 0;
 373
 374}
 375
 376
 377static int pbl_2hop_alloc(struct hns_roce_dev *hr_dev, int npages,
 378                               struct hns_roce_mr *mr, u32 pbl_bt_sz)
 379{
 380        struct device *dev = hr_dev->dev;
 381        int npages_allocated;
 382        u64 pbl_last_bt_num;
 383        u64 pbl_bt_cnt = 0;
 384        u64 size;
 385        int i;
 386
 387        pbl_last_bt_num = (npages + pbl_bt_sz / 8 - 1) / (pbl_bt_sz / 8);
 388
 389        /* alloc L1 BT */
 390        for (i = 0; i < pbl_bt_sz / 8; i++) {
 391                if (pbl_bt_cnt + 1 < pbl_last_bt_num) {
 392                        size = pbl_bt_sz;
 393                } else {
 394                        npages_allocated = i * (pbl_bt_sz / 8);
 395                        size = (npages - npages_allocated) * 8;
 396                }
 397                mr->pbl_bt_l1[i] = dma_alloc_coherent(dev, size,
 398                                            &(mr->pbl_l1_dma_addr[i]),
 399                                            GFP_KERNEL);
 400                if (!mr->pbl_bt_l1[i]) {
 401                        hns_roce_loop_free(hr_dev, mr, 1, i, 0);
 402                        return -ENOMEM;
 403                }
 404
 405                *(mr->pbl_bt_l0 + i) = mr->pbl_l1_dma_addr[i];
 406
 407                pbl_bt_cnt++;
 408                if (pbl_bt_cnt >= pbl_last_bt_num)
 409                        break;
 410        }
 411
 412        mr->l0_chunk_last_num = i + 1;
 413
 414        return 0;
 415}
 416
 417static int pbl_3hop_alloc(struct hns_roce_dev *hr_dev, int npages,
 418                               struct hns_roce_mr *mr, u32 pbl_bt_sz)
 419{
 420        struct device *dev = hr_dev->dev;
 421        int mr_alloc_done = 0;
 422        int npages_allocated;
 423        u64 pbl_last_bt_num;
 424        u64 pbl_bt_cnt = 0;
 425        u64 bt_idx;
 426        u64 size;
 427        int i;
 428        int j = 0;
 429
 430        pbl_last_bt_num = (npages + pbl_bt_sz / 8 - 1) / (pbl_bt_sz / 8);
 431
 432        mr->pbl_l2_dma_addr = kcalloc(pbl_last_bt_num,
 433                                      sizeof(*mr->pbl_l2_dma_addr),
 434                                      GFP_KERNEL);
 435        if (!mr->pbl_l2_dma_addr)
 436                return -ENOMEM;
 437
 438        mr->pbl_bt_l2 = kcalloc(pbl_last_bt_num,
 439                                sizeof(*mr->pbl_bt_l2),
 440                                GFP_KERNEL);
 441        if (!mr->pbl_bt_l2)
 442                goto err_kcalloc_bt_l2;
 443
 444        /* alloc L1, L2 BT */
 445        for (i = 0; i < pbl_bt_sz / 8; i++) {
 446                mr->pbl_bt_l1[i] = dma_alloc_coherent(dev, pbl_bt_sz,
 447                                            &(mr->pbl_l1_dma_addr[i]),
 448                                            GFP_KERNEL);
 449                if (!mr->pbl_bt_l1[i]) {
 450                        hns_roce_loop_free(hr_dev, mr, 1, i, 0);
 451                        goto err_dma_alloc_l0;
 452                }
 453
 454                *(mr->pbl_bt_l0 + i) = mr->pbl_l1_dma_addr[i];
 455
 456                for (j = 0; j < pbl_bt_sz / 8; j++) {
 457                        bt_idx = i * pbl_bt_sz / 8 + j;
 458
 459                        if (pbl_bt_cnt + 1 < pbl_last_bt_num) {
 460                                size = pbl_bt_sz;
 461                        } else {
 462                                npages_allocated = bt_idx *
 463                                                   (pbl_bt_sz / 8);
 464                                size = (npages - npages_allocated) * 8;
 465                        }
 466                        mr->pbl_bt_l2[bt_idx] = dma_alloc_coherent(
 467                                      dev, size,
 468                                      &(mr->pbl_l2_dma_addr[bt_idx]),
 469                                      GFP_KERNEL);
 470                        if (!mr->pbl_bt_l2[bt_idx]) {
 471                                hns_roce_loop_free(hr_dev, mr, 2, i, j);
 472                                goto err_dma_alloc_l0;
 473                        }
 474
 475                        *(mr->pbl_bt_l1[i] + j) =
 476                                        mr->pbl_l2_dma_addr[bt_idx];
 477
 478                        pbl_bt_cnt++;
 479                        if (pbl_bt_cnt >= pbl_last_bt_num) {
 480                                mr_alloc_done = 1;
 481                                break;
 482                        }
 483                }
 484
 485                if (mr_alloc_done)
 486                        break;
 487        }
 488
 489        mr->l0_chunk_last_num = i + 1;
 490        mr->l1_chunk_last_num = j + 1;
 491
 492
 493        return 0;
 494
 495err_dma_alloc_l0:
 496        kfree(mr->pbl_bt_l2);
 497        mr->pbl_bt_l2 = NULL;
 498
 499err_kcalloc_bt_l2:
 500        kfree(mr->pbl_l2_dma_addr);
 501        mr->pbl_l2_dma_addr = NULL;
 502
 503        return -ENOMEM;
 504}
 505
 506
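/*
 * Multi-hop PBL layout: pbl_bt_l0 is a single base-address table (BT)
 * page whose entries each point at an L1 BT page.  With two hops the L1
 * entries hold the data page addresses themselves; with three hops each
 * L1 entry points at an L2 BT page that holds them.  Every BT page is
 * pbl_bt_sz bytes, i.e. pbl_bt_sz / 8 addresses, so the reachable page
 * count is (pbl_bt_sz / 8) ^ hop_num.  As a rough example with 4 KB BT
 * pages (pbl_ba_pg_sz = 0 on a 4 KB PAGE_SIZE system) that is 512 pages
 * for one hop, 512 * 512 for two and 512^3 for three.
 * hns_roce_loop_free() above unwinds a partially built hierarchy when an
 * allocation in the middle of it fails.
 */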
 507/* PBL multi hop addressing */
 508static int hns_roce_mhop_alloc(struct hns_roce_dev *hr_dev, int npages,
 509                               struct hns_roce_mr *mr)
 510{
 511        struct device *dev = hr_dev->dev;
 512        u32 pbl_bt_sz;
 513        u32 mhop_num;
 514
 515        mhop_num = (mr->type == MR_TYPE_FRMR ? 1 : hr_dev->caps.pbl_hop_num);
 516        pbl_bt_sz = 1 << (hr_dev->caps.pbl_ba_pg_sz + PAGE_SHIFT);
 517
 518        if (mhop_num == HNS_ROCE_HOP_NUM_0)
 519                return 0;
 520
 521        if (mhop_num == 1)
 522                return pbl_1hop_alloc(hr_dev, npages, mr, pbl_bt_sz);
 523
 524        mr->pbl_l1_dma_addr = kcalloc(pbl_bt_sz / 8,
 525                                      sizeof(*mr->pbl_l1_dma_addr),
 526                                      GFP_KERNEL);
 527        if (!mr->pbl_l1_dma_addr)
 528                return -ENOMEM;
 529
 530        mr->pbl_bt_l1 = kcalloc(pbl_bt_sz / 8, sizeof(*mr->pbl_bt_l1),
 531                                GFP_KERNEL);
 532        if (!mr->pbl_bt_l1)
 533                goto err_kcalloc_bt_l1;
 534
 535        /* alloc L0 BT */
 536        mr->pbl_bt_l0 = dma_alloc_coherent(dev, pbl_bt_sz,
 537                                           &(mr->pbl_l0_dma_addr),
 538                                           GFP_KERNEL);
 539        if (!mr->pbl_bt_l0)
 540                goto err_kcalloc_l2_dma;
 541
 542        if (mhop_num == 2) {
 543                if (pbl_2hop_alloc(hr_dev, npages, mr, pbl_bt_sz))
 544                        goto err_kcalloc_l2_dma;
 545        }
 546
 547        if (mhop_num == 3) {
 548                if (pbl_3hop_alloc(hr_dev, npages, mr, pbl_bt_sz))
 549                        goto err_kcalloc_l2_dma;
 550        }
 551
 552
 553        mr->pbl_size = npages;
 554        mr->pbl_ba = mr->pbl_l0_dma_addr;
 555        mr->pbl_hop_num = hr_dev->caps.pbl_hop_num;
 556        mr->pbl_ba_pg_sz = hr_dev->caps.pbl_ba_pg_sz;
 557        mr->pbl_buf_pg_sz = hr_dev->caps.pbl_buf_pg_sz;
 558
 559        return 0;
 560
 561err_kcalloc_l2_dma:
 562        kfree(mr->pbl_bt_l1);
 563        mr->pbl_bt_l1 = NULL;
 564
 565err_kcalloc_bt_l1:
 566        kfree(mr->pbl_l1_dma_addr);
 567        mr->pbl_l1_dma_addr = NULL;
 568
 569        return -ENOMEM;
 570}
 571
 572static int hns_roce_mr_alloc(struct hns_roce_dev *hr_dev, u32 pd, u64 iova,
 573                             u64 size, u32 access, int npages,
 574                             struct hns_roce_mr *mr)
 575{
 576        struct device *dev = hr_dev->dev;
 577        unsigned long index = 0;
 578        int ret;
 579
 580        /* Allocate a key for mr from mr_table */
 581        ret = hns_roce_bitmap_alloc(&hr_dev->mr_table.mtpt_bitmap, &index);
 582        if (ret)
 583                return -ENOMEM;
 584
 585        mr->iova = iova;                        /* MR va starting addr */
 586        mr->size = size;                        /* MR addr range */
  587        mr->pd = pd;                            /* PD the MR belongs to */
  588        mr->access = access;                    /* MR access permissions */
 589        mr->enabled = 0;                        /* MR active status */
 590        mr->key = hw_index_to_key(index);       /* MR key */
 591
 592        if (size == ~0ull) {
 593                mr->pbl_buf = NULL;
 594                mr->pbl_dma_addr = 0;
 595                /* PBL multi-hop addressing parameters */
 596                mr->pbl_bt_l2 = NULL;
 597                mr->pbl_bt_l1 = NULL;
 598                mr->pbl_bt_l0 = NULL;
 599                mr->pbl_l2_dma_addr = NULL;
 600                mr->pbl_l1_dma_addr = NULL;
 601                mr->pbl_l0_dma_addr = 0;
 602        } else {
 603                if (!hr_dev->caps.pbl_hop_num) {
 604                        mr->pbl_buf = dma_alloc_coherent(dev,
 605                                                         npages * BA_BYTE_LEN,
 606                                                         &(mr->pbl_dma_addr),
 607                                                         GFP_KERNEL);
 608                        if (!mr->pbl_buf)
 609                                return -ENOMEM;
 610                } else {
 611                        ret = hns_roce_mhop_alloc(hr_dev, npages, mr);
 612                }
 613        }
 614
 615        return ret;
 616}
 617
 618static void hns_roce_mhop_free(struct hns_roce_dev *hr_dev,
 619                               struct hns_roce_mr *mr)
 620{
 621        struct device *dev = hr_dev->dev;
 622        int npages_allocated;
 623        int npages;
 624        int i, j;
 625        u32 pbl_bt_sz;
 626        u32 mhop_num;
 627        u64 bt_idx;
 628
 629        npages = mr->pbl_size;
 630        pbl_bt_sz = 1 << (hr_dev->caps.pbl_ba_pg_sz + PAGE_SHIFT);
 631        mhop_num = (mr->type == MR_TYPE_FRMR) ? 1 : hr_dev->caps.pbl_hop_num;
 632
 633        if (mhop_num == HNS_ROCE_HOP_NUM_0)
 634                return;
 635
 636        if (mhop_num == 1) {
 637                dma_free_coherent(dev, (unsigned int)(npages * BA_BYTE_LEN),
 638                                  mr->pbl_buf, mr->pbl_dma_addr);
 639                return;
 640        }
 641
 642        dma_free_coherent(dev, pbl_bt_sz, mr->pbl_bt_l0,
 643                          mr->pbl_l0_dma_addr);
 644
 645        if (mhop_num == 2) {
 646                for (i = 0; i < mr->l0_chunk_last_num; i++) {
 647                        if (i == mr->l0_chunk_last_num - 1) {
 648                                npages_allocated =
 649                                                i * (pbl_bt_sz / BA_BYTE_LEN);
 650
 651                                dma_free_coherent(dev,
 652                                      (npages - npages_allocated) * BA_BYTE_LEN,
 653                                       mr->pbl_bt_l1[i],
 654                                       mr->pbl_l1_dma_addr[i]);
 655
 656                                break;
 657                        }
 658
 659                        dma_free_coherent(dev, pbl_bt_sz, mr->pbl_bt_l1[i],
 660                                          mr->pbl_l1_dma_addr[i]);
 661                }
 662        } else if (mhop_num == 3) {
 663                for (i = 0; i < mr->l0_chunk_last_num; i++) {
 664                        dma_free_coherent(dev, pbl_bt_sz, mr->pbl_bt_l1[i],
 665                                          mr->pbl_l1_dma_addr[i]);
 666
 667                        for (j = 0; j < pbl_bt_sz / BA_BYTE_LEN; j++) {
 668                                bt_idx = i * (pbl_bt_sz / BA_BYTE_LEN) + j;
 669
 670                                if ((i == mr->l0_chunk_last_num - 1)
 671                                    && j == mr->l1_chunk_last_num - 1) {
 672                                        npages_allocated = bt_idx *
 673                                                      (pbl_bt_sz / BA_BYTE_LEN);
 674
 675                                        dma_free_coherent(dev,
 676                                              (npages - npages_allocated) *
 677                                              BA_BYTE_LEN,
 678                                              mr->pbl_bt_l2[bt_idx],
 679                                              mr->pbl_l2_dma_addr[bt_idx]);
 680
 681                                        break;
 682                                }
 683
 684                                dma_free_coherent(dev, pbl_bt_sz,
 685                                                mr->pbl_bt_l2[bt_idx],
 686                                                mr->pbl_l2_dma_addr[bt_idx]);
 687                        }
 688                }
 689        }
 690
 691        kfree(mr->pbl_bt_l1);
 692        kfree(mr->pbl_l1_dma_addr);
 693        mr->pbl_bt_l1 = NULL;
 694        mr->pbl_l1_dma_addr = NULL;
 695        if (mhop_num == 3) {
 696                kfree(mr->pbl_bt_l2);
 697                kfree(mr->pbl_l2_dma_addr);
 698                mr->pbl_bt_l2 = NULL;
 699                mr->pbl_l2_dma_addr = NULL;
 700        }
 701}
 702
 703static void hns_roce_mr_free(struct hns_roce_dev *hr_dev,
 704                             struct hns_roce_mr *mr)
 705{
 706        struct device *dev = hr_dev->dev;
 707        int npages = 0;
 708        int ret;
 709
 710        if (mr->enabled) {
 711                ret = hns_roce_hw_destroy_mpt(hr_dev, NULL,
 712                                              key_to_hw_index(mr->key) &
 713                                              (hr_dev->caps.num_mtpts - 1));
 714                if (ret)
 715                        dev_warn(dev, "DESTROY_MPT failed (%d)\n", ret);
 716        }
 717
 718        if (mr->size != ~0ULL) {
 719                if (mr->type == MR_TYPE_MR)
 720                        npages = ib_umem_page_count(mr->umem);
 721
 722                if (!hr_dev->caps.pbl_hop_num)
 723                        dma_free_coherent(dev,
 724                                          (unsigned int)(npages * BA_BYTE_LEN),
 725                                          mr->pbl_buf, mr->pbl_dma_addr);
 726                else
 727                        hns_roce_mhop_free(hr_dev, mr);
 728        }
 729
 730        if (mr->enabled)
 731                hns_roce_table_put(hr_dev, &hr_dev->mr_table.mtpt_table,
 732                                   key_to_hw_index(mr->key));
 733
 734        hns_roce_bitmap_free(&hr_dev->mr_table.mtpt_bitmap,
 735                             key_to_hw_index(mr->key), BITMAP_NO_RR);
 736}
 737
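/*
 * Enabling an MR is a three step handshake with the hardware: reserve the
 * MTPT entry's backing memory in the HEM table, fill a command mailbox
 * with the MTPT contents via the hw-specific write_mtpt (or
 * frmr_write_mtpt for fast-register MRs), then issue CREATE_MPT so the
 * firmware installs it.  Failures unwind the mailbox and the HEM
 * reservation in reverse order.
 */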
 738static int hns_roce_mr_enable(struct hns_roce_dev *hr_dev,
 739                              struct hns_roce_mr *mr)
 740{
 741        int ret;
 742        unsigned long mtpt_idx = key_to_hw_index(mr->key);
 743        struct device *dev = hr_dev->dev;
 744        struct hns_roce_cmd_mailbox *mailbox;
 745        struct hns_roce_mr_table *mr_table = &hr_dev->mr_table;
 746
 747        /* Prepare HEM entry memory */
 748        ret = hns_roce_table_get(hr_dev, &mr_table->mtpt_table, mtpt_idx);
 749        if (ret)
 750                return ret;
 751
 752        /* Allocate mailbox memory */
 753        mailbox = hns_roce_alloc_cmd_mailbox(hr_dev);
 754        if (IS_ERR(mailbox)) {
 755                ret = PTR_ERR(mailbox);
 756                goto err_table;
 757        }
 758
 759        if (mr->type != MR_TYPE_FRMR)
 760                ret = hr_dev->hw->write_mtpt(mailbox->buf, mr, mtpt_idx);
 761        else
 762                ret = hr_dev->hw->frmr_write_mtpt(mailbox->buf, mr);
 763        if (ret) {
  764                dev_err(dev, "Write mtpt failed!\n");
 765                goto err_page;
 766        }
 767
 768        ret = hns_roce_hw_create_mpt(hr_dev, mailbox,
 769                                     mtpt_idx & (hr_dev->caps.num_mtpts - 1));
 770        if (ret) {
 771                dev_err(dev, "CREATE_MPT failed (%d)\n", ret);
 772                goto err_page;
 773        }
 774
 775        mr->enabled = 1;
 776        hns_roce_free_cmd_mailbox(hr_dev, mailbox);
 777
 778        return 0;
 779
 780err_page:
 781        hns_roce_free_cmd_mailbox(hr_dev, mailbox);
 782
 783err_table:
 784        hns_roce_table_put(hr_dev, &mr_table->mtpt_table, mtpt_idx);
 785        return ret;
 786}
 787
 788static int hns_roce_write_mtt_chunk(struct hns_roce_dev *hr_dev,
 789                                    struct hns_roce_mtt *mtt, u32 start_index,
 790                                    u32 npages, u64 *page_list)
 791{
 792        struct hns_roce_hem_table *table;
 793        dma_addr_t dma_handle;
 794        __le64 *mtts;
 795        u32 bt_page_size;
 796        u32 i;
 797
 798        switch (mtt->mtt_type) {
 799        case MTT_TYPE_WQE:
 800                table = &hr_dev->mr_table.mtt_table;
 801                bt_page_size = 1 << (hr_dev->caps.mtt_ba_pg_sz + PAGE_SHIFT);
 802                break;
 803        case MTT_TYPE_CQE:
 804                table = &hr_dev->mr_table.mtt_cqe_table;
 805                bt_page_size = 1 << (hr_dev->caps.cqe_ba_pg_sz + PAGE_SHIFT);
 806                break;
 807        case MTT_TYPE_SRQWQE:
 808                table = &hr_dev->mr_table.mtt_srqwqe_table;
 809                bt_page_size = 1 << (hr_dev->caps.srqwqe_ba_pg_sz + PAGE_SHIFT);
 810                break;
 811        case MTT_TYPE_IDX:
 812                table = &hr_dev->mr_table.mtt_idx_table;
 813                bt_page_size = 1 << (hr_dev->caps.idx_ba_pg_sz + PAGE_SHIFT);
 814                break;
 815        default:
 816                return -EINVAL;
 817        }
 818
 819        /* All MTTs must fit in the same page */
 820        if (start_index / (bt_page_size / sizeof(u64)) !=
 821                (start_index + npages - 1) / (bt_page_size / sizeof(u64)))
 822                return -EINVAL;
 823
 824        if (start_index & (HNS_ROCE_MTT_ENTRY_PER_SEG - 1))
 825                return -EINVAL;
 826
 827        mtts = hns_roce_table_find(hr_dev, table,
 828                                mtt->first_seg +
 829                                start_index / HNS_ROCE_MTT_ENTRY_PER_SEG,
 830                                &dma_handle);
 831        if (!mtts)
 832                return -ENOMEM;
 833
 834        /* Save page addr, low 12 bits : 0 */
 835        for (i = 0; i < npages; ++i) {
 836                if (!hr_dev->caps.mtt_hop_num)
 837                        mtts[i] = cpu_to_le64(page_list[i] >> PAGE_ADDR_SHIFT);
 838                else
 839                        mtts[i] = cpu_to_le64(page_list[i]);
 840        }
 841
 842        return 0;
 843}
 844
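/*
 * hns_roce_write_mtt() feeds page addresses to the hardware table in
 * chunks.  hns_roce_write_mtt_chunk() above refuses a chunk that would
 * straddle a BT page, so the loop below caps each chunk at
 * bt_page_size / sizeof(u64) entries and advances start_index and
 * page_list until all npages have been written.
 */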
 845static int hns_roce_write_mtt(struct hns_roce_dev *hr_dev,
 846                              struct hns_roce_mtt *mtt, u32 start_index,
 847                              u32 npages, u64 *page_list)
 848{
 849        int chunk;
 850        int ret;
 851        u32 bt_page_size;
 852
 853        if (mtt->order < 0)
 854                return -EINVAL;
 855
 856        switch (mtt->mtt_type) {
 857        case MTT_TYPE_WQE:
 858                bt_page_size = 1 << (hr_dev->caps.mtt_ba_pg_sz + PAGE_SHIFT);
 859                break;
 860        case MTT_TYPE_CQE:
 861                bt_page_size = 1 << (hr_dev->caps.cqe_ba_pg_sz + PAGE_SHIFT);
 862                break;
 863        case MTT_TYPE_SRQWQE:
 864                bt_page_size = 1 << (hr_dev->caps.srqwqe_ba_pg_sz + PAGE_SHIFT);
 865                break;
 866        case MTT_TYPE_IDX:
 867                bt_page_size = 1 << (hr_dev->caps.idx_ba_pg_sz + PAGE_SHIFT);
 868                break;
 869        default:
 870                dev_err(hr_dev->dev,
  871                        "Unsupported mtt type %d, write mtt failed\n",
 872                        mtt->mtt_type);
 873                return -EINVAL;
 874        }
 875
 876        while (npages > 0) {
 877                chunk = min_t(int, bt_page_size / sizeof(u64), npages);
 878
 879                ret = hns_roce_write_mtt_chunk(hr_dev, mtt, start_index, chunk,
 880                                               page_list);
 881                if (ret)
 882                        return ret;
 883
 884                npages -= chunk;
 885                start_index += chunk;
 886                page_list += chunk;
 887        }
 888
 889        return 0;
 890}
 891
 892int hns_roce_buf_write_mtt(struct hns_roce_dev *hr_dev,
 893                           struct hns_roce_mtt *mtt, struct hns_roce_buf *buf)
 894{
 895        u64 *page_list;
 896        int ret;
 897        u32 i;
 898
 899        page_list = kmalloc_array(buf->npages, sizeof(*page_list), GFP_KERNEL);
 900        if (!page_list)
 901                return -ENOMEM;
 902
 903        for (i = 0; i < buf->npages; ++i) {
 904                if (buf->nbufs == 1)
 905                        page_list[i] = buf->direct.map + (i << buf->page_shift);
 906                else
 907                        page_list[i] = buf->page_list[i].map;
 908
 909        }
 910        ret = hns_roce_write_mtt(hr_dev, mtt, 0, buf->npages, page_list);
 911
 912        kfree(page_list);
 913
 914        return ret;
 915}
 916
 917int hns_roce_init_mr_table(struct hns_roce_dev *hr_dev)
 918{
 919        struct hns_roce_mr_table *mr_table = &hr_dev->mr_table;
 920        int ret;
 921
 922        ret = hns_roce_bitmap_init(&mr_table->mtpt_bitmap,
 923                                   hr_dev->caps.num_mtpts,
 924                                   hr_dev->caps.num_mtpts - 1,
 925                                   hr_dev->caps.reserved_mrws, 0);
 926        if (ret)
 927                return ret;
 928
 929        ret = hns_roce_buddy_init(&mr_table->mtt_buddy,
 930                                  ilog2(hr_dev->caps.num_mtt_segs));
 931        if (ret)
 932                goto err_buddy;
 933
 934        if (hns_roce_check_whether_mhop(hr_dev, HEM_TYPE_CQE)) {
 935                ret = hns_roce_buddy_init(&mr_table->mtt_cqe_buddy,
 936                                          ilog2(hr_dev->caps.num_cqe_segs));
 937                if (ret)
 938                        goto err_buddy_cqe;
 939        }
 940
 941        if (hr_dev->caps.num_srqwqe_segs) {
 942                ret = hns_roce_buddy_init(&mr_table->mtt_srqwqe_buddy,
 943                                          ilog2(hr_dev->caps.num_srqwqe_segs));
 944                if (ret)
 945                        goto err_buddy_srqwqe;
 946        }
 947
 948        if (hr_dev->caps.num_idx_segs) {
 949                ret = hns_roce_buddy_init(&mr_table->mtt_idx_buddy,
 950                                          ilog2(hr_dev->caps.num_idx_segs));
 951                if (ret)
 952                        goto err_buddy_idx;
 953        }
 954
 955        return 0;
 956
 957err_buddy_idx:
 958        if (hr_dev->caps.num_srqwqe_segs)
 959                hns_roce_buddy_cleanup(&mr_table->mtt_srqwqe_buddy);
 960
 961err_buddy_srqwqe:
 962        if (hns_roce_check_whether_mhop(hr_dev, HEM_TYPE_CQE))
 963                hns_roce_buddy_cleanup(&mr_table->mtt_cqe_buddy);
 964
 965err_buddy_cqe:
 966        hns_roce_buddy_cleanup(&mr_table->mtt_buddy);
 967
 968err_buddy:
 969        hns_roce_bitmap_cleanup(&mr_table->mtpt_bitmap);
 970        return ret;
 971}
 972
 973void hns_roce_cleanup_mr_table(struct hns_roce_dev *hr_dev)
 974{
 975        struct hns_roce_mr_table *mr_table = &hr_dev->mr_table;
 976
 977        if (hr_dev->caps.num_idx_segs)
 978                hns_roce_buddy_cleanup(&mr_table->mtt_idx_buddy);
 979        if (hr_dev->caps.num_srqwqe_segs)
 980                hns_roce_buddy_cleanup(&mr_table->mtt_srqwqe_buddy);
 981        hns_roce_buddy_cleanup(&mr_table->mtt_buddy);
 982        if (hns_roce_check_whether_mhop(hr_dev, HEM_TYPE_CQE))
 983                hns_roce_buddy_cleanup(&mr_table->mtt_cqe_buddy);
 984        hns_roce_bitmap_cleanup(&mr_table->mtpt_bitmap);
 985}
 986
 987struct ib_mr *hns_roce_get_dma_mr(struct ib_pd *pd, int acc)
 988{
 989        struct hns_roce_mr *mr;
 990        int ret;
 991
 992        mr = kmalloc(sizeof(*mr), GFP_KERNEL);
 993        if (mr == NULL)
 994                return  ERR_PTR(-ENOMEM);
 995
 996        mr->type = MR_TYPE_DMA;
 997
 998        /* Allocate memory region key */
 999        ret = hns_roce_mr_alloc(to_hr_dev(pd->device), to_hr_pd(pd)->pdn, 0,
1000                                ~0ULL, acc, 0, mr);
1001        if (ret)
1002                goto err_free;
1003
1004        ret = hns_roce_mr_enable(to_hr_dev(pd->device), mr);
1005        if (ret)
1006                goto err_mr;
1007
1008        mr->ibmr.rkey = mr->ibmr.lkey = mr->key;
1009        mr->umem = NULL;
1010
1011        return &mr->ibmr;
1012
1013err_mr:
1014        hns_roce_mr_free(to_hr_dev(pd->device), mr);
1015
1016err_free:
1017        kfree(mr);
1018        return ERR_PTR(ret);
1019}
1020
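/*
 * Walk the umem's DMA-mapped pages and record one address per MTT page,
 * i.e. every 2^(mtt->page_shift - PAGE_SHIFT) system pages, rejecting
 * addresses that are not aligned to mtt->page_shift.  Addresses are
 * staged in a bt_page_size scratch buffer and flushed through
 * hns_roce_write_mtt() each time it fills, plus once more for the tail.
 */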
1021int hns_roce_ib_umem_write_mtt(struct hns_roce_dev *hr_dev,
1022                               struct hns_roce_mtt *mtt, struct ib_umem *umem)
1023{
1024        struct device *dev = hr_dev->dev;
1025        struct sg_dma_page_iter sg_iter;
1026        unsigned int order;
1027        int npage = 0;
1028        int ret = 0;
1029        int i;
1030        u64 page_addr;
1031        u64 *pages;
1032        u32 bt_page_size;
1033        u32 n;
1034
1035        switch (mtt->mtt_type) {
1036        case MTT_TYPE_WQE:
1037                order = hr_dev->caps.mtt_ba_pg_sz;
1038                break;
1039        case MTT_TYPE_CQE:
1040                order = hr_dev->caps.cqe_ba_pg_sz;
1041                break;
1042        case MTT_TYPE_SRQWQE:
1043                order = hr_dev->caps.srqwqe_ba_pg_sz;
1044                break;
1045        case MTT_TYPE_IDX:
1046                order = hr_dev->caps.idx_ba_pg_sz;
1047                break;
1048        default:
 1049                dev_err(dev, "Unsupported mtt type %d, write mtt failed\n",
1050                        mtt->mtt_type);
1051                return -EINVAL;
1052        }
1053
1054        bt_page_size = 1 << (order + PAGE_SHIFT);
1055
1056        pages = (u64 *) __get_free_pages(GFP_KERNEL, order);
1057        if (!pages)
1058                return -ENOMEM;
1059
1060        i = n = 0;
1061
1062        for_each_sg_dma_page(umem->sg_head.sgl, &sg_iter, umem->nmap, 0) {
1063                page_addr = sg_page_iter_dma_address(&sg_iter);
1064                if (!(npage % (1 << (mtt->page_shift - PAGE_SHIFT)))) {
1065                        if (page_addr & ((1 << mtt->page_shift) - 1)) {
1066                                dev_err(dev,
1067                                        "page_addr 0x%llx is not page_shift %d alignment!\n",
1068                                        page_addr, mtt->page_shift);
1069                                ret = -EINVAL;
1070                                goto out;
1071                        }
1072                        pages[i++] = page_addr;
1073                }
1074                npage++;
1075                if (i == bt_page_size / sizeof(u64)) {
1076                        ret = hns_roce_write_mtt(hr_dev, mtt, n, i, pages);
1077                        if (ret)
1078                                goto out;
1079                        n += i;
1080                        i = 0;
1081                }
1082        }
1083
1084        if (i)
1085                ret = hns_roce_write_mtt(hr_dev, mtt, n, i, pages);
1086
1087out:
1088        free_pages((unsigned long) pages, order);
1089        return ret;
1090}
1091
1092static int hns_roce_ib_umem_write_mr(struct hns_roce_dev *hr_dev,
1093                                     struct hns_roce_mr *mr,
1094                                     struct ib_umem *umem)
1095{
1096        struct sg_dma_page_iter sg_iter;
1097        int i = 0, j = 0;
1098        u64 page_addr;
1099        u32 pbl_bt_sz;
1100
1101        if (hr_dev->caps.pbl_hop_num == HNS_ROCE_HOP_NUM_0)
1102                return 0;
1103
1104        pbl_bt_sz = 1 << (hr_dev->caps.pbl_ba_pg_sz + PAGE_SHIFT);
1105        for_each_sg_dma_page(umem->sg_head.sgl, &sg_iter, umem->nmap, 0) {
1106                page_addr = sg_page_iter_dma_address(&sg_iter);
1107                if (!hr_dev->caps.pbl_hop_num) {
1108                        /* for hip06, page addr is aligned to 4K */
1109                        mr->pbl_buf[i++] = page_addr >> 12;
1110                } else if (hr_dev->caps.pbl_hop_num == 1) {
1111                        mr->pbl_buf[i++] = page_addr;
1112                } else {
1113                        if (hr_dev->caps.pbl_hop_num == 2)
1114                                mr->pbl_bt_l1[i][j] = page_addr;
1115                        else if (hr_dev->caps.pbl_hop_num == 3)
1116                                mr->pbl_bt_l2[i][j] = page_addr;
1117
1118                        j++;
1119                        if (j >= (pbl_bt_sz / BA_BYTE_LEN)) {
1120                                i++;
1121                                j = 0;
1122                        }
1123                }
1124        }
1125
1126        /* Memory barrier */
1127        mb();
1128
1129        return 0;
1130}
1131
1132struct ib_mr *hns_roce_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
1133                                   u64 virt_addr, int access_flags,
1134                                   struct ib_udata *udata)
1135{
1136        struct hns_roce_dev *hr_dev = to_hr_dev(pd->device);
1137        struct device *dev = hr_dev->dev;
1138        struct hns_roce_mr *mr;
1139        int bt_size;
1140        int ret;
1141        int n;
1142        int i;
1143
1144        mr = kmalloc(sizeof(*mr), GFP_KERNEL);
1145        if (!mr)
1146                return ERR_PTR(-ENOMEM);
1147
1148        mr->umem = ib_umem_get(udata, start, length, access_flags);
1149        if (IS_ERR(mr->umem)) {
1150                ret = PTR_ERR(mr->umem);
1151                goto err_free;
1152        }
1153
1154        n = ib_umem_page_count(mr->umem);
1155
1156        if (!hr_dev->caps.pbl_hop_num) {
1157                if (n > HNS_ROCE_MAX_MTPT_PBL_NUM) {
1158                        dev_err(dev,
 1159                             "MR len %lld error: MR is limited to 4G at most!\n",
1160                             length);
1161                        ret = -EINVAL;
1162                        goto err_umem;
1163                }
1164        } else {
1165                u64 pbl_size = 1;
1166
1167                bt_size = (1 << (hr_dev->caps.pbl_ba_pg_sz + PAGE_SHIFT)) /
1168                          BA_BYTE_LEN;
1169                for (i = 0; i < hr_dev->caps.pbl_hop_num; i++)
1170                        pbl_size *= bt_size;
1171                if (n > pbl_size) {
1172                        dev_err(dev,
 1173                            "MR len %lld error: MR page num is limited to %lld!\n",
1174                            length, pbl_size);
1175                        ret = -EINVAL;
1176                        goto err_umem;
1177                }
1178        }
1179
1180        mr->type = MR_TYPE_MR;
1181
1182        ret = hns_roce_mr_alloc(hr_dev, to_hr_pd(pd)->pdn, virt_addr, length,
1183                                access_flags, n, mr);
1184        if (ret)
1185                goto err_umem;
1186
1187        ret = hns_roce_ib_umem_write_mr(hr_dev, mr, mr->umem);
1188        if (ret)
1189                goto err_mr;
1190
1191        ret = hns_roce_mr_enable(hr_dev, mr);
1192        if (ret)
1193                goto err_mr;
1194
1195        mr->ibmr.rkey = mr->ibmr.lkey = mr->key;
1196
1197        return &mr->ibmr;
1198
1199err_mr:
1200        hns_roce_mr_free(hr_dev, mr);
1201
1202err_umem:
1203        ib_umem_release(mr->umem);
1204
1205err_free:
1206        kfree(mr);
1207        return ERR_PTR(ret);
1208}
1209
1210static int rereg_mr_trans(struct ib_mr *ibmr, int flags,
1211                          u64 start, u64 length,
1212                          u64 virt_addr, int mr_access_flags,
1213                          struct hns_roce_cmd_mailbox *mailbox,
1214                          u32 pdn, struct ib_udata *udata)
1215{
1216        struct hns_roce_dev *hr_dev = to_hr_dev(ibmr->device);
1217        struct hns_roce_mr *mr = to_hr_mr(ibmr);
1218        struct device *dev = hr_dev->dev;
1219        int npages;
1220        int ret;
1221
1222        if (mr->size != ~0ULL) {
1223                npages = ib_umem_page_count(mr->umem);
1224
1225                if (hr_dev->caps.pbl_hop_num)
1226                        hns_roce_mhop_free(hr_dev, mr);
1227                else
1228                        dma_free_coherent(dev, npages * 8,
1229                                          mr->pbl_buf, mr->pbl_dma_addr);
1230        }
1231        ib_umem_release(mr->umem);
1232
1233        mr->umem = ib_umem_get(udata, start, length, mr_access_flags);
1234        if (IS_ERR(mr->umem)) {
1235                ret = PTR_ERR(mr->umem);
1236                mr->umem = NULL;
 1237                return ret;
1238        }
1239        npages = ib_umem_page_count(mr->umem);
1240
1241        if (hr_dev->caps.pbl_hop_num) {
1242                ret = hns_roce_mhop_alloc(hr_dev, npages, mr);
1243                if (ret)
1244                        goto release_umem;
1245        } else {
1246                mr->pbl_buf = dma_alloc_coherent(dev, npages * 8,
1247                                                 &(mr->pbl_dma_addr),
1248                                                 GFP_KERNEL);
1249                if (!mr->pbl_buf) {
1250                        ret = -ENOMEM;
1251                        goto release_umem;
1252                }
1253        }
1254
1255        ret = hr_dev->hw->rereg_write_mtpt(hr_dev, mr, flags, pdn,
1256                                           mr_access_flags, virt_addr,
1257                                           length, mailbox->buf);
1258        if (ret)
1259                goto release_umem;
1260
1261
1262        ret = hns_roce_ib_umem_write_mr(hr_dev, mr, mr->umem);
1263        if (ret) {
1264                if (mr->size != ~0ULL) {
1265                        npages = ib_umem_page_count(mr->umem);
1266
1267                        if (hr_dev->caps.pbl_hop_num)
1268                                hns_roce_mhop_free(hr_dev, mr);
1269                        else
1270                                dma_free_coherent(dev, npages * 8,
1271                                                  mr->pbl_buf,
1272                                                  mr->pbl_dma_addr);
1273                }
1274
1275                goto release_umem;
1276        }
1277
1278        return 0;
1279
1280release_umem:
1281        ib_umem_release(mr->umem);
1282        return ret;
1283
1284}
1285
1286
1287int hns_roce_rereg_user_mr(struct ib_mr *ibmr, int flags, u64 start, u64 length,
1288                           u64 virt_addr, int mr_access_flags, struct ib_pd *pd,
1289                           struct ib_udata *udata)
1290{
1291        struct hns_roce_dev *hr_dev = to_hr_dev(ibmr->device);
1292        struct hns_roce_mr *mr = to_hr_mr(ibmr);
1293        struct hns_roce_cmd_mailbox *mailbox;
1294        struct device *dev = hr_dev->dev;
1295        unsigned long mtpt_idx;
1296        u32 pdn = 0;
1297        int ret;
1298
1299        if (!mr->enabled)
1300                return -EINVAL;
1301
1302        mailbox = hns_roce_alloc_cmd_mailbox(hr_dev);
1303        if (IS_ERR(mailbox))
1304                return PTR_ERR(mailbox);
1305
1306        mtpt_idx = key_to_hw_index(mr->key) & (hr_dev->caps.num_mtpts - 1);
1307        ret = hns_roce_cmd_mbox(hr_dev, 0, mailbox->dma, mtpt_idx, 0,
1308                                HNS_ROCE_CMD_QUERY_MPT,
1309                                HNS_ROCE_CMD_TIMEOUT_MSECS);
1310        if (ret)
1311                goto free_cmd_mbox;
1312
1313        ret = hns_roce_hw_destroy_mpt(hr_dev, NULL, mtpt_idx);
1314        if (ret)
1315                dev_warn(dev, "DESTROY_MPT failed (%d)\n", ret);
1316
1317        mr->enabled = 0;
1318
1319        if (flags & IB_MR_REREG_PD)
1320                pdn = to_hr_pd(pd)->pdn;
1321
1322        if (flags & IB_MR_REREG_TRANS) {
1323                ret = rereg_mr_trans(ibmr, flags,
1324                                     start, length,
1325                                     virt_addr, mr_access_flags,
1326                                     mailbox, pdn, udata);
1327                if (ret)
1328                        goto free_cmd_mbox;
1329        } else {
1330                ret = hr_dev->hw->rereg_write_mtpt(hr_dev, mr, flags, pdn,
1331                                                   mr_access_flags, virt_addr,
1332                                                   length, mailbox->buf);
1333                if (ret)
1334                        goto free_cmd_mbox;
1335        }
1336
1337        ret = hns_roce_hw_create_mpt(hr_dev, mailbox, mtpt_idx);
1338        if (ret) {
1339                dev_err(dev, "CREATE_MPT failed (%d)\n", ret);
1340                ib_umem_release(mr->umem);
1341                goto free_cmd_mbox;
1342        }
1343
1344        mr->enabled = 1;
1345        if (flags & IB_MR_REREG_ACCESS)
1346                mr->access = mr_access_flags;
1347
1348        hns_roce_free_cmd_mailbox(hr_dev, mailbox);
1349
1350        return 0;
1351
1352free_cmd_mbox:
1353        hns_roce_free_cmd_mailbox(hr_dev, mailbox);
1354
1355        return ret;
1356}
1357
1358int hns_roce_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata)
1359{
1360        struct hns_roce_dev *hr_dev = to_hr_dev(ibmr->device);
1361        struct hns_roce_mr *mr = to_hr_mr(ibmr);
1362        int ret = 0;
1363
1364        if (hr_dev->hw->dereg_mr) {
1365                ret = hr_dev->hw->dereg_mr(hr_dev, mr, udata);
1366        } else {
1367                hns_roce_mr_free(hr_dev, mr);
1368
1369                ib_umem_release(mr->umem);
1370                kfree(mr);
1371        }
1372
1373        return ret;
1374}
1375
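/*
 * hns_roce_alloc_mr(), hns_roce_set_page() and hns_roce_map_mr_sg() below
 * back the generic fast-register MR verbs.  As a rough sketch of how a
 * kernel ULP typically drives them through the ib_ core (generic verbs
 * usage, not specific to this driver):
 *
 *	mr = ib_alloc_mr(pd, IB_MR_TYPE_MEM_REG, max_num_sg);
 *	n = ib_map_mr_sg(mr, sg, sg_nents, NULL, PAGE_SIZE);
 *	// then post an IB_WR_REG_MR work request (struct ib_reg_wr)
 *	// carrying mr, a fresh rkey and the desired access flags.
 *
 * ib_map_mr_sg() lands in hns_roce_map_mr_sg(), which resets mr->npages
 * and lets ib_sg_to_pages() call hns_roce_set_page() once per page to
 * fill mr->pbl_buf.
 */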
1376struct ib_mr *hns_roce_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type,
1377                                u32 max_num_sg, struct ib_udata *udata)
1378{
1379        struct hns_roce_dev *hr_dev = to_hr_dev(pd->device);
1380        struct device *dev = hr_dev->dev;
1381        struct hns_roce_mr *mr;
1382        u64 length;
1383        u32 page_size;
1384        int ret;
1385
1386        page_size = 1 << (hr_dev->caps.pbl_buf_pg_sz + PAGE_SHIFT);
1387        length = max_num_sg * page_size;
1388
1389        if (mr_type != IB_MR_TYPE_MEM_REG)
1390                return ERR_PTR(-EINVAL);
1391
1392        if (max_num_sg > HNS_ROCE_FRMR_MAX_PA) {
1393                dev_err(dev, "max_num_sg larger than %d\n",
1394                        HNS_ROCE_FRMR_MAX_PA);
1395                return ERR_PTR(-EINVAL);
1396        }
1397
1398        mr = kzalloc(sizeof(*mr), GFP_KERNEL);
1399        if (!mr)
1400                return ERR_PTR(-ENOMEM);
1401
1402        mr->type = MR_TYPE_FRMR;
1403
1404        /* Allocate memory region key */
1405        ret = hns_roce_mr_alloc(hr_dev, to_hr_pd(pd)->pdn, 0, length,
1406                                0, max_num_sg, mr);
1407        if (ret)
1408                goto err_free;
1409
1410        ret = hns_roce_mr_enable(hr_dev, mr);
1411        if (ret)
1412                goto err_mr;
1413
1414        mr->ibmr.rkey = mr->ibmr.lkey = mr->key;
1415        mr->umem = NULL;
1416
1417        return &mr->ibmr;
1418
1419err_mr:
1420        hns_roce_mr_free(to_hr_dev(pd->device), mr);
1421
1422err_free:
1423        kfree(mr);
1424        return ERR_PTR(ret);
1425}
1426
1427static int hns_roce_set_page(struct ib_mr *ibmr, u64 addr)
1428{
1429        struct hns_roce_mr *mr = to_hr_mr(ibmr);
1430
1431        mr->pbl_buf[mr->npages++] = addr;
1432
1433        return 0;
1434}
1435
1436int hns_roce_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents,
1437                       unsigned int *sg_offset)
1438{
1439        struct hns_roce_mr *mr = to_hr_mr(ibmr);
1440
1441        mr->npages = 0;
1442
1443        return ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset, hns_roce_set_page);
1444}
1445
1446static void hns_roce_mw_free(struct hns_roce_dev *hr_dev,
1447                             struct hns_roce_mw *mw)
1448{
1449        struct device *dev = hr_dev->dev;
1450        int ret;
1451
1452        if (mw->enabled) {
1453                ret = hns_roce_hw_destroy_mpt(hr_dev, NULL,
1454                                              key_to_hw_index(mw->rkey) &
1455                                              (hr_dev->caps.num_mtpts - 1));
1456                if (ret)
1457                        dev_warn(dev, "MW DESTROY_MPT failed (%d)\n", ret);
1458
1459                hns_roce_table_put(hr_dev, &hr_dev->mr_table.mtpt_table,
1460                                   key_to_hw_index(mw->rkey));
1461        }
1462
1463        hns_roce_bitmap_free(&hr_dev->mr_table.mtpt_bitmap,
1464                             key_to_hw_index(mw->rkey), BITMAP_NO_RR);
1465}
1466
1467static int hns_roce_mw_enable(struct hns_roce_dev *hr_dev,
1468                              struct hns_roce_mw *mw)
1469{
1470        struct hns_roce_mr_table *mr_table = &hr_dev->mr_table;
1471        struct hns_roce_cmd_mailbox *mailbox;
1472        struct device *dev = hr_dev->dev;
1473        unsigned long mtpt_idx = key_to_hw_index(mw->rkey);
1474        int ret;
1475
1476        /* prepare HEM entry memory */
1477        ret = hns_roce_table_get(hr_dev, &mr_table->mtpt_table, mtpt_idx);
1478        if (ret)
1479                return ret;
1480
1481        mailbox = hns_roce_alloc_cmd_mailbox(hr_dev);
1482        if (IS_ERR(mailbox)) {
1483                ret = PTR_ERR(mailbox);
1484                goto err_table;
1485        }
1486
1487        ret = hr_dev->hw->mw_write_mtpt(mailbox->buf, mw);
1488        if (ret) {
 1489                dev_err(dev, "MW write mtpt failed!\n");
1490                goto err_page;
1491        }
1492
1493        ret = hns_roce_hw_create_mpt(hr_dev, mailbox,
1494                                     mtpt_idx & (hr_dev->caps.num_mtpts - 1));
1495        if (ret) {
1496                dev_err(dev, "MW CREATE_MPT failed (%d)\n", ret);
1497                goto err_page;
1498        }
1499
1500        mw->enabled = 1;
1501
1502        hns_roce_free_cmd_mailbox(hr_dev, mailbox);
1503
1504        return 0;
1505
1506err_page:
1507        hns_roce_free_cmd_mailbox(hr_dev, mailbox);
1508
1509err_table:
1510        hns_roce_table_put(hr_dev, &mr_table->mtpt_table, mtpt_idx);
1511
1512        return ret;
1513}
1514
1515struct ib_mw *hns_roce_alloc_mw(struct ib_pd *ib_pd, enum ib_mw_type type,
1516                                struct ib_udata *udata)
1517{
1518        struct hns_roce_dev *hr_dev = to_hr_dev(ib_pd->device);
1519        struct hns_roce_mw *mw;
1520        unsigned long index = 0;
1521        int ret;
1522
1523        mw = kmalloc(sizeof(*mw), GFP_KERNEL);
1524        if (!mw)
1525                return ERR_PTR(-ENOMEM);
1526
1527        /* Allocate a key for mw from bitmap */
1528        ret = hns_roce_bitmap_alloc(&hr_dev->mr_table.mtpt_bitmap, &index);
1529        if (ret)
1530                goto err_bitmap;
1531
1532        mw->rkey = hw_index_to_key(index);
1533
1534        mw->ibmw.rkey = mw->rkey;
1535        mw->ibmw.type = type;
1536        mw->pdn = to_hr_pd(ib_pd)->pdn;
1537        mw->pbl_hop_num = hr_dev->caps.pbl_hop_num;
1538        mw->pbl_ba_pg_sz = hr_dev->caps.pbl_ba_pg_sz;
1539        mw->pbl_buf_pg_sz = hr_dev->caps.pbl_buf_pg_sz;
1540
1541        ret = hns_roce_mw_enable(hr_dev, mw);
1542        if (ret)
1543                goto err_mw;
1544
1545        return &mw->ibmw;
1546
1547err_mw:
1548        hns_roce_mw_free(hr_dev, mw);
1549
1550err_bitmap:
1551        kfree(mw);
1552
1553        return ERR_PTR(ret);
1554}
1555
1556int hns_roce_dealloc_mw(struct ib_mw *ibmw)
1557{
1558        struct hns_roce_dev *hr_dev = to_hr_dev(ibmw->device);
1559        struct hns_roce_mw *mw = to_hr_mw(ibmw);
1560
1561        hns_roce_mw_free(hr_dev, mw);
1562        kfree(mw);
1563
1564        return 0;
1565}
1566
1567void hns_roce_mtr_init(struct hns_roce_mtr *mtr, int bt_pg_shift,
1568                       int buf_pg_shift)
1569{
1570        hns_roce_hem_list_init(&mtr->hem_list, bt_pg_shift);
1571        mtr->buf_pg_shift = buf_pg_shift;
1572}
1573
1574void hns_roce_mtr_cleanup(struct hns_roce_dev *hr_dev,
1575                          struct hns_roce_mtr *mtr)
1576{
1577        hns_roce_hem_list_release(hr_dev, &mtr->hem_list);
1578}
1579
1580static int hns_roce_write_mtr(struct hns_roce_dev *hr_dev,
1581                              struct hns_roce_mtr *mtr, dma_addr_t *bufs,
1582                              struct hns_roce_buf_region *r)
1583{
1584        int offset;
1585        int count;
1586        int npage;
1587        u64 *mtts;
1588        int end;
1589        int i;
1590
1591        offset = r->offset;
1592        end = offset + r->count;
1593        npage = 0;
1594        while (offset < end) {
1595                mtts = hns_roce_hem_list_find_mtt(hr_dev, &mtr->hem_list,
1596                                                  offset, &count, NULL);
1597                if (!mtts)
1598                        return -ENOBUFS;
1599
1600                /* Save page addr, low 12 bits : 0 */
1601                for (i = 0; i < count; i++) {
1602                        if (hr_dev->hw_rev == HNS_ROCE_HW_VER1)
1603                                mtts[i] = bufs[npage] >> PAGE_ADDR_SHIFT;
1604                        else
1605                                mtts[i] = bufs[npage];
1606
1607                        npage++;
1608                }
1609                offset += count;
1610        }
1611
1612        return 0;
1613}
1614
1615int hns_roce_mtr_attach(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr,
1616                        dma_addr_t **bufs, struct hns_roce_buf_region *regions,
1617                        int region_cnt)
1618{
1619        struct hns_roce_buf_region *r;
1620        int ret;
1621        int i;
1622
1623        ret = hns_roce_hem_list_request(hr_dev, &mtr->hem_list, regions,
1624                                        region_cnt);
1625        if (ret)
1626                return ret;
1627
1628        for (i = 0; i < region_cnt; i++) {
1629                r = &regions[i];
1630                ret = hns_roce_write_mtr(hr_dev, mtr, bufs[i], r);
1631                if (ret) {
1632                        dev_err(hr_dev->dev,
 1633                                "write mtr[%d/%d] err %d, offset=%d.\n",
 1634                                i, region_cnt, ret, r->offset);
1635                        goto err_write;
1636                }
1637        }
1638
1639        return 0;
1640
1641err_write:
1642        hns_roce_hem_list_release(hr_dev, &mtr->hem_list);
1643
1644        return ret;
1645}
1646
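/*
 * hns_roce_mtr_find() copies up to mtt_max base addresses, starting at
 * the given offset within the MTR, into mtt_buf and returns how many it
 * actually found; when base_addr is non-NULL it also reports the root BT
 * address, so callers get the page list and the top-level table address
 * in a single lookup.
 */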
1647int hns_roce_mtr_find(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr,
1648                      int offset, u64 *mtt_buf, int mtt_max, u64 *base_addr)
1649{
1650        u64 *mtts = mtt_buf;
1651        int mtt_count;
1652        int total = 0;
1653        u64 *addr;
1654        int npage;
1655        int left;
1656
1657        if (mtts == NULL || mtt_max < 1)
1658                goto done;
1659
1660        left = mtt_max;
1661        while (left > 0) {
1662                mtt_count = 0;
1663                addr = hns_roce_hem_list_find_mtt(hr_dev, &mtr->hem_list,
1664                                                  offset + total,
1665                                                  &mtt_count, NULL);
1666                if (!addr || !mtt_count)
1667                        goto done;
1668
1669                npage = min(mtt_count, left);
1670                memcpy(&mtts[total], addr, BA_BYTE_LEN * npage);
1671                left -= npage;
1672                total += npage;
1673        }
1674
1675done:
1676        if (base_addr)
1677                *base_addr = mtr->hem_list.root_ba;
1678
1679        return total;
1680}
1681