linux/drivers/infiniband/hw/mthca/mthca_mr.c
<<
>>
Prefs
   1/*
   2 * Copyright (c) 2004 Topspin Communications.  All rights reserved.
   3 * Copyright (c) 2005 Mellanox Technologies. All rights reserved.
   4 *
   5 * This software is available to you under a choice of one of two
   6 * licenses.  You may choose to be licensed under the terms of the GNU
   7 * General Public License (GPL) Version 2, available from the file
   8 * COPYING in the main directory of this source tree, or the
   9 * OpenIB.org BSD license below:
  10 *
  11 *     Redistribution and use in source and binary forms, with or
  12 *     without modification, are permitted provided that the following
  13 *     conditions are met:
  14 *
  15 *      - Redistributions of source code must retain the above
  16 *        copyright notice, this list of conditions and the following
  17 *        disclaimer.
  18 *
  19 *      - Redistributions in binary form must reproduce the above
  20 *        copyright notice, this list of conditions and the following
  21 *        disclaimer in the documentation and/or other materials
  22 *        provided with the distribution.
  23 *
  24 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  25 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  26 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  27 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
  28 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
  29 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  31 * SOFTWARE.
  32 */
  33
  34#include <linux/slab.h>
  35#include <linux/errno.h>
  36
  37#include "mthca_dev.h"
  38#include "mthca_cmd.h"
  39#include "mthca_memfree.h"
  40
/*
 * Handle for a contiguous range of MTT segments taken from a buddy
 * allocator.  Created by __mthca_alloc_mtt() and released by
 * mthca_free_mtt().
 */
struct mthca_mtt {
        struct mthca_buddy *buddy;      /* allocator the range came from; used again on free */
        int                 order;      /* the range covers (1 << order) segments */
        u32                 first_seg;  /* index of the first segment of the range */
};
  46
/*
 * Layout of an MPT entry as this file hands it to the HCA: built in a
 * mailbox for SW2HW_MPT, or updated in place when an FMR is remapped.
 * Every field the driver fills in is stored big-endian.
 *
 * Must be packed because mtt_seg is 64 bits but only aligned to 32 bits.
 */
struct mthca_mpt_entry {
        __be32 flags;           /* MTHCA_MPT_FLAG_* bits OR'ed with the caller's access bits */
        __be32 page_size;       /* page shift minus 12 */
        __be32 key;             /* MPT key / hardware index value */
        __be32 pd;              /* protection domain passed in by the caller */
        __be64 start;           /* I/O virtual address at which the region starts */
        __be64 length;          /* length of the region in bytes */
        __be32 lkey;            /* rewritten on every FMR remap */
        __be32 window_count;
        __be32 window_count_limit;
        __be64 mtt_seg;         /* mtt_base + first_seg * mtt_seg_size of the backing MTT range */
        __be32 mtt_sz;          /* Arbel only */
        u32    reserved[2];
} __packed;
  64
/* Bits of the mthca_mpt_entry flags word set by this file. */
#define MTHCA_MPT_FLAG_SW_OWNS       (0xfUL << 28)
#define MTHCA_MPT_FLAG_MIO           (1 << 17)
#define MTHCA_MPT_FLAG_BIND_ENABLE   (1 << 15)
#define MTHCA_MPT_FLAG_PHYSICAL      (1 <<  9)  /* set when the MR has no MTT */
#define MTHCA_MPT_FLAG_REGION        (1 <<  8)

/* OR'ed into every MTT entry written by this file. */
#define MTHCA_MTT_FLAG_PRESENT       1

/*
 * Values written to the first byte of an FMR's MPT entry: _SW before
 * the entry is rewritten (and on unmap), _HW once the rewrite is done.
 */
#define MTHCA_MPT_STATUS_SW 0xF0
#define MTHCA_MPT_STATUS_HW 0x00

/*
 * Amount added to an FMR's hardware index on each remap when
 * MTHCA_FLAG_SINAI_OPT is set (num_mpts is used otherwise).
 */
#define SINAI_FMR_KEY_INC 0x1000000
  77
  78/*
  79 * Buddy allocator for MTT segments (currently not very efficient
  80 * since it doesn't keep a free list and just searches linearly
  81 * through the bitmaps)
  82 */
  83
  84static u32 mthca_buddy_alloc(struct mthca_buddy *buddy, int order)
  85{
  86        int o;
  87        int m;
  88        u32 seg;
  89
  90        spin_lock(&buddy->lock);
  91
  92        for (o = order; o <= buddy->max_order; ++o)
  93                if (buddy->num_free[o]) {
  94                        m = 1 << (buddy->max_order - o);
  95                        seg = find_first_bit(buddy->bits[o], m);
  96                        if (seg < m)
  97                                goto found;
  98                }
  99
 100        spin_unlock(&buddy->lock);
 101        return -1;
 102
 103 found:
 104        clear_bit(seg, buddy->bits[o]);
 105        --buddy->num_free[o];
 106
 107        while (o > order) {
 108                --o;
 109                seg <<= 1;
 110                set_bit(seg ^ 1, buddy->bits[o]);
 111                ++buddy->num_free[o];
 112        }
 113
 114        spin_unlock(&buddy->lock);
 115
 116        seg <<= order;
 117
 118        return seg;
 119}
 120
 121static void mthca_buddy_free(struct mthca_buddy *buddy, u32 seg, int order)
 122{
 123        seg >>= order;
 124
 125        spin_lock(&buddy->lock);
 126
 127        while (test_bit(seg ^ 1, buddy->bits[order])) {
 128                clear_bit(seg ^ 1, buddy->bits[order]);
 129                --buddy->num_free[order];
 130                seg >>= 1;
 131                ++order;
 132        }
 133
 134        set_bit(seg, buddy->bits[order]);
 135        ++buddy->num_free[order];
 136
 137        spin_unlock(&buddy->lock);
 138}
 139
 140static int mthca_buddy_init(struct mthca_buddy *buddy, int max_order)
 141{
 142        int i, s;
 143
 144        buddy->max_order = max_order;
 145        spin_lock_init(&buddy->lock);
 146
 147        buddy->bits = kcalloc(buddy->max_order + 1, sizeof(long *),
 148                              GFP_KERNEL);
 149        buddy->num_free = kcalloc((buddy->max_order + 1), sizeof *buddy->num_free,
 150                                  GFP_KERNEL);
 151        if (!buddy->bits || !buddy->num_free)
 152                goto err_out;
 153
 154        for (i = 0; i <= buddy->max_order; ++i) {
 155                s = BITS_TO_LONGS(1 << (buddy->max_order - i));
 156                buddy->bits[i] = kmalloc_array(s, sizeof(long), GFP_KERNEL);
 157                if (!buddy->bits[i])
 158                        goto err_out_free;
 159                bitmap_zero(buddy->bits[i],
 160                            1 << (buddy->max_order - i));
 161        }
 162
 163        set_bit(0, buddy->bits[buddy->max_order]);
 164        buddy->num_free[buddy->max_order] = 1;
 165
 166        return 0;
 167
 168err_out_free:
 169        for (i = 0; i <= buddy->max_order; ++i)
 170                kfree(buddy->bits[i]);
 171
 172err_out:
 173        kfree(buddy->bits);
 174        kfree(buddy->num_free);
 175
 176        return -ENOMEM;
 177}
 178
 179static void mthca_buddy_cleanup(struct mthca_buddy *buddy)
 180{
 181        int i;
 182
 183        for (i = 0; i <= buddy->max_order; ++i)
 184                kfree(buddy->bits[i]);
 185
 186        kfree(buddy->bits);
 187        kfree(buddy->num_free);
 188}
 189
 190static u32 mthca_alloc_mtt_range(struct mthca_dev *dev, int order,
 191                                 struct mthca_buddy *buddy)
 192{
 193        u32 seg = mthca_buddy_alloc(buddy, order);
 194
 195        if (seg == -1)
 196                return -1;
 197
 198        if (mthca_is_memfree(dev))
 199                if (mthca_table_get_range(dev, dev->mr_table.mtt_table, seg,
 200                                          seg + (1 << order) - 1)) {
 201                        mthca_buddy_free(buddy, seg, order);
 202                        seg = -1;
 203                }
 204
 205        return seg;
 206}
 207
 208static struct mthca_mtt *__mthca_alloc_mtt(struct mthca_dev *dev, int size,
 209                                           struct mthca_buddy *buddy)
 210{
 211        struct mthca_mtt *mtt;
 212        int i;
 213
 214        if (size <= 0)
 215                return ERR_PTR(-EINVAL);
 216
 217        mtt = kmalloc(sizeof *mtt, GFP_KERNEL);
 218        if (!mtt)
 219                return ERR_PTR(-ENOMEM);
 220
 221        mtt->buddy = buddy;
 222        mtt->order = 0;
 223        for (i = dev->limits.mtt_seg_size / 8; i < size; i <<= 1)
 224                ++mtt->order;
 225
 226        mtt->first_seg = mthca_alloc_mtt_range(dev, mtt->order, buddy);
 227        if (mtt->first_seg == -1) {
 228                kfree(mtt);
 229                return ERR_PTR(-ENOMEM);
 230        }
 231
 232        return mtt;
 233}
 234
 235struct mthca_mtt *mthca_alloc_mtt(struct mthca_dev *dev, int size)
 236{
 237        return __mthca_alloc_mtt(dev, size, &dev->mr_table.mtt_buddy);
 238}
 239
 240void mthca_free_mtt(struct mthca_dev *dev, struct mthca_mtt *mtt)
 241{
 242        if (!mtt)
 243                return;
 244
 245        mthca_buddy_free(mtt->buddy, mtt->first_seg, mtt->order);
 246
 247        mthca_table_put_range(dev, dev->mr_table.mtt_table,
 248                              mtt->first_seg,
 249                              mtt->first_seg + (1 << mtt->order) - 1);
 250
 251        kfree(mtt);
 252}
 253
 254static int __mthca_write_mtt(struct mthca_dev *dev, struct mthca_mtt *mtt,
 255                             int start_index, u64 *buffer_list, int list_len)
 256{
 257        struct mthca_mailbox *mailbox;
 258        __be64 *mtt_entry;
 259        int err = 0;
 260        int i;
 261
 262        mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
 263        if (IS_ERR(mailbox))
 264                return PTR_ERR(mailbox);
 265        mtt_entry = mailbox->buf;
 266
 267        while (list_len > 0) {
 268                mtt_entry[0] = cpu_to_be64(dev->mr_table.mtt_base +
 269                                           mtt->first_seg * dev->limits.mtt_seg_size +
 270                                           start_index * 8);
 271                mtt_entry[1] = 0;
 272                for (i = 0; i < list_len && i < MTHCA_MAILBOX_SIZE / 8 - 2; ++i)
 273                        mtt_entry[i + 2] = cpu_to_be64(buffer_list[i] |
 274                                                       MTHCA_MTT_FLAG_PRESENT);
 275
 276                /*
 277                 * If we have an odd number of entries to write, add
 278                 * one more dummy entry for firmware efficiency.
 279                 */
 280                if (i & 1)
 281                        mtt_entry[i + 2] = 0;
 282
 283                err = mthca_WRITE_MTT(dev, mailbox, (i + 1) & ~1);
 284                if (err) {
 285                        mthca_warn(dev, "WRITE_MTT failed (%d)\n", err);
 286                        goto out;
 287                }
 288
 289                list_len    -= i;
 290                start_index += i;
 291                buffer_list += i;
 292        }
 293
 294out:
 295        mthca_free_mailbox(dev, mailbox);
 296        return err;
 297}
 298
 299int mthca_write_mtt_size(struct mthca_dev *dev)
 300{
 301        if (dev->mr_table.fmr_mtt_buddy != &dev->mr_table.mtt_buddy ||
 302            !(dev->mthca_flags & MTHCA_FLAG_FMR))
 303                /*
 304                 * Be friendly to WRITE_MTT command
 305                 * and leave two empty slots for the
 306                 * index and reserved fields of the
 307                 * mailbox.
 308                 */
 309                return PAGE_SIZE / sizeof (u64) - 2;
 310
 311        /* For Arbel, all MTTs must fit in the same page. */
 312        return mthca_is_memfree(dev) ? (PAGE_SIZE / sizeof (u64)) : 0x7ffffff;
 313}
 314
 315static void mthca_tavor_write_mtt_seg(struct mthca_dev *dev,
 316                                      struct mthca_mtt *mtt, int start_index,
 317                                      u64 *buffer_list, int list_len)
 318{
 319        u64 __iomem *mtts;
 320        int i;
 321
 322        mtts = dev->mr_table.tavor_fmr.mtt_base + mtt->first_seg * dev->limits.mtt_seg_size +
 323                start_index * sizeof (u64);
 324        for (i = 0; i < list_len; ++i)
 325                mthca_write64_raw(cpu_to_be64(buffer_list[i] | MTHCA_MTT_FLAG_PRESENT),
 326                                  mtts + i);
 327}
 328
 329static void mthca_arbel_write_mtt_seg(struct mthca_dev *dev,
 330                                      struct mthca_mtt *mtt, int start_index,
 331                                      u64 *buffer_list, int list_len)
 332{
 333        __be64 *mtts;
 334        dma_addr_t dma_handle;
 335        int i;
 336        int s = start_index * sizeof (u64);
 337
 338        /* For Arbel, all MTTs must fit in the same page. */
 339        BUG_ON(s / PAGE_SIZE != (s + list_len * sizeof(u64) - 1) / PAGE_SIZE);
 340        /* Require full segments */
 341        BUG_ON(s % dev->limits.mtt_seg_size);
 342
 343        mtts = mthca_table_find(dev->mr_table.mtt_table, mtt->first_seg +
 344                                s / dev->limits.mtt_seg_size, &dma_handle);
 345
 346        BUG_ON(!mtts);
 347
 348        dma_sync_single_for_cpu(&dev->pdev->dev, dma_handle,
 349                                list_len * sizeof (u64), DMA_TO_DEVICE);
 350
 351        for (i = 0; i < list_len; ++i)
 352                mtts[i] = cpu_to_be64(buffer_list[i] | MTHCA_MTT_FLAG_PRESENT);
 353
 354        dma_sync_single_for_device(&dev->pdev->dev, dma_handle,
 355                                   list_len * sizeof (u64), DMA_TO_DEVICE);
 356}
 357
 358int mthca_write_mtt(struct mthca_dev *dev, struct mthca_mtt *mtt,
 359                    int start_index, u64 *buffer_list, int list_len)
 360{
 361        int size = mthca_write_mtt_size(dev);
 362        int chunk;
 363
 364        if (dev->mr_table.fmr_mtt_buddy != &dev->mr_table.mtt_buddy ||
 365            !(dev->mthca_flags & MTHCA_FLAG_FMR))
 366                return __mthca_write_mtt(dev, mtt, start_index, buffer_list, list_len);
 367
 368        while (list_len > 0) {
 369                chunk = min(size, list_len);
 370                if (mthca_is_memfree(dev))
 371                        mthca_arbel_write_mtt_seg(dev, mtt, start_index,
 372                                                  buffer_list, chunk);
 373                else
 374                        mthca_tavor_write_mtt_seg(dev, mtt, start_index,
 375                                                  buffer_list, chunk);
 376
 377                list_len    -= chunk;
 378                start_index += chunk;
 379                buffer_list += chunk;
 380        }
 381
 382        return 0;
 383}
 384
 385static inline u32 tavor_hw_index_to_key(u32 ind)
 386{
 387        return ind;
 388}
 389
 390static inline u32 tavor_key_to_hw_index(u32 key)
 391{
 392        return key;
 393}
 394
 395static inline u32 arbel_hw_index_to_key(u32 ind)
 396{
 397        return (ind >> 24) | (ind << 8);
 398}
 399
 400static inline u32 arbel_key_to_hw_index(u32 key)
 401{
 402        return (key << 24) | (key >> 8);
 403}
 404
 405static inline u32 hw_index_to_key(struct mthca_dev *dev, u32 ind)
 406{
 407        if (mthca_is_memfree(dev))
 408                return arbel_hw_index_to_key(ind);
 409        else
 410                return tavor_hw_index_to_key(ind);
 411}
 412
 413static inline u32 key_to_hw_index(struct mthca_dev *dev, u32 key)
 414{
 415        if (mthca_is_memfree(dev))
 416                return arbel_key_to_hw_index(key);
 417        else
 418                return tavor_key_to_hw_index(key);
 419}
 420
 421static inline u32 adjust_key(struct mthca_dev *dev, u32 key)
 422{
 423        if (dev->mthca_flags & MTHCA_FLAG_SINAI_OPT)
 424                return ((key << 20) & 0x800000) | (key & 0x7fffff);
 425        else
 426                return key;
 427}
 428
 429int mthca_mr_alloc(struct mthca_dev *dev, u32 pd, int buffer_size_shift,
 430                   u64 iova, u64 total_size, u32 access, struct mthca_mr *mr)
 431{
 432        struct mthca_mailbox *mailbox;
 433        struct mthca_mpt_entry *mpt_entry;
 434        u32 key;
 435        int i;
 436        int err;
 437
 438        WARN_ON(buffer_size_shift >= 32);
 439
 440        key = mthca_alloc(&dev->mr_table.mpt_alloc);
 441        if (key == -1)
 442                return -ENOMEM;
 443        key = adjust_key(dev, key);
 444        mr->ibmr.rkey = mr->ibmr.lkey = hw_index_to_key(dev, key);
 445
 446        if (mthca_is_memfree(dev)) {
 447                err = mthca_table_get(dev, dev->mr_table.mpt_table, key);
 448                if (err)
 449                        goto err_out_mpt_free;
 450        }
 451
 452        mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
 453        if (IS_ERR(mailbox)) {
 454                err = PTR_ERR(mailbox);
 455                goto err_out_table;
 456        }
 457        mpt_entry = mailbox->buf;
 458
 459        mpt_entry->flags = cpu_to_be32(MTHCA_MPT_FLAG_SW_OWNS     |
 460                                       MTHCA_MPT_FLAG_MIO         |
 461                                       MTHCA_MPT_FLAG_REGION      |
 462                                       access);
 463        if (!mr->mtt)
 464                mpt_entry->flags |= cpu_to_be32(MTHCA_MPT_FLAG_PHYSICAL);
 465
 466        mpt_entry->page_size = cpu_to_be32(buffer_size_shift - 12);
 467        mpt_entry->key       = cpu_to_be32(key);
 468        mpt_entry->pd        = cpu_to_be32(pd);
 469        mpt_entry->start     = cpu_to_be64(iova);
 470        mpt_entry->length    = cpu_to_be64(total_size);
 471
 472        memset(&mpt_entry->lkey, 0,
 473               sizeof *mpt_entry - offsetof(struct mthca_mpt_entry, lkey));
 474
 475        if (mr->mtt)
 476                mpt_entry->mtt_seg =
 477                        cpu_to_be64(dev->mr_table.mtt_base +
 478                                    mr->mtt->first_seg * dev->limits.mtt_seg_size);
 479
 480        if (0) {
 481                mthca_dbg(dev, "Dumping MPT entry %08x:\n", mr->ibmr.lkey);
 482                for (i = 0; i < sizeof (struct mthca_mpt_entry) / 4; ++i) {
 483                        if (i % 4 == 0)
 484                                printk("[%02x] ", i * 4);
 485                        printk(" %08x", be32_to_cpu(((__be32 *) mpt_entry)[i]));
 486                        if ((i + 1) % 4 == 0)
 487                                printk("\n");
 488                }
 489        }
 490
 491        err = mthca_SW2HW_MPT(dev, mailbox,
 492                              key & (dev->limits.num_mpts - 1));
 493        if (err) {
 494                mthca_warn(dev, "SW2HW_MPT failed (%d)\n", err);
 495                goto err_out_mailbox;
 496        }
 497
 498        mthca_free_mailbox(dev, mailbox);
 499        return err;
 500
 501err_out_mailbox:
 502        mthca_free_mailbox(dev, mailbox);
 503
 504err_out_table:
 505        mthca_table_put(dev, dev->mr_table.mpt_table, key);
 506
 507err_out_mpt_free:
 508        mthca_free(&dev->mr_table.mpt_alloc, key);
 509        return err;
 510}
 511
 512int mthca_mr_alloc_notrans(struct mthca_dev *dev, u32 pd,
 513                           u32 access, struct mthca_mr *mr)
 514{
 515        mr->mtt = NULL;
 516        return mthca_mr_alloc(dev, pd, 12, 0, ~0ULL, access, mr);
 517}
 518
 519int mthca_mr_alloc_phys(struct mthca_dev *dev, u32 pd,
 520                        u64 *buffer_list, int buffer_size_shift,
 521                        int list_len, u64 iova, u64 total_size,
 522                        u32 access, struct mthca_mr *mr)
 523{
 524        int err;
 525
 526        mr->mtt = mthca_alloc_mtt(dev, list_len);
 527        if (IS_ERR(mr->mtt))
 528                return PTR_ERR(mr->mtt);
 529
 530        err = mthca_write_mtt(dev, mr->mtt, 0, buffer_list, list_len);
 531        if (err) {
 532                mthca_free_mtt(dev, mr->mtt);
 533                return err;
 534        }
 535
 536        err = mthca_mr_alloc(dev, pd, buffer_size_shift, iova,
 537                             total_size, access, mr);
 538        if (err)
 539                mthca_free_mtt(dev, mr->mtt);
 540
 541        return err;
 542}
 543
 544/* Free mr or fmr */
 545static void mthca_free_region(struct mthca_dev *dev, u32 lkey)
 546{
 547        mthca_table_put(dev, dev->mr_table.mpt_table,
 548                        key_to_hw_index(dev, lkey));
 549
 550        mthca_free(&dev->mr_table.mpt_alloc, key_to_hw_index(dev, lkey));
 551}
 552
 553void mthca_free_mr(struct mthca_dev *dev, struct mthca_mr *mr)
 554{
 555        int err;
 556
 557        err = mthca_HW2SW_MPT(dev, NULL,
 558                              key_to_hw_index(dev, mr->ibmr.lkey) &
 559                              (dev->limits.num_mpts - 1));
 560        if (err)
 561                mthca_warn(dev, "HW2SW_MPT failed (%d)\n", err);
 562
 563        mthca_free_region(dev, mr->ibmr.lkey);
 564        mthca_free_mtt(dev, mr->mtt);
 565}
 566
 567int mthca_fmr_alloc(struct mthca_dev *dev, u32 pd,
 568                    u32 access, struct mthca_fmr *mr)
 569{
 570        struct mthca_mpt_entry *mpt_entry;
 571        struct mthca_mailbox *mailbox;
 572        u64 mtt_seg;
 573        u32 key, idx;
 574        int list_len = mr->attr.max_pages;
 575        int err = -ENOMEM;
 576        int i;
 577
 578        if (mr->attr.page_shift < 12 || mr->attr.page_shift >= 32)
 579                return -EINVAL;
 580
 581        /* For Arbel, all MTTs must fit in the same page. */
 582        if (mthca_is_memfree(dev) &&
 583            mr->attr.max_pages * sizeof *mr->mem.arbel.mtts > PAGE_SIZE)
 584                return -EINVAL;
 585
 586        mr->maps = 0;
 587
 588        key = mthca_alloc(&dev->mr_table.mpt_alloc);
 589        if (key == -1)
 590                return -ENOMEM;
 591        key = adjust_key(dev, key);
 592
 593        idx = key & (dev->limits.num_mpts - 1);
 594        mr->ibmr.rkey = mr->ibmr.lkey = hw_index_to_key(dev, key);
 595
 596        if (mthca_is_memfree(dev)) {
 597                err = mthca_table_get(dev, dev->mr_table.mpt_table, key);
 598                if (err)
 599                        goto err_out_mpt_free;
 600
 601                mr->mem.arbel.mpt = mthca_table_find(dev->mr_table.mpt_table, key, NULL);
 602                BUG_ON(!mr->mem.arbel.mpt);
 603        } else
 604                mr->mem.tavor.mpt = dev->mr_table.tavor_fmr.mpt_base +
 605                        sizeof *(mr->mem.tavor.mpt) * idx;
 606
 607        mr->mtt = __mthca_alloc_mtt(dev, list_len, dev->mr_table.fmr_mtt_buddy);
 608        if (IS_ERR(mr->mtt)) {
 609                err = PTR_ERR(mr->mtt);
 610                goto err_out_table;
 611        }
 612
 613        mtt_seg = mr->mtt->first_seg * dev->limits.mtt_seg_size;
 614
 615        if (mthca_is_memfree(dev)) {
 616                mr->mem.arbel.mtts = mthca_table_find(dev->mr_table.mtt_table,
 617                                                      mr->mtt->first_seg,
 618                                                      &mr->mem.arbel.dma_handle);
 619                BUG_ON(!mr->mem.arbel.mtts);
 620        } else
 621                mr->mem.tavor.mtts = dev->mr_table.tavor_fmr.mtt_base + mtt_seg;
 622
 623        mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
 624        if (IS_ERR(mailbox)) {
 625                err = PTR_ERR(mailbox);
 626                goto err_out_free_mtt;
 627        }
 628
 629        mpt_entry = mailbox->buf;
 630
 631        mpt_entry->flags = cpu_to_be32(MTHCA_MPT_FLAG_SW_OWNS     |
 632                                       MTHCA_MPT_FLAG_MIO         |
 633                                       MTHCA_MPT_FLAG_REGION      |
 634                                       access);
 635
 636        mpt_entry->page_size = cpu_to_be32(mr->attr.page_shift - 12);
 637        mpt_entry->key       = cpu_to_be32(key);
 638        mpt_entry->pd        = cpu_to_be32(pd);
 639        memset(&mpt_entry->start, 0,
 640               sizeof *mpt_entry - offsetof(struct mthca_mpt_entry, start));
 641        mpt_entry->mtt_seg   = cpu_to_be64(dev->mr_table.mtt_base + mtt_seg);
 642
 643        if (0) {
 644                mthca_dbg(dev, "Dumping MPT entry %08x:\n", mr->ibmr.lkey);
 645                for (i = 0; i < sizeof (struct mthca_mpt_entry) / 4; ++i) {
 646                        if (i % 4 == 0)
 647                                printk("[%02x] ", i * 4);
 648                        printk(" %08x", be32_to_cpu(((__be32 *) mpt_entry)[i]));
 649                        if ((i + 1) % 4 == 0)
 650                                printk("\n");
 651                }
 652        }
 653
 654        err = mthca_SW2HW_MPT(dev, mailbox,
 655                              key & (dev->limits.num_mpts - 1));
 656        if (err) {
 657                mthca_warn(dev, "SW2HW_MPT failed (%d)\n", err);
 658                goto err_out_mailbox_free;
 659        }
 660
 661        mthca_free_mailbox(dev, mailbox);
 662        return 0;
 663
 664err_out_mailbox_free:
 665        mthca_free_mailbox(dev, mailbox);
 666
 667err_out_free_mtt:
 668        mthca_free_mtt(dev, mr->mtt);
 669
 670err_out_table:
 671        mthca_table_put(dev, dev->mr_table.mpt_table, key);
 672
 673err_out_mpt_free:
 674        mthca_free(&dev->mr_table.mpt_alloc, key);
 675        return err;
 676}
 677
 678int mthca_free_fmr(struct mthca_dev *dev, struct mthca_fmr *fmr)
 679{
 680        if (fmr->maps)
 681                return -EBUSY;
 682
 683        mthca_free_region(dev, fmr->ibmr.lkey);
 684        mthca_free_mtt(dev, fmr->mtt);
 685
 686        return 0;
 687}
 688
 689static inline int mthca_check_fmr(struct mthca_fmr *fmr, u64 *page_list,
 690                                  int list_len, u64 iova)
 691{
 692        int i, page_mask;
 693
 694        if (list_len > fmr->attr.max_pages)
 695                return -EINVAL;
 696
 697        page_mask = (1 << fmr->attr.page_shift) - 1;
 698
 699        /* We are getting page lists, so va must be page aligned. */
 700        if (iova & page_mask)
 701                return -EINVAL;
 702
 703        /* Trust the user not to pass misaligned data in page_list */
 704        if (0)
 705                for (i = 0; i < list_len; ++i) {
 706                        if (page_list[i] & ~page_mask)
 707                                return -EINVAL;
 708                }
 709
 710        if (fmr->maps >= fmr->attr.max_maps)
 711                return -EINVAL;
 712
 713        return 0;
 714}
 715
 716
/*
 * Map @list_len pages from @page_list at virtual address @iova into a
 * Tavor FMR, writing the MTT entries and MPT fields directly through
 * the I/O mappings stored in fmr->mem.tavor.
 *
 * Returns 0 on success or the -EINVAL produced by mthca_check_fmr().
 */
int mthca_tavor_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list,
                             int list_len, u64 iova)
{
        struct mthca_fmr *fmr = to_mfmr(ibfmr);
        struct mthca_dev *dev = to_mdev(ibfmr->device);
        struct mthca_mpt_entry mpt_entry;
        u32 key;
        int i, err;

        err = mthca_check_fmr(fmr, page_list, list_len, iova);
        if (err)
                return err;

        /* Counted against attr.max_maps; reset to 0 by the unmap function. */
        ++fmr->maps;

        /* Every remap advances the hardware index by num_mpts, giving a new key. */
        key = tavor_key_to_hw_index(fmr->ibmr.lkey);
        key += dev->limits.num_mpts;
        fmr->ibmr.lkey = fmr->ibmr.rkey = tavor_hw_index_to_key(key);

        /* Write the SW status value to the first byte of the MPT before changing it. */
        writeb(MTHCA_MPT_STATUS_SW, fmr->mem.tavor.mpt);

        for (i = 0; i < list_len; ++i) {
                __be64 mtt_entry = cpu_to_be64(page_list[i] |
                                               MTHCA_MTT_FLAG_PRESENT);
                mthca_write64_raw(mtt_entry, fmr->mem.tavor.mtts + i);
        }

        /*
         * Only lkey, length and start of the on-stack entry are set;
         * they are the only fields copied out below.
         */
        mpt_entry.lkey   = cpu_to_be32(key);
        mpt_entry.length = cpu_to_be64(list_len * (1ull << fmr->attr.page_shift));
        mpt_entry.start  = cpu_to_be64(iova);

        /* Update the key field, then copy start, length and lkey in one block. */
        __raw_writel((__force u32) mpt_entry.lkey, &fmr->mem.tavor.mpt->key);
        memcpy_toio(&fmr->mem.tavor.mpt->start, &mpt_entry.start,
                    offsetof(struct mthca_mpt_entry, window_count) -
                    offsetof(struct mthca_mpt_entry, start));

        /* Write the HW status value now that the entry is complete. */
        writeb(MTHCA_MPT_STATUS_HW, fmr->mem.tavor.mpt);

        return 0;
}
 757
/*
 * Map @list_len pages from @page_list at virtual address @iova into an
 * FMR on a mem-free (Arbel) device, updating the MTT entries and MPT
 * fields through the pointers stored in fmr->mem.arbel.
 *
 * Returns 0 on success or the -EINVAL produced by mthca_check_fmr().
 */
int mthca_arbel_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list,
                             int list_len, u64 iova)
{
        struct mthca_fmr *fmr = to_mfmr(ibfmr);
        struct mthca_dev *dev = to_mdev(ibfmr->device);
        u32 key;
        int i, err;

        err = mthca_check_fmr(fmr, page_list, list_len, iova);
        if (err)
                return err;

        /* Counted against attr.max_maps; reset to 0 by the unmap function. */
        ++fmr->maps;

        /*
         * Every remap advances the hardware index, giving a new key: by
         * SINAI_FMR_KEY_INC with the Sinai optimization, else by num_mpts.
         */
        key = arbel_key_to_hw_index(fmr->ibmr.lkey);
        if (dev->mthca_flags & MTHCA_FLAG_SINAI_OPT)
                key += SINAI_FMR_KEY_INC;
        else
                key += dev->limits.num_mpts;
        fmr->ibmr.lkey = fmr->ibmr.rkey = arbel_hw_index_to_key(key);

        /* Write the SW status value to the first byte of the MPT before changing it. */
        *(u8 *) fmr->mem.arbel.mpt = MTHCA_MPT_STATUS_SW;

        /* Order the status write before the MTT/MPT updates below. */
        wmb();

        dma_sync_single_for_cpu(&dev->pdev->dev, fmr->mem.arbel.dma_handle,
                                list_len * sizeof(u64), DMA_TO_DEVICE);

        for (i = 0; i < list_len; ++i)
                fmr->mem.arbel.mtts[i] = cpu_to_be64(page_list[i] |
                                                     MTHCA_MTT_FLAG_PRESENT);

        dma_sync_single_for_device(&dev->pdev->dev, fmr->mem.arbel.dma_handle,
                                   list_len * sizeof(u64), DMA_TO_DEVICE);

        fmr->mem.arbel.mpt->key    = cpu_to_be32(key);
        fmr->mem.arbel.mpt->lkey   = cpu_to_be32(key);
        fmr->mem.arbel.mpt->length = cpu_to_be64(list_len * (1ull << fmr->attr.page_shift));
        fmr->mem.arbel.mpt->start  = cpu_to_be64(iova);

        /* Order the field updates before the final status write. */
        wmb();

        /* Write the HW status value now that the entry is complete. */
        *(u8 *) fmr->mem.arbel.mpt = MTHCA_MPT_STATUS_HW;

        wmb();

        return 0;
}
 806
 807void mthca_tavor_fmr_unmap(struct mthca_dev *dev, struct mthca_fmr *fmr)
 808{
 809        if (!fmr->maps)
 810                return;
 811
 812        fmr->maps = 0;
 813
 814        writeb(MTHCA_MPT_STATUS_SW, fmr->mem.tavor.mpt);
 815}
 816
 817void mthca_arbel_fmr_unmap(struct mthca_dev *dev, struct mthca_fmr *fmr)
 818{
 819        if (!fmr->maps)
 820                return;
 821
 822        fmr->maps = 0;
 823
 824        *(u8 *) fmr->mem.arbel.mpt = MTHCA_MPT_STATUS_SW;
 825}
 826
 827int mthca_init_mr_table(struct mthca_dev *dev)
 828{
 829        phys_addr_t addr;
 830        int mpts, mtts, err, i;
 831
 832        err = mthca_alloc_init(&dev->mr_table.mpt_alloc,
 833                               dev->limits.num_mpts,
 834                               ~0, dev->limits.reserved_mrws);
 835        if (err)
 836                return err;
 837
 838        if (!mthca_is_memfree(dev) &&
 839            (dev->mthca_flags & MTHCA_FLAG_DDR_HIDDEN))
 840                dev->limits.fmr_reserved_mtts = 0;
 841        else
 842                dev->mthca_flags |= MTHCA_FLAG_FMR;
 843
 844        if (dev->mthca_flags & MTHCA_FLAG_SINAI_OPT)
 845                mthca_dbg(dev, "Memory key throughput optimization activated.\n");
 846
 847        err = mthca_buddy_init(&dev->mr_table.mtt_buddy,
 848                               fls(dev->limits.num_mtt_segs - 1));
 849
 850        if (err)
 851                goto err_mtt_buddy;
 852
 853        dev->mr_table.tavor_fmr.mpt_base = NULL;
 854        dev->mr_table.tavor_fmr.mtt_base = NULL;
 855
 856        if (dev->limits.fmr_reserved_mtts) {
 857                i = fls(dev->limits.fmr_reserved_mtts - 1);
 858
 859                if (i >= 31) {
 860                        mthca_warn(dev, "Unable to reserve 2^31 FMR MTTs.\n");
 861                        err = -EINVAL;
 862                        goto err_fmr_mpt;
 863                }
 864                mpts = mtts = 1 << i;
 865        } else {
 866                mtts = dev->limits.num_mtt_segs;
 867                mpts = dev->limits.num_mpts;
 868        }
 869
 870        if (!mthca_is_memfree(dev) &&
 871            (dev->mthca_flags & MTHCA_FLAG_FMR)) {
 872
 873                addr = pci_resource_start(dev->pdev, 4) +
 874                        ((pci_resource_len(dev->pdev, 4) - 1) &
 875                         dev->mr_table.mpt_base);
 876
 877                dev->mr_table.tavor_fmr.mpt_base =
 878                        ioremap(addr, mpts * sizeof(struct mthca_mpt_entry));
 879
 880                if (!dev->mr_table.tavor_fmr.mpt_base) {
 881                        mthca_warn(dev, "MPT ioremap for FMR failed.\n");
 882                        err = -ENOMEM;
 883                        goto err_fmr_mpt;
 884                }
 885
 886                addr = pci_resource_start(dev->pdev, 4) +
 887                        ((pci_resource_len(dev->pdev, 4) - 1) &
 888                         dev->mr_table.mtt_base);
 889
 890                dev->mr_table.tavor_fmr.mtt_base =
 891                        ioremap(addr, mtts * dev->limits.mtt_seg_size);
 892                if (!dev->mr_table.tavor_fmr.mtt_base) {
 893                        mthca_warn(dev, "MTT ioremap for FMR failed.\n");
 894                        err = -ENOMEM;
 895                        goto err_fmr_mtt;
 896                }
 897        }
 898
 899        if (dev->limits.fmr_reserved_mtts) {
 900                err = mthca_buddy_init(&dev->mr_table.tavor_fmr.mtt_buddy, fls(mtts - 1));
 901                if (err)
 902                        goto err_fmr_mtt_buddy;
 903
 904                /* Prevent regular MRs from using FMR keys */
 905                err = mthca_buddy_alloc(&dev->mr_table.mtt_buddy, fls(mtts - 1));
 906                if (err)
 907                        goto err_reserve_fmr;
 908
 909                dev->mr_table.fmr_mtt_buddy =
 910                        &dev->mr_table.tavor_fmr.mtt_buddy;
 911        } else
 912                dev->mr_table.fmr_mtt_buddy = &dev->mr_table.mtt_buddy;
 913
 914        /* FMR table is always the first, take reserved MTTs out of there */
 915        if (dev->limits.reserved_mtts) {
 916                i = fls(dev->limits.reserved_mtts - 1);
 917
 918                if (mthca_alloc_mtt_range(dev, i,
 919                                          dev->mr_table.fmr_mtt_buddy) == -1) {
 920                        mthca_warn(dev, "MTT table of order %d is too small.\n",
 921                                  dev->mr_table.fmr_mtt_buddy->max_order);
 922                        err = -ENOMEM;
 923                        goto err_reserve_mtts;
 924                }
 925        }
 926
 927        return 0;
 928
 929err_reserve_mtts:
 930err_reserve_fmr:
 931        if (dev->limits.fmr_reserved_mtts)
 932                mthca_buddy_cleanup(&dev->mr_table.tavor_fmr.mtt_buddy);
 933
 934err_fmr_mtt_buddy:
 935        if (dev->mr_table.tavor_fmr.mtt_base)
 936                iounmap(dev->mr_table.tavor_fmr.mtt_base);
 937
 938err_fmr_mtt:
 939        if (dev->mr_table.tavor_fmr.mpt_base)
 940                iounmap(dev->mr_table.tavor_fmr.mpt_base);
 941
 942err_fmr_mpt:
 943        mthca_buddy_cleanup(&dev->mr_table.mtt_buddy);
 944
 945err_mtt_buddy:
 946        mthca_alloc_cleanup(&dev->mr_table.mpt_alloc);
 947
 948        return err;
 949}
 950
 951void mthca_cleanup_mr_table(struct mthca_dev *dev)
 952{
 953        /* XXX check if any MRs are still allocated? */
 954        if (dev->limits.fmr_reserved_mtts)
 955                mthca_buddy_cleanup(&dev->mr_table.tavor_fmr.mtt_buddy);
 956
 957        mthca_buddy_cleanup(&dev->mr_table.mtt_buddy);
 958
 959        if (dev->mr_table.tavor_fmr.mtt_base)
 960                iounmap(dev->mr_table.tavor_fmr.mtt_base);
 961        if (dev->mr_table.tavor_fmr.mpt_base)
 962                iounmap(dev->mr_table.tavor_fmr.mpt_base);
 963
 964        mthca_alloc_cleanup(&dev->mr_table.mpt_alloc);
 965}
 966