/* linux/drivers/infiniband/hw/mthca/mthca_mr.c */
   1/*
   2 * Copyright (c) 2004 Topspin Communications.  All rights reserved.
   3 * Copyright (c) 2005 Mellanox Technologies. All rights reserved.
   4 *
   5 * This software is available to you under a choice of one of two
   6 * licenses.  You may choose to be licensed under the terms of the GNU
   7 * General Public License (GPL) Version 2, available from the file
   8 * COPYING in the main directory of this source tree, or the
   9 * OpenIB.org BSD license below:
  10 *
  11 *     Redistribution and use in source and binary forms, with or
  12 *     without modification, are permitted provided that the following
  13 *     conditions are met:
  14 *
  15 *      - Redistributions of source code must retain the above
  16 *        copyright notice, this list of conditions and the following
  17 *        disclaimer.
  18 *
  19 *      - Redistributions in binary form must reproduce the above
  20 *        copyright notice, this list of conditions and the following
  21 *        disclaimer in the documentation and/or other materials
  22 *        provided with the distribution.
  23 *
  24 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  25 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  26 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  27 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
  28 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
  29 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  31 * SOFTWARE.
  32 */
  33
  34#include <linux/slab.h>
  35#include <linux/errno.h>
  36
  37#include "mthca_dev.h"
  38#include "mthca_cmd.h"
  39#include "mthca_memfree.h"
  40
/*
 * Software handle for a power-of-two range of MTT (memory translation
 * table) segments reserved from a buddy allocator.
 */
struct mthca_mtt {
        struct mthca_buddy *buddy;      /* allocator the range came from */
        int                 order;      /* log2 of number of segments */
        u32                 first_seg;  /* index of the first segment */
};
  46
/*
 * Must be packed because mtt_seg is 64 bits but only aligned to 32 bits.
 */
struct mthca_mpt_entry {
        __be32 flags;           /* MTHCA_MPT_FLAG_* plus access bits */
        __be32 page_size;       /* encoded as log2(page size) - 12 */
        __be32 key;
        __be32 pd;
        __be64 start;           /* virtual start address (iova) */
        __be64 length;
        __be32 lkey;
        __be32 window_count;
        __be32 window_count_limit;
        __be64 mtt_seg;         /* address of first MTT entry for region */
        __be32 mtt_sz;          /* Arbel only */
        u32    reserved[2];
} __attribute__((packed));
  64
#define MTHCA_MPT_FLAG_SW_OWNS       (0xfUL << 28)  /* entry owned by software */
#define MTHCA_MPT_FLAG_MIO           (1 << 17)
#define MTHCA_MPT_FLAG_BIND_ENABLE   (1 << 15)
#define MTHCA_MPT_FLAG_PHYSICAL      (1 <<  9)  /* no translation (no MTT) */
#define MTHCA_MPT_FLAG_REGION        (1 <<  8)

#define MTHCA_MTT_FLAG_PRESENT       1  /* MTT entry holds a valid address */

/* Values for the MPT ownership/status byte (first byte of the entry). */
#define MTHCA_MPT_STATUS_SW 0xF0
#define MTHCA_MPT_STATUS_HW 0x00

/* FMR key increment per remap when the Sinai optimization is active. */
#define SINAI_FMR_KEY_INC 0x1000000
  77
  78/*
  79 * Buddy allocator for MTT segments (currently not very efficient
  80 * since it doesn't keep a free list and just searches linearly
  81 * through the bitmaps)
  82 */
  83
/*
 * Allocate 2^order contiguous MTT segments.  Returns the index of the
 * first segment, or -1 (as u32) if no block of sufficient order is free.
 */
static u32 mthca_buddy_alloc(struct mthca_buddy *buddy, int order)
{
        int o;
        int m;
        u32 seg;

        spin_lock(&buddy->lock);

        /* Find the smallest free block of order >= the requested order. */
        for (o = order; o <= buddy->max_order; ++o)
                if (buddy->num_free[o]) {
                        m = 1 << (buddy->max_order - o);
                        seg = find_first_bit(buddy->bits[o], m);
                        if (seg < m)
                                goto found;
                }

        spin_unlock(&buddy->lock);
        return -1;

 found:
        clear_bit(seg, buddy->bits[o]);
        --buddy->num_free[o];

        /*
         * Split the block down to the requested order, returning the
         * "buddy" half of each split to the next lower order's free set.
         */
        while (o > order) {
                --o;
                seg <<= 1;
                set_bit(seg ^ 1, buddy->bits[o]);
                ++buddy->num_free[o];
        }

        spin_unlock(&buddy->lock);

        /* Convert from order-o block index to segment index. */
        seg <<= order;

        return seg;
}
 120
 121static void mthca_buddy_free(struct mthca_buddy *buddy, u32 seg, int order)
 122{
 123        seg >>= order;
 124
 125        spin_lock(&buddy->lock);
 126
 127        while (test_bit(seg ^ 1, buddy->bits[order])) {
 128                clear_bit(seg ^ 1, buddy->bits[order]);
 129                --buddy->num_free[order];
 130                seg >>= 1;
 131                ++order;
 132        }
 133
 134        set_bit(seg, buddy->bits[order]);
 135        ++buddy->num_free[order];
 136
 137        spin_unlock(&buddy->lock);
 138}
 139
 140static int mthca_buddy_init(struct mthca_buddy *buddy, int max_order)
 141{
 142        int i, s;
 143
 144        buddy->max_order = max_order;
 145        spin_lock_init(&buddy->lock);
 146
 147        buddy->bits = kzalloc((buddy->max_order + 1) * sizeof (long *),
 148                              GFP_KERNEL);
 149        buddy->num_free = kzalloc((buddy->max_order + 1) * sizeof (int *),
 150                                  GFP_KERNEL);
 151        if (!buddy->bits || !buddy->num_free)
 152                goto err_out;
 153
 154        for (i = 0; i <= buddy->max_order; ++i) {
 155                s = BITS_TO_LONGS(1 << (buddy->max_order - i));
 156                buddy->bits[i] = kmalloc(s * sizeof (long), GFP_KERNEL);
 157                if (!buddy->bits[i])
 158                        goto err_out_free;
 159                bitmap_zero(buddy->bits[i],
 160                            1 << (buddy->max_order - i));
 161        }
 162
 163        set_bit(0, buddy->bits[buddy->max_order]);
 164        buddy->num_free[buddy->max_order] = 1;
 165
 166        return 0;
 167
 168err_out_free:
 169        for (i = 0; i <= buddy->max_order; ++i)
 170                kfree(buddy->bits[i]);
 171
 172err_out:
 173        kfree(buddy->bits);
 174        kfree(buddy->num_free);
 175
 176        return -ENOMEM;
 177}
 178
 179static void mthca_buddy_cleanup(struct mthca_buddy *buddy)
 180{
 181        int i;
 182
 183        for (i = 0; i <= buddy->max_order; ++i)
 184                kfree(buddy->bits[i]);
 185
 186        kfree(buddy->bits);
 187        kfree(buddy->num_free);
 188}
 189
 190static u32 mthca_alloc_mtt_range(struct mthca_dev *dev, int order,
 191                                 struct mthca_buddy *buddy)
 192{
 193        u32 seg = mthca_buddy_alloc(buddy, order);
 194
 195        if (seg == -1)
 196                return -1;
 197
 198        if (mthca_is_memfree(dev))
 199                if (mthca_table_get_range(dev, dev->mr_table.mtt_table, seg,
 200                                          seg + (1 << order) - 1)) {
 201                        mthca_buddy_free(buddy, seg, order);
 202                        seg = -1;
 203                }
 204
 205        return seg;
 206}
 207
 208static struct mthca_mtt *__mthca_alloc_mtt(struct mthca_dev *dev, int size,
 209                                           struct mthca_buddy *buddy)
 210{
 211        struct mthca_mtt *mtt;
 212        int i;
 213
 214        if (size <= 0)
 215                return ERR_PTR(-EINVAL);
 216
 217        mtt = kmalloc(sizeof *mtt, GFP_KERNEL);
 218        if (!mtt)
 219                return ERR_PTR(-ENOMEM);
 220
 221        mtt->buddy = buddy;
 222        mtt->order = 0;
 223        for (i = dev->limits.mtt_seg_size / 8; i < size; i <<= 1)
 224                ++mtt->order;
 225
 226        mtt->first_seg = mthca_alloc_mtt_range(dev, mtt->order, buddy);
 227        if (mtt->first_seg == -1) {
 228                kfree(mtt);
 229                return ERR_PTR(-ENOMEM);
 230        }
 231
 232        return mtt;
 233}
 234
/* Allocate an MTT for @size entries from the device's default MTT buddy. */
struct mthca_mtt *mthca_alloc_mtt(struct mthca_dev *dev, int size)
{
        return __mthca_alloc_mtt(dev, size, &dev->mr_table.mtt_buddy);
}
 239
/* Release an MTT range and its handle.  NULL is allowed and ignored. */
void mthca_free_mtt(struct mthca_dev *dev, struct mthca_mtt *mtt)
{
        if (!mtt)
                return;

        mthca_buddy_free(mtt->buddy, mtt->first_seg, mtt->order);

        /* Drop the table reference for the whole segment range. */
        mthca_table_put_range(dev, dev->mr_table.mtt_table,
                              mtt->first_seg,
                              mtt->first_seg + (1 << mtt->order) - 1);

        kfree(mtt);
}
 253
/*
 * Write MTT entries through the WRITE_MTT firmware command, one
 * mailbox-full at a time.
 *
 * Mailbox layout: slot 0 is the 64-bit address of the first MTT entry
 * being written, slot 1 is reserved, slots 2.. carry the entries.
 * Returns 0 on success or a negative errno.
 */
static int __mthca_write_mtt(struct mthca_dev *dev, struct mthca_mtt *mtt,
                             int start_index, u64 *buffer_list, int list_len)
{
        struct mthca_mailbox *mailbox;
        __be64 *mtt_entry;
        int err = 0;
        u8 status;
        int i;

        mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
        if (IS_ERR(mailbox))
                return PTR_ERR(mailbox);
        mtt_entry = mailbox->buf;

        while (list_len > 0) {
                /* Absolute address of the first entry in this chunk. */
                mtt_entry[0] = cpu_to_be64(dev->mr_table.mtt_base +
                                           mtt->first_seg * dev->limits.mtt_seg_size +
                                           start_index * 8);
                mtt_entry[1] = 0;
                for (i = 0; i < list_len && i < MTHCA_MAILBOX_SIZE / 8 - 2; ++i)
                        mtt_entry[i + 2] = cpu_to_be64(buffer_list[i] |
                                                       MTHCA_MTT_FLAG_PRESENT);

                /*
                 * If we have an odd number of entries to write, add
                 * one more dummy entry for firmware efficiency.
                 */
                if (i & 1)
                        mtt_entry[i + 2] = 0;

                err = mthca_WRITE_MTT(dev, mailbox, (i + 1) & ~1, &status);
                if (err) {
                        mthca_warn(dev, "WRITE_MTT failed (%d)\n", err);
                        goto out;
                }
                if (status) {
                        mthca_warn(dev, "WRITE_MTT returned status 0x%02x\n",
                                   status);
                        err = -EINVAL;
                        goto out;
                }

                /* Advance past the entries actually written this round. */
                list_len    -= i;
                start_index += i;
                buffer_list += i;
        }

out:
        mthca_free_mailbox(dev, mailbox);
        return err;
}
 305
 306int mthca_write_mtt_size(struct mthca_dev *dev)
 307{
 308        if (dev->mr_table.fmr_mtt_buddy != &dev->mr_table.mtt_buddy ||
 309            !(dev->mthca_flags & MTHCA_FLAG_FMR))
 310                /*
 311                 * Be friendly to WRITE_MTT command
 312                 * and leave two empty slots for the
 313                 * index and reserved fields of the
 314                 * mailbox.
 315                 */
 316                return PAGE_SIZE / sizeof (u64) - 2;
 317
 318        /* For Arbel, all MTTs must fit in the same page. */
 319        return mthca_is_memfree(dev) ? (PAGE_SIZE / sizeof (u64)) : 0x7ffffff;
 320}
 321
/*
 * Write MTT entries on Tavor directly through the memory-mapped FMR
 * MTT aperture instead of the WRITE_MTT command.
 */
static void mthca_tavor_write_mtt_seg(struct mthca_dev *dev,
                                      struct mthca_mtt *mtt, int start_index,
                                      u64 *buffer_list, int list_len)
{
        u64 __iomem *mtts;
        int i;

        /* Byte address of the first entry within the mapped aperture. */
        mtts = dev->mr_table.tavor_fmr.mtt_base + mtt->first_seg * dev->limits.mtt_seg_size +
                start_index * sizeof (u64);
        for (i = 0; i < list_len; ++i)
                mthca_write64_raw(cpu_to_be64(buffer_list[i] | MTHCA_MTT_FLAG_PRESENT),
                                  mtts + i);
}
 335
/*
 * Write MTT entries on memfree (Arbel) hardware directly into the ICM
 * table via its CPU mapping, bracketed by DMA syncs so the device
 * observes the update.
 */
static void mthca_arbel_write_mtt_seg(struct mthca_dev *dev,
                                      struct mthca_mtt *mtt, int start_index,
                                      u64 *buffer_list, int list_len)
{
        __be64 *mtts;
        dma_addr_t dma_handle;
        int i;
        int s = start_index * sizeof (u64);     /* byte offset into the range */

        /* For Arbel, all MTTs must fit in the same page. */
        BUG_ON(s / PAGE_SIZE != (s + list_len * sizeof(u64) - 1) / PAGE_SIZE);
        /* Require full segments */
        BUG_ON(s % dev->limits.mtt_seg_size);

        mtts = mthca_table_find(dev->mr_table.mtt_table, mtt->first_seg +
                                s / dev->limits.mtt_seg_size, &dma_handle);

        BUG_ON(!mtts);

        dma_sync_single_for_cpu(&dev->pdev->dev, dma_handle,
                                list_len * sizeof (u64), DMA_TO_DEVICE);

        for (i = 0; i < list_len; ++i)
                mtts[i] = cpu_to_be64(buffer_list[i] | MTHCA_MTT_FLAG_PRESENT);

        /* Hand the updated range back to the device. */
        dma_sync_single_for_device(&dev->pdev->dev, dma_handle,
                                   list_len * sizeof (u64), DMA_TO_DEVICE);
}
 364
 365int mthca_write_mtt(struct mthca_dev *dev, struct mthca_mtt *mtt,
 366                    int start_index, u64 *buffer_list, int list_len)
 367{
 368        int size = mthca_write_mtt_size(dev);
 369        int chunk;
 370
 371        if (dev->mr_table.fmr_mtt_buddy != &dev->mr_table.mtt_buddy ||
 372            !(dev->mthca_flags & MTHCA_FLAG_FMR))
 373                return __mthca_write_mtt(dev, mtt, start_index, buffer_list, list_len);
 374
 375        while (list_len > 0) {
 376                chunk = min(size, list_len);
 377                if (mthca_is_memfree(dev))
 378                        mthca_arbel_write_mtt_seg(dev, mtt, start_index,
 379                                                  buffer_list, chunk);
 380                else
 381                        mthca_tavor_write_mtt_seg(dev, mtt, start_index,
 382                                                  buffer_list, chunk);
 383
 384                list_len    -= chunk;
 385                start_index += chunk;
 386                buffer_list += chunk;
 387        }
 388
 389        return 0;
 390}
 391
/* On Tavor the memory key and the MPT table index are identical. */
static inline u32 tavor_hw_index_to_key(u32 ind)
{
        return ind;
}

static inline u32 tavor_key_to_hw_index(u32 key)
{
        return key;
}
 401
/*
 * On Arbel the key is the index rotated left by 8 bits (and back),
 * so the low key byte varies across remaps of the same MPT index.
 */
static inline u32 arbel_hw_index_to_key(u32 ind)
{
        return (ind >> 24) | (ind << 8);
}

static inline u32 arbel_key_to_hw_index(u32 key)
{
        return (key << 24) | (key >> 8);
}
 411
 412static inline u32 hw_index_to_key(struct mthca_dev *dev, u32 ind)
 413{
 414        if (mthca_is_memfree(dev))
 415                return arbel_hw_index_to_key(ind);
 416        else
 417                return tavor_hw_index_to_key(ind);
 418}
 419
 420static inline u32 key_to_hw_index(struct mthca_dev *dev, u32 key)
 421{
 422        if (mthca_is_memfree(dev))
 423                return arbel_key_to_hw_index(key);
 424        else
 425                return tavor_key_to_hw_index(key);
 426}
 427
 428static inline u32 adjust_key(struct mthca_dev *dev, u32 key)
 429{
 430        if (dev->mthca_flags & MTHCA_FLAG_SINAI_OPT)
 431                return ((key << 20) & 0x800000) | (key & 0x7fffff);
 432        else
 433                return key;
 434}
 435
/*
 * Allocate and program a memory region (MPT entry) and hand it to the
 * hardware with SW2HW_MPT.
 *
 * @buffer_size_shift: log2 of the region page size (must be < 32).
 * @iova, @total_size: virtual start address and byte length.
 * mr->mtt must already be set by the caller (NULL => direct physical).
 * Returns 0 on success or a negative errno.
 */
int mthca_mr_alloc(struct mthca_dev *dev, u32 pd, int buffer_size_shift,
                   u64 iova, u64 total_size, u32 access, struct mthca_mr *mr)
{
        struct mthca_mailbox *mailbox;
        struct mthca_mpt_entry *mpt_entry;
        u32 key;
        int i;
        int err;
        u8 status;

        WARN_ON(buffer_size_shift >= 32);

        key = mthca_alloc(&dev->mr_table.mpt_alloc);
        if (key == -1)
                return -ENOMEM;
        key = adjust_key(dev, key);
        mr->ibmr.rkey = mr->ibmr.lkey = hw_index_to_key(dev, key);

        if (mthca_is_memfree(dev)) {
                /* Ensure ICM backing exists for this MPT entry. */
                err = mthca_table_get(dev, dev->mr_table.mpt_table, key);
                if (err)
                        goto err_out_mpt_free;
        }

        mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
        if (IS_ERR(mailbox)) {
                err = PTR_ERR(mailbox);
                goto err_out_table;
        }
        mpt_entry = mailbox->buf;

        mpt_entry->flags = cpu_to_be32(MTHCA_MPT_FLAG_SW_OWNS     |
                                       MTHCA_MPT_FLAG_MIO         |
                                       MTHCA_MPT_FLAG_REGION      |
                                       access);
        /* No MTT means the region maps physical addresses directly. */
        if (!mr->mtt)
                mpt_entry->flags |= cpu_to_be32(MTHCA_MPT_FLAG_PHYSICAL);

        /* Hardware encodes the page size as log2(size) - 12. */
        mpt_entry->page_size = cpu_to_be32(buffer_size_shift - 12);
        mpt_entry->key       = cpu_to_be32(key);
        mpt_entry->pd        = cpu_to_be32(pd);
        mpt_entry->start     = cpu_to_be64(iova);
        mpt_entry->length    = cpu_to_be64(total_size);

        /* Zero everything from lkey to the end of the entry. */
        memset(&mpt_entry->lkey, 0,
               sizeof *mpt_entry - offsetof(struct mthca_mpt_entry, lkey));

        if (mr->mtt)
                mpt_entry->mtt_seg =
                        cpu_to_be64(dev->mr_table.mtt_base +
                                    mr->mtt->first_seg * dev->limits.mtt_seg_size);

        if (0) {
                /* Debug dump of the MPT entry; normally compiled out. */
                mthca_dbg(dev, "Dumping MPT entry %08x:\n", mr->ibmr.lkey);
                for (i = 0; i < sizeof (struct mthca_mpt_entry) / 4; ++i) {
                        if (i % 4 == 0)
                                printk("[%02x] ", i * 4);
                        printk(" %08x", be32_to_cpu(((__be32 *) mpt_entry)[i]));
                        if ((i + 1) % 4 == 0)
                                printk("\n");
                }
        }

        /* Pass ownership of the MPT entry to the hardware. */
        err = mthca_SW2HW_MPT(dev, mailbox,
                              key & (dev->limits.num_mpts - 1),
                              &status);
        if (err) {
                mthca_warn(dev, "SW2HW_MPT failed (%d)\n", err);
                goto err_out_mailbox;
        } else if (status) {
                mthca_warn(dev, "SW2HW_MPT returned status 0x%02x\n",
                           status);
                err = -EINVAL;
                goto err_out_mailbox;
        }

        mthca_free_mailbox(dev, mailbox);
        return err;

err_out_mailbox:
        mthca_free_mailbox(dev, mailbox);

err_out_table:
        mthca_table_put(dev, dev->mr_table.mpt_table, key);

err_out_mpt_free:
        mthca_free(&dev->mr_table.mpt_alloc, key);
        return err;
}
 525
/*
 * Allocate a region with no translation: page shift 12, iova 0 and
 * length ~0, i.e. direct access to the whole physical address space.
 */
int mthca_mr_alloc_notrans(struct mthca_dev *dev, u32 pd,
                           u32 access, struct mthca_mr *mr)
{
        mr->mtt = NULL;         /* no MTT -> MPT gets the PHYSICAL flag */
        return mthca_mr_alloc(dev, pd, 12, 0, ~0ULL, access, mr);
}
 532
 533int mthca_mr_alloc_phys(struct mthca_dev *dev, u32 pd,
 534                        u64 *buffer_list, int buffer_size_shift,
 535                        int list_len, u64 iova, u64 total_size,
 536                        u32 access, struct mthca_mr *mr)
 537{
 538        int err;
 539
 540        mr->mtt = mthca_alloc_mtt(dev, list_len);
 541        if (IS_ERR(mr->mtt))
 542                return PTR_ERR(mr->mtt);
 543
 544        err = mthca_write_mtt(dev, mr->mtt, 0, buffer_list, list_len);
 545        if (err) {
 546                mthca_free_mtt(dev, mr->mtt);
 547                return err;
 548        }
 549
 550        err = mthca_mr_alloc(dev, pd, buffer_size_shift, iova,
 551                             total_size, access, mr);
 552        if (err)
 553                mthca_free_mtt(dev, mr->mtt);
 554
 555        return err;
 556}
 557
 558/* Free mr or fmr */
 559static void mthca_free_region(struct mthca_dev *dev, u32 lkey)
 560{
 561        mthca_table_put(dev, dev->mr_table.mpt_table,
 562                        key_to_hw_index(dev, lkey));
 563
 564        mthca_free(&dev->mr_table.mpt_alloc, key_to_hw_index(dev, lkey));
 565}
 566
/*
 * Destroy a memory region: take the MPT back from the hardware with
 * HW2SW_MPT (failures are only logged), then free the key and MTT.
 */
void mthca_free_mr(struct mthca_dev *dev, struct mthca_mr *mr)
{
        int err;
        u8 status;

        err = mthca_HW2SW_MPT(dev, NULL,
                              key_to_hw_index(dev, mr->ibmr.lkey) &
                              (dev->limits.num_mpts - 1),
                              &status);
        if (err)
                mthca_warn(dev, "HW2SW_MPT failed (%d)\n", err);
        else if (status)
                mthca_warn(dev, "HW2SW_MPT returned status 0x%02x\n",
                           status);

        mthca_free_region(dev, mr->ibmr.lkey);
        mthca_free_mtt(dev, mr->mtt);
}
 585
/*
 * Allocate an FMR: reserve an MPT entry and MTT range, record direct
 * pointers for fast remapping (MMIO apertures on Tavor, ICM virtual
 * addresses on Arbel), and program the initial MPT via SW2HW_MPT.
 * Returns 0 on success or a negative errno.
 */
int mthca_fmr_alloc(struct mthca_dev *dev, u32 pd,
                    u32 access, struct mthca_fmr *mr)
{
        struct mthca_mpt_entry *mpt_entry;
        struct mthca_mailbox *mailbox;
        u64 mtt_seg;
        u32 key, idx;
        u8 status;
        int list_len = mr->attr.max_pages;
        int err = -ENOMEM;
        int i;

        /* Page shift must be in the range the MPT encoding supports. */
        if (mr->attr.page_shift < 12 || mr->attr.page_shift >= 32)
                return -EINVAL;

        /* For Arbel, all MTTs must fit in the same page. */
        if (mthca_is_memfree(dev) &&
            mr->attr.max_pages * sizeof *mr->mem.arbel.mtts > PAGE_SIZE)
                return -EINVAL;

        mr->maps = 0;

        key = mthca_alloc(&dev->mr_table.mpt_alloc);
        if (key == -1)
                return -ENOMEM;
        key = adjust_key(dev, key);

        idx = key & (dev->limits.num_mpts - 1);
        mr->ibmr.rkey = mr->ibmr.lkey = hw_index_to_key(dev, key);

        if (mthca_is_memfree(dev)) {
                err = mthca_table_get(dev, dev->mr_table.mpt_table, key);
                if (err)
                        goto err_out_mpt_free;

                /* Cache a direct pointer to the MPT for fast remaps. */
                mr->mem.arbel.mpt = mthca_table_find(dev->mr_table.mpt_table, key, NULL);
                BUG_ON(!mr->mem.arbel.mpt);
        } else
                mr->mem.tavor.mpt = dev->mr_table.tavor_fmr.mpt_base +
                        sizeof *(mr->mem.tavor.mpt) * idx;

        mr->mtt = __mthca_alloc_mtt(dev, list_len, dev->mr_table.fmr_mtt_buddy);
        if (IS_ERR(mr->mtt)) {
                err = PTR_ERR(mr->mtt);
                goto err_out_table;
        }

        /* Byte offset of this FMR's MTT range from the MTT base. */
        mtt_seg = mr->mtt->first_seg * dev->limits.mtt_seg_size;

        if (mthca_is_memfree(dev)) {
                mr->mem.arbel.mtts = mthca_table_find(dev->mr_table.mtt_table,
                                                      mr->mtt->first_seg,
                                                      &mr->mem.arbel.dma_handle);
                BUG_ON(!mr->mem.arbel.mtts);
        } else
                mr->mem.tavor.mtts = dev->mr_table.tavor_fmr.mtt_base + mtt_seg;

        mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
        if (IS_ERR(mailbox)) {
                err = PTR_ERR(mailbox);
                goto err_out_free_mtt;
        }

        mpt_entry = mailbox->buf;

        mpt_entry->flags = cpu_to_be32(MTHCA_MPT_FLAG_SW_OWNS     |
                                       MTHCA_MPT_FLAG_MIO         |
                                       MTHCA_MPT_FLAG_REGION      |
                                       access);

        mpt_entry->page_size = cpu_to_be32(mr->attr.page_shift - 12);
        mpt_entry->key       = cpu_to_be32(key);
        mpt_entry->pd        = cpu_to_be32(pd);
        /* start, length and the remaining fields begin zeroed. */
        memset(&mpt_entry->start, 0,
               sizeof *mpt_entry - offsetof(struct mthca_mpt_entry, start));
        mpt_entry->mtt_seg   = cpu_to_be64(dev->mr_table.mtt_base + mtt_seg);

        if (0) {
                /* Debug dump of the MPT entry; normally compiled out. */
                mthca_dbg(dev, "Dumping MPT entry %08x:\n", mr->ibmr.lkey);
                for (i = 0; i < sizeof (struct mthca_mpt_entry) / 4; ++i) {
                        if (i % 4 == 0)
                                printk("[%02x] ", i * 4);
                        printk(" %08x", be32_to_cpu(((__be32 *) mpt_entry)[i]));
                        if ((i + 1) % 4 == 0)
                                printk("\n");
                }
        }

        err = mthca_SW2HW_MPT(dev, mailbox,
                              key & (dev->limits.num_mpts - 1),
                              &status);
        if (err) {
                mthca_warn(dev, "SW2HW_MPT failed (%d)\n", err);
                goto err_out_mailbox_free;
        }
        if (status) {
                mthca_warn(dev, "SW2HW_MPT returned status 0x%02x\n",
                           status);
                err = -EINVAL;
                goto err_out_mailbox_free;
        }

        mthca_free_mailbox(dev, mailbox);
        return 0;

err_out_mailbox_free:
        mthca_free_mailbox(dev, mailbox);

err_out_free_mtt:
        mthca_free_mtt(dev, mr->mtt);

err_out_table:
        mthca_table_put(dev, dev->mr_table.mpt_table, key);

err_out_mpt_free:
        mthca_free(&dev->mr_table.mpt_alloc, key);
        return err;
}
 704
/*
 * Free an FMR.  Fails with -EBUSY while the FMR still has active
 * mappings; callers must unmap first.
 */
int mthca_free_fmr(struct mthca_dev *dev, struct mthca_fmr *fmr)
{
        if (fmr->maps)
                return -EBUSY;

        mthca_free_region(dev, fmr->ibmr.lkey);
        mthca_free_mtt(dev, fmr->mtt);

        return 0;
}
 715
/*
 * Validate an FMR map request: page count within the FMR's limit,
 * iova page aligned, and remap budget not exhausted.
 */
static inline int mthca_check_fmr(struct mthca_fmr *fmr, u64 *page_list,
                                  int list_len, u64 iova)
{
        int i, page_mask;

        if (list_len > fmr->attr.max_pages)
                return -EINVAL;

        page_mask = (1 << fmr->attr.page_shift) - 1;

        /* We are getting page lists, so va must be page aligned. */
        if (iova & page_mask)
                return -EINVAL;

        /* Trust the user not to pass misaligned data in page_list */
        if (0)
                /*
                 * NOTE(review): dead code; the test also looks inverted —
                 * an alignment check would be page_list[i] & page_mask.
                 */
                for (i = 0; i < list_len; ++i) {
                        if (page_list[i] & ~page_mask)
                                return -EINVAL;
                }

        if (fmr->maps >= fmr->attr.max_maps)
                return -EINVAL;

        return 0;
}
 742
 743
/*
 * Map a list of physical pages into a Tavor FMR by writing the MTT
 * and MPT directly through the PCI apertures.  The key is advanced on
 * every remap so stale keys from previous mappings are not honored.
 */
int mthca_tavor_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list,
                             int list_len, u64 iova)
{
        struct mthca_fmr *fmr = to_mfmr(ibfmr);
        struct mthca_dev *dev = to_mdev(ibfmr->device);
        struct mthca_mpt_entry mpt_entry;
        u32 key;
        int i, err;

        err = mthca_check_fmr(fmr, page_list, list_len, iova);
        if (err)
                return err;

        ++fmr->maps;

        /* Bump the key by the table size: index (key & (num_mpts-1)) is kept. */
        key = tavor_key_to_hw_index(fmr->ibmr.lkey);
        key += dev->limits.num_mpts;
        fmr->ibmr.lkey = fmr->ibmr.rkey = tavor_hw_index_to_key(key);

        /* Take the MPT into software ownership while we rewrite it. */
        writeb(MTHCA_MPT_STATUS_SW, fmr->mem.tavor.mpt);

        for (i = 0; i < list_len; ++i) {
                __be64 mtt_entry = cpu_to_be64(page_list[i] |
                                               MTHCA_MTT_FLAG_PRESENT);
                mthca_write64_raw(mtt_entry, fmr->mem.tavor.mtts + i);
        }

        mpt_entry.lkey   = cpu_to_be32(key);
        mpt_entry.length = cpu_to_be64(list_len * (1ull << fmr->attr.page_shift));
        mpt_entry.start  = cpu_to_be64(iova);

        /* Write the key, then start..length, then hand ownership back. */
        __raw_writel((__force u32) mpt_entry.lkey, &fmr->mem.tavor.mpt->key);
        memcpy_toio(&fmr->mem.tavor.mpt->start, &mpt_entry.start,
                    offsetof(struct mthca_mpt_entry, window_count) -
                    offsetof(struct mthca_mpt_entry, start));

        writeb(MTHCA_MPT_STATUS_HW, fmr->mem.tavor.mpt);

        return 0;
}
 784
/*
 * Map a list of physical pages into an Arbel FMR by writing the MTT
 * and MPT in ICM through their CPU mappings, with DMA syncs and write
 * barriers ordering the updates against the ownership byte.
 */
int mthca_arbel_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list,
                             int list_len, u64 iova)
{
        struct mthca_fmr *fmr = to_mfmr(ibfmr);
        struct mthca_dev *dev = to_mdev(ibfmr->device);
        u32 key;
        int i, err;

        err = mthca_check_fmr(fmr, page_list, list_len, iova);
        if (err)
                return err;

        ++fmr->maps;

        /* Advance the key so stale keys from earlier maps become invalid. */
        key = arbel_key_to_hw_index(fmr->ibmr.lkey);
        if (dev->mthca_flags & MTHCA_FLAG_SINAI_OPT)
                key += SINAI_FMR_KEY_INC;
        else
                key += dev->limits.num_mpts;
        fmr->ibmr.lkey = fmr->ibmr.rkey = arbel_hw_index_to_key(key);

        /* Take the MPT into software ownership before touching it. */
        *(u8 *) fmr->mem.arbel.mpt = MTHCA_MPT_STATUS_SW;

        wmb();  /* ownership change must precede the MTT/MPT updates */

        dma_sync_single_for_cpu(&dev->pdev->dev, fmr->mem.arbel.dma_handle,
                                list_len * sizeof(u64), DMA_TO_DEVICE);

        for (i = 0; i < list_len; ++i)
                fmr->mem.arbel.mtts[i] = cpu_to_be64(page_list[i] |
                                                     MTHCA_MTT_FLAG_PRESENT);

        dma_sync_single_for_device(&dev->pdev->dev, fmr->mem.arbel.dma_handle,
                                   list_len * sizeof(u64), DMA_TO_DEVICE);

        fmr->mem.arbel.mpt->key    = cpu_to_be32(key);
        fmr->mem.arbel.mpt->lkey   = cpu_to_be32(key);
        fmr->mem.arbel.mpt->length = cpu_to_be64(list_len * (1ull << fmr->attr.page_shift));
        fmr->mem.arbel.mpt->start  = cpu_to_be64(iova);

        wmb();  /* MPT contents must be visible before ownership flips */

        /* Hand the MPT back to the hardware. */
        *(u8 *) fmr->mem.arbel.mpt = MTHCA_MPT_STATUS_HW;

        wmb();

        return 0;
}
 833
 834void mthca_tavor_fmr_unmap(struct mthca_dev *dev, struct mthca_fmr *fmr)
 835{
 836        if (!fmr->maps)
 837                return;
 838
 839        fmr->maps = 0;
 840
 841        writeb(MTHCA_MPT_STATUS_SW, fmr->mem.tavor.mpt);
 842}
 843
 844void mthca_arbel_fmr_unmap(struct mthca_dev *dev, struct mthca_fmr *fmr)
 845{
 846        if (!fmr->maps)
 847                return;
 848
 849        fmr->maps = 0;
 850
 851        *(u8 *) fmr->mem.arbel.mpt = MTHCA_MPT_STATUS_SW;
 852}
 853
/*
 * Set up the memory region tables: the MPT (memory protection table)
 * index allocator, the buddy allocator handing out power-of-two
 * ranges of MTT (memory translation table) segments, and -- on
 * Tavor-family devices with FMR support -- ioremapped windows onto
 * the MPT/MTT tables so FMR map/unmap can write entries directly.
 *
 * Returns 0 on success or a negative errno; on failure everything
 * already set up is unwound via the goto chain at the bottom.
 */
int mthca_init_mr_table(struct mthca_dev *dev)
{
	unsigned long addr;
	int mpts, mtts, err, i;

	/* MPT index allocator; the first reserved_mrws entries are
	 * kept back (reserved for firmware use). */
	err = mthca_alloc_init(&dev->mr_table.mpt_alloc,
			       dev->limits.num_mpts,
			       ~0, dev->limits.reserved_mrws);
	if (err)
		return err;

	/*
	 * FMRs need direct access to the MPT/MTT tables, so disable
	 * them (fmr_reserved_mtts = 0) on non-mem-free devices whose
	 * DDR is hidden, i.e. the tables are not reachable through a
	 * PCI BAR; otherwise advertise FMR support.
	 */
	if (!mthca_is_memfree(dev) &&
	    (dev->mthca_flags & MTHCA_FLAG_DDR_HIDDEN))
		dev->limits.fmr_reserved_mtts = 0;
	else
		dev->mthca_flags |= MTHCA_FLAG_FMR;

	if (dev->mthca_flags & MTHCA_FLAG_SINAI_OPT)
		mthca_dbg(dev, "Memory key throughput optimization activated.\n");

	/* Buddy allocator over all MTT segments; order is the number
	 * of bits needed to cover num_mtt_segs. */
	err = mthca_buddy_init(&dev->mr_table.mtt_buddy,
			       fls(dev->limits.num_mtt_segs - 1));

	if (err)
		goto err_mtt_buddy;

	/* NULL here tells the error paths (and cleanup) that no FMR
	 * windows have been ioremapped yet. */
	dev->mr_table.tavor_fmr.mpt_base = NULL;
	dev->mr_table.tavor_fmr.mtt_base = NULL;

	if (dev->limits.fmr_reserved_mtts) {
		/* Round the FMR reservation up to a power of two so a
		 * single buddy-allocator order can cover it. */
		i = fls(dev->limits.fmr_reserved_mtts - 1);

		if (i >= 31) {
			mthca_warn(dev, "Unable to reserve 2^31 FMR MTTs.\n");
			err = -EINVAL;
			goto err_fmr_mpt;
		}
		mpts = mtts = 1 << i;
	} else {
		/* No dedicated FMR pool: if we map tables below, map
		 * them in full. */
		mtts = dev->limits.num_mtt_segs;
		mpts = dev->limits.num_mpts;
	}

	/*
	 * Tavor-family with FMR support: ioremap the portions of the
	 * MPT and MTT tables that FMRs use, so map/unmap can poke
	 * entries directly instead of issuing firmware commands.
	 * BAR 4 is used as the window onto device memory; masking the
	 * table base with (BAR length - 1) presumably yields the
	 * table's offset within that BAR -- NOTE(review): confirm
	 * against the device's address map.
	 */
	if (!mthca_is_memfree(dev) &&
	    (dev->mthca_flags & MTHCA_FLAG_FMR)) {

		addr = pci_resource_start(dev->pdev, 4) +
			((pci_resource_len(dev->pdev, 4) - 1) &
			 dev->mr_table.mpt_base);

		dev->mr_table.tavor_fmr.mpt_base =
			ioremap(addr, mpts * sizeof(struct mthca_mpt_entry));

		if (!dev->mr_table.tavor_fmr.mpt_base) {
			mthca_warn(dev, "MPT ioremap for FMR failed.\n");
			err = -ENOMEM;
			goto err_fmr_mpt;
		}

		addr = pci_resource_start(dev->pdev, 4) +
			((pci_resource_len(dev->pdev, 4) - 1) &
			 dev->mr_table.mtt_base);

		dev->mr_table.tavor_fmr.mtt_base =
			ioremap(addr, mtts * dev->limits.mtt_seg_size);
		if (!dev->mr_table.tavor_fmr.mtt_base) {
			mthca_warn(dev, "MTT ioremap for FMR failed.\n");
			err = -ENOMEM;
			goto err_fmr_mtt;
		}
	}

	if (dev->limits.fmr_reserved_mtts) {
		/* FMRs get their own buddy allocator over the reserved
		 * power-of-two range... */
		err = mthca_buddy_init(&dev->mr_table.tavor_fmr.mtt_buddy, fls(mtts - 1));
		if (err)
			goto err_fmr_mtt_buddy;

		/* Prevent regular MRs from using FMR keys */
		err = mthca_buddy_alloc(&dev->mr_table.mtt_buddy, fls(mtts - 1));
		if (err)
			goto err_reserve_fmr;

		dev->mr_table.fmr_mtt_buddy =
			&dev->mr_table.tavor_fmr.mtt_buddy;
	} else
		dev->mr_table.fmr_mtt_buddy = &dev->mr_table.mtt_buddy;

	/* FMR table is always the first, take reserved MTTs out of there */
	if (dev->limits.reserved_mtts) {
		i = fls(dev->limits.reserved_mtts - 1);

		if (mthca_alloc_mtt_range(dev, i,
					  dev->mr_table.fmr_mtt_buddy) == -1) {
			mthca_warn(dev, "MTT table of order %d is too small.\n",
				  dev->mr_table.fmr_mtt_buddy->max_order);
			err = -ENOMEM;
			goto err_reserve_mtts;
		}
	}

	return 0;

	/*
	 * Error unwinding, in reverse order of setup: each label frees
	 * what had been acquired before the corresponding failure, and
	 * control falls through to the labels below it.
	 */
err_reserve_mtts:
err_reserve_fmr:
	if (dev->limits.fmr_reserved_mtts)
		mthca_buddy_cleanup(&dev->mr_table.tavor_fmr.mtt_buddy);

err_fmr_mtt_buddy:
	/* The _base pointers are non-NULL only if ioremap succeeded. */
	if (dev->mr_table.tavor_fmr.mtt_base)
		iounmap(dev->mr_table.tavor_fmr.mtt_base);

err_fmr_mtt:
	if (dev->mr_table.tavor_fmr.mpt_base)
		iounmap(dev->mr_table.tavor_fmr.mpt_base);

err_fmr_mpt:
	mthca_buddy_cleanup(&dev->mr_table.mtt_buddy);

err_mtt_buddy:
	mthca_alloc_cleanup(&dev->mr_table.mpt_alloc);

	return err;
}
 977
/*
 * Tear down everything mthca_init_mr_table() set up, in the reverse
 * of the setup order: the FMR MTT buddy (if one was created), the
 * main MTT buddy, the Tavor FMR ioremap windows, and finally the
 * MPT index allocator.
 */
void mthca_cleanup_mr_table(struct mthca_dev *dev)
{
	/* XXX check if any MRs are still allocated? */
	if (dev->limits.fmr_reserved_mtts)
		mthca_buddy_cleanup(&dev->mr_table.tavor_fmr.mtt_buddy);

	mthca_buddy_cleanup(&dev->mr_table.mtt_buddy);

	/* mtt_base/mpt_base are non-NULL only when FMR windows were
	 * ioremapped during init (Tavor-family with FMR enabled). */
	if (dev->mr_table.tavor_fmr.mtt_base)
		iounmap(dev->mr_table.tavor_fmr.mtt_base);
	if (dev->mr_table.tavor_fmr.mpt_base)
		iounmap(dev->mr_table.tavor_fmr.mpt_base);

	mthca_alloc_cleanup(&dev->mr_table.mpt_alloc);
}
 993