linux/drivers/infiniband/hw/ehca/ehca_mrmw.c
<<
>>
Prefs
   1/*
   2 *  IBM eServer eHCA Infiniband device driver for Linux on POWER
   3 *
   4 *  MR/MW functions
   5 *
   6 *  Authors: Dietmar Decker <ddecker@de.ibm.com>
   7 *           Christoph Raisch <raisch@de.ibm.com>
   8 *           Hoang-Nam Nguyen <hnguyen@de.ibm.com>
   9 *
  10 *  Copyright (c) 2005 IBM Corporation
  11 *
  12 *  All rights reserved.
  13 *
  14 *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
  15 *  BSD.
  16 *
  17 * OpenIB BSD License
  18 *
  19 * Redistribution and use in source and binary forms, with or without
  20 * modification, are permitted provided that the following conditions are met:
  21 *
  22 * Redistributions of source code must retain the above copyright notice, this
  23 * list of conditions and the following disclaimer.
  24 *
  25 * Redistributions in binary form must reproduce the above copyright notice,
  26 * this list of conditions and the following disclaimer in the documentation
  27 * and/or other materials
  28 * provided with the distribution.
  29 *
  30 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  31 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  32 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  33 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  34 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  35 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  36 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
  37 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
  38 * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  39 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  40 * POSSIBILITY OF SUCH DAMAGE.
  41 */
  42
  43#include <linux/slab.h>
  44#include <rdma/ib_umem.h>
  45
  46#include "ehca_iverbs.h"
  47#include "ehca_mrmw.h"
  48#include "hcp_if.h"
  49#include "hipz_hw.h"
  50
  51#define NUM_CHUNKS(length, chunk_size) \
  52        (((length) + (chunk_size - 1)) / (chunk_size))
  53
  54/* max number of rpages (per hcall register_rpages) */
  55#define MAX_RPAGES 512
  56
  57/* DMEM toleration management */
  58#define EHCA_SECTSHIFT        SECTION_SIZE_BITS
  59#define EHCA_SECTSIZE          (1UL << EHCA_SECTSHIFT)
  60#define EHCA_HUGEPAGESHIFT     34
  61#define EHCA_HUGEPAGE_SIZE     (1UL << EHCA_HUGEPAGESHIFT)
  62#define EHCA_HUGEPAGE_PFN_MASK ((EHCA_HUGEPAGE_SIZE - 1) >> PAGE_SHIFT)
  63#define EHCA_INVAL_ADDR        0xFFFFFFFFFFFFFFFFULL
  64#define EHCA_DIR_INDEX_SHIFT 13                   /* 8k Entries in 64k block */
  65#define EHCA_TOP_INDEX_SHIFT (EHCA_DIR_INDEX_SHIFT * 2)
  66#define EHCA_MAP_ENTRIES (1 << EHCA_DIR_INDEX_SHIFT)
  67#define EHCA_TOP_MAP_SIZE (0x10000)               /* currently fixed map size */
  68#define EHCA_DIR_MAP_SIZE (0x10000)
  69#define EHCA_ENT_MAP_SIZE (0x10000)
  70#define EHCA_INDEX_MASK (EHCA_MAP_ENTRIES - 1)
  71
  72static unsigned long ehca_mr_len;
  73
  74/*
  75 * Memory map data structures
  76 */
  77struct ehca_dir_bmap {
  78        u64 ent[EHCA_MAP_ENTRIES];
  79};
  80struct ehca_top_bmap {
  81        struct ehca_dir_bmap *dir[EHCA_MAP_ENTRIES];
  82};
  83struct ehca_bmap {
  84        struct ehca_top_bmap *top[EHCA_MAP_ENTRIES];
  85};
  86
  87static struct ehca_bmap *ehca_bmap;
  88
  89static struct kmem_cache *mr_cache;
  90static struct kmem_cache *mw_cache;
  91
  92enum ehca_mr_pgsize {
  93        EHCA_MR_PGSIZE4K  = 0x1000L,
  94        EHCA_MR_PGSIZE64K = 0x10000L,
  95        EHCA_MR_PGSIZE1M  = 0x100000L,
  96        EHCA_MR_PGSIZE16M = 0x1000000L
  97};
  98
  99#define EHCA_MR_PGSHIFT4K  12
 100#define EHCA_MR_PGSHIFT64K 16
 101#define EHCA_MR_PGSHIFT1M  20
 102#define EHCA_MR_PGSHIFT16M 24
 103
 104static u64 ehca_map_vaddr(void *caddr);
 105
 106static u32 ehca_encode_hwpage_size(u32 pgsize)
 107{
 108        int log = ilog2(pgsize);
 109        WARN_ON(log < 12 || log > 24 || log & 3);
 110        return (log - 12) / 4;
 111}
 112
 113static u64 ehca_get_max_hwpage_size(struct ehca_shca *shca)
 114{
 115        return 1UL << ilog2(shca->hca_cap_mr_pgsize);
 116}
 117
 118static struct ehca_mr *ehca_mr_new(void)
 119{
 120        struct ehca_mr *me;
 121
 122        me = kmem_cache_zalloc(mr_cache, GFP_KERNEL);
 123        if (me)
 124                spin_lock_init(&me->mrlock);
 125        else
 126                ehca_gen_err("alloc failed");
 127
 128        return me;
 129}
 130
 131static void ehca_mr_delete(struct ehca_mr *me)
 132{
 133        kmem_cache_free(mr_cache, me);
 134}
 135
 136static struct ehca_mw *ehca_mw_new(void)
 137{
 138        struct ehca_mw *me;
 139
 140        me = kmem_cache_zalloc(mw_cache, GFP_KERNEL);
 141        if (me)
 142                spin_lock_init(&me->mwlock);
 143        else
 144                ehca_gen_err("alloc failed");
 145
 146        return me;
 147}
 148
 149static void ehca_mw_delete(struct ehca_mw *me)
 150{
 151        kmem_cache_free(mw_cache, me);
 152}
 153
 154/*----------------------------------------------------------------------*/
 155
 156struct ib_mr *ehca_get_dma_mr(struct ib_pd *pd, int mr_access_flags)
 157{
 158        struct ib_mr *ib_mr;
 159        int ret;
 160        struct ehca_mr *e_maxmr;
 161        struct ehca_pd *e_pd = container_of(pd, struct ehca_pd, ib_pd);
 162        struct ehca_shca *shca =
 163                container_of(pd->device, struct ehca_shca, ib_device);
 164
 165        if (shca->maxmr) {
 166                e_maxmr = ehca_mr_new();
 167                if (!e_maxmr) {
 168                        ehca_err(&shca->ib_device, "out of memory");
 169                        ib_mr = ERR_PTR(-ENOMEM);
 170                        goto get_dma_mr_exit0;
 171                }
 172
 173                ret = ehca_reg_maxmr(shca, e_maxmr,
 174                                     (void *)ehca_map_vaddr((void *)(KERNELBASE + PHYSICAL_START)),
 175                                     mr_access_flags, e_pd,
 176                                     &e_maxmr->ib.ib_mr.lkey,
 177                                     &e_maxmr->ib.ib_mr.rkey);
 178                if (ret) {
 179                        ehca_mr_delete(e_maxmr);
 180                        ib_mr = ERR_PTR(ret);
 181                        goto get_dma_mr_exit0;
 182                }
 183                ib_mr = &e_maxmr->ib.ib_mr;
 184        } else {
 185                ehca_err(&shca->ib_device, "no internal max-MR exist!");
 186                ib_mr = ERR_PTR(-EINVAL);
 187                goto get_dma_mr_exit0;
 188        }
 189
 190get_dma_mr_exit0:
 191        if (IS_ERR(ib_mr))
 192                ehca_err(&shca->ib_device, "h_ret=%li pd=%p mr_access_flags=%x",
 193                         PTR_ERR(ib_mr), pd, mr_access_flags);
 194        return ib_mr;
 195} /* end ehca_get_dma_mr() */
 196
 197/*----------------------------------------------------------------------*/
 198
 199struct ib_mr *ehca_reg_phys_mr(struct ib_pd *pd,
 200                               struct ib_phys_buf *phys_buf_array,
 201                               int num_phys_buf,
 202                               int mr_access_flags,
 203                               u64 *iova_start)
 204{
 205        struct ib_mr *ib_mr;
 206        int ret;
 207        struct ehca_mr *e_mr;
 208        struct ehca_shca *shca =
 209                container_of(pd->device, struct ehca_shca, ib_device);
 210        struct ehca_pd *e_pd = container_of(pd, struct ehca_pd, ib_pd);
 211
 212        u64 size;
 213
 214        if ((num_phys_buf <= 0) || !phys_buf_array) {
 215                ehca_err(pd->device, "bad input values: num_phys_buf=%x "
 216                         "phys_buf_array=%p", num_phys_buf, phys_buf_array);
 217                ib_mr = ERR_PTR(-EINVAL);
 218                goto reg_phys_mr_exit0;
 219        }
 220        if (((mr_access_flags & IB_ACCESS_REMOTE_WRITE) &&
 221             !(mr_access_flags & IB_ACCESS_LOCAL_WRITE)) ||
 222            ((mr_access_flags & IB_ACCESS_REMOTE_ATOMIC) &&
 223             !(mr_access_flags & IB_ACCESS_LOCAL_WRITE))) {
 224                /*
 225                 * Remote Write Access requires Local Write Access
 226                 * Remote Atomic Access requires Local Write Access
 227                 */
 228                ehca_err(pd->device, "bad input values: mr_access_flags=%x",
 229                         mr_access_flags);
 230                ib_mr = ERR_PTR(-EINVAL);
 231                goto reg_phys_mr_exit0;
 232        }
 233
 234        /* check physical buffer list and calculate size */
 235        ret = ehca_mr_chk_buf_and_calc_size(phys_buf_array, num_phys_buf,
 236                                            iova_start, &size);
 237        if (ret) {
 238                ib_mr = ERR_PTR(ret);
 239                goto reg_phys_mr_exit0;
 240        }
 241        if ((size == 0) ||
 242            (((u64)iova_start + size) < (u64)iova_start)) {
 243                ehca_err(pd->device, "bad input values: size=%llx iova_start=%p",
 244                         size, iova_start);
 245                ib_mr = ERR_PTR(-EINVAL);
 246                goto reg_phys_mr_exit0;
 247        }
 248
 249        e_mr = ehca_mr_new();
 250        if (!e_mr) {
 251                ehca_err(pd->device, "out of memory");
 252                ib_mr = ERR_PTR(-ENOMEM);
 253                goto reg_phys_mr_exit0;
 254        }
 255
 256        /* register MR on HCA */
 257        if (ehca_mr_is_maxmr(size, iova_start)) {
 258                e_mr->flags |= EHCA_MR_FLAG_MAXMR;
 259                ret = ehca_reg_maxmr(shca, e_mr, iova_start, mr_access_flags,
 260                                     e_pd, &e_mr->ib.ib_mr.lkey,
 261                                     &e_mr->ib.ib_mr.rkey);
 262                if (ret) {
 263                        ib_mr = ERR_PTR(ret);
 264                        goto reg_phys_mr_exit1;
 265                }
 266        } else {
 267                struct ehca_mr_pginfo pginfo;
 268                u32 num_kpages;
 269                u32 num_hwpages;
 270                u64 hw_pgsize;
 271
 272                num_kpages = NUM_CHUNKS(((u64)iova_start % PAGE_SIZE) + size,
 273                                        PAGE_SIZE);
 274                /* for kernel space we try most possible pgsize */
 275                hw_pgsize = ehca_get_max_hwpage_size(shca);
 276                num_hwpages = NUM_CHUNKS(((u64)iova_start % hw_pgsize) + size,
 277                                         hw_pgsize);
 278                memset(&pginfo, 0, sizeof(pginfo));
 279                pginfo.type = EHCA_MR_PGI_PHYS;
 280                pginfo.num_kpages = num_kpages;
 281                pginfo.hwpage_size = hw_pgsize;
 282                pginfo.num_hwpages = num_hwpages;
 283                pginfo.u.phy.num_phys_buf = num_phys_buf;
 284                pginfo.u.phy.phys_buf_array = phys_buf_array;
 285                pginfo.next_hwpage =
 286                        ((u64)iova_start & ~PAGE_MASK) / hw_pgsize;
 287
 288                ret = ehca_reg_mr(shca, e_mr, iova_start, size, mr_access_flags,
 289                                  e_pd, &pginfo, &e_mr->ib.ib_mr.lkey,
 290                                  &e_mr->ib.ib_mr.rkey, EHCA_REG_MR);
 291                if (ret) {
 292                        ib_mr = ERR_PTR(ret);
 293                        goto reg_phys_mr_exit1;
 294                }
 295        }
 296
 297        /* successful registration of all pages */
 298        return &e_mr->ib.ib_mr;
 299
 300reg_phys_mr_exit1:
 301        ehca_mr_delete(e_mr);
 302reg_phys_mr_exit0:
 303        if (IS_ERR(ib_mr))
 304                ehca_err(pd->device, "h_ret=%li pd=%p phys_buf_array=%p "
 305                         "num_phys_buf=%x mr_access_flags=%x iova_start=%p",
 306                         PTR_ERR(ib_mr), pd, phys_buf_array,
 307                         num_phys_buf, mr_access_flags, iova_start);
 308        return ib_mr;
 309} /* end ehca_reg_phys_mr() */
 310
 311/*----------------------------------------------------------------------*/
 312
 313struct ib_mr *ehca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
 314                               u64 virt, int mr_access_flags,
 315                               struct ib_udata *udata)
 316{
 317        struct ib_mr *ib_mr;
 318        struct ehca_mr *e_mr;
 319        struct ehca_shca *shca =
 320                container_of(pd->device, struct ehca_shca, ib_device);
 321        struct ehca_pd *e_pd = container_of(pd, struct ehca_pd, ib_pd);
 322        struct ehca_mr_pginfo pginfo;
 323        int ret, page_shift;
 324        u32 num_kpages;
 325        u32 num_hwpages;
 326        u64 hwpage_size;
 327
 328        if (!pd) {
 329                ehca_gen_err("bad pd=%p", pd);
 330                return ERR_PTR(-EFAULT);
 331        }
 332
 333        if (((mr_access_flags & IB_ACCESS_REMOTE_WRITE) &&
 334             !(mr_access_flags & IB_ACCESS_LOCAL_WRITE)) ||
 335            ((mr_access_flags & IB_ACCESS_REMOTE_ATOMIC) &&
 336             !(mr_access_flags & IB_ACCESS_LOCAL_WRITE))) {
 337                /*
 338                 * Remote Write Access requires Local Write Access
 339                 * Remote Atomic Access requires Local Write Access
 340                 */
 341                ehca_err(pd->device, "bad input values: mr_access_flags=%x",
 342                         mr_access_flags);
 343                ib_mr = ERR_PTR(-EINVAL);
 344                goto reg_user_mr_exit0;
 345        }
 346
 347        if (length == 0 || virt + length < virt) {
 348                ehca_err(pd->device, "bad input values: length=%llx "
 349                         "virt_base=%llx", length, virt);
 350                ib_mr = ERR_PTR(-EINVAL);
 351                goto reg_user_mr_exit0;
 352        }
 353
 354        e_mr = ehca_mr_new();
 355        if (!e_mr) {
 356                ehca_err(pd->device, "out of memory");
 357                ib_mr = ERR_PTR(-ENOMEM);
 358                goto reg_user_mr_exit0;
 359        }
 360
 361        e_mr->umem = ib_umem_get(pd->uobject->context, start, length,
 362                                 mr_access_flags, 0);
 363        if (IS_ERR(e_mr->umem)) {
 364                ib_mr = (void *)e_mr->umem;
 365                goto reg_user_mr_exit1;
 366        }
 367
 368        if (e_mr->umem->page_size != PAGE_SIZE) {
 369                ehca_err(pd->device, "page size not supported, "
 370                         "e_mr->umem->page_size=%x", e_mr->umem->page_size);
 371                ib_mr = ERR_PTR(-EINVAL);
 372                goto reg_user_mr_exit2;
 373        }
 374
 375        /* determine number of MR pages */
 376        num_kpages = NUM_CHUNKS((virt % PAGE_SIZE) + length, PAGE_SIZE);
 377        /* select proper hw_pgsize */
 378        page_shift = PAGE_SHIFT;
 379        if (e_mr->umem->hugetlb) {
 380                /* determine page_shift, clamp between 4K and 16M */
 381                page_shift = (fls64(length - 1) + 3) & ~3;
 382                page_shift = min(max(page_shift, EHCA_MR_PGSHIFT4K),
 383                                 EHCA_MR_PGSHIFT16M);
 384        }
 385        hwpage_size = 1UL << page_shift;
 386
 387        /* now that we have the desired page size, shift until it's
 388         * supported, too. 4K is always supported, so this terminates.
 389         */
 390        while (!(hwpage_size & shca->hca_cap_mr_pgsize))
 391                hwpage_size >>= 4;
 392
 393reg_user_mr_fallback:
 394        num_hwpages = NUM_CHUNKS((virt % hwpage_size) + length, hwpage_size);
 395        /* register MR on HCA */
 396        memset(&pginfo, 0, sizeof(pginfo));
 397        pginfo.type = EHCA_MR_PGI_USER;
 398        pginfo.hwpage_size = hwpage_size;
 399        pginfo.num_kpages = num_kpages;
 400        pginfo.num_hwpages = num_hwpages;
 401        pginfo.u.usr.region = e_mr->umem;
 402        pginfo.next_hwpage = e_mr->umem->offset / hwpage_size;
 403        pginfo.u.usr.next_chunk = list_prepare_entry(pginfo.u.usr.next_chunk,
 404                                                     (&e_mr->umem->chunk_list),
 405                                                     list);
 406
 407        ret = ehca_reg_mr(shca, e_mr, (u64 *)virt, length, mr_access_flags,
 408                          e_pd, &pginfo, &e_mr->ib.ib_mr.lkey,
 409                          &e_mr->ib.ib_mr.rkey, EHCA_REG_MR);
 410        if (ret == -EINVAL && pginfo.hwpage_size > PAGE_SIZE) {
 411                ehca_warn(pd->device, "failed to register mr "
 412                          "with hwpage_size=%llx", hwpage_size);
 413                ehca_info(pd->device, "try to register mr with "
 414                          "kpage_size=%lx", PAGE_SIZE);
 415                /*
 416                 * this means kpages are not contiguous for a hw page
 417                 * try kernel page size as fallback solution
 418                 */
 419                hwpage_size = PAGE_SIZE;
 420                goto reg_user_mr_fallback;
 421        }
 422        if (ret) {
 423                ib_mr = ERR_PTR(ret);
 424                goto reg_user_mr_exit2;
 425        }
 426
 427        /* successful registration of all pages */
 428        return &e_mr->ib.ib_mr;
 429
 430reg_user_mr_exit2:
 431        ib_umem_release(e_mr->umem);
 432reg_user_mr_exit1:
 433        ehca_mr_delete(e_mr);
 434reg_user_mr_exit0:
 435        if (IS_ERR(ib_mr))
 436                ehca_err(pd->device, "rc=%li pd=%p mr_access_flags=%x udata=%p",
 437                         PTR_ERR(ib_mr), pd, mr_access_flags, udata);
 438        return ib_mr;
 439} /* end ehca_reg_user_mr() */
 440
 441/*----------------------------------------------------------------------*/
 442
 443int ehca_rereg_phys_mr(struct ib_mr *mr,
 444                       int mr_rereg_mask,
 445                       struct ib_pd *pd,
 446                       struct ib_phys_buf *phys_buf_array,
 447                       int num_phys_buf,
 448                       int mr_access_flags,
 449                       u64 *iova_start)
 450{
 451        int ret;
 452
 453        struct ehca_shca *shca =
 454                container_of(mr->device, struct ehca_shca, ib_device);
 455        struct ehca_mr *e_mr = container_of(mr, struct ehca_mr, ib.ib_mr);
 456        u64 new_size;
 457        u64 *new_start;
 458        u32 new_acl;
 459        struct ehca_pd *new_pd;
 460        u32 tmp_lkey, tmp_rkey;
 461        unsigned long sl_flags;
 462        u32 num_kpages = 0;
 463        u32 num_hwpages = 0;
 464        struct ehca_mr_pginfo pginfo;
 465
 466        if (!(mr_rereg_mask & IB_MR_REREG_TRANS)) {
 467                /* TODO not supported, because PHYP rereg hCall needs pages */
 468                ehca_err(mr->device, "rereg without IB_MR_REREG_TRANS not "
 469                         "supported yet, mr_rereg_mask=%x", mr_rereg_mask);
 470                ret = -EINVAL;
 471                goto rereg_phys_mr_exit0;
 472        }
 473
 474        if (mr_rereg_mask & IB_MR_REREG_PD) {
 475                if (!pd) {
 476                        ehca_err(mr->device, "rereg with bad pd, pd=%p "
 477                                 "mr_rereg_mask=%x", pd, mr_rereg_mask);
 478                        ret = -EINVAL;
 479                        goto rereg_phys_mr_exit0;
 480                }
 481        }
 482
 483        if ((mr_rereg_mask &
 484             ~(IB_MR_REREG_TRANS | IB_MR_REREG_PD | IB_MR_REREG_ACCESS)) ||
 485            (mr_rereg_mask == 0)) {
 486                ret = -EINVAL;
 487                goto rereg_phys_mr_exit0;
 488        }
 489
 490        /* check other parameters */
 491        if (e_mr == shca->maxmr) {
 492                /* should be impossible, however reject to be sure */
 493                ehca_err(mr->device, "rereg internal max-MR impossible, mr=%p "
 494                         "shca->maxmr=%p mr->lkey=%x",
 495                         mr, shca->maxmr, mr->lkey);
 496                ret = -EINVAL;
 497                goto rereg_phys_mr_exit0;
 498        }
 499        if (mr_rereg_mask & IB_MR_REREG_TRANS) { /* transl., i.e. addr/size */
 500                if (e_mr->flags & EHCA_MR_FLAG_FMR) {
 501                        ehca_err(mr->device, "not supported for FMR, mr=%p "
 502                                 "flags=%x", mr, e_mr->flags);
 503                        ret = -EINVAL;
 504                        goto rereg_phys_mr_exit0;
 505                }
 506                if (!phys_buf_array || num_phys_buf <= 0) {
 507                        ehca_err(mr->device, "bad input values mr_rereg_mask=%x"
 508                                 " phys_buf_array=%p num_phys_buf=%x",
 509                                 mr_rereg_mask, phys_buf_array, num_phys_buf);
 510                        ret = -EINVAL;
 511                        goto rereg_phys_mr_exit0;
 512                }
 513        }
 514        if ((mr_rereg_mask & IB_MR_REREG_ACCESS) &&     /* change ACL */
 515            (((mr_access_flags & IB_ACCESS_REMOTE_WRITE) &&
 516              !(mr_access_flags & IB_ACCESS_LOCAL_WRITE)) ||
 517             ((mr_access_flags & IB_ACCESS_REMOTE_ATOMIC) &&
 518              !(mr_access_flags & IB_ACCESS_LOCAL_WRITE)))) {
 519                /*
 520                 * Remote Write Access requires Local Write Access
 521                 * Remote Atomic Access requires Local Write Access
 522                 */
 523                ehca_err(mr->device, "bad input values: mr_rereg_mask=%x "
 524                         "mr_access_flags=%x", mr_rereg_mask, mr_access_flags);
 525                ret = -EINVAL;
 526                goto rereg_phys_mr_exit0;
 527        }
 528
 529        /* set requested values dependent on rereg request */
 530        spin_lock_irqsave(&e_mr->mrlock, sl_flags);
 531        new_start = e_mr->start;
 532        new_size = e_mr->size;
 533        new_acl = e_mr->acl;
 534        new_pd = container_of(mr->pd, struct ehca_pd, ib_pd);
 535
 536        if (mr_rereg_mask & IB_MR_REREG_TRANS) {
 537                u64 hw_pgsize = ehca_get_max_hwpage_size(shca);
 538
 539                new_start = iova_start; /* change address */
 540                /* check physical buffer list and calculate size */
 541                ret = ehca_mr_chk_buf_and_calc_size(phys_buf_array,
 542                                                    num_phys_buf, iova_start,
 543                                                    &new_size);
 544                if (ret)
 545                        goto rereg_phys_mr_exit1;
 546                if ((new_size == 0) ||
 547                    (((u64)iova_start + new_size) < (u64)iova_start)) {
 548                        ehca_err(mr->device, "bad input values: new_size=%llx "
 549                                 "iova_start=%p", new_size, iova_start);
 550                        ret = -EINVAL;
 551                        goto rereg_phys_mr_exit1;
 552                }
 553                num_kpages = NUM_CHUNKS(((u64)new_start % PAGE_SIZE) +
 554                                        new_size, PAGE_SIZE);
 555                num_hwpages = NUM_CHUNKS(((u64)new_start % hw_pgsize) +
 556                                         new_size, hw_pgsize);
 557                memset(&pginfo, 0, sizeof(pginfo));
 558                pginfo.type = EHCA_MR_PGI_PHYS;
 559                pginfo.num_kpages = num_kpages;
 560                pginfo.hwpage_size = hw_pgsize;
 561                pginfo.num_hwpages = num_hwpages;
 562                pginfo.u.phy.num_phys_buf = num_phys_buf;
 563                pginfo.u.phy.phys_buf_array = phys_buf_array;
 564                pginfo.next_hwpage =
 565                        ((u64)iova_start & ~PAGE_MASK) / hw_pgsize;
 566        }
 567        if (mr_rereg_mask & IB_MR_REREG_ACCESS)
 568                new_acl = mr_access_flags;
 569        if (mr_rereg_mask & IB_MR_REREG_PD)
 570                new_pd = container_of(pd, struct ehca_pd, ib_pd);
 571
 572        ret = ehca_rereg_mr(shca, e_mr, new_start, new_size, new_acl,
 573                            new_pd, &pginfo, &tmp_lkey, &tmp_rkey);
 574        if (ret)
 575                goto rereg_phys_mr_exit1;
 576
 577        /* successful reregistration */
 578        if (mr_rereg_mask & IB_MR_REREG_PD)
 579                mr->pd = pd;
 580        mr->lkey = tmp_lkey;
 581        mr->rkey = tmp_rkey;
 582
 583rereg_phys_mr_exit1:
 584        spin_unlock_irqrestore(&e_mr->mrlock, sl_flags);
 585rereg_phys_mr_exit0:
 586        if (ret)
 587                ehca_err(mr->device, "ret=%i mr=%p mr_rereg_mask=%x pd=%p "
 588                         "phys_buf_array=%p num_phys_buf=%x mr_access_flags=%x "
 589                         "iova_start=%p",
 590                         ret, mr, mr_rereg_mask, pd, phys_buf_array,
 591                         num_phys_buf, mr_access_flags, iova_start);
 592        return ret;
 593} /* end ehca_rereg_phys_mr() */
 594
 595/*----------------------------------------------------------------------*/
 596
 597int ehca_query_mr(struct ib_mr *mr, struct ib_mr_attr *mr_attr)
 598{
 599        int ret = 0;
 600        u64 h_ret;
 601        struct ehca_shca *shca =
 602                container_of(mr->device, struct ehca_shca, ib_device);
 603        struct ehca_mr *e_mr = container_of(mr, struct ehca_mr, ib.ib_mr);
 604        unsigned long sl_flags;
 605        struct ehca_mr_hipzout_parms hipzout;
 606
 607        if ((e_mr->flags & EHCA_MR_FLAG_FMR)) {
 608                ehca_err(mr->device, "not supported for FMR, mr=%p e_mr=%p "
 609                         "e_mr->flags=%x", mr, e_mr, e_mr->flags);
 610                ret = -EINVAL;
 611                goto query_mr_exit0;
 612        }
 613
 614        memset(mr_attr, 0, sizeof(struct ib_mr_attr));
 615        spin_lock_irqsave(&e_mr->mrlock, sl_flags);
 616
 617        h_ret = hipz_h_query_mr(shca->ipz_hca_handle, e_mr, &hipzout);
 618        if (h_ret != H_SUCCESS) {
 619                ehca_err(mr->device, "hipz_mr_query failed, h_ret=%lli mr=%p "
 620                         "hca_hndl=%llx mr_hndl=%llx lkey=%x",
 621                         h_ret, mr, shca->ipz_hca_handle.handle,
 622                         e_mr->ipz_mr_handle.handle, mr->lkey);
 623                ret = ehca2ib_return_code(h_ret);
 624                goto query_mr_exit1;
 625        }
 626        mr_attr->pd = mr->pd;
 627        mr_attr->device_virt_addr = hipzout.vaddr;
 628        mr_attr->size = hipzout.len;
 629        mr_attr->lkey = hipzout.lkey;
 630        mr_attr->rkey = hipzout.rkey;
 631        ehca_mrmw_reverse_map_acl(&hipzout.acl, &mr_attr->mr_access_flags);
 632
 633query_mr_exit1:
 634        spin_unlock_irqrestore(&e_mr->mrlock, sl_flags);
 635query_mr_exit0:
 636        if (ret)
 637                ehca_err(mr->device, "ret=%i mr=%p mr_attr=%p",
 638                         ret, mr, mr_attr);
 639        return ret;
 640} /* end ehca_query_mr() */
 641
 642/*----------------------------------------------------------------------*/
 643
 644int ehca_dereg_mr(struct ib_mr *mr)
 645{
 646        int ret = 0;
 647        u64 h_ret;
 648        struct ehca_shca *shca =
 649                container_of(mr->device, struct ehca_shca, ib_device);
 650        struct ehca_mr *e_mr = container_of(mr, struct ehca_mr, ib.ib_mr);
 651
 652        if ((e_mr->flags & EHCA_MR_FLAG_FMR)) {
 653                ehca_err(mr->device, "not supported for FMR, mr=%p e_mr=%p "
 654                         "e_mr->flags=%x", mr, e_mr, e_mr->flags);
 655                ret = -EINVAL;
 656                goto dereg_mr_exit0;
 657        } else if (e_mr == shca->maxmr) {
 658                /* should be impossible, however reject to be sure */
 659                ehca_err(mr->device, "dereg internal max-MR impossible, mr=%p "
 660                         "shca->maxmr=%p mr->lkey=%x",
 661                         mr, shca->maxmr, mr->lkey);
 662                ret = -EINVAL;
 663                goto dereg_mr_exit0;
 664        }
 665
 666        /* TODO: BUSY: MR still has bound window(s) */
 667        h_ret = hipz_h_free_resource_mr(shca->ipz_hca_handle, e_mr);
 668        if (h_ret != H_SUCCESS) {
 669                ehca_err(mr->device, "hipz_free_mr failed, h_ret=%lli shca=%p "
 670                         "e_mr=%p hca_hndl=%llx mr_hndl=%llx mr->lkey=%x",
 671                         h_ret, shca, e_mr, shca->ipz_hca_handle.handle,
 672                         e_mr->ipz_mr_handle.handle, mr->lkey);
 673                ret = ehca2ib_return_code(h_ret);
 674                goto dereg_mr_exit0;
 675        }
 676
 677        if (e_mr->umem)
 678                ib_umem_release(e_mr->umem);
 679
 680        /* successful deregistration */
 681        ehca_mr_delete(e_mr);
 682
 683dereg_mr_exit0:
 684        if (ret)
 685                ehca_err(mr->device, "ret=%i mr=%p", ret, mr);
 686        return ret;
 687} /* end ehca_dereg_mr() */
 688
 689/*----------------------------------------------------------------------*/
 690
 691struct ib_mw *ehca_alloc_mw(struct ib_pd *pd)
 692{
 693        struct ib_mw *ib_mw;
 694        u64 h_ret;
 695        struct ehca_mw *e_mw;
 696        struct ehca_pd *e_pd = container_of(pd, struct ehca_pd, ib_pd);
 697        struct ehca_shca *shca =
 698                container_of(pd->device, struct ehca_shca, ib_device);
 699        struct ehca_mw_hipzout_parms hipzout;
 700
 701        e_mw = ehca_mw_new();
 702        if (!e_mw) {
 703                ib_mw = ERR_PTR(-ENOMEM);
 704                goto alloc_mw_exit0;
 705        }
 706
 707        h_ret = hipz_h_alloc_resource_mw(shca->ipz_hca_handle, e_mw,
 708                                         e_pd->fw_pd, &hipzout);
 709        if (h_ret != H_SUCCESS) {
 710                ehca_err(pd->device, "hipz_mw_allocate failed, h_ret=%lli "
 711                         "shca=%p hca_hndl=%llx mw=%p",
 712                         h_ret, shca, shca->ipz_hca_handle.handle, e_mw);
 713                ib_mw = ERR_PTR(ehca2ib_return_code(h_ret));
 714                goto alloc_mw_exit1;
 715        }
 716        /* successful MW allocation */
 717        e_mw->ipz_mw_handle = hipzout.handle;
 718        e_mw->ib_mw.rkey    = hipzout.rkey;
 719        return &e_mw->ib_mw;
 720
 721alloc_mw_exit1:
 722        ehca_mw_delete(e_mw);
 723alloc_mw_exit0:
 724        if (IS_ERR(ib_mw))
 725                ehca_err(pd->device, "h_ret=%li pd=%p", PTR_ERR(ib_mw), pd);
 726        return ib_mw;
 727} /* end ehca_alloc_mw() */
 728
 729/*----------------------------------------------------------------------*/
 730
 731int ehca_bind_mw(struct ib_qp *qp,
 732                 struct ib_mw *mw,
 733                 struct ib_mw_bind *mw_bind)
 734{
 735        /* TODO: not supported up to now */
 736        ehca_gen_err("bind MW currently not supported by HCAD");
 737
 738        return -EPERM;
 739} /* end ehca_bind_mw() */
 740
 741/*----------------------------------------------------------------------*/
 742
 743int ehca_dealloc_mw(struct ib_mw *mw)
 744{
 745        u64 h_ret;
 746        struct ehca_shca *shca =
 747                container_of(mw->device, struct ehca_shca, ib_device);
 748        struct ehca_mw *e_mw = container_of(mw, struct ehca_mw, ib_mw);
 749
 750        h_ret = hipz_h_free_resource_mw(shca->ipz_hca_handle, e_mw);
 751        if (h_ret != H_SUCCESS) {
 752                ehca_err(mw->device, "hipz_free_mw failed, h_ret=%lli shca=%p "
 753                         "mw=%p rkey=%x hca_hndl=%llx mw_hndl=%llx",
 754                         h_ret, shca, mw, mw->rkey, shca->ipz_hca_handle.handle,
 755                         e_mw->ipz_mw_handle.handle);
 756                return ehca2ib_return_code(h_ret);
 757        }
 758        /* successful deallocation */
 759        ehca_mw_delete(e_mw);
 760        return 0;
 761} /* end ehca_dealloc_mw() */
 762
 763/*----------------------------------------------------------------------*/
 764
 765struct ib_fmr *ehca_alloc_fmr(struct ib_pd *pd,
 766                              int mr_access_flags,
 767                              struct ib_fmr_attr *fmr_attr)
 768{
 769        struct ib_fmr *ib_fmr;
 770        struct ehca_shca *shca =
 771                container_of(pd->device, struct ehca_shca, ib_device);
 772        struct ehca_pd *e_pd = container_of(pd, struct ehca_pd, ib_pd);
 773        struct ehca_mr *e_fmr;
 774        int ret;
 775        u32 tmp_lkey, tmp_rkey;
 776        struct ehca_mr_pginfo pginfo;
 777        u64 hw_pgsize;
 778
 779        /* check other parameters */
 780        if (((mr_access_flags & IB_ACCESS_REMOTE_WRITE) &&
 781             !(mr_access_flags & IB_ACCESS_LOCAL_WRITE)) ||
 782            ((mr_access_flags & IB_ACCESS_REMOTE_ATOMIC) &&
 783             !(mr_access_flags & IB_ACCESS_LOCAL_WRITE))) {
 784                /*
 785                 * Remote Write Access requires Local Write Access
 786                 * Remote Atomic Access requires Local Write Access
 787                 */
 788                ehca_err(pd->device, "bad input values: mr_access_flags=%x",
 789                         mr_access_flags);
 790                ib_fmr = ERR_PTR(-EINVAL);
 791                goto alloc_fmr_exit0;
 792        }
 793        if (mr_access_flags & IB_ACCESS_MW_BIND) {
 794                ehca_err(pd->device, "bad input values: mr_access_flags=%x",
 795                         mr_access_flags);
 796                ib_fmr = ERR_PTR(-EINVAL);
 797                goto alloc_fmr_exit0;
 798        }
 799        if ((fmr_attr->max_pages == 0) || (fmr_attr->max_maps == 0)) {
 800                ehca_err(pd->device, "bad input values: fmr_attr->max_pages=%x "
 801                         "fmr_attr->max_maps=%x fmr_attr->page_shift=%x",
 802                         fmr_attr->max_pages, fmr_attr->max_maps,
 803                         fmr_attr->page_shift);
 804                ib_fmr = ERR_PTR(-EINVAL);
 805                goto alloc_fmr_exit0;
 806        }
 807
 808        hw_pgsize = 1 << fmr_attr->page_shift;
 809        if (!(hw_pgsize & shca->hca_cap_mr_pgsize)) {
 810                ehca_err(pd->device, "unsupported fmr_attr->page_shift=%x",
 811                         fmr_attr->page_shift);
 812                ib_fmr = ERR_PTR(-EINVAL);
 813                goto alloc_fmr_exit0;
 814        }
 815
 816        e_fmr = ehca_mr_new();
 817        if (!e_fmr) {
 818                ib_fmr = ERR_PTR(-ENOMEM);
 819                goto alloc_fmr_exit0;
 820        }
 821        e_fmr->flags |= EHCA_MR_FLAG_FMR;
 822
 823        /* register MR on HCA */
 824        memset(&pginfo, 0, sizeof(pginfo));
 825        pginfo.hwpage_size = hw_pgsize;
 826        /*
 827         * pginfo.num_hwpages==0, ie register_rpages() will not be called
 828         * but deferred to map_phys_fmr()
 829         */
 830        ret = ehca_reg_mr(shca, e_fmr, NULL,
 831                          fmr_attr->max_pages * (1 << fmr_attr->page_shift),
 832                          mr_access_flags, e_pd, &pginfo,
 833                          &tmp_lkey, &tmp_rkey, EHCA_REG_MR);
 834        if (ret) {
 835                ib_fmr = ERR_PTR(ret);
 836                goto alloc_fmr_exit1;
 837        }
 838
 839        /* successful */
 840        e_fmr->hwpage_size = hw_pgsize;
 841        e_fmr->fmr_page_size = 1 << fmr_attr->page_shift;
 842        e_fmr->fmr_max_pages = fmr_attr->max_pages;
 843        e_fmr->fmr_max_maps = fmr_attr->max_maps;
 844        e_fmr->fmr_map_cnt = 0;
 845        return &e_fmr->ib.ib_fmr;
 846
 847alloc_fmr_exit1:
 848        ehca_mr_delete(e_fmr);
 849alloc_fmr_exit0:
 850        return ib_fmr;
 851} /* end ehca_alloc_fmr() */
 852
 853/*----------------------------------------------------------------------*/
 854
 855int ehca_map_phys_fmr(struct ib_fmr *fmr,
 856                      u64 *page_list,
 857                      int list_len,
 858                      u64 iova)
 859{
 860        int ret;
 861        struct ehca_shca *shca =
 862                container_of(fmr->device, struct ehca_shca, ib_device);
 863        struct ehca_mr *e_fmr = container_of(fmr, struct ehca_mr, ib.ib_fmr);
 864        struct ehca_pd *e_pd = container_of(fmr->pd, struct ehca_pd, ib_pd);
 865        struct ehca_mr_pginfo pginfo;
 866        u32 tmp_lkey, tmp_rkey;
 867
 868        if (!(e_fmr->flags & EHCA_MR_FLAG_FMR)) {
 869                ehca_err(fmr->device, "not a FMR, e_fmr=%p e_fmr->flags=%x",
 870                         e_fmr, e_fmr->flags);
 871                ret = -EINVAL;
 872                goto map_phys_fmr_exit0;
 873        }
 874        ret = ehca_fmr_check_page_list(e_fmr, page_list, list_len);
 875        if (ret)
 876                goto map_phys_fmr_exit0;
 877        if (iova % e_fmr->fmr_page_size) {
 878                /* only whole-numbered pages */
 879                ehca_err(fmr->device, "bad iova, iova=%llx fmr_page_size=%x",
 880                         iova, e_fmr->fmr_page_size);
 881                ret = -EINVAL;
 882                goto map_phys_fmr_exit0;
 883        }
 884        if (e_fmr->fmr_map_cnt >= e_fmr->fmr_max_maps) {
 885                /* HCAD does not limit the maps, however trace this anyway */
 886                ehca_info(fmr->device, "map limit exceeded, fmr=%p "
 887                          "e_fmr->fmr_map_cnt=%x e_fmr->fmr_max_maps=%x",
 888                          fmr, e_fmr->fmr_map_cnt, e_fmr->fmr_max_maps);
 889        }
 890
 891        memset(&pginfo, 0, sizeof(pginfo));
 892        pginfo.type = EHCA_MR_PGI_FMR;
 893        pginfo.num_kpages = list_len;
 894        pginfo.hwpage_size = e_fmr->hwpage_size;
 895        pginfo.num_hwpages =
 896                list_len * e_fmr->fmr_page_size / pginfo.hwpage_size;
 897        pginfo.u.fmr.page_list = page_list;
 898        pginfo.next_hwpage =
 899                (iova & (e_fmr->fmr_page_size-1)) / pginfo.hwpage_size;
 900        pginfo.u.fmr.fmr_pgsize = e_fmr->fmr_page_size;
 901
 902        ret = ehca_rereg_mr(shca, e_fmr, (u64 *)iova,
 903                            list_len * e_fmr->fmr_page_size,
 904                            e_fmr->acl, e_pd, &pginfo, &tmp_lkey, &tmp_rkey);
 905        if (ret)
 906                goto map_phys_fmr_exit0;
 907
 908        /* successful reregistration */
 909        e_fmr->fmr_map_cnt++;
 910        e_fmr->ib.ib_fmr.lkey = tmp_lkey;
 911        e_fmr->ib.ib_fmr.rkey = tmp_rkey;
 912        return 0;
 913
 914map_phys_fmr_exit0:
 915        if (ret)
 916                ehca_err(fmr->device, "ret=%i fmr=%p page_list=%p list_len=%x "
 917                         "iova=%llx", ret, fmr, page_list, list_len, iova);
 918        return ret;
 919} /* end ehca_map_phys_fmr() */
 920
 921/*----------------------------------------------------------------------*/
 922
 923int ehca_unmap_fmr(struct list_head *fmr_list)
 924{
 925        int ret = 0;
 926        struct ib_fmr *ib_fmr;
 927        struct ehca_shca *shca = NULL;
 928        struct ehca_shca *prev_shca;
 929        struct ehca_mr *e_fmr;
 930        u32 num_fmr = 0;
 931        u32 unmap_fmr_cnt = 0;
 932
 933        /* check all FMR belong to same SHCA, and check internal flag */
 934        list_for_each_entry(ib_fmr, fmr_list, list) {
 935                prev_shca = shca;
 936                shca = container_of(ib_fmr->device, struct ehca_shca,
 937                                    ib_device);
 938                e_fmr = container_of(ib_fmr, struct ehca_mr, ib.ib_fmr);
 939                if ((shca != prev_shca) && prev_shca) {
 940                        ehca_err(&shca->ib_device, "SHCA mismatch, shca=%p "
 941                                 "prev_shca=%p e_fmr=%p",
 942                                 shca, prev_shca, e_fmr);
 943                        ret = -EINVAL;
 944                        goto unmap_fmr_exit0;
 945                }
 946                if (!(e_fmr->flags & EHCA_MR_FLAG_FMR)) {
 947                        ehca_err(&shca->ib_device, "not a FMR, e_fmr=%p "
 948                                 "e_fmr->flags=%x", e_fmr, e_fmr->flags);
 949                        ret = -EINVAL;
 950                        goto unmap_fmr_exit0;
 951                }
 952                num_fmr++;
 953        }
 954
 955        /* loop over all FMRs to unmap */
 956        list_for_each_entry(ib_fmr, fmr_list, list) {
 957                unmap_fmr_cnt++;
 958                e_fmr = container_of(ib_fmr, struct ehca_mr, ib.ib_fmr);
 959                shca = container_of(ib_fmr->device, struct ehca_shca,
 960                                    ib_device);
 961                ret = ehca_unmap_one_fmr(shca, e_fmr);
 962                if (ret) {
 963                        /* unmap failed, stop unmapping of rest of FMRs */
 964                        ehca_err(&shca->ib_device, "unmap of one FMR failed, "
 965                                 "stop rest, e_fmr=%p num_fmr=%x "
 966                                 "unmap_fmr_cnt=%x lkey=%x", e_fmr, num_fmr,
 967                                 unmap_fmr_cnt, e_fmr->ib.ib_fmr.lkey);
 968                        goto unmap_fmr_exit0;
 969                }
 970        }
 971
 972unmap_fmr_exit0:
 973        if (ret)
 974                ehca_gen_err("ret=%i fmr_list=%p num_fmr=%x unmap_fmr_cnt=%x",
 975                             ret, fmr_list, num_fmr, unmap_fmr_cnt);
 976        return ret;
 977} /* end ehca_unmap_fmr() */
 978
 979/*----------------------------------------------------------------------*/
 980
 981int ehca_dealloc_fmr(struct ib_fmr *fmr)
 982{
 983        int ret;
 984        u64 h_ret;
 985        struct ehca_shca *shca =
 986                container_of(fmr->device, struct ehca_shca, ib_device);
 987        struct ehca_mr *e_fmr = container_of(fmr, struct ehca_mr, ib.ib_fmr);
 988
 989        if (!(e_fmr->flags & EHCA_MR_FLAG_FMR)) {
 990                ehca_err(fmr->device, "not a FMR, e_fmr=%p e_fmr->flags=%x",
 991                         e_fmr, e_fmr->flags);
 992                ret = -EINVAL;
 993                goto free_fmr_exit0;
 994        }
 995
 996        h_ret = hipz_h_free_resource_mr(shca->ipz_hca_handle, e_fmr);
 997        if (h_ret != H_SUCCESS) {
 998                ehca_err(fmr->device, "hipz_free_mr failed, h_ret=%lli e_fmr=%p "
 999                         "hca_hndl=%llx fmr_hndl=%llx fmr->lkey=%x",
1000                         h_ret, e_fmr, shca->ipz_hca_handle.handle,
1001                         e_fmr->ipz_mr_handle.handle, fmr->lkey);
1002                ret = ehca2ib_return_code(h_ret);
1003                goto free_fmr_exit0;
1004        }
1005        /* successful deregistration */
1006        ehca_mr_delete(e_fmr);
1007        return 0;
1008
1009free_fmr_exit0:
1010        if (ret)
1011                ehca_err(&shca->ib_device, "ret=%i fmr=%p", ret, fmr);
1012        return ret;
1013} /* end ehca_dealloc_fmr() */
1014
1015/*----------------------------------------------------------------------*/
1016
1017static int ehca_reg_bmap_mr_rpages(struct ehca_shca *shca,
1018                                   struct ehca_mr *e_mr,
1019                                   struct ehca_mr_pginfo *pginfo);
1020
1021int ehca_reg_mr(struct ehca_shca *shca,
1022                struct ehca_mr *e_mr,
1023                u64 *iova_start,
1024                u64 size,
1025                int acl,
1026                struct ehca_pd *e_pd,
1027                struct ehca_mr_pginfo *pginfo,
1028                u32 *lkey, /*OUT*/
1029                u32 *rkey, /*OUT*/
1030                enum ehca_reg_type reg_type)
1031{
1032        int ret;
1033        u64 h_ret;
1034        u32 hipz_acl;
1035        struct ehca_mr_hipzout_parms hipzout;
1036
1037        ehca_mrmw_map_acl(acl, &hipz_acl);
1038        ehca_mrmw_set_pgsize_hipz_acl(pginfo->hwpage_size, &hipz_acl);
1039        if (ehca_use_hp_mr == 1)
1040                hipz_acl |= 0x00000001;
1041
1042        h_ret = hipz_h_alloc_resource_mr(shca->ipz_hca_handle, e_mr,
1043                                         (u64)iova_start, size, hipz_acl,
1044                                         e_pd->fw_pd, &hipzout);
1045        if (h_ret != H_SUCCESS) {
1046                ehca_err(&shca->ib_device, "hipz_alloc_mr failed, h_ret=%lli "
1047                         "hca_hndl=%llx", h_ret, shca->ipz_hca_handle.handle);
1048                ret = ehca2ib_return_code(h_ret);
1049                goto ehca_reg_mr_exit0;
1050        }
1051
1052        e_mr->ipz_mr_handle = hipzout.handle;
1053
1054        if (reg_type == EHCA_REG_BUSMAP_MR)
1055                ret = ehca_reg_bmap_mr_rpages(shca, e_mr, pginfo);
1056        else if (reg_type == EHCA_REG_MR)
1057                ret = ehca_reg_mr_rpages(shca, e_mr, pginfo);
1058        else
1059                ret = -EINVAL;
1060
1061        if (ret)
1062                goto ehca_reg_mr_exit1;
1063
1064        /* successful registration */
1065        e_mr->num_kpages = pginfo->num_kpages;
1066        e_mr->num_hwpages = pginfo->num_hwpages;
1067        e_mr->hwpage_size = pginfo->hwpage_size;
1068        e_mr->start = iova_start;
1069        e_mr->size = size;
1070        e_mr->acl = acl;
1071        *lkey = hipzout.lkey;
1072        *rkey = hipzout.rkey;
1073        return 0;
1074
1075ehca_reg_mr_exit1:
1076        h_ret = hipz_h_free_resource_mr(shca->ipz_hca_handle, e_mr);
1077        if (h_ret != H_SUCCESS) {
1078                ehca_err(&shca->ib_device, "h_ret=%lli shca=%p e_mr=%p "
1079                         "iova_start=%p size=%llx acl=%x e_pd=%p lkey=%x "
1080                         "pginfo=%p num_kpages=%llx num_hwpages=%llx ret=%i",
1081                         h_ret, shca, e_mr, iova_start, size, acl, e_pd,
1082                         hipzout.lkey, pginfo, pginfo->num_kpages,
1083                         pginfo->num_hwpages, ret);
1084                ehca_err(&shca->ib_device, "internal error in ehca_reg_mr, "
1085                         "not recoverable");
1086        }
1087ehca_reg_mr_exit0:
1088        if (ret)
1089                ehca_err(&shca->ib_device, "ret=%i shca=%p e_mr=%p "
1090                         "iova_start=%p size=%llx acl=%x e_pd=%p pginfo=%p "
1091                         "num_kpages=%llx num_hwpages=%llx",
1092                         ret, shca, e_mr, iova_start, size, acl, e_pd, pginfo,
1093                         pginfo->num_kpages, pginfo->num_hwpages);
1094        return ret;
1095} /* end ehca_reg_mr() */
1096
1097/*----------------------------------------------------------------------*/
1098
1099int ehca_reg_mr_rpages(struct ehca_shca *shca,
1100                       struct ehca_mr *e_mr,
1101                       struct ehca_mr_pginfo *pginfo)
1102{
1103        int ret = 0;
1104        u64 h_ret;
1105        u32 rnum;
1106        u64 rpage;
1107        u32 i;
1108        u64 *kpage;
1109
1110        if (!pginfo->num_hwpages) /* in case of fmr */
1111                return 0;
1112
1113        kpage = ehca_alloc_fw_ctrlblock(GFP_KERNEL);
1114        if (!kpage) {
1115                ehca_err(&shca->ib_device, "kpage alloc failed");
1116                ret = -ENOMEM;
1117                goto ehca_reg_mr_rpages_exit0;
1118        }
1119
1120        /* max MAX_RPAGES ehca mr pages per register call */
1121        for (i = 0; i < NUM_CHUNKS(pginfo->num_hwpages, MAX_RPAGES); i++) {
1122
1123                if (i == NUM_CHUNKS(pginfo->num_hwpages, MAX_RPAGES) - 1) {
1124                        rnum = pginfo->num_hwpages % MAX_RPAGES; /* last shot */
1125                        if (rnum == 0)
1126                                rnum = MAX_RPAGES;      /* last shot is full */
1127                } else
1128                        rnum = MAX_RPAGES;
1129
1130                ret = ehca_set_pagebuf(pginfo, rnum, kpage);
1131                if (ret) {
1132                        ehca_err(&shca->ib_device, "ehca_set_pagebuf "
1133                                 "bad rc, ret=%i rnum=%x kpage=%p",
1134                                 ret, rnum, kpage);
1135                        goto ehca_reg_mr_rpages_exit1;
1136                }
1137
1138                if (rnum > 1) {
1139                        rpage = virt_to_abs(kpage);
1140                        if (!rpage) {
1141                                ehca_err(&shca->ib_device, "kpage=%p i=%x",
1142                                         kpage, i);
1143                                ret = -EFAULT;
1144                                goto ehca_reg_mr_rpages_exit1;
1145                        }
1146                } else
1147                        rpage = *kpage;
1148
1149                h_ret = hipz_h_register_rpage_mr(
1150                        shca->ipz_hca_handle, e_mr,
1151                        ehca_encode_hwpage_size(pginfo->hwpage_size),
1152                        0, rpage, rnum);
1153
1154                if (i == NUM_CHUNKS(pginfo->num_hwpages, MAX_RPAGES) - 1) {
1155                        /*
1156                         * check for 'registration complete'==H_SUCCESS
1157                         * and for 'page registered'==H_PAGE_REGISTERED
1158                         */
1159                        if (h_ret != H_SUCCESS) {
1160                                ehca_err(&shca->ib_device, "last "
1161                                         "hipz_reg_rpage_mr failed, h_ret=%lli "
1162                                         "e_mr=%p i=%x hca_hndl=%llx mr_hndl=%llx"
1163                                         " lkey=%x", h_ret, e_mr, i,
1164                                         shca->ipz_hca_handle.handle,
1165                                         e_mr->ipz_mr_handle.handle,
1166                                         e_mr->ib.ib_mr.lkey);
1167                                ret = ehca2ib_return_code(h_ret);
1168                                break;
1169                        } else
1170                                ret = 0;
1171                } else if (h_ret != H_PAGE_REGISTERED) {
1172                        ehca_err(&shca->ib_device, "hipz_reg_rpage_mr failed, "
1173                                 "h_ret=%lli e_mr=%p i=%x lkey=%x hca_hndl=%llx "
1174                                 "mr_hndl=%llx", h_ret, e_mr, i,
1175                                 e_mr->ib.ib_mr.lkey,
1176                                 shca->ipz_hca_handle.handle,
1177                                 e_mr->ipz_mr_handle.handle);
1178                        ret = ehca2ib_return_code(h_ret);
1179                        break;
1180                } else
1181                        ret = 0;
1182        } /* end for(i) */
1183
1184
1185ehca_reg_mr_rpages_exit1:
1186        ehca_free_fw_ctrlblock(kpage);
1187ehca_reg_mr_rpages_exit0:
1188        if (ret)
1189                ehca_err(&shca->ib_device, "ret=%i shca=%p e_mr=%p pginfo=%p "
1190                         "num_kpages=%llx num_hwpages=%llx", ret, shca, e_mr,
1191                         pginfo, pginfo->num_kpages, pginfo->num_hwpages);
1192        return ret;
1193} /* end ehca_reg_mr_rpages() */
1194
1195/*----------------------------------------------------------------------*/
1196
1197inline int ehca_rereg_mr_rereg1(struct ehca_shca *shca,
1198                                struct ehca_mr *e_mr,
1199                                u64 *iova_start,
1200                                u64 size,
1201                                u32 acl,
1202                                struct ehca_pd *e_pd,
1203                                struct ehca_mr_pginfo *pginfo,
1204                                u32 *lkey, /*OUT*/
1205                                u32 *rkey) /*OUT*/
1206{
1207        int ret;
1208        u64 h_ret;
1209        u32 hipz_acl;
1210        u64 *kpage;
1211        u64 rpage;
1212        struct ehca_mr_pginfo pginfo_save;
1213        struct ehca_mr_hipzout_parms hipzout;
1214
1215        ehca_mrmw_map_acl(acl, &hipz_acl);
1216        ehca_mrmw_set_pgsize_hipz_acl(pginfo->hwpage_size, &hipz_acl);
1217
1218        kpage = ehca_alloc_fw_ctrlblock(GFP_KERNEL);
1219        if (!kpage) {
1220                ehca_err(&shca->ib_device, "kpage alloc failed");
1221                ret = -ENOMEM;
1222                goto ehca_rereg_mr_rereg1_exit0;
1223        }
1224
1225        pginfo_save = *pginfo;
1226        ret = ehca_set_pagebuf(pginfo, pginfo->num_hwpages, kpage);
1227        if (ret) {
1228                ehca_err(&shca->ib_device, "set pagebuf failed, e_mr=%p "
1229                         "pginfo=%p type=%x num_kpages=%llx num_hwpages=%llx "
1230                         "kpage=%p", e_mr, pginfo, pginfo->type,
1231                         pginfo->num_kpages, pginfo->num_hwpages, kpage);
1232                goto ehca_rereg_mr_rereg1_exit1;
1233        }
1234        rpage = virt_to_abs(kpage);
1235        if (!rpage) {
1236                ehca_err(&shca->ib_device, "kpage=%p", kpage);
1237                ret = -EFAULT;
1238                goto ehca_rereg_mr_rereg1_exit1;
1239        }
1240        h_ret = hipz_h_reregister_pmr(shca->ipz_hca_handle, e_mr,
1241                                      (u64)iova_start, size, hipz_acl,
1242                                      e_pd->fw_pd, rpage, &hipzout);
1243        if (h_ret != H_SUCCESS) {
1244                /*
1245                 * reregistration unsuccessful, try it again with the 3 hCalls,
1246                 * e.g. this is required in case H_MR_CONDITION
1247                 * (MW bound or MR is shared)
1248                 */
1249                ehca_warn(&shca->ib_device, "hipz_h_reregister_pmr failed "
1250                          "(Rereg1), h_ret=%lli e_mr=%p", h_ret, e_mr);
1251                *pginfo = pginfo_save;
1252                ret = -EAGAIN;
1253        } else if ((u64 *)hipzout.vaddr != iova_start) {
1254                ehca_err(&shca->ib_device, "PHYP changed iova_start in "
1255                         "rereg_pmr, iova_start=%p iova_start_out=%llx e_mr=%p "
1256                         "mr_handle=%llx lkey=%x lkey_out=%x", iova_start,
1257                         hipzout.vaddr, e_mr, e_mr->ipz_mr_handle.handle,
1258                         e_mr->ib.ib_mr.lkey, hipzout.lkey);
1259                ret = -EFAULT;
1260        } else {
1261                /*
1262                 * successful reregistration
1263                 * note: start and start_out are identical for eServer HCAs
1264                 */
1265                e_mr->num_kpages = pginfo->num_kpages;
1266                e_mr->num_hwpages = pginfo->num_hwpages;
1267                e_mr->hwpage_size = pginfo->hwpage_size;
1268                e_mr->start = iova_start;
1269                e_mr->size = size;
1270                e_mr->acl = acl;
1271                *lkey = hipzout.lkey;
1272                *rkey = hipzout.rkey;
1273        }
1274
1275ehca_rereg_mr_rereg1_exit1:
1276        ehca_free_fw_ctrlblock(kpage);
1277ehca_rereg_mr_rereg1_exit0:
1278        if ( ret && (ret != -EAGAIN) )
1279                ehca_err(&shca->ib_device, "ret=%i lkey=%x rkey=%x "
1280                         "pginfo=%p num_kpages=%llx num_hwpages=%llx",
1281                         ret, *lkey, *rkey, pginfo, pginfo->num_kpages,
1282                         pginfo->num_hwpages);
1283        return ret;
1284} /* end ehca_rereg_mr_rereg1() */
1285
1286/*----------------------------------------------------------------------*/
1287
1288int ehca_rereg_mr(struct ehca_shca *shca,
1289                  struct ehca_mr *e_mr,
1290                  u64 *iova_start,
1291                  u64 size,
1292                  int acl,
1293                  struct ehca_pd *e_pd,
1294                  struct ehca_mr_pginfo *pginfo,
1295                  u32 *lkey,
1296                  u32 *rkey)
1297{
1298        int ret = 0;
1299        u64 h_ret;
1300        int rereg_1_hcall = 1; /* 1: use hipz_h_reregister_pmr directly */
1301        int rereg_3_hcall = 0; /* 1: use 3 hipz calls for reregistration */
1302
1303        /* first determine reregistration hCall(s) */
1304        if ((pginfo->num_hwpages > MAX_RPAGES) ||
1305            (e_mr->num_hwpages > MAX_RPAGES) ||
1306            (pginfo->num_hwpages > e_mr->num_hwpages)) {
1307                ehca_dbg(&shca->ib_device, "Rereg3 case, "
1308                         "pginfo->num_hwpages=%llx e_mr->num_hwpages=%x",
1309                         pginfo->num_hwpages, e_mr->num_hwpages);
1310                rereg_1_hcall = 0;
1311                rereg_3_hcall = 1;
1312        }
1313
1314        if (e_mr->flags & EHCA_MR_FLAG_MAXMR) { /* check for max-MR */
1315                rereg_1_hcall = 0;
1316                rereg_3_hcall = 1;
1317                e_mr->flags &= ~EHCA_MR_FLAG_MAXMR;
1318                ehca_err(&shca->ib_device, "Rereg MR for max-MR! e_mr=%p",
1319                         e_mr);
1320        }
1321
1322        if (rereg_1_hcall) {
1323                ret = ehca_rereg_mr_rereg1(shca, e_mr, iova_start, size,
1324                                           acl, e_pd, pginfo, lkey, rkey);
1325                if (ret) {
1326                        if (ret == -EAGAIN)
1327                                rereg_3_hcall = 1;
1328                        else
1329                                goto ehca_rereg_mr_exit0;
1330                }
1331        }
1332
1333        if (rereg_3_hcall) {
1334                struct ehca_mr save_mr;
1335
1336                /* first deregister old MR */
1337                h_ret = hipz_h_free_resource_mr(shca->ipz_hca_handle, e_mr);
1338                if (h_ret != H_SUCCESS) {
1339                        ehca_err(&shca->ib_device, "hipz_free_mr failed, "
1340                                 "h_ret=%lli e_mr=%p hca_hndl=%llx mr_hndl=%llx "
1341                                 "mr->lkey=%x",
1342                                 h_ret, e_mr, shca->ipz_hca_handle.handle,
1343                                 e_mr->ipz_mr_handle.handle,
1344                                 e_mr->ib.ib_mr.lkey);
1345                        ret = ehca2ib_return_code(h_ret);
1346                        goto ehca_rereg_mr_exit0;
1347                }
1348                /* clean ehca_mr_t, without changing struct ib_mr and lock */
1349                save_mr = *e_mr;
1350                ehca_mr_deletenew(e_mr);
1351
1352                /* set some MR values */
1353                e_mr->flags = save_mr.flags;
1354                e_mr->hwpage_size = save_mr.hwpage_size;
1355                e_mr->fmr_page_size = save_mr.fmr_page_size;
1356                e_mr->fmr_max_pages = save_mr.fmr_max_pages;
1357                e_mr->fmr_max_maps = save_mr.fmr_max_maps;
1358                e_mr->fmr_map_cnt = save_mr.fmr_map_cnt;
1359
1360                ret = ehca_reg_mr(shca, e_mr, iova_start, size, acl,
1361                                  e_pd, pginfo, lkey, rkey, EHCA_REG_MR);
1362                if (ret) {
1363                        u32 offset = (u64)(&e_mr->flags) - (u64)e_mr;
1364                        memcpy(&e_mr->flags, &(save_mr.flags),
1365                               sizeof(struct ehca_mr) - offset);
1366                        goto ehca_rereg_mr_exit0;
1367                }
1368        }
1369
1370ehca_rereg_mr_exit0:
1371        if (ret)
1372                ehca_err(&shca->ib_device, "ret=%i shca=%p e_mr=%p "
1373                         "iova_start=%p size=%llx acl=%x e_pd=%p pginfo=%p "
1374                         "num_kpages=%llx lkey=%x rkey=%x rereg_1_hcall=%x "
1375                         "rereg_3_hcall=%x", ret, shca, e_mr, iova_start, size,
1376                         acl, e_pd, pginfo, pginfo->num_kpages, *lkey, *rkey,
1377                         rereg_1_hcall, rereg_3_hcall);
1378        return ret;
1379} /* end ehca_rereg_mr() */
1380
1381/*----------------------------------------------------------------------*/
1382
1383int ehca_unmap_one_fmr(struct ehca_shca *shca,
1384                       struct ehca_mr *e_fmr)
1385{
1386        int ret = 0;
1387        u64 h_ret;
1388        struct ehca_pd *e_pd =
1389                container_of(e_fmr->ib.ib_fmr.pd, struct ehca_pd, ib_pd);
1390        struct ehca_mr save_fmr;
1391        u32 tmp_lkey, tmp_rkey;
1392        struct ehca_mr_pginfo pginfo;
1393        struct ehca_mr_hipzout_parms hipzout;
1394        struct ehca_mr save_mr;
1395
1396        if (e_fmr->fmr_max_pages <= MAX_RPAGES) {
1397                /*
1398                 * note: after using rereg hcall with len=0,
1399                 * rereg hcall must be used again for registering pages
1400                 */
1401                h_ret = hipz_h_reregister_pmr(shca->ipz_hca_handle, e_fmr, 0,
1402                                              0, 0, e_pd->fw_pd, 0, &hipzout);
1403                if (h_ret == H_SUCCESS) {
1404                        /* successful reregistration */
1405                        e_fmr->start = NULL;
1406                        e_fmr->size = 0;
1407                        tmp_lkey = hipzout.lkey;
1408                        tmp_rkey = hipzout.rkey;
1409                        return 0;
1410                }
1411                /*
1412                 * should not happen, because length checked above,
1413                 * FMRs are not shared and no MW bound to FMRs
1414                 */
1415                ehca_err(&shca->ib_device, "hipz_reregister_pmr failed "
1416                         "(Rereg1), h_ret=%lli e_fmr=%p hca_hndl=%llx "
1417                         "mr_hndl=%llx lkey=%x lkey_out=%x",
1418                         h_ret, e_fmr, shca->ipz_hca_handle.handle,
1419                         e_fmr->ipz_mr_handle.handle,
1420                         e_fmr->ib.ib_fmr.lkey, hipzout.lkey);
1421                /* try free and rereg */
1422        }
1423
1424        /* first free old FMR */
1425        h_ret = hipz_h_free_resource_mr(shca->ipz_hca_handle, e_fmr);
1426        if (h_ret != H_SUCCESS) {
1427                ehca_err(&shca->ib_device, "hipz_free_mr failed, "
1428                         "h_ret=%lli e_fmr=%p hca_hndl=%llx mr_hndl=%llx "
1429                         "lkey=%x",
1430                         h_ret, e_fmr, shca->ipz_hca_handle.handle,
1431                         e_fmr->ipz_mr_handle.handle,
1432                         e_fmr->ib.ib_fmr.lkey);
1433                ret = ehca2ib_return_code(h_ret);
1434                goto ehca_unmap_one_fmr_exit0;
1435        }
1436        /* clean ehca_mr_t, without changing lock */
1437        save_fmr = *e_fmr;
1438        ehca_mr_deletenew(e_fmr);
1439
1440        /* set some MR values */
1441        e_fmr->flags = save_fmr.flags;
1442        e_fmr->hwpage_size = save_fmr.hwpage_size;
1443        e_fmr->fmr_page_size = save_fmr.fmr_page_size;
1444        e_fmr->fmr_max_pages = save_fmr.fmr_max_pages;
1445        e_fmr->fmr_max_maps = save_fmr.fmr_max_maps;
1446        e_fmr->fmr_map_cnt = save_fmr.fmr_map_cnt;
1447        e_fmr->acl = save_fmr.acl;
1448
1449        memset(&pginfo, 0, sizeof(pginfo));
1450        pginfo.type = EHCA_MR_PGI_FMR;
1451        ret = ehca_reg_mr(shca, e_fmr, NULL,
1452                          (e_fmr->fmr_max_pages * e_fmr->fmr_page_size),
1453                          e_fmr->acl, e_pd, &pginfo, &tmp_lkey,
1454                          &tmp_rkey, EHCA_REG_MR);
1455        if (ret) {
1456                u32 offset = (u64)(&e_fmr->flags) - (u64)e_fmr;
1457                memcpy(&e_fmr->flags, &(save_mr.flags),
1458                       sizeof(struct ehca_mr) - offset);
1459        }
1460
1461ehca_unmap_one_fmr_exit0:
1462        if (ret)
1463                ehca_err(&shca->ib_device, "ret=%i tmp_lkey=%x tmp_rkey=%x "
1464                         "fmr_max_pages=%x",
1465                         ret, tmp_lkey, tmp_rkey, e_fmr->fmr_max_pages);
1466        return ret;
1467} /* end ehca_unmap_one_fmr() */
1468
1469/*----------------------------------------------------------------------*/
1470
1471int ehca_reg_smr(struct ehca_shca *shca,
1472                 struct ehca_mr *e_origmr,
1473                 struct ehca_mr *e_newmr,
1474                 u64 *iova_start,
1475                 int acl,
1476                 struct ehca_pd *e_pd,
1477                 u32 *lkey, /*OUT*/
1478                 u32 *rkey) /*OUT*/
1479{
1480        int ret = 0;
1481        u64 h_ret;
1482        u32 hipz_acl;
1483        struct ehca_mr_hipzout_parms hipzout;
1484
1485        ehca_mrmw_map_acl(acl, &hipz_acl);
1486        ehca_mrmw_set_pgsize_hipz_acl(e_origmr->hwpage_size, &hipz_acl);
1487
1488        h_ret = hipz_h_register_smr(shca->ipz_hca_handle, e_newmr, e_origmr,
1489                                    (u64)iova_start, hipz_acl, e_pd->fw_pd,
1490                                    &hipzout);
1491        if (h_ret != H_SUCCESS) {
1492                ehca_err(&shca->ib_device, "hipz_reg_smr failed, h_ret=%lli "
1493                         "shca=%p e_origmr=%p e_newmr=%p iova_start=%p acl=%x "
1494                         "e_pd=%p hca_hndl=%llx mr_hndl=%llx lkey=%x",
1495                         h_ret, shca, e_origmr, e_newmr, iova_start, acl, e_pd,
1496                         shca->ipz_hca_handle.handle,
1497                         e_origmr->ipz_mr_handle.handle,
1498                         e_origmr->ib.ib_mr.lkey);
1499                ret = ehca2ib_return_code(h_ret);
1500                goto ehca_reg_smr_exit0;
1501        }
1502        /* successful registration */
1503        e_newmr->num_kpages = e_origmr->num_kpages;
1504        e_newmr->num_hwpages = e_origmr->num_hwpages;
1505        e_newmr->hwpage_size   = e_origmr->hwpage_size;
1506        e_newmr->start = iova_start;
1507        e_newmr->size = e_origmr->size;
1508        e_newmr->acl = acl;
1509        e_newmr->ipz_mr_handle = hipzout.handle;
1510        *lkey = hipzout.lkey;
1511        *rkey = hipzout.rkey;
1512        return 0;
1513
1514ehca_reg_smr_exit0:
1515        if (ret)
1516                ehca_err(&shca->ib_device, "ret=%i shca=%p e_origmr=%p "
1517                         "e_newmr=%p iova_start=%p acl=%x e_pd=%p",
1518                         ret, shca, e_origmr, e_newmr, iova_start, acl, e_pd);
1519        return ret;
1520} /* end ehca_reg_smr() */
1521
1522/*----------------------------------------------------------------------*/
1523static inline void *ehca_calc_sectbase(int top, int dir, int idx)
1524{
1525        unsigned long ret = idx;
1526        ret |= dir << EHCA_DIR_INDEX_SHIFT;
1527        ret |= top << EHCA_TOP_INDEX_SHIFT;
1528        return abs_to_virt(ret << SECTION_SIZE_BITS);
1529}
1530
/*
 * True when a map slot holds something other than the EHCA_INVAL_ADDR
 * marker.  The argument is parenthesized so that the u64 cast applies
 * to the whole expression passed in, not just to its first operand.
 */
#define ehca_bmap_valid(entry) \
	((u64)(entry) != (u64)EHCA_INVAL_ADDR)
1533
1534static u64 ehca_reg_mr_section(int top, int dir, int idx, u64 *kpage,
1535                               struct ehca_shca *shca, struct ehca_mr *mr,
1536                               struct ehca_mr_pginfo *pginfo)
1537{
1538        u64 h_ret = 0;
1539        unsigned long page = 0;
1540        u64 rpage = virt_to_abs(kpage);
1541        int page_count;
1542
1543        void *sectbase = ehca_calc_sectbase(top, dir, idx);
1544        if ((unsigned long)sectbase & (pginfo->hwpage_size - 1)) {
1545                ehca_err(&shca->ib_device, "reg_mr_section will probably fail:"
1546                                           "hwpage_size does not fit to "
1547                                           "section start address");
1548        }
1549        page_count = EHCA_SECTSIZE / pginfo->hwpage_size;
1550
1551        while (page < page_count) {
1552                u64 rnum;
1553                for (rnum = 0; (rnum < MAX_RPAGES) && (page < page_count);
1554                     rnum++) {
1555                        void *pg = sectbase + ((page++) * pginfo->hwpage_size);
1556                        kpage[rnum] = virt_to_abs(pg);
1557                }
1558
1559                h_ret = hipz_h_register_rpage_mr(shca->ipz_hca_handle, mr,
1560                        ehca_encode_hwpage_size(pginfo->hwpage_size),
1561                        0, rpage, rnum);
1562
1563                if ((h_ret != H_SUCCESS) && (h_ret != H_PAGE_REGISTERED)) {
1564                        ehca_err(&shca->ib_device, "register_rpage_mr failed");
1565                        return h_ret;
1566                }
1567        }
1568        return h_ret;
1569}
1570
1571static u64 ehca_reg_mr_sections(int top, int dir, u64 *kpage,
1572                                struct ehca_shca *shca, struct ehca_mr *mr,
1573                                struct ehca_mr_pginfo *pginfo)
1574{
1575        u64 hret = H_SUCCESS;
1576        int idx;
1577
1578        for (idx = 0; idx < EHCA_MAP_ENTRIES; idx++) {
1579                if (!ehca_bmap_valid(ehca_bmap->top[top]->dir[dir]->ent[idx]))
1580                        continue;
1581
1582                hret = ehca_reg_mr_section(top, dir, idx, kpage, shca, mr,
1583                                           pginfo);
1584                if ((hret != H_SUCCESS) && (hret != H_PAGE_REGISTERED))
1585                                return hret;
1586        }
1587        return hret;
1588}
1589
1590static u64 ehca_reg_mr_dir_sections(int top, u64 *kpage, struct ehca_shca *shca,
1591                                    struct ehca_mr *mr,
1592                                    struct ehca_mr_pginfo *pginfo)
1593{
1594        u64 hret = H_SUCCESS;
1595        int dir;
1596
1597        for (dir = 0; dir < EHCA_MAP_ENTRIES; dir++) {
1598                if (!ehca_bmap_valid(ehca_bmap->top[top]->dir[dir]))
1599                        continue;
1600
1601                hret = ehca_reg_mr_sections(top, dir, kpage, shca, mr, pginfo);
1602                if ((hret != H_SUCCESS) && (hret != H_PAGE_REGISTERED))
1603                                return hret;
1604        }
1605        return hret;
1606}
1607
1608/* register internal max-MR to internal SHCA */
1609int ehca_reg_internal_maxmr(
1610        struct ehca_shca *shca,
1611        struct ehca_pd *e_pd,
1612        struct ehca_mr **e_maxmr)  /*OUT*/
1613{
1614        int ret;
1615        struct ehca_mr *e_mr;
1616        u64 *iova_start;
1617        u64 size_maxmr;
1618        struct ehca_mr_pginfo pginfo;
1619        struct ib_phys_buf ib_pbuf;
1620        u32 num_kpages;
1621        u32 num_hwpages;
1622        u64 hw_pgsize;
1623
1624        if (!ehca_bmap) {
1625                ret = -EFAULT;
1626                goto ehca_reg_internal_maxmr_exit0;
1627        }
1628
1629        e_mr = ehca_mr_new();
1630        if (!e_mr) {
1631                ehca_err(&shca->ib_device, "out of memory");
1632                ret = -ENOMEM;
1633                goto ehca_reg_internal_maxmr_exit0;
1634        }
1635        e_mr->flags |= EHCA_MR_FLAG_MAXMR;
1636
1637        /* register internal max-MR on HCA */
1638        size_maxmr = ehca_mr_len;
1639        iova_start = (u64 *)ehca_map_vaddr((void *)(KERNELBASE + PHYSICAL_START));
1640        ib_pbuf.addr = 0;
1641        ib_pbuf.size = size_maxmr;
1642        num_kpages = NUM_CHUNKS(((u64)iova_start % PAGE_SIZE) + size_maxmr,
1643                                PAGE_SIZE);
1644        hw_pgsize = ehca_get_max_hwpage_size(shca);
1645        num_hwpages = NUM_CHUNKS(((u64)iova_start % hw_pgsize) + size_maxmr,
1646                                 hw_pgsize);
1647
1648        memset(&pginfo, 0, sizeof(pginfo));
1649        pginfo.type = EHCA_MR_PGI_PHYS;
1650        pginfo.num_kpages = num_kpages;
1651        pginfo.num_hwpages = num_hwpages;
1652        pginfo.hwpage_size = hw_pgsize;
1653        pginfo.u.phy.num_phys_buf = 1;
1654        pginfo.u.phy.phys_buf_array = &ib_pbuf;
1655
1656        ret = ehca_reg_mr(shca, e_mr, iova_start, size_maxmr, 0, e_pd,
1657                          &pginfo, &e_mr->ib.ib_mr.lkey,
1658                          &e_mr->ib.ib_mr.rkey, EHCA_REG_BUSMAP_MR);
1659        if (ret) {
1660                ehca_err(&shca->ib_device, "reg of internal max MR failed, "
1661                         "e_mr=%p iova_start=%p size_maxmr=%llx num_kpages=%x "
1662                         "num_hwpages=%x", e_mr, iova_start, size_maxmr,
1663                         num_kpages, num_hwpages);
1664                goto ehca_reg_internal_maxmr_exit1;
1665        }
1666
1667        /* successful registration of all pages */
1668        e_mr->ib.ib_mr.device = e_pd->ib_pd.device;
1669        e_mr->ib.ib_mr.pd = &e_pd->ib_pd;
1670        e_mr->ib.ib_mr.uobject = NULL;
1671        atomic_inc(&(e_pd->ib_pd.usecnt));
1672        atomic_set(&(e_mr->ib.ib_mr.usecnt), 0);
1673        *e_maxmr = e_mr;
1674        return 0;
1675
1676ehca_reg_internal_maxmr_exit1:
1677        ehca_mr_delete(e_mr);
1678ehca_reg_internal_maxmr_exit0:
1679        if (ret)
1680                ehca_err(&shca->ib_device, "ret=%i shca=%p e_pd=%p e_maxmr=%p",
1681                         ret, shca, e_pd, e_maxmr);
1682        return ret;
1683} /* end ehca_reg_internal_maxmr() */
1684
1685/*----------------------------------------------------------------------*/
1686
1687int ehca_reg_maxmr(struct ehca_shca *shca,
1688                   struct ehca_mr *e_newmr,
1689                   u64 *iova_start,
1690                   int acl,
1691                   struct ehca_pd *e_pd,
1692                   u32 *lkey,
1693                   u32 *rkey)
1694{
1695        u64 h_ret;
1696        struct ehca_mr *e_origmr = shca->maxmr;
1697        u32 hipz_acl;
1698        struct ehca_mr_hipzout_parms hipzout;
1699
1700        ehca_mrmw_map_acl(acl, &hipz_acl);
1701        ehca_mrmw_set_pgsize_hipz_acl(e_origmr->hwpage_size, &hipz_acl);
1702
1703        h_ret = hipz_h_register_smr(shca->ipz_hca_handle, e_newmr, e_origmr,
1704                                    (u64)iova_start, hipz_acl, e_pd->fw_pd,
1705                                    &hipzout);
1706        if (h_ret != H_SUCCESS) {
1707                ehca_err(&shca->ib_device, "hipz_reg_smr failed, h_ret=%lli "
1708                         "e_origmr=%p hca_hndl=%llx mr_hndl=%llx lkey=%x",
1709                         h_ret, e_origmr, shca->ipz_hca_handle.handle,
1710                         e_origmr->ipz_mr_handle.handle,
1711                         e_origmr->ib.ib_mr.lkey);
1712                return ehca2ib_return_code(h_ret);
1713        }
1714        /* successful registration */
1715        e_newmr->num_kpages = e_origmr->num_kpages;
1716        e_newmr->num_hwpages = e_origmr->num_hwpages;
1717        e_newmr->hwpage_size = e_origmr->hwpage_size;
1718        e_newmr->start = iova_start;
1719        e_newmr->size = e_origmr->size;
1720        e_newmr->acl = acl;
1721        e_newmr->ipz_mr_handle = hipzout.handle;
1722        *lkey = hipzout.lkey;
1723        *rkey = hipzout.rkey;
1724        return 0;
1725} /* end ehca_reg_maxmr() */
1726
1727/*----------------------------------------------------------------------*/
1728
1729int ehca_dereg_internal_maxmr(struct ehca_shca *shca)
1730{
1731        int ret;
1732        struct ehca_mr *e_maxmr;
1733        struct ib_pd *ib_pd;
1734
1735        if (!shca->maxmr) {
1736                ehca_err(&shca->ib_device, "bad call, shca=%p", shca);
1737                ret = -EINVAL;
1738                goto ehca_dereg_internal_maxmr_exit0;
1739        }
1740
1741        e_maxmr = shca->maxmr;
1742        ib_pd = e_maxmr->ib.ib_mr.pd;
1743        shca->maxmr = NULL; /* remove internal max-MR indication from SHCA */
1744
1745        ret = ehca_dereg_mr(&e_maxmr->ib.ib_mr);
1746        if (ret) {
1747                ehca_err(&shca->ib_device, "dereg internal max-MR failed, "
1748                         "ret=%i e_maxmr=%p shca=%p lkey=%x",
1749                         ret, e_maxmr, shca, e_maxmr->ib.ib_mr.lkey);
1750                shca->maxmr = e_maxmr;
1751                goto ehca_dereg_internal_maxmr_exit0;
1752        }
1753
1754        atomic_dec(&ib_pd->usecnt);
1755
1756ehca_dereg_internal_maxmr_exit0:
1757        if (ret)
1758                ehca_err(&shca->ib_device, "ret=%i shca=%p shca->maxmr=%p",
1759                         ret, shca, shca->maxmr);
1760        return ret;
1761} /* end ehca_dereg_internal_maxmr() */
1762
1763/*----------------------------------------------------------------------*/
1764
1765/*
1766 * check physical buffer array of MR verbs for validness and
1767 * calculates MR size
1768 */
1769int ehca_mr_chk_buf_and_calc_size(struct ib_phys_buf *phys_buf_array,
1770                                  int num_phys_buf,
1771                                  u64 *iova_start,
1772                                  u64 *size)
1773{
1774        struct ib_phys_buf *pbuf = phys_buf_array;
1775        u64 size_count = 0;
1776        u32 i;
1777
1778        if (num_phys_buf == 0) {
1779                ehca_gen_err("bad phys buf array len, num_phys_buf=0");
1780                return -EINVAL;
1781        }
1782        /* check first buffer */
1783        if (((u64)iova_start & ~PAGE_MASK) != (pbuf->addr & ~PAGE_MASK)) {
1784                ehca_gen_err("iova_start/addr mismatch, iova_start=%p "
1785                             "pbuf->addr=%llx pbuf->size=%llx",
1786                             iova_start, pbuf->addr, pbuf->size);
1787                return -EINVAL;
1788        }
1789        if (((pbuf->addr + pbuf->size) % PAGE_SIZE) &&
1790            (num_phys_buf > 1)) {
1791                ehca_gen_err("addr/size mismatch in 1st buf, pbuf->addr=%llx "
1792                             "pbuf->size=%llx", pbuf->addr, pbuf->size);
1793                return -EINVAL;
1794        }
1795
1796        for (i = 0; i < num_phys_buf; i++) {
1797                if ((i > 0) && (pbuf->addr % PAGE_SIZE)) {
1798                        ehca_gen_err("bad address, i=%x pbuf->addr=%llx "
1799                                     "pbuf->size=%llx",
1800                                     i, pbuf->addr, pbuf->size);
1801                        return -EINVAL;
1802                }
1803                if (((i > 0) && /* not 1st */
1804                     (i < (num_phys_buf - 1)) &&        /* not last */
1805                     (pbuf->size % PAGE_SIZE)) || (pbuf->size == 0)) {
1806                        ehca_gen_err("bad size, i=%x pbuf->size=%llx",
1807                                     i, pbuf->size);
1808                        return -EINVAL;
1809                }
1810                size_count += pbuf->size;
1811                pbuf++;
1812        }
1813
1814        *size = size_count;
1815        return 0;
1816} /* end ehca_mr_chk_buf_and_calc_size() */
1817
1818/*----------------------------------------------------------------------*/
1819
1820/* check page list of map FMR verb for validness */
1821int ehca_fmr_check_page_list(struct ehca_mr *e_fmr,
1822                             u64 *page_list,
1823                             int list_len)
1824{
1825        u32 i;
1826        u64 *page;
1827
1828        if ((list_len == 0) || (list_len > e_fmr->fmr_max_pages)) {
1829                ehca_gen_err("bad list_len, list_len=%x "
1830                             "e_fmr->fmr_max_pages=%x fmr=%p",
1831                             list_len, e_fmr->fmr_max_pages, e_fmr);
1832                return -EINVAL;
1833        }
1834
1835        /* each page must be aligned */
1836        page = page_list;
1837        for (i = 0; i < list_len; i++) {
1838                if (*page % e_fmr->fmr_page_size) {
1839                        ehca_gen_err("bad page, i=%x *page=%llx page=%p fmr=%p "
1840                                     "fmr_page_size=%x", i, *page, page, e_fmr,
1841                                     e_fmr->fmr_page_size);
1842                        return -EINVAL;
1843                }
1844                page++;
1845        }
1846
1847        return 0;
1848} /* end ehca_fmr_check_page_list() */
1849
1850/*----------------------------------------------------------------------*/
1851
/*
 * Fill kpage[] with up to 'number' absolute hardware page addresses taken
 * from a user memory region, for the case where a kernel page is at least
 * as large as a hardware page (see the dispatch in ehca_set_pagebuf()).
 *
 * The walk resumes from the cursor kept in pginfo: the chunk list is
 * continued after pginfo->u.usr.next_chunk, starting at scatterlist
 * index pginfo->u.usr.next_nmap, and pginfo->next_hwpage selects the
 * hardware page inside the current kernel page.  Each kernel page
 * yields PAGE_SIZE / hwpage_size consecutive entries.
 *
 * pginfo->hwpage_cnt is incremented per entry written, pginfo->kpage_cnt
 * per kernel page completed.  Before a normal return the chunk cursor is
 * stored back into pginfo->u.usr.next_chunk.
 *
 * Returns 0 on success, or -EFAULT if phys_to_abs() yields 0 for a page;
 * in that case the function returns immediately without updating
 * next_chunk.
 */
1852/* PAGE_SIZE >= pginfo->hwpage_size */
1853static int ehca_set_pagebuf_user1(struct ehca_mr_pginfo *pginfo,
1854                                  u32 number,
1855                                  u64 *kpage)
1856{
1857        int ret = 0;
1858        struct ib_umem_chunk *prev_chunk;
1859        struct ib_umem_chunk *chunk;
1860        u64 pgaddr;
1861        u32 i = 0;
1862        u32 j = 0;
1863        int hwpages_per_kpage = PAGE_SIZE / pginfo->hwpage_size;
1864
1865        /* loop over desired chunk entries */
1866        chunk      = pginfo->u.usr.next_chunk;
1867        prev_chunk = pginfo->u.usr.next_chunk;
1868        list_for_each_entry_continue(
1869                chunk, (&(pginfo->u.usr.region->chunk_list)), list) {
1870                for (i = pginfo->u.usr.next_nmap; i < chunk->nmap; ) {
1871                        pgaddr = page_to_pfn(sg_page(&chunk->page_list[i]))
1872                                << PAGE_SHIFT ;
1873                        *kpage = phys_to_abs(pgaddr +
1874                                             (pginfo->next_hwpage *
1875                                              pginfo->hwpage_size));
1876                        if ( !(*kpage) ) {
1877                                ehca_gen_err("pgaddr=%llx "
1878                                             "chunk->page_list[i]=%llx "
1879                                             "i=%x next_hwpage=%llx",
1880                                             pgaddr, (u64)sg_dma_address(
1881                                                     &chunk->page_list[i]),
1882                                             i, pginfo->next_hwpage);
1883                                return -EFAULT;
1884                        }
1885                        (pginfo->hwpage_cnt)++;
1886                        (pginfo->next_hwpage)++;
1887                        kpage++;
                        /* last hw page of this kernel page: move to next sg entry */
1888                        if (pginfo->next_hwpage % hwpages_per_kpage == 0) {
1889                                (pginfo->kpage_cnt)++;
1890                                (pginfo->u.usr.next_nmap)++;
1891                                pginfo->next_hwpage = 0;
1892                                i++;
1893                        }
1894                        j++;
1895                        if (j >= number) break;
1896                }
                /*
                 * prev_chunk tracks the chunk after which the next call has
                 * to continue: it only advances to 'chunk' once that chunk
                 * is used up (next_nmap reset to 0) or was passed without
                 * reaching 'number' entries.
                 */
1897                if ((pginfo->u.usr.next_nmap >= chunk->nmap) &&
1898                    (j >= number)) {
1899                        pginfo->u.usr.next_nmap = 0;
1900                        prev_chunk = chunk;
1901                        break;
1902                } else if (pginfo->u.usr.next_nmap >= chunk->nmap) {
1903                        pginfo->u.usr.next_nmap = 0;
1904                        prev_chunk = chunk;
1905                } else if (j >= number)
1906                        break;
1907                else
1908                        prev_chunk = chunk;
1909        }
1910        pginfo->u.usr.next_chunk =
1911                list_prepare_entry(prev_chunk,
1912                                   (&(pginfo->u.usr.region->chunk_list)),
1913                                   list);
1914        return ret;
1915}
1916
1917/*
1918 * check given pages for contiguous layout
1919 * last page addr is returned in prev_pgaddr for further check
1920 */
1921static int ehca_check_kpages_per_ate(struct scatterlist *page_list,
1922                                     int start_idx, int end_idx,
1923                                     u64 *prev_pgaddr)
1924{
1925        int t;
1926        for (t = start_idx; t <= end_idx; t++) {
1927                u64 pgaddr = page_to_pfn(sg_page(&page_list[t])) << PAGE_SHIFT;
1928                if (ehca_debug_level >= 3)
1929                        ehca_gen_dbg("chunk_page=%llx value=%016llx", pgaddr,
1930                                     *(u64 *)abs_to_virt(phys_to_abs(pgaddr)));
1931                if (pgaddr - PAGE_SIZE != *prev_pgaddr) {
1932                        ehca_gen_err("uncontiguous page found pgaddr=%llx "
1933                                     "prev_pgaddr=%llx page_list_i=%x",
1934                                     pgaddr, *prev_pgaddr, t);
1935                        return -EINVAL;
1936                }
1937                *prev_pgaddr = pgaddr;
1938        }
1939        return 0;
1940}
1941
/*
 * Fill kpage[] with up to 'number' absolute hardware page addresses taken
 * from a user memory region, for the case where a hardware page is larger
 * than a kernel page (see the dispatch in ehca_set_pagebuf()).
 *
 * One kpage[] entry is produced per hardware page, which spans
 * hwpage_size / PAGE_SIZE kernel pages.  For every hardware page:
 *  - the first kernel page supplies the address written to *kpage; it
 *    must be aligned to hwpage_size unless no hardware page has been
 *    counted yet (pginfo->hwpage_cnt == 0), in which case the address is
 *    rounded down and the kernel pages skipped at the front are accounted
 *    for in pginfo->kpage_cnt and nr_kpages;
 *  - the remaining kernel pages are checked for physical contiguity with
 *    ehca_check_kpages_per_ate(); if the current chunk ends first, the
 *    check continues in the next chunk.
 *
 * The walk resumes after pginfo->u.usr.next_chunk at scatterlist index
 * pginfo->u.usr.next_nmap and the chunk cursor is stored back into
 * pginfo->u.usr.next_chunk before a normal return.
 *
 * Returns 0 on success, -EFAULT if phys_to_abs() yields 0 or a
 * non-initial hardware page is misaligned, or the error returned by
 * ehca_check_kpages_per_ate().  The error paths return immediately
 * without updating next_chunk.
 *
 * NOTE(review): nr_kpages and prev_pgaddr are locals, so a hardware page
 * that is only partly consumed when the chunk list ends does not appear
 * to be carried over to a later call - confirm callers never hit this.
 */
1942/* PAGE_SIZE < pginfo->hwpage_size */
1943static int ehca_set_pagebuf_user2(struct ehca_mr_pginfo *pginfo,
1944                                  u32 number,
1945                                  u64 *kpage)
1946{
1947        int ret = 0;
1948        struct ib_umem_chunk *prev_chunk;
1949        struct ib_umem_chunk *chunk;
1950        u64 pgaddr, prev_pgaddr;
1951        u32 i = 0;
1952        u32 j = 0;
1953        int kpages_per_hwpage = pginfo->hwpage_size / PAGE_SIZE;
1954        int nr_kpages = kpages_per_hwpage;
1955
1956        /* loop over desired chunk entries */
1957        chunk      = pginfo->u.usr.next_chunk;
1958        prev_chunk = pginfo->u.usr.next_chunk;
1959        list_for_each_entry_continue(
1960                chunk, (&(pginfo->u.usr.region->chunk_list)), list) {
1961                for (i = pginfo->u.usr.next_nmap; i < chunk->nmap; ) {
                        /* nr_kpages at its full value marks the start of a hw page */
1962                        if (nr_kpages == kpages_per_hwpage) {
1963                                pgaddr = ( page_to_pfn(sg_page(&chunk->page_list[i]))
1964                                           << PAGE_SHIFT );
1965                                *kpage = phys_to_abs(pgaddr);
1966                                if ( !(*kpage) ) {
1967                                        ehca_gen_err("pgaddr=%llx i=%x",
1968                                                     pgaddr, i);
1969                                        ret = -EFAULT;
1970                                        return ret;
1971                                }
1972                                /*
1973                                 * The first page in a hwpage must be aligned;
1974                                 * the first MR page is exempt from this rule.
1975                                 */
1976                                if (pgaddr & (pginfo->hwpage_size - 1)) {
1977                                        if (pginfo->hwpage_cnt) {
1978                                                ehca_gen_err(
1979                                                        "invalid alignment "
1980                                                        "pgaddr=%llx i=%x "
1981                                                        "mr_pgsize=%llx",
1982                                                        pgaddr, i,
1983                                                        pginfo->hwpage_size);
1984                                                ret = -EFAULT;
1985                                                return ret;
1986                                        }
1987                                        /* first MR page */
1988                                        pginfo->kpage_cnt =
1989                                                (pgaddr &
1990                                                 (pginfo->hwpage_size - 1)) >>
1991                                                PAGE_SHIFT;
1992                                        nr_kpages -= pginfo->kpage_cnt;
1993                                        *kpage = phys_to_abs(
1994                                                pgaddr &
1995                                                ~(pginfo->hwpage_size - 1));
1996                                }
1997                                if (ehca_debug_level >= 3) {
1998                                        u64 val = *(u64 *)abs_to_virt(
1999                                                phys_to_abs(pgaddr));
2000                                        ehca_gen_dbg("kpage=%llx chunk_page=%llx "
2001                                                     "value=%016llx",
2002                                                     *kpage, pgaddr, val);
2003                                }
2004                                prev_pgaddr = pgaddr;
2005                                i++;
2006                                pginfo->kpage_cnt++;
2007                                pginfo->u.usr.next_nmap++;
2008                                nr_kpages--;
2009                                if (!nr_kpages)
2010                                        goto next_kpage;
2011                                continue;
2012                        }
                        /* hw page runs past this chunk: check what is left here */
2013                        if (i + nr_kpages > chunk->nmap) {
2014                                ret = ehca_check_kpages_per_ate(
2015                                        chunk->page_list, i,
2016                                        chunk->nmap - 1, &prev_pgaddr);
2017                                if (ret) return ret;
2018                                pginfo->kpage_cnt += chunk->nmap - i;
2019                                pginfo->u.usr.next_nmap += chunk->nmap - i;
2020                                nr_kpages -= chunk->nmap - i;
2021                                break;
2022                        }
2023
2024                        ret = ehca_check_kpages_per_ate(chunk->page_list, i,
2025                                                        i + nr_kpages - 1,
2026                                                        &prev_pgaddr);
2027                        if (ret) return ret;
2028                        i += nr_kpages;
2029                        pginfo->kpage_cnt += nr_kpages;
2030                        pginfo->u.usr.next_nmap += nr_kpages;
                        /* one hardware page is complete: advance output and counters */
2031next_kpage:
2032                        nr_kpages = kpages_per_hwpage;
2033                        (pginfo->hwpage_cnt)++;
2034                        kpage++;
2035                        j++;
2036                        if (j >= number) break;
2037                }
2038                if ((pginfo->u.usr.next_nmap >= chunk->nmap) &&
2039                    (j >= number)) {
2040                        pginfo->u.usr.next_nmap = 0;
2041                        prev_chunk = chunk;
2042                        break;
2043                } else if (pginfo->u.usr.next_nmap >= chunk->nmap) {
2044                        pginfo->u.usr.next_nmap = 0;
2045                        prev_chunk = chunk;
2046                } else if (j >= number)
2047                        break;
2048                else
2049                        prev_chunk = chunk;
2050        }
2051        pginfo->u.usr.next_chunk =
2052                list_prepare_entry(prev_chunk,
2053                                   (&(pginfo->u.usr.region->chunk_list)),
2054                                   list);
2055        return ret;
2056}
2057
/*
 * Fill kpage[] with 'number' absolute hardware page addresses taken from
 * the physical buffer array in pginfo->u.phy.
 *
 * pginfo->u.phy.next_buf selects the current buffer and
 * pginfo->next_hwpage the hardware page inside it.  Each entry is
 * phys_to_abs() of the buffer address rounded down to hwpage_size plus
 * next_hwpage * hwpage_size.  When the inner loop has run to its bound
 * the cursor moves to the next buffer and next_hwpage restarts at 0.
 *
 * pginfo->hwpage_cnt is incremented per entry.  pginfo->kpage_cnt is
 * incremented once per completed kernel page when PAGE_SIZE >=
 * hwpage_size, otherwise by hwpage_size / PAGE_SIZE per entry.
 *
 * Returns 0 on success, or -EFAULT if the counters already reached
 * num_kpages / num_hwpages, or if phys_to_abs() yields 0 for a buffer
 * whose address is not 0.
 *
 * NOTE(review): offs_hw is the absolute hardware page index of the
 * buffer start, while next_hwpage counts from 0 within the buffer, so
 * for a nonzero pbuf->addr the bound offs_hw + num_hw looks larger than
 * the buffer itself - confirm this is intended.
 * NOTE(review): next_buf is not checked against
 * pginfo->u.phy.num_phys_buf in this function; presumably the caller
 * limits 'number' accordingly - verify.
 */
2058static int ehca_set_pagebuf_phys(struct ehca_mr_pginfo *pginfo,
2059                                 u32 number, u64 *kpage)
2060{
2061        int ret = 0;
2062        struct ib_phys_buf *pbuf;
2063        u64 num_hw, offs_hw;
2064        u32 i = 0;
2065
2066        /* loop over desired phys_buf_array entries */
2067        while (i < number) {
2068                pbuf   = pginfo->u.phy.phys_buf_array + pginfo->u.phy.next_buf;
                /* number of hw pages needed to cover [addr, addr + size) */
2069                num_hw  = NUM_CHUNKS((pbuf->addr % pginfo->hwpage_size) +
2070                                     pbuf->size, pginfo->hwpage_size);
2071                offs_hw = (pbuf->addr & ~(pginfo->hwpage_size - 1)) /
2072                        pginfo->hwpage_size;
2073                while (pginfo->next_hwpage < offs_hw + num_hw) {
2074                        /* sanity check */
2075                        if ((pginfo->kpage_cnt >= pginfo->num_kpages) ||
2076                            (pginfo->hwpage_cnt >= pginfo->num_hwpages)) {
2077                                ehca_gen_err("kpage_cnt >= num_kpages, "
2078                                             "kpage_cnt=%llx num_kpages=%llx "
2079                                             "hwpage_cnt=%llx "
2080                                             "num_hwpages=%llx i=%x",
2081                                             pginfo->kpage_cnt,
2082                                             pginfo->num_kpages,
2083                                             pginfo->hwpage_cnt,
2084                                             pginfo->num_hwpages, i);
2085                                return -EFAULT;
2086                        }
2087                        *kpage = phys_to_abs(
2088                                (pbuf->addr & ~(pginfo->hwpage_size - 1)) +
2089                                (pginfo->next_hwpage * pginfo->hwpage_size));
                        /* a zero result is only accepted for a buffer at address 0 */
2090                        if ( !(*kpage) && pbuf->addr ) {
2091                                ehca_gen_err("pbuf->addr=%llx pbuf->size=%llx "
2092                                             "next_hwpage=%llx", pbuf->addr,
2093                                             pbuf->size, pginfo->next_hwpage);
2094                                return -EFAULT;
2095                        }
2096                        (pginfo->hwpage_cnt)++;
2097                        (pginfo->next_hwpage)++;
2098                        if (PAGE_SIZE >= pginfo->hwpage_size) {
2099                                if (pginfo->next_hwpage %
2100                                    (PAGE_SIZE / pginfo->hwpage_size) == 0)
2101                                        (pginfo->kpage_cnt)++;
2102                        } else
2103                                pginfo->kpage_cnt += pginfo->hwpage_size /
2104                                        PAGE_SIZE;
2105                        kpage++;
2106                        i++;
2107                        if (i >= number) break;
2108                }
2109                if (pginfo->next_hwpage >= offs_hw + num_hw) {
2110                        (pginfo->u.phy.next_buf)++;
2111                        pginfo->next_hwpage = 0;
2112                }
2113        }
2114        return ret;
2115}
2116
/*
 * Fill kpage[] with 'number' absolute hardware page addresses taken from
 * the FMR page list in pginfo->u.fmr.
 *
 * The list is read starting at pginfo->u.fmr.next_listelem.  Each entry
 * written is phys_to_abs() of the current list element rounded down to
 * hwpage_size plus next_hwpage * hwpage_size; pginfo->hwpage_cnt is
 * incremented per entry.
 *
 * Two layouts are handled:
 *  - fmr_pgsize >= hwpage_size: the list cursor, kpage_cnt and
 *    next_hwpage are updated according to the modulo test below;
 *  - fmr_pgsize < hwpage_size: the following list elements that make up
 *    one hardware page are compared against the previous one, then the
 *    list cursor and kpage_cnt advance by hwpage_size / fmr_pgsize.
 *
 * Returns 0 on success, -EFAULT if phys_to_abs() yields 0, or -EINVAL if
 * the contiguity comparison fails.
 *
 * NOTE(review): in the first layout the modulo is tested before
 * next_hwpage is incremented and the taken branch resets it to 0, so
 * when next_hwpage is 0 on entry the else branch looks unreachable -
 * confirm the intended order.
 * NOTE(review): in the second layout both operands of the comparison are
 * rounded down to hwpage_size before fmr_pgsize is added to prev -
 * confirm this matches the intended contiguity rule.
 */
2117static int ehca_set_pagebuf_fmr(struct ehca_mr_pginfo *pginfo,
2118                                u32 number, u64 *kpage)
2119{
2120        int ret = 0;
2121        u64 *fmrlist;
2122        u32 i;
2123
2124        /* loop over desired page_list entries */
2125        fmrlist = pginfo->u.fmr.page_list + pginfo->u.fmr.next_listelem;
2126        for (i = 0; i < number; i++) {
2127                *kpage = phys_to_abs((*fmrlist & ~(pginfo->hwpage_size - 1)) +
2128                                     pginfo->next_hwpage * pginfo->hwpage_size);
2129                if ( !(*kpage) ) {
2130                        ehca_gen_err("*fmrlist=%llx fmrlist=%p "
2131                                     "next_listelem=%llx next_hwpage=%llx",
2132                                     *fmrlist, fmrlist,
2133                                     pginfo->u.fmr.next_listelem,
2134                                     pginfo->next_hwpage);
2135                        return -EFAULT;
2136                }
2137                (pginfo->hwpage_cnt)++;
2138                if (pginfo->u.fmr.fmr_pgsize >= pginfo->hwpage_size) {
2139                        if (pginfo->next_hwpage %
2140                            (pginfo->u.fmr.fmr_pgsize /
2141                             pginfo->hwpage_size) == 0) {
2142                                (pginfo->kpage_cnt)++;
2143                                (pginfo->u.fmr.next_listelem)++;
2144                                fmrlist++;
2145                                pginfo->next_hwpage = 0;
2146                        } else
2147                                (pginfo->next_hwpage)++;
2148                } else {
2149                        unsigned int cnt_per_hwpage = pginfo->hwpage_size /
2150                                pginfo->u.fmr.fmr_pgsize;
2151                        unsigned int j;
2152                        u64 prev = *kpage;
2153                        /* check if adrs are contiguous */
2154                        for (j = 1; j < cnt_per_hwpage; j++) {
2155                                u64 p = phys_to_abs(fmrlist[j] &
2156                                                    ~(pginfo->hwpage_size - 1));
2157                                if (prev + pginfo->u.fmr.fmr_pgsize != p) {
2158                                        ehca_gen_err("uncontiguous fmr pages "
2159                                                     "found prev=%llx p=%llx "
2160                                                     "idx=%x", prev, p, i + j);
2161                                        return -EINVAL;
2162                                }
2163                                prev = p;
2164                        }
2165                        pginfo->kpage_cnt += cnt_per_hwpage;
2166                        pginfo->u.fmr.next_listelem += cnt_per_hwpage;
2167                        fmrlist += cnt_per_hwpage;
2168                }
2169                kpage++;
2170        }
2171        return ret;
2172}
2173
2174/* setup page buffer from page info */
2175int ehca_set_pagebuf(struct ehca_mr_pginfo *pginfo,
2176                     u32 number,
2177                     u64 *kpage)
2178{
2179        int ret;
2180
2181        switch (pginfo->type) {
2182        case EHCA_MR_PGI_PHYS:
2183                ret = ehca_set_pagebuf_phys(pginfo, number, kpage);
2184                break;
2185        case EHCA_MR_PGI_USER:
2186                ret = PAGE_SIZE >= pginfo->hwpage_size ?
2187                        ehca_set_pagebuf_user1(pginfo, number, kpage) :
2188                        ehca_set_pagebuf_user2(pginfo, number, kpage);
2189                break;
2190        case EHCA_MR_PGI_FMR:
2191                ret = ehca_set_pagebuf_fmr(pginfo, number, kpage);
2192                break;
2193        default:
2194                ehca_gen_err("bad pginfo->type=%x", pginfo->type);
2195                ret = -EFAULT;
2196                break;
2197        }
2198        return ret;
2199} /* end ehca_set_pagebuf() */
2200
2201/*----------------------------------------------------------------------*/
2202
2203/*
2204 * check MR if it is a max-MR, i.e. uses whole memory
2205 * in case it's a max-MR 1 is returned, else 0
2206 */
2207int ehca_mr_is_maxmr(u64 size,
2208                     u64 *iova_start)
2209{
2210        /* a MR is treated as max-MR only if it fits following: */
2211        if ((size == ehca_mr_len) &&
2212            (iova_start == (void *)ehca_map_vaddr((void *)(KERNELBASE + PHYSICAL_START)))) {
2213                ehca_gen_dbg("this is a max-MR");
2214                return 1;
2215        } else
2216                return 0;
2217} /* end ehca_mr_is_maxmr() */
2218
2219/*----------------------------------------------------------------------*/
2220
2221/* map access control for MR/MW. This routine is used for MR and MW. */
2222void ehca_mrmw_map_acl(int ib_acl,
2223                       u32 *hipz_acl)
2224{
2225        *hipz_acl = 0;
2226        if (ib_acl & IB_ACCESS_REMOTE_READ)
2227                *hipz_acl |= HIPZ_ACCESSCTRL_R_READ;
2228        if (ib_acl & IB_ACCESS_REMOTE_WRITE)
2229                *hipz_acl |= HIPZ_ACCESSCTRL_R_WRITE;
2230        if (ib_acl & IB_ACCESS_REMOTE_ATOMIC)
2231                *hipz_acl |= HIPZ_ACCESSCTRL_R_ATOMIC;
2232        if (ib_acl & IB_ACCESS_LOCAL_WRITE)
2233                *hipz_acl |= HIPZ_ACCESSCTRL_L_WRITE;
2234        if (ib_acl & IB_ACCESS_MW_BIND)
2235                *hipz_acl |= HIPZ_ACCESSCTRL_MW_BIND;
2236} /* end ehca_mrmw_map_acl() */
2237
2238/*----------------------------------------------------------------------*/
2239
2240/* sets page size in hipz access control for MR/MW. */
2241void ehca_mrmw_set_pgsize_hipz_acl(u32 pgsize, u32 *hipz_acl) /*INOUT*/
2242{
2243        *hipz_acl |= (ehca_encode_hwpage_size(pgsize) << 24);
2244} /* end ehca_mrmw_set_pgsize_hipz_acl() */
2245
2246/*----------------------------------------------------------------------*/
2247
2248/*
2249 * reverse map access control for MR/MW.
2250 * This routine is used for MR and MW.
2251 */
2252void ehca_mrmw_reverse_map_acl(const u32 *hipz_acl,
2253                               int *ib_acl) /*OUT*/
2254{
2255        *ib_acl = 0;
2256        if (*hipz_acl & HIPZ_ACCESSCTRL_R_READ)
2257                *ib_acl |= IB_ACCESS_REMOTE_READ;
2258        if (*hipz_acl & HIPZ_ACCESSCTRL_R_WRITE)
2259                *ib_acl |= IB_ACCESS_REMOTE_WRITE;
2260        if (*hipz_acl & HIPZ_ACCESSCTRL_R_ATOMIC)
2261                *ib_acl |= IB_ACCESS_REMOTE_ATOMIC;
2262        if (*hipz_acl & HIPZ_ACCESSCTRL_L_WRITE)
2263                *ib_acl |= IB_ACCESS_LOCAL_WRITE;
2264        if (*hipz_acl & HIPZ_ACCESSCTRL_MW_BIND)
2265                *ib_acl |= IB_ACCESS_MW_BIND;
2266} /* end ehca_mrmw_reverse_map_acl() */
2267
2268
2269/*----------------------------------------------------------------------*/
2270
2271/*
2272 * MR destructor and constructor
2273 * used in Reregister MR verb, sets all fields in ehca_mr_t to 0,
2274 * except struct ib_mr and spinlock
2275 */
2276void ehca_mr_deletenew(struct ehca_mr *mr)
2277{
2278        mr->flags = 0;
2279        mr->num_kpages = 0;
2280        mr->num_hwpages = 0;
2281        mr->acl = 0;
2282        mr->start = NULL;
2283        mr->fmr_page_size = 0;
2284        mr->fmr_max_pages = 0;
2285        mr->fmr_max_maps = 0;
2286        mr->fmr_map_cnt = 0;
2287        memset(&mr->ipz_mr_handle, 0, sizeof(mr->ipz_mr_handle));
2288        memset(&mr->galpas, 0, sizeof(mr->galpas));
2289} /* end ehca_mr_deletenew() */
2290
2291int ehca_init_mrmw_cache(void)
2292{
2293        mr_cache = kmem_cache_create("ehca_cache_mr",
2294                                     sizeof(struct ehca_mr), 0,
2295                                     SLAB_HWCACHE_ALIGN,
2296                                     NULL);
2297        if (!mr_cache)
2298                return -ENOMEM;
2299        mw_cache = kmem_cache_create("ehca_cache_mw",
2300                                     sizeof(struct ehca_mw), 0,
2301                                     SLAB_HWCACHE_ALIGN,
2302                                     NULL);
2303        if (!mw_cache) {
2304                kmem_cache_destroy(mr_cache);
2305                mr_cache = NULL;
2306                return -ENOMEM;
2307        }
2308        return 0;
2309}
2310
2311void ehca_cleanup_mrmw_cache(void)
2312{
2313        if (mr_cache)
2314                kmem_cache_destroy(mr_cache);
2315        if (mw_cache)
2316                kmem_cache_destroy(mw_cache);
2317}
2318
2319static inline int ehca_init_top_bmap(struct ehca_top_bmap *ehca_top_bmap,
2320                                     int dir)
2321{
2322        if (!ehca_bmap_valid(ehca_top_bmap->dir[dir])) {
2323                ehca_top_bmap->dir[dir] =
2324                        kmalloc(sizeof(struct ehca_dir_bmap), GFP_KERNEL);
2325                if (!ehca_top_bmap->dir[dir])
2326                        return -ENOMEM;
2327                /* Set map block to 0xFF according to EHCA_INVAL_ADDR */
2328                memset(ehca_top_bmap->dir[dir], 0xFF, EHCA_ENT_MAP_SIZE);
2329        }
2330        return 0;
2331}
2332
2333static inline int ehca_init_bmap(struct ehca_bmap *ehca_bmap, int top, int dir)
2334{
2335        if (!ehca_bmap_valid(ehca_bmap->top[top])) {
2336                ehca_bmap->top[top] =
2337                        kmalloc(sizeof(struct ehca_top_bmap), GFP_KERNEL);
2338                if (!ehca_bmap->top[top])
2339                        return -ENOMEM;
2340                /* Set map block to 0xFF according to EHCA_INVAL_ADDR */
2341                memset(ehca_bmap->top[top], 0xFF, EHCA_DIR_MAP_SIZE);
2342        }
2343        return ehca_init_top_bmap(ehca_bmap->top[top], dir);
2344}
2345
2346static inline int ehca_calc_index(unsigned long i, unsigned long s)
2347{
2348        return (i >> s) & EHCA_INDEX_MASK;
2349}
2350
2351void ehca_destroy_busmap(void)
2352{
2353        int top, dir;
2354
2355        if (!ehca_bmap)
2356                return;
2357
2358        for (top = 0; top < EHCA_MAP_ENTRIES; top++) {
2359                if (!ehca_bmap_valid(ehca_bmap->top[top]))
2360                        continue;
2361                for (dir = 0; dir < EHCA_MAP_ENTRIES; dir++) {
2362                        if (!ehca_bmap_valid(ehca_bmap->top[top]->dir[dir]))
2363                                continue;
2364
2365                        kfree(ehca_bmap->top[top]->dir[dir]);
2366                }
2367
2368                kfree(ehca_bmap->top[top]);
2369        }
2370
2371        kfree(ehca_bmap);
2372        ehca_bmap = NULL;
2373}
2374
2375static int ehca_update_busmap(unsigned long pfn, unsigned long nr_pages)
2376{
2377        unsigned long i, start_section, end_section;
2378        int top, dir, idx;
2379
2380        if (!nr_pages)
2381                return 0;
2382
2383        if (!ehca_bmap) {
2384                ehca_bmap = kmalloc(sizeof(struct ehca_bmap), GFP_KERNEL);
2385                if (!ehca_bmap)
2386                        return -ENOMEM;
2387                /* Set map block to 0xFF according to EHCA_INVAL_ADDR */
2388                memset(ehca_bmap, 0xFF, EHCA_TOP_MAP_SIZE);
2389        }
2390
2391        start_section = phys_to_abs(pfn * PAGE_SIZE) / EHCA_SECTSIZE;
2392        end_section = phys_to_abs((pfn + nr_pages) * PAGE_SIZE) / EHCA_SECTSIZE;
2393        for (i = start_section; i < end_section; i++) {
2394                int ret;
2395                top = ehca_calc_index(i, EHCA_TOP_INDEX_SHIFT);
2396                dir = ehca_calc_index(i, EHCA_DIR_INDEX_SHIFT);
2397                idx = i & EHCA_INDEX_MASK;
2398
2399                ret = ehca_init_bmap(ehca_bmap, top, dir);
2400                if (ret) {
2401                        ehca_destroy_busmap();
2402                        return ret;
2403                }
2404                ehca_bmap->top[top]->dir[dir]->ent[idx] = ehca_mr_len;
2405                ehca_mr_len += EHCA_SECTSIZE;
2406        }
2407        return 0;
2408}
2409
2410static int ehca_is_hugepage(unsigned long pfn)
2411{
2412        int page_order;
2413
2414        if (pfn & EHCA_HUGEPAGE_PFN_MASK)
2415                return 0;
2416
2417        page_order = compound_order(pfn_to_page(pfn));
2418        if (page_order + PAGE_SHIFT != EHCA_HUGEPAGESHIFT)
2419                return 0;
2420
2421        return 1;
2422}
2423
2424static int ehca_create_busmap_callback(unsigned long initial_pfn,
2425                                       unsigned long total_nr_pages, void *arg)
2426{
2427        int ret;
2428        unsigned long pfn, start_pfn, end_pfn, nr_pages;
2429
2430        if ((total_nr_pages * PAGE_SIZE) < EHCA_HUGEPAGE_SIZE)
2431                return ehca_update_busmap(initial_pfn, total_nr_pages);
2432
2433        /* Given chunk is >= 16GB -> check for hugepages */
2434        start_pfn = initial_pfn;
2435        end_pfn = initial_pfn + total_nr_pages;
2436        pfn = start_pfn;
2437
2438        while (pfn < end_pfn) {
2439                if (ehca_is_hugepage(pfn)) {
2440                        /* Add mem found in front of the hugepage */
2441                        nr_pages = pfn - start_pfn;
2442                        ret = ehca_update_busmap(start_pfn, nr_pages);
2443                        if (ret)
2444                                return ret;
2445                        /* Skip the hugepage */
2446                        pfn += (EHCA_HUGEPAGE_SIZE / PAGE_SIZE);
2447                        start_pfn = pfn;
2448                } else
2449                        pfn += (EHCA_SECTSIZE / PAGE_SIZE);
2450        }
2451
2452        /* Add mem found behind the hugepage(s)  */
2453        nr_pages = pfn - start_pfn;
2454        return ehca_update_busmap(start_pfn, nr_pages);
2455}
2456
2457int ehca_create_busmap(void)
2458{
2459        int ret;
2460
2461        ehca_mr_len = 0;
2462        ret = walk_system_ram_range(0, 1ULL << MAX_PHYSMEM_BITS, NULL,
2463                                   ehca_create_busmap_callback);
2464        return ret;
2465}
2466
2467static int ehca_reg_bmap_mr_rpages(struct ehca_shca *shca,
2468                                   struct ehca_mr *e_mr,
2469                                   struct ehca_mr_pginfo *pginfo)
2470{
2471        int top;
2472        u64 hret, *kpage;
2473
2474        kpage = ehca_alloc_fw_ctrlblock(GFP_KERNEL);
2475        if (!kpage) {
2476                ehca_err(&shca->ib_device, "kpage alloc failed");
2477                return -ENOMEM;
2478        }
2479        for (top = 0; top < EHCA_MAP_ENTRIES; top++) {
2480                if (!ehca_bmap_valid(ehca_bmap->top[top]))
2481                        continue;
2482                hret = ehca_reg_mr_dir_sections(top, kpage, shca, e_mr, pginfo);
2483                if ((hret != H_PAGE_REGISTERED) && (hret != H_SUCCESS))
2484                        break;
2485        }
2486
2487        ehca_free_fw_ctrlblock(kpage);
2488
2489        if (hret == H_SUCCESS)
2490                return 0; /* Everything is fine */
2491        else {
2492                ehca_err(&shca->ib_device, "ehca_reg_bmap_mr_rpages failed, "
2493                                 "h_ret=%lli e_mr=%p top=%x lkey=%x "
2494                                 "hca_hndl=%llx mr_hndl=%llx", hret, e_mr, top,
2495                                 e_mr->ib.ib_mr.lkey,
2496                                 shca->ipz_hca_handle.handle,
2497                                 e_mr->ipz_mr_handle.handle);
2498                return ehca2ib_return_code(hret);
2499        }
2500}
2501
2502static u64 ehca_map_vaddr(void *caddr)
2503{
2504        int top, dir, idx;
2505        unsigned long abs_addr, offset;
2506        u64 entry;
2507
2508        if (!ehca_bmap)
2509                return EHCA_INVAL_ADDR;
2510
2511        abs_addr = virt_to_abs(caddr);
2512        top = ehca_calc_index(abs_addr, EHCA_TOP_INDEX_SHIFT + EHCA_SECTSHIFT);
2513        if (!ehca_bmap_valid(ehca_bmap->top[top]))
2514                return EHCA_INVAL_ADDR;
2515
2516        dir = ehca_calc_index(abs_addr, EHCA_DIR_INDEX_SHIFT + EHCA_SECTSHIFT);
2517        if (!ehca_bmap_valid(ehca_bmap->top[top]->dir[dir]))
2518                return EHCA_INVAL_ADDR;
2519
2520        idx = ehca_calc_index(abs_addr, EHCA_SECTSHIFT);
2521
2522        entry = ehca_bmap->top[top]->dir[dir]->ent[idx];
2523        if (ehca_bmap_valid(entry)) {
2524                offset = (unsigned long)caddr & (EHCA_SECTSIZE - 1);
2525                return entry | offset;
2526        } else
2527                return EHCA_INVAL_ADDR;
2528}
2529
2530static int ehca_dma_mapping_error(struct ib_device *dev, u64 dma_addr)
2531{
2532        return dma_addr == EHCA_INVAL_ADDR;
2533}
2534
2535static u64 ehca_dma_map_single(struct ib_device *dev, void *cpu_addr,
2536                               size_t size, enum dma_data_direction direction)
2537{
2538        if (cpu_addr)
2539                return ehca_map_vaddr(cpu_addr);
2540        else
2541                return EHCA_INVAL_ADDR;
2542}
2543
2544static void ehca_dma_unmap_single(struct ib_device *dev, u64 addr, size_t size,
2545                                  enum dma_data_direction direction)
2546{
2547        /* This is only a stub; nothing to be done here */
2548}
2549
2550static u64 ehca_dma_map_page(struct ib_device *dev, struct page *page,
2551                             unsigned long offset, size_t size,
2552                             enum dma_data_direction direction)
2553{
2554        u64 addr;
2555
2556        if (offset + size > PAGE_SIZE)
2557                return EHCA_INVAL_ADDR;
2558
2559        addr = ehca_map_vaddr(page_address(page));
2560        if (!ehca_dma_mapping_error(dev, addr))
2561                addr += offset;
2562
2563        return addr;
2564}
2565
2566static void ehca_dma_unmap_page(struct ib_device *dev, u64 addr, size_t size,
2567                                enum dma_data_direction direction)
2568{
2569        /* This is only a stub; nothing to be done here */
2570}
2571
2572static int ehca_dma_map_sg(struct ib_device *dev, struct scatterlist *sgl,
2573                           int nents, enum dma_data_direction direction)
2574{
2575        struct scatterlist *sg;
2576        int i;
2577
2578        for_each_sg(sgl, sg, nents, i) {
2579                u64 addr;
2580                addr = ehca_map_vaddr(sg_virt(sg));
2581                if (ehca_dma_mapping_error(dev, addr))
2582                        return 0;
2583
2584                sg->dma_address = addr;
2585                sg->dma_length = sg->length;
2586        }
2587        return nents;
2588}
2589
2590static void ehca_dma_unmap_sg(struct ib_device *dev, struct scatterlist *sg,
2591                              int nents, enum dma_data_direction direction)
2592{
2593        /* This is only a stub; nothing to be done here */
2594}
2595
2596static u64 ehca_dma_address(struct ib_device *dev, struct scatterlist *sg)
2597{
2598        return sg->dma_address;
2599}
2600
2601static unsigned int ehca_dma_len(struct ib_device *dev, struct scatterlist *sg)
2602{
2603        return sg->length;
2604}
2605
2606static void ehca_dma_sync_single_for_cpu(struct ib_device *dev, u64 addr,
2607                                         size_t size,
2608                                         enum dma_data_direction dir)
2609{
2610        dma_sync_single_for_cpu(dev->dma_device, addr, size, dir);
2611}
2612
2613static void ehca_dma_sync_single_for_device(struct ib_device *dev, u64 addr,
2614                                            size_t size,
2615                                            enum dma_data_direction dir)
2616{
2617        dma_sync_single_for_device(dev->dma_device, addr, size, dir);
2618}
2619
2620static void *ehca_dma_alloc_coherent(struct ib_device *dev, size_t size,
2621                                     u64 *dma_handle, gfp_t flag)
2622{
2623        struct page *p;
2624        void *addr = NULL;
2625        u64 dma_addr;
2626
2627        p = alloc_pages(flag, get_order(size));
2628        if (p) {
2629                addr = page_address(p);
2630                dma_addr = ehca_map_vaddr(addr);
2631                if (ehca_dma_mapping_error(dev, dma_addr)) {
2632                        free_pages((unsigned long)addr, get_order(size));
2633                        return NULL;
2634                }
2635                if (dma_handle)
2636                        *dma_handle = dma_addr;
2637                return addr;
2638        }
2639        return NULL;
2640}
2641
2642static void ehca_dma_free_coherent(struct ib_device *dev, size_t size,
2643                                   void *cpu_addr, u64 dma_handle)
2644{
2645        if (cpu_addr && size)
2646                free_pages((unsigned long)cpu_addr, get_order(size));
2647}
2648
2649
2650struct ib_dma_mapping_ops ehca_dma_mapping_ops = {
2651        .mapping_error          = ehca_dma_mapping_error,
2652        .map_single             = ehca_dma_map_single,
2653        .unmap_single           = ehca_dma_unmap_single,
2654        .map_page               = ehca_dma_map_page,
2655        .unmap_page             = ehca_dma_unmap_page,
2656        .map_sg                 = ehca_dma_map_sg,
2657        .unmap_sg               = ehca_dma_unmap_sg,
2658        .dma_address            = ehca_dma_address,
2659        .dma_len                = ehca_dma_len,
2660        .sync_single_for_cpu    = ehca_dma_sync_single_for_cpu,
2661        .sync_single_for_device = ehca_dma_sync_single_for_device,
2662        .alloc_coherent         = ehca_dma_alloc_coherent,
2663        .free_coherent          = ehca_dma_free_coherent,
2664};
2665