linux/drivers/infiniband/hw/efa/efa_verbs.c
   1// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
   2/*
   3 * Copyright 2018-2019 Amazon.com, Inc. or its affiliates. All rights reserved.
   4 */
   5
   6#include <linux/vmalloc.h>
   7
   8#include <rdma/ib_addr.h>
   9#include <rdma/ib_umem.h>
  10#include <rdma/ib_user_verbs.h>
  11#include <rdma/ib_verbs.h>
  12#include <rdma/uverbs_ioctl.h>
  13
  14#include "efa.h"
  15
  16#define EFA_MMAP_FLAG_SHIFT 56
  17#define EFA_MMAP_PAGE_MASK GENMASK(EFA_MMAP_FLAG_SHIFT - 1, 0)
  18#define EFA_MMAP_INVALID U64_MAX
  19
  20enum {
  21        EFA_MMAP_DMA_PAGE = 0,
  22        EFA_MMAP_IO_WC,
  23        EFA_MMAP_IO_NC,
  24};
  25
  26#define EFA_AENQ_ENABLED_GROUPS \
  27        (BIT(EFA_ADMIN_FATAL_ERROR) | BIT(EFA_ADMIN_WARNING) | \
  28         BIT(EFA_ADMIN_NOTIFICATION) | BIT(EFA_ADMIN_KEEP_ALIVE))
  29
  30struct efa_mmap_entry {
  31        void  *obj;
  32        u64 address;
  33        u64 length;
  34        u32 mmap_page;
  35        u8 mmap_flag;
  36};
  37
  38static inline u64 get_mmap_key(const struct efa_mmap_entry *efa)
  39{
  40        return ((u64)efa->mmap_flag << EFA_MMAP_FLAG_SHIFT) |
  41               ((u64)efa->mmap_page << PAGE_SHIFT);
  42}
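/*
 * The mmap key packs the entry type into the top 8 bits and the page
 * index (shifted back up by PAGE_SHIFT) into the low 56 bits.  Worked
 * example, assuming PAGE_SHIFT is 12: an entry with
 * mmap_flag == EFA_MMAP_IO_NC (2) and mmap_page == 3 yields
 * (2ULL << 56) | (3ULL << 12) == 0x0200000000003000.
 */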
  43
  44#define EFA_CHUNK_PAYLOAD_SHIFT       12
  45#define EFA_CHUNK_PAYLOAD_SIZE        BIT(EFA_CHUNK_PAYLOAD_SHIFT)
  46#define EFA_CHUNK_PAYLOAD_PTR_SIZE    8
  47
  48#define EFA_CHUNK_SHIFT               12
  49#define EFA_CHUNK_SIZE                BIT(EFA_CHUNK_SHIFT)
  50#define EFA_CHUNK_PTR_SIZE            sizeof(struct efa_com_ctrl_buff_info)
  51
  52#define EFA_PTRS_PER_CHUNK \
  53        ((EFA_CHUNK_SIZE - EFA_CHUNK_PTR_SIZE) / EFA_CHUNK_PAYLOAD_PTR_SIZE)
  54
  55#define EFA_CHUNK_USED_SIZE \
  56        ((EFA_PTRS_PER_CHUNK * EFA_CHUNK_PAYLOAD_PTR_SIZE) + EFA_CHUNK_PTR_SIZE)
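/*
 * Rough sizing sketch (not authoritative): with a 4096 byte chunk and
 * assuming sizeof(struct efa_com_ctrl_buff_info) is 12 bytes (a 32-bit
 * length plus a split 64-bit address), EFA_PTRS_PER_CHUNK comes out to
 * (4096 - 12) / 8 = 510 payload pointers per chunk and
 * EFA_CHUNK_USED_SIZE to 510 * 8 + 12 = 4092 bytes.
 */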
  57
  58#define EFA_SUPPORTED_ACCESS_FLAGS IB_ACCESS_LOCAL_WRITE
  59
  60struct pbl_chunk {
  61        dma_addr_t dma_addr;
  62        u64 *buf;
  63        u32 length;
  64};
  65
  66struct pbl_chunk_list {
  67        struct pbl_chunk *chunks;
  68        unsigned int size;
  69};
  70
  71struct pbl_context {
  72        union {
  73                struct {
  74                        dma_addr_t dma_addr;
  75                } continuous;
  76                struct {
  77                        u32 pbl_buf_size_in_pages;
  78                        struct scatterlist *sgl;
  79                        int sg_dma_cnt;
  80                        struct pbl_chunk_list chunk_list;
  81                } indirect;
  82        } phys;
  83        u64 *pbl_buf;
  84        u32 pbl_buf_size_in_bytes;
  85        u8 physically_continuous;
  86};
  87
  88static inline struct efa_dev *to_edev(struct ib_device *ibdev)
  89{
  90        return container_of(ibdev, struct efa_dev, ibdev);
  91}
  92
  93static inline struct efa_ucontext *to_eucontext(struct ib_ucontext *ibucontext)
  94{
  95        return container_of(ibucontext, struct efa_ucontext, ibucontext);
  96}
  97
  98static inline struct efa_pd *to_epd(struct ib_pd *ibpd)
  99{
 100        return container_of(ibpd, struct efa_pd, ibpd);
 101}
 102
 103static inline struct efa_mr *to_emr(struct ib_mr *ibmr)
 104{
 105        return container_of(ibmr, struct efa_mr, ibmr);
 106}
 107
 108static inline struct efa_qp *to_eqp(struct ib_qp *ibqp)
 109{
 110        return container_of(ibqp, struct efa_qp, ibqp);
 111}
 112
 113static inline struct efa_cq *to_ecq(struct ib_cq *ibcq)
 114{
 115        return container_of(ibcq, struct efa_cq, ibcq);
 116}
 117
 118static inline struct efa_ah *to_eah(struct ib_ah *ibah)
 119{
 120        return container_of(ibah, struct efa_ah, ibah);
 121}
 122
 123#define field_avail(x, fld, sz) (offsetof(typeof(x), fld) + \
 124                                 sizeof(((typeof(x) *)0)->fld) <= (sz))
 125
 126#define is_reserved_cleared(reserved) \
 127        !memchr_inv(reserved, 0, sizeof(reserved))
 128
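/*
 * Allocate a zeroed, physically contiguous buffer and DMA-map it.
 * alloc_pages_exact() is used rather than vmalloc() because these
 * buffers are later exposed to userspace via the EFA_MMAP_DMA_PAGE
 * entries created in qp_mmap_entries_setup() and cq_mmap_entries_setup().
 */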
 129static void *efa_zalloc_mapped(struct efa_dev *dev, dma_addr_t *dma_addr,
 130                               size_t size, enum dma_data_direction dir)
 131{
 132        void *addr;
 133
 134        addr = alloc_pages_exact(size, GFP_KERNEL | __GFP_ZERO);
 135        if (!addr)
 136                return NULL;
 137
 138        *dma_addr = dma_map_single(&dev->pdev->dev, addr, size, dir);
 139        if (dma_mapping_error(&dev->pdev->dev, *dma_addr)) {
 140                ibdev_err(&dev->ibdev, "Failed to map DMA address\n");
 141                free_pages_exact(addr, size);
 142                return NULL;
 143        }
 144
 145        return addr;
 146}
 147
 148/*
 149 * This is only called when the ucontext is destroyed and there can be no
 150 * concurrent query via mmap or allocate on the xarray, thus we can be sure no
 151 * other thread is using the entry pointer. We also know that all the BAR
  152 * pages have either been zap'd or munmapped at this point.  Normal pages are
 153 * refcounted and will be freed at the proper time.
 154 */
 155static void mmap_entries_remove_free(struct efa_dev *dev,
 156                                     struct efa_ucontext *ucontext)
 157{
 158        struct efa_mmap_entry *entry;
 159        unsigned long mmap_page;
 160
 161        xa_for_each(&ucontext->mmap_xa, mmap_page, entry) {
 162                xa_erase(&ucontext->mmap_xa, mmap_page);
 163
 164                ibdev_dbg(
 165                        &dev->ibdev,
 166                        "mmap: obj[0x%p] key[%#llx] addr[%#llx] len[%#llx] removed\n",
 167                        entry->obj, get_mmap_key(entry), entry->address,
 168                        entry->length);
 169                if (entry->mmap_flag == EFA_MMAP_DMA_PAGE)
 170                        /* DMA mapping is already gone, now free the pages */
 171                        free_pages_exact(phys_to_virt(entry->address),
 172                                         entry->length);
 173                kfree(entry);
 174        }
 175}
 176
 177static struct efa_mmap_entry *mmap_entry_get(struct efa_dev *dev,
 178                                             struct efa_ucontext *ucontext,
 179                                             u64 key, u64 len)
 180{
 181        struct efa_mmap_entry *entry;
 182        u64 mmap_page;
 183
 184        mmap_page = (key & EFA_MMAP_PAGE_MASK) >> PAGE_SHIFT;
 185        if (mmap_page > U32_MAX)
 186                return NULL;
 187
 188        entry = xa_load(&ucontext->mmap_xa, mmap_page);
 189        if (!entry || get_mmap_key(entry) != key || entry->length != len)
 190                return NULL;
 191
 192        ibdev_dbg(&dev->ibdev,
  193                  "mmap: obj[0x%p] key[%#llx] addr[%#llx] len[%#llx] found\n",
 194                  entry->obj, key, entry->address, entry->length);
 195
 196        return entry;
 197}
 198
 199/*
 200 * Note this locking scheme cannot support removal of entries, except during
  201 * ucontext destruction when the core code guarantees no concurrency.
 202 */
 203static u64 mmap_entry_insert(struct efa_dev *dev, struct efa_ucontext *ucontext,
 204                             void *obj, u64 address, u64 length, u8 mmap_flag)
 205{
 206        struct efa_mmap_entry *entry;
 207        u32 next_mmap_page;
 208        int err;
 209
 210        entry = kmalloc(sizeof(*entry), GFP_KERNEL);
 211        if (!entry)
 212                return EFA_MMAP_INVALID;
 213
 214        entry->obj = obj;
 215        entry->address = address;
 216        entry->length = length;
 217        entry->mmap_flag = mmap_flag;
 218
 219        xa_lock(&ucontext->mmap_xa);
 220        if (check_add_overflow(ucontext->mmap_xa_page,
 221                               (u32)(length >> PAGE_SHIFT),
 222                               &next_mmap_page))
 223                goto err_unlock;
 224
 225        entry->mmap_page = ucontext->mmap_xa_page;
 226        ucontext->mmap_xa_page = next_mmap_page;
 227        err = __xa_insert(&ucontext->mmap_xa, entry->mmap_page, entry,
 228                          GFP_KERNEL);
 229        if (err)
 230                goto err_unlock;
 231
 232        xa_unlock(&ucontext->mmap_xa);
 233
 234        ibdev_dbg(
 235                &dev->ibdev,
 236                "mmap: obj[0x%p] addr[%#llx], len[%#llx], key[%#llx] inserted\n",
 237                entry->obj, entry->address, entry->length, get_mmap_key(entry));
 238
 239        return get_mmap_key(entry);
 240
 241err_unlock:
 242        xa_unlock(&ucontext->mmap_xa);
 243        kfree(entry);
 244        return EFA_MMAP_INVALID;
 246}
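/*
 * Illustrative example (assuming PAGE_SHIFT is 12): starting from
 * mmap_xa_page == 0, inserting a PAGE_SIZE doorbell entry consumes page
 * 0 and advances mmap_xa_page to 1; a subsequent 16 KiB RQ entry gets
 * mmap_page == 1 and advances mmap_xa_page to 5.  Keys are therefore
 * unique within a ucontext for its whole lifetime.
 */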
 247
 248int efa_query_device(struct ib_device *ibdev,
 249                     struct ib_device_attr *props,
 250                     struct ib_udata *udata)
 251{
 252        struct efa_com_get_device_attr_result *dev_attr;
 253        struct efa_ibv_ex_query_device_resp resp = {};
 254        struct efa_dev *dev = to_edev(ibdev);
 255        int err;
 256
 257        if (udata && udata->inlen &&
 258            !ib_is_udata_cleared(udata, 0, udata->inlen)) {
 259                ibdev_dbg(ibdev,
 260                          "Incompatible ABI params, udata not cleared\n");
 261                return -EINVAL;
 262        }
 263
 264        dev_attr = &dev->dev_attr;
 265
 266        memset(props, 0, sizeof(*props));
 267        props->max_mr_size = dev_attr->max_mr_pages * PAGE_SIZE;
 268        props->page_size_cap = dev_attr->page_size_cap;
 269        props->vendor_id = dev->pdev->vendor;
 270        props->vendor_part_id = dev->pdev->device;
 271        props->hw_ver = dev->pdev->subsystem_device;
 272        props->max_qp = dev_attr->max_qp;
 273        props->max_cq = dev_attr->max_cq;
 274        props->max_pd = dev_attr->max_pd;
 275        props->max_mr = dev_attr->max_mr;
 276        props->max_ah = dev_attr->max_ah;
 277        props->max_cqe = dev_attr->max_cq_depth;
 278        props->max_qp_wr = min_t(u32, dev_attr->max_sq_depth,
 279                                 dev_attr->max_rq_depth);
 280        props->max_send_sge = dev_attr->max_sq_sge;
 281        props->max_recv_sge = dev_attr->max_rq_sge;
 282
 283        if (udata && udata->outlen) {
 284                resp.max_sq_sge = dev_attr->max_sq_sge;
 285                resp.max_rq_sge = dev_attr->max_rq_sge;
 286                resp.max_sq_wr = dev_attr->max_sq_depth;
 287                resp.max_rq_wr = dev_attr->max_rq_depth;
 288
 289                err = ib_copy_to_udata(udata, &resp,
 290                                       min(sizeof(resp), udata->outlen));
 291                if (err) {
 292                        ibdev_dbg(ibdev,
 293                                  "Failed to copy udata for query_device\n");
 294                        return err;
 295                }
 296        }
 297
 298        return 0;
 299}
 300
 301int efa_query_port(struct ib_device *ibdev, u8 port,
 302                   struct ib_port_attr *props)
 303{
 304        struct efa_dev *dev = to_edev(ibdev);
 305
 306        props->lmc = 1;
 307
 308        props->state = IB_PORT_ACTIVE;
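        /* 5 is the IB-spec physical port state "LinkUp" */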
 309        props->phys_state = 5;
 310        props->gid_tbl_len = 1;
 311        props->pkey_tbl_len = 1;
 312        props->active_speed = IB_SPEED_EDR;
 313        props->active_width = IB_WIDTH_4X;
 314        props->max_mtu = ib_mtu_int_to_enum(dev->mtu);
 315        props->active_mtu = ib_mtu_int_to_enum(dev->mtu);
 316        props->max_msg_sz = dev->mtu;
 317        props->max_vl_num = 1;
 318
 319        return 0;
 320}
 321
 322int efa_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
 323                 int qp_attr_mask,
 324                 struct ib_qp_init_attr *qp_init_attr)
 325{
 326        struct efa_dev *dev = to_edev(ibqp->device);
 327        struct efa_com_query_qp_params params = {};
 328        struct efa_com_query_qp_result result;
 329        struct efa_qp *qp = to_eqp(ibqp);
 330        int err;
 331
 332#define EFA_QUERY_QP_SUPP_MASK \
 333        (IB_QP_STATE | IB_QP_PKEY_INDEX | IB_QP_PORT | \
 334         IB_QP_QKEY | IB_QP_SQ_PSN | IB_QP_CAP)
 335
 336        if (qp_attr_mask & ~EFA_QUERY_QP_SUPP_MASK) {
 337                ibdev_dbg(&dev->ibdev,
 338                          "Unsupported qp_attr_mask[%#x] supported[%#x]\n",
 339                          qp_attr_mask, EFA_QUERY_QP_SUPP_MASK);
 340                return -EOPNOTSUPP;
 341        }
 342
 343        memset(qp_attr, 0, sizeof(*qp_attr));
 344        memset(qp_init_attr, 0, sizeof(*qp_init_attr));
 345
 346        params.qp_handle = qp->qp_handle;
 347        err = efa_com_query_qp(&dev->edev, &params, &result);
 348        if (err)
 349                return err;
 350
 351        qp_attr->qp_state = result.qp_state;
 352        qp_attr->qkey = result.qkey;
 353        qp_attr->sq_psn = result.sq_psn;
 354        qp_attr->sq_draining = result.sq_draining;
 355        qp_attr->port_num = 1;
 356
 357        qp_attr->cap.max_send_wr = qp->max_send_wr;
 358        qp_attr->cap.max_recv_wr = qp->max_recv_wr;
 359        qp_attr->cap.max_send_sge = qp->max_send_sge;
 360        qp_attr->cap.max_recv_sge = qp->max_recv_sge;
 361        qp_attr->cap.max_inline_data = qp->max_inline_data;
 362
 363        qp_init_attr->qp_type = ibqp->qp_type;
 364        qp_init_attr->recv_cq = ibqp->recv_cq;
 365        qp_init_attr->send_cq = ibqp->send_cq;
 366        qp_init_attr->qp_context = ibqp->qp_context;
 367        qp_init_attr->cap = qp_attr->cap;
 368
 369        return 0;
 370}
 371
 372int efa_query_gid(struct ib_device *ibdev, u8 port, int index,
 373                  union ib_gid *gid)
 374{
 375        struct efa_dev *dev = to_edev(ibdev);
 376
 377        memcpy(gid->raw, dev->addr, sizeof(dev->addr));
 378
 379        return 0;
 380}
 381
 382int efa_query_pkey(struct ib_device *ibdev, u8 port, u16 index,
 383                   u16 *pkey)
 384{
 385        if (index > 0)
 386                return -EINVAL;
 387
 388        *pkey = 0xffff;
 389        return 0;
 390}
 391
 392static int efa_pd_dealloc(struct efa_dev *dev, u16 pdn)
 393{
 394        struct efa_com_dealloc_pd_params params = {
 395                .pdn = pdn,
 396        };
 397
 398        return efa_com_dealloc_pd(&dev->edev, &params);
 399}
 400
 401int efa_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
 402{
 403        struct efa_dev *dev = to_edev(ibpd->device);
 404        struct efa_ibv_alloc_pd_resp resp = {};
 405        struct efa_com_alloc_pd_result result;
 406        struct efa_pd *pd = to_epd(ibpd);
 407        int err;
 408
 409        if (udata->inlen &&
 410            !ib_is_udata_cleared(udata, 0, udata->inlen)) {
 411                ibdev_dbg(&dev->ibdev,
 412                          "Incompatible ABI params, udata not cleared\n");
 413                err = -EINVAL;
 414                goto err_out;
 415        }
 416
 417        err = efa_com_alloc_pd(&dev->edev, &result);
 418        if (err)
 419                goto err_out;
 420
 421        pd->pdn = result.pdn;
 422        resp.pdn = result.pdn;
 423
 424        if (udata->outlen) {
 425                err = ib_copy_to_udata(udata, &resp,
 426                                       min(sizeof(resp), udata->outlen));
 427                if (err) {
 428                        ibdev_dbg(&dev->ibdev,
 429                                  "Failed to copy udata for alloc_pd\n");
 430                        goto err_dealloc_pd;
 431                }
 432        }
 433
 434        ibdev_dbg(&dev->ibdev, "Allocated pd[%d]\n", pd->pdn);
 435
 436        return 0;
 437
 438err_dealloc_pd:
 439        efa_pd_dealloc(dev, result.pdn);
 440err_out:
 441        atomic64_inc(&dev->stats.sw_stats.alloc_pd_err);
 442        return err;
 443}
 444
 445void efa_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
 446{
 447        struct efa_dev *dev = to_edev(ibpd->device);
 448        struct efa_pd *pd = to_epd(ibpd);
 449
 450        if (udata->inlen &&
 451            !ib_is_udata_cleared(udata, 0, udata->inlen)) {
 452                ibdev_dbg(&dev->ibdev, "Incompatible ABI params\n");
 453                return;
 454        }
 455
 456        ibdev_dbg(&dev->ibdev, "Dealloc pd[%d]\n", pd->pdn);
 457        efa_pd_dealloc(dev, pd->pdn);
 458}
 459
 460static int efa_destroy_qp_handle(struct efa_dev *dev, u32 qp_handle)
 461{
 462        struct efa_com_destroy_qp_params params = { .qp_handle = qp_handle };
 463
 464        return efa_com_destroy_qp(&dev->edev, &params);
 465}
 466
 467int efa_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata)
 468{
 469        struct efa_dev *dev = to_edev(ibqp->pd->device);
 470        struct efa_qp *qp = to_eqp(ibqp);
 471        int err;
 472
 473        if (udata->inlen &&
 474            !ib_is_udata_cleared(udata, 0, udata->inlen)) {
 475                ibdev_dbg(&dev->ibdev, "Incompatible ABI params\n");
 476                return -EINVAL;
 477        }
 478
 479        ibdev_dbg(&dev->ibdev, "Destroy qp[%u]\n", ibqp->qp_num);
 480        err = efa_destroy_qp_handle(dev, qp->qp_handle);
 481        if (err)
 482                return err;
 483
 484        if (qp->rq_cpu_addr) {
 485                ibdev_dbg(&dev->ibdev,
 486                          "qp->cpu_addr[0x%p] freed: size[%lu], dma[%pad]\n",
 487                          qp->rq_cpu_addr, qp->rq_size,
 488                          &qp->rq_dma_addr);
 489                dma_unmap_single(&dev->pdev->dev, qp->rq_dma_addr, qp->rq_size,
 490                                 DMA_TO_DEVICE);
 491        }
 492
 493        kfree(qp);
 494        return 0;
 495}
 496
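/*
 * Register the per-QP mmap entries that userspace later maps: the SQ
 * doorbell page (non-cached BAR), the LLQ descriptor window
 * (write-combined BAR) and, when an RQ exists, the RQ doorbell page plus
 * the DMA-able RQ ring itself.  The BAR offsets returned to userspace
 * are reduced to their in-page offsets.
 */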
 497static int qp_mmap_entries_setup(struct efa_qp *qp,
 498                                 struct efa_dev *dev,
 499                                 struct efa_ucontext *ucontext,
 500                                 struct efa_com_create_qp_params *params,
 501                                 struct efa_ibv_create_qp_resp *resp)
 502{
 503        /*
 504         * Once an entry is inserted it might be mmapped, hence cannot be
 505         * cleaned up until dealloc_ucontext.
 506         */
 507        resp->sq_db_mmap_key =
 508                mmap_entry_insert(dev, ucontext, qp,
 509                                  dev->db_bar_addr + resp->sq_db_offset,
 510                                  PAGE_SIZE, EFA_MMAP_IO_NC);
 511        if (resp->sq_db_mmap_key == EFA_MMAP_INVALID)
 512                return -ENOMEM;
 513
 514        resp->sq_db_offset &= ~PAGE_MASK;
 515
 516        resp->llq_desc_mmap_key =
 517                mmap_entry_insert(dev, ucontext, qp,
 518                                  dev->mem_bar_addr + resp->llq_desc_offset,
 519                                  PAGE_ALIGN(params->sq_ring_size_in_bytes +
 520                                             (resp->llq_desc_offset & ~PAGE_MASK)),
 521                                  EFA_MMAP_IO_WC);
 522        if (resp->llq_desc_mmap_key == EFA_MMAP_INVALID)
 523                return -ENOMEM;
 524
 525        resp->llq_desc_offset &= ~PAGE_MASK;
 526
 527        if (qp->rq_size) {
 528                resp->rq_db_mmap_key =
 529                        mmap_entry_insert(dev, ucontext, qp,
 530                                          dev->db_bar_addr + resp->rq_db_offset,
 531                                          PAGE_SIZE, EFA_MMAP_IO_NC);
 532                if (resp->rq_db_mmap_key == EFA_MMAP_INVALID)
 533                        return -ENOMEM;
 534
 535                resp->rq_db_offset &= ~PAGE_MASK;
 536
 537                resp->rq_mmap_key =
 538                        mmap_entry_insert(dev, ucontext, qp,
 539                                          virt_to_phys(qp->rq_cpu_addr),
 540                                          qp->rq_size, EFA_MMAP_DMA_PAGE);
 541                if (resp->rq_mmap_key == EFA_MMAP_INVALID)
 542                        return -ENOMEM;
 543
 544                resp->rq_mmap_size = qp->rq_size;
 545        }
 546
 547        return 0;
 548}
 549
 550static int efa_qp_validate_cap(struct efa_dev *dev,
 551                               struct ib_qp_init_attr *init_attr)
 552{
 553        if (init_attr->cap.max_send_wr > dev->dev_attr.max_sq_depth) {
 554                ibdev_dbg(&dev->ibdev,
 555                          "qp: requested send wr[%u] exceeds the max[%u]\n",
 556                          init_attr->cap.max_send_wr,
 557                          dev->dev_attr.max_sq_depth);
 558                return -EINVAL;
 559        }
 560        if (init_attr->cap.max_recv_wr > dev->dev_attr.max_rq_depth) {
 561                ibdev_dbg(&dev->ibdev,
 562                          "qp: requested receive wr[%u] exceeds the max[%u]\n",
 563                          init_attr->cap.max_recv_wr,
 564                          dev->dev_attr.max_rq_depth);
 565                return -EINVAL;
 566        }
 567        if (init_attr->cap.max_send_sge > dev->dev_attr.max_sq_sge) {
 568                ibdev_dbg(&dev->ibdev,
 569                          "qp: requested sge send[%u] exceeds the max[%u]\n",
 570                          init_attr->cap.max_send_sge, dev->dev_attr.max_sq_sge);
 571                return -EINVAL;
 572        }
 573        if (init_attr->cap.max_recv_sge > dev->dev_attr.max_rq_sge) {
 574                ibdev_dbg(&dev->ibdev,
 575                          "qp: requested sge recv[%u] exceeds the max[%u]\n",
 576                          init_attr->cap.max_recv_sge, dev->dev_attr.max_rq_sge);
 577                return -EINVAL;
 578        }
 579        if (init_attr->cap.max_inline_data > dev->dev_attr.inline_buf_size) {
 580                ibdev_dbg(&dev->ibdev,
 581                          "qp: requested inline data[%u] exceeds the max[%u]\n",
 582                          init_attr->cap.max_inline_data,
 583                          dev->dev_attr.inline_buf_size);
 584                return -EINVAL;
 585        }
 586
 587        return 0;
 588}
 589
 590static int efa_qp_validate_attr(struct efa_dev *dev,
 591                                struct ib_qp_init_attr *init_attr)
 592{
 593        if (init_attr->qp_type != IB_QPT_DRIVER &&
 594            init_attr->qp_type != IB_QPT_UD) {
 595                ibdev_dbg(&dev->ibdev,
 596                          "Unsupported qp type %d\n", init_attr->qp_type);
 597                return -EOPNOTSUPP;
 598        }
 599
 600        if (init_attr->srq) {
 601                ibdev_dbg(&dev->ibdev, "SRQ is not supported\n");
 602                return -EOPNOTSUPP;
 603        }
 604
 605        if (init_attr->create_flags) {
 606                ibdev_dbg(&dev->ibdev, "Unsupported create flags\n");
 607                return -EOPNOTSUPP;
 608        }
 609
 610        return 0;
 611}
 612
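/*
 * QP creation flow: validate the requested capabilities and attributes,
 * parse the user command (which selects UD or the driver-specific SRD
 * type), allocate and map the RQ ring if one was requested, create the
 * QP through the admin queue, expose the doorbells and ring via mmap
 * entries, and finally copy the response back to userspace.
 */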
 613struct ib_qp *efa_create_qp(struct ib_pd *ibpd,
 614                            struct ib_qp_init_attr *init_attr,
 615                            struct ib_udata *udata)
 616{
 617        struct efa_com_create_qp_params create_qp_params = {};
 618        struct efa_com_create_qp_result create_qp_resp;
 619        struct efa_dev *dev = to_edev(ibpd->device);
 620        struct efa_ibv_create_qp_resp resp = {};
 621        struct efa_ibv_create_qp cmd = {};
 622        bool rq_entry_inserted = false;
 623        struct efa_ucontext *ucontext;
 624        struct efa_qp *qp;
 625        int err;
 626
 627        ucontext = rdma_udata_to_drv_context(udata, struct efa_ucontext,
 628                                             ibucontext);
 629
 630        err = efa_qp_validate_cap(dev, init_attr);
 631        if (err)
 632                goto err_out;
 633
 634        err = efa_qp_validate_attr(dev, init_attr);
 635        if (err)
 636                goto err_out;
 637
 638        if (!field_avail(cmd, driver_qp_type, udata->inlen)) {
 639                ibdev_dbg(&dev->ibdev,
 640                          "Incompatible ABI params, no input udata\n");
 641                err = -EINVAL;
 642                goto err_out;
 643        }
 644
 645        if (udata->inlen > sizeof(cmd) &&
 646            !ib_is_udata_cleared(udata, sizeof(cmd),
 647                                 udata->inlen - sizeof(cmd))) {
 648                ibdev_dbg(&dev->ibdev,
 649                          "Incompatible ABI params, unknown fields in udata\n");
 650                err = -EINVAL;
 651                goto err_out;
 652        }
 653
 654        err = ib_copy_from_udata(&cmd, udata,
 655                                 min(sizeof(cmd), udata->inlen));
 656        if (err) {
 657                ibdev_dbg(&dev->ibdev,
 658                          "Cannot copy udata for create_qp\n");
 659                goto err_out;
 660        }
 661
 662        if (cmd.comp_mask) {
 663                ibdev_dbg(&dev->ibdev,
 664                          "Incompatible ABI params, unknown fields in udata\n");
 665                err = -EINVAL;
 666                goto err_out;
 667        }
 668
 669        qp = kzalloc(sizeof(*qp), GFP_KERNEL);
 670        if (!qp) {
 671                err = -ENOMEM;
 672                goto err_out;
 673        }
 674
 675        create_qp_params.uarn = ucontext->uarn;
 676        create_qp_params.pd = to_epd(ibpd)->pdn;
 677
 678        if (init_attr->qp_type == IB_QPT_UD) {
 679                create_qp_params.qp_type = EFA_ADMIN_QP_TYPE_UD;
 680        } else if (cmd.driver_qp_type == EFA_QP_DRIVER_TYPE_SRD) {
 681                create_qp_params.qp_type = EFA_ADMIN_QP_TYPE_SRD;
 682        } else {
 683                ibdev_dbg(&dev->ibdev,
 684                          "Unsupported qp type %d driver qp type %d\n",
 685                          init_attr->qp_type, cmd.driver_qp_type);
 686                err = -EOPNOTSUPP;
 687                goto err_free_qp;
 688        }
 689
 690        ibdev_dbg(&dev->ibdev, "Create QP: qp type %d driver qp type %#x\n",
 691                  init_attr->qp_type, cmd.driver_qp_type);
 692        create_qp_params.send_cq_idx = to_ecq(init_attr->send_cq)->cq_idx;
 693        create_qp_params.recv_cq_idx = to_ecq(init_attr->recv_cq)->cq_idx;
 694        create_qp_params.sq_depth = init_attr->cap.max_send_wr;
 695        create_qp_params.sq_ring_size_in_bytes = cmd.sq_ring_size;
 696
 697        create_qp_params.rq_depth = init_attr->cap.max_recv_wr;
 698        create_qp_params.rq_ring_size_in_bytes = cmd.rq_ring_size;
 699        qp->rq_size = PAGE_ALIGN(create_qp_params.rq_ring_size_in_bytes);
 700        if (qp->rq_size) {
 701                qp->rq_cpu_addr = efa_zalloc_mapped(dev, &qp->rq_dma_addr,
 702                                                    qp->rq_size, DMA_TO_DEVICE);
 703                if (!qp->rq_cpu_addr) {
 704                        err = -ENOMEM;
 705                        goto err_free_qp;
 706                }
 707
 708                ibdev_dbg(&dev->ibdev,
 709                          "qp->cpu_addr[0x%p] allocated: size[%lu], dma[%pad]\n",
 710                          qp->rq_cpu_addr, qp->rq_size, &qp->rq_dma_addr);
 711                create_qp_params.rq_base_addr = qp->rq_dma_addr;
 712        }
 713
 714        err = efa_com_create_qp(&dev->edev, &create_qp_params,
 715                                &create_qp_resp);
 716        if (err)
 717                goto err_free_mapped;
 718
 719        resp.sq_db_offset = create_qp_resp.sq_db_offset;
 720        resp.rq_db_offset = create_qp_resp.rq_db_offset;
 721        resp.llq_desc_offset = create_qp_resp.llq_descriptors_offset;
 722        resp.send_sub_cq_idx = create_qp_resp.send_sub_cq_idx;
 723        resp.recv_sub_cq_idx = create_qp_resp.recv_sub_cq_idx;
 724
 725        err = qp_mmap_entries_setup(qp, dev, ucontext, &create_qp_params,
 726                                    &resp);
 727        if (err)
 728                goto err_destroy_qp;
 729
 730        rq_entry_inserted = true;
 731        qp->qp_handle = create_qp_resp.qp_handle;
 732        qp->ibqp.qp_num = create_qp_resp.qp_num;
 733        qp->ibqp.qp_type = init_attr->qp_type;
 734        qp->max_send_wr = init_attr->cap.max_send_wr;
 735        qp->max_recv_wr = init_attr->cap.max_recv_wr;
 736        qp->max_send_sge = init_attr->cap.max_send_sge;
 737        qp->max_recv_sge = init_attr->cap.max_recv_sge;
 738        qp->max_inline_data = init_attr->cap.max_inline_data;
 739
 740        if (udata->outlen) {
 741                err = ib_copy_to_udata(udata, &resp,
 742                                       min(sizeof(resp), udata->outlen));
 743                if (err) {
 744                        ibdev_dbg(&dev->ibdev,
 745                                  "Failed to copy udata for qp[%u]\n",
 746                                  create_qp_resp.qp_num);
 747                        goto err_destroy_qp;
 748                }
 749        }
 750
 751        ibdev_dbg(&dev->ibdev, "Created qp[%d]\n", qp->ibqp.qp_num);
 752
 753        return &qp->ibqp;
 754
 755err_destroy_qp:
 756        efa_destroy_qp_handle(dev, create_qp_resp.qp_handle);
 757err_free_mapped:
 758        if (qp->rq_size) {
 759                dma_unmap_single(&dev->pdev->dev, qp->rq_dma_addr, qp->rq_size,
 760                                 DMA_TO_DEVICE);
 761                if (!rq_entry_inserted)
 762                        free_pages_exact(qp->rq_cpu_addr, qp->rq_size);
 763        }
 764err_free_qp:
 765        kfree(qp);
 766err_out:
 767        atomic64_inc(&dev->stats.sw_stats.create_qp_err);
 768        return ERR_PTR(err);
 769}
 770
 771static int efa_modify_qp_validate(struct efa_dev *dev, struct efa_qp *qp,
 772                                  struct ib_qp_attr *qp_attr, int qp_attr_mask,
 773                                  enum ib_qp_state cur_state,
 774                                  enum ib_qp_state new_state)
 775{
 776#define EFA_MODIFY_QP_SUPP_MASK \
 777        (IB_QP_STATE | IB_QP_CUR_STATE | IB_QP_EN_SQD_ASYNC_NOTIFY | \
 778         IB_QP_PKEY_INDEX | IB_QP_PORT | IB_QP_QKEY | IB_QP_SQ_PSN)
 779
 780        if (qp_attr_mask & ~EFA_MODIFY_QP_SUPP_MASK) {
 781                ibdev_dbg(&dev->ibdev,
 782                          "Unsupported qp_attr_mask[%#x] supported[%#x]\n",
 783                          qp_attr_mask, EFA_MODIFY_QP_SUPP_MASK);
 784                return -EOPNOTSUPP;
 785        }
 786
 787        if (!ib_modify_qp_is_ok(cur_state, new_state, IB_QPT_UD,
 788                                qp_attr_mask)) {
 789                ibdev_dbg(&dev->ibdev, "Invalid modify QP parameters\n");
 790                return -EINVAL;
 791        }
 792
 793        if ((qp_attr_mask & IB_QP_PORT) && qp_attr->port_num != 1) {
 794                ibdev_dbg(&dev->ibdev, "Can't change port num\n");
 795                return -EOPNOTSUPP;
 796        }
 797
 798        if ((qp_attr_mask & IB_QP_PKEY_INDEX) && qp_attr->pkey_index) {
 799                ibdev_dbg(&dev->ibdev, "Can't change pkey index\n");
 800                return -EOPNOTSUPP;
 801        }
 802
 803        return 0;
 804}
 805
 806int efa_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
 807                  int qp_attr_mask, struct ib_udata *udata)
 808{
 809        struct efa_dev *dev = to_edev(ibqp->device);
 810        struct efa_com_modify_qp_params params = {};
 811        struct efa_qp *qp = to_eqp(ibqp);
 812        enum ib_qp_state cur_state;
 813        enum ib_qp_state new_state;
 814        int err;
 815
 816        if (udata->inlen &&
 817            !ib_is_udata_cleared(udata, 0, udata->inlen)) {
 818                ibdev_dbg(&dev->ibdev,
 819                          "Incompatible ABI params, udata not cleared\n");
 820                return -EINVAL;
 821        }
 822
 823        cur_state = qp_attr_mask & IB_QP_CUR_STATE ? qp_attr->cur_qp_state :
 824                                                     qp->state;
 825        new_state = qp_attr_mask & IB_QP_STATE ? qp_attr->qp_state : cur_state;
 826
 827        err = efa_modify_qp_validate(dev, qp, qp_attr, qp_attr_mask, cur_state,
 828                                     new_state);
 829        if (err)
 830                return err;
 831
 832        params.qp_handle = qp->qp_handle;
 833
 834        if (qp_attr_mask & IB_QP_STATE) {
 835                params.modify_mask |= BIT(EFA_ADMIN_QP_STATE_BIT) |
 836                                      BIT(EFA_ADMIN_CUR_QP_STATE_BIT);
 837                params.cur_qp_state = qp_attr->cur_qp_state;
 838                params.qp_state = qp_attr->qp_state;
 839        }
 840
 841        if (qp_attr_mask & IB_QP_EN_SQD_ASYNC_NOTIFY) {
 842                params.modify_mask |=
 843                        BIT(EFA_ADMIN_SQ_DRAINED_ASYNC_NOTIFY_BIT);
 844                params.sq_drained_async_notify = qp_attr->en_sqd_async_notify;
 845        }
 846
 847        if (qp_attr_mask & IB_QP_QKEY) {
 848                params.modify_mask |= BIT(EFA_ADMIN_QKEY_BIT);
 849                params.qkey = qp_attr->qkey;
 850        }
 851
 852        if (qp_attr_mask & IB_QP_SQ_PSN) {
 853                params.modify_mask |= BIT(EFA_ADMIN_SQ_PSN_BIT);
 854                params.sq_psn = qp_attr->sq_psn;
 855        }
 856
 857        err = efa_com_modify_qp(&dev->edev, &params);
 858        if (err)
 859                return err;
 860
 861        qp->state = new_state;
 862
 863        return 0;
 864}
 865
 866static int efa_destroy_cq_idx(struct efa_dev *dev, int cq_idx)
 867{
 868        struct efa_com_destroy_cq_params params = { .cq_idx = cq_idx };
 869
 870        return efa_com_destroy_cq(&dev->edev, &params);
 871}
 872
 873int efa_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata)
 874{
 875        struct efa_dev *dev = to_edev(ibcq->device);
 876        struct efa_cq *cq = to_ecq(ibcq);
 877        int err;
 878
 879        if (udata->inlen &&
 880            !ib_is_udata_cleared(udata, 0, udata->inlen)) {
 881                ibdev_dbg(&dev->ibdev, "Incompatible ABI params\n");
 882                return -EINVAL;
 883        }
 884
 885        ibdev_dbg(&dev->ibdev,
 886                  "Destroy cq[%d] virt[0x%p] freed: size[%lu], dma[%pad]\n",
 887                  cq->cq_idx, cq->cpu_addr, cq->size, &cq->dma_addr);
 888
 889        err = efa_destroy_cq_idx(dev, cq->cq_idx);
 890        if (err)
 891                return err;
 892
 893        dma_unmap_single(&dev->pdev->dev, cq->dma_addr, cq->size,
 894                         DMA_FROM_DEVICE);
 895
 896        kfree(cq);
 897        return 0;
 898}
 899
 900static int cq_mmap_entries_setup(struct efa_dev *dev, struct efa_cq *cq,
 901                                 struct efa_ibv_create_cq_resp *resp)
 902{
 903        resp->q_mmap_size = cq->size;
 904        resp->q_mmap_key = mmap_entry_insert(dev, cq->ucontext, cq,
 905                                             virt_to_phys(cq->cpu_addr),
 906                                             cq->size, EFA_MMAP_DMA_PAGE);
 907        if (resp->q_mmap_key == EFA_MMAP_INVALID)
 908                return -ENOMEM;
 909
 910        return 0;
 911}
 912
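/*
 * The CQ buffer is sized as cq_entry_size * entries * num_sub_cqs,
 * rounded up to a page, allocated physically contiguous and mapped
 * DMA_FROM_DEVICE (the device writes completions into it).  It is then
 * exposed to userspace through an EFA_MMAP_DMA_PAGE entry.
 */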
 913static struct ib_cq *do_create_cq(struct ib_device *ibdev, int entries,
 914                                  int vector, struct ib_ucontext *ibucontext,
 915                                  struct ib_udata *udata)
 916{
 917        struct efa_ibv_create_cq_resp resp = {};
 918        struct efa_com_create_cq_params params;
 919        struct efa_com_create_cq_result result;
 920        struct efa_dev *dev = to_edev(ibdev);
 921        struct efa_ibv_create_cq cmd = {};
 922        bool cq_entry_inserted = false;
 923        struct efa_cq *cq;
 924        int err;
 925
 926        ibdev_dbg(ibdev, "create_cq entries %d\n", entries);
 927
 928        if (entries < 1 || entries > dev->dev_attr.max_cq_depth) {
 929                ibdev_dbg(ibdev,
 930                          "cq: requested entries[%u] non-positive or greater than max[%u]\n",
 931                          entries, dev->dev_attr.max_cq_depth);
 932                err = -EINVAL;
 933                goto err_out;
 934        }
 935
 936        if (!field_avail(cmd, num_sub_cqs, udata->inlen)) {
 937                ibdev_dbg(ibdev,
 938                          "Incompatible ABI params, no input udata\n");
 939                err = -EINVAL;
 940                goto err_out;
 941        }
 942
 943        if (udata->inlen > sizeof(cmd) &&
 944            !ib_is_udata_cleared(udata, sizeof(cmd),
 945                                 udata->inlen - sizeof(cmd))) {
 946                ibdev_dbg(ibdev,
 947                          "Incompatible ABI params, unknown fields in udata\n");
 948                err = -EINVAL;
 949                goto err_out;
 950        }
 951
 952        err = ib_copy_from_udata(&cmd, udata,
 953                                 min(sizeof(cmd), udata->inlen));
 954        if (err) {
 955                ibdev_dbg(ibdev, "Cannot copy udata for create_cq\n");
 956                goto err_out;
 957        }
 958
 959        if (cmd.comp_mask || !is_reserved_cleared(cmd.reserved_50)) {
 960                ibdev_dbg(ibdev,
 961                          "Incompatible ABI params, unknown fields in udata\n");
 962                err = -EINVAL;
 963                goto err_out;
 964        }
 965
 966        if (!cmd.cq_entry_size) {
 967                ibdev_dbg(ibdev,
 968                          "Invalid entry size [%u]\n", cmd.cq_entry_size);
 969                err = -EINVAL;
 970                goto err_out;
 971        }
 972
 973        if (cmd.num_sub_cqs != dev->dev_attr.sub_cqs_per_cq) {
 974                ibdev_dbg(ibdev,
 975                          "Invalid number of sub cqs[%u] expected[%u]\n",
 976                          cmd.num_sub_cqs, dev->dev_attr.sub_cqs_per_cq);
 977                err = -EINVAL;
 978                goto err_out;
 979        }
 980
 981        cq = kzalloc(sizeof(*cq), GFP_KERNEL);
 982        if (!cq) {
 983                err = -ENOMEM;
 984                goto err_out;
 985        }
 986
 987        cq->ucontext = to_eucontext(ibucontext);
 988        cq->size = PAGE_ALIGN(cmd.cq_entry_size * entries * cmd.num_sub_cqs);
 989        cq->cpu_addr = efa_zalloc_mapped(dev, &cq->dma_addr, cq->size,
 990                                         DMA_FROM_DEVICE);
 991        if (!cq->cpu_addr) {
 992                err = -ENOMEM;
 993                goto err_free_cq;
 994        }
 995
 996        params.uarn = cq->ucontext->uarn;
 997        params.cq_depth = entries;
 998        params.dma_addr = cq->dma_addr;
 999        params.entry_size_in_bytes = cmd.cq_entry_size;
1000        params.num_sub_cqs = cmd.num_sub_cqs;
1001        err = efa_com_create_cq(&dev->edev, &params, &result);
1002        if (err)
1003                goto err_free_mapped;
1004
1005        resp.cq_idx = result.cq_idx;
1006        cq->cq_idx = result.cq_idx;
1007        cq->ibcq.cqe = result.actual_depth;
1008        WARN_ON_ONCE(entries != result.actual_depth);
1009
1010        err = cq_mmap_entries_setup(dev, cq, &resp);
1011        if (err) {
1012                ibdev_dbg(ibdev,
1013                          "Could not setup cq[%u] mmap entries\n", cq->cq_idx);
1014                goto err_destroy_cq;
1015        }
1016
1017        cq_entry_inserted = true;
1018
1019        if (udata->outlen) {
1020                err = ib_copy_to_udata(udata, &resp,
1021                                       min(sizeof(resp), udata->outlen));
1022                if (err) {
1023                        ibdev_dbg(ibdev,
1024                                  "Failed to copy udata for create_cq\n");
1025                        goto err_destroy_cq;
1026                }
1027        }
1028
1029        ibdev_dbg(ibdev,
1030                  "Created cq[%d], cq depth[%u]. dma[%pad] virt[0x%p]\n",
1031                  cq->cq_idx, result.actual_depth, &cq->dma_addr, cq->cpu_addr);
1032
1033        return &cq->ibcq;
1034
1035err_destroy_cq:
1036        efa_destroy_cq_idx(dev, cq->cq_idx);
1037err_free_mapped:
1038        dma_unmap_single(&dev->pdev->dev, cq->dma_addr, cq->size,
1039                         DMA_FROM_DEVICE);
1040        if (!cq_entry_inserted)
1041                free_pages_exact(cq->cpu_addr, cq->size);
1042err_free_cq:
1043        kfree(cq);
1044err_out:
1045        atomic64_inc(&dev->stats.sw_stats.create_cq_err);
1046        return ERR_PTR(err);
1047}
1048
1049struct ib_cq *efa_create_cq(struct ib_device *ibdev,
1050                            const struct ib_cq_init_attr *attr,
1051                            struct ib_udata *udata)
1052{
1053        struct efa_ucontext *ucontext = rdma_udata_to_drv_context(udata,
1054                                                                  struct efa_ucontext,
1055                                                                  ibucontext);
1056
1057        return do_create_cq(ibdev, attr->cqe, attr->comp_vector,
1058                            &ucontext->ibucontext, udata);
1059}
1060
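/*
 * Walk the umem scatterlist in PAGE_SIZE steps and record the DMA
 * address of every pages_in_hp'th page, producing one entry per
 * "hugepage" of size 1 << hp_shift for the device page list.
 */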
1061static int umem_to_page_list(struct efa_dev *dev,
1062                             struct ib_umem *umem,
1063                             u64 *page_list,
1064                             u32 hp_cnt,
1065                             u8 hp_shift)
1066{
1067        u32 pages_in_hp = BIT(hp_shift - PAGE_SHIFT);
1068        struct sg_dma_page_iter sg_iter;
1069        unsigned int page_idx = 0;
1070        unsigned int hp_idx = 0;
1071
1072        ibdev_dbg(&dev->ibdev, "hp_cnt[%u], pages_in_hp[%u]\n",
1073                  hp_cnt, pages_in_hp);
1074
1075        for_each_sg_dma_page(umem->sg_head.sgl, &sg_iter, umem->nmap, 0) {
1076                if (page_idx % pages_in_hp == 0) {
1077                        page_list[hp_idx] = sg_page_iter_dma_address(&sg_iter);
1078                        hp_idx++;
1079                }
1080
1081                page_idx++;
1082        }
1083
1084        return 0;
1085}
1086
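/*
 * Build a scatterlist over the pages backing a vmalloc'ed buffer so
 * that it can be DMA-mapped with dma_map_sg().
 */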
1087static struct scatterlist *efa_vmalloc_buf_to_sg(u64 *buf, int page_cnt)
1088{
1089        struct scatterlist *sglist;
1090        struct page *pg;
1091        int i;
1092
1093        sglist = kcalloc(page_cnt, sizeof(*sglist), GFP_KERNEL);
1094        if (!sglist)
1095                return NULL;
1096        sg_init_table(sglist, page_cnt);
1097        for (i = 0; i < page_cnt; i++) {
1098                pg = vmalloc_to_page(buf);
1099                if (!pg)
1100                        goto err;
1101                sg_set_page(&sglist[i], pg, PAGE_SIZE, 0);
1102                buf += PAGE_SIZE / sizeof(*buf);
1103        }
1104        return sglist;
1105
1106err:
1107        kfree(sglist);
1108        return NULL;
1109}
1110
1111/*
1112 * create a chunk list of physical pages dma addresses from the supplied
1113 * scatter gather list
1114 */
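/*
 * Illustrative sizing (assuming EFA_PTRS_PER_CHUNK is 510): an indirect
 * PBL describing 768 pages needs DIV_ROUND_UP(768, 510) == 2 chunks;
 * the first carries 510 payload pointers plus the control record that
 * points at the second chunk, and the second carries the remaining 258
 * pointers.
 */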
1115static int pbl_chunk_list_create(struct efa_dev *dev, struct pbl_context *pbl)
1116{
1117        unsigned int entry, payloads_in_sg, chunk_list_size, chunk_idx, payload_idx;
1118        struct pbl_chunk_list *chunk_list = &pbl->phys.indirect.chunk_list;
1119        int page_cnt = pbl->phys.indirect.pbl_buf_size_in_pages;
1120        struct scatterlist *pages_sgl = pbl->phys.indirect.sgl;
1121        int sg_dma_cnt = pbl->phys.indirect.sg_dma_cnt;
1122        struct efa_com_ctrl_buff_info *ctrl_buf;
1123        u64 *cur_chunk_buf, *prev_chunk_buf;
1124        struct scatterlist *sg;
1125        dma_addr_t dma_addr;
1126        int i;
1127
1128        /* allocate a chunk list that consists of 4KB chunks */
1129        chunk_list_size = DIV_ROUND_UP(page_cnt, EFA_PTRS_PER_CHUNK);
1130
1131        chunk_list->size = chunk_list_size;
1132        chunk_list->chunks = kcalloc(chunk_list_size,
1133                                     sizeof(*chunk_list->chunks),
1134                                     GFP_KERNEL);
1135        if (!chunk_list->chunks)
1136                return -ENOMEM;
1137
1138        ibdev_dbg(&dev->ibdev,
1139                  "chunk_list_size[%u] - pages[%u]\n", chunk_list_size,
1140                  page_cnt);
1141
1142        /* allocate chunk buffers: */
1143        for (i = 0; i < chunk_list_size; i++) {
1144                chunk_list->chunks[i].buf = kzalloc(EFA_CHUNK_SIZE, GFP_KERNEL);
1145                if (!chunk_list->chunks[i].buf)
1146                        goto chunk_list_dealloc;
1147
1148                chunk_list->chunks[i].length = EFA_CHUNK_USED_SIZE;
1149        }
1150        chunk_list->chunks[chunk_list_size - 1].length =
1151                ((page_cnt % EFA_PTRS_PER_CHUNK) * EFA_CHUNK_PAYLOAD_PTR_SIZE) +
1152                        EFA_CHUNK_PTR_SIZE;
1153
1154        /* fill the dma addresses of sg list pages to chunks: */
1155        chunk_idx = 0;
1156        payload_idx = 0;
1157        cur_chunk_buf = chunk_list->chunks[0].buf;
1158        for_each_sg(pages_sgl, sg, sg_dma_cnt, entry) {
1159                payloads_in_sg = sg_dma_len(sg) >> EFA_CHUNK_PAYLOAD_SHIFT;
1160                for (i = 0; i < payloads_in_sg; i++) {
1161                        cur_chunk_buf[payload_idx++] =
1162                                (sg_dma_address(sg) & ~(EFA_CHUNK_PAYLOAD_SIZE - 1)) +
1163                                (EFA_CHUNK_PAYLOAD_SIZE * i);
1164
1165                        if (payload_idx == EFA_PTRS_PER_CHUNK) {
1166                                chunk_idx++;
1167                                cur_chunk_buf = chunk_list->chunks[chunk_idx].buf;
1168                                payload_idx = 0;
1169                        }
1170                }
1171        }
1172
1173        /* map chunks to dma and fill chunks next ptrs */
1174        for (i = chunk_list_size - 1; i >= 0; i--) {
1175                dma_addr = dma_map_single(&dev->pdev->dev,
1176                                          chunk_list->chunks[i].buf,
1177                                          chunk_list->chunks[i].length,
1178                                          DMA_TO_DEVICE);
1179                if (dma_mapping_error(&dev->pdev->dev, dma_addr)) {
1180                        ibdev_err(&dev->ibdev,
1181                                  "chunk[%u] dma_map_failed\n", i);
1182                        goto chunk_list_unmap;
1183                }
1184
1185                chunk_list->chunks[i].dma_addr = dma_addr;
1186                ibdev_dbg(&dev->ibdev,
1187                          "chunk[%u] mapped at [%pad]\n", i, &dma_addr);
1188
1189                if (!i)
1190                        break;
1191
1192                prev_chunk_buf = chunk_list->chunks[i - 1].buf;
1193
1194                ctrl_buf = (struct efa_com_ctrl_buff_info *)
1195                                &prev_chunk_buf[EFA_PTRS_PER_CHUNK];
1196                ctrl_buf->length = chunk_list->chunks[i].length;
1197
1198                efa_com_set_dma_addr(dma_addr,
1199                                     &ctrl_buf->address.mem_addr_high,
1200                                     &ctrl_buf->address.mem_addr_low);
1201        }
1202
1203        return 0;
1204
1205chunk_list_unmap:
1206        for (; i < chunk_list_size; i++) {
1207                dma_unmap_single(&dev->pdev->dev, chunk_list->chunks[i].dma_addr,
1208                                 chunk_list->chunks[i].length, DMA_TO_DEVICE);
1209        }
1210chunk_list_dealloc:
1211        for (i = 0; i < chunk_list_size; i++)
1212                kfree(chunk_list->chunks[i].buf);
1213
1214        kfree(chunk_list->chunks);
1215        return -ENOMEM;
1216}
1217
1218static void pbl_chunk_list_destroy(struct efa_dev *dev, struct pbl_context *pbl)
1219{
1220        struct pbl_chunk_list *chunk_list = &pbl->phys.indirect.chunk_list;
1221        int i;
1222
1223        for (i = 0; i < chunk_list->size; i++) {
1224                dma_unmap_single(&dev->pdev->dev, chunk_list->chunks[i].dma_addr,
1225                                 chunk_list->chunks[i].length, DMA_TO_DEVICE);
1226                kfree(chunk_list->chunks[i].buf);
1227        }
1228
1229        kfree(chunk_list->chunks);
1230}
1231
1232/* initialize pbl continuous mode: map pbl buffer to a dma address. */
1233static int pbl_continuous_initialize(struct efa_dev *dev,
1234                                     struct pbl_context *pbl)
1235{
1236        dma_addr_t dma_addr;
1237
1238        dma_addr = dma_map_single(&dev->pdev->dev, pbl->pbl_buf,
1239                                  pbl->pbl_buf_size_in_bytes, DMA_TO_DEVICE);
1240        if (dma_mapping_error(&dev->pdev->dev, dma_addr)) {
1241                ibdev_err(&dev->ibdev, "Unable to map pbl to DMA address\n");
1242                return -ENOMEM;
1243        }
1244
1245        pbl->phys.continuous.dma_addr = dma_addr;
1246        ibdev_dbg(&dev->ibdev,
1247                  "pbl continuous - dma_addr = %pad, size[%u]\n",
1248                  &dma_addr, pbl->pbl_buf_size_in_bytes);
1249
1250        return 0;
1251}
1252
1253/*
1254 * initialize pbl indirect mode:
1255 * create a chunk list out of the dma addresses of the physical pages of
1256 * pbl buffer.
1257 */
1258static int pbl_indirect_initialize(struct efa_dev *dev, struct pbl_context *pbl)
1259{
1260        u32 size_in_pages = DIV_ROUND_UP(pbl->pbl_buf_size_in_bytes, PAGE_SIZE);
1261        struct scatterlist *sgl;
1262        int sg_dma_cnt, err;
1263
1264        BUILD_BUG_ON(EFA_CHUNK_PAYLOAD_SIZE > PAGE_SIZE);
1265        sgl = efa_vmalloc_buf_to_sg(pbl->pbl_buf, size_in_pages);
1266        if (!sgl)
1267                return -ENOMEM;
1268
1269        sg_dma_cnt = dma_map_sg(&dev->pdev->dev, sgl, size_in_pages, DMA_TO_DEVICE);
1270        if (!sg_dma_cnt) {
1271                err = -EINVAL;
1272                goto err_map;
1273        }
1274
1275        pbl->phys.indirect.pbl_buf_size_in_pages = size_in_pages;
1276        pbl->phys.indirect.sgl = sgl;
1277        pbl->phys.indirect.sg_dma_cnt = sg_dma_cnt;
1278        err = pbl_chunk_list_create(dev, pbl);
1279        if (err) {
1280                ibdev_dbg(&dev->ibdev,
1281                          "chunk_list creation failed[%d]\n", err);
1282                goto err_chunk;
1283        }
1284
1285        ibdev_dbg(&dev->ibdev,
1286                  "pbl indirect - size[%u], chunks[%u]\n",
1287                  pbl->pbl_buf_size_in_bytes,
1288                  pbl->phys.indirect.chunk_list.size);
1289
1290        return 0;
1291
1292err_chunk:
1293        dma_unmap_sg(&dev->pdev->dev, sgl, size_in_pages, DMA_TO_DEVICE);
1294err_map:
1295        kfree(sgl);
1296        return err;
1297}
1298
1299static void pbl_indirect_terminate(struct efa_dev *dev, struct pbl_context *pbl)
1300{
1301        pbl_chunk_list_destroy(dev, pbl);
1302        dma_unmap_sg(&dev->pdev->dev, pbl->phys.indirect.sgl,
1303                     pbl->phys.indirect.pbl_buf_size_in_pages, DMA_TO_DEVICE);
1304        kfree(pbl->phys.indirect.sgl);
1305}
1306
1307/* create a page buffer list from a mapped user memory region */
1308static int pbl_create(struct efa_dev *dev,
1309                      struct pbl_context *pbl,
1310                      struct ib_umem *umem,
1311                      int hp_cnt,
1312                      u8 hp_shift)
1313{
1314        int err;
1315
1316        pbl->pbl_buf_size_in_bytes = hp_cnt * EFA_CHUNK_PAYLOAD_PTR_SIZE;
1317        pbl->pbl_buf = kzalloc(pbl->pbl_buf_size_in_bytes,
1318                               GFP_KERNEL | __GFP_NOWARN);
1319        if (pbl->pbl_buf) {
1320                pbl->physically_continuous = 1;
1321                err = umem_to_page_list(dev, umem, pbl->pbl_buf, hp_cnt,
1322                                        hp_shift);
1323                if (err)
1324                        goto err_continuous;
1325                err = pbl_continuous_initialize(dev, pbl);
1326                if (err)
1327                        goto err_continuous;
1328        } else {
1329                pbl->physically_continuous = 0;
1330                pbl->pbl_buf = vzalloc(pbl->pbl_buf_size_in_bytes);
1331                if (!pbl->pbl_buf)
1332                        return -ENOMEM;
1333
1334                err = umem_to_page_list(dev, umem, pbl->pbl_buf, hp_cnt,
1335                                        hp_shift);
1336                if (err)
1337                        goto err_indirect;
1338                err = pbl_indirect_initialize(dev, pbl);
1339                if (err)
1340                        goto err_indirect;
1341        }
1342
1343        ibdev_dbg(&dev->ibdev,
1344                  "user_pbl_created: user_pages[%u], continuous[%u]\n",
1345                  hp_cnt, pbl->physically_continuous);
1346
1347        return 0;
1348
1349err_continuous:
1350        kfree(pbl->pbl_buf);
1351        return err;
1352err_indirect:
1353        vfree(pbl->pbl_buf);
1354        return err;
1355}
1356
1357static void pbl_destroy(struct efa_dev *dev, struct pbl_context *pbl)
1358{
1359        if (pbl->physically_continuous) {
1360                dma_unmap_single(&dev->pdev->dev, pbl->phys.continuous.dma_addr,
1361                                 pbl->pbl_buf_size_in_bytes, DMA_TO_DEVICE);
1362                kfree(pbl->pbl_buf);
1363        } else {
1364                pbl_indirect_terminate(dev, pbl);
1365                vfree(pbl->pbl_buf);
1366        }
1367}
1368
1369static int efa_create_inline_pbl(struct efa_dev *dev, struct efa_mr *mr,
1370                                 struct efa_com_reg_mr_params *params)
1371{
1372        int err;
1373
1374        params->inline_pbl = 1;
1375        err = umem_to_page_list(dev, mr->umem, params->pbl.inline_pbl_array,
1376                                params->page_num, params->page_shift);
1377        if (err)
1378                return err;
1379
1380        ibdev_dbg(&dev->ibdev,
1381                  "inline_pbl_array - pages[%u]\n", params->page_num);
1382
1383        return 0;
1384}
1385
1386static int efa_create_pbl(struct efa_dev *dev,
1387                          struct pbl_context *pbl,
1388                          struct efa_mr *mr,
1389                          struct efa_com_reg_mr_params *params)
1390{
1391        int err;
1392
1393        err = pbl_create(dev, pbl, mr->umem, params->page_num,
1394                         params->page_shift);
1395        if (err) {
1396                ibdev_dbg(&dev->ibdev, "Failed to create pbl[%d]\n", err);
1397                return err;
1398        }
1399
1400        params->inline_pbl = 0;
1401        params->indirect = !pbl->physically_continuous;
1402        if (pbl->physically_continuous) {
1403                params->pbl.pbl.length = pbl->pbl_buf_size_in_bytes;
1404
1405                efa_com_set_dma_addr(pbl->phys.continuous.dma_addr,
1406                                     &params->pbl.pbl.address.mem_addr_high,
1407                                     &params->pbl.pbl.address.mem_addr_low);
1408        } else {
1409                params->pbl.pbl.length =
1410                        pbl->phys.indirect.chunk_list.chunks[0].length;
1411
1412                efa_com_set_dma_addr(pbl->phys.indirect.chunk_list.chunks[0].dma_addr,
1413                                     &params->pbl.pbl.address.mem_addr_high,
1414                                     &params->pbl.pbl.address.mem_addr_low);
1415        }
1416
1417        return 0;
1418}
1419
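/*
 * Scan the mapped umem and pick the largest power-of-two page size (at
 * least PAGE_SIZE, at most 1 << max_page_shift) at which the region is
 * naturally aligned and physically contiguous.  Returns the total
 * number of PAGE_SIZE pages in *count, the chosen page shift in *shift
 * and the resulting number of device pages in *ncont.
 */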
1420static void efa_cont_pages(struct ib_umem *umem, u64 addr,
1421                           unsigned long max_page_shift,
1422                           int *count, u8 *shift, u32 *ncont)
1423{
1424        struct scatterlist *sg;
1425        u64 base = ~0, p = 0;
1426        unsigned long tmp;
1427        unsigned long m;
1428        u64 len, pfn;
1429        int i = 0;
1430        int entry;
1431
1432        addr = addr >> PAGE_SHIFT;
1433        tmp = (unsigned long)addr;
1434        m = find_first_bit(&tmp, BITS_PER_LONG);
1435        if (max_page_shift)
1436                m = min_t(unsigned long, max_page_shift - PAGE_SHIFT, m);
1437
1438        for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) {
1439                len = DIV_ROUND_UP(sg_dma_len(sg), PAGE_SIZE);
1440                pfn = sg_dma_address(sg) >> PAGE_SHIFT;
1441                if (base + p != pfn) {
1442                        /*
1443                         * If either the offset or the new
 1444                         * base is unaligned, update m
1445                         */
1446                        tmp = (unsigned long)(pfn | p);
1447                        if (!IS_ALIGNED(tmp, 1 << m))
1448                                m = find_first_bit(&tmp, BITS_PER_LONG);
1449
1450                        base = pfn;
1451                        p = 0;
1452                }
1453
1454                p += len;
1455                i += len;
1456        }
1457
1458        if (i) {
1459                m = min_t(unsigned long, ilog2(roundup_pow_of_two(i)), m);
1460                *ncont = DIV_ROUND_UP(i, (1 << m));
1461        } else {
1462                m = 0;
1463                *ncont = 0;
1464        }
1465
1466        *shift = PAGE_SHIFT + m;
1467        *count = i;
1468}
1469
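    /*
     * Register a user memory region: pin the user pages, choose the largest
     * page size the device and the mapping allow, and pass the page list to
     * the device either inline in the admin command or through a PBL,
     * depending on the number of pages.
     */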
1470struct ib_mr *efa_reg_mr(struct ib_pd *ibpd, u64 start, u64 length,
1471                         u64 virt_addr, int access_flags,
1472                         struct ib_udata *udata)
1473{
1474        struct efa_dev *dev = to_edev(ibpd->device);
1475        struct efa_com_reg_mr_params params = {};
1476        struct efa_com_reg_mr_result result = {};
1477        unsigned long max_page_shift;
1478        struct pbl_context pbl;
1479        struct efa_mr *mr;
1480        int inline_size;
1481        int npages;
1482        int err;
1483
1484        if (udata->inlen &&
1485            !ib_is_udata_cleared(udata, 0, udata->inlen)) {
1486                ibdev_dbg(&dev->ibdev,
1487                          "Incompatible ABI params, udata not cleared\n");
1488                err = -EINVAL;
1489                goto err_out;
1490        }
1491
1492        if (access_flags & ~EFA_SUPPORTED_ACCESS_FLAGS) {
1493                ibdev_dbg(&dev->ibdev,
1494                          "Unsupported access flags[%#x], supported[%#x]\n",
1495                          access_flags, EFA_SUPPORTED_ACCESS_FLAGS);
1496                err = -EOPNOTSUPP;
1497                goto err_out;
1498        }
1499
1500        mr = kzalloc(sizeof(*mr), GFP_KERNEL);
1501        if (!mr) {
1502                err = -ENOMEM;
1503                goto err_out;
1504        }
1505
1506        mr->umem = ib_umem_get(udata, start, length, access_flags, 0);
1507        if (IS_ERR(mr->umem)) {
1508                err = PTR_ERR(mr->umem);
1509                ibdev_dbg(&dev->ibdev,
1510                          "Failed to pin and map user space memory[%d]\n", err);
1511                goto err_free;
1512        }
1513
1514        params.pd = to_epd(ibpd)->pdn;
1515        params.iova = virt_addr;
1516        params.mr_length_in_bytes = length;
1517        params.permissions = access_flags & 0x1;
1518        max_page_shift = fls64(dev->dev_attr.page_size_cap);
1519
1520        efa_cont_pages(mr->umem, start, max_page_shift, &npages,
1521                       &params.page_shift, &params.page_num);
1522        ibdev_dbg(&dev->ibdev,
1523                  "start %#llx length %#llx npages %d params.page_shift %u params.page_num %u\n",
1524                  start, length, npages, params.page_shift, params.page_num);
1525
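            /*
             * If the page list fits in the inline array of the register MR
             * command, pass it there; otherwise build a PBL in host memory
             * and hand its address to the device. The PBL is only needed
             * while the command executes and is destroyed right after it
             * completes.
             */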
1526        inline_size = ARRAY_SIZE(params.pbl.inline_pbl_array);
1527        if (params.page_num <= inline_size) {
1528                err = efa_create_inline_pbl(dev, mr, &params);
1529                if (err)
1530                        goto err_unmap;
1531
1532                err = efa_com_register_mr(&dev->edev, &params, &result);
1533                if (err)
1534                        goto err_unmap;
1535        } else {
1536                err = efa_create_pbl(dev, &pbl, mr, &params);
1537                if (err)
1538                        goto err_unmap;
1539
1540                err = efa_com_register_mr(&dev->edev, &params, &result);
1541                pbl_destroy(dev, &pbl);
1542
1543                if (err)
1544                        goto err_unmap;
1545        }
1546
1547        mr->ibmr.lkey = result.l_key;
1548        mr->ibmr.rkey = result.r_key;
1549        mr->ibmr.length = length;
1550        ibdev_dbg(&dev->ibdev, "Registered mr[%d]\n", mr->ibmr.lkey);
1551
1552        return &mr->ibmr;
1553
1554err_unmap:
1555        ib_umem_release(mr->umem);
1556err_free:
1557        kfree(mr);
1558err_out:
1559        atomic64_inc(&dev->stats.sw_stats.reg_mr_err);
1560        return ERR_PTR(err);
1561}
1562
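    /*
     * Deregister a memory region. The deregistration command is issued
     * first and the umem is released (unpinning the pages) only if it
     * succeeds, so the device is never left with a translation to pages
     * that have already been returned.
     */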
1563int efa_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata)
1564{
1565        struct efa_dev *dev = to_edev(ibmr->device);
1566        struct efa_com_dereg_mr_params params;
1567        struct efa_mr *mr = to_emr(ibmr);
1568        int err;
1569
1570        if (udata->inlen &&
1571            !ib_is_udata_cleared(udata, 0, udata->inlen)) {
1572                ibdev_dbg(&dev->ibdev, "Incompatible ABI params\n");
1573                return -EINVAL;
1574        }
1575
1576        ibdev_dbg(&dev->ibdev, "Deregister mr[%d]\n", ibmr->lkey);
1577
1578        if (mr->umem) {
1579                params.l_key = mr->ibmr.lkey;
1580                err = efa_com_dereg_mr(&dev->edev, &params);
1581                if (err)
1582                        return err;
1583                ib_umem_release(mr->umem);
1584        }
1585
1586        kfree(mr);
1587
1588        return 0;
1589}
1590
1591int efa_get_port_immutable(struct ib_device *ibdev, u8 port_num,
1592                           struct ib_port_immutable *immutable)
1593{
1594        struct ib_port_attr attr;
1595        int err;
1596
1597        err = ib_query_port(ibdev, port_num, &attr);
1598        if (err) {
1599                ibdev_dbg(ibdev, "Couldn't query port err[%d]\n", err);
1600                return err;
1601        }
1602
1603        immutable->pkey_tbl_len = attr.pkey_tbl_len;
1604        immutable->gid_tbl_len = attr.gid_tbl_len;
1605
1606        return 0;
1607}
1608
1609static int efa_dealloc_uar(struct efa_dev *dev, u16 uarn)
1610{
1611        struct efa_com_dealloc_uar_params params = {
1612                .uarn = uarn,
1613        };
1614
1615        return efa_com_dealloc_uar(&dev->edev, &params);
1616}
1617
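    /*
     * Allocate a user context: allocate a UAR from the device, initialize
     * the xarray tracking this context's mmap entries, and report the
     * supported udata commands and relevant device limits to userspace.
     */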
1618int efa_alloc_ucontext(struct ib_ucontext *ibucontext, struct ib_udata *udata)
1619{
1620        struct efa_ucontext *ucontext = to_eucontext(ibucontext);
1621        struct efa_dev *dev = to_edev(ibucontext->device);
1622        struct efa_ibv_alloc_ucontext_resp resp = {};
1623        struct efa_com_alloc_uar_result result;
1624        int err;
1625
1626        /*
1627         * It's fine if the driver does not know all request fields;
1628         * we will ack the input fields in our response.
1629         */
1630
1631        err = efa_com_alloc_uar(&dev->edev, &result);
1632        if (err)
1633                goto err_out;
1634
1635        ucontext->uarn = result.uarn;
1636        xa_init(&ucontext->mmap_xa);
1637
1638        resp.cmds_supp_udata_mask |= EFA_USER_CMDS_SUPP_UDATA_QUERY_DEVICE;
1639        resp.cmds_supp_udata_mask |= EFA_USER_CMDS_SUPP_UDATA_CREATE_AH;
1640        resp.sub_cqs_per_cq = dev->dev_attr.sub_cqs_per_cq;
1641        resp.inline_buf_size = dev->dev_attr.inline_buf_size;
1642        resp.max_llq_size = dev->dev_attr.max_llq_size;
1643
1644        if (udata && udata->outlen) {
1645                err = ib_copy_to_udata(udata, &resp,
1646                                       min(sizeof(resp), udata->outlen));
1647                if (err)
1648                        goto err_dealloc_uar;
1649        }
1650
1651        return 0;
1652
1653err_dealloc_uar:
1654        efa_dealloc_uar(dev, result.uarn);
1655err_out:
1656        atomic64_inc(&dev->stats.sw_stats.alloc_ucontext_err);
1657        return err;
1658}
1659
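    /*
     * Tear down a user context: drop its remaining mmap entries and return
     * the UAR to the device.
     */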
1660void efa_dealloc_ucontext(struct ib_ucontext *ibucontext)
1661{
1662        struct efa_ucontext *ucontext = to_eucontext(ibucontext);
1663        struct efa_dev *dev = to_edev(ibucontext->device);
1664
1665        mmap_entries_remove_free(dev, ucontext);
1666        efa_dealloc_uar(dev, ucontext->uarn);
1667}
1668
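    /*
     * Map one mmap entry into the caller's address space. The entry is
     * looked up by key and its mmap_flag selects the mapping type: I/O
     * ranges are mapped non-cached or write-combined through
     * rdma_user_mmap_io(), while DMA pages are inserted one page at a time
     * with vm_insert_page().
     */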
1669static int __efa_mmap(struct efa_dev *dev, struct efa_ucontext *ucontext,
1670                      struct vm_area_struct *vma, u64 key, u64 length)
1671{
1672        struct efa_mmap_entry *entry;
1673        unsigned long va;
1674        u64 pfn;
1675        int err;
1676
1677        entry = mmap_entry_get(dev, ucontext, key, length);
1678        if (!entry) {
1679                ibdev_dbg(&dev->ibdev, "key[%#llx] does not have a valid entry\n",
1680                          key);
1681                return -EINVAL;
1682        }
1683
1684        ibdev_dbg(&dev->ibdev,
1685                  "Mapping address[%#llx], length[%#llx], mmap_flag[%d]\n",
1686                  entry->address, length, entry->mmap_flag);
1687
1688        pfn = entry->address >> PAGE_SHIFT;
1689        switch (entry->mmap_flag) {
1690        case EFA_MMAP_IO_NC:
1691                err = rdma_user_mmap_io(&ucontext->ibucontext, vma, pfn, length,
1692                                        pgprot_noncached(vma->vm_page_prot));
1693                break;
1694        case EFA_MMAP_IO_WC:
1695                err = rdma_user_mmap_io(&ucontext->ibucontext, vma, pfn, length,
1696                                        pgprot_writecombine(vma->vm_page_prot));
1697                break;
1698        case EFA_MMAP_DMA_PAGE:
1699                for (va = vma->vm_start; va < vma->vm_end;
1700                     va += PAGE_SIZE, pfn++) {
1701                        err = vm_insert_page(vma, va, pfn_to_page(pfn));
1702                        if (err)
1703                                break;
1704                }
1705                break;
1706        default:
1707                err = -EINVAL;
1708        }
1709
1710        if (err)
1711                ibdev_dbg(
1712                        &dev->ibdev,
1713                        "Couldn't mmap address[%#llx] length[%#llx] mmap_flag[%d] err[%d]\n",
1714                        entry->address, length, entry->mmap_flag, err);
1715
1716        return err;
1717}
1718
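    /*
     * mmap entry point. The lookup key is carried in the page offset
     * supplied by userspace; mappings must be page aligned, shared and
     * non-executable.
     */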
1719int efa_mmap(struct ib_ucontext *ibucontext,
1720             struct vm_area_struct *vma)
1721{
1722        struct efa_ucontext *ucontext = to_eucontext(ibucontext);
1723        struct efa_dev *dev = to_edev(ibucontext->device);
1724        u64 length = vma->vm_end - vma->vm_start;
1725        u64 key = vma->vm_pgoff << PAGE_SHIFT;
1726
1727        ibdev_dbg(&dev->ibdev,
1728                  "start %#lx, end %#lx, length = %#llx, key = %#llx\n",
1729                  vma->vm_start, vma->vm_end, length, key);
1730
1731        if (length % PAGE_SIZE != 0 || !(vma->vm_flags & VM_SHARED)) {
1732                ibdev_dbg(&dev->ibdev,
1733                          "length[%#llx] is not page size aligned[%#lx] or VM_SHARED is not set [%#lx]\n",
1734                          length, PAGE_SIZE, vma->vm_flags);
1735                return -EINVAL;
1736        }
1737
1738        if (vma->vm_flags & VM_EXEC) {
1739                ibdev_dbg(&dev->ibdev, "Mapping executable pages is not permitted\n");
1740                return -EPERM;
1741        }
1742
1743        return __efa_mmap(dev, ucontext, vma, key, length);
1744}
1745
1746static int efa_ah_destroy(struct efa_dev *dev, struct efa_ah *ah)
1747{
1748        struct efa_com_destroy_ah_params params = {
1749                .ah = ah->ah,
1750                .pdn = to_epd(ah->ibah.pd)->pdn,
1751        };
1752
1753        return efa_com_destroy_ah(&dev->edev, &params);
1754}
1755
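    /*
     * Create an address handle for the given destination GID. Creation is
     * only supported in a sleepable context since it issues a command to
     * the device; the resulting handle is also copied back to userspace in
     * the create_ah response.
     */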
1756int efa_create_ah(struct ib_ah *ibah,
1757                  struct rdma_ah_attr *ah_attr,
1758                  u32 flags,
1759                  struct ib_udata *udata)
1760{
1761        struct efa_dev *dev = to_edev(ibah->device);
1762        struct efa_com_create_ah_params params = {};
1763        struct efa_ibv_create_ah_resp resp = {};
1764        struct efa_com_create_ah_result result;
1765        struct efa_ah *ah = to_eah(ibah);
1766        int err;
1767
1768        if (!(flags & RDMA_CREATE_AH_SLEEPABLE)) {
1769                ibdev_dbg(&dev->ibdev,
1770                          "Create address handle is not supported in atomic context\n");
1771                err = -EOPNOTSUPP;
1772                goto err_out;
1773        }
1774
1775        if (udata->inlen &&
1776            !ib_is_udata_cleared(udata, 0, udata->inlen)) {
1777                ibdev_dbg(&dev->ibdev, "Incompatible ABI params\n");
1778                err = -EINVAL;
1779                goto err_out;
1780        }
1781
1782        memcpy(params.dest_addr, ah_attr->grh.dgid.raw,
1783               sizeof(params.dest_addr));
1784        params.pdn = to_epd(ibah->pd)->pdn;
1785        err = efa_com_create_ah(&dev->edev, &params, &result);
1786        if (err)
1787                goto err_out;
1788
1789        memcpy(ah->id, ah_attr->grh.dgid.raw, sizeof(ah->id));
1790        ah->ah = result.ah;
1791
1792        resp.efa_address_handle = result.ah;
1793
1794        if (udata->outlen) {
1795                err = ib_copy_to_udata(udata, &resp,
1796                                       min(sizeof(resp), udata->outlen));
1797                if (err) {
1798                        ibdev_dbg(&dev->ibdev,
1799                                  "Failed to copy udata for create_ah response\n");
1800                        goto err_destroy_ah;
1801                }
1802        }
1803        ibdev_dbg(&dev->ibdev, "Created ah[%d]\n", ah->ah);
1804
1805        return 0;
1806
1807err_destroy_ah:
1808        efa_ah_destroy(dev, ah);
1809err_out:
1810        atomic64_inc(&dev->stats.sw_stats.create_ah_err);
1811        return err;
1812}
1813
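    /*
     * Destroy an address handle. Destruction also requires a sleepable
     * context; as this callback cannot return an error, a non-sleepable
     * call is only logged and the handle is left in place.
     */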
1814void efa_destroy_ah(struct ib_ah *ibah, u32 flags)
1815{
1816        struct efa_dev *dev = to_edev(ibah->pd->device);
1817        struct efa_ah *ah = to_eah(ibah);
1818
1819        ibdev_dbg(&dev->ibdev, "Destroy ah[%d]\n", ah->ah);
1820
1821        if (!(flags & RDMA_DESTROY_AH_SLEEPABLE)) {
1822                ibdev_dbg(&dev->ibdev,
1823                          "Destroy address handle is not supported in atomic context\n");
1824                return;
1825        }
1826
1827        efa_ah_destroy(dev, ah);
1828}
1829
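    /*
     * EFA does not expose a standard InfiniBand or Ethernet link layer, so
     * the port link layer is reported as unspecified.
     */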
1830enum rdma_link_layer efa_port_link_layer(struct ib_device *ibdev,
1831                                         u8 port_num)
1832{
1833        return IB_LINK_LAYER_UNSPECIFIED;
1834}
1835
1836