linux/drivers/infiniband/hw/mlx5/wr.c
   1// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
   2/*
   3 * Copyright (c) 2020, Mellanox Technologies inc. All rights reserved.
   4 */
   5
   6#include <linux/gfp.h>
   7#include <linux/mlx5/qp.h>
   8#include <linux/mlx5/driver.h>
   9#include "wr.h"
  10
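     /* Translation from IB work request opcodes to mlx5 WQE opcodes. Note that
      * local invalidate, memory registration and the driver-internal UMR work
      * request all map to the same UMR WQE opcode.
      */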
  11static const u32 mlx5_ib_opcode[] = {
  12        [IB_WR_SEND]                            = MLX5_OPCODE_SEND,
  13        [IB_WR_LSO]                             = MLX5_OPCODE_LSO,
  14        [IB_WR_SEND_WITH_IMM]                   = MLX5_OPCODE_SEND_IMM,
  15        [IB_WR_RDMA_WRITE]                      = MLX5_OPCODE_RDMA_WRITE,
  16        [IB_WR_RDMA_WRITE_WITH_IMM]             = MLX5_OPCODE_RDMA_WRITE_IMM,
  17        [IB_WR_RDMA_READ]                       = MLX5_OPCODE_RDMA_READ,
  18        [IB_WR_ATOMIC_CMP_AND_SWP]              = MLX5_OPCODE_ATOMIC_CS,
  19        [IB_WR_ATOMIC_FETCH_AND_ADD]            = MLX5_OPCODE_ATOMIC_FA,
  20        [IB_WR_SEND_WITH_INV]                   = MLX5_OPCODE_SEND_INVAL,
  21        [IB_WR_LOCAL_INV]                       = MLX5_OPCODE_UMR,
  22        [IB_WR_REG_MR]                          = MLX5_OPCODE_UMR,
  23        [IB_WR_MASKED_ATOMIC_CMP_AND_SWP]       = MLX5_OPCODE_ATOMIC_MASKED_CS,
  24        [IB_WR_MASKED_ATOMIC_FETCH_AND_ADD]     = MLX5_OPCODE_ATOMIC_MASKED_FA,
  25        [MLX5_IB_WR_UMR]                        = MLX5_OPCODE_UMR,
  26};
  27
   28/* handle_post_send_edge - Check whether we have reached the SQ edge. If so,
   29 * move to the next nearby edge and get a new address translation for the WQE.
   30 * @sq: SQ buffer.
   31 * @seg: Current WQE position (16B aligned).
   32 * @wqe_sz: Total current WQE size, in 16B units.
   33 * @cur_edge: Updated current edge.
   34 */
  35static inline void handle_post_send_edge(struct mlx5_ib_wq *sq, void **seg,
  36                                         u32 wqe_sz, void **cur_edge)
  37{
  38        u32 idx;
  39
  40        if (likely(*seg != *cur_edge))
  41                return;
  42
  43        idx = (sq->cur_post + (wqe_sz >> 2)) & (sq->wqe_cnt - 1);
  44        *cur_edge = get_sq_edge(sq, idx);
  45
  46        *seg = mlx5_frag_buf_get_wqe(&sq->fbc, idx);
  47}
  48
   49/* memcpy_send_wqe - Copy data from @src into the WQE and update the relevant
   50 * WQ pointers. On return, @seg is aligned to 16B regardless of the copied size.
   51 * @sq: SQ buffer.
   52 * @cur_edge: Updated current edge.
   53 * @seg: Current WQE position (16B aligned).
   54 * @wqe_sz: Total current WQE size, in 16B units.
   55 * @src: Pointer to copy from.
   56 * @n: Number of bytes to copy.
   57 */
  58static inline void memcpy_send_wqe(struct mlx5_ib_wq *sq, void **cur_edge,
  59                                   void **seg, u32 *wqe_sz, const void *src,
  60                                   size_t n)
  61{
  62        while (likely(n)) {
  63                size_t leftlen = *cur_edge - *seg;
  64                size_t copysz = min_t(size_t, leftlen, n);
  65                size_t stride;
  66
  67                memcpy(*seg, src, copysz);
  68
  69                n -= copysz;
  70                src += copysz;
  71                stride = !n ? ALIGN(copysz, 16) : copysz;
  72                *seg += stride;
  73                *wqe_sz += stride >> 4;
  74                handle_post_send_edge(sq, seg, *wqe_sz, cur_edge);
  75        }
  76}
  77
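     /* mlx5_wq_overflow - Check whether posting @nreq more work requests would
      * overflow @wq. The lockless head/tail check covers the common case; when it
      * indicates a full queue, head and tail are re-read under the CQ lock before
      * overflow is declared.
      */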
  78static int mlx5_wq_overflow(struct mlx5_ib_wq *wq, int nreq,
  79                            struct ib_cq *ib_cq)
  80{
  81        struct mlx5_ib_cq *cq;
  82        unsigned int cur;
  83
  84        cur = wq->head - wq->tail;
  85        if (likely(cur + nreq < wq->max_post))
  86                return 0;
  87
  88        cq = to_mcq(ib_cq);
  89        spin_lock(&cq->lock);
  90        cur = wq->head - wq->tail;
  91        spin_unlock(&cq->lock);
  92
  93        return cur + nreq >= wq->max_post;
  94}
  95
  96static __always_inline void set_raddr_seg(struct mlx5_wqe_raddr_seg *rseg,
  97                                          u64 remote_addr, u32 rkey)
  98{
  99        rseg->raddr    = cpu_to_be64(remote_addr);
 100        rseg->rkey     = cpu_to_be32(rkey);
 101        rseg->reserved = 0;
 102}
 103
 104static void set_eth_seg(const struct ib_send_wr *wr, struct mlx5_ib_qp *qp,
 105                        void **seg, int *size, void **cur_edge)
 106{
 107        struct mlx5_wqe_eth_seg *eseg = *seg;
 108
 109        memset(eseg, 0, sizeof(struct mlx5_wqe_eth_seg));
 110
 111        if (wr->send_flags & IB_SEND_IP_CSUM)
 112                eseg->cs_flags = MLX5_ETH_WQE_L3_CSUM |
 113                                 MLX5_ETH_WQE_L4_CSUM;
 114
 115        if (wr->opcode == IB_WR_LSO) {
 116                struct ib_ud_wr *ud_wr = container_of(wr, struct ib_ud_wr, wr);
 117                size_t left, copysz;
 118                void *pdata = ud_wr->header;
 119                size_t stride;
 120
 121                left = ud_wr->hlen;
 122                eseg->mss = cpu_to_be16(ud_wr->mss);
 123                eseg->inline_hdr.sz = cpu_to_be16(left);
 124
  125                /* memcpy_send_wqe expects a 16B-aligned address. Hence, we
  126                 * first copy up to the current edge and then, if needed,
  127                 * continue with memcpy_send_wqe.
  128                 */
 129                copysz = min_t(u64, *cur_edge - (void *)eseg->inline_hdr.start,
 130                               left);
 131                memcpy(eseg->inline_hdr.start, pdata, copysz);
 132                stride = ALIGN(sizeof(struct mlx5_wqe_eth_seg) -
 133                               sizeof(eseg->inline_hdr.start) + copysz, 16);
 134                *size += stride / 16;
 135                *seg += stride;
 136
 137                if (copysz < left) {
 138                        handle_post_send_edge(&qp->sq, seg, *size, cur_edge);
 139                        left -= copysz;
 140                        pdata += copysz;
 141                        memcpy_send_wqe(&qp->sq, cur_edge, seg, size, pdata,
 142                                        left);
 143                }
 144
 145                return;
 146        }
 147
 148        *seg += sizeof(struct mlx5_wqe_eth_seg);
 149        *size += sizeof(struct mlx5_wqe_eth_seg) / 16;
 150}
 151
 152static void set_datagram_seg(struct mlx5_wqe_datagram_seg *dseg,
 153                             const struct ib_send_wr *wr)
 154{
 155        memcpy(&dseg->av, &to_mah(ud_wr(wr)->ah)->av, sizeof(struct mlx5_av));
 156        dseg->av.dqp_dct =
 157                cpu_to_be32(ud_wr(wr)->remote_qpn | MLX5_EXTENDED_UD_AV);
 158        dseg->av.key.qkey.qkey = cpu_to_be32(ud_wr(wr)->remote_qkey);
 159}
 160
 161static void set_data_ptr_seg(struct mlx5_wqe_data_seg *dseg, struct ib_sge *sg)
 162{
 163        dseg->byte_count = cpu_to_be32(sg->length);
 164        dseg->lkey       = cpu_to_be32(sg->lkey);
 165        dseg->addr       = cpu_to_be64(sg->addr);
 166}
 167
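     /* get_xlt_octo - Convert a translation-table size in bytes to the number of
      * octowords (MLX5_IB_UMR_OCTOWORD bytes each), after rounding @bytes up to
      * the UMR XLT alignment.
      */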
 168static u64 get_xlt_octo(u64 bytes)
 169{
 170        return ALIGN(bytes, MLX5_IB_UMR_XLT_ALIGNMENT) /
 171               MLX5_IB_UMR_OCTOWORD;
 172}
 173
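     /* frwr_mkey_mask - Mask of mkey context fields that a fast-registration
      * (FRWR) UMR WQE is allowed to modify. The atomic permission bit is
      * included only when atomic access is requested.
      */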
 174static __be64 frwr_mkey_mask(bool atomic)
 175{
 176        u64 result;
 177
 178        result = MLX5_MKEY_MASK_LEN             |
 179                MLX5_MKEY_MASK_PAGE_SIZE        |
 180                MLX5_MKEY_MASK_START_ADDR       |
 181                MLX5_MKEY_MASK_EN_RINVAL        |
 182                MLX5_MKEY_MASK_KEY              |
 183                MLX5_MKEY_MASK_LR               |
 184                MLX5_MKEY_MASK_LW               |
 185                MLX5_MKEY_MASK_RR               |
 186                MLX5_MKEY_MASK_RW               |
 187                MLX5_MKEY_MASK_SMALL_FENCE      |
 188                MLX5_MKEY_MASK_FREE;
 189
 190        if (atomic)
 191                result |= MLX5_MKEY_MASK_A;
 192
 193        return cpu_to_be64(result);
 194}
 195
 196static __be64 sig_mkey_mask(void)
 197{
 198        u64 result;
 199
 200        result = MLX5_MKEY_MASK_LEN             |
 201                MLX5_MKEY_MASK_PAGE_SIZE        |
 202                MLX5_MKEY_MASK_START_ADDR       |
 203                MLX5_MKEY_MASK_EN_SIGERR        |
 204                MLX5_MKEY_MASK_EN_RINVAL        |
 205                MLX5_MKEY_MASK_KEY              |
 206                MLX5_MKEY_MASK_LR               |
 207                MLX5_MKEY_MASK_LW               |
 208                MLX5_MKEY_MASK_RR               |
 209                MLX5_MKEY_MASK_RW               |
 210                MLX5_MKEY_MASK_SMALL_FENCE      |
 211                MLX5_MKEY_MASK_FREE             |
 212                MLX5_MKEY_MASK_BSF_EN;
 213
 214        return cpu_to_be64(result);
 215}
 216
 217static void set_reg_umr_seg(struct mlx5_wqe_umr_ctrl_seg *umr,
 218                            struct mlx5_ib_mr *mr, u8 flags, bool atomic)
 219{
 220        int size = (mr->ndescs + mr->meta_ndescs) * mr->desc_size;
 221
 222        memset(umr, 0, sizeof(*umr));
 223
 224        umr->flags = flags;
 225        umr->xlt_octowords = cpu_to_be16(get_xlt_octo(size));
 226        umr->mkey_mask = frwr_mkey_mask(atomic);
 227}
 228
 229static void set_linv_umr_seg(struct mlx5_wqe_umr_ctrl_seg *umr)
 230{
 231        memset(umr, 0, sizeof(*umr));
 232        umr->mkey_mask = cpu_to_be64(MLX5_MKEY_MASK_FREE);
 233        umr->flags = MLX5_UMR_INLINE;
 234}
 235
 236static __be64 get_umr_enable_mr_mask(void)
 237{
 238        u64 result;
 239
 240        result = MLX5_MKEY_MASK_KEY |
 241                 MLX5_MKEY_MASK_FREE;
 242
 243        return cpu_to_be64(result);
 244}
 245
 246static __be64 get_umr_disable_mr_mask(void)
 247{
 248        u64 result;
 249
 250        result = MLX5_MKEY_MASK_FREE;
 251
 252        return cpu_to_be64(result);
 253}
 254
 255static __be64 get_umr_update_translation_mask(void)
 256{
 257        u64 result;
 258
 259        result = MLX5_MKEY_MASK_LEN |
 260                 MLX5_MKEY_MASK_PAGE_SIZE |
 261                 MLX5_MKEY_MASK_START_ADDR;
 262
 263        return cpu_to_be64(result);
 264}
 265
 266static __be64 get_umr_update_access_mask(int atomic,
 267                                         int relaxed_ordering_write,
 268                                         int relaxed_ordering_read)
 269{
 270        u64 result;
 271
 272        result = MLX5_MKEY_MASK_LR |
 273                 MLX5_MKEY_MASK_LW |
 274                 MLX5_MKEY_MASK_RR |
 275                 MLX5_MKEY_MASK_RW;
 276
 277        if (atomic)
 278                result |= MLX5_MKEY_MASK_A;
 279
 280        if (relaxed_ordering_write)
 281                result |= MLX5_MKEY_MASK_RELAXED_ORDERING_WRITE;
 282
 283        if (relaxed_ordering_read)
 284                result |= MLX5_MKEY_MASK_RELAXED_ORDERING_READ;
 285
 286        return cpu_to_be64(result);
 287}
 288
 289static __be64 get_umr_update_pd_mask(void)
 290{
 291        u64 result;
 292
 293        result = MLX5_MKEY_MASK_PD;
 294
 295        return cpu_to_be64(result);
 296}
 297
 298static int umr_check_mkey_mask(struct mlx5_ib_dev *dev, u64 mask)
 299{
 300        if (mask & MLX5_MKEY_MASK_PAGE_SIZE &&
 301            MLX5_CAP_GEN(dev->mdev, umr_modify_entity_size_disabled))
 302                return -EPERM;
 303
 304        if (mask & MLX5_MKEY_MASK_A &&
 305            MLX5_CAP_GEN(dev->mdev, umr_modify_atomic_disabled))
 306                return -EPERM;
 307
 308        if (mask & MLX5_MKEY_MASK_RELAXED_ORDERING_WRITE &&
 309            !MLX5_CAP_GEN(dev->mdev, relaxed_ordering_write_umr))
 310                return -EPERM;
 311
 312        if (mask & MLX5_MKEY_MASK_RELAXED_ORDERING_READ &&
 313            !MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read_umr))
 314                return -EPERM;
 315
 316        return 0;
 317}
 318
 319static int set_reg_umr_segment(struct mlx5_ib_dev *dev,
 320                               struct mlx5_wqe_umr_ctrl_seg *umr,
 321                               const struct ib_send_wr *wr)
 322{
 323        const struct mlx5_umr_wr *umrwr = umr_wr(wr);
 324
 325        memset(umr, 0, sizeof(*umr));
 326
 327        if (!umrwr->ignore_free_state) {
 328                if (wr->send_flags & MLX5_IB_SEND_UMR_FAIL_IF_FREE)
  329                        /* fail if free */
 330                        umr->flags = MLX5_UMR_CHECK_FREE;
 331                else
 332                        /* fail if not free */
 333                        umr->flags = MLX5_UMR_CHECK_NOT_FREE;
 334        }
 335
 336        umr->xlt_octowords = cpu_to_be16(get_xlt_octo(umrwr->xlt_size));
 337        if (wr->send_flags & MLX5_IB_SEND_UMR_UPDATE_XLT) {
 338                u64 offset = get_xlt_octo(umrwr->offset);
 339
 340                umr->xlt_offset = cpu_to_be16(offset & 0xffff);
 341                umr->xlt_offset_47_16 = cpu_to_be32(offset >> 16);
 342                umr->flags |= MLX5_UMR_TRANSLATION_OFFSET_EN;
 343        }
 344        if (wr->send_flags & MLX5_IB_SEND_UMR_UPDATE_TRANSLATION)
 345                umr->mkey_mask |= get_umr_update_translation_mask();
 346        if (wr->send_flags & MLX5_IB_SEND_UMR_UPDATE_PD_ACCESS) {
 347                umr->mkey_mask |= get_umr_update_access_mask(
 348                        !!(MLX5_CAP_GEN(dev->mdev, atomic)),
 349                        !!(MLX5_CAP_GEN(dev->mdev, relaxed_ordering_write_umr)),
 350                        !!(MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read_umr)));
 351                umr->mkey_mask |= get_umr_update_pd_mask();
 352        }
 353        if (wr->send_flags & MLX5_IB_SEND_UMR_ENABLE_MR)
 354                umr->mkey_mask |= get_umr_enable_mr_mask();
 355        if (wr->send_flags & MLX5_IB_SEND_UMR_DISABLE_MR)
 356                umr->mkey_mask |= get_umr_disable_mr_mask();
 357
 358        if (!wr->num_sge)
 359                umr->flags |= MLX5_UMR_INLINE;
 360
 361        return umr_check_mkey_mask(dev, be64_to_cpu(umr->mkey_mask));
 362}
 363
 364static u8 get_umr_flags(int acc)
 365{
 366        return (acc & IB_ACCESS_REMOTE_ATOMIC ? MLX5_PERM_ATOMIC       : 0) |
 367               (acc & IB_ACCESS_REMOTE_WRITE  ? MLX5_PERM_REMOTE_WRITE : 0) |
 368               (acc & IB_ACCESS_REMOTE_READ   ? MLX5_PERM_REMOTE_READ  : 0) |
 369               (acc & IB_ACCESS_LOCAL_WRITE   ? MLX5_PERM_LOCAL_WRITE  : 0) |
 370                MLX5_PERM_LOCAL_READ | MLX5_PERM_UMR_EN;
 371}
 372
 373static void set_reg_mkey_seg(struct mlx5_mkey_seg *seg,
 374                             struct mlx5_ib_mr *mr,
 375                             u32 key, int access)
 376{
 377        int ndescs = ALIGN(mr->ndescs + mr->meta_ndescs, 8) >> 1;
 378
 379        memset(seg, 0, sizeof(*seg));
 380
 381        if (mr->access_mode == MLX5_MKC_ACCESS_MODE_MTT)
 382                seg->log2_page_size = ilog2(mr->ibmr.page_size);
 383        else if (mr->access_mode == MLX5_MKC_ACCESS_MODE_KLMS)
 384                /* KLMs take twice the size of MTTs */
 385                ndescs *= 2;
 386
 387        seg->flags = get_umr_flags(access) | mr->access_mode;
 388        seg->qpn_mkey7_0 = cpu_to_be32((key & 0xff) | 0xffffff00);
 389        seg->flags_pd = cpu_to_be32(MLX5_MKEY_REMOTE_INVAL);
 390        seg->start_addr = cpu_to_be64(mr->ibmr.iova);
 391        seg->len = cpu_to_be64(mr->ibmr.length);
 392        seg->xlt_oct_size = cpu_to_be32(ndescs);
 393}
 394
 395static void set_linv_mkey_seg(struct mlx5_mkey_seg *seg)
 396{
 397        memset(seg, 0, sizeof(*seg));
 398        seg->status = MLX5_MKEY_STATUS_FREE;
 399}
 400
 401static void set_reg_mkey_segment(struct mlx5_ib_dev *dev,
 402                                 struct mlx5_mkey_seg *seg,
 403                                 const struct ib_send_wr *wr)
 404{
 405        const struct mlx5_umr_wr *umrwr = umr_wr(wr);
 406
 407        memset(seg, 0, sizeof(*seg));
 408        if (wr->send_flags & MLX5_IB_SEND_UMR_DISABLE_MR)
 409                MLX5_SET(mkc, seg, free, 1);
 410
 411        MLX5_SET(mkc, seg, a,
 412                 !!(umrwr->access_flags & IB_ACCESS_REMOTE_ATOMIC));
 413        MLX5_SET(mkc, seg, rw,
 414                 !!(umrwr->access_flags & IB_ACCESS_REMOTE_WRITE));
 415        MLX5_SET(mkc, seg, rr, !!(umrwr->access_flags & IB_ACCESS_REMOTE_READ));
 416        MLX5_SET(mkc, seg, lw, !!(umrwr->access_flags & IB_ACCESS_LOCAL_WRITE));
 417        MLX5_SET(mkc, seg, lr, 1);
 418        if (MLX5_CAP_GEN(dev->mdev, relaxed_ordering_write_umr))
 419                MLX5_SET(mkc, seg, relaxed_ordering_write,
 420                         !!(umrwr->access_flags & IB_ACCESS_RELAXED_ORDERING));
 421        if (MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read_umr))
 422                MLX5_SET(mkc, seg, relaxed_ordering_read,
 423                         !!(umrwr->access_flags & IB_ACCESS_RELAXED_ORDERING));
 424
 425        if (umrwr->pd)
 426                MLX5_SET(mkc, seg, pd, to_mpd(umrwr->pd)->pdn);
 427        if (wr->send_flags & MLX5_IB_SEND_UMR_UPDATE_TRANSLATION &&
 428            !umrwr->length)
 429                MLX5_SET(mkc, seg, length64, 1);
 430
 431        MLX5_SET64(mkc, seg, start_addr, umrwr->virt_addr);
 432        MLX5_SET64(mkc, seg, len, umrwr->length);
 433        MLX5_SET(mkc, seg, log_page_size, umrwr->page_shift);
 434        MLX5_SET(mkc, seg, qpn, 0xffffff);
 435        MLX5_SET(mkc, seg, mkey_7_0, mlx5_mkey_variant(umrwr->mkey));
 436}
 437
 438static void set_reg_data_seg(struct mlx5_wqe_data_seg *dseg,
 439                             struct mlx5_ib_mr *mr,
 440                             struct mlx5_ib_pd *pd)
 441{
 442        int bcount = mr->desc_size * (mr->ndescs + mr->meta_ndescs);
 443
 444        dseg->addr = cpu_to_be64(mr->desc_map);
 445        dseg->byte_count = cpu_to_be32(ALIGN(bcount, 64));
 446        dseg->lkey = cpu_to_be32(pd->ibpd.local_dma_lkey);
 447}
 448
 449static __be32 send_ieth(const struct ib_send_wr *wr)
 450{
 451        switch (wr->opcode) {
 452        case IB_WR_SEND_WITH_IMM:
 453        case IB_WR_RDMA_WRITE_WITH_IMM:
 454                return wr->ex.imm_data;
 455
 456        case IB_WR_SEND_WITH_INV:
 457                return cpu_to_be32(wr->ex.invalidate_rkey);
 458
 459        default:
 460                return 0;
 461        }
 462}
 463
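     /* calc_sig - XOR all @size bytes of @wqe and return the bitwise complement;
      * used to compute the optional WQE signature.
      */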
 464static u8 calc_sig(void *wqe, int size)
 465{
 466        u8 *p = wqe;
 467        u8 res = 0;
 468        int i;
 469
 470        for (i = 0; i < size; i++)
 471                res ^= p[i];
 472
 473        return ~res;
 474}
 475
 476static u8 wq_sig(void *wqe)
 477{
 478        return calc_sig(wqe, (*((u8 *)wqe + 8) & 0x3f) << 4);
 479}
 480
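     /* set_data_inl_seg - Copy the work request's scatter/gather list inline into
      * the WQE, crossing SQ fragment edges as needed. Returns -ENOMEM when the
      * total inline length exceeds the QP's max_inline_data.
      */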
 481static int set_data_inl_seg(struct mlx5_ib_qp *qp, const struct ib_send_wr *wr,
 482                            void **wqe, int *wqe_sz, void **cur_edge)
 483{
 484        struct mlx5_wqe_inline_seg *seg;
 485        size_t offset;
 486        int inl = 0;
 487        int i;
 488
 489        seg = *wqe;
 490        *wqe += sizeof(*seg);
 491        offset = sizeof(*seg);
 492
 493        for (i = 0; i < wr->num_sge; i++) {
 494                size_t len  = wr->sg_list[i].length;
 495                void *addr = (void *)(unsigned long)(wr->sg_list[i].addr);
 496
 497                inl += len;
 498
 499                if (unlikely(inl > qp->max_inline_data))
 500                        return -ENOMEM;
 501
 502                while (likely(len)) {
 503                        size_t leftlen;
 504                        size_t copysz;
 505
 506                        handle_post_send_edge(&qp->sq, wqe,
 507                                              *wqe_sz + (offset >> 4),
 508                                              cur_edge);
 509
 510                        leftlen = *cur_edge - *wqe;
 511                        copysz = min_t(size_t, leftlen, len);
 512
 513                        memcpy(*wqe, addr, copysz);
 514                        len -= copysz;
 515                        addr += copysz;
 516                        *wqe += copysz;
 517                        offset += copysz;
 518                }
 519        }
 520
 521        seg->byte_count = cpu_to_be32(inl | MLX5_INLINE_SEG);
 522
 523        *wqe_sz +=  ALIGN(inl + sizeof(seg->byte_count), 16) / 16;
 524
 525        return 0;
 526}
 527
 528static u16 prot_field_size(enum ib_signature_type type)
 529{
 530        switch (type) {
 531        case IB_SIG_TYPE_T10_DIF:
 532                return MLX5_DIF_SIZE;
 533        default:
 534                return 0;
 535        }
 536}
 537
 538static u8 bs_selector(int block_size)
 539{
 540        switch (block_size) {
 541        case 512:           return 0x1;
 542        case 520:           return 0x2;
 543        case 4096:          return 0x3;
 544        case 4160:          return 0x4;
 545        case 1073741824:    return 0x5;
 546        default:            return 0;
 547        }
 548}
 549
 550static void mlx5_fill_inl_bsf(struct ib_sig_domain *domain,
 551                              struct mlx5_bsf_inl *inl)
 552{
 553        /* Valid inline section and allow BSF refresh */
 554        inl->vld_refresh = cpu_to_be16(MLX5_BSF_INL_VALID |
 555                                       MLX5_BSF_REFRESH_DIF);
 556        inl->dif_apptag = cpu_to_be16(domain->sig.dif.app_tag);
 557        inl->dif_reftag = cpu_to_be32(domain->sig.dif.ref_tag);
 558        /* repeating block */
 559        inl->rp_inv_seed = MLX5_BSF_REPEAT_BLOCK;
 560        inl->sig_type = domain->sig.dif.bg_type == IB_T10DIF_CRC ?
 561                        MLX5_DIF_CRC : MLX5_DIF_IPCS;
 562
 563        if (domain->sig.dif.ref_remap)
 564                inl->dif_inc_ref_guard_check |= MLX5_BSF_INC_REFTAG;
 565
 566        if (domain->sig.dif.app_escape) {
 567                if (domain->sig.dif.ref_escape)
 568                        inl->dif_inc_ref_guard_check |= MLX5_BSF_APPREF_ESCAPE;
 569                else
 570                        inl->dif_inc_ref_guard_check |= MLX5_BSF_APPTAG_ESCAPE;
 571        }
 572
 573        inl->dif_app_bitmask_check =
 574                cpu_to_be16(domain->sig.dif.apptag_check_mask);
 575}
 576
 577static int mlx5_set_bsf(struct ib_mr *sig_mr,
 578                        struct ib_sig_attrs *sig_attrs,
 579                        struct mlx5_bsf *bsf, u32 data_size)
 580{
 581        struct mlx5_core_sig_ctx *msig = to_mmr(sig_mr)->sig;
 582        struct mlx5_bsf_basic *basic = &bsf->basic;
 583        struct ib_sig_domain *mem = &sig_attrs->mem;
 584        struct ib_sig_domain *wire = &sig_attrs->wire;
 585
 586        memset(bsf, 0, sizeof(*bsf));
 587
 588        /* Basic + Extended + Inline */
 589        basic->bsf_size_sbs = 1 << 7;
 590        /* Input domain check byte mask */
 591        basic->check_byte_mask = sig_attrs->check_mask;
 592        basic->raw_data_size = cpu_to_be32(data_size);
 593
 594        /* Memory domain */
 595        switch (sig_attrs->mem.sig_type) {
 596        case IB_SIG_TYPE_NONE:
 597                break;
 598        case IB_SIG_TYPE_T10_DIF:
 599                basic->mem.bs_selector = bs_selector(mem->sig.dif.pi_interval);
 600                basic->m_bfs_psv = cpu_to_be32(msig->psv_memory.psv_idx);
 601                mlx5_fill_inl_bsf(mem, &bsf->m_inl);
 602                break;
 603        default:
 604                return -EINVAL;
 605        }
 606
 607        /* Wire domain */
 608        switch (sig_attrs->wire.sig_type) {
 609        case IB_SIG_TYPE_NONE:
 610                break;
 611        case IB_SIG_TYPE_T10_DIF:
 612                if (mem->sig.dif.pi_interval == wire->sig.dif.pi_interval &&
 613                    mem->sig_type == wire->sig_type) {
 614                        /* Same block structure */
 615                        basic->bsf_size_sbs |= 1 << 4;
 616                        if (mem->sig.dif.bg_type == wire->sig.dif.bg_type)
 617                                basic->wire.copy_byte_mask |= MLX5_CPY_GRD_MASK;
 618                        if (mem->sig.dif.app_tag == wire->sig.dif.app_tag)
 619                                basic->wire.copy_byte_mask |= MLX5_CPY_APP_MASK;
 620                        if (mem->sig.dif.ref_tag == wire->sig.dif.ref_tag)
 621                                basic->wire.copy_byte_mask |= MLX5_CPY_REF_MASK;
 622                } else
 623                        basic->wire.bs_selector =
 624                                bs_selector(wire->sig.dif.pi_interval);
 625
 626                basic->w_bfs_psv = cpu_to_be32(msig->psv_wire.psv_idx);
 627                mlx5_fill_inl_bsf(wire, &bsf->w_inl);
 628                break;
 629        default:
 630                return -EINVAL;
 631        }
 632
 633        return 0;
 634}
 635
 636
 637static int set_sig_data_segment(const struct ib_send_wr *send_wr,
 638                                struct ib_mr *sig_mr,
 639                                struct ib_sig_attrs *sig_attrs,
 640                                struct mlx5_ib_qp *qp, void **seg, int *size,
 641                                void **cur_edge)
 642{
 643        struct mlx5_bsf *bsf;
 644        u32 data_len;
 645        u32 data_key;
 646        u64 data_va;
 647        u32 prot_len = 0;
 648        u32 prot_key = 0;
 649        u64 prot_va = 0;
 650        bool prot = false;
 651        int ret;
 652        int wqe_size;
 653        struct mlx5_ib_mr *mr = to_mmr(sig_mr);
 654        struct mlx5_ib_mr *pi_mr = mr->pi_mr;
 655
 656        data_len = pi_mr->data_length;
 657        data_key = pi_mr->ibmr.lkey;
 658        data_va = pi_mr->data_iova;
 659        if (pi_mr->meta_ndescs) {
 660                prot_len = pi_mr->meta_length;
 661                prot_key = pi_mr->ibmr.lkey;
 662                prot_va = pi_mr->pi_iova;
 663                prot = true;
 664        }
 665
 666        if (!prot || (data_key == prot_key && data_va == prot_va &&
 667                      data_len == prot_len)) {
 668                /**
  669                 * Source domain doesn't contain signature information,
  670                 * or data and protection are interleaved in memory.
  671                 * So we need to construct:
 672                 *                  ------------------
 673                 *                 |     data_klm     |
 674                 *                  ------------------
 675                 *                 |       BSF        |
 676                 *                  ------------------
 677                 **/
 678                struct mlx5_klm *data_klm = *seg;
 679
 680                data_klm->bcount = cpu_to_be32(data_len);
 681                data_klm->key = cpu_to_be32(data_key);
 682                data_klm->va = cpu_to_be64(data_va);
 683                wqe_size = ALIGN(sizeof(*data_klm), 64);
 684        } else {
 685                /**
  686                 * Source domain contains signature information,
  687                 * so we need to construct a strided block format:
 688                 *               ---------------------------
 689                 *              |     stride_block_ctrl     |
 690                 *               ---------------------------
 691                 *              |          data_klm         |
 692                 *               ---------------------------
 693                 *              |          prot_klm         |
 694                 *               ---------------------------
 695                 *              |             BSF           |
 696                 *               ---------------------------
 697                 **/
 698                struct mlx5_stride_block_ctrl_seg *sblock_ctrl;
 699                struct mlx5_stride_block_entry *data_sentry;
 700                struct mlx5_stride_block_entry *prot_sentry;
 701                u16 block_size = sig_attrs->mem.sig.dif.pi_interval;
 702                int prot_size;
 703
 704                sblock_ctrl = *seg;
 705                data_sentry = (void *)sblock_ctrl + sizeof(*sblock_ctrl);
 706                prot_sentry = (void *)data_sentry + sizeof(*data_sentry);
 707
 708                prot_size = prot_field_size(sig_attrs->mem.sig_type);
 709                if (!prot_size) {
 710                        pr_err("Bad block size given: %u\n", block_size);
 711                        return -EINVAL;
 712                }
 713                sblock_ctrl->bcount_per_cycle = cpu_to_be32(block_size +
 714                                                            prot_size);
 715                sblock_ctrl->op = cpu_to_be32(MLX5_STRIDE_BLOCK_OP);
 716                sblock_ctrl->repeat_count = cpu_to_be32(data_len / block_size);
 717                sblock_ctrl->num_entries = cpu_to_be16(2);
 718
 719                data_sentry->bcount = cpu_to_be16(block_size);
 720                data_sentry->key = cpu_to_be32(data_key);
 721                data_sentry->va = cpu_to_be64(data_va);
 722                data_sentry->stride = cpu_to_be16(block_size);
 723
 724                prot_sentry->bcount = cpu_to_be16(prot_size);
 725                prot_sentry->key = cpu_to_be32(prot_key);
 726                prot_sentry->va = cpu_to_be64(prot_va);
 727                prot_sentry->stride = cpu_to_be16(prot_size);
 728
 729                wqe_size = ALIGN(sizeof(*sblock_ctrl) + sizeof(*data_sentry) +
 730                                 sizeof(*prot_sentry), 64);
 731        }
 732
 733        *seg += wqe_size;
 734        *size += wqe_size / 16;
 735        handle_post_send_edge(&qp->sq, seg, *size, cur_edge);
 736
 737        bsf = *seg;
 738        ret = mlx5_set_bsf(sig_mr, sig_attrs, bsf, data_len);
 739        if (ret)
 740                return -EINVAL;
 741
 742        *seg += sizeof(*bsf);
 743        *size += sizeof(*bsf) / 16;
 744        handle_post_send_edge(&qp->sq, seg, *size, cur_edge);
 745
 746        return 0;
 747}
 748
 749static void set_sig_mkey_segment(struct mlx5_mkey_seg *seg,
 750                                 struct ib_mr *sig_mr, int access_flags,
 751                                 u32 size, u32 length, u32 pdn)
 752{
 753        u32 sig_key = sig_mr->rkey;
 754        u8 sigerr = to_mmr(sig_mr)->sig->sigerr_count & 1;
 755
 756        memset(seg, 0, sizeof(*seg));
 757
 758        seg->flags = get_umr_flags(access_flags) | MLX5_MKC_ACCESS_MODE_KLMS;
 759        seg->qpn_mkey7_0 = cpu_to_be32((sig_key & 0xff) | 0xffffff00);
 760        seg->flags_pd = cpu_to_be32(MLX5_MKEY_REMOTE_INVAL | sigerr << 26 |
 761                                    MLX5_MKEY_BSF_EN | pdn);
 762        seg->len = cpu_to_be64(length);
 763        seg->xlt_oct_size = cpu_to_be32(get_xlt_octo(size));
 764        seg->bsfs_octo_size = cpu_to_be32(MLX5_MKEY_BSF_OCTO_SIZE);
 765}
 766
 767static void set_sig_umr_segment(struct mlx5_wqe_umr_ctrl_seg *umr,
 768                                u32 size)
 769{
 770        memset(umr, 0, sizeof(*umr));
 771
 772        umr->flags = MLX5_FLAGS_INLINE | MLX5_FLAGS_CHECK_FREE;
 773        umr->xlt_octowords = cpu_to_be16(get_xlt_octo(size));
 774        umr->bsf_octowords = cpu_to_be16(MLX5_MKEY_BSF_OCTO_SIZE);
 775        umr->mkey_mask = sig_mkey_mask();
 776}
 777
 778static int set_pi_umr_wr(const struct ib_send_wr *send_wr,
 779                         struct mlx5_ib_qp *qp, void **seg, int *size,
 780                         void **cur_edge)
 781{
 782        const struct ib_reg_wr *wr = reg_wr(send_wr);
 783        struct mlx5_ib_mr *sig_mr = to_mmr(wr->mr);
 784        struct mlx5_ib_mr *pi_mr = sig_mr->pi_mr;
 785        struct ib_sig_attrs *sig_attrs = sig_mr->ibmr.sig_attrs;
 786        u32 pdn = to_mpd(qp->ibqp.pd)->pdn;
 787        u32 xlt_size;
 788        int region_len, ret;
 789
 790        if (unlikely(send_wr->num_sge != 0) ||
 791            unlikely(wr->access & IB_ACCESS_REMOTE_ATOMIC) ||
 792            unlikely(!sig_mr->sig) || unlikely(!qp->ibqp.integrity_en) ||
 793            unlikely(!sig_mr->sig->sig_status_checked))
 794                return -EINVAL;
 795
 796        /* length of the protected region, data + protection */
 797        region_len = pi_mr->ibmr.length;
 798
 799        /**
  800         * KLM octoword size - if protection was provided,
  801         * then we use the strided block format (3 octowords),
  802         * else we use a single KLM (1 octoword).
 803         **/
 804        if (sig_attrs->mem.sig_type != IB_SIG_TYPE_NONE)
 805                xlt_size = 0x30;
 806        else
 807                xlt_size = sizeof(struct mlx5_klm);
 808
 809        set_sig_umr_segment(*seg, xlt_size);
 810        *seg += sizeof(struct mlx5_wqe_umr_ctrl_seg);
 811        *size += sizeof(struct mlx5_wqe_umr_ctrl_seg) / 16;
 812        handle_post_send_edge(&qp->sq, seg, *size, cur_edge);
 813
 814        set_sig_mkey_segment(*seg, wr->mr, wr->access, xlt_size, region_len,
 815                             pdn);
 816        *seg += sizeof(struct mlx5_mkey_seg);
 817        *size += sizeof(struct mlx5_mkey_seg) / 16;
 818        handle_post_send_edge(&qp->sq, seg, *size, cur_edge);
 819
 820        ret = set_sig_data_segment(send_wr, wr->mr, sig_attrs, qp, seg, size,
 821                                   cur_edge);
 822        if (ret)
 823                return ret;
 824
 825        sig_mr->sig->sig_status_checked = false;
 826        return 0;
 827}
 828
 829static int set_psv_wr(struct ib_sig_domain *domain,
 830                      u32 psv_idx, void **seg, int *size)
 831{
 832        struct mlx5_seg_set_psv *psv_seg = *seg;
 833
 834        memset(psv_seg, 0, sizeof(*psv_seg));
 835        psv_seg->psv_num = cpu_to_be32(psv_idx);
 836        switch (domain->sig_type) {
 837        case IB_SIG_TYPE_NONE:
 838                break;
 839        case IB_SIG_TYPE_T10_DIF:
 840                psv_seg->transient_sig = cpu_to_be32(domain->sig.dif.bg << 16 |
 841                                                     domain->sig.dif.app_tag);
 842                psv_seg->ref_tag = cpu_to_be32(domain->sig.dif.ref_tag);
 843                break;
 844        default:
 845                pr_err("Bad signature type (%d) is given.\n",
 846                       domain->sig_type);
 847                return -EINVAL;
 848        }
 849
 850        *seg += sizeof(*psv_seg);
 851        *size += sizeof(*psv_seg) / 16;
 852
 853        return 0;
 854}
 855
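     /* set_reg_wr - Build the UMR control segment, mkey context segment and
      * descriptor list for a fast memory-registration (IB_WR_REG_MR) work
      * request. Descriptor lists up to MLX5_IB_SQ_UMR_INLINE_THRESHOLD bytes are
      * copied inline into the WQE; larger lists are referenced through a data
      * pointer segment.
      */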
 856static int set_reg_wr(struct mlx5_ib_qp *qp,
 857                      const struct ib_reg_wr *wr,
 858                      void **seg, int *size, void **cur_edge,
 859                      bool check_not_free)
 860{
 861        struct mlx5_ib_mr *mr = to_mmr(wr->mr);
 862        struct mlx5_ib_pd *pd = to_mpd(qp->ibqp.pd);
 863        struct mlx5_ib_dev *dev = to_mdev(pd->ibpd.device);
 864        int mr_list_size = (mr->ndescs + mr->meta_ndescs) * mr->desc_size;
 865        bool umr_inline = mr_list_size <= MLX5_IB_SQ_UMR_INLINE_THRESHOLD;
 866        bool atomic = wr->access & IB_ACCESS_REMOTE_ATOMIC;
 867        u8 flags = 0;
 868
 869        /* Matches access in mlx5_set_umr_free_mkey().
 870         * Relaxed Ordering is set implicitly in mlx5_set_umr_free_mkey() and
 871         * kernel ULPs are not aware of it, so we don't set it here.
 872         */
 873        if (!mlx5_ib_can_reconfig_with_umr(dev, 0, wr->access)) {
 874                mlx5_ib_warn(
 875                        to_mdev(qp->ibqp.device),
 876                        "Fast update for MR access flags is not possible\n");
 877                return -EINVAL;
 878        }
 879
 880        if (unlikely(wr->wr.send_flags & IB_SEND_INLINE)) {
 881                mlx5_ib_warn(to_mdev(qp->ibqp.device),
 882                             "Invalid IB_SEND_INLINE send flag\n");
 883                return -EINVAL;
 884        }
 885
 886        if (check_not_free)
 887                flags |= MLX5_UMR_CHECK_NOT_FREE;
 888        if (umr_inline)
 889                flags |= MLX5_UMR_INLINE;
 890
 891        set_reg_umr_seg(*seg, mr, flags, atomic);
 892        *seg += sizeof(struct mlx5_wqe_umr_ctrl_seg);
 893        *size += sizeof(struct mlx5_wqe_umr_ctrl_seg) / 16;
 894        handle_post_send_edge(&qp->sq, seg, *size, cur_edge);
 895
 896        set_reg_mkey_seg(*seg, mr, wr->key, wr->access);
 897        *seg += sizeof(struct mlx5_mkey_seg);
 898        *size += sizeof(struct mlx5_mkey_seg) / 16;
 899        handle_post_send_edge(&qp->sq, seg, *size, cur_edge);
 900
 901        if (umr_inline) {
 902                memcpy_send_wqe(&qp->sq, cur_edge, seg, size, mr->descs,
 903                                mr_list_size);
 904                *size = ALIGN(*size, MLX5_SEND_WQE_BB >> 4);
 905        } else {
 906                set_reg_data_seg(*seg, mr, pd);
 907                *seg += sizeof(struct mlx5_wqe_data_seg);
 908                *size += (sizeof(struct mlx5_wqe_data_seg) / 16);
 909        }
 910        return 0;
 911}
 912
 913static void set_linv_wr(struct mlx5_ib_qp *qp, void **seg, int *size,
 914                        void **cur_edge)
 915{
 916        set_linv_umr_seg(*seg);
 917        *seg += sizeof(struct mlx5_wqe_umr_ctrl_seg);
 918        *size += sizeof(struct mlx5_wqe_umr_ctrl_seg) / 16;
 919        handle_post_send_edge(&qp->sq, seg, *size, cur_edge);
 920        set_linv_mkey_seg(*seg);
 921        *seg += sizeof(struct mlx5_mkey_seg);
 922        *size += sizeof(struct mlx5_mkey_seg) / 16;
 923        handle_post_send_edge(&qp->sq, seg, *size, cur_edge);
 924}
 925
 926static void dump_wqe(struct mlx5_ib_qp *qp, u32 idx, int size_16)
 927{
 928        __be32 *p = NULL;
 929        int i, j;
 930
 931        pr_debug("dump WQE index %u:\n", idx);
 932        for (i = 0, j = 0; i < size_16 * 4; i += 4, j += 4) {
 933                if ((i & 0xf) == 0) {
 934                        p = mlx5_frag_buf_get_wqe(&qp->sq.fbc, idx);
 935                        pr_debug("WQBB at %p:\n", (void *)p);
 936                        j = 0;
 937                        idx = (idx + 1) & (qp->sq.wqe_cnt - 1);
 938                }
 939                pr_debug("%08x %08x %08x %08x\n", be32_to_cpu(p[j]),
 940                         be32_to_cpu(p[j + 1]), be32_to_cpu(p[j + 2]),
 941                         be32_to_cpu(p[j + 3]));
 942        }
 943}
 944
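     /* __begin_wqe - Start a new WQE at the current SQ producer index: fail with
      * -ENOMEM if the SQ is full, otherwise initialize the control segment
      * (immediate data, completion and solicited-event flags) and set up @seg,
      * @size and @cur_edge for the segments that follow.
      */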
 945static int __begin_wqe(struct mlx5_ib_qp *qp, void **seg,
 946                       struct mlx5_wqe_ctrl_seg **ctrl,
 947                       const struct ib_send_wr *wr, unsigned int *idx,
 948                       int *size, void **cur_edge, int nreq,
 949                       bool send_signaled, bool solicited)
 950{
 951        if (unlikely(mlx5_wq_overflow(&qp->sq, nreq, qp->ibqp.send_cq)))
 952                return -ENOMEM;
 953
 954        *idx = qp->sq.cur_post & (qp->sq.wqe_cnt - 1);
 955        *seg = mlx5_frag_buf_get_wqe(&qp->sq.fbc, *idx);
 956        *ctrl = *seg;
 957        *(uint32_t *)(*seg + 8) = 0;
 958        (*ctrl)->imm = send_ieth(wr);
 959        (*ctrl)->fm_ce_se = qp->sq_signal_bits |
 960                (send_signaled ? MLX5_WQE_CTRL_CQ_UPDATE : 0) |
 961                (solicited ? MLX5_WQE_CTRL_SOLICITED : 0);
 962
 963        *seg += sizeof(**ctrl);
 964        *size = sizeof(**ctrl) / 16;
 965        *cur_edge = qp->sq.cur_edge;
 966
 967        return 0;
 968}
 969
 970static int begin_wqe(struct mlx5_ib_qp *qp, void **seg,
 971                     struct mlx5_wqe_ctrl_seg **ctrl,
 972                     const struct ib_send_wr *wr, unsigned int *idx, int *size,
 973                     void **cur_edge, int nreq)
 974{
 975        return __begin_wqe(qp, seg, ctrl, wr, idx, size, cur_edge, nreq,
 976                           wr->send_flags & IB_SEND_SIGNALED,
 977                           wr->send_flags & IB_SEND_SOLICITED);
 978}
 979
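     /* finish_wqe - Fill in the remaining control segment fields (opcode,
      * producer index, DS count, fence and optional signature), record the
      * bookkeeping used at completion time, and advance the SQ producer index by
      * the number of WQEBBs the WQE consumed.
      */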
 980static void finish_wqe(struct mlx5_ib_qp *qp,
 981                       struct mlx5_wqe_ctrl_seg *ctrl,
 982                       void *seg, u8 size, void *cur_edge,
 983                       unsigned int idx, u64 wr_id, int nreq, u8 fence,
 984                       u32 mlx5_opcode)
 985{
 986        u8 opmod = 0;
 987
 988        ctrl->opmod_idx_opcode = cpu_to_be32(((u32)(qp->sq.cur_post) << 8) |
 989                                             mlx5_opcode | ((u32)opmod << 24));
 990        ctrl->qpn_ds = cpu_to_be32(size | (qp->trans_qp.base.mqp.qpn << 8));
 991        ctrl->fm_ce_se |= fence;
 992        if (unlikely(qp->flags_en & MLX5_QP_FLAG_SIGNATURE))
 993                ctrl->signature = wq_sig(ctrl);
 994
 995        qp->sq.wrid[idx] = wr_id;
 996        qp->sq.w_list[idx].opcode = mlx5_opcode;
 997        qp->sq.wqe_head[idx] = qp->sq.head + nreq;
 998        qp->sq.cur_post += DIV_ROUND_UP(size * 16, MLX5_SEND_WQE_BB);
 999        qp->sq.w_list[idx].next = qp->sq.cur_post;
1000
 1001        /* Save the edge, which may have been updated during WQE
 1002         * construction, into the SQ's cache.
1003         */
1004        seg = PTR_ALIGN(seg, MLX5_SEND_WQE_BB);
1005        qp->sq.cur_edge = (unlikely(seg == cur_edge)) ?
1006                          get_sq_edge(&qp->sq, qp->sq.cur_post &
1007                                      (qp->sq.wqe_cnt - 1)) :
1008                          cur_edge;
1009}
1010
1011static void handle_rdma_op(const struct ib_send_wr *wr, void **seg, int *size)
1012{
1013        set_raddr_seg(*seg, rdma_wr(wr)->remote_addr, rdma_wr(wr)->rkey);
1014        *seg += sizeof(struct mlx5_wqe_raddr_seg);
1015        *size += sizeof(struct mlx5_wqe_raddr_seg) / 16;
1016}
1017
1018static void handle_local_inv(struct mlx5_ib_qp *qp, const struct ib_send_wr *wr,
1019                             struct mlx5_wqe_ctrl_seg **ctrl, void **seg,
1020                             int *size, void **cur_edge, unsigned int idx)
1021{
1022        qp->sq.wr_data[idx] = IB_WR_LOCAL_INV;
1023        (*ctrl)->imm = cpu_to_be32(wr->ex.invalidate_rkey);
1024        set_linv_wr(qp, seg, size, cur_edge);
1025}
1026
1027static int handle_reg_mr(struct mlx5_ib_qp *qp, const struct ib_send_wr *wr,
1028                         struct mlx5_wqe_ctrl_seg **ctrl, void **seg, int *size,
1029                         void **cur_edge, unsigned int idx)
1030{
1031        qp->sq.wr_data[idx] = IB_WR_REG_MR;
1032        (*ctrl)->imm = cpu_to_be32(reg_wr(wr)->key);
1033        return set_reg_wr(qp, reg_wr(wr), seg, size, cur_edge, true);
1034}
1035
1036static int handle_psv(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
1037                      const struct ib_send_wr *wr,
1038                      struct mlx5_wqe_ctrl_seg **ctrl, void **seg, int *size,
1039                      void **cur_edge, unsigned int *idx, int nreq,
1040                      struct ib_sig_domain *domain, u32 psv_index,
1041                      u8 next_fence)
1042{
1043        int err;
1044
1045        /*
 1046         * SET_PSV WQEs are not signaled; they are solicited only on error.
1047         */
1048        err = __begin_wqe(qp, seg, ctrl, wr, idx, size, cur_edge, nreq,
1049                          false, true);
1050        if (unlikely(err)) {
1051                mlx5_ib_warn(dev, "\n");
1052                err = -ENOMEM;
1053                goto out;
1054        }
1055        err = set_psv_wr(domain, psv_index, seg, size);
1056        if (unlikely(err)) {
1057                mlx5_ib_warn(dev, "\n");
1058                goto out;
1059        }
1060        finish_wqe(qp, *ctrl, *seg, *size, *cur_edge, *idx, wr->wr_id, nreq,
1061                   next_fence, MLX5_OPCODE_SET_PSV);
1062
1063out:
1064        return err;
1065}
1066
1067static int handle_reg_mr_integrity(struct mlx5_ib_dev *dev,
1068                                   struct mlx5_ib_qp *qp,
1069                                   const struct ib_send_wr *wr,
1070                                   struct mlx5_wqe_ctrl_seg **ctrl, void **seg,
1071                                   int *size, void **cur_edge,
1072                                   unsigned int *idx, int nreq, u8 fence,
1073                                   u8 next_fence)
1074{
1075        struct mlx5_ib_mr *mr;
1076        struct mlx5_ib_mr *pi_mr;
1077        struct mlx5_ib_mr pa_pi_mr;
1078        struct ib_sig_attrs *sig_attrs;
1079        struct ib_reg_wr reg_pi_wr;
1080        int err;
1081
1082        qp->sq.wr_data[*idx] = IB_WR_REG_MR_INTEGRITY;
1083
1084        mr = to_mmr(reg_wr(wr)->mr);
1085        pi_mr = mr->pi_mr;
1086
1087        if (pi_mr) {
1088                memset(&reg_pi_wr, 0,
1089                       sizeof(struct ib_reg_wr));
1090
1091                reg_pi_wr.mr = &pi_mr->ibmr;
1092                reg_pi_wr.access = reg_wr(wr)->access;
1093                reg_pi_wr.key = pi_mr->ibmr.rkey;
1094
1095                (*ctrl)->imm = cpu_to_be32(reg_pi_wr.key);
1096                /* UMR for data + prot registration */
1097                err = set_reg_wr(qp, &reg_pi_wr, seg, size, cur_edge, false);
1098                if (unlikely(err))
1099                        goto out;
1100
1101                finish_wqe(qp, *ctrl, *seg, *size, *cur_edge, *idx, wr->wr_id,
1102                           nreq, fence, MLX5_OPCODE_UMR);
1103
1104                err = begin_wqe(qp, seg, ctrl, wr, idx, size, cur_edge, nreq);
1105                if (unlikely(err)) {
1106                        mlx5_ib_warn(dev, "\n");
1107                        err = -ENOMEM;
1108                        goto out;
1109                }
1110        } else {
1111                memset(&pa_pi_mr, 0, sizeof(struct mlx5_ib_mr));
1112                /* No UMR, use local_dma_lkey */
1113                pa_pi_mr.ibmr.lkey = mr->ibmr.pd->local_dma_lkey;
1114                pa_pi_mr.ndescs = mr->ndescs;
1115                pa_pi_mr.data_length = mr->data_length;
1116                pa_pi_mr.data_iova = mr->data_iova;
1117                if (mr->meta_ndescs) {
1118                        pa_pi_mr.meta_ndescs = mr->meta_ndescs;
1119                        pa_pi_mr.meta_length = mr->meta_length;
1120                        pa_pi_mr.pi_iova = mr->pi_iova;
1121                }
1122
1123                pa_pi_mr.ibmr.length = mr->ibmr.length;
1124                mr->pi_mr = &pa_pi_mr;
1125        }
1126        (*ctrl)->imm = cpu_to_be32(mr->ibmr.rkey);
1127        /* UMR for sig MR */
1128        err = set_pi_umr_wr(wr, qp, seg, size, cur_edge);
1129        if (unlikely(err)) {
1130                mlx5_ib_warn(dev, "\n");
1131                goto out;
1132        }
1133        finish_wqe(qp, *ctrl, *seg, *size, *cur_edge, *idx, wr->wr_id, nreq,
1134                   fence, MLX5_OPCODE_UMR);
1135
1136        sig_attrs = mr->ibmr.sig_attrs;
1137        err = handle_psv(dev, qp, wr, ctrl, seg, size, cur_edge, idx, nreq,
1138                         &sig_attrs->mem, mr->sig->psv_memory.psv_idx,
1139                         next_fence);
1140        if (unlikely(err))
1141                goto out;
1142
1143        err = handle_psv(dev, qp, wr, ctrl, seg, size, cur_edge, idx, nreq,
1144                         &sig_attrs->wire, mr->sig->psv_wire.psv_idx,
1145                         next_fence);
1146        if (unlikely(err))
1147                goto out;
1148
1149        qp->next_fence = MLX5_FENCE_MODE_INITIATOR_SMALL;
1150
1151out:
1152        return err;
1153}
1154
1155static int handle_qpt_rc(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
1156                         const struct ib_send_wr *wr,
1157                         struct mlx5_wqe_ctrl_seg **ctrl, void **seg, int *size,
1158                         void **cur_edge, unsigned int *idx, int nreq, u8 fence,
1159                         u8 next_fence, int *num_sge)
1160{
1161        int err = 0;
1162
1163        switch (wr->opcode) {
1164        case IB_WR_RDMA_READ:
1165        case IB_WR_RDMA_WRITE:
1166        case IB_WR_RDMA_WRITE_WITH_IMM:
1167                handle_rdma_op(wr, seg, size);
1168                break;
1169
1170        case IB_WR_ATOMIC_CMP_AND_SWP:
1171        case IB_WR_ATOMIC_FETCH_AND_ADD:
1172        case IB_WR_MASKED_ATOMIC_CMP_AND_SWP:
1173                mlx5_ib_warn(dev, "Atomic operations are not supported yet\n");
1174                err = -EOPNOTSUPP;
1175                goto out;
1176
1177        case IB_WR_LOCAL_INV:
1178                handle_local_inv(qp, wr, ctrl, seg, size, cur_edge, *idx);
1179                *num_sge = 0;
1180                break;
1181
1182        case IB_WR_REG_MR:
1183                err = handle_reg_mr(qp, wr, ctrl, seg, size, cur_edge, *idx);
1184                if (unlikely(err))
1185                        goto out;
1186                *num_sge = 0;
1187                break;
1188
1189        case IB_WR_REG_MR_INTEGRITY:
1190                err = handle_reg_mr_integrity(dev, qp, wr, ctrl, seg, size,
1191                                              cur_edge, idx, nreq, fence,
1192                                              next_fence);
1193                if (unlikely(err))
1194                        goto out;
1195                *num_sge = 0;
1196                break;
1197
1198        default:
1199                break;
1200        }
1201
1202out:
1203        return err;
1204}
1205
1206static void handle_qpt_uc(const struct ib_send_wr *wr, void **seg, int *size)
1207{
1208        switch (wr->opcode) {
1209        case IB_WR_RDMA_WRITE:
1210        case IB_WR_RDMA_WRITE_WITH_IMM:
1211                handle_rdma_op(wr, seg, size);
1212                break;
1213        default:
1214                break;
1215        }
1216}
1217
1218static void handle_qpt_hw_gsi(struct mlx5_ib_qp *qp,
1219                              const struct ib_send_wr *wr, void **seg,
1220                              int *size, void **cur_edge)
1221{
1222        set_datagram_seg(*seg, wr);
1223        *seg += sizeof(struct mlx5_wqe_datagram_seg);
1224        *size += sizeof(struct mlx5_wqe_datagram_seg) / 16;
1225        handle_post_send_edge(&qp->sq, seg, *size, cur_edge);
1226}
1227
1228static void handle_qpt_ud(struct mlx5_ib_qp *qp, const struct ib_send_wr *wr,
1229                          void **seg, int *size, void **cur_edge)
1230{
1231        set_datagram_seg(*seg, wr);
1232        *seg += sizeof(struct mlx5_wqe_datagram_seg);
1233        *size += sizeof(struct mlx5_wqe_datagram_seg) / 16;
1234        handle_post_send_edge(&qp->sq, seg, *size, cur_edge);
1235
 1236        /* Handle a QP that supports UD LSO offload. */
1237        if (qp->flags & IB_QP_CREATE_IPOIB_UD_LSO) {
1238                struct mlx5_wqe_eth_pad *pad;
1239
1240                pad = *seg;
1241                memset(pad, 0, sizeof(struct mlx5_wqe_eth_pad));
1242                *seg += sizeof(struct mlx5_wqe_eth_pad);
1243                *size += sizeof(struct mlx5_wqe_eth_pad) / 16;
1244                set_eth_seg(wr, qp, seg, size, cur_edge);
1245                handle_post_send_edge(&qp->sq, seg, *size, cur_edge);
1246        }
1247}
1248
1249static int handle_qpt_reg_umr(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
1250                              const struct ib_send_wr *wr,
1251                              struct mlx5_wqe_ctrl_seg **ctrl, void **seg,
1252                              int *size, void **cur_edge, unsigned int idx)
1253{
1254        int err = 0;
1255
1256        if (unlikely(wr->opcode != MLX5_IB_WR_UMR)) {
1257                err = -EINVAL;
1258                mlx5_ib_warn(dev, "bad opcode %d\n", wr->opcode);
1259                goto out;
1260        }
1261
1262        qp->sq.wr_data[idx] = MLX5_IB_WR_UMR;
1263        (*ctrl)->imm = cpu_to_be32(umr_wr(wr)->mkey);
1264        err = set_reg_umr_segment(dev, *seg, wr);
1265        if (unlikely(err))
1266                goto out;
1267        *seg += sizeof(struct mlx5_wqe_umr_ctrl_seg);
1268        *size += sizeof(struct mlx5_wqe_umr_ctrl_seg) / 16;
1269        handle_post_send_edge(&qp->sq, seg, *size, cur_edge);
1270        set_reg_mkey_segment(dev, *seg, wr);
1271        *seg += sizeof(struct mlx5_mkey_seg);
1272        *size += sizeof(struct mlx5_mkey_seg) / 16;
1273        handle_post_send_edge(&qp->sq, seg, *size, cur_edge);
1274out:
1275        return err;
1276}
1277
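     /* mlx5_ib_post_send - Post a chain of send work requests to the SQ. One WQE
      * is built per request according to the QP type and opcode; the doorbell
      * record and doorbell register are written once, after the whole chain has
      * been built. On failure, *bad_wr points at the request that could not be
      * posted.
      *
      * Illustrative sketch only (not from this file): a ULP normally reaches this
      * function through ib_post_send(); qp, sge, raddr and rkey below are assumed
      * to have been set up by the caller.
      *
      *	const struct ib_send_wr *bad_wr;
      *	struct ib_rdma_wr wr = {
      *		.wr = { .opcode = IB_WR_RDMA_WRITE, .sg_list = &sge,
      *			.num_sge = 1, .send_flags = IB_SEND_SIGNALED },
      *		.remote_addr = raddr,
      *		.rkey = rkey,
      *	};
      *	int ret = ib_post_send(qp, &wr.wr, &bad_wr);
      */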
1278int mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
1279                      const struct ib_send_wr **bad_wr, bool drain)
1280{
1281        struct mlx5_wqe_ctrl_seg *ctrl = NULL;  /* compiler warning */
1282        struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
1283        struct mlx5_core_dev *mdev = dev->mdev;
1284        struct mlx5_ib_qp *qp = to_mqp(ibqp);
1285        struct mlx5_wqe_xrc_seg *xrc;
1286        struct mlx5_bf *bf;
1287        void *cur_edge;
1288        int size;
1289        unsigned long flags;
1290        unsigned int idx;
1291        int err = 0;
1292        int num_sge;
1293        void *seg;
1294        int nreq;
1295        int i;
1296        u8 next_fence = 0;
1297        u8 fence;
1298
1299        if (unlikely(mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR &&
1300                     !drain)) {
1301                *bad_wr = wr;
1302                return -EIO;
1303        }
1304
1305        if (qp->type == IB_QPT_GSI)
1306                return mlx5_ib_gsi_post_send(ibqp, wr, bad_wr);
1307
1308        bf = &qp->bf;
1309
1310        spin_lock_irqsave(&qp->sq.lock, flags);
1311
1312        for (nreq = 0; wr; nreq++, wr = wr->next) {
1313                if (unlikely(wr->opcode >= ARRAY_SIZE(mlx5_ib_opcode))) {
1314                        mlx5_ib_warn(dev, "\n");
1315                        err = -EINVAL;
1316                        *bad_wr = wr;
1317                        goto out;
1318                }
1319
1320                num_sge = wr->num_sge;
1321                if (unlikely(num_sge > qp->sq.max_gs)) {
1322                        mlx5_ib_warn(dev, "\n");
1323                        err = -EINVAL;
1324                        *bad_wr = wr;
1325                        goto out;
1326                }
1327
1328                err = begin_wqe(qp, &seg, &ctrl, wr, &idx, &size, &cur_edge,
1329                                nreq);
1330                if (err) {
1331                        mlx5_ib_warn(dev, "\n");
1332                        err = -ENOMEM;
1333                        *bad_wr = wr;
1334                        goto out;
1335                }
1336
1337                if (wr->opcode == IB_WR_REG_MR ||
1338                    wr->opcode == IB_WR_REG_MR_INTEGRITY) {
1339                        fence = dev->umr_fence;
1340                        next_fence = MLX5_FENCE_MODE_INITIATOR_SMALL;
1341                } else  {
1342                        if (wr->send_flags & IB_SEND_FENCE) {
1343                                if (qp->next_fence)
1344                                        fence = MLX5_FENCE_MODE_SMALL_AND_FENCE;
1345                                else
1346                                        fence = MLX5_FENCE_MODE_FENCE;
1347                        } else {
1348                                fence = qp->next_fence;
1349                        }
1350                }
1351
1352                switch (qp->type) {
1353                case IB_QPT_XRC_INI:
1354                        xrc = seg;
1355                        seg += sizeof(*xrc);
1356                        size += sizeof(*xrc) / 16;
1357                        fallthrough;
1358                case IB_QPT_RC:
1359                        err = handle_qpt_rc(dev, qp, wr, &ctrl, &seg, &size,
1360                                            &cur_edge, &idx, nreq, fence,
1361                                            next_fence, &num_sge);
1362                        if (unlikely(err)) {
1363                                *bad_wr = wr;
1364                                goto out;
1365                        } else if (wr->opcode == IB_WR_REG_MR_INTEGRITY) {
1366                                goto skip_psv;
1367                        }
1368                        break;
1369
1370                case IB_QPT_UC:
1371                        handle_qpt_uc(wr, &seg, &size);
1372                        break;
1373                case IB_QPT_SMI:
1374                        if (unlikely(!dev->port_caps[qp->port - 1].has_smi)) {
1375                                mlx5_ib_warn(dev, "Send SMP MADs is not allowed\n");
1376                                err = -EPERM;
1377                                *bad_wr = wr;
1378                                goto out;
1379                        }
1380                        fallthrough;
1381                case MLX5_IB_QPT_HW_GSI:
1382                        handle_qpt_hw_gsi(qp, wr, &seg, &size, &cur_edge);
1383                        break;
1384                case IB_QPT_UD:
1385                        handle_qpt_ud(qp, wr, &seg, &size, &cur_edge);
1386                        break;
1387                case MLX5_IB_QPT_REG_UMR:
1388                        err = handle_qpt_reg_umr(dev, qp, wr, &ctrl, &seg,
1389                                                       &size, &cur_edge, idx);
1390                        if (unlikely(err))
1391                                goto out;
1392                        break;
1393
1394                default:
1395                        break;
1396                }
1397
1398                if (wr->send_flags & IB_SEND_INLINE && num_sge) {
1399                        err = set_data_inl_seg(qp, wr, &seg, &size, &cur_edge);
1400                        if (unlikely(err)) {
1401                                mlx5_ib_warn(dev, "\n");
1402                                *bad_wr = wr;
1403                                goto out;
1404                        }
1405                } else {
1406                        for (i = 0; i < num_sge; i++) {
1407                                handle_post_send_edge(&qp->sq, &seg, size,
1408                                                      &cur_edge);
1409                                if (unlikely(!wr->sg_list[i].length))
1410                                        continue;
1411
1412                                set_data_ptr_seg(
1413                                        (struct mlx5_wqe_data_seg *)seg,
1414                                        wr->sg_list + i);
1415                                size += sizeof(struct mlx5_wqe_data_seg) / 16;
1416                                seg += sizeof(struct mlx5_wqe_data_seg);
1417                        }
1418                }
1419
1420                qp->next_fence = next_fence;
1421                finish_wqe(qp, ctrl, seg, size, cur_edge, idx, wr->wr_id, nreq,
1422                           fence, mlx5_ib_opcode[wr->opcode]);
1423skip_psv:
1424                if (0)
1425                        dump_wqe(qp, idx, size);
1426        }
1427
1428out:
1429        if (likely(nreq)) {
1430                qp->sq.head += nreq;
1431
1432                /* Make sure that descriptors are written before
 1433                 * updating the doorbell record and ringing the doorbell.
1434                 */
1435                wmb();
1436
1437                qp->db.db[MLX5_SND_DBR] = cpu_to_be32(qp->sq.cur_post);
1438
 1439                /* Make sure the doorbell record is visible to the HCA
 1440                 * before we ring the doorbell.
1441                 */
1442                wmb();
1443
1444                mlx5_write64((__be32 *)ctrl, bf->bfreg->map + bf->offset);
 1445                /* Make sure doorbells don't leak out of the SQ spinlock
1446                 * and reach the HCA out of order.
1447                 */
1448                bf->offset ^= bf->buf_size;
1449        }
1450
1451        spin_unlock_irqrestore(&qp->sq.lock, flags);
1452
1453        return err;
1454}
1455
1456static void set_sig_seg(struct mlx5_rwqe_sig *sig, int max_gs)
1457{
 1458        sig->signature = calc_sig(sig, (max_gs + 1) << 2);
1459}
1460
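     /* mlx5_ib_post_recv - Post a chain of receive work requests to the RQ. Each
      * request's scatter list is written to the next RQ WQE (terminated by an
      * invalid-lkey entry when it is shorter than max_gs), and the doorbell
      * record is updated once at the end.
      */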
1461int mlx5_ib_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
1462                      const struct ib_recv_wr **bad_wr, bool drain)
1463{
1464        struct mlx5_ib_qp *qp = to_mqp(ibqp);
1465        struct mlx5_wqe_data_seg *scat;
1466        struct mlx5_rwqe_sig *sig;
1467        struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
1468        struct mlx5_core_dev *mdev = dev->mdev;
1469        unsigned long flags;
1470        int err = 0;
1471        int nreq;
1472        int ind;
1473        int i;
1474
1475        if (unlikely(mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR &&
1476                     !drain)) {
1477                *bad_wr = wr;
1478                return -EIO;
1479        }
1480
1481        if (qp->type == IB_QPT_GSI)
1482                return mlx5_ib_gsi_post_recv(ibqp, wr, bad_wr);
1483
1484        spin_lock_irqsave(&qp->rq.lock, flags);
1485
1486        ind = qp->rq.head & (qp->rq.wqe_cnt - 1);
1487
1488        for (nreq = 0; wr; nreq++, wr = wr->next) {
1489                if (mlx5_wq_overflow(&qp->rq, nreq, qp->ibqp.recv_cq)) {
1490                        err = -ENOMEM;
1491                        *bad_wr = wr;
1492                        goto out;
1493                }
1494
1495                if (unlikely(wr->num_sge > qp->rq.max_gs)) {
1496                        err = -EINVAL;
1497                        *bad_wr = wr;
1498                        goto out;
1499                }
1500
1501                scat = mlx5_frag_buf_get_wqe(&qp->rq.fbc, ind);
1502                if (qp->flags_en & MLX5_QP_FLAG_SIGNATURE)
1503                        scat++;
1504
1505                for (i = 0; i < wr->num_sge; i++)
1506                        set_data_ptr_seg(scat + i, wr->sg_list + i);
1507
1508                if (i < qp->rq.max_gs) {
1509                        scat[i].byte_count = 0;
1510                        scat[i].lkey       = cpu_to_be32(MLX5_INVALID_LKEY);
1511                        scat[i].addr       = 0;
1512                }
1513
1514                if (qp->flags_en & MLX5_QP_FLAG_SIGNATURE) {
1515                        sig = (struct mlx5_rwqe_sig *)scat;
1516                        set_sig_seg(sig, qp->rq.max_gs);
1517                }
1518
1519                qp->rq.wrid[ind] = wr->wr_id;
1520
1521                ind = (ind + 1) & (qp->rq.wqe_cnt - 1);
1522        }
1523
1524out:
1525        if (likely(nreq)) {
1526                qp->rq.head += nreq;
1527
1528                /* Make sure that descriptors are written before
 1529                 * the doorbell record is updated.
1530                 */
1531                wmb();
1532
1533                *qp->db.db = cpu_to_be32(qp->rq.head & 0xffff);
1534        }
1535
1536        spin_unlock_irqrestore(&qp->rq.lock, flags);
1537
1538        return err;
1539}
1540