linux/drivers/infiniband/hw/mlx5/qp.c
   1/*
   2 * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved.
   3 *
   4 * This software is available to you under a choice of one of two
   5 * licenses.  You may choose to be licensed under the terms of the GNU
   6 * General Public License (GPL) Version 2, available from the file
   7 * COPYING in the main directory of this source tree, or the
   8 * OpenIB.org BSD license below:
   9 *
  10 *     Redistribution and use in source and binary forms, with or
  11 *     without modification, are permitted provided that the following
  12 *     conditions are met:
  13 *
  14 *      - Redistributions of source code must retain the above
  15 *        copyright notice, this list of conditions and the following
  16 *        disclaimer.
  17 *
  18 *      - Redistributions in binary form must reproduce the above
  19 *        copyright notice, this list of conditions and the following
  20 *        disclaimer in the documentation and/or other materials
  21 *        provided with the distribution.
  22 *
  23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
  27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
  28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  30 * SOFTWARE.
  31 */
  32
  33#include <linux/etherdevice.h>
  34#include <rdma/ib_umem.h>
  35#include <rdma/ib_cache.h>
  36#include <rdma/ib_user_verbs.h>
  37#include <rdma/rdma_counter.h>
  38#include <linux/mlx5/fs.h>
  39#include "mlx5_ib.h"
  40#include "ib_rep.h"
  41#include "counters.h"
  42#include "cmd.h"
  43#include "qp.h"
  44#include "wr.h"
  45
  46enum {
  47        MLX5_IB_ACK_REQ_FREQ    = 8,
  48};
  49
  50enum {
  51        MLX5_IB_DEFAULT_SCHED_QUEUE     = 0x83,
  52        MLX5_IB_DEFAULT_QP0_SCHED_QUEUE = 0x3f,
  53        MLX5_IB_LINK_TYPE_IB            = 0,
  54        MLX5_IB_LINK_TYPE_ETH           = 1
  55};
  56
  57enum raw_qp_set_mask_map {
  58        MLX5_RAW_QP_MOD_SET_RQ_Q_CTR_ID         = 1UL << 0,
  59        MLX5_RAW_QP_RATE_LIMIT                  = 1UL << 1,
  60};
  61
  62struct mlx5_modify_raw_qp_param {
  63        u16 operation;
  64
  65        u32 set_mask; /* raw_qp_set_mask_map */
  66
  67        struct mlx5_rate_limit rl;
  68
  69        u8 rq_q_ctr_id;
  70        u32 port;
  71};
  72
  73static void get_cqs(enum ib_qp_type qp_type,
  74                    struct ib_cq *ib_send_cq, struct ib_cq *ib_recv_cq,
  75                    struct mlx5_ib_cq **send_cq, struct mlx5_ib_cq **recv_cq);
  76
  77static int is_qp0(enum ib_qp_type qp_type)
  78{
  79        return qp_type == IB_QPT_SMI;
  80}
  81
  82static int is_sqp(enum ib_qp_type qp_type)
  83{
  84        return is_qp0(qp_type) || is_qp1(qp_type);
  85}
  86
  87/**
   88 * mlx5_ib_read_user_wqe_common() - Copy a WQE (or part of it) from a user WQ
   89 * to a kernel buffer
  90 *
  91 * @umem: User space memory where the WQ is
  92 * @buffer: buffer to copy to
  93 * @buflen: buffer length
  94 * @wqe_index: index of WQE to copy from
  95 * @wq_offset: offset to start of WQ
  96 * @wq_wqe_cnt: number of WQEs in WQ
  97 * @wq_wqe_shift: log2 of WQE size
  98 * @bcnt: number of bytes to copy
   99 * @bytes_copied: number of bytes actually copied (return value)
  100 *
  101 * Copies bcnt bytes or less, starting from the beginning of the WQE.
  102 * Does not guarantee to copy the entire WQE.
 103 *
 104 * Return: zero on success, or an error code.
 105 */
 106static int mlx5_ib_read_user_wqe_common(struct ib_umem *umem, void *buffer,
 107                                        size_t buflen, int wqe_index,
 108                                        int wq_offset, int wq_wqe_cnt,
 109                                        int wq_wqe_shift, int bcnt,
 110                                        size_t *bytes_copied)
 111{
 112        size_t offset = wq_offset + ((wqe_index % wq_wqe_cnt) << wq_wqe_shift);
 113        size_t wq_end = wq_offset + (wq_wqe_cnt << wq_wqe_shift);
 114        size_t copy_length;
 115        int ret;
 116
 117        /* don't copy more than requested, more than buffer length or
 118         * beyond WQ end
 119         */
 120        copy_length = min_t(u32, buflen, wq_end - offset);
 121        copy_length = min_t(u32, copy_length, bcnt);
 122
 123        ret = ib_umem_copy_from(buffer, umem, offset, copy_length);
 124        if (ret)
 125                return ret;
 126
  127        if (bytes_copied)
  128                *bytes_copied = copy_length;
 129
 130        return 0;
 131}
 132
 133static int mlx5_ib_read_kernel_wqe_sq(struct mlx5_ib_qp *qp, int wqe_index,
 134                                      void *buffer, size_t buflen, size_t *bc)
 135{
 136        struct mlx5_wqe_ctrl_seg *ctrl;
 137        size_t bytes_copied = 0;
 138        size_t wqe_length;
 139        void *p;
 140        int ds;
 141
 142        wqe_index = wqe_index & qp->sq.fbc.sz_m1;
 143
 144        /* read the control segment first */
 145        p = mlx5_frag_buf_get_wqe(&qp->sq.fbc, wqe_index);
 146        ctrl = p;
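             /* The ctrl segment's DS field gives the WQE size in 16-byte
              * units (MLX5_WQE_DS_UNITS), so a WQE may span more than one
              * 64-byte stride (MLX5_SEND_WQE_BB) and wrap around the SQ.
              */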
 147        ds = be32_to_cpu(ctrl->qpn_ds) & MLX5_WQE_CTRL_DS_MASK;
 148        wqe_length = ds * MLX5_WQE_DS_UNITS;
 149
 150        /* read rest of WQE if it spreads over more than one stride */
 151        while (bytes_copied < wqe_length) {
 152                size_t copy_length =
 153                        min_t(size_t, buflen - bytes_copied, MLX5_SEND_WQE_BB);
 154
 155                if (!copy_length)
 156                        break;
 157
 158                memcpy(buffer + bytes_copied, p, copy_length);
 159                bytes_copied += copy_length;
 160
 161                wqe_index = (wqe_index + 1) & qp->sq.fbc.sz_m1;
 162                p = mlx5_frag_buf_get_wqe(&qp->sq.fbc, wqe_index);
 163        }
 164        *bc = bytes_copied;
 165        return 0;
 166}
 167
 168static int mlx5_ib_read_user_wqe_sq(struct mlx5_ib_qp *qp, int wqe_index,
 169                                    void *buffer, size_t buflen, size_t *bc)
 170{
 171        struct mlx5_ib_qp_base *base = &qp->trans_qp.base;
 172        struct ib_umem *umem = base->ubuffer.umem;
 173        struct mlx5_ib_wq *wq = &qp->sq;
 174        struct mlx5_wqe_ctrl_seg *ctrl;
 175        size_t bytes_copied;
 176        size_t bytes_copied2;
 177        size_t wqe_length;
 178        int ret;
 179        int ds;
 180
  181        /* first, read as much as possible */
 182        ret = mlx5_ib_read_user_wqe_common(umem, buffer, buflen, wqe_index,
 183                                           wq->offset, wq->wqe_cnt,
 184                                           wq->wqe_shift, buflen,
 185                                           &bytes_copied);
 186        if (ret)
 187                return ret;
 188
 189        /* we need at least control segment size to proceed */
 190        if (bytes_copied < sizeof(*ctrl))
 191                return -EINVAL;
 192
 193        ctrl = buffer;
 194        ds = be32_to_cpu(ctrl->qpn_ds) & MLX5_WQE_CTRL_DS_MASK;
 195        wqe_length = ds * MLX5_WQE_DS_UNITS;
 196
 197        /* if we copied enough then we are done */
 198        if (bytes_copied >= wqe_length) {
 199                *bc = bytes_copied;
 200                return 0;
 201        }
 202
  203        /* otherwise this is a wrapped-around WQE,
  204         * so read the remaining bytes starting
  205         * from wqe_index 0
  206         */
 207        ret = mlx5_ib_read_user_wqe_common(umem, buffer + bytes_copied,
 208                                           buflen - bytes_copied, 0, wq->offset,
 209                                           wq->wqe_cnt, wq->wqe_shift,
 210                                           wqe_length - bytes_copied,
 211                                           &bytes_copied2);
 212
 213        if (ret)
 214                return ret;
 215        *bc = bytes_copied + bytes_copied2;
 216        return 0;
 217}
 218
 219int mlx5_ib_read_wqe_sq(struct mlx5_ib_qp *qp, int wqe_index, void *buffer,
 220                        size_t buflen, size_t *bc)
 221{
 222        struct mlx5_ib_qp_base *base = &qp->trans_qp.base;
 223        struct ib_umem *umem = base->ubuffer.umem;
 224
 225        if (buflen < sizeof(struct mlx5_wqe_ctrl_seg))
 226                return -EINVAL;
 227
 228        if (!umem)
 229                return mlx5_ib_read_kernel_wqe_sq(qp, wqe_index, buffer,
 230                                                  buflen, bc);
 231
 232        return mlx5_ib_read_user_wqe_sq(qp, wqe_index, buffer, buflen, bc);
 233}
 234
 235static int mlx5_ib_read_user_wqe_rq(struct mlx5_ib_qp *qp, int wqe_index,
 236                                    void *buffer, size_t buflen, size_t *bc)
 237{
 238        struct mlx5_ib_qp_base *base = &qp->trans_qp.base;
 239        struct ib_umem *umem = base->ubuffer.umem;
 240        struct mlx5_ib_wq *wq = &qp->rq;
 241        size_t bytes_copied;
 242        int ret;
 243
 244        ret = mlx5_ib_read_user_wqe_common(umem, buffer, buflen, wqe_index,
 245                                           wq->offset, wq->wqe_cnt,
 246                                           wq->wqe_shift, buflen,
 247                                           &bytes_copied);
 248
 249        if (ret)
 250                return ret;
 251        *bc = bytes_copied;
 252        return 0;
 253}
 254
 255int mlx5_ib_read_wqe_rq(struct mlx5_ib_qp *qp, int wqe_index, void *buffer,
 256                        size_t buflen, size_t *bc)
 257{
 258        struct mlx5_ib_qp_base *base = &qp->trans_qp.base;
 259        struct ib_umem *umem = base->ubuffer.umem;
 260        struct mlx5_ib_wq *wq = &qp->rq;
 261        size_t wqe_size = 1 << wq->wqe_shift;
 262
 263        if (buflen < wqe_size)
 264                return -EINVAL;
 265
 266        if (!umem)
 267                return -EOPNOTSUPP;
 268
 269        return mlx5_ib_read_user_wqe_rq(qp, wqe_index, buffer, buflen, bc);
 270}
 271
 272static int mlx5_ib_read_user_wqe_srq(struct mlx5_ib_srq *srq, int wqe_index,
 273                                     void *buffer, size_t buflen, size_t *bc)
 274{
 275        struct ib_umem *umem = srq->umem;
 276        size_t bytes_copied;
 277        int ret;
 278
 279        ret = mlx5_ib_read_user_wqe_common(umem, buffer, buflen, wqe_index, 0,
 280                                           srq->msrq.max, srq->msrq.wqe_shift,
 281                                           buflen, &bytes_copied);
 282
 283        if (ret)
 284                return ret;
 285        *bc = bytes_copied;
 286        return 0;
 287}
 288
 289int mlx5_ib_read_wqe_srq(struct mlx5_ib_srq *srq, int wqe_index, void *buffer,
 290                         size_t buflen, size_t *bc)
 291{
 292        struct ib_umem *umem = srq->umem;
 293        size_t wqe_size = 1 << srq->msrq.wqe_shift;
 294
 295        if (buflen < wqe_size)
 296                return -EINVAL;
 297
 298        if (!umem)
 299                return -EOPNOTSUPP;
 300
 301        return mlx5_ib_read_user_wqe_srq(srq, wqe_index, buffer, buflen, bc);
 302}
 303
 304static void mlx5_ib_qp_event(struct mlx5_core_qp *qp, int type)
 305{
 306        struct ib_qp *ibqp = &to_mibqp(qp)->ibqp;
 307        struct ib_event event;
 308
 309        if (type == MLX5_EVENT_TYPE_PATH_MIG) {
 310                /* This event is only valid for trans_qps */
 311                to_mibqp(qp)->port = to_mibqp(qp)->trans_qp.alt_port;
 312        }
 313
 314        if (ibqp->event_handler) {
 315                event.device     = ibqp->device;
 316                event.element.qp = ibqp;
 317                switch (type) {
 318                case MLX5_EVENT_TYPE_PATH_MIG:
 319                        event.event = IB_EVENT_PATH_MIG;
 320                        break;
 321                case MLX5_EVENT_TYPE_COMM_EST:
 322                        event.event = IB_EVENT_COMM_EST;
 323                        break;
 324                case MLX5_EVENT_TYPE_SQ_DRAINED:
 325                        event.event = IB_EVENT_SQ_DRAINED;
 326                        break;
 327                case MLX5_EVENT_TYPE_SRQ_LAST_WQE:
 328                        event.event = IB_EVENT_QP_LAST_WQE_REACHED;
 329                        break;
 330                case MLX5_EVENT_TYPE_WQ_CATAS_ERROR:
 331                        event.event = IB_EVENT_QP_FATAL;
 332                        break;
 333                case MLX5_EVENT_TYPE_PATH_MIG_FAILED:
 334                        event.event = IB_EVENT_PATH_MIG_ERR;
 335                        break;
 336                case MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR:
 337                        event.event = IB_EVENT_QP_REQ_ERR;
 338                        break;
 339                case MLX5_EVENT_TYPE_WQ_ACCESS_ERROR:
 340                        event.event = IB_EVENT_QP_ACCESS_ERR;
 341                        break;
 342                default:
 343                        pr_warn("mlx5_ib: Unexpected event type %d on QP %06x\n", type, qp->qpn);
 344                        return;
 345                }
 346
 347                ibqp->event_handler(&event, ibqp->qp_context);
 348        }
 349}
 350
 351static int set_rq_size(struct mlx5_ib_dev *dev, struct ib_qp_cap *cap,
 352                       int has_rq, struct mlx5_ib_qp *qp, struct mlx5_ib_create_qp *ucmd)
 353{
 354        int wqe_size;
 355        int wq_size;
 356
 357        /* Sanity check RQ size before proceeding */
 358        if (cap->max_recv_wr > (1 << MLX5_CAP_GEN(dev->mdev, log_max_qp_sz)))
 359                return -EINVAL;
 360
 361        if (!has_rq) {
 362                qp->rq.max_gs = 0;
 363                qp->rq.wqe_cnt = 0;
 364                qp->rq.wqe_shift = 0;
 365                cap->max_recv_wr = 0;
 366                cap->max_recv_sge = 0;
 367        } else {
 368                int wq_sig = !!(qp->flags_en & MLX5_QP_FLAG_SIGNATURE);
 369
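                     /* With MLX5_QP_FLAG_SIGNATURE set, one data-segment-sized
                      * slot of each RQ WQE is taken by the signature segment,
                      * which is why wq_sig is subtracted from max_gs below.
                      */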
 370                if (ucmd) {
 371                        qp->rq.wqe_cnt = ucmd->rq_wqe_count;
 372                        if (ucmd->rq_wqe_shift > BITS_PER_BYTE * sizeof(ucmd->rq_wqe_shift))
 373                                return -EINVAL;
 374                        qp->rq.wqe_shift = ucmd->rq_wqe_shift;
 375                        if ((1 << qp->rq.wqe_shift) /
 376                                    sizeof(struct mlx5_wqe_data_seg) <
 377                            wq_sig)
 378                                return -EINVAL;
 379                        qp->rq.max_gs =
 380                                (1 << qp->rq.wqe_shift) /
 381                                        sizeof(struct mlx5_wqe_data_seg) -
 382                                wq_sig;
 383                        qp->rq.max_post = qp->rq.wqe_cnt;
 384                } else {
 385                        wqe_size =
 386                                wq_sig ? sizeof(struct mlx5_wqe_signature_seg) :
 387                                         0;
 388                        wqe_size += cap->max_recv_sge * sizeof(struct mlx5_wqe_data_seg);
 389                        wqe_size = roundup_pow_of_two(wqe_size);
 390                        wq_size = roundup_pow_of_two(cap->max_recv_wr) * wqe_size;
 391                        wq_size = max_t(int, wq_size, MLX5_SEND_WQE_BB);
 392                        qp->rq.wqe_cnt = wq_size / wqe_size;
 393                        if (wqe_size > MLX5_CAP_GEN(dev->mdev, max_wqe_sz_rq)) {
 394                                mlx5_ib_dbg(dev, "wqe_size %d, max %d\n",
 395                                            wqe_size,
 396                                            MLX5_CAP_GEN(dev->mdev,
 397                                                         max_wqe_sz_rq));
 398                                return -EINVAL;
 399                        }
 400                        qp->rq.wqe_shift = ilog2(wqe_size);
 401                        qp->rq.max_gs =
 402                                (1 << qp->rq.wqe_shift) /
 403                                        sizeof(struct mlx5_wqe_data_seg) -
 404                                wq_sig;
 405                        qp->rq.max_post = qp->rq.wqe_cnt;
 406                }
 407        }
 408
 409        return 0;
 410}
 411
 412static int sq_overhead(struct ib_qp_init_attr *attr)
 413{
 414        int size = 0;
 415
 416        switch (attr->qp_type) {
 417        case IB_QPT_XRC_INI:
 418                size += sizeof(struct mlx5_wqe_xrc_seg);
 419                fallthrough;
 420        case IB_QPT_RC:
 421                size += sizeof(struct mlx5_wqe_ctrl_seg) +
 422                        max(sizeof(struct mlx5_wqe_atomic_seg) +
 423                            sizeof(struct mlx5_wqe_raddr_seg),
 424                            sizeof(struct mlx5_wqe_umr_ctrl_seg) +
 425                            sizeof(struct mlx5_mkey_seg) +
 426                            MLX5_IB_SQ_UMR_INLINE_THRESHOLD /
 427                            MLX5_IB_UMR_OCTOWORD);
 428                break;
 429
 430        case IB_QPT_XRC_TGT:
 431                return 0;
 432
 433        case IB_QPT_UC:
 434                size += sizeof(struct mlx5_wqe_ctrl_seg) +
 435                        max(sizeof(struct mlx5_wqe_raddr_seg),
 436                            sizeof(struct mlx5_wqe_umr_ctrl_seg) +
 437                            sizeof(struct mlx5_mkey_seg));
 438                break;
 439
 440        case IB_QPT_UD:
 441                if (attr->create_flags & IB_QP_CREATE_IPOIB_UD_LSO)
 442                        size += sizeof(struct mlx5_wqe_eth_pad) +
 443                                sizeof(struct mlx5_wqe_eth_seg);
 444                fallthrough;
 445        case IB_QPT_SMI:
 446        case MLX5_IB_QPT_HW_GSI:
 447                size += sizeof(struct mlx5_wqe_ctrl_seg) +
 448                        sizeof(struct mlx5_wqe_datagram_seg);
 449                break;
 450
 451        case MLX5_IB_QPT_REG_UMR:
 452                size += sizeof(struct mlx5_wqe_ctrl_seg) +
 453                        sizeof(struct mlx5_wqe_umr_ctrl_seg) +
 454                        sizeof(struct mlx5_mkey_seg);
 455                break;
 456
 457        default:
 458                return -EINVAL;
 459        }
 460
 461        return size;
 462}
 463
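     /* A send WQE must hold the transport-specific overhead (sq_overhead())
      * plus the larger of the SGE list and the inline data, rounded up to the
      * 64-byte basic block (MLX5_SEND_WQE_BB).  QPs created with
      * IB_QP_CREATE_INTEGRITY_EN get at least MLX5_SIG_WQE_SIZE.
      */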
 464static int calc_send_wqe(struct ib_qp_init_attr *attr)
 465{
 466        int inl_size = 0;
 467        int size;
 468
 469        size = sq_overhead(attr);
 470        if (size < 0)
 471                return size;
 472
 473        if (attr->cap.max_inline_data) {
 474                inl_size = size + sizeof(struct mlx5_wqe_inline_seg) +
 475                        attr->cap.max_inline_data;
 476        }
 477
 478        size += attr->cap.max_send_sge * sizeof(struct mlx5_wqe_data_seg);
 479        if (attr->create_flags & IB_QP_CREATE_INTEGRITY_EN &&
 480            ALIGN(max_t(int, inl_size, size), MLX5_SEND_WQE_BB) < MLX5_SIG_WQE_SIZE)
 481                return MLX5_SIG_WQE_SIZE;
 482        else
 483                return ALIGN(max_t(int, inl_size, size), MLX5_SEND_WQE_BB);
 484}
 485
 486static int get_send_sge(struct ib_qp_init_attr *attr, int wqe_size)
 487{
 488        int max_sge;
 489
 490        if (attr->qp_type == IB_QPT_RC)
 491                max_sge = (min_t(int, wqe_size, 512) -
 492                           sizeof(struct mlx5_wqe_ctrl_seg) -
 493                           sizeof(struct mlx5_wqe_raddr_seg)) /
 494                        sizeof(struct mlx5_wqe_data_seg);
 495        else if (attr->qp_type == IB_QPT_XRC_INI)
 496                max_sge = (min_t(int, wqe_size, 512) -
 497                           sizeof(struct mlx5_wqe_ctrl_seg) -
 498                           sizeof(struct mlx5_wqe_xrc_seg) -
 499                           sizeof(struct mlx5_wqe_raddr_seg)) /
 500                        sizeof(struct mlx5_wqe_data_seg);
 501        else
 502                max_sge = (wqe_size - sq_overhead(attr)) /
 503                        sizeof(struct mlx5_wqe_data_seg);
 504
 505        return min_t(int, max_sge, wqe_size - sq_overhead(attr) /
 506                     sizeof(struct mlx5_wqe_data_seg));
 507}
 508
 509static int calc_sq_size(struct mlx5_ib_dev *dev, struct ib_qp_init_attr *attr,
 510                        struct mlx5_ib_qp *qp)
 511{
 512        int wqe_size;
 513        int wq_size;
 514
 515        if (!attr->cap.max_send_wr)
 516                return 0;
 517
 518        wqe_size = calc_send_wqe(attr);
 519        mlx5_ib_dbg(dev, "wqe_size %d\n", wqe_size);
 520        if (wqe_size < 0)
 521                return wqe_size;
 522
 523        if (wqe_size > MLX5_CAP_GEN(dev->mdev, max_wqe_sz_sq)) {
 524                mlx5_ib_dbg(dev, "wqe_size(%d) > max_sq_desc_sz(%d)\n",
 525                            wqe_size, MLX5_CAP_GEN(dev->mdev, max_wqe_sz_sq));
 526                return -EINVAL;
 527        }
 528
 529        qp->max_inline_data = wqe_size - sq_overhead(attr) -
 530                              sizeof(struct mlx5_wqe_inline_seg);
 531        attr->cap.max_inline_data = qp->max_inline_data;
 532
 533        wq_size = roundup_pow_of_two(attr->cap.max_send_wr * wqe_size);
 534        qp->sq.wqe_cnt = wq_size / MLX5_SEND_WQE_BB;
 535        if (qp->sq.wqe_cnt > (1 << MLX5_CAP_GEN(dev->mdev, log_max_qp_sz))) {
 536                mlx5_ib_dbg(dev, "send queue size (%d * %d / %d -> %d) exceeds limits(%d)\n",
 537                            attr->cap.max_send_wr, wqe_size, MLX5_SEND_WQE_BB,
 538                            qp->sq.wqe_cnt,
 539                            1 << MLX5_CAP_GEN(dev->mdev, log_max_qp_sz));
 540                return -ENOMEM;
 541        }
 542        qp->sq.wqe_shift = ilog2(MLX5_SEND_WQE_BB);
 543        qp->sq.max_gs = get_send_sge(attr, wqe_size);
 544        if (qp->sq.max_gs < attr->cap.max_send_sge)
 545                return -ENOMEM;
 546
 547        attr->cap.max_send_sge = qp->sq.max_gs;
 548        qp->sq.max_post = wq_size / wqe_size;
 549        attr->cap.max_send_wr = qp->sq.max_post;
 550
 551        return wq_size;
 552}
 553
 554static int set_user_buf_size(struct mlx5_ib_dev *dev,
 555                            struct mlx5_ib_qp *qp,
 556                            struct mlx5_ib_create_qp *ucmd,
 557                            struct mlx5_ib_qp_base *base,
 558                            struct ib_qp_init_attr *attr)
 559{
 560        int desc_sz = 1 << qp->sq.wqe_shift;
 561
 562        if (desc_sz > MLX5_CAP_GEN(dev->mdev, max_wqe_sz_sq)) {
 563                mlx5_ib_warn(dev, "desc_sz %d, max_sq_desc_sz %d\n",
 564                             desc_sz, MLX5_CAP_GEN(dev->mdev, max_wqe_sz_sq));
 565                return -EINVAL;
 566        }
 567
 568        if (ucmd->sq_wqe_count && !is_power_of_2(ucmd->sq_wqe_count)) {
 569                mlx5_ib_warn(dev, "sq_wqe_count %d is not a power of two\n",
 570                             ucmd->sq_wqe_count);
 571                return -EINVAL;
 572        }
 573
 574        qp->sq.wqe_cnt = ucmd->sq_wqe_count;
 575
 576        if (qp->sq.wqe_cnt > (1 << MLX5_CAP_GEN(dev->mdev, log_max_qp_sz))) {
 577                mlx5_ib_warn(dev, "wqe_cnt %d, max_wqes %d\n",
 578                             qp->sq.wqe_cnt,
 579                             1 << MLX5_CAP_GEN(dev->mdev, log_max_qp_sz));
 580                return -EINVAL;
 581        }
 582
 583        if (attr->qp_type == IB_QPT_RAW_PACKET ||
 584            qp->flags & IB_QP_CREATE_SOURCE_QPN) {
 585                base->ubuffer.buf_size = qp->rq.wqe_cnt << qp->rq.wqe_shift;
 586                qp->raw_packet_qp.sq.ubuffer.buf_size = qp->sq.wqe_cnt << 6;
 587        } else {
 588                base->ubuffer.buf_size = (qp->rq.wqe_cnt << qp->rq.wqe_shift) +
 589                                         (qp->sq.wqe_cnt << 6);
 590        }
 591
 592        return 0;
 593}
 594
 595static int qp_has_rq(struct ib_qp_init_attr *attr)
 596{
 597        if (attr->qp_type == IB_QPT_XRC_INI ||
 598            attr->qp_type == IB_QPT_XRC_TGT || attr->srq ||
 599            attr->qp_type == MLX5_IB_QPT_REG_UMR ||
 600            !attr->cap.max_recv_wr)
 601                return 0;
 602
 603        return 1;
 604}
 605
 606enum {
  607        /* This is the first blue flame register in the array of bfregs assigned
  608         * to a process. Since we do not use it for blue flame but rather for
  609         * regular 64-bit doorbells, we do not need a lock for maintaining
  610         * "odd/even" order.
  611         */
 612        NUM_NON_BLUE_FLAME_BFREGS = 1,
 613};
 614
 615static int max_bfregs(struct mlx5_ib_dev *dev, struct mlx5_bfreg_info *bfregi)
 616{
 617        return get_uars_per_sys_page(dev, bfregi->lib_uar_4k) *
 618               bfregi->num_static_sys_pages * MLX5_NON_FP_BFREGS_PER_UAR;
 619}
 620
 621static int num_med_bfreg(struct mlx5_ib_dev *dev,
 622                         struct mlx5_bfreg_info *bfregi)
 623{
 624        int n;
 625
 626        n = max_bfregs(dev, bfregi) - bfregi->num_low_latency_bfregs -
 627            NUM_NON_BLUE_FLAME_BFREGS;
 628
 629        return n >= 0 ? n : 0;
 630}
 631
 632static int first_med_bfreg(struct mlx5_ib_dev *dev,
 633                           struct mlx5_bfreg_info *bfregi)
 634{
 635        return num_med_bfreg(dev, bfregi) ? 1 : -ENOMEM;
 636}
 637
 638static int first_hi_bfreg(struct mlx5_ib_dev *dev,
 639                          struct mlx5_bfreg_info *bfregi)
 640{
 641        int med;
 642
 643        med = num_med_bfreg(dev, bfregi);
 644        return ++med;
 645}
 646
 647static int alloc_high_class_bfreg(struct mlx5_ib_dev *dev,
 648                                  struct mlx5_bfreg_info *bfregi)
 649{
 650        int i;
 651
 652        for (i = first_hi_bfreg(dev, bfregi); i < max_bfregs(dev, bfregi); i++) {
 653                if (!bfregi->count[i]) {
 654                        bfregi->count[i]++;
 655                        return i;
 656                }
 657        }
 658
 659        return -ENOMEM;
 660}
 661
 662static int alloc_med_class_bfreg(struct mlx5_ib_dev *dev,
 663                                 struct mlx5_bfreg_info *bfregi)
 664{
 665        int minidx = first_med_bfreg(dev, bfregi);
 666        int i;
 667
 668        if (minidx < 0)
 669                return minidx;
 670
 671        for (i = minidx; i < first_hi_bfreg(dev, bfregi); i++) {
 672                if (bfregi->count[i] < bfregi->count[minidx])
 673                        minidx = i;
 674                if (!bfregi->count[minidx])
 675                        break;
 676        }
 677
 678        bfregi->count[minidx]++;
 679        return minidx;
 680}
 681
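     /* bfreg 0 is the shared register used for regular 64-bit doorbells, the
      * next num_med_bfreg() registers form the "medium" class and may be
      * shared (the least used one is picked), and the remaining "high" class
      * registers are handed out exclusively.  High class is tried first, then
      * medium; contexts with ver < 2, and the fallback path, use register 0.
      */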
 682static int alloc_bfreg(struct mlx5_ib_dev *dev,
 683                       struct mlx5_bfreg_info *bfregi)
 684{
 685        int bfregn = -ENOMEM;
 686
 687        if (bfregi->lib_uar_dyn)
 688                return -EINVAL;
 689
 690        mutex_lock(&bfregi->lock);
 691        if (bfregi->ver >= 2) {
 692                bfregn = alloc_high_class_bfreg(dev, bfregi);
 693                if (bfregn < 0)
 694                        bfregn = alloc_med_class_bfreg(dev, bfregi);
 695        }
 696
 697        if (bfregn < 0) {
 698                BUILD_BUG_ON(NUM_NON_BLUE_FLAME_BFREGS != 1);
 699                bfregn = 0;
 700                bfregi->count[bfregn]++;
 701        }
 702        mutex_unlock(&bfregi->lock);
 703
 704        return bfregn;
 705}
 706
 707void mlx5_ib_free_bfreg(struct mlx5_ib_dev *dev, struct mlx5_bfreg_info *bfregi, int bfregn)
 708{
 709        mutex_lock(&bfregi->lock);
 710        bfregi->count[bfregn]--;
 711        mutex_unlock(&bfregi->lock);
 712}
 713
 714static enum mlx5_qp_state to_mlx5_state(enum ib_qp_state state)
 715{
 716        switch (state) {
 717        case IB_QPS_RESET:      return MLX5_QP_STATE_RST;
 718        case IB_QPS_INIT:       return MLX5_QP_STATE_INIT;
 719        case IB_QPS_RTR:        return MLX5_QP_STATE_RTR;
 720        case IB_QPS_RTS:        return MLX5_QP_STATE_RTS;
 721        case IB_QPS_SQD:        return MLX5_QP_STATE_SQD;
 722        case IB_QPS_SQE:        return MLX5_QP_STATE_SQER;
 723        case IB_QPS_ERR:        return MLX5_QP_STATE_ERR;
 724        default:                return -1;
 725        }
 726}
 727
 728static int to_mlx5_st(enum ib_qp_type type)
 729{
 730        switch (type) {
 731        case IB_QPT_RC:                 return MLX5_QP_ST_RC;
 732        case IB_QPT_UC:                 return MLX5_QP_ST_UC;
 733        case IB_QPT_UD:                 return MLX5_QP_ST_UD;
 734        case MLX5_IB_QPT_REG_UMR:       return MLX5_QP_ST_REG_UMR;
 735        case IB_QPT_XRC_INI:
 736        case IB_QPT_XRC_TGT:            return MLX5_QP_ST_XRC;
 737        case IB_QPT_SMI:                return MLX5_QP_ST_QP0;
 738        case MLX5_IB_QPT_HW_GSI:        return MLX5_QP_ST_QP1;
 739        case MLX5_IB_QPT_DCI:           return MLX5_QP_ST_DCI;
 740        case IB_QPT_RAW_PACKET:         return MLX5_QP_ST_RAW_ETHERTYPE;
 741        default:                return -EINVAL;
 742        }
 743}
 744
 745static void mlx5_ib_lock_cqs(struct mlx5_ib_cq *send_cq,
 746                             struct mlx5_ib_cq *recv_cq);
 747static void mlx5_ib_unlock_cqs(struct mlx5_ib_cq *send_cq,
 748                               struct mlx5_ib_cq *recv_cq);
 749
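     /* Map a bfreg number to its hardware UAR index.  Each system page
      * provides get_uars_per_sys_page() UARs with MLX5_NON_FP_BFREGS_PER_UAR
      * usable bfregs each; dynamically allocated bfregs live in the system
      * pages that follow the statically allocated ones.
      */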
 750int bfregn_to_uar_index(struct mlx5_ib_dev *dev,
 751                        struct mlx5_bfreg_info *bfregi, u32 bfregn,
 752                        bool dyn_bfreg)
 753{
 754        unsigned int bfregs_per_sys_page;
 755        u32 index_of_sys_page;
 756        u32 offset;
 757
 758        if (bfregi->lib_uar_dyn)
 759                return -EINVAL;
 760
 761        bfregs_per_sys_page = get_uars_per_sys_page(dev, bfregi->lib_uar_4k) *
 762                                MLX5_NON_FP_BFREGS_PER_UAR;
 763        index_of_sys_page = bfregn / bfregs_per_sys_page;
 764
 765        if (dyn_bfreg) {
 766                index_of_sys_page += bfregi->num_static_sys_pages;
 767
 768                if (index_of_sys_page >= bfregi->num_sys_pages)
 769                        return -EINVAL;
 770
 771                if (bfregn > bfregi->num_dyn_bfregs ||
 772                    bfregi->sys_pages[index_of_sys_page] == MLX5_IB_INVALID_UAR_INDEX) {
 773                        mlx5_ib_dbg(dev, "Invalid dynamic uar index\n");
 774                        return -EINVAL;
 775                }
 776        }
 777
 778        offset = bfregn % bfregs_per_sys_page / MLX5_NON_FP_BFREGS_PER_UAR;
 779        return bfregi->sys_pages[index_of_sys_page] + offset;
 780}
 781
 782static void destroy_user_rq(struct mlx5_ib_dev *dev, struct ib_pd *pd,
 783                            struct mlx5_ib_rwq *rwq, struct ib_udata *udata)
 784{
 785        struct mlx5_ib_ucontext *context =
 786                rdma_udata_to_drv_context(
 787                        udata,
 788                        struct mlx5_ib_ucontext,
 789                        ibucontext);
 790
 791        if (rwq->create_flags & MLX5_IB_WQ_FLAGS_DELAY_DROP)
 792                atomic_dec(&dev->delay_drop.rqs_cnt);
 793
 794        mlx5_ib_db_unmap_user(context, &rwq->db);
 795        ib_umem_release(rwq->umem);
 796}
 797
 798static int create_user_rq(struct mlx5_ib_dev *dev, struct ib_pd *pd,
 799                          struct ib_udata *udata, struct mlx5_ib_rwq *rwq,
 800                          struct mlx5_ib_create_wq *ucmd)
 801{
 802        struct mlx5_ib_ucontext *ucontext = rdma_udata_to_drv_context(
 803                udata, struct mlx5_ib_ucontext, ibucontext);
 804        unsigned long page_size = 0;
 805        u32 offset = 0;
 806        int err;
 807
 808        if (!ucmd->buf_addr)
 809                return -EINVAL;
 810
 811        rwq->umem = ib_umem_get(&dev->ib_dev, ucmd->buf_addr, rwq->buf_size, 0);
 812        if (IS_ERR(rwq->umem)) {
 813                mlx5_ib_dbg(dev, "umem_get failed\n");
 814                err = PTR_ERR(rwq->umem);
 815                return err;
 816        }
 817
 818        page_size = mlx5_umem_find_best_quantized_pgoff(
 819                rwq->umem, wq, log_wq_pg_sz, MLX5_ADAPTER_PAGE_SHIFT,
 820                page_offset, 64, &rwq->rq_page_offset);
 821        if (!page_size) {
 822                mlx5_ib_warn(dev, "bad offset\n");
 823                err = -EINVAL;
 824                goto err_umem;
 825        }
 826
 827        rwq->rq_num_pas = ib_umem_num_dma_blocks(rwq->umem, page_size);
 828        rwq->page_shift = order_base_2(page_size);
 829        rwq->log_page_size =  rwq->page_shift - MLX5_ADAPTER_PAGE_SHIFT;
 830        rwq->wq_sig = !!(ucmd->flags & MLX5_WQ_FLAG_SIGNATURE);
 831
 832        mlx5_ib_dbg(
 833                dev,
 834                "addr 0x%llx, size %zd, npages %zu, page_size %ld, ncont %d, offset %d\n",
 835                (unsigned long long)ucmd->buf_addr, rwq->buf_size,
 836                ib_umem_num_pages(rwq->umem), page_size, rwq->rq_num_pas,
 837                offset);
 838
 839        err = mlx5_ib_db_map_user(ucontext, ucmd->db_addr, &rwq->db);
 840        if (err) {
 841                mlx5_ib_dbg(dev, "map failed\n");
 842                goto err_umem;
 843        }
 844
 845        return 0;
 846
 847err_umem:
 848        ib_umem_release(rwq->umem);
 849        return err;
 850}
 851
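     /* Convert the driver's internal bfreg numbering, which counts only the
      * non-fast-path bfregs (MLX5_NON_FP_BFREGS_PER_UAR per UAR), into the
      * user-visible numbering that counts all MLX5_BFREGS_PER_UAR bfregs per
      * UAR.
      */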
 852static int adjust_bfregn(struct mlx5_ib_dev *dev,
 853                         struct mlx5_bfreg_info *bfregi, int bfregn)
 854{
 855        return bfregn / MLX5_NON_FP_BFREGS_PER_UAR * MLX5_BFREGS_PER_UAR +
 856                                bfregn % MLX5_NON_FP_BFREGS_PER_UAR;
 857}
 858
 859static int _create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd,
 860                           struct mlx5_ib_qp *qp, struct ib_udata *udata,
 861                           struct ib_qp_init_attr *attr, u32 **in,
 862                           struct mlx5_ib_create_qp_resp *resp, int *inlen,
 863                           struct mlx5_ib_qp_base *base,
 864                           struct mlx5_ib_create_qp *ucmd)
 865{
 866        struct mlx5_ib_ucontext *context;
 867        struct mlx5_ib_ubuffer *ubuffer = &base->ubuffer;
 868        unsigned int page_offset_quantized = 0;
 869        unsigned long page_size = 0;
 870        int uar_index = 0;
 871        int bfregn;
 872        int ncont = 0;
 873        __be64 *pas;
 874        void *qpc;
 875        int err;
 876        u16 uid;
 877        u32 uar_flags;
 878
 879        context = rdma_udata_to_drv_context(udata, struct mlx5_ib_ucontext,
 880                                            ibucontext);
 881        uar_flags = qp->flags_en &
 882                    (MLX5_QP_FLAG_UAR_PAGE_INDEX | MLX5_QP_FLAG_BFREG_INDEX);
 883        switch (uar_flags) {
 884        case MLX5_QP_FLAG_UAR_PAGE_INDEX:
 885                uar_index = ucmd->bfreg_index;
 886                bfregn = MLX5_IB_INVALID_BFREG;
 887                break;
 888        case MLX5_QP_FLAG_BFREG_INDEX:
 889                uar_index = bfregn_to_uar_index(dev, &context->bfregi,
 890                                                ucmd->bfreg_index, true);
 891                if (uar_index < 0)
 892                        return uar_index;
 893                bfregn = MLX5_IB_INVALID_BFREG;
 894                break;
 895        case 0:
 896                if (qp->flags & IB_QP_CREATE_CROSS_CHANNEL)
 897                        return -EINVAL;
 898                bfregn = alloc_bfreg(dev, &context->bfregi);
 899                if (bfregn < 0)
 900                        return bfregn;
 901                break;
 902        default:
 903                return -EINVAL;
 904        }
 905
 906        mlx5_ib_dbg(dev, "bfregn 0x%x, uar_index 0x%x\n", bfregn, uar_index);
 907        if (bfregn != MLX5_IB_INVALID_BFREG)
 908                uar_index = bfregn_to_uar_index(dev, &context->bfregi, bfregn,
 909                                                false);
 910
 911        qp->rq.offset = 0;
 912        qp->sq.wqe_shift = ilog2(MLX5_SEND_WQE_BB);
 913        qp->sq.offset = qp->rq.wqe_cnt << qp->rq.wqe_shift;
 914
 915        err = set_user_buf_size(dev, qp, ucmd, base, attr);
 916        if (err)
 917                goto err_bfreg;
 918
 919        if (ucmd->buf_addr && ubuffer->buf_size) {
 920                ubuffer->buf_addr = ucmd->buf_addr;
 921                ubuffer->umem = ib_umem_get(&dev->ib_dev, ubuffer->buf_addr,
 922                                            ubuffer->buf_size, 0);
 923                if (IS_ERR(ubuffer->umem)) {
 924                        err = PTR_ERR(ubuffer->umem);
 925                        goto err_bfreg;
 926                }
 927                page_size = mlx5_umem_find_best_quantized_pgoff(
 928                        ubuffer->umem, qpc, log_page_size,
 929                        MLX5_ADAPTER_PAGE_SHIFT, page_offset, 64,
 930                        &page_offset_quantized);
 931                if (!page_size) {
 932                        err = -EINVAL;
 933                        goto err_umem;
 934                }
 935                ncont = ib_umem_num_dma_blocks(ubuffer->umem, page_size);
 936        } else {
 937                ubuffer->umem = NULL;
 938        }
 939
 940        *inlen = MLX5_ST_SZ_BYTES(create_qp_in) +
 941                 MLX5_FLD_SZ_BYTES(create_qp_in, pas[0]) * ncont;
 942        *in = kvzalloc(*inlen, GFP_KERNEL);
 943        if (!*in) {
 944                err = -ENOMEM;
 945                goto err_umem;
 946        }
 947
 948        uid = (attr->qp_type != IB_QPT_XRC_INI) ? to_mpd(pd)->uid : 0;
 949        MLX5_SET(create_qp_in, *in, uid, uid);
 950        qpc = MLX5_ADDR_OF(create_qp_in, *in, qpc);
 951        pas = (__be64 *)MLX5_ADDR_OF(create_qp_in, *in, pas);
 952        if (ubuffer->umem) {
 953                mlx5_ib_populate_pas(ubuffer->umem, page_size, pas, 0);
 954                MLX5_SET(qpc, qpc, log_page_size,
 955                         order_base_2(page_size) - MLX5_ADAPTER_PAGE_SHIFT);
 956                MLX5_SET(qpc, qpc, page_offset, page_offset_quantized);
 957        }
 958        MLX5_SET(qpc, qpc, uar_page, uar_index);
 959        if (bfregn != MLX5_IB_INVALID_BFREG)
 960                resp->bfreg_index = adjust_bfregn(dev, &context->bfregi, bfregn);
 961        else
 962                resp->bfreg_index = MLX5_IB_INVALID_BFREG;
 963        qp->bfregn = bfregn;
 964
 965        err = mlx5_ib_db_map_user(context, ucmd->db_addr, &qp->db);
 966        if (err) {
 967                mlx5_ib_dbg(dev, "map failed\n");
 968                goto err_free;
 969        }
 970
 971        return 0;
 972
 973err_free:
 974        kvfree(*in);
 975
 976err_umem:
 977        ib_umem_release(ubuffer->umem);
 978
 979err_bfreg:
 980        if (bfregn != MLX5_IB_INVALID_BFREG)
 981                mlx5_ib_free_bfreg(dev, &context->bfregi, bfregn);
 982        return err;
 983}
 984
 985static void destroy_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
 986                       struct mlx5_ib_qp_base *base, struct ib_udata *udata)
 987{
 988        struct mlx5_ib_ucontext *context = rdma_udata_to_drv_context(
 989                udata, struct mlx5_ib_ucontext, ibucontext);
 990
 991        if (udata) {
 992                /* User QP */
 993                mlx5_ib_db_unmap_user(context, &qp->db);
 994                ib_umem_release(base->ubuffer.umem);
 995
 996                /*
 997                 * Free only the BFREGs which are handled by the kernel.
  998                 * BFREGs of UARs allocated dynamically are handled by userspace.
 999                 */
1000                if (qp->bfregn != MLX5_IB_INVALID_BFREG)
1001                        mlx5_ib_free_bfreg(dev, &context->bfregi, qp->bfregn);
1002                return;
1003        }
1004
1005        /* Kernel QP */
1006        kvfree(qp->sq.wqe_head);
1007        kvfree(qp->sq.w_list);
1008        kvfree(qp->sq.wrid);
1009        kvfree(qp->sq.wr_data);
1010        kvfree(qp->rq.wrid);
1011        if (qp->db.db)
1012                mlx5_db_free(dev->mdev, &qp->db);
1013        if (qp->buf.frags)
1014                mlx5_frag_buf_free(dev->mdev, &qp->buf);
1015}
1016
1017static int _create_kernel_qp(struct mlx5_ib_dev *dev,
1018                             struct ib_qp_init_attr *init_attr,
1019                             struct mlx5_ib_qp *qp, u32 **in, int *inlen,
1020                             struct mlx5_ib_qp_base *base)
1021{
1022        int uar_index;
1023        void *qpc;
1024        int err;
1025
1026        if (init_attr->qp_type == MLX5_IB_QPT_REG_UMR)
1027                qp->bf.bfreg = &dev->fp_bfreg;
1028        else if (qp->flags & MLX5_IB_QP_CREATE_WC_TEST)
1029                qp->bf.bfreg = &dev->wc_bfreg;
1030        else
1031                qp->bf.bfreg = &dev->bfreg;
1032
 1033        /* We need to divide by two since each register consists of
 1034         * two buffers of identical size, namely odd and even.
 1035         */
1036        qp->bf.buf_size = (1 << MLX5_CAP_GEN(dev->mdev, log_bf_reg_size)) / 2;
1037        uar_index = qp->bf.bfreg->index;
1038
1039        err = calc_sq_size(dev, init_attr, qp);
1040        if (err < 0) {
1041                mlx5_ib_dbg(dev, "err %d\n", err);
1042                return err;
1043        }
1044
1045        qp->rq.offset = 0;
1046        qp->sq.offset = qp->rq.wqe_cnt << qp->rq.wqe_shift;
1047        base->ubuffer.buf_size = err + (qp->rq.wqe_cnt << qp->rq.wqe_shift);
1048
1049        err = mlx5_frag_buf_alloc_node(dev->mdev, base->ubuffer.buf_size,
1050                                       &qp->buf, dev->mdev->priv.numa_node);
1051        if (err) {
1052                mlx5_ib_dbg(dev, "err %d\n", err);
1053                return err;
1054        }
1055
1056        if (qp->rq.wqe_cnt)
1057                mlx5_init_fbc(qp->buf.frags, qp->rq.wqe_shift,
1058                              ilog2(qp->rq.wqe_cnt), &qp->rq.fbc);
1059
1060        if (qp->sq.wqe_cnt) {
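                     /* The SQ need not start on a page boundary, so tell the
                      * fragmented buffer code at which MLX5_SEND_WQE_BB stride
                      * within its first page the SQ begins.
                      */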
 1061                int sq_strides_offset = (qp->sq.offset & (PAGE_SIZE - 1)) /
1062                                        MLX5_SEND_WQE_BB;
1063                mlx5_init_fbc_offset(qp->buf.frags +
1064                                     (qp->sq.offset / PAGE_SIZE),
1065                                     ilog2(MLX5_SEND_WQE_BB),
1066                                     ilog2(qp->sq.wqe_cnt),
1067                                     sq_strides_offset, &qp->sq.fbc);
1068
1069                qp->sq.cur_edge = get_sq_edge(&qp->sq, 0);
1070        }
1071
1072        *inlen = MLX5_ST_SZ_BYTES(create_qp_in) +
1073                 MLX5_FLD_SZ_BYTES(create_qp_in, pas[0]) * qp->buf.npages;
1074        *in = kvzalloc(*inlen, GFP_KERNEL);
1075        if (!*in) {
1076                err = -ENOMEM;
1077                goto err_buf;
1078        }
1079
1080        qpc = MLX5_ADDR_OF(create_qp_in, *in, qpc);
1081        MLX5_SET(qpc, qpc, uar_page, uar_index);
1082        MLX5_SET(qpc, qpc, ts_format, mlx5_get_qp_default_ts(dev->mdev));
1083        MLX5_SET(qpc, qpc, log_page_size, qp->buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT);
1084
1085        /* Set "fast registration enabled" for all kernel QPs */
1086        MLX5_SET(qpc, qpc, fre, 1);
1087        MLX5_SET(qpc, qpc, rlky, 1);
1088
1089        if (qp->flags & MLX5_IB_QP_CREATE_SQPN_QP1)
1090                MLX5_SET(qpc, qpc, deth_sqpn, 1);
1091
1092        mlx5_fill_page_frag_array(&qp->buf,
1093                                  (__be64 *)MLX5_ADDR_OF(create_qp_in,
1094                                                         *in, pas));
1095
1096        err = mlx5_db_alloc(dev->mdev, &qp->db);
1097        if (err) {
1098                mlx5_ib_dbg(dev, "err %d\n", err);
1099                goto err_free;
1100        }
1101
1102        qp->sq.wrid = kvmalloc_array(qp->sq.wqe_cnt,
1103                                     sizeof(*qp->sq.wrid), GFP_KERNEL);
1104        qp->sq.wr_data = kvmalloc_array(qp->sq.wqe_cnt,
1105                                        sizeof(*qp->sq.wr_data), GFP_KERNEL);
1106        qp->rq.wrid = kvmalloc_array(qp->rq.wqe_cnt,
1107                                     sizeof(*qp->rq.wrid), GFP_KERNEL);
1108        qp->sq.w_list = kvmalloc_array(qp->sq.wqe_cnt,
1109                                       sizeof(*qp->sq.w_list), GFP_KERNEL);
1110        qp->sq.wqe_head = kvmalloc_array(qp->sq.wqe_cnt,
1111                                         sizeof(*qp->sq.wqe_head), GFP_KERNEL);
1112
1113        if (!qp->sq.wrid || !qp->sq.wr_data || !qp->rq.wrid ||
1114            !qp->sq.w_list || !qp->sq.wqe_head) {
1115                err = -ENOMEM;
1116                goto err_wrid;
1117        }
1118
1119        return 0;
1120
1121err_wrid:
1122        kvfree(qp->sq.wqe_head);
1123        kvfree(qp->sq.w_list);
1124        kvfree(qp->sq.wrid);
1125        kvfree(qp->sq.wr_data);
1126        kvfree(qp->rq.wrid);
1127        mlx5_db_free(dev->mdev, &qp->db);
1128
1129err_free:
1130        kvfree(*in);
1131
1132err_buf:
1133        mlx5_frag_buf_free(dev->mdev, &qp->buf);
1134        return err;
1135}
1136
1137static u32 get_rx_type(struct mlx5_ib_qp *qp, struct ib_qp_init_attr *attr)
1138{
1139        if (attr->srq || (qp->type == IB_QPT_XRC_TGT) ||
1140            (qp->type == MLX5_IB_QPT_DCI) || (qp->type == IB_QPT_XRC_INI))
1141                return MLX5_SRQ_RQ;
1142        else if (!qp->has_rq)
1143                return MLX5_ZERO_LEN_RQ;
1144
1145        return MLX5_NON_ZERO_RQ;
1146}
1147
1148static int create_raw_packet_qp_tis(struct mlx5_ib_dev *dev,
1149                                    struct mlx5_ib_qp *qp,
1150                                    struct mlx5_ib_sq *sq, u32 tdn,
1151                                    struct ib_pd *pd)
1152{
1153        u32 in[MLX5_ST_SZ_DW(create_tis_in)] = {};
1154        void *tisc = MLX5_ADDR_OF(create_tis_in, in, ctx);
1155
1156        MLX5_SET(create_tis_in, in, uid, to_mpd(pd)->uid);
1157        MLX5_SET(tisc, tisc, transport_domain, tdn);
1158        if (qp->flags & IB_QP_CREATE_SOURCE_QPN)
1159                MLX5_SET(tisc, tisc, underlay_qpn, qp->underlay_qpn);
1160
1161        return mlx5_core_create_tis(dev->mdev, in, &sq->tisn);
1162}
1163
1164static void destroy_raw_packet_qp_tis(struct mlx5_ib_dev *dev,
1165                                      struct mlx5_ib_sq *sq, struct ib_pd *pd)
1166{
1167        mlx5_cmd_destroy_tis(dev->mdev, sq->tisn, to_mpd(pd)->uid);
1168}
1169
1170static void destroy_flow_rule_vport_sq(struct mlx5_ib_sq *sq)
1171{
1172        if (sq->flow_rule)
1173                mlx5_del_flow_rules(sq->flow_rule);
1174        sq->flow_rule = NULL;
1175}
1176
1177static bool fr_supported(int ts_cap)
1178{
1179        return ts_cap == MLX5_TIMESTAMP_FORMAT_CAP_FREE_RUNNING ||
1180               ts_cap == MLX5_TIMESTAMP_FORMAT_CAP_FREE_RUNNING_AND_REAL_TIME;
1181}
1182
1183static int get_ts_format(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq,
1184                         bool fr_sup, bool rt_sup)
1185{
1186        if (cq->private_flags & MLX5_IB_CQ_PR_FLAGS_REAL_TIME_TS) {
1187                if (!rt_sup) {
1188                        mlx5_ib_dbg(dev,
1189                                    "Real time TS format is not supported\n");
1190                        return -EOPNOTSUPP;
1191                }
1192                return MLX5_TIMESTAMP_FORMAT_REAL_TIME;
1193        }
1194        if (cq->create_flags & IB_UVERBS_CQ_FLAGS_TIMESTAMP_COMPLETION) {
1195                if (!fr_sup) {
1196                        mlx5_ib_dbg(dev,
1197                                    "Free running TS format is not supported\n");
1198                        return -EOPNOTSUPP;
1199                }
1200                return MLX5_TIMESTAMP_FORMAT_FREE_RUNNING;
1201        }
1202        return fr_sup ? MLX5_TIMESTAMP_FORMAT_FREE_RUNNING :
1203                        MLX5_TIMESTAMP_FORMAT_DEFAULT;
1204}
1205
1206static int get_rq_ts_format(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *recv_cq)
1207{
1208        u8 ts_cap = MLX5_CAP_GEN(dev->mdev, rq_ts_format);
1209
1210        return get_ts_format(dev, recv_cq, fr_supported(ts_cap),
1211                             rt_supported(ts_cap));
1212}
1213
1214static int get_sq_ts_format(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *send_cq)
1215{
1216        u8 ts_cap = MLX5_CAP_GEN(dev->mdev, sq_ts_format);
1217
1218        return get_ts_format(dev, send_cq, fr_supported(ts_cap),
1219                             rt_supported(ts_cap));
1220}
1221
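     /* A QP has a single ts_format shared by its SQ and RQ.  If both CQs
      * request an explicit (non-default) timestamp format the two must match;
      * otherwise the explicitly requested format is used, falling back to the
      * device default.
      */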
1222static int get_qp_ts_format(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *send_cq,
1223                            struct mlx5_ib_cq *recv_cq)
1224{
1225        u8 ts_cap = MLX5_CAP_ROCE(dev->mdev, qp_ts_format);
1226        bool fr_sup = fr_supported(ts_cap);
1227        bool rt_sup = rt_supported(ts_cap);
1228        u8 default_ts = fr_sup ? MLX5_TIMESTAMP_FORMAT_FREE_RUNNING :
1229                                 MLX5_TIMESTAMP_FORMAT_DEFAULT;
1230        int send_ts_format =
1231                send_cq ? get_ts_format(dev, send_cq, fr_sup, rt_sup) :
1232                          default_ts;
1233        int recv_ts_format =
1234                recv_cq ? get_ts_format(dev, recv_cq, fr_sup, rt_sup) :
1235                          default_ts;
1236
1237        if (send_ts_format < 0 || recv_ts_format < 0)
1238                return -EOPNOTSUPP;
1239
1240        if (send_ts_format != MLX5_TIMESTAMP_FORMAT_DEFAULT &&
1241            recv_ts_format != MLX5_TIMESTAMP_FORMAT_DEFAULT &&
1242            send_ts_format != recv_ts_format) {
1243                mlx5_ib_dbg(
1244                        dev,
1245                        "The send ts_format does not match the receive ts_format\n");
1246                return -EOPNOTSUPP;
1247        }
1248
1249        return send_ts_format == default_ts ? recv_ts_format : send_ts_format;
1250}
1251
1252static int create_raw_packet_qp_sq(struct mlx5_ib_dev *dev,
1253                                   struct ib_udata *udata,
1254                                   struct mlx5_ib_sq *sq, void *qpin,
1255                                   struct ib_pd *pd, struct mlx5_ib_cq *cq)
1256{
1257        struct mlx5_ib_ubuffer *ubuffer = &sq->ubuffer;
1258        __be64 *pas;
1259        void *in;
1260        void *sqc;
1261        void *qpc = MLX5_ADDR_OF(create_qp_in, qpin, qpc);
1262        void *wq;
1263        int inlen;
1264        int err;
1265        unsigned int page_offset_quantized;
1266        unsigned long page_size;
1267        int ts_format;
1268
1269        ts_format = get_sq_ts_format(dev, cq);
1270        if (ts_format < 0)
1271                return ts_format;
1272
1273        sq->ubuffer.umem = ib_umem_get(&dev->ib_dev, ubuffer->buf_addr,
1274                                       ubuffer->buf_size, 0);
1275        if (IS_ERR(sq->ubuffer.umem))
1276                return PTR_ERR(sq->ubuffer.umem);
1277        page_size = mlx5_umem_find_best_quantized_pgoff(
1278                ubuffer->umem, wq, log_wq_pg_sz, MLX5_ADAPTER_PAGE_SHIFT,
1279                page_offset, 64, &page_offset_quantized);
1280        if (!page_size) {
1281                err = -EINVAL;
1282                goto err_umem;
1283        }
1284
1285        inlen = MLX5_ST_SZ_BYTES(create_sq_in) +
1286                sizeof(u64) *
1287                        ib_umem_num_dma_blocks(sq->ubuffer.umem, page_size);
1288        in = kvzalloc(inlen, GFP_KERNEL);
1289        if (!in) {
1290                err = -ENOMEM;
1291                goto err_umem;
1292        }
1293
1294        MLX5_SET(create_sq_in, in, uid, to_mpd(pd)->uid);
1295        sqc = MLX5_ADDR_OF(create_sq_in, in, ctx);
1296        MLX5_SET(sqc, sqc, flush_in_error_en, 1);
1297        if (MLX5_CAP_ETH(dev->mdev, multi_pkt_send_wqe))
1298                MLX5_SET(sqc, sqc, allow_multi_pkt_send_wqe, 1);
1299        MLX5_SET(sqc, sqc, state, MLX5_SQC_STATE_RST);
1300        MLX5_SET(sqc, sqc, ts_format, ts_format);
1301        MLX5_SET(sqc, sqc, user_index, MLX5_GET(qpc, qpc, user_index));
1302        MLX5_SET(sqc, sqc, cqn, MLX5_GET(qpc, qpc, cqn_snd));
1303        MLX5_SET(sqc, sqc, tis_lst_sz, 1);
1304        MLX5_SET(sqc, sqc, tis_num_0, sq->tisn);
1305        if (MLX5_CAP_GEN(dev->mdev, eth_net_offloads) &&
1306            MLX5_CAP_ETH(dev->mdev, swp))
1307                MLX5_SET(sqc, sqc, allow_swp, 1);
1308
1309        wq = MLX5_ADDR_OF(sqc, sqc, wq);
1310        MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_CYCLIC);
1311        MLX5_SET(wq, wq, pd, MLX5_GET(qpc, qpc, pd));
1312        MLX5_SET(wq, wq, uar_page, MLX5_GET(qpc, qpc, uar_page));
1313        MLX5_SET64(wq, wq, dbr_addr, MLX5_GET64(qpc, qpc, dbr_addr));
1314        MLX5_SET(wq, wq, log_wq_stride, ilog2(MLX5_SEND_WQE_BB));
1315        MLX5_SET(wq, wq, log_wq_sz, MLX5_GET(qpc, qpc, log_sq_size));
1316        MLX5_SET(wq, wq, log_wq_pg_sz,
1317                 order_base_2(page_size) - MLX5_ADAPTER_PAGE_SHIFT);
1318        MLX5_SET(wq, wq, page_offset, page_offset_quantized);
1319
1320        pas = (__be64 *)MLX5_ADDR_OF(wq, wq, pas);
1321        mlx5_ib_populate_pas(sq->ubuffer.umem, page_size, pas, 0);
1322
1323        err = mlx5_core_create_sq_tracked(dev, in, inlen, &sq->base.mqp);
1324
1325        kvfree(in);
1326
1327        if (err)
1328                goto err_umem;
1329
1330        return 0;
1331
1332err_umem:
1333        ib_umem_release(sq->ubuffer.umem);
1334        sq->ubuffer.umem = NULL;
1335
1336        return err;
1337}
1338
1339static void destroy_raw_packet_qp_sq(struct mlx5_ib_dev *dev,
1340                                     struct mlx5_ib_sq *sq)
1341{
1342        destroy_flow_rule_vport_sq(sq);
1343        mlx5_core_destroy_sq_tracked(dev, &sq->base.mqp);
1344        ib_umem_release(sq->ubuffer.umem);
1345}
1346
1347static int create_raw_packet_qp_rq(struct mlx5_ib_dev *dev,
1348                                   struct mlx5_ib_rq *rq, void *qpin,
1349                                   struct ib_pd *pd, struct mlx5_ib_cq *cq)
1350{
1351        struct mlx5_ib_qp *mqp = rq->base.container_mibqp;
1352        __be64 *pas;
1353        void *in;
1354        void *rqc;
1355        void *wq;
1356        void *qpc = MLX5_ADDR_OF(create_qp_in, qpin, qpc);
1357        struct ib_umem *umem = rq->base.ubuffer.umem;
1358        unsigned int page_offset_quantized;
1359        unsigned long page_size = 0;
1360        int ts_format;
1361        size_t inlen;
1362        int err;
1363
1364        ts_format = get_rq_ts_format(dev, cq);
1365        if (ts_format < 0)
1366                return ts_format;
1367
1368        page_size = mlx5_umem_find_best_quantized_pgoff(umem, wq, log_wq_pg_sz,
1369                                                        MLX5_ADAPTER_PAGE_SHIFT,
1370                                                        page_offset, 64,
1371                                                        &page_offset_quantized);
1372        if (!page_size)
1373                return -EINVAL;
1374
1375        inlen = MLX5_ST_SZ_BYTES(create_rq_in) +
1376                sizeof(u64) * ib_umem_num_dma_blocks(umem, page_size);
1377        in = kvzalloc(inlen, GFP_KERNEL);
1378        if (!in)
1379                return -ENOMEM;
1380
1381        MLX5_SET(create_rq_in, in, uid, to_mpd(pd)->uid);
1382        rqc = MLX5_ADDR_OF(create_rq_in, in, ctx);
1383        if (!(rq->flags & MLX5_IB_RQ_CVLAN_STRIPPING))
1384                MLX5_SET(rqc, rqc, vsd, 1);
1385        MLX5_SET(rqc, rqc, mem_rq_type, MLX5_RQC_MEM_RQ_TYPE_MEMORY_RQ_INLINE);
1386        MLX5_SET(rqc, rqc, state, MLX5_RQC_STATE_RST);
1387        MLX5_SET(rqc, rqc, ts_format, ts_format);
1388        MLX5_SET(rqc, rqc, flush_in_error_en, 1);
1389        MLX5_SET(rqc, rqc, user_index, MLX5_GET(qpc, qpc, user_index));
1390        MLX5_SET(rqc, rqc, cqn, MLX5_GET(qpc, qpc, cqn_rcv));
1391
1392        if (mqp->flags & IB_QP_CREATE_SCATTER_FCS)
1393                MLX5_SET(rqc, rqc, scatter_fcs, 1);
1394
1395        wq = MLX5_ADDR_OF(rqc, rqc, wq);
1396        MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_CYCLIC);
1397        if (rq->flags & MLX5_IB_RQ_PCI_WRITE_END_PADDING)
1398                MLX5_SET(wq, wq, end_padding_mode, MLX5_WQ_END_PAD_MODE_ALIGN);
1399        MLX5_SET(wq, wq, page_offset, page_offset_quantized);
1400        MLX5_SET(wq, wq, pd, MLX5_GET(qpc, qpc, pd));
1401        MLX5_SET64(wq, wq, dbr_addr, MLX5_GET64(qpc, qpc, dbr_addr));
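             /* log_rq_stride in the QPC appears to hold log2(stride) - 4 (the
              * stride in 16-byte units), while log_wq_stride here is log2 of
              * the stride in bytes, hence the "+ 4".
              */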
1402        MLX5_SET(wq, wq, log_wq_stride, MLX5_GET(qpc, qpc, log_rq_stride) + 4);
1403        MLX5_SET(wq, wq, log_wq_pg_sz,
1404                 order_base_2(page_size) - MLX5_ADAPTER_PAGE_SHIFT);
1405        MLX5_SET(wq, wq, log_wq_sz, MLX5_GET(qpc, qpc, log_rq_size));
1406
1407        pas = (__be64 *)MLX5_ADDR_OF(wq, wq, pas);
1408        mlx5_ib_populate_pas(umem, page_size, pas, 0);
1409
1410        err = mlx5_core_create_rq_tracked(dev, in, inlen, &rq->base.mqp);
1411
1412        kvfree(in);
1413
1414        return err;
1415}
1416
1417static void destroy_raw_packet_qp_rq(struct mlx5_ib_dev *dev,
1418                                     struct mlx5_ib_rq *rq)
1419{
1420        mlx5_core_destroy_rq_tracked(dev, &rq->base.mqp);
1421}
1422
1423static void destroy_raw_packet_qp_tir(struct mlx5_ib_dev *dev,
1424                                      struct mlx5_ib_rq *rq,
1425                                      u32 qp_flags_en,
1426                                      struct ib_pd *pd)
1427{
1428        if (qp_flags_en & (MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_UC |
1429                           MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_MC))
1430                mlx5_ib_disable_lb(dev, false, true);
1431        mlx5_cmd_destroy_tir(dev->mdev, rq->tirn, to_mpd(pd)->uid);
1432}
1433
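/*
 * Create a direct TIR that steers received traffic into the RQ of a
 * RAW_PACKET QP.  Tunnel offload and self-loopback blocking are taken
 * from the vendor QP flags; E-Switch representors always block unicast
 * self-loopback.  When a loopback-block bit is set, the driver's shared
 * loopback state is updated, and the TIR is torn down again if that fails.
 */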
1434static int create_raw_packet_qp_tir(struct mlx5_ib_dev *dev,
1435                                    struct mlx5_ib_rq *rq, u32 tdn,
1436                                    u32 *qp_flags_en, struct ib_pd *pd,
1437                                    u32 *out)
1438{
1439        u8 lb_flag = 0;
1440        u32 *in;
1441        void *tirc;
1442        int inlen;
1443        int err;
1444
1445        inlen = MLX5_ST_SZ_BYTES(create_tir_in);
1446        in = kvzalloc(inlen, GFP_KERNEL);
1447        if (!in)
1448                return -ENOMEM;
1449
1450        MLX5_SET(create_tir_in, in, uid, to_mpd(pd)->uid);
1451        tirc = MLX5_ADDR_OF(create_tir_in, in, ctx);
1452        MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_DIRECT);
1453        MLX5_SET(tirc, tirc, inline_rqn, rq->base.mqp.qpn);
1454        MLX5_SET(tirc, tirc, transport_domain, tdn);
1455        if (*qp_flags_en & MLX5_QP_FLAG_TUNNEL_OFFLOADS)
1456                MLX5_SET(tirc, tirc, tunneled_offload_en, 1);
1457
1458        if (*qp_flags_en & MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_UC)
1459                lb_flag |= MLX5_TIRC_SELF_LB_BLOCK_BLOCK_UNICAST;
1460
1461        if (*qp_flags_en & MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_MC)
1462                lb_flag |= MLX5_TIRC_SELF_LB_BLOCK_BLOCK_MULTICAST;
1463
1464        if (dev->is_rep) {
1465                lb_flag |= MLX5_TIRC_SELF_LB_BLOCK_BLOCK_UNICAST;
1466                *qp_flags_en |= MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_UC;
1467        }
1468
1469        MLX5_SET(tirc, tirc, self_lb_block, lb_flag);
1470        MLX5_SET(create_tir_in, in, opcode, MLX5_CMD_OP_CREATE_TIR);
1471        err = mlx5_cmd_exec_inout(dev->mdev, create_tir, in, out);
1472        rq->tirn = MLX5_GET(create_tir_out, out, tirn);
1473        if (!err && MLX5_GET(tirc, tirc, self_lb_block)) {
1474                err = mlx5_ib_enable_lb(dev, false, true);
1475
1476                if (err)
1477                        destroy_raw_packet_qp_tir(dev, rq, 0, pd);
1478        }
1479        kvfree(in);
1480
1481        return err;
1482}
1483
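/*
 * Assemble a RAW_PACKET QP from its components: a TIS + SQ pair when the
 * QP has a send queue, and an RQ + TIR pair when it has a receive queue.
 * For PDs with a non-zero uid the object numbers (and the TIR ICM address
 * on SW-steering capable devices) are returned in the uverbs response.
 * The SQ number, or the RQ number for a receive-only QP, is reported as
 * the QP number.
 */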
1484static int create_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
1485                                u32 *in, size_t inlen, struct ib_pd *pd,
1486                                struct ib_udata *udata,
1487                                struct mlx5_ib_create_qp_resp *resp,
1488                                struct ib_qp_init_attr *init_attr)
1489{
1490        struct mlx5_ib_raw_packet_qp *raw_packet_qp = &qp->raw_packet_qp;
1491        struct mlx5_ib_sq *sq = &raw_packet_qp->sq;
1492        struct mlx5_ib_rq *rq = &raw_packet_qp->rq;
1493        struct mlx5_ib_ucontext *mucontext = rdma_udata_to_drv_context(
1494                udata, struct mlx5_ib_ucontext, ibucontext);
1495        int err;
1496        u32 tdn = mucontext->tdn;
1497        u16 uid = to_mpd(pd)->uid;
1498        u32 out[MLX5_ST_SZ_DW(create_tir_out)] = {};
1499
1500        if (!qp->sq.wqe_cnt && !qp->rq.wqe_cnt)
1501                return -EINVAL;
1502        if (qp->sq.wqe_cnt) {
1503                err = create_raw_packet_qp_tis(dev, qp, sq, tdn, pd);
1504                if (err)
1505                        return err;
1506
1507                err = create_raw_packet_qp_sq(dev, udata, sq, in, pd,
1508                                              to_mcq(init_attr->send_cq));
1509                if (err)
1510                        goto err_destroy_tis;
1511
1512                if (uid) {
1513                        resp->tisn = sq->tisn;
1514                        resp->comp_mask |= MLX5_IB_CREATE_QP_RESP_MASK_TISN;
1515                        resp->sqn = sq->base.mqp.qpn;
1516                        resp->comp_mask |= MLX5_IB_CREATE_QP_RESP_MASK_SQN;
1517                }
1518
1519                sq->base.container_mibqp = qp;
1520                sq->base.mqp.event = mlx5_ib_qp_event;
1521        }
1522
1523        if (qp->rq.wqe_cnt) {
1524                rq->base.container_mibqp = qp;
1525
1526                if (qp->flags & IB_QP_CREATE_CVLAN_STRIPPING)
1527                        rq->flags |= MLX5_IB_RQ_CVLAN_STRIPPING;
1528                if (qp->flags & IB_QP_CREATE_PCI_WRITE_END_PADDING)
1529                        rq->flags |= MLX5_IB_RQ_PCI_WRITE_END_PADDING;
1530                err = create_raw_packet_qp_rq(dev, rq, in, pd,
1531                                              to_mcq(init_attr->recv_cq));
1532                if (err)
1533                        goto err_destroy_sq;
1534
1535                err = create_raw_packet_qp_tir(dev, rq, tdn, &qp->flags_en, pd,
1536                                               out);
1537                if (err)
1538                        goto err_destroy_rq;
1539
1540                if (uid) {
1541                        resp->rqn = rq->base.mqp.qpn;
1542                        resp->comp_mask |= MLX5_IB_CREATE_QP_RESP_MASK_RQN;
1543                        resp->tirn = rq->tirn;
1544                        resp->comp_mask |= MLX5_IB_CREATE_QP_RESP_MASK_TIRN;
1545                        if (MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, sw_owner) ||
1546                            MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, sw_owner_v2)) {
1547                                resp->tir_icm_addr = MLX5_GET(
1548                                        create_tir_out, out, icm_address_31_0);
1549                                resp->tir_icm_addr |=
1550                                        (u64)MLX5_GET(create_tir_out, out,
1551                                                      icm_address_39_32)
1552                                        << 32;
1553                                resp->tir_icm_addr |=
1554                                        (u64)MLX5_GET(create_tir_out, out,
1555                                                      icm_address_63_40)
1556                                        << 40;
1557                                resp->comp_mask |=
1558                                        MLX5_IB_CREATE_QP_RESP_MASK_TIR_ICM_ADDR;
1559                        }
1560                }
1561        }
1562
1563        qp->trans_qp.base.mqp.qpn = qp->sq.wqe_cnt ? sq->base.mqp.qpn :
1564                                                     rq->base.mqp.qpn;
1565        return 0;
1566
1567err_destroy_rq:
1568        destroy_raw_packet_qp_rq(dev, rq);
1569err_destroy_sq:
1570        if (!qp->sq.wqe_cnt)
1571                return err;
1572        destroy_raw_packet_qp_sq(dev, sq);
1573err_destroy_tis:
1574        destroy_raw_packet_qp_tis(dev, sq, pd);
1575
1576        return err;
1577}
1578
1579static void destroy_raw_packet_qp(struct mlx5_ib_dev *dev,
1580                                  struct mlx5_ib_qp *qp)
1581{
1582        struct mlx5_ib_raw_packet_qp *raw_packet_qp = &qp->raw_packet_qp;
1583        struct mlx5_ib_sq *sq = &raw_packet_qp->sq;
1584        struct mlx5_ib_rq *rq = &raw_packet_qp->rq;
1585
1586        if (qp->rq.wqe_cnt) {
1587                destroy_raw_packet_qp_tir(dev, rq, qp->flags_en, qp->ibqp.pd);
1588                destroy_raw_packet_qp_rq(dev, rq);
1589        }
1590
1591        if (qp->sq.wqe_cnt) {
1592                destroy_raw_packet_qp_sq(dev, sq);
1593                destroy_raw_packet_qp_tis(dev, sq, qp->ibqp.pd);
1594        }
1595}
1596
1597static void raw_packet_qp_copy_info(struct mlx5_ib_qp *qp,
1598                                    struct mlx5_ib_raw_packet_qp *raw_packet_qp)
1599{
1600        struct mlx5_ib_sq *sq = &raw_packet_qp->sq;
1601        struct mlx5_ib_rq *rq = &raw_packet_qp->rq;
1602
1603        sq->sq = &qp->sq;
1604        rq->rq = &qp->rq;
1605        sq->doorbell = &qp->db;
1606        rq->doorbell = &qp->db;
1607}
1608
1609static void destroy_rss_raw_qp_tir(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp)
1610{
1611        if (qp->flags_en & (MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_UC |
1612                            MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_MC))
1613                mlx5_ib_disable_lb(dev, false, true);
1614        mlx5_cmd_destroy_tir(dev->mdev, qp->rss_qp.tirn,
1615                             to_mpd(qp->ibqp.pd)->uid);
1616}
1617
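/*
 * Aggregated state for the QP create path: the user command and response
 * buffers, their sizes, the verbs init attributes and the user index, so
 * the per-type create helpers can share one signature.
 */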
1618struct mlx5_create_qp_params {
1619        struct ib_udata *udata;
1620        size_t inlen;
1621        size_t outlen;
1622        size_t ucmd_size;
1623        void *ucmd;
1624        u8 is_rss_raw : 1;
1625        struct ib_qp_init_attr *attr;
1626        u32 uidx;
1627        struct mlx5_ib_create_qp_resp resp;
1628};
1629
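/*
 * RSS RAW QP: no QP object is created in hardware, only an indirect TIR
 * that hashes received packets across the receive WQs of the indirection
 * table supplied by userspace.  The user command is validated (hash
 * function, key length, mutually exclusive L3/L4 selections) and
 * translated into the TIR's RX hash field selector.
 */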
1630static int create_rss_raw_qp_tir(struct mlx5_ib_dev *dev, struct ib_pd *pd,
1631                                 struct mlx5_ib_qp *qp,
1632                                 struct mlx5_create_qp_params *params)
1633{
1634        struct ib_qp_init_attr *init_attr = params->attr;
1635        struct mlx5_ib_create_qp_rss *ucmd = params->ucmd;
1636        struct ib_udata *udata = params->udata;
1637        struct mlx5_ib_ucontext *mucontext = rdma_udata_to_drv_context(
1638                udata, struct mlx5_ib_ucontext, ibucontext);
1639        int inlen;
1640        int outlen;
1641        int err;
1642        u32 *in;
1643        u32 *out;
1644        void *tirc;
1645        void *hfso;
1646        u32 selected_fields = 0;
1647        u32 outer_l4;
1648        u32 tdn = mucontext->tdn;
1649        u8 lb_flag = 0;
1650
1651        if (ucmd->comp_mask) {
1652                mlx5_ib_dbg(dev, "invalid comp mask\n");
1653                return -EOPNOTSUPP;
1654        }
1655
1656        if (ucmd->rx_hash_fields_mask & MLX5_RX_HASH_INNER &&
1657            !(ucmd->flags & MLX5_QP_FLAG_TUNNEL_OFFLOADS)) {
1658                mlx5_ib_dbg(dev, "Tunnel offloads must be set for inner RSS\n");
1659                return -EOPNOTSUPP;
1660        }
1661
1662        if (dev->is_rep)
1663                qp->flags_en |= MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_UC;
1664
1665        if (qp->flags_en & MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_UC)
1666                lb_flag |= MLX5_TIRC_SELF_LB_BLOCK_BLOCK_UNICAST;
1667
1668        if (qp->flags_en & MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_MC)
1669                lb_flag |= MLX5_TIRC_SELF_LB_BLOCK_BLOCK_MULTICAST;
1670
1671        inlen = MLX5_ST_SZ_BYTES(create_tir_in);
1672        outlen = MLX5_ST_SZ_BYTES(create_tir_out);
1673        in = kvzalloc(inlen + outlen, GFP_KERNEL);
1674        if (!in)
1675                return -ENOMEM;
1676
1677        out = in + MLX5_ST_SZ_DW(create_tir_in);
1678        MLX5_SET(create_tir_in, in, uid, to_mpd(pd)->uid);
1679        tirc = MLX5_ADDR_OF(create_tir_in, in, ctx);
1680        MLX5_SET(tirc, tirc, disp_type,
1681                 MLX5_TIRC_DISP_TYPE_INDIRECT);
1682        MLX5_SET(tirc, tirc, indirect_table,
1683                 init_attr->rwq_ind_tbl->ind_tbl_num);
1684        MLX5_SET(tirc, tirc, transport_domain, tdn);
1685
1686        hfso = MLX5_ADDR_OF(tirc, tirc, rx_hash_field_selector_outer);
1687
1688        if (ucmd->flags & MLX5_QP_FLAG_TUNNEL_OFFLOADS)
1689                MLX5_SET(tirc, tirc, tunneled_offload_en, 1);
1690
1691        MLX5_SET(tirc, tirc, self_lb_block, lb_flag);
1692
1693        if (ucmd->rx_hash_fields_mask & MLX5_RX_HASH_INNER)
1694                hfso = MLX5_ADDR_OF(tirc, tirc, rx_hash_field_selector_inner);
1695        else
1696                hfso = MLX5_ADDR_OF(tirc, tirc, rx_hash_field_selector_outer);
1697
1698        switch (ucmd->rx_hash_function) {
1699        case MLX5_RX_HASH_FUNC_TOEPLITZ:
1700        {
1701                void *rss_key = MLX5_ADDR_OF(tirc, tirc, rx_hash_toeplitz_key);
1702                size_t len = MLX5_FLD_SZ_BYTES(tirc, rx_hash_toeplitz_key);
1703
1704                if (len != ucmd->rx_key_len) {
1705                        err = -EINVAL;
1706                        goto err;
1707                }
1708
1709                MLX5_SET(tirc, tirc, rx_hash_fn, MLX5_RX_HASH_FN_TOEPLITZ);
1710                memcpy(rss_key, ucmd->rx_hash_key, len);
1711                break;
1712        }
1713        default:
1714                err = -EOPNOTSUPP;
1715                goto err;
1716        }
1717
1718        if (!ucmd->rx_hash_fields_mask) {
1719                /* special case when this TIR serves as a steering entry without hashing */
1720                if (!init_attr->rwq_ind_tbl->log_ind_tbl_size)
1721                        goto create_tir;
1722                err = -EINVAL;
1723                goto err;
1724        }
1725
1726        if (((ucmd->rx_hash_fields_mask & MLX5_RX_HASH_SRC_IPV4) ||
1727             (ucmd->rx_hash_fields_mask & MLX5_RX_HASH_DST_IPV4)) &&
1728             ((ucmd->rx_hash_fields_mask & MLX5_RX_HASH_SRC_IPV6) ||
1729             (ucmd->rx_hash_fields_mask & MLX5_RX_HASH_DST_IPV6))) {
1730                err = -EINVAL;
1731                goto err;
1732        }
1733
1734        /* If none of IPV4 & IPV6 SRC/DST was set - this bit field is ignored */
1735        if ((ucmd->rx_hash_fields_mask & MLX5_RX_HASH_SRC_IPV4) ||
1736            (ucmd->rx_hash_fields_mask & MLX5_RX_HASH_DST_IPV4))
1737                MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
1738                         MLX5_L3_PROT_TYPE_IPV4);
1739        else if ((ucmd->rx_hash_fields_mask & MLX5_RX_HASH_SRC_IPV6) ||
1740                 (ucmd->rx_hash_fields_mask & MLX5_RX_HASH_DST_IPV6))
1741                MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
1742                         MLX5_L3_PROT_TYPE_IPV6);
1743
1744        outer_l4 = ((ucmd->rx_hash_fields_mask & MLX5_RX_HASH_SRC_PORT_TCP) ||
1745                    (ucmd->rx_hash_fields_mask & MLX5_RX_HASH_DST_PORT_TCP))
1746                           << 0 |
1747                   ((ucmd->rx_hash_fields_mask & MLX5_RX_HASH_SRC_PORT_UDP) ||
1748                    (ucmd->rx_hash_fields_mask & MLX5_RX_HASH_DST_PORT_UDP))
1749                           << 1 |
1750                   (ucmd->rx_hash_fields_mask & MLX5_RX_HASH_IPSEC_SPI) << 2;
1751
1752        /* Check that only one l4 protocol is set */
1753        if (outer_l4 & (outer_l4 - 1)) {
1754                err = -EINVAL;
1755                goto err;
1756        }
1757
1758        /* If none of TCP & UDP SRC/DST was set - this bit field is ignored */
1759        if ((ucmd->rx_hash_fields_mask & MLX5_RX_HASH_SRC_PORT_TCP) ||
1760            (ucmd->rx_hash_fields_mask & MLX5_RX_HASH_DST_PORT_TCP))
1761                MLX5_SET(rx_hash_field_select, hfso, l4_prot_type,
1762                         MLX5_L4_PROT_TYPE_TCP);
1763        else if ((ucmd->rx_hash_fields_mask & MLX5_RX_HASH_SRC_PORT_UDP) ||
1764                 (ucmd->rx_hash_fields_mask & MLX5_RX_HASH_DST_PORT_UDP))
1765                MLX5_SET(rx_hash_field_select, hfso, l4_prot_type,
1766                         MLX5_L4_PROT_TYPE_UDP);
1767
1768        if ((ucmd->rx_hash_fields_mask & MLX5_RX_HASH_SRC_IPV4) ||
1769            (ucmd->rx_hash_fields_mask & MLX5_RX_HASH_SRC_IPV6))
1770                selected_fields |= MLX5_HASH_FIELD_SEL_SRC_IP;
1771
1772        if ((ucmd->rx_hash_fields_mask & MLX5_RX_HASH_DST_IPV4) ||
1773            (ucmd->rx_hash_fields_mask & MLX5_RX_HASH_DST_IPV6))
1774                selected_fields |= MLX5_HASH_FIELD_SEL_DST_IP;
1775
1776        if ((ucmd->rx_hash_fields_mask & MLX5_RX_HASH_SRC_PORT_TCP) ||
1777            (ucmd->rx_hash_fields_mask & MLX5_RX_HASH_SRC_PORT_UDP))
1778                selected_fields |= MLX5_HASH_FIELD_SEL_L4_SPORT;
1779
1780        if ((ucmd->rx_hash_fields_mask & MLX5_RX_HASH_DST_PORT_TCP) ||
1781            (ucmd->rx_hash_fields_mask & MLX5_RX_HASH_DST_PORT_UDP))
1782                selected_fields |= MLX5_HASH_FIELD_SEL_L4_DPORT;
1783
1784        if (ucmd->rx_hash_fields_mask & MLX5_RX_HASH_IPSEC_SPI)
1785                selected_fields |= MLX5_HASH_FIELD_SEL_IPSEC_SPI;
1786
1787        MLX5_SET(rx_hash_field_select, hfso, selected_fields, selected_fields);
1788
1789create_tir:
1790        MLX5_SET(create_tir_in, in, opcode, MLX5_CMD_OP_CREATE_TIR);
1791        err = mlx5_cmd_exec_inout(dev->mdev, create_tir, in, out);
1792
1793        qp->rss_qp.tirn = MLX5_GET(create_tir_out, out, tirn);
1794        if (!err && MLX5_GET(tirc, tirc, self_lb_block)) {
1795                err = mlx5_ib_enable_lb(dev, false, true);
1796
1797                if (err)
1798                        mlx5_cmd_destroy_tir(dev->mdev, qp->rss_qp.tirn,
1799                                             to_mpd(pd)->uid);
1800        }
1801
1802        if (err)
1803                goto err;
1804
1805        if (mucontext->devx_uid) {
1806                params->resp.comp_mask |= MLX5_IB_CREATE_QP_RESP_MASK_TIRN;
1807                params->resp.tirn = qp->rss_qp.tirn;
1808                if (MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, sw_owner) ||
1809                    MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, sw_owner_v2)) {
1810                        params->resp.tir_icm_addr =
1811                                MLX5_GET(create_tir_out, out, icm_address_31_0);
1812                        params->resp.tir_icm_addr |=
1813                                (u64)MLX5_GET(create_tir_out, out,
1814                                              icm_address_39_32)
1815                                << 32;
1816                        params->resp.tir_icm_addr |=
1817                                (u64)MLX5_GET(create_tir_out, out,
1818                                              icm_address_63_40)
1819                                << 40;
1820                        params->resp.comp_mask |=
1821                                MLX5_IB_CREATE_QP_RESP_MASK_TIR_ICM_ADDR;
1822                }
1823        }
1824
1825        kvfree(in);
1826        /* qpn 0 is the reserved value used for this RSS QP */
1827        qp->trans_qp.base.mqp.qpn = 0;
1828        qp->is_rss = true;
1829        return 0;
1830
1831err:
1832        kvfree(in);
1833        return err;
1834}
1835
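/*
 * Program the requester scatter-to-CQE mode: 64B scatter when the send CQ
 * uses 128B CQEs, otherwise 32B scatter where the device allows it for
 * the given QP type.  Nothing is set unless scatter was explicitly
 * allowed or the QP signals all work requests.
 */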
1836static void configure_requester_scat_cqe(struct mlx5_ib_dev *dev,
1837                                         struct mlx5_ib_qp *qp,
1838                                         struct ib_qp_init_attr *init_attr,
1839                                         void *qpc)
1840{
1841        int scqe_sz;
1842        bool allow_scat_cqe = false;
1843
1844        allow_scat_cqe = qp->flags_en & MLX5_QP_FLAG_ALLOW_SCATTER_CQE;
1845
1846        if (!allow_scat_cqe && init_attr->sq_sig_type != IB_SIGNAL_ALL_WR)
1847                return;
1848
1849        scqe_sz = mlx5_ib_get_cqe_size(init_attr->send_cq);
1850        if (scqe_sz == 128) {
1851                MLX5_SET(qpc, qpc, cs_req, MLX5_REQ_SCAT_DATA64_CQE);
1852                return;
1853        }
1854
1855        if (init_attr->qp_type != MLX5_IB_QPT_DCI ||
1856            MLX5_CAP_GEN(dev->mdev, dc_req_scat_data_cqe))
1857                MLX5_SET(qpc, qpc, cs_req, MLX5_REQ_SCAT_DATA32_CQE);
1858}
1859
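/*
 * Translate the device's supported atomic size mask into a QP atomic
 * mode: pick the largest supported size up to 256B, falling back to the
 * 8-byte mode for anything smaller.
 */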
1860static int atomic_size_to_mode(int size_mask)
1861{
1862        /* driver does not support atomic_size > 256B
1863         * and does not know how to translate bigger sizes
1864         */
1865        int supported_size_mask = size_mask & 0x1ff;
1866        int log_max_size;
1867
1868        if (!supported_size_mask)
1869                return -EOPNOTSUPP;
1870
1871        log_max_size = __fls(supported_size_mask);
1872
1873        if (log_max_size > 3)
1874                return log_max_size;
1875
1876        return MLX5_ATOMIC_MODE_8B;
1877}
1878
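/*
 * Choose the atomic mode for a QP or DCT from the device atomic caps:
 * prefer the extended compare-swap / fetch-add sizes when offered,
 * otherwise fall back to IB-compliant 8-byte atomics if both basic
 * operations are supported.
 */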
1879static int get_atomic_mode(struct mlx5_ib_dev *dev,
1880                           enum ib_qp_type qp_type)
1881{
1882        u8 atomic_operations = MLX5_CAP_ATOMIC(dev->mdev, atomic_operations);
1883        u8 atomic = MLX5_CAP_GEN(dev->mdev, atomic);
1884        int atomic_mode = -EOPNOTSUPP;
1885        int atomic_size_mask;
1886
1887        if (!atomic)
1888                return -EOPNOTSUPP;
1889
1890        if (qp_type == MLX5_IB_QPT_DCT)
1891                atomic_size_mask = MLX5_CAP_ATOMIC(dev->mdev, atomic_size_dc);
1892        else
1893                atomic_size_mask = MLX5_CAP_ATOMIC(dev->mdev, atomic_size_qp);
1894
1895        if ((atomic_operations & MLX5_ATOMIC_OPS_EXTENDED_CMP_SWAP) ||
1896            (atomic_operations & MLX5_ATOMIC_OPS_EXTENDED_FETCH_ADD))
1897                atomic_mode = atomic_size_to_mode(atomic_size_mask);
1898
1899        if (atomic_mode <= 0 &&
1900            (atomic_operations & MLX5_ATOMIC_OPS_CMP_SWAP &&
1901             atomic_operations & MLX5_ATOMIC_OPS_FETCH_ADD))
1902                atomic_mode = MLX5_ATOMIC_MODE_IB_COMP;
1903
1904        return atomic_mode;
1905}
1906
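/*
 * XRC target QPs own no work queues of their own: the QP context is
 * created with no_sq set and wired to the driver's internal PD, CQ and
 * SRQ resources, while the XRCD supplied by the caller provides the
 * actual receive context.
 */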
1907static int create_xrc_tgt_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
1908                             struct mlx5_create_qp_params *params)
1909{
1910        struct ib_qp_init_attr *attr = params->attr;
1911        u32 uidx = params->uidx;
1912        struct mlx5_ib_resources *devr = &dev->devr;
1913        u32 out[MLX5_ST_SZ_DW(create_qp_out)] = {};
1914        int inlen = MLX5_ST_SZ_BYTES(create_qp_in);
1915        struct mlx5_core_dev *mdev = dev->mdev;
1916        struct mlx5_ib_qp_base *base;
1917        unsigned long flags;
1918        void *qpc;
1919        u32 *in;
1920        int err;
1921
1922        if (attr->sq_sig_type == IB_SIGNAL_ALL_WR)
1923                qp->sq_signal_bits = MLX5_WQE_CTRL_CQ_UPDATE;
1924
1925        in = kvzalloc(inlen, GFP_KERNEL);
1926        if (!in)
1927                return -ENOMEM;
1928
1929        qpc = MLX5_ADDR_OF(create_qp_in, in, qpc);
1930
1931        MLX5_SET(qpc, qpc, st, MLX5_QP_ST_XRC);
1932        MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED);
1933        MLX5_SET(qpc, qpc, pd, to_mpd(devr->p0)->pdn);
1934
1935        if (qp->flags & IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK)
1936                MLX5_SET(qpc, qpc, block_lb_mc, 1);
1937        if (qp->flags & IB_QP_CREATE_CROSS_CHANNEL)
1938                MLX5_SET(qpc, qpc, cd_master, 1);
1939        if (qp->flags & IB_QP_CREATE_MANAGED_SEND)
1940                MLX5_SET(qpc, qpc, cd_slave_send, 1);
1941        if (qp->flags & IB_QP_CREATE_MANAGED_RECV)
1942                MLX5_SET(qpc, qpc, cd_slave_receive, 1);
1943
1944        MLX5_SET(qpc, qpc, ts_format, mlx5_get_qp_default_ts(dev->mdev));
1945        MLX5_SET(qpc, qpc, rq_type, MLX5_SRQ_RQ);
1946        MLX5_SET(qpc, qpc, no_sq, 1);
1947        MLX5_SET(qpc, qpc, cqn_rcv, to_mcq(devr->c0)->mcq.cqn);
1948        MLX5_SET(qpc, qpc, cqn_snd, to_mcq(devr->c0)->mcq.cqn);
1949        MLX5_SET(qpc, qpc, srqn_rmpn_xrqn, to_msrq(devr->s0)->msrq.srqn);
1950        MLX5_SET(qpc, qpc, xrcd, to_mxrcd(attr->xrcd)->xrcdn);
1951        MLX5_SET64(qpc, qpc, dbr_addr, qp->db.dma);
1952
1953        /* 0xffffff means we ask to work with cqe version 0 */
1954        if (MLX5_CAP_GEN(mdev, cqe_version) == MLX5_CQE_VERSION_V1)
1955                MLX5_SET(qpc, qpc, user_index, uidx);
1956
1957        if (qp->flags & IB_QP_CREATE_PCI_WRITE_END_PADDING) {
1958                MLX5_SET(qpc, qpc, end_padding_mode,
1959                         MLX5_WQ_END_PAD_MODE_ALIGN);
1960                /* Special case to clean flag */
1961                qp->flags &= ~IB_QP_CREATE_PCI_WRITE_END_PADDING;
1962        }
1963
1964        base = &qp->trans_qp.base;
1965        err = mlx5_qpc_create_qp(dev, &base->mqp, in, inlen, out);
1966        kvfree(in);
1967        if (err)
1968                return err;
1969
1970        base->container_mibqp = qp;
1971        base->mqp.event = mlx5_ib_qp_event;
1972        if (MLX5_CAP_GEN(mdev, ece_support))
1973                params->resp.ece_options = MLX5_GET(create_qp_out, out, ece);
1974
1975        spin_lock_irqsave(&dev->reset_flow_resource_lock, flags);
1976        list_add_tail(&qp->qps_list, &dev->qp_list);
1977        spin_unlock_irqrestore(&dev->reset_flow_resource_lock, flags);
1978
1979        qp->trans_qp.xrcdn = to_mxrcd(attr->xrcd)->xrcdn;
1980        return 0;
1981}
1982
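/*
 * Create a DCI (DC initiator).  The flow mirrors the regular user QP
 * create path, with the DCI-specific additions of optional DCI stream
 * channel configuration and requester scatter-to-CQE handling.
 */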
1983static int create_dci(struct mlx5_ib_dev *dev, struct ib_pd *pd,
1984                      struct mlx5_ib_qp *qp,
1985                      struct mlx5_create_qp_params *params)
1986{
1987        struct ib_qp_init_attr *init_attr = params->attr;
1988        struct mlx5_ib_create_qp *ucmd = params->ucmd;
1989        u32 out[MLX5_ST_SZ_DW(create_qp_out)] = {};
1990        struct ib_udata *udata = params->udata;
1991        u32 uidx = params->uidx;
1992        struct mlx5_ib_resources *devr = &dev->devr;
1993        int inlen = MLX5_ST_SZ_BYTES(create_qp_in);
1994        struct mlx5_core_dev *mdev = dev->mdev;
1995        struct mlx5_ib_cq *send_cq;
1996        struct mlx5_ib_cq *recv_cq;
1997        unsigned long flags;
1998        struct mlx5_ib_qp_base *base;
1999        int ts_format;
2000        int mlx5_st;
2001        void *qpc;
2002        u32 *in;
2003        int err;
2004
2005        spin_lock_init(&qp->sq.lock);
2006        spin_lock_init(&qp->rq.lock);
2007
2008        mlx5_st = to_mlx5_st(qp->type);
2009        if (mlx5_st < 0)
2010                return -EINVAL;
2011
2012        if (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR)
2013                qp->sq_signal_bits = MLX5_WQE_CTRL_CQ_UPDATE;
2014
2015        base = &qp->trans_qp.base;
2016
2017        qp->has_rq = qp_has_rq(init_attr);
2018        err = set_rq_size(dev, &init_attr->cap, qp->has_rq, qp, ucmd);
2019        if (err) {
2020                mlx5_ib_dbg(dev, "err %d\n", err);
2021                return err;
2022        }
2023
2024        if (ucmd->rq_wqe_shift != qp->rq.wqe_shift ||
2025            ucmd->rq_wqe_count != qp->rq.wqe_cnt)
2026                return -EINVAL;
2027
2028        if (ucmd->sq_wqe_count > (1 << MLX5_CAP_GEN(mdev, log_max_qp_sz)))
2029                return -EINVAL;
2030
2031        ts_format = get_qp_ts_format(dev, to_mcq(init_attr->send_cq),
2032                                     to_mcq(init_attr->recv_cq));
2033
2034        if (ts_format < 0)
2035                return ts_format;
2036
2037        err = _create_user_qp(dev, pd, qp, udata, init_attr, &in, &params->resp,
2038                              &inlen, base, ucmd);
2039        if (err)
2040                return err;
2041
2042        if (MLX5_CAP_GEN(mdev, ece_support))
2043                MLX5_SET(create_qp_in, in, ece, ucmd->ece_options);
2044        qpc = MLX5_ADDR_OF(create_qp_in, in, qpc);
2045
2046        MLX5_SET(qpc, qpc, st, mlx5_st);
2047        MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED);
2048        MLX5_SET(qpc, qpc, pd, to_mpd(pd)->pdn);
2049
2050        if (qp->flags_en & MLX5_QP_FLAG_SIGNATURE)
2051                MLX5_SET(qpc, qpc, wq_signature, 1);
2052
2053        if (qp->flags & IB_QP_CREATE_CROSS_CHANNEL)
2054                MLX5_SET(qpc, qpc, cd_master, 1);
2055        if (qp->flags & IB_QP_CREATE_MANAGED_SEND)
2056                MLX5_SET(qpc, qpc, cd_slave_send, 1);
2057        if (qp->flags_en & MLX5_QP_FLAG_SCATTER_CQE)
2058                configure_requester_scat_cqe(dev, qp, init_attr, qpc);
2059
2060        if (qp->rq.wqe_cnt) {
2061                MLX5_SET(qpc, qpc, log_rq_stride, qp->rq.wqe_shift - 4);
2062                MLX5_SET(qpc, qpc, log_rq_size, ilog2(qp->rq.wqe_cnt));
2063        }
2064
2065        if (qp->flags_en & MLX5_QP_FLAG_DCI_STREAM) {
2066                MLX5_SET(qpc, qpc, log_num_dci_stream_channels,
2067                         ucmd->dci_streams.log_num_concurent);
2068                MLX5_SET(qpc, qpc, log_num_dci_errored_streams,
2069                         ucmd->dci_streams.log_num_errored);
2070        }
2071
2072        MLX5_SET(qpc, qpc, ts_format, ts_format);
2073        MLX5_SET(qpc, qpc, rq_type, get_rx_type(qp, init_attr));
2074
2075        MLX5_SET(qpc, qpc, log_sq_size, ilog2(qp->sq.wqe_cnt));
2076
2077        /* Set default resources */
2078        if (init_attr->srq) {
2079                MLX5_SET(qpc, qpc, xrcd, devr->xrcdn0);
2080                MLX5_SET(qpc, qpc, srqn_rmpn_xrqn,
2081                         to_msrq(init_attr->srq)->msrq.srqn);
2082        } else {
2083                MLX5_SET(qpc, qpc, xrcd, devr->xrcdn1);
2084                MLX5_SET(qpc, qpc, srqn_rmpn_xrqn,
2085                         to_msrq(devr->s1)->msrq.srqn);
2086        }
2087
2088        if (init_attr->send_cq)
2089                MLX5_SET(qpc, qpc, cqn_snd,
2090                         to_mcq(init_attr->send_cq)->mcq.cqn);
2091
2092        if (init_attr->recv_cq)
2093                MLX5_SET(qpc, qpc, cqn_rcv,
2094                         to_mcq(init_attr->recv_cq)->mcq.cqn);
2095
2096        MLX5_SET64(qpc, qpc, dbr_addr, qp->db.dma);
2097
2098        /* 0xffffff means we ask to work with cqe version 0 */
2099        if (MLX5_CAP_GEN(mdev, cqe_version) == MLX5_CQE_VERSION_V1)
2100                MLX5_SET(qpc, qpc, user_index, uidx);
2101
2102        if (qp->flags & IB_QP_CREATE_PCI_WRITE_END_PADDING) {
2103                MLX5_SET(qpc, qpc, end_padding_mode,
2104                         MLX5_WQ_END_PAD_MODE_ALIGN);
2105                /* Special case to clean flag */
2106                qp->flags &= ~IB_QP_CREATE_PCI_WRITE_END_PADDING;
2107        }
2108
2109        err = mlx5_qpc_create_qp(dev, &base->mqp, in, inlen, out);
2110
2111        kvfree(in);
2112        if (err)
2113                goto err_create;
2114
2115        base->container_mibqp = qp;
2116        base->mqp.event = mlx5_ib_qp_event;
2117        if (MLX5_CAP_GEN(mdev, ece_support))
2118                params->resp.ece_options = MLX5_GET(create_qp_out, out, ece);
2119
2120        get_cqs(qp->type, init_attr->send_cq, init_attr->recv_cq,
2121                &send_cq, &recv_cq);
2122        spin_lock_irqsave(&dev->reset_flow_resource_lock, flags);
2123        mlx5_ib_lock_cqs(send_cq, recv_cq);
2124        /* Maintain the device-to-QP list, needed for further handling via the
2125         * reset flow
2126         */
2127        list_add_tail(&qp->qps_list, &dev->qp_list);
2128        /* Maintain the CQ-to-QP lists, needed for further handling via the reset flow
2129         */
2130        if (send_cq)
2131                list_add_tail(&qp->cq_send_list, &send_cq->list_send_qp);
2132        if (recv_cq)
2133                list_add_tail(&qp->cq_recv_list, &recv_cq->list_recv_qp);
2134        mlx5_ib_unlock_cqs(send_cq, recv_cq);
2135        spin_unlock_irqrestore(&dev->reset_flow_resource_lock, flags);
2136
2137        return 0;
2138
2139err_create:
2140        destroy_qp(dev, qp, base, udata);
2141        return err;
2142}
2143
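/*
 * Main create path for QPs backed by a user context.  The RQ geometry is
 * validated against the user command, the QP context is filled from the
 * create attributes and vendor flags, and the QP is then created either
 * through the RAW_PACKET path (TIS/SQ/RQ/TIR objects) or with a regular
 * CREATE_QP command, before being linked into the device and CQ lists
 * used by the reset flow.
 */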
2144static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd,
2145                          struct mlx5_ib_qp *qp,
2146                          struct mlx5_create_qp_params *params)
2147{
2148        struct ib_qp_init_attr *init_attr = params->attr;
2149        struct mlx5_ib_create_qp *ucmd = params->ucmd;
2150        u32 out[MLX5_ST_SZ_DW(create_qp_out)] = {};
2151        struct ib_udata *udata = params->udata;
2152        u32 uidx = params->uidx;
2153        struct mlx5_ib_resources *devr = &dev->devr;
2154        int inlen = MLX5_ST_SZ_BYTES(create_qp_in);
2155        struct mlx5_core_dev *mdev = dev->mdev;
2156        struct mlx5_ib_cq *send_cq;
2157        struct mlx5_ib_cq *recv_cq;
2158        unsigned long flags;
2159        struct mlx5_ib_qp_base *base;
2160        int ts_format;
2161        int mlx5_st;
2162        void *qpc;
2163        u32 *in;
2164        int err;
2165
2166        spin_lock_init(&qp->sq.lock);
2167        spin_lock_init(&qp->rq.lock);
2168
2169        mlx5_st = to_mlx5_st(qp->type);
2170        if (mlx5_st < 0)
2171                return -EINVAL;
2172
2173        if (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR)
2174                qp->sq_signal_bits = MLX5_WQE_CTRL_CQ_UPDATE;
2175
2176        if (qp->flags & IB_QP_CREATE_SOURCE_QPN)
2177                qp->underlay_qpn = init_attr->source_qpn;
2178
2179        base = (init_attr->qp_type == IB_QPT_RAW_PACKET ||
2180                qp->flags & IB_QP_CREATE_SOURCE_QPN) ?
2181               &qp->raw_packet_qp.rq.base :
2182               &qp->trans_qp.base;
2183
2184        qp->has_rq = qp_has_rq(init_attr);
2185        err = set_rq_size(dev, &init_attr->cap, qp->has_rq, qp, ucmd);
2186        if (err) {
2187                mlx5_ib_dbg(dev, "err %d\n", err);
2188                return err;
2189        }
2190
2191        if (ucmd->rq_wqe_shift != qp->rq.wqe_shift ||
2192            ucmd->rq_wqe_count != qp->rq.wqe_cnt)
2193                return -EINVAL;
2194
2195        if (ucmd->sq_wqe_count > (1 << MLX5_CAP_GEN(mdev, log_max_qp_sz)))
2196                return -EINVAL;
2197
2198        if (init_attr->qp_type != IB_QPT_RAW_PACKET) {
2199                ts_format = get_qp_ts_format(dev, to_mcq(init_attr->send_cq),
2200                                             to_mcq(init_attr->recv_cq));
2201                if (ts_format < 0)
2202                        return ts_format;
2203        }
2204
2205        err = _create_user_qp(dev, pd, qp, udata, init_attr, &in, &params->resp,
2206                              &inlen, base, ucmd);
2207        if (err)
2208                return err;
2209
2210        if (is_sqp(init_attr->qp_type))
2211                qp->port = init_attr->port_num;
2212
2213        if (MLX5_CAP_GEN(mdev, ece_support))
2214                MLX5_SET(create_qp_in, in, ece, ucmd->ece_options);
2215        qpc = MLX5_ADDR_OF(create_qp_in, in, qpc);
2216
2217        MLX5_SET(qpc, qpc, st, mlx5_st);
2218        MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED);
2219        MLX5_SET(qpc, qpc, pd, to_mpd(pd)->pdn);
2220
2221        if (qp->flags_en & MLX5_QP_FLAG_SIGNATURE)
2222                MLX5_SET(qpc, qpc, wq_signature, 1);
2223
2224        if (qp->flags & IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK)
2225                MLX5_SET(qpc, qpc, block_lb_mc, 1);
2226
2227        if (qp->flags & IB_QP_CREATE_CROSS_CHANNEL)
2228                MLX5_SET(qpc, qpc, cd_master, 1);
2229        if (qp->flags & IB_QP_CREATE_MANAGED_SEND)
2230                MLX5_SET(qpc, qpc, cd_slave_send, 1);
2231        if (qp->flags & IB_QP_CREATE_MANAGED_RECV)
2232                MLX5_SET(qpc, qpc, cd_slave_receive, 1);
2233        if (qp->flags_en & MLX5_QP_FLAG_PACKET_BASED_CREDIT_MODE)
2234                MLX5_SET(qpc, qpc, req_e2e_credit_mode, 1);
2235        if ((qp->flags_en & MLX5_QP_FLAG_SCATTER_CQE) &&
2236            (init_attr->qp_type == IB_QPT_RC ||
2237             init_attr->qp_type == IB_QPT_UC)) {
2238                int rcqe_sz = mlx5_ib_get_cqe_size(init_attr->recv_cq);
2239
2240                MLX5_SET(qpc, qpc, cs_res,
2241                         rcqe_sz == 128 ? MLX5_RES_SCAT_DATA64_CQE :
2242                                          MLX5_RES_SCAT_DATA32_CQE);
2243        }
2244        if ((qp->flags_en & MLX5_QP_FLAG_SCATTER_CQE) &&
2245            (qp->type == MLX5_IB_QPT_DCI || qp->type == IB_QPT_RC))
2246                configure_requester_scat_cqe(dev, qp, init_attr, qpc);
2247
2248        if (qp->rq.wqe_cnt) {
2249                MLX5_SET(qpc, qpc, log_rq_stride, qp->rq.wqe_shift - 4);
2250                MLX5_SET(qpc, qpc, log_rq_size, ilog2(qp->rq.wqe_cnt));
2251        }
2252
2253        if (init_attr->qp_type != IB_QPT_RAW_PACKET)
2254                MLX5_SET(qpc, qpc, ts_format, ts_format);
2255
2256        MLX5_SET(qpc, qpc, rq_type, get_rx_type(qp, init_attr));
2257
2258        if (qp->sq.wqe_cnt) {
2259                MLX5_SET(qpc, qpc, log_sq_size, ilog2(qp->sq.wqe_cnt));
2260        } else {
2261                MLX5_SET(qpc, qpc, no_sq, 1);
2262                if (init_attr->srq &&
2263                    init_attr->srq->srq_type == IB_SRQT_TM)
2264                        MLX5_SET(qpc, qpc, offload_type,
2265                                 MLX5_QPC_OFFLOAD_TYPE_RNDV);
2266        }
2267
2268        /* Set default resources */
2269        switch (init_attr->qp_type) {
2270        case IB_QPT_XRC_INI:
2271                MLX5_SET(qpc, qpc, cqn_rcv, to_mcq(devr->c0)->mcq.cqn);
2272                MLX5_SET(qpc, qpc, xrcd, devr->xrcdn1);
2273                MLX5_SET(qpc, qpc, srqn_rmpn_xrqn, to_msrq(devr->s0)->msrq.srqn);
2274                break;
2275        default:
2276                if (init_attr->srq) {
2277                        MLX5_SET(qpc, qpc, xrcd, devr->xrcdn0);
2278                        MLX5_SET(qpc, qpc, srqn_rmpn_xrqn, to_msrq(init_attr->srq)->msrq.srqn);
2279                } else {
2280                        MLX5_SET(qpc, qpc, xrcd, devr->xrcdn1);
2281                        MLX5_SET(qpc, qpc, srqn_rmpn_xrqn, to_msrq(devr->s1)->msrq.srqn);
2282                }
2283        }
2284
2285        if (init_attr->send_cq)
2286                MLX5_SET(qpc, qpc, cqn_snd, to_mcq(init_attr->send_cq)->mcq.cqn);
2287
2288        if (init_attr->recv_cq)
2289                MLX5_SET(qpc, qpc, cqn_rcv, to_mcq(init_attr->recv_cq)->mcq.cqn);
2290
2291        MLX5_SET64(qpc, qpc, dbr_addr, qp->db.dma);
2292
2293        /* 0xffffff means we ask to work with cqe version 0 */
2294        if (MLX5_CAP_GEN(mdev, cqe_version) == MLX5_CQE_VERSION_V1)
2295                MLX5_SET(qpc, qpc, user_index, uidx);
2296
2297        if (qp->flags & IB_QP_CREATE_PCI_WRITE_END_PADDING &&
2298            init_attr->qp_type != IB_QPT_RAW_PACKET) {
2299                MLX5_SET(qpc, qpc, end_padding_mode,
2300                         MLX5_WQ_END_PAD_MODE_ALIGN);
2301                /* Special case to clean flag */
2302                qp->flags &= ~IB_QP_CREATE_PCI_WRITE_END_PADDING;
2303        }
2304
2305        if (init_attr->qp_type == IB_QPT_RAW_PACKET ||
2306            qp->flags & IB_QP_CREATE_SOURCE_QPN) {
2307                qp->raw_packet_qp.sq.ubuffer.buf_addr = ucmd->sq_buf_addr;
2308                raw_packet_qp_copy_info(qp, &qp->raw_packet_qp);
2309                err = create_raw_packet_qp(dev, qp, in, inlen, pd, udata,
2310                                           &params->resp, init_attr);
2311        } else
2312                err = mlx5_qpc_create_qp(dev, &base->mqp, in, inlen, out);
2313
2314        kvfree(in);
2315        if (err)
2316                goto err_create;
2317
2318        base->container_mibqp = qp;
2319        base->mqp.event = mlx5_ib_qp_event;
2320        if (MLX5_CAP_GEN(mdev, ece_support))
2321                params->resp.ece_options = MLX5_GET(create_qp_out, out, ece);
2322
2323        get_cqs(qp->type, init_attr->send_cq, init_attr->recv_cq,
2324                &send_cq, &recv_cq);
2325        spin_lock_irqsave(&dev->reset_flow_resource_lock, flags);
2326        mlx5_ib_lock_cqs(send_cq, recv_cq);
2327        /* Maintain the device-to-QP list, needed for further handling via the
2328         * reset flow
2329         */
2330        list_add_tail(&qp->qps_list, &dev->qp_list);
2331        /* Maintain the CQ-to-QP lists, needed for further handling via the reset flow
2332         */
2333        if (send_cq)
2334                list_add_tail(&qp->cq_send_list, &send_cq->list_send_qp);
2335        if (recv_cq)
2336                list_add_tail(&qp->cq_recv_list, &recv_cq->list_recv_qp);
2337        mlx5_ib_unlock_cqs(send_cq, recv_cq);
2338        spin_unlock_irqrestore(&dev->reset_flow_resource_lock, flags);
2339
2340        return 0;
2341
2342err_create:
2343        destroy_qp(dev, qp, base, udata);
2344        return err;
2345}
2346
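/*
 * Create path for kernel-owned QPs (no udata): buffers and doorbells are
 * allocated in the kernel, and the QP context is programmed directly from
 * the init attributes.  UMR QPs are marked latency sensitive instead of
 * being attached to a PD.
 */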
2347static int create_kernel_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd,
2348                            struct mlx5_ib_qp *qp,
2349                            struct mlx5_create_qp_params *params)
2350{
2351        struct ib_qp_init_attr *attr = params->attr;
2352        u32 uidx = params->uidx;
2353        struct mlx5_ib_resources *devr = &dev->devr;
2354        u32 out[MLX5_ST_SZ_DW(create_qp_out)] = {};
2355        int inlen = MLX5_ST_SZ_BYTES(create_qp_in);
2356        struct mlx5_core_dev *mdev = dev->mdev;
2357        struct mlx5_ib_cq *send_cq;
2358        struct mlx5_ib_cq *recv_cq;
2359        unsigned long flags;
2360        struct mlx5_ib_qp_base *base;
2361        int mlx5_st;
2362        void *qpc;
2363        u32 *in;
2364        int err;
2365
2366        spin_lock_init(&qp->sq.lock);
2367        spin_lock_init(&qp->rq.lock);
2368
2369        mlx5_st = to_mlx5_st(qp->type);
2370        if (mlx5_st < 0)
2371                return -EINVAL;
2372
2373        if (attr->sq_sig_type == IB_SIGNAL_ALL_WR)
2374                qp->sq_signal_bits = MLX5_WQE_CTRL_CQ_UPDATE;
2375
2376        base = &qp->trans_qp.base;
2377
2378        qp->has_rq = qp_has_rq(attr);
2379        err = set_rq_size(dev, &attr->cap, qp->has_rq, qp, NULL);
2380        if (err) {
2381                mlx5_ib_dbg(dev, "err %d\n", err);
2382                return err;
2383        }
2384
2385        err = _create_kernel_qp(dev, attr, qp, &in, &inlen, base);
2386        if (err)
2387                return err;
2388
2389        if (is_sqp(attr->qp_type))
2390                qp->port = attr->port_num;
2391
2392        qpc = MLX5_ADDR_OF(create_qp_in, in, qpc);
2393
2394        MLX5_SET(qpc, qpc, st, mlx5_st);
2395        MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED);
2396
2397        if (attr->qp_type != MLX5_IB_QPT_REG_UMR)
2398                MLX5_SET(qpc, qpc, pd, to_mpd(pd ? pd : devr->p0)->pdn);
2399        else
2400                MLX5_SET(qpc, qpc, latency_sensitive, 1);
2401
2402
2403        if (qp->flags & IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK)
2404                MLX5_SET(qpc, qpc, block_lb_mc, 1);
2405
2406        if (qp->rq.wqe_cnt) {
2407                MLX5_SET(qpc, qpc, log_rq_stride, qp->rq.wqe_shift - 4);
2408                MLX5_SET(qpc, qpc, log_rq_size, ilog2(qp->rq.wqe_cnt));
2409        }
2410
2411        MLX5_SET(qpc, qpc, rq_type, get_rx_type(qp, attr));
2412
2413        if (qp->sq.wqe_cnt)
2414                MLX5_SET(qpc, qpc, log_sq_size, ilog2(qp->sq.wqe_cnt));
2415        else
2416                MLX5_SET(qpc, qpc, no_sq, 1);
2417
2418        if (attr->srq) {
2419                MLX5_SET(qpc, qpc, xrcd, devr->xrcdn0);
2420                MLX5_SET(qpc, qpc, srqn_rmpn_xrqn,
2421                         to_msrq(attr->srq)->msrq.srqn);
2422        } else {
2423                MLX5_SET(qpc, qpc, xrcd, devr->xrcdn1);
2424                MLX5_SET(qpc, qpc, srqn_rmpn_xrqn,
2425                         to_msrq(devr->s1)->msrq.srqn);
2426        }
2427
2428        if (attr->send_cq)
2429                MLX5_SET(qpc, qpc, cqn_snd, to_mcq(attr->send_cq)->mcq.cqn);
2430
2431        if (attr->recv_cq)
2432                MLX5_SET(qpc, qpc, cqn_rcv, to_mcq(attr->recv_cq)->mcq.cqn);
2433
2434        MLX5_SET64(qpc, qpc, dbr_addr, qp->db.dma);
2435
2436        /* 0xffffff means we ask to work with cqe version 0 */
2437        if (MLX5_CAP_GEN(mdev, cqe_version) == MLX5_CQE_VERSION_V1)
2438                MLX5_SET(qpc, qpc, user_index, uidx);
2439
2440        /* we use IB_QP_CREATE_IPOIB_UD_LSO to indicate an IPoIB QP */
2441        if (qp->flags & IB_QP_CREATE_IPOIB_UD_LSO)
2442                MLX5_SET(qpc, qpc, ulp_stateless_offload_mode, 1);
2443
2444        err = mlx5_qpc_create_qp(dev, &base->mqp, in, inlen, out);
2445        kvfree(in);
2446        if (err)
2447                goto err_create;
2448
2449        base->container_mibqp = qp;
2450        base->mqp.event = mlx5_ib_qp_event;
2451
2452        get_cqs(qp->type, attr->send_cq, attr->recv_cq,
2453                &send_cq, &recv_cq);
2454        spin_lock_irqsave(&dev->reset_flow_resource_lock, flags);
2455        mlx5_ib_lock_cqs(send_cq, recv_cq);
2456        /* Maintain the device-to-QP list, needed for further handling via the
2457         * reset flow
2458         */
2459        list_add_tail(&qp->qps_list, &dev->qp_list);
2460        /* Maintain the CQ-to-QP lists, needed for further handling via the reset flow
2461         */
2462        if (send_cq)
2463                list_add_tail(&qp->cq_send_list, &send_cq->list_send_qp);
2464        if (recv_cq)
2465                list_add_tail(&qp->cq_recv_list, &recv_cq->list_recv_qp);
2466        mlx5_ib_unlock_cqs(send_cq, recv_cq);
2467        spin_unlock_irqrestore(&dev->reset_flow_resource_lock, flags);
2468
2469        return 0;
2470
2471err_create:
2472        destroy_qp(dev, qp, base, NULL);
2473        return err;
2474}
2475
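/*
 * Lock a QP's send and receive CQs in a stable order (lower CQN first)
 * so that QP teardown and the reset flow cannot deadlock when both CQs
 * must be held.  The __acquire/__release annotations keep sparse happy
 * for the single-CQ and no-CQ cases.
 */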
2476static void mlx5_ib_lock_cqs(struct mlx5_ib_cq *send_cq, struct mlx5_ib_cq *recv_cq)
2477        __acquires(&send_cq->lock) __acquires(&recv_cq->lock)
2478{
2479        if (send_cq) {
2480                if (recv_cq) {
2481                        if (send_cq->mcq.cqn < recv_cq->mcq.cqn)  {
2482                                spin_lock(&send_cq->lock);
2483                                spin_lock_nested(&recv_cq->lock,
2484                                                 SINGLE_DEPTH_NESTING);
2485                        } else if (send_cq->mcq.cqn == recv_cq->mcq.cqn) {
2486                                spin_lock(&send_cq->lock);
2487                                __acquire(&recv_cq->lock);
2488                        } else {
2489                                spin_lock(&recv_cq->lock);
2490                                spin_lock_nested(&send_cq->lock,
2491                                                 SINGLE_DEPTH_NESTING);
2492                        }
2493                } else {
2494                        spin_lock(&send_cq->lock);
2495                        __acquire(&recv_cq->lock);
2496                }
2497        } else if (recv_cq) {
2498                spin_lock(&recv_cq->lock);
2499                __acquire(&send_cq->lock);
2500        } else {
2501                __acquire(&send_cq->lock);
2502                __acquire(&recv_cq->lock);
2503        }
2504}
2505
2506static void mlx5_ib_unlock_cqs(struct mlx5_ib_cq *send_cq, struct mlx5_ib_cq *recv_cq)
2507        __releases(&send_cq->lock) __releases(&recv_cq->lock)
2508{
2509        if (send_cq) {
2510                if (recv_cq) {
2511                        if (send_cq->mcq.cqn < recv_cq->mcq.cqn)  {
2512                                spin_unlock(&recv_cq->lock);
2513                                spin_unlock(&send_cq->lock);
2514                        } else if (send_cq->mcq.cqn == recv_cq->mcq.cqn) {
2515                                __release(&recv_cq->lock);
2516                                spin_unlock(&send_cq->lock);
2517                        } else {
2518                                spin_unlock(&send_cq->lock);
2519                                spin_unlock(&recv_cq->lock);
2520                        }
2521                } else {
2522                        __release(&recv_cq->lock);
2523                        spin_unlock(&send_cq->lock);
2524                }
2525        } else if (recv_cq) {
2526                __release(&send_cq->lock);
2527                spin_unlock(&recv_cq->lock);
2528        } else {
2529                __release(&recv_cq->lock);
2530                __release(&send_cq->lock);
2531        }
2532}
2533
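/*
 * Resolve which CQs a QP actually uses: XRC targets have none, UMR and
 * XRC initiator QPs only have a send CQ, and the remaining types use
 * both when they were supplied.
 */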
2534static void get_cqs(enum ib_qp_type qp_type,
2535                    struct ib_cq *ib_send_cq, struct ib_cq *ib_recv_cq,
2536                    struct mlx5_ib_cq **send_cq, struct mlx5_ib_cq **recv_cq)
2537{
2538        switch (qp_type) {
2539        case IB_QPT_XRC_TGT:
2540                *send_cq = NULL;
2541                *recv_cq = NULL;
2542                break;
2543        case MLX5_IB_QPT_REG_UMR:
2544        case IB_QPT_XRC_INI:
2545                *send_cq = ib_send_cq ? to_mcq(ib_send_cq) : NULL;
2546                *recv_cq = NULL;
2547                break;
2548
2549        case IB_QPT_SMI:
2550        case MLX5_IB_QPT_HW_GSI:
2551        case IB_QPT_RC:
2552        case IB_QPT_UC:
2553        case IB_QPT_UD:
2554        case IB_QPT_RAW_PACKET:
2555                *send_cq = ib_send_cq ? to_mcq(ib_send_cq) : NULL;
2556                *recv_cq = ib_recv_cq ? to_mcq(ib_recv_cq) : NULL;
2557                break;
2558        default:
2559                *send_cq = NULL;
2560                *recv_cq = NULL;
2561                break;
2562        }
2563}
2564
2565static int modify_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
2566                                const struct mlx5_modify_raw_qp_param *raw_qp_param,
2567                                u8 lag_tx_affinity);
2568
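/*
 * Common QP destroy path: move the QP back to RESET if needed, unlink it
 * from the device and CQ lists under the reset-flow locks, clean any
 * completions still owned by a kernel QP, and finally destroy the
 * hardware object(s) and the software resources.
 */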
2569static void destroy_qp_common(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
2570                              struct ib_udata *udata)
2571{
2572        struct mlx5_ib_cq *send_cq, *recv_cq;
2573        struct mlx5_ib_qp_base *base;
2574        unsigned long flags;
2575        int err;
2576
2577        if (qp->is_rss) {
2578                destroy_rss_raw_qp_tir(dev, qp);
2579                return;
2580        }
2581
2582        base = (qp->type == IB_QPT_RAW_PACKET ||
2583                qp->flags & IB_QP_CREATE_SOURCE_QPN) ?
2584                       &qp->raw_packet_qp.rq.base :
2585                       &qp->trans_qp.base;
2586
2587        if (qp->state != IB_QPS_RESET) {
2588                if (qp->type != IB_QPT_RAW_PACKET &&
2589                    !(qp->flags & IB_QP_CREATE_SOURCE_QPN)) {
2590                        err = mlx5_core_qp_modify(dev, MLX5_CMD_OP_2RST_QP, 0,
2591                                                  NULL, &base->mqp, NULL);
2592                } else {
2593                        struct mlx5_modify_raw_qp_param raw_qp_param = {
2594                                .operation = MLX5_CMD_OP_2RST_QP
2595                        };
2596
2597                        err = modify_raw_packet_qp(dev, qp, &raw_qp_param, 0);
2598                }
2599                if (err)
2600                        mlx5_ib_warn(dev, "mlx5_ib: modify QP 0x%06x to RESET failed\n",
2601                                     base->mqp.qpn);
2602        }
2603
2604        get_cqs(qp->type, qp->ibqp.send_cq, qp->ibqp.recv_cq, &send_cq,
2605                &recv_cq);
2606
2607        spin_lock_irqsave(&dev->reset_flow_resource_lock, flags);
2608        mlx5_ib_lock_cqs(send_cq, recv_cq);
2609        /* del from lists under both locks above to protect reset flow paths */
2610        list_del(&qp->qps_list);
2611        if (send_cq)
2612                list_del(&qp->cq_send_list);
2613
2614        if (recv_cq)
2615                list_del(&qp->cq_recv_list);
2616
2617        if (!udata) {
2618                __mlx5_ib_cq_clean(recv_cq, base->mqp.qpn,
2619                                   qp->ibqp.srq ? to_msrq(qp->ibqp.srq) : NULL);
2620                if (send_cq != recv_cq)
2621                        __mlx5_ib_cq_clean(send_cq, base->mqp.qpn,
2622                                           NULL);
2623        }
2624        mlx5_ib_unlock_cqs(send_cq, recv_cq);
2625        spin_unlock_irqrestore(&dev->reset_flow_resource_lock, flags);
2626
2627        if (qp->type == IB_QPT_RAW_PACKET ||
2628            qp->flags & IB_QP_CREATE_SOURCE_QPN) {
2629                destroy_raw_packet_qp(dev, qp);
2630        } else {
2631                err = mlx5_core_destroy_qp(dev, &base->mqp);
2632                if (err)
2633                        mlx5_ib_warn(dev, "failed to destroy QP 0x%x\n",
2634                                     base->mqp.qpn);
2635        }
2636
2637        destroy_qp(dev, qp, base, udata);
2638}
2639
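/*
 * For a DCT only the CREATE_DCT mailbox is prepared here and stashed in
 * qp->dct.in; the firmware command itself is issued later, from the
 * modify-QP path, once the remaining DCT context is known.  The QP is
 * left in the RESET state.
 */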
2640static int create_dct(struct mlx5_ib_dev *dev, struct ib_pd *pd,
2641                      struct mlx5_ib_qp *qp,
2642                      struct mlx5_create_qp_params *params)
2643{
2644        struct ib_qp_init_attr *attr = params->attr;
2645        struct mlx5_ib_create_qp *ucmd = params->ucmd;
2646        u32 uidx = params->uidx;
2647        void *dctc;
2648
2649        if (mlx5_lag_is_active(dev->mdev) && !MLX5_CAP_GEN(dev->mdev, lag_dct))
2650                return -EOPNOTSUPP;
2651
2652        qp->dct.in = kzalloc(MLX5_ST_SZ_BYTES(create_dct_in), GFP_KERNEL);
2653        if (!qp->dct.in)
2654                return -ENOMEM;
2655
2656        MLX5_SET(create_dct_in, qp->dct.in, uid, to_mpd(pd)->uid);
2657        dctc = MLX5_ADDR_OF(create_dct_in, qp->dct.in, dct_context_entry);
2658        MLX5_SET(dctc, dctc, pd, to_mpd(pd)->pdn);
2659        MLX5_SET(dctc, dctc, srqn_xrqn, to_msrq(attr->srq)->msrq.srqn);
2660        MLX5_SET(dctc, dctc, cqn, to_mcq(attr->recv_cq)->mcq.cqn);
2661        MLX5_SET64(dctc, dctc, dc_access_key, ucmd->access_key);
2662        MLX5_SET(dctc, dctc, user_index, uidx);
2663        if (MLX5_CAP_GEN(dev->mdev, ece_support))
2664                MLX5_SET(dctc, dctc, ece, ucmd->ece_options);
2665
2666        if (qp->flags_en & MLX5_QP_FLAG_SCATTER_CQE) {
2667                int rcqe_sz = mlx5_ib_get_cqe_size(attr->recv_cq);
2668
2669                if (rcqe_sz == 128)
2670                        MLX5_SET(dctc, dctc, cs_res, MLX5_RES_SCAT_DATA64_CQE);
2671        }
2672
2673        qp->state = IB_QPS_RESET;
2674        return 0;
2675}
2676
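/*
 * Reject QP types this device cannot create: IB_QPT_DRIVER requires DCT
 * support, the XRC types require the xrc capability, and anything
 * outside the listed set is unsupported.
 */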
2677static int check_qp_type(struct mlx5_ib_dev *dev, struct ib_qp_init_attr *attr,
2678                         enum ib_qp_type *type)
2679{
2680        if (attr->qp_type == IB_QPT_DRIVER && !MLX5_CAP_GEN(dev->mdev, dct))
2681                goto out;
2682
2683        switch (attr->qp_type) {
2684        case IB_QPT_XRC_TGT:
2685        case IB_QPT_XRC_INI:
2686                if (!MLX5_CAP_GEN(dev->mdev, xrc))
2687                        goto out;
2688                fallthrough;
2689        case IB_QPT_RC:
2690        case IB_QPT_UC:
2691        case IB_QPT_SMI:
2692        case MLX5_IB_QPT_HW_GSI:
2693        case IB_QPT_DRIVER:
2694        case IB_QPT_GSI:
2695        case IB_QPT_RAW_PACKET:
2696        case IB_QPT_UD:
2697        case MLX5_IB_QPT_REG_UMR:
2698                break;
2699        default:
2700                goto out;
2701        }
2702
2703        *type = attr->qp_type;
2704        return 0;
2705
2706out:
2707        mlx5_ib_dbg(dev, "Unsupported QP type %d\n", attr->qp_type);
2708        return -EOPNOTSUPP;
2709}
2710
2711static int check_valid_flow(struct mlx5_ib_dev *dev, struct ib_pd *pd,
2712                            struct ib_qp_init_attr *attr,
2713                            struct ib_udata *udata)
2714{
2715        struct mlx5_ib_ucontext *ucontext = rdma_udata_to_drv_context(
2716                udata, struct mlx5_ib_ucontext, ibucontext);
2717
2718        if (!udata) {
2719                /* Kernel create_qp callers */
2720                if (attr->rwq_ind_tbl)
2721                        return -EOPNOTSUPP;
2722
2723                switch (attr->qp_type) {
2724                case IB_QPT_RAW_PACKET:
2725                case IB_QPT_DRIVER:
2726                        return -EOPNOTSUPP;
2727                default:
2728                        return 0;
2729                }
2730        }
2731
2732        /* Userspace create_qp callers */
2733        if (attr->qp_type == IB_QPT_RAW_PACKET && !ucontext->cqe_version) {
2734                mlx5_ib_dbg(dev,
2735                        "Raw Packet QP is only supported for CQE version > 0\n");
2736                return -EINVAL;
2737        }
2738
2739        if (attr->qp_type != IB_QPT_RAW_PACKET && attr->rwq_ind_tbl) {
2740                mlx5_ib_dbg(dev,
2741                            "Wrong QP type %d for the RWQ indirect table\n",
2742                            attr->qp_type);
2743                return -EINVAL;
2744        }
2745
2746        /*
2747         * We don't need to see this warning; it means that kernel code is
2748         * missing an ib_pd. Placed here to catch developers' mistakes.
2749         */
2750        WARN_ONCE(!pd && attr->qp_type != IB_QPT_XRC_TGT,
2751                  "There is a missing PD pointer assignment\n");
2752        return 0;
2753}
2754
2755static void process_vendor_flag(struct mlx5_ib_dev *dev, int *flags, int flag,
2756                                bool cond, struct mlx5_ib_qp *qp)
2757{
2758        if (!(*flags & flag))
2759                return;
2760
2761        if (cond) {
2762                qp->flags_en |= flag;
2763                *flags &= ~flag;
2764                return;
2765        }
2766
2767        switch (flag) {
2768        case MLX5_QP_FLAG_SCATTER_CQE:
2769        case MLX5_QP_FLAG_ALLOW_SCATTER_CQE:
2770                /*
2771                 * We don't return an error if these flags were provided but
2772                 * mlx5 doesn't have the corresponding capability.
2773                 */
2774                *flags &= ~(MLX5_QP_FLAG_SCATTER_CQE |
2775                            MLX5_QP_FLAG_ALLOW_SCATTER_CQE);
2776                return;
2777        default:
2778                break;
2779        }
2780        mlx5_ib_dbg(dev, "Vendor create QP flag 0x%X is not supported\n", flag);
2781}
2782
2783static int process_vendor_flags(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
2784                                void *ucmd, struct ib_qp_init_attr *attr)
2785{
2786        struct mlx5_core_dev *mdev = dev->mdev;
2787        bool cond;
2788        int flags;
2789
2790        if (attr->rwq_ind_tbl)
2791                flags = ((struct mlx5_ib_create_qp_rss *)ucmd)->flags;
2792        else
2793                flags = ((struct mlx5_ib_create_qp *)ucmd)->flags;
2794
2795        switch (flags & (MLX5_QP_FLAG_TYPE_DCT | MLX5_QP_FLAG_TYPE_DCI)) {
2796        case MLX5_QP_FLAG_TYPE_DCI:
2797                qp->type = MLX5_IB_QPT_DCI;
2798                break;
2799        case MLX5_QP_FLAG_TYPE_DCT:
2800                qp->type = MLX5_IB_QPT_DCT;
2801                break;
2802        default:
2803                if (qp->type != IB_QPT_DRIVER)
2804                        break;
2805                /*
2806                 * It is IB_QPT_DRIVER and either no subtype or a
2807                 * wrong subtype was provided.
2808                 */
2809                return -EINVAL;
2810        }
2811
2812        process_vendor_flag(dev, &flags, MLX5_QP_FLAG_TYPE_DCI, true, qp);
2813        process_vendor_flag(dev, &flags, MLX5_QP_FLAG_TYPE_DCT, true, qp);
2814        process_vendor_flag(dev, &flags, MLX5_QP_FLAG_DCI_STREAM,
2815                            MLX5_CAP_GEN(mdev, log_max_dci_stream_channels),
2816                            qp);
2817
2818        process_vendor_flag(dev, &flags, MLX5_QP_FLAG_SIGNATURE, true, qp);
2819        process_vendor_flag(dev, &flags, MLX5_QP_FLAG_SCATTER_CQE,
2820                            MLX5_CAP_GEN(mdev, sctr_data_cqe), qp);
2821        process_vendor_flag(dev, &flags, MLX5_QP_FLAG_ALLOW_SCATTER_CQE,
2822                            MLX5_CAP_GEN(mdev, sctr_data_cqe), qp);
2823
2824        if (qp->type == IB_QPT_RAW_PACKET) {
2825                cond = MLX5_CAP_ETH(mdev, tunnel_stateless_vxlan) ||
2826                       MLX5_CAP_ETH(mdev, tunnel_stateless_gre) ||
2827                       MLX5_CAP_ETH(mdev, tunnel_stateless_geneve_rx);
2828                process_vendor_flag(dev, &flags, MLX5_QP_FLAG_TUNNEL_OFFLOADS,
2829                                    cond, qp);
2830                process_vendor_flag(dev, &flags,
2831                                    MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_UC, true,
2832                                    qp);
2833                process_vendor_flag(dev, &flags,
2834                                    MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_MC, true,
2835                                    qp);
2836        }
2837
2838        if (qp->type == IB_QPT_RC)
2839                process_vendor_flag(dev, &flags,
2840                                    MLX5_QP_FLAG_PACKET_BASED_CREDIT_MODE,
2841                                    MLX5_CAP_GEN(mdev, qp_packet_based), qp);
2842
2843        process_vendor_flag(dev, &flags, MLX5_QP_FLAG_BFREG_INDEX, true, qp);
2844        process_vendor_flag(dev, &flags, MLX5_QP_FLAG_UAR_PAGE_INDEX, true, qp);
2845
2846        cond = qp->flags_en & ~(MLX5_QP_FLAG_TUNNEL_OFFLOADS |
2847                                MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_UC |
2848                                MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_MC);
2849        if (attr->rwq_ind_tbl && cond) {
2850                mlx5_ib_dbg(dev, "RSS RAW QP has unsupported flags 0x%X\n",
2851                            cond);
2852                return -EINVAL;
2853        }
2854
2855        if (flags)
2856                mlx5_ib_dbg(dev, "udata has unsupported flags 0x%X\n", flags);
2857
2858        return (flags) ? -EINVAL : 0;
2859}
2860
2861static void process_create_flag(struct mlx5_ib_dev *dev, int *flags, int flag,
2862                                bool cond, struct mlx5_ib_qp *qp)
2863{
2864        if (!(*flags & flag))
2865                return;
2866
2867        if (cond) {
2868                qp->flags |= flag;
2869                *flags &= ~flag;
2870                return;
2871        }
2872
2873        if (flag == MLX5_IB_QP_CREATE_WC_TEST) {
2874                /*
2875                 * Special case: if the condition isn't met, it isn't an error,
2876                 * just a different in-kernel flow.
2877                 */
2878                *flags &= ~MLX5_IB_QP_CREATE_WC_TEST;
2879                return;
2880        }
2881        mlx5_ib_dbg(dev, "Verbs create QP flag 0x%X is not supported\n", flag);
2882}
2883
2884static int process_create_flags(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
2885                                struct ib_qp_init_attr *attr)
2886{
2887        enum ib_qp_type qp_type = qp->type;
2888        struct mlx5_core_dev *mdev = dev->mdev;
2889        int create_flags = attr->create_flags;
2890        bool cond;
2891
2892        if (qp_type == MLX5_IB_QPT_DCT)
2893                return (create_flags) ? -EINVAL : 0;
2894
2895        if (qp_type == IB_QPT_RAW_PACKET && attr->rwq_ind_tbl)
2896                return (create_flags) ? -EINVAL : 0;
2897
2898        process_create_flag(dev, &create_flags, IB_QP_CREATE_NETIF_QP,
2899                            mlx5_get_flow_namespace(dev->mdev,
2900                                                    MLX5_FLOW_NAMESPACE_BYPASS),
2901                            qp);
2902        process_create_flag(dev, &create_flags,
2903                            IB_QP_CREATE_INTEGRITY_EN,
2904                            MLX5_CAP_GEN(mdev, sho), qp);
2905        process_create_flag(dev, &create_flags,
2906                            IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK,
2907                            MLX5_CAP_GEN(mdev, block_lb_mc), qp);
2908        process_create_flag(dev, &create_flags, IB_QP_CREATE_CROSS_CHANNEL,
2909                            MLX5_CAP_GEN(mdev, cd), qp);
2910        process_create_flag(dev, &create_flags, IB_QP_CREATE_MANAGED_SEND,
2911                            MLX5_CAP_GEN(mdev, cd), qp);
2912        process_create_flag(dev, &create_flags, IB_QP_CREATE_MANAGED_RECV,
2913                            MLX5_CAP_GEN(mdev, cd), qp);
2914
2915        if (qp_type == IB_QPT_UD) {
2916                process_create_flag(dev, &create_flags,
2917                                    IB_QP_CREATE_IPOIB_UD_LSO,
2918                                    MLX5_CAP_GEN(mdev, ipoib_basic_offloads),
2919                                    qp);
2920                cond = MLX5_CAP_GEN(mdev, port_type) == MLX5_CAP_PORT_TYPE_IB;
2921                process_create_flag(dev, &create_flags, IB_QP_CREATE_SOURCE_QPN,
2922                                    cond, qp);
2923        }
2924
2925        if (qp_type == IB_QPT_RAW_PACKET) {
2926                cond = MLX5_CAP_GEN(mdev, eth_net_offloads) &&
2927                       MLX5_CAP_ETH(mdev, scatter_fcs);
2928                process_create_flag(dev, &create_flags,
2929                                    IB_QP_CREATE_SCATTER_FCS, cond, qp);
2930
2931                cond = MLX5_CAP_GEN(mdev, eth_net_offloads) &&
2932                       MLX5_CAP_ETH(mdev, vlan_cap);
2933                process_create_flag(dev, &create_flags,
2934                                    IB_QP_CREATE_CVLAN_STRIPPING, cond, qp);
2935        }
2936
2937        process_create_flag(dev, &create_flags,
2938                            IB_QP_CREATE_PCI_WRITE_END_PADDING,
2939                            MLX5_CAP_GEN(mdev, end_pad), qp);
2940
2941        process_create_flag(dev, &create_flags, MLX5_IB_QP_CREATE_WC_TEST,
2942                            qp_type != MLX5_IB_QPT_REG_UMR, qp);
2943        process_create_flag(dev, &create_flags, MLX5_IB_QP_CREATE_SQPN_QP1,
2944                            true, qp);
2945
2946        if (create_flags) {
2947                mlx5_ib_dbg(dev, "Create QP has unsupported flags 0x%X\n",
2948                            create_flags);
2949                return -EOPNOTSUPP;
2950        }
2951        return 0;
2952}
2953
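    /*
     * Decide how many bytes of the user's create_qp command and of our
     * response are meaningful, based on whether this is an RSS raw QP and
     * on how much the caller's rdma-core actually provided.
     */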
2954static int process_udata_size(struct mlx5_ib_dev *dev,
2955                              struct mlx5_create_qp_params *params)
2956{
2957        size_t ucmd = sizeof(struct mlx5_ib_create_qp);
2958        struct ib_udata *udata = params->udata;
2959        size_t outlen = udata->outlen;
2960        size_t inlen = udata->inlen;
2961
2962        params->outlen = min(outlen, sizeof(struct mlx5_ib_create_qp_resp));
2963        params->ucmd_size = ucmd;
2964        if (!params->is_rss_raw) {
2965                /* User has old rdma-core, which doesn't support ECE */
2966                size_t min_inlen =
2967                        offsetof(struct mlx5_ib_create_qp, ece_options);
2968
2969                /*
2970                 * We will check in check_ucmd_data() that the user
2971                 * cleared everything after inlen.
2972                 */
2973                params->inlen = (inlen < min_inlen) ? 0 : min(inlen, ucmd);
2974                goto out;
2975        }
2976
2977        /* RSS RAW QP */
2978        if (inlen < offsetofend(struct mlx5_ib_create_qp_rss, flags))
2979                return -EINVAL;
2980
2981        if (outlen < offsetofend(struct mlx5_ib_create_qp_resp, bfreg_index))
2982                return -EINVAL;
2983
2984        ucmd = sizeof(struct mlx5_ib_create_qp_rss);
2985        params->ucmd_size = ucmd;
2986        if (inlen > ucmd && !ib_is_udata_cleared(udata, ucmd, inlen - ucmd))
2987                return -EINVAL;
2988
2989        params->inlen = min(ucmd, inlen);
2990out:
2991        if (!params->inlen)
2992                mlx5_ib_dbg(dev, "udata is too small\n");
2993
2994        return (params->inlen) ? 0 : -EINVAL;
2995}
2996
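    /*
     * Dispatch creation to the type-specific handler and publish the
     * IB-visible QP number: 0/1 for the special QPs, otherwise the
     * hardware QPN.
     */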
2997static int create_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd,
2998                     struct mlx5_ib_qp *qp,
2999                     struct mlx5_create_qp_params *params)
3000{
3001        int err;
3002
3003        if (params->is_rss_raw) {
3004                err = create_rss_raw_qp_tir(dev, pd, qp, params);
3005                goto out;
3006        }
3007
3008        switch (qp->type) {
3009        case MLX5_IB_QPT_DCT:
3010                err = create_dct(dev, pd, qp, params);
3011                rdma_restrack_no_track(&qp->ibqp.res);
3012                break;
3013        case MLX5_IB_QPT_DCI:
3014                err = create_dci(dev, pd, qp, params);
3015                break;
3016        case IB_QPT_XRC_TGT:
3017                err = create_xrc_tgt_qp(dev, qp, params);
3018                break;
3019        case IB_QPT_GSI:
3020                err = mlx5_ib_create_gsi(pd, qp, params->attr);
3021                break;
3022        case MLX5_IB_QPT_HW_GSI:
3023        case MLX5_IB_QPT_REG_UMR:
3024                rdma_restrack_no_track(&qp->ibqp.res);
3025                fallthrough;
3026        default:
3027                if (params->udata)
3028                        err = create_user_qp(dev, pd, qp, params);
3029                else
3030                        err = create_kernel_qp(dev, pd, qp, params);
3031        }
3032
3033out:
3034        if (err) {
3035                mlx5_ib_err(dev, "Create QP type %d failed\n", qp->type);
3036                return err;
3037        }
3038
3039        if (is_qp0(qp->type))
3040                qp->ibqp.qp_num = 0;
3041        else if (is_qp1(qp->type))
3042                qp->ibqp.qp_num = 1;
3043        else
3044                qp->ibqp.qp_num = qp->trans_qp.base.mqp.qpn;
3045
3046        mlx5_ib_dbg(dev,
3047                "QP type %d, ib qpn 0x%X, mlx qpn 0x%x, rcqn 0x%x, scqn 0x%x, ece 0x%x\n",
3048                qp->type, qp->ibqp.qp_num, qp->trans_qp.base.mqp.qpn,
3049                params->attr->recv_cq ? to_mcq(params->attr->recv_cq)->mcq.cqn :
3050                                        -1,
3051                params->attr->send_cq ? to_mcq(params->attr->send_cq)->mcq.cqn :
3052                                        -1,
3053                params->resp.ece_options);
3054
3055        return 0;
3056}
3057
3058static int check_qp_attr(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
3059                         struct ib_qp_init_attr *attr)
3060{
3061        int ret = 0;
3062
3063        switch (qp->type) {
3064        case MLX5_IB_QPT_DCT:
3065                ret = (!attr->srq || !attr->recv_cq) ? -EINVAL : 0;
3066                break;
3067        case MLX5_IB_QPT_DCI:
3068                ret = (attr->cap.max_recv_wr || attr->cap.max_recv_sge) ?
3069                              -EINVAL :
3070                              0;
3071                break;
3072        case IB_QPT_RAW_PACKET:
3073                ret = (attr->rwq_ind_tbl && attr->send_cq) ? -EINVAL : 0;
3074                break;
3075        default:
3076                break;
3077        }
3078
3079        if (ret)
3080                mlx5_ib_dbg(dev, "QP type %d has wrong attributes\n", qp->type);
3081
3082        return ret;
3083}
3084
3085static int get_qp_uidx(struct mlx5_ib_qp *qp,
3086                       struct mlx5_create_qp_params *params)
3087{
3088        struct mlx5_ib_create_qp *ucmd = params->ucmd;
3089        struct ib_udata *udata = params->udata;
3090        struct mlx5_ib_ucontext *ucontext = rdma_udata_to_drv_context(
3091                udata, struct mlx5_ib_ucontext, ibucontext);
3092
3093        if (params->is_rss_raw)
3094                return 0;
3095
3096        return get_qp_user_index(ucontext, ucmd, sizeof(*ucmd), &params->uidx);
3097}
3098
3099static int mlx5_ib_destroy_dct(struct mlx5_ib_qp *mqp)
3100{
3101        struct mlx5_ib_dev *dev = to_mdev(mqp->ibqp.device);
3102
3103        if (mqp->state == IB_QPS_RTR) {
3104                int err;
3105
3106                err = mlx5_core_destroy_dct(dev, &mqp->dct.mdct);
3107                if (err) {
3108                        mlx5_ib_warn(dev, "failed to destroy DCT %d\n", err);
3109                        return err;
3110                }
3111        }
3112
3113        kfree(mqp->dct.in);
3114        return 0;
3115}
3116
3117static int check_ucmd_data(struct mlx5_ib_dev *dev,
3118                           struct mlx5_create_qp_params *params)
3119{
3120        struct ib_udata *udata = params->udata;
3121        size_t size, last;
3122        int ret;
3123
3124        if (params->is_rss_raw)
3125                /*
3126                 * These QPs don't have a "reserved" field in their
3127                 * create_qp input struct, so their data is always valid.
3128                 */
3129                last = sizeof(struct mlx5_ib_create_qp_rss);
3130        else
3131                last = offsetof(struct mlx5_ib_create_qp, reserved);
3132
3133        if (udata->inlen <= last)
3134                return 0;
3135
3136        /*
3137         * The user provides different create_qp structures based on the
3138         * flow, and we need to know whether the memory beyond the end
3139         * of our create_qp struct was cleared.
3140         */
3141        size = udata->inlen - last;
3142        ret = ib_is_udata_cleared(params->udata, last, size);
3143        if (!ret)
3144                mlx5_ib_dbg(
3145                        dev,
3146                        "udata is not cleared, inlen = %zu, ucmd = %zu, last = %zu, size = %zu\n",
3147                        udata->inlen, params->ucmd_size, last, size);
3148        return ret ? 0 : -EINVAL;
3149}
3150
3151int mlx5_ib_create_qp(struct ib_qp *ibqp, struct ib_qp_init_attr *attr,
3152                      struct ib_udata *udata)
3153{
3154        struct mlx5_create_qp_params params = {};
3155        struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
3156        struct mlx5_ib_qp *qp = to_mqp(ibqp);
3157        struct ib_pd *pd = ibqp->pd;
3158        enum ib_qp_type type;
3159        int err;
3160
3161        err = check_qp_type(dev, attr, &type);
3162        if (err)
3163                return err;
3164
3165        err = check_valid_flow(dev, pd, attr, udata);
3166        if (err)
3167                return err;
3168
3169        params.udata = udata;
3170        params.uidx = MLX5_IB_DEFAULT_UIDX;
3171        params.attr = attr;
3172        params.is_rss_raw = !!attr->rwq_ind_tbl;
3173
3174        if (udata) {
3175                err = process_udata_size(dev, &params);
3176                if (err)
3177                        return err;
3178
3179                err = check_ucmd_data(dev, &params);
3180                if (err)
3181                        return err;
3182
3183                params.ucmd = kzalloc(params.ucmd_size, GFP_KERNEL);
3184                if (!params.ucmd)
3185                        return -ENOMEM;
3186
3187                err = ib_copy_from_udata(params.ucmd, udata, params.inlen);
3188                if (err)
3189                        goto free_ucmd;
3190        }
3191
3192        mutex_init(&qp->mutex);
3193        qp->type = type;
3194        if (udata) {
3195                err = process_vendor_flags(dev, qp, params.ucmd, attr);
3196                if (err)
3197                        goto free_ucmd;
3198
3199                err = get_qp_uidx(qp, &params);
3200                if (err)
3201                        goto free_ucmd;
3202        }
3203        err = process_create_flags(dev, qp, attr);
3204        if (err)
3205                goto free_ucmd;
3206
3207        err = check_qp_attr(dev, qp, attr);
3208        if (err)
3209                goto free_ucmd;
3210
3211        err = create_qp(dev, pd, qp, &params);
3212        if (err)
3213                goto free_ucmd;
3214
3215        kfree(params.ucmd);
3216        params.ucmd = NULL;
3217
3218        if (udata)
3219                /*
3220                 * It is safe to copy the response for all user create QP flows,
3221                 * including MLX5_IB_QPT_DCT, which doesn't need it.
3222                 * In that case, resp will be filled with zeros.
3223                 */
3224                err = ib_copy_to_udata(udata, &params.resp, params.outlen);
3225        if (err)
3226                goto destroy_qp;
3227
3228        return 0;
3229
3230destroy_qp:
3231        switch (qp->type) {
3232        case MLX5_IB_QPT_DCT:
3233                mlx5_ib_destroy_dct(qp);
3234                break;
3235        case IB_QPT_GSI:
3236                mlx5_ib_destroy_gsi(qp);
3237                break;
3238        default:
3239                destroy_qp_common(dev, qp, udata);
3240        }
3241
3242free_ucmd:
3243        kfree(params.ucmd);
3244        return err;
3245}
3246
3247int mlx5_ib_destroy_qp(struct ib_qp *qp, struct ib_udata *udata)
3248{
3249        struct mlx5_ib_dev *dev = to_mdev(qp->device);
3250        struct mlx5_ib_qp *mqp = to_mqp(qp);
3251
3252        if (mqp->type == IB_QPT_GSI)
3253                return mlx5_ib_destroy_gsi(mqp);
3254
3255        if (mqp->type == MLX5_IB_QPT_DCT)
3256                return mlx5_ib_destroy_dct(mqp);
3257
3258        destroy_qp_common(dev, mqp, udata);
3259        return 0;
3260}
3261
3262static int set_qpc_atomic_flags(struct mlx5_ib_qp *qp,
3263                                const struct ib_qp_attr *attr, int attr_mask,
3264                                void *qpc)
3265{
3266        struct mlx5_ib_dev *dev = to_mdev(qp->ibqp.device);
3267        u8 dest_rd_atomic;
3268        u32 access_flags;
3269
3270        if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC)
3271                dest_rd_atomic = attr->max_dest_rd_atomic;
3272        else
3273                dest_rd_atomic = qp->trans_qp.resp_depth;
3274
3275        if (attr_mask & IB_QP_ACCESS_FLAGS)
3276                access_flags = attr->qp_access_flags;
3277        else
3278                access_flags = qp->trans_qp.atomic_rd_en;
3279
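            /*
             * With no responder resources only remote write can be accepted,
             * so mask off remote read and atomic access.
             */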
3280        if (!dest_rd_atomic)
3281                access_flags &= IB_ACCESS_REMOTE_WRITE;
3282
3283        MLX5_SET(qpc, qpc, rre, !!(access_flags & IB_ACCESS_REMOTE_READ));
3284
3285        if (access_flags & IB_ACCESS_REMOTE_ATOMIC) {
3286                int atomic_mode;
3287
3288                atomic_mode = get_atomic_mode(dev, qp->type);
3289                if (atomic_mode < 0)
3290                        return -EOPNOTSUPP;
3291
3292                MLX5_SET(qpc, qpc, rae, 1);
3293                MLX5_SET(qpc, qpc, atomic_mode, atomic_mode);
3294        }
3295
3296        MLX5_SET(qpc, qpc, rwe, !!(access_flags & IB_ACCESS_REMOTE_WRITE));
3297        return 0;
3298}
3299
3300enum {
3301        MLX5_PATH_FLAG_FL       = 1 << 0,
3302        MLX5_PATH_FLAG_FREE_AR  = 1 << 1,
3303        MLX5_PATH_FLAG_COUNTER  = 1 << 2,
3304};
3305
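    /*
     * Map a device stat_rate value back to an IB rate: values 0..6 index
     * the extended rate table below, anything else uses the legacy
     * "IB rate + MLX5_STAT_RATE_OFFSET" encoding.
     */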
3306static int mlx5_to_ib_rate_map(u8 rate)
3307{
3308        static const int rates[] = { IB_RATE_PORT_CURRENT, IB_RATE_56_GBPS,
3309                                     IB_RATE_25_GBPS,      IB_RATE_100_GBPS,
3310                                     IB_RATE_200_GBPS,     IB_RATE_50_GBPS,
3311                                     IB_RATE_400_GBPS };
3312
3313        if (rate < ARRAY_SIZE(rates))
3314                return rates[rate];
3315
3316        return rate - MLX5_STAT_RATE_OFFSET;
3317}
3318
3319static int ib_to_mlx5_rate_map(u8 rate)
3320{
3321        switch (rate) {
3322        case IB_RATE_PORT_CURRENT:
3323                return 0;
3324        case IB_RATE_56_GBPS:
3325                return 1;
3326        case IB_RATE_25_GBPS:
3327                return 2;
3328        case IB_RATE_100_GBPS:
3329                return 3;
3330        case IB_RATE_200_GBPS:
3331                return 4;
3332        case IB_RATE_50_GBPS:
3333                return 5;
3334        case IB_RATE_400_GBPS:
3335                return 6;
3336        default:
3337                return rate + MLX5_STAT_RATE_OFFSET;
3338        }
3339
3340        return 0;
3341}
3342
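    /*
     * Convert an IB static rate to a device stat_rate value, stepping down
     * to the closest rate the device advertises in stat_rate_support.
     */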
3343static int ib_rate_to_mlx5(struct mlx5_ib_dev *dev, u8 rate)
3344{
3345        u32 stat_rate_support;
3346
3347        if (rate == IB_RATE_PORT_CURRENT)
3348                return 0;
3349
3350        if (rate < IB_RATE_2_5_GBPS || rate > IB_RATE_600_GBPS)
3351                return -EINVAL;
3352
3353        stat_rate_support = MLX5_CAP_GEN(dev->mdev, stat_rate_support);
3354        while (rate != IB_RATE_PORT_CURRENT &&
3355               !(1 << ib_to_mlx5_rate_map(rate) & stat_rate_support))
3356                --rate;
3357
3358        return ib_to_mlx5_rate_map(rate);
3359}
3360
3361static int modify_raw_packet_eth_prio(struct mlx5_core_dev *dev,
3362                                      struct mlx5_ib_sq *sq, u8 sl,
3363                                      struct ib_pd *pd)
3364{
3365        void *in;
3366        void *tisc;
3367        int inlen;
3368        int err;
3369
3370        inlen = MLX5_ST_SZ_BYTES(modify_tis_in);
3371        in = kvzalloc(inlen, GFP_KERNEL);
3372        if (!in)
3373                return -ENOMEM;
3374
3375        MLX5_SET(modify_tis_in, in, bitmask.prio, 1);
3376        MLX5_SET(modify_tis_in, in, uid, to_mpd(pd)->uid);
3377
3378        tisc = MLX5_ADDR_OF(modify_tis_in, in, ctx);
3379        MLX5_SET(tisc, tisc, prio, ((sl & 0x7) << 1));
3380
3381        err = mlx5_core_modify_tis(dev, sq->tisn, in);
3382
3383        kvfree(in);
3384
3385        return err;
3386}
3387
3388static int modify_raw_packet_tx_affinity(struct mlx5_core_dev *dev,
3389                                         struct mlx5_ib_sq *sq, u8 tx_affinity,
3390                                         struct ib_pd *pd)
3391{
3392        void *in;
3393        void *tisc;
3394        int inlen;
3395        int err;
3396
3397        inlen = MLX5_ST_SZ_BYTES(modify_tis_in);
3398        in = kvzalloc(inlen, GFP_KERNEL);
3399        if (!in)
3400                return -ENOMEM;
3401
3402        MLX5_SET(modify_tis_in, in, bitmask.lag_tx_port_affinity, 1);
3403        MLX5_SET(modify_tis_in, in, uid, to_mpd(pd)->uid);
3404
3405        tisc = MLX5_ADDR_OF(modify_tis_in, in, ctx);
3406        MLX5_SET(tisc, tisc, lag_tx_port_affinity, tx_affinity);
3407
3408        err = mlx5_core_modify_tis(dev, sq->tisn, in);
3409
3410        kvfree(in);
3411
3412        return err;
3413}
3414
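    /*
     * Set the RoCE v2 UDP source port from the flow label; when the AH
     * carries no flow label, derive one from the local and remote QPNs.
     */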
3415static void mlx5_set_path_udp_sport(void *path, const struct rdma_ah_attr *ah,
3416                                    u32 lqpn, u32 rqpn)
3418{
3419        u32 fl = ah->grh.flow_label;
3420
3421        if (!fl)
3422                fl = rdma_calc_flow_label(lqpn, rqpn);
3423
3424        MLX5_SET(ads, path, udp_sport, rdma_flow_label_to_udp_sport(fl));
3425}
3426
3427static int mlx5_set_path(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
3428                         const struct rdma_ah_attr *ah, void *path, u8 port,
3429                         int attr_mask, u32 path_flags,
3430                         const struct ib_qp_attr *attr, bool alt)
3431{
3432        const struct ib_global_route *grh = rdma_ah_read_grh(ah);
3433        int err;
3434        enum ib_gid_type gid_type;
3435        u8 ah_flags = rdma_ah_get_ah_flags(ah);
3436        u8 sl = rdma_ah_get_sl(ah);
3437
3438        if (attr_mask & IB_QP_PKEY_INDEX)
3439                MLX5_SET(ads, path, pkey_index,
3440                         alt ? attr->alt_pkey_index : attr->pkey_index);
3441
3442        if (ah_flags & IB_AH_GRH) {
3443                const struct ib_port_immutable *immutable;
3444
3445                immutable = ib_port_immutable_read(&dev->ib_dev, port);
3446                if (grh->sgid_index >= immutable->gid_tbl_len) {
3447                        pr_err("sgid_index (%u) too large. max is %d\n",
3448                               grh->sgid_index,
3449                               immutable->gid_tbl_len);
3450                        return -EINVAL;
3451                }
3452        }
3453
3454        if (ah->type == RDMA_AH_ATTR_TYPE_ROCE) {
3455                if (!(ah_flags & IB_AH_GRH))
3456                        return -EINVAL;
3457
3458                ether_addr_copy(MLX5_ADDR_OF(ads, path, rmac_47_32),
3459                                ah->roce.dmac);
3460                if ((qp->type == IB_QPT_RC ||
3461                     qp->type == IB_QPT_UC ||
3462                     qp->type == IB_QPT_XRC_INI ||
3463                     qp->type == IB_QPT_XRC_TGT) &&
3464                    (grh->sgid_attr->gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP) &&
3465                    (attr_mask & IB_QP_DEST_QPN))
3466                        mlx5_set_path_udp_sport(path, ah,
3467                                                qp->ibqp.qp_num,
3468                                                attr->dest_qp_num);
3469                MLX5_SET(ads, path, eth_prio, sl & 0x7);
3470                gid_type = ah->grh.sgid_attr->gid_type;
3471                if (gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP)
3472                        MLX5_SET(ads, path, dscp, grh->traffic_class >> 2);
3473        } else {
3474                MLX5_SET(ads, path, fl, !!(path_flags & MLX5_PATH_FLAG_FL));
3475                MLX5_SET(ads, path, free_ar,
3476                         !!(path_flags & MLX5_PATH_FLAG_FREE_AR));
3477                MLX5_SET(ads, path, rlid, rdma_ah_get_dlid(ah));
3478                MLX5_SET(ads, path, mlid, rdma_ah_get_path_bits(ah));
3479                MLX5_SET(ads, path, grh, !!(ah_flags & IB_AH_GRH));
3480                MLX5_SET(ads, path, sl, sl);
3481        }
3482
3483        if (ah_flags & IB_AH_GRH) {
3484                MLX5_SET(ads, path, src_addr_index, grh->sgid_index);
3485                MLX5_SET(ads, path, hop_limit, grh->hop_limit);
3486                MLX5_SET(ads, path, tclass, grh->traffic_class);
3487                MLX5_SET(ads, path, flow_label, grh->flow_label);
3488                memcpy(MLX5_ADDR_OF(ads, path, rgid_rip), grh->dgid.raw,
3489                       sizeof(grh->dgid.raw));
3490        }
3491
3492        err = ib_rate_to_mlx5(dev, rdma_ah_get_static_rate(ah));
3493        if (err < 0)
3494                return err;
3495        MLX5_SET(ads, path, stat_rate, err);
3496        MLX5_SET(ads, path, vhca_port_num, port);
3497
3498        if (attr_mask & IB_QP_TIMEOUT)
3499                MLX5_SET(ads, path, ack_timeout,
3500                         alt ? attr->alt_timeout : attr->timeout);
3501
3502        if ((qp->type == IB_QPT_RAW_PACKET) && qp->sq.wqe_cnt)
3503                return modify_raw_packet_eth_prio(dev->mdev,
3504                                                  &qp->raw_packet_qp.sq,
3505                                                  sl & 0xf, qp->ibqp.pd);
3506
3507        return 0;
3508}
3509
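    /*
     * Optional parameters (OPTPAR) that firmware accepts for each
     * [current state][new state][service type] transition.
     */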
3510static enum mlx5_qp_optpar opt_mask[MLX5_QP_NUM_STATE][MLX5_QP_NUM_STATE][MLX5_QP_ST_MAX] = {
3511        [MLX5_QP_STATE_INIT] = {
3512                [MLX5_QP_STATE_INIT] = {
3513                        [MLX5_QP_ST_RC] = MLX5_QP_OPTPAR_RRE            |
3514                                          MLX5_QP_OPTPAR_RAE            |
3515                                          MLX5_QP_OPTPAR_RWE            |
3516                                          MLX5_QP_OPTPAR_PKEY_INDEX     |
3517                                          MLX5_QP_OPTPAR_PRI_PORT       |
3518                                          MLX5_QP_OPTPAR_LAG_TX_AFF,
3519                        [MLX5_QP_ST_UC] = MLX5_QP_OPTPAR_RWE            |
3520                                          MLX5_QP_OPTPAR_PKEY_INDEX     |
3521                                          MLX5_QP_OPTPAR_PRI_PORT       |
3522                                          MLX5_QP_OPTPAR_LAG_TX_AFF,
3523                        [MLX5_QP_ST_UD] = MLX5_QP_OPTPAR_PKEY_INDEX     |
3524                                          MLX5_QP_OPTPAR_Q_KEY          |
3525                                          MLX5_QP_OPTPAR_PRI_PORT,
3526                        [MLX5_QP_ST_XRC] = MLX5_QP_OPTPAR_RRE           |
3527                                          MLX5_QP_OPTPAR_RAE            |
3528                                          MLX5_QP_OPTPAR_RWE            |
3529                                          MLX5_QP_OPTPAR_PKEY_INDEX     |
3530                                          MLX5_QP_OPTPAR_PRI_PORT       |
3531                                          MLX5_QP_OPTPAR_LAG_TX_AFF,
3532                },
3533                [MLX5_QP_STATE_RTR] = {
3534                        [MLX5_QP_ST_RC] = MLX5_QP_OPTPAR_ALT_ADDR_PATH  |
3535                                          MLX5_QP_OPTPAR_RRE            |
3536                                          MLX5_QP_OPTPAR_RAE            |
3537                                          MLX5_QP_OPTPAR_RWE            |
3538                                          MLX5_QP_OPTPAR_PKEY_INDEX     |
3539                                          MLX5_QP_OPTPAR_LAG_TX_AFF,
3540                        [MLX5_QP_ST_UC] = MLX5_QP_OPTPAR_ALT_ADDR_PATH  |
3541                                          MLX5_QP_OPTPAR_RWE            |
3542                                          MLX5_QP_OPTPAR_PKEY_INDEX     |
3543                                          MLX5_QP_OPTPAR_LAG_TX_AFF,
3544                        [MLX5_QP_ST_UD] = MLX5_QP_OPTPAR_PKEY_INDEX     |
3545                                          MLX5_QP_OPTPAR_Q_KEY,
3546                        [MLX5_QP_ST_MLX] = MLX5_QP_OPTPAR_PKEY_INDEX    |
3547                                           MLX5_QP_OPTPAR_Q_KEY,
3548                        [MLX5_QP_ST_XRC] = MLX5_QP_OPTPAR_ALT_ADDR_PATH |
3549                                          MLX5_QP_OPTPAR_RRE            |
3550                                          MLX5_QP_OPTPAR_RAE            |
3551                                          MLX5_QP_OPTPAR_RWE            |
3552                                          MLX5_QP_OPTPAR_PKEY_INDEX     |
3553                                          MLX5_QP_OPTPAR_LAG_TX_AFF,
3554                },
3555        },
3556        [MLX5_QP_STATE_RTR] = {
3557                [MLX5_QP_STATE_RTS] = {
3558                        [MLX5_QP_ST_RC] = MLX5_QP_OPTPAR_ALT_ADDR_PATH  |
3559                                          MLX5_QP_OPTPAR_RRE            |
3560                                          MLX5_QP_OPTPAR_RAE            |
3561                                          MLX5_QP_OPTPAR_RWE            |
3562                                          MLX5_QP_OPTPAR_PM_STATE       |
3563                                          MLX5_QP_OPTPAR_RNR_TIMEOUT,
3564                        [MLX5_QP_ST_UC] = MLX5_QP_OPTPAR_ALT_ADDR_PATH  |
3565                                          MLX5_QP_OPTPAR_RWE            |
3566                                          MLX5_QP_OPTPAR_PM_STATE,
3567                        [MLX5_QP_ST_UD] = MLX5_QP_OPTPAR_Q_KEY,
3568                        [MLX5_QP_ST_XRC] = MLX5_QP_OPTPAR_ALT_ADDR_PATH |
3569                                          MLX5_QP_OPTPAR_RRE            |
3570                                          MLX5_QP_OPTPAR_RAE            |
3571                                          MLX5_QP_OPTPAR_RWE            |
3572                                          MLX5_QP_OPTPAR_PM_STATE       |
3573                                          MLX5_QP_OPTPAR_RNR_TIMEOUT,
3574                },
3575        },
3576        [MLX5_QP_STATE_RTS] = {
3577                [MLX5_QP_STATE_RTS] = {
3578                        [MLX5_QP_ST_RC] = MLX5_QP_OPTPAR_RRE            |
3579                                          MLX5_QP_OPTPAR_RAE            |
3580                                          MLX5_QP_OPTPAR_RWE            |
3581                                          MLX5_QP_OPTPAR_RNR_TIMEOUT    |
3582                                          MLX5_QP_OPTPAR_PM_STATE       |
3583                                          MLX5_QP_OPTPAR_ALT_ADDR_PATH,
3584                        [MLX5_QP_ST_UC] = MLX5_QP_OPTPAR_RWE            |
3585                                          MLX5_QP_OPTPAR_PM_STATE       |
3586                                          MLX5_QP_OPTPAR_ALT_ADDR_PATH,
3587                        [MLX5_QP_ST_UD] = MLX5_QP_OPTPAR_Q_KEY          |
3588                                          MLX5_QP_OPTPAR_SRQN           |
3589                                          MLX5_QP_OPTPAR_CQN_RCV,
3590                        [MLX5_QP_ST_XRC] = MLX5_QP_OPTPAR_RRE           |
3591                                          MLX5_QP_OPTPAR_RAE            |
3592                                          MLX5_QP_OPTPAR_RWE            |
3593                                          MLX5_QP_OPTPAR_RNR_TIMEOUT    |
3594                                          MLX5_QP_OPTPAR_PM_STATE       |
3595                                          MLX5_QP_OPTPAR_ALT_ADDR_PATH,
3596                },
3597        },
3598        [MLX5_QP_STATE_SQER] = {
3599                [MLX5_QP_STATE_RTS] = {
3600                        [MLX5_QP_ST_UD]  = MLX5_QP_OPTPAR_Q_KEY,
3601                        [MLX5_QP_ST_MLX] = MLX5_QP_OPTPAR_Q_KEY,
3602                        [MLX5_QP_ST_UC]  = MLX5_QP_OPTPAR_RWE,
3603                        [MLX5_QP_ST_RC]  = MLX5_QP_OPTPAR_RNR_TIMEOUT   |
3604                                           MLX5_QP_OPTPAR_RWE           |
3605                                           MLX5_QP_OPTPAR_RAE           |
3606                                           MLX5_QP_OPTPAR_RRE,
3607                        [MLX5_QP_ST_XRC]  = MLX5_QP_OPTPAR_RNR_TIMEOUT  |
3608                                           MLX5_QP_OPTPAR_RWE           |
3609                                           MLX5_QP_OPTPAR_RAE           |
3610                                           MLX5_QP_OPTPAR_RRE,
3611                },
3612        },
3613        [MLX5_QP_STATE_SQD] = {
3614                [MLX5_QP_STATE_RTS] = {
3615                        [MLX5_QP_ST_UD] = MLX5_QP_OPTPAR_Q_KEY,
3616                        [MLX5_QP_ST_MLX] = MLX5_QP_OPTPAR_Q_KEY,
3617                        [MLX5_QP_ST_UC] = MLX5_QP_OPTPAR_RWE,
3618                        [MLX5_QP_ST_RC] = MLX5_QP_OPTPAR_RNR_TIMEOUT    |
3619                                          MLX5_QP_OPTPAR_RWE            |
3620                                          MLX5_QP_OPTPAR_RAE            |
3621                                          MLX5_QP_OPTPAR_RRE,
3622                },
3623        },
3624};
3625
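    /*
     * Translate a single IB_QP_* attribute bit into the matching
     * MLX5_QP_OPTPAR_* bits; attributes with no optional-parameter
     * counterpart map to 0.
     */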
3626static int ib_nr_to_mlx5_nr(int ib_mask)
3627{
3628        switch (ib_mask) {
3629        case IB_QP_STATE:
3630                return 0;
3631        case IB_QP_CUR_STATE:
3632                return 0;
3633        case IB_QP_EN_SQD_ASYNC_NOTIFY:
3634                return 0;
3635        case IB_QP_ACCESS_FLAGS:
3636                return MLX5_QP_OPTPAR_RWE | MLX5_QP_OPTPAR_RRE |
3637                        MLX5_QP_OPTPAR_RAE;
3638        case IB_QP_PKEY_INDEX:
3639                return MLX5_QP_OPTPAR_PKEY_INDEX;
3640        case IB_QP_PORT:
3641                return MLX5_QP_OPTPAR_PRI_PORT;
3642        case IB_QP_QKEY:
3643                return MLX5_QP_OPTPAR_Q_KEY;
3644        case IB_QP_AV:
3645                return MLX5_QP_OPTPAR_PRIMARY_ADDR_PATH |
3646                        MLX5_QP_OPTPAR_PRI_PORT;
3647        case IB_QP_PATH_MTU:
3648                return 0;
3649        case IB_QP_TIMEOUT:
3650                return MLX5_QP_OPTPAR_ACK_TIMEOUT;
3651        case IB_QP_RETRY_CNT:
3652                return MLX5_QP_OPTPAR_RETRY_COUNT;
3653        case IB_QP_RNR_RETRY:
3654                return MLX5_QP_OPTPAR_RNR_RETRY;
3655        case IB_QP_RQ_PSN:
3656                return 0;
3657        case IB_QP_MAX_QP_RD_ATOMIC:
3658                return MLX5_QP_OPTPAR_SRA_MAX;
3659        case IB_QP_ALT_PATH:
3660                return MLX5_QP_OPTPAR_ALT_ADDR_PATH;
3661        case IB_QP_MIN_RNR_TIMER:
3662                return MLX5_QP_OPTPAR_RNR_TIMEOUT;
3663        case IB_QP_SQ_PSN:
3664                return 0;
3665        case IB_QP_MAX_DEST_RD_ATOMIC:
3666                return MLX5_QP_OPTPAR_RRA_MAX | MLX5_QP_OPTPAR_RWE |
3667                        MLX5_QP_OPTPAR_RRE | MLX5_QP_OPTPAR_RAE;
3668        case IB_QP_PATH_MIG_STATE:
3669                return MLX5_QP_OPTPAR_PM_STATE;
3670        case IB_QP_CAP:
3671                return 0;
3672        case IB_QP_DEST_QPN:
3673                return 0;
3674        }
3675        return 0;
3676}
3677
3678static int ib_mask_to_mlx5_opt(int ib_mask)
3679{
3680        int result = 0;
3681        int i;
3682
3683        for (i = 0; i < 8 * sizeof(int); i++) {
3684                if ((1 << i) & ib_mask)
3685                        result |= ib_nr_to_mlx5_nr(1 << i);
3686        }
3687
3688        return result;
3689}
3690
3691static int modify_raw_packet_qp_rq(
3692        struct mlx5_ib_dev *dev, struct mlx5_ib_rq *rq, int new_state,
3693        const struct mlx5_modify_raw_qp_param *raw_qp_param, struct ib_pd *pd)
3694{
3695        void *in;
3696        void *rqc;
3697        int inlen;
3698        int err;
3699
3700        inlen = MLX5_ST_SZ_BYTES(modify_rq_in);
3701        in = kvzalloc(inlen, GFP_KERNEL);
3702        if (!in)
3703                return -ENOMEM;
3704
3705        MLX5_SET(modify_rq_in, in, rq_state, rq->state);
3706        MLX5_SET(modify_rq_in, in, uid, to_mpd(pd)->uid);
3707
3708        rqc = MLX5_ADDR_OF(modify_rq_in, in, ctx);
3709        MLX5_SET(rqc, rqc, state, new_state);
3710
3711        if (raw_qp_param->set_mask & MLX5_RAW_QP_MOD_SET_RQ_Q_CTR_ID) {
3712                if (MLX5_CAP_GEN(dev->mdev, modify_rq_counter_set_id)) {
3713                        MLX5_SET64(modify_rq_in, in, modify_bitmask,
3714                                   MLX5_MODIFY_RQ_IN_MODIFY_BITMASK_RQ_COUNTER_SET_ID);
3715                        MLX5_SET(rqc, rqc, counter_set_id, raw_qp_param->rq_q_ctr_id);
3716                } else
3717                        dev_info_once(
3718                                &dev->ib_dev.dev,
3719                                "RAW PACKET QP counters are not supported on current FW\n");
3720        }
3721
3722        err = mlx5_core_modify_rq(dev->mdev, rq->base.mqp.qpn, in);
3723        if (err)
3724                goto out;
3725
3726        rq->state = new_state;
3727
3728out:
3729        kvfree(in);
3730        return err;
3731}
3732
3733static int modify_raw_packet_qp_sq(
3734        struct mlx5_core_dev *dev, struct mlx5_ib_sq *sq, int new_state,
3735        const struct mlx5_modify_raw_qp_param *raw_qp_param, struct ib_pd *pd)
3736{
3737        struct mlx5_ib_qp *ibqp = sq->base.container_mibqp;
3738        struct mlx5_rate_limit old_rl = ibqp->rl;
3739        struct mlx5_rate_limit new_rl = old_rl;
3740        bool new_rate_added = false;
3741        u16 rl_index = 0;
3742        void *in;
3743        void *sqc;
3744        int inlen;
3745        int err;
3746
3747        inlen = MLX5_ST_SZ_BYTES(modify_sq_in);
3748        in = kvzalloc(inlen, GFP_KERNEL);
3749        if (!in)
3750                return -ENOMEM;
3751
3752        MLX5_SET(modify_sq_in, in, uid, to_mpd(pd)->uid);
3753        MLX5_SET(modify_sq_in, in, sq_state, sq->state);
3754
3755        sqc = MLX5_ADDR_OF(modify_sq_in, in, ctx);
3756        MLX5_SET(sqc, sqc, state, new_state);
3757
3758        if (raw_qp_param->set_mask & MLX5_RAW_QP_RATE_LIMIT) {
3759                if (new_state != MLX5_SQC_STATE_RDY)
3760                        pr_warn("%s: Rate limit can only be changed when SQ is moving to RDY\n",
3761                                __func__);
3762                else
3763                        new_rl = raw_qp_param->rl;
3764        }
3765
3766        if (!mlx5_rl_are_equal(&old_rl, &new_rl)) {
3767                if (new_rl.rate) {
3768                        err = mlx5_rl_add_rate(dev, &rl_index, &new_rl);
3769                        if (err) {
3770                                pr_err("Failed configuring rate limit (err %d): "
3771                                       "rate %u, max_burst_sz %u, typical_pkt_sz %u\n",
3772                                       err, new_rl.rate, new_rl.max_burst_sz,
3773                                       new_rl.typical_pkt_sz);
3774
3775                                goto out;
3776                        }
3777                        new_rate_added = true;
3778                }
3779
3780                MLX5_SET64(modify_sq_in, in, modify_bitmask, 1);
3781                /* index 0 means no limit */
3782                MLX5_SET(sqc, sqc, packet_pacing_rate_limit_index, rl_index);
3783        }
3784
3785        err = mlx5_core_modify_sq(dev, sq->base.mqp.qpn, in);
3786        if (err) {
3787                /* Remove new rate from table if failed */
3788                if (new_rate_added)
3789                        mlx5_rl_remove_rate(dev, &new_rl);
3790                goto out;
3791        }
3792
3793        /* Only remove the old rate after the new rate was set */
3794        if ((old_rl.rate && !mlx5_rl_are_equal(&old_rl, &new_rl)) ||
3795            (new_state != MLX5_SQC_STATE_RDY)) {
3796                mlx5_rl_remove_rate(dev, &old_rl);
3797                if (new_state != MLX5_SQC_STATE_RDY)
3798                        memset(&new_rl, 0, sizeof(new_rl));
3799        }
3800
3801        ibqp->rl = new_rl;
3802        sq->state = new_state;
3803
3804out:
3805        kvfree(in);
3806        return err;
3807}
3808
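    /*
     * Raw packet QPs are built from separate RQ and SQ objects, so a QP
     * state transition is applied as the corresponding RQ and SQ state
     * changes, along with tx affinity and the vport SQ flow rule.
     */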
3809static int modify_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
3810                                const struct mlx5_modify_raw_qp_param *raw_qp_param,
3811                                u8 tx_affinity)
3812{
3813        struct mlx5_ib_raw_packet_qp *raw_packet_qp = &qp->raw_packet_qp;
3814        struct mlx5_ib_rq *rq = &raw_packet_qp->rq;
3815        struct mlx5_ib_sq *sq = &raw_packet_qp->sq;
3816        int modify_rq = !!qp->rq.wqe_cnt;
3817        int modify_sq = !!qp->sq.wqe_cnt;
3818        int rq_state;
3819        int sq_state;
3820        int err;
3821
3822        switch (raw_qp_param->operation) {
3823        case MLX5_CMD_OP_RST2INIT_QP:
3824                rq_state = MLX5_RQC_STATE_RDY;
3825                sq_state = MLX5_SQC_STATE_RST;
3826                break;
3827        case MLX5_CMD_OP_2ERR_QP:
3828                rq_state = MLX5_RQC_STATE_ERR;
3829                sq_state = MLX5_SQC_STATE_ERR;
3830                break;
3831        case MLX5_CMD_OP_2RST_QP:
3832                rq_state = MLX5_RQC_STATE_RST;
3833                sq_state = MLX5_SQC_STATE_RST;
3834                break;
3835        case MLX5_CMD_OP_RTR2RTS_QP:
3836        case MLX5_CMD_OP_RTS2RTS_QP:
3837                if (raw_qp_param->set_mask & ~MLX5_RAW_QP_RATE_LIMIT)
3838                        return -EINVAL;
3839
3840                modify_rq = 0;
3841                sq_state = MLX5_SQC_STATE_RDY;
3842                break;
3843        case MLX5_CMD_OP_INIT2INIT_QP:
3844        case MLX5_CMD_OP_INIT2RTR_QP:
3845                if (raw_qp_param->set_mask)
3846                        return -EINVAL;
3847                else
3848                        return 0;
3849        default:
3850                WARN_ON(1);
3851                return -EINVAL;
3852        }
3853
3854        if (modify_rq) {
3855                err =  modify_raw_packet_qp_rq(dev, rq, rq_state, raw_qp_param,
3856                                               qp->ibqp.pd);
3857                if (err)
3858                        return err;
3859        }
3860
3861        if (modify_sq) {
3862                struct mlx5_flow_handle *flow_rule;
3863
3864                if (tx_affinity) {
3865                        err = modify_raw_packet_tx_affinity(dev->mdev, sq,
3866                                                            tx_affinity,
3867                                                            qp->ibqp.pd);
3868                        if (err)
3869                                return err;
3870                }
3871
3872                flow_rule = create_flow_rule_vport_sq(dev, sq,
3873                                                      raw_qp_param->port);
3874                if (IS_ERR(flow_rule))
3875                        return PTR_ERR(flow_rule);
3876
3877                err = modify_raw_packet_qp_sq(dev->mdev, sq, sq_state,
3878                                              raw_qp_param, qp->ibqp.pd);
3879                if (err) {
3880                        if (flow_rule)
3881                                mlx5_del_flow_rules(flow_rule);
3882                        return err;
3883                }
3884
3885                if (flow_rule) {
3886                        destroy_flow_rule_vport_sq(sq);
3887                        sq->flow_rule = flow_rule;
3888                }
3889
3890                return err;
3891        }
3892
3893        return 0;
3894}
3895
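    /*
     * Pick a LAG tx port in a round-robin manner: user QPs share a counter
     * per ucontext, kernel QPs share a counter per native port. The result
     * is a 1-based port number.
     */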
3896static unsigned int get_tx_affinity_rr(struct mlx5_ib_dev *dev,
3897                                       struct ib_udata *udata)
3898{
3899        struct mlx5_ib_ucontext *ucontext = rdma_udata_to_drv_context(
3900                udata, struct mlx5_ib_ucontext, ibucontext);
3901        u8 port_num = mlx5_core_native_port_num(dev->mdev) - 1;
3902        atomic_t *tx_port_affinity;
3903
3904        if (ucontext)
3905                tx_port_affinity = &ucontext->tx_port_affinity;
3906        else
3907                tx_port_affinity = &dev->port[port_num].roce.tx_port_affinity;
3908
3909        return (unsigned int)atomic_add_return(1, tx_port_affinity) %
3910                MLX5_MAX_PORTS + 1;
3911}
3912
3913static bool qp_supports_affinity(struct mlx5_ib_qp *qp)
3914{
3915        if ((qp->type == IB_QPT_RC) || (qp->type == IB_QPT_UD) ||
3916            (qp->type == IB_QPT_UC) || (qp->type == IB_QPT_RAW_PACKET) ||
3917            (qp->type == IB_QPT_XRC_INI) || (qp->type == IB_QPT_XRC_TGT) ||
3918            (qp->type == MLX5_IB_QPT_DCI))
3919                return true;
3920        return false;
3921}
3922
3923static unsigned int get_tx_affinity(struct ib_qp *qp,
3924                                    const struct ib_qp_attr *attr,
3925                                    int attr_mask, u8 init,
3926                                    struct ib_udata *udata)
3927{
3928        struct mlx5_ib_ucontext *ucontext = rdma_udata_to_drv_context(
3929                udata, struct mlx5_ib_ucontext, ibucontext);
3930        struct mlx5_ib_dev *dev = to_mdev(qp->device);
3931        struct mlx5_ib_qp *mqp = to_mqp(qp);
3932        struct mlx5_ib_qp_base *qp_base;
3933        unsigned int tx_affinity;
3934
3935        if (!(mlx5_ib_lag_should_assign_affinity(dev) &&
3936              qp_supports_affinity(mqp)))
3937                return 0;
3938
3939        if (mqp->flags & MLX5_IB_QP_CREATE_SQPN_QP1)
3940                tx_affinity = mqp->gsi_lag_port;
3941        else if (init)
3942                tx_affinity = get_tx_affinity_rr(dev, udata);
3943        else if ((attr_mask & IB_QP_AV) && attr->xmit_slave)
3944                tx_affinity =
3945                        mlx5_lag_get_slave_port(dev->mdev, attr->xmit_slave);
3946        else
3947                return 0;
3948
3949        qp_base = &mqp->trans_qp.base;
3950        if (ucontext)
3951                mlx5_ib_dbg(dev, "Set tx affinity 0x%x to qpn 0x%x ucontext %p\n",
3952                            tx_affinity, qp_base->mqp.qpn, ucontext);
3953        else
3954                mlx5_ib_dbg(dev, "Set tx affinity 0x%x to qpn 0x%x\n",
3955                            tx_affinity, qp_base->mqp.qpn);
3956        return tx_affinity;
3957}
3958
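    /*
     * Bind the QP to the given counter set (or back to the default one)
     * with an RTS2RTS modify that only changes counter_set_id.
     */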
3959static int __mlx5_ib_qp_set_counter(struct ib_qp *qp,
3960                                    struct rdma_counter *counter)
3961{
3962        struct mlx5_ib_dev *dev = to_mdev(qp->device);
3963        u32 in[MLX5_ST_SZ_DW(rts2rts_qp_in)] = {};
3964        struct mlx5_ib_qp *mqp = to_mqp(qp);
3965        struct mlx5_ib_qp_base *base;
3966        u32 set_id;
3967        u32 *qpc;
3968
3969        if (counter)
3970                set_id = counter->id;
3971        else
3972                set_id = mlx5_ib_get_counters_id(dev, mqp->port - 1);
3973
3974        base = &mqp->trans_qp.base;
3975        MLX5_SET(rts2rts_qp_in, in, opcode, MLX5_CMD_OP_RTS2RTS_QP);
3976        MLX5_SET(rts2rts_qp_in, in, qpn, base->mqp.qpn);
3977        MLX5_SET(rts2rts_qp_in, in, uid, base->mqp.uid);
3978        MLX5_SET(rts2rts_qp_in, in, opt_param_mask,
3979                 MLX5_QP_OPTPAR_COUNTER_SET_ID);
3980
3981        qpc = MLX5_ADDR_OF(rts2rts_qp_in, in, qpc);
3982        MLX5_SET(qpc, qpc, counter_set_id, set_id);
3983        return mlx5_cmd_exec_in(dev->mdev, rts2rts_qp, in);
3984}
3985
3986static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
3987                               const struct ib_qp_attr *attr, int attr_mask,
3988                               enum ib_qp_state cur_state,
3989                               enum ib_qp_state new_state,
3990                               const struct mlx5_ib_modify_qp *ucmd,
3991                               struct mlx5_ib_modify_qp_resp *resp,
3992                               struct ib_udata *udata)
3993{
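            /* Firmware command used for each supported [current][new] state transition */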
3994        static const u16 optab[MLX5_QP_NUM_STATE][MLX5_QP_NUM_STATE] = {
3995                [MLX5_QP_STATE_RST] = {
3996                        [MLX5_QP_STATE_RST]     = MLX5_CMD_OP_2RST_QP,
3997                        [MLX5_QP_STATE_ERR]     = MLX5_CMD_OP_2ERR_QP,
3998                        [MLX5_QP_STATE_INIT]    = MLX5_CMD_OP_RST2INIT_QP,
3999                },
4000                [MLX5_QP_STATE_INIT]  = {
4001                        [MLX5_QP_STATE_RST]     = MLX5_CMD_OP_2RST_QP,
4002                        [MLX5_QP_STATE_ERR]     = MLX5_CMD_OP_2ERR_QP,
4003                        [MLX5_QP_STATE_INIT]    = MLX5_CMD_OP_INIT2INIT_QP,
4004                        [MLX5_QP_STATE_RTR]     = MLX5_CMD_OP_INIT2RTR_QP,
4005                },
4006                [MLX5_QP_STATE_RTR]   = {
4007                        [MLX5_QP_STATE_RST]     = MLX5_CMD_OP_2RST_QP,
4008                        [MLX5_QP_STATE_ERR]     = MLX5_CMD_OP_2ERR_QP,
4009                        [MLX5_QP_STATE_RTS]     = MLX5_CMD_OP_RTR2RTS_QP,
4010                },
4011                [MLX5_QP_STATE_RTS]   = {
4012                        [MLX5_QP_STATE_RST]     = MLX5_CMD_OP_2RST_QP,
4013                        [MLX5_QP_STATE_ERR]     = MLX5_CMD_OP_2ERR_QP,
4014                        [MLX5_QP_STATE_RTS]     = MLX5_CMD_OP_RTS2RTS_QP,
4015                },
4016                [MLX5_QP_STATE_SQD] = {
4017                        [MLX5_QP_STATE_RST]     = MLX5_CMD_OP_2RST_QP,
4018                        [MLX5_QP_STATE_ERR]     = MLX5_CMD_OP_2ERR_QP,
4019                        [MLX5_QP_STATE_RTS]     = MLX5_CMD_OP_SQD_RTS_QP,
4020                },
4021                [MLX5_QP_STATE_SQER] = {
4022                        [MLX5_QP_STATE_RST]     = MLX5_CMD_OP_2RST_QP,
4023                        [MLX5_QP_STATE_ERR]     = MLX5_CMD_OP_2ERR_QP,
4024                        [MLX5_QP_STATE_RTS]     = MLX5_CMD_OP_SQERR2RTS_QP,
4025                },
4026                [MLX5_QP_STATE_ERR] = {
4027                        [MLX5_QP_STATE_RST]     = MLX5_CMD_OP_2RST_QP,
4028                        [MLX5_QP_STATE_ERR]     = MLX5_CMD_OP_2ERR_QP,
4029                }
4030        };
4031
4032        struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
4033        struct mlx5_ib_qp *qp = to_mqp(ibqp);
4034        struct mlx5_ib_qp_base *base = &qp->trans_qp.base;
4035        struct mlx5_ib_cq *send_cq, *recv_cq;
4036        struct mlx5_ib_pd *pd;
4037        enum mlx5_qp_state mlx5_cur, mlx5_new;
4038        void *qpc, *pri_path, *alt_path;
4039        enum mlx5_qp_optpar optpar = 0;
4040        u32 set_id = 0;
4041        int mlx5_st;
4042        int err;
4043        u16 op;
4044        u8 tx_affinity = 0;
4045
4046        mlx5_st = to_mlx5_st(qp->type);
4047        if (mlx5_st < 0)
4048                return -EINVAL;
4049
4050        qpc = kzalloc(MLX5_ST_SZ_BYTES(qpc), GFP_KERNEL);
4051        if (!qpc)
4052                return -ENOMEM;
4053
4054        pd = to_mpd(qp->ibqp.pd);
4055        MLX5_SET(qpc, qpc, st, mlx5_st);
4056
4057        if (!(attr_mask & IB_QP_PATH_MIG_STATE)) {
4058                MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED);
4059        } else {
4060                switch (attr->path_mig_state) {
4061                case IB_MIG_MIGRATED:
4062                        MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED);
4063                        break;
4064                case IB_MIG_REARM:
4065                        MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_REARM);
4066                        break;
4067                case IB_MIG_ARMED:
4068                        MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_ARMED);
4069                        break;
4070                }
4071        }
4072
4073        tx_affinity = get_tx_affinity(ibqp, attr, attr_mask,
4074                                      cur_state == IB_QPS_RESET &&
4075                                      new_state == IB_QPS_INIT, udata);
4076
4077        MLX5_SET(qpc, qpc, lag_tx_port_affinity, tx_affinity);
4078        if (tx_affinity && new_state == IB_QPS_RTR &&
4079            MLX5_CAP_GEN(dev->mdev, init2_lag_tx_port_affinity))
4080                optpar |= MLX5_QP_OPTPAR_LAG_TX_AFF;
4081
4082        if (is_sqp(qp->type)) {
4083                MLX5_SET(qpc, qpc, mtu, IB_MTU_256);
4084                MLX5_SET(qpc, qpc, log_msg_max, 8);
4085        } else if ((qp->type == IB_QPT_UD &&
4086                    !(qp->flags & IB_QP_CREATE_SOURCE_QPN)) ||
4087                   qp->type == MLX5_IB_QPT_REG_UMR) {
4088                MLX5_SET(qpc, qpc, mtu, IB_MTU_4096);
4089                MLX5_SET(qpc, qpc, log_msg_max, 12);
4090        } else if (attr_mask & IB_QP_PATH_MTU) {
4091                if (attr->path_mtu < IB_MTU_256 ||
4092                    attr->path_mtu > IB_MTU_4096) {
4093                        mlx5_ib_warn(dev, "invalid mtu %d\n", attr->path_mtu);
4094                        err = -EINVAL;
4095                        goto out;
4096                }
4097                MLX5_SET(qpc, qpc, mtu, attr->path_mtu);
4098                MLX5_SET(qpc, qpc, log_msg_max,
4099                         MLX5_CAP_GEN(dev->mdev, log_max_msg));
4100        }
4101
4102        if (attr_mask & IB_QP_DEST_QPN)
4103                MLX5_SET(qpc, qpc, remote_qpn, attr->dest_qp_num);
4104
4105        pri_path = MLX5_ADDR_OF(qpc, qpc, primary_address_path);
4106        alt_path = MLX5_ADDR_OF(qpc, qpc, secondary_address_path);
4107
4108        if (attr_mask & IB_QP_PKEY_INDEX)
4109                MLX5_SET(ads, pri_path, pkey_index, attr->pkey_index);
4110
4111        /* TODO: implement counter_index functionality */
4112
4113        if (is_sqp(qp->type))
4114                MLX5_SET(ads, pri_path, vhca_port_num, qp->port);
4115
4116        if (attr_mask & IB_QP_PORT)
4117                MLX5_SET(ads, pri_path, vhca_port_num, attr->port_num);
4118
4119        if (attr_mask & IB_QP_AV) {
4120                err = mlx5_set_path(dev, qp, &attr->ah_attr, pri_path,
4121                                    attr_mask & IB_QP_PORT ? attr->port_num :
4122                                                             qp->port,
4123                                    attr_mask, 0, attr, false);
4124                if (err)
4125                        goto out;
4126        }
4127
4128        if (attr_mask & IB_QP_TIMEOUT)
4129                MLX5_SET(ads, pri_path, ack_timeout, attr->timeout);
4130
4131        if (attr_mask & IB_QP_ALT_PATH) {
4132                err = mlx5_set_path(dev, qp, &attr->alt_ah_attr, alt_path,
4133                                    attr->alt_port_num,
4134                                    attr_mask | IB_QP_PKEY_INDEX |
4135                                            IB_QP_TIMEOUT,
4136                                    0, attr, true);
4137                if (err)
4138                        goto out;
4139        }
4140
4141        get_cqs(qp->type, qp->ibqp.send_cq, qp->ibqp.recv_cq,
4142                &send_cq, &recv_cq);
4143
4144        MLX5_SET(qpc, qpc, pd, pd ? pd->pdn : to_mpd(dev->devr.p0)->pdn);
4145        if (send_cq)
4146                MLX5_SET(qpc, qpc, cqn_snd, send_cq->mcq.cqn);
4147        if (recv_cq)
4148                MLX5_SET(qpc, qpc, cqn_rcv, recv_cq->mcq.cqn);
4149
4150        MLX5_SET(qpc, qpc, log_ack_req_freq, MLX5_IB_ACK_REQ_FREQ);
4151
4152        if (attr_mask & IB_QP_RNR_RETRY)
4153                MLX5_SET(qpc, qpc, rnr_retry, attr->rnr_retry);
4154
4155        if (attr_mask & IB_QP_RETRY_CNT)
4156                MLX5_SET(qpc, qpc, retry_count, attr->retry_cnt);
4157
4158        if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC && attr->max_rd_atomic)
4159                MLX5_SET(qpc, qpc, log_sra_max, ilog2(attr->max_rd_atomic));
4160
4161        if (attr_mask & IB_QP_SQ_PSN)
4162                MLX5_SET(qpc, qpc, next_send_psn, attr->sq_psn);
4163
4164        if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC && attr->max_dest_rd_atomic)
4165                MLX5_SET(qpc, qpc, log_rra_max,
4166                         ilog2(attr->max_dest_rd_atomic));
4167
4168        if (attr_mask & (IB_QP_ACCESS_FLAGS | IB_QP_MAX_DEST_RD_ATOMIC)) {
4169                err = set_qpc_atomic_flags(qp, attr, attr_mask, qpc);
4170                if (err)
4171                        goto out;
4172        }
4173
4174        if (attr_mask & IB_QP_MIN_RNR_TIMER)
4175                MLX5_SET(qpc, qpc, min_rnr_nak, attr->min_rnr_timer);
4176
4177        if (attr_mask & IB_QP_RQ_PSN)
4178                MLX5_SET(qpc, qpc, next_rcv_psn, attr->rq_psn);
4179
4180        if (attr_mask & IB_QP_QKEY)
4181                MLX5_SET(qpc, qpc, q_key, attr->qkey);
4182
4183        if (qp->rq.wqe_cnt && cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT)
4184                MLX5_SET64(qpc, qpc, dbr_addr, qp->db.dma);
4185
4186        if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) {
4187                u8 port_num = (attr_mask & IB_QP_PORT ? attr->port_num :
4188                               qp->port) - 1;
4189
4190                /* Underlay port should be used - index 0 function per port */
4191                if (qp->flags & IB_QP_CREATE_SOURCE_QPN)
4192                        port_num = 0;
4193
4194                if (ibqp->counter)
4195                        set_id = ibqp->counter->id;
4196                else
4197                        set_id = mlx5_ib_get_counters_id(dev, port_num);
4198                MLX5_SET(qpc, qpc, counter_set_id, set_id);
4199        }
4200
4201        if (!ibqp->uobject && cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT)
4202                MLX5_SET(qpc, qpc, rlky, 1);
4203
4204        if (qp->flags & MLX5_IB_QP_CREATE_SQPN_QP1)
4205                MLX5_SET(qpc, qpc, deth_sqpn, 1);
4206
4207        mlx5_cur = to_mlx5_state(cur_state);
4208        mlx5_new = to_mlx5_state(new_state);
4209
4210        if (mlx5_cur >= MLX5_QP_NUM_STATE || mlx5_new >= MLX5_QP_NUM_STATE ||
4211            !optab[mlx5_cur][mlx5_new]) {
4212                err = -EINVAL;
4213                goto out;
4214        }
4215
4216        op = optab[mlx5_cur][mlx5_new];
4217        optpar |= ib_mask_to_mlx5_opt(attr_mask);
4218        optpar &= opt_mask[mlx5_cur][mlx5_new][mlx5_st];
4219
4220        if (qp->type == IB_QPT_RAW_PACKET ||
4221            qp->flags & IB_QP_CREATE_SOURCE_QPN) {
4222                struct mlx5_modify_raw_qp_param raw_qp_param = {};
4223
4224                raw_qp_param.operation = op;
4225                if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) {
4226                        raw_qp_param.rq_q_ctr_id = set_id;
4227                        raw_qp_param.set_mask |= MLX5_RAW_QP_MOD_SET_RQ_Q_CTR_ID;
4228                }
4229
4230                if (attr_mask & IB_QP_PORT)
4231                        raw_qp_param.port = attr->port_num;
4232
4233                if (attr_mask & IB_QP_RATE_LIMIT) {
4234                        raw_qp_param.rl.rate = attr->rate_limit;
4235
4236                        if (ucmd->burst_info.max_burst_sz) {
4237                                if (attr->rate_limit &&
4238                                    MLX5_CAP_QOS(dev->mdev, packet_pacing_burst_bound)) {
4239                                        raw_qp_param.rl.max_burst_sz =
4240                                                ucmd->burst_info.max_burst_sz;
4241                                } else {
4242                                        err = -EINVAL;
4243                                        goto out;
4244                                }
4245                        }
4246
4247                        if (ucmd->burst_info.typical_pkt_sz) {
4248                                if (attr->rate_limit &&
4249                                    MLX5_CAP_QOS(dev->mdev, packet_pacing_typical_size)) {
4250                                        raw_qp_param.rl.typical_pkt_sz =
4251                                                ucmd->burst_info.typical_pkt_sz;
4252                                } else {
4253                                        err = -EINVAL;
4254                                        goto out;
4255                                }
4256                        }
4257
4258                        raw_qp_param.set_mask |= MLX5_RAW_QP_RATE_LIMIT;
4259                }
4260
4261                err = modify_raw_packet_qp(dev, qp, &raw_qp_param, tx_affinity);
4262        } else {
4263                if (udata) {
4264                        /* For the kernel flows, the resp will stay zero */
4265                        resp->ece_options =
4266                                MLX5_CAP_GEN(dev->mdev, ece_support) ?
4267                                        ucmd->ece_options : 0;
4268                        resp->response_length = sizeof(*resp);
4269                }
4270                err = mlx5_core_qp_modify(dev, op, optpar, qpc, &base->mqp,
4271                                          &resp->ece_options);
4272        }
4273
4274        if (err)
4275                goto out;
4276
4277        qp->state = new_state;
4278
4279        if (attr_mask & IB_QP_ACCESS_FLAGS)
4280                qp->trans_qp.atomic_rd_en = attr->qp_access_flags;
4281        if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC)
4282                qp->trans_qp.resp_depth = attr->max_dest_rd_atomic;
4283        if (attr_mask & IB_QP_PORT)
4284                qp->port = attr->port_num;
4285        if (attr_mask & IB_QP_ALT_PATH)
4286                qp->trans_qp.alt_port = attr->alt_port_num;
4287
4288        /*
4289         * If we moved a kernel QP to RESET, clean up all old CQ
4290         * entries and reinitialize the QP.
4291         */
4292        if (new_state == IB_QPS_RESET &&
4293            !ibqp->uobject && qp->type != IB_QPT_XRC_TGT) {
4294                mlx5_ib_cq_clean(recv_cq, base->mqp.qpn,
4295                                 ibqp->srq ? to_msrq(ibqp->srq) : NULL);
4296                if (send_cq != recv_cq)
4297                        mlx5_ib_cq_clean(send_cq, base->mqp.qpn, NULL);
4298
4299                qp->rq.head = 0;
4300                qp->rq.tail = 0;
4301                qp->sq.head = 0;
4302                qp->sq.tail = 0;
4303                qp->sq.cur_post = 0;
4304                if (qp->sq.wqe_cnt)
4305                        qp->sq.cur_edge = get_sq_edge(&qp->sq, 0);
4306                qp->sq.last_poll = 0;
4307                qp->db.db[MLX5_RCV_DBR] = 0;
4308                qp->db.db[MLX5_SND_DBR] = 0;
4309        }
4310
4311        if ((new_state == IB_QPS_RTS) && qp->counter_pending) {
4312                err = __mlx5_ib_qp_set_counter(ibqp, ibqp->counter);
4313                if (!err)
4314                        qp->counter_pending = 0;
4315        }
4316
4317out:
4318        kfree(qpc);
4319        return err;
4320}
4321
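    /*
     * Usage sketch (illustrative only): callers pass the attr_mask they
     * received together with the bits they require and the bits they
     * tolerate, e.g.
     *   is_valid_mask(attr_mask, IB_QP_STATE | IB_QP_PORT, IB_QP_PKEY_INDEX)
     * is true only when both required bits are set and no bit outside the
     * three listed ones is present.
     */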
4322static inline bool is_valid_mask(int mask, int req, int opt)
4323{
4324        if ((mask & req) != req)
4325                return false;
4326
4327        if (mask & ~(req | opt))
4328                return false;
4329
4330        return true;
4331}
4332
4333/* Check valid transitions for driver QP types.
4334 * For now, the only QP type that this function supports is DCI.
4335 */
4336static bool modify_dci_qp_is_ok(enum ib_qp_state cur_state, enum ib_qp_state new_state,
4337                                enum ib_qp_attr_mask attr_mask)
4338{
4339        int req = IB_QP_STATE;
4340        int opt = 0;
4341
4342        if (new_state == IB_QPS_RESET) {
4343                return is_valid_mask(attr_mask, req, opt);
4344        } else if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) {
4345                req |= IB_QP_PKEY_INDEX | IB_QP_PORT;
4346                return is_valid_mask(attr_mask, req, opt);
4347        } else if (cur_state == IB_QPS_INIT && new_state == IB_QPS_INIT) {
4348                opt = IB_QP_PKEY_INDEX | IB_QP_PORT;
4349                return is_valid_mask(attr_mask, req, opt);
4350        } else if (cur_state == IB_QPS_INIT && new_state == IB_QPS_RTR) {
4351                req |= IB_QP_PATH_MTU;
4352                opt = IB_QP_PKEY_INDEX | IB_QP_AV;
4353                return is_valid_mask(attr_mask, req, opt);
4354        } else if (cur_state == IB_QPS_RTR && new_state == IB_QPS_RTS) {
4355                req |= IB_QP_TIMEOUT | IB_QP_RETRY_CNT | IB_QP_RNR_RETRY |
4356                       IB_QP_MAX_QP_RD_ATOMIC | IB_QP_SQ_PSN;
4357                opt = IB_QP_MIN_RNR_TIMER;
4358                return is_valid_mask(attr_mask, req, opt);
4359        } else if (cur_state == IB_QPS_RTS && new_state == IB_QPS_RTS) {
4360                opt = IB_QP_MIN_RNR_TIMER;
4361                return is_valid_mask(attr_mask, req, opt);
4362        } else if (cur_state != IB_QPS_RESET && new_state == IB_QPS_ERR) {
4363                return is_valid_mask(attr_mask, req, opt);
4364        }
4365        return false;
4366}
4367
4368/* mlx5_ib_modify_dct: modify a DCT QP
4369 * valid transitions are:
4370 * RESET to INIT: must set access_flags, pkey_index and port
4371 * INIT  to RTR : must set min_rnr_timer, tclass, flow_label,
4372 *                         mtu, gid_index and hop_limit
4373 * Other transitions and attributes are illegal
4374 */
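    /*
     * Illustrative consumer flow (a sketch only, not driver code): the attr
     * fields implied by each mask must be filled in as well.
     *
     *   attr.qp_state = IB_QPS_INIT;
     *   ib_modify_qp(qp, &attr, IB_QP_STATE | IB_QP_ACCESS_FLAGS |
     *                IB_QP_PKEY_INDEX | IB_QP_PORT);
     *
     *   attr.qp_state = IB_QPS_RTR;
     *   ib_modify_qp(qp, &attr, IB_QP_STATE | IB_QP_MIN_RNR_TIMER |
     *                IB_QP_AV | IB_QP_PATH_MTU);
     */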
4375static int mlx5_ib_modify_dct(struct ib_qp *ibqp, struct ib_qp_attr *attr,
4376                              int attr_mask, struct mlx5_ib_modify_qp *ucmd,
4377                              struct ib_udata *udata)
4378{
4379        struct mlx5_ib_qp *qp = to_mqp(ibqp);
4380        struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
4381        enum ib_qp_state cur_state, new_state;
4382        int required = IB_QP_STATE;
4383        void *dctc;
4384        int err;
4385
4386        if (!(attr_mask & IB_QP_STATE))
4387                return -EINVAL;
4388
4389        cur_state = qp->state;
4390        new_state = attr->qp_state;
4391
4392        dctc = MLX5_ADDR_OF(create_dct_in, qp->dct.in, dct_context_entry);
4393        if (MLX5_CAP_GEN(dev->mdev, ece_support) && ucmd->ece_options)
4394                /*
4395                 * DCT doesn't initialize the QP until the modify command
4396                 * is executed, so we need to overwrite the previously set
4397                 * ECE field if the user provided any value except zero,
4398                 * which means not set/not valid.
4399                 */
4400                MLX5_SET(dctc, dctc, ece, ucmd->ece_options);
4401
4402        if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) {
4403                u16 set_id;
4404
4405                required |= IB_QP_ACCESS_FLAGS | IB_QP_PKEY_INDEX | IB_QP_PORT;
4406                if (!is_valid_mask(attr_mask, required, 0))
4407                        return -EINVAL;
4408
4409                if (attr->port_num == 0 ||
4410                    attr->port_num > MLX5_CAP_GEN(dev->mdev, num_ports)) {
4411                        mlx5_ib_dbg(dev, "invalid port number %d. number of ports is %d\n",
4412                                    attr->port_num, dev->num_ports);
4413                        return -EINVAL;
4414                }
4415                if (attr->qp_access_flags & IB_ACCESS_REMOTE_READ)
4416                        MLX5_SET(dctc, dctc, rre, 1);
4417                if (attr->qp_access_flags & IB_ACCESS_REMOTE_WRITE)
4418                        MLX5_SET(dctc, dctc, rwe, 1);
4419                if (attr->qp_access_flags & IB_ACCESS_REMOTE_ATOMIC) {
4420                        int atomic_mode;
4421
4422                        atomic_mode = get_atomic_mode(dev, MLX5_IB_QPT_DCT);
4423                        if (atomic_mode < 0)
4424                                return -EOPNOTSUPP;
4425
4426                        MLX5_SET(dctc, dctc, atomic_mode, atomic_mode);
4427                        MLX5_SET(dctc, dctc, rae, 1);
4428                }
4429                MLX5_SET(dctc, dctc, pkey_index, attr->pkey_index);
4430                if (mlx5_lag_is_active(dev->mdev))
4431                        MLX5_SET(dctc, dctc, port,
4432                                 get_tx_affinity_rr(dev, udata));
4433                else
4434                        MLX5_SET(dctc, dctc, port, attr->port_num);
4435
4436                set_id = mlx5_ib_get_counters_id(dev, attr->port_num - 1);
4437                MLX5_SET(dctc, dctc, counter_set_id, set_id);
4438        } else if (cur_state == IB_QPS_INIT && new_state == IB_QPS_RTR) {
4439                struct mlx5_ib_modify_qp_resp resp = {};
4440                u32 out[MLX5_ST_SZ_DW(create_dct_out)] = {};
4441                u32 min_resp_len = offsetofend(typeof(resp), dctn);
4442
4443                if (udata->outlen < min_resp_len)
4444                        return -EINVAL;
4445                /*
4446                 * If we don't have enough space for the ECE options,
4447                 * simply indicate it with resp.response_length.
4448                 */
4449                resp.response_length = (udata->outlen < sizeof(resp)) ?
4450                                               min_resp_len :
4451                                               sizeof(resp);
4452
4453                required |= IB_QP_MIN_RNR_TIMER | IB_QP_AV | IB_QP_PATH_MTU;
4454                if (!is_valid_mask(attr_mask, required, 0))
4455                        return -EINVAL;
4456                MLX5_SET(dctc, dctc, min_rnr_nak, attr->min_rnr_timer);
4457                MLX5_SET(dctc, dctc, tclass, attr->ah_attr.grh.traffic_class);
4458                MLX5_SET(dctc, dctc, flow_label, attr->ah_attr.grh.flow_label);
4459                MLX5_SET(dctc, dctc, mtu, attr->path_mtu);
4460                MLX5_SET(dctc, dctc, my_addr_index, attr->ah_attr.grh.sgid_index);
4461                MLX5_SET(dctc, dctc, hop_limit, attr->ah_attr.grh.hop_limit);
4462                if (attr->ah_attr.type == RDMA_AH_ATTR_TYPE_ROCE)
4463                        MLX5_SET(dctc, dctc, eth_prio, attr->ah_attr.sl & 0x7);
4464
4465                err = mlx5_core_create_dct(dev, &qp->dct.mdct, qp->dct.in,
4466                                           MLX5_ST_SZ_BYTES(create_dct_in), out,
4467                                           sizeof(out));
4468                err = mlx5_cmd_check(dev->mdev, err, qp->dct.in, out);
4469                if (err)
4470                        return err;
4471                resp.dctn = qp->dct.mdct.mqp.qpn;
4472                if (MLX5_CAP_GEN(dev->mdev, ece_support))
4473                        resp.ece_options = MLX5_GET(create_dct_out, out, ece);
4474                err = ib_copy_to_udata(udata, &resp, resp.response_length);
4475                if (err) {
4476                        mlx5_core_destroy_dct(dev, &qp->dct.mdct);
4477                        return err;
4478                }
4479        } else {
4480                mlx5_ib_warn(dev, "Modify DCT: Invalid transition from %d to %d\n", cur_state, new_state);
4481                return -EINVAL;
4482        }
4483
4484        qp->state = new_state;
4485        return 0;
4486}
4487
4488static bool mlx5_ib_modify_qp_allowed(struct mlx5_ib_dev *dev,
4489                                      struct mlx5_ib_qp *qp)
4490{
4491        if (dev->profile != &raw_eth_profile)
4492                return true;
4493
4494        if (qp->type == IB_QPT_RAW_PACKET || qp->type == MLX5_IB_QPT_REG_UMR)
4495                return true;
4496
4497        /* Internal QP used for write-combining (WC) testing, with NOPs in the WQ */
4498        if (qp->flags & MLX5_IB_QP_CREATE_WC_TEST)
4499                return true;
4500
4501        return false;
4502}
4503
4504int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
4505                      int attr_mask, struct ib_udata *udata)
4506{
4507        struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
4508        struct mlx5_ib_modify_qp_resp resp = {};
4509        struct mlx5_ib_qp *qp = to_mqp(ibqp);
4510        struct mlx5_ib_modify_qp ucmd = {};
4511        enum ib_qp_type qp_type;
4512        enum ib_qp_state cur_state, new_state;
4513        int err = -EINVAL;
4514
4515        if (!mlx5_ib_modify_qp_allowed(dev, qp))
4516                return -EOPNOTSUPP;
4517
4518        if (attr_mask & ~(IB_QP_ATTR_STANDARD_BITS | IB_QP_RATE_LIMIT))
4519                return -EOPNOTSUPP;
4520
4521        if (ibqp->rwq_ind_tbl)
4522                return -ENOSYS;
4523
4524        if (udata && udata->inlen) {
4525                if (udata->inlen < offsetofend(typeof(ucmd), ece_options))
4526                        return -EINVAL;
4527
4528                if (udata->inlen > sizeof(ucmd) &&
4529                    !ib_is_udata_cleared(udata, sizeof(ucmd),
4530                                         udata->inlen - sizeof(ucmd)))
4531                        return -EOPNOTSUPP;
4532
4533                if (ib_copy_from_udata(&ucmd, udata,
4534                                       min(udata->inlen, sizeof(ucmd))))
4535                        return -EFAULT;
4536
4537                if (ucmd.comp_mask ||
4538                    memchr_inv(&ucmd.burst_info.reserved, 0,
4539                               sizeof(ucmd.burst_info.reserved)))
4540                        return -EOPNOTSUPP;
4541
4542        }
4543
4544        if (qp->type == IB_QPT_GSI)
4545                return mlx5_ib_gsi_modify_qp(ibqp, attr, attr_mask);
4546
4547        qp_type = (qp->type == MLX5_IB_QPT_HW_GSI) ? IB_QPT_GSI : qp->type;
4548
4549        if (qp_type == MLX5_IB_QPT_DCT)
4550                return mlx5_ib_modify_dct(ibqp, attr, attr_mask, &ucmd, udata);
4551
4552        mutex_lock(&qp->mutex);
4553
4554        cur_state = attr_mask & IB_QP_CUR_STATE ? attr->cur_qp_state : qp->state;
4555        new_state = attr_mask & IB_QP_STATE ? attr->qp_state : cur_state;
4556
4557        if (qp->flags & IB_QP_CREATE_SOURCE_QPN) {
4558                if (attr_mask & ~(IB_QP_STATE | IB_QP_CUR_STATE)) {
4559                        mlx5_ib_dbg(dev, "invalid attr_mask 0x%x when underlay QP is used\n",
4560                                    attr_mask);
4561                        goto out;
4562                }
4563        } else if (qp_type != MLX5_IB_QPT_REG_UMR &&
4564                   qp_type != MLX5_IB_QPT_DCI &&
4565                   !ib_modify_qp_is_ok(cur_state, new_state, qp_type,
4566                                       attr_mask)) {
4567                mlx5_ib_dbg(dev, "invalid QP state transition from %d to %d, qp_type %d, attr_mask 0x%x\n",
4568                            cur_state, new_state, qp->type, attr_mask);
4569                goto out;
4570        } else if (qp_type == MLX5_IB_QPT_DCI &&
4571                   !modify_dci_qp_is_ok(cur_state, new_state, attr_mask)) {
4572                mlx5_ib_dbg(dev, "invalid QP state transition from %d to %d, qp_type %d, attr_mask 0x%x\n",
4573                            cur_state, new_state, qp_type, attr_mask);
4574                goto out;
4575        }
4576
4577        if ((attr_mask & IB_QP_PORT) &&
4578            (attr->port_num == 0 ||
4579             attr->port_num > dev->num_ports)) {
4580                mlx5_ib_dbg(dev, "invalid port number %d. number of ports is %d\n",
4581                            attr->port_num, dev->num_ports);
4582                goto out;
4583        }
4584
4585        if ((attr_mask & IB_QP_PKEY_INDEX) &&
4586            attr->pkey_index >= dev->pkey_table_len) {
4587                mlx5_ib_dbg(dev, "invalid pkey index %d\n", attr->pkey_index);
4588                goto out;
4589        }
4590
4591        if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC &&
4592            attr->max_rd_atomic >
4593            (1 << MLX5_CAP_GEN(dev->mdev, log_max_ra_res_qp))) {
4594                mlx5_ib_dbg(dev, "invalid max_rd_atomic value %d\n",
4595                            attr->max_rd_atomic);
4596                goto out;
4597        }
4598
4599        if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC &&
4600            attr->max_dest_rd_atomic >
4601            (1 << MLX5_CAP_GEN(dev->mdev, log_max_ra_req_qp))) {
4602                mlx5_ib_dbg(dev, "invalid max_dest_rd_atomic value %d\n",
4603                            attr->max_dest_rd_atomic);
4604                goto out;
4605        }
4606
4607        if (cur_state == new_state && cur_state == IB_QPS_RESET) {
4608                err = 0;
4609                goto out;
4610        }
4611
4612        err = __mlx5_ib_modify_qp(ibqp, attr, attr_mask, cur_state,
4613                                  new_state, &ucmd, &resp, udata);
4614
4615        /* resp.response_length is set only in ECE-supported flows */
4616        if (!err && resp.response_length &&
4617            udata->outlen >= resp.response_length)
4618                /* Return -EFAULT to the user and expect them to destroy the QP. */
4619                err = ib_copy_to_udata(udata, &resp, resp.response_length);
4620
4621out:
4622        mutex_unlock(&qp->mutex);
4623        return err;
4624}
4625
4626static inline enum ib_qp_state to_ib_qp_state(enum mlx5_qp_state mlx5_state)
4627{
4628        switch (mlx5_state) {
4629        case MLX5_QP_STATE_RST:      return IB_QPS_RESET;
4630        case MLX5_QP_STATE_INIT:     return IB_QPS_INIT;
4631        case MLX5_QP_STATE_RTR:      return IB_QPS_RTR;
4632        case MLX5_QP_STATE_RTS:      return IB_QPS_RTS;
4633        case MLX5_QP_STATE_SQ_DRAINING:
4634        case MLX5_QP_STATE_SQD:      return IB_QPS_SQD;
4635        case MLX5_QP_STATE_SQER:     return IB_QPS_SQE;
4636        case MLX5_QP_STATE_ERR:      return IB_QPS_ERR;
4637        default:                     return -1;
4638        }
4639}
4640
4641static inline enum ib_mig_state to_ib_mig_state(int mlx5_mig_state)
4642{
4643        switch (mlx5_mig_state) {
4644        case MLX5_QP_PM_ARMED:          return IB_MIG_ARMED;
4645        case MLX5_QP_PM_REARM:          return IB_MIG_REARM;
4646        case MLX5_QP_PM_MIGRATED:       return IB_MIG_MIGRATED;
4647        default: return -1;
4648        }
4649}
4650
4651static void to_rdma_ah_attr(struct mlx5_ib_dev *ibdev,
4652                            struct rdma_ah_attr *ah_attr, void *path)
4653{
4654        int port = MLX5_GET(ads, path, vhca_port_num);
4655        int static_rate;
4656
4657        memset(ah_attr, 0, sizeof(*ah_attr));
4658
4659        if (!port || port > ibdev->num_ports)
4660                return;
4661
4662        ah_attr->type = rdma_ah_find_type(&ibdev->ib_dev, port);
4663
4664        rdma_ah_set_port_num(ah_attr, port);
4665        rdma_ah_set_sl(ah_attr, MLX5_GET(ads, path, sl));
4666
4667        rdma_ah_set_dlid(ah_attr, MLX5_GET(ads, path, rlid));
4668        rdma_ah_set_path_bits(ah_attr, MLX5_GET(ads, path, mlid));
4669
4670        static_rate = MLX5_GET(ads, path, stat_rate);
4671        rdma_ah_set_static_rate(ah_attr, mlx5_to_ib_rate_map(static_rate));
4672        if (MLX5_GET(ads, path, grh) ||
4673            ah_attr->type == RDMA_AH_ATTR_TYPE_ROCE) {
4674                rdma_ah_set_grh(ah_attr, NULL, MLX5_GET(ads, path, flow_label),
4675                                MLX5_GET(ads, path, src_addr_index),
4676                                MLX5_GET(ads, path, hop_limit),
4677                                MLX5_GET(ads, path, tclass));
4678                rdma_ah_set_dgid_raw(ah_attr, MLX5_ADDR_OF(ads, path, rgid_rip));
4679        }
4680}
4681
4682static int query_raw_packet_qp_sq_state(struct mlx5_ib_dev *dev,
4683                                        struct mlx5_ib_sq *sq,
4684                                        u8 *sq_state)
4685{
4686        int err;
4687
4688        err = mlx5_core_query_sq_state(dev->mdev, sq->base.mqp.qpn, sq_state);
4689        if (err)
4690                goto out;
4691        sq->state = *sq_state;
4692
4693out:
4694        return err;
4695}
4696
4697static int query_raw_packet_qp_rq_state(struct mlx5_ib_dev *dev,
4698                                        struct mlx5_ib_rq *rq,
4699                                        u8 *rq_state)
4700{
4701        void *out;
4702        void *rqc;
4703        int inlen;
4704        int err;
4705
4706        inlen = MLX5_ST_SZ_BYTES(query_rq_out);
4707        out = kvzalloc(inlen, GFP_KERNEL);
4708        if (!out)
4709                return -ENOMEM;
4710
4711        err = mlx5_core_query_rq(dev->mdev, rq->base.mqp.qpn, out);
4712        if (err)
4713                goto out;
4714
4715        rqc = MLX5_ADDR_OF(query_rq_out, out, rq_context);
4716        *rq_state = MLX5_GET(rqc, rqc, state);
4717        rq->state = *rq_state;
4718
4719out:
4720        kvfree(out);
4721        return err;
4722}
4723
4724static int sqrq_state_to_qp_state(u8 sq_state, u8 rq_state,
4725                                  struct mlx5_ib_qp *qp, u8 *qp_state)
4726{
4727        static const u8 sqrq_trans[MLX5_RQ_NUM_STATE][MLX5_SQ_NUM_STATE] = {
4728                [MLX5_RQC_STATE_RST] = {
4729                        [MLX5_SQC_STATE_RST]    = IB_QPS_RESET,
4730                        [MLX5_SQC_STATE_RDY]    = MLX5_QP_STATE_BAD,
4731                        [MLX5_SQC_STATE_ERR]    = MLX5_QP_STATE_BAD,
4732                        [MLX5_SQ_STATE_NA]      = IB_QPS_RESET,
4733                },
4734                [MLX5_RQC_STATE_RDY] = {
4735                        [MLX5_SQC_STATE_RST]    = MLX5_QP_STATE,
4736                        [MLX5_SQC_STATE_RDY]    = MLX5_QP_STATE,
4737                        [MLX5_SQC_STATE_ERR]    = IB_QPS_SQE,
4738                        [MLX5_SQ_STATE_NA]      = MLX5_QP_STATE,
4739                },
4740                [MLX5_RQC_STATE_ERR] = {
4741                        [MLX5_SQC_STATE_RST]    = MLX5_QP_STATE_BAD,
4742                        [MLX5_SQC_STATE_RDY]    = MLX5_QP_STATE_BAD,
4743                        [MLX5_SQC_STATE_ERR]    = IB_QPS_ERR,
4744                        [MLX5_SQ_STATE_NA]      = IB_QPS_ERR,
4745                },
4746                [MLX5_RQ_STATE_NA] = {
4747                        [MLX5_SQC_STATE_RST]    = MLX5_QP_STATE,
4748                        [MLX5_SQC_STATE_RDY]    = MLX5_QP_STATE,
4749                        [MLX5_SQC_STATE_ERR]    = MLX5_QP_STATE,
4750                        [MLX5_SQ_STATE_NA]      = MLX5_QP_STATE_BAD,
4751                },
4752        };
4753
4754        *qp_state = sqrq_trans[rq_state][sq_state];
4755
4756        if (*qp_state == MLX5_QP_STATE_BAD) {
4757                WARN(1, "Buggy Raw Packet QP state, SQ 0x%x state: 0x%x, RQ 0x%x state: 0x%x",
4758                     qp->raw_packet_qp.sq.base.mqp.qpn, sq_state,
4759                     qp->raw_packet_qp.rq.base.mqp.qpn, rq_state);
4760                return -EINVAL;
4761        }
4762
4763        if (*qp_state == MLX5_QP_STATE)
4764                *qp_state = qp->state;
4765
4766        return 0;
4767}
4768
4769static int query_raw_packet_qp_state(struct mlx5_ib_dev *dev,
4770                                     struct mlx5_ib_qp *qp,
4771                                     u8 *raw_packet_qp_state)
4772{
4773        struct mlx5_ib_raw_packet_qp *raw_packet_qp = &qp->raw_packet_qp;
4774        struct mlx5_ib_sq *sq = &raw_packet_qp->sq;
4775        struct mlx5_ib_rq *rq = &raw_packet_qp->rq;
4776        int err;
4777        u8 sq_state = MLX5_SQ_STATE_NA;
4778        u8 rq_state = MLX5_RQ_STATE_NA;
4779
4780        if (qp->sq.wqe_cnt) {
4781                err = query_raw_packet_qp_sq_state(dev, sq, &sq_state);
4782                if (err)
4783                        return err;
4784        }
4785
4786        if (qp->rq.wqe_cnt) {
4787                err = query_raw_packet_qp_rq_state(dev, rq, &rq_state);
4788                if (err)
4789                        return err;
4790        }
4791
4792        return sqrq_state_to_qp_state(sq_state, rq_state, qp,
4793                                      raw_packet_qp_state);
4794}
4795
4796static int query_qp_attr(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
4797                         struct ib_qp_attr *qp_attr)
4798{
4799        int outlen = MLX5_ST_SZ_BYTES(query_qp_out);
4800        void *qpc, *pri_path, *alt_path;
4801        u32 *outb;
4802        int err;
4803
4804        outb = kzalloc(outlen, GFP_KERNEL);
4805        if (!outb)
4806                return -ENOMEM;
4807
4808        err = mlx5_core_qp_query(dev, &qp->trans_qp.base.mqp, outb, outlen);
4809        if (err)
4810                goto out;
4811
4812        qpc = MLX5_ADDR_OF(query_qp_out, outb, qpc);
4813
4814        qp->state = to_ib_qp_state(MLX5_GET(qpc, qpc, state));
4815        if (MLX5_GET(qpc, qpc, state) == MLX5_QP_STATE_SQ_DRAINING)
4816                qp_attr->sq_draining = 1;
4817
4818        qp_attr->path_mtu = MLX5_GET(qpc, qpc, mtu);
4819        qp_attr->path_mig_state = to_ib_mig_state(MLX5_GET(qpc, qpc, pm_state));
4820        qp_attr->qkey = MLX5_GET(qpc, qpc, q_key);
4821        qp_attr->rq_psn = MLX5_GET(qpc, qpc, next_rcv_psn);
4822        qp_attr->sq_psn = MLX5_GET(qpc, qpc, next_send_psn);
4823        qp_attr->dest_qp_num = MLX5_GET(qpc, qpc, remote_qpn);
4824
4825        if (MLX5_GET(qpc, qpc, rre))
4826                qp_attr->qp_access_flags |= IB_ACCESS_REMOTE_READ;
4827        if (MLX5_GET(qpc, qpc, rwe))
4828                qp_attr->qp_access_flags |= IB_ACCESS_REMOTE_WRITE;
4829        if (MLX5_GET(qpc, qpc, rae))
4830                qp_attr->qp_access_flags |= IB_ACCESS_REMOTE_ATOMIC;
4831
4832        qp_attr->max_rd_atomic = 1 << MLX5_GET(qpc, qpc, log_sra_max);
4833        qp_attr->max_dest_rd_atomic = 1 << MLX5_GET(qpc, qpc, log_rra_max);
4834        qp_attr->min_rnr_timer = MLX5_GET(qpc, qpc, min_rnr_nak);
4835        qp_attr->retry_cnt = MLX5_GET(qpc, qpc, retry_count);
4836        qp_attr->rnr_retry = MLX5_GET(qpc, qpc, rnr_retry);
4837
4838        pri_path = MLX5_ADDR_OF(qpc, qpc, primary_address_path);
4839        alt_path = MLX5_ADDR_OF(qpc, qpc, secondary_address_path);
4840
4841        if (qp->type == IB_QPT_RC || qp->type == IB_QPT_UC ||
4842            qp->type == IB_QPT_XRC_INI || qp->type == IB_QPT_XRC_TGT) {
4843                to_rdma_ah_attr(dev, &qp_attr->ah_attr, pri_path);
4844                to_rdma_ah_attr(dev, &qp_attr->alt_ah_attr, alt_path);
4845                qp_attr->alt_pkey_index = MLX5_GET(ads, alt_path, pkey_index);
4846                qp_attr->alt_port_num = MLX5_GET(ads, alt_path, vhca_port_num);
4847        }
4848
4849        qp_attr->pkey_index = MLX5_GET(ads, pri_path, pkey_index);
4850        qp_attr->port_num = MLX5_GET(ads, pri_path, vhca_port_num);
4851        qp_attr->timeout = MLX5_GET(ads, pri_path, ack_timeout);
4852        qp_attr->alt_timeout = MLX5_GET(ads, alt_path, ack_timeout);
4853
4854out:
4855        kfree(outb);
4856        return err;
4857}
4858
4859static int mlx5_ib_dct_query_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *mqp,
4860                                struct ib_qp_attr *qp_attr, int qp_attr_mask,
4861                                struct ib_qp_init_attr *qp_init_attr)
4862{
4863        struct mlx5_core_dct    *dct = &mqp->dct.mdct;
4864        u32 *out;
4865        u32 access_flags = 0;
4866        int outlen = MLX5_ST_SZ_BYTES(query_dct_out);
4867        void *dctc;
4868        int err;
4869        int supported_mask = IB_QP_STATE |
4870                             IB_QP_ACCESS_FLAGS |
4871                             IB_QP_PORT |
4872                             IB_QP_MIN_RNR_TIMER |
4873                             IB_QP_AV |
4874                             IB_QP_PATH_MTU |
4875                             IB_QP_PKEY_INDEX;
4876
4877        if (qp_attr_mask & ~supported_mask)
4878                return -EINVAL;
4879        if (mqp->state != IB_QPS_RTR)
4880                return -EINVAL;
4881
4882        out = kzalloc(outlen, GFP_KERNEL);
4883        if (!out)
4884                return -ENOMEM;
4885
4886        err = mlx5_core_dct_query(dev, dct, out, outlen);
4887        if (err)
4888                goto out;
4889
4890        dctc = MLX5_ADDR_OF(query_dct_out, out, dct_context_entry);
4891
4892        if (qp_attr_mask & IB_QP_STATE)
4893                qp_attr->qp_state = IB_QPS_RTR;
4894
4895        if (qp_attr_mask & IB_QP_ACCESS_FLAGS) {
4896                if (MLX5_GET(dctc, dctc, rre))
4897                        access_flags |= IB_ACCESS_REMOTE_READ;
4898                if (MLX5_GET(dctc, dctc, rwe))
4899                        access_flags |= IB_ACCESS_REMOTE_WRITE;
4900                if (MLX5_GET(dctc, dctc, rae))
4901                        access_flags |= IB_ACCESS_REMOTE_ATOMIC;
4902                qp_attr->qp_access_flags = access_flags;
4903        }
4904
4905        if (qp_attr_mask & IB_QP_PORT)
4906                qp_attr->port_num = MLX5_GET(dctc, dctc, port);
4907        if (qp_attr_mask & IB_QP_MIN_RNR_TIMER)
4908                qp_attr->min_rnr_timer = MLX5_GET(dctc, dctc, min_rnr_nak);
4909        if (qp_attr_mask & IB_QP_AV) {
4910                qp_attr->ah_attr.grh.traffic_class = MLX5_GET(dctc, dctc, tclass);
4911                qp_attr->ah_attr.grh.flow_label = MLX5_GET(dctc, dctc, flow_label);
4912                qp_attr->ah_attr.grh.sgid_index = MLX5_GET(dctc, dctc, my_addr_index);
4913                qp_attr->ah_attr.grh.hop_limit = MLX5_GET(dctc, dctc, hop_limit);
4914        }
4915        if (qp_attr_mask & IB_QP_PATH_MTU)
4916                qp_attr->path_mtu = MLX5_GET(dctc, dctc, mtu);
4917        if (qp_attr_mask & IB_QP_PKEY_INDEX)
4918                qp_attr->pkey_index = MLX5_GET(dctc, dctc, pkey_index);
4919out:
4920        kfree(out);
4921        return err;
4922}
4923
4924int mlx5_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
4925                     int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr)
4926{
4927        struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
4928        struct mlx5_ib_qp *qp = to_mqp(ibqp);
4929        int err = 0;
4930        u8 raw_packet_qp_state;
4931
4932        if (ibqp->rwq_ind_tbl)
4933                return -ENOSYS;
4934
4935        if (qp->type == IB_QPT_GSI)
4936                return mlx5_ib_gsi_query_qp(ibqp, qp_attr, qp_attr_mask,
4937                                            qp_init_attr);
4938
4939        /* Not all of the output fields are applicable; make sure to zero them */
4940        memset(qp_init_attr, 0, sizeof(*qp_init_attr));
4941        memset(qp_attr, 0, sizeof(*qp_attr));
4942
4943        if (unlikely(qp->type == MLX5_IB_QPT_DCT))
4944                return mlx5_ib_dct_query_qp(dev, qp, qp_attr,
4945                                            qp_attr_mask, qp_init_attr);
4946
4947        mutex_lock(&qp->mutex);
4948
4949        if (qp->type == IB_QPT_RAW_PACKET ||
4950            qp->flags & IB_QP_CREATE_SOURCE_QPN) {
4951                err = query_raw_packet_qp_state(dev, qp, &raw_packet_qp_state);
4952                if (err)
4953                        goto out;
4954                qp->state = raw_packet_qp_state;
4955                qp_attr->port_num = 1;
4956        } else {
4957                err = query_qp_attr(dev, qp, qp_attr);
4958                if (err)
4959                        goto out;
4960        }
4961
4962        qp_attr->qp_state            = qp->state;
4963        qp_attr->cur_qp_state        = qp_attr->qp_state;
4964        qp_attr->cap.max_recv_wr     = qp->rq.wqe_cnt;
4965        qp_attr->cap.max_recv_sge    = qp->rq.max_gs;
4966
4967        if (!ibqp->uobject) {
4968                qp_attr->cap.max_send_wr  = qp->sq.max_post;
4969                qp_attr->cap.max_send_sge = qp->sq.max_gs;
4970                qp_init_attr->qp_context = ibqp->qp_context;
4971        } else {
4972                qp_attr->cap.max_send_wr  = 0;
4973                qp_attr->cap.max_send_sge = 0;
4974        }
4975
4976        qp_init_attr->qp_type = qp->type;
4977        qp_init_attr->recv_cq = ibqp->recv_cq;
4978        qp_init_attr->send_cq = ibqp->send_cq;
4979        qp_init_attr->srq = ibqp->srq;
4980        qp_attr->cap.max_inline_data = qp->max_inline_data;
4981
4982        qp_init_attr->cap            = qp_attr->cap;
4983
4984        qp_init_attr->create_flags = qp->flags;
4985
4986        qp_init_attr->sq_sig_type = qp->sq_signal_bits & MLX5_WQE_CTRL_CQ_UPDATE ?
4987                IB_SIGNAL_ALL_WR : IB_SIGNAL_REQ_WR;
4988
4989out:
4990        mutex_unlock(&qp->mutex);
4991        return err;
4992}
4993
4994int mlx5_ib_alloc_xrcd(struct ib_xrcd *ibxrcd, struct ib_udata *udata)
4995{
4996        struct mlx5_ib_dev *dev = to_mdev(ibxrcd->device);
4997        struct mlx5_ib_xrcd *xrcd = to_mxrcd(ibxrcd);
4998
4999        if (!MLX5_CAP_GEN(dev->mdev, xrc))
5000                return -EOPNOTSUPP;
5001
5002        return mlx5_cmd_xrcd_alloc(dev->mdev, &xrcd->xrcdn, 0);
5003}
5004
5005int mlx5_ib_dealloc_xrcd(struct ib_xrcd *xrcd, struct ib_udata *udata)
5006{
5007        struct mlx5_ib_dev *dev = to_mdev(xrcd->device);
5008        u32 xrcdn = to_mxrcd(xrcd)->xrcdn;
5009
5010        return mlx5_cmd_xrcd_dealloc(dev->mdev, xrcdn, 0);
5011}
5012
5013static void mlx5_ib_wq_event(struct mlx5_core_qp *core_qp, int type)
5014{
5015        struct mlx5_ib_rwq *rwq = to_mibrwq(core_qp);
5016        struct mlx5_ib_dev *dev = to_mdev(rwq->ibwq.device);
5017        struct ib_event event;
5018
5019        if (rwq->ibwq.event_handler) {
5020                event.device     = rwq->ibwq.device;
5021                event.element.wq = &rwq->ibwq;
5022                switch (type) {
5023                case MLX5_EVENT_TYPE_WQ_CATAS_ERROR:
5024                        event.event = IB_EVENT_WQ_FATAL;
5025                        break;
5026                default:
5027                        mlx5_ib_warn(dev, "Unexpected event type %d on WQ %06x\n", type, core_qp->qpn);
5028                        return;
5029                }
5030
5031                rwq->ibwq.event_handler(&event, rwq->ibwq.wq_context);
5032        }
5033}
5034
5035static int set_delay_drop(struct mlx5_ib_dev *dev)
5036{
5037        int err = 0;
5038
5039        mutex_lock(&dev->delay_drop.lock);
5040        if (dev->delay_drop.activate)
5041                goto out;
5042
5043        err = mlx5_core_set_delay_drop(dev, dev->delay_drop.timeout);
5044        if (err)
5045                goto out;
5046
5047        dev->delay_drop.activate = true;
5048out:
5049        mutex_unlock(&dev->delay_drop.lock);
5050
5051        if (!err)
5052                atomic_inc(&dev->delay_drop.rqs_cnt);
5053        return err;
5054}
5055
5056static int create_rq(struct mlx5_ib_rwq *rwq, struct ib_pd *pd,
5057                     struct ib_wq_init_attr *init_attr)
5058{
5059        struct mlx5_ib_dev *dev;
5060        int has_net_offloads;
5061        __be64 *rq_pas0;
5062        int ts_format;
5063        void *in;
5064        void *rqc;
5065        void *wq;
5066        int inlen;
5067        int err;
5068
5069        dev = to_mdev(pd->device);
5070
5071        ts_format = get_rq_ts_format(dev, to_mcq(init_attr->cq));
5072        if (ts_format < 0)
5073                return ts_format;
5074
5075        inlen = MLX5_ST_SZ_BYTES(create_rq_in) + sizeof(u64) * rwq->rq_num_pas;
5076        in = kvzalloc(inlen, GFP_KERNEL);
5077        if (!in)
5078                return -ENOMEM;
5079
5080        MLX5_SET(create_rq_in, in, uid, to_mpd(pd)->uid);
5081        rqc = MLX5_ADDR_OF(create_rq_in, in, ctx);
5082        MLX5_SET(rqc,  rqc, mem_rq_type,
5083                 MLX5_RQC_MEM_RQ_TYPE_MEMORY_RQ_INLINE);
5084        MLX5_SET(rqc, rqc, ts_format, ts_format);
5085        MLX5_SET(rqc, rqc, user_index, rwq->user_index);
5086        MLX5_SET(rqc,  rqc, cqn, to_mcq(init_attr->cq)->mcq.cqn);
5087        MLX5_SET(rqc,  rqc, state, MLX5_RQC_STATE_RST);
5088        MLX5_SET(rqc,  rqc, flush_in_error_en, 1);
5089        wq = MLX5_ADDR_OF(rqc, rqc, wq);
5090        MLX5_SET(wq, wq, wq_type,
5091                 rwq->create_flags & MLX5_IB_WQ_FLAGS_STRIDING_RQ ?
5092                 MLX5_WQ_TYPE_CYCLIC_STRIDING_RQ : MLX5_WQ_TYPE_CYCLIC);
5093        if (init_attr->create_flags & IB_WQ_FLAGS_PCI_WRITE_END_PADDING) {
5094                if (!MLX5_CAP_GEN(dev->mdev, end_pad)) {
5095                        mlx5_ib_dbg(dev, "Scatter end padding is not supported\n");
5096                        err = -EOPNOTSUPP;
5097                        goto out;
5098                } else {
5099                        MLX5_SET(wq, wq, end_padding_mode, MLX5_WQ_END_PAD_MODE_ALIGN);
5100                }
5101        }
5102        MLX5_SET(wq, wq, log_wq_stride, rwq->log_rq_stride);
5103        if (rwq->create_flags & MLX5_IB_WQ_FLAGS_STRIDING_RQ) {
5104                /*
5105                 * In firmware, the number of strides in each WQE is:
5106                 *   "512 * 2^single_wqe_log_num_of_strides"
5107                 * Values 3 to 8 are accepted as 10 to 15, and values 9 to 18
5108                 * are accepted as 0 to 9, as encoded by fw_map below.
5109                 */
5110                static const u8 fw_map[] = { 10, 11, 12, 13, 14, 15, 0, 1,
5111                                             2,  3,  4,  5,  6,  7,  8, 9 };
5112                MLX5_SET(wq, wq, two_byte_shift_en, rwq->two_byte_shift_en);
5113                MLX5_SET(wq, wq, log_wqe_stride_size,
5114                         rwq->single_stride_log_num_of_bytes -
5115                         MLX5_MIN_SINGLE_STRIDE_LOG_NUM_BYTES);
5116                MLX5_SET(wq, wq, log_wqe_num_of_strides,
5117                         fw_map[rwq->log_num_strides -
5118                                MLX5_EXT_MIN_SINGLE_WQE_LOG_NUM_STRIDES]);
5119        }
5120        MLX5_SET(wq, wq, log_wq_sz, rwq->log_rq_size);
5121        MLX5_SET(wq, wq, pd, to_mpd(pd)->pdn);
5122        MLX5_SET(wq, wq, page_offset, rwq->rq_page_offset);
5123        MLX5_SET(wq, wq, log_wq_pg_sz, rwq->log_page_size);
5124        MLX5_SET(wq, wq, wq_signature, rwq->wq_sig);
5125        MLX5_SET64(wq, wq, dbr_addr, rwq->db.dma);
5126        has_net_offloads = MLX5_CAP_GEN(dev->mdev, eth_net_offloads);
5127        if (init_attr->create_flags & IB_WQ_FLAGS_CVLAN_STRIPPING) {
5128                if (!(has_net_offloads && MLX5_CAP_ETH(dev->mdev, vlan_cap))) {
5129                        mlx5_ib_dbg(dev, "VLAN offloads are not supported\n");
5130                        err = -EOPNOTSUPP;
5131                        goto out;
5132                }
5133        } else {
5134                MLX5_SET(rqc, rqc, vsd, 1);
5135        }
5136        if (init_attr->create_flags & IB_WQ_FLAGS_SCATTER_FCS) {
5137                if (!(has_net_offloads && MLX5_CAP_ETH(dev->mdev, scatter_fcs))) {
5138                        mlx5_ib_dbg(dev, "Scatter FCS is not supported\n");
5139                        err = -EOPNOTSUPP;
5140                        goto out;
5141                }
5142                MLX5_SET(rqc, rqc, scatter_fcs, 1);
5143        }
5144        if (init_attr->create_flags & IB_WQ_FLAGS_DELAY_DROP) {
5145                if (!(dev->ib_dev.attrs.raw_packet_caps &
5146                      IB_RAW_PACKET_CAP_DELAY_DROP)) {
5147                        mlx5_ib_dbg(dev, "Delay drop is not supported\n");
5148                        err = -EOPNOTSUPP;
5149                        goto out;
5150                }
5151                MLX5_SET(rqc, rqc, delay_drop_en, 1);
5152        }
5153        rq_pas0 = (__be64 *)MLX5_ADDR_OF(wq, wq, pas);
5154        mlx5_ib_populate_pas(rwq->umem, 1UL << rwq->page_shift, rq_pas0, 0);
5155        err = mlx5_core_create_rq_tracked(dev, in, inlen, &rwq->core_qp);
5156        if (!err && init_attr->create_flags & IB_WQ_FLAGS_DELAY_DROP) {
5157                err = set_delay_drop(dev);
5158                if (err) {
5159                        mlx5_ib_warn(dev, "Failed to enable delay drop err=%d\n",
5160                                     err);
5161                        mlx5_core_destroy_rq_tracked(dev, &rwq->core_qp);
5162                } else {
5163                        rwq->create_flags |= MLX5_IB_WQ_FLAGS_DELAY_DROP;
5164                }
5165        }
5166out:
5167        kvfree(in);
5168        return err;
5169}
5170
5171static int set_user_rq_size(struct mlx5_ib_dev *dev,
5172                            struct ib_wq_init_attr *wq_init_attr,
5173                            struct mlx5_ib_create_wq *ucmd,
5174                            struct mlx5_ib_rwq *rwq)
5175{
5176        /* Sanity check RQ size before proceeding */
5177        if (wq_init_attr->max_wr > (1 << MLX5_CAP_GEN(dev->mdev, log_max_wq_sz)))
5178                return -EINVAL;
5179
5180        if (!ucmd->rq_wqe_count)
5181                return -EINVAL;
5182
5183        rwq->wqe_count = ucmd->rq_wqe_count;
5184        rwq->wqe_shift = ucmd->rq_wqe_shift;
5185        if (check_shl_overflow(rwq->wqe_count, rwq->wqe_shift, &rwq->buf_size))
5186                return -EINVAL;
5187
5188        rwq->log_rq_stride = rwq->wqe_shift;
5189        rwq->log_rq_size = ilog2(rwq->wqe_count);
5190        return 0;
5191}
5192
5193static bool log_of_strides_valid(struct mlx5_ib_dev *dev, u32 log_num_strides)
5194{
5195        if ((log_num_strides > MLX5_MAX_SINGLE_WQE_LOG_NUM_STRIDES) ||
5196            (log_num_strides < MLX5_EXT_MIN_SINGLE_WQE_LOG_NUM_STRIDES))
5197                return false;
5198
5199        if (!MLX5_CAP_GEN(dev->mdev, ext_stride_num_range) &&
5200            (log_num_strides < MLX5_MIN_SINGLE_WQE_LOG_NUM_STRIDES))
5201                return false;
5202
5203        return true;
5204}
5205
5206static int prepare_user_rq(struct ib_pd *pd,
5207                           struct ib_wq_init_attr *init_attr,
5208                           struct ib_udata *udata,
5209                           struct mlx5_ib_rwq *rwq)
5210{
5211        struct mlx5_ib_dev *dev = to_mdev(pd->device);
5212        struct mlx5_ib_create_wq ucmd = {};
5213        int err;
5214        size_t required_cmd_sz;
5215
5216        required_cmd_sz = offsetofend(struct mlx5_ib_create_wq,
5217                                      single_stride_log_num_of_bytes);
5218        if (udata->inlen < required_cmd_sz) {
5219                mlx5_ib_dbg(dev, "invalid inlen\n");
5220                return -EINVAL;
5221        }
5222
5223        if (udata->inlen > sizeof(ucmd) &&
5224            !ib_is_udata_cleared(udata, sizeof(ucmd),
5225                                 udata->inlen - sizeof(ucmd))) {
5226                mlx5_ib_dbg(dev, "inlen is not supported\n");
5227                return -EOPNOTSUPP;
5228        }
5229
5230        if (ib_copy_from_udata(&ucmd, udata, min(sizeof(ucmd), udata->inlen))) {
5231                mlx5_ib_dbg(dev, "copy failed\n");
5232                return -EFAULT;
5233        }
5234
5235        if (ucmd.comp_mask & (~MLX5_IB_CREATE_WQ_STRIDING_RQ)) {
5236                mlx5_ib_dbg(dev, "invalid comp mask\n");
5237                return -EOPNOTSUPP;
5238        } else if (ucmd.comp_mask & MLX5_IB_CREATE_WQ_STRIDING_RQ) {
5239                if (!MLX5_CAP_GEN(dev->mdev, striding_rq)) {
5240                        mlx5_ib_dbg(dev, "Striding RQ is not supported\n");
5241                        return -EOPNOTSUPP;
5242                }
5243                if ((ucmd.single_stride_log_num_of_bytes <
5244                    MLX5_MIN_SINGLE_STRIDE_LOG_NUM_BYTES) ||
5245                    (ucmd.single_stride_log_num_of_bytes >
5246                     MLX5_MAX_SINGLE_STRIDE_LOG_NUM_BYTES)) {
5247                        mlx5_ib_dbg(dev, "Invalid log stride size %u; valid range is %u - %u\n",
5248                                    ucmd.single_stride_log_num_of_bytes,
5249                                    MLX5_MIN_SINGLE_STRIDE_LOG_NUM_BYTES,
5250                                    MLX5_MAX_SINGLE_STRIDE_LOG_NUM_BYTES);
5251                        return -EINVAL;
5252                }
5253                if (!log_of_strides_valid(dev,
5254                                          ucmd.single_wqe_log_num_of_strides)) {
5255                        mlx5_ib_dbg(
5256                                dev,
5257                                "Invalid log num strides %u; valid range is %u - %u\n",
5258                                ucmd.single_wqe_log_num_of_strides,
5259                                MLX5_CAP_GEN(dev->mdev, ext_stride_num_range) ?
5260                                        MLX5_EXT_MIN_SINGLE_WQE_LOG_NUM_STRIDES :
5261                                        MLX5_MIN_SINGLE_WQE_LOG_NUM_STRIDES,
5262                                MLX5_MAX_SINGLE_WQE_LOG_NUM_STRIDES);
5263                        return -EINVAL;
5264                }
5265                rwq->single_stride_log_num_of_bytes =
5266                        ucmd.single_stride_log_num_of_bytes;
5267                rwq->log_num_strides = ucmd.single_wqe_log_num_of_strides;
5268                rwq->two_byte_shift_en = !!ucmd.two_byte_shift_en;
5269                rwq->create_flags |= MLX5_IB_WQ_FLAGS_STRIDING_RQ;
5270        }
5271
5272        err = set_user_rq_size(dev, init_attr, &ucmd, rwq);
5273        if (err) {
5274                mlx5_ib_dbg(dev, "err %d\n", err);
5275                return err;
5276        }
5277
5278        err = create_user_rq(dev, pd, udata, rwq, &ucmd);
5279        if (err) {
5280                mlx5_ib_dbg(dev, "err %d\n", err);
5281                return err;
5282        }
5283
5284        rwq->user_index = ucmd.user_index;
5285        return 0;
5286}
5287
5288struct ib_wq *mlx5_ib_create_wq(struct ib_pd *pd,
5289                                struct ib_wq_init_attr *init_attr,
5290                                struct ib_udata *udata)
5291{
5292        struct mlx5_ib_dev *dev;
5293        struct mlx5_ib_rwq *rwq;
5294        struct mlx5_ib_create_wq_resp resp = {};
5295        size_t min_resp_len;
5296        int err;
5297
5298        if (!udata)
5299                return ERR_PTR(-ENOSYS);
5300
5301        min_resp_len = offsetofend(struct mlx5_ib_create_wq_resp, reserved);
5302        if (udata->outlen && udata->outlen < min_resp_len)
5303                return ERR_PTR(-EINVAL);
5304
5305        if (!capable(CAP_SYS_RAWIO) &&
5306            init_attr->create_flags & IB_WQ_FLAGS_DELAY_DROP)
5307                return ERR_PTR(-EPERM);
5308
5309        dev = to_mdev(pd->device);
5310        switch (init_attr->wq_type) {
5311        case IB_WQT_RQ:
5312                rwq = kzalloc(sizeof(*rwq), GFP_KERNEL);
5313                if (!rwq)
5314                        return ERR_PTR(-ENOMEM);
5315                err = prepare_user_rq(pd, init_attr, udata, rwq);
5316                if (err)
5317                        goto err;
5318                err = create_rq(rwq, pd, init_attr);
5319                if (err)
5320                        goto err_user_rq;
5321                break;
5322        default:
5323                mlx5_ib_dbg(dev, "unsupported wq type %d\n",
5324                            init_attr->wq_type);
5325                return ERR_PTR(-EINVAL);
5326        }
5327
5328        rwq->ibwq.wq_num = rwq->core_qp.qpn;
5329        rwq->ibwq.state = IB_WQS_RESET;
5330        if (udata->outlen) {
5331                resp.response_length = offsetofend(
5332                        struct mlx5_ib_create_wq_resp, response_length);
5333                err = ib_copy_to_udata(udata, &resp, resp.response_length);
5334                if (err)
5335                        goto err_copy;
5336        }
5337
5338        rwq->core_qp.event = mlx5_ib_wq_event;
5339        rwq->ibwq.event_handler = init_attr->event_handler;
5340        return &rwq->ibwq;
5341
5342err_copy:
5343        mlx5_core_destroy_rq_tracked(dev, &rwq->core_qp);
5344err_user_rq:
5345        destroy_user_rq(dev, pd, rwq, udata);
5346err:
5347        kfree(rwq);
5348        return ERR_PTR(err);
5349}
5350
5351int mlx5_ib_destroy_wq(struct ib_wq *wq, struct ib_udata *udata)
5352{
5353        struct mlx5_ib_dev *dev = to_mdev(wq->device);
5354        struct mlx5_ib_rwq *rwq = to_mrwq(wq);
5355        int ret;
5356
5357        ret = mlx5_core_destroy_rq_tracked(dev, &rwq->core_qp);
5358        if (ret)
5359                return ret;
5360        destroy_user_rq(dev, wq->pd, rwq, udata);
5361        kfree(rwq);
5362        return 0;
5363}
5364
5365int mlx5_ib_create_rwq_ind_table(struct ib_rwq_ind_table *ib_rwq_ind_table,
5366                                 struct ib_rwq_ind_table_init_attr *init_attr,
5367                                 struct ib_udata *udata)
5368{
5369        struct mlx5_ib_rwq_ind_table *rwq_ind_tbl =
5370                to_mrwq_ind_table(ib_rwq_ind_table);
5371        struct mlx5_ib_dev *dev = to_mdev(ib_rwq_ind_table->device);
5372        int sz = 1 << init_attr->log_ind_tbl_size;
5373        struct mlx5_ib_create_rwq_ind_tbl_resp resp = {};
5374        size_t min_resp_len;
5375        int inlen;
5376        int err;
5377        int i;
5378        u32 *in;
5379        void *rqtc;
5380
5381        if (udata->inlen > 0 &&
5382            !ib_is_udata_cleared(udata, 0,
5383                                 udata->inlen))
5384                return -EOPNOTSUPP;
5385
5386        if (init_attr->log_ind_tbl_size >
5387            MLX5_CAP_GEN(dev->mdev, log_max_rqt_size)) {
5388                mlx5_ib_dbg(dev, "log_ind_tbl_size = %d is bigger than supported = %d\n",
5389                            init_attr->log_ind_tbl_size,
5390                            MLX5_CAP_GEN(dev->mdev, log_max_rqt_size));
5391                return -EINVAL;
5392        }
5393
5394        min_resp_len =
5395                offsetofend(struct mlx5_ib_create_rwq_ind_tbl_resp, reserved);
5396        if (udata->outlen && udata->outlen < min_resp_len)
5397                return -EINVAL;
5398
5399        inlen = MLX5_ST_SZ_BYTES(create_rqt_in) + sizeof(u32) * sz;
5400        in = kvzalloc(inlen, GFP_KERNEL);
5401        if (!in)
5402                return -ENOMEM;
5403
5404        rqtc = MLX5_ADDR_OF(create_rqt_in, in, rqt_context);
5405
5406        MLX5_SET(rqtc, rqtc, rqt_actual_size, sz);
5407        MLX5_SET(rqtc, rqtc, rqt_max_size, sz);
5408
5409        for (i = 0; i < sz; i++)
5410                MLX5_SET(rqtc, rqtc, rq_num[i], init_attr->ind_tbl[i]->wq_num);
5411
5412        rwq_ind_tbl->uid = to_mpd(init_attr->ind_tbl[0]->pd)->uid;
5413        MLX5_SET(create_rqt_in, in, uid, rwq_ind_tbl->uid);
5414
5415        err = mlx5_core_create_rqt(dev->mdev, in, inlen, &rwq_ind_tbl->rqtn);
5416        kvfree(in);
5417        if (err)
5418                return err;
5419
5420        rwq_ind_tbl->ib_rwq_ind_tbl.ind_tbl_num = rwq_ind_tbl->rqtn;
5421        if (udata->outlen) {
5422                resp.response_length =
5423                        offsetofend(struct mlx5_ib_create_rwq_ind_tbl_resp,
5424                                    response_length);
5425                err = ib_copy_to_udata(udata, &resp, resp.response_length);
5426                if (err)
5427                        goto err_copy;
5428        }
5429
5430        return 0;
5431
5432err_copy:
5433        mlx5_cmd_destroy_rqt(dev->mdev, rwq_ind_tbl->rqtn, rwq_ind_tbl->uid);
5434        return err;
5435}
5436
5437int mlx5_ib_destroy_rwq_ind_table(struct ib_rwq_ind_table *ib_rwq_ind_tbl)
5438{
5439        struct mlx5_ib_rwq_ind_table *rwq_ind_tbl = to_mrwq_ind_table(ib_rwq_ind_tbl);
5440        struct mlx5_ib_dev *dev = to_mdev(ib_rwq_ind_tbl->device);
5441
5442        return mlx5_cmd_destroy_rqt(dev->mdev, rwq_ind_tbl->rqtn, rwq_ind_tbl->uid);
5443}
5444
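/*
 * Modify a WQ (backed by a hardware RQ). After validating the user
 * command, the IB WQ states are mapped onto RQC states: only IB_WQS_ERR
 * needs remapping (to MLX5_RQC_STATE_ERR), the RESET and RDY encodings
 * already match. CVLAN stripping is toggled through the VSD ("vlan strip
 * disable") bit when the device advertises vlan_cap, and on the
 * RESET->RDY transition the default counter set is attached when the
 * firmware supports it.
 */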
5445int mlx5_ib_modify_wq(struct ib_wq *wq, struct ib_wq_attr *wq_attr,
5446                      u32 wq_attr_mask, struct ib_udata *udata)
5447{
5448        struct mlx5_ib_dev *dev = to_mdev(wq->device);
5449        struct mlx5_ib_rwq *rwq = to_mrwq(wq);
5450        struct mlx5_ib_modify_wq ucmd = {};
5451        size_t required_cmd_sz;
5452        int curr_wq_state;
5453        int wq_state;
5454        int inlen;
5455        int err;
5456        void *rqc;
5457        void *in;
5458
5459        required_cmd_sz = offsetofend(struct mlx5_ib_modify_wq, reserved);
5460        if (udata->inlen < required_cmd_sz)
5461                return -EINVAL;
5462
5463        if (udata->inlen > sizeof(ucmd) &&
5464            !ib_is_udata_cleared(udata, sizeof(ucmd),
5465                                 udata->inlen - sizeof(ucmd)))
5466                return -EOPNOTSUPP;
5467
5468        if (ib_copy_from_udata(&ucmd, udata, min(sizeof(ucmd), udata->inlen)))
5469                return -EFAULT;
5470
5471        if (ucmd.comp_mask || ucmd.reserved)
5472                return -EOPNOTSUPP;
5473
5474        inlen = MLX5_ST_SZ_BYTES(modify_rq_in);
5475        in = kvzalloc(inlen, GFP_KERNEL);
5476        if (!in)
5477                return -ENOMEM;
5478
5479        rqc = MLX5_ADDR_OF(modify_rq_in, in, ctx);
5480
5481        curr_wq_state = wq_attr->curr_wq_state;
5482        wq_state = wq_attr->wq_state;
5483        if (curr_wq_state == IB_WQS_ERR)
5484                curr_wq_state = MLX5_RQC_STATE_ERR;
5485        if (wq_state == IB_WQS_ERR)
5486                wq_state = MLX5_RQC_STATE_ERR;
5487        MLX5_SET(modify_rq_in, in, rq_state, curr_wq_state);
5488        MLX5_SET(modify_rq_in, in, uid, to_mpd(wq->pd)->uid);
5489        MLX5_SET(rqc, rqc, state, wq_state);
5490
5491        if (wq_attr_mask & IB_WQ_FLAGS) {
5492                if (wq_attr->flags_mask & IB_WQ_FLAGS_CVLAN_STRIPPING) {
5493                        if (!(MLX5_CAP_GEN(dev->mdev, eth_net_offloads) &&
5494                              MLX5_CAP_ETH(dev->mdev, vlan_cap))) {
5495                                mlx5_ib_dbg(dev,
5496                                            "VLAN offloads are not supported\n");
5497                                err = -EOPNOTSUPP;
5498                                goto out;
5499                        }
5500                        MLX5_SET64(modify_rq_in, in, modify_bitmask,
5501                                   MLX5_MODIFY_RQ_IN_MODIFY_BITMASK_VSD);
5502                        MLX5_SET(rqc, rqc, vsd,
5503                                 (wq_attr->flags & IB_WQ_FLAGS_CVLAN_STRIPPING) ? 0 : 1);
5504                }
5505
5506                if (wq_attr->flags_mask & IB_WQ_FLAGS_PCI_WRITE_END_PADDING) {
5507                        mlx5_ib_dbg(dev, "Modifying scatter end padding is not supported\n");
5508                        err = -EOPNOTSUPP;
5509                        goto out;
5510                }
5511        }
5512
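        /*
         * The RESET->RDY transition is where the RQ gets associated with
         * the default counter set, provided the firmware supports
         * modify_rq_counter_set_id; otherwise a one-time notice is printed.
         */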
5513        if (curr_wq_state == IB_WQS_RESET && wq_state == IB_WQS_RDY) {
5514                u16 set_id;
5515
5516                set_id = mlx5_ib_get_counters_id(dev, 0);
5517                if (MLX5_CAP_GEN(dev->mdev, modify_rq_counter_set_id)) {
5518                        MLX5_SET64(modify_rq_in, in, modify_bitmask,
5519                                   MLX5_MODIFY_RQ_IN_MODIFY_BITMASK_RQ_COUNTER_SET_ID);
5520                        MLX5_SET(rqc, rqc, counter_set_id, set_id);
5521                } else
5522                        dev_info_once(
5523                                &dev->ib_dev.dev,
5524                                "Receive WQ counters are not supported on current FW\n");
5525        }
5526
5527        err = mlx5_core_modify_rq(dev->mdev, rwq->core_qp.qpn, in);
5528        if (!err)
5529                rwq->ibwq.state = (wq_state == MLX5_RQC_STATE_ERR) ? IB_WQS_ERR : wq_state;
5530
5531out:
5532        kvfree(in);
5533        return err;
5534}
5535
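/*
 * Queue draining works by posting a marker WR on the queue being drained
 * and waiting for its completion: once the marker completes, every WR
 * posted before it has been flushed as well. mlx5_ib_drain_cqe carries
 * the completion that the drain routines below wait on.
 */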
5536struct mlx5_ib_drain_cqe {
5537        struct ib_cqe cqe;
5538        struct completion done;
5539};
5540
5541static void mlx5_ib_drain_qp_done(struct ib_cq *cq, struct ib_wc *wc)
5542{
5543        struct mlx5_ib_drain_cqe *cqe = container_of(wc->wr_cqe,
5544                                                     struct mlx5_ib_drain_cqe,
5545                                                     cqe);
5546
5547        complete(&cqe->done);
5548}
5549
5550/* This function returns only once the drain WR has completed */
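/*
 * When the device is in internal error state the EQs may no longer
 * deliver completions, so the CQ handler is invoked directly below to
 * make sure the drain WR gets processed and the completion is signalled.
 */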
5551static void handle_drain_completion(struct ib_cq *cq,
5552                                    struct mlx5_ib_drain_cqe *sdrain,
5553                                    struct mlx5_ib_dev *dev)
5554{
5555        struct mlx5_core_dev *mdev = dev->mdev;
5556
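        /*
         * For client-polled CQs there is no interrupt or work context to
         * rely on, so poll the CQ here until the drain WR completes.
         */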
5557        if (cq->poll_ctx == IB_POLL_DIRECT) {
5558                while (!wait_for_completion_timeout(&sdrain->done, HZ / 10))
5559                        ib_process_cq_direct(cq, -1);
5560                return;
5561        }
5562
5563        if (mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) {
5564                struct mlx5_ib_cq *mcq = to_mcq(cq);
5565                bool triggered = false;
5566                unsigned long flags;
5567
5568                spin_lock_irqsave(&dev->reset_flow_resource_lock, flags);
5569                /* Make sure that the CQ handler won't run if it wasn't run yet */
5570                if (!mcq->mcq.reset_notify_added)
5571                        mcq->mcq.reset_notify_added = 1;
5572                else
5573                        triggered = true;
5574                spin_unlock_irqrestore(&dev->reset_flow_resource_lock, flags);
5575
5576                if (triggered) {
5577                        /* Wait for any scheduled/running task to be ended */
5578                        switch (cq->poll_ctx) {
5579                        case IB_POLL_SOFTIRQ:
5580                                irq_poll_disable(&cq->iop);
5581                                irq_poll_enable(&cq->iop);
5582                                break;
5583                        case IB_POLL_WORKQUEUE:
5584                                cancel_work_sync(&cq->work);
5585                                break;
5586                        default:
5587                                WARN_ON_ONCE(1);
5588                        }
5589                }
5590
5591                /* Run the CQ handler - this makes sure that the drain WR will
5592                 * be processed if it wasn't processed yet.
5593                 */
5594                mcq->mcq.comp(&mcq->mcq, NULL);
5595        }
5596
5597        wait_for_completion(&sdrain->done);
5598}
5599
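/*
 * Drain the send queue: the QP is moved to the error state (a failure to
 * do so is tolerated only when the device is in internal error), a
 * zero-length RDMA_WRITE marker WR is posted through the drain-aware
 * post_send path, and the function waits until that WR completes.
 *
 * Illustrative ULP usage (sketch only, not part of this driver): these
 * routines are reached through the drain_sq/drain_rq device ops, e.g.
 *
 *        ib_drain_qp(qp);
 *        ib_destroy_qp(qp);
 */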
5600void mlx5_ib_drain_sq(struct ib_qp *qp)
5601{
5602        struct ib_cq *cq = qp->send_cq;
5603        struct ib_qp_attr attr = { .qp_state = IB_QPS_ERR };
5604        struct mlx5_ib_drain_cqe sdrain;
5605        const struct ib_send_wr *bad_swr;
5606        struct ib_rdma_wr swr = {
5607                .wr = {
5608                        .next = NULL,
5609                        { .wr_cqe       = &sdrain.cqe, },
5610                        .opcode = IB_WR_RDMA_WRITE,
5611                },
5612        };
5613        int ret;
5614        struct mlx5_ib_dev *dev = to_mdev(qp->device);
5615        struct mlx5_core_dev *mdev = dev->mdev;
5616
5617        ret = ib_modify_qp(qp, &attr, IB_QP_STATE);
5618        if (ret && mdev->state != MLX5_DEVICE_STATE_INTERNAL_ERROR) {
5619                WARN_ONCE(ret, "failed to drain send queue: %d\n", ret);
5620                return;
5621        }
5622
5623        sdrain.cqe.done = mlx5_ib_drain_qp_done;
5624        init_completion(&sdrain.done);
5625
5626        ret = mlx5_ib_post_send_drain(qp, &swr.wr, &bad_swr);
5627        if (ret) {
5628                WARN_ONCE(ret, "failed to drain send queue: %d\n", ret);
5629                return;
5630        }
5631
5632        handle_drain_completion(cq, &sdrain, dev);
5633}
5634
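/*
 * Drain the receive queue: same scheme as mlx5_ib_drain_sq() above, but
 * the marker is an empty receive WR posted through the drain-aware
 * post_recv path.
 */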
5635void mlx5_ib_drain_rq(struct ib_qp *qp)
5636{
5637        struct ib_cq *cq = qp->recv_cq;
5638        struct ib_qp_attr attr = { .qp_state = IB_QPS_ERR };
5639        struct mlx5_ib_drain_cqe rdrain;
5640        struct ib_recv_wr rwr = {};
5641        const struct ib_recv_wr *bad_rwr;
5642        int ret;
5643        struct mlx5_ib_dev *dev = to_mdev(qp->device);
5644        struct mlx5_core_dev *mdev = dev->mdev;
5645
5646        ret = ib_modify_qp(qp, &attr, IB_QP_STATE);
5647        if (ret && mdev->state != MLX5_DEVICE_STATE_INTERNAL_ERROR) {
5648                WARN_ONCE(ret, "failed to drain recv queue: %d\n", ret);
5649                return;
5650        }
5651
5652        rwr.wr_cqe = &rdrain.cqe;
5653        rdrain.cqe.done = mlx5_ib_drain_qp_done;
5654        init_completion(&rdrain.done);
5655
5656        ret = mlx5_ib_post_recv_drain(qp, &rwr, &bad_rwr);
5657        if (ret) {
5658                WARN_ONCE(ret, "failed to drain recv queue: %d\n", ret);
5659                return;
5660        }
5661
5662        handle_drain_completion(cq, &rdrain, dev);
5663}
5664
5665/*
5666 * Bind a qp to a counter. If @counter is NULL then bind the qp to
5667 * the default counter.
5668 */
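/*
 * If the QP is in RESET, the counter is only recorded here and takes
 * effect when the QP is later brought out of RESET. If the QP is already
 * in RTS and the firmware supports rts2rts_qp_counters_set_id, the
 * counter is attached immediately; in any other state counter_pending is
 * set so that the modify-QP path attaches it on the next transition to RTS.
 */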
5669int mlx5_ib_qp_set_counter(struct ib_qp *qp, struct rdma_counter *counter)
5670{
5671        struct mlx5_ib_dev *dev = to_mdev(qp->device);
5672        struct mlx5_ib_qp *mqp = to_mqp(qp);
5673        int err = 0;
5674
5675        mutex_lock(&mqp->mutex);
5676        if (mqp->state == IB_QPS_RESET) {
5677                qp->counter = counter;
5678                goto out;
5679        }
5680
5681        if (!MLX5_CAP_GEN(dev->mdev, rts2rts_qp_counters_set_id)) {
5682                err = -EOPNOTSUPP;
5683                goto out;
5684        }
5685
5686        if (mqp->state == IB_QPS_RTS) {
5687                err = __mlx5_ib_qp_set_counter(qp, counter);
5688                if (!err)
5689                        qp->counter = counter;
5690
5691                goto out;
5692        }
5693
5694        mqp->counter_pending = 1;
5695        qp->counter = counter;
5696
5697out:
5698        mutex_unlock(&mqp->mutex);
5699        return err;
5700}
5701