linux/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c
   1// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
   2/* Copyright (c) 2019 Mellanox Technologies. */
   3
   4#include <linux/smp.h>
   5#include "dr_types.h"
   6
   7#define QUEUE_SIZE 128
   8#define SIGNAL_PER_DIV_QUEUE 16
   9#define TH_NUMS_TO_DRAIN 2
  10
  11enum { CQ_OK = 0, CQ_EMPTY = -1, CQ_POLL_ERR = -2 };
  12
  13struct dr_data_seg {
  14        u64 addr;
  15        u32 length;
  16        u32 lkey;
  17        unsigned int send_flags;
  18};
  19
  20struct postsend_info {
  21        struct dr_data_seg write;
  22        struct dr_data_seg read;
  23        u64 remote_addr;
  24        u32 rkey;
  25};
  26
  27struct dr_qp_rtr_attr {
  28        struct mlx5dr_cmd_gid_attr dgid_attr;
  29        enum ib_mtu mtu;
  30        u32 qp_num;
  31        u16 port_num;
  32        u8 min_rnr_timer;
  33        u8 sgid_index;
  34        u16 udp_src_port;
  35};
  36
  37struct dr_qp_rts_attr {
  38        u8 timeout;
  39        u8 retry_cnt;
  40        u8 rnr_retry;
  41};
  42
  43struct dr_qp_init_attr {
  44        u32 cqn;
  45        u32 pdn;
  46        u32 max_send_wr;
  47        struct mlx5_uars_page *uar;
  48};
  49
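/* Parse a single CQE: advance the SQ consumer counter past the reported WQE
 * (or bump it by one on a responder error) and return CQ_OK for a successful
 * completion, CQ_POLL_ERR for a requester/responder error.
 */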
  50static int dr_parse_cqe(struct mlx5dr_cq *dr_cq, struct mlx5_cqe64 *cqe64)
  51{
  52        unsigned int idx;
  53        u8 opcode;
  54
  55        opcode = get_cqe_opcode(cqe64);
  56        if (opcode == MLX5_CQE_REQ_ERR) {
  57                idx = be16_to_cpu(cqe64->wqe_counter) &
  58                        (dr_cq->qp->sq.wqe_cnt - 1);
  59                dr_cq->qp->sq.cc = dr_cq->qp->sq.wqe_head[idx] + 1;
  60        } else if (opcode == MLX5_CQE_RESP_ERR) {
  61                ++dr_cq->qp->sq.cc;
  62        } else {
  63                idx = be16_to_cpu(cqe64->wqe_counter) &
  64                        (dr_cq->qp->sq.wqe_cnt - 1);
  65                dr_cq->qp->sq.cc = dr_cq->qp->sq.wqe_head[idx] + 1;
  66
  67                return CQ_OK;
  68        }
  69
  70        return CQ_POLL_ERR;
  71}
  72
  73static int dr_cq_poll_one(struct mlx5dr_cq *dr_cq)
  74{
  75        struct mlx5_cqe64 *cqe64;
  76        int err;
  77
  78        cqe64 = mlx5_cqwq_get_cqe(&dr_cq->wq);
  79        if (!cqe64)
  80                return CQ_EMPTY;
  81
  82        mlx5_cqwq_pop(&dr_cq->wq);
  83        err = dr_parse_cqe(dr_cq, cqe64);
  84        mlx5_cqwq_update_db_record(&dr_cq->wq);
  85
  86        return err;
  87}
  88
  89static int dr_poll_cq(struct mlx5dr_cq *dr_cq, int ne)
  90{
  91        int npolled;
  92        int err = 0;
  93
  94        for (npolled = 0; npolled < ne; ++npolled) {
  95                err = dr_cq_poll_one(dr_cq);
  96                if (err != CQ_OK)
  97                        break;
  98        }
  99
 100        return err == CQ_POLL_ERR ? err : npolled;
 101}
 102
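/* Create the RC QP that SW steering uses to post RDMA writes/reads
 * toward the device ICM.
 */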
 103static struct mlx5dr_qp *dr_create_rc_qp(struct mlx5_core_dev *mdev,
 104                                         struct dr_qp_init_attr *attr)
 105{
 106        u32 out[MLX5_ST_SZ_DW(create_qp_out)] = {};
 107        u32 temp_qpc[MLX5_ST_SZ_DW(qpc)] = {};
 108        struct mlx5_wq_param wqp;
 109        struct mlx5dr_qp *dr_qp;
 110        int inlen;
 111        void *qpc;
 112        void *in;
 113        int err;
 114
 115        dr_qp = kzalloc(sizeof(*dr_qp), GFP_KERNEL);
 116        if (!dr_qp)
 117                return NULL;
 118
 119        wqp.buf_numa_node = mdev->priv.numa_node;
 120        wqp.db_numa_node = mdev->priv.numa_node;
 121
 122        dr_qp->rq.pc = 0;
 123        dr_qp->rq.cc = 0;
 124        dr_qp->rq.wqe_cnt = 4;
 125        dr_qp->sq.pc = 0;
 126        dr_qp->sq.cc = 0;
 127        dr_qp->sq.wqe_cnt = roundup_pow_of_two(attr->max_send_wr);
 128
 129        MLX5_SET(qpc, temp_qpc, log_rq_stride, ilog2(MLX5_SEND_WQE_DS) - 4);
 130        MLX5_SET(qpc, temp_qpc, log_rq_size, ilog2(dr_qp->rq.wqe_cnt));
 131        MLX5_SET(qpc, temp_qpc, log_sq_size, ilog2(dr_qp->sq.wqe_cnt));
 132        err = mlx5_wq_qp_create(mdev, &wqp, temp_qpc, &dr_qp->wq,
 133                                &dr_qp->wq_ctrl);
 134        if (err) {
 135                mlx5_core_warn(mdev, "Can't create QP WQ\n");
 136                goto err_wq;
 137        }
 138
 139        dr_qp->sq.wqe_head = kcalloc(dr_qp->sq.wqe_cnt,
 140                                     sizeof(dr_qp->sq.wqe_head[0]),
 141                                     GFP_KERNEL);
 142
 143        if (!dr_qp->sq.wqe_head) {
 144                mlx5_core_warn(mdev, "Can't allocate wqe head\n");
 145                goto err_wqe_head;
 146        }
 147
 148        inlen = MLX5_ST_SZ_BYTES(create_qp_in) +
 149                MLX5_FLD_SZ_BYTES(create_qp_in, pas[0]) *
 150                dr_qp->wq_ctrl.buf.npages;
 151        in = kvzalloc(inlen, GFP_KERNEL);
 152        if (!in) {
 153                err = -ENOMEM;
 154                goto err_in;
 155        }
 156
 157        qpc = MLX5_ADDR_OF(create_qp_in, in, qpc);
 158        MLX5_SET(qpc, qpc, st, MLX5_QP_ST_RC);
 159        MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED);
 160        MLX5_SET(qpc, qpc, pd, attr->pdn);
 161        MLX5_SET(qpc, qpc, uar_page, attr->uar->index);
 162        MLX5_SET(qpc, qpc, log_page_size,
 163                 dr_qp->wq_ctrl.buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT);
 164        MLX5_SET(qpc, qpc, fre, 1);
 165        MLX5_SET(qpc, qpc, rlky, 1);
 166        MLX5_SET(qpc, qpc, cqn_snd, attr->cqn);
 167        MLX5_SET(qpc, qpc, cqn_rcv, attr->cqn);
 168        MLX5_SET(qpc, qpc, log_rq_stride, ilog2(MLX5_SEND_WQE_DS) - 4);
 169        MLX5_SET(qpc, qpc, log_rq_size, ilog2(dr_qp->rq.wqe_cnt));
 170        MLX5_SET(qpc, qpc, rq_type, MLX5_NON_ZERO_RQ);
 171        MLX5_SET(qpc, qpc, log_sq_size, ilog2(dr_qp->sq.wqe_cnt));
 172        MLX5_SET64(qpc, qpc, dbr_addr, dr_qp->wq_ctrl.db.dma);
 173        if (MLX5_CAP_GEN(mdev, cqe_version) == 1)
 174                MLX5_SET(qpc, qpc, user_index, 0xFFFFFF);
 175        mlx5_fill_page_frag_array(&dr_qp->wq_ctrl.buf,
 176                                  (__be64 *)MLX5_ADDR_OF(create_qp_in,
 177                                                         in, pas));
 178
 179        MLX5_SET(create_qp_in, in, opcode, MLX5_CMD_OP_CREATE_QP);
 180        err = mlx5_cmd_exec(mdev, in, inlen, out, sizeof(out));
 181        dr_qp->qpn = MLX5_GET(create_qp_out, out, qpn);
 182        kvfree(in);
 183        if (err)
 184                goto err_in;
 185        dr_qp->uar = attr->uar;
 186
 187        return dr_qp;
 188
 189err_in:
 190        kfree(dr_qp->sq.wqe_head);
 191err_wqe_head:
 192        mlx5_wq_destroy(&dr_qp->wq_ctrl);
 193err_wq:
 194        kfree(dr_qp);
 195        return NULL;
 196}
 197
 198static void dr_destroy_qp(struct mlx5_core_dev *mdev,
 199                          struct mlx5dr_qp *dr_qp)
 200{
 201        u32 in[MLX5_ST_SZ_DW(destroy_qp_in)] = {};
 202
 203        MLX5_SET(destroy_qp_in, in, opcode, MLX5_CMD_OP_DESTROY_QP);
 204        MLX5_SET(destroy_qp_in, in, qpn, dr_qp->qpn);
 205        mlx5_cmd_exec_in(mdev, destroy_qp, in);
 206
 207        kfree(dr_qp->sq.wqe_head);
 208        mlx5_wq_destroy(&dr_qp->wq_ctrl);
 209        kfree(dr_qp);
 210}
 211
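/* Ring the doorbell: update the SQ doorbell record with the current
 * producer counter and write the WQE control segment to the UAR/BF page.
 */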
 212static void dr_cmd_notify_hw(struct mlx5dr_qp *dr_qp, void *ctrl)
 213{
 214        dma_wmb();
 215        *dr_qp->wq.sq.db = cpu_to_be32(dr_qp->sq.pc & 0xfffff);
 216
  217        /* After wmb() the hw is aware of new work */
 218        wmb();
 219
 220        mlx5_write64(ctrl, dr_qp->uar->map + MLX5_BF_OFFSET);
 221}
 222
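/* Build a single RDMA WQE (ctrl + remote address + data segment) at the
 * current SQ producer index, and ring the doorbell if this is the last
 * WQE of the batch (nreq != 0).
 */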
 223static void dr_rdma_segments(struct mlx5dr_qp *dr_qp, u64 remote_addr,
 224                             u32 rkey, struct dr_data_seg *data_seg,
 225                             u32 opcode, int nreq)
 226{
 227        struct mlx5_wqe_raddr_seg *wq_raddr;
 228        struct mlx5_wqe_ctrl_seg *wq_ctrl;
 229        struct mlx5_wqe_data_seg *wq_dseg;
 230        unsigned int size;
 231        unsigned int idx;
 232
 233        size = sizeof(*wq_ctrl) / 16 + sizeof(*wq_dseg) / 16 +
 234                sizeof(*wq_raddr) / 16;
 235
 236        idx = dr_qp->sq.pc & (dr_qp->sq.wqe_cnt - 1);
 237
 238        wq_ctrl = mlx5_wq_cyc_get_wqe(&dr_qp->wq.sq, idx);
 239        wq_ctrl->imm = 0;
 240        wq_ctrl->fm_ce_se = (data_seg->send_flags) ?
 241                MLX5_WQE_CTRL_CQ_UPDATE : 0;
 242        wq_ctrl->opmod_idx_opcode = cpu_to_be32(((dr_qp->sq.pc & 0xffff) << 8) |
 243                                                opcode);
 244        wq_ctrl->qpn_ds = cpu_to_be32(size | dr_qp->qpn << 8);
 245        wq_raddr = (void *)(wq_ctrl + 1);
 246        wq_raddr->raddr = cpu_to_be64(remote_addr);
 247        wq_raddr->rkey = cpu_to_be32(rkey);
 248        wq_raddr->reserved = 0;
 249
 250        wq_dseg = (void *)(wq_raddr + 1);
 251        wq_dseg->byte_count = cpu_to_be32(data_seg->length);
 252        wq_dseg->lkey = cpu_to_be32(data_seg->lkey);
 253        wq_dseg->addr = cpu_to_be64(data_seg->addr);
 254
 255        dr_qp->sq.wqe_head[idx] = dr_qp->sq.pc++;
 256
 257        if (nreq)
 258                dr_cmd_notify_hw(dr_qp, wq_ctrl);
 259}
 260
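/* Post a WQE pair: an RDMA write of the data followed by an RDMA read of
 * the same remote address back into the local buffer. Only the second
 * (read) WQE rings the doorbell.
 */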
 261static void dr_post_send(struct mlx5dr_qp *dr_qp, struct postsend_info *send_info)
 262{
 263        dr_rdma_segments(dr_qp, send_info->remote_addr, send_info->rkey,
 264                         &send_info->write, MLX5_OPCODE_RDMA_WRITE, 0);
 265        dr_rdma_segments(dr_qp, send_info->remote_addr, send_info->rkey,
 266                         &send_info->read, MLX5_OPCODE_RDMA_READ, 1);
 267}
 268
  269/**
  270 * mlx5dr_send_fill_and_append_ste_send_info: Add data to be sent
  271 * with the send_list. Parameters:
  272 *
  273 *     @ste:       The STE that this data is attached to
  274 *     @size:      Size of the data to write
  275 *     @offset:    Offset of the data from the start of the hw_ste entry
  276 *     @data:      The data to write
  277 *     @ste_info:  STE info to be sent with the send_list
  278 *     @send_list: The list to append the STE info to
  279 *     @copy_data: If true, the data is copied and kept, since it is not
  280 *                 backed up anywhere else (e.g. during re-hash).
  281 *                 If false, the data may still be updated after it was
  282 *                 added to the list.
  283 */
 284void mlx5dr_send_fill_and_append_ste_send_info(struct mlx5dr_ste *ste, u16 size,
 285                                               u16 offset, u8 *data,
 286                                               struct mlx5dr_ste_send_info *ste_info,
 287                                               struct list_head *send_list,
 288                                               bool copy_data)
 289{
 290        ste_info->size = size;
 291        ste_info->ste = ste;
 292        ste_info->offset = offset;
 293
 294        if (copy_data) {
 295                memcpy(ste_info->data_cont, data, size);
 296                ste_info->data = ste_info->data_cont;
 297        } else {
 298                ste_info->data = data;
 299        }
 300
 301        list_add_tail(&ste_info->send_list, send_list);
 302}
 303
  304/* The function tries to consume one wc each time, unless the queue is full.
  305 * In that case, meaning the hw is behind the sw by a full queue length,
  306 * the function will drain the cq until it is empty.
  307 */
 308static int dr_handle_pending_wc(struct mlx5dr_domain *dmn,
 309                                struct mlx5dr_send_ring *send_ring)
 310{
 311        bool is_drain = false;
 312        int ne;
 313
 314        if (send_ring->pending_wqe < send_ring->signal_th)
 315                return 0;
 316
  317        /* Queue is full, start draining it */
 318        if (send_ring->pending_wqe >=
 319            dmn->send_ring->signal_th * TH_NUMS_TO_DRAIN)
 320                is_drain = true;
 321
 322        do {
 323                ne = dr_poll_cq(send_ring->cq, 1);
 324                if (ne < 0)
 325                        return ne;
 326                else if (ne == 1)
 327                        send_ring->pending_wqe -= send_ring->signal_th;
 328        } while (is_drain && send_ring->pending_wqe);
 329
 330        return 0;
 331}
 332
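/* Account for the write+read WQE pair and request a signaled completion
 * once every signal_th posted WQEs. The read targets the same local
 * address and length as the write.
 */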
 333static void dr_fill_data_segs(struct mlx5dr_send_ring *send_ring,
 334                              struct postsend_info *send_info)
 335{
 336        send_ring->pending_wqe++;
 337
 338        if (send_ring->pending_wqe % send_ring->signal_th == 0)
 339                send_info->write.send_flags |= IB_SEND_SIGNALED;
 340
 341        send_ring->pending_wqe++;
 342        send_info->read.length = send_info->write.length;
 343        /* Read into the same write area */
 344        send_info->read.addr = (uintptr_t)send_info->write.addr;
 345        send_info->read.lkey = send_ring->mr->mkey.key;
 346
 347        if (send_ring->pending_wqe % send_ring->signal_th == 0)
 348                send_info->read.send_flags = IB_SEND_SIGNALED;
 349        else
 350                send_info->read.send_flags = 0;
 351}
 352
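/* Post a single write/read pair toward ICM, serialized under the send
 * ring lock. Payloads larger than max_inline_size are first copied into
 * the ring's registered buffer and sent from there.
 */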
 353static int dr_postsend_icm_data(struct mlx5dr_domain *dmn,
 354                                struct postsend_info *send_info)
 355{
 356        struct mlx5dr_send_ring *send_ring = dmn->send_ring;
 357        u32 buff_offset;
 358        int ret;
 359
 360        spin_lock(&send_ring->lock);
 361
 362        ret = dr_handle_pending_wc(dmn, send_ring);
 363        if (ret)
 364                goto out_unlock;
 365
 366        if (send_info->write.length > dmn->info.max_inline_size) {
 367                buff_offset = (send_ring->tx_head &
 368                               (dmn->send_ring->signal_th - 1)) *
 369                        send_ring->max_post_send_size;
 370                /* Copy to ring mr */
 371                memcpy(send_ring->buf + buff_offset,
 372                       (void *)(uintptr_t)send_info->write.addr,
 373                       send_info->write.length);
 374                send_info->write.addr = (uintptr_t)send_ring->mr->dma_addr + buff_offset;
 375                send_info->write.lkey = send_ring->mr->mkey.key;
 376        }
 377
 378        send_ring->tx_head++;
 379        dr_fill_data_segs(send_ring, send_info);
 380        dr_post_send(send_ring->qp, send_info);
 381
 382out_unlock:
 383        spin_unlock(&send_ring->lock);
 384        return ret;
 385}
 386
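/* Compute how to split an htbl chunk copy into post_send-sized iterations
 * and allocate a bounce buffer for one iteration.
 */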
 387static int dr_get_tbl_copy_details(struct mlx5dr_domain *dmn,
 388                                   struct mlx5dr_ste_htbl *htbl,
 389                                   u8 **data,
 390                                   u32 *byte_size,
 391                                   int *iterations,
 392                                   int *num_stes)
 393{
 394        int alloc_size;
 395
 396        if (htbl->chunk->byte_size > dmn->send_ring->max_post_send_size) {
 397                *iterations = htbl->chunk->byte_size /
 398                        dmn->send_ring->max_post_send_size;
 399                *byte_size = dmn->send_ring->max_post_send_size;
 400                alloc_size = *byte_size;
 401                *num_stes = *byte_size / DR_STE_SIZE;
 402        } else {
 403                *iterations = 1;
 404                *num_stes = htbl->chunk->num_of_entries;
 405                alloc_size = *num_stes * DR_STE_SIZE;
 406        }
 407
 408        *data = kzalloc(alloc_size, GFP_KERNEL);
 409        if (!*data)
 410                return -ENOMEM;
 411
 412        return 0;
 413}
 414
  415/**
  416 * mlx5dr_send_postsend_ste: Write size bytes at offset into the hw icm.
  417 *
  418 *     @dmn:    Domain
  419 *     @ste:    The ste struct that contains the data (at
  420 *              least part of it)
  421 *     @data:   The actual data to send
  422 *     @size:   Number of bytes to write
  423 *     @offset: The offset from the start of the icm mapped data,
  424 *              allowing only part of the buffer to be
  425 *              written.
 426 *
 427 * Return: 0 on success.
 428 */
 429int mlx5dr_send_postsend_ste(struct mlx5dr_domain *dmn, struct mlx5dr_ste *ste,
 430                             u8 *data, u16 size, u16 offset)
 431{
 432        struct postsend_info send_info = {};
 433
 434        send_info.write.addr = (uintptr_t)data;
 435        send_info.write.length = size;
 436        send_info.write.lkey = 0;
 437        send_info.remote_addr = mlx5dr_ste_get_mr_addr(ste) + offset;
 438        send_info.rkey = ste->htbl->chunk->rkey;
 439
 440        return dr_postsend_icm_data(dmn, &send_info);
 441}
 442
 443int mlx5dr_send_postsend_htbl(struct mlx5dr_domain *dmn,
 444                              struct mlx5dr_ste_htbl *htbl,
 445                              u8 *formatted_ste, u8 *mask)
 446{
 447        u32 byte_size = htbl->chunk->byte_size;
 448        int num_stes_per_iter;
 449        int iterations;
 450        u8 *data;
 451        int ret;
 452        int i;
 453        int j;
 454
 455        ret = dr_get_tbl_copy_details(dmn, htbl, &data, &byte_size,
 456                                      &iterations, &num_stes_per_iter);
 457        if (ret)
 458                return ret;
 459
  460        /* Send the data 'iterations' times */
 461        for (i = 0; i < iterations; i++) {
 462                u32 ste_index = i * (byte_size / DR_STE_SIZE);
 463                struct postsend_info send_info = {};
 464
  465                /* Copy all STEs into the data buffer;
  466                 * the bit_mask needs to be added as well.
  467                 */
 468                for (j = 0; j < num_stes_per_iter; j++) {
 469                        u8 *hw_ste = htbl->ste_arr[ste_index + j].hw_ste;
 470                        u32 ste_off = j * DR_STE_SIZE;
 471
 472                        if (mlx5dr_ste_is_not_valid_entry(hw_ste)) {
 473                                memcpy(data + ste_off,
 474                                       formatted_ste, DR_STE_SIZE);
 475                        } else {
 476                                /* Copy data */
 477                                memcpy(data + ste_off,
 478                                       htbl->ste_arr[ste_index + j].hw_ste,
 479                                       DR_STE_SIZE_REDUCED);
 480                                /* Copy bit_mask */
 481                                memcpy(data + ste_off + DR_STE_SIZE_REDUCED,
 482                                       mask, DR_STE_SIZE_MASK);
 483                        }
 484                }
 485
 486                send_info.write.addr = (uintptr_t)data;
 487                send_info.write.length = byte_size;
 488                send_info.write.lkey = 0;
 489                send_info.remote_addr =
 490                        mlx5dr_ste_get_mr_addr(htbl->ste_arr + ste_index);
 491                send_info.rkey = htbl->chunk->rkey;
 492
 493                ret = dr_postsend_icm_data(dmn, &send_info);
 494                if (ret)
 495                        goto out_free;
 496        }
 497
 498out_free:
 499        kfree(data);
 500        return ret;
 501}
 502
  503/* Initialize htbl with default STEs */
 504int mlx5dr_send_postsend_formatted_htbl(struct mlx5dr_domain *dmn,
 505                                        struct mlx5dr_ste_htbl *htbl,
 506                                        u8 *ste_init_data,
 507                                        bool update_hw_ste)
 508{
 509        u32 byte_size = htbl->chunk->byte_size;
 510        int iterations;
 511        int num_stes;
 512        u8 *data;
 513        int ret;
 514        int i;
 515
 516        ret = dr_get_tbl_copy_details(dmn, htbl, &data, &byte_size,
 517                                      &iterations, &num_stes);
 518        if (ret)
 519                return ret;
 520
 521        for (i = 0; i < num_stes; i++) {
 522                u8 *copy_dst;
 523
  524                /* Copy the same STE into the data buffer */
 525                copy_dst = data + i * DR_STE_SIZE;
 526                memcpy(copy_dst, ste_init_data, DR_STE_SIZE);
 527
 528                if (update_hw_ste) {
 529                        /* Copy the reduced ste to hash table ste_arr */
 530                        copy_dst = htbl->hw_ste_arr + i * DR_STE_SIZE_REDUCED;
 531                        memcpy(copy_dst, ste_init_data, DR_STE_SIZE_REDUCED);
 532                }
 533        }
 534
  535        /* Send the data 'iterations' times */
 536        for (i = 0; i < iterations; i++) {
  537                u32 ste_index = i * (byte_size / DR_STE_SIZE);
 538                struct postsend_info send_info = {};
 539
 540                send_info.write.addr = (uintptr_t)data;
 541                send_info.write.length = byte_size;
 542                send_info.write.lkey = 0;
 543                send_info.remote_addr =
 544                        mlx5dr_ste_get_mr_addr(htbl->ste_arr + ste_index);
 545                send_info.rkey = htbl->chunk->rkey;
 546
 547                ret = dr_postsend_icm_data(dmn, &send_info);
 548                if (ret)
 549                        goto out_free;
 550        }
 551
 552out_free:
 553        kfree(data);
 554        return ret;
 555}
 556
 557int mlx5dr_send_postsend_action(struct mlx5dr_domain *dmn,
 558                                struct mlx5dr_action *action)
 559{
 560        struct postsend_info send_info = {};
 561        int ret;
 562
 563        send_info.write.addr = (uintptr_t)action->rewrite.data;
 564        send_info.write.length = action->rewrite.num_of_actions *
 565                                 DR_MODIFY_ACTION_SIZE;
 566        send_info.write.lkey = 0;
 567        send_info.remote_addr = action->rewrite.chunk->mr_addr;
 568        send_info.rkey = action->rewrite.chunk->rkey;
 569
 570        ret = dr_postsend_icm_data(dmn, &send_info);
 571
 572        return ret;
 573}
 574
 575static int dr_modify_qp_rst2init(struct mlx5_core_dev *mdev,
 576                                 struct mlx5dr_qp *dr_qp,
 577                                 int port)
 578{
 579        u32 in[MLX5_ST_SZ_DW(rst2init_qp_in)] = {};
 580        void *qpc;
 581
 582        qpc = MLX5_ADDR_OF(rst2init_qp_in, in, qpc);
 583
 584        MLX5_SET(qpc, qpc, primary_address_path.vhca_port_num, port);
 585        MLX5_SET(qpc, qpc, pm_state, MLX5_QPC_PM_STATE_MIGRATED);
 586        MLX5_SET(qpc, qpc, rre, 1);
 587        MLX5_SET(qpc, qpc, rwe, 1);
 588
 589        MLX5_SET(rst2init_qp_in, in, opcode, MLX5_CMD_OP_RST2INIT_QP);
 590        MLX5_SET(rst2init_qp_in, in, qpn, dr_qp->qpn);
 591
 592        return mlx5_cmd_exec_in(mdev, rst2init_qp, in);
 593}
 594
 595static int dr_cmd_modify_qp_rtr2rts(struct mlx5_core_dev *mdev,
 596                                    struct mlx5dr_qp *dr_qp,
 597                                    struct dr_qp_rts_attr *attr)
 598{
 599        u32 in[MLX5_ST_SZ_DW(rtr2rts_qp_in)] = {};
 600        void *qpc;
 601
 602        qpc  = MLX5_ADDR_OF(rtr2rts_qp_in, in, qpc);
 603
 604        MLX5_SET(rtr2rts_qp_in, in, qpn, dr_qp->qpn);
 605
 606        MLX5_SET(qpc, qpc, retry_count, attr->retry_cnt);
 607        MLX5_SET(qpc, qpc, rnr_retry, attr->rnr_retry);
 608
 609        MLX5_SET(rtr2rts_qp_in, in, opcode, MLX5_CMD_OP_RTR2RTS_QP);
 610        MLX5_SET(rtr2rts_qp_in, in, qpn, dr_qp->qpn);
 611
 612        return mlx5_cmd_exec_in(mdev, rtr2rts_qp, in);
 613}
 614
 615static int dr_cmd_modify_qp_init2rtr(struct mlx5_core_dev *mdev,
 616                                     struct mlx5dr_qp *dr_qp,
 617                                     struct dr_qp_rtr_attr *attr)
 618{
 619        u32 in[MLX5_ST_SZ_DW(init2rtr_qp_in)] = {};
 620        void *qpc;
 621
 622        qpc = MLX5_ADDR_OF(init2rtr_qp_in, in, qpc);
 623
 624        MLX5_SET(init2rtr_qp_in, in, qpn, dr_qp->qpn);
 625
 626        MLX5_SET(qpc, qpc, mtu, attr->mtu);
 627        MLX5_SET(qpc, qpc, log_msg_max, DR_CHUNK_SIZE_MAX - 1);
 628        MLX5_SET(qpc, qpc, remote_qpn, attr->qp_num);
 629        memcpy(MLX5_ADDR_OF(qpc, qpc, primary_address_path.rmac_47_32),
 630               attr->dgid_attr.mac, sizeof(attr->dgid_attr.mac));
 631        memcpy(MLX5_ADDR_OF(qpc, qpc, primary_address_path.rgid_rip),
 632               attr->dgid_attr.gid, sizeof(attr->dgid_attr.gid));
 633        MLX5_SET(qpc, qpc, primary_address_path.src_addr_index,
 634                 attr->sgid_index);
 635
 636        if (attr->dgid_attr.roce_ver == MLX5_ROCE_VERSION_2)
 637                MLX5_SET(qpc, qpc, primary_address_path.udp_sport,
 638                         attr->udp_src_port);
 639
 640        MLX5_SET(qpc, qpc, primary_address_path.vhca_port_num, attr->port_num);
 641        MLX5_SET(qpc, qpc, min_rnr_nak, 1);
 642
 643        MLX5_SET(init2rtr_qp_in, in, opcode, MLX5_CMD_OP_INIT2RTR_QP);
 644        MLX5_SET(init2rtr_qp_in, in, qpn, dr_qp->qpn);
 645
 646        return mlx5_cmd_exec_in(mdev, init2rtr_qp, in);
 647}
 648
 649static int dr_prepare_qp_to_rts(struct mlx5dr_domain *dmn)
 650{
 651        struct mlx5dr_qp *dr_qp = dmn->send_ring->qp;
 652        struct dr_qp_rts_attr rts_attr = {};
 653        struct dr_qp_rtr_attr rtr_attr = {};
 654        enum ib_mtu mtu = IB_MTU_1024;
 655        u16 gid_index = 0;
 656        int port = 1;
 657        int ret;
 658
 659        /* Init */
 660        ret = dr_modify_qp_rst2init(dmn->mdev, dr_qp, port);
 661        if (ret) {
 662                mlx5dr_err(dmn, "Failed modify QP rst2init\n");
 663                return ret;
 664        }
 665
 666        /* RTR */
 667        ret = mlx5dr_cmd_query_gid(dmn->mdev, port, gid_index, &rtr_attr.dgid_attr);
 668        if (ret)
 669                return ret;
 670
 671        rtr_attr.mtu            = mtu;
 672        rtr_attr.qp_num         = dr_qp->qpn;
 673        rtr_attr.min_rnr_timer  = 12;
 674        rtr_attr.port_num       = port;
 675        rtr_attr.sgid_index     = gid_index;
 676        rtr_attr.udp_src_port   = dmn->info.caps.roce_min_src_udp;
 677
 678        ret = dr_cmd_modify_qp_init2rtr(dmn->mdev, dr_qp, &rtr_attr);
 679        if (ret) {
 680                mlx5dr_err(dmn, "Failed modify QP init2rtr\n");
 681                return ret;
 682        }
 683
 684        /* RTS */
 685        rts_attr.timeout        = 14;
 686        rts_attr.retry_cnt      = 7;
 687        rts_attr.rnr_retry      = 7;
 688
 689        ret = dr_cmd_modify_qp_rtr2rts(dmn->mdev, dr_qp, &rts_attr);
 690        if (ret) {
 691                mlx5dr_err(dmn, "Failed modify QP rtr2rts\n");
 692                return ret;
 693        }
 694
 695        return 0;
 696}
 697
 698static void dr_cq_complete(struct mlx5_core_cq *mcq,
 699                           struct mlx5_eqe *eqe)
 700{
 701        pr_err("CQ completion CQ: #%u\n", mcq->cqn);
 702}
 703
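/* Create the completion queue for the send ring QP; it is used in
 * polling mode and is never armed for interrupts.
 */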
 704static struct mlx5dr_cq *dr_create_cq(struct mlx5_core_dev *mdev,
 705                                      struct mlx5_uars_page *uar,
 706                                      size_t ncqe)
 707{
 708        u32 temp_cqc[MLX5_ST_SZ_DW(cqc)] = {};
 709        u32 out[MLX5_ST_SZ_DW(create_cq_out)];
 710        struct mlx5_wq_param wqp;
 711        struct mlx5_cqe64 *cqe;
 712        struct mlx5dr_cq *cq;
 713        int inlen, err, eqn;
 714        unsigned int irqn;
 715        void *cqc, *in;
 716        __be64 *pas;
 717        int vector;
 718        u32 i;
 719
 720        cq = kzalloc(sizeof(*cq), GFP_KERNEL);
 721        if (!cq)
 722                return NULL;
 723
 724        ncqe = roundup_pow_of_two(ncqe);
 725        MLX5_SET(cqc, temp_cqc, log_cq_size, ilog2(ncqe));
 726
 727        wqp.buf_numa_node = mdev->priv.numa_node;
 728        wqp.db_numa_node = mdev->priv.numa_node;
 729
 730        err = mlx5_cqwq_create(mdev, &wqp, temp_cqc, &cq->wq,
 731                               &cq->wq_ctrl);
 732        if (err)
 733                goto out;
 734
 735        for (i = 0; i < mlx5_cqwq_get_size(&cq->wq); i++) {
 736                cqe = mlx5_cqwq_get_wqe(&cq->wq, i);
 737                cqe->op_own = MLX5_CQE_INVALID << 4 | MLX5_CQE_OWNER_MASK;
 738        }
 739
 740        inlen = MLX5_ST_SZ_BYTES(create_cq_in) +
 741                sizeof(u64) * cq->wq_ctrl.buf.npages;
 742        in = kvzalloc(inlen, GFP_KERNEL);
 743        if (!in)
 744                goto err_cqwq;
 745
 746        vector = raw_smp_processor_id() % mlx5_comp_vectors_count(mdev);
 747        err = mlx5_vector2eqn(mdev, vector, &eqn, &irqn);
 748        if (err) {
 749                kvfree(in);
 750                goto err_cqwq;
 751        }
 752
 753        cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context);
 754        MLX5_SET(cqc, cqc, log_cq_size, ilog2(ncqe));
 755        MLX5_SET(cqc, cqc, c_eqn, eqn);
 756        MLX5_SET(cqc, cqc, uar_page, uar->index);
 757        MLX5_SET(cqc, cqc, log_page_size, cq->wq_ctrl.buf.page_shift -
 758                 MLX5_ADAPTER_PAGE_SHIFT);
 759        MLX5_SET64(cqc, cqc, dbr_addr, cq->wq_ctrl.db.dma);
 760
 761        pas = (__be64 *)MLX5_ADDR_OF(create_cq_in, in, pas);
 762        mlx5_fill_page_frag_array(&cq->wq_ctrl.buf, pas);
 763
 764        cq->mcq.comp  = dr_cq_complete;
 765
 766        err = mlx5_core_create_cq(mdev, &cq->mcq, in, inlen, out, sizeof(out));
 767        kvfree(in);
 768
 769        if (err)
 770                goto err_cqwq;
 771
 772        cq->mcq.cqe_sz = 64;
 773        cq->mcq.set_ci_db = cq->wq_ctrl.db.db;
 774        cq->mcq.arm_db = cq->wq_ctrl.db.db + 1;
 775        *cq->mcq.set_ci_db = 0;
 776
  777        /* Set a non-zero value to avoid the HW running db-recovery on
  778         * a CQ that is used in polling mode.
  779         */
 780        *cq->mcq.arm_db = cpu_to_be32(2 << 28);
 781
 782        cq->mcq.vector = 0;
 783        cq->mcq.irqn = irqn;
 784        cq->mcq.uar = uar;
 785
 786        return cq;
 787
 788err_cqwq:
 789        mlx5_wq_destroy(&cq->wq_ctrl);
 790out:
 791        kfree(cq);
 792        return NULL;
 793}
 794
 795static void dr_destroy_cq(struct mlx5_core_dev *mdev, struct mlx5dr_cq *cq)
 796{
 797        mlx5_core_destroy_cq(mdev, &cq->mcq);
 798        mlx5_wq_destroy(&cq->wq_ctrl);
 799        kfree(cq);
 800}
 801
 802static int
 803dr_create_mkey(struct mlx5_core_dev *mdev, u32 pdn, struct mlx5_core_mkey *mkey)
 804{
 805        u32 in[MLX5_ST_SZ_DW(create_mkey_in)] = {};
 806        void *mkc;
 807
 808        mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
 809        MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_PA);
 810        MLX5_SET(mkc, mkc, a, 1);
 811        MLX5_SET(mkc, mkc, rw, 1);
 812        MLX5_SET(mkc, mkc, rr, 1);
 813        MLX5_SET(mkc, mkc, lw, 1);
 814        MLX5_SET(mkc, mkc, lr, 1);
 815
 816        MLX5_SET(mkc, mkc, pd, pdn);
 817        MLX5_SET(mkc, mkc, length64, 1);
 818        MLX5_SET(mkc, mkc, qpn, 0xffffff);
 819
 820        return mlx5_core_create_mkey(mdev, mkey, in, sizeof(in));
 821}
 822
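/* DMA-map @buf and create a PA-mode mkey for it, so the QP can reference
 * the buffer by its DMA address in data segments.
 */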
 823static struct mlx5dr_mr *dr_reg_mr(struct mlx5_core_dev *mdev,
 824                                   u32 pdn, void *buf, size_t size)
 825{
 826        struct mlx5dr_mr *mr = kzalloc(sizeof(*mr), GFP_KERNEL);
 827        struct device *dma_device;
 828        dma_addr_t dma_addr;
 829        int err;
 830
 831        if (!mr)
 832                return NULL;
 833
 834        dma_device = &mdev->pdev->dev;
 835        dma_addr = dma_map_single(dma_device, buf, size,
 836                                  DMA_BIDIRECTIONAL);
 837        err = dma_mapping_error(dma_device, dma_addr);
 838        if (err) {
 839                mlx5_core_warn(mdev, "Can't dma buf\n");
 840                kfree(mr);
 841                return NULL;
 842        }
 843
 844        err = dr_create_mkey(mdev, pdn, &mr->mkey);
 845        if (err) {
 846                mlx5_core_warn(mdev, "Can't create mkey\n");
 847                dma_unmap_single(dma_device, dma_addr, size,
 848                                 DMA_BIDIRECTIONAL);
 849                kfree(mr);
 850                return NULL;
 851        }
 852
 853        mr->dma_addr = dma_addr;
 854        mr->size = size;
 855        mr->addr = buf;
 856
 857        return mr;
 858}
 859
 860static void dr_dereg_mr(struct mlx5_core_dev *mdev, struct mlx5dr_mr *mr)
 861{
 862        mlx5_core_destroy_mkey(mdev, &mr->mkey);
 863        dma_unmap_single(&mdev->pdev->dev, mr->dma_addr, mr->size,
 864                         DMA_BIDIRECTIONAL);
 865        kfree(mr);
 866}
 867
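/* Allocate the per-domain send ring: CQ, RC QP (moved to RTS), a bounce
 * buffer MR for large writes and a small MR used for drain/sync reads.
 */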
 868int mlx5dr_send_ring_alloc(struct mlx5dr_domain *dmn)
 869{
 870        struct dr_qp_init_attr init_attr = {};
 871        int cq_size;
 872        int size;
 873        int ret;
 874
 875        dmn->send_ring = kzalloc(sizeof(*dmn->send_ring), GFP_KERNEL);
 876        if (!dmn->send_ring)
 877                return -ENOMEM;
 878
 879        cq_size = QUEUE_SIZE + 1;
 880        dmn->send_ring->cq = dr_create_cq(dmn->mdev, dmn->uar, cq_size);
 881        if (!dmn->send_ring->cq) {
 882                mlx5dr_err(dmn, "Failed creating CQ\n");
 883                ret = -ENOMEM;
 884                goto free_send_ring;
 885        }
 886
 887        init_attr.cqn = dmn->send_ring->cq->mcq.cqn;
 888        init_attr.pdn = dmn->pdn;
 889        init_attr.uar = dmn->uar;
 890        init_attr.max_send_wr = QUEUE_SIZE;
 891        spin_lock_init(&dmn->send_ring->lock);
 892
 893        dmn->send_ring->qp = dr_create_rc_qp(dmn->mdev, &init_attr);
 894        if (!dmn->send_ring->qp)  {
 895                mlx5dr_err(dmn, "Failed creating QP\n");
 896                ret = -ENOMEM;
 897                goto clean_cq;
 898        }
 899
 900        dmn->send_ring->cq->qp = dmn->send_ring->qp;
 901
 902        dmn->info.max_send_wr = QUEUE_SIZE;
 903        dmn->info.max_inline_size = min(dmn->send_ring->qp->max_inline_data,
 904                                        DR_STE_SIZE);
 905
 906        dmn->send_ring->signal_th = dmn->info.max_send_wr /
 907                SIGNAL_PER_DIV_QUEUE;
 908
 909        /* Prepare qp to be used */
 910        ret = dr_prepare_qp_to_rts(dmn);
 911        if (ret)
 912                goto clean_qp;
 913
 914        dmn->send_ring->max_post_send_size =
 915                mlx5dr_icm_pool_chunk_size_to_byte(DR_CHUNK_SIZE_1K,
 916                                                   DR_ICM_TYPE_STE);
 917
  918        /* Allocate the maximum size as the write buffer */
 919        size = dmn->send_ring->signal_th * dmn->send_ring->max_post_send_size;
 920        dmn->send_ring->buf = kzalloc(size, GFP_KERNEL);
 921        if (!dmn->send_ring->buf) {
 922                ret = -ENOMEM;
 923                goto clean_qp;
 924        }
 925
 926        dmn->send_ring->buf_size = size;
 927
 928        dmn->send_ring->mr = dr_reg_mr(dmn->mdev,
 929                                       dmn->pdn, dmn->send_ring->buf, size);
 930        if (!dmn->send_ring->mr) {
 931                ret = -ENOMEM;
 932                goto free_mem;
 933        }
 934
 935        dmn->send_ring->sync_mr = dr_reg_mr(dmn->mdev,
 936                                            dmn->pdn, dmn->send_ring->sync_buff,
 937                                            MIN_READ_SYNC);
 938        if (!dmn->send_ring->sync_mr) {
 939                ret = -ENOMEM;
 940                goto clean_mr;
 941        }
 942
 943        return 0;
 944
 945clean_mr:
 946        dr_dereg_mr(dmn->mdev, dmn->send_ring->mr);
 947free_mem:
 948        kfree(dmn->send_ring->buf);
 949clean_qp:
 950        dr_destroy_qp(dmn->mdev, dmn->send_ring->qp);
 951clean_cq:
 952        dr_destroy_cq(dmn->mdev, dmn->send_ring->cq);
 953free_send_ring:
 954        kfree(dmn->send_ring);
 955
 956        return ret;
 957}
 958
 959void mlx5dr_send_ring_free(struct mlx5dr_domain *dmn,
 960                           struct mlx5dr_send_ring *send_ring)
 961{
 962        dr_destroy_qp(dmn->mdev, send_ring->qp);
 963        dr_destroy_cq(dmn->mdev, send_ring->cq);
 964        dr_dereg_mr(dmn->mdev, send_ring->sync_mr);
 965        dr_dereg_mr(dmn->mdev, send_ring->mr);
 966        kfree(send_ring->buf);
 967        kfree(send_ring);
 968}
 969
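/* Post enough dummy WQEs to reach the drain threshold, then poll the CQ
 * until all outstanding work is drained.
 */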
 970int mlx5dr_send_ring_force_drain(struct mlx5dr_domain *dmn)
 971{
 972        struct mlx5dr_send_ring *send_ring = dmn->send_ring;
 973        struct postsend_info send_info = {};
 974        u8 data[DR_STE_SIZE];
 975        int num_of_sends_req;
 976        int ret;
 977        int i;
 978
  979        /* Sending this number of requests makes sure the queue gets drained */
 980        num_of_sends_req = send_ring->signal_th * TH_NUMS_TO_DRAIN / 2;
 981
 982        /* Send fake requests forcing the last to be signaled */
 983        send_info.write.addr = (uintptr_t)data;
 984        send_info.write.length = DR_STE_SIZE;
 985        send_info.write.lkey = 0;
 986        /* Using the sync_mr in order to write/read */
 987        send_info.remote_addr = (uintptr_t)send_ring->sync_mr->addr;
 988        send_info.rkey = send_ring->sync_mr->mkey.key;
 989
 990        for (i = 0; i < num_of_sends_req; i++) {
 991                ret = dr_postsend_icm_data(dmn, &send_info);
 992                if (ret)
 993                        return ret;
 994        }
 995
 996        spin_lock(&send_ring->lock);
 997        ret = dr_handle_pending_wc(dmn, send_ring);
 998        spin_unlock(&send_ring->lock);
 999
1000        return ret;
1001}
1002