linux/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c
   1// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
   2/* Copyright (c) 2019 Mellanox Technologies. */
   3
   4#include <linux/smp.h>
   5#include "dr_types.h"
   6
   7#define QUEUE_SIZE 128
   8#define SIGNAL_PER_DIV_QUEUE 16
   9#define TH_NUMS_TO_DRAIN 2
  10
  11enum { CQ_OK = 0, CQ_EMPTY = -1, CQ_POLL_ERR = -2 };
  12
  13struct dr_data_seg {
  14        u64 addr;
  15        u32 length;
  16        u32 lkey;
  17        unsigned int send_flags;
  18};
  19
  20struct postsend_info {
  21        struct dr_data_seg write;
  22        struct dr_data_seg read;
  23        u64 remote_addr;
  24        u32 rkey;
  25};
  26
  27struct dr_qp_rtr_attr {
  28        struct mlx5dr_cmd_gid_attr dgid_attr;
  29        enum ib_mtu mtu;
  30        u32 qp_num;
  31        u16 port_num;
  32        u8 min_rnr_timer;
  33        u8 sgid_index;
  34        u16 udp_src_port;
  35};
  36
  37struct dr_qp_rts_attr {
  38        u8 timeout;
  39        u8 retry_cnt;
  40        u8 rnr_retry;
  41};
  42
  43struct dr_qp_init_attr {
  44        u32 cqn;
  45        u32 pdn;
  46        u32 max_send_wr;
  47        struct mlx5_uars_page *uar;
  48};
  49
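     /* Parse a single CQE: advance the SQ consumer counter past the
      * completed WQE and return CQ_OK on a good completion, or
      * CQ_POLL_ERR on a requester/responder error completion.
      */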
  50static int dr_parse_cqe(struct mlx5dr_cq *dr_cq, struct mlx5_cqe64 *cqe64)
  51{
  52        unsigned int idx;
  53        u8 opcode;
  54
  55        opcode = get_cqe_opcode(cqe64);
  56        if (opcode == MLX5_CQE_REQ_ERR) {
  57                idx = be16_to_cpu(cqe64->wqe_counter) &
  58                        (dr_cq->qp->sq.wqe_cnt - 1);
  59                dr_cq->qp->sq.cc = dr_cq->qp->sq.wqe_head[idx] + 1;
  60        } else if (opcode == MLX5_CQE_RESP_ERR) {
  61                ++dr_cq->qp->sq.cc;
  62        } else {
  63                idx = be16_to_cpu(cqe64->wqe_counter) &
  64                        (dr_cq->qp->sq.wqe_cnt - 1);
  65                dr_cq->qp->sq.cc = dr_cq->qp->sq.wqe_head[idx] + 1;
  66
  67                return CQ_OK;
  68        }
  69
  70        return CQ_POLL_ERR;
  71}
  72
  73static int dr_cq_poll_one(struct mlx5dr_cq *dr_cq)
  74{
  75        struct mlx5_cqe64 *cqe64;
  76        int err;
  77
  78        cqe64 = mlx5_cqwq_get_cqe(&dr_cq->wq);
  79        if (!cqe64)
  80                return CQ_EMPTY;
  81
  82        mlx5_cqwq_pop(&dr_cq->wq);
  83        err = dr_parse_cqe(dr_cq, cqe64);
  84        mlx5_cqwq_update_db_record(&dr_cq->wq);
  85
  86        return err;
  87}
  88
  89static int dr_poll_cq(struct mlx5dr_cq *dr_cq, int ne)
  90{
  91        int npolled;
  92        int err = 0;
  93
  94        for (npolled = 0; npolled < ne; ++npolled) {
  95                err = dr_cq_poll_one(dr_cq);
  96                if (err != CQ_OK)
  97                        break;
  98        }
  99
 100        return err == CQ_POLL_ERR ? err : npolled;
 101}
 102
 103static void dr_qp_event(struct mlx5_core_qp *mqp, int event)
 104{
 105        pr_info("DR QP event %u on QP #%u\n", event, mqp->qpn);
 106}
 107
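     /* Create the RC QP used by SW steering for posting RDMA WRITE/READ
      * work requests: allocate the QP work queues and the wqe_head array,
      * then issue the CREATE_QP command with both send and receive
      * queues mapped to the given CQ.
      */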
 108static struct mlx5dr_qp *dr_create_rc_qp(struct mlx5_core_dev *mdev,
 109                                         struct dr_qp_init_attr *attr)
 110{
 111        u32 temp_qpc[MLX5_ST_SZ_DW(qpc)] = {};
 112        struct mlx5_wq_param wqp;
 113        struct mlx5dr_qp *dr_qp;
 114        int inlen;
 115        void *qpc;
 116        void *in;
 117        int err;
 118
 119        dr_qp = kzalloc(sizeof(*dr_qp), GFP_KERNEL);
 120        if (!dr_qp)
 121                return NULL;
 122
 123        wqp.buf_numa_node = mdev->priv.numa_node;
 124        wqp.db_numa_node = mdev->priv.numa_node;
 125
 126        dr_qp->rq.pc = 0;
 127        dr_qp->rq.cc = 0;
 128        dr_qp->rq.wqe_cnt = 4;
 129        dr_qp->sq.pc = 0;
 130        dr_qp->sq.cc = 0;
 131        dr_qp->sq.wqe_cnt = roundup_pow_of_two(attr->max_send_wr);
 132
 133        MLX5_SET(qpc, temp_qpc, log_rq_stride, ilog2(MLX5_SEND_WQE_DS) - 4);
 134        MLX5_SET(qpc, temp_qpc, log_rq_size, ilog2(dr_qp->rq.wqe_cnt));
 135        MLX5_SET(qpc, temp_qpc, log_sq_size, ilog2(dr_qp->sq.wqe_cnt));
 136        err = mlx5_wq_qp_create(mdev, &wqp, temp_qpc, &dr_qp->wq,
 137                                &dr_qp->wq_ctrl);
 138        if (err) {
 139                mlx5_core_info(mdev, "Can't create QP WQ\n");
 140                goto err_wq;
 141        }
 142
 143        dr_qp->sq.wqe_head = kcalloc(dr_qp->sq.wqe_cnt,
 144                                     sizeof(dr_qp->sq.wqe_head[0]),
 145                                     GFP_KERNEL);
 146
 147        if (!dr_qp->sq.wqe_head) {
 148                mlx5_core_warn(mdev, "Can't allocate wqe head\n");
 149                goto err_wqe_head;
 150        }
 151
 152        inlen = MLX5_ST_SZ_BYTES(create_qp_in) +
 153                MLX5_FLD_SZ_BYTES(create_qp_in, pas[0]) *
 154                dr_qp->wq_ctrl.buf.npages;
 155        in = kvzalloc(inlen, GFP_KERNEL);
 156        if (!in) {
 157                err = -ENOMEM;
 158                goto err_in;
 159        }
 160
 161        qpc = MLX5_ADDR_OF(create_qp_in, in, qpc);
 162        MLX5_SET(qpc, qpc, st, MLX5_QP_ST_RC);
 163        MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED);
 164        MLX5_SET(qpc, qpc, pd, attr->pdn);
 165        MLX5_SET(qpc, qpc, uar_page, attr->uar->index);
 166        MLX5_SET(qpc, qpc, log_page_size,
 167                 dr_qp->wq_ctrl.buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT);
 168        MLX5_SET(qpc, qpc, fre, 1);
 169        MLX5_SET(qpc, qpc, rlky, 1);
 170        MLX5_SET(qpc, qpc, cqn_snd, attr->cqn);
 171        MLX5_SET(qpc, qpc, cqn_rcv, attr->cqn);
 172        MLX5_SET(qpc, qpc, log_rq_stride, ilog2(MLX5_SEND_WQE_DS) - 4);
 173        MLX5_SET(qpc, qpc, log_rq_size, ilog2(dr_qp->rq.wqe_cnt));
 174        MLX5_SET(qpc, qpc, rq_type, MLX5_NON_ZERO_RQ);
 175        MLX5_SET(qpc, qpc, log_sq_size, ilog2(dr_qp->sq.wqe_cnt));
 176        MLX5_SET64(qpc, qpc, dbr_addr, dr_qp->wq_ctrl.db.dma);
 177        if (MLX5_CAP_GEN(mdev, cqe_version) == 1)
 178                MLX5_SET(qpc, qpc, user_index, 0xFFFFFF);
 179        mlx5_fill_page_frag_array(&dr_qp->wq_ctrl.buf,
 180                                  (__be64 *)MLX5_ADDR_OF(create_qp_in,
 181                                                         in, pas));
 182
 183        err = mlx5_core_create_qp(mdev, &dr_qp->mqp, in, inlen);
 184        kfree(in);
 185
 186        if (err) {
  187                mlx5_core_warn(mdev, "Can't create QP\n");
 188                goto err_in;
 189        }
 190        dr_qp->mqp.event = dr_qp_event;
 191        dr_qp->uar = attr->uar;
 192
 193        return dr_qp;
 194
 195err_in:
 196        kfree(dr_qp->sq.wqe_head);
 197err_wqe_head:
 198        mlx5_wq_destroy(&dr_qp->wq_ctrl);
 199err_wq:
 200        kfree(dr_qp);
 201        return NULL;
 202}
 203
 204static void dr_destroy_qp(struct mlx5_core_dev *mdev,
 205                          struct mlx5dr_qp *dr_qp)
 206{
 207        mlx5_core_destroy_qp(mdev, &dr_qp->mqp);
 208        kfree(dr_qp->sq.wqe_head);
 209        mlx5_wq_destroy(&dr_qp->wq_ctrl);
 210        kfree(dr_qp);
 211}
 212
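     /* Ring the doorbell: update the SQ doorbell record with the current
      * producer counter and write the WQE control segment to the UAR
      * (BlueFlame) doorbell.
      */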
 213static void dr_cmd_notify_hw(struct mlx5dr_qp *dr_qp, void *ctrl)
 214{
 215        dma_wmb();
 216        *dr_qp->wq.sq.db = cpu_to_be32(dr_qp->sq.pc & 0xfffff);
 217
  218        /* After wmb() the hw is aware of the new work */
 219        wmb();
 220
 221        mlx5_write64(ctrl, dr_qp->uar->map + MLX5_BF_OFFSET);
 222}
 223
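     /* Build one RDMA WQE at the next SQ slot: a control segment, a
      * remote address segment and a single data segment. The doorbell is
      * rung only when nreq is set, so a write+read pair is posted with a
      * single doorbell.
      */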
 224static void dr_rdma_segments(struct mlx5dr_qp *dr_qp, u64 remote_addr,
 225                             u32 rkey, struct dr_data_seg *data_seg,
 226                             u32 opcode, int nreq)
 227{
 228        struct mlx5_wqe_raddr_seg *wq_raddr;
 229        struct mlx5_wqe_ctrl_seg *wq_ctrl;
 230        struct mlx5_wqe_data_seg *wq_dseg;
 231        unsigned int size;
 232        unsigned int idx;
 233
 234        size = sizeof(*wq_ctrl) / 16 + sizeof(*wq_dseg) / 16 +
 235                sizeof(*wq_raddr) / 16;
 236
 237        idx = dr_qp->sq.pc & (dr_qp->sq.wqe_cnt - 1);
 238
 239        wq_ctrl = mlx5_wq_cyc_get_wqe(&dr_qp->wq.sq, idx);
 240        wq_ctrl->imm = 0;
 241        wq_ctrl->fm_ce_se = (data_seg->send_flags) ?
 242                MLX5_WQE_CTRL_CQ_UPDATE : 0;
 243        wq_ctrl->opmod_idx_opcode = cpu_to_be32(((dr_qp->sq.pc & 0xffff) << 8) |
 244                                                opcode);
 245        wq_ctrl->qpn_ds = cpu_to_be32(size | dr_qp->mqp.qpn << 8);
 246        wq_raddr = (void *)(wq_ctrl + 1);
 247        wq_raddr->raddr = cpu_to_be64(remote_addr);
 248        wq_raddr->rkey = cpu_to_be32(rkey);
 249        wq_raddr->reserved = 0;
 250
 251        wq_dseg = (void *)(wq_raddr + 1);
 252        wq_dseg->byte_count = cpu_to_be32(data_seg->length);
 253        wq_dseg->lkey = cpu_to_be32(data_seg->lkey);
 254        wq_dseg->addr = cpu_to_be64(data_seg->addr);
 255
 256        dr_qp->sq.wqe_head[idx] = dr_qp->sq.pc++;
 257
 258        if (nreq)
 259                dr_cmd_notify_hw(dr_qp, wq_ctrl);
 260}
 261
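     /* Post an RDMA WRITE of the data followed by an RDMA READ into the
      * same buffer; only the second (READ) WQE rings the doorbell.
      */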
 262static void dr_post_send(struct mlx5dr_qp *dr_qp, struct postsend_info *send_info)
 263{
 264        dr_rdma_segments(dr_qp, send_info->remote_addr, send_info->rkey,
 265                         &send_info->write, MLX5_OPCODE_RDMA_WRITE, 0);
 266        dr_rdma_segments(dr_qp, send_info->remote_addr, send_info->rkey,
 267                         &send_info->read, MLX5_OPCODE_RDMA_READ, 1);
 268}
 269
  270/**
  271 * mlx5dr_send_fill_and_append_ste_send_info: Add data to be sent
  272 * with the send_list parameters:
  273 *
  274 *     @ste:       The ste this data is attached to
  275 *     @size:      Size of the data to write
  276 *     @offset:    Offset of the data from the start of the hw_ste entry
  277 *     @data:      The data to write
  278 *     @ste_info:  ste_info to be sent with send_list
  279 *     @send_list: List to append the ste_info to
  280 *     @copy_data: If true, the data is copied since it is not backed
  281 *                 up anywhere else (as in re-hash).
  282 *                 If false, the data may still be updated after it
  283 *                 was added to the list.
  284 */
 285void mlx5dr_send_fill_and_append_ste_send_info(struct mlx5dr_ste *ste, u16 size,
 286                                               u16 offset, u8 *data,
 287                                               struct mlx5dr_ste_send_info *ste_info,
 288                                               struct list_head *send_list,
 289                                               bool copy_data)
 290{
 291        ste_info->size = size;
 292        ste_info->ste = ste;
 293        ste_info->offset = offset;
 294
 295        if (copy_data) {
 296                memcpy(ste_info->data_cont, data, size);
 297                ste_info->data = ste_info->data_cont;
 298        } else {
 299                ste_info->data = data;
 300        }
 301
 302        list_add_tail(&ste_info->send_list, send_list);
 303}
 304
  305/* The function tries to consume one wc each time, unless the queue is full.
  306 * In that case, which means that the hw is behind the sw by a full queue
  307 * length, the function will drain the cq until it is empty.
  308 */
 309static int dr_handle_pending_wc(struct mlx5dr_domain *dmn,
 310                                struct mlx5dr_send_ring *send_ring)
 311{
 312        bool is_drain = false;
 313        int ne;
 314
 315        if (send_ring->pending_wqe < send_ring->signal_th)
 316                return 0;
 317
  318        /* Queue is full, start draining it */
 319        if (send_ring->pending_wqe >=
 320            dmn->send_ring->signal_th * TH_NUMS_TO_DRAIN)
 321                is_drain = true;
 322
 323        do {
 324                ne = dr_poll_cq(send_ring->cq, 1);
 325                if (ne < 0)
 326                        return ne;
 327                else if (ne == 1)
 328                        send_ring->pending_wqe -= send_ring->signal_th;
 329        } while (is_drain && send_ring->pending_wqe);
 330
 331        return 0;
 332}
 333
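     /* Account for the write and read WQEs that are about to be posted
      * and request a signaled completion once every signal_th pending
      * WQEs; the read segment mirrors the write segment (same address
      * and length) and uses the ring MR lkey.
      */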
 334static void dr_fill_data_segs(struct mlx5dr_send_ring *send_ring,
 335                              struct postsend_info *send_info)
 336{
 337        send_ring->pending_wqe++;
 338
 339        if (send_ring->pending_wqe % send_ring->signal_th == 0)
 340                send_info->write.send_flags |= IB_SEND_SIGNALED;
 341
 342        send_ring->pending_wqe++;
 343        send_info->read.length = send_info->write.length;
 344        /* Read into the same write area */
 345        send_info->read.addr = (uintptr_t)send_info->write.addr;
 346        send_info->read.lkey = send_ring->mr->mkey.key;
 347
 348        if (send_ring->pending_wqe % send_ring->signal_th == 0)
 349                send_info->read.send_flags = IB_SEND_SIGNALED;
 350        else
 351                send_info->read.send_flags = 0;
 352}
 353
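     /* Post one write+read transaction toward ICM: reclaim completions
      * if needed, copy the payload into the ring MR when it exceeds the
      * max inline size, then build and post the WQEs.
      */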
 354static int dr_postsend_icm_data(struct mlx5dr_domain *dmn,
 355                                struct postsend_info *send_info)
 356{
 357        struct mlx5dr_send_ring *send_ring = dmn->send_ring;
 358        u32 buff_offset;
 359        int ret;
 360
 361        ret = dr_handle_pending_wc(dmn, send_ring);
 362        if (ret)
 363                return ret;
 364
 365        if (send_info->write.length > dmn->info.max_inline_size) {
 366                buff_offset = (send_ring->tx_head &
 367                               (dmn->send_ring->signal_th - 1)) *
 368                        send_ring->max_post_send_size;
 369                /* Copy to ring mr */
 370                memcpy(send_ring->buf + buff_offset,
 371                       (void *)(uintptr_t)send_info->write.addr,
 372                       send_info->write.length);
 373                send_info->write.addr = (uintptr_t)send_ring->mr->dma_addr + buff_offset;
 374                send_info->write.lkey = send_ring->mr->mkey.key;
 375        }
 376
 377        send_ring->tx_head++;
 378        dr_fill_data_segs(send_ring, send_info);
 379        dr_post_send(send_ring->qp, send_info);
 380
 381        return 0;
 382}
 383
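     /* Compute how many post-send iterations are needed to copy the htbl
      * chunk, how many STEs fit in each iteration, and allocate a staging
      * buffer of the corresponding size.
      */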
 384static int dr_get_tbl_copy_details(struct mlx5dr_domain *dmn,
 385                                   struct mlx5dr_ste_htbl *htbl,
 386                                   u8 **data,
 387                                   u32 *byte_size,
 388                                   int *iterations,
 389                                   int *num_stes)
 390{
 391        int alloc_size;
 392
 393        if (htbl->chunk->byte_size > dmn->send_ring->max_post_send_size) {
 394                *iterations = htbl->chunk->byte_size /
 395                        dmn->send_ring->max_post_send_size;
 396                *byte_size = dmn->send_ring->max_post_send_size;
 397                alloc_size = *byte_size;
 398                *num_stes = *byte_size / DR_STE_SIZE;
 399        } else {
 400                *iterations = 1;
 401                *num_stes = htbl->chunk->num_of_entries;
 402                alloc_size = *num_stes * DR_STE_SIZE;
 403        }
 404
 405        *data = kzalloc(alloc_size, GFP_KERNEL);
 406        if (!*data)
 407                return -ENOMEM;
 408
 409        return 0;
 410}
 411
  412/**
  413 * mlx5dr_send_postsend_ste: Write size bytes at offset into the hw icm.
  414 *
  415 *     @dmn:    Domain
  416 *     @ste:    The ste struct that contains the data (at
  417 *              least part of it)
  418 *     @data:   The data to send
  419 *     @size:   Number of bytes to write
  420 *     @offset: The offset from the icm-mapped data at which
  421 *              to start writing; used to write only part of
  422 *              the buffer.
  423 *
  424 * Return: 0 on success.
  425 */
 426int mlx5dr_send_postsend_ste(struct mlx5dr_domain *dmn, struct mlx5dr_ste *ste,
 427                             u8 *data, u16 size, u16 offset)
 428{
 429        struct postsend_info send_info = {};
 430
 431        send_info.write.addr = (uintptr_t)data;
 432        send_info.write.length = size;
 433        send_info.write.lkey = 0;
 434        send_info.remote_addr = mlx5dr_ste_get_mr_addr(ste) + offset;
 435        send_info.rkey = ste->htbl->chunk->rkey;
 436
 437        return dr_postsend_icm_data(dmn, &send_info);
 438}
 439
 440int mlx5dr_send_postsend_htbl(struct mlx5dr_domain *dmn,
 441                              struct mlx5dr_ste_htbl *htbl,
 442                              u8 *formatted_ste, u8 *mask)
 443{
 444        u32 byte_size = htbl->chunk->byte_size;
 445        int num_stes_per_iter;
 446        int iterations;
 447        u8 *data;
 448        int ret;
 449        int i;
 450        int j;
 451
 452        ret = dr_get_tbl_copy_details(dmn, htbl, &data, &byte_size,
 453                                      &iterations, &num_stes_per_iter);
 454        if (ret)
 455                return ret;
 456
  457        /* Send the data 'iterations' times */
 458        for (i = 0; i < iterations; i++) {
 459                u32 ste_index = i * (byte_size / DR_STE_SIZE);
 460                struct postsend_info send_info = {};
 461
  462                /* Copy all STEs into the data buffer;
  463                 * the bit_mask needs to be added as well
  464                 */
 465                for (j = 0; j < num_stes_per_iter; j++) {
 466                        u8 *hw_ste = htbl->ste_arr[ste_index + j].hw_ste;
 467                        u32 ste_off = j * DR_STE_SIZE;
 468
 469                        if (mlx5dr_ste_is_not_valid_entry(hw_ste)) {
 470                                memcpy(data + ste_off,
 471                                       formatted_ste, DR_STE_SIZE);
 472                        } else {
 473                                /* Copy data */
 474                                memcpy(data + ste_off,
 475                                       htbl->ste_arr[ste_index + j].hw_ste,
 476                                       DR_STE_SIZE_REDUCED);
 477                                /* Copy bit_mask */
 478                                memcpy(data + ste_off + DR_STE_SIZE_REDUCED,
 479                                       mask, DR_STE_SIZE_MASK);
 480                        }
 481                }
 482
 483                send_info.write.addr = (uintptr_t)data;
 484                send_info.write.length = byte_size;
 485                send_info.write.lkey = 0;
 486                send_info.remote_addr =
 487                        mlx5dr_ste_get_mr_addr(htbl->ste_arr + ste_index);
 488                send_info.rkey = htbl->chunk->rkey;
 489
 490                ret = dr_postsend_icm_data(dmn, &send_info);
 491                if (ret)
 492                        goto out_free;
 493        }
 494
 495out_free:
 496        kfree(data);
 497        return ret;
 498}
 499
  500/* Initialize htbl with default STEs */
 501int mlx5dr_send_postsend_formatted_htbl(struct mlx5dr_domain *dmn,
 502                                        struct mlx5dr_ste_htbl *htbl,
 503                                        u8 *ste_init_data,
 504                                        bool update_hw_ste)
 505{
 506        u32 byte_size = htbl->chunk->byte_size;
 507        int iterations;
 508        int num_stes;
 509        u8 *data;
 510        int ret;
 511        int i;
 512
 513        ret = dr_get_tbl_copy_details(dmn, htbl, &data, &byte_size,
 514                                      &iterations, &num_stes);
 515        if (ret)
 516                return ret;
 517
 518        for (i = 0; i < num_stes; i++) {
 519                u8 *copy_dst;
 520
  521                /* Copy the same STE into the data buffer */
 522                copy_dst = data + i * DR_STE_SIZE;
 523                memcpy(copy_dst, ste_init_data, DR_STE_SIZE);
 524
 525                if (update_hw_ste) {
 526                        /* Copy the reduced ste to hash table ste_arr */
 527                        copy_dst = htbl->hw_ste_arr + i * DR_STE_SIZE_REDUCED;
 528                        memcpy(copy_dst, ste_init_data, DR_STE_SIZE_REDUCED);
 529                }
 530        }
 531
  532        /* Send the data 'iterations' times */
 533        for (i = 0; i < iterations; i++) {
  534                u32 ste_index = i * (byte_size / DR_STE_SIZE);
 535                struct postsend_info send_info = {};
 536
 537                send_info.write.addr = (uintptr_t)data;
 538                send_info.write.length = byte_size;
 539                send_info.write.lkey = 0;
 540                send_info.remote_addr =
 541                        mlx5dr_ste_get_mr_addr(htbl->ste_arr + ste_index);
 542                send_info.rkey = htbl->chunk->rkey;
 543
 544                ret = dr_postsend_icm_data(dmn, &send_info);
 545                if (ret)
 546                        goto out_free;
 547        }
 548
 549out_free:
 550        kfree(data);
 551        return ret;
 552}
 553
 554int mlx5dr_send_postsend_action(struct mlx5dr_domain *dmn,
 555                                struct mlx5dr_action *action)
 556{
 557        struct postsend_info send_info = {};
 558        int ret;
 559
 560        send_info.write.addr = (uintptr_t)action->rewrite.data;
 561        send_info.write.length = action->rewrite.chunk->byte_size;
 562        send_info.write.lkey = 0;
 563        send_info.remote_addr = action->rewrite.chunk->mr_addr;
 564        send_info.rkey = action->rewrite.chunk->rkey;
 565
 566        mutex_lock(&dmn->mutex);
 567        ret = dr_postsend_icm_data(dmn, &send_info);
 568        mutex_unlock(&dmn->mutex);
 569
 570        return ret;
 571}
 572
 573static int dr_modify_qp_rst2init(struct mlx5_core_dev *mdev,
 574                                 struct mlx5dr_qp *dr_qp,
 575                                 int port)
 576{
 577        u32 in[MLX5_ST_SZ_DW(rst2init_qp_in)] = {};
 578        void *qpc;
 579
 580        qpc = MLX5_ADDR_OF(rst2init_qp_in, in, qpc);
 581
 582        MLX5_SET(qpc, qpc, primary_address_path.vhca_port_num, port);
 583        MLX5_SET(qpc, qpc, pm_state, MLX5_QPC_PM_STATE_MIGRATED);
 584        MLX5_SET(qpc, qpc, rre, 1);
 585        MLX5_SET(qpc, qpc, rwe, 1);
 586
 587        return mlx5_core_qp_modify(mdev, MLX5_CMD_OP_RST2INIT_QP, 0, qpc,
 588                                   &dr_qp->mqp);
 589}
 590
 591static int dr_cmd_modify_qp_rtr2rts(struct mlx5_core_dev *mdev,
 592                                    struct mlx5dr_qp *dr_qp,
 593                                    struct dr_qp_rts_attr *attr)
 594{
 595        u32 in[MLX5_ST_SZ_DW(rtr2rts_qp_in)] = {};
 596        void *qpc;
 597
  598        qpc = MLX5_ADDR_OF(rtr2rts_qp_in, in, qpc);
 599
 600        MLX5_SET(rtr2rts_qp_in, in, qpn, dr_qp->mqp.qpn);
 601
 602        MLX5_SET(qpc, qpc, log_ack_req_freq, 0);
 603        MLX5_SET(qpc, qpc, retry_count, attr->retry_cnt);
 604        MLX5_SET(qpc, qpc, rnr_retry, attr->rnr_retry);
 605
 606        return mlx5_core_qp_modify(mdev, MLX5_CMD_OP_RTR2RTS_QP, 0, qpc,
 607                                   &dr_qp->mqp);
 608}
 609
 610static int dr_cmd_modify_qp_init2rtr(struct mlx5_core_dev *mdev,
 611                                     struct mlx5dr_qp *dr_qp,
 612                                     struct dr_qp_rtr_attr *attr)
 613{
 614        u32 in[MLX5_ST_SZ_DW(init2rtr_qp_in)] = {};
 615        void *qpc;
 616
 617        qpc = MLX5_ADDR_OF(init2rtr_qp_in, in, qpc);
 618
 619        MLX5_SET(init2rtr_qp_in, in, qpn, dr_qp->mqp.qpn);
 620
 621        MLX5_SET(qpc, qpc, mtu, attr->mtu);
 622        MLX5_SET(qpc, qpc, log_msg_max, DR_CHUNK_SIZE_MAX - 1);
 623        MLX5_SET(qpc, qpc, remote_qpn, attr->qp_num);
 624        memcpy(MLX5_ADDR_OF(qpc, qpc, primary_address_path.rmac_47_32),
 625               attr->dgid_attr.mac, sizeof(attr->dgid_attr.mac));
 626        memcpy(MLX5_ADDR_OF(qpc, qpc, primary_address_path.rgid_rip),
 627               attr->dgid_attr.gid, sizeof(attr->dgid_attr.gid));
 628        MLX5_SET(qpc, qpc, primary_address_path.src_addr_index,
 629                 attr->sgid_index);
 630
 631        if (attr->dgid_attr.roce_ver == MLX5_ROCE_VERSION_2)
 632                MLX5_SET(qpc, qpc, primary_address_path.udp_sport,
 633                         attr->udp_src_port);
 634
 635        MLX5_SET(qpc, qpc, primary_address_path.vhca_port_num, attr->port_num);
 636        MLX5_SET(qpc, qpc, min_rnr_nak, 1);
 637
 638        return mlx5_core_qp_modify(mdev, MLX5_CMD_OP_INIT2RTR_QP, 0, qpc,
 639                                   &dr_qp->mqp);
 640}
 641
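     /* Move the send QP through RST->INIT->RTR->RTS. The QP is connected
      * to itself (the remote QP number is its own), using the local GID
      * at index 0.
      */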
 642static int dr_prepare_qp_to_rts(struct mlx5dr_domain *dmn)
 643{
 644        struct mlx5dr_qp *dr_qp = dmn->send_ring->qp;
 645        struct dr_qp_rts_attr rts_attr = {};
 646        struct dr_qp_rtr_attr rtr_attr = {};
 647        enum ib_mtu mtu = IB_MTU_1024;
 648        u16 gid_index = 0;
 649        int port = 1;
 650        int ret;
 651
 652        /* Init */
 653        ret = dr_modify_qp_rst2init(dmn->mdev, dr_qp, port);
 654        if (ret)
 655                return ret;
 656
 657        /* RTR */
 658        ret = mlx5dr_cmd_query_gid(dmn->mdev, port, gid_index, &rtr_attr.dgid_attr);
 659        if (ret)
 660                return ret;
 661
 662        rtr_attr.mtu            = mtu;
 663        rtr_attr.qp_num         = dr_qp->mqp.qpn;
 664        rtr_attr.min_rnr_timer  = 12;
 665        rtr_attr.port_num       = port;
 666        rtr_attr.sgid_index     = gid_index;
 667        rtr_attr.udp_src_port   = dmn->info.caps.roce_min_src_udp;
 668
 669        ret = dr_cmd_modify_qp_init2rtr(dmn->mdev, dr_qp, &rtr_attr);
 670        if (ret)
 671                return ret;
 672
 673        /* RTS */
 674        rts_attr.timeout        = 14;
 675        rts_attr.retry_cnt      = 7;
 676        rts_attr.rnr_retry      = 7;
 677
 678        ret = dr_cmd_modify_qp_rtr2rts(dmn->mdev, dr_qp, &rts_attr);
 679        if (ret)
 680                return ret;
 681
 682        return 0;
 683}
 684
 685static void dr_cq_event(struct mlx5_core_cq *mcq,
 686                        enum mlx5_event event)
 687{
 688        pr_info("CQ event %u on CQ #%u\n", event, mcq->cqn);
 689}
 690
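     /* Create the send-ring CQ: allocate the CQ work queue, mark all
      * CQEs as invalid/HW-owned, pick an EQ based on the current CPU and
      * issue the CREATE_CQ command.
      */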
 691static struct mlx5dr_cq *dr_create_cq(struct mlx5_core_dev *mdev,
 692                                      struct mlx5_uars_page *uar,
 693                                      size_t ncqe)
 694{
 695        u32 temp_cqc[MLX5_ST_SZ_DW(cqc)] = {};
 696        u32 out[MLX5_ST_SZ_DW(create_cq_out)];
 697        struct mlx5_wq_param wqp;
 698        struct mlx5_cqe64 *cqe;
 699        struct mlx5dr_cq *cq;
 700        int inlen, err, eqn;
 701        unsigned int irqn;
 702        void *cqc, *in;
 703        __be64 *pas;
 704        int vector;
 705        u32 i;
 706
 707        cq = kzalloc(sizeof(*cq), GFP_KERNEL);
 708        if (!cq)
 709                return NULL;
 710
 711        ncqe = roundup_pow_of_two(ncqe);
 712        MLX5_SET(cqc, temp_cqc, log_cq_size, ilog2(ncqe));
 713
 714        wqp.buf_numa_node = mdev->priv.numa_node;
 715        wqp.db_numa_node = mdev->priv.numa_node;
 716
 717        err = mlx5_cqwq_create(mdev, &wqp, temp_cqc, &cq->wq,
 718                               &cq->wq_ctrl);
 719        if (err)
 720                goto out;
 721
 722        for (i = 0; i < mlx5_cqwq_get_size(&cq->wq); i++) {
 723                cqe = mlx5_cqwq_get_wqe(&cq->wq, i);
 724                cqe->op_own = MLX5_CQE_INVALID << 4 | MLX5_CQE_OWNER_MASK;
 725        }
 726
 727        inlen = MLX5_ST_SZ_BYTES(create_cq_in) +
 728                sizeof(u64) * cq->wq_ctrl.buf.npages;
 729        in = kvzalloc(inlen, GFP_KERNEL);
 730        if (!in)
 731                goto err_cqwq;
 732
 733        vector = raw_smp_processor_id() % mlx5_comp_vectors_count(mdev);
 734        err = mlx5_vector2eqn(mdev, vector, &eqn, &irqn);
 735        if (err) {
 736                kvfree(in);
 737                goto err_cqwq;
 738        }
 739
 740        cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context);
 741        MLX5_SET(cqc, cqc, log_cq_size, ilog2(ncqe));
 742        MLX5_SET(cqc, cqc, c_eqn, eqn);
 743        MLX5_SET(cqc, cqc, uar_page, uar->index);
 744        MLX5_SET(cqc, cqc, log_page_size, cq->wq_ctrl.buf.page_shift -
 745                 MLX5_ADAPTER_PAGE_SHIFT);
 746        MLX5_SET64(cqc, cqc, dbr_addr, cq->wq_ctrl.db.dma);
 747
 748        pas = (__be64 *)MLX5_ADDR_OF(create_cq_in, in, pas);
 749        mlx5_fill_page_frag_array(&cq->wq_ctrl.buf, pas);
 750
 751        cq->mcq.event = dr_cq_event;
 752
 753        err = mlx5_core_create_cq(mdev, &cq->mcq, in, inlen, out, sizeof(out));
 754        kvfree(in);
 755
 756        if (err)
 757                goto err_cqwq;
 758
 759        cq->mcq.cqe_sz = 64;
 760        cq->mcq.set_ci_db = cq->wq_ctrl.db.db;
 761        cq->mcq.arm_db = cq->wq_ctrl.db.db + 1;
 762        *cq->mcq.set_ci_db = 0;
 763        *cq->mcq.arm_db = 0;
 764        cq->mcq.vector = 0;
 765        cq->mcq.irqn = irqn;
 766        cq->mcq.uar = uar;
 767
 768        return cq;
 769
 770err_cqwq:
 771        mlx5_wq_destroy(&cq->wq_ctrl);
 772out:
 773        kfree(cq);
 774        return NULL;
 775}
 776
 777static void dr_destroy_cq(struct mlx5_core_dev *mdev, struct mlx5dr_cq *cq)
 778{
 779        mlx5_core_destroy_cq(mdev, &cq->mcq);
 780        mlx5_wq_destroy(&cq->wq_ctrl);
 781        kfree(cq);
 782}
 783
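     /* Create an mkey in PA access mode with length64 set, covering the
      * whole address space, so the DMA address of the ring buffer can be
      * used directly with this lkey.
      */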
 784static int
 785dr_create_mkey(struct mlx5_core_dev *mdev, u32 pdn, struct mlx5_core_mkey *mkey)
 786{
 787        u32 in[MLX5_ST_SZ_DW(create_mkey_in)] = {};
 788        void *mkc;
 789
 790        mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
 791        MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_PA);
 792        MLX5_SET(mkc, mkc, a, 1);
 793        MLX5_SET(mkc, mkc, rw, 1);
 794        MLX5_SET(mkc, mkc, rr, 1);
 795        MLX5_SET(mkc, mkc, lw, 1);
 796        MLX5_SET(mkc, mkc, lr, 1);
 797
 798        MLX5_SET(mkc, mkc, pd, pdn);
 799        MLX5_SET(mkc, mkc, length64, 1);
 800        MLX5_SET(mkc, mkc, qpn, 0xffffff);
 801
 802        return mlx5_core_create_mkey(mdev, mkey, in, sizeof(in));
 803}
 804
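     /* DMA-map the given buffer and create an mkey for it; the returned
      * mlx5dr_mr holds the mkey, the DMA address and the original virtual
      * address of the buffer.
      */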
 805static struct mlx5dr_mr *dr_reg_mr(struct mlx5_core_dev *mdev,
 806                                   u32 pdn, void *buf, size_t size)
 807{
 808        struct mlx5dr_mr *mr = kzalloc(sizeof(*mr), GFP_KERNEL);
 809        struct device *dma_device;
 810        dma_addr_t dma_addr;
 811        int err;
 812
 813        if (!mr)
 814                return NULL;
 815
 816        dma_device = &mdev->pdev->dev;
 817        dma_addr = dma_map_single(dma_device, buf, size,
 818                                  DMA_BIDIRECTIONAL);
 819        err = dma_mapping_error(dma_device, dma_addr);
 820        if (err) {
 821                mlx5_core_warn(mdev, "Can't dma buf\n");
 822                kfree(mr);
 823                return NULL;
 824        }
 825
 826        err = dr_create_mkey(mdev, pdn, &mr->mkey);
 827        if (err) {
 828                mlx5_core_warn(mdev, "Can't create mkey\n");
 829                dma_unmap_single(dma_device, dma_addr, size,
 830                                 DMA_BIDIRECTIONAL);
 831                kfree(mr);
 832                return NULL;
 833        }
 834
 835        mr->dma_addr = dma_addr;
 836        mr->size = size;
 837        mr->addr = buf;
 838
 839        return mr;
 840}
 841
 842static void dr_dereg_mr(struct mlx5_core_dev *mdev, struct mlx5dr_mr *mr)
 843{
 844        mlx5_core_destroy_mkey(mdev, &mr->mkey);
 845        dma_unmap_single(&mdev->pdev->dev, mr->dma_addr, mr->size,
 846                         DMA_BIDIRECTIONAL);
 847        kfree(mr);
 848}
 849
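     /* Allocate the per-domain send ring: create the CQ and the RC QP,
      * move the QP to RTS, allocate a bounce buffer large enough for
      * signal_th max-sized posts, and register MRs for the buffer and
      * the sync area.
      */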
 850int mlx5dr_send_ring_alloc(struct mlx5dr_domain *dmn)
 851{
 852        struct dr_qp_init_attr init_attr = {};
 853        int cq_size;
 854        int size;
 855        int ret;
 856
 857        dmn->send_ring = kzalloc(sizeof(*dmn->send_ring), GFP_KERNEL);
 858        if (!dmn->send_ring)
 859                return -ENOMEM;
 860
 861        cq_size = QUEUE_SIZE + 1;
 862        dmn->send_ring->cq = dr_create_cq(dmn->mdev, dmn->uar, cq_size);
 863        if (!dmn->send_ring->cq) {
 864                ret = -ENOMEM;
 865                goto free_send_ring;
 866        }
 867
 868        init_attr.cqn = dmn->send_ring->cq->mcq.cqn;
 869        init_attr.pdn = dmn->pdn;
 870        init_attr.uar = dmn->uar;
 871        init_attr.max_send_wr = QUEUE_SIZE;
 872
 873        dmn->send_ring->qp = dr_create_rc_qp(dmn->mdev, &init_attr);
 874        if (!dmn->send_ring->qp)  {
 875                ret = -ENOMEM;
 876                goto clean_cq;
 877        }
 878
 879        dmn->send_ring->cq->qp = dmn->send_ring->qp;
 880
 881        dmn->info.max_send_wr = QUEUE_SIZE;
 882        dmn->info.max_inline_size = min(dmn->send_ring->qp->max_inline_data,
 883                                        DR_STE_SIZE);
 884
 885        dmn->send_ring->signal_th = dmn->info.max_send_wr /
 886                SIGNAL_PER_DIV_QUEUE;
 887
 888        /* Prepare qp to be used */
 889        ret = dr_prepare_qp_to_rts(dmn);
 890        if (ret)
 891                goto clean_qp;
 892
 893        dmn->send_ring->max_post_send_size =
 894                mlx5dr_icm_pool_chunk_size_to_byte(DR_CHUNK_SIZE_1K,
 895                                                   DR_ICM_TYPE_STE);
 896
  897        /* Allocate a buffer of the maximum size for writing */
 898        size = dmn->send_ring->signal_th * dmn->send_ring->max_post_send_size;
 899        dmn->send_ring->buf = kzalloc(size, GFP_KERNEL);
 900        if (!dmn->send_ring->buf) {
 901                ret = -ENOMEM;
 902                goto clean_qp;
 903        }
 904
 905        dmn->send_ring->buf_size = size;
 906
 907        dmn->send_ring->mr = dr_reg_mr(dmn->mdev,
 908                                       dmn->pdn, dmn->send_ring->buf, size);
 909        if (!dmn->send_ring->mr) {
 910                ret = -ENOMEM;
 911                goto free_mem;
 912        }
 913
 914        dmn->send_ring->sync_mr = dr_reg_mr(dmn->mdev,
 915                                            dmn->pdn, dmn->send_ring->sync_buff,
 916                                            MIN_READ_SYNC);
 917        if (!dmn->send_ring->sync_mr) {
 918                ret = -ENOMEM;
 919                goto clean_mr;
 920        }
 921
 922        return 0;
 923
 924clean_mr:
 925        dr_dereg_mr(dmn->mdev, dmn->send_ring->mr);
 926free_mem:
 927        kfree(dmn->send_ring->buf);
 928clean_qp:
 929        dr_destroy_qp(dmn->mdev, dmn->send_ring->qp);
 930clean_cq:
 931        dr_destroy_cq(dmn->mdev, dmn->send_ring->cq);
 932free_send_ring:
 933        kfree(dmn->send_ring);
 934
 935        return ret;
 936}
 937
 938void mlx5dr_send_ring_free(struct mlx5dr_domain *dmn,
 939                           struct mlx5dr_send_ring *send_ring)
 940{
 941        dr_destroy_qp(dmn->mdev, send_ring->qp);
 942        dr_destroy_cq(dmn->mdev, send_ring->cq);
 943        dr_dereg_mr(dmn->mdev, send_ring->sync_mr);
 944        dr_dereg_mr(dmn->mdev, send_ring->mr);
 945        kfree(send_ring->buf);
 946        kfree(send_ring);
 947}
 948
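     /* Force-drain the send ring: post enough dummy write/read requests
      * to guarantee a signaled completion, then poll the CQ until all
      * pending WQEs are reclaimed.
      */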
 949int mlx5dr_send_ring_force_drain(struct mlx5dr_domain *dmn)
 950{
 951        struct mlx5dr_send_ring *send_ring = dmn->send_ring;
 952        struct postsend_info send_info = {};
 953        u8 data[DR_STE_SIZE];
 954        int num_of_sends_req;
 955        int ret;
 956        int i;
 957
  958        /* Sending this many requests ensures the queue will be drained */
 959        num_of_sends_req = send_ring->signal_th * TH_NUMS_TO_DRAIN / 2;
 960
 961        /* Send fake requests forcing the last to be signaled */
 962        send_info.write.addr = (uintptr_t)data;
 963        send_info.write.length = DR_STE_SIZE;
 964        send_info.write.lkey = 0;
 965        /* Using the sync_mr in order to write/read */
 966        send_info.remote_addr = (uintptr_t)send_ring->sync_mr->addr;
 967        send_info.rkey = send_ring->sync_mr->mkey.key;
 968
 969        for (i = 0; i < num_of_sends_req; i++) {
 970                ret = dr_postsend_icm_data(dmn, &send_info);
 971                if (ret)
 972                        return ret;
 973        }
 974
 975        ret = dr_handle_pending_wc(dmn, send_ring);
 976
 977        return ret;
 978}
 979