linux/drivers/infiniband/sw/siw/siw_qp.c
   1// SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause
   2
   3/* Authors: Bernard Metzler <bmt@zurich.ibm.com> */
   4/* Copyright (c) 2008-2019, IBM Corporation */
   5
   6#include <linux/errno.h>
   7#include <linux/types.h>
   8#include <linux/net.h>
   9#include <linux/scatterlist.h>
  10#include <linux/llist.h>
  11#include <asm/barrier.h>
  12#include <net/tcp.h>
  13
  14#include "siw.h"
  15#include "siw_verbs.h"
  16#include "siw_mem.h"
  17
  18static char siw_qp_state_to_string[SIW_QP_STATE_COUNT][sizeof "TERMINATE"] = {
  19        [SIW_QP_STATE_IDLE] = "IDLE",
  20        [SIW_QP_STATE_RTR] = "RTR",
  21        [SIW_QP_STATE_RTS] = "RTS",
  22        [SIW_QP_STATE_CLOSING] = "CLOSING",
  23        [SIW_QP_STATE_TERMINATE] = "TERMINATE",
  24        [SIW_QP_STATE_ERROR] = "ERROR"
  25};
  26
  27/*
  28 * iWARP (RDMAP, DDP and MPA) parameters as well as Softiwarp settings on a
  29 * per-RDMAP message basis. Please keep the initializer order. Each MPA length
  30 * is initialized to the minimum packet size.
  31 */
  32struct iwarp_msg_info iwarp_pktinfo[RDMAP_TERMINATE + 1] = {
  33        { /* RDMAP_RDMA_WRITE */
  34          .hdr_len = sizeof(struct iwarp_rdma_write),
  35          .ctrl.mpa_len = htons(sizeof(struct iwarp_rdma_write) - 2),
  36          .ctrl.ddp_rdmap_ctrl = DDP_FLAG_TAGGED | DDP_FLAG_LAST |
  37                                 cpu_to_be16(DDP_VERSION << 8) |
  38                                 cpu_to_be16(RDMAP_VERSION << 6) |
  39                                 cpu_to_be16(RDMAP_RDMA_WRITE),
  40          .rx_data = siw_proc_write },
  41        { /* RDMAP_RDMA_READ_REQ */
  42          .hdr_len = sizeof(struct iwarp_rdma_rreq),
  43          .ctrl.mpa_len = htons(sizeof(struct iwarp_rdma_rreq) - 2),
  44          .ctrl.ddp_rdmap_ctrl = DDP_FLAG_LAST | cpu_to_be16(DDP_VERSION << 8) |
  45                                 cpu_to_be16(RDMAP_VERSION << 6) |
  46                                 cpu_to_be16(RDMAP_RDMA_READ_REQ),
  47          .rx_data = siw_proc_rreq },
  48        { /* RDMAP_RDMA_READ_RESP */
  49          .hdr_len = sizeof(struct iwarp_rdma_rresp),
  50          .ctrl.mpa_len = htons(sizeof(struct iwarp_rdma_rresp) - 2),
  51          .ctrl.ddp_rdmap_ctrl = DDP_FLAG_TAGGED | DDP_FLAG_LAST |
  52                                 cpu_to_be16(DDP_VERSION << 8) |
  53                                 cpu_to_be16(RDMAP_VERSION << 6) |
  54                                 cpu_to_be16(RDMAP_RDMA_READ_RESP),
  55          .rx_data = siw_proc_rresp },
  56        { /* RDMAP_SEND */
  57          .hdr_len = sizeof(struct iwarp_send),
  58          .ctrl.mpa_len = htons(sizeof(struct iwarp_send) - 2),
  59          .ctrl.ddp_rdmap_ctrl = DDP_FLAG_LAST | cpu_to_be16(DDP_VERSION << 8) |
  60                                 cpu_to_be16(RDMAP_VERSION << 6) |
  61                                 cpu_to_be16(RDMAP_SEND),
  62          .rx_data = siw_proc_send },
  63        { /* RDMAP_SEND_INVAL */
  64          .hdr_len = sizeof(struct iwarp_send_inv),
  65          .ctrl.mpa_len = htons(sizeof(struct iwarp_send_inv) - 2),
  66          .ctrl.ddp_rdmap_ctrl = DDP_FLAG_LAST | cpu_to_be16(DDP_VERSION << 8) |
  67                                 cpu_to_be16(RDMAP_VERSION << 6) |
  68                                 cpu_to_be16(RDMAP_SEND_INVAL),
  69          .rx_data = siw_proc_send },
  70        { /* RDMAP_SEND_SE */
  71          .hdr_len = sizeof(struct iwarp_send),
  72          .ctrl.mpa_len = htons(sizeof(struct iwarp_send) - 2),
  73          .ctrl.ddp_rdmap_ctrl = DDP_FLAG_LAST | cpu_to_be16(DDP_VERSION << 8) |
  74                                 cpu_to_be16(RDMAP_VERSION << 6) |
  75                                 cpu_to_be16(RDMAP_SEND_SE),
  76          .rx_data = siw_proc_send },
  77        { /* RDMAP_SEND_SE_INVAL */
  78          .hdr_len = sizeof(struct iwarp_send_inv),
  79          .ctrl.mpa_len = htons(sizeof(struct iwarp_send_inv) - 2),
  80          .ctrl.ddp_rdmap_ctrl = DDP_FLAG_LAST | cpu_to_be16(DDP_VERSION << 8) |
  81                                 cpu_to_be16(RDMAP_VERSION << 6) |
  82                                 cpu_to_be16(RDMAP_SEND_SE_INVAL),
  83          .rx_data = siw_proc_send },
  84        { /* RDMAP_TERMINATE */
  85          .hdr_len = sizeof(struct iwarp_terminate),
  86          .ctrl.mpa_len = htons(sizeof(struct iwarp_terminate) - 2),
  87          .ctrl.ddp_rdmap_ctrl = DDP_FLAG_LAST | cpu_to_be16(DDP_VERSION << 8) |
  88                                 cpu_to_be16(RDMAP_VERSION << 6) |
  89                                 cpu_to_be16(RDMAP_TERMINATE),
  90          .rx_data = siw_proc_terminate }
  91};
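/*
 * Editor's note: the table above pre-computes, per RDMAP opcode, the fixed
 * control word and the minimum MPA length (header size minus the 2-byte MPA
 * length field). A minimal sketch of how a transmit path could derive the
 * final on-wire MPA length from this table; siw_wire_mpa_len() is a
 * hypothetical helper, not part of the driver:
 *
 *	static inline u16 siw_wire_mpa_len(enum rdma_opcode op, u16 payload)
 *	{
 *		return iwarp_pktinfo[op].hdr_len - MPA_HDR_SIZE + payload;
 *	}
 */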
  92
  93void siw_qp_llp_data_ready(struct sock *sk)
  94{
  95        struct siw_qp *qp;
  96
  97        read_lock(&sk->sk_callback_lock);
  98
  99        if (unlikely(!sk->sk_user_data || !sk_to_qp(sk)))
 100                goto done;
 101
 102        qp = sk_to_qp(sk);
 103
 104        if (likely(!qp->rx_stream.rx_suspend &&
 105                   down_read_trylock(&qp->state_lock))) {
 106                read_descriptor_t rd_desc = { .arg.data = qp, .count = 1 };
 107
 108                if (likely(qp->attrs.state == SIW_QP_STATE_RTS))
 109                        /*
 110                         * Implements the data receive operation within the
 111                         * socket callback. TCP gracefully handles the case
 112                         * where there is nothing to receive (it does not
 113                         * call siw_tcp_rx_data() then).
 114                         */
 115                        tcp_read_sock(sk, &rd_desc, siw_tcp_rx_data);
 116
 117                up_read(&qp->state_lock);
 118        } else {
 119                siw_dbg_qp(qp, "unable to process RX, suspend: %d\n",
 120                           qp->rx_stream.rx_suspend);
 121        }
 122done:
 123        read_unlock(&sk->sk_callback_lock);
 124}
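/*
 * Editor's note: this routine only runs once it has been installed as the
 * socket's sk_data_ready upcall, with sk_user_data set up so that
 * sk_to_qp() resolves. That wiring lives in the connection manager; a rough,
 * illustrative sketch of the pattern (not the actual siw_cm code):
 *
 *	write_lock_bh(&sk->sk_callback_lock);
 *	sk->sk_user_data = cep;
 *	sk->sk_data_ready = siw_qp_llp_data_ready;
 *	sk->sk_write_space = siw_qp_llp_write_space;
 *	write_unlock_bh(&sk->sk_callback_lock);
 */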
 125
 126void siw_qp_llp_close(struct siw_qp *qp)
 127{
 128        siw_dbg_qp(qp, "enter llp close, state = %s\n",
 129                   siw_qp_state_to_string[qp->attrs.state]);
 130
 131        down_write(&qp->state_lock);
 132
 133        qp->rx_stream.rx_suspend = 1;
 134        qp->tx_ctx.tx_suspend = 1;
 135        qp->attrs.sk = NULL;
 136
 137        switch (qp->attrs.state) {
 138        case SIW_QP_STATE_RTS:
 139        case SIW_QP_STATE_RTR:
 140        case SIW_QP_STATE_IDLE:
 141        case SIW_QP_STATE_TERMINATE:
 142                qp->attrs.state = SIW_QP_STATE_ERROR;
 143                break;
 144        /*
 145         * SIW_QP_STATE_CLOSING:
 146         *
 147         * This is a forced close. Shall the QP be moved to
 148         * ERROR or IDLE?
 149         */
 150        case SIW_QP_STATE_CLOSING:
 151                if (tx_wqe(qp)->wr_status == SIW_WR_IDLE)
 152                        qp->attrs.state = SIW_QP_STATE_ERROR;
 153                else
 154                        qp->attrs.state = SIW_QP_STATE_IDLE;
 155                break;
 156
 157        default:
 158                siw_dbg_qp(qp, "llp close: no state transition needed: %s\n",
 159                           siw_qp_state_to_string[qp->attrs.state]);
 160                break;
 161        }
 162        siw_sq_flush(qp);
 163        siw_rq_flush(qp);
 164
 165        /*
 166         * Dereference closing CEP
 167         */
 168        if (qp->cep) {
 169                siw_cep_put(qp->cep);
 170                qp->cep = NULL;
 171        }
 172
 173        up_write(&qp->state_lock);
 174
 175        siw_dbg_qp(qp, "llp close exit: state %s\n",
 176                   siw_qp_state_to_string[qp->attrs.state]);
 177}
 178
 179/*
 180 * Socket callback routine informing about newly available send space.
 181 * Schedules SQ work for processing pending SQ items.
 182 */
 183void siw_qp_llp_write_space(struct sock *sk)
 184{
 185        struct siw_cep *cep;
 186
 187        read_lock(&sk->sk_callback_lock);
 188
 189        cep = sk_to_cep(sk);
 190        if (cep) {
 191                cep->sk_write_space(sk);
 192
 193                if (!test_bit(SOCK_NOSPACE, &sk->sk_socket->flags))
 194                        (void)siw_sq_start(cep->qp);
 195        }
 196
 197        read_unlock(&sk->sk_callback_lock);
 198}
 199
 200static int siw_qp_readq_init(struct siw_qp *qp, int irq_size, int orq_size)
 201{
 202        irq_size = roundup_pow_of_two(irq_size);
 203        orq_size = roundup_pow_of_two(orq_size);
 204
 205        qp->attrs.irq_size = irq_size;
 206        qp->attrs.orq_size = orq_size;
 207
 208        qp->irq = vzalloc(irq_size * sizeof(struct siw_sqe));
 209        if (!qp->irq) {
 210                siw_dbg_qp(qp, "irq malloc for %d failed\n", irq_size);
 211                qp->attrs.irq_size = 0;
 212                return -ENOMEM;
 213        }
 214        qp->orq = vzalloc(orq_size * sizeof(struct siw_sqe));
 215        if (!qp->orq) {
 216                siw_dbg_qp(qp, "orq malloc for %d failed\n", orq_size);
 217                qp->attrs.orq_size = 0;
 218                qp->attrs.irq_size = 0;
 219                vfree(qp->irq);
 220                return -ENOMEM;
 221        }
 222        siw_dbg_qp(qp, "ORD %d, IRD %d\n", orq_size, irq_size);
 223        return 0;
 224}
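/*
 * Editor's note: IRQ and ORQ sizes are rounded up to a power of two, which
 * keeps the free-running get/put counters consistent across u32 wraparound
 * when combined with the modulo indexing used further below in this file,
 * e.g.:
 *
 *	irqe = &qp->irq[qp->irq_get % qp->attrs.irq_size];
 *	sqe  = &qp->orq[qp->orq_get % qp->attrs.orq_size];
 */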
 225
 226static int siw_qp_enable_crc(struct siw_qp *qp)
 227{
 228        struct siw_rx_stream *c_rx = &qp->rx_stream;
 229        struct siw_iwarp_tx *c_tx = &qp->tx_ctx;
 230        int size;
 231
 232        if (siw_crypto_shash == NULL)
 233                return -ENOENT;
 234
 235        size = crypto_shash_descsize(siw_crypto_shash) +
 236                sizeof(struct shash_desc);
 237
 238        c_tx->mpa_crc_hd = kzalloc(size, GFP_KERNEL);
 239        c_rx->mpa_crc_hd = kzalloc(size, GFP_KERNEL);
 240        if (!c_tx->mpa_crc_hd || !c_rx->mpa_crc_hd) {
 241                kfree(c_tx->mpa_crc_hd);
 242                kfree(c_rx->mpa_crc_hd);
 243                c_tx->mpa_crc_hd = NULL;
 244                c_rx->mpa_crc_hd = NULL;
 245                return -ENOMEM;
 246        }
 247        c_tx->mpa_crc_hd->tfm = siw_crypto_shash;
 248        c_rx->mpa_crc_hd->tfm = siw_crypto_shash;
 249
 250        return 0;
 251}
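/*
 * Editor's note: the two descriptors allocated here are later driven through
 * the usual shash sequence to checksum each MPA frame, as done for the
 * TERMINATE message below. Roughly (sketch only; hdr, hdr_len and crc are
 * placeholders, error handling omitted):
 *
 *	crypto_shash_init(c_tx->mpa_crc_hd);
 *	crypto_shash_update(c_tx->mpa_crc_hd, (u8 *)hdr, hdr_len);
 *	crypto_shash_final(c_tx->mpa_crc_hd, (u8 *)&crc);
 */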
 252
 253/*
 254 * Send a non-signalled READ or WRITE to the peer side as negotiated
 255 * with the MPAv2 P2P setup protocol. The work request is only created
 256 * as the current active WR and does not consume Send Queue space.
 257 *
 258 * Caller must hold QP state lock.
 259 */
 260int siw_qp_mpa_rts(struct siw_qp *qp, enum mpa_v2_ctrl ctrl)
 261{
 262        struct siw_wqe *wqe = tx_wqe(qp);
 263        unsigned long flags;
 264        int rv = 0;
 265
 266        spin_lock_irqsave(&qp->sq_lock, flags);
 267
 268        if (unlikely(wqe->wr_status != SIW_WR_IDLE)) {
 269                spin_unlock_irqrestore(&qp->sq_lock, flags);
 270                return -EIO;
 271        }
 272        memset(wqe->mem, 0, sizeof(*wqe->mem) * SIW_MAX_SGE);
 273
 274        wqe->wr_status = SIW_WR_QUEUED;
 275        wqe->sqe.flags = 0;
 276        wqe->sqe.num_sge = 1;
 277        wqe->sqe.sge[0].length = 0;
 278        wqe->sqe.sge[0].laddr = 0;
 279        wqe->sqe.sge[0].lkey = 0;
 280        /*
 281         * While the STag must not be checked for inbound zero length
 282         * READ/WRITE, some HW may treat STag 0 specially.
 283         */
 284        wqe->sqe.rkey = 1;
 285        wqe->sqe.raddr = 0;
 286        wqe->processed = 0;
 287
 288        if (ctrl & MPA_V2_RDMA_WRITE_RTR)
 289                wqe->sqe.opcode = SIW_OP_WRITE;
 290        else if (ctrl & MPA_V2_RDMA_READ_RTR) {
 291                struct siw_sqe *rreq;
 292
 293                wqe->sqe.opcode = SIW_OP_READ;
 294
 295                spin_lock(&qp->orq_lock);
 296
 297                rreq = orq_get_free(qp);
 298                if (rreq) {
 299                        siw_read_to_orq(rreq, &wqe->sqe);
 300                        qp->orq_put++;
 301                } else
 302                        rv = -EIO;
 303
 304                spin_unlock(&qp->orq_lock);
 305        } else
 306                rv = -EINVAL;
 307
 308        if (rv)
 309                wqe->wr_status = SIW_WR_IDLE;
 310
 311        spin_unlock_irqrestore(&qp->sq_lock, flags);
 312
 313        if (!rv)
 314                rv = siw_sq_start(qp);
 315
 316        return rv;
 317}
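/*
 * Editor's note: a typical caller is the connection manager after MPAv2 P2P
 * negotiation settled on an RTR mode. An illustrative invocation (the
 * concrete locking and error handling follow the caller's context):
 *
 *	down_read(&qp->state_lock);
 *	rv = siw_qp_mpa_rts(qp, MPA_V2_RDMA_WRITE_RTR);
 *	up_read(&qp->state_lock);
 */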
 318
 319/*
 320 * Map memory access error to DDP tagged error
 321 */
 322enum ddp_ecode siw_tagged_error(enum siw_access_state state)
 323{
 324        switch (state) {
 325        case E_STAG_INVALID:
 326                return DDP_ECODE_T_INVALID_STAG;
 327        case E_BASE_BOUNDS:
 328                return DDP_ECODE_T_BASE_BOUNDS;
 329        case E_PD_MISMATCH:
 330                return DDP_ECODE_T_STAG_NOT_ASSOC;
 331        case E_ACCESS_PERM:
 332                /*
 333                 * RFC 5041 (DDP) lacks an ecode for insufficient access
 334                 * permissions. 'Invalid STag' seems to be the closest
 335                 * match, though.
 336                 */
 337                return DDP_ECODE_T_INVALID_STAG;
 338        default:
 339                WARN_ON(1);
 340                return DDP_ECODE_T_INVALID_STAG;
 341        }
 342}
 343
 344/*
 345 * Map memory access error to RDMAP protection error
 346 */
 347enum rdmap_ecode siw_rdmap_error(enum siw_access_state state)
 348{
 349        switch (state) {
 350        case E_STAG_INVALID:
 351                return RDMAP_ECODE_INVALID_STAG;
 352        case E_BASE_BOUNDS:
 353                return RDMAP_ECODE_BASE_BOUNDS;
 354        case E_PD_MISMATCH:
 355                return RDMAP_ECODE_STAG_NOT_ASSOC;
 356        case E_ACCESS_PERM:
 357                return RDMAP_ECODE_ACCESS_RIGHTS;
 358        default:
 359                return RDMAP_ECODE_UNSPECIFIED;
 360        }
 361}
 362
 363void siw_init_terminate(struct siw_qp *qp, enum term_elayer layer, u8 etype,
 364                        u8 ecode, int in_tx)
 365{
 366        if (!qp->term_info.valid) {
 367                memset(&qp->term_info, 0, sizeof(qp->term_info));
 368                qp->term_info.layer = layer;
 369                qp->term_info.etype = etype;
 370                qp->term_info.ecode = ecode;
 371                qp->term_info.in_tx = in_tx;
 372                qp->term_info.valid = 1;
 373        }
 374        siw_dbg_qp(qp, "init TERM: layer %d, type %d, code %d, in tx %s\n",
 375                   layer, etype, ecode, in_tx ? "yes" : "no");
 376}
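/*
 * Editor's note: the error mapping helpers above are meant to feed this
 * function. An illustrative example of flagging a tagged-buffer protection
 * error detected during receive processing (call pattern only, not taken
 * from the driver):
 *
 *	siw_init_terminate(qp, TERM_ERROR_LAYER_DDP, DDP_ETYPE_TAGGED_BUF,
 *			   siw_tagged_error(E_ACCESS_PERM), 0);
 *	siw_send_terminate(qp);
 */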
 377
 378/*
 379 * Send a TERMINATE message, as defined in RFCs 5040/5041/5044/6581.
 380 * Sending TERMINATE messages is best effort - such messages
 381 * can only be sent if the QP is still connected and it does
 382 * not have another outbound message in progress, i.e. the
 383 * TERMINATE message must not interfere with an incomplete current
 384 * transmit operation.
 385 */
 386void siw_send_terminate(struct siw_qp *qp)
 387{
 388        struct kvec iov[3];
 389        struct msghdr msg = { .msg_flags = MSG_DONTWAIT | MSG_EOR };
 390        struct iwarp_terminate *term = NULL;
 391        union iwarp_hdr *err_hdr = NULL;
 392        struct socket *s = qp->attrs.sk;
 393        struct siw_rx_stream *srx = &qp->rx_stream;
 394        union iwarp_hdr *rx_hdr = &srx->hdr;
 395        u32 crc = 0;
 396        int num_frags, len_terminate, rv;
 397
 398        if (!qp->term_info.valid)
 399                return;
 400
 401        qp->term_info.valid = 0;
 402
 403        if (tx_wqe(qp)->wr_status == SIW_WR_INPROGRESS) {
 404                siw_dbg_qp(qp, "cannot send TERMINATE: op %d in progress\n",
 405                           tx_type(tx_wqe(qp)));
 406                return;
 407        }
 408        if (!s && qp->cep)
 409                /* QP not yet in RTS. Take socket from connection end point */
 410                s = qp->cep->sock;
 411
 412        if (!s) {
 413                siw_dbg_qp(qp, "cannot send TERMINATE: not connected\n");
 414                return;
 415        }
 416
 417        term = kzalloc(sizeof(*term), GFP_KERNEL);
 418        if (!term)
 419                return;
 420
 421        term->ddp_qn = cpu_to_be32(RDMAP_UNTAGGED_QN_TERMINATE);
 422        term->ddp_mo = 0;
 423        term->ddp_msn = cpu_to_be32(1);
 424
 425        iov[0].iov_base = term;
 426        iov[0].iov_len = sizeof(*term);
 427
 428        if ((qp->term_info.layer == TERM_ERROR_LAYER_DDP) ||
 429            ((qp->term_info.layer == TERM_ERROR_LAYER_RDMAP) &&
 430             (qp->term_info.etype != RDMAP_ETYPE_CATASTROPHIC))) {
 431                err_hdr = kzalloc(sizeof(*err_hdr), GFP_KERNEL);
 432                if (!err_hdr) {
 433                        kfree(term);
 434                        return;
 435                }
 436        }
 437        memcpy(&term->ctrl, &iwarp_pktinfo[RDMAP_TERMINATE].ctrl,
 438               sizeof(struct iwarp_ctrl));
 439
 440        __rdmap_term_set_layer(term, qp->term_info.layer);
 441        __rdmap_term_set_etype(term, qp->term_info.etype);
 442        __rdmap_term_set_ecode(term, qp->term_info.ecode);
 443
 444        switch (qp->term_info.layer) {
 445        case TERM_ERROR_LAYER_RDMAP:
 446                if (qp->term_info.etype == RDMAP_ETYPE_CATASTROPHIC)
 447                        /* No additional DDP/RDMAP header to be included */
 448                        break;
 449
 450                if (qp->term_info.etype == RDMAP_ETYPE_REMOTE_PROTECTION) {
 451                        /*
 452                         * Complete RDMAP frame will get attached, and
 453                         * DDP segment length is valid
 454                         */
 455                        term->flag_m = 1;
 456                        term->flag_d = 1;
 457                        term->flag_r = 1;
 458
 459                        if (qp->term_info.in_tx) {
 460                                struct iwarp_rdma_rreq *rreq;
 461                                struct siw_wqe *wqe = tx_wqe(qp);
 462
 463                                /* Inbound RREQ error, detected during
 464                                 * RRESP creation. Take state from
 465                                 * current TX work queue element to
 466                                 * reconstruct the peer's RREQ.
 467                                 */
 468                                rreq = (struct iwarp_rdma_rreq *)err_hdr;
 469
 470                                memcpy(&rreq->ctrl,
 471                                       &iwarp_pktinfo[RDMAP_RDMA_READ_REQ].ctrl,
 472                                       sizeof(struct iwarp_ctrl));
 473
 474                                rreq->rsvd = 0;
 475                                rreq->ddp_qn =
 476                                        htonl(RDMAP_UNTAGGED_QN_RDMA_READ);
 477
 478                                /* Provide RREQ's MSN as kept aside */
 479                                rreq->ddp_msn = htonl(wqe->sqe.sge[0].length);
 480
 481                                rreq->ddp_mo = htonl(wqe->processed);
 482                                rreq->sink_stag = htonl(wqe->sqe.rkey);
 483                                rreq->sink_to = cpu_to_be64(wqe->sqe.raddr);
 484                                rreq->read_size = htonl(wqe->sqe.sge[0].length);
 485                                rreq->source_stag = htonl(wqe->sqe.sge[0].lkey);
 486                                rreq->source_to =
 487                                        cpu_to_be64(wqe->sqe.sge[0].laddr);
 488
 489                                iov[1].iov_base = rreq;
 490                                iov[1].iov_len = sizeof(*rreq);
 491
 492                                rx_hdr = (union iwarp_hdr *)rreq;
 493                        } else {
 494                                /* Take RDMAP/DDP information from
 495                                 * current (failed) inbound frame.
 496                                 */
 497                                iov[1].iov_base = rx_hdr;
 498
 499                                if (__rdmap_get_opcode(&rx_hdr->ctrl) ==
 500                                    RDMAP_RDMA_READ_REQ)
 501                                        iov[1].iov_len =
 502                                                sizeof(struct iwarp_rdma_rreq);
 503                                else /* SEND type */
 504                                        iov[1].iov_len =
 505                                                sizeof(struct iwarp_send);
 506                        }
 507                } else {
 508                        /* Do not report DDP hdr information if packet
 509                         * layout is unknown
 510                         */
 511                        if ((qp->term_info.ecode == RDMAP_ECODE_VERSION) ||
 512                            (qp->term_info.ecode == RDMAP_ECODE_OPCODE))
 513                                break;
 514
 515                        iov[1].iov_base = rx_hdr;
 516
 517                        /* Only DDP frame will get attached */
 518                        if (rx_hdr->ctrl.ddp_rdmap_ctrl & DDP_FLAG_TAGGED)
 519                                iov[1].iov_len =
 520                                        sizeof(struct iwarp_rdma_write);
 521                        else
 522                                iov[1].iov_len = sizeof(struct iwarp_send);
 523
 524                        term->flag_m = 1;
 525                        term->flag_d = 1;
 526                }
 527                term->ctrl.mpa_len = cpu_to_be16(iov[1].iov_len);
 528                break;
 529
 530        case TERM_ERROR_LAYER_DDP:
 531                /* Report an error encountered during DDP processing.
 532                 * This can only happen as a result of inbound
 533                 * DDP processing.
 534                 */
 535
 536                /* Do not report DDP hdr information if packet
 537                 * layout is unknown
 538                 */
 539                if (((qp->term_info.etype == DDP_ETYPE_TAGGED_BUF) &&
 540                     (qp->term_info.ecode == DDP_ECODE_T_VERSION)) ||
 541                    ((qp->term_info.etype == DDP_ETYPE_UNTAGGED_BUF) &&
 542                     (qp->term_info.ecode == DDP_ECODE_UT_VERSION)))
 543                        break;
 544
 545                iov[1].iov_base = rx_hdr;
 546
 547                if (rx_hdr->ctrl.ddp_rdmap_ctrl & DDP_FLAG_TAGGED)
 548                        iov[1].iov_len = sizeof(struct iwarp_ctrl_tagged);
 549                else
 550                        iov[1].iov_len = sizeof(struct iwarp_ctrl_untagged);
 551
 552                term->flag_m = 1;
 553                term->flag_d = 1;
 554                break;
 555
 556        default:
 557                break;
 558        }
 559        if (term->flag_m || term->flag_d || term->flag_r) {
 560                iov[2].iov_base = &crc;
 561                iov[2].iov_len = sizeof(crc);
 562                len_terminate = sizeof(*term) + iov[1].iov_len + MPA_CRC_SIZE;
 563                num_frags = 3;
 564        } else {
 565                iov[1].iov_base = &crc;
 566                iov[1].iov_len = sizeof(crc);
 567                len_terminate = sizeof(*term) + MPA_CRC_SIZE;
 568                num_frags = 2;
 569        }
 570
 571        /* Adjust DDP Segment Length parameter, if valid */
 572        if (term->flag_m) {
 573                u32 real_ddp_len = be16_to_cpu(rx_hdr->ctrl.mpa_len);
 574                enum rdma_opcode op = __rdmap_get_opcode(&rx_hdr->ctrl);
 575
 576                real_ddp_len -= iwarp_pktinfo[op].hdr_len - MPA_HDR_SIZE;
 577                rx_hdr->ctrl.mpa_len = cpu_to_be16(real_ddp_len);
 578        }
 579
 580        term->ctrl.mpa_len =
 581                cpu_to_be16(len_terminate - (MPA_HDR_SIZE + MPA_CRC_SIZE));
 582        if (qp->tx_ctx.mpa_crc_hd) {
 583                crypto_shash_init(qp->tx_ctx.mpa_crc_hd);
 584                if (crypto_shash_update(qp->tx_ctx.mpa_crc_hd,
 585                                        (u8 *)iov[0].iov_base,
 586                                        iov[0].iov_len))
 587                        goto out;
 588
 589                if (num_frags == 3) {
 590                        if (crypto_shash_update(qp->tx_ctx.mpa_crc_hd,
 591                                                (u8 *)iov[1].iov_base,
 592                                                iov[1].iov_len))
 593                                goto out;
 594                }
 595                crypto_shash_final(qp->tx_ctx.mpa_crc_hd, (u8 *)&crc);
 596        }
 597
 598        rv = kernel_sendmsg(s, &msg, iov, num_frags, len_terminate);
 599        siw_dbg_qp(qp, "sent TERM: %s, layer %d, type %d, code %d (%d bytes)\n",
 600                   rv == len_terminate ? "success" : "failure",
 601                   __rdmap_term_layer(term), __rdmap_term_etype(term),
 602                   __rdmap_term_ecode(term), rv);
 603out:
 604        kfree(term);
 605        kfree(err_hdr);
 606}
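/*
 * Editor's note: depending on whether a copy of the offending DDP/RDMAP
 * header gets attached (flag_m/flag_d/flag_r set), the kvec array assembled
 * above produces one of two wire layouts:
 *
 *	with header copy:    iov[0] TERMINATE hdr, iov[1] DDP/RDMAP hdr copy,
 *	                     iov[2] trailing MPA CRC
 *	without header copy: iov[0] TERMINATE hdr, iov[1] trailing MPA CRC
 */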
 607
 608/*
 609 * Handle all attrs other than state
 610 */
 611static void siw_qp_modify_nonstate(struct siw_qp *qp,
 612                                   struct siw_qp_attrs *attrs,
 613                                   enum siw_qp_attr_mask mask)
 614{
 615        if (mask & SIW_QP_ATTR_ACCESS_FLAGS) {
 616                if (attrs->flags & SIW_RDMA_BIND_ENABLED)
 617                        qp->attrs.flags |= SIW_RDMA_BIND_ENABLED;
 618                else
 619                        qp->attrs.flags &= ~SIW_RDMA_BIND_ENABLED;
 620
 621                if (attrs->flags & SIW_RDMA_WRITE_ENABLED)
 622                        qp->attrs.flags |= SIW_RDMA_WRITE_ENABLED;
 623                else
 624                        qp->attrs.flags &= ~SIW_RDMA_WRITE_ENABLED;
 625
 626                if (attrs->flags & SIW_RDMA_READ_ENABLED)
 627                        qp->attrs.flags |= SIW_RDMA_READ_ENABLED;
 628                else
 629                        qp->attrs.flags &= ~SIW_RDMA_READ_ENABLED;
 630        }
 631}
 632
 633static int siw_qp_nextstate_from_idle(struct siw_qp *qp,
 634                                      struct siw_qp_attrs *attrs,
 635                                      enum siw_qp_attr_mask mask)
 636{
 637        int rv = 0;
 638
 639        switch (attrs->state) {
 640        case SIW_QP_STATE_RTS:
 641                if (attrs->flags & SIW_MPA_CRC) {
 642                        rv = siw_qp_enable_crc(qp);
 643                        if (rv)
 644                                break;
 645                }
 646                if (!(mask & SIW_QP_ATTR_LLP_HANDLE)) {
 647                        siw_dbg_qp(qp, "no socket\n");
 648                        rv = -EINVAL;
 649                        break;
 650                }
 651                if (!(mask & SIW_QP_ATTR_MPA)) {
 652                        siw_dbg_qp(qp, "no MPA\n");
 653                        rv = -EINVAL;
 654                        break;
 655                }
 656                /*
 657                 * Initialize iWARP TX state
 658                 */
 659                qp->tx_ctx.ddp_msn[RDMAP_UNTAGGED_QN_SEND] = 0;
 660                qp->tx_ctx.ddp_msn[RDMAP_UNTAGGED_QN_RDMA_READ] = 0;
 661                qp->tx_ctx.ddp_msn[RDMAP_UNTAGGED_QN_TERMINATE] = 0;
 662
 663                /*
 664                 * Initialize iWARP RX state
 665                 */
 666                qp->rx_stream.ddp_msn[RDMAP_UNTAGGED_QN_SEND] = 1;
 667                qp->rx_stream.ddp_msn[RDMAP_UNTAGGED_QN_RDMA_READ] = 1;
 668                qp->rx_stream.ddp_msn[RDMAP_UNTAGGED_QN_TERMINATE] = 1;
 669
 670                /*
 671                 * init IRD free queue, caller has already checked
 672                 * limits.
 673                 */
 674                rv = siw_qp_readq_init(qp, attrs->irq_size,
 675                                       attrs->orq_size);
 676                if (rv)
 677                        break;
 678
 679                qp->attrs.sk = attrs->sk;
 680                qp->attrs.state = SIW_QP_STATE_RTS;
 681
 682                siw_dbg_qp(qp, "enter RTS: crc=%s, ord=%u, ird=%u\n",
 683                           attrs->flags & SIW_MPA_CRC ? "y" : "n",
 684                           qp->attrs.orq_size, qp->attrs.irq_size);
 685                break;
 686
 687        case SIW_QP_STATE_ERROR:
 688                siw_rq_flush(qp);
 689                qp->attrs.state = SIW_QP_STATE_ERROR;
 690                if (qp->cep) {
 691                        siw_cep_put(qp->cep);
 692                        qp->cep = NULL;
 693                }
 694                break;
 695
 696        default:
 697                break;
 698        }
 699        return rv;
 700}
 701
 702static int siw_qp_nextstate_from_rts(struct siw_qp *qp,
 703                                     struct siw_qp_attrs *attrs)
 704{
 705        int drop_conn = 0;
 706
 707        switch (attrs->state) {
 708        case SIW_QP_STATE_CLOSING:
 709                /*
 710                 * Verbs: move to IDLE if SQ and ORQ are empty.
 711                 * Move to ERROR otherwise. But first of all we must
 712                 * close the connection. So we keep CLOSING or ERROR
 713                 * as a transient state, schedule connection drop work
 714                 * and wait for the socket state change upcall to
 715                 * come back closed.
 716                 */
 717                if (tx_wqe(qp)->wr_status == SIW_WR_IDLE) {
 718                        qp->attrs.state = SIW_QP_STATE_CLOSING;
 719                } else {
 720                        qp->attrs.state = SIW_QP_STATE_ERROR;
 721                        siw_sq_flush(qp);
 722                }
 723                siw_rq_flush(qp);
 724
 725                drop_conn = 1;
 726                break;
 727
 728        case SIW_QP_STATE_TERMINATE:
 729                qp->attrs.state = SIW_QP_STATE_TERMINATE;
 730
 731                siw_init_terminate(qp, TERM_ERROR_LAYER_RDMAP,
 732                                   RDMAP_ETYPE_CATASTROPHIC,
 733                                   RDMAP_ECODE_UNSPECIFIED, 1);
 734                drop_conn = 1;
 735                break;
 736
 737        case SIW_QP_STATE_ERROR:
 738                /*
 739                 * This is an emergency close.
 740                 *
 741                 * Any in-progress transmit operation will get
 742                 * cancelled.
 743                 * This will likely result in a protocol failure,
 744                 * if a TX operation is in transit. The caller
 745                 * could unconditionally wait to give the current
 746                 * operation a chance to complete.
 747                 * Especially: how to handle the non-empty IRQ case?
 748                 * The peer was asking for data transfer at a valid
 749                 * point in time.
 750                 */
 751                siw_sq_flush(qp);
 752                siw_rq_flush(qp);
 753                qp->attrs.state = SIW_QP_STATE_ERROR;
 754                drop_conn = 1;
 755                break;
 756
 757        default:
 758                break;
 759        }
 760        return drop_conn;
 761}
 762
 763static void siw_qp_nextstate_from_term(struct siw_qp *qp,
 764                                       struct siw_qp_attrs *attrs)
 765{
 766        switch (attrs->state) {
 767        case SIW_QP_STATE_ERROR:
 768                siw_rq_flush(qp);
 769                qp->attrs.state = SIW_QP_STATE_ERROR;
 770
 771                if (tx_wqe(qp)->wr_status != SIW_WR_IDLE)
 772                        siw_sq_flush(qp);
 773                break;
 774
 775        default:
 776                break;
 777        }
 778}
 779
 780static int siw_qp_nextstate_from_close(struct siw_qp *qp,
 781                                       struct siw_qp_attrs *attrs)
 782{
 783        int rv = 0;
 784
 785        switch (attrs->state) {
 786        case SIW_QP_STATE_IDLE:
 787                WARN_ON(tx_wqe(qp)->wr_status != SIW_WR_IDLE);
 788                qp->attrs.state = SIW_QP_STATE_IDLE;
 789                break;
 790
 791        case SIW_QP_STATE_CLOSING:
 792                /*
 793                 * The LLP may have already moved the QP to CLOSING
 794                 * due to a graceful peer close init.
 795                 */
 796                break;
 797
 798        case SIW_QP_STATE_ERROR:
 799                /*
 800                 * QP was moved to CLOSING by LLP event
 801                 * not yet seen by user.
 802                 */
 803                qp->attrs.state = SIW_QP_STATE_ERROR;
 804
 805                if (tx_wqe(qp)->wr_status != SIW_WR_IDLE)
 806                        siw_sq_flush(qp);
 807
 808                siw_rq_flush(qp);
 809                break;
 810
 811        default:
 812                siw_dbg_qp(qp, "state transition undefined: %s => %s\n",
 813                           siw_qp_state_to_string[qp->attrs.state],
 814                           siw_qp_state_to_string[attrs->state]);
 815
 816                rv = -ECONNABORTED;
 817        }
 818        return rv;
 819}
 820
 821/*
 822 * Caller must hold qp->state_lock
 823 */
 824int siw_qp_modify(struct siw_qp *qp, struct siw_qp_attrs *attrs,
 825                  enum siw_qp_attr_mask mask)
 826{
 827        int drop_conn = 0, rv = 0;
 828
 829        if (!mask)
 830                return 0;
 831
 832        siw_dbg_qp(qp, "state: %s => %s\n",
 833                   siw_qp_state_to_string[qp->attrs.state],
 834                   siw_qp_state_to_string[attrs->state]);
 835
 836        if (mask != SIW_QP_ATTR_STATE)
 837                siw_qp_modify_nonstate(qp, attrs, mask);
 838
 839        if (!(mask & SIW_QP_ATTR_STATE))
 840                return 0;
 841
 842        switch (qp->attrs.state) {
 843        case SIW_QP_STATE_IDLE:
 844        case SIW_QP_STATE_RTR:
 845                rv = siw_qp_nextstate_from_idle(qp, attrs, mask);
 846                break;
 847
 848        case SIW_QP_STATE_RTS:
 849                drop_conn = siw_qp_nextstate_from_rts(qp, attrs);
 850                break;
 851
 852        case SIW_QP_STATE_TERMINATE:
 853                siw_qp_nextstate_from_term(qp, attrs);
 854                break;
 855
 856        case SIW_QP_STATE_CLOSING:
 857                siw_qp_nextstate_from_close(qp, attrs);
 858                break;
 859        default:
 860                break;
 861        }
 862        if (drop_conn)
 863                siw_qp_cm_drop(qp, 0);
 864
 865        return rv;
 866}
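/*
 * Editor's note: a sketch of how a caller might drive the IDLE -> RTS
 * transition once the MPA handshake completed. Values and surrounding code
 * are illustrative only; the real transition is driven from the connection
 * manager:
 *
 *	struct siw_qp_attrs attrs = {
 *		.state = SIW_QP_STATE_RTS,
 *		.flags = SIW_MPA_CRC,
 *		.sk = cep->sock,
 *		.irq_size = 8,
 *		.orq_size = 8,
 *	};
 *
 *	down_write(&qp->state_lock);
 *	rv = siw_qp_modify(qp, &attrs, SIW_QP_ATTR_STATE |
 *			   SIW_QP_ATTR_LLP_HANDLE | SIW_QP_ATTR_MPA);
 *	up_write(&qp->state_lock);
 */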
 867
 868void siw_read_to_orq(struct siw_sqe *rreq, struct siw_sqe *sqe)
 869{
 870        rreq->id = sqe->id;
 871        rreq->opcode = sqe->opcode;
 872        rreq->sge[0].laddr = sqe->sge[0].laddr;
 873        rreq->sge[0].length = sqe->sge[0].length;
 874        rreq->sge[0].lkey = sqe->sge[0].lkey;
 875        rreq->sge[1].lkey = sqe->sge[1].lkey;
 876        rreq->flags = sqe->flags | SIW_WQE_VALID;
 877        rreq->num_sge = 1;
 878}
 879
 880/*
 881 * Must be called with SQ locked.
 882 * To avoid complete SQ starvation by constant inbound READ requests,
 883 * the active IRQ will not be served after qp->irq_burst, if the
 884 * SQ has pending work.
 885 */
 886int siw_activate_tx(struct siw_qp *qp)
 887{
 888        struct siw_sqe *irqe, *sqe;
 889        struct siw_wqe *wqe = tx_wqe(qp);
 890        int rv = 1;
 891
 892        irqe = &qp->irq[qp->irq_get % qp->attrs.irq_size];
 893
 894        if (irqe->flags & SIW_WQE_VALID) {
 895                sqe = sq_get_next(qp);
 896
 897                /*
 898                 * Avoid local WQE processing starvation in case
 899                 * of constant inbound READ request stream
 900                 */
 901                if (sqe && ++qp->irq_burst >= SIW_IRQ_MAXBURST_SQ_ACTIVE) {
 902                        qp->irq_burst = 0;
 903                        goto skip_irq;
 904                }
 905                memset(wqe->mem, 0, sizeof(*wqe->mem) * SIW_MAX_SGE);
 906                wqe->wr_status = SIW_WR_QUEUED;
 907
 908                /* start READ RESPONSE */
 909                wqe->sqe.opcode = SIW_OP_READ_RESPONSE;
 910                wqe->sqe.flags = 0;
 911                if (irqe->num_sge) {
 912                        wqe->sqe.num_sge = 1;
 913                        wqe->sqe.sge[0].length = irqe->sge[0].length;
 914                        wqe->sqe.sge[0].laddr = irqe->sge[0].laddr;
 915                        wqe->sqe.sge[0].lkey = irqe->sge[0].lkey;
 916                } else {
 917                        wqe->sqe.num_sge = 0;
 918                }
 919
 920                /* Retain original RREQ's message sequence number for
 921                 * potential error reporting cases.
 922                 */
 923                wqe->sqe.sge[1].length = irqe->sge[1].length;
 924
 925                wqe->sqe.rkey = irqe->rkey;
 926                wqe->sqe.raddr = irqe->raddr;
 927
 928                wqe->processed = 0;
 929                qp->irq_get++;
 930
 931                /* mark current IRQ entry free */
 932                smp_store_mb(irqe->flags, 0);
 933
 934                goto out;
 935        }
 936        sqe = sq_get_next(qp);
 937        if (sqe) {
 938skip_irq:
 939                memset(wqe->mem, 0, sizeof(*wqe->mem) * SIW_MAX_SGE);
 940                wqe->wr_status = SIW_WR_QUEUED;
 941
 942                /* First copy SQE to kernel private memory */
 943                memcpy(&wqe->sqe, sqe, sizeof(*sqe));
 944
 945                if (wqe->sqe.opcode >= SIW_NUM_OPCODES) {
 946                        rv = -EINVAL;
 947                        goto out;
 948                }
 949                if (wqe->sqe.flags & SIW_WQE_INLINE) {
 950                        if (wqe->sqe.opcode != SIW_OP_SEND &&
 951                            wqe->sqe.opcode != SIW_OP_WRITE) {
 952                                rv = -EINVAL;
 953                                goto out;
 954                        }
 955                        if (wqe->sqe.sge[0].length > SIW_MAX_INLINE) {
 956                                rv = -EINVAL;
 957                                goto out;
 958                        }
 959                        wqe->sqe.sge[0].laddr = (uintptr_t)&wqe->sqe.sge[1];
 960                        wqe->sqe.sge[0].lkey = 0;
 961                        wqe->sqe.num_sge = 1;
 962                }
 963                if (wqe->sqe.flags & SIW_WQE_READ_FENCE) {
 964                        /* A READ cannot be fenced */
 965                        if (unlikely(wqe->sqe.opcode == SIW_OP_READ ||
 966                                     wqe->sqe.opcode ==
 967                                             SIW_OP_READ_LOCAL_INV)) {
 968                                siw_dbg_qp(qp, "cannot fence read\n");
 969                                rv = -EINVAL;
 970                                goto out;
 971                        }
 972                        spin_lock(&qp->orq_lock);
 973
 974                        if (!siw_orq_empty(qp)) {
 975                                qp->tx_ctx.orq_fence = 1;
 976                                rv = 0;
 977                        }
 978                        spin_unlock(&qp->orq_lock);
 979
 980                } else if (wqe->sqe.opcode == SIW_OP_READ ||
 981                           wqe->sqe.opcode == SIW_OP_READ_LOCAL_INV) {
 982                        struct siw_sqe *rreq;
 983
 984                        wqe->sqe.num_sge = 1;
 985
 986                        spin_lock(&qp->orq_lock);
 987
 988                        rreq = orq_get_free(qp);
 989                        if (rreq) {
 990                                /*
 991                                 * Make an immediate copy in ORQ to be ready
 992                                 * to process loopback READ reply
 993                                 */
 994                                siw_read_to_orq(rreq, &wqe->sqe);
 995                                qp->orq_put++;
 996                        } else {
 997                                qp->tx_ctx.orq_fence = 1;
 998                                rv = 0;
 999                        }
1000                        spin_unlock(&qp->orq_lock);
1001                }
1002
1003                /* Clear SQE, can be re-used by application */
1004                smp_store_mb(sqe->flags, 0);
1005                qp->sq_get++;
1006        } else {
1007                rv = 0;
1008        }
1009out:
1010        if (unlikely(rv < 0)) {
1011                siw_dbg_qp(qp, "error %d\n", rv);
1012                wqe->wr_status = SIW_WR_IDLE;
1013        }
1014        return rv;
1015}
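/*
 * Editor's note: the flags word of each SQE serves as the ownership handshake
 * with the posting side in the verbs layer: the producer fills the entry and
 * sets SIW_WQE_VALID last, while this consumer copies the entry and clears
 * flags via smp_store_mb() to hand the slot back. A rough sketch of the
 * producer side (illustrative, not the actual post-send code):
 *
 *	sqe = &qp->sendq[qp->sq_put % qp->attrs.sq_size];
 *	if (!READ_ONCE(sqe->flags)) {
 *		... fill id, opcode, sge[], etc. ...
 *		smp_store_mb(sqe->flags, wr_flags | SIW_WQE_VALID);
 *		qp->sq_put++;
 *	}
 */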
1016
1017/*
1018 * Check if current CQ state qualifies for calling CQ completion
1019 * handler. Must be called with CQ lock held.
1020 */
1021static bool siw_cq_notify_now(struct siw_cq *cq, u32 flags)
1022{
1023        u32 cq_notify;
1024
1025        if (!cq->base_cq.comp_handler)
1026                return false;
1027
1028        /* Read application shared notification state */
1029        cq_notify = READ_ONCE(cq->notify->flags);
1030
1031        if ((cq_notify & SIW_NOTIFY_NEXT_COMPLETION) ||
1032            ((cq_notify & SIW_NOTIFY_SOLICITED) &&
1033             (flags & SIW_WQE_SOLICITED))) {
1034                /*
1035                 * CQ notification is one-shot: Since the
1036                 * current CQE causes user notification,
1037                 * the CQ gets dis-armed and must be re-armed
1038                 * by the user for a new notification.
1039                 */
1040                WRITE_ONCE(cq->notify->flags, SIW_NOTIFY_NOT);
1041
1042                return true;
1043        }
1044        return false;
1045}
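/*
 * Editor's note: the one-shot disarm above pairs with a re-arm performed on
 * behalf of the application via the req_notify_cq verb, which essentially
 * writes the shared flags word again. Roughly (sketch only):
 *
 *	WRITE_ONCE(cq->notify->flags, SIW_NOTIFY_NEXT_COMPLETION);
 *
 * or SIW_NOTIFY_SOLICITED when only solicited completions should wake the
 * consumer.
 */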
1046
1047int siw_sqe_complete(struct siw_qp *qp, struct siw_sqe *sqe, u32 bytes,
1048                     enum siw_wc_status status)
1049{
1050        struct siw_cq *cq = qp->scq;
1051        int rv = 0;
1052
1053        if (cq) {
1054                u32 sqe_flags = sqe->flags;
1055                struct siw_cqe *cqe;
1056                u32 idx;
1057                unsigned long flags;
1058
1059                spin_lock_irqsave(&cq->lock, flags);
1060
1061                idx = cq->cq_put % cq->num_cqe;
1062                cqe = &cq->queue[idx];
1063
1064                if (!READ_ONCE(cqe->flags)) {
1065                        bool notify;
1066
1067                        cqe->id = sqe->id;
1068                        cqe->opcode = sqe->opcode;
1069                        cqe->status = status;
1070                        cqe->imm_data = 0;
1071                        cqe->bytes = bytes;
1072
1073                        if (rdma_is_kernel_res(&cq->base_cq.res))
1074                                cqe->base_qp = &qp->base_qp;
1075                        else
1076                                cqe->qp_id = qp_id(qp);
1077
1078                        /* mark CQE valid for application */
1079                        WRITE_ONCE(cqe->flags, SIW_WQE_VALID);
1080                        /* recycle SQE */
1081                        smp_store_mb(sqe->flags, 0);
1082
1083                        cq->cq_put++;
1084                        notify = siw_cq_notify_now(cq, sqe_flags);
1085
1086                        spin_unlock_irqrestore(&cq->lock, flags);
1087
1088                        if (notify) {
1089                                siw_dbg_cq(cq, "Call completion handler\n");
1090                                cq->base_cq.comp_handler(&cq->base_cq,
1091                                                cq->base_cq.cq_context);
1092                        }
1093                } else {
1094                        spin_unlock_irqrestore(&cq->lock, flags);
1095                        rv = -ENOMEM;
1096                        siw_cq_event(cq, IB_EVENT_CQ_ERR);
1097                }
1098        } else {
1099                /* recycle SQE */
1100                smp_store_mb(sqe->flags, 0);
1101        }
1102        return rv;
1103}
1104
1105int siw_rqe_complete(struct siw_qp *qp, struct siw_rqe *rqe, u32 bytes,
1106                     u32 inval_stag, enum siw_wc_status status)
1107{
1108        struct siw_cq *cq = qp->rcq;
1109        int rv = 0;
1110
1111        if (cq) {
1112                struct siw_cqe *cqe;
1113                u32 idx;
1114                unsigned long flags;
1115
1116                spin_lock_irqsave(&cq->lock, flags);
1117
1118                idx = cq->cq_put % cq->num_cqe;
1119                cqe = &cq->queue[idx];
1120
1121                if (!READ_ONCE(cqe->flags)) {
1122                        bool notify;
1123                        u8 cqe_flags = SIW_WQE_VALID;
1124
1125                        cqe->id = rqe->id;
1126                        cqe->opcode = SIW_OP_RECEIVE;
1127                        cqe->status = status;
1128                        cqe->imm_data = 0;
1129                        cqe->bytes = bytes;
1130
1131                        if (rdma_is_kernel_res(&cq->base_cq.res)) {
1132                                cqe->base_qp = &qp->base_qp;
1133                                if (inval_stag) {
1134                                        cqe_flags |= SIW_WQE_REM_INVAL;
1135                                        cqe->inval_stag = inval_stag;
1136                                }
1137                        } else {
1138                                cqe->qp_id = qp_id(qp);
1139                        }
1140                        /* mark CQE valid for application */
1141                        WRITE_ONCE(cqe->flags, cqe_flags);
1142                        /* recycle RQE */
1143                        smp_store_mb(rqe->flags, 0);
1144
1145                        cq->cq_put++;
1146                        notify = siw_cq_notify_now(cq, SIW_WQE_SIGNALLED);
1147
1148                        spin_unlock_irqrestore(&cq->lock, flags);
1149
1150                        if (notify) {
1151                                siw_dbg_cq(cq, "Call completion handler\n");
1152                                cq->base_cq.comp_handler(&cq->base_cq,
1153                                                cq->base_cq.cq_context);
1154                        }
1155                } else {
1156                        spin_unlock_irqrestore(&cq->lock, flags);
1157                        rv = -ENOMEM;
1158                        siw_cq_event(cq, IB_EVENT_CQ_ERR);
1159                }
1160        } else {
1161                /* recycle RQE */
1162                smp_store_mb(rqe->flags, 0);
1163        }
1164        return rv;
1165}
1166
1167/*
1168 * siw_sq_flush()
1169 *
1170 * Flush SQ and ORQ entries to CQ.
1171 *
1172 * Must be called with QP state write lock held.
1173 * Therefore, SQ and ORQ lock must not be taken.
1174 */
1175void siw_sq_flush(struct siw_qp *qp)
1176{
1177        struct siw_sqe *sqe;
1178        struct siw_wqe *wqe = tx_wqe(qp);
1179        int async_event = 0;
1180
1181        /*
1182         * Start with completing any work currently on the ORQ
1183         */
1184        while (qp->attrs.orq_size) {
1185                sqe = &qp->orq[qp->orq_get % qp->attrs.orq_size];
1186                if (!READ_ONCE(sqe->flags))
1187                        break;
1188
1189                if (siw_sqe_complete(qp, sqe, 0, SIW_WC_WR_FLUSH_ERR) != 0)
1190                        break;
1191
1192                WRITE_ONCE(sqe->flags, 0);
1193                qp->orq_get++;
1194        }
1195        /*
1196         * Flush an in-progress WQE if present
1197         */
1198        if (wqe->wr_status != SIW_WR_IDLE) {
1199                siw_dbg_qp(qp, "flush current SQE, type %d, status %d\n",
1200                           tx_type(wqe), wqe->wr_status);
1201
1202                siw_wqe_put_mem(wqe, tx_type(wqe));
1203
1204                if (tx_type(wqe) != SIW_OP_READ_RESPONSE &&
1205                    ((tx_type(wqe) != SIW_OP_READ &&
1206                      tx_type(wqe) != SIW_OP_READ_LOCAL_INV) ||
1207                     wqe->wr_status == SIW_WR_QUEUED))
1208                        /*
1209                         * An in-progress Read Request is already in
1210                         * the ORQ
1211                         */
1212                        siw_sqe_complete(qp, &wqe->sqe, wqe->bytes,
1213                                         SIW_WC_WR_FLUSH_ERR);
1214
1215                wqe->wr_status = SIW_WR_IDLE;
1216        }
1217        /*
1218         * Flush the Send Queue
1219         */
1220        while (qp->attrs.sq_size) {
1221                sqe = &qp->sendq[qp->sq_get % qp->attrs.sq_size];
1222                if (!READ_ONCE(sqe->flags))
1223                        break;
1224
1225                async_event = 1;
1226                if (siw_sqe_complete(qp, sqe, 0, SIW_WC_WR_FLUSH_ERR) != 0)
1227                        /*
1228                         * Shall IB_EVENT_SQ_DRAINED be suppressed if work
1229                         * completion fails?
1230                         */
1231                        break;
1232
1233                WRITE_ONCE(sqe->flags, 0);
1234                qp->sq_get++;
1235        }
1236        if (async_event)
1237                siw_qp_event(qp, IB_EVENT_SQ_DRAINED);
1238}
1239
1240/*
1241 * siw_rq_flush()
1242 *
1243 * Flush recv queue entries to CQ. Also
1244 * takes care of pending active tagged and untagged
1245 * inbound transfers that still have target memory
1246 * referenced.
1247 *
1248 * Must be called with QP state write lock held.
1249 * Therefore, RQ lock must not be taken.
1250 */
1251void siw_rq_flush(struct siw_qp *qp)
1252{
1253        struct siw_wqe *wqe = &qp->rx_untagged.wqe_active;
1254
1255        /*
1256         * Flush an in-progress untagged operation if present
1257         */
1258        if (wqe->wr_status != SIW_WR_IDLE) {
1259                siw_dbg_qp(qp, "flush current rqe, type %d, status %d\n",
1260                           rx_type(wqe), wqe->wr_status);
1261
1262                siw_wqe_put_mem(wqe, rx_type(wqe));
1263
1264                if (rx_type(wqe) == SIW_OP_RECEIVE) {
1265                        siw_rqe_complete(qp, &wqe->rqe, wqe->bytes,
1266                                         0, SIW_WC_WR_FLUSH_ERR);
1267                } else if (rx_type(wqe) != SIW_OP_READ &&
1268                           rx_type(wqe) != SIW_OP_READ_RESPONSE &&
1269                           rx_type(wqe) != SIW_OP_WRITE) {
1270                        siw_sqe_complete(qp, &wqe->sqe, 0, SIW_WC_WR_FLUSH_ERR);
1271                }
1272                wqe->wr_status = SIW_WR_IDLE;
1273        }
1274        wqe = &qp->rx_tagged.wqe_active;
1275
1276        if (wqe->wr_status != SIW_WR_IDLE) {
1277                siw_wqe_put_mem(wqe, rx_type(wqe));
1278                wqe->wr_status = SIW_WR_IDLE;
1279        }
1280        /*
1281         * Flush the Receive Queue
1282         */
1283        while (qp->attrs.rq_size) {
1284                struct siw_rqe *rqe =
1285                        &qp->recvq[qp->rq_get % qp->attrs.rq_size];
1286
1287                if (!READ_ONCE(rqe->flags))
1288                        break;
1289
1290                if (siw_rqe_complete(qp, rqe, 0, 0, SIW_WC_WR_FLUSH_ERR) != 0)
1291                        break;
1292
1293                WRITE_ONCE(rqe->flags, 0);
1294                qp->rq_get++;
1295        }
1296}
1297
1298int siw_qp_add(struct siw_device *sdev, struct siw_qp *qp)
1299{
1300        int rv = xa_alloc(&sdev->qp_xa, &qp->base_qp.qp_num, qp, xa_limit_32b,
1301                          GFP_KERNEL);
1302
1303        if (!rv) {
1304                kref_init(&qp->ref);
1305                qp->sdev = sdev;
1306                siw_dbg_qp(qp, "new QP\n");
1307        }
1308        return rv;
1309}
1310
1311void siw_free_qp(struct kref *ref)
1312{
1313        struct siw_qp *found, *qp = container_of(ref, struct siw_qp, ref);
1314        struct siw_device *sdev = qp->sdev;
1315        unsigned long flags;
1316
1317        if (qp->cep)
1318                siw_cep_put(qp->cep);
1319
1320        found = xa_erase(&sdev->qp_xa, qp_id(qp));
1321        WARN_ON(found != qp);
1322        spin_lock_irqsave(&sdev->lock, flags);
1323        list_del(&qp->devq);
1324        spin_unlock_irqrestore(&sdev->lock, flags);
1325
1326        vfree(qp->sendq);
1327        vfree(qp->recvq);
1328        vfree(qp->irq);
1329        vfree(qp->orq);
1330
1331        siw_put_tx_cpu(qp->tx_cpu);
1332
1333        atomic_dec(&sdev->num_qp);
1334        siw_dbg_qp(qp, "free QP\n");
1335        kfree_rcu(qp, rcu);
1336}
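/*
 * Editor's note: this is the kref release function. It runs once the last
 * reference taken with kref_get(&qp->ref) is dropped; the put side boils
 * down to (the driver wraps this in a small helper):
 *
 *	kref_put(&qp->ref, siw_free_qp);
 */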
1337