linux/drivers/infiniband/hw/hfi1/uc.c
   1// SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause
   2/*
   3 * Copyright(c) 2015 - 2018 Intel Corporation.
   4 */
   5
   6#include "hfi.h"
   7#include "verbs_txreq.h"
   8#include "qp.h"
   9
  10/* cut down ridiculously long IB macro names */
  11#define OP(x) UC_OP(x)
  12
  13/**
  14 * hfi1_make_uc_req - construct a request packet (SEND, RDMA write)
  15 * @qp: a pointer to the QP
  16 * @ps: the current packet state
  17 *
  18 * Assume s_lock is held.
  19 *
  20 * Return 1 if constructed; otherwise, return 0.
  21 */
  22int hfi1_make_uc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
  23{
  24        struct hfi1_qp_priv *priv = qp->priv;
  25        struct ib_other_headers *ohdr;
  26        struct rvt_swqe *wqe;
  27        u32 hwords;
  28        u32 bth0 = 0;
  29        u32 len;
  30        u32 pmtu = qp->pmtu;
  31        int middle = 0;
  32
  33        ps->s_txreq = get_txreq(ps->dev, qp);
  34        if (!ps->s_txreq)
  35                goto bail_no_tx;
  36
  37        if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_SEND_OK)) {
  38                if (!(ib_rvt_state_ops[qp->state] & RVT_FLUSH_SEND))
  39                        goto bail;
  40                /* We are in the error state, flush the work request. */
  41                if (qp->s_last == READ_ONCE(qp->s_head))
  42                        goto bail;
  43                /* If DMAs are in progress, we can't flush immediately. */
  44                if (iowait_sdma_pending(&priv->s_iowait)) {
  45                        qp->s_flags |= RVT_S_WAIT_DMA;
  46                        goto bail;
  47                }
  48                clear_ahg(qp);
  49                wqe = rvt_get_swqe_ptr(qp, qp->s_last);
  50                rvt_send_complete(qp, wqe, IB_WC_WR_FLUSH_ERR);
  51                goto done_free_tx;
  52        }
  53
  54        if (priv->hdr_type == HFI1_PKT_TYPE_9B) {
  55                /* header size in 32-bit words LRH+BTH = (8+12)/4. */
  56                hwords = 5;
  57                if (rdma_ah_get_ah_flags(&qp->remote_ah_attr) & IB_AH_GRH)
  58                        ohdr = &ps->s_txreq->phdr.hdr.ibh.u.l.oth;
  59                else
  60                        ohdr = &ps->s_txreq->phdr.hdr.ibh.u.oth;
  61        } else {
  62                /* header size in 32-bit words 16B LRH+BTH = (16+12)/4. */
  63                hwords = 7;
  64                if ((rdma_ah_get_ah_flags(&qp->remote_ah_attr) & IB_AH_GRH) &&
  65                    (hfi1_check_mcast(rdma_ah_get_dlid(&qp->remote_ah_attr))))
  66                        ohdr = &ps->s_txreq->phdr.hdr.opah.u.l.oth;
  67                else
  68                        ohdr = &ps->s_txreq->phdr.hdr.opah.u.oth;
  69        }
  70
  71        /* Get the next send request. */
  72        wqe = rvt_get_swqe_ptr(qp, qp->s_cur);
  73        qp->s_wqe = NULL;
  74        switch (qp->s_state) {
  75        default:
  76                if (!(ib_rvt_state_ops[qp->state] &
  77                    RVT_PROCESS_NEXT_SEND_OK))
  78                        goto bail;
  79                /* Check if send work queue is empty. */
  80                if (qp->s_cur == READ_ONCE(qp->s_head)) {
  81                        clear_ahg(qp);
  82                        goto bail;
  83                }
  84                /*
  85                 * Local operations are processed immediately
  86                 * after all prior requests have completed.
  87                 */
  88                if (wqe->wr.opcode == IB_WR_REG_MR ||
  89                    wqe->wr.opcode == IB_WR_LOCAL_INV) {
  90                        int local_ops = 0;
  91                        int err = 0;
  92
  93                        if (qp->s_last != qp->s_cur)
  94                                goto bail;
  95                        if (++qp->s_cur == qp->s_size)
  96                                qp->s_cur = 0;
  97                        if (!(wqe->wr.send_flags & RVT_SEND_COMPLETION_ONLY)) {
  98                                err = rvt_invalidate_rkey(
  99                                        qp, wqe->wr.ex.invalidate_rkey);
 100                                local_ops = 1;
 101                        }
 102                        rvt_send_complete(qp, wqe, err ? IB_WC_LOC_PROT_ERR
 103                                                        : IB_WC_SUCCESS);
 104                        if (local_ops)
 105                                atomic_dec(&qp->local_ops_pending);
 106                        goto done_free_tx;
 107                }
 108                /*
 109                 * Start a new request.
 110                 */
 111                qp->s_psn = wqe->psn;
 112                qp->s_sge.sge = wqe->sg_list[0];
 113                qp->s_sge.sg_list = wqe->sg_list + 1;
 114                qp->s_sge.num_sge = wqe->wr.num_sge;
 115                qp->s_sge.total_len = wqe->length;
 116                len = wqe->length;
 117                qp->s_len = len;
 118                switch (wqe->wr.opcode) {
 119                case IB_WR_SEND:
 120                case IB_WR_SEND_WITH_IMM:
 121                        if (len > pmtu) {
 122                                qp->s_state = OP(SEND_FIRST);
 123                                len = pmtu;
 124                                break;
 125                        }
 126                        if (wqe->wr.opcode == IB_WR_SEND) {
 127                                qp->s_state = OP(SEND_ONLY);
 128                        } else {
 129                                qp->s_state =
 130                                        OP(SEND_ONLY_WITH_IMMEDIATE);
 131                                /* Immediate data comes after the BTH */
 132                                ohdr->u.imm_data = wqe->wr.ex.imm_data;
 133                                hwords += 1;
 134                        }
 135                        if (wqe->wr.send_flags & IB_SEND_SOLICITED)
 136                                bth0 |= IB_BTH_SOLICITED;
 137                        qp->s_wqe = wqe;
 138                        if (++qp->s_cur >= qp->s_size)
 139                                qp->s_cur = 0;
 140                        break;
 141
 142                case IB_WR_RDMA_WRITE:
 143                case IB_WR_RDMA_WRITE_WITH_IMM:
 144                        ohdr->u.rc.reth.vaddr =
 145                                cpu_to_be64(wqe->rdma_wr.remote_addr);
 146                        ohdr->u.rc.reth.rkey =
 147                                cpu_to_be32(wqe->rdma_wr.rkey);
 148                        ohdr->u.rc.reth.length = cpu_to_be32(len);
 149                        hwords += sizeof(struct ib_reth) / 4;
 150                        if (len > pmtu) {
 151                                qp->s_state = OP(RDMA_WRITE_FIRST);
 152                                len = pmtu;
 153                                break;
 154                        }
 155                        if (wqe->wr.opcode == IB_WR_RDMA_WRITE) {
 156                                qp->s_state = OP(RDMA_WRITE_ONLY);
 157                        } else {
 158                                qp->s_state =
 159                                        OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE);
 160                                /* Immediate data comes after the RETH */
 161                                ohdr->u.rc.imm_data = wqe->wr.ex.imm_data;
 162                                hwords += 1;
 163                                if (wqe->wr.send_flags & IB_SEND_SOLICITED)
 164                                        bth0 |= IB_BTH_SOLICITED;
 165                        }
 166                        qp->s_wqe = wqe;
 167                        if (++qp->s_cur >= qp->s_size)
 168                                qp->s_cur = 0;
 169                        break;
 170
 171                default:
 172                        goto bail;
 173                }
 174                break;
 175
 176        case OP(SEND_FIRST):
 177                qp->s_state = OP(SEND_MIDDLE);
 178                fallthrough;
 179        case OP(SEND_MIDDLE):
 180                len = qp->s_len;
 181                if (len > pmtu) {
 182                        len = pmtu;
 183                        middle = HFI1_CAP_IS_KSET(SDMA_AHG);
 184                        break;
 185                }
 186                if (wqe->wr.opcode == IB_WR_SEND) {
 187                        qp->s_state = OP(SEND_LAST);
 188                } else {
 189                        qp->s_state = OP(SEND_LAST_WITH_IMMEDIATE);
 190                        /* Immediate data comes after the BTH */
 191                        ohdr->u.imm_data = wqe->wr.ex.imm_data;
 192                        hwords += 1;
 193                }
 194                if (wqe->wr.send_flags & IB_SEND_SOLICITED)
 195                        bth0 |= IB_BTH_SOLICITED;
 196                qp->s_wqe = wqe;
 197                if (++qp->s_cur >= qp->s_size)
 198                        qp->s_cur = 0;
 199                break;
 200
 201        case OP(RDMA_WRITE_FIRST):
 202                qp->s_state = OP(RDMA_WRITE_MIDDLE);
 203                fallthrough;
 204        case OP(RDMA_WRITE_MIDDLE):
 205                len = qp->s_len;
 206                if (len > pmtu) {
 207                        len = pmtu;
 208                        middle = HFI1_CAP_IS_KSET(SDMA_AHG);
 209                        break;
 210                }
 211                if (wqe->wr.opcode == IB_WR_RDMA_WRITE) {
 212                        qp->s_state = OP(RDMA_WRITE_LAST);
 213                } else {
 214                        qp->s_state =
 215                                OP(RDMA_WRITE_LAST_WITH_IMMEDIATE);
 216                        /* Immediate data comes after the BTH */
 217                        ohdr->u.imm_data = wqe->wr.ex.imm_data;
 218                        hwords += 1;
 219                        if (wqe->wr.send_flags & IB_SEND_SOLICITED)
 220                                bth0 |= IB_BTH_SOLICITED;
 221                }
 222                qp->s_wqe = wqe;
 223                if (++qp->s_cur >= qp->s_size)
 224                        qp->s_cur = 0;
 225                break;
 226        }
 227        qp->s_len -= len;
 228        ps->s_txreq->hdr_dwords = hwords;
 229        ps->s_txreq->sde = priv->s_sde;
 230        ps->s_txreq->ss = &qp->s_sge;
 231        ps->s_txreq->s_cur_size = len;
 232        hfi1_make_ruc_header(qp, ohdr, bth0 | (qp->s_state << 24),
 233                             qp->remote_qpn, mask_psn(qp->s_psn++),
 234                             middle, ps);
 235        return 1;
 236
 237done_free_tx:
 238        hfi1_put_txreq(ps->s_txreq);
 239        ps->s_txreq = NULL;
 240        return 1;
 241
 242bail:
 243        hfi1_put_txreq(ps->s_txreq);
 244
 245bail_no_tx:
 246        ps->s_txreq = NULL;
 247        qp->s_flags &= ~RVT_S_BUSY;
 248        return 0;
 249}
 250
 251/**
 252 * hfi1_uc_rcv - handle an incoming UC packet
 253 * @packet: the packet structure
 254 *
 255 * This is called from qp_rcv() to process an incoming UC packet
 256 * for the given QP.
 257 * Called at interrupt level.
 258 */
 259void hfi1_uc_rcv(struct hfi1_packet *packet)
 260{
 261        struct hfi1_ibport *ibp = rcd_to_iport(packet->rcd);
 262        void *data = packet->payload;
 263        u32 tlen = packet->tlen;
 264        struct rvt_qp *qp = packet->qp;
 265        struct ib_other_headers *ohdr = packet->ohdr;
 266        u32 opcode = packet->opcode;
 267        u32 hdrsize = packet->hlen;
 268        u32 psn;
 269        u32 pad = packet->pad;
 270        struct ib_wc wc;
 271        u32 pmtu = qp->pmtu;
 272        struct ib_reth *reth;
 273        int ret;
 274        u8 extra_bytes = pad + packet->extra_byte + (SIZE_OF_CRC << 2);
 275
 276        if (hfi1_ruc_check_hdr(ibp, packet))
 277                return;
 278
 279        process_ecn(qp, packet);
 280
 281        psn = ib_bth_get_psn(ohdr);
 282        /* Compare the PSN verses the expected PSN. */
 283        if (unlikely(cmp_psn(psn, qp->r_psn) != 0)) {
 284                /*
 285                 * Handle a sequence error.
 286                 * Silently drop any current message.
 287                 */
 288                qp->r_psn = psn;
 289inv:
 290                if (qp->r_state == OP(SEND_FIRST) ||
 291                    qp->r_state == OP(SEND_MIDDLE)) {
 292                        set_bit(RVT_R_REWIND_SGE, &qp->r_aflags);
 293                        qp->r_sge.num_sge = 0;
 294                } else {
 295                        rvt_put_ss(&qp->r_sge);
 296                }
 297                qp->r_state = OP(SEND_LAST);
 298                switch (opcode) {
 299                case OP(SEND_FIRST):
 300                case OP(SEND_ONLY):
 301                case OP(SEND_ONLY_WITH_IMMEDIATE):
 302                        goto send_first;
 303
 304                case OP(RDMA_WRITE_FIRST):
 305                case OP(RDMA_WRITE_ONLY):
 306                case OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE):
 307                        goto rdma_first;
 308
 309                default:
 310                        goto drop;
 311                }
 312        }
 313
 314        /* Check for opcode sequence errors. */
 315        switch (qp->r_state) {
 316        case OP(SEND_FIRST):
 317        case OP(SEND_MIDDLE):
 318                if (opcode == OP(SEND_MIDDLE) ||
 319                    opcode == OP(SEND_LAST) ||
 320                    opcode == OP(SEND_LAST_WITH_IMMEDIATE))
 321                        break;
 322                goto inv;
 323
 324        case OP(RDMA_WRITE_FIRST):
 325        case OP(RDMA_WRITE_MIDDLE):
 326                if (opcode == OP(RDMA_WRITE_MIDDLE) ||
 327                    opcode == OP(RDMA_WRITE_LAST) ||
 328                    opcode == OP(RDMA_WRITE_LAST_WITH_IMMEDIATE))
 329                        break;
 330                goto inv;
 331
 332        default:
 333                if (opcode == OP(SEND_FIRST) ||
 334                    opcode == OP(SEND_ONLY) ||
 335                    opcode == OP(SEND_ONLY_WITH_IMMEDIATE) ||
 336                    opcode == OP(RDMA_WRITE_FIRST) ||
 337                    opcode == OP(RDMA_WRITE_ONLY) ||
 338                    opcode == OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE))
 339                        break;
 340                goto inv;
 341        }
 342
 343        if (qp->state == IB_QPS_RTR && !(qp->r_flags & RVT_R_COMM_EST))
 344                rvt_comm_est(qp);
 345
 346        /* OK, process the packet. */
 347        switch (opcode) {
 348        case OP(SEND_FIRST):
 349        case OP(SEND_ONLY):
 350        case OP(SEND_ONLY_WITH_IMMEDIATE):
 351send_first:
 352                if (test_and_clear_bit(RVT_R_REWIND_SGE, &qp->r_aflags)) {
 353                        qp->r_sge = qp->s_rdma_read_sge;
 354                } else {
 355                        ret = rvt_get_rwqe(qp, false);
 356                        if (ret < 0)
 357                                goto op_err;
 358                        if (!ret)
 359                                goto drop;
 360                        /*
 361                         * qp->s_rdma_read_sge will be the owner
 362                         * of the mr references.
 363                         */
 364                        qp->s_rdma_read_sge = qp->r_sge;
 365                }
 366                qp->r_rcv_len = 0;
 367                if (opcode == OP(SEND_ONLY))
 368                        goto no_immediate_data;
 369                else if (opcode == OP(SEND_ONLY_WITH_IMMEDIATE))
 370                        goto send_last_imm;
 371                fallthrough;
 372        case OP(SEND_MIDDLE):
 373                /* Check for invalid length PMTU or posted rwqe len. */
 374                /*
 375                 * There will be no padding for 9B packet but 16B packets
 376                 * will come in with some padding since we always add
 377                 * CRC and LT bytes which will need to be flit aligned
 378                 */
 379                if (unlikely(tlen != (hdrsize + pmtu + extra_bytes)))
 380                        goto rewind;
 381                qp->r_rcv_len += pmtu;
 382                if (unlikely(qp->r_rcv_len > qp->r_len))
 383                        goto rewind;
 384                rvt_copy_sge(qp, &qp->r_sge, data, pmtu, false, false);
 385                break;
 386
 387        case OP(SEND_LAST_WITH_IMMEDIATE):
 388send_last_imm:
 389                wc.ex.imm_data = ohdr->u.imm_data;
 390                wc.wc_flags = IB_WC_WITH_IMM;
 391                goto send_last;
 392        case OP(SEND_LAST):
 393no_immediate_data:
 394                wc.ex.imm_data = 0;
 395                wc.wc_flags = 0;
 396send_last:
 397                /* Check for invalid length. */
 398                /* LAST len should be >= 1 */
 399                if (unlikely(tlen < (hdrsize + extra_bytes)))
 400                        goto rewind;
 401                /* Don't count the CRC. */
 402                tlen -= (hdrsize + extra_bytes);
 403                wc.byte_len = tlen + qp->r_rcv_len;
 404                if (unlikely(wc.byte_len > qp->r_len))
 405                        goto rewind;
 406                wc.opcode = IB_WC_RECV;
 407                rvt_copy_sge(qp, &qp->r_sge, data, tlen, false, false);
 408                rvt_put_ss(&qp->s_rdma_read_sge);
 409last_imm:
 410                wc.wr_id = qp->r_wr_id;
 411                wc.status = IB_WC_SUCCESS;
 412                wc.qp = &qp->ibqp;
 413                wc.src_qp = qp->remote_qpn;
 414                wc.slid = rdma_ah_get_dlid(&qp->remote_ah_attr) & U16_MAX;
 415                /*
 416                 * It seems that IB mandates the presence of an SL in a
 417                 * work completion only for the UD transport (see section
 418                 * 11.4.2 of IBTA Vol. 1).
 419                 *
 420                 * However, the way the SL is chosen below is consistent
 421                 * with the way that IB/qib works and is trying avoid
 422                 * introducing incompatibilities.
 423                 *
 424                 * See also OPA Vol. 1, section 9.7.6, and table 9-17.
 425                 */
 426                wc.sl = rdma_ah_get_sl(&qp->remote_ah_attr);
 427                /* zero fields that are N/A */
 428                wc.vendor_err = 0;
 429                wc.pkey_index = 0;
 430                wc.dlid_path_bits = 0;
 431                wc.port_num = 0;
 432                /* Signal completion event if the solicited bit is set. */
 433                rvt_recv_cq(qp, &wc, ib_bth_is_solicited(ohdr));
 434                break;
 435
 436        case OP(RDMA_WRITE_FIRST):
 437        case OP(RDMA_WRITE_ONLY):
 438        case OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE): /* consume RWQE */
 439rdma_first:
 440                if (unlikely(!(qp->qp_access_flags &
 441                               IB_ACCESS_REMOTE_WRITE))) {
 442                        goto drop;
 443                }
 444                reth = &ohdr->u.rc.reth;
 445                qp->r_len = be32_to_cpu(reth->length);
 446                qp->r_rcv_len = 0;
 447                qp->r_sge.sg_list = NULL;
 448                if (qp->r_len != 0) {
 449                        u32 rkey = be32_to_cpu(reth->rkey);
 450                        u64 vaddr = be64_to_cpu(reth->vaddr);
 451                        int ok;
 452
 453                        /* Check rkey */
 454                        ok = rvt_rkey_ok(qp, &qp->r_sge.sge, qp->r_len,
 455                                         vaddr, rkey, IB_ACCESS_REMOTE_WRITE);
 456                        if (unlikely(!ok))
 457                                goto drop;
 458                        qp->r_sge.num_sge = 1;
 459                } else {
 460                        qp->r_sge.num_sge = 0;
 461                        qp->r_sge.sge.mr = NULL;
 462                        qp->r_sge.sge.vaddr = NULL;
 463                        qp->r_sge.sge.length = 0;
 464                        qp->r_sge.sge.sge_length = 0;
 465                }
 466                if (opcode == OP(RDMA_WRITE_ONLY)) {
 467                        goto rdma_last;
 468                } else if (opcode == OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE)) {
 469                        wc.ex.imm_data = ohdr->u.rc.imm_data;
 470                        goto rdma_last_imm;
 471                }
 472                fallthrough;
 473        case OP(RDMA_WRITE_MIDDLE):
 474                /* Check for invalid length PMTU or posted rwqe len. */
 475                if (unlikely(tlen != (hdrsize + pmtu + 4)))
 476                        goto drop;
 477                qp->r_rcv_len += pmtu;
 478                if (unlikely(qp->r_rcv_len > qp->r_len))
 479                        goto drop;
 480                rvt_copy_sge(qp, &qp->r_sge, data, pmtu, true, false);
 481                break;
 482
 483        case OP(RDMA_WRITE_LAST_WITH_IMMEDIATE):
 484                wc.ex.imm_data = ohdr->u.imm_data;
 485rdma_last_imm:
 486                wc.wc_flags = IB_WC_WITH_IMM;
 487
 488                /* Check for invalid length. */
 489                /* LAST len should be >= 1 */
 490                if (unlikely(tlen < (hdrsize + pad + 4)))
 491                        goto drop;
 492                /* Don't count the CRC. */
 493                tlen -= (hdrsize + extra_bytes);
 494                if (unlikely(tlen + qp->r_rcv_len != qp->r_len))
 495                        goto drop;
 496                if (test_and_clear_bit(RVT_R_REWIND_SGE, &qp->r_aflags)) {
 497                        rvt_put_ss(&qp->s_rdma_read_sge);
 498                } else {
 499                        ret = rvt_get_rwqe(qp, true);
 500                        if (ret < 0)
 501                                goto op_err;
 502                        if (!ret)
 503                                goto drop;
 504                }
 505                wc.byte_len = qp->r_len;
 506                wc.opcode = IB_WC_RECV_RDMA_WITH_IMM;
 507                rvt_copy_sge(qp, &qp->r_sge, data, tlen, true, false);
 508                rvt_put_ss(&qp->r_sge);
 509                goto last_imm;
 510
 511        case OP(RDMA_WRITE_LAST):
 512rdma_last:
 513                /* Check for invalid length. */
 514                /* LAST len should be >= 1 */
 515                if (unlikely(tlen < (hdrsize + pad + 4)))
 516                        goto drop;
 517                /* Don't count the CRC. */
 518                tlen -= (hdrsize + extra_bytes);
 519                if (unlikely(tlen + qp->r_rcv_len != qp->r_len))
 520                        goto drop;
 521                rvt_copy_sge(qp, &qp->r_sge, data, tlen, true, false);
 522                rvt_put_ss(&qp->r_sge);
 523                break;
 524
 525        default:
 526                /* Drop packet for unknown opcodes. */
 527                goto drop;
 528        }
 529        qp->r_psn++;
 530        qp->r_state = opcode;
 531        return;
 532
 533rewind:
 534        set_bit(RVT_R_REWIND_SGE, &qp->r_aflags);
 535        qp->r_sge.num_sge = 0;
 536drop:
 537        ibp->rvp.n_pkt_drops++;
 538        return;
 539
 540op_err:
 541        rvt_rc_error(qp, IB_WC_LOC_QP_OP_ERR);
 542}
 543