linux/drivers/infiniband/hw/hfi1/rc.c
   1/*
   2 * Copyright(c) 2015, 2016 Intel Corporation.
   3 *
   4 * This file is provided under a dual BSD/GPLv2 license.  When using or
   5 * redistributing this file, you may do so under either license.
   6 *
   7 * GPL LICENSE SUMMARY
   8 *
   9 * This program is free software; you can redistribute it and/or modify
  10 * it under the terms of version 2 of the GNU General Public License as
  11 * published by the Free Software Foundation.
  12 *
  13 * This program is distributed in the hope that it will be useful, but
  14 * WITHOUT ANY WARRANTY; without even the implied warranty of
  15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  16 * General Public License for more details.
  17 *
  18 * BSD LICENSE
  19 *
  20 * Redistribution and use in source and binary forms, with or without
  21 * modification, are permitted provided that the following conditions
  22 * are met:
  23 *
  24 *  - Redistributions of source code must retain the above copyright
  25 *    notice, this list of conditions and the following disclaimer.
  26 *  - Redistributions in binary form must reproduce the above copyright
  27 *    notice, this list of conditions and the following disclaimer in
  28 *    the documentation and/or other materials provided with the
  29 *    distribution.
  30 *  - Neither the name of Intel Corporation nor the names of its
  31 *    contributors may be used to endorse or promote products derived
  32 *    from this software without specific prior written permission.
  33 *
  34 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  35 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  36 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  37 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  38 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  39 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  40 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  41 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  42 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  43 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  44 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  45 *
  46 */
  47
  48#include <linux/io.h>
  49#include <rdma/rdma_vt.h>
  50#include <rdma/rdmavt_qp.h>
  51
  52#include "hfi.h"
  53#include "qp.h"
  54#include "verbs_txreq.h"
  55#include "trace.h"
  56
  57/* cut down ridiculously long IB macro names */
   58#define OP(x) IB_OPCODE_RC_##x
  59
  60static u32 restart_sge(struct rvt_sge_state *ss, struct rvt_swqe *wqe,
  61                       u32 psn, u32 pmtu)
  62{
  63        u32 len;
  64
  65        len = delta_psn(psn, wqe->psn) * pmtu;
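        /*
         * delta_psn() here counts whole pmtu-sized packets already covered,
         * so len is the byte offset at which the send resumes.  For example,
         * restarting 3 PSNs past wqe->psn with a 4096-byte MTU skips the
         * first 12288 bytes of the payload.
         */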
  66        ss->sge = wqe->sg_list[0];
  67        ss->sg_list = wqe->sg_list + 1;
  68        ss->num_sge = wqe->wr.num_sge;
  69        ss->total_len = wqe->length;
  70        rvt_skip_sge(ss, len, false);
  71        return wqe->length - len;
  72}
  73
  74/**
  75 * make_rc_ack - construct a response packet (ACK, NAK, or RDMA read)
  76 * @dev: the device for this QP
  77 * @qp: a pointer to the QP
  78 * @ohdr: a pointer to the IB header being constructed
  79 * @ps: the xmit packet state
  80 *
  81 * Return 1 if constructed; otherwise, return 0.
  82 * Note that we are in the responder's side of the QP context.
  83 * Note the QP s_lock must be held.
  84 */
  85static int make_rc_ack(struct hfi1_ibdev *dev, struct rvt_qp *qp,
  86                       struct ib_other_headers *ohdr,
  87                       struct hfi1_pkt_state *ps)
  88{
  89        struct rvt_ack_entry *e;
  90        u32 hwords;
  91        u32 len;
  92        u32 bth0;
  93        u32 bth2;
  94        int middle = 0;
  95        u32 pmtu = qp->pmtu;
  96        struct hfi1_qp_priv *priv = qp->priv;
  97
  98        lockdep_assert_held(&qp->s_lock);
  99        /* Don't send an ACK if we aren't supposed to. */
 100        if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK))
 101                goto bail;
 102
 103        if (priv->hdr_type == HFI1_PKT_TYPE_9B)
 104                /* header size in 32-bit words LRH+BTH = (8+12)/4. */
 105                hwords = 5;
 106        else
 107                /* header size in 32-bit words 16B LRH+BTH = (16+12)/4. */
 108                hwords = 7;
 109
 110        switch (qp->s_ack_state) {
 111        case OP(RDMA_READ_RESPONSE_LAST):
 112        case OP(RDMA_READ_RESPONSE_ONLY):
 113                e = &qp->s_ack_queue[qp->s_tail_ack_queue];
 114                if (e->rdma_sge.mr) {
 115                        rvt_put_mr(e->rdma_sge.mr);
 116                        e->rdma_sge.mr = NULL;
 117                }
 118                /* FALLTHROUGH */
 119        case OP(ATOMIC_ACKNOWLEDGE):
 120                /*
 121                 * We can increment the tail pointer now that the last
 122                 * response has been sent instead of only being
 123                 * constructed.
 124                 */
 125                if (++qp->s_tail_ack_queue > HFI1_MAX_RDMA_ATOMIC)
 126                        qp->s_tail_ack_queue = 0;
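                /*
                 * The ACK queue has HFI1_MAX_RDMA_ATOMIC + 1 slots, so
                 * advancing past the last index wraps the tail back to 0.
                 */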
 127                /* FALLTHROUGH */
 128        case OP(SEND_ONLY):
 129        case OP(ACKNOWLEDGE):
 130                /* Check for no next entry in the queue. */
 131                if (qp->r_head_ack_queue == qp->s_tail_ack_queue) {
 132                        if (qp->s_flags & RVT_S_ACK_PENDING)
 133                                goto normal;
 134                        goto bail;
 135                }
 136
 137                e = &qp->s_ack_queue[qp->s_tail_ack_queue];
 138                if (e->opcode == OP(RDMA_READ_REQUEST)) {
 139                        /*
 140                         * If a RDMA read response is being resent and
 141                         * we haven't seen the duplicate request yet,
 142                         * then stop sending the remaining responses the
 143                         * responder has seen until the requester re-sends it.
 144                         */
 145                        len = e->rdma_sge.sge_length;
 146                        if (len && !e->rdma_sge.mr) {
 147                                qp->s_tail_ack_queue = qp->r_head_ack_queue;
 148                                goto bail;
 149                        }
 150                        /* Copy SGE state in case we need to resend */
 151                        ps->s_txreq->mr = e->rdma_sge.mr;
 152                        if (ps->s_txreq->mr)
 153                                rvt_get_mr(ps->s_txreq->mr);
 154                        qp->s_ack_rdma_sge.sge = e->rdma_sge;
 155                        qp->s_ack_rdma_sge.num_sge = 1;
 156                        ps->s_txreq->ss = &qp->s_ack_rdma_sge;
 157                        if (len > pmtu) {
 158                                len = pmtu;
 159                                qp->s_ack_state = OP(RDMA_READ_RESPONSE_FIRST);
 160                        } else {
 161                                qp->s_ack_state = OP(RDMA_READ_RESPONSE_ONLY);
 162                                e->sent = 1;
 163                        }
 164                        ohdr->u.aeth = rvt_compute_aeth(qp);
 165                        hwords++;
 166                        qp->s_ack_rdma_psn = e->psn;
 167                        bth2 = mask_psn(qp->s_ack_rdma_psn++);
 168                } else {
 169                        /* COMPARE_SWAP or FETCH_ADD */
 170                        ps->s_txreq->ss = NULL;
 171                        len = 0;
 172                        qp->s_ack_state = OP(ATOMIC_ACKNOWLEDGE);
 173                        ohdr->u.at.aeth = rvt_compute_aeth(qp);
 174                        ib_u64_put(e->atomic_data, &ohdr->u.at.atomic_ack_eth);
 175                        hwords += sizeof(ohdr->u.at) / sizeof(u32);
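                        /*
                         * ohdr->u.at is the 4-byte AETH plus the 8-byte
                         * original value, i.e. three more 32-bit words.
                         */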
 176                        bth2 = mask_psn(e->psn);
 177                        e->sent = 1;
 178                }
 179                bth0 = qp->s_ack_state << 24;
 180                break;
 181
 182        case OP(RDMA_READ_RESPONSE_FIRST):
 183                qp->s_ack_state = OP(RDMA_READ_RESPONSE_MIDDLE);
 184                /* FALLTHROUGH */
 185        case OP(RDMA_READ_RESPONSE_MIDDLE):
 186                ps->s_txreq->ss = &qp->s_ack_rdma_sge;
 187                ps->s_txreq->mr = qp->s_ack_rdma_sge.sge.mr;
 188                if (ps->s_txreq->mr)
 189                        rvt_get_mr(ps->s_txreq->mr);
 190                len = qp->s_ack_rdma_sge.sge.sge_length;
 191                if (len > pmtu) {
 192                        len = pmtu;
 193                        middle = HFI1_CAP_IS_KSET(SDMA_AHG);
 194                } else {
 195                        ohdr->u.aeth = rvt_compute_aeth(qp);
 196                        hwords++;
 197                        qp->s_ack_state = OP(RDMA_READ_RESPONSE_LAST);
 198                        e = &qp->s_ack_queue[qp->s_tail_ack_queue];
 199                        e->sent = 1;
 200                }
 201                bth0 = qp->s_ack_state << 24;
 202                bth2 = mask_psn(qp->s_ack_rdma_psn++);
 203                break;
 204
 205        default:
 206normal:
 207                /*
 208                 * Send a regular ACK.
 209                 * Set the s_ack_state so we wait until after sending
 210                 * the ACK before setting s_ack_state to ACKNOWLEDGE
 211                 * (see above).
 212                 */
 213                qp->s_ack_state = OP(SEND_ONLY);
 214                qp->s_flags &= ~RVT_S_ACK_PENDING;
 215                ps->s_txreq->ss = NULL;
 216                if (qp->s_nak_state)
 217                        ohdr->u.aeth =
 218                                cpu_to_be32((qp->r_msn & IB_MSN_MASK) |
 219                                            (qp->s_nak_state <<
 220                                             IB_AETH_CREDIT_SHIFT));
 221                else
 222                        ohdr->u.aeth = rvt_compute_aeth(qp);
 223                hwords++;
 224                len = 0;
 225                bth0 = OP(ACKNOWLEDGE) << 24;
 226                bth2 = mask_psn(qp->s_ack_psn);
 227        }
 228        qp->s_rdma_ack_cnt++;
 229        ps->s_txreq->sde = priv->s_sde;
 230        ps->s_txreq->s_cur_size = len;
 231        ps->s_txreq->hdr_dwords = hwords;
 232        hfi1_make_ruc_header(qp, ohdr, bth0, bth2, middle, ps);
 233        return 1;
 234
 235bail:
 236        qp->s_ack_state = OP(ACKNOWLEDGE);
 237        /*
 238         * Ensure s_rdma_ack_cnt changes are committed prior to resetting
 239         * RVT_S_RESP_PENDING
 240         */
 241        smp_wmb();
 242        qp->s_flags &= ~(RVT_S_RESP_PENDING
 243                                | RVT_S_ACK_PENDING
 244                                | RVT_S_AHG_VALID);
 245        return 0;
 246}
 247
 248/**
 249 * hfi1_make_rc_req - construct a request packet (SEND, RDMA r/w, ATOMIC)
 250 * @qp: a pointer to the QP
 251 *
 252 * Assumes s_lock is held.
 253 *
 254 * Return 1 if constructed; otherwise, return 0.
 255 */
 256int hfi1_make_rc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
 257{
 258        struct hfi1_qp_priv *priv = qp->priv;
 259        struct hfi1_ibdev *dev = to_idev(qp->ibqp.device);
 260        struct ib_other_headers *ohdr;
 261        struct rvt_sge_state *ss;
 262        struct rvt_swqe *wqe;
 263        u32 hwords;
 264        u32 len;
 265        u32 bth0 = 0;
 266        u32 bth2;
 267        u32 pmtu = qp->pmtu;
 268        char newreq;
 269        int middle = 0;
 270        int delta;
 271
 272        lockdep_assert_held(&qp->s_lock);
 273        ps->s_txreq = get_txreq(ps->dev, qp);
 274        if (IS_ERR(ps->s_txreq))
 275                goto bail_no_tx;
 276
 277        if (priv->hdr_type == HFI1_PKT_TYPE_9B) {
 278                /* header size in 32-bit words LRH+BTH = (8+12)/4. */
 279                hwords = 5;
 280                if (rdma_ah_get_ah_flags(&qp->remote_ah_attr) & IB_AH_GRH)
 281                        ohdr = &ps->s_txreq->phdr.hdr.ibh.u.l.oth;
 282                else
 283                        ohdr = &ps->s_txreq->phdr.hdr.ibh.u.oth;
 284        } else {
 285                /* header size in 32-bit words 16B LRH+BTH = (16+12)/4. */
 286                hwords = 7;
 287                if ((rdma_ah_get_ah_flags(&qp->remote_ah_attr) & IB_AH_GRH) &&
 288                    (hfi1_check_mcast(rdma_ah_get_dlid(&qp->remote_ah_attr))))
 289                        ohdr = &ps->s_txreq->phdr.hdr.opah.u.l.oth;
 290                else
 291                        ohdr = &ps->s_txreq->phdr.hdr.opah.u.oth;
 292        }
 293
  294        /* Sending responses takes priority over sending requests. */
 295        if ((qp->s_flags & RVT_S_RESP_PENDING) &&
 296            make_rc_ack(dev, qp, ohdr, ps))
 297                return 1;
 298
 299        if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_SEND_OK)) {
 300                if (!(ib_rvt_state_ops[qp->state] & RVT_FLUSH_SEND))
 301                        goto bail;
 302                /* We are in the error state, flush the work request. */
 303                if (qp->s_last == READ_ONCE(qp->s_head))
 304                        goto bail;
 305                /* If DMAs are in progress, we can't flush immediately. */
 306                if (iowait_sdma_pending(&priv->s_iowait)) {
 307                        qp->s_flags |= RVT_S_WAIT_DMA;
 308                        goto bail;
 309                }
 310                clear_ahg(qp);
 311                wqe = rvt_get_swqe_ptr(qp, qp->s_last);
 312                hfi1_send_complete(qp, wqe, qp->s_last != qp->s_acked ?
 313                        IB_WC_SUCCESS : IB_WC_WR_FLUSH_ERR);
 314                /* will get called again */
 315                goto done_free_tx;
 316        }
 317
 318        if (qp->s_flags & (RVT_S_WAIT_RNR | RVT_S_WAIT_ACK))
 319                goto bail;
 320
 321        if (cmp_psn(qp->s_psn, qp->s_sending_hpsn) <= 0) {
 322                if (cmp_psn(qp->s_sending_psn, qp->s_sending_hpsn) <= 0) {
 323                        qp->s_flags |= RVT_S_WAIT_PSN;
 324                        goto bail;
 325                }
 326                qp->s_sending_psn = qp->s_psn;
 327                qp->s_sending_hpsn = qp->s_psn - 1;
 328        }
 329
 330        /* Send a request. */
 331        wqe = rvt_get_swqe_ptr(qp, qp->s_cur);
 332        switch (qp->s_state) {
 333        default:
 334                if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_NEXT_SEND_OK))
 335                        goto bail;
 336                /*
 337                 * Resend an old request or start a new one.
 338                 *
 339                 * We keep track of the current SWQE so that
 340                 * we don't reset the "furthest progress" state
 341                 * if we need to back up.
 342                 */
 343                newreq = 0;
 344                if (qp->s_cur == qp->s_tail) {
 345                        /* Check if send work queue is empty. */
 346                        if (qp->s_tail == READ_ONCE(qp->s_head)) {
 347                                clear_ahg(qp);
 348                                goto bail;
 349                        }
 350                        /*
 351                         * If a fence is requested, wait for previous
 352                         * RDMA read and atomic operations to finish.
 353                         */
 354                        if ((wqe->wr.send_flags & IB_SEND_FENCE) &&
 355                            qp->s_num_rd_atomic) {
 356                                qp->s_flags |= RVT_S_WAIT_FENCE;
 357                                goto bail;
 358                        }
 359                        /*
 360                         * Local operations are processed immediately
 361                         * after all prior requests have completed
 362                         */
 363                        if (wqe->wr.opcode == IB_WR_REG_MR ||
 364                            wqe->wr.opcode == IB_WR_LOCAL_INV) {
 365                                int local_ops = 0;
 366                                int err = 0;
 367
 368                                if (qp->s_last != qp->s_cur)
 369                                        goto bail;
 370                                if (++qp->s_cur == qp->s_size)
 371                                        qp->s_cur = 0;
 372                                if (++qp->s_tail == qp->s_size)
 373                                        qp->s_tail = 0;
 374                                if (!(wqe->wr.send_flags &
 375                                      RVT_SEND_COMPLETION_ONLY)) {
 376                                        err = rvt_invalidate_rkey(
 377                                                qp,
 378                                                wqe->wr.ex.invalidate_rkey);
 379                                        local_ops = 1;
 380                                }
 381                                hfi1_send_complete(qp, wqe,
 382                                                   err ? IB_WC_LOC_PROT_ERR
 383                                                       : IB_WC_SUCCESS);
 384                                if (local_ops)
 385                                        atomic_dec(&qp->local_ops_pending);
 386                                goto done_free_tx;
 387                        }
 388
 389                        newreq = 1;
 390                        qp->s_psn = wqe->psn;
 391                }
 392                /*
 393                 * Note that we have to be careful not to modify the
 394                 * original work request since we may need to resend
 395                 * it.
 396                 */
 397                len = wqe->length;
 398                ss = &qp->s_sge;
 399                bth2 = mask_psn(qp->s_psn);
 400                switch (wqe->wr.opcode) {
 401                case IB_WR_SEND:
 402                case IB_WR_SEND_WITH_IMM:
 403                case IB_WR_SEND_WITH_INV:
 404                        /* If no credit, return. */
 405                        if (!(qp->s_flags & RVT_S_UNLIMITED_CREDIT) &&
 406                            rvt_cmp_msn(wqe->ssn, qp->s_lsn + 1) > 0) {
 407                                qp->s_flags |= RVT_S_WAIT_SSN_CREDIT;
 408                                goto bail;
 409                        }
 410                        if (len > pmtu) {
 411                                qp->s_state = OP(SEND_FIRST);
 412                                len = pmtu;
 413                                break;
 414                        }
 415                        if (wqe->wr.opcode == IB_WR_SEND) {
 416                                qp->s_state = OP(SEND_ONLY);
 417                        } else if (wqe->wr.opcode == IB_WR_SEND_WITH_IMM) {
 418                                qp->s_state = OP(SEND_ONLY_WITH_IMMEDIATE);
 419                                /* Immediate data comes after the BTH */
 420                                ohdr->u.imm_data = wqe->wr.ex.imm_data;
 421                                hwords += 1;
 422                        } else {
 423                                qp->s_state = OP(SEND_ONLY_WITH_INVALIDATE);
 424                                /* Invalidate rkey comes after the BTH */
 425                                ohdr->u.ieth = cpu_to_be32(
 426                                                wqe->wr.ex.invalidate_rkey);
 427                                hwords += 1;
 428                        }
 429                        if (wqe->wr.send_flags & IB_SEND_SOLICITED)
 430                                bth0 |= IB_BTH_SOLICITED;
 431                        bth2 |= IB_BTH_REQ_ACK;
 432                        if (++qp->s_cur == qp->s_size)
 433                                qp->s_cur = 0;
 434                        break;
 435
 436                case IB_WR_RDMA_WRITE:
 437                        if (newreq && !(qp->s_flags & RVT_S_UNLIMITED_CREDIT))
 438                                qp->s_lsn++;
 439                        goto no_flow_control;
 440                case IB_WR_RDMA_WRITE_WITH_IMM:
 441                        /* If no credit, return. */
 442                        if (!(qp->s_flags & RVT_S_UNLIMITED_CREDIT) &&
 443                            rvt_cmp_msn(wqe->ssn, qp->s_lsn + 1) > 0) {
 444                                qp->s_flags |= RVT_S_WAIT_SSN_CREDIT;
 445                                goto bail;
 446                        }
 447no_flow_control:
 448                        put_ib_reth_vaddr(
 449                                wqe->rdma_wr.remote_addr,
 450                                &ohdr->u.rc.reth);
 451                        ohdr->u.rc.reth.rkey =
 452                                cpu_to_be32(wqe->rdma_wr.rkey);
 453                        ohdr->u.rc.reth.length = cpu_to_be32(len);
 454                        hwords += sizeof(struct ib_reth) / sizeof(u32);
 455                        if (len > pmtu) {
 456                                qp->s_state = OP(RDMA_WRITE_FIRST);
 457                                len = pmtu;
 458                                break;
 459                        }
 460                        if (wqe->wr.opcode == IB_WR_RDMA_WRITE) {
 461                                qp->s_state = OP(RDMA_WRITE_ONLY);
 462                        } else {
 463                                qp->s_state =
 464                                        OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE);
 465                                /* Immediate data comes after RETH */
 466                                ohdr->u.rc.imm_data = wqe->wr.ex.imm_data;
 467                                hwords += 1;
 468                                if (wqe->wr.send_flags & IB_SEND_SOLICITED)
 469                                        bth0 |= IB_BTH_SOLICITED;
 470                        }
 471                        bth2 |= IB_BTH_REQ_ACK;
 472                        if (++qp->s_cur == qp->s_size)
 473                                qp->s_cur = 0;
 474                        break;
 475
 476                case IB_WR_RDMA_READ:
 477                        /*
 478                         * Don't allow more operations to be started
 479                         * than the QP limits allow.
 480                         */
 481                        if (newreq) {
 482                                if (qp->s_num_rd_atomic >=
 483                                    qp->s_max_rd_atomic) {
 484                                        qp->s_flags |= RVT_S_WAIT_RDMAR;
 485                                        goto bail;
 486                                }
 487                                qp->s_num_rd_atomic++;
 488                                if (!(qp->s_flags & RVT_S_UNLIMITED_CREDIT))
 489                                        qp->s_lsn++;
 490                        }
 491                        put_ib_reth_vaddr(
 492                                wqe->rdma_wr.remote_addr,
 493                                &ohdr->u.rc.reth);
 494                        ohdr->u.rc.reth.rkey =
 495                                cpu_to_be32(wqe->rdma_wr.rkey);
 496                        ohdr->u.rc.reth.length = cpu_to_be32(len);
 497                        qp->s_state = OP(RDMA_READ_REQUEST);
 498                        hwords += sizeof(ohdr->u.rc.reth) / sizeof(u32);
 499                        ss = NULL;
 500                        len = 0;
 501                        bth2 |= IB_BTH_REQ_ACK;
 502                        if (++qp->s_cur == qp->s_size)
 503                                qp->s_cur = 0;
 504                        break;
 505
 506                case IB_WR_ATOMIC_CMP_AND_SWP:
 507                case IB_WR_ATOMIC_FETCH_AND_ADD:
 508                        /*
 509                         * Don't allow more operations to be started
 510                         * than the QP limits allow.
 511                         */
 512                        if (newreq) {
 513                                if (qp->s_num_rd_atomic >=
 514                                    qp->s_max_rd_atomic) {
 515                                        qp->s_flags |= RVT_S_WAIT_RDMAR;
 516                                        goto bail;
 517                                }
 518                                qp->s_num_rd_atomic++;
 519                                if (!(qp->s_flags & RVT_S_UNLIMITED_CREDIT))
 520                                        qp->s_lsn++;
 521                        }
 522                        if (wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP) {
 523                                qp->s_state = OP(COMPARE_SWAP);
 524                                put_ib_ateth_swap(wqe->atomic_wr.swap,
 525                                                  &ohdr->u.atomic_eth);
 526                                put_ib_ateth_compare(wqe->atomic_wr.compare_add,
 527                                                     &ohdr->u.atomic_eth);
 528                        } else {
 529                                qp->s_state = OP(FETCH_ADD);
 530                                put_ib_ateth_swap(wqe->atomic_wr.compare_add,
 531                                                  &ohdr->u.atomic_eth);
 532                                put_ib_ateth_compare(0, &ohdr->u.atomic_eth);
 533                        }
 534                        put_ib_ateth_vaddr(wqe->atomic_wr.remote_addr,
 535                                           &ohdr->u.atomic_eth);
 536                        ohdr->u.atomic_eth.rkey = cpu_to_be32(
 537                                wqe->atomic_wr.rkey);
 538                        hwords += sizeof(struct ib_atomic_eth) / sizeof(u32);
 539                        ss = NULL;
 540                        len = 0;
 541                        bth2 |= IB_BTH_REQ_ACK;
 542                        if (++qp->s_cur == qp->s_size)
 543                                qp->s_cur = 0;
 544                        break;
 545
 546                default:
 547                        goto bail;
 548                }
 549                qp->s_sge.sge = wqe->sg_list[0];
 550                qp->s_sge.sg_list = wqe->sg_list + 1;
 551                qp->s_sge.num_sge = wqe->wr.num_sge;
 552                qp->s_sge.total_len = wqe->length;
 553                qp->s_len = wqe->length;
 554                if (newreq) {
 555                        qp->s_tail++;
 556                        if (qp->s_tail >= qp->s_size)
 557                                qp->s_tail = 0;
 558                }
 559                if (wqe->wr.opcode == IB_WR_RDMA_READ)
 560                        qp->s_psn = wqe->lpsn + 1;
 561                else
 562                        qp->s_psn++;
 563                break;
 564
 565        case OP(RDMA_READ_RESPONSE_FIRST):
 566                /*
 567                 * qp->s_state is normally set to the opcode of the
 568                 * last packet constructed for new requests and therefore
 569                 * is never set to RDMA read response.
 570                 * RDMA_READ_RESPONSE_FIRST is used by the ACK processing
 571                 * thread to indicate a SEND needs to be restarted from an
 572                 * earlier PSN without interfering with the sending thread.
  573         * See hfi1_restart_rc().
 574                 */
 575                qp->s_len = restart_sge(&qp->s_sge, wqe, qp->s_psn, pmtu);
 576                /* FALLTHROUGH */
 577        case OP(SEND_FIRST):
 578                qp->s_state = OP(SEND_MIDDLE);
 579                /* FALLTHROUGH */
 580        case OP(SEND_MIDDLE):
 581                bth2 = mask_psn(qp->s_psn++);
 582                ss = &qp->s_sge;
 583                len = qp->s_len;
 584                if (len > pmtu) {
 585                        len = pmtu;
 586                        middle = HFI1_CAP_IS_KSET(SDMA_AHG);
 587                        break;
 588                }
 589                if (wqe->wr.opcode == IB_WR_SEND) {
 590                        qp->s_state = OP(SEND_LAST);
 591                } else if (wqe->wr.opcode == IB_WR_SEND_WITH_IMM) {
 592                        qp->s_state = OP(SEND_LAST_WITH_IMMEDIATE);
 593                        /* Immediate data comes after the BTH */
 594                        ohdr->u.imm_data = wqe->wr.ex.imm_data;
 595                        hwords += 1;
 596                } else {
 597                        qp->s_state = OP(SEND_LAST_WITH_INVALIDATE);
 598                        /* invalidate data comes after the BTH */
 599                        ohdr->u.ieth = cpu_to_be32(wqe->wr.ex.invalidate_rkey);
 600                        hwords += 1;
 601                }
 602                if (wqe->wr.send_flags & IB_SEND_SOLICITED)
 603                        bth0 |= IB_BTH_SOLICITED;
 604                bth2 |= IB_BTH_REQ_ACK;
 605                qp->s_cur++;
 606                if (qp->s_cur >= qp->s_size)
 607                        qp->s_cur = 0;
 608                break;
 609
 610        case OP(RDMA_READ_RESPONSE_LAST):
 611                /*
 612                 * qp->s_state is normally set to the opcode of the
 613                 * last packet constructed for new requests and therefore
 614                 * is never set to RDMA read response.
 615                 * RDMA_READ_RESPONSE_LAST is used by the ACK processing
 616                 * thread to indicate a RDMA write needs to be restarted from
 617                 * an earlier PSN without interfering with the sending thread.
  618         * See hfi1_restart_rc().
 619                 */
 620                qp->s_len = restart_sge(&qp->s_sge, wqe, qp->s_psn, pmtu);
 621                /* FALLTHROUGH */
 622        case OP(RDMA_WRITE_FIRST):
 623                qp->s_state = OP(RDMA_WRITE_MIDDLE);
 624                /* FALLTHROUGH */
 625        case OP(RDMA_WRITE_MIDDLE):
 626                bth2 = mask_psn(qp->s_psn++);
 627                ss = &qp->s_sge;
 628                len = qp->s_len;
 629                if (len > pmtu) {
 630                        len = pmtu;
 631                        middle = HFI1_CAP_IS_KSET(SDMA_AHG);
 632                        break;
 633                }
 634                if (wqe->wr.opcode == IB_WR_RDMA_WRITE) {
 635                        qp->s_state = OP(RDMA_WRITE_LAST);
 636                } else {
 637                        qp->s_state = OP(RDMA_WRITE_LAST_WITH_IMMEDIATE);
 638                        /* Immediate data comes after the BTH */
 639                        ohdr->u.imm_data = wqe->wr.ex.imm_data;
 640                        hwords += 1;
 641                        if (wqe->wr.send_flags & IB_SEND_SOLICITED)
 642                                bth0 |= IB_BTH_SOLICITED;
 643                }
 644                bth2 |= IB_BTH_REQ_ACK;
 645                qp->s_cur++;
 646                if (qp->s_cur >= qp->s_size)
 647                        qp->s_cur = 0;
 648                break;
 649
 650        case OP(RDMA_READ_RESPONSE_MIDDLE):
 651                /*
 652                 * qp->s_state is normally set to the opcode of the
 653                 * last packet constructed for new requests and therefore
 654                 * is never set to RDMA read response.
 655                 * RDMA_READ_RESPONSE_MIDDLE is used by the ACK processing
 656                 * thread to indicate a RDMA read needs to be restarted from
 657                 * an earlier PSN without interfering with the sending thread.
  658         * See hfi1_restart_rc().
 659                 */
 660                len = (delta_psn(qp->s_psn, wqe->psn)) * pmtu;
 661                put_ib_reth_vaddr(
 662                        wqe->rdma_wr.remote_addr + len,
 663                        &ohdr->u.rc.reth);
 664                ohdr->u.rc.reth.rkey =
 665                        cpu_to_be32(wqe->rdma_wr.rkey);
 666                ohdr->u.rc.reth.length = cpu_to_be32(wqe->length - len);
 667                qp->s_state = OP(RDMA_READ_REQUEST);
 668                hwords += sizeof(ohdr->u.rc.reth) / sizeof(u32);
 669                bth2 = mask_psn(qp->s_psn) | IB_BTH_REQ_ACK;
 670                qp->s_psn = wqe->lpsn + 1;
 671                ss = NULL;
 672                len = 0;
 673                qp->s_cur++;
 674                if (qp->s_cur == qp->s_size)
 675                        qp->s_cur = 0;
 676                break;
 677        }
 678        qp->s_sending_hpsn = bth2;
 679        delta = delta_psn(bth2, wqe->psn);
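        /*
         * Request an explicit ACK every HFI1_PSN_CREDIT packets of a long
         * message (e.g. with a credit of 16, every 16th packet) so the run
         * of unacknowledged PSNs stays bounded.
         */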
 680        if (delta && delta % HFI1_PSN_CREDIT == 0)
 681                bth2 |= IB_BTH_REQ_ACK;
 682        if (qp->s_flags & RVT_S_SEND_ONE) {
 683                qp->s_flags &= ~RVT_S_SEND_ONE;
 684                qp->s_flags |= RVT_S_WAIT_ACK;
 685                bth2 |= IB_BTH_REQ_ACK;
 686        }
 687        qp->s_len -= len;
 688        ps->s_txreq->hdr_dwords = hwords;
 689        ps->s_txreq->sde = priv->s_sde;
 690        ps->s_txreq->ss = ss;
 691        ps->s_txreq->s_cur_size = len;
 692        hfi1_make_ruc_header(
 693                qp,
 694                ohdr,
 695                bth0 | (qp->s_state << 24),
 696                bth2,
 697                middle,
 698                ps);
 699        return 1;
 700
 701done_free_tx:
 702        hfi1_put_txreq(ps->s_txreq);
 703        ps->s_txreq = NULL;
 704        return 1;
 705
 706bail:
 707        hfi1_put_txreq(ps->s_txreq);
 708
 709bail_no_tx:
 710        ps->s_txreq = NULL;
 711        qp->s_flags &= ~RVT_S_BUSY;
 712        return 0;
 713}
 714
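/*
 * Sketch of the AETH word built below (see rvt_compute_aeth()): the top
 * 8 bits carry the NAK code or credit syndrome and the low 24 bits carry
 * the MSN, i.e. roughly
 *
 *	aeth = (syndrome << IB_AETH_CREDIT_SHIFT) | (msn & IB_MSN_MASK);
 */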
 715static inline void hfi1_make_bth_aeth(struct rvt_qp *qp,
 716                                      struct ib_other_headers *ohdr,
 717                                      u32 bth0, u32 bth1)
 718{
 719        if (qp->r_nak_state)
 720                ohdr->u.aeth = cpu_to_be32((qp->r_msn & IB_MSN_MASK) |
 721                                            (qp->r_nak_state <<
 722                                             IB_AETH_CREDIT_SHIFT));
 723        else
 724                ohdr->u.aeth = rvt_compute_aeth(qp);
 725
 726        ohdr->bth[0] = cpu_to_be32(bth0);
 727        ohdr->bth[1] = cpu_to_be32(bth1 | qp->remote_qpn);
 728        ohdr->bth[2] = cpu_to_be32(mask_psn(qp->r_ack_psn));
 729}
 730
 731static inline void hfi1_queue_rc_ack(struct hfi1_packet *packet, bool is_fecn)
 732{
 733        struct rvt_qp *qp = packet->qp;
 734        struct hfi1_ibport *ibp;
 735        unsigned long flags;
 736
 737        spin_lock_irqsave(&qp->s_lock, flags);
 738        if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK))
 739                goto unlock;
 740        ibp = rcd_to_iport(packet->rcd);
 741        this_cpu_inc(*ibp->rvp.rc_qacks);
 742        qp->s_flags |= RVT_S_ACK_PENDING | RVT_S_RESP_PENDING;
 743        qp->s_nak_state = qp->r_nak_state;
 744        qp->s_ack_psn = qp->r_ack_psn;
 745        if (is_fecn)
 746                qp->s_flags |= RVT_S_ECN;
 747
 748        /* Schedule the send tasklet. */
 749        hfi1_schedule_send(qp);
 750unlock:
 751        spin_unlock_irqrestore(&qp->s_lock, flags);
 752}
 753
 754static inline void hfi1_make_rc_ack_9B(struct hfi1_packet *packet,
 755                                       struct hfi1_opa_header *opa_hdr,
 756                                       u8 sc5, bool is_fecn,
 757                                       u64 *pbc_flags, u32 *hwords,
 758                                       u32 *nwords)
 759{
 760        struct rvt_qp *qp = packet->qp;
 761        struct hfi1_ibport *ibp = rcd_to_iport(packet->rcd);
 762        struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
 763        struct ib_header *hdr = &opa_hdr->ibh;
 764        struct ib_other_headers *ohdr;
 765        u16 lrh0 = HFI1_LRH_BTH;
 766        u16 pkey;
 767        u32 bth0, bth1;
 768
 769        opa_hdr->hdr_type = HFI1_PKT_TYPE_9B;
 770        ohdr = &hdr->u.oth;
 771        /* header size in 32-bit words LRH+BTH+AETH = (8+12+4)/4 */
 772        *hwords = 6;
 773
 774        if (unlikely(rdma_ah_get_ah_flags(&qp->remote_ah_attr) & IB_AH_GRH)) {
 775                *hwords += hfi1_make_grh(ibp, &hdr->u.l.grh,
 776                                         rdma_ah_read_grh(&qp->remote_ah_attr),
 777                                         *hwords - 2, SIZE_OF_CRC);
 778                ohdr = &hdr->u.l.oth;
 779                lrh0 = HFI1_LRH_GRH;
 780        }
 781        /* set PBC_DC_INFO bit (aka SC[4]) in pbc_flags */
 782        *pbc_flags |= ((!!(sc5 & 0x10)) << PBC_DC_INFO_SHIFT);
 783
  784        /* read pkey_index w/o lock (it's atomic) */
 785        pkey = hfi1_get_pkey(ibp, qp->s_pkey_index);
 786
 787        lrh0 |= (sc5 & IB_SC_MASK) << IB_SC_SHIFT |
 788                (rdma_ah_get_sl(&qp->remote_ah_attr) & IB_SL_MASK) <<
 789                        IB_SL_SHIFT;
 790
 791        hfi1_make_ib_hdr(hdr, lrh0, *hwords + SIZE_OF_CRC,
 792                         opa_get_lid(rdma_ah_get_dlid(&qp->remote_ah_attr), 9B),
 793                         ppd->lid | rdma_ah_get_path_bits(&qp->remote_ah_attr));
 794
 795        bth0 = pkey | (OP(ACKNOWLEDGE) << 24);
 796        if (qp->s_mig_state == IB_MIG_MIGRATED)
 797                bth0 |= IB_BTH_MIG_REQ;
 798        bth1 = (!!is_fecn) << IB_BECN_SHIFT;
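        /* echo a received FECN back to the sender as a BECN in BTH[1] */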
 799        hfi1_make_bth_aeth(qp, ohdr, bth0, bth1);
 800}
 801
 802static inline void hfi1_make_rc_ack_16B(struct hfi1_packet *packet,
 803                                        struct hfi1_opa_header *opa_hdr,
 804                                        u8 sc5, bool is_fecn,
 805                                        u64 *pbc_flags, u32 *hwords,
 806                                        u32 *nwords)
 807{
 808        struct rvt_qp *qp = packet->qp;
 809        struct hfi1_ibport *ibp = rcd_to_iport(packet->rcd);
 810        struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
 811        struct hfi1_16b_header *hdr = &opa_hdr->opah;
 812        struct ib_other_headers *ohdr;
 813        u32 bth0, bth1 = 0;
 814        u16 len, pkey;
 815        bool becn = is_fecn;
 816        u8 l4 = OPA_16B_L4_IB_LOCAL;
 817        u8 extra_bytes;
 818
 819        opa_hdr->hdr_type = HFI1_PKT_TYPE_16B;
 820        ohdr = &hdr->u.oth;
 821        /* header size in 32-bit words 16B LRH+BTH+AETH = (16+12+4)/4 */
 822        *hwords = 8;
 823        extra_bytes = hfi1_get_16b_padding(*hwords << 2, 0);
 824        *nwords = SIZE_OF_CRC + ((extra_bytes + SIZE_OF_LT) >> 2);
 825
 826        if (unlikely(rdma_ah_get_ah_flags(&qp->remote_ah_attr) & IB_AH_GRH) &&
 827            hfi1_check_mcast(rdma_ah_get_dlid(&qp->remote_ah_attr))) {
 828                *hwords += hfi1_make_grh(ibp, &hdr->u.l.grh,
 829                                         rdma_ah_read_grh(&qp->remote_ah_attr),
 830                                         *hwords - 4, *nwords);
 831                ohdr = &hdr->u.l.oth;
 832                l4 = OPA_16B_L4_IB_GLOBAL;
 833        }
 834        *pbc_flags |= PBC_PACKET_BYPASS | PBC_INSERT_BYPASS_ICRC;
 835
  836        /* read pkey_index w/o lock (it's atomic) */
 837        pkey = hfi1_get_pkey(ibp, qp->s_pkey_index);
 838
 839        /* Convert dwords to flits */
 840        len = (*hwords + *nwords) >> 1;
 841
 842        hfi1_make_16b_hdr(hdr, ppd->lid |
 843                          (rdma_ah_get_path_bits(&qp->remote_ah_attr) &
 844                          ((1 << ppd->lmc) - 1)),
 845                          opa_get_lid(rdma_ah_get_dlid(&qp->remote_ah_attr),
 846                                      16B), len, pkey, becn, 0, l4, sc5);
 847
 848        bth0 = pkey | (OP(ACKNOWLEDGE) << 24);
 849        bth0 |= extra_bytes << 20;
 850        if (qp->s_mig_state == IB_MIG_MIGRATED)
 851                bth1 = OPA_BTH_MIG_REQ;
 852        hfi1_make_bth_aeth(qp, ohdr, bth0, bth1);
 853}
 854
 855typedef void (*hfi1_make_rc_ack)(struct hfi1_packet *packet,
 856                                 struct hfi1_opa_header *opa_hdr,
 857                                 u8 sc5, bool is_fecn,
 858                                 u64 *pbc_flags, u32 *hwords,
 859                                 u32 *nwords);
 860
 861/* We support only two types - 9B and 16B for now */
 862static const hfi1_make_rc_ack hfi1_make_rc_ack_tbl[2] = {
 863        [HFI1_PKT_TYPE_9B] = &hfi1_make_rc_ack_9B,
 864        [HFI1_PKT_TYPE_16B] = &hfi1_make_rc_ack_16B
 865};
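/*
 * For example, a QP with priv->hdr_type == HFI1_PKT_TYPE_9B is dispatched
 * to hfi1_make_rc_ack_9B() and the header is built in opa_hdr.ibh, while a
 * 16B QP uses hfi1_make_rc_ack_16B() and opa_hdr.opah; see
 * hfi1_send_rc_ack() below.
 */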
 866
 867/**
 868 * hfi1_send_rc_ack - Construct an ACK packet and send it
  870 * @packet: information about the incoming packet (carries the QP and rcd)
 870 *
 871 * This is called from hfi1_rc_rcv() and handle_receive_interrupt().
 872 * Note that RDMA reads and atomics are handled in the
 873 * send side QP state and send engine.
 874 */
 875void hfi1_send_rc_ack(struct hfi1_packet *packet, bool is_fecn)
 876{
 877        struct hfi1_ctxtdata *rcd = packet->rcd;
 878        struct rvt_qp *qp = packet->qp;
 879        struct hfi1_ibport *ibp = rcd_to_iport(rcd);
 880        struct hfi1_qp_priv *priv = qp->priv;
 881        struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
 882        u8 sc5 = ibp->sl_to_sc[rdma_ah_get_sl(&qp->remote_ah_attr)];
 883        u64 pbc, pbc_flags = 0;
 884        u32 hwords = 0;
 885        u32 nwords = 0;
 886        u32 plen;
 887        struct pio_buf *pbuf;
 888        struct hfi1_opa_header opa_hdr;
 889
 890        /* clear the defer count */
 891        qp->r_adefered = 0;
 892
 893        /* Don't send ACK or NAK if a RDMA read or atomic is pending. */
 894        if (qp->s_flags & RVT_S_RESP_PENDING) {
 895                hfi1_queue_rc_ack(packet, is_fecn);
 896                return;
 897        }
 898
 899        /* Ensure s_rdma_ack_cnt changes are committed */
 900        if (qp->s_rdma_ack_cnt) {
 901                hfi1_queue_rc_ack(packet, is_fecn);
 902                return;
 903        }
 904
 905        /* Don't try to send ACKs if the link isn't ACTIVE */
 906        if (driver_lstate(ppd) != IB_PORT_ACTIVE)
 907                return;
 908
 909        /* Make the appropriate header */
 910        hfi1_make_rc_ack_tbl[priv->hdr_type](packet, &opa_hdr, sc5, is_fecn,
 911                                             &pbc_flags, &hwords, &nwords);
 912
 913        plen = 2 /* PBC */ + hwords + nwords;
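        /*
         * plen is in 32-bit words: 2 for the PBC plus the header words and
         * the CRC/pad words computed by the header builder above.
         */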
 914        pbc = create_pbc(ppd, pbc_flags, qp->srate_mbps,
 915                         sc_to_vlt(ppd->dd, sc5), plen);
 916        pbuf = sc_buffer_alloc(rcd->sc, plen, NULL, NULL);
 917        if (!pbuf) {
 918                /*
 919                 * We have no room to send at the moment.  Pass
 920                 * responsibility for sending the ACK to the send engine
 921                 * so that when enough buffer space becomes available,
 922                 * the ACK is sent ahead of other outgoing packets.
 923                 */
 924                hfi1_queue_rc_ack(packet, is_fecn);
 925                return;
 926        }
 927        trace_ack_output_ibhdr(dd_from_ibdev(qp->ibqp.device),
 928                               &opa_hdr, ib_is_sc5(sc5));
 929
 930        /* write the pbc and data */
 931        ppd->dd->pio_inline_send(ppd->dd, pbuf, pbc,
 932                                 (priv->hdr_type == HFI1_PKT_TYPE_9B ?
 933                                 (void *)&opa_hdr.ibh :
 934                                 (void *)&opa_hdr.opah), hwords);
 935        return;
 936}
 937
 938/**
 939 * reset_psn - reset the QP state to send starting from PSN
 940 * @qp: the QP
 941 * @psn: the packet sequence number to restart at
 942 *
 943 * This is called from hfi1_rc_rcv() to process an incoming RC ACK
 944 * for the given QP.
 945 * Called at interrupt level with the QP s_lock held.
 946 */
 947static void reset_psn(struct rvt_qp *qp, u32 psn)
 948{
 949        u32 n = qp->s_acked;
 950        struct rvt_swqe *wqe = rvt_get_swqe_ptr(qp, n);
 951        u32 opcode;
 952
 953        lockdep_assert_held(&qp->s_lock);
 954        qp->s_cur = n;
 955
 956        /*
 957         * If we are starting the request from the beginning,
 958         * let the normal send code handle initialization.
 959         */
 960        if (cmp_psn(psn, wqe->psn) <= 0) {
 961                qp->s_state = OP(SEND_LAST);
 962                goto done;
 963        }
 964
 965        /* Find the work request opcode corresponding to the given PSN. */
 966        opcode = wqe->wr.opcode;
 967        for (;;) {
 968                int diff;
 969
 970                if (++n == qp->s_size)
 971                        n = 0;
 972                if (n == qp->s_tail)
 973                        break;
 974                wqe = rvt_get_swqe_ptr(qp, n);
 975                diff = cmp_psn(psn, wqe->psn);
 976                if (diff < 0)
 977                        break;
 978                qp->s_cur = n;
 979                /*
 980                 * If we are starting the request from the beginning,
 981                 * let the normal send code handle initialization.
 982                 */
 983                if (diff == 0) {
 984                        qp->s_state = OP(SEND_LAST);
 985                        goto done;
 986                }
 987                opcode = wqe->wr.opcode;
 988        }
 989
 990        /*
 991         * Set the state to restart in the middle of a request.
 992         * Don't change the s_sge, s_cur_sge, or s_cur_size.
 993         * See hfi1_make_rc_req().
 994         */
 995        switch (opcode) {
 996        case IB_WR_SEND:
 997        case IB_WR_SEND_WITH_IMM:
 998                qp->s_state = OP(RDMA_READ_RESPONSE_FIRST);
 999                break;
1000
1001        case IB_WR_RDMA_WRITE:
1002        case IB_WR_RDMA_WRITE_WITH_IMM:
1003                qp->s_state = OP(RDMA_READ_RESPONSE_LAST);
1004                break;
1005
1006        case IB_WR_RDMA_READ:
1007                qp->s_state = OP(RDMA_READ_RESPONSE_MIDDLE);
1008                break;
1009
1010        default:
1011                /*
 1012         * This case shouldn't happen since there is only
 1013         * one PSN per req.
1014                 */
1015                qp->s_state = OP(SEND_LAST);
1016        }
1017done:
1018        qp->s_psn = psn;
1019        /*
1020         * Set RVT_S_WAIT_PSN as rc_complete() may start the timer
1021         * asynchronously before the send engine can get scheduled.
1022         * Doing it in hfi1_make_rc_req() is too late.
1023         */
1024        if ((cmp_psn(qp->s_psn, qp->s_sending_hpsn) <= 0) &&
1025            (cmp_psn(qp->s_sending_psn, qp->s_sending_hpsn) <= 0))
1026                qp->s_flags |= RVT_S_WAIT_PSN;
1027        qp->s_flags &= ~RVT_S_AHG_VALID;
1028}
1029
1030/*
1031 * Back up requester to resend the last un-ACKed request.
1032 * The QP r_lock and s_lock should be held and interrupts disabled.
1033 */
1034void hfi1_restart_rc(struct rvt_qp *qp, u32 psn, int wait)
1035{
1036        struct rvt_swqe *wqe = rvt_get_swqe_ptr(qp, qp->s_acked);
1037        struct hfi1_ibport *ibp;
1038
1039        lockdep_assert_held(&qp->r_lock);
1040        lockdep_assert_held(&qp->s_lock);
1041        if (qp->s_retry == 0) {
1042                if (qp->s_mig_state == IB_MIG_ARMED) {
1043                        hfi1_migrate_qp(qp);
1044                        qp->s_retry = qp->s_retry_cnt;
1045                } else if (qp->s_last == qp->s_acked) {
1046                        hfi1_send_complete(qp, wqe, IB_WC_RETRY_EXC_ERR);
1047                        rvt_error_qp(qp, IB_WC_WR_FLUSH_ERR);
1048                        return;
1049                } else { /* need to handle delayed completion */
1050                        return;
1051                }
1052        } else {
1053                qp->s_retry--;
1054        }
1055
1056        ibp = to_iport(qp->ibqp.device, qp->port_num);
1057        if (wqe->wr.opcode == IB_WR_RDMA_READ)
1058                ibp->rvp.n_rc_resends++;
1059        else
1060                ibp->rvp.n_rc_resends += delta_psn(qp->s_psn, psn);
1061
1062        qp->s_flags &= ~(RVT_S_WAIT_FENCE | RVT_S_WAIT_RDMAR |
1063                         RVT_S_WAIT_SSN_CREDIT | RVT_S_WAIT_PSN |
1064                         RVT_S_WAIT_ACK);
1065        if (wait)
1066                qp->s_flags |= RVT_S_SEND_ONE;
1067        reset_psn(qp, psn);
1068}
1069
1070/*
1071 * Set qp->s_sending_psn to the next PSN after the given one.
1072 * This would be psn+1 except when RDMA reads are present.
1073 */
1074static void reset_sending_psn(struct rvt_qp *qp, u32 psn)
1075{
1076        struct rvt_swqe *wqe;
1077        u32 n = qp->s_last;
1078
1079        lockdep_assert_held(&qp->s_lock);
1080        /* Find the work request corresponding to the given PSN. */
1081        for (;;) {
1082                wqe = rvt_get_swqe_ptr(qp, n);
1083                if (cmp_psn(psn, wqe->lpsn) <= 0) {
1084                        if (wqe->wr.opcode == IB_WR_RDMA_READ)
1085                                qp->s_sending_psn = wqe->lpsn + 1;
1086                        else
1087                                qp->s_sending_psn = psn + 1;
1088                        break;
1089                }
1090                if (++n == qp->s_size)
1091                        n = 0;
1092                if (n == qp->s_tail)
1093                        break;
1094        }
1095}
1096
1097/*
1098 * This should be called with the QP s_lock held and interrupts disabled.
1099 */
1100void hfi1_rc_send_complete(struct rvt_qp *qp, struct hfi1_opa_header *opah)
1101{
1102        struct ib_other_headers *ohdr;
1103        struct hfi1_qp_priv *priv = qp->priv;
1104        struct rvt_swqe *wqe;
1105        struct ib_header *hdr = NULL;
1106        struct hfi1_16b_header *hdr_16b = NULL;
1107        u32 opcode;
1108        u32 psn;
1109
1110        lockdep_assert_held(&qp->s_lock);
1111        if (!(ib_rvt_state_ops[qp->state] & RVT_SEND_OR_FLUSH_OR_RECV_OK))
1112                return;
1113
1114        /* Find out where the BTH is */
1115        if (priv->hdr_type == HFI1_PKT_TYPE_9B) {
1116                hdr = &opah->ibh;
1117                if (ib_get_lnh(hdr) == HFI1_LRH_BTH)
1118                        ohdr = &hdr->u.oth;
1119                else
1120                        ohdr = &hdr->u.l.oth;
1121        } else {
1122                u8 l4;
1123
1124                hdr_16b = &opah->opah;
1125                l4  = hfi1_16B_get_l4(hdr_16b);
1126                if (l4 == OPA_16B_L4_IB_LOCAL)
1127                        ohdr = &hdr_16b->u.oth;
1128                else
1129                        ohdr = &hdr_16b->u.l.oth;
1130        }
1131
1132        opcode = ib_bth_get_opcode(ohdr);
1133        if (opcode >= OP(RDMA_READ_RESPONSE_FIRST) &&
1134            opcode <= OP(ATOMIC_ACKNOWLEDGE)) {
1135                WARN_ON(!qp->s_rdma_ack_cnt);
1136                qp->s_rdma_ack_cnt--;
1137                return;
1138        }
1139
1140        psn = ib_bth_get_psn(ohdr);
1141        reset_sending_psn(qp, psn);
1142
1143        /*
1144         * Start timer after a packet requesting an ACK has been sent and
1145         * there are still requests that haven't been acked.
1146         */
1147        if ((psn & IB_BTH_REQ_ACK) && qp->s_acked != qp->s_tail &&
1148            !(qp->s_flags &
1149                (RVT_S_TIMER | RVT_S_WAIT_RNR | RVT_S_WAIT_PSN)) &&
1150                (ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK))
1151                rvt_add_retry_timer(qp);
1152
1153        while (qp->s_last != qp->s_acked) {
1154                u32 s_last;
1155
1156                wqe = rvt_get_swqe_ptr(qp, qp->s_last);
1157                if (cmp_psn(wqe->lpsn, qp->s_sending_psn) >= 0 &&
1158                    cmp_psn(qp->s_sending_psn, qp->s_sending_hpsn) <= 0)
1159                        break;
1160                s_last = qp->s_last;
1161                trace_hfi1_qp_send_completion(qp, wqe, s_last);
1162                if (++s_last >= qp->s_size)
1163                        s_last = 0;
1164                qp->s_last = s_last;
1165                /* see post_send() */
1166                barrier();
1167                rvt_put_swqe(wqe);
1168                rvt_qp_swqe_complete(qp,
1169                                     wqe,
1170                                     ib_hfi1_wc_opcode[wqe->wr.opcode],
1171                                     IB_WC_SUCCESS);
1172        }
1173        /*
1174         * If we were waiting for sends to complete before re-sending,
1175         * and they are now complete, restart sending.
1176         */
1177        trace_hfi1_sendcomplete(qp, psn);
1178        if (qp->s_flags & RVT_S_WAIT_PSN &&
1179            cmp_psn(qp->s_sending_psn, qp->s_sending_hpsn) > 0) {
1180                qp->s_flags &= ~RVT_S_WAIT_PSN;
1181                qp->s_sending_psn = qp->s_psn;
1182                qp->s_sending_hpsn = qp->s_psn - 1;
1183                hfi1_schedule_send(qp);
1184        }
1185}
1186
1187static inline void update_last_psn(struct rvt_qp *qp, u32 psn)
1188{
1189        qp->s_last_psn = psn;
1190}
1191
1192/*
1193 * Generate a SWQE completion.
1194 * This is similar to hfi1_send_complete but has to check to be sure
1195 * that the SGEs are not being referenced if the SWQE is being resent.
1196 */
1197static struct rvt_swqe *do_rc_completion(struct rvt_qp *qp,
1198                                         struct rvt_swqe *wqe,
1199                                         struct hfi1_ibport *ibp)
1200{
1201        lockdep_assert_held(&qp->s_lock);
1202        /*
1203         * Don't decrement refcount and don't generate a
1204         * completion if the SWQE is being resent until the send
1205         * is finished.
1206         */
1207        if (cmp_psn(wqe->lpsn, qp->s_sending_psn) < 0 ||
1208            cmp_psn(qp->s_sending_psn, qp->s_sending_hpsn) > 0) {
1209                u32 s_last;
1210
1211                rvt_put_swqe(wqe);
1212                s_last = qp->s_last;
1213                trace_hfi1_qp_send_completion(qp, wqe, s_last);
1214                if (++s_last >= qp->s_size)
1215                        s_last = 0;
1216                qp->s_last = s_last;
1217                /* see post_send() */
1218                barrier();
1219                rvt_qp_swqe_complete(qp,
1220                                     wqe,
1221                                     ib_hfi1_wc_opcode[wqe->wr.opcode],
1222                                     IB_WC_SUCCESS);
1223        } else {
1224                struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
1225
1226                this_cpu_inc(*ibp->rvp.rc_delayed_comp);
1227                /*
 1228                 * If send progress is not running, attempt to
 1229                 * progress the SDMA queue.
1230                 */
1231                if (ppd->dd->flags & HFI1_HAS_SEND_DMA) {
1232                        struct sdma_engine *engine;
1233                        u8 sl = rdma_ah_get_sl(&qp->remote_ah_attr);
1234                        u8 sc5;
1235
1236                        /* For now use sc to find engine */
1237                        sc5 = ibp->sl_to_sc[sl];
1238                        engine = qp_to_sdma_engine(qp, sc5);
1239                        sdma_engine_progress_schedule(engine);
1240                }
1241        }
1242
1243        qp->s_retry = qp->s_retry_cnt;
1244        update_last_psn(qp, wqe->lpsn);
1245
1246        /*
1247         * If we are completing a request which is in the process of
1248         * being resent, we can stop re-sending it since we know the
1249         * responder has already seen it.
1250         */
1251        if (qp->s_acked == qp->s_cur) {
1252                if (++qp->s_cur >= qp->s_size)
1253                        qp->s_cur = 0;
1254                qp->s_acked = qp->s_cur;
1255                wqe = rvt_get_swqe_ptr(qp, qp->s_cur);
1256                if (qp->s_acked != qp->s_tail) {
1257                        qp->s_state = OP(SEND_LAST);
1258                        qp->s_psn = wqe->psn;
1259                }
1260        } else {
1261                if (++qp->s_acked >= qp->s_size)
1262                        qp->s_acked = 0;
1263                if (qp->state == IB_QPS_SQD && qp->s_acked == qp->s_cur)
1264                        qp->s_draining = 0;
1265                wqe = rvt_get_swqe_ptr(qp, qp->s_acked);
1266        }
1267        return wqe;
1268}
1269
1270/**
1271 * do_rc_ack - process an incoming RC ACK
1272 * @qp: the QP the ACK came in on
1273 * @psn: the packet sequence number of the ACK
1274 * @opcode: the opcode of the request that resulted in the ACK
1275 *
1276 * This is called from rc_rcv_resp() to process an incoming RC ACK
1277 * for the given QP.
1278 * May be called at interrupt level, with the QP s_lock held.
1279 * Returns 1 if OK, 0 if current operation should be aborted (NAK).
1280 */
1281static int do_rc_ack(struct rvt_qp *qp, u32 aeth, u32 psn, int opcode,
1282                     u64 val, struct hfi1_ctxtdata *rcd)
1283{
1284        struct hfi1_ibport *ibp;
1285        enum ib_wc_status status;
1286        struct rvt_swqe *wqe;
1287        int ret = 0;
1288        u32 ack_psn;
1289        int diff;
1290
1291        lockdep_assert_held(&qp->s_lock);
1292        /*
1293         * Note that NAKs implicitly ACK outstanding SEND and RDMA write
1294         * requests and implicitly NAK RDMA read and atomic requests issued
1295         * before the NAK'ed request.  The MSN won't include the NAK'ed
1296         * request but will include the ACK'ed request(s).
1297         */
1298        ack_psn = psn;
1299        if (aeth >> IB_AETH_NAK_SHIFT)
1300                ack_psn--;
1301        wqe = rvt_get_swqe_ptr(qp, qp->s_acked);
1302        ibp = rcd_to_iport(rcd);
1303
1304        /*
1305         * The MSN might be for a later WQE than the PSN indicates so
1306         * only complete WQEs that the PSN finishes.
1307         */
1308        while ((diff = delta_psn(ack_psn, wqe->lpsn)) >= 0) {
1309                /*
1310                 * RDMA_READ_RESPONSE_ONLY is a special case since
1311                 * we want to generate completion events for everything
1312                 * before the RDMA read, copy the data, then generate
1313                 * the completion for the read.
1314                 */
1315                if (wqe->wr.opcode == IB_WR_RDMA_READ &&
1316                    opcode == OP(RDMA_READ_RESPONSE_ONLY) &&
1317                    diff == 0) {
1318                        ret = 1;
1319                        goto bail_stop;
1320                }
1321                /*
1322                 * If this request is an RDMA read or atomic, and the ACK is
1323                 * for a later operation, this ACK NAKs the RDMA read or
1324                 * atomic.  In other words, only an RDMA_READ_LAST or ONLY
1325                 * can ACK an RDMA read, and likewise for atomic ops.  Note
1326                 * that the NAK case can only happen if relaxed ordering is
1327                 * used and requests are sent after an RDMA read or atomic
1328                 * is sent but before the response is received.
1329                 */
1330                if ((wqe->wr.opcode == IB_WR_RDMA_READ &&
1331                     (opcode != OP(RDMA_READ_RESPONSE_LAST) || diff != 0)) ||
1332                    ((wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP ||
1333                      wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD) &&
1334                     (opcode != OP(ATOMIC_ACKNOWLEDGE) || diff != 0))) {
1335                        /* Retry this request. */
1336                        if (!(qp->r_flags & RVT_R_RDMAR_SEQ)) {
1337                                qp->r_flags |= RVT_R_RDMAR_SEQ;
1338                                hfi1_restart_rc(qp, qp->s_last_psn + 1, 0);
1339                                if (list_empty(&qp->rspwait)) {
1340                                        qp->r_flags |= RVT_R_RSP_SEND;
1341                                        rvt_get_qp(qp);
1342                                        list_add_tail(&qp->rspwait,
1343                                                      &rcd->qp_wait_list);
1344                                }
1345                        }
1346                        /*
1347                         * No need to process the ACK/NAK since we are
1348                         * restarting an earlier request.
1349                         */
1350                        goto bail_stop;
1351                }
1352                if (wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP ||
1353                    wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD) {
1354                        u64 *vaddr = wqe->sg_list[0].vaddr;
1355                        *vaddr = val;
1356                }
1357                if (qp->s_num_rd_atomic &&
1358                    (wqe->wr.opcode == IB_WR_RDMA_READ ||
1359                     wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP ||
1360                     wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD)) {
1361                        qp->s_num_rd_atomic--;
1362                        /* Restart sending task if fence is complete */
1363                        if ((qp->s_flags & RVT_S_WAIT_FENCE) &&
1364                            !qp->s_num_rd_atomic) {
1365                                qp->s_flags &= ~(RVT_S_WAIT_FENCE |
1366                                                 RVT_S_WAIT_ACK);
1367                                hfi1_schedule_send(qp);
1368                        } else if (qp->s_flags & RVT_S_WAIT_RDMAR) {
1369                                qp->s_flags &= ~(RVT_S_WAIT_RDMAR |
1370                                                 RVT_S_WAIT_ACK);
1371                                hfi1_schedule_send(qp);
1372                        }
1373                }
1374                wqe = do_rc_completion(qp, wqe, ibp);
1375                if (qp->s_acked == qp->s_tail)
1376                        break;
1377        }
1378
1379        switch (aeth >> IB_AETH_NAK_SHIFT) {
1380        case 0:         /* ACK */
1381                this_cpu_inc(*ibp->rvp.rc_acks);
1382                if (qp->s_acked != qp->s_tail) {
1383                        /*
1384                         * We are expecting more ACKs so
1385                         * mod the retry timer.
1386                         */
1387                        rvt_mod_retry_timer(qp);
1388                        /*
1389                         * We can stop re-sending the earlier packets and
1390                         * continue with the next packet the receiver wants.
1391                         */
1392                        if (cmp_psn(qp->s_psn, psn) <= 0)
1393                                reset_psn(qp, psn + 1);
1394                } else {
1395                        /* No more acks - kill all timers */
1396                        rvt_stop_rc_timers(qp);
1397                        if (cmp_psn(qp->s_psn, psn) <= 0) {
1398                                qp->s_state = OP(SEND_LAST);
1399                                qp->s_psn = psn + 1;
1400                        }
1401                }
1402                if (qp->s_flags & RVT_S_WAIT_ACK) {
1403                        qp->s_flags &= ~RVT_S_WAIT_ACK;
1404                        hfi1_schedule_send(qp);
1405                }
1406                rvt_get_credit(qp, aeth);
1407                qp->s_rnr_retry = qp->s_rnr_retry_cnt;
1408                qp->s_retry = qp->s_retry_cnt;
1409                update_last_psn(qp, psn);
1410                return 1;
1411
1412        case 1:         /* RNR NAK */
1413                ibp->rvp.n_rnr_naks++;
1414                if (qp->s_acked == qp->s_tail)
1415                        goto bail_stop;
1416                if (qp->s_flags & RVT_S_WAIT_RNR)
1417                        goto bail_stop;
1418                if (qp->s_rnr_retry == 0) {
1419                        status = IB_WC_RNR_RETRY_EXC_ERR;
1420                        goto class_b;
1421                }
1422                if (qp->s_rnr_retry_cnt < 7)
1423                        qp->s_rnr_retry--;
1424
1425                /* The last valid PSN is the previous PSN. */
1426                update_last_psn(qp, psn - 1);
1427
1428                ibp->rvp.n_rc_resends += delta_psn(qp->s_psn, psn);
1429
1430                reset_psn(qp, psn);
1431
1432                qp->s_flags &= ~(RVT_S_WAIT_SSN_CREDIT | RVT_S_WAIT_ACK);
1433                rvt_stop_rc_timers(qp);
1434                rvt_add_rnr_timer(qp, aeth);
1435                return 0;
1436
1437        case 3:         /* NAK */
1438                if (qp->s_acked == qp->s_tail)
1439                        goto bail_stop;
1440                /* The last valid PSN is the previous PSN. */
1441                update_last_psn(qp, psn - 1);
1442                switch ((aeth >> IB_AETH_CREDIT_SHIFT) &
1443                        IB_AETH_CREDIT_MASK) {
1444                case 0: /* PSN sequence error */
1445                        ibp->rvp.n_seq_naks++;
1446                        /*
1447                         * Back up to the responder's expected PSN.
1448                         * Note that we might get a NAK in the middle of an
1449                         * RDMA READ response which terminates the RDMA
1450                         * READ.
1451                         */
1452                        hfi1_restart_rc(qp, psn, 0);
1453                        hfi1_schedule_send(qp);
1454                        break;
1455
1456                case 1: /* Invalid Request */
1457                        status = IB_WC_REM_INV_REQ_ERR;
1458                        ibp->rvp.n_other_naks++;
1459                        goto class_b;
1460
1461                case 2: /* Remote Access Error */
1462                        status = IB_WC_REM_ACCESS_ERR;
1463                        ibp->rvp.n_other_naks++;
1464                        goto class_b;
1465
1466                case 3: /* Remote Operation Error */
1467                        status = IB_WC_REM_OP_ERR;
1468                        ibp->rvp.n_other_naks++;
1469class_b:
1470                        if (qp->s_last == qp->s_acked) {
1471                                hfi1_send_complete(qp, wqe, status);
1472                                rvt_error_qp(qp, IB_WC_WR_FLUSH_ERR);
1473                        }
1474                        break;
1475
1476                default:
1477                        /* Ignore other reserved NAK error codes */
1478                        goto reserved;
1479                }
1480                qp->s_retry = qp->s_retry_cnt;
1481                qp->s_rnr_retry = qp->s_rnr_retry_cnt;
1482                goto bail_stop;
1483
1484        default:                /* 2: reserved */
1485reserved:
1486                /* Ignore reserved NAK codes. */
1487                goto bail_stop;
1488        }
1489        /* cannot be reached */
1490bail_stop:
1491        rvt_stop_rc_timers(qp);
1492        return ret;
1493}
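
/*
 * An illustrative sketch, not part of the driver, of how do_rc_ack() above
 * carves up the 32-bit AETH: bits 31:29 select ACK (0), RNR NAK (1),
 * reserved (2) or NAK (3), bits 28:24 carry the credit count, RNR timer or
 * NAK code, and the low 24 bits are the MSN.  The numeric shifts and masks
 * below are assumed to match the IB_AETH_NAK_SHIFT and
 * IB_AETH_CREDIT_SHIFT/IB_AETH_CREDIT_MASK values used in the switch above;
 * example_decode_aeth() itself is hypothetical.
 *
 *        struct example_aeth {
 *                unsigned int nak_type;
 *                unsigned int syndrome;
 *                unsigned int msn;
 *        };
 *
 *        static struct example_aeth example_decode_aeth(unsigned int aeth)
 *        {
 *                struct example_aeth d;
 *
 *                d.nak_type = aeth >> 29;
 *                d.syndrome = (aeth >> 24) & 0x1f;
 *                d.msn = aeth & 0xffffff;
 *                return d;
 *        }
 */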
1494
1495/*
1496 * We have seen an out-of-sequence RDMA read middle or last packet.
1497 * This ACKs SENDs and RDMA writes up to the first RDMA read or atomic SWQE.
1498 */
1499static void rdma_seq_err(struct rvt_qp *qp, struct hfi1_ibport *ibp, u32 psn,
1500                         struct hfi1_ctxtdata *rcd)
1501{
1502        struct rvt_swqe *wqe;
1503
1504        lockdep_assert_held(&qp->s_lock);
1505        /* Remove QP from retry timer */
1506        rvt_stop_rc_timers(qp);
1507
1508        wqe = rvt_get_swqe_ptr(qp, qp->s_acked);
1509
1510        while (cmp_psn(psn, wqe->lpsn) > 0) {
1511                if (wqe->wr.opcode == IB_WR_RDMA_READ ||
1512                    wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP ||
1513                    wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD)
1514                        break;
1515                wqe = do_rc_completion(qp, wqe, ibp);
1516        }
1517
1518        ibp->rvp.n_rdma_seq++;
1519        qp->r_flags |= RVT_R_RDMAR_SEQ;
1520        hfi1_restart_rc(qp, qp->s_last_psn + 1, 0);
1521        if (list_empty(&qp->rspwait)) {
1522                qp->r_flags |= RVT_R_RSP_SEND;
1523                rvt_get_qp(qp);
1524                list_add_tail(&qp->rspwait, &rcd->qp_wait_list);
1525        }
1526}
1527
1528/**
1529 * rc_rcv_resp - process an incoming RC response packet
1530 * @packet: data packet information
1531 *
1532 * This is called from hfi1_rc_rcv() to process an incoming RC response
1533 * packet for the given QP.
1534 * Called at interrupt level.
1535 */
1536static void rc_rcv_resp(struct hfi1_packet *packet)
1537{
1538        struct hfi1_ctxtdata *rcd = packet->rcd;
1539        void *data = packet->payload;
1540        u32 tlen = packet->tlen;
1541        struct rvt_qp *qp = packet->qp;
1542        struct hfi1_ibport *ibp;
1543        struct ib_other_headers *ohdr = packet->ohdr;
1544        struct rvt_swqe *wqe;
1545        enum ib_wc_status status;
1546        unsigned long flags;
1547        int diff;
1548        u64 val;
1549        u32 aeth;
1550        u32 psn = ib_bth_get_psn(packet->ohdr);
1551        u32 pmtu = qp->pmtu;
1552        u16 hdrsize = packet->hlen;
1553        u8 opcode = packet->opcode;
1554        u8 pad = packet->pad;
1555        u8 extra_bytes = pad + packet->extra_byte + (SIZE_OF_CRC << 2);
1556
1557        spin_lock_irqsave(&qp->s_lock, flags);
1558        trace_hfi1_ack(qp, psn);
1559
1560        /* Ignore invalid responses. */
1561        if (cmp_psn(psn, READ_ONCE(qp->s_next_psn)) >= 0)
1562                goto ack_done;
1563
1564        /* Ignore duplicate responses. */
1565        diff = cmp_psn(psn, qp->s_last_psn);
1566        if (unlikely(diff <= 0)) {
1567                /* Update credits for "ghost" ACKs */
1568                if (diff == 0 && opcode == OP(ACKNOWLEDGE)) {
1569                        aeth = be32_to_cpu(ohdr->u.aeth);
1570                        if ((aeth >> IB_AETH_NAK_SHIFT) == 0)
1571                                rvt_get_credit(qp, aeth);
1572                }
1573                goto ack_done;
1574        }
1575
1576        /*
1577         * Skip everything other than the PSN we expect, if we are waiting
1578         * for a reply to a restarted RDMA read or atomic op.
1579         */
1580        if (qp->r_flags & RVT_R_RDMAR_SEQ) {
1581                if (cmp_psn(psn, qp->s_last_psn + 1) != 0)
1582                        goto ack_done;
1583                qp->r_flags &= ~RVT_R_RDMAR_SEQ;
1584        }
1585
1586        if (unlikely(qp->s_acked == qp->s_tail))
1587                goto ack_done;
1588        wqe = rvt_get_swqe_ptr(qp, qp->s_acked);
1589        status = IB_WC_SUCCESS;
1590
1591        switch (opcode) {
1592        case OP(ACKNOWLEDGE):
1593        case OP(ATOMIC_ACKNOWLEDGE):
1594        case OP(RDMA_READ_RESPONSE_FIRST):
1595                aeth = be32_to_cpu(ohdr->u.aeth);
1596                if (opcode == OP(ATOMIC_ACKNOWLEDGE))
1597                        val = ib_u64_get(&ohdr->u.at.atomic_ack_eth);
1598                else
1599                        val = 0;
1600                if (!do_rc_ack(qp, aeth, psn, opcode, val, rcd) ||
1601                    opcode != OP(RDMA_READ_RESPONSE_FIRST))
1602                        goto ack_done;
1603                wqe = rvt_get_swqe_ptr(qp, qp->s_acked);
1604                if (unlikely(wqe->wr.opcode != IB_WR_RDMA_READ))
1605                        goto ack_op_err;
1606                /*
1607                 * If this is a response to a resent RDMA read, we
1608                 * have to be careful to copy the data to the right
1609                 * location.
1610                 */
1611                qp->s_rdma_read_len = restart_sge(&qp->s_rdma_read_sge,
1612                                                  wqe, psn, pmtu);
1613                goto read_middle;
1614
1615        case OP(RDMA_READ_RESPONSE_MIDDLE):
1616                /* no AETH, no ACK */
1617                if (unlikely(cmp_psn(psn, qp->s_last_psn + 1)))
1618                        goto ack_seq_err;
1619                if (unlikely(wqe->wr.opcode != IB_WR_RDMA_READ))
1620                        goto ack_op_err;
1621read_middle:
1622                if (unlikely(tlen != (hdrsize + pmtu + extra_bytes)))
1623                        goto ack_len_err;
1624                if (unlikely(pmtu >= qp->s_rdma_read_len))
1625                        goto ack_len_err;
1626
1627                /*
1628                 * We got a response, so update the timeout:
1629                 * 4.096 usec * (1 << qp->timeout).
1630                 */
1631                rvt_mod_retry_timer(qp);
1632                if (qp->s_flags & RVT_S_WAIT_ACK) {
1633                        qp->s_flags &= ~RVT_S_WAIT_ACK;
1634                        hfi1_schedule_send(qp);
1635                }
1636
1637                if (opcode == OP(RDMA_READ_RESPONSE_MIDDLE))
1638                        qp->s_retry = qp->s_retry_cnt;
1639
1640                /*
1641                 * Update the RDMA receive state but do the copy w/o
1642                 * holding the locks and blocking interrupts.
1643                 */
1644                qp->s_rdma_read_len -= pmtu;
1645                update_last_psn(qp, psn);
1646                spin_unlock_irqrestore(&qp->s_lock, flags);
1647                hfi1_copy_sge(&qp->s_rdma_read_sge, data, pmtu, false, false);
1648                goto bail;
1649
1650        case OP(RDMA_READ_RESPONSE_ONLY):
1651                aeth = be32_to_cpu(ohdr->u.aeth);
1652                if (!do_rc_ack(qp, aeth, psn, opcode, 0, rcd))
1653                        goto ack_done;
1654                /*
1655                 * Check that the data size is >= 0 && <= pmtu.
1656                 * Remember to account for the pad, LT byte (16B) and ICRC.
1657                 */
1658                if (unlikely(tlen < (hdrsize + extra_bytes)))
1659                        goto ack_len_err;
1660                /*
1661                 * If this is a response to a resent RDMA read, we
1662                 * have to be careful to copy the data to the right
1663                 * location.
1664                 */
1665                wqe = rvt_get_swqe_ptr(qp, qp->s_acked);
1666                qp->s_rdma_read_len = restart_sge(&qp->s_rdma_read_sge,
1667                                                  wqe, psn, pmtu);
1668                goto read_last;
1669
1670        case OP(RDMA_READ_RESPONSE_LAST):
1671                /* ACKs READ req. */
1672                if (unlikely(cmp_psn(psn, qp->s_last_psn + 1)))
1673                        goto ack_seq_err;
1674                if (unlikely(wqe->wr.opcode != IB_WR_RDMA_READ))
1675                        goto ack_op_err;
1676                /*
1677                 * Check that the data size is >= 1 && <= pmtu.
1678                 * Remember to account for the pad, LT byte (16B) and ICRC.
1679                 */
1680                if (unlikely(tlen <= (hdrsize + extra_bytes)))
1681                        goto ack_len_err;
1682read_last:
1683                tlen -= hdrsize + extra_bytes;
1684                if (unlikely(tlen != qp->s_rdma_read_len))
1685                        goto ack_len_err;
1686                aeth = be32_to_cpu(ohdr->u.aeth);
1687                hfi1_copy_sge(&qp->s_rdma_read_sge, data, tlen, false, false);
1688                WARN_ON(qp->s_rdma_read_sge.num_sge);
1689                (void)do_rc_ack(qp, aeth, psn,
1690                                 OP(RDMA_READ_RESPONSE_LAST), 0, rcd);
1691                goto ack_done;
1692        }
1693
1694ack_op_err:
1695        status = IB_WC_LOC_QP_OP_ERR;
1696        goto ack_err;
1697
1698ack_seq_err:
1699        ibp = rcd_to_iport(rcd);
1700        rdma_seq_err(qp, ibp, psn, rcd);
1701        goto ack_done;
1702
1703ack_len_err:
1704        status = IB_WC_LOC_LEN_ERR;
1705ack_err:
1706        if (qp->s_last == qp->s_acked) {
1707                hfi1_send_complete(qp, wqe, status);
1708                rvt_error_qp(qp, IB_WC_WR_FLUSH_ERR);
1709        }
1710ack_done:
1711        spin_unlock_irqrestore(&qp->s_lock, flags);
1712bail:
1713        return;
1714}
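
/*
 * A minimal sketch, not part of the driver, of the circular PSN arithmetic
 * that rc_rcv_resp() above relies on through cmp_psn()/delta_psn(): the BTH
 * PSN is a 24-bit sequence number, so a plain integer compare is wrong near
 * the wrap point.  Assuming 24-bit PSNs, the hypothetical helper below
 * sign-extends the 24-bit difference so that "slightly behind" still
 * compares as negative across a wrap:
 *
 *        static int example_cmp_psn24(unsigned int a, unsigned int b)
 *        {
 *                return (int)((a - b) << 8) >> 8;
 *        }
 *
 * e.g. example_cmp_psn24(0x000002, 0xfffffe) == 4, so a response with PSN 2
 * is correctly treated as four packets ahead of an s_last_psn of 0xfffffe.
 */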
1715
1716static inline void rc_defered_ack(struct hfi1_ctxtdata *rcd,
1717                                  struct rvt_qp *qp)
1718{
1719        if (list_empty(&qp->rspwait)) {
1720                qp->r_flags |= RVT_R_RSP_NAK;
1721                rvt_get_qp(qp);
1722                list_add_tail(&qp->rspwait, &rcd->qp_wait_list);
1723        }
1724}
1725
1726static inline void rc_cancel_ack(struct rvt_qp *qp)
1727{
1728        qp->r_adefered = 0;
1729        if (list_empty(&qp->rspwait))
1730                return;
1731        list_del_init(&qp->rspwait);
1732        qp->r_flags &= ~RVT_R_RSP_NAK;
1733        rvt_put_qp(qp);
1734}
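
/*
 * In outline (a sketch, not driver code), the two helpers above are used by
 * hfi1_rc_rcv() below to coalesce ACKs: rc_defered_ack() parks the QP on the
 * context's qp_wait_list (taking a reference and setting RVT_R_RSP_NAK) so
 * the ACK goes out after the receive queue drains, while rc_cancel_ack()
 * undoes that when an ACK is about to be sent directly.  The caller bounds
 * the coalescing by counting r_adefered against HFI1_PSN_CREDIT:
 *
 *        if (qp->r_adefered >= HFI1_PSN_CREDIT) {
 *                rc_cancel_ack(qp);
 *        } else {
 *                qp->r_adefered++;
 *                rc_defered_ack(rcd, qp);
 *        }
 *
 * with an explicit ACK sent right away in the first branch.
 */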
1735
1736/**
1737 * rc_rcv_error - process an incoming duplicate or error RC packet
1738 * @ohdr: the other headers for this packet
1739 * @data: the packet data
1740 * @qp: the QP for this packet
1741 * @opcode: the opcode for this packet
1742 * @psn: the packet sequence number for this packet
1743 * @diff: the difference between the PSN and the expected PSN
 * @rcd: the receive context the packet arrived on
1744 *
1745 * This is called from hfi1_rc_rcv() to process an unexpected
1746 * incoming RC packet for the given QP.
1747 * Called at interrupt level.
1748 * Return 1 if no more processing is needed; otherwise return 0 to
1749 * schedule a response to be sent.
1750 */
1751static noinline int rc_rcv_error(struct ib_other_headers *ohdr, void *data,
1752                                 struct rvt_qp *qp, u32 opcode, u32 psn,
1753                                 int diff, struct hfi1_ctxtdata *rcd)
1754{
1755        struct hfi1_ibport *ibp = rcd_to_iport(rcd);
1756        struct rvt_ack_entry *e;
1757        unsigned long flags;
1758        u8 i, prev;
1759        int old_req;
1760
1761        trace_hfi1_rcv_error(qp, psn);
1762        if (diff > 0) {
1763                /*
1764                 * Packet sequence error.
1765                 * A NAK will ACK earlier sends and RDMA writes.
1766                 * Don't queue the NAK if we already sent one.
1767                 */
1768                if (!qp->r_nak_state) {
1769                        ibp->rvp.n_rc_seqnak++;
1770                        qp->r_nak_state = IB_NAK_PSN_ERROR;
1771                        /* Use the expected PSN. */
1772                        qp->r_ack_psn = qp->r_psn;
1773                        /*
1774                         * Wait to send the sequence NAK until all packets
1775                         * in the receive queue have been processed.
1776                         * Otherwise, we end up propagating congestion.
1777                         */
1778                        rc_defered_ack(rcd, qp);
1779                }
1780                goto done;
1781        }
1782
1783        /*
1784         * Handle a duplicate request.  Don't re-execute SEND, RDMA
1785         * write or atomic op.  Don't NAK errors, just silently drop
1786         * the duplicate request.  Note that r_sge, r_len, and
1787         * r_rcv_len may be in use so don't modify them.
1788         *
1789         * We are supposed to ACK the earliest duplicate PSN but we
1790         * can coalesce an outstanding duplicate ACK.  We have to
1791         * send the earliest so that RDMA reads can be restarted at
1792         * the requester's expected PSN.
1793         *
1794         * First, find where this duplicate PSN falls within the
1795         * ACKs previously sent.
1796         * old_req is true if there is an older response that is scheduled
1797         * to be sent before sending this one.
1798         */
1799        e = NULL;
1800        old_req = 1;
1801        ibp->rvp.n_rc_dupreq++;
1802
1803        spin_lock_irqsave(&qp->s_lock, flags);
1804
1805        for (i = qp->r_head_ack_queue; ; i = prev) {
1806                if (i == qp->s_tail_ack_queue)
1807                        old_req = 0;
1808                if (i)
1809                        prev = i - 1;
1810                else
1811                        prev = HFI1_MAX_RDMA_ATOMIC;
1812                if (prev == qp->r_head_ack_queue) {
1813                        e = NULL;
1814                        break;
1815                }
1816                e = &qp->s_ack_queue[prev];
1817                if (!e->opcode) {
1818                        e = NULL;
1819                        break;
1820                }
1821                if (cmp_psn(psn, e->psn) >= 0) {
1822                        if (prev == qp->s_tail_ack_queue &&
1823                            cmp_psn(psn, e->lpsn) <= 0)
1824                                old_req = 0;
1825                        break;
1826                }
1827        }
1828        switch (opcode) {
1829        case OP(RDMA_READ_REQUEST): {
1830                struct ib_reth *reth;
1831                u32 offset;
1832                u32 len;
1833
1834                /*
1835                 * If we didn't find the RDMA read request in the ack queue,
1836                 * we can ignore this request.
1837                 */
1838                if (!e || e->opcode != OP(RDMA_READ_REQUEST))
1839                        goto unlock_done;
1840                /* RETH comes after BTH */
1841                reth = &ohdr->u.rc.reth;
1842                /*
1843                 * Address range must be a subset of the original
1844                 * request and start on pmtu boundaries.
1845                 * We reuse the old ack_queue slot since the requester
1846                 * should not back up and request an earlier PSN for the
1847                 * same request.
1848                 */
1849                offset = delta_psn(psn, e->psn) * qp->pmtu;
1850                len = be32_to_cpu(reth->length);
1851                if (unlikely(offset + len != e->rdma_sge.sge_length))
1852                        goto unlock_done;
1853                if (e->rdma_sge.mr) {
1854                        rvt_put_mr(e->rdma_sge.mr);
1855                        e->rdma_sge.mr = NULL;
1856                }
1857                if (len != 0) {
1858                        u32 rkey = be32_to_cpu(reth->rkey);
1859                        u64 vaddr = get_ib_reth_vaddr(reth);
1860                        int ok;
1861
1862                        ok = rvt_rkey_ok(qp, &e->rdma_sge, len, vaddr, rkey,
1863                                         IB_ACCESS_REMOTE_READ);
1864                        if (unlikely(!ok))
1865                                goto unlock_done;
1866                } else {
1867                        e->rdma_sge.vaddr = NULL;
1868                        e->rdma_sge.length = 0;
1869                        e->rdma_sge.sge_length = 0;
1870                }
1871                e->psn = psn;
1872                if (old_req)
1873                        goto unlock_done;
1874                qp->s_tail_ack_queue = prev;
1875                break;
1876        }
1877
1878        case OP(COMPARE_SWAP):
1879        case OP(FETCH_ADD): {
1880                /*
1881                 * If we didn't find the atomic request in the ack queue
1882                 * or the send engine is already backed up to send an
1883                 * earlier entry, we can ignore this request.
1884                 */
1885                if (!e || e->opcode != (u8)opcode || old_req)
1886                        goto unlock_done;
1887                qp->s_tail_ack_queue = prev;
1888                break;
1889        }
1890
1891        default:
1892                /*
1893                 * Ignore this operation if it doesn't request an ACK
1894                 * or an earlier RDMA read or atomic is going to be resent.
1895                 */
1896                if (!(psn & IB_BTH_REQ_ACK) || old_req)
1897                        goto unlock_done;
1898                /*
1899                 * Resend the most recent ACK if this request is
1900                 * after all the previous RDMA reads and atomics.
1901                 */
1902                if (i == qp->r_head_ack_queue) {
1903                        spin_unlock_irqrestore(&qp->s_lock, flags);
1904                        qp->r_nak_state = 0;
1905                        qp->r_ack_psn = qp->r_psn - 1;
1906                        goto send_ack;
1907                }
1908
1909                /*
1910                 * Resend the RDMA read or atomic op which
1911                 * ACKs this duplicate request.
1912                 */
1913                qp->s_tail_ack_queue = i;
1914                break;
1915        }
1916        qp->s_ack_state = OP(ACKNOWLEDGE);
1917        qp->s_flags |= RVT_S_RESP_PENDING;
1918        qp->r_nak_state = 0;
1919        hfi1_schedule_send(qp);
1920
1921unlock_done:
1922        spin_unlock_irqrestore(&qp->s_lock, flags);
1923done:
1924        return 1;
1925
1926send_ack:
1927        return 0;
1928}
1929
1930static inline void update_ack_queue(struct rvt_qp *qp, unsigned n)
1931{
1932        unsigned next;
1933
1934        next = n + 1;
1935        if (next > HFI1_MAX_RDMA_ATOMIC)
1936                next = 0;
1937        qp->s_tail_ack_queue = next;
1938        qp->s_ack_state = OP(ACKNOWLEDGE);
1939}
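
/*
 * A minimal sketch, not part of the driver, of the responder ack-queue
 * indexing used above and in hfi1_rc_rcv() below: s_ack_queue[] has
 * HFI1_MAX_RDMA_ATOMIC + 1 slots, the head/tail indices wrap past
 * HFI1_MAX_RDMA_ATOMIC back to 0, and hfi1_rc_rcv() treats a "next" head
 * equal to s_tail_ack_queue as a full queue whose oldest entry may only be
 * reused once it has been sent.  example_next_ack_slot() is hypothetical:
 *
 *        static unsigned int example_next_ack_slot(unsigned int n,
 *                                                  unsigned int max_rdma_atomic)
 *        {
 *                unsigned int next = n + 1;
 *
 *                if (next > max_rdma_atomic)
 *                        next = 0;
 *                return next;
 *        }
 */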
1940
1941static void log_cca_event(struct hfi1_pportdata *ppd, u8 sl, u32 rlid,
1942                          u32 lqpn, u32 rqpn, u8 svc_type)
1943{
1944        struct opa_hfi1_cong_log_event_internal *cc_event;
1945        unsigned long flags;
1946
1947        if (sl >= OPA_MAX_SLS)
1948                return;
1949
1950        spin_lock_irqsave(&ppd->cc_log_lock, flags);
1951
1952        ppd->threshold_cong_event_map[sl / 8] |= 1 << (sl % 8);
1953        ppd->threshold_event_counter++;
1954
1955        cc_event = &ppd->cc_events[ppd->cc_log_idx++];
1956        if (ppd->cc_log_idx == OPA_CONG_LOG_ELEMS)
1957                ppd->cc_log_idx = 0;
1958        cc_event->lqpn = lqpn & RVT_QPN_MASK;
1959        cc_event->rqpn = rqpn & RVT_QPN_MASK;
1960        cc_event->sl = sl;
1961        cc_event->svc_type = svc_type;
1962        cc_event->rlid = rlid;
1963        /* keep timestamp in units of 1.024 usec */
1964        cc_event->timestamp = ktime_get_ns() / 1024;
1965
1966        spin_unlock_irqrestore(&ppd->cc_log_lock, flags);
1967}
1968
1969void process_becn(struct hfi1_pportdata *ppd, u8 sl, u32 rlid, u32 lqpn,
1970                  u32 rqpn, u8 svc_type)
1971{
1972        struct cca_timer *cca_timer;
1973        u16 ccti, ccti_incr, ccti_timer, ccti_limit;
1974        u8 trigger_threshold;
1975        struct cc_state *cc_state;
1976        unsigned long flags;
1977
1978        if (sl >= OPA_MAX_SLS)
1979                return;
1980
1981        cc_state = get_cc_state(ppd);
1982
1983        if (!cc_state)
1984                return;
1985
1986        /*
1987         * 1) increase CCTI (for this SL)
1988         * 2) select IPG (i.e., call set_link_ipg())
1989         * 3) start timer
1990         */
1991        ccti_limit = cc_state->cct.ccti_limit;
1992        ccti_incr = cc_state->cong_setting.entries[sl].ccti_increase;
1993        ccti_timer = cc_state->cong_setting.entries[sl].ccti_timer;
1994        trigger_threshold =
1995                cc_state->cong_setting.entries[sl].trigger_threshold;
1996
1997        spin_lock_irqsave(&ppd->cca_timer_lock, flags);
1998
1999        cca_timer = &ppd->cca_timer[sl];
2000        if (cca_timer->ccti < ccti_limit) {
2001                if (cca_timer->ccti + ccti_incr <= ccti_limit)
2002                        cca_timer->ccti += ccti_incr;
2003                else
2004                        cca_timer->ccti = ccti_limit;
2005                set_link_ipg(ppd);
2006        }
2007
2008        ccti = cca_timer->ccti;
2009
2010        if (!hrtimer_active(&cca_timer->hrtimer)) {
2011                /* ccti_timer is in units of 1.024 usec */
2012                unsigned long nsec = 1024 * ccti_timer;
2013
2014                hrtimer_start(&cca_timer->hrtimer, ns_to_ktime(nsec),
2015                              HRTIMER_MODE_REL);
2016        }
2017
2018        spin_unlock_irqrestore(&ppd->cca_timer_lock, flags);
2019
2020        if ((trigger_threshold != 0) && (ccti >= trigger_threshold))
2021                log_cca_event(ppd, sl, rlid, lqpn, rqpn, svc_type);
2022}
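
/*
 * A worked example, not part of the driver, of the 1.024 usec unit used by
 * process_becn() above and by the log_cca_event() timestamp: one tick is
 * 1.024 usec == 1024 ns, so the hrtimer interval is ccti_timer * 1024 ns and
 * a nanosecond timestamp is logged as ktime_get_ns() / 1024 ticks.
 * example_ccti_ticks_to_ns() is hypothetical:
 *
 *        static unsigned long example_ccti_ticks_to_ns(unsigned int ticks)
 *        {
 *                return 1024UL * ticks;
 *        }
 *
 * e.g. a ccti_timer of 2500 ticks arms the timer for 2,560,000 ns (2.56 ms),
 * and a timestamp of 10,240,000 ns is logged as exactly 10,000 ticks.
 */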
2023
2024/**
2025 * hfi1_rc_rcv - process an incoming RC packet
2026 * @packet: data packet information
2027 *
2028 * This is called from qp_rcv() to process an incoming RC packet
2029 * for the given QP.
2030 * May be called at interrupt level.
2031 */
2032void hfi1_rc_rcv(struct hfi1_packet *packet)
2033{
2034        struct hfi1_ctxtdata *rcd = packet->rcd;
2035        void *data = packet->payload;
2036        u32 tlen = packet->tlen;
2037        struct rvt_qp *qp = packet->qp;
2038        struct hfi1_ibport *ibp = rcd_to_iport(rcd);
2039        struct ib_other_headers *ohdr = packet->ohdr;
2040        u32 opcode = packet->opcode;
2041        u32 hdrsize = packet->hlen;
2042        u32 psn = ib_bth_get_psn(packet->ohdr);
2043        u32 pad = packet->pad;
2044        struct ib_wc wc;
2045        u32 pmtu = qp->pmtu;
2046        int diff;
2047        struct ib_reth *reth;
2048        unsigned long flags;
2049        int ret;
2050        bool is_fecn = false;
2051        bool copy_last = false;
2052        u32 rkey;
2053        u8 extra_bytes = pad + packet->extra_byte + (SIZE_OF_CRC << 2);
2054
2055        lockdep_assert_held(&qp->r_lock);
2056
2057        if (hfi1_ruc_check_hdr(ibp, packet))
2058                return;
2059
2060        is_fecn = process_ecn(qp, packet, false);
2061
2062        /*
2063         * Process responses (ACKs) before anything else.  Note that the
2064         * packet sequence number will be for something in the send work
2065         * queue rather than the expected receive packet sequence number.
2066         * In other words, this QP is the requester.
2067         */
2068        if (opcode >= OP(RDMA_READ_RESPONSE_FIRST) &&
2069            opcode <= OP(ATOMIC_ACKNOWLEDGE)) {
2070                rc_rcv_resp(packet);
2071                if (is_fecn)
2072                        goto send_ack;
2073                return;
2074        }
2075
2076        /* Compute 24 bits worth of difference. */
2077        diff = delta_psn(psn, qp->r_psn);
2078        if (unlikely(diff)) {
2079                if (rc_rcv_error(ohdr, data, qp, opcode, psn, diff, rcd))
2080                        return;
2081                goto send_ack;
2082        }
2083
2084        /* Check for opcode sequence errors. */
2085        switch (qp->r_state) {
2086        case OP(SEND_FIRST):
2087        case OP(SEND_MIDDLE):
2088                if (opcode == OP(SEND_MIDDLE) ||
2089                    opcode == OP(SEND_LAST) ||
2090                    opcode == OP(SEND_LAST_WITH_IMMEDIATE) ||
2091                    opcode == OP(SEND_LAST_WITH_INVALIDATE))
2092                        break;
2093                goto nack_inv;
2094
2095        case OP(RDMA_WRITE_FIRST):
2096        case OP(RDMA_WRITE_MIDDLE):
2097                if (opcode == OP(RDMA_WRITE_MIDDLE) ||
2098                    opcode == OP(RDMA_WRITE_LAST) ||
2099                    opcode == OP(RDMA_WRITE_LAST_WITH_IMMEDIATE))
2100                        break;
2101                goto nack_inv;
2102
2103        default:
2104                if (opcode == OP(SEND_MIDDLE) ||
2105                    opcode == OP(SEND_LAST) ||
2106                    opcode == OP(SEND_LAST_WITH_IMMEDIATE) ||
2107                    opcode == OP(SEND_LAST_WITH_INVALIDATE) ||
2108                    opcode == OP(RDMA_WRITE_MIDDLE) ||
2109                    opcode == OP(RDMA_WRITE_LAST) ||
2110                    opcode == OP(RDMA_WRITE_LAST_WITH_IMMEDIATE))
2111                        goto nack_inv;
2112                /*
2113                 * Note that it is up to the requester to not send a new
2114                 * RDMA read or atomic operation before receiving an ACK
2115                 * for the previous operation.
2116                 */
2117                break;
2118        }
2119
2120        if (qp->state == IB_QPS_RTR && !(qp->r_flags & RVT_R_COMM_EST))
2121                rvt_comm_est(qp);
2122
2123        /* OK, process the packet. */
2124        switch (opcode) {
2125        case OP(SEND_FIRST):
2126                ret = hfi1_rvt_get_rwqe(qp, 0);
2127                if (ret < 0)
2128                        goto nack_op_err;
2129                if (!ret)
2130                        goto rnr_nak;
2131                qp->r_rcv_len = 0;
2132                /* FALLTHROUGH */
2133        case OP(SEND_MIDDLE):
2134        case OP(RDMA_WRITE_MIDDLE):
2135send_middle:
2136                /* Check for invalid length PMTU or posted rwqe len. */
2137                /*
2138                 * There will be no padding for 9B packets, but 16B packets
2139                 * will come in with some padding since we always add
2140                 * CRC and LT bytes, which need to be flit aligned.
2141                 */
2142                if (unlikely(tlen != (hdrsize + pmtu + extra_bytes)))
2143                        goto nack_inv;
2144                qp->r_rcv_len += pmtu;
2145                if (unlikely(qp->r_rcv_len > qp->r_len))
2146                        goto nack_inv;
2147                hfi1_copy_sge(&qp->r_sge, data, pmtu, true, false);
2148                break;
2149
2150        case OP(RDMA_WRITE_LAST_WITH_IMMEDIATE):
2151                /* consume RWQE */
2152                ret = hfi1_rvt_get_rwqe(qp, 1);
2153                if (ret < 0)
2154                        goto nack_op_err;
2155                if (!ret)
2156                        goto rnr_nak;
2157                goto send_last_imm;
2158
2159        case OP(SEND_ONLY):
2160        case OP(SEND_ONLY_WITH_IMMEDIATE):
2161        case OP(SEND_ONLY_WITH_INVALIDATE):
2162                ret = hfi1_rvt_get_rwqe(qp, 0);
2163                if (ret < 0)
2164                        goto nack_op_err;
2165                if (!ret)
2166                        goto rnr_nak;
2167                qp->r_rcv_len = 0;
2168                if (opcode == OP(SEND_ONLY))
2169                        goto no_immediate_data;
2170                if (opcode == OP(SEND_ONLY_WITH_INVALIDATE))
2171                        goto send_last_inv;
2172                /* FALLTHROUGH -- for SEND_ONLY_WITH_IMMEDIATE */
2173        case OP(SEND_LAST_WITH_IMMEDIATE):
2174send_last_imm:
2175                wc.ex.imm_data = ohdr->u.imm_data;
2176                wc.wc_flags = IB_WC_WITH_IMM;
2177                goto send_last;
2178        case OP(SEND_LAST_WITH_INVALIDATE):
2179send_last_inv:
2180                rkey = be32_to_cpu(ohdr->u.ieth);
2181                if (rvt_invalidate_rkey(qp, rkey))
2182                        goto no_immediate_data;
2183                wc.ex.invalidate_rkey = rkey;
2184                wc.wc_flags = IB_WC_WITH_INVALIDATE;
2185                goto send_last;
2186        case OP(RDMA_WRITE_LAST):
2187                copy_last = rvt_is_user_qp(qp);
2188                /* fall through */
2189        case OP(SEND_LAST):
2190no_immediate_data:
2191                wc.wc_flags = 0;
2192                wc.ex.imm_data = 0;
2193send_last:
2194                /* Check for invalid length. */
2195                /* LAST len should be >= 1 */
2196                if (unlikely(tlen < (hdrsize + extra_bytes)))
2197                        goto nack_inv;
2198                /* Don't count the CRC (and padding and LT byte for 16B). */
2199                tlen -= (hdrsize + extra_bytes);
2200                wc.byte_len = tlen + qp->r_rcv_len;
2201                if (unlikely(wc.byte_len > qp->r_len))
2202                        goto nack_inv;
2203                hfi1_copy_sge(&qp->r_sge, data, tlen, true, copy_last);
2204                rvt_put_ss(&qp->r_sge);
2205                qp->r_msn++;
2206                if (!__test_and_clear_bit(RVT_R_WRID_VALID, &qp->r_aflags))
2207                        break;
2208                wc.wr_id = qp->r_wr_id;
2209                wc.status = IB_WC_SUCCESS;
2210                if (opcode == OP(RDMA_WRITE_LAST_WITH_IMMEDIATE) ||
2211                    opcode == OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE))
2212                        wc.opcode = IB_WC_RECV_RDMA_WITH_IMM;
2213                else
2214                        wc.opcode = IB_WC_RECV;
2215                wc.qp = &qp->ibqp;
2216                wc.src_qp = qp->remote_qpn;
2217                wc.slid = rdma_ah_get_dlid(&qp->remote_ah_attr) & U16_MAX;
2218                /*
2219                 * It seems that IB mandates the presence of an SL in a
2220                 * work completion only for the UD transport (see section
2221                 * 11.4.2 of IBTA Vol. 1).
2222                 *
2223                 * However, the way the SL is chosen below is consistent
2224                 * with the way that IB/qib works and tries to avoid
2225                 * introducing incompatibilities.
2226                 *
2227                 * See also OPA Vol. 1, section 9.7.6, and table 9-17.
2228                 */
2229                wc.sl = rdma_ah_get_sl(&qp->remote_ah_attr);
2230                /* zero fields that are N/A */
2231                wc.vendor_err = 0;
2232                wc.pkey_index = 0;
2233                wc.dlid_path_bits = 0;
2234                wc.port_num = 0;
2235                /* Signal completion event if the solicited bit is set. */
2236                rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.recv_cq), &wc,
2237                             ib_bth_is_solicited(ohdr));
2238                break;
2239
2240        case OP(RDMA_WRITE_ONLY):
2241                copy_last = rvt_is_user_qp(qp);
2242                /* fall through */
2243        case OP(RDMA_WRITE_FIRST):
2244        case OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE):
2245                if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_WRITE)))
2246                        goto nack_inv;
2247                /* consume RWQE */
2248                reth = &ohdr->u.rc.reth;
2249                qp->r_len = be32_to_cpu(reth->length);
2250                qp->r_rcv_len = 0;
2251                qp->r_sge.sg_list = NULL;
2252                if (qp->r_len != 0) {
2253                        u32 rkey = be32_to_cpu(reth->rkey);
2254                        u64 vaddr = get_ib_reth_vaddr(reth);
2255                        int ok;
2256
2257                        /* Check rkey & NAK */
2258                        ok = rvt_rkey_ok(qp, &qp->r_sge.sge, qp->r_len, vaddr,
2259                                         rkey, IB_ACCESS_REMOTE_WRITE);
2260                        if (unlikely(!ok))
2261                                goto nack_acc;
2262                        qp->r_sge.num_sge = 1;
2263                } else {
2264                        qp->r_sge.num_sge = 0;
2265                        qp->r_sge.sge.mr = NULL;
2266                        qp->r_sge.sge.vaddr = NULL;
2267                        qp->r_sge.sge.length = 0;
2268                        qp->r_sge.sge.sge_length = 0;
2269                }
2270                if (opcode == OP(RDMA_WRITE_FIRST))
2271                        goto send_middle;
2272                else if (opcode == OP(RDMA_WRITE_ONLY))
2273                        goto no_immediate_data;
2274                ret = hfi1_rvt_get_rwqe(qp, 1);
2275                if (ret < 0)
2276                        goto nack_op_err;
2277                if (!ret) {
2278                        /* peer will send again */
2279                        rvt_put_ss(&qp->r_sge);
2280                        goto rnr_nak;
2281                }
2282                wc.ex.imm_data = ohdr->u.rc.imm_data;
2283                wc.wc_flags = IB_WC_WITH_IMM;
2284                goto send_last;
2285
2286        case OP(RDMA_READ_REQUEST): {
2287                struct rvt_ack_entry *e;
2288                u32 len;
2289                u8 next;
2290
2291                if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_READ)))
2292                        goto nack_inv;
2293                next = qp->r_head_ack_queue + 1;
2294                /* s_ack_queue is size HFI1_MAX_RDMA_ATOMIC+1 so use > not >= */
2295                if (next > HFI1_MAX_RDMA_ATOMIC)
2296                        next = 0;
2297                spin_lock_irqsave(&qp->s_lock, flags);
2298                if (unlikely(next == qp->s_tail_ack_queue)) {
2299                        if (!qp->s_ack_queue[next].sent)
2300                                goto nack_inv_unlck;
2301                        update_ack_queue(qp, next);
2302                }
2303                e = &qp->s_ack_queue[qp->r_head_ack_queue];
2304                if (e->opcode == OP(RDMA_READ_REQUEST) && e->rdma_sge.mr) {
2305                        rvt_put_mr(e->rdma_sge.mr);
2306                        e->rdma_sge.mr = NULL;
2307                }
2308                reth = &ohdr->u.rc.reth;
2309                len = be32_to_cpu(reth->length);
2310                if (len) {
2311                        u32 rkey = be32_to_cpu(reth->rkey);
2312                        u64 vaddr = get_ib_reth_vaddr(reth);
2313                        int ok;
2314
2315                        /* Check rkey & NAK */
2316                        ok = rvt_rkey_ok(qp, &e->rdma_sge, len, vaddr,
2317                                         rkey, IB_ACCESS_REMOTE_READ);
2318                        if (unlikely(!ok))
2319                                goto nack_acc_unlck;
2320                        /*
2321                         * Update the next expected PSN.  We add 1 later
2322                         * below, so only add the remainder here.
2323                         */
2324                        qp->r_psn += rvt_div_mtu(qp, len - 1);
2325                } else {
2326                        e->rdma_sge.mr = NULL;
2327                        e->rdma_sge.vaddr = NULL;
2328                        e->rdma_sge.length = 0;
2329                        e->rdma_sge.sge_length = 0;
2330                }
2331                e->opcode = opcode;
2332                e->sent = 0;
2333                e->psn = psn;
2334                e->lpsn = qp->r_psn;
2335                /*
2336                 * We need to increment the MSN here instead of when we
2337                 * finish sending the result since a duplicate request would
2338                 * increment it more than once.
2339                 */
2340                qp->r_msn++;
2341                qp->r_psn++;
2342                qp->r_state = opcode;
2343                qp->r_nak_state = 0;
2344                qp->r_head_ack_queue = next;
2345
2346                /* Schedule the send engine. */
2347                qp->s_flags |= RVT_S_RESP_PENDING;
2348                hfi1_schedule_send(qp);
2349
2350                spin_unlock_irqrestore(&qp->s_lock, flags);
2351                if (is_fecn)
2352                        goto send_ack;
2353                return;
2354        }
2355
2356        case OP(COMPARE_SWAP):
2357        case OP(FETCH_ADD): {
2358                struct ib_atomic_eth *ateth;
2359                struct rvt_ack_entry *e;
2360                u64 vaddr;
2361                atomic64_t *maddr;
2362                u64 sdata;
2363                u32 rkey;
2364                u8 next;
2365
2366                if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_ATOMIC)))
2367                        goto nack_inv;
2368                next = qp->r_head_ack_queue + 1;
2369                if (next > HFI1_MAX_RDMA_ATOMIC)
2370                        next = 0;
2371                spin_lock_irqsave(&qp->s_lock, flags);
2372                if (unlikely(next == qp->s_tail_ack_queue)) {
2373                        if (!qp->s_ack_queue[next].sent)
2374                                goto nack_inv_unlck;
2375                        update_ack_queue(qp, next);
2376                }
2377                e = &qp->s_ack_queue[qp->r_head_ack_queue];
2378                if (e->opcode == OP(RDMA_READ_REQUEST) && e->rdma_sge.mr) {
2379                        rvt_put_mr(e->rdma_sge.mr);
2380                        e->rdma_sge.mr = NULL;
2381                }
2382                ateth = &ohdr->u.atomic_eth;
2383                vaddr = get_ib_ateth_vaddr(ateth);
2384                if (unlikely(vaddr & (sizeof(u64) - 1)))
2385                        goto nack_inv_unlck;
2386                rkey = be32_to_cpu(ateth->rkey);
2387                /* Check rkey & NAK */
2388                if (unlikely(!rvt_rkey_ok(qp, &qp->r_sge.sge, sizeof(u64),
2389                                          vaddr, rkey,
2390                                          IB_ACCESS_REMOTE_ATOMIC)))
2391                        goto nack_acc_unlck;
2392                /* Perform atomic OP and save result. */
2393                maddr = (atomic64_t *)qp->r_sge.sge.vaddr;
2394                sdata = get_ib_ateth_swap(ateth);
2395                e->atomic_data = (opcode == OP(FETCH_ADD)) ?
2396                        (u64)atomic64_add_return(sdata, maddr) - sdata :
2397                        (u64)cmpxchg((u64 *)qp->r_sge.sge.vaddr,
2398                                      get_ib_ateth_compare(ateth),
2399                                      sdata);
2400                rvt_put_mr(qp->r_sge.sge.mr);
2401                qp->r_sge.num_sge = 0;
2402                e->opcode = opcode;
2403                e->sent = 0;
2404                e->psn = psn;
2405                e->lpsn = psn;
2406                qp->r_msn++;
2407                qp->r_psn++;
2408                qp->r_state = opcode;
2409                qp->r_nak_state = 0;
2410                qp->r_head_ack_queue = next;
2411
2412                /* Schedule the send engine. */
2413                qp->s_flags |= RVT_S_RESP_PENDING;
2414                hfi1_schedule_send(qp);
2415
2416                spin_unlock_irqrestore(&qp->s_lock, flags);
2417                if (is_fecn)
2418                        goto send_ack;
2419                return;
2420        }
2421
2422        default:
2423                /* NAK unknown opcodes. */
2424                goto nack_inv;
2425        }
2426        qp->r_psn++;
2427        qp->r_state = opcode;
2428        qp->r_ack_psn = psn;
2429        qp->r_nak_state = 0;
2430        /* Send an ACK if requested or required. */
2431        if (psn & IB_BTH_REQ_ACK) {
2432                if (packet->numpkt == 0) {
2433                        rc_cancel_ack(qp);
2434                        goto send_ack;
2435                }
2436                if (qp->r_adefered >= HFI1_PSN_CREDIT) {
2437                        rc_cancel_ack(qp);
2438                        goto send_ack;
2439                }
2440                if (unlikely(is_fecn)) {
2441                        rc_cancel_ack(qp);
2442                        goto send_ack;
2443                }
2444                qp->r_adefered++;
2445                rc_defered_ack(rcd, qp);
2446        }
2447        return;
2448
2449rnr_nak:
2450        qp->r_nak_state = qp->r_min_rnr_timer | IB_RNR_NAK;
2451        qp->r_ack_psn = qp->r_psn;
2452        /* Queue RNR NAK for later */
2453        rc_defered_ack(rcd, qp);
2454        return;
2455
2456nack_op_err:
2457        rvt_rc_error(qp, IB_WC_LOC_QP_OP_ERR);
2458        qp->r_nak_state = IB_NAK_REMOTE_OPERATIONAL_ERROR;
2459        qp->r_ack_psn = qp->r_psn;
2460        /* Queue NAK for later */
2461        rc_defered_ack(rcd, qp);
2462        return;
2463
2464nack_inv_unlck:
2465        spin_unlock_irqrestore(&qp->s_lock, flags);
2466nack_inv:
2467        rvt_rc_error(qp, IB_WC_LOC_QP_OP_ERR);
2468        qp->r_nak_state = IB_NAK_INVALID_REQUEST;
2469        qp->r_ack_psn = qp->r_psn;
2470        /* Queue NAK for later */
2471        rc_defered_ack(rcd, qp);
2472        return;
2473
2474nack_acc_unlck:
2475        spin_unlock_irqrestore(&qp->s_lock, flags);
2476nack_acc:
2477        rvt_rc_error(qp, IB_WC_LOC_PROT_ERR);
2478        qp->r_nak_state = IB_NAK_REMOTE_ACCESS_ERROR;
2479        qp->r_ack_psn = qp->r_psn;
2480send_ack:
2481        hfi1_send_rc_ack(packet, is_fecn);
2482}
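
/*
 * A minimal sketch, not part of the driver, of the BTH dword that
 * hfi1_rc_rcv() above tests with IB_BTH_REQ_ACK: in the IB BTH, the word
 * holding the PSN carries the AckReq ("A") bit in its top bit and the
 * 24-bit PSN in its low bits, which is why the same psn variable can feed
 * both delta_psn() and the psn & IB_BTH_REQ_ACK check.  The split below is
 * a hypothetical illustration, not a replacement for the ib_bth_*()
 * accessors:
 *
 *        struct example_bth_psn {
 *                unsigned int ack_requested;
 *                unsigned int psn;
 *        };
 *
 *        static struct example_bth_psn example_split_bth_psn(unsigned int dword)
 *        {
 *                struct example_bth_psn p;
 *
 *                p.ack_requested = dword >> 31;
 *                p.psn = dword & 0xffffff;
 *                return p;
 *        }
 */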
2483
2484void hfi1_rc_hdrerr(
2485        struct hfi1_ctxtdata *rcd,
2486        struct hfi1_packet *packet,
2487        struct rvt_qp *qp)
2488{
2489        struct hfi1_ibport *ibp = rcd_to_iport(rcd);
2490        int diff;
2491        u32 opcode;
2492        u32 psn;
2493
2494        if (hfi1_ruc_check_hdr(ibp, packet))
2495                return;
2496
2497        psn = ib_bth_get_psn(packet->ohdr);
2498        opcode = ib_bth_get_opcode(packet->ohdr);
2499
2500        /* Only deal with RDMA Writes for now */
2501        if (opcode < IB_OPCODE_RC_RDMA_READ_RESPONSE_FIRST) {
2502                diff = delta_psn(psn, qp->r_psn);
2503                if (!qp->r_nak_state && diff >= 0) {
2504                        ibp->rvp.n_rc_seqnak++;
2505                        qp->r_nak_state = IB_NAK_PSN_ERROR;
2506                        /* Use the expected PSN. */
2507                        qp->r_ack_psn = qp->r_psn;
2508                        /*
2509                         * Wait to send the sequence NAK until all packets
2510                         * in the receive queue have been processed.
2511                         * Otherwise, we end up propagating congestion.
2515                         */
2516                        rc_defered_ack(rcd, qp);
2517                } /* Out of sequence NAK */
2518        } /* QP Request NAKs */
2519}
2520