linux/drivers/infiniband/hw/qib/qib_rc.c
   1/*
   2 * Copyright (c) 2006, 2007, 2008, 2009 QLogic Corporation. All rights reserved.
   3 * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
   4 *
   5 * This software is available to you under a choice of one of two
   6 * licenses.  You may choose to be licensed under the terms of the GNU
   7 * General Public License (GPL) Version 2, available from the file
   8 * COPYING in the main directory of this source tree, or the
   9 * OpenIB.org BSD license below:
  10 *
  11 *     Redistribution and use in source and binary forms, with or
  12 *     without modification, are permitted provided that the following
  13 *     conditions are met:
  14 *
  15 *      - Redistributions of source code must retain the above
  16 *        copyright notice, this list of conditions and the following
  17 *        disclaimer.
  18 *
  19 *      - Redistributions in binary form must reproduce the above
  20 *        copyright notice, this list of conditions and the following
  21 *        disclaimer in the documentation and/or other materials
  22 *        provided with the distribution.
  23 *
  24 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  25 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  26 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  27 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
  28 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
  29 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  31 * SOFTWARE.
  32 */
  33
  34#include <linux/io.h>
  35
  36#include "qib.h"
  37
  38/* cut down ridiculously long IB macro names */
  39#define OP(x) IB_OPCODE_RC_##x
  40
  41static void rc_timeout(unsigned long arg);
  42
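/*
 * Reposition the SGE state to the offset within the WQE that corresponds
 * to @psn: each packet after the first carries pmtu bytes, so
 * (psn - wqe->psn) packets' worth of data has already been sent.
 * Returns the number of bytes remaining to send.
 */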
  43static u32 restart_sge(struct rvt_sge_state *ss, struct rvt_swqe *wqe,
  44                       u32 psn, u32 pmtu)
  45{
  46        u32 len;
  47
  48        len = ((psn - wqe->psn) & QIB_PSN_MASK) * pmtu;
  49        ss->sge = wqe->sg_list[0];
  50        ss->sg_list = wqe->sg_list + 1;
  51        ss->num_sge = wqe->wr.num_sge;
  52        ss->total_len = wqe->length;
  53        qib_skip_sge(ss, len, 0);
  54        return wqe->length - len;
  55}
  56
  57static void start_timer(struct rvt_qp *qp)
  58{
  59        qp->s_flags |= RVT_S_TIMER;
  60        qp->s_timer.function = rc_timeout;
  61        /* 4.096 usec. * (1 << qp->timeout) */
  62        qp->s_timer.expires = jiffies + qp->timeout_jiffies;
  63        add_timer(&qp->s_timer);
  64}
  65
  66/**
  67 * qib_make_rc_ack - construct a response packet (ACK, NAK, or RDMA read)
  68 * @dev: the device for this QP
  69 * @qp: a pointer to the QP
  70 * @ohdr: a pointer to the IB header being constructed
  71 * @pmtu: the path MTU
  72 *
  73 * Return 1 if constructed; otherwise, return 0.
   74 * Note that we are on the responder's side of the QP context.
  75 * Note the QP s_lock must be held.
  76 */
  77static int qib_make_rc_ack(struct qib_ibdev *dev, struct rvt_qp *qp,
  78                           struct ib_other_headers *ohdr, u32 pmtu)
  79{
  80        struct rvt_ack_entry *e;
  81        u32 hwords;
  82        u32 len;
  83        u32 bth0;
  84        u32 bth2;
  85
  86        /* Don't send an ACK if we aren't supposed to. */
  87        if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK))
  88                goto bail;
  89
  90        /* header size in 32-bit words LRH+BTH = (8+12)/4. */
  91        hwords = 5;
  92
  93        switch (qp->s_ack_state) {
  94        case OP(RDMA_READ_RESPONSE_LAST):
  95        case OP(RDMA_READ_RESPONSE_ONLY):
  96                e = &qp->s_ack_queue[qp->s_tail_ack_queue];
  97                if (e->rdma_sge.mr) {
  98                        rvt_put_mr(e->rdma_sge.mr);
  99                        e->rdma_sge.mr = NULL;
 100                }
 101                /* FALLTHROUGH */
 102        case OP(ATOMIC_ACKNOWLEDGE):
 103                /*
 104                 * We can increment the tail pointer now that the last
 105                 * response has been sent instead of only being
 106                 * constructed.
 107                 */
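                /* Valid s_ack_queue indices run 0..QIB_MAX_RDMA_ATOMIC, hence the '>' wrap test. */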
 108                if (++qp->s_tail_ack_queue > QIB_MAX_RDMA_ATOMIC)
 109                        qp->s_tail_ack_queue = 0;
 110                /* FALLTHROUGH */
 111        case OP(SEND_ONLY):
 112        case OP(ACKNOWLEDGE):
 113                /* Check for no next entry in the queue. */
 114                if (qp->r_head_ack_queue == qp->s_tail_ack_queue) {
 115                        if (qp->s_flags & RVT_S_ACK_PENDING)
 116                                goto normal;
 117                        goto bail;
 118                }
 119
 120                e = &qp->s_ack_queue[qp->s_tail_ack_queue];
 121                if (e->opcode == OP(RDMA_READ_REQUEST)) {
 122                        /*
  123                         * If an RDMA read response is being resent and
  124                         * we haven't seen the duplicate request yet,
  125                         * then stop sending the remaining responses and
  126                         * wait for the requester to resend the read request.
 127                         */
 128                        len = e->rdma_sge.sge_length;
 129                        if (len && !e->rdma_sge.mr) {
 130                                qp->s_tail_ack_queue = qp->r_head_ack_queue;
 131                                goto bail;
 132                        }
 133                        /* Copy SGE state in case we need to resend */
 134                        qp->s_rdma_mr = e->rdma_sge.mr;
 135                        if (qp->s_rdma_mr)
 136                                rvt_get_mr(qp->s_rdma_mr);
 137                        qp->s_ack_rdma_sge.sge = e->rdma_sge;
 138                        qp->s_ack_rdma_sge.num_sge = 1;
 139                        qp->s_cur_sge = &qp->s_ack_rdma_sge;
 140                        if (len > pmtu) {
 141                                len = pmtu;
 142                                qp->s_ack_state = OP(RDMA_READ_RESPONSE_FIRST);
 143                        } else {
 144                                qp->s_ack_state = OP(RDMA_READ_RESPONSE_ONLY);
 145                                e->sent = 1;
 146                        }
 147                        ohdr->u.aeth = qib_compute_aeth(qp);
 148                        hwords++;
 149                        qp->s_ack_rdma_psn = e->psn;
 150                        bth2 = qp->s_ack_rdma_psn++ & QIB_PSN_MASK;
 151                } else {
 152                        /* COMPARE_SWAP or FETCH_ADD */
 153                        qp->s_cur_sge = NULL;
 154                        len = 0;
 155                        qp->s_ack_state = OP(ATOMIC_ACKNOWLEDGE);
 156                        ohdr->u.at.aeth = qib_compute_aeth(qp);
 157                        ib_u64_put(e->atomic_data, &ohdr->u.at.atomic_ack_eth);
 158                        hwords += sizeof(ohdr->u.at) / sizeof(u32);
 159                        bth2 = e->psn & QIB_PSN_MASK;
 160                        e->sent = 1;
 161                }
 162                bth0 = qp->s_ack_state << 24;
 163                break;
 164
 165        case OP(RDMA_READ_RESPONSE_FIRST):
 166                qp->s_ack_state = OP(RDMA_READ_RESPONSE_MIDDLE);
 167                /* FALLTHROUGH */
 168        case OP(RDMA_READ_RESPONSE_MIDDLE):
 169                qp->s_cur_sge = &qp->s_ack_rdma_sge;
 170                qp->s_rdma_mr = qp->s_ack_rdma_sge.sge.mr;
 171                if (qp->s_rdma_mr)
 172                        rvt_get_mr(qp->s_rdma_mr);
 173                len = qp->s_ack_rdma_sge.sge.sge_length;
 174                if (len > pmtu)
 175                        len = pmtu;
 176                else {
 177                        ohdr->u.aeth = qib_compute_aeth(qp);
 178                        hwords++;
 179                        qp->s_ack_state = OP(RDMA_READ_RESPONSE_LAST);
 180                        e = &qp->s_ack_queue[qp->s_tail_ack_queue];
 181                        e->sent = 1;
 182                }
 183                bth0 = qp->s_ack_state << 24;
 184                bth2 = qp->s_ack_rdma_psn++ & QIB_PSN_MASK;
 185                break;
 186
 187        default:
 188normal:
 189                /*
 190                 * Send a regular ACK.
 191                 * Set the s_ack_state so we wait until after sending
 192                 * the ACK before setting s_ack_state to ACKNOWLEDGE
 193                 * (see above).
 194                 */
 195                qp->s_ack_state = OP(SEND_ONLY);
 196                qp->s_flags &= ~RVT_S_ACK_PENDING;
 197                qp->s_cur_sge = NULL;
 198                if (qp->s_nak_state)
 199                        ohdr->u.aeth =
 200                                cpu_to_be32((qp->r_msn & QIB_MSN_MASK) |
 201                                            (qp->s_nak_state <<
 202                                             QIB_AETH_CREDIT_SHIFT));
 203                else
 204                        ohdr->u.aeth = qib_compute_aeth(qp);
 205                hwords++;
 206                len = 0;
 207                bth0 = OP(ACKNOWLEDGE) << 24;
 208                bth2 = qp->s_ack_psn & QIB_PSN_MASK;
 209        }
 210        qp->s_rdma_ack_cnt++;
 211        qp->s_hdrwords = hwords;
 212        qp->s_cur_size = len;
 213        qib_make_ruc_header(qp, ohdr, bth0, bth2);
 214        return 1;
 215
 216bail:
 217        qp->s_ack_state = OP(ACKNOWLEDGE);
 218        qp->s_flags &= ~(RVT_S_RESP_PENDING | RVT_S_ACK_PENDING);
 219        return 0;
 220}
 221
 222/**
 223 * qib_make_rc_req - construct a request packet (SEND, RDMA r/w, ATOMIC)
 224 * @qp: a pointer to the QP
 225 *
 226 * Assumes the s_lock is held.
 227 *
 228 * Return 1 if constructed; otherwise, return 0.
 229 */
 230int qib_make_rc_req(struct rvt_qp *qp, unsigned long *flags)
 231{
 232        struct qib_qp_priv *priv = qp->priv;
 233        struct qib_ibdev *dev = to_idev(qp->ibqp.device);
 234        struct ib_other_headers *ohdr;
 235        struct rvt_sge_state *ss;
 236        struct rvt_swqe *wqe;
 237        u32 hwords;
 238        u32 len;
 239        u32 bth0;
 240        u32 bth2;
 241        u32 pmtu = qp->pmtu;
 242        char newreq;
 243        int ret = 0;
 244        int delta;
 245
 246        ohdr = &priv->s_hdr->u.oth;
 247        if (qp->remote_ah_attr.ah_flags & IB_AH_GRH)
 248                ohdr = &priv->s_hdr->u.l.oth;
 249
  250        /* Sending responses has higher priority than sending requests. */
 251        if ((qp->s_flags & RVT_S_RESP_PENDING) &&
 252            qib_make_rc_ack(dev, qp, ohdr, pmtu))
 253                goto done;
 254
 255        if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_SEND_OK)) {
 256                if (!(ib_rvt_state_ops[qp->state] & RVT_FLUSH_SEND))
 257                        goto bail;
 258                /* We are in the error state, flush the work request. */
 259                smp_read_barrier_depends(); /* see post_one_send() */
 260                if (qp->s_last == ACCESS_ONCE(qp->s_head))
 261                        goto bail;
 262                /* If DMAs are in progress, we can't flush immediately. */
 263                if (atomic_read(&priv->s_dma_busy)) {
 264                        qp->s_flags |= RVT_S_WAIT_DMA;
 265                        goto bail;
 266                }
 267                wqe = rvt_get_swqe_ptr(qp, qp->s_last);
 268                qib_send_complete(qp, wqe, qp->s_last != qp->s_acked ?
 269                        IB_WC_SUCCESS : IB_WC_WR_FLUSH_ERR);
 270                /* will get called again */
 271                goto done;
 272        }
 273
 274        if (qp->s_flags & (RVT_S_WAIT_RNR | RVT_S_WAIT_ACK))
 275                goto bail;
 276
 277        if (qib_cmp24(qp->s_psn, qp->s_sending_hpsn) <= 0) {
 278                if (qib_cmp24(qp->s_sending_psn, qp->s_sending_hpsn) <= 0) {
 279                        qp->s_flags |= RVT_S_WAIT_PSN;
 280                        goto bail;
 281                }
 282                qp->s_sending_psn = qp->s_psn;
 283                qp->s_sending_hpsn = qp->s_psn - 1;
 284        }
 285
 286        /* header size in 32-bit words LRH+BTH = (8+12)/4. */
 287        hwords = 5;
 288        bth0 = 0;
 289
 290        /* Send a request. */
 291        wqe = rvt_get_swqe_ptr(qp, qp->s_cur);
 292        switch (qp->s_state) {
 293        default:
 294                if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_NEXT_SEND_OK))
 295                        goto bail;
 296                /*
 297                 * Resend an old request or start a new one.
 298                 *
 299                 * We keep track of the current SWQE so that
 300                 * we don't reset the "furthest progress" state
 301                 * if we need to back up.
 302                 */
 303                newreq = 0;
 304                if (qp->s_cur == qp->s_tail) {
 305                        /* Check if send work queue is empty. */
 306                        if (qp->s_tail == qp->s_head)
 307                                goto bail;
 308                        /*
 309                         * If a fence is requested, wait for previous
 310                         * RDMA read and atomic operations to finish.
 311                         */
 312                        if ((wqe->wr.send_flags & IB_SEND_FENCE) &&
 313                            qp->s_num_rd_atomic) {
 314                                qp->s_flags |= RVT_S_WAIT_FENCE;
 315                                goto bail;
 316                        }
 317                        newreq = 1;
 318                        qp->s_psn = wqe->psn;
 319                }
 320                /*
 321                 * Note that we have to be careful not to modify the
 322                 * original work request since we may need to resend
 323                 * it.
 324                 */
 325                len = wqe->length;
 326                ss = &qp->s_sge;
 327                bth2 = qp->s_psn & QIB_PSN_MASK;
 328                switch (wqe->wr.opcode) {
 329                case IB_WR_SEND:
 330                case IB_WR_SEND_WITH_IMM:
 331                        /* If no credit, return. */
 332                        if (!(qp->s_flags & RVT_S_UNLIMITED_CREDIT) &&
 333                            qib_cmp24(wqe->ssn, qp->s_lsn + 1) > 0) {
 334                                qp->s_flags |= RVT_S_WAIT_SSN_CREDIT;
 335                                goto bail;
 336                        }
 337                        if (len > pmtu) {
 338                                qp->s_state = OP(SEND_FIRST);
 339                                len = pmtu;
 340                                break;
 341                        }
 342                        if (wqe->wr.opcode == IB_WR_SEND)
 343                                qp->s_state = OP(SEND_ONLY);
 344                        else {
 345                                qp->s_state = OP(SEND_ONLY_WITH_IMMEDIATE);
 346                                /* Immediate data comes after the BTH */
 347                                ohdr->u.imm_data = wqe->wr.ex.imm_data;
 348                                hwords += 1;
 349                        }
 350                        if (wqe->wr.send_flags & IB_SEND_SOLICITED)
 351                                bth0 |= IB_BTH_SOLICITED;
 352                        bth2 |= IB_BTH_REQ_ACK;
 353                        if (++qp->s_cur == qp->s_size)
 354                                qp->s_cur = 0;
 355                        break;
 356
 357                case IB_WR_RDMA_WRITE:
 358                        if (newreq && !(qp->s_flags & RVT_S_UNLIMITED_CREDIT))
 359                                qp->s_lsn++;
 360                        /* FALLTHROUGH */
 361                case IB_WR_RDMA_WRITE_WITH_IMM:
 362                        /* If no credit, return. */
 363                        if (!(qp->s_flags & RVT_S_UNLIMITED_CREDIT) &&
 364                            qib_cmp24(wqe->ssn, qp->s_lsn + 1) > 0) {
 365                                qp->s_flags |= RVT_S_WAIT_SSN_CREDIT;
 366                                goto bail;
 367                        }
 368
 369                        ohdr->u.rc.reth.vaddr =
 370                                cpu_to_be64(wqe->rdma_wr.remote_addr);
 371                        ohdr->u.rc.reth.rkey =
 372                                cpu_to_be32(wqe->rdma_wr.rkey);
 373                        ohdr->u.rc.reth.length = cpu_to_be32(len);
 374                        hwords += sizeof(struct ib_reth) / sizeof(u32);
 375                        if (len > pmtu) {
 376                                qp->s_state = OP(RDMA_WRITE_FIRST);
 377                                len = pmtu;
 378                                break;
 379                        }
 380                        if (wqe->rdma_wr.wr.opcode == IB_WR_RDMA_WRITE)
 381                                qp->s_state = OP(RDMA_WRITE_ONLY);
 382                        else {
 383                                qp->s_state = OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE);
 384                                /* Immediate data comes after RETH */
 385                                ohdr->u.rc.imm_data =
 386                                        wqe->rdma_wr.wr.ex.imm_data;
 387                                hwords += 1;
 388                                if (wqe->rdma_wr.wr.send_flags & IB_SEND_SOLICITED)
 389                                        bth0 |= IB_BTH_SOLICITED;
 390                        }
 391                        bth2 |= IB_BTH_REQ_ACK;
 392                        if (++qp->s_cur == qp->s_size)
 393                                qp->s_cur = 0;
 394                        break;
 395
 396                case IB_WR_RDMA_READ:
 397                        /*
 398                         * Don't allow more operations to be started
 399                         * than the QP limits allow.
 400                         */
 401                        if (newreq) {
 402                                if (qp->s_num_rd_atomic >=
 403                                    qp->s_max_rd_atomic) {
 404                                        qp->s_flags |= RVT_S_WAIT_RDMAR;
 405                                        goto bail;
 406                                }
 407                                qp->s_num_rd_atomic++;
 408                                if (!(qp->s_flags & RVT_S_UNLIMITED_CREDIT))
 409                                        qp->s_lsn++;
 410                        }
 411
 412                        ohdr->u.rc.reth.vaddr =
 413                                cpu_to_be64(wqe->rdma_wr.remote_addr);
 414                        ohdr->u.rc.reth.rkey =
 415                                cpu_to_be32(wqe->rdma_wr.rkey);
 416                        ohdr->u.rc.reth.length = cpu_to_be32(len);
 417                        qp->s_state = OP(RDMA_READ_REQUEST);
 418                        hwords += sizeof(ohdr->u.rc.reth) / sizeof(u32);
 419                        ss = NULL;
 420                        len = 0;
 421                        bth2 |= IB_BTH_REQ_ACK;
 422                        if (++qp->s_cur == qp->s_size)
 423                                qp->s_cur = 0;
 424                        break;
 425
 426                case IB_WR_ATOMIC_CMP_AND_SWP:
 427                case IB_WR_ATOMIC_FETCH_AND_ADD:
 428                        /*
 429                         * Don't allow more operations to be started
 430                         * than the QP limits allow.
 431                         */
 432                        if (newreq) {
 433                                if (qp->s_num_rd_atomic >=
 434                                    qp->s_max_rd_atomic) {
 435                                        qp->s_flags |= RVT_S_WAIT_RDMAR;
 436                                        goto bail;
 437                                }
 438                                qp->s_num_rd_atomic++;
 439                                if (!(qp->s_flags & RVT_S_UNLIMITED_CREDIT))
 440                                        qp->s_lsn++;
 441                        }
 442                        if (wqe->atomic_wr.wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP) {
 443                                qp->s_state = OP(COMPARE_SWAP);
 444                                put_ib_ateth_swap(wqe->atomic_wr.swap,
 445                                                  &ohdr->u.atomic_eth);
  446                                put_ib_ateth_compare(wqe->atomic_wr.compare_add,
  447                                                     &ohdr->u.atomic_eth);
 448                        } else {
 449                                qp->s_state = OP(FETCH_ADD);
 450                                put_ib_ateth_swap(wqe->atomic_wr.compare_add,
 451                                                  &ohdr->u.atomic_eth);
  452                                put_ib_ateth_compare(0, &ohdr->u.atomic_eth);
 453                        }
 454                        put_ib_ateth_vaddr(wqe->atomic_wr.remote_addr,
 455                                           &ohdr->u.atomic_eth);
 456                        ohdr->u.atomic_eth.rkey = cpu_to_be32(
 457                                wqe->atomic_wr.rkey);
 458                        hwords += sizeof(struct ib_atomic_eth) / sizeof(u32);
 459                        ss = NULL;
 460                        len = 0;
 461                        bth2 |= IB_BTH_REQ_ACK;
 462                        if (++qp->s_cur == qp->s_size)
 463                                qp->s_cur = 0;
 464                        break;
 465
 466                default:
 467                        goto bail;
 468                }
 469                qp->s_sge.sge = wqe->sg_list[0];
 470                qp->s_sge.sg_list = wqe->sg_list + 1;
 471                qp->s_sge.num_sge = wqe->wr.num_sge;
 472                qp->s_sge.total_len = wqe->length;
 473                qp->s_len = wqe->length;
 474                if (newreq) {
 475                        qp->s_tail++;
 476                        if (qp->s_tail >= qp->s_size)
 477                                qp->s_tail = 0;
 478                }
 479                if (wqe->wr.opcode == IB_WR_RDMA_READ)
 480                        qp->s_psn = wqe->lpsn + 1;
 481                else
 482                        qp->s_psn++;
 483                break;
 484
 485        case OP(RDMA_READ_RESPONSE_FIRST):
 486                /*
 487                 * qp->s_state is normally set to the opcode of the
 488                 * last packet constructed for new requests and therefore
 489                 * is never set to RDMA read response.
 490                 * RDMA_READ_RESPONSE_FIRST is used by the ACK processing
 491                 * thread to indicate a SEND needs to be restarted from an
  492                 * earlier PSN without interfering with the sending thread.
 493                 * See qib_restart_rc().
 494                 */
 495                qp->s_len = restart_sge(&qp->s_sge, wqe, qp->s_psn, pmtu);
 496                /* FALLTHROUGH */
 497        case OP(SEND_FIRST):
 498                qp->s_state = OP(SEND_MIDDLE);
 499                /* FALLTHROUGH */
 500        case OP(SEND_MIDDLE):
 501                bth2 = qp->s_psn++ & QIB_PSN_MASK;
 502                ss = &qp->s_sge;
 503                len = qp->s_len;
 504                if (len > pmtu) {
 505                        len = pmtu;
 506                        break;
 507                }
 508                if (wqe->wr.opcode == IB_WR_SEND)
 509                        qp->s_state = OP(SEND_LAST);
 510                else {
 511                        qp->s_state = OP(SEND_LAST_WITH_IMMEDIATE);
 512                        /* Immediate data comes after the BTH */
 513                        ohdr->u.imm_data = wqe->wr.ex.imm_data;
 514                        hwords += 1;
 515                }
 516                if (wqe->wr.send_flags & IB_SEND_SOLICITED)
 517                        bth0 |= IB_BTH_SOLICITED;
 518                bth2 |= IB_BTH_REQ_ACK;
 519                qp->s_cur++;
 520                if (qp->s_cur >= qp->s_size)
 521                        qp->s_cur = 0;
 522                break;
 523
 524        case OP(RDMA_READ_RESPONSE_LAST):
 525                /*
 526                 * qp->s_state is normally set to the opcode of the
 527                 * last packet constructed for new requests and therefore
 528                 * is never set to RDMA read response.
 529                 * RDMA_READ_RESPONSE_LAST is used by the ACK processing
  530                 * thread to indicate an RDMA write needs to be restarted from
  531                 * an earlier PSN without interfering with the sending thread.
 532                 * See qib_restart_rc().
 533                 */
 534                qp->s_len = restart_sge(&qp->s_sge, wqe, qp->s_psn, pmtu);
 535                /* FALLTHROUGH */
 536        case OP(RDMA_WRITE_FIRST):
 537                qp->s_state = OP(RDMA_WRITE_MIDDLE);
 538                /* FALLTHROUGH */
 539        case OP(RDMA_WRITE_MIDDLE):
 540                bth2 = qp->s_psn++ & QIB_PSN_MASK;
 541                ss = &qp->s_sge;
 542                len = qp->s_len;
 543                if (len > pmtu) {
 544                        len = pmtu;
 545                        break;
 546                }
 547                if (wqe->wr.opcode == IB_WR_RDMA_WRITE)
 548                        qp->s_state = OP(RDMA_WRITE_LAST);
 549                else {
 550                        qp->s_state = OP(RDMA_WRITE_LAST_WITH_IMMEDIATE);
 551                        /* Immediate data comes after the BTH */
 552                        ohdr->u.imm_data = wqe->wr.ex.imm_data;
 553                        hwords += 1;
 554                        if (wqe->wr.send_flags & IB_SEND_SOLICITED)
 555                                bth0 |= IB_BTH_SOLICITED;
 556                }
 557                bth2 |= IB_BTH_REQ_ACK;
 558                qp->s_cur++;
 559                if (qp->s_cur >= qp->s_size)
 560                        qp->s_cur = 0;
 561                break;
 562
 563        case OP(RDMA_READ_RESPONSE_MIDDLE):
 564                /*
 565                 * qp->s_state is normally set to the opcode of the
 566                 * last packet constructed for new requests and therefore
 567                 * is never set to RDMA read response.
 568                 * RDMA_READ_RESPONSE_MIDDLE is used by the ACK processing
  569                 * thread to indicate an RDMA read needs to be restarted from
  570                 * an earlier PSN without interfering with the sending thread.
 571                 * See qib_restart_rc().
 572                 */
 573                len = ((qp->s_psn - wqe->psn) & QIB_PSN_MASK) * pmtu;
 574                ohdr->u.rc.reth.vaddr =
 575                        cpu_to_be64(wqe->rdma_wr.remote_addr + len);
 576                ohdr->u.rc.reth.rkey =
 577                        cpu_to_be32(wqe->rdma_wr.rkey);
 578                ohdr->u.rc.reth.length = cpu_to_be32(wqe->length - len);
 579                qp->s_state = OP(RDMA_READ_REQUEST);
 580                hwords += sizeof(ohdr->u.rc.reth) / sizeof(u32);
 581                bth2 = (qp->s_psn & QIB_PSN_MASK) | IB_BTH_REQ_ACK;
 582                qp->s_psn = wqe->lpsn + 1;
 583                ss = NULL;
 584                len = 0;
 585                qp->s_cur++;
 586                if (qp->s_cur == qp->s_size)
 587                        qp->s_cur = 0;
 588                break;
 589        }
 590        qp->s_sending_hpsn = bth2;
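        /*
         * The shift pair drops any flag bits above the 24-bit PSN (such as
         * IB_BTH_REQ_ACK) and sign-extends the PSN distance from the start
         * of this WQE.  Request an explicit ACK every QIB_PSN_CREDIT
         * packets so long transfers are acknowledged at regular intervals.
         */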
 591        delta = (((int) bth2 - (int) wqe->psn) << 8) >> 8;
 592        if (delta && delta % QIB_PSN_CREDIT == 0)
 593                bth2 |= IB_BTH_REQ_ACK;
 594        if (qp->s_flags & RVT_S_SEND_ONE) {
 595                qp->s_flags &= ~RVT_S_SEND_ONE;
 596                qp->s_flags |= RVT_S_WAIT_ACK;
 597                bth2 |= IB_BTH_REQ_ACK;
 598        }
 599        qp->s_len -= len;
 600        qp->s_hdrwords = hwords;
 601        qp->s_cur_sge = ss;
 602        qp->s_cur_size = len;
 603        qib_make_ruc_header(qp, ohdr, bth0 | (qp->s_state << 24), bth2);
 604done:
 605        return 1;
 606bail:
 607        qp->s_flags &= ~RVT_S_BUSY;
 608        return ret;
 609}
 610
 611/**
 612 * qib_send_rc_ack - Construct an ACK packet and send it
 613 * @qp: a pointer to the QP
 614 *
 615 * This is called from qib_rc_rcv() and qib_kreceive().
 616 * Note that RDMA reads and atomics are handled in the
 617 * send side QP state and tasklet.
 618 */
 619void qib_send_rc_ack(struct rvt_qp *qp)
 620{
 621        struct qib_devdata *dd = dd_from_ibdev(qp->ibqp.device);
 622        struct qib_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num);
 623        struct qib_pportdata *ppd = ppd_from_ibp(ibp);
 624        u64 pbc;
 625        u16 lrh0;
 626        u32 bth0;
 627        u32 hwords;
 628        u32 pbufn;
 629        u32 __iomem *piobuf;
 630        struct ib_header hdr;
 631        struct ib_other_headers *ohdr;
 632        u32 control;
 633        unsigned long flags;
 634
 635        spin_lock_irqsave(&qp->s_lock, flags);
 636
 637        if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK))
 638                goto unlock;
 639
  640        /* Don't send ACK or NAK if an RDMA read or atomic is pending. */
 641        if ((qp->s_flags & RVT_S_RESP_PENDING) || qp->s_rdma_ack_cnt)
 642                goto queue_ack;
 643
 644        /* Construct the header with s_lock held so APM doesn't change it. */
 645        ohdr = &hdr.u.oth;
 646        lrh0 = QIB_LRH_BTH;
 647        /* header size in 32-bit words LRH+BTH+AETH = (8+12+4)/4. */
 648        hwords = 6;
 649        if (unlikely(qp->remote_ah_attr.ah_flags & IB_AH_GRH)) {
 650                hwords += qib_make_grh(ibp, &hdr.u.l.grh,
 651                                       &qp->remote_ah_attr.grh, hwords, 0);
 652                ohdr = &hdr.u.l.oth;
 653                lrh0 = QIB_LRH_GRH;
 654        }
  655        /* read pkey_index w/o lock (it's atomic) */
 656        bth0 = qib_get_pkey(ibp, qp->s_pkey_index) | (OP(ACKNOWLEDGE) << 24);
 657        if (qp->s_mig_state == IB_MIG_MIGRATED)
 658                bth0 |= IB_BTH_MIG_REQ;
 659        if (qp->r_nak_state)
 660                ohdr->u.aeth = cpu_to_be32((qp->r_msn & QIB_MSN_MASK) |
 661                                            (qp->r_nak_state <<
 662                                             QIB_AETH_CREDIT_SHIFT));
 663        else
 664                ohdr->u.aeth = qib_compute_aeth(qp);
 665        lrh0 |= ibp->sl_to_vl[qp->remote_ah_attr.sl] << 12 |
 666                qp->remote_ah_attr.sl << 4;
 667        hdr.lrh[0] = cpu_to_be16(lrh0);
 668        hdr.lrh[1] = cpu_to_be16(qp->remote_ah_attr.dlid);
 669        hdr.lrh[2] = cpu_to_be16(hwords + SIZE_OF_CRC);
 670        hdr.lrh[3] = cpu_to_be16(ppd->lid | qp->remote_ah_attr.src_path_bits);
 671        ohdr->bth[0] = cpu_to_be32(bth0);
 672        ohdr->bth[1] = cpu_to_be32(qp->remote_qpn);
 673        ohdr->bth[2] = cpu_to_be32(qp->r_ack_psn & QIB_PSN_MASK);
 674
 675        spin_unlock_irqrestore(&qp->s_lock, flags);
 676
 677        /* Don't try to send ACKs if the link isn't ACTIVE */
 678        if (!(ppd->lflags & QIBL_LINKACTIVE))
 679                goto done;
 680
 681        control = dd->f_setpbc_control(ppd, hwords + SIZE_OF_CRC,
 682                                       qp->s_srate, lrh0 >> 12);
 683        /* length is + 1 for the control dword */
 684        pbc = ((u64) control << 32) | (hwords + 1);
 685
 686        piobuf = dd->f_getsendbuf(ppd, pbc, &pbufn);
 687        if (!piobuf) {
 688                /*
 689                 * We are out of PIO buffers at the moment.
 690                 * Pass responsibility for sending the ACK to the
 691                 * send tasklet so that when a PIO buffer becomes
 692                 * available, the ACK is sent ahead of other outgoing
 693                 * packets.
 694                 */
 695                spin_lock_irqsave(&qp->s_lock, flags);
 696                goto queue_ack;
 697        }
 698
 699        /*
 700         * Write the pbc.
 701         * We have to flush after the PBC for correctness
  702         * on some CPUs, or the WC buffer can be written out of order.
 703         */
 704        writeq(pbc, piobuf);
 705
 706        if (dd->flags & QIB_PIO_FLUSH_WC) {
 707                u32 *hdrp = (u32 *) &hdr;
 708
 709                qib_flush_wc();
 710                qib_pio_copy(piobuf + 2, hdrp, hwords - 1);
 711                qib_flush_wc();
 712                __raw_writel(hdrp[hwords - 1], piobuf + hwords + 1);
 713        } else
 714                qib_pio_copy(piobuf + 2, (u32 *) &hdr, hwords);
 715
 716        if (dd->flags & QIB_USE_SPCL_TRIG) {
 717                u32 spcl_off = (pbufn >= dd->piobcnt2k) ? 2047 : 1023;
 718
 719                qib_flush_wc();
 720                __raw_writel(0xaebecede, piobuf + spcl_off);
 721        }
 722
 723        qib_flush_wc();
 724        qib_sendbuf_done(dd, pbufn);
 725
 726        this_cpu_inc(ibp->pmastats->n_unicast_xmit);
 727        goto done;
 728
 729queue_ack:
 730        if (ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK) {
 731                this_cpu_inc(*ibp->rvp.rc_qacks);
 732                qp->s_flags |= RVT_S_ACK_PENDING | RVT_S_RESP_PENDING;
 733                qp->s_nak_state = qp->r_nak_state;
 734                qp->s_ack_psn = qp->r_ack_psn;
 735
 736                /* Schedule the send tasklet. */
 737                qib_schedule_send(qp);
 738        }
 739unlock:
 740        spin_unlock_irqrestore(&qp->s_lock, flags);
 741done:
 742        return;
 743}
 744
 745/**
 746 * reset_psn - reset the QP state to send starting from PSN
 747 * @qp: the QP
 748 * @psn: the packet sequence number to restart at
 749 *
 750 * This is called from qib_rc_rcv() to process an incoming RC ACK
 751 * for the given QP.
 752 * Called at interrupt level with the QP s_lock held.
 753 */
 754static void reset_psn(struct rvt_qp *qp, u32 psn)
 755{
 756        u32 n = qp->s_acked;
 757        struct rvt_swqe *wqe = rvt_get_swqe_ptr(qp, n);
 758        u32 opcode;
 759
 760        qp->s_cur = n;
 761
 762        /*
 763         * If we are starting the request from the beginning,
 764         * let the normal send code handle initialization.
 765         */
 766        if (qib_cmp24(psn, wqe->psn) <= 0) {
 767                qp->s_state = OP(SEND_LAST);
 768                goto done;
 769        }
 770
 771        /* Find the work request opcode corresponding to the given PSN. */
 772        opcode = wqe->wr.opcode;
 773        for (;;) {
 774                int diff;
 775
 776                if (++n == qp->s_size)
 777                        n = 0;
 778                if (n == qp->s_tail)
 779                        break;
 780                wqe = rvt_get_swqe_ptr(qp, n);
 781                diff = qib_cmp24(psn, wqe->psn);
 782                if (diff < 0)
 783                        break;
 784                qp->s_cur = n;
 785                /*
 786                 * If we are starting the request from the beginning,
 787                 * let the normal send code handle initialization.
 788                 */
 789                if (diff == 0) {
 790                        qp->s_state = OP(SEND_LAST);
 791                        goto done;
 792                }
 793                opcode = wqe->wr.opcode;
 794        }
 795
 796        /*
 797         * Set the state to restart in the middle of a request.
 798         * Don't change the s_sge, s_cur_sge, or s_cur_size.
 799         * See qib_make_rc_req().
 800         */
 801        switch (opcode) {
 802        case IB_WR_SEND:
 803        case IB_WR_SEND_WITH_IMM:
 804                qp->s_state = OP(RDMA_READ_RESPONSE_FIRST);
 805                break;
 806
 807        case IB_WR_RDMA_WRITE:
 808        case IB_WR_RDMA_WRITE_WITH_IMM:
 809                qp->s_state = OP(RDMA_READ_RESPONSE_LAST);
 810                break;
 811
 812        case IB_WR_RDMA_READ:
 813                qp->s_state = OP(RDMA_READ_RESPONSE_MIDDLE);
 814                break;
 815
 816        default:
 817                /*
  818                 * This case shouldn't happen since there is only
  819                 * one PSN per request.
 820                 */
 821                qp->s_state = OP(SEND_LAST);
 822        }
 823done:
 824        qp->s_psn = psn;
 825        /*
  826         * Set RVT_S_WAIT_PSN as qib_rc_send_complete() may start the timer
 827         * asynchronously before the send tasklet can get scheduled.
 828         * Doing it in qib_make_rc_req() is too late.
 829         */
 830        if ((qib_cmp24(qp->s_psn, qp->s_sending_hpsn) <= 0) &&
 831            (qib_cmp24(qp->s_sending_psn, qp->s_sending_hpsn) <= 0))
 832                qp->s_flags |= RVT_S_WAIT_PSN;
 833}
 834
 835/*
 836 * Back up requester to resend the last un-ACKed request.
 837 * The QP r_lock and s_lock should be held and interrupts disabled.
 838 */
 839static void qib_restart_rc(struct rvt_qp *qp, u32 psn, int wait)
 840{
 841        struct rvt_swqe *wqe = rvt_get_swqe_ptr(qp, qp->s_acked);
 842        struct qib_ibport *ibp;
 843
 844        if (qp->s_retry == 0) {
 845                if (qp->s_mig_state == IB_MIG_ARMED) {
 846                        qib_migrate_qp(qp);
 847                        qp->s_retry = qp->s_retry_cnt;
 848                } else if (qp->s_last == qp->s_acked) {
 849                        qib_send_complete(qp, wqe, IB_WC_RETRY_EXC_ERR);
 850                        rvt_error_qp(qp, IB_WC_WR_FLUSH_ERR);
 851                        return;
 852                } else /* XXX need to handle delayed completion */
 853                        return;
 854        } else
 855                qp->s_retry--;
 856
 857        ibp = to_iport(qp->ibqp.device, qp->port_num);
 858        if (wqe->wr.opcode == IB_WR_RDMA_READ)
 859                ibp->rvp.n_rc_resends++;
 860        else
 861                ibp->rvp.n_rc_resends += (qp->s_psn - psn) & QIB_PSN_MASK;
 862
 863        qp->s_flags &= ~(RVT_S_WAIT_FENCE | RVT_S_WAIT_RDMAR |
 864                         RVT_S_WAIT_SSN_CREDIT | RVT_S_WAIT_PSN |
 865                         RVT_S_WAIT_ACK);
 866        if (wait)
 867                qp->s_flags |= RVT_S_SEND_ONE;
 868        reset_psn(qp, psn);
 869}
 870
 871/*
 872 * This is called from s_timer for missing responses.
 873 */
 874static void rc_timeout(unsigned long arg)
 875{
 876        struct rvt_qp *qp = (struct rvt_qp *)arg;
 877        struct qib_ibport *ibp;
 878        unsigned long flags;
 879
 880        spin_lock_irqsave(&qp->r_lock, flags);
 881        spin_lock(&qp->s_lock);
 882        if (qp->s_flags & RVT_S_TIMER) {
 883                ibp = to_iport(qp->ibqp.device, qp->port_num);
 884                ibp->rvp.n_rc_timeouts++;
 885                qp->s_flags &= ~RVT_S_TIMER;
 886                del_timer(&qp->s_timer);
 887                qib_restart_rc(qp, qp->s_last_psn + 1, 1);
 888                qib_schedule_send(qp);
 889        }
 890        spin_unlock(&qp->s_lock);
 891        spin_unlock_irqrestore(&qp->r_lock, flags);
 892}
 893
 894/*
 895 * This is called from s_timer for RNR timeouts.
 896 */
 897void qib_rc_rnr_retry(unsigned long arg)
 898{
 899        struct rvt_qp *qp = (struct rvt_qp *)arg;
 900        unsigned long flags;
 901
 902        spin_lock_irqsave(&qp->s_lock, flags);
 903        if (qp->s_flags & RVT_S_WAIT_RNR) {
 904                qp->s_flags &= ~RVT_S_WAIT_RNR;
 905                del_timer(&qp->s_timer);
 906                qib_schedule_send(qp);
 907        }
 908        spin_unlock_irqrestore(&qp->s_lock, flags);
 909}
 910
 911/*
 912 * Set qp->s_sending_psn to the next PSN after the given one.
 913 * This would be psn+1 except when RDMA reads are present.
 914 */
 915static void reset_sending_psn(struct rvt_qp *qp, u32 psn)
 916{
 917        struct rvt_swqe *wqe;
 918        u32 n = qp->s_last;
 919
 920        /* Find the work request corresponding to the given PSN. */
 921        for (;;) {
 922                wqe = rvt_get_swqe_ptr(qp, n);
 923                if (qib_cmp24(psn, wqe->lpsn) <= 0) {
 924                        if (wqe->wr.opcode == IB_WR_RDMA_READ)
 925                                qp->s_sending_psn = wqe->lpsn + 1;
 926                        else
 927                                qp->s_sending_psn = psn + 1;
 928                        break;
 929                }
 930                if (++n == qp->s_size)
 931                        n = 0;
 932                if (n == qp->s_tail)
 933                        break;
 934        }
 935}
 936
 937/*
 938 * This should be called with the QP s_lock held and interrupts disabled.
 939 */
 940void qib_rc_send_complete(struct rvt_qp *qp, struct ib_header *hdr)
 941{
 942        struct ib_other_headers *ohdr;
 943        struct rvt_swqe *wqe;
 944        struct ib_wc wc;
 945        unsigned i;
 946        u32 opcode;
 947        u32 psn;
 948
 949        if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_OR_FLUSH_SEND))
 950                return;
 951
 952        /* Find out where the BTH is */
 953        if ((be16_to_cpu(hdr->lrh[0]) & 3) == QIB_LRH_BTH)
 954                ohdr = &hdr->u.oth;
 955        else
 956                ohdr = &hdr->u.l.oth;
 957
 958        opcode = be32_to_cpu(ohdr->bth[0]) >> 24;
 959        if (opcode >= OP(RDMA_READ_RESPONSE_FIRST) &&
 960            opcode <= OP(ATOMIC_ACKNOWLEDGE)) {
 961                WARN_ON(!qp->s_rdma_ack_cnt);
 962                qp->s_rdma_ack_cnt--;
 963                return;
 964        }
 965
 966        psn = be32_to_cpu(ohdr->bth[2]);
 967        reset_sending_psn(qp, psn);
 968
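        /*
         * psn here is the raw bth[2] word: the low 24 bits are the PSN and
         * bit 31 is the BTH AckReq flag (IB_BTH_REQ_ACK), which is tested
         * below to decide whether to arm the retransmit timer.
         */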
 969        /*
 970         * Start timer after a packet requesting an ACK has been sent and
 971         * there are still requests that haven't been acked.
 972         */
 973        if ((psn & IB_BTH_REQ_ACK) && qp->s_acked != qp->s_tail &&
 974            !(qp->s_flags & (RVT_S_TIMER | RVT_S_WAIT_RNR | RVT_S_WAIT_PSN)) &&
 975            (ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK))
 976                start_timer(qp);
 977
 978        while (qp->s_last != qp->s_acked) {
 979                u32 s_last;
 980
 981                wqe = rvt_get_swqe_ptr(qp, qp->s_last);
 982                if (qib_cmp24(wqe->lpsn, qp->s_sending_psn) >= 0 &&
 983                    qib_cmp24(qp->s_sending_psn, qp->s_sending_hpsn) <= 0)
 984                        break;
 985                s_last = qp->s_last;
 986                if (++s_last >= qp->s_size)
 987                        s_last = 0;
 988                qp->s_last = s_last;
 989                /* see post_send() */
 990                barrier();
 991                for (i = 0; i < wqe->wr.num_sge; i++) {
 992                        struct rvt_sge *sge = &wqe->sg_list[i];
 993
 994                        rvt_put_mr(sge->mr);
 995                }
 996                /* Post a send completion queue entry if requested. */
 997                if (!(qp->s_flags & RVT_S_SIGNAL_REQ_WR) ||
 998                    (wqe->wr.send_flags & IB_SEND_SIGNALED)) {
 999                        memset(&wc, 0, sizeof(wc));
1000                        wc.wr_id = wqe->wr.wr_id;
1001                        wc.status = IB_WC_SUCCESS;
1002                        wc.opcode = ib_qib_wc_opcode[wqe->wr.opcode];
1003                        wc.byte_len = wqe->length;
1004                        wc.qp = &qp->ibqp;
1005                        rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.send_cq), &wc, 0);
1006                }
1007        }
1008        /*
1009         * If we were waiting for sends to complete before resending,
1010         * and they are now complete, restart sending.
1011         */
1012        if (qp->s_flags & RVT_S_WAIT_PSN &&
1013            qib_cmp24(qp->s_sending_psn, qp->s_sending_hpsn) > 0) {
1014                qp->s_flags &= ~RVT_S_WAIT_PSN;
1015                qp->s_sending_psn = qp->s_psn;
1016                qp->s_sending_hpsn = qp->s_psn - 1;
1017                qib_schedule_send(qp);
1018        }
1019}
1020
1021static inline void update_last_psn(struct rvt_qp *qp, u32 psn)
1022{
1023        qp->s_last_psn = psn;
1024}
1025
1026/*
1027 * Generate a SWQE completion.
1028 * This is similar to qib_send_complete but has to check to be sure
1029 * that the SGEs are not being referenced if the SWQE is being resent.
1030 */
1031static struct rvt_swqe *do_rc_completion(struct rvt_qp *qp,
1032                                         struct rvt_swqe *wqe,
1033                                         struct qib_ibport *ibp)
1034{
1035        struct ib_wc wc;
1036        unsigned i;
1037
1038        /*
1039         * Don't decrement refcount and don't generate a
1040         * completion if the SWQE is being resent until the send
1041         * is finished.
1042         */
1043        if (qib_cmp24(wqe->lpsn, qp->s_sending_psn) < 0 ||
1044            qib_cmp24(qp->s_sending_psn, qp->s_sending_hpsn) > 0) {
1045                u32 s_last;
1046
1047                for (i = 0; i < wqe->wr.num_sge; i++) {
1048                        struct rvt_sge *sge = &wqe->sg_list[i];
1049
1050                        rvt_put_mr(sge->mr);
1051                }
1052                s_last = qp->s_last;
1053                if (++s_last >= qp->s_size)
1054                        s_last = 0;
1055                qp->s_last = s_last;
1056                /* see post_send() */
1057                barrier();
1058                /* Post a send completion queue entry if requested. */
1059                if (!(qp->s_flags & RVT_S_SIGNAL_REQ_WR) ||
1060                    (wqe->wr.send_flags & IB_SEND_SIGNALED)) {
1061                        memset(&wc, 0, sizeof(wc));
1062                        wc.wr_id = wqe->wr.wr_id;
1063                        wc.status = IB_WC_SUCCESS;
1064                        wc.opcode = ib_qib_wc_opcode[wqe->wr.opcode];
1065                        wc.byte_len = wqe->length;
1066                        wc.qp = &qp->ibqp;
1067                        rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.send_cq), &wc, 0);
1068                }
1069        } else
1070                this_cpu_inc(*ibp->rvp.rc_delayed_comp);
1071
1072        qp->s_retry = qp->s_retry_cnt;
1073        update_last_psn(qp, wqe->lpsn);
1074
1075        /*
1076         * If we are completing a request which is in the process of
1077         * being resent, we can stop resending it since we know the
1078         * responder has already seen it.
1079         */
1080        if (qp->s_acked == qp->s_cur) {
1081                if (++qp->s_cur >= qp->s_size)
1082                        qp->s_cur = 0;
1083                qp->s_acked = qp->s_cur;
1084                wqe = rvt_get_swqe_ptr(qp, qp->s_cur);
1085                if (qp->s_acked != qp->s_tail) {
1086                        qp->s_state = OP(SEND_LAST);
1087                        qp->s_psn = wqe->psn;
1088                }
1089        } else {
1090                if (++qp->s_acked >= qp->s_size)
1091                        qp->s_acked = 0;
1092                if (qp->state == IB_QPS_SQD && qp->s_acked == qp->s_cur)
1093                        qp->s_draining = 0;
1094                wqe = rvt_get_swqe_ptr(qp, qp->s_acked);
1095        }
1096        return wqe;
1097}
1098
1099/**
1100 * do_rc_ack - process an incoming RC ACK
1101 * @qp: the QP the ACK came in on
1102 * @psn: the packet sequence number of the ACK
1103 * @opcode: the opcode of the request that resulted in the ACK
1104 *
1105 * This is called from qib_rc_rcv_resp() to process an incoming RC ACK
1106 * for the given QP.
1107 * Called at interrupt level with the QP s_lock held.
1108 * Returns 1 if OK, 0 if current operation should be aborted (NAK).
1109 */
1110static int do_rc_ack(struct rvt_qp *qp, u32 aeth, u32 psn, int opcode,
1111                     u64 val, struct qib_ctxtdata *rcd)
1112{
1113        struct qib_ibport *ibp;
1114        enum ib_wc_status status;
1115        struct rvt_swqe *wqe;
1116        int ret = 0;
1117        u32 ack_psn;
1118        int diff;
1119
1120        /* Remove QP from retry timer */
1121        if (qp->s_flags & (RVT_S_TIMER | RVT_S_WAIT_RNR)) {
1122                qp->s_flags &= ~(RVT_S_TIMER | RVT_S_WAIT_RNR);
1123                del_timer(&qp->s_timer);
1124        }
1125
1126        /*
1127         * Note that NAKs implicitly ACK outstanding SEND and RDMA write
1128         * requests and implicitly NAK RDMA read and atomic requests issued
1129         * before the NAK'ed request.  The MSN won't include the NAK'ed
 1130         * request but will include any ACK'ed requests.
1131         */
1132        ack_psn = psn;
1133        if (aeth >> 29)
1134                ack_psn--;
1135        wqe = rvt_get_swqe_ptr(qp, qp->s_acked);
1136        ibp = to_iport(qp->ibqp.device, qp->port_num);
1137
1138        /*
 1139         * The MSN might be for a later WQE than the PSN indicates, so
1140         * only complete WQEs that the PSN finishes.
1141         */
1142        while ((diff = qib_cmp24(ack_psn, wqe->lpsn)) >= 0) {
1143                /*
1144                 * RDMA_READ_RESPONSE_ONLY is a special case since
1145                 * we want to generate completion events for everything
1146                 * before the RDMA read, copy the data, then generate
1147                 * the completion for the read.
1148                 */
1149                if (wqe->wr.opcode == IB_WR_RDMA_READ &&
1150                    opcode == OP(RDMA_READ_RESPONSE_ONLY) &&
1151                    diff == 0) {
1152                        ret = 1;
1153                        goto bail;
1154                }
1155                /*
 1156                 * If this request is an RDMA read or atomic, and the ACK is
 1157                 * for a later operation, this ACK NAKs the RDMA read or
 1158                 * atomic.  In other words, only an RDMA_READ_LAST or ONLY
 1159                 * can ACK an RDMA read, and likewise for atomic ops.  Note
1160                 * that the NAK case can only happen if relaxed ordering is
1161                 * used and requests are sent after an RDMA read or atomic
1162                 * is sent but before the response is received.
1163                 */
1164                if ((wqe->wr.opcode == IB_WR_RDMA_READ &&
1165                     (opcode != OP(RDMA_READ_RESPONSE_LAST) || diff != 0)) ||
1166                    ((wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP ||
1167                      wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD) &&
1168                     (opcode != OP(ATOMIC_ACKNOWLEDGE) || diff != 0))) {
1169                        /* Retry this request. */
1170                        if (!(qp->r_flags & RVT_R_RDMAR_SEQ)) {
1171                                qp->r_flags |= RVT_R_RDMAR_SEQ;
1172                                qib_restart_rc(qp, qp->s_last_psn + 1, 0);
1173                                if (list_empty(&qp->rspwait)) {
1174                                        qp->r_flags |= RVT_R_RSP_SEND;
1175                                        rvt_get_qp(qp);
1176                                        list_add_tail(&qp->rspwait,
1177                                                      &rcd->qp_wait_list);
1178                                }
1179                        }
1180                        /*
1181                         * No need to process the ACK/NAK since we are
1182                         * restarting an earlier request.
1183                         */
1184                        goto bail;
1185                }
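                /*
                 * For atomics, the acknowledgment carries the value that was
                 * at the remote address before the operation; store it
                 * through the WQE's local SGE.
                 */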
1186                if (wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP ||
1187                    wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD) {
1188                        u64 *vaddr = wqe->sg_list[0].vaddr;
1189                        *vaddr = val;
1190                }
1191                if (qp->s_num_rd_atomic &&
1192                    (wqe->wr.opcode == IB_WR_RDMA_READ ||
1193                     wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP ||
1194                     wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD)) {
1195                        qp->s_num_rd_atomic--;
1196                        /* Restart sending task if fence is complete */
1197                        if ((qp->s_flags & RVT_S_WAIT_FENCE) &&
1198                            !qp->s_num_rd_atomic) {
1199                                qp->s_flags &= ~(RVT_S_WAIT_FENCE |
1200                                                 RVT_S_WAIT_ACK);
1201                                qib_schedule_send(qp);
1202                        } else if (qp->s_flags & RVT_S_WAIT_RDMAR) {
1203                                qp->s_flags &= ~(RVT_S_WAIT_RDMAR |
1204                                                 RVT_S_WAIT_ACK);
1205                                qib_schedule_send(qp);
1206                        }
1207                }
1208                wqe = do_rc_completion(qp, wqe, ibp);
1209                if (qp->s_acked == qp->s_tail)
1210                        break;
1211        }
1212
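        /*
         * The top three bits of the AETH syndrome classify the response:
         * 0 = ACK, 1 = RNR NAK, 2 = reserved, 3 = NAK.
         */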
1213        switch (aeth >> 29) {
1214        case 0:         /* ACK */
1215                this_cpu_inc(*ibp->rvp.rc_acks);
1216                if (qp->s_acked != qp->s_tail) {
1217                        /*
1218                         * We are expecting more ACKs so
1219                         * reset the retransmit timer.
1220                         */
1221                        start_timer(qp);
1222                        /*
1223                         * We can stop resending the earlier packets and
1224                         * continue with the next packet the receiver wants.
1225                         */
1226                        if (qib_cmp24(qp->s_psn, psn) <= 0)
1227                                reset_psn(qp, psn + 1);
1228                } else if (qib_cmp24(qp->s_psn, psn) <= 0) {
1229                        qp->s_state = OP(SEND_LAST);
1230                        qp->s_psn = psn + 1;
1231                }
1232                if (qp->s_flags & RVT_S_WAIT_ACK) {
1233                        qp->s_flags &= ~RVT_S_WAIT_ACK;
1234                        qib_schedule_send(qp);
1235                }
1236                qib_get_credit(qp, aeth);
1237                qp->s_rnr_retry = qp->s_rnr_retry_cnt;
1238                qp->s_retry = qp->s_retry_cnt;
1239                update_last_psn(qp, psn);
1240                ret = 1;
1241                goto bail;
1242
1243        case 1:         /* RNR NAK */
1244                ibp->rvp.n_rnr_naks++;
1245                if (qp->s_acked == qp->s_tail)
1246                        goto bail;
1247                if (qp->s_flags & RVT_S_WAIT_RNR)
1248                        goto bail;
1249                if (qp->s_rnr_retry == 0) {
1250                        status = IB_WC_RNR_RETRY_EXC_ERR;
1251                        goto class_b;
1252                }
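                /* An RNR retry count of 7 means retry indefinitely, so never decrement it. */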
1253                if (qp->s_rnr_retry_cnt < 7)
1254                        qp->s_rnr_retry--;
1255
1256                /* The last valid PSN is the previous PSN. */
1257                update_last_psn(qp, psn - 1);
1258
1259                ibp->rvp.n_rc_resends += (qp->s_psn - psn) & QIB_PSN_MASK;
1260
1261                reset_psn(qp, psn);
1262
1263                qp->s_flags &= ~(RVT_S_WAIT_SSN_CREDIT | RVT_S_WAIT_ACK);
1264                qp->s_flags |= RVT_S_WAIT_RNR;
1265                qp->s_timer.function = qib_rc_rnr_retry;
1266                qp->s_timer.expires = jiffies + usecs_to_jiffies(
1267                        ib_qib_rnr_table[(aeth >> QIB_AETH_CREDIT_SHIFT) &
1268                                           QIB_AETH_CREDIT_MASK]);
1269                add_timer(&qp->s_timer);
1270                goto bail;
1271
1272        case 3:         /* NAK */
1273                if (qp->s_acked == qp->s_tail)
1274                        goto bail;
1275                /* The last valid PSN is the previous PSN. */
1276                update_last_psn(qp, psn - 1);
1277                switch ((aeth >> QIB_AETH_CREDIT_SHIFT) &
1278                        QIB_AETH_CREDIT_MASK) {
1279                case 0: /* PSN sequence error */
1280                        ibp->rvp.n_seq_naks++;
1281                        /*
1282                         * Back up to the responder's expected PSN.
1283                         * Note that we might get a NAK in the middle of an
1284                         * RDMA READ response which terminates the RDMA
1285                         * READ.
1286                         */
1287                        qib_restart_rc(qp, psn, 0);
1288                        qib_schedule_send(qp);
1289                        break;
1290
1291                case 1: /* Invalid Request */
1292                        status = IB_WC_REM_INV_REQ_ERR;
1293                        ibp->rvp.n_other_naks++;
1294                        goto class_b;
1295
1296                case 2: /* Remote Access Error */
1297                        status = IB_WC_REM_ACCESS_ERR;
1298                        ibp->rvp.n_other_naks++;
1299                        goto class_b;
1300
1301                case 3: /* Remote Operation Error */
1302                        status = IB_WC_REM_OP_ERR;
1303                        ibp->rvp.n_other_naks++;
1304class_b:
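                            /*
                             * A "class B" NAK terminates the current WQE:
                             * complete it with the error status and move
                             * the QP to the error state, but only when no
                             * earlier completions are still outstanding
                             * (s_last has caught up with s_acked).
                             */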
1305                        if (qp->s_last == qp->s_acked) {
1306                                qib_send_complete(qp, wqe, status);
1307                                rvt_error_qp(qp, IB_WC_WR_FLUSH_ERR);
1308                        }
1309                        break;
1310
1311                default:
1312                        /* Ignore other reserved NAK error codes */
1313                        goto reserved;
1314                }
1315                qp->s_retry = qp->s_retry_cnt;
1316                qp->s_rnr_retry = qp->s_rnr_retry_cnt;
1317                goto bail;
1318
1319        default:                /* 2: reserved */
1320reserved:
1321                /* Ignore reserved NAK codes. */
1322                goto bail;
1323        }
1324
1325bail:
1326        return ret;
1327}
1328
1329/*
1330 * We have seen an out-of-sequence RDMA read middle or last packet.
1331 * This ACKs SENDs and RDMA writes up to the first RDMA read or atomic SWQE.
1332 */
1333static void rdma_seq_err(struct rvt_qp *qp, struct qib_ibport *ibp, u32 psn,
1334                         struct qib_ctxtdata *rcd)
1335{
1336        struct rvt_swqe *wqe;
1337
1338        /* Remove QP from retry timer */
1339        if (qp->s_flags & (RVT_S_TIMER | RVT_S_WAIT_RNR)) {
1340                qp->s_flags &= ~(RVT_S_TIMER | RVT_S_WAIT_RNR);
1341                del_timer(&qp->s_timer);
1342        }
1343
1344        wqe = rvt_get_swqe_ptr(qp, qp->s_acked);
1345
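        /*
         * Complete any SEND or RDMA WRITE WQEs that this PSN implicitly
         * acknowledges, stopping at the first RDMA READ or atomic, which
         * is restarted below.
         */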
1346        while (qib_cmp24(psn, wqe->lpsn) > 0) {
1347                if (wqe->wr.opcode == IB_WR_RDMA_READ ||
1348                    wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP ||
1349                    wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD)
1350                        break;
1351                wqe = do_rc_completion(qp, wqe, ibp);
1352        }
1353
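        /*
         * Mark the READ/atomic as restarted so stale responses are
         * dropped until the expected PSN arrives, then queue the QP so
         * the restarted request is sent once receive processing finishes.
         */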
1354        ibp->rvp.n_rdma_seq++;
1355        qp->r_flags |= RVT_R_RDMAR_SEQ;
1356        qib_restart_rc(qp, qp->s_last_psn + 1, 0);
1357        if (list_empty(&qp->rspwait)) {
1358                qp->r_flags |= RVT_R_RSP_SEND;
1359                rvt_get_qp(qp);
1360                list_add_tail(&qp->rspwait, &rcd->qp_wait_list);
1361        }
1362}
1363
1364/**
1365 * qib_rc_rcv_resp - process an incoming RC response packet
1366 * @ibp: the port this packet came in on
1367 * @ohdr: the other headers for this packet
1368 * @data: the packet data
1369 * @tlen: the packet length
1370 * @qp: the QP for this packet
1371 * @opcode: the opcode for this packet
1372 * @psn: the packet sequence number for this packet
1373 * @hdrsize: the header length
1374 * @pmtu: the path MTU
     * @rcd: the context pointer
1375 *
1376 * This is called from qib_rc_rcv() to process an incoming RC response
1377 * packet for the given QP.
1378 * Called at interrupt level.
1379 */
1380static void qib_rc_rcv_resp(struct qib_ibport *ibp,
1381                            struct ib_other_headers *ohdr,
1382                            void *data, u32 tlen,
1383                            struct rvt_qp *qp,
1384                            u32 opcode,
1385                            u32 psn, u32 hdrsize, u32 pmtu,
1386                            struct qib_ctxtdata *rcd)
1387{
1388        struct rvt_swqe *wqe;
1389        struct qib_pportdata *ppd = ppd_from_ibp(ibp);
1390        enum ib_wc_status status;
1391        unsigned long flags;
1392        int diff;
1393        u32 pad;
1394        u32 aeth;
1395        u64 val;
1396
1397        if (opcode != OP(RDMA_READ_RESPONSE_MIDDLE)) {
1398                /*
1399                 * If the ACK'd PSN is still on the SDMA busy list, try to
1400                 * make progress so its SDMA credits can be reclaimed.
1401                 */
1402                if ((qib_cmp24(psn, qp->s_sending_psn) >= 0) &&
1403                    (qib_cmp24(qp->s_sending_psn, qp->s_sending_hpsn) <= 0)) {
1404
1405                        /*
1406                         * If the send tasklet is not running, attempt to
1407                         * advance the SDMA queue directly.
1408                         */
1409                        if (!(qp->s_flags & RVT_S_BUSY)) {
1410                                /* Acquire SDMA Lock */
1411                                spin_lock_irqsave(&ppd->sdma_lock, flags);
1412                                /* Invoke sdma make progress */
1413                                qib_sdma_make_progress(ppd);
1414                                /* Release SDMA Lock */
1415                                spin_unlock_irqrestore(&ppd->sdma_lock, flags);
1416                        }
1417                }
1418        }
1419
1420        spin_lock_irqsave(&qp->s_lock, flags);
1421        if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK))
1422                goto ack_done;
1423
1424        /* Ignore invalid responses. */
1425        smp_read_barrier_depends(); /* see post_one_send */
1426        if (qib_cmp24(psn, ACCESS_ONCE(qp->s_next_psn)) >= 0)
1427                goto ack_done;
1428
1429        /* Ignore duplicate responses. */
1430        diff = qib_cmp24(psn, qp->s_last_psn);
1431        if (unlikely(diff <= 0)) {
1432                /* Update credits for "ghost" ACKs */
1433                if (diff == 0 && opcode == OP(ACKNOWLEDGE)) {
1434                        aeth = be32_to_cpu(ohdr->u.aeth);
1435                        if ((aeth >> 29) == 0)
1436                                qib_get_credit(qp, aeth);
1437                }
1438                goto ack_done;
1439        }
1440
1441        /*
1442         * Skip everything other than the PSN we expect, if we are waiting
1443         * for a reply to a restarted RDMA read or atomic op.
1444         */
1445        if (qp->r_flags & RVT_R_RDMAR_SEQ) {
1446                if (qib_cmp24(psn, qp->s_last_psn + 1) != 0)
1447                        goto ack_done;
1448                qp->r_flags &= ~RVT_R_RDMAR_SEQ;
1449        }
1450
1451        if (unlikely(qp->s_acked == qp->s_tail))
1452                goto ack_done;
1453        wqe = rvt_get_swqe_ptr(qp, qp->s_acked);
1454        status = IB_WC_SUCCESS;
1455
1456        switch (opcode) {
1457        case OP(ACKNOWLEDGE):
1458        case OP(ATOMIC_ACKNOWLEDGE):
1459        case OP(RDMA_READ_RESPONSE_FIRST):
1460                aeth = be32_to_cpu(ohdr->u.aeth);
1461                if (opcode == OP(ATOMIC_ACKNOWLEDGE))
1462                        val = ib_u64_get(&ohdr->u.at.atomic_ack_eth);
1463                else
1464                        val = 0;
1465                if (!do_rc_ack(qp, aeth, psn, opcode, val, rcd) ||
1466                    opcode != OP(RDMA_READ_RESPONSE_FIRST))
1467                        goto ack_done;
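                    /*
                     * The FIRST read response carries an AETH, so account
                     * for its 4 bytes before the payload length checks
                     * below.
                     */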
1468                hdrsize += 4;
1469                wqe = rvt_get_swqe_ptr(qp, qp->s_acked);
1470                if (unlikely(wqe->wr.opcode != IB_WR_RDMA_READ))
1471                        goto ack_op_err;
1472                /*
1473                 * If this is a response to a resent RDMA read, we
1474                 * have to be careful to copy the data to the right
1475                 * location.
1476                 */
1477                qp->s_rdma_read_len = restart_sge(&qp->s_rdma_read_sge,
1478                                                  wqe, psn, pmtu);
1479                goto read_middle;
1480
1481        case OP(RDMA_READ_RESPONSE_MIDDLE):
1482                /* no AETH, no ACK */
1483                if (unlikely(qib_cmp24(psn, qp->s_last_psn + 1)))
1484                        goto ack_seq_err;
1485                if (unlikely(wqe->wr.opcode != IB_WR_RDMA_READ))
1486                        goto ack_op_err;
1487read_middle:
1488                if (unlikely(tlen != (hdrsize + pmtu + 4)))
1489                        goto ack_len_err;
1490                if (unlikely(pmtu >= qp->s_rdma_read_len))
1491                        goto ack_len_err;
1492
1493                /*
1494                 * We got a response so update the timeout.
1495                 * 4.096 usec. * (1 << qp->timeout)
1496                 */
1497                qp->s_flags |= RVT_S_TIMER;
1498                mod_timer(&qp->s_timer, jiffies + qp->timeout_jiffies);
1499                if (qp->s_flags & RVT_S_WAIT_ACK) {
1500                        qp->s_flags &= ~RVT_S_WAIT_ACK;
1501                        qib_schedule_send(qp);
1502                }
1503
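                    /*
                     * Data is flowing for this READ, so refresh the retry
                     * budget (FIRST responses do this in do_rc_ack()).
                     */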
1504                if (opcode == OP(RDMA_READ_RESPONSE_MIDDLE))
1505                        qp->s_retry = qp->s_retry_cnt;
1506
1507                /*
1508                 * Update the RDMA receive state but do the copy w/o
1509                 * holding the locks and blocking interrupts.
1510                 */
1511                qp->s_rdma_read_len -= pmtu;
1512                update_last_psn(qp, psn);
1513                spin_unlock_irqrestore(&qp->s_lock, flags);
1514                qib_copy_sge(&qp->s_rdma_read_sge, data, pmtu, 0);
1515                goto bail;
1516
1517        case OP(RDMA_READ_RESPONSE_ONLY):
1518                aeth = be32_to_cpu(ohdr->u.aeth);
1519                if (!do_rc_ack(qp, aeth, psn, opcode, 0, rcd))
1520                        goto ack_done;
1521                /* Get the number of bytes the message was padded by. */
1522                pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3;
1523                /*
1524                 * Check that the data size is >= 0 && <= pmtu.
1525                 * Remember to account for the AETH header (4) and
1526                 * ICRC (4).
1527                 */
1528                if (unlikely(tlen < (hdrsize + pad + 8)))
1529                        goto ack_len_err;
1530                /*
1531                 * If this is a response to a resent RDMA read, we
1532                 * have to be careful to copy the data to the right
1533                 * location.
1534                 */
1535                wqe = rvt_get_swqe_ptr(qp, qp->s_acked);
1536                qp->s_rdma_read_len = restart_sge(&qp->s_rdma_read_sge,
1537                                                  wqe, psn, pmtu);
1538                goto read_last;
1539
1540        case OP(RDMA_READ_RESPONSE_LAST):
1541                /* ACKs READ req. */
1542                if (unlikely(qib_cmp24(psn, qp->s_last_psn + 1)))
1543                        goto ack_seq_err;
1544                if (unlikely(wqe->wr.opcode != IB_WR_RDMA_READ))
1545                        goto ack_op_err;
1546                /* Get the number of bytes the message was padded by. */
1547                pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3;
1548                /*
1549                 * Check that the data size is >= 1 && <= pmtu.
1550                 * Remember to account for the AETH header (4) and
1551                 * ICRC (4).
1552                 */
1553                if (unlikely(tlen <= (hdrsize + pad + 8)))
1554                        goto ack_len_err;
1555read_last:
1556                tlen -= hdrsize + pad + 8;
1557                if (unlikely(tlen != qp->s_rdma_read_len))
1558                        goto ack_len_err;
1559                aeth = be32_to_cpu(ohdr->u.aeth);
1560                qib_copy_sge(&qp->s_rdma_read_sge, data, tlen, 0);
1561                WARN_ON(qp->s_rdma_read_sge.num_sge);
1562                (void) do_rc_ack(qp, aeth, psn,
1563                                 OP(RDMA_READ_RESPONSE_LAST), 0, rcd);
1564                goto ack_done;
1565        }
1566
1567ack_op_err:
1568        status = IB_WC_LOC_QP_OP_ERR;
1569        goto ack_err;
1570
1571ack_seq_err:
1572        rdma_seq_err(qp, ibp, psn, rcd);
1573        goto ack_done;
1574
1575ack_len_err:
1576        status = IB_WC_LOC_LEN_ERR;
1577ack_err:
1578        if (qp->s_last == qp->s_acked) {
1579                qib_send_complete(qp, wqe, status);
1580                rvt_error_qp(qp, IB_WC_WR_FLUSH_ERR);
1581        }
1582ack_done:
1583        spin_unlock_irqrestore(&qp->s_lock, flags);
1584bail:
1585        return;
1586}
1587
1588/**
1589 * qib_rc_rcv_error - process an incoming duplicate or error RC packet
1590 * @ohdr: the other headers for this packet
1591 * @data: the packet data
1592 * @qp: the QP for this packet
1593 * @opcode: the opcode for this packet
1594 * @psn: the packet sequence number for this packet
1595 * @diff: the difference between the PSN and the expected PSN
     * @rcd: the context pointer
1596 *
1597 * This is called from qib_rc_rcv() to process an unexpected
1598 * incoming RC packet for the given QP.
1599 * Called at interrupt level.
1600 * Return 1 if no more processing is needed; otherwise return 0 to
1601 * schedule a response to be sent.
1602 */
1603static int qib_rc_rcv_error(struct ib_other_headers *ohdr,
1604                            void *data,
1605                            struct rvt_qp *qp,
1606                            u32 opcode,
1607                            u32 psn,
1608                            int diff,
1609                            struct qib_ctxtdata *rcd)
1610{
1611        struct qib_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num);
1612        struct rvt_ack_entry *e;
1613        unsigned long flags;
1614        u8 i, prev;
1615        int old_req;
1616
1617        if (diff > 0) {
1618                /*
1619                 * Packet sequence error.
1620                 * A NAK will ACK earlier sends and RDMA writes.
1621                 * Don't queue the NAK if we already sent one.
1622                 */
1623                if (!qp->r_nak_state) {
1624                        ibp->rvp.n_rc_seqnak++;
1625                        qp->r_nak_state = IB_NAK_PSN_ERROR;
1626                        /* Use the expected PSN. */
1627                        qp->r_ack_psn = qp->r_psn;
1628                        /*
1629                         * Wait to send the sequence NAK until all packets
1630                         * in the receive queue have been processed.
1631                         * Otherwise, we end up propagating congestion.
1632                         */
1633                        if (list_empty(&qp->rspwait)) {
1634                                qp->r_flags |= RVT_R_RSP_NAK;
1635                                rvt_get_qp(qp);
1636                                list_add_tail(&qp->rspwait, &rcd->qp_wait_list);
1637                        }
1638                }
1639                goto done;
1640        }
1641
1642        /*
1643         * Handle a duplicate request.  Don't re-execute SEND, RDMA
1644         * write or atomic op.  Don't NAK errors, just silently drop
1645         * the duplicate request.  Note that r_sge, r_len, and
1646         * r_rcv_len may be in use so don't modify them.
1647         *
1648         * We are supposed to ACK the earliest duplicate PSN but we
1649         * can coalesce an outstanding duplicate ACK.  We have to
1650         * send the earliest so that RDMA reads can be restarted at
1651         * the requester's expected PSN.
1652         *
1653         * First, find where this duplicate PSN falls within the
1654         * ACKs previously sent.
1655         * old_req is true if there is an older response that is scheduled
1656         * to be sent before sending this one.
1657         */
1658        e = NULL;
1659        old_req = 1;
1660        ibp->rvp.n_rc_dupreq++;
1661
1662        spin_lock_irqsave(&qp->s_lock, flags);
1663
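        /*
         * Walk the ack queue backwards from the newest entry, looking for
         * the response that covers this PSN.  s_ack_queue holds
         * QIB_MAX_RDMA_ATOMIC + 1 entries, so indices wrap at that value.
         */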
1664        for (i = qp->r_head_ack_queue; ; i = prev) {
1665                if (i == qp->s_tail_ack_queue)
1666                        old_req = 0;
1667                if (i)
1668                        prev = i - 1;
1669                else
1670                        prev = QIB_MAX_RDMA_ATOMIC;
1671                if (prev == qp->r_head_ack_queue) {
1672                        e = NULL;
1673                        break;
1674                }
1675                e = &qp->s_ack_queue[prev];
1676                if (!e->opcode) {
1677                        e = NULL;
1678                        break;
1679                }
1680                if (qib_cmp24(psn, e->psn) >= 0) {
1681                        if (prev == qp->s_tail_ack_queue &&
1682                            qib_cmp24(psn, e->lpsn) <= 0)
1683                                old_req = 0;
1684                        break;
1685                }
1686        }
1687        switch (opcode) {
1688        case OP(RDMA_READ_REQUEST): {
1689                struct ib_reth *reth;
1690                u32 offset;
1691                u32 len;
1692
1693                /*
1694                 * If we didn't find the RDMA read request in the ack queue,
1695                 * we can ignore this request.
1696                 */
1697                if (!e || e->opcode != OP(RDMA_READ_REQUEST))
1698                        goto unlock_done;
1699                /* RETH comes after BTH */
1700                reth = &ohdr->u.rc.reth;
1701                /*
1702                 * Address range must be a subset of the original
1703                 * request and start on pmtu boundaries.
1704                 * We reuse the old ack_queue slot since the requester
1705                 * should not back up and request an earlier PSN for the
1706                 * same request.
1707                 */
1708                offset = ((psn - e->psn) & QIB_PSN_MASK) *
1709                        qp->pmtu;
1710                len = be32_to_cpu(reth->length);
1711                if (unlikely(offset + len != e->rdma_sge.sge_length))
1712                        goto unlock_done;
1713                if (e->rdma_sge.mr) {
1714                        rvt_put_mr(e->rdma_sge.mr);
1715                        e->rdma_sge.mr = NULL;
1716                }
1717                if (len != 0) {
1718                        u32 rkey = be32_to_cpu(reth->rkey);
1719                        u64 vaddr = be64_to_cpu(reth->vaddr);
1720                        int ok;
1721
1722                        ok = rvt_rkey_ok(qp, &e->rdma_sge, len, vaddr, rkey,
1723                                         IB_ACCESS_REMOTE_READ);
1724                        if (unlikely(!ok))
1725                                goto unlock_done;
1726                } else {
1727                        e->rdma_sge.vaddr = NULL;
1728                        e->rdma_sge.length = 0;
1729                        e->rdma_sge.sge_length = 0;
1730                }
1731                e->psn = psn;
1732                if (old_req)
1733                        goto unlock_done;
1734                qp->s_tail_ack_queue = prev;
1735                break;
1736        }
1737
1738        case OP(COMPARE_SWAP):
1739        case OP(FETCH_ADD): {
1740                /*
1741                 * If we didn't find the atomic request in the ack queue
1742                 * or the send tasklet is already backed up to send an
1743                 * earlier entry, we can ignore this request.
1744                 */
1745                if (!e || e->opcode != (u8) opcode || old_req)
1746                        goto unlock_done;
1747                qp->s_tail_ack_queue = prev;
1748                break;
1749        }
1750
1751        default:
1752                /*
1753                 * Ignore this operation if it doesn't request an ACK
1754                 * or an earlier RDMA read or atomic is going to be resent.
1755                 */
1756                if (!(psn & IB_BTH_REQ_ACK) || old_req)
1757                        goto unlock_done;
1758                /*
1759                 * Resend the most recent ACK if this request is
1760                 * after all the previous RDMA reads and atomics.
1761                 */
1762                if (i == qp->r_head_ack_queue) {
1763                        spin_unlock_irqrestore(&qp->s_lock, flags);
1764                        qp->r_nak_state = 0;
1765                        qp->r_ack_psn = qp->r_psn - 1;
1766                        goto send_ack;
1767                }
1768                /*
1769                 * Try to send a simple ACK to work around a Mellanox bug
1770                 * which doesn't accept an RDMA read response or atomic
1771                 * response as an ACK for earlier SENDs or RDMA writes.
1772                 */
1773                if (!(qp->s_flags & RVT_S_RESP_PENDING)) {
1774                        spin_unlock_irqrestore(&qp->s_lock, flags);
1775                        qp->r_nak_state = 0;
1776                        qp->r_ack_psn = qp->s_ack_queue[i].psn - 1;
1777                        goto send_ack;
1778                }
1779                /*
1780                 * Resend the RDMA read or atomic op which
1781                 * ACKs this duplicate request.
1782                 */
1783                qp->s_tail_ack_queue = i;
1784                break;
1785        }
1786        qp->s_ack_state = OP(ACKNOWLEDGE);
1787        qp->s_flags |= RVT_S_RESP_PENDING;
1788        qp->r_nak_state = 0;
1789        qib_schedule_send(qp);
1790
1791unlock_done:
1792        spin_unlock_irqrestore(&qp->s_lock, flags);
1793done:
1794        return 1;
1795
1796send_ack:
1797        return 0;
1798}
1799
1800void qib_rc_error(struct rvt_qp *qp, enum ib_wc_status err)
1801{
1802        unsigned long flags;
1803        int lastwqe;
1804
1805        spin_lock_irqsave(&qp->s_lock, flags);
1806        lastwqe = rvt_error_qp(qp, err);
1807        spin_unlock_irqrestore(&qp->s_lock, flags);
1808
1809        if (lastwqe) {
1810                struct ib_event ev;
1811
1812                ev.device = qp->ibqp.device;
1813                ev.element.qp = &qp->ibqp;
1814                ev.event = IB_EVENT_QP_LAST_WQE_REACHED;
1815                qp->ibqp.event_handler(&ev, qp->ibqp.qp_context);
1816        }
1817}
1818
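    /*
     * Advance s_tail_ack_queue past entry @n (whose response has already
     * been sent) so the slot can be reused by a new RDMA READ or atomic
     * request, and drop the responder back to the plain ACKNOWLEDGE state.
     */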
1819static inline void qib_update_ack_queue(struct rvt_qp *qp, unsigned n)
1820{
1821        unsigned next;
1822
1823        next = n + 1;
1824        if (next > QIB_MAX_RDMA_ATOMIC)
1825                next = 0;
1826        qp->s_tail_ack_queue = next;
1827        qp->s_ack_state = OP(ACKNOWLEDGE);
1828}
1829
1830/**
1831 * qib_rc_rcv - process an incoming RC packet
1832 * @rcd: the context pointer
1833 * @hdr: the header of this packet
1834 * @has_grh: true if the header has a GRH
1835 * @data: the packet data
1836 * @tlen: the packet length
1837 * @qp: the QP for this packet
1838 *
1839 * This is called from qib_qp_rcv() to process an incoming RC packet
1840 * for the given QP.
1841 * Called at interrupt level.
1842 */
1843void qib_rc_rcv(struct qib_ctxtdata *rcd, struct ib_header *hdr,
1844                int has_grh, void *data, u32 tlen, struct rvt_qp *qp)
1845{
1846        struct qib_ibport *ibp = &rcd->ppd->ibport_data;
1847        struct ib_other_headers *ohdr;
1848        u32 opcode;
1849        u32 hdrsize;
1850        u32 psn;
1851        u32 pad;
1852        struct ib_wc wc;
1853        u32 pmtu = qp->pmtu;
1854        int diff;
1855        struct ib_reth *reth;
1856        unsigned long flags;
1857        int ret;
1858
1859        /* Check for GRH */
1860        if (!has_grh) {
1861                ohdr = &hdr->u.oth;
1862                hdrsize = 8 + 12;       /* LRH + BTH */
1863        } else {
1864                ohdr = &hdr->u.l.oth;
1865                hdrsize = 8 + 40 + 12;  /* LRH + GRH + BTH */
1866        }
1867
1868        opcode = be32_to_cpu(ohdr->bth[0]);
1869        if (qib_ruc_check_hdr(ibp, hdr, has_grh, qp, opcode))
1870                return;
1871
1872        psn = be32_to_cpu(ohdr->bth[2]);
1873        opcode >>= 24;
1874
1875        /*
1876         * Process responses (ACKs) before anything else.  Note that the
1877         * packet sequence number will be for something in the send work
1878         * queue rather than the expected receive packet sequence number.
1879         * In other words, this QP is the requester.
1880         */
1881        if (opcode >= OP(RDMA_READ_RESPONSE_FIRST) &&
1882            opcode <= OP(ATOMIC_ACKNOWLEDGE)) {
1883                qib_rc_rcv_resp(ibp, ohdr, data, tlen, qp, opcode, psn,
1884                                hdrsize, pmtu, rcd);
1885                return;
1886        }
1887
1888        /* Compute 24 bits worth of difference. */
1889        diff = qib_cmp24(psn, qp->r_psn);
1890        if (unlikely(diff)) {
1891                if (qib_rc_rcv_error(ohdr, data, qp, opcode, psn, diff, rcd))
1892                        return;
1893                goto send_ack;
1894        }
1895
1896        /* Check for opcode sequence errors. */
1897        switch (qp->r_state) {
1898        case OP(SEND_FIRST):
1899        case OP(SEND_MIDDLE):
1900                if (opcode == OP(SEND_MIDDLE) ||
1901                    opcode == OP(SEND_LAST) ||
1902                    opcode == OP(SEND_LAST_WITH_IMMEDIATE))
1903                        break;
1904                goto nack_inv;
1905
1906        case OP(RDMA_WRITE_FIRST):
1907        case OP(RDMA_WRITE_MIDDLE):
1908                if (opcode == OP(RDMA_WRITE_MIDDLE) ||
1909                    opcode == OP(RDMA_WRITE_LAST) ||
1910                    opcode == OP(RDMA_WRITE_LAST_WITH_IMMEDIATE))
1911                        break;
1912                goto nack_inv;
1913
1914        default:
1915                if (opcode == OP(SEND_MIDDLE) ||
1916                    opcode == OP(SEND_LAST) ||
1917                    opcode == OP(SEND_LAST_WITH_IMMEDIATE) ||
1918                    opcode == OP(RDMA_WRITE_MIDDLE) ||
1919                    opcode == OP(RDMA_WRITE_LAST) ||
1920                    opcode == OP(RDMA_WRITE_LAST_WITH_IMMEDIATE))
1921                        goto nack_inv;
1922                /*
1923                 * Note that it is up to the requester to not send a new
1924                 * RDMA read or atomic operation before receiving an ACK
1925                 * for the previous operation.
1926                 */
1927                break;
1928        }
1929
1930        if (qp->state == IB_QPS_RTR && !(qp->r_flags & RVT_R_COMM_EST)) {
1931                qp->r_flags |= RVT_R_COMM_EST;
1932                if (qp->ibqp.event_handler) {
1933                        struct ib_event ev;
1934
1935                        ev.device = qp->ibqp.device;
1936                        ev.element.qp = &qp->ibqp;
1937                        ev.event = IB_EVENT_COMM_EST;
1938                        qp->ibqp.event_handler(&ev, qp->ibqp.qp_context);
1939                }
1940        }
1941
1942        /* OK, process the packet. */
1943        switch (opcode) {
1944        case OP(SEND_FIRST):
1945                ret = qib_get_rwqe(qp, 0);
1946                if (ret < 0)
1947                        goto nack_op_err;
1948                if (!ret)
1949                        goto rnr_nak;
1950                qp->r_rcv_len = 0;
1951                /* FALLTHROUGH */
1952        case OP(SEND_MIDDLE):
1953        case OP(RDMA_WRITE_MIDDLE):
1954send_middle:
1955                /* Check for invalid length PMTU or posted rwqe len. */
1956                if (unlikely(tlen != (hdrsize + pmtu + 4)))
1957                        goto nack_inv;
1958                qp->r_rcv_len += pmtu;
1959                if (unlikely(qp->r_rcv_len > qp->r_len))
1960                        goto nack_inv;
1961                qib_copy_sge(&qp->r_sge, data, pmtu, 1);
1962                break;
1963
1964        case OP(RDMA_WRITE_LAST_WITH_IMMEDIATE):
1965                /* consume RWQE */
1966                ret = qib_get_rwqe(qp, 1);
1967                if (ret < 0)
1968                        goto nack_op_err;
1969                if (!ret)
1970                        goto rnr_nak;
1971                goto send_last_imm;
1972
1973        case OP(SEND_ONLY):
1974        case OP(SEND_ONLY_WITH_IMMEDIATE):
1975                ret = qib_get_rwqe(qp, 0);
1976                if (ret < 0)
1977                        goto nack_op_err;
1978                if (!ret)
1979                        goto rnr_nak;
1980                qp->r_rcv_len = 0;
1981                if (opcode == OP(SEND_ONLY))
1982                        goto no_immediate_data;
1983                /* FALLTHROUGH for SEND_ONLY_WITH_IMMEDIATE */
1984        case OP(SEND_LAST_WITH_IMMEDIATE):
1985send_last_imm:
1986                wc.ex.imm_data = ohdr->u.imm_data;
1987                hdrsize += 4;
1988                wc.wc_flags = IB_WC_WITH_IMM;
1989                goto send_last;
1990        case OP(SEND_LAST):
1991        case OP(RDMA_WRITE_LAST):
1992no_immediate_data:
1993                wc.wc_flags = 0;
1994                wc.ex.imm_data = 0;
1995send_last:
1996                /* Get the number of bytes the message was padded by. */
1997                pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3;
1998                /* Check for invalid length. */
1999                /* XXX LAST len should be >= 1 */
2000                if (unlikely(tlen < (hdrsize + pad + 4)))
2001                        goto nack_inv;
2002                /* Don't count the CRC. */
2003                tlen -= (hdrsize + pad + 4);
2004                wc.byte_len = tlen + qp->r_rcv_len;
2005                if (unlikely(wc.byte_len > qp->r_len))
2006                        goto nack_inv;
2007                qib_copy_sge(&qp->r_sge, data, tlen, 1);
2008                rvt_put_ss(&qp->r_sge);
2009                qp->r_msn++;
2010                if (!test_and_clear_bit(RVT_R_WRID_VALID, &qp->r_aflags))
2011                        break;
2012                wc.wr_id = qp->r_wr_id;
2013                wc.status = IB_WC_SUCCESS;
2014                if (opcode == OP(RDMA_WRITE_LAST_WITH_IMMEDIATE) ||
2015                    opcode == OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE))
2016                        wc.opcode = IB_WC_RECV_RDMA_WITH_IMM;
2017                else
2018                        wc.opcode = IB_WC_RECV;
2019                wc.qp = &qp->ibqp;
2020                wc.src_qp = qp->remote_qpn;
2021                wc.slid = qp->remote_ah_attr.dlid;
2022                wc.sl = qp->remote_ah_attr.sl;
2023                /* zero fields that are N/A */
2024                wc.vendor_err = 0;
2025                wc.pkey_index = 0;
2026                wc.dlid_path_bits = 0;
2027                wc.port_num = 0;
2028                /* Signal completion event if the solicited bit is set. */
2029                rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.recv_cq), &wc,
2030                             (ohdr->bth[0] &
2031                              cpu_to_be32(IB_BTH_SOLICITED)) != 0);
2032                break;
2033
2034        case OP(RDMA_WRITE_FIRST):
2035        case OP(RDMA_WRITE_ONLY):
2036        case OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE):
2037                if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_WRITE)))
2038                        goto nack_inv;
2039                /* consume RWQE */
2040                reth = &ohdr->u.rc.reth;
2041                hdrsize += sizeof(*reth);
2042                qp->r_len = be32_to_cpu(reth->length);
2043                qp->r_rcv_len = 0;
2044                qp->r_sge.sg_list = NULL;
2045                if (qp->r_len != 0) {
2046                        u32 rkey = be32_to_cpu(reth->rkey);
2047                        u64 vaddr = be64_to_cpu(reth->vaddr);
2048                        int ok;
2049
2050                        /* Check rkey & NAK */
2051                        ok = rvt_rkey_ok(qp, &qp->r_sge.sge, qp->r_len, vaddr,
2052                                         rkey, IB_ACCESS_REMOTE_WRITE);
2053                        if (unlikely(!ok))
2054                                goto nack_acc;
2055                        qp->r_sge.num_sge = 1;
2056                } else {
2057                        qp->r_sge.num_sge = 0;
2058                        qp->r_sge.sge.mr = NULL;
2059                        qp->r_sge.sge.vaddr = NULL;
2060                        qp->r_sge.sge.length = 0;
2061                        qp->r_sge.sge.sge_length = 0;
2062                }
2063                if (opcode == OP(RDMA_WRITE_FIRST))
2064                        goto send_middle;
2065                else if (opcode == OP(RDMA_WRITE_ONLY))
2066                        goto no_immediate_data;
2067                ret = qib_get_rwqe(qp, 1);
2068                if (ret < 0)
2069                        goto nack_op_err;
2070                if (!ret)
2071                        goto rnr_nak;
2072                wc.ex.imm_data = ohdr->u.rc.imm_data;
2073                hdrsize += 4;
2074                wc.wc_flags = IB_WC_WITH_IMM;
2075                goto send_last;
2076
2077        case OP(RDMA_READ_REQUEST): {
2078                struct rvt_ack_entry *e;
2079                u32 len;
2080                u8 next;
2081
2082                if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_READ)))
2083                        goto nack_inv;
2084                next = qp->r_head_ack_queue + 1;
2085                /* s_ack_queue is size QIB_MAX_RDMA_ATOMIC+1 so use > not >= */
2086                if (next > QIB_MAX_RDMA_ATOMIC)
2087                        next = 0;
2088                spin_lock_irqsave(&qp->s_lock, flags);
2089                if (unlikely(next == qp->s_tail_ack_queue)) {
2090                        if (!qp->s_ack_queue[next].sent)
2091                                goto nack_inv_unlck;
2092                        qib_update_ack_queue(qp, next);
2093                }
2094                e = &qp->s_ack_queue[qp->r_head_ack_queue];
2095                if (e->opcode == OP(RDMA_READ_REQUEST) && e->rdma_sge.mr) {
2096                        rvt_put_mr(e->rdma_sge.mr);
2097                        e->rdma_sge.mr = NULL;
2098                }
2099                reth = &ohdr->u.rc.reth;
2100                len = be32_to_cpu(reth->length);
2101                if (len) {
2102                        u32 rkey = be32_to_cpu(reth->rkey);
2103                        u64 vaddr = be64_to_cpu(reth->vaddr);
2104                        int ok;
2105
2106                        /* Check rkey & NAK */
2107                        ok = rvt_rkey_ok(qp, &e->rdma_sge, len, vaddr,
2108                                         rkey, IB_ACCESS_REMOTE_READ);
2109                        if (unlikely(!ok))
2110                                goto nack_acc_unlck;
2111                        /*
2112                         * Update the next expected PSN.  We add 1 later
2113                         * below, so only add the remainder here.
2114                         */
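                            /*
                             * For example, len == 3 * pmtu takes three
                             * response packets (e->psn .. e->psn + 2):
                             * add 2 here and let the common r_psn++
                             * below account for the last one.
                             */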
2115                        if (len > pmtu)
2116                                qp->r_psn += (len - 1) / pmtu;
2117                } else {
2118                        e->rdma_sge.mr = NULL;
2119                        e->rdma_sge.vaddr = NULL;
2120                        e->rdma_sge.length = 0;
2121                        e->rdma_sge.sge_length = 0;
2122                }
2123                e->opcode = opcode;
2124                e->sent = 0;
2125                e->psn = psn;
2126                e->lpsn = qp->r_psn;
2127                /*
2128                 * We need to increment the MSN here instead of when we
2129                 * finish sending the result since a duplicate request would
2130                 * increment it more than once.
2131                 */
2132                qp->r_msn++;
2133                qp->r_psn++;
2134                qp->r_state = opcode;
2135                qp->r_nak_state = 0;
2136                qp->r_head_ack_queue = next;
2137
2138                /* Schedule the send tasklet. */
2139                qp->s_flags |= RVT_S_RESP_PENDING;
2140                qib_schedule_send(qp);
2141
2142                goto sunlock;
2143        }
2144
2145        case OP(COMPARE_SWAP):
2146        case OP(FETCH_ADD): {
2147                struct ib_atomic_eth *ateth;
2148                struct rvt_ack_entry *e;
2149                u64 vaddr;
2150                atomic64_t *maddr;
2151                u64 sdata;
2152                u32 rkey;
2153                u8 next;
2154
2155                if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_ATOMIC)))
2156                        goto nack_inv;
2157                next = qp->r_head_ack_queue + 1;
2158                if (next > QIB_MAX_RDMA_ATOMIC)
2159                        next = 0;
2160                spin_lock_irqsave(&qp->s_lock, flags);
2161                if (unlikely(next == qp->s_tail_ack_queue)) {
2162                        if (!qp->s_ack_queue[next].sent)
2163                                goto nack_inv_unlck;
2164                        qib_update_ack_queue(qp, next);
2165                }
2166                e = &qp->s_ack_queue[qp->r_head_ack_queue];
2167                if (e->opcode == OP(RDMA_READ_REQUEST) && e->rdma_sge.mr) {
2168                        rvt_put_mr(e->rdma_sge.mr);
2169                        e->rdma_sge.mr = NULL;
2170                }
2171                ateth = &ohdr->u.atomic_eth;
2172                vaddr = get_ib_ateth_vaddr(ateth);
2173                if (unlikely(vaddr & (sizeof(u64) - 1)))
2174                        goto nack_inv_unlck;
2175                rkey = be32_to_cpu(ateth->rkey);
2176                /* Check rkey & NAK */
2177                if (unlikely(!rvt_rkey_ok(qp, &qp->r_sge.sge, sizeof(u64),
2178                                          vaddr, rkey,
2179                                          IB_ACCESS_REMOTE_ATOMIC)))
2180                        goto nack_acc_unlck;
2181                /* Perform atomic OP and save result. */
2182                maddr = (atomic64_t *) qp->r_sge.sge.vaddr;
2183                sdata = get_ib_ateth_swap(ateth);
2184                e->atomic_data = (opcode == OP(FETCH_ADD)) ?
2185                        (u64) atomic64_add_return(sdata, maddr) - sdata :
2186                        (u64) cmpxchg((u64 *) qp->r_sge.sge.vaddr,
2187                                      get_ib_ateth_compare(ateth),
2188                                      sdata);
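                    /*
                     * Either way, e->atomic_data holds the value that was
                     * at the target address before the operation; it is
                     * returned to the requester in the ATOMIC_ACKNOWLEDGE.
                     */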
2189                rvt_put_mr(qp->r_sge.sge.mr);
2190                qp->r_sge.num_sge = 0;
2191                e->opcode = opcode;
2192                e->sent = 0;
2193                e->psn = psn;
2194                e->lpsn = psn;
2195                qp->r_msn++;
2196                qp->r_psn++;
2197                qp->r_state = opcode;
2198                qp->r_nak_state = 0;
2199                qp->r_head_ack_queue = next;
2200
2201                /* Schedule the send tasklet. */
2202                qp->s_flags |= RVT_S_RESP_PENDING;
2203                qib_schedule_send(qp);
2204
2205                goto sunlock;
2206        }
2207
2208        default:
2209                /* NAK unknown opcodes. */
2210                goto nack_inv;
2211        }
2212        qp->r_psn++;
2213        qp->r_state = opcode;
2214        qp->r_ack_psn = psn;
2215        qp->r_nak_state = 0;
2216        /* Send an ACK if requested or required. */
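            /* Bit 31 of BTH[2] is the AckReq bit; the PSN occupies the low 24 bits. */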
2217        if (psn & (1 << 31))
2218                goto send_ack;
2219        return;
2220
2221rnr_nak:
2222        qp->r_nak_state = IB_RNR_NAK | qp->r_min_rnr_timer;
2223        qp->r_ack_psn = qp->r_psn;
2224        /* Queue RNR NAK for later */
2225        if (list_empty(&qp->rspwait)) {
2226                qp->r_flags |= RVT_R_RSP_NAK;
2227                rvt_get_qp(qp);
2228                list_add_tail(&qp->rspwait, &rcd->qp_wait_list);
2229        }
2230        return;
2231
2232nack_op_err:
2233        qib_rc_error(qp, IB_WC_LOC_QP_OP_ERR);
2234        qp->r_nak_state = IB_NAK_REMOTE_OPERATIONAL_ERROR;
2235        qp->r_ack_psn = qp->r_psn;
2236        /* Queue NAK for later */
2237        if (list_empty(&qp->rspwait)) {
2238                qp->r_flags |= RVT_R_RSP_NAK;
2239                rvt_get_qp(qp);
2240                list_add_tail(&qp->rspwait, &rcd->qp_wait_list);
2241        }
2242        return;
2243
2244nack_inv_unlck:
2245        spin_unlock_irqrestore(&qp->s_lock, flags);
2246nack_inv:
2247        qib_rc_error(qp, IB_WC_LOC_QP_OP_ERR);
2248        qp->r_nak_state = IB_NAK_INVALID_REQUEST;
2249        qp->r_ack_psn = qp->r_psn;
2250        /* Queue NAK for later */
2251        if (list_empty(&qp->rspwait)) {
2252                qp->r_flags |= RVT_R_RSP_NAK;
2253                rvt_get_qp(qp);
2254                list_add_tail(&qp->rspwait, &rcd->qp_wait_list);
2255        }
2256        return;
2257
2258nack_acc_unlck:
2259        spin_unlock_irqrestore(&qp->s_lock, flags);
2260nack_acc:
2261        qib_rc_error(qp, IB_WC_LOC_PROT_ERR);
2262        qp->r_nak_state = IB_NAK_REMOTE_ACCESS_ERROR;
2263        qp->r_ack_psn = qp->r_psn;
2264send_ack:
2265        qib_send_rc_ack(qp);
2266        return;
2267
2268sunlock:
2269        spin_unlock_irqrestore(&qp->s_lock, flags);
2270}
2271