linux/drivers/infiniband/hw/qib/qib_ruc.c
/*
 * Copyright (c) 2006, 2007, 2008, 2009 QLogic Corporation. All rights reserved.
 * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <linux/spinlock.h>
#include <rdma/ib_smi.h>

#include "qib.h"
#include "qib_mad.h"

/*
 * Validate an RWQE and fill in the SGE state.
 * Return 1 if OK.
 */
static int qib_init_sge(struct rvt_qp *qp, struct rvt_rwqe *wqe)
{
        int i, j, ret;
        struct ib_wc wc;
        struct rvt_lkey_table *rkt;
        struct rvt_pd *pd;
        struct rvt_sge_state *ss;

        rkt = &to_idev(qp->ibqp.device)->rdi.lkey_table;
        pd = ibpd_to_rvtpd(qp->ibqp.srq ? qp->ibqp.srq->pd : qp->ibqp.pd);
        ss = &qp->r_sge;
        ss->sg_list = qp->r_sg_list;
        qp->r_len = 0;
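        /*
         * Translate each nonzero-length SGE through the lkey table; each
         * successful rvt_lkey_ok() takes a reference on the underlying MR.
         */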
        for (i = j = 0; i < wqe->num_sge; i++) {
                if (wqe->sg_list[i].length == 0)
                        continue;
                /* Check LKEY */
                ret = rvt_lkey_ok(rkt, pd, j ? &ss->sg_list[j - 1] : &ss->sge,
                                  NULL, &wqe->sg_list[i],
                                  IB_ACCESS_LOCAL_WRITE);
                if (unlikely(ret <= 0))
                        goto bad_lkey;
                qp->r_len += wqe->sg_list[i].length;
                j++;
        }
        ss->num_sge = j;
        ss->total_len = qp->r_len;
        ret = 1;
        goto bail;

bad_lkey:
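        /* Drop the MR references already taken; entry 0 lives in ss->sge. */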
        while (j) {
                struct rvt_sge *sge = --j ? &ss->sg_list[j - 1] : &ss->sge;

                rvt_put_mr(sge->mr);
        }
        ss->num_sge = 0;
        memset(&wc, 0, sizeof(wc));
        wc.wr_id = wqe->wr_id;
        wc.status = IB_WC_LOC_PROT_ERR;
        wc.opcode = IB_WC_RECV;
        wc.qp = &qp->ibqp;
        /* Signal solicited completion event. */
        rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.recv_cq), &wc, 1);
        ret = 0;
bail:
        return ret;
}

/**
 * qib_get_rwqe - copy the next RWQE into the QP's receive state
 * @qp: the QP
 * @wr_id_only: update qp->r_wr_id only, not qp->r_sge
 *
 * Return -1 if there is a local error, 0 if no RWQE is available,
 * otherwise return 1.
 *
 * Can be called from interrupt level.
 */
int qib_get_rwqe(struct rvt_qp *qp, int wr_id_only)
{
        unsigned long flags;
        struct rvt_rq *rq;
        struct rvt_rwq *wq;
        struct rvt_srq *srq;
        struct rvt_rwqe *wqe;
        void (*handler)(struct ib_event *, void *);
        u32 tail;
        int ret;

        if (qp->ibqp.srq) {
                srq = ibsrq_to_rvtsrq(qp->ibqp.srq);
                handler = srq->ibsrq.event_handler;
                rq = &srq->rq;
        } else {
                srq = NULL;
                handler = NULL;
                rq = &qp->r_rq;
        }

        spin_lock_irqsave(&rq->lock, flags);
        if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK)) {
                ret = 0;
                goto unlock;
        }

        wq = rq->wq;
        tail = wq->tail;
        /* Validate tail before using it since it is user writable. */
        if (tail >= rq->size)
                tail = 0;
        if (unlikely(tail == wq->head)) {
                ret = 0;
                goto unlock;
        }
        /* Make sure entry is read after head index is read. */
        smp_rmb();
        wqe = rvt_get_rwqe_ptr(rq, tail);
        /*
         * Even though we update the tail index in memory, the verbs
         * consumer is not supposed to post more entries until a
         * completion is generated.
         */
        if (++tail >= rq->size)
                tail = 0;
        wq->tail = tail;
        if (!wr_id_only && !qib_init_sge(qp, wqe)) {
                ret = -1;
                goto unlock;
        }
        qp->r_wr_id = wqe->wr_id;

        ret = 1;
        set_bit(RVT_R_WRID_VALID, &qp->r_aflags);
        if (handler) {
                u32 n;

                /*
                 * Validate head pointer value and compute
                 * the number of remaining WQEs.
                 */
                n = wq->head;
                if (n >= rq->size)
                        n = 0;
                if (n < tail)
                        n += rq->size - tail;
                else
                        n -= tail;
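                /* If the SRQ has dropped below its limit, disarm it and fire the limit event. */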
                if (n < srq->limit) {
                        struct ib_event ev;

                        srq->limit = 0;
                        spin_unlock_irqrestore(&rq->lock, flags);
                        ev.device = qp->ibqp.device;
                        ev.element.srq = qp->ibqp.srq;
                        ev.event = IB_EVENT_SRQ_LIMIT_REACHED;
                        handler(&ev, srq->ibsrq.srq_context);
                        goto bail;
                }
        }
unlock:
        spin_unlock_irqrestore(&rq->lock, flags);
bail:
        return ret;
}

/*
 * Switch to alternate path.
 * The QP s_lock should be held and interrupts disabled.
 */
void qib_migrate_qp(struct rvt_qp *qp)
{
        struct ib_event ev;

        qp->s_mig_state = IB_MIG_MIGRATED;
        qp->remote_ah_attr = qp->alt_ah_attr;
        qp->port_num = rdma_ah_get_port_num(&qp->alt_ah_attr);
        qp->s_pkey_index = qp->s_alt_pkey_index;

        ev.device = qp->ibqp.device;
        ev.element.qp = &qp->ibqp;
        ev.event = IB_EVENT_PATH_MIG;
        qp->ibqp.event_handler(&ev, qp->ibqp.qp_context);
}

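/*
 * Return the source GUID for the given index: index 0 is the port GUID,
 * otherwise the GUID comes from the port's GUID table.
 */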
static __be64 get_sguid(struct qib_ibport *ibp, unsigned index)
{
        if (!index) {
                struct qib_pportdata *ppd = ppd_from_ibp(ibp);

                return ppd->guid;
        }
        return ibp->guids[index - 1];
}

static int gid_ok(union ib_gid *gid, __be64 gid_prefix, __be64 id)
{
        return (gid->global.interface_id == id &&
                (gid->global.subnet_prefix == gid_prefix ||
                 gid->global.subnet_prefix == IB_DEFAULT_GID_PREFIX));
}

/*
 * qib_ruc_check_hdr - validate the LRH/GRH/BTH of an incoming packet
 * against the QP's primary or alternate path and migrate the QP when a
 * valid migration request is seen.
 *
 * Return 0 if the headers are valid, 1 otherwise.
 *
 * This should be called with the QP r_lock held.
 *
 * The s_lock will be acquired around the qib_migrate_qp() call.
 */
int qib_ruc_check_hdr(struct qib_ibport *ibp, struct ib_header *hdr,
                      int has_grh, struct rvt_qp *qp, u32 bth0)
{
        __be64 guid;
        unsigned long flags;

        if (qp->s_mig_state == IB_MIG_ARMED && (bth0 & IB_BTH_MIG_REQ)) {
                if (!has_grh) {
                        if (rdma_ah_get_ah_flags(&qp->alt_ah_attr) &
                            IB_AH_GRH)
                                goto err;
                } else {
                        const struct ib_global_route *grh;

                        if (!(rdma_ah_get_ah_flags(&qp->alt_ah_attr) &
                              IB_AH_GRH))
                                goto err;
                        grh = rdma_ah_read_grh(&qp->alt_ah_attr);
                        guid = get_sguid(ibp, grh->sgid_index);
                        if (!gid_ok(&hdr->u.l.grh.dgid,
                                    ibp->rvp.gid_prefix, guid))
                                goto err;
                        if (!gid_ok(&hdr->u.l.grh.sgid,
                            grh->dgid.global.subnet_prefix,
                            grh->dgid.global.interface_id))
                                goto err;
                }
                if (!qib_pkey_ok((u16)bth0,
                                 qib_get_pkey(ibp, qp->s_alt_pkey_index))) {
                        qib_bad_pkey(ibp,
                                     (u16)bth0,
                                     (be16_to_cpu(hdr->lrh[0]) >> 4) & 0xF,
                                     0, qp->ibqp.qp_num,
                                     hdr->lrh[3], hdr->lrh[1]);
                        goto err;
                }
                /* Validate the SLID. See Ch. 9.6.1.5 and 17.2.8 */
                if ((be16_to_cpu(hdr->lrh[3]) !=
                     rdma_ah_get_dlid(&qp->alt_ah_attr)) ||
                    ppd_from_ibp(ibp)->port !=
                            rdma_ah_get_port_num(&qp->alt_ah_attr))
                        goto err;
                spin_lock_irqsave(&qp->s_lock, flags);
                qib_migrate_qp(qp);
                spin_unlock_irqrestore(&qp->s_lock, flags);
        } else {
                if (!has_grh) {
                        if (rdma_ah_get_ah_flags(&qp->remote_ah_attr) &
                            IB_AH_GRH)
                                goto err;
                } else {
                        const struct ib_global_route *grh;

                        if (!(rdma_ah_get_ah_flags(&qp->remote_ah_attr) &
                              IB_AH_GRH))
                                goto err;
                        grh = rdma_ah_read_grh(&qp->remote_ah_attr);
                        guid = get_sguid(ibp, grh->sgid_index);
                        if (!gid_ok(&hdr->u.l.grh.dgid,
                                    ibp->rvp.gid_prefix, guid))
                                goto err;
                        if (!gid_ok(&hdr->u.l.grh.sgid,
                            grh->dgid.global.subnet_prefix,
                            grh->dgid.global.interface_id))
                                goto err;
                }
                if (!qib_pkey_ok((u16)bth0,
                                 qib_get_pkey(ibp, qp->s_pkey_index))) {
                        qib_bad_pkey(ibp,
                                     (u16)bth0,
                                     (be16_to_cpu(hdr->lrh[0]) >> 4) & 0xF,
                                     0, qp->ibqp.qp_num,
                                     hdr->lrh[3], hdr->lrh[1]);
                        goto err;
                }
                /* Validate the SLID. See Ch. 9.6.1.5 */
                if (be16_to_cpu(hdr->lrh[3]) !=
                    rdma_ah_get_dlid(&qp->remote_ah_attr) ||
                    ppd_from_ibp(ibp)->port != qp->port_num)
                        goto err;
                if (qp->s_mig_state == IB_MIG_REARM &&
                    !(bth0 & IB_BTH_MIG_REQ))
                        qp->s_mig_state = IB_MIG_ARMED;
        }

        return 0;

err:
        return 1;
}

/**
 * qib_ruc_loopback - handle UC and RC loopback requests
 * @sqp: the sending QP
 *
 * This is called from qib_do_send() to forward a WQE addressed to the
 * same HCA.
 * Note that although we are single threaded due to the tasklet, we still
 * have to protect against post_send().  We don't have to worry about
 * receive interrupts since this is a connected protocol and all packets
 * will pass through here.
 */
static void qib_ruc_loopback(struct rvt_qp *sqp)
{
        struct qib_ibport *ibp = to_iport(sqp->ibqp.device, sqp->port_num);
        struct qib_pportdata *ppd = ppd_from_ibp(ibp);
        struct qib_devdata *dd = ppd->dd;
        struct rvt_dev_info *rdi = &dd->verbs_dev.rdi;
        struct rvt_qp *qp;
        struct rvt_swqe *wqe;
        struct rvt_sge *sge;
        unsigned long flags;
        struct ib_wc wc;
        u64 sdata;
        atomic64_t *maddr;
        enum ib_wc_status send_status;
        int release;
        int ret;

        rcu_read_lock();
        /*
         * Note that we check the responder QP state after
         * checking the requester's state.
         */
        qp = rvt_lookup_qpn(rdi, &ibp->rvp, sqp->remote_qpn);
        if (!qp)
                goto done;

        spin_lock_irqsave(&sqp->s_lock, flags);

        /* Return if we are already busy processing a work request. */
        if ((sqp->s_flags & (RVT_S_BUSY | RVT_S_ANY_WAIT)) ||
            !(ib_rvt_state_ops[sqp->state] & RVT_PROCESS_OR_FLUSH_SEND))
                goto unlock;

        sqp->s_flags |= RVT_S_BUSY;

again:
        smp_read_barrier_depends(); /* see post_one_send() */
        if (sqp->s_last == ACCESS_ONCE(sqp->s_head))
                goto clr_busy;
        wqe = rvt_get_swqe_ptr(sqp, sqp->s_last);

        /* Return if it is not OK to start a new work request. */
        if (!(ib_rvt_state_ops[sqp->state] & RVT_PROCESS_NEXT_SEND_OK)) {
                if (!(ib_rvt_state_ops[sqp->state] & RVT_FLUSH_SEND))
                        goto clr_busy;
                /* We are in the error state, flush the work request. */
                send_status = IB_WC_WR_FLUSH_ERR;
                goto flush_send;
        }

        /*
         * We can rely on the entry not changing without the s_lock
         * being held until we update s_last.
         * We increment s_cur to indicate s_last is in progress.
         */
        if (sqp->s_last == sqp->s_cur) {
                if (++sqp->s_cur >= sqp->s_size)
                        sqp->s_cur = 0;
        }
        spin_unlock_irqrestore(&sqp->s_lock, flags);

        if (!qp || !(ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK) ||
            qp->ibqp.qp_type != sqp->ibqp.qp_type) {
                ibp->rvp.n_pkt_drops++;
                /*
                 * For RC, the requester would timeout and retry so
                 * shortcut the timeouts and just signal too many retries.
                 */
                if (sqp->ibqp.qp_type == IB_QPT_RC)
                        send_status = IB_WC_RETRY_EXC_ERR;
                else
                        send_status = IB_WC_SUCCESS;
                goto serr;
        }

        memset(&wc, 0, sizeof(wc));
        send_status = IB_WC_SUCCESS;

        release = 1;
        sqp->s_sge.sge = wqe->sg_list[0];
        sqp->s_sge.sg_list = wqe->sg_list + 1;
        sqp->s_sge.num_sge = wqe->wr.num_sge;
        sqp->s_len = wqe->length;
        switch (wqe->wr.opcode) {
        case IB_WR_SEND_WITH_IMM:
                wc.wc_flags = IB_WC_WITH_IMM;
                wc.ex.imm_data = wqe->wr.ex.imm_data;
                /* FALLTHROUGH */
        case IB_WR_SEND:
                ret = qib_get_rwqe(qp, 0);
                if (ret < 0)
                        goto op_err;
                if (!ret)
                        goto rnr_nak;
                break;

        case IB_WR_RDMA_WRITE_WITH_IMM:
                if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_WRITE)))
                        goto inv_err;
                wc.wc_flags = IB_WC_WITH_IMM;
                wc.ex.imm_data = wqe->wr.ex.imm_data;
                ret = qib_get_rwqe(qp, 1);
                if (ret < 0)
                        goto op_err;
                if (!ret)
                        goto rnr_nak;
                /* FALLTHROUGH */
        case IB_WR_RDMA_WRITE:
                if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_WRITE)))
                        goto inv_err;
                if (wqe->length == 0)
                        break;
                if (unlikely(!rvt_rkey_ok(qp, &qp->r_sge.sge, wqe->length,
                                          wqe->rdma_wr.remote_addr,
                                          wqe->rdma_wr.rkey,
                                          IB_ACCESS_REMOTE_WRITE)))
                        goto acc_err;
                qp->r_sge.sg_list = NULL;
                qp->r_sge.num_sge = 1;
                qp->r_sge.total_len = wqe->length;
                break;

        case IB_WR_RDMA_READ:
                if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_READ)))
                        goto inv_err;
                if (unlikely(!rvt_rkey_ok(qp, &sqp->s_sge.sge, wqe->length,
                                          wqe->rdma_wr.remote_addr,
                                          wqe->rdma_wr.rkey,
                                          IB_ACCESS_REMOTE_READ)))
                        goto acc_err;
                release = 0;
                sqp->s_sge.sg_list = NULL;
                sqp->s_sge.num_sge = 1;
                qp->r_sge.sge = wqe->sg_list[0];
                qp->r_sge.sg_list = wqe->sg_list + 1;
                qp->r_sge.num_sge = wqe->wr.num_sge;
                qp->r_sge.total_len = wqe->length;
                break;

        case IB_WR_ATOMIC_CMP_AND_SWP:
        case IB_WR_ATOMIC_FETCH_AND_ADD:
                if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_ATOMIC)))
                        goto inv_err;
                if (unlikely(!rvt_rkey_ok(qp, &qp->r_sge.sge, sizeof(u64),
                                          wqe->atomic_wr.remote_addr,
                                          wqe->atomic_wr.rkey,
                                          IB_ACCESS_REMOTE_ATOMIC)))
                        goto acc_err;
                /* Perform atomic OP and save result. */
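                /*
                 * Fetch-and-add writes back the pre-add value; compare-and-swap
                 * writes back the old value and only swaps when it matches
                 * compare_add.
                 */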
                maddr = (atomic64_t *) qp->r_sge.sge.vaddr;
                sdata = wqe->atomic_wr.compare_add;
                *(u64 *) sqp->s_sge.sge.vaddr =
                        (wqe->atomic_wr.wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD) ?
                        (u64) atomic64_add_return(sdata, maddr) - sdata :
                        (u64) cmpxchg((u64 *) qp->r_sge.sge.vaddr,
                                      sdata, wqe->atomic_wr.swap);
                rvt_put_mr(qp->r_sge.sge.mr);
                qp->r_sge.num_sge = 0;
                goto send_comp;

        default:
                send_status = IB_WC_LOC_QP_OP_ERR;
                goto serr;
        }

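        /*
         * Copy the payload from the sender's SGE list into the receiver's,
         * one contiguous segment at a time.
         */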
        sge = &sqp->s_sge.sge;
        while (sqp->s_len) {
                u32 len = sqp->s_len;

                if (len > sge->length)
                        len = sge->length;
                if (len > sge->sge_length)
                        len = sge->sge_length;
                BUG_ON(len == 0);
                qib_copy_sge(&qp->r_sge, sge->vaddr, len, release);
                sge->vaddr += len;
                sge->length -= len;
                sge->sge_length -= len;
                if (sge->sge_length == 0) {
                        if (!release)
                                rvt_put_mr(sge->mr);
                        if (--sqp->s_sge.num_sge)
                                *sge = *sqp->s_sge.sg_list++;
                } else if (sge->length == 0 && sge->mr->lkey) {
                        if (++sge->n >= RVT_SEGSZ) {
                                if (++sge->m >= sge->mr->mapsz)
                                        break;
                                sge->n = 0;
                        }
                        sge->vaddr =
                                sge->mr->map[sge->m]->segs[sge->n].vaddr;
                        sge->length =
                                sge->mr->map[sge->m]->segs[sge->n].length;
                }
                sqp->s_len -= len;
        }
        if (release)
                rvt_put_ss(&qp->r_sge);

        if (!test_and_clear_bit(RVT_R_WRID_VALID, &qp->r_aflags))
                goto send_comp;

        if (wqe->wr.opcode == IB_WR_RDMA_WRITE_WITH_IMM)
                wc.opcode = IB_WC_RECV_RDMA_WITH_IMM;
        else
                wc.opcode = IB_WC_RECV;
        wc.wr_id = qp->r_wr_id;
        wc.status = IB_WC_SUCCESS;
        wc.byte_len = wqe->length;
        wc.qp = &qp->ibqp;
        wc.src_qp = qp->remote_qpn;
        wc.slid = rdma_ah_get_dlid(&qp->remote_ah_attr);
        wc.sl = rdma_ah_get_sl(&qp->remote_ah_attr);
        wc.port_num = 1;
        /* Signal completion event if the solicited bit is set. */
        rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.recv_cq), &wc,
                     wqe->wr.send_flags & IB_SEND_SOLICITED);

send_comp:
        spin_lock_irqsave(&sqp->s_lock, flags);
        ibp->rvp.n_loop_pkts++;
flush_send:
        sqp->s_rnr_retry = sqp->s_rnr_retry_cnt;
        qib_send_complete(sqp, wqe, send_status);
        goto again;

rnr_nak:
        /* Handle RNR NAK */
        if (qp->ibqp.qp_type == IB_QPT_UC)
                goto send_comp;
        ibp->rvp.n_rnr_naks++;
        /*
         * Note: we don't need the s_lock held since the BUSY flag
         * makes this single threaded.
         */
        if (sqp->s_rnr_retry == 0) {
                send_status = IB_WC_RNR_RETRY_EXC_ERR;
                goto serr;
        }
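        /* An RNR retry count of 7 means retry indefinitely, so only decrement finite counts. */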
        if (sqp->s_rnr_retry_cnt < 7)
                sqp->s_rnr_retry--;
        spin_lock_irqsave(&sqp->s_lock, flags);
        if (!(ib_rvt_state_ops[sqp->state] & RVT_PROCESS_RECV_OK))
                goto clr_busy;
        rvt_add_rnr_timer(sqp, qp->r_min_rnr_timer <<
                                IB_AETH_CREDIT_SHIFT);
        goto clr_busy;

op_err:
        send_status = IB_WC_REM_OP_ERR;
        wc.status = IB_WC_LOC_QP_OP_ERR;
        goto err;

inv_err:
        send_status = IB_WC_REM_INV_REQ_ERR;
        wc.status = IB_WC_LOC_QP_OP_ERR;
        goto err;

acc_err:
        send_status = IB_WC_REM_ACCESS_ERR;
        wc.status = IB_WC_LOC_PROT_ERR;
err:
        /* responder goes to error state */
        rvt_rc_error(qp, wc.status);

serr:
        spin_lock_irqsave(&sqp->s_lock, flags);
        qib_send_complete(sqp, wqe, send_status);
        if (sqp->ibqp.qp_type == IB_QPT_RC) {
                int lastwqe = rvt_error_qp(sqp, IB_WC_WR_FLUSH_ERR);

                sqp->s_flags &= ~RVT_S_BUSY;
                spin_unlock_irqrestore(&sqp->s_lock, flags);
                if (lastwqe) {
                        struct ib_event ev;

                        ev.device = sqp->ibqp.device;
                        ev.element.qp = &sqp->ibqp;
                        ev.event = IB_EVENT_QP_LAST_WQE_REACHED;
                        sqp->ibqp.event_handler(&ev, sqp->ibqp.qp_context);
                }
                goto done;
        }
clr_busy:
        sqp->s_flags &= ~RVT_S_BUSY;
unlock:
        spin_unlock_irqrestore(&sqp->s_lock, flags);
done:
        rcu_read_unlock();
}

/**
 * qib_make_grh - construct a GRH header
 * @ibp: a pointer to the IB port
 * @hdr: a pointer to the GRH header being constructed
 * @grh: the global route address to send to
 * @hwords: the number of 32 bit words of header being sent
 * @nwords: the number of 32 bit words of data being sent
 *
 * Return the size of the header in 32 bit words.
 */
u32 qib_make_grh(struct qib_ibport *ibp, struct ib_grh *hdr,
                 const struct ib_global_route *grh, u32 hwords, u32 nwords)
{
        hdr->version_tclass_flow =
                cpu_to_be32((IB_GRH_VERSION << IB_GRH_VERSION_SHIFT) |
                            (grh->traffic_class << IB_GRH_TCLASS_SHIFT) |
                            (grh->flow_label << IB_GRH_FLOW_SHIFT));
        hdr->paylen = cpu_to_be16((hwords - 2 + nwords + SIZE_OF_CRC) << 2);
        /* next_hdr is defined by C8-7 in ch. 8.4.1 */
        hdr->next_hdr = IB_GRH_NEXT_HDR;
        hdr->hop_limit = grh->hop_limit;
        /* The SGID is 32-bit aligned. */
        hdr->sgid.global.subnet_prefix = ibp->rvp.gid_prefix;
        if (!grh->sgid_index)
                hdr->sgid.global.interface_id = ppd_from_ibp(ibp)->guid;
        else if (grh->sgid_index < QIB_GUIDS_PER_PORT)
                hdr->sgid.global.interface_id = ibp->guids[grh->sgid_index - 1];
        hdr->dgid = grh->dgid;

        /* GRH header size in 32-bit words. */
        return sizeof(struct ib_grh) / sizeof(u32);
}

void qib_make_ruc_header(struct rvt_qp *qp, struct ib_other_headers *ohdr,
                         u32 bth0, u32 bth2)
{
        struct qib_qp_priv *priv = qp->priv;
        struct qib_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num);
        u16 lrh0;
        u32 nwords;
        u32 extra_bytes;

        /* Construct the header. */
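        /* Pad the payload to a 4-byte boundary; nwords is its padded length in 32-bit words. */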
        extra_bytes = -qp->s_cur_size & 3;
        nwords = (qp->s_cur_size + extra_bytes) >> 2;
        lrh0 = QIB_LRH_BTH;
        if (unlikely(rdma_ah_get_ah_flags(&qp->remote_ah_attr) & IB_AH_GRH)) {
                qp->s_hdrwords +=
                        qib_make_grh(ibp, &priv->s_hdr->u.l.grh,
                                     rdma_ah_read_grh(&qp->remote_ah_attr),
                                     qp->s_hdrwords, nwords);
                lrh0 = QIB_LRH_GRH;
        }
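        /* The VL occupies LRH bits 15:12 and the SL bits 7:4. */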
        lrh0 |= ibp->sl_to_vl[rdma_ah_get_sl(&qp->remote_ah_attr)] << 12 |
                rdma_ah_get_sl(&qp->remote_ah_attr) << 4;
        priv->s_hdr->lrh[0] = cpu_to_be16(lrh0);
        priv->s_hdr->lrh[1] =
                        cpu_to_be16(rdma_ah_get_dlid(&qp->remote_ah_attr));
        priv->s_hdr->lrh[2] =
                        cpu_to_be16(qp->s_hdrwords + nwords + SIZE_OF_CRC);
        priv->s_hdr->lrh[3] =
                cpu_to_be16(ppd_from_ibp(ibp)->lid |
                            rdma_ah_get_path_bits(&qp->remote_ah_attr));
        bth0 |= qib_get_pkey(ibp, qp->s_pkey_index);
        bth0 |= extra_bytes << 20;
        if (qp->s_mig_state == IB_MIG_MIGRATED)
                bth0 |= IB_BTH_MIG_REQ;
        ohdr->bth[0] = cpu_to_be32(bth0);
        ohdr->bth[1] = cpu_to_be32(qp->remote_qpn);
        ohdr->bth[2] = cpu_to_be32(bth2);
        this_cpu_inc(ibp->pmastats->n_unicast_xmit);
}

void _qib_do_send(struct work_struct *work)
{
        struct qib_qp_priv *priv = container_of(work, struct qib_qp_priv,
                                                s_work);
        struct rvt_qp *qp = priv->owner;

        qib_do_send(qp);
}

/**
 * qib_do_send - perform a send on a QP
 * @qp: pointer to the QP
 *
 * Process entries in the send work queue until credit or queue is
 * exhausted.  Only allow one CPU to send a packet per QP (tasklet).
 * Otherwise, two threads could send packets out of order.
 */
void qib_do_send(struct rvt_qp *qp)
{
        struct qib_qp_priv *priv = qp->priv;
        struct qib_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num);
        struct qib_pportdata *ppd = ppd_from_ibp(ibp);
        int (*make_req)(struct rvt_qp *qp, unsigned long *flags);
        unsigned long flags;

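        /*
         * If the destination LID, with the low LMC bits masked off, is this
         * port's LID, the request loops back locally and never hits the wire.
         */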
        if ((qp->ibqp.qp_type == IB_QPT_RC ||
             qp->ibqp.qp_type == IB_QPT_UC) &&
            (rdma_ah_get_dlid(&qp->remote_ah_attr) &
             ~((1 << ppd->lmc) - 1)) == ppd->lid) {
                qib_ruc_loopback(qp);
                return;
        }

        if (qp->ibqp.qp_type == IB_QPT_RC)
                make_req = qib_make_rc_req;
        else if (qp->ibqp.qp_type == IB_QPT_UC)
                make_req = qib_make_uc_req;
        else
                make_req = qib_make_ud_req;

        spin_lock_irqsave(&qp->s_lock, flags);

        /* Return if we are already busy processing a work request. */
        if (!qib_send_ok(qp)) {
                spin_unlock_irqrestore(&qp->s_lock, flags);
                return;
        }

        qp->s_flags |= RVT_S_BUSY;

        do {
                /* Check for a constructed packet to be sent. */
                if (qp->s_hdrwords != 0) {
                        spin_unlock_irqrestore(&qp->s_lock, flags);
                        /*
                         * If the packet cannot be sent now, return and
                         * the send tasklet will be woken up later.
                         */
                        if (qib_verbs_send(qp, priv->s_hdr, qp->s_hdrwords,
                                           qp->s_cur_sge, qp->s_cur_size))
                                return;
                        /* Record that s_hdr is empty. */
                        qp->s_hdrwords = 0;
                        spin_lock_irqsave(&qp->s_lock, flags);
                }
        } while (make_req(qp, &flags));

        spin_unlock_irqrestore(&qp->s_lock, flags);
}

/*
 * This should be called with s_lock held.
 */
void qib_send_complete(struct rvt_qp *qp, struct rvt_swqe *wqe,
                       enum ib_wc_status status)
{
        u32 old_last, last;

        if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_OR_FLUSH_SEND))
                return;

        last = qp->s_last;
        old_last = last;
        if (++last >= qp->s_size)
                last = 0;
        qp->s_last = last;
        /* See post_send() */
        barrier();
        rvt_put_swqe(wqe);
        if (qp->ibqp.qp_type == IB_QPT_UD ||
            qp->ibqp.qp_type == IB_QPT_SMI ||
            qp->ibqp.qp_type == IB_QPT_GSI)
                atomic_dec(&ibah_to_rvtah(wqe->ud_wr.ah)->refcount);

        rvt_qp_swqe_complete(qp,
                             wqe,
                             ib_qib_wc_opcode[wqe->wr.opcode],
                             status);

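        /* Advance any send-side indices that were still pointing at the completed entry. */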
        if (qp->s_acked == old_last)
                qp->s_acked = last;
        if (qp->s_cur == old_last)
                qp->s_cur = last;
        if (qp->s_tail == old_last)
                qp->s_tail = last;
        if (qp->state == IB_QPS_SQD && last == qp->s_cur)
                qp->s_draining = 0;
}
