linux/drivers/infiniband/hw/hfi1/ruc.c
   1/*
   2 * Copyright(c) 2015, 2016 Intel Corporation.
   3 *
   4 * This file is provided under a dual BSD/GPLv2 license.  When using or
   5 * redistributing this file, you may do so under either license.
   6 *
   7 * GPL LICENSE SUMMARY
   8 *
   9 * This program is free software; you can redistribute it and/or modify
  10 * it under the terms of version 2 of the GNU General Public License as
  11 * published by the Free Software Foundation.
  12 *
  13 * This program is distributed in the hope that it will be useful, but
  14 * WITHOUT ANY WARRANTY; without even the implied warranty of
  15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  16 * General Public License for more details.
  17 *
  18 * BSD LICENSE
  19 *
  20 * Redistribution and use in source and binary forms, with or without
  21 * modification, are permitted provided that the following conditions
  22 * are met:
  23 *
  24 *  - Redistributions of source code must retain the above copyright
  25 *    notice, this list of conditions and the following disclaimer.
  26 *  - Redistributions in binary form must reproduce the above copyright
  27 *    notice, this list of conditions and the following disclaimer in
  28 *    the documentation and/or other materials provided with the
  29 *    distribution.
  30 *  - Neither the name of Intel Corporation nor the names of its
  31 *    contributors may be used to endorse or promote products derived
  32 *    from this software without specific prior written permission.
  33 *
  34 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  35 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  36 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  37 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  38 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  39 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  40 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  41 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  42 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  43 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  44 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  45 *
  46 */
  47
  48#include <linux/spinlock.h>
  49
  50#include "hfi.h"
  51#include "mad.h"
  52#include "qp.h"
  53#include "verbs_txreq.h"
  54#include "trace.h"
  55
  56/*
   57 * Validate an RWQE and fill in the SGE state.
  58 * Return 1 if OK.
  59 */
  60static int init_sge(struct rvt_qp *qp, struct rvt_rwqe *wqe)
  61{
  62        int i, j, ret;
  63        struct ib_wc wc;
  64        struct rvt_lkey_table *rkt;
  65        struct rvt_pd *pd;
  66        struct rvt_sge_state *ss;
  67
  68        rkt = &to_idev(qp->ibqp.device)->rdi.lkey_table;
  69        pd = ibpd_to_rvtpd(qp->ibqp.srq ? qp->ibqp.srq->pd : qp->ibqp.pd);
  70        ss = &qp->r_sge;
  71        ss->sg_list = qp->r_sg_list;
  72        qp->r_len = 0;
  73        for (i = j = 0; i < wqe->num_sge; i++) {
  74                if (wqe->sg_list[i].length == 0)
  75                        continue;
  76                /* Check LKEY */
  77                if (!rvt_lkey_ok(rkt, pd, j ? &ss->sg_list[j - 1] : &ss->sge,
  78                                 &wqe->sg_list[i], IB_ACCESS_LOCAL_WRITE))
  79                        goto bad_lkey;
  80                qp->r_len += wqe->sg_list[i].length;
  81                j++;
  82        }
  83        ss->num_sge = j;
  84        ss->total_len = qp->r_len;
  85        ret = 1;
  86        goto bail;
  87
  88bad_lkey:
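             /*
              * Undo the MR references taken so far: entry 0 lives in
              * ss->sge, the rest in ss->sg_list[].
              */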
  89        while (j) {
  90                struct rvt_sge *sge = --j ? &ss->sg_list[j - 1] : &ss->sge;
  91
  92                rvt_put_mr(sge->mr);
  93        }
  94        ss->num_sge = 0;
  95        memset(&wc, 0, sizeof(wc));
  96        wc.wr_id = wqe->wr_id;
  97        wc.status = IB_WC_LOC_PROT_ERR;
  98        wc.opcode = IB_WC_RECV;
  99        wc.qp = &qp->ibqp;
 100        /* Signal solicited completion event. */
 101        rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.recv_cq), &wc, 1);
 102        ret = 0;
 103bail:
 104        return ret;
 105}
 106
 107/**
  108 * hfi1_rvt_get_rwqe - fetch the next RWQE for the QP
 109 * @qp: the QP
 110 * @wr_id_only: update qp->r_wr_id only, not qp->r_sge
 111 *
 112 * Return -1 if there is a local error, 0 if no RWQE is available,
 113 * otherwise return 1.
 114 *
 115 * Can be called from interrupt level.
 116 */
 117int hfi1_rvt_get_rwqe(struct rvt_qp *qp, int wr_id_only)
 118{
 119        unsigned long flags;
 120        struct rvt_rq *rq;
 121        struct rvt_rwq *wq;
 122        struct rvt_srq *srq;
 123        struct rvt_rwqe *wqe;
 124        void (*handler)(struct ib_event *, void *);
 125        u32 tail;
 126        int ret;
 127
 128        if (qp->ibqp.srq) {
 129                srq = ibsrq_to_rvtsrq(qp->ibqp.srq);
 130                handler = srq->ibsrq.event_handler;
 131                rq = &srq->rq;
 132        } else {
 133                srq = NULL;
 134                handler = NULL;
 135                rq = &qp->r_rq;
 136        }
 137
 138        spin_lock_irqsave(&rq->lock, flags);
 139        if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK)) {
 140                ret = 0;
 141                goto unlock;
 142        }
 143
 144        wq = rq->wq;
 145        tail = wq->tail;
 146        /* Validate tail before using it since it is user writable. */
 147        if (tail >= rq->size)
 148                tail = 0;
 149        if (unlikely(tail == wq->head)) {
 150                ret = 0;
 151                goto unlock;
 152        }
 153        /* Make sure entry is read after head index is read. */
 154        smp_rmb();
 155        wqe = rvt_get_rwqe_ptr(rq, tail);
 156        /*
 157         * Even though we update the tail index in memory, the verbs
 158         * consumer is not supposed to post more entries until a
 159         * completion is generated.
 160         */
 161        if (++tail >= rq->size)
 162                tail = 0;
 163        wq->tail = tail;
 164        if (!wr_id_only && !init_sge(qp, wqe)) {
 165                ret = -1;
 166                goto unlock;
 167        }
 168        qp->r_wr_id = wqe->wr_id;
 169
 170        ret = 1;
 171        set_bit(RVT_R_WRID_VALID, &qp->r_aflags);
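             /*
              * For SRQs, fire the limit-reached async event once the number
              * of unconsumed RWQEs drops below the armed limit; the limit is
              * disarmed before the handler is called.
              */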
 172        if (handler) {
 173                u32 n;
 174
 175                /*
 176                 * Validate head pointer value and compute
 177                 * the number of remaining WQEs.
 178                 */
 179                n = wq->head;
 180                if (n >= rq->size)
 181                        n = 0;
 182                if (n < tail)
 183                        n += rq->size - tail;
 184                else
 185                        n -= tail;
 186                if (n < srq->limit) {
 187                        struct ib_event ev;
 188
 189                        srq->limit = 0;
 190                        spin_unlock_irqrestore(&rq->lock, flags);
 191                        ev.device = qp->ibqp.device;
 192                        ev.element.srq = qp->ibqp.srq;
 193                        ev.event = IB_EVENT_SRQ_LIMIT_REACHED;
 194                        handler(&ev, srq->ibsrq.srq_context);
 195                        goto bail;
 196                }
 197        }
 198unlock:
 199        spin_unlock_irqrestore(&rq->lock, flags);
 200bail:
 201        return ret;
 202}
 203
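     /*
      * A GID matches when the interface IDs are equal and the subnet prefix
      * is either this port's prefix or the default GID prefix.
      */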
 204static int gid_ok(union ib_gid *gid, __be64 gid_prefix, __be64 id)
 205{
 206        return (gid->global.interface_id == id &&
 207                (gid->global.subnet_prefix == gid_prefix ||
 208                 gid->global.subnet_prefix == IB_DEFAULT_GID_PREFIX));
 209}
 210
 211/*
  212 * Check a packet header against the QP's primary or alternate path.
 213 * This should be called with the QP r_lock held.
 214 *
 215 * The s_lock will be acquired around the hfi1_migrate_qp() call.
 216 */
 217int hfi1_ruc_check_hdr(struct hfi1_ibport *ibp, struct ib_header *hdr,
 218                       int has_grh, struct rvt_qp *qp, u32 bth0)
 219{
 220        __be64 guid;
 221        unsigned long flags;
 222        u8 sc5 = ibp->sl_to_sc[qp->remote_ah_attr.sl];
 223
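             /*
              * An armed alternate path plus a migration request in BTH0
              * means the packet arrived on the alternate path: validate it
              * against the alternate AH and, if it checks out, migrate the
              * QP.  Otherwise validate against the primary path.
              */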
 224        if (qp->s_mig_state == IB_MIG_ARMED && (bth0 & IB_BTH_MIG_REQ)) {
 225                if (!has_grh) {
 226                        if (qp->alt_ah_attr.ah_flags & IB_AH_GRH)
 227                                goto err;
 228                } else {
 229                        if (!(qp->alt_ah_attr.ah_flags & IB_AH_GRH))
 230                                goto err;
 231                        guid = get_sguid(ibp, qp->alt_ah_attr.grh.sgid_index);
 232                        if (!gid_ok(&hdr->u.l.grh.dgid, ibp->rvp.gid_prefix,
 233                                    guid))
 234                                goto err;
 235                        if (!gid_ok(
 236                                &hdr->u.l.grh.sgid,
 237                                qp->alt_ah_attr.grh.dgid.global.subnet_prefix,
 238                                qp->alt_ah_attr.grh.dgid.global.interface_id))
 239                                goto err;
 240                }
 241                if (unlikely(rcv_pkey_check(ppd_from_ibp(ibp), (u16)bth0,
 242                                            sc5, be16_to_cpu(hdr->lrh[3])))) {
 243                        hfi1_bad_pqkey(ibp, OPA_TRAP_BAD_P_KEY,
 244                                       (u16)bth0,
 245                                       (be16_to_cpu(hdr->lrh[0]) >> 4) & 0xF,
 246                                       0, qp->ibqp.qp_num,
 247                                       be16_to_cpu(hdr->lrh[3]),
 248                                       be16_to_cpu(hdr->lrh[1]));
 249                        goto err;
 250                }
 251                /* Validate the SLID. See Ch. 9.6.1.5 and 17.2.8 */
 252                if (be16_to_cpu(hdr->lrh[3]) != qp->alt_ah_attr.dlid ||
 253                    ppd_from_ibp(ibp)->port != qp->alt_ah_attr.port_num)
 254                        goto err;
 255                spin_lock_irqsave(&qp->s_lock, flags);
 256                hfi1_migrate_qp(qp);
 257                spin_unlock_irqrestore(&qp->s_lock, flags);
 258        } else {
 259                if (!has_grh) {
 260                        if (qp->remote_ah_attr.ah_flags & IB_AH_GRH)
 261                                goto err;
 262                } else {
 263                        if (!(qp->remote_ah_attr.ah_flags & IB_AH_GRH))
 264                                goto err;
 265                        guid = get_sguid(ibp,
 266                                         qp->remote_ah_attr.grh.sgid_index);
 267                        if (!gid_ok(&hdr->u.l.grh.dgid, ibp->rvp.gid_prefix,
 268                                    guid))
 269                                goto err;
 270                        if (!gid_ok(
 271                             &hdr->u.l.grh.sgid,
 272                             qp->remote_ah_attr.grh.dgid.global.subnet_prefix,
 273                             qp->remote_ah_attr.grh.dgid.global.interface_id))
 274                                goto err;
 275                }
 276                if (unlikely(rcv_pkey_check(ppd_from_ibp(ibp), (u16)bth0,
 277                                            sc5, be16_to_cpu(hdr->lrh[3])))) {
 278                        hfi1_bad_pqkey(ibp, OPA_TRAP_BAD_P_KEY,
 279                                       (u16)bth0,
 280                                       (be16_to_cpu(hdr->lrh[0]) >> 4) & 0xF,
 281                                       0, qp->ibqp.qp_num,
 282                                       be16_to_cpu(hdr->lrh[3]),
 283                                       be16_to_cpu(hdr->lrh[1]));
 284                        goto err;
 285                }
 286                /* Validate the SLID. See Ch. 9.6.1.5 */
 287                if (be16_to_cpu(hdr->lrh[3]) != qp->remote_ah_attr.dlid ||
 288                    ppd_from_ibp(ibp)->port != qp->port_num)
 289                        goto err;
 290                if (qp->s_mig_state == IB_MIG_REARM &&
 291                    !(bth0 & IB_BTH_MIG_REQ))
 292                        qp->s_mig_state = IB_MIG_ARMED;
 293        }
 294
 295        return 0;
 296
 297err:
 298        return 1;
 299}
 300
 301/**
 302 * ruc_loopback - handle UC and RC loopback requests
 303 * @sqp: the sending QP
 304 *
 305 * This is called from hfi1_do_send() to
 306 * forward a WQE addressed to the same HFI.
 307 * Note that although we are single threaded due to the send engine, we still
 308 * have to protect against post_send().  We don't have to worry about
 309 * receive interrupts since this is a connected protocol and all packets
 310 * will pass through here.
 311 */
 312static void ruc_loopback(struct rvt_qp *sqp)
 313{
 314        struct hfi1_ibport *ibp = to_iport(sqp->ibqp.device, sqp->port_num);
 315        struct rvt_qp *qp;
 316        struct rvt_swqe *wqe;
 317        struct rvt_sge *sge;
 318        unsigned long flags;
 319        struct ib_wc wc;
 320        u64 sdata;
 321        atomic64_t *maddr;
 322        enum ib_wc_status send_status;
 323        bool release;
 324        int ret;
 325        bool copy_last = false;
 326        int local_ops = 0;
 327
 328        rcu_read_lock();
 329
 330        /*
 331         * Note that we check the responder QP state after
 332         * checking the requester's state.
 333         */
 334        qp = rvt_lookup_qpn(ib_to_rvt(sqp->ibqp.device), &ibp->rvp,
 335                            sqp->remote_qpn);
 336
 337        spin_lock_irqsave(&sqp->s_lock, flags);
 338
 339        /* Return if we are already busy processing a work request. */
 340        if ((sqp->s_flags & (RVT_S_BUSY | RVT_S_ANY_WAIT)) ||
 341            !(ib_rvt_state_ops[sqp->state] & RVT_PROCESS_OR_FLUSH_SEND))
 342                goto unlock;
 343
 344        sqp->s_flags |= RVT_S_BUSY;
 345
 346again:
 347        smp_read_barrier_depends(); /* see post_one_send() */
 348        if (sqp->s_last == ACCESS_ONCE(sqp->s_head))
 349                goto clr_busy;
 350        wqe = rvt_get_swqe_ptr(sqp, sqp->s_last);
 351
 352        /* Return if it is not OK to start a new work request. */
 353        if (!(ib_rvt_state_ops[sqp->state] & RVT_PROCESS_NEXT_SEND_OK)) {
 354                if (!(ib_rvt_state_ops[sqp->state] & RVT_FLUSH_SEND))
 355                        goto clr_busy;
 356                /* We are in the error state, flush the work request. */
 357                send_status = IB_WC_WR_FLUSH_ERR;
 358                goto flush_send;
 359        }
 360
 361        /*
 362         * We can rely on the entry not changing without the s_lock
 363         * being held until we update s_last.
 364         * We increment s_cur to indicate s_last is in progress.
 365         */
 366        if (sqp->s_last == sqp->s_cur) {
 367                if (++sqp->s_cur >= sqp->s_size)
 368                        sqp->s_cur = 0;
 369        }
 370        spin_unlock_irqrestore(&sqp->s_lock, flags);
 371
 372        if (!qp || !(ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK) ||
 373            qp->ibqp.qp_type != sqp->ibqp.qp_type) {
 374                ibp->rvp.n_pkt_drops++;
 375                /*
 376                 * For RC, the requester would timeout and retry so
 377                 * shortcut the timeouts and just signal too many retries.
 378                 */
 379                if (sqp->ibqp.qp_type == IB_QPT_RC)
 380                        send_status = IB_WC_RETRY_EXC_ERR;
 381                else
 382                        send_status = IB_WC_SUCCESS;
 383                goto serr;
 384        }
 385
 386        memset(&wc, 0, sizeof(wc));
 387        send_status = IB_WC_SUCCESS;
 388
 389        release = true;
 390        sqp->s_sge.sge = wqe->sg_list[0];
 391        sqp->s_sge.sg_list = wqe->sg_list + 1;
 392        sqp->s_sge.num_sge = wqe->wr.num_sge;
 393        sqp->s_len = wqe->length;
 394        switch (wqe->wr.opcode) {
 395        case IB_WR_REG_MR:
 396                goto send_comp;
 397
 398        case IB_WR_LOCAL_INV:
 399                if (!(wqe->wr.send_flags & RVT_SEND_COMPLETION_ONLY)) {
 400                        if (rvt_invalidate_rkey(sqp,
 401                                                wqe->wr.ex.invalidate_rkey))
 402                                send_status = IB_WC_LOC_PROT_ERR;
 403                        local_ops = 1;
 404                }
 405                goto send_comp;
 406
 407        case IB_WR_SEND_WITH_INV:
 408                if (!rvt_invalidate_rkey(qp, wqe->wr.ex.invalidate_rkey)) {
 409                        wc.wc_flags = IB_WC_WITH_INVALIDATE;
 410                        wc.ex.invalidate_rkey = wqe->wr.ex.invalidate_rkey;
 411                }
 412                goto send;
 413
 414        case IB_WR_SEND_WITH_IMM:
 415                wc.wc_flags = IB_WC_WITH_IMM;
 416                wc.ex.imm_data = wqe->wr.ex.imm_data;
 417                /* FALLTHROUGH */
 418        case IB_WR_SEND:
 419send:
 420                ret = hfi1_rvt_get_rwqe(qp, 0);
 421                if (ret < 0)
 422                        goto op_err;
 423                if (!ret)
 424                        goto rnr_nak;
 425                break;
 426
 427        case IB_WR_RDMA_WRITE_WITH_IMM:
 428                if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_WRITE)))
 429                        goto inv_err;
 430                wc.wc_flags = IB_WC_WITH_IMM;
 431                wc.ex.imm_data = wqe->wr.ex.imm_data;
 432                ret = hfi1_rvt_get_rwqe(qp, 1);
 433                if (ret < 0)
 434                        goto op_err;
 435                if (!ret)
 436                        goto rnr_nak;
 437                /* skip copy_last set and qp_access_flags recheck */
 438                goto do_write;
 439        case IB_WR_RDMA_WRITE:
 440                copy_last = rvt_is_user_qp(qp);
 441                if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_WRITE)))
 442                        goto inv_err;
 443do_write:
 444                if (wqe->length == 0)
 445                        break;
 446                if (unlikely(!rvt_rkey_ok(qp, &qp->r_sge.sge, wqe->length,
 447                                          wqe->rdma_wr.remote_addr,
 448                                          wqe->rdma_wr.rkey,
 449                                          IB_ACCESS_REMOTE_WRITE)))
 450                        goto acc_err;
 451                qp->r_sge.sg_list = NULL;
 452                qp->r_sge.num_sge = 1;
 453                qp->r_sge.total_len = wqe->length;
 454                break;
 455
 456        case IB_WR_RDMA_READ:
 457                if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_READ)))
 458                        goto inv_err;
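                     /*
                      * For a read the copy direction is reversed: s_sge is
                      * pointed at the responder's memory (validated via the
                      * rkey) and r_sge at the requester's local buffers.
                      */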
 459                if (unlikely(!rvt_rkey_ok(qp, &sqp->s_sge.sge, wqe->length,
 460                                          wqe->rdma_wr.remote_addr,
 461                                          wqe->rdma_wr.rkey,
 462                                          IB_ACCESS_REMOTE_READ)))
 463                        goto acc_err;
 464                release = false;
 465                sqp->s_sge.sg_list = NULL;
 466                sqp->s_sge.num_sge = 1;
 467                qp->r_sge.sge = wqe->sg_list[0];
 468                qp->r_sge.sg_list = wqe->sg_list + 1;
 469                qp->r_sge.num_sge = wqe->wr.num_sge;
 470                qp->r_sge.total_len = wqe->length;
 471                break;
 472
 473        case IB_WR_ATOMIC_CMP_AND_SWP:
 474        case IB_WR_ATOMIC_FETCH_AND_ADD:
 475                if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_ATOMIC)))
 476                        goto inv_err;
 477                if (unlikely(!rvt_rkey_ok(qp, &qp->r_sge.sge, sizeof(u64),
 478                                          wqe->atomic_wr.remote_addr,
 479                                          wqe->atomic_wr.rkey,
 480                                          IB_ACCESS_REMOTE_ATOMIC)))
 481                        goto acc_err;
 482                /* Perform atomic OP and save result. */
 483                maddr = (atomic64_t *)qp->r_sge.sge.vaddr;
 484                sdata = wqe->atomic_wr.compare_add;
 485                *(u64 *)sqp->s_sge.sge.vaddr =
 486                        (wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD) ?
 487                        (u64)atomic64_add_return(sdata, maddr) - sdata :
 488                        (u64)cmpxchg((u64 *)qp->r_sge.sge.vaddr,
 489                                      sdata, wqe->atomic_wr.swap);
 490                rvt_put_mr(qp->r_sge.sge.mr);
 491                qp->r_sge.num_sge = 0;
 492                goto send_comp;
 493
 494        default:
 495                send_status = IB_WC_LOC_QP_OP_ERR;
 496                goto serr;
 497        }
 498
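             /*
              * Copy the payload, if any, from the sender's SGE list into
              * the receiver's SGE state, one segment at a time.
              */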
 499        sge = &sqp->s_sge.sge;
 500        while (sqp->s_len) {
 501                u32 len = sqp->s_len;
 502
 503                if (len > sge->length)
 504                        len = sge->length;
 505                if (len > sge->sge_length)
 506                        len = sge->sge_length;
 507                WARN_ON_ONCE(len == 0);
 508                hfi1_copy_sge(&qp->r_sge, sge->vaddr, len, release, copy_last);
 509                sge->vaddr += len;
 510                sge->length -= len;
 511                sge->sge_length -= len;
 512                if (sge->sge_length == 0) {
 513                        if (!release)
 514                                rvt_put_mr(sge->mr);
 515                        if (--sqp->s_sge.num_sge)
 516                                *sge = *sqp->s_sge.sg_list++;
 517                } else if (sge->length == 0 && sge->mr->lkey) {
 518                        if (++sge->n >= RVT_SEGSZ) {
 519                                if (++sge->m >= sge->mr->mapsz)
 520                                        break;
 521                                sge->n = 0;
 522                        }
 523                        sge->vaddr =
 524                                sge->mr->map[sge->m]->segs[sge->n].vaddr;
 525                        sge->length =
 526                                sge->mr->map[sge->m]->segs[sge->n].length;
 527                }
 528                sqp->s_len -= len;
 529        }
 530        if (release)
 531                rvt_put_ss(&qp->r_sge);
 532
 533        if (!test_and_clear_bit(RVT_R_WRID_VALID, &qp->r_aflags))
 534                goto send_comp;
 535
 536        if (wqe->wr.opcode == IB_WR_RDMA_WRITE_WITH_IMM)
 537                wc.opcode = IB_WC_RECV_RDMA_WITH_IMM;
 538        else
 539                wc.opcode = IB_WC_RECV;
 540        wc.wr_id = qp->r_wr_id;
 541        wc.status = IB_WC_SUCCESS;
 542        wc.byte_len = wqe->length;
 543        wc.qp = &qp->ibqp;
 544        wc.src_qp = qp->remote_qpn;
 545        wc.slid = qp->remote_ah_attr.dlid;
 546        wc.sl = qp->remote_ah_attr.sl;
 547        wc.port_num = 1;
 548        /* Signal completion event if the solicited bit is set. */
 549        rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.recv_cq), &wc,
 550                     wqe->wr.send_flags & IB_SEND_SOLICITED);
 551
 552send_comp:
 553        spin_lock_irqsave(&sqp->s_lock, flags);
 554        ibp->rvp.n_loop_pkts++;
 555flush_send:
 556        sqp->s_rnr_retry = sqp->s_rnr_retry_cnt;
 557        hfi1_send_complete(sqp, wqe, send_status);
 558        if (local_ops) {
 559                atomic_dec(&sqp->local_ops_pending);
 560                local_ops = 0;
 561        }
 562        goto again;
 563
 564rnr_nak:
 565        /* Handle RNR NAK */
 566        if (qp->ibqp.qp_type == IB_QPT_UC)
 567                goto send_comp;
 568        ibp->rvp.n_rnr_naks++;
 569        /*
 570         * Note: we don't need the s_lock held since the BUSY flag
 571         * makes this single threaded.
 572         */
 573        if (sqp->s_rnr_retry == 0) {
 574                send_status = IB_WC_RNR_RETRY_EXC_ERR;
 575                goto serr;
 576        }
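             /* A retry count of 7 means retry forever; otherwise consume a retry. */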
 577        if (sqp->s_rnr_retry_cnt < 7)
 578                sqp->s_rnr_retry--;
 579        spin_lock_irqsave(&sqp->s_lock, flags);
 580        if (!(ib_rvt_state_ops[sqp->state] & RVT_PROCESS_RECV_OK))
 581                goto clr_busy;
 582        rvt_add_rnr_timer(sqp, qp->r_min_rnr_timer <<
 583                                IB_AETH_CREDIT_SHIFT);
 584        goto clr_busy;
 585
 586op_err:
 587        send_status = IB_WC_REM_OP_ERR;
 588        wc.status = IB_WC_LOC_QP_OP_ERR;
 589        goto err;
 590
 591inv_err:
 592        send_status = IB_WC_REM_INV_REQ_ERR;
 593        wc.status = IB_WC_LOC_QP_OP_ERR;
 594        goto err;
 595
 596acc_err:
 597        send_status = IB_WC_REM_ACCESS_ERR;
 598        wc.status = IB_WC_LOC_PROT_ERR;
 599err:
 600        /* responder goes to error state */
 601        rvt_rc_error(qp, wc.status);
 602
 603serr:
 604        spin_lock_irqsave(&sqp->s_lock, flags);
 605        hfi1_send_complete(sqp, wqe, send_status);
 606        if (sqp->ibqp.qp_type == IB_QPT_RC) {
 607                int lastwqe = rvt_error_qp(sqp, IB_WC_WR_FLUSH_ERR);
 608
 609                sqp->s_flags &= ~RVT_S_BUSY;
 610                spin_unlock_irqrestore(&sqp->s_lock, flags);
 611                if (lastwqe) {
 612                        struct ib_event ev;
 613
 614                        ev.device = sqp->ibqp.device;
 615                        ev.element.qp = &sqp->ibqp;
 616                        ev.event = IB_EVENT_QP_LAST_WQE_REACHED;
 617                        sqp->ibqp.event_handler(&ev, sqp->ibqp.qp_context);
 618                }
 619                goto done;
 620        }
 621clr_busy:
 622        sqp->s_flags &= ~RVT_S_BUSY;
 623unlock:
 624        spin_unlock_irqrestore(&sqp->s_lock, flags);
 625done:
 626        rcu_read_unlock();
 627}
 628
 629/**
 630 * hfi1_make_grh - construct a GRH header
 631 * @ibp: a pointer to the IB port
 632 * @hdr: a pointer to the GRH header being constructed
 633 * @grh: the global route address to send to
 634 * @hwords: the number of 32 bit words of header being sent
 635 * @nwords: the number of 32 bit words of data being sent
 636 *
 637 * Return the size of the header in 32 bit words.
 638 */
 639u32 hfi1_make_grh(struct hfi1_ibport *ibp, struct ib_grh *hdr,
 640                  struct ib_global_route *grh, u32 hwords, u32 nwords)
 641{
 642        hdr->version_tclass_flow =
 643                cpu_to_be32((IB_GRH_VERSION << IB_GRH_VERSION_SHIFT) |
 644                            (grh->traffic_class << IB_GRH_TCLASS_SHIFT) |
 645                            (grh->flow_label << IB_GRH_FLOW_SHIFT));
 646        hdr->paylen = cpu_to_be16((hwords - 2 + nwords + SIZE_OF_CRC) << 2);
 647        /* next_hdr is defined by C8-7 in ch. 8.4.1 */
 648        hdr->next_hdr = IB_GRH_NEXT_HDR;
 649        hdr->hop_limit = grh->hop_limit;
 650        /* The SGID is 32-bit aligned. */
 651        hdr->sgid.global.subnet_prefix = ibp->rvp.gid_prefix;
 652        hdr->sgid.global.interface_id =
 653                grh->sgid_index < HFI1_GUIDS_PER_PORT ?
 654                get_sguid(ibp, grh->sgid_index) :
 655                get_sguid(ibp, HFI1_PORT_GUID_INDEX);
 656        hdr->dgid = grh->dgid;
 657
 658        /* GRH header size in 32-bit words. */
 659        return sizeof(struct ib_grh) / sizeof(u32);
 660}
 661
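     /*
      * Dword offset of BTH[2] (the PSN word) within the SDMA header; used by
      * build_ahg() below to patch the PSN.
      */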
 662#define BTH2_OFFSET (offsetof(struct hfi1_sdma_header, hdr.u.oth.bth[2]) / 4)
 663
 664/**
 665 * build_ahg - create ahg in s_ahg
 666 * @qp: a pointer to QP
 667 * @npsn: the next PSN for the request/response
 668 *
  669 * This routine handles AHG by allocating an AHG entry and arranging for
  670 * the first middle packet's header to be copied into it.
  671 *
  672 * Subsequent middle packets reuse the copied entry, patching the
  673 * PSN with one or two edits.
 674 */
 675static inline void build_ahg(struct rvt_qp *qp, u32 npsn)
 676{
 677        struct hfi1_qp_priv *priv = qp->priv;
 678
 679        if (unlikely(qp->s_flags & RVT_S_AHG_CLEAR))
 680                clear_ahg(qp);
 681        if (!(qp->s_flags & RVT_S_AHG_VALID)) {
  682                /* first middle packet that needs the AHG copy */
 683                if (qp->s_ahgidx < 0)
 684                        qp->s_ahgidx = sdma_ahg_alloc(priv->s_sde);
 685                if (qp->s_ahgidx >= 0) {
 686                        qp->s_ahgpsn = npsn;
 687                        priv->s_ahg->tx_flags |= SDMA_TXREQ_F_AHG_COPY;
  688                        /* copy now in case another thread changes it */
 689                        priv->s_ahg->ahgidx = qp->s_ahgidx;
 690                        qp->s_flags |= RVT_S_AHG_VALID;
 691                }
 692        } else {
 693                /* subsequent middle after valid */
 694                if (qp->s_ahgidx >= 0) {
 695                        priv->s_ahg->tx_flags |= SDMA_TXREQ_F_USE_AHG;
 696                        priv->s_ahg->ahgidx = qp->s_ahgidx;
 697                        priv->s_ahg->ahgcount++;
 698                        priv->s_ahg->ahgdesc[0] =
 699                                sdma_build_ahg_descriptor(
 700                                        (__force u16)cpu_to_be16((u16)npsn),
 701                                        BTH2_OFFSET,
 702                                        16,
 703                                        16);
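                             /*
                              * If the upper 16 bits of the PSN changed as
                              * well, add a second edit for them.
                              */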
 704                        if ((npsn & 0xffff0000) !=
 705                                        (qp->s_ahgpsn & 0xffff0000)) {
 706                                priv->s_ahg->ahgcount++;
 707                                priv->s_ahg->ahgdesc[1] =
 708                                        sdma_build_ahg_descriptor(
 709                                                (__force u16)cpu_to_be16(
 710                                                        (u16)(npsn >> 16)),
 711                                                BTH2_OFFSET,
 712                                                0,
 713                                                16);
 714                        }
 715                }
 716        }
 717}
 718
 719void hfi1_make_ruc_header(struct rvt_qp *qp, struct ib_other_headers *ohdr,
 720                          u32 bth0, u32 bth2, int middle,
 721                          struct hfi1_pkt_state *ps)
 722{
 723        struct hfi1_qp_priv *priv = qp->priv;
 724        struct hfi1_ibport *ibp = ps->ibp;
 725        u16 lrh0;
 726        u32 nwords;
 727        u32 extra_bytes;
 728        u32 bth1;
 729
 730        /* Construct the header. */
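             /*
              * extra_bytes pads the payload to a 4-byte boundary (0-3 bytes);
              * nwords is the padded payload length in 32-bit words.
              */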
 731        extra_bytes = -ps->s_txreq->s_cur_size & 3;
 732        nwords = (ps->s_txreq->s_cur_size + extra_bytes) >> 2;
 733        lrh0 = HFI1_LRH_BTH;
 734        if (unlikely(qp->remote_ah_attr.ah_flags & IB_AH_GRH)) {
 735                qp->s_hdrwords += hfi1_make_grh(ibp,
 736                                                &ps->s_txreq->phdr.hdr.u.l.grh,
 737                                                &qp->remote_ah_attr.grh,
 738                                                qp->s_hdrwords, nwords);
 739                lrh0 = HFI1_LRH_GRH;
 740                middle = 0;
 741        }
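             /* SC goes in bits 15:12 of LRH0, SL in bits 7:4. */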
 742        lrh0 |= (priv->s_sc & 0xf) << 12 | (qp->remote_ah_attr.sl & 0xf) << 4;
 743        /*
 744         * reset s_ahg/AHG fields
 745         *
  746         * This ensures that the ahgentry/ahgcount
  747         * are at a non-AHG default to protect
  748         * build_verbs_tx_desc() from using
  749         * a stale ahgidx.
 750         *
 751         * build_ahg() will modify as appropriate
 752         * to use the AHG feature.
 753         */
 754        priv->s_ahg->tx_flags = 0;
 755        priv->s_ahg->ahgcount = 0;
 756        priv->s_ahg->ahgidx = 0;
 757        if (qp->s_mig_state == IB_MIG_MIGRATED)
 758                bth0 |= IB_BTH_MIG_REQ;
 759        else
 760                middle = 0;
 761        if (middle)
 762                build_ahg(qp, bth2);
 763        else
 764                qp->s_flags &= ~RVT_S_AHG_VALID;
 765        ps->s_txreq->phdr.hdr.lrh[0] = cpu_to_be16(lrh0);
 766        ps->s_txreq->phdr.hdr.lrh[1] = cpu_to_be16(qp->remote_ah_attr.dlid);
 767        ps->s_txreq->phdr.hdr.lrh[2] =
 768                cpu_to_be16(qp->s_hdrwords + nwords + SIZE_OF_CRC);
 769        ps->s_txreq->phdr.hdr.lrh[3] = cpu_to_be16(ppd_from_ibp(ibp)->lid |
 770                                       qp->remote_ah_attr.src_path_bits);
 771        bth0 |= hfi1_get_pkey(ibp, qp->s_pkey_index);
 772        bth0 |= extra_bytes << 20;
 773        ohdr->bth[0] = cpu_to_be32(bth0);
 774        bth1 = qp->remote_qpn;
 775        if (qp->s_flags & RVT_S_ECN) {
 776                qp->s_flags &= ~RVT_S_ECN;
 777                /* we recently received a FECN, so return a BECN */
 778                bth1 |= (HFI1_BECN_MASK << HFI1_BECN_SHIFT);
 779        }
 780        ohdr->bth[1] = cpu_to_be32(bth1);
 781        ohdr->bth[2] = cpu_to_be32(bth2);
 782}
 783
 784/* when sending, force a reschedule every one of these periods */
 785#define SEND_RESCHED_TIMEOUT (5 * HZ)  /* 5s in jiffies */
 786
 787void _hfi1_do_send(struct work_struct *work)
 788{
 789        struct iowait *wait = container_of(work, struct iowait, iowork);
 790        struct rvt_qp *qp = iowait_to_qp(wait);
 791
 792        hfi1_do_send(qp);
 793}
 794
 795/**
 796 * hfi1_do_send - perform a send on a QP
  797 * @qp: a pointer to the QP
 798 *
 799 * Process entries in the send work queue until credit or queue is
 800 * exhausted.  Only allow one CPU to send a packet per QP.
 801 * Otherwise, two threads could send packets out of order.
 802 */
 803void hfi1_do_send(struct rvt_qp *qp)
 804{
 805        struct hfi1_pkt_state ps;
 806        struct hfi1_qp_priv *priv = qp->priv;
 807        int (*make_req)(struct rvt_qp *qp, struct hfi1_pkt_state *ps);
 808        unsigned long timeout;
 809        unsigned long timeout_int;
 810        int cpu;
 811
 812        ps.dev = to_idev(qp->ibqp.device);
 813        ps.ibp = to_iport(qp->ibqp.device, qp->port_num);
 814        ps.ppd = ppd_from_ibp(ps.ibp);
 815
 816        switch (qp->ibqp.qp_type) {
 817        case IB_QPT_RC:
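                     /*
                      * Handle the request in software when the destination
                      * LID, with the low LMC bits masked off, is this port's
                      * own LID and loopback is not overridden.
                      */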
  818                if (!loopback &&
  819                    ((qp->remote_ah_attr.dlid & ~((1 << ps.ppd->lmc) - 1)) ==
  820                     ps.ppd->lid)) {
 821                        ruc_loopback(qp);
 822                        return;
 823                }
 824                make_req = hfi1_make_rc_req;
 825                timeout_int = (qp->timeout_jiffies);
 826                break;
 827        case IB_QPT_UC:
  828                if (!loopback &&
  829                    ((qp->remote_ah_attr.dlid & ~((1 << ps.ppd->lmc) - 1)) ==
  830                     ps.ppd->lid)) {
 831                        ruc_loopback(qp);
 832                        return;
 833                }
 834                make_req = hfi1_make_uc_req;
 835                timeout_int = SEND_RESCHED_TIMEOUT;
 836                break;
 837        default:
 838                make_req = hfi1_make_ud_req;
 839                timeout_int = SEND_RESCHED_TIMEOUT;
 840        }
 841
 842        spin_lock_irqsave(&qp->s_lock, ps.flags);
 843
 844        /* Return if we are already busy processing a work request. */
 845        if (!hfi1_send_ok(qp)) {
 846                spin_unlock_irqrestore(&qp->s_lock, ps.flags);
 847                return;
 848        }
 849
 850        qp->s_flags |= RVT_S_BUSY;
 851
 852        timeout = jiffies + (timeout_int) / 8;
 853        cpu = priv->s_sde ? priv->s_sde->cpu :
 854                        cpumask_first(cpumask_of_node(ps.ppd->dd->node));
  855        /* ensure a pre-built packet is handled */
 856        ps.s_txreq = get_waiting_verbs_txreq(qp);
 857        do {
 858                /* Check for a constructed packet to be sent. */
 859                if (qp->s_hdrwords != 0) {
 860                        spin_unlock_irqrestore(&qp->s_lock, ps.flags);
 861                        /*
 862                         * If the packet cannot be sent now, return and
 863                         * the send engine will be woken up later.
 864                         */
 865                        if (hfi1_verbs_send(qp, &ps))
 866                                return;
 867                        /* Record that s_ahg is empty. */
 868                        qp->s_hdrwords = 0;
 869                        /* allow other tasks to run */
 870                        if (unlikely(time_after(jiffies, timeout))) {
 871                                if (workqueue_congested(cpu,
 872                                                        ps.ppd->hfi1_wq)) {
 873                                        spin_lock_irqsave(
 874                                                &qp->s_lock,
 875                                                ps.flags);
 876                                        qp->s_flags &= ~RVT_S_BUSY;
 877                                        hfi1_schedule_send(qp);
 878                                        spin_unlock_irqrestore(
 879                                                &qp->s_lock,
 880                                                ps.flags);
 881                                        this_cpu_inc(
 882                                                *ps.ppd->dd->send_schedule);
 883                                        return;
 884                                }
 885                                if (!irqs_disabled()) {
 886                                        cond_resched();
 887                                        this_cpu_inc(
 888                                           *ps.ppd->dd->send_schedule);
 889                                }
 890                                timeout = jiffies + (timeout_int) / 8;
 891                        }
 892                        spin_lock_irqsave(&qp->s_lock, ps.flags);
 893                }
 894        } while (make_req(qp, &ps));
 895
 896        spin_unlock_irqrestore(&qp->s_lock, ps.flags);
 897}
 898
 899/*
 900 * This should be called with s_lock held.
 901 */
 902void hfi1_send_complete(struct rvt_qp *qp, struct rvt_swqe *wqe,
 903                        enum ib_wc_status status)
 904{
 905        u32 old_last, last;
 906
 907        if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_OR_FLUSH_SEND))
 908                return;
 909
 910        last = qp->s_last;
 911        old_last = last;
 912        if (++last >= qp->s_size)
 913                last = 0;
 914        qp->s_last = last;
 915        /* See post_send() */
 916        barrier();
 917        rvt_put_swqe(wqe);
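             /* Drop the AH reference held by UD, SMI and GSI WQEs. */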
 918        if (qp->ibqp.qp_type == IB_QPT_UD ||
 919            qp->ibqp.qp_type == IB_QPT_SMI ||
 920            qp->ibqp.qp_type == IB_QPT_GSI)
 921                atomic_dec(&ibah_to_rvtah(wqe->ud_wr.ah)->refcount);
 922
 923        rvt_qp_swqe_complete(qp, wqe, status);
 924
 925        if (qp->s_acked == old_last)
 926                qp->s_acked = last;
 927        if (qp->s_cur == old_last)
 928                qp->s_cur = last;
 929        if (qp->s_tail == old_last)
 930                qp->s_tail = last;
 931        if (qp->state == IB_QPS_SQD && last == qp->s_cur)
 932                qp->s_draining = 0;
 933}
 934