linux/drivers/infiniband/hw/hfi1/qp.c
// SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause
/*
 * Copyright(c) 2015 - 2020 Intel Corporation.
 */

#include <linux/err.h>
#include <linux/vmalloc.h>
#include <linux/hash.h>
#include <linux/module.h>
#include <linux/seq_file.h>
#include <rdma/rdma_vt.h>
#include <rdma/rdmavt_qp.h>
#include <rdma/ib_verbs.h>

#include "hfi.h"
#include "qp.h"
#include "trace.h"
#include "verbs_txreq.h"

unsigned int hfi1_qp_table_size = 256;
module_param_named(qp_table_size, hfi1_qp_table_size, uint, S_IRUGO);
MODULE_PARM_DESC(qp_table_size, "QP table size");

static void flush_tx_list(struct rvt_qp *qp);
static int iowait_sleep(
        struct sdma_engine *sde,
        struct iowait_work *wait,
        struct sdma_txreq *stx,
        unsigned int seq,
        bool pkts_sent);
static void iowait_wakeup(struct iowait *wait, int reason);
static void iowait_sdma_drained(struct iowait *wait);
static void qp_pio_drain(struct rvt_qp *qp);

const struct rvt_operation_params hfi1_post_parms[RVT_OPERATION_MAX] = {
[IB_WR_RDMA_WRITE] = {
        .length = sizeof(struct ib_rdma_wr),
        .qpt_support = BIT(IB_QPT_UC) | BIT(IB_QPT_RC),
},

[IB_WR_RDMA_READ] = {
        .length = sizeof(struct ib_rdma_wr),
        .qpt_support = BIT(IB_QPT_RC),
        .flags = RVT_OPERATION_ATOMIC,
},

[IB_WR_ATOMIC_CMP_AND_SWP] = {
        .length = sizeof(struct ib_atomic_wr),
        .qpt_support = BIT(IB_QPT_RC),
        .flags = RVT_OPERATION_ATOMIC | RVT_OPERATION_ATOMIC_SGE,
},

[IB_WR_ATOMIC_FETCH_AND_ADD] = {
        .length = sizeof(struct ib_atomic_wr),
        .qpt_support = BIT(IB_QPT_RC),
        .flags = RVT_OPERATION_ATOMIC | RVT_OPERATION_ATOMIC_SGE,
},

[IB_WR_RDMA_WRITE_WITH_IMM] = {
        .length = sizeof(struct ib_rdma_wr),
        .qpt_support = BIT(IB_QPT_UC) | BIT(IB_QPT_RC),
},

[IB_WR_SEND] = {
        .length = sizeof(struct ib_send_wr),
        .qpt_support = BIT(IB_QPT_UD) | BIT(IB_QPT_SMI) | BIT(IB_QPT_GSI) |
                       BIT(IB_QPT_UC) | BIT(IB_QPT_RC),
},

[IB_WR_SEND_WITH_IMM] = {
        .length = sizeof(struct ib_send_wr),
        .qpt_support = BIT(IB_QPT_UD) | BIT(IB_QPT_SMI) | BIT(IB_QPT_GSI) |
                       BIT(IB_QPT_UC) | BIT(IB_QPT_RC),
},

[IB_WR_REG_MR] = {
        .length = sizeof(struct ib_reg_wr),
        .qpt_support = BIT(IB_QPT_UC) | BIT(IB_QPT_RC),
        .flags = RVT_OPERATION_LOCAL,
},

[IB_WR_LOCAL_INV] = {
        .length = sizeof(struct ib_send_wr),
        .qpt_support = BIT(IB_QPT_UC) | BIT(IB_QPT_RC),
        .flags = RVT_OPERATION_LOCAL,
},

[IB_WR_SEND_WITH_INV] = {
        .length = sizeof(struct ib_send_wr),
        .qpt_support = BIT(IB_QPT_RC),
},

[IB_WR_OPFN] = {
        .length = sizeof(struct ib_atomic_wr),
        .qpt_support = BIT(IB_QPT_RC),
        .flags = RVT_OPERATION_USE_RESERVE,
},

[IB_WR_TID_RDMA_WRITE] = {
        .length = sizeof(struct ib_rdma_wr),
        .qpt_support = BIT(IB_QPT_RC),
        .flags = RVT_OPERATION_IGN_RNR_CNT,
},

};
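
/*
 * Note: hfi1_post_parms is handed to rdmavt (rdi->post_parms) so the core
 * post-send path can validate each work request before queuing it.  A rough
 * sketch of the kind of check rdmavt performs (field names come from
 * struct rvt_operation_params; the surrounding logic is illustrative only):
 *
 *      const struct rvt_operation_params *op = &hfi1_post_parms[wr->opcode];
 *
 *      if (!op->length ||                              // opcode not supported
 *          !(op->qpt_support & BIT(qp->ibqp.qp_type))) // wrong QP type
 *              return -EINVAL;
 *
 * The .flags field then selects special handling: atomic SGE rules, local
 * operations, use of reserved send-queue slots, or ignoring the RNR retry
 * count (as for TID RDMA WRITE above).
 */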

static void flush_list_head(struct list_head *l)
{
        while (!list_empty(l)) {
                struct sdma_txreq *tx;

                tx = list_first_entry(
                        l,
                        struct sdma_txreq,
                        list);
                list_del_init(&tx->list);
                hfi1_put_txreq(
                        container_of(tx, struct verbs_txreq, txreq));
        }
}

static void flush_tx_list(struct rvt_qp *qp)
{
        struct hfi1_qp_priv *priv = qp->priv;

        flush_list_head(&iowait_get_ib_work(&priv->s_iowait)->tx_head);
        flush_list_head(&iowait_get_tid_work(&priv->s_iowait)->tx_head);
}

static void flush_iowait(struct rvt_qp *qp)
{
        struct hfi1_qp_priv *priv = qp->priv;
        unsigned long flags;
        seqlock_t *lock = priv->s_iowait.lock;

        if (!lock)
                return;
        write_seqlock_irqsave(lock, flags);
        if (!list_empty(&priv->s_iowait.list)) {
                list_del_init(&priv->s_iowait.list);
                priv->s_iowait.lock = NULL;
                rvt_put_qp(qp);
        }
        write_sequnlock_irqrestore(lock, flags);
}

/*
 * This function is what we would push to the core layer if we wanted to be a
 * "first class citizen".  Instead we hide this here and rely on Verbs ULPs
 * to blindly pass the MTU enum value from the PathRecord to us.
 */
static inline int verbs_mtu_enum_to_int(struct ib_device *dev, enum ib_mtu mtu)
{
        /* Constraining 10KB packets to 8KB packets */
        if (mtu == (enum ib_mtu)OPA_MTU_10240)
                mtu = (enum ib_mtu)OPA_MTU_8192;
        return opa_mtu_enum_to_int((enum opa_mtu)mtu);
}
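
/*
 * Worked example (informational): a PathRecord MTU of OPA_MTU_10240 is
 * clamped to OPA_MTU_8192 above, so the function returns 8192 bytes;
 * standard IB enums pass straight through, e.g. IB_MTU_4096 yields 4096.
 */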

int hfi1_check_modify_qp(struct rvt_qp *qp, struct ib_qp_attr *attr,
                         int attr_mask, struct ib_udata *udata)
{
        struct ib_qp *ibqp = &qp->ibqp;
        struct hfi1_ibdev *dev = to_idev(ibqp->device);
        struct hfi1_devdata *dd = dd_from_dev(dev);
        u8 sc;

        if (attr_mask & IB_QP_AV) {
                sc = ah_to_sc(ibqp->device, &attr->ah_attr);
                if (sc == 0xf)
                        return -EINVAL;

                if (!qp_to_sdma_engine(qp, sc) &&
                    dd->flags & HFI1_HAS_SEND_DMA)
                        return -EINVAL;

                if (!qp_to_send_context(qp, sc))
                        return -EINVAL;
        }

        if (attr_mask & IB_QP_ALT_PATH) {
                sc = ah_to_sc(ibqp->device, &attr->alt_ah_attr);
                if (sc == 0xf)
                        return -EINVAL;

                if (!qp_to_sdma_engine(qp, sc) &&
                    dd->flags & HFI1_HAS_SEND_DMA)
                        return -EINVAL;

                if (!qp_to_send_context(qp, sc))
                        return -EINVAL;
        }

        return 0;
}
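
/*
 * Note: ah_to_sc() resolves the AH's SL through the port's sl_to_sc table;
 * a result of 0xf (SC15) is reserved for management traffic and is treated
 * here as an unusable mapping, so both the primary and alternate path checks
 * above reject the modify before the QP can be armed with a path it cannot
 * actually use.
 */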

/*
 * qp_set_16b - Set the hdr_type based on whether the slid or the
 * dlid in the connection is extended. Only applicable for RC and UC
 * QPs. UD QPs determine this on the fly from the ah in the wqe
 */
static inline void qp_set_16b(struct rvt_qp *qp)
{
        struct hfi1_pportdata *ppd;
        struct hfi1_ibport *ibp;
        struct hfi1_qp_priv *priv = qp->priv;

        /* Update ah_attr to account for extended LIDs */
        hfi1_update_ah_attr(qp->ibqp.device, &qp->remote_ah_attr);

        /* Create 32 bit LIDs */
        hfi1_make_opa_lid(&qp->remote_ah_attr);

        if (!(rdma_ah_get_ah_flags(&qp->remote_ah_attr) & IB_AH_GRH))
                return;

        ibp = to_iport(qp->ibqp.device, qp->port_num);
        ppd = ppd_from_ibp(ibp);
        priv->hdr_type = hfi1_get_hdr_type(ppd->lid, &qp->remote_ah_attr);
}

void hfi1_modify_qp(struct rvt_qp *qp, struct ib_qp_attr *attr,
                    int attr_mask, struct ib_udata *udata)
{
        struct ib_qp *ibqp = &qp->ibqp;
        struct hfi1_qp_priv *priv = qp->priv;

        if (attr_mask & IB_QP_AV) {
                priv->s_sc = ah_to_sc(ibqp->device, &qp->remote_ah_attr);
                priv->s_sde = qp_to_sdma_engine(qp, priv->s_sc);
                priv->s_sendcontext = qp_to_send_context(qp, priv->s_sc);
                qp_set_16b(qp);
        }

        if (attr_mask & IB_QP_PATH_MIG_STATE &&
            attr->path_mig_state == IB_MIG_MIGRATED &&
            qp->s_mig_state == IB_MIG_ARMED) {
                qp->s_flags |= HFI1_S_AHG_CLEAR;
                priv->s_sc = ah_to_sc(ibqp->device, &qp->remote_ah_attr);
                priv->s_sde = qp_to_sdma_engine(qp, priv->s_sc);
                priv->s_sendcontext = qp_to_send_context(qp, priv->s_sc);
                qp_set_16b(qp);
        }

        opfn_qp_init(qp, attr, attr_mask);
}

/**
 * hfi1_setup_wqe - set up the wqe
 * @qp: The qp
 * @wqe: The built wqe
 * @call_send: Determine if the send should be posted or scheduled.
 *
 * Perform setup of the wqe.  This is called
 * prior to inserting the wqe into the ring but after
 * the wqe has been set up by RDMAVT. This function
 * allows the driver the opportunity to perform
 * validation and additional setup of the wqe.
 *
 * Returns 0 on success, -EINVAL on failure
 *
 */
int hfi1_setup_wqe(struct rvt_qp *qp, struct rvt_swqe *wqe, bool *call_send)
{
        struct hfi1_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num);
        struct rvt_ah *ah;
        struct hfi1_pportdata *ppd;
        struct hfi1_devdata *dd;

        switch (qp->ibqp.qp_type) {
        case IB_QPT_RC:
                hfi1_setup_tid_rdma_wqe(qp, wqe);
                fallthrough;
        case IB_QPT_UC:
                if (wqe->length > 0x80000000U)
                        return -EINVAL;
                if (wqe->length > qp->pmtu)
                        *call_send = false;
                break;
        case IB_QPT_SMI:
                /*
                 * SM packets should exclusively use VL15 and their SL is
                 * ignored (IBTA v1.3, Section 3.5.8.2). Therefore, when ah
                 * is created, SL is 0 in most cases and as a result some
                 * fields (vl and pmtu) in ah may not be set correctly,
                 * depending on the SL2SC and SC2VL tables at the time.
                 */
                ppd = ppd_from_ibp(ibp);
                dd = dd_from_ppd(ppd);
                if (wqe->length > dd->vld[15].mtu)
                        return -EINVAL;
                break;
        case IB_QPT_GSI:
        case IB_QPT_UD:
                ah = rvt_get_swqe_ah(wqe);
                if (wqe->length > (1 << ah->log_pmtu))
                        return -EINVAL;
                if (ibp->sl_to_sc[rdma_ah_get_sl(&ah->attr)] == 0xf)
                        return -EINVAL;
                break;
        default:
                break;
        }

        /*
         * System latency between send and schedule is large enough that
         * forcing call_send to true for piothreshold packets is necessary.
         */
        if (wqe->length <= piothreshold)
                *call_send = true;
        return 0;
}
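
/*
 * Note on *call_send: when it remains true, the rdmavt post-send path sends
 * the work request directly from the posting context; when it is set false,
 * the send is deferred to the send-engine workqueue instead.  Packets no
 * larger than piothreshold are always sent inline because the scheduling
 * latency would otherwise dominate.
 */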

/**
 * _hfi1_schedule_send - schedule progress
 * @qp: the QP
 *
 * This schedules qp progress w/o regard to the s_flags.
 *
 * It is only used in the post send, which doesn't hold
 * the s_lock.
 */
bool _hfi1_schedule_send(struct rvt_qp *qp)
{
        struct hfi1_qp_priv *priv = qp->priv;
        struct hfi1_ibport *ibp =
                to_iport(qp->ibqp.device, qp->port_num);
        struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
        struct hfi1_devdata *dd = ppd->dd;

        if (dd->flags & HFI1_SHUTDOWN)
                return true;

        return iowait_schedule(&priv->s_iowait, ppd->hfi1_wq,
                               priv->s_sde ?
                               priv->s_sde->cpu :
                               cpumask_first(cpumask_of_node(dd->node)));
}
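
/*
 * CPU choice above: if the QP already has an SDMA engine, its send-engine
 * work is queued on that engine's CPU (presumably to keep the work close to
 * where the engine's completions are processed); otherwise the first CPU of
 * the device's NUMA node is used as a default.
 */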

static void qp_pio_drain(struct rvt_qp *qp)
{
        struct hfi1_qp_priv *priv = qp->priv;

        if (!priv->s_sendcontext)
                return;
        while (iowait_pio_pending(&priv->s_iowait)) {
                write_seqlock_irq(&priv->s_sendcontext->waitlock);
                hfi1_sc_wantpiobuf_intr(priv->s_sendcontext, 1);
                write_sequnlock_irq(&priv->s_sendcontext->waitlock);
                iowait_pio_drain(&priv->s_iowait);
                write_seqlock_irq(&priv->s_sendcontext->waitlock);
                hfi1_sc_wantpiobuf_intr(priv->s_sendcontext, 0);
                write_sequnlock_irq(&priv->s_sendcontext->waitlock);
        }
}
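
/*
 * qp_pio_drain() loops until the QP has no PIO sends outstanding: each pass
 * arms the send context's "PIO buffer available" interrupt, waits for the
 * pending PIO count to drain, then disarms the interrupt again, all under
 * the context's waitlock.
 */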

/**
 * hfi1_schedule_send - schedule progress
 * @qp: the QP
 *
 * This schedules qp progress; the caller should hold
 * the s_lock.
 *
 * Return: true if the first leg is scheduled;
 * false if the first leg is not scheduled.
 */
bool hfi1_schedule_send(struct rvt_qp *qp)
{
        lockdep_assert_held(&qp->s_lock);
        if (hfi1_send_ok(qp)) {
                _hfi1_schedule_send(qp);
                return true;
        }
        if (qp->s_flags & HFI1_S_ANY_WAIT_IO)
                iowait_set_flag(&((struct hfi1_qp_priv *)qp->priv)->s_iowait,
                                IOWAIT_PENDING_IB);
        return false;
}

static void hfi1_qp_schedule(struct rvt_qp *qp)
{
        struct hfi1_qp_priv *priv = qp->priv;
        bool ret;

        if (iowait_flag_set(&priv->s_iowait, IOWAIT_PENDING_IB)) {
                ret = hfi1_schedule_send(qp);
                if (ret)
                        iowait_clear_flag(&priv->s_iowait, IOWAIT_PENDING_IB);
        }
        if (iowait_flag_set(&priv->s_iowait, IOWAIT_PENDING_TID)) {
                ret = hfi1_schedule_tid_send(qp);
                if (ret)
                        iowait_clear_flag(&priv->s_iowait, IOWAIT_PENDING_TID);
        }
}

void hfi1_qp_wakeup(struct rvt_qp *qp, u32 flag)
{
        unsigned long flags;

        spin_lock_irqsave(&qp->s_lock, flags);
        if (qp->s_flags & flag) {
                qp->s_flags &= ~flag;
                trace_hfi1_qpwakeup(qp, flag);
                hfi1_qp_schedule(qp);
        }
        spin_unlock_irqrestore(&qp->s_lock, flags);
        /* Notify hfi1_destroy_qp() if it is waiting. */
        rvt_put_qp(qp);
}

void hfi1_qp_unbusy(struct rvt_qp *qp, struct iowait_work *wait)
{
        struct hfi1_qp_priv *priv = qp->priv;

        if (iowait_set_work_flag(wait) == IOWAIT_IB_SE) {
                qp->s_flags &= ~RVT_S_BUSY;
                /*
                 * If we are sending a first-leg packet from the second leg,
                 * we need to clear the busy flag from priv->s_flags to
                 * avoid a race condition when the qp wakes up before
                 * the call to hfi1_verbs_send() returns to the second
                 * leg. In that case, the second leg will terminate without
                 * being re-scheduled, resulting in failure to send TID RDMA
                 * WRITE DATA and TID RDMA ACK packets.
                 */
                if (priv->s_flags & HFI1_S_TID_BUSY_SET) {
                        priv->s_flags &= ~(HFI1_S_TID_BUSY_SET |
                                           RVT_S_BUSY);
                        iowait_set_flag(&priv->s_iowait, IOWAIT_PENDING_TID);
                }
        } else {
                priv->s_flags &= ~RVT_S_BUSY;
        }
}

static int iowait_sleep(
        struct sdma_engine *sde,
        struct iowait_work *wait,
        struct sdma_txreq *stx,
        uint seq,
        bool pkts_sent)
{
        struct verbs_txreq *tx = container_of(stx, struct verbs_txreq, txreq);
        struct rvt_qp *qp;
        struct hfi1_qp_priv *priv;
        unsigned long flags;
        int ret = 0;

        qp = tx->qp;
        priv = qp->priv;

        spin_lock_irqsave(&qp->s_lock, flags);
        if (ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK) {
                /*
                 * If we couldn't queue the DMA request, save the info
                 * and try again later rather than destroying the
                 * buffer and undoing the side effects of the copy.
                 */
                /* Make a common routine? */
                list_add_tail(&stx->list, &wait->tx_head);
                write_seqlock(&sde->waitlock);
                if (sdma_progress(sde, seq, stx))
                        goto eagain;
                if (list_empty(&priv->s_iowait.list)) {
                        struct hfi1_ibport *ibp =
                                to_iport(qp->ibqp.device, qp->port_num);

                        ibp->rvp.n_dmawait++;
                        qp->s_flags |= RVT_S_WAIT_DMA_DESC;
                        iowait_get_priority(&priv->s_iowait);
                        iowait_queue(pkts_sent, &priv->s_iowait,
                                     &sde->dmawait);
                        priv->s_iowait.lock = &sde->waitlock;
                        trace_hfi1_qpsleep(qp, RVT_S_WAIT_DMA_DESC);
                        rvt_get_qp(qp);
                }
                write_sequnlock(&sde->waitlock);
                hfi1_qp_unbusy(qp, wait);
                spin_unlock_irqrestore(&qp->s_lock, flags);
                ret = -EBUSY;
        } else {
                spin_unlock_irqrestore(&qp->s_lock, flags);
                hfi1_put_txreq(tx);
        }
        return ret;
eagain:
        write_sequnlock(&sde->waitlock);
        spin_unlock_irqrestore(&qp->s_lock, flags);
        list_del_init(&stx->list);
        return -EAGAIN;
}
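
/*
 * Return-value contract for iowait_sleep() as used by the SDMA submission
 * path: -EBUSY means the txreq was parked on wait->tx_head and the QP was
 * queued on the engine's dmawait list, so the caller should stop and wait
 * for iowait_wakeup(); -EAGAIN means the descriptor ring made progress while
 * the waitlock was held, so the caller may simply retry the submission.
 */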

static void iowait_wakeup(struct iowait *wait, int reason)
{
        struct rvt_qp *qp = iowait_to_qp(wait);

        WARN_ON(reason != SDMA_AVAIL_REASON);
        hfi1_qp_wakeup(qp, RVT_S_WAIT_DMA_DESC);
}

static void iowait_sdma_drained(struct iowait *wait)
{
        struct rvt_qp *qp = iowait_to_qp(wait);
        unsigned long flags;

        /*
         * This happens when the send engine notes
         * a QP in the error state and cannot
         * do the flush work until that QP's
         * sdma work has finished.
         */
        spin_lock_irqsave(&qp->s_lock, flags);
        if (qp->s_flags & RVT_S_WAIT_DMA) {
                qp->s_flags &= ~RVT_S_WAIT_DMA;
                hfi1_schedule_send(qp);
        }
        spin_unlock_irqrestore(&qp->s_lock, flags);
}

static void hfi1_init_priority(struct iowait *w)
{
        struct rvt_qp *qp = iowait_to_qp(w);
        struct hfi1_qp_priv *priv = qp->priv;

        if (qp->s_flags & RVT_S_ACK_PENDING)
                w->priority++;
        if (priv->s_flags & RVT_S_ACK_PENDING)
                w->priority++;
}
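
/*
 * hfi1_init_priority() biases the iowait priority so that QPs with
 * acknowledgements outstanding (the responder ack tracked in qp->s_flags or
 * the TID RDMA ack tracked in priv->s_flags) are woken ahead of plain
 * requester traffic when an SDMA engine's descriptors become available.
 */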

/**
 * qp_to_sdma_engine - map a qp to a send engine
 * @qp: the QP
 * @sc5: the 5 bit sc
 *
 * Return:
 * A send engine for the qp or NULL for SMI type qp.
 */
struct sdma_engine *qp_to_sdma_engine(struct rvt_qp *qp, u8 sc5)
{
        struct hfi1_devdata *dd = dd_from_ibdev(qp->ibqp.device);
        struct sdma_engine *sde;

        if (!(dd->flags & HFI1_HAS_SEND_DMA))
                return NULL;
        switch (qp->ibqp.qp_type) {
        case IB_QPT_SMI:
                return NULL;
        default:
                break;
        }
        sde = sdma_select_engine_sc(dd, qp->ibqp.qp_num >> dd->qos_shift, sc5);
        return sde;
}

/**
 * qp_to_send_context - map a qp to a send context
 * @qp: the QP
 * @sc5: the 5 bit sc
 *
 * Return:
 * A send context for the qp
 */
struct send_context *qp_to_send_context(struct rvt_qp *qp, u8 sc5)
{
        struct hfi1_devdata *dd = dd_from_ibdev(qp->ibqp.device);

        switch (qp->ibqp.qp_type) {
        case IB_QPT_SMI:
                /* SMA packets to VL15 */
                return dd->vld[15].sc;
        default:
                break;
        }

        return pio_select_send_context_sc(dd, qp->ibqp.qp_num >> dd->qos_shift,
                                          sc5);
}

static const char * const qp_type_str[] = {
        "SMI", "GSI", "RC", "UC", "UD",
};

static int qp_idle(struct rvt_qp *qp)
{
        return
                qp->s_last == qp->s_acked &&
                qp->s_acked == qp->s_cur &&
                qp->s_cur == qp->s_tail &&
                qp->s_tail == qp->s_head;
}
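
/*
 * A QP is reported as idle ("I" in qp_iter_print() below) only when all of
 * its send-queue indices coincide: nothing posted but unprocessed
 * (s_head == s_tail), nothing sent but unacked (s_acked == s_cur == s_tail),
 * and nothing acked but uncompleted (s_last == s_acked).  Anything else is
 * shown as busy ("B").
 */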

/**
 * qp_iter_print - print the qp information to seq_file
 * @s: the seq_file to emit the qp information on
 * @iter: the iterator for the qp hash list
 */
void qp_iter_print(struct seq_file *s, struct rvt_qp_iter *iter)
{
        struct rvt_swqe *wqe;
        struct rvt_qp *qp = iter->qp;
        struct hfi1_qp_priv *priv = qp->priv;
        struct sdma_engine *sde;
        struct send_context *send_context;
        struct rvt_ack_entry *e = NULL;
        struct rvt_srq *srq = qp->ibqp.srq ?
                ibsrq_to_rvtsrq(qp->ibqp.srq) : NULL;

        sde = qp_to_sdma_engine(qp, priv->s_sc);
        wqe = rvt_get_swqe_ptr(qp, qp->s_last);
        send_context = qp_to_send_context(qp, priv->s_sc);
        if (qp->s_ack_queue)
                e = &qp->s_ack_queue[qp->s_tail_ack_queue];
        seq_printf(s,
                   "N %d %s QP %x R %u %s %u %u f=%x %u %u %u %u %u %u SPSN %x %x %x %x %x RPSN %x S(%u %u %u %u %u %u %u) R(%u %u %u) RQP %x LID %x SL %u MTU %u %u %u %u %u SDE %p,%u SC %p,%u SCQ %u %u PID %d OS %x %x E %x %x %x RNR %d %s %d\n",
                   iter->n,
                   qp_idle(qp) ? "I" : "B",
                   qp->ibqp.qp_num,
                   atomic_read(&qp->refcount),
                   qp_type_str[qp->ibqp.qp_type],
                   qp->state,
                   wqe ? wqe->wr.opcode : 0,
                   qp->s_flags,
                   iowait_sdma_pending(&priv->s_iowait),
                   iowait_pio_pending(&priv->s_iowait),
                   !list_empty(&priv->s_iowait.list),
                   qp->timeout,
                   wqe ? wqe->ssn : 0,
                   qp->s_lsn,
                   qp->s_last_psn,
                   qp->s_psn, qp->s_next_psn,
                   qp->s_sending_psn, qp->s_sending_hpsn,
                   qp->r_psn,
                   qp->s_last, qp->s_acked, qp->s_cur,
                   qp->s_tail, qp->s_head, qp->s_size,
                   qp->s_avail,
                   /* ack_queue ring pointers, size */
                   qp->s_tail_ack_queue, qp->r_head_ack_queue,
                   rvt_max_atomic(&to_idev(qp->ibqp.device)->rdi),
                   /* remote QP info  */
                   qp->remote_qpn,
                   rdma_ah_get_dlid(&qp->remote_ah_attr),
                   rdma_ah_get_sl(&qp->remote_ah_attr),
                   qp->pmtu,
                   qp->s_retry,
                   qp->s_retry_cnt,
                   qp->s_rnr_retry_cnt,
                   qp->s_rnr_retry,
                   sde,
                   sde ? sde->this_idx : 0,
                   send_context,
                   send_context ? send_context->sw_index : 0,
                   ib_cq_head(qp->ibqp.send_cq),
                   ib_cq_tail(qp->ibqp.send_cq),
                   qp->pid,
                   qp->s_state,
                   qp->s_ack_state,
                   /* ack queue information */
                   e ? e->opcode : 0,
                   e ? e->psn : 0,
                   e ? e->lpsn : 0,
                   qp->r_min_rnr_timer,
                   srq ? "SRQ" : "RQ",
                   srq ? srq->rq.size : qp->r_rq.size
                );
}

void *qp_priv_alloc(struct rvt_dev_info *rdi, struct rvt_qp *qp)
{
        struct hfi1_qp_priv *priv;

        priv = kzalloc_node(sizeof(*priv), GFP_KERNEL, rdi->dparms.node);
        if (!priv)
                return ERR_PTR(-ENOMEM);

        priv->owner = qp;

        priv->s_ahg = kzalloc_node(sizeof(*priv->s_ahg), GFP_KERNEL,
                                   rdi->dparms.node);
        if (!priv->s_ahg) {
                kfree(priv);
                return ERR_PTR(-ENOMEM);
        }
        iowait_init(
                &priv->s_iowait,
                1,
                _hfi1_do_send,
                _hfi1_do_tid_send,
                iowait_sleep,
                iowait_wakeup,
                iowait_sdma_drained,
                hfi1_init_priority);
        /* Init to a value to start the running average correctly */
        priv->s_running_pkt_size = piothreshold / 2;
        return priv;
}
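
/*
 * The iowait_init() call above wires the per-QP iowait to this file's
 * callbacks: _hfi1_do_send and _hfi1_do_tid_send drive the two send legs
 * (regular IB work and TID RDMA work), iowait_sleep() parks the QP when an
 * SDMA engine runs out of descriptors, iowait_wakeup() restarts it when
 * descriptors free up, iowait_sdma_drained() handles the error-state drain
 * case, and hfi1_init_priority() computes the wakeup priority.
 */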

void qp_priv_free(struct rvt_dev_info *rdi, struct rvt_qp *qp)
{
        struct hfi1_qp_priv *priv = qp->priv;

        hfi1_qp_priv_tid_free(rdi, qp);
        kfree(priv->s_ahg);
        kfree(priv);
}

unsigned free_all_qps(struct rvt_dev_info *rdi)
{
        struct hfi1_ibdev *verbs_dev = container_of(rdi,
                                                    struct hfi1_ibdev,
                                                    rdi);
        struct hfi1_devdata *dd = container_of(verbs_dev,
                                               struct hfi1_devdata,
                                               verbs_dev);
        int n;
        unsigned qp_inuse = 0;

        for (n = 0; n < dd->num_pports; n++) {
                struct hfi1_ibport *ibp = &dd->pport[n].ibport_data;

                rcu_read_lock();
                if (rcu_dereference(ibp->rvp.qp[0]))
                        qp_inuse++;
                if (rcu_dereference(ibp->rvp.qp[1]))
                        qp_inuse++;
                rcu_read_unlock();
        }

        return qp_inuse;
}

void flush_qp_waiters(struct rvt_qp *qp)
{
        lockdep_assert_held(&qp->s_lock);
        flush_iowait(qp);
        hfi1_tid_rdma_flush_wait(qp);
}

void stop_send_queue(struct rvt_qp *qp)
{
        struct hfi1_qp_priv *priv = qp->priv;

        iowait_cancel_work(&priv->s_iowait);
        if (cancel_work_sync(&priv->tid_rdma.trigger_work))
                rvt_put_qp(qp);
}

void quiesce_qp(struct rvt_qp *qp)
{
        struct hfi1_qp_priv *priv = qp->priv;

        hfi1_del_tid_reap_timer(qp);
        hfi1_del_tid_retry_timer(qp);
        iowait_sdma_drain(&priv->s_iowait);
        qp_pio_drain(qp);
        flush_tx_list(qp);
}

void notify_qp_reset(struct rvt_qp *qp)
{
        hfi1_qp_kern_exp_rcv_clear_all(qp);
        qp->r_adefered = 0;
        clear_ahg(qp);

        /* Clear any OPFN state */
        if (qp->ibqp.qp_type == IB_QPT_RC)
                opfn_conn_error(qp);
}

/*
 * Switch to alternate path.
 * The QP s_lock should be held and interrupts disabled.
 */
void hfi1_migrate_qp(struct rvt_qp *qp)
{
        struct hfi1_qp_priv *priv = qp->priv;
        struct ib_event ev;

        qp->s_mig_state = IB_MIG_MIGRATED;
        qp->remote_ah_attr = qp->alt_ah_attr;
        qp->port_num = rdma_ah_get_port_num(&qp->alt_ah_attr);
        qp->s_pkey_index = qp->s_alt_pkey_index;
        qp->s_flags |= HFI1_S_AHG_CLEAR;
        priv->s_sc = ah_to_sc(qp->ibqp.device, &qp->remote_ah_attr);
        priv->s_sde = qp_to_sdma_engine(qp, priv->s_sc);
        qp_set_16b(qp);

        ev.device = qp->ibqp.device;
        ev.element.qp = &qp->ibqp;
        ev.event = IB_EVENT_PATH_MIG;
        qp->ibqp.event_handler(&ev, qp->ibqp.qp_context);
}

int mtu_to_path_mtu(u32 mtu)
{
        return mtu_to_enum(mtu, OPA_MTU_8192);
}

u32 mtu_from_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp, u32 pmtu)
{
        u32 mtu;
        struct hfi1_ibdev *verbs_dev = container_of(rdi,
                                                    struct hfi1_ibdev,
                                                    rdi);
        struct hfi1_devdata *dd = container_of(verbs_dev,
                                               struct hfi1_devdata,
                                               verbs_dev);
        struct hfi1_ibport *ibp;
        u8 sc, vl;

        ibp = &dd->pport[qp->port_num - 1].ibport_data;
        sc = ibp->sl_to_sc[rdma_ah_get_sl(&qp->remote_ah_attr)];
        vl = sc_to_vlt(dd, sc);

        mtu = verbs_mtu_enum_to_int(qp->ibqp.device, pmtu);
        if (vl < PER_VL_SEND_CONTEXTS)
                mtu = min_t(u32, mtu, dd->vld[vl].mtu);
        return mtu;
}
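
/*
 * Worked example (informational): for a path MTU enum of OPA_MTU_10240,
 * verbs_mtu_enum_to_int() clamps the value to 8192 bytes; if the SL then
 * maps to a VL whose configured MTU (dd->vld[vl].mtu) is only 4096, the QP
 * ends up with an effective MTU of 4096, i.e. the smaller of the path and
 * per-VL limits.
 */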

int get_pmtu_from_attr(struct rvt_dev_info *rdi, struct rvt_qp *qp,
                       struct ib_qp_attr *attr)
{
        int mtu, pidx = qp->port_num - 1;
        struct hfi1_ibdev *verbs_dev = container_of(rdi,
                                                    struct hfi1_ibdev,
                                                    rdi);
        struct hfi1_devdata *dd = container_of(verbs_dev,
                                               struct hfi1_devdata,
                                               verbs_dev);
        mtu = verbs_mtu_enum_to_int(qp->ibqp.device, attr->path_mtu);
        if (mtu == -1)
                return -1; /* values less than 0 are error */

        if (mtu > dd->pport[pidx].ibmtu)
                return mtu_to_enum(dd->pport[pidx].ibmtu, IB_MTU_2048);
        else
                return attr->path_mtu;
}

void notify_error_qp(struct rvt_qp *qp)
{
        struct hfi1_qp_priv *priv = qp->priv;
        seqlock_t *lock = priv->s_iowait.lock;

        if (lock) {
                write_seqlock(lock);
                if (!list_empty(&priv->s_iowait.list) &&
                    !(qp->s_flags & RVT_S_BUSY) &&
                    !(priv->s_flags & RVT_S_BUSY)) {
                        qp->s_flags &= ~HFI1_S_ANY_WAIT_IO;
                        iowait_clear_flag(&priv->s_iowait, IOWAIT_PENDING_IB);
                        iowait_clear_flag(&priv->s_iowait, IOWAIT_PENDING_TID);
                        list_del_init(&priv->s_iowait.list);
                        priv->s_iowait.lock = NULL;
                        rvt_put_qp(qp);
                }
                write_sequnlock(lock);
        }

        if (!(qp->s_flags & RVT_S_BUSY) && !(priv->s_flags & RVT_S_BUSY)) {
                qp->s_hdrwords = 0;
                if (qp->s_rdma_mr) {
                        rvt_put_mr(qp->s_rdma_mr);
                        qp->s_rdma_mr = NULL;
                }
                flush_tx_list(qp);
        }
}

/**
 * hfi1_qp_iter_cb - callback for iterator
 * @qp: the qp
 * @v: the sl in low bits of v
 *
 * This is called from the iterator callback to work
 * on an individual qp.
 */
static void hfi1_qp_iter_cb(struct rvt_qp *qp, u64 v)
{
        int lastwqe;
        struct ib_event ev;
        struct hfi1_ibport *ibp =
                to_iport(qp->ibqp.device, qp->port_num);
        struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
        u8 sl = (u8)v;

        if (qp->port_num != ppd->port ||
            (qp->ibqp.qp_type != IB_QPT_UC &&
             qp->ibqp.qp_type != IB_QPT_RC) ||
            rdma_ah_get_sl(&qp->remote_ah_attr) != sl ||
            !(ib_rvt_state_ops[qp->state] & RVT_POST_SEND_OK))
                return;

        spin_lock_irq(&qp->r_lock);
        spin_lock(&qp->s_hlock);
        spin_lock(&qp->s_lock);
        lastwqe = rvt_error_qp(qp, IB_WC_WR_FLUSH_ERR);
        spin_unlock(&qp->s_lock);
        spin_unlock(&qp->s_hlock);
        spin_unlock_irq(&qp->r_lock);
        if (lastwqe) {
                ev.device = qp->ibqp.device;
                ev.element.qp = &qp->ibqp;
                ev.event = IB_EVENT_QP_LAST_WQE_REACHED;
                qp->ibqp.event_handler(&ev, qp->ibqp.qp_context);
        }
}

/**
 * hfi1_error_port_qps - put a port's RC/UC qps into error state
 * @ibp: the ibport.
 * @sl: the service level.
 *
 * This function places all RC/UC qps with a given service level into error
 * state. It is generally called to force upper-layer apps to abandon stale qps
 * after an sl->sc mapping change.
 */
void hfi1_error_port_qps(struct hfi1_ibport *ibp, u8 sl)
{
        struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
        struct hfi1_ibdev *dev = &ppd->dd->verbs_dev;

        rvt_qp_iter(&dev->rdi, sl, hfi1_qp_iter_cb);
}
