linux/drivers/staging/rdma/hfi1/qp.c
/*
 * Copyright(c) 2015, 2016 Intel Corporation.
 *
 * This file is provided under a dual BSD/GPLv2 license.  When using or
 * redistributing this file, you may do so under either license.
 *
 * GPL LICENSE SUMMARY
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of version 2 of the GNU General Public License as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * BSD LICENSE
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 *  - Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *  - Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *  - Neither the name of Intel Corporation nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 */

#include <linux/err.h>
#include <linux/vmalloc.h>
#include <linux/hash.h>
#include <linux/module.h>
#include <linux/random.h>
#include <linux/seq_file.h>
#include <rdma/rdma_vt.h>
#include <rdma/rdmavt_qp.h>

#include "hfi.h"
#include "qp.h"
#include "trace.h"
#include "verbs_txreq.h"

unsigned int hfi1_qp_table_size = 256;
module_param_named(qp_table_size, hfi1_qp_table_size, uint, S_IRUGO);
MODULE_PARM_DESC(qp_table_size, "QP table size");

static void flush_tx_list(struct rvt_qp *qp);
static int iowait_sleep(
        struct sdma_engine *sde,
        struct iowait *wait,
        struct sdma_txreq *stx,
        unsigned seq);
static void iowait_wakeup(struct iowait *wait, int reason);
static void iowait_sdma_drained(struct iowait *wait);
static void qp_pio_drain(struct rvt_qp *qp);

static inline unsigned mk_qpn(struct rvt_qpn_table *qpt,
                              struct rvt_qpn_map *map, unsigned off)
{
        return (map - qpt->map) * RVT_BITS_PER_PAGE + off;
}

/*
 * Convert the AETH credit code into the number of credits.
 */
static const u16 credit_table[31] = {
        0,                      /* 0 */
        1,                      /* 1 */
        2,                      /* 2 */
        3,                      /* 3 */
        4,                      /* 4 */
        6,                      /* 5 */
        8,                      /* 6 */
        12,                     /* 7 */
        16,                     /* 8 */
        24,                     /* 9 */
        32,                     /* A */
        48,                     /* B */
        64,                     /* C */
        96,                     /* D */
        128,                    /* E */
        192,                    /* F */
        256,                    /* 10 */
        384,                    /* 11 */
        512,                    /* 12 */
        768,                    /* 13 */
        1024,                   /* 14 */
        1536,                   /* 15 */
        2048,                   /* 16 */
        3072,                   /* 17 */
        4096,                   /* 18 */
        6144,                   /* 19 */
        8192,                   /* 1A */
        12288,                  /* 1B */
        16384,                  /* 1C */
        24576,                  /* 1D */
        32768                   /* 1E */
};
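
/*
 * Illustration only (not part of the driver, compiled out below): how an
 * AETH credit code indexes the table above.  A code of
 * HFI1_AETH_CREDIT_INVAL is a sentinel meaning "unlimited credit"; any
 * other 5-bit code selects an entry, e.g. code 0x5 -> 6 RWQEs and code
 * 0x14 -> 1024 RWQEs.  The helper name below is hypothetical.
 */
#if 0
static u32 example_aeth_code_to_credits(u32 aeth)
{
        u32 code = (aeth >> HFI1_AETH_CREDIT_SHIFT) & HFI1_AETH_CREDIT_MASK;

        if (code == HFI1_AETH_CREDIT_INVAL)
                return U32_MAX;         /* treat as unlimited */
        return credit_table[code];
}
#endif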

static void flush_tx_list(struct rvt_qp *qp)
{
        struct hfi1_qp_priv *priv = qp->priv;

        while (!list_empty(&priv->s_iowait.tx_head)) {
                struct sdma_txreq *tx;

                tx = list_first_entry(
                        &priv->s_iowait.tx_head,
                        struct sdma_txreq,
                        list);
                list_del_init(&tx->list);
                hfi1_put_txreq(
                        container_of(tx, struct verbs_txreq, txreq));
        }
}

static void flush_iowait(struct rvt_qp *qp)
{
        struct hfi1_qp_priv *priv = qp->priv;
        struct hfi1_ibdev *dev = to_idev(qp->ibqp.device);
        unsigned long flags;

        write_seqlock_irqsave(&dev->iowait_lock, flags);
        if (!list_empty(&priv->s_iowait.list)) {
                list_del_init(&priv->s_iowait.list);
                if (atomic_dec_and_test(&qp->refcount))
                        wake_up(&qp->wait);
        }
        write_sequnlock_irqrestore(&dev->iowait_lock, flags);
}

static inline int opa_mtu_enum_to_int(int mtu)
{
        switch (mtu) {
        case OPA_MTU_8192:  return 8192;
        case OPA_MTU_10240: return 10240;
        default:            return -1;
        }
}

/**
 * verbs_mtu_enum_to_int - convert an MTU enum, IB or OPA, to a byte count
 * @dev: the ib_device
 * @mtu: the MTU enum value, which may be an OPA extension of enum ib_mtu
 *
 * This is what we would push to the core layer if we wanted to be a
 * "first class citizen".  Instead we hide it here and rely on Verbs ULPs
 * to blindly pass the MTU enum value from the PathRecord to us.
 *
 * The actual flag used to determine "8k MTU" will change and is currently
 * unknown.
 */
static inline int verbs_mtu_enum_to_int(struct ib_device *dev, enum ib_mtu mtu)
{
        int val = opa_mtu_enum_to_int((int)mtu);

        if (val > 0)
                return val;
        return ib_mtu_enum_to_int(mtu);
}
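
/*
 * Illustration only (not part of the driver, compiled out below): the OPA
 * enum values are tried first and the standard IB conversion is the
 * fallback, so OPA_MTU_10240 yields 10240 while IB_MTU_4096 still yields
 * 4096 via ib_mtu_enum_to_int().
 */
#if 0
static void example_mtu_enum_usage(struct ib_device *ibdev)
{
        int opa = verbs_mtu_enum_to_int(ibdev, (enum ib_mtu)OPA_MTU_10240);
        int ib = verbs_mtu_enum_to_int(ibdev, IB_MTU_4096);

        /* opa == 10240, ib == 4096 */
}
#endif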

int hfi1_check_modify_qp(struct rvt_qp *qp, struct ib_qp_attr *attr,
                         int attr_mask, struct ib_udata *udata)
{
        struct ib_qp *ibqp = &qp->ibqp;
        struct hfi1_ibdev *dev = to_idev(ibqp->device);
        struct hfi1_devdata *dd = dd_from_dev(dev);
        u8 sc;

        if (attr_mask & IB_QP_AV) {
                sc = ah_to_sc(ibqp->device, &attr->ah_attr);
                if (sc == 0xf)
                        return -EINVAL;

                if (!qp_to_sdma_engine(qp, sc) &&
                    dd->flags & HFI1_HAS_SEND_DMA)
                        return -EINVAL;

                if (!qp_to_send_context(qp, sc))
                        return -EINVAL;
        }

        if (attr_mask & IB_QP_ALT_PATH) {
                sc = ah_to_sc(ibqp->device, &attr->alt_ah_attr);
                if (sc == 0xf)
                        return -EINVAL;

                if (!qp_to_sdma_engine(qp, sc) &&
                    dd->flags & HFI1_HAS_SEND_DMA)
                        return -EINVAL;

                if (!qp_to_send_context(qp, sc))
                        return -EINVAL;
        }

        return 0;
}

void hfi1_modify_qp(struct rvt_qp *qp, struct ib_qp_attr *attr,
                    int attr_mask, struct ib_udata *udata)
{
        struct ib_qp *ibqp = &qp->ibqp;
        struct hfi1_qp_priv *priv = qp->priv;

        if (attr_mask & IB_QP_AV) {
                priv->s_sc = ah_to_sc(ibqp->device, &qp->remote_ah_attr);
                priv->s_sde = qp_to_sdma_engine(qp, priv->s_sc);
                priv->s_sendcontext = qp_to_send_context(qp, priv->s_sc);
        }

        if (attr_mask & IB_QP_PATH_MIG_STATE &&
            attr->path_mig_state == IB_MIG_MIGRATED &&
            qp->s_mig_state == IB_MIG_ARMED) {
                qp->s_flags |= RVT_S_AHG_CLEAR;
                priv->s_sc = ah_to_sc(ibqp->device, &qp->remote_ah_attr);
                priv->s_sde = qp_to_sdma_engine(qp, priv->s_sc);
                priv->s_sendcontext = qp_to_send_context(qp, priv->s_sc);
        }
}

/**
 * hfi1_check_send_wqe - validate wqe
 * @qp: the qp
 * @wqe: the built wqe
 *
 * Validate the wqe.  This is called prior to inserting the wqe into
 * the ring, but after the wqe has been set up.
 *
 * Returns a non-negative value on success: non-zero when the wqe's length
 * does not exceed piothreshold (making it a candidate for the PIO send
 * path), or -EINVAL on failure.
 */
int hfi1_check_send_wqe(struct rvt_qp *qp,
                        struct rvt_swqe *wqe)
{
        struct hfi1_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num);
        struct rvt_ah *ah;

        switch (qp->ibqp.qp_type) {
        case IB_QPT_RC:
        case IB_QPT_UC:
                if (wqe->length > 0x80000000U)
                        return -EINVAL;
                break;
        case IB_QPT_SMI:
                ah = ibah_to_rvtah(wqe->ud_wr.ah);
                if (wqe->length > (1 << ah->log_pmtu))
                        return -EINVAL;
                break;
        case IB_QPT_GSI:
        case IB_QPT_UD:
                ah = ibah_to_rvtah(wqe->ud_wr.ah);
                if (wqe->length > (1 << ah->log_pmtu))
                        return -EINVAL;
                if (ibp->sl_to_sc[ah->attr.sl] == 0xf)
                        return -EINVAL;
                /* fall through */
        default:
                break;
        }
        return wqe->length <= piothreshold;
}

/**
 * hfi1_compute_aeth - compute the AETH (syndrome + MSN)
 * @qp: the queue pair to compute the AETH for
 *
 * Returns the AETH.
 */
__be32 hfi1_compute_aeth(struct rvt_qp *qp)
{
        u32 aeth = qp->r_msn & HFI1_MSN_MASK;

        if (qp->ibqp.srq) {
                /*
                 * Shared receive queues don't generate credits.
                 * Set the credit field to the invalid value.
                 */
                aeth |= HFI1_AETH_CREDIT_INVAL << HFI1_AETH_CREDIT_SHIFT;
        } else {
                u32 min, max, x;
                u32 credits;
                struct rvt_rwq *wq = qp->r_rq.wq;
                u32 head;
                u32 tail;

                /* sanity check pointers before trusting them */
                head = wq->head;
                if (head >= qp->r_rq.size)
                        head = 0;
                tail = wq->tail;
                if (tail >= qp->r_rq.size)
                        tail = 0;
                /*
                 * Compute the number of credits available (RWQEs).
                 * There is a small chance that the pair of reads are
                 * not atomic, which is OK, since the fuzziness is
                 * resolved as further ACKs go out.
                 */
                credits = head - tail;
                if ((int)credits < 0)
                        credits += qp->r_rq.size;
                /*
                 * Binary search the credit table to find the code to
                 * use.
                 */
                min = 0;
                max = 31;
                for (;;) {
                        x = (min + max) / 2;
                        if (credit_table[x] == credits)
                                break;
                        if (credit_table[x] > credits) {
                                max = x;
                        } else {
                                if (min == x)
                                        break;
                                min = x;
                        }
                }
                aeth |= x << HFI1_AETH_CREDIT_SHIFT;
        }
        return cpu_to_be32(aeth);
}
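
/*
 * Worked example for the binary search above (illustrative comment only):
 * with 100 free RWQEs there is no exact match in credit_table[], so the
 * search settles on the largest entry not exceeding the count, code 0xD
 * (96 credits).  The returned AETH therefore carries that 5-bit code in
 * the HFI1_AETH_CREDIT_SHIFT'd field and the current MSN in the low
 * HFI1_MSN_MASK bits.
 */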

/**
 * _hfi1_schedule_send - schedule progress
 * @qp: the QP
 *
 * This schedules qp progress without regard to the s_flags.
 *
 * It is only used in the post send path, which doesn't hold
 * the s_lock.
 */
void _hfi1_schedule_send(struct rvt_qp *qp)
{
        struct hfi1_qp_priv *priv = qp->priv;
        struct hfi1_ibport *ibp =
                to_iport(qp->ibqp.device, qp->port_num);
        struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
        struct hfi1_devdata *dd = dd_from_ibdev(qp->ibqp.device);

        iowait_schedule(&priv->s_iowait, ppd->hfi1_wq,
                        priv->s_sde ?
                        priv->s_sde->cpu :
                        cpumask_first(cpumask_of_node(dd->node)));
}

static void qp_pio_drain(struct rvt_qp *qp)
{
        struct hfi1_ibdev *dev;
        struct hfi1_qp_priv *priv = qp->priv;

        if (!priv->s_sendcontext)
                return;
        dev = to_idev(qp->ibqp.device);
        while (iowait_pio_pending(&priv->s_iowait)) {
                write_seqlock_irq(&dev->iowait_lock);
                hfi1_sc_wantpiobuf_intr(priv->s_sendcontext, 1);
                write_sequnlock_irq(&dev->iowait_lock);
                iowait_pio_drain(&priv->s_iowait);
                write_seqlock_irq(&dev->iowait_lock);
                hfi1_sc_wantpiobuf_intr(priv->s_sendcontext, 0);
                write_sequnlock_irq(&dev->iowait_lock);
        }
}

/**
 * hfi1_schedule_send - schedule progress
 * @qp: the QP
 *
 * This schedules qp progress; the caller should hold
 * the s_lock.
 */
void hfi1_schedule_send(struct rvt_qp *qp)
{
        if (hfi1_send_ok(qp))
                _hfi1_schedule_send(qp);
}

/**
 * hfi1_get_credit - handle the credit information in an AETH
 * @qp: the qp whose credit state to update
 * @aeth: the Acknowledge Extended Transport Header
 *
 * Update the QP's credit state from the credit field of the AETH and,
 * if new credits arrive, restart a send that was waiting on them.
 *
 * The QP s_lock should be held.
 */
void hfi1_get_credit(struct rvt_qp *qp, u32 aeth)
{
        u32 credit = (aeth >> HFI1_AETH_CREDIT_SHIFT) & HFI1_AETH_CREDIT_MASK;

        /*
         * If the credit is invalid, we can send
         * as many packets as we like.  Otherwise, we have to
         * honor the credit field.
         */
        if (credit == HFI1_AETH_CREDIT_INVAL) {
                if (!(qp->s_flags & RVT_S_UNLIMITED_CREDIT)) {
                        qp->s_flags |= RVT_S_UNLIMITED_CREDIT;
                        if (qp->s_flags & RVT_S_WAIT_SSN_CREDIT) {
                                qp->s_flags &= ~RVT_S_WAIT_SSN_CREDIT;
                                hfi1_schedule_send(qp);
                        }
                }
        } else if (!(qp->s_flags & RVT_S_UNLIMITED_CREDIT)) {
                /* Compute new LSN (i.e., MSN + credit) */
                credit = (aeth + credit_table[credit]) & HFI1_MSN_MASK;
                if (cmp_msn(credit, qp->s_lsn) > 0) {
                        qp->s_lsn = credit;
                        if (qp->s_flags & RVT_S_WAIT_SSN_CREDIT) {
                                qp->s_flags &= ~RVT_S_WAIT_SSN_CREDIT;
                                hfi1_schedule_send(qp);
                        }
                }
        }
}
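
/*
 * Illustration only (not part of the driver, compiled out below): the send
 * side consumes the limit maintained above.  Roughly, before issuing a new
 * request the progress code compares a WQE's SSN against qp->s_lsn in MSN
 * space and, when no credit is available, sets RVT_S_WAIT_SSN_CREDIT and
 * waits for hfi1_get_credit() to restart it.  This sketch is not a copy of
 * the driver's progress routine.
 */
#if 0
        if (!(qp->s_flags & RVT_S_UNLIMITED_CREDIT) &&
            cmp_msn(wqe->ssn, qp->s_lsn + 1) > 0) {
                qp->s_flags |= RVT_S_WAIT_SSN_CREDIT;
                /* stall until hfi1_get_credit() schedules us again */
        }
#endif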

void hfi1_qp_wakeup(struct rvt_qp *qp, u32 flag)
{
        unsigned long flags;

        spin_lock_irqsave(&qp->s_lock, flags);
        if (qp->s_flags & flag) {
                qp->s_flags &= ~flag;
                trace_hfi1_qpwakeup(qp, flag);
                hfi1_schedule_send(qp);
        }
        spin_unlock_irqrestore(&qp->s_lock, flags);
        /* Notify hfi1_destroy_qp() if it is waiting. */
        if (atomic_dec_and_test(&qp->refcount))
                wake_up(&qp->wait);
}

static int iowait_sleep(
        struct sdma_engine *sde,
        struct iowait *wait,
        struct sdma_txreq *stx,
        unsigned seq)
{
        struct verbs_txreq *tx = container_of(stx, struct verbs_txreq, txreq);
        struct rvt_qp *qp;
        struct hfi1_qp_priv *priv;
        unsigned long flags;
        int ret = 0;
        struct hfi1_ibdev *dev;

        qp = tx->qp;
        priv = qp->priv;

        spin_lock_irqsave(&qp->s_lock, flags);
        if (ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK) {
                /*
                 * If we couldn't queue the DMA request, save the info
                 * and try again later rather than destroying the
                 * buffer and undoing the side effects of the copy.
                 */
                /* Make a common routine? */
                dev = &sde->dd->verbs_dev;
                list_add_tail(&stx->list, &wait->tx_head);
                write_seqlock(&dev->iowait_lock);
                if (sdma_progress(sde, seq, stx))
                        goto eagain;
                if (list_empty(&priv->s_iowait.list)) {
                        struct hfi1_ibport *ibp =
                                to_iport(qp->ibqp.device, qp->port_num);

                        ibp->rvp.n_dmawait++;
                        qp->s_flags |= RVT_S_WAIT_DMA_DESC;
                        list_add_tail(&priv->s_iowait.list, &sde->dmawait);
                        trace_hfi1_qpsleep(qp, RVT_S_WAIT_DMA_DESC);
                        atomic_inc(&qp->refcount);
                }
                write_sequnlock(&dev->iowait_lock);
                qp->s_flags &= ~RVT_S_BUSY;
                spin_unlock_irqrestore(&qp->s_lock, flags);
                ret = -EBUSY;
        } else {
                spin_unlock_irqrestore(&qp->s_lock, flags);
                hfi1_put_txreq(tx);
        }
        return ret;
eagain:
        write_sequnlock(&dev->iowait_lock);
        spin_unlock_irqrestore(&qp->s_lock, flags);
        list_del_init(&stx->list);
        return -EAGAIN;
}

static void iowait_wakeup(struct iowait *wait, int reason)
{
        struct rvt_qp *qp = iowait_to_qp(wait);

        WARN_ON(reason != SDMA_AVAIL_REASON);
        hfi1_qp_wakeup(qp, RVT_S_WAIT_DMA_DESC);
}

static void iowait_sdma_drained(struct iowait *wait)
{
        struct rvt_qp *qp = iowait_to_qp(wait);

        /*
         * This happens when the send engine notes
         * a QP in the error state and cannot
         * do the flush work until that QP's
         * sdma work has finished.
         */
        spin_lock(&qp->s_lock);
        if (qp->s_flags & RVT_S_WAIT_DMA) {
                qp->s_flags &= ~RVT_S_WAIT_DMA;
                hfi1_schedule_send(qp);
        }
        spin_unlock(&qp->s_lock);
}

/**
 * qp_to_sdma_engine - map a qp to a send engine
 * @qp: the QP
 * @sc5: the 5 bit sc
 *
 * Return:
 * A send engine for the qp or NULL for SMI type qp.
 */
struct sdma_engine *qp_to_sdma_engine(struct rvt_qp *qp, u8 sc5)
{
        struct hfi1_devdata *dd = dd_from_ibdev(qp->ibqp.device);
        struct sdma_engine *sde;

        if (!(dd->flags & HFI1_HAS_SEND_DMA))
                return NULL;
        switch (qp->ibqp.qp_type) {
        case IB_QPT_SMI:
                return NULL;
        default:
                break;
        }
        sde = sdma_select_engine_sc(dd, qp->ibqp.qp_num >> dd->qos_shift, sc5);
        return sde;
}

/*
 * qp_to_send_context - map a qp to a send context
 * @qp: the QP
 * @sc5: the 5 bit sc
 *
 * Return:
 * A send context for the qp
 */
struct send_context *qp_to_send_context(struct rvt_qp *qp, u8 sc5)
{
        struct hfi1_devdata *dd = dd_from_ibdev(qp->ibqp.device);

        switch (qp->ibqp.qp_type) {
        case IB_QPT_SMI:
                /* SMA packets to VL15 */
                return dd->vld[15].sc;
        default:
                break;
        }

        return pio_select_send_context_sc(dd, qp->ibqp.qp_num >> dd->qos_shift,
                                          sc5);
}
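
/*
 * Illustration only (not part of the driver, compiled out below): the two
 * mapping helpers above are normally used together, keyed by the 5-bit SC
 * derived from an address handle.  A NULL SDMA engine on a device with
 * HFI1_HAS_SEND_DMA, or a NULL send context, means the SC is not usable
 * for this QP (see hfi1_check_modify_qp() above).
 */
#if 0
        u8 sc5 = ah_to_sc(qp->ibqp.device, &qp->remote_ah_attr);
        struct sdma_engine *sde = qp_to_sdma_engine(qp, sc5);
        struct send_context *sc = qp_to_send_context(qp, sc5);
#endif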

struct qp_iter {
        struct hfi1_ibdev *dev;
        struct rvt_qp *qp;
        int specials;
        int n;
};

struct qp_iter *qp_iter_init(struct hfi1_ibdev *dev)
{
        struct qp_iter *iter;

        iter = kzalloc(sizeof(*iter), GFP_KERNEL);
        if (!iter)
                return NULL;

        iter->dev = dev;
        iter->specials = dev->rdi.ibdev.phys_port_cnt * 2;
        if (qp_iter_next(iter)) {
                kfree(iter);
                return NULL;
        }

        return iter;
}

int qp_iter_next(struct qp_iter *iter)
{
        struct hfi1_ibdev *dev = iter->dev;
        int n = iter->n;
        int ret = 1;
        struct rvt_qp *pqp = iter->qp;
        struct rvt_qp *qp;

        /*
         * The approach is to consider the special qps
         * as additional table entries before the
         * real hash table.  Since the qp code sets
         * the qp->next hash link to NULL, this works just fine.
         *
         * iter->specials is 2 * # ports
         *
         * n = 0..iter->specials-1 are the special qp indices
         *
         * n = iter->specials..dev->rdi.qp_dev->qp_table_size+iter->specials-1
         * are the potential hash bucket entries
         */
        for (; n < dev->rdi.qp_dev->qp_table_size + iter->specials; n++) {
                if (pqp) {
                        qp = rcu_dereference(pqp->next);
                } else {
                        if (n < iter->specials) {
                                struct hfi1_pportdata *ppd;
                                struct hfi1_ibport *ibp;
                                int pidx;

                                pidx = n % dev->rdi.ibdev.phys_port_cnt;
                                ppd = &dd_from_dev(dev)->pport[pidx];
                                ibp = &ppd->ibport_data;

                                if (!(n & 1))
                                        qp = rcu_dereference(ibp->rvp.qp[0]);
                                else
                                        qp = rcu_dereference(ibp->rvp.qp[1]);
                        } else {
                                qp = rcu_dereference(
                                        dev->rdi.qp_dev->qp_table[
                                                (n - iter->specials)]);
                        }
                }
                pqp = qp;
                if (qp) {
                        iter->qp = qp;
                        iter->n = n;
                        return 0;
                }
        }
        return ret;
}

static const char * const qp_type_str[] = {
        "SMI", "GSI", "RC", "UC", "UD",
};

static int qp_idle(struct rvt_qp *qp)
{
        return
                qp->s_last == qp->s_acked &&
                qp->s_acked == qp->s_cur &&
                qp->s_cur == qp->s_tail &&
                qp->s_tail == qp->s_head;
}

void qp_iter_print(struct seq_file *s, struct qp_iter *iter)
{
        struct rvt_swqe *wqe;
        struct rvt_qp *qp = iter->qp;
        struct hfi1_qp_priv *priv = qp->priv;
        struct sdma_engine *sde;
        struct send_context *send_context;

        sde = qp_to_sdma_engine(qp, priv->s_sc);
        wqe = rvt_get_swqe_ptr(qp, qp->s_last);
        send_context = qp_to_send_context(qp, priv->s_sc);
        seq_printf(s,
                   "N %d %s QP %x R %u %s %u %u %u f=%x %u %u %u %u %u %u PSN %x %x %x %x %x (%u %u %u %u %u %u %u) RQP %x LID %x SL %u MTU %u %u %u %u SDE %p,%u SC %p,%u SCQ %u %u PID %d\n",
                   iter->n,
                   qp_idle(qp) ? "I" : "B",
                   qp->ibqp.qp_num,
                   atomic_read(&qp->refcount),
                   qp_type_str[qp->ibqp.qp_type],
                   qp->state,
                   wqe ? wqe->wr.opcode : 0,
                   qp->s_hdrwords,
                   qp->s_flags,
                   iowait_sdma_pending(&priv->s_iowait),
                   iowait_pio_pending(&priv->s_iowait),
                   !list_empty(&priv->s_iowait.list),
                   qp->timeout,
                   wqe ? wqe->ssn : 0,
                   qp->s_lsn,
                   qp->s_last_psn,
                   qp->s_psn, qp->s_next_psn,
                   qp->s_sending_psn, qp->s_sending_hpsn,
                   qp->s_last, qp->s_acked, qp->s_cur,
                   qp->s_tail, qp->s_head, qp->s_size,
                   qp->s_avail,
                   qp->remote_qpn,
                   qp->remote_ah_attr.dlid,
                   qp->remote_ah_attr.sl,
                   qp->pmtu,
                   qp->s_retry,
                   qp->s_retry_cnt,
                   qp->s_rnr_retry_cnt,
                   sde,
                   sde ? sde->this_idx : 0,
                   send_context,
                   send_context ? send_context->sw_index : 0,
                   ibcq_to_rvtcq(qp->ibqp.send_cq)->queue->head,
                   ibcq_to_rvtcq(qp->ibqp.send_cq)->queue->tail,
                   qp->pid);
}
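
/*
 * Illustration only (not part of the driver, compiled out below): the
 * iterator above is meant for seq_file style dumping, roughly as the hfi1
 * debugfs code uses it.  qp_iter_init() already positions the iterator on
 * the first QP (or returns NULL if there is none), qp_iter_next() returns
 * 0 while more QPs remain, and the walk must happen under RCU because the
 * hash chains are traversed with rcu_dereference().
 */
#if 0
static void example_dump_qps(struct seq_file *s, struct hfi1_ibdev *dev)
{
        struct qp_iter *iter;

        rcu_read_lock();
        iter = qp_iter_init(dev);
        if (iter) {
                do {
                        qp_iter_print(s, iter);
                } while (!qp_iter_next(iter));
                kfree(iter);
        }
        rcu_read_unlock();
}
#endif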

void qp_comm_est(struct rvt_qp *qp)
{
        qp->r_flags |= RVT_R_COMM_EST;
        if (qp->ibqp.event_handler) {
                struct ib_event ev;

                ev.device = qp->ibqp.device;
                ev.element.qp = &qp->ibqp;
                ev.event = IB_EVENT_COMM_EST;
                qp->ibqp.event_handler(&ev, qp->ibqp.qp_context);
        }
}

void *qp_priv_alloc(struct rvt_dev_info *rdi, struct rvt_qp *qp,
                    gfp_t gfp)
{
        struct hfi1_qp_priv *priv;

        priv = kzalloc_node(sizeof(*priv), gfp, rdi->dparms.node);
        if (!priv)
                return ERR_PTR(-ENOMEM);

        priv->owner = qp;

        priv->s_hdr = kzalloc_node(sizeof(*priv->s_hdr), gfp, rdi->dparms.node);
        if (!priv->s_hdr) {
                kfree(priv);
                return ERR_PTR(-ENOMEM);
        }
        setup_timer(&priv->s_rnr_timer, hfi1_rc_rnr_retry, (unsigned long)qp);
        qp->s_timer.function = hfi1_rc_timeout;
        return priv;
}

void qp_priv_free(struct rvt_dev_info *rdi, struct rvt_qp *qp)
{
        struct hfi1_qp_priv *priv = qp->priv;

        kfree(priv->s_hdr);
        kfree(priv);
}

unsigned free_all_qps(struct rvt_dev_info *rdi)
{
        struct hfi1_ibdev *verbs_dev = container_of(rdi,
                                                    struct hfi1_ibdev,
                                                    rdi);
        struct hfi1_devdata *dd = container_of(verbs_dev,
                                               struct hfi1_devdata,
                                               verbs_dev);
        int n;
        unsigned qp_inuse = 0;

        for (n = 0; n < dd->num_pports; n++) {
                struct hfi1_ibport *ibp = &dd->pport[n].ibport_data;

                rcu_read_lock();
                if (rcu_dereference(ibp->rvp.qp[0]))
                        qp_inuse++;
                if (rcu_dereference(ibp->rvp.qp[1]))
                        qp_inuse++;
                rcu_read_unlock();
        }

        return qp_inuse;
}

void flush_qp_waiters(struct rvt_qp *qp)
{
        flush_iowait(qp);
        hfi1_stop_rc_timers(qp);
}

void stop_send_queue(struct rvt_qp *qp)
{
        struct hfi1_qp_priv *priv = qp->priv;

        cancel_work_sync(&priv->s_iowait.iowork);
        hfi1_del_timers_sync(qp);
}

void quiesce_qp(struct rvt_qp *qp)
{
        struct hfi1_qp_priv *priv = qp->priv;

        iowait_sdma_drain(&priv->s_iowait);
        qp_pio_drain(qp);
        flush_tx_list(qp);
}

void notify_qp_reset(struct rvt_qp *qp)
{
        struct hfi1_qp_priv *priv = qp->priv;

        iowait_init(
                &priv->s_iowait,
                1,
                _hfi1_do_send,
                iowait_sleep,
                iowait_wakeup,
                iowait_sdma_drained);
        priv->r_adefered = 0;
        clear_ahg(qp);
}

/*
 * Switch to alternate path.
 * The QP s_lock should be held and interrupts disabled.
 */
void hfi1_migrate_qp(struct rvt_qp *qp)
{
        struct hfi1_qp_priv *priv = qp->priv;
        struct ib_event ev;

        qp->s_mig_state = IB_MIG_MIGRATED;
        qp->remote_ah_attr = qp->alt_ah_attr;
        qp->port_num = qp->alt_ah_attr.port_num;
        qp->s_pkey_index = qp->s_alt_pkey_index;
        qp->s_flags |= RVT_S_AHG_CLEAR;
        priv->s_sc = ah_to_sc(qp->ibqp.device, &qp->remote_ah_attr);
        priv->s_sde = qp_to_sdma_engine(qp, priv->s_sc);

        ev.device = qp->ibqp.device;
        ev.element.qp = &qp->ibqp;
        ev.event = IB_EVENT_PATH_MIG;
        qp->ibqp.event_handler(&ev, qp->ibqp.qp_context);
}

int mtu_to_path_mtu(u32 mtu)
{
        return mtu_to_enum(mtu, OPA_MTU_8192);
}

u32 mtu_from_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp, u32 pmtu)
{
        u32 mtu;
        struct hfi1_ibdev *verbs_dev = container_of(rdi,
                                                    struct hfi1_ibdev,
                                                    rdi);
        struct hfi1_devdata *dd = container_of(verbs_dev,
                                               struct hfi1_devdata,
                                               verbs_dev);
        struct hfi1_ibport *ibp;
        u8 sc, vl;

        ibp = &dd->pport[qp->port_num - 1].ibport_data;
        sc = ibp->sl_to_sc[qp->remote_ah_attr.sl];
        vl = sc_to_vlt(dd, sc);

        mtu = verbs_mtu_enum_to_int(qp->ibqp.device, pmtu);
        if (vl < PER_VL_SEND_CONTEXTS)
                mtu = min_t(u32, mtu, dd->vld[vl].mtu);
        return mtu;
}
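
/*
 * Worked example for mtu_from_qp() above (illustrative comment only): if the
 * negotiated path MTU enum decodes to 8192 bytes but the VL backing this
 * QP's SC is configured with a 4096 byte MTU in dd->vld[vl].mtu, the QP's
 * effective MTU becomes 4096; the per-VL limit always caps the path value.
 */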

int get_pmtu_from_attr(struct rvt_dev_info *rdi, struct rvt_qp *qp,
                       struct ib_qp_attr *attr)
{
        int mtu, pidx = qp->port_num - 1;
        struct hfi1_ibdev *verbs_dev = container_of(rdi,
                                                    struct hfi1_ibdev,
                                                    rdi);
        struct hfi1_devdata *dd = container_of(verbs_dev,
                                               struct hfi1_devdata,
                                               verbs_dev);
        mtu = verbs_mtu_enum_to_int(qp->ibqp.device, attr->path_mtu);
        if (mtu == -1)
                return -1; /* values less than 0 indicate an error */

        if (mtu > dd->pport[pidx].ibmtu)
                return mtu_to_enum(dd->pport[pidx].ibmtu, IB_MTU_2048);
        else
                return attr->path_mtu;
}

void notify_error_qp(struct rvt_qp *qp)
{
        struct hfi1_ibdev *dev = to_idev(qp->ibqp.device);
        struct hfi1_qp_priv *priv = qp->priv;

        write_seqlock(&dev->iowait_lock);
        if (!list_empty(&priv->s_iowait.list) && !(qp->s_flags & RVT_S_BUSY)) {
                qp->s_flags &= ~RVT_S_ANY_WAIT_IO;
                list_del_init(&priv->s_iowait.list);
                if (atomic_dec_and_test(&qp->refcount))
                        wake_up(&qp->wait);
        }
        write_sequnlock(&dev->iowait_lock);

        if (!(qp->s_flags & RVT_S_BUSY)) {
                qp->s_hdrwords = 0;
                if (qp->s_rdma_mr) {
                        rvt_put_mr(qp->s_rdma_mr);
                        qp->s_rdma_mr = NULL;
                }
                flush_tx_list(qp);
        }
}

/**
 * hfi1_error_port_qps - put a port's RC/UC qps into error state
 * @ibp: the ibport
 * @sl: the service level
 *
 * This function places all RC/UC qps with a given service level into error
 * state. It is generally called to force upper layer apps to abandon stale
 * qps after an sl->sc mapping change.
 */
void hfi1_error_port_qps(struct hfi1_ibport *ibp, u8 sl)
{
        struct rvt_qp *qp = NULL;
        struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
        struct hfi1_ibdev *dev = &ppd->dd->verbs_dev;
        int n;
        int lastwqe;
        struct ib_event ev;

        rcu_read_lock();

        /* Deal only with RC/UC qps that use the given SL. */
        for (n = 0; n < dev->rdi.qp_dev->qp_table_size; n++) {
                for (qp = rcu_dereference(dev->rdi.qp_dev->qp_table[n]); qp;
                        qp = rcu_dereference(qp->next)) {
                        if (qp->port_num == ppd->port &&
                            (qp->ibqp.qp_type == IB_QPT_UC ||
                             qp->ibqp.qp_type == IB_QPT_RC) &&
                            qp->remote_ah_attr.sl == sl &&
                            (ib_rvt_state_ops[qp->state] &
                             RVT_POST_SEND_OK)) {
                                spin_lock_irq(&qp->r_lock);
                                spin_lock(&qp->s_hlock);
                                spin_lock(&qp->s_lock);
                                lastwqe = rvt_error_qp(qp,
                                                       IB_WC_WR_FLUSH_ERR);
                                spin_unlock(&qp->s_lock);
                                spin_unlock(&qp->s_hlock);
                                spin_unlock_irq(&qp->r_lock);
                                if (lastwqe) {
                                        ev.device = qp->ibqp.device;
                                        ev.element.qp = &qp->ibqp;
                                        ev.event =
                                                IB_EVENT_QP_LAST_WQE_REACHED;
                                        qp->ibqp.event_handler(&ev,
                                                qp->ibqp.qp_context);
                                }
                        }
                }
        }

        rcu_read_unlock();
}