linux/drivers/infiniband/hw/qib/qib_verbs.c
   1/*
   2 * Copyright (c) 2012 - 2018 Intel Corporation.  All rights reserved.
   3 * Copyright (c) 2006 - 2012 QLogic Corporation. All rights reserved.
   4 * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
   5 *
   6 * This software is available to you under a choice of one of two
   7 * licenses.  You may choose to be licensed under the terms of the GNU
   8 * General Public License (GPL) Version 2, available from the file
   9 * COPYING in the main directory of this source tree, or the
  10 * OpenIB.org BSD license below:
  11 *
  12 *     Redistribution and use in source and binary forms, with or
  13 *     without modification, are permitted provided that the following
  14 *     conditions are met:
  15 *
  16 *      - Redistributions of source code must retain the above
  17 *        copyright notice, this list of conditions and the following
  18 *        disclaimer.
  19 *
  20 *      - Redistributions in binary form must reproduce the above
  21 *        copyright notice, this list of conditions and the following
  22 *        disclaimer in the documentation and/or other materials
  23 *        provided with the distribution.
  24 *
  25 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  26 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  27 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  28 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
  29 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
  30 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  31 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  32 * SOFTWARE.
  33 */
  34
  35#include <rdma/ib_mad.h>
  36#include <rdma/ib_user_verbs.h>
  37#include <linux/io.h>
  38#include <linux/module.h>
  39#include <linux/utsname.h>
  40#include <linux/rculist.h>
  41#include <linux/mm.h>
  42#include <linux/random.h>
  43#include <linux/vmalloc.h>
  44#include <rdma/rdma_vt.h>
  45
  46#include "qib.h"
  47#include "qib_common.h"
  48
  49static unsigned int ib_qib_qp_table_size = 256;
  50module_param_named(qp_table_size, ib_qib_qp_table_size, uint, S_IRUGO);
  51MODULE_PARM_DESC(qp_table_size, "QP table size");
  52
  53static unsigned int qib_lkey_table_size = 16;
  54module_param_named(lkey_table_size, qib_lkey_table_size, uint,
  55                   S_IRUGO);
  56MODULE_PARM_DESC(lkey_table_size,
  57                 "LKEY table size in bits (2^n, 1 <= n <= 23)");
  58
  59static unsigned int ib_qib_max_pds = 0xFFFF;
  60module_param_named(max_pds, ib_qib_max_pds, uint, S_IRUGO);
  61MODULE_PARM_DESC(max_pds,
  62                 "Maximum number of protection domains to support");
  63
  64static unsigned int ib_qib_max_ahs = 0xFFFF;
  65module_param_named(max_ahs, ib_qib_max_ahs, uint, S_IRUGO);
  66MODULE_PARM_DESC(max_ahs, "Maximum number of address handles to support");
  67
  68unsigned int ib_qib_max_cqes = 0x2FFFF;
  69module_param_named(max_cqes, ib_qib_max_cqes, uint, S_IRUGO);
  70MODULE_PARM_DESC(max_cqes,
  71                 "Maximum number of completion queue entries to support");
  72
  73unsigned int ib_qib_max_cqs = 0x1FFFF;
  74module_param_named(max_cqs, ib_qib_max_cqs, uint, S_IRUGO);
  75MODULE_PARM_DESC(max_cqs, "Maximum number of completion queues to support");
  76
  77unsigned int ib_qib_max_qp_wrs = 0x3FFF;
  78module_param_named(max_qp_wrs, ib_qib_max_qp_wrs, uint, S_IRUGO);
  79MODULE_PARM_DESC(max_qp_wrs, "Maximum number of QP WRs to support");
  80
  81unsigned int ib_qib_max_qps = 16384;
  82module_param_named(max_qps, ib_qib_max_qps, uint, S_IRUGO);
  83MODULE_PARM_DESC(max_qps, "Maximum number of QPs to support");
  84
  85unsigned int ib_qib_max_sges = 0x60;
  86module_param_named(max_sges, ib_qib_max_sges, uint, S_IRUGO);
  87MODULE_PARM_DESC(max_sges, "Maximum number of SGEs to support");
  88
  89unsigned int ib_qib_max_mcast_grps = 16384;
  90module_param_named(max_mcast_grps, ib_qib_max_mcast_grps, uint, S_IRUGO);
  91MODULE_PARM_DESC(max_mcast_grps,
  92                 "Maximum number of multicast groups to support");
  93
  94unsigned int ib_qib_max_mcast_qp_attached = 16;
  95module_param_named(max_mcast_qp_attached, ib_qib_max_mcast_qp_attached,
  96                   uint, S_IRUGO);
  97MODULE_PARM_DESC(max_mcast_qp_attached,
  98                 "Maximum number of attached QPs to support");
  99
 100unsigned int ib_qib_max_srqs = 1024;
 101module_param_named(max_srqs, ib_qib_max_srqs, uint, S_IRUGO);
 102MODULE_PARM_DESC(max_srqs, "Maximum number of SRQs to support");
 103
 104unsigned int ib_qib_max_srq_sges = 128;
 105module_param_named(max_srq_sges, ib_qib_max_srq_sges, uint, S_IRUGO);
 106MODULE_PARM_DESC(max_srq_sges, "Maximum number of SRQ SGEs to support");
 107
 108unsigned int ib_qib_max_srq_wrs = 0x1FFFF;
 109module_param_named(max_srq_wrs, ib_qib_max_srq_wrs, uint, S_IRUGO);
  110MODULE_PARM_DESC(max_srq_wrs, "Maximum number of SRQ WRs to support");
 111
 112static unsigned int ib_qib_disable_sma;
 113module_param_named(disable_sma, ib_qib_disable_sma, uint, S_IWUSR | S_IRUGO);
 114MODULE_PARM_DESC(disable_sma, "Disable the SMA");
 115
 116/*
 117 * Translate ib_wr_opcode into ib_wc_opcode.
 118 */
 119const enum ib_wc_opcode ib_qib_wc_opcode[] = {
 120        [IB_WR_RDMA_WRITE] = IB_WC_RDMA_WRITE,
 121        [IB_WR_RDMA_WRITE_WITH_IMM] = IB_WC_RDMA_WRITE,
 122        [IB_WR_SEND] = IB_WC_SEND,
 123        [IB_WR_SEND_WITH_IMM] = IB_WC_SEND,
 124        [IB_WR_RDMA_READ] = IB_WC_RDMA_READ,
 125        [IB_WR_ATOMIC_CMP_AND_SWP] = IB_WC_COMP_SWAP,
 126        [IB_WR_ATOMIC_FETCH_AND_ADD] = IB_WC_FETCH_ADD
 127};
 128
 129/*
 130 * System image GUID.
 131 */
 132__be64 ib_qib_sys_image_guid;
 133
 134/*
 135 * Count the number of DMA descriptors needed to send length bytes of data.
  136 * Don't modify the rvt_sge_state while getting the count.
 137 * Return zero if any of the segments is not aligned.
 138 */
 139static u32 qib_count_sge(struct rvt_sge_state *ss, u32 length)
 140{
 141        struct rvt_sge *sg_list = ss->sg_list;
 142        struct rvt_sge sge = ss->sge;
 143        u8 num_sge = ss->num_sge;
 144        u32 ndesc = 1;  /* count the header */
 145
 146        while (length) {
 147                u32 len = rvt_get_sge_length(&sge, length);
 148
 149                if (((long) sge.vaddr & (sizeof(u32) - 1)) ||
 150                    (len != length && (len & (sizeof(u32) - 1)))) {
 151                        ndesc = 0;
 152                        break;
 153                }
 154                ndesc++;
 155                sge.vaddr += len;
 156                sge.length -= len;
 157                sge.sge_length -= len;
 158                if (sge.sge_length == 0) {
 159                        if (--num_sge)
 160                                sge = *sg_list++;
 161                } else if (sge.length == 0 && sge.mr->lkey) {
 162                        if (++sge.n >= RVT_SEGSZ) {
 163                                if (++sge.m >= sge.mr->mapsz)
 164                                        break;
 165                                sge.n = 0;
 166                        }
 167                        sge.vaddr =
 168                                sge.mr->map[sge.m]->segs[sge.n].vaddr;
 169                        sge.length =
 170                                sge.mr->map[sge.m]->segs[sge.n].length;
 171                }
 172                length -= len;
 173        }
 174        return ndesc;
 175}
 176
 177/*
 178 * Copy from the SGEs to the data buffer.
 179 */
 180static void qib_copy_from_sge(void *data, struct rvt_sge_state *ss, u32 length)
 181{
 182        struct rvt_sge *sge = &ss->sge;
 183
 184        while (length) {
 185                u32 len = rvt_get_sge_length(sge, length);
 186
 187                memcpy(data, sge->vaddr, len);
 188                sge->vaddr += len;
 189                sge->length -= len;
 190                sge->sge_length -= len;
 191                if (sge->sge_length == 0) {
 192                        if (--ss->num_sge)
 193                                *sge = *ss->sg_list++;
 194                } else if (sge->length == 0 && sge->mr->lkey) {
 195                        if (++sge->n >= RVT_SEGSZ) {
 196                                if (++sge->m >= sge->mr->mapsz)
 197                                        break;
 198                                sge->n = 0;
 199                        }
 200                        sge->vaddr =
 201                                sge->mr->map[sge->m]->segs[sge->n].vaddr;
 202                        sge->length =
 203                                sge->mr->map[sge->m]->segs[sge->n].length;
 204                }
 205                data += len;
 206                length -= len;
 207        }
 208}
 209
 210/**
  211 * qib_qp_rcv - process an incoming packet on a QP
 212 * @rcd: the context pointer
 213 * @hdr: the packet header
 214 * @has_grh: true if the packet has a GRH
 215 * @data: the packet data
 216 * @tlen: the packet length
 217 * @qp: the QP the packet came on
 218 *
 219 * This is called from qib_ib_rcv() to process an incoming packet
 220 * for the given QP.
 221 * Called at interrupt level.
 222 */
 223static void qib_qp_rcv(struct qib_ctxtdata *rcd, struct ib_header *hdr,
 224                       int has_grh, void *data, u32 tlen, struct rvt_qp *qp)
 225{
 226        struct qib_ibport *ibp = &rcd->ppd->ibport_data;
 227
 228        spin_lock(&qp->r_lock);
 229
 230        /* Check for valid receive state. */
 231        if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK)) {
 232                ibp->rvp.n_pkt_drops++;
 233                goto unlock;
 234        }
 235
 236        switch (qp->ibqp.qp_type) {
 237        case IB_QPT_SMI:
 238        case IB_QPT_GSI:
 239                if (ib_qib_disable_sma)
 240                        break;
  241                fallthrough;
 242        case IB_QPT_UD:
 243                qib_ud_rcv(ibp, hdr, has_grh, data, tlen, qp);
 244                break;
 245
 246        case IB_QPT_RC:
 247                qib_rc_rcv(rcd, hdr, has_grh, data, tlen, qp);
 248                break;
 249
 250        case IB_QPT_UC:
 251                qib_uc_rcv(ibp, hdr, has_grh, data, tlen, qp);
 252                break;
 253
 254        default:
 255                break;
 256        }
 257
 258unlock:
 259        spin_unlock(&qp->r_lock);
 260}
 261
 262/**
 263 * qib_ib_rcv - process an incoming packet
 264 * @rcd: the context pointer
 265 * @rhdr: the header of the packet
 266 * @data: the packet payload
 267 * @tlen: the packet length
 268 *
 269 * This is called from qib_kreceive() to process an incoming packet at
 270 * interrupt level. Tlen is the length of the header + data + CRC in bytes.
 271 */
 272void qib_ib_rcv(struct qib_ctxtdata *rcd, void *rhdr, void *data, u32 tlen)
 273{
 274        struct qib_pportdata *ppd = rcd->ppd;
 275        struct qib_ibport *ibp = &ppd->ibport_data;
 276        struct ib_header *hdr = rhdr;
 277        struct qib_devdata *dd = ppd->dd;
 278        struct rvt_dev_info *rdi = &dd->verbs_dev.rdi;
 279        struct ib_other_headers *ohdr;
 280        struct rvt_qp *qp;
 281        u32 qp_num;
 282        int lnh;
 283        u8 opcode;
 284        u16 lid;
 285
 286        /* 24 == LRH+BTH+CRC */
 287        if (unlikely(tlen < 24))
 288                goto drop;
 289
 290        /* Check for a valid destination LID (see ch. 7.11.1). */
 291        lid = be16_to_cpu(hdr->lrh[1]);
 292        if (lid < be16_to_cpu(IB_MULTICAST_LID_BASE)) {
 293                lid &= ~((1 << ppd->lmc) - 1);
 294                if (unlikely(lid != ppd->lid))
 295                        goto drop;
 296        }
 297
 298        /* Check for GRH */
 299        lnh = be16_to_cpu(hdr->lrh[0]) & 3;
 300        if (lnh == QIB_LRH_BTH)
 301                ohdr = &hdr->u.oth;
 302        else if (lnh == QIB_LRH_GRH) {
 303                u32 vtf;
 304
 305                ohdr = &hdr->u.l.oth;
 306                if (hdr->u.l.grh.next_hdr != IB_GRH_NEXT_HDR)
 307                        goto drop;
 308                vtf = be32_to_cpu(hdr->u.l.grh.version_tclass_flow);
 309                if ((vtf >> IB_GRH_VERSION_SHIFT) != IB_GRH_VERSION)
 310                        goto drop;
 311        } else
 312                goto drop;
 313
 314        opcode = (be32_to_cpu(ohdr->bth[0]) >> 24) & 0x7f;
 315#ifdef CONFIG_DEBUG_FS
 316        rcd->opstats->stats[opcode].n_bytes += tlen;
 317        rcd->opstats->stats[opcode].n_packets++;
 318#endif
 319
 320        /* Get the destination QP number. */
 321        qp_num = be32_to_cpu(ohdr->bth[1]) & RVT_QPN_MASK;
 322        if (qp_num == QIB_MULTICAST_QPN) {
 323                struct rvt_mcast *mcast;
 324                struct rvt_mcast_qp *p;
 325
 326                if (lnh != QIB_LRH_GRH)
 327                        goto drop;
 328                mcast = rvt_mcast_find(&ibp->rvp, &hdr->u.l.grh.dgid, lid);
 329                if (mcast == NULL)
 330                        goto drop;
 331                this_cpu_inc(ibp->pmastats->n_multicast_rcv);
 332                list_for_each_entry_rcu(p, &mcast->qp_list, list)
 333                        qib_qp_rcv(rcd, hdr, 1, data, tlen, p->qp);
 334                /*
 335                 * Notify rvt_multicast_detach() if it is waiting for us
 336                 * to finish.
 337                 */
 338                if (atomic_dec_return(&mcast->refcount) <= 1)
 339                        wake_up(&mcast->wait);
 340        } else {
 341                rcu_read_lock();
 342                qp = rvt_lookup_qpn(rdi, &ibp->rvp, qp_num);
 343                if (!qp) {
 344                        rcu_read_unlock();
 345                        goto drop;
 346                }
 347                this_cpu_inc(ibp->pmastats->n_unicast_rcv);
 348                qib_qp_rcv(rcd, hdr, lnh == QIB_LRH_GRH, data, tlen, qp);
 349                rcu_read_unlock();
 350        }
 351        return;
 352
 353drop:
 354        ibp->rvp.n_pkt_drops++;
 355}
 356
 357/*
 358 * This is called from a timer to check for QPs
 359 * which need kernel memory in order to send a packet.
 360 */
 361static void mem_timer(struct timer_list *t)
 362{
 363        struct qib_ibdev *dev = from_timer(dev, t, mem_timer);
 364        struct list_head *list = &dev->memwait;
 365        struct rvt_qp *qp = NULL;
 366        struct qib_qp_priv *priv = NULL;
 367        unsigned long flags;
 368
 369        spin_lock_irqsave(&dev->rdi.pending_lock, flags);
 370        if (!list_empty(list)) {
 371                priv = list_entry(list->next, struct qib_qp_priv, iowait);
 372                qp = priv->owner;
 373                list_del_init(&priv->iowait);
 374                rvt_get_qp(qp);
 375                if (!list_empty(list))
 376                        mod_timer(&dev->mem_timer, jiffies + 1);
 377        }
 378        spin_unlock_irqrestore(&dev->rdi.pending_lock, flags);
 379
 380        if (qp) {
 381                spin_lock_irqsave(&qp->s_lock, flags);
 382                if (qp->s_flags & RVT_S_WAIT_KMEM) {
 383                        qp->s_flags &= ~RVT_S_WAIT_KMEM;
 384                        qib_schedule_send(qp);
 385                }
 386                spin_unlock_irqrestore(&qp->s_lock, flags);
 387                rvt_put_qp(qp);
 388        }
 389}
 390
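/*
 * Byte-lane helpers used by copy_io() below to pack partially filled
 * dwords.  The little- and big-endian variants differ only in shift
 * direction so that bytes land in the correct lanes of each 32-bit
 * PIO write.
 */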
 391#ifdef __LITTLE_ENDIAN
 392static inline u32 get_upper_bits(u32 data, u32 shift)
 393{
 394        return data >> shift;
 395}
 396
 397static inline u32 set_upper_bits(u32 data, u32 shift)
 398{
 399        return data << shift;
 400}
 401
 402static inline u32 clear_upper_bytes(u32 data, u32 n, u32 off)
 403{
 404        data <<= ((sizeof(u32) - n) * BITS_PER_BYTE);
 405        data >>= ((sizeof(u32) - n - off) * BITS_PER_BYTE);
 406        return data;
 407}
 408#else
 409static inline u32 get_upper_bits(u32 data, u32 shift)
 410{
 411        return data << shift;
 412}
 413
 414static inline u32 set_upper_bits(u32 data, u32 shift)
 415{
 416        return data >> shift;
 417}
 418
 419static inline u32 clear_upper_bytes(u32 data, u32 n, u32 off)
 420{
 421        data >>= ((sizeof(u32) - n) * BITS_PER_BYTE);
 422        data <<= ((sizeof(u32) - n - off) * BITS_PER_BYTE);
 423        return data;
 424}
 425#endif
 426
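/*
 * Copy a payload described by an SGE state into a PIO send buffer one
 * dword at a time.  Unaligned source addresses and odd lengths are
 * handled by carrying partial bytes in 'data'/'extra'.  The final dword
 * is held back and written last as the trigger word, with
 * write-combining flushes around it when flush_wc is set.
 */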
 427static void copy_io(u32 __iomem *piobuf, struct rvt_sge_state *ss,
 428                    u32 length, unsigned flush_wc)
 429{
 430        u32 extra = 0;
 431        u32 data = 0;
 432        u32 last;
 433
 434        while (1) {
 435                u32 len = rvt_get_sge_length(&ss->sge, length);
 436                u32 off;
 437
 438                /* If the source address is not aligned, try to align it. */
 439                off = (unsigned long)ss->sge.vaddr & (sizeof(u32) - 1);
 440                if (off) {
 441                        u32 *addr = (u32 *)((unsigned long)ss->sge.vaddr &
 442                                            ~(sizeof(u32) - 1));
 443                        u32 v = get_upper_bits(*addr, off * BITS_PER_BYTE);
 444                        u32 y;
 445
 446                        y = sizeof(u32) - off;
 447                        if (len > y)
 448                                len = y;
 449                        if (len + extra >= sizeof(u32)) {
 450                                data |= set_upper_bits(v, extra *
 451                                                       BITS_PER_BYTE);
 452                                len = sizeof(u32) - extra;
 453                                if (len == length) {
 454                                        last = data;
 455                                        break;
 456                                }
 457                                __raw_writel(data, piobuf);
 458                                piobuf++;
 459                                extra = 0;
 460                                data = 0;
 461                        } else {
 462                                /* Clear unused upper bytes */
 463                                data |= clear_upper_bytes(v, len, extra);
 464                                if (len == length) {
 465                                        last = data;
 466                                        break;
 467                                }
 468                                extra += len;
 469                        }
 470                } else if (extra) {
 471                        /* Source address is aligned. */
 472                        u32 *addr = (u32 *) ss->sge.vaddr;
 473                        int shift = extra * BITS_PER_BYTE;
 474                        int ushift = 32 - shift;
 475                        u32 l = len;
 476
 477                        while (l >= sizeof(u32)) {
 478                                u32 v = *addr;
 479
 480                                data |= set_upper_bits(v, shift);
 481                                __raw_writel(data, piobuf);
 482                                data = get_upper_bits(v, ushift);
 483                                piobuf++;
 484                                addr++;
 485                                l -= sizeof(u32);
 486                        }
 487                        /*
  488                         * We still have 'l' bytes (< 4) of this chunk left over.
 489                         */
 490                        if (l) {
 491                                u32 v = *addr;
 492
 493                                if (l + extra >= sizeof(u32)) {
 494                                        data |= set_upper_bits(v, shift);
 495                                        len -= l + extra - sizeof(u32);
 496                                        if (len == length) {
 497                                                last = data;
 498                                                break;
 499                                        }
 500                                        __raw_writel(data, piobuf);
 501                                        piobuf++;
 502                                        extra = 0;
 503                                        data = 0;
 504                                } else {
 505                                        /* Clear unused upper bytes */
 506                                        data |= clear_upper_bytes(v, l, extra);
 507                                        if (len == length) {
 508                                                last = data;
 509                                                break;
 510                                        }
 511                                        extra += l;
 512                                }
 513                        } else if (len == length) {
 514                                last = data;
 515                                break;
 516                        }
 517                } else if (len == length) {
 518                        u32 w;
 519
 520                        /*
 521                         * Need to round up for the last dword in the
 522                         * packet.
 523                         */
 524                        w = (len + 3) >> 2;
 525                        qib_pio_copy(piobuf, ss->sge.vaddr, w - 1);
 526                        piobuf += w - 1;
 527                        last = ((u32 *) ss->sge.vaddr)[w - 1];
 528                        break;
 529                } else {
 530                        u32 w = len >> 2;
 531
 532                        qib_pio_copy(piobuf, ss->sge.vaddr, w);
 533                        piobuf += w;
 534
 535                        extra = len & (sizeof(u32) - 1);
 536                        if (extra) {
 537                                u32 v = ((u32 *) ss->sge.vaddr)[w];
 538
 539                                /* Clear unused upper bytes */
 540                                data = clear_upper_bytes(v, extra, 0);
 541                        }
 542                }
 543                rvt_update_sge(ss, len, false);
 544                length -= len;
 545        }
 546        /* Update address before sending packet. */
 547        rvt_update_sge(ss, length, false);
 548        if (flush_wc) {
  549                /* must flush everything before writing the trigger word */
 550                qib_flush_wc();
 551                __raw_writel(last, piobuf);
 552                /* be sure trigger word is written */
 553                qib_flush_wc();
 554        } else
 555                __raw_writel(last, piobuf);
 556}
 557
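/*
 * Slow path of get_txreq(): recheck the free list with both qp->s_lock
 * and the pending_lock held.  If the list is still empty, queue the QP
 * on dev->txwait to be woken when a tx request is freed and return
 * ERR_PTR(-EBUSY).
 */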
 558static noinline struct qib_verbs_txreq *__get_txreq(struct qib_ibdev *dev,
 559                                           struct rvt_qp *qp)
 560{
 561        struct qib_qp_priv *priv = qp->priv;
 562        struct qib_verbs_txreq *tx;
 563        unsigned long flags;
 564
 565        spin_lock_irqsave(&qp->s_lock, flags);
 566        spin_lock(&dev->rdi.pending_lock);
 567
 568        if (!list_empty(&dev->txreq_free)) {
 569                struct list_head *l = dev->txreq_free.next;
 570
 571                list_del(l);
 572                spin_unlock(&dev->rdi.pending_lock);
 573                spin_unlock_irqrestore(&qp->s_lock, flags);
 574                tx = list_entry(l, struct qib_verbs_txreq, txreq.list);
 575        } else {
 576                if (ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK &&
 577                    list_empty(&priv->iowait)) {
 578                        dev->n_txwait++;
 579                        qp->s_flags |= RVT_S_WAIT_TX;
 580                        list_add_tail(&priv->iowait, &dev->txwait);
 581                }
 582                qp->s_flags &= ~RVT_S_BUSY;
 583                spin_unlock(&dev->rdi.pending_lock);
 584                spin_unlock_irqrestore(&qp->s_lock, flags);
 585                tx = ERR_PTR(-EBUSY);
 586        }
 587        return tx;
 588}
 589
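/*
 * Allocate a verbs tx request from dev->txreq_free, falling back to
 * __get_txreq() (which may queue the QP to wait) if the list is empty.
 */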
 590static inline struct qib_verbs_txreq *get_txreq(struct qib_ibdev *dev,
 591                                         struct rvt_qp *qp)
 592{
 593        struct qib_verbs_txreq *tx;
 594        unsigned long flags;
 595
 596        spin_lock_irqsave(&dev->rdi.pending_lock, flags);
  597        /* assume the list is not empty (the common case) */
 598        if (likely(!list_empty(&dev->txreq_free))) {
 599                struct list_head *l = dev->txreq_free.next;
 600
 601                list_del(l);
 602                spin_unlock_irqrestore(&dev->rdi.pending_lock, flags);
 603                tx = list_entry(l, struct qib_verbs_txreq, txreq.list);
 604        } else {
 605                /* call slow path to get the extra lock */
 606                spin_unlock_irqrestore(&dev->rdi.pending_lock, flags);
 607                tx =  __get_txreq(dev, qp);
 608        }
 609        return tx;
 610}
 611
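/*
 * Return a tx request to the free list, dropping any MR reference and
 * unmapping/freeing a copied header buffer, then wake the first QP
 * waiting on dev->txwait, if any.
 */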
 612void qib_put_txreq(struct qib_verbs_txreq *tx)
 613{
 614        struct qib_ibdev *dev;
 615        struct rvt_qp *qp;
 616        struct qib_qp_priv *priv;
 617        unsigned long flags;
 618
 619        qp = tx->qp;
 620        dev = to_idev(qp->ibqp.device);
 621
 622        if (tx->mr) {
 623                rvt_put_mr(tx->mr);
 624                tx->mr = NULL;
 625        }
 626        if (tx->txreq.flags & QIB_SDMA_TXREQ_F_FREEBUF) {
 627                tx->txreq.flags &= ~QIB_SDMA_TXREQ_F_FREEBUF;
 628                dma_unmap_single(&dd_from_dev(dev)->pcidev->dev,
 629                                 tx->txreq.addr, tx->hdr_dwords << 2,
 630                                 DMA_TO_DEVICE);
 631                kfree(tx->align_buf);
 632        }
 633
 634        spin_lock_irqsave(&dev->rdi.pending_lock, flags);
 635
 636        /* Put struct back on free list */
 637        list_add(&tx->txreq.list, &dev->txreq_free);
 638
 639        if (!list_empty(&dev->txwait)) {
 640                /* Wake up first QP wanting a free struct */
 641                priv = list_entry(dev->txwait.next, struct qib_qp_priv,
 642                                  iowait);
 643                qp = priv->owner;
 644                list_del_init(&priv->iowait);
 645                rvt_get_qp(qp);
 646                spin_unlock_irqrestore(&dev->rdi.pending_lock, flags);
 647
 648                spin_lock_irqsave(&qp->s_lock, flags);
 649                if (qp->s_flags & RVT_S_WAIT_TX) {
 650                        qp->s_flags &= ~RVT_S_WAIT_TX;
 651                        qib_schedule_send(qp);
 652                }
 653                spin_unlock_irqrestore(&qp->s_lock, flags);
 654
 655                rvt_put_qp(qp);
 656        } else
 657                spin_unlock_irqrestore(&dev->rdi.pending_lock, flags);
 658}
 659
 660/*
 661 * This is called when there are send DMA descriptors that might be
 662 * available.
 663 *
 664 * This is called with ppd->sdma_lock held.
 665 */
 666void qib_verbs_sdma_desc_avail(struct qib_pportdata *ppd, unsigned avail)
 667{
 668        struct rvt_qp *qp;
 669        struct qib_qp_priv *qpp, *nqpp;
 670        struct rvt_qp *qps[20];
 671        struct qib_ibdev *dev;
 672        unsigned i, n;
 673
 674        n = 0;
 675        dev = &ppd->dd->verbs_dev;
 676        spin_lock(&dev->rdi.pending_lock);
 677
 678        /* Search wait list for first QP wanting DMA descriptors. */
 679        list_for_each_entry_safe(qpp, nqpp, &dev->dmawait, iowait) {
 680                qp = qpp->owner;
 681                if (qp->port_num != ppd->port)
 682                        continue;
 683                if (n == ARRAY_SIZE(qps))
 684                        break;
 685                if (qpp->s_tx->txreq.sg_count > avail)
 686                        break;
 687                avail -= qpp->s_tx->txreq.sg_count;
 688                list_del_init(&qpp->iowait);
 689                rvt_get_qp(qp);
 690                qps[n++] = qp;
 691        }
 692
 693        spin_unlock(&dev->rdi.pending_lock);
 694
 695        for (i = 0; i < n; i++) {
 696                qp = qps[i];
 697                spin_lock(&qp->s_lock);
 698                if (qp->s_flags & RVT_S_WAIT_DMA_DESC) {
 699                        qp->s_flags &= ~RVT_S_WAIT_DMA_DESC;
 700                        qib_schedule_send(qp);
 701                }
 702                spin_unlock(&qp->s_lock);
 703                rvt_put_qp(qp);
 704        }
 705}
 706
 707/*
 708 * This is called with ppd->sdma_lock held.
 709 */
 710static void sdma_complete(struct qib_sdma_txreq *cookie, int status)
 711{
 712        struct qib_verbs_txreq *tx =
 713                container_of(cookie, struct qib_verbs_txreq, txreq);
 714        struct rvt_qp *qp = tx->qp;
 715        struct qib_qp_priv *priv = qp->priv;
 716
 717        spin_lock(&qp->s_lock);
 718        if (tx->wqe)
 719                rvt_send_complete(qp, tx->wqe, IB_WC_SUCCESS);
 720        else if (qp->ibqp.qp_type == IB_QPT_RC) {
 721                struct ib_header *hdr;
 722
 723                if (tx->txreq.flags & QIB_SDMA_TXREQ_F_FREEBUF)
 724                        hdr = &tx->align_buf->hdr;
 725                else {
 726                        struct qib_ibdev *dev = to_idev(qp->ibqp.device);
 727
 728                        hdr = &dev->pio_hdrs[tx->hdr_inx].hdr;
 729                }
 730                qib_rc_send_complete(qp, hdr);
 731        }
 732        if (atomic_dec_and_test(&priv->s_dma_busy)) {
 733                if (qp->state == IB_QPS_RESET)
 734                        wake_up(&priv->wait_dma);
 735                else if (qp->s_flags & RVT_S_WAIT_DMA) {
 736                        qp->s_flags &= ~RVT_S_WAIT_DMA;
 737                        qib_schedule_send(qp);
 738                }
 739        }
 740        spin_unlock(&qp->s_lock);
 741
 742        qib_put_txreq(tx);
 743}
 744
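/*
 * Called when a header/payload buffer allocation fails: put the QP on
 * dev->memwait (arming mem_timer if the list was empty) and return
 * -EBUSY so the send is retried when memory is freed.  If the QP is no
 * longer in a usable state, return zero to flush the work request.
 */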
 745static int wait_kmem(struct qib_ibdev *dev, struct rvt_qp *qp)
 746{
 747        struct qib_qp_priv *priv = qp->priv;
 748        unsigned long flags;
 749        int ret = 0;
 750
 751        spin_lock_irqsave(&qp->s_lock, flags);
 752        if (ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK) {
 753                spin_lock(&dev->rdi.pending_lock);
 754                if (list_empty(&priv->iowait)) {
 755                        if (list_empty(&dev->memwait))
 756                                mod_timer(&dev->mem_timer, jiffies + 1);
 757                        qp->s_flags |= RVT_S_WAIT_KMEM;
 758                        list_add_tail(&priv->iowait, &dev->memwait);
 759                }
 760                spin_unlock(&dev->rdi.pending_lock);
 761                qp->s_flags &= ~RVT_S_BUSY;
 762                ret = -EBUSY;
 763        }
 764        spin_unlock_irqrestore(&qp->s_lock, flags);
 765
 766        return ret;
 767}
 768
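/*
 * Send a packet using the send DMA engine.  A previously constructed
 * packet (priv->s_tx) is resent first if present.  If the payload fits
 * in the descriptor queue it is sent directly from the SGEs; otherwise
 * the header and payload are copied into a bounce buffer which is
 * DMA-mapped and freed when the descriptor completes.
 */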
 769static int qib_verbs_send_dma(struct rvt_qp *qp, struct ib_header *hdr,
 770                              u32 hdrwords, struct rvt_sge_state *ss, u32 len,
 771                              u32 plen, u32 dwords)
 772{
 773        struct qib_qp_priv *priv = qp->priv;
 774        struct qib_ibdev *dev = to_idev(qp->ibqp.device);
 775        struct qib_devdata *dd = dd_from_dev(dev);
 776        struct qib_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num);
 777        struct qib_pportdata *ppd = ppd_from_ibp(ibp);
 778        struct qib_verbs_txreq *tx;
 779        struct qib_pio_header *phdr;
 780        u32 control;
 781        u32 ndesc;
 782        int ret;
 783
 784        tx = priv->s_tx;
 785        if (tx) {
 786                priv->s_tx = NULL;
 787                /* resend previously constructed packet */
 788                ret = qib_sdma_verbs_send(ppd, tx->ss, tx->dwords, tx);
 789                goto bail;
 790        }
 791
 792        tx = get_txreq(dev, qp);
 793        if (IS_ERR(tx))
 794                goto bail_tx;
 795
 796        control = dd->f_setpbc_control(ppd, plen, qp->s_srate,
 797                                       be16_to_cpu(hdr->lrh[0]) >> 12);
 798        tx->qp = qp;
 799        tx->wqe = qp->s_wqe;
 800        tx->mr = qp->s_rdma_mr;
 801        if (qp->s_rdma_mr)
 802                qp->s_rdma_mr = NULL;
 803        tx->txreq.callback = sdma_complete;
 804        if (dd->flags & QIB_HAS_SDMA_TIMEOUT)
 805                tx->txreq.flags = QIB_SDMA_TXREQ_F_HEADTOHOST;
 806        else
 807                tx->txreq.flags = QIB_SDMA_TXREQ_F_INTREQ;
 808        if (plen + 1 > dd->piosize2kmax_dwords)
 809                tx->txreq.flags |= QIB_SDMA_TXREQ_F_USELARGEBUF;
 810
 811        if (len) {
 812                /*
 813                 * Don't try to DMA if it takes more descriptors than
 814                 * the queue holds.
 815                 */
 816                ndesc = qib_count_sge(ss, len);
 817                if (ndesc >= ppd->sdma_descq_cnt)
 818                        ndesc = 0;
 819        } else
 820                ndesc = 1;
 821        if (ndesc) {
 822                phdr = &dev->pio_hdrs[tx->hdr_inx];
 823                phdr->pbc[0] = cpu_to_le32(plen);
 824                phdr->pbc[1] = cpu_to_le32(control);
 825                memcpy(&phdr->hdr, hdr, hdrwords << 2);
 826                tx->txreq.flags |= QIB_SDMA_TXREQ_F_FREEDESC;
 827                tx->txreq.sg_count = ndesc;
 828                tx->txreq.addr = dev->pio_hdrs_phys +
 829                        tx->hdr_inx * sizeof(struct qib_pio_header);
 830                tx->hdr_dwords = hdrwords + 2; /* add PBC length */
 831                ret = qib_sdma_verbs_send(ppd, ss, dwords, tx);
 832                goto bail;
 833        }
 834
 835        /* Allocate a buffer and copy the header and payload to it. */
 836        tx->hdr_dwords = plen + 1;
 837        phdr = kmalloc(tx->hdr_dwords << 2, GFP_ATOMIC);
 838        if (!phdr)
 839                goto err_tx;
 840        phdr->pbc[0] = cpu_to_le32(plen);
 841        phdr->pbc[1] = cpu_to_le32(control);
 842        memcpy(&phdr->hdr, hdr, hdrwords << 2);
 843        qib_copy_from_sge((u32 *) &phdr->hdr + hdrwords, ss, len);
 844
 845        tx->txreq.addr = dma_map_single(&dd->pcidev->dev, phdr,
 846                                        tx->hdr_dwords << 2, DMA_TO_DEVICE);
 847        if (dma_mapping_error(&dd->pcidev->dev, tx->txreq.addr))
 848                goto map_err;
 849        tx->align_buf = phdr;
 850        tx->txreq.flags |= QIB_SDMA_TXREQ_F_FREEBUF;
 851        tx->txreq.sg_count = 1;
 852        ret = qib_sdma_verbs_send(ppd, NULL, 0, tx);
 853        goto unaligned;
 854
 855map_err:
 856        kfree(phdr);
 857err_tx:
 858        qib_put_txreq(tx);
 859        ret = wait_kmem(dev, qp);
 860unaligned:
 861        ibp->rvp.n_unaligned++;
 862bail:
 863        return ret;
 864bail_tx:
 865        ret = PTR_ERR(tx);
 866        goto bail;
 867}
 868
 869/*
 870 * If we are now in the error state, return zero to flush the
 871 * send work request.
 872 */
 873static int no_bufs_available(struct rvt_qp *qp)
 874{
 875        struct qib_qp_priv *priv = qp->priv;
 876        struct qib_ibdev *dev = to_idev(qp->ibqp.device);
 877        struct qib_devdata *dd;
 878        unsigned long flags;
 879        int ret = 0;
 880
 881        /*
  882         * Note that as soon as dd->f_wantpiobuf_intr() is called and
  883         * possibly before it returns, qib_ib_piobufavail()
  884         * could be called. Therefore, put the QP on the I/O wait list before
 885         * enabling the PIO avail interrupt.
 886         */
 887        spin_lock_irqsave(&qp->s_lock, flags);
 888        if (ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK) {
 889                spin_lock(&dev->rdi.pending_lock);
 890                if (list_empty(&priv->iowait)) {
 891                        dev->n_piowait++;
 892                        qp->s_flags |= RVT_S_WAIT_PIO;
 893                        list_add_tail(&priv->iowait, &dev->piowait);
 894                        dd = dd_from_dev(dev);
 895                        dd->f_wantpiobuf_intr(dd, 1);
 896                }
 897                spin_unlock(&dev->rdi.pending_lock);
 898                qp->s_flags &= ~RVT_S_BUSY;
 899                ret = -EBUSY;
 900        }
 901        spin_unlock_irqrestore(&qp->s_lock, flags);
 902        return ret;
 903}
 904
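/*
 * Send a packet by programmed I/O: write the PBC, then copy the header
 * and payload into a PIO send buffer, inserting write-combining flushes
 * around the trigger word where the hardware requires them.  Returns
 * the result of no_bufs_available() if no send buffer can be allocated.
 */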
 905static int qib_verbs_send_pio(struct rvt_qp *qp, struct ib_header *ibhdr,
 906                              u32 hdrwords, struct rvt_sge_state *ss, u32 len,
 907                              u32 plen, u32 dwords)
 908{
 909        struct qib_devdata *dd = dd_from_ibdev(qp->ibqp.device);
 910        struct qib_pportdata *ppd = dd->pport + qp->port_num - 1;
 911        u32 *hdr = (u32 *) ibhdr;
 912        u32 __iomem *piobuf_orig;
 913        u32 __iomem *piobuf;
 914        u64 pbc;
 915        unsigned long flags;
 916        unsigned flush_wc;
 917        u32 control;
 918        u32 pbufn;
 919
 920        control = dd->f_setpbc_control(ppd, plen, qp->s_srate,
 921                be16_to_cpu(ibhdr->lrh[0]) >> 12);
 922        pbc = ((u64) control << 32) | plen;
 923        piobuf = dd->f_getsendbuf(ppd, pbc, &pbufn);
 924        if (unlikely(piobuf == NULL))
 925                return no_bufs_available(qp);
 926
 927        /*
 928         * Write the pbc.
 929         * We have to flush after the PBC for correctness on some cpus
  930         * or the WC buffer can be written out of order.
 931         */
 932        writeq(pbc, piobuf);
 933        piobuf_orig = piobuf;
 934        piobuf += 2;
 935
 936        flush_wc = dd->flags & QIB_PIO_FLUSH_WC;
 937        if (len == 0) {
 938                /*
  939                 * If there is just the header portion, we must flush before
  940                 * writing the last word of the header for correctness, and
  941                 * again after the last header word (the trigger word).
 942                 */
 943                if (flush_wc) {
 944                        qib_flush_wc();
 945                        qib_pio_copy(piobuf, hdr, hdrwords - 1);
 946                        qib_flush_wc();
 947                        __raw_writel(hdr[hdrwords - 1], piobuf + hdrwords - 1);
 948                        qib_flush_wc();
 949                } else
 950                        qib_pio_copy(piobuf, hdr, hdrwords);
 951                goto done;
 952        }
 953
 954        if (flush_wc)
 955                qib_flush_wc();
 956        qib_pio_copy(piobuf, hdr, hdrwords);
 957        piobuf += hdrwords;
 958
 959        /* The common case is aligned and contained in one segment. */
 960        if (likely(ss->num_sge == 1 && len <= ss->sge.length &&
 961                   !((unsigned long)ss->sge.vaddr & (sizeof(u32) - 1)))) {
 962                u32 *addr = (u32 *) ss->sge.vaddr;
 963
 964                /* Update address before sending packet. */
 965                rvt_update_sge(ss, len, false);
 966                if (flush_wc) {
 967                        qib_pio_copy(piobuf, addr, dwords - 1);
  968                        /* must flush everything before writing the trigger word */
 969                        qib_flush_wc();
 970                        __raw_writel(addr[dwords - 1], piobuf + dwords - 1);
 971                        /* be sure trigger word is written */
 972                        qib_flush_wc();
 973                } else
 974                        qib_pio_copy(piobuf, addr, dwords);
 975                goto done;
 976        }
 977        copy_io(piobuf, ss, len, flush_wc);
 978done:
 979        if (dd->flags & QIB_USE_SPCL_TRIG) {
 980                u32 spcl_off = (pbufn >= dd->piobcnt2k) ? 2047 : 1023;
 981
 982                qib_flush_wc();
 983                __raw_writel(0xaebecede, piobuf_orig + spcl_off);
 984        }
 985        qib_sendbuf_done(dd, pbufn);
 986        if (qp->s_rdma_mr) {
 987                rvt_put_mr(qp->s_rdma_mr);
 988                qp->s_rdma_mr = NULL;
 989        }
 990        if (qp->s_wqe) {
 991                spin_lock_irqsave(&qp->s_lock, flags);
 992                rvt_send_complete(qp, qp->s_wqe, IB_WC_SUCCESS);
 993                spin_unlock_irqrestore(&qp->s_lock, flags);
 994        } else if (qp->ibqp.qp_type == IB_QPT_RC) {
 995                spin_lock_irqsave(&qp->s_lock, flags);
 996                qib_rc_send_complete(qp, ibhdr);
 997                spin_unlock_irqrestore(&qp->s_lock, flags);
 998        }
 999        return 0;
1000}
1001
1002/**
1003 * qib_verbs_send - send a packet
1004 * @qp: the QP to send on
1005 * @hdr: the packet header
1006 * @hdrwords: the number of 32-bit words in the header
1007 * @ss: the SGE to send
1008 * @len: the length of the packet in bytes
1009 *
1010 * Return zero if packet is sent or queued OK.
 1011 * Otherwise, return non-zero and clear the RVT_S_BUSY flag in qp->s_flags.
1012 */
1013int qib_verbs_send(struct rvt_qp *qp, struct ib_header *hdr,
1014                   u32 hdrwords, struct rvt_sge_state *ss, u32 len)
1015{
1016        struct qib_devdata *dd = dd_from_ibdev(qp->ibqp.device);
1017        u32 plen;
1018        int ret;
1019        u32 dwords = (len + 3) >> 2;
1020
1021        /*
1022         * Calculate the send buffer trigger address.
 1023         * The +1 accounts for the PBC control dword that follows the PBC length.
1024         */
1025        plen = hdrwords + dwords + 1;
1026
1027        /*
1028         * VL15 packets (IB_QPT_SMI) will always use PIO, so we
1029         * can defer SDMA restart until link goes ACTIVE without
1030         * worrying about just how we got there.
1031         */
1032        if (qp->ibqp.qp_type == IB_QPT_SMI ||
1033            !(dd->flags & QIB_HAS_SEND_DMA))
1034                ret = qib_verbs_send_pio(qp, hdr, hdrwords, ss, len,
1035                                         plen, dwords);
1036        else
1037                ret = qib_verbs_send_dma(qp, hdr, hdrwords, ss, len,
1038                                         plen, dwords);
1039
1040        return ret;
1041}
1042
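/*
 * Snapshot the port send/receive word, packet and send-stall counters.
 * Returns -EINVAL if the hardware is not present (frozen, reset, etc.).
 */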
1043int qib_snapshot_counters(struct qib_pportdata *ppd, u64 *swords,
1044                          u64 *rwords, u64 *spkts, u64 *rpkts,
1045                          u64 *xmit_wait)
1046{
1047        int ret;
1048        struct qib_devdata *dd = ppd->dd;
1049
1050        if (!(dd->flags & QIB_PRESENT)) {
1051                /* no hardware, freeze, etc. */
1052                ret = -EINVAL;
1053                goto bail;
1054        }
1055        *swords = dd->f_portcntr(ppd, QIBPORTCNTR_WORDSEND);
1056        *rwords = dd->f_portcntr(ppd, QIBPORTCNTR_WORDRCV);
1057        *spkts = dd->f_portcntr(ppd, QIBPORTCNTR_PKTSEND);
1058        *rpkts = dd->f_portcntr(ppd, QIBPORTCNTR_PKTRCV);
1059        *xmit_wait = dd->f_portcntr(ppd, QIBPORTCNTR_SENDSTALL);
1060
1061        ret = 0;
1062
1063bail:
1064        return ret;
1065}
1066
1067/**
1068 * qib_get_counters - get various chip counters
 1069 * @ppd: the physical port of the qlogic_ib device
1070 * @cntrs: counters are placed here
1071 *
1072 * Return the counters needed by recv_pma_get_portcounters().
1073 */
1074int qib_get_counters(struct qib_pportdata *ppd,
1075                     struct qib_verbs_counters *cntrs)
1076{
1077        int ret;
1078
1079        if (!(ppd->dd->flags & QIB_PRESENT)) {
1080                /* no hardware, freeze, etc. */
1081                ret = -EINVAL;
1082                goto bail;
1083        }
1084        cntrs->symbol_error_counter =
1085                ppd->dd->f_portcntr(ppd, QIBPORTCNTR_IBSYMBOLERR);
1086        cntrs->link_error_recovery_counter =
1087                ppd->dd->f_portcntr(ppd, QIBPORTCNTR_IBLINKERRRECOV);
1088        /*
1089         * The link downed counter counts when the other side downs the
1090         * connection.  We add in the number of times we downed the link
1091         * due to local link integrity errors to compensate.
1092         */
1093        cntrs->link_downed_counter =
1094                ppd->dd->f_portcntr(ppd, QIBPORTCNTR_IBLINKDOWN);
1095        cntrs->port_rcv_errors =
1096                ppd->dd->f_portcntr(ppd, QIBPORTCNTR_RXDROPPKT) +
1097                ppd->dd->f_portcntr(ppd, QIBPORTCNTR_RCVOVFL) +
1098                ppd->dd->f_portcntr(ppd, QIBPORTCNTR_ERR_RLEN) +
1099                ppd->dd->f_portcntr(ppd, QIBPORTCNTR_INVALIDRLEN) +
1100                ppd->dd->f_portcntr(ppd, QIBPORTCNTR_ERRLINK) +
1101                ppd->dd->f_portcntr(ppd, QIBPORTCNTR_ERRICRC) +
1102                ppd->dd->f_portcntr(ppd, QIBPORTCNTR_ERRVCRC) +
1103                ppd->dd->f_portcntr(ppd, QIBPORTCNTR_ERRLPCRC) +
1104                ppd->dd->f_portcntr(ppd, QIBPORTCNTR_BADFORMAT);
1105        cntrs->port_rcv_errors +=
1106                ppd->dd->f_portcntr(ppd, QIBPORTCNTR_RXLOCALPHYERR);
1107        cntrs->port_rcv_errors +=
1108                ppd->dd->f_portcntr(ppd, QIBPORTCNTR_RXVLERR);
1109        cntrs->port_rcv_remphys_errors =
1110                ppd->dd->f_portcntr(ppd, QIBPORTCNTR_RCVEBP);
1111        cntrs->port_xmit_discards =
1112                ppd->dd->f_portcntr(ppd, QIBPORTCNTR_UNSUPVL);
1113        cntrs->port_xmit_data = ppd->dd->f_portcntr(ppd,
1114                        QIBPORTCNTR_WORDSEND);
1115        cntrs->port_rcv_data = ppd->dd->f_portcntr(ppd,
1116                        QIBPORTCNTR_WORDRCV);
1117        cntrs->port_xmit_packets = ppd->dd->f_portcntr(ppd,
1118                        QIBPORTCNTR_PKTSEND);
1119        cntrs->port_rcv_packets = ppd->dd->f_portcntr(ppd,
1120                        QIBPORTCNTR_PKTRCV);
1121        cntrs->local_link_integrity_errors =
1122                ppd->dd->f_portcntr(ppd, QIBPORTCNTR_LLI);
1123        cntrs->excessive_buffer_overrun_errors =
1124                ppd->dd->f_portcntr(ppd, QIBPORTCNTR_EXCESSBUFOVFL);
1125        cntrs->vl15_dropped =
1126                ppd->dd->f_portcntr(ppd, QIBPORTCNTR_VL15PKTDROP);
1127
1128        ret = 0;
1129
1130bail:
1131        return ret;
1132}
1133
1134/**
1135 * qib_ib_piobufavail - callback when a PIO buffer is available
1136 * @dd: the device pointer
1137 *
1138 * This is called from qib_intr() at interrupt level when a PIO buffer is
1139 * available after qib_verbs_send() returned an error that no buffers were
1140 * available. Disable the interrupt if there are no more QPs waiting.
1141 */
1142void qib_ib_piobufavail(struct qib_devdata *dd)
1143{
1144        struct qib_ibdev *dev = &dd->verbs_dev;
1145        struct list_head *list;
1146        struct rvt_qp *qps[5];
1147        struct rvt_qp *qp;
1148        unsigned long flags;
1149        unsigned i, n;
1150        struct qib_qp_priv *priv;
1151
1152        list = &dev->piowait;
1153        n = 0;
1154
1155        /*
1156         * Note: checking that the piowait list is empty and clearing
1157         * the buffer available interrupt needs to be atomic or we
1158         * could end up with QPs on the wait list with the interrupt
1159         * disabled.
1160         */
1161        spin_lock_irqsave(&dev->rdi.pending_lock, flags);
1162        while (!list_empty(list)) {
1163                if (n == ARRAY_SIZE(qps))
1164                        goto full;
1165                priv = list_entry(list->next, struct qib_qp_priv, iowait);
1166                qp = priv->owner;
1167                list_del_init(&priv->iowait);
1168                rvt_get_qp(qp);
1169                qps[n++] = qp;
1170        }
1171        dd->f_wantpiobuf_intr(dd, 0);
1172full:
1173        spin_unlock_irqrestore(&dev->rdi.pending_lock, flags);
1174
1175        for (i = 0; i < n; i++) {
1176                qp = qps[i];
1177
1178                spin_lock_irqsave(&qp->s_lock, flags);
1179                if (qp->s_flags & RVT_S_WAIT_PIO) {
1180                        qp->s_flags &= ~RVT_S_WAIT_PIO;
1181                        qib_schedule_send(qp);
1182                }
1183                spin_unlock_irqrestore(&qp->s_lock, flags);
1184
1185                /* Notify qib_destroy_qp() if it is waiting. */
1186                rvt_put_qp(qp);
1187        }
1188}
1189
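/*
 * Fill in ib_port_attr for the given port from the per-port hardware
 * state: LID/LMC, link state, width, speed, VL count and MTU.
 */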
1190static int qib_query_port(struct rvt_dev_info *rdi, u8 port_num,
1191                          struct ib_port_attr *props)
1192{
1193        struct qib_ibdev *ibdev = container_of(rdi, struct qib_ibdev, rdi);
1194        struct qib_devdata *dd = dd_from_dev(ibdev);
1195        struct qib_pportdata *ppd = &dd->pport[port_num - 1];
1196        enum ib_mtu mtu;
1197        u16 lid = ppd->lid;
1198
 1199        /* props is zeroed by the caller; avoid zeroing it here */
1200        props->lid = lid ? lid : be16_to_cpu(IB_LID_PERMISSIVE);
1201        props->lmc = ppd->lmc;
1202        props->state = dd->f_iblink_state(ppd->lastibcstat);
1203        props->phys_state = dd->f_ibphys_portstate(ppd->lastibcstat);
1204        props->gid_tbl_len = QIB_GUIDS_PER_PORT;
1205        props->active_width = ppd->link_width_active;
1206        /* See rate_show() */
1207        props->active_speed = ppd->link_speed_active;
1208        props->max_vl_num = qib_num_vls(ppd->vls_supported);
1209
1210        props->max_mtu = qib_ibmtu ? qib_ibmtu : IB_MTU_4096;
1211        switch (ppd->ibmtu) {
1212        case 4096:
1213                mtu = IB_MTU_4096;
1214                break;
1215        case 2048:
1216                mtu = IB_MTU_2048;
1217                break;
1218        case 1024:
1219                mtu = IB_MTU_1024;
1220                break;
1221        case 512:
1222                mtu = IB_MTU_512;
1223                break;
1224        case 256:
1225                mtu = IB_MTU_256;
1226                break;
1227        default:
1228                mtu = IB_MTU_2048;
1229        }
1230        props->active_mtu = mtu;
1231
1232        return 0;
1233}
1234
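/*
 * Handle IB_DEVICE_MODIFY_NODE_DESC and IB_DEVICE_MODIFY_SYS_IMAGE_GUID,
 * propagating the change to each port via qib_node_desc_chg() and
 * qib_sys_guid_chg().  Any other modify mask bit returns -EOPNOTSUPP.
 */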
1235static int qib_modify_device(struct ib_device *device,
1236                             int device_modify_mask,
1237                             struct ib_device_modify *device_modify)
1238{
1239        struct qib_devdata *dd = dd_from_ibdev(device);
1240        unsigned i;
1241        int ret;
1242
1243        if (device_modify_mask & ~(IB_DEVICE_MODIFY_SYS_IMAGE_GUID |
1244                                   IB_DEVICE_MODIFY_NODE_DESC)) {
1245                ret = -EOPNOTSUPP;
1246                goto bail;
1247        }
1248
1249        if (device_modify_mask & IB_DEVICE_MODIFY_NODE_DESC) {
1250                memcpy(device->node_desc, device_modify->node_desc,
1251                       IB_DEVICE_NODE_DESC_MAX);
1252                for (i = 0; i < dd->num_pports; i++) {
1253                        struct qib_ibport *ibp = &dd->pport[i].ibport_data;
1254
1255                        qib_node_desc_chg(ibp);
1256                }
1257        }
1258
1259        if (device_modify_mask & IB_DEVICE_MODIFY_SYS_IMAGE_GUID) {
1260                ib_qib_sys_image_guid =
1261                        cpu_to_be64(device_modify->sys_image_guid);
1262                for (i = 0; i < dd->num_pports; i++) {
1263                        struct qib_ibport *ibp = &dd->pport[i].ibport_data;
1264
1265                        qib_sys_guid_chg(ibp);
1266                }
1267        }
1268
1269        ret = 0;
1270
1271bail:
1272        return ret;
1273}
1274
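/* Take the IB link down on the given port (QIB_IB_LINKDOWN). */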
1275static int qib_shut_down_port(struct rvt_dev_info *rdi, u8 port_num)
1276{
1277        struct qib_ibdev *ibdev = container_of(rdi, struct qib_ibdev, rdi);
1278        struct qib_devdata *dd = dd_from_dev(ibdev);
1279        struct qib_pportdata *ppd = &dd->pport[port_num - 1];
1280
1281        qib_set_linkstate(ppd, QIB_IB_LINKDOWN);
1282
1283        return 0;
1284}
1285
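/*
 * Return the port GUID for index 0, or one of the additional GUIDs for
 * indices 1..QIB_GUIDS_PER_PORT-1; -EINVAL for anything else.
 */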
1286static int qib_get_guid_be(struct rvt_dev_info *rdi, struct rvt_ibport *rvp,
1287                           int guid_index, __be64 *guid)
1288{
1289        struct qib_ibport *ibp = container_of(rvp, struct qib_ibport, rvp);
1290        struct qib_pportdata *ppd = ppd_from_ibp(ibp);
1291
1292        if (guid_index == 0)
1293                *guid = ppd->guid;
1294        else if (guid_index < QIB_GUIDS_PER_PORT)
1295                *guid = ibp->guids[guid_index - 1];
1296        else
1297                return -EINVAL;
1298
1299        return 0;
1300}
1301
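/*
 * Validate address handle attributes: the SL must fit in 4 bits, the
 * DLID must be non-zero, and a DLID in the multicast range (other than
 * the permissive LID) requires a GRH.
 */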
1302int qib_check_ah(struct ib_device *ibdev, struct rdma_ah_attr *ah_attr)
1303{
1304        if (rdma_ah_get_sl(ah_attr) > 15)
1305                return -EINVAL;
1306
1307        if (rdma_ah_get_dlid(ah_attr) == 0)
1308                return -EINVAL;
1309        if (rdma_ah_get_dlid(ah_attr) >=
1310                be16_to_cpu(IB_MULTICAST_LID_BASE) &&
1311            rdma_ah_get_dlid(ah_attr) !=
1312                be16_to_cpu(IB_LID_PERMISSIVE) &&
1313            !(rdma_ah_get_ah_flags(ah_attr) & IB_AH_GRH))
1314                return -EINVAL;
1315
1316        return 0;
1317}
1318
1319static void qib_notify_new_ah(struct ib_device *ibdev,
1320                              struct rdma_ah_attr *ah_attr,
1321                              struct rvt_ah *ah)
1322{
1323        struct qib_ibport *ibp;
1324        struct qib_pportdata *ppd;
1325
1326        /*
 1327         * Do not trust reading anything from rvt_ah at this point as it is
 1328         * not done being set up. We can, however, modify the fields we need to set.
1329         */
1330
1331        ibp = to_iport(ibdev, rdma_ah_get_port_num(ah_attr));
1332        ppd = ppd_from_ibp(ibp);
1333        ah->vl = ibp->sl_to_vl[rdma_ah_get_sl(&ah->attr)];
1334        ah->log_pmtu = ilog2(ppd->ibmtu);
1335}
1336
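/*
 * Create an address handle on QP0's PD for sending SMPs to the given
 * DLID; returns ERR_PTR(-EINVAL) if QP0 does not exist.
 */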
1337struct ib_ah *qib_create_qp0_ah(struct qib_ibport *ibp, u16 dlid)
1338{
1339        struct rdma_ah_attr attr;
1340        struct ib_ah *ah = ERR_PTR(-EINVAL);
1341        struct rvt_qp *qp0;
1342        struct qib_pportdata *ppd = ppd_from_ibp(ibp);
1343        struct qib_devdata *dd = dd_from_ppd(ppd);
1344        u8 port_num = ppd->port;
1345
1346        memset(&attr, 0, sizeof(attr));
1347        attr.type = rdma_ah_find_type(&dd->verbs_dev.rdi.ibdev, port_num);
1348        rdma_ah_set_dlid(&attr, dlid);
1349        rdma_ah_set_port_num(&attr, port_num);
1350        rcu_read_lock();
1351        qp0 = rcu_dereference(ibp->rvp.qp[0]);
1352        if (qp0)
1353                ah = rdma_create_ah(qp0->ibqp.pd, &attr, 0);
1354        rcu_read_unlock();
1355        return ah;
1356}
1357
1358/**
1359 * qib_get_npkeys - return the size of the PKEY table for context 0
1360 * @dd: the qlogic_ib device
1361 */
1362unsigned qib_get_npkeys(struct qib_devdata *dd)
1363{
1364        return ARRAY_SIZE(dd->rcd[0]->pkeys);
1365}
1366
1367/*
1368 * Return the indexed PKEY from the port PKEY table.
1369 * No need to validate rcd[ctxt]; the port is setup if we are here.
1370 */
1371unsigned qib_get_pkey(struct qib_ibport *ibp, unsigned index)
1372{
1373        struct qib_pportdata *ppd = ppd_from_ibp(ibp);
1374        struct qib_devdata *dd = ppd->dd;
1375        unsigned ctxt = ppd->hw_pidx;
1376        unsigned ret;
1377
 1378        /* dd->rcd is NULL in the mini_init case or after some init failures */
1379        if (!dd->rcd || index >= ARRAY_SIZE(dd->rcd[ctxt]->pkeys))
1380                ret = 0;
1381        else
1382                ret = dd->rcd[ctxt]->pkeys[index];
1383
1384        return ret;
1385}
1386
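/*
 * Initialize the per-port verbs state: default GID prefix, capability
 * flags and PMA counter selects, and snapshot the current hardware
 * counters as the zero ("z_") baselines.
 */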
1387static void init_ibport(struct qib_pportdata *ppd)
1388{
1389        struct qib_verbs_counters cntrs;
1390        struct qib_ibport *ibp = &ppd->ibport_data;
1391
1392        spin_lock_init(&ibp->rvp.lock);
1393        /* Set the prefix to the default value (see ch. 4.1.1) */
1394        ibp->rvp.gid_prefix = IB_DEFAULT_GID_PREFIX;
1395        ibp->rvp.sm_lid = be16_to_cpu(IB_LID_PERMISSIVE);
1396        ibp->rvp.port_cap_flags = IB_PORT_SYS_IMAGE_GUID_SUP |
1397                IB_PORT_CLIENT_REG_SUP | IB_PORT_SL_MAP_SUP |
1398                IB_PORT_TRAP_SUP | IB_PORT_AUTO_MIGR_SUP |
1399                IB_PORT_DR_NOTICE_SUP | IB_PORT_CAP_MASK_NOTICE_SUP |
1400                IB_PORT_OTHER_LOCAL_CHANGES_SUP;
1401        if (ppd->dd->flags & QIB_HAS_LINK_LATENCY)
1402                ibp->rvp.port_cap_flags |= IB_PORT_LINK_LATENCY_SUP;
1403        ibp->rvp.pma_counter_select[0] = IB_PMA_PORT_XMIT_DATA;
1404        ibp->rvp.pma_counter_select[1] = IB_PMA_PORT_RCV_DATA;
1405        ibp->rvp.pma_counter_select[2] = IB_PMA_PORT_XMIT_PKTS;
1406        ibp->rvp.pma_counter_select[3] = IB_PMA_PORT_RCV_PKTS;
1407        ibp->rvp.pma_counter_select[4] = IB_PMA_PORT_XMIT_WAIT;
1408
1409        /* Snapshot current HW counters to "clear" them. */
1410        qib_get_counters(ppd, &cntrs);
1411        ibp->z_symbol_error_counter = cntrs.symbol_error_counter;
1412        ibp->z_link_error_recovery_counter =
1413                cntrs.link_error_recovery_counter;
1414        ibp->z_link_downed_counter = cntrs.link_downed_counter;
1415        ibp->z_port_rcv_errors = cntrs.port_rcv_errors;
1416        ibp->z_port_rcv_remphys_errors = cntrs.port_rcv_remphys_errors;
1417        ibp->z_port_xmit_discards = cntrs.port_xmit_discards;
1418        ibp->z_port_xmit_data = cntrs.port_xmit_data;
1419        ibp->z_port_rcv_data = cntrs.port_rcv_data;
1420        ibp->z_port_xmit_packets = cntrs.port_xmit_packets;
1421        ibp->z_port_rcv_packets = cntrs.port_rcv_packets;
1422        ibp->z_local_link_integrity_errors =
1423                cntrs.local_link_integrity_errors;
1424        ibp->z_excessive_buffer_overrun_errors =
1425                cntrs.excessive_buffer_overrun_errors;
1426        ibp->z_vl15_dropped = cntrs.vl15_dropped;
1427        RCU_INIT_POINTER(ibp->rvp.qp[0], NULL);
1428        RCU_INIT_POINTER(ibp->rvp.qp[1], NULL);
1429}
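
/*
 * Illustrative sketch only: the z_* fields captured above act as a zero
 * baseline, so later counter reads (e.g. from the PMA MAD code) can report
 * the delta since "clear" without resetting the hardware counters.  A
 * hypothetical reader would do roughly the following; example_symbol_errors()
 * is an assumed name for illustration.
 */
#if 0	/* example only, never compiled */
static u64 example_symbol_errors(struct qib_pportdata *ppd)
{
        struct qib_ibport *ibp = &ppd->ibport_data;
        struct qib_verbs_counters cntrs;

        qib_get_counters(ppd, &cntrs);
        return cntrs.symbol_error_counter - ibp->z_symbol_error_counter;
}
#endif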
1430
1431/**
1432 * qib_fill_device_attr - Fill in rvt dev info device attributes.
1433 * @dd: the device data structure
1434 */
1435static void qib_fill_device_attr(struct qib_devdata *dd)
1436{
1437        struct rvt_dev_info *rdi = &dd->verbs_dev.rdi;
1438
1439        memset(&rdi->dparms.props, 0, sizeof(rdi->dparms.props));
1440
1441        rdi->dparms.props.max_pd = ib_qib_max_pds;
1442        rdi->dparms.props.max_ah = ib_qib_max_ahs;
1443        rdi->dparms.props.device_cap_flags = IB_DEVICE_BAD_PKEY_CNTR |
1444                IB_DEVICE_BAD_QKEY_CNTR | IB_DEVICE_SHUTDOWN_PORT |
1445                IB_DEVICE_SYS_IMAGE_GUID | IB_DEVICE_RC_RNR_NAK_GEN |
1446                IB_DEVICE_PORT_ACTIVE_EVENT | IB_DEVICE_SRQ_RESIZE;
1447        rdi->dparms.props.page_size_cap = PAGE_SIZE;
1448        rdi->dparms.props.vendor_id =
1449                QIB_SRC_OUI_1 << 16 | QIB_SRC_OUI_2 << 8 | QIB_SRC_OUI_3;
1450        rdi->dparms.props.vendor_part_id = dd->deviceid;
1451        rdi->dparms.props.hw_ver = dd->minrev;
1452        rdi->dparms.props.sys_image_guid = ib_qib_sys_image_guid;
1453        rdi->dparms.props.max_mr_size = ~0ULL;
1454        rdi->dparms.props.max_qp = ib_qib_max_qps;
1455        rdi->dparms.props.max_qp_wr = ib_qib_max_qp_wrs;
1456        rdi->dparms.props.max_send_sge = ib_qib_max_sges;
1457        rdi->dparms.props.max_recv_sge = ib_qib_max_sges;
1458        rdi->dparms.props.max_sge_rd = ib_qib_max_sges;
1459        rdi->dparms.props.max_cq = ib_qib_max_cqs;
1460        rdi->dparms.props.max_cqe = ib_qib_max_cqes;
1461        rdi->dparms.props.max_ah = ib_qib_max_ahs;
1462        rdi->dparms.props.max_map_per_fmr = 32767;
1463        rdi->dparms.props.max_qp_rd_atom = QIB_MAX_RDMA_ATOMIC;
1464        rdi->dparms.props.max_qp_init_rd_atom = 255;
1465        rdi->dparms.props.max_srq = ib_qib_max_srqs;
1466        rdi->dparms.props.max_srq_wr = ib_qib_max_srq_wrs;
1467        rdi->dparms.props.max_srq_sge = ib_qib_max_srq_sges;
1468        rdi->dparms.props.atomic_cap = IB_ATOMIC_GLOB;
1469        rdi->dparms.props.max_pkeys = qib_get_npkeys(dd);
1470        rdi->dparms.props.max_mcast_grp = ib_qib_max_mcast_grps;
1471        rdi->dparms.props.max_mcast_qp_attach = ib_qib_max_mcast_qp_attached;
1472        rdi->dparms.props.max_total_mcast_qp_attach =
1473                                        rdi->dparms.props.max_mcast_qp_attach *
1474                                        rdi->dparms.props.max_mcast_grp;
1475        /* post send table */
1476        dd->verbs_dev.rdi.post_parms = qib_post_parms;
1477
1478        /* opcode translation table */
1479        dd->verbs_dev.rdi.wc_opcode = ib_qib_wc_opcode;
1480}
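
/*
 * Illustrative sketch only: once rvt_register_device() has run, the props
 * filled in above are what rdmavt reports through the core's query-device
 * path, and the IB core caches them in ib_device.attrs.  A hypothetical
 * in-kernel consumer could therefore size its work requests as below;
 * example_clamp_sge() is an assumed name.
 */
#if 0	/* example only, never compiled */
static u32 example_clamp_sge(struct ib_device *ibdev, u32 wanted_sge)
{
        /* attrs.max_send_sge reflects ib_qib_max_sges for this driver */
        return min_t(u32, wanted_sge, ibdev->attrs.max_send_sge);
}
#endif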
1481
1482static const struct ib_device_ops qib_dev_ops = {
1483        .owner = THIS_MODULE,
1484        .driver_id = RDMA_DRIVER_QIB,
1485
1486        .init_port = qib_create_port_files,
1487        .modify_device = qib_modify_device,
1488        .process_mad = qib_process_mad,
1489};
1490
1491/**
1492 * qib_register_ib_device - register our device with the infiniband core
1493 * @dd: the device data structure
1494 * Return: 0 on success or a negative errno on failure.
1495 */
1496int qib_register_ib_device(struct qib_devdata *dd)
1497{
1498        struct qib_ibdev *dev = &dd->verbs_dev;
1499        struct ib_device *ibdev = &dev->rdi.ibdev;
1500        struct qib_pportdata *ppd = dd->pport;
1501        unsigned i, ctxt;
1502        int ret;
1503
1504        get_random_bytes(&dev->qp_rnd, sizeof(dev->qp_rnd));
1505        for (i = 0; i < dd->num_pports; i++)
1506                init_ibport(ppd + i);
1507
1508        /* Only need to initialize non-zero fields. */
1509        timer_setup(&dev->mem_timer, mem_timer, 0);
1510
1511        INIT_LIST_HEAD(&dev->piowait);
1512        INIT_LIST_HEAD(&dev->dmawait);
1513        INIT_LIST_HEAD(&dev->txwait);
1514        INIT_LIST_HEAD(&dev->memwait);
1515        INIT_LIST_HEAD(&dev->txreq_free);
1516
1517        if (ppd->sdma_descq_cnt) {
1518                dev->pio_hdrs = dma_alloc_coherent(&dd->pcidev->dev,
1519                                                ppd->sdma_descq_cnt *
1520                                                sizeof(struct qib_pio_header),
1521                                                &dev->pio_hdrs_phys,
1522                                                GFP_KERNEL);
1523                if (!dev->pio_hdrs) {
1524                        ret = -ENOMEM;
1525                        goto err_hdrs;
1526                }
1527        }
1528
1529        for (i = 0; i < ppd->sdma_descq_cnt; i++) {
1530                struct qib_verbs_txreq *tx;
1531
1532                tx = kzalloc(sizeof(*tx), GFP_KERNEL);
1533                if (!tx) {
1534                        ret = -ENOMEM;
1535                        goto err_tx;
1536                }
1537                tx->hdr_inx = i;
1538                list_add(&tx->txreq.list, &dev->txreq_free);
1539        }
1540
1541        /*
1542         * The system image GUID is supposed to be the same for all
1543         * IB HCAs in a single system, but since there can be other
1544         * device types in the system, we can't be sure this is unique.
1545         */
1546        if (!ib_qib_sys_image_guid)
1547                ib_qib_sys_image_guid = ppd->guid;
1548
1549        ibdev->node_guid = ppd->guid;
1550        ibdev->phys_port_cnt = dd->num_pports;
1551        ibdev->dev.parent = &dd->pcidev->dev;
1552
1553        snprintf(ibdev->node_desc, sizeof(ibdev->node_desc),
1554                 "Intel Infiniband HCA %s", init_utsname()->nodename);
1555
1556        /*
1557         * Fill in rvt info object.
1558         */
1559        dd->verbs_dev.rdi.driver_f.get_pci_dev = qib_get_pci_dev;
1560        dd->verbs_dev.rdi.driver_f.check_ah = qib_check_ah;
1561        dd->verbs_dev.rdi.driver_f.setup_wqe = qib_check_send_wqe;
1562        dd->verbs_dev.rdi.driver_f.notify_new_ah = qib_notify_new_ah;
1563        dd->verbs_dev.rdi.driver_f.alloc_qpn = qib_alloc_qpn;
1564        dd->verbs_dev.rdi.driver_f.qp_priv_alloc = qib_qp_priv_alloc;
1565        dd->verbs_dev.rdi.driver_f.qp_priv_free = qib_qp_priv_free;
1566        dd->verbs_dev.rdi.driver_f.free_all_qps = qib_free_all_qps;
1567        dd->verbs_dev.rdi.driver_f.notify_qp_reset = qib_notify_qp_reset;
1568        dd->verbs_dev.rdi.driver_f.do_send = qib_do_send;
1569        dd->verbs_dev.rdi.driver_f.schedule_send = qib_schedule_send;
1570        dd->verbs_dev.rdi.driver_f.quiesce_qp = qib_quiesce_qp;
1571        dd->verbs_dev.rdi.driver_f.stop_send_queue = qib_stop_send_queue;
1572        dd->verbs_dev.rdi.driver_f.flush_qp_waiters = qib_flush_qp_waiters;
1573        dd->verbs_dev.rdi.driver_f.notify_error_qp = qib_notify_error_qp;
1574        dd->verbs_dev.rdi.driver_f.notify_restart_rc = qib_restart_rc;
1575        dd->verbs_dev.rdi.driver_f.mtu_to_path_mtu = qib_mtu_to_path_mtu;
1576        dd->verbs_dev.rdi.driver_f.mtu_from_qp = qib_mtu_from_qp;
1577        dd->verbs_dev.rdi.driver_f.get_pmtu_from_attr = qib_get_pmtu_from_attr;
1578        dd->verbs_dev.rdi.driver_f.schedule_send_no_lock = _qib_schedule_send;
1579        dd->verbs_dev.rdi.driver_f.query_port_state = qib_query_port;
1580        dd->verbs_dev.rdi.driver_f.shut_down_port = qib_shut_down_port;
1581        dd->verbs_dev.rdi.driver_f.cap_mask_chg = qib_cap_mask_chg;
1582        dd->verbs_dev.rdi.driver_f.notify_create_mad_agent =
1583                                                qib_notify_create_mad_agent;
1584        dd->verbs_dev.rdi.driver_f.notify_free_mad_agent =
1585                                                qib_notify_free_mad_agent;
1586
1587        dd->verbs_dev.rdi.dparms.max_rdma_atomic = QIB_MAX_RDMA_ATOMIC;
1588        dd->verbs_dev.rdi.driver_f.get_guid_be = qib_get_guid_be;
1589        dd->verbs_dev.rdi.dparms.lkey_table_size = qib_lkey_table_size;
1590        dd->verbs_dev.rdi.dparms.qp_table_size = ib_qib_qp_table_size;
1591        dd->verbs_dev.rdi.dparms.qpn_start = 1;
1592        dd->verbs_dev.rdi.dparms.qpn_res_start = QIB_KD_QP;
1593        dd->verbs_dev.rdi.dparms.qpn_res_end = QIB_KD_QP; /* Reserve one QP */
1594        dd->verbs_dev.rdi.dparms.qpn_inc = 1;
1595        dd->verbs_dev.rdi.dparms.qos_shift = 1;
1596        dd->verbs_dev.rdi.dparms.psn_mask = QIB_PSN_MASK;
1597        dd->verbs_dev.rdi.dparms.psn_shift = QIB_PSN_SHIFT;
1598        dd->verbs_dev.rdi.dparms.psn_modify_mask = QIB_PSN_MASK;
1599        dd->verbs_dev.rdi.dparms.nports = dd->num_pports;
1600        dd->verbs_dev.rdi.dparms.npkeys = qib_get_npkeys(dd);
1601        dd->verbs_dev.rdi.dparms.node = dd->assigned_node_id;
1602        dd->verbs_dev.rdi.dparms.core_cap_flags = RDMA_CORE_PORT_IBA_IB;
1603        dd->verbs_dev.rdi.dparms.max_mad_size = IB_MGMT_MAD_SIZE;
1604        dd->verbs_dev.rdi.dparms.sge_copy_mode = RVT_SGE_COPY_MEMCPY;
1605
1606        qib_fill_device_attr(dd);
1607
1608        ppd = dd->pport;
1609        for (i = 0; i < dd->num_pports; i++, ppd++) {
1610                ctxt = ppd->hw_pidx;
1611                rvt_init_port(&dd->verbs_dev.rdi,
1612                              &ppd->ibport_data.rvp,
1613                              i,
1614                              dd->rcd[ctxt]->pkeys);
1615        }
1616        rdma_set_device_sysfs_group(&dd->verbs_dev.rdi.ibdev, &qib_attr_group);
1617
1618        ib_set_device_ops(ibdev, &qib_dev_ops);
1619        ret = rvt_register_device(&dd->verbs_dev.rdi);
1620        if (ret)
1621                goto err_tx;
1622
1623        return ret;
1624
1625err_tx:
1626        while (!list_empty(&dev->txreq_free)) {
1627                struct list_head *l = dev->txreq_free.next;
1628                struct qib_verbs_txreq *tx;
1629
1630                list_del(l);
1631                tx = list_entry(l, struct qib_verbs_txreq, txreq.list);
1632                kfree(tx);
1633        }
1634        if (ppd->sdma_descq_cnt)
1635                dma_free_coherent(&dd->pcidev->dev,
1636                                  ppd->sdma_descq_cnt *
1637                                        sizeof(struct qib_pio_header),
1638                                  dev->pio_hdrs, dev->pio_hdrs_phys);
1639err_hdrs:
1640        qib_dev_err(dd, "cannot register verbs: %d!\n", -ret);
1641        return ret;
1642}
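
/*
 * Illustrative sketch only: qib_register_ib_device() is meant to be called
 * once per device from the init/probe path and paired with
 * qib_unregister_ib_device() on teardown.  example_probe_tail() is an
 * assumed name; the real call site lives in the driver's init code.
 */
#if 0	/* example only, never compiled */
static int example_probe_tail(struct qib_devdata *dd)
{
        int ret = qib_register_ib_device(dd);

        if (ret)
                qib_dev_err(dd, "verbs registration failed: %d\n", ret);
        return ret;
}
#endif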
1643
1644void qib_unregister_ib_device(struct qib_devdata *dd)
1645{
1646        struct qib_ibdev *dev = &dd->verbs_dev;
1647
1648        qib_verbs_unregister_sysfs(dd);
1649
1650        rvt_unregister_device(&dd->verbs_dev.rdi);
1651
1652        if (!list_empty(&dev->piowait))
1653                qib_dev_err(dd, "piowait list not empty!\n");
1654        if (!list_empty(&dev->dmawait))
1655                qib_dev_err(dd, "dmawait list not empty!\n");
1656        if (!list_empty(&dev->txwait))
1657                qib_dev_err(dd, "txwait list not empty!\n");
1658        if (!list_empty(&dev->memwait))
1659                qib_dev_err(dd, "memwait list not empty!\n");
1660
1661        del_timer_sync(&dev->mem_timer);
1662        while (!list_empty(&dev->txreq_free)) {
1663                struct list_head *l = dev->txreq_free.next;
1664                struct qib_verbs_txreq *tx;
1665
1666                list_del(l);
1667                tx = list_entry(l, struct qib_verbs_txreq, txreq.list);
1668                kfree(tx);
1669        }
1670        if (dd->pport->sdma_descq_cnt)
1671                dma_free_coherent(&dd->pcidev->dev,
1672                                  dd->pport->sdma_descq_cnt *
1673                                        sizeof(struct qib_pio_header),
1674                                  dev->pio_hdrs, dev->pio_hdrs_phys);
1675}
1676
1677/**
1678 * _qib_schedule_send - schedule progress
1679 * @qp: the QP
1680 *
1681 * This schedules progress without regard to the s_flags.
1682 *
1683 * It is only used in post send, which doesn't hold
1684 * the s_lock.
1685 */
1686bool _qib_schedule_send(struct rvt_qp *qp)
1687{
1688        struct qib_ibport *ibp =
1689                to_iport(qp->ibqp.device, qp->port_num);
1690        struct qib_pportdata *ppd = ppd_from_ibp(ibp);
1691        struct qib_qp_priv *priv = qp->priv;
1692
1693        return queue_work(ppd->qib_wq, &priv->s_work);
1694}
1695
1696/**
1697 * qib_schedule_send - schedule progress
1698 * @qp: the QP
1699 *
1700 * This schedules qp progress.  The s_lock
1701 * should be held.
1702 */
1703bool qib_schedule_send(struct rvt_qp *qp)
1704{
1705        if (qib_send_ok(qp))
1706                return _qib_schedule_send(qp);
1707        return false;
1708}
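
/*
 * Illustrative sketch only: the two helpers above just queue priv->s_work on
 * the per-port workqueue.  Elsewhere in the driver that work item is wired to
 * a handler which resolves the owning QP and calls qib_do_send(); a minimal
 * version of that wiring might look like the following, assuming the private
 * data keeps a back-pointer to its QP in ->owner.  The example_* names are
 * assumptions; the real setup lives in the QP init code.
 */
#if 0	/* example only, never compiled */
static void example_do_send_work(struct work_struct *work)
{
        struct qib_qp_priv *priv =
                container_of(work, struct qib_qp_priv, s_work);

        qib_do_send(priv->owner);
}

static void example_init_send_work(struct qib_qp_priv *priv)
{
        INIT_WORK(&priv->s_work, example_do_send_work);
}
#endif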
1709