linux/drivers/infiniband/hw/qib/qib_verbs.c
   1/*
   2 * Copyright (c) 2012 - 2018 Intel Corporation.  All rights reserved.
   3 * Copyright (c) 2006 - 2012 QLogic Corporation. All rights reserved.
   4 * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
   5 *
   6 * This software is available to you under a choice of one of two
   7 * licenses.  You may choose to be licensed under the terms of the GNU
   8 * General Public License (GPL) Version 2, available from the file
   9 * COPYING in the main directory of this source tree, or the
  10 * OpenIB.org BSD license below:
  11 *
  12 *     Redistribution and use in source and binary forms, with or
  13 *     without modification, are permitted provided that the following
  14 *     conditions are met:
  15 *
  16 *      - Redistributions of source code must retain the above
  17 *        copyright notice, this list of conditions and the following
  18 *        disclaimer.
  19 *
  20 *      - Redistributions in binary form must reproduce the above
  21 *        copyright notice, this list of conditions and the following
  22 *        disclaimer in the documentation and/or other materials
  23 *        provided with the distribution.
  24 *
  25 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  26 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  27 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  28 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
  29 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
  30 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  31 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  32 * SOFTWARE.
  33 */
  34
  35#include <rdma/ib_mad.h>
  36#include <rdma/ib_user_verbs.h>
  37#include <linux/io.h>
  38#include <linux/module.h>
  39#include <linux/utsname.h>
  40#include <linux/rculist.h>
  41#include <linux/mm.h>
  42#include <linux/vmalloc.h>
  43#include <rdma/rdma_vt.h>
  44
  45#include "qib.h"
  46#include "qib_common.h"
  47
  48static unsigned int ib_qib_qp_table_size = 256;
  49module_param_named(qp_table_size, ib_qib_qp_table_size, uint, S_IRUGO);
  50MODULE_PARM_DESC(qp_table_size, "QP table size");
  51
  52static unsigned int qib_lkey_table_size = 16;
  53module_param_named(lkey_table_size, qib_lkey_table_size, uint,
  54                   S_IRUGO);
  55MODULE_PARM_DESC(lkey_table_size,
  56                 "LKEY table size in bits (2^n, 1 <= n <= 23)");
  57
  58static unsigned int ib_qib_max_pds = 0xFFFF;
  59module_param_named(max_pds, ib_qib_max_pds, uint, S_IRUGO);
  60MODULE_PARM_DESC(max_pds,
  61                 "Maximum number of protection domains to support");
  62
  63static unsigned int ib_qib_max_ahs = 0xFFFF;
  64module_param_named(max_ahs, ib_qib_max_ahs, uint, S_IRUGO);
  65MODULE_PARM_DESC(max_ahs, "Maximum number of address handles to support");
  66
  67unsigned int ib_qib_max_cqes = 0x2FFFF;
  68module_param_named(max_cqes, ib_qib_max_cqes, uint, S_IRUGO);
  69MODULE_PARM_DESC(max_cqes,
  70                 "Maximum number of completion queue entries to support");
  71
  72unsigned int ib_qib_max_cqs = 0x1FFFF;
  73module_param_named(max_cqs, ib_qib_max_cqs, uint, S_IRUGO);
  74MODULE_PARM_DESC(max_cqs, "Maximum number of completion queues to support");
  75
  76unsigned int ib_qib_max_qp_wrs = 0x3FFF;
  77module_param_named(max_qp_wrs, ib_qib_max_qp_wrs, uint, S_IRUGO);
  78MODULE_PARM_DESC(max_qp_wrs, "Maximum number of QP WRs to support");
  79
  80unsigned int ib_qib_max_qps = 16384;
  81module_param_named(max_qps, ib_qib_max_qps, uint, S_IRUGO);
  82MODULE_PARM_DESC(max_qps, "Maximum number of QPs to support");
  83
  84unsigned int ib_qib_max_sges = 0x60;
  85module_param_named(max_sges, ib_qib_max_sges, uint, S_IRUGO);
  86MODULE_PARM_DESC(max_sges, "Maximum number of SGEs to support");
  87
  88unsigned int ib_qib_max_mcast_grps = 16384;
  89module_param_named(max_mcast_grps, ib_qib_max_mcast_grps, uint, S_IRUGO);
  90MODULE_PARM_DESC(max_mcast_grps,
  91                 "Maximum number of multicast groups to support");
  92
  93unsigned int ib_qib_max_mcast_qp_attached = 16;
  94module_param_named(max_mcast_qp_attached, ib_qib_max_mcast_qp_attached,
  95                   uint, S_IRUGO);
  96MODULE_PARM_DESC(max_mcast_qp_attached,
  97                 "Maximum number of attached QPs to support");
  98
  99unsigned int ib_qib_max_srqs = 1024;
 100module_param_named(max_srqs, ib_qib_max_srqs, uint, S_IRUGO);
 101MODULE_PARM_DESC(max_srqs, "Maximum number of SRQs to support");
 102
 103unsigned int ib_qib_max_srq_sges = 128;
 104module_param_named(max_srq_sges, ib_qib_max_srq_sges, uint, S_IRUGO);
 105MODULE_PARM_DESC(max_srq_sges, "Maximum number of SRQ SGEs to support");
 106
 107unsigned int ib_qib_max_srq_wrs = 0x1FFFF;
 108module_param_named(max_srq_wrs, ib_qib_max_srq_wrs, uint, S_IRUGO);
  109MODULE_PARM_DESC(max_srq_wrs, "Maximum number of SRQ WRs to support");
 110
 111static unsigned int ib_qib_disable_sma;
 112module_param_named(disable_sma, ib_qib_disable_sma, uint, S_IWUSR | S_IRUGO);
 113MODULE_PARM_DESC(disable_sma, "Disable the SMA");
 114
 115/*
 116 * Translate ib_wr_opcode into ib_wc_opcode.
 117 */
 118const enum ib_wc_opcode ib_qib_wc_opcode[] = {
 119        [IB_WR_RDMA_WRITE] = IB_WC_RDMA_WRITE,
 120        [IB_WR_RDMA_WRITE_WITH_IMM] = IB_WC_RDMA_WRITE,
 121        [IB_WR_SEND] = IB_WC_SEND,
 122        [IB_WR_SEND_WITH_IMM] = IB_WC_SEND,
 123        [IB_WR_RDMA_READ] = IB_WC_RDMA_READ,
 124        [IB_WR_ATOMIC_CMP_AND_SWP] = IB_WC_COMP_SWAP,
 125        [IB_WR_ATOMIC_FETCH_AND_ADD] = IB_WC_FETCH_ADD
 126};
 127
 128/*
 129 * System image GUID.
 130 */
 131__be64 ib_qib_sys_image_guid;
 132
 133/*
 134 * Count the number of DMA descriptors needed to send length bytes of data.
  135 * Don't modify the rvt_sge_state to get the count.
 136 * Return zero if any of the segments is not aligned.
 137 */
 138static u32 qib_count_sge(struct rvt_sge_state *ss, u32 length)
 139{
 140        struct rvt_sge *sg_list = ss->sg_list;
 141        struct rvt_sge sge = ss->sge;
 142        u8 num_sge = ss->num_sge;
 143        u32 ndesc = 1;  /* count the header */
 144
 145        while (length) {
 146                u32 len = rvt_get_sge_length(&sge, length);
 147
 148                if (((long) sge.vaddr & (sizeof(u32) - 1)) ||
 149                    (len != length && (len & (sizeof(u32) - 1)))) {
 150                        ndesc = 0;
 151                        break;
 152                }
 153                ndesc++;
 154                sge.vaddr += len;
 155                sge.length -= len;
 156                sge.sge_length -= len;
 157                if (sge.sge_length == 0) {
 158                        if (--num_sge)
 159                                sge = *sg_list++;
 160                } else if (sge.length == 0 && sge.mr->lkey) {
 161                        if (++sge.n >= RVT_SEGSZ) {
 162                                if (++sge.m >= sge.mr->mapsz)
 163                                        break;
 164                                sge.n = 0;
 165                        }
 166                        sge.vaddr =
 167                                sge.mr->map[sge.m]->segs[sge.n].vaddr;
 168                        sge.length =
 169                                sge.mr->map[sge.m]->segs[sge.n].length;
 170                }
 171                length -= len;
 172        }
 173        return ndesc;
 174}
 175
 176/*
 177 * Copy from the SGEs to the data buffer.
 178 */
 179static void qib_copy_from_sge(void *data, struct rvt_sge_state *ss, u32 length)
 180{
 181        struct rvt_sge *sge = &ss->sge;
 182
 183        while (length) {
 184                u32 len = rvt_get_sge_length(sge, length);
 185
 186                memcpy(data, sge->vaddr, len);
 187                sge->vaddr += len;
 188                sge->length -= len;
 189                sge->sge_length -= len;
 190                if (sge->sge_length == 0) {
 191                        if (--ss->num_sge)
 192                                *sge = *ss->sg_list++;
 193                } else if (sge->length == 0 && sge->mr->lkey) {
 194                        if (++sge->n >= RVT_SEGSZ) {
 195                                if (++sge->m >= sge->mr->mapsz)
 196                                        break;
 197                                sge->n = 0;
 198                        }
 199                        sge->vaddr =
 200                                sge->mr->map[sge->m]->segs[sge->n].vaddr;
 201                        sge->length =
 202                                sge->mr->map[sge->m]->segs[sge->n].length;
 203                }
 204                data += len;
 205                length -= len;
 206        }
 207}
 208
 209/**
  210 * qib_qp_rcv - process an incoming packet on a QP
 211 * @rcd: the context pointer
 212 * @hdr: the packet header
 213 * @has_grh: true if the packet has a GRH
 214 * @data: the packet data
 215 * @tlen: the packet length
 216 * @qp: the QP the packet came on
 217 *
 218 * This is called from qib_ib_rcv() to process an incoming packet
 219 * for the given QP.
 220 * Called at interrupt level.
 221 */
 222static void qib_qp_rcv(struct qib_ctxtdata *rcd, struct ib_header *hdr,
 223                       int has_grh, void *data, u32 tlen, struct rvt_qp *qp)
 224{
 225        struct qib_ibport *ibp = &rcd->ppd->ibport_data;
 226
 227        spin_lock(&qp->r_lock);
 228
 229        /* Check for valid receive state. */
 230        if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK)) {
 231                ibp->rvp.n_pkt_drops++;
 232                goto unlock;
 233        }
 234
 235        switch (qp->ibqp.qp_type) {
 236        case IB_QPT_SMI:
 237        case IB_QPT_GSI:
 238                if (ib_qib_disable_sma)
 239                        break;
 240                fallthrough;
 241        case IB_QPT_UD:
 242                qib_ud_rcv(ibp, hdr, has_grh, data, tlen, qp);
 243                break;
 244
 245        case IB_QPT_RC:
 246                qib_rc_rcv(rcd, hdr, has_grh, data, tlen, qp);
 247                break;
 248
 249        case IB_QPT_UC:
 250                qib_uc_rcv(ibp, hdr, has_grh, data, tlen, qp);
 251                break;
 252
 253        default:
 254                break;
 255        }
 256
 257unlock:
 258        spin_unlock(&qp->r_lock);
 259}
 260
 261/**
 262 * qib_ib_rcv - process an incoming packet
 263 * @rcd: the context pointer
 264 * @rhdr: the header of the packet
 265 * @data: the packet payload
 266 * @tlen: the packet length
 267 *
 268 * This is called from qib_kreceive() to process an incoming packet at
 269 * interrupt level. Tlen is the length of the header + data + CRC in bytes.
 270 */
 271void qib_ib_rcv(struct qib_ctxtdata *rcd, void *rhdr, void *data, u32 tlen)
 272{
 273        struct qib_pportdata *ppd = rcd->ppd;
 274        struct qib_ibport *ibp = &ppd->ibport_data;
 275        struct ib_header *hdr = rhdr;
 276        struct qib_devdata *dd = ppd->dd;
 277        struct rvt_dev_info *rdi = &dd->verbs_dev.rdi;
 278        struct ib_other_headers *ohdr;
 279        struct rvt_qp *qp;
 280        u32 qp_num;
 281        int lnh;
 282        u8 opcode;
 283        u16 lid;
 284
 285        /* 24 == LRH+BTH+CRC */
 286        if (unlikely(tlen < 24))
 287                goto drop;
 288
 289        /* Check for a valid destination LID (see ch. 7.11.1). */
 290        lid = be16_to_cpu(hdr->lrh[1]);
 291        if (lid < be16_to_cpu(IB_MULTICAST_LID_BASE)) {
 292                lid &= ~((1 << ppd->lmc) - 1);
 293                if (unlikely(lid != ppd->lid))
 294                        goto drop;
 295        }
 296
 297        /* Check for GRH */
 298        lnh = be16_to_cpu(hdr->lrh[0]) & 3;
 299        if (lnh == QIB_LRH_BTH)
 300                ohdr = &hdr->u.oth;
 301        else if (lnh == QIB_LRH_GRH) {
 302                u32 vtf;
 303
 304                ohdr = &hdr->u.l.oth;
 305                if (hdr->u.l.grh.next_hdr != IB_GRH_NEXT_HDR)
 306                        goto drop;
 307                vtf = be32_to_cpu(hdr->u.l.grh.version_tclass_flow);
 308                if ((vtf >> IB_GRH_VERSION_SHIFT) != IB_GRH_VERSION)
 309                        goto drop;
 310        } else
 311                goto drop;
 312
 313        opcode = (be32_to_cpu(ohdr->bth[0]) >> 24) & 0x7f;
 314#ifdef CONFIG_DEBUG_FS
 315        rcd->opstats->stats[opcode].n_bytes += tlen;
 316        rcd->opstats->stats[opcode].n_packets++;
 317#endif
 318
 319        /* Get the destination QP number. */
 320        qp_num = be32_to_cpu(ohdr->bth[1]) & RVT_QPN_MASK;
 321        if (qp_num == QIB_MULTICAST_QPN) {
 322                struct rvt_mcast *mcast;
 323                struct rvt_mcast_qp *p;
 324
 325                if (lnh != QIB_LRH_GRH)
 326                        goto drop;
 327                mcast = rvt_mcast_find(&ibp->rvp, &hdr->u.l.grh.dgid, lid);
 328                if (mcast == NULL)
 329                        goto drop;
 330                this_cpu_inc(ibp->pmastats->n_multicast_rcv);
 331                rcu_read_lock();
 332                list_for_each_entry_rcu(p, &mcast->qp_list, list)
 333                        qib_qp_rcv(rcd, hdr, 1, data, tlen, p->qp);
 334                rcu_read_unlock();
 335                /*
  336                 * Notify rvt_detach_mcast() if it is waiting for us
 337                 * to finish.
 338                 */
 339                if (atomic_dec_return(&mcast->refcount) <= 1)
 340                        wake_up(&mcast->wait);
 341        } else {
 342                rcu_read_lock();
 343                qp = rvt_lookup_qpn(rdi, &ibp->rvp, qp_num);
 344                if (!qp) {
 345                        rcu_read_unlock();
 346                        goto drop;
 347                }
 348                this_cpu_inc(ibp->pmastats->n_unicast_rcv);
 349                qib_qp_rcv(rcd, hdr, lnh == QIB_LRH_GRH, data, tlen, qp);
 350                rcu_read_unlock();
 351        }
 352        return;
 353
 354drop:
 355        ibp->rvp.n_pkt_drops++;
 356}
 357
 358/*
 359 * This is called from a timer to check for QPs
 360 * which need kernel memory in order to send a packet.
 361 */
 362static void mem_timer(struct timer_list *t)
 363{
 364        struct qib_ibdev *dev = from_timer(dev, t, mem_timer);
 365        struct list_head *list = &dev->memwait;
 366        struct rvt_qp *qp = NULL;
 367        struct qib_qp_priv *priv = NULL;
 368        unsigned long flags;
 369
 370        spin_lock_irqsave(&dev->rdi.pending_lock, flags);
 371        if (!list_empty(list)) {
 372                priv = list_entry(list->next, struct qib_qp_priv, iowait);
 373                qp = priv->owner;
 374                list_del_init(&priv->iowait);
 375                rvt_get_qp(qp);
 376                if (!list_empty(list))
 377                        mod_timer(&dev->mem_timer, jiffies + 1);
 378        }
 379        spin_unlock_irqrestore(&dev->rdi.pending_lock, flags);
 380
 381        if (qp) {
 382                spin_lock_irqsave(&qp->s_lock, flags);
 383                if (qp->s_flags & RVT_S_WAIT_KMEM) {
 384                        qp->s_flags &= ~RVT_S_WAIT_KMEM;
 385                        qib_schedule_send(qp);
 386                }
 387                spin_unlock_irqrestore(&qp->s_lock, flags);
 388                rvt_put_qp(qp);
 389        }
 390}
 391
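/*
 * Helper routines used by copy_io() below to assemble 32-bit words from
 * a source buffer that is not dword aligned.  The little- and big-endian
 * variants differ so that the accumulated bytes land in the correct byte
 * lanes of the word written to the PIO buffer.
 */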
 392#ifdef __LITTLE_ENDIAN
 393static inline u32 get_upper_bits(u32 data, u32 shift)
 394{
 395        return data >> shift;
 396}
 397
 398static inline u32 set_upper_bits(u32 data, u32 shift)
 399{
 400        return data << shift;
 401}
 402
 403static inline u32 clear_upper_bytes(u32 data, u32 n, u32 off)
 404{
 405        data <<= ((sizeof(u32) - n) * BITS_PER_BYTE);
 406        data >>= ((sizeof(u32) - n - off) * BITS_PER_BYTE);
 407        return data;
 408}
 409#else
 410static inline u32 get_upper_bits(u32 data, u32 shift)
 411{
 412        return data << shift;
 413}
 414
 415static inline u32 set_upper_bits(u32 data, u32 shift)
 416{
 417        return data >> shift;
 418}
 419
 420static inline u32 clear_upper_bytes(u32 data, u32 n, u32 off)
 421{
 422        data >>= ((sizeof(u32) - n) * BITS_PER_BYTE);
 423        data <<= ((sizeof(u32) - n - off) * BITS_PER_BYTE);
 424        return data;
 425}
 426#endif
 427
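/*
 * Copy 'length' bytes from the SGE list to the memory-mapped PIO send
 * buffer, one 32-bit word at a time, handling source buffers that are
 * not dword aligned.  The final word is held back and written last
 * since it is the trigger word, with write-combining flushes when
 * 'flush_wc' is set.
 */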
 428static void copy_io(u32 __iomem *piobuf, struct rvt_sge_state *ss,
 429                    u32 length, unsigned flush_wc)
 430{
 431        u32 extra = 0;
 432        u32 data = 0;
 433        u32 last;
 434
 435        while (1) {
 436                u32 len = rvt_get_sge_length(&ss->sge, length);
 437                u32 off;
 438
 439                /* If the source address is not aligned, try to align it. */
 440                off = (unsigned long)ss->sge.vaddr & (sizeof(u32) - 1);
 441                if (off) {
 442                        u32 *addr = (u32 *)((unsigned long)ss->sge.vaddr &
 443                                            ~(sizeof(u32) - 1));
 444                        u32 v = get_upper_bits(*addr, off * BITS_PER_BYTE);
 445                        u32 y;
 446
 447                        y = sizeof(u32) - off;
 448                        if (len > y)
 449                                len = y;
 450                        if (len + extra >= sizeof(u32)) {
 451                                data |= set_upper_bits(v, extra *
 452                                                       BITS_PER_BYTE);
 453                                len = sizeof(u32) - extra;
 454                                if (len == length) {
 455                                        last = data;
 456                                        break;
 457                                }
 458                                __raw_writel(data, piobuf);
 459                                piobuf++;
 460                                extra = 0;
 461                                data = 0;
 462                        } else {
 463                                /* Clear unused upper bytes */
 464                                data |= clear_upper_bytes(v, len, extra);
 465                                if (len == length) {
 466                                        last = data;
 467                                        break;
 468                                }
 469                                extra += len;
 470                        }
 471                } else if (extra) {
 472                        /* Source address is aligned. */
 473                        u32 *addr = (u32 *) ss->sge.vaddr;
 474                        int shift = extra * BITS_PER_BYTE;
 475                        int ushift = 32 - shift;
 476                        u32 l = len;
 477
 478                        while (l >= sizeof(u32)) {
 479                                u32 v = *addr;
 480
 481                                data |= set_upper_bits(v, shift);
 482                                __raw_writel(data, piobuf);
 483                                data = get_upper_bits(v, ushift);
 484                                piobuf++;
 485                                addr++;
 486                                l -= sizeof(u32);
 487                        }
 488                        /*
  489                         * Handle any bytes (l < sizeof(u32)) left over from this chunk.
 490                         */
 491                        if (l) {
 492                                u32 v = *addr;
 493
 494                                if (l + extra >= sizeof(u32)) {
 495                                        data |= set_upper_bits(v, shift);
 496                                        len -= l + extra - sizeof(u32);
 497                                        if (len == length) {
 498                                                last = data;
 499                                                break;
 500                                        }
 501                                        __raw_writel(data, piobuf);
 502                                        piobuf++;
 503                                        extra = 0;
 504                                        data = 0;
 505                                } else {
 506                                        /* Clear unused upper bytes */
 507                                        data |= clear_upper_bytes(v, l, extra);
 508                                        if (len == length) {
 509                                                last = data;
 510                                                break;
 511                                        }
 512                                        extra += l;
 513                                }
 514                        } else if (len == length) {
 515                                last = data;
 516                                break;
 517                        }
 518                } else if (len == length) {
 519                        u32 w;
 520
 521                        /*
 522                         * Need to round up for the last dword in the
 523                         * packet.
 524                         */
 525                        w = (len + 3) >> 2;
 526                        qib_pio_copy(piobuf, ss->sge.vaddr, w - 1);
 527                        piobuf += w - 1;
 528                        last = ((u32 *) ss->sge.vaddr)[w - 1];
 529                        break;
 530                } else {
 531                        u32 w = len >> 2;
 532
 533                        qib_pio_copy(piobuf, ss->sge.vaddr, w);
 534                        piobuf += w;
 535
 536                        extra = len & (sizeof(u32) - 1);
 537                        if (extra) {
 538                                u32 v = ((u32 *) ss->sge.vaddr)[w];
 539
 540                                /* Clear unused upper bytes */
 541                                data = clear_upper_bytes(v, extra, 0);
 542                        }
 543                }
 544                rvt_update_sge(ss, len, false);
 545                length -= len;
 546        }
 547        /* Update address before sending packet. */
 548        rvt_update_sge(ss, length, false);
 549        if (flush_wc) {
  550                /* must flush everything early, before the trigger word */
 551                qib_flush_wc();
 552                __raw_writel(last, piobuf);
 553                /* be sure trigger word is written */
 554                qib_flush_wc();
 555        } else
 556                __raw_writel(last, piobuf);
 557}
 558
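/*
 * Slow path for get_txreq(): re-check the free list while holding both
 * qp->s_lock and the device pending_lock.  If it is still empty, queue
 * the QP on the txwait list (unless already queued), clear RVT_S_BUSY
 * and return ERR_PTR(-EBUSY).
 */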
 559static noinline struct qib_verbs_txreq *__get_txreq(struct qib_ibdev *dev,
 560                                           struct rvt_qp *qp)
 561{
 562        struct qib_qp_priv *priv = qp->priv;
 563        struct qib_verbs_txreq *tx;
 564        unsigned long flags;
 565
 566        spin_lock_irqsave(&qp->s_lock, flags);
 567        spin_lock(&dev->rdi.pending_lock);
 568
 569        if (!list_empty(&dev->txreq_free)) {
 570                struct list_head *l = dev->txreq_free.next;
 571
 572                list_del(l);
 573                spin_unlock(&dev->rdi.pending_lock);
 574                spin_unlock_irqrestore(&qp->s_lock, flags);
 575                tx = list_entry(l, struct qib_verbs_txreq, txreq.list);
 576        } else {
 577                if (ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK &&
 578                    list_empty(&priv->iowait)) {
 579                        dev->n_txwait++;
 580                        qp->s_flags |= RVT_S_WAIT_TX;
 581                        list_add_tail(&priv->iowait, &dev->txwait);
 582                }
 583                qp->s_flags &= ~RVT_S_BUSY;
 584                spin_unlock(&dev->rdi.pending_lock);
 585                spin_unlock_irqrestore(&qp->s_lock, flags);
 586                tx = ERR_PTR(-EBUSY);
 587        }
 588        return tx;
 589}
 590
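/*
 * Fast path: pull a verbs txreq off the free list under pending_lock
 * only, falling back to __get_txreq() if the list is empty.
 */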
 591static inline struct qib_verbs_txreq *get_txreq(struct qib_ibdev *dev,
 592                                         struct rvt_qp *qp)
 593{
 594        struct qib_verbs_txreq *tx;
 595        unsigned long flags;
 596
 597        spin_lock_irqsave(&dev->rdi.pending_lock, flags);
  598        /* assume the list is non-empty */
 599        if (likely(!list_empty(&dev->txreq_free))) {
 600                struct list_head *l = dev->txreq_free.next;
 601
 602                list_del(l);
 603                spin_unlock_irqrestore(&dev->rdi.pending_lock, flags);
 604                tx = list_entry(l, struct qib_verbs_txreq, txreq.list);
 605        } else {
 606                /* call slow path to get the extra lock */
 607                spin_unlock_irqrestore(&dev->rdi.pending_lock, flags);
 608                tx =  __get_txreq(dev, qp);
 609        }
 610        return tx;
 611}
 612
 613void qib_put_txreq(struct qib_verbs_txreq *tx)
 614{
 615        struct qib_ibdev *dev;
 616        struct rvt_qp *qp;
 617        struct qib_qp_priv *priv;
 618        unsigned long flags;
 619
 620        qp = tx->qp;
 621        dev = to_idev(qp->ibqp.device);
 622
 623        if (tx->mr) {
 624                rvt_put_mr(tx->mr);
 625                tx->mr = NULL;
 626        }
 627        if (tx->txreq.flags & QIB_SDMA_TXREQ_F_FREEBUF) {
 628                tx->txreq.flags &= ~QIB_SDMA_TXREQ_F_FREEBUF;
 629                dma_unmap_single(&dd_from_dev(dev)->pcidev->dev,
 630                                 tx->txreq.addr, tx->hdr_dwords << 2,
 631                                 DMA_TO_DEVICE);
 632                kfree(tx->align_buf);
 633        }
 634
 635        spin_lock_irqsave(&dev->rdi.pending_lock, flags);
 636
 637        /* Put struct back on free list */
 638        list_add(&tx->txreq.list, &dev->txreq_free);
 639
 640        if (!list_empty(&dev->txwait)) {
 641                /* Wake up first QP wanting a free struct */
 642                priv = list_entry(dev->txwait.next, struct qib_qp_priv,
 643                                  iowait);
 644                qp = priv->owner;
 645                list_del_init(&priv->iowait);
 646                rvt_get_qp(qp);
 647                spin_unlock_irqrestore(&dev->rdi.pending_lock, flags);
 648
 649                spin_lock_irqsave(&qp->s_lock, flags);
 650                if (qp->s_flags & RVT_S_WAIT_TX) {
 651                        qp->s_flags &= ~RVT_S_WAIT_TX;
 652                        qib_schedule_send(qp);
 653                }
 654                spin_unlock_irqrestore(&qp->s_lock, flags);
 655
 656                rvt_put_qp(qp);
 657        } else
 658                spin_unlock_irqrestore(&dev->rdi.pending_lock, flags);
 659}
 660
 661/*
 662 * This is called when there are send DMA descriptors that might be
 663 * available.
 664 *
 665 * This is called with ppd->sdma_lock held.
 666 */
 667void qib_verbs_sdma_desc_avail(struct qib_pportdata *ppd, unsigned avail)
 668{
 669        struct rvt_qp *qp;
 670        struct qib_qp_priv *qpp, *nqpp;
 671        struct rvt_qp *qps[20];
 672        struct qib_ibdev *dev;
 673        unsigned i, n;
 674
 675        n = 0;
 676        dev = &ppd->dd->verbs_dev;
 677        spin_lock(&dev->rdi.pending_lock);
 678
 679        /* Search wait list for first QP wanting DMA descriptors. */
 680        list_for_each_entry_safe(qpp, nqpp, &dev->dmawait, iowait) {
 681                qp = qpp->owner;
 682                if (qp->port_num != ppd->port)
 683                        continue;
 684                if (n == ARRAY_SIZE(qps))
 685                        break;
 686                if (qpp->s_tx->txreq.sg_count > avail)
 687                        break;
 688                avail -= qpp->s_tx->txreq.sg_count;
 689                list_del_init(&qpp->iowait);
 690                rvt_get_qp(qp);
 691                qps[n++] = qp;
 692        }
 693
 694        spin_unlock(&dev->rdi.pending_lock);
 695
 696        for (i = 0; i < n; i++) {
 697                qp = qps[i];
 698                spin_lock(&qp->s_lock);
 699                if (qp->s_flags & RVT_S_WAIT_DMA_DESC) {
 700                        qp->s_flags &= ~RVT_S_WAIT_DMA_DESC;
 701                        qib_schedule_send(qp);
 702                }
 703                spin_unlock(&qp->s_lock);
 704                rvt_put_qp(qp);
 705        }
 706}
 707
 708/*
 709 * This is called with ppd->sdma_lock held.
 710 */
 711static void sdma_complete(struct qib_sdma_txreq *cookie, int status)
 712{
 713        struct qib_verbs_txreq *tx =
 714                container_of(cookie, struct qib_verbs_txreq, txreq);
 715        struct rvt_qp *qp = tx->qp;
 716        struct qib_qp_priv *priv = qp->priv;
 717
 718        spin_lock(&qp->s_lock);
 719        if (tx->wqe)
 720                rvt_send_complete(qp, tx->wqe, IB_WC_SUCCESS);
 721        else if (qp->ibqp.qp_type == IB_QPT_RC) {
 722                struct ib_header *hdr;
 723
 724                if (tx->txreq.flags & QIB_SDMA_TXREQ_F_FREEBUF)
 725                        hdr = &tx->align_buf->hdr;
 726                else {
 727                        struct qib_ibdev *dev = to_idev(qp->ibqp.device);
 728
 729                        hdr = &dev->pio_hdrs[tx->hdr_inx].hdr;
 730                }
 731                qib_rc_send_complete(qp, hdr);
 732        }
 733        if (atomic_dec_and_test(&priv->s_dma_busy)) {
 734                if (qp->state == IB_QPS_RESET)
 735                        wake_up(&priv->wait_dma);
 736                else if (qp->s_flags & RVT_S_WAIT_DMA) {
 737                        qp->s_flags &= ~RVT_S_WAIT_DMA;
 738                        qib_schedule_send(qp);
 739                }
 740        }
 741        spin_unlock(&qp->s_lock);
 742
 743        qib_put_txreq(tx);
 744}
 745
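/*
 * Called when a kernel memory allocation for a send fails: if the QP is
 * still active, queue it on the memwait list (arming mem_timer if the
 * list was empty) and return -EBUSY so the send is retried later.
 */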
 746static int wait_kmem(struct qib_ibdev *dev, struct rvt_qp *qp)
 747{
 748        struct qib_qp_priv *priv = qp->priv;
 749        unsigned long flags;
 750        int ret = 0;
 751
 752        spin_lock_irqsave(&qp->s_lock, flags);
 753        if (ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK) {
 754                spin_lock(&dev->rdi.pending_lock);
 755                if (list_empty(&priv->iowait)) {
 756                        if (list_empty(&dev->memwait))
 757                                mod_timer(&dev->mem_timer, jiffies + 1);
 758                        qp->s_flags |= RVT_S_WAIT_KMEM;
 759                        list_add_tail(&priv->iowait, &dev->memwait);
 760                }
 761                spin_unlock(&dev->rdi.pending_lock);
 762                qp->s_flags &= ~RVT_S_BUSY;
 763                ret = -EBUSY;
 764        }
 765        spin_unlock_irqrestore(&qp->s_lock, flags);
 766
 767        return ret;
 768}
 769
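/*
 * Send a packet using the send DMA (SDMA) engine.  'hdrwords' and
 * 'dwords' are the header and payload lengths in 32-bit words, 'len'
 * is the payload length in bytes, and 'plen' is the length programmed
 * into the PBC.
 */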
 770static int qib_verbs_send_dma(struct rvt_qp *qp, struct ib_header *hdr,
 771                              u32 hdrwords, struct rvt_sge_state *ss, u32 len,
 772                              u32 plen, u32 dwords)
 773{
 774        struct qib_qp_priv *priv = qp->priv;
 775        struct qib_ibdev *dev = to_idev(qp->ibqp.device);
 776        struct qib_devdata *dd = dd_from_dev(dev);
 777        struct qib_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num);
 778        struct qib_pportdata *ppd = ppd_from_ibp(ibp);
 779        struct qib_verbs_txreq *tx;
 780        struct qib_pio_header *phdr;
 781        u32 control;
 782        u32 ndesc;
 783        int ret;
 784
 785        tx = priv->s_tx;
 786        if (tx) {
 787                priv->s_tx = NULL;
 788                /* resend previously constructed packet */
 789                ret = qib_sdma_verbs_send(ppd, tx->ss, tx->dwords, tx);
 790                goto bail;
 791        }
 792
 793        tx = get_txreq(dev, qp);
 794        if (IS_ERR(tx))
 795                goto bail_tx;
 796
 797        control = dd->f_setpbc_control(ppd, plen, qp->s_srate,
 798                                       be16_to_cpu(hdr->lrh[0]) >> 12);
 799        tx->qp = qp;
 800        tx->wqe = qp->s_wqe;
 801        tx->mr = qp->s_rdma_mr;
 802        if (qp->s_rdma_mr)
 803                qp->s_rdma_mr = NULL;
 804        tx->txreq.callback = sdma_complete;
 805        if (dd->flags & QIB_HAS_SDMA_TIMEOUT)
 806                tx->txreq.flags = QIB_SDMA_TXREQ_F_HEADTOHOST;
 807        else
 808                tx->txreq.flags = QIB_SDMA_TXREQ_F_INTREQ;
 809        if (plen + 1 > dd->piosize2kmax_dwords)
 810                tx->txreq.flags |= QIB_SDMA_TXREQ_F_USELARGEBUF;
 811
 812        if (len) {
 813                /*
 814                 * Don't try to DMA if it takes more descriptors than
 815                 * the queue holds.
 816                 */
 817                ndesc = qib_count_sge(ss, len);
 818                if (ndesc >= ppd->sdma_descq_cnt)
 819                        ndesc = 0;
 820        } else
 821                ndesc = 1;
 822        if (ndesc) {
 823                phdr = &dev->pio_hdrs[tx->hdr_inx];
 824                phdr->pbc[0] = cpu_to_le32(plen);
 825                phdr->pbc[1] = cpu_to_le32(control);
 826                memcpy(&phdr->hdr, hdr, hdrwords << 2);
 827                tx->txreq.flags |= QIB_SDMA_TXREQ_F_FREEDESC;
 828                tx->txreq.sg_count = ndesc;
 829                tx->txreq.addr = dev->pio_hdrs_phys +
 830                        tx->hdr_inx * sizeof(struct qib_pio_header);
 831                tx->hdr_dwords = hdrwords + 2; /* add PBC length */
 832                ret = qib_sdma_verbs_send(ppd, ss, dwords, tx);
 833                goto bail;
 834        }
 835
 836        /* Allocate a buffer and copy the header and payload to it. */
 837        tx->hdr_dwords = plen + 1;
 838        phdr = kmalloc(tx->hdr_dwords << 2, GFP_ATOMIC);
 839        if (!phdr)
 840                goto err_tx;
 841        phdr->pbc[0] = cpu_to_le32(plen);
 842        phdr->pbc[1] = cpu_to_le32(control);
 843        memcpy(&phdr->hdr, hdr, hdrwords << 2);
 844        qib_copy_from_sge((u32 *) &phdr->hdr + hdrwords, ss, len);
 845
 846        tx->txreq.addr = dma_map_single(&dd->pcidev->dev, phdr,
 847                                        tx->hdr_dwords << 2, DMA_TO_DEVICE);
 848        if (dma_mapping_error(&dd->pcidev->dev, tx->txreq.addr))
 849                goto map_err;
 850        tx->align_buf = phdr;
 851        tx->txreq.flags |= QIB_SDMA_TXREQ_F_FREEBUF;
 852        tx->txreq.sg_count = 1;
 853        ret = qib_sdma_verbs_send(ppd, NULL, 0, tx);
 854        goto unaligned;
 855
 856map_err:
 857        kfree(phdr);
 858err_tx:
 859        qib_put_txreq(tx);
 860        ret = wait_kmem(dev, qp);
 861unaligned:
 862        ibp->rvp.n_unaligned++;
 863bail:
 864        return ret;
 865bail_tx:
 866        ret = PTR_ERR(tx);
 867        goto bail;
 868}
 869
 870/*
 871 * If we are now in the error state, return zero to flush the
 872 * send work request.
 873 */
 874static int no_bufs_available(struct rvt_qp *qp)
 875{
 876        struct qib_qp_priv *priv = qp->priv;
 877        struct qib_ibdev *dev = to_idev(qp->ibqp.device);
 878        struct qib_devdata *dd;
 879        unsigned long flags;
 880        int ret = 0;
 881
 882        /*
  883         * Note that as soon as dd->f_wantpiobuf_intr() is called and
 884         * possibly before it returns, qib_ib_piobufavail()
 885         * could be called. Therefore, put QP on the I/O wait list before
 886         * enabling the PIO avail interrupt.
 887         */
 888        spin_lock_irqsave(&qp->s_lock, flags);
 889        if (ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK) {
 890                spin_lock(&dev->rdi.pending_lock);
 891                if (list_empty(&priv->iowait)) {
 892                        dev->n_piowait++;
 893                        qp->s_flags |= RVT_S_WAIT_PIO;
 894                        list_add_tail(&priv->iowait, &dev->piowait);
 895                        dd = dd_from_dev(dev);
 896                        dd->f_wantpiobuf_intr(dd, 1);
 897                }
 898                spin_unlock(&dev->rdi.pending_lock);
 899                qp->s_flags &= ~RVT_S_BUSY;
 900                ret = -EBUSY;
 901        }
 902        spin_unlock_irqrestore(&qp->s_lock, flags);
 903        return ret;
 904}
 905
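/*
 * Send a packet by copying it into a PIO send buffer.  Returns the
 * result of no_bufs_available() if no send buffer could be allocated.
 */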
 906static int qib_verbs_send_pio(struct rvt_qp *qp, struct ib_header *ibhdr,
 907                              u32 hdrwords, struct rvt_sge_state *ss, u32 len,
 908                              u32 plen, u32 dwords)
 909{
 910        struct qib_devdata *dd = dd_from_ibdev(qp->ibqp.device);
 911        struct qib_pportdata *ppd = dd->pport + qp->port_num - 1;
 912        u32 *hdr = (u32 *) ibhdr;
 913        u32 __iomem *piobuf_orig;
 914        u32 __iomem *piobuf;
 915        u64 pbc;
 916        unsigned long flags;
 917        unsigned flush_wc;
 918        u32 control;
 919        u32 pbufn;
 920
 921        control = dd->f_setpbc_control(ppd, plen, qp->s_srate,
 922                be16_to_cpu(ibhdr->lrh[0]) >> 12);
 923        pbc = ((u64) control << 32) | plen;
 924        piobuf = dd->f_getsendbuf(ppd, pbc, &pbufn);
 925        if (unlikely(piobuf == NULL))
 926                return no_bufs_available(qp);
 927
 928        /*
 929         * Write the pbc.
 930         * We have to flush after the PBC for correctness on some cpus
 931         * or WC buffer can be written out of order.
 932         */
 933        writeq(pbc, piobuf);
 934        piobuf_orig = piobuf;
 935        piobuf += 2;
 936
 937        flush_wc = dd->flags & QIB_PIO_FLUSH_WC;
 938        if (len == 0) {
 939                /*
 940                 * If there is just the header portion, must flush before
 941                 * writing last word of header for correctness, and after
 942                 * the last header word (trigger word).
 943                 */
 944                if (flush_wc) {
 945                        qib_flush_wc();
 946                        qib_pio_copy(piobuf, hdr, hdrwords - 1);
 947                        qib_flush_wc();
 948                        __raw_writel(hdr[hdrwords - 1], piobuf + hdrwords - 1);
 949                        qib_flush_wc();
 950                } else
 951                        qib_pio_copy(piobuf, hdr, hdrwords);
 952                goto done;
 953        }
 954
 955        if (flush_wc)
 956                qib_flush_wc();
 957        qib_pio_copy(piobuf, hdr, hdrwords);
 958        piobuf += hdrwords;
 959
 960        /* The common case is aligned and contained in one segment. */
 961        if (likely(ss->num_sge == 1 && len <= ss->sge.length &&
 962                   !((unsigned long)ss->sge.vaddr & (sizeof(u32) - 1)))) {
 963                u32 *addr = (u32 *) ss->sge.vaddr;
 964
 965                /* Update address before sending packet. */
 966                rvt_update_sge(ss, len, false);
 967                if (flush_wc) {
 968                        qib_pio_copy(piobuf, addr, dwords - 1);
  969                        /* must flush everything early, before the trigger word */
 970                        qib_flush_wc();
 971                        __raw_writel(addr[dwords - 1], piobuf + dwords - 1);
 972                        /* be sure trigger word is written */
 973                        qib_flush_wc();
 974                } else
 975                        qib_pio_copy(piobuf, addr, dwords);
 976                goto done;
 977        }
 978        copy_io(piobuf, ss, len, flush_wc);
 979done:
 980        if (dd->flags & QIB_USE_SPCL_TRIG) {
 981                u32 spcl_off = (pbufn >= dd->piobcnt2k) ? 2047 : 1023;
 982
 983                qib_flush_wc();
 984                __raw_writel(0xaebecede, piobuf_orig + spcl_off);
 985        }
 986        qib_sendbuf_done(dd, pbufn);
 987        if (qp->s_rdma_mr) {
 988                rvt_put_mr(qp->s_rdma_mr);
 989                qp->s_rdma_mr = NULL;
 990        }
 991        if (qp->s_wqe) {
 992                spin_lock_irqsave(&qp->s_lock, flags);
 993                rvt_send_complete(qp, qp->s_wqe, IB_WC_SUCCESS);
 994                spin_unlock_irqrestore(&qp->s_lock, flags);
 995        } else if (qp->ibqp.qp_type == IB_QPT_RC) {
 996                spin_lock_irqsave(&qp->s_lock, flags);
 997                qib_rc_send_complete(qp, ibhdr);
 998                spin_unlock_irqrestore(&qp->s_lock, flags);
 999        }
1000        return 0;
1001}
1002
1003/**
1004 * qib_verbs_send - send a packet
1005 * @qp: the QP to send on
1006 * @hdr: the packet header
1007 * @hdrwords: the number of 32-bit words in the header
1008 * @ss: the SGE to send
1009 * @len: the length of the packet in bytes
1010 *
 1011 * Return zero if the packet was sent or queued OK.
 1012 * Return non-zero (with RVT_S_BUSY cleared in qp->s_flags) otherwise.
1013 */
1014int qib_verbs_send(struct rvt_qp *qp, struct ib_header *hdr,
1015                   u32 hdrwords, struct rvt_sge_state *ss, u32 len)
1016{
1017        struct qib_devdata *dd = dd_from_ibdev(qp->ibqp.device);
1018        u32 plen;
1019        int ret;
1020        u32 dwords = (len + 3) >> 2;
1021
1022        /*
1023         * Calculate the send buffer trigger address.
1024         * The +1 counts for the pbc control dword following the pbc length.
1025         */
1026        plen = hdrwords + dwords + 1;
1027
1028        /*
1029         * VL15 packets (IB_QPT_SMI) will always use PIO, so we
1030         * can defer SDMA restart until link goes ACTIVE without
1031         * worrying about just how we got there.
1032         */
1033        if (qp->ibqp.qp_type == IB_QPT_SMI ||
1034            !(dd->flags & QIB_HAS_SEND_DMA))
1035                ret = qib_verbs_send_pio(qp, hdr, hdrwords, ss, len,
1036                                         plen, dwords);
1037        else
1038                ret = qib_verbs_send_dma(qp, hdr, hdrwords, ss, len,
1039                                         plen, dwords);
1040
1041        return ret;
1042}
1043
1044int qib_snapshot_counters(struct qib_pportdata *ppd, u64 *swords,
1045                          u64 *rwords, u64 *spkts, u64 *rpkts,
1046                          u64 *xmit_wait)
1047{
1048        int ret;
1049        struct qib_devdata *dd = ppd->dd;
1050
1051        if (!(dd->flags & QIB_PRESENT)) {
1052                /* no hardware, freeze, etc. */
1053                ret = -EINVAL;
1054                goto bail;
1055        }
1056        *swords = dd->f_portcntr(ppd, QIBPORTCNTR_WORDSEND);
1057        *rwords = dd->f_portcntr(ppd, QIBPORTCNTR_WORDRCV);
1058        *spkts = dd->f_portcntr(ppd, QIBPORTCNTR_PKTSEND);
1059        *rpkts = dd->f_portcntr(ppd, QIBPORTCNTR_PKTRCV);
1060        *xmit_wait = dd->f_portcntr(ppd, QIBPORTCNTR_SENDSTALL);
1061
1062        ret = 0;
1063
1064bail:
1065        return ret;
1066}
1067
1068/**
1069 * qib_get_counters - get various chip counters
 1070 * @ppd: the qlogic_ib port data
1071 * @cntrs: counters are placed here
1072 *
1073 * Return the counters needed by recv_pma_get_portcounters().
1074 */
1075int qib_get_counters(struct qib_pportdata *ppd,
1076                     struct qib_verbs_counters *cntrs)
1077{
1078        int ret;
1079
1080        if (!(ppd->dd->flags & QIB_PRESENT)) {
1081                /* no hardware, freeze, etc. */
1082                ret = -EINVAL;
1083                goto bail;
1084        }
1085        cntrs->symbol_error_counter =
1086                ppd->dd->f_portcntr(ppd, QIBPORTCNTR_IBSYMBOLERR);
1087        cntrs->link_error_recovery_counter =
1088                ppd->dd->f_portcntr(ppd, QIBPORTCNTR_IBLINKERRRECOV);
1089        /*
1090         * The link downed counter counts when the other side downs the
1091         * connection.  We add in the number of times we downed the link
1092         * due to local link integrity errors to compensate.
1093         */
1094        cntrs->link_downed_counter =
1095                ppd->dd->f_portcntr(ppd, QIBPORTCNTR_IBLINKDOWN);
1096        cntrs->port_rcv_errors =
1097                ppd->dd->f_portcntr(ppd, QIBPORTCNTR_RXDROPPKT) +
1098                ppd->dd->f_portcntr(ppd, QIBPORTCNTR_RCVOVFL) +
1099                ppd->dd->f_portcntr(ppd, QIBPORTCNTR_ERR_RLEN) +
1100                ppd->dd->f_portcntr(ppd, QIBPORTCNTR_INVALIDRLEN) +
1101                ppd->dd->f_portcntr(ppd, QIBPORTCNTR_ERRLINK) +
1102                ppd->dd->f_portcntr(ppd, QIBPORTCNTR_ERRICRC) +
1103                ppd->dd->f_portcntr(ppd, QIBPORTCNTR_ERRVCRC) +
1104                ppd->dd->f_portcntr(ppd, QIBPORTCNTR_ERRLPCRC) +
1105                ppd->dd->f_portcntr(ppd, QIBPORTCNTR_BADFORMAT);
1106        cntrs->port_rcv_errors +=
1107                ppd->dd->f_portcntr(ppd, QIBPORTCNTR_RXLOCALPHYERR);
1108        cntrs->port_rcv_errors +=
1109                ppd->dd->f_portcntr(ppd, QIBPORTCNTR_RXVLERR);
1110        cntrs->port_rcv_remphys_errors =
1111                ppd->dd->f_portcntr(ppd, QIBPORTCNTR_RCVEBP);
1112        cntrs->port_xmit_discards =
1113                ppd->dd->f_portcntr(ppd, QIBPORTCNTR_UNSUPVL);
1114        cntrs->port_xmit_data = ppd->dd->f_portcntr(ppd,
1115                        QIBPORTCNTR_WORDSEND);
1116        cntrs->port_rcv_data = ppd->dd->f_portcntr(ppd,
1117                        QIBPORTCNTR_WORDRCV);
1118        cntrs->port_xmit_packets = ppd->dd->f_portcntr(ppd,
1119                        QIBPORTCNTR_PKTSEND);
1120        cntrs->port_rcv_packets = ppd->dd->f_portcntr(ppd,
1121                        QIBPORTCNTR_PKTRCV);
1122        cntrs->local_link_integrity_errors =
1123                ppd->dd->f_portcntr(ppd, QIBPORTCNTR_LLI);
1124        cntrs->excessive_buffer_overrun_errors =
1125                ppd->dd->f_portcntr(ppd, QIBPORTCNTR_EXCESSBUFOVFL);
1126        cntrs->vl15_dropped =
1127                ppd->dd->f_portcntr(ppd, QIBPORTCNTR_VL15PKTDROP);
1128
1129        ret = 0;
1130
1131bail:
1132        return ret;
1133}
1134
1135/**
1136 * qib_ib_piobufavail - callback when a PIO buffer is available
1137 * @dd: the device pointer
1138 *
1139 * This is called from qib_intr() at interrupt level when a PIO buffer is
1140 * available after qib_verbs_send() returned an error that no buffers were
1141 * available. Disable the interrupt if there are no more QPs waiting.
1142 */
1143void qib_ib_piobufavail(struct qib_devdata *dd)
1144{
1145        struct qib_ibdev *dev = &dd->verbs_dev;
1146        struct list_head *list;
1147        struct rvt_qp *qps[5];
1148        struct rvt_qp *qp;
1149        unsigned long flags;
1150        unsigned i, n;
1151        struct qib_qp_priv *priv;
1152
1153        list = &dev->piowait;
1154        n = 0;
1155
1156        /*
1157         * Note: checking that the piowait list is empty and clearing
1158         * the buffer available interrupt needs to be atomic or we
1159         * could end up with QPs on the wait list with the interrupt
1160         * disabled.
1161         */
1162        spin_lock_irqsave(&dev->rdi.pending_lock, flags);
1163        while (!list_empty(list)) {
1164                if (n == ARRAY_SIZE(qps))
1165                        goto full;
1166                priv = list_entry(list->next, struct qib_qp_priv, iowait);
1167                qp = priv->owner;
1168                list_del_init(&priv->iowait);
1169                rvt_get_qp(qp);
1170                qps[n++] = qp;
1171        }
1172        dd->f_wantpiobuf_intr(dd, 0);
1173full:
1174        spin_unlock_irqrestore(&dev->rdi.pending_lock, flags);
1175
1176        for (i = 0; i < n; i++) {
1177                qp = qps[i];
1178
1179                spin_lock_irqsave(&qp->s_lock, flags);
1180                if (qp->s_flags & RVT_S_WAIT_PIO) {
1181                        qp->s_flags &= ~RVT_S_WAIT_PIO;
1182                        qib_schedule_send(qp);
1183                }
1184                spin_unlock_irqrestore(&qp->s_lock, flags);
1185
 1186                /* Notify rvt_destroy_qp() if it is waiting. */
1187                rvt_put_qp(qp);
1188        }
1189}
1190
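/* rdmavt callback: fill in ib_port_attr for one port. */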
1191static int qib_query_port(struct rvt_dev_info *rdi, u32 port_num,
1192                          struct ib_port_attr *props)
1193{
1194        struct qib_ibdev *ibdev = container_of(rdi, struct qib_ibdev, rdi);
1195        struct qib_devdata *dd = dd_from_dev(ibdev);
1196        struct qib_pportdata *ppd = &dd->pport[port_num - 1];
1197        enum ib_mtu mtu;
1198        u16 lid = ppd->lid;
1199
 1200        /* props is zeroed by the caller, so avoid zeroing it here */
1201        props->lid = lid ? lid : be16_to_cpu(IB_LID_PERMISSIVE);
1202        props->lmc = ppd->lmc;
1203        props->state = dd->f_iblink_state(ppd->lastibcstat);
1204        props->phys_state = dd->f_ibphys_portstate(ppd->lastibcstat);
1205        props->gid_tbl_len = QIB_GUIDS_PER_PORT;
1206        props->active_width = ppd->link_width_active;
1207        /* See rate_show() */
1208        props->active_speed = ppd->link_speed_active;
1209        props->max_vl_num = qib_num_vls(ppd->vls_supported);
1210
1211        props->max_mtu = qib_ibmtu ? qib_ibmtu : IB_MTU_4096;
1212        switch (ppd->ibmtu) {
1213        case 4096:
1214                mtu = IB_MTU_4096;
1215                break;
1216        case 2048:
1217                mtu = IB_MTU_2048;
1218                break;
1219        case 1024:
1220                mtu = IB_MTU_1024;
1221                break;
1222        case 512:
1223                mtu = IB_MTU_512;
1224                break;
1225        case 256:
1226                mtu = IB_MTU_256;
1227                break;
1228        default:
1229                mtu = IB_MTU_2048;
1230        }
1231        props->active_mtu = mtu;
1232
1233        return 0;
1234}
1235
1236static int qib_modify_device(struct ib_device *device,
1237                             int device_modify_mask,
1238                             struct ib_device_modify *device_modify)
1239{
1240        struct qib_devdata *dd = dd_from_ibdev(device);
1241        unsigned i;
1242        int ret;
1243
1244        if (device_modify_mask & ~(IB_DEVICE_MODIFY_SYS_IMAGE_GUID |
1245                                   IB_DEVICE_MODIFY_NODE_DESC)) {
1246                ret = -EOPNOTSUPP;
1247                goto bail;
1248        }
1249
1250        if (device_modify_mask & IB_DEVICE_MODIFY_NODE_DESC) {
1251                memcpy(device->node_desc, device_modify->node_desc,
1252                       IB_DEVICE_NODE_DESC_MAX);
1253                for (i = 0; i < dd->num_pports; i++) {
1254                        struct qib_ibport *ibp = &dd->pport[i].ibport_data;
1255
1256                        qib_node_desc_chg(ibp);
1257                }
1258        }
1259
1260        if (device_modify_mask & IB_DEVICE_MODIFY_SYS_IMAGE_GUID) {
1261                ib_qib_sys_image_guid =
1262                        cpu_to_be64(device_modify->sys_image_guid);
1263                for (i = 0; i < dd->num_pports; i++) {
1264                        struct qib_ibport *ibp = &dd->pport[i].ibport_data;
1265
1266                        qib_sys_guid_chg(ibp);
1267                }
1268        }
1269
1270        ret = 0;
1271
1272bail:
1273        return ret;
1274}
1275
1276static int qib_shut_down_port(struct rvt_dev_info *rdi, u32 port_num)
1277{
1278        struct qib_ibdev *ibdev = container_of(rdi, struct qib_ibdev, rdi);
1279        struct qib_devdata *dd = dd_from_dev(ibdev);
1280        struct qib_pportdata *ppd = &dd->pport[port_num - 1];
1281
1282        qib_set_linkstate(ppd, QIB_IB_LINKDOWN);
1283
1284        return 0;
1285}
1286
1287static int qib_get_guid_be(struct rvt_dev_info *rdi, struct rvt_ibport *rvp,
1288                           int guid_index, __be64 *guid)
1289{
1290        struct qib_ibport *ibp = container_of(rvp, struct qib_ibport, rvp);
1291        struct qib_pportdata *ppd = ppd_from_ibp(ibp);
1292
1293        if (guid_index == 0)
1294                *guid = ppd->guid;
1295        else if (guid_index < QIB_GUIDS_PER_PORT)
1296                *guid = ibp->guids[guid_index - 1];
1297        else
1298                return -EINVAL;
1299
1300        return 0;
1301}
1302
1303int qib_check_ah(struct ib_device *ibdev, struct rdma_ah_attr *ah_attr)
1304{
1305        if (rdma_ah_get_sl(ah_attr) > 15)
1306                return -EINVAL;
1307
1308        if (rdma_ah_get_dlid(ah_attr) == 0)
1309                return -EINVAL;
1310        if (rdma_ah_get_dlid(ah_attr) >=
1311                be16_to_cpu(IB_MULTICAST_LID_BASE) &&
1312            rdma_ah_get_dlid(ah_attr) !=
1313                be16_to_cpu(IB_LID_PERMISSIVE) &&
1314            !(rdma_ah_get_ah_flags(ah_attr) & IB_AH_GRH))
1315                return -EINVAL;
1316
1317        return 0;
1318}
1319
1320static void qib_notify_new_ah(struct ib_device *ibdev,
1321                              struct rdma_ah_attr *ah_attr,
1322                              struct rvt_ah *ah)
1323{
1324        struct qib_ibport *ibp;
1325        struct qib_pportdata *ppd;
1326
1327        /*
1328         * Do not trust reading anything from rvt_ah at this point as it is not
 1329         * done being set up. We can, however, modify the fields we need to set.
1330         */
1331
1332        ibp = to_iport(ibdev, rdma_ah_get_port_num(ah_attr));
1333        ppd = ppd_from_ibp(ibp);
1334        ah->vl = ibp->sl_to_vl[rdma_ah_get_sl(&ah->attr)];
1335        ah->log_pmtu = ilog2(ppd->ibmtu);
1336}
1337
1338struct ib_ah *qib_create_qp0_ah(struct qib_ibport *ibp, u16 dlid)
1339{
1340        struct rdma_ah_attr attr;
1341        struct ib_ah *ah = ERR_PTR(-EINVAL);
1342        struct rvt_qp *qp0;
1343        struct qib_pportdata *ppd = ppd_from_ibp(ibp);
1344        struct qib_devdata *dd = dd_from_ppd(ppd);
1345        u32 port_num = ppd->port;
1346
1347        memset(&attr, 0, sizeof(attr));
1348        attr.type = rdma_ah_find_type(&dd->verbs_dev.rdi.ibdev, port_num);
1349        rdma_ah_set_dlid(&attr, dlid);
1350        rdma_ah_set_port_num(&attr, port_num);
1351        rcu_read_lock();
1352        qp0 = rcu_dereference(ibp->rvp.qp[0]);
1353        if (qp0)
1354                ah = rdma_create_ah(qp0->ibqp.pd, &attr, 0);
1355        rcu_read_unlock();
1356        return ah;
1357}
1358
1359/**
1360 * qib_get_npkeys - return the size of the PKEY table for context 0
1361 * @dd: the qlogic_ib device
1362 */
1363unsigned qib_get_npkeys(struct qib_devdata *dd)
1364{
1365        return ARRAY_SIZE(dd->rcd[0]->pkeys);
1366}
1367
1368/*
1369 * Return the indexed PKEY from the port PKEY table.
 1370 * No need to validate rcd[ctxt]; the port is set up if we are here.
1371 */
1372unsigned qib_get_pkey(struct qib_ibport *ibp, unsigned index)
1373{
1374        struct qib_pportdata *ppd = ppd_from_ibp(ibp);
1375        struct qib_devdata *dd = ppd->dd;
1376        unsigned ctxt = ppd->hw_pidx;
1377        unsigned ret;
1378
1379        /* dd->rcd null if mini_init or some init failures */
1380        if (!dd->rcd || index >= ARRAY_SIZE(dd->rcd[ctxt]->pkeys))
1381                ret = 0;
1382        else
1383                ret = dd->rcd[ctxt]->pkeys[index];
1384
1385        return ret;
1386}
1387
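/*
 * Initialize the verbs state of one IB port: default GID prefix,
 * capability flags, PMA counter selects, and a snapshot of the HW
 * counters to use as the zero baseline.
 */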
1388static void init_ibport(struct qib_pportdata *ppd)
1389{
1390        struct qib_verbs_counters cntrs;
1391        struct qib_ibport *ibp = &ppd->ibport_data;
1392
1393        spin_lock_init(&ibp->rvp.lock);
1394        /* Set the prefix to the default value (see ch. 4.1.1) */
1395        ibp->rvp.gid_prefix = IB_DEFAULT_GID_PREFIX;
1396        ibp->rvp.sm_lid = be16_to_cpu(IB_LID_PERMISSIVE);
1397        ibp->rvp.port_cap_flags = IB_PORT_SYS_IMAGE_GUID_SUP |
1398                IB_PORT_CLIENT_REG_SUP | IB_PORT_SL_MAP_SUP |
1399                IB_PORT_TRAP_SUP | IB_PORT_AUTO_MIGR_SUP |
1400                IB_PORT_DR_NOTICE_SUP | IB_PORT_CAP_MASK_NOTICE_SUP |
1401                IB_PORT_OTHER_LOCAL_CHANGES_SUP;
1402        if (ppd->dd->flags & QIB_HAS_LINK_LATENCY)
1403                ibp->rvp.port_cap_flags |= IB_PORT_LINK_LATENCY_SUP;
1404        ibp->rvp.pma_counter_select[0] = IB_PMA_PORT_XMIT_DATA;
1405        ibp->rvp.pma_counter_select[1] = IB_PMA_PORT_RCV_DATA;
1406        ibp->rvp.pma_counter_select[2] = IB_PMA_PORT_XMIT_PKTS;
1407        ibp->rvp.pma_counter_select[3] = IB_PMA_PORT_RCV_PKTS;
1408        ibp->rvp.pma_counter_select[4] = IB_PMA_PORT_XMIT_WAIT;
1409
1410        /* Snapshot current HW counters to "clear" them. */
1411        qib_get_counters(ppd, &cntrs);
1412        ibp->z_symbol_error_counter = cntrs.symbol_error_counter;
1413        ibp->z_link_error_recovery_counter =
1414                cntrs.link_error_recovery_counter;
1415        ibp->z_link_downed_counter = cntrs.link_downed_counter;
1416        ibp->z_port_rcv_errors = cntrs.port_rcv_errors;
1417        ibp->z_port_rcv_remphys_errors = cntrs.port_rcv_remphys_errors;
1418        ibp->z_port_xmit_discards = cntrs.port_xmit_discards;
1419        ibp->z_port_xmit_data = cntrs.port_xmit_data;
1420        ibp->z_port_rcv_data = cntrs.port_rcv_data;
1421        ibp->z_port_xmit_packets = cntrs.port_xmit_packets;
1422        ibp->z_port_rcv_packets = cntrs.port_rcv_packets;
1423        ibp->z_local_link_integrity_errors =
1424                cntrs.local_link_integrity_errors;
1425        ibp->z_excessive_buffer_overrun_errors =
1426                cntrs.excessive_buffer_overrun_errors;
1427        ibp->z_vl15_dropped = cntrs.vl15_dropped;
1428        RCU_INIT_POINTER(ibp->rvp.qp[0], NULL);
1429        RCU_INIT_POINTER(ibp->rvp.qp[1], NULL);
1430}
1431
1432/**
1433 * qib_fill_device_attr - Fill in rvt dev info device attributes.
1434 * @dd: the device data structure
1435 */
1436static void qib_fill_device_attr(struct qib_devdata *dd)
1437{
1438        struct rvt_dev_info *rdi = &dd->verbs_dev.rdi;
1439
1440        memset(&rdi->dparms.props, 0, sizeof(rdi->dparms.props));
1441
1442        rdi->dparms.props.max_pd = ib_qib_max_pds;
1443        rdi->dparms.props.max_ah = ib_qib_max_ahs;
1444        rdi->dparms.props.device_cap_flags = IB_DEVICE_BAD_PKEY_CNTR |
1445                IB_DEVICE_BAD_QKEY_CNTR | IB_DEVICE_SHUTDOWN_PORT |
1446                IB_DEVICE_SYS_IMAGE_GUID | IB_DEVICE_RC_RNR_NAK_GEN |
1447                IB_DEVICE_PORT_ACTIVE_EVENT | IB_DEVICE_SRQ_RESIZE;
1448        rdi->dparms.props.page_size_cap = PAGE_SIZE;
1449        rdi->dparms.props.vendor_id =
1450                QIB_SRC_OUI_1 << 16 | QIB_SRC_OUI_2 << 8 | QIB_SRC_OUI_3;
1451        rdi->dparms.props.vendor_part_id = dd->deviceid;
1452        rdi->dparms.props.hw_ver = dd->minrev;
1453        rdi->dparms.props.sys_image_guid = ib_qib_sys_image_guid;
1454        rdi->dparms.props.max_mr_size = ~0ULL;
1455        rdi->dparms.props.max_qp = ib_qib_max_qps;
1456        rdi->dparms.props.max_qp_wr = ib_qib_max_qp_wrs;
1457        rdi->dparms.props.max_send_sge = ib_qib_max_sges;
1458        rdi->dparms.props.max_recv_sge = ib_qib_max_sges;
1459        rdi->dparms.props.max_sge_rd = ib_qib_max_sges;
1460        rdi->dparms.props.max_cq = ib_qib_max_cqs;
1461        rdi->dparms.props.max_cqe = ib_qib_max_cqes;
1463        rdi->dparms.props.max_qp_rd_atom = QIB_MAX_RDMA_ATOMIC;
1464        rdi->dparms.props.max_qp_init_rd_atom = 255;
1465        rdi->dparms.props.max_srq = ib_qib_max_srqs;
1466        rdi->dparms.props.max_srq_wr = ib_qib_max_srq_wrs;
1467        rdi->dparms.props.max_srq_sge = ib_qib_max_srq_sges;
1468        rdi->dparms.props.atomic_cap = IB_ATOMIC_GLOB;
1469        rdi->dparms.props.max_pkeys = qib_get_npkeys(dd);
1470        rdi->dparms.props.max_mcast_grp = ib_qib_max_mcast_grps;
1471        rdi->dparms.props.max_mcast_qp_attach = ib_qib_max_mcast_qp_attached;
1472        rdi->dparms.props.max_total_mcast_qp_attach =
1473                                        rdi->dparms.props.max_mcast_qp_attach *
1474                                        rdi->dparms.props.max_mcast_grp;
1475        /* post send table */
1476        dd->verbs_dev.rdi.post_parms = qib_post_parms;
1477
1478        /* opcode translation table */
1479        dd->verbs_dev.rdi.wc_opcode = ib_qib_wc_opcode;
1480}
1481
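    /*
     * Verbs device operations supplied by this driver.  Anything not
     * listed here (QP, CQ, MR, PD handling, etc.) is provided by rdmavt
     * once the device is registered via rvt_register_device().
     */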
1482static const struct ib_device_ops qib_dev_ops = {
1483        .owner = THIS_MODULE,
1484        .driver_id = RDMA_DRIVER_QIB,
1485
1486        .port_groups = qib_attr_port_groups,
1487        .device_group = &qib_attr_group,
1488        .modify_device = qib_modify_device,
1489        .process_mad = qib_process_mad,
1490};
1491
1492/**
1493 * qib_register_ib_device - register our device with the infiniband core
1494 * @dd: the device data structure
1495 * Return: 0 on success, or a negative errno on failure.
1496 */
1497int qib_register_ib_device(struct qib_devdata *dd)
1498{
1499        struct qib_ibdev *dev = &dd->verbs_dev;
1500        struct ib_device *ibdev = &dev->rdi.ibdev;
1501        struct qib_pportdata *ppd = dd->pport;
1502        unsigned i, ctxt;
1503        int ret;
1504
1505        for (i = 0; i < dd->num_pports; i++)
1506                init_ibport(ppd + i);
1507
1508        /* Only need to initialize non-zero fields. */
1509        timer_setup(&dev->mem_timer, mem_timer, 0);
1510
1511        INIT_LIST_HEAD(&dev->piowait);
1512        INIT_LIST_HEAD(&dev->dmawait);
1513        INIT_LIST_HEAD(&dev->txwait);
1514        INIT_LIST_HEAD(&dev->memwait);
1515        INIT_LIST_HEAD(&dev->txreq_free);
1516
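            /*
             * Pre-allocate a DMA-coherent array of PIO headers and a free
             * list of verbs tx requests, one of each per SDMA descriptor.
             */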
1517        if (ppd->sdma_descq_cnt) {
1518                dev->pio_hdrs = dma_alloc_coherent(&dd->pcidev->dev,
1519                                                ppd->sdma_descq_cnt *
1520                                                sizeof(struct qib_pio_header),
1521                                                &dev->pio_hdrs_phys,
1522                                                GFP_KERNEL);
1523                if (!dev->pio_hdrs) {
1524                        ret = -ENOMEM;
1525                        goto err_hdrs;
1526                }
1527        }
1528
1529        for (i = 0; i < ppd->sdma_descq_cnt; i++) {
1530                struct qib_verbs_txreq *tx;
1531
1532                tx = kzalloc(sizeof(*tx), GFP_KERNEL);
1533                if (!tx) {
1534                        ret = -ENOMEM;
1535                        goto err_tx;
1536                }
1537                tx->hdr_inx = i;
1538                list_add(&tx->txreq.list, &dev->txreq_free);
1539        }
1540
1541        /*
1542         * The system image GUID is supposed to be the same for all
1543 * IB HCAs in a single system, but since there can be other
1544         * device types in the system, we can't be sure this is unique.
1545         */
1546        if (!ib_qib_sys_image_guid)
1547                ib_qib_sys_image_guid = ppd->guid;
1548
1549        ibdev->node_guid = ppd->guid;
1550        ibdev->phys_port_cnt = dd->num_pports;
1551        ibdev->dev.parent = &dd->pcidev->dev;
1552
1553        snprintf(ibdev->node_desc, sizeof(ibdev->node_desc),
1554                 "Intel Infiniband HCA %s", init_utsname()->nodename);
1555
1556        /*
1557         * Fill in rvt info object.
1558         */
1559        dd->verbs_dev.rdi.driver_f.get_pci_dev = qib_get_pci_dev;
1560        dd->verbs_dev.rdi.driver_f.check_ah = qib_check_ah;
1561        dd->verbs_dev.rdi.driver_f.setup_wqe = qib_check_send_wqe;
1562        dd->verbs_dev.rdi.driver_f.notify_new_ah = qib_notify_new_ah;
1563        dd->verbs_dev.rdi.driver_f.alloc_qpn = qib_alloc_qpn;
1564        dd->verbs_dev.rdi.driver_f.qp_priv_alloc = qib_qp_priv_alloc;
1565        dd->verbs_dev.rdi.driver_f.qp_priv_free = qib_qp_priv_free;
1566        dd->verbs_dev.rdi.driver_f.free_all_qps = qib_free_all_qps;
1567        dd->verbs_dev.rdi.driver_f.notify_qp_reset = qib_notify_qp_reset;
1568        dd->verbs_dev.rdi.driver_f.do_send = qib_do_send;
1569        dd->verbs_dev.rdi.driver_f.schedule_send = qib_schedule_send;
1570        dd->verbs_dev.rdi.driver_f.quiesce_qp = qib_quiesce_qp;
1571        dd->verbs_dev.rdi.driver_f.stop_send_queue = qib_stop_send_queue;
1572        dd->verbs_dev.rdi.driver_f.flush_qp_waiters = qib_flush_qp_waiters;
1573        dd->verbs_dev.rdi.driver_f.notify_error_qp = qib_notify_error_qp;
1574        dd->verbs_dev.rdi.driver_f.notify_restart_rc = qib_restart_rc;
1575        dd->verbs_dev.rdi.driver_f.mtu_to_path_mtu = qib_mtu_to_path_mtu;
1576        dd->verbs_dev.rdi.driver_f.mtu_from_qp = qib_mtu_from_qp;
1577        dd->verbs_dev.rdi.driver_f.get_pmtu_from_attr = qib_get_pmtu_from_attr;
1578        dd->verbs_dev.rdi.driver_f.schedule_send_no_lock = _qib_schedule_send;
1579        dd->verbs_dev.rdi.driver_f.query_port_state = qib_query_port;
1580        dd->verbs_dev.rdi.driver_f.shut_down_port = qib_shut_down_port;
1581        dd->verbs_dev.rdi.driver_f.cap_mask_chg = qib_cap_mask_chg;
1582        dd->verbs_dev.rdi.driver_f.notify_create_mad_agent =
1583                                                qib_notify_create_mad_agent;
1584        dd->verbs_dev.rdi.driver_f.notify_free_mad_agent =
1585                                                qib_notify_free_mad_agent;
1586
1587        dd->verbs_dev.rdi.dparms.max_rdma_atomic = QIB_MAX_RDMA_ATOMIC;
1588        dd->verbs_dev.rdi.driver_f.get_guid_be = qib_get_guid_be;
1589        dd->verbs_dev.rdi.dparms.lkey_table_size = qib_lkey_table_size;
1590        dd->verbs_dev.rdi.dparms.qp_table_size = ib_qib_qp_table_size;
1591        dd->verbs_dev.rdi.dparms.qpn_start = 1;
1592        dd->verbs_dev.rdi.dparms.qpn_res_start = QIB_KD_QP;
1593        dd->verbs_dev.rdi.dparms.qpn_res_end = QIB_KD_QP; /* Reserve one QP */
1594        dd->verbs_dev.rdi.dparms.qpn_inc = 1;
1595        dd->verbs_dev.rdi.dparms.qos_shift = 1;
1596        dd->verbs_dev.rdi.dparms.psn_mask = QIB_PSN_MASK;
1597        dd->verbs_dev.rdi.dparms.psn_shift = QIB_PSN_SHIFT;
1598        dd->verbs_dev.rdi.dparms.psn_modify_mask = QIB_PSN_MASK;
1599        dd->verbs_dev.rdi.dparms.nports = dd->num_pports;
1600        dd->verbs_dev.rdi.dparms.npkeys = qib_get_npkeys(dd);
1601        dd->verbs_dev.rdi.dparms.node = dd->assigned_node_id;
1602        dd->verbs_dev.rdi.dparms.core_cap_flags = RDMA_CORE_PORT_IBA_IB;
1603        dd->verbs_dev.rdi.dparms.max_mad_size = IB_MGMT_MAD_SIZE;
1604        dd->verbs_dev.rdi.dparms.sge_copy_mode = RVT_SGE_COPY_MEMCPY;
1605
1606        qib_fill_device_attr(dd);
1607
1608        ppd = dd->pport;
1609        for (i = 0; i < dd->num_pports; i++, ppd++) {
1610                ctxt = ppd->hw_pidx;
1611                rvt_init_port(&dd->verbs_dev.rdi,
1612                              &ppd->ibport_data.rvp,
1613                              i,
1614                              dd->rcd[ctxt]->pkeys);
1615        }
1616
1617        ib_set_device_ops(ibdev, &qib_dev_ops);
1618        ret = rvt_register_device(&dd->verbs_dev.rdi);
1619        if (ret)
1620                goto err_tx;
1621
1622        return ret;
1623
1624err_tx:
1625        while (!list_empty(&dev->txreq_free)) {
1626                struct list_head *l = dev->txreq_free.next;
1627                struct qib_verbs_txreq *tx;
1628
1629                list_del(l);
1630                tx = list_entry(l, struct qib_verbs_txreq, txreq.list);
1631                kfree(tx);
1632        }
            /*
             * ppd may have been advanced past the last port by the
             * rvt_init_port() loop above, so free using dd->pport, as
             * qib_unregister_ib_device() does.
             */
1633        if (dd->pport->sdma_descq_cnt)
1634                dma_free_coherent(&dd->pcidev->dev,
1635                                  dd->pport->sdma_descq_cnt *
1636                                        sizeof(struct qib_pio_header),
1637                                  dev->pio_hdrs, dev->pio_hdrs_phys);
1638err_hdrs:
1639        qib_dev_err(dd, "cannot register verbs: %d!\n", -ret);
1640        return ret;
1641}
1642
1643void qib_unregister_ib_device(struct qib_devdata *dd)
1644{
1645        struct qib_ibdev *dev = &dd->verbs_dev;
1646
1647        rvt_unregister_device(&dd->verbs_dev.rdi);
1648
1649        if (!list_empty(&dev->piowait))
1650                qib_dev_err(dd, "piowait list not empty!\n");
1651        if (!list_empty(&dev->dmawait))
1652                qib_dev_err(dd, "dmawait list not empty!\n");
1653        if (!list_empty(&dev->txwait))
1654                qib_dev_err(dd, "txwait list not empty!\n");
1655        if (!list_empty(&dev->memwait))
1656                qib_dev_err(dd, "memwait list not empty!\n");
1657
1658        del_timer_sync(&dev->mem_timer);
1659        while (!list_empty(&dev->txreq_free)) {
1660                struct list_head *l = dev->txreq_free.next;
1661                struct qib_verbs_txreq *tx;
1662
1663                list_del(l);
1664                tx = list_entry(l, struct qib_verbs_txreq, txreq.list);
1665                kfree(tx);
1666        }
1667        if (dd->pport->sdma_descq_cnt)
1668                dma_free_coherent(&dd->pcidev->dev,
1669                                  dd->pport->sdma_descq_cnt *
1670                                        sizeof(struct qib_pio_header),
1671                                  dev->pio_hdrs, dev->pio_hdrs_phys);
1672}
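
    /*
     * Illustrative usage (a sketch, not the driver's actual init path):
     * registration typically happens near the end of device bring-up and
     * is undone before the hardware is torn down, e.g.
     *
     *	ret = qib_register_ib_device(dd);
     *	if (ret)
     *		goto bail;	(hypothetical error label)
     *	...
     *	qib_unregister_ib_device(dd);
     */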
1673
1674/**
1675 * _qib_schedule_send - schedule progress
1676 * @qp: the qp
1677 *
1678 * This schedules progress without regard to the s_flags.
1679 *
1680 * It is only used in post send, which doesn't hold
1681 * the s_lock.
 *
 * Return: true if the work was queued, false if it was already queued.
1682 */
1683bool _qib_schedule_send(struct rvt_qp *qp)
1684{
1685        struct qib_ibport *ibp =
1686                to_iport(qp->ibqp.device, qp->port_num);
1687        struct qib_pportdata *ppd = ppd_from_ibp(ibp);
1688        struct qib_qp_priv *priv = qp->priv;
1689
1690        return queue_work(ppd->qib_wq, &priv->s_work);
1691}
1692
1693/**
1694 * qib_schedule_send - schedule progress
1695 * @qp: the qp
1696 *
1697 * This schedules qp progress if qib_send_ok() allows it.
1698 * The caller must hold the s_lock.
 *
 * Return: true if send work was scheduled, false otherwise.
1699 */
1700bool qib_schedule_send(struct rvt_qp *qp)
1701{
1702        if (qib_send_ok(qp))
1703                return _qib_schedule_send(qp);
1704        return false;
1705}
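
    /*
     * Illustrative caller pattern (sketch only): qib_schedule_send() is
     * called with the QP's s_lock held, typically right after new work
     * has been posted to the send queue, e.g.
     *
     *	spin_lock_irqsave(&qp->s_lock, flags);
     *	...queue the new WQE...
     *	qib_schedule_send(qp);
     *	spin_unlock_irqrestore(&qp->s_lock, flags);
     */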
1706