linux/drivers/infiniband/hw/qib/qib_verbs.c
   1/*
   2 * Copyright (c) 2012, 2013 Intel Corporation.  All rights reserved.
   3 * Copyright (c) 2006 - 2012 QLogic Corporation. All rights reserved.
   4 * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
   5 *
   6 * This software is available to you under a choice of one of two
   7 * licenses.  You may choose to be licensed under the terms of the GNU
   8 * General Public License (GPL) Version 2, available from the file
   9 * COPYING in the main directory of this source tree, or the
  10 * OpenIB.org BSD license below:
  11 *
  12 *     Redistribution and use in source and binary forms, with or
  13 *     without modification, are permitted provided that the following
  14 *     conditions are met:
  15 *
  16 *      - Redistributions of source code must retain the above
  17 *        copyright notice, this list of conditions and the following
  18 *        disclaimer.
  19 *
  20 *      - Redistributions in binary form must reproduce the above
  21 *        copyright notice, this list of conditions and the following
  22 *        disclaimer in the documentation and/or other materials
  23 *        provided with the distribution.
  24 *
  25 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  26 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  27 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  28 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
  29 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
  30 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  31 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  32 * SOFTWARE.
  33 */
  34
  35#include <rdma/ib_mad.h>
  36#include <rdma/ib_user_verbs.h>
  37#include <linux/io.h>
  38#include <linux/module.h>
  39#include <linux/utsname.h>
  40#include <linux/rculist.h>
  41#include <linux/mm.h>
  42#include <linux/random.h>
  43#include <linux/vmalloc.h>
  44#include <rdma/rdma_vt.h>
  45
  46#include "qib.h"
  47#include "qib_common.h"
  48
  49static unsigned int ib_qib_qp_table_size = 256;
  50module_param_named(qp_table_size, ib_qib_qp_table_size, uint, S_IRUGO);
  51MODULE_PARM_DESC(qp_table_size, "QP table size");
  52
  53static unsigned int qib_lkey_table_size = 16;
  54module_param_named(lkey_table_size, qib_lkey_table_size, uint,
  55                   S_IRUGO);
  56MODULE_PARM_DESC(lkey_table_size,
  57                 "LKEY table size in bits (2^n, 1 <= n <= 23)");
  58
  59static unsigned int ib_qib_max_pds = 0xFFFF;
  60module_param_named(max_pds, ib_qib_max_pds, uint, S_IRUGO);
  61MODULE_PARM_DESC(max_pds,
  62                 "Maximum number of protection domains to support");
  63
  64static unsigned int ib_qib_max_ahs = 0xFFFF;
  65module_param_named(max_ahs, ib_qib_max_ahs, uint, S_IRUGO);
  66MODULE_PARM_DESC(max_ahs, "Maximum number of address handles to support");
  67
  68unsigned int ib_qib_max_cqes = 0x2FFFF;
  69module_param_named(max_cqes, ib_qib_max_cqes, uint, S_IRUGO);
  70MODULE_PARM_DESC(max_cqes,
  71                 "Maximum number of completion queue entries to support");
  72
  73unsigned int ib_qib_max_cqs = 0x1FFFF;
  74module_param_named(max_cqs, ib_qib_max_cqs, uint, S_IRUGO);
  75MODULE_PARM_DESC(max_cqs, "Maximum number of completion queues to support");
  76
  77unsigned int ib_qib_max_qp_wrs = 0x3FFF;
  78module_param_named(max_qp_wrs, ib_qib_max_qp_wrs, uint, S_IRUGO);
  79MODULE_PARM_DESC(max_qp_wrs, "Maximum number of QP WRs to support");
  80
  81unsigned int ib_qib_max_qps = 16384;
  82module_param_named(max_qps, ib_qib_max_qps, uint, S_IRUGO);
  83MODULE_PARM_DESC(max_qps, "Maximum number of QPs to support");
  84
  85unsigned int ib_qib_max_sges = 0x60;
  86module_param_named(max_sges, ib_qib_max_sges, uint, S_IRUGO);
  87MODULE_PARM_DESC(max_sges, "Maximum number of SGEs to support");
  88
  89unsigned int ib_qib_max_mcast_grps = 16384;
  90module_param_named(max_mcast_grps, ib_qib_max_mcast_grps, uint, S_IRUGO);
  91MODULE_PARM_DESC(max_mcast_grps,
  92                 "Maximum number of multicast groups to support");
  93
  94unsigned int ib_qib_max_mcast_qp_attached = 16;
  95module_param_named(max_mcast_qp_attached, ib_qib_max_mcast_qp_attached,
  96                   uint, S_IRUGO);
  97MODULE_PARM_DESC(max_mcast_qp_attached,
  98                 "Maximum number of attached QPs to support");
  99
 100unsigned int ib_qib_max_srqs = 1024;
 101module_param_named(max_srqs, ib_qib_max_srqs, uint, S_IRUGO);
 102MODULE_PARM_DESC(max_srqs, "Maximum number of SRQs to support");
 103
 104unsigned int ib_qib_max_srq_sges = 128;
 105module_param_named(max_srq_sges, ib_qib_max_srq_sges, uint, S_IRUGO);
 106MODULE_PARM_DESC(max_srq_sges, "Maximum number of SRQ SGEs to support");
 107
 108unsigned int ib_qib_max_srq_wrs = 0x1FFFF;
 109module_param_named(max_srq_wrs, ib_qib_max_srq_wrs, uint, S_IRUGO);
  110MODULE_PARM_DESC(max_srq_wrs, "Maximum number of SRQ WRs to support");
 111
 112static unsigned int ib_qib_disable_sma;
 113module_param_named(disable_sma, ib_qib_disable_sma, uint, S_IWUSR | S_IRUGO);
 114MODULE_PARM_DESC(disable_sma, "Disable the SMA");
 115
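All of the parameters above are module load-time options for ib_qib (for example, passed on the modprobe command line). Because they are registered with S_IRUGO they should also show up read-only under /sys/module/ib_qib/parameters/; disable_sma is the only one that is additionally root-writable (S_IWUSR) at runtime.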
 116/*
 117 * Translate ib_wr_opcode into ib_wc_opcode.
 118 */
 119const enum ib_wc_opcode ib_qib_wc_opcode[] = {
 120        [IB_WR_RDMA_WRITE] = IB_WC_RDMA_WRITE,
 121        [IB_WR_RDMA_WRITE_WITH_IMM] = IB_WC_RDMA_WRITE,
 122        [IB_WR_SEND] = IB_WC_SEND,
 123        [IB_WR_SEND_WITH_IMM] = IB_WC_SEND,
 124        [IB_WR_RDMA_READ] = IB_WC_RDMA_READ,
 125        [IB_WR_ATOMIC_CMP_AND_SWP] = IB_WC_COMP_SWAP,
 126        [IB_WR_ATOMIC_FETCH_AND_ADD] = IB_WC_FETCH_ADD
 127};
 128
 129/*
 130 * System image GUID.
 131 */
 132__be64 ib_qib_sys_image_guid;
 133
 134/**
 135 * qib_copy_sge - copy data to SGE memory
 136 * @ss: the SGE state
 137 * @data: the data to copy
  138 * @length: the length of the data
      * @release: boolean to release MR references as SGEs are consumed
  139 */
 140void qib_copy_sge(struct rvt_sge_state *ss, void *data, u32 length, int release)
 141{
 142        struct rvt_sge *sge = &ss->sge;
 143
 144        while (length) {
 145                u32 len = rvt_get_sge_length(sge, length);
 146
 147                WARN_ON_ONCE(len == 0);
 148                memcpy(sge->vaddr, data, len);
 149                rvt_update_sge(ss, len, release);
 150                data += len;
 151                length -= len;
 152        }
 153}
 154
 155/*
 156 * Count the number of DMA descriptors needed to send length bytes of data.
  157 * Don't modify the rvt_sge_state to get the count.
 158 * Return zero if any of the segments is not aligned.
 159 */
 160static u32 qib_count_sge(struct rvt_sge_state *ss, u32 length)
 161{
 162        struct rvt_sge *sg_list = ss->sg_list;
 163        struct rvt_sge sge = ss->sge;
 164        u8 num_sge = ss->num_sge;
 165        u32 ndesc = 1;  /* count the header */
 166
 167        while (length) {
 168                u32 len = sge.length;
 169
 170                if (len > length)
 171                        len = length;
 172                if (len > sge.sge_length)
 173                        len = sge.sge_length;
 174                BUG_ON(len == 0);
 175                if (((long) sge.vaddr & (sizeof(u32) - 1)) ||
 176                    (len != length && (len & (sizeof(u32) - 1)))) {
 177                        ndesc = 0;
 178                        break;
 179                }
 180                ndesc++;
 181                sge.vaddr += len;
 182                sge.length -= len;
 183                sge.sge_length -= len;
 184                if (sge.sge_length == 0) {
 185                        if (--num_sge)
 186                                sge = *sg_list++;
 187                } else if (sge.length == 0 && sge.mr->lkey) {
 188                        if (++sge.n >= RVT_SEGSZ) {
 189                                if (++sge.m >= sge.mr->mapsz)
 190                                        break;
 191                                sge.n = 0;
 192                        }
 193                        sge.vaddr =
 194                                sge.mr->map[sge.m]->segs[sge.n].vaddr;
 195                        sge.length =
 196                                sge.mr->map[sge.m]->segs[sge.n].length;
 197                }
 198                length -= len;
 199        }
 200        return ndesc;
 201}
 202
 203/*
 204 * Copy from the SGEs to the data buffer.
 205 */
 206static void qib_copy_from_sge(void *data, struct rvt_sge_state *ss, u32 length)
 207{
 208        struct rvt_sge *sge = &ss->sge;
 209
 210        while (length) {
 211                u32 len = sge->length;
 212
 213                if (len > length)
 214                        len = length;
 215                if (len > sge->sge_length)
 216                        len = sge->sge_length;
 217                BUG_ON(len == 0);
 218                memcpy(data, sge->vaddr, len);
 219                sge->vaddr += len;
 220                sge->length -= len;
 221                sge->sge_length -= len;
 222                if (sge->sge_length == 0) {
 223                        if (--ss->num_sge)
 224                                *sge = *ss->sg_list++;
 225                } else if (sge->length == 0 && sge->mr->lkey) {
 226                        if (++sge->n >= RVT_SEGSZ) {
 227                                if (++sge->m >= sge->mr->mapsz)
 228                                        break;
 229                                sge->n = 0;
 230                        }
 231                        sge->vaddr =
 232                                sge->mr->map[sge->m]->segs[sge->n].vaddr;
 233                        sge->length =
 234                                sge->mr->map[sge->m]->segs[sge->n].length;
 235                }
 236                data += len;
 237                length -= len;
 238        }
 239}
 240
 241/**
 242 * qib_qp_rcv - processing an incoming packet on a QP
 243 * @rcd: the context pointer
 244 * @hdr: the packet header
 245 * @has_grh: true if the packet has a GRH
 246 * @data: the packet data
 247 * @tlen: the packet length
 248 * @qp: the QP the packet came on
 249 *
 250 * This is called from qib_ib_rcv() to process an incoming packet
 251 * for the given QP.
 252 * Called at interrupt level.
 253 */
 254static void qib_qp_rcv(struct qib_ctxtdata *rcd, struct ib_header *hdr,
 255                       int has_grh, void *data, u32 tlen, struct rvt_qp *qp)
 256{
 257        struct qib_ibport *ibp = &rcd->ppd->ibport_data;
 258
 259        spin_lock(&qp->r_lock);
 260
 261        /* Check for valid receive state. */
 262        if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK)) {
 263                ibp->rvp.n_pkt_drops++;
 264                goto unlock;
 265        }
 266
 267        switch (qp->ibqp.qp_type) {
 268        case IB_QPT_SMI:
 269        case IB_QPT_GSI:
 270                if (ib_qib_disable_sma)
 271                        break;
 272                /* FALLTHROUGH */
 273        case IB_QPT_UD:
 274                qib_ud_rcv(ibp, hdr, has_grh, data, tlen, qp);
 275                break;
 276
 277        case IB_QPT_RC:
 278                qib_rc_rcv(rcd, hdr, has_grh, data, tlen, qp);
 279                break;
 280
 281        case IB_QPT_UC:
 282                qib_uc_rcv(ibp, hdr, has_grh, data, tlen, qp);
 283                break;
 284
 285        default:
 286                break;
 287        }
 288
 289unlock:
 290        spin_unlock(&qp->r_lock);
 291}
 292
 293/**
 294 * qib_ib_rcv - process an incoming packet
 295 * @rcd: the context pointer
 296 * @rhdr: the header of the packet
 297 * @data: the packet payload
 298 * @tlen: the packet length
 299 *
 300 * This is called from qib_kreceive() to process an incoming packet at
 301 * interrupt level. Tlen is the length of the header + data + CRC in bytes.
 302 */
 303void qib_ib_rcv(struct qib_ctxtdata *rcd, void *rhdr, void *data, u32 tlen)
 304{
 305        struct qib_pportdata *ppd = rcd->ppd;
 306        struct qib_ibport *ibp = &ppd->ibport_data;
 307        struct ib_header *hdr = rhdr;
 308        struct qib_devdata *dd = ppd->dd;
 309        struct rvt_dev_info *rdi = &dd->verbs_dev.rdi;
 310        struct ib_other_headers *ohdr;
 311        struct rvt_qp *qp;
 312        u32 qp_num;
 313        int lnh;
 314        u8 opcode;
 315        u16 lid;
 316
 317        /* 24 == LRH+BTH+CRC */
 318        if (unlikely(tlen < 24))
 319                goto drop;
 320
 321        /* Check for a valid destination LID (see ch. 7.11.1). */
 322        lid = be16_to_cpu(hdr->lrh[1]);
 323        if (lid < be16_to_cpu(IB_MULTICAST_LID_BASE)) {
 324                lid &= ~((1 << ppd->lmc) - 1);
 325                if (unlikely(lid != ppd->lid))
 326                        goto drop;
 327        }
 328
 329        /* Check for GRH */
 330        lnh = be16_to_cpu(hdr->lrh[0]) & 3;
 331        if (lnh == QIB_LRH_BTH)
 332                ohdr = &hdr->u.oth;
 333        else if (lnh == QIB_LRH_GRH) {
 334                u32 vtf;
 335
 336                ohdr = &hdr->u.l.oth;
 337                if (hdr->u.l.grh.next_hdr != IB_GRH_NEXT_HDR)
 338                        goto drop;
 339                vtf = be32_to_cpu(hdr->u.l.grh.version_tclass_flow);
 340                if ((vtf >> IB_GRH_VERSION_SHIFT) != IB_GRH_VERSION)
 341                        goto drop;
 342        } else
 343                goto drop;
 344
 345        opcode = (be32_to_cpu(ohdr->bth[0]) >> 24) & 0x7f;
 346#ifdef CONFIG_DEBUG_FS
 347        rcd->opstats->stats[opcode].n_bytes += tlen;
 348        rcd->opstats->stats[opcode].n_packets++;
 349#endif
 350
 351        /* Get the destination QP number. */
 352        qp_num = be32_to_cpu(ohdr->bth[1]) & RVT_QPN_MASK;
 353        if (qp_num == QIB_MULTICAST_QPN) {
 354                struct rvt_mcast *mcast;
 355                struct rvt_mcast_qp *p;
 356
 357                if (lnh != QIB_LRH_GRH)
 358                        goto drop;
 359                mcast = rvt_mcast_find(&ibp->rvp, &hdr->u.l.grh.dgid, lid);
 360                if (mcast == NULL)
 361                        goto drop;
 362                this_cpu_inc(ibp->pmastats->n_multicast_rcv);
 363                list_for_each_entry_rcu(p, &mcast->qp_list, list)
 364                        qib_qp_rcv(rcd, hdr, 1, data, tlen, p->qp);
 365                /*
 366                 * Notify rvt_multicast_detach() if it is waiting for us
 367                 * to finish.
 368                 */
 369                if (atomic_dec_return(&mcast->refcount) <= 1)
 370                        wake_up(&mcast->wait);
 371        } else {
 372                rcu_read_lock();
 373                qp = rvt_lookup_qpn(rdi, &ibp->rvp, qp_num);
 374                if (!qp) {
 375                        rcu_read_unlock();
 376                        goto drop;
 377                }
 378                this_cpu_inc(ibp->pmastats->n_unicast_rcv);
 379                qib_qp_rcv(rcd, hdr, lnh == QIB_LRH_GRH, data, tlen, qp);
 380                rcu_read_unlock();
 381        }
 382        return;
 383
 384drop:
 385        ibp->rvp.n_pkt_drops++;
 386}
 387
 388/*
 389 * This is called from a timer to check for QPs
 390 * which need kernel memory in order to send a packet.
 391 */
 392static void mem_timer(struct timer_list *t)
 393{
 394        struct qib_ibdev *dev = from_timer(dev, t, mem_timer);
 395        struct list_head *list = &dev->memwait;
 396        struct rvt_qp *qp = NULL;
 397        struct qib_qp_priv *priv = NULL;
 398        unsigned long flags;
 399
 400        spin_lock_irqsave(&dev->rdi.pending_lock, flags);
 401        if (!list_empty(list)) {
 402                priv = list_entry(list->next, struct qib_qp_priv, iowait);
 403                qp = priv->owner;
 404                list_del_init(&priv->iowait);
 405                rvt_get_qp(qp);
 406                if (!list_empty(list))
 407                        mod_timer(&dev->mem_timer, jiffies + 1);
 408        }
 409        spin_unlock_irqrestore(&dev->rdi.pending_lock, flags);
 410
 411        if (qp) {
 412                spin_lock_irqsave(&qp->s_lock, flags);
 413                if (qp->s_flags & RVT_S_WAIT_KMEM) {
 414                        qp->s_flags &= ~RVT_S_WAIT_KMEM;
 415                        qib_schedule_send(qp);
 416                }
 417                spin_unlock_irqrestore(&qp->s_lock, flags);
 418                rvt_put_qp(qp);
 419        }
 420}
 421
 422#ifdef __LITTLE_ENDIAN
 423static inline u32 get_upper_bits(u32 data, u32 shift)
 424{
 425        return data >> shift;
 426}
 427
 428static inline u32 set_upper_bits(u32 data, u32 shift)
 429{
 430        return data << shift;
 431}
 432
 433static inline u32 clear_upper_bytes(u32 data, u32 n, u32 off)
 434{
 435        data <<= ((sizeof(u32) - n) * BITS_PER_BYTE);
 436        data >>= ((sizeof(u32) - n - off) * BITS_PER_BYTE);
 437        return data;
 438}
 439#else
 440static inline u32 get_upper_bits(u32 data, u32 shift)
 441{
 442        return data << shift;
 443}
 444
 445static inline u32 set_upper_bits(u32 data, u32 shift)
 446{
 447        return data >> shift;
 448}
 449
 450static inline u32 clear_upper_bytes(u32 data, u32 n, u32 off)
 451{
 452        data >>= ((sizeof(u32) - n) * BITS_PER_BYTE);
 453        data <<= ((sizeof(u32) - n - off) * BITS_PER_BYTE);
 454        return data;
 455}
 456#endif
 457
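The three helpers above exist so that copy_io() below can turn arbitrarily aligned payload bytes into whole, aligned 32-bit PIO writes. As a quick illustration of the little-endian variants (a standalone userspace sketch, not part of the driver), clear_upper_bytes(data, n, off) keeps the first n memory bytes of data and places them starting at byte offset off of the result, zeroing everything else:

#include <stdint.h>
#include <stdio.h>

/* Userspace copies of the little-endian helpers, for illustration only. */
static uint32_t get_upper_bits(uint32_t data, uint32_t shift)
{
        return data >> shift;
}

static uint32_t set_upper_bits(uint32_t data, uint32_t shift)
{
        return data << shift;
}

static uint32_t clear_upper_bytes(uint32_t data, uint32_t n, uint32_t off)
{
        data <<= ((sizeof(uint32_t) - n) * 8);
        data >>= ((sizeof(uint32_t) - n - off) * 8);
        return data;
}

int main(void)
{
        /*
         * 0xAABBCCDD is stored as DD CC BB AA in little-endian memory.
         * Keeping its first two memory bytes (DD, CC) starting at byte
         * offset 1 of the output word gives 0x00CCDD00 (bytes 00 DD CC 00).
         */
        printf("%#010x\n", (unsigned)clear_upper_bytes(0xAABBCCDD, 2, 1));
        printf("%#010x\n", (unsigned)get_upper_bits(0xAABBCCDD, 8));  /* 0x00aabbcc */
        printf("%#010x\n", (unsigned)set_upper_bits(0x00AABBCC, 8));  /* 0xaabbcc00 */
        return 0;
}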
 458static void copy_io(u32 __iomem *piobuf, struct rvt_sge_state *ss,
 459                    u32 length, unsigned flush_wc)
 460{
 461        u32 extra = 0;
 462        u32 data = 0;
 463        u32 last;
 464
 465        while (1) {
 466                u32 len = ss->sge.length;
 467                u32 off;
 468
 469                if (len > length)
 470                        len = length;
 471                if (len > ss->sge.sge_length)
 472                        len = ss->sge.sge_length;
 473                BUG_ON(len == 0);
 474                /* If the source address is not aligned, try to align it. */
 475                off = (unsigned long)ss->sge.vaddr & (sizeof(u32) - 1);
 476                if (off) {
 477                        u32 *addr = (u32 *)((unsigned long)ss->sge.vaddr &
 478                                            ~(sizeof(u32) - 1));
 479                        u32 v = get_upper_bits(*addr, off * BITS_PER_BYTE);
 480                        u32 y;
 481
 482                        y = sizeof(u32) - off;
 483                        if (len > y)
 484                                len = y;
 485                        if (len + extra >= sizeof(u32)) {
 486                                data |= set_upper_bits(v, extra *
 487                                                       BITS_PER_BYTE);
 488                                len = sizeof(u32) - extra;
 489                                if (len == length) {
 490                                        last = data;
 491                                        break;
 492                                }
 493                                __raw_writel(data, piobuf);
 494                                piobuf++;
 495                                extra = 0;
 496                                data = 0;
 497                        } else {
 498                                /* Clear unused upper bytes */
 499                                data |= clear_upper_bytes(v, len, extra);
 500                                if (len == length) {
 501                                        last = data;
 502                                        break;
 503                                }
 504                                extra += len;
 505                        }
 506                } else if (extra) {
 507                        /* Source address is aligned. */
 508                        u32 *addr = (u32 *) ss->sge.vaddr;
 509                        int shift = extra * BITS_PER_BYTE;
 510                        int ushift = 32 - shift;
 511                        u32 l = len;
 512
 513                        while (l >= sizeof(u32)) {
 514                                u32 v = *addr;
 515
 516                                data |= set_upper_bits(v, shift);
 517                                __raw_writel(data, piobuf);
 518                                data = get_upper_bits(v, ushift);
 519                                piobuf++;
 520                                addr++;
 521                                l -= sizeof(u32);
 522                        }
 523                        /*
  524                         * We still have 'extra' bytes of carry buffered in 'data'.
 525                         */
 526                        if (l) {
 527                                u32 v = *addr;
 528
 529                                if (l + extra >= sizeof(u32)) {
 530                                        data |= set_upper_bits(v, shift);
 531                                        len -= l + extra - sizeof(u32);
 532                                        if (len == length) {
 533                                                last = data;
 534                                                break;
 535                                        }
 536                                        __raw_writel(data, piobuf);
 537                                        piobuf++;
 538                                        extra = 0;
 539                                        data = 0;
 540                                } else {
 541                                        /* Clear unused upper bytes */
 542                                        data |= clear_upper_bytes(v, l, extra);
 543                                        if (len == length) {
 544                                                last = data;
 545                                                break;
 546                                        }
 547                                        extra += l;
 548                                }
 549                        } else if (len == length) {
 550                                last = data;
 551                                break;
 552                        }
 553                } else if (len == length) {
 554                        u32 w;
 555
 556                        /*
 557                         * Need to round up for the last dword in the
 558                         * packet.
 559                         */
 560                        w = (len + 3) >> 2;
 561                        qib_pio_copy(piobuf, ss->sge.vaddr, w - 1);
 562                        piobuf += w - 1;
 563                        last = ((u32 *) ss->sge.vaddr)[w - 1];
 564                        break;
 565                } else {
 566                        u32 w = len >> 2;
 567
 568                        qib_pio_copy(piobuf, ss->sge.vaddr, w);
 569                        piobuf += w;
 570
 571                        extra = len & (sizeof(u32) - 1);
 572                        if (extra) {
 573                                u32 v = ((u32 *) ss->sge.vaddr)[w];
 574
 575                                /* Clear unused upper bytes */
 576                                data = clear_upper_bytes(v, extra, 0);
 577                        }
 578                }
 579                rvt_update_sge(ss, len, false);
 580                length -= len;
 581        }
 582        /* Update address before sending packet. */
 583        rvt_update_sge(ss, length, false);
 584        if (flush_wc) {
 585                /* must flush early everything before trigger word */
 586                qib_flush_wc();
 587                __raw_writel(last, piobuf);
 588                /* be sure trigger word is written */
 589                qib_flush_wc();
 590        } else
 591                __raw_writel(last, piobuf);
 592}
 593
 594static noinline struct qib_verbs_txreq *__get_txreq(struct qib_ibdev *dev,
 595                                           struct rvt_qp *qp)
 596{
 597        struct qib_qp_priv *priv = qp->priv;
 598        struct qib_verbs_txreq *tx;
 599        unsigned long flags;
 600
 601        spin_lock_irqsave(&qp->s_lock, flags);
 602        spin_lock(&dev->rdi.pending_lock);
 603
 604        if (!list_empty(&dev->txreq_free)) {
 605                struct list_head *l = dev->txreq_free.next;
 606
 607                list_del(l);
 608                spin_unlock(&dev->rdi.pending_lock);
 609                spin_unlock_irqrestore(&qp->s_lock, flags);
 610                tx = list_entry(l, struct qib_verbs_txreq, txreq.list);
 611        } else {
 612                if (ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK &&
 613                    list_empty(&priv->iowait)) {
 614                        dev->n_txwait++;
 615                        qp->s_flags |= RVT_S_WAIT_TX;
 616                        list_add_tail(&priv->iowait, &dev->txwait);
 617                }
 618                qp->s_flags &= ~RVT_S_BUSY;
 619                spin_unlock(&dev->rdi.pending_lock);
 620                spin_unlock_irqrestore(&qp->s_lock, flags);
 621                tx = ERR_PTR(-EBUSY);
 622        }
 623        return tx;
 624}
 625
 626static inline struct qib_verbs_txreq *get_txreq(struct qib_ibdev *dev,
 627                                         struct rvt_qp *qp)
 628{
 629        struct qib_verbs_txreq *tx;
 630        unsigned long flags;
 631
 632        spin_lock_irqsave(&dev->rdi.pending_lock, flags);
  633        /* assume the list is non-empty */
 634        if (likely(!list_empty(&dev->txreq_free))) {
 635                struct list_head *l = dev->txreq_free.next;
 636
 637                list_del(l);
 638                spin_unlock_irqrestore(&dev->rdi.pending_lock, flags);
 639                tx = list_entry(l, struct qib_verbs_txreq, txreq.list);
 640        } else {
 641                /* call slow path to get the extra lock */
 642                spin_unlock_irqrestore(&dev->rdi.pending_lock, flags);
 643                tx =  __get_txreq(dev, qp);
 644        }
 645        return tx;
 646}
 647
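A note on the two paths above: get_txreq() takes only rdi.pending_lock and expects the free list to be non-empty. When the list is empty, the out-of-line __get_txreq() retries with both qp->s_lock and the pending lock held so that, if there is still no free txreq, the QP can safely be queued on dev->txwait with RVT_S_WAIT_TX set and RVT_S_BUSY cleared; qib_put_txreq() below wakes the first such waiter when a txreq is returned to the free list.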
 648void qib_put_txreq(struct qib_verbs_txreq *tx)
 649{
 650        struct qib_ibdev *dev;
 651        struct rvt_qp *qp;
 652        struct qib_qp_priv *priv;
 653        unsigned long flags;
 654
 655        qp = tx->qp;
 656        dev = to_idev(qp->ibqp.device);
 657
 658        if (tx->mr) {
 659                rvt_put_mr(tx->mr);
 660                tx->mr = NULL;
 661        }
 662        if (tx->txreq.flags & QIB_SDMA_TXREQ_F_FREEBUF) {
 663                tx->txreq.flags &= ~QIB_SDMA_TXREQ_F_FREEBUF;
 664                dma_unmap_single(&dd_from_dev(dev)->pcidev->dev,
 665                                 tx->txreq.addr, tx->hdr_dwords << 2,
 666                                 DMA_TO_DEVICE);
 667                kfree(tx->align_buf);
 668        }
 669
 670        spin_lock_irqsave(&dev->rdi.pending_lock, flags);
 671
 672        /* Put struct back on free list */
 673        list_add(&tx->txreq.list, &dev->txreq_free);
 674
 675        if (!list_empty(&dev->txwait)) {
 676                /* Wake up first QP wanting a free struct */
 677                priv = list_entry(dev->txwait.next, struct qib_qp_priv,
 678                                  iowait);
 679                qp = priv->owner;
 680                list_del_init(&priv->iowait);
 681                rvt_get_qp(qp);
 682                spin_unlock_irqrestore(&dev->rdi.pending_lock, flags);
 683
 684                spin_lock_irqsave(&qp->s_lock, flags);
 685                if (qp->s_flags & RVT_S_WAIT_TX) {
 686                        qp->s_flags &= ~RVT_S_WAIT_TX;
 687                        qib_schedule_send(qp);
 688                }
 689                spin_unlock_irqrestore(&qp->s_lock, flags);
 690
 691                rvt_put_qp(qp);
 692        } else
 693                spin_unlock_irqrestore(&dev->rdi.pending_lock, flags);
 694}
 695
 696/*
 697 * This is called when there are send DMA descriptors that might be
 698 * available.
 699 *
 700 * This is called with ppd->sdma_lock held.
 701 */
 702void qib_verbs_sdma_desc_avail(struct qib_pportdata *ppd, unsigned avail)
 703{
 704        struct rvt_qp *qp;
 705        struct qib_qp_priv *qpp, *nqpp;
 706        struct rvt_qp *qps[20];
 707        struct qib_ibdev *dev;
 708        unsigned i, n;
 709
 710        n = 0;
 711        dev = &ppd->dd->verbs_dev;
 712        spin_lock(&dev->rdi.pending_lock);
 713
 714        /* Search wait list for first QP wanting DMA descriptors. */
 715        list_for_each_entry_safe(qpp, nqpp, &dev->dmawait, iowait) {
 716                qp = qpp->owner;
 717                if (qp->port_num != ppd->port)
 718                        continue;
 719                if (n == ARRAY_SIZE(qps))
 720                        break;
 721                if (qpp->s_tx->txreq.sg_count > avail)
 722                        break;
 723                avail -= qpp->s_tx->txreq.sg_count;
 724                list_del_init(&qpp->iowait);
 725                rvt_get_qp(qp);
 726                qps[n++] = qp;
 727        }
 728
 729        spin_unlock(&dev->rdi.pending_lock);
 730
 731        for (i = 0; i < n; i++) {
 732                qp = qps[i];
 733                spin_lock(&qp->s_lock);
 734                if (qp->s_flags & RVT_S_WAIT_DMA_DESC) {
 735                        qp->s_flags &= ~RVT_S_WAIT_DMA_DESC;
 736                        qib_schedule_send(qp);
 737                }
 738                spin_unlock(&qp->s_lock);
 739                rvt_put_qp(qp);
 740        }
 741}
 742
 743/*
 744 * This is called with ppd->sdma_lock held.
 745 */
 746static void sdma_complete(struct qib_sdma_txreq *cookie, int status)
 747{
 748        struct qib_verbs_txreq *tx =
 749                container_of(cookie, struct qib_verbs_txreq, txreq);
 750        struct rvt_qp *qp = tx->qp;
 751        struct qib_qp_priv *priv = qp->priv;
 752
 753        spin_lock(&qp->s_lock);
 754        if (tx->wqe)
 755                qib_send_complete(qp, tx->wqe, IB_WC_SUCCESS);
 756        else if (qp->ibqp.qp_type == IB_QPT_RC) {
 757                struct ib_header *hdr;
 758
 759                if (tx->txreq.flags & QIB_SDMA_TXREQ_F_FREEBUF)
 760                        hdr = &tx->align_buf->hdr;
 761                else {
 762                        struct qib_ibdev *dev = to_idev(qp->ibqp.device);
 763
 764                        hdr = &dev->pio_hdrs[tx->hdr_inx].hdr;
 765                }
 766                qib_rc_send_complete(qp, hdr);
 767        }
 768        if (atomic_dec_and_test(&priv->s_dma_busy)) {
 769                if (qp->state == IB_QPS_RESET)
 770                        wake_up(&priv->wait_dma);
 771                else if (qp->s_flags & RVT_S_WAIT_DMA) {
 772                        qp->s_flags &= ~RVT_S_WAIT_DMA;
 773                        qib_schedule_send(qp);
 774                }
 775        }
 776        spin_unlock(&qp->s_lock);
 777
 778        qib_put_txreq(tx);
 779}
 780
 781static int wait_kmem(struct qib_ibdev *dev, struct rvt_qp *qp)
 782{
 783        struct qib_qp_priv *priv = qp->priv;
 784        unsigned long flags;
 785        int ret = 0;
 786
 787        spin_lock_irqsave(&qp->s_lock, flags);
 788        if (ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK) {
 789                spin_lock(&dev->rdi.pending_lock);
 790                if (list_empty(&priv->iowait)) {
 791                        if (list_empty(&dev->memwait))
 792                                mod_timer(&dev->mem_timer, jiffies + 1);
 793                        qp->s_flags |= RVT_S_WAIT_KMEM;
 794                        list_add_tail(&priv->iowait, &dev->memwait);
 795                }
 796                spin_unlock(&dev->rdi.pending_lock);
 797                qp->s_flags &= ~RVT_S_BUSY;
 798                ret = -EBUSY;
 799        }
 800        spin_unlock_irqrestore(&qp->s_lock, flags);
 801
 802        return ret;
 803}
 804
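wait_kmem() is the other half of the mem_timer() mechanism earlier in this file: when the atomic header-buffer allocation (or its DMA mapping) fails in qib_verbs_send_dma() below, the QP is parked on dev->memwait with RVT_S_WAIT_KMEM set and RVT_S_BUSY cleared, and mem_timer() then wakes one waiter per jiffy, re-arming itself while the list remains non-empty.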
 805static int qib_verbs_send_dma(struct rvt_qp *qp, struct ib_header *hdr,
 806                              u32 hdrwords, struct rvt_sge_state *ss, u32 len,
 807                              u32 plen, u32 dwords)
 808{
 809        struct qib_qp_priv *priv = qp->priv;
 810        struct qib_ibdev *dev = to_idev(qp->ibqp.device);
 811        struct qib_devdata *dd = dd_from_dev(dev);
 812        struct qib_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num);
 813        struct qib_pportdata *ppd = ppd_from_ibp(ibp);
 814        struct qib_verbs_txreq *tx;
 815        struct qib_pio_header *phdr;
 816        u32 control;
 817        u32 ndesc;
 818        int ret;
 819
 820        tx = priv->s_tx;
 821        if (tx) {
 822                priv->s_tx = NULL;
 823                /* resend previously constructed packet */
 824                ret = qib_sdma_verbs_send(ppd, tx->ss, tx->dwords, tx);
 825                goto bail;
 826        }
 827
 828        tx = get_txreq(dev, qp);
 829        if (IS_ERR(tx))
 830                goto bail_tx;
 831
 832        control = dd->f_setpbc_control(ppd, plen, qp->s_srate,
 833                                       be16_to_cpu(hdr->lrh[0]) >> 12);
 834        tx->qp = qp;
 835        tx->wqe = qp->s_wqe;
 836        tx->mr = qp->s_rdma_mr;
 837        if (qp->s_rdma_mr)
 838                qp->s_rdma_mr = NULL;
 839        tx->txreq.callback = sdma_complete;
 840        if (dd->flags & QIB_HAS_SDMA_TIMEOUT)
 841                tx->txreq.flags = QIB_SDMA_TXREQ_F_HEADTOHOST;
 842        else
 843                tx->txreq.flags = QIB_SDMA_TXREQ_F_INTREQ;
 844        if (plen + 1 > dd->piosize2kmax_dwords)
 845                tx->txreq.flags |= QIB_SDMA_TXREQ_F_USELARGEBUF;
 846
 847        if (len) {
 848                /*
 849                 * Don't try to DMA if it takes more descriptors than
 850                 * the queue holds.
 851                 */
 852                ndesc = qib_count_sge(ss, len);
 853                if (ndesc >= ppd->sdma_descq_cnt)
 854                        ndesc = 0;
 855        } else
 856                ndesc = 1;
 857        if (ndesc) {
 858                phdr = &dev->pio_hdrs[tx->hdr_inx];
 859                phdr->pbc[0] = cpu_to_le32(plen);
 860                phdr->pbc[1] = cpu_to_le32(control);
 861                memcpy(&phdr->hdr, hdr, hdrwords << 2);
 862                tx->txreq.flags |= QIB_SDMA_TXREQ_F_FREEDESC;
 863                tx->txreq.sg_count = ndesc;
 864                tx->txreq.addr = dev->pio_hdrs_phys +
 865                        tx->hdr_inx * sizeof(struct qib_pio_header);
 866                tx->hdr_dwords = hdrwords + 2; /* add PBC length */
 867                ret = qib_sdma_verbs_send(ppd, ss, dwords, tx);
 868                goto bail;
 869        }
 870
 871        /* Allocate a buffer and copy the header and payload to it. */
 872        tx->hdr_dwords = plen + 1;
 873        phdr = kmalloc(tx->hdr_dwords << 2, GFP_ATOMIC);
 874        if (!phdr)
 875                goto err_tx;
 876        phdr->pbc[0] = cpu_to_le32(plen);
 877        phdr->pbc[1] = cpu_to_le32(control);
 878        memcpy(&phdr->hdr, hdr, hdrwords << 2);
 879        qib_copy_from_sge((u32 *) &phdr->hdr + hdrwords, ss, len);
 880
 881        tx->txreq.addr = dma_map_single(&dd->pcidev->dev, phdr,
 882                                        tx->hdr_dwords << 2, DMA_TO_DEVICE);
 883        if (dma_mapping_error(&dd->pcidev->dev, tx->txreq.addr))
 884                goto map_err;
 885        tx->align_buf = phdr;
 886        tx->txreq.flags |= QIB_SDMA_TXREQ_F_FREEBUF;
 887        tx->txreq.sg_count = 1;
 888        ret = qib_sdma_verbs_send(ppd, NULL, 0, tx);
 889        goto unaligned;
 890
 891map_err:
 892        kfree(phdr);
 893err_tx:
 894        qib_put_txreq(tx);
 895        ret = wait_kmem(dev, qp);
 896unaligned:
 897        ibp->rvp.n_unaligned++;
 898bail:
 899        return ret;
 900bail_tx:
 901        ret = PTR_ERR(tx);
 902        goto bail;
 903}
 904
 905/*
 906 * If we are now in the error state, return zero to flush the
 907 * send work request.
 908 */
 909static int no_bufs_available(struct rvt_qp *qp)
 910{
 911        struct qib_qp_priv *priv = qp->priv;
 912        struct qib_ibdev *dev = to_idev(qp->ibqp.device);
 913        struct qib_devdata *dd;
 914        unsigned long flags;
 915        int ret = 0;
 916
 917        /*
  918         * Note that qib_ib_piobufavail() can run as soon as the PIO
  919         * available interrupt is enabled by dd->f_wantpiobuf_intr(),
  920         * possibly before that call even returns.  Therefore, put the QP
  921         * on the I/O wait list before enabling the interrupt.
 922         */
 923        spin_lock_irqsave(&qp->s_lock, flags);
 924        if (ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK) {
 925                spin_lock(&dev->rdi.pending_lock);
 926                if (list_empty(&priv->iowait)) {
 927                        dev->n_piowait++;
 928                        qp->s_flags |= RVT_S_WAIT_PIO;
 929                        list_add_tail(&priv->iowait, &dev->piowait);
 930                        dd = dd_from_dev(dev);
 931                        dd->f_wantpiobuf_intr(dd, 1);
 932                }
 933                spin_unlock(&dev->rdi.pending_lock);
 934                qp->s_flags &= ~RVT_S_BUSY;
 935                ret = -EBUSY;
 936        }
 937        spin_unlock_irqrestore(&qp->s_lock, flags);
 938        return ret;
 939}
 940
 941static int qib_verbs_send_pio(struct rvt_qp *qp, struct ib_header *ibhdr,
 942                              u32 hdrwords, struct rvt_sge_state *ss, u32 len,
 943                              u32 plen, u32 dwords)
 944{
 945        struct qib_devdata *dd = dd_from_ibdev(qp->ibqp.device);
 946        struct qib_pportdata *ppd = dd->pport + qp->port_num - 1;
 947        u32 *hdr = (u32 *) ibhdr;
 948        u32 __iomem *piobuf_orig;
 949        u32 __iomem *piobuf;
 950        u64 pbc;
 951        unsigned long flags;
 952        unsigned flush_wc;
 953        u32 control;
 954        u32 pbufn;
 955
 956        control = dd->f_setpbc_control(ppd, plen, qp->s_srate,
 957                be16_to_cpu(ibhdr->lrh[0]) >> 12);
 958        pbc = ((u64) control << 32) | plen;
 959        piobuf = dd->f_getsendbuf(ppd, pbc, &pbufn);
 960        if (unlikely(piobuf == NULL))
 961                return no_bufs_available(qp);
 962
 963        /*
 964         * Write the pbc.
 965         * We have to flush after the PBC for correctness on some cpus
  966         * or the WC buffer can be written out of order.
 967         */
 968        writeq(pbc, piobuf);
 969        piobuf_orig = piobuf;
 970        piobuf += 2;
 971
 972        flush_wc = dd->flags & QIB_PIO_FLUSH_WC;
 973        if (len == 0) {
 974                /*
 975                 * If there is just the header portion, must flush before
 976                 * writing last word of header for correctness, and after
 977                 * the last header word (trigger word).
 978                 */
 979                if (flush_wc) {
 980                        qib_flush_wc();
 981                        qib_pio_copy(piobuf, hdr, hdrwords - 1);
 982                        qib_flush_wc();
 983                        __raw_writel(hdr[hdrwords - 1], piobuf + hdrwords - 1);
 984                        qib_flush_wc();
 985                } else
 986                        qib_pio_copy(piobuf, hdr, hdrwords);
 987                goto done;
 988        }
 989
 990        if (flush_wc)
 991                qib_flush_wc();
 992        qib_pio_copy(piobuf, hdr, hdrwords);
 993        piobuf += hdrwords;
 994
 995        /* The common case is aligned and contained in one segment. */
 996        if (likely(ss->num_sge == 1 && len <= ss->sge.length &&
 997                   !((unsigned long)ss->sge.vaddr & (sizeof(u32) - 1)))) {
 998                u32 *addr = (u32 *) ss->sge.vaddr;
 999
1000                /* Update address before sending packet. */
1001                rvt_update_sge(ss, len, false);
1002                if (flush_wc) {
1003                        qib_pio_copy(piobuf, addr, dwords - 1);
1004                        /* must flush early everything before trigger word */
1005                        qib_flush_wc();
1006                        __raw_writel(addr[dwords - 1], piobuf + dwords - 1);
1007                        /* be sure trigger word is written */
1008                        qib_flush_wc();
1009                } else
1010                        qib_pio_copy(piobuf, addr, dwords);
1011                goto done;
1012        }
1013        copy_io(piobuf, ss, len, flush_wc);
1014done:
1015        if (dd->flags & QIB_USE_SPCL_TRIG) {
1016                u32 spcl_off = (pbufn >= dd->piobcnt2k) ? 2047 : 1023;
1017
1018                qib_flush_wc();
1019                __raw_writel(0xaebecede, piobuf_orig + spcl_off);
1020        }
1021        qib_sendbuf_done(dd, pbufn);
1022        if (qp->s_rdma_mr) {
1023                rvt_put_mr(qp->s_rdma_mr);
1024                qp->s_rdma_mr = NULL;
1025        }
1026        if (qp->s_wqe) {
1027                spin_lock_irqsave(&qp->s_lock, flags);
1028                qib_send_complete(qp, qp->s_wqe, IB_WC_SUCCESS);
1029                spin_unlock_irqrestore(&qp->s_lock, flags);
1030        } else if (qp->ibqp.qp_type == IB_QPT_RC) {
1031                spin_lock_irqsave(&qp->s_lock, flags);
1032                qib_rc_send_complete(qp, ibhdr);
1033                spin_unlock_irqrestore(&qp->s_lock, flags);
1034        }
1035        return 0;
1036}
1037
1038/**
1039 * qib_verbs_send - send a packet
1040 * @qp: the QP to send on
1041 * @hdr: the packet header
1042 * @hdrwords: the number of 32-bit words in the header
1043 * @ss: the SGE to send
1044 * @len: the length of the packet in bytes
1045 *
1046 * Return zero if packet is sent or queued OK.
 1047 * Otherwise, return non-zero and clear RVT_S_BUSY in qp->s_flags.
1048 */
1049int qib_verbs_send(struct rvt_qp *qp, struct ib_header *hdr,
1050                   u32 hdrwords, struct rvt_sge_state *ss, u32 len)
1051{
1052        struct qib_devdata *dd = dd_from_ibdev(qp->ibqp.device);
1053        u32 plen;
1054        int ret;
1055        u32 dwords = (len + 3) >> 2;
1056
1057        /*
1058         * Calculate the send buffer trigger address.
 1059         * The +1 accounts for the PBC control dword that follows the PBC length.
1060         */
1061        plen = hdrwords + dwords + 1;
1062
1063        /*
1064         * VL15 packets (IB_QPT_SMI) will always use PIO, so we
1065         * can defer SDMA restart until link goes ACTIVE without
1066         * worrying about just how we got there.
1067         */
1068        if (qp->ibqp.qp_type == IB_QPT_SMI ||
1069            !(dd->flags & QIB_HAS_SEND_DMA))
1070                ret = qib_verbs_send_pio(qp, hdr, hdrwords, ss, len,
1071                                         plen, dwords);
1072        else
1073                ret = qib_verbs_send_dma(qp, hdr, hdrwords, ss, len,
1074                                         plen, dwords);
1075
1076        return ret;
1077}
1078
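To illustrate the arithmetic only: a 72-byte header corresponds to hdrwords = 18, a 1024-byte payload gives dwords = (1024 + 3) >> 2 = 256, so plen = 18 + 256 + 1 = 275 dwords including the PBC. SMI (VL15) packets, and all traffic on chips without QIB_HAS_SEND_DMA, then go out through qib_verbs_send_pio(); everything else uses qib_verbs_send_dma().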
1079int qib_snapshot_counters(struct qib_pportdata *ppd, u64 *swords,
1080                          u64 *rwords, u64 *spkts, u64 *rpkts,
1081                          u64 *xmit_wait)
1082{
1083        int ret;
1084        struct qib_devdata *dd = ppd->dd;
1085
1086        if (!(dd->flags & QIB_PRESENT)) {
1087                /* no hardware, freeze, etc. */
1088                ret = -EINVAL;
1089                goto bail;
1090        }
1091        *swords = dd->f_portcntr(ppd, QIBPORTCNTR_WORDSEND);
1092        *rwords = dd->f_portcntr(ppd, QIBPORTCNTR_WORDRCV);
1093        *spkts = dd->f_portcntr(ppd, QIBPORTCNTR_PKTSEND);
1094        *rpkts = dd->f_portcntr(ppd, QIBPORTCNTR_PKTRCV);
1095        *xmit_wait = dd->f_portcntr(ppd, QIBPORTCNTR_SENDSTALL);
1096
1097        ret = 0;
1098
1099bail:
1100        return ret;
1101}
1102
1103/**
1104 * qib_get_counters - get various chip counters
 1105 * @ppd: the physical port of the qlogic_ib device
1106 * @cntrs: counters are placed here
1107 *
1108 * Return the counters needed by recv_pma_get_portcounters().
1109 */
1110int qib_get_counters(struct qib_pportdata *ppd,
1111                     struct qib_verbs_counters *cntrs)
1112{
1113        int ret;
1114
1115        if (!(ppd->dd->flags & QIB_PRESENT)) {
1116                /* no hardware, freeze, etc. */
1117                ret = -EINVAL;
1118                goto bail;
1119        }
1120        cntrs->symbol_error_counter =
1121                ppd->dd->f_portcntr(ppd, QIBPORTCNTR_IBSYMBOLERR);
1122        cntrs->link_error_recovery_counter =
1123                ppd->dd->f_portcntr(ppd, QIBPORTCNTR_IBLINKERRRECOV);
1124        /*
1125         * The link downed counter counts when the other side downs the
1126         * connection.  We add in the number of times we downed the link
1127         * due to local link integrity errors to compensate.
1128         */
1129        cntrs->link_downed_counter =
1130                ppd->dd->f_portcntr(ppd, QIBPORTCNTR_IBLINKDOWN);
1131        cntrs->port_rcv_errors =
1132                ppd->dd->f_portcntr(ppd, QIBPORTCNTR_RXDROPPKT) +
1133                ppd->dd->f_portcntr(ppd, QIBPORTCNTR_RCVOVFL) +
1134                ppd->dd->f_portcntr(ppd, QIBPORTCNTR_ERR_RLEN) +
1135                ppd->dd->f_portcntr(ppd, QIBPORTCNTR_INVALIDRLEN) +
1136                ppd->dd->f_portcntr(ppd, QIBPORTCNTR_ERRLINK) +
1137                ppd->dd->f_portcntr(ppd, QIBPORTCNTR_ERRICRC) +
1138                ppd->dd->f_portcntr(ppd, QIBPORTCNTR_ERRVCRC) +
1139                ppd->dd->f_portcntr(ppd, QIBPORTCNTR_ERRLPCRC) +
1140                ppd->dd->f_portcntr(ppd, QIBPORTCNTR_BADFORMAT);
1141        cntrs->port_rcv_errors +=
1142                ppd->dd->f_portcntr(ppd, QIBPORTCNTR_RXLOCALPHYERR);
1143        cntrs->port_rcv_errors +=
1144                ppd->dd->f_portcntr(ppd, QIBPORTCNTR_RXVLERR);
1145        cntrs->port_rcv_remphys_errors =
1146                ppd->dd->f_portcntr(ppd, QIBPORTCNTR_RCVEBP);
1147        cntrs->port_xmit_discards =
1148                ppd->dd->f_portcntr(ppd, QIBPORTCNTR_UNSUPVL);
1149        cntrs->port_xmit_data = ppd->dd->f_portcntr(ppd,
1150                        QIBPORTCNTR_WORDSEND);
1151        cntrs->port_rcv_data = ppd->dd->f_portcntr(ppd,
1152                        QIBPORTCNTR_WORDRCV);
1153        cntrs->port_xmit_packets = ppd->dd->f_portcntr(ppd,
1154                        QIBPORTCNTR_PKTSEND);
1155        cntrs->port_rcv_packets = ppd->dd->f_portcntr(ppd,
1156                        QIBPORTCNTR_PKTRCV);
1157        cntrs->local_link_integrity_errors =
1158                ppd->dd->f_portcntr(ppd, QIBPORTCNTR_LLI);
1159        cntrs->excessive_buffer_overrun_errors =
1160                ppd->dd->f_portcntr(ppd, QIBPORTCNTR_EXCESSBUFOVFL);
1161        cntrs->vl15_dropped =
1162                ppd->dd->f_portcntr(ppd, QIBPORTCNTR_VL15PKTDROP);
1163
1164        ret = 0;
1165
1166bail:
1167        return ret;
1168}
1169
1170/**
1171 * qib_ib_piobufavail - callback when a PIO buffer is available
1172 * @dd: the device pointer
1173 *
1174 * This is called from qib_intr() at interrupt level when a PIO buffer is
1175 * available after qib_verbs_send() returned an error that no buffers were
1176 * available. Disable the interrupt if there are no more QPs waiting.
1177 */
1178void qib_ib_piobufavail(struct qib_devdata *dd)
1179{
1180        struct qib_ibdev *dev = &dd->verbs_dev;
1181        struct list_head *list;
1182        struct rvt_qp *qps[5];
1183        struct rvt_qp *qp;
1184        unsigned long flags;
1185        unsigned i, n;
1186        struct qib_qp_priv *priv;
1187
1188        list = &dev->piowait;
1189        n = 0;
1190
1191        /*
1192         * Note: checking that the piowait list is empty and clearing
1193         * the buffer available interrupt needs to be atomic or we
1194         * could end up with QPs on the wait list with the interrupt
1195         * disabled.
1196         */
1197        spin_lock_irqsave(&dev->rdi.pending_lock, flags);
1198        while (!list_empty(list)) {
1199                if (n == ARRAY_SIZE(qps))
1200                        goto full;
1201                priv = list_entry(list->next, struct qib_qp_priv, iowait);
1202                qp = priv->owner;
1203                list_del_init(&priv->iowait);
1204                rvt_get_qp(qp);
1205                qps[n++] = qp;
1206        }
1207        dd->f_wantpiobuf_intr(dd, 0);
1208full:
1209        spin_unlock_irqrestore(&dev->rdi.pending_lock, flags);
1210
1211        for (i = 0; i < n; i++) {
1212                qp = qps[i];
1213
1214                spin_lock_irqsave(&qp->s_lock, flags);
1215                if (qp->s_flags & RVT_S_WAIT_PIO) {
1216                        qp->s_flags &= ~RVT_S_WAIT_PIO;
1217                        qib_schedule_send(qp);
1218                }
1219                spin_unlock_irqrestore(&qp->s_lock, flags);
1220
1221                /* Notify qib_destroy_qp() if it is waiting. */
1222                rvt_put_qp(qp);
1223        }
1224}
1225
1226static int qib_query_port(struct rvt_dev_info *rdi, u8 port_num,
1227                          struct ib_port_attr *props)
1228{
1229        struct qib_ibdev *ibdev = container_of(rdi, struct qib_ibdev, rdi);
1230        struct qib_devdata *dd = dd_from_dev(ibdev);
1231        struct qib_pportdata *ppd = &dd->pport[port_num - 1];
1232        enum ib_mtu mtu;
1233        u16 lid = ppd->lid;
1234
 1235        /* props is zeroed by the caller, so avoid zeroing it here */
1236        props->lid = lid ? lid : be16_to_cpu(IB_LID_PERMISSIVE);
1237        props->lmc = ppd->lmc;
1238        props->state = dd->f_iblink_state(ppd->lastibcstat);
1239        props->phys_state = dd->f_ibphys_portstate(ppd->lastibcstat);
1240        props->gid_tbl_len = QIB_GUIDS_PER_PORT;
1241        props->active_width = ppd->link_width_active;
1242        /* See rate_show() */
1243        props->active_speed = ppd->link_speed_active;
1244        props->max_vl_num = qib_num_vls(ppd->vls_supported);
1245
1246        props->max_mtu = qib_ibmtu ? qib_ibmtu : IB_MTU_4096;
1247        switch (ppd->ibmtu) {
1248        case 4096:
1249                mtu = IB_MTU_4096;
1250                break;
1251        case 2048:
1252                mtu = IB_MTU_2048;
1253                break;
1254        case 1024:
1255                mtu = IB_MTU_1024;
1256                break;
1257        case 512:
1258                mtu = IB_MTU_512;
1259                break;
1260        case 256:
1261                mtu = IB_MTU_256;
1262                break;
1263        default:
1264                mtu = IB_MTU_2048;
1265        }
1266        props->active_mtu = mtu;
1267
1268        return 0;
1269}
1270
1271static int qib_modify_device(struct ib_device *device,
1272                             int device_modify_mask,
1273                             struct ib_device_modify *device_modify)
1274{
1275        struct qib_devdata *dd = dd_from_ibdev(device);
1276        unsigned i;
1277        int ret;
1278
1279        if (device_modify_mask & ~(IB_DEVICE_MODIFY_SYS_IMAGE_GUID |
1280                                   IB_DEVICE_MODIFY_NODE_DESC)) {
1281                ret = -EOPNOTSUPP;
1282                goto bail;
1283        }
1284
1285        if (device_modify_mask & IB_DEVICE_MODIFY_NODE_DESC) {
1286                memcpy(device->node_desc, device_modify->node_desc,
1287                       IB_DEVICE_NODE_DESC_MAX);
1288                for (i = 0; i < dd->num_pports; i++) {
1289                        struct qib_ibport *ibp = &dd->pport[i].ibport_data;
1290
1291                        qib_node_desc_chg(ibp);
1292                }
1293        }
1294
1295        if (device_modify_mask & IB_DEVICE_MODIFY_SYS_IMAGE_GUID) {
1296                ib_qib_sys_image_guid =
1297                        cpu_to_be64(device_modify->sys_image_guid);
1298                for (i = 0; i < dd->num_pports; i++) {
1299                        struct qib_ibport *ibp = &dd->pport[i].ibport_data;
1300
1301                        qib_sys_guid_chg(ibp);
1302                }
1303        }
1304
1305        ret = 0;
1306
1307bail:
1308        return ret;
1309}
1310
1311static int qib_shut_down_port(struct rvt_dev_info *rdi, u8 port_num)
1312{
1313        struct qib_ibdev *ibdev = container_of(rdi, struct qib_ibdev, rdi);
1314        struct qib_devdata *dd = dd_from_dev(ibdev);
1315        struct qib_pportdata *ppd = &dd->pport[port_num - 1];
1316
1317        qib_set_linkstate(ppd, QIB_IB_LINKDOWN);
1318
1319        return 0;
1320}
1321
1322static int qib_get_guid_be(struct rvt_dev_info *rdi, struct rvt_ibport *rvp,
1323                           int guid_index, __be64 *guid)
1324{
1325        struct qib_ibport *ibp = container_of(rvp, struct qib_ibport, rvp);
1326        struct qib_pportdata *ppd = ppd_from_ibp(ibp);
1327
1328        if (guid_index == 0)
1329                *guid = ppd->guid;
1330        else if (guid_index < QIB_GUIDS_PER_PORT)
1331                *guid = ibp->guids[guid_index - 1];
1332        else
1333                return -EINVAL;
1334
1335        return 0;
1336}
1337
1338int qib_check_ah(struct ib_device *ibdev, struct rdma_ah_attr *ah_attr)
1339{
1340        if (rdma_ah_get_sl(ah_attr) > 15)
1341                return -EINVAL;
1342
1343        if (rdma_ah_get_dlid(ah_attr) == 0)
1344                return -EINVAL;
1345        if (rdma_ah_get_dlid(ah_attr) >=
1346                be16_to_cpu(IB_MULTICAST_LID_BASE) &&
1347            rdma_ah_get_dlid(ah_attr) !=
1348                be16_to_cpu(IB_LID_PERMISSIVE) &&
1349            !(rdma_ah_get_ah_flags(ah_attr) & IB_AH_GRH))
1350                return -EINVAL;
1351
1352        return 0;
1353}
1354
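For example (illustrative values only), an AH with SL 3, DLID 0xC001 and no IB_AH_GRH flag is rejected with -EINVAL by the check above, since DLIDs at or above IB_MULTICAST_LID_BASE (0xC000) other than the permissive LID require a GRH.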
1355static void qib_notify_new_ah(struct ib_device *ibdev,
1356                              struct rdma_ah_attr *ah_attr,
1357                              struct rvt_ah *ah)
1358{
1359        struct qib_ibport *ibp;
1360        struct qib_pportdata *ppd;
1361
1362        /*
 1363 * Do not trust anything read from the rvt_ah at this point, as it is not
 1364 * fully set up yet.  We can, however, modify the fields we need to set.
1365         */
1366
1367        ibp = to_iport(ibdev, rdma_ah_get_port_num(ah_attr));
1368        ppd = ppd_from_ibp(ibp);
1369        ah->vl = ibp->sl_to_vl[rdma_ah_get_sl(&ah->attr)];
1370        ah->log_pmtu = ilog2(ppd->ibmtu);
1371}
1372
1373struct ib_ah *qib_create_qp0_ah(struct qib_ibport *ibp, u16 dlid)
1374{
1375        struct rdma_ah_attr attr;
1376        struct ib_ah *ah = ERR_PTR(-EINVAL);
1377        struct rvt_qp *qp0;
1378        struct qib_pportdata *ppd = ppd_from_ibp(ibp);
1379        struct qib_devdata *dd = dd_from_ppd(ppd);
1380        u8 port_num = ppd->port;
1381
1382        memset(&attr, 0, sizeof(attr));
1383        attr.type = rdma_ah_find_type(&dd->verbs_dev.rdi.ibdev, port_num);
1384        rdma_ah_set_dlid(&attr, dlid);
1385        rdma_ah_set_port_num(&attr, port_num);
1386        rcu_read_lock();
1387        qp0 = rcu_dereference(ibp->rvp.qp[0]);
1388        if (qp0)
1389                ah = rdma_create_ah(qp0->ibqp.pd, &attr);
1390        rcu_read_unlock();
1391        return ah;
1392}
1393
1394/**
1395 * qib_get_npkeys - return the size of the PKEY table for context 0
1396 * @dd: the qlogic_ib device
1397 */
1398unsigned qib_get_npkeys(struct qib_devdata *dd)
1399{
1400        return ARRAY_SIZE(dd->rcd[0]->pkeys);
1401}
1402
1403/*
1404 * Return the indexed PKEY from the port PKEY table.
 1405 * No need to validate rcd[ctxt]; the port is set up if we are here.
1406 */
1407unsigned qib_get_pkey(struct qib_ibport *ibp, unsigned index)
1408{
1409        struct qib_pportdata *ppd = ppd_from_ibp(ibp);
1410        struct qib_devdata *dd = ppd->dd;
1411        unsigned ctxt = ppd->hw_pidx;
1412        unsigned ret;
1413
1414        /* dd->rcd null if mini_init or some init failures */
1415        if (!dd->rcd || index >= ARRAY_SIZE(dd->rcd[ctxt]->pkeys))
1416                ret = 0;
1417        else
1418                ret = dd->rcd[ctxt]->pkeys[index];
1419
1420        return ret;
1421}
1422
1423static void init_ibport(struct qib_pportdata *ppd)
1424{
1425        struct qib_verbs_counters cntrs;
1426        struct qib_ibport *ibp = &ppd->ibport_data;
1427
1428        spin_lock_init(&ibp->rvp.lock);
1429        /* Set the prefix to the default value (see ch. 4.1.1) */
1430        ibp->rvp.gid_prefix = IB_DEFAULT_GID_PREFIX;
1431        ibp->rvp.sm_lid = be16_to_cpu(IB_LID_PERMISSIVE);
1432        ibp->rvp.port_cap_flags = IB_PORT_SYS_IMAGE_GUID_SUP |
1433                IB_PORT_CLIENT_REG_SUP | IB_PORT_SL_MAP_SUP |
1434                IB_PORT_TRAP_SUP | IB_PORT_AUTO_MIGR_SUP |
1435                IB_PORT_DR_NOTICE_SUP | IB_PORT_CAP_MASK_NOTICE_SUP |
1436                IB_PORT_OTHER_LOCAL_CHANGES_SUP;
1437        if (ppd->dd->flags & QIB_HAS_LINK_LATENCY)
1438                ibp->rvp.port_cap_flags |= IB_PORT_LINK_LATENCY_SUP;
1439        ibp->rvp.pma_counter_select[0] = IB_PMA_PORT_XMIT_DATA;
1440        ibp->rvp.pma_counter_select[1] = IB_PMA_PORT_RCV_DATA;
1441        ibp->rvp.pma_counter_select[2] = IB_PMA_PORT_XMIT_PKTS;
1442        ibp->rvp.pma_counter_select[3] = IB_PMA_PORT_RCV_PKTS;
1443        ibp->rvp.pma_counter_select[4] = IB_PMA_PORT_XMIT_WAIT;
1444
1445        /* Snapshot current HW counters to "clear" them. */
1446        qib_get_counters(ppd, &cntrs);
1447        ibp->z_symbol_error_counter = cntrs.symbol_error_counter;
1448        ibp->z_link_error_recovery_counter =
1449                cntrs.link_error_recovery_counter;
1450        ibp->z_link_downed_counter = cntrs.link_downed_counter;
1451        ibp->z_port_rcv_errors = cntrs.port_rcv_errors;
1452        ibp->z_port_rcv_remphys_errors = cntrs.port_rcv_remphys_errors;
1453        ibp->z_port_xmit_discards = cntrs.port_xmit_discards;
1454        ibp->z_port_xmit_data = cntrs.port_xmit_data;
1455        ibp->z_port_rcv_data = cntrs.port_rcv_data;
1456        ibp->z_port_xmit_packets = cntrs.port_xmit_packets;
1457        ibp->z_port_rcv_packets = cntrs.port_rcv_packets;
1458        ibp->z_local_link_integrity_errors =
1459                cntrs.local_link_integrity_errors;
1460        ibp->z_excessive_buffer_overrun_errors =
1461                cntrs.excessive_buffer_overrun_errors;
1462        ibp->z_vl15_dropped = cntrs.vl15_dropped;
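        /* QP0 and QP1 are looked up under RCU; start with empty slots. */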
1463        RCU_INIT_POINTER(ibp->rvp.qp[0], NULL);
1464        RCU_INIT_POINTER(ibp->rvp.qp[1], NULL);
1465}
1466
1467/**
1468 * qib_fill_device_attr - Fill in rvt dev info device attributes.
1469 * @dd: the device data structure
1470 */
1471static void qib_fill_device_attr(struct qib_devdata *dd)
1472{
1473        struct rvt_dev_info *rdi = &dd->verbs_dev.rdi;
1474
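        /* These properties are reported by rdmavt in response to device queries. */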
1475        memset(&rdi->dparms.props, 0, sizeof(rdi->dparms.props));
1476
1477        rdi->dparms.props.max_pd = ib_qib_max_pds;
1478        rdi->dparms.props.max_ah = ib_qib_max_ahs;
1479        rdi->dparms.props.device_cap_flags = IB_DEVICE_BAD_PKEY_CNTR |
1480                IB_DEVICE_BAD_QKEY_CNTR | IB_DEVICE_SHUTDOWN_PORT |
1481                IB_DEVICE_SYS_IMAGE_GUID | IB_DEVICE_RC_RNR_NAK_GEN |
1482                IB_DEVICE_PORT_ACTIVE_EVENT | IB_DEVICE_SRQ_RESIZE;
1483        rdi->dparms.props.page_size_cap = PAGE_SIZE;
1484        rdi->dparms.props.vendor_id =
1485                QIB_SRC_OUI_1 << 16 | QIB_SRC_OUI_2 << 8 | QIB_SRC_OUI_3;
1486        rdi->dparms.props.vendor_part_id = dd->deviceid;
1487        rdi->dparms.props.hw_ver = dd->minrev;
1488        rdi->dparms.props.sys_image_guid = ib_qib_sys_image_guid;
1489        rdi->dparms.props.max_mr_size = ~0ULL;
1490        rdi->dparms.props.max_qp = ib_qib_max_qps;
1491        rdi->dparms.props.max_qp_wr = ib_qib_max_qp_wrs;
1492        rdi->dparms.props.max_sge = ib_qib_max_sges;
1493        rdi->dparms.props.max_sge_rd = ib_qib_max_sges;
1494        rdi->dparms.props.max_cq = ib_qib_max_cqs;
1495        rdi->dparms.props.max_cqe = ib_qib_max_cqes;
1496        rdi->dparms.props.max_ah = ib_qib_max_ahs;
1497        rdi->dparms.props.max_mr = rdi->lkey_table.max;
1498        rdi->dparms.props.max_fmr = rdi->lkey_table.max;
1499        rdi->dparms.props.max_map_per_fmr = 32767;
1500        rdi->dparms.props.max_qp_rd_atom = QIB_MAX_RDMA_ATOMIC;
1501        rdi->dparms.props.max_qp_init_rd_atom = 255;
1502        rdi->dparms.props.max_srq = ib_qib_max_srqs;
1503        rdi->dparms.props.max_srq_wr = ib_qib_max_srq_wrs;
1504        rdi->dparms.props.max_srq_sge = ib_qib_max_srq_sges;
1505        rdi->dparms.props.atomic_cap = IB_ATOMIC_GLOB;
1506        rdi->dparms.props.max_pkeys = qib_get_npkeys(dd);
1507        rdi->dparms.props.max_mcast_grp = ib_qib_max_mcast_grps;
1508        rdi->dparms.props.max_mcast_qp_attach = ib_qib_max_mcast_qp_attached;
1509        rdi->dparms.props.max_total_mcast_qp_attach =
1510                                        rdi->dparms.props.max_mcast_qp_attach *
1511                                        rdi->dparms.props.max_mcast_grp;
1512        /* post send table */
1513        dd->verbs_dev.rdi.post_parms = qib_post_parms;
1514}
1515
1516/**
1517 * qib_register_ib_device - register our device with the infiniband core
1518 * @dd: the device data structure
1519 * Return: 0 on success or a negative errno on failure.
1520 */
1521int qib_register_ib_device(struct qib_devdata *dd)
1522{
1523        struct qib_ibdev *dev = &dd->verbs_dev;
1524        struct ib_device *ibdev = &dev->rdi.ibdev;
1525        struct qib_pportdata *ppd = dd->pport;
1526        unsigned i, ctxt;
1527        int ret;
1528
1529        get_random_bytes(&dev->qp_rnd, sizeof(dev->qp_rnd));
1530        for (i = 0; i < dd->num_pports; i++)
1531                init_ibport(ppd + i);
1532
1533        /* Only need to initialize non-zero fields. */
1534        timer_setup(&dev->mem_timer, mem_timer, 0);
1535
1536        INIT_LIST_HEAD(&dev->piowait);
1537        INIT_LIST_HEAD(&dev->dmawait);
1538        INIT_LIST_HEAD(&dev->txwait);
1539        INIT_LIST_HEAD(&dev->memwait);
1540        INIT_LIST_HEAD(&dev->txreq_free);
1541
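        /*
         * Allocate DMA-coherent PIO header buffers, one per SDMA descriptor
         * queue entry.
         */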
1542        if (ppd->sdma_descq_cnt) {
1543                dev->pio_hdrs = dma_alloc_coherent(&dd->pcidev->dev,
1544                                                ppd->sdma_descq_cnt *
1545                                                sizeof(struct qib_pio_header),
1546                                                &dev->pio_hdrs_phys,
1547                                                GFP_KERNEL);
1548                if (!dev->pio_hdrs) {
1549                        ret = -ENOMEM;
1550                        goto err_hdrs;
1551                }
1552        }
1553
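        /* Pre-allocate a pool of verbs tx requests, one per SDMA descriptor. */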
1554        for (i = 0; i < ppd->sdma_descq_cnt; i++) {
1555                struct qib_verbs_txreq *tx;
1556
1557                tx = kzalloc(sizeof(*tx), GFP_KERNEL);
1558                if (!tx) {
1559                        ret = -ENOMEM;
1560                        goto err_tx;
1561                }
1562                tx->hdr_inx = i;
1563                list_add(&tx->txreq.list, &dev->txreq_free);
1564        }
1565
1566        /*
1567         * The system image GUID is supposed to be the same for all
1568         * IB HCAs in a single system, but since there can be other
1569         * device types in the system, we can't be sure this is unique.
1570         */
1571        if (!ib_qib_sys_image_guid)
1572                ib_qib_sys_image_guid = ppd->guid;
1573
1574        ibdev->owner = THIS_MODULE;
1575        ibdev->node_guid = ppd->guid;
1576        ibdev->phys_port_cnt = dd->num_pports;
1577        ibdev->dev.parent = &dd->pcidev->dev;
1578        ibdev->modify_device = qib_modify_device;
1579        ibdev->process_mad = qib_process_mad;
1580
1581        snprintf(ibdev->node_desc, sizeof(ibdev->node_desc),
1582                 "Intel Infiniband HCA %s", init_utsname()->nodename);
1583
1584        /*
1585         * Fill in rvt info object.
1586         */
1587        dd->verbs_dev.rdi.driver_f.port_callback = qib_create_port_files;
1588        dd->verbs_dev.rdi.driver_f.get_pci_dev = qib_get_pci_dev;
1589        dd->verbs_dev.rdi.driver_f.check_ah = qib_check_ah;
1590        dd->verbs_dev.rdi.driver_f.check_send_wqe = qib_check_send_wqe;
1591        dd->verbs_dev.rdi.driver_f.notify_new_ah = qib_notify_new_ah;
1592        dd->verbs_dev.rdi.driver_f.alloc_qpn = qib_alloc_qpn;
1593        dd->verbs_dev.rdi.driver_f.qp_priv_alloc = qib_qp_priv_alloc;
1594        dd->verbs_dev.rdi.driver_f.qp_priv_free = qib_qp_priv_free;
1595        dd->verbs_dev.rdi.driver_f.free_all_qps = qib_free_all_qps;
1596        dd->verbs_dev.rdi.driver_f.notify_qp_reset = qib_notify_qp_reset;
1597        dd->verbs_dev.rdi.driver_f.do_send = qib_do_send;
1598        dd->verbs_dev.rdi.driver_f.schedule_send = qib_schedule_send;
1599        dd->verbs_dev.rdi.driver_f.quiesce_qp = qib_quiesce_qp;
1600        dd->verbs_dev.rdi.driver_f.stop_send_queue = qib_stop_send_queue;
1601        dd->verbs_dev.rdi.driver_f.flush_qp_waiters = qib_flush_qp_waiters;
1602        dd->verbs_dev.rdi.driver_f.notify_error_qp = qib_notify_error_qp;
1603        dd->verbs_dev.rdi.driver_f.notify_restart_rc = qib_restart_rc;
1604        dd->verbs_dev.rdi.driver_f.mtu_to_path_mtu = qib_mtu_to_path_mtu;
1605        dd->verbs_dev.rdi.driver_f.mtu_from_qp = qib_mtu_from_qp;
1606        dd->verbs_dev.rdi.driver_f.get_pmtu_from_attr = qib_get_pmtu_from_attr;
1607        dd->verbs_dev.rdi.driver_f.schedule_send_no_lock = _qib_schedule_send;
1608        dd->verbs_dev.rdi.driver_f.query_port_state = qib_query_port;
1609        dd->verbs_dev.rdi.driver_f.shut_down_port = qib_shut_down_port;
1610        dd->verbs_dev.rdi.driver_f.cap_mask_chg = qib_cap_mask_chg;
1611        dd->verbs_dev.rdi.driver_f.notify_create_mad_agent =
1612                                                qib_notify_create_mad_agent;
1613        dd->verbs_dev.rdi.driver_f.notify_free_mad_agent =
1614                                                qib_notify_free_mad_agent;
1615
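        /*
         * Device parameters handed to rdmavt: QPN allocation ranges, PSN
         * handling, and table sizes.
         */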
1616        dd->verbs_dev.rdi.dparms.max_rdma_atomic = QIB_MAX_RDMA_ATOMIC;
1617        dd->verbs_dev.rdi.driver_f.get_guid_be = qib_get_guid_be;
1618        dd->verbs_dev.rdi.dparms.lkey_table_size = qib_lkey_table_size;
1619        dd->verbs_dev.rdi.dparms.qp_table_size = ib_qib_qp_table_size;
1620        dd->verbs_dev.rdi.dparms.qpn_start = 1;
1621        dd->verbs_dev.rdi.dparms.qpn_res_start = QIB_KD_QP;
1622        dd->verbs_dev.rdi.dparms.qpn_res_end = QIB_KD_QP; /* Reserve one QP */
1623        dd->verbs_dev.rdi.dparms.qpn_inc = 1;
1624        dd->verbs_dev.rdi.dparms.qos_shift = 1;
1625        dd->verbs_dev.rdi.dparms.psn_mask = QIB_PSN_MASK;
1626        dd->verbs_dev.rdi.dparms.psn_shift = QIB_PSN_SHIFT;
1627        dd->verbs_dev.rdi.dparms.psn_modify_mask = QIB_PSN_MASK;
1628        dd->verbs_dev.rdi.dparms.nports = dd->num_pports;
1629        dd->verbs_dev.rdi.dparms.npkeys = qib_get_npkeys(dd);
1630        dd->verbs_dev.rdi.dparms.node = dd->assigned_node_id;
1631        dd->verbs_dev.rdi.dparms.core_cap_flags = RDMA_CORE_PORT_IBA_IB;
1632        dd->verbs_dev.rdi.dparms.max_mad_size = IB_MGMT_MAD_SIZE;
1633
1634        snprintf(dd->verbs_dev.rdi.dparms.cq_name,
1635                 sizeof(dd->verbs_dev.rdi.dparms.cq_name),
1636                 "qib_cq%d", dd->unit);
1637
1638        qib_fill_device_attr(dd);
1639
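        /* Hand each port's rvt port structure and PKEY table to rdmavt. */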
1640        ppd = dd->pport;
1641        for (i = 0; i < dd->num_pports; i++, ppd++) {
1642                ctxt = ppd->hw_pidx;
1643                rvt_init_port(&dd->verbs_dev.rdi,
1644                              &ppd->ibport_data.rvp,
1645                              i,
1646                              dd->rcd[ctxt]->pkeys);
1647        }
1648
1649        ret = rvt_register_device(&dd->verbs_dev.rdi, RDMA_DRIVER_QIB);
1650        if (ret)
1651                goto err_tx;
1652
1653        ret = qib_verbs_register_sysfs(dd);
1654        if (ret)
1655                goto err_class;
1656
1657        return ret;
1658
1659err_class:
1660        rvt_unregister_device(&dd->verbs_dev.rdi);
1661err_tx:
1662        while (!list_empty(&dev->txreq_free)) {
1663                struct list_head *l = dev->txreq_free.next;
1664                struct qib_verbs_txreq *tx;
1665
1666                list_del(l);
1667                tx = list_entry(l, struct qib_verbs_txreq, txreq.list);
1668                kfree(tx);
1669        }
1670        if (ppd->sdma_descq_cnt)
1671                dma_free_coherent(&dd->pcidev->dev,
1672                                  ppd->sdma_descq_cnt *
1673                                        sizeof(struct qib_pio_header),
1674                                  dev->pio_hdrs, dev->pio_hdrs_phys);
1675err_hdrs:
1676        qib_dev_err(dd, "cannot register verbs: %d!\n", -ret);
1677        return ret;
1678}
1679
1680void qib_unregister_ib_device(struct qib_devdata *dd)
1681{
1682        struct qib_ibdev *dev = &dd->verbs_dev;
1683
1684        qib_verbs_unregister_sysfs(dd);
1685
1686        rvt_unregister_device(&dd->verbs_dev.rdi);
1687
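        /* All QPs should be gone; warn if anything is still waiting for resources. */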
1688        if (!list_empty(&dev->piowait))
1689                qib_dev_err(dd, "piowait list not empty!\n");
1690        if (!list_empty(&dev->dmawait))
1691                qib_dev_err(dd, "dmawait list not empty!\n");
1692        if (!list_empty(&dev->txwait))
1693                qib_dev_err(dd, "txwait list not empty!\n");
1694        if (!list_empty(&dev->memwait))
1695                qib_dev_err(dd, "memwait list not empty!\n");
1696
1697        del_timer_sync(&dev->mem_timer);
1698        while (!list_empty(&dev->txreq_free)) {
1699                struct list_head *l = dev->txreq_free.next;
1700                struct qib_verbs_txreq *tx;
1701
1702                list_del(l);
1703                tx = list_entry(l, struct qib_verbs_txreq, txreq.list);
1704                kfree(tx);
1705        }
1706        if (dd->pport->sdma_descq_cnt)
1707                dma_free_coherent(&dd->pcidev->dev,
1708                                  dd->pport->sdma_descq_cnt *
1709                                        sizeof(struct qib_pio_header),
1710                                  dev->pio_hdrs, dev->pio_hdrs_phys);
1711}
1712
1713/**
1714 * _qib_schedule_send - schedule progress
1715 * @qp: the qp
1716 *
1717 * This schedules progress without regard to the s_flags.
1718 *
1719 * It is only used in post send, which doesn't hold
1720 * the s_lock.
1721 */
1722void _qib_schedule_send(struct rvt_qp *qp)
1723{
1724        struct qib_ibport *ibp =
1725                to_iport(qp->ibqp.device, qp->port_num);
1726        struct qib_pportdata *ppd = ppd_from_ibp(ibp);
1727        struct qib_qp_priv *priv = qp->priv;
1728
1729        queue_work(ppd->qib_wq, &priv->s_work);
1730}
1731
1732/**
1733 * qib_schedule_send - schedule progress
1734 * @qp: the qp
1735 *
1736 * This schedules qp progress.  The s_lock
1737 * should be held.
1738 */
1739void qib_schedule_send(struct rvt_qp *qp)
1740{
1741        if (qib_send_ok(qp))
1742                _qib_schedule_send(qp);
1743}
1744