linux/drivers/infiniband/hw/qib/qib_verbs.c
   1/*
   2 * Copyright (c) 2012, 2013 Intel Corporation.  All rights reserved.
   3 * Copyright (c) 2006 - 2012 QLogic Corporation. All rights reserved.
   4 * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
   5 *
   6 * This software is available to you under a choice of one of two
   7 * licenses.  You may choose to be licensed under the terms of the GNU
   8 * General Public License (GPL) Version 2, available from the file
   9 * COPYING in the main directory of this source tree, or the
  10 * OpenIB.org BSD license below:
  11 *
  12 *     Redistribution and use in source and binary forms, with or
  13 *     without modification, are permitted provided that the following
  14 *     conditions are met:
  15 *
  16 *      - Redistributions of source code must retain the above
  17 *        copyright notice, this list of conditions and the following
  18 *        disclaimer.
  19 *
  20 *      - Redistributions in binary form must reproduce the above
  21 *        copyright notice, this list of conditions and the following
  22 *        disclaimer in the documentation and/or other materials
  23 *        provided with the distribution.
  24 *
  25 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  26 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  27 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  28 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
  29 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
  30 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  31 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  32 * SOFTWARE.
  33 */
  34
  35#include <rdma/ib_mad.h>
  36#include <rdma/ib_user_verbs.h>
  37#include <linux/io.h>
  38#include <linux/module.h>
  39#include <linux/utsname.h>
  40#include <linux/rculist.h>
  41#include <linux/mm.h>
  42#include <linux/random.h>
  43#include <linux/vmalloc.h>
  44#include <rdma/rdma_vt.h>
  45
  46#include "qib.h"
  47#include "qib_common.h"
  48
  49static unsigned int ib_qib_qp_table_size = 256;
  50module_param_named(qp_table_size, ib_qib_qp_table_size, uint, S_IRUGO);
  51MODULE_PARM_DESC(qp_table_size, "QP table size");
  52
  53static unsigned int qib_lkey_table_size = 16;
  54module_param_named(lkey_table_size, qib_lkey_table_size, uint,
  55                   S_IRUGO);
  56MODULE_PARM_DESC(lkey_table_size,
  57                 "LKEY table size in bits (2^n, 1 <= n <= 23)");
  58
  59static unsigned int ib_qib_max_pds = 0xFFFF;
  60module_param_named(max_pds, ib_qib_max_pds, uint, S_IRUGO);
  61MODULE_PARM_DESC(max_pds,
  62                 "Maximum number of protection domains to support");
  63
  64static unsigned int ib_qib_max_ahs = 0xFFFF;
  65module_param_named(max_ahs, ib_qib_max_ahs, uint, S_IRUGO);
  66MODULE_PARM_DESC(max_ahs, "Maximum number of address handles to support");
  67
  68unsigned int ib_qib_max_cqes = 0x2FFFF;
  69module_param_named(max_cqes, ib_qib_max_cqes, uint, S_IRUGO);
  70MODULE_PARM_DESC(max_cqes,
  71                 "Maximum number of completion queue entries to support");
  72
  73unsigned int ib_qib_max_cqs = 0x1FFFF;
  74module_param_named(max_cqs, ib_qib_max_cqs, uint, S_IRUGO);
  75MODULE_PARM_DESC(max_cqs, "Maximum number of completion queues to support");
  76
  77unsigned int ib_qib_max_qp_wrs = 0x3FFF;
  78module_param_named(max_qp_wrs, ib_qib_max_qp_wrs, uint, S_IRUGO);
  79MODULE_PARM_DESC(max_qp_wrs, "Maximum number of QP WRs to support");
  80
  81unsigned int ib_qib_max_qps = 16384;
  82module_param_named(max_qps, ib_qib_max_qps, uint, S_IRUGO);
  83MODULE_PARM_DESC(max_qps, "Maximum number of QPs to support");
  84
  85unsigned int ib_qib_max_sges = 0x60;
  86module_param_named(max_sges, ib_qib_max_sges, uint, S_IRUGO);
  87MODULE_PARM_DESC(max_sges, "Maximum number of SGEs to support");
  88
  89unsigned int ib_qib_max_mcast_grps = 16384;
  90module_param_named(max_mcast_grps, ib_qib_max_mcast_grps, uint, S_IRUGO);
  91MODULE_PARM_DESC(max_mcast_grps,
  92                 "Maximum number of multicast groups to support");
  93
  94unsigned int ib_qib_max_mcast_qp_attached = 16;
  95module_param_named(max_mcast_qp_attached, ib_qib_max_mcast_qp_attached,
  96                   uint, S_IRUGO);
  97MODULE_PARM_DESC(max_mcast_qp_attached,
  98                 "Maximum number of attached QPs to support");
  99
 100unsigned int ib_qib_max_srqs = 1024;
 101module_param_named(max_srqs, ib_qib_max_srqs, uint, S_IRUGO);
 102MODULE_PARM_DESC(max_srqs, "Maximum number of SRQs to support");
 103
 104unsigned int ib_qib_max_srq_sges = 128;
 105module_param_named(max_srq_sges, ib_qib_max_srq_sges, uint, S_IRUGO);
 106MODULE_PARM_DESC(max_srq_sges, "Maximum number of SRQ SGEs to support");
 107
 108unsigned int ib_qib_max_srq_wrs = 0x1FFFF;
 109module_param_named(max_srq_wrs, ib_qib_max_srq_wrs, uint, S_IRUGO);
  110MODULE_PARM_DESC(max_srq_wrs, "Maximum number of SRQ WRs to support");
 111
 112static unsigned int ib_qib_disable_sma;
 113module_param_named(disable_sma, ib_qib_disable_sma, uint, S_IWUSR | S_IRUGO);
 114MODULE_PARM_DESC(disable_sma, "Disable the SMA");
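
     /*
      * Usage note (illustrative, not part of the original source): all of the
      * limits above are read-only module parameters, so they are normally set
      * when the driver loads, e.g. "modprobe ib_qib max_qps=32768
      * lkey_table_size=17" or an options line in /etc/modprobe.d/, and can be
      * read back under /sys/module/ib_qib/parameters/.  Only disable_sma
      * (S_IWUSR) may also be changed at runtime by root.
      */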
 115
 116/*
 117 * System image GUID.
 118 */
 119__be64 ib_qib_sys_image_guid;
 120
 121/**
 122 * qib_copy_sge - copy data to SGE memory
 123 * @ss: the SGE state
 124 * @data: the data to copy
  125 * @length: the length of the data
      * @release: boolean to release the MR as each SGE is consumed
  126 */
 127void qib_copy_sge(struct rvt_sge_state *ss, void *data, u32 length, int release)
 128{
 129        struct rvt_sge *sge = &ss->sge;
 130
 131        while (length) {
 132                u32 len = sge->length;
 133
 134                if (len > length)
 135                        len = length;
 136                if (len > sge->sge_length)
 137                        len = sge->sge_length;
 138                BUG_ON(len == 0);
 139                memcpy(sge->vaddr, data, len);
 140                sge->vaddr += len;
 141                sge->length -= len;
 142                sge->sge_length -= len;
 143                if (sge->sge_length == 0) {
 144                        if (release)
 145                                rvt_put_mr(sge->mr);
 146                        if (--ss->num_sge)
 147                                *sge = *ss->sg_list++;
 148                } else if (sge->length == 0 && sge->mr->lkey) {
 149                        if (++sge->n >= RVT_SEGSZ) {
 150                                if (++sge->m >= sge->mr->mapsz)
 151                                        break;
 152                                sge->n = 0;
 153                        }
 154                        sge->vaddr =
 155                                sge->mr->map[sge->m]->segs[sge->n].vaddr;
 156                        sge->length =
 157                                sge->mr->map[sge->m]->segs[sge->n].length;
 158                }
 159                data += len;
 160                length -= len;
 161        }
 162}
 163
 164/**
 165 * qib_skip_sge - skip over SGE memory - XXX almost dup of prev func
 166 * @ss: the SGE state
  167 * @length: the number of bytes to skip
      * @release: boolean to release the MR as each SGE is consumed
  168 */
 169void qib_skip_sge(struct rvt_sge_state *ss, u32 length, int release)
 170{
 171        struct rvt_sge *sge = &ss->sge;
 172
 173        while (length) {
 174                u32 len = sge->length;
 175
 176                if (len > length)
 177                        len = length;
 178                if (len > sge->sge_length)
 179                        len = sge->sge_length;
 180                BUG_ON(len == 0);
 181                sge->vaddr += len;
 182                sge->length -= len;
 183                sge->sge_length -= len;
 184                if (sge->sge_length == 0) {
 185                        if (release)
 186                                rvt_put_mr(sge->mr);
 187                        if (--ss->num_sge)
 188                                *sge = *ss->sg_list++;
 189                } else if (sge->length == 0 && sge->mr->lkey) {
 190                        if (++sge->n >= RVT_SEGSZ) {
 191                                if (++sge->m >= sge->mr->mapsz)
 192                                        break;
 193                                sge->n = 0;
 194                        }
 195                        sge->vaddr =
 196                                sge->mr->map[sge->m]->segs[sge->n].vaddr;
 197                        sge->length =
 198                                sge->mr->map[sge->m]->segs[sge->n].length;
 199                }
 200                length -= len;
 201        }
 202}
 203
 204/*
 205 * Count the number of DMA descriptors needed to send length bytes of data.
 206 * Don't modify the qib_sge_state to get the count.
 207 * Return zero if any of the segments is not aligned.
 208 */
 209static u32 qib_count_sge(struct rvt_sge_state *ss, u32 length)
 210{
 211        struct rvt_sge *sg_list = ss->sg_list;
 212        struct rvt_sge sge = ss->sge;
 213        u8 num_sge = ss->num_sge;
 214        u32 ndesc = 1;  /* count the header */
 215
 216        while (length) {
 217                u32 len = sge.length;
 218
 219                if (len > length)
 220                        len = length;
 221                if (len > sge.sge_length)
 222                        len = sge.sge_length;
 223                BUG_ON(len == 0);
 224                if (((long) sge.vaddr & (sizeof(u32) - 1)) ||
 225                    (len != length && (len & (sizeof(u32) - 1)))) {
 226                        ndesc = 0;
 227                        break;
 228                }
 229                ndesc++;
 230                sge.vaddr += len;
 231                sge.length -= len;
 232                sge.sge_length -= len;
 233                if (sge.sge_length == 0) {
 234                        if (--num_sge)
 235                                sge = *sg_list++;
 236                } else if (sge.length == 0 && sge.mr->lkey) {
 237                        if (++sge.n >= RVT_SEGSZ) {
 238                                if (++sge.m >= sge.mr->mapsz)
 239                                        break;
 240                                sge.n = 0;
 241                        }
 242                        sge.vaddr =
 243                                sge.mr->map[sge.m]->segs[sge.n].vaddr;
 244                        sge.length =
 245                                sge.mr->map[sge.m]->segs[sge.n].length;
 246                }
 247                length -= len;
 248        }
 249        return ndesc;
 250}
 251
 252/*
 253 * Copy from the SGEs to the data buffer.
 254 */
 255static void qib_copy_from_sge(void *data, struct rvt_sge_state *ss, u32 length)
 256{
 257        struct rvt_sge *sge = &ss->sge;
 258
 259        while (length) {
 260                u32 len = sge->length;
 261
 262                if (len > length)
 263                        len = length;
 264                if (len > sge->sge_length)
 265                        len = sge->sge_length;
 266                BUG_ON(len == 0);
 267                memcpy(data, sge->vaddr, len);
 268                sge->vaddr += len;
 269                sge->length -= len;
 270                sge->sge_length -= len;
 271                if (sge->sge_length == 0) {
 272                        if (--ss->num_sge)
 273                                *sge = *ss->sg_list++;
 274                } else if (sge->length == 0 && sge->mr->lkey) {
 275                        if (++sge->n >= RVT_SEGSZ) {
 276                                if (++sge->m >= sge->mr->mapsz)
 277                                        break;
 278                                sge->n = 0;
 279                        }
 280                        sge->vaddr =
 281                                sge->mr->map[sge->m]->segs[sge->n].vaddr;
 282                        sge->length =
 283                                sge->mr->map[sge->m]->segs[sge->n].length;
 284                }
 285                data += len;
 286                length -= len;
 287        }
 288}
 289
 290/**
  291 * qib_qp_rcv - process an incoming packet on a QP
 292 * @rcd: the context pointer
 293 * @hdr: the packet header
 294 * @has_grh: true if the packet has a GRH
 295 * @data: the packet data
 296 * @tlen: the packet length
 297 * @qp: the QP the packet came on
 298 *
 299 * This is called from qib_ib_rcv() to process an incoming packet
 300 * for the given QP.
 301 * Called at interrupt level.
 302 */
 303static void qib_qp_rcv(struct qib_ctxtdata *rcd, struct ib_header *hdr,
 304                       int has_grh, void *data, u32 tlen, struct rvt_qp *qp)
 305{
 306        struct qib_ibport *ibp = &rcd->ppd->ibport_data;
 307
 308        spin_lock(&qp->r_lock);
 309
 310        /* Check for valid receive state. */
 311        if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK)) {
 312                ibp->rvp.n_pkt_drops++;
 313                goto unlock;
 314        }
 315
 316        switch (qp->ibqp.qp_type) {
 317        case IB_QPT_SMI:
 318        case IB_QPT_GSI:
 319                if (ib_qib_disable_sma)
 320                        break;
 321                /* FALLTHROUGH */
 322        case IB_QPT_UD:
 323                qib_ud_rcv(ibp, hdr, has_grh, data, tlen, qp);
 324                break;
 325
 326        case IB_QPT_RC:
 327                qib_rc_rcv(rcd, hdr, has_grh, data, tlen, qp);
 328                break;
 329
 330        case IB_QPT_UC:
 331                qib_uc_rcv(ibp, hdr, has_grh, data, tlen, qp);
 332                break;
 333
 334        default:
 335                break;
 336        }
 337
 338unlock:
 339        spin_unlock(&qp->r_lock);
 340}
 341
 342/**
 343 * qib_ib_rcv - process an incoming packet
 344 * @rcd: the context pointer
 345 * @rhdr: the header of the packet
 346 * @data: the packet payload
 347 * @tlen: the packet length
 348 *
 349 * This is called from qib_kreceive() to process an incoming packet at
 350 * interrupt level. Tlen is the length of the header + data + CRC in bytes.
 351 */
 352void qib_ib_rcv(struct qib_ctxtdata *rcd, void *rhdr, void *data, u32 tlen)
 353{
 354        struct qib_pportdata *ppd = rcd->ppd;
 355        struct qib_ibport *ibp = &ppd->ibport_data;
 356        struct ib_header *hdr = rhdr;
 357        struct qib_devdata *dd = ppd->dd;
 358        struct rvt_dev_info *rdi = &dd->verbs_dev.rdi;
 359        struct ib_other_headers *ohdr;
 360        struct rvt_qp *qp;
 361        u32 qp_num;
 362        int lnh;
 363        u8 opcode;
 364        u16 lid;
 365
 366        /* 24 == LRH+BTH+CRC */
 367        if (unlikely(tlen < 24))
 368                goto drop;
 369
 370        /* Check for a valid destination LID (see ch. 7.11.1). */
 371        lid = be16_to_cpu(hdr->lrh[1]);
 372        if (lid < be16_to_cpu(IB_MULTICAST_LID_BASE)) {
 373                lid &= ~((1 << ppd->lmc) - 1);
 374                if (unlikely(lid != ppd->lid))
 375                        goto drop;
 376        }
 377
 378        /* Check for GRH */
 379        lnh = be16_to_cpu(hdr->lrh[0]) & 3;
 380        if (lnh == QIB_LRH_BTH)
 381                ohdr = &hdr->u.oth;
 382        else if (lnh == QIB_LRH_GRH) {
 383                u32 vtf;
 384
 385                ohdr = &hdr->u.l.oth;
 386                if (hdr->u.l.grh.next_hdr != IB_GRH_NEXT_HDR)
 387                        goto drop;
 388                vtf = be32_to_cpu(hdr->u.l.grh.version_tclass_flow);
 389                if ((vtf >> IB_GRH_VERSION_SHIFT) != IB_GRH_VERSION)
 390                        goto drop;
 391        } else
 392                goto drop;
 393
 394        opcode = (be32_to_cpu(ohdr->bth[0]) >> 24) & 0x7f;
 395#ifdef CONFIG_DEBUG_FS
 396        rcd->opstats->stats[opcode].n_bytes += tlen;
 397        rcd->opstats->stats[opcode].n_packets++;
 398#endif
 399
 400        /* Get the destination QP number. */
 401        qp_num = be32_to_cpu(ohdr->bth[1]) & RVT_QPN_MASK;
 402        if (qp_num == QIB_MULTICAST_QPN) {
 403                struct rvt_mcast *mcast;
 404                struct rvt_mcast_qp *p;
 405
 406                if (lnh != QIB_LRH_GRH)
 407                        goto drop;
 408                mcast = rvt_mcast_find(&ibp->rvp, &hdr->u.l.grh.dgid);
 409                if (mcast == NULL)
 410                        goto drop;
 411                this_cpu_inc(ibp->pmastats->n_multicast_rcv);
 412                list_for_each_entry_rcu(p, &mcast->qp_list, list)
 413                        qib_qp_rcv(rcd, hdr, 1, data, tlen, p->qp);
 414                /*
 415                 * Notify rvt_multicast_detach() if it is waiting for us
 416                 * to finish.
 417                 */
 418                if (atomic_dec_return(&mcast->refcount) <= 1)
 419                        wake_up(&mcast->wait);
 420        } else {
 421                rcu_read_lock();
 422                qp = rvt_lookup_qpn(rdi, &ibp->rvp, qp_num);
 423                if (!qp) {
 424                        rcu_read_unlock();
 425                        goto drop;
 426                }
 427                this_cpu_inc(ibp->pmastats->n_unicast_rcv);
 428                qib_qp_rcv(rcd, hdr, lnh == QIB_LRH_GRH, data, tlen, qp);
 429                rcu_read_unlock();
 430        }
 431        return;
 432
 433drop:
 434        ibp->rvp.n_pkt_drops++;
 435}
 436
 437/*
 438 * This is called from a timer to check for QPs
 439 * which need kernel memory in order to send a packet.
 440 */
 441static void mem_timer(unsigned long data)
 442{
 443        struct qib_ibdev *dev = (struct qib_ibdev *) data;
 444        struct list_head *list = &dev->memwait;
 445        struct rvt_qp *qp = NULL;
 446        struct qib_qp_priv *priv = NULL;
 447        unsigned long flags;
 448
 449        spin_lock_irqsave(&dev->rdi.pending_lock, flags);
 450        if (!list_empty(list)) {
 451                priv = list_entry(list->next, struct qib_qp_priv, iowait);
 452                qp = priv->owner;
 453                list_del_init(&priv->iowait);
 454                rvt_get_qp(qp);
 455                if (!list_empty(list))
 456                        mod_timer(&dev->mem_timer, jiffies + 1);
 457        }
 458        spin_unlock_irqrestore(&dev->rdi.pending_lock, flags);
 459
 460        if (qp) {
 461                spin_lock_irqsave(&qp->s_lock, flags);
 462                if (qp->s_flags & RVT_S_WAIT_KMEM) {
 463                        qp->s_flags &= ~RVT_S_WAIT_KMEM;
 464                        qib_schedule_send(qp);
 465                }
 466                spin_unlock_irqrestore(&qp->s_lock, flags);
 467                rvt_put_qp(qp);
 468        }
 469}
 470
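     /*
      * update_sge - advance the SGE state by @length bytes, stepping to the
      * next SGE (or the next segment of the current MR map) as each one is
      * consumed; the data itself is copied by the caller.
      */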
 471static void update_sge(struct rvt_sge_state *ss, u32 length)
 472{
 473        struct rvt_sge *sge = &ss->sge;
 474
 475        sge->vaddr += length;
 476        sge->length -= length;
 477        sge->sge_length -= length;
 478        if (sge->sge_length == 0) {
 479                if (--ss->num_sge)
 480                        *sge = *ss->sg_list++;
 481        } else if (sge->length == 0 && sge->mr->lkey) {
 482                if (++sge->n >= RVT_SEGSZ) {
 483                        if (++sge->m >= sge->mr->mapsz)
 484                                return;
 485                        sge->n = 0;
 486                }
 487                sge->vaddr = sge->mr->map[sge->m]->segs[sge->n].vaddr;
 488                sge->length = sge->mr->map[sge->m]->segs[sge->n].length;
 489        }
 490}
 491
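     /*
      * Byte-lane helpers used by copy_io() below to pack data from unaligned
      * source addresses into the aligned 32-bit writes that the PIO buffer
      * requires.  The little- and big-endian variants differ only in the
      * direction the partial words are shifted.
      */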
 492#ifdef __LITTLE_ENDIAN
 493static inline u32 get_upper_bits(u32 data, u32 shift)
 494{
 495        return data >> shift;
 496}
 497
 498static inline u32 set_upper_bits(u32 data, u32 shift)
 499{
 500        return data << shift;
 501}
 502
 503static inline u32 clear_upper_bytes(u32 data, u32 n, u32 off)
 504{
 505        data <<= ((sizeof(u32) - n) * BITS_PER_BYTE);
 506        data >>= ((sizeof(u32) - n - off) * BITS_PER_BYTE);
 507        return data;
 508}
 509#else
 510static inline u32 get_upper_bits(u32 data, u32 shift)
 511{
 512        return data << shift;
 513}
 514
 515static inline u32 set_upper_bits(u32 data, u32 shift)
 516{
 517        return data >> shift;
 518}
 519
 520static inline u32 clear_upper_bytes(u32 data, u32 n, u32 off)
 521{
 522        data >>= ((sizeof(u32) - n) * BITS_PER_BYTE);
 523        data <<= ((sizeof(u32) - n - off) * BITS_PER_BYTE);
 524        return data;
 525}
 526#endif
 527
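     /*
      * copy_io - copy a payload described by an SGE state into a PIO send
      * buffer one dword at a time, coalescing data from unaligned and
      * multi-SGE sources.  The final dword is held back in 'last' and written
      * only after the optional write-combining flush, since it is the trigger
      * word for the buffer.
      */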
 528static void copy_io(u32 __iomem *piobuf, struct rvt_sge_state *ss,
 529                    u32 length, unsigned flush_wc)
 530{
 531        u32 extra = 0;
 532        u32 data = 0;
 533        u32 last;
 534
 535        while (1) {
 536                u32 len = ss->sge.length;
 537                u32 off;
 538
 539                if (len > length)
 540                        len = length;
 541                if (len > ss->sge.sge_length)
 542                        len = ss->sge.sge_length;
 543                BUG_ON(len == 0);
 544                /* If the source address is not aligned, try to align it. */
 545                off = (unsigned long)ss->sge.vaddr & (sizeof(u32) - 1);
 546                if (off) {
 547                        u32 *addr = (u32 *)((unsigned long)ss->sge.vaddr &
 548                                            ~(sizeof(u32) - 1));
 549                        u32 v = get_upper_bits(*addr, off * BITS_PER_BYTE);
 550                        u32 y;
 551
 552                        y = sizeof(u32) - off;
 553                        if (len > y)
 554                                len = y;
 555                        if (len + extra >= sizeof(u32)) {
 556                                data |= set_upper_bits(v, extra *
 557                                                       BITS_PER_BYTE);
 558                                len = sizeof(u32) - extra;
 559                                if (len == length) {
 560                                        last = data;
 561                                        break;
 562                                }
 563                                __raw_writel(data, piobuf);
 564                                piobuf++;
 565                                extra = 0;
 566                                data = 0;
 567                        } else {
 568                                /* Clear unused upper bytes */
 569                                data |= clear_upper_bytes(v, len, extra);
 570                                if (len == length) {
 571                                        last = data;
 572                                        break;
 573                                }
 574                                extra += len;
 575                        }
 576                } else if (extra) {
 577                        /* Source address is aligned. */
 578                        u32 *addr = (u32 *) ss->sge.vaddr;
 579                        int shift = extra * BITS_PER_BYTE;
 580                        int ushift = 32 - shift;
 581                        u32 l = len;
 582
 583                        while (l >= sizeof(u32)) {
 584                                u32 v = *addr;
 585
 586                                data |= set_upper_bits(v, shift);
 587                                __raw_writel(data, piobuf);
 588                                data = get_upper_bits(v, ushift);
 589                                piobuf++;
 590                                addr++;
 591                                l -= sizeof(u32);
 592                        }
 593                        /*
 594                         * We still have 'extra' number of bytes leftover.
 595                         */
 596                        if (l) {
 597                                u32 v = *addr;
 598
 599                                if (l + extra >= sizeof(u32)) {
 600                                        data |= set_upper_bits(v, shift);
 601                                        len -= l + extra - sizeof(u32);
 602                                        if (len == length) {
 603                                                last = data;
 604                                                break;
 605                                        }
 606                                        __raw_writel(data, piobuf);
 607                                        piobuf++;
 608                                        extra = 0;
 609                                        data = 0;
 610                                } else {
 611                                        /* Clear unused upper bytes */
 612                                        data |= clear_upper_bytes(v, l, extra);
 613                                        if (len == length) {
 614                                                last = data;
 615                                                break;
 616                                        }
 617                                        extra += l;
 618                                }
 619                        } else if (len == length) {
 620                                last = data;
 621                                break;
 622                        }
 623                } else if (len == length) {
 624                        u32 w;
 625
 626                        /*
 627                         * Need to round up for the last dword in the
 628                         * packet.
 629                         */
 630                        w = (len + 3) >> 2;
 631                        qib_pio_copy(piobuf, ss->sge.vaddr, w - 1);
 632                        piobuf += w - 1;
 633                        last = ((u32 *) ss->sge.vaddr)[w - 1];
 634                        break;
 635                } else {
 636                        u32 w = len >> 2;
 637
 638                        qib_pio_copy(piobuf, ss->sge.vaddr, w);
 639                        piobuf += w;
 640
 641                        extra = len & (sizeof(u32) - 1);
 642                        if (extra) {
 643                                u32 v = ((u32 *) ss->sge.vaddr)[w];
 644
 645                                /* Clear unused upper bytes */
 646                                data = clear_upper_bytes(v, extra, 0);
 647                        }
 648                }
 649                update_sge(ss, len);
 650                length -= len;
 651        }
 652        /* Update address before sending packet. */
 653        update_sge(ss, length);
 654        if (flush_wc) {
  655                /* must flush everything early, before the trigger word */
 656                qib_flush_wc();
 657                __raw_writel(last, piobuf);
 658                /* be sure trigger word is written */
 659                qib_flush_wc();
 660        } else
 661                __raw_writel(last, piobuf);
 662}
 663
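     /*
      * __get_txreq - slow path of get_txreq(): retry the free list with both
      * qp->s_lock and the pending_lock held.  If the list is still empty,
      * queue the QP on dev->txwait (when it can still make progress), clear
      * RVT_S_BUSY and return ERR_PTR(-EBUSY) so the send is retried later.
      */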
 664static noinline struct qib_verbs_txreq *__get_txreq(struct qib_ibdev *dev,
 665                                           struct rvt_qp *qp)
 666{
 667        struct qib_qp_priv *priv = qp->priv;
 668        struct qib_verbs_txreq *tx;
 669        unsigned long flags;
 670
 671        spin_lock_irqsave(&qp->s_lock, flags);
 672        spin_lock(&dev->rdi.pending_lock);
 673
 674        if (!list_empty(&dev->txreq_free)) {
 675                struct list_head *l = dev->txreq_free.next;
 676
 677                list_del(l);
 678                spin_unlock(&dev->rdi.pending_lock);
 679                spin_unlock_irqrestore(&qp->s_lock, flags);
 680                tx = list_entry(l, struct qib_verbs_txreq, txreq.list);
 681        } else {
 682                if (ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK &&
 683                    list_empty(&priv->iowait)) {
 684                        dev->n_txwait++;
 685                        qp->s_flags |= RVT_S_WAIT_TX;
 686                        list_add_tail(&priv->iowait, &dev->txwait);
 687                }
 688                qp->s_flags &= ~RVT_S_BUSY;
 689                spin_unlock(&dev->rdi.pending_lock);
 690                spin_unlock_irqrestore(&qp->s_lock, flags);
 691                tx = ERR_PTR(-EBUSY);
 692        }
 693        return tx;
 694}
 695
 696static inline struct qib_verbs_txreq *get_txreq(struct qib_ibdev *dev,
 697                                         struct rvt_qp *qp)
 698{
 699        struct qib_verbs_txreq *tx;
 700        unsigned long flags;
 701
 702        spin_lock_irqsave(&dev->rdi.pending_lock, flags);
  703        /* assume the free list is non-empty */
 704        if (likely(!list_empty(&dev->txreq_free))) {
 705                struct list_head *l = dev->txreq_free.next;
 706
 707                list_del(l);
 708                spin_unlock_irqrestore(&dev->rdi.pending_lock, flags);
 709                tx = list_entry(l, struct qib_verbs_txreq, txreq.list);
 710        } else {
 711                /* call slow path to get the extra lock */
 712                spin_unlock_irqrestore(&dev->rdi.pending_lock, flags);
 713                tx =  __get_txreq(dev, qp);
 714        }
 715        return tx;
 716}
 717
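     /*
      * qib_put_txreq - return a verbs txreq to the device free list, dropping
      * any MR reference and bounce-buffer DMA mapping it holds, and wake the
      * first QP (if any) waiting for a free txreq.
      */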
 718void qib_put_txreq(struct qib_verbs_txreq *tx)
 719{
 720        struct qib_ibdev *dev;
 721        struct rvt_qp *qp;
 722        struct qib_qp_priv *priv;
 723        unsigned long flags;
 724
 725        qp = tx->qp;
 726        dev = to_idev(qp->ibqp.device);
 727
 728        if (tx->mr) {
 729                rvt_put_mr(tx->mr);
 730                tx->mr = NULL;
 731        }
 732        if (tx->txreq.flags & QIB_SDMA_TXREQ_F_FREEBUF) {
 733                tx->txreq.flags &= ~QIB_SDMA_TXREQ_F_FREEBUF;
 734                dma_unmap_single(&dd_from_dev(dev)->pcidev->dev,
 735                                 tx->txreq.addr, tx->hdr_dwords << 2,
 736                                 DMA_TO_DEVICE);
 737                kfree(tx->align_buf);
 738        }
 739
 740        spin_lock_irqsave(&dev->rdi.pending_lock, flags);
 741
 742        /* Put struct back on free list */
 743        list_add(&tx->txreq.list, &dev->txreq_free);
 744
 745        if (!list_empty(&dev->txwait)) {
 746                /* Wake up first QP wanting a free struct */
 747                priv = list_entry(dev->txwait.next, struct qib_qp_priv,
 748                                  iowait);
 749                qp = priv->owner;
 750                list_del_init(&priv->iowait);
 751                rvt_get_qp(qp);
 752                spin_unlock_irqrestore(&dev->rdi.pending_lock, flags);
 753
 754                spin_lock_irqsave(&qp->s_lock, flags);
 755                if (qp->s_flags & RVT_S_WAIT_TX) {
 756                        qp->s_flags &= ~RVT_S_WAIT_TX;
 757                        qib_schedule_send(qp);
 758                }
 759                spin_unlock_irqrestore(&qp->s_lock, flags);
 760
 761                rvt_put_qp(qp);
 762        } else
 763                spin_unlock_irqrestore(&dev->rdi.pending_lock, flags);
 764}
 765
 766/*
 767 * This is called when there are send DMA descriptors that might be
 768 * available.
 769 *
 770 * This is called with ppd->sdma_lock held.
 771 */
 772void qib_verbs_sdma_desc_avail(struct qib_pportdata *ppd, unsigned avail)
 773{
 774        struct rvt_qp *qp, *nqp;
 775        struct qib_qp_priv *qpp, *nqpp;
 776        struct rvt_qp *qps[20];
 777        struct qib_ibdev *dev;
 778        unsigned i, n;
 779
 780        n = 0;
 781        dev = &ppd->dd->verbs_dev;
 782        spin_lock(&dev->rdi.pending_lock);
 783
 784        /* Search wait list for first QP wanting DMA descriptors. */
 785        list_for_each_entry_safe(qpp, nqpp, &dev->dmawait, iowait) {
 786                qp = qpp->owner;
 787                nqp = nqpp->owner;
 788                if (qp->port_num != ppd->port)
 789                        continue;
 790                if (n == ARRAY_SIZE(qps))
 791                        break;
 792                if (qpp->s_tx->txreq.sg_count > avail)
 793                        break;
 794                avail -= qpp->s_tx->txreq.sg_count;
 795                list_del_init(&qpp->iowait);
 796                rvt_get_qp(qp);
 797                qps[n++] = qp;
 798        }
 799
 800        spin_unlock(&dev->rdi.pending_lock);
 801
 802        for (i = 0; i < n; i++) {
 803                qp = qps[i];
 804                spin_lock(&qp->s_lock);
 805                if (qp->s_flags & RVT_S_WAIT_DMA_DESC) {
 806                        qp->s_flags &= ~RVT_S_WAIT_DMA_DESC;
 807                        qib_schedule_send(qp);
 808                }
 809                spin_unlock(&qp->s_lock);
 810                rvt_put_qp(qp);
 811        }
 812}
 813
 814/*
 815 * This is called with ppd->sdma_lock held.
 816 */
 817static void sdma_complete(struct qib_sdma_txreq *cookie, int status)
 818{
 819        struct qib_verbs_txreq *tx =
 820                container_of(cookie, struct qib_verbs_txreq, txreq);
 821        struct rvt_qp *qp = tx->qp;
 822        struct qib_qp_priv *priv = qp->priv;
 823
 824        spin_lock(&qp->s_lock);
 825        if (tx->wqe)
 826                qib_send_complete(qp, tx->wqe, IB_WC_SUCCESS);
 827        else if (qp->ibqp.qp_type == IB_QPT_RC) {
 828                struct ib_header *hdr;
 829
 830                if (tx->txreq.flags & QIB_SDMA_TXREQ_F_FREEBUF)
 831                        hdr = &tx->align_buf->hdr;
 832                else {
 833                        struct qib_ibdev *dev = to_idev(qp->ibqp.device);
 834
 835                        hdr = &dev->pio_hdrs[tx->hdr_inx].hdr;
 836                }
 837                qib_rc_send_complete(qp, hdr);
 838        }
 839        if (atomic_dec_and_test(&priv->s_dma_busy)) {
 840                if (qp->state == IB_QPS_RESET)
 841                        wake_up(&priv->wait_dma);
 842                else if (qp->s_flags & RVT_S_WAIT_DMA) {
 843                        qp->s_flags &= ~RVT_S_WAIT_DMA;
 844                        qib_schedule_send(qp);
 845                }
 846        }
 847        spin_unlock(&qp->s_lock);
 848
 849        qib_put_txreq(tx);
 850}
 851
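     /*
      * wait_kmem - a kernel memory allocation for a send failed; if the QP
      * can still make progress, park it on dev->memwait, arm mem_timer to
      * retry, and return -EBUSY with RVT_S_BUSY cleared.  Returns 0 when the
      * QP is no longer in a state where the send should be retried.
      */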
 852static int wait_kmem(struct qib_ibdev *dev, struct rvt_qp *qp)
 853{
 854        struct qib_qp_priv *priv = qp->priv;
 855        unsigned long flags;
 856        int ret = 0;
 857
 858        spin_lock_irqsave(&qp->s_lock, flags);
 859        if (ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK) {
 860                spin_lock(&dev->rdi.pending_lock);
 861                if (list_empty(&priv->iowait)) {
 862                        if (list_empty(&dev->memwait))
 863                                mod_timer(&dev->mem_timer, jiffies + 1);
 864                        qp->s_flags |= RVT_S_WAIT_KMEM;
 865                        list_add_tail(&priv->iowait, &dev->memwait);
 866                }
 867                spin_unlock(&dev->rdi.pending_lock);
 868                qp->s_flags &= ~RVT_S_BUSY;
 869                ret = -EBUSY;
 870        }
 871        spin_unlock_irqrestore(&qp->s_lock, flags);
 872
 873        return ret;
 874}
 875
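     /*
      * qib_verbs_send_dma - queue a packet on the send DMA engine.  A
      * previously constructed txreq (priv->s_tx) is resent as-is; otherwise a
      * new txreq is built, using the pre-mapped PIO header array when the
      * payload can be described directly by descriptors, or a kmalloc'd
      * bounce buffer holding header plus payload when it cannot.  Falls back
      * to wait_kmem() if that allocation fails.
      */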
 876static int qib_verbs_send_dma(struct rvt_qp *qp, struct ib_header *hdr,
 877                              u32 hdrwords, struct rvt_sge_state *ss, u32 len,
 878                              u32 plen, u32 dwords)
 879{
 880        struct qib_qp_priv *priv = qp->priv;
 881        struct qib_ibdev *dev = to_idev(qp->ibqp.device);
 882        struct qib_devdata *dd = dd_from_dev(dev);
 883        struct qib_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num);
 884        struct qib_pportdata *ppd = ppd_from_ibp(ibp);
 885        struct qib_verbs_txreq *tx;
 886        struct qib_pio_header *phdr;
 887        u32 control;
 888        u32 ndesc;
 889        int ret;
 890
 891        tx = priv->s_tx;
 892        if (tx) {
 893                priv->s_tx = NULL;
 894                /* resend previously constructed packet */
 895                ret = qib_sdma_verbs_send(ppd, tx->ss, tx->dwords, tx);
 896                goto bail;
 897        }
 898
 899        tx = get_txreq(dev, qp);
 900        if (IS_ERR(tx))
 901                goto bail_tx;
 902
 903        control = dd->f_setpbc_control(ppd, plen, qp->s_srate,
 904                                       be16_to_cpu(hdr->lrh[0]) >> 12);
 905        tx->qp = qp;
 906        tx->wqe = qp->s_wqe;
 907        tx->mr = qp->s_rdma_mr;
 908        if (qp->s_rdma_mr)
 909                qp->s_rdma_mr = NULL;
 910        tx->txreq.callback = sdma_complete;
 911        if (dd->flags & QIB_HAS_SDMA_TIMEOUT)
 912                tx->txreq.flags = QIB_SDMA_TXREQ_F_HEADTOHOST;
 913        else
 914                tx->txreq.flags = QIB_SDMA_TXREQ_F_INTREQ;
 915        if (plen + 1 > dd->piosize2kmax_dwords)
 916                tx->txreq.flags |= QIB_SDMA_TXREQ_F_USELARGEBUF;
 917
 918        if (len) {
 919                /*
 920                 * Don't try to DMA if it takes more descriptors than
 921                 * the queue holds.
 922                 */
 923                ndesc = qib_count_sge(ss, len);
 924                if (ndesc >= ppd->sdma_descq_cnt)
 925                        ndesc = 0;
 926        } else
 927                ndesc = 1;
 928        if (ndesc) {
 929                phdr = &dev->pio_hdrs[tx->hdr_inx];
 930                phdr->pbc[0] = cpu_to_le32(plen);
 931                phdr->pbc[1] = cpu_to_le32(control);
 932                memcpy(&phdr->hdr, hdr, hdrwords << 2);
 933                tx->txreq.flags |= QIB_SDMA_TXREQ_F_FREEDESC;
 934                tx->txreq.sg_count = ndesc;
 935                tx->txreq.addr = dev->pio_hdrs_phys +
 936                        tx->hdr_inx * sizeof(struct qib_pio_header);
 937                tx->hdr_dwords = hdrwords + 2; /* add PBC length */
 938                ret = qib_sdma_verbs_send(ppd, ss, dwords, tx);
 939                goto bail;
 940        }
 941
 942        /* Allocate a buffer and copy the header and payload to it. */
 943        tx->hdr_dwords = plen + 1;
 944        phdr = kmalloc(tx->hdr_dwords << 2, GFP_ATOMIC);
 945        if (!phdr)
 946                goto err_tx;
 947        phdr->pbc[0] = cpu_to_le32(plen);
 948        phdr->pbc[1] = cpu_to_le32(control);
 949        memcpy(&phdr->hdr, hdr, hdrwords << 2);
 950        qib_copy_from_sge((u32 *) &phdr->hdr + hdrwords, ss, len);
 951
 952        tx->txreq.addr = dma_map_single(&dd->pcidev->dev, phdr,
 953                                        tx->hdr_dwords << 2, DMA_TO_DEVICE);
 954        if (dma_mapping_error(&dd->pcidev->dev, tx->txreq.addr))
 955                goto map_err;
 956        tx->align_buf = phdr;
 957        tx->txreq.flags |= QIB_SDMA_TXREQ_F_FREEBUF;
 958        tx->txreq.sg_count = 1;
 959        ret = qib_sdma_verbs_send(ppd, NULL, 0, tx);
 960        goto unaligned;
 961
 962map_err:
 963        kfree(phdr);
 964err_tx:
 965        qib_put_txreq(tx);
 966        ret = wait_kmem(dev, qp);
 967unaligned:
 968        ibp->rvp.n_unaligned++;
 969bail:
 970        return ret;
 971bail_tx:
 972        ret = PTR_ERR(tx);
 973        goto bail;
 974}
 975
 976/*
 977 * If we are now in the error state, return zero to flush the
 978 * send work request.
 979 */
 980static int no_bufs_available(struct rvt_qp *qp)
 981{
 982        struct qib_qp_priv *priv = qp->priv;
 983        struct qib_ibdev *dev = to_idev(qp->ibqp.device);
 984        struct qib_devdata *dd;
 985        unsigned long flags;
 986        int ret = 0;
 987
 988        /*
 989         * Note that as soon as want_buffer() is called and
 990         * possibly before it returns, qib_ib_piobufavail()
 991         * could be called. Therefore, put QP on the I/O wait list before
 992         * enabling the PIO avail interrupt.
 993         */
 994        spin_lock_irqsave(&qp->s_lock, flags);
 995        if (ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK) {
 996                spin_lock(&dev->rdi.pending_lock);
 997                if (list_empty(&priv->iowait)) {
 998                        dev->n_piowait++;
 999                        qp->s_flags |= RVT_S_WAIT_PIO;
1000                        list_add_tail(&priv->iowait, &dev->piowait);
1001                        dd = dd_from_dev(dev);
1002                        dd->f_wantpiobuf_intr(dd, 1);
1003                }
1004                spin_unlock(&dev->rdi.pending_lock);
1005                qp->s_flags &= ~RVT_S_BUSY;
1006                ret = -EBUSY;
1007        }
1008        spin_unlock_irqrestore(&qp->s_lock, flags);
1009        return ret;
1010}
1011
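     /*
      * qib_verbs_send_pio - send a packet by programmed I/O: grab a PIO send
      * buffer (queuing the QP via no_bufs_available() if none is free), write
      * the 64-bit PBC, then the header and payload, with write-combining
      * flushes (when the chip needs them) so the final trigger word is
      * written last.  Completions are generated inline since there is no
      * asynchronous DMA to wait for.
      */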
1012static int qib_verbs_send_pio(struct rvt_qp *qp, struct ib_header *ibhdr,
1013                              u32 hdrwords, struct rvt_sge_state *ss, u32 len,
1014                              u32 plen, u32 dwords)
1015{
1016        struct qib_devdata *dd = dd_from_ibdev(qp->ibqp.device);
1017        struct qib_pportdata *ppd = dd->pport + qp->port_num - 1;
1018        u32 *hdr = (u32 *) ibhdr;
1019        u32 __iomem *piobuf_orig;
1020        u32 __iomem *piobuf;
1021        u64 pbc;
1022        unsigned long flags;
1023        unsigned flush_wc;
1024        u32 control;
1025        u32 pbufn;
1026
1027        control = dd->f_setpbc_control(ppd, plen, qp->s_srate,
1028                be16_to_cpu(ibhdr->lrh[0]) >> 12);
1029        pbc = ((u64) control << 32) | plen;
1030        piobuf = dd->f_getsendbuf(ppd, pbc, &pbufn);
1031        if (unlikely(piobuf == NULL))
1032                return no_bufs_available(qp);
1033
1034        /*
1035         * Write the pbc.
1036         * We have to flush after the PBC for correctness on some cpus
1037         * or WC buffer can be written out of order.
1038         */
1039        writeq(pbc, piobuf);
1040        piobuf_orig = piobuf;
1041        piobuf += 2;
1042
1043        flush_wc = dd->flags & QIB_PIO_FLUSH_WC;
1044        if (len == 0) {
1045                /*
1046                 * If there is just the header portion, must flush before
1047                 * writing last word of header for correctness, and after
1048                 * the last header word (trigger word).
1049                 */
1050                if (flush_wc) {
1051                        qib_flush_wc();
1052                        qib_pio_copy(piobuf, hdr, hdrwords - 1);
1053                        qib_flush_wc();
1054                        __raw_writel(hdr[hdrwords - 1], piobuf + hdrwords - 1);
1055                        qib_flush_wc();
1056                } else
1057                        qib_pio_copy(piobuf, hdr, hdrwords);
1058                goto done;
1059        }
1060
1061        if (flush_wc)
1062                qib_flush_wc();
1063        qib_pio_copy(piobuf, hdr, hdrwords);
1064        piobuf += hdrwords;
1065
1066        /* The common case is aligned and contained in one segment. */
1067        if (likely(ss->num_sge == 1 && len <= ss->sge.length &&
1068                   !((unsigned long)ss->sge.vaddr & (sizeof(u32) - 1)))) {
1069                u32 *addr = (u32 *) ss->sge.vaddr;
1070
1071                /* Update address before sending packet. */
1072                update_sge(ss, len);
1073                if (flush_wc) {
1074                        qib_pio_copy(piobuf, addr, dwords - 1);
 1075                        /* must flush everything early, before the trigger word */
1076                        qib_flush_wc();
1077                        __raw_writel(addr[dwords - 1], piobuf + dwords - 1);
1078                        /* be sure trigger word is written */
1079                        qib_flush_wc();
1080                } else
1081                        qib_pio_copy(piobuf, addr, dwords);
1082                goto done;
1083        }
1084        copy_io(piobuf, ss, len, flush_wc);
1085done:
1086        if (dd->flags & QIB_USE_SPCL_TRIG) {
1087                u32 spcl_off = (pbufn >= dd->piobcnt2k) ? 2047 : 1023;
1088
1089                qib_flush_wc();
1090                __raw_writel(0xaebecede, piobuf_orig + spcl_off);
1091        }
1092        qib_sendbuf_done(dd, pbufn);
1093        if (qp->s_rdma_mr) {
1094                rvt_put_mr(qp->s_rdma_mr);
1095                qp->s_rdma_mr = NULL;
1096        }
1097        if (qp->s_wqe) {
1098                spin_lock_irqsave(&qp->s_lock, flags);
1099                qib_send_complete(qp, qp->s_wqe, IB_WC_SUCCESS);
1100                spin_unlock_irqrestore(&qp->s_lock, flags);
1101        } else if (qp->ibqp.qp_type == IB_QPT_RC) {
1102                spin_lock_irqsave(&qp->s_lock, flags);
1103                qib_rc_send_complete(qp, ibhdr);
1104                spin_unlock_irqrestore(&qp->s_lock, flags);
1105        }
1106        return 0;
1107}
1108
1109/**
1110 * qib_verbs_send - send a packet
1111 * @qp: the QP to send on
1112 * @hdr: the packet header
1113 * @hdrwords: the number of 32-bit words in the header
1114 * @ss: the SGE to send
1115 * @len: the length of the packet in bytes
1116 *
1117 * Return zero if packet is sent or queued OK.
1118 * Return non-zero and clear qp->s_flags RVT_S_BUSY otherwise.
1119 */
1120int qib_verbs_send(struct rvt_qp *qp, struct ib_header *hdr,
1121                   u32 hdrwords, struct rvt_sge_state *ss, u32 len)
1122{
1123        struct qib_devdata *dd = dd_from_ibdev(qp->ibqp.device);
1124        u32 plen;
1125        int ret;
1126        u32 dwords = (len + 3) >> 2;
1127
1128        /*
1129         * Calculate the send buffer trigger address.
1130         * The +1 counts for the pbc control dword following the pbc length.
1131         */
1132        plen = hdrwords + dwords + 1;
1133
1134        /*
1135         * VL15 packets (IB_QPT_SMI) will always use PIO, so we
1136         * can defer SDMA restart until link goes ACTIVE without
1137         * worrying about just how we got there.
1138         */
1139        if (qp->ibqp.qp_type == IB_QPT_SMI ||
1140            !(dd->flags & QIB_HAS_SEND_DMA))
1141                ret = qib_verbs_send_pio(qp, hdr, hdrwords, ss, len,
1142                                         plen, dwords);
1143        else
1144                ret = qib_verbs_send_dma(qp, hdr, hdrwords, ss, len,
1145                                         plen, dwords);
1146
1147        return ret;
1148}
1149
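     /*
      * qib_snapshot_counters - read the raw per-port send/receive word and
      * packet counters plus the transmit-wait (send stall) counter.  Returns
      * -EINVAL if the hardware is absent or frozen.
      */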
1150int qib_snapshot_counters(struct qib_pportdata *ppd, u64 *swords,
1151                          u64 *rwords, u64 *spkts, u64 *rpkts,
1152                          u64 *xmit_wait)
1153{
1154        int ret;
1155        struct qib_devdata *dd = ppd->dd;
1156
1157        if (!(dd->flags & QIB_PRESENT)) {
1158                /* no hardware, freeze, etc. */
1159                ret = -EINVAL;
1160                goto bail;
1161        }
1162        *swords = dd->f_portcntr(ppd, QIBPORTCNTR_WORDSEND);
1163        *rwords = dd->f_portcntr(ppd, QIBPORTCNTR_WORDRCV);
1164        *spkts = dd->f_portcntr(ppd, QIBPORTCNTR_PKTSEND);
1165        *rpkts = dd->f_portcntr(ppd, QIBPORTCNTR_PKTRCV);
1166        *xmit_wait = dd->f_portcntr(ppd, QIBPORTCNTR_SENDSTALL);
1167
1168        ret = 0;
1169
1170bail:
1171        return ret;
1172}
1173
1174/**
1175 * qib_get_counters - get various chip counters
 1176 * @ppd: the qlogic_ib physical port data
1177 * @cntrs: counters are placed here
1178 *
1179 * Return the counters needed by recv_pma_get_portcounters().
1180 */
1181int qib_get_counters(struct qib_pportdata *ppd,
1182                     struct qib_verbs_counters *cntrs)
1183{
1184        int ret;
1185
1186        if (!(ppd->dd->flags & QIB_PRESENT)) {
1187                /* no hardware, freeze, etc. */
1188                ret = -EINVAL;
1189                goto bail;
1190        }
1191        cntrs->symbol_error_counter =
1192                ppd->dd->f_portcntr(ppd, QIBPORTCNTR_IBSYMBOLERR);
1193        cntrs->link_error_recovery_counter =
1194                ppd->dd->f_portcntr(ppd, QIBPORTCNTR_IBLINKERRRECOV);
1195        /*
1196         * The link downed counter counts when the other side downs the
1197         * connection.  We add in the number of times we downed the link
1198         * due to local link integrity errors to compensate.
1199         */
1200        cntrs->link_downed_counter =
1201                ppd->dd->f_portcntr(ppd, QIBPORTCNTR_IBLINKDOWN);
1202        cntrs->port_rcv_errors =
1203                ppd->dd->f_portcntr(ppd, QIBPORTCNTR_RXDROPPKT) +
1204                ppd->dd->f_portcntr(ppd, QIBPORTCNTR_RCVOVFL) +
1205                ppd->dd->f_portcntr(ppd, QIBPORTCNTR_ERR_RLEN) +
1206                ppd->dd->f_portcntr(ppd, QIBPORTCNTR_INVALIDRLEN) +
1207                ppd->dd->f_portcntr(ppd, QIBPORTCNTR_ERRLINK) +
1208                ppd->dd->f_portcntr(ppd, QIBPORTCNTR_ERRICRC) +
1209                ppd->dd->f_portcntr(ppd, QIBPORTCNTR_ERRVCRC) +
1210                ppd->dd->f_portcntr(ppd, QIBPORTCNTR_ERRLPCRC) +
1211                ppd->dd->f_portcntr(ppd, QIBPORTCNTR_BADFORMAT);
1212        cntrs->port_rcv_errors +=
1213                ppd->dd->f_portcntr(ppd, QIBPORTCNTR_RXLOCALPHYERR);
1214        cntrs->port_rcv_errors +=
1215                ppd->dd->f_portcntr(ppd, QIBPORTCNTR_RXVLERR);
1216        cntrs->port_rcv_remphys_errors =
1217                ppd->dd->f_portcntr(ppd, QIBPORTCNTR_RCVEBP);
1218        cntrs->port_xmit_discards =
1219                ppd->dd->f_portcntr(ppd, QIBPORTCNTR_UNSUPVL);
1220        cntrs->port_xmit_data = ppd->dd->f_portcntr(ppd,
1221                        QIBPORTCNTR_WORDSEND);
1222        cntrs->port_rcv_data = ppd->dd->f_portcntr(ppd,
1223                        QIBPORTCNTR_WORDRCV);
1224        cntrs->port_xmit_packets = ppd->dd->f_portcntr(ppd,
1225                        QIBPORTCNTR_PKTSEND);
1226        cntrs->port_rcv_packets = ppd->dd->f_portcntr(ppd,
1227                        QIBPORTCNTR_PKTRCV);
1228        cntrs->local_link_integrity_errors =
1229                ppd->dd->f_portcntr(ppd, QIBPORTCNTR_LLI);
1230        cntrs->excessive_buffer_overrun_errors =
1231                ppd->dd->f_portcntr(ppd, QIBPORTCNTR_EXCESSBUFOVFL);
1232        cntrs->vl15_dropped =
1233                ppd->dd->f_portcntr(ppd, QIBPORTCNTR_VL15PKTDROP);
1234
1235        ret = 0;
1236
1237bail:
1238        return ret;
1239}
1240
1241/**
1242 * qib_ib_piobufavail - callback when a PIO buffer is available
1243 * @dd: the device pointer
1244 *
1245 * This is called from qib_intr() at interrupt level when a PIO buffer is
1246 * available after qib_verbs_send() returned an error that no buffers were
1247 * available. Disable the interrupt if there are no more QPs waiting.
1248 */
1249void qib_ib_piobufavail(struct qib_devdata *dd)
1250{
1251        struct qib_ibdev *dev = &dd->verbs_dev;
1252        struct list_head *list;
1253        struct rvt_qp *qps[5];
1254        struct rvt_qp *qp;
1255        unsigned long flags;
1256        unsigned i, n;
1257        struct qib_qp_priv *priv;
1258
1259        list = &dev->piowait;
1260        n = 0;
1261
1262        /*
1263         * Note: checking that the piowait list is empty and clearing
1264         * the buffer available interrupt needs to be atomic or we
1265         * could end up with QPs on the wait list with the interrupt
1266         * disabled.
1267         */
1268        spin_lock_irqsave(&dev->rdi.pending_lock, flags);
1269        while (!list_empty(list)) {
1270                if (n == ARRAY_SIZE(qps))
1271                        goto full;
1272                priv = list_entry(list->next, struct qib_qp_priv, iowait);
1273                qp = priv->owner;
1274                list_del_init(&priv->iowait);
1275                rvt_get_qp(qp);
1276                qps[n++] = qp;
1277        }
1278        dd->f_wantpiobuf_intr(dd, 0);
1279full:
1280        spin_unlock_irqrestore(&dev->rdi.pending_lock, flags);
1281
1282        for (i = 0; i < n; i++) {
1283                qp = qps[i];
1284
1285                spin_lock_irqsave(&qp->s_lock, flags);
1286                if (qp->s_flags & RVT_S_WAIT_PIO) {
1287                        qp->s_flags &= ~RVT_S_WAIT_PIO;
1288                        qib_schedule_send(qp);
1289                }
1290                spin_unlock_irqrestore(&qp->s_lock, flags);
1291
1292                /* Notify qib_destroy_qp() if it is waiting. */
1293                rvt_put_qp(qp);
1294        }
1295}
1296
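     /*
      * qib_query_port - rdmavt port-query callback: report the cached link
      * state, LID/LMC, link width and speed, VL count and MTU for the port.
      */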
1297static int qib_query_port(struct rvt_dev_info *rdi, u8 port_num,
1298                          struct ib_port_attr *props)
1299{
1300        struct qib_ibdev *ibdev = container_of(rdi, struct qib_ibdev, rdi);
1301        struct qib_devdata *dd = dd_from_dev(ibdev);
1302        struct qib_pportdata *ppd = &dd->pport[port_num - 1];
1303        enum ib_mtu mtu;
1304        u16 lid = ppd->lid;
1305
1306        props->lid = lid ? lid : be16_to_cpu(IB_LID_PERMISSIVE);
1307        props->lmc = ppd->lmc;
1308        props->state = dd->f_iblink_state(ppd->lastibcstat);
1309        props->phys_state = dd->f_ibphys_portstate(ppd->lastibcstat);
1310        props->gid_tbl_len = QIB_GUIDS_PER_PORT;
1311        props->active_width = ppd->link_width_active;
1312        /* See rate_show() */
1313        props->active_speed = ppd->link_speed_active;
1314        props->max_vl_num = qib_num_vls(ppd->vls_supported);
1315
1316        props->max_mtu = qib_ibmtu ? qib_ibmtu : IB_MTU_4096;
1317        switch (ppd->ibmtu) {
1318        case 4096:
1319                mtu = IB_MTU_4096;
1320                break;
1321        case 2048:
1322                mtu = IB_MTU_2048;
1323                break;
1324        case 1024:
1325                mtu = IB_MTU_1024;
1326                break;
1327        case 512:
1328                mtu = IB_MTU_512;
1329                break;
1330        case 256:
1331                mtu = IB_MTU_256;
1332                break;
1333        default:
1334                mtu = IB_MTU_2048;
1335        }
1336        props->active_mtu = mtu;
1337
1338        return 0;
1339}
1340
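     /*
      * qib_modify_device - handle IB_DEVICE_MODIFY_SYS_IMAGE_GUID and
      * IB_DEVICE_MODIFY_NODE_DESC, notifying every port of the change via
      * qib_sys_guid_chg()/qib_node_desc_chg().
      */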
1341static int qib_modify_device(struct ib_device *device,
1342                             int device_modify_mask,
1343                             struct ib_device_modify *device_modify)
1344{
1345        struct qib_devdata *dd = dd_from_ibdev(device);
1346        unsigned i;
1347        int ret;
1348
1349        if (device_modify_mask & ~(IB_DEVICE_MODIFY_SYS_IMAGE_GUID |
1350                                   IB_DEVICE_MODIFY_NODE_DESC)) {
1351                ret = -EOPNOTSUPP;
1352                goto bail;
1353        }
1354
1355        if (device_modify_mask & IB_DEVICE_MODIFY_NODE_DESC) {
1356                memcpy(device->node_desc, device_modify->node_desc,
1357                       IB_DEVICE_NODE_DESC_MAX);
1358                for (i = 0; i < dd->num_pports; i++) {
1359                        struct qib_ibport *ibp = &dd->pport[i].ibport_data;
1360
1361                        qib_node_desc_chg(ibp);
1362                }
1363        }
1364
1365        if (device_modify_mask & IB_DEVICE_MODIFY_SYS_IMAGE_GUID) {
1366                ib_qib_sys_image_guid =
1367                        cpu_to_be64(device_modify->sys_image_guid);
1368                for (i = 0; i < dd->num_pports; i++) {
1369                        struct qib_ibport *ibp = &dd->pport[i].ibport_data;
1370
1371                        qib_sys_guid_chg(ibp);
1372                }
1373        }
1374
1375        ret = 0;
1376
1377bail:
1378        return ret;
1379}
1380
1381static int qib_shut_down_port(struct rvt_dev_info *rdi, u8 port_num)
1382{
1383        struct qib_ibdev *ibdev = container_of(rdi, struct qib_ibdev, rdi);
1384        struct qib_devdata *dd = dd_from_dev(ibdev);
1385        struct qib_pportdata *ppd = &dd->pport[port_num - 1];
1386
1387        qib_set_linkstate(ppd, QIB_IB_LINKDOWN);
1388
1389        return 0;
1390}
1391
1392static int qib_get_guid_be(struct rvt_dev_info *rdi, struct rvt_ibport *rvp,
1393                           int guid_index, __be64 *guid)
1394{
1395        struct qib_ibport *ibp = container_of(rvp, struct qib_ibport, rvp);
1396        struct qib_pportdata *ppd = ppd_from_ibp(ibp);
1397
1398        if (guid_index == 0)
1399                *guid = ppd->guid;
1400        else if (guid_index < QIB_GUIDS_PER_PORT)
1401                *guid = ibp->guids[guid_index - 1];
1402        else
1403                return -EINVAL;
1404
1405        return 0;
1406}
1407
1408int qib_check_ah(struct ib_device *ibdev, struct ib_ah_attr *ah_attr)
1409{
1410        if (ah_attr->sl > 15)
1411                return -EINVAL;
1412
1413        return 0;
1414}
1415
1416static void qib_notify_new_ah(struct ib_device *ibdev,
1417                              struct ib_ah_attr *ah_attr,
1418                              struct rvt_ah *ah)
1419{
1420        struct qib_ibport *ibp;
1421        struct qib_pportdata *ppd;
1422
1423        /*
1424         * Do not trust reading anything from rvt_ah at this point as it is not
 1425         * done being set up. We can, however, modify the fields we need to set.
1426         */
1427
1428        ibp = to_iport(ibdev, ah_attr->port_num);
1429        ppd = ppd_from_ibp(ibp);
1430        ah->vl = ibp->sl_to_vl[ah->attr.sl];
1431        ah->log_pmtu = ilog2(ppd->ibmtu);
1432}
1433
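     /*
      * qib_create_qp0_ah - build an address handle on QP0's PD for the given
      * DLID on this port (used for subnet-management traffic); returns an
      * ERR_PTR if QP0 does not exist.
      */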
1434struct ib_ah *qib_create_qp0_ah(struct qib_ibport *ibp, u16 dlid)
1435{
1436        struct ib_ah_attr attr;
1437        struct ib_ah *ah = ERR_PTR(-EINVAL);
1438        struct rvt_qp *qp0;
1439
1440        memset(&attr, 0, sizeof(attr));
1441        attr.dlid = dlid;
1442        attr.port_num = ppd_from_ibp(ibp)->port;
1443        rcu_read_lock();
1444        qp0 = rcu_dereference(ibp->rvp.qp[0]);
1445        if (qp0)
1446                ah = ib_create_ah(qp0->ibqp.pd, &attr);
1447        rcu_read_unlock();
1448        return ah;
1449}
1450
1451/**
1452 * qib_get_npkeys - return the size of the PKEY table for context 0
1453 * @dd: the qlogic_ib device
1454 */
1455unsigned qib_get_npkeys(struct qib_devdata *dd)
1456{
1457        return ARRAY_SIZE(dd->rcd[0]->pkeys);
1458}
1459
1460/*
1461 * Return the indexed PKEY from the port PKEY table.
1462 * No need to validate rcd[ctxt]; the port is set up if we are here.
1463 */
1464unsigned qib_get_pkey(struct qib_ibport *ibp, unsigned index)
1465{
1466        struct qib_pportdata *ppd = ppd_from_ibp(ibp);
1467        struct qib_devdata *dd = ppd->dd;
1468        unsigned ctxt = ppd->hw_pidx;
1469        unsigned ret;
1470
1471        /* dd->rcd null if mini_init or some init failures */
1472        if (!dd->rcd || index >= ARRAY_SIZE(dd->rcd[ctxt]->pkeys))
1473                ret = 0;
1474        else
1475                ret = dd->rcd[ctxt]->pkeys[index];
1476
1477        return ret;
1478}
1479
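    /*
     * Initialize the per-port IB state: default GID prefix, capability
     * flags, PMA counter selects, and counter baselines.
     */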
1480static void init_ibport(struct qib_pportdata *ppd)
1481{
1482        struct qib_verbs_counters cntrs;
1483        struct qib_ibport *ibp = &ppd->ibport_data;
1484
1485        spin_lock_init(&ibp->rvp.lock);
1486        /* Set the GID prefix to the default value (see IBA vol. 1, ch. 4.1.1) */
1487        ibp->rvp.gid_prefix = IB_DEFAULT_GID_PREFIX;
1488        ibp->rvp.sm_lid = be16_to_cpu(IB_LID_PERMISSIVE);
1489        ibp->rvp.port_cap_flags = IB_PORT_SYS_IMAGE_GUID_SUP |
1490                IB_PORT_CLIENT_REG_SUP | IB_PORT_SL_MAP_SUP |
1491                IB_PORT_TRAP_SUP | IB_PORT_AUTO_MIGR_SUP |
1492                IB_PORT_DR_NOTICE_SUP | IB_PORT_CAP_MASK_NOTICE_SUP |
1493                IB_PORT_OTHER_LOCAL_CHANGES_SUP;
1494        if (ppd->dd->flags & QIB_HAS_LINK_LATENCY)
1495                ibp->rvp.port_cap_flags |= IB_PORT_LINK_LATENCY_SUP;
1496        ibp->rvp.pma_counter_select[0] = IB_PMA_PORT_XMIT_DATA;
1497        ibp->rvp.pma_counter_select[1] = IB_PMA_PORT_RCV_DATA;
1498        ibp->rvp.pma_counter_select[2] = IB_PMA_PORT_XMIT_PKTS;
1499        ibp->rvp.pma_counter_select[3] = IB_PMA_PORT_RCV_PKTS;
1500        ibp->rvp.pma_counter_select[4] = IB_PMA_PORT_XMIT_WAIT;
1501
1502        /* Snapshot current HW counters to "clear" them. */
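            /* Later counter queries subtract these z_* baselines. */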
1503        qib_get_counters(ppd, &cntrs);
1504        ibp->z_symbol_error_counter = cntrs.symbol_error_counter;
1505        ibp->z_link_error_recovery_counter =
1506                cntrs.link_error_recovery_counter;
1507        ibp->z_link_downed_counter = cntrs.link_downed_counter;
1508        ibp->z_port_rcv_errors = cntrs.port_rcv_errors;
1509        ibp->z_port_rcv_remphys_errors = cntrs.port_rcv_remphys_errors;
1510        ibp->z_port_xmit_discards = cntrs.port_xmit_discards;
1511        ibp->z_port_xmit_data = cntrs.port_xmit_data;
1512        ibp->z_port_rcv_data = cntrs.port_rcv_data;
1513        ibp->z_port_xmit_packets = cntrs.port_xmit_packets;
1514        ibp->z_port_rcv_packets = cntrs.port_rcv_packets;
1515        ibp->z_local_link_integrity_errors =
1516                cntrs.local_link_integrity_errors;
1517        ibp->z_excessive_buffer_overrun_errors =
1518                cntrs.excessive_buffer_overrun_errors;
1519        ibp->z_vl15_dropped = cntrs.vl15_dropped;
1520        RCU_INIT_POINTER(ibp->rvp.qp[0], NULL);
1521        RCU_INIT_POINTER(ibp->rvp.qp[1], NULL);
1522}
1523
1524/**
1525 * qib_fill_device_attr - Fill in rvt dev info device attributes.
1526 * @dd: the device data structure
1527 */
1528static void qib_fill_device_attr(struct qib_devdata *dd)
1529{
1530        struct rvt_dev_info *rdi = &dd->verbs_dev.rdi;
1531
1532        memset(&rdi->dparms.props, 0, sizeof(rdi->dparms.props));
1533
1534        rdi->dparms.props.max_pd = ib_qib_max_pds;
1535        rdi->dparms.props.max_ah = ib_qib_max_ahs;
1536        rdi->dparms.props.device_cap_flags = IB_DEVICE_BAD_PKEY_CNTR |
1537                IB_DEVICE_BAD_QKEY_CNTR | IB_DEVICE_SHUTDOWN_PORT |
1538                IB_DEVICE_SYS_IMAGE_GUID | IB_DEVICE_RC_RNR_NAK_GEN |
1539                IB_DEVICE_PORT_ACTIVE_EVENT | IB_DEVICE_SRQ_RESIZE;
1540        rdi->dparms.props.page_size_cap = PAGE_SIZE;
1541        rdi->dparms.props.vendor_id =
1542                QIB_SRC_OUI_1 << 16 | QIB_SRC_OUI_2 << 8 | QIB_SRC_OUI_3;
1543        rdi->dparms.props.vendor_part_id = dd->deviceid;
1544        rdi->dparms.props.hw_ver = dd->minrev;
1545        rdi->dparms.props.sys_image_guid = ib_qib_sys_image_guid;
1546        rdi->dparms.props.max_mr_size = ~0ULL;
1547        rdi->dparms.props.max_qp = ib_qib_max_qps;
1548        rdi->dparms.props.max_qp_wr = ib_qib_max_qp_wrs;
1549        rdi->dparms.props.max_sge = ib_qib_max_sges;
1550        rdi->dparms.props.max_sge_rd = ib_qib_max_sges;
1551        rdi->dparms.props.max_cq = ib_qib_max_cqs;
1552        rdi->dparms.props.max_cqe = ib_qib_max_cqes;
1553        rdi->dparms.props.max_ah = ib_qib_max_ahs;
1554        rdi->dparms.props.max_mr = rdi->lkey_table.max;
1555        rdi->dparms.props.max_fmr = rdi->lkey_table.max;
1556        rdi->dparms.props.max_map_per_fmr = 32767;
1557        rdi->dparms.props.max_qp_rd_atom = QIB_MAX_RDMA_ATOMIC;
1558        rdi->dparms.props.max_qp_init_rd_atom = 255;
1559        rdi->dparms.props.max_srq = ib_qib_max_srqs;
1560        rdi->dparms.props.max_srq_wr = ib_qib_max_srq_wrs;
1561        rdi->dparms.props.max_srq_sge = ib_qib_max_srq_sges;
1562        rdi->dparms.props.atomic_cap = IB_ATOMIC_GLOB;
1563        rdi->dparms.props.max_pkeys = qib_get_npkeys(dd);
1564        rdi->dparms.props.max_mcast_grp = ib_qib_max_mcast_grps;
1565        rdi->dparms.props.max_mcast_qp_attach = ib_qib_max_mcast_qp_attached;
1566        rdi->dparms.props.max_total_mcast_qp_attach =
1567                                        rdi->dparms.props.max_mcast_qp_attach *
1568                                        rdi->dparms.props.max_mcast_grp;
1569        /* post send table */
1570        dd->verbs_dev.rdi.post_parms = qib_post_parms;
1571}
1572
1573/**
1574 * qib_register_ib_device - register our device with the infiniband core
1575 * @dd: the device data structure
1576 * Return: 0 on success, negative errno on failure.
1577 */
1578int qib_register_ib_device(struct qib_devdata *dd)
1579{
1580        struct qib_ibdev *dev = &dd->verbs_dev;
1581        struct ib_device *ibdev = &dev->rdi.ibdev;
1582        struct qib_pportdata *ppd = dd->pport;
1583        unsigned i, ctxt;
1584        int ret;
1585
1586        get_random_bytes(&dev->qp_rnd, sizeof(dev->qp_rnd));
1587        for (i = 0; i < dd->num_pports; i++)
1588                init_ibport(ppd + i);
1589
1590        /* Only need to initialize non-zero fields. */
1591        setup_timer(&dev->mem_timer, mem_timer, (unsigned long)dev);
1592
1593        INIT_LIST_HEAD(&dev->piowait);
1594        INIT_LIST_HEAD(&dev->dmawait);
1595        INIT_LIST_HEAD(&dev->txwait);
1596        INIT_LIST_HEAD(&dev->memwait);
1597        INIT_LIST_HEAD(&dev->txreq_free);
1598
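            /*
             * For SDMA-capable ports, allocate one PIO header slot and one
             * txreq per SDMA descriptor queue entry.
             */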
1599        if (ppd->sdma_descq_cnt) {
1600                dev->pio_hdrs = dma_alloc_coherent(&dd->pcidev->dev,
1601                                                ppd->sdma_descq_cnt *
1602                                                sizeof(struct qib_pio_header),
1603                                                &dev->pio_hdrs_phys,
1604                                                GFP_KERNEL);
1605                if (!dev->pio_hdrs) {
1606                        ret = -ENOMEM;
1607                        goto err_hdrs;
1608                }
1609        }
1610
1611        for (i = 0; i < ppd->sdma_descq_cnt; i++) {
1612                struct qib_verbs_txreq *tx;
1613
1614                tx = kzalloc(sizeof(*tx), GFP_KERNEL);
1615                if (!tx) {
1616                        ret = -ENOMEM;
1617                        goto err_tx;
1618                }
1619                tx->hdr_inx = i;
1620                list_add(&tx->txreq.list, &dev->txreq_free);
1621        }
1622
1623        /*
1624         * The system image GUID is supposed to be the same for all
1625         * IB HCAs in a single system but since there can be other
1626         * device types in the system, we can't be sure this is unique.
1627         */
1628        if (!ib_qib_sys_image_guid)
1629                ib_qib_sys_image_guid = ppd->guid;
1630
1631        strlcpy(ibdev->name, "qib%d", IB_DEVICE_NAME_MAX);
1632        ibdev->owner = THIS_MODULE;
1633        ibdev->node_guid = ppd->guid;
1634        ibdev->phys_port_cnt = dd->num_pports;
1635        ibdev->dma_device = &dd->pcidev->dev;
1636        ibdev->modify_device = qib_modify_device;
1637        ibdev->process_mad = qib_process_mad;
1638
1639        snprintf(ibdev->node_desc, sizeof(ibdev->node_desc),
1640                 "Intel Infiniband HCA %s", init_utsname()->nodename);
1641
1642        /*
1643         * Fill in rvt info object.
1644         */
1645        dd->verbs_dev.rdi.driver_f.port_callback = qib_create_port_files;
1646        dd->verbs_dev.rdi.driver_f.get_card_name = qib_get_card_name;
1647        dd->verbs_dev.rdi.driver_f.get_pci_dev = qib_get_pci_dev;
1648        dd->verbs_dev.rdi.driver_f.check_ah = qib_check_ah;
1649        dd->verbs_dev.rdi.driver_f.check_send_wqe = qib_check_send_wqe;
1650        dd->verbs_dev.rdi.driver_f.notify_new_ah = qib_notify_new_ah;
1651        dd->verbs_dev.rdi.driver_f.alloc_qpn = qib_alloc_qpn;
1652        dd->verbs_dev.rdi.driver_f.qp_priv_alloc = qib_qp_priv_alloc;
1653        dd->verbs_dev.rdi.driver_f.qp_priv_free = qib_qp_priv_free;
1654        dd->verbs_dev.rdi.driver_f.free_all_qps = qib_free_all_qps;
1655        dd->verbs_dev.rdi.driver_f.notify_qp_reset = qib_notify_qp_reset;
1656        dd->verbs_dev.rdi.driver_f.do_send = qib_do_send;
1657        dd->verbs_dev.rdi.driver_f.schedule_send = qib_schedule_send;
1658        dd->verbs_dev.rdi.driver_f.quiesce_qp = qib_quiesce_qp;
1659        dd->verbs_dev.rdi.driver_f.stop_send_queue = qib_stop_send_queue;
1660        dd->verbs_dev.rdi.driver_f.flush_qp_waiters = qib_flush_qp_waiters;
1661        dd->verbs_dev.rdi.driver_f.notify_error_qp = qib_notify_error_qp;
1662        dd->verbs_dev.rdi.driver_f.mtu_to_path_mtu = qib_mtu_to_path_mtu;
1663        dd->verbs_dev.rdi.driver_f.mtu_from_qp = qib_mtu_from_qp;
1664        dd->verbs_dev.rdi.driver_f.get_pmtu_from_attr = qib_get_pmtu_from_attr;
1665        dd->verbs_dev.rdi.driver_f.schedule_send_no_lock = _qib_schedule_send;
1666        dd->verbs_dev.rdi.driver_f.query_port_state = qib_query_port;
1667        dd->verbs_dev.rdi.driver_f.shut_down_port = qib_shut_down_port;
1668        dd->verbs_dev.rdi.driver_f.cap_mask_chg = qib_cap_mask_chg;
1669        dd->verbs_dev.rdi.driver_f.notify_create_mad_agent =
1670                                                qib_notify_create_mad_agent;
1671        dd->verbs_dev.rdi.driver_f.notify_free_mad_agent =
1672                                                qib_notify_free_mad_agent;
1673
1674        dd->verbs_dev.rdi.dparms.max_rdma_atomic = QIB_MAX_RDMA_ATOMIC;
1675        dd->verbs_dev.rdi.driver_f.get_guid_be = qib_get_guid_be;
1676        dd->verbs_dev.rdi.dparms.lkey_table_size = qib_lkey_table_size;
1677        dd->verbs_dev.rdi.dparms.qp_table_size = ib_qib_qp_table_size;
1678        dd->verbs_dev.rdi.dparms.qpn_start = 1;
1679        dd->verbs_dev.rdi.dparms.qpn_res_start = QIB_KD_QP;
1680        dd->verbs_dev.rdi.dparms.qpn_res_end = QIB_KD_QP; /* Reserve one QP */
1681        dd->verbs_dev.rdi.dparms.qpn_inc = 1;
1682        dd->verbs_dev.rdi.dparms.qos_shift = 1;
1683        dd->verbs_dev.rdi.dparms.psn_mask = QIB_PSN_MASK;
1684        dd->verbs_dev.rdi.dparms.psn_shift = QIB_PSN_SHIFT;
1685        dd->verbs_dev.rdi.dparms.psn_modify_mask = QIB_PSN_MASK;
1686        dd->verbs_dev.rdi.dparms.nports = dd->num_pports;
1687        dd->verbs_dev.rdi.dparms.npkeys = qib_get_npkeys(dd);
1688        dd->verbs_dev.rdi.dparms.node = dd->assigned_node_id;
1689        dd->verbs_dev.rdi.dparms.core_cap_flags = RDMA_CORE_PORT_IBA_IB;
1690        dd->verbs_dev.rdi.dparms.max_mad_size = IB_MGMT_MAD_SIZE;
1691
1692        snprintf(dd->verbs_dev.rdi.dparms.cq_name,
1693                 sizeof(dd->verbs_dev.rdi.dparms.cq_name),
1694                 "qib_cq%d", dd->unit);
1695
1696        qib_fill_device_attr(dd);
1697
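            /* Hand each port's rvt_ibport and PKEY table to rdmavt. */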
1698        ppd = dd->pport;
1699        for (i = 0; i < dd->num_pports; i++, ppd++) {
1700                ctxt = ppd->hw_pidx;
1701                rvt_init_port(&dd->verbs_dev.rdi,
1702                              &ppd->ibport_data.rvp,
1703                              i,
1704                              dd->rcd[ctxt]->pkeys);
1705        }
1706
1707        ret = rvt_register_device(&dd->verbs_dev.rdi);
1708        if (ret)
1709                goto err_tx;
1710
1711        ret = qib_verbs_register_sysfs(dd);
1712        if (ret)
1713                goto err_class;
1714
1715        return ret;
1716
1717err_class:
1718        rvt_unregister_device(&dd->verbs_dev.rdi);
1719err_tx:
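            /* Free any txreqs queued so far, then the PIO header array. */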
1720        while (!list_empty(&dev->txreq_free)) {
1721                struct list_head *l = dev->txreq_free.next;
1722                struct qib_verbs_txreq *tx;
1723
1724                list_del(l);
1725                tx = list_entry(l, struct qib_verbs_txreq, txreq.list);
1726                kfree(tx);
1727        }
1728        if (ppd->sdma_descq_cnt)
1729                dma_free_coherent(&dd->pcidev->dev,
1730                                  ppd->sdma_descq_cnt *
1731                                        sizeof(struct qib_pio_header),
1732                                  dev->pio_hdrs, dev->pio_hdrs_phys);
1733err_hdrs:
1734        qib_dev_err(dd, "cannot register verbs: %d!\n", -ret);
1735        return ret;
1736}
1737
1738void qib_unregister_ib_device(struct qib_devdata *dd)
1739{
1740        struct qib_ibdev *dev = &dd->verbs_dev;
1741
1742        qib_verbs_unregister_sysfs(dd);
1743
1744        rvt_unregister_device(&dd->verbs_dev.rdi);
1745
1746        if (!list_empty(&dev->piowait))
1747                qib_dev_err(dd, "piowait list not empty!\n");
1748        if (!list_empty(&dev->dmawait))
1749                qib_dev_err(dd, "dmawait list not empty!\n");
1750        if (!list_empty(&dev->txwait))
1751                qib_dev_err(dd, "txwait list not empty!\n");
1752        if (!list_empty(&dev->memwait))
1753                qib_dev_err(dd, "memwait list not empty!\n");
1754
1755        del_timer_sync(&dev->mem_timer);
1756        while (!list_empty(&dev->txreq_free)) {
1757                struct list_head *l = dev->txreq_free.next;
1758                struct qib_verbs_txreq *tx;
1759
1760                list_del(l);
1761                tx = list_entry(l, struct qib_verbs_txreq, txreq.list);
1762                kfree(tx);
1763        }
1764        if (dd->pport->sdma_descq_cnt)
1765                dma_free_coherent(&dd->pcidev->dev,
1766                                  dd->pport->sdma_descq_cnt *
1767                                        sizeof(struct qib_pio_header),
1768                                  dev->pio_hdrs, dev->pio_hdrs_phys);
1769}
1770
1771/**
1772 * _qib_schedule_send - schedule progress
1773 * @qp: the qp
1774 *
1775 * This schedules progress w/o regard to the s_flags.
1776 *
1777 * It is only used in post send, which doesn't hold
1778 * the s_lock.
1779 */
1780void _qib_schedule_send(struct rvt_qp *qp)
1781{
1782        struct qib_ibport *ibp =
1783                to_iport(qp->ibqp.device, qp->port_num);
1784        struct qib_pportdata *ppd = ppd_from_ibp(ibp);
1785        struct qib_qp_priv *priv = qp->priv;
1786
1787        queue_work(ppd->qib_wq, &priv->s_work);
1788}
1789
1790/**
1791 * qib_schedule_send - schedule progress
1792 * @qp: the qp
1793 *
1794 * This schedules qp progress.  The s_lock
1795 * should be held.
1796 */
1797void qib_schedule_send(struct rvt_qp *qp)
1798{
1799        if (qib_send_ok(qp))
1800                _qib_schedule_send(qp);
1801}
1802