linux/drivers/infiniband/hw/ipath/ipath_verbs.c
   1/*
   2 * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved.
   3 * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
   4 *
   5 * This software is available to you under a choice of one of two
   6 * licenses.  You may choose to be licensed under the terms of the GNU
   7 * General Public License (GPL) Version 2, available from the file
   8 * COPYING in the main directory of this source tree, or the
   9 * OpenIB.org BSD license below:
  10 *
  11 *     Redistribution and use in source and binary forms, with or
  12 *     without modification, are permitted provided that the following
  13 *     conditions are met:
  14 *
  15 *      - Redistributions of source code must retain the above
  16 *        copyright notice, this list of conditions and the following
  17 *        disclaimer.
  18 *
  19 *      - Redistributions in binary form must reproduce the above
  20 *        copyright notice, this list of conditions and the following
  21 *        disclaimer in the documentation and/or other materials
  22 *        provided with the distribution.
  23 *
  24 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  25 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  26 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  27 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
  28 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
  29 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  31 * SOFTWARE.
  32 */
  33
  34#include <rdma/ib_mad.h>
  35#include <rdma/ib_user_verbs.h>
  36#include <linux/io.h>
  37#include <linux/slab.h>
  38#include <linux/module.h>
  39#include <linux/utsname.h>
  40#include <linux/rculist.h>
  41
  42#include "ipath_kernel.h"
  43#include "ipath_verbs.h"
  44#include "ipath_common.h"
  45
  46static unsigned int ib_ipath_qp_table_size = 251;
  47module_param_named(qp_table_size, ib_ipath_qp_table_size, uint, S_IRUGO);
  48MODULE_PARM_DESC(qp_table_size, "QP table size");
  49
  50unsigned int ib_ipath_lkey_table_size = 12;
  51module_param_named(lkey_table_size, ib_ipath_lkey_table_size, uint,
  52                   S_IRUGO);
  53MODULE_PARM_DESC(lkey_table_size,
  54                 "LKEY table size in bits (2^n, 1 <= n <= 23)");
  55
  56static unsigned int ib_ipath_max_pds = 0xFFFF;
  57module_param_named(max_pds, ib_ipath_max_pds, uint, S_IWUSR | S_IRUGO);
  58MODULE_PARM_DESC(max_pds,
  59                 "Maximum number of protection domains to support");
  60
  61static unsigned int ib_ipath_max_ahs = 0xFFFF;
  62module_param_named(max_ahs, ib_ipath_max_ahs, uint, S_IWUSR | S_IRUGO);
  63MODULE_PARM_DESC(max_ahs, "Maximum number of address handles to support");
  64
  65unsigned int ib_ipath_max_cqes = 0x2FFFF;
  66module_param_named(max_cqes, ib_ipath_max_cqes, uint, S_IWUSR | S_IRUGO);
  67MODULE_PARM_DESC(max_cqes,
  68                 "Maximum number of completion queue entries to support");
  69
  70unsigned int ib_ipath_max_cqs = 0x1FFFF;
  71module_param_named(max_cqs, ib_ipath_max_cqs, uint, S_IWUSR | S_IRUGO);
  72MODULE_PARM_DESC(max_cqs, "Maximum number of completion queues to support");
  73
  74unsigned int ib_ipath_max_qp_wrs = 0x3FFF;
  75module_param_named(max_qp_wrs, ib_ipath_max_qp_wrs, uint,
  76                   S_IWUSR | S_IRUGO);
  77MODULE_PARM_DESC(max_qp_wrs, "Maximum number of QP WRs to support");
  78
  79unsigned int ib_ipath_max_qps = 16384;
  80module_param_named(max_qps, ib_ipath_max_qps, uint, S_IWUSR | S_IRUGO);
  81MODULE_PARM_DESC(max_qps, "Maximum number of QPs to support");
  82
  83unsigned int ib_ipath_max_sges = 0x60;
  84module_param_named(max_sges, ib_ipath_max_sges, uint, S_IWUSR | S_IRUGO);
  85MODULE_PARM_DESC(max_sges, "Maximum number of SGEs to support");
  86
  87unsigned int ib_ipath_max_mcast_grps = 16384;
  88module_param_named(max_mcast_grps, ib_ipath_max_mcast_grps, uint,
  89                   S_IWUSR | S_IRUGO);
  90MODULE_PARM_DESC(max_mcast_grps,
  91                 "Maximum number of multicast groups to support");
  92
  93unsigned int ib_ipath_max_mcast_qp_attached = 16;
  94module_param_named(max_mcast_qp_attached, ib_ipath_max_mcast_qp_attached,
  95                   uint, S_IWUSR | S_IRUGO);
   96MODULE_PARM_DESC(max_mcast_qp_attached,
   97                 "Maximum number of QPs attached per multicast group");
  98
  99unsigned int ib_ipath_max_srqs = 1024;
 100module_param_named(max_srqs, ib_ipath_max_srqs, uint, S_IWUSR | S_IRUGO);
 101MODULE_PARM_DESC(max_srqs, "Maximum number of SRQs to support");
 102
 103unsigned int ib_ipath_max_srq_sges = 128;
 104module_param_named(max_srq_sges, ib_ipath_max_srq_sges,
 105                   uint, S_IWUSR | S_IRUGO);
 106MODULE_PARM_DESC(max_srq_sges, "Maximum number of SRQ SGEs to support");
 107
 108unsigned int ib_ipath_max_srq_wrs = 0x1FFFF;
 109module_param_named(max_srq_wrs, ib_ipath_max_srq_wrs,
 110                   uint, S_IWUSR | S_IRUGO);
  111MODULE_PARM_DESC(max_srq_wrs, "Maximum number of SRQ WRs to support");
 112
 113static unsigned int ib_ipath_disable_sma;
 114module_param_named(disable_sma, ib_ipath_disable_sma, uint, S_IWUSR | S_IRUGO);
 115MODULE_PARM_DESC(disable_sma, "Disable the SMA");
 116
 117/*
 118 * Note that it is OK to post send work requests in the SQE and ERR
 119 * states; ipath_do_send() will process them and generate error
 120 * completions as per IB 1.2 C10-96.
 121 */
 122const int ib_ipath_state_ops[IB_QPS_ERR + 1] = {
 123        [IB_QPS_RESET] = 0,
 124        [IB_QPS_INIT] = IPATH_POST_RECV_OK,
 125        [IB_QPS_RTR] = IPATH_POST_RECV_OK | IPATH_PROCESS_RECV_OK,
 126        [IB_QPS_RTS] = IPATH_POST_RECV_OK | IPATH_PROCESS_RECV_OK |
 127            IPATH_POST_SEND_OK | IPATH_PROCESS_SEND_OK |
 128            IPATH_PROCESS_NEXT_SEND_OK,
 129        [IB_QPS_SQD] = IPATH_POST_RECV_OK | IPATH_PROCESS_RECV_OK |
 130            IPATH_POST_SEND_OK | IPATH_PROCESS_SEND_OK,
 131        [IB_QPS_SQE] = IPATH_POST_RECV_OK | IPATH_PROCESS_RECV_OK |
 132            IPATH_POST_SEND_OK | IPATH_FLUSH_SEND,
 133        [IB_QPS_ERR] = IPATH_POST_RECV_OK | IPATH_FLUSH_RECV |
 134            IPATH_POST_SEND_OK | IPATH_FLUSH_SEND,
 135};
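
/*
 * Usage sketch (this mirrors the checks made later in this file): a
 * caller tests the current QP state against the capability bits before
 * acting, e.g.
 *
 *	if (!(ib_ipath_state_ops[qp->state] & IPATH_POST_SEND_OK))
 *		goto bail_inval;
 *
 * so a QP in IB_QPS_RTS passes (its entry includes IPATH_POST_SEND_OK)
 * while one in IB_QPS_RESET does not.
 */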
 136
 137struct ipath_ucontext {
 138        struct ib_ucontext ibucontext;
 139};
 140
 141static inline struct ipath_ucontext *to_iucontext(struct ib_ucontext
 142                                                  *ibucontext)
 143{
 144        return container_of(ibucontext, struct ipath_ucontext, ibucontext);
 145}
 146
 147/*
 148 * Translate ib_wr_opcode into ib_wc_opcode.
 149 */
 150const enum ib_wc_opcode ib_ipath_wc_opcode[] = {
 151        [IB_WR_RDMA_WRITE] = IB_WC_RDMA_WRITE,
 152        [IB_WR_RDMA_WRITE_WITH_IMM] = IB_WC_RDMA_WRITE,
 153        [IB_WR_SEND] = IB_WC_SEND,
 154        [IB_WR_SEND_WITH_IMM] = IB_WC_SEND,
 155        [IB_WR_RDMA_READ] = IB_WC_RDMA_READ,
 156        [IB_WR_ATOMIC_CMP_AND_SWP] = IB_WC_COMP_SWAP,
 157        [IB_WR_ATOMIC_FETCH_AND_ADD] = IB_WC_FETCH_ADD
 158};
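
/*
 * Example: when a posted IB_WR_SEND_WITH_IMM completes, the work
 * completion is reported with wc.opcode ==
 * ib_ipath_wc_opcode[IB_WR_SEND_WITH_IMM], i.e. IB_WC_SEND; the
 * _WITH_IMM distinction is not visible in the sender's completion.
 */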
 159
 160/*
 161 * System image GUID.
 162 */
 163static __be64 sys_image_guid;
 164
 165/**
 166 * ipath_copy_sge - copy data to SGE memory
 167 * @ss: the SGE state
 168 * @data: the data to copy
 169 * @length: the length of the data
 170 */
 171void ipath_copy_sge(struct ipath_sge_state *ss, void *data, u32 length)
 172{
 173        struct ipath_sge *sge = &ss->sge;
 174
 175        while (length) {
 176                u32 len = sge->length;
 177
 178                if (len > length)
 179                        len = length;
 180                if (len > sge->sge_length)
 181                        len = sge->sge_length;
 182                BUG_ON(len == 0);
 183                memcpy(sge->vaddr, data, len);
 184                sge->vaddr += len;
 185                sge->length -= len;
 186                sge->sge_length -= len;
 187                if (sge->sge_length == 0) {
 188                        if (--ss->num_sge)
 189                                *sge = *ss->sg_list++;
 190                } else if (sge->length == 0 && sge->mr != NULL) {
 191                        if (++sge->n >= IPATH_SEGSZ) {
 192                                if (++sge->m >= sge->mr->mapsz)
 193                                        break;
 194                                sge->n = 0;
 195                        }
 196                        sge->vaddr =
 197                                sge->mr->map[sge->m]->segs[sge->n].vaddr;
 198                        sge->length =
 199                                sge->mr->map[sge->m]->segs[sge->n].length;
 200                }
 201                data += len;
 202                length -= len;
 203        }
 204}
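
/*
 * The loop above (and the nearly identical loops that follow) walks the
 * scatter/gather state in two dimensions: when sge_length reaches zero
 * the current SGE is exhausted and the next entry of sg_list is loaded;
 * when only the current memory-region segment is exhausted (length == 0
 * with an MR attached) the walk advances to the next segs[] entry,
 * moving to the next map[] chunk every IPATH_SEGSZ segments.  Each
 * iteration handles min(length, sge->length, sge->sge_length) bytes.
 */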
 205
 206/**
  207 * ipath_skip_sge - skip over SGE memory (same walk as ipath_copy_sge(), minus the copy)
 208 * @ss: the SGE state
 209 * @length: the number of bytes to skip
 210 */
 211void ipath_skip_sge(struct ipath_sge_state *ss, u32 length)
 212{
 213        struct ipath_sge *sge = &ss->sge;
 214
 215        while (length) {
 216                u32 len = sge->length;
 217
 218                if (len > length)
 219                        len = length;
 220                if (len > sge->sge_length)
 221                        len = sge->sge_length;
 222                BUG_ON(len == 0);
 223                sge->vaddr += len;
 224                sge->length -= len;
 225                sge->sge_length -= len;
 226                if (sge->sge_length == 0) {
 227                        if (--ss->num_sge)
 228                                *sge = *ss->sg_list++;
 229                } else if (sge->length == 0 && sge->mr != NULL) {
 230                        if (++sge->n >= IPATH_SEGSZ) {
 231                                if (++sge->m >= sge->mr->mapsz)
 232                                        break;
 233                                sge->n = 0;
 234                        }
 235                        sge->vaddr =
 236                                sge->mr->map[sge->m]->segs[sge->n].vaddr;
 237                        sge->length =
 238                                sge->mr->map[sge->m]->segs[sge->n].length;
 239                }
 240                length -= len;
 241        }
 242}
 243
 244/*
 245 * Count the number of DMA descriptors needed to send length bytes of data.
  246 * The count is computed without modifying the ipath_sge_state.
 247 * Return zero if any of the segments is not aligned.
 248 */
 249static u32 ipath_count_sge(struct ipath_sge_state *ss, u32 length)
 250{
 251        struct ipath_sge *sg_list = ss->sg_list;
 252        struct ipath_sge sge = ss->sge;
 253        u8 num_sge = ss->num_sge;
 254        u32 ndesc = 1;  /* count the header */
 255
 256        while (length) {
 257                u32 len = sge.length;
 258
 259                if (len > length)
 260                        len = length;
 261                if (len > sge.sge_length)
 262                        len = sge.sge_length;
 263                BUG_ON(len == 0);
 264                if (((long) sge.vaddr & (sizeof(u32) - 1)) ||
 265                    (len != length && (len & (sizeof(u32) - 1)))) {
 266                        ndesc = 0;
 267                        break;
 268                }
 269                ndesc++;
 270                sge.vaddr += len;
 271                sge.length -= len;
 272                sge.sge_length -= len;
 273                if (sge.sge_length == 0) {
 274                        if (--num_sge)
 275                                sge = *sg_list++;
 276                } else if (sge.length == 0 && sge.mr != NULL) {
 277                        if (++sge.n >= IPATH_SEGSZ) {
 278                                if (++sge.m >= sge.mr->mapsz)
 279                                        break;
 280                                sge.n = 0;
 281                        }
 282                        sge.vaddr =
 283                                sge.mr->map[sge.m]->segs[sge.n].vaddr;
 284                        sge.length =
 285                                sge.mr->map[sge.m]->segs[sge.n].length;
 286                }
 287                length -= len;
 288        }
 289        return ndesc;
 290}
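
/*
 * Worked example: a 4096-byte payload in a single SGE whose vaddr is
 * 4-byte aligned yields ndesc == 2 (one descriptor for the header plus
 * one for the data).  If the vaddr, or the length of any chunk other
 * than the last, is not a multiple of sizeof(u32), the function returns
 * 0 and ipath_verbs_send_dma() falls back to its copy path.
 */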
 291
 292/*
 293 * Copy from the SGEs to the data buffer.
 294 */
 295static void ipath_copy_from_sge(void *data, struct ipath_sge_state *ss,
 296                                u32 length)
 297{
 298        struct ipath_sge *sge = &ss->sge;
 299
 300        while (length) {
 301                u32 len = sge->length;
 302
 303                if (len > length)
 304                        len = length;
 305                if (len > sge->sge_length)
 306                        len = sge->sge_length;
 307                BUG_ON(len == 0);
 308                memcpy(data, sge->vaddr, len);
 309                sge->vaddr += len;
 310                sge->length -= len;
 311                sge->sge_length -= len;
 312                if (sge->sge_length == 0) {
 313                        if (--ss->num_sge)
 314                                *sge = *ss->sg_list++;
 315                } else if (sge->length == 0 && sge->mr != NULL) {
 316                        if (++sge->n >= IPATH_SEGSZ) {
 317                                if (++sge->m >= sge->mr->mapsz)
 318                                        break;
 319                                sge->n = 0;
 320                        }
 321                        sge->vaddr =
 322                                sge->mr->map[sge->m]->segs[sge->n].vaddr;
 323                        sge->length =
 324                                sge->mr->map[sge->m]->segs[sge->n].length;
 325                }
 326                data += len;
 327                length -= len;
 328        }
 329}
 330
 331/**
 332 * ipath_post_one_send - post one RC, UC, or UD send work request
 333 * @qp: the QP to post on
 334 * @wr: the work request to send
 335 */
 336static int ipath_post_one_send(struct ipath_qp *qp, struct ib_send_wr *wr)
 337{
 338        struct ipath_swqe *wqe;
 339        u32 next;
 340        int i;
 341        int j;
 342        int acc;
 343        int ret;
 344        unsigned long flags;
 345        struct ipath_devdata *dd = to_idev(qp->ibqp.device)->dd;
 346
 347        spin_lock_irqsave(&qp->s_lock, flags);
 348
 349        if (qp->ibqp.qp_type != IB_QPT_SMI &&
 350            !(dd->ipath_flags & IPATH_LINKACTIVE)) {
 351                ret = -ENETDOWN;
 352                goto bail;
 353        }
 354
 355        /* Check that state is OK to post send. */
 356        if (unlikely(!(ib_ipath_state_ops[qp->state] & IPATH_POST_SEND_OK)))
 357                goto bail_inval;
 358
 359        /* IB spec says that num_sge == 0 is OK. */
 360        if (wr->num_sge > qp->s_max_sge)
 361                goto bail_inval;
 362
 363        /*
  364         * Don't allow RDMA reads or atomic operations on UC QPs,
  365         * and reject opcodes that are undefined for the QP type.
  366         * Make sure the buffer is large enough to hold the result for atomics.
 367         */
 368        if (qp->ibqp.qp_type == IB_QPT_UC) {
 369                if ((unsigned) wr->opcode >= IB_WR_RDMA_READ)
 370                        goto bail_inval;
 371        } else if (qp->ibqp.qp_type == IB_QPT_UD) {
 372                /* Check UD opcode */
 373                if (wr->opcode != IB_WR_SEND &&
 374                    wr->opcode != IB_WR_SEND_WITH_IMM)
 375                        goto bail_inval;
 376                /* Check UD destination address PD */
 377                if (qp->ibqp.pd != ud_wr(wr)->ah->pd)
 378                        goto bail_inval;
 379        } else if ((unsigned) wr->opcode > IB_WR_ATOMIC_FETCH_AND_ADD)
 380                goto bail_inval;
 381        else if (wr->opcode >= IB_WR_ATOMIC_CMP_AND_SWP &&
 382                   (wr->num_sge == 0 ||
 383                    wr->sg_list[0].length < sizeof(u64) ||
 384                    wr->sg_list[0].addr & (sizeof(u64) - 1)))
 385                goto bail_inval;
 386        else if (wr->opcode >= IB_WR_RDMA_READ && !qp->s_max_rd_atomic)
 387                goto bail_inval;
 388
 389        next = qp->s_head + 1;
 390        if (next >= qp->s_size)
 391                next = 0;
 392        if (next == qp->s_last) {
 393                ret = -ENOMEM;
 394                goto bail;
 395        }
 396
 397        wqe = get_swqe_ptr(qp, qp->s_head);
 398
 399        if (qp->ibqp.qp_type != IB_QPT_UC &&
 400            qp->ibqp.qp_type != IB_QPT_RC)
 401                memcpy(&wqe->ud_wr, ud_wr(wr), sizeof(wqe->ud_wr));
 402        else if (wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM ||
 403                 wr->opcode == IB_WR_RDMA_WRITE ||
 404                 wr->opcode == IB_WR_RDMA_READ)
 405                memcpy(&wqe->rdma_wr, rdma_wr(wr), sizeof(wqe->rdma_wr));
 406        else if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP ||
 407                 wr->opcode == IB_WR_ATOMIC_FETCH_AND_ADD)
 408                memcpy(&wqe->atomic_wr, atomic_wr(wr), sizeof(wqe->atomic_wr));
 409        else
 410                memcpy(&wqe->wr, wr, sizeof(wqe->wr));
 411
 412        wqe->length = 0;
 413        if (wr->num_sge) {
 414                acc = wr->opcode >= IB_WR_RDMA_READ ?
 415                        IB_ACCESS_LOCAL_WRITE : 0;
 416                for (i = 0, j = 0; i < wr->num_sge; i++) {
 417                        u32 length = wr->sg_list[i].length;
 418                        int ok;
 419
 420                        if (length == 0)
 421                                continue;
 422                        ok = ipath_lkey_ok(qp, &wqe->sg_list[j],
 423                                           &wr->sg_list[i], acc);
 424                        if (!ok)
 425                                goto bail_inval;
 426                        wqe->length += length;
 427                        j++;
 428                }
 429                wqe->wr.num_sge = j;
 430        }
 431        if (qp->ibqp.qp_type == IB_QPT_UC ||
 432            qp->ibqp.qp_type == IB_QPT_RC) {
 433                if (wqe->length > 0x80000000U)
 434                        goto bail_inval;
 435        } else if (wqe->length > to_idev(qp->ibqp.device)->dd->ipath_ibmtu)
 436                goto bail_inval;
 437        wqe->ssn = qp->s_ssn++;
 438        qp->s_head = next;
 439
 440        ret = 0;
 441        goto bail;
 442
 443bail_inval:
 444        ret = -EINVAL;
 445bail:
 446        spin_unlock_irqrestore(&qp->s_lock, flags);
 447        return ret;
 448}
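
/*
 * Example of the atomic checks above: an IB_WR_ATOMIC_FETCH_AND_ADD
 * request must supply at least one SGE, and sg_list[0] must be 8-byte
 * aligned and at least sizeof(u64) bytes long, since the original value
 * at the remote address is returned into that buffer when the atomic
 * completes.
 */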
 449
 450/**
 451 * ipath_post_send - post a send on a QP
 452 * @ibqp: the QP to post the send on
 453 * @wr: the list of work requests to post
 454 * @bad_wr: the first bad WR is put here
 455 *
 456 * This may be called from interrupt context.
 457 */
 458static int ipath_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
 459                           struct ib_send_wr **bad_wr)
 460{
 461        struct ipath_qp *qp = to_iqp(ibqp);
 462        int err = 0;
 463
 464        for (; wr; wr = wr->next) {
 465                err = ipath_post_one_send(qp, wr);
 466                if (err) {
 467                        *bad_wr = wr;
 468                        goto bail;
 469                }
 470        }
 471
 472        /* Try to do the send work in the caller's context. */
 473        ipath_do_send((unsigned long) qp);
 474
 475bail:
 476        return err;
 477}
 478
 479/**
 480 * ipath_post_receive - post a receive on a QP
 481 * @ibqp: the QP to post the receive on
 482 * @wr: the WR to post
 483 * @bad_wr: the first bad WR is put here
 484 *
 485 * This may be called from interrupt context.
 486 */
 487static int ipath_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
 488                              struct ib_recv_wr **bad_wr)
 489{
 490        struct ipath_qp *qp = to_iqp(ibqp);
 491        struct ipath_rwq *wq = qp->r_rq.wq;
 492        unsigned long flags;
 493        int ret;
 494
 495        /* Check that state is OK to post receive. */
 496        if (!(ib_ipath_state_ops[qp->state] & IPATH_POST_RECV_OK) || !wq) {
 497                *bad_wr = wr;
 498                ret = -EINVAL;
 499                goto bail;
 500        }
 501
 502        for (; wr; wr = wr->next) {
 503                struct ipath_rwqe *wqe;
 504                u32 next;
 505                int i;
 506
 507                if ((unsigned) wr->num_sge > qp->r_rq.max_sge) {
 508                        *bad_wr = wr;
 509                        ret = -EINVAL;
 510                        goto bail;
 511                }
 512
 513                spin_lock_irqsave(&qp->r_rq.lock, flags);
 514                next = wq->head + 1;
 515                if (next >= qp->r_rq.size)
 516                        next = 0;
 517                if (next == wq->tail) {
 518                        spin_unlock_irqrestore(&qp->r_rq.lock, flags);
 519                        *bad_wr = wr;
 520                        ret = -ENOMEM;
 521                        goto bail;
 522                }
 523
 524                wqe = get_rwqe_ptr(&qp->r_rq, wq->head);
 525                wqe->wr_id = wr->wr_id;
 526                wqe->num_sge = wr->num_sge;
 527                for (i = 0; i < wr->num_sge; i++)
 528                        wqe->sg_list[i] = wr->sg_list[i];
 529                /* Make sure queue entry is written before the head index. */
 530                smp_wmb();
 531                wq->head = next;
 532                spin_unlock_irqrestore(&qp->r_rq.lock, flags);
 533        }
 534        ret = 0;
 535
 536bail:
 537        return ret;
 538}
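
/*
 * The receive queue above is a ring: posters advance wq->head (under
 * r_rq.lock), the consumer advances wq->tail, and "next == tail" is
 * treated as full, so a queue created with size N holds at most N - 1
 * outstanding receive WRs.  The smp_wmb() ensures a consumer that
 * observes the new head also sees the completed queue entry.
 */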
 539
 540/**
  541 * ipath_qp_rcv - process an incoming packet on a QP
 542 * @dev: the device the packet came on
 543 * @hdr: the packet header
 544 * @has_grh: true if the packet has a GRH
 545 * @data: the packet data
 546 * @tlen: the packet length
 547 * @qp: the QP the packet came on
 548 *
 549 * This is called from ipath_ib_rcv() to process an incoming packet
 550 * for the given QP.
 551 * Called at interrupt level.
 552 */
 553static void ipath_qp_rcv(struct ipath_ibdev *dev,
 554                         struct ipath_ib_header *hdr, int has_grh,
 555                         void *data, u32 tlen, struct ipath_qp *qp)
 556{
 557        /* Check for valid receive state. */
 558        if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK)) {
 559                dev->n_pkt_drops++;
 560                return;
 561        }
 562
 563        switch (qp->ibqp.qp_type) {
 564        case IB_QPT_SMI:
 565        case IB_QPT_GSI:
 566                if (ib_ipath_disable_sma)
 567                        break;
 568                /* FALLTHROUGH */
 569        case IB_QPT_UD:
 570                ipath_ud_rcv(dev, hdr, has_grh, data, tlen, qp);
 571                break;
 572
 573        case IB_QPT_RC:
 574                ipath_rc_rcv(dev, hdr, has_grh, data, tlen, qp);
 575                break;
 576
 577        case IB_QPT_UC:
 578                ipath_uc_rcv(dev, hdr, has_grh, data, tlen, qp);
 579                break;
 580
 581        default:
 582                break;
 583        }
 584}
 585
 586/**
 587 * ipath_ib_rcv - process an incoming packet
 588 * @arg: the device pointer
 589 * @rhdr: the header of the packet
 590 * @data: the packet data
 591 * @tlen: the packet length
 592 *
 593 * This is called from ipath_kreceive() to process an incoming packet at
 594 * interrupt level. Tlen is the length of the header + data + CRC in bytes.
 595 */
 596void ipath_ib_rcv(struct ipath_ibdev *dev, void *rhdr, void *data,
 597                  u32 tlen)
 598{
 599        struct ipath_ib_header *hdr = rhdr;
 600        struct ipath_other_headers *ohdr;
 601        struct ipath_qp *qp;
 602        u32 qp_num;
 603        int lnh;
 604        u8 opcode;
 605        u16 lid;
 606
 607        if (unlikely(dev == NULL))
 608                goto bail;
 609
 610        if (unlikely(tlen < 24)) {      /* LRH+BTH+CRC */
 611                dev->rcv_errors++;
 612                goto bail;
 613        }
 614
 615        /* Check for a valid destination LID (see ch. 7.11.1). */
 616        lid = be16_to_cpu(hdr->lrh[1]);
 617        if (lid < IPATH_MULTICAST_LID_BASE) {
 618                lid &= ~((1 << dev->dd->ipath_lmc) - 1);
 619                if (unlikely(lid != dev->dd->ipath_lid)) {
 620                        dev->rcv_errors++;
 621                        goto bail;
 622                }
 623        }
 624
 625        /* Check for GRH */
 626        lnh = be16_to_cpu(hdr->lrh[0]) & 3;
 627        if (lnh == IPATH_LRH_BTH)
 628                ohdr = &hdr->u.oth;
 629        else if (lnh == IPATH_LRH_GRH)
 630                ohdr = &hdr->u.l.oth;
 631        else {
 632                dev->rcv_errors++;
 633                goto bail;
 634        }
 635
 636        opcode = (be32_to_cpu(ohdr->bth[0]) >> 24) & 0x7f;
 637        dev->opstats[opcode].n_bytes += tlen;
 638        dev->opstats[opcode].n_packets++;
 639
 640        /* Get the destination QP number. */
 641        qp_num = be32_to_cpu(ohdr->bth[1]) & IPATH_QPN_MASK;
 642        if (qp_num == IPATH_MULTICAST_QPN) {
 643                struct ipath_mcast *mcast;
 644                struct ipath_mcast_qp *p;
 645
 646                if (lnh != IPATH_LRH_GRH) {
 647                        dev->n_pkt_drops++;
 648                        goto bail;
 649                }
 650                mcast = ipath_mcast_find(&hdr->u.l.grh.dgid);
 651                if (mcast == NULL) {
 652                        dev->n_pkt_drops++;
 653                        goto bail;
 654                }
 655                dev->n_multicast_rcv++;
 656                list_for_each_entry_rcu(p, &mcast->qp_list, list)
 657                        ipath_qp_rcv(dev, hdr, 1, data, tlen, p->qp);
 658                /*
 659                 * Notify ipath_multicast_detach() if it is waiting for us
 660                 * to finish.
 661                 */
 662                if (atomic_dec_return(&mcast->refcount) <= 1)
 663                        wake_up(&mcast->wait);
 664        } else {
 665                qp = ipath_lookup_qpn(&dev->qp_table, qp_num);
 666                if (qp) {
 667                        dev->n_unicast_rcv++;
 668                        ipath_qp_rcv(dev, hdr, lnh == IPATH_LRH_GRH, data,
 669                                     tlen, qp);
 670                        /*
 671                         * Notify ipath_destroy_qp() if it is waiting
 672                         * for us to finish.
 673                         */
 674                        if (atomic_dec_and_test(&qp->refcount))
 675                                wake_up(&qp->wait);
 676                } else
 677                        dev->n_pkt_drops++;
 678        }
 679
 680bail:;
 681}
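
/*
 * The 24-byte minimum enforced above is the smallest parsable IB
 * packet: an 8-byte LRH, a 12-byte BTH and the 4-byte ICRC
 * (8 + 12 + 4 = 24).  Anything shorter is counted in rcv_errors and
 * dropped.
 */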
 682
 683/**
 684 * ipath_ib_timer - verbs timer
 685 * @arg: the device pointer
 686 *
 687 * This is called from ipath_do_rcv_timer() at interrupt level to check for
 688 * QPs which need retransmits and to collect performance numbers.
 689 */
 690static void ipath_ib_timer(struct ipath_ibdev *dev)
 691{
 692        struct ipath_qp *resend = NULL;
 693        struct ipath_qp *rnr = NULL;
 694        struct list_head *last;
 695        struct ipath_qp *qp;
 696        unsigned long flags;
 697
 698        if (dev == NULL)
 699                return;
 700
 701        spin_lock_irqsave(&dev->pending_lock, flags);
 702        /* Start filling the next pending queue. */
 703        if (++dev->pending_index >= ARRAY_SIZE(dev->pending))
 704                dev->pending_index = 0;
 705        /* Save any requests still in the new queue, they have timed out. */
 706        last = &dev->pending[dev->pending_index];
 707        while (!list_empty(last)) {
 708                qp = list_entry(last->next, struct ipath_qp, timerwait);
 709                list_del_init(&qp->timerwait);
 710                qp->timer_next = resend;
 711                resend = qp;
 712                atomic_inc(&qp->refcount);
 713        }
 714        last = &dev->rnrwait;
 715        if (!list_empty(last)) {
 716                qp = list_entry(last->next, struct ipath_qp, timerwait);
 717                if (--qp->s_rnr_timeout == 0) {
 718                        do {
 719                                list_del_init(&qp->timerwait);
 720                                qp->timer_next = rnr;
 721                                rnr = qp;
 722                                atomic_inc(&qp->refcount);
 723                                if (list_empty(last))
 724                                        break;
 725                                qp = list_entry(last->next, struct ipath_qp,
 726                                                timerwait);
 727                        } while (qp->s_rnr_timeout == 0);
 728                }
 729        }
 730        /*
 731         * We should only be in the started state if pma_sample_start != 0
 732         */
 733        if (dev->pma_sample_status == IB_PMA_SAMPLE_STATUS_STARTED &&
 734            --dev->pma_sample_start == 0) {
 735                dev->pma_sample_status = IB_PMA_SAMPLE_STATUS_RUNNING;
 736                ipath_snapshot_counters(dev->dd, &dev->ipath_sword,
 737                                        &dev->ipath_rword,
 738                                        &dev->ipath_spkts,
 739                                        &dev->ipath_rpkts,
 740                                        &dev->ipath_xmit_wait);
 741        }
 742        if (dev->pma_sample_status == IB_PMA_SAMPLE_STATUS_RUNNING) {
 743                if (dev->pma_sample_interval == 0) {
 744                        u64 ta, tb, tc, td, te;
 745
 746                        dev->pma_sample_status = IB_PMA_SAMPLE_STATUS_DONE;
 747                        ipath_snapshot_counters(dev->dd, &ta, &tb,
 748                                                &tc, &td, &te);
 749
 750                        dev->ipath_sword = ta - dev->ipath_sword;
 751                        dev->ipath_rword = tb - dev->ipath_rword;
 752                        dev->ipath_spkts = tc - dev->ipath_spkts;
 753                        dev->ipath_rpkts = td - dev->ipath_rpkts;
 754                        dev->ipath_xmit_wait = te - dev->ipath_xmit_wait;
 755                }
 756                else
 757                        dev->pma_sample_interval--;
 758        }
 759        spin_unlock_irqrestore(&dev->pending_lock, flags);
 760
 761        /* XXX What if timer fires again while this is running? */
 762        while (resend != NULL) {
 763                qp = resend;
 764                resend = qp->timer_next;
 765
 766                spin_lock_irqsave(&qp->s_lock, flags);
 767                if (qp->s_last != qp->s_tail &&
 768                    ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK) {
 769                        dev->n_timeouts++;
 770                        ipath_restart_rc(qp, qp->s_last_psn + 1);
 771                }
 772                spin_unlock_irqrestore(&qp->s_lock, flags);
 773
 774                /* Notify ipath_destroy_qp() if it is waiting. */
 775                if (atomic_dec_and_test(&qp->refcount))
 776                        wake_up(&qp->wait);
 777        }
 778        while (rnr != NULL) {
 779                qp = rnr;
 780                rnr = qp->timer_next;
 781
 782                spin_lock_irqsave(&qp->s_lock, flags);
 783                if (ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK)
 784                        ipath_schedule_send(qp);
 785                spin_unlock_irqrestore(&qp->s_lock, flags);
 786
 787                /* Notify ipath_destroy_qp() if it is waiting. */
 788                if (atomic_dec_and_test(&qp->refcount))
 789                        wake_up(&qp->wait);
 790        }
 791}
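
/*
 * The pending[] handling above acts as a small timing wheel: each tick
 * advances pending_index to start filling the next bucket, so any QP
 * still queued in the newly selected bucket has waited a full rotation,
 * is considered timed out and, if it still has unacknowledged work, is
 * restarted via ipath_restart_rc().  The rnrwait list is walked
 * separately because each QP on it carries its own s_rnr_timeout
 * countdown.
 */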
 792
 793static void update_sge(struct ipath_sge_state *ss, u32 length)
 794{
 795        struct ipath_sge *sge = &ss->sge;
 796
 797        sge->vaddr += length;
 798        sge->length -= length;
 799        sge->sge_length -= length;
 800        if (sge->sge_length == 0) {
 801                if (--ss->num_sge)
 802                        *sge = *ss->sg_list++;
 803        } else if (sge->length == 0 && sge->mr != NULL) {
 804                if (++sge->n >= IPATH_SEGSZ) {
 805                        if (++sge->m >= sge->mr->mapsz)
 806                                return;
 807                        sge->n = 0;
 808                }
 809                sge->vaddr = sge->mr->map[sge->m]->segs[sge->n].vaddr;
 810                sge->length = sge->mr->map[sge->m]->segs[sge->n].length;
 811        }
 812}
 813
 814#ifdef __LITTLE_ENDIAN
 815static inline u32 get_upper_bits(u32 data, u32 shift)
 816{
 817        return data >> shift;
 818}
 819
 820static inline u32 set_upper_bits(u32 data, u32 shift)
 821{
 822        return data << shift;
 823}
 824
 825static inline u32 clear_upper_bytes(u32 data, u32 n, u32 off)
 826{
 827        data <<= ((sizeof(u32) - n) * BITS_PER_BYTE);
 828        data >>= ((sizeof(u32) - n - off) * BITS_PER_BYTE);
 829        return data;
 830}
 831#else
 832static inline u32 get_upper_bits(u32 data, u32 shift)
 833{
 834        return data << shift;
 835}
 836
 837static inline u32 set_upper_bits(u32 data, u32 shift)
 838{
 839        return data >> shift;
 840}
 841
 842static inline u32 clear_upper_bytes(u32 data, u32 n, u32 off)
 843{
 844        data >>= ((sizeof(u32) - n) * BITS_PER_BYTE);
 845        data <<= ((sizeof(u32) - n - off) * BITS_PER_BYTE);
 846        return data;
 847}
 848#endif
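
/*
 * Worked example (little-endian build): clear_upper_bytes(v, n, off)
 * keeps the low n bytes of v shifted up to byte offset off, so
 * clear_upper_bytes(0xAABBCCDD, 1, 0) == 0x000000DD and
 * clear_upper_bytes(0xAABBCCDD, 2, 1) == 0x00CCDD00.  copy_io() below
 * uses this to merge the partial trailing bytes of one SGE with the
 * leading bytes of the next before writing a full dword to the PIO
 * buffer.
 */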
 849
 850static void copy_io(u32 __iomem *piobuf, struct ipath_sge_state *ss,
 851                    u32 length, unsigned flush_wc)
 852{
 853        u32 extra = 0;
 854        u32 data = 0;
 855        u32 last;
 856
 857        while (1) {
 858                u32 len = ss->sge.length;
 859                u32 off;
 860
 861                if (len > length)
 862                        len = length;
 863                if (len > ss->sge.sge_length)
 864                        len = ss->sge.sge_length;
 865                BUG_ON(len == 0);
 866                /* If the source address is not aligned, try to align it. */
 867                off = (unsigned long)ss->sge.vaddr & (sizeof(u32) - 1);
 868                if (off) {
 869                        u32 *addr = (u32 *)((unsigned long)ss->sge.vaddr &
 870                                            ~(sizeof(u32) - 1));
 871                        u32 v = get_upper_bits(*addr, off * BITS_PER_BYTE);
 872                        u32 y;
 873
 874                        y = sizeof(u32) - off;
 875                        if (len > y)
 876                                len = y;
 877                        if (len + extra >= sizeof(u32)) {
 878                                data |= set_upper_bits(v, extra *
 879                                                       BITS_PER_BYTE);
 880                                len = sizeof(u32) - extra;
 881                                if (len == length) {
 882                                        last = data;
 883                                        break;
 884                                }
 885                                __raw_writel(data, piobuf);
 886                                piobuf++;
 887                                extra = 0;
 888                                data = 0;
 889                        } else {
 890                                /* Clear unused upper bytes */
 891                                data |= clear_upper_bytes(v, len, extra);
 892                                if (len == length) {
 893                                        last = data;
 894                                        break;
 895                                }
 896                                extra += len;
 897                        }
 898                } else if (extra) {
 899                        /* Source address is aligned. */
 900                        u32 *addr = (u32 *) ss->sge.vaddr;
 901                        int shift = extra * BITS_PER_BYTE;
 902                        int ushift = 32 - shift;
 903                        u32 l = len;
 904
 905                        while (l >= sizeof(u32)) {
 906                                u32 v = *addr;
 907
 908                                data |= set_upper_bits(v, shift);
 909                                __raw_writel(data, piobuf);
 910                                data = get_upper_bits(v, ushift);
 911                                piobuf++;
 912                                addr++;
 913                                l -= sizeof(u32);
 914                        }
 915                        /*
  916                         * 1-3 trailing bytes of this segment remain (plus 'extra' bytes already carried in data).
 917                         */
 918                        if (l) {
 919                                u32 v = *addr;
 920
 921                                if (l + extra >= sizeof(u32)) {
 922                                        data |= set_upper_bits(v, shift);
 923                                        len -= l + extra - sizeof(u32);
 924                                        if (len == length) {
 925                                                last = data;
 926                                                break;
 927                                        }
 928                                        __raw_writel(data, piobuf);
 929                                        piobuf++;
 930                                        extra = 0;
 931                                        data = 0;
 932                                } else {
 933                                        /* Clear unused upper bytes */
 934                                        data |= clear_upper_bytes(v, l,
 935                                                                  extra);
 936                                        if (len == length) {
 937                                                last = data;
 938                                                break;
 939                                        }
 940                                        extra += l;
 941                                }
 942                        } else if (len == length) {
 943                                last = data;
 944                                break;
 945                        }
 946                } else if (len == length) {
 947                        u32 w;
 948
 949                        /*
 950                         * Need to round up for the last dword in the
 951                         * packet.
 952                         */
 953                        w = (len + 3) >> 2;
 954                        __iowrite32_copy(piobuf, ss->sge.vaddr, w - 1);
 955                        piobuf += w - 1;
 956                        last = ((u32 *) ss->sge.vaddr)[w - 1];
 957                        break;
 958                } else {
 959                        u32 w = len >> 2;
 960
 961                        __iowrite32_copy(piobuf, ss->sge.vaddr, w);
 962                        piobuf += w;
 963
 964                        extra = len & (sizeof(u32) - 1);
 965                        if (extra) {
 966                                u32 v = ((u32 *) ss->sge.vaddr)[w];
 967
 968                                /* Clear unused upper bytes */
 969                                data = clear_upper_bytes(v, extra, 0);
 970                        }
 971                }
 972                update_sge(ss, len);
 973                length -= len;
 974        }
 975        /* Update address before sending packet. */
 976        update_sge(ss, length);
 977        if (flush_wc) {
 978                /* must flush early everything before trigger word */
 979                ipath_flush_wc();
 980                __raw_writel(last, piobuf);
 981                /* be sure trigger word is written */
 982                ipath_flush_wc();
 983        } else
 984                __raw_writel(last, piobuf);
 985}
 986
 987/*
 988 * Convert IB rate to delay multiplier.
 989 */
 990unsigned ipath_ib_rate_to_mult(enum ib_rate rate)
 991{
 992        switch (rate) {
 993        case IB_RATE_2_5_GBPS: return 8;
 994        case IB_RATE_5_GBPS:   return 4;
 995        case IB_RATE_10_GBPS:  return 2;
 996        case IB_RATE_20_GBPS:  return 1;
 997        default:               return 0;
 998        }
 999}
1000
1001/*
1002 * Convert delay multiplier to IB rate
1003 */
1004static enum ib_rate ipath_mult_to_ib_rate(unsigned mult)
1005{
1006        switch (mult) {
1007        case 8:  return IB_RATE_2_5_GBPS;
1008        case 4:  return IB_RATE_5_GBPS;
1009        case 2:  return IB_RATE_10_GBPS;
1010        case 1:  return IB_RATE_20_GBPS;
1011        default: return IB_RATE_PORT_CURRENT;
1012        }
1013}
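
/*
 * The two conversions above are inverses for the rates this link can
 * drive: 4X DDR (20 Gb/s) is the fastest, so its multiplier is 1, while
 * a 1X SDR (2.5 Gb/s) destination takes 8 times as long per dword,
 * hence multiplier 8.  Unknown multipliers map to IB_RATE_PORT_CURRENT
 * rather than guessing.
 */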
1014
1015static inline struct ipath_verbs_txreq *get_txreq(struct ipath_ibdev *dev)
1016{
1017        struct ipath_verbs_txreq *tx = NULL;
1018        unsigned long flags;
1019
1020        spin_lock_irqsave(&dev->pending_lock, flags);
1021        if (!list_empty(&dev->txreq_free)) {
1022                struct list_head *l = dev->txreq_free.next;
1023
1024                list_del(l);
1025                tx = list_entry(l, struct ipath_verbs_txreq, txreq.list);
1026        }
1027        spin_unlock_irqrestore(&dev->pending_lock, flags);
1028        return tx;
1029}
1030
1031static inline void put_txreq(struct ipath_ibdev *dev,
1032                             struct ipath_verbs_txreq *tx)
1033{
1034        unsigned long flags;
1035
1036        spin_lock_irqsave(&dev->pending_lock, flags);
1037        list_add(&tx->txreq.list, &dev->txreq_free);
1038        spin_unlock_irqrestore(&dev->pending_lock, flags);
1039}
1040
1041static void sdma_complete(void *cookie, int status)
1042{
1043        struct ipath_verbs_txreq *tx = cookie;
1044        struct ipath_qp *qp = tx->qp;
1045        struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
1046        unsigned long flags;
1047        enum ib_wc_status ibs = status == IPATH_SDMA_TXREQ_S_OK ?
1048                IB_WC_SUCCESS : IB_WC_WR_FLUSH_ERR;
1049
1050        if (atomic_dec_and_test(&qp->s_dma_busy)) {
1051                spin_lock_irqsave(&qp->s_lock, flags);
1052                if (tx->wqe)
1053                        ipath_send_complete(qp, tx->wqe, ibs);
1054                if ((ib_ipath_state_ops[qp->state] & IPATH_FLUSH_SEND &&
1055                     qp->s_last != qp->s_head) ||
1056                    (qp->s_flags & IPATH_S_WAIT_DMA))
1057                        ipath_schedule_send(qp);
1058                spin_unlock_irqrestore(&qp->s_lock, flags);
1059                wake_up(&qp->wait_dma);
1060        } else if (tx->wqe) {
1061                spin_lock_irqsave(&qp->s_lock, flags);
1062                ipath_send_complete(qp, tx->wqe, ibs);
1063                spin_unlock_irqrestore(&qp->s_lock, flags);
1064        }
1065
1066        if (tx->txreq.flags & IPATH_SDMA_TXREQ_F_FREEBUF)
1067                kfree(tx->txreq.map_addr);
1068        put_txreq(dev, tx);
1069
1070        if (atomic_dec_and_test(&qp->refcount))
1071                wake_up(&qp->wait);
1072}
1073
1074static void decrement_dma_busy(struct ipath_qp *qp)
1075{
1076        unsigned long flags;
1077
1078        if (atomic_dec_and_test(&qp->s_dma_busy)) {
1079                spin_lock_irqsave(&qp->s_lock, flags);
1080                if ((ib_ipath_state_ops[qp->state] & IPATH_FLUSH_SEND &&
1081                     qp->s_last != qp->s_head) ||
1082                    (qp->s_flags & IPATH_S_WAIT_DMA))
1083                        ipath_schedule_send(qp);
1084                spin_unlock_irqrestore(&qp->s_lock, flags);
1085                wake_up(&qp->wait_dma);
1086        }
1087}
1088
1089/*
1090 * Compute the number of clock cycles of delay before sending the next packet.
1091 * The multipliers reflect the number of clocks for the fastest rate so
1092 * one tick at 4xDDR is 8 ticks at 1xSDR.
1093 * If the destination port will take longer to receive a packet than
1094 * the outgoing link can send it, we need to delay sending the next packet
1095 * by the difference in time it takes the receiver to receive and the sender
1096 * to send this packet.
1097 * Note that this delay is always correct for UC and RC but not always
1098 * optimal for UD. For UD, the destination HCA can be different for each
1099 * packet, in which case, we could send packets to a different destination
1100 * while "waiting" for the delay. The overhead for doing this without
1101 * HW support is more than just paying the cost of delaying some packets
1102 * unnecessarily.
1103 */
1104static inline unsigned ipath_pkt_delay(u32 plen, u8 snd_mult, u8 rcv_mult)
1105{
1106        return (rcv_mult > snd_mult) ?
1107                (plen * (rcv_mult - snd_mult) + 1) >> 1 : 0;
1108}
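
/*
 * Worked example: sending plen == 600 dwords from a 4X DDR link
 * (snd_mult == 1) to a 1X SDR destination (rcv_mult == 8) gives a delay
 * of (600 * (8 - 1) + 1) >> 1 == 2100 ticks; when the receiver is at
 * least as fast as the sender the result is 0 and no pacing is applied.
 */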
1109
1110static int ipath_verbs_send_dma(struct ipath_qp *qp,
1111                                struct ipath_ib_header *hdr, u32 hdrwords,
1112                                struct ipath_sge_state *ss, u32 len,
1113                                u32 plen, u32 dwords)
1114{
1115        struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
1116        struct ipath_devdata *dd = dev->dd;
1117        struct ipath_verbs_txreq *tx;
1118        u32 *piobuf;
1119        u32 control;
1120        u32 ndesc;
1121        int ret;
1122
1123        tx = qp->s_tx;
1124        if (tx) {
1125                qp->s_tx = NULL;
1126                /* resend previously constructed packet */
1127                atomic_inc(&qp->s_dma_busy);
1128                ret = ipath_sdma_verbs_send(dd, tx->ss, tx->len, tx);
1129                if (ret) {
1130                        qp->s_tx = tx;
1131                        decrement_dma_busy(qp);
1132                }
1133                goto bail;
1134        }
1135
1136        tx = get_txreq(dev);
1137        if (!tx) {
1138                ret = -EBUSY;
1139                goto bail;
1140        }
1141
1142        /*
1143         * Get the saved delay count we computed for the previous packet
1144         * and save the delay count for this packet to be used next time
1145         * we get here.
1146         */
1147        control = qp->s_pkt_delay;
1148        qp->s_pkt_delay = ipath_pkt_delay(plen, dd->delay_mult, qp->s_dmult);
1149
1150        tx->qp = qp;
1151        atomic_inc(&qp->refcount);
1152        tx->wqe = qp->s_wqe;
1153        tx->txreq.callback = sdma_complete;
1154        tx->txreq.callback_cookie = tx;
1155        tx->txreq.flags = IPATH_SDMA_TXREQ_F_HEADTOHOST |
1156                IPATH_SDMA_TXREQ_F_INTREQ | IPATH_SDMA_TXREQ_F_FREEDESC;
1157        if (plen + 1 >= IPATH_SMALLBUF_DWORDS)
1158                tx->txreq.flags |= IPATH_SDMA_TXREQ_F_USELARGEBUF;
1159
1160        /* VL15 packets bypass credit check */
1161        if ((be16_to_cpu(hdr->lrh[0]) >> 12) == 15) {
1162                control |= 1ULL << 31;
1163                tx->txreq.flags |= IPATH_SDMA_TXREQ_F_VL15;
1164        }
1165
1166        if (len) {
1167                /*
1168                 * Don't try to DMA if it takes more descriptors than
1169                 * the queue holds.
1170                 */
1171                ndesc = ipath_count_sge(ss, len);
1172                if (ndesc >= dd->ipath_sdma_descq_cnt)
1173                        ndesc = 0;
1174        } else
1175                ndesc = 1;
1176        if (ndesc) {
1177                tx->hdr.pbc[0] = cpu_to_le32(plen);
1178                tx->hdr.pbc[1] = cpu_to_le32(control);
1179                memcpy(&tx->hdr.hdr, hdr, hdrwords << 2);
1180                tx->txreq.sg_count = ndesc;
1181                tx->map_len = (hdrwords + 2) << 2;
1182                tx->txreq.map_addr = &tx->hdr;
1183                atomic_inc(&qp->s_dma_busy);
1184                ret = ipath_sdma_verbs_send(dd, ss, dwords, tx);
1185                if (ret) {
1186                        /* save ss and length in dwords */
1187                        tx->ss = ss;
1188                        tx->len = dwords;
1189                        qp->s_tx = tx;
1190                        decrement_dma_busy(qp);
1191                }
1192                goto bail;
1193        }
1194
1195        /* Allocate a buffer and copy the header and payload to it. */
1196        tx->map_len = (plen + 1) << 2;
1197        piobuf = kmalloc(tx->map_len, GFP_ATOMIC);
1198        if (unlikely(piobuf == NULL)) {
1199                ret = -EBUSY;
1200                goto err_tx;
1201        }
1202        tx->txreq.map_addr = piobuf;
1203        tx->txreq.flags |= IPATH_SDMA_TXREQ_F_FREEBUF;
1204        tx->txreq.sg_count = 1;
1205
1206        *piobuf++ = (__force u32) cpu_to_le32(plen);
1207        *piobuf++ = (__force u32) cpu_to_le32(control);
1208        memcpy(piobuf, hdr, hdrwords << 2);
1209        ipath_copy_from_sge(piobuf + hdrwords, ss, len);
1210
1211        atomic_inc(&qp->s_dma_busy);
1212        ret = ipath_sdma_verbs_send(dd, NULL, 0, tx);
1213        /*
1214         * If we couldn't queue the DMA request, save the info
1215         * and try again later rather than destroying the
1216         * buffer and undoing the side effects of the copy.
1217         */
1218        if (ret) {
1219                tx->ss = NULL;
1220                tx->len = 0;
1221                qp->s_tx = tx;
1222                decrement_dma_busy(qp);
1223        }
1224        dev->n_unaligned++;
1225        goto bail;
1226
1227err_tx:
1228        if (atomic_dec_and_test(&qp->refcount))
1229                wake_up(&qp->wait);
1230        put_txreq(dev, tx);
1231bail:
1232        return ret;
1233}
1234
1235static int ipath_verbs_send_pio(struct ipath_qp *qp,
1236                                struct ipath_ib_header *ibhdr, u32 hdrwords,
1237                                struct ipath_sge_state *ss, u32 len,
1238                                u32 plen, u32 dwords)
1239{
1240        struct ipath_devdata *dd = to_idev(qp->ibqp.device)->dd;
1241        u32 *hdr = (u32 *) ibhdr;
1242        u32 __iomem *piobuf;
1243        unsigned flush_wc;
1244        u32 control;
1245        int ret;
1246        unsigned long flags;
1247
1248        piobuf = ipath_getpiobuf(dd, plen, NULL);
1249        if (unlikely(piobuf == NULL)) {
1250                ret = -EBUSY;
1251                goto bail;
1252        }
1253
1254        /*
1255         * Get the saved delay count we computed for the previous packet
1256         * and save the delay count for this packet to be used next time
1257         * we get here.
1258         */
1259        control = qp->s_pkt_delay;
1260        qp->s_pkt_delay = ipath_pkt_delay(plen, dd->delay_mult, qp->s_dmult);
1261
1262        /* VL15 packets bypass credit check */
1263        if ((be16_to_cpu(ibhdr->lrh[0]) >> 12) == 15)
1264                control |= 1ULL << 31;
1265
1266        /*
1267         * Write the length to the control qword plus any needed flags.
1268         * We have to flush after the PBC for correctness on some cpus
1269         * or WC buffer can be written out of order.
1270         */
1271        writeq(((u64) control << 32) | plen, piobuf);
1272        piobuf += 2;
1273
1274        flush_wc = dd->ipath_flags & IPATH_PIO_FLUSH_WC;
1275        if (len == 0) {
1276                /*
1277                 * If there is just the header portion, must flush before
1278                 * writing last word of header for correctness, and after
1279                 * the last header word (trigger word).
1280                 */
1281                if (flush_wc) {
1282                        ipath_flush_wc();
1283                        __iowrite32_copy(piobuf, hdr, hdrwords - 1);
1284                        ipath_flush_wc();
1285                        __raw_writel(hdr[hdrwords - 1], piobuf + hdrwords - 1);
1286                        ipath_flush_wc();
1287                } else
1288                        __iowrite32_copy(piobuf, hdr, hdrwords);
1289                goto done;
1290        }
1291
1292        if (flush_wc)
1293                ipath_flush_wc();
1294        __iowrite32_copy(piobuf, hdr, hdrwords);
1295        piobuf += hdrwords;
1296
1297        /* The common case is aligned and contained in one segment. */
1298        if (likely(ss->num_sge == 1 && len <= ss->sge.length &&
1299                   !((unsigned long)ss->sge.vaddr & (sizeof(u32) - 1)))) {
1300                u32 *addr = (u32 *) ss->sge.vaddr;
1301
1302                /* Update address before sending packet. */
1303                update_sge(ss, len);
1304                if (flush_wc) {
1305                        __iowrite32_copy(piobuf, addr, dwords - 1);
1306                        /* must flush early everything before trigger word */
1307                        ipath_flush_wc();
1308                        __raw_writel(addr[dwords - 1], piobuf + dwords - 1);
1309                        /* be sure trigger word is written */
1310                        ipath_flush_wc();
1311                } else
1312                        __iowrite32_copy(piobuf, addr, dwords);
1313                goto done;
1314        }
1315        copy_io(piobuf, ss, len, flush_wc);
1316done:
1317        if (qp->s_wqe) {
1318                spin_lock_irqsave(&qp->s_lock, flags);
1319                ipath_send_complete(qp, qp->s_wqe, IB_WC_SUCCESS);
1320                spin_unlock_irqrestore(&qp->s_lock, flags);
1321        }
1322        ret = 0;
1323bail:
1324        return ret;
1325}
1326
1327/**
1328 * ipath_verbs_send - send a packet
1329 * @qp: the QP to send on
1330 * @hdr: the packet header
1331 * @hdrwords: the number of 32-bit words in the header
1332 * @ss: the SGE to send
1333 * @len: the length of the packet in bytes
1334 */
1335int ipath_verbs_send(struct ipath_qp *qp, struct ipath_ib_header *hdr,
1336                     u32 hdrwords, struct ipath_sge_state *ss, u32 len)
1337{
1338        struct ipath_devdata *dd = to_idev(qp->ibqp.device)->dd;
1339        u32 plen;
1340        int ret;
1341        u32 dwords = (len + 3) >> 2;
1342
1343        /*
1344         * Calculate the send buffer trigger address.
1345         * The +1 counts for the pbc control dword following the pbc length.
1346         */
1347        plen = hdrwords + dwords + 1;
1348
1349        /*
1350         * VL15 packets (IB_QPT_SMI) will always use PIO, so we
1351         * can defer SDMA restart until link goes ACTIVE without
1352         * worrying about just how we got there.
1353         */
1354        if (qp->ibqp.qp_type == IB_QPT_SMI ||
1355            !(dd->ipath_flags & IPATH_HAS_SEND_DMA))
1356                ret = ipath_verbs_send_pio(qp, hdr, hdrwords, ss, len,
1357                                           plen, dwords);
1358        else
1359                ret = ipath_verbs_send_dma(qp, hdr, hdrwords, ss, len,
1360                                           plen, dwords);
1361
1362        return ret;
1363}
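
/*
 * Example of the length math above: a 56-byte header (hdrwords == 14)
 * and a 256-byte payload (dwords == 64) give plen == 14 + 64 + 1 == 79,
 * the extra dword being the PBC control word that follows the PBC
 * length; the same plen is passed to both the PIO and SDMA paths.
 */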
1364
1365int ipath_snapshot_counters(struct ipath_devdata *dd, u64 *swords,
1366                            u64 *rwords, u64 *spkts, u64 *rpkts,
1367                            u64 *xmit_wait)
1368{
1369        int ret;
1370
1371        if (!(dd->ipath_flags & IPATH_INITTED)) {
1372                /* no hardware, freeze, etc. */
1373                ret = -EINVAL;
1374                goto bail;
1375        }
1376        *swords = ipath_snap_cntr(dd, dd->ipath_cregs->cr_wordsendcnt);
1377        *rwords = ipath_snap_cntr(dd, dd->ipath_cregs->cr_wordrcvcnt);
1378        *spkts = ipath_snap_cntr(dd, dd->ipath_cregs->cr_pktsendcnt);
1379        *rpkts = ipath_snap_cntr(dd, dd->ipath_cregs->cr_pktrcvcnt);
1380        *xmit_wait = ipath_snap_cntr(dd, dd->ipath_cregs->cr_sendstallcnt);
1381
1382        ret = 0;
1383
1384bail:
1385        return ret;
1386}
1387
1388/**
1389 * ipath_get_counters - get various chip counters
1390 * @dd: the infinipath device
1391 * @cntrs: counters are placed here
1392 *
1393 * Return the counters needed by recv_pma_get_portcounters().
1394 */
1395int ipath_get_counters(struct ipath_devdata *dd,
1396                       struct ipath_verbs_counters *cntrs)
1397{
1398        struct ipath_cregs const *crp = dd->ipath_cregs;
1399        int ret;
1400
1401        if (!(dd->ipath_flags & IPATH_INITTED)) {
1402                /* no hardware, freeze, etc. */
1403                ret = -EINVAL;
1404                goto bail;
1405        }
1406        cntrs->symbol_error_counter =
1407                ipath_snap_cntr(dd, crp->cr_ibsymbolerrcnt);
1408        cntrs->link_error_recovery_counter =
1409                ipath_snap_cntr(dd, crp->cr_iblinkerrrecovcnt);
1410        /*
1411         * The link downed counter counts when the other side downs the
1412         * connection.  We add in the number of times we downed the link
1413         * due to local link integrity errors to compensate.
1414         */
1415        cntrs->link_downed_counter =
1416                ipath_snap_cntr(dd, crp->cr_iblinkdowncnt);
1417        cntrs->port_rcv_errors =
1418                ipath_snap_cntr(dd, crp->cr_rxdroppktcnt) +
1419                ipath_snap_cntr(dd, crp->cr_rcvovflcnt) +
1420                ipath_snap_cntr(dd, crp->cr_portovflcnt) +
1421                ipath_snap_cntr(dd, crp->cr_err_rlencnt) +
1422                ipath_snap_cntr(dd, crp->cr_invalidrlencnt) +
1423                ipath_snap_cntr(dd, crp->cr_errlinkcnt) +
1424                ipath_snap_cntr(dd, crp->cr_erricrccnt) +
1425                ipath_snap_cntr(dd, crp->cr_errvcrccnt) +
1426                ipath_snap_cntr(dd, crp->cr_errlpcrccnt) +
1427                ipath_snap_cntr(dd, crp->cr_badformatcnt) +
1428                dd->ipath_rxfc_unsupvl_errs;
1429        if (crp->cr_rxotherlocalphyerrcnt)
1430                cntrs->port_rcv_errors +=
1431                        ipath_snap_cntr(dd, crp->cr_rxotherlocalphyerrcnt);
1432        if (crp->cr_rxvlerrcnt)
1433                cntrs->port_rcv_errors +=
1434                        ipath_snap_cntr(dd, crp->cr_rxvlerrcnt);
1435        cntrs->port_rcv_remphys_errors =
1436                ipath_snap_cntr(dd, crp->cr_rcvebpcnt);
1437        cntrs->port_xmit_discards = ipath_snap_cntr(dd, crp->cr_unsupvlcnt);
1438        cntrs->port_xmit_data = ipath_snap_cntr(dd, crp->cr_wordsendcnt);
1439        cntrs->port_rcv_data = ipath_snap_cntr(dd, crp->cr_wordrcvcnt);
1440        cntrs->port_xmit_packets = ipath_snap_cntr(dd, crp->cr_pktsendcnt);
1441        cntrs->port_rcv_packets = ipath_snap_cntr(dd, crp->cr_pktrcvcnt);
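        /*
         * Some chips lack dedicated registers for the counters below;
         * when the register offset is zero, fall back to the counts the
         * driver maintains in software.
         */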
1442        cntrs->local_link_integrity_errors =
1443                crp->cr_locallinkintegrityerrcnt ?
1444                ipath_snap_cntr(dd, crp->cr_locallinkintegrityerrcnt) :
1445                ((dd->ipath_flags & IPATH_GPIO_ERRINTRS) ?
1446                 dd->ipath_lli_errs : dd->ipath_lli_errors);
1447        cntrs->excessive_buffer_overrun_errors =
1448                crp->cr_excessbufferovflcnt ?
1449                ipath_snap_cntr(dd, crp->cr_excessbufferovflcnt) :
1450                dd->ipath_overrun_thresh_errs;
1451        cntrs->vl15_dropped = crp->cr_vl15droppedpktcnt ?
1452                ipath_snap_cntr(dd, crp->cr_vl15droppedpktcnt) : 0;
1453
1454        ret = 0;
1455
1456bail:
1457        return ret;
1458}
1459
1460/**
1461 * ipath_ib_piobufavail - callback when a PIO buffer is available
1462 * @dev: the ipath device pointer
1463 *
1464 * This is called from ipath_intr() at interrupt level when a PIO buffer is
1465 * available after ipath_verbs_send() returned an error indicating that no
1466 * buffers were available.  For now we simply reschedule the sends of any
1467 * waiting QPs and return zero; returning 1 is reserved for the case where
1468 * all the PIO buffers were consumed while QPs were still waiting.
1469 */
1470int ipath_ib_piobufavail(struct ipath_ibdev *dev)
1471{
1472        struct list_head *list;
1473        struct ipath_qp *qplist;
1474        struct ipath_qp *qp;
1475        unsigned long flags;
1476
1477        if (dev == NULL)
1478                goto bail;
1479
1480        list = &dev->piowait;
1481        qplist = NULL;
1482
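        /*
         * First pass: under the pending lock, move every QP waiting for
         * a PIO buffer onto a private singly linked list, holding a
         * reference on each so it cannot be freed while we process it.
         */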
1483        spin_lock_irqsave(&dev->pending_lock, flags);
1484        while (!list_empty(list)) {
1485                qp = list_entry(list->next, struct ipath_qp, piowait);
1486                list_del_init(&qp->piowait);
1487                qp->pio_next = qplist;
1488                qplist = qp;
1489                atomic_inc(&qp->refcount);
1490        }
1491        spin_unlock_irqrestore(&dev->pending_lock, flags);
1492
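        /*
         * Second pass: with the pending lock dropped, reschedule the send
         * for each collected QP under its own s_lock and release the
         * reference taken above.
         */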
1493        while (qplist != NULL) {
1494                qp = qplist;
1495                qplist = qp->pio_next;
1496
1497                spin_lock_irqsave(&qp->s_lock, flags);
1498                if (ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK)
1499                        ipath_schedule_send(qp);
1500                spin_unlock_irqrestore(&qp->s_lock, flags);
1501
1502                /* Notify ipath_destroy_qp() if it is waiting. */
1503                if (atomic_dec_and_test(&qp->refcount))
1504                        wake_up(&qp->wait);
1505        }
1506
1507bail:
1508        return 0;
1509}
1510
1511static int ipath_query_device(struct ib_device *ibdev, struct ib_device_attr *props,
1512                              struct ib_udata *uhw)
1513{
1514        struct ipath_ibdev *dev = to_idev(ibdev);
1515
1516        if (uhw->inlen || uhw->outlen)
1517                return -EINVAL;
1518
1519        memset(props, 0, sizeof(*props));
1520
1521        props->device_cap_flags = IB_DEVICE_BAD_PKEY_CNTR |
1522                IB_DEVICE_BAD_QKEY_CNTR | IB_DEVICE_SHUTDOWN_PORT |
1523                IB_DEVICE_SYS_IMAGE_GUID | IB_DEVICE_RC_RNR_NAK_GEN |
1524                IB_DEVICE_PORT_ACTIVE_EVENT | IB_DEVICE_SRQ_RESIZE;
1525        props->page_size_cap = PAGE_SIZE;
1526        props->vendor_id =
1527                IPATH_SRC_OUI_1 << 16 | IPATH_SRC_OUI_2 << 8 | IPATH_SRC_OUI_3;
1528        props->vendor_part_id = dev->dd->ipath_deviceid;
1529        props->hw_ver = dev->dd->ipath_pcirev;
1530
1531        props->sys_image_guid = dev->sys_image_guid;
1532
1533        props->max_mr_size = ~0ull;
1534        props->max_qp = ib_ipath_max_qps;
1535        props->max_qp_wr = ib_ipath_max_qp_wrs;
1536        props->max_sge = ib_ipath_max_sges;
1537        props->max_sge_rd = ib_ipath_max_sges;
1538        props->max_cq = ib_ipath_max_cqs;
1539        props->max_ah = ib_ipath_max_ahs;
1540        props->max_cqe = ib_ipath_max_cqes;
1541        props->max_mr = dev->lk_table.max;
1542        props->max_fmr = dev->lk_table.max;
1543        props->max_map_per_fmr = 32767;
1544        props->max_pd = ib_ipath_max_pds;
1545        props->max_qp_rd_atom = IPATH_MAX_RDMA_ATOMIC;
1546        props->max_qp_init_rd_atom = 255;
1547        /* props->max_res_rd_atom */
1548        props->max_srq = ib_ipath_max_srqs;
1549        props->max_srq_wr = ib_ipath_max_srq_wrs;
1550        props->max_srq_sge = ib_ipath_max_srq_sges;
1551        /* props->local_ca_ack_delay */
1552        props->atomic_cap = IB_ATOMIC_GLOB;
1553        props->max_pkeys = ipath_get_npkeys(dev->dd);
1554        props->max_mcast_grp = ib_ipath_max_mcast_grps;
1555        props->max_mcast_qp_attach = ib_ipath_max_mcast_qp_attached;
1556        props->max_total_mcast_qp_attach = props->max_mcast_qp_attach *
1557                props->max_mcast_grp;
1558
1559        return 0;
1560}
1561
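/*
 * Translation from the chip's IB link training state (the low bits of
 * ipath_lastibcstat) to the physical port state values reported in
 * PortInfo; see the use in ipath_query_port() below.
 */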
1562const u8 ipath_cvt_physportstate[32] = {
1563        [INFINIPATH_IBCS_LT_STATE_DISABLED] = IB_PHYSPORTSTATE_DISABLED,
1564        [INFINIPATH_IBCS_LT_STATE_LINKUP] = IB_PHYSPORTSTATE_LINKUP,
1565        [INFINIPATH_IBCS_LT_STATE_POLLACTIVE] = IB_PHYSPORTSTATE_POLL,
1566        [INFINIPATH_IBCS_LT_STATE_POLLQUIET] = IB_PHYSPORTSTATE_POLL,
1567        [INFINIPATH_IBCS_LT_STATE_SLEEPDELAY] = IB_PHYSPORTSTATE_SLEEP,
1568        [INFINIPATH_IBCS_LT_STATE_SLEEPQUIET] = IB_PHYSPORTSTATE_SLEEP,
1569        [INFINIPATH_IBCS_LT_STATE_CFGDEBOUNCE] =
1570                IB_PHYSPORTSTATE_CFG_TRAIN,
1571        [INFINIPATH_IBCS_LT_STATE_CFGRCVFCFG] =
1572                IB_PHYSPORTSTATE_CFG_TRAIN,
1573        [INFINIPATH_IBCS_LT_STATE_CFGWAITRMT] =
1574                IB_PHYSPORTSTATE_CFG_TRAIN,
1575        [INFINIPATH_IBCS_LT_STATE_CFGIDLE] = IB_PHYSPORTSTATE_CFG_TRAIN,
1576        [INFINIPATH_IBCS_LT_STATE_RECOVERRETRAIN] =
1577                IB_PHYSPORTSTATE_LINK_ERR_RECOVER,
1578        [INFINIPATH_IBCS_LT_STATE_RECOVERWAITRMT] =
1579                IB_PHYSPORTSTATE_LINK_ERR_RECOVER,
1580        [INFINIPATH_IBCS_LT_STATE_RECOVERIDLE] =
1581                IB_PHYSPORTSTATE_LINK_ERR_RECOVER,
1582        [0x10] = IB_PHYSPORTSTATE_CFG_TRAIN,
1583        [0x11] = IB_PHYSPORTSTATE_CFG_TRAIN,
1584        [0x12] = IB_PHYSPORTSTATE_CFG_TRAIN,
1585        [0x13] = IB_PHYSPORTSTATE_CFG_TRAIN,
1586        [0x14] = IB_PHYSPORTSTATE_CFG_TRAIN,
1587        [0x15] = IB_PHYSPORTSTATE_CFG_TRAIN,
1588        [0x16] = IB_PHYSPORTSTATE_CFG_TRAIN,
1589        [0x17] = IB_PHYSPORTSTATE_CFG_TRAIN
1590};
1591
1592u32 ipath_get_cr_errpkey(struct ipath_devdata *dd)
1593{
1594        return ipath_read_creg32(dd, dd->ipath_cregs->cr_errpkey);
1595}
1596
1597static int ipath_query_port(struct ib_device *ibdev,
1598                            u8 port, struct ib_port_attr *props)
1599{
1600        struct ipath_ibdev *dev = to_idev(ibdev);
1601        struct ipath_devdata *dd = dev->dd;
1602        enum ib_mtu mtu;
1603        u16 lid = dd->ipath_lid;
1604        u64 ibcstat;
1605
1606        memset(props, 0, sizeof(*props));
1607        props->lid = lid ? lid : be16_to_cpu(IB_LID_PERMISSIVE);
1608        props->lmc = dd->ipath_lmc;
1609        props->sm_lid = dev->sm_lid;
1610        props->sm_sl = dev->sm_sl;
1611        ibcstat = dd->ipath_lastibcstat;
1612        /* map LinkState to IB portinfo values.  */
1613        props->state = ipath_ib_linkstate(dd, ibcstat) + 1;
1614
1615        /* See phys_state_show() */
1616        props->phys_state = /* MEA: assumes shift == 0 */
1617                ipath_cvt_physportstate[dd->ipath_lastibcstat &
1618                dd->ibcs_lts_mask];
1619        props->port_cap_flags = dev->port_cap_flags;
1620        props->gid_tbl_len = 1;
1621        props->max_msg_sz = 0x80000000;
1622        props->pkey_tbl_len = ipath_get_npkeys(dd);
1623        props->bad_pkey_cntr = ipath_get_cr_errpkey(dd) -
1624                dev->z_pkey_violations;
1625        props->qkey_viol_cntr = dev->qkey_violations;
1626        props->active_width = dd->ipath_link_width_active;
1627        /* See rate_show() */
1628        props->active_speed = dd->ipath_link_speed_active;
1629        props->max_vl_num = 1;          /* VLCap = VL0 */
1630        props->init_type_reply = 0;
1631
1632        props->max_mtu = ipath_mtu4096 ? IB_MTU_4096 : IB_MTU_2048;
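        /* Translate the active MTU in bytes into the IB MTU enumeration. */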
1633        switch (dd->ipath_ibmtu) {
1634        case 4096:
1635                mtu = IB_MTU_4096;
1636                break;
1637        case 2048:
1638                mtu = IB_MTU_2048;
1639                break;
1640        case 1024:
1641                mtu = IB_MTU_1024;
1642                break;
1643        case 512:
1644                mtu = IB_MTU_512;
1645                break;
1646        case 256:
1647                mtu = IB_MTU_256;
1648                break;
1649        default:
1650                mtu = IB_MTU_2048;
1651        }
1652        props->active_mtu = mtu;
1653        props->subnet_timeout = dev->subnet_timeout;
1654
1655        return 0;
1656}
1657
1658static int ipath_modify_device(struct ib_device *device,
1659                               int device_modify_mask,
1660                               struct ib_device_modify *device_modify)
1661{
1662        int ret;
1663
1664        if (device_modify_mask & ~(IB_DEVICE_MODIFY_SYS_IMAGE_GUID |
1665                                   IB_DEVICE_MODIFY_NODE_DESC)) {
1666                ret = -EOPNOTSUPP;
1667                goto bail;
1668        }
1669
1670        if (device_modify_mask & IB_DEVICE_MODIFY_NODE_DESC)
1671                memcpy(device->node_desc, device_modify->node_desc, 64);
1672
1673        if (device_modify_mask & IB_DEVICE_MODIFY_SYS_IMAGE_GUID)
1674                to_idev(device)->sys_image_guid =
1675                        cpu_to_be64(device_modify->sys_image_guid);
1676
1677        ret = 0;
1678
1679bail:
1680        return ret;
1681}
1682
1683static int ipath_modify_port(struct ib_device *ibdev,
1684                             u8 port, int port_modify_mask,
1685                             struct ib_port_modify *props)
1686{
1687        struct ipath_ibdev *dev = to_idev(ibdev);
1688
1689        dev->port_cap_flags |= props->set_port_cap_mask;
1690        dev->port_cap_flags &= ~props->clr_port_cap_mask;
1691        if (port_modify_mask & IB_PORT_SHUTDOWN)
1692                ipath_set_linkstate(dev->dd, IPATH_IB_LINKDOWN);
1693        if (port_modify_mask & IB_PORT_RESET_QKEY_CNTR)
1694                dev->qkey_violations = 0;
1695        return 0;
1696}
1697
1698static int ipath_query_gid(struct ib_device *ibdev, u8 port,
1699                           int index, union ib_gid *gid)
1700{
1701        struct ipath_ibdev *dev = to_idev(ibdev);
1702        int ret;
1703
1704        if (index >= 1) {
1705                ret = -EINVAL;
1706                goto bail;
1707        }
1708        gid->global.subnet_prefix = dev->gid_prefix;
1709        gid->global.interface_id = dev->dd->ipath_guid;
1710
1711        ret = 0;
1712
1713bail:
1714        return ret;
1715}
1716
1717static struct ib_pd *ipath_alloc_pd(struct ib_device *ibdev,
1718                                    struct ib_ucontext *context,
1719                                    struct ib_udata *udata)
1720{
1721        struct ipath_ibdev *dev = to_idev(ibdev);
1722        struct ipath_pd *pd;
1723        struct ib_pd *ret;
1724
1725        /*
1726         * This limit is essentially arbitrary.  Some correctness tests
1727         * assume there is a maximum number of PDs that can be allocated;
1728         * the hardware imposes no such limit, but we fail allocations
1729         * beyond the value we report so that those tests pass.
1730         */
1731
1732        pd = kmalloc(sizeof *pd, GFP_KERNEL);
1733        if (!pd) {
1734                ret = ERR_PTR(-ENOMEM);
1735                goto bail;
1736        }
1737
1738        spin_lock(&dev->n_pds_lock);
1739        if (dev->n_pds_allocated == ib_ipath_max_pds) {
1740                spin_unlock(&dev->n_pds_lock);
1741                kfree(pd);
1742                ret = ERR_PTR(-ENOMEM);
1743                goto bail;
1744        }
1745
1746        dev->n_pds_allocated++;
1747        spin_unlock(&dev->n_pds_lock);
1748
1749        /* ib_alloc_pd() will initialize pd->ibpd. */
1750        pd->user = udata != NULL;
1751
1752        ret = &pd->ibpd;
1753
1754bail:
1755        return ret;
1756}
1757
1758static int ipath_dealloc_pd(struct ib_pd *ibpd)
1759{
1760        struct ipath_pd *pd = to_ipd(ibpd);
1761        struct ipath_ibdev *dev = to_idev(ibpd->device);
1762
1763        spin_lock(&dev->n_pds_lock);
1764        dev->n_pds_allocated--;
1765        spin_unlock(&dev->n_pds_lock);
1766
1767        kfree(pd);
1768
1769        return 0;
1770}
1771
1772/**
1773 * ipath_create_ah - create an address handle
1774 * @pd: the protection domain
1775 * @ah_attr: the attributes of the AH
1776 *
1777 * This may be called from interrupt context.
1778 */
1779static struct ib_ah *ipath_create_ah(struct ib_pd *pd,
1780                                     struct rdma_ah_attr *ah_attr,
1781                                     struct ib_udata *udata)
1782
1783{
1784        struct ipath_ah *ah;
1785        struct ib_ah *ret;
1786        struct ipath_ibdev *dev = to_idev(pd->device);
1787        unsigned long flags;
1788        u16 dlid;
1789
1790        dlid = rdma_ah_get_dlid(ah_attr);
1791
1792        /* A multicast address requires a GRH (see ch. 8.4.1). */
1793        if (dlid >= IPATH_MULTICAST_LID_BASE &&
1794            dlid != IPATH_PERMISSIVE_LID &&
1795            !(ah_attr->ah_flags & IB_AH_GRH)) {
1796                ret = ERR_PTR(-EINVAL);
1797                goto bail;
1798        }
1799
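        /* An unset DLID or an out-of-range port number is invalid. */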
1800        if (dlid == 0) {
1801                ret = ERR_PTR(-EINVAL);
1802                goto bail;
1803        }
1804
1805        if (ah_attr->port_num < 1 ||
1806            ah_attr->port_num > pd->device->phys_port_cnt) {
1807                ret = ERR_PTR(-EINVAL);
1808                goto bail;
1809        }
1810
1811        ah = kmalloc(sizeof *ah, GFP_ATOMIC);
1812        if (!ah) {
1813                ret = ERR_PTR(-ENOMEM);
1814                goto bail;
1815        }
1816
1817        spin_lock_irqsave(&dev->n_ahs_lock, flags);
1818        if (dev->n_ahs_allocated == ib_ipath_max_ahs) {
1819                spin_unlock_irqrestore(&dev->n_ahs_lock, flags);
1820                kfree(ah);
1821                ret = ERR_PTR(-ENOMEM);
1822                goto bail;
1823        }
1824
1825        dev->n_ahs_allocated++;
1826        spin_unlock_irqrestore(&dev->n_ahs_lock, flags);
1827
1828        /* ib_create_ah() will initialize ah->ibah. */
1829        ah->attr = *ah_attr;
1830        ah->attr.static_rate = ipath_ib_rate_to_mult(ah_attr->static_rate);
1831
1832        ret = &ah->ibah;
1833
1834bail:
1835        return ret;
1836}
1837
1838/**
1839 * ipath_destroy_ah - destroy an address handle
1840 * @ibah: the AH to destroy
1841 *
1842 * This may be called from interrupt context.
1843 */
1844static int ipath_destroy_ah(struct ib_ah *ibah)
1845{
1846        struct ipath_ibdev *dev = to_idev(ibah->device);
1847        struct ipath_ah *ah = to_iah(ibah);
1848        unsigned long flags;
1849
1850        spin_lock_irqsave(&dev->n_ahs_lock, flags);
1851        dev->n_ahs_allocated--;
1852        spin_unlock_irqrestore(&dev->n_ahs_lock, flags);
1853
1854        kfree(ah);
1855
1856        return 0;
1857}
1858
1859static int ipath_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr)
1860{
1861        struct ipath_ah *ah = to_iah(ibah);
1862
1863        *ah_attr = ah->attr;
1864        ah_attr->static_rate = ipath_mult_to_ib_rate(ah->attr.static_rate);
1865
1866        return 0;
1867}
1868
1869/**
1870 * ipath_get_npkeys - return the size of the PKEY table for port 0
1871 * @dd: the infinipath device
1872 */
1873unsigned ipath_get_npkeys(struct ipath_devdata *dd)
1874{
1875        return ARRAY_SIZE(dd->ipath_pd[0]->port_pkeys);
1876}
1877
1878/**
1879 * ipath_get_pkey - return the indexed PKEY from the port PKEY table
1880 * @dd: the infinipath device
1881 * @index: the PKEY index
1882 */
1883unsigned ipath_get_pkey(struct ipath_devdata *dd, unsigned index)
1884{
1885        unsigned ret;
1886
1887        /* always a kernel port, no locking needed */
1888        if (index >= ARRAY_SIZE(dd->ipath_pd[0]->port_pkeys))
1889                ret = 0;
1890        else
1891                ret = dd->ipath_pd[0]->port_pkeys[index];
1892
1893        return ret;
1894}
1895
1896static int ipath_query_pkey(struct ib_device *ibdev, u8 port, u16 index,
1897                            u16 *pkey)
1898{
1899        struct ipath_ibdev *dev = to_idev(ibdev);
1900        int ret;
1901
1902        if (index >= ipath_get_npkeys(dev->dd)) {
1903                ret = -EINVAL;
1904                goto bail;
1905        }
1906
1907        *pkey = ipath_get_pkey(dev->dd, index);
1908        ret = 0;
1909
1910bail:
1911        return ret;
1912}
1913
1914/**
1915 * ipath_alloc_ucontext - allocate a ucontext
1916 * @ibdev: the infiniband device
1917 * @udata: not used by the InfiniPath driver
1918 */
1919
1920static struct ib_ucontext *ipath_alloc_ucontext(struct ib_device *ibdev,
1921                                                struct ib_udata *udata)
1922{
1923        struct ipath_ucontext *context;
1924        struct ib_ucontext *ret;
1925
1926        context = kmalloc(sizeof *context, GFP_KERNEL);
1927        if (!context) {
1928                ret = ERR_PTR(-ENOMEM);
1929                goto bail;
1930        }
1931
1932        ret = &context->ibucontext;
1933
1934bail:
1935        return ret;
1936}
1937
1938static int ipath_dealloc_ucontext(struct ib_ucontext *context)
1939{
1940        kfree(to_iucontext(context));
1941        return 0;
1942}
1943
1944static int ipath_verbs_register_sysfs(struct ib_device *dev);
1945
1946static void __verbs_timer(unsigned long arg)
1947{
1948        struct ipath_devdata *dd = (struct ipath_devdata *) arg;
1949
1950        /* Handle verbs layer timeouts. */
1951        ipath_ib_timer(dd->verbs_dev);
1952
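        /* Re-arm so the timer runs again on the next jiffy. */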
1953        mod_timer(&dd->verbs_timer, jiffies + 1);
1954}
1955
1956static int enable_timer(struct ipath_devdata *dd)
1957{
1958        /*
1959         * Early chips had a design flaw where the chip's and the kernel's
1960         * idea of the receive tail register did not always agree, so we
1961         * would not get an interrupt on the next packet received.
1962         * If the board supports per-packet receive interrupts, use them.
1963         * Otherwise, the timer function periodically checks for packets
1964         * to cover this case.
1965         * Either way, the timer is also needed for verbs-layer
1966         * processing.
1967         */
1968        if (dd->ipath_flags & IPATH_GPIO_INTR) {
1969                ipath_write_kreg(dd, dd->ipath_kregs->kr_debugportselect,
1970                                 0x2074076542310ULL);
1971                /* Enable GPIO bit 2 interrupt */
1972                dd->ipath_gpio_mask |= (u64) (1 << IPATH_GPIO_PORT0_BIT);
1973                ipath_write_kreg(dd, dd->ipath_kregs->kr_gpio_mask,
1974                                 dd->ipath_gpio_mask);
1975        }
1976
1977        setup_timer(&dd->verbs_timer, __verbs_timer, (unsigned long)dd);
1978
1979        dd->verbs_timer.expires = jiffies + 1;
1980        add_timer(&dd->verbs_timer);
1981
1982        return 0;
1983}
1984
1985static int disable_timer(struct ipath_devdata *dd)
1986{
1987        /* Disable the GPIO interrupt we enabled in enable_timer() */
1988        if (dd->ipath_flags & IPATH_GPIO_INTR) {
1989                /* Disable GPIO bit 2 interrupt */
1990                dd->ipath_gpio_mask &= ~((u64) (1 << IPATH_GPIO_PORT0_BIT));
1991                ipath_write_kreg(dd, dd->ipath_kregs->kr_gpio_mask,
1992                                 dd->ipath_gpio_mask);
1993                /*
1994                 * We might want to undo changes to debugportselect,
1995                 * but how?
1996                 */
1997        }
1998
1999        del_timer_sync(&dd->verbs_timer);
2000
2001        return 0;
2002}
2003
2004static int ipath_port_immutable(struct ib_device *ibdev, u8 port_num,
2005                                struct ib_port_immutable *immutable)
2006{
2007        struct ib_port_attr attr;
2008        int err;
2009
2010        err = ipath_query_port(ibdev, port_num, &attr);
2011        if (err)
2012                return err;
2013
2014        immutable->pkey_tbl_len = attr.pkey_tbl_len;
2015        immutable->gid_tbl_len = attr.gid_tbl_len;
2016        immutable->core_cap_flags = RDMA_CORE_PORT_IBA_IB;
2017        immutable->max_mad_size = IB_MGMT_MAD_SIZE;
2018
2019        return 0;
2020}
2021
2022/**
2023 * ipath_register_ib_device - register our device with the infiniband core
2024 * @dd: the device data structure
2025 * Return 0 on success or a negative errno; sets dd->verbs_dev (NULL on failure).
2026 */
2027int ipath_register_ib_device(struct ipath_devdata *dd)
2028{
2029        struct ipath_verbs_counters cntrs;
2030        struct ipath_ibdev *idev;
2031        struct ib_device *dev;
2032        struct ipath_verbs_txreq *tx;
2033        unsigned i;
2034        int ret;
2035
2036        idev = (struct ipath_ibdev *)ib_alloc_device(sizeof *idev);
2037        if (idev == NULL) {
2038                ret = -ENOMEM;
2039                goto bail;
2040        }
2041
2042        dev = &idev->ibdev;
2043
2044        if (dd->ipath_sdma_descq_cnt) {
2045                tx = kmalloc_array(dd->ipath_sdma_descq_cnt, sizeof *tx,
2046                                   GFP_KERNEL);
2047                if (tx == NULL) {
2048                        ret = -ENOMEM;
2049                        goto err_tx;
2050                }
2051        } else
2052                tx = NULL;
2053        idev->txreq_bufs = tx;
2054
2055        /* Only need to initialize non-zero fields. */
2056        spin_lock_init(&idev->n_pds_lock);
2057        spin_lock_init(&idev->n_ahs_lock);
2058        spin_lock_init(&idev->n_cqs_lock);
2059        spin_lock_init(&idev->n_qps_lock);
2060        spin_lock_init(&idev->n_srqs_lock);
2061        spin_lock_init(&idev->n_mcast_grps_lock);
2062
2063        spin_lock_init(&idev->qp_table.lock);
2064        spin_lock_init(&idev->lk_table.lock);
2065        idev->sm_lid = be16_to_cpu(IB_LID_PERMISSIVE);
2066        /* Set the prefix to the default value (see ch. 4.1.1) */
2067        idev->gid_prefix = cpu_to_be64(0xfe80000000000000ULL);
2068
2069        ret = ipath_init_qp_table(idev, ib_ipath_qp_table_size);
2070        if (ret)
2071                goto err_qp;
2072
2073        /*
2074         * The top ib_ipath_lkey_table_size bits are used to index the
2075         * table.  The lower 8 bits can be owned by the user (copied from
2076         * the LKEY).  The remaining bits act as a generation number or tag.
2077         */
2078        idev->lk_table.max = 1 << ib_ipath_lkey_table_size;
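        /*
         * Concretely: with a table-size exponent of n the table has 2^n
         * entries, and a 32-bit LKEY decomposes into an n-bit table index
         * (top bits), 8 user-owned low bits, and a generation tag in the
         * remaining middle bits.
         */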
2079        idev->lk_table.table = kcalloc(idev->lk_table.max,
2080                                       sizeof(*idev->lk_table.table),
2081                                       GFP_KERNEL);
2082        if (idev->lk_table.table == NULL) {
2083                ret = -ENOMEM;
2084                goto err_lk;
2085        }
2086        INIT_LIST_HEAD(&idev->pending_mmaps);
2087        spin_lock_init(&idev->pending_lock);
2088        idev->mmap_offset = PAGE_SIZE;
2089        spin_lock_init(&idev->mmap_offset_lock);
2090        INIT_LIST_HEAD(&idev->pending[0]);
2091        INIT_LIST_HEAD(&idev->pending[1]);
2092        INIT_LIST_HEAD(&idev->pending[2]);
2093        INIT_LIST_HEAD(&idev->piowait);
2094        INIT_LIST_HEAD(&idev->rnrwait);
2095        INIT_LIST_HEAD(&idev->txreq_free);
2096        idev->pending_index = 0;
2097        idev->port_cap_flags =
2098                IB_PORT_SYS_IMAGE_GUID_SUP | IB_PORT_CLIENT_REG_SUP;
2099        if (dd->ipath_flags & IPATH_HAS_LINK_LATENCY)
2100                idev->port_cap_flags |= IB_PORT_LINK_LATENCY_SUP;
2101        idev->pma_counter_select[0] = IB_PMA_PORT_XMIT_DATA;
2102        idev->pma_counter_select[1] = IB_PMA_PORT_RCV_DATA;
2103        idev->pma_counter_select[2] = IB_PMA_PORT_XMIT_PKTS;
2104        idev->pma_counter_select[3] = IB_PMA_PORT_RCV_PKTS;
2105        idev->pma_counter_select[4] = IB_PMA_PORT_XMIT_WAIT;
2106
2107        /* Snapshot current HW counters to "clear" them. */
2108        ipath_get_counters(dd, &cntrs);
2109        idev->z_symbol_error_counter = cntrs.symbol_error_counter;
2110        idev->z_link_error_recovery_counter =
2111                cntrs.link_error_recovery_counter;
2112        idev->z_link_downed_counter = cntrs.link_downed_counter;
2113        idev->z_port_rcv_errors = cntrs.port_rcv_errors;
2114        idev->z_port_rcv_remphys_errors =
2115                cntrs.port_rcv_remphys_errors;
2116        idev->z_port_xmit_discards = cntrs.port_xmit_discards;
2117        idev->z_port_xmit_data = cntrs.port_xmit_data;
2118        idev->z_port_rcv_data = cntrs.port_rcv_data;
2119        idev->z_port_xmit_packets = cntrs.port_xmit_packets;
2120        idev->z_port_rcv_packets = cntrs.port_rcv_packets;
2121        idev->z_local_link_integrity_errors =
2122                cntrs.local_link_integrity_errors;
2123        idev->z_excessive_buffer_overrun_errors =
2124                cntrs.excessive_buffer_overrun_errors;
2125        idev->z_vl15_dropped = cntrs.vl15_dropped;
2126
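        /* Thread the preallocated SDMA tx request structures onto the free list. */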
2127        for (i = 0; i < dd->ipath_sdma_descq_cnt; i++, tx++)
2128                list_add(&tx->txreq.list, &idev->txreq_free);
2129
2130        /*
2131         * The system image GUID is supposed to be the same for all
2132         * IB HCAs in a single system but since there can be other
2133         * device types in the system, we can't be sure this is unique.
2134         */
2135        if (!sys_image_guid)
2136                sys_image_guid = dd->ipath_guid;
2137        idev->sys_image_guid = sys_image_guid;
2138        idev->ib_unit = dd->ipath_unit;
2139        idev->dd = dd;
2140
2141        strlcpy(dev->name, "ipath%d", IB_DEVICE_NAME_MAX);
2142        dev->owner = THIS_MODULE;
2143        dev->node_guid = dd->ipath_guid;
2144        dev->uverbs_abi_ver = IPATH_UVERBS_ABI_VERSION;
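        /* Advertise the user verbs commands this driver implements. */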
2145        dev->uverbs_cmd_mask =
2146                (1ull << IB_USER_VERBS_CMD_GET_CONTEXT)         |
2147                (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE)        |
2148                (1ull << IB_USER_VERBS_CMD_QUERY_PORT)          |
2149                (1ull << IB_USER_VERBS_CMD_ALLOC_PD)            |
2150                (1ull << IB_USER_VERBS_CMD_DEALLOC_PD)          |
2151                (1ull << IB_USER_VERBS_CMD_CREATE_AH)           |
2152                (1ull << IB_USER_VERBS_CMD_DESTROY_AH)          |
2153                (1ull << IB_USER_VERBS_CMD_QUERY_AH)            |
2154                (1ull << IB_USER_VERBS_CMD_REG_MR)              |
2155                (1ull << IB_USER_VERBS_CMD_DEREG_MR)            |
2156                (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) |
2157                (1ull << IB_USER_VERBS_CMD_CREATE_CQ)           |
2158                (1ull << IB_USER_VERBS_CMD_RESIZE_CQ)           |
2159                (1ull << IB_USER_VERBS_CMD_DESTROY_CQ)          |
2160                (1ull << IB_USER_VERBS_CMD_POLL_CQ)             |
2161                (1ull << IB_USER_VERBS_CMD_REQ_NOTIFY_CQ)       |
2162                (1ull << IB_USER_VERBS_CMD_CREATE_QP)           |
2163                (1ull << IB_USER_VERBS_CMD_QUERY_QP)            |
2164                (1ull << IB_USER_VERBS_CMD_MODIFY_QP)           |
2165                (1ull << IB_USER_VERBS_CMD_DESTROY_QP)          |
2166                (1ull << IB_USER_VERBS_CMD_POST_SEND)           |
2167                (1ull << IB_USER_VERBS_CMD_POST_RECV)           |
2168                (1ull << IB_USER_VERBS_CMD_ATTACH_MCAST)        |
2169                (1ull << IB_USER_VERBS_CMD_DETACH_MCAST)        |
2170                (1ull << IB_USER_VERBS_CMD_CREATE_SRQ)          |
2171                (1ull << IB_USER_VERBS_CMD_MODIFY_SRQ)          |
2172                (1ull << IB_USER_VERBS_CMD_QUERY_SRQ)           |
2173                (1ull << IB_USER_VERBS_CMD_DESTROY_SRQ)         |
2174                (1ull << IB_USER_VERBS_CMD_POST_SRQ_RECV);
2175        dev->node_type = RDMA_NODE_IB_CA;
2176        dev->phys_port_cnt = 1;
2177        dev->num_comp_vectors = 1;
2178        dev->dev.parent = &dd->pcidev->dev;
2179        dev->query_device = ipath_query_device;
2180        dev->modify_device = ipath_modify_device;
2181        dev->query_port = ipath_query_port;
2182        dev->modify_port = ipath_modify_port;
2183        dev->query_pkey = ipath_query_pkey;
2184        dev->query_gid = ipath_query_gid;
2185        dev->alloc_ucontext = ipath_alloc_ucontext;
2186        dev->dealloc_ucontext = ipath_dealloc_ucontext;
2187        dev->alloc_pd = ipath_alloc_pd;
2188        dev->dealloc_pd = ipath_dealloc_pd;
2189        dev->create_ah = ipath_create_ah;
2190        dev->destroy_ah = ipath_destroy_ah;
2191        dev->query_ah = ipath_query_ah;
2192        dev->create_srq = ipath_create_srq;
2193        dev->modify_srq = ipath_modify_srq;
2194        dev->query_srq = ipath_query_srq;
2195        dev->destroy_srq = ipath_destroy_srq;
2196        dev->create_qp = ipath_create_qp;
2197        dev->modify_qp = ipath_modify_qp;
2198        dev->query_qp = ipath_query_qp;
2199        dev->destroy_qp = ipath_destroy_qp;
2200        dev->post_send = ipath_post_send;
2201        dev->post_recv = ipath_post_receive;
2202        dev->post_srq_recv = ipath_post_srq_receive;
2203        dev->create_cq = ipath_create_cq;
2204        dev->destroy_cq = ipath_destroy_cq;
2205        dev->resize_cq = ipath_resize_cq;
2206        dev->poll_cq = ipath_poll_cq;
2207        dev->req_notify_cq = ipath_req_notify_cq;
2208        dev->get_dma_mr = ipath_get_dma_mr;
2209        dev->reg_user_mr = ipath_reg_user_mr;
2210        dev->dereg_mr = ipath_dereg_mr;
2211        dev->alloc_fmr = ipath_alloc_fmr;
2212        dev->map_phys_fmr = ipath_map_phys_fmr;
2213        dev->unmap_fmr = ipath_unmap_fmr;
2214        dev->dealloc_fmr = ipath_dealloc_fmr;
2215        dev->attach_mcast = ipath_multicast_attach;
2216        dev->detach_mcast = ipath_multicast_detach;
2217        dev->process_mad = ipath_process_mad;
2218        dev->mmap = ipath_mmap;
2219        dev->dma_ops = &ipath_dma_mapping_ops;
2220        dev->get_port_immutable = ipath_port_immutable;
2221
2222        snprintf(dev->node_desc, sizeof(dev->node_desc),
2223                 IPATH_IDSTR " %s", init_utsname()->nodename);
2224
2225        ret = ib_register_device(dev, NULL);
2226        if (ret)
2227                goto err_reg;
2228
2229        ret = ipath_verbs_register_sysfs(dev);
2230        if (ret)
2231                goto err_class;
2232
2233        enable_timer(dd);
2234
2235        goto bail;
2236
2237err_class:
2238        ib_unregister_device(dev);
2239err_reg:
2240        kfree(idev->lk_table.table);
2241err_lk:
2242        kfree(idev->qp_table.table);
2243err_qp:
2244        kfree(idev->txreq_bufs);
2245err_tx:
2246        ib_dealloc_device(dev);
2247        ipath_dev_err(dd, "cannot register verbs: %d!\n", -ret);
2248        idev = NULL;
2249
2250bail:
2251        dd->verbs_dev = idev;
2252        return ret;
2253}
2254
2255void ipath_unregister_ib_device(struct ipath_ibdev *dev)
2256{
2257        struct ib_device *ibdev = &dev->ibdev;
2258        u32 qps_inuse;
2259
2260        ib_unregister_device(ibdev);
2261
2262        disable_timer(dev->dd);
2263
2264        if (!list_empty(&dev->pending[0]) ||
2265            !list_empty(&dev->pending[1]) ||
2266            !list_empty(&dev->pending[2]))
2267                ipath_dev_err(dev->dd, "pending list not empty!\n");
2268        if (!list_empty(&dev->piowait))
2269                ipath_dev_err(dev->dd, "piowait list not empty!\n");
2270        if (!list_empty(&dev->rnrwait))
2271                ipath_dev_err(dev->dd, "rnrwait list not empty!\n");
2272        if (!ipath_mcast_tree_empty())
2273                ipath_dev_err(dev->dd, "multicast table memory leak!\n");
2274        /*
2275         * Note that ipath_unregister_ib_device() can be called before all
2276         * the QPs are destroyed!
2277         */
2278        qps_inuse = ipath_free_all_qps(&dev->qp_table);
2279        if (qps_inuse)
2280                ipath_dev_err(dev->dd, "QP memory leak! %u still in use\n",
2281                        qps_inuse);
2282        kfree(dev->qp_table.table);
2283        kfree(dev->lk_table.table);
2284        kfree(dev->txreq_bufs);
2285        ib_dealloc_device(ibdev);
2286}
2287
2288static ssize_t show_rev(struct device *device, struct device_attribute *attr,
2289                        char *buf)
2290{
2291        struct ipath_ibdev *dev =
2292                container_of(device, struct ipath_ibdev, ibdev.dev);
2293
2294        return sprintf(buf, "%x\n", dev->dd->ipath_pcirev);
2295}
2296
2297static ssize_t show_hca(struct device *device, struct device_attribute *attr,
2298                        char *buf)
2299{
2300        struct ipath_ibdev *dev =
2301                container_of(device, struct ipath_ibdev, ibdev.dev);
2302        int ret;
2303
2304        ret = dev->dd->ipath_f_get_boardname(dev->dd, buf, 128);
2305        if (ret < 0)
2306                goto bail;
2307        strcat(buf, "\n");
2308        ret = strlen(buf);
2309
2310bail:
2311        return ret;
2312}
2313
2314static ssize_t show_stats(struct device *device, struct device_attribute *attr,
2315                          char *buf)
2316{
2317        struct ipath_ibdev *dev =
2318                container_of(device, struct ipath_ibdev, ibdev.dev);
2319        int i;
2320        int len;
2321
2322        len = sprintf(buf,
2323                      "RC resends  %d\n"
2324                      "RC no QACK  %d\n"
2325                      "RC ACKs     %d\n"
2326                      "RC SEQ NAKs %d\n"
2327                      "RC RDMA seq %d\n"
2328                      "RC RNR NAKs %d\n"
2329                      "RC OTH NAKs %d\n"
2330                      "RC timeouts %d\n"
2331                      "RC RDMA dup %d\n"
2332                      "piobuf wait %d\n"
2333                      "unaligned   %d\n"
2334                      "PKT drops   %d\n"
2335                      "WQE errs    %d\n",
2336                      dev->n_rc_resends, dev->n_rc_qacks, dev->n_rc_acks,
2337                      dev->n_seq_naks, dev->n_rdma_seq, dev->n_rnr_naks,
2338                      dev->n_other_naks, dev->n_timeouts,
2339                      dev->n_rdma_dup_busy, dev->n_piowait, dev->n_unaligned,
2340                      dev->n_pkt_drops, dev->n_wqe_errs);
2341        for (i = 0; i < ARRAY_SIZE(dev->opstats); i++) {
2342                const struct ipath_opcode_stats *si = &dev->opstats[i];
2343
2344                if (!si->n_packets && !si->n_bytes)
2345                        continue;
2346                len += sprintf(buf + len, "%02x %llu/%llu\n", i,
2347                               (unsigned long long) si->n_packets,
2348                               (unsigned long long) si->n_bytes);
2349        }
2350        return len;
2351}
2352
2353static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL);
2354static DEVICE_ATTR(hca_type, S_IRUGO, show_hca, NULL);
2355static DEVICE_ATTR(board_id, S_IRUGO, show_hca, NULL);
2356static DEVICE_ATTR(stats, S_IRUGO, show_stats, NULL);
2357
2358static struct device_attribute *ipath_class_attributes[] = {
2359        &dev_attr_hw_rev,
2360        &dev_attr_hca_type,
2361        &dev_attr_board_id,
2362        &dev_attr_stats
2363};
2364
2365static int ipath_verbs_register_sysfs(struct ib_device *dev)
2366{
2367        int i;
2368        int ret;
2369
2370        for (i = 0; i < ARRAY_SIZE(ipath_class_attributes); ++i) {
2371                ret = device_create_file(&dev->dev,
2372                                       ipath_class_attributes[i]);
2373                if (ret)
2374                        goto bail;
2375        }
2376        return 0;
2377bail:
2378        for (i = 0; i < ARRAY_SIZE(ipath_class_attributes); ++i)
2379                device_remove_file(&dev->dev, ipath_class_attributes[i]);
2380        return ret;
2381}
2382