linux/drivers/infiniband/hw/ipath/ipath_verbs.c
   1/*
   2 * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved.
   3 * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
   4 *
   5 * This software is available to you under a choice of one of two
   6 * licenses.  You may choose to be licensed under the terms of the GNU
   7 * General Public License (GPL) Version 2, available from the file
   8 * COPYING in the main directory of this source tree, or the
   9 * OpenIB.org BSD license below:
  10 *
  11 *     Redistribution and use in source and binary forms, with or
  12 *     without modification, are permitted provided that the following
  13 *     conditions are met:
  14 *
  15 *      - Redistributions of source code must retain the above
  16 *        copyright notice, this list of conditions and the following
  17 *        disclaimer.
  18 *
  19 *      - Redistributions in binary form must reproduce the above
  20 *        copyright notice, this list of conditions and the following
  21 *        disclaimer in the documentation and/or other materials
  22 *        provided with the distribution.
  23 *
  24 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  25 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  26 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  27 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
  28 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
  29 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  31 * SOFTWARE.
  32 */
  33
  34#include <rdma/ib_mad.h>
  35#include <rdma/ib_user_verbs.h>
  36#include <linux/io.h>
  37#include <linux/utsname.h>
  38#include <linux/rculist.h>
  39
  40#include "ipath_kernel.h"
  41#include "ipath_verbs.h"
  42#include "ipath_common.h"
  43
  44static unsigned int ib_ipath_qp_table_size = 251;
  45module_param_named(qp_table_size, ib_ipath_qp_table_size, uint, S_IRUGO);
  46MODULE_PARM_DESC(qp_table_size, "QP table size");
  47
  48unsigned int ib_ipath_lkey_table_size = 12;
  49module_param_named(lkey_table_size, ib_ipath_lkey_table_size, uint,
  50                   S_IRUGO);
  51MODULE_PARM_DESC(lkey_table_size,
  52                 "LKEY table size in bits (2^n, 1 <= n <= 23)");
  53
  54static unsigned int ib_ipath_max_pds = 0xFFFF;
  55module_param_named(max_pds, ib_ipath_max_pds, uint, S_IWUSR | S_IRUGO);
  56MODULE_PARM_DESC(max_pds,
  57                 "Maximum number of protection domains to support");
  58
  59static unsigned int ib_ipath_max_ahs = 0xFFFF;
  60module_param_named(max_ahs, ib_ipath_max_ahs, uint, S_IWUSR | S_IRUGO);
  61MODULE_PARM_DESC(max_ahs, "Maximum number of address handles to support");
  62
  63unsigned int ib_ipath_max_cqes = 0x2FFFF;
  64module_param_named(max_cqes, ib_ipath_max_cqes, uint, S_IWUSR | S_IRUGO);
  65MODULE_PARM_DESC(max_cqes,
  66                 "Maximum number of completion queue entries to support");
  67
  68unsigned int ib_ipath_max_cqs = 0x1FFFF;
  69module_param_named(max_cqs, ib_ipath_max_cqs, uint, S_IWUSR | S_IRUGO);
  70MODULE_PARM_DESC(max_cqs, "Maximum number of completion queues to support");
  71
  72unsigned int ib_ipath_max_qp_wrs = 0x3FFF;
  73module_param_named(max_qp_wrs, ib_ipath_max_qp_wrs, uint,
  74                   S_IWUSR | S_IRUGO);
  75MODULE_PARM_DESC(max_qp_wrs, "Maximum number of QP WRs to support");
  76
  77unsigned int ib_ipath_max_qps = 16384;
  78module_param_named(max_qps, ib_ipath_max_qps, uint, S_IWUSR | S_IRUGO);
  79MODULE_PARM_DESC(max_qps, "Maximum number of QPs to support");
  80
  81unsigned int ib_ipath_max_sges = 0x60;
  82module_param_named(max_sges, ib_ipath_max_sges, uint, S_IWUSR | S_IRUGO);
  83MODULE_PARM_DESC(max_sges, "Maximum number of SGEs to support");
  84
  85unsigned int ib_ipath_max_mcast_grps = 16384;
  86module_param_named(max_mcast_grps, ib_ipath_max_mcast_grps, uint,
  87                   S_IWUSR | S_IRUGO);
  88MODULE_PARM_DESC(max_mcast_grps,
  89                 "Maximum number of multicast groups to support");
  90
  91unsigned int ib_ipath_max_mcast_qp_attached = 16;
  92module_param_named(max_mcast_qp_attached, ib_ipath_max_mcast_qp_attached,
  93                   uint, S_IWUSR | S_IRUGO);
  94MODULE_PARM_DESC(max_mcast_qp_attached,
  95                 "Maximum number of attached QPs to support");
  96
  97unsigned int ib_ipath_max_srqs = 1024;
  98module_param_named(max_srqs, ib_ipath_max_srqs, uint, S_IWUSR | S_IRUGO);
  99MODULE_PARM_DESC(max_srqs, "Maximum number of SRQs to support");
 100
 101unsigned int ib_ipath_max_srq_sges = 128;
 102module_param_named(max_srq_sges, ib_ipath_max_srq_sges,
 103                   uint, S_IWUSR | S_IRUGO);
 104MODULE_PARM_DESC(max_srq_sges, "Maximum number of SRQ SGEs to support");
 105
 106unsigned int ib_ipath_max_srq_wrs = 0x1FFFF;
 107module_param_named(max_srq_wrs, ib_ipath_max_srq_wrs,
 108                   uint, S_IWUSR | S_IRUGO);
  109MODULE_PARM_DESC(max_srq_wrs, "Maximum number of SRQ WRs to support");
 110
 111static unsigned int ib_ipath_disable_sma;
 112module_param_named(disable_sma, ib_ipath_disable_sma, uint, S_IWUSR | S_IRUGO);
 113MODULE_PARM_DESC(disable_sma, "Disable the SMA");
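
/*
 * Usage note (added; not part of the original source): the knobs above are
 * ordinary module parameters, so they can typically be set at load time, e.g.
 *
 *     modprobe ib_ipath lkey_table_size=16 max_qps=8192
 *
 * Parameters registered with S_IRUGO are readable under
 * /sys/module/ib_ipath/parameters/, and those that also pass S_IWUSR can be
 * written there by root, although how quickly a runtime change takes effect
 * depends on where the driver reads the value.
 */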
 114
 115/*
 116 * Note that it is OK to post send work requests in the SQE and ERR
 117 * states; ipath_do_send() will process them and generate error
 118 * completions as per IB 1.2 C10-96.
 119 */
 120const int ib_ipath_state_ops[IB_QPS_ERR + 1] = {
 121        [IB_QPS_RESET] = 0,
 122        [IB_QPS_INIT] = IPATH_POST_RECV_OK,
 123        [IB_QPS_RTR] = IPATH_POST_RECV_OK | IPATH_PROCESS_RECV_OK,
 124        [IB_QPS_RTS] = IPATH_POST_RECV_OK | IPATH_PROCESS_RECV_OK |
 125            IPATH_POST_SEND_OK | IPATH_PROCESS_SEND_OK |
 126            IPATH_PROCESS_NEXT_SEND_OK,
 127        [IB_QPS_SQD] = IPATH_POST_RECV_OK | IPATH_PROCESS_RECV_OK |
 128            IPATH_POST_SEND_OK | IPATH_PROCESS_SEND_OK,
 129        [IB_QPS_SQE] = IPATH_POST_RECV_OK | IPATH_PROCESS_RECV_OK |
 130            IPATH_POST_SEND_OK | IPATH_FLUSH_SEND,
 131        [IB_QPS_ERR] = IPATH_POST_RECV_OK | IPATH_FLUSH_RECV |
 132            IPATH_POST_SEND_OK | IPATH_FLUSH_SEND,
 133};
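
/*
 * Example (added for clarity): the table above is consulted with a simple
 * mask test.  For instance, ipath_post_one_send() below rejects a post in
 * the RESET/INIT/RTR states with essentially
 *
 *     if (!(ib_ipath_state_ops[qp->state] & IPATH_POST_SEND_OK))
 *             goto bail_inval;
 */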
 134
 135struct ipath_ucontext {
 136        struct ib_ucontext ibucontext;
 137};
 138
 139static inline struct ipath_ucontext *to_iucontext(struct ib_ucontext
 140                                                  *ibucontext)
 141{
 142        return container_of(ibucontext, struct ipath_ucontext, ibucontext);
 143}
 144
 145/*
 146 * Translate ib_wr_opcode into ib_wc_opcode.
 147 */
 148const enum ib_wc_opcode ib_ipath_wc_opcode[] = {
 149        [IB_WR_RDMA_WRITE] = IB_WC_RDMA_WRITE,
 150        [IB_WR_RDMA_WRITE_WITH_IMM] = IB_WC_RDMA_WRITE,
 151        [IB_WR_SEND] = IB_WC_SEND,
 152        [IB_WR_SEND_WITH_IMM] = IB_WC_SEND,
 153        [IB_WR_RDMA_READ] = IB_WC_RDMA_READ,
 154        [IB_WR_ATOMIC_CMP_AND_SWP] = IB_WC_COMP_SWAP,
 155        [IB_WR_ATOMIC_FETCH_AND_ADD] = IB_WC_FETCH_ADD
 156};
 157
 158/*
 159 * System image GUID.
 160 */
 161static __be64 sys_image_guid;
 162
 163/**
 164 * ipath_copy_sge - copy data to SGE memory
 165 * @ss: the SGE state
 166 * @data: the data to copy
 167 * @length: the length of the data
 168 */
 169void ipath_copy_sge(struct ipath_sge_state *ss, void *data, u32 length)
 170{
 171        struct ipath_sge *sge = &ss->sge;
 172
 173        while (length) {
 174                u32 len = sge->length;
 175
 176                if (len > length)
 177                        len = length;
 178                if (len > sge->sge_length)
 179                        len = sge->sge_length;
 180                BUG_ON(len == 0);
 181                memcpy(sge->vaddr, data, len);
 182                sge->vaddr += len;
 183                sge->length -= len;
 184                sge->sge_length -= len;
 185                if (sge->sge_length == 0) {
 186                        if (--ss->num_sge)
 187                                *sge = *ss->sg_list++;
 188                } else if (sge->length == 0 && sge->mr != NULL) {
 189                        if (++sge->n >= IPATH_SEGSZ) {
 190                                if (++sge->m >= sge->mr->mapsz)
 191                                        break;
 192                                sge->n = 0;
 193                        }
 194                        sge->vaddr =
 195                                sge->mr->map[sge->m]->segs[sge->n].vaddr;
 196                        sge->length =
 197                                sge->mr->map[sge->m]->segs[sge->n].length;
 198                }
 199                data += len;
 200                length -= len;
 201        }
 202}
 203
 204/**
  205 * ipath_skip_sge - skip over SGE memory without copying (near-duplicate of ipath_copy_sge)
 206 * @ss: the SGE state
 207 * @length: the number of bytes to skip
 208 */
 209void ipath_skip_sge(struct ipath_sge_state *ss, u32 length)
 210{
 211        struct ipath_sge *sge = &ss->sge;
 212
 213        while (length) {
 214                u32 len = sge->length;
 215
 216                if (len > length)
 217                        len = length;
 218                if (len > sge->sge_length)
 219                        len = sge->sge_length;
 220                BUG_ON(len == 0);
 221                sge->vaddr += len;
 222                sge->length -= len;
 223                sge->sge_length -= len;
 224                if (sge->sge_length == 0) {
 225                        if (--ss->num_sge)
 226                                *sge = *ss->sg_list++;
 227                } else if (sge->length == 0 && sge->mr != NULL) {
 228                        if (++sge->n >= IPATH_SEGSZ) {
 229                                if (++sge->m >= sge->mr->mapsz)
 230                                        break;
 231                                sge->n = 0;
 232                        }
 233                        sge->vaddr =
 234                                sge->mr->map[sge->m]->segs[sge->n].vaddr;
 235                        sge->length =
 236                                sge->mr->map[sge->m]->segs[sge->n].length;
 237                }
 238                length -= len;
 239        }
 240}
 241
 242/*
 243 * Count the number of DMA descriptors needed to send length bytes of data.
 244 * Don't modify the ipath_sge_state to get the count.
 245 * Return zero if any of the segments is not aligned.
 246 */
 247static u32 ipath_count_sge(struct ipath_sge_state *ss, u32 length)
 248{
 249        struct ipath_sge *sg_list = ss->sg_list;
 250        struct ipath_sge sge = ss->sge;
 251        u8 num_sge = ss->num_sge;
 252        u32 ndesc = 1;  /* count the header */
 253
 254        while (length) {
 255                u32 len = sge.length;
 256
 257                if (len > length)
 258                        len = length;
 259                if (len > sge.sge_length)
 260                        len = sge.sge_length;
 261                BUG_ON(len == 0);
 262                if (((long) sge.vaddr & (sizeof(u32) - 1)) ||
 263                    (len != length && (len & (sizeof(u32) - 1)))) {
 264                        ndesc = 0;
 265                        break;
 266                }
 267                ndesc++;
 268                sge.vaddr += len;
 269                sge.length -= len;
 270                sge.sge_length -= len;
 271                if (sge.sge_length == 0) {
 272                        if (--num_sge)
 273                                sge = *sg_list++;
 274                } else if (sge.length == 0 && sge.mr != NULL) {
 275                        if (++sge.n >= IPATH_SEGSZ) {
 276                                if (++sge.m >= sge.mr->mapsz)
 277                                        break;
 278                                sge.n = 0;
 279                        }
 280                        sge.vaddr =
 281                                sge.mr->map[sge.m]->segs[sge.n].vaddr;
 282                        sge.length =
 283                                sge.mr->map[sge.m]->segs[sge.n].length;
 284                }
 285                length -= len;
 286        }
 287        return ndesc;
 288}
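
/*
 * Example (added): for a payload described by a single dword-aligned SGE,
 * the loop above runs once, so ipath_count_sge() returns 2: one descriptor
 * for the header plus one for the data.  Any misaligned segment
 * short-circuits the count to 0, which forces the bounce-buffer copy path
 * in ipath_verbs_send_dma().
 */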
 289
 290/*
 291 * Copy from the SGEs to the data buffer.
 292 */
 293static void ipath_copy_from_sge(void *data, struct ipath_sge_state *ss,
 294                                u32 length)
 295{
 296        struct ipath_sge *sge = &ss->sge;
 297
 298        while (length) {
 299                u32 len = sge->length;
 300
 301                if (len > length)
 302                        len = length;
 303                if (len > sge->sge_length)
 304                        len = sge->sge_length;
 305                BUG_ON(len == 0);
 306                memcpy(data, sge->vaddr, len);
 307                sge->vaddr += len;
 308                sge->length -= len;
 309                sge->sge_length -= len;
 310                if (sge->sge_length == 0) {
 311                        if (--ss->num_sge)
 312                                *sge = *ss->sg_list++;
 313                } else if (sge->length == 0 && sge->mr != NULL) {
 314                        if (++sge->n >= IPATH_SEGSZ) {
 315                                if (++sge->m >= sge->mr->mapsz)
 316                                        break;
 317                                sge->n = 0;
 318                        }
 319                        sge->vaddr =
 320                                sge->mr->map[sge->m]->segs[sge->n].vaddr;
 321                        sge->length =
 322                                sge->mr->map[sge->m]->segs[sge->n].length;
 323                }
 324                data += len;
 325                length -= len;
 326        }
 327}
 328
 329/**
 330 * ipath_post_one_send - post one RC, UC, or UD send work request
 331 * @qp: the QP to post on
 332 * @wr: the work request to send
 333 */
 334static int ipath_post_one_send(struct ipath_qp *qp, struct ib_send_wr *wr)
 335{
 336        struct ipath_swqe *wqe;
 337        u32 next;
 338        int i;
 339        int j;
 340        int acc;
 341        int ret;
 342        unsigned long flags;
 343        struct ipath_devdata *dd = to_idev(qp->ibqp.device)->dd;
 344
 345        spin_lock_irqsave(&qp->s_lock, flags);
 346
 347        if (qp->ibqp.qp_type != IB_QPT_SMI &&
 348            !(dd->ipath_flags & IPATH_LINKACTIVE)) {
 349                ret = -ENETDOWN;
 350                goto bail;
 351        }
 352
 353        /* Check that state is OK to post send. */
 354        if (unlikely(!(ib_ipath_state_ops[qp->state] & IPATH_POST_SEND_OK)))
 355                goto bail_inval;
 356
 357        /* IB spec says that num_sge == 0 is OK. */
 358        if (wr->num_sge > qp->s_max_sge)
 359                goto bail_inval;
 360
 361        /*
  362         * Don't allow RDMA reads or atomic operations on UC QPs, and
  363         * reject undefined opcodes.  For atomics, also make sure the
  364         * buffer is large enough (and aligned) to hold the result.
 365         */
 366        if (qp->ibqp.qp_type == IB_QPT_UC) {
 367                if ((unsigned) wr->opcode >= IB_WR_RDMA_READ)
 368                        goto bail_inval;
 369        } else if (qp->ibqp.qp_type == IB_QPT_UD) {
 370                /* Check UD opcode */
 371                if (wr->opcode != IB_WR_SEND &&
 372                    wr->opcode != IB_WR_SEND_WITH_IMM)
 373                        goto bail_inval;
 374                /* Check UD destination address PD */
 375                if (qp->ibqp.pd != wr->wr.ud.ah->pd)
 376                        goto bail_inval;
 377        } else if ((unsigned) wr->opcode > IB_WR_ATOMIC_FETCH_AND_ADD)
 378                goto bail_inval;
 379        else if (wr->opcode >= IB_WR_ATOMIC_CMP_AND_SWP &&
 380                   (wr->num_sge == 0 ||
 381                    wr->sg_list[0].length < sizeof(u64) ||
 382                    wr->sg_list[0].addr & (sizeof(u64) - 1)))
 383                goto bail_inval;
 384        else if (wr->opcode >= IB_WR_RDMA_READ && !qp->s_max_rd_atomic)
 385                goto bail_inval;
 386
 387        next = qp->s_head + 1;
 388        if (next >= qp->s_size)
 389                next = 0;
 390        if (next == qp->s_last) {
 391                ret = -ENOMEM;
 392                goto bail;
 393        }
 394
 395        wqe = get_swqe_ptr(qp, qp->s_head);
 396        wqe->wr = *wr;
 397        wqe->length = 0;
 398        if (wr->num_sge) {
 399                acc = wr->opcode >= IB_WR_RDMA_READ ?
 400                        IB_ACCESS_LOCAL_WRITE : 0;
 401                for (i = 0, j = 0; i < wr->num_sge; i++) {
 402                        u32 length = wr->sg_list[i].length;
 403                        int ok;
 404
 405                        if (length == 0)
 406                                continue;
 407                        ok = ipath_lkey_ok(qp, &wqe->sg_list[j],
 408                                           &wr->sg_list[i], acc);
 409                        if (!ok)
 410                                goto bail_inval;
 411                        wqe->length += length;
 412                        j++;
 413                }
 414                wqe->wr.num_sge = j;
 415        }
 416        if (qp->ibqp.qp_type == IB_QPT_UC ||
 417            qp->ibqp.qp_type == IB_QPT_RC) {
 418                if (wqe->length > 0x80000000U)
 419                        goto bail_inval;
 420        } else if (wqe->length > to_idev(qp->ibqp.device)->dd->ipath_ibmtu)
 421                goto bail_inval;
 422        wqe->ssn = qp->s_ssn++;
 423        qp->s_head = next;
 424
 425        ret = 0;
 426        goto bail;
 427
 428bail_inval:
 429        ret = -EINVAL;
 430bail:
 431        spin_unlock_irqrestore(&qp->s_lock, flags);
 432        return ret;
 433}
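
/*
 * Note (added): the ring-full test above leaves one slot unused so that
 * "next == s_last" can distinguish a full queue from an empty one; a send
 * queue sized for N entries therefore holds at most N - 1 posted WQEs.
 */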
 434
 435/**
 436 * ipath_post_send - post a send on a QP
 437 * @ibqp: the QP to post the send on
 438 * @wr: the list of work requests to post
 439 * @bad_wr: the first bad WR is put here
 440 *
 441 * This may be called from interrupt context.
 442 */
 443static int ipath_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
 444                           struct ib_send_wr **bad_wr)
 445{
 446        struct ipath_qp *qp = to_iqp(ibqp);
 447        int err = 0;
 448
 449        for (; wr; wr = wr->next) {
 450                err = ipath_post_one_send(qp, wr);
 451                if (err) {
 452                        *bad_wr = wr;
 453                        goto bail;
 454                }
 455        }
 456
 457        /* Try to do the send work in the caller's context. */
 458        ipath_do_send((unsigned long) qp);
 459
 460bail:
 461        return err;
 462}
 463
 464/**
 465 * ipath_post_receive - post a receive on a QP
 466 * @ibqp: the QP to post the receive on
 467 * @wr: the WR to post
 468 * @bad_wr: the first bad WR is put here
 469 *
 470 * This may be called from interrupt context.
 471 */
 472static int ipath_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
 473                              struct ib_recv_wr **bad_wr)
 474{
 475        struct ipath_qp *qp = to_iqp(ibqp);
 476        struct ipath_rwq *wq = qp->r_rq.wq;
 477        unsigned long flags;
 478        int ret;
 479
 480        /* Check that state is OK to post receive. */
 481        if (!(ib_ipath_state_ops[qp->state] & IPATH_POST_RECV_OK) || !wq) {
 482                *bad_wr = wr;
 483                ret = -EINVAL;
 484                goto bail;
 485        }
 486
 487        for (; wr; wr = wr->next) {
 488                struct ipath_rwqe *wqe;
 489                u32 next;
 490                int i;
 491
 492                if ((unsigned) wr->num_sge > qp->r_rq.max_sge) {
 493                        *bad_wr = wr;
 494                        ret = -EINVAL;
 495                        goto bail;
 496                }
 497
 498                spin_lock_irqsave(&qp->r_rq.lock, flags);
 499                next = wq->head + 1;
 500                if (next >= qp->r_rq.size)
 501                        next = 0;
 502                if (next == wq->tail) {
 503                        spin_unlock_irqrestore(&qp->r_rq.lock, flags);
 504                        *bad_wr = wr;
 505                        ret = -ENOMEM;
 506                        goto bail;
 507                }
 508
 509                wqe = get_rwqe_ptr(&qp->r_rq, wq->head);
 510                wqe->wr_id = wr->wr_id;
 511                wqe->num_sge = wr->num_sge;
 512                for (i = 0; i < wr->num_sge; i++)
 513                        wqe->sg_list[i] = wr->sg_list[i];
 514                /* Make sure queue entry is written before the head index. */
 515                smp_wmb();
 516                wq->head = next;
 517                spin_unlock_irqrestore(&qp->r_rq.lock, flags);
 518        }
 519        ret = 0;
 520
 521bail:
 522        return ret;
 523}
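
/*
 * Note (added, hedged): the smp_wmb() above orders the WQE contents before
 * the wq->head update.  This matters because the receive queue can be
 * shared with a consumer that reads head/tail without taking r_rq.lock
 * (for example a user-mapped work queue), so each entry must be fully
 * written before it is published.
 */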
 524
 525/**
  526 * ipath_qp_rcv - process an incoming packet on a QP
 527 * @dev: the device the packet came on
 528 * @hdr: the packet header
 529 * @has_grh: true if the packet has a GRH
 530 * @data: the packet data
 531 * @tlen: the packet length
 532 * @qp: the QP the packet came on
 533 *
 534 * This is called from ipath_ib_rcv() to process an incoming packet
 535 * for the given QP.
 536 * Called at interrupt level.
 537 */
 538static void ipath_qp_rcv(struct ipath_ibdev *dev,
 539                         struct ipath_ib_header *hdr, int has_grh,
 540                         void *data, u32 tlen, struct ipath_qp *qp)
 541{
 542        /* Check for valid receive state. */
 543        if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK)) {
 544                dev->n_pkt_drops++;
 545                return;
 546        }
 547
 548        switch (qp->ibqp.qp_type) {
 549        case IB_QPT_SMI:
 550        case IB_QPT_GSI:
 551                if (ib_ipath_disable_sma)
 552                        break;
 553                /* FALLTHROUGH */
 554        case IB_QPT_UD:
 555                ipath_ud_rcv(dev, hdr, has_grh, data, tlen, qp);
 556                break;
 557
 558        case IB_QPT_RC:
 559                ipath_rc_rcv(dev, hdr, has_grh, data, tlen, qp);
 560                break;
 561
 562        case IB_QPT_UC:
 563                ipath_uc_rcv(dev, hdr, has_grh, data, tlen, qp);
 564                break;
 565
 566        default:
 567                break;
 568        }
 569}
 570
 571/**
 572 * ipath_ib_rcv - process an incoming packet
  573 * @dev: the device pointer
 574 * @rhdr: the header of the packet
 575 * @data: the packet data
 576 * @tlen: the packet length
 577 *
 578 * This is called from ipath_kreceive() to process an incoming packet at
 579 * interrupt level. Tlen is the length of the header + data + CRC in bytes.
 580 */
 581void ipath_ib_rcv(struct ipath_ibdev *dev, void *rhdr, void *data,
 582                  u32 tlen)
 583{
 584        struct ipath_ib_header *hdr = rhdr;
 585        struct ipath_other_headers *ohdr;
 586        struct ipath_qp *qp;
 587        u32 qp_num;
 588        int lnh;
 589        u8 opcode;
 590        u16 lid;
 591
 592        if (unlikely(dev == NULL))
 593                goto bail;
 594
 595        if (unlikely(tlen < 24)) {      /* LRH+BTH+CRC */
 596                dev->rcv_errors++;
 597                goto bail;
 598        }
 599
 600        /* Check for a valid destination LID (see ch. 7.11.1). */
 601        lid = be16_to_cpu(hdr->lrh[1]);
 602        if (lid < IPATH_MULTICAST_LID_BASE) {
 603                lid &= ~((1 << dev->dd->ipath_lmc) - 1);
 604                if (unlikely(lid != dev->dd->ipath_lid)) {
 605                        dev->rcv_errors++;
 606                        goto bail;
 607                }
 608        }
 609
 610        /* Check for GRH */
 611        lnh = be16_to_cpu(hdr->lrh[0]) & 3;
 612        if (lnh == IPATH_LRH_BTH)
 613                ohdr = &hdr->u.oth;
 614        else if (lnh == IPATH_LRH_GRH)
 615                ohdr = &hdr->u.l.oth;
 616        else {
 617                dev->rcv_errors++;
 618                goto bail;
 619        }
 620
 621        opcode = be32_to_cpu(ohdr->bth[0]) >> 24;
 622        dev->opstats[opcode].n_bytes += tlen;
 623        dev->opstats[opcode].n_packets++;
 624
 625        /* Get the destination QP number. */
 626        qp_num = be32_to_cpu(ohdr->bth[1]) & IPATH_QPN_MASK;
 627        if (qp_num == IPATH_MULTICAST_QPN) {
 628                struct ipath_mcast *mcast;
 629                struct ipath_mcast_qp *p;
 630
 631                if (lnh != IPATH_LRH_GRH) {
 632                        dev->n_pkt_drops++;
 633                        goto bail;
 634                }
 635                mcast = ipath_mcast_find(&hdr->u.l.grh.dgid);
 636                if (mcast == NULL) {
 637                        dev->n_pkt_drops++;
 638                        goto bail;
 639                }
 640                dev->n_multicast_rcv++;
 641                list_for_each_entry_rcu(p, &mcast->qp_list, list)
 642                        ipath_qp_rcv(dev, hdr, 1, data, tlen, p->qp);
 643                /*
 644                 * Notify ipath_multicast_detach() if it is waiting for us
 645                 * to finish.
 646                 */
 647                if (atomic_dec_return(&mcast->refcount) <= 1)
 648                        wake_up(&mcast->wait);
 649        } else {
 650                qp = ipath_lookup_qpn(&dev->qp_table, qp_num);
 651                if (qp) {
 652                        dev->n_unicast_rcv++;
 653                        ipath_qp_rcv(dev, hdr, lnh == IPATH_LRH_GRH, data,
 654                                     tlen, qp);
 655                        /*
 656                         * Notify ipath_destroy_qp() if it is waiting
 657                         * for us to finish.
 658                         */
 659                        if (atomic_dec_and_test(&qp->refcount))
 660                                wake_up(&qp->wait);
 661                } else
 662                        dev->n_pkt_drops++;
 663        }
 664
 665bail:;
 666}
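
/*
 * Note (added): the 24-byte minimum enforced above corresponds to the
 * smallest parsable IB packet: an 8-byte LRH, a 12-byte BTH and a 4-byte
 * ICRC, matching the "LRH+BTH+CRC" comment at the check.
 */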
 667
 668/**
 669 * ipath_ib_timer - verbs timer
  670 * @dev: the device pointer
 671 *
 672 * This is called from ipath_do_rcv_timer() at interrupt level to check for
 673 * QPs which need retransmits and to collect performance numbers.
 674 */
 675static void ipath_ib_timer(struct ipath_ibdev *dev)
 676{
 677        struct ipath_qp *resend = NULL;
 678        struct ipath_qp *rnr = NULL;
 679        struct list_head *last;
 680        struct ipath_qp *qp;
 681        unsigned long flags;
 682
 683        if (dev == NULL)
 684                return;
 685
 686        spin_lock_irqsave(&dev->pending_lock, flags);
 687        /* Start filling the next pending queue. */
 688        if (++dev->pending_index >= ARRAY_SIZE(dev->pending))
 689                dev->pending_index = 0;
  690        /* Save any requests still in the new queue; they have timed out. */
 691        last = &dev->pending[dev->pending_index];
 692        while (!list_empty(last)) {
 693                qp = list_entry(last->next, struct ipath_qp, timerwait);
 694                list_del_init(&qp->timerwait);
 695                qp->timer_next = resend;
 696                resend = qp;
 697                atomic_inc(&qp->refcount);
 698        }
 699        last = &dev->rnrwait;
 700        if (!list_empty(last)) {
 701                qp = list_entry(last->next, struct ipath_qp, timerwait);
 702                if (--qp->s_rnr_timeout == 0) {
 703                        do {
 704                                list_del_init(&qp->timerwait);
 705                                qp->timer_next = rnr;
 706                                rnr = qp;
 707                                atomic_inc(&qp->refcount);
 708                                if (list_empty(last))
 709                                        break;
 710                                qp = list_entry(last->next, struct ipath_qp,
 711                                                timerwait);
 712                        } while (qp->s_rnr_timeout == 0);
 713                }
 714        }
 715        /*
 716         * We should only be in the started state if pma_sample_start != 0
 717         */
 718        if (dev->pma_sample_status == IB_PMA_SAMPLE_STATUS_STARTED &&
 719            --dev->pma_sample_start == 0) {
 720                dev->pma_sample_status = IB_PMA_SAMPLE_STATUS_RUNNING;
 721                ipath_snapshot_counters(dev->dd, &dev->ipath_sword,
 722                                        &dev->ipath_rword,
 723                                        &dev->ipath_spkts,
 724                                        &dev->ipath_rpkts,
 725                                        &dev->ipath_xmit_wait);
 726        }
 727        if (dev->pma_sample_status == IB_PMA_SAMPLE_STATUS_RUNNING) {
 728                if (dev->pma_sample_interval == 0) {
 729                        u64 ta, tb, tc, td, te;
 730
 731                        dev->pma_sample_status = IB_PMA_SAMPLE_STATUS_DONE;
 732                        ipath_snapshot_counters(dev->dd, &ta, &tb,
 733                                                &tc, &td, &te);
 734
 735                        dev->ipath_sword = ta - dev->ipath_sword;
 736                        dev->ipath_rword = tb - dev->ipath_rword;
 737                        dev->ipath_spkts = tc - dev->ipath_spkts;
 738                        dev->ipath_rpkts = td - dev->ipath_rpkts;
 739                        dev->ipath_xmit_wait = te - dev->ipath_xmit_wait;
  740                } else
  741                        dev->pma_sample_interval--;
  742
 743        }
 744        spin_unlock_irqrestore(&dev->pending_lock, flags);
 745
 746        /* XXX What if timer fires again while this is running? */
 747        while (resend != NULL) {
 748                qp = resend;
 749                resend = qp->timer_next;
 750
 751                spin_lock_irqsave(&qp->s_lock, flags);
 752                if (qp->s_last != qp->s_tail &&
 753                    ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK) {
 754                        dev->n_timeouts++;
 755                        ipath_restart_rc(qp, qp->s_last_psn + 1);
 756                }
 757                spin_unlock_irqrestore(&qp->s_lock, flags);
 758
 759                /* Notify ipath_destroy_qp() if it is waiting. */
 760                if (atomic_dec_and_test(&qp->refcount))
 761                        wake_up(&qp->wait);
 762        }
 763        while (rnr != NULL) {
 764                qp = rnr;
 765                rnr = qp->timer_next;
 766
 767                spin_lock_irqsave(&qp->s_lock, flags);
 768                if (ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK)
 769                        ipath_schedule_send(qp);
 770                spin_unlock_irqrestore(&qp->s_lock, flags);
 771
 772                /* Notify ipath_destroy_qp() if it is waiting. */
 773                if (atomic_dec_and_test(&qp->refcount))
 774                        wake_up(&qp->wait);
 775        }
 776}
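
/*
 * Note (added): dev->pending[] is used above as a small circular set of
 * buckets.  Advancing pending_index selects the bucket that will collect
 * newly queued QPs; anything still sitting in that bucket has waited a
 * full rotation, so it is treated as timed out and scheduled for restart.
 */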
 777
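/*
 * update_sge - advance the SGE state past @length bytes already consumed
 * (descriptive comment added; not in the original source).  This is the
 * same bookkeeping as ipath_copy_sge() without touching the data.
 */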
 778static void update_sge(struct ipath_sge_state *ss, u32 length)
 779{
 780        struct ipath_sge *sge = &ss->sge;
 781
 782        sge->vaddr += length;
 783        sge->length -= length;
 784        sge->sge_length -= length;
 785        if (sge->sge_length == 0) {
 786                if (--ss->num_sge)
 787                        *sge = *ss->sg_list++;
 788        } else if (sge->length == 0 && sge->mr != NULL) {
 789                if (++sge->n >= IPATH_SEGSZ) {
 790                        if (++sge->m >= sge->mr->mapsz)
 791                                return;
 792                        sge->n = 0;
 793                }
 794                sge->vaddr = sge->mr->map[sge->m]->segs[sge->n].vaddr;
 795                sge->length = sge->mr->map[sge->m]->segs[sge->n].length;
 796        }
 797}
 798
 799#ifdef __LITTLE_ENDIAN
 800static inline u32 get_upper_bits(u32 data, u32 shift)
 801{
 802        return data >> shift;
 803}
 804
 805static inline u32 set_upper_bits(u32 data, u32 shift)
 806{
 807        return data << shift;
 808}
 809
 810static inline u32 clear_upper_bytes(u32 data, u32 n, u32 off)
 811{
 812        data <<= ((sizeof(u32) - n) * BITS_PER_BYTE);
 813        data >>= ((sizeof(u32) - n - off) * BITS_PER_BYTE);
 814        return data;
 815}
 816#else
 817static inline u32 get_upper_bits(u32 data, u32 shift)
 818{
 819        return data << shift;
 820}
 821
 822static inline u32 set_upper_bits(u32 data, u32 shift)
 823{
 824        return data >> shift;
 825}
 826
 827static inline u32 clear_upper_bytes(u32 data, u32 n, u32 off)
 828{
 829        data >>= ((sizeof(u32) - n) * BITS_PER_BYTE);
 830        data <<= ((sizeof(u32) - n - off) * BITS_PER_BYTE);
 831        return data;
 832}
 833#endif
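
/*
 * Worked example (added; little-endian case): clear_upper_bytes(x, n, off)
 * keeps the n lowest-addressed bytes of x, places them at byte offset off
 * of the result, and zeroes the rest.  For x = 0x44332211:
 *
 *     clear_upper_bytes(0x44332211, 2, 0) == 0x00002211
 *     clear_upper_bytes(0x44332211, 2, 1) == 0x00221100
 *
 * copy_io() below relies on this to splice unaligned tails of an SGE into
 * the 32-bit words streamed to the PIO buffer.
 */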
 834
 835static void copy_io(u32 __iomem *piobuf, struct ipath_sge_state *ss,
 836                    u32 length, unsigned flush_wc)
 837{
 838        u32 extra = 0;
 839        u32 data = 0;
 840        u32 last;
 841
 842        while (1) {
 843                u32 len = ss->sge.length;
 844                u32 off;
 845
 846                if (len > length)
 847                        len = length;
 848                if (len > ss->sge.sge_length)
 849                        len = ss->sge.sge_length;
 850                BUG_ON(len == 0);
 851                /* If the source address is not aligned, try to align it. */
 852                off = (unsigned long)ss->sge.vaddr & (sizeof(u32) - 1);
 853                if (off) {
 854                        u32 *addr = (u32 *)((unsigned long)ss->sge.vaddr &
 855                                            ~(sizeof(u32) - 1));
 856                        u32 v = get_upper_bits(*addr, off * BITS_PER_BYTE);
 857                        u32 y;
 858
 859                        y = sizeof(u32) - off;
 860                        if (len > y)
 861                                len = y;
 862                        if (len + extra >= sizeof(u32)) {
 863                                data |= set_upper_bits(v, extra *
 864                                                       BITS_PER_BYTE);
 865                                len = sizeof(u32) - extra;
 866                                if (len == length) {
 867                                        last = data;
 868                                        break;
 869                                }
 870                                __raw_writel(data, piobuf);
 871                                piobuf++;
 872                                extra = 0;
 873                                data = 0;
 874                        } else {
 875                                /* Clear unused upper bytes */
 876                                data |= clear_upper_bytes(v, len, extra);
 877                                if (len == length) {
 878                                        last = data;
 879                                        break;
 880                                }
 881                                extra += len;
 882                        }
 883                } else if (extra) {
 884                        /* Source address is aligned. */
 885                        u32 *addr = (u32 *) ss->sge.vaddr;
 886                        int shift = extra * BITS_PER_BYTE;
 887                        int ushift = 32 - shift;
 888                        u32 l = len;
 889
 890                        while (l >= sizeof(u32)) {
 891                                u32 v = *addr;
 892
 893                                data |= set_upper_bits(v, shift);
 894                                __raw_writel(data, piobuf);
 895                                data = get_upper_bits(v, ushift);
 896                                piobuf++;
 897                                addr++;
 898                                l -= sizeof(u32);
 899                        }
 900                        /*
 901                         * We still have 'extra' number of bytes leftover.
 902                         */
 903                        if (l) {
 904                                u32 v = *addr;
 905
 906                                if (l + extra >= sizeof(u32)) {
 907                                        data |= set_upper_bits(v, shift);
 908                                        len -= l + extra - sizeof(u32);
 909                                        if (len == length) {
 910                                                last = data;
 911                                                break;
 912                                        }
 913                                        __raw_writel(data, piobuf);
 914                                        piobuf++;
 915                                        extra = 0;
 916                                        data = 0;
 917                                } else {
 918                                        /* Clear unused upper bytes */
 919                                        data |= clear_upper_bytes(v, l,
 920                                                                  extra);
 921                                        if (len == length) {
 922                                                last = data;
 923                                                break;
 924                                        }
 925                                        extra += l;
 926                                }
 927                        } else if (len == length) {
 928                                last = data;
 929                                break;
 930                        }
 931                } else if (len == length) {
 932                        u32 w;
 933
 934                        /*
 935                         * Need to round up for the last dword in the
 936                         * packet.
 937                         */
 938                        w = (len + 3) >> 2;
 939                        __iowrite32_copy(piobuf, ss->sge.vaddr, w - 1);
 940                        piobuf += w - 1;
 941                        last = ((u32 *) ss->sge.vaddr)[w - 1];
 942                        break;
 943                } else {
 944                        u32 w = len >> 2;
 945
 946                        __iowrite32_copy(piobuf, ss->sge.vaddr, w);
 947                        piobuf += w;
 948
 949                        extra = len & (sizeof(u32) - 1);
 950                        if (extra) {
 951                                u32 v = ((u32 *) ss->sge.vaddr)[w];
 952
 953                                /* Clear unused upper bytes */
 954                                data = clear_upper_bytes(v, extra, 0);
 955                        }
 956                }
 957                update_sge(ss, len);
 958                length -= len;
 959        }
 960        /* Update address before sending packet. */
 961        update_sge(ss, length);
 962        if (flush_wc) {
 963                /* must flush early everything before trigger word */
 964                ipath_flush_wc();
 965                __raw_writel(last, piobuf);
 966                /* be sure trigger word is written */
 967                ipath_flush_wc();
 968        } else
 969                __raw_writel(last, piobuf);
 970}
 971
 972/*
 973 * Convert IB rate to delay multiplier.
 974 */
 975unsigned ipath_ib_rate_to_mult(enum ib_rate rate)
 976{
 977        switch (rate) {
 978        case IB_RATE_2_5_GBPS: return 8;
 979        case IB_RATE_5_GBPS:   return 4;
 980        case IB_RATE_10_GBPS:  return 2;
 981        case IB_RATE_20_GBPS:  return 1;
 982        default:               return 0;
 983        }
 984}
 985
 986/*
 987 * Convert delay multiplier to IB rate
 988 */
 989static enum ib_rate ipath_mult_to_ib_rate(unsigned mult)
 990{
 991        switch (mult) {
 992        case 8:  return IB_RATE_2_5_GBPS;
 993        case 4:  return IB_RATE_5_GBPS;
 994        case 2:  return IB_RATE_10_GBPS;
 995        case 1:  return IB_RATE_20_GBPS;
 996        default: return IB_RATE_PORT_CURRENT;
 997        }
 998}
 999
1000static inline struct ipath_verbs_txreq *get_txreq(struct ipath_ibdev *dev)
1001{
1002        struct ipath_verbs_txreq *tx = NULL;
1003        unsigned long flags;
1004
1005        spin_lock_irqsave(&dev->pending_lock, flags);
1006        if (!list_empty(&dev->txreq_free)) {
1007                struct list_head *l = dev->txreq_free.next;
1008
1009                list_del(l);
1010                tx = list_entry(l, struct ipath_verbs_txreq, txreq.list);
1011        }
1012        spin_unlock_irqrestore(&dev->pending_lock, flags);
1013        return tx;
1014}
1015
1016static inline void put_txreq(struct ipath_ibdev *dev,
1017                             struct ipath_verbs_txreq *tx)
1018{
1019        unsigned long flags;
1020
1021        spin_lock_irqsave(&dev->pending_lock, flags);
1022        list_add(&tx->txreq.list, &dev->txreq_free);
1023        spin_unlock_irqrestore(&dev->pending_lock, flags);
1024}
1025
1026static void sdma_complete(void *cookie, int status)
1027{
1028        struct ipath_verbs_txreq *tx = cookie;
1029        struct ipath_qp *qp = tx->qp;
1030        struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
1031        unsigned long flags;
1032        enum ib_wc_status ibs = status == IPATH_SDMA_TXREQ_S_OK ?
1033                IB_WC_SUCCESS : IB_WC_WR_FLUSH_ERR;
1034
1035        if (atomic_dec_and_test(&qp->s_dma_busy)) {
1036                spin_lock_irqsave(&qp->s_lock, flags);
1037                if (tx->wqe)
1038                        ipath_send_complete(qp, tx->wqe, ibs);
1039                if ((ib_ipath_state_ops[qp->state] & IPATH_FLUSH_SEND &&
1040                     qp->s_last != qp->s_head) ||
1041                    (qp->s_flags & IPATH_S_WAIT_DMA))
1042                        ipath_schedule_send(qp);
1043                spin_unlock_irqrestore(&qp->s_lock, flags);
1044                wake_up(&qp->wait_dma);
1045        } else if (tx->wqe) {
1046                spin_lock_irqsave(&qp->s_lock, flags);
1047                ipath_send_complete(qp, tx->wqe, ibs);
1048                spin_unlock_irqrestore(&qp->s_lock, flags);
1049        }
1050
1051        if (tx->txreq.flags & IPATH_SDMA_TXREQ_F_FREEBUF)
1052                kfree(tx->txreq.map_addr);
1053        put_txreq(dev, tx);
1054
1055        if (atomic_dec_and_test(&qp->refcount))
1056                wake_up(&qp->wait);
1057}
1058
1059static void decrement_dma_busy(struct ipath_qp *qp)
1060{
1061        unsigned long flags;
1062
1063        if (atomic_dec_and_test(&qp->s_dma_busy)) {
1064                spin_lock_irqsave(&qp->s_lock, flags);
1065                if ((ib_ipath_state_ops[qp->state] & IPATH_FLUSH_SEND &&
1066                     qp->s_last != qp->s_head) ||
1067                    (qp->s_flags & IPATH_S_WAIT_DMA))
1068                        ipath_schedule_send(qp);
1069                spin_unlock_irqrestore(&qp->s_lock, flags);
1070                wake_up(&qp->wait_dma);
1071        }
1072}
1073
1074/*
1075 * Compute the number of clock cycles of delay before sending the next packet.
1076 * The multipliers reflect the number of clocks for the fastest rate so
1077 * one tick at 4xDDR is 8 ticks at 1xSDR.
1078 * If the destination port will take longer to receive a packet than
1079 * the outgoing link can send it, we need to delay sending the next packet
1080 * by the difference in time it takes the receiver to receive and the sender
1081 * to send this packet.
1082 * Note that this delay is always correct for UC and RC but not always
1083 * optimal for UD. For UD, the destination HCA can be different for each
1084 * packet, in which case, we could send packets to a different destination
1085 * while "waiting" for the delay. The overhead for doing this without
1086 * HW support is more than just paying the cost of delaying some packets
1087 * unnecessarily.
1088 */
1089static inline unsigned ipath_pkt_delay(u32 plen, u8 snd_mult, u8 rcv_mult)
1090{
1091        return (rcv_mult > snd_mult) ?
1092                (plen * (rcv_mult - snd_mult) + 1) >> 1 : 0;
1093}
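
/*
 * Illustrative numbers (added): with plen = 100 dwords, a sender at 4X DDR
 * (snd_mult = 1) and a receiver at 1X SDR (rcv_mult = 8), the function
 * returns (100 * 7 + 1) >> 1 = 350 delay ticks; if the receiver is at
 * least as fast as the sender, it returns 0.
 */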
1094
1095static int ipath_verbs_send_dma(struct ipath_qp *qp,
1096                                struct ipath_ib_header *hdr, u32 hdrwords,
1097                                struct ipath_sge_state *ss, u32 len,
1098                                u32 plen, u32 dwords)
1099{
1100        struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
1101        struct ipath_devdata *dd = dev->dd;
1102        struct ipath_verbs_txreq *tx;
1103        u32 *piobuf;
1104        u32 control;
1105        u32 ndesc;
1106        int ret;
1107
1108        tx = qp->s_tx;
1109        if (tx) {
1110                qp->s_tx = NULL;
1111                /* resend previously constructed packet */
1112                atomic_inc(&qp->s_dma_busy);
1113                ret = ipath_sdma_verbs_send(dd, tx->ss, tx->len, tx);
1114                if (ret) {
1115                        qp->s_tx = tx;
1116                        decrement_dma_busy(qp);
1117                }
1118                goto bail;
1119        }
1120
1121        tx = get_txreq(dev);
1122        if (!tx) {
1123                ret = -EBUSY;
1124                goto bail;
1125        }
1126
1127        /*
1128         * Get the saved delay count we computed for the previous packet
1129         * and save the delay count for this packet to be used next time
1130         * we get here.
1131         */
1132        control = qp->s_pkt_delay;
1133        qp->s_pkt_delay = ipath_pkt_delay(plen, dd->delay_mult, qp->s_dmult);
1134
1135        tx->qp = qp;
1136        atomic_inc(&qp->refcount);
1137        tx->wqe = qp->s_wqe;
1138        tx->txreq.callback = sdma_complete;
1139        tx->txreq.callback_cookie = tx;
1140        tx->txreq.flags = IPATH_SDMA_TXREQ_F_HEADTOHOST |
1141                IPATH_SDMA_TXREQ_F_INTREQ | IPATH_SDMA_TXREQ_F_FREEDESC;
1142        if (plen + 1 >= IPATH_SMALLBUF_DWORDS)
1143                tx->txreq.flags |= IPATH_SDMA_TXREQ_F_USELARGEBUF;
1144
1145        /* VL15 packets bypass credit check */
1146        if ((be16_to_cpu(hdr->lrh[0]) >> 12) == 15) {
1147                control |= 1ULL << 31;
1148                tx->txreq.flags |= IPATH_SDMA_TXREQ_F_VL15;
1149        }
1150
1151        if (len) {
1152                /*
1153                 * Don't try to DMA if it takes more descriptors than
1154                 * the queue holds.
1155                 */
1156                ndesc = ipath_count_sge(ss, len);
1157                if (ndesc >= dd->ipath_sdma_descq_cnt)
1158                        ndesc = 0;
1159        } else
1160                ndesc = 1;
1161        if (ndesc) {
1162                tx->hdr.pbc[0] = cpu_to_le32(plen);
1163                tx->hdr.pbc[1] = cpu_to_le32(control);
1164                memcpy(&tx->hdr.hdr, hdr, hdrwords << 2);
1165                tx->txreq.sg_count = ndesc;
1166                tx->map_len = (hdrwords + 2) << 2;
1167                tx->txreq.map_addr = &tx->hdr;
1168                atomic_inc(&qp->s_dma_busy);
1169                ret = ipath_sdma_verbs_send(dd, ss, dwords, tx);
1170                if (ret) {
1171                        /* save ss and length in dwords */
1172                        tx->ss = ss;
1173                        tx->len = dwords;
1174                        qp->s_tx = tx;
1175                        decrement_dma_busy(qp);
1176                }
1177                goto bail;
1178        }
1179
1180        /* Allocate a buffer and copy the header and payload to it. */
1181        tx->map_len = (plen + 1) << 2;
1182        piobuf = kmalloc(tx->map_len, GFP_ATOMIC);
1183        if (unlikely(piobuf == NULL)) {
1184                ret = -EBUSY;
1185                goto err_tx;
1186        }
1187        tx->txreq.map_addr = piobuf;
1188        tx->txreq.flags |= IPATH_SDMA_TXREQ_F_FREEBUF;
1189        tx->txreq.sg_count = 1;
1190
1191        *piobuf++ = (__force u32) cpu_to_le32(plen);
1192        *piobuf++ = (__force u32) cpu_to_le32(control);
1193        memcpy(piobuf, hdr, hdrwords << 2);
1194        ipath_copy_from_sge(piobuf + hdrwords, ss, len);
1195
1196        atomic_inc(&qp->s_dma_busy);
1197        ret = ipath_sdma_verbs_send(dd, NULL, 0, tx);
1198        /*
1199         * If we couldn't queue the DMA request, save the info
1200         * and try again later rather than destroying the
1201         * buffer and undoing the side effects of the copy.
1202         */
1203        if (ret) {
1204                tx->ss = NULL;
1205                tx->len = 0;
1206                qp->s_tx = tx;
1207                decrement_dma_busy(qp);
1208        }
1209        dev->n_unaligned++;
1210        goto bail;
1211
1212err_tx:
1213        if (atomic_dec_and_test(&qp->refcount))
1214                wake_up(&qp->wait);
1215        put_txreq(dev, tx);
1216bail:
1217        return ret;
1218}
1219
1220static int ipath_verbs_send_pio(struct ipath_qp *qp,
1221                                struct ipath_ib_header *ibhdr, u32 hdrwords,
1222                                struct ipath_sge_state *ss, u32 len,
1223                                u32 plen, u32 dwords)
1224{
1225        struct ipath_devdata *dd = to_idev(qp->ibqp.device)->dd;
1226        u32 *hdr = (u32 *) ibhdr;
1227        u32 __iomem *piobuf;
1228        unsigned flush_wc;
1229        u32 control;
1230        int ret;
1231        unsigned long flags;
1232
1233        piobuf = ipath_getpiobuf(dd, plen, NULL);
1234        if (unlikely(piobuf == NULL)) {
1235                ret = -EBUSY;
1236                goto bail;
1237        }
1238
1239        /*
1240         * Get the saved delay count we computed for the previous packet
1241         * and save the delay count for this packet to be used next time
1242         * we get here.
1243         */
1244        control = qp->s_pkt_delay;
1245        qp->s_pkt_delay = ipath_pkt_delay(plen, dd->delay_mult, qp->s_dmult);
1246
1247        /* VL15 packets bypass credit check */
1248        if ((be16_to_cpu(ibhdr->lrh[0]) >> 12) == 15)
1249                control |= 1ULL << 31;
1250
1251        /*
1252         * Write the length to the control qword plus any needed flags.
1253         * We have to flush after the PBC for correctness on some cpus
1254         * or WC buffer can be written out of order.
1255         */
1256        writeq(((u64) control << 32) | plen, piobuf);
1257        piobuf += 2;
1258
1259        flush_wc = dd->ipath_flags & IPATH_PIO_FLUSH_WC;
1260        if (len == 0) {
1261                /*
1262                 * If there is just the header portion, must flush before
1263                 * writing last word of header for correctness, and after
1264                 * the last header word (trigger word).
1265                 */
1266                if (flush_wc) {
1267                        ipath_flush_wc();
1268                        __iowrite32_copy(piobuf, hdr, hdrwords - 1);
1269                        ipath_flush_wc();
1270                        __raw_writel(hdr[hdrwords - 1], piobuf + hdrwords - 1);
1271                        ipath_flush_wc();
1272                } else
1273                        __iowrite32_copy(piobuf, hdr, hdrwords);
1274                goto done;
1275        }
1276
1277        if (flush_wc)
1278                ipath_flush_wc();
1279        __iowrite32_copy(piobuf, hdr, hdrwords);
1280        piobuf += hdrwords;
1281
1282        /* The common case is aligned and contained in one segment. */
1283        if (likely(ss->num_sge == 1 && len <= ss->sge.length &&
1284                   !((unsigned long)ss->sge.vaddr & (sizeof(u32) - 1)))) {
1285                u32 *addr = (u32 *) ss->sge.vaddr;
1286
1287                /* Update address before sending packet. */
1288                update_sge(ss, len);
1289                if (flush_wc) {
1290                        __iowrite32_copy(piobuf, addr, dwords - 1);
1291                        /* must flush early everything before trigger word */
1292                        ipath_flush_wc();
1293                        __raw_writel(addr[dwords - 1], piobuf + dwords - 1);
1294                        /* be sure trigger word is written */
1295                        ipath_flush_wc();
1296                } else
1297                        __iowrite32_copy(piobuf, addr, dwords);
1298                goto done;
1299        }
1300        copy_io(piobuf, ss, len, flush_wc);
1301done:
1302        if (qp->s_wqe) {
1303                spin_lock_irqsave(&qp->s_lock, flags);
1304                ipath_send_complete(qp, qp->s_wqe, IB_WC_SUCCESS);
1305                spin_unlock_irqrestore(&qp->s_lock, flags);
1306        }
1307        ret = 0;
1308bail:
1309        return ret;
1310}
1311
1312/**
1313 * ipath_verbs_send - send a packet
1314 * @qp: the QP to send on
1315 * @hdr: the packet header
1316 * @hdrwords: the number of 32-bit words in the header
1317 * @ss: the SGE to send
1318 * @len: the length of the packet in bytes
1319 */
1320int ipath_verbs_send(struct ipath_qp *qp, struct ipath_ib_header *hdr,
1321                     u32 hdrwords, struct ipath_sge_state *ss, u32 len)
1322{
1323        struct ipath_devdata *dd = to_idev(qp->ibqp.device)->dd;
1324        u32 plen;
1325        int ret;
1326        u32 dwords = (len + 3) >> 2;
1327
1328        /*
1329         * Calculate the send buffer trigger address.
1330         * The +1 counts for the pbc control dword following the pbc length.
1331         */
1332        plen = hdrwords + dwords + 1;
1333
1334        /*
1335         * VL15 packets (IB_QPT_SMI) will always use PIO, so we
1336         * can defer SDMA restart until link goes ACTIVE without
1337         * worrying about just how we got there.
1338         */
1339        if (qp->ibqp.qp_type == IB_QPT_SMI ||
1340            !(dd->ipath_flags & IPATH_HAS_SEND_DMA))
1341                ret = ipath_verbs_send_pio(qp, hdr, hdrwords, ss, len,
1342                                           plen, dwords);
1343        else
1344                ret = ipath_verbs_send_dma(qp, hdr, hdrwords, ss, len,
1345                                           plen, dwords);
1346
1347        return ret;
1348}
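
/*
 * Example (added; illustrative header size): for hdrwords = 7 and a
 * 256-byte payload, dwords = (256 + 3) >> 2 = 64 and plen = 7 + 64 + 1 = 72,
 * where the extra word accounts for the PBC control dword as noted in the
 * comment above.
 */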
1349
1350int ipath_snapshot_counters(struct ipath_devdata *dd, u64 *swords,
1351                            u64 *rwords, u64 *spkts, u64 *rpkts,
1352                            u64 *xmit_wait)
1353{
1354        int ret;
1355
1356        if (!(dd->ipath_flags & IPATH_INITTED)) {
1357                /* no hardware, freeze, etc. */
1358                ret = -EINVAL;
1359                goto bail;
1360        }
1361        *swords = ipath_snap_cntr(dd, dd->ipath_cregs->cr_wordsendcnt);
1362        *rwords = ipath_snap_cntr(dd, dd->ipath_cregs->cr_wordrcvcnt);
1363        *spkts = ipath_snap_cntr(dd, dd->ipath_cregs->cr_pktsendcnt);
1364        *rpkts = ipath_snap_cntr(dd, dd->ipath_cregs->cr_pktrcvcnt);
1365        *xmit_wait = ipath_snap_cntr(dd, dd->ipath_cregs->cr_sendstallcnt);
1366
1367        ret = 0;
1368
1369bail:
1370        return ret;
1371}
1372
1373/**
1374 * ipath_get_counters - get various chip counters
1375 * @dd: the infinipath device
1376 * @cntrs: counters are placed here
1377 *
1378 * Return the counters needed by recv_pma_get_portcounters().
1379 */
1380int ipath_get_counters(struct ipath_devdata *dd,
1381                       struct ipath_verbs_counters *cntrs)
1382{
1383        struct ipath_cregs const *crp = dd->ipath_cregs;
1384        int ret;
1385
1386        if (!(dd->ipath_flags & IPATH_INITTED)) {
1387                /* no hardware, freeze, etc. */
1388                ret = -EINVAL;
1389                goto bail;
1390        }
1391        cntrs->symbol_error_counter =
1392                ipath_snap_cntr(dd, crp->cr_ibsymbolerrcnt);
1393        cntrs->link_error_recovery_counter =
1394                ipath_snap_cntr(dd, crp->cr_iblinkerrrecovcnt);
1395        /*
1396         * The link downed counter counts when the other side downs the
1397         * connection.  We add in the number of times we downed the link
1398         * due to local link integrity errors to compensate.
1399         */
1400        cntrs->link_downed_counter =
1401                ipath_snap_cntr(dd, crp->cr_iblinkdowncnt);
1402        cntrs->port_rcv_errors =
1403                ipath_snap_cntr(dd, crp->cr_rxdroppktcnt) +
1404                ipath_snap_cntr(dd, crp->cr_rcvovflcnt) +
1405                ipath_snap_cntr(dd, crp->cr_portovflcnt) +
1406                ipath_snap_cntr(dd, crp->cr_err_rlencnt) +
1407                ipath_snap_cntr(dd, crp->cr_invalidrlencnt) +
1408                ipath_snap_cntr(dd, crp->cr_errlinkcnt) +
1409                ipath_snap_cntr(dd, crp->cr_erricrccnt) +
1410                ipath_snap_cntr(dd, crp->cr_errvcrccnt) +
1411                ipath_snap_cntr(dd, crp->cr_errlpcrccnt) +
1412                ipath_snap_cntr(dd, crp->cr_badformatcnt) +
1413                dd->ipath_rxfc_unsupvl_errs;
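            /*
             * The following error counters only exist on some chips; a creg
             * offset of zero means the counter is not implemented there, so
             * fold them in only when present.
             */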
1414        if (crp->cr_rxotherlocalphyerrcnt)
1415                cntrs->port_rcv_errors +=
1416                        ipath_snap_cntr(dd, crp->cr_rxotherlocalphyerrcnt);
1417        if (crp->cr_rxvlerrcnt)
1418                cntrs->port_rcv_errors +=
1419                        ipath_snap_cntr(dd, crp->cr_rxvlerrcnt);
1420        cntrs->port_rcv_remphys_errors =
1421                ipath_snap_cntr(dd, crp->cr_rcvebpcnt);
1422        cntrs->port_xmit_discards = ipath_snap_cntr(dd, crp->cr_unsupvlcnt);
1423        cntrs->port_xmit_data = ipath_snap_cntr(dd, crp->cr_wordsendcnt);
1424        cntrs->port_rcv_data = ipath_snap_cntr(dd, crp->cr_wordrcvcnt);
1425        cntrs->port_xmit_packets = ipath_snap_cntr(dd, crp->cr_pktsendcnt);
1426        cntrs->port_rcv_packets = ipath_snap_cntr(dd, crp->cr_pktrcvcnt);
1427        cntrs->local_link_integrity_errors =
1428                crp->cr_locallinkintegrityerrcnt ?
1429                ipath_snap_cntr(dd, crp->cr_locallinkintegrityerrcnt) :
1430                ((dd->ipath_flags & IPATH_GPIO_ERRINTRS) ?
1431                 dd->ipath_lli_errs : dd->ipath_lli_errors);
1432        cntrs->excessive_buffer_overrun_errors =
1433                crp->cr_excessbufferovflcnt ?
1434                ipath_snap_cntr(dd, crp->cr_excessbufferovflcnt) :
1435                dd->ipath_overrun_thresh_errs;
1436        cntrs->vl15_dropped = crp->cr_vl15droppedpktcnt ?
1437                ipath_snap_cntr(dd, crp->cr_vl15droppedpktcnt) : 0;
1438
1439        ret = 0;
1440
1441bail:
1442        return ret;
1443}
1444
1445/**
1446 * ipath_ib_piobufavail - callback when a PIO buffer is available
1447 * @arg: the device pointer
1448 *
1449 * This is called from ipath_intr() at interrupt level when a PIO buffer is
1450 * available after ipath_verbs_send() returned an error that no buffers were
1451 * available.  Return 1 if we consumed all the PIO buffers and still have
1452 * QPs waiting for buffers; for now, we simply reschedule the waiting QPs'
1453 * sends and return zero.
1454 */
1455int ipath_ib_piobufavail(struct ipath_ibdev *dev)
1456{
1457        struct list_head *list;
1458        struct ipath_qp *qplist;
1459        struct ipath_qp *qp;
1460        unsigned long flags;
1461
1462        if (dev == NULL)
1463                goto bail;
1464
1465        list = &dev->piowait;
1466        qplist = NULL;
1467
1468        spin_lock_irqsave(&dev->pending_lock, flags);
1469        while (!list_empty(list)) {
1470                qp = list_entry(list->next, struct ipath_qp, piowait);
1471                list_del_init(&qp->piowait);
1472                qp->pio_next = qplist;
1473                qplist = qp;
1474                atomic_inc(&qp->refcount);
1475        }
1476        spin_unlock_irqrestore(&dev->pending_lock, flags);
1477
1478        while (qplist != NULL) {
1479                qp = qplist;
1480                qplist = qp->pio_next;
1481
1482                spin_lock_irqsave(&qp->s_lock, flags);
1483                if (ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK)
1484                        ipath_schedule_send(qp);
1485                spin_unlock_irqrestore(&qp->s_lock, flags);
1486
1487                /* Notify ipath_destroy_qp() if it is waiting. */
1488                if (atomic_dec_and_test(&qp->refcount))
1489                        wake_up(&qp->wait);
1490        }
1491
1492bail:
1493        return 0;
1494}
1495
1496static int ipath_query_device(struct ib_device *ibdev,
1497                              struct ib_device_attr *props)
1498{
1499        struct ipath_ibdev *dev = to_idev(ibdev);
1500
1501        memset(props, 0, sizeof(*props));
1502
1503        props->device_cap_flags = IB_DEVICE_BAD_PKEY_CNTR |
1504                IB_DEVICE_BAD_QKEY_CNTR | IB_DEVICE_SHUTDOWN_PORT |
1505                IB_DEVICE_SYS_IMAGE_GUID | IB_DEVICE_RC_RNR_NAK_GEN |
1506                IB_DEVICE_PORT_ACTIVE_EVENT | IB_DEVICE_SRQ_RESIZE;
1507        props->page_size_cap = PAGE_SIZE;
1508        props->vendor_id =
1509                IPATH_SRC_OUI_1 << 16 | IPATH_SRC_OUI_2 << 8 | IPATH_SRC_OUI_3;
1510        props->vendor_part_id = dev->dd->ipath_deviceid;
1511        props->hw_ver = dev->dd->ipath_pcirev;
1512
1513        props->sys_image_guid = dev->sys_image_guid;
1514
1515        props->max_mr_size = ~0ull;
1516        props->max_qp = ib_ipath_max_qps;
1517        props->max_qp_wr = ib_ipath_max_qp_wrs;
1518        props->max_sge = ib_ipath_max_sges;
1519        props->max_cq = ib_ipath_max_cqs;
1520        props->max_ah = ib_ipath_max_ahs;
1521        props->max_cqe = ib_ipath_max_cqes;
1522        props->max_mr = dev->lk_table.max;
1523        props->max_fmr = dev->lk_table.max;
1524        props->max_map_per_fmr = 32767;
1525        props->max_pd = ib_ipath_max_pds;
1526        props->max_qp_rd_atom = IPATH_MAX_RDMA_ATOMIC;
1527        props->max_qp_init_rd_atom = 255;
1528        /* props->max_res_rd_atom */
1529        props->max_srq = ib_ipath_max_srqs;
1530        props->max_srq_wr = ib_ipath_max_srq_wrs;
1531        props->max_srq_sge = ib_ipath_max_srq_sges;
1532        /* props->local_ca_ack_delay */
1533        props->atomic_cap = IB_ATOMIC_GLOB;
1534        props->max_pkeys = ipath_get_npkeys(dev->dd);
1535        props->max_mcast_grp = ib_ipath_max_mcast_grps;
1536        props->max_mcast_qp_attach = ib_ipath_max_mcast_qp_attached;
1537        props->max_total_mcast_qp_attach = props->max_mcast_qp_attach *
1538                props->max_mcast_grp;
1539
1540        return 0;
1541}
1542
1543const u8 ipath_cvt_physportstate[32] = {
1544        [INFINIPATH_IBCS_LT_STATE_DISABLED] = IB_PHYSPORTSTATE_DISABLED,
1545        [INFINIPATH_IBCS_LT_STATE_LINKUP] = IB_PHYSPORTSTATE_LINKUP,
1546        [INFINIPATH_IBCS_LT_STATE_POLLACTIVE] = IB_PHYSPORTSTATE_POLL,
1547        [INFINIPATH_IBCS_LT_STATE_POLLQUIET] = IB_PHYSPORTSTATE_POLL,
1548        [INFINIPATH_IBCS_LT_STATE_SLEEPDELAY] = IB_PHYSPORTSTATE_SLEEP,
1549        [INFINIPATH_IBCS_LT_STATE_SLEEPQUIET] = IB_PHYSPORTSTATE_SLEEP,
1550        [INFINIPATH_IBCS_LT_STATE_CFGDEBOUNCE] =
1551                IB_PHYSPORTSTATE_CFG_TRAIN,
1552        [INFINIPATH_IBCS_LT_STATE_CFGRCVFCFG] =
1553                IB_PHYSPORTSTATE_CFG_TRAIN,
1554        [INFINIPATH_IBCS_LT_STATE_CFGWAITRMT] =
1555                IB_PHYSPORTSTATE_CFG_TRAIN,
1556        [INFINIPATH_IBCS_LT_STATE_CFGIDLE] = IB_PHYSPORTSTATE_CFG_TRAIN,
1557        [INFINIPATH_IBCS_LT_STATE_RECOVERRETRAIN] =
1558                IB_PHYSPORTSTATE_LINK_ERR_RECOVER,
1559        [INFINIPATH_IBCS_LT_STATE_RECOVERWAITRMT] =
1560                IB_PHYSPORTSTATE_LINK_ERR_RECOVER,
1561        [INFINIPATH_IBCS_LT_STATE_RECOVERIDLE] =
1562                IB_PHYSPORTSTATE_LINK_ERR_RECOVER,
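            /*
             * Link-training states 0x10-0x17 have no named IBCS constant
             * here; report them as the config/training physical state.
             */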
1563        [0x10] = IB_PHYSPORTSTATE_CFG_TRAIN,
1564        [0x11] = IB_PHYSPORTSTATE_CFG_TRAIN,
1565        [0x12] = IB_PHYSPORTSTATE_CFG_TRAIN,
1566        [0x13] = IB_PHYSPORTSTATE_CFG_TRAIN,
1567        [0x14] = IB_PHYSPORTSTATE_CFG_TRAIN,
1568        [0x15] = IB_PHYSPORTSTATE_CFG_TRAIN,
1569        [0x16] = IB_PHYSPORTSTATE_CFG_TRAIN,
1570        [0x17] = IB_PHYSPORTSTATE_CFG_TRAIN
1571};
1572
1573u32 ipath_get_cr_errpkey(struct ipath_devdata *dd)
1574{
1575        return ipath_read_creg32(dd, dd->ipath_cregs->cr_errpkey);
1576}
1577
1578static int ipath_query_port(struct ib_device *ibdev,
1579                            u8 port, struct ib_port_attr *props)
1580{
1581        struct ipath_ibdev *dev = to_idev(ibdev);
1582        struct ipath_devdata *dd = dev->dd;
1583        enum ib_mtu mtu;
1584        u16 lid = dd->ipath_lid;
1585        u64 ibcstat;
1586
1587        memset(props, 0, sizeof(*props));
1588        props->lid = lid ? lid : be16_to_cpu(IB_LID_PERMISSIVE);
1589        props->lmc = dd->ipath_lmc;
1590        props->sm_lid = dev->sm_lid;
1591        props->sm_sl = dev->sm_sl;
1592        ibcstat = dd->ipath_lastibcstat;
1593        /* Map LinkState to IB PortInfo values. */
1594        props->state = ipath_ib_linkstate(dd, ibcstat) + 1;
1595
1596        /* See phys_state_show() */
1597        props->phys_state = /* MEA: assumes shift == 0 */
1598                ipath_cvt_physportstate[dd->ipath_lastibcstat &
1599                dd->ibcs_lts_mask];
1600        props->port_cap_flags = dev->port_cap_flags;
1601        props->gid_tbl_len = 1;
1602        props->max_msg_sz = 0x80000000;
1603        props->pkey_tbl_len = ipath_get_npkeys(dd);
1604        props->bad_pkey_cntr = ipath_get_cr_errpkey(dd) -
1605                dev->z_pkey_violations;
1606        props->qkey_viol_cntr = dev->qkey_violations;
1607        props->active_width = dd->ipath_link_width_active;
1608        /* See rate_show() */
1609        props->active_speed = dd->ipath_link_speed_active;
1610        props->max_vl_num = 1;          /* VLCap = VL0 */
1611        props->init_type_reply = 0;
1612
1613        props->max_mtu = ipath_mtu4096 ? IB_MTU_4096 : IB_MTU_2048;
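            /* Map the chip MTU in bytes to the IB enum for active_mtu. */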
1614        switch (dd->ipath_ibmtu) {
1615        case 4096:
1616                mtu = IB_MTU_4096;
1617                break;
1618        case 2048:
1619                mtu = IB_MTU_2048;
1620                break;
1621        case 1024:
1622                mtu = IB_MTU_1024;
1623                break;
1624        case 512:
1625                mtu = IB_MTU_512;
1626                break;
1627        case 256:
1628                mtu = IB_MTU_256;
1629                break;
1630        default:
1631                mtu = IB_MTU_2048;
1632        }
1633        props->active_mtu = mtu;
1634        props->subnet_timeout = dev->subnet_timeout;
1635
1636        return 0;
1637}
1638
1639static int ipath_modify_device(struct ib_device *device,
1640                               int device_modify_mask,
1641                               struct ib_device_modify *device_modify)
1642{
1643        int ret;
1644
1645        if (device_modify_mask & ~(IB_DEVICE_MODIFY_SYS_IMAGE_GUID |
1646                                   IB_DEVICE_MODIFY_NODE_DESC)) {
1647                ret = -EOPNOTSUPP;
1648                goto bail;
1649        }
1650
1651        if (device_modify_mask & IB_DEVICE_MODIFY_NODE_DESC)
1652                memcpy(device->node_desc, device_modify->node_desc, 64);
1653
1654        if (device_modify_mask & IB_DEVICE_MODIFY_SYS_IMAGE_GUID)
1655                to_idev(device)->sys_image_guid =
1656                        cpu_to_be64(device_modify->sys_image_guid);
1657
1658        ret = 0;
1659
1660bail:
1661        return ret;
1662}
1663
1664static int ipath_modify_port(struct ib_device *ibdev,
1665                             u8 port, int port_modify_mask,
1666                             struct ib_port_modify *props)
1667{
1668        struct ipath_ibdev *dev = to_idev(ibdev);
1669
1670        dev->port_cap_flags |= props->set_port_cap_mask;
1671        dev->port_cap_flags &= ~props->clr_port_cap_mask;
1672        if (port_modify_mask & IB_PORT_SHUTDOWN)
1673                ipath_set_linkstate(dev->dd, IPATH_IB_LINKDOWN);
1674        if (port_modify_mask & IB_PORT_RESET_QKEY_CNTR)
1675                dev->qkey_violations = 0;
1676        return 0;
1677}
1678
1679static int ipath_query_gid(struct ib_device *ibdev, u8 port,
1680                           int index, union ib_gid *gid)
1681{
1682        struct ipath_ibdev *dev = to_idev(ibdev);
1683        int ret;
1684
1685        if (index >= 1) {
1686                ret = -EINVAL;
1687                goto bail;
1688        }
1689        gid->global.subnet_prefix = dev->gid_prefix;
1690        gid->global.interface_id = dev->dd->ipath_guid;
1691
1692        ret = 0;
1693
1694bail:
1695        return ret;
1696}
1697
1698static struct ib_pd *ipath_alloc_pd(struct ib_device *ibdev,
1699                                    struct ib_ucontext *context,
1700                                    struct ib_udata *udata)
1701{
1702        struct ipath_ibdev *dev = to_idev(ibdev);
1703        struct ipath_pd *pd;
1704        struct ib_pd *ret;
1705
1706        /*
1707         * The limit enforced here is arbitrary: the hardware imposes no
1708         * maximum on the number of PDs, but some correctness tests assume
1709         * one exists, and they fail if we allow more allocations than the
1710         * maximum we report.
1711         */
1712
1713        pd = kmalloc(sizeof *pd, GFP_KERNEL);
1714        if (!pd) {
1715                ret = ERR_PTR(-ENOMEM);
1716                goto bail;
1717        }
1718
1719        spin_lock(&dev->n_pds_lock);
1720        if (dev->n_pds_allocated == ib_ipath_max_pds) {
1721                spin_unlock(&dev->n_pds_lock);
1722                kfree(pd);
1723                ret = ERR_PTR(-ENOMEM);
1724                goto bail;
1725        }
1726
1727        dev->n_pds_allocated++;
1728        spin_unlock(&dev->n_pds_lock);
1729
1730        /* ib_alloc_pd() will initialize pd->ibpd. */
1731        pd->user = udata != NULL;
1732
1733        ret = &pd->ibpd;
1734
1735bail:
1736        return ret;
1737}
1738
1739static int ipath_dealloc_pd(struct ib_pd *ibpd)
1740{
1741        struct ipath_pd *pd = to_ipd(ibpd);
1742        struct ipath_ibdev *dev = to_idev(ibpd->device);
1743
1744        spin_lock(&dev->n_pds_lock);
1745        dev->n_pds_allocated--;
1746        spin_unlock(&dev->n_pds_lock);
1747
1748        kfree(pd);
1749
1750        return 0;
1751}
1752
1753/**
1754 * ipath_create_ah - create an address handle
1755 * @pd: the protection domain
1756 * @ah_attr: the attributes of the AH
1757 *
1758 * This may be called from interrupt context.
1759 */
1760static struct ib_ah *ipath_create_ah(struct ib_pd *pd,
1761                                     struct ib_ah_attr *ah_attr)
1762{
1763        struct ipath_ah *ah;
1764        struct ib_ah *ret;
1765        struct ipath_ibdev *dev = to_idev(pd->device);
1766        unsigned long flags;
1767
1768        /* A multicast address requires a GRH (see ch. 8.4.1). */
1769        if (ah_attr->dlid >= IPATH_MULTICAST_LID_BASE &&
1770            ah_attr->dlid != IPATH_PERMISSIVE_LID &&
1771            !(ah_attr->ah_flags & IB_AH_GRH)) {
1772                ret = ERR_PTR(-EINVAL);
1773                goto bail;
1774        }
1775
1776        if (ah_attr->dlid == 0) {
1777                ret = ERR_PTR(-EINVAL);
1778                goto bail;
1779        }
1780
1781        if (ah_attr->port_num < 1 ||
1782            ah_attr->port_num > pd->device->phys_port_cnt) {
1783                ret = ERR_PTR(-EINVAL);
1784                goto bail;
1785        }
1786
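            /* GFP_ATOMIC: see above, this may be called from interrupt context. */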
1787        ah = kmalloc(sizeof *ah, GFP_ATOMIC);
1788        if (!ah) {
1789                ret = ERR_PTR(-ENOMEM);
1790                goto bail;
1791        }
1792
1793        spin_lock_irqsave(&dev->n_ahs_lock, flags);
1794        if (dev->n_ahs_allocated == ib_ipath_max_ahs) {
1795                spin_unlock_irqrestore(&dev->n_ahs_lock, flags);
1796                kfree(ah);
1797                ret = ERR_PTR(-ENOMEM);
1798                goto bail;
1799        }
1800
1801        dev->n_ahs_allocated++;
1802        spin_unlock_irqrestore(&dev->n_ahs_lock, flags);
1803
1804        /* ib_create_ah() will initialize ah->ibah. */
1805        ah->attr = *ah_attr;
1806        ah->attr.static_rate = ipath_ib_rate_to_mult(ah_attr->static_rate);
1807
1808        ret = &ah->ibah;
1809
1810bail:
1811        return ret;
1812}
1813
1814/**
1815 * ipath_destroy_ah - destroy an address handle
1816 * @ibah: the AH to destroy
1817 *
1818 * This may be called from interrupt context.
1819 */
1820static int ipath_destroy_ah(struct ib_ah *ibah)
1821{
1822        struct ipath_ibdev *dev = to_idev(ibah->device);
1823        struct ipath_ah *ah = to_iah(ibah);
1824        unsigned long flags;
1825
1826        spin_lock_irqsave(&dev->n_ahs_lock, flags);
1827        dev->n_ahs_allocated--;
1828        spin_unlock_irqrestore(&dev->n_ahs_lock, flags);
1829
1830        kfree(ah);
1831
1832        return 0;
1833}
1834
1835static int ipath_query_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr)
1836{
1837        struct ipath_ah *ah = to_iah(ibah);
1838
1839        *ah_attr = ah->attr;
1840        ah_attr->static_rate = ipath_mult_to_ib_rate(ah->attr.static_rate);
1841
1842        return 0;
1843}
1844
1845/**
1846 * ipath_get_npkeys - return the size of the PKEY table for port 0
1847 * @dd: the infinipath device
1848 */
1849unsigned ipath_get_npkeys(struct ipath_devdata *dd)
1850{
1851        return ARRAY_SIZE(dd->ipath_pd[0]->port_pkeys);
1852}
1853
1854/**
1855 * ipath_get_pkey - return the indexed PKEY from the port PKEY table
1856 * @dd: the infinipath device
1857 * @index: the PKEY index
1858 */
1859unsigned ipath_get_pkey(struct ipath_devdata *dd, unsigned index)
1860{
1861        unsigned ret;
1862
1863        /* always a kernel port, no locking needed */
1864        if (index >= ARRAY_SIZE(dd->ipath_pd[0]->port_pkeys))
1865                ret = 0;
1866        else
1867                ret = dd->ipath_pd[0]->port_pkeys[index];
1868
1869        return ret;
1870}
1871
1872static int ipath_query_pkey(struct ib_device *ibdev, u8 port, u16 index,
1873                            u16 *pkey)
1874{
1875        struct ipath_ibdev *dev = to_idev(ibdev);
1876        int ret;
1877
1878        if (index >= ipath_get_npkeys(dev->dd)) {
1879                ret = -EINVAL;
1880                goto bail;
1881        }
1882
1883        *pkey = ipath_get_pkey(dev->dd, index);
1884        ret = 0;
1885
1886bail:
1887        return ret;
1888}
1889
1890/**
1891 * ipath_alloc_ucontext - allocate a ucontext
1892 * @ibdev: the infiniband device
1893 * @udata: not used by the InfiniPath driver
1894 */
1896static struct ib_ucontext *ipath_alloc_ucontext(struct ib_device *ibdev,
1897                                                struct ib_udata *udata)
1898{
1899        struct ipath_ucontext *context;
1900        struct ib_ucontext *ret;
1901
1902        context = kmalloc(sizeof *context, GFP_KERNEL);
1903        if (!context) {
1904                ret = ERR_PTR(-ENOMEM);
1905                goto bail;
1906        }
1907
1908        ret = &context->ibucontext;
1909
1910bail:
1911        return ret;
1912}
1913
1914static int ipath_dealloc_ucontext(struct ib_ucontext *context)
1915{
1916        kfree(to_iucontext(context));
1917        return 0;
1918}
1919
1920static int ipath_verbs_register_sysfs(struct ib_device *dev);
1921
1922static void __verbs_timer(unsigned long arg)
1923{
1924        struct ipath_devdata *dd = (struct ipath_devdata *) arg;
1925
1926        /* Handle verbs layer timeouts. */
1927        ipath_ib_timer(dd->verbs_dev);
1928
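            /* Re-arm so this runs again on the next jiffy. */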
1929        mod_timer(&dd->verbs_timer, jiffies + 1);
1930}
1931
1932static int enable_timer(struct ipath_devdata *dd)
1933{
1934        /*
1935         * Early chips had a design flaw where the chip and kernel idea
1936         * of the tail register don't always agree, and therefore we won't
1937         * get an interrupt on the next packet received.
1938         * If the board supports per packet receive interrupts, use it.
1939         * Otherwise, the timer function periodically checks for packets
1940         * to cover this case.
1941         * Either way, the timer is needed for verbs layer related
1942         * processing.
1943         */
1944        if (dd->ipath_flags & IPATH_GPIO_INTR) {
1945                ipath_write_kreg(dd, dd->ipath_kregs->kr_debugportselect,
1946                                 0x2074076542310ULL);
1947                /* Enable GPIO bit 2 interrupt */
1948                dd->ipath_gpio_mask |= (u64) (1 << IPATH_GPIO_PORT0_BIT);
1949                ipath_write_kreg(dd, dd->ipath_kregs->kr_gpio_mask,
1950                                 dd->ipath_gpio_mask);
1951        }
1952
1953        init_timer(&dd->verbs_timer);
1954        dd->verbs_timer.function = __verbs_timer;
1955        dd->verbs_timer.data = (unsigned long)dd;
1956        dd->verbs_timer.expires = jiffies + 1;
1957        add_timer(&dd->verbs_timer);
1958
1959        return 0;
1960}
1961
1962static int disable_timer(struct ipath_devdata *dd)
1963{
1965        if (dd->ipath_flags & IPATH_GPIO_INTR) {
1966                /* Disable GPIO bit 2 interrupt */
1967                dd->ipath_gpio_mask &= ~((u64) (1 << IPATH_GPIO_PORT0_BIT));
1968                ipath_write_kreg(dd, dd->ipath_kregs->kr_gpio_mask,
1969                                 dd->ipath_gpio_mask);
1970                /*
1971                 * We might want to undo changes to debugportselect,
1972                 * but how?
1973                 */
1974        }
1975
1976        del_timer_sync(&dd->verbs_timer);
1977
1978        return 0;
1979}
1980
1981/**
1982 * ipath_register_ib_device - register our device with the infiniband core
1983 * @dd: the device data structure
1984 * Return 0 on success or a negative errno; dd->verbs_dev holds the new ipath_ibdev (NULL on error).
1985 */
1986int ipath_register_ib_device(struct ipath_devdata *dd)
1987{
1988        struct ipath_verbs_counters cntrs;
1989        struct ipath_ibdev *idev;
1990        struct ib_device *dev;
1991        struct ipath_verbs_txreq *tx;
1992        unsigned i;
1993        int ret;
1994
1995        idev = (struct ipath_ibdev *)ib_alloc_device(sizeof *idev);
1996        if (idev == NULL) {
1997                ret = -ENOMEM;
1998                goto bail;
1999        }
2000
2001        dev = &idev->ibdev;
2002
2003        if (dd->ipath_sdma_descq_cnt) {
2004                tx = kmalloc(dd->ipath_sdma_descq_cnt * sizeof *tx,
2005                             GFP_KERNEL);
2006                if (tx == NULL) {
2007                        ret = -ENOMEM;
2008                        goto err_tx;
2009                }
2010        } else
2011                tx = NULL;
2012        idev->txreq_bufs = tx;
2013
2014        /* Only need to initialize non-zero fields. */
2015        spin_lock_init(&idev->n_pds_lock);
2016        spin_lock_init(&idev->n_ahs_lock);
2017        spin_lock_init(&idev->n_cqs_lock);
2018        spin_lock_init(&idev->n_qps_lock);
2019        spin_lock_init(&idev->n_srqs_lock);
2020        spin_lock_init(&idev->n_mcast_grps_lock);
2021
2022        spin_lock_init(&idev->qp_table.lock);
2023        spin_lock_init(&idev->lk_table.lock);
2024        idev->sm_lid = __constant_be16_to_cpu(IB_LID_PERMISSIVE);
2025        /* Set the prefix to the default value (see ch. 4.1.1) */
2026        idev->gid_prefix = __constant_cpu_to_be64(0xfe80000000000000ULL);
2027
2028        ret = ipath_init_qp_table(idev, ib_ipath_qp_table_size);
2029        if (ret)
2030                goto err_qp;
2031
2032        /*
2033         * The top ib_ipath_lkey_table_size bits are used to index the
2034         * table.  The lower 8 bits can be owned by the user (copied from
2035         * the LKEY).  The remaining bits act as a generation number or tag.
2036         */
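            /*
             * Purely illustrative example: with a 12-bit table size, LKEY
             * 0xABCDEF12 would use 0xABC (bits 31..20) as the table index,
             * 0xDEF (bits 19..8) as the generation tag, and 0x12
             * (bits 7..0) as the user-owned byte.
             */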
2037        idev->lk_table.max = 1 << ib_ipath_lkey_table_size;
2038        idev->lk_table.table = kzalloc(idev->lk_table.max *
2039                                       sizeof(*idev->lk_table.table),
2040                                       GFP_KERNEL);
2041        if (idev->lk_table.table == NULL) {
2042                ret = -ENOMEM;
2043                goto err_lk;
2044        }
2045        INIT_LIST_HEAD(&idev->pending_mmaps);
2046        spin_lock_init(&idev->pending_lock);
2047        idev->mmap_offset = PAGE_SIZE;
2048        spin_lock_init(&idev->mmap_offset_lock);
2049        INIT_LIST_HEAD(&idev->pending[0]);
2050        INIT_LIST_HEAD(&idev->pending[1]);
2051        INIT_LIST_HEAD(&idev->pending[2]);
2052        INIT_LIST_HEAD(&idev->piowait);
2053        INIT_LIST_HEAD(&idev->rnrwait);
2054        INIT_LIST_HEAD(&idev->txreq_free);
2055        idev->pending_index = 0;
2056        idev->port_cap_flags =
2057                IB_PORT_SYS_IMAGE_GUID_SUP | IB_PORT_CLIENT_REG_SUP;
2058        if (dd->ipath_flags & IPATH_HAS_LINK_LATENCY)
2059                idev->port_cap_flags |= IB_PORT_LINK_LATENCY_SUP;
2060        idev->pma_counter_select[0] = IB_PMA_PORT_XMIT_DATA;
2061        idev->pma_counter_select[1] = IB_PMA_PORT_RCV_DATA;
2062        idev->pma_counter_select[2] = IB_PMA_PORT_XMIT_PKTS;
2063        idev->pma_counter_select[3] = IB_PMA_PORT_RCV_PKTS;
2064        idev->pma_counter_select[4] = IB_PMA_PORT_XMIT_WAIT;
2065
2066        /* Snapshot current HW counters to "clear" them. */
2067        ipath_get_counters(dd, &cntrs);
2068        idev->z_symbol_error_counter = cntrs.symbol_error_counter;
2069        idev->z_link_error_recovery_counter =
2070                cntrs.link_error_recovery_counter;
2071        idev->z_link_downed_counter = cntrs.link_downed_counter;
2072        idev->z_port_rcv_errors = cntrs.port_rcv_errors;
2073        idev->z_port_rcv_remphys_errors =
2074                cntrs.port_rcv_remphys_errors;
2075        idev->z_port_xmit_discards = cntrs.port_xmit_discards;
2076        idev->z_port_xmit_data = cntrs.port_xmit_data;
2077        idev->z_port_rcv_data = cntrs.port_rcv_data;
2078        idev->z_port_xmit_packets = cntrs.port_xmit_packets;
2079        idev->z_port_rcv_packets = cntrs.port_rcv_packets;
2080        idev->z_local_link_integrity_errors =
2081                cntrs.local_link_integrity_errors;
2082        idev->z_excessive_buffer_overrun_errors =
2083                cntrs.excessive_buffer_overrun_errors;
2084        idev->z_vl15_dropped = cntrs.vl15_dropped;
2085
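            /* Put all the preallocated SDMA tx requests on the free list. */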
2086        for (i = 0; i < dd->ipath_sdma_descq_cnt; i++, tx++)
2087                list_add(&tx->txreq.list, &idev->txreq_free);
2088
2089        /*
2090         * The system image GUID is supposed to be the same for all
2091         * IB HCAs in a single system but since there can be other
2092         * device types in the system, we can't be sure this is unique.
2093         */
2094        if (!sys_image_guid)
2095                sys_image_guid = dd->ipath_guid;
2096        idev->sys_image_guid = sys_image_guid;
2097        idev->ib_unit = dd->ipath_unit;
2098        idev->dd = dd;
2099
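            /* The "%d" in "ipath%d" is filled in by the IB core at registration. */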
2100        strlcpy(dev->name, "ipath%d", IB_DEVICE_NAME_MAX);
2101        dev->owner = THIS_MODULE;
2102        dev->node_guid = dd->ipath_guid;
2103        dev->uverbs_abi_ver = IPATH_UVERBS_ABI_VERSION;
2104        dev->uverbs_cmd_mask =
2105                (1ull << IB_USER_VERBS_CMD_GET_CONTEXT)         |
2106                (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE)        |
2107                (1ull << IB_USER_VERBS_CMD_QUERY_PORT)          |
2108                (1ull << IB_USER_VERBS_CMD_ALLOC_PD)            |
2109                (1ull << IB_USER_VERBS_CMD_DEALLOC_PD)          |
2110                (1ull << IB_USER_VERBS_CMD_CREATE_AH)           |
2111                (1ull << IB_USER_VERBS_CMD_DESTROY_AH)          |
2112                (1ull << IB_USER_VERBS_CMD_QUERY_AH)            |
2113                (1ull << IB_USER_VERBS_CMD_REG_MR)              |
2114                (1ull << IB_USER_VERBS_CMD_DEREG_MR)            |
2115                (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) |
2116                (1ull << IB_USER_VERBS_CMD_CREATE_CQ)           |
2117                (1ull << IB_USER_VERBS_CMD_RESIZE_CQ)           |
2118                (1ull << IB_USER_VERBS_CMD_DESTROY_CQ)          |
2119                (1ull << IB_USER_VERBS_CMD_POLL_CQ)             |
2120                (1ull << IB_USER_VERBS_CMD_REQ_NOTIFY_CQ)       |
2121                (1ull << IB_USER_VERBS_CMD_CREATE_QP)           |
2122                (1ull << IB_USER_VERBS_CMD_QUERY_QP)            |
2123                (1ull << IB_USER_VERBS_CMD_MODIFY_QP)           |
2124                (1ull << IB_USER_VERBS_CMD_DESTROY_QP)          |
2125                (1ull << IB_USER_VERBS_CMD_POST_SEND)           |
2126                (1ull << IB_USER_VERBS_CMD_POST_RECV)           |
2127                (1ull << IB_USER_VERBS_CMD_ATTACH_MCAST)        |
2128                (1ull << IB_USER_VERBS_CMD_DETACH_MCAST)        |
2129                (1ull << IB_USER_VERBS_CMD_CREATE_SRQ)          |
2130                (1ull << IB_USER_VERBS_CMD_MODIFY_SRQ)          |
2131                (1ull << IB_USER_VERBS_CMD_QUERY_SRQ)           |
2132                (1ull << IB_USER_VERBS_CMD_DESTROY_SRQ)         |
2133                (1ull << IB_USER_VERBS_CMD_POST_SRQ_RECV);
2134        dev->node_type = RDMA_NODE_IB_CA;
2135        dev->phys_port_cnt = 1;
2136        dev->num_comp_vectors = 1;
2137        dev->dma_device = &dd->pcidev->dev;
2138        dev->query_device = ipath_query_device;
2139        dev->modify_device = ipath_modify_device;
2140        dev->query_port = ipath_query_port;
2141        dev->modify_port = ipath_modify_port;
2142        dev->query_pkey = ipath_query_pkey;
2143        dev->query_gid = ipath_query_gid;
2144        dev->alloc_ucontext = ipath_alloc_ucontext;
2145        dev->dealloc_ucontext = ipath_dealloc_ucontext;
2146        dev->alloc_pd = ipath_alloc_pd;
2147        dev->dealloc_pd = ipath_dealloc_pd;
2148        dev->create_ah = ipath_create_ah;
2149        dev->destroy_ah = ipath_destroy_ah;
2150        dev->query_ah = ipath_query_ah;
2151        dev->create_srq = ipath_create_srq;
2152        dev->modify_srq = ipath_modify_srq;
2153        dev->query_srq = ipath_query_srq;
2154        dev->destroy_srq = ipath_destroy_srq;
2155        dev->create_qp = ipath_create_qp;
2156        dev->modify_qp = ipath_modify_qp;
2157        dev->query_qp = ipath_query_qp;
2158        dev->destroy_qp = ipath_destroy_qp;
2159        dev->post_send = ipath_post_send;
2160        dev->post_recv = ipath_post_receive;
2161        dev->post_srq_recv = ipath_post_srq_receive;
2162        dev->create_cq = ipath_create_cq;
2163        dev->destroy_cq = ipath_destroy_cq;
2164        dev->resize_cq = ipath_resize_cq;
2165        dev->poll_cq = ipath_poll_cq;
2166        dev->req_notify_cq = ipath_req_notify_cq;
2167        dev->get_dma_mr = ipath_get_dma_mr;
2168        dev->reg_phys_mr = ipath_reg_phys_mr;
2169        dev->reg_user_mr = ipath_reg_user_mr;
2170        dev->dereg_mr = ipath_dereg_mr;
2171        dev->alloc_fmr = ipath_alloc_fmr;
2172        dev->map_phys_fmr = ipath_map_phys_fmr;
2173        dev->unmap_fmr = ipath_unmap_fmr;
2174        dev->dealloc_fmr = ipath_dealloc_fmr;
2175        dev->attach_mcast = ipath_multicast_attach;
2176        dev->detach_mcast = ipath_multicast_detach;
2177        dev->process_mad = ipath_process_mad;
2178        dev->mmap = ipath_mmap;
2179        dev->dma_ops = &ipath_dma_mapping_ops;
2180
2181        snprintf(dev->node_desc, sizeof(dev->node_desc),
2182                 IPATH_IDSTR " %s", init_utsname()->nodename);
2183
2184        ret = ib_register_device(dev);
2185        if (ret)
2186                goto err_reg;
2187
2188        ret = ipath_verbs_register_sysfs(dev);
2189        if (ret)
2190                goto err_class;

2191        enable_timer(dd);
2192
2193        goto bail;
2194
2195err_class:
2196        ib_unregister_device(dev);
2197err_reg:
2198        kfree(idev->lk_table.table);
2199err_lk:
2200        kfree(idev->qp_table.table);
2201err_qp:
2202        kfree(idev->txreq_bufs);
2203err_tx:
2204        ib_dealloc_device(dev);
2205        ipath_dev_err(dd, "cannot register verbs: %d!\n", -ret);
2206        idev = NULL;
2207
2208bail:
2209        dd->verbs_dev = idev;
2210        return ret;
2211}
2212
2213void ipath_unregister_ib_device(struct ipath_ibdev *dev)
2214{
2215        struct ib_device *ibdev = &dev->ibdev;
2216        u32 qps_inuse;
2217
2218        ib_unregister_device(ibdev);
2219
2220        disable_timer(dev->dd);
2221
2222        if (!list_empty(&dev->pending[0]) ||
2223            !list_empty(&dev->pending[1]) ||
2224            !list_empty(&dev->pending[2]))
2225                ipath_dev_err(dev->dd, "pending list not empty!\n");
2226        if (!list_empty(&dev->piowait))
2227                ipath_dev_err(dev->dd, "piowait list not empty!\n");
2228        if (!list_empty(&dev->rnrwait))
2229                ipath_dev_err(dev->dd, "rnrwait list not empty!\n");
2230        if (!ipath_mcast_tree_empty())
2231                ipath_dev_err(dev->dd, "multicast table memory leak!\n");
2232        /*
2233         * Note that ipath_unregister_ib_device() can be called before all
2234         * the QPs are destroyed!
2235         */
2236        qps_inuse = ipath_free_all_qps(&dev->qp_table);
2237        if (qps_inuse)
2238                ipath_dev_err(dev->dd, "QP memory leak! %u still in use\n",
2239                        qps_inuse);
2240        kfree(dev->qp_table.table);
2241        kfree(dev->lk_table.table);
2242        kfree(dev->txreq_bufs);
2243        ib_dealloc_device(ibdev);
2244}
2245
2246static ssize_t show_rev(struct device *device, struct device_attribute *attr,
2247                        char *buf)
2248{
2249        struct ipath_ibdev *dev =
2250                container_of(device, struct ipath_ibdev, ibdev.dev);
2251
2252        return sprintf(buf, "%x\n", dev->dd->ipath_pcirev);
2253}
2254
2255static ssize_t show_hca(struct device *device, struct device_attribute *attr,
2256                        char *buf)
2257{
2258        struct ipath_ibdev *dev =
2259                container_of(device, struct ipath_ibdev, ibdev.dev);
2260        int ret;
2261
2262        ret = dev->dd->ipath_f_get_boardname(dev->dd, buf, 128);
2263        if (ret < 0)
2264                goto bail;
2265        strcat(buf, "\n");
2266        ret = strlen(buf);
2267
2268bail:
2269        return ret;
2270}
2271
2272static ssize_t show_stats(struct device *device, struct device_attribute *attr,
2273                          char *buf)
2274{
2275        struct ipath_ibdev *dev =
2276                container_of(device, struct ipath_ibdev, ibdev.dev);
2277        int i;
2278        int len;
2279
2280        len = sprintf(buf,
2281                      "RC resends  %d\n"
2282                      "RC no QACK  %d\n"
2283                      "RC ACKs     %d\n"
2284                      "RC SEQ NAKs %d\n"
2285                      "RC RDMA seq %d\n"
2286                      "RC RNR NAKs %d\n"
2287                      "RC OTH NAKs %d\n"
2288                      "RC timeouts %d\n"
2289                      "RC RDMA dup %d\n"
2290                      "piobuf wait %d\n"
2291                      "unaligned   %d\n"
2292                      "PKT drops   %d\n"
2293                      "WQE errs    %d\n",
2294                      dev->n_rc_resends, dev->n_rc_qacks, dev->n_rc_acks,
2295                      dev->n_seq_naks, dev->n_rdma_seq, dev->n_rnr_naks,
2296                      dev->n_other_naks, dev->n_timeouts,
2297                      dev->n_rdma_dup_busy, dev->n_piowait, dev->n_unaligned,
2298                      dev->n_pkt_drops, dev->n_wqe_errs);
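            /* Append per-opcode packet/byte counts, skipping idle opcodes. */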
2299        for (i = 0; i < ARRAY_SIZE(dev->opstats); i++) {
2300                const struct ipath_opcode_stats *si = &dev->opstats[i];
2301
2302                if (!si->n_packets && !si->n_bytes)
2303                        continue;
2304                len += sprintf(buf + len, "%02x %llu/%llu\n", i,
2305                               (unsigned long long) si->n_packets,
2306                               (unsigned long long) si->n_bytes);
2307        }
2308        return len;
2309}
2310
2311static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL);
2312static DEVICE_ATTR(hca_type, S_IRUGO, show_hca, NULL);
2313static DEVICE_ATTR(board_id, S_IRUGO, show_hca, NULL);
2314static DEVICE_ATTR(stats, S_IRUGO, show_stats, NULL);
2315
2316static struct device_attribute *ipath_class_attributes[] = {
2317        &dev_attr_hw_rev,
2318        &dev_attr_hca_type,
2319        &dev_attr_board_id,
2320        &dev_attr_stats
2321};
2322
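    /*
     * Create the read-only sysfs files (hw_rev, hca_type, board_id, stats)
     * under the IB device's sysfs directory, e.g.
     * /sys/class/infiniband/ipath0/.  Note that hca_type and board_id both
     * report the board name.
     */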
2323static int ipath_verbs_register_sysfs(struct ib_device *dev)
2324{
2325        int i;
2326        int ret;
2327
2328        for (i = 0; i < ARRAY_SIZE(ipath_class_attributes); ++i) {
2329                ret = device_create_file(&dev->dev,
2330                                         ipath_class_attributes[i]);
2331                if (ret)
2332                        goto bail;
2333        }
2334
2335        ret = 0;
2336
2337bail:
2338        return ret;
2339}
2340