linux/drivers/infiniband/hw/ipath/ipath_verbs.c
   1/*
   2 * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved.
   3 * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
   4 *
   5 * This software is available to you under a choice of one of two
   6 * licenses.  You may choose to be licensed under the terms of the GNU
   7 * General Public License (GPL) Version 2, available from the file
   8 * COPYING in the main directory of this source tree, or the
   9 * OpenIB.org BSD license below:
  10 *
  11 *     Redistribution and use in source and binary forms, with or
  12 *     without modification, are permitted provided that the following
  13 *     conditions are met:
  14 *
  15 *      - Redistributions of source code must retain the above
  16 *        copyright notice, this list of conditions and the following
  17 *        disclaimer.
  18 *
  19 *      - Redistributions in binary form must reproduce the above
  20 *        copyright notice, this list of conditions and the following
  21 *        disclaimer in the documentation and/or other materials
  22 *        provided with the distribution.
  23 *
  24 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  25 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  26 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  27 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
  28 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
  29 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  31 * SOFTWARE.
  32 */
  33
  34#include <rdma/ib_mad.h>
  35#include <rdma/ib_user_verbs.h>
  36#include <linux/io.h>
  37#include <linux/slab.h>
  38#include <linux/utsname.h>
  39#include <linux/rculist.h>
  40
  41#include "ipath_kernel.h"
  42#include "ipath_verbs.h"
  43#include "ipath_common.h"
  44
  45static unsigned int ib_ipath_qp_table_size = 251;
  46module_param_named(qp_table_size, ib_ipath_qp_table_size, uint, S_IRUGO);
  47MODULE_PARM_DESC(qp_table_size, "QP table size");
  48
  49unsigned int ib_ipath_lkey_table_size = 12;
  50module_param_named(lkey_table_size, ib_ipath_lkey_table_size, uint,
  51                   S_IRUGO);
  52MODULE_PARM_DESC(lkey_table_size,
  53                 "LKEY table size in bits (2^n, 1 <= n <= 23)");
  54
  55static unsigned int ib_ipath_max_pds = 0xFFFF;
  56module_param_named(max_pds, ib_ipath_max_pds, uint, S_IWUSR | S_IRUGO);
  57MODULE_PARM_DESC(max_pds,
  58                 "Maximum number of protection domains to support");
  59
  60static unsigned int ib_ipath_max_ahs = 0xFFFF;
  61module_param_named(max_ahs, ib_ipath_max_ahs, uint, S_IWUSR | S_IRUGO);
  62MODULE_PARM_DESC(max_ahs, "Maximum number of address handles to support");
  63
  64unsigned int ib_ipath_max_cqes = 0x2FFFF;
  65module_param_named(max_cqes, ib_ipath_max_cqes, uint, S_IWUSR | S_IRUGO);
  66MODULE_PARM_DESC(max_cqes,
  67                 "Maximum number of completion queue entries to support");
  68
  69unsigned int ib_ipath_max_cqs = 0x1FFFF;
  70module_param_named(max_cqs, ib_ipath_max_cqs, uint, S_IWUSR | S_IRUGO);
  71MODULE_PARM_DESC(max_cqs, "Maximum number of completion queues to support");
  72
  73unsigned int ib_ipath_max_qp_wrs = 0x3FFF;
  74module_param_named(max_qp_wrs, ib_ipath_max_qp_wrs, uint,
  75                   S_IWUSR | S_IRUGO);
  76MODULE_PARM_DESC(max_qp_wrs, "Maximum number of QP WRs to support");
  77
  78unsigned int ib_ipath_max_qps = 16384;
  79module_param_named(max_qps, ib_ipath_max_qps, uint, S_IWUSR | S_IRUGO);
  80MODULE_PARM_DESC(max_qps, "Maximum number of QPs to support");
  81
  82unsigned int ib_ipath_max_sges = 0x60;
  83module_param_named(max_sges, ib_ipath_max_sges, uint, S_IWUSR | S_IRUGO);
  84MODULE_PARM_DESC(max_sges, "Maximum number of SGEs to support");
  85
  86unsigned int ib_ipath_max_mcast_grps = 16384;
  87module_param_named(max_mcast_grps, ib_ipath_max_mcast_grps, uint,
  88                   S_IWUSR | S_IRUGO);
  89MODULE_PARM_DESC(max_mcast_grps,
  90                 "Maximum number of multicast groups to support");
  91
  92unsigned int ib_ipath_max_mcast_qp_attached = 16;
  93module_param_named(max_mcast_qp_attached, ib_ipath_max_mcast_qp_attached,
  94                   uint, S_IWUSR | S_IRUGO);
  95MODULE_PARM_DESC(max_mcast_qp_attached,
   96                 "Maximum number of QPs attached per multicast group");
  97
  98unsigned int ib_ipath_max_srqs = 1024;
  99module_param_named(max_srqs, ib_ipath_max_srqs, uint, S_IWUSR | S_IRUGO);
 100MODULE_PARM_DESC(max_srqs, "Maximum number of SRQs to support");
 101
 102unsigned int ib_ipath_max_srq_sges = 128;
 103module_param_named(max_srq_sges, ib_ipath_max_srq_sges,
 104                   uint, S_IWUSR | S_IRUGO);
 105MODULE_PARM_DESC(max_srq_sges, "Maximum number of SRQ SGEs to support");
 106
 107unsigned int ib_ipath_max_srq_wrs = 0x1FFFF;
 108module_param_named(max_srq_wrs, ib_ipath_max_srq_wrs,
 109                   uint, S_IWUSR | S_IRUGO);
  110MODULE_PARM_DESC(max_srq_wrs, "Maximum number of SRQ WRs to support");
 111
 112static unsigned int ib_ipath_disable_sma;
 113module_param_named(disable_sma, ib_ipath_disable_sma, uint, S_IWUSR | S_IRUGO);
 114MODULE_PARM_DESC(disable_sma, "Disable the SMA");
 115
 116/*
 117 * Note that it is OK to post send work requests in the SQE and ERR
 118 * states; ipath_do_send() will process them and generate error
 119 * completions as per IB 1.2 C10-96.
 120 */
 121const int ib_ipath_state_ops[IB_QPS_ERR + 1] = {
 122        [IB_QPS_RESET] = 0,
 123        [IB_QPS_INIT] = IPATH_POST_RECV_OK,
 124        [IB_QPS_RTR] = IPATH_POST_RECV_OK | IPATH_PROCESS_RECV_OK,
 125        [IB_QPS_RTS] = IPATH_POST_RECV_OK | IPATH_PROCESS_RECV_OK |
 126            IPATH_POST_SEND_OK | IPATH_PROCESS_SEND_OK |
 127            IPATH_PROCESS_NEXT_SEND_OK,
 128        [IB_QPS_SQD] = IPATH_POST_RECV_OK | IPATH_PROCESS_RECV_OK |
 129            IPATH_POST_SEND_OK | IPATH_PROCESS_SEND_OK,
 130        [IB_QPS_SQE] = IPATH_POST_RECV_OK | IPATH_PROCESS_RECV_OK |
 131            IPATH_POST_SEND_OK | IPATH_FLUSH_SEND,
 132        [IB_QPS_ERR] = IPATH_POST_RECV_OK | IPATH_FLUSH_RECV |
 133            IPATH_POST_SEND_OK | IPATH_FLUSH_SEND,
 134};
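
/*
 * Illustrative note (editor's addition): each entry above is a bitmask of
 * the operations permitted in that QP state.  For example, IB_QPS_SQE
 * keeps IPATH_POST_SEND_OK but pairs it with IPATH_FLUSH_SEND, so newly
 * posted send WRs are accepted and then completed in error by
 * ipath_do_send(), per C10-96; IB_QPS_RESET permits nothing, and only
 * IB_QPS_RTS sets IPATH_PROCESS_NEXT_SEND_OK.
 */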
 135
 136struct ipath_ucontext {
 137        struct ib_ucontext ibucontext;
 138};
 139
 140static inline struct ipath_ucontext *to_iucontext(struct ib_ucontext
 141                                                  *ibucontext)
 142{
 143        return container_of(ibucontext, struct ipath_ucontext, ibucontext);
 144}
 145
 146/*
 147 * Translate ib_wr_opcode into ib_wc_opcode.
 148 */
 149const enum ib_wc_opcode ib_ipath_wc_opcode[] = {
 150        [IB_WR_RDMA_WRITE] = IB_WC_RDMA_WRITE,
 151        [IB_WR_RDMA_WRITE_WITH_IMM] = IB_WC_RDMA_WRITE,
 152        [IB_WR_SEND] = IB_WC_SEND,
 153        [IB_WR_SEND_WITH_IMM] = IB_WC_SEND,
 154        [IB_WR_RDMA_READ] = IB_WC_RDMA_READ,
 155        [IB_WR_ATOMIC_CMP_AND_SWP] = IB_WC_COMP_SWAP,
 156        [IB_WR_ATOMIC_FETCH_AND_ADD] = IB_WC_FETCH_ADD
 157};
 158
 159/*
 160 * System image GUID.
 161 */
 162static __be64 sys_image_guid;
 163
 164/**
 165 * ipath_copy_sge - copy data to SGE memory
 166 * @ss: the SGE state
 167 * @data: the data to copy
 168 * @length: the length of the data
 169 */
 170void ipath_copy_sge(struct ipath_sge_state *ss, void *data, u32 length)
 171{
 172        struct ipath_sge *sge = &ss->sge;
 173
 174        while (length) {
 175                u32 len = sge->length;
 176
 177                if (len > length)
 178                        len = length;
 179                if (len > sge->sge_length)
 180                        len = sge->sge_length;
 181                BUG_ON(len == 0);
 182                memcpy(sge->vaddr, data, len);
 183                sge->vaddr += len;
 184                sge->length -= len;
 185                sge->sge_length -= len;
 186                if (sge->sge_length == 0) {
 187                        if (--ss->num_sge)
 188                                *sge = *ss->sg_list++;
 189                } else if (sge->length == 0 && sge->mr != NULL) {
 190                        if (++sge->n >= IPATH_SEGSZ) {
 191                                if (++sge->m >= sge->mr->mapsz)
 192                                        break;
 193                                sge->n = 0;
 194                        }
 195                        sge->vaddr =
 196                                sge->mr->map[sge->m]->segs[sge->n].vaddr;
 197                        sge->length =
 198                                sge->mr->map[sge->m]->segs[sge->n].length;
 199                }
 200                data += len;
 201                length -= len;
 202        }
 203}
 204
 205/**
  206 * ipath_skip_sge - skip over SGE memory (nearly a duplicate of ipath_copy_sge)
 207 * @ss: the SGE state
 208 * @length: the number of bytes to skip
 209 */
 210void ipath_skip_sge(struct ipath_sge_state *ss, u32 length)
 211{
 212        struct ipath_sge *sge = &ss->sge;
 213
 214        while (length) {
 215                u32 len = sge->length;
 216
 217                if (len > length)
 218                        len = length;
 219                if (len > sge->sge_length)
 220                        len = sge->sge_length;
 221                BUG_ON(len == 0);
 222                sge->vaddr += len;
 223                sge->length -= len;
 224                sge->sge_length -= len;
 225                if (sge->sge_length == 0) {
 226                        if (--ss->num_sge)
 227                                *sge = *ss->sg_list++;
 228                } else if (sge->length == 0 && sge->mr != NULL) {
 229                        if (++sge->n >= IPATH_SEGSZ) {
 230                                if (++sge->m >= sge->mr->mapsz)
 231                                        break;
 232                                sge->n = 0;
 233                        }
 234                        sge->vaddr =
 235                                sge->mr->map[sge->m]->segs[sge->n].vaddr;
 236                        sge->length =
 237                                sge->mr->map[sge->m]->segs[sge->n].length;
 238                }
 239                length -= len;
 240        }
 241}
 242
 243/*
 244 * Count the number of DMA descriptors needed to send length bytes of data.
 245 * Don't modify the ipath_sge_state to get the count.
 246 * Return zero if any of the segments is not aligned.
 247 */
 248static u32 ipath_count_sge(struct ipath_sge_state *ss, u32 length)
 249{
 250        struct ipath_sge *sg_list = ss->sg_list;
 251        struct ipath_sge sge = ss->sge;
 252        u8 num_sge = ss->num_sge;
 253        u32 ndesc = 1;  /* count the header */
 254
 255        while (length) {
 256                u32 len = sge.length;
 257
 258                if (len > length)
 259                        len = length;
 260                if (len > sge.sge_length)
 261                        len = sge.sge_length;
 262                BUG_ON(len == 0);
 263                if (((long) sge.vaddr & (sizeof(u32) - 1)) ||
 264                    (len != length && (len & (sizeof(u32) - 1)))) {
 265                        ndesc = 0;
 266                        break;
 267                }
 268                ndesc++;
 269                sge.vaddr += len;
 270                sge.length -= len;
 271                sge.sge_length -= len;
 272                if (sge.sge_length == 0) {
 273                        if (--num_sge)
 274                                sge = *sg_list++;
 275                } else if (sge.length == 0 && sge.mr != NULL) {
 276                        if (++sge.n >= IPATH_SEGSZ) {
 277                                if (++sge.m >= sge.mr->mapsz)
 278                                        break;
 279                                sge.n = 0;
 280                        }
 281                        sge.vaddr =
 282                                sge.mr->map[sge.m]->segs[sge.n].vaddr;
 283                        sge.length =
 284                                sge.mr->map[sge.m]->segs[sge.n].length;
 285                }
 286                length -= len;
 287        }
 288        return ndesc;
 289}
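
/*
 * Worked example (editor's sketch, addresses assumed): a 14-byte send
 * described by two dword-aligned SGEs of 8 and 6 bytes yields ndesc = 3
 * (one header descriptor plus one per segment); only the final chunk may
 * have a length that is not a multiple of 4.  If the second SGE instead
 * started at an address like 0x...2002, the vaddr alignment test fails
 * and 0 is returned, which makes ipath_verbs_send_dma() fall back to its
 * bounce-buffer copy path.
 */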
 290
 291/*
 292 * Copy from the SGEs to the data buffer.
 293 */
 294static void ipath_copy_from_sge(void *data, struct ipath_sge_state *ss,
 295                                u32 length)
 296{
 297        struct ipath_sge *sge = &ss->sge;
 298
 299        while (length) {
 300                u32 len = sge->length;
 301
 302                if (len > length)
 303                        len = length;
 304                if (len > sge->sge_length)
 305                        len = sge->sge_length;
 306                BUG_ON(len == 0);
 307                memcpy(data, sge->vaddr, len);
 308                sge->vaddr += len;
 309                sge->length -= len;
 310                sge->sge_length -= len;
 311                if (sge->sge_length == 0) {
 312                        if (--ss->num_sge)
 313                                *sge = *ss->sg_list++;
 314                } else if (sge->length == 0 && sge->mr != NULL) {
 315                        if (++sge->n >= IPATH_SEGSZ) {
 316                                if (++sge->m >= sge->mr->mapsz)
 317                                        break;
 318                                sge->n = 0;
 319                        }
 320                        sge->vaddr =
 321                                sge->mr->map[sge->m]->segs[sge->n].vaddr;
 322                        sge->length =
 323                                sge->mr->map[sge->m]->segs[sge->n].length;
 324                }
 325                data += len;
 326                length -= len;
 327        }
 328}
 329
 330/**
 331 * ipath_post_one_send - post one RC, UC, or UD send work request
 332 * @qp: the QP to post on
 333 * @wr: the work request to send
 334 */
 335static int ipath_post_one_send(struct ipath_qp *qp, struct ib_send_wr *wr)
 336{
 337        struct ipath_swqe *wqe;
 338        u32 next;
 339        int i;
 340        int j;
 341        int acc;
 342        int ret;
 343        unsigned long flags;
 344        struct ipath_devdata *dd = to_idev(qp->ibqp.device)->dd;
 345
 346        spin_lock_irqsave(&qp->s_lock, flags);
 347
 348        if (qp->ibqp.qp_type != IB_QPT_SMI &&
 349            !(dd->ipath_flags & IPATH_LINKACTIVE)) {
 350                ret = -ENETDOWN;
 351                goto bail;
 352        }
 353
 354        /* Check that state is OK to post send. */
 355        if (unlikely(!(ib_ipath_state_ops[qp->state] & IPATH_POST_SEND_OK)))
 356                goto bail_inval;
 357
 358        /* IB spec says that num_sge == 0 is OK. */
 359        if (wr->num_sge > qp->s_max_sge)
 360                goto bail_inval;
 361
 362        /*
  363         * Don't allow RDMA reads or atomic operations on UC QPs, and
  364         * reject undefined opcodes.
  365         * Make sure the buffer is large enough to hold the result for atomics.
 366         */
 367        if (qp->ibqp.qp_type == IB_QPT_UC) {
 368                if ((unsigned) wr->opcode >= IB_WR_RDMA_READ)
 369                        goto bail_inval;
 370        } else if (qp->ibqp.qp_type == IB_QPT_UD) {
 371                /* Check UD opcode */
 372                if (wr->opcode != IB_WR_SEND &&
 373                    wr->opcode != IB_WR_SEND_WITH_IMM)
 374                        goto bail_inval;
 375                /* Check UD destination address PD */
 376                if (qp->ibqp.pd != wr->wr.ud.ah->pd)
 377                        goto bail_inval;
 378        } else if ((unsigned) wr->opcode > IB_WR_ATOMIC_FETCH_AND_ADD)
 379                goto bail_inval;
 380        else if (wr->opcode >= IB_WR_ATOMIC_CMP_AND_SWP &&
 381                   (wr->num_sge == 0 ||
 382                    wr->sg_list[0].length < sizeof(u64) ||
 383                    wr->sg_list[0].addr & (sizeof(u64) - 1)))
 384                goto bail_inval;
 385        else if (wr->opcode >= IB_WR_RDMA_READ && !qp->s_max_rd_atomic)
 386                goto bail_inval;
 387
 388        next = qp->s_head + 1;
 389        if (next >= qp->s_size)
 390                next = 0;
 391        if (next == qp->s_last) {
 392                ret = -ENOMEM;
 393                goto bail;
 394        }
 395
 396        wqe = get_swqe_ptr(qp, qp->s_head);
 397        wqe->wr = *wr;
 398        wqe->length = 0;
 399        if (wr->num_sge) {
 400                acc = wr->opcode >= IB_WR_RDMA_READ ?
 401                        IB_ACCESS_LOCAL_WRITE : 0;
 402                for (i = 0, j = 0; i < wr->num_sge; i++) {
 403                        u32 length = wr->sg_list[i].length;
 404                        int ok;
 405
 406                        if (length == 0)
 407                                continue;
 408                        ok = ipath_lkey_ok(qp, &wqe->sg_list[j],
 409                                           &wr->sg_list[i], acc);
 410                        if (!ok)
 411                                goto bail_inval;
 412                        wqe->length += length;
 413                        j++;
 414                }
 415                wqe->wr.num_sge = j;
 416        }
 417        if (qp->ibqp.qp_type == IB_QPT_UC ||
 418            qp->ibqp.qp_type == IB_QPT_RC) {
 419                if (wqe->length > 0x80000000U)
 420                        goto bail_inval;
 421        } else if (wqe->length > to_idev(qp->ibqp.device)->dd->ipath_ibmtu)
 422                goto bail_inval;
 423        wqe->ssn = qp->s_ssn++;
 424        qp->s_head = next;
 425
 426        ret = 0;
 427        goto bail;
 428
 429bail_inval:
 430        ret = -EINVAL;
 431bail:
 432        spin_unlock_irqrestore(&qp->s_lock, flags);
 433        return ret;
 434}
 435
 436/**
 437 * ipath_post_send - post a send on a QP
 438 * @ibqp: the QP to post the send on
 439 * @wr: the list of work requests to post
 440 * @bad_wr: the first bad WR is put here
 441 *
 442 * This may be called from interrupt context.
 443 */
 444static int ipath_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
 445                           struct ib_send_wr **bad_wr)
 446{
 447        struct ipath_qp *qp = to_iqp(ibqp);
 448        int err = 0;
 449
 450        for (; wr; wr = wr->next) {
 451                err = ipath_post_one_send(qp, wr);
 452                if (err) {
 453                        *bad_wr = wr;
 454                        goto bail;
 455                }
 456        }
 457
 458        /* Try to do the send work in the caller's context. */
 459        ipath_do_send((unsigned long) qp);
 460
 461bail:
 462        return err;
 463}
 464
 465/**
 466 * ipath_post_receive - post a receive on a QP
 467 * @ibqp: the QP to post the receive on
 468 * @wr: the WR to post
 469 * @bad_wr: the first bad WR is put here
 470 *
 471 * This may be called from interrupt context.
 472 */
 473static int ipath_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
 474                              struct ib_recv_wr **bad_wr)
 475{
 476        struct ipath_qp *qp = to_iqp(ibqp);
 477        struct ipath_rwq *wq = qp->r_rq.wq;
 478        unsigned long flags;
 479        int ret;
 480
 481        /* Check that state is OK to post receive. */
 482        if (!(ib_ipath_state_ops[qp->state] & IPATH_POST_RECV_OK) || !wq) {
 483                *bad_wr = wr;
 484                ret = -EINVAL;
 485                goto bail;
 486        }
 487
 488        for (; wr; wr = wr->next) {
 489                struct ipath_rwqe *wqe;
 490                u32 next;
 491                int i;
 492
 493                if ((unsigned) wr->num_sge > qp->r_rq.max_sge) {
 494                        *bad_wr = wr;
 495                        ret = -EINVAL;
 496                        goto bail;
 497                }
 498
 499                spin_lock_irqsave(&qp->r_rq.lock, flags);
 500                next = wq->head + 1;
 501                if (next >= qp->r_rq.size)
 502                        next = 0;
 503                if (next == wq->tail) {
 504                        spin_unlock_irqrestore(&qp->r_rq.lock, flags);
 505                        *bad_wr = wr;
 506                        ret = -ENOMEM;
 507                        goto bail;
 508                }
 509
 510                wqe = get_rwqe_ptr(&qp->r_rq, wq->head);
 511                wqe->wr_id = wr->wr_id;
 512                wqe->num_sge = wr->num_sge;
 513                for (i = 0; i < wr->num_sge; i++)
 514                        wqe->sg_list[i] = wr->sg_list[i];
 515                /* Make sure queue entry is written before the head index. */
 516                smp_wmb();
 517                wq->head = next;
 518                spin_unlock_irqrestore(&qp->r_rq.lock, flags);
 519        }
 520        ret = 0;
 521
 522bail:
 523        return ret;
 524}
 525
 526/**
  527 * ipath_qp_rcv - process an incoming packet on a QP
 528 * @dev: the device the packet came on
 529 * @hdr: the packet header
 530 * @has_grh: true if the packet has a GRH
 531 * @data: the packet data
 532 * @tlen: the packet length
 533 * @qp: the QP the packet came on
 534 *
 535 * This is called from ipath_ib_rcv() to process an incoming packet
 536 * for the given QP.
 537 * Called at interrupt level.
 538 */
 539static void ipath_qp_rcv(struct ipath_ibdev *dev,
 540                         struct ipath_ib_header *hdr, int has_grh,
 541                         void *data, u32 tlen, struct ipath_qp *qp)
 542{
 543        /* Check for valid receive state. */
 544        if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK)) {
 545                dev->n_pkt_drops++;
 546                return;
 547        }
 548
 549        switch (qp->ibqp.qp_type) {
 550        case IB_QPT_SMI:
 551        case IB_QPT_GSI:
 552                if (ib_ipath_disable_sma)
 553                        break;
 554                /* FALLTHROUGH */
 555        case IB_QPT_UD:
 556                ipath_ud_rcv(dev, hdr, has_grh, data, tlen, qp);
 557                break;
 558
 559        case IB_QPT_RC:
 560                ipath_rc_rcv(dev, hdr, has_grh, data, tlen, qp);
 561                break;
 562
 563        case IB_QPT_UC:
 564                ipath_uc_rcv(dev, hdr, has_grh, data, tlen, qp);
 565                break;
 566
 567        default:
 568                break;
 569        }
 570}
 571
 572/**
 573 * ipath_ib_rcv - process an incoming packet
  574 * @dev: the device pointer
 575 * @rhdr: the header of the packet
 576 * @data: the packet data
 577 * @tlen: the packet length
 578 *
 579 * This is called from ipath_kreceive() to process an incoming packet at
 580 * interrupt level. Tlen is the length of the header + data + CRC in bytes.
 581 */
 582void ipath_ib_rcv(struct ipath_ibdev *dev, void *rhdr, void *data,
 583                  u32 tlen)
 584{
 585        struct ipath_ib_header *hdr = rhdr;
 586        struct ipath_other_headers *ohdr;
 587        struct ipath_qp *qp;
 588        u32 qp_num;
 589        int lnh;
 590        u8 opcode;
 591        u16 lid;
 592
 593        if (unlikely(dev == NULL))
 594                goto bail;
 595
 596        if (unlikely(tlen < 24)) {      /* LRH+BTH+CRC */
 597                dev->rcv_errors++;
 598                goto bail;
 599        }
 600
 601        /* Check for a valid destination LID (see ch. 7.11.1). */
 602        lid = be16_to_cpu(hdr->lrh[1]);
 603        if (lid < IPATH_MULTICAST_LID_BASE) {
 604                lid &= ~((1 << dev->dd->ipath_lmc) - 1);
 605                if (unlikely(lid != dev->dd->ipath_lid)) {
 606                        dev->rcv_errors++;
 607                        goto bail;
 608                }
 609        }
 610
 611        /* Check for GRH */
 612        lnh = be16_to_cpu(hdr->lrh[0]) & 3;
 613        if (lnh == IPATH_LRH_BTH)
 614                ohdr = &hdr->u.oth;
 615        else if (lnh == IPATH_LRH_GRH)
 616                ohdr = &hdr->u.l.oth;
 617        else {
 618                dev->rcv_errors++;
 619                goto bail;
 620        }
 621
 622        opcode = be32_to_cpu(ohdr->bth[0]) >> 24;
 623        dev->opstats[opcode].n_bytes += tlen;
 624        dev->opstats[opcode].n_packets++;
 625
 626        /* Get the destination QP number. */
 627        qp_num = be32_to_cpu(ohdr->bth[1]) & IPATH_QPN_MASK;
 628        if (qp_num == IPATH_MULTICAST_QPN) {
 629                struct ipath_mcast *mcast;
 630                struct ipath_mcast_qp *p;
 631
 632                if (lnh != IPATH_LRH_GRH) {
 633                        dev->n_pkt_drops++;
 634                        goto bail;
 635                }
 636                mcast = ipath_mcast_find(&hdr->u.l.grh.dgid);
 637                if (mcast == NULL) {
 638                        dev->n_pkt_drops++;
 639                        goto bail;
 640                }
 641                dev->n_multicast_rcv++;
 642                list_for_each_entry_rcu(p, &mcast->qp_list, list)
 643                        ipath_qp_rcv(dev, hdr, 1, data, tlen, p->qp);
 644                /*
 645                 * Notify ipath_multicast_detach() if it is waiting for us
 646                 * to finish.
 647                 */
 648                if (atomic_dec_return(&mcast->refcount) <= 1)
 649                        wake_up(&mcast->wait);
 650        } else {
 651                qp = ipath_lookup_qpn(&dev->qp_table, qp_num);
 652                if (qp) {
 653                        dev->n_unicast_rcv++;
 654                        ipath_qp_rcv(dev, hdr, lnh == IPATH_LRH_GRH, data,
 655                                     tlen, qp);
 656                        /*
 657                         * Notify ipath_destroy_qp() if it is waiting
 658                         * for us to finish.
 659                         */
 660                        if (atomic_dec_and_test(&qp->refcount))
 661                                wake_up(&qp->wait);
 662                } else
 663                        dev->n_pkt_drops++;
 664        }
 665
 666bail:;
 667}
 668
 669/**
 670 * ipath_ib_timer - verbs timer
  671 * @dev: the device pointer
 672 *
 673 * This is called from ipath_do_rcv_timer() at interrupt level to check for
 674 * QPs which need retransmits and to collect performance numbers.
 675 */
 676static void ipath_ib_timer(struct ipath_ibdev *dev)
 677{
 678        struct ipath_qp *resend = NULL;
 679        struct ipath_qp *rnr = NULL;
 680        struct list_head *last;
 681        struct ipath_qp *qp;
 682        unsigned long flags;
 683
 684        if (dev == NULL)
 685                return;
 686
 687        spin_lock_irqsave(&dev->pending_lock, flags);
 688        /* Start filling the next pending queue. */
 689        if (++dev->pending_index >= ARRAY_SIZE(dev->pending))
 690                dev->pending_index = 0;
  691        /* Save any requests still in the new queue; they have timed out. */
 692        last = &dev->pending[dev->pending_index];
 693        while (!list_empty(last)) {
 694                qp = list_entry(last->next, struct ipath_qp, timerwait);
 695                list_del_init(&qp->timerwait);
 696                qp->timer_next = resend;
 697                resend = qp;
 698                atomic_inc(&qp->refcount);
 699        }
 700        last = &dev->rnrwait;
 701        if (!list_empty(last)) {
 702                qp = list_entry(last->next, struct ipath_qp, timerwait);
 703                if (--qp->s_rnr_timeout == 0) {
 704                        do {
 705                                list_del_init(&qp->timerwait);
 706                                qp->timer_next = rnr;
 707                                rnr = qp;
 708                                atomic_inc(&qp->refcount);
 709                                if (list_empty(last))
 710                                        break;
 711                                qp = list_entry(last->next, struct ipath_qp,
 712                                                timerwait);
 713                        } while (qp->s_rnr_timeout == 0);
 714                }
 715        }
 716        /*
 717         * We should only be in the started state if pma_sample_start != 0
 718         */
 719        if (dev->pma_sample_status == IB_PMA_SAMPLE_STATUS_STARTED &&
 720            --dev->pma_sample_start == 0) {
 721                dev->pma_sample_status = IB_PMA_SAMPLE_STATUS_RUNNING;
 722                ipath_snapshot_counters(dev->dd, &dev->ipath_sword,
 723                                        &dev->ipath_rword,
 724                                        &dev->ipath_spkts,
 725                                        &dev->ipath_rpkts,
 726                                        &dev->ipath_xmit_wait);
 727        }
 728        if (dev->pma_sample_status == IB_PMA_SAMPLE_STATUS_RUNNING) {
 729                if (dev->pma_sample_interval == 0) {
 730                        u64 ta, tb, tc, td, te;
 731
 732                        dev->pma_sample_status = IB_PMA_SAMPLE_STATUS_DONE;
 733                        ipath_snapshot_counters(dev->dd, &ta, &tb,
 734                                                &tc, &td, &te);
 735
 736                        dev->ipath_sword = ta - dev->ipath_sword;
 737                        dev->ipath_rword = tb - dev->ipath_rword;
 738                        dev->ipath_spkts = tc - dev->ipath_spkts;
 739                        dev->ipath_rpkts = td - dev->ipath_rpkts;
 740                        dev->ipath_xmit_wait = te - dev->ipath_xmit_wait;
 741                }
 742                else
 743                        dev->pma_sample_interval--;
 744        }
 745        spin_unlock_irqrestore(&dev->pending_lock, flags);
 746
 747        /* XXX What if timer fires again while this is running? */
 748        while (resend != NULL) {
 749                qp = resend;
 750                resend = qp->timer_next;
 751
 752                spin_lock_irqsave(&qp->s_lock, flags);
 753                if (qp->s_last != qp->s_tail &&
 754                    ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK) {
 755                        dev->n_timeouts++;
 756                        ipath_restart_rc(qp, qp->s_last_psn + 1);
 757                }
 758                spin_unlock_irqrestore(&qp->s_lock, flags);
 759
 760                /* Notify ipath_destroy_qp() if it is waiting. */
 761                if (atomic_dec_and_test(&qp->refcount))
 762                        wake_up(&qp->wait);
 763        }
 764        while (rnr != NULL) {
 765                qp = rnr;
 766                rnr = qp->timer_next;
 767
 768                spin_lock_irqsave(&qp->s_lock, flags);
 769                if (ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK)
 770                        ipath_schedule_send(qp);
 771                spin_unlock_irqrestore(&qp->s_lock, flags);
 772
 773                /* Notify ipath_destroy_qp() if it is waiting. */
 774                if (atomic_dec_and_test(&qp->refcount))
 775                        wake_up(&qp->wait);
 776        }
 777}
 778
 779static void update_sge(struct ipath_sge_state *ss, u32 length)
 780{
 781        struct ipath_sge *sge = &ss->sge;
 782
 783        sge->vaddr += length;
 784        sge->length -= length;
 785        sge->sge_length -= length;
 786        if (sge->sge_length == 0) {
 787                if (--ss->num_sge)
 788                        *sge = *ss->sg_list++;
 789        } else if (sge->length == 0 && sge->mr != NULL) {
 790                if (++sge->n >= IPATH_SEGSZ) {
 791                        if (++sge->m >= sge->mr->mapsz)
 792                                return;
 793                        sge->n = 0;
 794                }
 795                sge->vaddr = sge->mr->map[sge->m]->segs[sge->n].vaddr;
 796                sge->length = sge->mr->map[sge->m]->segs[sge->n].length;
 797        }
 798}
 799
 800#ifdef __LITTLE_ENDIAN
 801static inline u32 get_upper_bits(u32 data, u32 shift)
 802{
 803        return data >> shift;
 804}
 805
 806static inline u32 set_upper_bits(u32 data, u32 shift)
 807{
 808        return data << shift;
 809}
 810
 811static inline u32 clear_upper_bytes(u32 data, u32 n, u32 off)
 812{
 813        data <<= ((sizeof(u32) - n) * BITS_PER_BYTE);
 814        data >>= ((sizeof(u32) - n - off) * BITS_PER_BYTE);
 815        return data;
 816}
 817#else
 818static inline u32 get_upper_bits(u32 data, u32 shift)
 819{
 820        return data << shift;
 821}
 822
 823static inline u32 set_upper_bits(u32 data, u32 shift)
 824{
 825        return data >> shift;
 826}
 827
 828static inline u32 clear_upper_bytes(u32 data, u32 n, u32 off)
 829{
 830        data >>= ((sizeof(u32) - n) * BITS_PER_BYTE);
 831        data <<= ((sizeof(u32) - n - off) * BITS_PER_BYTE);
 832        return data;
 833}
 834#endif
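
/*
 * Worked example (editor's sketch, little-endian case): copy_io() below
 * uses these helpers to build dword writes from byte-misaligned data.
 * get_upper_bits(*aligned_base, off * BITS_PER_BYTE) discards the 'off'
 * memory bytes that precede the source pointer,
 * set_upper_bits(v, extra * BITS_PER_BYTE) places bytes at memory offset
 * 'extra' of the outgoing dword, and clear_upper_bytes(v, n, off) keeps
 * only the first n memory bytes of v shifted to offset 'off'; e.g.
 * clear_upper_bytes(0xAABBCCDD, 2, 1) == 0x00CCDD00 on little-endian.
 */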
 835
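/*
 * copy_io - copy a payload into a write-combining PIO buffer (editor's
 * summary).  The source may span multiple SGEs and need not be dword
 * aligned, so partial bytes are carried between iterations in 'data'
 * ('extra' counts how many are buffered).  The final dword is held back
 * in 'last' and written after the optional WC flush so that it acts as
 * the trigger word.
 */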
 836static void copy_io(u32 __iomem *piobuf, struct ipath_sge_state *ss,
 837                    u32 length, unsigned flush_wc)
 838{
 839        u32 extra = 0;
 840        u32 data = 0;
 841        u32 last;
 842
 843        while (1) {
 844                u32 len = ss->sge.length;
 845                u32 off;
 846
 847                if (len > length)
 848                        len = length;
 849                if (len > ss->sge.sge_length)
 850                        len = ss->sge.sge_length;
 851                BUG_ON(len == 0);
 852                /* If the source address is not aligned, try to align it. */
 853                off = (unsigned long)ss->sge.vaddr & (sizeof(u32) - 1);
 854                if (off) {
 855                        u32 *addr = (u32 *)((unsigned long)ss->sge.vaddr &
 856                                            ~(sizeof(u32) - 1));
 857                        u32 v = get_upper_bits(*addr, off * BITS_PER_BYTE);
 858                        u32 y;
 859
 860                        y = sizeof(u32) - off;
 861                        if (len > y)
 862                                len = y;
 863                        if (len + extra >= sizeof(u32)) {
 864                                data |= set_upper_bits(v, extra *
 865                                                       BITS_PER_BYTE);
 866                                len = sizeof(u32) - extra;
 867                                if (len == length) {
 868                                        last = data;
 869                                        break;
 870                                }
 871                                __raw_writel(data, piobuf);
 872                                piobuf++;
 873                                extra = 0;
 874                                data = 0;
 875                        } else {
 876                                /* Clear unused upper bytes */
 877                                data |= clear_upper_bytes(v, len, extra);
 878                                if (len == length) {
 879                                        last = data;
 880                                        break;
 881                                }
 882                                extra += len;
 883                        }
 884                } else if (extra) {
 885                        /* Source address is aligned. */
 886                        u32 *addr = (u32 *) ss->sge.vaddr;
 887                        int shift = extra * BITS_PER_BYTE;
 888                        int ushift = 32 - shift;
 889                        u32 l = len;
 890
 891                        while (l >= sizeof(u32)) {
 892                                u32 v = *addr;
 893
 894                                data |= set_upper_bits(v, shift);
 895                                __raw_writel(data, piobuf);
 896                                data = get_upper_bits(v, ushift);
 897                                piobuf++;
 898                                addr++;
 899                                l -= sizeof(u32);
 900                        }
 901                        /*
 902                         * We still have 'extra' number of bytes leftover.
 903                         */
 904                        if (l) {
 905                                u32 v = *addr;
 906
 907                                if (l + extra >= sizeof(u32)) {
 908                                        data |= set_upper_bits(v, shift);
 909                                        len -= l + extra - sizeof(u32);
 910                                        if (len == length) {
 911                                                last = data;
 912                                                break;
 913                                        }
 914                                        __raw_writel(data, piobuf);
 915                                        piobuf++;
 916                                        extra = 0;
 917                                        data = 0;
 918                                } else {
 919                                        /* Clear unused upper bytes */
 920                                        data |= clear_upper_bytes(v, l,
 921                                                                  extra);
 922                                        if (len == length) {
 923                                                last = data;
 924                                                break;
 925                                        }
 926                                        extra += l;
 927                                }
 928                        } else if (len == length) {
 929                                last = data;
 930                                break;
 931                        }
 932                } else if (len == length) {
 933                        u32 w;
 934
 935                        /*
 936                         * Need to round up for the last dword in the
 937                         * packet.
 938                         */
 939                        w = (len + 3) >> 2;
 940                        __iowrite32_copy(piobuf, ss->sge.vaddr, w - 1);
 941                        piobuf += w - 1;
 942                        last = ((u32 *) ss->sge.vaddr)[w - 1];
 943                        break;
 944                } else {
 945                        u32 w = len >> 2;
 946
 947                        __iowrite32_copy(piobuf, ss->sge.vaddr, w);
 948                        piobuf += w;
 949
 950                        extra = len & (sizeof(u32) - 1);
 951                        if (extra) {
 952                                u32 v = ((u32 *) ss->sge.vaddr)[w];
 953
 954                                /* Clear unused upper bytes */
 955                                data = clear_upper_bytes(v, extra, 0);
 956                        }
 957                }
 958                update_sge(ss, len);
 959                length -= len;
 960        }
 961        /* Update address before sending packet. */
 962        update_sge(ss, length);
 963        if (flush_wc) {
  964                /* must flush everything early, before the trigger word */
 965                ipath_flush_wc();
 966                __raw_writel(last, piobuf);
 967                /* be sure trigger word is written */
 968                ipath_flush_wc();
 969        } else
 970                __raw_writel(last, piobuf);
 971}
 972
 973/*
 974 * Convert IB rate to delay multiplier.
 975 */
 976unsigned ipath_ib_rate_to_mult(enum ib_rate rate)
 977{
 978        switch (rate) {
 979        case IB_RATE_2_5_GBPS: return 8;
 980        case IB_RATE_5_GBPS:   return 4;
 981        case IB_RATE_10_GBPS:  return 2;
 982        case IB_RATE_20_GBPS:  return 1;
 983        default:               return 0;
 984        }
 985}
 986
 987/*
 988 * Convert delay multiplier to IB rate
 989 */
 990static enum ib_rate ipath_mult_to_ib_rate(unsigned mult)
 991{
 992        switch (mult) {
 993        case 8:  return IB_RATE_2_5_GBPS;
 994        case 4:  return IB_RATE_5_GBPS;
 995        case 2:  return IB_RATE_10_GBPS;
 996        case 1:  return IB_RATE_20_GBPS;
 997        default: return IB_RATE_PORT_CURRENT;
 998        }
 999}
1000
1001static inline struct ipath_verbs_txreq *get_txreq(struct ipath_ibdev *dev)
1002{
1003        struct ipath_verbs_txreq *tx = NULL;
1004        unsigned long flags;
1005
1006        spin_lock_irqsave(&dev->pending_lock, flags);
1007        if (!list_empty(&dev->txreq_free)) {
1008                struct list_head *l = dev->txreq_free.next;
1009
1010                list_del(l);
1011                tx = list_entry(l, struct ipath_verbs_txreq, txreq.list);
1012        }
1013        spin_unlock_irqrestore(&dev->pending_lock, flags);
1014        return tx;
1015}
1016
1017static inline void put_txreq(struct ipath_ibdev *dev,
1018                             struct ipath_verbs_txreq *tx)
1019{
1020        unsigned long flags;
1021
1022        spin_lock_irqsave(&dev->pending_lock, flags);
1023        list_add(&tx->txreq.list, &dev->txreq_free);
1024        spin_unlock_irqrestore(&dev->pending_lock, flags);
1025}
1026
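/*
 * sdma_complete - send DMA completion callback (editor's summary).
 * Generates the work completion for the WQE, if any (flushed unless the
 * engine reported IPATH_SDMA_TXREQ_S_OK); when this was the QP's last
 * outstanding DMA it also reschedules the send engine if more flushing or
 * a DMA drain is pending and wakes waiters on qp->wait_dma.  Finally the
 * bounce buffer (if any) is freed, the txreq is returned to the free
 * list, and the QP reference taken in ipath_verbs_send_dma() is dropped.
 */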
1027static void sdma_complete(void *cookie, int status)
1028{
1029        struct ipath_verbs_txreq *tx = cookie;
1030        struct ipath_qp *qp = tx->qp;
1031        struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
1032        unsigned long flags;
1033        enum ib_wc_status ibs = status == IPATH_SDMA_TXREQ_S_OK ?
1034                IB_WC_SUCCESS : IB_WC_WR_FLUSH_ERR;
1035
1036        if (atomic_dec_and_test(&qp->s_dma_busy)) {
1037                spin_lock_irqsave(&qp->s_lock, flags);
1038                if (tx->wqe)
1039                        ipath_send_complete(qp, tx->wqe, ibs);
1040                if ((ib_ipath_state_ops[qp->state] & IPATH_FLUSH_SEND &&
1041                     qp->s_last != qp->s_head) ||
1042                    (qp->s_flags & IPATH_S_WAIT_DMA))
1043                        ipath_schedule_send(qp);
1044                spin_unlock_irqrestore(&qp->s_lock, flags);
1045                wake_up(&qp->wait_dma);
1046        } else if (tx->wqe) {
1047                spin_lock_irqsave(&qp->s_lock, flags);
1048                ipath_send_complete(qp, tx->wqe, ibs);
1049                spin_unlock_irqrestore(&qp->s_lock, flags);
1050        }
1051
1052        if (tx->txreq.flags & IPATH_SDMA_TXREQ_F_FREEBUF)
1053                kfree(tx->txreq.map_addr);
1054        put_txreq(dev, tx);
1055
1056        if (atomic_dec_and_test(&qp->refcount))
1057                wake_up(&qp->wait);
1058}
1059
1060static void decrement_dma_busy(struct ipath_qp *qp)
1061{
1062        unsigned long flags;
1063
1064        if (atomic_dec_and_test(&qp->s_dma_busy)) {
1065                spin_lock_irqsave(&qp->s_lock, flags);
1066                if ((ib_ipath_state_ops[qp->state] & IPATH_FLUSH_SEND &&
1067                     qp->s_last != qp->s_head) ||
1068                    (qp->s_flags & IPATH_S_WAIT_DMA))
1069                        ipath_schedule_send(qp);
1070                spin_unlock_irqrestore(&qp->s_lock, flags);
1071                wake_up(&qp->wait_dma);
1072        }
1073}
1074
1075/*
1076 * Compute the number of clock cycles of delay before sending the next packet.
1077 * The multipliers reflect the number of clocks for the fastest rate so
1078 * one tick at 4xDDR is 8 ticks at 1xSDR.
1079 * If the destination port will take longer to receive a packet than
1080 * the outgoing link can send it, we need to delay sending the next packet
1081 * by the difference in time it takes the receiver to receive and the sender
1082 * to send this packet.
1083 * Note that this delay is always correct for UC and RC but not always
1084 * optimal for UD. For UD, the destination HCA can be different for each
1085 * packet, in which case, we could send packets to a different destination
1086 * while "waiting" for the delay. The overhead for doing this without
1087 * HW support is more than just paying the cost of delaying some packets
1088 * unnecessarily.
1089 */
1090static inline unsigned ipath_pkt_delay(u32 plen, u8 snd_mult, u8 rcv_mult)
1091{
1092        return (rcv_mult > snd_mult) ?
1093                (plen * (rcv_mult - snd_mult) + 1) >> 1 : 0;
1094}
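
/*
 * Worked example (editor's sketch, values assumed): a plen of 100 dwords
 * sent from a 4X DDR port (snd_mult = 1) toward a 1X SDR destination
 * (rcv_mult = 8) gives a delay of (100 * (8 - 1) + 1) >> 1 = 350
 * fastest-rate ticks; if the receiver is at least as fast as the sender,
 * no delay is inserted.
 */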
1095
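/*
 * ipath_verbs_send_dma - queue a packet on the send DMA engine (editor's
 * summary).  A previously constructed request saved in qp->s_tx is resent
 * first.  Otherwise a txreq is taken from the free list, the per-packet
 * delay/control word is computed, and the SGE list is handed to the SDMA
 * engine directly when every segment is dword aligned and fits in the
 * descriptor queue; otherwise the header and payload are copied into a
 * kmalloc'd bounce buffer.  If the descriptor ring is full the request is
 * parked in qp->s_tx for a later retry, and -EBUSY is returned when no
 * txreq or bounce buffer is available.
 */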
1096static int ipath_verbs_send_dma(struct ipath_qp *qp,
1097                                struct ipath_ib_header *hdr, u32 hdrwords,
1098                                struct ipath_sge_state *ss, u32 len,
1099                                u32 plen, u32 dwords)
1100{
1101        struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
1102        struct ipath_devdata *dd = dev->dd;
1103        struct ipath_verbs_txreq *tx;
1104        u32 *piobuf;
1105        u32 control;
1106        u32 ndesc;
1107        int ret;
1108
1109        tx = qp->s_tx;
1110        if (tx) {
1111                qp->s_tx = NULL;
1112                /* resend previously constructed packet */
1113                atomic_inc(&qp->s_dma_busy);
1114                ret = ipath_sdma_verbs_send(dd, tx->ss, tx->len, tx);
1115                if (ret) {
1116                        qp->s_tx = tx;
1117                        decrement_dma_busy(qp);
1118                }
1119                goto bail;
1120        }
1121
1122        tx = get_txreq(dev);
1123        if (!tx) {
1124                ret = -EBUSY;
1125                goto bail;
1126        }
1127
1128        /*
1129         * Get the saved delay count we computed for the previous packet
1130         * and save the delay count for this packet to be used next time
1131         * we get here.
1132         */
1133        control = qp->s_pkt_delay;
1134        qp->s_pkt_delay = ipath_pkt_delay(plen, dd->delay_mult, qp->s_dmult);
1135
1136        tx->qp = qp;
1137        atomic_inc(&qp->refcount);
1138        tx->wqe = qp->s_wqe;
1139        tx->txreq.callback = sdma_complete;
1140        tx->txreq.callback_cookie = tx;
1141        tx->txreq.flags = IPATH_SDMA_TXREQ_F_HEADTOHOST |
1142                IPATH_SDMA_TXREQ_F_INTREQ | IPATH_SDMA_TXREQ_F_FREEDESC;
1143        if (plen + 1 >= IPATH_SMALLBUF_DWORDS)
1144                tx->txreq.flags |= IPATH_SDMA_TXREQ_F_USELARGEBUF;
1145
1146        /* VL15 packets bypass credit check */
1147        if ((be16_to_cpu(hdr->lrh[0]) >> 12) == 15) {
1148                control |= 1ULL << 31;
1149                tx->txreq.flags |= IPATH_SDMA_TXREQ_F_VL15;
1150        }
1151
1152        if (len) {
1153                /*
1154                 * Don't try to DMA if it takes more descriptors than
1155                 * the queue holds.
1156                 */
1157                ndesc = ipath_count_sge(ss, len);
1158                if (ndesc >= dd->ipath_sdma_descq_cnt)
1159                        ndesc = 0;
1160        } else
1161                ndesc = 1;
1162        if (ndesc) {
1163                tx->hdr.pbc[0] = cpu_to_le32(plen);
1164                tx->hdr.pbc[1] = cpu_to_le32(control);
1165                memcpy(&tx->hdr.hdr, hdr, hdrwords << 2);
1166                tx->txreq.sg_count = ndesc;
1167                tx->map_len = (hdrwords + 2) << 2;
1168                tx->txreq.map_addr = &tx->hdr;
1169                atomic_inc(&qp->s_dma_busy);
1170                ret = ipath_sdma_verbs_send(dd, ss, dwords, tx);
1171                if (ret) {
1172                        /* save ss and length in dwords */
1173                        tx->ss = ss;
1174                        tx->len = dwords;
1175                        qp->s_tx = tx;
1176                        decrement_dma_busy(qp);
1177                }
1178                goto bail;
1179        }
1180
1181        /* Allocate a buffer and copy the header and payload to it. */
1182        tx->map_len = (plen + 1) << 2;
1183        piobuf = kmalloc(tx->map_len, GFP_ATOMIC);
1184        if (unlikely(piobuf == NULL)) {
1185                ret = -EBUSY;
1186                goto err_tx;
1187        }
1188        tx->txreq.map_addr = piobuf;
1189        tx->txreq.flags |= IPATH_SDMA_TXREQ_F_FREEBUF;
1190        tx->txreq.sg_count = 1;
1191
1192        *piobuf++ = (__force u32) cpu_to_le32(plen);
1193        *piobuf++ = (__force u32) cpu_to_le32(control);
1194        memcpy(piobuf, hdr, hdrwords << 2);
1195        ipath_copy_from_sge(piobuf + hdrwords, ss, len);
1196
1197        atomic_inc(&qp->s_dma_busy);
1198        ret = ipath_sdma_verbs_send(dd, NULL, 0, tx);
1199        /*
1200         * If we couldn't queue the DMA request, save the info
1201         * and try again later rather than destroying the
1202         * buffer and undoing the side effects of the copy.
1203         */
1204        if (ret) {
1205                tx->ss = NULL;
1206                tx->len = 0;
1207                qp->s_tx = tx;
1208                decrement_dma_busy(qp);
1209        }
1210        dev->n_unaligned++;
1211        goto bail;
1212
1213err_tx:
1214        if (atomic_dec_and_test(&qp->refcount))
1215                wake_up(&qp->wait);
1216        put_txreq(dev, tx);
1217bail:
1218        return ret;
1219}
1220
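/*
 * ipath_verbs_send_pio - send a packet through a programmed-I/O buffer
 * (editor's summary).  After a PIO buffer is obtained, the PBC (length
 * plus control word) is written, then the header and payload are copied
 * in 32-bit words; on chips that need it (IPATH_PIO_FLUSH_WC) the
 * write-combining buffer is flushed before and after the final "trigger"
 * word.  Payloads that are not a single aligned SGE go through copy_io().
 * Since PIO completes synchronously, the WQE (if any) is completed with
 * IB_WC_SUCCESS immediately.
 */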
1221static int ipath_verbs_send_pio(struct ipath_qp *qp,
1222                                struct ipath_ib_header *ibhdr, u32 hdrwords,
1223                                struct ipath_sge_state *ss, u32 len,
1224                                u32 plen, u32 dwords)
1225{
1226        struct ipath_devdata *dd = to_idev(qp->ibqp.device)->dd;
1227        u32 *hdr = (u32 *) ibhdr;
1228        u32 __iomem *piobuf;
1229        unsigned flush_wc;
1230        u32 control;
1231        int ret;
1232        unsigned long flags;
1233
1234        piobuf = ipath_getpiobuf(dd, plen, NULL);
1235        if (unlikely(piobuf == NULL)) {
1236                ret = -EBUSY;
1237                goto bail;
1238        }
1239
1240        /*
1241         * Get the saved delay count we computed for the previous packet
1242         * and save the delay count for this packet to be used next time
1243         * we get here.
1244         */
1245        control = qp->s_pkt_delay;
1246        qp->s_pkt_delay = ipath_pkt_delay(plen, dd->delay_mult, qp->s_dmult);
1247
1248        /* VL15 packets bypass credit check */
1249        if ((be16_to_cpu(ibhdr->lrh[0]) >> 12) == 15)
1250                control |= 1ULL << 31;
1251
1252        /*
1253         * Write the length to the control qword plus any needed flags.
1254         * We have to flush after the PBC for correctness on some cpus
1255         * or WC buffer can be written out of order.
1256         */
1257        writeq(((u64) control << 32) | plen, piobuf);
1258        piobuf += 2;
1259
1260        flush_wc = dd->ipath_flags & IPATH_PIO_FLUSH_WC;
1261        if (len == 0) {
1262                /*
1263                 * If there is just the header portion, must flush before
1264                 * writing last word of header for correctness, and after
1265                 * the last header word (trigger word).
1266                 */
1267                if (flush_wc) {
1268                        ipath_flush_wc();
1269                        __iowrite32_copy(piobuf, hdr, hdrwords - 1);
1270                        ipath_flush_wc();
1271                        __raw_writel(hdr[hdrwords - 1], piobuf + hdrwords - 1);
1272                        ipath_flush_wc();
1273                } else
1274                        __iowrite32_copy(piobuf, hdr, hdrwords);
1275                goto done;
1276        }
1277
1278        if (flush_wc)
1279                ipath_flush_wc();
1280        __iowrite32_copy(piobuf, hdr, hdrwords);
1281        piobuf += hdrwords;
1282
1283        /* The common case is aligned and contained in one segment. */
1284        if (likely(ss->num_sge == 1 && len <= ss->sge.length &&
1285                   !((unsigned long)ss->sge.vaddr & (sizeof(u32) - 1)))) {
1286                u32 *addr = (u32 *) ss->sge.vaddr;
1287
1288                /* Update address before sending packet. */
1289                update_sge(ss, len);
1290                if (flush_wc) {
1291                        __iowrite32_copy(piobuf, addr, dwords - 1);
 1292                        /* must flush everything early, before the trigger word */
1293                        ipath_flush_wc();
1294                        __raw_writel(addr[dwords - 1], piobuf + dwords - 1);
1295                        /* be sure trigger word is written */
1296                        ipath_flush_wc();
1297                } else
1298                        __iowrite32_copy(piobuf, addr, dwords);
1299                goto done;
1300        }
1301        copy_io(piobuf, ss, len, flush_wc);
1302done:
1303        if (qp->s_wqe) {
1304                spin_lock_irqsave(&qp->s_lock, flags);
1305                ipath_send_complete(qp, qp->s_wqe, IB_WC_SUCCESS);
1306                spin_unlock_irqrestore(&qp->s_lock, flags);
1307        }
1308        ret = 0;
1309bail:
1310        return ret;
1311}
1312
1313/**
1314 * ipath_verbs_send - send a packet
1315 * @qp: the QP to send on
1316 * @hdr: the packet header
1317 * @hdrwords: the number of 32-bit words in the header
1318 * @ss: the SGE to send
1319 * @len: the length of the packet in bytes
1320 */
1321int ipath_verbs_send(struct ipath_qp *qp, struct ipath_ib_header *hdr,
1322                     u32 hdrwords, struct ipath_sge_state *ss, u32 len)
1323{
1324        struct ipath_devdata *dd = to_idev(qp->ibqp.device)->dd;
1325        u32 plen;
1326        int ret;
1327        u32 dwords = (len + 3) >> 2;
1328
1329        /*
1330         * Calculate the send buffer trigger address.
1331         * The +1 counts for the pbc control dword following the pbc length.
1332         */
1333        plen = hdrwords + dwords + 1;
1334
1335        /*
1336         * VL15 packets (IB_QPT_SMI) will always use PIO, so we
1337         * can defer SDMA restart until link goes ACTIVE without
1338         * worrying about just how we got there.
1339         */
1340        if (qp->ibqp.qp_type == IB_QPT_SMI ||
1341            !(dd->ipath_flags & IPATH_HAS_SEND_DMA))
1342                ret = ipath_verbs_send_pio(qp, hdr, hdrwords, ss, len,
1343                                           plen, dwords);
1344        else
1345                ret = ipath_verbs_send_dma(qp, hdr, hdrwords, ss, len,
1346                                           plen, dwords);
1347
1348        return ret;
1349}
1350
1351int ipath_snapshot_counters(struct ipath_devdata *dd, u64 *swords,
1352                            u64 *rwords, u64 *spkts, u64 *rpkts,
1353                            u64 *xmit_wait)
1354{
1355        int ret;
1356
1357        if (!(dd->ipath_flags & IPATH_INITTED)) {
1358                /* no hardware, freeze, etc. */
1359                ret = -EINVAL;
1360                goto bail;
1361        }
1362        *swords = ipath_snap_cntr(dd, dd->ipath_cregs->cr_wordsendcnt);
1363        *rwords = ipath_snap_cntr(dd, dd->ipath_cregs->cr_wordrcvcnt);
1364        *spkts = ipath_snap_cntr(dd, dd->ipath_cregs->cr_pktsendcnt);
1365        *rpkts = ipath_snap_cntr(dd, dd->ipath_cregs->cr_pktrcvcnt);
1366        *xmit_wait = ipath_snap_cntr(dd, dd->ipath_cregs->cr_sendstallcnt);
1367
1368        ret = 0;
1369
1370bail:
1371        return ret;
1372}
1373
1374/**
1375 * ipath_get_counters - get various chip counters
1376 * @dd: the infinipath device
1377 * @cntrs: counters are placed here
1378 *
1379 * Return the counters needed by recv_pma_get_portcounters().
1380 */
1381int ipath_get_counters(struct ipath_devdata *dd,
1382                       struct ipath_verbs_counters *cntrs)
1383{
1384        struct ipath_cregs const *crp = dd->ipath_cregs;
1385        int ret;
1386
1387        if (!(dd->ipath_flags & IPATH_INITTED)) {
1388                /* no hardware, freeze, etc. */
1389                ret = -EINVAL;
1390                goto bail;
1391        }
1392        cntrs->symbol_error_counter =
1393                ipath_snap_cntr(dd, crp->cr_ibsymbolerrcnt);
1394        cntrs->link_error_recovery_counter =
1395                ipath_snap_cntr(dd, crp->cr_iblinkerrrecovcnt);
1396        /*
1397         * The link downed counter counts when the other side downs the
1398         * connection.  We add in the number of times we downed the link
1399         * due to local link integrity errors to compensate.
1400         */
1401        cntrs->link_downed_counter =
1402                ipath_snap_cntr(dd, crp->cr_iblinkdowncnt);
1403        cntrs->port_rcv_errors =
1404                ipath_snap_cntr(dd, crp->cr_rxdroppktcnt) +
1405                ipath_snap_cntr(dd, crp->cr_rcvovflcnt) +
1406                ipath_snap_cntr(dd, crp->cr_portovflcnt) +
1407                ipath_snap_cntr(dd, crp->cr_err_rlencnt) +
1408                ipath_snap_cntr(dd, crp->cr_invalidrlencnt) +
1409                ipath_snap_cntr(dd, crp->cr_errlinkcnt) +
1410                ipath_snap_cntr(dd, crp->cr_erricrccnt) +
1411                ipath_snap_cntr(dd, crp->cr_errvcrccnt) +
1412                ipath_snap_cntr(dd, crp->cr_errlpcrccnt) +
1413                ipath_snap_cntr(dd, crp->cr_badformatcnt) +
1414                dd->ipath_rxfc_unsupvl_errs;
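            /*
             * The next two receive error counters are chip dependent;
             * each is folded in only when its cr_ field is nonzero.
             */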
1415        if (crp->cr_rxotherlocalphyerrcnt)
1416                cntrs->port_rcv_errors +=
1417                        ipath_snap_cntr(dd, crp->cr_rxotherlocalphyerrcnt);
1418        if (crp->cr_rxvlerrcnt)
1419                cntrs->port_rcv_errors +=
1420                        ipath_snap_cntr(dd, crp->cr_rxvlerrcnt);
1421        cntrs->port_rcv_remphys_errors =
1422                ipath_snap_cntr(dd, crp->cr_rcvebpcnt);
1423        cntrs->port_xmit_discards = ipath_snap_cntr(dd, crp->cr_unsupvlcnt);
1424        cntrs->port_xmit_data = ipath_snap_cntr(dd, crp->cr_wordsendcnt);
1425        cntrs->port_rcv_data = ipath_snap_cntr(dd, crp->cr_wordrcvcnt);
1426        cntrs->port_xmit_packets = ipath_snap_cntr(dd, crp->cr_pktsendcnt);
1427        cntrs->port_rcv_packets = ipath_snap_cntr(dd, crp->cr_pktrcvcnt);
1428        cntrs->local_link_integrity_errors =
1429                crp->cr_locallinkintegrityerrcnt ?
1430                ipath_snap_cntr(dd, crp->cr_locallinkintegrityerrcnt) :
1431                ((dd->ipath_flags & IPATH_GPIO_ERRINTRS) ?
1432                 dd->ipath_lli_errs : dd->ipath_lli_errors);
1433        cntrs->excessive_buffer_overrun_errors =
1434                crp->cr_excessbufferovflcnt ?
1435                ipath_snap_cntr(dd, crp->cr_excessbufferovflcnt) :
1436                dd->ipath_overrun_thresh_errs;
1437        cntrs->vl15_dropped = crp->cr_vl15droppedpktcnt ?
1438                ipath_snap_cntr(dd, crp->cr_vl15droppedpktcnt) : 0;
1439
1440        ret = 0;
1441
1442bail:
1443        return ret;
1444}
1445
1446/**
1447 * ipath_ib_piobufavail - callback when a PIO buffer is available
1448 * @arg: the device pointer
1449 *
1450 * This is called from ipath_intr() at interrupt level when a PIO buffer is
1451 * available after ipath_verbs_send() returned an error that no buffers were
1452 * available.  The intent is to return 1 if we consumed all the PIO
1453 * buffers and still have QPs waiting for buffers; for now we simply
1454 * restart the send tasklet and always return zero.
1455 */
1456int ipath_ib_piobufavail(struct ipath_ibdev *dev)
1457{
1458        struct list_head *list;
1459        struct ipath_qp *qplist;
1460        struct ipath_qp *qp;
1461        unsigned long flags;
1462
1463        if (dev == NULL)
1464                goto bail;
1465
1466        list = &dev->piowait;
1467        qplist = NULL;
1468
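            /*
             * Under pending_lock, move everyone waiting for a PIO buffer
             * onto a private list threaded through pio_next, holding a
             * reference on each QP; the sends are then scheduled below
             * without pending_lock held.
             */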
1469        spin_lock_irqsave(&dev->pending_lock, flags);
1470        while (!list_empty(list)) {
1471                qp = list_entry(list->next, struct ipath_qp, piowait);
1472                list_del_init(&qp->piowait);
1473                qp->pio_next = qplist;
1474                qplist = qp;
1475                atomic_inc(&qp->refcount);
1476        }
1477        spin_unlock_irqrestore(&dev->pending_lock, flags);
1478
1479        while (qplist != NULL) {
1480                qp = qplist;
1481                qplist = qp->pio_next;
1482
1483                spin_lock_irqsave(&qp->s_lock, flags);
1484                if (ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK)
1485                        ipath_schedule_send(qp);
1486                spin_unlock_irqrestore(&qp->s_lock, flags);
1487
1488                /* Notify ipath_destroy_qp() if it is waiting. */
1489                if (atomic_dec_and_test(&qp->refcount))
1490                        wake_up(&qp->wait);
1491        }
1492
1493bail:
1494        return 0;
1495}
1496
1497static int ipath_query_device(struct ib_device *ibdev,
1498                              struct ib_device_attr *props)
1499{
1500        struct ipath_ibdev *dev = to_idev(ibdev);
1501
1502        memset(props, 0, sizeof(*props));
1503
1504        props->device_cap_flags = IB_DEVICE_BAD_PKEY_CNTR |
1505                IB_DEVICE_BAD_QKEY_CNTR | IB_DEVICE_SHUTDOWN_PORT |
1506                IB_DEVICE_SYS_IMAGE_GUID | IB_DEVICE_RC_RNR_NAK_GEN |
1507                IB_DEVICE_PORT_ACTIVE_EVENT | IB_DEVICE_SRQ_RESIZE;
1508        props->page_size_cap = PAGE_SIZE;
1509        props->vendor_id =
1510                IPATH_SRC_OUI_1 << 16 | IPATH_SRC_OUI_2 << 8 | IPATH_SRC_OUI_3;
1511        props->vendor_part_id = dev->dd->ipath_deviceid;
1512        props->hw_ver = dev->dd->ipath_pcirev;
1513
1514        props->sys_image_guid = dev->sys_image_guid;
1515
1516        props->max_mr_size = ~0ull;
1517        props->max_qp = ib_ipath_max_qps;
1518        props->max_qp_wr = ib_ipath_max_qp_wrs;
1519        props->max_sge = ib_ipath_max_sges;
1520        props->max_cq = ib_ipath_max_cqs;
1521        props->max_ah = ib_ipath_max_ahs;
1522        props->max_cqe = ib_ipath_max_cqes;
1523        props->max_mr = dev->lk_table.max;
1524        props->max_fmr = dev->lk_table.max;
1525        props->max_map_per_fmr = 32767;
1526        props->max_pd = ib_ipath_max_pds;
1527        props->max_qp_rd_atom = IPATH_MAX_RDMA_ATOMIC;
1528        props->max_qp_init_rd_atom = 255;
1529        /* props->max_res_rd_atom */
1530        props->max_srq = ib_ipath_max_srqs;
1531        props->max_srq_wr = ib_ipath_max_srq_wrs;
1532        props->max_srq_sge = ib_ipath_max_srq_sges;
1533        /* props->local_ca_ack_delay */
1534        props->atomic_cap = IB_ATOMIC_GLOB;
1535        props->max_pkeys = ipath_get_npkeys(dev->dd);
1536        props->max_mcast_grp = ib_ipath_max_mcast_grps;
1537        props->max_mcast_qp_attach = ib_ipath_max_mcast_qp_attached;
1538        props->max_total_mcast_qp_attach = props->max_mcast_qp_attach *
1539                props->max_mcast_grp;
1540
1541        return 0;
1542}
1543
1544const u8 ipath_cvt_physportstate[32] = {
1545        [INFINIPATH_IBCS_LT_STATE_DISABLED] = IB_PHYSPORTSTATE_DISABLED,
1546        [INFINIPATH_IBCS_LT_STATE_LINKUP] = IB_PHYSPORTSTATE_LINKUP,
1547        [INFINIPATH_IBCS_LT_STATE_POLLACTIVE] = IB_PHYSPORTSTATE_POLL,
1548        [INFINIPATH_IBCS_LT_STATE_POLLQUIET] = IB_PHYSPORTSTATE_POLL,
1549        [INFINIPATH_IBCS_LT_STATE_SLEEPDELAY] = IB_PHYSPORTSTATE_SLEEP,
1550        [INFINIPATH_IBCS_LT_STATE_SLEEPQUIET] = IB_PHYSPORTSTATE_SLEEP,
1551        [INFINIPATH_IBCS_LT_STATE_CFGDEBOUNCE] =
1552                IB_PHYSPORTSTATE_CFG_TRAIN,
1553        [INFINIPATH_IBCS_LT_STATE_CFGRCVFCFG] =
1554                IB_PHYSPORTSTATE_CFG_TRAIN,
1555        [INFINIPATH_IBCS_LT_STATE_CFGWAITRMT] =
1556                IB_PHYSPORTSTATE_CFG_TRAIN,
1557        [INFINIPATH_IBCS_LT_STATE_CFGIDLE] = IB_PHYSPORTSTATE_CFG_TRAIN,
1558        [INFINIPATH_IBCS_LT_STATE_RECOVERRETRAIN] =
1559                IB_PHYSPORTSTATE_LINK_ERR_RECOVER,
1560        [INFINIPATH_IBCS_LT_STATE_RECOVERWAITRMT] =
1561                IB_PHYSPORTSTATE_LINK_ERR_RECOVER,
1562        [INFINIPATH_IBCS_LT_STATE_RECOVERIDLE] =
1563                IB_PHYSPORTSTATE_LINK_ERR_RECOVER,
1564        [0x10] = IB_PHYSPORTSTATE_CFG_TRAIN,
1565        [0x11] = IB_PHYSPORTSTATE_CFG_TRAIN,
1566        [0x12] = IB_PHYSPORTSTATE_CFG_TRAIN,
1567        [0x13] = IB_PHYSPORTSTATE_CFG_TRAIN,
1568        [0x14] = IB_PHYSPORTSTATE_CFG_TRAIN,
1569        [0x15] = IB_PHYSPORTSTATE_CFG_TRAIN,
1570        [0x16] = IB_PHYSPORTSTATE_CFG_TRAIN,
1571        [0x17] = IB_PHYSPORTSTATE_CFG_TRAIN
1572};
1573
1574u32 ipath_get_cr_errpkey(struct ipath_devdata *dd)
1575{
1576        return ipath_read_creg32(dd, dd->ipath_cregs->cr_errpkey);
1577}
1578
1579static int ipath_query_port(struct ib_device *ibdev,
1580                            u8 port, struct ib_port_attr *props)
1581{
1582        struct ipath_ibdev *dev = to_idev(ibdev);
1583        struct ipath_devdata *dd = dev->dd;
1584        enum ib_mtu mtu;
1585        u16 lid = dd->ipath_lid;
1586        u64 ibcstat;
1587
1588        memset(props, 0, sizeof(*props));
1589        props->lid = lid ? lid : be16_to_cpu(IB_LID_PERMISSIVE);
1590        props->lmc = dd->ipath_lmc;
1591        props->sm_lid = dev->sm_lid;
1592        props->sm_sl = dev->sm_sl;
1593        ibcstat = dd->ipath_lastibcstat;
1594        /* map LinkState to IB portinfo values.  */
1595        props->state = ipath_ib_linkstate(dd, ibcstat) + 1;
1596
1597        /* See phys_state_show() */
1598        props->phys_state = /* MEA: assumes shift == 0 */
1599                ipath_cvt_physportstate[dd->ipath_lastibcstat &
1600                dd->ibcs_lts_mask];
1601        props->port_cap_flags = dev->port_cap_flags;
1602        props->gid_tbl_len = 1;
1603        props->max_msg_sz = 0x80000000;
1604        props->pkey_tbl_len = ipath_get_npkeys(dd);
1605        props->bad_pkey_cntr = ipath_get_cr_errpkey(dd) -
1606                dev->z_pkey_violations;
1607        props->qkey_viol_cntr = dev->qkey_violations;
1608        props->active_width = dd->ipath_link_width_active;
1609        /* See rate_show() */
1610        props->active_speed = dd->ipath_link_speed_active;
1611        props->max_vl_num = 1;          /* VLCap = VL0 */
1612        props->init_type_reply = 0;
1613
1614        props->max_mtu = ipath_mtu4096 ? IB_MTU_4096 : IB_MTU_2048;
1615        switch (dd->ipath_ibmtu) {
1616        case 4096:
1617                mtu = IB_MTU_4096;
1618                break;
1619        case 2048:
1620                mtu = IB_MTU_2048;
1621                break;
1622        case 1024:
1623                mtu = IB_MTU_1024;
1624                break;
1625        case 512:
1626                mtu = IB_MTU_512;
1627                break;
1628        case 256:
1629                mtu = IB_MTU_256;
1630                break;
1631        default:
1632                mtu = IB_MTU_2048;
1633        }
1634        props->active_mtu = mtu;
1635        props->subnet_timeout = dev->subnet_timeout;
1636
1637        return 0;
1638}
1639
1640static int ipath_modify_device(struct ib_device *device,
1641                               int device_modify_mask,
1642                               struct ib_device_modify *device_modify)
1643{
1644        int ret;
1645
1646        if (device_modify_mask & ~(IB_DEVICE_MODIFY_SYS_IMAGE_GUID |
1647                                   IB_DEVICE_MODIFY_NODE_DESC)) {
1648                ret = -EOPNOTSUPP;
1649                goto bail;
1650        }
1651
1652        if (device_modify_mask & IB_DEVICE_MODIFY_NODE_DESC)
1653                memcpy(device->node_desc, device_modify->node_desc, 64);
1654
1655        if (device_modify_mask & IB_DEVICE_MODIFY_SYS_IMAGE_GUID)
1656                to_idev(device)->sys_image_guid =
1657                        cpu_to_be64(device_modify->sys_image_guid);
1658
1659        ret = 0;
1660
1661bail:
1662        return ret;
1663}
1664
1665static int ipath_modify_port(struct ib_device *ibdev,
1666                             u8 port, int port_modify_mask,
1667                             struct ib_port_modify *props)
1668{
1669        struct ipath_ibdev *dev = to_idev(ibdev);
1670
1671        dev->port_cap_flags |= props->set_port_cap_mask;
1672        dev->port_cap_flags &= ~props->clr_port_cap_mask;
1673        if (port_modify_mask & IB_PORT_SHUTDOWN)
1674                ipath_set_linkstate(dev->dd, IPATH_IB_LINKDOWN);
1675        if (port_modify_mask & IB_PORT_RESET_QKEY_CNTR)
1676                dev->qkey_violations = 0;
1677        return 0;
1678}
1679
1680static int ipath_query_gid(struct ib_device *ibdev, u8 port,
1681                           int index, union ib_gid *gid)
1682{
1683        struct ipath_ibdev *dev = to_idev(ibdev);
1684        int ret;
1685
1686        if (index >= 1) {
1687                ret = -EINVAL;
1688                goto bail;
1689        }
1690        gid->global.subnet_prefix = dev->gid_prefix;
1691        gid->global.interface_id = dev->dd->ipath_guid;
1692
1693        ret = 0;
1694
1695bail:
1696        return ret;
1697}
1698
1699static struct ib_pd *ipath_alloc_pd(struct ib_device *ibdev,
1700                                    struct ib_ucontext *context,
1701                                    struct ib_udata *udata)
1702{
1703        struct ipath_ibdev *dev = to_idev(ibdev);
1704        struct ipath_pd *pd;
1705        struct ib_pd *ret;
1706
1707        /*
1708         * This is actually totally arbitrary.  Some correctness tests
1709         * assume there's a maximum number of PDs that can be allocated.
1710         * We don't actually have this limit, but we fail the test if
1711         * we allow allocations of more than we report for this value.
1712         */
1713
1714        pd = kmalloc(sizeof *pd, GFP_KERNEL);
1715        if (!pd) {
1716                ret = ERR_PTR(-ENOMEM);
1717                goto bail;
1718        }
1719
1720        spin_lock(&dev->n_pds_lock);
1721        if (dev->n_pds_allocated == ib_ipath_max_pds) {
1722                spin_unlock(&dev->n_pds_lock);
1723                kfree(pd);
1724                ret = ERR_PTR(-ENOMEM);
1725                goto bail;
1726        }
1727
1728        dev->n_pds_allocated++;
1729        spin_unlock(&dev->n_pds_lock);
1730
1731        /* ib_alloc_pd() will initialize pd->ibpd. */
1732        pd->user = udata != NULL;
1733
1734        ret = &pd->ibpd;
1735
1736bail:
1737        return ret;
1738}
1739
1740static int ipath_dealloc_pd(struct ib_pd *ibpd)
1741{
1742        struct ipath_pd *pd = to_ipd(ibpd);
1743        struct ipath_ibdev *dev = to_idev(ibpd->device);
1744
1745        spin_lock(&dev->n_pds_lock);
1746        dev->n_pds_allocated--;
1747        spin_unlock(&dev->n_pds_lock);
1748
1749        kfree(pd);
1750
1751        return 0;
1752}
1753
1754/**
1755 * ipath_create_ah - create an address handle
1756 * @pd: the protection domain
1757 * @ah_attr: the attributes of the AH
1758 *
1759 * This may be called from interrupt context.
1760 */
1761static struct ib_ah *ipath_create_ah(struct ib_pd *pd,
1762                                     struct ib_ah_attr *ah_attr)
1763{
1764        struct ipath_ah *ah;
1765        struct ib_ah *ret;
1766        struct ipath_ibdev *dev = to_idev(pd->device);
1767        unsigned long flags;
1768
1769        /* A multicast address requires a GRH (see ch. 8.4.1). */
1770        if (ah_attr->dlid >= IPATH_MULTICAST_LID_BASE &&
1771            ah_attr->dlid != IPATH_PERMISSIVE_LID &&
1772            !(ah_attr->ah_flags & IB_AH_GRH)) {
1773                ret = ERR_PTR(-EINVAL);
1774                goto bail;
1775        }
1776
1777        if (ah_attr->dlid == 0) {
1778                ret = ERR_PTR(-EINVAL);
1779                goto bail;
1780        }
1781
1782        if (ah_attr->port_num < 1 ||
1783            ah_attr->port_num > pd->device->phys_port_cnt) {
1784                ret = ERR_PTR(-EINVAL);
1785                goto bail;
1786        }
1787
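            /* GFP_ATOMIC: as noted above, this may run in interrupt context. */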
1788        ah = kmalloc(sizeof *ah, GFP_ATOMIC);
1789        if (!ah) {
1790                ret = ERR_PTR(-ENOMEM);
1791                goto bail;
1792        }
1793
1794        spin_lock_irqsave(&dev->n_ahs_lock, flags);
1795        if (dev->n_ahs_allocated == ib_ipath_max_ahs) {
1796                spin_unlock_irqrestore(&dev->n_ahs_lock, flags);
1797                kfree(ah);
1798                ret = ERR_PTR(-ENOMEM);
1799                goto bail;
1800        }
1801
1802        dev->n_ahs_allocated++;
1803        spin_unlock_irqrestore(&dev->n_ahs_lock, flags);
1804
1805        /* ib_create_ah() will initialize ah->ibah. */
1806        ah->attr = *ah_attr;
1807        ah->attr.static_rate = ipath_ib_rate_to_mult(ah_attr->static_rate);
1808
1809        ret = &ah->ibah;
1810
1811bail:
1812        return ret;
1813}
1814
1815/**
1816 * ipath_destroy_ah - destroy an address handle
1817 * @ibah: the AH to destroy
1818 *
1819 * This may be called from interrupt context.
1820 */
1821static int ipath_destroy_ah(struct ib_ah *ibah)
1822{
1823        struct ipath_ibdev *dev = to_idev(ibah->device);
1824        struct ipath_ah *ah = to_iah(ibah);
1825        unsigned long flags;
1826
1827        spin_lock_irqsave(&dev->n_ahs_lock, flags);
1828        dev->n_ahs_allocated--;
1829        spin_unlock_irqrestore(&dev->n_ahs_lock, flags);
1830
1831        kfree(ah);
1832
1833        return 0;
1834}
1835
1836static int ipath_query_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr)
1837{
1838        struct ipath_ah *ah = to_iah(ibah);
1839
1840        *ah_attr = ah->attr;
1841        ah_attr->static_rate = ipath_mult_to_ib_rate(ah->attr.static_rate);
1842
1843        return 0;
1844}
1845
1846/**
1847 * ipath_get_npkeys - return the size of the PKEY table for port 0
1848 * @dd: the infinipath device
1849 */
1850unsigned ipath_get_npkeys(struct ipath_devdata *dd)
1851{
1852        return ARRAY_SIZE(dd->ipath_pd[0]->port_pkeys);
1853}
1854
1855/**
1856 * ipath_get_pkey - return the indexed PKEY from the port PKEY table
1857 * @dd: the infinipath device
1858 * @index: the PKEY index
1859 */
1860unsigned ipath_get_pkey(struct ipath_devdata *dd, unsigned index)
1861{
1862        unsigned ret;
1863
1864        /* always a kernel port, no locking needed */
1865        if (index >= ARRAY_SIZE(dd->ipath_pd[0]->port_pkeys))
1866                ret = 0;
1867        else
1868                ret = dd->ipath_pd[0]->port_pkeys[index];
1869
1870        return ret;
1871}
1872
1873static int ipath_query_pkey(struct ib_device *ibdev, u8 port, u16 index,
1874                            u16 *pkey)
1875{
1876        struct ipath_ibdev *dev = to_idev(ibdev);
1877        int ret;
1878
1879        if (index >= ipath_get_npkeys(dev->dd)) {
1880                ret = -EINVAL;
1881                goto bail;
1882        }
1883
1884        *pkey = ipath_get_pkey(dev->dd, index);
1885        ret = 0;
1886
1887bail:
1888        return ret;
1889}
1890
1891/**
1892 * ipath_alloc_ucontext - allocate a ucontext
1893 * @ibdev: the infiniband device
1894 * @udata: not used by the InfiniPath driver
1895 */
1896
1897static struct ib_ucontext *ipath_alloc_ucontext(struct ib_device *ibdev,
1898                                                struct ib_udata *udata)
1899{
1900        struct ipath_ucontext *context;
1901        struct ib_ucontext *ret;
1902
1903        context = kmalloc(sizeof *context, GFP_KERNEL);
1904        if (!context) {
1905                ret = ERR_PTR(-ENOMEM);
1906                goto bail;
1907        }
1908
1909        ret = &context->ibucontext;
1910
1911bail:
1912        return ret;
1913}
1914
1915static int ipath_dealloc_ucontext(struct ib_ucontext *context)
1916{
1917        kfree(to_iucontext(context));
1918        return 0;
1919}
1920
1921static int ipath_verbs_register_sysfs(struct ib_device *dev);
1922
1923static void __verbs_timer(unsigned long arg)
1924{
1925        struct ipath_devdata *dd = (struct ipath_devdata *) arg;
1926
1927        /* Handle verbs layer timeouts. */
1928        ipath_ib_timer(dd->verbs_dev);
1929
1930        mod_timer(&dd->verbs_timer, jiffies + 1);
1931}
1932
1933static int enable_timer(struct ipath_devdata *dd)
1934{
1935        /*
1936         * Early chips had a design flaw where the chip and kernel idea
1937         * of the tail register don't always agree, and therefore we won't
1938         * get an interrupt on the next packet received.
1939         * If the board supports per packet receive interrupts, use it.
1940         * Otherwise, the timer function periodically checks for packets
1941         * to cover this case.
1942         * Either way, the timer is needed for verbs layer related
1943         * processing.
1944         */
1945        if (dd->ipath_flags & IPATH_GPIO_INTR) {
1946                ipath_write_kreg(dd, dd->ipath_kregs->kr_debugportselect,
1947                                 0x2074076542310ULL);
1948                /* Enable GPIO bit 2 interrupt */
1949                dd->ipath_gpio_mask |= (u64) (1 << IPATH_GPIO_PORT0_BIT);
1950                ipath_write_kreg(dd, dd->ipath_kregs->kr_gpio_mask,
1951                                 dd->ipath_gpio_mask);
1952        }
1953
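            /*
             * In both cases, fire the verbs timer every jiffy;
             * __verbs_timer() re-arms itself.
             */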
1954        init_timer(&dd->verbs_timer);
1955        dd->verbs_timer.function = __verbs_timer;
1956        dd->verbs_timer.data = (unsigned long)dd;
1957        dd->verbs_timer.expires = jiffies + 1;
1958        add_timer(&dd->verbs_timer);
1959
1960        return 0;
1961}
1962
1963static int disable_timer(struct ipath_devdata *dd)
1964{
1965        /* Undo the receive-interrupt setup done by enable_timer(). */
1966        if (dd->ipath_flags & IPATH_GPIO_INTR) {
1967                /* Disable GPIO bit 2 interrupt */
1968                dd->ipath_gpio_mask &= ~((u64) (1 << IPATH_GPIO_PORT0_BIT));
1969                ipath_write_kreg(dd, dd->ipath_kregs->kr_gpio_mask,
1970                                 dd->ipath_gpio_mask);
1971                /*
1972                 * We might want to undo changes to debugportselect,
1973                 * but how?
1974                 */
1975        }
1976
1977        del_timer_sync(&dd->verbs_timer);
1978
1979        return 0;
1980}
1981
1982/**
1983 * ipath_register_ib_device - register our device with the infiniband core
1984 * @dd: the device data structure
1985 * Return 0 on success or a negative errno; dd->verbs_dev is set to the new ipath_ibdev (NULL on error).
1986 */
1987int ipath_register_ib_device(struct ipath_devdata *dd)
1988{
1989        struct ipath_verbs_counters cntrs;
1990        struct ipath_ibdev *idev;
1991        struct ib_device *dev;
1992        struct ipath_verbs_txreq *tx;
1993        unsigned i;
1994        int ret;
1995
1996        idev = (struct ipath_ibdev *)ib_alloc_device(sizeof *idev);
1997        if (idev == NULL) {
1998                ret = -ENOMEM;
1999                goto bail;
2000        }
2001
2002        dev = &idev->ibdev;
2003
2004        if (dd->ipath_sdma_descq_cnt) {
2005                tx = kmalloc(dd->ipath_sdma_descq_cnt * sizeof *tx,
2006                             GFP_KERNEL);
2007                if (tx == NULL) {
2008                        ret = -ENOMEM;
2009                        goto err_tx;
2010                }
2011        } else
2012                tx = NULL;
2013        idev->txreq_bufs = tx;
2014
2015        /* Only need to initialize non-zero fields. */
2016        spin_lock_init(&idev->n_pds_lock);
2017        spin_lock_init(&idev->n_ahs_lock);
2018        spin_lock_init(&idev->n_cqs_lock);
2019        spin_lock_init(&idev->n_qps_lock);
2020        spin_lock_init(&idev->n_srqs_lock);
2021        spin_lock_init(&idev->n_mcast_grps_lock);
2022
2023        spin_lock_init(&idev->qp_table.lock);
2024        spin_lock_init(&idev->lk_table.lock);
2025        idev->sm_lid = __constant_be16_to_cpu(IB_LID_PERMISSIVE);
2026        /* Set the prefix to the default value (see ch. 4.1.1) */
2027        idev->gid_prefix = __constant_cpu_to_be64(0xfe80000000000000ULL);
2028
2029        ret = ipath_init_qp_table(idev, ib_ipath_qp_table_size);
2030        if (ret)
2031                goto err_qp;
2032
2033        /*
2034         * The top ib_ipath_lkey_table_size bits are used to index the
2035         * table.  The lower 8 bits can be owned by the user (copied from
2036         * the LKEY).  The remaining bits act as a generation number or tag.
2037         */
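            /*
             * For example, a 12-bit table size gives a 4096-entry table
             * indexed by the top 12 bits of the LKEY, leaving
             * 32 - 12 - 8 = 12 bits in between for the generation tag.
             */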
2038        idev->lk_table.max = 1 << ib_ipath_lkey_table_size;
2039        idev->lk_table.table = kzalloc(idev->lk_table.max *
2040                                       sizeof(*idev->lk_table.table),
2041                                       GFP_KERNEL);
2042        if (idev->lk_table.table == NULL) {
2043                ret = -ENOMEM;
2044                goto err_lk;
2045        }
2046        INIT_LIST_HEAD(&idev->pending_mmaps);
2047        spin_lock_init(&idev->pending_lock);
2048        idev->mmap_offset = PAGE_SIZE;
2049        spin_lock_init(&idev->mmap_offset_lock);
2050        INIT_LIST_HEAD(&idev->pending[0]);
2051        INIT_LIST_HEAD(&idev->pending[1]);
2052        INIT_LIST_HEAD(&idev->pending[2]);
2053        INIT_LIST_HEAD(&idev->piowait);
2054        INIT_LIST_HEAD(&idev->rnrwait);
2055        INIT_LIST_HEAD(&idev->txreq_free);
2056        idev->pending_index = 0;
2057        idev->port_cap_flags =
2058                IB_PORT_SYS_IMAGE_GUID_SUP | IB_PORT_CLIENT_REG_SUP;
2059        if (dd->ipath_flags & IPATH_HAS_LINK_LATENCY)
2060                idev->port_cap_flags |= IB_PORT_LINK_LATENCY_SUP;
2061        idev->pma_counter_select[0] = IB_PMA_PORT_XMIT_DATA;
2062        idev->pma_counter_select[1] = IB_PMA_PORT_RCV_DATA;
2063        idev->pma_counter_select[2] = IB_PMA_PORT_XMIT_PKTS;
2064        idev->pma_counter_select[3] = IB_PMA_PORT_RCV_PKTS;
2065        idev->pma_counter_select[4] = IB_PMA_PORT_XMIT_WAIT;
2066
2067        /* Snapshot current HW counters to "clear" them. */
2068        ipath_get_counters(dd, &cntrs);
2069        idev->z_symbol_error_counter = cntrs.symbol_error_counter;
2070        idev->z_link_error_recovery_counter =
2071                cntrs.link_error_recovery_counter;
2072        idev->z_link_downed_counter = cntrs.link_downed_counter;
2073        idev->z_port_rcv_errors = cntrs.port_rcv_errors;
2074        idev->z_port_rcv_remphys_errors =
2075                cntrs.port_rcv_remphys_errors;
2076        idev->z_port_xmit_discards = cntrs.port_xmit_discards;
2077        idev->z_port_xmit_data = cntrs.port_xmit_data;
2078        idev->z_port_rcv_data = cntrs.port_rcv_data;
2079        idev->z_port_xmit_packets = cntrs.port_xmit_packets;
2080        idev->z_port_rcv_packets = cntrs.port_rcv_packets;
2081        idev->z_local_link_integrity_errors =
2082                cntrs.local_link_integrity_errors;
2083        idev->z_excessive_buffer_overrun_errors =
2084                cntrs.excessive_buffer_overrun_errors;
2085        idev->z_vl15_dropped = cntrs.vl15_dropped;
2086
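            /* Thread the preallocated verbs tx descriptors onto the free list. */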
2087        for (i = 0; i < dd->ipath_sdma_descq_cnt; i++, tx++)
2088                list_add(&tx->txreq.list, &idev->txreq_free);
2089
2090        /*
2091         * The system image GUID is supposed to be the same for all
2092         * IB HCAs in a single system but since there can be other
2093         * device types in the system, we can't be sure this is unique.
2094         */
2095        if (!sys_image_guid)
2096                sys_image_guid = dd->ipath_guid;
2097        idev->sys_image_guid = sys_image_guid;
2098        idev->ib_unit = dd->ipath_unit;
2099        idev->dd = dd;
2100
2101        strlcpy(dev->name, "ipath%d", IB_DEVICE_NAME_MAX);
2102        dev->owner = THIS_MODULE;
2103        dev->node_guid = dd->ipath_guid;
2104        dev->uverbs_abi_ver = IPATH_UVERBS_ABI_VERSION;
2105        dev->uverbs_cmd_mask =
2106                (1ull << IB_USER_VERBS_CMD_GET_CONTEXT)         |
2107                (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE)        |
2108                (1ull << IB_USER_VERBS_CMD_QUERY_PORT)          |
2109                (1ull << IB_USER_VERBS_CMD_ALLOC_PD)            |
2110                (1ull << IB_USER_VERBS_CMD_DEALLOC_PD)          |
2111                (1ull << IB_USER_VERBS_CMD_CREATE_AH)           |
2112                (1ull << IB_USER_VERBS_CMD_DESTROY_AH)          |
2113                (1ull << IB_USER_VERBS_CMD_QUERY_AH)            |
2114                (1ull << IB_USER_VERBS_CMD_REG_MR)              |
2115                (1ull << IB_USER_VERBS_CMD_DEREG_MR)            |
2116                (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) |
2117                (1ull << IB_USER_VERBS_CMD_CREATE_CQ)           |
2118                (1ull << IB_USER_VERBS_CMD_RESIZE_CQ)           |
2119                (1ull << IB_USER_VERBS_CMD_DESTROY_CQ)          |
2120                (1ull << IB_USER_VERBS_CMD_POLL_CQ)             |
2121                (1ull << IB_USER_VERBS_CMD_REQ_NOTIFY_CQ)       |
2122                (1ull << IB_USER_VERBS_CMD_CREATE_QP)           |
2123                (1ull << IB_USER_VERBS_CMD_QUERY_QP)            |
2124                (1ull << IB_USER_VERBS_CMD_MODIFY_QP)           |
2125                (1ull << IB_USER_VERBS_CMD_DESTROY_QP)          |
2126                (1ull << IB_USER_VERBS_CMD_POST_SEND)           |
2127                (1ull << IB_USER_VERBS_CMD_POST_RECV)           |
2128                (1ull << IB_USER_VERBS_CMD_ATTACH_MCAST)        |
2129                (1ull << IB_USER_VERBS_CMD_DETACH_MCAST)        |
2130                (1ull << IB_USER_VERBS_CMD_CREATE_SRQ)          |
2131                (1ull << IB_USER_VERBS_CMD_MODIFY_SRQ)          |
2132                (1ull << IB_USER_VERBS_CMD_QUERY_SRQ)           |
2133                (1ull << IB_USER_VERBS_CMD_DESTROY_SRQ)         |
2134                (1ull << IB_USER_VERBS_CMD_POST_SRQ_RECV);
2135        dev->node_type = RDMA_NODE_IB_CA;
2136        dev->phys_port_cnt = 1;
2137        dev->num_comp_vectors = 1;
2138        dev->dma_device = &dd->pcidev->dev;
2139        dev->query_device = ipath_query_device;
2140        dev->modify_device = ipath_modify_device;
2141        dev->query_port = ipath_query_port;
2142        dev->modify_port = ipath_modify_port;
2143        dev->query_pkey = ipath_query_pkey;
2144        dev->query_gid = ipath_query_gid;
2145        dev->alloc_ucontext = ipath_alloc_ucontext;
2146        dev->dealloc_ucontext = ipath_dealloc_ucontext;
2147        dev->alloc_pd = ipath_alloc_pd;
2148        dev->dealloc_pd = ipath_dealloc_pd;
2149        dev->create_ah = ipath_create_ah;
2150        dev->destroy_ah = ipath_destroy_ah;
2151        dev->query_ah = ipath_query_ah;
2152        dev->create_srq = ipath_create_srq;
2153        dev->modify_srq = ipath_modify_srq;
2154        dev->query_srq = ipath_query_srq;
2155        dev->destroy_srq = ipath_destroy_srq;
2156        dev->create_qp = ipath_create_qp;
2157        dev->modify_qp = ipath_modify_qp;
2158        dev->query_qp = ipath_query_qp;
2159        dev->destroy_qp = ipath_destroy_qp;
2160        dev->post_send = ipath_post_send;
2161        dev->post_recv = ipath_post_receive;
2162        dev->post_srq_recv = ipath_post_srq_receive;
2163        dev->create_cq = ipath_create_cq;
2164        dev->destroy_cq = ipath_destroy_cq;
2165        dev->resize_cq = ipath_resize_cq;
2166        dev->poll_cq = ipath_poll_cq;
2167        dev->req_notify_cq = ipath_req_notify_cq;
2168        dev->get_dma_mr = ipath_get_dma_mr;
2169        dev->reg_phys_mr = ipath_reg_phys_mr;
2170        dev->reg_user_mr = ipath_reg_user_mr;
2171        dev->dereg_mr = ipath_dereg_mr;
2172        dev->alloc_fmr = ipath_alloc_fmr;
2173        dev->map_phys_fmr = ipath_map_phys_fmr;
2174        dev->unmap_fmr = ipath_unmap_fmr;
2175        dev->dealloc_fmr = ipath_dealloc_fmr;
2176        dev->attach_mcast = ipath_multicast_attach;
2177        dev->detach_mcast = ipath_multicast_detach;
2178        dev->process_mad = ipath_process_mad;
2179        dev->mmap = ipath_mmap;
2180        dev->dma_ops = &ipath_dma_mapping_ops;
2181
2182        snprintf(dev->node_desc, sizeof(dev->node_desc),
2183                 IPATH_IDSTR " %s", init_utsname()->nodename);
2184
2185        ret = ib_register_device(dev, NULL);
2186        if (ret)
2187                goto err_reg;
2188
2189        if (ipath_verbs_register_sysfs(dev))
2190                goto err_class;
2191
2192        enable_timer(dd);
2193
2194        goto bail;
2195
2196err_class:
2197        ib_unregister_device(dev);
2198err_reg:
2199        kfree(idev->lk_table.table);
2200err_lk:
2201        kfree(idev->qp_table.table);
2202err_qp:
2203        kfree(idev->txreq_bufs);
2204err_tx:
2205        ib_dealloc_device(dev);
2206        ipath_dev_err(dd, "cannot register verbs: %d!\n", -ret);
2207        idev = NULL;
2208
2209bail:
2210        dd->verbs_dev = idev;
2211        return ret;
2212}
2213
2214void ipath_unregister_ib_device(struct ipath_ibdev *dev)
2215{
2216        struct ib_device *ibdev = &dev->ibdev;
2217        u32 qps_inuse;
2218
2219        ib_unregister_device(ibdev);
2220
2221        disable_timer(dev->dd);
2222
2223        if (!list_empty(&dev->pending[0]) ||
2224            !list_empty(&dev->pending[1]) ||
2225            !list_empty(&dev->pending[2]))
2226                ipath_dev_err(dev->dd, "pending list not empty!\n");
2227        if (!list_empty(&dev->piowait))
2228                ipath_dev_err(dev->dd, "piowait list not empty!\n");
2229        if (!list_empty(&dev->rnrwait))
2230                ipath_dev_err(dev->dd, "rnrwait list not empty!\n");
2231        if (!ipath_mcast_tree_empty())
2232                ipath_dev_err(dev->dd, "multicast table memory leak!\n");
2233        /*
2234         * Note that ipath_unregister_ib_device() can be called before all
2235         * the QPs are destroyed!
2236         */
2237        qps_inuse = ipath_free_all_qps(&dev->qp_table);
2238        if (qps_inuse)
2239                ipath_dev_err(dev->dd, "QP memory leak! %u still in use\n",
2240                        qps_inuse);
2241        kfree(dev->qp_table.table);
2242        kfree(dev->lk_table.table);
2243        kfree(dev->txreq_bufs);
2244        ib_dealloc_device(ibdev);
2245}
2246
2247static ssize_t show_rev(struct device *device, struct device_attribute *attr,
2248                        char *buf)
2249{
2250        struct ipath_ibdev *dev =
2251                container_of(device, struct ipath_ibdev, ibdev.dev);
2252
2253        return sprintf(buf, "%x\n", dev->dd->ipath_pcirev);
2254}
2255
2256static ssize_t show_hca(struct device *device, struct device_attribute *attr,
2257                        char *buf)
2258{
2259        struct ipath_ibdev *dev =
2260                container_of(device, struct ipath_ibdev, ibdev.dev);
2261        int ret;
2262
2263        ret = dev->dd->ipath_f_get_boardname(dev->dd, buf, 128);
2264        if (ret < 0)
2265                goto bail;
2266        strcat(buf, "\n");
2267        ret = strlen(buf);
2268
2269bail:
2270        return ret;
2271}
2272
2273static ssize_t show_stats(struct device *device, struct device_attribute *attr,
2274                          char *buf)
2275{
2276        struct ipath_ibdev *dev =
2277                container_of(device, struct ipath_ibdev, ibdev.dev);
2278        int i;
2279        int len;
2280
2281        len = sprintf(buf,
2282                      "RC resends  %d\n"
2283                      "RC no QACK  %d\n"
2284                      "RC ACKs     %d\n"
2285                      "RC SEQ NAKs %d\n"
2286                      "RC RDMA seq %d\n"
2287                      "RC RNR NAKs %d\n"
2288                      "RC OTH NAKs %d\n"
2289                      "RC timeouts %d\n"
2290                      "RC RDMA dup %d\n"
2291                      "piobuf wait %d\n"
2292                      "unaligned   %d\n"
2293                      "PKT drops   %d\n"
2294                      "WQE errs    %d\n",
2295                      dev->n_rc_resends, dev->n_rc_qacks, dev->n_rc_acks,
2296                      dev->n_seq_naks, dev->n_rdma_seq, dev->n_rnr_naks,
2297                      dev->n_other_naks, dev->n_timeouts,
2298                      dev->n_rdma_dup_busy, dev->n_piowait, dev->n_unaligned,
2299                      dev->n_pkt_drops, dev->n_wqe_errs);
2300        for (i = 0; i < ARRAY_SIZE(dev->opstats); i++) {
2301                const struct ipath_opcode_stats *si = &dev->opstats[i];
2302
2303                if (!si->n_packets && !si->n_bytes)
2304                        continue;
2305                len += sprintf(buf + len, "%02x %llu/%llu\n", i,
2306                               (unsigned long long) si->n_packets,
2307                               (unsigned long long) si->n_bytes);
2308        }
2309        return len;
2310}
2311
2312static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL);
2313static DEVICE_ATTR(hca_type, S_IRUGO, show_hca, NULL);
2314static DEVICE_ATTR(board_id, S_IRUGO, show_hca, NULL);
2315static DEVICE_ATTR(stats, S_IRUGO, show_stats, NULL);
2316
2317static struct device_attribute *ipath_class_attributes[] = {
2318        &dev_attr_hw_rev,
2319        &dev_attr_hca_type,
2320        &dev_attr_board_id,
2321        &dev_attr_stats
2322};
2323
2324static int ipath_verbs_register_sysfs(struct ib_device *dev)
2325{
2326        int i;
2327        int ret;
2328
2329        for (i = 0; i < ARRAY_SIZE(ipath_class_attributes); ++i)
2330                if (device_create_file(&dev->dev,
2331                                       ipath_class_attributes[i])) {
2332                        ret = 1;
2333                        goto bail;
2334                }
2335
2336        ret = 0;
2337
2338bail:
2339        return ret;
2340}
2341