linux/drivers/infiniband/hw/hfi1/driver.c
<<
>>
Prefs
   1/*
   2 * Copyright(c) 2015-2017 Intel Corporation.
   3 *
   4 * This file is provided under a dual BSD/GPLv2 license.  When using or
   5 * redistributing this file, you may do so under either license.
   6 *
   7 * GPL LICENSE SUMMARY
   8 *
   9 * This program is free software; you can redistribute it and/or modify
  10 * it under the terms of version 2 of the GNU General Public License as
  11 * published by the Free Software Foundation.
  12 *
  13 * This program is distributed in the hope that it will be useful, but
  14 * WITHOUT ANY WARRANTY; without even the implied warranty of
  15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  16 * General Public License for more details.
  17 *
  18 * BSD LICENSE
  19 *
  20 * Redistribution and use in source and binary forms, with or without
  21 * modification, are permitted provided that the following conditions
  22 * are met:
  23 *
  24 *  - Redistributions of source code must retain the above copyright
  25 *    notice, this list of conditions and the following disclaimer.
  26 *  - Redistributions in binary form must reproduce the above copyright
  27 *    notice, this list of conditions and the following disclaimer in
  28 *    the documentation and/or other materials provided with the
  29 *    distribution.
  30 *  - Neither the name of Intel Corporation nor the names of its
  31 *    contributors may be used to endorse or promote products derived
  32 *    from this software without specific prior written permission.
  33 *
  34 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  35 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  36 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  37 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  38 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  39 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  40 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  41 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  42 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  43 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  44 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  45 *
  46 */
  47
  48#include <linux/spinlock.h>
  49#include <linux/pci.h>
  50#include <linux/io.h>
  51#include <linux/delay.h>
  52#include <linux/netdevice.h>
  53#include <linux/vmalloc.h>
  54#include <linux/module.h>
  55#include <linux/prefetch.h>
  56#include <rdma/ib_verbs.h>
  57
  58#include "hfi.h"
  59#include "trace.h"
  60#include "qp.h"
  61#include "sdma.h"
  62#include "debugfs.h"
  63#include "vnic.h"
  64
  65#undef pr_fmt
  66#define pr_fmt(fmt) DRIVER_NAME ": " fmt
  67
/*
 * Driver version string: HFI1_DRIVER_VERSION followed by a newline.
 *
 * The size has to be longer than this string, so we can append
 * board/chip information to it in the initialization code.
 *
 * NOTE(review): the array is declared const and is sized by its
 * initializer, so nothing can be appended to it as declared here —
 * confirm whether the remark above is stale.
 */
const char ib_hfi1_version[] = HFI1_DRIVER_VERSION "\n";
  73
/*
 * hfi1_devs_lock is taken (irqsave) by hfi1_count_active_units() while it
 * walks hfi1_dev_list.
 */
DEFINE_SPINLOCK(hfi1_devs_lock);
LIST_HEAD(hfi1_dev_list);
DEFINE_MUTEX(hfi1_mutex);       /* general driver use */

/* "max_mtu" module parameter: readable, not writable, through sysfs. */
unsigned int hfi1_max_mtu = HFI1_DEFAULT_MAX_MTU;
module_param_named(max_mtu, hfi1_max_mtu, uint, S_IRUGO);
MODULE_PARM_DESC(max_mtu, "Set max MTU bytes, default is " __stringify(
                 HFI1_DEFAULT_MAX_MTU));

/* "cu" module parameter: readable, not writable, through sysfs. */
unsigned int hfi1_cu = 1;
module_param_named(cu, hfi1_cu, uint, S_IRUGO);
MODULE_PARM_DESC(cu, "Credit return units");

/*
 * "cap_mask" module parameter.  Reads and writes go through the custom
 * hfi1_caps_get()/hfi1_caps_set() callbacks below instead of the stock
 * integer handlers; the parameter is writable by the owner (S_IWUSR).
 */
unsigned long hfi1_cap_mask = HFI1_CAP_MASK_DEFAULT;
static int hfi1_caps_set(const char *val, const struct kernel_param *kp);
static int hfi1_caps_get(char *buffer, const struct kernel_param *kp);
static const struct kernel_param_ops cap_ops = {
        .set = hfi1_caps_set,
        .get = hfi1_caps_get
};
module_param_cb(cap_mask, &cap_ops, &hfi1_cap_mask, S_IWUSR | S_IRUGO);
MODULE_PARM_DESC(cap_mask, "Bit mask of enabled/disabled HW features");

MODULE_LICENSE("Dual BSD/GPL");
MODULE_DESCRIPTION("Intel Omni-Path Architecture driver");
  99
/*
 * MAX_PKT_RECV is the max # of packets processed per receive interrupt.
 * check_max_packet() masks the packet count with (MAX_PKT_RECV - 1), so
 * the value must remain a power of two.
 */
#define MAX_PKT_RECV 64
/*
 * MAX_PKT_RECV_THREAD is the max # of packets processed before
 * the qp_wait_list queue is flushed (threaded receive only, see
 * max_packet_exceeded()).  Also used as a (value - 1) bit mask.
 */
#define MAX_PKT_RECV_THREAD (MAX_PKT_RECV * 4)
#define EGR_HEAD_UPDATE_THRESHOLD 16

struct hfi1_ib_stats hfi1_stats;
 112
 113static int hfi1_caps_set(const char *val, const struct kernel_param *kp)
 114{
 115        int ret = 0;
 116        unsigned long *cap_mask_ptr = (unsigned long *)kp->arg,
 117                cap_mask = *cap_mask_ptr, value, diff,
 118                write_mask = ((HFI1_CAP_WRITABLE_MASK << HFI1_CAP_USER_SHIFT) |
 119                              HFI1_CAP_WRITABLE_MASK);
 120
 121        ret = kstrtoul(val, 0, &value);
 122        if (ret) {
 123                pr_warn("Invalid module parameter value for 'cap_mask'\n");
 124                goto done;
 125        }
 126        /* Get the changed bits (except the locked bit) */
 127        diff = value ^ (cap_mask & ~HFI1_CAP_LOCKED_SMASK);
 128
 129        /* Remove any bits that are not allowed to change after driver load */
 130        if (HFI1_CAP_LOCKED() && (diff & ~write_mask)) {
 131                pr_warn("Ignoring non-writable capability bits %#lx\n",
 132                        diff & ~write_mask);
 133                diff &= write_mask;
 134        }
 135
 136        /* Mask off any reserved bits */
 137        diff &= ~HFI1_CAP_RESERVED_MASK;
 138        /* Clear any previously set and changing bits */
 139        cap_mask &= ~diff;
 140        /* Update the bits with the new capability */
 141        cap_mask |= (value & diff);
 142        /* Check for any kernel/user restrictions */
 143        diff = (cap_mask & (HFI1_CAP_MUST_HAVE_KERN << HFI1_CAP_USER_SHIFT)) ^
 144                ((cap_mask & HFI1_CAP_MUST_HAVE_KERN) << HFI1_CAP_USER_SHIFT);
 145        cap_mask &= ~diff;
 146        /* Set the bitmask to the final set */
 147        *cap_mask_ptr = cap_mask;
 148done:
 149        return ret;
 150}
 151
 152static int hfi1_caps_get(char *buffer, const struct kernel_param *kp)
 153{
 154        unsigned long cap_mask = *(unsigned long *)kp->arg;
 155
 156        cap_mask &= ~HFI1_CAP_LOCKED_SMASK;
 157        cap_mask |= ((cap_mask & HFI1_CAP_K2U) << HFI1_CAP_USER_SHIFT);
 158
 159        return scnprintf(buffer, PAGE_SIZE, "0x%lx", cap_mask);
 160}
 161
 162const char *get_unit_name(int unit)
 163{
 164        static char iname[16];
 165
 166        snprintf(iname, sizeof(iname), DRIVER_NAME "_%u", unit);
 167        return iname;
 168}
 169
 170const char *get_card_name(struct rvt_dev_info *rdi)
 171{
 172        struct hfi1_ibdev *ibdev = container_of(rdi, struct hfi1_ibdev, rdi);
 173        struct hfi1_devdata *dd = container_of(ibdev,
 174                                               struct hfi1_devdata, verbs_dev);
 175        return get_unit_name(dd->unit);
 176}
 177
 178struct pci_dev *get_pci_dev(struct rvt_dev_info *rdi)
 179{
 180        struct hfi1_ibdev *ibdev = container_of(rdi, struct hfi1_ibdev, rdi);
 181        struct hfi1_devdata *dd = container_of(ibdev,
 182                                               struct hfi1_devdata, verbs_dev);
 183        return dd->pcidev;
 184}
 185
 186/*
 187 * Return count of units with at least one port ACTIVE.
 188 */
 189int hfi1_count_active_units(void)
 190{
 191        struct hfi1_devdata *dd;
 192        struct hfi1_pportdata *ppd;
 193        unsigned long flags;
 194        int pidx, nunits_active = 0;
 195
 196        spin_lock_irqsave(&hfi1_devs_lock, flags);
 197        list_for_each_entry(dd, &hfi1_dev_list, list) {
 198                if (!(dd->flags & HFI1_PRESENT) || !dd->kregbase1)
 199                        continue;
 200                for (pidx = 0; pidx < dd->num_pports; ++pidx) {
 201                        ppd = dd->pport + pidx;
 202                        if (ppd->lid && ppd->linkup) {
 203                                nunits_active++;
 204                                break;
 205                        }
 206                }
 207        }
 208        spin_unlock_irqrestore(&hfi1_devs_lock, flags);
 209        return nunits_active;
 210}
 211
 212/*
 213 * Get address of eager buffer from it's index (allocated in chunks, not
 214 * contiguous).
 215 */
 216static inline void *get_egrbuf(const struct hfi1_ctxtdata *rcd, u64 rhf,
 217                               u8 *update)
 218{
 219        u32 idx = rhf_egr_index(rhf), offset = rhf_egr_buf_offset(rhf);
 220
 221        *update |= !(idx & (rcd->egrbufs.threshold - 1)) && !offset;
 222        return (void *)(((u64)(rcd->egrbufs.rcvtids[idx].addr)) +
 223                        (offset * RCV_BUF_BLOCK_SIZE));
 224}
 225
 226static inline void *hfi1_get_header(struct hfi1_devdata *dd,
 227                                    __le32 *rhf_addr)
 228{
 229        u32 offset = rhf_hdrq_offset(rhf_to_cpu(rhf_addr));
 230
 231        return (void *)(rhf_addr - dd->rhf_offset + offset);
 232}
 233
 234static inline struct ib_header *hfi1_get_msgheader(struct hfi1_devdata *dd,
 235                                                   __le32 *rhf_addr)
 236{
 237        return (struct ib_header *)hfi1_get_header(dd, rhf_addr);
 238}
 239
 240static inline struct hfi1_16b_header
 241                *hfi1_get_16B_header(struct hfi1_devdata *dd,
 242                                     __le32 *rhf_addr)
 243{
 244        return (struct hfi1_16b_header *)hfi1_get_header(dd, rhf_addr);
 245}
 246
 247/*
 248 * Validate and encode the a given RcvArray Buffer size.
 249 * The function will check whether the given size falls within
 250 * allowed size ranges for the respective type and, optionally,
 251 * return the proper encoding.
 252 */
 253int hfi1_rcvbuf_validate(u32 size, u8 type, u16 *encoded)
 254{
 255        if (unlikely(!PAGE_ALIGNED(size)))
 256                return 0;
 257        if (unlikely(size < MIN_EAGER_BUFFER))
 258                return 0;
 259        if (size >
 260            (type == PT_EAGER ? MAX_EAGER_BUFFER : MAX_EXPECTED_BUFFER))
 261                return 0;
 262        if (encoded)
 263                *encoded = ilog2(size / PAGE_SIZE) + 1;
 264        return 1;
 265}
 266
 267static void rcv_hdrerr(struct hfi1_ctxtdata *rcd, struct hfi1_pportdata *ppd,
 268                       struct hfi1_packet *packet)
 269{
 270        struct ib_header *rhdr = packet->hdr;
 271        u32 rte = rhf_rcv_type_err(packet->rhf);
 272        u32 mlid_base;
 273        struct hfi1_ibport *ibp = rcd_to_iport(rcd);
 274        struct hfi1_devdata *dd = ppd->dd;
 275        struct rvt_dev_info *rdi = &dd->verbs_dev.rdi;
 276
 277        if (packet->rhf & (RHF_VCRC_ERR | RHF_ICRC_ERR))
 278                return;
 279
 280        if (packet->etype == RHF_RCV_TYPE_BYPASS) {
 281                goto drop;
 282        } else {
 283                u8 lnh = ib_get_lnh(rhdr);
 284
 285                mlid_base = be16_to_cpu(IB_MULTICAST_LID_BASE);
 286                if (lnh == HFI1_LRH_BTH) {
 287                        packet->ohdr = &rhdr->u.oth;
 288                } else if (lnh == HFI1_LRH_GRH) {
 289                        packet->ohdr = &rhdr->u.l.oth;
 290                        packet->grh = &rhdr->u.l.grh;
 291                } else {
 292                        goto drop;
 293                }
 294        }
 295
 296        if (packet->rhf & RHF_TID_ERR) {
 297                /* For TIDERR and RC QPs preemptively schedule a NAK */
 298                u32 tlen = rhf_pkt_len(packet->rhf); /* in bytes */
 299                u32 dlid = ib_get_dlid(rhdr);
 300                u32 qp_num;
 301
 302                /* Sanity check packet */
 303                if (tlen < 24)
 304                        goto drop;
 305
 306                /* Check for GRH */
 307                if (packet->grh) {
 308                        u32 vtf;
 309                        struct ib_grh *grh = packet->grh;
 310
 311                        if (grh->next_hdr != IB_GRH_NEXT_HDR)
 312                                goto drop;
 313                        vtf = be32_to_cpu(grh->version_tclass_flow);
 314                        if ((vtf >> IB_GRH_VERSION_SHIFT) != IB_GRH_VERSION)
 315                                goto drop;
 316                }
 317
 318                /* Get the destination QP number. */
 319                qp_num = ib_bth_get_qpn(packet->ohdr);
 320                if (dlid < mlid_base) {
 321                        struct rvt_qp *qp;
 322                        unsigned long flags;
 323
 324                        rcu_read_lock();
 325                        qp = rvt_lookup_qpn(rdi, &ibp->rvp, qp_num);
 326                        if (!qp) {
 327                                rcu_read_unlock();
 328                                goto drop;
 329                        }
 330
 331                        /*
 332                         * Handle only RC QPs - for other QP types drop error
 333                         * packet.
 334                         */
 335                        spin_lock_irqsave(&qp->r_lock, flags);
 336
 337                        /* Check for valid receive state. */
 338                        if (!(ib_rvt_state_ops[qp->state] &
 339                              RVT_PROCESS_RECV_OK)) {
 340                                ibp->rvp.n_pkt_drops++;
 341                        }
 342
 343                        switch (qp->ibqp.qp_type) {
 344                        case IB_QPT_RC:
 345                                hfi1_rc_hdrerr(rcd, packet, qp);
 346                                break;
 347                        default:
 348                                /* For now don't handle any other QP types */
 349                                break;
 350                        }
 351
 352                        spin_unlock_irqrestore(&qp->r_lock, flags);
 353                        rcu_read_unlock();
 354                } /* Unicast QP */
 355        } /* Valid packet with TIDErr */
 356
 357        /* handle "RcvTypeErr" flags */
 358        switch (rte) {
 359        case RHF_RTE_ERROR_OP_CODE_ERR:
 360        {
 361                void *ebuf = NULL;
 362                u8 opcode;
 363
 364                if (rhf_use_egr_bfr(packet->rhf))
 365                        ebuf = packet->ebuf;
 366
 367                if (!ebuf)
 368                        goto drop; /* this should never happen */
 369
 370                opcode = ib_bth_get_opcode(packet->ohdr);
 371                if (opcode == IB_OPCODE_CNP) {
 372                        /*
 373                         * Only in pre-B0 h/w is the CNP_OPCODE handled
 374                         * via this code path.
 375                         */
 376                        struct rvt_qp *qp = NULL;
 377                        u32 lqpn, rqpn;
 378                        u16 rlid;
 379                        u8 svc_type, sl, sc5;
 380
 381                        sc5 = hfi1_9B_get_sc5(rhdr, packet->rhf);
 382                        sl = ibp->sc_to_sl[sc5];
 383
 384                        lqpn = ib_bth_get_qpn(packet->ohdr);
 385                        rcu_read_lock();
 386                        qp = rvt_lookup_qpn(rdi, &ibp->rvp, lqpn);
 387                        if (!qp) {
 388                                rcu_read_unlock();
 389                                goto drop;
 390                        }
 391
 392                        switch (qp->ibqp.qp_type) {
 393                        case IB_QPT_UD:
 394                                rlid = 0;
 395                                rqpn = 0;
 396                                svc_type = IB_CC_SVCTYPE_UD;
 397                                break;
 398                        case IB_QPT_UC:
 399                                rlid = ib_get_slid(rhdr);
 400                                rqpn = qp->remote_qpn;
 401                                svc_type = IB_CC_SVCTYPE_UC;
 402                                break;
 403                        default:
 404                                goto drop;
 405                        }
 406
 407                        process_becn(ppd, sl, rlid, lqpn, rqpn, svc_type);
 408                        rcu_read_unlock();
 409                }
 410
 411                packet->rhf &= ~RHF_RCV_TYPE_ERR_SMASK;
 412                break;
 413        }
 414        default:
 415                break;
 416        }
 417
 418drop:
 419        return;
 420}
 421
 422static inline void init_packet(struct hfi1_ctxtdata *rcd,
 423                               struct hfi1_packet *packet)
 424{
 425        packet->rsize = rcd->rcvhdrqentsize; /* words */
 426        packet->maxcnt = rcd->rcvhdrq_cnt * packet->rsize; /* words */
 427        packet->rcd = rcd;
 428        packet->updegr = 0;
 429        packet->etail = -1;
 430        packet->rhf_addr = get_rhf_addr(rcd);
 431        packet->rhf = rhf_to_cpu(packet->rhf_addr);
 432        packet->rhqoff = rcd->head;
 433        packet->numpkt = 0;
 434}
 435
 436void hfi1_process_ecn_slowpath(struct rvt_qp *qp, struct hfi1_packet *pkt,
 437                               bool do_cnp)
 438{
 439        struct hfi1_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num);
 440        struct ib_other_headers *ohdr = pkt->ohdr;
 441        struct ib_grh *grh = pkt->grh;
 442        u32 rqpn = 0, bth1;
 443        u16 pkey, rlid, dlid = ib_get_dlid(pkt->hdr);
 444        u8 hdr_type, sc, svc_type;
 445        bool is_mcast = false;
 446
 447        if (pkt->etype == RHF_RCV_TYPE_BYPASS) {
 448                is_mcast = hfi1_is_16B_mcast(dlid);
 449                pkey = hfi1_16B_get_pkey(pkt->hdr);
 450                sc = hfi1_16B_get_sc(pkt->hdr);
 451                hdr_type = HFI1_PKT_TYPE_16B;
 452        } else {
 453                is_mcast = (dlid > be16_to_cpu(IB_MULTICAST_LID_BASE)) &&
 454                           (dlid != be16_to_cpu(IB_LID_PERMISSIVE));
 455                pkey = ib_bth_get_pkey(ohdr);
 456                sc = hfi1_9B_get_sc5(pkt->hdr, pkt->rhf);
 457                hdr_type = HFI1_PKT_TYPE_9B;
 458        }
 459
 460        switch (qp->ibqp.qp_type) {
 461        case IB_QPT_SMI:
 462        case IB_QPT_GSI:
 463        case IB_QPT_UD:
 464                rlid = ib_get_slid(pkt->hdr);
 465                rqpn = ib_get_sqpn(pkt->ohdr);
 466                svc_type = IB_CC_SVCTYPE_UD;
 467                break;
 468        case IB_QPT_UC:
 469                rlid = rdma_ah_get_dlid(&qp->remote_ah_attr);
 470                rqpn = qp->remote_qpn;
 471                svc_type = IB_CC_SVCTYPE_UC;
 472                break;
 473        case IB_QPT_RC:
 474                rlid = rdma_ah_get_dlid(&qp->remote_ah_attr);
 475                rqpn = qp->remote_qpn;
 476                svc_type = IB_CC_SVCTYPE_RC;
 477                break;
 478        default:
 479                return;
 480        }
 481
 482        bth1 = be32_to_cpu(ohdr->bth[1]);
 483        /* Call appropriate CNP handler */
 484        if (do_cnp && (bth1 & IB_FECN_SMASK))
 485                hfi1_handle_cnp_tbl[hdr_type](ibp, qp, rqpn, pkey,
 486                                              dlid, rlid, sc, grh);
 487
 488        if (!is_mcast && (bth1 & IB_BECN_SMASK)) {
 489                struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
 490                u32 lqpn = bth1 & RVT_QPN_MASK;
 491                u8 sl = ibp->sc_to_sl[sc];
 492
 493                process_becn(ppd, sl, rlid, lqpn, rqpn, svc_type);
 494        }
 495
 496}
 497
/*
 * Cursor state for the prescan of a receive header queue
 * (see init_ps_mdata(), ps_done(), ps_skip() and update_ps_mdata()).
 */
struct ps_mdata {
        struct hfi1_ctxtdata *rcd;      /* context being prescanned */
        u32 rsize;                      /* copy of packet->rsize (entry size, words) */
        u32 maxcnt;                     /* copy of packet->maxcnt (queue size, words) */
        u32 ps_head;                    /* current prescan offset into the queue */
        u32 ps_tail;                    /* hardware tail; only set with DMA_RTAIL */
        u32 ps_seq;                     /* expected RHF sequence number, when tracked */
};
 506
 507static inline void init_ps_mdata(struct ps_mdata *mdata,
 508                                 struct hfi1_packet *packet)
 509{
 510        struct hfi1_ctxtdata *rcd = packet->rcd;
 511
 512        mdata->rcd = rcd;
 513        mdata->rsize = packet->rsize;
 514        mdata->maxcnt = packet->maxcnt;
 515        mdata->ps_head = packet->rhqoff;
 516
 517        if (HFI1_CAP_KGET_MASK(rcd->flags, DMA_RTAIL)) {
 518                mdata->ps_tail = get_rcvhdrtail(rcd);
 519                if (rcd->ctxt == HFI1_CTRL_CTXT)
 520                        mdata->ps_seq = rcd->seq_cnt;
 521                else
 522                        mdata->ps_seq = 0; /* not used with DMA_RTAIL */
 523        } else {
 524                mdata->ps_tail = 0; /* used only with DMA_RTAIL*/
 525                mdata->ps_seq = rcd->seq_cnt;
 526        }
 527}
 528
 529static inline int ps_done(struct ps_mdata *mdata, u64 rhf,
 530                          struct hfi1_ctxtdata *rcd)
 531{
 532        if (HFI1_CAP_KGET_MASK(rcd->flags, DMA_RTAIL))
 533                return mdata->ps_head == mdata->ps_tail;
 534        return mdata->ps_seq != rhf_rcv_seq(rhf);
 535}
 536
 537static inline int ps_skip(struct ps_mdata *mdata, u64 rhf,
 538                          struct hfi1_ctxtdata *rcd)
 539{
 540        /*
 541         * Control context can potentially receive an invalid rhf.
 542         * Drop such packets.
 543         */
 544        if ((rcd->ctxt == HFI1_CTRL_CTXT) && (mdata->ps_head != mdata->ps_tail))
 545                return mdata->ps_seq != rhf_rcv_seq(rhf);
 546
 547        return 0;
 548}
 549
 550static inline void update_ps_mdata(struct ps_mdata *mdata,
 551                                   struct hfi1_ctxtdata *rcd)
 552{
 553        mdata->ps_head += mdata->rsize;
 554        if (mdata->ps_head >= mdata->maxcnt)
 555                mdata->ps_head = 0;
 556
 557        /* Control context must do seq counting */
 558        if (!HFI1_CAP_KGET_MASK(rcd->flags, DMA_RTAIL) ||
 559            (rcd->ctxt == HFI1_CTRL_CTXT)) {
 560                if (++mdata->ps_seq > 13)
 561                        mdata->ps_seq = 1;
 562        }
 563}
 564
 565/*
 566 * prescan_rxq - search through the receive queue looking for packets
 567 * containing Excplicit Congestion Notifications (FECNs, or BECNs).
 568 * When an ECN is found, process the Congestion Notification, and toggle
 569 * it off.
 570 * This is declared as a macro to allow quick checking of the port to avoid
 571 * the overhead of a function call if not enabled.
 572 */
 573#define prescan_rxq(rcd, packet) \
 574        do { \
 575                if (rcd->ppd->cc_prescan) \
 576                        __prescan_rxq(packet); \
 577        } while (0)
/*
 * __prescan_rxq - walk the pending header queue entries and handle ECNs
 * @packet: packet state for this pass; hdr, ohdr and grh are overwritten
 *          for every IB entry that is examined
 *
 * Starting at packet->rhqoff, each entry is examined until ps_done()
 * reports the end.  Only RHF_RCV_TYPE_IB entries with a recognized LNH
 * are considered.  When FECN or BECN is set in BTH[1] and the destination
 * QP exists, process_ecn() is called and both bits are then cleared in
 * the header in place.
 */
static void __prescan_rxq(struct hfi1_packet *packet)
{
        struct hfi1_ctxtdata *rcd = packet->rcd;
        struct ps_mdata mdata;

        init_ps_mdata(&mdata, packet);

        while (1) {
                struct hfi1_devdata *dd = rcd->dd;
                struct hfi1_ibport *ibp = rcd_to_iport(rcd);
                /* RHF of the entry at the current prescan offset */
                __le32 *rhf_addr = (__le32 *)rcd->rcvhdrq + mdata.ps_head +
                                         dd->rhf_offset;
                struct rvt_qp *qp;
                struct ib_header *hdr;
                struct rvt_dev_info *rdi = &dd->verbs_dev.rdi;
                u64 rhf = rhf_to_cpu(rhf_addr);
                u32 etype = rhf_rcv_type(rhf), qpn, bth1;
                int is_ecn = 0;
                u8 lnh;

                if (ps_done(&mdata, rhf, rcd))
                        break;

                if (ps_skip(&mdata, rhf, rcd))
                        goto next;

                if (etype != RHF_RCV_TYPE_IB)
                        goto next;

                packet->hdr = hfi1_get_msgheader(dd, rhf_addr);
                hdr = packet->hdr;
                lnh = ib_get_lnh(hdr);

                if (lnh == HFI1_LRH_BTH) {
                        packet->ohdr = &hdr->u.oth;
                        packet->grh = NULL;
                } else if (lnh == HFI1_LRH_GRH) {
                        packet->ohdr = &hdr->u.l.oth;
                        packet->grh = &hdr->u.l.grh;
                } else {
                        goto next; /* just in case */
                }

                bth1 = be32_to_cpu(packet->ohdr->bth[1]);
                is_ecn = !!(bth1 & (IB_FECN_SMASK | IB_BECN_SMASK));

                if (!is_ecn)
                        goto next;

                /* The QP lookup and its use stay inside one RCU section */
                qpn = bth1 & RVT_QPN_MASK;
                rcu_read_lock();
                qp = rvt_lookup_qpn(rdi, &ibp->rvp, qpn);

                if (!qp) {
                        rcu_read_unlock();
                        goto next;
                }

                process_ecn(qp, packet, true);
                rcu_read_unlock();

                /* turn off BECN, FECN */
                bth1 &= ~(IB_FECN_SMASK | IB_BECN_SMASK);
                packet->ohdr->bth[1] = cpu_to_be32(bth1);
next:
                update_ps_mdata(&mdata, rcd);
        }
}
 646
 647static void process_rcv_qp_work(struct hfi1_ctxtdata *rcd)
 648{
 649        struct rvt_qp *qp, *nqp;
 650
 651        /*
 652         * Iterate over all QPs waiting to respond.
 653         * The list won't change since the IRQ is only run on one CPU.
 654         */
 655        list_for_each_entry_safe(qp, nqp, &rcd->qp_wait_list, rspwait) {
 656                list_del_init(&qp->rspwait);
 657                if (qp->r_flags & RVT_R_RSP_NAK) {
 658                        qp->r_flags &= ~RVT_R_RSP_NAK;
 659                        hfi1_send_rc_ack(rcd, qp, 0);
 660                }
 661                if (qp->r_flags & RVT_R_RSP_SEND) {
 662                        unsigned long flags;
 663
 664                        qp->r_flags &= ~RVT_R_RSP_SEND;
 665                        spin_lock_irqsave(&qp->s_lock, flags);
 666                        if (ib_rvt_state_ops[qp->state] &
 667                                        RVT_PROCESS_OR_FLUSH_SEND)
 668                                hfi1_schedule_send(qp);
 669                        spin_unlock_irqrestore(&qp->s_lock, flags);
 670                }
 671                rvt_put_qp(qp);
 672        }
 673}
 674
 675static noinline int max_packet_exceeded(struct hfi1_packet *packet, int thread)
 676{
 677        if (thread) {
 678                if ((packet->numpkt & (MAX_PKT_RECV_THREAD - 1)) == 0)
 679                        /* allow defered processing */
 680                        process_rcv_qp_work(packet->rcd);
 681                cond_resched();
 682                return RCV_PKT_OK;
 683        } else {
 684                this_cpu_inc(*packet->rcd->dd->rcv_limit);
 685                return RCV_PKT_LIMIT;
 686        }
 687}
 688
 689static inline int check_max_packet(struct hfi1_packet *packet, int thread)
 690{
 691        int ret = RCV_PKT_OK;
 692
 693        if (unlikely((packet->numpkt & (MAX_PKT_RECV - 1)) == 0))
 694                ret = max_packet_exceeded(packet, thread);
 695        return ret;
 696}
 697
 698static noinline int skip_rcv_packet(struct hfi1_packet *packet, int thread)
 699{
 700        int ret;
 701
 702        /* Set up for the next packet */
 703        packet->rhqoff += packet->rsize;
 704        if (packet->rhqoff >= packet->maxcnt)
 705                packet->rhqoff = 0;
 706
 707        packet->numpkt++;
 708        ret = check_max_packet(packet, thread);
 709
 710        packet->rhf_addr = (__le32 *)packet->rcd->rcvhdrq + packet->rhqoff +
 711                                     packet->rcd->dd->rhf_offset;
 712        packet->rhf = rhf_to_cpu(packet->rhf_addr);
 713
 714        return ret;
 715}
 716
/*
 * process_rcv_packet - handle the current header queue entry
 * @packet: packet state for this pass; packet->rhf holds the entry's RHF
 * @thread: non-zero when running in the threaded handler
 *
 * Decodes the receive type and length from the RHF, resolves (and
 * prefetches) the eager buffer when one is used, dispatches to the
 * per-type handler, then advances to the next entry and loads its RHF.
 *
 * Return: the result of check_max_packet().
 */
static inline int process_rcv_packet(struct hfi1_packet *packet, int thread)
{
        int ret;

        packet->etype = rhf_rcv_type(packet->rhf);

        /* total length */
        packet->tlen = rhf_pkt_len(packet->rhf); /* in bytes */
        /* retrieve eager buffer details */
        packet->ebuf = NULL;
        if (rhf_use_egr_bfr(packet->rhf)) {
                packet->etail = rhf_egr_index(packet->rhf);
                packet->ebuf = get_egrbuf(packet->rcd, packet->rhf,
                                 &packet->updegr);
                /*
                 * Prefetch the contents of the eager buffer.  It is
                 * OK to send a negative length to prefetch_range().
                 * The +2 is the size of the RHF.
                 */
                prefetch_range(packet->ebuf,
                               packet->tlen - ((packet->rcd->rcvhdrqentsize -
                                               (rhf_hdrq_offset(packet->rhf)
                                                + 2)) * 4));
        }

        /*
         * Call a type specific handler for the packet. We
         * should be able to trust that etype won't be beyond
         * the range of valid indexes. If so something is really
         * wrong and we can probably just let things come
         * crashing down. There is no need to eat another
         * comparison in this performance critical code.
         */
        packet->rcd->dd->rhf_rcv_function_map[packet->etype](packet);
        packet->numpkt++;

        /* Set up for the next packet */
        packet->rhqoff += packet->rsize;
        if (packet->rhqoff >= packet->maxcnt)
                packet->rhqoff = 0;

        ret = check_max_packet(packet, thread);

        /* Load the RHF of the next entry for the caller's loop test */
        packet->rhf_addr = (__le32 *)packet->rcd->rcvhdrq + packet->rhqoff +
                                      packet->rcd->dd->rhf_offset;
        packet->rhf = rhf_to_cpu(packet->rhf_addr);

        return ret;
}
 766
 767static inline void process_rcv_update(int last, struct hfi1_packet *packet)
 768{
 769        /*
 770         * Update head regs etc., every 16 packets, if not last pkt,
 771         * to help prevent rcvhdrq overflows, when many packets
 772         * are processed and queue is nearly full.
 773         * Don't request an interrupt for intermediate updates.
 774         */
 775        if (!last && !(packet->numpkt & 0xf)) {
 776                update_usrhead(packet->rcd, packet->rhqoff, packet->updegr,
 777                               packet->etail, 0, 0);
 778                packet->updegr = 0;
 779        }
 780        packet->grh = NULL;
 781}
 782
 783static inline void finish_packet(struct hfi1_packet *packet)
 784{
 785        /*
 786         * Nothing we need to free for the packet.
 787         *
 788         * The only thing we need to do is a final update and call for an
 789         * interrupt
 790         */
 791        update_usrhead(packet->rcd, packet->rcd->head, packet->updegr,
 792                       packet->etail, rcv_intr_dynamic, packet->numpkt);
 793}
 794
 795/*
 796 * Handle receive interrupts when using the no dma rtail option.
 797 */
 798int handle_receive_interrupt_nodma_rtail(struct hfi1_ctxtdata *rcd, int thread)
 799{
 800        u32 seq;
 801        int last = RCV_PKT_OK;
 802        struct hfi1_packet packet;
 803
 804        init_packet(rcd, &packet);
 805        seq = rhf_rcv_seq(packet.rhf);
 806        if (seq != rcd->seq_cnt) {
 807                last = RCV_PKT_DONE;
 808                goto bail;
 809        }
 810
 811        prescan_rxq(rcd, &packet);
 812
 813        while (last == RCV_PKT_OK) {
 814                last = process_rcv_packet(&packet, thread);
 815                seq = rhf_rcv_seq(packet.rhf);
 816                if (++rcd->seq_cnt > 13)
 817                        rcd->seq_cnt = 1;
 818                if (seq != rcd->seq_cnt)
 819                        last = RCV_PKT_DONE;
 820                process_rcv_update(last, &packet);
 821        }
 822        process_rcv_qp_work(rcd);
 823        rcd->head = packet.rhqoff;
 824bail:
 825        finish_packet(&packet);
 826        return last;
 827}
 828
 829int handle_receive_interrupt_dma_rtail(struct hfi1_ctxtdata *rcd, int thread)
 830{
 831        u32 hdrqtail;
 832        int last = RCV_PKT_OK;
 833        struct hfi1_packet packet;
 834
 835        init_packet(rcd, &packet);
 836        hdrqtail = get_rcvhdrtail(rcd);
 837        if (packet.rhqoff == hdrqtail) {
 838                last = RCV_PKT_DONE;
 839                goto bail;
 840        }
 841        smp_rmb();  /* prevent speculative reads of dma'ed hdrq */
 842
 843        prescan_rxq(rcd, &packet);
 844
 845        while (last == RCV_PKT_OK) {
 846                last = process_rcv_packet(&packet, thread);
 847                if (packet.rhqoff == hdrqtail)
 848                        last = RCV_PKT_DONE;
 849                process_rcv_update(last, &packet);
 850        }
 851        process_rcv_qp_work(rcd);
 852        rcd->head = packet.rhqoff;
 853bail:
 854        finish_packet(&packet);
 855        return last;
 856}
 857
 858static inline void set_nodma_rtail(struct hfi1_devdata *dd, u16 ctxt)
 859{
 860        struct hfi1_ctxtdata *rcd;
 861        u16 i;
 862
 863        /*
 864         * For dynamically allocated kernel contexts (like vnic) switch
 865         * interrupt handler only for that context. Otherwise, switch
 866         * interrupt handler for all statically allocated kernel contexts.
 867         */
 868        if (ctxt >= dd->first_dyn_alloc_ctxt) {
 869                rcd = hfi1_rcd_get_by_index(dd, ctxt);
 870                if (rcd) {
 871                        rcd->do_interrupt =
 872                                &handle_receive_interrupt_nodma_rtail;
 873                        hfi1_rcd_put(rcd);
 874                }
 875                return;
 876        }
 877
 878        for (i = HFI1_CTRL_CTXT + 1; i < dd->first_dyn_alloc_ctxt; i++) {
 879                rcd = hfi1_rcd_get_by_index(dd, i);
 880                if (rcd)
 881                        rcd->do_interrupt =
 882                                &handle_receive_interrupt_nodma_rtail;
 883                hfi1_rcd_put(rcd);
 884        }
 885}
 886
 887static inline void set_dma_rtail(struct hfi1_devdata *dd, u16 ctxt)
 888{
 889        struct hfi1_ctxtdata *rcd;
 890        u16 i;
 891
 892        /*
 893         * For dynamically allocated kernel contexts (like vnic) switch
 894         * interrupt handler only for that context. Otherwise, switch
 895         * interrupt handler for all statically allocated kernel contexts.
 896         */
 897        if (ctxt >= dd->first_dyn_alloc_ctxt) {
 898                rcd = hfi1_rcd_get_by_index(dd, ctxt);
 899                if (rcd) {
 900                        rcd->do_interrupt =
 901                                &handle_receive_interrupt_dma_rtail;
 902                        hfi1_rcd_put(rcd);
 903                }
 904                return;
 905        }
 906
 907        for (i = HFI1_CTRL_CTXT + 1; i < dd->first_dyn_alloc_ctxt; i++) {
 908                rcd = hfi1_rcd_get_by_index(dd, i);
 909                if (rcd)
 910                        rcd->do_interrupt =
 911                                &handle_receive_interrupt_dma_rtail;
 912                hfi1_rcd_put(rcd);
 913        }
 914}
 915
 916void set_all_slowpath(struct hfi1_devdata *dd)
 917{
 918        struct hfi1_ctxtdata *rcd;
 919        u16 i;
 920
 921        /* HFI1_CTRL_CTXT must always use the slow path interrupt handler */
 922        for (i = HFI1_CTRL_CTXT + 1; i < dd->num_rcv_contexts; i++) {
 923                rcd = hfi1_rcd_get_by_index(dd, i);
 924                if (!rcd)
 925                        continue;
 926                if ((i < dd->first_dyn_alloc_ctxt) ||
 927                    (rcd->sc && (rcd->sc->type == SC_KERNEL))) {
 928                        rcd->do_interrupt = &handle_receive_interrupt;
 929                }
 930                hfi1_rcd_put(rcd);
 931        }
 932}
 933
 934static inline int set_armed_to_active(struct hfi1_ctxtdata *rcd,
 935                                      struct hfi1_packet *packet,
 936                                      struct hfi1_devdata *dd)
 937{
 938        struct work_struct *lsaw = &rcd->ppd->linkstate_active_work;
 939        u8 etype = rhf_rcv_type(packet->rhf);
 940        u8 sc = SC15_PACKET;
 941
 942        if (etype == RHF_RCV_TYPE_IB) {
 943                struct ib_header *hdr = hfi1_get_msgheader(packet->rcd->dd,
 944                                                           packet->rhf_addr);
 945                sc = hfi1_9B_get_sc5(hdr, packet->rhf);
 946        } else if (etype == RHF_RCV_TYPE_BYPASS) {
 947                struct hfi1_16b_header *hdr = hfi1_get_16B_header(
 948                                                packet->rcd->dd,
 949                                                packet->rhf_addr);
 950                sc = hfi1_16B_get_sc(hdr);
 951        }
 952        if (sc != SC15_PACKET) {
 953                int hwstate = driver_lstate(rcd->ppd);
 954
 955                if (hwstate != IB_PORT_ACTIVE) {
 956                        dd_dev_info(dd,
 957                                    "Unexpected link state %s\n",
 958                                    opa_lstate_name(hwstate));
 959                        return 0;
 960                }
 961
 962                queue_work(rcd->ppd->link_wq, lsaw);
 963                return 1;
 964        }
 965        return 0;
 966}
 967
 968/*
 969 * handle_receive_interrupt - receive a packet
 970 * @rcd: the context
 971 *
 972 * Called from interrupt handler for errors or receive interrupt.
 973 * This is the slow path interrupt handler.
 974 */
 975int handle_receive_interrupt(struct hfi1_ctxtdata *rcd, int thread)
 976{
 977        struct hfi1_devdata *dd = rcd->dd;
 978        u32 hdrqtail;
 979        int needset, last = RCV_PKT_OK;
 980        struct hfi1_packet packet;
 981        int skip_pkt = 0;
 982
 983        /* Control context will always use the slow path interrupt handler */
 984        needset = (rcd->ctxt == HFI1_CTRL_CTXT) ? 0 : 1;
 985
 986        init_packet(rcd, &packet);
 987
 988        if (!HFI1_CAP_KGET_MASK(rcd->flags, DMA_RTAIL)) {
 989                u32 seq = rhf_rcv_seq(packet.rhf);
 990
 991                if (seq != rcd->seq_cnt) {
 992                        last = RCV_PKT_DONE;
 993                        goto bail;
 994                }
 995                hdrqtail = 0;
 996        } else {
 997                hdrqtail = get_rcvhdrtail(rcd);
 998                if (packet.rhqoff == hdrqtail) {
 999                        last = RCV_PKT_DONE;
1000                        goto bail;
1001                }
1002                smp_rmb();  /* prevent speculative reads of dma'ed hdrq */
1003
1004                /*
1005                 * Control context can potentially receive an invalid
1006                 * rhf. Drop such packets.
1007                 */
1008                if (rcd->ctxt == HFI1_CTRL_CTXT) {
1009                        u32 seq = rhf_rcv_seq(packet.rhf);
1010
1011                        if (seq != rcd->seq_cnt)
1012                                skip_pkt = 1;
1013                }
1014        }
1015
1016        prescan_rxq(rcd, &packet);
1017
1018        while (last == RCV_PKT_OK) {
1019                if (unlikely(dd->do_drop &&
1020                             atomic_xchg(&dd->drop_packet, DROP_PACKET_OFF) ==
1021                             DROP_PACKET_ON)) {
1022                        dd->do_drop = 0;
1023
1024                        /* On to the next packet */
1025                        packet.rhqoff += packet.rsize;
1026                        packet.rhf_addr = (__le32 *)rcd->rcvhdrq +
1027                                          packet.rhqoff +
1028                                          dd->rhf_offset;
1029                        packet.rhf = rhf_to_cpu(packet.rhf_addr);
1030
1031                } else if (skip_pkt) {
1032                        last = skip_rcv_packet(&packet, thread);
1033                        skip_pkt = 0;
1034                } else {
1035                        /* Auto activate link on non-SC15 packet receive */
1036                        if (unlikely(rcd->ppd->host_link_state ==
1037                                     HLS_UP_ARMED) &&
1038                            set_armed_to_active(rcd, &packet, dd))
1039                                goto bail;
1040                        last = process_rcv_packet(&packet, thread);
1041                }
1042
1043                if (!HFI1_CAP_KGET_MASK(rcd->flags, DMA_RTAIL)) {
1044                        u32 seq = rhf_rcv_seq(packet.rhf);
1045
1046                        if (++rcd->seq_cnt > 13)
1047                                rcd->seq_cnt = 1;
1048                        if (seq != rcd->seq_cnt)
1049                                last = RCV_PKT_DONE;
1050                        if (needset) {
1051                                dd_dev_info(dd, "Switching to NO_DMA_RTAIL\n");
1052                                set_nodma_rtail(dd, rcd->ctxt);
1053                                needset = 0;
1054                        }
1055                } else {
1056                        if (packet.rhqoff == hdrqtail)
1057                                last = RCV_PKT_DONE;
1058                        /*
1059                         * Control context can potentially receive an invalid
1060                         * rhf. Drop such packets.
1061                         */
1062                        if (rcd->ctxt == HFI1_CTRL_CTXT) {
1063                                u32 seq = rhf_rcv_seq(packet.rhf);
1064
1065                                if (++rcd->seq_cnt > 13)
1066                                        rcd->seq_cnt = 1;
1067                                if (!last && (seq != rcd->seq_cnt))
1068                                        skip_pkt = 1;
1069                        }
1070
1071                        if (needset) {
1072                                dd_dev_info(dd,
1073                                            "Switching to DMA_RTAIL\n");
1074                                set_dma_rtail(dd, rcd->ctxt);
1075                                needset = 0;
1076                        }
1077                }
1078
1079                process_rcv_update(last, &packet);
1080        }
1081
1082        process_rcv_qp_work(rcd);
1083        rcd->head = packet.rhqoff;
1084
1085bail:
1086        /*
1087         * Always write head at end, and setup rcv interrupt, even
1088         * if no packets were processed.
1089         */
1090        finish_packet(&packet);
1091        return last;
1092}
1093
1094/*
1095 * We may discover in the interrupt that the hardware link state has
1096 * changed from ARMED to ACTIVE (due to the arrival of a non-SC15 packet),
1097 * and we need to update the driver's notion of the link state.  We cannot
1098 * run set_link_state from interrupt context, so we queue this function on
1099 * a workqueue.
1100 *
1101 * We delay the regular interrupt processing until after the state changes
1102 * so that the link will be in the correct state by the time any application
1103 * we wake up attempts to send a reply to any message it received.
1104 * (Subsequent receive interrupts may possibly force the wakeup before we
1105 * update the link state.)
1106 *
1107 * The rcd is freed in hfi1_free_ctxtdata after hfi1_postinit_cleanup invokes
1108 * dd->f_cleanup(dd) to disable the interrupt handler and flush workqueues,
1109 * so we're safe from use-after-free of the rcd.
1110 */
1111void receive_interrupt_work(struct work_struct *work)
1112{
1113        struct hfi1_pportdata *ppd = container_of(work, struct hfi1_pportdata,
1114                                                  linkstate_active_work);
1115        struct hfi1_devdata *dd = ppd->dd;
1116        struct hfi1_ctxtdata *rcd;
1117        u16 i;
1118
1119        /* Received non-SC15 packet implies neighbor_normal */
1120        ppd->neighbor_normal = 1;
1121        set_link_state(ppd, HLS_UP_ACTIVE);
1122
1123        /*
1124         * Interrupt all statically allocated kernel contexts that could
1125         * have had an interrupt during auto activation.
1126         */
1127        for (i = HFI1_CTRL_CTXT; i < dd->first_dyn_alloc_ctxt; i++) {
1128                rcd = hfi1_rcd_get_by_index(dd, i);
1129                if (rcd)
1130                        force_recv_intr(rcd);
1131                hfi1_rcd_put(rcd);
1132        }
1133}
1134
1135/*
1136 * Convert a given MTU size to the on-wire MAD packet enumeration.
1137 * Return -1 if the size is invalid.
1138 */
1139int mtu_to_enum(u32 mtu, int default_if_bad)
1140{
1141        switch (mtu) {
1142        case     0: return OPA_MTU_0;
1143        case   256: return OPA_MTU_256;
1144        case   512: return OPA_MTU_512;
1145        case  1024: return OPA_MTU_1024;
1146        case  2048: return OPA_MTU_2048;
1147        case  4096: return OPA_MTU_4096;
1148        case  8192: return OPA_MTU_8192;
1149        case 10240: return OPA_MTU_10240;
1150        }
1151        return default_if_bad;
1152}
1153
1154u16 enum_to_mtu(int mtu)
1155{
1156        switch (mtu) {
1157        case OPA_MTU_0:     return 0;
1158        case OPA_MTU_256:   return 256;
1159        case OPA_MTU_512:   return 512;
1160        case OPA_MTU_1024:  return 1024;
1161        case OPA_MTU_2048:  return 2048;
1162        case OPA_MTU_4096:  return 4096;
1163        case OPA_MTU_8192:  return 8192;
1164        case OPA_MTU_10240: return 10240;
1165        default: return 0xffff;
1166        }
1167}
1168
1169/*
1170 * set_mtu - set the MTU
1171 * @ppd: the per port data
1172 *
1173 * We can handle "any" incoming size, the issue here is whether we
1174 * need to restrict our outgoing size.  We do not deal with what happens
1175 * to programs that are already running when the size changes.
1176 */
1177int set_mtu(struct hfi1_pportdata *ppd)
1178{
1179        struct hfi1_devdata *dd = ppd->dd;
1180        int i, drain, ret = 0, is_up = 0;
1181
1182        ppd->ibmtu = 0;
1183        for (i = 0; i < ppd->vls_supported; i++)
1184                if (ppd->ibmtu < dd->vld[i].mtu)
1185                        ppd->ibmtu = dd->vld[i].mtu;
1186        ppd->ibmaxlen = ppd->ibmtu + lrh_max_header_bytes(ppd->dd);
1187
1188        mutex_lock(&ppd->hls_lock);
1189        if (ppd->host_link_state == HLS_UP_INIT ||
1190            ppd->host_link_state == HLS_UP_ARMED ||
1191            ppd->host_link_state == HLS_UP_ACTIVE)
1192                is_up = 1;
1193
1194        drain = !is_ax(dd) && is_up;
1195
1196        if (drain)
1197                /*
1198                 * MTU is specified per-VL. To ensure that no packet gets
1199                 * stuck (due, e.g., to the MTU for the packet's VL being
1200                 * reduced), empty the per-VL FIFOs before adjusting MTU.
1201                 */
1202                ret = stop_drain_data_vls(dd);
1203
1204        if (ret) {
1205                dd_dev_err(dd, "%s: cannot stop/drain VLs - refusing to change per-VL MTUs\n",
1206                           __func__);
1207                goto err;
1208        }
1209
1210        hfi1_set_ib_cfg(ppd, HFI1_IB_CFG_MTU, 0);
1211
1212        if (drain)
1213                open_fill_data_vls(dd); /* reopen all VLs */
1214
1215err:
1216        mutex_unlock(&ppd->hls_lock);
1217
1218        return ret;
1219}
1220
1221int hfi1_set_lid(struct hfi1_pportdata *ppd, u32 lid, u8 lmc)
1222{
1223        struct hfi1_devdata *dd = ppd->dd;
1224
1225        ppd->lid = lid;
1226        ppd->lmc = lmc;
1227        hfi1_set_ib_cfg(ppd, HFI1_IB_CFG_LIDLMC, 0);
1228
1229        dd_dev_info(dd, "port %u: got a lid: 0x%x\n", ppd->port, lid);
1230
1231        return 0;
1232}
1233
1234void shutdown_led_override(struct hfi1_pportdata *ppd)
1235{
1236        struct hfi1_devdata *dd = ppd->dd;
1237
1238        /*
1239         * This pairs with the memory barrier in hfi1_start_led_override to
1240         * ensure that we read the correct state of LED beaconing represented
1241         * by led_override_timer_active
1242         */
1243        smp_rmb();
1244        if (atomic_read(&ppd->led_override_timer_active)) {
1245                del_timer_sync(&ppd->led_override_timer);
1246                atomic_set(&ppd->led_override_timer_active, 0);
1247                /* Ensure the atomic_set is visible to all CPUs */
1248                smp_wmb();
1249        }
1250
1251        /* Hand control of the LED to the DC for normal operation */
1252        write_csr(dd, DCC_CFG_LED_CNTRL, 0);
1253}
1254
1255static void run_led_override(unsigned long opaque)
1256{
1257        struct hfi1_pportdata *ppd = (struct hfi1_pportdata *)opaque;
1258        struct hfi1_devdata *dd = ppd->dd;
1259        unsigned long timeout;
1260        int phase_idx;
1261
1262        if (!(dd->flags & HFI1_INITTED))
1263                return;
1264
1265        phase_idx = ppd->led_override_phase & 1;
1266
1267        setextled(dd, phase_idx);
1268
1269        timeout = ppd->led_override_vals[phase_idx];
1270
1271        /* Set up for next phase */
1272        ppd->led_override_phase = !ppd->led_override_phase;
1273
1274        mod_timer(&ppd->led_override_timer, jiffies + timeout);
1275}
1276
1277/*
1278 * To have the LED blink in a particular pattern, provide timeon and timeoff
1279 * in milliseconds.
1280 * To turn off custom blinking and return to normal operation, use
1281 * shutdown_led_override()
1282 */
1283void hfi1_start_led_override(struct hfi1_pportdata *ppd, unsigned int timeon,
1284                             unsigned int timeoff)
1285{
1286        if (!(ppd->dd->flags & HFI1_INITTED))
1287                return;
1288
1289        /* Convert to jiffies for direct use in timer */
1290        ppd->led_override_vals[0] = msecs_to_jiffies(timeoff);
1291        ppd->led_override_vals[1] = msecs_to_jiffies(timeon);
1292
1293        /* Arbitrarily start from LED on phase */
1294        ppd->led_override_phase = 1;
1295
1296        /*
1297         * If the timer has not already been started, do so. Use a "quick"
1298         * timeout so the handler will be called soon to look at our request.
1299         */
1300        if (!timer_pending(&ppd->led_override_timer)) {
1301                setup_timer(&ppd->led_override_timer, run_led_override,
1302                            (unsigned long)ppd);
1303                ppd->led_override_timer.expires = jiffies + 1;
1304                add_timer(&ppd->led_override_timer);
1305                atomic_set(&ppd->led_override_timer_active, 1);
1306                /* Ensure the atomic_set is visible to all CPUs */
1307                smp_wmb();
1308        }
1309}
1310
1311/**
1312 * hfi1_reset_device - reset the chip if possible
1313 * @unit: the device to reset
1314 *
1315 * Whether or not reset is successful, we attempt to re-initialize the chip
1316 * (that is, much like a driver unload/reload).  We clear the INITTED flag
1317 * so that the various entry points will fail until we reinitialize.  For
1318 * now, we only allow this if no user contexts are open that use chip resources
1319 */
1320int hfi1_reset_device(int unit)
1321{
1322        int ret;
1323        struct hfi1_devdata *dd = hfi1_lookup(unit);
1324        struct hfi1_pportdata *ppd;
1325        int pidx;
1326
1327        if (!dd) {
1328                ret = -ENODEV;
1329                goto bail;
1330        }
1331
1332        dd_dev_info(dd, "Reset on unit %u requested\n", unit);
1333
1334        if (!dd->kregbase1 || !(dd->flags & HFI1_PRESENT)) {
1335                dd_dev_info(dd,
1336                            "Invalid unit number %u or not initialized or not present\n",
1337                            unit);
1338                ret = -ENXIO;
1339                goto bail;
1340        }
1341
1342        /* If there are any user/vnic contexts, we cannot reset */
1343        mutex_lock(&hfi1_mutex);
1344        if (dd->rcd)
1345                if (hfi1_stats.sps_ctxts) {
1346                        mutex_unlock(&hfi1_mutex);
1347                        ret = -EBUSY;
1348                        goto bail;
1349                }
1350        mutex_unlock(&hfi1_mutex);
1351
1352        for (pidx = 0; pidx < dd->num_pports; ++pidx) {
1353                ppd = dd->pport + pidx;
1354
1355                shutdown_led_override(ppd);
1356        }
1357        if (dd->flags & HFI1_HAS_SEND_DMA)
1358                sdma_exit(dd);
1359
1360        hfi1_reset_cpu_counters(dd);
1361
1362        ret = hfi1_init(dd, 1);
1363
1364        if (ret)
1365                dd_dev_err(dd,
1366                           "Reinitialize unit %u after reset failed with %d\n",
1367                           unit, ret);
1368        else
1369                dd_dev_info(dd, "Reinitialized unit %u after resetting\n",
1370                            unit);
1371
1372bail:
1373        return ret;
1374}
1375
1376static inline void hfi1_setup_ib_header(struct hfi1_packet *packet)
1377{
1378        packet->hdr = (struct hfi1_ib_message_header *)
1379                        hfi1_get_msgheader(packet->rcd->dd,
1380                                           packet->rhf_addr);
1381        packet->hlen = (u8 *)packet->rhf_addr - (u8 *)packet->hdr;
1382}
1383
1384static int hfi1_bypass_ingress_pkt_check(struct hfi1_packet *packet)
1385{
1386        struct hfi1_pportdata *ppd = packet->rcd->ppd;
1387
1388        /* slid and dlid cannot be 0 */
1389        if ((!packet->slid) || (!packet->dlid))
1390                return -EINVAL;
1391
1392        /* Compare port lid with incoming packet dlid */
1393        if ((!(hfi1_is_16B_mcast(packet->dlid))) &&
1394            (packet->dlid !=
1395                opa_get_lid(be32_to_cpu(OPA_LID_PERMISSIVE), 16B))) {
1396                if (packet->dlid != ppd->lid)
1397                        return -EINVAL;
1398        }
1399
1400        /* No multicast packets with SC15 */
1401        if ((hfi1_is_16B_mcast(packet->dlid)) && (packet->sc == 0xF))
1402                return -EINVAL;
1403
1404        /* Packets with permissive DLID always on SC15 */
1405        if ((packet->dlid == opa_get_lid(be32_to_cpu(OPA_LID_PERMISSIVE),
1406                                         16B)) &&
1407            (packet->sc != 0xF))
1408                return -EINVAL;
1409
1410        return 0;
1411}
1412
1413static int hfi1_setup_9B_packet(struct hfi1_packet *packet)
1414{
1415        struct hfi1_ibport *ibp = rcd_to_iport(packet->rcd);
1416        struct ib_header *hdr;
1417        u8 lnh;
1418
1419        hfi1_setup_ib_header(packet);
1420        hdr = packet->hdr;
1421
1422        lnh = ib_get_lnh(hdr);
1423        if (lnh == HFI1_LRH_BTH) {
1424                packet->ohdr = &hdr->u.oth;
1425                packet->grh = NULL;
1426        } else if (lnh == HFI1_LRH_GRH) {
1427                u32 vtf;
1428
1429                packet->ohdr = &hdr->u.l.oth;
1430                packet->grh = &hdr->u.l.grh;
1431                if (packet->grh->next_hdr != IB_GRH_NEXT_HDR)
1432                        goto drop;
1433                vtf = be32_to_cpu(packet->grh->version_tclass_flow);
1434                if ((vtf >> IB_GRH_VERSION_SHIFT) != IB_GRH_VERSION)
1435                        goto drop;
1436        } else {
1437                goto drop;
1438        }
1439
1440        /* Query commonly used fields from packet header */
1441        packet->payload = packet->ebuf;
1442        packet->opcode = ib_bth_get_opcode(packet->ohdr);
1443        packet->slid = ib_get_slid(hdr);
1444        packet->dlid = ib_get_dlid(hdr);
1445        if (unlikely((packet->dlid >= be16_to_cpu(IB_MULTICAST_LID_BASE)) &&
1446                     (packet->dlid != be16_to_cpu(IB_LID_PERMISSIVE))))
1447                packet->dlid += opa_get_mcast_base(OPA_MCAST_NR) -
1448                                be16_to_cpu(IB_MULTICAST_LID_BASE);
1449        packet->sl = ib_get_sl(hdr);
1450        packet->sc = hfi1_9B_get_sc5(hdr, packet->rhf);
1451        packet->pad = ib_bth_get_pad(packet->ohdr);
1452        packet->extra_byte = 0;
1453        packet->fecn = ib_bth_get_fecn(packet->ohdr);
1454        packet->becn = ib_bth_get_becn(packet->ohdr);
1455
1456        return 0;
1457drop:
1458        ibp->rvp.n_pkt_drops++;
1459        return -EINVAL;
1460}
1461
1462static int hfi1_setup_bypass_packet(struct hfi1_packet *packet)
1463{
1464        /*
1465         * Bypass packets have a different header/payload split
1466         * compared to an IB packet.
1467         * Current split is set such that 16 bytes of the actual
1468         * header is in the header buffer and the remining is in
1469         * the eager buffer. We chose 16 since hfi1 driver only
1470         * supports 16B bypass packets and we will be able to
1471         * receive the entire LRH with such a split.
1472         */
1473
1474        struct hfi1_ctxtdata *rcd = packet->rcd;
1475        struct hfi1_pportdata *ppd = rcd->ppd;
1476        struct hfi1_ibport *ibp = &ppd->ibport_data;
1477        u8 l4;
1478        u8 grh_len;
1479
1480        packet->hdr = (struct hfi1_16b_header *)
1481                        hfi1_get_16B_header(packet->rcd->dd,
1482                                            packet->rhf_addr);
1483        packet->hlen = (u8 *)packet->rhf_addr - (u8 *)packet->hdr;
1484
1485        l4 = hfi1_16B_get_l4(packet->hdr);
1486        if (l4 == OPA_16B_L4_IB_LOCAL) {
1487                grh_len = 0;
1488                packet->ohdr = packet->ebuf;
1489                packet->grh = NULL;
1490        } else if (l4 == OPA_16B_L4_IB_GLOBAL) {
1491                u32 vtf;
1492
1493                grh_len = sizeof(struct ib_grh);
1494                packet->ohdr = packet->ebuf + grh_len;
1495                packet->grh = packet->ebuf;
1496                if (packet->grh->next_hdr != IB_GRH_NEXT_HDR)
1497                        goto drop;
1498                vtf = be32_to_cpu(packet->grh->version_tclass_flow);
1499                if ((vtf >> IB_GRH_VERSION_SHIFT) != IB_GRH_VERSION)
1500                        goto drop;
1501        } else {
1502                goto drop;
1503        }
1504
1505        /* Query commonly used fields from packet header */
1506        packet->opcode = ib_bth_get_opcode(packet->ohdr);
1507        packet->hlen = hdr_len_by_opcode[packet->opcode] + 8 + grh_len;
1508        packet->payload = packet->ebuf + packet->hlen - (4 * sizeof(u32));
1509        packet->slid = hfi1_16B_get_slid(packet->hdr);
1510        packet->dlid = hfi1_16B_get_dlid(packet->hdr);
1511        if (unlikely(hfi1_is_16B_mcast(packet->dlid)))
1512                packet->dlid += opa_get_mcast_base(OPA_MCAST_NR) -
1513                                opa_get_lid(opa_get_mcast_base(OPA_MCAST_NR),
1514                                            16B);
1515        packet->sc = hfi1_16B_get_sc(packet->hdr);
1516        packet->sl = ibp->sc_to_sl[packet->sc];
1517        packet->pad = hfi1_16B_bth_get_pad(packet->ohdr);
1518        packet->extra_byte = SIZE_OF_LT;
1519        packet->fecn = hfi1_16B_get_fecn(packet->hdr);
1520        packet->becn = hfi1_16B_get_becn(packet->hdr);
1521
1522        if (hfi1_bypass_ingress_pkt_check(packet))
1523                goto drop;
1524
1525        return 0;
1526drop:
1527        hfi1_cdbg(PKT, "%s: packet dropped\n", __func__);
1528        ibp->rvp.n_pkt_drops++;
1529        return -EINVAL;
1530}
1531
1532void handle_eflags(struct hfi1_packet *packet)
1533{
1534        struct hfi1_ctxtdata *rcd = packet->rcd;
1535        u32 rte = rhf_rcv_type_err(packet->rhf);
1536
1537        rcv_hdrerr(rcd, rcd->ppd, packet);
1538        if (rhf_err_flags(packet->rhf))
1539                dd_dev_err(rcd->dd,
1540                           "receive context %d: rhf 0x%016llx, errs [ %s%s%s%s%s%s%s%s] rte 0x%x\n",
1541                           rcd->ctxt, packet->rhf,
1542                           packet->rhf & RHF_K_HDR_LEN_ERR ? "k_hdr_len " : "",
1543                           packet->rhf & RHF_DC_UNC_ERR ? "dc_unc " : "",
1544                           packet->rhf & RHF_DC_ERR ? "dc " : "",
1545                           packet->rhf & RHF_TID_ERR ? "tid " : "",
1546                           packet->rhf & RHF_LEN_ERR ? "len " : "",
1547                           packet->rhf & RHF_ECC_ERR ? "ecc " : "",
1548                           packet->rhf & RHF_VCRC_ERR ? "vcrc " : "",
1549                           packet->rhf & RHF_ICRC_ERR ? "icrc " : "",
1550                           rte);
1551}
1552
1553/*
1554 * The following functions are called by the interrupt handler. They are type
1555 * specific handlers for each packet type.
1556 */
1557int process_receive_ib(struct hfi1_packet *packet)
1558{
1559        if (unlikely(hfi1_dbg_fault_packet(packet)))
1560                return RHF_RCV_CONTINUE;
1561
1562        if (hfi1_setup_9B_packet(packet))
1563                return RHF_RCV_CONTINUE;
1564
1565        trace_hfi1_rcvhdr(packet->rcd->ppd->dd,
1566                          packet->rcd->ctxt,
1567                          rhf_err_flags(packet->rhf),
1568                          RHF_RCV_TYPE_IB,
1569                          packet->hlen,
1570                          packet->tlen,
1571                          packet->updegr,
1572                          rhf_egr_index(packet->rhf));
1573
1574        if (unlikely(
1575                 (hfi1_dbg_fault_suppress_err(&packet->rcd->dd->verbs_dev) &&
1576                 (packet->rhf & RHF_DC_ERR))))
1577                return RHF_RCV_CONTINUE;
1578
1579        if (unlikely(rhf_err_flags(packet->rhf))) {
1580                handle_eflags(packet);
1581                return RHF_RCV_CONTINUE;
1582        }
1583
1584        hfi1_ib_rcv(packet);
1585        return RHF_RCV_CONTINUE;
1586}
1587
1588static inline bool hfi1_is_vnic_packet(struct hfi1_packet *packet)
1589{
1590        /* Packet received in VNIC context via RSM */
1591        if (packet->rcd->is_vnic)
1592                return true;
1593
1594        if ((hfi1_16B_get_l2(packet->ebuf) == OPA_16B_L2_TYPE) &&
1595            (hfi1_16B_get_l4(packet->ebuf) == OPA_16B_L4_ETHR))
1596                return true;
1597
1598        return false;
1599}
1600
1601int process_receive_bypass(struct hfi1_packet *packet)
1602{
1603        struct hfi1_devdata *dd = packet->rcd->dd;
1604
1605        if (hfi1_is_vnic_packet(packet)) {
1606                hfi1_vnic_bypass_rcv(packet);
1607                return RHF_RCV_CONTINUE;
1608        }
1609
1610        if (hfi1_setup_bypass_packet(packet))
1611                return RHF_RCV_CONTINUE;
1612
1613        if (unlikely(rhf_err_flags(packet->rhf))) {
1614                handle_eflags(packet);
1615                return RHF_RCV_CONTINUE;
1616        }
1617
1618        if (hfi1_16B_get_l2(packet->hdr) == 0x2) {
1619                hfi1_16B_rcv(packet);
1620        } else {
1621                dd_dev_err(dd,
1622                           "Bypass packets other than 16B are not supported in normal operation. Dropping\n");
1623                incr_cntr64(&dd->sw_rcv_bypass_packet_errors);
1624                if (!(dd->err_info_rcvport.status_and_code &
1625                      OPA_EI_STATUS_SMASK)) {
1626                        u64 *flits = packet->ebuf;
1627
1628                        if (flits && !(packet->rhf & RHF_LEN_ERR)) {
1629                                dd->err_info_rcvport.packet_flit1 = flits[0];
1630                                dd->err_info_rcvport.packet_flit2 =
1631                                        packet->tlen > sizeof(flits[0]) ?
1632                                        flits[1] : 0;
1633                        }
1634                        dd->err_info_rcvport.status_and_code |=
1635                                (OPA_EI_STATUS_SMASK | BAD_L2_ERR);
1636                }
1637        }
1638        return RHF_RCV_CONTINUE;
1639}
1640
1641int process_receive_error(struct hfi1_packet *packet)
1642{
1643        /* KHdrHCRCErr -- KDETH packet with a bad HCRC */
1644        if (unlikely(
1645                 hfi1_dbg_fault_suppress_err(&packet->rcd->dd->verbs_dev) &&
1646                 rhf_rcv_type_err(packet->rhf) == 3))
1647                return RHF_RCV_CONTINUE;
1648
1649        hfi1_setup_ib_header(packet);
1650        handle_eflags(packet);
1651
1652        if (unlikely(rhf_err_flags(packet->rhf)))
1653                dd_dev_err(packet->rcd->dd,
1654                           "Unhandled error packet received. Dropping.\n");
1655
1656        return RHF_RCV_CONTINUE;
1657}
1658
1659int kdeth_process_expected(struct hfi1_packet *packet)
1660{
1661        if (unlikely(hfi1_dbg_fault_packet(packet)))
1662                return RHF_RCV_CONTINUE;
1663
1664        hfi1_setup_ib_header(packet);
1665        if (unlikely(rhf_err_flags(packet->rhf)))
1666                handle_eflags(packet);
1667
1668        dd_dev_err(packet->rcd->dd,
1669                   "Unhandled expected packet received. Dropping.\n");
1670        return RHF_RCV_CONTINUE;
1671}
1672
1673int kdeth_process_eager(struct hfi1_packet *packet)
1674{
1675        hfi1_setup_ib_header(packet);
1676        if (unlikely(rhf_err_flags(packet->rhf)))
1677                handle_eflags(packet);
1678        if (unlikely(hfi1_dbg_fault_packet(packet)))
1679                return RHF_RCV_CONTINUE;
1680
1681        dd_dev_err(packet->rcd->dd,
1682                   "Unhandled eager packet received. Dropping.\n");
1683        return RHF_RCV_CONTINUE;
1684}
1685
1686int process_receive_invalid(struct hfi1_packet *packet)
1687{
1688        dd_dev_err(packet->rcd->dd, "Invalid packet type %d. Dropping\n",
1689                   rhf_rcv_type(packet->rhf));
1690        return RHF_RCV_CONTINUE;
1691}
1692
1693void seqfile_dump_rcd(struct seq_file *s, struct hfi1_ctxtdata *rcd)
1694{
1695        struct hfi1_packet packet;
1696        struct ps_mdata mdata;
1697
1698        seq_printf(s, "Rcd %u: RcvHdr cnt %u entsize %u %s head %llu tail %llu\n",
1699                   rcd->ctxt, rcd->rcvhdrq_cnt, rcd->rcvhdrqentsize,
1700                   HFI1_CAP_KGET_MASK(rcd->flags, DMA_RTAIL) ?
1701                   "dma_rtail" : "nodma_rtail",
1702                   read_uctxt_csr(rcd->dd, rcd->ctxt, RCV_HDR_HEAD) &
1703                   RCV_HDR_HEAD_HEAD_MASK,
1704                   read_uctxt_csr(rcd->dd, rcd->ctxt, RCV_HDR_TAIL));
1705
1706        init_packet(rcd, &packet);
1707        init_ps_mdata(&mdata, &packet);
1708
1709        while (1) {
1710                struct hfi1_devdata *dd = rcd->dd;
1711                __le32 *rhf_addr = (__le32 *)rcd->rcvhdrq + mdata.ps_head +
1712                                         dd->rhf_offset;
1713                struct ib_header *hdr;
1714                u64 rhf = rhf_to_cpu(rhf_addr);
1715                u32 etype = rhf_rcv_type(rhf), qpn;
1716                u8 opcode;
1717                u32 psn;
1718                u8 lnh;
1719
1720                if (ps_done(&mdata, rhf, rcd))
1721                        break;
1722
1723                if (ps_skip(&mdata, rhf, rcd))
1724                        goto next;
1725
1726                if (etype > RHF_RCV_TYPE_IB)
1727                        goto next;
1728
1729                packet.hdr = hfi1_get_msgheader(dd, rhf_addr);
1730                hdr = packet.hdr;
1731
1732                lnh = be16_to_cpu(hdr->lrh[0]) & 3;
1733
1734                if (lnh == HFI1_LRH_BTH)
1735                        packet.ohdr = &hdr->u.oth;
1736                else if (lnh == HFI1_LRH_GRH)
1737                        packet.ohdr = &hdr->u.l.oth;
1738                else
1739                        goto next; /* just in case */
1740
1741                opcode = (be32_to_cpu(packet.ohdr->bth[0]) >> 24);
1742                qpn = be32_to_cpu(packet.ohdr->bth[1]) & RVT_QPN_MASK;
1743                psn = mask_psn(be32_to_cpu(packet.ohdr->bth[2]));
1744
1745                seq_printf(s, "\tEnt %u: opcode 0x%x, qpn 0x%x, psn 0x%x\n",
1746                           mdata.ps_head, opcode, qpn, psn);
1747next:
1748                update_ps_mdata(&mdata, rcd);
1749        }
1750}
1751