linux/drivers/staging/rdma/hfi1/chip.c
/*
 *
 * This file is provided under a dual BSD/GPLv2 license.  When using or
 * redistributing this file, you may do so under either license.
 *
 * GPL LICENSE SUMMARY
 *
 * Copyright(c) 2015 Intel Corporation.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of version 2 of the GNU General Public License as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * BSD LICENSE
 *
 * Copyright(c) 2015 Intel Corporation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 *  - Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *  - Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *  - Neither the name of Intel Corporation nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 */

/*
 * This file contains all of the code that is specific to the HFI chip
 */

#include <linux/pci.h>
#include <linux/delay.h>
#include <linux/interrupt.h>
#include <linux/module.h>

#include "hfi.h"
#include "trace.h"
#include "mad.h"
#include "pio.h"
#include "sdma.h"
#include "eprom.h"

#define NUM_IB_PORTS 1

uint kdeth_qp;
module_param_named(kdeth_qp, kdeth_qp, uint, S_IRUGO);
MODULE_PARM_DESC(kdeth_qp, "Set the KDETH queue pair prefix");

uint num_vls = HFI1_MAX_VLS_SUPPORTED;
module_param(num_vls, uint, S_IRUGO);
MODULE_PARM_DESC(num_vls, "Set number of Virtual Lanes to use (1-8)");

/*
 * Default time to aggregate two 10K packets from the idle state
 * (timer not running). The timer starts at the end of the first packet,
 * so only the time for one 10K packet and header plus a bit extra is needed.
 * 10 * 1024 + 64 header bytes = 10304 bytes
 * 10304 bytes / 12.5 GB/s = 824.32 ns
 */
uint rcv_intr_timeout = (824 + 16); /* 16 is for coalescing interrupt */
module_param(rcv_intr_timeout, uint, S_IRUGO);
MODULE_PARM_DESC(rcv_intr_timeout, "Receive interrupt mitigation timeout in ns");

uint rcv_intr_count = 16; /* same as qib */
module_param(rcv_intr_count, uint, S_IRUGO);
MODULE_PARM_DESC(rcv_intr_count, "Receive interrupt mitigation count");

ushort link_crc_mask = SUPPORTED_CRCS;
module_param(link_crc_mask, ushort, S_IRUGO);
MODULE_PARM_DESC(link_crc_mask, "CRCs to use on the link");

uint loopback;
module_param_named(loopback, loopback, uint, S_IRUGO);
MODULE_PARM_DESC(loopback, "Put into loopback mode (1 = serdes, 3 = external cable)");
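
/*
 * All of the module parameters above are read-only once loaded (S_IRUGO),
 * so they are set at module load time, e.g. (illustrative invocation only):
 *   modprobe hfi1 num_vls=4 rcv_intr_timeout=840
 */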

/* Other driver tunables */
uint rcv_intr_dynamic = 1; /* enable dynamic mode for rcv int mitigation */
static ushort crc_14b_sideband = 1;
static uint use_flr = 1;
uint quick_linkup; /* skip LNI */

struct flag_table {
        u64 flag;       /* the flag */
        char *str;      /* description string */
        u16 extra;      /* extra information */
        u16 unused0;
        u32 unused1;
};

/* str must be a string constant */
#define FLAG_ENTRY(str, extra, flag) {flag, str, extra}
#define FLAG_ENTRY0(str, flag) {flag, str, 0}
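
/*
 * Illustrative expansion (not a table entry the driver defines here): given
 * the field order of struct flag_table above,
 *   FLAG_ENTRY0("CceCsrParityErr", CCE_ERR_STATUS_CCE_CSR_PARITY_ERR_SMASK)
 * initializes { .flag = <mask>, .str = "CceCsrParityErr", .extra = 0 },
 * i.e. one decode entry tying a single status bit to its description.
 */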

/* Send Error Consequences */
#define SEC_WRITE_DROPPED       0x1
#define SEC_PACKET_DROPPED      0x2
#define SEC_SC_HALTED           0x4     /* per-context only */
#define SEC_SPC_FREEZE          0x8     /* per-HFI only */

#define VL15CTXT                  1
#define MIN_KERNEL_KCTXTS         2
#define NUM_MAP_REGS             32

/* Bit offset into the GUID which carries HFI id information */
#define GUID_HFI_INDEX_SHIFT     39

/* extract the emulation revision */
#define emulator_rev(dd) ((dd)->irev >> 8)
/* parallel and serial emulation versions are 3 and 4 respectively */
#define is_emulator_p(dd) ((((dd)->irev) & 0xf) == 3)
#define is_emulator_s(dd) ((((dd)->irev) & 0xf) == 4)

/* RSM fields */

/* packet type */
#define IB_PACKET_TYPE         2ull
#define QW_SHIFT               6ull
/* QPN[7..1] */
#define QPN_WIDTH              7ull

/* LRH.BTH: QW 0, OFFSET 48 - for match */
#define LRH_BTH_QW             0ull
#define LRH_BTH_BIT_OFFSET     48ull
#define LRH_BTH_OFFSET(off)    ((LRH_BTH_QW << QW_SHIFT) | (off))
#define LRH_BTH_MATCH_OFFSET   LRH_BTH_OFFSET(LRH_BTH_BIT_OFFSET)
#define LRH_BTH_SELECT
#define LRH_BTH_MASK           3ull
#define LRH_BTH_VALUE          2ull

/* LRH.SC[3..0] QW 0, OFFSET 56 - for match */
#define LRH_SC_QW              0ull
#define LRH_SC_BIT_OFFSET      56ull
#define LRH_SC_OFFSET(off)     ((LRH_SC_QW << QW_SHIFT) | (off))
#define LRH_SC_MATCH_OFFSET    LRH_SC_OFFSET(LRH_SC_BIT_OFFSET)
#define LRH_SC_MASK            128ull
#define LRH_SC_VALUE           0ull

/* SC[n..0] QW 0, OFFSET 60 - for select */
#define LRH_SC_SELECT_OFFSET  ((LRH_SC_QW << QW_SHIFT) | (60ull))

/* QPN[m+n:1] QW 1, OFFSET 1 */
#define QPN_SELECT_OFFSET      ((1ull << QW_SHIFT) | (1ull))
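
/*
 * Worked example of the offset encoding above: an RSM offset packs a
 * quad-word index above a 6-bit bit-offset within that QW (QW_SHIFT).
 * Thus LRH_BTH_MATCH_OFFSET = (0 << 6) | 48 = 48 (QW 0, bit 48), and
 * QPN_SELECT_OFFSET = (1 << 6) | 1 = 65 (QW 1, bit 1).
 */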

/* defines to build power on SC2VL table */
#define SC2VL_VAL( \
        num, \
        sc0, sc0val, \
        sc1, sc1val, \
        sc2, sc2val, \
        sc3, sc3val, \
        sc4, sc4val, \
        sc5, sc5val, \
        sc6, sc6val, \
        sc7, sc7val) \
( \
        ((u64)(sc0val) << SEND_SC2VLT##num##_SC##sc0##_SHIFT) | \
        ((u64)(sc1val) << SEND_SC2VLT##num##_SC##sc1##_SHIFT) | \
        ((u64)(sc2val) << SEND_SC2VLT##num##_SC##sc2##_SHIFT) | \
        ((u64)(sc3val) << SEND_SC2VLT##num##_SC##sc3##_SHIFT) | \
        ((u64)(sc4val) << SEND_SC2VLT##num##_SC##sc4##_SHIFT) | \
        ((u64)(sc5val) << SEND_SC2VLT##num##_SC##sc5##_SHIFT) | \
        ((u64)(sc6val) << SEND_SC2VLT##num##_SC##sc6##_SHIFT) | \
        ((u64)(sc7val) << SEND_SC2VLT##num##_SC##sc7##_SHIFT)   \
)
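
/*
 * Illustrative use of SC2VL_VAL (a sketch, not a table this file defines):
 *   SC2VL_VAL(0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7)
 * pastes each SC number into a SEND_SC2VLT0_SC<n>_SHIFT symbol and ORs the
 * shifted VL values into one 64-bit CSR image mapping SC0-SC7 to VL0-VL7.
 * DC_SC_VL_VAL below builds DCC_CFG_SC_VL_TABLE images the same way,
 * sixteen entries at a time.
 */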

#define DC_SC_VL_VAL( \
        range, \
        e0, e0val, \
        e1, e1val, \
        e2, e2val, \
        e3, e3val, \
        e4, e4val, \
        e5, e5val, \
        e6, e6val, \
        e7, e7val, \
        e8, e8val, \
        e9, e9val, \
        e10, e10val, \
        e11, e11val, \
        e12, e12val, \
        e13, e13val, \
        e14, e14val, \
        e15, e15val) \
( \
        ((u64)(e0val) << DCC_CFG_SC_VL_TABLE_##range##_ENTRY##e0##_SHIFT) | \
        ((u64)(e1val) << DCC_CFG_SC_VL_TABLE_##range##_ENTRY##e1##_SHIFT) | \
        ((u64)(e2val) << DCC_CFG_SC_VL_TABLE_##range##_ENTRY##e2##_SHIFT) | \
        ((u64)(e3val) << DCC_CFG_SC_VL_TABLE_##range##_ENTRY##e3##_SHIFT) | \
        ((u64)(e4val) << DCC_CFG_SC_VL_TABLE_##range##_ENTRY##e4##_SHIFT) | \
        ((u64)(e5val) << DCC_CFG_SC_VL_TABLE_##range##_ENTRY##e5##_SHIFT) | \
        ((u64)(e6val) << DCC_CFG_SC_VL_TABLE_##range##_ENTRY##e6##_SHIFT) | \
        ((u64)(e7val) << DCC_CFG_SC_VL_TABLE_##range##_ENTRY##e7##_SHIFT) | \
        ((u64)(e8val) << DCC_CFG_SC_VL_TABLE_##range##_ENTRY##e8##_SHIFT) | \
        ((u64)(e9val) << DCC_CFG_SC_VL_TABLE_##range##_ENTRY##e9##_SHIFT) | \
        ((u64)(e10val) << DCC_CFG_SC_VL_TABLE_##range##_ENTRY##e10##_SHIFT) | \
        ((u64)(e11val) << DCC_CFG_SC_VL_TABLE_##range##_ENTRY##e11##_SHIFT) | \
        ((u64)(e12val) << DCC_CFG_SC_VL_TABLE_##range##_ENTRY##e12##_SHIFT) | \
        ((u64)(e13val) << DCC_CFG_SC_VL_TABLE_##range##_ENTRY##e13##_SHIFT) | \
        ((u64)(e14val) << DCC_CFG_SC_VL_TABLE_##range##_ENTRY##e14##_SHIFT) | \
        ((u64)(e15val) << DCC_CFG_SC_VL_TABLE_##range##_ENTRY##e15##_SHIFT) \
)

/* all CceStatus sub-block freeze bits */
#define ALL_FROZE (CCE_STATUS_SDMA_FROZE_SMASK \
                        | CCE_STATUS_RXE_FROZE_SMASK \
                        | CCE_STATUS_TXE_FROZE_SMASK \
                        | CCE_STATUS_TXE_PIO_FROZE_SMASK)
/* all CceStatus sub-block TXE pause bits */
#define ALL_TXE_PAUSE (CCE_STATUS_TXE_PIO_PAUSED_SMASK \
                        | CCE_STATUS_TXE_PAUSED_SMASK \
                        | CCE_STATUS_SDMA_PAUSED_SMASK)
/* all CceStatus sub-block RXE pause bits */
#define ALL_RXE_PAUSE CCE_STATUS_RXE_PAUSED_SMASK

/*
 * CCE Error flags.
 */
static struct flag_table cce_err_status_flags[] = {
/* 0*/  FLAG_ENTRY0("CceCsrParityErr",
                CCE_ERR_STATUS_CCE_CSR_PARITY_ERR_SMASK),
/* 1*/  FLAG_ENTRY0("CceCsrReadBadAddrErr",
                CCE_ERR_STATUS_CCE_CSR_READ_BAD_ADDR_ERR_SMASK),
/* 2*/  FLAG_ENTRY0("CceCsrWriteBadAddrErr",
                CCE_ERR_STATUS_CCE_CSR_WRITE_BAD_ADDR_ERR_SMASK),
/* 3*/  FLAG_ENTRY0("CceTrgtAsyncFifoParityErr",
                CCE_ERR_STATUS_CCE_TRGT_ASYNC_FIFO_PARITY_ERR_SMASK),
/* 4*/  FLAG_ENTRY0("CceTrgtAccessErr",
                CCE_ERR_STATUS_CCE_TRGT_ACCESS_ERR_SMASK),
/* 5*/  FLAG_ENTRY0("CceRspdDataParityErr",
                CCE_ERR_STATUS_CCE_RSPD_DATA_PARITY_ERR_SMASK),
/* 6*/  FLAG_ENTRY0("CceCli0AsyncFifoParityErr",
                CCE_ERR_STATUS_CCE_CLI0_ASYNC_FIFO_PARITY_ERR_SMASK),
/* 7*/  FLAG_ENTRY0("CceCsrCfgBusParityErr",
                CCE_ERR_STATUS_CCE_CSR_CFG_BUS_PARITY_ERR_SMASK),
/* 8*/  FLAG_ENTRY0("CceCli2AsyncFifoParityErr",
                CCE_ERR_STATUS_CCE_CLI2_ASYNC_FIFO_PARITY_ERR_SMASK),
/* 9*/  FLAG_ENTRY0("CceCli1AsyncFifoPioCrdtParityErr",
            CCE_ERR_STATUS_CCE_CLI1_ASYNC_FIFO_PIO_CRDT_PARITY_ERR_SMASK),
/*10*/  FLAG_ENTRY0("CceCli1AsyncFifoSdmaHdParityErr",
            CCE_ERR_STATUS_CCE_CLI1_ASYNC_FIFO_SDMA_HD_PARITY_ERR_SMASK),
/*11*/  FLAG_ENTRY0("CceCli1AsyncFifoRxdmaParityError",
            CCE_ERR_STATUS_CCE_CLI1_ASYNC_FIFO_RXDMA_PARITY_ERROR_SMASK),
/*12*/  FLAG_ENTRY0("CceCli1AsyncFifoDbgParityError",
                CCE_ERR_STATUS_CCE_CLI1_ASYNC_FIFO_DBG_PARITY_ERROR_SMASK),
/*13*/  FLAG_ENTRY0("PcicRetryMemCorErr",
                CCE_ERR_STATUS_PCIC_RETRY_MEM_COR_ERR_SMASK),
/*14*/  FLAG_ENTRY0("PcicRetrySotMemCorErr",
                CCE_ERR_STATUS_PCIC_RETRY_SOT_MEM_COR_ERR_SMASK),
/*15*/  FLAG_ENTRY0("PcicPostHdQCorErr",
                CCE_ERR_STATUS_PCIC_POST_HD_QCOR_ERR_SMASK),
/*16*/  FLAG_ENTRY0("PcicPostDatQCorErr",
                CCE_ERR_STATUS_PCIC_POST_DAT_QCOR_ERR_SMASK),
/*17*/  FLAG_ENTRY0("PcicCplHdQCorErr",
                CCE_ERR_STATUS_PCIC_CPL_HD_QCOR_ERR_SMASK),
/*18*/  FLAG_ENTRY0("PcicCplDatQCorErr",
                CCE_ERR_STATUS_PCIC_CPL_DAT_QCOR_ERR_SMASK),
/*19*/  FLAG_ENTRY0("PcicNPostHQParityErr",
                CCE_ERR_STATUS_PCIC_NPOST_HQ_PARITY_ERR_SMASK),
/*20*/  FLAG_ENTRY0("PcicNPostDatQParityErr",
                CCE_ERR_STATUS_PCIC_NPOST_DAT_QPARITY_ERR_SMASK),
/*21*/  FLAG_ENTRY0("PcicRetryMemUncErr",
                CCE_ERR_STATUS_PCIC_RETRY_MEM_UNC_ERR_SMASK),
/*22*/  FLAG_ENTRY0("PcicRetrySotMemUncErr",
                CCE_ERR_STATUS_PCIC_RETRY_SOT_MEM_UNC_ERR_SMASK),
/*23*/  FLAG_ENTRY0("PcicPostHdQUncErr",
                CCE_ERR_STATUS_PCIC_POST_HD_QUNC_ERR_SMASK),
/*24*/  FLAG_ENTRY0("PcicPostDatQUncErr",
                CCE_ERR_STATUS_PCIC_POST_DAT_QUNC_ERR_SMASK),
/*25*/  FLAG_ENTRY0("PcicCplHdQUncErr",
                CCE_ERR_STATUS_PCIC_CPL_HD_QUNC_ERR_SMASK),
/*26*/  FLAG_ENTRY0("PcicCplDatQUncErr",
                CCE_ERR_STATUS_PCIC_CPL_DAT_QUNC_ERR_SMASK),
/*27*/  FLAG_ENTRY0("PcicTransmitFrontParityErr",
                CCE_ERR_STATUS_PCIC_TRANSMIT_FRONT_PARITY_ERR_SMASK),
/*28*/  FLAG_ENTRY0("PcicTransmitBackParityErr",
                CCE_ERR_STATUS_PCIC_TRANSMIT_BACK_PARITY_ERR_SMASK),
/*29*/  FLAG_ENTRY0("PcicReceiveParityErr",
                CCE_ERR_STATUS_PCIC_RECEIVE_PARITY_ERR_SMASK),
/*30*/  FLAG_ENTRY0("CceTrgtCplTimeoutErr",
                CCE_ERR_STATUS_CCE_TRGT_CPL_TIMEOUT_ERR_SMASK),
/*31*/  FLAG_ENTRY0("LATriggered",
                CCE_ERR_STATUS_LA_TRIGGERED_SMASK),
/*32*/  FLAG_ENTRY0("CceSegReadBadAddrErr",
                CCE_ERR_STATUS_CCE_SEG_READ_BAD_ADDR_ERR_SMASK),
/*33*/  FLAG_ENTRY0("CceSegWriteBadAddrErr",
                CCE_ERR_STATUS_CCE_SEG_WRITE_BAD_ADDR_ERR_SMASK),
/*34*/  FLAG_ENTRY0("CceRcplAsyncFifoParityErr",
                CCE_ERR_STATUS_CCE_RCPL_ASYNC_FIFO_PARITY_ERR_SMASK),
/*35*/  FLAG_ENTRY0("CceRxdmaConvFifoParityErr",
                CCE_ERR_STATUS_CCE_RXDMA_CONV_FIFO_PARITY_ERR_SMASK),
/*36*/  FLAG_ENTRY0("CceMsixTableCorErr",
                CCE_ERR_STATUS_CCE_MSIX_TABLE_COR_ERR_SMASK),
/*37*/  FLAG_ENTRY0("CceMsixTableUncErr",
                CCE_ERR_STATUS_CCE_MSIX_TABLE_UNC_ERR_SMASK),
/*38*/  FLAG_ENTRY0("CceIntMapCorErr",
                CCE_ERR_STATUS_CCE_INT_MAP_COR_ERR_SMASK),
/*39*/  FLAG_ENTRY0("CceIntMapUncErr",
                CCE_ERR_STATUS_CCE_INT_MAP_UNC_ERR_SMASK),
/*40*/  FLAG_ENTRY0("CceMsixCsrParityErr",
                CCE_ERR_STATUS_CCE_MSIX_CSR_PARITY_ERR_SMASK),
/*41-63 reserved*/
};

/*
 * Misc Error flags
 */
#define MES(text) MISC_ERR_STATUS_MISC_##text##_ERR_SMASK
static struct flag_table misc_err_status_flags[] = {
/* 0*/  FLAG_ENTRY0("CSR_PARITY", MES(CSR_PARITY)),
/* 1*/  FLAG_ENTRY0("CSR_READ_BAD_ADDR", MES(CSR_READ_BAD_ADDR)),
/* 2*/  FLAG_ENTRY0("CSR_WRITE_BAD_ADDR", MES(CSR_WRITE_BAD_ADDR)),
/* 3*/  FLAG_ENTRY0("SBUS_WRITE_FAILED", MES(SBUS_WRITE_FAILED)),
/* 4*/  FLAG_ENTRY0("KEY_MISMATCH", MES(KEY_MISMATCH)),
/* 5*/  FLAG_ENTRY0("FW_AUTH_FAILED", MES(FW_AUTH_FAILED)),
/* 6*/  FLAG_ENTRY0("EFUSE_CSR_PARITY", MES(EFUSE_CSR_PARITY)),
/* 7*/  FLAG_ENTRY0("EFUSE_READ_BAD_ADDR", MES(EFUSE_READ_BAD_ADDR)),
/* 8*/  FLAG_ENTRY0("EFUSE_WRITE", MES(EFUSE_WRITE)),
/* 9*/  FLAG_ENTRY0("EFUSE_DONE_PARITY", MES(EFUSE_DONE_PARITY)),
/*10*/  FLAG_ENTRY0("INVALID_EEP_CMD", MES(INVALID_EEP_CMD)),
/*11*/  FLAG_ENTRY0("MBIST_FAIL", MES(MBIST_FAIL)),
/*12*/  FLAG_ENTRY0("PLL_LOCK_FAIL", MES(PLL_LOCK_FAIL))
};

/*
 * TXE PIO Error flags and consequences
 */
static struct flag_table pio_err_status_flags[] = {
/* 0*/  FLAG_ENTRY("PioWriteBadCtxt",
        SEC_WRITE_DROPPED,
        SEND_PIO_ERR_STATUS_PIO_WRITE_BAD_CTXT_ERR_SMASK),
/* 1*/  FLAG_ENTRY("PioWriteAddrParity",
        SEC_SPC_FREEZE,
        SEND_PIO_ERR_STATUS_PIO_WRITE_ADDR_PARITY_ERR_SMASK),
/* 2*/  FLAG_ENTRY("PioCsrParity",
        SEC_SPC_FREEZE,
        SEND_PIO_ERR_STATUS_PIO_CSR_PARITY_ERR_SMASK),
/* 3*/  FLAG_ENTRY("PioSbMemFifo0",
        SEC_SPC_FREEZE,
        SEND_PIO_ERR_STATUS_PIO_SB_MEM_FIFO0_ERR_SMASK),
/* 4*/  FLAG_ENTRY("PioSbMemFifo1",
        SEC_SPC_FREEZE,
        SEND_PIO_ERR_STATUS_PIO_SB_MEM_FIFO1_ERR_SMASK),
/* 5*/  FLAG_ENTRY("PioPccFifoParity",
        SEC_SPC_FREEZE,
        SEND_PIO_ERR_STATUS_PIO_PCC_FIFO_PARITY_ERR_SMASK),
/* 6*/  FLAG_ENTRY("PioPecFifoParity",
        SEC_SPC_FREEZE,
        SEND_PIO_ERR_STATUS_PIO_PEC_FIFO_PARITY_ERR_SMASK),
/* 7*/  FLAG_ENTRY("PioSbrdctlCrrelParity",
        SEC_SPC_FREEZE,
        SEND_PIO_ERR_STATUS_PIO_SBRDCTL_CRREL_PARITY_ERR_SMASK),
/* 8*/  FLAG_ENTRY("PioSbrdctrlCrrelFifoParity",
        SEC_SPC_FREEZE,
        SEND_PIO_ERR_STATUS_PIO_SBRDCTRL_CRREL_FIFO_PARITY_ERR_SMASK),
/* 9*/  FLAG_ENTRY("PioPktEvictFifoParityErr",
        SEC_SPC_FREEZE,
        SEND_PIO_ERR_STATUS_PIO_PKT_EVICT_FIFO_PARITY_ERR_SMASK),
/*10*/  FLAG_ENTRY("PioSmPktResetParity",
        SEC_SPC_FREEZE,
        SEND_PIO_ERR_STATUS_PIO_SM_PKT_RESET_PARITY_ERR_SMASK),
/*11*/  FLAG_ENTRY("PioVlLenMemBank0Unc",
        SEC_SPC_FREEZE,
        SEND_PIO_ERR_STATUS_PIO_VL_LEN_MEM_BANK0_UNC_ERR_SMASK),
/*12*/  FLAG_ENTRY("PioVlLenMemBank1Unc",
        SEC_SPC_FREEZE,
        SEND_PIO_ERR_STATUS_PIO_VL_LEN_MEM_BANK1_UNC_ERR_SMASK),
/*13*/  FLAG_ENTRY("PioVlLenMemBank0Cor",
        0,
        SEND_PIO_ERR_STATUS_PIO_VL_LEN_MEM_BANK0_COR_ERR_SMASK),
/*14*/  FLAG_ENTRY("PioVlLenMemBank1Cor",
        0,
        SEND_PIO_ERR_STATUS_PIO_VL_LEN_MEM_BANK1_COR_ERR_SMASK),
/*15*/  FLAG_ENTRY("PioCreditRetFifoParity",
        SEC_SPC_FREEZE,
        SEND_PIO_ERR_STATUS_PIO_CREDIT_RET_FIFO_PARITY_ERR_SMASK),
/*16*/  FLAG_ENTRY("PioPpmcPblFifo",
        SEC_SPC_FREEZE,
        SEND_PIO_ERR_STATUS_PIO_PPMC_PBL_FIFO_ERR_SMASK),
/*17*/  FLAG_ENTRY("PioInitSmIn",
        0,
        SEND_PIO_ERR_STATUS_PIO_INIT_SM_IN_ERR_SMASK),
/*18*/  FLAG_ENTRY("PioPktEvictSmOrArbSm",
        SEC_SPC_FREEZE,
        SEND_PIO_ERR_STATUS_PIO_PKT_EVICT_SM_OR_ARB_SM_ERR_SMASK),
/*19*/  FLAG_ENTRY("PioHostAddrMemUnc",
        SEC_SPC_FREEZE,
        SEND_PIO_ERR_STATUS_PIO_HOST_ADDR_MEM_UNC_ERR_SMASK),
/*20*/  FLAG_ENTRY("PioHostAddrMemCor",
        0,
        SEND_PIO_ERR_STATUS_PIO_HOST_ADDR_MEM_COR_ERR_SMASK),
/*21*/  FLAG_ENTRY("PioWriteDataParity",
        SEC_SPC_FREEZE,
        SEND_PIO_ERR_STATUS_PIO_WRITE_DATA_PARITY_ERR_SMASK),
/*22*/  FLAG_ENTRY("PioStateMachine",
        SEC_SPC_FREEZE,
        SEND_PIO_ERR_STATUS_PIO_STATE_MACHINE_ERR_SMASK),
/*23*/  FLAG_ENTRY("PioWriteQwValidParity",
        SEC_WRITE_DROPPED|SEC_SPC_FREEZE,
        SEND_PIO_ERR_STATUS_PIO_WRITE_QW_VALID_PARITY_ERR_SMASK),
/*24*/  FLAG_ENTRY("PioBlockQwCountParity",
        SEC_WRITE_DROPPED|SEC_SPC_FREEZE,
        SEND_PIO_ERR_STATUS_PIO_BLOCK_QW_COUNT_PARITY_ERR_SMASK),
/*25*/  FLAG_ENTRY("PioVlfVlLenParity",
        SEC_SPC_FREEZE,
        SEND_PIO_ERR_STATUS_PIO_VLF_VL_LEN_PARITY_ERR_SMASK),
/*26*/  FLAG_ENTRY("PioVlfSopParity",
        SEC_SPC_FREEZE,
        SEND_PIO_ERR_STATUS_PIO_VLF_SOP_PARITY_ERR_SMASK),
/*27*/  FLAG_ENTRY("PioVlFifoParity",
        SEC_SPC_FREEZE,
        SEND_PIO_ERR_STATUS_PIO_VL_FIFO_PARITY_ERR_SMASK),
/*28*/  FLAG_ENTRY("PioPpmcBqcMemParity",
        SEC_SPC_FREEZE,
        SEND_PIO_ERR_STATUS_PIO_PPMC_BQC_MEM_PARITY_ERR_SMASK),
/*29*/  FLAG_ENTRY("PioPpmcSopLen",
        SEC_SPC_FREEZE,
        SEND_PIO_ERR_STATUS_PIO_PPMC_SOP_LEN_ERR_SMASK),
/*30-31 reserved*/
/*32*/  FLAG_ENTRY("PioCurrentFreeCntParity",
        SEC_SPC_FREEZE,
        SEND_PIO_ERR_STATUS_PIO_CURRENT_FREE_CNT_PARITY_ERR_SMASK),
/*33*/  FLAG_ENTRY("PioLastReturnedCntParity",
        SEC_SPC_FREEZE,
        SEND_PIO_ERR_STATUS_PIO_LAST_RETURNED_CNT_PARITY_ERR_SMASK),
/*34*/  FLAG_ENTRY("PioPccSopHeadParity",
        SEC_SPC_FREEZE,
        SEND_PIO_ERR_STATUS_PIO_PCC_SOP_HEAD_PARITY_ERR_SMASK),
/*35*/  FLAG_ENTRY("PioPecSopHeadParityErr",
        SEC_SPC_FREEZE,
        SEND_PIO_ERR_STATUS_PIO_PEC_SOP_HEAD_PARITY_ERR_SMASK),
/*36-63 reserved*/
};

/* TXE PIO errors that cause an SPC freeze */
#define ALL_PIO_FREEZE_ERR \
        (SEND_PIO_ERR_STATUS_PIO_WRITE_ADDR_PARITY_ERR_SMASK \
        | SEND_PIO_ERR_STATUS_PIO_CSR_PARITY_ERR_SMASK \
        | SEND_PIO_ERR_STATUS_PIO_SB_MEM_FIFO0_ERR_SMASK \
        | SEND_PIO_ERR_STATUS_PIO_SB_MEM_FIFO1_ERR_SMASK \
        | SEND_PIO_ERR_STATUS_PIO_PCC_FIFO_PARITY_ERR_SMASK \
        | SEND_PIO_ERR_STATUS_PIO_PEC_FIFO_PARITY_ERR_SMASK \
        | SEND_PIO_ERR_STATUS_PIO_SBRDCTL_CRREL_PARITY_ERR_SMASK \
        | SEND_PIO_ERR_STATUS_PIO_SBRDCTRL_CRREL_FIFO_PARITY_ERR_SMASK \
        | SEND_PIO_ERR_STATUS_PIO_PKT_EVICT_FIFO_PARITY_ERR_SMASK \
        | SEND_PIO_ERR_STATUS_PIO_SM_PKT_RESET_PARITY_ERR_SMASK \
        | SEND_PIO_ERR_STATUS_PIO_VL_LEN_MEM_BANK0_UNC_ERR_SMASK \
        | SEND_PIO_ERR_STATUS_PIO_VL_LEN_MEM_BANK1_UNC_ERR_SMASK \
        | SEND_PIO_ERR_STATUS_PIO_CREDIT_RET_FIFO_PARITY_ERR_SMASK \
        | SEND_PIO_ERR_STATUS_PIO_PPMC_PBL_FIFO_ERR_SMASK \
        | SEND_PIO_ERR_STATUS_PIO_PKT_EVICT_SM_OR_ARB_SM_ERR_SMASK \
        | SEND_PIO_ERR_STATUS_PIO_HOST_ADDR_MEM_UNC_ERR_SMASK \
        | SEND_PIO_ERR_STATUS_PIO_WRITE_DATA_PARITY_ERR_SMASK \
        | SEND_PIO_ERR_STATUS_PIO_STATE_MACHINE_ERR_SMASK \
        | SEND_PIO_ERR_STATUS_PIO_WRITE_QW_VALID_PARITY_ERR_SMASK \
        | SEND_PIO_ERR_STATUS_PIO_BLOCK_QW_COUNT_PARITY_ERR_SMASK \
        | SEND_PIO_ERR_STATUS_PIO_VLF_VL_LEN_PARITY_ERR_SMASK \
        | SEND_PIO_ERR_STATUS_PIO_VLF_SOP_PARITY_ERR_SMASK \
        | SEND_PIO_ERR_STATUS_PIO_VL_FIFO_PARITY_ERR_SMASK \
        | SEND_PIO_ERR_STATUS_PIO_PPMC_BQC_MEM_PARITY_ERR_SMASK \
        | SEND_PIO_ERR_STATUS_PIO_PPMC_SOP_LEN_ERR_SMASK \
        | SEND_PIO_ERR_STATUS_PIO_CURRENT_FREE_CNT_PARITY_ERR_SMASK \
        | SEND_PIO_ERR_STATUS_PIO_LAST_RETURNED_CNT_PARITY_ERR_SMASK \
        | SEND_PIO_ERR_STATUS_PIO_PCC_SOP_HEAD_PARITY_ERR_SMASK \
        | SEND_PIO_ERR_STATUS_PIO_PEC_SOP_HEAD_PARITY_ERR_SMASK)

/*
 * TXE SDMA Error flags
 */
static struct flag_table sdma_err_status_flags[] = {
/* 0*/  FLAG_ENTRY0("SDmaRpyTagErr",
                SEND_DMA_ERR_STATUS_SDMA_RPY_TAG_ERR_SMASK),
/* 1*/  FLAG_ENTRY0("SDmaCsrParityErr",
                SEND_DMA_ERR_STATUS_SDMA_CSR_PARITY_ERR_SMASK),
/* 2*/  FLAG_ENTRY0("SDmaPcieReqTrackingUncErr",
                SEND_DMA_ERR_STATUS_SDMA_PCIE_REQ_TRACKING_UNC_ERR_SMASK),
/* 3*/  FLAG_ENTRY0("SDmaPcieReqTrackingCorErr",
                SEND_DMA_ERR_STATUS_SDMA_PCIE_REQ_TRACKING_COR_ERR_SMASK),
/*04-63 reserved*/
};

/* TXE SDMA errors that cause an SPC freeze */
#define ALL_SDMA_FREEZE_ERR  \
                (SEND_DMA_ERR_STATUS_SDMA_RPY_TAG_ERR_SMASK \
                | SEND_DMA_ERR_STATUS_SDMA_CSR_PARITY_ERR_SMASK \
                | SEND_DMA_ERR_STATUS_SDMA_PCIE_REQ_TRACKING_UNC_ERR_SMASK)

/*
 * TXE Egress Error flags
 */
#define SEES(text) SEND_EGRESS_ERR_STATUS_##text##_ERR_SMASK
static struct flag_table egress_err_status_flags[] = {
/* 0*/  FLAG_ENTRY0("TxPktIntegrityMemCorErr", SEES(TX_PKT_INTEGRITY_MEM_COR)),
/* 1*/  FLAG_ENTRY0("TxPktIntegrityMemUncErr", SEES(TX_PKT_INTEGRITY_MEM_UNC)),
/* 2 reserved */
/* 3*/  FLAG_ENTRY0("TxEgressFifoUnderrunOrParityErr",
                SEES(TX_EGRESS_FIFO_UNDERRUN_OR_PARITY)),
/* 4*/  FLAG_ENTRY0("TxLinkdownErr", SEES(TX_LINKDOWN)),
/* 5*/  FLAG_ENTRY0("TxIncorrectLinkStateErr", SEES(TX_INCORRECT_LINK_STATE)),
/* 6 reserved */
/* 7*/  FLAG_ENTRY0("TxPioLaunchIntfParityErr",
                SEES(TX_PIO_LAUNCH_INTF_PARITY)),
/* 8*/  FLAG_ENTRY0("TxSdmaLaunchIntfParityErr",
                SEES(TX_SDMA_LAUNCH_INTF_PARITY)),
/* 9-10 reserved */
/*11*/  FLAG_ENTRY0("TxSbrdCtlStateMachineParityErr",
                SEES(TX_SBRD_CTL_STATE_MACHINE_PARITY)),
/*12*/  FLAG_ENTRY0("TxIllegalVLErr", SEES(TX_ILLEGAL_VL)),
/*13*/  FLAG_ENTRY0("TxLaunchCsrParityErr", SEES(TX_LAUNCH_CSR_PARITY)),
/*14*/  FLAG_ENTRY0("TxSbrdCtlCsrParityErr", SEES(TX_SBRD_CTL_CSR_PARITY)),
/*15*/  FLAG_ENTRY0("TxConfigParityErr", SEES(TX_CONFIG_PARITY)),
/*16*/  FLAG_ENTRY0("TxSdma0DisallowedPacketErr",
                SEES(TX_SDMA0_DISALLOWED_PACKET)),
/*17*/  FLAG_ENTRY0("TxSdma1DisallowedPacketErr",
                SEES(TX_SDMA1_DISALLOWED_PACKET)),
/*18*/  FLAG_ENTRY0("TxSdma2DisallowedPacketErr",
                SEES(TX_SDMA2_DISALLOWED_PACKET)),
/*19*/  FLAG_ENTRY0("TxSdma3DisallowedPacketErr",
                SEES(TX_SDMA3_DISALLOWED_PACKET)),
/*20*/  FLAG_ENTRY0("TxSdma4DisallowedPacketErr",
                SEES(TX_SDMA4_DISALLOWED_PACKET)),
/*21*/  FLAG_ENTRY0("TxSdma5DisallowedPacketErr",
                SEES(TX_SDMA5_DISALLOWED_PACKET)),
/*22*/  FLAG_ENTRY0("TxSdma6DisallowedPacketErr",
                SEES(TX_SDMA6_DISALLOWED_PACKET)),
/*23*/  FLAG_ENTRY0("TxSdma7DisallowedPacketErr",
                SEES(TX_SDMA7_DISALLOWED_PACKET)),
/*24*/  FLAG_ENTRY0("TxSdma8DisallowedPacketErr",
                SEES(TX_SDMA8_DISALLOWED_PACKET)),
/*25*/  FLAG_ENTRY0("TxSdma9DisallowedPacketErr",
                SEES(TX_SDMA9_DISALLOWED_PACKET)),
/*26*/  FLAG_ENTRY0("TxSdma10DisallowedPacketErr",
                SEES(TX_SDMA10_DISALLOWED_PACKET)),
/*27*/  FLAG_ENTRY0("TxSdma11DisallowedPacketErr",
                SEES(TX_SDMA11_DISALLOWED_PACKET)),
/*28*/  FLAG_ENTRY0("TxSdma12DisallowedPacketErr",
                SEES(TX_SDMA12_DISALLOWED_PACKET)),
/*29*/  FLAG_ENTRY0("TxSdma13DisallowedPacketErr",
                SEES(TX_SDMA13_DISALLOWED_PACKET)),
/*30*/  FLAG_ENTRY0("TxSdma14DisallowedPacketErr",
                SEES(TX_SDMA14_DISALLOWED_PACKET)),
/*31*/  FLAG_ENTRY0("TxSdma15DisallowedPacketErr",
                SEES(TX_SDMA15_DISALLOWED_PACKET)),
/*32*/  FLAG_ENTRY0("TxLaunchFifo0UncOrParityErr",
                SEES(TX_LAUNCH_FIFO0_UNC_OR_PARITY)),
/*33*/  FLAG_ENTRY0("TxLaunchFifo1UncOrParityErr",
                SEES(TX_LAUNCH_FIFO1_UNC_OR_PARITY)),
/*34*/  FLAG_ENTRY0("TxLaunchFifo2UncOrParityErr",
                SEES(TX_LAUNCH_FIFO2_UNC_OR_PARITY)),
/*35*/  FLAG_ENTRY0("TxLaunchFifo3UncOrParityErr",
                SEES(TX_LAUNCH_FIFO3_UNC_OR_PARITY)),
/*36*/  FLAG_ENTRY0("TxLaunchFifo4UncOrParityErr",
                SEES(TX_LAUNCH_FIFO4_UNC_OR_PARITY)),
/*37*/  FLAG_ENTRY0("TxLaunchFifo5UncOrParityErr",
                SEES(TX_LAUNCH_FIFO5_UNC_OR_PARITY)),
/*38*/  FLAG_ENTRY0("TxLaunchFifo6UncOrParityErr",
                SEES(TX_LAUNCH_FIFO6_UNC_OR_PARITY)),
/*39*/  FLAG_ENTRY0("TxLaunchFifo7UncOrParityErr",
                SEES(TX_LAUNCH_FIFO7_UNC_OR_PARITY)),
/*40*/  FLAG_ENTRY0("TxLaunchFifo8UncOrParityErr",
                SEES(TX_LAUNCH_FIFO8_UNC_OR_PARITY)),
/*41*/  FLAG_ENTRY0("TxCreditReturnParityErr", SEES(TX_CREDIT_RETURN_PARITY)),
/*42*/  FLAG_ENTRY0("TxSbHdrUncErr", SEES(TX_SB_HDR_UNC)),
/*43*/  FLAG_ENTRY0("TxReadSdmaMemoryUncErr", SEES(TX_READ_SDMA_MEMORY_UNC)),
/*44*/  FLAG_ENTRY0("TxReadPioMemoryUncErr", SEES(TX_READ_PIO_MEMORY_UNC)),
/*45*/  FLAG_ENTRY0("TxEgressFifoUncErr", SEES(TX_EGRESS_FIFO_UNC)),
/*46*/  FLAG_ENTRY0("TxHcrcInsertionErr", SEES(TX_HCRC_INSERTION)),
/*47*/  FLAG_ENTRY0("TxCreditReturnVLErr", SEES(TX_CREDIT_RETURN_VL)),
/*48*/  FLAG_ENTRY0("TxLaunchFifo0CorErr", SEES(TX_LAUNCH_FIFO0_COR)),
/*49*/  FLAG_ENTRY0("TxLaunchFifo1CorErr", SEES(TX_LAUNCH_FIFO1_COR)),
/*50*/  FLAG_ENTRY0("TxLaunchFifo2CorErr", SEES(TX_LAUNCH_FIFO2_COR)),
/*51*/  FLAG_ENTRY0("TxLaunchFifo3CorErr", SEES(TX_LAUNCH_FIFO3_COR)),
/*52*/  FLAG_ENTRY0("TxLaunchFifo4CorErr", SEES(TX_LAUNCH_FIFO4_COR)),
/*53*/  FLAG_ENTRY0("TxLaunchFifo5CorErr", SEES(TX_LAUNCH_FIFO5_COR)),
/*54*/  FLAG_ENTRY0("TxLaunchFifo6CorErr", SEES(TX_LAUNCH_FIFO6_COR)),
/*55*/  FLAG_ENTRY0("TxLaunchFifo7CorErr", SEES(TX_LAUNCH_FIFO7_COR)),
/*56*/  FLAG_ENTRY0("TxLaunchFifo8CorErr", SEES(TX_LAUNCH_FIFO8_COR)),
/*57*/  FLAG_ENTRY0("TxCreditOverrunErr", SEES(TX_CREDIT_OVERRUN)),
/*58*/  FLAG_ENTRY0("TxSbHdrCorErr", SEES(TX_SB_HDR_COR)),
/*59*/  FLAG_ENTRY0("TxReadSdmaMemoryCorErr", SEES(TX_READ_SDMA_MEMORY_COR)),
/*60*/  FLAG_ENTRY0("TxReadPioMemoryCorErr", SEES(TX_READ_PIO_MEMORY_COR)),
/*61*/  FLAG_ENTRY0("TxEgressFifoCorErr", SEES(TX_EGRESS_FIFO_COR)),
/*62*/  FLAG_ENTRY0("TxReadSdmaMemoryCsrUncErr",
                SEES(TX_READ_SDMA_MEMORY_CSR_UNC)),
/*63*/  FLAG_ENTRY0("TxReadPioMemoryCsrUncErr",
                SEES(TX_READ_PIO_MEMORY_CSR_UNC)),
};

/*
 * TXE Egress Error Info flags
 */
#define SEEI(text) SEND_EGRESS_ERR_INFO_##text##_ERR_SMASK
static struct flag_table egress_err_info_flags[] = {
/* 0*/  FLAG_ENTRY0("Reserved", 0ull),
/* 1*/  FLAG_ENTRY0("VLErr", SEEI(VL)),
/* 2*/  FLAG_ENTRY0("JobKeyErr", SEEI(JOB_KEY)),
/* 3*/  FLAG_ENTRY0("JobKeyErr", SEEI(JOB_KEY)),
/* 4*/  FLAG_ENTRY0("PartitionKeyErr", SEEI(PARTITION_KEY)),
/* 5*/  FLAG_ENTRY0("SLIDErr", SEEI(SLID)),
/* 6*/  FLAG_ENTRY0("OpcodeErr", SEEI(OPCODE)),
/* 7*/  FLAG_ENTRY0("VLMappingErr", SEEI(VL_MAPPING)),
/* 8*/  FLAG_ENTRY0("RawErr", SEEI(RAW)),
/* 9*/  FLAG_ENTRY0("RawIPv6Err", SEEI(RAW_IPV6)),
/*10*/  FLAG_ENTRY0("GRHErr", SEEI(GRH)),
/*11*/  FLAG_ENTRY0("BypassErr", SEEI(BYPASS)),
/*12*/  FLAG_ENTRY0("KDETHPacketsErr", SEEI(KDETH_PACKETS)),
/*13*/  FLAG_ENTRY0("NonKDETHPacketsErr", SEEI(NON_KDETH_PACKETS)),
/*14*/  FLAG_ENTRY0("TooSmallIBPacketsErr", SEEI(TOO_SMALL_IB_PACKETS)),
/*15*/  FLAG_ENTRY0("TooSmallBypassPacketsErr", SEEI(TOO_SMALL_BYPASS_PACKETS)),
/*16*/  FLAG_ENTRY0("PbcTestErr", SEEI(PBC_TEST)),
/*17*/  FLAG_ENTRY0("BadPktLenErr", SEEI(BAD_PKT_LEN)),
/*18*/  FLAG_ENTRY0("TooLongIBPacketErr", SEEI(TOO_LONG_IB_PACKET)),
/*19*/  FLAG_ENTRY0("TooLongBypassPacketsErr", SEEI(TOO_LONG_BYPASS_PACKETS)),
/*20*/  FLAG_ENTRY0("PbcStaticRateControlErr", SEEI(PBC_STATIC_RATE_CONTROL)),
/*21*/  FLAG_ENTRY0("BypassBadPktLenErr", SEEI(BAD_PKT_LEN)),
};

/* TXE Egress errors that cause an SPC freeze */
#define ALL_TXE_EGRESS_FREEZE_ERR \
        (SEES(TX_EGRESS_FIFO_UNDERRUN_OR_PARITY) \
        | SEES(TX_PIO_LAUNCH_INTF_PARITY) \
        | SEES(TX_SDMA_LAUNCH_INTF_PARITY) \
        | SEES(TX_SBRD_CTL_STATE_MACHINE_PARITY) \
        | SEES(TX_LAUNCH_CSR_PARITY) \
        | SEES(TX_SBRD_CTL_CSR_PARITY) \
        | SEES(TX_CONFIG_PARITY) \
        | SEES(TX_LAUNCH_FIFO0_UNC_OR_PARITY) \
        | SEES(TX_LAUNCH_FIFO1_UNC_OR_PARITY) \
        | SEES(TX_LAUNCH_FIFO2_UNC_OR_PARITY) \
        | SEES(TX_LAUNCH_FIFO3_UNC_OR_PARITY) \
        | SEES(TX_LAUNCH_FIFO4_UNC_OR_PARITY) \
        | SEES(TX_LAUNCH_FIFO5_UNC_OR_PARITY) \
        | SEES(TX_LAUNCH_FIFO6_UNC_OR_PARITY) \
        | SEES(TX_LAUNCH_FIFO7_UNC_OR_PARITY) \
        | SEES(TX_LAUNCH_FIFO8_UNC_OR_PARITY) \
        | SEES(TX_CREDIT_RETURN_PARITY))

/*
 * TXE Send error flags
 */
#define SES(name) SEND_ERR_STATUS_SEND_##name##_ERR_SMASK
static struct flag_table send_err_status_flags[] = {
/* 0*/  FLAG_ENTRY0("SendCsrParityErr", SES(CSR_PARITY)),
/* 1*/  FLAG_ENTRY0("SendCsrReadBadAddrErr", SES(CSR_READ_BAD_ADDR)),
/* 2*/  FLAG_ENTRY0("SendCsrWriteBadAddrErr", SES(CSR_WRITE_BAD_ADDR))
};

/*
 * TXE Send Context Error flags and consequences
 */
static struct flag_table sc_err_status_flags[] = {
/* 0*/  FLAG_ENTRY("InconsistentSop",
                SEC_PACKET_DROPPED | SEC_SC_HALTED,
                SEND_CTXT_ERR_STATUS_PIO_INCONSISTENT_SOP_ERR_SMASK),
/* 1*/  FLAG_ENTRY("DisallowedPacket",
                SEC_PACKET_DROPPED | SEC_SC_HALTED,
                SEND_CTXT_ERR_STATUS_PIO_DISALLOWED_PACKET_ERR_SMASK),
/* 2*/  FLAG_ENTRY("WriteCrossesBoundary",
                SEC_WRITE_DROPPED | SEC_SC_HALTED,
                SEND_CTXT_ERR_STATUS_PIO_WRITE_CROSSES_BOUNDARY_ERR_SMASK),
/* 3*/  FLAG_ENTRY("WriteOverflow",
                SEC_WRITE_DROPPED | SEC_SC_HALTED,
                SEND_CTXT_ERR_STATUS_PIO_WRITE_OVERFLOW_ERR_SMASK),
/* 4*/  FLAG_ENTRY("WriteOutOfBounds",
                SEC_WRITE_DROPPED | SEC_SC_HALTED,
                SEND_CTXT_ERR_STATUS_PIO_WRITE_OUT_OF_BOUNDS_ERR_SMASK),
/* 5-63 reserved*/
};

/*
 * RXE Receive Error flags
 */
#define RXES(name) RCV_ERR_STATUS_RX_##name##_ERR_SMASK
static struct flag_table rxe_err_status_flags[] = {
/* 0*/  FLAG_ENTRY0("RxDmaCsrCorErr", RXES(DMA_CSR_COR)),
/* 1*/  FLAG_ENTRY0("RxDcIntfParityErr", RXES(DC_INTF_PARITY)),
/* 2*/  FLAG_ENTRY0("RxRcvHdrUncErr", RXES(RCV_HDR_UNC)),
/* 3*/  FLAG_ENTRY0("RxRcvHdrCorErr", RXES(RCV_HDR_COR)),
/* 4*/  FLAG_ENTRY0("RxRcvDataUncErr", RXES(RCV_DATA_UNC)),
/* 5*/  FLAG_ENTRY0("RxRcvDataCorErr", RXES(RCV_DATA_COR)),
/* 6*/  FLAG_ENTRY0("RxRcvQpMapTableUncErr", RXES(RCV_QP_MAP_TABLE_UNC)),
/* 7*/  FLAG_ENTRY0("RxRcvQpMapTableCorErr", RXES(RCV_QP_MAP_TABLE_COR)),
/* 8*/  FLAG_ENTRY0("RxRcvCsrParityErr", RXES(RCV_CSR_PARITY)),
/* 9*/  FLAG_ENTRY0("RxDcSopEopParityErr", RXES(DC_SOP_EOP_PARITY)),
/*10*/  FLAG_ENTRY0("RxDmaFlagUncErr", RXES(DMA_FLAG_UNC)),
/*11*/  FLAG_ENTRY0("RxDmaFlagCorErr", RXES(DMA_FLAG_COR)),
/*12*/  FLAG_ENTRY0("RxRcvFsmEncodingErr", RXES(RCV_FSM_ENCODING)),
/*13*/  FLAG_ENTRY0("RxRbufFreeListUncErr", RXES(RBUF_FREE_LIST_UNC)),
/*14*/  FLAG_ENTRY0("RxRbufFreeListCorErr", RXES(RBUF_FREE_LIST_COR)),
/*15*/  FLAG_ENTRY0("RxRbufLookupDesRegUncErr", RXES(RBUF_LOOKUP_DES_REG_UNC)),
/*16*/  FLAG_ENTRY0("RxRbufLookupDesRegUncCorErr",
                RXES(RBUF_LOOKUP_DES_REG_UNC_COR)),
/*17*/  FLAG_ENTRY0("RxRbufLookupDesUncErr", RXES(RBUF_LOOKUP_DES_UNC)),
/*18*/  FLAG_ENTRY0("RxRbufLookupDesCorErr", RXES(RBUF_LOOKUP_DES_COR)),
/*19*/  FLAG_ENTRY0("RxRbufBlockListReadUncErr",
                RXES(RBUF_BLOCK_LIST_READ_UNC)),
/*20*/  FLAG_ENTRY0("RxRbufBlockListReadCorErr",
                RXES(RBUF_BLOCK_LIST_READ_COR)),
/*21*/  FLAG_ENTRY0("RxRbufCsrQHeadBufNumParityErr",
                RXES(RBUF_CSR_QHEAD_BUF_NUM_PARITY)),
/*22*/  FLAG_ENTRY0("RxRbufCsrQEntCntParityErr",
                RXES(RBUF_CSR_QENT_CNT_PARITY)),
/*23*/  FLAG_ENTRY0("RxRbufCsrQNextBufParityErr",
                RXES(RBUF_CSR_QNEXT_BUF_PARITY)),
/*24*/  FLAG_ENTRY0("RxRbufCsrQVldBitParityErr",
                RXES(RBUF_CSR_QVLD_BIT_PARITY)),
/*25*/  FLAG_ENTRY0("RxRbufCsrQHdPtrParityErr", RXES(RBUF_CSR_QHD_PTR_PARITY)),
/*26*/  FLAG_ENTRY0("RxRbufCsrQTlPtrParityErr", RXES(RBUF_CSR_QTL_PTR_PARITY)),
/*27*/  FLAG_ENTRY0("RxRbufCsrQNumOfPktParityErr",
                RXES(RBUF_CSR_QNUM_OF_PKT_PARITY)),
/*28*/  FLAG_ENTRY0("RxRbufCsrQEOPDWParityErr", RXES(RBUF_CSR_QEOPDW_PARITY)),
/*29*/  FLAG_ENTRY0("RxRbufCtxIdParityErr", RXES(RBUF_CTX_ID_PARITY)),
/*30*/  FLAG_ENTRY0("RxRBufBadLookupErr", RXES(RBUF_BAD_LOOKUP)),
/*31*/  FLAG_ENTRY0("RxRbufFullErr", RXES(RBUF_FULL)),
/*32*/  FLAG_ENTRY0("RxRbufEmptyErr", RXES(RBUF_EMPTY)),
/*33*/  FLAG_ENTRY0("RxRbufFlRdAddrParityErr", RXES(RBUF_FL_RD_ADDR_PARITY)),
/*34*/  FLAG_ENTRY0("RxRbufFlWrAddrParityErr", RXES(RBUF_FL_WR_ADDR_PARITY)),
/*35*/  FLAG_ENTRY0("RxRbufFlInitdoneParityErr",
                RXES(RBUF_FL_INITDONE_PARITY)),
/*36*/  FLAG_ENTRY0("RxRbufFlInitWrAddrParityErr",
                RXES(RBUF_FL_INIT_WR_ADDR_PARITY)),
/*37*/  FLAG_ENTRY0("RxRbufNextFreeBufUncErr", RXES(RBUF_NEXT_FREE_BUF_UNC)),
/*38*/  FLAG_ENTRY0("RxRbufNextFreeBufCorErr", RXES(RBUF_NEXT_FREE_BUF_COR)),
/*39*/  FLAG_ENTRY0("RxLookupDesPart1UncErr", RXES(LOOKUP_DES_PART1_UNC)),
/*40*/  FLAG_ENTRY0("RxLookupDesPart1UncCorErr",
                RXES(LOOKUP_DES_PART1_UNC_COR)),
/*41*/  FLAG_ENTRY0("RxLookupDesPart2ParityErr",
                RXES(LOOKUP_DES_PART2_PARITY)),
/*42*/  FLAG_ENTRY0("RxLookupRcvArrayUncErr", RXES(LOOKUP_RCV_ARRAY_UNC)),
/*43*/  FLAG_ENTRY0("RxLookupRcvArrayCorErr", RXES(LOOKUP_RCV_ARRAY_COR)),
/*44*/  FLAG_ENTRY0("RxLookupCsrParityErr", RXES(LOOKUP_CSR_PARITY)),
/*45*/  FLAG_ENTRY0("RxHqIntrCsrParityErr", RXES(HQ_INTR_CSR_PARITY)),
/*46*/  FLAG_ENTRY0("RxHqIntrFsmErr", RXES(HQ_INTR_FSM)),
/*47*/  FLAG_ENTRY0("RxRbufDescPart1UncErr", RXES(RBUF_DESC_PART1_UNC)),
/*48*/  FLAG_ENTRY0("RxRbufDescPart1CorErr", RXES(RBUF_DESC_PART1_COR)),
/*49*/  FLAG_ENTRY0("RxRbufDescPart2UncErr", RXES(RBUF_DESC_PART2_UNC)),
/*50*/  FLAG_ENTRY0("RxRbufDescPart2CorErr", RXES(RBUF_DESC_PART2_COR)),
/*51*/  FLAG_ENTRY0("RxDmaHdrFifoRdUncErr", RXES(DMA_HDR_FIFO_RD_UNC)),
/*52*/  FLAG_ENTRY0("RxDmaHdrFifoRdCorErr", RXES(DMA_HDR_FIFO_RD_COR)),
/*53*/  FLAG_ENTRY0("RxDmaDataFifoRdUncErr", RXES(DMA_DATA_FIFO_RD_UNC)),
/*54*/  FLAG_ENTRY0("RxDmaDataFifoRdCorErr", RXES(DMA_DATA_FIFO_RD_COR)),
/*55*/  FLAG_ENTRY0("RxRbufDataUncErr", RXES(RBUF_DATA_UNC)),
/*56*/  FLAG_ENTRY0("RxRbufDataCorErr", RXES(RBUF_DATA_COR)),
/*57*/  FLAG_ENTRY0("RxDmaCsrParityErr", RXES(DMA_CSR_PARITY)),
/*58*/  FLAG_ENTRY0("RxDmaEqFsmEncodingErr", RXES(DMA_EQ_FSM_ENCODING)),
/*59*/  FLAG_ENTRY0("RxDmaDqFsmEncodingErr", RXES(DMA_DQ_FSM_ENCODING)),
/*60*/  FLAG_ENTRY0("RxDmaCsrUncErr", RXES(DMA_CSR_UNC)),
/*61*/  FLAG_ENTRY0("RxCsrReadBadAddrErr", RXES(CSR_READ_BAD_ADDR)),
/*62*/  FLAG_ENTRY0("RxCsrWriteBadAddrErr", RXES(CSR_WRITE_BAD_ADDR)),
/*63*/  FLAG_ENTRY0("RxCsrParityErr", RXES(CSR_PARITY))
};

/* RXE errors that will trigger an SPC freeze */
#define ALL_RXE_FREEZE_ERR  \
        (RCV_ERR_STATUS_RX_RCV_QP_MAP_TABLE_UNC_ERR_SMASK \
        | RCV_ERR_STATUS_RX_RCV_CSR_PARITY_ERR_SMASK \
        | RCV_ERR_STATUS_RX_DMA_FLAG_UNC_ERR_SMASK \
        | RCV_ERR_STATUS_RX_RCV_FSM_ENCODING_ERR_SMASK \
        | RCV_ERR_STATUS_RX_RBUF_FREE_LIST_UNC_ERR_SMASK \
        | RCV_ERR_STATUS_RX_RBUF_LOOKUP_DES_REG_UNC_ERR_SMASK \
        | RCV_ERR_STATUS_RX_RBUF_LOOKUP_DES_REG_UNC_COR_ERR_SMASK \
        | RCV_ERR_STATUS_RX_RBUF_LOOKUP_DES_UNC_ERR_SMASK \
        | RCV_ERR_STATUS_RX_RBUF_BLOCK_LIST_READ_UNC_ERR_SMASK \
        | RCV_ERR_STATUS_RX_RBUF_CSR_QHEAD_BUF_NUM_PARITY_ERR_SMASK \
        | RCV_ERR_STATUS_RX_RBUF_CSR_QENT_CNT_PARITY_ERR_SMASK \
        | RCV_ERR_STATUS_RX_RBUF_CSR_QNEXT_BUF_PARITY_ERR_SMASK \
        | RCV_ERR_STATUS_RX_RBUF_CSR_QVLD_BIT_PARITY_ERR_SMASK \
        | RCV_ERR_STATUS_RX_RBUF_CSR_QHD_PTR_PARITY_ERR_SMASK \
        | RCV_ERR_STATUS_RX_RBUF_CSR_QTL_PTR_PARITY_ERR_SMASK \
        | RCV_ERR_STATUS_RX_RBUF_CSR_QNUM_OF_PKT_PARITY_ERR_SMASK \
        | RCV_ERR_STATUS_RX_RBUF_CSR_QEOPDW_PARITY_ERR_SMASK \
        | RCV_ERR_STATUS_RX_RBUF_CTX_ID_PARITY_ERR_SMASK \
        | RCV_ERR_STATUS_RX_RBUF_BAD_LOOKUP_ERR_SMASK \
        | RCV_ERR_STATUS_RX_RBUF_FULL_ERR_SMASK \
        | RCV_ERR_STATUS_RX_RBUF_EMPTY_ERR_SMASK \
        | RCV_ERR_STATUS_RX_RBUF_FL_RD_ADDR_PARITY_ERR_SMASK \
        | RCV_ERR_STATUS_RX_RBUF_FL_WR_ADDR_PARITY_ERR_SMASK \
        | RCV_ERR_STATUS_RX_RBUF_FL_INITDONE_PARITY_ERR_SMASK \
        | RCV_ERR_STATUS_RX_RBUF_FL_INIT_WR_ADDR_PARITY_ERR_SMASK \
        | RCV_ERR_STATUS_RX_RBUF_NEXT_FREE_BUF_UNC_ERR_SMASK \
        | RCV_ERR_STATUS_RX_LOOKUP_DES_PART1_UNC_ERR_SMASK \
        | RCV_ERR_STATUS_RX_LOOKUP_DES_PART1_UNC_COR_ERR_SMASK \
        | RCV_ERR_STATUS_RX_LOOKUP_DES_PART2_PARITY_ERR_SMASK \
        | RCV_ERR_STATUS_RX_LOOKUP_RCV_ARRAY_UNC_ERR_SMASK \
        | RCV_ERR_STATUS_RX_LOOKUP_CSR_PARITY_ERR_SMASK \
        | RCV_ERR_STATUS_RX_HQ_INTR_CSR_PARITY_ERR_SMASK \
        | RCV_ERR_STATUS_RX_HQ_INTR_FSM_ERR_SMASK \
        | RCV_ERR_STATUS_RX_RBUF_DESC_PART1_UNC_ERR_SMASK \
        | RCV_ERR_STATUS_RX_RBUF_DESC_PART1_COR_ERR_SMASK \
        | RCV_ERR_STATUS_RX_RBUF_DESC_PART2_UNC_ERR_SMASK \
        | RCV_ERR_STATUS_RX_DMA_HDR_FIFO_RD_UNC_ERR_SMASK \
        | RCV_ERR_STATUS_RX_DMA_DATA_FIFO_RD_UNC_ERR_SMASK \
        | RCV_ERR_STATUS_RX_RBUF_DATA_UNC_ERR_SMASK \
        | RCV_ERR_STATUS_RX_DMA_CSR_PARITY_ERR_SMASK \
        | RCV_ERR_STATUS_RX_DMA_EQ_FSM_ENCODING_ERR_SMASK \
        | RCV_ERR_STATUS_RX_DMA_DQ_FSM_ENCODING_ERR_SMASK \
        | RCV_ERR_STATUS_RX_DMA_CSR_UNC_ERR_SMASK \
        | RCV_ERR_STATUS_RX_CSR_PARITY_ERR_SMASK)

#define RXE_FREEZE_ABORT_MASK \
        (RCV_ERR_STATUS_RX_DMA_CSR_UNC_ERR_SMASK | \
        RCV_ERR_STATUS_RX_DMA_HDR_FIFO_RD_UNC_ERR_SMASK | \
        RCV_ERR_STATUS_RX_DMA_DATA_FIFO_RD_UNC_ERR_SMASK)

/*
 * DCC Error Flags
 */
#define DCCE(name) DCC_ERR_FLG_##name##_SMASK
static struct flag_table dcc_err_flags[] = {
        FLAG_ENTRY0("bad_l2_err", DCCE(BAD_L2_ERR)),
        FLAG_ENTRY0("bad_sc_err", DCCE(BAD_SC_ERR)),
        FLAG_ENTRY0("bad_mid_tail_err", DCCE(BAD_MID_TAIL_ERR)),
        FLAG_ENTRY0("bad_preemption_err", DCCE(BAD_PREEMPTION_ERR)),
        FLAG_ENTRY0("preemption_err", DCCE(PREEMPTION_ERR)),
        FLAG_ENTRY0("preemptionvl15_err", DCCE(PREEMPTIONVL15_ERR)),
        FLAG_ENTRY0("bad_vl_marker_err", DCCE(BAD_VL_MARKER_ERR)),
        FLAG_ENTRY0("bad_dlid_target_err", DCCE(BAD_DLID_TARGET_ERR)),
        FLAG_ENTRY0("bad_lver_err", DCCE(BAD_LVER_ERR)),
        FLAG_ENTRY0("uncorrectable_err", DCCE(UNCORRECTABLE_ERR)),
        FLAG_ENTRY0("bad_crdt_ack_err", DCCE(BAD_CRDT_ACK_ERR)),
        FLAG_ENTRY0("unsup_pkt_type", DCCE(UNSUP_PKT_TYPE)),
        FLAG_ENTRY0("bad_ctrl_flit_err", DCCE(BAD_CTRL_FLIT_ERR)),
        FLAG_ENTRY0("event_cntr_parity_err", DCCE(EVENT_CNTR_PARITY_ERR)),
        FLAG_ENTRY0("event_cntr_rollover_err", DCCE(EVENT_CNTR_ROLLOVER_ERR)),
        FLAG_ENTRY0("link_err", DCCE(LINK_ERR)),
        FLAG_ENTRY0("misc_cntr_rollover_err", DCCE(MISC_CNTR_ROLLOVER_ERR)),
        FLAG_ENTRY0("bad_ctrl_dist_err", DCCE(BAD_CTRL_DIST_ERR)),
        FLAG_ENTRY0("bad_tail_dist_err", DCCE(BAD_TAIL_DIST_ERR)),
        FLAG_ENTRY0("bad_head_dist_err", DCCE(BAD_HEAD_DIST_ERR)),
        FLAG_ENTRY0("nonvl15_state_err", DCCE(NONVL15_STATE_ERR)),
        FLAG_ENTRY0("vl15_multi_err", DCCE(VL15_MULTI_ERR)),
        FLAG_ENTRY0("bad_pkt_length_err", DCCE(BAD_PKT_LENGTH_ERR)),
        FLAG_ENTRY0("unsup_vl_err", DCCE(UNSUP_VL_ERR)),
        FLAG_ENTRY0("perm_nvl15_err", DCCE(PERM_NVL15_ERR)),
        FLAG_ENTRY0("slid_zero_err", DCCE(SLID_ZERO_ERR)),
        FLAG_ENTRY0("dlid_zero_err", DCCE(DLID_ZERO_ERR)),
        FLAG_ENTRY0("length_mtu_err", DCCE(LENGTH_MTU_ERR)),
        FLAG_ENTRY0("rx_early_drop_err", DCCE(RX_EARLY_DROP_ERR)),
        FLAG_ENTRY0("late_short_err", DCCE(LATE_SHORT_ERR)),
        FLAG_ENTRY0("late_long_err", DCCE(LATE_LONG_ERR)),
        FLAG_ENTRY0("late_ebp_err", DCCE(LATE_EBP_ERR)),
        FLAG_ENTRY0("fpe_tx_fifo_ovflw_err", DCCE(FPE_TX_FIFO_OVFLW_ERR)),
        FLAG_ENTRY0("fpe_tx_fifo_unflw_err", DCCE(FPE_TX_FIFO_UNFLW_ERR)),
        FLAG_ENTRY0("csr_access_blocked_host", DCCE(CSR_ACCESS_BLOCKED_HOST)),
        FLAG_ENTRY0("csr_access_blocked_uc", DCCE(CSR_ACCESS_BLOCKED_UC)),
        FLAG_ENTRY0("tx_ctrl_parity_err", DCCE(TX_CTRL_PARITY_ERR)),
        FLAG_ENTRY0("tx_ctrl_parity_mbe_err", DCCE(TX_CTRL_PARITY_MBE_ERR)),
        FLAG_ENTRY0("tx_sc_parity_err", DCCE(TX_SC_PARITY_ERR)),
        FLAG_ENTRY0("rx_ctrl_parity_mbe_err", DCCE(RX_CTRL_PARITY_MBE_ERR)),
        FLAG_ENTRY0("csr_parity_err", DCCE(CSR_PARITY_ERR)),
        FLAG_ENTRY0("csr_inval_addr", DCCE(CSR_INVAL_ADDR)),
        FLAG_ENTRY0("tx_byte_shft_parity_err", DCCE(TX_BYTE_SHFT_PARITY_ERR)),
        FLAG_ENTRY0("rx_byte_shft_parity_err", DCCE(RX_BYTE_SHFT_PARITY_ERR)),
        FLAG_ENTRY0("fmconfig_err", DCCE(FMCONFIG_ERR)),
        FLAG_ENTRY0("rcvport_err", DCCE(RCVPORT_ERR)),
};

/*
 * LCB error flags
 */
#define LCBE(name) DC_LCB_ERR_FLG_##name##_SMASK
static struct flag_table lcb_err_flags[] = {
/* 0*/  FLAG_ENTRY0("CSR_PARITY_ERR", LCBE(CSR_PARITY_ERR)),
/* 1*/  FLAG_ENTRY0("INVALID_CSR_ADDR", LCBE(INVALID_CSR_ADDR)),
/* 2*/  FLAG_ENTRY0("RST_FOR_FAILED_DESKEW", LCBE(RST_FOR_FAILED_DESKEW)),
/* 3*/  FLAG_ENTRY0("ALL_LNS_FAILED_REINIT_TEST",
                LCBE(ALL_LNS_FAILED_REINIT_TEST)),
/* 4*/  FLAG_ENTRY0("LOST_REINIT_STALL_OR_TOS", LCBE(LOST_REINIT_STALL_OR_TOS)),
/* 5*/  FLAG_ENTRY0("TX_LESS_THAN_FOUR_LNS", LCBE(TX_LESS_THAN_FOUR_LNS)),
/* 6*/  FLAG_ENTRY0("RX_LESS_THAN_FOUR_LNS", LCBE(RX_LESS_THAN_FOUR_LNS)),
/* 7*/  FLAG_ENTRY0("SEQ_CRC_ERR", LCBE(SEQ_CRC_ERR)),
/* 8*/  FLAG_ENTRY0("REINIT_FROM_PEER", LCBE(REINIT_FROM_PEER)),
/* 9*/  FLAG_ENTRY0("REINIT_FOR_LN_DEGRADE", LCBE(REINIT_FOR_LN_DEGRADE)),
/*10*/  FLAG_ENTRY0("CRC_ERR_CNT_HIT_LIMIT", LCBE(CRC_ERR_CNT_HIT_LIMIT)),
/*11*/  FLAG_ENTRY0("RCLK_STOPPED", LCBE(RCLK_STOPPED)),
/*12*/  FLAG_ENTRY0("UNEXPECTED_REPLAY_MARKER", LCBE(UNEXPECTED_REPLAY_MARKER)),
/*13*/  FLAG_ENTRY0("UNEXPECTED_ROUND_TRIP_MARKER",
                LCBE(UNEXPECTED_ROUND_TRIP_MARKER)),
/*14*/  FLAG_ENTRY0("ILLEGAL_NULL_LTP", LCBE(ILLEGAL_NULL_LTP)),
/*15*/  FLAG_ENTRY0("ILLEGAL_FLIT_ENCODING", LCBE(ILLEGAL_FLIT_ENCODING)),
/*16*/  FLAG_ENTRY0("FLIT_INPUT_BUF_OFLW", LCBE(FLIT_INPUT_BUF_OFLW)),
/*17*/  FLAG_ENTRY0("VL_ACK_INPUT_BUF_OFLW", LCBE(VL_ACK_INPUT_BUF_OFLW)),
/*18*/  FLAG_ENTRY0("VL_ACK_INPUT_PARITY_ERR", LCBE(VL_ACK_INPUT_PARITY_ERR)),
/*19*/  FLAG_ENTRY0("VL_ACK_INPUT_WRONG_CRC_MODE",
                LCBE(VL_ACK_INPUT_WRONG_CRC_MODE)),
/*20*/  FLAG_ENTRY0("FLIT_INPUT_BUF_MBE", LCBE(FLIT_INPUT_BUF_MBE)),
/*21*/  FLAG_ENTRY0("FLIT_INPUT_BUF_SBE", LCBE(FLIT_INPUT_BUF_SBE)),
/*22*/  FLAG_ENTRY0("REPLAY_BUF_MBE", LCBE(REPLAY_BUF_MBE)),
/*23*/  FLAG_ENTRY0("REPLAY_BUF_SBE", LCBE(REPLAY_BUF_SBE)),
/*24*/  FLAG_ENTRY0("CREDIT_RETURN_FLIT_MBE", LCBE(CREDIT_RETURN_FLIT_MBE)),
/*25*/  FLAG_ENTRY0("RST_FOR_LINK_TIMEOUT", LCBE(RST_FOR_LINK_TIMEOUT)),
/*26*/  FLAG_ENTRY0("RST_FOR_INCOMPLT_RND_TRIP",
                LCBE(RST_FOR_INCOMPLT_RND_TRIP)),
/*27*/  FLAG_ENTRY0("HOLD_REINIT", LCBE(HOLD_REINIT)),
/*28*/  FLAG_ENTRY0("NEG_EDGE_LINK_TRANSFER_ACTIVE",
                LCBE(NEG_EDGE_LINK_TRANSFER_ACTIVE)),
/*29*/  FLAG_ENTRY0("REDUNDANT_FLIT_PARITY_ERR",
                LCBE(REDUNDANT_FLIT_PARITY_ERR))
};

/*
 * DC8051 Error Flags
 */
#define D8E(name) DC_DC8051_ERR_FLG_##name##_SMASK
static struct flag_table dc8051_err_flags[] = {
        FLAG_ENTRY0("SET_BY_8051", D8E(SET_BY_8051)),
        FLAG_ENTRY0("LOST_8051_HEART_BEAT", D8E(LOST_8051_HEART_BEAT)),
        FLAG_ENTRY0("CRAM_MBE", D8E(CRAM_MBE)),
        FLAG_ENTRY0("CRAM_SBE", D8E(CRAM_SBE)),
        FLAG_ENTRY0("DRAM_MBE", D8E(DRAM_MBE)),
        FLAG_ENTRY0("DRAM_SBE", D8E(DRAM_SBE)),
        FLAG_ENTRY0("IRAM_MBE", D8E(IRAM_MBE)),
        FLAG_ENTRY0("IRAM_SBE", D8E(IRAM_SBE)),
        FLAG_ENTRY0("UNMATCHED_SECURE_MSG_ACROSS_BCC_LANES",
                D8E(UNMATCHED_SECURE_MSG_ACROSS_BCC_LANES)),
        FLAG_ENTRY0("INVALID_CSR_ADDR", D8E(INVALID_CSR_ADDR)),
};

/*
 * DC8051 Information Error flags
 *
 * Flags in DC8051_DBG_ERR_INFO_SET_BY_8051.ERROR field.
 */
static struct flag_table dc8051_info_err_flags[] = {
        FLAG_ENTRY0("Spico ROM check failed",  SPICO_ROM_FAILED),
        FLAG_ENTRY0("Unknown frame received",  UNKNOWN_FRAME),
        FLAG_ENTRY0("Target BER not met",      TARGET_BER_NOT_MET),
        FLAG_ENTRY0("Serdes internal loopback failure",
                                        FAILED_SERDES_INTERNAL_LOOPBACK),
        FLAG_ENTRY0("Failed SerDes init",      FAILED_SERDES_INIT),
        FLAG_ENTRY0("Failed LNI(Polling)",     FAILED_LNI_POLLING),
        FLAG_ENTRY0("Failed LNI(Debounce)",    FAILED_LNI_DEBOUNCE),
        FLAG_ENTRY0("Failed LNI(EstbComm)",    FAILED_LNI_ESTBCOMM),
        FLAG_ENTRY0("Failed LNI(OptEq)",       FAILED_LNI_OPTEQ),
        FLAG_ENTRY0("Failed LNI(VerifyCap_1)", FAILED_LNI_VERIFY_CAP1),
        FLAG_ENTRY0("Failed LNI(VerifyCap_2)", FAILED_LNI_VERIFY_CAP2),
        FLAG_ENTRY0("Failed LNI(ConfigLT)",    FAILED_LNI_CONFIGLT)
};

/*
 * DC8051 Information Host Information flags
 *
 * Flags in DC8051_DBG_ERR_INFO_SET_BY_8051.HOST_MSG field.
 */
static struct flag_table dc8051_info_host_msg_flags[] = {
        FLAG_ENTRY0("Host request done", 0x0001),
        FLAG_ENTRY0("BC SMA message", 0x0002),
        FLAG_ENTRY0("BC PWR_MGM message", 0x0004),
        FLAG_ENTRY0("BC Unknown message (BCC)", 0x0008),
        FLAG_ENTRY0("BC Unknown message (LCB)", 0x0010),
        FLAG_ENTRY0("External device config request", 0x0020),
        FLAG_ENTRY0("VerifyCap all frames received", 0x0040),
        FLAG_ENTRY0("LinkUp achieved", 0x0080),
        FLAG_ENTRY0("Link going down", 0x0100),
};

static u32 encoded_size(u32 size);
static u32 chip_to_opa_lstate(struct hfi1_devdata *dd, u32 chip_lstate);
static int set_physical_link_state(struct hfi1_devdata *dd, u64 state);
static void read_vc_remote_phy(struct hfi1_devdata *dd, u8 *power_management,
                               u8 *continuous);
static void read_vc_remote_fabric(struct hfi1_devdata *dd, u8 *vau, u8 *z,
                                  u8 *vcu, u16 *vl15buf, u8 *crc_sizes);
static void read_vc_remote_link_width(struct hfi1_devdata *dd,
                                      u8 *remote_tx_rate, u16 *link_widths);
static void read_vc_local_link_width(struct hfi1_devdata *dd, u8 *misc_bits,
                                     u8 *flag_bits, u16 *link_widths);
static void read_remote_device_id(struct hfi1_devdata *dd, u16 *device_id,
                                  u8 *device_rev);
static void read_mgmt_allowed(struct hfi1_devdata *dd, u8 *mgmt_allowed);
static void read_local_lni(struct hfi1_devdata *dd, u8 *enable_lane_rx);
static int read_tx_settings(struct hfi1_devdata *dd, u8 *enable_lane_tx,
                            u8 *tx_polarity_inversion,
                            u8 *rx_polarity_inversion, u8 *max_rate);
static void handle_sdma_eng_err(struct hfi1_devdata *dd,
                                unsigned int context, u64 err_status);
static void handle_qsfp_int(struct hfi1_devdata *dd, u32 source, u64 reg);
static void handle_dcc_err(struct hfi1_devdata *dd,
                           unsigned int context, u64 err_status);
static void handle_lcb_err(struct hfi1_devdata *dd,
                           unsigned int context, u64 err_status);
static void handle_8051_interrupt(struct hfi1_devdata *dd, u32 unused, u64 reg);
static void handle_cce_err(struct hfi1_devdata *dd, u32 unused, u64 reg);
static void handle_rxe_err(struct hfi1_devdata *dd, u32 unused, u64 reg);
static void handle_misc_err(struct hfi1_devdata *dd, u32 unused, u64 reg);
static void handle_pio_err(struct hfi1_devdata *dd, u32 unused, u64 reg);
static void handle_sdma_err(struct hfi1_devdata *dd, u32 unused, u64 reg);
static void handle_egress_err(struct hfi1_devdata *dd, u32 unused, u64 reg);
static void handle_txe_err(struct hfi1_devdata *dd, u32 unused, u64 reg);
static void set_partition_keys(struct hfi1_pportdata *);
static const char *link_state_name(u32 state);
static const char *link_state_reason_name(struct hfi1_pportdata *ppd,
                                          u32 state);
static int do_8051_command(struct hfi1_devdata *dd, u32 type, u64 in_data,
                           u64 *out_data);
static int read_idle_sma(struct hfi1_devdata *dd, u64 *data);
static int thermal_init(struct hfi1_devdata *dd);

static int wait_logical_linkstate(struct hfi1_pportdata *ppd, u32 state,
                                  int msecs);
static void read_planned_down_reason_code(struct hfi1_devdata *dd, u8 *pdrrc);
static void handle_temp_err(struct hfi1_devdata *);
static void dc_shutdown(struct hfi1_devdata *);
static void dc_start(struct hfi1_devdata *);

/*
 * Error interrupt table entry.  This is used as input to the interrupt
 * "clear down" routine used for all second tier error interrupt registers.
 * Second tier interrupt registers have a single bit representing them
 * in the top-level CceIntStatus.
 */
struct err_reg_info {
        u32 status;             /* status CSR offset */
        u32 clear;              /* clear CSR offset */
        u32 mask;               /* mask CSR offset */
        void (*handler)(struct hfi1_devdata *dd, u32 source, u64 reg);
        const char *desc;
};

#define NUM_MISC_ERRS (IS_GENERAL_ERR_END - IS_GENERAL_ERR_START)
#define NUM_DC_ERRS (IS_DC_END - IS_DC_START)
#define NUM_VARIOUS (IS_VARIOUS_END - IS_VARIOUS_START)

/*
 * Helpers for building HFI and DC error interrupt table entries.  Different
 * helpers are needed because of inconsistent register names.
 */
#define EE(reg, handler, desc) \
        { reg##_STATUS, reg##_CLEAR, reg##_MASK, \
                handler, desc }
#define DC_EE1(reg, handler, desc) \
        { reg##_FLG, reg##_FLG_CLR, reg##_FLG_EN, handler, desc }
#define DC_EE2(reg, handler, desc) \
        { reg##_FLG, reg##_CLR, reg##_EN, handler, desc }
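
/*
 * Illustrative expansion (follows directly from the token pasting above):
 *   EE(CCE_ERR, handle_cce_err, "CceErr")
 * becomes { CCE_ERR_STATUS, CCE_ERR_CLEAR, CCE_ERR_MASK, handle_cce_err,
 * "CceErr" }, while the DC_EE* variants cover the DC blocks'
 * _FLG/_FLG_CLR/_FLG_EN (or _FLG/_CLR/_EN) register naming.
 */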
1059
1060/*
1061 * Table of the "misc" grouping of error interrupts.  Each entry refers to
1062 * another register containing more information.
1063 */
1064static const struct err_reg_info misc_errs[NUM_MISC_ERRS] = {
1065/* 0*/  EE(CCE_ERR,             handle_cce_err,    "CceErr"),
1066/* 1*/  EE(RCV_ERR,             handle_rxe_err,    "RxeErr"),
1067/* 2*/  EE(MISC_ERR,    handle_misc_err,   "MiscErr"),
1068/* 3*/  { 0, 0, 0, NULL }, /* reserved */
1069/* 4*/  EE(SEND_PIO_ERR,    handle_pio_err,    "PioErr"),
1070/* 5*/  EE(SEND_DMA_ERR,    handle_sdma_err,   "SDmaErr"),
1071/* 6*/  EE(SEND_EGRESS_ERR, handle_egress_err, "EgressErr"),
1072/* 7*/  EE(SEND_ERR,    handle_txe_err,    "TxeErr")
1073        /* the rest are reserved */
1074};
1075
1076/*
1077 * Index into the Various section of the interrupt sources
1078 * corresponding to the Critical Temperature interrupt.
1079 */
1080#define TCRIT_INT_SOURCE 4
1081
1082/*
1083 * SDMA error interrupt entry - refers to another register containing more
1084 * information.
1085 */
1086static const struct err_reg_info sdma_eng_err =
1087        EE(SEND_DMA_ENG_ERR, handle_sdma_eng_err, "SDmaEngErr");
1088
1089static const struct err_reg_info various_err[NUM_VARIOUS] = {
1090/* 0*/  { 0, 0, 0, NULL }, /* PbcInt */
1091/* 1*/  { 0, 0, 0, NULL }, /* GpioAssertInt */
1092/* 2*/  EE(ASIC_QSFP1,  handle_qsfp_int,        "QSFP1"),
1093/* 3*/  EE(ASIC_QSFP2,  handle_qsfp_int,        "QSFP2"),
1094/* 4*/  { 0, 0, 0, NULL }, /* TCritInt */
1095        /* rest are reserved */
1096};
1097
1098/*
1099 * The DC encoding of mtu_cap for 10K MTU in the DCC_CFG_PORT_CONFIG
1100 * register cannot be derived from the MTU value because 10K is not
1101 * a power of 2. Therefore, we need a constant. Everything else can
1102 * be calculated.
1103 */
1104#define DCC_CFG_PORT_MTU_CAP_10240 7
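/*
 * For the power-of-2 MTUs the encoding follows the OPA/IB MTU
 * enumeration (256 -> 1, 512 -> 2, ..., 8192 -> 6), i.e. ilog2(mtu) - 7;
 * only the 10240 value needs the constant above.
 */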
1105
1106/*
1107 * Table of the DC grouping of error interrupts.  Each entry refers to
1108 * another register containing more information.
1109 */
1110static const struct err_reg_info dc_errs[NUM_DC_ERRS] = {
1111/* 0*/  DC_EE1(DCC_ERR,         handle_dcc_err,        "DCC Err"),
1112/* 1*/  DC_EE2(DC_LCB_ERR,      handle_lcb_err,        "LCB Err"),
1113/* 2*/  DC_EE2(DC_DC8051_ERR,   handle_8051_interrupt, "DC8051 Interrupt"),
1114/* 3*/  /* dc_lbm_int - special, see is_dc_int() */
1115        /* the rest are reserved */
1116};
1117
1118struct cntr_entry {
1119        /*
1120         * counter name
1121         */
1122        char *name;
1123
1124        /*
1125         * csr to read for name (if applicable)
1126         */
1127        u64 csr;
1128
1129        /*
1130         * offset into dd or ppd to store the counter's value
1131         */
1132        int offset;
1133
1134        /*
1135         * flags
1136         */
1137        u8 flags;
1138
1139        /*
1140         * accessor for stat element, context either dd or ppd
1141         */
1142        u64 (*rw_cntr)(const struct cntr_entry *,
1143                               void *context,
1144                               int vl,
1145                               int mode,
1146                               u64 data);
1147};
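/*
 * Sketch of use, for illustration only: a device counter read resolves
 * through the counter tables defined below, e.g.
 *      entry = &dev_cntrs[C_RCV_OVF];
 *      val = entry->rw_cntr(entry, dd, CNTR_INVALID_VL, CNTR_MODE_R, 0);
 */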
1148
1149#define C_RCV_HDR_OVF_FIRST C_RCV_HDR_OVF_0
1150#define C_RCV_HDR_OVF_LAST C_RCV_HDR_OVF_159
1151
1152#define CNTR_ELEM(name, csr, offset, flags, accessor) \
1153{ \
1154        name, \
1155        csr, \
1156        offset, \
1157        flags, \
1158        accessor \
1159}
1160
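/*
 * The helpers below turn a counter index into a CSR byte offset: each
 * counter register is 64 bits wide, hence the "counter * 8" added to
 * the base of the counter array.
 */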
1161/* 32bit RXE */
1162#define RXE32_PORT_CNTR_ELEM(name, counter, flags) \
1163CNTR_ELEM(#name, \
1164          (counter * 8 + RCV_COUNTER_ARRAY32), \
1165          0, flags | CNTR_32BIT, \
1166          port_access_u32_csr)
1167
1168#define RXE32_DEV_CNTR_ELEM(name, counter, flags) \
1169CNTR_ELEM(#name, \
1170          (counter * 8 + RCV_COUNTER_ARRAY32), \
1171          0, flags | CNTR_32BIT, \
1172          dev_access_u32_csr)
1173
1174/* 64bit RXE */
1175#define RXE64_PORT_CNTR_ELEM(name, counter, flags) \
1176CNTR_ELEM(#name, \
1177          (counter * 8 + RCV_COUNTER_ARRAY64), \
1178          0, flags, \
1179          port_access_u64_csr)
1180
1181#define RXE64_DEV_CNTR_ELEM(name, counter, flags) \
1182CNTR_ELEM(#name, \
1183          (counter * 8 + RCV_COUNTER_ARRAY64), \
1184          0, flags, \
1185          dev_access_u64_csr)
1186
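/*
 * Per-context RcvHdrOvfl counters: the receive context CSRs are spaced
 * 0x100 bytes apart, hence the "ctx * 0x100" offset below.
 */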
1187#define OVR_LBL(ctx) C_RCV_HDR_OVF_ ## ctx
1188#define OVR_ELM(ctx) \
1189CNTR_ELEM("RcvHdrOvr" #ctx, \
1190          (RCV_HDR_OVFL_CNT + ctx * 0x100), \
1191          0, CNTR_NORMAL, port_access_u64_csr)
1192
1193/* 32bit TXE */
1194#define TXE32_PORT_CNTR_ELEM(name, counter, flags) \
1195CNTR_ELEM(#name, \
1196          (counter * 8 + SEND_COUNTER_ARRAY32), \
1197          0, flags | CNTR_32BIT, \
1198          port_access_u32_csr)
1199
1200/* 64bit TXE */
1201#define TXE64_PORT_CNTR_ELEM(name, counter, flags) \
1202CNTR_ELEM(#name, \
1203          (counter * 8 + SEND_COUNTER_ARRAY64), \
1204          0, flags, \
1205          port_access_u64_csr)
1206
1207#define TX64_DEV_CNTR_ELEM(name, counter, flags) \
1208CNTR_ELEM(#name, \
1209          counter * 8 + SEND_COUNTER_ARRAY64, \
1210          0, \
1211          flags, \
1212          dev_access_u64_csr)
1213
1214/* CCE */
1215#define CCE_PERF_DEV_CNTR_ELEM(name, counter, flags) \
1216CNTR_ELEM(#name, \
1217          (counter * 8 + CCE_COUNTER_ARRAY32), \
1218          0, flags | CNTR_32BIT, \
1219          dev_access_u32_csr)
1220
1221#define CCE_INT_DEV_CNTR_ELEM(name, counter, flags) \
1222CNTR_ELEM(#name, \
1223          (counter * 8 + CCE_INT_COUNTER_ARRAY32), \
1224          0, flags | CNTR_32BIT, \
1225          dev_access_u32_csr)
1226
1227/* DC */
1228#define DC_PERF_CNTR(name, counter, flags) \
1229CNTR_ELEM(#name, \
1230          counter, \
1231          0, \
1232          flags, \
1233          dev_access_u64_csr)
1234
1235#define DC_PERF_CNTR_LCB(name, counter, flags) \
1236CNTR_ELEM(#name, \
1237          counter, \
1238          0, \
1239          flags, \
1240          dc_access_lcb_cntr)
1241
1242/* ibp counters */
1243#define SW_IBP_CNTR(name, cntr) \
1244CNTR_ELEM(#name, \
1245          0, \
1246          0, \
1247          CNTR_SYNTH, \
1248          access_ibp_##cntr)
1249
1250u64 read_csr(const struct hfi1_devdata *dd, u32 offset)
1251{
1252        u64 val;
1253
1254        if (dd->flags & HFI1_PRESENT) {
1255                val = readq((void __iomem *)dd->kregbase + offset);
1256                return val;
1257        }
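        /* mimic a read from absent PCIe hardware: return all ones */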
1258        return -1;
1259}
1260
1261void write_csr(const struct hfi1_devdata *dd, u32 offset, u64 value)
1262{
1263        if (dd->flags & HFI1_PRESENT)
1264                writeq(value, (void __iomem *)dd->kregbase + offset);
1265}
1266
1267void __iomem *get_csr_addr(struct hfi1_devdata *dd, u32 offset)
1270{
1271        return (void __iomem *)dd->kregbase + offset;
1272}
1273
1274static inline u64 read_write_csr(const struct hfi1_devdata *dd, u32 csr,
1275                                 int mode, u64 value)
1276{
1277        u64 ret;
1278
1280        if (mode == CNTR_MODE_R) {
1281                ret = read_csr(dd, csr);
1282        } else if (mode == CNTR_MODE_W) {
1283                write_csr(dd, csr, value);
1284                ret = value;
1285        } else {
1286                dd_dev_err(dd, "Invalid cntr register access mode");
1287                return 0;
1288        }
1289
1290        hfi1_cdbg(CNTR, "csr 0x%x val 0x%llx mode %d", csr, ret, mode);
1291        return ret;
1292}
1293
1294/* Dev Access */
1295static u64 dev_access_u32_csr(const struct cntr_entry *entry,
1296                            void *context, int vl, int mode, u64 data)
1297{
1298        struct hfi1_devdata *dd = context;
1299
1300        if (vl != CNTR_INVALID_VL)
1301                return 0;
1302        return read_write_csr(dd, entry->csr, mode, data);
1303}
1304
1305static u64 dev_access_u64_csr(const struct cntr_entry *entry, void *context,
1306                            int vl, int mode, u64 data)
1307{
1308        struct hfi1_devdata *dd = context;
1310        u64 val = 0;
1311        u64 csr = entry->csr;
1312
1313        if (entry->flags & CNTR_VL) {
1314                if (vl == CNTR_INVALID_VL)
1315                        return 0;
1316                csr += 8 * vl;
1317        } else {
1318                if (vl != CNTR_INVALID_VL)
1319                        return 0;
1320        }
1321
1322        val = read_write_csr(dd, csr, mode, data);
1323        return val;
1324}
1325
1326static u64 dc_access_lcb_cntr(const struct cntr_entry *entry, void *context,
1327                            int vl, int mode, u64 data)
1328{
1329        struct hfi1_devdata *dd = context;
1330        u32 csr = entry->csr;
1331        int ret = 0;
1332
1333        if (vl != CNTR_INVALID_VL)
1334                return 0;
1335        if (mode == CNTR_MODE_R)
1336                ret = read_lcb_csr(dd, csr, &data);
1337        else if (mode == CNTR_MODE_W)
1338                ret = write_lcb_csr(dd, csr, data);
1339
1340        if (ret) {
1341                dd_dev_err(dd, "Could not acquire LCB for counter 0x%x", csr);
1342                return 0;
1343        }
1344
1345        hfi1_cdbg(CNTR, "csr 0x%x val 0x%llx mode %d", csr, data, mode);
1346        return data;
1347}
1348
1349/* Port Access */
1350static u64 port_access_u32_csr(const struct cntr_entry *entry, void *context,
1351                             int vl, int mode, u64 data)
1352{
1353        struct hfi1_pportdata *ppd = context;
1354
1355        if (vl != CNTR_INVALID_VL)
1356                return 0;
1357        return read_write_csr(ppd->dd, entry->csr, mode, data);
1358}
1359
1360static u64 port_access_u64_csr(const struct cntr_entry *entry,
1361                             void *context, int vl, int mode, u64 data)
1362{
1363        struct hfi1_pportdata *ppd = context;
1364        u64 val;
1365        u64 csr = entry->csr;
1366
1367        if (entry->flags & CNTR_VL) {
1368                if (vl == CNTR_INVALID_VL)
1369                        return 0;
1370                csr += 8 * vl;
1371        } else {
1372                if (vl != CNTR_INVALID_VL)
1373                        return 0;
1374        }
1375        val = read_write_csr(ppd->dd, csr, mode, data);
1376        return val;
1377}
1378
1379/* Software defined */
1380static inline u64 read_write_sw(struct hfi1_devdata *dd, u64 *cntr, int mode,
1381                                u64 data)
1382{
1383        u64 ret;
1384
1385        if (mode == CNTR_MODE_R) {
1386                ret = *cntr;
1387        } else if (mode == CNTR_MODE_W) {
1388                *cntr = data;
1389                ret = data;
1390        } else {
1391                dd_dev_err(dd, "Invalid cntr sw access mode");
1392                return 0;
1393        }
1394
1395        hfi1_cdbg(CNTR, "val 0x%llx mode %d", ret, mode);
1396
1397        return ret;
1398}
1399
1400static u64 access_sw_link_dn_cnt(const struct cntr_entry *entry, void *context,
1401                               int vl, int mode, u64 data)
1402{
1403        struct hfi1_pportdata *ppd = context;
1404
1405        if (vl != CNTR_INVALID_VL)
1406                return 0;
1407        return read_write_sw(ppd->dd, &ppd->link_downed, mode, data);
1408}
1409
1410static u64 access_sw_link_up_cnt(const struct cntr_entry *entry, void *context,
1411                               int vl, int mode, u64 data)
1412{
1413        struct hfi1_pportdata *ppd = context;
1414
1415        if (vl != CNTR_INVALID_VL)
1416                return 0;
1417        return read_write_sw(ppd->dd, &ppd->link_up, mode, data);
1418}
1419
1420static u64 access_sw_xmit_discards(const struct cntr_entry *entry,
1421                                    void *context, int vl, int mode, u64 data)
1422{
1423        struct hfi1_pportdata *ppd = context;
1424
1425        if (vl != CNTR_INVALID_VL)
1426                return 0;
1427
1428        return read_write_sw(ppd->dd, &ppd->port_xmit_discards, mode, data);
1429}
1430
1431static u64 access_xmit_constraint_errs(const struct cntr_entry *entry,
1432                                     void *context, int vl, int mode, u64 data)
1433{
1434        struct hfi1_pportdata *ppd = context;
1435
1436        if (vl != CNTR_INVALID_VL)
1437                return 0;
1438
1439        return read_write_sw(ppd->dd, &ppd->port_xmit_constraint_errors,
1440                             mode, data);
1441}
1442
1443static u64 access_rcv_constraint_errs(const struct cntr_entry *entry,
1444                                     void *context, int vl, int mode, u64 data)
1445{
1446        struct hfi1_pportdata *ppd = context;
1447
1448        if (vl != CNTR_INVALID_VL)
1449                return 0;
1450
1451        return read_write_sw(ppd->dd, &ppd->port_rcv_constraint_errors,
1452                             mode, data);
1453}
1454
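/* Sum a per-CPU counter over all possible CPUs. */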
1455u64 get_all_cpu_total(u64 __percpu *cntr)
1456{
1457        int cpu;
1458        u64 counter = 0;
1459
1460        for_each_possible_cpu(cpu)
1461                counter += *per_cpu_ptr(cntr, cpu);
1462        return counter;
1463}
1464
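/*
 * Read or "write" a per-CPU counter.  The underlying counters only ever
 * increase; a write of zero records the current total in *z_val so that
 * later reads report the total relative to that baseline.
 */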
1465static u64 read_write_cpu(struct hfi1_devdata *dd, u64 *z_val,
1466                          u64 __percpu *cntr,
1467                          int vl, int mode, u64 data)
1468{
1470        u64 ret = 0;
1471
1472        if (vl != CNTR_INVALID_VL)
1473                return 0;
1474
1475        if (mode == CNTR_MODE_R) {
1476                ret = get_all_cpu_total(cntr) - *z_val;
1477        } else if (mode == CNTR_MODE_W) {
1478                /* A write can only zero the counter */
1479                if (data == 0)
1480                        *z_val = get_all_cpu_total(cntr);
1481                else
1482                        dd_dev_err(dd, "Per CPU cntrs can only be zeroed");
1483        } else {
1484                dd_dev_err(dd, "Invalid cntr sw cpu access mode");
1485                return 0;
1486        }
1487
1488        return ret;
1489}
1490
1491static u64 access_sw_cpu_intr(const struct cntr_entry *entry,
1492                              void *context, int vl, int mode, u64 data)
1493{
1494        struct hfi1_devdata *dd = context;
1495
1496        return read_write_cpu(dd, &dd->z_int_counter, dd->int_counter, vl,
1497                              mode, data);
1498}
1499
1500static u64 access_sw_cpu_rcv_limit(const struct cntr_entry *entry,
1501                              void *context, int vl, int mode, u64 data)
1502{
1503        struct hfi1_devdata *dd = context;
1504
1505        return read_write_cpu(dd, &dd->z_rcv_limit, dd->rcv_limit, vl,
1506                              mode, data);
1507}
1508
1509static u64 access_sw_pio_wait(const struct cntr_entry *entry,
1510                              void *context, int vl, int mode, u64 data)
1511{
1512        struct hfi1_devdata *dd = context;
1513
1514        return dd->verbs_dev.n_piowait;
1515}
1516
1517static u64 access_sw_vtx_wait(const struct cntr_entry *entry,
1518                              void *context, int vl, int mode, u64 data)
1519{
1520        struct hfi1_devdata *dd = context;
1521
1522        return dd->verbs_dev.n_txwait;
1523}
1524
1525static u64 access_sw_kmem_wait(const struct cntr_entry *entry,
1526                               void *context, int vl, int mode, u64 data)
1527{
1528        struct hfi1_devdata *dd = context;
1529
1530        return dd->verbs_dev.n_kmem_wait;
1531}
1532
1533static u64 access_sw_send_schedule(const struct cntr_entry *entry,
1534                               void *context, int vl, int mode, u64 data)
1535{
1536        struct hfi1_devdata *dd = context;
1537
1538        return dd->verbs_dev.n_send_schedule;
1539}
1540
1541#define def_access_sw_cpu(cntr) \
1542static u64 access_sw_cpu_##cntr(const struct cntr_entry *entry,               \
1543                              void *context, int vl, int mode, u64 data)      \
1544{                                                                             \
1545        struct hfi1_pportdata *ppd = (struct hfi1_pportdata *)context;        \
1546        return read_write_cpu(ppd->dd, &ppd->ibport_data.z_ ##cntr,           \
1547                              ppd->ibport_data.cntr, vl,                      \
1548                              mode, data);                                    \
1549}
1550
1551def_access_sw_cpu(rc_acks);
1552def_access_sw_cpu(rc_qacks);
1553def_access_sw_cpu(rc_delayed_comp);
1554
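/*
 * Generate an accessor for a software IB-port counter stored at
 * ppd->ibport_data.n_<cntr>.
 */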
1555#define def_access_ibp_counter(cntr) \
1556static u64 access_ibp_##cntr(const struct cntr_entry *entry,                  \
1557                                void *context, int vl, int mode, u64 data)    \
1558{                                                                             \
1559        struct hfi1_pportdata *ppd = (struct hfi1_pportdata *)context;        \
1560                                                                              \
1561        if (vl != CNTR_INVALID_VL)                                            \
1562                return 0;                                                     \
1563                                                                              \
1564        return read_write_sw(ppd->dd, &ppd->ibport_data.n_ ##cntr,            \
1565                             mode, data);                                     \
1566}
1567
1568def_access_ibp_counter(loop_pkts);
1569def_access_ibp_counter(rc_resends);
1570def_access_ibp_counter(rnr_naks);
1571def_access_ibp_counter(other_naks);
1572def_access_ibp_counter(rc_timeouts);
1573def_access_ibp_counter(pkt_drops);
1574def_access_ibp_counter(dmawait);
1575def_access_ibp_counter(rc_seqnak);
1576def_access_ibp_counter(rc_dupreq);
1577def_access_ibp_counter(rdma_seq);
1578def_access_ibp_counter(unaligned);
1579def_access_ibp_counter(seq_naks);
1580
1581static struct cntr_entry dev_cntrs[DEV_CNTR_LAST] = {
1582[C_RCV_OVF] = RXE32_DEV_CNTR_ELEM(RcvOverflow, RCV_BUF_OVFL_CNT, CNTR_SYNTH),
1583[C_RX_TID_FULL] = RXE32_DEV_CNTR_ELEM(RxTIDFullEr, RCV_TID_FULL_ERR_CNT,
1584                        CNTR_NORMAL),
1585[C_RX_TID_INVALID] = RXE32_DEV_CNTR_ELEM(RxTIDInvalid, RCV_TID_VALID_ERR_CNT,
1586                        CNTR_NORMAL),
1587[C_RX_TID_FLGMS] = RXE32_DEV_CNTR_ELEM(RxTidFLGMs,
1588                        RCV_TID_FLOW_GEN_MISMATCH_CNT,
1589                        CNTR_NORMAL),
1590[C_RX_CTX_RHQS] = RXE32_DEV_CNTR_ELEM(RxCtxRHQS, RCV_CONTEXT_RHQ_STALL,
1591                        CNTR_NORMAL),
1592[C_RX_CTX_EGRS] = RXE32_DEV_CNTR_ELEM(RxCtxEgrS, RCV_CONTEXT_EGR_STALL,
1593                        CNTR_NORMAL),
1594[C_RCV_TID_FLSMS] = RXE32_DEV_CNTR_ELEM(RxTidFLSMs,
1595                        RCV_TID_FLOW_SEQ_MISMATCH_CNT, CNTR_NORMAL),
1596[C_CCE_PCI_CR_ST] = CCE_PERF_DEV_CNTR_ELEM(CcePciCrSt,
1597                        CCE_PCIE_POSTED_CRDT_STALL_CNT, CNTR_NORMAL),
1598[C_CCE_PCI_TR_ST] = CCE_PERF_DEV_CNTR_ELEM(CcePciTrSt, CCE_PCIE_TRGT_STALL_CNT,
1599                        CNTR_NORMAL),
1600[C_CCE_PIO_WR_ST] = CCE_PERF_DEV_CNTR_ELEM(CcePioWrSt, CCE_PIO_WR_STALL_CNT,
1601                        CNTR_NORMAL),
1602[C_CCE_ERR_INT] = CCE_INT_DEV_CNTR_ELEM(CceErrInt, CCE_ERR_INT_CNT,
1603                        CNTR_NORMAL),
1604[C_CCE_SDMA_INT] = CCE_INT_DEV_CNTR_ELEM(CceSdmaInt, CCE_SDMA_INT_CNT,
1605                        CNTR_NORMAL),
1606[C_CCE_MISC_INT] = CCE_INT_DEV_CNTR_ELEM(CceMiscInt, CCE_MISC_INT_CNT,
1607                        CNTR_NORMAL),
1608[C_CCE_RCV_AV_INT] = CCE_INT_DEV_CNTR_ELEM(CceRcvAvInt, CCE_RCV_AVAIL_INT_CNT,
1609                        CNTR_NORMAL),
1610[C_CCE_RCV_URG_INT] = CCE_INT_DEV_CNTR_ELEM(CceRcvUrgInt,
1611                        CCE_RCV_URGENT_INT_CNT, CNTR_NORMAL),
1612[C_CCE_SEND_CR_INT] = CCE_INT_DEV_CNTR_ELEM(CceSndCrInt,
1613                        CCE_SEND_CREDIT_INT_CNT, CNTR_NORMAL),
1614[C_DC_UNC_ERR] = DC_PERF_CNTR(DcUnctblErr, DCC_ERR_UNCORRECTABLE_CNT,
1615                              CNTR_SYNTH),
1616[C_DC_RCV_ERR] = DC_PERF_CNTR(DcRecvErr, DCC_ERR_PORTRCV_ERR_CNT, CNTR_SYNTH),
1617[C_DC_FM_CFG_ERR] = DC_PERF_CNTR(DcFmCfgErr, DCC_ERR_FMCONFIG_ERR_CNT,
1618                                 CNTR_SYNTH),
1619[C_DC_RMT_PHY_ERR] = DC_PERF_CNTR(DcRmtPhyErr, DCC_ERR_RCVREMOTE_PHY_ERR_CNT,
1620                                  CNTR_SYNTH),
1621[C_DC_DROPPED_PKT] = DC_PERF_CNTR(DcDroppedPkt, DCC_ERR_DROPPED_PKT_CNT,
1622                                  CNTR_SYNTH),
1623[C_DC_MC_XMIT_PKTS] = DC_PERF_CNTR(DcMcXmitPkts,
1624                                   DCC_PRF_PORT_XMIT_MULTICAST_CNT, CNTR_SYNTH),
1625[C_DC_MC_RCV_PKTS] = DC_PERF_CNTR(DcMcRcvPkts,
1626                                  DCC_PRF_PORT_RCV_MULTICAST_PKT_CNT,
1627                                  CNTR_SYNTH),
1628[C_DC_XMIT_CERR] = DC_PERF_CNTR(DcXmitCorr,
1629                                DCC_PRF_PORT_XMIT_CORRECTABLE_CNT, CNTR_SYNTH),
1630[C_DC_RCV_CERR] = DC_PERF_CNTR(DcRcvCorrCnt, DCC_PRF_PORT_RCV_CORRECTABLE_CNT,
1631                               CNTR_SYNTH),
1632[C_DC_RCV_FCC] = DC_PERF_CNTR(DcRxFCntl, DCC_PRF_RX_FLOW_CRTL_CNT,
1633                              CNTR_SYNTH),
1634[C_DC_XMIT_FCC] = DC_PERF_CNTR(DcXmitFCntl, DCC_PRF_TX_FLOW_CRTL_CNT,
1635                               CNTR_SYNTH),
1636[C_DC_XMIT_FLITS] = DC_PERF_CNTR(DcXmitFlits, DCC_PRF_PORT_XMIT_DATA_CNT,
1637                                 CNTR_SYNTH),
1638[C_DC_RCV_FLITS] = DC_PERF_CNTR(DcRcvFlits, DCC_PRF_PORT_RCV_DATA_CNT,
1639                                CNTR_SYNTH),
1640[C_DC_XMIT_PKTS] = DC_PERF_CNTR(DcXmitPkts, DCC_PRF_PORT_XMIT_PKTS_CNT,
1641                                CNTR_SYNTH),
1642[C_DC_RCV_PKTS] = DC_PERF_CNTR(DcRcvPkts, DCC_PRF_PORT_RCV_PKTS_CNT,
1643                               CNTR_SYNTH),
1644[C_DC_RX_FLIT_VL] = DC_PERF_CNTR(DcRxFlitVl, DCC_PRF_PORT_VL_RCV_DATA_CNT,
1645                                 CNTR_SYNTH | CNTR_VL),
1646[C_DC_RX_PKT_VL] = DC_PERF_CNTR(DcRxPktVl, DCC_PRF_PORT_VL_RCV_PKTS_CNT,
1647                                CNTR_SYNTH | CNTR_VL),
1648[C_DC_RCV_FCN] = DC_PERF_CNTR(DcRcvFcn, DCC_PRF_PORT_RCV_FECN_CNT, CNTR_SYNTH),
1649[C_DC_RCV_FCN_VL] = DC_PERF_CNTR(DcRcvFcnVl, DCC_PRF_PORT_VL_RCV_FECN_CNT,
1650                                 CNTR_SYNTH | CNTR_VL),
1651[C_DC_RCV_BCN] = DC_PERF_CNTR(DcRcvBcn, DCC_PRF_PORT_RCV_BECN_CNT, CNTR_SYNTH),
1652[C_DC_RCV_BCN_VL] = DC_PERF_CNTR(DcRcvBcnVl, DCC_PRF_PORT_VL_RCV_BECN_CNT,
1653                                 CNTR_SYNTH | CNTR_VL),
1654[C_DC_RCV_BBL] = DC_PERF_CNTR(DcRcvBbl, DCC_PRF_PORT_RCV_BUBBLE_CNT,
1655                              CNTR_SYNTH),
1656[C_DC_RCV_BBL_VL] = DC_PERF_CNTR(DcRcvBblVl, DCC_PRF_PORT_VL_RCV_BUBBLE_CNT,
1657                                 CNTR_SYNTH | CNTR_VL),
1658[C_DC_MARK_FECN] = DC_PERF_CNTR(DcMarkFcn, DCC_PRF_PORT_MARK_FECN_CNT,
1659                                CNTR_SYNTH),
1660[C_DC_MARK_FECN_VL] = DC_PERF_CNTR(DcMarkFcnVl, DCC_PRF_PORT_VL_MARK_FECN_CNT,
1661                                   CNTR_SYNTH | CNTR_VL),
1662[C_DC_TOTAL_CRC] =
1663        DC_PERF_CNTR_LCB(DcTotCrc, DC_LCB_ERR_INFO_TOTAL_CRC_ERR,
1664                         CNTR_SYNTH),
1665[C_DC_CRC_LN0] = DC_PERF_CNTR_LCB(DcCrcLn0, DC_LCB_ERR_INFO_CRC_ERR_LN0,
1666                                  CNTR_SYNTH),
1667[C_DC_CRC_LN1] = DC_PERF_CNTR_LCB(DcCrcLn1, DC_LCB_ERR_INFO_CRC_ERR_LN1,
1668                                  CNTR_SYNTH),
1669[C_DC_CRC_LN2] = DC_PERF_CNTR_LCB(DcCrcLn2, DC_LCB_ERR_INFO_CRC_ERR_LN2,
1670                                  CNTR_SYNTH),
1671[C_DC_CRC_LN3] = DC_PERF_CNTR_LCB(DcCrcLn3, DC_LCB_ERR_INFO_CRC_ERR_LN3,
1672                                  CNTR_SYNTH),
1673[C_DC_CRC_MULT_LN] =
1674        DC_PERF_CNTR_LCB(DcMultLn, DC_LCB_ERR_INFO_CRC_ERR_MULTI_LN,
1675                         CNTR_SYNTH),
1676[C_DC_TX_REPLAY] = DC_PERF_CNTR_LCB(DcTxReplay, DC_LCB_ERR_INFO_TX_REPLAY_CNT,
1677                                    CNTR_SYNTH),
1678[C_DC_RX_REPLAY] = DC_PERF_CNTR_LCB(DcRxReplay, DC_LCB_ERR_INFO_RX_REPLAY_CNT,
1679                                    CNTR_SYNTH),
1680[C_DC_SEQ_CRC_CNT] =
1681        DC_PERF_CNTR_LCB(DcLinkSeqCrc, DC_LCB_ERR_INFO_SEQ_CRC_CNT,
1682                         CNTR_SYNTH),
1683[C_DC_ESC0_ONLY_CNT] =
1684        DC_PERF_CNTR_LCB(DcEsc0, DC_LCB_ERR_INFO_ESCAPE_0_ONLY_CNT,
1685                         CNTR_SYNTH),
1686[C_DC_ESC0_PLUS1_CNT] =
1687        DC_PERF_CNTR_LCB(DcEsc1, DC_LCB_ERR_INFO_ESCAPE_0_PLUS1_CNT,
1688                         CNTR_SYNTH),
1689[C_DC_ESC0_PLUS2_CNT] =
1690        DC_PERF_CNTR_LCB(DcEsc0Plus2, DC_LCB_ERR_INFO_ESCAPE_0_PLUS2_CNT,
1691                         CNTR_SYNTH),
1692[C_DC_REINIT_FROM_PEER_CNT] =
1693        DC_PERF_CNTR_LCB(DcReinitPeer, DC_LCB_ERR_INFO_REINIT_FROM_PEER_CNT,
1694                         CNTR_SYNTH),
1695[C_DC_SBE_CNT] = DC_PERF_CNTR_LCB(DcSbe, DC_LCB_ERR_INFO_SBE_CNT,
1696                                  CNTR_SYNTH),
1697[C_DC_MISC_FLG_CNT] =
1698        DC_PERF_CNTR_LCB(DcMiscFlg, DC_LCB_ERR_INFO_MISC_FLG_CNT,
1699                         CNTR_SYNTH),
1700[C_DC_PRF_GOOD_LTP_CNT] =
1701        DC_PERF_CNTR_LCB(DcGoodLTP, DC_LCB_PRF_GOOD_LTP_CNT, CNTR_SYNTH),
1702[C_DC_PRF_ACCEPTED_LTP_CNT] =
1703        DC_PERF_CNTR_LCB(DcAccLTP, DC_LCB_PRF_ACCEPTED_LTP_CNT,
1704                         CNTR_SYNTH),
1705[C_DC_PRF_RX_FLIT_CNT] =
1706        DC_PERF_CNTR_LCB(DcPrfRxFlit, DC_LCB_PRF_RX_FLIT_CNT, CNTR_SYNTH),
1707[C_DC_PRF_TX_FLIT_CNT] =
1708        DC_PERF_CNTR_LCB(DcPrfTxFlit, DC_LCB_PRF_TX_FLIT_CNT, CNTR_SYNTH),
1709[C_DC_PRF_CLK_CNTR] =
1710        DC_PERF_CNTR_LCB(DcPrfClk, DC_LCB_PRF_CLK_CNTR, CNTR_SYNTH),
1711[C_DC_PG_DBG_FLIT_CRDTS_CNT] =
1712        DC_PERF_CNTR_LCB(DcFltCrdts, DC_LCB_PG_DBG_FLIT_CRDTS_CNT, CNTR_SYNTH),
1713[C_DC_PG_STS_PAUSE_COMPLETE_CNT] =
1714        DC_PERF_CNTR_LCB(DcPauseComp, DC_LCB_PG_STS_PAUSE_COMPLETE_CNT,
1715                         CNTR_SYNTH),
1716[C_DC_PG_STS_TX_SBE_CNT] =
1717        DC_PERF_CNTR_LCB(DcStsTxSbe, DC_LCB_PG_STS_TX_SBE_CNT, CNTR_SYNTH),
1718[C_DC_PG_STS_TX_MBE_CNT] =
1719        DC_PERF_CNTR_LCB(DcStsTxMbe, DC_LCB_PG_STS_TX_MBE_CNT,
1720                         CNTR_SYNTH),
1721[C_SW_CPU_INTR] = CNTR_ELEM("Intr", 0, 0, CNTR_NORMAL,
1722                            access_sw_cpu_intr),
1723[C_SW_CPU_RCV_LIM] = CNTR_ELEM("RcvLimit", 0, 0, CNTR_NORMAL,
1724                            access_sw_cpu_rcv_limit),
1725[C_SW_VTX_WAIT] = CNTR_ELEM("vTxWait", 0, 0, CNTR_NORMAL,
1726                            access_sw_vtx_wait),
1727[C_SW_PIO_WAIT] = CNTR_ELEM("PioWait", 0, 0, CNTR_NORMAL,
1728                            access_sw_pio_wait),
1729[C_SW_KMEM_WAIT] = CNTR_ELEM("KmemWait", 0, 0, CNTR_NORMAL,
1730                            access_sw_kmem_wait),
1731[C_SW_SEND_SCHED] = CNTR_ELEM("SendSched", 0, 0, CNTR_NORMAL,
1732                            access_sw_send_schedule),
1733};
1734
1735static struct cntr_entry port_cntrs[PORT_CNTR_LAST] = {
1736[C_TX_UNSUP_VL] = TXE32_PORT_CNTR_ELEM(TxUnVLErr, SEND_UNSUP_VL_ERR_CNT,
1737                        CNTR_NORMAL),
1738[C_TX_INVAL_LEN] = TXE32_PORT_CNTR_ELEM(TxInvalLen, SEND_LEN_ERR_CNT,
1739                        CNTR_NORMAL),
1740[C_TX_MM_LEN_ERR] = TXE32_PORT_CNTR_ELEM(TxMMLenErr, SEND_MAX_MIN_LEN_ERR_CNT,
1741                        CNTR_NORMAL),
1742[C_TX_UNDERRUN] = TXE32_PORT_CNTR_ELEM(TxUnderrun, SEND_UNDERRUN_CNT,
1743                        CNTR_NORMAL),
1744[C_TX_FLOW_STALL] = TXE32_PORT_CNTR_ELEM(TxFlowStall, SEND_FLOW_STALL_CNT,
1745                        CNTR_NORMAL),
1746[C_TX_DROPPED] = TXE32_PORT_CNTR_ELEM(TxDropped, SEND_DROPPED_PKT_CNT,
1747                        CNTR_NORMAL),
1748[C_TX_HDR_ERR] = TXE32_PORT_CNTR_ELEM(TxHdrErr, SEND_HEADERS_ERR_CNT,
1749                        CNTR_NORMAL),
1750[C_TX_PKT] = TXE64_PORT_CNTR_ELEM(TxPkt, SEND_DATA_PKT_CNT, CNTR_NORMAL),
1751[C_TX_WORDS] = TXE64_PORT_CNTR_ELEM(TxWords, SEND_DWORD_CNT, CNTR_NORMAL),
1752[C_TX_WAIT] = TXE64_PORT_CNTR_ELEM(TxWait, SEND_WAIT_CNT, CNTR_SYNTH),
1753[C_TX_FLIT_VL] = TXE64_PORT_CNTR_ELEM(TxFlitVL, SEND_DATA_VL0_CNT,
1754                        CNTR_SYNTH | CNTR_VL),
1755[C_TX_PKT_VL] = TXE64_PORT_CNTR_ELEM(TxPktVL, SEND_DATA_PKT_VL0_CNT,
1756                        CNTR_SYNTH | CNTR_VL),
1757[C_TX_WAIT_VL] = TXE64_PORT_CNTR_ELEM(TxWaitVL, SEND_WAIT_VL0_CNT,
1758                        CNTR_SYNTH | CNTR_VL),
1759[C_RX_PKT] = RXE64_PORT_CNTR_ELEM(RxPkt, RCV_DATA_PKT_CNT, CNTR_NORMAL),
1760[C_RX_WORDS] = RXE64_PORT_CNTR_ELEM(RxWords, RCV_DWORD_CNT, CNTR_NORMAL),
1761[C_SW_LINK_DOWN] = CNTR_ELEM("SwLinkDown", 0, 0, CNTR_SYNTH | CNTR_32BIT,
1762                        access_sw_link_dn_cnt),
1763[C_SW_LINK_UP] = CNTR_ELEM("SwLinkUp", 0, 0, CNTR_SYNTH | CNTR_32BIT,
1764                        access_sw_link_up_cnt),
1765[C_SW_XMIT_DSCD] = CNTR_ELEM("XmitDscd", 0, 0, CNTR_SYNTH | CNTR_32BIT,
1766                        access_sw_xmit_discards),
1767[C_SW_XMIT_DSCD_VL] = CNTR_ELEM("XmitDscdVl", 0, 0,
1768                        CNTR_SYNTH | CNTR_32BIT | CNTR_VL,
1769                        access_sw_xmit_discards),
1770[C_SW_XMIT_CSTR_ERR] = CNTR_ELEM("XmitCstrErr", 0, 0, CNTR_SYNTH,
1771                        access_xmit_constraint_errs),
1772[C_SW_RCV_CSTR_ERR] = CNTR_ELEM("RcvCstrErr", 0, 0, CNTR_SYNTH,
1773                        access_rcv_constraint_errs),
1774[C_SW_IBP_LOOP_PKTS] = SW_IBP_CNTR(LoopPkts, loop_pkts),
1775[C_SW_IBP_RC_RESENDS] = SW_IBP_CNTR(RcResend, rc_resends),
1776[C_SW_IBP_RNR_NAKS] = SW_IBP_CNTR(RnrNak, rnr_naks),
1777[C_SW_IBP_OTHER_NAKS] = SW_IBP_CNTR(OtherNak, other_naks),
1778[C_SW_IBP_RC_TIMEOUTS] = SW_IBP_CNTR(RcTimeOut, rc_timeouts),
1779[C_SW_IBP_PKT_DROPS] = SW_IBP_CNTR(PktDrop, pkt_drops),
1780[C_SW_IBP_DMA_WAIT] = SW_IBP_CNTR(DmaWait, dmawait),
1781[C_SW_IBP_RC_SEQNAK] = SW_IBP_CNTR(RcSeqNak, rc_seqnak),
1782[C_SW_IBP_RC_DUPREQ] = SW_IBP_CNTR(RcDupRew, rc_dupreq),
1783[C_SW_IBP_RDMA_SEQ] = SW_IBP_CNTR(RdmaSeq, rdma_seq),
1784[C_SW_IBP_UNALIGNED] = SW_IBP_CNTR(Unaligned, unaligned),
1785[C_SW_IBP_SEQ_NAK] = SW_IBP_CNTR(SeqNak, seq_naks),
1786[C_SW_CPU_RC_ACKS] = CNTR_ELEM("RcAcks", 0, 0, CNTR_NORMAL,
1787                               access_sw_cpu_rc_acks),
1788[C_SW_CPU_RC_QACKS] = CNTR_ELEM("RcQacks", 0, 0, CNTR_NORMAL,
1789                               access_sw_cpu_rc_qacks),
1790[C_SW_CPU_RC_DELAYED_COMP] = CNTR_ELEM("RcDelayComp", 0, 0, CNTR_NORMAL,
1791                               access_sw_cpu_rc_delayed_comp),
1792[OVR_LBL(0)] = OVR_ELM(0), [OVR_LBL(1)] = OVR_ELM(1),
1793[OVR_LBL(2)] = OVR_ELM(2), [OVR_LBL(3)] = OVR_ELM(3),
1794[OVR_LBL(4)] = OVR_ELM(4), [OVR_LBL(5)] = OVR_ELM(5),
1795[OVR_LBL(6)] = OVR_ELM(6), [OVR_LBL(7)] = OVR_ELM(7),
1796[OVR_LBL(8)] = OVR_ELM(8), [OVR_LBL(9)] = OVR_ELM(9),
1797[OVR_LBL(10)] = OVR_ELM(10), [OVR_LBL(11)] = OVR_ELM(11),
1798[OVR_LBL(12)] = OVR_ELM(12), [OVR_LBL(13)] = OVR_ELM(13),
1799[OVR_LBL(14)] = OVR_ELM(14), [OVR_LBL(15)] = OVR_ELM(15),
1800[OVR_LBL(16)] = OVR_ELM(16), [OVR_LBL(17)] = OVR_ELM(17),
1801[OVR_LBL(18)] = OVR_ELM(18), [OVR_LBL(19)] = OVR_ELM(19),
1802[OVR_LBL(20)] = OVR_ELM(20), [OVR_LBL(21)] = OVR_ELM(21),
1803[OVR_LBL(22)] = OVR_ELM(22), [OVR_LBL(23)] = OVR_ELM(23),
1804[OVR_LBL(24)] = OVR_ELM(24), [OVR_LBL(25)] = OVR_ELM(25),
1805[OVR_LBL(26)] = OVR_ELM(26), [OVR_LBL(27)] = OVR_ELM(27),
1806[OVR_LBL(28)] = OVR_ELM(28), [OVR_LBL(29)] = OVR_ELM(29),
1807[OVR_LBL(30)] = OVR_ELM(30), [OVR_LBL(31)] = OVR_ELM(31),
1808[OVR_LBL(32)] = OVR_ELM(32), [OVR_LBL(33)] = OVR_ELM(33),
1809[OVR_LBL(34)] = OVR_ELM(34), [OVR_LBL(35)] = OVR_ELM(35),
1810[OVR_LBL(36)] = OVR_ELM(36), [OVR_LBL(37)] = OVR_ELM(37),
1811[OVR_LBL(38)] = OVR_ELM(38), [OVR_LBL(39)] = OVR_ELM(39),
1812[OVR_LBL(40)] = OVR_ELM(40), [OVR_LBL(41)] = OVR_ELM(41),
1813[OVR_LBL(42)] = OVR_ELM(42), [OVR_LBL(43)] = OVR_ELM(43),
1814[OVR_LBL(44)] = OVR_ELM(44), [OVR_LBL(45)] = OVR_ELM(45),
1815[OVR_LBL(46)] = OVR_ELM(46), [OVR_LBL(47)] = OVR_ELM(47),
1816[OVR_LBL(48)] = OVR_ELM(48), [OVR_LBL(49)] = OVR_ELM(49),
1817[OVR_LBL(50)] = OVR_ELM(50), [OVR_LBL(51)] = OVR_ELM(51),
1818[OVR_LBL(52)] = OVR_ELM(52), [OVR_LBL(53)] = OVR_ELM(53),
1819[OVR_LBL(54)] = OVR_ELM(54), [OVR_LBL(55)] = OVR_ELM(55),
1820[OVR_LBL(56)] = OVR_ELM(56), [OVR_LBL(57)] = OVR_ELM(57),
1821[OVR_LBL(58)] = OVR_ELM(58), [OVR_LBL(59)] = OVR_ELM(59),
1822[OVR_LBL(60)] = OVR_ELM(60), [OVR_LBL(61)] = OVR_ELM(61),
1823[OVR_LBL(62)] = OVR_ELM(62), [OVR_LBL(63)] = OVR_ELM(63),
1824[OVR_LBL(64)] = OVR_ELM(64), [OVR_LBL(65)] = OVR_ELM(65),
1825[OVR_LBL(66)] = OVR_ELM(66), [OVR_LBL(67)] = OVR_ELM(67),
1826[OVR_LBL(68)] = OVR_ELM(68), [OVR_LBL(69)] = OVR_ELM(69),
1827[OVR_LBL(70)] = OVR_ELM(70), [OVR_LBL(71)] = OVR_ELM(71),
1828[OVR_LBL(72)] = OVR_ELM(72), [OVR_LBL(73)] = OVR_ELM(73),
1829[OVR_LBL(74)] = OVR_ELM(74), [OVR_LBL(75)] = OVR_ELM(75),
1830[OVR_LBL(76)] = OVR_ELM(76), [OVR_LBL(77)] = OVR_ELM(77),
1831[OVR_LBL(78)] = OVR_ELM(78), [OVR_LBL(79)] = OVR_ELM(79),
1832[OVR_LBL(80)] = OVR_ELM(80), [OVR_LBL(81)] = OVR_ELM(81),
1833[OVR_LBL(82)] = OVR_ELM(82), [OVR_LBL(83)] = OVR_ELM(83),
1834[OVR_LBL(84)] = OVR_ELM(84), [OVR_LBL(85)] = OVR_ELM(85),
1835[OVR_LBL(86)] = OVR_ELM(86), [OVR_LBL(87)] = OVR_ELM(87),
1836[OVR_LBL(88)] = OVR_ELM(88), [OVR_LBL(89)] = OVR_ELM(89),
1837[OVR_LBL(90)] = OVR_ELM(90), [OVR_LBL(91)] = OVR_ELM(91),
1838[OVR_LBL(92)] = OVR_ELM(92), [OVR_LBL(93)] = OVR_ELM(93),
1839[OVR_LBL(94)] = OVR_ELM(94), [OVR_LBL(95)] = OVR_ELM(95),
1840[OVR_LBL(96)] = OVR_ELM(96), [OVR_LBL(97)] = OVR_ELM(97),
1841[OVR_LBL(98)] = OVR_ELM(98), [OVR_LBL(99)] = OVR_ELM(99),
1842[OVR_LBL(100)] = OVR_ELM(100), [OVR_LBL(101)] = OVR_ELM(101),
1843[OVR_LBL(102)] = OVR_ELM(102), [OVR_LBL(103)] = OVR_ELM(103),
1844[OVR_LBL(104)] = OVR_ELM(104), [OVR_LBL(105)] = OVR_ELM(105),
1845[OVR_LBL(106)] = OVR_ELM(106), [OVR_LBL(107)] = OVR_ELM(107),
1846[OVR_LBL(108)] = OVR_ELM(108), [OVR_LBL(109)] = OVR_ELM(109),
1847[OVR_LBL(110)] = OVR_ELM(110), [OVR_LBL(111)] = OVR_ELM(111),
1848[OVR_LBL(112)] = OVR_ELM(112), [OVR_LBL(113)] = OVR_ELM(113),
1849[OVR_LBL(114)] = OVR_ELM(114), [OVR_LBL(115)] = OVR_ELM(115),
1850[OVR_LBL(116)] = OVR_ELM(116), [OVR_LBL(117)] = OVR_ELM(117),
1851[OVR_LBL(118)] = OVR_ELM(118), [OVR_LBL(119)] = OVR_ELM(119),
1852[OVR_LBL(120)] = OVR_ELM(120), [OVR_LBL(121)] = OVR_ELM(121),
1853[OVR_LBL(122)] = OVR_ELM(122), [OVR_LBL(123)] = OVR_ELM(123),
1854[OVR_LBL(124)] = OVR_ELM(124), [OVR_LBL(125)] = OVR_ELM(125),
1855[OVR_LBL(126)] = OVR_ELM(126), [OVR_LBL(127)] = OVR_ELM(127),
1856[OVR_LBL(128)] = OVR_ELM(128), [OVR_LBL(129)] = OVR_ELM(129),
1857[OVR_LBL(130)] = OVR_ELM(130), [OVR_LBL(131)] = OVR_ELM(131),
1858[OVR_LBL(132)] = OVR_ELM(132), [OVR_LBL(133)] = OVR_ELM(133),
1859[OVR_LBL(134)] = OVR_ELM(134), [OVR_LBL(135)] = OVR_ELM(135),
1860[OVR_LBL(136)] = OVR_ELM(136), [OVR_LBL(137)] = OVR_ELM(137),
1861[OVR_LBL(138)] = OVR_ELM(138), [OVR_LBL(139)] = OVR_ELM(139),
1862[OVR_LBL(140)] = OVR_ELM(140), [OVR_LBL(141)] = OVR_ELM(141),
1863[OVR_LBL(142)] = OVR_ELM(142), [OVR_LBL(143)] = OVR_ELM(143),
1864[OVR_LBL(144)] = OVR_ELM(144), [OVR_LBL(145)] = OVR_ELM(145),
1865[OVR_LBL(146)] = OVR_ELM(146), [OVR_LBL(147)] = OVR_ELM(147),
1866[OVR_LBL(148)] = OVR_ELM(148), [OVR_LBL(149)] = OVR_ELM(149),
1867[OVR_LBL(150)] = OVR_ELM(150), [OVR_LBL(151)] = OVR_ELM(151),
1868[OVR_LBL(152)] = OVR_ELM(152), [OVR_LBL(153)] = OVR_ELM(153),
1869[OVR_LBL(154)] = OVR_ELM(154), [OVR_LBL(155)] = OVR_ELM(155),
1870[OVR_LBL(156)] = OVR_ELM(156), [OVR_LBL(157)] = OVR_ELM(157),
1871[OVR_LBL(158)] = OVR_ELM(158), [OVR_LBL(159)] = OVR_ELM(159),
1872};
1873
1874/* ======================================================================== */
1875
1876/* return true if this is chip revision a0 */
1877int is_a0(struct hfi1_devdata *dd)
1878{
1879        return ((dd->revision >> CCE_REVISION_CHIP_REV_MINOR_SHIFT)
1880                        & CCE_REVISION_CHIP_REV_MINOR_MASK) == 0;
1881}
1882
1883/* return true if this is chip revision a */
1884int is_ax(struct hfi1_devdata *dd)
1885{
1886        u8 chip_rev_minor =
1887                (dd->revision >> CCE_REVISION_CHIP_REV_MINOR_SHIFT)
1888                        & CCE_REVISION_CHIP_REV_MINOR_MASK;
1889        return (chip_rev_minor & 0xf0) == 0;
1890}
1891
1892/* return true if this is chip revision b */
1893int is_bx(struct hfi1_devdata *dd)
1894{
1895        u8 chip_rev_minor =
1896                (dd->revision >> CCE_REVISION_CHIP_REV_MINOR_SHIFT)
1897                        & CCE_REVISION_CHIP_REV_MINOR_MASK;
1898        return !!(chip_rev_minor & 0x10);
1899}
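/*
 * Minor revision layout, as used above: the high nibble selects the
 * major step (0 => A, 1 => B) and the low nibble the substep
 * (A0, A1, B0, ...).
 */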
1900
1901/*
1902 * Append string s to buffer buf.  Arguments curp and len are the current
1903 * position and remaining length, respectively.
1904 *
1905 * return 0 on success, 1 on out of room
1906 */
1907static int append_str(char *buf, char **curp, int *lenp, const char *s)
1908{
1909        char *p = *curp;
1910        int len = *lenp;
1911        int result = 0; /* success */
1912        char c;
1913
1914        /* add a comma, unless this is the first item in the buffer */
1915        if (p != buf) {
1916                if (len == 0) {
1917                        result = 1; /* out of room */
1918                        goto done;
1919                }
1920                *p++ = ',';
1921                len--;
1922        }
1923
1924        /* copy the string */
1925        while ((c = *s++) != 0) {
1926                if (len == 0) {
1927                        result = 1; /* out of room */
1928                        goto done;
1929                }
1930                *p++ = c;
1931                len--;
1932        }
1933
1934done:
1935        /* write return values */
1936        *curp = p;
1937        *lenp = len;
1938
1939        return result;
1940}
1941
1942/*
1943 * Using the given flag table, print a comma separated string into
1944 * the buffer.  End in '*' if the buffer is too short.
1945 */
1946static char *flag_string(char *buf, int buf_len, u64 flags,
1947                                struct flag_table *table, int table_size)
1948{
1949        char extra[32];
1950        char *p = buf;
1951        int len = buf_len;
1952        int no_room = 0;
1953        int i;
1954
1955        /* make sure there are at least 2 bytes so we can form "*" */
1956        if (len < 2)
1957                return "";
1958
1959        len--;  /* leave room for a nul */
1960        for (i = 0; i < table_size; i++) {
1961                if (flags & table[i].flag) {
1962                        no_room = append_str(buf, &p, &len, table[i].str);
1963                        if (no_room)
1964                                break;
1965                        flags &= ~table[i].flag;
1966                }
1967        }
1968
1969        /* any undocumented bits left? */
1970        if (!no_room && flags) {
1971                snprintf(extra, sizeof(extra), "bits 0x%llx", flags);
1972                no_room = append_str(buf, &p, &len, extra);
1973        }
1974
1975        /* add * if ran out of room */
1976        if (no_room) {
1977                /* may need to back up to add space for a '*' */
1978                if (len == 0)
1979                        --p;
1980                *p++ = '*';
1981        }
1982
1983        /* add final nul - space already allocated above */
1984        *p = 0;
1985        return buf;
1986}
1987
1988/* first 8 CCE error interrupt source names */
1989static const char * const cce_misc_names[] = {
1990        "CceErrInt",            /* 0 */
1991        "RxeErrInt",            /* 1 */
1992        "MiscErrInt",           /* 2 */
1993        "Reserved3",            /* 3 */
1994        "PioErrInt",            /* 4 */
1995        "SDmaErrInt",           /* 5 */
1996        "EgressErrInt",         /* 6 */
1997        "TxeErrInt"             /* 7 */
1998};
1999
2000/*
2001 * Return the miscellaneous error interrupt name.
2002 */
2003static char *is_misc_err_name(char *buf, size_t bsize, unsigned int source)
2004{
2005        if (source < ARRAY_SIZE(cce_misc_names))
2006                snprintf(buf, bsize, "%s", cce_misc_names[source]);
2007        else
2008                snprintf(buf, bsize, "Reserved%u",
2009                        source + IS_GENERAL_ERR_START);
2012
2013        return buf;
2014}
2015
2016/*
2017 * Return the SDMA engine error interrupt name.
2018 */
2019static char *is_sdma_eng_err_name(char *buf, size_t bsize, unsigned int source)
2020{
2021        snprintf(buf, bsize, "SDmaEngErrInt%u", source);
2022        return buf;
2023}
2024
2025/*
2026 * Return the send context error interrupt name.
2027 */
2028static char *is_sendctxt_err_name(char *buf, size_t bsize, unsigned int source)
2029{
2030        snprintf(buf, bsize, "SendCtxtErrInt%u", source);
2031        return buf;
2032}
2033
2034static const char * const various_names[] = {
2035        "PbcInt",
2036        "GpioAssertInt",
2037        "Qsfp1Int",
2038        "Qsfp2Int",
2039        "TCritInt"
2040};
2041
2042/*
2043 * Return the various interrupt name.
2044 */
2045static char *is_various_name(char *buf, size_t bsize, unsigned int source)
2046{
2047        if (source < ARRAY_SIZE(various_names))
2048                snprintf(buf, bsize, "%s", various_names[source]);
2049        else
2050                snprintf(buf, bsize, "Reserved%u", source + IS_VARIOUS_START);
2051        return buf;
2052}
2053
2054/*
2055 * Return the DC interrupt name.
2056 */
2057static char *is_dc_name(char *buf, size_t bsize, unsigned int source)
2058{
2059        static const char * const dc_int_names[] = {
2060                "common",
2061                "lcb",
2062                "8051",
2063                "lbm"   /* local block merge */
2064        };
2065
2066        if (source < ARRAY_SIZE(dc_int_names))
2067                snprintf(buf, bsize, "dc_%s_int", dc_int_names[source]);
2068        else
2069                snprintf(buf, bsize, "DCInt%u", source);
2070        return buf;
2071}
2072
2073static const char * const sdma_int_names[] = {
2074        "SDmaInt",
2075        "SdmaIdleInt",
2076        "SdmaProgressInt",
2077};
2078
2079/*
2080 * Return the SDMA engine interrupt name.  Sources are laid out
2081 * interrupt-major: e.g. with 16 engines, source 17 is SdmaIdleInt1.
2082 */
2082static char *is_sdma_eng_name(char *buf, size_t bsize, unsigned int source)
2083{
2084        /* what interrupt */
2085        unsigned int what  = source / TXE_NUM_SDMA_ENGINES;
2086        /* which engine */
2087        unsigned int which = source % TXE_NUM_SDMA_ENGINES;
2088
2089        if (likely(what < ARRAY_SIZE(sdma_int_names)))
2090                snprintf(buf, bsize, "%s%u", sdma_int_names[what], which);
2091        else
2092                snprintf(buf, bsize, "Invalid SDMA interrupt %u", source);
2093        return buf;
2094}
2095
2096/*
2097 * Return the receive available interrupt name.
2098 */
2099static char *is_rcv_avail_name(char *buf, size_t bsize, unsigned int source)
2100{
2101        snprintf(buf, bsize, "RcvAvailInt%u", source);
2102        return buf;
2103}
2104
2105/*
2106 * Return the receive urgent interrupt name.
2107 */
2108static char *is_rcv_urgent_name(char *buf, size_t bsize, unsigned int source)
2109{
2110        snprintf(buf, bsize, "RcvUrgentInt%u", source);
2111        return buf;
2112}
2113
2114/*
2115 * Return the send credit interrupt name.
2116 */
2117static char *is_send_credit_name(char *buf, size_t bsize, unsigned int source)
2118{
2119        snprintf(buf, bsize, "SendCreditInt%u", source);
2120        return buf;
2121}
2122
2123/*
2124 * Return the reserved interrupt name.
2125 */
2126static char *is_reserved_name(char *buf, size_t bsize, unsigned int source)
2127{
2128        snprintf(buf, bsize, "Reserved%u", source + IS_RESERVED_START);
2129        return buf;
2130}
2131
2132static char *cce_err_status_string(char *buf, int buf_len, u64 flags)
2133{
2134        return flag_string(buf, buf_len, flags,
2135                        cce_err_status_flags, ARRAY_SIZE(cce_err_status_flags));
2136}
2137
2138static char *rxe_err_status_string(char *buf, int buf_len, u64 flags)
2139{
2140        return flag_string(buf, buf_len, flags,
2141                        rxe_err_status_flags, ARRAY_SIZE(rxe_err_status_flags));
2142}
2143
2144static char *misc_err_status_string(char *buf, int buf_len, u64 flags)
2145{
2146        return flag_string(buf, buf_len, flags, misc_err_status_flags,
2147                        ARRAY_SIZE(misc_err_status_flags));
2148}
2149
2150static char *pio_err_status_string(char *buf, int buf_len, u64 flags)
2151{
2152        return flag_string(buf, buf_len, flags,
2153                        pio_err_status_flags, ARRAY_SIZE(pio_err_status_flags));
2154}
2155
2156static char *sdma_err_status_string(char *buf, int buf_len, u64 flags)
2157{
2158        return flag_string(buf, buf_len, flags,
2159                        sdma_err_status_flags,
2160                        ARRAY_SIZE(sdma_err_status_flags));
2161}
2162
2163static char *egress_err_status_string(char *buf, int buf_len, u64 flags)
2164{
2165        return flag_string(buf, buf_len, flags,
2166                egress_err_status_flags, ARRAY_SIZE(egress_err_status_flags));
2167}
2168
2169static char *egress_err_info_string(char *buf, int buf_len, u64 flags)
2170{
2171        return flag_string(buf, buf_len, flags,
2172                egress_err_info_flags, ARRAY_SIZE(egress_err_info_flags));
2173}
2174
2175static char *send_err_status_string(char *buf, int buf_len, u64 flags)
2176{
2177        return flag_string(buf, buf_len, flags,
2178                        send_err_status_flags,
2179                        ARRAY_SIZE(send_err_status_flags));
2180}
2181
2182static void handle_cce_err(struct hfi1_devdata *dd, u32 unused, u64 reg)
2183{
2184        char buf[96];
2185
2186        /*
2187         * For most these errors, there is nothing that can be done except
2188         * report or record it.
2189         */
2190        dd_dev_info(dd, "CCE Error: %s\n",
2191                cce_err_status_string(buf, sizeof(buf), reg));
2192
2193        if ((reg & CCE_ERR_STATUS_CCE_CLI2_ASYNC_FIFO_PARITY_ERR_SMASK) &&
2194            is_a0(dd) && (dd->icode != ICODE_FUNCTIONAL_SIMULATOR)) {
2196                /* this error requires a manual drop into SPC freeze mode,
2197                 * then a fix up */
2198                start_freeze_handling(dd->pport, FREEZE_SELF);
2199        }
2200}
2201
2202/*
2203 * Check counters for receive errors that do not have an interrupt
2204 * associated with them.
2205 */
2206#define RCVERR_CHECK_TIME 10
2207static void update_rcverr_timer(unsigned long opaque)
2208{
2209        struct hfi1_devdata *dd = (struct hfi1_devdata *)opaque;
2210        struct hfi1_pportdata *ppd = dd->pport;
2211        u32 cur_ovfl_cnt = read_dev_cntr(dd, C_RCV_OVF, CNTR_INVALID_VL);
2212
2213        if (dd->rcv_ovfl_cnt < cur_ovfl_cnt &&
2214                ppd->port_error_action & OPA_PI_MASK_EX_BUFFER_OVERRUN) {
2215                dd_dev_info(dd, "%s: PortErrorAction bounce\n", __func__);
2216                set_link_down_reason(ppd,
2217                        OPA_LINKDOWN_REASON_EXCESSIVE_BUFFER_OVERRUN, 0,
2218                        OPA_LINKDOWN_REASON_EXCESSIVE_BUFFER_OVERRUN);
2219                queue_work(ppd->hfi1_wq, &ppd->link_bounce_work);
2220        }
2221        dd->rcv_ovfl_cnt = cur_ovfl_cnt;
2222
2223        mod_timer(&dd->rcverr_timer, jiffies + HZ * RCVERR_CHECK_TIME);
2224}
2225
2226static int init_rcverr(struct hfi1_devdata *dd)
2227{
2228        setup_timer(&dd->rcverr_timer, update_rcverr_timer, (unsigned long)dd);
2229        /* Assume the hardware counter has been reset */
2230        dd->rcv_ovfl_cnt = 0;
2231        return mod_timer(&dd->rcverr_timer, jiffies + HZ * RCVERR_CHECK_TIME);
2232}
2233
2234static void free_rcverr(struct hfi1_devdata *dd)
2235{
2236        if (dd->rcverr_timer.data)
2237                del_timer_sync(&dd->rcverr_timer);
2238        dd->rcverr_timer.data = 0;
2239}
2240
2241static void handle_rxe_err(struct hfi1_devdata *dd, u32 unused, u64 reg)
2242{
2243        char buf[96];
2244
2245        dd_dev_info(dd, "Receive Error: %s\n",
2246                rxe_err_status_string(buf, sizeof(buf), reg));
2247
2248        if (reg & ALL_RXE_FREEZE_ERR) {
2249                int flags = 0;
2250
2251                /*
2252                 * Freeze mode recovery is disabled for the errors
2253                 * in RXE_FREEZE_ABORT_MASK
2254                 */
2255                if (is_a0(dd) && (reg & RXE_FREEZE_ABORT_MASK))
2256                        flags = FREEZE_ABORT;
2257
2258                start_freeze_handling(dd->pport, flags);
2259        }
2260}
2261
2262static void handle_misc_err(struct hfi1_devdata *dd, u32 unused, u64 reg)
2263{
2264        char buf[96];
2265
2266        dd_dev_info(dd, "Misc Error: %s",
2267                misc_err_status_string(buf, sizeof(buf), reg));
2268}
2269
2270static void handle_pio_err(struct hfi1_devdata *dd, u32 unused, u64 reg)
2271{
2272        char buf[96];
2273
2274        dd_dev_info(dd, "PIO Error: %s\n",
2275                pio_err_status_string(buf, sizeof(buf), reg));
2276
2277        if (reg & ALL_PIO_FREEZE_ERR)
2278                start_freeze_handling(dd->pport, 0);
2279}
2280
2281static void handle_sdma_err(struct hfi1_devdata *dd, u32 unused, u64 reg)
2282{
2283        char buf[96];
2284
2285        dd_dev_info(dd, "SDMA Error: %s\n",
2286                sdma_err_status_string(buf, sizeof(buf), reg));
2287
2288        if (reg & ALL_SDMA_FREEZE_ERR)
2289                start_freeze_handling(dd->pport, 0);
2290}
2291
2292static void count_port_inactive(struct hfi1_devdata *dd)
2293{
2294        struct hfi1_pportdata *ppd = dd->pport;
2295
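        /* the increment saturates at all ones rather than wrapping */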
2296        if (ppd->port_xmit_discards < ~(u64)0)
2297                ppd->port_xmit_discards++;
2298}
2299
2300/*
2301 * We have had a "disallowed packet" error during egress. Determine the
2302 * integrity check that failed, and update the relevant error counter, etc.
2303 *
2304 * Note that the SEND_EGRESS_ERR_INFO register has only a single
2305 * bit of state per integrity check, and so we can miss the reason for an
2306 * egress error if more than one packet fails the same integrity check
2307 * since we cleared the corresponding bit in SEND_EGRESS_ERR_INFO.
2308 */
2309static void handle_send_egress_err_info(struct hfi1_devdata *dd)
2310{
2311        struct hfi1_pportdata *ppd = dd->pport;
2312        u64 src = read_csr(dd, SEND_EGRESS_ERR_SOURCE); /* read first */
2313        u64 info = read_csr(dd, SEND_EGRESS_ERR_INFO);
2314        char buf[96];
2315
2316        /* clear down all observed info as quickly as possible after read */
2317        write_csr(dd, SEND_EGRESS_ERR_INFO, info);
2318
2319        dd_dev_info(dd,
2320                "Egress Error Info: 0x%llx, %s Egress Error Src 0x%llx\n",
2321                info, egress_err_info_string(buf, sizeof(buf), info), src);
2322
2323        /* Eventually add other counters for each bit */
2324
2325        if (info & SEND_EGRESS_ERR_INFO_TOO_LONG_IB_PACKET_ERR_SMASK) {
2326                if (ppd->port_xmit_discards < ~(u64)0)
2327                        ppd->port_xmit_discards++;
2328        }
2329}
2330
2331/*
2332 * Input value is a bit position within the SEND_EGRESS_ERR_STATUS
2333 * register. Does it represent a 'port inactive' error?
2334 */
2335static inline int port_inactive_err(u64 posn)
2336{
2337        return (posn >= SEES(TX_LINKDOWN) &&
2338                posn <= SEES(TX_INCORRECT_LINK_STATE));
2339}
2340
2341/*
2342 * Input value is a bit position within the SEND_EGRESS_ERR_STATUS
2343 * register. Does it represent a 'disallowed packet' error?
2344 */
2345static inline int disallowed_pkt_err(u64 posn)
2346{
2347        return (posn >= SEES(TX_SDMA0_DISALLOWED_PACKET) &&
2348                posn <= SEES(TX_SDMA15_DISALLOWED_PACKET));
2349}
2350
2351static void handle_egress_err(struct hfi1_devdata *dd, u32 unused, u64 reg)
2352{
2353        u64 reg_copy = reg, handled = 0;
2354        char buf[96];
2355
2356        if (reg & ALL_TXE_EGRESS_FREEZE_ERR)
2357                start_freeze_handling(dd->pport, 0);
2358        if (is_a0(dd) &&
2359            (reg & SEND_EGRESS_ERR_STATUS_TX_CREDIT_RETURN_VL_ERR_SMASK) &&
2360            (dd->icode != ICODE_FUNCTIONAL_SIMULATOR))
2361                start_freeze_handling(dd->pport, 0);
2362
2363        while (reg_copy) {
2364                int posn = fls64(reg_copy);
2365                /*
2366                 * fls64() returns a 1-based offset, but we generally
2367                 * want 0-based offsets.
2368                 */
2369                int shift = posn - 1;
2370
2371                if (port_inactive_err(shift)) {
2372                        count_port_inactive(dd);
2373                        handled |= (1ULL << shift);
2374                } else if (disallowed_pkt_err(shift)) {
2375                        handle_send_egress_err_info(dd);
2376                        handled |= (1ULL << shift);
2377                }
2378                clear_bit(shift, (unsigned long *)&reg_copy);
2379        }
2380
2381        reg &= ~handled;
2382
2383        if (reg)
2384                dd_dev_info(dd, "Egress Error: %s\n",
2385                        egress_err_status_string(buf, sizeof(buf), reg));
2386}
2387
2388static void handle_txe_err(struct hfi1_devdata *dd, u32 unused, u64 reg)
2389{
2390        char buf[96];
2391
2392        dd_dev_info(dd, "Send Error: %s\n",
2393                send_err_status_string(buf, sizeof(buf), reg));
2395}
2396
2397/*
2398 * The maximum number of times the error clear down will loop before
2399 * blocking a repeating error.  This value is arbitrary.
2400 */
2401#define MAX_CLEAR_COUNT 20
2402
2403/*
2404 * Clear and handle an error register.  All error interrupts are funneled
2405 * through here to have a central location to correctly handle single-
2406 * or multi-shot errors.
2407 *
2408 * For non per-context registers, call this routine with a context value
2409 * of 0 so the per-context offset is zero.
2410 *
2411 * If the handler loops too many times, assume that something is wrong
2412 * and can't be fixed, so mask the error bits.
2413 */
2414static void interrupt_clear_down(struct hfi1_devdata *dd,
2415                                 u32 context,
2416                                 const struct err_reg_info *eri)
2417{
2418        u64 reg;
2419        u32 count;
2420
2421        /* read in a loop until no more errors are seen */
2422        count = 0;
2423        while (1) {
2424                reg = read_kctxt_csr(dd, context, eri->status);
2425                if (reg == 0)
2426                        break;
2427                write_kctxt_csr(dd, context, eri->clear, reg);
2428                if (likely(eri->handler))
2429                        eri->handler(dd, context, reg);
2430                count++;
2431                if (count > MAX_CLEAR_COUNT) {
2432                        u64 mask;
2433
2434                        dd_dev_err(dd, "Repeating %s bits 0x%llx - masking\n",
2435                                eri->desc, reg);
2436                        /*
2437                         * Read-modify-write so any other masked bits
2438                         * remain masked.
2439                         */
2440                        mask = read_kctxt_csr(dd, context, eri->mask);
2441                        mask &= ~reg;
2442                        write_kctxt_csr(dd, context, eri->mask, mask);
2443                        break;
2444                }
2445        }
2446}
2447
2448/*
2449 * CCE block "misc" interrupt.  Source is < 16.
2450 */
2451static void is_misc_err_int(struct hfi1_devdata *dd, unsigned int source)
2452{
2453        const struct err_reg_info *eri = &misc_errs[source];
2454
2455        if (eri->handler) {
2456                interrupt_clear_down(dd, 0, eri);
2457        } else {
2458                dd_dev_err(dd, "Unexpected misc interrupt (%u) - reserved\n",
2459                        source);
2460        }
2461}
2462
2463static char *send_context_err_status_string(char *buf, int buf_len, u64 flags)
2464{
2465        return flag_string(buf, buf_len, flags,
2466                        sc_err_status_flags, ARRAY_SIZE(sc_err_status_flags));
2467}
2468
2469/*
2470 * Send context error interrupt.  Source (hw_context) is < 160.
2471 *
2472 * All send context errors cause the send context to halt.  The normal
2473 * clear-down mechanism cannot be used because we cannot clear the
2474 * error bits until several other long-running items are done first.
2475 * This is OK because with the context halted, nothing else is going
2476 * to happen on it anyway.
2477 */
2478static void is_sendctxt_err_int(struct hfi1_devdata *dd,
2479                                unsigned int hw_context)
2480{
2481        struct send_context_info *sci;
2482        struct send_context *sc;
2483        char flags[96];
2484        u64 status;
2485        u32 sw_index;
2486
2487        sw_index = dd->hw_to_sw[hw_context];
2488        if (sw_index >= dd->num_send_contexts) {
2489                dd_dev_err(dd,
2490                        "out of range sw index %u for send context %u\n",
2491                        sw_index, hw_context);
2492                return;
2493        }
2494        sci = &dd->send_contexts[sw_index];
2495        sc = sci->sc;
2496        if (!sc) {
2497                dd_dev_err(dd, "%s: context %u(%u): no sc?\n", __func__,
2498                        sw_index, hw_context);
2499                return;
2500        }
2501
2502        /* tell the software that a halt has begun */
2503        sc_stop(sc, SCF_HALTED);
2504
2505        status = read_kctxt_csr(dd, hw_context, SEND_CTXT_ERR_STATUS);
2506
2507        dd_dev_info(dd, "Send Context %u(%u) Error: %s\n", sw_index, hw_context,
2508                send_context_err_status_string(flags, sizeof(flags), status));
2509
2510        if (status & SEND_CTXT_ERR_STATUS_PIO_DISALLOWED_PACKET_ERR_SMASK)
2511                handle_send_egress_err_info(dd);
2512
2513        /*
2514         * Automatically restart halted kernel contexts out of interrupt
2515         * context.  User contexts must ask the driver to restart the context.
2516         */
2517        if (sc->type != SC_USER)
2518                queue_work(dd->pport->hfi1_wq, &sc->halt_work);
2519}
2520
2521static void handle_sdma_eng_err(struct hfi1_devdata *dd,
2522                                unsigned int source, u64 status)
2523{
2524        struct sdma_engine *sde;
2525
2526        sde = &dd->per_sdma[source];
2527#ifdef CONFIG_SDMA_VERBOSITY
2528        dd_dev_err(sde->dd, "CONFIG SDMA(%u) %s:%d %s()\n", sde->this_idx,
2529                   slashstrip(__FILE__), __LINE__, __func__);
2530        dd_dev_err(sde->dd, "CONFIG SDMA(%u) source: %u status 0x%llx\n",
2531                   sde->this_idx, source, (unsigned long long)status);
2532#endif
2533        sdma_engine_error(sde, status);
2534}
2535
2536/*
2537 * CCE block SDMA error interrupt.  Source is < 16.
2538 */
2539static void is_sdma_eng_err_int(struct hfi1_devdata *dd, unsigned int source)
2540{
2541#ifdef CONFIG_SDMA_VERBOSITY
2542        struct sdma_engine *sde = &dd->per_sdma[source];
2543
2544        dd_dev_err(dd, "CONFIG SDMA(%u) %s:%d %s()\n", sde->this_idx,
2545                   slashstrip(__FILE__), __LINE__, __func__);
2546        dd_dev_err(dd, "CONFIG SDMA(%u) source: %u\n", sde->this_idx,
2547                   source);
2548        sdma_dumpstate(sde);
2549#endif
2550        interrupt_clear_down(dd, source, &sdma_eng_err);
2551}
2552
2553/*
2554 * CCE block "various" interrupt.  Source is < 8.
2555 */
2556static void is_various_int(struct hfi1_devdata *dd, unsigned int source)
2557{
2558        const struct err_reg_info *eri = &various_err[source];
2559
2560        /*
2561         * TCritInt cannot go through interrupt_clear_down()
2562         * because it is not a second tier interrupt. The handler
2563         * should be called directly.
2564         */
2565        if (source == TCRIT_INT_SOURCE)
2566                handle_temp_err(dd);
2567        else if (eri->handler)
2568                interrupt_clear_down(dd, 0, eri);
2569        else
2570                dd_dev_info(dd,
2571                        "%s: Unimplemented/reserved interrupt %d\n",
2572                        __func__, source);
2573}
2574
2575static void handle_qsfp_int(struct hfi1_devdata *dd, u32 src_ctx, u64 reg)
2576{
2577        /* source is always zero */
2578        struct hfi1_pportdata *ppd = dd->pport;
2579        unsigned long flags;
2580        u64 qsfp_int_mgmt = (u64)(QSFP_HFI0_INT_N | QSFP_HFI0_MODPRST_N);
2581
2582        if (reg & QSFP_HFI0_MODPRST_N) {
2583
2584                dd_dev_info(dd, "%s: ModPresent triggered QSFP interrupt\n",
2585                                __func__);
2586
2587                if (!qsfp_mod_present(ppd)) {
2588                        ppd->driver_link_ready = 0;
2589                        /*
2590                         * Cable removed, reset all our information about the
2591                         * cache and cable capabilities
2592                         */
2593
2594                        spin_lock_irqsave(&ppd->qsfp_info.qsfp_lock, flags);
2595                        /*
2596                         * We don't set cache_refresh_required here as we expect
2597                         * an interrupt when a cable is inserted
2598                         */
2599                        ppd->qsfp_info.cache_valid = 0;
2600                        ppd->qsfp_info.qsfp_interrupt_functional = 0;
2601                        spin_unlock_irqrestore(&ppd->qsfp_info.qsfp_lock,
2602                                                flags);
2603                        write_csr(dd,
2604                                        dd->hfi1_id ?
2605                                                ASIC_QSFP2_INVERT :
2606                                                ASIC_QSFP1_INVERT,
2607                                qsfp_int_mgmt);
2608                        if (ppd->host_link_state == HLS_DN_POLL) {
2609                                /*
2610                                 * The link is still in POLL. This means
2611                                 * that the normal link down processing
2612                                 * will not happen. We have to do it here
2613                                 * before turning the DC off.
2614                                 */
2615                                queue_work(ppd->hfi1_wq, &ppd->link_down_work);
2616                        }
2617                } else {
2618                        spin_lock_irqsave(&ppd->qsfp_info.qsfp_lock, flags);
2619                        ppd->qsfp_info.cache_valid = 0;
2620                        ppd->qsfp_info.cache_refresh_required = 1;
2621                        spin_unlock_irqrestore(&ppd->qsfp_info.qsfp_lock,
2622                                                flags);
2623
2624                        qsfp_int_mgmt &= ~(u64)QSFP_HFI0_MODPRST_N;
2625                        write_csr(dd,
2626                                        dd->hfi1_id ?
2627                                                ASIC_QSFP2_INVERT :
2628                                                ASIC_QSFP1_INVERT,
2629                                qsfp_int_mgmt);
2630                }
2631        }
2632
2633        if (reg & QSFP_HFI0_INT_N) {
2634
2635                dd_dev_info(dd, "%s: IntN triggered QSFP interrupt\n",
2636                                __func__);
2637                spin_lock_irqsave(&ppd->qsfp_info.qsfp_lock, flags);
2638                ppd->qsfp_info.check_interrupt_flags = 1;
2639                ppd->qsfp_info.qsfp_interrupt_functional = 1;
2640                spin_unlock_irqrestore(&ppd->qsfp_info.qsfp_lock, flags);
2641        }
2642
2643        /* Schedule the QSFP work only if there is a cable attached. */
2644        if (qsfp_mod_present(ppd))
2645                queue_work(ppd->hfi1_wq, &ppd->qsfp_info.qsfp_work);
2646}
2647
2648static int request_host_lcb_access(struct hfi1_devdata *dd)
2649{
2650        int ret;
2651
2652        ret = do_8051_command(dd, HCMD_MISC,
2653                (u64)HCMD_MISC_REQUEST_LCB_ACCESS << LOAD_DATA_FIELD_ID_SHIFT,
2654                NULL);
2655        if (ret != HCMD_SUCCESS) {
2656                dd_dev_err(dd, "%s: command failed with error %d\n",
2657                        __func__, ret);
2658        }
2659        return ret == HCMD_SUCCESS ? 0 : -EBUSY;
2660}
2661
2662static int request_8051_lcb_access(struct hfi1_devdata *dd)
2663{
2664        int ret;
2665
2666        ret = do_8051_command(dd, HCMD_MISC,
2667                (u64)HCMD_MISC_GRANT_LCB_ACCESS << LOAD_DATA_FIELD_ID_SHIFT,
2668                NULL);
2669        if (ret != HCMD_SUCCESS) {
2670                dd_dev_err(dd, "%s: command failed with error %d\n",
2671                        __func__, ret);
2672        }
2673        return ret == HCMD_SUCCESS ? 0 : -EBUSY;
2674}
2675
2676/*
2677 * Set the LCB selector - allow host access.  The DCC selector always
2678 * points to the host.
2679 */
2680static inline void set_host_lcb_access(struct hfi1_devdata *dd)
2681{
2682        write_csr(dd, DC_DC8051_CFG_CSR_ACCESS_SEL,
2683                                DC_DC8051_CFG_CSR_ACCESS_SEL_DCC_SMASK
2684                                | DC_DC8051_CFG_CSR_ACCESS_SEL_LCB_SMASK);
2685}
2686
2687/*
2688 * Clear the LCB selector - allow 8051 access.  The DCC selector always
2689 * points to the host.
2690 */
2691static inline void set_8051_lcb_access(struct hfi1_devdata *dd)
2692{
2693        write_csr(dd, DC_DC8051_CFG_CSR_ACCESS_SEL,
2694                                DC_DC8051_CFG_CSR_ACCESS_SEL_DCC_SMASK);
2695}
2696
2697/*
2698 * Acquire LCB access from the 8051.  If the host already has access,
2699 * just increment a counter.  Otherwise, inform the 8051 that the
2700 * host is taking access.
2701 *
2702 * Returns:
2703 *      0 on success
2704 *      -EBUSY if the 8051 has control and cannot be disturbed
2705 *      -errno if unable to acquire access from the 8051
2706 */
2707int acquire_lcb_access(struct hfi1_devdata *dd, int sleep_ok)
2708{
2709        struct hfi1_pportdata *ppd = dd->pport;
2710        int ret = 0;
2711
2712        /*
2713         * Use the host link state lock so the operation of this routine
2714         * { link state check, selector change, count increment } can occur
2715         * as a unit against a link state change.  Otherwise there is a
2716         * race between the state change and the count increment.
2717         */
2718        if (sleep_ok) {
2719                mutex_lock(&ppd->hls_lock);
2720        } else {
2721                while (!mutex_trylock(&ppd->hls_lock))
2722                        udelay(1);
2723        }
2724
2725        /* this access is valid only when the link is up */
2726        if ((ppd->host_link_state & HLS_UP) == 0) {
2727                dd_dev_info(dd, "%s: link state %s not up\n",
2728                        __func__, link_state_name(ppd->host_link_state));
2729                ret = -EBUSY;
2730                goto done;
2731        }
2732
2733        if (dd->lcb_access_count == 0) {
2734                ret = request_host_lcb_access(dd);
2735                if (ret) {
2736                        dd_dev_err(dd,
2737                                "%s: unable to acquire LCB access, err %d\n",
2738                                __func__, ret);
2739                        goto done;
2740                }
2741                set_host_lcb_access(dd);
2742        }
2743        dd->lcb_access_count++;
2744done:
2745        mutex_unlock(&ppd->hls_lock);
2746        return ret;
2747}
2748
2749/*
2750 * Release LCB access by decrementing the use count.  If the count is moving
2751 * from 1 to 0, inform the 8051 that it has control back.
2752 *
2753 * Returns:
2754 *      0 on success
2755 *      -errno if unable to release access to the 8051
2756 */
2757int release_lcb_access(struct hfi1_devdata *dd, int sleep_ok)
2758{
2759        int ret = 0;
2760
2761        /*
2762         * Use the host link state lock because the acquire needed it.
2763         * Here, we only need to keep { selector change, count decrement }
2764         * as a unit.
2765         */
2766        if (sleep_ok) {
2767                mutex_lock(&dd->pport->hls_lock);
2768        } else {
2769                while (!mutex_trylock(&dd->pport->hls_lock))
2770                        udelay(1);
2771        }
2772
2773        if (dd->lcb_access_count == 0) {
2774                dd_dev_err(dd, "%s: LCB access count is zero.  Skipping.\n",
2775                        __func__);
2776                goto done;
2777        }
2778
2779        if (dd->lcb_access_count == 1) {
2780                set_8051_lcb_access(dd);
2781                ret = request_8051_lcb_access(dd);
2782                if (ret) {
2783                        dd_dev_err(dd,
2784                                "%s: unable to release LCB access, err %d\n",
2785                                __func__, ret);
2786                        /* restore host access if the grant didn't work */
2787                        set_host_lcb_access(dd);
2788                        goto done;
2789                }
2790        }
2791        dd->lcb_access_count--;
2792done:
2793        mutex_unlock(&dd->pport->hls_lock);
2794        return ret;
2795}
2796
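/*
 * Usage sketch for the pair above (illustrative only, not a path the
 * driver takes verbatim): a host-side reader of an LCB CSR, with the
 * link up, would bracket the access like
 *
 *	if (acquire_lcb_access(dd, 1) == 0) {
 *		reg = read_csr(dd, DC_LCB_STS_ROUND_TRIP_LTP_CNT);
 *		release_lcb_access(dd, 1);
 *	}
 *
 * where DC_LCB_STS_ROUND_TRIP_LTP_CNT stands in for any LCB CSR and
 * sleep_ok = 1 permits sleeping on the hls_lock mutex.
 */
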
2797/*
2798 * Initialize LCB access variables and state.  Called during driver load,
2799 * after most of the initialization is finished.
2800 *
2801 * The DC default is LCB access on for the host.  The driver defaults to
2802 * leaving access to the 8051.  Assign access now - this constrains the call
2803 * to this routine to be after all LCB set-up is done.  In particular, after
2804 * hfi1_init_dd() -> set_up_interrupts() -> clear_all_interrupts()
2805 */
2806static void init_lcb_access(struct hfi1_devdata *dd)
2807{
2808        dd->lcb_access_count = 0;
2809}
2810
2811/*
2812 * Write a response back to an 8051 request.
2813 */
2814static void hreq_response(struct hfi1_devdata *dd, u8 return_code, u16 rsp_data)
2815{
2816        write_csr(dd, DC_DC8051_CFG_EXT_DEV_0,
2817                DC_DC8051_CFG_EXT_DEV_0_COMPLETED_SMASK
2818                | (u64)return_code << DC_DC8051_CFG_EXT_DEV_0_RETURN_CODE_SHIFT
2819                | (u64)rsp_data << DC_DC8051_CFG_EXT_DEV_0_RSP_DATA_SHIFT);
2820}
2821
2822/*
2823 * Handle requests from the 8051.
2824 */
2825static void handle_8051_request(struct hfi1_devdata *dd)
2826{
2827        u64 reg;
2828        u16 data;
2829        u8 type;
2830
2831        reg = read_csr(dd, DC_DC8051_CFG_EXT_DEV_1);
2832        if ((reg & DC_DC8051_CFG_EXT_DEV_1_REQ_NEW_SMASK) == 0)
2833                return; /* no request */
2834
2835        /* zero out COMPLETED so the response is seen */
2836        write_csr(dd, DC_DC8051_CFG_EXT_DEV_0, 0);
2837
2838        /* extract request details */
2839        type = (reg >> DC_DC8051_CFG_EXT_DEV_1_REQ_TYPE_SHIFT)
2840                        & DC_DC8051_CFG_EXT_DEV_1_REQ_TYPE_MASK;
2841        data = (reg >> DC_DC8051_CFG_EXT_DEV_1_REQ_DATA_SHIFT)
2842                        & DC_DC8051_CFG_EXT_DEV_1_REQ_DATA_MASK;
2843
2844        switch (type) {
2845        case HREQ_LOAD_CONFIG:
2846        case HREQ_SAVE_CONFIG:
2847        case HREQ_READ_CONFIG:
2848        case HREQ_SET_TX_EQ_ABS:
2849        case HREQ_SET_TX_EQ_REL:
2850        case HREQ_ENABLE:
2851                dd_dev_info(dd, "8051 request: request 0x%x not supported\n",
2852                        type);
2853                hreq_response(dd, HREQ_NOT_SUPPORTED, 0);
2854                break;
2855
2856        case HREQ_CONFIG_DONE:
2857                hreq_response(dd, HREQ_SUCCESS, 0);
2858                break;
2859
2860        case HREQ_INTERFACE_TEST:
2861                hreq_response(dd, HREQ_SUCCESS, data);
2862                break;
2863
2864        default:
2865                dd_dev_err(dd, "8051 request: unknown request 0x%x\n", type);
2866                hreq_response(dd, HREQ_NOT_SUPPORTED, 0);
2867                break;
2868        }
2869}
2870
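/*
 * Illustrative decode (sketch, per the shifts and masks above): if
 * DC_DC8051_CFG_EXT_DEV_1 reads back with REQ_NEW set, a type of
 * HREQ_INTERFACE_TEST, and data 0x1234, the handler above echoes the
 * data back via hreq_response(dd, HREQ_SUCCESS, 0x1234); an unknown
 * type is answered with HREQ_NOT_SUPPORTED instead.
 */
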
2871static void write_global_credit(struct hfi1_devdata *dd,
2872                                u8 vau, u16 total, u16 shared)
2873{
2874        write_csr(dd, SEND_CM_GLOBAL_CREDIT,
2875                ((u64)total
2876                        << SEND_CM_GLOBAL_CREDIT_TOTAL_CREDIT_LIMIT_SHIFT)
2877                | ((u64)shared
2878                        << SEND_CM_GLOBAL_CREDIT_SHARED_LIMIT_SHIFT)
2879                | ((u64)vau << SEND_CM_GLOBAL_CREDIT_AU_SHIFT));
2880}
2881
2882/*
2883 * Set up initial VL15 credits of the remote.  Assumes the rest of
2884 * the CM credit registers are zero from a previous global or credit reset.
2885 */
2886void set_up_vl15(struct hfi1_devdata *dd, u8 vau, u16 vl15buf)
2887{
2888        /* leave shared count at zero for both global and VL15 */
2889        write_global_credit(dd, vau, vl15buf, 0);
2890
2891        /* We may need some credits for another VL when sending packets
2892         * with the snoop interface. Dividing it down the middle for VL15
2893         * and VL0 should suffice.
2894         */
2895        if (unlikely(dd->hfi1_snoop.mode_flag == HFI1_PORT_SNOOP_MODE)) {
2896                write_csr(dd, SEND_CM_CREDIT_VL15, (u64)(vl15buf >> 1)
2897                    << SEND_CM_CREDIT_VL15_DEDICATED_LIMIT_VL_SHIFT);
2898                write_csr(dd, SEND_CM_CREDIT_VL, (u64)(vl15buf >> 1)
2899                    << SEND_CM_CREDIT_VL_DEDICATED_LIMIT_VL_SHIFT);
2900        } else {
2901                write_csr(dd, SEND_CM_CREDIT_VL15, (u64)vl15buf
2902                        << SEND_CM_CREDIT_VL15_DEDICATED_LIMIT_VL_SHIFT);
2903        }
2904}
2905
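/*
 * Example (illustrative): with vau = 1 and vl15buf = 0x40, the normal
 * path gives all 0x40 dedicated credits to VL15; in snoop mode the
 * count is split down the middle, 0x20 to VL15 and 0x20 to VL0, with
 * the shared limit left at zero in both cases.
 */
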
2906/*
2907 * Zero all credit details from the previous connection and
2908 * reset the CM block's internal counters.
2909 */
2910void reset_link_credits(struct hfi1_devdata *dd)
2911{
2912        int i;
2913
2914        /* remove all previous VL credit limits */
2915        for (i = 0; i < TXE_NUM_DATA_VL; i++)
2916                write_csr(dd, SEND_CM_CREDIT_VL + (8*i), 0);
2917        write_csr(dd, SEND_CM_CREDIT_VL15, 0);
2918        write_global_credit(dd, 0, 0, 0);
2919        /* reset the CM block */
2920        pio_send_control(dd, PSC_CM_RESET);
2921}
2922
2923/* convert a vCU to a CU */
2924static u32 vcu_to_cu(u8 vcu)
2925{
2926        return 1 << vcu;
2927}
2928
2929/* convert a CU to a vCU */
2930static u8 cu_to_vcu(u32 cu)
2931{
2932        return ilog2(cu);
2933}
2934
2935/* convert a vAU to an AU */
2936static u32 vau_to_au(u8 vau)
2937{
2938        return 8 * (1 << vau);
2939}
2940
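/*
 * Illustrative values for the v-encodings above: vcu_to_cu(2) == 4 and
 * cu_to_vcu(4) == 2 (the two are inverses), while vau_to_au(3) ==
 * 8 * (1 << 3) == 64, i.e. a vAU of 3 denotes a 64-byte allocation
 * unit.
 */
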
2941static void set_linkup_defaults(struct hfi1_pportdata *ppd)
2942{
2943        ppd->sm_trap_qp = 0x0;
2944        ppd->sa_qp = 0x1;
2945}
2946
2947/*
2948 * Graceful LCB shutdown.  This leaves the LCB FIFOs in reset.
2949 */
2950static void lcb_shutdown(struct hfi1_devdata *dd, int abort)
2951{
2952        u64 reg;
2953
2954        /* clear lcb run: LCB_CFG_RUN.EN = 0 */
2955        write_csr(dd, DC_LCB_CFG_RUN, 0);
2956        /* set tx fifo reset: LCB_CFG_TX_FIFOS_RESET.VAL = 1 */
2957        write_csr(dd, DC_LCB_CFG_TX_FIFOS_RESET,
2958                1ull << DC_LCB_CFG_TX_FIFOS_RESET_VAL_SHIFT);
2959        /* set dcc reset csr: DCC_CFG_RESET.{reset_lcb,reset_rx_fpe} = 1 */
2960        dd->lcb_err_en = read_csr(dd, DC_LCB_ERR_EN);
2961        reg = read_csr(dd, DCC_CFG_RESET);
2962        write_csr(dd, DCC_CFG_RESET,
2963                reg
2964                | (1ull << DCC_CFG_RESET_RESET_LCB_SHIFT)
2965                | (1ull << DCC_CFG_RESET_RESET_RX_FPE_SHIFT));
2966        (void) read_csr(dd, DCC_CFG_RESET); /* make sure the write completed */
2967        if (!abort) {
2968                udelay(1);    /* must hold for the longer of 16cclks or 20ns */
2969                write_csr(dd, DCC_CFG_RESET, reg);
2970                write_csr(dd, DC_LCB_ERR_EN, dd->lcb_err_en);
2971        }
2972}
2973
2974/*
2975 * This routine should be called after the link has been transitioned to
2976 * OFFLINE (OFFLINE state has the side effect of putting the SerDes into
2977 * reset).
2978 *
2979 * The expectation is that the caller of this routine would have taken
2980 * care of properly transitioning the link into the correct state.
2981 */
2982static void dc_shutdown(struct hfi1_devdata *dd)
2983{
2984        unsigned long flags;
2985
2986        spin_lock_irqsave(&dd->dc8051_lock, flags);
2987        if (dd->dc_shutdown) {
2988                spin_unlock_irqrestore(&dd->dc8051_lock, flags);
2989                return;
2990        }
2991        dd->dc_shutdown = 1;
2992        spin_unlock_irqrestore(&dd->dc8051_lock, flags);
2993        /* Shutdown the LCB */
2994        lcb_shutdown(dd, 1);
2995        /* Going to OFFLINE would have caused the 8051 to put the
2996         * SerDes into reset already.  We just need to shut down
2997         * the 8051 itself. */
2998        write_csr(dd, DC_DC8051_CFG_RST, 0x1);
2999}
3000
3001/* Calling this after the DC has been brought out of reset should not
3002 * do any damage. */
3003static void dc_start(struct hfi1_devdata *dd)
3004{
3005        unsigned long flags;
3006        int ret;
3007
3008        spin_lock_irqsave(&dd->dc8051_lock, flags);
3009        if (!dd->dc_shutdown)
3010                goto done;
3011        spin_unlock_irqrestore(&dd->dc8051_lock, flags);
3012        /* Take the 8051 out of reset */
3013        write_csr(dd, DC_DC8051_CFG_RST, 0ull);
3014        /* Wait until 8051 is ready */
3015        ret = wait_fm_ready(dd, TIMEOUT_8051_START);
3016        if (ret) {
3017                dd_dev_err(dd, "%s: timeout starting 8051 firmware\n",
3018                        __func__);
3019        }
3020        /* Take away reset for LCB and RX FPE (set in lcb_shutdown). */
3021        write_csr(dd, DCC_CFG_RESET, 0x10);
3022        /* lcb_shutdown() with abort=1 does not restore these */
3023        write_csr(dd, DC_LCB_ERR_EN, dd->lcb_err_en);
3024        spin_lock_irqsave(&dd->dc8051_lock, flags);
3025        dd->dc_shutdown = 0;
3026done:
3027        spin_unlock_irqrestore(&dd->dc8051_lock, flags);
3028}
3029
3030/*
3031 * These LCB adjustments are for the Aurora SerDes core in the FPGA.
3032 */
3033static void adjust_lcb_for_fpga_serdes(struct hfi1_devdata *dd)
3034{
3035        u64 rx_radr, tx_radr;
3036        u32 version;
3037
3038        if (dd->icode != ICODE_FPGA_EMULATION)
3039                return;
3040
3041        /*
3042         * These LCB defaults on emulator _s are good, nothing to do here:
3043         *      LCB_CFG_TX_FIFOS_RADR
3044         *      LCB_CFG_RX_FIFOS_RADR
3045         *      LCB_CFG_LN_DCLK
3046         *      LCB_CFG_IGNORE_LOST_RCLK
3047         */
3048        if (is_emulator_s(dd))
3049                return;
3050        /* else this is _p */
3051
3052        version = emulator_rev(dd);
3053        if (!is_a0(dd))
3054                version = 0x2d; /* all B0 use 0x2d or higher settings */
3055
3056        if (version <= 0x12) {
3057                /* release 0x12 and below */
3058
3059                /*
3060                 * LCB_CFG_RX_FIFOS_RADR.RST_VAL = 0x9
3061                 * LCB_CFG_RX_FIFOS_RADR.OK_TO_JUMP_VAL = 0x9
3062                 * LCB_CFG_RX_FIFOS_RADR.DO_NOT_JUMP_VAL = 0xa
3063                 */
3064                rx_radr =
3065                      0xaull << DC_LCB_CFG_RX_FIFOS_RADR_DO_NOT_JUMP_VAL_SHIFT
3066                    | 0x9ull << DC_LCB_CFG_RX_FIFOS_RADR_OK_TO_JUMP_VAL_SHIFT
3067                    | 0x9ull << DC_LCB_CFG_RX_FIFOS_RADR_RST_VAL_SHIFT;
3068                /*
3069                 * LCB_CFG_TX_FIFOS_RADR.ON_REINIT = 0 (default)
3070                 * LCB_CFG_TX_FIFOS_RADR.RST_VAL = 6
3071                 */
3072                tx_radr = 6ull << DC_LCB_CFG_TX_FIFOS_RADR_RST_VAL_SHIFT;
3073        } else if (version <= 0x18) {
3074                /* release 0x13 up to 0x18 */
3075                /* LCB_CFG_RX_FIFOS_RADR = 0x988 */
3076                rx_radr =
3077                      0x9ull << DC_LCB_CFG_RX_FIFOS_RADR_DO_NOT_JUMP_VAL_SHIFT
3078                    | 0x8ull << DC_LCB_CFG_RX_FIFOS_RADR_OK_TO_JUMP_VAL_SHIFT
3079                    | 0x8ull << DC_LCB_CFG_RX_FIFOS_RADR_RST_VAL_SHIFT;
3080                tx_radr = 7ull << DC_LCB_CFG_TX_FIFOS_RADR_RST_VAL_SHIFT;
3081        } else if (version == 0x19) {
3082                /* release 0x19 */
3083                /* LCB_CFG_RX_FIFOS_RADR = 0xa99 */
3084                rx_radr =
3085                      0xAull << DC_LCB_CFG_RX_FIFOS_RADR_DO_NOT_JUMP_VAL_SHIFT
3086                    | 0x9ull << DC_LCB_CFG_RX_FIFOS_RADR_OK_TO_JUMP_VAL_SHIFT
3087                    | 0x9ull << DC_LCB_CFG_RX_FIFOS_RADR_RST_VAL_SHIFT;
3088                tx_radr = 3ull << DC_LCB_CFG_TX_FIFOS_RADR_RST_VAL_SHIFT;
3089        } else if (version == 0x1a) {
3090                /* release 0x1a */
3091                /* LCB_CFG_RX_FIFOS_RADR = 0x988 */
3092                rx_radr =
3093                      0x9ull << DC_LCB_CFG_RX_FIFOS_RADR_DO_NOT_JUMP_VAL_SHIFT
3094                    | 0x8ull << DC_LCB_CFG_RX_FIFOS_RADR_OK_TO_JUMP_VAL_SHIFT
3095                    | 0x8ull << DC_LCB_CFG_RX_FIFOS_RADR_RST_VAL_SHIFT;
3096                tx_radr = 7ull << DC_LCB_CFG_TX_FIFOS_RADR_RST_VAL_SHIFT;
3097                write_csr(dd, DC_LCB_CFG_LN_DCLK, 1ull);
3098        } else {
3099                /* release 0x1b and higher */
3100                /* LCB_CFG_RX_FIFOS_RADR = 0x877 */
3101                rx_radr =
3102                      0x8ull << DC_LCB_CFG_RX_FIFOS_RADR_DO_NOT_JUMP_VAL_SHIFT
3103                    | 0x7ull << DC_LCB_CFG_RX_FIFOS_RADR_OK_TO_JUMP_VAL_SHIFT
3104                    | 0x7ull << DC_LCB_CFG_RX_FIFOS_RADR_RST_VAL_SHIFT;
3105                tx_radr = 3ull << DC_LCB_CFG_TX_FIFOS_RADR_RST_VAL_SHIFT;
3106        }
3107
3108        write_csr(dd, DC_LCB_CFG_RX_FIFOS_RADR, rx_radr);
3109        /* LCB_CFG_IGNORE_LOST_RCLK.EN = 1 */
3110        write_csr(dd, DC_LCB_CFG_IGNORE_LOST_RCLK,
3111                DC_LCB_CFG_IGNORE_LOST_RCLK_EN_SMASK);
3112        write_csr(dd, DC_LCB_CFG_TX_FIFOS_RADR, tx_radr);
3113}
3114
3115/*
3116 * Handle an SMA idle message
3117 *
3118 * This is a work-queue function outside of the interrupt.
3119 */
3120void handle_sma_message(struct work_struct *work)
3121{
3122        struct hfi1_pportdata *ppd = container_of(work, struct hfi1_pportdata,
3123                                                        sma_message_work);
3124        struct hfi1_devdata *dd = ppd->dd;
3125        u64 msg;
3126        int ret;
3127
3128        /* msg is bytes 1-4 of the 40-bit idle message - the command
3129         * code is stripped off */
3130        ret = read_idle_sma(dd, &msg);
3131        if (ret)
3132                return;
3133        dd_dev_info(dd, "%s: SMA message 0x%llx\n", __func__, msg);
3134        /*
3135         * React to the SMA message.  Byte[1] (0 for us) is the command.
3136         */
3137        switch (msg & 0xff) {
3138        case SMA_IDLE_ARM:
3139                /*
3140                 * See OPAv1 table 9-14 - HFI and External Switch Ports Key
3141                 * State Transitions
3142                 *
3143                 * Only expected in INIT or ARMED, discard otherwise.
3144                 */
3145                if (ppd->host_link_state & (HLS_UP_INIT | HLS_UP_ARMED))
3146                        ppd->neighbor_normal = 1;
3147                break;
3148        case SMA_IDLE_ACTIVE:
3149                /*
3150                 * See OPAv1 table 9-14 - HFI and External Switch Ports Key
3151                 * State Transitions
3152                 *
3153                 * Can activate the node.  Discard otherwise.
3154                 */
3155                if (ppd->host_link_state == HLS_UP_ARMED
3156                                        && ppd->is_active_optimize_enabled) {
3157                        ppd->neighbor_normal = 1;
3158                        ret = set_link_state(ppd, HLS_UP_ACTIVE);
3159                        if (ret)
3160                                dd_dev_err(
3161                                        dd,
3162                                        "%s: received Active SMA idle message, couldn't set link to Active\n",
3163                                        __func__);
3164                }
3165                break;
3166        default:
3167                dd_dev_err(dd,
3168                        "%s: received unexpected SMA idle message 0x%llx\n",
3169                        __func__, msg);
3170                break;
3171        }
3172}
3173
3174static void adjust_rcvctrl(struct hfi1_devdata *dd, u64 add, u64 clear)
3175{
3176        u64 rcvctrl;
3177        unsigned long flags;
3178
3179        spin_lock_irqsave(&dd->rcvctrl_lock, flags);
3180        rcvctrl = read_csr(dd, RCV_CTRL);
3181        rcvctrl |= add;
3182        rcvctrl &= ~clear;
3183        write_csr(dd, RCV_CTRL, rcvctrl);
3184        spin_unlock_irqrestore(&dd->rcvctrl_lock, flags);
3185}
3186
3187static inline void add_rcvctrl(struct hfi1_devdata *dd, u64 add)
3188{
3189        adjust_rcvctrl(dd, add, 0);
3190}
3191
3192static inline void clear_rcvctrl(struct hfi1_devdata *dd, u64 clear)
3193{
3194        adjust_rcvctrl(dd, 0, clear);
3195}
3196
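/*
 * Usage sketch (illustrative): the helpers above serialize RCV_CTRL
 * updates under rcvctrl_lock, so enabling or disabling the port is a
 * single call, e.g.
 *
 *	add_rcvctrl(dd, RCV_CTRL_RCV_PORT_ENABLE_SMASK);
 *	...
 *	clear_rcvctrl(dd, RCV_CTRL_RCV_PORT_ENABLE_SMASK);
 *
 * exactly as the freeze and link-down paths below do.
 */
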
3197/*
3198 * Called from all interrupt handlers to start handling an SPC freeze.
3199 */
3200void start_freeze_handling(struct hfi1_pportdata *ppd, int flags)
3201{
3202        struct hfi1_devdata *dd = ppd->dd;
3203        struct send_context *sc;
3204        int i;
3205
3206        if (flags & FREEZE_SELF)
3207                write_csr(dd, CCE_CTRL, CCE_CTRL_SPC_FREEZE_SMASK);
3208
3209        /* enter frozen mode */
3210        dd->flags |= HFI1_FROZEN;
3211
3212        /* notify all SDMA engines that they are going into a freeze */
3213        sdma_freeze_notify(dd, !!(flags & FREEZE_LINK_DOWN));
3214
3215        /* do halt pre-handling on all enabled send contexts */
3216        for (i = 0; i < dd->num_send_contexts; i++) {
3217                sc = dd->send_contexts[i].sc;
3218                if (sc && (sc->flags & SCF_ENABLED))
3219                        sc_stop(sc, SCF_FROZEN | SCF_HALTED);
3220        }
3221
3222        /* Send contexts are frozen. Notify user space */
3223        hfi1_set_uevent_bits(ppd, _HFI1_EVENT_FROZEN_BIT);
3224
3225        if (flags & FREEZE_ABORT) {
3226                dd_dev_err(dd,
3227                           "Aborted freeze recovery. Please REBOOT system\n");
3228                return;
3229        }
3230        /* queue non-interrupt handler */
3231        queue_work(ppd->hfi1_wq, &ppd->freeze_work);
3232}
3233
3234/*
3235 * Wait until all 4 sub-blocks indicate that they have frozen or unfrozen,
3236 * depending on the "freeze" parameter.
3237 *
3238 * No need to return an error if it times out, our only option
3239 * is to proceed anyway.
3240 */
3241static void wait_for_freeze_status(struct hfi1_devdata *dd, int freeze)
3242{
3243        unsigned long timeout;
3244        u64 reg;
3245
3246        timeout = jiffies + msecs_to_jiffies(FREEZE_STATUS_TIMEOUT);
3247        while (1) {
3248                reg = read_csr(dd, CCE_STATUS);
3249                if (freeze) {
3250                        /* waiting until all indicators are set */
3251                        if ((reg & ALL_FROZE) == ALL_FROZE)
3252                                return; /* all done */
3253                } else {
3254                        /* waiting until all indicators are clear */
3255                        if ((reg & ALL_FROZE) == 0)
3256                                return; /* all done */
3257                }
3258
3259                if (time_after(jiffies, timeout)) {
3260                        dd_dev_err(dd,
3261                                "Time out waiting for SPC %sfreeze, bits 0x%llx, expecting 0x%llx, continuing\n",
3262                                freeze ? "" : "un",
3263                                reg & ALL_FROZE,
3264                                freeze ? ALL_FROZE : 0ull);
3265                        return;
3266                }
3267                usleep_range(80, 120);
3268        }
3269}
3270
3271/*
3272 * Do all freeze handling for the RXE block.
3273 */
3274static void rxe_freeze(struct hfi1_devdata *dd)
3275{
3276        int i;
3277
3278        /* disable port */
3279        clear_rcvctrl(dd, RCV_CTRL_RCV_PORT_ENABLE_SMASK);
3280
3281        /* disable all receive contexts */
3282        for (i = 0; i < dd->num_rcv_contexts; i++)
3283                hfi1_rcvctrl(dd, HFI1_RCVCTRL_CTXT_DIS, i);
3284}
3285
3286/*
3287 * Unfreeze handling for the RXE block - kernel contexts only.
3288 * This will also enable the port.  User contexts will do unfreeze
3289 * handling on a per-context basis as they call into the driver.
3290 *
3291 */
3292static void rxe_kernel_unfreeze(struct hfi1_devdata *dd)
3293{
3294        int i;
3295
3296        /* enable all kernel contexts */
3297        for (i = 0; i < dd->n_krcv_queues; i++)
3298                hfi1_rcvctrl(dd, HFI1_RCVCTRL_CTXT_ENB, i);
3299
3300        /* enable port */
3301        add_rcvctrl(dd, RCV_CTRL_RCV_PORT_ENABLE_SMASK);
3302}
3303
3304/*
3305 * Non-interrupt SPC freeze handling.
3306 *
3307 * This is a work-queue function outside of the triggering interrupt.
3308 */
3309void handle_freeze(struct work_struct *work)
3310{
3311        struct hfi1_pportdata *ppd = container_of(work, struct hfi1_pportdata,
3312                                                                freeze_work);
3313        struct hfi1_devdata *dd = ppd->dd;
3314
3315        /* wait for freeze indicators on all affected blocks */
3316        dd_dev_info(dd, "Entering SPC freeze\n");
3317        wait_for_freeze_status(dd, 1);
3318
3319        /* SPC is now frozen */
3320
3321        /* do send PIO freeze steps */
3322        pio_freeze(dd);
3323
3324        /* do send DMA freeze steps */
3325        sdma_freeze(dd);
3326
3327        /* do send egress freeze steps - nothing to do */
3328
3329        /* do receive freeze steps */
3330        rxe_freeze(dd);
3331
3332        /*
3333         * Unfreeze the hardware - clear the freeze, wait for each
3334         * block's frozen bit to clear, then clear the frozen flag.
3335         */
3336        write_csr(dd, CCE_CTRL, CCE_CTRL_SPC_UNFREEZE_SMASK);
3337        wait_for_freeze_status(dd, 0);
3338
3339        if (is_a0(dd)) {
3340                write_csr(dd, CCE_CTRL, CCE_CTRL_SPC_FREEZE_SMASK);
3341                wait_for_freeze_status(dd, 1);
3342                write_csr(dd, CCE_CTRL, CCE_CTRL_SPC_UNFREEZE_SMASK);
3343                wait_for_freeze_status(dd, 0);
3344        }
3345
3346        /* do send PIO unfreeze steps for kernel contexts */
3347        pio_kernel_unfreeze(dd);
3348
3349        /* do send DMA unfreeze steps */
3350        sdma_unfreeze(dd);
3351
3352        /* do send egress unfreeze steps - nothing to do */
3353
3354        /* do receive unfreeze steps for kernel contexts */
3355        rxe_kernel_unfreeze(dd);
3356
3357        /*
3358         * The unfreeze procedure touches global device registers when
3359         * it disables and re-enables RXE. Mark the device unfrozen
3360         * after all that is done so other parts of the driver waiting
3361         * for the device to unfreeze don't do things out of order.
3362         *
3363         * The above implies that the meaning of HFI1_FROZEN flag is
3364         * "Device has gone into freeze mode and freeze mode handling
3365         * is still in progress."
3366         *
3367         * The flag will be removed when freeze mode processing has
3368         * completed.
3369         */
3370        dd->flags &= ~HFI1_FROZEN;
3371        wake_up(&dd->event_queue);
3372
3373        /* no longer frozen */
3374        dd_dev_info(dd, "Exiting SPC freeze\n");
3375}
3376
3377/*
3378 * Handle a link up interrupt from the 8051.
3379 *
3380 * This is a work-queue function outside of the interrupt.
3381 */
3382void handle_link_up(struct work_struct *work)
3383{
3384        struct hfi1_pportdata *ppd = container_of(work, struct hfi1_pportdata,
3385                                                                link_up_work);
3386        set_link_state(ppd, HLS_UP_INIT);
3387
3388        /* cache the read of DC_LCB_STS_ROUND_TRIP_LTP_CNT */
3389        read_ltp_rtt(ppd->dd);
3390        /*
3391         * OPA specifies that certain counters are cleared on a transition
3392         * to link up, so do that.
3393         */
3394        clear_linkup_counters(ppd->dd);
3395        /*
3396         * And (re)set link up default values.
3397         */
3398        set_linkup_defaults(ppd);
3399
3400        /* enforce link speed enabled */
3401        if ((ppd->link_speed_active & ppd->link_speed_enabled) == 0) {
3402                /* oops - current speed is not enabled, bounce */
3403                dd_dev_err(ppd->dd,
3404                        "Link speed active 0x%x is outside enabled 0x%x, downing link\n",
3405                        ppd->link_speed_active, ppd->link_speed_enabled);
3406                set_link_down_reason(ppd, OPA_LINKDOWN_REASON_SPEED_POLICY, 0,
3407                        OPA_LINKDOWN_REASON_SPEED_POLICY);
3408                set_link_state(ppd, HLS_DN_OFFLINE);
3409                start_link(ppd);
3410        }
3411}
3412
3413/* Several pieces of LNI information were cached for SMA in ppd.
3414 * Reset these on link down */
3415static void reset_neighbor_info(struct hfi1_pportdata *ppd)
3416{
3417        ppd->neighbor_guid = 0;
3418        ppd->neighbor_port_number = 0;
3419        ppd->neighbor_type = 0;
3420        ppd->neighbor_fm_security = 0;
3421}
3422
3423/*
3424 * Handle a link down interrupt from the 8051.
3425 *
3426 * This is a work-queue function outside of the interrupt.
3427 */
3428void handle_link_down(struct work_struct *work)
3429{
3430        u8 lcl_reason, neigh_reason = 0;
3431        struct hfi1_pportdata *ppd = container_of(work, struct hfi1_pportdata,
3432                                                                link_down_work);
3433
3434        /* go offline first, then deal with reasons */
3435        set_link_state(ppd, HLS_DN_OFFLINE);
3436
3437        lcl_reason = 0;
3438        read_planned_down_reason_code(ppd->dd, &neigh_reason);
3439
3440        /*
3441         * If no reason, assume peer-initiated but missed
3442         * LinkGoingDown idle flits.
3443         */
3444        if (neigh_reason == 0)
3445                lcl_reason = OPA_LINKDOWN_REASON_NEIGHBOR_UNKNOWN;
3446
3447        set_link_down_reason(ppd, lcl_reason, neigh_reason, 0);
3448
3449        reset_neighbor_info(ppd);
3450
3451        /* disable the port */
3452        clear_rcvctrl(ppd->dd, RCV_CTRL_RCV_PORT_ENABLE_SMASK);
3453
3454        /* If there is no cable attached, turn the DC off. Otherwise,
3455         * start the link bring up. */
3456        if (!qsfp_mod_present(ppd))
3457                dc_shutdown(ppd->dd);
3458        else
3459                start_link(ppd);
3460}
3461
3462void handle_link_bounce(struct work_struct *work)
3463{
3464        struct hfi1_pportdata *ppd = container_of(work, struct hfi1_pportdata,
3465                                                        link_bounce_work);
3466
3467        /*
3468         * Only do something if the link is currently up.
3469         */
3470        if (ppd->host_link_state & HLS_UP) {
3471                set_link_state(ppd, HLS_DN_OFFLINE);
3472                start_link(ppd);
3473        } else {
3474                dd_dev_info(ppd->dd, "%s: link not up (%s), nothing to do\n",
3475                        __func__, link_state_name(ppd->host_link_state));
3476        }
3477}
3478
3479/*
3480 * Mask conversion: Capability exchange to Port LTP.  The capability
3481 * exchange has an implicit 16b CRC that is mandatory.
3482 */
3483static int cap_to_port_ltp(int cap)
3484{
3485        int port_ltp = PORT_LTP_CRC_MODE_16; /* this mode is mandatory */
3486
3487        if (cap & CAP_CRC_14B)
3488                port_ltp |= PORT_LTP_CRC_MODE_14;
3489        if (cap & CAP_CRC_48B)
3490                port_ltp |= PORT_LTP_CRC_MODE_48;
3491        if (cap & CAP_CRC_12B_16B_PER_LANE)
3492                port_ltp |= PORT_LTP_CRC_MODE_PER_LANE;
3493
3494        return port_ltp;
3495}
3496
3497/*
3498 * Convert an OPA Port LTP mask to capability mask
3499 */
3500int port_ltp_to_cap(int port_ltp)
3501{
3502        int cap_mask = 0;
3503
3504        if (port_ltp & PORT_LTP_CRC_MODE_14)
3505                cap_mask |= CAP_CRC_14B;
3506        if (port_ltp & PORT_LTP_CRC_MODE_48)
3507                cap_mask |= CAP_CRC_48B;
3508        if (port_ltp & PORT_LTP_CRC_MODE_PER_LANE)
3509                cap_mask |= CAP_CRC_12B_16B_PER_LANE;
3510
3511        return cap_mask;
3512}
3513
3514/*
3515 * Convert a single DC LCB CRC mode to an OPA Port LTP mask.
3516 */
3517static int lcb_to_port_ltp(int lcb_crc)
3518{
3519        int port_ltp = 0;
3520
3521        if (lcb_crc == LCB_CRC_12B_16B_PER_LANE)
3522                port_ltp = PORT_LTP_CRC_MODE_PER_LANE;
3523        else if (lcb_crc == LCB_CRC_48B)
3524                port_ltp = PORT_LTP_CRC_MODE_48;
3525        else if (lcb_crc == LCB_CRC_14B)
3526                port_ltp = PORT_LTP_CRC_MODE_14;
3527        else
3528                port_ltp = PORT_LTP_CRC_MODE_16;
3529
3530        return port_ltp;
3531}
3532
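/*
 * Worked example for the conversions above (illustrative): a capability
 * mask of (CAP_CRC_14B | CAP_CRC_48B) maps to a port LTP mask of
 * (PORT_LTP_CRC_MODE_16 | PORT_LTP_CRC_MODE_14 | PORT_LTP_CRC_MODE_48),
 * since the 16b mode is always present; port_ltp_to_cap() then drops
 * the mandatory 16b mode on the way back, restoring the original mask.
 */
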
3533/*
3534 * Our neighbor has indicated that we are allowed to act as a fabric
3535 * manager, so place the full management partition key in the second
3536 * (0-based) pkey array position (see OPAv1, section 20.2.2.6.8). Note
3537 * that we should already have the limited management partition key in
3538 * array element 1, and also that the port is not yet up when
3539 * add_full_mgmt_pkey() is invoked.
3540 */
3541static void add_full_mgmt_pkey(struct hfi1_pportdata *ppd)
3542{
3543        struct hfi1_devdata *dd = ppd->dd;
3544
3545        /* Sanity check - ppd->pkeys[2] should be 0 */
3546        if (ppd->pkeys[2] != 0)
3547                dd_dev_err(dd, "%s pkey[2] already set to 0x%x, resetting it to 0x%x\n",
3548                           __func__, ppd->pkeys[2], FULL_MGMT_P_KEY);
3549        ppd->pkeys[2] = FULL_MGMT_P_KEY;
3550        (void)hfi1_set_ib_cfg(ppd, HFI1_IB_CFG_PKEYS, 0);
3551}
3552
3553/*
3554 * Convert the given link width to the OPA link width bitmask.
3555 */
3556static u16 link_width_to_bits(struct hfi1_devdata *dd, u16 width)
3557{
3558        switch (width) {
3559        case 0:
3560                /*
3561                 * Simulator and quick linkup do not set the width.
3562                 * Just set it to 4x without complaint.
3563                 */
3564                if (dd->icode == ICODE_FUNCTIONAL_SIMULATOR || quick_linkup)
3565                        return OPA_LINK_WIDTH_4X;
3566                return 0; /* no lanes up */
3567        case 1: return OPA_LINK_WIDTH_1X;
3568        case 2: return OPA_LINK_WIDTH_2X;
3569        case 3: return OPA_LINK_WIDTH_3X;
3570        default:
3571                dd_dev_info(dd, "%s: invalid width %d, using 4\n",
3572                        __func__, width);
3573                /* fall through */
3574        case 4: return OPA_LINK_WIDTH_4X;
3575        }
3576}
3577
3578/*
3579 * Do a population count on the bottom nibble.
3580 */
3581static const u8 bit_counts[16] = {
3582        0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4
3583};
3584static inline u8 nibble_to_count(u8 nibble)
3585{
3586        return bit_counts[nibble & 0xf];
3587}
3588
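/*
 * Illustrative: nibble_to_count(0xb) == 3, since 0xb = 0b1011 has
 * lanes 0, 1, and 3 enabled.
 */
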
3589/*
3590 * Read the active lane information from the 8051 registers and return
3591 * their widths.
3592 *
3593 * Active lane information is found in these 8051 registers:
3594 *      enable_lane_tx
3595 *      enable_lane_rx
3596 */
3597static void get_link_widths(struct hfi1_devdata *dd, u16 *tx_width,
3598                            u16 *rx_width)
3599{
3600        u16 tx, rx;
3601        u8 enable_lane_rx;
3602        u8 enable_lane_tx;
3603        u8 tx_polarity_inversion;
3604        u8 rx_polarity_inversion;
3605        u8 max_rate;
3606
3607        /* read the active lanes */
3608        read_tx_settings(dd, &enable_lane_tx, &tx_polarity_inversion,
3609                                &rx_polarity_inversion, &max_rate);
3610        read_local_lni(dd, &enable_lane_rx);
3611
3612        /* convert to counts */
3613        tx = nibble_to_count(enable_lane_tx);
3614        rx = nibble_to_count(enable_lane_rx);
3615
3616        /*
3617         * Set link_speed_active here, overriding what was set in
3618         * handle_verify_cap().  The ASIC 8051 firmware does not correctly
3619         * set the max_rate field in handle_verify_cap until v0.19.
3620         */
3621        if ((dd->icode == ICODE_RTL_SILICON)
3622                                && (dd->dc8051_ver < dc8051_ver(0, 19))) {
3623                /* max_rate: 0 = 12.5G, 1 = 25G */
3624                switch (max_rate) {
3625                case 0:
3626                        dd->pport[0].link_speed_active = OPA_LINK_SPEED_12_5G;
3627                        break;
3628                default:
3629                        dd_dev_err(dd,
3630                                "%s: unexpected max rate %d, using 25Gb\n",
3631                                __func__, (int)max_rate);
3632                        /* fall through */
3633                case 1:
3634                        dd->pport[0].link_speed_active = OPA_LINK_SPEED_25G;
3635                        break;
3636                }
3637        }
3638
3639        dd_dev_info(dd,
3640                "Fabric active lanes (width): tx 0x%x (%d), rx 0x%x (%d)\n",
3641                enable_lane_tx, tx, enable_lane_rx, rx);
3642        *tx_width = link_width_to_bits(dd, tx);
3643        *rx_width = link_width_to_bits(dd, rx);
3644}
3645
3646/*
3647 * Read verify_cap_local_fm_link_width[1] to obtain the link widths.
3648 * Valid after the end of VerifyCap and during LinkUp.  Does not change
3649 * after link up.  I.e. look elsewhere for downgrade information.
3650 *
3651 * Bits are:
3652 *      + bits [7:4] contain the number of active transmitters
3653 *      + bits [3:0] contain the number of active receivers
3654 * These are numbers 1 through 4 and can be different values if the
3655 * link is asymmetric.
3656 *
3657 * verify_cap_local_fm_link_width[0] retains its original value.
3658 */
3659static void get_linkup_widths(struct hfi1_devdata *dd, u16 *tx_width,
3660                              u16 *rx_width)
3661{
3662        u16 widths, tx, rx;
3663        u8 misc_bits, local_flags;
3664        u16 active_tx, active_rx;
3665
3666        read_vc_local_link_width(dd, &misc_bits, &local_flags, &widths);
3667        tx = widths >> 12;
3668        rx = (widths >> 8) & 0xf;
3669
3670        *tx_width = link_width_to_bits(dd, tx);
3671        *rx_width = link_width_to_bits(dd, rx);
3672
3673        /* print the active widths */
3674        get_link_widths(dd, &active_tx, &active_rx);
3675}
3676
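/*
 * Illustrative decode for get_linkup_widths() (not driver code): a
 * widths value of 0x4400 yields tx = 4 and rx = 4, i.e.
 * OPA_LINK_WIDTH_4X in both directions, while 0x2400 would describe an
 * asymmetric link with two active transmitters and four active
 * receivers.
 */
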
3677/*
3678 * Set ppd->link_width_active and ppd->link_width_downgrade_active using
3679 * hardware information when the link first comes up.
3680 *
3681 * The link width is not available until after VerifyCap.AllFramesReceived
3682 * (the trigger for handle_verify_cap), so this is outside that routine
3683 * and should be called when the 8051 signals linkup.
3684 */
3685void get_linkup_link_widths(struct hfi1_pportdata *ppd)
3686{
3687        u16 tx_width, rx_width;
3688
3689        /* get end-of-LNI link widths */
3690        get_linkup_widths(ppd->dd, &tx_width, &rx_width);
3691
3692        /* use tx_width as the link is supposed to be symmetric on link up */
3693        ppd->link_width_active = tx_width;
3694        /* link width downgrade active (LWD.A) starts out matching LW.A */
3695        ppd->link_width_downgrade_tx_active = ppd->link_width_active;
3696        ppd->link_width_downgrade_rx_active = ppd->link_width_active;
3697        /* per OPA spec, on link up LWD.E resets to LWD.S */
3698        ppd->link_width_downgrade_enabled = ppd->link_width_downgrade_supported;
3699        /* cache the active egress rate (units [10^6 bits/sec]) */
3700        ppd->current_egress_rate = active_egress_rate(ppd);
3701}
3702
3703/*
3704 * Handle a verify capabilities interrupt from the 8051.
3705 *
3706 * This is a work-queue function outside of the interrupt.
3707 */
3708void handle_verify_cap(struct work_struct *work)
3709{
3710        struct hfi1_pportdata *ppd = container_of(work, struct hfi1_pportdata,
3711                                                                link_vc_work);
3712        struct hfi1_devdata *dd = ppd->dd;
3713        u64 reg;
3714        u8 power_management;
3715        u8 continuous;
3716        u8 vcu;
3717        u8 vau;
3718        u8 z;
3719        u16 vl15buf;
3720        u16 link_widths;
3721        u16 crc_mask;
3722        u16 crc_val;
3723        u16 device_id;
3724        u16 active_tx, active_rx;
3725        u8 partner_supported_crc;
3726        u8 remote_tx_rate;
3727        u8 device_rev;
3728
3729        set_link_state(ppd, HLS_VERIFY_CAP);
3730
3731        lcb_shutdown(dd, 0);
3732        adjust_lcb_for_fpga_serdes(dd);
3733
3734        /*
3735         * These are now valid:
3736         *      remote VerifyCap fields in the general LNI config
3737         *      CSR DC8051_STS_REMOTE_GUID
3738         *      CSR DC8051_STS_REMOTE_NODE_TYPE
3739         *      CSR DC8051_STS_REMOTE_FM_SECURITY
3740         *      CSR DC8051_STS_REMOTE_PORT_NO
3741         */
3742
3743        read_vc_remote_phy(dd, &power_management, &continuous);
3744        read_vc_remote_fabric(
3745                dd,
3746                &vau,
3747                &z,
3748                &vcu,
3749                &vl15buf,
3750                &partner_supported_crc);
3751        read_vc_remote_link_width(dd, &remote_tx_rate, &link_widths);
3752        read_remote_device_id(dd, &device_id, &device_rev);
3753        /*
3754         * And the 'MgmtAllowed' information, which is exchanged during
3755 * LNI, is also available at this point.
3756         */
3757        read_mgmt_allowed(dd, &ppd->mgmt_allowed);
3758        /* print the active widths */
3759        get_link_widths(dd, &active_tx, &active_rx);
3760        dd_dev_info(dd,
3761                "Peer PHY: power management 0x%x, continuous updates 0x%x\n",
3762                (int)power_management, (int)continuous);
3763        dd_dev_info(dd,
3764                "Peer Fabric: vAU %d, Z %d, vCU %d, vl15 credits 0x%x, CRC sizes 0x%x\n",
3765                (int)vau,
3766                (int)z,
3767                (int)vcu,
3768                (int)vl15buf,
3769                (int)partner_supported_crc);
3770        dd_dev_info(dd, "Peer Link Width: tx rate 0x%x, widths 0x%x\n",
3771                (u32)remote_tx_rate, (u32)link_widths);
3772        dd_dev_info(dd, "Peer Device ID: 0x%04x, Revision 0x%02x\n",
3773                (u32)device_id, (u32)device_rev);
3774        /*
3775         * The peer vAU value just read is the peer receiver value.  HFI does
3776         * not support a transmit vAU of 0 (AU == 8).  We advertised that
3777         * with Z=1 in the fabric capabilities sent to the peer.  The peer
3778         * will see our Z=1, and, if it advertised a vAU of 0, will move its
3779         * receive to vAU of 1 (AU == 16).  Do the same here.  We do not care
3780         * about the peer Z value - our sent vAU is 3 (hardwired) and is not
3781         * subject to the Z value exception.
3782         */
3783        if (vau == 0)
3784                vau = 1;
3785        set_up_vl15(dd, vau, vl15buf);
3786
3787        /* set up the LCB CRC mode */
3788        crc_mask = ppd->port_crc_mode_enabled & partner_supported_crc;
3789
3790        /* order is important: use the lowest bit in common */
3791        if (crc_mask & CAP_CRC_14B)
3792                crc_val = LCB_CRC_14B;
3793        else if (crc_mask & CAP_CRC_48B)
3794                crc_val = LCB_CRC_48B;
3795        else if (crc_mask & CAP_CRC_12B_16B_PER_LANE)
3796                crc_val = LCB_CRC_12B_16B_PER_LANE;
3797        else
3798                crc_val = LCB_CRC_16B;
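        /*
         * Example (illustrative): if both ends enabled 14b and 48b CRC,
         * crc_mask has CAP_CRC_14B and CAP_CRC_48B set and the chain
         * above picks LCB_CRC_14B, the lowest common bit.
         */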
3799
3800        dd_dev_info(dd, "Final LCB CRC mode: %d\n", (int)crc_val);
3801        write_csr(dd, DC_LCB_CFG_CRC_MODE,
3802                  (u64)crc_val << DC_LCB_CFG_CRC_MODE_TX_VAL_SHIFT);
3803
3804        /* set (14b only) or clear sideband credit */
3805        reg = read_csr(dd, SEND_CM_CTRL);
3806        if (crc_val == LCB_CRC_14B && crc_14b_sideband) {
3807                write_csr(dd, SEND_CM_CTRL,
3808                        reg | SEND_CM_CTRL_FORCE_CREDIT_MODE_SMASK);
3809        } else {
3810                write_csr(dd, SEND_CM_CTRL,
3811                        reg & ~SEND_CM_CTRL_FORCE_CREDIT_MODE_SMASK);
3812        }
3813
3814        ppd->link_speed_active = 0;     /* invalid value */
3815        if (dd->dc8051_ver < dc8051_ver(0, 20)) {
3816                /* remote_tx_rate: 0 = 12.5G, 1 = 25G */
3817                switch (remote_tx_rate) {
3818                case 0:
3819                        ppd->link_speed_active = OPA_LINK_SPEED_12_5G;
3820                        break;
3821                case 1:
3822                        ppd->link_speed_active = OPA_LINK_SPEED_25G;
3823                        break;
3824                }
3825        } else {
3826                /* actual rate is highest bit of the ANDed rates */
3827                u8 rate = remote_tx_rate & ppd->local_tx_rate;
3828
3829                if (rate & 2)
3830                        ppd->link_speed_active = OPA_LINK_SPEED_25G;
3831                else if (rate & 1)
3832                        ppd->link_speed_active = OPA_LINK_SPEED_12_5G;
3833        }
3834        if (ppd->link_speed_active == 0) {
3835                dd_dev_err(dd, "%s: unexpected remote tx rate %d, using 25Gb\n",
3836                        __func__, (int)remote_tx_rate);
3837                ppd->link_speed_active = OPA_LINK_SPEED_25G;
3838        }
3839
3840        /*
3841         * Cache the values of the supported, enabled, and active
3842         * LTP CRC modes to return in 'portinfo' queries. But the bit
3843         * flags that are returned in the portinfo query differ from
3844         * what's in the link_crc_mask, crc_sizes, and crc_val
3845         * variables. Convert these here.
3846         */
3847        ppd->port_ltp_crc_mode = cap_to_port_ltp(link_crc_mask) << 8;
3848                /* supported crc modes */
3849        ppd->port_ltp_crc_mode |=
3850                cap_to_port_ltp(ppd->port_crc_mode_enabled) << 4;
3851                /* enabled crc modes */
3852        ppd->port_ltp_crc_mode |= lcb_to_port_ltp(crc_val);
3853                /* active crc mode */
3854
3855        /* set up the remote credit return table */
3856        assign_remote_cm_au_table(dd, vcu);
3857
3858        /*
3859         * The LCB is reset on entry to handle_verify_cap(), so this must
3860         * be applied on every link up.
3861         *
3862         * Adjust LCB error kill enable to kill the link if
3863         * these RBUF errors are seen:
3864         *      REPLAY_BUF_MBE_SMASK
3865         *      FLIT_INPUT_BUF_MBE_SMASK
3866         */
3867        if (is_a0(dd)) {                        /* fixed in B0 */
3868                reg = read_csr(dd, DC_LCB_CFG_LINK_KILL_EN);
3869                reg |= DC_LCB_CFG_LINK_KILL_EN_REPLAY_BUF_MBE_SMASK
3870                        | DC_LCB_CFG_LINK_KILL_EN_FLIT_INPUT_BUF_MBE_SMASK;
3871                write_csr(dd, DC_LCB_CFG_LINK_KILL_EN, reg);
3872        }
3873
3874        /* pull LCB fifos out of reset - all fifo clocks must be stable */
3875        write_csr(dd, DC_LCB_CFG_TX_FIFOS_RESET, 0);
3876
3877        /* give 8051 access to the LCB CSRs */
3878        write_csr(dd, DC_LCB_ERR_EN, 0); /* mask LCB errors */
3879        set_8051_lcb_access(dd);
3880
3881        ppd->neighbor_guid =
3882                read_csr(dd, DC_DC8051_STS_REMOTE_GUID);
3883        ppd->neighbor_port_number = read_csr(dd, DC_DC8051_STS_REMOTE_PORT_NO) &
3884                                        DC_DC8051_STS_REMOTE_PORT_NO_VAL_SMASK;
3885        ppd->neighbor_type =
3886                read_csr(dd, DC_DC8051_STS_REMOTE_NODE_TYPE) &
3887                DC_DC8051_STS_REMOTE_NODE_TYPE_VAL_MASK;
3888        ppd->neighbor_fm_security =
3889                read_csr(dd, DC_DC8051_STS_REMOTE_FM_SECURITY) &
3890                DC_DC8051_STS_LOCAL_FM_SECURITY_DISABLED_MASK;
3891        dd_dev_info(dd,
3892                "Neighbor Guid: %llx Neighbor type %d MgmtAllowed %d FM security bypass %d\n",
3893                ppd->neighbor_guid, ppd->neighbor_type,
3894                ppd->mgmt_allowed, ppd->neighbor_fm_security);
3895        if (ppd->mgmt_allowed)
3896                add_full_mgmt_pkey(ppd);
3897
3898        /* tell the 8051 to go to LinkUp */
3899        set_link_state(ppd, HLS_GOING_UP);
3900}
3901
3902/*
3903 * Apply the link width downgrade enabled policy against the current active
3904 * link widths.
3905 *
3906 * Called when the enabled policy changes or the active link widths change.
3907 */
3908void apply_link_downgrade_policy(struct hfi1_pportdata *ppd, int refresh_widths)
3909{
3910        int skip = 1;
3911        int do_bounce = 0;
3912        u16 lwde = ppd->link_width_downgrade_enabled;
3913        u16 tx, rx;
3914
3915        mutex_lock(&ppd->hls_lock);
3916        /* only apply if the link is up */
3917        if (ppd->host_link_state & HLS_UP)
3918                skip = 0;
3919        mutex_unlock(&ppd->hls_lock);
3920        if (skip)
3921                return;
3922
3923        if (refresh_widths) {
3924                get_link_widths(ppd->dd, &tx, &rx);
3925                ppd->link_width_downgrade_tx_active = tx;
3926                ppd->link_width_downgrade_rx_active = rx;
3927        }
3928
3929        if (lwde == 0) {
3930                /* downgrade is disabled */
3931
3932                /* bounce if not at starting active width */
3933                if ((ppd->link_width_active !=
3934                                        ppd->link_width_downgrade_tx_active)
3935                                || (ppd->link_width_active !=
3936                                        ppd->link_width_downgrade_rx_active)) {
3937                        dd_dev_err(ppd->dd,
3938                                "Link downgrade is disabled and link has downgraded, downing link\n");
3939                        dd_dev_err(ppd->dd,
3940                                "  original 0x%x, tx active 0x%x, rx active 0x%x\n",
3941                                ppd->link_width_active,
3942                                ppd->link_width_downgrade_tx_active,
3943                                ppd->link_width_downgrade_rx_active);
3944                        do_bounce = 1;
3945                }
3946        } else if ((lwde & ppd->link_width_downgrade_tx_active) == 0
3947                || (lwde & ppd->link_width_downgrade_rx_active) == 0) {
3948                /* Tx or Rx is outside the enabled policy */
3949                dd_dev_err(ppd->dd,
3950                        "Link is outside of downgrade allowed, downing link\n");
3951                dd_dev_err(ppd->dd,
3952                        "  enabled 0x%x, tx active 0x%x, rx active 0x%x\n",
3953                        lwde,
3954                        ppd->link_width_downgrade_tx_active,
3955                        ppd->link_width_downgrade_rx_active);
3956                do_bounce = 1;
3957        }
3958
3959        if (do_bounce) {
3960                set_link_down_reason(ppd, OPA_LINKDOWN_REASON_WIDTH_POLICY, 0,
3961                  OPA_LINKDOWN_REASON_WIDTH_POLICY);
3962                set_link_state(ppd, HLS_DN_OFFLINE);
3963                start_link(ppd);
3964        }
3965}
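
/*
 * Worked example (illustrative) of the policy check above; widths use
 * the OPA one-hot encoding, e.g. OPA_LINK_WIDTH_2X:
 *
 *	lwde = OPA_LINK_WIDTH_3X | OPA_LINK_WIDTH_4X;
 *	link_width_downgrade_rx_active = OPA_LINK_WIDTH_2X;
 *
 * Here (lwde & rx active) == 0, so the receive side is outside the
 * enabled policy and the link is bounced.  With lwde == 0 no downgrade
 * is allowed at all: any active width that differs from the originally
 * negotiated width bounces the link.
 */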
3966
3967/*
3968 * Handle a link downgrade interrupt from the 8051.
3969 *
3970 * This is a work-queue function outside of the interrupt.
3971 */
3972void handle_link_downgrade(struct work_struct *work)
3973{
3974        struct hfi1_pportdata *ppd = container_of(work, struct hfi1_pportdata,
3975                                                        link_downgrade_work);
3976
3977        dd_dev_info(ppd->dd, "8051: Link width downgrade\n");
3978        apply_link_downgrade_policy(ppd, 1);
3979}
3980
3981static char *dcc_err_string(char *buf, int buf_len, u64 flags)
3982{
3983        return flag_string(buf, buf_len, flags, dcc_err_flags,
3984                ARRAY_SIZE(dcc_err_flags));
3985}
3986
3987static char *lcb_err_string(char *buf, int buf_len, u64 flags)
3988{
3989        return flag_string(buf, buf_len, flags, lcb_err_flags,
3990                ARRAY_SIZE(lcb_err_flags));
3991}
3992
3993static char *dc8051_err_string(char *buf, int buf_len, u64 flags)
3994{
3995        return flag_string(buf, buf_len, flags, dc8051_err_flags,
3996                ARRAY_SIZE(dc8051_err_flags));
3997}
3998
3999static char *dc8051_info_err_string(char *buf, int buf_len, u64 flags)
4000{
4001        return flag_string(buf, buf_len, flags, dc8051_info_err_flags,
4002                ARRAY_SIZE(dc8051_info_err_flags));
4003}
4004
4005static char *dc8051_info_host_msg_string(char *buf, int buf_len, u64 flags)
4006{
4007        return flag_string(buf, buf_len, flags, dc8051_info_host_msg_flags,
4008                ARRAY_SIZE(dc8051_info_host_msg_flags));
4009}
4010
4011static void handle_8051_interrupt(struct hfi1_devdata *dd, u32 unused, u64 reg)
4012{
4013        struct hfi1_pportdata *ppd = dd->pport;
4014        u64 info, err, host_msg;
4015        int queue_link_down = 0;
4016        char buf[96];
4017
4018        /* look at the flags */
4019        if (reg & DC_DC8051_ERR_FLG_SET_BY_8051_SMASK) {
4020                /* 8051 information set by firmware */
4021                /* read DC8051_DBG_ERR_INFO_SET_BY_8051 for details */
4022                info = read_csr(dd, DC_DC8051_DBG_ERR_INFO_SET_BY_8051);
4023                err = (info >> DC_DC8051_DBG_ERR_INFO_SET_BY_8051_ERROR_SHIFT)
4024                        & DC_DC8051_DBG_ERR_INFO_SET_BY_8051_ERROR_MASK;
4025                host_msg = (info >>
4026                        DC_DC8051_DBG_ERR_INFO_SET_BY_8051_HOST_MSG_SHIFT)
4027                        & DC_DC8051_DBG_ERR_INFO_SET_BY_8051_HOST_MSG_MASK;
4028
4029                /*
4030                 * Handle error flags.
4031                 */
4032                if (err & FAILED_LNI) {
4033                        /*
4034                         * LNI error indications are cleared by the 8051
4035                         * only when starting polling.  Only pay attention
4036                         * to them when in the states that occur during
4037                         * LNI.
4038                         */
4039                        if (ppd->host_link_state
4040                            & (HLS_DN_POLL | HLS_VERIFY_CAP | HLS_GOING_UP)) {
4041                                queue_link_down = 1;
4042                                dd_dev_info(dd, "Link error: %s\n",
4043                                        dc8051_info_err_string(buf,
4044                                                sizeof(buf),
4045                                                err & FAILED_LNI));
4046                        }
4047                        err &= ~(u64)FAILED_LNI;
4048                }
4049                if (err) {
4050                        /* report remaining errors, but do not do anything */
4051                        dd_dev_err(dd, "8051 info error: %s\n",
4052                                dc8051_info_err_string(buf, sizeof(buf), err));
4053                }
4054
4055                /*
4056                 * Handle host message flags.
4057                 */
4058                if (host_msg & HOST_REQ_DONE) {
4059                        /*
4060                         * Presently, the driver does a busy wait for
4061                         * host requests to complete.  This is only an
4062                         * informational message.
4063                         * NOTE: The 8051 clears the host message
4064                         * information *on the next 8051 command*.
4065                         * Therefore, when linkup is achieved,
4066                         * this flag will still be set.
4067                         */
4068                        host_msg &= ~(u64)HOST_REQ_DONE;
4069                }
4070                if (host_msg & BC_SMA_MSG) {
4071                        queue_work(ppd->hfi1_wq, &ppd->sma_message_work);
4072                        host_msg &= ~(u64)BC_SMA_MSG;
4073                }
4074                if (host_msg & LINKUP_ACHIEVED) {
4075                        dd_dev_info(dd, "8051: Link up\n");
4076                        queue_work(ppd->hfi1_wq, &ppd->link_up_work);
4077                        host_msg &= ~(u64)LINKUP_ACHIEVED;
4078                }
4079                if (host_msg & EXT_DEVICE_CFG_REQ) {
4080                        handle_8051_request(dd);
4081                        host_msg &= ~(u64)EXT_DEVICE_CFG_REQ;
4082                }
4083                if (host_msg & VERIFY_CAP_FRAME) {
4084                        queue_work(ppd->hfi1_wq, &ppd->link_vc_work);
4085                        host_msg &= ~(u64)VERIFY_CAP_FRAME;
4086                }
4087                if (host_msg & LINK_GOING_DOWN) {
4088                        const char *extra = "";
4089                        /* no downgrade action needed if going down */
4090                        if (host_msg & LINK_WIDTH_DOWNGRADED) {
4091                                host_msg &= ~(u64)LINK_WIDTH_DOWNGRADED;
4092                                extra = " (ignoring downgrade)";
4093                        }
4094                        dd_dev_info(dd, "8051: Link down%s\n", extra);
4095                        queue_link_down = 1;
4096                        host_msg &= ~(u64)LINK_GOING_DOWN;
4097                }
4098                if (host_msg & LINK_WIDTH_DOWNGRADED) {
4099                        queue_work(ppd->hfi1_wq, &ppd->link_downgrade_work);
4100                        host_msg &= ~(u64)LINK_WIDTH_DOWNGRADED;
4101                }
4102                if (host_msg) {
4103                        /* report remaining messages, but do not do anything */
4104                        dd_dev_info(dd, "8051 info host message: %s\n",
4105                                dc8051_info_host_msg_string(buf, sizeof(buf),
4106                                        host_msg));
4107                }
4108
4109                reg &= ~DC_DC8051_ERR_FLG_SET_BY_8051_SMASK;
4110        }
4111        if (reg & DC_DC8051_ERR_FLG_LOST_8051_HEART_BEAT_SMASK) {
4112                /*
4113                 * Lost the 8051 heartbeat.  If this happens, we
4114                 * receive constant interrupts about it.  Disable
4115                 * the interrupt after the first.
4116                 */
4117                dd_dev_err(dd, "Lost 8051 heartbeat\n");
4118                write_csr(dd, DC_DC8051_ERR_EN,
4119                        read_csr(dd, DC_DC8051_ERR_EN)
4120                          & ~DC_DC8051_ERR_EN_LOST_8051_HEART_BEAT_SMASK);
4121
4122                reg &= ~DC_DC8051_ERR_FLG_LOST_8051_HEART_BEAT_SMASK;
4123        }
4124        if (reg) {
4125                /* report the error, but do not do anything */
4126                dd_dev_err(dd, "8051 error: %s\n",
4127                        dc8051_err_string(buf, sizeof(buf), reg));
4128        }
4129
4130        if (queue_link_down) {
4131                /* if the link is already going down or disabled, do not
4132                 * queue another */
4133                if ((ppd->host_link_state
4134                            & (HLS_GOING_OFFLINE | HLS_LINK_COOLDOWN))
4135                                || ppd->link_enabled == 0) {
4136                        dd_dev_info(dd, "%s: not queuing link down\n",
4137                                __func__);
4138                } else {
4139                        queue_work(ppd->hfi1_wq, &ppd->link_down_work);
4140                }
4141        }
4142}
4143
4144static const char * const fm_config_txt[] = {
4145[0] =
4146        "BadHeadDist: Distance violation between two head flits",
4147[1] =
4148        "BadTailDist: Distance violation between two tail flits",
4149[2] =
4150        "BadCtrlDist: Distance violation between two credit control flits",
4151[3] =
4152        "BadCrdAck: Credit return for unsupported VL",
4153[4] =
4154        "UnsupportedVLMarker: Received VL Marker",
4155[5] =
4156        "BadPreempt: Exceeded the preemption nesting level",
4157[6] =
4158        "BadControlFlit: Received unsupported control flit",
4159/* no 7 */
4160[8] =
4161        "UnsupportedVLMarker: Received VL Marker for unconfigured or disabled VL",
4162};
4163
4164static const char * const port_rcv_txt[] = {
4165[1] =
4166        "BadPktLen: Illegal PktLen",
4167[2] =
4168        "PktLenTooLong: Packet longer than PktLen",
4169[3] =
4170        "PktLenTooShort: Packet shorter than PktLen",
4171[4] =
4172        "BadSLID: Illegal SLID (0, using multicast as SLID, does not include security validation of SLID)",
4173[5] =
4174        "BadDLID: Illegal DLID (0, doesn't match HFI)",
4175[6] =
4176        "BadL2: Illegal L2 opcode",
4177[7] =
4178        "BadSC: Unsupported SC",
4179[9] =
4180        "BadRC: Illegal RC",
4181[11] =
4182        "PreemptError: Preempting with same VL",
4183[12] =
4184        "PreemptVL15: Preempting a VL15 packet",
4185};
4186
4187#define OPA_LDR_FMCONFIG_OFFSET 16
4188#define OPA_LDR_PORTRCV_OFFSET 0
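/*
 * PortErrorAction packs fmconfig error codes at bits (16 + info) and
 * port receive error codes at bits (0 + info).  Illustrative example:
 * a BadSLID port receive error (info == 4) bounces the link only when
 * the FM has set bit 4 in ppd->port_error_action.
 */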
4189static void handle_dcc_err(struct hfi1_devdata *dd, u32 unused, u64 reg)
4190{
4191        u64 info, hdr0, hdr1;
4192        const char *extra;
4193        char buf[96];
4194        struct hfi1_pportdata *ppd = dd->pport;
4195        u8 lcl_reason = 0;
4196        int do_bounce = 0;
4197
4198        if (reg & DCC_ERR_FLG_UNCORRECTABLE_ERR_SMASK) {
4199                if (!(dd->err_info_uncorrectable & OPA_EI_STATUS_SMASK)) {
4200                        info = read_csr(dd, DCC_ERR_INFO_UNCORRECTABLE);
4201                        dd->err_info_uncorrectable = info & OPA_EI_CODE_SMASK;
4202                        /* set status bit */
4203                        dd->err_info_uncorrectable |= OPA_EI_STATUS_SMASK;
4204                }
4205                reg &= ~DCC_ERR_FLG_UNCORRECTABLE_ERR_SMASK;
4206        }
4207
4208        if (reg & DCC_ERR_FLG_LINK_ERR_SMASK) {
4209                struct hfi1_pportdata *ppd = dd->pport;
4210                /* this counter saturates at (2^32) - 1 */
4211                if (ppd->link_downed < (u32)UINT_MAX)
4212                        ppd->link_downed++;
4213                reg &= ~DCC_ERR_FLG_LINK_ERR_SMASK;
4214        }
4215
4216        if (reg & DCC_ERR_FLG_FMCONFIG_ERR_SMASK) {
4217                u8 reason_valid = 1;
4218
4219                info = read_csr(dd, DCC_ERR_INFO_FMCONFIG);
4220                if (!(dd->err_info_fmconfig & OPA_EI_STATUS_SMASK)) {
4221                        dd->err_info_fmconfig = info & OPA_EI_CODE_SMASK;
4222                        /* set status bit */
4223                        dd->err_info_fmconfig |= OPA_EI_STATUS_SMASK;
4224                }
4225                switch (info) {
4226                case 0:
4227                case 1:
4228                case 2:
4229                case 3:
4230                case 4:
4231                case 5:
4232                case 6:
4233                        extra = fm_config_txt[info];
4234                        break;
4235                case 8:
4236                        extra = fm_config_txt[info];
4237                        if (ppd->port_error_action &
4238                            OPA_PI_MASK_FM_CFG_UNSUPPORTED_VL_MARKER) {
4239                                do_bounce = 1;
4240                                /*
4241                                 * lcl_reason cannot be derived from info
4242                                 * for this error
4243                                 */
4244                                lcl_reason =
4245                                  OPA_LINKDOWN_REASON_UNSUPPORTED_VL_MARKER;
4246                        }
4247                        break;
4248                default:
4249                        reason_valid = 0;
4250                        snprintf(buf, sizeof(buf), "reserved%llu", info);
4251                        extra = buf;
4252                        break;
4253                }
4254
4255                if (reason_valid && !do_bounce) {
4256                        do_bounce = ppd->port_error_action &
4257                                        (1 << (OPA_LDR_FMCONFIG_OFFSET + info));
4258                        lcl_reason = info + OPA_LINKDOWN_REASON_BAD_HEAD_DIST;
4259                }
4260
4261                /* just report this */
4262                dd_dev_info(dd, "DCC Error: fmconfig error: %s\n", extra);
4263                reg &= ~DCC_ERR_FLG_FMCONFIG_ERR_SMASK;
4264        }
4265
4266        if (reg & DCC_ERR_FLG_RCVPORT_ERR_SMASK) {
4267                u8 reason_valid = 1;
4268
4269                info = read_csr(dd, DCC_ERR_INFO_PORTRCV);
4270                hdr0 = read_csr(dd, DCC_ERR_INFO_PORTRCV_HDR0);
4271                hdr1 = read_csr(dd, DCC_ERR_INFO_PORTRCV_HDR1);
4272                if (!(dd->err_info_rcvport.status_and_code &
4273                      OPA_EI_STATUS_SMASK)) {
4274                        dd->err_info_rcvport.status_and_code =
4275                                info & OPA_EI_CODE_SMASK;
4276                        /* set status bit */
4277                        dd->err_info_rcvport.status_and_code |=
4278                                OPA_EI_STATUS_SMASK;
4279                        /* save first 2 flits in the packet that caused
4280                         * the error */
4281                        dd->err_info_rcvport.packet_flit1 = hdr0;
4282                        dd->err_info_rcvport.packet_flit2 = hdr1;
4283                }
4284                switch (info) {
4285                case 1:
4286                case 2:
4287                case 3:
4288                case 4:
4289                case 5:
4290                case 6:
4291                case 7:
4292                case 9:
4293                case 11:
4294                case 12:
4295                        extra = port_rcv_txt[info];
4296                        break;
4297                default:
4298                        reason_valid = 0;
4299                        snprintf(buf, sizeof(buf), "reserved%llu", info);
4300                        extra = buf;
4301                        break;
4302                }
4303
4304                if (reason_valid && !do_bounce) {
4305                        do_bounce = ppd->port_error_action &
4306                                        (1 << (OPA_LDR_PORTRCV_OFFSET + info));
4307                        lcl_reason = info + OPA_LINKDOWN_REASON_RCV_ERROR_0;
4308                }
4309
4310                /* just report this */
4311                dd_dev_info(dd, "DCC Error: PortRcv error: %s\n", extra);
4312                dd_dev_info(dd, "           hdr0 0x%llx, hdr1 0x%llx\n",
4313                        hdr0, hdr1);
4314
4315                reg &= ~DCC_ERR_FLG_RCVPORT_ERR_SMASK;
4316        }
4317
4318        if (reg & DCC_ERR_FLG_EN_CSR_ACCESS_BLOCKED_UC_SMASK) {
4319                /* informative only */
4320                dd_dev_info(dd, "8051 access to LCB blocked\n");
4321                reg &= ~DCC_ERR_FLG_EN_CSR_ACCESS_BLOCKED_UC_SMASK;
4322        }
4323        if (reg & DCC_ERR_FLG_EN_CSR_ACCESS_BLOCKED_HOST_SMASK) {
4324                /* informative only */
4325                dd_dev_info(dd, "host access to LCB blocked\n");
4326                reg &= ~DCC_ERR_FLG_EN_CSR_ACCESS_BLOCKED_HOST_SMASK;
4327        }
4328
4329        /* report any remaining errors */
4330        if (reg)
4331                dd_dev_info(dd, "DCC Error: %s\n",
4332                        dcc_err_string(buf, sizeof(buf), reg));
4333
4334        if (lcl_reason == 0)
4335                lcl_reason = OPA_LINKDOWN_REASON_UNKNOWN;
4336
4337        if (do_bounce) {
4338                dd_dev_info(dd, "%s: PortErrorAction bounce\n", __func__);
4339                set_link_down_reason(ppd, lcl_reason, 0, lcl_reason);
4340                queue_work(ppd->hfi1_wq, &ppd->link_bounce_work);
4341        }
4342}
4343
4344static void handle_lcb_err(struct hfi1_devdata *dd, u32 unused, u64 reg)
4345{
4346        char buf[96];
4347
4348        dd_dev_info(dd, "LCB Error: %s\n",
4349                lcb_err_string(buf, sizeof(buf), reg));
4350}
4351
4352/*
4353 * CCE block DC interrupt.  Source is < 8.
4354 */
4355static void is_dc_int(struct hfi1_devdata *dd, unsigned int source)
4356{
4357        const struct err_reg_info *eri = &dc_errs[source];
4358
4359        if (eri->handler) {
4360                interrupt_clear_down(dd, 0, eri);
4361        } else if (source == 3 /* dc_lbm_int */) {
4362                /*
4363                 * This indicates that a parity error has occurred on the
4364                 * address/control lines presented to the LBM.  The error
4365                 * is a single pulse, there is no associated error flag,
4366                 * and it is non-maskable.  This is because if a parity
4367                 * error occurs on the request the request is dropped.
4368                 * This should never occur, but it is nice to know if it
4369                 * ever does.
4370                 */
4371                dd_dev_err(dd, "Parity error in DC LBM block\n");
4372        } else {
4373                dd_dev_err(dd, "Invalid DC interrupt %u\n", source);
4374        }
4375}
4376
4377/*
4378 * TX block send credit interrupt.  Source is < 160.
4379 */
4380static void is_send_credit_int(struct hfi1_devdata *dd, unsigned int source)
4381{
4382        sc_group_release_update(dd, source);
4383}
4384
4385/*
4386 * TX block SDMA interrupt.  Source is < 48.
4387 *
4388 * SDMA interrupts are grouped by type:
4389 *
4390 *       0 -  N-1 = SDma
4391 *       N - 2N-1 = SDmaProgress
4392 *      2N - 3N-1 = SDmaIdle
4393 */
4394static void is_sdma_eng_int(struct hfi1_devdata *dd, unsigned int source)
4395{
4396        /* what interrupt */
4397        unsigned int what  = source / TXE_NUM_SDMA_ENGINES;
4398        /* which engine */
4399        unsigned int which = source % TXE_NUM_SDMA_ENGINES;
4400
4401#ifdef CONFIG_SDMA_VERBOSITY
4402        dd_dev_err(dd, "CONFIG SDMA(%u) %s:%d %s()\n", which,
4403                   slashstrip(__FILE__), __LINE__, __func__);
4404        sdma_dumpstate(&dd->per_sdma[which]);
4405#endif
4406
4407        if (likely(what < 3 && which < dd->num_sdma)) {
4408                sdma_engine_interrupt(&dd->per_sdma[which], 1ull << source);
4409        } else {
4410                /* should not happen */
4411                dd_dev_err(dd, "Invalid SDMA interrupt 0x%x\n", source);
4412        }
4413}
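
/*
 * Decode example (illustrative, assuming TXE_NUM_SDMA_ENGINES == 16):
 * source 20 gives what = 20 / 16 = 1 (SDmaProgress) on engine
 * which = 20 % 16 = 4.  Passing 1ull << source (not << which) keeps the
 * bit position, so the SDMA code can tell the three interrupt types
 * apart.
 */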
4414
4415/*
4416 * RX block receive available interrupt.  Source is < 160.
4417 */
4418static void is_rcv_avail_int(struct hfi1_devdata *dd, unsigned int source)
4419{
4420        struct hfi1_ctxtdata *rcd;
4421        const char *err_detail;
4422
4423        if (likely(source < dd->num_rcv_contexts)) {
4424                rcd = dd->rcd[source];
4425                if (rcd) {
4426                        if (source < dd->first_user_ctxt)
4427                                rcd->do_interrupt(rcd, 0);
4428                        else
4429                                handle_user_interrupt(rcd);
4430                        return; /* OK */
4431                }
4432                /* received an interrupt, but no rcd */
4433                err_detail = "dataless";
4434        } else {
4435                /* received an interrupt, but are not using that context */
4436                err_detail = "out of range";
4437        }
4438        dd_dev_err(dd, "unexpected %s receive available context interrupt %u\n",
4439                err_detail, source);
4440}
4441
4442/*
4443 * RX block receive urgent interrupt.  Source is < 160.
4444 */
4445static void is_rcv_urgent_int(struct hfi1_devdata *dd, unsigned int source)
4446{
4447        struct hfi1_ctxtdata *rcd;
4448        const char *err_detail;
4449
4450        if (likely(source < dd->num_rcv_contexts)) {
4451                rcd = dd->rcd[source];
4452                if (rcd) {
4453                        /* only pay attention to user urgent interrupts */
4454                        if (source >= dd->first_user_ctxt)
4455                                handle_user_interrupt(rcd);
4456                        return; /* OK */
4457                }
4458                /* received an interrupt, but no rcd */
4459                err_detail = "dataless";
4460        } else {
4461                /* received an interrupt, but are not using that context */
4462                err_detail = "out of range";
4463        }
4464        dd_dev_err(dd, "unexpected %s receive urgent context interrupt %u\n",
4465                err_detail, source);
4466}
4467
4468/*
4469 * Reserved range interrupt.  Should not be called in normal operation.
4470 */
4471static void is_reserved_int(struct hfi1_devdata *dd, unsigned int source)
4472{
4473        char name[64];
4474
4475        dd_dev_err(dd, "unexpected %s interrupt\n",
4476                                is_reserved_name(name, sizeof(name), source));
4477}
4478
4479static const struct is_table is_table[] = {
4480/* start                     end
4481                                name func               interrupt func */
4482{ IS_GENERAL_ERR_START,  IS_GENERAL_ERR_END,
4483                                is_misc_err_name,       is_misc_err_int },
4484{ IS_SDMAENG_ERR_START,  IS_SDMAENG_ERR_END,
4485                                is_sdma_eng_err_name,   is_sdma_eng_err_int },
4486{ IS_SENDCTXT_ERR_START, IS_SENDCTXT_ERR_END,
4487                                is_sendctxt_err_name,   is_sendctxt_err_int },
4488{ IS_SDMA_START,             IS_SDMA_END,
4489                                is_sdma_eng_name,       is_sdma_eng_int },
4490{ IS_VARIOUS_START,          IS_VARIOUS_END,
4491                                is_various_name,        is_various_int },
4492{ IS_DC_START,       IS_DC_END,
4493                                is_dc_name,             is_dc_int },
4494{ IS_RCVAVAIL_START,     IS_RCVAVAIL_END,
4495                                is_rcv_avail_name,      is_rcv_avail_int },
4496{ IS_RCVURGENT_START,    IS_RCVURGENT_END,
4497                                is_rcv_urgent_name,     is_rcv_urgent_int },
4498{ IS_SENDCREDIT_START,   IS_SENDCREDIT_END,
4499                                is_send_credit_name,    is_send_credit_int},
4500{ IS_RESERVED_START,     IS_RESERVED_END,
4501                                is_reserved_name,       is_reserved_int},
4502};
4503
4504/*
4505 * Interrupt source interrupt - called when the given source has an interrupt.
4506 * Source is a bit index into an array of 64-bit integers.
4507 */
4508static void is_interrupt(struct hfi1_devdata *dd, unsigned int source)
4509{
4510        const struct is_table *entry;
4511
4512        /* avoids a double compare by walking the table in-order */
4513        for (entry = &is_table[0]; entry->is_name; entry++) {
4514                if (source < entry->end) {
4515                        trace_hfi1_interrupt(dd, entry, source);
4516                        entry->is_int(dd, source - entry->start);
4517                        return;
4518                }
4519        }
4520        /* fell off the end */
4521        dd_dev_err(dd, "invalid interrupt source %u\n", source);
4522}
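
/*
 * Example (illustrative): source 200 corresponds to bit 200 % 64 = 8
 * of CCE_INT_STATUS CSR 200 / 64 = 3.  The table walk then hands the
 * matching handler a range-relative index, source - entry->start.
 */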
4523
4524/*
4525 * General interrupt handler.  This is able to correctly handle
4526 * all interrupts in case INTx is used.
4527 */
4528static irqreturn_t general_interrupt(int irq, void *data)
4529{
4530        struct hfi1_devdata *dd = data;
4531        u64 regs[CCE_NUM_INT_CSRS];
4532        u32 bit;
4533        int i;
4534
4535        this_cpu_inc(*dd->int_counter);
4536
4537        /* phase 1: scan and clear all handled interrupts */
4538        for (i = 0; i < CCE_NUM_INT_CSRS; i++) {
4539                if (dd->gi_mask[i] == 0) {
4540                        regs[i] = 0;    /* used later */
4541                        continue;
4542                }
4543                regs[i] = read_csr(dd, CCE_INT_STATUS + (8 * i)) &
4544                                dd->gi_mask[i];
4545                /* only clear if anything is set */
4546                if (regs[i])
4547                        write_csr(dd, CCE_INT_CLEAR + (8 * i), regs[i]);
4548        }
4549
4550        /* phase 2: call the appropriate handler */
4551        for_each_set_bit(bit, (unsigned long *)&regs[0],
4552                                                CCE_NUM_INT_CSRS*64) {
4553                is_interrupt(dd, bit);
4554        }
4555
4556        return IRQ_HANDLED;
4557}
4558
4559static irqreturn_t sdma_interrupt(int irq, void *data)
4560{
4561        struct sdma_engine *sde = data;
4562        struct hfi1_devdata *dd = sde->dd;
4563        u64 status;
4564
4565#ifdef CONFIG_SDMA_VERBOSITY
4566        dd_dev_err(dd, "CONFIG SDMA(%u) %s:%d %s()\n", sde->this_idx,
4567                   slashstrip(__FILE__), __LINE__, __func__);
4568        sdma_dumpstate(sde);
4569#endif
4570
4571        this_cpu_inc(*dd->int_counter);
4572
4573        /* This read_csr is really bad in the hot path */
4574        status = read_csr(dd,
4575                        CCE_INT_STATUS + (8*(IS_SDMA_START/64)))
4576                        & sde->imask;
4577        if (likely(status)) {
4578                /* clear the interrupt(s) */
4579                write_csr(dd,
4580                        CCE_INT_CLEAR + (8*(IS_SDMA_START/64)),
4581                        status);
4582
4583                /* handle the interrupt(s) */
4584                sdma_engine_interrupt(sde, status);
4585        } else
4586                dd_dev_err(dd, "SDMA engine %u interrupt, but no status bits set\n",
4587                        sde->this_idx);
4588
4589        return IRQ_HANDLED;
4590}
4591
4592/*
4593 * Clear the receive interrupt, forcing the write and making sure
4594 * we have data from the chip, pushing everything in front of it
4595 * back to the host.
4596 */
4597static inline void clear_recv_intr(struct hfi1_ctxtdata *rcd)
4598{
4599        struct hfi1_devdata *dd = rcd->dd;
4600        u32 addr = CCE_INT_CLEAR + (8 * rcd->ireg);
4601
4602        mmiowb();       /* make sure everything before is written */
4603        write_csr(dd, addr, rcd->imask);
4604        /* force the above write on the chip and get a value back */
4605        (void)read_csr(dd, addr);
4606}
4607
4608/* force the receive interrupt */
4609static inline void force_recv_intr(struct hfi1_ctxtdata *rcd)
4610{
4611        write_csr(rcd->dd, CCE_INT_FORCE + (8 * rcd->ireg), rcd->imask);
4612}
4613
4614/* return non-zero if a packet is present */
4615static inline int check_packet_present(struct hfi1_ctxtdata *rcd)
4616{
4617        if (!HFI1_CAP_IS_KSET(DMA_RTAIL))
4618                return (rcd->seq_cnt ==
4619                                rhf_rcv_seq(rhf_to_cpu(get_rhf_addr(rcd))));
4620
4621        /* else is RDMA rtail */
4622        return (rcd->head != get_rcvhdrtail(rcd));
4623}
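
/*
 * The two modes above, spelled out: without DMA_RTAIL the chip tags
 * each receive header with a rolling sequence number, and a packet is
 * present when the tag matches the next value software expects
 * (rcd->seq_cnt).  With DMA_RTAIL the chip DMAs its tail index to host
 * memory, and packets are present while rcd->head trails that tail.
 */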
4624
4625/*
4626 * Receive packet IRQ handler.  This routine expects to be on its own IRQ.
4627 * This routine will try to handle packets immediately (latency), but if
4628 * it finds too many, it will invoke the thread handler (bandwidth).  The
4629 * chip receive interrupt is *not* cleared down until this or the thread (if
4630 * invoked) is finished.  The intent is to avoid extra interrupts while we
4631 * are processing packets anyway.
4632 */
4633static irqreturn_t receive_context_interrupt(int irq, void *data)
4634{
4635        struct hfi1_ctxtdata *rcd = data;
4636        struct hfi1_devdata *dd = rcd->dd;
4637        int disposition;
4638        int present;
4639
4640        trace_hfi1_receive_interrupt(dd, rcd->ctxt);
4641        this_cpu_inc(*dd->int_counter);
4642
4643        /* receive interrupt remains blocked while processing packets */
4644        disposition = rcd->do_interrupt(rcd, 0);
4645
4646        /*
4647         * Too many packets were seen while processing packets in this
4648         * IRQ handler.  Invoke the handler thread.  The receive interrupt
4649         * remains blocked.
4650         */
4651        if (disposition == RCV_PKT_LIMIT)
4652                return IRQ_WAKE_THREAD;
4653
4654        /*
4655         * The packet processor detected no more packets.  Clear the receive
4656         * interrupt and recheck for a packet that may have arrived
4657         * after the previous check and interrupt clear.  If a packet arrived,
4658         * force another interrupt.
4659         */
4660        clear_recv_intr(rcd);
4661        present = check_packet_present(rcd);
4662        if (present)
4663                force_recv_intr(rcd);
4664
4665        return IRQ_HANDLED;
4666}
4667
4668/*
4669 * Receive packet thread handler.  This expects to be invoked with the
4670 * receive interrupt still blocked.
4671 */
4672static irqreturn_t receive_context_thread(int irq, void *data)
4673{
4674        struct hfi1_ctxtdata *rcd = data;
4675        int present;
4676
4677        /* receive interrupt is still blocked from the IRQ handler */
4678        (void)rcd->do_interrupt(rcd, 1);
4679
4680        /*
4681         * The packet processor will only return if it detected no more
4682         * packets.  Hold IRQs here so we can safely clear the interrupt and
4683         * recheck for a packet that may have arrived after the previous
4684         * check and the interrupt clear.  If a packet arrived, force another
4685         * interrupt.
4686         */
4687        local_irq_disable();
4688        clear_recv_intr(rcd);
4689        present = check_packet_present(rcd);
4690        if (present)
4691                force_recv_intr(rcd);
4692        local_irq_enable();
4693
4694        return IRQ_HANDLED;
4695}
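
/*
 * The clear-then-recheck ordering used by both the IRQ handler and the
 * thread closes a race:
 *
 *	1. the packet processor sees an empty ring
 *	2. a new packet arrives and asserts the (still blocked) interrupt
 *	3. clear_recv_intr() wipes that assertion
 *
 * Rechecking after the clear and calling force_recv_intr() when a
 * packet is found ensures the new packet still raises an interrupt.
 */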
4696
4697/* ========================================================================= */
4698
4699u32 read_physical_state(struct hfi1_devdata *dd)
4700{
4701        u64 reg;
4702
4703        reg = read_csr(dd, DC_DC8051_STS_CUR_STATE);
4704        return (reg >> DC_DC8051_STS_CUR_STATE_PORT_SHIFT)
4705                                & DC_DC8051_STS_CUR_STATE_PORT_MASK;
4706}
4707
4708static u32 read_logical_state(struct hfi1_devdata *dd)
4709{
4710        u64 reg;
4711
4712        reg = read_csr(dd, DCC_CFG_PORT_CONFIG);
4713        return (reg >> DCC_CFG_PORT_CONFIG_LINK_STATE_SHIFT)
4714                                & DCC_CFG_PORT_CONFIG_LINK_STATE_MASK;
4715}
4716
4717static void set_logical_state(struct hfi1_devdata *dd, u32 chip_lstate)
4718{
4719        u64 reg;
4720
4721        reg = read_csr(dd, DCC_CFG_PORT_CONFIG);
4722        /* clear current state, set new state */
4723        reg &= ~DCC_CFG_PORT_CONFIG_LINK_STATE_SMASK;
4724        reg |= (u64)chip_lstate << DCC_CFG_PORT_CONFIG_LINK_STATE_SHIFT;
4725        write_csr(dd, DCC_CFG_PORT_CONFIG, reg);
4726}
4727
4728/*
4729 * Use the 8051 to read an LCB CSR.
4730 */
4731static int read_lcb_via_8051(struct hfi1_devdata *dd, u32 addr, u64 *data)
4732{
4733        u32 regno;
4734        int ret;
4735
4736        if (dd->icode == ICODE_FUNCTIONAL_SIMULATOR) {
4737                if (acquire_lcb_access(dd, 0) == 0) {
4738                        *data = read_csr(dd, addr);
4739                        release_lcb_access(dd, 0);
4740                        return 0;
4741                }
4742                return -EBUSY;
4743        }
4744
4745        /* register is an index of LCB registers: (offset - base) / 8 */
4746        regno = (addr - DC_LCB_CFG_RUN) >> 3;
4747        ret = do_8051_command(dd, HCMD_READ_LCB_CSR, regno, data);
4748        if (ret != HCMD_SUCCESS)
4749                return -EBUSY;
4750        return 0;
4751}
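
/*
 * Index example (illustrative): LCB CSRs are 8 bytes apart, so an LCB
 * CSR 0x40 bytes past DC_LCB_CFG_RUN is presented to the 8051 as
 * regno 0x40 >> 3 = 8.  do_8051_command() widens the response: the low
 * 48 bits come back in the command response and the top 16 bits are
 * read from DC_DC8051_CFG_EXT_DEV_1.
 */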
4752
4753/*
4754 * Read an LCB CSR.  Access may not be in host control, so check.
4755 * Return 0 on success, -EBUSY on failure.
4756 */
4757int read_lcb_csr(struct hfi1_devdata *dd, u32 addr, u64 *data)
4758{
4759        struct hfi1_pportdata *ppd = dd->pport;
4760
4761        /* if up, go through the 8051 for the value */
4762        if (ppd->host_link_state & HLS_UP)
4763                return read_lcb_via_8051(dd, addr, data);
4764        /* if going up or down, no access */
4765        if (ppd->host_link_state & (HLS_GOING_UP | HLS_GOING_OFFLINE))
4766                return -EBUSY;
4767        /* otherwise, host has access */
4768        *data = read_csr(dd, addr);
4769        return 0;
4770}
4771
4772/*
4773 * Use the 8051 to write an LCB CSR.
4774 */
4775static int write_lcb_via_8051(struct hfi1_devdata *dd, u32 addr, u64 data)
4776{
4777
4778        if (acquire_lcb_access(dd, 0) == 0) {
4779                write_csr(dd, addr, data);
4780                release_lcb_access(dd, 0);
4781                return 0;
4782        }
4783        return -EBUSY;
4784}
4785
4786/*
4787 * Write an LCB CSR.  Access may not be in host control, so check.
4788 * Return 0 on success, -EBUSY on failure.
4789 */
4790int write_lcb_csr(struct hfi1_devdata *dd, u32 addr, u64 data)
4791{
4792        struct hfi1_pportdata *ppd = dd->pport;
4793
4794        /* if up, go through the 8051 for the value */
4795        if (ppd->host_link_state & HLS_UP)
4796                return write_lcb_via_8051(dd, addr, data);
4797        /* if going up or down, no access */
4798        if (ppd->host_link_state & (HLS_GOING_UP | HLS_GOING_OFFLINE))
4799                return -EBUSY;
4800        /* otherwise, host has access */
4801        write_csr(dd, addr, data);
4802        return 0;
4803}
4804
4805/*
4806 * Returns:
4807 *      < 0 = Linux error, not able to get access
4808 *      > 0 = 8051 command RETURN_CODE
4809 */
4810static int do_8051_command(
4811        struct hfi1_devdata *dd,
4812        u32 type,
4813        u64 in_data,
4814        u64 *out_data)
4815{
4816        u64 reg, completed;
4817        int return_code;
4818        unsigned long flags;
4819        unsigned long timeout;
4820
4821        hfi1_cdbg(DC8051, "type %d, data 0x%012llx", type, in_data);
4822
4823        /*
4824         * Alternative to holding the lock for a long time:
4825         * - keep busy wait - have other users bounce off
4826         */
4827        spin_lock_irqsave(&dd->dc8051_lock, flags);
4828
4829        /* We can't send any commands to the 8051 if it's in reset */
4830        if (dd->dc_shutdown) {
4831                return_code = -ENODEV;
4832                goto fail;
4833        }
4834
4835        /*
4836         * If an 8051 host command timed out previously, then the 8051 is
4837         * stuck.
4838         *
4839         * On first timeout, attempt to reset and restart the entire DC
4840         * block (including 8051). (Is this too big of a hammer?)
4841         *
4842         * If the 8051 times out a second time, the reset did not bring it
4843         * back to healthy life. In that case, fail any subsequent commands.
4844         */
4845        if (dd->dc8051_timed_out) {
4846                if (dd->dc8051_timed_out > 1) {
4847                        dd_dev_err(dd,
4848                                   "Previous 8051 host command timed out, skipping command %u\n",
4849                                   type);
4850                        return_code = -ENXIO;
4851                        goto fail;
4852                }
4853                spin_unlock_irqrestore(&dd->dc8051_lock, flags);
4854                dc_shutdown(dd);
4855                dc_start(dd);
4856                spin_lock_irqsave(&dd->dc8051_lock, flags);
4857        }
4858
4859        /*
4860         * If there is no timeout, then the 8051 command interface is
4861         * waiting for a command.
4862         */
4863
4864        /*
4865         * Do two writes: the first to stabilize the type and req_data, the
4866         * second to activate.
4867         */
4868        reg = ((u64)type & DC_DC8051_CFG_HOST_CMD_0_REQ_TYPE_MASK)
4869                        << DC_DC8051_CFG_HOST_CMD_0_REQ_TYPE_SHIFT
4870                | (in_data & DC_DC8051_CFG_HOST_CMD_0_REQ_DATA_MASK)
4871                        << DC_DC8051_CFG_HOST_CMD_0_REQ_DATA_SHIFT;
4872        write_csr(dd, DC_DC8051_CFG_HOST_CMD_0, reg);
4873        reg |= DC_DC8051_CFG_HOST_CMD_0_REQ_NEW_SMASK;
4874        write_csr(dd, DC_DC8051_CFG_HOST_CMD_0, reg);
4875
4876        /* wait for completion, alternate: interrupt */
4877        timeout = jiffies + msecs_to_jiffies(DC8051_COMMAND_TIMEOUT);
4878        while (1) {
4879                reg = read_csr(dd, DC_DC8051_CFG_HOST_CMD_1);
4880                completed = reg & DC_DC8051_CFG_HOST_CMD_1_COMPLETED_SMASK;
4881                if (completed)
4882                        break;
4883                if (time_after(jiffies, timeout)) {
4884                        dd->dc8051_timed_out++;
4885                        dd_dev_err(dd, "8051 host command %u timeout\n", type);
4886                        if (out_data)
4887                                *out_data = 0;
4888                        return_code = -ETIMEDOUT;
4889                        goto fail;
4890                }
4891                udelay(2);
4892        }
4893
4894        if (out_data) {
4895                *out_data = (reg >> DC_DC8051_CFG_HOST_CMD_1_RSP_DATA_SHIFT)
4896                                & DC_DC8051_CFG_HOST_CMD_1_RSP_DATA_MASK;
4897                if (type == HCMD_READ_LCB_CSR) {
4898                        /* top 16 bits are in a different register */
4899                        *out_data |= (read_csr(dd, DC_DC8051_CFG_EXT_DEV_1)
4900                                & DC_DC8051_CFG_EXT_DEV_1_REQ_DATA_SMASK)
4901                                << (48
4902                                    - DC_DC8051_CFG_EXT_DEV_1_REQ_DATA_SHIFT);
4903                }
4904        }
4905        return_code = (reg >> DC_DC8051_CFG_HOST_CMD_1_RETURN_CODE_SHIFT)
4906                                & DC_DC8051_CFG_HOST_CMD_1_RETURN_CODE_MASK;
4907        dd->dc8051_timed_out = 0;
4908        /*
4909         * Clear command for next user.
4910         */
4911        write_csr(dd, DC_DC8051_CFG_HOST_CMD_0, 0);
4912
4913fail:
4914        spin_unlock_irqrestore(&dd->dc8051_lock, flags);
4915
4916        return return_code;
4917}
4918
4919static int set_physical_link_state(struct hfi1_devdata *dd, u64 state)
4920{
4921        return do_8051_command(dd, HCMD_CHANGE_PHY_STATE, state, NULL);
4922}
4923
4924static int load_8051_config(struct hfi1_devdata *dd, u8 field_id,
4925                            u8 lane_id, u32 config_data)
4926{
4927        u64 data;
4928        int ret;
4929
4930        data = (u64)field_id << LOAD_DATA_FIELD_ID_SHIFT
4931                | (u64)lane_id << LOAD_DATA_LANE_ID_SHIFT
4932                | (u64)config_data << LOAD_DATA_DATA_SHIFT;
4933        ret = do_8051_command(dd, HCMD_LOAD_CONFIG_DATA, data, NULL);
4934        if (ret != HCMD_SUCCESS) {
4935                dd_dev_err(dd,
4936                        "load 8051 config: field id %d, lane %d, err %d\n",
4937                        (int)field_id, (int)lane_id, ret);
4938        }
4939        return ret;
4940}
4941
4942/*
4943 * Read the 8051 firmware "registers".  Use the RAM directly.  Always
4944 * set the result, even on error.
4945 * Return 0 on success, -errno on failure
4946 */
4947static int read_8051_config(struct hfi1_devdata *dd, u8 field_id, u8 lane_id,
4948                            u32 *result)
4949{
4950        u64 big_data;
4951        u32 addr;
4952        int ret;
4953
4954        /* address start depends on the lane_id */
4955        if (lane_id < 4)
4956                addr = (4 * NUM_GENERAL_FIELDS)
4957                        + (lane_id * 4 * NUM_LANE_FIELDS);
4958        else
4959                addr = 0;
4960        addr += field_id * 4;
4961
4962        /* read is in 8-byte chunks, hardware will truncate the address down */
4963        ret = read_8051_data(dd, addr, 8, &big_data);
4964
4965        if (ret == 0) {
4966                /* extract the 4 bytes we want */
4967                if (addr & 0x4)
4968                        *result = (u32)(big_data >> 32);
4969                else
4970                        *result = (u32)big_data;
4971        } else {
4972                *result = 0;
4973                dd_dev_err(dd, "%s: direct read failed, lane %d, field %d!\n",
4974                        __func__, lane_id, field_id);
4975        }
4976
4977        return ret;
4978}
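
/*
 * Word-select example (illustrative): a field at addr 0x14 reads the
 * 8-byte chunk at 0x10 (the hardware drops the low address bits); with
 * addr bit 2 set the result is big_data >> 32, while a field at 0x10
 * takes the low 32 bits of the same chunk.
 */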
4979
4980static int write_vc_local_phy(struct hfi1_devdata *dd, u8 power_management,
4981                              u8 continuous)
4982{
4983        u32 frame;
4984
4985        frame = continuous << CONTINIOUS_REMOTE_UPDATE_SUPPORT_SHIFT
4986                | power_management << POWER_MANAGEMENT_SHIFT;
4987        return load_8051_config(dd, VERIFY_CAP_LOCAL_PHY,
4988                                GENERAL_CONFIG, frame);
4989}
4990
4991static int write_vc_local_fabric(struct hfi1_devdata *dd, u8 vau, u8 z, u8 vcu,
4992                                 u16 vl15buf, u8 crc_sizes)
4993{
4994        u32 frame;
4995
4996        frame = (u32)vau << VAU_SHIFT
4997                | (u32)z << Z_SHIFT
4998                | (u32)vcu << VCU_SHIFT
4999                | (u32)vl15buf << VL15BUF_SHIFT
5000                | (u32)crc_sizes << CRC_SIZES_SHIFT;
5001        return load_8051_config(dd, VERIFY_CAP_LOCAL_FABRIC,
5002                                GENERAL_CONFIG, frame);
5003}
5004
5005static void read_vc_local_link_width(struct hfi1_devdata *dd, u8 *misc_bits,
5006                                     u8 *flag_bits, u16 *link_widths)
5007{
5008        u32 frame;
5009
5010        read_8051_config(dd, VERIFY_CAP_LOCAL_LINK_WIDTH, GENERAL_CONFIG,
5011                                &frame);
5012        *misc_bits = (frame >> MISC_CONFIG_BITS_SHIFT) & MISC_CONFIG_BITS_MASK;
5013        *flag_bits = (frame >> LOCAL_FLAG_BITS_SHIFT) & LOCAL_FLAG_BITS_MASK;
5014        *link_widths = (frame >> LINK_WIDTH_SHIFT) & LINK_WIDTH_MASK;
5015}
5016
5017static int write_vc_local_link_width(struct hfi1_devdata *dd,
5018                                     u8 misc_bits,
5019                                     u8 flag_bits,
5020                                     u16 link_widths)
5021{
5022        u32 frame;
5023
5024        frame = (u32)misc_bits << MISC_CONFIG_BITS_SHIFT
5025                | (u32)flag_bits << LOCAL_FLAG_BITS_SHIFT
5026                | (u32)link_widths << LINK_WIDTH_SHIFT;
5027        return load_8051_config(dd, VERIFY_CAP_LOCAL_LINK_WIDTH, GENERAL_CONFIG,
5028                     frame);
5029}
5030
5031static int write_local_device_id(struct hfi1_devdata *dd, u16 device_id,
5032                                 u8 device_rev)
5033{
5034        u32 frame;
5035
5036        frame = ((u32)device_id << LOCAL_DEVICE_ID_SHIFT)
5037                | ((u32)device_rev << LOCAL_DEVICE_REV_SHIFT);
5038        return load_8051_config(dd, LOCAL_DEVICE_ID, GENERAL_CONFIG, frame);
5039}
5040
5041static void read_remote_device_id(struct hfi1_devdata *dd, u16 *device_id,
5042                                  u8 *device_rev)
5043{
5044        u32 frame;
5045
5046        read_8051_config(dd, REMOTE_DEVICE_ID, GENERAL_CONFIG, &frame);
5047        *device_id = (frame >> REMOTE_DEVICE_ID_SHIFT) & REMOTE_DEVICE_ID_MASK;
5048        *device_rev = (frame >> REMOTE_DEVICE_REV_SHIFT)
5049                        & REMOTE_DEVICE_REV_MASK;
5050}
5051
5052void read_misc_status(struct hfi1_devdata *dd, u8 *ver_a, u8 *ver_b)
5053{
5054        u32 frame;
5055
5056        read_8051_config(dd, MISC_STATUS, GENERAL_CONFIG, &frame);
5057        *ver_a = (frame >> STS_FM_VERSION_A_SHIFT) & STS_FM_VERSION_A_MASK;
5058        *ver_b = (frame >> STS_FM_VERSION_B_SHIFT) & STS_FM_VERSION_B_MASK;
5059}
5060
5061static void read_vc_remote_phy(struct hfi1_devdata *dd, u8 *power_management,
5062                               u8 *continuous)
5063{
5064        u32 frame;
5065
5066        read_8051_config(dd, VERIFY_CAP_REMOTE_PHY, GENERAL_CONFIG, &frame);
5067        *power_management = (frame >> POWER_MANAGEMENT_SHIFT)
5068                                        & POWER_MANAGEMENT_MASK;
5069        *continuous = (frame >> CONTINIOUS_REMOTE_UPDATE_SUPPORT_SHIFT)
5070                                        & CONTINIOUS_REMOTE_UPDATE_SUPPORT_MASK;
5071}
5072
5073static void read_vc_remote_fabric(struct hfi1_devdata *dd, u8 *vau, u8 *z,
5074                                  u8 *vcu, u16 *vl15buf, u8 *crc_sizes)
5075{
5076        u32 frame;
5077
5078        read_8051_config(dd, VERIFY_CAP_REMOTE_FABRIC, GENERAL_CONFIG, &frame);
5079        *vau = (frame >> VAU_SHIFT) & VAU_MASK;
5080        *z = (frame >> Z_SHIFT) & Z_MASK;
5081        *vcu = (frame >> VCU_SHIFT) & VCU_MASK;
5082        *vl15buf = (frame >> VL15BUF_SHIFT) & VL15BUF_MASK;
5083        *crc_sizes = (frame >> CRC_SIZES_SHIFT) & CRC_SIZES_MASK;
5084}
5085
5086static void read_vc_remote_link_width(struct hfi1_devdata *dd,
5087                                      u8 *remote_tx_rate,
5088                                      u16 *link_widths)
5089{
5090        u32 frame;
5091
5092        read_8051_config(dd, VERIFY_CAP_REMOTE_LINK_WIDTH, GENERAL_CONFIG,
5093                                &frame);
5094        *remote_tx_rate = (frame >> REMOTE_TX_RATE_SHIFT)
5095                                & REMOTE_TX_RATE_MASK;
5096        *link_widths = (frame >> LINK_WIDTH_SHIFT) & LINK_WIDTH_MASK;
5097}
5098
5099static void read_local_lni(struct hfi1_devdata *dd, u8 *enable_lane_rx)
5100{
5101        u32 frame;
5102
5103        read_8051_config(dd, LOCAL_LNI_INFO, GENERAL_CONFIG, &frame);
5104        *enable_lane_rx = (frame >> ENABLE_LANE_RX_SHIFT) & ENABLE_LANE_RX_MASK;
5105}
5106
5107static void read_mgmt_allowed(struct hfi1_devdata *dd, u8 *mgmt_allowed)
5108{
5109        u32 frame;
5110
5111        read_8051_config(dd, REMOTE_LNI_INFO, GENERAL_CONFIG, &frame);
5112        *mgmt_allowed = (frame >> MGMT_ALLOWED_SHIFT) & MGMT_ALLOWED_MASK;
5113}
5114
5115static void read_last_local_state(struct hfi1_devdata *dd, u32 *lls)
5116{
5117        read_8051_config(dd, LAST_LOCAL_STATE_COMPLETE, GENERAL_CONFIG, lls);
5118}
5119
5120static void read_last_remote_state(struct hfi1_devdata *dd, u32 *lrs)
5121{
5122        read_8051_config(dd, LAST_REMOTE_STATE_COMPLETE, GENERAL_CONFIG, lrs);
5123}
5124
5125void hfi1_read_link_quality(struct hfi1_devdata *dd, u8 *link_quality)
5126{
5127        u32 frame;
5128        int ret;
5129
5130        *link_quality = 0;
5131        if (dd->pport->host_link_state & HLS_UP) {
5132                ret = read_8051_config(dd, LINK_QUALITY_INFO, GENERAL_CONFIG,
5133                                        &frame);
5134                if (ret == 0)
5135                        *link_quality = (frame >> LINK_QUALITY_SHIFT)
5136                                                & LINK_QUALITY_MASK;
5137        }
5138}
5139
5140static void read_planned_down_reason_code(struct hfi1_devdata *dd, u8 *pdrrc)
5141{
5142        u32 frame;
5143
5144        read_8051_config(dd, LINK_QUALITY_INFO, GENERAL_CONFIG, &frame);
5145        *pdrrc = (frame >> DOWN_REMOTE_REASON_SHIFT) & DOWN_REMOTE_REASON_MASK;
5146}
5147
5148static int read_tx_settings(struct hfi1_devdata *dd,
5149                            u8 *enable_lane_tx,
5150                            u8 *tx_polarity_inversion,
5151                            u8 *rx_polarity_inversion,
5152                            u8 *max_rate)
5153{
5154        u32 frame;
5155        int ret;
5156
5157        ret = read_8051_config(dd, TX_SETTINGS, GENERAL_CONFIG, &frame);
5158        *enable_lane_tx = (frame >> ENABLE_LANE_TX_SHIFT)
5159                                & ENABLE_LANE_TX_MASK;
5160        *tx_polarity_inversion = (frame >> TX_POLARITY_INVERSION_SHIFT)
5161                                & TX_POLARITY_INVERSION_MASK;
5162        *rx_polarity_inversion = (frame >> RX_POLARITY_INVERSION_SHIFT)
5163                                & RX_POLARITY_INVERSION_MASK;
5164        *max_rate = (frame >> MAX_RATE_SHIFT) & MAX_RATE_MASK;
5165        return ret;
5166}
5167
5168static int write_tx_settings(struct hfi1_devdata *dd,
5169                             u8 enable_lane_tx,
5170                             u8 tx_polarity_inversion,
5171                             u8 rx_polarity_inversion,
5172                             u8 max_rate)
5173{
5174        u32 frame;
5175
5176        /* no need to mask, all variable sizes match field widths */
5177        frame = enable_lane_tx << ENABLE_LANE_TX_SHIFT
5178                | tx_polarity_inversion << TX_POLARITY_INVERSION_SHIFT
5179                | rx_polarity_inversion << RX_POLARITY_INVERSION_SHIFT
5180                | max_rate << MAX_RATE_SHIFT;
5181        return load_8051_config(dd, TX_SETTINGS, GENERAL_CONFIG, frame);
5182}
5183
5184static void check_fabric_firmware_versions(struct hfi1_devdata *dd)
5185{
5186        u32 frame, version, prod_id;
5187        int ret, lane;
5188
5189        /* 4 lanes */
5190        for (lane = 0; lane < 4; lane++) {
5191                ret = read_8051_config(dd, SPICO_FW_VERSION, lane, &frame);
5192                if (ret) {
5193                        dd_dev_err(
5194                                dd,
5195                                "Unable to read lane %d firmware details\n",
5196                                lane);
5197                        continue;
5198                }
5199                version = (frame >> SPICO_ROM_VERSION_SHIFT)
5200                                        & SPICO_ROM_VERSION_MASK;
5201                prod_id = (frame >> SPICO_ROM_PROD_ID_SHIFT)
5202                                        & SPICO_ROM_PROD_ID_MASK;
5203                dd_dev_info(dd,
5204                        "Lane %d firmware: version 0x%04x, prod_id 0x%04x\n",
5205                        lane, version, prod_id);
5206        }
5207}
5208
5209/*
5210 * Read an idle LCB message.
5211 *
5212 * Returns 0 on success, -EINVAL on error
5213 */
5214static int read_idle_message(struct hfi1_devdata *dd, u64 type, u64 *data_out)
5215{
5216        int ret;
5217
5218        ret = do_8051_command(dd, HCMD_READ_LCB_IDLE_MSG,
5219                type, data_out);
5220        if (ret != HCMD_SUCCESS) {
5221                dd_dev_err(dd, "read idle message: type %d, err %d\n",
5222                        (u32)type, ret);
5223                return -EINVAL;
5224        }
5225        dd_dev_info(dd, "%s: read idle message 0x%llx\n", __func__, *data_out);
5226        /* return only the payload as we already know the type */
5227        *data_out >>= IDLE_PAYLOAD_SHIFT;
5228        return 0;
5229}
5230
5231/*
5232 * Read an idle SMA message.  To be done in response to a notification from
5233 * the 8051.
5234 *
5235 * Returns 0 on success, -EINVAL on error
5236 */
5237static int read_idle_sma(struct hfi1_devdata *dd, u64 *data)
5238{
5239        return read_idle_message(dd,
5240                        (u64)IDLE_SMA << IDLE_MSG_TYPE_SHIFT, data);
5241}
5242
5243/*
5244 * Send an idle LCB message.
5245 *
5246 * Returns 0 on success, -EINVAL on error
5247 */
5248static int send_idle_message(struct hfi1_devdata *dd, u64 data)
5249{
5250        int ret;
5251
5252        dd_dev_info(dd, "%s: sending idle message 0x%llx\n", __func__, data);
5253        ret = do_8051_command(dd, HCMD_SEND_LCB_IDLE_MSG, data, NULL);
5254        if (ret != HCMD_SUCCESS) {
5255                dd_dev_err(dd, "send idle message: data 0x%llx, err %d\n",
5256                        data, ret);
5257                return -EINVAL;
5258        }
5259        return 0;
5260}
5261
5262/*
5263 * Send an idle SMA message.
5264 *
5265 * Returns 0 on success, -EINVAL on error
5266 */
5267int send_idle_sma(struct hfi1_devdata *dd, u64 message)
5268{
5269        u64 data;
5270
5271        data = ((message & IDLE_PAYLOAD_MASK) << IDLE_PAYLOAD_SHIFT)
5272                | ((u64)IDLE_SMA << IDLE_MSG_TYPE_SHIFT);
5273        return send_idle_message(dd, data);
5274}
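
/*
 * Usage sketch (illustrative): the link state machine sends SMA idle
 * messages such as
 *
 *	ret = send_idle_sma(dd, SMA_IDLE_ACTIVE);
 *
 * which places the payload at IDLE_PAYLOAD_SHIFT, tags the message as
 * IDLE_SMA, and hands it to the 8051 via HCMD_SEND_LCB_IDLE_MSG.
 */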
5275
5276/*
5277 * Initialize the LCB then do a quick link up.  This may or may not be
5278 * in loopback.
5279 *
5280 * return 0 on success, -errno on error
5281 */
5282static int do_quick_linkup(struct hfi1_devdata *dd)
5283{
5284        u64 reg;
5285        unsigned long timeout;
5286        int ret;
5287
5288        lcb_shutdown(dd, 0);
5289
5290        if (loopback) {
5291                /* LCB_CFG_LOOPBACK.VAL = 2 */
5292                /* LCB_CFG_LANE_WIDTH.VAL = 0 */
5293                write_csr(dd, DC_LCB_CFG_LOOPBACK,
5294                        IB_PACKET_TYPE << DC_LCB_CFG_LOOPBACK_VAL_SHIFT);
5295                write_csr(dd, DC_LCB_CFG_LANE_WIDTH, 0);
5296        }
5297
5298        /* start the LCBs */
5299        /* LCB_CFG_TX_FIFOS_RESET.VAL = 0 */
5300        write_csr(dd, DC_LCB_CFG_TX_FIFOS_RESET, 0);
5301
5302        /* simulator only loopback steps */
5303        if (loopback && dd->icode == ICODE_FUNCTIONAL_SIMULATOR) {
5304                /* LCB_CFG_RUN.EN = 1 */
5305                write_csr(dd, DC_LCB_CFG_RUN,
5306                        1ull << DC_LCB_CFG_RUN_EN_SHIFT);
5307
5308                /* watch LCB_STS_LINK_TRANSFER_ACTIVE */
5309                timeout = jiffies + msecs_to_jiffies(10);
5310                while (1) {
5311                        reg = read_csr(dd,
5312                                DC_LCB_STS_LINK_TRANSFER_ACTIVE);
5313                        if (reg)
5314                                break;
5315                        if (time_after(jiffies, timeout)) {
5316                                dd_dev_err(dd,
5317                                        "timeout waiting for LINK_TRANSFER_ACTIVE\n");
5318                                return -ETIMEDOUT;
5319                        }
5320                        udelay(2);
5321                }
5322
5323                write_csr(dd, DC_LCB_CFG_ALLOW_LINK_UP,
5324                        1ull << DC_LCB_CFG_ALLOW_LINK_UP_VAL_SHIFT);
5325        }
5326
5327        if (!loopback) {
5328                /*
5329                 * When doing quick linkup and not in loopback, both
5330                 * sides must be done with LCB set-up before either
5331                 * starts the quick linkup.  Put a delay here so that
5332                 * both sides can be started and have a chance to be
5333                 * done with LCB set up before resuming.
5334                 */
5335                dd_dev_err(dd,
5336                        "Pausing for peer to be finished with LCB set up\n");
5337                msleep(5000);
5338                dd_dev_err(dd,
5339                        "Continuing with quick linkup\n");
5340        }
5341
5342        write_csr(dd, DC_LCB_ERR_EN, 0); /* mask LCB errors */
5343        set_8051_lcb_access(dd);
5344
5345        /*
5346         * State "quick" LinkUp request sets the physical link state to
5347         * LinkUp without a verify capability sequence.
5348         * This state is in simulator v37 and later.
5349         */
5350        ret = set_physical_link_state(dd, PLS_QUICK_LINKUP);
5351        if (ret != HCMD_SUCCESS) {
5352                dd_dev_err(dd,
5353                        "%s: set physical link state to quick LinkUp failed with return %d\n",
5354                        __func__, ret);
5355
5356                set_host_lcb_access(dd);
5357                write_csr(dd, DC_LCB_ERR_EN, ~0ull); /* watch LCB errors */
5358
5359                if (ret >= 0)
5360                        ret = -EINVAL;
5361                return ret;
5362        }
5363
5364        return 0; /* success */
5365}
5366
5367/*
5368 * Set the SerDes to internal loopback mode.
5369 * Returns 0 on success, -errno on error.
5370 */
5371static int set_serdes_loopback_mode(struct hfi1_devdata *dd)
5372{
5373        int ret;
5374
5375        ret = set_physical_link_state(dd, PLS_INTERNAL_SERDES_LOOPBACK);
5376        if (ret == HCMD_SUCCESS)
5377                return 0;
5378        dd_dev_err(dd,
5379                "Set physical link state to SerDes Loopback failed with return %d\n",
5380                ret);
5381        if (ret >= 0)
5382                ret = -EINVAL;
5383        return ret;
5384}
5385
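/*
 * The error convention above repeats throughout this file: 8051 host
 * commands report HCMD_* status codes, and callers fold anything other
 * than HCMD_SUCCESS into a negative errno.  Below is a minimal sketch
 * of that convention as a helper; the name hcmd_status_to_errno is
 * illustrative only and is not an existing driver symbol.
 */
static int hcmd_status_to_errno(struct hfi1_devdata *dd, int hret,
				const char *what)
{
	if (hret == HCMD_SUCCESS)
		return 0;
	dd_dev_err(dd, "%s failed with return %d\n", what, hret);
	/* non-negative HCMD failure codes are not errnos; normalize */
	return hret >= 0 ? -EINVAL : hret;
}
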
5386/*
5387 * Do all special steps to set up loopback.
5388 */
5389static int init_loopback(struct hfi1_devdata *dd)
5390{
5391        dd_dev_info(dd, "Entering loopback mode\n");
5392
5393        /* all loopbacks should disable self GUID check */
5394        write_csr(dd, DC_DC8051_CFG_MODE,
5395                (read_csr(dd, DC_DC8051_CFG_MODE) | DISABLE_SELF_GUID_CHECK));
5396
5397        /*
5398         * The simulator has only one loopback option - LCB.  Switch
5399         * to that option, which includes quick link up.
5400         *
5401         * Accept all valid loopback values.
5402         */
5403        if ((dd->icode == ICODE_FUNCTIONAL_SIMULATOR)
5404                && (loopback == LOOPBACK_SERDES
5405                        || loopback == LOOPBACK_LCB
5406                        || loopback == LOOPBACK_CABLE)) {
5407                loopback = LOOPBACK_LCB;
5408                quick_linkup = 1;
5409                return 0;
5410        }
5411
5412        /* handle serdes loopback */
5413        if (loopback == LOOPBACK_SERDES) {
5414                /* internal serdes loopback needs quick linkup on RTL */
5415                if (dd->icode == ICODE_RTL_SILICON)
5416                        quick_linkup = 1;
5417                return set_serdes_loopback_mode(dd);
5418        }
5419
5420        /* LCB loopback - handled at poll time */
5421        if (loopback == LOOPBACK_LCB) {
5422                quick_linkup = 1; /* LCB is always quick linkup */
5423
5424                /* not supported in emulation due to emulation RTL changes */
5425                if (dd->icode == ICODE_FPGA_EMULATION) {
5426                        dd_dev_err(dd,
5427                                "LCB loopback not supported in emulation\n");
5428                        return -EINVAL;
5429                }
5430                return 0;
5431        }
5432
5433        /* external cable loopback requires no extra steps */
5434        if (loopback == LOOPBACK_CABLE)
5435                return 0;
5436
5437        dd_dev_err(dd, "Invalid loopback mode %d\n", loopback);
5438        return -EINVAL;
5439}
5440
5441/*
5442 * Translate from the OPA_LINK_WIDTH handed to us by the FM to bits
5443 * used in the Verify Capability link width attribute.
5444 */
5445static u16 opa_to_vc_link_widths(u16 opa_widths)
5446{
5447        int i;
5448        u16 result = 0;
5449
5450        static const struct link_bits {
5451                u16 from;
5452                u16 to;
5453        } opa_link_xlate[] = {
5454                { OPA_LINK_WIDTH_1X, 1 << (1-1)  },
5455                { OPA_LINK_WIDTH_2X, 1 << (2-1)  },
5456                { OPA_LINK_WIDTH_3X, 1 << (3-1)  },
5457                { OPA_LINK_WIDTH_4X, 1 << (4-1)  },
5458        };
5459
5460        for (i = 0; i < ARRAY_SIZE(opa_link_xlate); i++) {
5461                if (opa_widths & opa_link_xlate[i].from)
5462                        result |= opa_link_xlate[i].to;
5463        }
5464        return result;
5465}
5466
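/*
 * Worked example for the translation above: an FM width mask of
 * OPA_LINK_WIDTH_1X | OPA_LINK_WIDTH_4X becomes (1 << 0) | (1 << 3)
 * == 0x9 in the Verify Capability encoding, where bit N-1 stands for
 * a width of NX.
 */
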
5467/*
5468 * Set link attributes before moving to polling.
5469 */
5470static int set_local_link_attributes(struct hfi1_pportdata *ppd)
5471{
5472        struct hfi1_devdata *dd = ppd->dd;
5473        u8 enable_lane_tx;
5474        u8 tx_polarity_inversion;
5475        u8 rx_polarity_inversion;
5476        int ret;
5477
5478        /* reset our fabric serdes to clear any lingering problems */
5479        fabric_serdes_reset(dd);
5480
5481        /* set the local tx rate - need to read-modify-write */
5482        ret = read_tx_settings(dd, &enable_lane_tx, &tx_polarity_inversion,
5483                &rx_polarity_inversion, &ppd->local_tx_rate);
5484        if (ret)
5485                goto set_local_link_attributes_fail;
5486
5487        if (dd->dc8051_ver < dc8051_ver(0, 20)) {
5488                /* set the tx rate to the fastest enabled */
5489                if (ppd->link_speed_enabled & OPA_LINK_SPEED_25G)
5490                        ppd->local_tx_rate = 1;
5491                else
5492                        ppd->local_tx_rate = 0;
5493        } else {
5494                /* set the tx rate to all enabled */
5495                ppd->local_tx_rate = 0;
5496                if (ppd->link_speed_enabled & OPA_LINK_SPEED_25G)
5497                        ppd->local_tx_rate |= 2;
5498                if (ppd->link_speed_enabled & OPA_LINK_SPEED_12_5G)
5499                        ppd->local_tx_rate |= 1;
5500        }
5501
5502        enable_lane_tx = 0xF; /* enable all four lanes */
5503        ret = write_tx_settings(dd, enable_lane_tx, tx_polarity_inversion,
5504                     rx_polarity_inversion, ppd->local_tx_rate);
5505        if (ret != HCMD_SUCCESS)
5506                goto set_local_link_attributes_fail;
5507
5508        /*
5509         * DC supports continuous updates.
5510         */
5511        ret = write_vc_local_phy(dd, 0 /* no power management */,
5512                                     1 /* continuous updates */);
5513        if (ret != HCMD_SUCCESS)
5514                goto set_local_link_attributes_fail;
5515
5516        /* z=1 in the next call: AU of 0 is not supported by the hardware */
5517        ret = write_vc_local_fabric(dd, dd->vau, 1, dd->vcu, dd->vl15_init,
5518                                    ppd->port_crc_mode_enabled);
5519        if (ret != HCMD_SUCCESS)
5520                goto set_local_link_attributes_fail;
5521
5522        ret = write_vc_local_link_width(dd, 0, 0,
5523                     opa_to_vc_link_widths(ppd->link_width_enabled));
5524        if (ret != HCMD_SUCCESS)
5525                goto set_local_link_attributes_fail;
5526
5527        /* let peer know who we are */
5528        ret = write_local_device_id(dd, dd->pcidev->device, dd->minrev);
5529        if (ret == HCMD_SUCCESS)
5530                return 0;
5531
5532set_local_link_attributes_fail:
5533        dd_dev_err(dd,
5534                "Failed to set local link attributes, return 0x%x\n",
5535                ret);
5536        return ret;
5537}
5538
5539/*
5540 * Call this to start the link.  Schedule a retry if the cable is not
5541 * present or if unable to start polling.  Do not do anything if the
5542 * link is disabled.  Returns 0 if link is disabled or moved to polling
5543 */
5544int start_link(struct hfi1_pportdata *ppd)
5545{
5546        if (!ppd->link_enabled) {
5547                dd_dev_info(ppd->dd,
5548                        "%s: stopping link start because link is disabled\n",
5549                        __func__);
5550                return 0;
5551        }
5552        if (!ppd->driver_link_ready) {
5553                dd_dev_info(ppd->dd,
5554                        "%s: stopping link start because driver is not ready\n",
5555                        __func__);
5556                return 0;
5557        }
5558
5559        if (qsfp_mod_present(ppd) || loopback == LOOPBACK_SERDES ||
5560                        loopback == LOOPBACK_LCB ||
5561                        ppd->dd->icode == ICODE_FUNCTIONAL_SIMULATOR)
5562                return set_link_state(ppd, HLS_DN_POLL);
5563
5564        dd_dev_info(ppd->dd,
5565                "%s: stopping link start because no cable is present\n",
5566                __func__);
5567        return -EAGAIN;
5568}
5569
5570static void reset_qsfp(struct hfi1_pportdata *ppd)
5571{
5572        struct hfi1_devdata *dd = ppd->dd;
5573        u64 mask, qsfp_mask;
5574
5575        mask = (u64)QSFP_HFI0_RESET_N;
5576        qsfp_mask = read_csr(dd,
5577                dd->hfi1_id ? ASIC_QSFP2_OE : ASIC_QSFP1_OE);
5578        qsfp_mask |= mask;
5579        write_csr(dd,
5580                dd->hfi1_id ? ASIC_QSFP2_OE : ASIC_QSFP1_OE,
5581                qsfp_mask);
5582
5583        qsfp_mask = read_csr(dd,
5584                dd->hfi1_id ? ASIC_QSFP2_OUT : ASIC_QSFP1_OUT);
5585        qsfp_mask &= ~mask;
5586        write_csr(dd,
5587                dd->hfi1_id ? ASIC_QSFP2_OUT : ASIC_QSFP1_OUT,
5588                qsfp_mask);
5589
5590        udelay(10);
5591
5592        qsfp_mask |= mask;
5593        write_csr(dd,
5594                dd->hfi1_id ? ASIC_QSFP2_OUT : ASIC_QSFP1_OUT,
5595                qsfp_mask);
5596}
5597
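/*
 * reset_qsfp() above is a reset pulse on the active-low RESET_N pin:
 * enable the pin as an output, drive it low, hold for 10us, then
 * release it high.  The same pattern in isolation (a sketch; the
 * helper name and standalone form are illustrative only):
 */
static void qsfp_pulse_reset_n(struct hfi1_devdata *dd, u32 out_csr, u64 pin)
{
	u64 val = read_csr(dd, out_csr);

	write_csr(dd, out_csr, val & ~pin);	/* assert: drive low */
	udelay(10);				/* hold time */
	write_csr(dd, out_csr, val | pin);	/* deassert: release high */
}
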
5598static int handle_qsfp_error_conditions(struct hfi1_pportdata *ppd,
5599                                        u8 *qsfp_interrupt_status)
5600{
5601        struct hfi1_devdata *dd = ppd->dd;
5602
5603        if ((qsfp_interrupt_status[0] & QSFP_HIGH_TEMP_ALARM) ||
5604                (qsfp_interrupt_status[0] & QSFP_HIGH_TEMP_WARNING))
5605                dd_dev_info(dd,
5606                        "%s: QSFP cable temperature too high\n",
5607                        __func__);
5608
5609        if ((qsfp_interrupt_status[0] & QSFP_LOW_TEMP_ALARM) ||
5610                (qsfp_interrupt_status[0] & QSFP_LOW_TEMP_WARNING))
5611                dd_dev_info(dd,
5612                        "%s: QSFP cable temperature too low\n",
5613                        __func__);
5614
5615        if ((qsfp_interrupt_status[1] & QSFP_HIGH_VCC_ALARM) ||
5616                (qsfp_interrupt_status[1] & QSFP_HIGH_VCC_WARNING))
5617                dd_dev_info(dd,
5618                        "%s: QSFP supply voltage too high\n",
5619                        __func__);
5620
5621        if ((qsfp_interrupt_status[1] & QSFP_LOW_VCC_ALARM) ||
5622                (qsfp_interrupt_status[1] & QSFP_LOW_VCC_WARNING))
5623                dd_dev_info(dd,
5624                        "%s: QSFP supply voltage too low\n",
5625                        __func__);
5626
5627        /* Byte 2 is vendor specific */
5628
5629        if ((qsfp_interrupt_status[3] & QSFP_HIGH_POWER_ALARM) ||
5630                (qsfp_interrupt_status[3] & QSFP_HIGH_POWER_WARNING))
5631                dd_dev_info(dd,
5632                        "%s: Cable RX channel 1/2 power too high\n",
5633                        __func__);
5634
5635        if ((qsfp_interrupt_status[3] & QSFP_LOW_POWER_ALARM) ||
5636                (qsfp_interrupt_status[3] & QSFP_LOW_POWER_WARNING))
5637                dd_dev_info(dd,
5638                        "%s: Cable RX channel 1/2 power too low\n",
5639                        __func__);
5640
5641        if ((qsfp_interrupt_status[4] & QSFP_HIGH_POWER_ALARM) ||
5642                (qsfp_interrupt_status[4] & QSFP_HIGH_POWER_WARNING))
5643                dd_dev_info(dd,
5644                        "%s: Cable RX channel 3/4 power too high\n",
5645                        __func__);
5646
5647        if ((qsfp_interrupt_status[4] & QSFP_LOW_POWER_ALARM) ||
5648                (qsfp_interrupt_status[4] & QSFP_LOW_POWER_WARNING))
5649                dd_dev_info(dd,
5650                        "%s: Cable RX channel 3/4 power too low\n",
5651                        __func__);
5652
5653        if ((qsfp_interrupt_status[5] & QSFP_HIGH_BIAS_ALARM) ||
5654                (qsfp_interrupt_status[5] & QSFP_HIGH_BIAS_WARNING))
5655                dd_dev_info(dd,
5656                        "%s: Cable TX channel 1/2 bias too high\n",
5657                        __func__);
5658
5659        if ((qsfp_interrupt_status[5] & QSFP_LOW_BIAS_ALARM) ||
5660                (qsfp_interrupt_status[5] & QSFP_LOW_BIAS_WARNING))
5661                dd_dev_info(dd,
5662                        "%s: Cable TX channel 1/2 bias too low\n",
5663                        __func__);
5664
5665        if ((qsfp_interrupt_status[6] & QSFP_HIGH_BIAS_ALARM) ||
5666                (qsfp_interrupt_status[6] & QSFP_HIGH_BIAS_WARNING))
5667                dd_dev_info(dd,
5668                        "%s: Cable TX channel 3/4 bias too high\n",
5669                        __func__);
5670
5671        if ((qsfp_interrupt_status[6] & QSFP_LOW_BIAS_ALARM) ||
5672                (qsfp_interrupt_status[6] & QSFP_LOW_BIAS_WARNING))
5673                dd_dev_info(dd,
5674                        "%s: Cable TX channel 3/4 bias too low\n",
5675                        __func__);
5676
5677        if ((qsfp_interrupt_status[7] & QSFP_HIGH_POWER_ALARM) ||
5678                (qsfp_interrupt_status[7] & QSFP_HIGH_POWER_WARNING))
5679                dd_dev_info(dd,
5680                        "%s: Cable TX channel 1/2 power too high\n",
5681                        __func__);
5682
5683        if ((qsfp_interrupt_status[7] & QSFP_LOW_POWER_ALARM) ||
5684                (qsfp_interrupt_status[7] & QSFP_LOW_POWER_WARNING))
5685                dd_dev_info(dd,
5686                        "%s: Cable TX channel 1/2 power too low\n",
5687                        __func__);
5688
5689        if ((qsfp_interrupt_status[8] & QSFP_HIGH_POWER_ALARM) ||
5690                (qsfp_interrupt_status[8] & QSFP_HIGH_POWER_WARNING))
5691                dd_dev_info(dd,
5692                        "%s: Cable TX channel 3/4 power too high\n",
5693                        __func__);
5694
5695        if ((qsfp_interrupt_status[8] & QSFP_LOW_POWER_ALARM) ||
5696                (qsfp_interrupt_status[8] & QSFP_LOW_POWER_WARNING))
5697                dd_dev_info(dd,
5698                        "%s: Cable TX channel 3/4 power too low\n",
5699                        __func__);
5700
5701        /* Bytes 9-10 and 11-12 are reserved */
5702        /* Bytes 13-15 are vendor specific */
5703
5704        return 0;
5705}
5706
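/*
 * The ladder above walks the QSFP interrupt status bytes: byte 0 is
 * temperature, byte 1 is supply voltage, bytes 3-4 are RX power,
 * bytes 5-6 are TX bias, and bytes 7-8 are TX power, each split into
 * channel 1/2 and 3/4 halves.  A table-driven sketch of the same
 * checks (the struct and array below are illustrative only, not
 * driver code):
 */
static const struct qsfp_alarm_check {
	u8 byte;	/* index into qsfp_interrupt_status[] */
	u8 mask;	/* alarm and warning bits for that byte */
	const char *msg;
} qsfp_alarm_checks[] = {
	{ 0, QSFP_HIGH_TEMP_ALARM | QSFP_HIGH_TEMP_WARNING,
	  "QSFP cable temperature too high" },
	{ 0, QSFP_LOW_TEMP_ALARM | QSFP_LOW_TEMP_WARNING,
	  "QSFP cable temperature too low" },
	{ 1, QSFP_HIGH_VCC_ALARM | QSFP_HIGH_VCC_WARNING,
	  "QSFP supply voltage too high" },
	/* ... bytes 1 and 3-8 continue as in the ladder above ... */
};
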
5707static int do_pre_lni_host_behaviors(struct hfi1_pportdata *ppd)
5708{
5709        refresh_qsfp_cache(ppd, &ppd->qsfp_info);
5710
5711        return 0;
5712}
5713
5714static int do_qsfp_intr_fallback(struct hfi1_pportdata *ppd)
5715{
5716        struct hfi1_devdata *dd = ppd->dd;
5717        u8 qsfp_interrupt_status = 0;
5718
5719        if (qsfp_read(ppd, dd->hfi1_id, 2, &qsfp_interrupt_status, 1)
5720                != 1) {
5721                dd_dev_info(dd,
5722                        "%s: Failed to read status of QSFP module\n",
5723                        __func__);
5724                return -EIO;
5725        }
5726
5727        /* We don't care about alarms & warnings with a non-functional INT_N */
5728        if (!(qsfp_interrupt_status & QSFP_DATA_NOT_READY))
5729                do_pre_lni_host_behaviors(ppd);
5730
5731        return 0;
5732}
5733
5734/* This routine will only be scheduled if the QSFP module is present */
5735static void qsfp_event(struct work_struct *work)
5736{
5737        struct qsfp_data *qd;
5738        struct hfi1_pportdata *ppd;
5739        struct hfi1_devdata *dd;
5740
5741        qd = container_of(work, struct qsfp_data, qsfp_work);
5742        ppd = qd->ppd;
5743        dd = ppd->dd;
5744
5745        /* Sanity check */
5746        if (!qsfp_mod_present(ppd))
5747                return;
5748
5749        /*
5750         * Turn the DC back on after the cable has been
5751         * re-inserted.  Up until now, the DC has been held in
5752         * reset to save power.
5753         */
5754        dc_start(dd);
5755
5756        if (qd->cache_refresh_required) {
5757                msleep(3000);
5758                reset_qsfp(ppd);
5759
5760                /* Check for QSFP interrupt after t_init (SFF-8679)
5761                 * plus some extra margin
5762                 */
5763                msleep(3000);
5764                if (!qd->qsfp_interrupt_functional) {
5765                        if (do_qsfp_intr_fallback(ppd) < 0)
5766                                dd_dev_info(dd, "%s: QSFP fallback failed\n",
5767                                        __func__);
5768                        ppd->driver_link_ready = 1;
5769                        start_link(ppd);
5770                }
5771        }
5772
5773        if (qd->check_interrupt_flags) {
5774                u8 qsfp_interrupt_status[16] = {0,};
5775
5776                if (qsfp_read(ppd, dd->hfi1_id, 6,
5777                              &qsfp_interrupt_status[0], 16) != 16) {
5778                        dd_dev_info(dd,
5779                                "%s: Failed to read status of QSFP module\n",
5780                                __func__);
5781                } else {
5782                        unsigned long flags;
5783                        u8 data_status;
5784
5785                        spin_lock_irqsave(&ppd->qsfp_info.qsfp_lock, flags);
5786                        ppd->qsfp_info.check_interrupt_flags = 0;
5787                        spin_unlock_irqrestore(&ppd->qsfp_info.qsfp_lock,
5788                                                                flags);
5789
5790                        if (qsfp_read(ppd, dd->hfi1_id, 2, &data_status, 1)
5791                                 != 1) {
5792                                dd_dev_info(dd,
5793                                "%s: Failed to read status of QSFP module\n",
5794                                        __func__);
5795                        }
5796                        if (!(data_status & QSFP_DATA_NOT_READY)) {
5797                                do_pre_lni_host_behaviors(ppd);
5798                                start_link(ppd);
5799                        } else
5800                                handle_qsfp_error_conditions(ppd,
5801                                                qsfp_interrupt_status);
5802                }
5803        }
5804}
5805
5806void init_qsfp(struct hfi1_pportdata *ppd)
5807{
5808        struct hfi1_devdata *dd = ppd->dd;
5809        u64 qsfp_mask;
5810
5811        if (loopback == LOOPBACK_SERDES || loopback == LOOPBACK_LCB ||
5812                        ppd->dd->icode == ICODE_FUNCTIONAL_SIMULATOR) {
5813                ppd->driver_link_ready = 1;
5814                return;
5815        }
5816
5817        ppd->qsfp_info.ppd = ppd;
5818        INIT_WORK(&ppd->qsfp_info.qsfp_work, qsfp_event);
5819
5820        qsfp_mask = (u64)(QSFP_HFI0_INT_N | QSFP_HFI0_MODPRST_N);
5821        /* Clear current status to avoid spurious interrupts */
5822        write_csr(dd,
5823                        dd->hfi1_id ?
5824                                ASIC_QSFP2_CLEAR :
5825                                ASIC_QSFP1_CLEAR,
5826                qsfp_mask);
5827
5828        /* Handle active low nature of INT_N and MODPRST_N pins */
5829        if (qsfp_mod_present(ppd))
5830                qsfp_mask &= ~(u64)QSFP_HFI0_MODPRST_N;
5831        write_csr(dd,
5832                  dd->hfi1_id ? ASIC_QSFP2_INVERT : ASIC_QSFP1_INVERT,
5833                  qsfp_mask);
5834
5835        /* Allow only INT_N and MODPRST_N to trigger QSFP interrupts */
5836        qsfp_mask |= (u64)QSFP_HFI0_MODPRST_N;
5837        write_csr(dd,
5838                dd->hfi1_id ? ASIC_QSFP2_MASK : ASIC_QSFP1_MASK,
5839                qsfp_mask);
5840
5841        if (qsfp_mod_present(ppd)) {
5842                msleep(3000);
5843                reset_qsfp(ppd);
5844
5845                /* Check for QSFP interrupt after t_init (SFF-8679)
5846                 * plus some extra margin
5847                 */
5848                msleep(3000);
5849                if (!ppd->qsfp_info.qsfp_interrupt_functional) {
5850                        if (do_qsfp_intr_fallback(ppd) < 0)
5851                                dd_dev_info(dd,
5852                                        "%s: QSFP fallback failed\n",
5853                                        __func__);
5854                        ppd->driver_link_ready = 1;
5855                }
5856        }
5857}
5858
5859int bringup_serdes(struct hfi1_pportdata *ppd)
5860{
5861        struct hfi1_devdata *dd = ppd->dd;
5862        u64 guid;
5863        int ret;
5864
5865        if (HFI1_CAP_IS_KSET(EXTENDED_PSN))
5866                add_rcvctrl(dd, RCV_CTRL_RCV_EXTENDED_PSN_ENABLE_SMASK);
5867
5868        guid = ppd->guid;
5869        if (!guid) {
5870                if (dd->base_guid)
5871                        guid = dd->base_guid + ppd->port - 1;
5872                ppd->guid = guid;
5873        }
5874
5875        /* the link defaults to enabled */
5876        ppd->link_enabled = 1;
5877        /* Set linkinit_reason on power up per OPA spec */
5878        ppd->linkinit_reason = OPA_LINKINIT_REASON_LINKUP;
5879
5880        if (loopback) {
5881                ret = init_loopback(dd);
5882                if (ret < 0)
5883                        return ret;
5884        }
5885
5886        return start_link(ppd);
5887}
5888
5889void hfi1_quiet_serdes(struct hfi1_pportdata *ppd)
5890{
5891        struct hfi1_devdata *dd = ppd->dd;
5892
5893        /*
5894         * Shut down the link and keep it down.  First, clear the flag
5895         * saying the driver wants to allow the link up (driver_link_ready).
5896         * Then make sure the link is not automatically restarted
5897         * (link_enabled).  Cancel any pending restart.  And finally
5898         * go offline.
5899         */
5900        ppd->driver_link_ready = 0;
5901        ppd->link_enabled = 0;
5902
5903        set_link_down_reason(ppd, OPA_LINKDOWN_REASON_SMA_DISABLED, 0,
5904          OPA_LINKDOWN_REASON_SMA_DISABLED);
5905        set_link_state(ppd, HLS_DN_OFFLINE);
5906
5907        /* disable the port */
5908        clear_rcvctrl(dd, RCV_CTRL_RCV_PORT_ENABLE_SMASK);
5909}
5910
5911static inline int init_cpu_counters(struct hfi1_devdata *dd)
5912{
5913        struct hfi1_pportdata *ppd;
5914        int i;
5915
5916        ppd = (struct hfi1_pportdata *)(dd + 1);
5917        for (i = 0; i < dd->num_pports; i++, ppd++) {
5918                ppd->ibport_data.rc_acks = NULL;
5919                ppd->ibport_data.rc_qacks = NULL;
5920                ppd->ibport_data.rc_acks = alloc_percpu(u64);
5921                ppd->ibport_data.rc_qacks = alloc_percpu(u64);
5922                ppd->ibport_data.rc_delayed_comp = alloc_percpu(u64);
5923                if ((ppd->ibport_data.rc_acks == NULL) ||
5924                    (ppd->ibport_data.rc_delayed_comp == NULL) ||
5925                    (ppd->ibport_data.rc_qacks == NULL))
5926                        return -ENOMEM;
5927        }
5928
5929        return 0;
5930}
5931
5932static const char * const pt_names[] = {
5933        "expected",
5934        "eager",
5935        "invalid"
5936};
5937
5938static const char *pt_name(u32 type)
5939{
5940        return type >= ARRAY_SIZE(pt_names) ? "unknown" : pt_names[type];
5941}
5942
5943/*
5944 * index is the index into the receive array
5945 */
5946void hfi1_put_tid(struct hfi1_devdata *dd, u32 index,
5947                  u32 type, unsigned long pa, u16 order)
5948{
5949        u64 reg;
5950        void __iomem *base = (dd->rcvarray_wc ? dd->rcvarray_wc :
5951                              (dd->kregbase + RCV_ARRAY));
5952
5953        if (!(dd->flags & HFI1_PRESENT))
5954                goto done;
5955
5956        if (type == PT_INVALID) {
5957                pa = 0;
5958        } else if (type > PT_INVALID) {
5959                dd_dev_err(dd,
5960                        "unexpected receive array type %u for index %u, not handled\n",
5961                        type, index);
5962                goto done;
5963        }
5964
5965        hfi1_cdbg(TID, "type %s, index 0x%x, pa 0x%lx, bsize 0x%lx",
5966                  pt_name(type), index, pa, (unsigned long)order);
5967
5968#define RT_ADDR_SHIFT 12        /* 4KB kernel address boundary */
5969        reg = RCV_ARRAY_RT_WRITE_ENABLE_SMASK
5970                | (u64)order << RCV_ARRAY_RT_BUF_SIZE_SHIFT
5971                | ((pa >> RT_ADDR_SHIFT) & RCV_ARRAY_RT_ADDR_MASK)
5972                                        << RCV_ARRAY_RT_ADDR_SHIFT;
5973        writeq(reg, base + (index * 8));
5974
5975        if (type == PT_EAGER)
5976                /*
5977                 * Eager entries are written one-by-one so we have to push them
5978                 * after we write the entry.
5979                 */
5980                flush_wc();
5981done:
5982        return;
5983}
5984
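/*
 * Worked example for the packing above: an eager buffer at physical
 * address 0x12345000 with order 4 is written as
 * RCV_ARRAY_RT_WRITE_ENABLE_SMASK | (4 << RCV_ARRAY_RT_BUF_SIZE_SHIFT)
 * | (0x12345 << RCV_ARRAY_RT_ADDR_SHIFT), the address being stored in
 * 4KB units (RT_ADDR_SHIFT == 12).
 */
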
5985void hfi1_clear_tids(struct hfi1_ctxtdata *rcd)
5986{
5987        struct hfi1_devdata *dd = rcd->dd;
5988        u32 i;
5989
5990        /* this could be optimized */
5991        for (i = rcd->eager_base; i < rcd->eager_base +
5992                     rcd->egrbufs.alloced; i++)
5993                hfi1_put_tid(dd, i, PT_INVALID, 0, 0);
5994
5995        for (i = rcd->expected_base;
5996                        i < rcd->expected_base + rcd->expected_count; i++)
5997                hfi1_put_tid(dd, i, PT_INVALID, 0, 0);
5998}
5999
6000int hfi1_get_base_kinfo(struct hfi1_ctxtdata *rcd,
6001                        struct hfi1_ctxt_info *kinfo)
6002{
6003        kinfo->runtime_flags = (HFI1_MISC_GET() << HFI1_CAP_USER_SHIFT) |
6004                HFI1_CAP_UGET(MASK) | HFI1_CAP_KGET(K2U);
6005        return 0;
6006}
6007
6008struct hfi1_message_header *hfi1_get_msgheader(
6009                                struct hfi1_devdata *dd, __le32 *rhf_addr)
6010{
6011        u32 offset = rhf_hdrq_offset(rhf_to_cpu(rhf_addr));
6012
6013        return (struct hfi1_message_header *)
6014                (rhf_addr - dd->rhf_offset + offset);
6015}
6016
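/*
 * A note on the arithmetic above: rhf_addr and dd->rhf_offset are in
 * 32-bit (__le32) units, so subtracting rhf_offset backs up from the
 * RHF to the start of the receive header queue entry, and adding the
 * RHF-supplied offset then lands on the message header itself.
 */
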
6017static const char * const ib_cfg_name_strings[] = {
6018        "HFI1_IB_CFG_LIDLMC",
6019        "HFI1_IB_CFG_LWID_DG_ENB",
6020        "HFI1_IB_CFG_LWID_ENB",
6021        "HFI1_IB_CFG_LWID",
6022        "HFI1_IB_CFG_SPD_ENB",
6023        "HFI1_IB_CFG_SPD",
6024        "HFI1_IB_CFG_RXPOL_ENB",
6025        "HFI1_IB_CFG_LREV_ENB",
6026        "HFI1_IB_CFG_LINKLATENCY",
6027        "HFI1_IB_CFG_HRTBT",
6028        "HFI1_IB_CFG_OP_VLS",
6029        "HFI1_IB_CFG_VL_HIGH_CAP",
6030        "HFI1_IB_CFG_VL_LOW_CAP",
6031        "HFI1_IB_CFG_OVERRUN_THRESH",
6032        "HFI1_IB_CFG_PHYERR_THRESH",
6033        "HFI1_IB_CFG_LINKDEFAULT",
6034        "HFI1_IB_CFG_PKEYS",
6035        "HFI1_IB_CFG_MTU",
6036        "HFI1_IB_CFG_LSTATE",
6037        "HFI1_IB_CFG_VL_HIGH_LIMIT",
6038        "HFI1_IB_CFG_PMA_TICKS",
6039        "HFI1_IB_CFG_PORT"
6040};
6041
6042static const char *ib_cfg_name(int which)
6043{
6044        if (which < 0 || which >= ARRAY_SIZE(ib_cfg_name_strings))
6045                return "invalid";
6046        return ib_cfg_name_strings[which];
6047}
6048
6049int hfi1_get_ib_cfg(struct hfi1_pportdata *ppd, int which)
6050{
6051        struct hfi1_devdata *dd = ppd->dd;
6052        int val = 0;
6053
6054        switch (which) {
6055        case HFI1_IB_CFG_LWID_ENB: /* allowed Link-width */
6056                val = ppd->link_width_enabled;
6057                break;
6058        case HFI1_IB_CFG_LWID: /* currently active Link-width */
6059                val = ppd->link_width_active;
6060                break;
6061        case HFI1_IB_CFG_SPD_ENB: /* allowed Link speeds */
6062                val = ppd->link_speed_enabled;
6063                break;
6064        case HFI1_IB_CFG_SPD: /* current Link speed */
6065                val = ppd->link_speed_active;
6066                break;
6067
6068        case HFI1_IB_CFG_RXPOL_ENB: /* Auto-RX-polarity enable */
6069        case HFI1_IB_CFG_LREV_ENB: /* Auto-Lane-reversal enable */
6070        case HFI1_IB_CFG_LINKLATENCY:
6071                goto unimplemented;
6072
6073        case HFI1_IB_CFG_OP_VLS:
6074                val = ppd->vls_operational;
6075                break;
6076        case HFI1_IB_CFG_VL_HIGH_CAP: /* VL arb high priority table size */
6077                val = VL_ARB_HIGH_PRIO_TABLE_SIZE;
6078                break;
6079        case HFI1_IB_CFG_VL_LOW_CAP: /* VL arb low priority table size */
6080                val = VL_ARB_LOW_PRIO_TABLE_SIZE;
6081                break;
6082        case HFI1_IB_CFG_OVERRUN_THRESH: /* IB overrun threshold */
6083                val = ppd->overrun_threshold;
6084                break;
6085        case HFI1_IB_CFG_PHYERR_THRESH: /* IB PHY error threshold */
6086                val = ppd->phy_error_threshold;
6087                break;
6088        case HFI1_IB_CFG_LINKDEFAULT: /* IB link default (sleep/poll) */
6089                val = dd->link_default;
6090                break;
6091
6092        case HFI1_IB_CFG_HRTBT: /* Heartbeat off/enable/auto */
6093        case HFI1_IB_CFG_PMA_TICKS:
6094        default:
6095unimplemented:
6096                if (HFI1_CAP_IS_KSET(PRINT_UNIMPL))
6097                        dd_dev_info(
6098                                dd,
6099                                "%s: which %s: not implemented\n",
6100                                __func__,
6101                                ib_cfg_name(which));
6102                break;
6103        }
6104
6105        return val;
6106}
6107
6108/*
6109 * The largest MAD packet size.
6110 */
6111#define MAX_MAD_PACKET 2048
6112
6113/*
6114 * Return the maximum header bytes that can go on the _wire_
6115 * for this device. This count includes the ICRC which is
6116 * not part of the packet held in memory but it is appended
6117 * by the HW.
6118 * This is dependent on the device's receive header entry size.
6119 * HFI allows this to be set per-receive context, but the
6120 * driver presently enforces a global value.
6121 */
6122u32 lrh_max_header_bytes(struct hfi1_devdata *dd)
6123{
6124        /*
6125         * The maximum non-payload (MTU) bytes in LRH.PktLen are
6126         * the Receive Header Entry Size minus the PBC (or RHF) size
6127         * plus one DW for the ICRC appended by HW.
6128         *
6129         * dd->rcd[0].rcvhdrqentsize is in DW.
6130         * We use rcd[0] as all contexts will have the same value.  Also,
6131         * the first kernel context would have been allocated by now so
6132         * we are guaranteed a valid value.
6133         */
6134        return (dd->rcd[0]->rcvhdrqentsize - 2/*PBC/RHF*/ + 1/*ICRC*/) << 2;
6135}
6136
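/*
 * Worked example: with a receive header entry size of 32 DW, the
 * maximum wire header is (32 - 2 + 1) << 2 == 124 bytes, ICRC
 * included.
 */
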
6137/*
6138 * Set Send Length
6139 * @ppd - per port data
6140 *
6141 * Set the MTU by limiting how many DWs may be sent.  The SendLenCheck*
6142 * registers compare against LRH.PktLen, so use the max bytes included
6143 * in the LRH.
6144 *
6145 * This routine changes all VL values except VL15, which it maintains at
6146 * the same value.
6147 */
6148static void set_send_length(struct hfi1_pportdata *ppd)
6149{
6150        struct hfi1_devdata *dd = ppd->dd;
6151        u32 max_hb = lrh_max_header_bytes(dd), maxvlmtu = 0, dcmtu;
6152        u64 len1 = 0, len2 = (((dd->vld[15].mtu + max_hb) >> 2)
6153                              & SEND_LEN_CHECK1_LEN_VL15_MASK) <<
6154                SEND_LEN_CHECK1_LEN_VL15_SHIFT;
6155        int i;
6156
6157        for (i = 0; i < ppd->vls_supported; i++) {
6158                if (dd->vld[i].mtu > maxvlmtu)
6159                        maxvlmtu = dd->vld[i].mtu;
6160                if (i <= 3)
6161                        len1 |= (((dd->vld[i].mtu + max_hb) >> 2)
6162                                 & SEND_LEN_CHECK0_LEN_VL0_MASK) <<
6163                                ((i % 4) * SEND_LEN_CHECK0_LEN_VL1_SHIFT);
6164                else
6165                        len2 |= (((dd->vld[i].mtu + max_hb) >> 2)
6166                                 & SEND_LEN_CHECK1_LEN_VL4_MASK) <<
6167                                ((i % 4) * SEND_LEN_CHECK1_LEN_VL5_SHIFT);
6168        }
6169        write_csr(dd, SEND_LEN_CHECK0, len1);
6170        write_csr(dd, SEND_LEN_CHECK1, len2);
6171        /* adjust kernel credit return thresholds based on new MTUs */
6172        /* all kernel receive contexts have the same hdrqentsize */
6173        for (i = 0; i < ppd->vls_supported; i++) {
6174                sc_set_cr_threshold(dd->vld[i].sc,
6175                        sc_mtu_to_threshold(dd->vld[i].sc, dd->vld[i].mtu,
6176                                dd->rcd[0]->rcvhdrqentsize));
6177        }
6178        sc_set_cr_threshold(dd->vld[15].sc,
6179                sc_mtu_to_threshold(dd->vld[15].sc, dd->vld[15].mtu,
6180                        dd->rcd[0]->rcvhdrqentsize));
6181
6182        /* Adjust maximum MTU for the port in DC */
6183        dcmtu = maxvlmtu == 10240 ? DCC_CFG_PORT_MTU_CAP_10240 :
6184                (ilog2(maxvlmtu >> 8) + 1);
6185        len1 = read_csr(ppd->dd, DCC_CFG_PORT_CONFIG);
6186        len1 &= ~DCC_CFG_PORT_CONFIG_MTU_CAP_SMASK;
6187        len1 |= ((u64)dcmtu & DCC_CFG_PORT_CONFIG_MTU_CAP_MASK) <<
6188                DCC_CFG_PORT_CONFIG_MTU_CAP_SHIFT;
6189        write_csr(ppd->dd, DCC_CFG_PORT_CONFIG, len1);
6190}
6191
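/*
 * Worked example for the DC MTU cap computed above: a maximum VL MTU
 * of 4096 gives dcmtu = ilog2(4096 >> 8) + 1 = ilog2(16) + 1 = 5,
 * while 10240 uses the dedicated DCC_CFG_PORT_MTU_CAP_10240 encoding.
 */
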
6192static void set_lidlmc(struct hfi1_pportdata *ppd)
6193{
6194        int i;
6195        u64 sreg = 0;
6196        struct hfi1_devdata *dd = ppd->dd;
6197        u32 mask = ~((1U << ppd->lmc) - 1);
6198        u64 c1 = read_csr(ppd->dd, DCC_CFG_PORT_CONFIG1);
6199
6200        if (dd->hfi1_snoop.mode_flag)
6201                dd_dev_info(dd, "Set lid/lmc while snooping\n");
6202
6203        c1 &= ~(DCC_CFG_PORT_CONFIG1_TARGET_DLID_SMASK
6204                | DCC_CFG_PORT_CONFIG1_DLID_MASK_SMASK);
6205        c1 |= ((ppd->lid & DCC_CFG_PORT_CONFIG1_TARGET_DLID_MASK)
6206                        << DCC_CFG_PORT_CONFIG1_TARGET_DLID_SHIFT)|
6207              ((mask & DCC_CFG_PORT_CONFIG1_DLID_MASK_MASK)
6208                        << DCC_CFG_PORT_CONFIG1_DLID_MASK_SHIFT);
6209        write_csr(ppd->dd, DCC_CFG_PORT_CONFIG1, c1);
6210
6211        /*
6212         * Iterate over all the send contexts and set their SLID check
6213         */
6214        sreg = ((mask & SEND_CTXT_CHECK_SLID_MASK_MASK) <<
6215                        SEND_CTXT_CHECK_SLID_MASK_SHIFT) |
6216               (((ppd->lid & mask) & SEND_CTXT_CHECK_SLID_VALUE_MASK) <<
6217                        SEND_CTXT_CHECK_SLID_VALUE_SHIFT);
6218
6219        for (i = 0; i < dd->chip_send_contexts; i++) {
6220                hfi1_cdbg(LINKVERB, "SendContext[%d].SLID_CHECK = 0x%x",
6221                          i, (u32)sreg);
6222                write_kctxt_csr(dd, i, SEND_CTXT_CHECK_SLID, sreg);
6223        }
6224
6225        /* Now we have to do the same thing for the sdma engines */
6226        sdma_update_lmc(dd, mask, ppd->lid);
6227}
6228
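/*
 * Worked example for the LMC masking above: with lmc == 2 the mask is
 * ~((1 << 2) - 1) == 0xFFFFFFFC, so the low two bits of the LID are
 * ignored by the DLID and SLID checks and four consecutive LIDs share
 * the same check.
 */
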
6229static int wait_phy_linkstate(struct hfi1_devdata *dd, u32 state, u32 msecs)
6230{
6231        unsigned long timeout;
6232        u32 curr_state;
6233
6234        timeout = jiffies + msecs_to_jiffies(msecs);
6235        while (1) {
6236                curr_state = read_physical_state(dd);
6237                if (curr_state == state)
6238                        break;
6239                if (time_after(jiffies, timeout)) {
6240                        dd_dev_err(dd,
6241                                "timeout waiting for phy link state 0x%x, current state is 0x%x\n",
6242                                state, curr_state);
6243                        return -ETIMEDOUT;
6244                }
6245                usleep_range(1950, 2050); /* sleep 2ms-ish */
6246        }
6247
6248        return 0;
6249}
6250
6251/*
6252 * Helper for set_link_state().  Do not call except from that routine.
6253 * Expects ppd->hls_mutex to be held.
6254 *
6255 * @rem_reason value to be sent to the neighbor
6256 *
6257 * LinkDownReasons only set if transition succeeds.
6258 */
6259static int goto_offline(struct hfi1_pportdata *ppd, u8 rem_reason)
6260{
6261        struct hfi1_devdata *dd = ppd->dd;
6262        u32 pstate, previous_state;
6263        u32 last_local_state;
6264        u32 last_remote_state;
6265        int ret;
6266        int do_transition;
6267        int do_wait;
6268
6269        previous_state = ppd->host_link_state;
6270        ppd->host_link_state = HLS_GOING_OFFLINE;
6271        pstate = read_physical_state(dd);
6272        if (pstate == PLS_OFFLINE) {
6273                do_transition = 0;      /* in right state */
6274                do_wait = 0;            /* ...no need to wait */
6275        } else if ((pstate & 0xff) == PLS_OFFLINE) {
6276                do_transition = 0;      /* in an offline transient state */
6277                do_wait = 1;            /* ...wait for it to settle */
6278        } else {
6279                do_transition = 1;      /* need to move to offline */
6280                do_wait = 1;            /* ...will need to wait */
6281        }
6282
6283        if (do_transition) {
6284                ret = set_physical_link_state(dd,
6285                        PLS_OFFLINE | (rem_reason << 8));
6286
6287                if (ret != HCMD_SUCCESS) {
6288                        dd_dev_err(dd,
6289                                "Failed to transition to Offline link state, return %d\n",
6290                                ret);
6291                        return -EINVAL;
6292                }
6293                if (ppd->offline_disabled_reason == OPA_LINKDOWN_REASON_NONE)
6294                        ppd->offline_disabled_reason =
6295                        OPA_LINKDOWN_REASON_TRANSIENT;
6296        }
6297
6298        if (do_wait) {
6299                /* it can take a while for the link to go down */
6300                ret = wait_phy_linkstate(dd, PLS_OFFLINE, 10000);
6301                if (ret < 0)
6302                        return ret;
6303        }
6304
6305        /* make sure the logical state is also down */
6306        wait_logical_linkstate(ppd, IB_PORT_DOWN, 1000);
6307
6308        /*
6309         * Now in charge of LCB - must be after the physical state is
6310         * Offline.Quiet and before host_link_state is changed.
6311         */
6312        set_host_lcb_access(dd);
6313        write_csr(dd, DC_LCB_ERR_EN, ~0ull); /* watch LCB errors */
6314        ppd->host_link_state = HLS_LINK_COOLDOWN; /* LCB access allowed */
6315
6316        /*
6317         * The LNI has a mandatory wait time after the physical state
6318         * moves to Offline.Quiet.  The wait time may be different
6319         * depending on how the link went down.  The 8051 firmware
6320         * will observe the needed wait time and only move to ready
6321         * when that is completed.  The largest of the quiet timeouts
6322         * is 2.5s, so wait that long and then a bit more.
6323         */
6324        ret = wait_fm_ready(dd, 3000);
6325        if (ret) {
6326                dd_dev_err(dd,
6327                        "After going offline, timed out waiting for the 8051 to become ready to accept host requests\n");
6328                /* state is really offline, so make it so */
6329                ppd->host_link_state = HLS_DN_OFFLINE;
6330                return ret;
6331        }
6332
6333        /*
6334         * The state is now offline and the 8051 is ready to accept host
6335         * requests.
6336         *      - change our state
6337         *      - notify others if we were previously in a linkup state
6338         */
6339        ppd->host_link_state = HLS_DN_OFFLINE;
6340        if (previous_state & HLS_UP) {
6341                /* went down while link was up */
6342                handle_linkup_change(dd, 0);
6343        } else if (previous_state
6344                        & (HLS_DN_POLL | HLS_VERIFY_CAP | HLS_GOING_UP)) {
6345                /* went down while attempting link up */
6346                /* byte 1 of last_*_state is the failure reason */
6347                read_last_local_state(dd, &last_local_state);
6348                read_last_remote_state(dd, &last_remote_state);
6349                dd_dev_err(dd,
6350                        "LNI failure last states: local 0x%08x, remote 0x%08x\n",
6351                        last_local_state, last_remote_state);
6352        }
6353
6354        /* the active link width (downgrade) is 0 on link down */
6355        ppd->link_width_active = 0;
6356        ppd->link_width_downgrade_tx_active = 0;
6357        ppd->link_width_downgrade_rx_active = 0;
6358        ppd->current_egress_rate = 0;
6359        return 0;
6360}
6361
6362/* return the link state name */
6363static const char *link_state_name(u32 state)
6364{
6365        const char *name;
6366        int n = ilog2(state);
6367        static const char * const names[] = {
6368                [__HLS_UP_INIT_BP]       = "INIT",
6369                [__HLS_UP_ARMED_BP]      = "ARMED",
6370                [__HLS_UP_ACTIVE_BP]     = "ACTIVE",
6371                [__HLS_DN_DOWNDEF_BP]    = "DOWNDEF",
6372                [__HLS_DN_POLL_BP]       = "POLL",
6373                [__HLS_DN_DISABLE_BP]    = "DISABLE",
6374                [__HLS_DN_OFFLINE_BP]    = "OFFLINE",
6375                [__HLS_VERIFY_CAP_BP]    = "VERIFY_CAP",
6376                [__HLS_GOING_UP_BP]      = "GOING_UP",
6377                [__HLS_GOING_OFFLINE_BP] = "GOING_OFFLINE",
6378                [__HLS_LINK_COOLDOWN_BP] = "LINK_COOLDOWN"
6379        };
6380
6381        name = n < ARRAY_SIZE(names) ? names[n] : NULL;
6382        return name ? name : "unknown";
6383}
6384
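/*
 * The table lookup above works because each HLS_* state is a single
 * bit (HLS_DN_POLL == 1 << __HLS_DN_POLL_BP, and so on), so
 * ilog2(state) recovers the *_BP bit position used to index names[].
 */
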
6385/* return the link state reason name */
6386static const char *link_state_reason_name(struct hfi1_pportdata *ppd, u32 state)
6387{
6388        if (state == HLS_UP_INIT) {
6389                switch (ppd->linkinit_reason) {
6390                case OPA_LINKINIT_REASON_LINKUP:
6391                        return "(LINKUP)";
6392                case OPA_LINKINIT_REASON_FLAPPING:
6393                        return "(FLAPPING)";
6394                case OPA_LINKINIT_OUTSIDE_POLICY:
6395                        return "(OUTSIDE_POLICY)";
6396                case OPA_LINKINIT_QUARANTINED:
6397                        return "(QUARANTINED)";
6398                case OPA_LINKINIT_INSUFIC_CAPABILITY:
6399                        return "(INSUFIC_CAPABILITY)";
6400                default:
6401                        break;
6402                }
6403        }
6404        return "";
6405}
6406
6407/*
6408 * driver_physical_state - convert the driver's notion of a port's
6409 * state (an HLS_*) into a physical state (a {IB,OPA}_PORTPHYSSTATE_*).
6410 * Return -1 (converted to a u32) to indicate error.
6411 */
6412u32 driver_physical_state(struct hfi1_pportdata *ppd)
6413{
6414        switch (ppd->host_link_state) {
6415        case HLS_UP_INIT:
6416        case HLS_UP_ARMED:
6417        case HLS_UP_ACTIVE:
6418                return IB_PORTPHYSSTATE_LINKUP;
6419        case HLS_DN_POLL:
6420                return IB_PORTPHYSSTATE_POLLING;
6421        case HLS_DN_DISABLE:
6422                return IB_PORTPHYSSTATE_DISABLED;
6423        case HLS_DN_OFFLINE:
6424                return OPA_PORTPHYSSTATE_OFFLINE;
6425        case HLS_VERIFY_CAP:
6426                return IB_PORTPHYSSTATE_POLLING;
6427        case HLS_GOING_UP:
6428                return IB_PORTPHYSSTATE_POLLING;
6429        case HLS_GOING_OFFLINE:
6430                return OPA_PORTPHYSSTATE_OFFLINE;
6431        case HLS_LINK_COOLDOWN:
6432                return OPA_PORTPHYSSTATE_OFFLINE;
6433        case HLS_DN_DOWNDEF:
6434        default:
6435                dd_dev_err(ppd->dd, "invalid host_link_state 0x%x\n",
6436                           ppd->host_link_state);
6437                return -1;
6438        }
6439}
6440
6441/*
6442 * driver_logical_state - convert the driver's notion of a port's
6443 * state (an HLS_*) into a logical state (an IB_PORT_*). Return -1
6444 * (converted to a u32) to indicate error.
6445 */
6446u32 driver_logical_state(struct hfi1_pportdata *ppd)
6447{
6448        if (ppd->host_link_state && !(ppd->host_link_state & HLS_UP))
6449                return IB_PORT_DOWN;
6450
6451        switch (ppd->host_link_state & HLS_UP) {
6452        case HLS_UP_INIT:
6453                return IB_PORT_INIT;
6454        case HLS_UP_ARMED:
6455                return IB_PORT_ARMED;
6456        case HLS_UP_ACTIVE:
6457                return IB_PORT_ACTIVE;
6458        default:
6459                dd_dev_err(ppd->dd, "invalid host_link_state 0x%x\n",
6460                           ppd->host_link_state);
6461                return -1;
6462        }
6463}
6464
6465void set_link_down_reason(struct hfi1_pportdata *ppd, u8 lcl_reason,
6466                          u8 neigh_reason, u8 rem_reason)
6467{
6468        if (ppd->local_link_down_reason.latest == 0 &&
6469            ppd->neigh_link_down_reason.latest == 0) {
6470                ppd->local_link_down_reason.latest = lcl_reason;
6471                ppd->neigh_link_down_reason.latest = neigh_reason;
6472                ppd->remote_link_down_reason = rem_reason;
6473        }
6474}
6475
6476/*
6477 * Change the physical and/or logical link state.
6478 *
6479 * Do not call this routine while inside an interrupt.  It contains
6480 * calls to routines that can take multiple seconds to finish.
6481 *
6482 * Returns 0 on success, -errno on failure.
6483 */
6484int set_link_state(struct hfi1_pportdata *ppd, u32 state)
6485{
6486        struct hfi1_devdata *dd = ppd->dd;
6487        struct ib_event event = {.device = NULL};
6488        int ret1, ret = 0;
6489        int was_up, is_down;
6490        int orig_new_state, poll_bounce;
6491
6492        mutex_lock(&ppd->hls_lock);
6493
6494        orig_new_state = state;
6495        if (state == HLS_DN_DOWNDEF)
6496                state = dd->link_default;
6497
6498        /* interpret poll -> poll as a link bounce */
6499        poll_bounce = ppd->host_link_state == HLS_DN_POLL
6500                                && state == HLS_DN_POLL;
6501
6502        dd_dev_info(dd, "%s: current %s, new %s %s%s\n", __func__,
6503                link_state_name(ppd->host_link_state),
6504                link_state_name(orig_new_state),
6505                poll_bounce ? "(bounce) " : "",
6506                link_state_reason_name(ppd, state));
6507
6508        was_up = !!(ppd->host_link_state & HLS_UP);
6509
6510        /*
6511         * If we're going to a (HLS_*) link state that implies the logical
6512         * link state is neither of (IB_PORT_ARMED, IB_PORT_ACTIVE), then
6513         * reset is_sm_config_started to 0.
6514         */
6515        if (!(state & (HLS_UP_ARMED | HLS_UP_ACTIVE)))
6516                ppd->is_sm_config_started = 0;
6517
6518        /*
6519         * Do nothing if the states match.  Let a poll to poll link bounce
6520         * go through.
6521         */
6522        if (ppd->host_link_state == state && !poll_bounce)
6523                goto done;
6524
6525        switch (state) {
6526        case HLS_UP_INIT:
6527                if (ppd->host_link_state == HLS_DN_POLL && (quick_linkup
6528                            || dd->icode == ICODE_FUNCTIONAL_SIMULATOR)) {
6529                        /*
6530                         * Quick link up jumps from polling to here.
6531                         *
6532                         * Whether in normal or loopback mode, the
6533                         * simulator jumps from polling to link up.
6534                         * Accept that here.
6535                         */
6536                        /* OK */;
6537                } else if (ppd->host_link_state != HLS_GOING_UP) {
6538                        goto unexpected;
6539                }
6540
6541                ppd->host_link_state = HLS_UP_INIT;
6542                ret = wait_logical_linkstate(ppd, IB_PORT_INIT, 1000);
6543                if (ret) {
6544                        /* logical state didn't change, stay at going_up */
6545                        ppd->host_link_state = HLS_GOING_UP;
6546                        dd_dev_err(dd,
6547                                "%s: logical state did not change to INIT\n",
6548                                __func__);
6549                } else {
6550                        /* clear old transient LINKINIT_REASON code */
6551                        if (ppd->linkinit_reason >= OPA_LINKINIT_REASON_CLEAR)
6552                                ppd->linkinit_reason =
6553                                        OPA_LINKINIT_REASON_LINKUP;
6554
6555                        /* enable the port */
6556                        add_rcvctrl(dd, RCV_CTRL_RCV_PORT_ENABLE_SMASK);
6557
6558                        handle_linkup_change(dd, 1);
6559                }
6560                break;
6561        case HLS_UP_ARMED:
6562                if (ppd->host_link_state != HLS_UP_INIT)
6563                        goto unexpected;
6564
6565                ppd->host_link_state = HLS_UP_ARMED;
6566                set_logical_state(dd, LSTATE_ARMED);
6567                ret = wait_logical_linkstate(ppd, IB_PORT_ARMED, 1000);
6568                if (ret) {
6569                        /* logical state didn't change, stay at init */
6570                        ppd->host_link_state = HLS_UP_INIT;
6571                        dd_dev_err(dd,
6572                                "%s: logical state did not change to ARMED\n",
6573                                __func__);
6574                }
6575                /*
6576                 * The simulator does not currently implement SMA messages,
6577                 * so neighbor_normal is not set.  Set it here when we first
6578                 * move to Armed.
6579                 */
6580                if (dd->icode == ICODE_FUNCTIONAL_SIMULATOR)
6581                        ppd->neighbor_normal = 1;
6582                break;
6583        case HLS_UP_ACTIVE:
6584                if (ppd->host_link_state != HLS_UP_ARMED)
6585                        goto unexpected;
6586
6587                ppd->host_link_state = HLS_UP_ACTIVE;
6588                set_logical_state(dd, LSTATE_ACTIVE);
6589                ret = wait_logical_linkstate(ppd, IB_PORT_ACTIVE, 1000);
6590                if (ret) {
6591                        /* logical state didn't change, stay at armed */
6592                        ppd->host_link_state = HLS_UP_ARMED;
6593                        dd_dev_err(dd,
6594                                "%s: logical state did not change to ACTIVE\n",
6595                                __func__);
6596                } else {
6597
6598                        /* tell all engines to go running */
6599                        sdma_all_running(dd);
6600
6601                        /* Signal the IB layer that the port has gone active */
6602                        event.device = &dd->verbs_dev.ibdev;
6603                        event.element.port_num = ppd->port;
6604                        event.event = IB_EVENT_PORT_ACTIVE;
6605                }
6606                break;
6607        case HLS_DN_POLL:
6608                if ((ppd->host_link_state == HLS_DN_DISABLE ||
6609                     ppd->host_link_state == HLS_DN_OFFLINE) &&
6610                    dd->dc_shutdown)
6611                        dc_start(dd);
6612                /* Hand LED control to the DC */
6613                write_csr(dd, DCC_CFG_LED_CNTRL, 0);
6614
6615                if (ppd->host_link_state != HLS_DN_OFFLINE) {
6616                        u8 tmp = ppd->link_enabled;
6617
6618                        ret = goto_offline(ppd, ppd->remote_link_down_reason);
6619                        if (ret) {
6620                                ppd->link_enabled = tmp;
6621                                break;
6622                        }
6623                        ppd->remote_link_down_reason = 0;
6624
6625                        if (ppd->driver_link_ready)
6626                                ppd->link_enabled = 1;
6627                }
6628
6629                ret = set_local_link_attributes(ppd);
6630                if (ret)
6631                        break;
6632
6633                ppd->port_error_action = 0;
6634                ppd->host_link_state = HLS_DN_POLL;
6635
6636                if (quick_linkup) {
6637                        /* quick linkup does not go into polling */
6638                        ret = do_quick_linkup(dd);
6639                } else {
6640                        ret1 = set_physical_link_state(dd, PLS_POLLING);
6641                        if (ret1 != HCMD_SUCCESS) {
6642                                dd_dev_err(dd,
6643                                        "Failed to transition to Polling link state, return 0x%x\n",
6644                                        ret1);
6645                                ret = -EINVAL;
6646                        }
6647                }
6648                ppd->offline_disabled_reason = OPA_LINKDOWN_REASON_NONE;
6649                /*
6650                 * If an error occurred above, go back to offline.  The
6651                 * caller may reschedule another attempt.
6652                 */
6653                if (ret)
6654                        goto_offline(ppd, 0);
6655                break;
6656        case HLS_DN_DISABLE:
6657                /* link is disabled */
6658                ppd->link_enabled = 0;
6659
6660                /* allow any state to transition to disabled */
6661
6662                /* must transition to offline first */
6663                if (ppd->host_link_state != HLS_DN_OFFLINE) {
6664                        ret = goto_offline(ppd, ppd->remote_link_down_reason);
6665                        if (ret)
6666                                break;
6667                        ppd->remote_link_down_reason = 0;
6668                }
6669
6670                ret1 = set_physical_link_state(dd, PLS_DISABLED);
6671                if (ret1 != HCMD_SUCCESS) {
6672                        dd_dev_err(dd,
6673                                "Failed to transition to Disabled link state, return 0x%x\n",
6674                                ret1);
6675                        ret = -EINVAL;
6676                        break;
6677                }
6678                ppd->host_link_state = HLS_DN_DISABLE;
6679                dc_shutdown(dd);
6680                break;
6681        case HLS_DN_OFFLINE:
6682                if (ppd->host_link_state == HLS_DN_DISABLE)
6683                        dc_start(dd);
6684
6685                /* allow any state to transition to offline */
6686                ret = goto_offline(ppd, ppd->remote_link_down_reason);
6687                if (!ret)
6688                        ppd->remote_link_down_reason = 0;
6689                break;
6690        case HLS_VERIFY_CAP:
6691                if (ppd->host_link_state != HLS_DN_POLL)
6692                        goto unexpected;
6693                ppd->host_link_state = HLS_VERIFY_CAP;
6694                break;
6695        case HLS_GOING_UP:
6696                if (ppd->host_link_state != HLS_VERIFY_CAP)
6697                        goto unexpected;
6698
6699                ret1 = set_physical_link_state(dd, PLS_LINKUP);
6700                if (ret1 != HCMD_SUCCESS) {
6701                        dd_dev_err(dd,
6702                                "Failed to transition to link up state, return 0x%x\n",
6703                                ret1);
6704                        ret = -EINVAL;
6705                        break;
6706                }
6707                ppd->host_link_state = HLS_GOING_UP;
6708                break;
6709
6710        case HLS_GOING_OFFLINE:         /* transient within goto_offline() */
6711        case HLS_LINK_COOLDOWN:         /* transient within goto_offline() */
6712        default:
6713                dd_dev_info(dd, "%s: state 0x%x: not supported\n",
6714                        __func__, state);
6715                ret = -EINVAL;
6716                break;
6717        }
6718
6719        is_down = !!(ppd->host_link_state & (HLS_DN_POLL |
6720                        HLS_DN_DISABLE | HLS_DN_OFFLINE));
6721
6722        if (was_up && is_down && ppd->local_link_down_reason.sma == 0 &&
6723            ppd->neigh_link_down_reason.sma == 0) {
6724                ppd->local_link_down_reason.sma =
6725                  ppd->local_link_down_reason.latest;
6726                ppd->neigh_link_down_reason.sma =
6727                  ppd->neigh_link_down_reason.latest;
6728        }
6729
6730        goto done;
6731
6732unexpected:
6733        dd_dev_err(dd, "%s: unexpected state transition from %s to %s\n",
6734                __func__, link_state_name(ppd->host_link_state),
6735                link_state_name(state));
6736        ret = -EINVAL;
6737
6738done:
6739        mutex_unlock(&ppd->hls_lock);
6740
6741        if (event.device)
6742                ib_dispatch_event(&event);
6743
6744        return ret;
6745}
6746
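/*
 * The "goto unexpected" guards in the switch above enforce the normal
 * bring-up order:
 *
 *   DN_POLL -> VERIFY_CAP -> GOING_UP -> UP_INIT -> UP_ARMED -> UP_ACTIVE
 *
 * Quick linkup (and the simulator) may jump from DN_POLL straight to
 * UP_INIT, and DN_OFFLINE/DN_DISABLE are reachable from any state via
 * goto_offline().
 */
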
6747int hfi1_set_ib_cfg(struct hfi1_pportdata *ppd, int which, u32 val)
6748{
6749        u64 reg;
6750        int ret = 0;
6751
6752        switch (which) {
6753        case HFI1_IB_CFG_LIDLMC:
6754                set_lidlmc(ppd);
6755                break;
6756        case HFI1_IB_CFG_VL_HIGH_LIMIT:
6757                /*
6758                 * The VL Arbitrator high limit is sent in units of 4k
6759                 * bytes, while HFI stores it in units of 64 bytes.
6760                 */
6761                val *= 4096/64;
6762                reg = ((u64)val & SEND_HIGH_PRIORITY_LIMIT_LIMIT_MASK)
6763                        << SEND_HIGH_PRIORITY_LIMIT_LIMIT_SHIFT;
6764                write_csr(ppd->dd, SEND_HIGH_PRIORITY_LIMIT, reg);
6765                break;
6766        case HFI1_IB_CFG_LINKDEFAULT: /* IB link default (sleep/poll) */
6767                /* HFI only supports POLL as the default link down state */
6768                if (val != HLS_DN_POLL)
6769                        ret = -EINVAL;
6770                break;
6771        case HFI1_IB_CFG_OP_VLS:
6772                if (ppd->vls_operational != val) {
6773                        ppd->vls_operational = val;
6774                        if (!ppd->port)
6775                                ret = -EINVAL;
6776                        else
6777                                ret = sdma_map_init(
6778                                        ppd->dd,
6779                                        ppd->port - 1,
6780                                        val,
6781                                        NULL);
6782                }
6783                break;
6784        /*
6785         * For link width, link width downgrade, and speed enable, always AND
6786         * the setting with what is actually supported.  This has two benefits.
6787         * First, enabled can't have unsupported values, no matter what the
6788         * SM or FM might want.  Second, the ALL_SUPPORTED wildcards that mean
6789         * "fill in with your supported value" have all the bits in the
6790         * field set, so simply ANDing with supported has the desired result.
6791         */
6792        case HFI1_IB_CFG_LWID_ENB: /* set allowed Link-width */
6793                ppd->link_width_enabled = val & ppd->link_width_supported;
6794                break;
6795        case HFI1_IB_CFG_LWID_DG_ENB: /* set allowed link width downgrade */
6796                ppd->link_width_downgrade_enabled =
6797                                val & ppd->link_width_downgrade_supported;
6798                break;
6799        case HFI1_IB_CFG_SPD_ENB: /* allowed Link speeds */
6800                ppd->link_speed_enabled = val & ppd->link_speed_supported;
6801                break;
6802        case HFI1_IB_CFG_OVERRUN_THRESH: /* IB overrun threshold */
6803                /*
6804                 * HFI does not follow IB specs; save this value
6805                 * so we can report it if asked.
6806                 */
6807                ppd->overrun_threshold = val;
6808                break;
6809        case HFI1_IB_CFG_PHYERR_THRESH: /* IB PHY error threshold */
6810                /*
6811                 * HFI does not follow IB specs; save this value
6812                 * so we can report it if asked.
6813                 */
6814                ppd->phy_error_threshold = val;
6815                break;
6816
6817        case HFI1_IB_CFG_MTU:
6818                set_send_length(ppd);
6819                break;
6820
6821        case HFI1_IB_CFG_PKEYS:
6822                if (HFI1_CAP_IS_KSET(PKEY_CHECK))
6823                        set_partition_keys(ppd);
6824                break;
6825
6826        default:
6827                if (HFI1_CAP_IS_KSET(PRINT_UNIMPL))
6828                        dd_dev_info(ppd->dd,
6829                          "%s: which %s, val 0x%x: not implemented\n",
6830                          __func__, ib_cfg_name(which), val);
6831                break;
6832        }
6833        return ret;
6834}
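
/*
 * Editor's sketch (not part of the driver): a stand-alone, user-space
 * illustration of the AND-with-supported rule used by the *_ENB cases
 * above.  All mask values here are invented for the example.
 */
#include <stdio.h>
#include <stdint.h>

int main(void)
{
        uint16_t supported = 0x000b;    /* hypothetical supported bits */
        uint16_t wildcard  = 0xffff;    /* ALL_SUPPORTED-style wildcard */
        uint16_t request   = 0x0103;    /* mix of supported/unsupported bits */

        /* a wildcard resolves to exactly the supported set */
        printf("0x%04x\n", wildcard & supported);       /* prints 0x000b */
        /* unsupported bits are silently dropped */
        printf("0x%04x\n", request & supported);        /* prints 0x0003 */
        return 0;
}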
6835
6836/* begin functions related to vl arbitration table caching */
6837static void init_vl_arb_caches(struct hfi1_pportdata *ppd)
6838{
6839        int i;
6840
6841        BUILD_BUG_ON(VL_ARB_TABLE_SIZE !=
6842                        VL_ARB_LOW_PRIO_TABLE_SIZE);
6843        BUILD_BUG_ON(VL_ARB_TABLE_SIZE !=
6844                        VL_ARB_HIGH_PRIO_TABLE_SIZE);
6845
6846        /*
6847         * Note that we always return values directly from the
6848         * 'vl_arb_cache' (and do no CSR reads) in response to a
6849         * 'Get(VLArbTable)'. This is obviously correct after a
6850         * 'Set(VLArbTable)', since the cache will then be up to
6851         * date. But it's also correct prior to any 'Set(VLArbTable)'
6852         * since then both the cache and the relevant h/w registers
6853         * will be zeroed.
6854         */
6855
6856        for (i = 0; i < MAX_PRIO_TABLE; i++)
6857                spin_lock_init(&ppd->vl_arb_cache[i].lock);
6858}
6859
6860/*
6861 * vl_arb_lock_cache
6862 *
6863 * All other vl_arb_* functions should be called only after locking
6864 * the cache.
6865 */
6866static inline struct vl_arb_cache *
6867vl_arb_lock_cache(struct hfi1_pportdata *ppd, int idx)
6868{
6869        if (idx != LO_PRIO_TABLE && idx != HI_PRIO_TABLE)
6870                return NULL;
6871        spin_lock(&ppd->vl_arb_cache[idx].lock);
6872        return &ppd->vl_arb_cache[idx];
6873}
6874
6875static inline void vl_arb_unlock_cache(struct hfi1_pportdata *ppd, int idx)
6876{
6877        spin_unlock(&ppd->vl_arb_cache[idx].lock);
6878}
6879
6880static void vl_arb_get_cache(struct vl_arb_cache *cache,
6881                             struct ib_vl_weight_elem *vl)
6882{
6883        memcpy(vl, cache->table, VL_ARB_TABLE_SIZE * sizeof(*vl));
6884}
6885
6886static void vl_arb_set_cache(struct vl_arb_cache *cache,
6887                             struct ib_vl_weight_elem *vl)
6888{
6889        memcpy(cache->table, vl, VL_ARB_TABLE_SIZE * sizeof(*vl));
6890}
6891
6892static int vl_arb_match_cache(struct vl_arb_cache *cache,
6893                              struct ib_vl_weight_elem *vl)
6894{
6895        return !memcmp(cache->table, vl, VL_ARB_TABLE_SIZE * sizeof(*vl));
6896}
6897/* end functions related to vl arbitration table caching */
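
/*
 * Editor's sketch (not part of the driver): the locking discipline
 * required by the comment above vl_arb_lock_cache(), shown as a
 * minimal caller.  'example_read_low_prio' is hypothetical; 'tbl' is
 * assumed to hold VL_ARB_TABLE_SIZE elements.
 */
static void example_read_low_prio(struct hfi1_pportdata *ppd,
                                  struct ib_vl_weight_elem *tbl)
{
        struct vl_arb_cache *vlc;

        vlc = vl_arb_lock_cache(ppd, LO_PRIO_TABLE);
        if (!vlc)
                return;         /* only LO/HI_PRIO_TABLE are valid */
        vl_arb_get_cache(vlc, tbl);     /* safe: cache is locked */
        vl_arb_unlock_cache(ppd, LO_PRIO_TABLE);
}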
6898
6899static int set_vl_weights(struct hfi1_pportdata *ppd, u32 target,
6900                          u32 size, struct ib_vl_weight_elem *vl)
6901{
6902        struct hfi1_devdata *dd = ppd->dd;
6903        u64 reg;
6904        unsigned int i, is_up = 0;
6905        int drain, ret = 0;
6906
6907        mutex_lock(&ppd->hls_lock);
6908
6909        if (ppd->host_link_state & HLS_UP)
6910                is_up = 1;
6911
6912        drain = !is_ax(dd) && is_up;
6913
6914        if (drain)
6915                /*
6916                 * Before adjusting VL arbitration weights, empty per-VL
6917                 * FIFOs, otherwise a packet whose VL weight is being
6918                 * set to 0 could get stuck in a FIFO with no chance to
6919                 * egress.
6920                 */
6921                ret = stop_drain_data_vls(dd);
6922
6923        if (ret) {
6924                dd_dev_err(
6925                        dd,
6926                        "%s: cannot stop/drain VLs - refusing to change VL arbitration weights\n",
6927                        __func__);
6928                goto err;
6929        }
6930
6931        for (i = 0; i < size; i++, vl++) {
6932                /*
6933                 * NOTE: The low priority shift and mask are used here, but
6934                 * they are the same for both the low and high registers.
6935                 */
6936                reg = (((u64)vl->vl & SEND_LOW_PRIORITY_LIST_VL_MASK)
6937                                << SEND_LOW_PRIORITY_LIST_VL_SHIFT)
6938                      | (((u64)vl->weight
6939                                & SEND_LOW_PRIORITY_LIST_WEIGHT_MASK)
6940                                << SEND_LOW_PRIORITY_LIST_WEIGHT_SHIFT);
6941                write_csr(dd, target + (i * 8), reg);
6942        }
6943        pio_send_control(dd, PSC_GLOBAL_VLARB_ENABLE);
6944
6945        if (drain)
6946                open_fill_data_vls(dd); /* reopen all VLs */
6947
6948err:
6949        mutex_unlock(&ppd->hls_lock);
6950
6951        return ret;
6952}
6953
6954/*
6955 * Read one credit merge VL register.
6956 */
6957static void read_one_cm_vl(struct hfi1_devdata *dd, u32 csr,
6958                           struct vl_limit *vll)
6959{
6960        u64 reg = read_csr(dd, csr);
6961
6962        vll->dedicated = cpu_to_be16(
6963                (reg >> SEND_CM_CREDIT_VL_DEDICATED_LIMIT_VL_SHIFT)
6964                & SEND_CM_CREDIT_VL_DEDICATED_LIMIT_VL_MASK);
6965        vll->shared = cpu_to_be16(
6966                (reg >> SEND_CM_CREDIT_VL_SHARED_LIMIT_VL_SHIFT)
6967                & SEND_CM_CREDIT_VL_SHARED_LIMIT_VL_MASK);
6968}
6969
6970/*
6971 * Read the current credit merge limits.
6972 */
6973static int get_buffer_control(struct hfi1_devdata *dd,
6974                              struct buffer_control *bc, u16 *overall_limit)
6975{
6976        u64 reg;
6977        int i;
6978
6979        /* not all entries are filled in */
6980        memset(bc, 0, sizeof(*bc));
6981
6982        /* OPA and HFI have a 1-1 mapping */
6983        for (i = 0; i < TXE_NUM_DATA_VL; i++)
6984                read_one_cm_vl(dd, SEND_CM_CREDIT_VL + (8*i), &bc->vl[i]);
6985
6986        /* NOTE: assumes that VL* and VL15 CSRs are bit-wise identical */
6987        read_one_cm_vl(dd, SEND_CM_CREDIT_VL15, &bc->vl[15]);
6988
6989        reg = read_csr(dd, SEND_CM_GLOBAL_CREDIT);
6990        bc->overall_shared_limit = cpu_to_be16(
6991                (reg >> SEND_CM_GLOBAL_CREDIT_SHARED_LIMIT_SHIFT)
6992                & SEND_CM_GLOBAL_CREDIT_SHARED_LIMIT_MASK);
6993        if (overall_limit)
6994                *overall_limit = (reg
6995                        >> SEND_CM_GLOBAL_CREDIT_TOTAL_CREDIT_LIMIT_SHIFT)
6996                        & SEND_CM_GLOBAL_CREDIT_TOTAL_CREDIT_LIMIT_MASK;
6997        return sizeof(struct buffer_control);
6998}
6999
7000static int get_sc2vlnt(struct hfi1_devdata *dd, struct sc2vlnt *dp)
7001{
7002        u64 reg;
7003        int i;
7004
7005        /* each register contains 16 SC->VLnt mappings, 4 bits each */
7006        reg = read_csr(dd, DCC_CFG_SC_VL_TABLE_15_0);
7007        for (i = 0; i < sizeof(u64); i++) {
7008                u8 byte = *(((u8 *)&reg) + i);
7009
7010                dp->vlnt[2 * i] = byte & 0xf;
7011                dp->vlnt[(2 * i) + 1] = (byte & 0xf0) >> 4;
7012        }
7013
7014        reg = read_csr(dd, DCC_CFG_SC_VL_TABLE_31_16);
7015        for (i = 0; i < sizeof(u64); i++) {
7016                u8 byte = *(((u8 *)&reg) + i);
7017
7018                dp->vlnt[16 + (2 * i)] = byte & 0xf;
7019                dp->vlnt[16 + (2 * i) + 1] = (byte & 0xf0) >> 4;
7020        }
7021        return sizeof(struct sc2vlnt);
7022}
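
/*
 * Editor's sketch (not part of the driver): a stand-alone version of
 * the nibble unpacking in get_sc2vlnt(), using shifts instead of the
 * byte-pointer walk (the two are equivalent on a little-endian CPU).
 * The register value is invented.
 */
#include <stdio.h>
#include <stdint.h>

int main(void)
{
        uint64_t reg = 0x21;    /* SC0 -> VL1, SC1 -> VL2, rest -> VL0 */
        uint8_t vlnt[16];
        int i;

        for (i = 0; i < 8; i++) {
                uint8_t byte = (reg >> (8 * i)) & 0xff;

                vlnt[2 * i] = byte & 0xf;               /* low nibble  */
                vlnt[2 * i + 1] = (byte & 0xf0) >> 4;   /* high nibble */
        }
        printf("SC0->VL%u SC1->VL%u SC2->VL%u\n",
               vlnt[0], vlnt[1], vlnt[2]);      /* VL1 VL2 VL0 */
        return 0;
}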
7023
7024static void get_vlarb_preempt(struct hfi1_devdata *dd, u32 nelems,
7025                              struct ib_vl_weight_elem *vl)
7026{
7027        unsigned int i;
7028
7029        for (i = 0; i < nelems; i++, vl++) {
7030                vl->vl = 0xf;
7031                vl->weight = 0;
7032        }
7033}
7034
7035static void set_sc2vlnt(struct hfi1_devdata *dd, struct sc2vlnt *dp)
7036{
7037        write_csr(dd, DCC_CFG_SC_VL_TABLE_15_0,
7038                DC_SC_VL_VAL(15_0,
7039                0, dp->vlnt[0] & 0xf,
7040                1, dp->vlnt[1] & 0xf,
7041                2, dp->vlnt[2] & 0xf,
7042                3, dp->vlnt[3] & 0xf,
7043                4, dp->vlnt[4] & 0xf,
7044                5, dp->vlnt[5] & 0xf,
7045                6, dp->vlnt[6] & 0xf,
7046                7, dp->vlnt[7] & 0xf,
7047                8, dp->vlnt[8] & 0xf,
7048                9, dp->vlnt[9] & 0xf,
7049                10, dp->vlnt[10] & 0xf,
7050                11, dp->vlnt[11] & 0xf,
7051                12, dp->vlnt[12] & 0xf,
7052                13, dp->vlnt[13] & 0xf,
7053                14, dp->vlnt[14] & 0xf,
7054                15, dp->vlnt[15] & 0xf));
7055        write_csr(dd, DCC_CFG_SC_VL_TABLE_31_16,
7056                DC_SC_VL_VAL(31_16,
7057                16, dp->vlnt[16] & 0xf,
7058                17, dp->vlnt[17] & 0xf,
7059                18, dp->vlnt[18] & 0xf,
7060                19, dp->vlnt[19] & 0xf,
7061                20, dp->vlnt[20] & 0xf,
7062                21, dp->vlnt[21] & 0xf,
7063                22, dp->vlnt[22] & 0xf,
7064                23, dp->vlnt[23] & 0xf,
7065                24, dp->vlnt[24] & 0xf,
7066                25, dp->vlnt[25] & 0xf,
7067                26, dp->vlnt[26] & 0xf,
7068                27, dp->vlnt[27] & 0xf,
7069                28, dp->vlnt[28] & 0xf,
7070                29, dp->vlnt[29] & 0xf,
7071                30, dp->vlnt[30] & 0xf,
7072                31, dp->vlnt[31] & 0xf));
7073}
7074
7075static void nonzero_msg(struct hfi1_devdata *dd, int idx, const char *what,
7076                        u16 limit)
7077{
7078        if (limit != 0)
7079                dd_dev_info(dd, "Invalid %s limit %d on VL %d, ignoring\n",
7080                        what, (int)limit, idx);
7081}
7082
7083/* change only the shared limit portion of SendCmGlobalCredit */
7084static void set_global_shared(struct hfi1_devdata *dd, u16 limit)
7085{
7086        u64 reg;
7087
7088        reg = read_csr(dd, SEND_CM_GLOBAL_CREDIT);
7089        reg &= ~SEND_CM_GLOBAL_CREDIT_SHARED_LIMIT_SMASK;
7090        reg |= (u64)limit << SEND_CM_GLOBAL_CREDIT_SHARED_LIMIT_SHIFT;
7091        write_csr(dd, SEND_CM_GLOBAL_CREDIT, reg);
7092}
7093
7094/* change only the total credit limit portion of SendCmGlobalCredit */
7095static void set_global_limit(struct hfi1_devdata *dd, u16 limit)
7096{
7097        u64 reg;
7098
7099        reg = read_csr(dd, SEND_CM_GLOBAL_CREDIT);
7100        reg &= ~SEND_CM_GLOBAL_CREDIT_TOTAL_CREDIT_LIMIT_SMASK;
7101        reg |= (u64)limit << SEND_CM_GLOBAL_CREDIT_TOTAL_CREDIT_LIMIT_SHIFT;
7102        write_csr(dd, SEND_CM_GLOBAL_CREDIT, reg);
7103}
7104
7105/* set the given per-VL shared limit */
7106static void set_vl_shared(struct hfi1_devdata *dd, int vl, u16 limit)
7107{
7108        u64 reg;
7109        u32 addr;
7110
7111        if (vl < TXE_NUM_DATA_VL)
7112                addr = SEND_CM_CREDIT_VL + (8 * vl);
7113        else
7114                addr = SEND_CM_CREDIT_VL15;
7115
7116        reg = read_csr(dd, addr);
7117        reg &= ~SEND_CM_CREDIT_VL_SHARED_LIMIT_VL_SMASK;
7118        reg |= (u64)limit << SEND_CM_CREDIT_VL_SHARED_LIMIT_VL_SHIFT;
7119        write_csr(dd, addr, reg);
7120}
7121
7122/* set the given per-VL dedicated limit */
7123static void set_vl_dedicated(struct hfi1_devdata *dd, int vl, u16 limit)
7124{
7125        u64 reg;
7126        u32 addr;
7127
7128        if (vl < TXE_NUM_DATA_VL)
7129                addr = SEND_CM_CREDIT_VL + (8 * vl);
7130        else
7131                addr = SEND_CM_CREDIT_VL15;
7132
7133        reg = read_csr(dd, addr);
7134        reg &= ~SEND_CM_CREDIT_VL_DEDICATED_LIMIT_VL_SMASK;
7135        reg |= (u64)limit << SEND_CM_CREDIT_VL_DEDICATED_LIMIT_VL_SHIFT;
7136        write_csr(dd, addr, reg);
7137}
7138
7139/* spin until the given per-VL status mask bits clear */
7140static void wait_for_vl_status_clear(struct hfi1_devdata *dd, u64 mask,
7141                                     const char *which)
7142{
7143        unsigned long timeout;
7144        u64 reg;
7145
7146        timeout = jiffies + msecs_to_jiffies(VL_STATUS_CLEAR_TIMEOUT);
7147        while (1) {
7148                reg = read_csr(dd, SEND_CM_CREDIT_USED_STATUS) & mask;
7149
7150                if (reg == 0)
7151                        return; /* success */
7152                if (time_after(jiffies, timeout))
7153                        break;          /* timed out */
7154                udelay(1);
7155        }
7156
7157        dd_dev_err(dd,
7158                "%s credit change status not clearing after %dms, mask 0x%llx, not clear 0x%llx\n",
7159                which, VL_STATUS_CLEAR_TIMEOUT, mask, reg);
7160        /*
7161         * If this occurs, it is likely there was a credit loss on the link.
7162         * The only recovery from that is a link bounce.
7163         */
7164        dd_dev_err(dd,
7165                "Continuing anyway.  A credit loss may occur.  Suggest a link bounce\n");
7166}
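
/*
 * Editor's sketch (not part of the driver): the poll-until-clear-or-
 * deadline pattern used above, reduced to its skeleton.  read_status()
 * is a hypothetical stand-in for the CSR read.
 */
static int poll_clear(u64 (*read_status)(void), u64 mask,
                      unsigned long timeout_ms)
{
        unsigned long timeout = jiffies + msecs_to_jiffies(timeout_ms);

        while (1) {
                if (!(read_status() & mask))
                        return 0;               /* bits cleared */
                if (time_after(jiffies, timeout))
                        return -ETIMEDOUT;      /* deadline passed */
                udelay(1);                      /* brief busy-wait */
        }
}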
7167
7168/*
7169 * The number of credits on the VLs may be changed while everything
7170 * is "live", but the following algorithm must be followed due to
7171 * how the hardware is actually implemented.  In particular,
7172 * Return_Credit_Status[] is the only correct status check.
7173 *
7174 * if (reducing Global_Shared_Credit_Limit or any shared limit changing)
7175 *     set Global_Shared_Credit_Limit = 0
7176 *     use_all_vl = 1
7177 * mask0 = all VLs that are changing either dedicated or shared limits
7178 * set Shared_Limit[mask0] = 0
7179 * spin until Return_Credit_Status[use_all_vl ? all VL : mask0] == 0
7180 * if (changing any dedicated limit)
7181 *     mask1 = all VLs that are lowering dedicated limits
7182 *     lower Dedicated_Limit[mask1]
7183 *     spin until Return_Credit_Status[mask1] == 0
7184 *     raise Dedicated_Limits
7185 * raise Shared_Limits
7186 * raise Global_Shared_Credit_Limit
7187 *
7188 * lower = if the new limit is lower, set the limit to the new value
7189 * raise = if the new limit is higher than the current value (may be changed
7190 *      earlier in the algorithm), set the limit to the new value
7191 */
7192static int set_buffer_control(struct hfi1_devdata *dd,
7193                              struct buffer_control *new_bc)
7194{
7195        u64 changing_mask, ld_mask, stat_mask;
7196        int change_count;
7197        int i, use_all_mask;
7198        int this_shared_changing;
7199        /*
7200         * A0: the variable any_shared_limit_changing below is an addition to
7201         * the algorithm above.  If A0 support is removed, it can be removed too.
7202         */
7203        int any_shared_limit_changing;
7204        struct buffer_control cur_bc;
7205        u8 changing[OPA_MAX_VLS];
7206        u8 lowering_dedicated[OPA_MAX_VLS];
7207        u16 cur_total;
7208        u32 new_total = 0;
7209        const u64 all_mask =
7210        SEND_CM_CREDIT_USED_STATUS_VL0_RETURN_CREDIT_STATUS_SMASK
7211         | SEND_CM_CREDIT_USED_STATUS_VL1_RETURN_CREDIT_STATUS_SMASK
7212         | SEND_CM_CREDIT_USED_STATUS_VL2_RETURN_CREDIT_STATUS_SMASK
7213         | SEND_CM_CREDIT_USED_STATUS_VL3_RETURN_CREDIT_STATUS_SMASK
7214         | SEND_CM_CREDIT_USED_STATUS_VL4_RETURN_CREDIT_STATUS_SMASK
7215         | SEND_CM_CREDIT_USED_STATUS_VL5_RETURN_CREDIT_STATUS_SMASK
7216         | SEND_CM_CREDIT_USED_STATUS_VL6_RETURN_CREDIT_STATUS_SMASK
7217         | SEND_CM_CREDIT_USED_STATUS_VL7_RETURN_CREDIT_STATUS_SMASK
7218         | SEND_CM_CREDIT_USED_STATUS_VL15_RETURN_CREDIT_STATUS_SMASK;
7219
7220#define valid_vl(idx) ((idx) < TXE_NUM_DATA_VL || (idx) == 15)
7221#define NUM_USABLE_VLS 16       /* look at VL15 and less */
7222
7224        /* find the new total credits, do a sanity check on unused VLs */
7225        for (i = 0; i < OPA_MAX_VLS; i++) {
7226                if (valid_vl(i)) {
7227                        new_total += be16_to_cpu(new_bc->vl[i].dedicated);
7228                        continue;
7229                }
7230                nonzero_msg(dd, i, "dedicated",
7231                        be16_to_cpu(new_bc->vl[i].dedicated));
7232                nonzero_msg(dd, i, "shared",
7233                        be16_to_cpu(new_bc->vl[i].shared));
7234                new_bc->vl[i].dedicated = 0;
7235                new_bc->vl[i].shared = 0;
7236        }
7237        new_total += be16_to_cpu(new_bc->overall_shared_limit);
7238        if (new_total > (u32)dd->link_credits)
7239                return -EINVAL;
7240        /* fetch the current values */
7241        get_buffer_control(dd, &cur_bc, &cur_total);
7242
7243        /*
7244         * Create the masks we will use.
7245         */
7246        memset(changing, 0, sizeof(changing));
7247        memset(lowering_dedicated, 0, sizeof(lowering_dedicated));
7248        /* NOTE: Assumes that the individual VL bits are adjacent and in
7249         * increasing order */
7250        stat_mask =
7251                SEND_CM_CREDIT_USED_STATUS_VL0_RETURN_CREDIT_STATUS_SMASK;
7252        changing_mask = 0;
7253        ld_mask = 0;
7254        change_count = 0;
7255        any_shared_limit_changing = 0;
7256        for (i = 0; i < NUM_USABLE_VLS; i++, stat_mask <<= 1) {
7257                if (!valid_vl(i))
7258                        continue;
7259                this_shared_changing = new_bc->vl[i].shared
7260                                                != cur_bc.vl[i].shared;
7261                if (this_shared_changing)
7262                        any_shared_limit_changing = 1;
7263                if (new_bc->vl[i].dedicated != cur_bc.vl[i].dedicated
7264                                || this_shared_changing) {
7265                        changing[i] = 1;
7266                        changing_mask |= stat_mask;
7267                        change_count++;
7268                }
7269                if (be16_to_cpu(new_bc->vl[i].dedicated) <
7270                                        be16_to_cpu(cur_bc.vl[i].dedicated)) {
7271                        lowering_dedicated[i] = 1;
7272                        ld_mask |= stat_mask;
7273                }
7274        }
7275
7276        /* bracket the credit change with a total adjustment */
7277        if (new_total > cur_total)
7278                set_global_limit(dd, new_total);
7279
7280        /*
7281         * Start the credit change algorithm.
7282         */
7283        use_all_mask = 0;
7284        if ((be16_to_cpu(new_bc->overall_shared_limit) <
7285                                be16_to_cpu(cur_bc.overall_shared_limit))
7286                        || (is_a0(dd) && any_shared_limit_changing)) {
7287                set_global_shared(dd, 0);
7288                cur_bc.overall_shared_limit = 0;
7289                use_all_mask = 1;
7290        }
7291
7292        for (i = 0; i < NUM_USABLE_VLS; i++) {
7293                if (!valid_vl(i))
7294                        continue;
7295
7296                if (changing[i]) {
7297                        set_vl_shared(dd, i, 0);
7298                        cur_bc.vl[i].shared = 0;
7299                }
7300        }
7301
7302        wait_for_vl_status_clear(dd, use_all_mask ? all_mask : changing_mask,
7303                "shared");
7304
7305        if (change_count > 0) {
7306                for (i = 0; i < NUM_USABLE_VLS; i++) {
7307                        if (!valid_vl(i))
7308                                continue;
7309
7310                        if (lowering_dedicated[i]) {
7311                                set_vl_dedicated(dd, i,
7312                                        be16_to_cpu(new_bc->vl[i].dedicated));
7313                                cur_bc.vl[i].dedicated =
7314                                                new_bc->vl[i].dedicated;
7315                        }
7316                }
7317
7318                wait_for_vl_status_clear(dd, ld_mask, "dedicated");
7319
7320                /* now raise all dedicated that are going up */
7321                for (i = 0; i < NUM_USABLE_VLS; i++) {
7322                        if (!valid_vl(i))
7323                                continue;
7324
7325                        if (be16_to_cpu(new_bc->vl[i].dedicated) >
7326                                        be16_to_cpu(cur_bc.vl[i].dedicated))
7327                                set_vl_dedicated(dd, i,
7328                                        be16_to_cpu(new_bc->vl[i].dedicated));
7329                }
7330        }
7331
7332        /* next raise all shared that are going up */
7333        for (i = 0; i < NUM_USABLE_VLS; i++) {
7334                if (!valid_vl(i))
7335                        continue;
7336
7337                if (be16_to_cpu(new_bc->vl[i].shared) >
7338                                be16_to_cpu(cur_bc.vl[i].shared))
7339                        set_vl_shared(dd, i, be16_to_cpu(new_bc->vl[i].shared));
7340        }
7341
7342        /* finally raise the global shared */
7343        if (be16_to_cpu(new_bc->overall_shared_limit) >
7344                        be16_to_cpu(cur_bc.overall_shared_limit))
7345                set_global_shared(dd,
7346                        be16_to_cpu(new_bc->overall_shared_limit));
7347
7348        /* bracket the credit change with a total adjustment */
7349        if (new_total < cur_total)
7350                set_global_limit(dd, new_total);
7351        return 0;
7352}
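
/*
 * Editor's sketch (not part of the driver): how the per-VL status
 * masks accumulate in set_buffer_control(), shown stand-alone with an
 * invented 4-VL layout (bit i == VL i; the real bits come from the
 * RETURN_CREDIT_STATUS SMASK layout).
 */
#include <stdio.h>
#include <stdint.h>

int main(void)
{
        int changing[4] = { 0, 1, 0, 1 };       /* VLs 1 and 3 change */
        uint64_t stat_mask = 1;                 /* bit for VL0 */
        uint64_t changing_mask = 0;
        int i;

        /* shift the status bit once per VL, OR it in when changing */
        for (i = 0; i < 4; i++, stat_mask <<= 1)
                if (changing[i])
                        changing_mask |= stat_mask;
        printf("0x%llx\n", (unsigned long long)changing_mask);  /* 0xa */
        return 0;
}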
7353
7354/*
7355 * Read the given fabric manager table. Return the size of the
7356 * table (in bytes) on success, and a negative error code on
7357 * failure.
7358 */
7359int fm_get_table(struct hfi1_pportdata *ppd, int which, void *t)
7361{
7362        int size;
7363        struct vl_arb_cache *vlc;
7364
7365        switch (which) {
7366        case FM_TBL_VL_HIGH_ARB:
7367                size = 256;
7368                /*
7369                 * OPA specifies 128 elements (of 2 bytes each), though
7370                 * HFI supports only 16 elements in h/w.
7371                 */
7372                vlc = vl_arb_lock_cache(ppd, HI_PRIO_TABLE);
7373                vl_arb_get_cache(vlc, t);
7374                vl_arb_unlock_cache(ppd, HI_PRIO_TABLE);
7375                break;
7376        case FM_TBL_VL_LOW_ARB:
7377                size = 256;
7378                /*
7379                 * OPA specifies 128 elements (of 2 bytes each), though
7380                 * HFI supports only 16 elements in h/w.
7381                 */
7382                vlc = vl_arb_lock_cache(ppd, LO_PRIO_TABLE);
7383                vl_arb_get_cache(vlc, t);
7384                vl_arb_unlock_cache(ppd, LO_PRIO_TABLE);
7385                break;
7386        case FM_TBL_BUFFER_CONTROL:
7387                size = get_buffer_control(ppd->dd, t, NULL);
7388                break;
7389        case FM_TBL_SC2VLNT:
7390                size = get_sc2vlnt(ppd->dd, t);
7391                break;
7392        case FM_TBL_VL_PREEMPT_ELEMS:
7393                size = 256;
7394                /* OPA specifies 128 elements, of 2 bytes each */
7395                get_vlarb_preempt(ppd->dd, OPA_MAX_VLS, t);
7396                break;
7397        case FM_TBL_VL_PREEMPT_MATRIX:
7398                size = 256;
7399                /*
7400                 * OPA specifies that this is the same size as the VL
7401                 * arbitration tables (i.e., 256 bytes).
7402                 */
7403                break;
7404        default:
7405                return -EINVAL;
7406        }
7407        return size;
7408}
7409
7410/*
7411 * Write the given fabric manager table.
7412 */
7413int fm_set_table(struct hfi1_pportdata *ppd, int which, void *t)
7414{
7415        int ret = 0;
7416        struct vl_arb_cache *vlc;
7417
7418        switch (which) {
7419        case FM_TBL_VL_HIGH_ARB:
7420                vlc = vl_arb_lock_cache(ppd, HI_PRIO_TABLE);
7421                if (vl_arb_match_cache(vlc, t)) {
7422                        vl_arb_unlock_cache(ppd, HI_PRIO_TABLE);
7423                        break;
7424                }
7425                vl_arb_set_cache(vlc, t);
7426                vl_arb_unlock_cache(ppd, HI_PRIO_TABLE);
7427                ret = set_vl_weights(ppd, SEND_HIGH_PRIORITY_LIST,
7428                                     VL_ARB_HIGH_PRIO_TABLE_SIZE, t);
7429                break;
7430        case FM_TBL_VL_LOW_ARB:
7431                vlc = vl_arb_lock_cache(ppd, LO_PRIO_TABLE);
7432                if (vl_arb_match_cache(vlc, t)) {
7433                        vl_arb_unlock_cache(ppd, LO_PRIO_TABLE);
7434                        break;
7435                }
7436                vl_arb_set_cache(vlc, t);
7437                vl_arb_unlock_cache(ppd, LO_PRIO_TABLE);
7438                ret = set_vl_weights(ppd, SEND_LOW_PRIORITY_LIST,
7439                                     VL_ARB_LOW_PRIO_TABLE_SIZE, t);
7440                break;
7441        case FM_TBL_BUFFER_CONTROL:
7442                ret = set_buffer_control(ppd->dd, t);
7443                break;
7444        case FM_TBL_SC2VLNT:
7445                set_sc2vlnt(ppd->dd, t);
7446                break;
7447        default:
7448                ret = -EINVAL;
7449        }
7450        return ret;
7451}
7452
7453/*
7454 * Disable all data VLs.
7455 *
7456 * Return 0 if disabled, non-zero if the VLs cannot be disabled.
7457 */
7458static int disable_data_vls(struct hfi1_devdata *dd)
7459{
7460        if (is_a0(dd))
7461                return 1;
7462
7463        pio_send_control(dd, PSC_DATA_VL_DISABLE);
7464
7465        return 0;
7466}
7467
7468/*
7469 * open_fill_data_vls() - the counterpart to stop_drain_data_vls().
7470 * Just re-enables all data VLs (the "fill" part happens
7471 * automatically - the name was chosen for symmetry with
7472 * stop_drain_data_vls()).
7473 *
7474 * Return 0 if successful, non-zero if the VLs cannot be enabled.
7475 */
7476int open_fill_data_vls(struct hfi1_devdata *dd)
7477{
7478        if (is_a0(dd))
7479                return 1;
7480
7481        pio_send_control(dd, PSC_DATA_VL_ENABLE);
7482
7483        return 0;
7484}
7485
7486/*
7487 * drain_data_vls() - assumes that disable_data_vls() has been called;
7488 * waits for the occupancy (of per-VL FIFOs) of all contexts and SDMA
7489 * engines to drop to 0.
7490 */
7491static void drain_data_vls(struct hfi1_devdata *dd)
7492{
7493        sc_wait(dd);
7494        sdma_wait(dd);
7495        pause_for_credit_return(dd);
7496}
7497
7498/*
7499 * stop_drain_data_vls() - disable, then drain all per-VL fifos.
7500 *
7501 * Use open_fill_data_vls() to resume using data VLs.  This pair is
7502 * meant to be used like this:
7503 *
7504 * stop_drain_data_vls(dd);
7505 * // do things with per-VL resources
7506 * open_fill_data_vls(dd);
7507 */
7508int stop_drain_data_vls(struct hfi1_devdata *dd)
7509{
7510        int ret;
7511
7512        ret = disable_data_vls(dd);
7513        if (ret == 0)
7514                drain_data_vls(dd);
7515
7516        return ret;
7517}
7518
7519/*
7520 * Convert a nanosecond time to a cclock count.  No matter how slow
7521 * the cclock, a non-zero ns will always have a non-zero result.
7522 */
7523u32 ns_to_cclock(struct hfi1_devdata *dd, u32 ns)
7524{
7525        u32 cclocks;
7526
7527        if (dd->icode == ICODE_FPGA_EMULATION)
7528                cclocks = (ns * 1000) / FPGA_CCLOCK_PS;
7529        else  /* simulation pretends to be ASIC */
7530                cclocks = (ns * 1000) / ASIC_CCLOCK_PS;
7531        if (ns && !cclocks)     /* if ns nonzero, must be at least 1 */
7532                cclocks = 1;
7533        return cclocks;
7534}
7535
7536/*
7537 * Convert a cclock count to nanoseconds.  No matter how slow
7538 * the cclock, a non-zero cclocks will always have a non-zero result.
7539 */
7540u32 cclock_to_ns(struct hfi1_devdata *dd, u32 cclocks)
7541{
7542        u32 ns;
7543
7544        if (dd->icode == ICODE_FPGA_EMULATION)
7545                ns = (cclocks * FPGA_CCLOCK_PS) / 1000;
7546        else  /* simulation pretends to be ASIC */
7547                ns = (cclocks * ASIC_CCLOCK_PS) / 1000;
7548        if (cclocks && !ns)
7549                ns = 1;
7550        return ns;
7551}
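
/*
 * Editor's sketch (not part of the driver): the round-up-to-nonzero
 * guarantee of the two conversions above, stand-alone.  The 1250ps
 * (800MHz) clock period is invented for the example.
 */
#include <stdio.h>
#include <stdint.h>

#define EXAMPLE_CCLOCK_PS 1250  /* hypothetical cclock period */

int main(void)
{
        uint32_t ns = 1;        /* shorter than one cclock period */
        uint32_t cclocks = (ns * 1000) / EXAMPLE_CCLOCK_PS;     /* 0 */

        if (ns && !cclocks)
                cclocks = 1;    /* never round a non-zero time to zero */
        printf("%uns -> %u cclocks\n", ns, cclocks);    /* 1ns -> 1 */
        return 0;
}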
7552
7553/*
7554 * Dynamically adjust the receive interrupt timeout for a context based on
7555 * incoming packet rate.
7556 *
7557 * NOTE: Dynamic adjustment does not allow rcv_intr_count to be zero.
7558 */
7559static void adjust_rcv_timeout(struct hfi1_ctxtdata *rcd, u32 npkts)
7560{
7561        struct hfi1_devdata *dd = rcd->dd;
7562        u32 timeout = rcd->rcvavail_timeout;
7563
7564        /*
7565         * This algorithm doubles or halves the timeout depending on whether
7566         * the number of packets received in this interrupt was less than or
7567         * greater than or equal to the interrupt count.
7568         *
7569         * The calculations below do not allow a steady state to be achieved.
7570         * Only at the endpoints is it possible to have an unchanging
7571         * timeout.
7572         */
7573        if (npkts < rcv_intr_count) {
7574                /*
7575                 * Not enough packets arrived before the timeout, adjust
7576                 * timeout downward.
7577                 */
7578                if (timeout < 2) /* already at minimum? */
7579                        return;
7580                timeout >>= 1;
7581        } else {
7582                /*
7583                 * More than enough packets arrived before the timeout, adjust
7584                 * timeout upward.
7585                 */
7586                if (timeout >= dd->rcv_intr_timeout_csr) /* already at max? */
7587                        return;
7588                timeout = min(timeout << 1, dd->rcv_intr_timeout_csr);
7589        }
7590
7591        rcd->rcvavail_timeout = timeout;
7592        /* timeout cannot be larger than rcv_intr_timeout_csr, which has
7593         * already been verified to be in range */
7594        write_kctxt_csr(dd, rcd->ctxt, RCV_AVAIL_TIME_OUT,
7595                (u64)timeout << RCV_AVAIL_TIME_OUT_TIME_OUT_RELOAD_SHIFT);
7596}
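
/*
 * Editor's sketch (not part of the driver): the double-or-halve policy
 * of adjust_rcv_timeout() as a pure function plus a short trace.  The
 * EX_* limits stand in for rcv_intr_count and dd->rcv_intr_timeout_csr
 * and are invented.
 */
#include <stdio.h>
#include <stdint.h>

#define EX_INTR_COUNT 1         /* stand-in for rcv_intr_count */
#define EX_MAX_TIMEOUT 840      /* stand-in for rcv_intr_timeout_csr */

static uint32_t adjust(uint32_t timeout, uint32_t npkts)
{
        if (npkts < EX_INTR_COUNT)              /* slow arrival: halve */
                return timeout < 2 ? timeout : timeout >> 1;
        if (timeout >= EX_MAX_TIMEOUT)          /* already at maximum */
                return timeout;
        timeout <<= 1;                          /* fast arrival: double */
        return timeout > EX_MAX_TIMEOUT ? EX_MAX_TIMEOUT : timeout;
}

int main(void)
{
        uint32_t t = 100;

        t = adjust(t, 5);       /* busy: 100 -> 200 */
        t = adjust(t, 5);       /* busy: 200 -> 400 */
        t = adjust(t, 0);       /* idle: 400 -> 200 */
        printf("timeout=%u\n", t);      /* prints timeout=200 */
        return 0;
}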
7597
7598void update_usrhead(struct hfi1_ctxtdata *rcd, u32 hd, u32 updegr, u32 egrhd,
7599                    u32 intr_adjust, u32 npkts)
7600{
7601        struct hfi1_devdata *dd = rcd->dd;
7602        u64 reg;
7603        u32 ctxt = rcd->ctxt;
7604
7605        /*
7606         * Need to write timeout register before updating RcvHdrHead to ensure
7607         * that a new value is used when the HW decides to restart counting.
7608         */
7609        if (intr_adjust)
7610                adjust_rcv_timeout(rcd, npkts);
7611        if (updegr) {
7612                reg = (egrhd & RCV_EGR_INDEX_HEAD_HEAD_MASK)
7613                        << RCV_EGR_INDEX_HEAD_HEAD_SHIFT;
7614                write_uctxt_csr(dd, ctxt, RCV_EGR_INDEX_HEAD, reg);
7615        }
7616        mmiowb();
7617        reg = ((u64)rcv_intr_count << RCV_HDR_HEAD_COUNTER_SHIFT) |
7618                (((u64)hd & RCV_HDR_HEAD_HEAD_MASK)
7619                        << RCV_HDR_HEAD_HEAD_SHIFT);
7620        write_uctxt_csr(dd, ctxt, RCV_HDR_HEAD, reg);
7621        mmiowb();
7622}
7623
7624u32 hdrqempty(struct hfi1_ctxtdata *rcd)
7625{
7626        u32 head, tail;
7627
7628        head = (read_uctxt_csr(rcd->dd, rcd->ctxt, RCV_HDR_HEAD)
7629                & RCV_HDR_HEAD_HEAD_SMASK) >> RCV_HDR_HEAD_HEAD_SHIFT;
7630
7631        if (rcd->rcvhdrtail_kvaddr)
7632                tail = get_rcvhdrtail(rcd);
7633        else
7634                tail = read_uctxt_csr(rcd->dd, rcd->ctxt, RCV_HDR_TAIL);
7635
7636        return head == tail;
7637}
7638
7639/*
7640 * Context Control and Receive Array encoding for buffer size:
7641 *      0x0 invalid
7642 *      0x1   4 KB
7643 *      0x2   8 KB
7644 *      0x3  16 KB
7645 *      0x4  32 KB
7646 *      0x5  64 KB
7647 *      0x6 128 KB
7648 *      0x7 256 KB
7649 *      0x8 512 KB (Receive Array only)
7650 *      0x9   1 MB (Receive Array only)
7651 *      0xa   2 MB (Receive Array only)
7652 *
7653 *      0xB-0xF - reserved (Receive Array only)
7654 *
7655 *
7656 * This routine assumes that the value has already been sanity checked.
7657 */
7658static u32 encoded_size(u32 size)
7659{
7660        switch (size) {
7661        case   4*1024: return 0x1;
7662        case   8*1024: return 0x2;
7663        case  16*1024: return 0x3;
7664        case  32*1024: return 0x4;
7665        case  64*1024: return 0x5;
7666        case 128*1024: return 0x6;
7667        case 256*1024: return 0x7;
7668        case 512*1024: return 0x8;
7669        case   1*1024*1024: return 0x9;
7670        case   2*1024*1024: return 0xa;
7671        }
7672        return 0x1;     /* if invalid, go with the minimum size */
7673}
7674
7675void hfi1_rcvctrl(struct hfi1_devdata *dd, unsigned int op, int ctxt)
7676{
7677        struct hfi1_ctxtdata *rcd;
7678        u64 rcvctrl, reg;
7679        int did_enable = 0;
7680
7681        rcd = dd->rcd[ctxt];
7682        if (!rcd)
7683                return;
7684
7685        hfi1_cdbg(RCVCTRL, "ctxt %d op 0x%x", ctxt, op);
7686
7687        rcvctrl = read_kctxt_csr(dd, ctxt, RCV_CTXT_CTRL);
7688        /* if the context is already enabled, don't do the extra steps */
7689        if ((op & HFI1_RCVCTRL_CTXT_ENB)
7690                        && !(rcvctrl & RCV_CTXT_CTRL_ENABLE_SMASK)) {
7691                /* reset the tail and hdr addresses, and sequence count */
7692                write_kctxt_csr(dd, ctxt, RCV_HDR_ADDR,
7693                                rcd->rcvhdrq_phys);
7694                if (HFI1_CAP_KGET_MASK(rcd->flags, DMA_RTAIL))
7695                        write_kctxt_csr(dd, ctxt, RCV_HDR_TAIL_ADDR,
7696                                        rcd->rcvhdrqtailaddr_phys);
7697                rcd->seq_cnt = 1;
7698
7699                /* reset the cached receive header queue head value */
7700                rcd->head = 0;
7701
7702                /*
7703                 * Zero the receive header queue so we don't get false
7704                 * positives when checking the sequence number.  The
7705                 * sequence numbers could land exactly on the same spot.
7706                 * E.g., an rcd restart before the receive header queue wrapped.
7707                 */
7708                memset(rcd->rcvhdrq, 0, rcd->rcvhdrq_size);
7709
7710                /* starting timeout */
7711                rcd->rcvavail_timeout = dd->rcv_intr_timeout_csr;
7712
7713                /* enable the context */
7714                rcvctrl |= RCV_CTXT_CTRL_ENABLE_SMASK;
7715
7716                /* clean the egr buffer size first */
7717                rcvctrl &= ~RCV_CTXT_CTRL_EGR_BUF_SIZE_SMASK;
7718                rcvctrl |= ((u64)encoded_size(rcd->egrbufs.rcvtid_size)
7719                                & RCV_CTXT_CTRL_EGR_BUF_SIZE_MASK)
7720                                        << RCV_CTXT_CTRL_EGR_BUF_SIZE_SHIFT;
7721
7722                /* zero RcvHdrHead - set RcvHdrHead.Counter after enable */
7723                write_uctxt_csr(dd, ctxt, RCV_HDR_HEAD, 0);
7724                did_enable = 1;
7725
7726                /* zero RcvEgrIndexHead */
7727                write_uctxt_csr(dd, ctxt, RCV_EGR_INDEX_HEAD, 0);
7728
7729                /* set eager count and base index */
7730                reg = (((u64)(rcd->egrbufs.alloced >> RCV_SHIFT)
7731                        & RCV_EGR_CTRL_EGR_CNT_MASK)
7732                       << RCV_EGR_CTRL_EGR_CNT_SHIFT) |
7733                        (((rcd->eager_base >> RCV_SHIFT)
7734                          & RCV_EGR_CTRL_EGR_BASE_INDEX_MASK)
7735                         << RCV_EGR_CTRL_EGR_BASE_INDEX_SHIFT);
7736                write_kctxt_csr(dd, ctxt, RCV_EGR_CTRL, reg);
7737
7738                /*
7739                 * Set TID (expected) count and base index.
7740                 * rcd->expected_count is set to individual RcvArray entries,
7741                 * not pairs, and the CSR takes a pair-count in groups of
7742                 * four, so divide by 8.
7743                 */
7744                reg = (((rcd->expected_count >> RCV_SHIFT)
7745                                        & RCV_TID_CTRL_TID_PAIR_CNT_MASK)
7746                                << RCV_TID_CTRL_TID_PAIR_CNT_SHIFT) |
7747                      (((rcd->expected_base >> RCV_SHIFT)
7748                                        & RCV_TID_CTRL_TID_BASE_INDEX_MASK)
7749                                << RCV_TID_CTRL_TID_BASE_INDEX_SHIFT);
7750                write_kctxt_csr(dd, ctxt, RCV_TID_CTRL, reg);
7751                if (ctxt == VL15CTXT)
7752                        write_csr(dd, RCV_VL15, VL15CTXT);
7753        }
7754        if (op & HFI1_RCVCTRL_CTXT_DIS) {
7755                write_csr(dd, RCV_VL15, 0);
7756                rcvctrl &= ~RCV_CTXT_CTRL_ENABLE_SMASK;
7757        }
7758        if (op & HFI1_RCVCTRL_INTRAVAIL_ENB)
7759                rcvctrl |= RCV_CTXT_CTRL_INTR_AVAIL_SMASK;
7760        if (op & HFI1_RCVCTRL_INTRAVAIL_DIS)
7761                rcvctrl &= ~RCV_CTXT_CTRL_INTR_AVAIL_SMASK;
7762        if (op & HFI1_RCVCTRL_TAILUPD_ENB && rcd->rcvhdrqtailaddr_phys)
7763                rcvctrl |= RCV_CTXT_CTRL_TAIL_UPD_SMASK;
7764        if (op & HFI1_RCVCTRL_TAILUPD_DIS)
7765                rcvctrl &= ~RCV_CTXT_CTRL_TAIL_UPD_SMASK;
7766        if (op & HFI1_RCVCTRL_TIDFLOW_ENB)
7767                rcvctrl |= RCV_CTXT_CTRL_TID_FLOW_ENABLE_SMASK;
7768        if (op & HFI1_RCVCTRL_TIDFLOW_DIS)
7769                rcvctrl &= ~RCV_CTXT_CTRL_TID_FLOW_ENABLE_SMASK;
7770        if (op & HFI1_RCVCTRL_ONE_PKT_EGR_ENB) {
7771                /* In one-packet-per-eager mode, the size comes from
7772                 * the RcvArray entry. */
7773                rcvctrl &= ~RCV_CTXT_CTRL_EGR_BUF_SIZE_SMASK;
7774                rcvctrl |= RCV_CTXT_CTRL_ONE_PACKET_PER_EGR_BUFFER_SMASK;
7775        }
7776        if (op & HFI1_RCVCTRL_ONE_PKT_EGR_DIS)
7777                rcvctrl &= ~RCV_CTXT_CTRL_ONE_PACKET_PER_EGR_BUFFER_SMASK;
7778        if (op & HFI1_RCVCTRL_NO_RHQ_DROP_ENB)
7779                rcvctrl |= RCV_CTXT_CTRL_DONT_DROP_RHQ_FULL_SMASK;
7780        if (op & HFI1_RCVCTRL_NO_RHQ_DROP_DIS)
7781                rcvctrl &= ~RCV_CTXT_CTRL_DONT_DROP_RHQ_FULL_SMASK;
7782        if (op & HFI1_RCVCTRL_NO_EGR_DROP_ENB)
7783                rcvctrl |= RCV_CTXT_CTRL_DONT_DROP_EGR_FULL_SMASK;
7784        if (op & HFI1_RCVCTRL_NO_EGR_DROP_DIS)
7785                rcvctrl &= ~RCV_CTXT_CTRL_DONT_DROP_EGR_FULL_SMASK;
7786        rcd->rcvctrl = rcvctrl;
7787        hfi1_cdbg(RCVCTRL, "ctxt %d rcvctrl 0x%llx\n", ctxt, rcvctrl);
7788        write_kctxt_csr(dd, ctxt, RCV_CTXT_CTRL, rcd->rcvctrl);
7789
7790        /* work around sticky RcvCtxtStatus.BlockedRHQFull */
7791        if (did_enable
7792            && (rcvctrl & RCV_CTXT_CTRL_DONT_DROP_RHQ_FULL_SMASK)) {
7793                reg = read_kctxt_csr(dd, ctxt, RCV_CTXT_STATUS);
7794                if (reg != 0) {
7795                        dd_dev_info(dd, "ctxt %d status %lld (blocked)\n",
7796                                ctxt, reg);
7797                        read_uctxt_csr(dd, ctxt, RCV_HDR_HEAD);
7798                        write_uctxt_csr(dd, ctxt, RCV_HDR_HEAD, 0x10);
7799                        write_uctxt_csr(dd, ctxt, RCV_HDR_HEAD, 0x00);
7800                        read_uctxt_csr(dd, ctxt, RCV_HDR_HEAD);
7801                        reg = read_kctxt_csr(dd, ctxt, RCV_CTXT_STATUS);
7802                        dd_dev_info(dd, "ctxt %d status %lld (%s blocked)\n",
7803                                ctxt, reg, reg == 0 ? "not" : "still");
7804                }
7805        }
7806
7807        if (did_enable) {
7808                /*
7809                 * The interrupt timeout and count must be set after
7810                 * the context is enabled to take effect.
7811                 */
7812                /* set interrupt timeout */
7813                write_kctxt_csr(dd, ctxt, RCV_AVAIL_TIME_OUT,
7814                        (u64)rcd->rcvavail_timeout <<
7815                                RCV_AVAIL_TIME_OUT_TIME_OUT_RELOAD_SHIFT);
7816
7817                /* set RcvHdrHead.Counter, zero RcvHdrHead.Head (again) */
7818                reg = (u64)rcv_intr_count << RCV_HDR_HEAD_COUNTER_SHIFT;
7819                write_uctxt_csr(dd, ctxt, RCV_HDR_HEAD, reg);
7820        }
7821
7822        if (op & (HFI1_RCVCTRL_TAILUPD_DIS | HFI1_RCVCTRL_CTXT_DIS))
7823                /*
7824                 * If the context has been disabled and the Tail Update has
7825                 * been cleared, clear the RCV_HDR_TAIL_ADDR CSR so
7826                 * it doesn't contain an address that is invalid.
7827                 */
7828                write_kctxt_csr(dd, ctxt, RCV_HDR_TAIL_ADDR, 0);
7829}
7830
7831u32 hfi1_read_cntrs(struct hfi1_devdata *dd, loff_t pos, char **namep,
7832                    u64 **cntrp)
7833{
7834        int ret;
7835        u64 val = 0;
7836
7837        if (namep) {
7838                ret = dd->cntrnameslen;
7839                if (pos != 0) {
7840                        dd_dev_err(dd, "read_cntrs does not support indexing");
7841                        return 0;
7842                }
7843                *namep = dd->cntrnames;
7844        } else {
7845                const struct cntr_entry *entry;
7846                int i, j;
7847
7848                ret = (dd->ndevcntrs) * sizeof(u64);
7849                if (pos != 0) {
7850                        dd_dev_err(dd, "read_cntrs does not support indexing");
7851                        return 0;
7852                }
7853
7854                /* Get the start of the block of counters */
7855                *cntrp = dd->cntrs;
7856
7857                /*
7858                 * Now go and fill in each counter in the block.
7859                 */
7860                for (i = 0; i < DEV_CNTR_LAST; i++) {
7861                        entry = &dev_cntrs[i];
7862                        hfi1_cdbg(CNTR, "reading %s", entry->name);
7863                        if (entry->flags & CNTR_DISABLED) {
7864                                /* Nothing */
7865                                hfi1_cdbg(CNTR, "\tDisabled\n");
7866                        } else {
7867                                if (entry->flags & CNTR_VL) {
7868                                        hfi1_cdbg(CNTR, "\tPer VL\n");
7869                                        for (j = 0; j < C_VL_COUNT; j++) {
7870                                                val = entry->rw_cntr(entry,
7871                                                                  dd, j,
7872                                                                  CNTR_MODE_R,
7873                                                                  0);
7874                                                hfi1_cdbg(
7875                                                   CNTR,
7876                                                   "\t\tRead 0x%llx for %d\n",
7877                                                   val, j);
7878                                                dd->cntrs[entry->offset + j] =
7879                                                                            val;
7880                                        }
7881                                } else {
7882                                        val = entry->rw_cntr(entry, dd,
7883                                                        CNTR_INVALID_VL,
7884                                                        CNTR_MODE_R, 0);
7885                                        dd->cntrs[entry->offset] = val;
7886                                        hfi1_cdbg(CNTR, "\tRead 0x%llx", val);
7887                                }
7888                        }
7889                }
7890        }
7891        return ret;
7892}
7893
7894/*
7895 * Used by sysfs to create the files through which hfi1 stats are read.
7896 */
7897u32 hfi1_read_portcntrs(struct hfi1_devdata *dd, loff_t pos, u32 port,
7898                        char **namep, u64 **cntrp)
7899{
7900        int ret;
7901        u64 val = 0;
7902
7903        if (namep) {
7904                ret = dd->portcntrnameslen;
7905                if (pos != 0) {
7906                        dd_dev_err(dd, "index not supported");
7907                        return 0;
7908                }
7909                *namep = dd->portcntrnames;
7910        } else {
7911                const struct cntr_entry *entry;
7912                struct hfi1_pportdata *ppd;
7913                int i, j;
7914
7915                ret = (dd->nportcntrs) * sizeof(u64);
7916                if (pos != 0) {
7917                        dd_dev_err(dd, "indexing not supported");
7918                        return 0;
7919                }
7920                ppd = (struct hfi1_pportdata *)(dd + 1 + port);
7921                *cntrp = ppd->cntrs;
7922
7923                for (i = 0; i < PORT_CNTR_LAST; i++) {
7924                        entry = &port_cntrs[i];
7925                        hfi1_cdbg(CNTR, "reading %s", entry->name);
7926                        if (entry->flags & CNTR_DISABLED) {
7927                                /* Nothing */
7928                                hfi1_cdbg(CNTR, "\tDisabled\n");
7929                                continue;
7930                        }
7931
7932                        if (entry->flags & CNTR_VL) {
7933                                hfi1_cdbg(CNTR, "\tPer VL");
7934                                for (j = 0; j < C_VL_COUNT; j++) {
7935                                        val = entry->rw_cntr(entry, ppd, j,
7936                                                               CNTR_MODE_R,
7937                                                               0);
7938                                        hfi1_cdbg(
7939                                           CNTR,
7940                                           "\t\tRead 0x%llx for %d",
7941                                           val, j);
7942                                        ppd->cntrs[entry->offset + j] = val;
7943                                }
7944                        } else {
7945                                val = entry->rw_cntr(entry, ppd,
7946                                                       CNTR_INVALID_VL,
7947                                                       CNTR_MODE_R,
7948                                                       0);
7949                                ppd->cntrs[entry->offset] = val;
7950                                hfi1_cdbg(CNTR, "\tRead 0x%llx", val);
7951                        }
7952                }
7953        }
7954        return ret;
7955}
7956
7957static void free_cntrs(struct hfi1_devdata *dd)
7958{
7959        struct hfi1_pportdata *ppd;
7960        int i;
7961
7962        if (dd->synth_stats_timer.data)
7963                del_timer_sync(&dd->synth_stats_timer);
7964        dd->synth_stats_timer.data = 0;
7965        ppd = (struct hfi1_pportdata *)(dd + 1);
7966        for (i = 0; i < dd->num_pports; i++, ppd++) {
7967                kfree(ppd->cntrs);
7968                kfree(ppd->scntrs);
7969                free_percpu(ppd->ibport_data.rc_acks);
7970                free_percpu(ppd->ibport_data.rc_qacks);
7971                free_percpu(ppd->ibport_data.rc_delayed_comp);
7972                ppd->cntrs = NULL;
7973                ppd->scntrs = NULL;
7974                ppd->ibport_data.rc_acks = NULL;
7975                ppd->ibport_data.rc_qacks = NULL;
7976                ppd->ibport_data.rc_delayed_comp = NULL;
7977        }
7978        kfree(dd->portcntrnames);
7979        dd->portcntrnames = NULL;
7980        kfree(dd->cntrs);
7981        dd->cntrs = NULL;
7982        kfree(dd->scntrs);
7983        dd->scntrs = NULL;
7984        kfree(dd->cntrnames);
7985        dd->cntrnames = NULL;
7986}
7987
7988#define CNTR_MAX 0xFFFFFFFFFFFFFFFFULL
7989#define CNTR_32BIT_MAX 0x00000000FFFFFFFF
7990
7991static u64 read_dev_port_cntr(struct hfi1_devdata *dd, struct cntr_entry *entry,
7992                              u64 *psval, void *context, int vl)
7993{
7994        u64 val;
7995        u64 sval = *psval;
7996
7997        if (entry->flags & CNTR_DISABLED) {
7998                dd_dev_err(dd, "Counter %s not enabled", entry->name);
7999                return 0;
8000        }
8001
8002        hfi1_cdbg(CNTR, "cntr: %s vl %d psval 0x%llx", entry->name, vl, *psval);
8003
8004        val = entry->rw_cntr(entry, context, vl, CNTR_MODE_R, 0);
8005
8006        /* If it's a synthetic counter, there is more work we need to do */
8007        if (entry->flags & CNTR_SYNTH) {
8008                if (sval == CNTR_MAX) {
8009                        /* No need to read already saturated */
8010                        return CNTR_MAX;
8011                }
8012
8013                if (entry->flags & CNTR_32BIT) {
8014                        /* 32bit counters can wrap multiple times */
8015                        u64 upper = sval >> 32;
8016                        u64 lower = (sval << 32) >> 32;
8017
8018                        if (lower > val) { /* hw wrapped */
8019                                if (upper == CNTR_32BIT_MAX)
8020                                        val = CNTR_MAX;
8021                                else
8022                                        upper++;
8023                        }
8024
8025                        if (val != CNTR_MAX)
8026                                val = (upper << 32) | val;
8027
8028                } else {
8029                        /* If we rolled we are saturated */
8030                        if ((val < sval) || (val > CNTR_MAX))
8031                                val = CNTR_MAX;
8032                }
8033        }
8034
8035        *psval = val;
8036
8037        hfi1_cdbg(CNTR, "\tNew val=0x%llx", val);
8038
8039        return val;
8040}
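
/*
 * Editor's sketch (not part of the driver): the 32-bit wrap handling
 * in read_dev_port_cntr(), stand-alone.  The software value keeps the
 * upper 32 bits; the hardware supplies the (possibly wrapped) lower
 * 32 bits.  The values are invented.
 */
#include <stdio.h>
#include <stdint.h>

int main(void)
{
        uint64_t sval = 0x00000001fffffff0ULL;  /* last synthesized value */
        uint64_t hw = 0x10;     /* 32-bit hw counter after a wrap */
        uint64_t upper = sval >> 32;
        uint64_t lower = (sval << 32) >> 32;

        if (lower > hw)         /* lower half went backwards => hw wrapped */
                upper++;
        printf("0x%llx\n", (unsigned long long)((upper << 32) | hw));
        /* prints 0x200000010 */
        return 0;
}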
8041
8042static u64 write_dev_port_cntr(struct hfi1_devdata *dd,
8043                               struct cntr_entry *entry,
8044                               u64 *psval, void *context, int vl, u64 data)
8045{
8046        u64 val;
8047
8048        if (entry->flags & CNTR_DISABLED) {
8049                dd_dev_err(dd, "Counter %s not enabled", entry->name);
8050                return 0;
8051        }
8052
8053        hfi1_cdbg(CNTR, "cntr: %s vl %d psval 0x%llx", entry->name, vl, *psval);
8054
8055        if (entry->flags & CNTR_SYNTH) {
8056                *psval = data;
8057                if (entry->flags & CNTR_32BIT) {
8058                        val = entry->rw_cntr(entry, context, vl, CNTR_MODE_W,
8059                                             (data << 32) >> 32);
8060                        val = data; /* return the full 64bit value */
8061                } else {
8062                        val = entry->rw_cntr(entry, context, vl, CNTR_MODE_W,
8063                                             data);
8064                }
8065        } else {
8066                val = entry->rw_cntr(entry, context, vl, CNTR_MODE_W, data);
8067        }
8068
8069        *psval = val;
8070
8071        hfi1_cdbg(CNTR, "\tNew val=0x%llx", val);
8072
8073        return val;
8074}
8075
8076u64 read_dev_cntr(struct hfi1_devdata *dd, int index, int vl)
8077{
8078        struct cntr_entry *entry;
8079        u64 *sval;
8080
8081        entry = &dev_cntrs[index];
8082        sval = dd->scntrs + entry->offset;
8083
8084        if (vl != CNTR_INVALID_VL)
8085                sval += vl;
8086
8087        return read_dev_port_cntr(dd, entry, sval, dd, vl);
8088}
8089
8090u64 write_dev_cntr(struct hfi1_devdata *dd, int index, int vl, u64 data)
8091{
8092        struct cntr_entry *entry;
8093        u64 *sval;
8094
8095        entry = &dev_cntrs[index];
8096        sval = dd->scntrs + entry->offset;
8097
8098        if (vl != CNTR_INVALID_VL)
8099                sval += vl;
8100
8101        return write_dev_port_cntr(dd, entry, sval, dd, vl, data);
8102}
8103
8104u64 read_port_cntr(struct hfi1_pportdata *ppd, int index, int vl)
8105{
8106        struct cntr_entry *entry;
8107        u64 *sval;
8108
8109        entry = &port_cntrs[index];
8110        sval = ppd->scntrs + entry->offset;
8111
8112        if (vl != CNTR_INVALID_VL)
8113                sval += vl;
8114
8115        if ((index >= C_RCV_HDR_OVF_FIRST + ppd->dd->num_rcv_contexts) &&
8116            (index <= C_RCV_HDR_OVF_LAST)) {
8117                /* We do not want to bother for disabled contexts */
8118                return 0;
8119        }
8120
8121        return read_dev_port_cntr(ppd->dd, entry, sval, ppd, vl);
8122}
8123
8124u64 write_port_cntr(struct hfi1_pportdata *ppd, int index, int vl, u64 data)
8125{
8126        struct cntr_entry *entry;
8127        u64 *sval;
8128
8129        entry = &port_cntrs[index];
8130        sval = ppd->scntrs + entry->offset;
8131
8132        if (vl != CNTR_INVALID_VL)
8133                sval += vl;
8134
8135        if ((index >= C_RCV_HDR_OVF_FIRST + ppd->dd->num_rcv_contexts) &&
8136            (index <= C_RCV_HDR_OVF_LAST)) {
8137                /* We do not want to bother for disabled contexts */
8138                return 0;
8139        }
8140
8141        return write_dev_port_cntr(ppd->dd, entry, sval, ppd, vl, data);
8142}
8143
8144static void update_synth_timer(unsigned long opaque)
8145{
8146        u64 cur_tx;
8147        u64 cur_rx;
8148        u64 total_flits;
8149        u8 update = 0;
8150        int i, j, vl;
8151        struct hfi1_pportdata *ppd;
8152        struct cntr_entry *entry;
8153
8154        struct hfi1_devdata *dd = (struct hfi1_devdata *)opaque;
8155
8156        /*
8157         * Rather than keep beating on the CSRs, pick a minimal set that we
8158         * can check to watch for potential rollover. We can do this by looking
8159         * at the number of flits sent/received. If the total flits exceeds
8160         * 32 bits, then we have to iterate all the counters and update.
8161         */
8162        entry = &dev_cntrs[C_DC_RCV_FLITS];
8163        cur_rx = entry->rw_cntr(entry, dd, CNTR_INVALID_VL, CNTR_MODE_R, 0);
8164
8165        entry = &dev_cntrs[C_DC_XMIT_FLITS];
8166        cur_tx = entry->rw_cntr(entry, dd, CNTR_INVALID_VL, CNTR_MODE_R, 0);
8167
8168        hfi1_cdbg(
8169            CNTR,
8170            "[%d] curr tx=0x%llx rx=0x%llx :: last tx=0x%llx rx=0x%llx\n",
8171            dd->unit, cur_tx, cur_rx, dd->last_tx, dd->last_rx);
8172
8173        if ((cur_tx < dd->last_tx) || (cur_rx < dd->last_rx)) {
8174                /*
8175                 * It may not be strictly necessary to update, but it won't
8176                 * hurt and it simplifies the logic here.
8177                 */
8178                update = 1;
8179                hfi1_cdbg(CNTR, "[%d] Tripwire counter rolled, updating",
8180                          dd->unit);
8181        } else {
8182                total_flits = (cur_tx - dd->last_tx) + (cur_rx - dd->last_rx);
8183                hfi1_cdbg(CNTR,
8184                          "[%d] total flits 0x%llx limit 0x%llx\n", dd->unit,
8185                          total_flits, (u64)CNTR_32BIT_MAX);
8186                if (total_flits >= CNTR_32BIT_MAX) {
8187                        hfi1_cdbg(CNTR, "[%d] 32bit limit hit, updating",
8188                                  dd->unit);
8189                        update = 1;
8190                }
8191        }
8192
8193        if (update) {
8194                hfi1_cdbg(CNTR, "[%d] Updating dd and ppd counters", dd->unit);
8195                for (i = 0; i < DEV_CNTR_LAST; i++) {
8196                        entry = &dev_cntrs[i];
8197                        if (entry->flags & CNTR_VL) {
8198                                for (vl = 0; vl < C_VL_COUNT; vl++)
8199                                        read_dev_cntr(dd, i, vl);
8200                        } else {
8201                                read_dev_cntr(dd, i, CNTR_INVALID_VL);
8202                        }
8203                }
8204                ppd = (struct hfi1_pportdata *)(dd + 1);
8205                for (i = 0; i < dd->num_pports; i++, ppd++) {
8206                        for (j = 0; j < PORT_CNTR_LAST; j++) {
8207                                entry = &port_cntrs[j];
8208                                if (entry->flags & CNTR_VL) {
8209                                        for (vl = 0; vl < C_VL_COUNT; vl++)
8210                                                read_port_cntr(ppd, j, vl);
8211                                } else {
8212                                        read_port_cntr(ppd, j, CNTR_INVALID_VL);
8213                                }
8214                        }
8215                }
8216
8217                /*
8218                 * We want the value in the register. The goal is to keep track
8219                 * of the number of "ticks" not the counter value. In other
8220                 * words if the register rolls we want to notice it and go ahead
8221                 * and force an update.
8222                 */
8223                entry = &dev_cntrs[C_DC_XMIT_FLITS];
8224                dd->last_tx = entry->rw_cntr(entry, dd, CNTR_INVALID_VL,
8225                                                CNTR_MODE_R, 0);
8226
8227                entry = &dev_cntrs[C_DC_RCV_FLITS];
8228                dd->last_rx = entry->rw_cntr(entry, dd, CNTR_INVALID_VL,
8229                                                CNTR_MODE_R, 0);
8230
8231                hfi1_cdbg(CNTR, "[%d] setting last tx/rx to 0x%llx 0x%llx",
8232                          dd->unit, dd->last_tx, dd->last_rx);
8233
8234        } else {
8235                hfi1_cdbg(CNTR, "[%d] No update necessary", dd->unit);
8236        }
8237
8238mod_timer(&dd->synth_stats_timer, jiffies + HZ * SYNTH_CNT_TIME);
8239}
8240
8241#define C_MAX_NAME 13 /* 12 chars + one for /0 */
8242static int init_cntrs(struct hfi1_devdata *dd)
8243{
8244        int i, rcv_ctxts, index, j;
8245        size_t sz;
8246        char *p;
8247        char name[C_MAX_NAME];
8248        struct hfi1_pportdata *ppd;
8249
8250        /* set up the stats timer; the add_timer is done at the end */
8251        setup_timer(&dd->synth_stats_timer, update_synth_timer,
8252                    (unsigned long)dd);
8253
8254        /***********************/
8255        /* per device counters */
8256        /***********************/
8257
8258        /* size names and determine how many we have*/
8259        dd->ndevcntrs = 0;
8260        sz = 0;
8261        index = 0;
8262
8263        for (i = 0; i < DEV_CNTR_LAST; i++) {
8264                hfi1_dbg_early("Init cntr %s\n", dev_cntrs[i].name);
8265                if (dev_cntrs[i].flags & CNTR_DISABLED) {
8266                        hfi1_dbg_early("\tSkipping %s\n", dev_cntrs[i].name);
8267                        continue;
8268                }
8269
8270                if (dev_cntrs[i].flags & CNTR_VL) {
8271                        hfi1_dbg_early("\tProcessing VL cntr\n");
8272                        dev_cntrs[i].offset = index;
8273                        for (j = 0; j < C_VL_COUNT; j++) {
8274                                memset(name, '\0', C_MAX_NAME);
8275                                snprintf(name, C_MAX_NAME, "%s%d",
8276                                        dev_cntrs[i].name,
8277                                        vl_from_idx(j));
8278                                sz += strlen(name);
8279                                sz++;
8280                                hfi1_dbg_early("\t\t%s\n", name);
8281                                dd->ndevcntrs++;
8282                                index++;
8283                        }
8284                } else {
8285                        /* +1 for newline  */
8286                        sz += strlen(dev_cntrs[i].name) + 1;
8287                        dd->ndevcntrs++;
8288                        dev_cntrs[i].offset = index;
8289                        index++;
8290                        hfi1_dbg_early("\tAdding %s\n", dev_cntrs[i].name);
8291                }
8292        }
8293
8294        /* allocate space for the counter values */
8295        dd->cntrs = kcalloc(index, sizeof(u64), GFP_KERNEL);
8296        if (!dd->cntrs)
8297                goto bail;
8298
8299        dd->scntrs = kcalloc(index, sizeof(u64), GFP_KERNEL);
8300        if (!dd->scntrs)
8301                goto bail;
8302
8303
8304        /* allocate space for the counter names */
8305        dd->cntrnameslen = sz;
8306        dd->cntrnames = kmalloc(sz, GFP_KERNEL);
8307        if (!dd->cntrnames)
8308                goto bail;
8309
8310        /* fill in the names */
8311        for (p = dd->cntrnames, i = 0, index = 0; i < DEV_CNTR_LAST; i++) {
8312                if (dev_cntrs[i].flags & CNTR_DISABLED) {
8313                        /* Nothing */
8314                } else {
8315                        if (dev_cntrs[i].flags & CNTR_VL) {
8316                                for (j = 0; j < C_VL_COUNT; j++) {
8317                                        memset(name, '\0', C_MAX_NAME);
8318                                        snprintf(name, C_MAX_NAME, "%s%d",
8319                                                dev_cntrs[i].name,
8320                                                vl_from_idx(j));
8321                                        memcpy(p, name, strlen(name));
8322                                        p += strlen(name);
8323                                        *p++ = '\n';
8324                                }
8325                        } else {
8326                                memcpy(p, dev_cntrs[i].name,
8327                                       strlen(dev_cntrs[i].name));
8328                                p += strlen(dev_cntrs[i].name);
8329                                *p++ = '\n';
8330                        }
8331                        index++;
8332                }
8333        }
8334
8335        /*********************/
8336        /* per port counters */
8337        /*********************/
8338
8339        /*
8340         * Go through the counters for the overflows and disable the ones we
8341         * don't need. This varies based on platform so we need to do it
8342         * dynamically here.
8343         */
8344        rcv_ctxts = dd->num_rcv_contexts;
8345        for (i = C_RCV_HDR_OVF_FIRST + rcv_ctxts;
8346             i <= C_RCV_HDR_OVF_LAST; i++) {
8347                port_cntrs[i].flags |= CNTR_DISABLED;
8348        }
8349
8350        /* size port counter names and determine how many we have*/
8351        sz = 0;
8352        dd->nportcntrs = 0;
8353        for (i = 0; i < PORT_CNTR_LAST; i++) {
8354                hfi1_dbg_early("Init pcntr %s\n", port_cntrs[i].name);
8355                if (port_cntrs[i].flags & CNTR_DISABLED) {
8356                        hfi1_dbg_early("\tSkipping %s\n", port_cntrs[i].name);
8357                        continue;
8358                }
8359
8360                if (port_cntrs[i].flags & CNTR_VL) {
8361                        hfi1_dbg_early("\tProcessing VL cntr\n");
8362                        port_cntrs[i].offset = dd->nportcntrs;
8363                        for (j = 0; j < C_VL_COUNT; j++) {
8364                                memset(name, '\0', C_MAX_NAME);
8365                                snprintf(name, C_MAX_NAME, "%s%d",
8366                                        port_cntrs[i].name,
8367                                        vl_from_idx(j));
8368                                sz += strlen(name);
8369                                sz++;
8370                                hfi1_dbg_early("\t\t%s\n", name);
8371                                dd->nportcntrs++;
8372                        }
8373                } else {
8374                        /* +1 for newline  */
8375                        sz += strlen(port_cntrs[i].name) + 1;
8376                        port_cntrs[i].offset = dd->nportcntrs;
8377                        dd->nportcntrs++;
8378                        hfi1_dbg_early("\tAdding %s\n", port_cntrs[i].name);
8379                }
8380        }
8381
8382        /* allocate space for the counter names */
8383        dd->portcntrnameslen = sz;
8384        dd->portcntrnames = kmalloc(sz, GFP_KERNEL);
8385        if (!dd->portcntrnames)
8386                goto bail;
8387
8388        /* fill in port cntr names */
8389        for (p = dd->portcntrnames, i = 0; i < PORT_CNTR_LAST; i++) {
8390                if (port_cntrs[i].flags & CNTR_DISABLED)
8391                        continue;
8392
8393                if (port_cntrs[i].flags & CNTR_VL) {
8394                        for (j = 0; j < C_VL_COUNT; j++) {
8395                                memset(name, '\0', C_MAX_NAME);
8396                                snprintf(name, C_MAX_NAME, "%s%d",
8397                                        port_cntrs[i].name,
8398                                        vl_from_idx(j));
8399                                memcpy(p, name, strlen(name));
8400                                p += strlen(name);
8401                                *p++ = '\n';
8402                        }
8403                } else {
8404                        memcpy(p, port_cntrs[i].name,
8405                               strlen(port_cntrs[i].name));
8406                        p += strlen(port_cntrs[i].name);
8407                        *p++ = '\n';
8408                }
8409        }
8410
8411        /* allocate per port storage for counter values */
8412        ppd = (struct hfi1_pportdata *)(dd + 1);
8413        for (i = 0; i < dd->num_pports; i++, ppd++) {
8414                ppd->cntrs = kcalloc(dd->nportcntrs, sizeof(u64), GFP_KERNEL);
8415                if (!ppd->cntrs)
8416                        goto bail;
8417
8418                ppd->scntrs = kcalloc(dd->nportcntrs, sizeof(u64), GFP_KERNEL);
8419                if (!ppd->scntrs)
8420                        goto bail;
8421        }
8422
8423        /* CPU counters need to be allocated and zeroed */
8424        if (init_cpu_counters(dd))
8425                goto bail;
8426
8427        mod_timer(&dd->synth_stats_timer, jiffies + HZ * SYNTH_CNT_TIME);
8428        return 0;
8429bail:
8430        free_cntrs(dd);
8431        return -ENOMEM;
8432}
8433
8434
8435static u32 chip_to_opa_lstate(struct hfi1_devdata *dd, u32 chip_lstate)
8436{
8437        switch (chip_lstate) {
8438        default:
8439                dd_dev_err(dd,
8440                         "Unknown logical state 0x%x, reporting IB_PORT_DOWN\n",
8441                         chip_lstate);
8442                /* fall through */
8443        case LSTATE_DOWN:
8444                return IB_PORT_DOWN;
8445        case LSTATE_INIT:
8446                return IB_PORT_INIT;
8447        case LSTATE_ARMED:
8448                return IB_PORT_ARMED;
8449        case LSTATE_ACTIVE:
8450                return IB_PORT_ACTIVE;
8451        }
8452}
8453
8454u32 chip_to_opa_pstate(struct hfi1_devdata *dd, u32 chip_pstate)
8455{
8456        /* look at the HFI meta-states only */
8457        switch (chip_pstate & 0xf0) {
8458        default:
8459                dd_dev_err(dd, "Unexpected chip physical state of 0x%x\n",
8460                        chip_pstate);
8461                /* fall through */
8462        case PLS_DISABLED:
8463                return IB_PORTPHYSSTATE_DISABLED;
8464        case PLS_OFFLINE:
8465                return OPA_PORTPHYSSTATE_OFFLINE;
8466        case PLS_POLLING:
8467                return IB_PORTPHYSSTATE_POLLING;
8468        case PLS_CONFIGPHY:
8469                return IB_PORTPHYSSTATE_TRAINING;
8470        case PLS_LINKUP:
8471                return IB_PORTPHYSSTATE_LINKUP;
8472        case PLS_PHYTEST:
8473                return IB_PORTPHYSSTATE_PHY_TEST;
8474        }
8475}
8476
8477/* return the OPA port logical state name */
8478const char *opa_lstate_name(u32 lstate)
8479{
8480        static const char * const port_logical_names[] = {
8481                "PORT_NOP",
8482                "PORT_DOWN",
8483                "PORT_INIT",
8484                "PORT_ARMED",
8485                "PORT_ACTIVE",
8486                "PORT_ACTIVE_DEFER",
8487        };
8488        if (lstate < ARRAY_SIZE(port_logical_names))
8489                return port_logical_names[lstate];
8490        return "unknown";
8491}
8492
8493/* return the OPA port physical state name */
8494const char *opa_pstate_name(u32 pstate)
8495{
8496        static const char * const port_physical_names[] = {
8497                "PHYS_NOP",
8498                "reserved1",
8499                "PHYS_POLL",
8500                "PHYS_DISABLED",
8501                "PHYS_TRAINING",
8502                "PHYS_LINKUP",
8503                "PHYS_LINK_ERR_RECOVER",
8504                "PHYS_PHY_TEST",
8505                "reserved8",
8506                "PHYS_OFFLINE",
8507                "PHYS_GANGED",
8508                "PHYS_TEST",
8509        };
8510        if (pstate < ARRAY_SIZE(port_physical_names))
8511                return port_physical_names[pstate];
8512        return "unknown";
8513}
8514
8515/*
8516 * Read the hardware link state and set the driver's cached value of it.
8517 * Return the (new) current value.
8518 */
8519u32 get_logical_state(struct hfi1_pportdata *ppd)
8520{
8521        u32 new_state;
8522
8523        new_state = chip_to_opa_lstate(ppd->dd, read_logical_state(ppd->dd));
8524        if (new_state != ppd->lstate) {
8525                dd_dev_info(ppd->dd, "logical state changed to %s (0x%x)\n",
8526                        opa_lstate_name(new_state), new_state);
8527                ppd->lstate = new_state;
8528        }
8529        /*
8530         * Set port status flags in the page mapped into userspace
8531         * memory. Do it here to ensure a reliable state - this is
8532         * the only function called by all state handling code.
8533         * Always set the flags due to the fact that the cache value
8534         * might have been changed explicitly outside of this
8535         * function.
8536         */
8537        if (ppd->statusp) {
8538                switch (ppd->lstate) {
8539                case IB_PORT_DOWN:
8540                case IB_PORT_INIT:
8541                        *ppd->statusp &= ~(HFI1_STATUS_IB_CONF |
8542                                           HFI1_STATUS_IB_READY);
8543                        break;
8544                case IB_PORT_ARMED:
8545                        *ppd->statusp |= HFI1_STATUS_IB_CONF;
8546                        break;
8547                case IB_PORT_ACTIVE:
8548                        *ppd->statusp |= HFI1_STATUS_IB_READY;
8549                        break;
8550                }
8551        }
8552        return ppd->lstate;
8553}
8554
8555/**
8556 * wait_logical_linkstate - wait for an IB link state change to occur
8557 * @ppd: port device
8558 * @state: the state to wait for
8559 * @msecs: the number of milliseconds to wait
8560 *
8561 * Wait up to msecs milliseconds for IB link state change to occur.
8562 * For now, take the easy polling route.
8563 * Returns 0 if state reached, otherwise -ETIMEDOUT.
8564 */
8565static int wait_logical_linkstate(struct hfi1_pportdata *ppd, u32 state,
8566                                  int msecs)
8567{
8568        unsigned long timeout;
8569
8570        timeout = jiffies + msecs_to_jiffies(msecs);
8571        while (1) {
8572                if (get_logical_state(ppd) == state)
8573                        return 0;
8574                if (time_after(jiffies, timeout))
8575                        break;
8576                msleep(20);
8577        }
8578        dd_dev_err(ppd->dd, "timeout waiting for link state 0x%x\n", state);
8579
8580        return -ETIMEDOUT;
8581}
8582
8583u8 hfi1_ibphys_portstate(struct hfi1_pportdata *ppd)
8584{
8585        static u32 remembered_state = 0xff;
8586        u32 pstate;
8587        u32 ib_pstate;
8588
8589        pstate = read_physical_state(ppd->dd);
8590        ib_pstate = chip_to_opa_pstate(ppd->dd, pstate);
8591        if (remembered_state != ib_pstate) {
8592                dd_dev_info(ppd->dd,
8593                        "%s: physical state changed to %s (0x%x), phy 0x%x\n",
8594                        __func__, opa_pstate_name(ib_pstate), ib_pstate,
8595                        pstate);
8596                remembered_state = ib_pstate;
8597        }
8598        return ib_pstate;
8599}
8600
8601/*
8602 * Read/modify/write ASIC_QSFP register bits as selected by mask
8603 * data: 0 or 1 in the positions depending on what needs to be written
8604 * dir: 0 for read, 1 for write
8605 * mask: select by setting
8606 *      I2CCLK  (bit 0)
8607 *      I2CDATA (bit 1)
8608 */
8609u64 hfi1_gpio_mod(struct hfi1_devdata *dd, u32 target, u32 data, u32 dir,
8610                  u32 mask)
8611{
8612        u64 qsfp_oe, target_oe;
8613
8614        target_oe = target ? ASIC_QSFP2_OE : ASIC_QSFP1_OE;
8615        if (mask) {
8616                /* We are writing register bits, so lock access */
8617                dir &= mask;
8618                data &= mask;
8619
8620                qsfp_oe = read_csr(dd, target_oe);
8621                qsfp_oe = (qsfp_oe & ~(u64)mask) | (u64)dir;
8622                write_csr(dd, target_oe, qsfp_oe);
8623        }
8624        /* We are exclusively reading bits here, but it is unlikely
8625         * we'll get valid data when we set the direction of the pin
8626         * in the same call, so read should call this function again
8627         * to get valid data
8628         */
8629        return read_csr(dd, target ? ASIC_QSFP2_IN : ASIC_QSFP1_IN);
8630}
8631
8632#define CLEAR_STATIC_RATE_CONTROL_SMASK(r) \
8633(r &= ~SEND_CTXT_CHECK_ENABLE_DISALLOW_PBC_STATIC_RATE_CONTROL_SMASK)
8634
8635#define SET_STATIC_RATE_CONTROL_SMASK(r) \
8636(r |= SEND_CTXT_CHECK_ENABLE_DISALLOW_PBC_STATIC_RATE_CONTROL_SMASK)
8637
8638int hfi1_init_ctxt(struct send_context *sc)
8639{
8640        if (sc != NULL) {
8641                struct hfi1_devdata *dd = sc->dd;
8642                u64 reg;
8643                u8 set = (sc->type == SC_USER ?
8644                          HFI1_CAP_IS_USET(STATIC_RATE_CTRL) :
8645                          HFI1_CAP_IS_KSET(STATIC_RATE_CTRL));
8646                reg = read_kctxt_csr(dd, sc->hw_context,
8647                                     SEND_CTXT_CHECK_ENABLE);
8648                if (set)
8649                        CLEAR_STATIC_RATE_CONTROL_SMASK(reg);
8650                else
8651                        SET_STATIC_RATE_CONTROL_SMASK(reg);
8652                write_kctxt_csr(dd, sc->hw_context,
8653                                SEND_CTXT_CHECK_ENABLE, reg);
8654        }
8655        return 0;
8656}
8657
8658int hfi1_tempsense_rd(struct hfi1_devdata *dd, struct hfi1_temp *temp)
8659{
8660        int ret = 0;
8661        u64 reg;
8662
8663        if (dd->icode != ICODE_RTL_SILICON) {
8664                if (HFI1_CAP_IS_KSET(PRINT_UNIMPL))
8665                        dd_dev_info(dd, "%s: tempsense not supported by HW\n",
8666                                    __func__);
8667                return -EINVAL;
8668        }
8669        reg = read_csr(dd, ASIC_STS_THERM);
8670        temp->curr = ((reg >> ASIC_STS_THERM_CURR_TEMP_SHIFT) &
8671                      ASIC_STS_THERM_CURR_TEMP_MASK);
8672        temp->lo_lim = ((reg >> ASIC_STS_THERM_LO_TEMP_SHIFT) &
8673                        ASIC_STS_THERM_LO_TEMP_MASK);
8674        temp->hi_lim = ((reg >> ASIC_STS_THERM_HI_TEMP_SHIFT) &
8675                        ASIC_STS_THERM_HI_TEMP_MASK);
8676        temp->crit_lim = ((reg >> ASIC_STS_THERM_CRIT_TEMP_SHIFT) &
8677                          ASIC_STS_THERM_CRIT_TEMP_MASK);
8678        /* triggers is a 3-bit value - 1 bit per trigger. */
8679        temp->triggers = (u8)((reg >> ASIC_STS_THERM_LOW_SHIFT) & 0x7);
8680
8681        return ret;
8682}
8683
8684/* ========================================================================= */
8685
8686/*
8687 * Enable/disable chip from delivering interrupts.
8688 */
8689void set_intr_state(struct hfi1_devdata *dd, u32 enable)
8690{
8691        int i;
8692
8693        /*
8694         * In HFI, the mask needs to be 1 to allow interrupts.
8695         */
8696        if (enable) {
8697                u64 cce_int_mask;
8698                const int qsfp1_int_smask = QSFP1_INT % 64;
8699                const int qsfp2_int_smask = QSFP2_INT % 64;
8700
8701                /* enable all interrupts */
8702                for (i = 0; i < CCE_NUM_INT_CSRS; i++)
8703                        write_csr(dd, CCE_INT_MASK + (8*i), ~(u64)0);
8704
8705                /*
8706                 * disable QSFP1 interrupts for HFI1, QSFP2 interrupts for HFI0
8707                 * Qsfp1Int and Qsfp2Int are adjacent bits in the same CSR,
8708                 * therefore just one of QSFP1_INT/QSFP2_INT can be used to find
8709                 * the index of the appropriate CSR in the CCEIntMask CSR array
8710                 */
8711                cce_int_mask = read_csr(dd, CCE_INT_MASK +
8712                                                (8*(QSFP1_INT/64)));
8713                if (dd->hfi1_id) {
8714                        cce_int_mask &= ~((u64)1 << qsfp1_int_smask);
8715                        write_csr(dd, CCE_INT_MASK + (8*(QSFP1_INT/64)),
8716                                        cce_int_mask);
8717                } else {
8718                        cce_int_mask &= ~((u64)1 << qsfp2_int_smask);
8719                        write_csr(dd, CCE_INT_MASK + (8*(QSFP2_INT/64)),
8720                                        cce_int_mask);
8721                }
8722        } else {
8723                for (i = 0; i < CCE_NUM_INT_CSRS; i++)
8724                        write_csr(dd, CCE_INT_MASK + (8*i), 0ull);
8725        }
8726}
8727
8728/*
8729 * Clear all interrupt sources on the chip.
8730 */
8731static void clear_all_interrupts(struct hfi1_devdata *dd)
8732{
8733        int i;
8734
8735        for (i = 0; i < CCE_NUM_INT_CSRS; i++)
8736                write_csr(dd, CCE_INT_CLEAR + (8*i), ~(u64)0);
8737
8738        write_csr(dd, CCE_ERR_CLEAR, ~(u64)0);
8739        write_csr(dd, MISC_ERR_CLEAR, ~(u64)0);
8740        write_csr(dd, RCV_ERR_CLEAR, ~(u64)0);
8741        write_csr(dd, SEND_ERR_CLEAR, ~(u64)0);
8742        write_csr(dd, SEND_PIO_ERR_CLEAR, ~(u64)0);
8743        write_csr(dd, SEND_DMA_ERR_CLEAR, ~(u64)0);
8744        write_csr(dd, SEND_EGRESS_ERR_CLEAR, ~(u64)0);
8745        for (i = 0; i < dd->chip_send_contexts; i++)
8746                write_kctxt_csr(dd, i, SEND_CTXT_ERR_CLEAR, ~(u64)0);
8747        for (i = 0; i < dd->chip_sdma_engines; i++)
8748                write_kctxt_csr(dd, i, SEND_DMA_ENG_ERR_CLEAR, ~(u64)0);
8749
8750        write_csr(dd, DCC_ERR_FLG_CLR, ~(u64)0);
8751        write_csr(dd, DC_LCB_ERR_CLR, ~(u64)0);
8752        write_csr(dd, DC_DC8051_ERR_CLR, ~(u64)0);
8753}
8754
8755/* Move to pcie.c? */
8756static void disable_intx(struct pci_dev *pdev)
8757{
8758        pci_intx(pdev, 0);
8759}
8760
8761static void clean_up_interrupts(struct hfi1_devdata *dd)
8762{
8763        int i;
8764
8765        /* remove irqs - must happen before disabling/turning off */
8766        if (dd->num_msix_entries) {
8767                /* MSI-X */
8768                struct hfi1_msix_entry *me = dd->msix_entries;
8769
8770                for (i = 0; i < dd->num_msix_entries; i++, me++) {
8771                        if (me->arg == NULL) /* => no irq, no affinity */
8772                                break;
8773                        irq_set_affinity_hint(dd->msix_entries[i].msix.vector,
8774                                        NULL);
8775                        free_irq(me->msix.vector, me->arg);
8776                }
8777        } else {
8778                /* INTx */
8779                if (dd->requested_intx_irq) {
8780                        free_irq(dd->pcidev->irq, dd);
8781                        dd->requested_intx_irq = 0;
8782                }
8783        }
8784
8785        /* turn off interrupts */
8786        if (dd->num_msix_entries) {
8787                /* MSI-X */
8788                hfi1_nomsix(dd);
8789        } else {
8790                /* INTx */
8791                disable_intx(dd->pcidev);
8792        }
8793
8794        /* clean structures */
8795        for (i = 0; i < dd->num_msix_entries; i++)
8796                free_cpumask_var(dd->msix_entries[i].mask);
8797        kfree(dd->msix_entries);
8798        dd->msix_entries = NULL;
8799        dd->num_msix_entries = 0;
8800}
8801
8802/*
8803 * Remap the interrupt source from the general handler to the given MSI-X
8804 * interrupt.
8805 */
8806static void remap_intr(struct hfi1_devdata *dd, int isrc, int msix_intr)
8807{
8808        u64 reg;
8809        int m, n;
8810
8811        /* clear from the handled mask of the general interrupt */
8812        m = isrc / 64;
8813        n = isrc % 64;
8814        dd->gi_mask[m] &= ~((u64)1 << n);
8815
8816        /* direct the chip source to the given MSI-X interrupt */
8817        m = isrc / 8;
8818        n = isrc % 8;
8819        reg = read_csr(dd, CCE_INT_MAP + (8*m));
8820        reg &= ~((u64)0xff << (8*n));
8821        reg |= ((u64)msix_intr & 0xff) << (8*n);
8822        write_csr(dd, CCE_INT_MAP + (8*m), reg);
8823}
8824
8825static void remap_sdma_interrupts(struct hfi1_devdata *dd,
8826                                  int engine, int msix_intr)
8827{
8828        /*
8829         * SDMA engine interrupt sources grouped by type, rather than
8830         * engine.  Per-engine interrupts are as follows:
8831         *      SDMA
8832         *      SDMAProgress
8833         *      SDMAIdle
8834         */
8835        remap_intr(dd, IS_SDMA_START + 0*TXE_NUM_SDMA_ENGINES + engine,
8836                msix_intr);
8837        remap_intr(dd, IS_SDMA_START + 1*TXE_NUM_SDMA_ENGINES + engine,
8838                msix_intr);
8839        remap_intr(dd, IS_SDMA_START + 2*TXE_NUM_SDMA_ENGINES + engine,
8840                msix_intr);
8841}
8842
8843static void remap_receive_available_interrupt(struct hfi1_devdata *dd,
8844                                              int rx, int msix_intr)
8845{
8846        remap_intr(dd, IS_RCVAVAIL_START + rx, msix_intr);
8847}
8848
8849static int request_intx_irq(struct hfi1_devdata *dd)
8850{
8851        int ret;
8852
8853        snprintf(dd->intx_name, sizeof(dd->intx_name), DRIVER_NAME"_%d",
8854                dd->unit);
8855        ret = request_irq(dd->pcidev->irq, general_interrupt,
8856                                  IRQF_SHARED, dd->intx_name, dd);
8857        if (ret)
8858                dd_dev_err(dd, "unable to request INTx interrupt, err %d\n",
8859                                ret);
8860        else
8861                dd->requested_intx_irq = 1;
8862        return ret;
8863}
8864
8865static int request_msix_irqs(struct hfi1_devdata *dd)
8866{
8867        const struct cpumask *local_mask;
8868        cpumask_var_t def, rcv;
8869        bool def_ret, rcv_ret;
8870        int first_general, last_general;
8871        int first_sdma, last_sdma;
8872        int first_rx, last_rx;
8873        int first_cpu, restart_cpu, curr_cpu;
8874        int rcv_cpu, sdma_cpu;
8875        int i, ret = 0, possible;
8876        int ht;
8877
8878        /* calculate the ranges we are going to use */
8879        first_general = 0;
8880        first_sdma = last_general = first_general + 1;
8881        first_rx = last_sdma = first_sdma + dd->num_sdma;
8882        last_rx = first_rx + dd->n_krcv_queues;
8883
8884        /*
8885         * Interrupt affinity.
8886         *
8887         * non-rcv avail gets a default mask that
8888         * starts as possible cpus with threads reset
8889         * and each rcv avail reset.
8890         *
8891         * rcv avail gets node relative 1 wrapping back
8892         * to the node relative 1 as necessary.
8893         *
8894         */
8895        local_mask = cpumask_of_pcibus(dd->pcidev->bus);
8896        /* if first cpu is invalid, use NUMA 0 */
8897        if (cpumask_first(local_mask) >= nr_cpu_ids)
8898                local_mask = topology_core_cpumask(0);
8899
8900        def_ret = zalloc_cpumask_var(&def, GFP_KERNEL);
8901        rcv_ret = zalloc_cpumask_var(&rcv, GFP_KERNEL);
8902        if (!def_ret || !rcv_ret)
8903                goto bail;
8904        /* use local mask as default */
8905        cpumask_copy(def, local_mask);
8906        possible = cpumask_weight(def);
8907        /* disarm threads from default */
8908        ht = cpumask_weight(
8909                        topology_sibling_cpumask(cpumask_first(local_mask)));
8910        for (i = possible/ht; i < possible; i++)
8911                cpumask_clear_cpu(i, def);
8912        /* reset possible */
8913        possible = cpumask_weight(def);
8914        /* def now has full cores on chosen node*/
8915        first_cpu = cpumask_first(def);
8916        if (nr_cpu_ids >= first_cpu)
8917                first_cpu++;
8918        restart_cpu = first_cpu;
8919        curr_cpu = restart_cpu;
8920
8921        for (i = first_cpu; i < dd->n_krcv_queues + first_cpu; i++) {
8922                cpumask_clear_cpu(curr_cpu, def);
8923                cpumask_set_cpu(curr_cpu, rcv);
8924                if (curr_cpu >= possible)
8925                        curr_cpu = restart_cpu;
8926                else
8927                        curr_cpu++;
8928        }
8929        /* def mask has non-rcv, rcv has recv mask */
8930        rcv_cpu = cpumask_first(rcv);
8931        sdma_cpu = cpumask_first(def);
8932
8933        /*
8934         * Sanity check - the code expects all SDMA chip source
8935         * interrupts to be in the same CSR, starting at bit 0.  Verify
8936         * that this is true by checking the bit location of the start.
8937         */
8938        BUILD_BUG_ON(IS_SDMA_START % 64);
8939
8940        for (i = 0; i < dd->num_msix_entries; i++) {
8941                struct hfi1_msix_entry *me = &dd->msix_entries[i];
8942                const char *err_info;
8943                irq_handler_t handler;
8944                irq_handler_t thread = NULL;
8945                void *arg;
8946                int idx;
8947                struct hfi1_ctxtdata *rcd = NULL;
8948                struct sdma_engine *sde = NULL;
8949
8950                /* obtain the arguments to request_irq */
8951                if (first_general <= i && i < last_general) {
8952                        idx = i - first_general;
8953                        handler = general_interrupt;
8954                        arg = dd;
8955                        snprintf(me->name, sizeof(me->name),
8956                                DRIVER_NAME"_%d", dd->unit);
8957                        err_info = "general";
8958                } else if (first_sdma <= i && i < last_sdma) {
8959                        idx = i - first_sdma;
8960                        sde = &dd->per_sdma[idx];
8961                        handler = sdma_interrupt;
8962                        arg = sde;
8963                        snprintf(me->name, sizeof(me->name),
8964                                DRIVER_NAME"_%d sdma%d", dd->unit, idx);
8965                        err_info = "sdma";
8966                        remap_sdma_interrupts(dd, idx, i);
8967                } else if (first_rx <= i && i < last_rx) {
8968                        idx = i - first_rx;
8969                        rcd = dd->rcd[idx];
8970                        /* no interrupt if no rcd */
8971                        if (!rcd)
8972                                continue;
8973                        /*
8974                         * Set the interrupt register and mask for this
8975                         * context's interrupt.
8976                         */
8977                        rcd->ireg = (IS_RCVAVAIL_START+idx) / 64;
8978                        rcd->imask = ((u64)1) <<
8979                                        ((IS_RCVAVAIL_START+idx) % 64);
8980                        handler = receive_context_interrupt;
8981                        thread = receive_context_thread;
8982                        arg = rcd;
8983                        snprintf(me->name, sizeof(me->name),
8984                                DRIVER_NAME"_%d kctxt%d", dd->unit, idx);
8985                        err_info = "receive context";
8986                        remap_receive_available_interrupt(dd, idx, i);
8987                } else {
8988                        /* not in our expected range - complain, then
8989                           ignore it */
8990                        dd_dev_err(dd,
8991                                "Unexpected extra MSI-X interrupt %d\n", i);
8992                        continue;
8993                }
8994                /* no argument, no interrupt */
8995                if (arg == NULL)
8996                        continue;
8997                /* make sure the name is terminated */
8998                me->name[sizeof(me->name)-1] = 0;
8999
9000                ret = request_threaded_irq(me->msix.vector, handler, thread, 0,
9001                                                me->name, arg);
9002                if (ret) {
9003                        dd_dev_err(dd,
9004                                "unable to allocate %s interrupt, vector %d, index %d, err %d\n",
9005                                 err_info, me->msix.vector, idx, ret);
9006                        return ret;
9007                }
9008                /*
9009                 * assign arg after request_irq call, so it will be
9010                 * cleaned up
9011                 */
9012                me->arg = arg;
9013
9014                if (!zalloc_cpumask_var(
9015                        &dd->msix_entries[i].mask,
9016                        GFP_KERNEL))
9017                        goto bail;
9018                if (handler == sdma_interrupt) {
9019                        dd_dev_info(dd, "sdma engine %d cpu %d\n",
9020                                sde->this_idx, sdma_cpu);
9021                        cpumask_set_cpu(sdma_cpu, dd->msix_entries[i].mask);
9022                        sdma_cpu = cpumask_next(sdma_cpu, def);
9023                        if (sdma_cpu >= nr_cpu_ids)
9024                                sdma_cpu = cpumask_first(def);
9025                } else if (handler == receive_context_interrupt) {
9026                        dd_dev_info(dd, "rcv ctxt %d cpu %d\n",
9027                                rcd->ctxt, rcv_cpu);
9028                        cpumask_set_cpu(rcv_cpu, dd->msix_entries[i].mask);
9029                        rcv_cpu = cpumask_next(rcv_cpu, rcv);
9030                        if (rcv_cpu >= nr_cpu_ids)
9031                                rcv_cpu = cpumask_first(rcv);
9032                } else {
9033                        /* otherwise first def */
9034                        dd_dev_info(dd, "%s cpu %d\n",
9035                                err_info, cpumask_first(def));
9036                        cpumask_set_cpu(
9037                                cpumask_first(def), dd->msix_entries[i].mask);
9038                }
9039                irq_set_affinity_hint(
9040                        dd->msix_entries[i].msix.vector,
9041                        dd->msix_entries[i].mask);
9042        }
9043
9044out:
9045        free_cpumask_var(def);
9046        free_cpumask_var(rcv);
9047        return ret;
9048bail:
9049        ret = -ENOMEM;
9050        goto  out;
9051}
9052
9053/*
9054 * Set the general handler to accept all interrupts, remap all
9055 * chip interrupts back to MSI-X 0.
9056 */
9057static void reset_interrupts(struct hfi1_devdata *dd)
9058{
9059        int i;
9060
9061        /* all interrupts handled by the general handler */
9062        for (i = 0; i < CCE_NUM_INT_CSRS; i++)
9063                dd->gi_mask[i] = ~(u64)0;
9064
9065        /* all chip interrupts map to MSI-X 0 */
9066        for (i = 0; i < CCE_NUM_INT_MAP_CSRS; i++)
9067                write_csr(dd, CCE_INT_MAP + (8*i), 0);
9068}
9069
9070static int set_up_interrupts(struct hfi1_devdata *dd)
9071{
9072        struct hfi1_msix_entry *entries;
9073        u32 total, request;
9074        int i, ret;
9075        int single_interrupt = 0; /* we expect to have all the interrupts */
9076
9077        /*
9078         * Interrupt count:
9079         *      1 general, "slow path" interrupt (includes the SDMA engines
9080         *              slow source, SDMACleanupDone)
9081         *      N interrupts - one per used SDMA engine
9082         *      M interrupt - one per kernel receive context
9083         */
9084        total = 1 + dd->num_sdma + dd->n_krcv_queues;
9085
9086        entries = kcalloc(total, sizeof(*entries), GFP_KERNEL);
9087        if (!entries) {
9088                ret = -ENOMEM;
9089                goto fail;
9090        }
9091        /* 1-1 MSI-X entry assignment */
9092        for (i = 0; i < total; i++)
9093                entries[i].msix.entry = i;
9094
9095        /* ask for MSI-X interrupts */
9096        request = total;
9097        request_msix(dd, &request, entries);
9098
9099        if (request == 0) {
9100                /* using INTx */
9101                /* dd->num_msix_entries already zero */
9102                kfree(entries);
9103                single_interrupt = 1;
9104                dd_dev_err(dd, "MSI-X failed, using INTx interrupts\n");
9105        } else {
9106                /* using MSI-X */
9107                dd->num_msix_entries = request;
9108                dd->msix_entries = entries;
9109
9110                if (request != total) {
9111                        /* using MSI-X, with reduced interrupts */
9112                        dd_dev_err(
9113                                dd,
9114                                "cannot handle reduced interrupt case, want %u, got %u\n",
9115                                total, request);
9116                        ret = -EINVAL;
9117                        goto fail;
9118                }
9119                dd_dev_info(dd, "%u MSI-X interrupts allocated\n", total);
9120        }
9121
9122        /* mask all interrupts */
9123        set_intr_state(dd, 0);
9124        /* clear all pending interrupts */
9125        clear_all_interrupts(dd);
9126
9127        /* reset general handler mask, chip MSI-X mappings */
9128        reset_interrupts(dd);
9129
9130        if (single_interrupt)
9131                ret = request_intx_irq(dd);
9132        else
9133                ret = request_msix_irqs(dd);
9134        if (ret)
9135                goto fail;
9136
9137        return 0;
9138
9139fail:
9140        clean_up_interrupts(dd);
9141        return ret;
9142}
9143
9144/*
9145 * Set up context values in dd.  Sets:
9146 *
9147 *      num_rcv_contexts - number of contexts being used
9148 *      n_krcv_queues - number of kernel contexts
9149 *      first_user_ctxt - first non-kernel context in array of contexts
9150 *      freectxts  - number of free user contexts
9151 *      num_send_contexts - number of PIO send contexts being used
9152 */
9153static int set_up_context_variables(struct hfi1_devdata *dd)
9154{
9155        int num_kernel_contexts;
9156        int num_user_contexts;
9157        int total_contexts;
9158        int ret;
9159        unsigned ngroups;
9160
9161        /*
9162         * Kernel contexts: (to be fixed later):
9163         * - min or 2 or 1 context/numa
9164         * - Context 0 - default/errors
9165         * - Context 1 - VL15
9166         */
9167        if (n_krcvqs)
9168                num_kernel_contexts = n_krcvqs + MIN_KERNEL_KCTXTS;
9169        else
9170                num_kernel_contexts = num_online_nodes();
9171        num_kernel_contexts =
9172                max_t(int, MIN_KERNEL_KCTXTS, num_kernel_contexts);
9173        /*
9174         * Every kernel receive context needs an ACK send context.
9175         * one send context is allocated for each VL{0-7} and VL15
9176         */
9177        if (num_kernel_contexts > (dd->chip_send_contexts - num_vls - 1)) {
9178                dd_dev_err(dd,
9179                           "Reducing # kernel rcv contexts to: %d, from %d\n",
9180                           (int)(dd->chip_send_contexts - num_vls - 1),
9181                           (int)num_kernel_contexts);
9182                num_kernel_contexts = dd->chip_send_contexts - num_vls - 1;
9183        }
9184        /*
9185         * User contexts: (to be fixed later)
9186         *      - set to num_rcv_contexts if non-zero
9187         *      - default to 1 user context per CPU
9188         */
9189        if (num_rcv_contexts)
9190                num_user_contexts = num_rcv_contexts;
9191        else
9192                num_user_contexts = num_online_cpus();
9193
9194        total_contexts = num_kernel_contexts + num_user_contexts;
9195
9196        /*
9197         * Adjust the counts given a global max.
9198         */
9199        if (total_contexts > dd->chip_rcv_contexts) {
9200                dd_dev_err(dd,
9201                           "Reducing # user receive contexts to: %d, from %d\n",
9202                           (int)(dd->chip_rcv_contexts - num_kernel_contexts),
9203                           (int)num_user_contexts);
9204                num_user_contexts = dd->chip_rcv_contexts - num_kernel_contexts;
9205                /* recalculate */
9206                total_contexts = num_kernel_contexts + num_user_contexts;
9207        }
9208
9209        /* the first N are kernel contexts, the rest are user contexts */
9210        dd->num_rcv_contexts = total_contexts;
9211        dd->n_krcv_queues = num_kernel_contexts;
9212        dd->first_user_ctxt = num_kernel_contexts;
9213        dd->freectxts = num_user_contexts;
9214        dd_dev_info(dd,
9215                "rcv contexts: chip %d, used %d (kernel %d, user %d)\n",
9216                (int)dd->chip_rcv_contexts,
9217                (int)dd->num_rcv_contexts,
9218                (int)dd->n_krcv_queues,
9219                (int)dd->num_rcv_contexts - dd->n_krcv_queues);
9220
9221        /*
9222         * Receive array allocation:
9223         *   All RcvArray entries are divided into groups of 8. This
9224         *   is required by the hardware and will speed up writes to
9225         *   consecutive entries by using write-combining of the entire
9226         *   cacheline.
9227         *
9228         *   The number of groups are evenly divided among all contexts.
9229         *   any left over groups will be given to the first N user
9230         *   contexts.
9231         */
9232        dd->rcv_entries.group_size = RCV_INCREMENT;
9233        ngroups = dd->chip_rcv_array_count / dd->rcv_entries.group_size;
9234        dd->rcv_entries.ngroups = ngroups / dd->num_rcv_contexts;
9235        dd->rcv_entries.nctxt_extra = ngroups -
9236                (dd->num_rcv_contexts * dd->rcv_entries.ngroups);
9237        dd_dev_info(dd, "RcvArray groups %u, ctxts extra %u\n",
9238                    dd->rcv_entries.ngroups,
9239                    dd->rcv_entries.nctxt_extra);
9240        if (dd->rcv_entries.ngroups * dd->rcv_entries.group_size >
9241            MAX_EAGER_ENTRIES * 2) {
9242                dd->rcv_entries.ngroups = (MAX_EAGER_ENTRIES * 2) /
9243                        dd->rcv_entries.group_size;
9244                dd_dev_info(dd,
9245                   "RcvArray group count too high, change to %u\n",
9246                   dd->rcv_entries.ngroups);
9247                dd->rcv_entries.nctxt_extra = 0;
9248        }
9249        /*
9250         * PIO send contexts
9251         */
9252        ret = init_sc_pools_and_sizes(dd);
9253        if (ret >= 0) { /* success */
9254                dd->num_send_contexts = ret;
9255                dd_dev_info(
9256                        dd,
9257                        "send contexts: chip %d, used %d (kernel %d, ack %d, user %d)\n",
9258                        dd->chip_send_contexts,
9259                        dd->num_send_contexts,
9260                        dd->sc_sizes[SC_KERNEL].count,
9261                        dd->sc_sizes[SC_ACK].count,
9262                        dd->sc_sizes[SC_USER].count);
9263                ret = 0;        /* success */
9264        }
9265
9266        return ret;
9267}
9268
9269/*
9270 * Set the device/port partition key table. The MAD code
9271 * will ensure that, at least, the partial management
9272 * partition key is present in the table.
9273 */
9274static void set_partition_keys(struct hfi1_pportdata *ppd)
9275{
9276        struct hfi1_devdata *dd = ppd->dd;
9277        u64 reg = 0;
9278        int i;
9279
9280        dd_dev_info(dd, "Setting partition keys\n");
9281        for (i = 0; i < hfi1_get_npkeys(dd); i++) {
9282                reg |= (ppd->pkeys[i] &
9283                        RCV_PARTITION_KEY_PARTITION_KEY_A_MASK) <<
9284                        ((i % 4) *
9285                         RCV_PARTITION_KEY_PARTITION_KEY_B_SHIFT);
9286                /* Each register holds 4 PKey values. */
9287                if ((i % 4) == 3) {
9288                        write_csr(dd, RCV_PARTITION_KEY +
9289                                  ((i - 3) * 2), reg);
9290                        reg = 0;
9291                }
9292        }
9293
9294        /* Always enable HW pkeys check when pkeys table is set */
9295        add_rcvctrl(dd, RCV_CTRL_RCV_PARTITION_KEY_ENABLE_SMASK);
9296}
9297
9298/*
9299 * These CSRs and memories are uninitialized on reset and must be
9300 * written before reading to set the ECC/parity bits.
9301 *
9302 * NOTE: All user context CSRs that are not mmaped write-only
9303 * (e.g. the TID flows) must be initialized even if the driver never
9304 * reads them.
9305 */
9306static void write_uninitialized_csrs_and_memories(struct hfi1_devdata *dd)
9307{
9308        int i, j;
9309
9310        /* CceIntMap */
9311        for (i = 0; i < CCE_NUM_INT_MAP_CSRS; i++)
9312                write_csr(dd, CCE_INT_MAP+(8*i), 0);
9313
9314        /* SendCtxtCreditReturnAddr */
9315        for (i = 0; i < dd->chip_send_contexts; i++)
9316                write_kctxt_csr(dd, i, SEND_CTXT_CREDIT_RETURN_ADDR, 0);
9317
9318        /* PIO Send buffers */
9319        /* SDMA Send buffers */
9320        /* These are not normally read, and (presently) have no method
9321           to be read, so are not pre-initialized */
9322
9323        /* RcvHdrAddr */
9324        /* RcvHdrTailAddr */
9325        /* RcvTidFlowTable */
9326        for (i = 0; i < dd->chip_rcv_contexts; i++) {
9327                write_kctxt_csr(dd, i, RCV_HDR_ADDR, 0);
9328                write_kctxt_csr(dd, i, RCV_HDR_TAIL_ADDR, 0);
9329                for (j = 0; j < RXE_NUM_TID_FLOWS; j++)
9330                        write_uctxt_csr(dd, i, RCV_TID_FLOW_TABLE+(8*j), 0);
9331        }
9332
9333        /* RcvArray */
9334        for (i = 0; i < dd->chip_rcv_array_count; i++)
9335                write_csr(dd, RCV_ARRAY + (8*i),
9336                                        RCV_ARRAY_RT_WRITE_ENABLE_SMASK);
9337
9338        /* RcvQPMapTable */
9339        for (i = 0; i < 32; i++)
9340                write_csr(dd, RCV_QP_MAP_TABLE + (8 * i), 0);
9341}
9342
9343/*
9344 * Use the ctrl_bits in CceCtrl to clear the status_bits in CceStatus.
9345 */
9346static void clear_cce_status(struct hfi1_devdata *dd, u64 status_bits,
9347                             u64 ctrl_bits)
9348{
9349        unsigned long timeout;
9350        u64 reg;
9351
9352        /* is the condition present? */
9353        reg = read_csr(dd, CCE_STATUS);
9354        if ((reg & status_bits) == 0)
9355                return;
9356
9357        /* clear the condition */
9358        write_csr(dd, CCE_CTRL, ctrl_bits);
9359
9360        /* wait for the condition to clear */
9361        timeout = jiffies + msecs_to_jiffies(CCE_STATUS_TIMEOUT);
9362        while (1) {
9363                reg = read_csr(dd, CCE_STATUS);
9364                if ((reg & status_bits) == 0)
9365                        return;
9366                if (time_after(jiffies, timeout)) {
9367                        dd_dev_err(dd,
9368                                "Timeout waiting for CceStatus to clear bits 0x%llx, remaining 0x%llx\n",
9369                                status_bits, reg & status_bits);
9370                        return;
9371                }
9372                udelay(1);
9373        }
9374}
9375
9376/* set CCE CSRs to chip reset defaults */
9377static void reset_cce_csrs(struct hfi1_devdata *dd)
9378{
9379        int i;
9380
9381        /* CCE_REVISION read-only */
9382        /* CCE_REVISION2 read-only */
9383        /* CCE_CTRL - bits clear automatically */
9384        /* CCE_STATUS read-only, use CceCtrl to clear */
9385        clear_cce_status(dd, ALL_FROZE, CCE_CTRL_SPC_UNFREEZE_SMASK);
9386        clear_cce_status(dd, ALL_TXE_PAUSE, CCE_CTRL_TXE_RESUME_SMASK);
9387        clear_cce_status(dd, ALL_RXE_PAUSE, CCE_CTRL_RXE_RESUME_SMASK);
9388        for (i = 0; i < CCE_NUM_SCRATCH; i++)
9389                write_csr(dd, CCE_SCRATCH + (8 * i), 0);
9390        /* CCE_ERR_STATUS read-only */
9391        write_csr(dd, CCE_ERR_MASK, 0);
9392        write_csr(dd, CCE_ERR_CLEAR, ~0ull);
9393        /* CCE_ERR_FORCE leave alone */
9394        for (i = 0; i < CCE_NUM_32_BIT_COUNTERS; i++)
9395                write_csr(dd, CCE_COUNTER_ARRAY32 + (8 * i), 0);
9396        write_csr(dd, CCE_DC_CTRL, CCE_DC_CTRL_RESETCSR);
9397        /* CCE_PCIE_CTRL leave alone */
9398        for (i = 0; i < CCE_NUM_MSIX_VECTORS; i++) {
9399                write_csr(dd, CCE_MSIX_TABLE_LOWER + (8 * i), 0);
9400                write_csr(dd, CCE_MSIX_TABLE_UPPER + (8 * i),
9401                                        CCE_MSIX_TABLE_UPPER_RESETCSR);
9402        }
9403        for (i = 0; i < CCE_NUM_MSIX_PBAS; i++) {
9404                /* CCE_MSIX_PBA read-only */
9405                write_csr(dd, CCE_MSIX_INT_GRANTED, ~0ull);
9406                write_csr(dd, CCE_MSIX_VEC_CLR_WITHOUT_INT, ~0ull);
9407        }
9408        for (i = 0; i < CCE_NUM_INT_MAP_CSRS; i++)
9409                write_csr(dd, CCE_INT_MAP, 0);
9410        for (i = 0; i < CCE_NUM_INT_CSRS; i++) {
9411                /* CCE_INT_STATUS read-only */
9412                write_csr(dd, CCE_INT_MASK + (8 * i), 0);
9413                write_csr(dd, CCE_INT_CLEAR + (8 * i), ~0ull);
9414                /* CCE_INT_FORCE leave alone */
9415                /* CCE_INT_BLOCKED read-only */
9416        }
9417        for (i = 0; i < CCE_NUM_32_BIT_INT_COUNTERS; i++)
9418                write_csr(dd, CCE_INT_COUNTER_ARRAY32 + (8 * i), 0);
9419}
9420
9421/* set ASIC CSRs to chip reset defaults */
9422static void reset_asic_csrs(struct hfi1_devdata *dd)
9423{
9424        int i;
9425
9426        /*
9427         * If the HFIs are shared between separate nodes or VMs,
9428         * then more will need to be done here.  One idea is a module
9429         * parameter that returns early, letting the first power-on or
9430         * a known first load do the reset and blocking all others.
9431         */
9432
9433        if (!(dd->flags & HFI1_DO_INIT_ASIC))
9434                return;
9435
9436        if (dd->icode != ICODE_FPGA_EMULATION) {
9437                /* emulation does not have an SBus - leave these alone */
9438                /*
9439                 * All writes to ASIC_CFG_SBUS_REQUEST do something.
9440                 * Notes:
9441                 * o The reset is not zero if aimed at the core.  See the
9442                 *   SBus documentation for details.
9443                 * o If the SBus firmware has been updated (e.g. by the BIOS),
9444                 *   will the reset revert that?
9445                 */
9446                /* ASIC_CFG_SBUS_REQUEST leave alone */
9447                write_csr(dd, ASIC_CFG_SBUS_EXECUTE, 0);
9448        }
9449        /* ASIC_SBUS_RESULT read-only */
9450        write_csr(dd, ASIC_STS_SBUS_COUNTERS, 0);
9451        for (i = 0; i < ASIC_NUM_SCRATCH; i++)
9452                write_csr(dd, ASIC_CFG_SCRATCH + (8 * i), 0);
9453        write_csr(dd, ASIC_CFG_MUTEX, 0);       /* this will clear it */
9454
9455        /* We might want to retain this state across FLR if we ever use it */
9456        write_csr(dd, ASIC_CFG_DRV_STR, 0);
9457
9458        write_csr(dd, ASIC_CFG_THERM_POLL_EN, 0);
9459        /* ASIC_STS_THERM read-only */
9460        /* ASIC_CFG_RESET leave alone */
9461
9462        write_csr(dd, ASIC_PCIE_SD_HOST_CMD, 0);
9463        /* ASIC_PCIE_SD_HOST_STATUS read-only */
9464        write_csr(dd, ASIC_PCIE_SD_INTRPT_DATA_CODE, 0);
9465        write_csr(dd, ASIC_PCIE_SD_INTRPT_ENABLE, 0);
9466        /* ASIC_PCIE_SD_INTRPT_PROGRESS read-only */
9467        write_csr(dd, ASIC_PCIE_SD_INTRPT_STATUS, ~0ull); /* clear */
9468        /* ASIC_HFI0_PCIE_SD_INTRPT_RSPD_DATA read-only */
9469        /* ASIC_HFI1_PCIE_SD_INTRPT_RSPD_DATA read-only */
9470        for (i = 0; i < 16; i++)
9471                write_csr(dd, ASIC_PCIE_SD_INTRPT_LIST + (8 * i), 0);
9472
9473        /* ASIC_GPIO_IN read-only */
9474        write_csr(dd, ASIC_GPIO_OE, 0);
9475        write_csr(dd, ASIC_GPIO_INVERT, 0);
9476        write_csr(dd, ASIC_GPIO_OUT, 0);
9477        write_csr(dd, ASIC_GPIO_MASK, 0);
9478        /* ASIC_GPIO_STATUS read-only */
9479        write_csr(dd, ASIC_GPIO_CLEAR, ~0ull);
9480        /* ASIC_GPIO_FORCE leave alone */
9481
9482        /* ASIC_QSFP1_IN read-only */
9483        write_csr(dd, ASIC_QSFP1_OE, 0);
9484        write_csr(dd, ASIC_QSFP1_INVERT, 0);
9485        write_csr(dd, ASIC_QSFP1_OUT, 0);
9486        write_csr(dd, ASIC_QSFP1_MASK, 0);
9487        /* ASIC_QSFP1_STATUS read-only */
9488        write_csr(dd, ASIC_QSFP1_CLEAR, ~0ull);
9489        /* ASIC_QSFP1_FORCE leave alone */
9490
9491        /* ASIC_QSFP2_IN read-only */
9492        write_csr(dd, ASIC_QSFP2_OE, 0);
9493        write_csr(dd, ASIC_QSFP2_INVERT, 0);
9494        write_csr(dd, ASIC_QSFP2_OUT, 0);
9495        write_csr(dd, ASIC_QSFP2_MASK, 0);
9496        /* ASIC_QSFP2_STATUS read-only */
9497        write_csr(dd, ASIC_QSFP2_CLEAR, ~0ull);
9498        /* ASIC_QSFP2_FORCE leave alone */
9499
9500        write_csr(dd, ASIC_EEP_CTL_STAT, ASIC_EEP_CTL_STAT_RESETCSR);
9501        /* this also writes a NOP command, clearing paging mode */
9502        write_csr(dd, ASIC_EEP_ADDR_CMD, 0);
9503        write_csr(dd, ASIC_EEP_DATA, 0);
9504}
9505
9506/* set MISC CSRs to chip reset defaults */
9507static void reset_misc_csrs(struct hfi1_devdata *dd)
9508{
9509        int i;
9510
9511        for (i = 0; i < 32; i++) {
9512                write_csr(dd, MISC_CFG_RSA_R2 + (8 * i), 0);
9513                write_csr(dd, MISC_CFG_RSA_SIGNATURE + (8 * i), 0);
9514                write_csr(dd, MISC_CFG_RSA_MODULUS + (8 * i), 0);
9515        }
9516        /* MISC_CFG_SHA_PRELOAD leave alone - always reads 0 and can
9517           only be written 128-byte chunks */
9518        /* init RSA engine to clear lingering errors */
9519        write_csr(dd, MISC_CFG_RSA_CMD, 1);
9520        write_csr(dd, MISC_CFG_RSA_MU, 0);
9521        write_csr(dd, MISC_CFG_FW_CTRL, 0);
9522        /* MISC_STS_8051_DIGEST read-only */
9523        /* MISC_STS_SBM_DIGEST read-only */
9524        /* MISC_STS_PCIE_DIGEST read-only */
9525        /* MISC_STS_FAB_DIGEST read-only */
9526        /* MISC_ERR_STATUS read-only */
9527        write_csr(dd, MISC_ERR_MASK, 0);
9528        write_csr(dd, MISC_ERR_CLEAR, ~0ull);
9529        /* MISC_ERR_FORCE leave alone */
9530}
9531
9532/* set TXE CSRs to chip reset defaults */
9533static void reset_txe_csrs(struct hfi1_devdata *dd)
9534{
9535        int i;
9536
9537        /*
9538         * TXE Kernel CSRs
9539         */
9540        write_csr(dd, SEND_CTRL, 0);
9541        __cm_reset(dd, 0);      /* reset CM internal state */
9542        /* SEND_CONTEXTS read-only */
9543        /* SEND_DMA_ENGINES read-only */
9544        /* SEND_PIO_MEM_SIZE read-only */
9545        /* SEND_DMA_MEM_SIZE read-only */
9546        write_csr(dd, SEND_HIGH_PRIORITY_LIMIT, 0);
9547        pio_reset_all(dd);      /* SEND_PIO_INIT_CTXT */
9548        /* SEND_PIO_ERR_STATUS read-only */
9549        write_csr(dd, SEND_PIO_ERR_MASK, 0);
9550        write_csr(dd, SEND_PIO_ERR_CLEAR, ~0ull);
9551        /* SEND_PIO_ERR_FORCE leave alone */
9552        /* SEND_DMA_ERR_STATUS read-only */
9553        write_csr(dd, SEND_DMA_ERR_MASK, 0);
9554        write_csr(dd, SEND_DMA_ERR_CLEAR, ~0ull);
9555        /* SEND_DMA_ERR_FORCE leave alone */
9556        /* SEND_EGRESS_ERR_STATUS read-only */
9557        write_csr(dd, SEND_EGRESS_ERR_MASK, 0);
9558        write_csr(dd, SEND_EGRESS_ERR_CLEAR, ~0ull);
9559        /* SEND_EGRESS_ERR_FORCE leave alone */
9560        write_csr(dd, SEND_BTH_QP, 0);
9561        write_csr(dd, SEND_STATIC_RATE_CONTROL, 0);
9562        write_csr(dd, SEND_SC2VLT0, 0);
9563        write_csr(dd, SEND_SC2VLT1, 0);
9564        write_csr(dd, SEND_SC2VLT2, 0);
9565        write_csr(dd, SEND_SC2VLT3, 0);
9566        write_csr(dd, SEND_LEN_CHECK0, 0);
9567        write_csr(dd, SEND_LEN_CHECK1, 0);
9568        /* SEND_ERR_STATUS read-only */
9569        write_csr(dd, SEND_ERR_MASK, 0);
9570        write_csr(dd, SEND_ERR_CLEAR, ~0ull);
9571        /* SEND_ERR_FORCE read-only */
9572        for (i = 0; i < VL_ARB_LOW_PRIO_TABLE_SIZE; i++)
9573                write_csr(dd, SEND_LOW_PRIORITY_LIST + (8*i), 0);
9574        for (i = 0; i < VL_ARB_HIGH_PRIO_TABLE_SIZE; i++)
9575                write_csr(dd, SEND_HIGH_PRIORITY_LIST + (8*i), 0);
9576        for (i = 0; i < dd->chip_send_contexts/NUM_CONTEXTS_PER_SET; i++)
9577                write_csr(dd, SEND_CONTEXT_SET_CTRL + (8*i), 0);
9578        for (i = 0; i < TXE_NUM_32_BIT_COUNTER; i++)
9579                write_csr(dd, SEND_COUNTER_ARRAY32 + (8*i), 0);
9580        for (i = 0; i < TXE_NUM_64_BIT_COUNTER; i++)
9581                write_csr(dd, SEND_COUNTER_ARRAY64 + (8*i), 0);
9582        write_csr(dd, SEND_CM_CTRL, SEND_CM_CTRL_RESETCSR);
9583        write_csr(dd, SEND_CM_GLOBAL_CREDIT,
9584                                        SEND_CM_GLOBAL_CREDIT_RESETCSR);
9585        /* SEND_CM_CREDIT_USED_STATUS read-only */
9586        write_csr(dd, SEND_CM_TIMER_CTRL, 0);
9587        write_csr(dd, SEND_CM_LOCAL_AU_TABLE0_TO3, 0);
9588        write_csr(dd, SEND_CM_LOCAL_AU_TABLE4_TO7, 0);
9589        write_csr(dd, SEND_CM_REMOTE_AU_TABLE0_TO3, 0);
9590        write_csr(dd, SEND_CM_REMOTE_AU_TABLE4_TO7, 0);
9591        for (i = 0; i < TXE_NUM_DATA_VL; i++)
9592                write_csr(dd, SEND_CM_CREDIT_VL + (8*i), 0);
9593        write_csr(dd, SEND_CM_CREDIT_VL15, 0);
9594        /* SEND_CM_CREDIT_USED_VL read-only */
9595        /* SEND_CM_CREDIT_USED_VL15 read-only */
9596        /* SEND_EGRESS_CTXT_STATUS read-only */
9597        /* SEND_EGRESS_SEND_DMA_STATUS read-only */
9598        write_csr(dd, SEND_EGRESS_ERR_INFO, ~0ull);
9599        /* SEND_EGRESS_ERR_INFO read-only */
9600        /* SEND_EGRESS_ERR_SOURCE read-only */
9601
9602        /*
9603         * TXE Per-Context CSRs
9604         */
9605        for (i = 0; i < dd->chip_send_contexts; i++) {
9606                write_kctxt_csr(dd, i, SEND_CTXT_CTRL, 0);
9607                write_kctxt_csr(dd, i, SEND_CTXT_CREDIT_CTRL, 0);
9608                write_kctxt_csr(dd, i, SEND_CTXT_CREDIT_RETURN_ADDR, 0);
9609                write_kctxt_csr(dd, i, SEND_CTXT_CREDIT_FORCE, 0);
9610                write_kctxt_csr(dd, i, SEND_CTXT_ERR_MASK, 0);
9611                write_kctxt_csr(dd, i, SEND_CTXT_ERR_CLEAR, ~0ull);
9612                write_kctxt_csr(dd, i, SEND_CTXT_CHECK_ENABLE, 0);
9613                write_kctxt_csr(dd, i, SEND_CTXT_CHECK_VL, 0);
9614                write_kctxt_csr(dd, i, SEND_CTXT_CHECK_JOB_KEY, 0);
9615                write_kctxt_csr(dd, i, SEND_CTXT_CHECK_PARTITION_KEY, 0);
9616                write_kctxt_csr(dd, i, SEND_CTXT_CHECK_SLID, 0);
9617                write_kctxt_csr(dd, i, SEND_CTXT_CHECK_OPCODE, 0);
9618        }
9619
9620        /*
9621         * TXE Per-SDMA CSRs
9622         */
9623        for (i = 0; i < dd->chip_sdma_engines; i++) {
9624                write_kctxt_csr(dd, i, SEND_DMA_CTRL, 0);
9625                /* SEND_DMA_STATUS read-only */
9626                write_kctxt_csr(dd, i, SEND_DMA_BASE_ADDR, 0);
9627                write_kctxt_csr(dd, i, SEND_DMA_LEN_GEN, 0);
9628                write_kctxt_csr(dd, i, SEND_DMA_TAIL, 0);
9629                /* SEND_DMA_HEAD read-only */
9630                write_kctxt_csr(dd, i, SEND_DMA_HEAD_ADDR, 0);
9631                write_kctxt_csr(dd, i, SEND_DMA_PRIORITY_THLD, 0);
9632                /* SEND_DMA_IDLE_CNT read-only */
9633                write_kctxt_csr(dd, i, SEND_DMA_RELOAD_CNT, 0);
9634                write_kctxt_csr(dd, i, SEND_DMA_DESC_CNT, 0);
9635                /* SEND_DMA_DESC_FETCHED_CNT read-only */
9636                /* SEND_DMA_ENG_ERR_STATUS read-only */
9637                write_kctxt_csr(dd, i, SEND_DMA_ENG_ERR_MASK, 0);
9638                write_kctxt_csr(dd, i, SEND_DMA_ENG_ERR_CLEAR, ~0ull);
9639                /* SEND_DMA_ENG_ERR_FORCE leave alone */
9640                write_kctxt_csr(dd, i, SEND_DMA_CHECK_ENABLE, 0);
9641                write_kctxt_csr(dd, i, SEND_DMA_CHECK_VL, 0);
9642                write_kctxt_csr(dd, i, SEND_DMA_CHECK_JOB_KEY, 0);
9643                write_kctxt_csr(dd, i, SEND_DMA_CHECK_PARTITION_KEY, 0);
9644                write_kctxt_csr(dd, i, SEND_DMA_CHECK_SLID, 0);
9645                write_kctxt_csr(dd, i, SEND_DMA_CHECK_OPCODE, 0);
9646                write_kctxt_csr(dd, i, SEND_DMA_MEMORY, 0);
9647        }
9648}
9649
9650/*
9651 * Expect on entry:
9652 * o Packet ingress is disabled, i.e. RcvCtrl.RcvPortEnable == 0
9653 */
9654static void init_rbufs(struct hfi1_devdata *dd)
9655{
9656        u64 reg;
9657        int count;
9658
9659        /*
9660         * Wait for DMA to stop: RxRbufPktPending and RxPktInProgress are
9661         * clear.
9662         */
9663        count = 0;
9664        while (1) {
9665                reg = read_csr(dd, RCV_STATUS);
9666                if ((reg & (RCV_STATUS_RX_RBUF_PKT_PENDING_SMASK
9667                            | RCV_STATUS_RX_PKT_IN_PROGRESS_SMASK)) == 0)
9668                        break;
9669                /*
9670                 * Give up after 1ms - maximum wait time.
9671                 *
9672                 * RBuf size is 148KiB.  Slowest possible is PCIe Gen1 x1 at
9673                 * 250MB/s bandwidth.  Lower rate to 66% for overhead to get:
9674                 *      148 KB / (66% * 250MB/s) = 920us
9675                 */
9676                if (count++ > 500) {
9677                        dd_dev_err(dd,
9678                                "%s: in-progress DMA not clearing: RcvStatus 0x%llx, continuing\n",
9679                                __func__, reg);
9680                        break;
9681                }
9682                udelay(2); /* do not busy-wait the CSR */
9683        }
9684
9685        /* start the init - expect RcvCtrl to be 0 */
9686        write_csr(dd, RCV_CTRL, RCV_CTRL_RX_RBUF_INIT_SMASK);
9687
9688        /*
 9689         * Read to force the write of RcvCtrl.RxRbufInit.  There is a brief
 9690         * period after the write before RcvStatus.RxRbufInitDone is valid.
 9691         * The delay in the first run through the loop below is sufficient and
 9692         * required before the first read of RcvStatus.RxRbufInitDone.
9693         */
9694        read_csr(dd, RCV_CTRL);
9695
9696        /* wait for the init to finish */
9697        count = 0;
9698        while (1) {
9699                /* delay is required first time through - see above */
9700                udelay(2); /* do not busy-wait the CSR */
9701                reg = read_csr(dd, RCV_STATUS);
9702                if (reg & (RCV_STATUS_RX_RBUF_INIT_DONE_SMASK))
9703                        break;
9704
9705                /* give up after 100us - slowest possible at 33MHz is 73us */
9706                if (count++ > 50) {
9707                        dd_dev_err(dd,
 9708                                "%s: RcvStatus.RxRbufInitDone not set, continuing\n",
9709                                __func__);
9710                        break;
9711                }
9712        }
9713}
9714
9715/* set RXE CSRs to chip reset defaults */
9716static void reset_rxe_csrs(struct hfi1_devdata *dd)
9717{
9718        int i, j;
9719
9720        /*
9721         * RXE Kernel CSRs
9722         */
9723        write_csr(dd, RCV_CTRL, 0);
9724        init_rbufs(dd);
9725        /* RCV_STATUS read-only */
9726        /* RCV_CONTEXTS read-only */
9727        /* RCV_ARRAY_CNT read-only */
9728        /* RCV_BUF_SIZE read-only */
9729        write_csr(dd, RCV_BTH_QP, 0);
9730        write_csr(dd, RCV_MULTICAST, 0);
9731        write_csr(dd, RCV_BYPASS, 0);
9732        write_csr(dd, RCV_VL15, 0);
9733        /* this is a clear-down */
9734        write_csr(dd, RCV_ERR_INFO,
9735                        RCV_ERR_INFO_RCV_EXCESS_BUFFER_OVERRUN_SMASK);
9736        /* RCV_ERR_STATUS read-only */
9737        write_csr(dd, RCV_ERR_MASK, 0);
9738        write_csr(dd, RCV_ERR_CLEAR, ~0ull);
9739        /* RCV_ERR_FORCE leave alone */
9740        for (i = 0; i < 32; i++)
9741                write_csr(dd, RCV_QP_MAP_TABLE + (8 * i), 0);
9742        for (i = 0; i < 4; i++)
9743                write_csr(dd, RCV_PARTITION_KEY + (8 * i), 0);
9744        for (i = 0; i < RXE_NUM_32_BIT_COUNTERS; i++)
9745                write_csr(dd, RCV_COUNTER_ARRAY32 + (8 * i), 0);
9746        for (i = 0; i < RXE_NUM_64_BIT_COUNTERS; i++)
9747                write_csr(dd, RCV_COUNTER_ARRAY64 + (8 * i), 0);
9748        for (i = 0; i < RXE_NUM_RSM_INSTANCES; i++) {
9749                write_csr(dd, RCV_RSM_CFG + (8 * i), 0);
9750                write_csr(dd, RCV_RSM_SELECT + (8 * i), 0);
9751                write_csr(dd, RCV_RSM_MATCH + (8 * i), 0);
9752        }
9753        for (i = 0; i < 32; i++)
9754                write_csr(dd, RCV_RSM_MAP_TABLE + (8 * i), 0);
9755
9756        /*
9757         * RXE Kernel and User Per-Context CSRs
9758         */
9759        for (i = 0; i < dd->chip_rcv_contexts; i++) {
9760                /* kernel */
9761                write_kctxt_csr(dd, i, RCV_CTXT_CTRL, 0);
9762                /* RCV_CTXT_STATUS read-only */
9763                write_kctxt_csr(dd, i, RCV_EGR_CTRL, 0);
9764                write_kctxt_csr(dd, i, RCV_TID_CTRL, 0);
9765                write_kctxt_csr(dd, i, RCV_KEY_CTRL, 0);
9766                write_kctxt_csr(dd, i, RCV_HDR_ADDR, 0);
9767                write_kctxt_csr(dd, i, RCV_HDR_CNT, 0);
9768                write_kctxt_csr(dd, i, RCV_HDR_ENT_SIZE, 0);
9769                write_kctxt_csr(dd, i, RCV_HDR_SIZE, 0);
9770                write_kctxt_csr(dd, i, RCV_HDR_TAIL_ADDR, 0);
9771                write_kctxt_csr(dd, i, RCV_AVAIL_TIME_OUT, 0);
9772                write_kctxt_csr(dd, i, RCV_HDR_OVFL_CNT, 0);
9773
9774                /* user */
9775                /* RCV_HDR_TAIL read-only */
9776                write_uctxt_csr(dd, i, RCV_HDR_HEAD, 0);
9777                /* RCV_EGR_INDEX_TAIL read-only */
9778                write_uctxt_csr(dd, i, RCV_EGR_INDEX_HEAD, 0);
9779                /* RCV_EGR_OFFSET_TAIL read-only */
9780                for (j = 0; j < RXE_NUM_TID_FLOWS; j++) {
9781                        write_uctxt_csr(dd, i, RCV_TID_FLOW_TABLE + (8 * j),
9782                                0);
9783                }
9784        }
9785}
9786
9787/*
9788 * Set sc2vl tables.
9789 *
9790 * They power on to zeros, so to avoid send context errors
9791 * they need to be set:
9792 *
9793 * SC 0-7 -> VL 0-7 (respectively)
9794 * SC 15  -> VL 15
9795 * otherwise
9796 *        -> VL 0
9797 */
9798static void init_sc2vl_tables(struct hfi1_devdata *dd)
9799{
9800        int i;
9801        /* init per architecture spec, constrained by hardware capability */
9802
9803        /* HFI maps sent packets */
9804        write_csr(dd, SEND_SC2VLT0, SC2VL_VAL(
9805                0,
9806                0, 0, 1, 1,
9807                2, 2, 3, 3,
9808                4, 4, 5, 5,
9809                6, 6, 7, 7));
9810        write_csr(dd, SEND_SC2VLT1, SC2VL_VAL(
9811                1,
9812                8, 0, 9, 0,
9813                10, 0, 11, 0,
9814                12, 0, 13, 0,
9815                14, 0, 15, 15));
9816        write_csr(dd, SEND_SC2VLT2, SC2VL_VAL(
9817                2,
9818                16, 0, 17, 0,
9819                18, 0, 19, 0,
9820                20, 0, 21, 0,
9821                22, 0, 23, 0));
9822        write_csr(dd, SEND_SC2VLT3, SC2VL_VAL(
9823                3,
9824                24, 0, 25, 0,
9825                26, 0, 27, 0,
9826                28, 0, 29, 0,
9827                30, 0, 31, 0));
9828
9829        /* DC maps received packets */
9830        write_csr(dd, DCC_CFG_SC_VL_TABLE_15_0, DC_SC_VL_VAL(
9831                15_0,
9832                0, 0, 1, 1,  2, 2,  3, 3,  4, 4,  5, 5,  6, 6,  7,  7,
9833                8, 0, 9, 0, 10, 0, 11, 0, 12, 0, 13, 0, 14, 0, 15, 15));
9834        write_csr(dd, DCC_CFG_SC_VL_TABLE_31_16, DC_SC_VL_VAL(
9835                31_16,
9836                16, 0, 17, 0, 18, 0, 19, 0, 20, 0, 21, 0, 22, 0, 23, 0,
9837                24, 0, 25, 0, 26, 0, 27, 0, 28, 0, 29, 0, 30, 0, 31, 0));
9838
9839        /* initialize the cached sc2vl values consistently with h/w */
9840        for (i = 0; i < 32; i++) {
9841                if (i < 8 || i == 15)
9842                        *((u8 *)(dd->sc2vl) + i) = (u8)i;
9843                else
9844                        *((u8 *)(dd->sc2vl) + i) = 0;
9845        }
9846}
9847
9848/*
9849 * Read chip sizes and then reset parts to sane, disabled, values.  We cannot
9850 * depend on the chip going through a power-on reset - a driver may be loaded
9851 * and unloaded many times.
9852 *
9853 * Do not write any CSR values to the chip in this routine - there may be
9854 * a reset following the (possible) FLR in this routine.
9855 *
9856 */
9857static void init_chip(struct hfi1_devdata *dd)
9858{
9859        int i;
9860
9861        /*
9862         * Put the HFI CSRs in a known state.
9863         * Combine this with a DC reset.
9864         *
9865         * Stop the device from doing anything while we do a
9866         * reset.  We know there are no other active users of
9867         * the device since we are now in charge.  Turn off
 9868         * all outbound and inbound traffic and make sure
9869         * the device does not generate any interrupts.
9870         */
9871
9872        /* disable send contexts and SDMA engines */
9873        write_csr(dd, SEND_CTRL, 0);
9874        for (i = 0; i < dd->chip_send_contexts; i++)
9875                write_kctxt_csr(dd, i, SEND_CTXT_CTRL, 0);
9876        for (i = 0; i < dd->chip_sdma_engines; i++)
9877                write_kctxt_csr(dd, i, SEND_DMA_CTRL, 0);
9878        /* disable port (turn off RXE inbound traffic) and contexts */
9879        write_csr(dd, RCV_CTRL, 0);
9880        for (i = 0; i < dd->chip_rcv_contexts; i++)
 9881                write_kctxt_csr(dd, i, RCV_CTXT_CTRL, 0);
9882        /* mask all interrupt sources */
9883        for (i = 0; i < CCE_NUM_INT_CSRS; i++)
9884                write_csr(dd, CCE_INT_MASK + (8*i), 0ull);
9885
9886        /*
9887         * DC Reset: do a full DC reset before the register clear.
9888         * A recommended length of time to hold is one CSR read,
9889         * so reread the CceDcCtrl.  Then, hold the DC in reset
9890         * across the clear.
9891         */
9892        write_csr(dd, CCE_DC_CTRL, CCE_DC_CTRL_DC_RESET_SMASK);
9893        (void) read_csr(dd, CCE_DC_CTRL);
9894
9895        if (use_flr) {
9896                /*
9897                 * A FLR will reset the SPC core and part of the PCIe.
9898                 * The parts that need to be restored have already been
9899                 * saved.
9900                 */
9901                dd_dev_info(dd, "Resetting CSRs with FLR\n");
9902
9903                /* do the FLR, the DC reset will remain */
9904                hfi1_pcie_flr(dd);
9905
9906                /* restore command and BARs */
9907                restore_pci_variables(dd);
9908
 9909                if (is_a0(dd)) {        /* A0: perform the FLR a second time */
9910                        dd_dev_info(dd, "Resetting CSRs with FLR\n");
9911                        hfi1_pcie_flr(dd);
9912                        restore_pci_variables(dd);
9913                }
9914
9915                reset_asic_csrs(dd);
9916        } else {
9917                dd_dev_info(dd, "Resetting CSRs with writes\n");
9918                reset_cce_csrs(dd);
9919                reset_txe_csrs(dd);
9920                reset_rxe_csrs(dd);
9921                reset_asic_csrs(dd);
9922                reset_misc_csrs(dd);
9923        }
9924        /* clear the DC reset */
9925        write_csr(dd, CCE_DC_CTRL, 0);
9926
9927        /* Set the LED off */
9928        if (is_a0(dd))
9929                setextled(dd, 0);
9930        /*
9931         * Clear the QSFP reset.
9932         * A0 leaves the out lines floating on power on, then on an FLR
9933         * enforces a 0 on all out pins.  The driver does not touch
9934         * ASIC_QSFPn_OUT otherwise.  This leaves RESET_N low and
 9935         * anything plugged in constantly in reset, if it pays attention
9936         * to RESET_N.
9937         * A prime example of this is SiPh. For now, set all pins high.
9938         * I2CCLK and I2CDAT will change per direction, and INT_N and
9939         * MODPRS_N are input only and their value is ignored.
9940         */
9941        if (is_a0(dd)) {
9942                write_csr(dd, ASIC_QSFP1_OUT, 0x1f);
9943                write_csr(dd, ASIC_QSFP2_OUT, 0x1f);
9944        }
9945}
9946
9947static void init_early_variables(struct hfi1_devdata *dd)
9948{
9949        int i;
9950
9951        /* assign link credit variables */
9952        dd->vau = CM_VAU;
9953        dd->link_credits = CM_GLOBAL_CREDITS;
9954        if (is_a0(dd))
9955                dd->link_credits--;
9956        dd->vcu = cu_to_vcu(hfi1_cu);
9957        /* enough room for 8 MAD packets plus header - 17K */
9958        dd->vl15_init = (8 * (2048 + 128)) / vau_to_au(dd->vau);
9959        if (dd->vl15_init > dd->link_credits)
9960                dd->vl15_init = dd->link_credits;
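        /*
         * Worked example (illustrative, assuming vau_to_au(CM_VAU) decodes
         * to a 64-byte allocation unit): 8 * (2048 + 128) = 17408 bytes,
         * so vl15_init = 17408 / 64 = 272 AUs, then capped at link_credits.
         */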
9961
9962        write_uninitialized_csrs_and_memories(dd);
9963
9964        if (HFI1_CAP_IS_KSET(PKEY_CHECK))
9965                for (i = 0; i < dd->num_pports; i++) {
9966                        struct hfi1_pportdata *ppd = &dd->pport[i];
9967
9968                        set_partition_keys(ppd);
9969                }
9970        init_sc2vl_tables(dd);
9971}
9972
9973static void init_kdeth_qp(struct hfi1_devdata *dd)
9974{
9975        /* user changed the KDETH_QP */
9976        if (kdeth_qp != 0 && kdeth_qp >= 0xff) {
9977                /* out of range or illegal value */
 9978                dd_dev_err(dd, "Invalid KDETH queue pair prefix, ignoring\n");
9979                kdeth_qp = 0;
9980        }
9981        if (kdeth_qp == 0)      /* not set, or failed range check */
9982                kdeth_qp = DEFAULT_KDETH_QP;
9983
9984        write_csr(dd, SEND_BTH_QP,
9985                        (kdeth_qp & SEND_BTH_QP_KDETH_QP_MASK)
9986                                << SEND_BTH_QP_KDETH_QP_SHIFT);
9987
9988        write_csr(dd, RCV_BTH_QP,
9989                        (kdeth_qp & RCV_BTH_QP_KDETH_QP_MASK)
9990                                << RCV_BTH_QP_KDETH_QP_SHIFT);
9991}
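/*
 * Usage note (illustrative): the same 8-bit prefix value is programmed into
 * both SEND_BTH_QP and RCV_BTH_QP, so the send and receive sides agree on
 * which BTH QPNs are treated as KDETH packets.  A hypothetical module
 * parameter value of kdeth_qp=0x90 would pass the range check above and
 * replace DEFAULT_KDETH_QP on both sides.
 */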
9992
9993/**
 9994 * init_qpmap_table - set the QPN mapping table
 9995 * @dd - device data
 9996 * @first_ctxt - first context
 9997 * @last_ctxt - last context
 9998 *
 9999 * This routine sets the QPN mapping table that
10000 * is indexed by qpn[8:1].
10001 *
10002 * The routine will round robin the 256 settings
10003 * from first_ctxt to last_ctxt.
10004 *
10005 * The first/last looks ahead to having specialized
10006 * receive contexts for mgmt and bypass.  Normal
10007 * verbs traffic is assumed to be on a range
10008 * of receive contexts.
10009 */
10010static void init_qpmap_table(struct hfi1_devdata *dd,
10011                             u32 first_ctxt,
10012                             u32 last_ctxt)
10013{
10014        u64 reg = 0;
10015        u64 regno = RCV_QP_MAP_TABLE;
10016        int i;
10017        u64 ctxt = first_ctxt;
10018
10019        for (i = 0; i < 256;) {
10020                if (ctxt == VL15CTXT) {
10021                        ctxt++;
10022                        if (ctxt > last_ctxt)
10023                                ctxt = first_ctxt;
10024                        continue;
10025                }
10026                reg |= ctxt << (8 * (i % 8));
10027                i++;
10028                ctxt++;
10029                if (ctxt > last_ctxt)
10030                        ctxt = first_ctxt;
10031                if (i % 8 == 0) {
10032                        write_csr(dd, regno, reg);
10033                        reg = 0;
10034                        regno += 8;
10035                }
10036        }
10037        if (i % 8)
10038                write_csr(dd, regno, reg);
10039
10040        add_rcvctrl(dd, RCV_CTRL_RCV_QP_MAP_ENABLE_SMASK
10041                        | RCV_CTRL_RCV_BYPASS_ENABLE_SMASK);
10042}
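/*
 * Worked example (illustrative, not driver code): with first_ctxt = 1,
 * last_ctxt = 2 and VL15CTXT outside that range, the 256 table entries
 * cycle 1, 2, 1, 2, ... and are packed one byte each, eight per 64-bit CSR:
 *
 *	u64 reg = 0;
 *	int i;
 *
 *	for (i = 0; i < 8; i++)
 *		reg |= (u64)((i % 2) + 1) << (8 * i);
 *	   (reg == 0x0201020102010201, written to RCV_QP_MAP_TABLE)
 *
 * A packet with qpn[8:1] == j is then steered to receive context table[j].
 */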
10043
10044/**
10045 * init_qos - init RX qos
10046 * @dd - device data
10047 * @first_ctxt - first receive context to use
10048 *
10049 * This routine initializes Rule 0 and the
10050 * RSM map table to implement qos.
10051 *
10052 * If all of the limit tests succeed,
10053 * qos is applied based on the array
10054 * interpretation of krcvqs where
10055 * entry 0 is VL0.
10056 *
10057 * The number of vl bits (n) and the number of qpn
10058 * bits (m) are computed to feed both the RSM map table
10059 * and the single rule.
10060 *
10061 */
10062static void init_qos(struct hfi1_devdata *dd, u32 first_ctxt)
10063{
10064        u8 max_by_vl = 0;
10065        unsigned qpns_per_vl, ctxt, i, qpn, n = 1, m;
10066        u64 *rsmmap;
10067        u64 reg;
10068        u8  rxcontext = is_a0(dd) ? 0 : 0xff;  /* 0 is the default on A0 */
10069
10070        /* validate */
10071        if (dd->n_krcv_queues <= MIN_KERNEL_KCTXTS ||
10072            num_vls == 1 ||
10073            krcvqsset <= 1)
10074                goto bail;
10075        for (i = 0; i < min_t(unsigned, num_vls, krcvqsset); i++)
10076                if (krcvqs[i] > max_by_vl)
10077                        max_by_vl = krcvqs[i];
10078        if (max_by_vl > 32)
10079                goto bail;
10080        qpns_per_vl = __roundup_pow_of_two(max_by_vl);
10081        /* determine bits vl */
10082        n = ilog2(num_vls);
10083        /* determine bits for qpn */
10084        m = ilog2(qpns_per_vl);
10085        if ((m + n) > 7)
10086                goto bail;
10087        if (num_vls * qpns_per_vl > dd->chip_rcv_contexts)
10088                goto bail;
10089        rsmmap = kmalloc_array(NUM_MAP_REGS, sizeof(u64), GFP_KERNEL);
             if (!rsmmap)
                     goto bail;
10090        memset(rsmmap, rxcontext, NUM_MAP_REGS * sizeof(u64));
10091        /* init the local copy of the table */
10092        for (i = 0, ctxt = first_ctxt; i < num_vls; i++) {
10093                unsigned tctxt;
10094
10095                for (qpn = 0, tctxt = ctxt;
10096                     krcvqs[i] && qpn < qpns_per_vl; qpn++) {
10097                        unsigned idx, regoff, regidx;
10098
10099                        /* generate an index < 128 */
10100                        idx = (qpn << n) ^ i;
10101                        regoff = (idx % 8) * 8;
10102                        regidx = idx / 8;
10103                        reg = rsmmap[regidx];
10104                        /* replace 0xff with context number */
10105                        reg &= ~(RCV_RSM_MAP_TABLE_RCV_CONTEXT_A_MASK
10106                                << regoff);
10107                        reg |= (u64)(tctxt++) << regoff;
10108                        rsmmap[regidx] = reg;
10109                        if (tctxt == ctxt + krcvqs[i])
10110                                tctxt = ctxt;
10111                }
10112                ctxt += krcvqs[i];
10113        }
10114        /* flush cached copies to chip */
10115        for (i = 0; i < NUM_MAP_REGS; i++)
10116                write_csr(dd, RCV_RSM_MAP_TABLE + (8 * i), rsmmap[i]);
10117        /* add rule0 */
10118        write_csr(dd, RCV_RSM_CFG /* + (8 * 0) */,
10119                RCV_RSM_CFG_ENABLE_OR_CHAIN_RSM0_MASK
10120                        << RCV_RSM_CFG_ENABLE_OR_CHAIN_RSM0_SHIFT |
10121                2ull << RCV_RSM_CFG_PACKET_TYPE_SHIFT);
10122        write_csr(dd, RCV_RSM_SELECT /* + (8 * 0) */,
10123                LRH_BTH_MATCH_OFFSET
10124                        << RCV_RSM_SELECT_FIELD1_OFFSET_SHIFT |
10125                LRH_SC_MATCH_OFFSET << RCV_RSM_SELECT_FIELD2_OFFSET_SHIFT |
10126                LRH_SC_SELECT_OFFSET << RCV_RSM_SELECT_INDEX1_OFFSET_SHIFT |
10127                ((u64)n) << RCV_RSM_SELECT_INDEX1_WIDTH_SHIFT |
10128                QPN_SELECT_OFFSET << RCV_RSM_SELECT_INDEX2_OFFSET_SHIFT |
10129                ((u64)m + (u64)n) << RCV_RSM_SELECT_INDEX2_WIDTH_SHIFT);
10130        write_csr(dd, RCV_RSM_MATCH /* + (8 * 0) */,
10131                LRH_BTH_MASK << RCV_RSM_MATCH_MASK1_SHIFT |
10132                LRH_BTH_VALUE << RCV_RSM_MATCH_VALUE1_SHIFT |
10133                LRH_SC_MASK << RCV_RSM_MATCH_MASK2_SHIFT |
10134                LRH_SC_VALUE << RCV_RSM_MATCH_VALUE2_SHIFT);
10135        /* Enable RSM */
10136        add_rcvctrl(dd, RCV_CTRL_RCV_RSM_ENABLE_SMASK);
10137        kfree(rsmmap);
10138        /* map everything else (non-VL15) to context 0 */
10139        init_qpmap_table(dd, 0, 0);
10143        dd->qos_shift = n + 1;
10144        return;
10145bail:
10146        dd->qos_shift = 1;
10147        init_qpmap_table(
10148                dd,
10149                dd->n_krcv_queues > MIN_KERNEL_KCTXTS ? MIN_KERNEL_KCTXTS : 0,
10150                dd->n_krcv_queues - 1);
10151}
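/*
 * Worked example (illustrative): num_vls = 4 and krcvqs = { 2, 2, 2, 2 }
 * gives max_by_vl = 2, qpns_per_vl = 2, n = ilog2(4) = 2 VL bits and
 * m = ilog2(2) = 1 QPN bit.  m + n = 3 <= 7 and 4 * 2 = 8 <= the number of
 * receive contexts, so the checks pass: RSM rule 0 then extracts m + n = 3
 * bits to index the map table and dd->qos_shift ends up as n + 1 = 3.
 */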
10152
10153static void init_rxe(struct hfi1_devdata *dd)
10154{
10155        /* enable all receive errors */
10156        write_csr(dd, RCV_ERR_MASK, ~0ull);
10157        /* setup QPN map table - start where VL15 context leaves off */
10158        init_qos(
10159                dd,
10160                dd->n_krcv_queues > MIN_KERNEL_KCTXTS ? MIN_KERNEL_KCTXTS : 0);
10161        /*
10162         * make sure RcvCtrl.RcvWcb <= PCIe Device Control
10163         * Register Max_Payload_Size (PCI_EXP_DEVCTL in Linux PCIe config
10164         * space, PciCfgCap2.MaxPayloadSize in HFI).  There is only one
10165         * invalid configuration: RcvCtrl.RcvWcb set to its max of 256 and
10166         * Max_Payload_Size set to its minimum of 128.
10167         *
10168         * Presently, RcvCtrl.RcvWcb is not modified from its default of 0
10169         * (64 bytes).  Max_Payload_Size is possibly modified upward in
10170         * tune_pcie_caps() which is called after this routine.
10171         */
10172}
10173
10174static void init_other(struct hfi1_devdata *dd)
10175{
10176        /* enable all CCE errors */
10177        write_csr(dd, CCE_ERR_MASK, ~0ull);
10178        /* enable *some* Misc errors */
10179        write_csr(dd, MISC_ERR_MASK, DRIVER_MISC_MASK);
10180        /* enable all DC errors, except LCB */
10181        write_csr(dd, DCC_ERR_FLG_EN, ~0ull);
10182        write_csr(dd, DC_DC8051_ERR_EN, ~0ull);
10183}
10184
10185/*
10186 * Fill out the given AU table using the given CU.  A CU is defined in terms
10187 * of AUs.  The table is an encoding: given the index, how many AUs does that
10188 * represent?
10189 *
10190 * NOTE: Assumes that the register layout is the same for the
10191 * local and remote tables.
10192 */
10193static void assign_cm_au_table(struct hfi1_devdata *dd, u32 cu,
10194                               u32 csr0to3, u32 csr4to7)
10195{
10196        write_csr(dd, csr0to3,
10197                   0ull <<
10198                        SEND_CM_LOCAL_AU_TABLE0_TO3_LOCAL_AU_TABLE0_SHIFT
10199                |  1ull <<
10200                        SEND_CM_LOCAL_AU_TABLE0_TO3_LOCAL_AU_TABLE1_SHIFT
10201                |  2ull * cu <<
10202                        SEND_CM_LOCAL_AU_TABLE0_TO3_LOCAL_AU_TABLE2_SHIFT
10203                |  4ull * cu <<
10204                        SEND_CM_LOCAL_AU_TABLE0_TO3_LOCAL_AU_TABLE3_SHIFT);
10205        write_csr(dd, csr4to7,
10206                   8ull * cu <<
10207                        SEND_CM_LOCAL_AU_TABLE4_TO7_LOCAL_AU_TABLE4_SHIFT
10208                | 16ull * cu <<
10209                        SEND_CM_LOCAL_AU_TABLE4_TO7_LOCAL_AU_TABLE5_SHIFT
10210                | 32ull * cu <<
10211                        SEND_CM_LOCAL_AU_TABLE4_TO7_LOCAL_AU_TABLE6_SHIFT
10212                | 64ull * cu <<
10213                        SEND_CM_LOCAL_AU_TABLE4_TO7_LOCAL_AU_TABLE7_SHIFT);
10214
10215}
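/*
 * Worked example (illustrative, assuming vcu_to_cu() is the usual
 * power-of-two decode so vcu = 0 gives cu = 1): the eight table entries
 * then encode 0, 1, 2, 4, 8, 16, 32 and 64 AUs, i.e. entry 0 is zero and
 * entry k is 2^(k-1) * cu for k >= 1.
 */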
10216
10217static void assign_local_cm_au_table(struct hfi1_devdata *dd, u8 vcu)
10218{
10219        assign_cm_au_table(dd, vcu_to_cu(vcu), SEND_CM_LOCAL_AU_TABLE0_TO3,
10220                                        SEND_CM_LOCAL_AU_TABLE4_TO7);
10221}
10222
10223void assign_remote_cm_au_table(struct hfi1_devdata *dd, u8 vcu)
10224{
10225        assign_cm_au_table(dd, vcu_to_cu(vcu), SEND_CM_REMOTE_AU_TABLE0_TO3,
10226                                        SEND_CM_REMOTE_AU_TABLE4_TO7);
10227}
10228
10229static void init_txe(struct hfi1_devdata *dd)
10230{
10231        int i;
10232
10233        /* enable all PIO, SDMA, general, and Egress errors */
10234        write_csr(dd, SEND_PIO_ERR_MASK, ~0ull);
10235        write_csr(dd, SEND_DMA_ERR_MASK, ~0ull);
10236        write_csr(dd, SEND_ERR_MASK, ~0ull);
10237        write_csr(dd, SEND_EGRESS_ERR_MASK, ~0ull);
10238
10239        /* enable all per-context and per-SDMA engine errors */
10240        for (i = 0; i < dd->chip_send_contexts; i++)
10241                write_kctxt_csr(dd, i, SEND_CTXT_ERR_MASK, ~0ull);
10242        for (i = 0; i < dd->chip_sdma_engines; i++)
10243                write_kctxt_csr(dd, i, SEND_DMA_ENG_ERR_MASK, ~0ull);
10244
10245        /* set the local CU to AU mapping */
10246        assign_local_cm_au_table(dd, dd->vcu);
10247
10248        /*
10249         * Set reasonable default for Credit Return Timer
10250         * Don't set on Simulator - causes it to choke.
10251         */
10252        if (dd->icode != ICODE_FUNCTIONAL_SIMULATOR)
10253                write_csr(dd, SEND_CM_TIMER_CTRL, HFI1_CREDIT_RETURN_RATE);
10254}
10255
10256int hfi1_set_ctxt_jkey(struct hfi1_devdata *dd, unsigned ctxt, u16 jkey)
10257{
10258        struct hfi1_ctxtdata *rcd = dd->rcd[ctxt];
10259        unsigned sctxt;
10260        int ret = 0;
10261        u64 reg;
10262
10263        if (!rcd || !rcd->sc) {
10264                ret = -EINVAL;
10265                goto done;
10266        }
10267        sctxt = rcd->sc->hw_context;
10268        reg = SEND_CTXT_CHECK_JOB_KEY_MASK_SMASK | /* mask is always 1's */
10269                ((jkey & SEND_CTXT_CHECK_JOB_KEY_VALUE_MASK) <<
10270                 SEND_CTXT_CHECK_JOB_KEY_VALUE_SHIFT);
10271        /* JOB_KEY_ALLOW_PERMISSIVE is not allowed by default */
10272        if (HFI1_CAP_KGET_MASK(rcd->flags, ALLOW_PERM_JKEY))
10273                reg |= SEND_CTXT_CHECK_JOB_KEY_ALLOW_PERMISSIVE_SMASK;
10274        write_kctxt_csr(dd, sctxt, SEND_CTXT_CHECK_JOB_KEY, reg);
10275        /*
10276         * Enable send-side J_KEY integrity check, unless this is A0 h/w
10277         * (due to A0 erratum).
10278         */
10279        if (!is_a0(dd)) {
10280                reg = read_kctxt_csr(dd, sctxt, SEND_CTXT_CHECK_ENABLE);
10281                reg |= SEND_CTXT_CHECK_ENABLE_CHECK_JOB_KEY_SMASK;
10282                write_kctxt_csr(dd, sctxt, SEND_CTXT_CHECK_ENABLE, reg);
10283        }
10284
10285        /* Enable J_KEY check on receive context. */
10286        reg = RCV_KEY_CTRL_JOB_KEY_ENABLE_SMASK |
10287                ((jkey & RCV_KEY_CTRL_JOB_KEY_VALUE_MASK) <<
10288                 RCV_KEY_CTRL_JOB_KEY_VALUE_SHIFT);
10289        write_kctxt_csr(dd, ctxt, RCV_KEY_CTRL, reg);
10290done:
10291        return ret;
10292}
10293
10294int hfi1_clear_ctxt_jkey(struct hfi1_devdata *dd, unsigned ctxt)
10295{
10296        struct hfi1_ctxtdata *rcd = dd->rcd[ctxt];
10297        unsigned sctxt;
10298        int ret = 0;
10299        u64 reg;
10300
10301        if (!rcd || !rcd->sc) {
10302                ret = -EINVAL;
10303                goto done;
10304        }
10305        sctxt = rcd->sc->hw_context;
10306        write_kctxt_csr(dd, sctxt, SEND_CTXT_CHECK_JOB_KEY, 0);
10307        /*
10308         * Disable send-side J_KEY integrity check, unless this is A0 h/w.
10309         * This check would not have been enabled for A0 h/w, see
10310         * set_ctxt_jkey().
10311         */
10312        if (!is_a0(dd)) {
10313                reg = read_kctxt_csr(dd, sctxt, SEND_CTXT_CHECK_ENABLE);
10314                reg &= ~SEND_CTXT_CHECK_ENABLE_CHECK_JOB_KEY_SMASK;
10315                write_kctxt_csr(dd, sctxt, SEND_CTXT_CHECK_ENABLE, reg);
10316        }
10317        /* Turn off the J_KEY on the receive side */
10318        write_kctxt_csr(dd, ctxt, RCV_KEY_CTRL, 0);
10319done:
10320        return ret;
10321}
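/*
 * Usage sketch (illustrative): a context open/close path might pair the
 * two calls above:
 *
 *	ret = hfi1_set_ctxt_jkey(dd, ctxt, jkey);
 *	if (ret)
 *		return ret;
 *	...
 *	hfi1_clear_ctxt_jkey(dd, ctxt);
 *
 * Both calls return -EINVAL if the receive context or its send context
 * has not been allocated.
 */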
10322
10323int hfi1_set_ctxt_pkey(struct hfi1_devdata *dd, unsigned ctxt, u16 pkey)
10324{
10325        struct hfi1_ctxtdata *rcd;
10326        unsigned sctxt;
10327        int ret = 0;
10328        u64 reg;
10329
10330        if (ctxt < dd->num_rcv_contexts)
10331                rcd = dd->rcd[ctxt];
10332        else {
10333                ret = -EINVAL;
10334                goto done;
10335        }
10336        if (!rcd || !rcd->sc) {
10337                ret = -EINVAL;
10338                goto done;
10339        }
10340        sctxt = rcd->sc->hw_context;
10341        reg = ((u64)pkey & SEND_CTXT_CHECK_PARTITION_KEY_VALUE_MASK) <<
10342                SEND_CTXT_CHECK_PARTITION_KEY_VALUE_SHIFT;
10343        write_kctxt_csr(dd, sctxt, SEND_CTXT_CHECK_PARTITION_KEY, reg);
10344        reg = read_kctxt_csr(dd, sctxt, SEND_CTXT_CHECK_ENABLE);
10345        reg |= SEND_CTXT_CHECK_ENABLE_CHECK_PARTITION_KEY_SMASK;
10346        write_kctxt_csr(dd, sctxt, SEND_CTXT_CHECK_ENABLE, reg);
10347done:
10348        return ret;
10349}
10350
10351int hfi1_clear_ctxt_pkey(struct hfi1_devdata *dd, unsigned ctxt)
10352{
10353        struct hfi1_ctxtdata *rcd;
10354        unsigned sctxt;
10355        int ret = 0;
10356        u64 reg;
10357
10358        if (ctxt < dd->num_rcv_contexts)
10359                rcd = dd->rcd[ctxt];
10360        else {
10361                ret = -EINVAL;
10362                goto done;
10363        }
10364        if (!rcd || !rcd->sc) {
10365                ret = -EINVAL;
10366                goto done;
10367        }
10368        sctxt = rcd->sc->hw_context;
10369        reg = read_kctxt_csr(dd, sctxt, SEND_CTXT_CHECK_ENABLE);
10370        reg &= ~SEND_CTXT_CHECK_ENABLE_CHECK_PARTITION_KEY_SMASK;
10371        write_kctxt_csr(dd, sctxt, SEND_CTXT_CHECK_ENABLE, reg);
10372        write_kctxt_csr(dd, sctxt, SEND_CTXT_CHECK_PARTITION_KEY, 0);
10373done:
10374        return ret;
10375}
10376
10377/*
10378 * Start doing the clean up of the chip. Our clean up happens in multiple
10379 * stages and this is just the first.
10380 */
10381void hfi1_start_cleanup(struct hfi1_devdata *dd)
10382{
10383        free_cntrs(dd);
10384        free_rcverr(dd);
10385        clean_up_interrupts(dd);
10386}
10387
10388#define HFI_BASE_GUID(dev) \
10389        ((dev)->base_guid & ~(1ULL << GUID_HFI_INDEX_SHIFT))
10390
10391/*
10392 * Certain chip functions need to be initialized only once per asic
10393 * instead of per-device. This function finds the peer device and
10394 * checks whether that chip initialization needs to be done by this
10395 * device.
10396 */
10397static void asic_should_init(struct hfi1_devdata *dd)
10398{
10399        unsigned long flags;
10400        struct hfi1_devdata *tmp, *peer = NULL;
10401
10402        spin_lock_irqsave(&hfi1_devs_lock, flags);
10403        /* Find our peer device */
10404        list_for_each_entry(tmp, &hfi1_dev_list, list) {
10405                if ((HFI_BASE_GUID(dd) == HFI_BASE_GUID(tmp)) &&
10406                    dd->unit != tmp->unit) {
10407                        peer = tmp;
10408                        break;
10409                }
10410        }
10411
10412        /*
10413         * "Claim" the ASIC for initialization if it hasn't been
10414         * "claimed" yet.
10415         */
10416        if (!peer || !(peer->flags & HFI1_DO_INIT_ASIC))
10417                dd->flags |= HFI1_DO_INIT_ASIC;
10418        spin_unlock_irqrestore(&hfi1_devs_lock, flags);
10419}
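/*
 * Example (illustrative): the two HFIs on one ASIC differ only in the GUID
 * bit at GUID_HFI_INDEX_SHIFT, so HFI_BASE_GUID() compares equal for peers.
 * Whichever device runs this first - or runs it while its peer has not set
 * HFI1_DO_INIT_ASIC - claims the shared ASIC initialization.
 */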
10420
10421/**
10422 * Allocate and initialize the device structure for the hfi.
10423 * @pdev: the pci_dev for hfi1_ib device
10424 * @ent: pci_device_id struct for this dev
10425 *
10426 * Also allocates, initializes, and returns the devdata struct for this
10427 * device instance
10428 *
10429 * This is global, and is called directly at init to set up the
10430 * chip-specific function pointers for later use.
10431 */
10432struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev,
10433                                  const struct pci_device_id *ent)
10434{
10435        struct hfi1_devdata *dd;
10436        struct hfi1_pportdata *ppd;
10437        u64 reg;
10438        int i, ret;
10439        static const char * const inames[] = { /* implementation names */
10440                "RTL silicon",
10441                "RTL VCS simulation",
10442                "RTL FPGA emulation",
10443                "Functional simulator"
10444        };
10445
10446        dd = hfi1_alloc_devdata(pdev,
10447                NUM_IB_PORTS * sizeof(struct hfi1_pportdata));
10448        if (IS_ERR(dd))
10449                goto bail;
10450        ppd = dd->pport;
10451        for (i = 0; i < dd->num_pports; i++, ppd++) {
10452                int vl;
10453                /* init common fields */
10454                hfi1_init_pportdata(pdev, ppd, dd, 0, 1);
10455                /* DC supports 4 link widths */
10456                ppd->link_width_supported =
10457                        OPA_LINK_WIDTH_1X | OPA_LINK_WIDTH_2X |
10458                        OPA_LINK_WIDTH_3X | OPA_LINK_WIDTH_4X;
10459                ppd->link_width_downgrade_supported =
10460                        ppd->link_width_supported;
10461                /* start out enabling only 4X */
10462                ppd->link_width_enabled = OPA_LINK_WIDTH_4X;
10463                ppd->link_width_downgrade_enabled =
10464                                        ppd->link_width_downgrade_supported;
10465                /* link width active is 0 when link is down */
10466                /* link width downgrade active is 0 when link is down */
10467
10468                if (num_vls < HFI1_MIN_VLS_SUPPORTED
10469                        || num_vls > HFI1_MAX_VLS_SUPPORTED) {
10470                        hfi1_early_err(&pdev->dev,
10471                                       "Invalid num_vls %u, using %u VLs\n",
10472                                    num_vls, HFI1_MAX_VLS_SUPPORTED);
10473                        num_vls = HFI1_MAX_VLS_SUPPORTED;
10474                }
10475                ppd->vls_supported = num_vls;
10476                ppd->vls_operational = ppd->vls_supported;
10477                /* Set the default MTU. */
10478                for (vl = 0; vl < num_vls; vl++)
10479                        dd->vld[vl].mtu = hfi1_max_mtu;
10480                dd->vld[15].mtu = MAX_MAD_PACKET;
10481                /*
10482                 * Set the initial values to reasonable default, will be set
10483                 * for real when link is up.
10484                 */
10485                ppd->lstate = IB_PORT_DOWN;
10486                ppd->overrun_threshold = 0x4;
10487                ppd->phy_error_threshold = 0xf;
10488                ppd->port_crc_mode_enabled = link_crc_mask;
10489                /* initialize supported LTP CRC mode */
10490                ppd->port_ltp_crc_mode = cap_to_port_ltp(link_crc_mask) << 8;
10491                /* initialize enabled LTP CRC mode */
10492                ppd->port_ltp_crc_mode |= cap_to_port_ltp(link_crc_mask) << 4;
10493                /* start in offline */
10494                ppd->host_link_state = HLS_DN_OFFLINE;
10495                init_vl_arb_caches(ppd);
10496        }
10497
10498        dd->link_default = HLS_DN_POLL;
10499
10500        /*
10501         * Do remaining PCIe setup and save PCIe values in dd.
10502         * Any error printing is already done by the init code.
10503         * On return, we have the chip mapped.
10504         */
10505        ret = hfi1_pcie_ddinit(dd, pdev, ent);
10506        if (ret < 0)
10507                goto bail_free;
10508
10509        /* verify that reads actually work, save revision for reset check */
10510        dd->revision = read_csr(dd, CCE_REVISION);
10511        if (dd->revision == ~(u64)0) {
10512                dd_dev_err(dd, "cannot read chip CSRs\n");
10513                ret = -EINVAL;
10514                goto bail_cleanup;
10515        }
10516        dd->majrev = (dd->revision >> CCE_REVISION_CHIP_REV_MAJOR_SHIFT)
10517                        & CCE_REVISION_CHIP_REV_MAJOR_MASK;
10518        dd->minrev = (dd->revision >> CCE_REVISION_CHIP_REV_MINOR_SHIFT)
10519                        & CCE_REVISION_CHIP_REV_MINOR_MASK;
10520
10521        /* obtain the hardware ID - NOT related to unit, which is a
10522           software enumeration */
10523        reg = read_csr(dd, CCE_REVISION2);
10524        dd->hfi1_id = (reg >> CCE_REVISION2_HFI_ID_SHIFT)
10525                                        & CCE_REVISION2_HFI_ID_MASK;
10526        /* the variable size will remove unwanted bits */
10527        dd->icode = reg >> CCE_REVISION2_IMPL_CODE_SHIFT;
10528        dd->irev = reg >> CCE_REVISION2_IMPL_REVISION_SHIFT;
10529        dd_dev_info(dd, "Implementation: %s, revision 0x%x\n",
10530                dd->icode < ARRAY_SIZE(inames) ? inames[dd->icode] : "unknown",
10531                (int)dd->irev);
10532
10533        /* speeds the hardware can support */
10534        dd->pport->link_speed_supported = OPA_LINK_SPEED_25G;
10535        /* speeds allowed to run at */
10536        dd->pport->link_speed_enabled = dd->pport->link_speed_supported;
10537        /* give a reasonable active value, will be set on link up */
10538        dd->pport->link_speed_active = OPA_LINK_SPEED_25G;
10539
10540        dd->chip_rcv_contexts = read_csr(dd, RCV_CONTEXTS);
10541        dd->chip_send_contexts = read_csr(dd, SEND_CONTEXTS);
10542        dd->chip_sdma_engines = read_csr(dd, SEND_DMA_ENGINES);
10543        dd->chip_pio_mem_size = read_csr(dd, SEND_PIO_MEM_SIZE);
10544        dd->chip_sdma_mem_size = read_csr(dd, SEND_DMA_MEM_SIZE);
10545        /* fix up link widths for emulation _p */
10546        ppd = dd->pport;
10547        if (dd->icode == ICODE_FPGA_EMULATION && is_emulator_p(dd)) {
10548                ppd->link_width_supported =
10549                        ppd->link_width_enabled =
10550                        ppd->link_width_downgrade_supported =
10551                        ppd->link_width_downgrade_enabled =
10552                                OPA_LINK_WIDTH_1X;
10553        }
10554        /* ensure num_vls isn't larger than number of sdma engines */
10555        if (HFI1_CAP_IS_KSET(SDMA) && num_vls > dd->chip_sdma_engines) {
10556                dd_dev_err(dd, "num_vls %u too large, using %u VLs\n",
10557                                num_vls, dd->chip_sdma_engines);
10558                ppd->vls_supported = num_vls = dd->chip_sdma_engines;
10559                ppd->vls_operational = ppd->vls_supported;
10560        }
10561
10562        /*
10563         * Convert the ns parameter to the 64 * cclocks used in the CSR.
10564         * Limit the max if larger than the field holds.  If timeout is
10565         * non-zero, then the calculated field will be at least 1.
10566         *
10567         * Must be after icode is set up - the cclock rate depends
10568         * on knowing the hardware being used.
10569         */
10570        dd->rcv_intr_timeout_csr = ns_to_cclock(dd, rcv_intr_timeout) / 64;
10571        if (dd->rcv_intr_timeout_csr >
10572                        RCV_AVAIL_TIME_OUT_TIME_OUT_RELOAD_MASK)
10573                dd->rcv_intr_timeout_csr =
10574                        RCV_AVAIL_TIME_OUT_TIME_OUT_RELOAD_MASK;
10575        else if (dd->rcv_intr_timeout_csr == 0 && rcv_intr_timeout)
10576                dd->rcv_intr_timeout_csr = 1;
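        /*
         * Worked example (illustrative, assuming a 500 MHz cclock): a
         * hypothetical 840 ns timeout converts to ~420 cclocks, and
         * 420 / 64 = 6 lands in the reload field; any nonzero request
         * that rounds down to 0 is bumped back up to 1.
         */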
10577
10578        /* needs to be done before we look for the peer device */
10579        read_guid(dd);
10580
10581        /* should this device init the ASIC block? */
10582        asic_should_init(dd);
10583
10584        /* obtain chip sizes, reset chip CSRs */
10585        init_chip(dd);
10586
10587        /* read in the PCIe link speed information */
10588        ret = pcie_speeds(dd);
10589        if (ret)
10590                goto bail_cleanup;
10591
10592        /* read in firmware */
10593        ret = hfi1_firmware_init(dd);
10594        if (ret)
10595                goto bail_cleanup;
10596
10597        /*
10598         * In general, the PCIe Gen3 transition must occur after the
10599         * chip has been idled (so it won't initiate any PCIe transactions
10600         * e.g. an interrupt) and before the driver changes any registers
10601         * (the transition will reset the registers).
10602         *
10603         * In particular, place this call after:
10604         * - init_chip()     - the chip will not initiate any PCIe transactions
10605         * - pcie_speeds()   - reads the current link speed
10606         * - hfi1_firmware_init() - the needed firmware is ready to be
10607         *                          downloaded
10608         */
10609        ret = do_pcie_gen3_transition(dd);
10610        if (ret)
10611                goto bail_cleanup;
10612
10613        /* start setting dd values and adjusting CSRs */
10614        init_early_variables(dd);
10615
10616        parse_platform_config(dd);
10617
10618        /* add board names as they are defined */
10619        dd->boardname = kmalloc(64, GFP_KERNEL);
10620        if (!dd->boardname) {
                     ret = -ENOMEM;
10621                goto bail_cleanup;
             }
10622        snprintf(dd->boardname, 64, "Board ID 0x%llx",
10623                 dd->revision >> CCE_REVISION_BOARD_ID_LOWER_NIBBLE_SHIFT
10624                    & CCE_REVISION_BOARD_ID_LOWER_NIBBLE_MASK);
10625
10626        snprintf(dd->boardversion, BOARD_VERS_MAX,
10627                 "ChipABI %u.%u, %s, ChipRev %u.%u, SW Compat %llu\n",
10628                 HFI1_CHIP_VERS_MAJ, HFI1_CHIP_VERS_MIN,
10629                 dd->boardname,
10630                 (u32)dd->majrev,
10631                 (u32)dd->minrev,
10632                 (dd->revision >> CCE_REVISION_SW_SHIFT)
10633                    & CCE_REVISION_SW_MASK);
10634
10635        ret = set_up_context_variables(dd);
10636        if (ret)
10637                goto bail_cleanup;
10638
10639        /* set initial RXE CSRs */
10640        init_rxe(dd);
10641        /* set initial TXE CSRs */
10642        init_txe(dd);
10643        /* set initial non-RXE, non-TXE CSRs */
10644        init_other(dd);
10645        /* set up KDETH QP prefix in both RX and TX CSRs */
10646        init_kdeth_qp(dd);
10647
10648        /* send contexts must be set up before receive contexts */
10649        ret = init_send_contexts(dd);
10650        if (ret)
10651                goto bail_cleanup;
10652
10653        ret = hfi1_create_ctxts(dd);
10654        if (ret)
10655                goto bail_cleanup;
10656
10657        dd->rcvhdrsize = DEFAULT_RCVHDRSIZE;
10658        /*
10659         * rcd[0] is guaranteed to be valid by this point. Also, all
10660         * contexts are using the same value, as per the module parameter.
10661         */
10662        /* the RHF occupies the last 8 bytes (2 dwords) of each entry */
             dd->rhf_offset = dd->rcd[0]->rcvhdrqentsize - (sizeof(u64) / sizeof(u32));
10663
10664        ret = init_pervl_scs(dd);
10665        if (ret)
10666                goto bail_cleanup;
10667
10668        /* sdma init */
10669        for (i = 0; i < dd->num_pports; ++i) {
10670                ret = sdma_init(dd, i);
10671                if (ret)
10672                        goto bail_cleanup;
10673        }
10674
10675        /* use contexts created by hfi1_create_ctxts */
10676        ret = set_up_interrupts(dd);
10677        if (ret)
10678                goto bail_cleanup;
10679
10680        /* set up LCB access - must be after set_up_interrupts() */
10681        init_lcb_access(dd);
10682
10683        snprintf(dd->serial, SERIAL_MAX, "0x%08llx\n",
10684                 dd->base_guid & 0xFFFFFF);
10685
10686        dd->oui1 = dd->base_guid >> 56 & 0xFF;
10687        dd->oui2 = dd->base_guid >> 48 & 0xFF;
10688        dd->oui3 = dd->base_guid >> 40 & 0xFF;
10689
10690        ret = load_firmware(dd); /* asymmetric with dispose_firmware() */
10691        if (ret)
10692                goto bail_clear_intr;
10693        check_fabric_firmware_versions(dd);
10694
10695        thermal_init(dd);
10696
10697        ret = init_cntrs(dd);
10698        if (ret)
10699                goto bail_clear_intr;
10700
10701        ret = init_rcverr(dd);
10702        if (ret)
10703                goto bail_free_cntrs;
10704
10705        ret = eprom_init(dd);
10706        if (ret)
10707                goto bail_free_rcverr;
10708
10709        goto bail;
10710
10711bail_free_rcverr:
10712        free_rcverr(dd);
10713bail_free_cntrs:
10714        free_cntrs(dd);
10715bail_clear_intr:
10716        clean_up_interrupts(dd);
10717bail_cleanup:
10718        hfi1_pcie_ddcleanup(dd);
10719bail_free:
10720        hfi1_free_devdata(dd);
10721        dd = ERR_PTR(ret);
10722bail:
10723        return dd;
10724}
10725
10726static u16 delay_cycles(struct hfi1_pportdata *ppd, u32 desired_egress_rate,
10727                        u32 dw_len)
10728{
10729        u32 delta_cycles;
10730        u32 current_egress_rate = ppd->current_egress_rate;
10731        /* rates here are in units of 10^6 bits/sec */
10732
10733        if (desired_egress_rate == -1)
10734                return 0; /* shouldn't happen */
10735
10736        if (desired_egress_rate >= current_egress_rate)
10737                return 0; /* we can only slow down, not speed up */
10738
10739        delta_cycles = egress_cycles(dw_len * 4, desired_egress_rate) -
10740                        egress_cycles(dw_len * 4, current_egress_rate);
10741
10742        return (u16)delta_cycles;
10743}
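/*
 * Worked example (illustrative): throttling a 1000-dword packet from a
 * current egress rate of 25000 Mb/s down to a desired 1000 Mb/s yields
 * delta_cycles = egress_cycles(4000, 1000) - egress_cycles(4000, 25000),
 * the extra fabric-clock cycles needed to stretch the packet to the slower
 * rate; the difference is truncated to the u16 PBC delay field.
 */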
10744
10746/**
10747 * create_pbc - build a pbc for transmission
10748 * @ppd: the port data
10749 * @flags: special case flags or-ed in built pbc
10750 * @srate_mbs: static rate in Mb/s
10751 * @vl: vl
10752 * @dw_len: dword length (header words + data words + pbc words)
10752 *
10753 * Create a PBC with the given flags, rate, VL, and length.
10754 *
10755 * NOTE: The PBC created will not insert any HCRC - all callers but one are
10756 * for verbs, which does not use this PSM feature.  The lone other caller
10757 * is for the diagnostic interface which calls this if the user does not
10758 * supply their own PBC.
10759 */
10760u64 create_pbc(struct hfi1_pportdata *ppd, u64 flags, int srate_mbs, u32 vl,
10761               u32 dw_len)
10762{
10763        u64 pbc, delay = 0;
10764
10765        if (unlikely(srate_mbs))
10766                delay = delay_cycles(ppd, srate_mbs, dw_len);
10767
10768        pbc = flags
10769                | (delay << PBC_STATIC_RATE_CONTROL_COUNT_SHIFT)
10770                | ((u64)PBC_IHCRC_NONE << PBC_INSERT_HCRC_SHIFT)
10771                | (vl & PBC_VL_MASK) << PBC_VL_SHIFT
10772                | (dw_len & PBC_LENGTH_DWS_MASK)
10773                        << PBC_LENGTH_DWS_SHIFT;
10774
10775        return pbc;
10776}
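/*
 * Usage sketch (illustrative): a PBC for a 4-dword packet on VL 0 with no
 * special flags and no static rate throttling:
 *
 *	u64 pbc = create_pbc(ppd, 0, 0, 0, 4);
 *
 * With srate_mbs == 0 no delay cycles are added, HCRC insertion stays
 * disabled (PBC_IHCRC_NONE), and the VL and dword length are packed into
 * their PBC fields.
 */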
10777
10778#define SBUS_THERMAL    0x4f
10779#define SBUS_THERM_MONITOR_MODE 0x1
10780
10781#define THERM_FAILURE(dev, ret, reason) \
10782        dd_dev_err((dev),                                               \
10783                   "Thermal sensor initialization failed: %s (%d)\n",   \
10784                   (reason), (ret))
10785
10786/*
10787 * Initialize the Avago Thermal sensor.
10788 *
10789 * After initialization, enable polling of thermal sensor through
10790 * SBus interface. In order for this to work, the SBus Master
10791 * firmware has to be loaded due to the fact that the HW polling
10792 * logic uses SBus interrupts, which are not supported with
10793 * default firmware. Otherwise, no data will be returned through
10794 * the ASIC_STS_THERM CSR.
10795 */
10796static int thermal_init(struct hfi1_devdata *dd)
10797{
10798        int ret = 0;
10799
10800        if (dd->icode != ICODE_RTL_SILICON ||
10801            !(dd->flags & HFI1_DO_INIT_ASIC))
10802                return ret;
10803
10804        acquire_hw_mutex(dd);
10805        dd_dev_info(dd, "Initializing thermal sensor\n");
10806
10807        /* Thermal Sensor Initialization */
10808        /*    Step 1: Reset the Thermal SBus Receiver */
10809        ret = sbus_request_slow(dd, SBUS_THERMAL, 0x0,
10810                                RESET_SBUS_RECEIVER, 0);
10811        if (ret) {
10812                THERM_FAILURE(dd, ret, "Bus Reset");
10813                goto done;
10814        }
10815        /*    Step 2: Set Reset bit in Thermal block */
10816        ret = sbus_request_slow(dd, SBUS_THERMAL, 0x0,
10817                                WRITE_SBUS_RECEIVER, 0x1);
10818        if (ret) {
10819                THERM_FAILURE(dd, ret, "Therm Block Reset");
10820                goto done;
10821        }
10822        /*    Step 3: Write clock divider value (100MHz -> 2MHz) */
10823        ret = sbus_request_slow(dd, SBUS_THERMAL, 0x1,
10824                                WRITE_SBUS_RECEIVER, 0x32);
10825        if (ret) {
10826                THERM_FAILURE(dd, ret, "Write Clock Div");
10827                goto done;
10828        }
10829        /*    Step 4: Select temperature mode */
10830        ret = sbus_request_slow(dd, SBUS_THERMAL, 0x3,
10831                                WRITE_SBUS_RECEIVER,
10832                                SBUS_THERM_MONITOR_MODE);
10833        if (ret) {
10834                THERM_FAILURE(dd, ret, "Write Mode Sel");
10835                goto done;
10836        }
10837        /*    Step 5: De-assert block reset and start conversion */
10838        ret = sbus_request_slow(dd, SBUS_THERMAL, 0x0,
10839                                WRITE_SBUS_RECEIVER, 0x2);
10840        if (ret) {
10841                THERM_FAILURE(dd, ret, "Write Reset Deassert");
10842                goto done;
10843        }
10844        /*    Step 5.1: Wait for first conversion (21.5ms per spec) */
10845        msleep(22);
10846
10847        /* Enable polling of thermal readings */
10848        write_csr(dd, ASIC_CFG_THERM_POLL_EN, 0x1);
10849done:
10850        release_hw_mutex(dd);
10851        return ret;
10852}
10853
10854static void handle_temp_err(struct hfi1_devdata *dd)
10855{
10856        struct hfi1_pportdata *ppd = &dd->pport[0];
10857        /*
10858         * Thermal Critical Interrupt
10859         * Put the device into forced freeze mode, take link down to
10860         * offline, and put DC into reset.
10861         */
10862        dd_dev_emerg(dd,
10863                     "Critical temperature reached! Forcing device into freeze mode!\n");
10864        dd->flags |= HFI1_FORCED_FREEZE;
10865        start_freeze_handling(ppd, FREEZE_SELF | FREEZE_ABORT);
10866        /*
10867         * Shut DC down as much and as quickly as possible.
10868         *
10869         * Step 1: Take the link down to OFFLINE. This will cause the
10870         *         8051 to put the Serdes in reset. However, we don't want to
10871         *         go through the entire link state machine since we want to
10872         *         shutdown ASAP. Furthermore, this is not a graceful shutdown
10873         *         but rather an attempt to save the chip.
10874         *         Code below is almost the same as quiet_serdes() but avoids
10875         *         all the extra work and the sleeps.
10876         */
10877        ppd->driver_link_ready = 0;
10878        ppd->link_enabled = 0;
10879        set_physical_link_state(dd, PLS_OFFLINE |
10880                                (OPA_LINKDOWN_REASON_SMA_DISABLED << 8));
10881        /*
10882         * Step 2: Shutdown LCB and 8051
10883         *         After shutdown, do not restore DC_CFG_RESET value.
10884         */
10885        dc_shutdown(dd);
10886}
10887