linux/drivers/edac/i7core_edac.c
<<
>>
Prefs
   1/* Intel i7 core/Nehalem Memory Controller kernel module
   2 *
   3 * This driver supports the memory controllers found on the Intel
   4 * processor families i7core, i7core 7xx/8xx, i5core, Xeon 35xx,
   5 * Xeon 55xx and Xeon 56xx also known as Nehalem, Nehalem-EP, Lynnfield
   6 * and Westmere-EP.
   7 *
   8 * This file may be distributed under the terms of the
   9 * GNU General Public License version 2 only.
  10 *
  11 * Copyright (c) 2009-2010 by:
  12 *       Mauro Carvalho Chehab
  13 *
  14 * Red Hat Inc. http://www.redhat.com
  15 *
  16 * Forked and adapted from the i5400_edac driver
  17 *
  18 * Based on the following public Intel datasheets:
  19 * Intel Core i7 Processor Extreme Edition and Intel Core i7 Processor
  20 * Datasheet, Volume 2:
  21 *      http://download.intel.com/design/processor/datashts/320835.pdf
  22 * Intel Xeon Processor 5500 Series Datasheet Volume 2
  23 *      http://www.intel.com/Assets/PDF/datasheet/321322.pdf
  24 * also available at:
  25 *      http://www.arrownac.com/manufacturers/intel/s/nehalem/5500-datasheet-v2.pdf
  26 */
  27
  28#include <linux/module.h>
  29#include <linux/init.h>
  30#include <linux/pci.h>
  31#include <linux/pci_ids.h>
  32#include <linux/slab.h>
  33#include <linux/delay.h>
  34#include <linux/dmi.h>
  35#include <linux/edac.h>
  36#include <linux/mmzone.h>
  37#include <linux/smp.h>
  38#include <asm/mce.h>
  39#include <asm/processor.h>
  40#include <asm/div64.h>
  41
  42#include "edac_module.h"
  43
  44/* Static vars */
   /* List of per-socket struct i7core_dev entries; filled by alloc_i7core_dev() */
   45static LIST_HEAD(i7core_edac_list);
   /* Driver-wide mutex. NOTE(review): its users are outside the visible part of the file */
   46static DEFINE_MUTEX(i7core_edac_lock);
   /* NOTE(review): presumably a "probe already ran" marker -- confirm in the probe code */
   47static int probed;
   48
   /*
    * Integer module parameter, exposed with mode 0444 (world-readable,
    * not writable through sysfs). Defaults to 0 as a static variable.
    */
   49static int use_pci_fixup;
   50module_param(use_pci_fixup, int, 0444);
   51MODULE_PARM_DESC(use_pci_fixup, "Enable PCI fixup to seek for hidden devices");
  52/*
  53 * This is used for Nehalem-EP and Nehalem-EX devices, where the non-core
  54 * registers start at bus 255, and are not reported by BIOS.
   55 * So far only systems with up to 2 sockets have been seen. To support more
   56 * QPI (Quick Path Interconnect) socket buses, just increase this number.
  57 */
  58#define MAX_SOCKET_BUSES        2
  59
  60
  61/*
  62 * Alter this version for the module when modifications are made
  63 */
  64#define I7CORE_REVISION    " Ver: 1.0.0"
  65#define EDAC_MOD_STR      "i7core_edac"
  66
  67/*
  68 * Debug macros
  69 */
   /* edac_printk() wrapper that always passes the "i7core" prefix string */
   70#define i7core_printk(level, fmt, arg...)                       \
   71        edac_printk(level, "i7core", fmt, ##arg)
   72
   /* Same, but forwards to edac_mc_chipset_printk() for a given mem_ctl_info */
   73#define i7core_mc_printk(mci, level, fmt, arg...)               \
   74        edac_mc_chipset_printk(mci, level, "i7core", fmt, ##arg)
  75
  76/*
  77 * i7core Memory Controller Registers
  78 */
  79
  80        /* OFFSETS for Device 0 Function 0 */
  81
  82#define MC_CFG_CONTROL  0x90
  83  #define MC_CFG_UNLOCK         0x02
  84  #define MC_CFG_LOCK           0x00
  85
  86        /* OFFSETS for Device 3 Function 0 */
  87
  88#define MC_CONTROL      0x48
  89#define MC_STATUS       0x4c
  90#define MC_MAX_DOD      0x64
  91
  92/*
  93 * OFFSETS for Device 3 Function 4, as indicated on Xeon 5500 datasheet:
  94 * http://www.arrownac.com/manufacturers/intel/s/nehalem/5500-datasheet-v2.pdf
  95 */
  96
   /*
    * Each counter is a 15-bit field (mask 0x7fff): RCV1 carries DIMM2 in
    * bits 14:0; RCV0 carries DIMM1 in bits 30:16 and DIMM0 in bits 14:0.
    */
   97#define MC_TEST_ERR_RCV1        0x60
   98  #define DIMM2_COR_ERR(r)                      ((r) & 0x7fff)
   99
  100#define MC_TEST_ERR_RCV0        0x64
  101  #define DIMM1_COR_ERR(r)                      (((r) >> 16) & 0x7fff)
  102  #define DIMM0_COR_ERR(r)                      ((r) & 0x7fff)
 103
 104/* OFFSETS for Device 3 Function 2, as indicated on Xeon 5500 datasheet */
 105#define MC_SSRCONTROL           0x48
 106  #define SSR_MODE_DISABLE      0x00
 107  #define SSR_MODE_ENABLE       0x01
 108  #define SSR_MODE_MASK         0x03
 109
 110#define MC_SCRUB_CONTROL        0x4c
 111  #define STARTSCRUB            (1 << 24)
 112  #define SCRUBINTERVAL_MASK    0xffffff
 113
 114#define MC_COR_ECC_CNT_0        0x80
 115#define MC_COR_ECC_CNT_1        0x84
 116#define MC_COR_ECC_CNT_2        0x88
 117#define MC_COR_ECC_CNT_3        0x8c
 118#define MC_COR_ECC_CNT_4        0x90
 119#define MC_COR_ECC_CNT_5        0x94
 120
  /*
   * Field extractors for a 32-bit value holding two 15-bit counters:
   * "top" is bits 30:16, "bottom" is bits 14:0.
   * NOTE(review): presumably applied to the MC_COR_ECC_CNT_x registers -- confirm.
   */
  121#define DIMM_TOP_COR_ERR(r)                     (((r) >> 16) & 0x7fff)
  122#define DIMM_BOT_COR_ERR(r)                     ((r) & 0x7fff)
 123
 124
 125        /* OFFSETS for Devices 4,5 and 6 Function 0 */
 126
 127#define MC_CHANNEL_DIMM_INIT_PARAMS 0x58
 128  #define THREE_DIMMS_PRESENT           (1 << 24)
 129  #define SINGLE_QUAD_RANK_PRESENT      (1 << 23)
 130  #define QUAD_RANK_PRESENT             (1 << 22)
 131  #define REGISTERED_DIMM               (1 << 15)
 132
 133#define MC_CHANNEL_MAPPER       0x60
 134  #define RDLCH(r, ch)          ((((r) >> (3 + (ch * 6))) & 0x07) - 1)
 135  #define WRLCH(r, ch)          ((((r) >> (ch * 6)) & 0x07) - 1)
 136
 137#define MC_CHANNEL_RANK_PRESENT 0x7c
 138  #define RANK_PRESENT_MASK             0xffff
 139
 140#define MC_CHANNEL_ADDR_MATCH   0xf0
 141#define MC_CHANNEL_ERROR_MASK   0xf8
 142#define MC_CHANNEL_ERROR_INJECT 0xfc
 143  #define INJECT_ADDR_PARITY    0x10
 144  #define INJECT_ECC            0x08
 145  #define MASK_CACHELINE        0x06
 146  #define MASK_FULL_CACHELINE   0x06
 147  #define MASK_MSB32_CACHELINE  0x04
 148  #define MASK_LSB32_CACHELINE  0x02
 149  #define NO_MASK_CACHELINE     0x00
 150  #define REPEAT_EN             0x01
 151
 152        /* OFFSETS for Devices 4,5 and 6 Function 1 */
 153
 154#define MC_DOD_CH_DIMM0         0x48
 155#define MC_DOD_CH_DIMM1         0x4c
 156#define MC_DOD_CH_DIMM2         0x50
 157  #define RANKOFFSET_MASK       ((1 << 12) | (1 << 11) | (1 << 10))
 158  #define RANKOFFSET(x)         ((x & RANKOFFSET_MASK) >> 10)
 159  #define DIMM_PRESENT_MASK     (1 << 9)
 160  #define DIMM_PRESENT(x)       (((x) & DIMM_PRESENT_MASK) >> 9)
 161  #define MC_DOD_NUMBANK_MASK           ((1 << 8) | (1 << 7))
 162  #define MC_DOD_NUMBANK(x)             (((x) & MC_DOD_NUMBANK_MASK) >> 7)
 163  #define MC_DOD_NUMRANK_MASK           ((1 << 6) | (1 << 5))
 164  #define MC_DOD_NUMRANK(x)             (((x) & MC_DOD_NUMRANK_MASK) >> 5)
 165  #define MC_DOD_NUMROW_MASK            ((1 << 4) | (1 << 3) | (1 << 2))
 166  #define MC_DOD_NUMROW(x)              (((x) & MC_DOD_NUMROW_MASK) >> 2)
 167  #define MC_DOD_NUMCOL_MASK            3
 168  #define MC_DOD_NUMCOL(x)              ((x) & MC_DOD_NUMCOL_MASK)
 169
 170#define MC_RANK_PRESENT         0x7c
 171
 172#define MC_SAG_CH_0     0x80
 173#define MC_SAG_CH_1     0x84
 174#define MC_SAG_CH_2     0x88
 175#define MC_SAG_CH_3     0x8c
 176#define MC_SAG_CH_4     0x90
 177#define MC_SAG_CH_5     0x94
 178#define MC_SAG_CH_6     0x98
 179#define MC_SAG_CH_7     0x9c
 180
 181#define MC_RIR_LIMIT_CH_0       0x40
 182#define MC_RIR_LIMIT_CH_1       0x44
 183#define MC_RIR_LIMIT_CH_2       0x48
 184#define MC_RIR_LIMIT_CH_3       0x4C
 185#define MC_RIR_LIMIT_CH_4       0x50
 186#define MC_RIR_LIMIT_CH_5       0x54
 187#define MC_RIR_LIMIT_CH_6       0x58
 188#define MC_RIR_LIMIT_CH_7       0x5C
 189#define MC_RIR_LIMIT_MASK       ((1 << 10) - 1)
 190
 191#define MC_RIR_WAY_CH           0x80
 192  #define MC_RIR_WAY_OFFSET_MASK        (((1 << 14) - 1) & ~0x7)
 193  #define MC_RIR_WAY_RANK_MASK          0x7
 194
 195/*
 196 * i7core structs
 197 */
 198
 199#define NUM_CHANS 3
 200#define MAX_DIMMS 3             /* Max DIMMS per channel */
 201#define MAX_MCR_FUNC  4
 202#define MAX_CHAN_FUNC 3
 203
  /*
   * Raw register snapshot taken by get_dimm_config(): the values read from
   * MC_CONTROL, MC_STATUS, MC_MAX_DOD and MC_CHANNEL_MAPPER, in that order.
   */
  204struct i7core_info {
  205        u32     mc_control;
  206        u32     mc_status;
  207        u32     max_dod;
  208        u32     ch_map;
  209};
 210
 211
  /*
   * Error-injection parameters, edited through the sysfs store handlers.
   * The handlers clear @enable (via disable_inject()) before changing a value.
   */
  212struct i7core_inject {
  213        int     enable;        /* non-zero while injection is armed */
  214
  215        u32     section;       /* accepted range 0..3 (see section store) */
  216        u32     type;          /* accepted range 0..7 (see type store) */
  217        u32     eccmask;       /* any value that fits; stored truncated to u32 */
  218
  219        /* Error address mask */
  /* @channel also indexes pvt->pci_ch[] in disable_inject() */
  220        int channel, dimm, rank, bank, page, col;
  221};
 222
  /*
   * Per-channel flags. get_dimm_config() sets each bool when the matching
   * bit (THREE_DIMMS_PRESENT, SINGLE_QUAD_RANK_PRESENT, QUAD_RANK_PRESENT)
   * is set in MC_CHANNEL_DIMM_INIT_PARAMS.
   */
  223struct i7core_channel {
  224        bool            is_3dimms_present;
  225        bool            is_single_4rank;
  226        bool            has_4rank;
  /* NOTE(review): not written by the code visible here -- confirm its users */
  227        u32             dimms;
  228};
 229
  /*
   * Describes one PCI function the driver looks for. @dev, @func and
   * @dev_id are filled by PCI_DESCR(); @optional is set to 1 in the tables
   * for functions that may legitimately be absent.
   */
  230struct pci_id_descr {
  231        int                     dev;
  232        int                     func;
  233        int                     dev_id;
  234        int                     optional;
  235};
 236
  /* One descriptor array plus its element count (see PCI_ID_TABLE_ENTRY) */
  237struct pci_id_table {
  238        const struct pci_id_descr       *descr;
  239        int                             n_devs;
  240};
 241
  /*
   * Per-socket bookkeeping, created by alloc_i7core_dev() and linked into
   * i7core_edac_list.
   */
  242struct i7core_dev {
  243        struct list_head        list;      /* node in i7core_edac_list */
  244        u8                      socket;    /* lookup key for get_i7core_dev() */
  245        struct pci_dev          **pdev;    /* zeroed array of n_devs pointers */
  246        int                     n_devs;    /* copied from the pci_id_table */
  247        struct mem_ctl_info     *mci;      /* NOTE(review): set outside the visible code */
  248};
 249
  /*
   * Driver-private data reached through mci->pvt_info.
   */
  250struct i7core_pvt {
  251        struct device *addrmatch_dev, *chancounts_dev;
  252
  253        struct pci_dev  *pci_noncore;
  /* pci_mcr[0] is the device get_dimm_config() reads MC_CONTROL etc. from */
  254        struct pci_dev  *pci_mcr[MAX_MCR_FUNC + 1];
  /* indexed [channel][function]; function 0 and 1 are read by get_dimm_config() */
  255        struct pci_dev  *pci_ch[NUM_CHANS][MAX_CHAN_FUNC + 1];
  256
  257        struct i7core_dev *i7core_dev;
  258
  259        struct i7core_info      info;
  260        struct i7core_inject    inject;
  261        struct i7core_channel   channel[NUM_CHANS];
  262
  263        int             ce_count_available;
  264
  265                        /* ECC corrected errors counts per udimm */
  266        unsigned long   udimm_ce_count[MAX_DIMMS];
  267        int             udimm_last_ce_count[MAX_DIMMS];
  268                        /* ECC corrected errors counts per rdimm */
  269        unsigned long   rdimm_ce_count[NUM_CHANS][MAX_DIMMS];
  270        int             rdimm_last_ce_count[NUM_CHANS][MAX_DIMMS];
  271
  272        bool            is_registered, enable_scrub;
  273
  274        /* DCLK Frequency used for computing scrub rate */
  275        int                     dclk_freq;
  276
  277        /* Struct to control EDAC polling */
  278        struct edac_pci_ctl_info *i7core_pci;
  279};
 280
  /*
   * Expands to the designated initializers for the dev, func and dev_id
   * members of a struct pci_id_descr; callers may append further
   * initializers (e.g. ".optional = 1") inside the same braces.
   */
  281#define PCI_DESCR(device, function, device_id)  \
  282        .dev = (device),                        \
  283        .func = (function),                     \
  284        .dev_id = (device_id)
 285
  /*
   * PCI functions looked up for the Nehalem (PCI_DEVICE_ID_INTEL_I7_*)
   * devices: memory controller on device 3, channels 0-2 on devices 4-6,
   * and the non-core device 0 function 0.
   */
  286static const struct pci_id_descr pci_dev_descr_i7core_nehalem[] = {
  287                /* Memory controller */
  288        { PCI_DESCR(3, 0, PCI_DEVICE_ID_INTEL_I7_MCR)     },
  289        { PCI_DESCR(3, 1, PCI_DEVICE_ID_INTEL_I7_MC_TAD)  },
  290                        /* Exists only for RDIMM */
  291        { PCI_DESCR(3, 2, PCI_DEVICE_ID_INTEL_I7_MC_RAS), .optional = 1  },
  292        { PCI_DESCR(3, 4, PCI_DEVICE_ID_INTEL_I7_MC_TEST) },
  293
  294                /* Channel 0 */
  295        { PCI_DESCR(4, 0, PCI_DEVICE_ID_INTEL_I7_MC_CH0_CTRL) },
  296        { PCI_DESCR(4, 1, PCI_DEVICE_ID_INTEL_I7_MC_CH0_ADDR) },
  297        { PCI_DESCR(4, 2, PCI_DEVICE_ID_INTEL_I7_MC_CH0_RANK) },
  298        { PCI_DESCR(4, 3, PCI_DEVICE_ID_INTEL_I7_MC_CH0_TC)   },
  299
  300                /* Channel 1 */
  301        { PCI_DESCR(5, 0, PCI_DEVICE_ID_INTEL_I7_MC_CH1_CTRL) },
  302        { PCI_DESCR(5, 1, PCI_DEVICE_ID_INTEL_I7_MC_CH1_ADDR) },
  303        { PCI_DESCR(5, 2, PCI_DEVICE_ID_INTEL_I7_MC_CH1_RANK) },
  304        { PCI_DESCR(5, 3, PCI_DEVICE_ID_INTEL_I7_MC_CH1_TC)   },
  305
  306                /* Channel 2 */
  307        { PCI_DESCR(6, 0, PCI_DEVICE_ID_INTEL_I7_MC_CH2_CTRL) },
  308        { PCI_DESCR(6, 1, PCI_DEVICE_ID_INTEL_I7_MC_CH2_ADDR) },
  309        { PCI_DESCR(6, 2, PCI_DEVICE_ID_INTEL_I7_MC_CH2_RANK) },
  310        { PCI_DESCR(6, 3, PCI_DEVICE_ID_INTEL_I7_MC_CH2_TC)   },
  311
  312                /* Generic Non-core registers */
  313        /*
  314         * This is the PCI device on i7core and on Xeon 35xx (8086:2c41)
  315         * On Xeon 55xx, however, it has a different id (8086:2c40). So,
  316         * the probing code needs to test for the other address in case of
  317         * failure of this one
  318         */
  319        { PCI_DESCR(0, 0, PCI_DEVICE_ID_INTEL_I7_NONCORE)  },
  320
  321};
 322
  /*
   * PCI functions looked up for Lynnfield. Compared with the Nehalem table
   * there is no device 3 function 2 entry and no channel 2 (device 6).
   */
  323static const struct pci_id_descr pci_dev_descr_lynnfield[] = {
  324        { PCI_DESCR( 3, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_MCR)         },
  325        { PCI_DESCR( 3, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_TAD)      },
  326        { PCI_DESCR( 3, 4, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_TEST)     },
  327
  328        { PCI_DESCR( 4, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_CTRL) },
  329        { PCI_DESCR( 4, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_ADDR) },
  330        { PCI_DESCR( 4, 2, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_RANK) },
  331        { PCI_DESCR( 4, 3, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_TC)   },
  332
  333        { PCI_DESCR( 5, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_CTRL) },
  334        { PCI_DESCR( 5, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_ADDR) },
  335        { PCI_DESCR( 5, 2, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_RANK) },
  336        { PCI_DESCR( 5, 3, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_TC)   },
  337
  338        /*
  339         * This PCI device has an alternate address on some
  340         * processors, like the Core i7 860
  341         */
  342        { PCI_DESCR( 0, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE)     },
  343};
 344
  /*
   * PCI functions looked up for Westmere. Same layout as the Nehalem table;
   * the device IDs use the PCI_DEVICE_ID_INTEL_LYNNFIELD_*_REV2 names.
   */
  345static const struct pci_id_descr pci_dev_descr_i7core_westmere[] = {
  346                /* Memory controller */
  347        { PCI_DESCR(3, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_MCR_REV2)     },
  348        { PCI_DESCR(3, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_TAD_REV2)  },
  349                        /* Exists only for RDIMM */
  350        { PCI_DESCR(3, 2, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_RAS_REV2), .optional = 1  },
  351        { PCI_DESCR(3, 4, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_TEST_REV2) },
  352
  353                /* Channel 0 */
  354        { PCI_DESCR(4, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_CTRL_REV2) },
  355        { PCI_DESCR(4, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_ADDR_REV2) },
  356        { PCI_DESCR(4, 2, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_RANK_REV2) },
  357        { PCI_DESCR(4, 3, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_TC_REV2)   },
  358
  359                /* Channel 1 */
  360        { PCI_DESCR(5, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_CTRL_REV2) },
  361        { PCI_DESCR(5, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_ADDR_REV2) },
  362        { PCI_DESCR(5, 2, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_RANK_REV2) },
  363        { PCI_DESCR(5, 3, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_TC_REV2)   },
  364
  365                /* Channel 2 */
  366        { PCI_DESCR(6, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH2_CTRL_REV2) },
  367        { PCI_DESCR(6, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH2_ADDR_REV2) },
  368        { PCI_DESCR(6, 2, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH2_RANK_REV2) },
  369        { PCI_DESCR(6, 3, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH2_TC_REV2)   },
  370
  371                /* Generic Non-core registers */
  372        { PCI_DESCR(0, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE_REV2)  },
  373
  374};
 375
  /*
   * PCI_ID_TABLE_ENTRY(A) builds a struct pci_id_table initializer from a
   * descriptor array and its ARRAY_SIZE(). A must be a real array, not a
   * pointer, for ARRAY_SIZE() to be meaningful.
   */
  376#define PCI_ID_TABLE_ENTRY(A) { .descr=A, .n_devs = ARRAY_SIZE(A) }
  /* All supported descriptor tables; the list ends with an all-zero entry */
  377static const struct pci_id_table pci_dev_table[] = {
  378        PCI_ID_TABLE_ENTRY(pci_dev_descr_i7core_nehalem),
  379        PCI_ID_TABLE_ENTRY(pci_dev_descr_lynnfield),
  380        PCI_ID_TABLE_ENTRY(pci_dev_descr_i7core_westmere),
  381        {0,}                    /* 0 terminated list. */
  382};
 383
  384/*
  385 *      pci_device_id table listing the devices this driver looks for:
  386 *      two Intel device IDs followed by an all-zero terminator entry.
  386 */
  387static const struct pci_device_id i7core_pci_tbl[] = {
  388        {PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_X58_HUB_MGMT)},
  389        {PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_LYNNFIELD_QPI_LINK0)},
  390        {0,}                    /* 0 terminated list. */
  391};
 392
 393/****************************************************************************
 394                        Ancillary status routines
 395 ****************************************************************************/
 396
 397        /* MC_CONTROL bits */
 398#define CH_ACTIVE(pvt, ch)      ((pvt)->info.mc_control & (1 << (8 + ch)))
 399#define ECCx8(pvt)              ((pvt)->info.mc_control & (1 << 1))
 400
 401        /* MC_STATUS bits */
 402#define ECC_ENABLED(pvt)        ((pvt)->info.mc_status & (1 << 4))
 403#define CH_DISABLED(pvt, ch)    ((pvt)->info.mc_status & (1 << ch))
 404
 405        /* MC_MAX_DOD read functions */
 406static inline int numdimms(u32 dimms)
 407{
 408        return (dimms & 0x3) + 1;
 409}
 410
 411static inline int numrank(u32 rank)
 412{
 413        static const int ranks[] = { 1, 2, 4, -EINVAL };
 414
 415        return ranks[rank & 0x3];
 416}
 417
 418static inline int numbank(u32 bank)
 419{
 420        static const int banks[] = { 4, 8, 16, -EINVAL };
 421
 422        return banks[bank & 0x3];
 423}
 424
 425static inline int numrow(u32 row)
 426{
 427        static const int rows[] = {
 428                1 << 12, 1 << 13, 1 << 14, 1 << 15,
 429                1 << 16, -EINVAL, -EINVAL, -EINVAL,
 430        };
 431
 432        return rows[row & 0x7];
 433}
 434
 435static inline int numcol(u32 col)
 436{
 437        static const int cols[] = {
 438                1 << 10, 1 << 11, 1 << 12, -EINVAL,
 439        };
 440        return cols[col & 0x3];
 441}
 442
 443static struct i7core_dev *get_i7core_dev(u8 socket)
 444{
 445        struct i7core_dev *i7core_dev;
 446
 447        list_for_each_entry(i7core_dev, &i7core_edac_list, list) {
 448                if (i7core_dev->socket == socket)
 449                        return i7core_dev;
 450        }
 451
 452        return NULL;
 453}
 454
 455static struct i7core_dev *alloc_i7core_dev(u8 socket,
 456                                           const struct pci_id_table *table)
 457{
 458        struct i7core_dev *i7core_dev;
 459
 460        i7core_dev = kzalloc(sizeof(*i7core_dev), GFP_KERNEL);
 461        if (!i7core_dev)
 462                return NULL;
 463
 464        i7core_dev->pdev = kzalloc(sizeof(*i7core_dev->pdev) * table->n_devs,
 465                                   GFP_KERNEL);
 466        if (!i7core_dev->pdev) {
 467                kfree(i7core_dev);
 468                return NULL;
 469        }
 470
 471        i7core_dev->socket = socket;
 472        i7core_dev->n_devs = table->n_devs;
 473        list_add_tail(&i7core_dev->list, &i7core_edac_list);
 474
 475        return i7core_dev;
 476}
 477
 478static void free_i7core_dev(struct i7core_dev *i7core_dev)
 479{
 480        list_del(&i7core_dev->list);
 481        kfree(i7core_dev->pdev);
 482        kfree(i7core_dev);
 483}
 484
 485/****************************************************************************
 486                        Memory check routines
 487 ****************************************************************************/
 488
 489static int get_dimm_config(struct mem_ctl_info *mci)
 490{
 491        struct i7core_pvt *pvt = mci->pvt_info;
 492        struct pci_dev *pdev;
 493        int i, j;
 494        enum edac_type mode;
 495        enum mem_type mtype;
 496        struct dimm_info *dimm;
 497
 498        /* Get data from the MC register, function 0 */
 499        pdev = pvt->pci_mcr[0];
 500        if (!pdev)
 501                return -ENODEV;
 502
 503        /* Device 3 function 0 reads */
 504        pci_read_config_dword(pdev, MC_CONTROL, &pvt->info.mc_control);
 505        pci_read_config_dword(pdev, MC_STATUS, &pvt->info.mc_status);
 506        pci_read_config_dword(pdev, MC_MAX_DOD, &pvt->info.max_dod);
 507        pci_read_config_dword(pdev, MC_CHANNEL_MAPPER, &pvt->info.ch_map);
 508
 509        edac_dbg(0, "QPI %d control=0x%08x status=0x%08x dod=0x%08x map=0x%08x\n",
 510                 pvt->i7core_dev->socket, pvt->info.mc_control,
 511                 pvt->info.mc_status, pvt->info.max_dod, pvt->info.ch_map);
 512
 513        if (ECC_ENABLED(pvt)) {
 514                edac_dbg(0, "ECC enabled with x%d SDCC\n", ECCx8(pvt) ? 8 : 4);
 515                if (ECCx8(pvt))
 516                        mode = EDAC_S8ECD8ED;
 517                else
 518                        mode = EDAC_S4ECD4ED;
 519        } else {
 520                edac_dbg(0, "ECC disabled\n");
 521                mode = EDAC_NONE;
 522        }
 523
 524        /* FIXME: need to handle the error codes */
 525        edac_dbg(0, "DOD Max limits: DIMMS: %d, %d-ranked, %d-banked x%x x 0x%x\n",
 526                 numdimms(pvt->info.max_dod),
 527                 numrank(pvt->info.max_dod >> 2),
 528                 numbank(pvt->info.max_dod >> 4),
 529                 numrow(pvt->info.max_dod >> 6),
 530                 numcol(pvt->info.max_dod >> 9));
 531
 532        for (i = 0; i < NUM_CHANS; i++) {
 533                u32 data, dimm_dod[3], value[8];
 534
 535                if (!pvt->pci_ch[i][0])
 536                        continue;
 537
 538                if (!CH_ACTIVE(pvt, i)) {
 539                        edac_dbg(0, "Channel %i is not active\n", i);
 540                        continue;
 541                }
 542                if (CH_DISABLED(pvt, i)) {
 543                        edac_dbg(0, "Channel %i is disabled\n", i);
 544                        continue;
 545                }
 546
 547                /* Devices 4-6 function 0 */
 548                pci_read_config_dword(pvt->pci_ch[i][0],
 549                                MC_CHANNEL_DIMM_INIT_PARAMS, &data);
 550
 551
 552                if (data & THREE_DIMMS_PRESENT)
 553                        pvt->channel[i].is_3dimms_present = true;
 554
 555                if (data & SINGLE_QUAD_RANK_PRESENT)
 556                        pvt->channel[i].is_single_4rank = true;
 557
 558                if (data & QUAD_RANK_PRESENT)
 559                        pvt->channel[i].has_4rank = true;
 560
 561                if (data & REGISTERED_DIMM)
 562                        mtype = MEM_RDDR3;
 563                else
 564                        mtype = MEM_DDR3;
 565
 566                /* Devices 4-6 function 1 */
 567                pci_read_config_dword(pvt->pci_ch[i][1],
 568                                MC_DOD_CH_DIMM0, &dimm_dod[0]);
 569                pci_read_config_dword(pvt->pci_ch[i][1],
 570                                MC_DOD_CH_DIMM1, &dimm_dod[1]);
 571                pci_read_config_dword(pvt->pci_ch[i][1],
 572                                MC_DOD_CH_DIMM2, &dimm_dod[2]);
 573
 574                edac_dbg(0, "Ch%d phy rd%d, wr%d (0x%08x): %s%s%s%cDIMMs\n",
 575                         i,
 576                         RDLCH(pvt->info.ch_map, i), WRLCH(pvt->info.ch_map, i),
 577                         data,
 578                         pvt->channel[i].is_3dimms_present ? "3DIMMS " : "",
 579                         pvt->channel[i].is_3dimms_present ? "SINGLE_4R " : "",
 580                         pvt->channel[i].has_4rank ? "HAS_4R " : "",
 581                         (data & REGISTERED_DIMM) ? 'R' : 'U');
 582
 583                for (j = 0; j < 3; j++) {
 584                        u32 banks, ranks, rows, cols;
 585                        u32 size, npages;
 586
 587                        if (!DIMM_PRESENT(dimm_dod[j]))
 588                                continue;
 589
 590                        dimm = EDAC_DIMM_PTR(mci->layers, mci->dimms, mci->n_layers,
 591                                       i, j, 0);
 592                        banks = numbank(MC_DOD_NUMBANK(dimm_dod[j]));
 593                        ranks = numrank(MC_DOD_NUMRANK(dimm_dod[j]));
 594                        rows = numrow(MC_DOD_NUMROW(dimm_dod[j]));
 595                        cols = numcol(MC_DOD_NUMCOL(dimm_dod[j]));
 596
 597                        /* DDR3 has 8 I/O banks */
 598                        size = (rows * cols * banks * ranks) >> (20 - 3);
 599
 600                        edac_dbg(0, "\tdimm %d %d Mb offset: %x, bank: %d, rank: %d, row: %#x, col: %#x\n",
 601                                 j, size,
 602                                 RANKOFFSET(dimm_dod[j]),
 603                                 banks, ranks, rows, cols);
 604
 605                        npages = MiB_TO_PAGES(size);
 606
 607                        dimm->nr_pages = npages;
 608
 609                        switch (banks) {
 610                        case 4:
 611                                dimm->dtype = DEV_X4;
 612                                break;
 613                        case 8:
 614                                dimm->dtype = DEV_X8;
 615                                break;
 616                        case 16:
 617                                dimm->dtype = DEV_X16;
 618                                break;
 619                        default:
 620                                dimm->dtype = DEV_UNKNOWN;
 621                        }
 622
 623                        snprintf(dimm->label, sizeof(dimm->label),
 624                                 "CPU#%uChannel#%u_DIMM#%u",
 625                                 pvt->i7core_dev->socket, i, j);
 626                        dimm->grain = 8;
 627                        dimm->edac_mode = mode;
 628                        dimm->mtype = mtype;
 629                }
 630
 631                pci_read_config_dword(pdev, MC_SAG_CH_0, &value[0]);
 632                pci_read_config_dword(pdev, MC_SAG_CH_1, &value[1]);
 633                pci_read_config_dword(pdev, MC_SAG_CH_2, &value[2]);
 634                pci_read_config_dword(pdev, MC_SAG_CH_3, &value[3]);
 635                pci_read_config_dword(pdev, MC_SAG_CH_4, &value[4]);
 636                pci_read_config_dword(pdev, MC_SAG_CH_5, &value[5]);
 637                pci_read_config_dword(pdev, MC_SAG_CH_6, &value[6]);
 638                pci_read_config_dword(pdev, MC_SAG_CH_7, &value[7]);
 639                edac_dbg(1, "\t[%i] DIVBY3\tREMOVED\tOFFSET\n", i);
 640                for (j = 0; j < 8; j++)
 641                        edac_dbg(1, "\t\t%#x\t%#x\t%#x\n",
 642                                 (value[j] >> 27) & 0x1,
 643                                 (value[j] >> 24) & 0x7,
 644                                 (value[j] & ((1 << 24) - 1)));
 645        }
 646
 647        return 0;
 648}
 649
 650/****************************************************************************
 651                        Error insertion routines
 652 ****************************************************************************/
 653
  /* Map a pointer to the "dev" member back to its enclosing struct mem_ctl_info */
  654#define to_mci(k) container_of(k, struct mem_ctl_info, dev)
 655
 656/* The i7core has independent error injection features per channel.
 657   However, to have a simpler code, we don't allow enabling error injection
 658   on more than one channel.
 659   Also, since a change at an inject parameter will be applied only at enable,
 660   we're disabling error injection on all write calls to the sysfs nodes that
 661   controls the error code injection.
 662 */
 663static int disable_inject(const struct mem_ctl_info *mci)
 664{
 665        struct i7core_pvt *pvt = mci->pvt_info;
 666
 667        pvt->inject.enable = 0;
 668
 669        if (!pvt->pci_ch[pvt->inject.channel][0])
 670                return -ENODEV;
 671
 672        pci_write_config_dword(pvt->pci_ch[pvt->inject.channel][0],
 673                                MC_CHANNEL_ERROR_INJECT, 0);
 674
 675        return 0;
 676}
 677
 678/*
 679 * i7core inject inject.section
 680 *
 681 *      accept and store error injection inject.section value
 682 *      bit 0 - refers to the lower 32-byte half cacheline
 683 *      bit 1 - refers to the upper 32-byte half cacheline
 684 */
 685static ssize_t i7core_inject_section_store(struct device *dev,
 686                                           struct device_attribute *mattr,
 687                                           const char *data, size_t count)
 688{
 689        struct mem_ctl_info *mci = to_mci(dev);
 690        struct i7core_pvt *pvt = mci->pvt_info;
 691        unsigned long value;
 692        int rc;
 693
 694        if (pvt->inject.enable)
 695                disable_inject(mci);
 696
 697        rc = kstrtoul(data, 10, &value);
 698        if ((rc < 0) || (value > 3))
 699                return -EIO;
 700
 701        pvt->inject.section = (u32) value;
 702        return count;
 703}
 704
 705static ssize_t i7core_inject_section_show(struct device *dev,
 706                                          struct device_attribute *mattr,
 707                                          char *data)
 708{
 709        struct mem_ctl_info *mci = to_mci(dev);
 710        struct i7core_pvt *pvt = mci->pvt_info;
 711        return sprintf(data, "0x%08x\n", pvt->inject.section);
 712}
 713
 714/*
 715 * i7core inject.type
 716 *
 717 *      accept and store error injection inject.section value
 718 *      bit 0 - repeat enable - Enable error repetition
 719 *      bit 1 - inject ECC error
 720 *      bit 2 - inject parity error
 721 */
 722static ssize_t i7core_inject_type_store(struct device *dev,
 723                                        struct device_attribute *mattr,
 724                                        const char *data, size_t count)
 725{
 726        struct mem_ctl_info *mci = to_mci(dev);
 727struct i7core_pvt *pvt = mci->pvt_info;
 728        unsigned long value;
 729        int rc;
 730
 731        if (pvt->inject.enable)
 732                disable_inject(mci);
 733
 734        rc = kstrtoul(data, 10, &value);
 735        if ((rc < 0) || (value > 7))
 736                return -EIO;
 737
 738        pvt->inject.type = (u32) value;
 739        return count;
 740}
 741
 742static ssize_t i7core_inject_type_show(struct device *dev,
 743                                       struct device_attribute *mattr,
 744                                       char *data)
 745{
 746        struct mem_ctl_info *mci = to_mci(dev);
 747        struct i7core_pvt *pvt = mci->pvt_info;
 748
 749        return sprintf(data, "0x%08x\n", pvt->inject.type);
 750}
 751
 752/*
 753 * i7core_inject_inject.eccmask_store
 754 *
 755 * The type of error (UE/CE) will depend on the inject.eccmask value:
 756 *   Any bits set to a 1 will flip the corresponding ECC bit
 757 *   Correctable errors can be injected by flipping 1 bit or the bits within
 758 *   a symbol pair (2 consecutive aligned 8-bit pairs - i.e. 7:0 and 15:8 or
 759 *   23:16 and 31:24). Flipping bits in two symbol pairs will cause an
 760 *   uncorrectable error to be injected.
 761 */
 762static ssize_t i7core_inject_eccmask_store(struct device *dev,
 763                                           struct device_attribute *mattr,
 764                                           const char *data, size_t count)
 765{
 766        struct mem_ctl_info *mci = to_mci(dev);
 767        struct i7core_pvt *pvt = mci->pvt_info;
 768        unsigned long value;
 769        int rc;
 770
 771        if (pvt->inject.enable)
 772                disable_inject(mci);
 773
 774        rc = kstrtoul(data, 10, &value);
 775        if (rc < 0)
 776                return -EIO;
 777
 778        pvt->inject.eccmask = (u32) value;
 779        return count;
 780}
 781
 782static ssize_t i7core_inject_eccmask_show(struct device *dev,
 783                                          struct device_attribute *mattr,
 784                                          char *data)
 785{
 786        struct mem_ctl_info *mci = to_mci(dev);
 787        struct i7core_pvt *pvt = mci->pvt_info;
 788
 789        return sprintf(data, "0x%08x\n", pvt->inject.eccmask);
 790}
 791
 792/*
 793 * i7core_addrmatch
 794 *
 795 * The type of error (UE/CE) will depend on the inject.eccmask value:
 796 *   Any bits set to a 1 will flip the corresponding ECC bit
 797 *   Correctable errors can be injected by flipping 1 bit or the bits within
 798 *   a symbol pair (2 consecutive aligned 8-bit pairs - i.e. 7:0 and 15:8 or
 799 *   23:16 and 31:24). Flipping bits in two symbol pairs will cause an
 800 *   uncorrectable error to be injected.
 801 */
 802
 803#define DECLARE_ADDR_MATCH(param, limit)                        \
 804static ssize_t i7core_inject_store_##param(                     \
 805        struct device *dev,                                     \
 806        struct device_attribute *mattr,                         \
 807        const char *data, size_t count)                         \
 808{                                                               \
 809        struct mem_ctl_info *mci = dev_get_drvdata(dev);        \
 810        struct i7core_pvt *pvt;                                 \
 811        long value;                                             \
 812        int rc;                                                 \
 813                                                                \
 814        edac_dbg(1, "\n");                                      \
 815        pvt = mci->pvt_info;                                    \
 816                                                                \
 817        if (pvt->inject.enable)                                 \
 818                disable_inject(mci);                            \
 819                                                                \
 820        if (!strcasecmp(data, "any") || !strcasecmp(data, "any\n"))\
 821                value = -1;                                     \
 822        else {                                                  \
 823                rc = kstrtoul(data, 10, &value);                \
 824                if ((rc < 0) || (value >= limit))               \
 825                        return -EIO;                            \
 826        }                                                       \
 827                                                                \
 828        pvt->inject.param = value;                              \
 829                                                                \
 830        return count;                                           \
 831}                                                               \
 832                                                                \
 833static ssize_t i7core_inject_show_##param(                      \
 834        struct device *dev,                                     \
 835        struct device_attribute *mattr,                         \
 836        char *data)                                             \
 837{                                                               \
 838        struct mem_ctl_info *mci = dev_get_drvdata(dev);        \
 839        struct i7core_pvt *pvt;                                 \
 840                                                                \
 841        pvt = mci->pvt_info;                                    \
 842        edac_dbg(1, "pvt=%p\n", pvt);                           \
 843        if (pvt->inject.param < 0)                              \
 844                return sprintf(data, "any\n");                  \
 845        else                                                    \
 846                return sprintf(data, "%d\n", pvt->inject.param);\
 847}
 848
 849#define ATTR_ADDR_MATCH(param)                                  \
 850        static DEVICE_ATTR(param, S_IRUGO | S_IWUSR,            \
 851                    i7core_inject_show_##param,                 \
 852                    i7core_inject_store_##param)
 853
 854DECLARE_ADDR_MATCH(channel, 3);
 855DECLARE_ADDR_MATCH(dimm, 3);
 856DECLARE_ADDR_MATCH(rank, 4);
 857DECLARE_ADDR_MATCH(bank, 32);
 858DECLARE_ADDR_MATCH(page, 0x10000);
 859DECLARE_ADDR_MATCH(col, 0x4000);
 860
 861ATTR_ADDR_MATCH(channel);
 862ATTR_ADDR_MATCH(dimm);
 863ATTR_ADDR_MATCH(rank);
 864ATTR_ADDR_MATCH(bank);
 865ATTR_ADDR_MATCH(page);
 866ATTR_ADDR_MATCH(col);
 867
 868static int write_and_test(struct pci_dev *dev, const int where, const u32 val)
 869{
 870        u32 read;
 871        int count;
 872
 873        edac_dbg(0, "setting pci %02x:%02x.%x reg=%02x value=%08x\n",
 874                 dev->bus->number, PCI_SLOT(dev->devfn), PCI_FUNC(dev->devfn),
 875                 where, val);
 876
 877        for (count = 0; count < 10; count++) {
 878                if (count)
 879                        msleep(100);
 880                pci_write_config_dword(dev, where, val);
 881                pci_read_config_dword(dev, where, &read);
 882
 883                if (read == val)
 884                        return 0;
 885        }
 886
 887        i7core_printk(KERN_ERR, "Error during set pci %02x:%02x.%x reg=%02x "
 888                "write=%08x. Read=%08x\n",
 889                dev->bus->number, PCI_SLOT(dev->devfn), PCI_FUNC(dev->devfn),
 890                where, val, read);
 891
 892        return -EINVAL;
 893}
 894
 895/*
 896 * This routine prepares the Memory Controller for error injection.
 897 * The error will be injected when some process tries to write to the
 898 * memory that matches the given criteria.
 899 * The criteria can be set in terms of a mask where dimm, rank, bank, page
 900 * and col can be specified.
 901 * A -1 value for any of the mask items will make the MCU to ignore
 902 * that matching criteria for error injection.
 903 *
 904 * It should be noticed that the error will only happen after a write operation
 905 * on a memory that matches the condition. if REPEAT_EN is not enabled at
 906 * inject mask, then it will produce just one error. Otherwise, it will repeat
 907 * until the injectmask would be cleaned.
 908 *
 909 * FIXME: This routine assumes that MAXNUMDIMMS value of MC_MAX_DOD
 910 *    is reliable enough to check if the MC is using the
 911 *    three channels. However, this is not clear at the datasheet.
 912 */
 913static ssize_t i7core_inject_enable_store(struct device *dev,
 914                                          struct device_attribute *mattr,
 915                                          const char *data, size_t count)
 916{
 917        struct mem_ctl_info *mci = to_mci(dev);
 918        struct i7core_pvt *pvt = mci->pvt_info;
 919        u32 injectmask;
 920        u64 mask = 0;
 921        int  rc;
 922        long enable;
 923
 924        if (!pvt->pci_ch[pvt->inject.channel][0])
 925                return 0;
 926
 927        rc = kstrtoul(data, 10, &enable);
 928        if ((rc < 0))
 929                return 0;
 930
 931        if (enable) {
 932                pvt->inject.enable = 1;
 933        } else {
 934                disable_inject(mci);
 935                return count;
 936        }
 937
 938        /* Sets pvt->inject.dimm mask */
 939        if (pvt->inject.dimm < 0)
 940                mask |= 1LL << 41;
 941        else {
 942                if (pvt->channel[pvt->inject.channel].dimms > 2)
 943                        mask |= (pvt->inject.dimm & 0x3LL) << 35;
 944                else
 945                        mask |= (pvt->inject.dimm & 0x1LL) << 36;
 946        }
 947
 948        /* Sets pvt->inject.rank mask */
 949        if (pvt->inject.rank < 0)
 950                mask |= 1LL << 40;
 951        else {
 952                if (pvt->channel[pvt->inject.channel].dimms > 2)
 953                        mask |= (pvt->inject.rank & 0x1LL) << 34;
 954                else
 955                        mask |= (pvt->inject.rank & 0x3LL) << 34;
 956        }
 957
 958        /* Sets pvt->inject.bank mask */
 959        if (pvt->inject.bank < 0)
 960                mask |= 1LL << 39;
 961        else
 962                mask |= (pvt->inject.bank & 0x15LL) << 30;
 963
 964        /* Sets pvt->inject.page mask */
 965        if (pvt->inject.page < 0)
 966                mask |= 1LL << 38;
 967        else
 968                mask |= (pvt->inject.page & 0xffff) << 14;
 969
 970        /* Sets pvt->inject.column mask */
 971        if (pvt->inject.col < 0)
 972                mask |= 1LL << 37;
 973        else
 974                mask |= (pvt->inject.col & 0x3fff);
 975
 976        /*
 977         * bit    0: REPEAT_EN
 978         * bits 1-2: MASK_HALF_CACHELINE
 979         * bit    3: INJECT_ECC
 980         * bit    4: INJECT_ADDR_PARITY
 981         */
 982
 983        injectmask = (pvt->inject.type & 1) |
 984                     (pvt->inject.section & 0x3) << 1 |
 985                     (pvt->inject.type & 0x6) << (3 - 1);
 986
 987        /* Unlock writes to registers - this register is write only */
 988        pci_write_config_dword(pvt->pci_noncore,
 989                               MC_CFG_CONTROL, 0x2);
 990
 991        write_and_test(pvt->pci_ch[pvt->inject.channel][0],
 992                               MC_CHANNEL_ADDR_MATCH, mask);
 993        write_and_test(pvt->pci_ch[pvt->inject.channel][0],
 994                               MC_CHANNEL_ADDR_MATCH + 4, mask >> 32L);
 995
 996        write_and_test(pvt->pci_ch[pvt->inject.channel][0],
 997                               MC_CHANNEL_ERROR_MASK, pvt->inject.eccmask);
 998
 999        write_and_test(pvt->pci_ch[pvt->inject.channel][0],
1000                               MC_CHANNEL_ERROR_INJECT, injectmask);
1001
1002        /*
1003         * This is something undocumented, based on my tests
1004         * Without writing 8 to this register, errors aren't injected. Not sure
1005         * why.
1006         */
1007        pci_write_config_dword(pvt->pci_noncore,
1008                               MC_CFG_CONTROL, 8);
1009
1010        edac_dbg(0, "Error inject addr match 0x%016llx, ecc 0x%08x, inject 0x%08x\n",
1011                 mask, pvt->inject.eccmask, injectmask);
1012
1013
1014        return count;
1015}
1016
1017static ssize_t i7core_inject_enable_show(struct device *dev,
1018                                         struct device_attribute *mattr,
1019                                         char *data)
1020{
1021        struct mem_ctl_info *mci = to_mci(dev);
1022        struct i7core_pvt *pvt = mci->pvt_info;
1023        u32 injectmask;
1024
1025        if (!pvt->pci_ch[pvt->inject.channel][0])
1026                return 0;
1027
1028        pci_read_config_dword(pvt->pci_ch[pvt->inject.channel][0],
1029                               MC_CHANNEL_ERROR_INJECT, &injectmask);
1030
1031        edac_dbg(0, "Inject error read: 0x%018x\n", injectmask);
1032
1033        if (injectmask & 0x0c)
1034                pvt->inject.enable = 1;
1035
1036        return sprintf(data, "%d\n", pvt->inject.enable);
1037}
1038
/*
 * DECLARE_COUNTER(param) generates the show handler for
 * pvt->udimm_ce_count[param]. "data unavailable" is printed while no count
 * has been collected yet or when pvt->is_registered is set.
 */
#define DECLARE_COUNTER(param)                                  \
static ssize_t i7core_show_counter_##param(                     \
        struct device *dev,                                     \
        struct device_attribute *mattr,                         \
        char *data)                                             \
{                                                               \
        struct mem_ctl_info *mci = dev_get_drvdata(dev);        \
        struct i7core_pvt *pvt = mci->pvt_info;                 \
                                                                \
        edac_dbg(1, "\n");                                      \
        if (pvt->ce_count_available && !pvt->is_registered)     \
                return sprintf(data, "%lu\n",                   \
                               pvt->udimm_ce_count[param]);     \
                                                                \
        return sprintf(data, "data unavailable\n");             \
}
1054
/*
 * ATTR_COUNTER(param) defines dev_attr_udimm<param>.
 *
 * The attribute has no store handler, so it is created read-only instead
 * of advertising a write permission that can never succeed.
 */
#define ATTR_COUNTER(param)                                     \
        static DEVICE_ATTR(udimm##param, S_IRUGO,               \
                    i7core_show_counter_##param,                \
                    NULL)
1059
1060DECLARE_COUNTER(0);
1061DECLARE_COUNTER(1);
1062DECLARE_COUNTER(2);
1063
1064ATTR_COUNTER(0);
1065ATTR_COUNTER(1);
1066ATTR_COUNTER(2);
1067
1068/*
1069 * inject_addrmatch device sysfs struct
1070 */
1071
1072static struct attribute *i7core_addrmatch_attrs[] = {
1073        &dev_attr_channel.attr,
1074        &dev_attr_dimm.attr,
1075        &dev_attr_rank.attr,
1076        &dev_attr_bank.attr,
1077        &dev_attr_page.attr,
1078        &dev_attr_col.attr,
1079        NULL
1080};
1081
1082static const struct attribute_group addrmatch_grp = {
1083        .attrs  = i7core_addrmatch_attrs,
1084};
1085
1086static const struct attribute_group *addrmatch_groups[] = {
1087        &addrmatch_grp,
1088        NULL
1089};
1090
1091static void addrmatch_release(struct device *device)
1092{
1093        edac_dbg(1, "Releasing device %s\n", dev_name(device));
1094        kfree(device);
1095}
1096
1097static const struct device_type addrmatch_type = {
1098        .groups         = addrmatch_groups,
1099        .release        = addrmatch_release,
1100};
1101
1102/*
1103 * all_channel_counts sysfs struct
1104 */
1105
1106static struct attribute *i7core_udimm_counters_attrs[] = {
1107        &dev_attr_udimm0.attr,
1108        &dev_attr_udimm1.attr,
1109        &dev_attr_udimm2.attr,
1110        NULL
1111};
1112
1113static const struct attribute_group all_channel_counts_grp = {
1114        .attrs  = i7core_udimm_counters_attrs,
1115};
1116
1117static const struct attribute_group *all_channel_counts_groups[] = {
1118        &all_channel_counts_grp,
1119        NULL
1120};
1121
1122static void all_channel_counts_release(struct device *device)
1123{
1124        edac_dbg(1, "Releasing device %s\n", dev_name(device));
1125        kfree(device);
1126}
1127
1128static const struct device_type all_channel_counts_type = {
1129        .groups         = all_channel_counts_groups,
1130        .release        = all_channel_counts_release,
1131};
1132
1133/*
1134 * inject sysfs attributes
1135 */
1136
1137static DEVICE_ATTR(inject_section, S_IRUGO | S_IWUSR,
1138                   i7core_inject_section_show, i7core_inject_section_store);
1139
1140static DEVICE_ATTR(inject_type, S_IRUGO | S_IWUSR,
1141                   i7core_inject_type_show, i7core_inject_type_store);
1142
1143
1144static DEVICE_ATTR(inject_eccmask, S_IRUGO | S_IWUSR,
1145                   i7core_inject_eccmask_show, i7core_inject_eccmask_store);
1146
1147static DEVICE_ATTR(inject_enable, S_IRUGO | S_IWUSR,
1148                   i7core_inject_enable_show, i7core_inject_enable_store);
1149
1150static struct attribute *i7core_dev_attrs[] = {
1151        &dev_attr_inject_section.attr,
1152        &dev_attr_inject_type.attr,
1153        &dev_attr_inject_eccmask.attr,
1154        &dev_attr_inject_enable.attr,
1155        NULL
1156};
1157
1158ATTRIBUTE_GROUPS(i7core_dev);
1159
1160static int i7core_create_sysfs_devices(struct mem_ctl_info *mci)
1161{
1162        struct i7core_pvt *pvt = mci->pvt_info;
1163        int rc;
1164
1165        pvt->addrmatch_dev = kzalloc(sizeof(*pvt->addrmatch_dev), GFP_KERNEL);
1166        if (!pvt->addrmatch_dev)
1167                return -ENOMEM;
1168
1169        pvt->addrmatch_dev->type = &addrmatch_type;
1170        pvt->addrmatch_dev->bus = mci->dev.bus;
1171        device_initialize(pvt->addrmatch_dev);
1172        pvt->addrmatch_dev->parent = &mci->dev;
1173        dev_set_name(pvt->addrmatch_dev, "inject_addrmatch");
1174        dev_set_drvdata(pvt->addrmatch_dev, mci);
1175
1176        edac_dbg(1, "creating %s\n", dev_name(pvt->addrmatch_dev));
1177
1178        rc = device_add(pvt->addrmatch_dev);
1179        if (rc < 0)
1180                return rc;
1181
1182        if (!pvt->is_registered) {
1183                pvt->chancounts_dev = kzalloc(sizeof(*pvt->chancounts_dev),
1184                                              GFP_KERNEL);
1185                if (!pvt->chancounts_dev) {
1186                        put_device(pvt->addrmatch_dev);
1187                        device_del(pvt->addrmatch_dev);
1188                        return -ENOMEM;
1189                }
1190
1191                pvt->chancounts_dev->type = &all_channel_counts_type;
1192                pvt->chancounts_dev->bus = mci->dev.bus;
1193                device_initialize(pvt->chancounts_dev);
1194                pvt->chancounts_dev->parent = &mci->dev;
1195                dev_set_name(pvt->chancounts_dev, "all_channel_counts");
1196                dev_set_drvdata(pvt->chancounts_dev, mci);
1197
1198                edac_dbg(1, "creating %s\n", dev_name(pvt->chancounts_dev));
1199
1200                rc = device_add(pvt->chancounts_dev);
1201                if (rc < 0)
1202                        return rc;
1203        }
1204        return 0;
1205}
1206
1207static void i7core_delete_sysfs_devices(struct mem_ctl_info *mci)
1208{
1209        struct i7core_pvt *pvt = mci->pvt_info;
1210
1211        edac_dbg(1, "\n");
1212
1213        if (!pvt->is_registered) {
1214                put_device(pvt->chancounts_dev);
1215                device_del(pvt->chancounts_dev);
1216        }
1217        put_device(pvt->addrmatch_dev);
1218        device_del(pvt->addrmatch_dev);
1219}
1220
1221/****************************************************************************
1222        Device initialization routines: put/get, init/exit
1223 ****************************************************************************/
1224
1225/*
1226 *      i7core_put_all_devices  'put' all the devices that we have
1227 *                              reserved via 'get'
1228 */
1229static void i7core_put_devices(struct i7core_dev *i7core_dev)
1230{
1231        int i;
1232
1233        edac_dbg(0, "\n");
1234        for (i = 0; i < i7core_dev->n_devs; i++) {
1235                struct pci_dev *pdev = i7core_dev->pdev[i];
1236                if (!pdev)
1237                        continue;
1238                edac_dbg(0, "Removing dev %02x:%02x.%d\n",
1239                         pdev->bus->number,
1240                         PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));
1241                pci_dev_put(pdev);
1242        }
1243}
1244
1245static void i7core_put_all_devices(void)
1246{
1247        struct i7core_dev *i7core_dev, *tmp;
1248
1249        list_for_each_entry_safe(i7core_dev, tmp, &i7core_edac_list, list) {
1250                i7core_put_devices(i7core_dev);
1251                free_i7core_dev(i7core_dev);
1252        }
1253}
1254
1255static void __init i7core_xeon_pci_fixup(const struct pci_id_table *table)
1256{
1257        struct pci_dev *pdev = NULL;
1258        int i;
1259
1260        /*
1261         * On Xeon 55xx, the Intel Quick Path Arch Generic Non-core pci buses
1262         * aren't announced by acpi. So, we need to use a legacy scan probing
1263         * to detect them
1264         */
1265        while (table && table->descr) {
1266                pdev = pci_get_device(PCI_VENDOR_ID_INTEL, table->descr[0].dev_id, NULL);
1267                if (unlikely(!pdev)) {
1268                        for (i = 0; i < MAX_SOCKET_BUSES; i++)
1269                                pcibios_scan_specific_bus(255-i);
1270                }
1271                pci_dev_put(pdev);
1272                table++;
1273        }
1274}
1275
1276static unsigned i7core_pci_lastbus(void)
1277{
1278        int last_bus = 0, bus;
1279        struct pci_bus *b = NULL;
1280
1281        while ((b = pci_find_next_bus(b)) != NULL) {
1282                bus = b->number;
1283                edac_dbg(0, "Found bus %d\n", bus);
1284                if (bus > last_bus)
1285                        last_bus = bus;
1286        }
1287
1288        edac_dbg(0, "Last bus %d\n", last_bus);
1289
1290        return last_bus;
1291}
1292
1293/*
1294 *      i7core_get_all_devices  Find and perform 'get' operation on the MCH's
1295 *                      device/functions we want to reference for this driver
1296 *
1297 *                      Need to 'get' device 16 func 1 and func 2
1298 */
1299static int i7core_get_onedevice(struct pci_dev **prev,
1300                                const struct pci_id_table *table,
1301                                const unsigned devno,
1302                                const unsigned last_bus)
1303{
1304        struct i7core_dev *i7core_dev;
1305        const struct pci_id_descr *dev_descr = &table->descr[devno];
1306
1307        struct pci_dev *pdev = NULL;
1308        u8 bus = 0;
1309        u8 socket = 0;
1310
1311        pdev = pci_get_device(PCI_VENDOR_ID_INTEL,
1312                              dev_descr->dev_id, *prev);
1313
1314        /*
1315         * On Xeon 55xx, the Intel QuickPath Arch Generic Non-core regs
1316         * is at addr 8086:2c40, instead of 8086:2c41. So, we need
1317         * to probe for the alternate address in case of failure
1318         */
1319        if (dev_descr->dev_id == PCI_DEVICE_ID_INTEL_I7_NONCORE && !pdev) {
1320                pci_dev_get(*prev);     /* pci_get_device will put it */
1321                pdev = pci_get_device(PCI_VENDOR_ID_INTEL,
1322                                      PCI_DEVICE_ID_INTEL_I7_NONCORE_ALT, *prev);
1323        }
1324
1325        if (dev_descr->dev_id == PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE &&
1326            !pdev) {
1327                pci_dev_get(*prev);     /* pci_get_device will put it */
1328                pdev = pci_get_device(PCI_VENDOR_ID_INTEL,
1329                                      PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE_ALT,
1330                                      *prev);
1331        }
1332
1333        if (!pdev) {
1334                if (*prev) {
1335                        *prev = pdev;
1336                        return 0;
1337                }
1338
1339                if (dev_descr->optional)
1340                        return 0;
1341
1342                if (devno == 0)
1343                        return -ENODEV;
1344
1345                i7core_printk(KERN_INFO,
1346                        "Device not found: dev %02x.%d PCI ID %04x:%04x\n",
1347                        dev_descr->dev, dev_descr->func,
1348                        PCI_VENDOR_ID_INTEL, dev_descr->dev_id);
1349
1350                /* End of list, leave */
1351                return -ENODEV;
1352        }
1353        bus = pdev->bus->number;
1354
1355        socket = last_bus - bus;
1356
1357        i7core_dev = get_i7core_dev(socket);
1358        if (!i7core_dev) {
1359                i7core_dev = alloc_i7core_dev(socket, table);
1360                if (!i7core_dev) {
1361                        pci_dev_put(pdev);
1362                        return -ENOMEM;
1363                }
1364        }
1365
1366        if (i7core_dev->pdev[devno]) {
1367                i7core_printk(KERN_ERR,
1368                        "Duplicated device for "
1369                        "dev %02x:%02x.%d PCI ID %04x:%04x\n",
1370                        bus, dev_descr->dev, dev_descr->func,
1371                        PCI_VENDOR_ID_INTEL, dev_descr->dev_id);
1372                pci_dev_put(pdev);
1373                return -ENODEV;
1374        }
1375
1376        i7core_dev->pdev[devno] = pdev;
1377
1378        /* Sanity check */
1379        if (unlikely(PCI_SLOT(pdev->devfn) != dev_descr->dev ||
1380                        PCI_FUNC(pdev->devfn) != dev_descr->func)) {
1381                i7core_printk(KERN_ERR,
1382                        "Device PCI ID %04x:%04x "
1383                        "has dev %02x:%02x.%d instead of dev %02x:%02x.%d\n",
1384                        PCI_VENDOR_ID_INTEL, dev_descr->dev_id,
1385                        bus, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn),
1386                        bus, dev_descr->dev, dev_descr->func);
1387                return -ENODEV;
1388        }
1389
1390        /* Be sure that the device is enabled */
1391        if (unlikely(pci_enable_device(pdev) < 0)) {
1392                i7core_printk(KERN_ERR,
1393                        "Couldn't enable "
1394                        "dev %02x:%02x.%d PCI ID %04x:%04x\n",
1395                        bus, dev_descr->dev, dev_descr->func,
1396                        PCI_VENDOR_ID_INTEL, dev_descr->dev_id);
1397                return -ENODEV;
1398        }
1399
1400        edac_dbg(0, "Detected socket %d dev %02x:%02x.%d PCI ID %04x:%04x\n",
1401                 socket, bus, dev_descr->dev,
1402                 dev_descr->func,
1403                 PCI_VENDOR_ID_INTEL, dev_descr->dev_id);
1404
1405        /*
1406         * As stated on drivers/pci/search.c, the reference count for
1407         * @from is always decremented if it is not %NULL. So, as we need
1408         * to get all devices up to null, we need to do a get for the device
1409         */
1410        pci_dev_get(pdev);
1411
1412        *prev = pdev;
1413
1414        return 0;
1415}
1416
1417static int i7core_get_all_devices(void)
1418{
1419        int i, rc, last_bus;
1420        struct pci_dev *pdev = NULL;
1421        const struct pci_id_table *table = pci_dev_table;
1422
1423        last_bus = i7core_pci_lastbus();
1424
1425        while (table && table->descr) {
1426                for (i = 0; i < table->n_devs; i++) {
1427                        pdev = NULL;
1428                        do {
1429                                rc = i7core_get_onedevice(&pdev, table, i,
1430                                                          last_bus);
1431                                if (rc < 0) {
1432                                        if (i == 0) {
1433                                                i = table->n_devs;
1434                                                break;
1435                                        }
1436                                        i7core_put_all_devices();
1437                                        return -ENODEV;
1438                                }
1439                        } while (pdev);
1440                }
1441                table++;
1442        }
1443
1444        return 0;
1445}
1446
1447static int mci_bind_devs(struct mem_ctl_info *mci,
1448                         struct i7core_dev *i7core_dev)
1449{
1450        struct i7core_pvt *pvt = mci->pvt_info;
1451        struct pci_dev *pdev;
1452        int i, func, slot;
1453        char *family;
1454
1455        pvt->is_registered = false;
1456        pvt->enable_scrub  = false;
1457        for (i = 0; i < i7core_dev->n_devs; i++) {
1458                pdev = i7core_dev->pdev[i];
1459                if (!pdev)
1460                        continue;
1461
1462                func = PCI_FUNC(pdev->devfn);
1463                slot = PCI_SLOT(pdev->devfn);
1464                if (slot == 3) {
1465                        if (unlikely(func > MAX_MCR_FUNC))
1466                                goto error;
1467                        pvt->pci_mcr[func] = pdev;
1468                } else if (likely(slot >= 4 && slot < 4 + NUM_CHANS)) {
1469                        if (unlikely(func > MAX_CHAN_FUNC))
1470                                goto error;
1471                        pvt->pci_ch[slot - 4][func] = pdev;
1472                } else if (!slot && !func) {
1473                        pvt->pci_noncore = pdev;
1474
1475                        /* Detect the processor family */
1476                        switch (pdev->device) {
1477                        case PCI_DEVICE_ID_INTEL_I7_NONCORE:
1478                                family = "Xeon 35xx/ i7core";
1479                                pvt->enable_scrub = false;
1480                                break;
1481                        case PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE_ALT:
1482                                family = "i7-800/i5-700";
1483                                pvt->enable_scrub = false;
1484                                break;
1485                        case PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE:
1486                                family = "Xeon 34xx";
1487                                pvt->enable_scrub = false;
1488                                break;
1489                        case PCI_DEVICE_ID_INTEL_I7_NONCORE_ALT:
1490                                family = "Xeon 55xx";
1491                                pvt->enable_scrub = true;
1492                                break;
1493                        case PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE_REV2:
1494                                family = "Xeon 56xx / i7-900";
1495                                pvt->enable_scrub = true;
1496                                break;
1497                        default:
1498                                family = "unknown";
1499                                pvt->enable_scrub = false;
1500                        }
1501                        edac_dbg(0, "Detected a processor type %s\n", family);
1502                } else
1503                        goto error;
1504
1505                edac_dbg(0, "Associated fn %d.%d, dev = %p, socket %d\n",
1506                         PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn),
1507                         pdev, i7core_dev->socket);
1508
1509                if (PCI_SLOT(pdev->devfn) == 3 &&
1510                        PCI_FUNC(pdev->devfn) == 2)
1511                        pvt->is_registered = true;
1512        }
1513
1514        return 0;
1515
1516error:
1517        i7core_printk(KERN_ERR, "Device %d, function %d "
1518                      "is out of the expected range\n",
1519                      slot, func);
1520        return -EINVAL;
1521}
1522
1523/****************************************************************************
1524                        Error check routines
1525 ****************************************************************************/
1526
1527static void i7core_rdimm_update_ce_count(struct mem_ctl_info *mci,
1528                                         const int chan,
1529                                         const int new0,
1530                                         const int new1,
1531                                         const int new2)
1532{
1533        struct i7core_pvt *pvt = mci->pvt_info;
1534        int add0 = 0, add1 = 0, add2 = 0;
1535        /* Updates CE counters if it is not the first time here */
1536        if (pvt->ce_count_available) {
1537                /* Updates CE counters */
1538
1539                add2 = new2 - pvt->rdimm_last_ce_count[chan][2];
1540                add1 = new1 - pvt->rdimm_last_ce_count[chan][1];
1541                add0 = new0 - pvt->rdimm_last_ce_count[chan][0];
1542
1543                if (add2 < 0)
1544                        add2 += 0x7fff;
1545                pvt->rdimm_ce_count[chan][2] += add2;
1546
1547                if (add1 < 0)
1548                        add1 += 0x7fff;
1549                pvt->rdimm_ce_count[chan][1] += add1;
1550
1551                if (add0 < 0)
1552                        add0 += 0x7fff;
1553                pvt->rdimm_ce_count[chan][0] += add0;
1554        } else
1555                pvt->ce_count_available = 1;
1556
1557        /* Store the new values */
1558        pvt->rdimm_last_ce_count[chan][2] = new2;
1559        pvt->rdimm_last_ce_count[chan][1] = new1;
1560        pvt->rdimm_last_ce_count[chan][0] = new0;
1561
1562        /*updated the edac core */
1563        if (add0 != 0)
1564                edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, add0,
1565                                     0, 0, 0,
1566                                     chan, 0, -1, "error", "");
1567        if (add1 != 0)
1568                edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, add1,
1569                                     0, 0, 0,
1570                                     chan, 1, -1, "error", "");
1571        if (add2 != 0)
1572                edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, add2,
1573                                     0, 0, 0,
1574                                     chan, 2, -1, "error", "");
1575}
1576
1577static void i7core_rdimm_check_mc_ecc_err(struct mem_ctl_info *mci)
1578{
1579        struct i7core_pvt *pvt = mci->pvt_info;
1580        u32 rcv[3][2];
1581        int i, new0, new1, new2;
1582
1583        /*Read DEV 3: FUN 2:  MC_COR_ECC_CNT regs directly*/
1584        pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_0,
1585                                                                &rcv[0][0]);
1586        pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_1,
1587                                                                &rcv[0][1]);
1588        pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_2,
1589                                                                &rcv[1][0]);
1590        pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_3,
1591                                                                &rcv[1][1]);
1592        pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_4,
1593                                                                &rcv[2][0]);
1594        pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_5,
1595                                                                &rcv[2][1]);
1596        for (i = 0 ; i < 3; i++) {
1597                edac_dbg(3, "MC_COR_ECC_CNT%d = 0x%x; MC_COR_ECC_CNT%d = 0x%x\n",
1598                         (i * 2), rcv[i][0], (i * 2) + 1, rcv[i][1]);
1599                /*if the channel has 3 dimms*/
1600                if (pvt->channel[i].dimms > 2) {
1601                        new0 = DIMM_BOT_COR_ERR(rcv[i][0]);
1602                        new1 = DIMM_TOP_COR_ERR(rcv[i][0]);
1603                        new2 = DIMM_BOT_COR_ERR(rcv[i][1]);
1604                } else {
1605                        new0 = DIMM_TOP_COR_ERR(rcv[i][0]) +
1606                                        DIMM_BOT_COR_ERR(rcv[i][0]);
1607                        new1 = DIMM_TOP_COR_ERR(rcv[i][1]) +
1608                                        DIMM_BOT_COR_ERR(rcv[i][1]);
1609                        new2 = 0;
1610                }
1611
1612                i7core_rdimm_update_ce_count(mci, i, new0, new1, new2);
1613        }
1614}
1615
1616/* This function is based on the device 3 function 4 registers as described on:
1617 * Intel Xeon Processor 5500 Series Datasheet Volume 2
1618 *      http://www.intel.com/Assets/PDF/datasheet/321322.pdf
1619 * also available at:
1620 *      http://www.arrownac.com/manufacturers/intel/s/nehalem/5500-datasheet-v2.pdf
1621 */
1622static void i7core_udimm_check_mc_ecc_err(struct mem_ctl_info *mci)
1623{
1624        struct i7core_pvt *pvt = mci->pvt_info;
1625        u32 rcv1, rcv0;
1626        int new0, new1, new2;
1627
1628        if (!pvt->pci_mcr[4]) {
1629                edac_dbg(0, "MCR registers not found\n");
1630                return;
1631        }
1632
1633        /* Corrected test errors */
1634        pci_read_config_dword(pvt->pci_mcr[4], MC_TEST_ERR_RCV1, &rcv1);
1635        pci_read_config_dword(pvt->pci_mcr[4], MC_TEST_ERR_RCV0, &rcv0);
1636
1637        /* Store the new values */
1638        new2 = DIMM2_COR_ERR(rcv1);
1639        new1 = DIMM1_COR_ERR(rcv0);
1640        new0 = DIMM0_COR_ERR(rcv0);
1641
1642        /* Updates CE counters if it is not the first time here */
1643        if (pvt->ce_count_available) {
1644                /* Updates CE counters */
1645                int add0, add1, add2;
1646
1647                add2 = new2 - pvt->udimm_last_ce_count[2];
1648                add1 = new1 - pvt->udimm_last_ce_count[1];
1649                add0 = new0 - pvt->udimm_last_ce_count[0];
1650
1651                if (add2 < 0)
1652                        add2 += 0x7fff;
1653                pvt->udimm_ce_count[2] += add2;
1654
1655                if (add1 < 0)
1656                        add1 += 0x7fff;
1657                pvt->udimm_ce_count[1] += add1;
1658
1659                if (add0 < 0)
1660                        add0 += 0x7fff;
1661                pvt->udimm_ce_count[0] += add0;
1662
1663                if (add0 | add1 | add2)
1664                        i7core_printk(KERN_ERR, "New Corrected error(s): "
1665                                      "dimm0: +%d, dimm1: +%d, dimm2 +%d\n",
1666                                      add0, add1, add2);
1667        } else
1668                pvt->ce_count_available = 1;
1669
1670        /* Store the new values */
1671        pvt->udimm_last_ce_count[2] = new2;
1672        pvt->udimm_last_ce_count[1] = new1;
1673        pvt->udimm_last_ce_count[0] = new0;
1674}
1675
1676/*
1677 * According with tables E-11 and E-12 of chapter E.3.3 of Intel 64 and IA-32
1678 * Architectures Software Developer’s Manual Volume 3B.
1679 * Nehalem are defined as family 0x06, model 0x1a
1680 *
1681 * The MCA registers used here are the following ones:
1682 *     struct mce field MCA Register
1683 *     m->status        MSR_IA32_MC8_STATUS
1684 *     m->addr          MSR_IA32_MC8_ADDR
1685 *     m->misc          MSR_IA32_MC8_MISC
1686 * In the case of Nehalem, the error information is masked at .status and .misc
1687 * fields
1688 */
1689static void i7core_mce_output_error(struct mem_ctl_info *mci,
1690                                    const struct mce *m)
1691{
1692        struct i7core_pvt *pvt = mci->pvt_info;
1693        char *optype, *err;
1694        enum hw_event_mc_err_type tp_event;
1695        unsigned long error = m->status & 0x1ff0000l;
1696        bool uncorrected_error = m->mcgstatus & 1ll << 61;
1697        bool ripv = m->mcgstatus & 1;
1698        u32 optypenum = (m->status >> 4) & 0x07;
1699        u32 core_err_cnt = (m->status >> 38) & 0x7fff;
1700        u32 dimm = (m->misc >> 16) & 0x3;
1701        u32 channel = (m->misc >> 18) & 0x3;
1702        u32 syndrome = m->misc >> 32;
1703        u32 errnum = find_first_bit(&error, 32);
1704
1705        if (uncorrected_error) {
1706                if (ripv)
1707                        tp_event = HW_EVENT_ERR_FATAL;
1708                else
1709                        tp_event = HW_EVENT_ERR_UNCORRECTED;
1710        } else {
1711                tp_event = HW_EVENT_ERR_CORRECTED;
1712        }
1713
1714        switch (optypenum) {
1715        case 0:
1716                optype = "generic undef request";
1717                break;
1718        case 1:
1719                optype = "read error";
1720                break;
1721        case 2:
1722                optype = "write error";
1723                break;
1724        case 3:
1725                optype = "addr/cmd error";
1726                break;
1727        case 4:
1728                optype = "scrubbing error";
1729                break;
1730        default:
1731                optype = "reserved";
1732                break;
1733        }
1734
1735        switch (errnum) {
1736        case 16:
1737                err = "read ECC error";
1738                break;
1739        case 17:
1740                err = "RAS ECC error";
1741                break;
1742        case 18:
1743                err = "write parity error";
1744                break;
1745        case 19:
1746                err = "redundacy loss";
1747                break;
1748        case 20:
1749                err = "reserved";
1750                break;
1751        case 21:
1752                err = "memory range error";
1753                break;
1754        case 22:
1755                err = "RTID out of range";
1756                break;
1757        case 23:
1758                err = "address parity error";
1759                break;
1760        case 24:
1761                err = "byte enable parity error";
1762                break;
1763        default:
1764                err = "unknown";
1765        }
1766
1767        /*
1768         * Call the helper to output message
1769         * FIXME: what to do if core_err_cnt > 1? Currently, it generates
1770         * only one event
1771         */
1772        if (uncorrected_error || !pvt->is_registered)
1773                edac_mc_handle_error(tp_event, mci, core_err_cnt,
1774                                     m->addr >> PAGE_SHIFT,
1775                                     m->addr & ~PAGE_MASK,
1776                                     syndrome,
1777                                     channel, dimm, -1,
1778                                     err, optype);
1779}
1780
1781/*
1782 *      i7core_check_error      Retrieve and process errors reported by the
1783 *                              hardware. Called by the Core module.
1784 */
1785static void i7core_check_error(struct mem_ctl_info *mci, struct mce *m)
1786{
1787        struct i7core_pvt *pvt = mci->pvt_info;
1788
1789        i7core_mce_output_error(mci, m);
1790
1791        /*
1792         * Now, let's increment CE error counts
1793         */
1794        if (!pvt->is_registered)
1795                i7core_udimm_check_mc_ecc_err(mci);
1796        else
1797                i7core_rdimm_check_mc_ecc_err(mci);
1798}
1799
1800/*
1801 * Check that logging is enabled and that this is the right type
1802 * of error for us to handle.
1803 */
1804static int i7core_mce_check_error(struct notifier_block *nb, unsigned long val,
1805                                  void *data)
1806{
1807        struct mce *mce = (struct mce *)data;
1808        struct i7core_dev *i7_dev;
1809        struct mem_ctl_info *mci;
1810        struct i7core_pvt *pvt;
1811
1812        i7_dev = get_i7core_dev(mce->socketid);
1813        if (!i7_dev)
1814                return NOTIFY_DONE;
1815
1816        mci = i7_dev->mci;
1817        pvt = mci->pvt_info;
1818
1819        /*
1820         * Just let mcelog handle it if the error is
1821         * outside the memory controller
1822         */
1823        if (((mce->status & 0xffff) >> 7) != 1)
1824                return NOTIFY_DONE;
1825
1826        /* Bank 8 registers are the only ones that we know how to handle */
1827        if (mce->bank != 8)
1828                return NOTIFY_DONE;
1829
1830        i7core_check_error(mci, mce);
1831
1832        /* Advise mcelog that the errors were handled */
1833        return NOTIFY_STOP;
1834}
1835
1836static struct notifier_block i7_mce_dec = {
1837        .notifier_call  = i7core_mce_check_error,
1838        .priority       = MCE_PRIO_EDAC,
1839};
1840
1841struct memdev_dmi_entry {
1842        u8 type;
1843        u8 length;
1844        u16 handle;
1845        u16 phys_mem_array_handle;
1846        u16 mem_err_info_handle;
1847        u16 total_width;
1848        u16 data_width;
1849        u16 size;
1850        u8 form;
1851        u8 device_set;
1852        u8 device_locator;
1853        u8 bank_locator;
1854        u8 memory_type;
1855        u16 type_detail;
1856        u16 speed;
1857        u8 manufacturer;
1858        u8 serial_number;
1859        u8 asset_tag;
1860        u8 part_number;
1861        u8 attributes;
1862        u32 extended_size;
1863        u16 conf_mem_clk_speed;
1864} __attribute__((__packed__));
1865
1866
1867/*
1868 * Decode the DRAM Clock Frequency, be paranoid, make sure that all
1869 * memory devices show the same speed, and if they don't then consider
1870 * all speeds to be invalid.
1871 */
1872static void decode_dclk(const struct dmi_header *dh, void *_dclk_freq)
1873{
1874        int *dclk_freq = _dclk_freq;
1875        u16 dmi_mem_clk_speed;
1876
1877        if (*dclk_freq == -1)
1878                return;
1879
1880        if (dh->type == DMI_ENTRY_MEM_DEVICE) {
1881                struct memdev_dmi_entry *memdev_dmi_entry =
1882                        (struct memdev_dmi_entry *)dh;
1883                unsigned long conf_mem_clk_speed_offset =
1884                        (unsigned long)&memdev_dmi_entry->conf_mem_clk_speed -
1885                        (unsigned long)&memdev_dmi_entry->type;
1886                unsigned long speed_offset =
1887                        (unsigned long)&memdev_dmi_entry->speed -
1888                        (unsigned long)&memdev_dmi_entry->type;
1889
1890                /* Check that a DIMM is present */
1891                if (memdev_dmi_entry->size == 0)
1892                        return;
1893
1894                /*
1895                 * Pick the configured speed if it's available, otherwise
1896                 * pick the DIMM speed, or we don't have a speed.
1897                 */
1898                if (memdev_dmi_entry->length > conf_mem_clk_speed_offset) {
1899                        dmi_mem_clk_speed =
1900                                memdev_dmi_entry->conf_mem_clk_speed;
1901                } else if (memdev_dmi_entry->length > speed_offset) {
1902                        dmi_mem_clk_speed = memdev_dmi_entry->speed;
1903                } else {
1904                        *dclk_freq = -1;
1905                        return;
1906                }
1907
1908                if (*dclk_freq == 0) {
1909                        /* First pass, speed was 0 */
1910                        if (dmi_mem_clk_speed > 0) {
1911                                /* Set speed if a valid speed is read */
1912                                *dclk_freq = dmi_mem_clk_speed;
1913                        } else {
1914                                /* Otherwise we don't have a valid speed */
1915                                *dclk_freq = -1;
1916                        }
1917                } else if (*dclk_freq > 0 &&
1918                           *dclk_freq != dmi_mem_clk_speed) {
1919                        /*
1920                         * If we have a speed, check that all DIMMS are the same
1921                         * speed, otherwise set the speed as invalid.
1922                         */
1923                        *dclk_freq = -1;
1924                }
1925        }
1926}
1927
1928/*
1929 * The default DCLK frequency is used as a fallback if we
1930 * fail to find anything reliable in the DMI. The value
1931 * is taken straight from the datasheet.
1932 */
1933#define DEFAULT_DCLK_FREQ 800
1934
1935static int get_dclk_freq(void)
1936{
1937        int dclk_freq = 0;
1938
1939        dmi_walk(decode_dclk, (void *)&dclk_freq);
1940
1941        if (dclk_freq < 1)
1942                return DEFAULT_DCLK_FREQ;
1943
1944        return dclk_freq;
1945}
1946
1947/*
1948 * set_sdram_scrub_rate         This routine sets byte/sec bandwidth scrub rate
1949 *                              to hardware according to SCRUBINTERVAL formula
1950 *                              found in datasheet.
1951 */
1952static int set_sdram_scrub_rate(struct mem_ctl_info *mci, u32 new_bw)
1953{
1954        struct i7core_pvt *pvt = mci->pvt_info;
1955        struct pci_dev *pdev;
1956        u32 dw_scrub;
1957        u32 dw_ssr;
1958
1959        /* Get data from the MC register, function 2 */
1960        pdev = pvt->pci_mcr[2];
1961        if (!pdev)
1962                return -ENODEV;
1963
1964        pci_read_config_dword(pdev, MC_SCRUB_CONTROL, &dw_scrub);
1965
1966        if (new_bw == 0) {
1967                /* Prepare to disable petrol scrub */
1968                dw_scrub &= ~STARTSCRUB;
1969                /* Stop the patrol scrub engine */
1970                write_and_test(pdev, MC_SCRUB_CONTROL,
1971                               dw_scrub & ~SCRUBINTERVAL_MASK);
1972
1973                /* Get current status of scrub rate and set bit to disable */
1974                pci_read_config_dword(pdev, MC_SSRCONTROL, &dw_ssr);
1975                dw_ssr &= ~SSR_MODE_MASK;
1976                dw_ssr |= SSR_MODE_DISABLE;
1977        } else {
1978                const int cache_line_size = 64;
1979                const u32 freq_dclk_mhz = pvt->dclk_freq;
1980                unsigned long long scrub_interval;
1981                /*
1982                 * Translate the desired scrub rate to a register value and
1983                 * program the corresponding register value.
1984                 */
1985                scrub_interval = (unsigned long long)freq_dclk_mhz *
1986                        cache_line_size * 1000000;
1987                do_div(scrub_interval, new_bw);
1988
1989                if (!scrub_interval || scrub_interval > SCRUBINTERVAL_MASK)
1990                        return -EINVAL;
1991
1992                dw_scrub = SCRUBINTERVAL_MASK & scrub_interval;
1993
1994                /* Start the patrol scrub engine */
1995                pci_write_config_dword(pdev, MC_SCRUB_CONTROL,
1996                                       STARTSCRUB | dw_scrub);
1997
1998                /* Get current status of scrub rate and set bit to enable */
1999                pci_read_config_dword(pdev, MC_SSRCONTROL, &dw_ssr);
2000                dw_ssr &= ~SSR_MODE_MASK;
2001                dw_ssr |= SSR_MODE_ENABLE;
2002        }
2003        /* Disable or enable scrubbing */
2004        pci_write_config_dword(pdev, MC_SSRCONTROL, dw_ssr);
2005
2006        return new_bw;
2007}
2008
2009/*
2010 * get_sdram_scrub_rate         This routine convert current scrub rate value
2011 *                              into byte/sec bandwidth according to
2012 *                              SCRUBINTERVAL formula found in datasheet.
2013 */
2014static int get_sdram_scrub_rate(struct mem_ctl_info *mci)
2015{
2016        struct i7core_pvt *pvt = mci->pvt_info;
2017        struct pci_dev *pdev;
2018        const u32 cache_line_size = 64;
2019        const u32 freq_dclk_mhz = pvt->dclk_freq;
2020        unsigned long long scrub_rate;
2021        u32 scrubval;
2022
2023        /* Get data from the MC register, function 2 */
2024        pdev = pvt->pci_mcr[2];
2025        if (!pdev)
2026                return -ENODEV;
2027
2028        /* Get current scrub control data */
2029        pci_read_config_dword(pdev, MC_SCRUB_CONTROL, &scrubval);
2030
2031        /* Mask highest 8-bits to 0 */
2032        scrubval &=  SCRUBINTERVAL_MASK;
2033        if (!scrubval)
2034                return 0;
2035
2036        /* Calculate scrub rate value into byte/sec bandwidth */
2037        scrub_rate =  (unsigned long long)freq_dclk_mhz *
2038                1000000 * cache_line_size;
2039        do_div(scrub_rate, scrubval);
2040        return (int)scrub_rate;
2041}
2042
2043static void enable_sdram_scrub_setting(struct mem_ctl_info *mci)
2044{
2045        struct i7core_pvt *pvt = mci->pvt_info;
2046        u32 pci_lock;
2047
2048        /* Unlock writes to pci registers */
2049        pci_read_config_dword(pvt->pci_noncore, MC_CFG_CONTROL, &pci_lock);
2050        pci_lock &= ~0x3;
2051        pci_write_config_dword(pvt->pci_noncore, MC_CFG_CONTROL,
2052                               pci_lock | MC_CFG_UNLOCK);
2053
2054        mci->set_sdram_scrub_rate = set_sdram_scrub_rate;
2055        mci->get_sdram_scrub_rate = get_sdram_scrub_rate;
2056}
2057
2058static void disable_sdram_scrub_setting(struct mem_ctl_info *mci)
2059{
2060        struct i7core_pvt *pvt = mci->pvt_info;
2061        u32 pci_lock;
2062
2063        /* Lock writes to pci registers */
2064        pci_read_config_dword(pvt->pci_noncore, MC_CFG_CONTROL, &pci_lock);
2065        pci_lock &= ~0x3;
2066        pci_write_config_dword(pvt->pci_noncore, MC_CFG_CONTROL,
2067                               pci_lock | MC_CFG_LOCK);
2068}
2069
2070static void i7core_pci_ctl_create(struct i7core_pvt *pvt)
2071{
2072        pvt->i7core_pci = edac_pci_create_generic_ctl(
2073                                                &pvt->i7core_dev->pdev[0]->dev,
2074                                                EDAC_MOD_STR);
2075        if (unlikely(!pvt->i7core_pci))
2076                i7core_printk(KERN_WARNING,
2077                              "Unable to setup PCI error report via EDAC\n");
2078}
2079
2080static void i7core_pci_ctl_release(struct i7core_pvt *pvt)
2081{
2082        if (likely(pvt->i7core_pci))
2083                edac_pci_release_generic_ctl(pvt->i7core_pci);
2084        else
2085                i7core_printk(KERN_ERR,
2086                                "Couldn't find mem_ctl_info for socket %d\n",
2087                                pvt->i7core_dev->socket);
2088        pvt->i7core_pci = NULL;
2089}
2090
2091static void i7core_unregister_mci(struct i7core_dev *i7core_dev)
2092{
2093        struct mem_ctl_info *mci = i7core_dev->mci;
2094        struct i7core_pvt *pvt;
2095
2096        if (unlikely(!mci || !mci->pvt_info)) {
2097                edac_dbg(0, "MC: dev = %p\n", &i7core_dev->pdev[0]->dev);
2098
2099                i7core_printk(KERN_ERR, "Couldn't find mci handler\n");
2100                return;
2101        }
2102
2103        pvt = mci->pvt_info;
2104
2105        edac_dbg(0, "MC: mci = %p, dev = %p\n", mci, &i7core_dev->pdev[0]->dev);
2106
2107        /* Disable scrubrate setting */
2108        if (pvt->enable_scrub)
2109                disable_sdram_scrub_setting(mci);
2110
2111        /* Disable EDAC polling */
2112        i7core_pci_ctl_release(pvt);
2113
2114        /* Remove MC sysfs nodes */
2115        i7core_delete_sysfs_devices(mci);
2116        edac_mc_del_mc(mci->pdev);
2117
2118        edac_dbg(1, "%s: free mci struct\n", mci->ctl_name);
2119        kfree(mci->ctl_name);
2120        edac_mc_free(mci);
2121        i7core_dev->mci = NULL;
2122}
2123
2124static int i7core_register_mci(struct i7core_dev *i7core_dev)
2125{
2126        struct mem_ctl_info *mci;
2127        struct i7core_pvt *pvt;
2128        int rc;
2129        struct edac_mc_layer layers[2];
2130
2131        /* allocate a new MC control structure */
2132
2133        layers[0].type = EDAC_MC_LAYER_CHANNEL;
2134        layers[0].size = NUM_CHANS;
2135        layers[0].is_virt_csrow = false;
2136        layers[1].type = EDAC_MC_LAYER_SLOT;
2137        layers[1].size = MAX_DIMMS;
2138        layers[1].is_virt_csrow = true;
2139        mci = edac_mc_alloc(i7core_dev->socket, ARRAY_SIZE(layers), layers,
2140                            sizeof(*pvt));
2141        if (unlikely(!mci))
2142                return -ENOMEM;
2143
2144        edac_dbg(0, "MC: mci = %p, dev = %p\n", mci, &i7core_dev->pdev[0]->dev);
2145
2146        pvt = mci->pvt_info;
2147        memset(pvt, 0, sizeof(*pvt));
2148
2149        /* Associates i7core_dev and mci for future usage */
2150        pvt->i7core_dev = i7core_dev;
2151        i7core_dev->mci = mci;
2152
2153        /*
2154         * FIXME: how to handle RDDR3 at MCI level? It is possible to have
2155         * Mixed RDDR3/UDDR3 with Nehalem, provided that they are on different
2156         * memory channels
2157         */
2158        mci->mtype_cap = MEM_FLAG_DDR3;
2159        mci->edac_ctl_cap = EDAC_FLAG_NONE;
2160        mci->edac_cap = EDAC_FLAG_NONE;
2161        mci->mod_name = "i7core_edac.c";
2162
2163        mci->ctl_name = kasprintf(GFP_KERNEL, "i7 core #%d", i7core_dev->socket);
2164        if (!mci->ctl_name) {
2165                rc = -ENOMEM;
2166                goto fail1;
2167        }
2168
2169        mci->dev_name = pci_name(i7core_dev->pdev[0]);
2170        mci->ctl_page_to_phys = NULL;
2171
2172        /* Store pci devices at mci for faster access */
2173        rc = mci_bind_devs(mci, i7core_dev);
2174        if (unlikely(rc < 0))
2175                goto fail0;
2176
2177
2178        /* Get dimm basic config */
2179        get_dimm_config(mci);
2180        /* record ptr to the generic device */
2181        mci->pdev = &i7core_dev->pdev[0]->dev;
2182
2183        /* Enable scrubrate setting */
2184        if (pvt->enable_scrub)
2185                enable_sdram_scrub_setting(mci);
2186
2187        /* add this new MC control structure to EDAC's list of MCs */
2188        if (unlikely(edac_mc_add_mc_with_groups(mci, i7core_dev_groups))) {
2189                edac_dbg(0, "MC: failed edac_mc_add_mc()\n");
2190                /* FIXME: perhaps some code should go here that disables error
2191                 * reporting if we just enabled it
2192                 */
2193
2194                rc = -EINVAL;
2195                goto fail0;
2196        }
2197        if (i7core_create_sysfs_devices(mci)) {
2198                edac_dbg(0, "MC: failed to create sysfs nodes\n");
2199                edac_mc_del_mc(mci->pdev);
2200                rc = -EINVAL;
2201                goto fail0;
2202        }
2203
2204        /* Default error mask is any memory */
2205        pvt->inject.channel = 0;
2206        pvt->inject.dimm = -1;
2207        pvt->inject.rank = -1;
2208        pvt->inject.bank = -1;
2209        pvt->inject.page = -1;
2210        pvt->inject.col = -1;
2211
2212        /* allocating generic PCI control info */
2213        i7core_pci_ctl_create(pvt);
2214
2215        /* DCLK for scrub rate setting */
2216        pvt->dclk_freq = get_dclk_freq();
2217
2218        return 0;
2219
2220fail0:
2221        kfree(mci->ctl_name);
2222
2223fail1:
2224        edac_mc_free(mci);
2225        i7core_dev->mci = NULL;
2226        return rc;
2227}
2228
2229/*
2230 *      i7core_probe    Probe for ONE instance of device to see if it is
2231 *                      present.
2232 *      return:
2233 *              0 for FOUND a device
2234 *              < 0 for error code
2235 */
2236
2237static int i7core_probe(struct pci_dev *pdev, const struct pci_device_id *id)
2238{
2239        int rc, count = 0;
2240        struct i7core_dev *i7core_dev;
2241
2242        /* get the pci devices we want to reserve for our use */
2243        mutex_lock(&i7core_edac_lock);
2244
2245        /*
2246         * All memory controllers are allocated at the first pass.
2247         */
2248        if (unlikely(probed >= 1)) {
2249                mutex_unlock(&i7core_edac_lock);
2250                return -ENODEV;
2251        }
2252        probed++;
2253
2254        rc = i7core_get_all_devices();
2255        if (unlikely(rc < 0))
2256                goto fail0;
2257
2258        list_for_each_entry(i7core_dev, &i7core_edac_list, list) {
2259                count++;
2260                rc = i7core_register_mci(i7core_dev);
2261                if (unlikely(rc < 0))
2262                        goto fail1;
2263        }
2264
2265        /*
2266         * Nehalem-EX uses a different memory controller. However, as the
2267         * memory controller is not visible on some Nehalem/Nehalem-EP, we
2268         * need to indirectly probe via a X58 PCI device. The same devices
2269         * are found on (some) Nehalem-EX. So, on those machines, the
2270         * probe routine needs to return -ENODEV, as the actual Memory
2271         * Controller registers won't be detected.
2272         */
2273        if (!count) {
2274                rc = -ENODEV;
2275                goto fail1;
2276        }
2277
2278        i7core_printk(KERN_INFO,
2279                      "Driver loaded, %d memory controller(s) found.\n",
2280                      count);
2281
2282        mutex_unlock(&i7core_edac_lock);
2283        return 0;
2284
2285fail1:
2286        list_for_each_entry(i7core_dev, &i7core_edac_list, list)
2287                i7core_unregister_mci(i7core_dev);
2288
2289        i7core_put_all_devices();
2290fail0:
2291        mutex_unlock(&i7core_edac_lock);
2292        return rc;
2293}
2294
2295/*
2296 *      i7core_remove   destructor for one instance of device
2297 *
2298 */
2299static void i7core_remove(struct pci_dev *pdev)
2300{
2301        struct i7core_dev *i7core_dev;
2302
2303        edac_dbg(0, "\n");
2304
2305        /*
2306         * we have a trouble here: pdev value for removal will be wrong, since
2307         * it will point to the X58 register used to detect that the machine
2308         * is a Nehalem or upper design. However, due to the way several PCI
2309         * devices are grouped together to provide MC functionality, we need
2310         * to use a different method for releasing the devices
2311         */
2312
2313        mutex_lock(&i7core_edac_lock);
2314
2315        if (unlikely(!probed)) {
2316                mutex_unlock(&i7core_edac_lock);
2317                return;
2318        }
2319
2320        list_for_each_entry(i7core_dev, &i7core_edac_list, list)
2321                i7core_unregister_mci(i7core_dev);
2322
2323        /* Release PCI resources */
2324        i7core_put_all_devices();
2325
2326        probed--;
2327
2328        mutex_unlock(&i7core_edac_lock);
2329}
2330
2331MODULE_DEVICE_TABLE(pci, i7core_pci_tbl);
2332
2333/*
2334 *      i7core_driver   pci_driver structure for this module
2335 *
2336 */
2337static struct pci_driver i7core_driver = {
2338        .name     = "i7core_edac",
2339        .probe    = i7core_probe,
2340        .remove   = i7core_remove,
2341        .id_table = i7core_pci_tbl,
2342};
2343
2344/*
2345 *      i7core_init             Module entry function
2346 *                      Try to initialize this module for its devices
2347 */
2348static int __init i7core_init(void)
2349{
2350        int pci_rc;
2351
2352        edac_dbg(2, "\n");
2353
2354        /* Ensure that the OPSTATE is set correctly for POLL or NMI */
2355        opstate_init();
2356
2357        if (use_pci_fixup)
2358                i7core_xeon_pci_fixup(pci_dev_table);
2359
2360        pci_rc = pci_register_driver(&i7core_driver);
2361
2362        if (pci_rc >= 0) {
2363                mce_register_decode_chain(&i7_mce_dec);
2364                return 0;
2365        }
2366
2367        i7core_printk(KERN_ERR, "Failed to register device with error %d.\n",
2368                      pci_rc);
2369
2370        return pci_rc;
2371}
2372
2373/*
2374 *      i7core_exit()   Module exit function
2375 *                      Unregister the driver
2376 */
2377static void __exit i7core_exit(void)
2378{
2379        edac_dbg(2, "\n");
2380        pci_unregister_driver(&i7core_driver);
2381        mce_unregister_decode_chain(&i7_mce_dec);
2382}
2383
2384module_init(i7core_init);
2385module_exit(i7core_exit);
2386
2387MODULE_LICENSE("GPL");
2388MODULE_AUTHOR("Mauro Carvalho Chehab");
2389MODULE_AUTHOR("Red Hat Inc. (http://www.redhat.com)");
2390MODULE_DESCRIPTION("MC Driver for Intel i7 Core memory controllers - "
2391                   I7CORE_REVISION);
2392
2393module_param(edac_op_state, int, 0444);
2394MODULE_PARM_DESC(edac_op_state, "EDAC Error Reporting state: 0=Poll,1=NMI");
2395