linux/drivers/edac/i3200_edac.c
<<
>>
Prefs
   1/*
   2 * Intel 3200/3210 Memory Controller kernel module
   3 * Copyright (C) 2008-2009 Akamai Technologies, Inc.
   4 * Portions by Hitoshi Mitake <h.mitake@gmail.com>.
   5 *
   6 * This file may be distributed under the terms of the
   7 * GNU General Public License.
   8 */
   9
  10#include <linux/module.h>
  11#include <linux/init.h>
  12#include <linux/pci.h>
  13#include <linux/pci_ids.h>
  14#include <linux/edac.h>
  15#include <linux/io.h>
  16#include "edac_core.h"
  17
  18#include <asm-generic/io-64-nonatomic-lo-hi.h>
  19
  /* Driver version and module name; probe stores them in the mem_ctl_info. */
#define I3200_REVISION		"1.1"
#define EDAC_MOD_STR		"i3200_edac"

/* PCI device ID used to match the Intel 3200 host bridge. */
#define PCI_DEVICE_ID_INTEL_3200_HB	0x29f0

/* Topology limits used to size arrays and bound loops in this driver. */
#define I3200_DIMMS		4
#define I3200_RANKS		8
#define I3200_RANKS_PER_CHANNEL	4
#define I3200_CHANNELS		2
  30
  /*
   * Intel 3200 register addresses - device 0 function 0 - DRAM Controller.
   * These offsets are accessed through the PCI config-space helpers.
   */

/*
 * MCH Memory Mapped Register BAR.  The base address is read as two 32-bit
 * halves (LOW/HIGH) and masked down to its address bits.
 */
#define I3200_MCHBAR_LOW	0x48
#define I3200_MCHBAR_HIGH	0x4c
#define I3200_MCHBAR_MASK	0xfffffc000ULL	/* bits 35:14 */
#define I3200_MMR_WINDOW_SIZE	16384

/*
 * Top of Memory (16b)
 *
 * 15:10 reserved
 *  9:0  total populated physical memory
 */
#define I3200_TOM		0xa0
#define I3200_TOM_MASK		0x3ff	/* bits 9:0 */
#define I3200_TOM_SHIFT		26	/* 64MiB grain */

/*
 * Error Status Register (16b)
 *
 * 15    reserved
 * 14    Isochronous TBWRR Run Behind FIFO Full (ITCV)
 * 13    Isochronous TBWRR Run Behind FIFO Put (ITSTV)
 * 12    reserved
 * 11    MCH Thermal Sensor Event for SMI/SCI/SERR (GTSE)
 * 10    reserved
 *  9    LOCK to non-DRAM Memory Flag (LCKF)
 *  8    reserved
 *  7    DRAM Throttle Flag (DTF)
 *  6:2  reserved
 *  1    Multi-bit DRAM ECC Error Flag (DMERR)
 *  0    Single-bit DRAM ECC Error Flag (DSERR)
 */
#define I3200_ERRSTS		0xc8
#define I3200_ERRSTS_UE		0x0002
#define I3200_ERRSTS_CE		0x0001
#define I3200_ERRSTS_BITS	(I3200_ERRSTS_UE | I3200_ERRSTS_CE)
  67
  68
  /* Intel 3200 MMIO register space - device 0 function 0 - MMR space */

/*
 * Channel 0 / Channel 1 DRAM Rank Boundary (16b x 4)
 *
 * 15:10 reserved
 *  9:0  DRAM Rank Boundary Address
 */
#define I3200_C0DRB	0x200
#define I3200_C1DRB	0x600
#define I3200_DRB_MASK	0x3ff	/* bits 9:0 */
#define I3200_DRB_SHIFT	26	/* 64MiB grain */

/*
 * Channel 0 / Channel 1 ECC Error Log (64b)
 *
 * 63:48 Error Column Address (ERRCOL)
 * 47:32 Error Row Address (ERRROW)
 * 31:29 Error Bank Address (ERRBANK)
 * 28:27 Error Rank Address (ERRRANK)
 * 26:24 reserved
 * 23:16 Error Syndrome (ERRSYND)
 * 15: 2 reserved
 *    1  Multiple Bit Error Status (MERRSTS)
 *    0  Correctable Error Status (CERRSTS)
 */
#define I3200_C0ECCERRLOG		0x280
#define I3200_C1ECCERRLOG		0x680
#define I3200_ECCERRLOG_CE		0x1
#define I3200_ECCERRLOG_UE		0x2
#define I3200_ECCERRLOG_RANK_BITS	0x18000000
#define I3200_ECCERRLOG_RANK_SHIFT	27
#define I3200_ECCERRLOG_SYNDROME_BITS	0xff0000
#define I3200_ECCERRLOG_SYNDROME_SHIFT	16

/*
 * Capability identifier register; P.95 of spec for details.  Unlike the
 * registers above, this driver reads it with a PCI config-space access.
 */
#define I3200_CAPID0			0xe0
 100
 101struct i3200_priv {
 102        void __iomem *window;
 103};
 104
 105static int nr_channels;
 106
 107static int how_many_channels(struct pci_dev *pdev)
 108{
 109        int n_channels;
 110
 111        unsigned char capid0_8b; /* 8th byte of CAPID0 */
 112
 113        pci_read_config_byte(pdev, I3200_CAPID0 + 8, &capid0_8b);
 114
 115        if (capid0_8b & 0x20) { /* check DCD: Dual Channel Disable */
 116                edac_dbg(0, "In single channel mode\n");
 117                n_channels = 1;
 118        } else {
 119                edac_dbg(0, "In dual channel mode\n");
 120                n_channels = 2;
 121        }
 122
 123        if (capid0_8b & 0x10) /* check if both channels are filled */
 124                edac_dbg(0, "2 DIMMS per channel disabled\n");
 125        else
 126                edac_dbg(0, "2 DIMMS per channel enabled\n");
 127
 128        return n_channels;
 129}
 130
 131static unsigned long eccerrlog_syndrome(u64 log)
 132{
 133        return (log & I3200_ECCERRLOG_SYNDROME_BITS) >>
 134                I3200_ECCERRLOG_SYNDROME_SHIFT;
 135}
 136
 137static int eccerrlog_row(int channel, u64 log)
 138{
 139        u64 rank = ((log & I3200_ECCERRLOG_RANK_BITS) >>
 140                I3200_ECCERRLOG_RANK_SHIFT);
 141        return rank | (channel * I3200_RANKS_PER_CHANNEL);
 142}
 143
 /* Index into i3200_devs[]; also used as driver_data in the PCI ID table. */
enum i3200_chips {
	I3200 = 0,
};

/*
 * Per-chip description.
 *
 * ctl_name: controller name copied into mci->ctl_name at probe time.
 */
struct i3200_dev_info {
	const char *ctl_name;
};
 151
 152struct i3200_error_info {
 153        u16 errsts;
 154        u16 errsts2;
 155        u64 eccerrlog[I3200_CHANNELS];
 156};
 157
 158static const struct i3200_dev_info i3200_devs[] = {
 159        [I3200] = {
 160                .ctl_name = "i3200"
 161        },
 162};
 163
 164static struct pci_dev *mci_pdev;
 165static int i3200_registered = 1;
 166
 167
 168static void i3200_clear_error_info(struct mem_ctl_info *mci)
 169{
 170        struct pci_dev *pdev;
 171
 172        pdev = to_pci_dev(mci->pdev);
 173
 174        /*
 175         * Clear any error bits.
 176         * (Yes, we really clear bits by writing 1 to them.)
 177         */
 178        pci_write_bits16(pdev, I3200_ERRSTS, I3200_ERRSTS_BITS,
 179                I3200_ERRSTS_BITS);
 180}
 181
 182static void i3200_get_and_clear_error_info(struct mem_ctl_info *mci,
 183                struct i3200_error_info *info)
 184{
 185        struct pci_dev *pdev;
 186        struct i3200_priv *priv = mci->pvt_info;
 187        void __iomem *window = priv->window;
 188
 189        pdev = to_pci_dev(mci->pdev);
 190
 191        /*
 192         * This is a mess because there is no atomic way to read all the
 193         * registers at once and the registers can transition from CE being
 194         * overwritten by UE.
 195         */
 196        pci_read_config_word(pdev, I3200_ERRSTS, &info->errsts);
 197        if (!(info->errsts & I3200_ERRSTS_BITS))
 198                return;
 199
 200        info->eccerrlog[0] = readq(window + I3200_C0ECCERRLOG);
 201        if (nr_channels == 2)
 202                info->eccerrlog[1] = readq(window + I3200_C1ECCERRLOG);
 203
 204        pci_read_config_word(pdev, I3200_ERRSTS, &info->errsts2);
 205
 206        /*
 207         * If the error is the same for both reads then the first set
 208         * of reads is valid.  If there is a change then there is a CE
 209         * with no info and the second set of reads is valid and
 210         * should be UE info.
 211         */
 212        if ((info->errsts ^ info->errsts2) & I3200_ERRSTS_BITS) {
 213                info->eccerrlog[0] = readq(window + I3200_C0ECCERRLOG);
 214                if (nr_channels == 2)
 215                        info->eccerrlog[1] = readq(window + I3200_C1ECCERRLOG);
 216        }
 217
 218        i3200_clear_error_info(mci);
 219}
 220
 221static void i3200_process_error_info(struct mem_ctl_info *mci,
 222                struct i3200_error_info *info)
 223{
 224        int channel;
 225        u64 log;
 226
 227        if (!(info->errsts & I3200_ERRSTS_BITS))
 228                return;
 229
 230        if ((info->errsts ^ info->errsts2) & I3200_ERRSTS_BITS) {
 231                edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1, 0, 0, 0,
 232                                     -1, -1, -1, "UE overwrote CE", "");
 233                info->errsts = info->errsts2;
 234        }
 235
 236        for (channel = 0; channel < nr_channels; channel++) {
 237                log = info->eccerrlog[channel];
 238                if (log & I3200_ECCERRLOG_UE) {
 239                        edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1,
 240                                             0, 0, 0,
 241                                             eccerrlog_row(channel, log),
 242                                             -1, -1,
 243                                             "i3000 UE", "");
 244                } else if (log & I3200_ECCERRLOG_CE) {
 245                        edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1,
 246                                             0, 0, eccerrlog_syndrome(log),
 247                                             eccerrlog_row(channel, log),
 248                                             -1, -1,
 249                                             "i3000 UE", "");
 250                }
 251        }
 252}
 253
 254static void i3200_check(struct mem_ctl_info *mci)
 255{
 256        struct i3200_error_info info;
 257
 258        edac_dbg(1, "MC%d\n", mci->mc_idx);
 259        i3200_get_and_clear_error_info(mci, &info);
 260        i3200_process_error_info(mci, &info);
 261}
 262
 263
 264void __iomem *i3200_map_mchbar(struct pci_dev *pdev)
 265{
 266        union {
 267                u64 mchbar;
 268                struct {
 269                        u32 mchbar_low;
 270                        u32 mchbar_high;
 271                };
 272        } u;
 273        void __iomem *window;
 274
 275        pci_read_config_dword(pdev, I3200_MCHBAR_LOW, &u.mchbar_low);
 276        pci_read_config_dword(pdev, I3200_MCHBAR_HIGH, &u.mchbar_high);
 277        u.mchbar &= I3200_MCHBAR_MASK;
 278
 279        if (u.mchbar != (resource_size_t)u.mchbar) {
 280                printk(KERN_ERR
 281                        "i3200: mmio space beyond accessible range (0x%llx)\n",
 282                        (unsigned long long)u.mchbar);
 283                return NULL;
 284        }
 285
 286        window = ioremap_nocache(u.mchbar, I3200_MMR_WINDOW_SIZE);
 287        if (!window)
 288                printk(KERN_ERR "i3200: cannot map mmio space at 0x%llx\n",
 289                        (unsigned long long)u.mchbar);
 290
 291        return window;
 292}
 293
 294
 295static void i3200_get_drbs(void __iomem *window,
 296        u16 drbs[I3200_CHANNELS][I3200_RANKS_PER_CHANNEL])
 297{
 298        int i;
 299
 300        for (i = 0; i < I3200_RANKS_PER_CHANNEL; i++) {
 301                drbs[0][i] = readw(window + I3200_C0DRB + 2*i) & I3200_DRB_MASK;
 302                drbs[1][i] = readw(window + I3200_C1DRB + 2*i) & I3200_DRB_MASK;
 303
 304                edac_dbg(0, "drb[0][%d] = %d, drb[1][%d] = %d\n", i, drbs[0][i], i, drbs[1][i]);
 305        }
 306}
 307
 308static bool i3200_is_stacked(struct pci_dev *pdev,
 309        u16 drbs[I3200_CHANNELS][I3200_RANKS_PER_CHANNEL])
 310{
 311        u16 tom;
 312
 313        pci_read_config_word(pdev, I3200_TOM, &tom);
 314        tom &= I3200_TOM_MASK;
 315
 316        return drbs[I3200_CHANNELS - 1][I3200_RANKS_PER_CHANNEL - 1] == tom;
 317}
 318
 319static unsigned long drb_to_nr_pages(
 320        u16 drbs[I3200_CHANNELS][I3200_RANKS_PER_CHANNEL], bool stacked,
 321        int channel, int rank)
 322{
 323        int n;
 324
 325        n = drbs[channel][rank];
 326        if (!n)
 327                return 0;
 328
 329        if (rank > 0)
 330                n -= drbs[channel][rank - 1];
 331        if (stacked && (channel == 1) &&
 332        drbs[channel][rank] == drbs[channel][I3200_RANKS_PER_CHANNEL - 1])
 333                n -= drbs[0][I3200_RANKS_PER_CHANNEL - 1];
 334
 335        n <<= (I3200_DRB_SHIFT - PAGE_SHIFT);
 336        return n;
 337}
 338
 339static int i3200_probe1(struct pci_dev *pdev, int dev_idx)
 340{
 341        int rc;
 342        int i, j;
 343        struct mem_ctl_info *mci = NULL;
 344        struct edac_mc_layer layers[2];
 345        u16 drbs[I3200_CHANNELS][I3200_RANKS_PER_CHANNEL];
 346        bool stacked;
 347        void __iomem *window;
 348        struct i3200_priv *priv;
 349
 350        edac_dbg(0, "MC:\n");
 351
 352        window = i3200_map_mchbar(pdev);
 353        if (!window)
 354                return -ENODEV;
 355
 356        i3200_get_drbs(window, drbs);
 357        nr_channels = how_many_channels(pdev);
 358
 359        layers[0].type = EDAC_MC_LAYER_CHIP_SELECT;
 360        layers[0].size = I3200_DIMMS;
 361        layers[0].is_virt_csrow = true;
 362        layers[1].type = EDAC_MC_LAYER_CHANNEL;
 363        layers[1].size = nr_channels;
 364        layers[1].is_virt_csrow = false;
 365        mci = edac_mc_alloc(0, ARRAY_SIZE(layers), layers,
 366                            sizeof(struct i3200_priv));
 367        if (!mci)
 368                return -ENOMEM;
 369
 370        edac_dbg(3, "MC: init mci\n");
 371
 372        mci->pdev = &pdev->dev;
 373        mci->mtype_cap = MEM_FLAG_DDR2;
 374
 375        mci->edac_ctl_cap = EDAC_FLAG_SECDED;
 376        mci->edac_cap = EDAC_FLAG_SECDED;
 377
 378        mci->mod_name = EDAC_MOD_STR;
 379        mci->mod_ver = I3200_REVISION;
 380        mci->ctl_name = i3200_devs[dev_idx].ctl_name;
 381        mci->dev_name = pci_name(pdev);
 382        mci->edac_check = i3200_check;
 383        mci->ctl_page_to_phys = NULL;
 384        priv = mci->pvt_info;
 385        priv->window = window;
 386
 387        stacked = i3200_is_stacked(pdev, drbs);
 388
 389        /*
 390         * The dram rank boundary (DRB) reg values are boundary addresses
 391         * for each DRAM rank with a granularity of 64MB.  DRB regs are
 392         * cumulative; the last one will contain the total memory
 393         * contained in all ranks.
 394         */
 395        for (i = 0; i < I3200_DIMMS; i++) {
 396                unsigned long nr_pages;
 397
 398                for (j = 0; j < nr_channels; j++) {
 399                        struct dimm_info *dimm = EDAC_DIMM_PTR(mci->layers, mci->dimms,
 400                                                               mci->n_layers, i, j, 0);
 401
 402                        nr_pages = drb_to_nr_pages(drbs, stacked, j, i);
 403                        if (nr_pages == 0)
 404                                continue;
 405
 406                        edac_dbg(0, "csrow %d, channel %d%s, size = %ld Mb\n", i, j,
 407                                 stacked ? " (stacked)" : "", PAGES_TO_MiB(nr_pages));
 408
 409                        dimm->nr_pages = nr_pages;
 410                        dimm->grain = nr_pages << PAGE_SHIFT;
 411                        dimm->mtype = MEM_DDR2;
 412                        dimm->dtype = DEV_UNKNOWN;
 413                        dimm->edac_mode = EDAC_UNKNOWN;
 414                }
 415        }
 416
 417        i3200_clear_error_info(mci);
 418
 419        rc = -ENODEV;
 420        if (edac_mc_add_mc(mci)) {
 421                edac_dbg(3, "MC: failed edac_mc_add_mc()\n");
 422                goto fail;
 423        }
 424
 425        /* get this far and it's successful */
 426        edac_dbg(3, "MC: success\n");
 427        return 0;
 428
 429fail:
 430        iounmap(window);
 431        if (mci)
 432                edac_mc_free(mci);
 433
 434        return rc;
 435}
 436
 437static int i3200_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 438{
 439        int rc;
 440
 441        edac_dbg(0, "MC:\n");
 442
 443        if (pci_enable_device(pdev) < 0)
 444                return -EIO;
 445
 446        rc = i3200_probe1(pdev, ent->driver_data);
 447        if (!mci_pdev)
 448                mci_pdev = pci_dev_get(pdev);
 449
 450        return rc;
 451}
 452
 453static void i3200_remove_one(struct pci_dev *pdev)
 454{
 455        struct mem_ctl_info *mci;
 456        struct i3200_priv *priv;
 457
 458        edac_dbg(0, "\n");
 459
 460        mci = edac_mc_del_mc(&pdev->dev);
 461        if (!mci)
 462                return;
 463
 464        priv = mci->pvt_info;
 465        iounmap(priv->window);
 466
 467        edac_mc_free(mci);
 468}
 469
 470static DEFINE_PCI_DEVICE_TABLE(i3200_pci_tbl) = {
 471        {
 472                PCI_VEND_DEV(INTEL, 3200_HB), PCI_ANY_ID, PCI_ANY_ID, 0, 0,
 473                I3200},
 474        {
 475                0,
 476        }            /* 0 terminated list. */
 477};
 478
 479MODULE_DEVICE_TABLE(pci, i3200_pci_tbl);
 480
 481static struct pci_driver i3200_driver = {
 482        .name = EDAC_MOD_STR,
 483        .probe = i3200_init_one,
 484        .remove = i3200_remove_one,
 485        .id_table = i3200_pci_tbl,
 486};
 487
 488static int __init i3200_init(void)
 489{
 490        int pci_rc;
 491
 492        edac_dbg(3, "MC:\n");
 493
 494        /* Ensure that the OPSTATE is set correctly for POLL or NMI */
 495        opstate_init();
 496
 497        pci_rc = pci_register_driver(&i3200_driver);
 498        if (pci_rc < 0)
 499                goto fail0;
 500
 501        if (!mci_pdev) {
 502                i3200_registered = 0;
 503                mci_pdev = pci_get_device(PCI_VENDOR_ID_INTEL,
 504                                PCI_DEVICE_ID_INTEL_3200_HB, NULL);
 505                if (!mci_pdev) {
 506                        edac_dbg(0, "i3200 pci_get_device fail\n");
 507                        pci_rc = -ENODEV;
 508                        goto fail1;
 509                }
 510
 511                pci_rc = i3200_init_one(mci_pdev, i3200_pci_tbl);
 512                if (pci_rc < 0) {
 513                        edac_dbg(0, "i3200 init fail\n");
 514                        pci_rc = -ENODEV;
 515                        goto fail1;
 516                }
 517        }
 518
 519        return 0;
 520
 521fail1:
 522        pci_unregister_driver(&i3200_driver);
 523
 524fail0:
 525        if (mci_pdev)
 526                pci_dev_put(mci_pdev);
 527
 528        return pci_rc;
 529}
 530
 531static void __exit i3200_exit(void)
 532{
 533        edac_dbg(3, "MC:\n");
 534
 535        pci_unregister_driver(&i3200_driver);
 536        if (!i3200_registered) {
 537                i3200_remove_one(mci_pdev);
 538                pci_dev_put(mci_pdev);
 539        }
 540}
 541
 542module_init(i3200_init);
 543module_exit(i3200_exit);
 544
 545MODULE_LICENSE("GPL");
 546MODULE_AUTHOR("Akamai Technologies, Inc.");
 547MODULE_DESCRIPTION("MC support for Intel 3200 memory hub controllers");
 548
 549module_param(edac_op_state, int, 0444);
 550MODULE_PARM_DESC(edac_op_state, "EDAC Error Reporting state: 0=Poll,1=NMI");
 551