linux/drivers/edac/i3200_edac.c
<<
>>
Prefs
   1/*
   2 * Intel 3200/3210 Memory Controller kernel module
   3 * Copyright (C) 2008-2009 Akamai Technologies, Inc.
   4 * Portions by Hitoshi Mitake <h.mitake@gmail.com>.
   5 *
   6 * This file may be distributed under the terms of the
   7 * GNU General Public License.
   8 */
   9
  10#include <linux/module.h>
  11#include <linux/init.h>
  12#include <linux/pci.h>
  13#include <linux/pci_ids.h>
  14#include <linux/edac.h>
  15#include <linux/io.h>
  16#include "edac_module.h"
  17
  18#include <linux/io-64-nonatomic-lo-hi.h>
  19
  20#define EDAC_MOD_STR        "i3200_edac"
  21
  22#define PCI_DEVICE_ID_INTEL_3200_HB    0x29f0
  23
  24#define I3200_DIMMS             4
  25#define I3200_RANKS             8
  26#define I3200_RANKS_PER_CHANNEL 4
  27#define I3200_CHANNELS          2
  28
  29/* Intel 3200 register addresses - device 0 function 0 - DRAM Controller */
  30
  31#define I3200_MCHBAR_LOW        0x48    /* MCH Memory Mapped Register BAR */
  32#define I3200_MCHBAR_HIGH       0x4c
  33#define I3200_MCHBAR_MASK       0xfffffc000ULL  /* bits 35:14 */
  34#define I3200_MMR_WINDOW_SIZE   16384
  35
  36#define I3200_TOM               0xa0    /* Top of Memory (16b)
  37                 *
  38                 * 15:10 reserved
  39                 *  9:0  total populated physical memory
  40                 */
  41#define I3200_TOM_MASK          0x3ff   /* bits 9:0 */
  42#define I3200_TOM_SHIFT         26      /* 64MiB grain */
  43
  44#define I3200_ERRSTS            0xc8    /* Error Status Register (16b)
  45                 *
  46                 * 15    reserved
  47                 * 14    Isochronous TBWRR Run Behind FIFO Full
  48                 *       (ITCV)
  49                 * 13    Isochronous TBWRR Run Behind FIFO Put
  50                 *       (ITSTV)
  51                 * 12    reserved
  52                 * 11    MCH Thermal Sensor Event
  53                 *       for SMI/SCI/SERR (GTSE)
  54                 * 10    reserved
  55                 *  9    LOCK to non-DRAM Memory Flag (LCKF)
  56                 *  8    reserved
  57                 *  7    DRAM Throttle Flag (DTF)
  58                 *  6:2  reserved
  59                 *  1    Multi-bit DRAM ECC Error Flag (DMERR)
  60                 *  0    Single-bit DRAM ECC Error Flag (DSERR)
  61                 */
  62#define I3200_ERRSTS_UE         0x0002
  63#define I3200_ERRSTS_CE         0x0001
  64#define I3200_ERRSTS_BITS       (I3200_ERRSTS_UE | I3200_ERRSTS_CE)
  65
  66
/* Intel 3200 MMIO register space - device 0 function 0 - MMR space */
  68
  69#define I3200_C0DRB     0x200   /* Channel 0 DRAM Rank Boundary (16b x 4)
  70                 *
  71                 * 15:10 reserved
  72                 *  9:0  Channel 0 DRAM Rank Boundary Address
  73                 */
  74#define I3200_C1DRB     0x600   /* Channel 1 DRAM Rank Boundary (16b x 4) */
  75#define I3200_DRB_MASK  0x3ff   /* bits 9:0 */
  76#define I3200_DRB_SHIFT 26      /* 64MiB grain */
  77
  78#define I3200_C0ECCERRLOG       0x280   /* Channel 0 ECC Error Log (64b)
  79                 *
  80                 * 63:48 Error Column Address (ERRCOL)
  81                 * 47:32 Error Row Address (ERRROW)
  82                 * 31:29 Error Bank Address (ERRBANK)
  83                 * 28:27 Error Rank Address (ERRRANK)
  84                 * 26:24 reserved
  85                 * 23:16 Error Syndrome (ERRSYND)
  86                 * 15: 2 reserved
  87                 *    1  Multiple Bit Error Status (MERRSTS)
  88                 *    0  Correctable Error Status (CERRSTS)
  89                 */
  90#define I3200_C1ECCERRLOG               0x680   /* Chan 1 ECC Error Log (64b) */
  91#define I3200_ECCERRLOG_CE              0x1
  92#define I3200_ECCERRLOG_UE              0x2
  93#define I3200_ECCERRLOG_RANK_BITS       0x18000000
  94#define I3200_ECCERRLOG_RANK_SHIFT      27
  95#define I3200_ECCERRLOG_SYNDROME_BITS   0xff0000
  96#define I3200_ECCERRLOG_SYNDROME_SHIFT  16
  97#define I3200_CAPID0                    0xe0    /* P.95 of spec for details */
  98
  99struct i3200_priv {
 100        void __iomem *window;
 101};
 102
 103static int nr_channels;
 104
 105static int how_many_channels(struct pci_dev *pdev)
 106{
 107        int n_channels;
 108
 109        unsigned char capid0_8b; /* 8th byte of CAPID0 */
 110
 111        pci_read_config_byte(pdev, I3200_CAPID0 + 8, &capid0_8b);
 112
 113        if (capid0_8b & 0x20) { /* check DCD: Dual Channel Disable */
 114                edac_dbg(0, "In single channel mode\n");
 115                n_channels = 1;
 116        } else {
 117                edac_dbg(0, "In dual channel mode\n");
 118                n_channels = 2;
 119        }
 120
 121        if (capid0_8b & 0x10) /* check if both channels are filled */
 122                edac_dbg(0, "2 DIMMS per channel disabled\n");
 123        else
 124                edac_dbg(0, "2 DIMMS per channel enabled\n");
 125
 126        return n_channels;
 127}
 128
 129static unsigned long eccerrlog_syndrome(u64 log)
 130{
 131        return (log & I3200_ECCERRLOG_SYNDROME_BITS) >>
 132                I3200_ECCERRLOG_SYNDROME_SHIFT;
 133}
 134
 135static int eccerrlog_row(int channel, u64 log)
 136{
 137        u64 rank = ((log & I3200_ECCERRLOG_RANK_BITS) >>
 138                I3200_ECCERRLOG_RANK_SHIFT);
 139        return rank | (channel * I3200_RANKS_PER_CHANNEL);
 140}
 141
 142enum i3200_chips {
 143        I3200 = 0,
 144};
 145
 146struct i3200_dev_info {
 147        const char *ctl_name;
 148};
 149
 150struct i3200_error_info {
 151        u16 errsts;
 152        u16 errsts2;
 153        u64 eccerrlog[I3200_CHANNELS];
 154};
 155
 156static const struct i3200_dev_info i3200_devs[] = {
 157        [I3200] = {
 158                .ctl_name = "i3200"
 159        },
 160};
 161
 162static struct pci_dev *mci_pdev;
 163static int i3200_registered = 1;
 164
 165
 166static void i3200_clear_error_info(struct mem_ctl_info *mci)
 167{
 168        struct pci_dev *pdev;
 169
 170        pdev = to_pci_dev(mci->pdev);
 171
 172        /*
 173         * Clear any error bits.
 174         * (Yes, we really clear bits by writing 1 to them.)
 175         */
 176        pci_write_bits16(pdev, I3200_ERRSTS, I3200_ERRSTS_BITS,
 177                I3200_ERRSTS_BITS);
 178}
 179
 180static void i3200_get_and_clear_error_info(struct mem_ctl_info *mci,
 181                struct i3200_error_info *info)
 182{
 183        struct pci_dev *pdev;
 184        struct i3200_priv *priv = mci->pvt_info;
 185        void __iomem *window = priv->window;
 186
 187        pdev = to_pci_dev(mci->pdev);
 188
 189        /*
 190         * This is a mess because there is no atomic way to read all the
 191         * registers at once and the registers can transition from CE being
 192         * overwritten by UE.
 193         */
 194        pci_read_config_word(pdev, I3200_ERRSTS, &info->errsts);
 195        if (!(info->errsts & I3200_ERRSTS_BITS))
 196                return;
 197
 198        info->eccerrlog[0] = readq(window + I3200_C0ECCERRLOG);
 199        if (nr_channels == 2)
 200                info->eccerrlog[1] = readq(window + I3200_C1ECCERRLOG);
 201
 202        pci_read_config_word(pdev, I3200_ERRSTS, &info->errsts2);
 203
 204        /*
 205         * If the error is the same for both reads then the first set
 206         * of reads is valid.  If there is a change then there is a CE
 207         * with no info and the second set of reads is valid and
 208         * should be UE info.
 209         */
 210        if ((info->errsts ^ info->errsts2) & I3200_ERRSTS_BITS) {
 211                info->eccerrlog[0] = readq(window + I3200_C0ECCERRLOG);
 212                if (nr_channels == 2)
 213                        info->eccerrlog[1] = readq(window + I3200_C1ECCERRLOG);
 214        }
 215
 216        i3200_clear_error_info(mci);
 217}
 218
 219static void i3200_process_error_info(struct mem_ctl_info *mci,
 220                struct i3200_error_info *info)
 221{
 222        int channel;
 223        u64 log;
 224
 225        if (!(info->errsts & I3200_ERRSTS_BITS))
 226                return;
 227
 228        if ((info->errsts ^ info->errsts2) & I3200_ERRSTS_BITS) {
 229                edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1, 0, 0, 0,
 230                                     -1, -1, -1, "UE overwrote CE", "");
 231                info->errsts = info->errsts2;
 232        }
 233
 234        for (channel = 0; channel < nr_channels; channel++) {
 235                log = info->eccerrlog[channel];
 236                if (log & I3200_ECCERRLOG_UE) {
 237                        edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1,
 238                                             0, 0, 0,
 239                                             eccerrlog_row(channel, log),
 240                                             -1, -1,
 241                                             "i3000 UE", "");
 242                } else if (log & I3200_ECCERRLOG_CE) {
 243                        edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1,
 244                                             0, 0, eccerrlog_syndrome(log),
 245                                             eccerrlog_row(channel, log),
 246                                             -1, -1,
 247                                             "i3000 CE", "");
 248                }
 249        }
 250}
 251
 252static void i3200_check(struct mem_ctl_info *mci)
 253{
 254        struct i3200_error_info info;
 255
 256        i3200_get_and_clear_error_info(mci, &info);
 257        i3200_process_error_info(mci, &info);
 258}
 259
 260static void __iomem *i3200_map_mchbar(struct pci_dev *pdev)
 261{
 262        union {
 263                u64 mchbar;
 264                struct {
 265                        u32 mchbar_low;
 266                        u32 mchbar_high;
 267                };
 268        } u;
 269        void __iomem *window;
 270
 271        pci_read_config_dword(pdev, I3200_MCHBAR_LOW, &u.mchbar_low);
 272        pci_read_config_dword(pdev, I3200_MCHBAR_HIGH, &u.mchbar_high);
 273        u.mchbar &= I3200_MCHBAR_MASK;
 274
 275        if (u.mchbar != (resource_size_t)u.mchbar) {
 276                printk(KERN_ERR
 277                        "i3200: mmio space beyond accessible range (0x%llx)\n",
 278                        (unsigned long long)u.mchbar);
 279                return NULL;
 280        }
 281
 282        window = ioremap(u.mchbar, I3200_MMR_WINDOW_SIZE);
 283        if (!window)
 284                printk(KERN_ERR "i3200: cannot map mmio space at 0x%llx\n",
 285                        (unsigned long long)u.mchbar);
 286
 287        return window;
 288}
 289
 290
 291static void i3200_get_drbs(void __iomem *window,
 292        u16 drbs[I3200_CHANNELS][I3200_RANKS_PER_CHANNEL])
 293{
 294        int i;
 295
 296        for (i = 0; i < I3200_RANKS_PER_CHANNEL; i++) {
 297                drbs[0][i] = readw(window + I3200_C0DRB + 2*i) & I3200_DRB_MASK;
 298                drbs[1][i] = readw(window + I3200_C1DRB + 2*i) & I3200_DRB_MASK;
 299
 300                edac_dbg(0, "drb[0][%d] = %d, drb[1][%d] = %d\n", i, drbs[0][i], i, drbs[1][i]);
 301        }
 302}
 303
 304static bool i3200_is_stacked(struct pci_dev *pdev,
 305        u16 drbs[I3200_CHANNELS][I3200_RANKS_PER_CHANNEL])
 306{
 307        u16 tom;
 308
 309        pci_read_config_word(pdev, I3200_TOM, &tom);
 310        tom &= I3200_TOM_MASK;
 311
 312        return drbs[I3200_CHANNELS - 1][I3200_RANKS_PER_CHANNEL - 1] == tom;
 313}
 314
 315static unsigned long drb_to_nr_pages(
 316        u16 drbs[I3200_CHANNELS][I3200_RANKS_PER_CHANNEL], bool stacked,
 317        int channel, int rank)
 318{
 319        int n;
 320
 321        n = drbs[channel][rank];
 322        if (!n)
 323                return 0;
 324
 325        if (rank > 0)
 326                n -= drbs[channel][rank - 1];
 327        if (stacked && (channel == 1) &&
 328        drbs[channel][rank] == drbs[channel][I3200_RANKS_PER_CHANNEL - 1])
 329                n -= drbs[0][I3200_RANKS_PER_CHANNEL - 1];
 330
 331        n <<= (I3200_DRB_SHIFT - PAGE_SHIFT);
 332        return n;
 333}
 334
 335static int i3200_probe1(struct pci_dev *pdev, int dev_idx)
 336{
 337        int rc;
 338        int i, j;
 339        struct mem_ctl_info *mci = NULL;
 340        struct edac_mc_layer layers[2];
 341        u16 drbs[I3200_CHANNELS][I3200_RANKS_PER_CHANNEL];
 342        bool stacked;
 343        void __iomem *window;
 344        struct i3200_priv *priv;
 345
 346        edac_dbg(0, "MC:\n");
 347
 348        window = i3200_map_mchbar(pdev);
 349        if (!window)
 350                return -ENODEV;
 351
 352        i3200_get_drbs(window, drbs);
 353        nr_channels = how_many_channels(pdev);
 354
 355        layers[0].type = EDAC_MC_LAYER_CHIP_SELECT;
 356        layers[0].size = I3200_DIMMS;
 357        layers[0].is_virt_csrow = true;
 358        layers[1].type = EDAC_MC_LAYER_CHANNEL;
 359        layers[1].size = nr_channels;
 360        layers[1].is_virt_csrow = false;
 361        mci = edac_mc_alloc(0, ARRAY_SIZE(layers), layers,
 362                            sizeof(struct i3200_priv));
 363        if (!mci)
 364                return -ENOMEM;
 365
 366        edac_dbg(3, "MC: init mci\n");
 367
 368        mci->pdev = &pdev->dev;
 369        mci->mtype_cap = MEM_FLAG_DDR2;
 370
 371        mci->edac_ctl_cap = EDAC_FLAG_SECDED;
 372        mci->edac_cap = EDAC_FLAG_SECDED;
 373
 374        mci->mod_name = EDAC_MOD_STR;
 375        mci->ctl_name = i3200_devs[dev_idx].ctl_name;
 376        mci->dev_name = pci_name(pdev);
 377        mci->edac_check = i3200_check;
 378        mci->ctl_page_to_phys = NULL;
 379        priv = mci->pvt_info;
 380        priv->window = window;
 381
 382        stacked = i3200_is_stacked(pdev, drbs);
 383
 384        /*
 385         * The dram rank boundary (DRB) reg values are boundary addresses
 386         * for each DRAM rank with a granularity of 64MB.  DRB regs are
 387         * cumulative; the last one will contain the total memory
 388         * contained in all ranks.
 389         */
 390        for (i = 0; i < I3200_DIMMS; i++) {
 391                unsigned long nr_pages;
 392
 393                for (j = 0; j < nr_channels; j++) {
 394                        struct dimm_info *dimm = edac_get_dimm(mci, i, j, 0);
 395
 396                        nr_pages = drb_to_nr_pages(drbs, stacked, j, i);
 397                        if (nr_pages == 0)
 398                                continue;
 399
 400                        edac_dbg(0, "csrow %d, channel %d%s, size = %ld MiB\n", i, j,
 401                                 stacked ? " (stacked)" : "", PAGES_TO_MiB(nr_pages));
 402
 403                        dimm->nr_pages = nr_pages;
 404                        dimm->grain = nr_pages << PAGE_SHIFT;
 405                        dimm->mtype = MEM_DDR2;
 406                        dimm->dtype = DEV_UNKNOWN;
 407                        dimm->edac_mode = EDAC_UNKNOWN;
 408                }
 409        }
 410
 411        i3200_clear_error_info(mci);
 412
 413        rc = -ENODEV;
 414        if (edac_mc_add_mc(mci)) {
 415                edac_dbg(3, "MC: failed edac_mc_add_mc()\n");
 416                goto fail;
 417        }
 418
 419        /* get this far and it's successful */
 420        edac_dbg(3, "MC: success\n");
 421        return 0;
 422
 423fail:
 424        iounmap(window);
 425        if (mci)
 426                edac_mc_free(mci);
 427
 428        return rc;
 429}
 430
 431static int i3200_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 432{
 433        int rc;
 434
 435        edac_dbg(0, "MC:\n");
 436
 437        if (pci_enable_device(pdev) < 0)
 438                return -EIO;
 439
 440        rc = i3200_probe1(pdev, ent->driver_data);
 441        if (!mci_pdev)
 442                mci_pdev = pci_dev_get(pdev);
 443
 444        return rc;
 445}
 446
 447static void i3200_remove_one(struct pci_dev *pdev)
 448{
 449        struct mem_ctl_info *mci;
 450        struct i3200_priv *priv;
 451
 452        edac_dbg(0, "\n");
 453
 454        mci = edac_mc_del_mc(&pdev->dev);
 455        if (!mci)
 456                return;
 457
 458        priv = mci->pvt_info;
 459        iounmap(priv->window);
 460
 461        edac_mc_free(mci);
 462
 463        pci_disable_device(pdev);
 464}
 465
 466static const struct pci_device_id i3200_pci_tbl[] = {
 467        {
 468                PCI_VEND_DEV(INTEL, 3200_HB), PCI_ANY_ID, PCI_ANY_ID, 0, 0,
 469                I3200},
 470        {
 471                0,
 472        }            /* 0 terminated list. */
 473};
 474
 475MODULE_DEVICE_TABLE(pci, i3200_pci_tbl);
 476
 477static struct pci_driver i3200_driver = {
 478        .name = EDAC_MOD_STR,
 479        .probe = i3200_init_one,
 480        .remove = i3200_remove_one,
 481        .id_table = i3200_pci_tbl,
 482};
 483
 484static int __init i3200_init(void)
 485{
 486        int pci_rc;
 487
 488        edac_dbg(3, "MC:\n");
 489
 490        /* Ensure that the OPSTATE is set correctly for POLL or NMI */
 491        opstate_init();
 492
 493        pci_rc = pci_register_driver(&i3200_driver);
 494        if (pci_rc < 0)
 495                goto fail0;
 496
 497        if (!mci_pdev) {
 498                i3200_registered = 0;
 499                mci_pdev = pci_get_device(PCI_VENDOR_ID_INTEL,
 500                                PCI_DEVICE_ID_INTEL_3200_HB, NULL);
 501                if (!mci_pdev) {
 502                        edac_dbg(0, "i3200 pci_get_device fail\n");
 503                        pci_rc = -ENODEV;
 504                        goto fail1;
 505                }
 506
 507                pci_rc = i3200_init_one(mci_pdev, i3200_pci_tbl);
 508                if (pci_rc < 0) {
 509                        edac_dbg(0, "i3200 init fail\n");
 510                        pci_rc = -ENODEV;
 511                        goto fail1;
 512                }
 513        }
 514
 515        return 0;
 516
 517fail1:
 518        pci_unregister_driver(&i3200_driver);
 519
 520fail0:
 521        pci_dev_put(mci_pdev);
 522
 523        return pci_rc;
 524}
 525
 526static void __exit i3200_exit(void)
 527{
 528        edac_dbg(3, "MC:\n");
 529
 530        pci_unregister_driver(&i3200_driver);
 531        if (!i3200_registered) {
 532                i3200_remove_one(mci_pdev);
 533                pci_dev_put(mci_pdev);
 534        }
 535}
 536
 537module_init(i3200_init);
 538module_exit(i3200_exit);
 539
 540MODULE_LICENSE("GPL");
 541MODULE_AUTHOR("Akamai Technologies, Inc.");
 542MODULE_DESCRIPTION("MC support for Intel 3200 memory hub controllers");
 543
 544module_param(edac_op_state, int, 0444);
 545MODULE_PARM_DESC(edac_op_state, "EDAC Error Reporting state: 0=Poll,1=NMI");
 546