linux/drivers/edac/i3200_edac.c
<<
>>
Prefs
   1/*
   2 * Intel 3200/3210 Memory Controller kernel module
   3 * Copyright (C) 2008-2009 Akamai Technologies, Inc.
   4 * Portions by Hitoshi Mitake <h.mitake@gmail.com>.
   5 *
   6 * This file may be distributed under the terms of the
   7 * GNU General Public License.
   8 */
   9
  10#include <linux/module.h>
  11#include <linux/init.h>
  12#include <linux/pci.h>
  13#include <linux/pci_ids.h>
  14#include <linux/edac.h>
  15#include <linux/io.h>
  16#include "edac_module.h"
  17
  18#include <linux/io-64-nonatomic-lo-hi.h>
  19
  /* Name used for the PCI driver and as the EDAC module name. */
  #define EDAC_MOD_STR                    "i3200_edac"

  /* PCI device ID matched by i3200_pci_tbl and looked up in i3200_init(). */
  #define PCI_DEVICE_ID_INTEL_3200_HB     0x29f0

  /*
   * Memory topology limits.  I3200_DIMMS sizes the chip-select layer,
   * I3200_CHANNELS and I3200_RANKS_PER_CHANNEL size the DRB table.
   */
  #define I3200_DIMMS                     4
  #define I3200_RANKS                     8
  #define I3200_RANKS_PER_CHANNEL         4
  #define I3200_CHANNELS                  2
  28
  /*
   * Intel 3200 register addresses in PCI configuration space
   * (device 0, function 0 - DRAM Controller).
   */

  /* MCHBAR: MCH Memory Mapped Register BAR, read as two dwords. */
  #define I3200_MCHBAR_LOW        0x48
  #define I3200_MCHBAR_HIGH       0x4c
  #define I3200_MCHBAR_MASK       0xfffffc000ULL  /* bits 35:14 */
  #define I3200_MMR_WINDOW_SIZE   16384           /* size of the ioremap'd window */

  /*
   * TOM: Top of Memory (16b)
   *
   *   15:10  reserved
   *    9:0   total populated physical memory
   */
  #define I3200_TOM               0xa0
  #define I3200_TOM_MASK          0x3ff   /* bits 9:0 */
  #define I3200_TOM_SHIFT         26      /* 64MiB grain */

  /*
   * ERRSTS: Error Status Register (16b)
   *
   *   15     reserved
   *   14     Isochronous TBWRR Run Behind FIFO Full (ITCV)
   *   13     Isochronous TBWRR Run Behind FIFO Put (ITSTV)
   *   12     reserved
   *   11     MCH Thermal Sensor Event for SMI/SCI/SERR (GTSE)
   *   10     reserved
   *    9     LOCK to non-DRAM Memory Flag (LCKF)
   *    8     reserved
   *    7     DRAM Throttle Flag (DTF)
   *    6:2   reserved
   *    1     Multi-bit DRAM ECC Error Flag (DMERR)
   *    0     Single-bit DRAM ECC Error Flag (DSERR)
   */
  #define I3200_ERRSTS            0xc8
  #define I3200_ERRSTS_UE         0x0002  /* DMERR */
  #define I3200_ERRSTS_CE         0x0001  /* DSERR */
  #define I3200_ERRSTS_BITS       (I3200_ERRSTS_UE | I3200_ERRSTS_CE)
  65
  66
  /*
   * Intel 3200 MMIO register space (device 0, function 0 - MMR space).
   * Offsets are relative to the window mapped by i3200_map_mchbar().
   */

  /*
   * CxDRB: per-channel DRAM Rank Boundary (16b x 4)
   *
   *   15:10  reserved
   *    9:0   DRAM Rank Boundary Address
   */
  #define I3200_C0DRB     0x200   /* Channel 0 */
  #define I3200_C1DRB     0x600   /* Channel 1 */
  #define I3200_DRB_MASK  0x3ff   /* bits 9:0 */
  #define I3200_DRB_SHIFT 26      /* 64MiB grain */

  /*
   * CxECCERRLOG: per-channel ECC Error Log (64b)
   *
   *   63:48  Error Column Address (ERRCOL)
   *   47:32  Error Row Address (ERRROW)
   *   31:29  Error Bank Address (ERRBANK)
   *   28:27  Error Rank Address (ERRRANK)
   *   26:24  reserved
   *   23:16  Error Syndrome (ERRSYND)
   *   15:2   reserved
   *    1     Multiple Bit Error Status (MERRSTS)
   *    0     Correctable Error Status (CERRSTS)
   */
  #define I3200_C0ECCERRLOG               0x280   /* Channel 0 */
  #define I3200_C1ECCERRLOG               0x680   /* Channel 1 */
  #define I3200_ECCERRLOG_CE              0x1
  #define I3200_ECCERRLOG_UE              0x2
  #define I3200_ECCERRLOG_RANK_BITS       0x18000000
  #define I3200_ECCERRLOG_RANK_SHIFT      27
  #define I3200_ECCERRLOG_SYNDROME_BITS   0xff0000
  #define I3200_ECCERRLOG_SYNDROME_SHIFT  16

  /*
   * CAPID0 is read through PCI config space by how_many_channels(),
   * not through the MMIO window.  See P.95 of spec for details.
   */
  #define I3200_CAPID0                    0xe0
  98
  99struct i3200_priv {
 100        void __iomem *window;
 101};
 102
 103static int nr_channels;
 104
 105static int how_many_channels(struct pci_dev *pdev)
 106{
 107        int n_channels;
 108
 109        unsigned char capid0_8b; /* 8th byte of CAPID0 */
 110
 111        pci_read_config_byte(pdev, I3200_CAPID0 + 8, &capid0_8b);
 112
 113        if (capid0_8b & 0x20) { /* check DCD: Dual Channel Disable */
 114                edac_dbg(0, "In single channel mode\n");
 115                n_channels = 1;
 116        } else {
 117                edac_dbg(0, "In dual channel mode\n");
 118                n_channels = 2;
 119        }
 120
 121        if (capid0_8b & 0x10) /* check if both channels are filled */
 122                edac_dbg(0, "2 DIMMS per channel disabled\n");
 123        else
 124                edac_dbg(0, "2 DIMMS per channel enabled\n");
 125
 126        return n_channels;
 127}
 128
 129static unsigned long eccerrlog_syndrome(u64 log)
 130{
 131        return (log & I3200_ECCERRLOG_SYNDROME_BITS) >>
 132                I3200_ECCERRLOG_SYNDROME_SHIFT;
 133}
 134
 135static int eccerrlog_row(int channel, u64 log)
 136{
 137        u64 rank = ((log & I3200_ECCERRLOG_RANK_BITS) >>
 138                I3200_ECCERRLOG_RANK_SHIFT);
 139        return rank | (channel * I3200_RANKS_PER_CHANNEL);
 140}
 141
 142enum i3200_chips {
 143        I3200 = 0,
 144};
 145
 146struct i3200_dev_info {
 147        const char *ctl_name;
 148};
 149
 150struct i3200_error_info {
 151        u16 errsts;
 152        u16 errsts2;
 153        u64 eccerrlog[I3200_CHANNELS];
 154};
 155
 156static const struct i3200_dev_info i3200_devs[] = {
 157        [I3200] = {
 158                .ctl_name = "i3200"
 159        },
 160};
 161
 162static struct pci_dev *mci_pdev;
 163static int i3200_registered = 1;
 164
 165
 166static void i3200_clear_error_info(struct mem_ctl_info *mci)
 167{
 168        struct pci_dev *pdev;
 169
 170        pdev = to_pci_dev(mci->pdev);
 171
 172        /*
 173         * Clear any error bits.
 174         * (Yes, we really clear bits by writing 1 to them.)
 175         */
 176        pci_write_bits16(pdev, I3200_ERRSTS, I3200_ERRSTS_BITS,
 177                I3200_ERRSTS_BITS);
 178}
 179
 180static void i3200_get_and_clear_error_info(struct mem_ctl_info *mci,
 181                struct i3200_error_info *info)
 182{
 183        struct pci_dev *pdev;
 184        struct i3200_priv *priv = mci->pvt_info;
 185        void __iomem *window = priv->window;
 186
 187        pdev = to_pci_dev(mci->pdev);
 188
 189        /*
 190         * This is a mess because there is no atomic way to read all the
 191         * registers at once and the registers can transition from CE being
 192         * overwritten by UE.
 193         */
 194        pci_read_config_word(pdev, I3200_ERRSTS, &info->errsts);
 195        if (!(info->errsts & I3200_ERRSTS_BITS))
 196                return;
 197
 198        info->eccerrlog[0] = readq(window + I3200_C0ECCERRLOG);
 199        if (nr_channels == 2)
 200                info->eccerrlog[1] = readq(window + I3200_C1ECCERRLOG);
 201
 202        pci_read_config_word(pdev, I3200_ERRSTS, &info->errsts2);
 203
 204        /*
 205         * If the error is the same for both reads then the first set
 206         * of reads is valid.  If there is a change then there is a CE
 207         * with no info and the second set of reads is valid and
 208         * should be UE info.
 209         */
 210        if ((info->errsts ^ info->errsts2) & I3200_ERRSTS_BITS) {
 211                info->eccerrlog[0] = readq(window + I3200_C0ECCERRLOG);
 212                if (nr_channels == 2)
 213                        info->eccerrlog[1] = readq(window + I3200_C1ECCERRLOG);
 214        }
 215
 216        i3200_clear_error_info(mci);
 217}
 218
 219static void i3200_process_error_info(struct mem_ctl_info *mci,
 220                struct i3200_error_info *info)
 221{
 222        int channel;
 223        u64 log;
 224
 225        if (!(info->errsts & I3200_ERRSTS_BITS))
 226                return;
 227
 228        if ((info->errsts ^ info->errsts2) & I3200_ERRSTS_BITS) {
 229                edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1, 0, 0, 0,
 230                                     -1, -1, -1, "UE overwrote CE", "");
 231                info->errsts = info->errsts2;
 232        }
 233
 234        for (channel = 0; channel < nr_channels; channel++) {
 235                log = info->eccerrlog[channel];
 236                if (log & I3200_ECCERRLOG_UE) {
 237                        edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1,
 238                                             0, 0, 0,
 239                                             eccerrlog_row(channel, log),
 240                                             -1, -1,
 241                                             "i3000 UE", "");
 242                } else if (log & I3200_ECCERRLOG_CE) {
 243                        edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1,
 244                                             0, 0, eccerrlog_syndrome(log),
 245                                             eccerrlog_row(channel, log),
 246                                             -1, -1,
 247                                             "i3000 CE", "");
 248                }
 249        }
 250}
 251
 252static void i3200_check(struct mem_ctl_info *mci)
 253{
 254        struct i3200_error_info info;
 255
 256        edac_dbg(1, "MC%d\n", mci->mc_idx);
 257        i3200_get_and_clear_error_info(mci, &info);
 258        i3200_process_error_info(mci, &info);
 259}
 260
 261static void __iomem *i3200_map_mchbar(struct pci_dev *pdev)
 262{
 263        union {
 264                u64 mchbar;
 265                struct {
 266                        u32 mchbar_low;
 267                        u32 mchbar_high;
 268                };
 269        } u;
 270        void __iomem *window;
 271
 272        pci_read_config_dword(pdev, I3200_MCHBAR_LOW, &u.mchbar_low);
 273        pci_read_config_dword(pdev, I3200_MCHBAR_HIGH, &u.mchbar_high);
 274        u.mchbar &= I3200_MCHBAR_MASK;
 275
 276        if (u.mchbar != (resource_size_t)u.mchbar) {
 277                printk(KERN_ERR
 278                        "i3200: mmio space beyond accessible range (0x%llx)\n",
 279                        (unsigned long long)u.mchbar);
 280                return NULL;
 281        }
 282
 283        window = ioremap_nocache(u.mchbar, I3200_MMR_WINDOW_SIZE);
 284        if (!window)
 285                printk(KERN_ERR "i3200: cannot map mmio space at 0x%llx\n",
 286                        (unsigned long long)u.mchbar);
 287
 288        return window;
 289}
 290
 291
 292static void i3200_get_drbs(void __iomem *window,
 293        u16 drbs[I3200_CHANNELS][I3200_RANKS_PER_CHANNEL])
 294{
 295        int i;
 296
 297        for (i = 0; i < I3200_RANKS_PER_CHANNEL; i++) {
 298                drbs[0][i] = readw(window + I3200_C0DRB + 2*i) & I3200_DRB_MASK;
 299                drbs[1][i] = readw(window + I3200_C1DRB + 2*i) & I3200_DRB_MASK;
 300
 301                edac_dbg(0, "drb[0][%d] = %d, drb[1][%d] = %d\n", i, drbs[0][i], i, drbs[1][i]);
 302        }
 303}
 304
 305static bool i3200_is_stacked(struct pci_dev *pdev,
 306        u16 drbs[I3200_CHANNELS][I3200_RANKS_PER_CHANNEL])
 307{
 308        u16 tom;
 309
 310        pci_read_config_word(pdev, I3200_TOM, &tom);
 311        tom &= I3200_TOM_MASK;
 312
 313        return drbs[I3200_CHANNELS - 1][I3200_RANKS_PER_CHANNEL - 1] == tom;
 314}
 315
 316static unsigned long drb_to_nr_pages(
 317        u16 drbs[I3200_CHANNELS][I3200_RANKS_PER_CHANNEL], bool stacked,
 318        int channel, int rank)
 319{
 320        int n;
 321
 322        n = drbs[channel][rank];
 323        if (!n)
 324                return 0;
 325
 326        if (rank > 0)
 327                n -= drbs[channel][rank - 1];
 328        if (stacked && (channel == 1) &&
 329        drbs[channel][rank] == drbs[channel][I3200_RANKS_PER_CHANNEL - 1])
 330                n -= drbs[0][I3200_RANKS_PER_CHANNEL - 1];
 331
 332        n <<= (I3200_DRB_SHIFT - PAGE_SHIFT);
 333        return n;
 334}
 335
 336static int i3200_probe1(struct pci_dev *pdev, int dev_idx)
 337{
 338        int rc;
 339        int i, j;
 340        struct mem_ctl_info *mci = NULL;
 341        struct edac_mc_layer layers[2];
 342        u16 drbs[I3200_CHANNELS][I3200_RANKS_PER_CHANNEL];
 343        bool stacked;
 344        void __iomem *window;
 345        struct i3200_priv *priv;
 346
 347        edac_dbg(0, "MC:\n");
 348
 349        window = i3200_map_mchbar(pdev);
 350        if (!window)
 351                return -ENODEV;
 352
 353        i3200_get_drbs(window, drbs);
 354        nr_channels = how_many_channels(pdev);
 355
 356        layers[0].type = EDAC_MC_LAYER_CHIP_SELECT;
 357        layers[0].size = I3200_DIMMS;
 358        layers[0].is_virt_csrow = true;
 359        layers[1].type = EDAC_MC_LAYER_CHANNEL;
 360        layers[1].size = nr_channels;
 361        layers[1].is_virt_csrow = false;
 362        mci = edac_mc_alloc(0, ARRAY_SIZE(layers), layers,
 363                            sizeof(struct i3200_priv));
 364        if (!mci)
 365                return -ENOMEM;
 366
 367        edac_dbg(3, "MC: init mci\n");
 368
 369        mci->pdev = &pdev->dev;
 370        mci->mtype_cap = MEM_FLAG_DDR2;
 371
 372        mci->edac_ctl_cap = EDAC_FLAG_SECDED;
 373        mci->edac_cap = EDAC_FLAG_SECDED;
 374
 375        mci->mod_name = EDAC_MOD_STR;
 376        mci->ctl_name = i3200_devs[dev_idx].ctl_name;
 377        mci->dev_name = pci_name(pdev);
 378        mci->edac_check = i3200_check;
 379        mci->ctl_page_to_phys = NULL;
 380        priv = mci->pvt_info;
 381        priv->window = window;
 382
 383        stacked = i3200_is_stacked(pdev, drbs);
 384
 385        /*
 386         * The dram rank boundary (DRB) reg values are boundary addresses
 387         * for each DRAM rank with a granularity of 64MB.  DRB regs are
 388         * cumulative; the last one will contain the total memory
 389         * contained in all ranks.
 390         */
 391        for (i = 0; i < I3200_DIMMS; i++) {
 392                unsigned long nr_pages;
 393
 394                for (j = 0; j < nr_channels; j++) {
 395                        struct dimm_info *dimm = EDAC_DIMM_PTR(mci->layers, mci->dimms,
 396                                                               mci->n_layers, i, j, 0);
 397
 398                        nr_pages = drb_to_nr_pages(drbs, stacked, j, i);
 399                        if (nr_pages == 0)
 400                                continue;
 401
 402                        edac_dbg(0, "csrow %d, channel %d%s, size = %ld Mb\n", i, j,
 403                                 stacked ? " (stacked)" : "", PAGES_TO_MiB(nr_pages));
 404
 405                        dimm->nr_pages = nr_pages;
 406                        dimm->grain = nr_pages << PAGE_SHIFT;
 407                        dimm->mtype = MEM_DDR2;
 408                        dimm->dtype = DEV_UNKNOWN;
 409                        dimm->edac_mode = EDAC_UNKNOWN;
 410                }
 411        }
 412
 413        i3200_clear_error_info(mci);
 414
 415        rc = -ENODEV;
 416        if (edac_mc_add_mc(mci)) {
 417                edac_dbg(3, "MC: failed edac_mc_add_mc()\n");
 418                goto fail;
 419        }
 420
 421        /* get this far and it's successful */
 422        edac_dbg(3, "MC: success\n");
 423        return 0;
 424
 425fail:
 426        iounmap(window);
 427        if (mci)
 428                edac_mc_free(mci);
 429
 430        return rc;
 431}
 432
 433static int i3200_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 434{
 435        int rc;
 436
 437        edac_dbg(0, "MC:\n");
 438
 439        if (pci_enable_device(pdev) < 0)
 440                return -EIO;
 441
 442        rc = i3200_probe1(pdev, ent->driver_data);
 443        if (!mci_pdev)
 444                mci_pdev = pci_dev_get(pdev);
 445
 446        return rc;
 447}
 448
 449static void i3200_remove_one(struct pci_dev *pdev)
 450{
 451        struct mem_ctl_info *mci;
 452        struct i3200_priv *priv;
 453
 454        edac_dbg(0, "\n");
 455
 456        mci = edac_mc_del_mc(&pdev->dev);
 457        if (!mci)
 458                return;
 459
 460        priv = mci->pvt_info;
 461        iounmap(priv->window);
 462
 463        edac_mc_free(mci);
 464
 465        pci_disable_device(pdev);
 466}
 467
 468static const struct pci_device_id i3200_pci_tbl[] = {
 469        {
 470                PCI_VEND_DEV(INTEL, 3200_HB), PCI_ANY_ID, PCI_ANY_ID, 0, 0,
 471                I3200},
 472        {
 473                0,
 474        }            /* 0 terminated list. */
 475};
 476
 477MODULE_DEVICE_TABLE(pci, i3200_pci_tbl);
 478
 479static struct pci_driver i3200_driver = {
 480        .name = EDAC_MOD_STR,
 481        .probe = i3200_init_one,
 482        .remove = i3200_remove_one,
 483        .id_table = i3200_pci_tbl,
 484};
 485
 486static int __init i3200_init(void)
 487{
 488        int pci_rc;
 489
 490        edac_dbg(3, "MC:\n");
 491
 492        /* Ensure that the OPSTATE is set correctly for POLL or NMI */
 493        opstate_init();
 494
 495        pci_rc = pci_register_driver(&i3200_driver);
 496        if (pci_rc < 0)
 497                goto fail0;
 498
 499        if (!mci_pdev) {
 500                i3200_registered = 0;
 501                mci_pdev = pci_get_device(PCI_VENDOR_ID_INTEL,
 502                                PCI_DEVICE_ID_INTEL_3200_HB, NULL);
 503                if (!mci_pdev) {
 504                        edac_dbg(0, "i3200 pci_get_device fail\n");
 505                        pci_rc = -ENODEV;
 506                        goto fail1;
 507                }
 508
 509                pci_rc = i3200_init_one(mci_pdev, i3200_pci_tbl);
 510                if (pci_rc < 0) {
 511                        edac_dbg(0, "i3200 init fail\n");
 512                        pci_rc = -ENODEV;
 513                        goto fail1;
 514                }
 515        }
 516
 517        return 0;
 518
 519fail1:
 520        pci_unregister_driver(&i3200_driver);
 521
 522fail0:
 523        pci_dev_put(mci_pdev);
 524
 525        return pci_rc;
 526}
 527
 528static void __exit i3200_exit(void)
 529{
 530        edac_dbg(3, "MC:\n");
 531
 532        pci_unregister_driver(&i3200_driver);
 533        if (!i3200_registered) {
 534                i3200_remove_one(mci_pdev);
 535                pci_dev_put(mci_pdev);
 536        }
 537}
 538
 539module_init(i3200_init);
 540module_exit(i3200_exit);
 541
 542MODULE_LICENSE("GPL");
 543MODULE_AUTHOR("Akamai Technologies, Inc.");
 544MODULE_DESCRIPTION("MC support for Intel 3200 memory hub controllers");
 545
 546module_param(edac_op_state, int, 0444);
 547MODULE_PARM_DESC(edac_op_state, "EDAC Error Reporting state: 0=Poll,1=NMI");
 548