/*
 * linux/drivers/edac/i82860_edac.c
 *
 * Intel 82860 Memory Controller kernel module
 * (C) 2005 Red Hat (http://www.redhat.com)
 * This file may be distributed under the terms of the
 * GNU General Public License.
 *
 * Written by Ben Woodard <woodard@redhat.com>
 * shamelessly copied from and based upon the edac_i82875 driver
 * by Thayne Harbaugh of Linux Networx. (http://lnxi.com)
 */
  11
  12#include <linux/module.h>
  13#include <linux/init.h>
  14#include <linux/pci.h>
  15#include <linux/pci_ids.h>
  16#include <linux/edac.h>
  17#include "edac_core.h"
  18
  19#define  I82860_REVISION " Ver: 2.0.2"
  20#define EDAC_MOD_STR    "i82860_edac"
  21
  22#define i82860_printk(level, fmt, arg...) \
  23        edac_printk(level, "i82860", fmt, ##arg)
  24
  25#define i82860_mc_printk(mci, level, fmt, arg...) \
  26        edac_mc_chipset_printk(mci, level, "i82860", fmt, ##arg)
  27
  28#ifndef PCI_DEVICE_ID_INTEL_82860_0
  29#define PCI_DEVICE_ID_INTEL_82860_0     0x2531
  30#endif                          /* PCI_DEVICE_ID_INTEL_82860_0 */
  31
  32#define I82860_MCHCFG 0x50
  33#define I82860_GBA 0x60
  34#define I82860_GBA_MASK 0x7FF
  35#define I82860_GBA_SHIFT 24
  36#define I82860_ERRSTS 0xC8
  37#define I82860_EAP 0xE4
  38#define I82860_DERRCTL_STS 0xE2
  39
  40enum i82860_chips {
  41        I82860 = 0,
  42};
  43
  44struct i82860_dev_info {
  45        const char *ctl_name;
  46};
  47
  48struct i82860_error_info {
  49        u16 errsts;
  50        u32 eap;
  51        u16 derrsyn;
  52        u16 errsts2;
  53};
  54
  55static const struct i82860_dev_info i82860_devs[] = {
  56        [I82860] = {
  57                .ctl_name = "i82860"},
  58};
  59
  60static struct pci_dev *mci_pdev;        /* init dev: in case that AGP code
  61                                         * has already registered driver
  62                                         */
  63static struct edac_pci_ctl_info *i82860_pci;
  64
  65static void i82860_get_error_info(struct mem_ctl_info *mci,
  66                                struct i82860_error_info *info)
  67{
  68        struct pci_dev *pdev;
  69
  70        pdev = to_pci_dev(mci->pdev);
  71
  72        /*
  73         * This is a mess because there is no atomic way to read all the
  74         * registers at once and the registers can transition from CE being
  75         * overwritten by UE.
  76         */
  77        pci_read_config_word(pdev, I82860_ERRSTS, &info->errsts);
  78        pci_read_config_dword(pdev, I82860_EAP, &info->eap);
  79        pci_read_config_word(pdev, I82860_DERRCTL_STS, &info->derrsyn);
  80        pci_read_config_word(pdev, I82860_ERRSTS, &info->errsts2);
  81
  82        pci_write_bits16(pdev, I82860_ERRSTS, 0x0003, 0x0003);
  83
  84        /*
  85         * If the error is the same for both reads then the first set of reads
  86         * is valid.  If there is a change then there is a CE no info and the
  87         * second set of reads is valid and should be UE info.
  88         */
  89        if (!(info->errsts2 & 0x0003))
  90                return;
  91
  92        if ((info->errsts ^ info->errsts2) & 0x0003) {
  93                pci_read_config_dword(pdev, I82860_EAP, &info->eap);
  94                pci_read_config_word(pdev, I82860_DERRCTL_STS, &info->derrsyn);
  95        }
  96}
  97
  98static int i82860_process_error_info(struct mem_ctl_info *mci,
  99                                struct i82860_error_info *info,
 100                                int handle_errors)
 101{
 102        struct dimm_info *dimm;
 103        int row;
 104
 105        if (!(info->errsts2 & 0x0003))
 106                return 0;
 107
 108        if (!handle_errors)
 109                return 1;
 110
 111        if ((info->errsts ^ info->errsts2) & 0x0003) {
 112                edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1, 0, 0, 0,
 113                                     -1, -1, -1, "UE overwrote CE", "");
 114                info->errsts = info->errsts2;
 115        }
 116
 117        info->eap >>= PAGE_SHIFT;
 118        row = edac_mc_find_csrow_by_page(mci, info->eap);
 119        dimm = mci->csrows[row]->channels[0]->dimm;
 120
 121        if (info->errsts & 0x0002)
 122                edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1,
 123                                     info->eap, 0, 0,
 124                                     dimm->location[0], dimm->location[1], -1,
 125                                     "i82860 UE", "");
 126        else
 127                edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1,
 128                                     info->eap, 0, info->derrsyn,
 129                                     dimm->location[0], dimm->location[1], -1,
 130                                     "i82860 CE", "");
 131
 132        return 1;
 133}
 134
 135static void i82860_check(struct mem_ctl_info *mci)
 136{
 137        struct i82860_error_info info;
 138
 139        edac_dbg(1, "MC%d\n", mci->mc_idx);
 140        i82860_get_error_info(mci, &info);
 141        i82860_process_error_info(mci, &info, 1);
 142}
 143
 144static void i82860_init_csrows(struct mem_ctl_info *mci, struct pci_dev *pdev)
 145{
 146        unsigned long last_cumul_size;
 147        u16 mchcfg_ddim;        /* DRAM Data Integrity Mode 0=none, 2=edac */
 148        u16 value;
 149        u32 cumul_size;
 150        struct csrow_info *csrow;
 151        struct dimm_info *dimm;
 152        int index;
 153
 154        pci_read_config_word(pdev, I82860_MCHCFG, &mchcfg_ddim);
 155        mchcfg_ddim = mchcfg_ddim & 0x180;
 156        last_cumul_size = 0;
 157
 158        /* The group row boundary (GRA) reg values are boundary address
 159         * for each DRAM row with a granularity of 16MB.  GRA regs are
 160         * cumulative; therefore GRA15 will contain the total memory contained
 161         * in all eight rows.
 162         */
 163        for (index = 0; index < mci->nr_csrows; index++) {
 164                csrow = mci->csrows[index];
 165                dimm = csrow->channels[0]->dimm;
 166
 167                pci_read_config_word(pdev, I82860_GBA + index * 2, &value);
 168                cumul_size = (value & I82860_GBA_MASK) <<
 169                        (I82860_GBA_SHIFT - PAGE_SHIFT);
 170                edac_dbg(3, "(%d) cumul_size 0x%x\n", index, cumul_size);
 171
 172                if (cumul_size == last_cumul_size)
 173                        continue;       /* not populated */
 174
 175                csrow->first_page = last_cumul_size;
 176                csrow->last_page = cumul_size - 1;
 177                dimm->nr_pages = cumul_size - last_cumul_size;
 178                last_cumul_size = cumul_size;
 179                dimm->grain = 1 << 12;  /* I82860_EAP has 4KiB reolution */
 180                dimm->mtype = MEM_RMBS;
 181                dimm->dtype = DEV_UNKNOWN;
 182                dimm->edac_mode = mchcfg_ddim ? EDAC_SECDED : EDAC_NONE;
 183        }
 184}
 185
 186static int i82860_probe1(struct pci_dev *pdev, int dev_idx)
 187{
 188        struct mem_ctl_info *mci;
 189        struct edac_mc_layer layers[2];
 190        struct i82860_error_info discard;
 191
 192        /*
 193         * RDRAM has channels but these don't map onto the csrow abstraction.
 194         * According with the datasheet, there are 2 Rambus channels, supporting
 195         * up to 16 direct RDRAM devices.
 196         * The device groups from the GRA registers seem to map reasonably
 197         * well onto the notion of a chip select row.
 198         * There are 16 GRA registers and since the name is associated with
 199         * the channel and the GRA registers map to physical devices so we are
 200         * going to make 1 channel for group.
 201         */
 202        layers[0].type = EDAC_MC_LAYER_CHANNEL;
 203        layers[0].size = 2;
 204        layers[0].is_virt_csrow = true;
 205        layers[1].type = EDAC_MC_LAYER_SLOT;
 206        layers[1].size = 8;
 207        layers[1].is_virt_csrow = true;
 208        mci = edac_mc_alloc(0, ARRAY_SIZE(layers), layers, 0);
 209        if (!mci)
 210                return -ENOMEM;
 211
 212        edac_dbg(3, "init mci\n");
 213        mci->pdev = &pdev->dev;
 214        mci->mtype_cap = MEM_FLAG_DDR;
 215        mci->edac_ctl_cap = EDAC_FLAG_NONE | EDAC_FLAG_SECDED;
 216        /* I"m not sure about this but I think that all RDRAM is SECDED */
 217        mci->edac_cap = EDAC_FLAG_SECDED;
 218        mci->mod_name = EDAC_MOD_STR;
 219        mci->mod_ver = I82860_REVISION;
 220        mci->ctl_name = i82860_devs[dev_idx].ctl_name;
 221        mci->dev_name = pci_name(pdev);
 222        mci->edac_check = i82860_check;
 223        mci->ctl_page_to_phys = NULL;
 224        i82860_init_csrows(mci, pdev);
 225        i82860_get_error_info(mci, &discard);   /* clear counters */
 226
 227        /* Here we assume that we will never see multiple instances of this
 228         * type of memory controller.  The ID is therefore hardcoded to 0.
 229         */
 230        if (edac_mc_add_mc(mci)) {
 231                edac_dbg(3, "failed edac_mc_add_mc()\n");
 232                goto fail;
 233        }
 234
 235        /* allocating generic PCI control info */
 236        i82860_pci = edac_pci_create_generic_ctl(&pdev->dev, EDAC_MOD_STR);
 237        if (!i82860_pci) {
 238                printk(KERN_WARNING
 239                        "%s(): Unable to create PCI control\n",
 240                        __func__);
 241                printk(KERN_WARNING
 242                        "%s(): PCI error report via EDAC not setup\n",
 243                        __func__);
 244        }
 245
 246        /* get this far and it's successful */
 247        edac_dbg(3, "success\n");
 248
 249        return 0;
 250
 251fail:
 252        edac_mc_free(mci);
 253        return -ENODEV;
 254}
 255
 256/* returns count (>= 0), or negative on error */
 257static int i82860_init_one(struct pci_dev *pdev,
 258                           const struct pci_device_id *ent)
 259{
 260        int rc;
 261
 262        edac_dbg(0, "\n");
 263        i82860_printk(KERN_INFO, "i82860 init one\n");
 264
 265        if (pci_enable_device(pdev) < 0)
 266                return -EIO;
 267
 268        rc = i82860_probe1(pdev, ent->driver_data);
 269
 270        if (rc == 0)
 271                mci_pdev = pci_dev_get(pdev);
 272
 273        return rc;
 274}
 275
 276static void i82860_remove_one(struct pci_dev *pdev)
 277{
 278        struct mem_ctl_info *mci;
 279
 280        edac_dbg(0, "\n");
 281
 282        if (i82860_pci)
 283                edac_pci_release_generic_ctl(i82860_pci);
 284
 285        if ((mci = edac_mc_del_mc(&pdev->dev)) == NULL)
 286                return;
 287
 288        edac_mc_free(mci);
 289}
 290
 291static const struct pci_device_id i82860_pci_tbl[] = {
 292        {
 293         PCI_VEND_DEV(INTEL, 82860_0), PCI_ANY_ID, PCI_ANY_ID, 0, 0,
 294         I82860},
 295        {
 296         0,
 297         }                      /* 0 terminated list. */
 298};
 299
 300MODULE_DEVICE_TABLE(pci, i82860_pci_tbl);
 301
 302static struct pci_driver i82860_driver = {
 303        .name = EDAC_MOD_STR,
 304        .probe = i82860_init_one,
 305        .remove = i82860_remove_one,
 306        .id_table = i82860_pci_tbl,
 307};
 308
 309static int __init i82860_init(void)
 310{
 311        int pci_rc;
 312
 313        edac_dbg(3, "\n");
 314
 315       /* Ensure that the OPSTATE is set correctly for POLL or NMI */
 316       opstate_init();
 317
 318        if ((pci_rc = pci_register_driver(&i82860_driver)) < 0)
 319                goto fail0;
 320
 321        if (!mci_pdev) {
 322                mci_pdev = pci_get_device(PCI_VENDOR_ID_INTEL,
 323                                        PCI_DEVICE_ID_INTEL_82860_0, NULL);
 324
 325                if (mci_pdev == NULL) {
 326                        edac_dbg(0, "860 pci_get_device fail\n");
 327                        pci_rc = -ENODEV;
 328                        goto fail1;
 329                }
 330
 331                pci_rc = i82860_init_one(mci_pdev, i82860_pci_tbl);
 332
 333                if (pci_rc < 0) {
 334                        edac_dbg(0, "860 init fail\n");
 335                        pci_rc = -ENODEV;
 336                        goto fail1;
 337                }
 338        }
 339
 340        return 0;
 341
 342fail1:
 343        pci_unregister_driver(&i82860_driver);
 344
 345fail0:
 346        pci_dev_put(mci_pdev);
 347        return pci_rc;
 348}
 349
 350static void __exit i82860_exit(void)
 351{
 352        edac_dbg(3, "\n");
 353        pci_unregister_driver(&i82860_driver);
 354        pci_dev_put(mci_pdev);
 355}
 356
 357module_init(i82860_init);
 358module_exit(i82860_exit);
 359
 360MODULE_LICENSE("GPL");
 361MODULE_AUTHOR("Red Hat Inc. (http://www.redhat.com) "
 362                "Ben Woodard <woodard@redhat.com>");
 363MODULE_DESCRIPTION("ECC support for Intel 82860 memory hub controllers");
 364
 365module_param(edac_op_state, int, 0444);
 366MODULE_PARM_DESC(edac_op_state, "EDAC Error Reporting state: 0=Poll,1=NMI");
 367