linux/drivers/edac/edac_mc.c
/*
 * edac_mc kernel module
 * (C) 2005, 2006 Linux Networx (http://lnxi.com)
 * This file may be distributed under the terms of the
 * GNU General Public License.
 *
 * Written by Thayne Harbaugh
 * Based on work by Dan Hollis <goemon at anime dot net> and others.
 *      http://www.anime.net/~goemon/linux-ecc/
 *
 * Modified by Dave Peterson and Doug Thompson
 *
 */

#include <linux/module.h>
#include <linux/proc_fs.h>
#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/smp.h>
#include <linux/init.h>
#include <linux/sysctl.h>
#include <linux/highmem.h>
#include <linux/timer.h>
#include <linux/slab.h>
#include <linux/jiffies.h>
#include <linux/spinlock.h>
#include <linux/list.h>
#include <linux/ctype.h>
#include <linux/edac.h>
#include <asm/uaccess.h>
#include <asm/page.h>
#include <asm/edac.h>
#include "edac_core.h"
#include "edac_module.h"

/* lock to memory controller's control array */
static DEFINE_MUTEX(mem_ctls_mutex);
static LIST_HEAD(mc_devices);

#ifdef CONFIG_EDAC_DEBUG

static void edac_mc_dump_channel(struct rank_info *chan)
{
        debugf4("\tchannel = %p\n", chan);
        debugf4("\tchannel->chan_idx = %d\n", chan->chan_idx);
        debugf4("\tchannel->csrow = %p\n", chan->csrow);
        debugf4("\tchannel->dimm = %p\n", chan->dimm);
}

static void edac_mc_dump_dimm(struct dimm_info *dimm)
{
        int i;

        debugf4("\tdimm = %p\n", dimm);
        debugf4("\tdimm->label = '%s'\n", dimm->label);
        debugf4("\tdimm->nr_pages = 0x%x\n", dimm->nr_pages);
        debugf4("\tdimm location ");
        for (i = 0; i < dimm->mci->n_layers; i++) {
                printk(KERN_CONT "%d", dimm->location[i]);
                if (i < dimm->mci->n_layers - 1)
                        printk(KERN_CONT ".");
        }
        printk(KERN_CONT "\n");
        debugf4("\tdimm->grain = %d\n", dimm->grain);
}

static void edac_mc_dump_csrow(struct csrow_info *csrow)
{
        debugf4("\tcsrow = %p\n", csrow);
        debugf4("\tcsrow->csrow_idx = %d\n", csrow->csrow_idx);
        debugf4("\tcsrow->first_page = 0x%lx\n", csrow->first_page);
        debugf4("\tcsrow->last_page = 0x%lx\n", csrow->last_page);
        debugf4("\tcsrow->page_mask = 0x%lx\n", csrow->page_mask);
        debugf4("\tcsrow->nr_channels = %d\n", csrow->nr_channels);
        debugf4("\tcsrow->channels = %p\n", csrow->channels);
        debugf4("\tcsrow->mci = %p\n\n", csrow->mci);
}

static void edac_mc_dump_mci(struct mem_ctl_info *mci)
{
        debugf3("\tmci = %p\n", mci);
        debugf3("\tmci->mtype_cap = %lx\n", mci->mtype_cap);
        debugf3("\tmci->edac_ctl_cap = %lx\n", mci->edac_ctl_cap);
        debugf3("\tmci->edac_cap = %lx\n", mci->edac_cap);
        debugf4("\tmci->edac_check = %p\n", mci->edac_check);
        debugf3("\tmci->nr_csrows = %d, csrows = %p\n",
                mci->nr_csrows, mci->csrows);
        debugf3("\tmci->nr_dimms = %d, dimms = %p\n",
                mci->tot_dimms, mci->dimms);
        debugf3("\tdev = %p\n", mci->dev);
        debugf3("\tmod_name:ctl_name = %s:%s\n", mci->mod_name, mci->ctl_name);
        debugf3("\tpvt_info = %p\n\n", mci->pvt_info);
}

#endif                          /* CONFIG_EDAC_DEBUG */

/*
 * keep those in sync with the enum mem_type
 */
const char *edac_mem_types[] = {
        "Empty csrow",
        "Reserved csrow type",
        "Unknown csrow type",
        "Fast page mode RAM",
        "Extended data out RAM",
        "Burst Extended data out RAM",
        "Single data rate SDRAM",
        "Registered single data rate SDRAM",
        "Double data rate SDRAM",
        "Registered Double data rate SDRAM",
        "Rambus DRAM",
        "Unbuffered DDR2 RAM",
        "Fully buffered DDR2",
        "Registered DDR2 RAM",
        "Rambus XDR",
        "Unbuffered DDR3 RAM",
        "Registered DDR3 RAM",
};
EXPORT_SYMBOL_GPL(edac_mem_types);

/**
 * edac_align_ptr - Prepares the pointer offsets for a single-shot allocation
 * @p:          pointer to a pointer with the memory offset to be used. At
 *              return, this will be incremented to point to the next offset
 * @size:       Size of the data structure to be reserved
 * @n_elems:    Number of elements that should be reserved
 *
 * If 'size' is a constant, the compiler will optimize this whole function
 * down to either a no-op or the addition of a constant to the value of '*p'.
 *
 * The 'p' pointer is needed to keep advancing to the proper offsets in
 * memory when allocating the struct along with its embedded structs, as
 * edac_mc_alloc() does below, for example.
 *
 * At return, the pointer 'p' will be incremented to be used on a next call
 * to this function.
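 *
 * A minimal sketch of the two-pass pattern used by edac_mc_alloc() below:
 * the first pass computes offsets relative to NULL, the second rebases
 * them onto the real allocation (names here are illustrative):
 *
 *      void *ptr = NULL, *pvt;
 *      struct mem_ctl_info *mci;
 *
 *      mci = edac_align_ptr(&ptr, sizeof(*mci), 1);
 *      pvt = edac_align_ptr(&ptr, sz_pvt, 1);
 *      mci = kzalloc((unsigned long)pvt + sz_pvt, GFP_KERNEL);
 *      pvt = (void *)((char *)mci + (unsigned long)pvt);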
 */
void *edac_align_ptr(void **p, unsigned size, int n_elems)
{
        unsigned align, r;
        void *ptr = *p;

        *p += size * n_elems;

        /*
         * 'p' can possibly be an unaligned item X such that sizeof(X) is
         * 'size'.  Adjust 'p' so that its alignment is at least as
         * stringent as what the compiler would provide for X and return
         * the aligned result.
         * Here we assume that the alignment of a "long long" is the most
         * stringent alignment that the compiler will ever provide by default.
         * As far as I know, this is a reasonable assumption.
         */
        if (size > sizeof(long))
                align = sizeof(long long);
        else if (size > sizeof(int))
                align = sizeof(long);
        else if (size > sizeof(short))
                align = sizeof(int);
        else if (size > sizeof(char))
                align = sizeof(short);
        else
                return ptr;

        r = (unsigned long)ptr % align;

        if (r == 0)
                return ptr;

        *p += align - r;

        return (void *)(((unsigned long)ptr) + align - r);
}

/**
 * edac_mc_alloc: Allocate and partially fill a struct mem_ctl_info structure
 * @mc_num:             Memory controller number
 * @n_layers:           Number of MC hierarchy layers
 * @layers:             Describes each layer as seen by the Memory Controller
 * @sz_pvt:             size of private storage needed
 *
 * Everything is kmalloc'ed as one big chunk - more efficient.
 * It can only be used if all structures have the same lifetime - otherwise
 * you have to allocate and initialize your own structures.
 *
 * Use edac_mc_free() to free mc structures allocated by this function.
 *
 * NOTE: drivers handle multi-rank memories in different ways: in some
 * drivers, one multi-rank memory stick is mapped as one entry, while, in
 * others, a single multi-rank memory stick would be mapped into several
 * entries. Currently, this function allocates multiple struct dimm_info
 * in such scenarios, as grouping the multiple ranks would require changes
 * to the drivers.
 *
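 * A hypothetical call for a controller with 4 chip selects of 2 channels
 * each (values are illustrative, not taken from any real driver; 'my_pvt'
 * is a placeholder for the driver's private struct):
 *
 *      struct edac_mc_layer layers[2];
 *
 *      layers[0].type = EDAC_MC_LAYER_CHIP_SELECT;
 *      layers[0].size = 4;
 *      layers[0].is_virt_csrow = true;
 *      layers[1].type = EDAC_MC_LAYER_CHANNEL;
 *      layers[1].size = 2;
 *      layers[1].is_virt_csrow = false;
 *      mci = edac_mc_alloc(0, ARRAY_SIZE(layers), layers,
 *                          sizeof(struct my_pvt));
 *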
 * Returns:
 *      On failure: NULL
 *      On success: struct mem_ctl_info pointer
 */
struct mem_ctl_info *edac_mc_alloc(unsigned mc_num,
                                   unsigned n_layers,
                                   struct edac_mc_layer *layers,
                                   unsigned sz_pvt)
{
        struct mem_ctl_info *mci;
        struct edac_mc_layer *layer;
        struct csrow_info *csi, *csr;
        struct rank_info *chi, *chp, *chan;
        struct dimm_info *dimm;
        u32 *ce_per_layer[EDAC_MAX_LAYERS], *ue_per_layer[EDAC_MAX_LAYERS];
        unsigned pos[EDAC_MAX_LAYERS];
        unsigned size, tot_dimms = 1, count = 1;
        unsigned tot_csrows = 1, tot_channels = 1, tot_errcount = 0;
        void *pvt, *p, *ptr = NULL;
        int i, j, err, row, chn, n, len;
        bool per_rank = false;

        BUG_ON(n_layers > EDAC_MAX_LAYERS || n_layers == 0);
        /*
         * Calculate the total amount of dimms and csrows/cschannels while
         * in the old API emulation mode
         */
        for (i = 0; i < n_layers; i++) {
                tot_dimms *= layers[i].size;
                if (layers[i].is_virt_csrow)
                        tot_csrows *= layers[i].size;
                else
                        tot_channels *= layers[i].size;

                if (layers[i].type == EDAC_MC_LAYER_CHIP_SELECT)
                        per_rank = true;
        }

        /* Figure out the offsets of the various items from the start of an mc
         * structure.  We want the alignment of each item to be at least as
         * stringent as what the compiler would provide if we could simply
         * hardcode everything into a single struct.
         */
        mci = edac_align_ptr(&ptr, sizeof(*mci), 1);
        layer = edac_align_ptr(&ptr, sizeof(*layer), n_layers);
        csi = edac_align_ptr(&ptr, sizeof(*csi), tot_csrows);
        chi = edac_align_ptr(&ptr, sizeof(*chi), tot_csrows * tot_channels);
        dimm = edac_align_ptr(&ptr, sizeof(*dimm), tot_dimms);
        for (i = 0; i < n_layers; i++) {
                count *= layers[i].size;
                debugf4("%s: errcount layer %d size %d\n", __func__, i, count);
                ce_per_layer[i] = edac_align_ptr(&ptr, sizeof(u32), count);
                ue_per_layer[i] = edac_align_ptr(&ptr, sizeof(u32), count);
                tot_errcount += 2 * count;
        }

        debugf4("%s: allocating %d error counters\n", __func__, tot_errcount);
        pvt = edac_align_ptr(&ptr, sz_pvt, 1);
        size = ((unsigned long)pvt) + sz_pvt;

        debugf1("%s(): allocating %u bytes for mci data (%d %s, %d csrows/channels)\n",
                __func__, size,
                tot_dimms,
                per_rank ? "ranks" : "dimms",
                tot_csrows * tot_channels);
        mci = kzalloc(size, GFP_KERNEL);
        if (mci == NULL)
                return NULL;

        /* Adjust pointers so they point within the memory we just allocated
         * rather than an imaginary chunk of memory located at address 0.
         */
        layer = (struct edac_mc_layer *)(((char *)mci) + ((unsigned long)layer));
        csi = (struct csrow_info *)(((char *)mci) + ((unsigned long)csi));
        chi = (struct rank_info *)(((char *)mci) + ((unsigned long)chi));
        dimm = (struct dimm_info *)(((char *)mci) + ((unsigned long)dimm));
        for (i = 0; i < n_layers; i++) {
                mci->ce_per_layer[i] = (u32 *)((char *)mci + ((unsigned long)ce_per_layer[i]));
                mci->ue_per_layer[i] = (u32 *)((char *)mci + ((unsigned long)ue_per_layer[i]));
        }
        pvt = sz_pvt ? (((char *)mci) + ((unsigned long)pvt)) : NULL;

        /* setup index and various internal pointers */
        mci->mc_idx = mc_num;
        mci->csrows = csi;
        mci->dimms  = dimm;
        mci->tot_dimms = tot_dimms;
        mci->pvt_info = pvt;
        mci->n_layers = n_layers;
        mci->layers = layer;
        memcpy(mci->layers, layers, sizeof(*layer) * n_layers);
        mci->nr_csrows = tot_csrows;
        mci->num_cschannel = tot_channels;
        mci->mem_is_per_rank = per_rank;

        /*
         * Fill the csrow struct
         */
        for (row = 0; row < tot_csrows; row++) {
                csr = &csi[row];
                csr->csrow_idx = row;
                csr->mci = mci;
                csr->nr_channels = tot_channels;
                chp = &chi[row * tot_channels];
                csr->channels = chp;

                for (chn = 0; chn < tot_channels; chn++) {
                        chan = &chp[chn];
                        chan->chan_idx = chn;
                        chan->csrow = csr;
                }
        }

        /*
         * Fill the dimm struct
         */
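        /*
         * The loop below visits every dimm once: pos[] tracks the current
         * coordinate within each layer (advanced odometer-style at the end
         * of each iteration), while row/chn track the equivalent legacy
         * csrow/channel coordinates used by the old API mapping.
         */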
        memset(&pos, 0, sizeof(pos));
        row = 0;
        chn = 0;
        debugf4("%s: initializing %d %s\n", __func__, tot_dimms,
                per_rank ? "ranks" : "dimms");
        for (i = 0; i < tot_dimms; i++) {
                chan = &csi[row].channels[chn];
                dimm = EDAC_DIMM_PTR(layer, mci->dimms, n_layers,
                               pos[0], pos[1], pos[2]);
                dimm->mci = mci;

                debugf2("%s: %d: %s%zd (%d:%d:%d): row %d, chan %d\n", __func__,
                        i, per_rank ? "rank" : "dimm", (dimm - mci->dimms),
                        pos[0], pos[1], pos[2], row, chn);

                /*
                 * Copy DIMM location and initialize it.
                 */
                len = sizeof(dimm->label);
                p = dimm->label;
                n = snprintf(p, len, "mc#%u", mc_num);
                p += n;
                len -= n;
                for (j = 0; j < n_layers; j++) {
                        n = snprintf(p, len, "%s#%u",
                                     edac_layer_name[layers[j].type],
                                     pos[j]);
                        p += n;
                        len -= n;
                        dimm->location[j] = pos[j];

                        if (len <= 0)
                                break;
                }

                /* Link it to the csrows old API data */
                chan->dimm = dimm;
                dimm->csrow = row;
                dimm->cschannel = chn;

                /* Increment csrow location */
                row++;
                if (row == tot_csrows) {
                        row = 0;
                        chn++;
                }

                /* Increment dimm location */
                for (j = n_layers - 1; j >= 0; j--) {
                        pos[j]++;
                        if (pos[j] < layers[j].size)
                                break;
                        pos[j] = 0;
                }
        }

        mci->op_state = OP_ALLOC;
        INIT_LIST_HEAD(&mci->grp_kobj_list);

        /*
         * Initialize the 'root' kobj for the edac_mc controller
         */
        err = edac_mc_register_sysfs_main_kobj(mci);
        if (err) {
                kfree(mci);
                return NULL;
        }

        /* At this point the root kobj is valid; to free the object,
         * edac_mc_unregister_sysfs_main_kobj() must be called instead:
         * it unregisters the kobject, and the actual free happens in the
         * kobject release callback.
         */
        return mci;
}
EXPORT_SYMBOL_GPL(edac_mc_alloc);

/**
 * edac_mc_free
 *      'Free' a previously allocated 'mci' structure
 * @mci: pointer to a struct mem_ctl_info structure
 */
void edac_mc_free(struct mem_ctl_info *mci)
{
        debugf1("%s()\n", __func__);

        edac_mc_unregister_sysfs_main_kobj(mci);

        /* free the mci instance memory here */
        kfree(mci);
}
EXPORT_SYMBOL_GPL(edac_mc_free);


/**
 * find_mci_by_dev
 *
 *      scan list of controllers looking for the one that manages
 *      the 'dev' device
 * @dev: pointer to a struct device related with the MCI
 */
struct mem_ctl_info *find_mci_by_dev(struct device *dev)
{
        struct mem_ctl_info *mci;
        struct list_head *item;

        debugf3("%s()\n", __func__);

        list_for_each(item, &mc_devices) {
                mci = list_entry(item, struct mem_ctl_info, link);

                if (mci->dev == dev)
                        return mci;
        }

        return NULL;
}
EXPORT_SYMBOL_GPL(find_mci_by_dev);

/*
 * handler for EDAC to check if NMI type handler has asserted interrupt
 */
static int edac_mc_assert_error_check_and_clear(void)
{
        int old_state;

        if (edac_op_state == EDAC_OPSTATE_POLL)
                return 1;

        old_state = edac_err_assert;
        edac_err_assert = 0;

        return old_state;
}

/*
 * edac_mc_workq_function
 *      performs the operation scheduled by a workq request
 */
static void edac_mc_workq_function(struct work_struct *work_req)
{
        struct delayed_work *d_work = to_delayed_work(work_req);
        struct mem_ctl_info *mci = to_edac_mem_ctl_work(d_work);

        mutex_lock(&mem_ctls_mutex);

        /* if this control struct has moved to offline state, we are done */
        if (mci->op_state == OP_OFFLINE) {
                mutex_unlock(&mem_ctls_mutex);
                return;
        }

        /* Only poll controllers that are running polled and have a check */
        if (edac_mc_assert_error_check_and_clear() && (mci->edac_check != NULL))
                mci->edac_check(mci);

        mutex_unlock(&mem_ctls_mutex);

        /* Reschedule */
        queue_delayed_work(edac_workqueue, &mci->work,
                        msecs_to_jiffies(edac_mc_get_poll_msec()));
}

/*
 * edac_mc_workq_setup
 *      initialize a workq item for this mci
 *      passing in the new delay period in msec
 *
 *      locking model:
 *
 *              called with the mem_ctls_mutex held
 */
static void edac_mc_workq_setup(struct mem_ctl_info *mci, unsigned msec)
{
        debugf0("%s()\n", __func__);

        /* if this instance is not in the POLL state, then simply return */
        if (mci->op_state != OP_RUNNING_POLL)
                return;

        INIT_DELAYED_WORK(&mci->work, edac_mc_workq_function);
        queue_delayed_work(edac_workqueue, &mci->work, msecs_to_jiffies(msec));
}

/*
 * edac_mc_workq_teardown
 *      stop the workq processing on this mci
 *
 *      locking model:
 *
 *              called WITHOUT lock held
 */
static void edac_mc_workq_teardown(struct mem_ctl_info *mci)
{
        int status;

        if (mci->op_state != OP_RUNNING_POLL)
                return;

        status = cancel_delayed_work(&mci->work);
        if (status == 0) {
                debugf0("%s() not canceled, flush the queue\n",
                        __func__);

                /* workq instance might be running, wait for it */
                flush_workqueue(edac_workqueue);
        }
}

/*
 * edac_mc_reset_delay_period(int value)
 *
 *      user space has updated our poll period value, need to
 *      reset our workq delays
 */
void edac_mc_reset_delay_period(int value)
{
        struct mem_ctl_info *mci;
        struct list_head *item;

        mutex_lock(&mem_ctls_mutex);

        /* scan the list and turn off all workq timers, doing so under lock
         */
        list_for_each(item, &mc_devices) {
                mci = list_entry(item, struct mem_ctl_info, link);

                if (mci->op_state == OP_RUNNING_POLL)
                        cancel_delayed_work(&mci->work);
        }

        mutex_unlock(&mem_ctls_mutex);

        /* re-walk the list, and reset the poll delay */
        mutex_lock(&mem_ctls_mutex);

        list_for_each(item, &mc_devices) {
                mci = list_entry(item, struct mem_ctl_info, link);

                edac_mc_workq_setup(mci, (unsigned long) value);
        }

        mutex_unlock(&mem_ctls_mutex);
}

/* Return 0 on success, 1 on failure.
 * Before calling this function, caller must
 * assign a unique value to mci->mc_idx.
 *
 *      locking model:
 *
 *              called with the mem_ctls_mutex lock held
 */
static int add_mc_to_global_list(struct mem_ctl_info *mci)
{
        struct list_head *item, *insert_before;
        struct mem_ctl_info *p;

        insert_before = &mc_devices;

        p = find_mci_by_dev(mci->dev);
        if (unlikely(p != NULL))
                goto fail0;

        list_for_each(item, &mc_devices) {
                p = list_entry(item, struct mem_ctl_info, link);

                if (p->mc_idx >= mci->mc_idx) {
                        if (unlikely(p->mc_idx == mci->mc_idx))
                                goto fail1;

                        insert_before = item;
                        break;
                }
        }

        list_add_tail_rcu(&mci->link, insert_before);
        atomic_inc(&edac_handlers);
        return 0;

fail0:
        edac_printk(KERN_WARNING, EDAC_MC,
                "%s (%s) %s %s already assigned %d\n", dev_name(p->dev),
                edac_dev_name(mci), p->mod_name, p->ctl_name, p->mc_idx);
        return 1;

fail1:
        edac_printk(KERN_WARNING, EDAC_MC,
                "bug in low-level driver: attempt to assign\n"
                "    duplicate mc_idx %d in %s()\n", p->mc_idx, __func__);
        return 1;
}

static void del_mc_from_global_list(struct mem_ctl_info *mci)
{
        atomic_dec(&edac_handlers);
        list_del_rcu(&mci->link);

        /* these are for safe removal of devices from global list while
         * NMI handlers may be traversing list
         */
        synchronize_rcu();
        INIT_LIST_HEAD(&mci->link);
}

/**
 * edac_mc_find: Search for a mem_ctl_info structure whose index is 'idx'.
 *
 * If found, return a pointer to the structure.
 * Else return NULL.
 *
 * Caller must hold mem_ctls_mutex.
 */
struct mem_ctl_info *edac_mc_find(int idx)
{
        struct list_head *item;
        struct mem_ctl_info *mci;

        list_for_each(item, &mc_devices) {
                mci = list_entry(item, struct mem_ctl_info, link);

                if (mci->mc_idx >= idx) {
                        if (mci->mc_idx == idx)
                                return mci;

                        break;
                }
        }

        return NULL;
}
EXPORT_SYMBOL(edac_mc_find);

/**
 * edac_mc_add_mc: Insert the 'mci' structure into the mci global list and
 *                 create sysfs entries associated with mci structure
 * @mci: pointer to the mci structure to be added to the list
 *
 * Return:
 *      0       Success
 *      !0      Failure
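 *
 * A sketch of the typical probe-time sequence in a driver (illustrative;
 * 'pdev' and 'struct my_pvt' are placeholders):
 *
 *      mci = edac_mc_alloc(0, ARRAY_SIZE(layers), layers,
 *                          sizeof(struct my_pvt));
 *      if (!mci)
 *              return -ENOMEM;
 *      mci->dev = &pdev->dev;
 *      ... fill ctl_name, mod_name, dimm/csrow info, etc ...
 *      if (edac_mc_add_mc(mci))
 *              goto err_free;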
 */

/* FIXME - should a warning be printed if no error detection? correction? */
int edac_mc_add_mc(struct mem_ctl_info *mci)
{
        debugf0("%s()\n", __func__);

#ifdef CONFIG_EDAC_DEBUG
        if (edac_debug_level >= 3)
                edac_mc_dump_mci(mci);

        if (edac_debug_level >= 4) {
                int i;

                for (i = 0; i < mci->nr_csrows; i++) {
                        int j;

                        edac_mc_dump_csrow(&mci->csrows[i]);
                        for (j = 0; j < mci->csrows[i].nr_channels; j++)
                                edac_mc_dump_channel(&mci->csrows[i].
                                                channels[j]);
                }
                for (i = 0; i < mci->tot_dimms; i++)
                        edac_mc_dump_dimm(&mci->dimms[i]);
        }
#endif
        mutex_lock(&mem_ctls_mutex);

        if (add_mc_to_global_list(mci))
                goto fail0;

        /* set load time so that error rate can be tracked */
        mci->start_time = jiffies;

        if (edac_create_sysfs_mci_device(mci)) {
                edac_mc_printk(mci, KERN_WARNING,
                        "failed to create sysfs device\n");
                goto fail1;
        }

        /* If there IS a check routine, then we are running POLLED */
        if (mci->edac_check != NULL) {
                /* This instance is NOW RUNNING */
                mci->op_state = OP_RUNNING_POLL;

                edac_mc_workq_setup(mci, edac_mc_get_poll_msec());
        } else {
                mci->op_state = OP_RUNNING_INTERRUPT;
        }

        /* Report action taken */
        edac_mc_printk(mci, KERN_INFO, "Giving out device to '%s' '%s':"
                " DEV %s\n", mci->mod_name, mci->ctl_name, edac_dev_name(mci));

        mutex_unlock(&mem_ctls_mutex);
        return 0;

fail1:
        del_mc_from_global_list(mci);

fail0:
        mutex_unlock(&mem_ctls_mutex);
        return 1;
}
EXPORT_SYMBOL_GPL(edac_mc_add_mc);

/**
 * edac_mc_del_mc: Remove sysfs entries for specified mci structure and
 *                 remove mci structure from global list
 * @dev: Pointer to 'struct device' representing the mci structure to remove.
 *
 * Return pointer to removed mci structure, or NULL if device not found.
 */
struct mem_ctl_info *edac_mc_del_mc(struct device *dev)
{
        struct mem_ctl_info *mci;

        debugf0("%s()\n", __func__);

        mutex_lock(&mem_ctls_mutex);

        /* find the requested mci struct in the global list */
        mci = find_mci_by_dev(dev);
        if (mci == NULL) {
                mutex_unlock(&mem_ctls_mutex);
                return NULL;
        }

        del_mc_from_global_list(mci);
        mutex_unlock(&mem_ctls_mutex);

        /* flush workq processes */
        edac_mc_workq_teardown(mci);

        /* marking MCI offline */
        mci->op_state = OP_OFFLINE;

        /* remove from sysfs */
        edac_remove_sysfs_mci_device(mci);

        edac_printk(KERN_INFO, EDAC_MC,
                "Removed device %d for %s %s: DEV %s\n", mci->mc_idx,
                mci->mod_name, mci->ctl_name, edac_dev_name(mci));

        return mci;
}
EXPORT_SYMBOL_GPL(edac_mc_del_mc);

static void edac_mc_scrub_block(unsigned long page, unsigned long offset,
                                u32 size)
{
        struct page *pg;
        void *virt_addr;
        unsigned long flags = 0;

        debugf3("%s()\n", __func__);

        /* ECC error page was not in our memory. Ignore it. */
        if (!pfn_valid(page))
                return;

        /* Find the actual page structure then map it and fix */
        pg = pfn_to_page(page);

        if (PageHighMem(pg))
                local_irq_save(flags);

        virt_addr = kmap_atomic(pg);

        /* Perform architecture specific atomic scrub operation */
        atomic_scrub(virt_addr + offset, size);

        /* Unmap and complete */
        kunmap_atomic(virt_addr);

        if (PageHighMem(pg))
                local_irq_restore(flags);
}

/* FIXME - should return -1 */
int edac_mc_find_csrow_by_page(struct mem_ctl_info *mci, unsigned long page)
{
        struct csrow_info *csrows = mci->csrows;
        int row, i, j, n;

        debugf1("MC%d: %s(): 0x%lx\n", mci->mc_idx, __func__, page);
        row = -1;

        for (i = 0; i < mci->nr_csrows; i++) {
                struct csrow_info *csrow = &csrows[i];
                n = 0;
                for (j = 0; j < csrow->nr_channels; j++) {
                        struct dimm_info *dimm = csrow->channels[j].dimm;
                        n += dimm->nr_pages;
                }
                if (n == 0)
                        continue;

                debugf3("MC%d: %s(): first(0x%lx) page(0x%lx) last(0x%lx) "
                        "mask(0x%lx)\n", mci->mc_idx, __func__,
                        csrow->first_page, page, csrow->last_page,
                        csrow->page_mask);

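                /*
                 * A csrow claims the page if it falls inside the row's
                 * [first_page, last_page] range and its masked bits match
                 * those of first_page; interleaving leaves holes in that
                 * range, which page_mask filters out.
                 */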
                if ((page >= csrow->first_page) &&
                    (page <= csrow->last_page) &&
                    ((page & csrow->page_mask) ==
                     (csrow->first_page & csrow->page_mask))) {
                        row = i;
                        break;
                }
        }

        if (row == -1)
                edac_mc_printk(mci, KERN_ERR,
                        "could not look up page error address %lx\n",
                        (unsigned long)page);

        return row;
}
EXPORT_SYMBOL_GPL(edac_mc_find_csrow_by_page);

const char *edac_layer_name[] = {
        [EDAC_MC_LAYER_BRANCH] = "branch",
        [EDAC_MC_LAYER_CHANNEL] = "channel",
        [EDAC_MC_LAYER_SLOT] = "slot",
        [EDAC_MC_LAYER_CHIP_SELECT] = "csrow",
};
EXPORT_SYMBOL_GPL(edac_layer_name);

static void edac_inc_ce_error(struct mem_ctl_info *mci,
                                    bool enable_per_layer_report,
                                    const int pos[EDAC_MAX_LAYERS])
{
        int i, index = 0;

        mci->ce_mc++;

        if (!enable_per_layer_report) {
                mci->ce_noinfo_count++;
                return;
        }

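        /*
         * Flatten the per-layer positions into a single row-major index:
         * each step adds the current layer's position, then multiplies by
         * the size of the next layer down.
         */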
        for (i = 0; i < mci->n_layers; i++) {
                if (pos[i] < 0)
                        break;
                index += pos[i];
                mci->ce_per_layer[i][index]++;

                if (i < mci->n_layers - 1)
                        index *= mci->layers[i + 1].size;
        }
}

static void edac_inc_ue_error(struct mem_ctl_info *mci,
                                    bool enable_per_layer_report,
                                    const int pos[EDAC_MAX_LAYERS])
{
        int i, index = 0;

        mci->ue_mc++;

        if (!enable_per_layer_report) {
                mci->ue_noinfo_count++;
                return;
        }

        for (i = 0; i < mci->n_layers; i++) {
                if (pos[i] < 0)
                        break;
                index += pos[i];
                mci->ue_per_layer[i][index]++;

                if (i < mci->n_layers - 1)
                        index *= mci->layers[i + 1].size;
        }
}

static void edac_ce_error(struct mem_ctl_info *mci,
                          const int pos[EDAC_MAX_LAYERS],
                          const char *msg,
                          const char *location,
                          const char *label,
                          const char *detail,
                          const char *other_detail,
                          const bool enable_per_layer_report,
                          const unsigned long page_frame_number,
                          const unsigned long offset_in_page,
                          u32 grain)
{
        unsigned long remapped_page;

        if (edac_mc_get_log_ce()) {
                if (other_detail && *other_detail)
                        edac_mc_printk(mci, KERN_WARNING,
                                       "CE %s on %s (%s%s - %s)\n",
                                       msg, label, location,
                                       detail, other_detail);
                else
                        edac_mc_printk(mci, KERN_WARNING,
                                       "CE %s on %s (%s%s)\n",
                                       msg, label, location,
                                       detail);
        }
        edac_inc_ce_error(mci, enable_per_layer_report, pos);

        if (mci->scrub_mode & SCRUB_SW_SRC) {
                /*
                 * Some memory controllers (called MCs below) can remap
                 * memory so that it is still available at a different
                 * address when PCI devices map into memory.
                 * MC's that can't do this, lose the memory where PCI
                 * devices are mapped. This mapping is MC-dependent
                 * and so we call back into the MC driver for it to
                 * map the MC page to a physical (CPU) page which can
                 * then be mapped to a virtual page - which can then
                 * be scrubbed.
                 */
                remapped_page = mci->ctl_page_to_phys ?
                        mci->ctl_page_to_phys(mci, page_frame_number) :
                        page_frame_number;

                edac_mc_scrub_block(remapped_page,
                                        offset_in_page, grain);
        }
}

static void edac_ue_error(struct mem_ctl_info *mci,
                          const int pos[EDAC_MAX_LAYERS],
                          const char *msg,
                          const char *location,
                          const char *label,
                          const char *detail,
                          const char *other_detail,
                          const bool enable_per_layer_report)
{
        if (edac_mc_get_log_ue()) {
                if (other_detail && *other_detail)
                        edac_mc_printk(mci, KERN_WARNING,
                                       "UE %s on %s (%s%s - %s)\n",
                                       msg, label, location, detail,
                                       other_detail);
                else
                        edac_mc_printk(mci, KERN_WARNING,
                                       "UE %s on %s (%s%s)\n",
                                       msg, label, location, detail);
        }

        if (edac_mc_get_panic_on_ue()) {
                if (other_detail && *other_detail)
                        panic("UE %s on %s (%s%s - %s)\n",
                              msg, label, location, detail, other_detail);
                else
                        panic("UE %s on %s (%s%s)\n",
                              msg, label, location, detail);
        }

        edac_inc_ue_error(mci, enable_per_layer_report, pos);
}

#define OTHER_LABEL " or "
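
/*
 * A hypothetical call from a driver that decoded a corrected error at
 * position (1, 0) of a two-layer controller (illustrative only; unused
 * trailing layers are passed as -1):
 *
 *      edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci,
 *                           pfn, offset, syndrome,
 *                           1, 0, -1,
 *                           "read error", "", NULL);
 */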
void edac_mc_handle_error(const enum hw_event_mc_err_type type,
                          struct mem_ctl_info *mci,
                          const unsigned long page_frame_number,
                          const unsigned long offset_in_page,
                          const unsigned long syndrome,
                          const int layer0,
                          const int layer1,
                          const int layer2,
                          const char *msg,
                          const char *other_detail,
                          const void *mcelog)
{
        /* FIXME: too much for stack: move it to some pre-allocated area */
        char detail[80], location[80];
        char label[(EDAC_MC_LABEL_LEN + 1 + sizeof(OTHER_LABEL)) * mci->tot_dimms];
        char *p;
        int row = -1, chan = -1;
        int pos[EDAC_MAX_LAYERS] = { layer0, layer1, layer2 };
        int i;
        u32 grain;
        bool enable_per_layer_report = false;

        debugf3("MC%d: %s()\n", mci->mc_idx, __func__);

        /*
         * Check if the event report is consistent and if the memory
         * location is known. If it is known, enable_per_layer_report will be
         * true, the DIMM(s) label info will be filled and the per-layer
         * error counters will be incremented.
         */
        for (i = 0; i < mci->n_layers; i++) {
                if (pos[i] >= (int)mci->layers[i].size) {
                        if (type == HW_EVENT_ERR_CORRECTED)
                                p = "CE";
                        else
                                p = "UE";

                        edac_mc_printk(mci, KERN_ERR,
                                       "INTERNAL ERROR: %s value is out of range (%d >= %d)\n",
                                       edac_layer_name[mci->layers[i].type],
                                       pos[i], mci->layers[i].size);
                        /*
                         * Instead of just returning, let's use what's
                         * known about the error. The increment routines and
                         * the DIMM filter logic will do the right thing by
                         * pointing to the likely damaged DIMMs.
                         */
                        pos[i] = -1;
                }
                if (pos[i] >= 0)
                        enable_per_layer_report = true;
        }

        /*
         * Get the dimm label/grain that applies to the match criteria.
         * As the error algorithm may not be able to point to just one memory
         * stick, the logic here will get all possible labels that could
         * potentially be affected by the error.
         * On FB-DIMM memory controllers, for uncorrected errors, it is common
         * to have only the MC channel and the MC dimm (also called "branch")
         * but the channel is not known, as the memory is arranged in pairs,
         * where each memory belongs to a separate channel within the same
         * branch.
         */
        grain = 0;
        p = label;
        *p = '\0';
        for (i = 0; i < mci->tot_dimms; i++) {
                struct dimm_info *dimm = &mci->dimms[i];

                if (layer0 >= 0 && layer0 != dimm->location[0])
                        continue;
                if (layer1 >= 0 && layer1 != dimm->location[1])
                        continue;
                if (layer2 >= 0 && layer2 != dimm->location[2])
                        continue;

                /* get the max grain, over the error match range */
                if (dimm->grain > grain)
                        grain = dimm->grain;

                /*
                 * If the error is memory-controller wide, there's no need to
                 * search for the affected DIMMs, because the whole
                 * channel/memory controller/...  may be affected.
                 * Also, don't show errors for empty DIMM slots.
                 */
                if (enable_per_layer_report && dimm->nr_pages) {
                        if (p != label) {
                                strcpy(p, OTHER_LABEL);
                                p += strlen(OTHER_LABEL);
                        }
                        strcpy(p, dimm->label);
                        p += strlen(p);
                        *p = '\0';

                        /*
                         * get csrow/channel of the DIMM, in order to allow
                         * incrementing the compat API counters
                         */
                        debugf4("%s: %s csrows map: (%d,%d)\n",
                                __func__,
                                mci->mem_is_per_rank ? "rank" : "dimm",
                                dimm->csrow, dimm->cschannel);

                        if (row == -1)
                                row = dimm->csrow;
                        else if (row >= 0 && row != dimm->csrow)
                                row = -2;

                        if (chan == -1)
                                chan = dimm->cschannel;
                        else if (chan >= 0 && chan != dimm->cschannel)
                                chan = -2;
                }
        }

        if (!enable_per_layer_report) {
                strcpy(label, "any memory");
        } else {
                debugf4("%s: csrow/channel to increment: (%d,%d)\n",
                        __func__, row, chan);
                if (p == label)
                        strcpy(label, "unknown memory");
                if (type == HW_EVENT_ERR_CORRECTED) {
                        if (row >= 0) {
                                mci->csrows[row].ce_count++;
                                if (chan >= 0)
                                        mci->csrows[row].channels[chan].ce_count++;
                        }
                } else
                        if (row >= 0)
                                mci->csrows[row].ue_count++;
        }

        /* Fill the RAM location data */
        p = location;
        for (i = 0; i < mci->n_layers; i++) {
                if (pos[i] < 0)
                        continue;

                p += sprintf(p, "%s:%d ",
                             edac_layer_name[mci->layers[i].type],
                             pos[i]);
        }

        /* Memory type dependent details about the error */
        if (type == HW_EVENT_ERR_CORRECTED) {
                snprintf(detail, sizeof(detail),
                        "page:0x%lx offset:0x%lx grain:%d syndrome:0x%lx",
                        page_frame_number, offset_in_page,
                        grain, syndrome);
                edac_ce_error(mci, pos, msg, location, label, detail,
                              other_detail, enable_per_layer_report,
                              page_frame_number, offset_in_page, grain);
        } else {
                snprintf(detail, sizeof(detail),
                        "page:0x%lx offset:0x%lx grain:%d",
                        page_frame_number, offset_in_page, grain);

                edac_ue_error(mci, pos, msg, location, label, detail,
                              other_detail, enable_per_layer_report);
        }
}
EXPORT_SYMBOL_GPL(edac_mc_handle_error);