linux/drivers/edac/edac_mc.c
/*
 * edac_mc kernel module
 * (C) 2005, 2006 Linux Networx (http://lnxi.com)
 * This file may be distributed under the terms of the
 * GNU General Public License.
 *
 * Written by Thayne Harbaugh
 * Based on work by Dan Hollis <goemon at anime dot net> and others.
 *      http://www.anime.net/~goemon/linux-ecc/
 *
 * Modified by Dave Peterson and Doug Thompson
 *
 */

#include <linux/module.h>
#include <linux/proc_fs.h>
#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/smp.h>
#include <linux/init.h>
#include <linux/sysctl.h>
#include <linux/highmem.h>
#include <linux/timer.h>
#include <linux/slab.h>
#include <linux/jiffies.h>
#include <linux/spinlock.h>
#include <linux/list.h>
#include <linux/ctype.h>
#include <linux/edac.h>
#include <linux/bitops.h>
#include <asm/uaccess.h>
#include <asm/page.h>
#include <asm/edac.h>
#include "edac_core.h"
#include "edac_module.h"

#define CREATE_TRACE_POINTS
#define TRACE_INCLUDE_PATH ../../include/ras
#include <ras/ras_event.h>

/* lock to memory controller's control array */
static DEFINE_MUTEX(mem_ctls_mutex);
static LIST_HEAD(mc_devices);

/*
 * Used to lock EDAC MC to just one module, preventing two drivers,
 * e.g. apei/ghes and i7core_edac, from being used at the same time.
 */
static void const *edac_mc_owner;

static struct bus_type mc_bus[EDAC_MAX_MCS];

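/*
 * edac_dimm_info_location - describe a DIMM location within the MC hierarchy
 *
 * Writes one "<layer name> <position> " pair per layer into @buf, so a
 * csrow/channel based controller yields something like "csrow 1 channel 0 ".
 * Returns the number of characters written to @buf.
 */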
unsigned edac_dimm_info_location(struct dimm_info *dimm, char *buf,
                                 unsigned len)
{
        struct mem_ctl_info *mci = dimm->mci;
        int i, n, count = 0;
        char *p = buf;

        for (i = 0; i < mci->n_layers; i++) {
                /*
                 * scnprintf() returns the number of characters actually
                 * written, so 'len' cannot underflow on truncation.
                 */
                n = scnprintf(p, len, "%s %d ",
                              edac_layer_name[mci->layers[i].type],
                              dimm->location[i]);
                p += n;
                len -= n;
                count += n;
                if (!len)
                        break;
        }

        return count;
}

#ifdef CONFIG_EDAC_DEBUG

static void edac_mc_dump_channel(struct rank_info *chan)
{
        edac_dbg(4, "  channel->chan_idx = %d\n", chan->chan_idx);
        edac_dbg(4, "    channel = %p\n", chan);
        edac_dbg(4, "    channel->csrow = %p\n", chan->csrow);
        edac_dbg(4, "    channel->dimm = %p\n", chan->dimm);
}

static void edac_mc_dump_dimm(struct dimm_info *dimm, int number)
{
        char location[80];

        edac_dimm_info_location(dimm, location, sizeof(location));

        edac_dbg(4, "%s%i: %smapped as virtual row %d, chan %d\n",
                 dimm->mci->csbased ? "rank" : "dimm",
                 number, location, dimm->csrow, dimm->cschannel);
        edac_dbg(4, "  dimm = %p\n", dimm);
        edac_dbg(4, "  dimm->label = '%s'\n", dimm->label);
        edac_dbg(4, "  dimm->nr_pages = 0x%x\n", dimm->nr_pages);
        edac_dbg(4, "  dimm->grain = %d\n", dimm->grain);
}

static void edac_mc_dump_csrow(struct csrow_info *csrow)
{
        edac_dbg(4, "csrow->csrow_idx = %d\n", csrow->csrow_idx);
        edac_dbg(4, "  csrow = %p\n", csrow);
        edac_dbg(4, "  csrow->first_page = 0x%lx\n", csrow->first_page);
        edac_dbg(4, "  csrow->last_page = 0x%lx\n", csrow->last_page);
        edac_dbg(4, "  csrow->page_mask = 0x%lx\n", csrow->page_mask);
        edac_dbg(4, "  csrow->nr_channels = %d\n", csrow->nr_channels);
        edac_dbg(4, "  csrow->channels = %p\n", csrow->channels);
        edac_dbg(4, "  csrow->mci = %p\n", csrow->mci);
}

static void edac_mc_dump_mci(struct mem_ctl_info *mci)
{
        edac_dbg(3, "\tmci = %p\n", mci);
        edac_dbg(3, "\tmci->mtype_cap = %lx\n", mci->mtype_cap);
        edac_dbg(3, "\tmci->edac_ctl_cap = %lx\n", mci->edac_ctl_cap);
        edac_dbg(3, "\tmci->edac_cap = %lx\n", mci->edac_cap);
        edac_dbg(4, "\tmci->edac_check = %p\n", mci->edac_check);
        edac_dbg(3, "\tmci->nr_csrows = %d, csrows = %p\n",
                 mci->nr_csrows, mci->csrows);
        edac_dbg(3, "\tmci->nr_dimms = %d, dimms = %p\n",
                 mci->tot_dimms, mci->dimms);
        edac_dbg(3, "\tdev = %p\n", mci->pdev);
        edac_dbg(3, "\tmod_name:ctl_name = %s:%s\n",
                 mci->mod_name, mci->ctl_name);
        edac_dbg(3, "\tpvt_info = %p\n\n", mci->pvt_info);
}

#endif                          /* CONFIG_EDAC_DEBUG */

/*
 * keep those in sync with the enum mem_type
 */
const char *edac_mem_types[] = {
        "Empty csrow",
        "Reserved csrow type",
        "Unknown csrow type",
        "Fast page mode RAM",
        "Extended data out RAM",
        "Burst Extended data out RAM",
        "Single data rate SDRAM",
        "Registered single data rate SDRAM",
        "Double data rate SDRAM",
        "Registered Double data rate SDRAM",
        "Rambus DRAM",
        "Unbuffered DDR2 RAM",
        "Fully buffered DDR2",
        "Registered DDR2 RAM",
        "Rambus XDR",
        "Unbuffered DDR3 RAM",
        "Registered DDR3 RAM",
};
EXPORT_SYMBOL_GPL(edac_mem_types);

/**
 * edac_align_ptr - Prepares the pointer offsets for a single-shot allocation
 * @p:          pointer to a pointer with the memory offset to be used. At
 *              return, this will be incremented to point to the next offset
 * @size:       Size of the data structure to be reserved
 * @n_elems:    Number of elements that should be reserved
 *
 * If 'size' is a constant, the compiler will optimize this whole function
 * down to either a no-op or the addition of a constant to the value of '*p'.
 *
 * The 'p' pointer is needed to keep advancing to the proper offsets in
 * memory when allocating a struct along with its embedded structs, as
 * edac_device_alloc_ctl_info() does, for example.
 *
 * At return, the pointer 'p' will be incremented to be used on a next call
 * to this function.
 */
void *edac_align_ptr(void **p, unsigned size, int n_elems)
{
        unsigned align, r;
        void *ptr = *p;

        *p += size * n_elems;

        /*
         * 'p' can possibly be an unaligned item X such that sizeof(X) is
         * 'size'.  Adjust 'p' so that its alignment is at least as
         * stringent as what the compiler would provide for X and return
         * the aligned result.
         * Here we assume that the alignment of a "long long" is the most
         * stringent alignment that the compiler will ever provide by default.
         * As far as I know, this is a reasonable assumption.
         */
        if (size > sizeof(long))
                align = sizeof(long long);
        else if (size > sizeof(int))
                align = sizeof(long);
        else if (size > sizeof(short))
                align = sizeof(int);
        else if (size > sizeof(char))
                align = sizeof(short);
        else
                return (char *)ptr;

        /*
         * Compute the misalignment of the offset being handed out ('ptr',
         * the old value of '*p'), not of the address of the caller's
         * cursor variable.
         */
        r = (unsigned long)ptr % align;

        if (r == 0)
                return (char *)ptr;

        *p += align - r;

        return (void *)(((unsigned long)ptr) + align - r);
}

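/*
 * Minimal usage sketch for edac_align_ptr() (illustrative only): compute
 * offsets against a NULL base, allocate once, then relocate each pointer.
 *
 *      void *ptr = NULL, *base;
 *      struct foo *foo;
 *      u32 *counters;
 *      size_t size;
 *
 *      foo = edac_align_ptr(&ptr, sizeof(*foo), 1);
 *      counters = edac_align_ptr(&ptr, sizeof(u32), n);
 *      size = (unsigned long)ptr;
 *      base = kzalloc(size, GFP_KERNEL);
 *      foo = (struct foo *)((char *)base + (unsigned long)foo);
 *      counters = (u32 *)((char *)base + (unsigned long)counters);
 *
 * edac_mc_alloc() below follows this pattern; 'struct foo' and 'n' are
 * made-up names for the sketch.
 */
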
static void _edac_mc_free(struct mem_ctl_info *mci)
{
        int i, chn, row;
        struct csrow_info *csr;
        const unsigned int tot_dimms = mci->tot_dimms;
        const unsigned int tot_channels = mci->num_cschannel;
        const unsigned int tot_csrows = mci->nr_csrows;

        if (mci->dimms) {
                for (i = 0; i < tot_dimms; i++)
                        kfree(mci->dimms[i]);
                kfree(mci->dimms);
        }
        if (mci->csrows) {
                for (row = 0; row < tot_csrows; row++) {
                        csr = mci->csrows[row];
                        if (csr) {
                                if (csr->channels) {
                                        for (chn = 0; chn < tot_channels; chn++)
                                                kfree(csr->channels[chn]);
                                        kfree(csr->channels);
                                }
                                kfree(csr);
                        }
                }
                kfree(mci->csrows);
        }
        kfree(mci);
}

/**
 * edac_mc_alloc: Allocate and partially fill a struct mem_ctl_info structure
 * @mc_num:             Memory controller number
 * @n_layers:           Number of MC hierarchy layers
 * @layers:             Describes each layer as seen by the Memory Controller
 * @sz_pvt:             size of private storage needed
 *
 * Everything is kmalloc'ed as one big chunk - more efficient.
 * Can only be used if all structures have the same lifetime - otherwise
 * you have to allocate and initialize your own structures.
 *
 * Use edac_mc_free() to free mc structures allocated by this function.
 *
 * NOTE: drivers handle multi-rank memories in different ways: in some
 * drivers, one multi-rank memory stick is mapped as one entry, while, in
 * others, a single multi-rank memory stick would be mapped into several
 * entries. Currently, this function allocates multiple struct dimm_info
 * on such scenarios, as grouping the multiple ranks would require changes
 * in the drivers.
 *
 * Returns:
 *      On failure: NULL
 *      On success: struct mem_ctl_info pointer
 */
struct mem_ctl_info *edac_mc_alloc(unsigned mc_num,
                                   unsigned n_layers,
                                   struct edac_mc_layer *layers,
                                   unsigned sz_pvt)
{
        struct mem_ctl_info *mci;
        struct edac_mc_layer *layer;
        struct csrow_info *csr;
        struct rank_info *chan;
        struct dimm_info *dimm;
        u32 *ce_per_layer[EDAC_MAX_LAYERS], *ue_per_layer[EDAC_MAX_LAYERS];
        unsigned pos[EDAC_MAX_LAYERS];
        unsigned size, tot_dimms = 1, count = 1;
        unsigned tot_csrows = 1, tot_channels = 1, tot_errcount = 0;
        void *pvt, *p, *ptr = NULL;
        int i, j, row, chn, n, len, off;
        bool per_rank = false;

        BUG_ON(n_layers > EDAC_MAX_LAYERS || n_layers == 0);
        /*
         * Calculate the total amount of dimms and csrows/cschannels while
         * in the old API emulation mode
         */
        for (i = 0; i < n_layers; i++) {
                tot_dimms *= layers[i].size;
                if (layers[i].is_virt_csrow)
                        tot_csrows *= layers[i].size;
                else
                        tot_channels *= layers[i].size;

                if (layers[i].type == EDAC_MC_LAYER_CHIP_SELECT)
                        per_rank = true;
        }

        /* Figure out the offsets of the various items from the start of an mc
         * structure.  We want the alignment of each item to be at least as
         * stringent as what the compiler would provide if we could simply
         * hardcode everything into a single struct.
         */
        mci = edac_align_ptr(&ptr, sizeof(*mci), 1);
        layer = edac_align_ptr(&ptr, sizeof(*layer), n_layers);
        for (i = 0; i < n_layers; i++) {
                count *= layers[i].size;
                edac_dbg(4, "errcount layer %d size %d\n", i, count);
                ce_per_layer[i] = edac_align_ptr(&ptr, sizeof(u32), count);
                ue_per_layer[i] = edac_align_ptr(&ptr, sizeof(u32), count);
                tot_errcount += 2 * count;
        }

        edac_dbg(4, "allocating %d error counters\n", tot_errcount);
        pvt = edac_align_ptr(&ptr, sz_pvt, 1);
        size = ((unsigned long)pvt) + sz_pvt;

        edac_dbg(1, "allocating %u bytes for mci data (%d %s, %d csrows/channels)\n",
                 size,
                 tot_dimms,
                 per_rank ? "ranks" : "dimms",
                 tot_csrows * tot_channels);

        mci = kzalloc(size, GFP_KERNEL);
        if (mci == NULL)
                return NULL;

        /* Adjust pointers so they point within the memory we just allocated
         * rather than an imaginary chunk of memory located at address 0.
         */
        layer = (struct edac_mc_layer *)(((char *)mci) + ((unsigned long)layer));
        for (i = 0; i < n_layers; i++) {
                mci->ce_per_layer[i] = (u32 *)((char *)mci + ((unsigned long)ce_per_layer[i]));
                mci->ue_per_layer[i] = (u32 *)((char *)mci + ((unsigned long)ue_per_layer[i]));
        }
        pvt = sz_pvt ? (((char *)mci) + ((unsigned long)pvt)) : NULL;

        /* setup index and various internal pointers */
        mci->mc_idx = mc_num;
        mci->tot_dimms = tot_dimms;
        mci->pvt_info = pvt;
        mci->n_layers = n_layers;
        mci->layers = layer;
        memcpy(mci->layers, layers, sizeof(*layer) * n_layers);
        mci->nr_csrows = tot_csrows;
        mci->num_cschannel = tot_channels;
        mci->csbased = per_rank;

        /*
         * Allocate and fill the csrow/channels structs
         */
        mci->csrows = kcalloc(tot_csrows, sizeof(*mci->csrows), GFP_KERNEL);
        if (!mci->csrows)
                goto error;
        for (row = 0; row < tot_csrows; row++) {
                csr = kzalloc(sizeof(**mci->csrows), GFP_KERNEL);
                if (!csr)
                        goto error;
                mci->csrows[row] = csr;
                csr->csrow_idx = row;
                csr->mci = mci;
                csr->nr_channels = tot_channels;
                csr->channels = kcalloc(tot_channels, sizeof(*csr->channels),
                                        GFP_KERNEL);
                if (!csr->channels)
                        goto error;

                for (chn = 0; chn < tot_channels; chn++) {
                        chan = kzalloc(sizeof(**csr->channels), GFP_KERNEL);
                        if (!chan)
                                goto error;
                        csr->channels[chn] = chan;
                        chan->chan_idx = chn;
                        chan->csrow = csr;
                }
        }

        /*
         * Allocate and fill the dimm structs
         */
        mci->dimms  = kcalloc(tot_dimms, sizeof(*mci->dimms), GFP_KERNEL);
        if (!mci->dimms)
                goto error;

        memset(&pos, 0, sizeof(pos));
        row = 0;
        chn = 0;
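        /*
         * Walk every layer position in row-major order, allocating one
         * dimm_info per position and mirroring it into the legacy
         * csrow/channel view as we go.
         */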
        for (i = 0; i < tot_dimms; i++) {
                chan = mci->csrows[row]->channels[chn];
                off = EDAC_DIMM_OFF(layer, n_layers, pos[0], pos[1], pos[2]);
                if (off < 0 || off >= tot_dimms) {
                        edac_mc_printk(mci, KERN_ERR, "EDAC core bug: EDAC_DIMM_OFF is trying to do an illegal data access\n");
                        goto error;
                }

                dimm = kzalloc(sizeof(**mci->dimms), GFP_KERNEL);
                if (!dimm)
                        goto error;
                mci->dimms[off] = dimm;
                dimm->mci = mci;

                /*
                 * Copy DIMM location and initialize it.
                 * scnprintf() keeps 'p' and 'len' consistent even when the
                 * label gets truncated.
                 */
                len = sizeof(dimm->label);
                p = dimm->label;
                n = scnprintf(p, len, "mc#%u", mc_num);
                p += n;
                len -= n;
                for (j = 0; j < n_layers; j++) {
                        n = scnprintf(p, len, "%s#%u",
                                      edac_layer_name[layers[j].type],
                                      pos[j]);
                        p += n;
                        len -= n;
                        dimm->location[j] = pos[j];

                        if (len <= 0)
                                break;
                }

                /* Link it to the csrows old API data */
                chan->dimm = dimm;
                dimm->csrow = row;
                dimm->cschannel = chn;

                /* Increment csrow location */
                if (layers[0].is_virt_csrow) {
                        chn++;
                        if (chn == tot_channels) {
                                chn = 0;
                                row++;
                        }
                } else {
                        row++;
                        if (row == tot_csrows) {
                                row = 0;
                                chn++;
                        }
                }

                /* Increment dimm location */
                for (j = n_layers - 1; j >= 0; j--) {
                        pos[j]++;
                        if (pos[j] < layers[j].size)
                                break;
                        pos[j] = 0;
                }
        }

        mci->op_state = OP_ALLOC;

        return mci;

error:
        _edac_mc_free(mci);

        return NULL;
}
EXPORT_SYMBOL_GPL(edac_mc_alloc);

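/*
 * Typical driver-side call (sketch; the layer sizes and 'struct my_pvt'
 * are made up for illustration):
 *
 *      struct edac_mc_layer layers[2];
 *      struct mem_ctl_info *mci;
 *
 *      layers[0].type = EDAC_MC_LAYER_CHIP_SELECT;
 *      layers[0].size = nr_csrows;
 *      layers[0].is_virt_csrow = true;
 *      layers[1].type = EDAC_MC_LAYER_CHANNEL;
 *      layers[1].size = nr_channels;
 *      layers[1].is_virt_csrow = false;
 *
 *      mci = edac_mc_alloc(0, ARRAY_SIZE(layers), layers,
 *                          sizeof(struct my_pvt));
 *      if (!mci)
 *              return -ENOMEM;
 */
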
/**
 * edac_mc_free
 *      'Free' a previously allocated 'mci' structure
 * @mci: pointer to a struct mem_ctl_info structure
 */
void edac_mc_free(struct mem_ctl_info *mci)
{
        edac_dbg(1, "\n");

        /* If we're not yet registered with sysfs free only what was allocated
         * in edac_mc_alloc().
         */
        if (!device_is_registered(&mci->dev)) {
                _edac_mc_free(mci);
                return;
        }

        /* the mci instance is freed here, when the sysfs object is dropped */
        edac_unregister_sysfs(mci);
}
EXPORT_SYMBOL_GPL(edac_mc_free);

/**
 * find_mci_by_dev
 *
 *      scan list of controllers looking for the one that manages
 *      the 'dev' device
 * @dev: pointer to a struct device related with the MCI
 */
struct mem_ctl_info *find_mci_by_dev(struct device *dev)
{
        struct mem_ctl_info *mci;
        struct list_head *item;

        edac_dbg(3, "\n");

        list_for_each(item, &mc_devices) {
                mci = list_entry(item, struct mem_ctl_info, link);

                if (mci->pdev == dev)
                        return mci;
        }

        return NULL;
}
EXPORT_SYMBOL_GPL(find_mci_by_dev);

/*
 * handler for EDAC to check if NMI type handler has asserted interrupt
 */
static int edac_mc_assert_error_check_and_clear(void)
{
        int old_state;

        if (edac_op_state == EDAC_OPSTATE_POLL)
                return 1;

        old_state = edac_err_assert;
        edac_err_assert = 0;

        return old_state;
}

/*
 * edac_mc_workq_function
 *      performs the operation scheduled by a workq request
 */
static void edac_mc_workq_function(struct work_struct *work_req)
{
        struct delayed_work *d_work = to_delayed_work(work_req);
        struct mem_ctl_info *mci = to_edac_mem_ctl_work(d_work);

        mutex_lock(&mem_ctls_mutex);

        /* if this control struct has moved to offline state, we are done */
        if (mci->op_state == OP_OFFLINE) {
                mutex_unlock(&mem_ctls_mutex);
                return;
        }

        /* Only poll controllers that are running polled and have a check */
        if (edac_mc_assert_error_check_and_clear() && (mci->edac_check != NULL))
                mci->edac_check(mci);

        mutex_unlock(&mem_ctls_mutex);

        /* Reschedule */
        queue_delayed_work(edac_workqueue, &mci->work,
                        msecs_to_jiffies(edac_mc_get_poll_msec()));
}

/*
 * edac_mc_workq_setup
 *      initialize a workq item for this mci
 *      passing in the new delay period in msec
 *
 *      locking model:
 *
 *              called with the mem_ctls_mutex held
 */
static void edac_mc_workq_setup(struct mem_ctl_info *mci, unsigned msec,
                                bool init)
{
        edac_dbg(0, "\n");

        /* if this instance is not in the POLL state, then simply return */
        if (mci->op_state != OP_RUNNING_POLL)
                return;

        if (init)
                INIT_DELAYED_WORK(&mci->work, edac_mc_workq_function);

        mod_delayed_work(edac_workqueue, &mci->work, msecs_to_jiffies(msec));
}

/*
 * edac_mc_workq_teardown
 *      stop the workq processing on this mci
 *
 *      locking model:
 *
 *              called WITHOUT lock held
 */
static void edac_mc_workq_teardown(struct mem_ctl_info *mci)
{
        int status;

        if (mci->op_state != OP_RUNNING_POLL)
                return;

        status = cancel_delayed_work(&mci->work);
        if (status == 0) {
                edac_dbg(0, "not canceled, flush the queue\n");

                /* workq instance might be running, wait for it */
                flush_workqueue(edac_workqueue);
        }
}

/*
 * edac_mc_reset_delay_period(unsigned long value)
 *
 *      user space has updated our poll period value, need to
 *      reset our workq delays
 */
void edac_mc_reset_delay_period(unsigned long value)
{
        struct mem_ctl_info *mci;
        struct list_head *item;

        mutex_lock(&mem_ctls_mutex);

        list_for_each(item, &mc_devices) {
                mci = list_entry(item, struct mem_ctl_info, link);

                edac_mc_workq_setup(mci, value, false);
        }

        mutex_unlock(&mem_ctls_mutex);
}

/* Return 0 on success, 1 on failure.
 * Before calling this function, caller must
 * assign a unique value to mci->mc_idx.
 *
 *      locking model:
 *
 *              called with the mem_ctls_mutex lock held
 */
static int add_mc_to_global_list(struct mem_ctl_info *mci)
{
        struct list_head *item, *insert_before;
        struct mem_ctl_info *p;

        insert_before = &mc_devices;

        p = find_mci_by_dev(mci->pdev);
        if (unlikely(p != NULL))
                goto fail0;

        list_for_each(item, &mc_devices) {
                p = list_entry(item, struct mem_ctl_info, link);

                if (p->mc_idx >= mci->mc_idx) {
                        if (unlikely(p->mc_idx == mci->mc_idx))
                                goto fail1;

                        insert_before = item;
                        break;
                }
        }

        list_add_tail_rcu(&mci->link, insert_before);
        atomic_inc(&edac_handlers);
        return 0;

fail0:
        edac_printk(KERN_WARNING, EDAC_MC,
                "%s (%s) %s %s already assigned %d\n", dev_name(p->pdev),
                edac_dev_name(mci), p->mod_name, p->ctl_name, p->mc_idx);
        return 1;

fail1:
        edac_printk(KERN_WARNING, EDAC_MC,
                "bug in low-level driver: attempt to assign\n"
                "    duplicate mc_idx %d in %s()\n", p->mc_idx, __func__);
        return 1;
}

static int del_mc_from_global_list(struct mem_ctl_info *mci)
{
        int handlers = atomic_dec_return(&edac_handlers);
        list_del_rcu(&mci->link);

        /* these are for safe removal of devices from global list while
         * NMI handlers may be traversing list
         */
        synchronize_rcu();
        INIT_LIST_HEAD(&mci->link);

        return handlers;
}

/**
 * edac_mc_find: Search for a mem_ctl_info structure whose index is 'idx'.
 *
 * If found, return a pointer to the structure.
 * Else return NULL.
 *
 * Caller must hold mem_ctls_mutex.
 */
struct mem_ctl_info *edac_mc_find(int idx)
{
        struct list_head *item;
        struct mem_ctl_info *mci;

        list_for_each(item, &mc_devices) {
                mci = list_entry(item, struct mem_ctl_info, link);

                if (mci->mc_idx >= idx) {
                        if (mci->mc_idx == idx)
                                return mci;

                        break;
                }
        }

        return NULL;
}
EXPORT_SYMBOL(edac_mc_find);

/**
 * edac_mc_add_mc: Insert the 'mci' structure into the mci global list and
 *                 create sysfs entries associated with mci structure
 * @mci: pointer to the mci structure to be added to the list
 *
 * Return:
 *      0       Success
 *      !0      Failure
 */

/* FIXME - should a warning be printed if no error detection? correction? */
int edac_mc_add_mc(struct mem_ctl_info *mci)
{
        int ret = -EINVAL;
        edac_dbg(0, "\n");

        if (mci->mc_idx >= EDAC_MAX_MCS) {
                pr_warn_once("Too many memory controllers: %d\n", mci->mc_idx);
                return -ENODEV;
        }

#ifdef CONFIG_EDAC_DEBUG
        if (edac_debug_level >= 3)
                edac_mc_dump_mci(mci);

        if (edac_debug_level >= 4) {
                int i;

                for (i = 0; i < mci->nr_csrows; i++) {
                        struct csrow_info *csrow = mci->csrows[i];
                        u32 nr_pages = 0;
                        int j;

                        for (j = 0; j < csrow->nr_channels; j++)
                                nr_pages += csrow->channels[j]->dimm->nr_pages;
                        if (!nr_pages)
                                continue;
                        edac_mc_dump_csrow(csrow);
                        for (j = 0; j < csrow->nr_channels; j++)
                                if (csrow->channels[j]->dimm->nr_pages)
                                        edac_mc_dump_channel(csrow->channels[j]);
                }
                for (i = 0; i < mci->tot_dimms; i++)
                        if (mci->dimms[i]->nr_pages)
                                edac_mc_dump_dimm(mci->dimms[i], i);
        }
#endif
        mutex_lock(&mem_ctls_mutex);

        if (edac_mc_owner && edac_mc_owner != mci->mod_name) {
                ret = -EPERM;
                goto fail0;
        }

        if (add_mc_to_global_list(mci))
                goto fail0;

        /* set load time so that error rate can be tracked */
        mci->start_time = jiffies;

        mci->bus = &mc_bus[mci->mc_idx];

        if (edac_create_sysfs_mci_device(mci)) {
                edac_mc_printk(mci, KERN_WARNING,
                        "failed to create sysfs device\n");
                goto fail1;
        }

        /* If there IS a check routine, then we are running POLLED */
        if (mci->edac_check != NULL) {
                /* This instance is NOW RUNNING */
                mci->op_state = OP_RUNNING_POLL;

                edac_mc_workq_setup(mci, edac_mc_get_poll_msec(), true);
        } else {
                mci->op_state = OP_RUNNING_INTERRUPT;
        }

        /* Report action taken */
        edac_mc_printk(mci, KERN_INFO,
                "Giving out device to module %s controller %s: DEV %s (%s)\n",
                mci->mod_name, mci->ctl_name, mci->dev_name,
                edac_op_state_to_string(mci->op_state));

        edac_mc_owner = mci->mod_name;

        mutex_unlock(&mem_ctls_mutex);
        return 0;

fail1:
        del_mc_from_global_list(mci);

fail0:
        mutex_unlock(&mem_ctls_mutex);
        return ret;
}
EXPORT_SYMBOL_GPL(edac_mc_add_mc);

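/*
 * Typical probe-time sequence in a driver (sketch; names such as 'pdev'
 * and the module string are placeholders, and error handling is trimmed):
 *
 *      mci = edac_mc_alloc(...);
 *      if (!mci)
 *              return -ENOMEM;
 *      mci->pdev = &pdev->dev;
 *      mci->mod_name = "my_edac_driver";
 *      ... fill mtype_cap, edac_check, dimm/csrow data, etc. ...
 *      if (edac_mc_add_mc(mci)) {
 *              edac_mc_free(mci);
 *              return -ENODEV;
 *      }
 */
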
/**
 * edac_mc_del_mc: Remove sysfs entries for specified mci structure and
 *                 remove mci structure from global list
 * @dev: Pointer to 'struct device' representing mci structure to remove.
 *
 * Return pointer to removed mci structure, or NULL if device not found.
 */
struct mem_ctl_info *edac_mc_del_mc(struct device *dev)
{
        struct mem_ctl_info *mci;

        edac_dbg(0, "\n");

        mutex_lock(&mem_ctls_mutex);

        /* find the requested mci struct in the global list */
        mci = find_mci_by_dev(dev);
        if (mci == NULL) {
                mutex_unlock(&mem_ctls_mutex);
                return NULL;
        }

        if (!del_mc_from_global_list(mci))
                edac_mc_owner = NULL;
        mutex_unlock(&mem_ctls_mutex);

        /* flush workq processes */
        edac_mc_workq_teardown(mci);

        /* marking MCI offline */
        mci->op_state = OP_OFFLINE;

        /* remove from sysfs */
        edac_remove_sysfs_mci_device(mci);

        edac_printk(KERN_INFO, EDAC_MC,
                "Removed device %d for %s %s: DEV %s\n", mci->mc_idx,
                mci->mod_name, mci->ctl_name, edac_dev_name(mci));

        return mci;
}
EXPORT_SYMBOL_GPL(edac_mc_del_mc);

static void edac_mc_scrub_block(unsigned long page, unsigned long offset,
                                u32 size)
{
        struct page *pg;
        void *virt_addr;
        unsigned long flags = 0;

        edac_dbg(3, "\n");

        /* ECC error page was not in our memory. Ignore it. */
        if (!pfn_valid(page))
                return;

        /* Find the actual page structure then map it and fix */
        pg = pfn_to_page(page);

        if (PageHighMem(pg))
                local_irq_save(flags);

        virt_addr = kmap_atomic(pg);

        /* Perform architecture specific atomic scrub operation */
        atomic_scrub(virt_addr + offset, size);

        /* Unmap and complete */
        kunmap_atomic(virt_addr);

        if (PageHighMem(pg))
                local_irq_restore(flags);
}

/* FIXME - should return -1 */
int edac_mc_find_csrow_by_page(struct mem_ctl_info *mci, unsigned long page)
{
        struct csrow_info **csrows = mci->csrows;
        int row, i, j, n;

        edac_dbg(1, "MC%d: 0x%lx\n", mci->mc_idx, page);
        row = -1;

        for (i = 0; i < mci->nr_csrows; i++) {
                struct csrow_info *csrow = csrows[i];
                n = 0;
                for (j = 0; j < csrow->nr_channels; j++) {
                        struct dimm_info *dimm = csrow->channels[j]->dimm;
                        n += dimm->nr_pages;
                }
                if (n == 0)
                        continue;

                edac_dbg(3, "MC%d: first(0x%lx) page(0x%lx) last(0x%lx) mask(0x%lx)\n",
                         mci->mc_idx,
                         csrow->first_page, page, csrow->last_page,
                         csrow->page_mask);

                if ((page >= csrow->first_page) &&
                    (page <= csrow->last_page) &&
                    ((page & csrow->page_mask) ==
                     (csrow->first_page & csrow->page_mask))) {
                        row = i;
                        break;
                }
        }

        if (row == -1)
                edac_mc_printk(mci, KERN_ERR,
                        "could not look up page error address %lx\n",
                        (unsigned long)page);

        return row;
}
EXPORT_SYMBOL_GPL(edac_mc_find_csrow_by_page);

const char *edac_layer_name[] = {
        [EDAC_MC_LAYER_BRANCH] = "branch",
        [EDAC_MC_LAYER_CHANNEL] = "channel",
        [EDAC_MC_LAYER_SLOT] = "slot",
        [EDAC_MC_LAYER_CHIP_SELECT] = "csrow",
        [EDAC_MC_LAYER_ALL_MEM] = "memory",
};
EXPORT_SYMBOL_GPL(edac_layer_name);

static void edac_inc_ce_error(struct mem_ctl_info *mci,
                              bool enable_per_layer_report,
                              const int pos[EDAC_MAX_LAYERS],
                              const u16 count)
{
        int i, index = 0;

        mci->ce_mc += count;

        if (!enable_per_layer_report) {
                mci->ce_noinfo_count += count;
                return;
        }

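        /*
         * Accumulate a flat index into the per-layer counter arrays,
         * Horner-style: index = (pos[0] * size[1] + pos[1]) * size[2] + ...
         * A negative position means the location is unknown from that
         * layer downwards, so stop there.
         */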
        for (i = 0; i < mci->n_layers; i++) {
                if (pos[i] < 0)
                        break;
                index += pos[i];
                mci->ce_per_layer[i][index] += count;

                if (i < mci->n_layers - 1)
                        index *= mci->layers[i + 1].size;
        }
}

static void edac_inc_ue_error(struct mem_ctl_info *mci,
                                    bool enable_per_layer_report,
                                    const int pos[EDAC_MAX_LAYERS],
                                    const u16 count)
{
        int i, index = 0;

        mci->ue_mc += count;

        if (!enable_per_layer_report) {
                mci->ue_noinfo_count += count;
                return;
        }

        for (i = 0; i < mci->n_layers; i++) {
                if (pos[i] < 0)
                        break;
                index += pos[i];
                mci->ue_per_layer[i][index] += count;

                if (i < mci->n_layers - 1)
                        index *= mci->layers[i + 1].size;
        }
}

static void edac_ce_error(struct mem_ctl_info *mci,
                          const u16 error_count,
                          const int pos[EDAC_MAX_LAYERS],
                          const char *msg,
                          const char *location,
                          const char *label,
                          const char *detail,
                          const char *other_detail,
                          const bool enable_per_layer_report,
                          const unsigned long page_frame_number,
                          const unsigned long offset_in_page,
                          long grain)
{
        unsigned long remapped_page;
        char *msg_aux = "";

        if (*msg)
                msg_aux = " ";

        if (edac_mc_get_log_ce()) {
                if (other_detail && *other_detail)
                        edac_mc_printk(mci, KERN_WARNING,
                                       "%d CE %s%son %s (%s %s - %s)\n",
                                       error_count, msg, msg_aux, label,
                                       location, detail, other_detail);
                else
                        edac_mc_printk(mci, KERN_WARNING,
                                       "%d CE %s%son %s (%s %s)\n",
                                       error_count, msg, msg_aux, label,
                                       location, detail);
        }
        edac_inc_ce_error(mci, enable_per_layer_report, pos, error_count);

        if (mci->scrub_mode & SCRUB_SW_SRC) {
                /*
                 * Some memory controllers (called MCs below) can remap
                 * memory so that it is still available at a different
                 * address when PCI devices map into memory.
                 * MC's that can't do this, lose the memory where PCI
                 * devices are mapped. This mapping is MC-dependent
                 * and so we call back into the MC driver for it to
                 * map the MC page to a physical (CPU) page which can
                 * then be mapped to a virtual page - which can then
                 * be scrubbed.
                 */
                remapped_page = mci->ctl_page_to_phys ?
                        mci->ctl_page_to_phys(mci, page_frame_number) :
                        page_frame_number;

                edac_mc_scrub_block(remapped_page,
                                        offset_in_page, grain);
        }
}

static void edac_ue_error(struct mem_ctl_info *mci,
                          const u16 error_count,
                          const int pos[EDAC_MAX_LAYERS],
                          const char *msg,
                          const char *location,
                          const char *label,
                          const char *detail,
                          const char *other_detail,
                          const bool enable_per_layer_report)
{
        char *msg_aux = "";

        if (*msg)
                msg_aux = " ";

        if (edac_mc_get_log_ue()) {
                if (other_detail && *other_detail)
                        edac_mc_printk(mci, KERN_WARNING,
                                       "%d UE %s%son %s (%s %s - %s)\n",
                                       error_count, msg, msg_aux, label,
                                       location, detail, other_detail);
                else
                        edac_mc_printk(mci, KERN_WARNING,
                                       "%d UE %s%son %s (%s %s)\n",
                                       error_count, msg, msg_aux, label,
                                       location, detail);
        }

        if (edac_mc_get_panic_on_ue()) {
                if (other_detail && *other_detail)
                        panic("UE %s%son %s (%s %s - %s)\n",
                              msg, msg_aux, label, location, detail, other_detail);
                else
                        panic("UE %s%son %s (%s %s)\n",
                              msg, msg_aux, label, location, detail);
        }

        edac_inc_ue_error(mci, enable_per_layer_report, pos, error_count);
}

/**
 * edac_raw_mc_handle_error - reports a memory event to userspace without doing
 *                            anything to discover the error location
 *
 * @type:               severity of the error (CE/UE/Fatal)
 * @mci:                a struct mem_ctl_info pointer
 * @e:                  error description
 *
 * This raw function is used internally by edac_mc_handle_error(). It should
 * only be called directly when the hardware error comes directly from BIOS,
 * like in the case of the APEI GHES driver.
 */
void edac_raw_mc_handle_error(const enum hw_event_mc_err_type type,
                              struct mem_ctl_info *mci,
                              struct edac_raw_error_desc *e)
{
        char detail[80];
        int pos[EDAC_MAX_LAYERS] = { e->top_layer, e->mid_layer, e->low_layer };

        /* Memory type dependent details about the error */
        if (type == HW_EVENT_ERR_CORRECTED) {
                snprintf(detail, sizeof(detail),
                        "page:0x%lx offset:0x%lx grain:%ld syndrome:0x%lx",
                        e->page_frame_number, e->offset_in_page,
                        e->grain, e->syndrome);
                edac_ce_error(mci, e->error_count, pos, e->msg, e->location, e->label,
                              detail, e->other_detail, e->enable_per_layer_report,
                              e->page_frame_number, e->offset_in_page, e->grain);
        } else {
                snprintf(detail, sizeof(detail),
                        "page:0x%lx offset:0x%lx grain:%ld",
                        e->page_frame_number, e->offset_in_page, e->grain);

                edac_ue_error(mci, e->error_count, pos, e->msg, e->location, e->label,
                              detail, e->other_detail, e->enable_per_layer_report);
        }
}
EXPORT_SYMBOL_GPL(edac_raw_mc_handle_error);

/**
 * edac_mc_handle_error - reports a memory event to userspace
 *
 * @type:               severity of the error (CE/UE/Fatal)
 * @mci:                a struct mem_ctl_info pointer
 * @error_count:        Number of errors of the same type
 * @page_frame_number:  mem page where the error occurred
 * @offset_in_page:     offset of the error inside the page
 * @syndrome:           ECC syndrome
 * @top_layer:          Memory layer[0] position
 * @mid_layer:          Memory layer[1] position
 * @low_layer:          Memory layer[2] position
 * @msg:                Message meaningful to the end users that
 *                      explains the event
 * @other_detail:       Technical details about the event that
 *                      may help hardware manufacturers and
 *                      EDAC developers to analyse the event
 */
void edac_mc_handle_error(const enum hw_event_mc_err_type type,
                          struct mem_ctl_info *mci,
                          const u16 error_count,
                          const unsigned long page_frame_number,
                          const unsigned long offset_in_page,
                          const unsigned long syndrome,
                          const int top_layer,
                          const int mid_layer,
                          const int low_layer,
                          const char *msg,
                          const char *other_detail)
{
        char *p;
        int row = -1, chan = -1;
        int pos[EDAC_MAX_LAYERS] = { top_layer, mid_layer, low_layer };
        int i, n_labels = 0;
        u8 grain_bits;
        struct edac_raw_error_desc *e = &mci->error_desc;

        edac_dbg(3, "MC%d\n", mci->mc_idx);

        /* Fills the error report buffer */
        memset(e, 0, sizeof(*e));
        e->error_count = error_count;
        e->top_layer = top_layer;
        e->mid_layer = mid_layer;
        e->low_layer = low_layer;
        e->page_frame_number = page_frame_number;
        e->offset_in_page = offset_in_page;
        e->syndrome = syndrome;
        e->msg = msg;
        e->other_detail = other_detail;

        /*
         * Check if the event report is consistent and if the memory
         * location is known. If it is known, enable_per_layer_report will be
         * true, the DIMM(s) label info will be filled and the per-layer
         * error counters will be incremented.
         */
        for (i = 0; i < mci->n_layers; i++) {
                if (pos[i] >= (int)mci->layers[i].size) {
                        edac_mc_printk(mci, KERN_ERR,
                                       "INTERNAL ERROR: %s value is out of range (%d >= %d)\n",
                                       edac_layer_name[mci->layers[i].type],
                                       pos[i], mci->layers[i].size);
                        /*
                         * Instead of just returning it, let's use what's
                         * known about the error. The increment routines and
                         * the DIMM filter logic will do the right thing by
                         * pointing the likely damaged DIMMs.
                         */
                        pos[i] = -1;
                }
                if (pos[i] >= 0)
                        e->enable_per_layer_report = true;
        }

        /*
         * Get the dimm label/grain that applies to the match criteria.
         * As the error algorithm may not be able to point to just one memory
         * stick, the logic here will get all possible labels that could
         * potentially be affected by the error.
         * On FB-DIMM memory controllers, for uncorrected errors, it is common
         * to have only the MC channel and the MC dimm (also called "branch")
         * but the channel is not known, as the memory is arranged in pairs,
         * where each memory belongs to a separate channel within the same
         * branch.
         */
        p = e->label;
        *p = '\0';

        for (i = 0; i < mci->tot_dimms; i++) {
                struct dimm_info *dimm = mci->dimms[i];

                if (top_layer >= 0 && top_layer != dimm->location[0])
                        continue;
                if (mid_layer >= 0 && mid_layer != dimm->location[1])
                        continue;
                if (low_layer >= 0 && low_layer != dimm->location[2])
                        continue;

                /* get the max grain, over the error match range */
                if (dimm->grain > e->grain)
                        e->grain = dimm->grain;

                /*
                 * If the error is memory-controller wide, there's no need to
                 * seek for the affected DIMMs because the whole
                 * channel/memory controller/...  may be affected.
                 * Also, don't show errors for empty DIMM slots.
                 */
                if (e->enable_per_layer_report && dimm->nr_pages) {
                        if (n_labels >= EDAC_MAX_LABELS) {
                                e->enable_per_layer_report = false;
                                break;
                        }
                        n_labels++;
                        if (p != e->label) {
                                strcpy(p, OTHER_LABEL);
                                p += strlen(OTHER_LABEL);
                        }
                        strcpy(p, dimm->label);
                        p += strlen(p);
                        *p = '\0';

                        /*
                         * get csrow/channel of the DIMM, in order to allow
                         * incrementing the compat API counters
                         */
                        edac_dbg(4, "%s csrows map: (%d,%d)\n",
                                 mci->csbased ? "rank" : "dimm",
                                 dimm->csrow, dimm->cschannel);
                        if (row == -1)
                                row = dimm->csrow;
                        else if (row >= 0 && row != dimm->csrow)
                                row = -2;

                        if (chan == -1)
                                chan = dimm->cschannel;
                        else if (chan >= 0 && chan != dimm->cschannel)
                                chan = -2;
                }
        }

        if (!e->enable_per_layer_report) {
                strcpy(e->label, "any memory");
        } else {
                edac_dbg(4, "csrow/channel to increment: (%d,%d)\n", row, chan);
                if (p == e->label)
                        strcpy(e->label, "unknown memory");
                if (type == HW_EVENT_ERR_CORRECTED) {
                        if (row >= 0) {
                                mci->csrows[row]->ce_count += error_count;
                                if (chan >= 0)
                                        mci->csrows[row]->channels[chan]->ce_count += error_count;
                        }
                } else if (row >= 0)
                        mci->csrows[row]->ue_count += error_count;
        }

        /* Fill the RAM location data */
        p = e->location;

        for (i = 0; i < mci->n_layers; i++) {
                if (pos[i] < 0)
                        continue;

                p += sprintf(p, "%s:%d ",
                             edac_layer_name[mci->layers[i].type],
                             pos[i]);
        }
        if (p > e->location)
                *(p - 1) = '\0';

        /* Report the error via the trace interface */
        grain_bits = fls_long(e->grain) + 1;
        trace_mc_event(type, e->msg, e->label, e->error_count,
                       mci->mc_idx, e->top_layer, e->mid_layer, e->low_layer,
                       PAGES_TO_MiB(e->page_frame_number) | e->offset_in_page,
                       grain_bits, e->syndrome, e->other_detail);

        edac_raw_mc_handle_error(type, mci, e);
}
EXPORT_SYMBOL_GPL(edac_mc_handle_error);
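
/*
 * Driver-side reporting sketch (illustrative values only): a single
 * corrected error at a known position in a csrow/channel hierarchy
 * could be reported as
 *
 *      edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1,
 *                           page_frame_number, offset_in_page, syndrome,
 *                           csrow, channel, -1,
 *                           "read error", "");
 */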