linux/drivers/edac/edac_core.h
<<
>>
Prefs
   1/*
   2 * Defines, structures, APIs for edac_core module
   3 *
   4 * (C) 2007 Linux Networx (http://lnxi.com)
   5 * This file may be distributed under the terms of the
   6 * GNU General Public License.
   7 *
   8 * Written by Thayne Harbaugh
   9 * Based on work by Dan Hollis <goemon at anime dot net> and others.
  10 *      http://www.anime.net/~goemon/linux-ecc/
  11 *
  12 * NMI handling support added by
  13 *     Dave Peterson <dsp@llnl.gov> <dave_peterson@pobox.com>
  14 *
  15 * Refactored for multi-source files:
  16 *      Doug Thompson <norsk5@xmission.com>
  17 *
  18 */
  19
  20#ifndef _EDAC_CORE_H_
  21#define _EDAC_CORE_H_
  22
  23#include <linux/kernel.h>
  24#include <linux/types.h>
  25#include <linux/module.h>
  26#include <linux/spinlock.h>
  27#include <linux/smp.h>
  28#include <linux/pci.h>
  29#include <linux/time.h>
  30#include <linux/nmi.h>
  31#include <linux/rcupdate.h>
  32#include <linux/completion.h>
  33#include <linux/kobject.h>
  34#include <linux/platform_device.h>
  35#include <linux/sysdev.h>
  36#include <linux/workqueue.h>
  37#include <linux/edac.h>
  38
  39#define EDAC_DEVICE_NAME_LEN    31
  40#define EDAC_ATTRIB_VALUE_LEN   15
  41
  42#if PAGE_SHIFT < 20
  43#define PAGES_TO_MiB(pages)     ((pages) >> (20 - PAGE_SHIFT))
  44#define MiB_TO_PAGES(mb)        ((mb) << (20 - PAGE_SHIFT))
  45#else                           /* PAGE_SHIFT > 20 */
  46#define PAGES_TO_MiB(pages)     ((pages) << (PAGE_SHIFT - 20))
  47#define MiB_TO_PAGES(mb)        ((mb) >> (PAGE_SHIFT - 20))
  48#endif
  49
  50#define edac_printk(level, prefix, fmt, arg...) \
  51        printk(level "EDAC " prefix ": " fmt, ##arg)
  52
  53#define edac_mc_printk(mci, level, fmt, arg...) \
  54        printk(level "EDAC MC%d: " fmt, mci->mc_idx, ##arg)
  55
  56#define edac_mc_chipset_printk(mci, level, prefix, fmt, arg...) \
  57        printk(level "EDAC " prefix " MC%d: " fmt, mci->mc_idx, ##arg)
  58
  59#define edac_device_printk(ctl, level, fmt, arg...) \
  60        printk(level "EDAC DEVICE%d: " fmt, ctl->dev_idx, ##arg)
  61
  62#define edac_pci_printk(ctl, level, fmt, arg...) \
  63        printk(level "EDAC PCI%d: " fmt, ctl->pci_idx, ##arg)
  64
  65/* prefixes for edac_printk() and edac_mc_printk() */
  66#define EDAC_MC "MC"
  67#define EDAC_PCI "PCI"
  68#define EDAC_DEBUG "DEBUG"
  69
  70extern const char *edac_mem_types[];
  71
  72#ifdef CONFIG_EDAC_DEBUG
  73extern int edac_debug_level;
  74
  75#define edac_debug_printk(level, fmt, arg...)                           \
  76        do {                                                            \
  77                if (level <= edac_debug_level)                          \
  78                        edac_printk(KERN_DEBUG, EDAC_DEBUG,             \
  79                                    "%s: " fmt, __func__, ##arg);       \
  80        } while (0)
  81
  82#define debugf0( ... ) edac_debug_printk(0, __VA_ARGS__ )
  83#define debugf1( ... ) edac_debug_printk(1, __VA_ARGS__ )
  84#define debugf2( ... ) edac_debug_printk(2, __VA_ARGS__ )
  85#define debugf3( ... ) edac_debug_printk(3, __VA_ARGS__ )
  86#define debugf4( ... ) edac_debug_printk(4, __VA_ARGS__ )
  87
  88#else                           /* !CONFIG_EDAC_DEBUG */
  89
  90#define debugf0( ... )
  91#define debugf1( ... )
  92#define debugf2( ... )
  93#define debugf3( ... )
  94#define debugf4( ... )
  95
  96#endif                          /* !CONFIG_EDAC_DEBUG */
  97
  98#define PCI_VEND_DEV(vend, dev) PCI_VENDOR_ID_ ## vend, \
  99        PCI_DEVICE_ID_ ## vend ## _ ## dev
 100
 101#define edac_dev_name(dev) (dev)->dev_name
 102
 103/*
 104 * The following are the structures to provide for a generic
 105 * or abstract 'edac_device'. This set of structures and the
 106 * code that implements the APIs for the same, provide for
 107 * registering EDAC type devices which are NOT standard memory.
 108 *
 109 * CPU caches (L1 and L2)
 110 * DMA engines
  111 * Core CPU switches
 112 * Fabric switch units
 113 * PCIe interface controllers
 114 * other EDAC/ECC type devices that can be monitored for
 115 * errors, etc.
 116 *
  117 * It allows for a 2 level hierarchy. For example:
 118 *
 119 * cache could be composed of L1, L2 and L3 levels of cache.
 120 * Each CPU core would have its own L1 cache, while sharing
 121 * L2 and maybe L3 caches.
 122 *
 123 * View them arranged, via the sysfs presentation:
 124 * /sys/devices/system/edac/..
 125 *
 126 *      mc/             <existing memory device directory>
 127 *      cpu/cpu0/..     <L1 and L2 block directory>
 128 *              /L1-cache/ce_count
 129 *                       /ue_count
 130 *              /L2-cache/ce_count
 131 *                       /ue_count
 132 *      cpu/cpu1/..     <L1 and L2 block directory>
 133 *              /L1-cache/ce_count
 134 *                       /ue_count
 135 *              /L2-cache/ce_count
 136 *                       /ue_count
 137 *      ...
 138 *
 139 *      the L1 and L2 directories would be "edac_device_block's"
 140 */
 141
 142struct edac_device_counter {
 143        u32 ue_count;
 144        u32 ce_count;
 145};
 146
 147/* forward reference */
 148struct edac_device_ctl_info;
 149struct edac_device_block;
 150
 151/* edac_dev_sysfs_attribute structure
 152 *      used for driver sysfs attributes in mem_ctl_info
 153 *      for extra controls and attributes:
 154 *              like high level error Injection controls
 155 */
 156struct edac_dev_sysfs_attribute {
 157        struct attribute attr;
 158        ssize_t (*show)(struct edac_device_ctl_info *, char *);
 159        ssize_t (*store)(struct edac_device_ctl_info *, const char *, size_t);
 160};
 161
 162/* edac_dev_sysfs_block_attribute structure
 163 *
 164 *      used in leaf 'block' nodes for adding controls/attributes
 165 *
 166 *      each block in each instance of the containing control structure
 167 *      can have an array of the following. The show and store functions
 168 *      will be filled in with the show/store function in the
 169 *      low level driver.
 170 *
 171 *      The 'value' field will be the actual value field used for
 172 *      counting
 173 */
 174struct edac_dev_sysfs_block_attribute {
 175        struct attribute attr;
 176        ssize_t (*show)(struct kobject *, struct attribute *, char *);
 177        ssize_t (*store)(struct kobject *, struct attribute *,
 178                        const char *, size_t);
 179        struct edac_device_block *block;
 180
 181        unsigned int value;
 182};
 183
 184/* device block control structure */
 185struct edac_device_block {
 186        struct edac_device_instance *instance;  /* Up Pointer */
 187        char name[EDAC_DEVICE_NAME_LEN + 1];
 188
 189        struct edac_device_counter counters;    /* basic UE and CE counters */
 190
 191        int nr_attribs;         /* how many attributes */
 192
 193        /* this block's attributes, could be NULL */
 194        struct edac_dev_sysfs_block_attribute *block_attributes;
 195
 196        /* edac sysfs device control */
 197        struct kobject kobj;
 198};
 199
 200/* device instance control structure */
 201struct edac_device_instance {
 202        struct edac_device_ctl_info *ctl;       /* Up pointer */
 203        char name[EDAC_DEVICE_NAME_LEN + 4];
 204
 205        struct edac_device_counter counters;    /* instance counters */
 206
 207        u32 nr_blocks;          /* how many blocks */
 208        struct edac_device_block *blocks;       /* block array */
 209
 210        /* edac sysfs device control */
 211        struct kobject kobj;
 212};
 213
 214
 215/*
 216 * Abstract edac_device control info structure
 217 *
 218 */
 219struct edac_device_ctl_info {
 220        /* for global list of edac_device_ctl_info structs */
 221        struct list_head link;
 222
 223        struct module *owner;   /* Module owner of this control struct */
 224
 225        int dev_idx;
 226
 227        /* Per instance controls for this edac_device */
 228        int log_ue;             /* boolean for logging UEs */
 229        int log_ce;             /* boolean for logging CEs */
 230        int panic_on_ue;        /* boolean for panic'ing on an UE */
 231        unsigned poll_msec;     /* number of milliseconds to poll interval */
 232        unsigned long delay;    /* number of jiffies for poll_msec */
 233
 234        /* Additional top controller level attributes, but specified
 235         * by the low level driver.
 236         *
 237         * Set by the low level driver to provide attributes at the
 238         * controller level, same level as 'ue_count' and 'ce_count' above.
 239         * An array of structures, NULL terminated
 240         *
 241         * If attributes are desired, then set to array of attributes
 242         * If no attributes are desired, leave NULL
 243         */
 244        struct edac_dev_sysfs_attribute *sysfs_attributes;
 245
 246        /* pointer to main 'edac' class in sysfs */
 247        struct sysdev_class *edac_class;
 248
 249        /* the internal state of this controller instance */
 250        int op_state;
 251        /* work struct for this instance */
 252        struct delayed_work work;
 253
 254        /* pointer to edac polling checking routine:
 255         *      If NOT NULL: points to polling check routine
 256         *      If NULL: Then assumes INTERRUPT operation, where
 257         *              MC driver will receive events
 258         */
 259        void (*edac_check) (struct edac_device_ctl_info * edac_dev);
 260
 261        struct device *dev;     /* pointer to device structure */
 262
 263        const char *mod_name;   /* module name */
 264        const char *ctl_name;   /* edac controller  name */
 265        const char *dev_name;   /* pci/platform/etc... name */
 266
 267        void *pvt_info;         /* pointer to 'private driver' info */
 268
 269        unsigned long start_time;       /* edac_device load start time (jiffies) */
 270
 271        struct completion removal_complete;
 272
 273        /* sysfs top name under 'edac' directory
 274         * and instance name:
 275         *      cpu/cpu0/...
 276         *      cpu/cpu1/...
 277         *      cpu/cpu2/...
 278         *      ...
 279         */
 280        char name[EDAC_DEVICE_NAME_LEN + 1];
 281
 282        /* Number of instances supported on this control structure
 283         * and the array of those instances
 284         */
 285        u32 nr_instances;
 286        struct edac_device_instance *instances;
 287
 288        /* Event counters for the this whole EDAC Device */
 289        struct edac_device_counter counters;
 290
 291        /* edac sysfs device control for the 'name'
 292         * device this structure controls
 293         */
 294        struct kobject kobj;
 295};
 296
 297/* To get from the instance's wq to the beginning of the ctl structure */
 298#define to_edac_mem_ctl_work(w) \
 299                container_of(w, struct mem_ctl_info, work)
 300
 301#define to_edac_device_ctl_work(w) \
 302                container_of(w,struct edac_device_ctl_info,work)
 303
 304/*
 305 * The alloc() and free() functions for the 'edac_device' control info
 306 * structure. A MC driver will allocate one of these for each edac_device
 307 * it is going to control/register with the EDAC CORE.
 308 */
 309extern struct edac_device_ctl_info *edac_device_alloc_ctl_info(
 310                unsigned sizeof_private,
 311                char *edac_device_name, unsigned nr_instances,
 312                char *edac_block_name, unsigned nr_blocks,
 313                unsigned offset_value,
 314                struct edac_dev_sysfs_block_attribute *block_attributes,
 315                unsigned nr_attribs,
 316                int device_index);
 317
 318/* The offset value can be:
 319 *      -1 indicating no offset value
 320 *      0 for zero-based block numbers
 321 *      1 for 1-based block number
 322 *      other for other-based block number
 323 */
 324#define BLOCK_OFFSET_VALUE_OFF  ((unsigned) -1)
 325
 326extern void edac_device_free_ctl_info(struct edac_device_ctl_info *ctl_info);
 327
 328#ifdef CONFIG_PCI
 329
 330struct edac_pci_counter {
 331        atomic_t pe_count;
 332        atomic_t npe_count;
 333};
 334
 335/*
 336 * Abstract edac_pci control info structure
 337 *
 338 */
 339struct edac_pci_ctl_info {
 340        /* for global list of edac_pci_ctl_info structs */
 341        struct list_head link;
 342
 343        int pci_idx;
 344
 345        struct sysdev_class *edac_class;        /* pointer to class */
 346
 347        /* the internal state of this controller instance */
 348        int op_state;
 349        /* work struct for this instance */
 350        struct delayed_work work;
 351
 352        /* pointer to edac polling checking routine:
 353         *      If NOT NULL: points to polling check routine
 354         *      If NULL: Then assumes INTERRUPT operation, where
 355         *              MC driver will receive events
 356         */
 357        void (*edac_check) (struct edac_pci_ctl_info * edac_dev);
 358
 359        struct device *dev;     /* pointer to device structure */
 360
 361        const char *mod_name;   /* module name */
 362        const char *ctl_name;   /* edac controller  name */
 363        const char *dev_name;   /* pci/platform/etc... name */
 364
 365        void *pvt_info;         /* pointer to 'private driver' info */
 366
 367        unsigned long start_time;       /* edac_pci load start time (jiffies) */
 368
 369        struct completion complete;
 370
 371        /* sysfs top name under 'edac' directory
 372         * and instance name:
 373         *      cpu/cpu0/...
 374         *      cpu/cpu1/...
 375         *      cpu/cpu2/...
 376         *      ...
 377         */
 378        char name[EDAC_DEVICE_NAME_LEN + 1];
 379
 380        /* Event counters for the this whole EDAC Device */
 381        struct edac_pci_counter counters;
 382
 383        /* edac sysfs device control for the 'name'
 384         * device this structure controls
 385         */
 386        struct kobject kobj;
 387        struct completion kobj_complete;
 388};
 389
 390#define to_edac_pci_ctl_work(w) \
 391                container_of(w, struct edac_pci_ctl_info,work)
 392
 393/* write all or some bits in a byte-register*/
 394static inline void pci_write_bits8(struct pci_dev *pdev, int offset, u8 value,
 395                                   u8 mask)
 396{
 397        if (mask != 0xff) {
 398                u8 buf;
 399
 400                pci_read_config_byte(pdev, offset, &buf);
 401                value &= mask;
 402                buf &= ~mask;
 403                value |= buf;
 404        }
 405
 406        pci_write_config_byte(pdev, offset, value);
 407}
 408
 409/* write all or some bits in a word-register*/
 410static inline void pci_write_bits16(struct pci_dev *pdev, int offset,
 411                                    u16 value, u16 mask)
 412{
 413        if (mask != 0xffff) {
 414                u16 buf;
 415
 416                pci_read_config_word(pdev, offset, &buf);
 417                value &= mask;
 418                buf &= ~mask;
 419                value |= buf;
 420        }
 421
 422        pci_write_config_word(pdev, offset, value);
 423}
 424
 425/*
 426 * pci_write_bits32
 427 *
 428 * edac local routine to do pci_write_config_dword, but adds
 429 * a mask parameter. If mask is all ones, ignore the mask.
 430 * Otherwise utilize the mask to isolate specified bits
 431 *
 432 * write all or some bits in a dword-register
 433 */
 434static inline void pci_write_bits32(struct pci_dev *pdev, int offset,
 435                                    u32 value, u32 mask)
 436{
 437        if (mask != 0xffffffff) {
 438                u32 buf;
 439
 440                pci_read_config_dword(pdev, offset, &buf);
 441                value &= mask;
 442                buf &= ~mask;
 443                value |= buf;
 444        }
 445
 446        pci_write_config_dword(pdev, offset, value);
 447}
 448
 449#endif                          /* CONFIG_PCI */
 450
 451extern struct mem_ctl_info *edac_mc_alloc(unsigned sz_pvt, unsigned nr_csrows,
 452                                          unsigned nr_chans, int edac_index);
 453extern int edac_mc_add_mc(struct mem_ctl_info *mci);
 454extern void edac_mc_free(struct mem_ctl_info *mci);
 455extern struct mem_ctl_info *edac_mc_find(int idx);
 456extern struct mem_ctl_info *find_mci_by_dev(struct device *dev);
 457extern struct mem_ctl_info *edac_mc_del_mc(struct device *dev);
 458extern int edac_mc_find_csrow_by_page(struct mem_ctl_info *mci,
 459                                      unsigned long page);
 460
 461/*
 462 * The no info errors are used when error overflows are reported.
 463 * There are a limited number of error logging registers that can
 464 * be exausted.  When all registers are exhausted and an additional
 465 * error occurs then an error overflow register records that an
 466 * error occurred and the type of error, but doesn't have any
 467 * further information.  The ce/ue versions make for cleaner
 468 * reporting logic and function interface - reduces conditional
 469 * statement clutter and extra function arguments.
 470 */
 471extern void edac_mc_handle_ce(struct mem_ctl_info *mci,
 472                              unsigned long page_frame_number,
 473                              unsigned long offset_in_page,
 474                              unsigned long syndrome, int row, int channel,
 475                              const char *msg);
 476extern void edac_mc_handle_ce_no_info(struct mem_ctl_info *mci,
 477                                      const char *msg);
 478extern void edac_mc_handle_ue(struct mem_ctl_info *mci,
 479                              unsigned long page_frame_number,
 480                              unsigned long offset_in_page, int row,
 481                              const char *msg);
 482extern void edac_mc_handle_ue_no_info(struct mem_ctl_info *mci,
 483                                      const char *msg);
 484extern void edac_mc_handle_fbd_ue(struct mem_ctl_info *mci, unsigned int csrow,
 485                                  unsigned int channel0, unsigned int channel1,
 486                                  char *msg);
 487extern void edac_mc_handle_fbd_ce(struct mem_ctl_info *mci, unsigned int csrow,
 488                                  unsigned int channel, char *msg);
 489
 490/*
 491 * edac_device APIs
 492 */
 493extern int edac_device_add_device(struct edac_device_ctl_info *edac_dev);
 494extern struct edac_device_ctl_info *edac_device_del_device(struct device *dev);
 495extern void edac_device_handle_ue(struct edac_device_ctl_info *edac_dev,
 496                                int inst_nr, int block_nr, const char *msg);
 497extern void edac_device_handle_ce(struct edac_device_ctl_info *edac_dev,
 498                                int inst_nr, int block_nr, const char *msg);
 499extern int edac_device_alloc_index(void);
 500
 501/*
 502 * edac_pci APIs
 503 */
 504extern struct edac_pci_ctl_info *edac_pci_alloc_ctl_info(unsigned int sz_pvt,
 505                                const char *edac_pci_name);
 506
 507extern void edac_pci_free_ctl_info(struct edac_pci_ctl_info *pci);
 508
 509extern void edac_pci_reset_delay_period(struct edac_pci_ctl_info *pci,
 510                                unsigned long value);
 511
 512extern int edac_pci_alloc_index(void);
 513extern int edac_pci_add_device(struct edac_pci_ctl_info *pci, int edac_idx);
 514extern struct edac_pci_ctl_info *edac_pci_del_device(struct device *dev);
 515
 516extern struct edac_pci_ctl_info *edac_pci_create_generic_ctl(
 517                                struct device *dev,
 518                                const char *mod_name);
 519
 520extern void edac_pci_release_generic_ctl(struct edac_pci_ctl_info *pci);
 521extern int edac_pci_create_sysfs(struct edac_pci_ctl_info *pci);
 522extern void edac_pci_remove_sysfs(struct edac_pci_ctl_info *pci);
 523
 524/*
 525 * edac misc APIs
 526 */
 527extern char *edac_op_state_to_string(int op_state);
 528
 529#endif                          /* _EDAC_CORE_H_ */
 530