linux/drivers/edac/cortex_arm64_edac.c
<<
>>
Prefs
   1/*
   2 * Cortex A57 and A53 EDAC
   3 *
   4 * Copyright (c) 2015, Advanced Micro Devices
   5 * Author: Brijesh Singh <brijeshkumar.singh@amd.com>
   6 *
   7 * This program is free software; you can redistribute it and/or modify
   8 * it under the terms of the GNU General Public License as published by
   9 * the Free Software Foundation; either version 2 of the License.
  10 *
  11 * This program is distributed in the hope that it will be useful,
  12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14 * GNU General Public License for more details.
  15 */
  16
  17#include <linux/module.h>
  18#include <linux/of_device.h>
  19#include <linux/platform_device.h>
  20#include <ras/ras_event.h>
  21
  22#include "edac_core.h"
  23
  24#define DRV_NAME                        "cortex_edac"
  25
  26#define CPUMERRSR_EL1_INDEX(x, y)       ((x) & (y))
  27#define CPUMERRSR_EL1_BANK_WAY(x, y)    (((x) >> 18) & (y))
  28#define CPUMERRSR_EL1_RAMID(x)          (((x) >> 24) & 0x7f)
  29#define CPUMERRSR_EL1_VALID(x)          ((x) & (1 << 31))
  30#define CPUMERRSR_EL1_REPEAT(x)         (((x) >> 32) & 0x7f)
  31#define CPUMERRSR_EL1_OTHER(x)          (((x) >> 40) & 0xff)
  32#define CPUMERRSR_EL1_FATAL(x)          ((x) & (1UL << 63))
  33#define L1_I_TAG_RAM                    0x00
  34#define L1_I_DATA_RAM                   0x01
  35#define L1_D_TAG_RAM                    0x08
  36#define L1_D_DATA_RAM                   0x09
  37#define L1_D_DIRTY_RAM                  0x14
  38#define TLB_RAM                         0x18
  39
  40#define L2MERRSR_EL1_CPUID_WAY(x)       (((x) >> 18) & 0xf)
  41#define L2MERRSR_EL1_RAMID(x)           (((x) >> 24) & 0x7f)
  42#define L2MERRSR_EL1_VALID(x)           ((x) & (1 << 31))
  43#define L2MERRSR_EL1_REPEAT(x)          (((x) >> 32) & 0xff)
  44#define L2MERRSR_EL1_OTHER(x)           (((x) >> 40) & 0xff)
  45#define L2MERRSR_EL1_FATAL(x)           ((x) & (1UL << 63))
  46#define L2_TAG_RAM                      0x10
  47#define L2_DATA_RAM                     0x11
  48#define L2_SNOOP_RAM                    0x12
  49#define L2_DIRTY_RAM                    0x14
  50#define L2_INCLUSION_PF_RAM             0x18
  51
  52#define L1_CACHE                        0
  53#define L2_CACHE                        1
  54
  55#define EDAC_MOD_STR                    DRV_NAME
  56
  57/* Error injectin macros*/
  58#define L1_DCACHE_ERRINJ_ENABLE         (1 << 6)
  59#define L1_DCACHE_ERRINJ_DISABLE        (~(1 << 6))
  60#define L2_DCACHE_ERRINJ_ENABLE         (1 << 29)
  61#define L2_DCACHE_ERRINJ_DISABLE        (~(1 << 29))
  62#define L2_ECC_PROTECTION               (1 << 22)
  63
  64static int poll_msec = 100;
  65
  66struct cortex_arm64_edac {
  67        struct edac_device_ctl_info *edac_ctl;
  68};
  69
  70static inline u64 read_cpumerrsr_el1(void)
  71{
  72        u64 val;
  73
  74        asm volatile("mrs %0, s3_1_c15_c2_2" : "=r" (val));
  75        return val;
  76}
  77
  78static inline void write_cpumerrsr_el1(u64 val)
  79{
  80        asm volatile("msr s3_1_c15_c2_2, %0" :: "r" (val));
  81}
  82
  83static inline u64 read_l2merrsr_el1(void)
  84{
  85        u64 val;
  86
  87        asm volatile("mrs %0, s3_1_c15_c2_3" : "=r" (val));
  88        return val;
  89}
  90
  91static inline void write_l2merrsr_el1(u64 val)
  92{
  93        asm volatile("msr s3_1_c15_c2_3, %0" :: "r" (val));
  94}
  95
  96static inline void cortexa53_edac_busy_on_inst(void)
  97{
  98        asm volatile("isb sy");
  99}
 100
 101static inline void cortexa53_edac_busy_on_data(void)
 102{
 103        asm volatile("dsb sy");
 104}
 105
 106static inline void write_l2actrl_el1(u64 val)
 107{
 108        asm volatile("msr s3_1_c15_c0_0, %0" :: "r" (val));
 109        cortexa53_edac_busy_on_inst();
 110}
 111
 112static inline u64 read_l2actrl_el1(void)
 113{
 114        u64 val;
 115
 116        asm volatile("mrs %0, s3_1_c15_c0_0" : "=r" (val));
 117        return val;
 118}
 119
 120static inline u64 read_l2ctlr_el1(void)
 121{
 122        u64 rval;
 123
 124        asm volatile("mrs %0,  S3_1_C11_C0_2" : "=r" (rval));
 125        return rval;
 126
 127}
 128
 129static inline u64 read_l1actrl_el1(void)
 130{
 131        u64 rval;
 132
 133        asm volatile("mrs %0,  S3_1_C15_C2_0" : "=r" (rval));
 134        return rval;
 135}
 136
 137static inline void write_l1actrl_el1(u64 val)
 138{
 139        asm volatile("msr S3_1_C15_C2_0, %0" :: "r" (val));
 140}
 141
 142static void parse_cpumerrsr(void *arg)
 143{
 144        int cpu, partnum, way;
 145        unsigned int index = 0;
 146        u64 val = read_cpumerrsr_el1();
 147        int repeat_err, other_err;
 148
 149        /* we do not support fatal error handling so far */
 150        if (CPUMERRSR_EL1_FATAL(val))
 151                return;
 152
 153        /* check if we have valid error before continuing */
 154        if (!CPUMERRSR_EL1_VALID(val))
 155                return;
 156
 157        cpu = smp_processor_id();
 158        partnum = read_cpuid_part_number();
 159        repeat_err = CPUMERRSR_EL1_REPEAT(val);
 160        other_err = CPUMERRSR_EL1_OTHER(val);
 161
 162        /* way/bank and index address bit ranges are different between
 163         * A57 and A53 */
 164        if (partnum == ARM_CPU_PART_CORTEX_A57) {
 165                index = CPUMERRSR_EL1_INDEX(val, 0x1ffff);
 166                way = CPUMERRSR_EL1_BANK_WAY(val, 0x1f);
 167        } else {
 168                index = CPUMERRSR_EL1_INDEX(val, 0xfff);
 169                way = CPUMERRSR_EL1_BANK_WAY(val, 0x7);
 170        }
 171
 172        edac_printk(KERN_CRIT, EDAC_MOD_STR, "CPU%d L1 error detected!\n", cpu);
 173        edac_printk(KERN_CRIT, EDAC_MOD_STR, "index=%#x, RAMID=", index);
 174
 175        switch (CPUMERRSR_EL1_RAMID(val)) {
 176        case L1_I_TAG_RAM:
 177                pr_cont("'L1-I Tag RAM' (way %d)", way);
 178                break;
 179        case L1_I_DATA_RAM:
 180                pr_cont("'L1-I Data RAM' (bank %d)", way);
 181                break;
 182        case L1_D_TAG_RAM:
 183                pr_cont("'L1-D Tag RAM' (way %d)", way);
 184                break;
 185        case L1_D_DATA_RAM:
 186                pr_cont("'L1-D Data RAM' (bank %d)", way);
 187                break;
 188        case L1_D_DIRTY_RAM:
 189                pr_cont("'L1 Dirty RAM'");
 190                break;
 191        case TLB_RAM:
 192                pr_cont("'TLB RAM'");
 193                break;
 194        default:
 195                pr_cont("'unknown'");
 196                break;
 197        }
 198
 199        pr_cont(", repeat=%d, other=%d (CPUMERRSR_EL1=%#llx)\n", repeat_err,
 200                other_err, val);
 201
 202        trace_mc_event(HW_EVENT_ERR_CORRECTED, "L1 non-fatal error",
 203                       "", repeat_err, 0, 0, 0, -1, index, 0, 0, DRV_NAME);
 204        write_cpumerrsr_el1(0);
 205}
 206
 207static void a57_parse_l2merrsr_way(u8 ramid, u8 val)
 208{
 209        switch (ramid) {
 210        case L2_TAG_RAM:
 211        case L2_DATA_RAM:
 212        case L2_DIRTY_RAM:
 213                pr_cont("(cpu%d tag, way %d)", val / 2, val % 2);
 214                break;
 215        case L2_SNOOP_RAM:
 216                pr_cont("(cpu%d tag, way %d)", (val & 0x6) >> 1,
 217                        (val & 0x1));
 218                break;
 219        }
 220}
 221
 222static void a53_parse_l2merrsr_way(u8 ramid, u8 val)
 223{
 224        switch (ramid) {
 225        case L2_TAG_RAM:
 226                pr_cont("(way %d)", val);
 227        case L2_DATA_RAM:
 228                pr_cont("(bank %d)", val);
 229                break;
 230        case L2_SNOOP_RAM:
 231                pr_cont("(cpu%d tag, way %d)", val / 2, val % 4);
 232                break;
 233        }
 234}
 235
 236static void parse_l2merrsr(void *arg)
 237{
 238        int cpu, partnum;
 239        unsigned int index;
 240        int repeat_err, other_err;
 241        u64 val = read_l2merrsr_el1();
 242
 243        /* we do not support fatal error handling so far */
 244        if (L2MERRSR_EL1_FATAL(val))
 245                return;
 246
 247        /* check if we have valid error before continuing */
 248        if (!L2MERRSR_EL1_VALID(val))
 249                return;
 250
 251        cpu = smp_processor_id();
 252        partnum = read_cpuid_part_number();
 253        repeat_err = L2MERRSR_EL1_REPEAT(val);
 254        other_err = L2MERRSR_EL1_OTHER(val);
 255
 256        /* index address range is different between A57 and A53 */
 257        if (partnum == ARM_CPU_PART_CORTEX_A57)
 258                index = val & 0x1ffff;
 259        else
 260                index = (val >> 3) & 0x3fff;
 261
 262        edac_printk(KERN_CRIT, EDAC_MOD_STR, "CPU%d L2 error detected!\n", cpu);
 263        edac_printk(KERN_CRIT, EDAC_MOD_STR, "index=%#x RAMID=", index);
 264
 265        switch (L2MERRSR_EL1_RAMID(val)) {
 266        case L2_TAG_RAM:
 267                pr_cont("'L2 Tag RAM'");
 268                break;
 269        case L2_DATA_RAM:
 270                pr_cont("'L2 Data RAM'");
 271                break;
 272        case L2_SNOOP_RAM:
 273                pr_cont("'L2 Snoop tag RAM'");
 274                break;
 275        case L2_DIRTY_RAM:
 276                pr_cont("'L2 Dirty RAM'");
 277                break;
 278        case L2_INCLUSION_PF_RAM:
 279                pr_cont("'L2 inclusion PF RAM'");
 280                break;
 281        default:
 282                pr_cont("unknown");
 283                break;
 284        }
 285
 286        /* cpuid/way bit description is different between A57 and A53 */
 287        if (partnum == ARM_CPU_PART_CORTEX_A57)
 288                a57_parse_l2merrsr_way(L2MERRSR_EL1_RAMID(val),
 289                                       L2MERRSR_EL1_CPUID_WAY(val));
 290        else
 291                a53_parse_l2merrsr_way(L2MERRSR_EL1_RAMID(val),
 292                                       L2MERRSR_EL1_CPUID_WAY(val));
 293
 294        pr_cont(", repeat=%d, other=%d (L2MERRSR_EL1=%#llx)\n", repeat_err,
 295                other_err, val);
 296        trace_mc_event(HW_EVENT_ERR_CORRECTED, "L2 non-fatal error",
 297                       "", repeat_err, 0, 0, 0, -1, index, 0, 0, DRV_NAME);
 298        write_l2merrsr_el1(0);
 299}
 300
 301static void cortex_arm64_edac_check(struct edac_device_ctl_info *edac_ctl)
 302{
 303        int cpu;
 304        struct cpumask cluster_mask, old_mask;
 305
 306        cpumask_clear(&cluster_mask);
 307        cpumask_clear(&old_mask);
 308
 309        get_online_cpus();
 310        for_each_online_cpu(cpu) {
 311                /* Check CPU L1 error */
 312                smp_call_function_single(cpu, parse_cpumerrsr, NULL, 0);
 313                cpumask_copy(&cluster_mask, topology_core_cpumask(cpu));
 314                if (cpumask_equal(&cluster_mask, &old_mask))
 315                        continue;
 316                cpumask_copy(&old_mask, &cluster_mask);
 317                /* Check CPU L2 error */
 318                smp_call_function_any(&cluster_mask, parse_l2merrsr, NULL, 0);
 319        }
 320        put_online_cpus();
 321}
 322
 323static ssize_t cortexa53_edac_inject_L2_show(struct edac_device_ctl_info
 324                                                        *dci, char *data)
 325{
 326        return sprintf(data, "L2ACTLR_EL1: [0x%llx]\n\r", read_l2actrl_el1());
 327}
 328
 329static ssize_t cortexa53_edac_inject_L2_store(
 330                struct edac_device_ctl_info *dci, const char *data,
 331                size_t count)
 332{
 333        u64 l2actrl, l2ecc;
 334
 335        if (!data)
 336                return -EFAULT;
 337
 338        l2ecc = read_l2ctlr_el1();
 339        if ((l2ecc & L2_ECC_PROTECTION)) {
 340                l2actrl = read_l2actrl_el1();
 341                l2actrl = l2actrl | L2_DCACHE_ERRINJ_ENABLE;
 342                write_l2actrl_el1(l2actrl);
 343                cortexa53_edac_busy_on_inst();
 344        } else {
 345                edac_printk(KERN_CRIT, EDAC_MOD_STR, "L2 ECC not enabled\n");
 346        }
 347
 348        return count;
 349}
 350
 351static ssize_t cortexa53_edac_inject_L1_show(struct edac_device_ctl_info
 352                                                        *dci, char *data)
 353{
 354        return sprintf(data, "L1CTLR_EL1: [0x%llx]\n\r", read_l1actrl_el1());
 355}
 356
 357static ssize_t cortexa53_edac_inject_L1_store(
 358                struct edac_device_ctl_info *dci, const char *data,
 359                size_t count)
 360{
 361        u64 l1actrl;
 362
 363        if (!data)
 364                return -EFAULT;
 365
 366        l1actrl = read_l1actrl_el1();
 367        l1actrl |= L1_DCACHE_ERRINJ_ENABLE;
 368        write_l1actrl_el1(l1actrl);
 369        cortexa53_edac_busy_on_inst();
 370
 371        return count;
 372}
 373
 374static struct edac_dev_sysfs_attribute cortexa53_edac_sysfs_attributes[] = {
 375        {
 376                .attr = {
 377                        .name = "inject_L2_Cache_Error",
 378                        .mode = (S_IRUGO | S_IWUSR)
 379                },
 380                .show = cortexa53_edac_inject_L2_show,
 381                .store = cortexa53_edac_inject_L2_store},
 382        {
 383                .attr = {
 384                        .name = "inject_L1_Cache_Error",
 385                        .mode = (S_IRUGO | S_IWUSR)
 386                },
 387                .show = cortexa53_edac_inject_L1_show,
 388                .store = cortexa53_edac_inject_L1_store},
 389
 390        /* End of list */
 391        {
 392                .attr = {.name = NULL}
 393        }
 394};
 395
 396static void cortexa53_set_edac_sysfs_attributes(struct edac_device_ctl_info
 397                                                *edac_dev)
 398{
 399        edac_dev->sysfs_attributes = cortexa53_edac_sysfs_attributes;
 400}
 401
 402static int cortex_arm64_edac_probe(struct platform_device *pdev)
 403{
 404        int rc;
 405        struct cortex_arm64_edac *drv;
 406        struct device *dev = &pdev->dev;
 407
 408        drv = devm_kzalloc(dev, sizeof(*drv), GFP_KERNEL);
 409        if (!drv)
 410                return -ENOMEM;
 411
 412        /* Only POLL mode is supported */
 413        edac_op_state = EDAC_OPSTATE_POLL;
 414
 415        drv->edac_ctl = edac_device_alloc_ctl_info(0, "cpu_cache", 1, "L", 2,
 416                                                   0, NULL, 0,
 417                                                   edac_device_alloc_index());
 418        if (IS_ERR(drv->edac_ctl))
 419                return -ENOMEM;
 420
 421        drv->edac_ctl->poll_msec = poll_msec;
 422        drv->edac_ctl->edac_check = cortex_arm64_edac_check;
 423        drv->edac_ctl->dev = dev;
 424        drv->edac_ctl->mod_name = dev_name(dev);
 425        drv->edac_ctl->dev_name = dev_name(dev);
 426        drv->edac_ctl->ctl_name = "cache_err";
 427        platform_set_drvdata(pdev, drv);
 428
 429        cortexa53_set_edac_sysfs_attributes(drv->edac_ctl);
 430
 431        rc = edac_device_add_device(drv->edac_ctl);
 432        if (rc)
 433                edac_device_free_ctl_info(drv->edac_ctl);
 434
 435        return rc;
 436}
 437
 438static int cortex_arm64_edac_remove(struct platform_device *pdev)
 439{
 440        struct cortex_arm64_edac *drv = dev_get_drvdata(&pdev->dev);
 441        struct edac_device_ctl_info *edac_ctl = drv->edac_ctl;
 442
 443        edac_device_del_device(edac_ctl->dev);
 444        edac_device_free_ctl_info(edac_ctl);
 445
 446        return 0;
 447}
 448
 449static const struct of_device_id cortex_arm64_edac_of_match[] = {
 450        { .compatible = "arm,cortex-a57-edac" },
 451        { .compatible = "arm,cortex-a53-edac" },
 452        {}
 453};
 454MODULE_DEVICE_TABLE(of, cortex_arm64_edac_of_match);
 455
 456static struct platform_driver cortex_arm64_edac_driver = {
 457        .probe = cortex_arm64_edac_probe,
 458        .remove = cortex_arm64_edac_remove,
 459        .driver = {
 460                .name = DRV_NAME,
 461                .of_match_table = cortex_arm64_edac_of_match,
 462        },
 463};
 464module_platform_driver(cortex_arm64_edac_driver);
 465
 466MODULE_LICENSE("GPL");
 467MODULE_AUTHOR("Brijesh Singh <brijeshkumar.singh@amd.com>");
 468MODULE_DESCRIPTION("Cortex A57 and A53 EDAC driver");
 469module_param(poll_msec, int, 0444);
 470MODULE_PARM_DESC(poll_msec, "EDAC monitor poll interval in msec");
 471