1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17#include <linux/module.h>
18#include <linux/of_device.h>
19#include <linux/platform_device.h>
20#include <ras/ras_event.h>
21
22#include "edac_core.h"
23
#define DRV_NAME "cortex_edac"

/*
 * CPUMERRSR_EL1 (CPU memory error syndrome) field extractors.  The
 * index/bank-way field widths differ between Cortex-A57 and Cortex-A53,
 * so those two helpers take the mask as an argument.
 */
#define CPUMERRSR_EL1_INDEX(x, y) ((x) & (y))
#define CPUMERRSR_EL1_BANK_WAY(x, y) (((x) >> 18) & (y))
#define CPUMERRSR_EL1_RAMID(x) (((x) >> 24) & 0x7f)
/*
 * Use 1UL (not 1) for the bit-31 valid flag: (1 << 31) is a negative int
 * that sign-extends to 0xffffffff80000000 when ANDed with the u64
 * register value, which would make VALID true for *any* of bits 63:31.
 */
#define CPUMERRSR_EL1_VALID(x) ((x) & (1UL << 31))
#define CPUMERRSR_EL1_REPEAT(x) (((x) >> 32) & 0x7f)
#define CPUMERRSR_EL1_OTHER(x) (((x) >> 40) & 0xff)
#define CPUMERRSR_EL1_FATAL(x) ((x) & (1UL << 63))
/* RAMID values reported in CPUMERRSR_EL1 */
#define L1_I_TAG_RAM 0x00
#define L1_I_DATA_RAM 0x01
#define L1_D_TAG_RAM 0x08
#define L1_D_DATA_RAM 0x09
#define L1_D_DIRTY_RAM 0x14
#define TLB_RAM 0x18

/* L2MERRSR_EL1 (L2 memory error syndrome) field extractors. */
#define L2MERRSR_EL1_CPUID_WAY(x) (((x) >> 18) & 0xf)
#define L2MERRSR_EL1_RAMID(x) (((x) >> 24) & 0x7f)
/* 1UL for the same sign-extension reason as CPUMERRSR_EL1_VALID. */
#define L2MERRSR_EL1_VALID(x) ((x) & (1UL << 31))
#define L2MERRSR_EL1_REPEAT(x) (((x) >> 32) & 0xff)
#define L2MERRSR_EL1_OTHER(x) (((x) >> 40) & 0xff)
#define L2MERRSR_EL1_FATAL(x) ((x) & (1UL << 63))
/* RAMID values reported in L2MERRSR_EL1 */
#define L2_TAG_RAM 0x10
#define L2_DATA_RAM 0x11
#define L2_SNOOP_RAM 0x12
#define L2_DIRTY_RAM 0x14
#define L2_INCLUSION_PF_RAM 0x18

#define L1_CACHE 0
#define L2_CACHE 1

#define EDAC_MOD_STR DRV_NAME


/* Error-injection control bits (L1ACTLR_EL1 / L2ACTLR_EL1 / L2CTLR_EL1). */
#define L1_DCACHE_ERRINJ_ENABLE (1 << 6)
#define L1_DCACHE_ERRINJ_DISABLE (~(1 << 6))
#define L2_DCACHE_ERRINJ_ENABLE (1 << 29)
#define L2_DCACHE_ERRINJ_DISABLE (~(1 << 29))
#define L2_ECC_PROTECTION (1 << 22)

/* Polling interval for the EDAC check callback, in milliseconds. */
static int poll_msec = 100;
65
/* Per-device driver state: just the EDAC device control info. */
struct cortex_arm64_edac {
	struct edac_device_ctl_info *edac_ctl;
};
69
/* Read CPUMERRSR_EL1 (IMPLEMENTATION DEFINED encoding s3_1_c15_c2_2). */
static inline u64 read_cpumerrsr_el1(void)
{
	u64 val;

	asm volatile("mrs %0, s3_1_c15_c2_2" : "=r" (val));
	return val;
}
77
/* Write CPUMERRSR_EL1; writing 0 clears the recorded L1 error syndrome. */
static inline void write_cpumerrsr_el1(u64 val)
{
	asm volatile("msr s3_1_c15_c2_2, %0" :: "r" (val));
}
82
/* Read L2MERRSR_EL1 (IMPLEMENTATION DEFINED encoding s3_1_c15_c2_3). */
static inline u64 read_l2merrsr_el1(void)
{
	u64 val;

	asm volatile("mrs %0, s3_1_c15_c2_3" : "=r" (val));
	return val;
}
90
/* Write L2MERRSR_EL1; writing 0 clears the recorded L2 error syndrome. */
static inline void write_l2merrsr_el1(u64 val)
{
	asm volatile("msr s3_1_c15_c2_3, %0" :: "r" (val));
}
95
/* Instruction synchronization barrier: make preceding sysreg writes visible. */
static inline void cortexa53_edac_busy_on_inst(void)
{
	asm volatile("isb sy");
}
100
/* Full-system data synchronization barrier. */
static inline void cortexa53_edac_busy_on_data(void)
{
	asm volatile("dsb sy");
}
105
/* Write L2ACTLR_EL1 and synchronize with an isb so the change takes effect. */
static inline void write_l2actrl_el1(u64 val)
{
	asm volatile("msr s3_1_c15_c0_0, %0" :: "r" (val));
	cortexa53_edac_busy_on_inst();
}
111
/* Read L2ACTLR_EL1 (L2 auxiliary control, holds the error-inject bit). */
static inline u64 read_l2actrl_el1(void)
{
	u64 val;

	asm volatile("mrs %0, s3_1_c15_c0_0" : "=r" (val));
	return val;
}
119
120static inline u64 read_l2ctlr_el1(void)
121{
122 u64 rval;
123
124 asm volatile("mrs %0, S3_1_C11_C0_2" : "=r" (rval));
125 return rval;
126
127}
128
129static inline u64 read_l1actrl_el1(void)
130{
131 u64 rval;
132
133 asm volatile("mrs %0, S3_1_C15_C2_0" : "=r" (rval));
134 return rval;
135}
136
/*
 * Write L1ACTLR_EL1.  Note: unlike write_l2actrl_el1(), no isb is issued
 * here; callers must synchronize themselves.
 */
static inline void write_l1actrl_el1(u64 val)
{
	asm volatile("msr S3_1_C15_C2_0, %0" :: "r" (val));
}
141
142static void parse_cpumerrsr(void *arg)
143{
144 int cpu, partnum, way;
145 unsigned int index = 0;
146 u64 val = read_cpumerrsr_el1();
147 int repeat_err, other_err;
148
149
150 if (CPUMERRSR_EL1_FATAL(val))
151 return;
152
153
154 if (!CPUMERRSR_EL1_VALID(val))
155 return;
156
157 cpu = smp_processor_id();
158 partnum = read_cpuid_part_number();
159 repeat_err = CPUMERRSR_EL1_REPEAT(val);
160 other_err = CPUMERRSR_EL1_OTHER(val);
161
162
163
164 if (partnum == ARM_CPU_PART_CORTEX_A57) {
165 index = CPUMERRSR_EL1_INDEX(val, 0x1ffff);
166 way = CPUMERRSR_EL1_BANK_WAY(val, 0x1f);
167 } else {
168 index = CPUMERRSR_EL1_INDEX(val, 0xfff);
169 way = CPUMERRSR_EL1_BANK_WAY(val, 0x7);
170 }
171
172 edac_printk(KERN_CRIT, EDAC_MOD_STR, "CPU%d L1 error detected!\n", cpu);
173 edac_printk(KERN_CRIT, EDAC_MOD_STR, "index=%#x, RAMID=", index);
174
175 switch (CPUMERRSR_EL1_RAMID(val)) {
176 case L1_I_TAG_RAM:
177 pr_cont("'L1-I Tag RAM' (way %d)", way);
178 break;
179 case L1_I_DATA_RAM:
180 pr_cont("'L1-I Data RAM' (bank %d)", way);
181 break;
182 case L1_D_TAG_RAM:
183 pr_cont("'L1-D Tag RAM' (way %d)", way);
184 break;
185 case L1_D_DATA_RAM:
186 pr_cont("'L1-D Data RAM' (bank %d)", way);
187 break;
188 case L1_D_DIRTY_RAM:
189 pr_cont("'L1 Dirty RAM'");
190 break;
191 case TLB_RAM:
192 pr_cont("'TLB RAM'");
193 break;
194 default:
195 pr_cont("'unknown'");
196 break;
197 }
198
199 pr_cont(", repeat=%d, other=%d (CPUMERRSR_EL1=%#llx)\n", repeat_err,
200 other_err, val);
201
202 trace_mc_event(HW_EVENT_ERR_CORRECTED, "L1 non-fatal error",
203 "", repeat_err, 0, 0, 0, -1, index, 0, 0, DRV_NAME);
204 write_cpumerrsr_el1(0);
205}
206
207static void a57_parse_l2merrsr_way(u8 ramid, u8 val)
208{
209 switch (ramid) {
210 case L2_TAG_RAM:
211 case L2_DATA_RAM:
212 case L2_DIRTY_RAM:
213 pr_cont("(cpu%d tag, way %d)", val / 2, val % 2);
214 break;
215 case L2_SNOOP_RAM:
216 pr_cont("(cpu%d tag, way %d)", (val & 0x6) >> 1,
217 (val & 0x1));
218 break;
219 }
220}
221
222static void a53_parse_l2merrsr_way(u8 ramid, u8 val)
223{
224 switch (ramid) {
225 case L2_TAG_RAM:
226 pr_cont("(way %d)", val);
227 case L2_DATA_RAM:
228 pr_cont("(bank %d)", val);
229 break;
230 case L2_SNOOP_RAM:
231 pr_cont("(cpu%d tag, way %d)", val / 2, val % 4);
232 break;
233 }
234}
235
236static void parse_l2merrsr(void *arg)
237{
238 int cpu, partnum;
239 unsigned int index;
240 int repeat_err, other_err;
241 u64 val = read_l2merrsr_el1();
242
243
244 if (L2MERRSR_EL1_FATAL(val))
245 return;
246
247
248 if (!L2MERRSR_EL1_VALID(val))
249 return;
250
251 cpu = smp_processor_id();
252 partnum = read_cpuid_part_number();
253 repeat_err = L2MERRSR_EL1_REPEAT(val);
254 other_err = L2MERRSR_EL1_OTHER(val);
255
256
257 if (partnum == ARM_CPU_PART_CORTEX_A57)
258 index = val & 0x1ffff;
259 else
260 index = (val >> 3) & 0x3fff;
261
262 edac_printk(KERN_CRIT, EDAC_MOD_STR, "CPU%d L2 error detected!\n", cpu);
263 edac_printk(KERN_CRIT, EDAC_MOD_STR, "index=%#x RAMID=", index);
264
265 switch (L2MERRSR_EL1_RAMID(val)) {
266 case L2_TAG_RAM:
267 pr_cont("'L2 Tag RAM'");
268 break;
269 case L2_DATA_RAM:
270 pr_cont("'L2 Data RAM'");
271 break;
272 case L2_SNOOP_RAM:
273 pr_cont("'L2 Snoop tag RAM'");
274 break;
275 case L2_DIRTY_RAM:
276 pr_cont("'L2 Dirty RAM'");
277 break;
278 case L2_INCLUSION_PF_RAM:
279 pr_cont("'L2 inclusion PF RAM'");
280 break;
281 default:
282 pr_cont("unknown");
283 break;
284 }
285
286
287 if (partnum == ARM_CPU_PART_CORTEX_A57)
288 a57_parse_l2merrsr_way(L2MERRSR_EL1_RAMID(val),
289 L2MERRSR_EL1_CPUID_WAY(val));
290 else
291 a53_parse_l2merrsr_way(L2MERRSR_EL1_RAMID(val),
292 L2MERRSR_EL1_CPUID_WAY(val));
293
294 pr_cont(", repeat=%d, other=%d (L2MERRSR_EL1=%#llx)\n", repeat_err,
295 other_err, val);
296 trace_mc_event(HW_EVENT_ERR_CORRECTED, "L2 non-fatal error",
297 "", repeat_err, 0, 0, 0, -1, index, 0, 0, DRV_NAME);
298 write_l2merrsr_el1(0);
299}
300
/*
 * Periodic EDAC poll callback: read the per-core L1 syndrome on every
 * online CPU, and the shared L2 syndrome once per cluster.
 */
static void cortex_arm64_edac_check(struct edac_device_ctl_info *edac_ctl)
{
	int cpu;
	struct cpumask cluster_mask, old_mask;

	cpumask_clear(&cluster_mask);
	cpumask_clear(&old_mask);

	get_online_cpus();
	for_each_online_cpu(cpu) {
		/* CPUMERRSR_EL1 is per-core: check it on every CPU. */
		smp_call_function_single(cpu, parse_cpumerrsr, NULL, 0);
		cpumask_copy(&cluster_mask, topology_core_cpumask(cpu));
		/* Skip clusters we already queried on an earlier iteration. */
		if (cpumask_equal(&cluster_mask, &old_mask))
			continue;
		cpumask_copy(&old_mask, &cluster_mask);
		/* L2MERRSR_EL1 is per-cluster: one CPU of the cluster suffices. */
		smp_call_function_any(&cluster_mask, parse_l2merrsr, NULL, 0);
	}
	put_online_cpus();
}
322
323static ssize_t cortexa53_edac_inject_L2_show(struct edac_device_ctl_info
324 *dci, char *data)
325{
326 return sprintf(data, "L2ACTLR_EL1: [0x%llx]\n\r", read_l2actrl_el1());
327}
328
329static ssize_t cortexa53_edac_inject_L2_store(
330 struct edac_device_ctl_info *dci, const char *data,
331 size_t count)
332{
333 u64 l2actrl, l2ecc;
334
335 if (!data)
336 return -EFAULT;
337
338 l2ecc = read_l2ctlr_el1();
339 if ((l2ecc & L2_ECC_PROTECTION)) {
340 l2actrl = read_l2actrl_el1();
341 l2actrl = l2actrl | L2_DCACHE_ERRINJ_ENABLE;
342 write_l2actrl_el1(l2actrl);
343 cortexa53_edac_busy_on_inst();
344 } else {
345 edac_printk(KERN_CRIT, EDAC_MOD_STR, "L2 ECC not enabled\n");
346 }
347
348 return count;
349}
350
351static ssize_t cortexa53_edac_inject_L1_show(struct edac_device_ctl_info
352 *dci, char *data)
353{
354 return sprintf(data, "L1CTLR_EL1: [0x%llx]\n\r", read_l1actrl_el1());
355}
356
357static ssize_t cortexa53_edac_inject_L1_store(
358 struct edac_device_ctl_info *dci, const char *data,
359 size_t count)
360{
361 u64 l1actrl;
362
363 if (!data)
364 return -EFAULT;
365
366 l1actrl = read_l1actrl_el1();
367 l1actrl |= L1_DCACHE_ERRINJ_ENABLE;
368 write_l1actrl_el1(l1actrl);
369 cortexa53_edac_busy_on_inst();
370
371 return count;
372}
373
/*
 * sysfs attributes for error injection; the list is NULL-name terminated
 * as required by the edac_device sysfs code.
 */
static struct edac_dev_sysfs_attribute cortexa53_edac_sysfs_attributes[] = {
	{
		.attr = {
			.name = "inject_L2_Cache_Error",
			.mode = (S_IRUGO | S_IWUSR)
		},
		.show = cortexa53_edac_inject_L2_show,
		.store = cortexa53_edac_inject_L2_store},
	{
		.attr = {
			.name = "inject_L1_Cache_Error",
			.mode = (S_IRUGO | S_IWUSR)
		},
		.show = cortexa53_edac_inject_L1_show,
		.store = cortexa53_edac_inject_L1_store},

	/* End of list sentinel. */
	{
		.attr = {.name = NULL}
	}
};
395
/* Attach the error-injection sysfs attributes to the EDAC device. */
static void cortexa53_set_edac_sysfs_attributes(struct edac_device_ctl_info
						*edac_dev)
{
	edac_dev->sysfs_attributes = cortexa53_edac_sysfs_attributes;
}
401
402static int cortex_arm64_edac_probe(struct platform_device *pdev)
403{
404 int rc;
405 struct cortex_arm64_edac *drv;
406 struct device *dev = &pdev->dev;
407
408 drv = devm_kzalloc(dev, sizeof(*drv), GFP_KERNEL);
409 if (!drv)
410 return -ENOMEM;
411
412
413 edac_op_state = EDAC_OPSTATE_POLL;
414
415 drv->edac_ctl = edac_device_alloc_ctl_info(0, "cpu_cache", 1, "L", 2,
416 0, NULL, 0,
417 edac_device_alloc_index());
418 if (IS_ERR(drv->edac_ctl))
419 return -ENOMEM;
420
421 drv->edac_ctl->poll_msec = poll_msec;
422 drv->edac_ctl->edac_check = cortex_arm64_edac_check;
423 drv->edac_ctl->dev = dev;
424 drv->edac_ctl->mod_name = dev_name(dev);
425 drv->edac_ctl->dev_name = dev_name(dev);
426 drv->edac_ctl->ctl_name = "cache_err";
427 platform_set_drvdata(pdev, drv);
428
429 cortexa53_set_edac_sysfs_attributes(drv->edac_ctl);
430
431 rc = edac_device_add_device(drv->edac_ctl);
432 if (rc)
433 edac_device_free_ctl_info(drv->edac_ctl);
434
435 return rc;
436}
437
438static int cortex_arm64_edac_remove(struct platform_device *pdev)
439{
440 struct cortex_arm64_edac *drv = dev_get_drvdata(&pdev->dev);
441 struct edac_device_ctl_info *edac_ctl = drv->edac_ctl;
442
443 edac_device_del_device(edac_ctl->dev);
444 edac_device_free_ctl_info(edac_ctl);
445
446 return 0;
447}
448
/* Device-tree match table: the driver binds to both A57 and A53 nodes. */
static const struct of_device_id cortex_arm64_edac_of_match[] = {
	{ .compatible = "arm,cortex-a57-edac" },
	{ .compatible = "arm,cortex-a53-edac" },
	{}
};
MODULE_DEVICE_TABLE(of, cortex_arm64_edac_of_match);
455
/* Platform driver glue; module_platform_driver() generates init/exit. */
static struct platform_driver cortex_arm64_edac_driver = {
	.probe = cortex_arm64_edac_probe,
	.remove = cortex_arm64_edac_remove,
	.driver = {
		.name = DRV_NAME,
		.of_match_table = cortex_arm64_edac_of_match,
	},
};
module_platform_driver(cortex_arm64_edac_driver);
465
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Brijesh Singh <brijeshkumar.singh@amd.com>");
MODULE_DESCRIPTION("Cortex A57 and A53 EDAC driver");
/* Read-only module parameter (0444): poll interval for the check callback. */
module_param(poll_msec, int, 0444);
MODULE_PARM_DESC(poll_msec, "EDAC monitor poll interval in msec");
471