1
2
3
4
5
6
7
8
9#undef DEBUG
10#define pr_fmt(fmt) "mce: " fmt
11
12#include <linux/hardirq.h>
13#include <linux/types.h>
14#include <linux/ptrace.h>
15#include <linux/percpu.h>
16#include <linux/export.h>
17#include <linux/irq_work.h>
18
19#include <asm/machdep.h>
20#include <asm/mce.h>
21#include <asm/nmi.h>
22
/*
 * Per-CPU stack of machine check events captured in real mode.
 * mce_nest_count is the current depth (an MCE can nest inside the
 * handling of a previous one); mce_event holds up to MAX_MC_EVT entries.
 */
static DEFINE_PER_CPU(int, mce_nest_count);
static DEFINE_PER_CPU(struct machine_check_event[MAX_MC_EVT], mce_event);

/* Per-CPU queue of events deferred for printing from irq_work context. */
static DEFINE_PER_CPU(int, mce_queue_count);
static DEFINE_PER_CPU(struct machine_check_event[MAX_MC_EVT], mce_event_queue);

/* Per-CPU queue of uncorrectable-error (UE) events deferred to workqueue. */
static DEFINE_PER_CPU(int, mce_ue_count);
static DEFINE_PER_CPU(struct machine_check_event[MAX_MC_EVT],
					mce_ue_event_queue);

static void machine_check_process_queued_event(struct irq_work *work);
void machine_check_ue_event(struct machine_check_event *evt);
static void machine_process_ue_event(struct work_struct *work);

/* irq_work used to drain mce_event_queue outside the MCE handler itself. */
static struct irq_work mce_event_process_work = {
	.func = machine_check_process_queued_event,
};

/* Workqueue item used to drain mce_ue_event_queue (may sleep). */
DECLARE_WORK(mce_ue_event_work, machine_process_ue_event);
44
45static void mce_set_error_info(struct machine_check_event *mce,
46 struct mce_error_info *mce_err)
47{
48 mce->error_type = mce_err->error_type;
49 switch (mce_err->error_type) {
50 case MCE_ERROR_TYPE_UE:
51 mce->u.ue_error.ue_error_type = mce_err->u.ue_error_type;
52 break;
53 case MCE_ERROR_TYPE_SLB:
54 mce->u.slb_error.slb_error_type = mce_err->u.slb_error_type;
55 break;
56 case MCE_ERROR_TYPE_ERAT:
57 mce->u.erat_error.erat_error_type = mce_err->u.erat_error_type;
58 break;
59 case MCE_ERROR_TYPE_TLB:
60 mce->u.tlb_error.tlb_error_type = mce_err->u.tlb_error_type;
61 break;
62 case MCE_ERROR_TYPE_USER:
63 mce->u.user_error.user_error_type = mce_err->u.user_error_type;
64 break;
65 case MCE_ERROR_TYPE_RA:
66 mce->u.ra_error.ra_error_type = mce_err->u.ra_error_type;
67 break;
68 case MCE_ERROR_TYPE_LINK:
69 mce->u.link_error.link_error_type = mce_err->u.link_error_type;
70 break;
71 case MCE_ERROR_TYPE_UNKNOWN:
72 default:
73 break;
74 }
75}
76
77
78
79
80
81void save_mce_event(struct pt_regs *regs, long handled,
82 struct mce_error_info *mce_err,
83 uint64_t nip, uint64_t addr, uint64_t phys_addr)
84{
85 int index = __this_cpu_inc_return(mce_nest_count) - 1;
86 struct machine_check_event *mce = this_cpu_ptr(&mce_event[index]);
87
88
89
90
91
92
93 if (index >= MAX_MC_EVT)
94 return;
95
96
97 mce->version = MCE_V1;
98 mce->srr0 = nip;
99 mce->srr1 = regs->msr;
100 mce->gpr3 = regs->gpr[3];
101 mce->in_use = 1;
102 mce->cpu = get_paca()->paca_index;
103
104
105 if (handled && (regs->msr & MSR_RI))
106 mce->disposition = MCE_DISPOSITION_RECOVERED;
107 else
108 mce->disposition = MCE_DISPOSITION_NOT_RECOVERED;
109
110 mce->initiator = mce_err->initiator;
111 mce->severity = mce_err->severity;
112 mce->sync_error = mce_err->sync_error;
113 mce->error_class = mce_err->error_class;
114
115
116
117
118 mce_set_error_info(mce, mce_err);
119
120 if (!addr)
121 return;
122
123 if (mce->error_type == MCE_ERROR_TYPE_TLB) {
124 mce->u.tlb_error.effective_address_provided = true;
125 mce->u.tlb_error.effective_address = addr;
126 } else if (mce->error_type == MCE_ERROR_TYPE_SLB) {
127 mce->u.slb_error.effective_address_provided = true;
128 mce->u.slb_error.effective_address = addr;
129 } else if (mce->error_type == MCE_ERROR_TYPE_ERAT) {
130 mce->u.erat_error.effective_address_provided = true;
131 mce->u.erat_error.effective_address = addr;
132 } else if (mce->error_type == MCE_ERROR_TYPE_USER) {
133 mce->u.user_error.effective_address_provided = true;
134 mce->u.user_error.effective_address = addr;
135 } else if (mce->error_type == MCE_ERROR_TYPE_RA) {
136 mce->u.ra_error.effective_address_provided = true;
137 mce->u.ra_error.effective_address = addr;
138 } else if (mce->error_type == MCE_ERROR_TYPE_LINK) {
139 mce->u.link_error.effective_address_provided = true;
140 mce->u.link_error.effective_address = addr;
141 } else if (mce->error_type == MCE_ERROR_TYPE_UE) {
142 mce->u.ue_error.effective_address_provided = true;
143 mce->u.ue_error.effective_address = addr;
144 if (phys_addr != ULONG_MAX) {
145 mce->u.ue_error.physical_address_provided = true;
146 mce->u.ue_error.physical_address = phys_addr;
147 machine_check_ue_event(mce);
148 }
149 }
150 return;
151}
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170int get_mce_event(struct machine_check_event *mce, bool release)
171{
172 int index = __this_cpu_read(mce_nest_count) - 1;
173 struct machine_check_event *mc_evt;
174 int ret = 0;
175
176
177 if (index < 0)
178 return ret;
179
180
181 if (index < MAX_MC_EVT) {
182 mc_evt = this_cpu_ptr(&mce_event[index]);
183
184 if (mce)
185 *mce = *mc_evt;
186 if (release)
187 mc_evt->in_use = 0;
188 ret = 1;
189 }
190
191 if (release)
192 __this_cpu_dec(mce_nest_count);
193
194 return ret;
195}
196
/* Pop and discard the current MCE event on this CPU, if any. */
void release_mce_event(void)
{
	get_mce_event(NULL, true);
}
201
202
203
204
205
206void machine_check_ue_event(struct machine_check_event *evt)
207{
208 int index;
209
210 index = __this_cpu_inc_return(mce_ue_count) - 1;
211
212 if (index >= MAX_MC_EVT) {
213 __this_cpu_dec(mce_ue_count);
214 return;
215 }
216 memcpy(this_cpu_ptr(&mce_ue_event_queue[index]), evt, sizeof(*evt));
217
218
219 schedule_work(&mce_ue_event_work);
220}
221
222
223
224
225void machine_check_queue_event(void)
226{
227 int index;
228 struct machine_check_event evt;
229
230 if (!get_mce_event(&evt, MCE_EVENT_RELEASE))
231 return;
232
233 index = __this_cpu_inc_return(mce_queue_count) - 1;
234
235 if (index >= MAX_MC_EVT) {
236 __this_cpu_dec(mce_queue_count);
237 return;
238 }
239 memcpy(this_cpu_ptr(&mce_event_queue[index]), &evt, sizeof(evt));
240
241
242 irq_work_queue(&mce_event_process_work);
243}
244
245
246
247
248static void machine_process_ue_event(struct work_struct *work)
249{
250 int index;
251 struct machine_check_event *evt;
252
253 while (__this_cpu_read(mce_ue_count) > 0) {
254 index = __this_cpu_read(mce_ue_count) - 1;
255 evt = this_cpu_ptr(&mce_ue_event_queue[index]);
256#ifdef CONFIG_MEMORY_FAILURE
257
258
259
260
261 if (evt->error_type == MCE_ERROR_TYPE_UE) {
262 if (evt->u.ue_error.physical_address_provided) {
263 unsigned long pfn;
264
265 pfn = evt->u.ue_error.physical_address >>
266 PAGE_SHIFT;
267 memory_failure(pfn, 0);
268 } else
269 pr_warn("Failed to identify bad address from "
270 "where the uncorrectable error (UE) "
271 "was generated\n");
272 }
273#endif
274 __this_cpu_dec(mce_ue_count);
275 }
276}
277
278
279
280
281static void machine_check_process_queued_event(struct irq_work *work)
282{
283 int index;
284 struct machine_check_event *evt;
285
286 add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE);
287
288
289
290
291
292 while (__this_cpu_read(mce_queue_count) > 0) {
293 index = __this_cpu_read(mce_queue_count) - 1;
294 evt = this_cpu_ptr(&mce_event_queue[index]);
295 machine_check_print_event_info(evt, false, false);
296 __this_cpu_dec(mce_queue_count);
297 }
298}
299
/*
 * machine_check_print_event_info - decode one MCE event and print a
 * human-readable multi-line report to the kernel log.
 * @evt:       the event to print; must be version MCE_V1.
 * @user_mode: true if the MCE was taken in user mode; selects the
 *             PID/comm report line instead of kernel symbol lookup.
 * @in_guest:  true if the event was taken in a guest (also selects the
 *             PID/comm line and tags the report "Guest").
 *
 * The printk level is derived from evt->severity.  The effective
 * address is printed as "DAR:" only when it differs from srr0.
 */
void machine_check_print_event_info(struct machine_check_event *evt,
				    bool user_mode, bool in_guest)
{
	const char *level, *sevstr, *subtype, *err_type;
	uint64_t ea = 0, pa = 0;
	int n = 0;
	char dar_str[50];
	char pa_str[50];
	/* Subtype name tables, indexed by the corresponding *_error_type. */
	static const char *mc_ue_types[] = {
		"Indeterminate",
		"Instruction fetch",
		"Page table walk ifetch",
		"Load/Store",
		"Page table walk Load/Store",
	};
	static const char *mc_slb_types[] = {
		"Indeterminate",
		"Parity",
		"Multihit",
	};
	static const char *mc_erat_types[] = {
		"Indeterminate",
		"Parity",
		"Multihit",
	};
	static const char *mc_tlb_types[] = {
		"Indeterminate",
		"Parity",
		"Multihit",
	};
	static const char *mc_user_types[] = {
		"Indeterminate",
		"tlbie(l) invalid",
	};
	static const char *mc_ra_types[] = {
		"Indeterminate",
		"Instruction fetch (bad)",
		"Instruction fetch (foreign)",
		"Page table walk ifetch (bad)",
		"Page table walk ifetch (foreign)",
		"Load (bad)",
		"Store (bad)",
		"Page table walk Load/Store (bad)",
		"Page table walk Load/Store (foreign)",
		"Load/Store (foreign)",
	};
	static const char *mc_link_types[] = {
		"Indeterminate",
		"Instruction fetch (timeout)",
		"Page table walk ifetch (timeout)",
		"Load (timeout)",
		"Store (timeout)",
		"Page table walk Load/Store (timeout)",
	};
	/* Names indexed by evt->error_class. */
	static const char *mc_error_class[] = {
		"Unknown",
		"Hardware error",
		"Probable Hardware error (some chance of software cause)",
		"Software error",
		"Probable Software error (some chance of hardware cause)",
	};

	/* Only MCE_V1 events are understood here. */
	if (evt->version != MCE_V1) {
		pr_err("Machine Check Exception, Unknown event version %d !\n",
		       evt->version);
		return;
	}
	/* Map severity to a printk level and a label for the report. */
	switch (evt->severity) {
	case MCE_SEV_NO_ERROR:
		level = KERN_INFO;
		sevstr = "Harmless";
		break;
	case MCE_SEV_WARNING:
		level = KERN_WARNING;
		sevstr = "Warning";
		break;
	case MCE_SEV_SEVERE:
		level = KERN_ERR;
		sevstr = "Severe";
		break;
	case MCE_SEV_FATAL:
	default:
		level = KERN_ERR;
		sevstr = "Fatal";
		break;
	}

	/*
	 * Pick the type label, look up the subtype name (bounds-checked
	 * against the table), and pull the effective/physical address out
	 * of the matching union member, if provided.
	 */
	switch (evt->error_type) {
	case MCE_ERROR_TYPE_UE:
		err_type = "UE";
		subtype = evt->u.ue_error.ue_error_type <
			ARRAY_SIZE(mc_ue_types) ?
			mc_ue_types[evt->u.ue_error.ue_error_type]
			: "Unknown";
		if (evt->u.ue_error.effective_address_provided)
			ea = evt->u.ue_error.effective_address;
		if (evt->u.ue_error.physical_address_provided)
			pa = evt->u.ue_error.physical_address;
		break;
	case MCE_ERROR_TYPE_SLB:
		err_type = "SLB";
		subtype = evt->u.slb_error.slb_error_type <
			ARRAY_SIZE(mc_slb_types) ?
			mc_slb_types[evt->u.slb_error.slb_error_type]
			: "Unknown";
		if (evt->u.slb_error.effective_address_provided)
			ea = evt->u.slb_error.effective_address;
		break;
	case MCE_ERROR_TYPE_ERAT:
		err_type = "ERAT";
		subtype = evt->u.erat_error.erat_error_type <
			ARRAY_SIZE(mc_erat_types) ?
			mc_erat_types[evt->u.erat_error.erat_error_type]
			: "Unknown";
		if (evt->u.erat_error.effective_address_provided)
			ea = evt->u.erat_error.effective_address;
		break;
	case MCE_ERROR_TYPE_TLB:
		err_type = "TLB";
		subtype = evt->u.tlb_error.tlb_error_type <
			ARRAY_SIZE(mc_tlb_types) ?
			mc_tlb_types[evt->u.tlb_error.tlb_error_type]
			: "Unknown";
		if (evt->u.tlb_error.effective_address_provided)
			ea = evt->u.tlb_error.effective_address;
		break;
	case MCE_ERROR_TYPE_USER:
		err_type = "User";
		subtype = evt->u.user_error.user_error_type <
			ARRAY_SIZE(mc_user_types) ?
			mc_user_types[evt->u.user_error.user_error_type]
			: "Unknown";
		if (evt->u.user_error.effective_address_provided)
			ea = evt->u.user_error.effective_address;
		break;
	case MCE_ERROR_TYPE_RA:
		err_type = "Real address";
		subtype = evt->u.ra_error.ra_error_type <
			ARRAY_SIZE(mc_ra_types) ?
			mc_ra_types[evt->u.ra_error.ra_error_type]
			: "Unknown";
		if (evt->u.ra_error.effective_address_provided)
			ea = evt->u.ra_error.effective_address;
		break;
	case MCE_ERROR_TYPE_LINK:
		err_type = "Link";
		subtype = evt->u.link_error.link_error_type <
			ARRAY_SIZE(mc_link_types) ?
			mc_link_types[evt->u.link_error.link_error_type]
			: "Unknown";
		if (evt->u.link_error.effective_address_provided)
			ea = evt->u.link_error.effective_address;
		break;
	default:
	case MCE_ERROR_TYPE_UNKNOWN:
		err_type = "Unknown";
		subtype = "";
		break;
	}

	/*
	 * Print "DAR: ..." (and paddr) on the first line only when the
	 * effective address differs from srr0; otherwise append paddr
	 * (if any) to the NIP line instead.
	 */
	dar_str[0] = pa_str[0] = '\0';
	if (ea && evt->srr0 != ea) {
		/* Load/Store address */
		n = sprintf(dar_str, "DAR: %016llx ", ea);
		if (pa)
			sprintf(dar_str + n, "paddr: %016llx ", pa);
	} else if (pa) {
		sprintf(pa_str, " paddr: %016llx", pa);
	}

	printk("%sMCE: CPU%d: machine check (%s) %s %s %s %s[%s]\n",
	       level, evt->cpu, sevstr, in_guest ? "Guest" : "Host",
	       err_type, subtype, dar_str,
	       evt->disposition == MCE_DISPOSITION_RECOVERED ?
	       "Recovered" : "Not recovered");

	if (in_guest || user_mode) {
		printk("%sMCE: CPU%d: PID: %d Comm: %s %sNIP: [%016llx]%s\n",
		       level, evt->cpu, current->pid, current->comm,
		       in_guest ? "Guest " : "", evt->srr0, pa_str);
	} else {
		/* Kernel mode: resolve the NIP to a symbol with %pS. */
		printk("%sMCE: CPU%d: NIP: [%016llx] %pS%s\n",
		       level, evt->cpu, evt->srr0, (void *)evt->srr0, pa_str);
	}

	/* Final line: the broad error classification. */
	subtype = evt->error_class < ARRAY_SIZE(mc_error_class) ?
		mc_error_class[evt->error_class] : "Unknown";
	printk("%sMCE: CPU%d: %s\n", level, evt->cpu, subtype);
}
EXPORT_SYMBOL_GPL(machine_check_print_event_info);
491
492
493
494
495
496
497long machine_check_early(struct pt_regs *regs)
498{
499 long handled = 0;
500
501 hv_nmi_check_nonrecoverable(regs);
502
503
504
505
506 if (ppc_md.machine_check_early)
507 handled = ppc_md.machine_check_early(regs);
508 return handled;
509}
510
511
/*
 * What the HMER debug trigger is wired to on this CPU, as discovered at
 * boot by init_debug_trig_function() (from the device tree strings
 * "bit17-vector-ci-load" / "bit17-tm-suspend-escape", or a PVR fallback).
 */
static enum {
	DTRIG_UNKNOWN,		/* not known / not in use */
	DTRIG_VECTOR_CI,	/* marks a vector cache-inhibited load */
	DTRIG_SUSPEND_ESCAPE,	/* marks an escape from TM suspend mode */
} hmer_debug_trig_function;
517
/*
 * Boot-time discovery of the HMER debug trigger's function: consult the
 * device tree first, and only fall back to PVR-based detection when the
 * "ibm,hmi-special-triggers" property is absent.
 */
static int init_debug_trig_function(void)
{
	int pvr;
	struct device_node *cpun;
	struct property *prop = NULL;
	const char *str;

	/* Look in the boot CPU's device tree node (pin the CPU meanwhile). */
	preempt_disable();
	cpun = of_get_cpu_node(smp_processor_id(), NULL);
	if (cpun) {
		of_property_for_each_string(cpun, "ibm,hmi-special-triggers",
					    prop, str) {
			if (strcmp(str, "bit17-vector-ci-load") == 0)
				hmer_debug_trig_function = DTRIG_VECTOR_CI;
			else if (strcmp(str, "bit17-tm-suspend-escape") == 0)
				hmer_debug_trig_function = DTRIG_SUSPEND_ESCAPE;
		}
		of_node_put(cpun);
	}
	preempt_enable();

	/* The property existed (prop set by the iterator): trust the DT. */
	if (prop)
		goto out;

	pvr = mfspr(SPRN_PVR);
	/*
	 * PVR fallback for POWER9.  NOTE(review): the (pvr & 0xe000) == 0
	 * mask presumably selects a particular POWER9 variant, and the
	 * 0xfff revision thresholds (>= 0x202, >= 0x200) particular DD
	 * levels — confirm against the POWER9 PVR layout.
	 */
	if ((PVR_VER(pvr) == PVR_POWER9) && (pvr & 0xe000) == 0) {
		/* Later revisions: trigger marks a TM suspend escape. */
		if ((pvr & 0xfff) >= 0x202)
			hmer_debug_trig_function = DTRIG_SUSPEND_ESCAPE;
		/* Earlier revisions: trigger marks a vector CI load. */
		else if ((pvr & 0xfff) >= 0x200)
			hmer_debug_trig_function = DTRIG_VECTOR_CI;
	}

 out:
	switch (hmer_debug_trig_function) {
	case DTRIG_VECTOR_CI:
		pr_debug("HMI debug trigger used for vector CI load\n");
		break;
	case DTRIG_SUSPEND_ESCAPE:
		pr_debug("HMI debug trigger used for TM suspend escape\n");
		break;
	default:
		break;
	}
	return 0;
}
__initcall(init_debug_trig_function);
569
570
571
572
573
574
575
576
/*
 * Handle an HMI caused by the debug trigger bit in HMER.
 *
 * Return values:
 *   -1 : the trigger bit was not set (or its function is unknown), or
 *        other enabled HMER bits remain pending — fall through to the
 *        normal HMI handling path.
 *    0 : trigger handled, nothing further to do.
 *    1 : trigger handled and hmi_p9_special_emu was flagged for a
 *        user-mode vector CI load (emulation to be done later).
 */
long hmi_handle_debugtrig(struct pt_regs *regs)
{
	unsigned long hmer = mfspr(SPRN_HMER);
	long ret = 0;

	/* Not our trigger, or we don't know what it means on this CPU. */
	if (!((hmer & HMER_DEBUG_TRIG)
	      && hmer_debug_trig_function != DTRIG_UNKNOWN))
		return -1;

	hmer &= ~HMER_DEBUG_TRIG;
	/*
	 * Clear the trigger bit in the hardware register.  NOTE(review):
	 * writing ~HMER_DEBUG_TRIG appears to rely on HMER's write-to-clear
	 * (AND-style) semantics so only that bit is cleared — confirm in
	 * the Power ISA.
	 */
	mtspr(SPRN_HMER, ~HMER_DEBUG_TRIG);

	switch (hmer_debug_trig_function) {
	case DTRIG_VECTOR_CI:
		/*
		 * Only flag emulation for user mode; the flag value 1 also
		 * becomes the return value.
		 */
		if (regs && user_mode(regs))
			ret = local_paca->hmi_p9_special_emu = 1;

		break;

	default:
		break;
	}

	/*
	 * Other enabled HMER bits are still pending: let the normal HMI
	 * path deal with them.
	 */
	if (hmer & mfspr(SPRN_HMEER))
		return -1;

	return ret;
}
615
616
617
618
/*
 * Real-mode HMI entry point.  Tries the debug-trigger fast path first;
 * otherwise waits for sibling subcore guests to exit, runs the platform
 * early handler if any, and waits for timebase resync.  Returns the
 * debug-trigger result (0 or 1) when it handled the HMI, else 1.
 */
long hmi_exception_realmode(struct pt_regs *regs)
{
	int ret;

	__this_cpu_inc(irq_stat.hmi_exceptions);

	/* ret >= 0 means the debug trigger path fully handled it. */
	ret = hmi_handle_debugtrig(regs);
	if (ret >= 0)
		return ret;

	/* Must precede the platform handler — drain sibling subcores first. */
	wait_for_subcore_guest_exit();

	if (ppc_md.hmi_exception_early)
		ppc_md.hmi_exception_early(regs);

	wait_for_tb_resync();

	return 1;
}
638