1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22#undef DEBUG
23#define pr_fmt(fmt) "mce: " fmt
24
25#include <linux/hardirq.h>
26#include <linux/types.h>
27#include <linux/ptrace.h>
28#include <linux/percpu.h>
29#include <linux/export.h>
30#include <linux/irq_work.h>
31
32#include <asm/machdep.h>
33#include <asm/mce.h>
34
35static DEFINE_PER_CPU(int, mce_nest_count);
36static DEFINE_PER_CPU(struct machine_check_event[MAX_MC_EVT], mce_event);
37
38
39static DEFINE_PER_CPU(int, mce_queue_count);
40static DEFINE_PER_CPU(struct machine_check_event[MAX_MC_EVT], mce_event_queue);
41
42
43static DEFINE_PER_CPU(int, mce_ue_count);
44static DEFINE_PER_CPU(struct machine_check_event[MAX_MC_EVT],
45 mce_ue_event_queue);
46
47static void machine_check_process_queued_event(struct irq_work *work);
48void machine_check_ue_event(struct machine_check_event *evt);
49static void machine_process_ue_event(struct work_struct *work);
50
51static struct irq_work mce_event_process_work = {
52 .func = machine_check_process_queued_event,
53};
54
55DECLARE_WORK(mce_ue_event_work, machine_process_ue_event);
56
57static void mce_set_error_info(struct machine_check_event *mce,
58 struct mce_error_info *mce_err)
59{
60 mce->error_type = mce_err->error_type;
61 switch (mce_err->error_type) {
62 case MCE_ERROR_TYPE_UE:
63 mce->u.ue_error.ue_error_type = mce_err->u.ue_error_type;
64 break;
65 case MCE_ERROR_TYPE_SLB:
66 mce->u.slb_error.slb_error_type = mce_err->u.slb_error_type;
67 break;
68 case MCE_ERROR_TYPE_ERAT:
69 mce->u.erat_error.erat_error_type = mce_err->u.erat_error_type;
70 break;
71 case MCE_ERROR_TYPE_TLB:
72 mce->u.tlb_error.tlb_error_type = mce_err->u.tlb_error_type;
73 break;
74 case MCE_ERROR_TYPE_USER:
75 mce->u.user_error.user_error_type = mce_err->u.user_error_type;
76 break;
77 case MCE_ERROR_TYPE_RA:
78 mce->u.ra_error.ra_error_type = mce_err->u.ra_error_type;
79 break;
80 case MCE_ERROR_TYPE_LINK:
81 mce->u.link_error.link_error_type = mce_err->u.link_error_type;
82 break;
83 case MCE_ERROR_TYPE_UNKNOWN:
84 default:
85 break;
86 }
87}
88
89
90
91
92
93void save_mce_event(struct pt_regs *regs, long handled,
94 struct mce_error_info *mce_err,
95 uint64_t nip, uint64_t addr, uint64_t phys_addr)
96{
97 int index = __this_cpu_inc_return(mce_nest_count) - 1;
98 struct machine_check_event *mce = this_cpu_ptr(&mce_event[index]);
99
100
101
102
103
104
105 if (index >= MAX_MC_EVT)
106 return;
107
108
109 mce->version = MCE_V1;
110 mce->srr0 = nip;
111 mce->srr1 = regs->msr;
112 mce->gpr3 = regs->gpr[3];
113 mce->in_use = 1;
114
115
116 if (handled && (regs->msr & MSR_RI))
117 mce->disposition = MCE_DISPOSITION_RECOVERED;
118 else
119 mce->disposition = MCE_DISPOSITION_NOT_RECOVERED;
120
121 mce->initiator = mce_err->initiator;
122 mce->severity = mce_err->severity;
123
124
125
126
127 mce_set_error_info(mce, mce_err);
128
129 if (!addr)
130 return;
131
132 if (mce->error_type == MCE_ERROR_TYPE_TLB) {
133 mce->u.tlb_error.effective_address_provided = true;
134 mce->u.tlb_error.effective_address = addr;
135 } else if (mce->error_type == MCE_ERROR_TYPE_SLB) {
136 mce->u.slb_error.effective_address_provided = true;
137 mce->u.slb_error.effective_address = addr;
138 } else if (mce->error_type == MCE_ERROR_TYPE_ERAT) {
139 mce->u.erat_error.effective_address_provided = true;
140 mce->u.erat_error.effective_address = addr;
141 } else if (mce->error_type == MCE_ERROR_TYPE_USER) {
142 mce->u.user_error.effective_address_provided = true;
143 mce->u.user_error.effective_address = addr;
144 } else if (mce->error_type == MCE_ERROR_TYPE_RA) {
145 mce->u.ra_error.effective_address_provided = true;
146 mce->u.ra_error.effective_address = addr;
147 } else if (mce->error_type == MCE_ERROR_TYPE_LINK) {
148 mce->u.link_error.effective_address_provided = true;
149 mce->u.link_error.effective_address = addr;
150 } else if (mce->error_type == MCE_ERROR_TYPE_UE) {
151 mce->u.ue_error.effective_address_provided = true;
152 mce->u.ue_error.effective_address = addr;
153 if (phys_addr != ULONG_MAX) {
154 mce->u.ue_error.physical_address_provided = true;
155 mce->u.ue_error.physical_address = phys_addr;
156 machine_check_ue_event(mce);
157 }
158 }
159 return;
160}
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179int get_mce_event(struct machine_check_event *mce, bool release)
180{
181 int index = __this_cpu_read(mce_nest_count) - 1;
182 struct machine_check_event *mc_evt;
183 int ret = 0;
184
185
186 if (index < 0)
187 return ret;
188
189
190 if (index < MAX_MC_EVT) {
191 mc_evt = this_cpu_ptr(&mce_event[index]);
192
193 if (mce)
194 *mce = *mc_evt;
195 if (release)
196 mc_evt->in_use = 0;
197 ret = 1;
198 }
199
200 if (release)
201 __this_cpu_dec(mce_nest_count);
202
203 return ret;
204}
205
206void release_mce_event(void)
207{
208 get_mce_event(NULL, true);
209}
210
211
212
213
214
215void machine_check_ue_event(struct machine_check_event *evt)
216{
217 int index;
218
219 index = __this_cpu_inc_return(mce_ue_count) - 1;
220
221 if (index >= MAX_MC_EVT) {
222 __this_cpu_dec(mce_ue_count);
223 return;
224 }
225 memcpy(this_cpu_ptr(&mce_ue_event_queue[index]), evt, sizeof(*evt));
226
227
228 schedule_work(&mce_ue_event_work);
229}
230
231
232
233
234void machine_check_queue_event(void)
235{
236 int index;
237 struct machine_check_event evt;
238
239 if (!get_mce_event(&evt, MCE_EVENT_RELEASE))
240 return;
241
242 index = __this_cpu_inc_return(mce_queue_count) - 1;
243
244 if (index >= MAX_MC_EVT) {
245 __this_cpu_dec(mce_queue_count);
246 return;
247 }
248 memcpy(this_cpu_ptr(&mce_event_queue[index]), &evt, sizeof(evt));
249
250
251 irq_work_queue(&mce_event_process_work);
252}
253
254
255
256
257static void machine_process_ue_event(struct work_struct *work)
258{
259 int index;
260 struct machine_check_event *evt;
261
262 while (__this_cpu_read(mce_ue_count) > 0) {
263 index = __this_cpu_read(mce_ue_count) - 1;
264 evt = this_cpu_ptr(&mce_ue_event_queue[index]);
265#ifdef CONFIG_MEMORY_FAILURE
266
267
268
269
270 if (evt->error_type == MCE_ERROR_TYPE_UE) {
271 if (evt->u.ue_error.physical_address_provided) {
272 unsigned long pfn;
273
274 pfn = evt->u.ue_error.physical_address >>
275 PAGE_SHIFT;
276 memory_failure(pfn, 0);
277 } else
278 pr_warn("Failed to identify bad address from "
279 "where the uncorrectable error (UE) "
280 "was generated\n");
281 }
282#endif
283 __this_cpu_dec(mce_ue_count);
284 }
285}
286
287
288
289
290static void machine_check_process_queued_event(struct irq_work *work)
291{
292 int index;
293 struct machine_check_event *evt;
294
295 add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE);
296
297
298
299
300
301 while (__this_cpu_read(mce_queue_count) > 0) {
302 index = __this_cpu_read(mce_queue_count) - 1;
303 evt = this_cpu_ptr(&mce_event_queue[index]);
304 machine_check_print_event_info(evt, false);
305 __this_cpu_dec(mce_queue_count);
306 }
307}
308
309void machine_check_print_event_info(struct machine_check_event *evt,
310 bool user_mode)
311{
312 const char *level, *sevstr, *subtype;
313 static const char *mc_ue_types[] = {
314 "Indeterminate",
315 "Instruction fetch",
316 "Page table walk ifetch",
317 "Load/Store",
318 "Page table walk Load/Store",
319 };
320 static const char *mc_slb_types[] = {
321 "Indeterminate",
322 "Parity",
323 "Multihit",
324 };
325 static const char *mc_erat_types[] = {
326 "Indeterminate",
327 "Parity",
328 "Multihit",
329 };
330 static const char *mc_tlb_types[] = {
331 "Indeterminate",
332 "Parity",
333 "Multihit",
334 };
335 static const char *mc_user_types[] = {
336 "Indeterminate",
337 "tlbie(l) invalid",
338 };
339 static const char *mc_ra_types[] = {
340 "Indeterminate",
341 "Instruction fetch (bad)",
342 "Instruction fetch (foreign)",
343 "Page table walk ifetch (bad)",
344 "Page table walk ifetch (foreign)",
345 "Load (bad)",
346 "Store (bad)",
347 "Page table walk Load/Store (bad)",
348 "Page table walk Load/Store (foreign)",
349 "Load/Store (foreign)",
350 };
351 static const char *mc_link_types[] = {
352 "Indeterminate",
353 "Instruction fetch (timeout)",
354 "Page table walk ifetch (timeout)",
355 "Load (timeout)",
356 "Store (timeout)",
357 "Page table walk Load/Store (timeout)",
358 };
359
360
361 if (evt->version != MCE_V1) {
362 pr_err("Machine Check Exception, Unknown event version %d !\n",
363 evt->version);
364 return;
365 }
366 switch (evt->severity) {
367 case MCE_SEV_NO_ERROR:
368 level = KERN_INFO;
369 sevstr = "Harmless";
370 break;
371 case MCE_SEV_WARNING:
372 level = KERN_WARNING;
373 sevstr = "";
374 break;
375 case MCE_SEV_ERROR_SYNC:
376 level = KERN_ERR;
377 sevstr = "Severe";
378 break;
379 case MCE_SEV_FATAL:
380 default:
381 level = KERN_ERR;
382 sevstr = "Fatal";
383 break;
384 }
385
386 printk("%s%s Machine check interrupt [%s]\n", level, sevstr,
387 evt->disposition == MCE_DISPOSITION_RECOVERED ?
388 "Recovered" : "Not recovered");
389
390 if (user_mode) {
391 printk("%s NIP: [%016llx] PID: %d Comm: %s\n", level,
392 evt->srr0, current->pid, current->comm);
393 } else {
394 printk("%s NIP [%016llx]: %pS\n", level, evt->srr0,
395 (void *)evt->srr0);
396 }
397
398 printk("%s Initiator: %s\n", level,
399 evt->initiator == MCE_INITIATOR_CPU ? "CPU" : "Unknown");
400 switch (evt->error_type) {
401 case MCE_ERROR_TYPE_UE:
402 subtype = evt->u.ue_error.ue_error_type <
403 ARRAY_SIZE(mc_ue_types) ?
404 mc_ue_types[evt->u.ue_error.ue_error_type]
405 : "Unknown";
406 printk("%s Error type: UE [%s]\n", level, subtype);
407 if (evt->u.ue_error.effective_address_provided)
408 printk("%s Effective address: %016llx\n",
409 level, evt->u.ue_error.effective_address);
410 if (evt->u.ue_error.physical_address_provided)
411 printk("%s Physical address: %016llx\n",
412 level, evt->u.ue_error.physical_address);
413 break;
414 case MCE_ERROR_TYPE_SLB:
415 subtype = evt->u.slb_error.slb_error_type <
416 ARRAY_SIZE(mc_slb_types) ?
417 mc_slb_types[evt->u.slb_error.slb_error_type]
418 : "Unknown";
419 printk("%s Error type: SLB [%s]\n", level, subtype);
420 if (evt->u.slb_error.effective_address_provided)
421 printk("%s Effective address: %016llx\n",
422 level, evt->u.slb_error.effective_address);
423 break;
424 case MCE_ERROR_TYPE_ERAT:
425 subtype = evt->u.erat_error.erat_error_type <
426 ARRAY_SIZE(mc_erat_types) ?
427 mc_erat_types[evt->u.erat_error.erat_error_type]
428 : "Unknown";
429 printk("%s Error type: ERAT [%s]\n", level, subtype);
430 if (evt->u.erat_error.effective_address_provided)
431 printk("%s Effective address: %016llx\n",
432 level, evt->u.erat_error.effective_address);
433 break;
434 case MCE_ERROR_TYPE_TLB:
435 subtype = evt->u.tlb_error.tlb_error_type <
436 ARRAY_SIZE(mc_tlb_types) ?
437 mc_tlb_types[evt->u.tlb_error.tlb_error_type]
438 : "Unknown";
439 printk("%s Error type: TLB [%s]\n", level, subtype);
440 if (evt->u.tlb_error.effective_address_provided)
441 printk("%s Effective address: %016llx\n",
442 level, evt->u.tlb_error.effective_address);
443 break;
444 case MCE_ERROR_TYPE_USER:
445 subtype = evt->u.user_error.user_error_type <
446 ARRAY_SIZE(mc_user_types) ?
447 mc_user_types[evt->u.user_error.user_error_type]
448 : "Unknown";
449 printk("%s Error type: User [%s]\n", level, subtype);
450 if (evt->u.user_error.effective_address_provided)
451 printk("%s Effective address: %016llx\n",
452 level, evt->u.user_error.effective_address);
453 break;
454 case MCE_ERROR_TYPE_RA:
455 subtype = evt->u.ra_error.ra_error_type <
456 ARRAY_SIZE(mc_ra_types) ?
457 mc_ra_types[evt->u.ra_error.ra_error_type]
458 : "Unknown";
459 printk("%s Error type: Real address [%s]\n", level, subtype);
460 if (evt->u.ra_error.effective_address_provided)
461 printk("%s Effective address: %016llx\n",
462 level, evt->u.ra_error.effective_address);
463 break;
464 case MCE_ERROR_TYPE_LINK:
465 subtype = evt->u.link_error.link_error_type <
466 ARRAY_SIZE(mc_link_types) ?
467 mc_link_types[evt->u.link_error.link_error_type]
468 : "Unknown";
469 printk("%s Error type: Link [%s]\n", level, subtype);
470 if (evt->u.link_error.effective_address_provided)
471 printk("%s Effective address: %016llx\n",
472 level, evt->u.link_error.effective_address);
473 break;
474 default:
475 case MCE_ERROR_TYPE_UNKNOWN:
476 printk("%s Error type: Unknown\n", level);
477 break;
478 }
479}
480EXPORT_SYMBOL_GPL(machine_check_print_event_info);
481
482
483
484
485
486
487long machine_check_early(struct pt_regs *regs)
488{
489 long handled = 0;
490
491 __this_cpu_inc(irq_stat.mce_exceptions);
492
493 if (cur_cpu_spec && cur_cpu_spec->machine_check_early)
494 handled = cur_cpu_spec->machine_check_early(regs);
495 return handled;
496}
497
498
/*
 * What the HMI debug trigger is used for on this system. Set once by
 * init_debug_trig_function() and consulted by hmi_handle_debugtrig().
 */
static enum {
	DTRIG_UNKNOWN = 0,	/* default: purpose not determined */
	DTRIG_VECTOR_CI,	/* vector CI load */
	DTRIG_SUSPEND_ESCAPE,	/* TM suspend escape */
} hmer_debug_trig_function;
504
505static int init_debug_trig_function(void)
506{
507 int pvr;
508 struct device_node *cpun;
509 struct property *prop = NULL;
510 const char *str;
511
512
513 preempt_disable();
514 cpun = of_get_cpu_node(smp_processor_id(), NULL);
515 if (cpun) {
516 of_property_for_each_string(cpun, "ibm,hmi-special-triggers",
517 prop, str) {
518 if (strcmp(str, "bit17-vector-ci-load") == 0)
519 hmer_debug_trig_function = DTRIG_VECTOR_CI;
520 else if (strcmp(str, "bit17-tm-suspend-escape") == 0)
521 hmer_debug_trig_function = DTRIG_SUSPEND_ESCAPE;
522 }
523 of_node_put(cpun);
524 }
525 preempt_enable();
526
527
528 if (prop)
529 goto out;
530
531 pvr = mfspr(SPRN_PVR);
532
533 if ((PVR_VER(pvr) == PVR_POWER9) && (pvr & 0xe000) == 0) {
534
535 if ((pvr & 0xfff) >= 0x202)
536 hmer_debug_trig_function = DTRIG_SUSPEND_ESCAPE;
537
538 else if ((pvr & 0xfff) >= 0x200)
539 hmer_debug_trig_function = DTRIG_VECTOR_CI;
540 }
541
542 out:
543 switch (hmer_debug_trig_function) {
544 case DTRIG_VECTOR_CI:
545 pr_debug("HMI debug trigger used for vector CI load\n");
546 break;
547 case DTRIG_SUSPEND_ESCAPE:
548 pr_debug("HMI debug trigger used for TM suspend escape\n");
549 break;
550 default:
551 break;
552 }
553 return 0;
554}
555__initcall(init_debug_trig_function);
556
557
558
559
560
561
562
563
564long hmi_handle_debugtrig(struct pt_regs *regs)
565{
566 unsigned long hmer = mfspr(SPRN_HMER);
567 long ret = 0;
568
569
570 if (!((hmer & HMER_DEBUG_TRIG)
571 && hmer_debug_trig_function != DTRIG_UNKNOWN))
572 return -1;
573
574 hmer &= ~HMER_DEBUG_TRIG;
575
576 mtspr(SPRN_HMER, ~HMER_DEBUG_TRIG);
577
578 switch (hmer_debug_trig_function) {
579 case DTRIG_VECTOR_CI:
580
581
582
583
584
585 if (regs && user_mode(regs))
586 ret = local_paca->hmi_p9_special_emu = 1;
587
588 break;
589
590 default:
591 break;
592 }
593
594
595
596
597 if (hmer & mfspr(SPRN_HMEER))
598 return -1;
599
600 return ret;
601}
602
603
604
605
606long hmi_exception_realmode(struct pt_regs *regs)
607{
608 int ret;
609
610 __this_cpu_inc(irq_stat.hmi_exceptions);
611
612 ret = hmi_handle_debugtrig(regs);
613 if (ret >= 0)
614 return ret;
615
616 wait_for_subcore_guest_exit();
617
618 if (ppc_md.hmi_exception_early)
619 ppc_md.hmi_exception_early(regs);
620
621 wait_for_tb_resync();
622
623 return 1;
624}
625