1
2
3
4
5
6
7
8
/* Message prefix for this file; kept ahead of the includes below. */
#define pr_fmt(fmt)	"opal core: " fmt
10
11#include <linux/memblock.h>
12#include <linux/uaccess.h>
13#include <linux/proc_fs.h>
14#include <linux/elf.h>
15#include <linux/elfcore.h>
16#include <linux/kobject.h>
17#include <linux/sysfs.h>
18#include <linux/slab.h>
19#include <linux/crash_core.h>
20#include <linux/of.h>
21
22#include <asm/page.h>
23#include <asm/opal.h>
24#include <asm/fadump-internal.h>
25
26#include "opal-fadump.h"
27
/*
 * Upper bound on the number of OPAL memory regions tracked; sizes the
 * ptload_addr[]/ptload_size[] arrays in struct opalcore_config.
 */
#define MAX_PT_LOAD_CNT		8

/*
 * NT_AUXV descriptor layout: AUXV_CNT (type, value) pairs followed by a
 * single terminating entry, each slot being one Elf64_Off wide.
 */
#define AUXV_CNT		1
#define AUXV_DESC_SZ		(((2 * AUXV_CNT) + 1) * sizeof(Elf64_Off))
33
34struct opalcore_config {
35 u32 num_cpus;
36
37 u32 crashing_cpu;
38
39
40 u64 cpu_state_destination_vaddr;
41 u64 cpu_state_data_size;
42 u64 cpu_state_entry_size;
43
44
45 u64 ptload_addr[MAX_PT_LOAD_CNT];
46 u64 ptload_size[MAX_PT_LOAD_CNT];
47 u64 ptload_cnt;
48
49
50 Elf64_Phdr *ptload_phdr;
51
52
53 size_t opalcore_size;
54
55
56 size_t opalcorebuf_sz;
57 char *opalcorebuf;
58
59
60 char auxv_buf[AUXV_DESC_SZ];
61};
62
63struct opalcore {
64 struct list_head list;
65 u64 paddr;
66 size_t size;
67 loff_t offset;
68};
69
70static LIST_HEAD(opalcore_list);
71static struct opalcore_config *oc_conf;
72static const struct opal_mpipl_fadump *opalc_metadata;
73static const struct opal_mpipl_fadump *opalc_cpu_metadata;
74static struct kobject *mpipl_kobj;
75
76
77
78
79
80bool kernel_initiated;
81
82static struct opalcore * __init get_new_element(void)
83{
84 return kzalloc(sizeof(struct opalcore), GFP_KERNEL);
85}
86
87static inline int is_opalcore_usable(void)
88{
89 return (oc_conf && oc_conf->opalcorebuf != NULL) ? 1 : 0;
90}
91
92static Elf64_Word *append_elf64_note(Elf64_Word *buf, char *name,
93 u32 type, void *data,
94 size_t data_len)
95{
96 Elf64_Nhdr *note = (Elf64_Nhdr *)buf;
97 Elf64_Word namesz = strlen(name) + 1;
98
99 note->n_namesz = cpu_to_be32(namesz);
100 note->n_descsz = cpu_to_be32(data_len);
101 note->n_type = cpu_to_be32(type);
102 buf += DIV_ROUND_UP(sizeof(*note), sizeof(Elf64_Word));
103 memcpy(buf, name, namesz);
104 buf += DIV_ROUND_UP(namesz, sizeof(Elf64_Word));
105 memcpy(buf, data, data_len);
106 buf += DIV_ROUND_UP(data_len, sizeof(Elf64_Word));
107
108 return buf;
109}
110
111static void fill_prstatus(struct elf_prstatus *prstatus, int pir,
112 struct pt_regs *regs)
113{
114 memset(prstatus, 0, sizeof(struct elf_prstatus));
115 elf_core_copy_kernel_regs(&(prstatus->pr_reg), regs);
116
117
118
119
120
121
122 prstatus->common.pr_pid = cpu_to_be32(100 + pir);
123 prstatus->common.pr_ppid = cpu_to_be32(1);
124
125
126
127
128
129 if (pir == oc_conf->crashing_cpu) {
130 short sig;
131
132 sig = kernel_initiated ? SIGUSR1 : SIGTERM;
133 prstatus->common.pr_cursig = cpu_to_be16(sig);
134 }
135}
136
137static Elf64_Word *auxv_to_elf64_notes(Elf64_Word *buf,
138 u64 opal_boot_entry)
139{
140 Elf64_Off *bufp = (Elf64_Off *)oc_conf->auxv_buf;
141 int idx = 0;
142
143 memset(bufp, 0, AUXV_DESC_SZ);
144
145
146 bufp[idx++] = cpu_to_be64(AT_ENTRY);
147 bufp[idx++] = cpu_to_be64(opal_boot_entry);
148
149
150 bufp[idx++] = cpu_to_be64(AT_NULL);
151
152 buf = append_elf64_note(buf, CRASH_CORE_NOTE_NAME, NT_AUXV,
153 oc_conf->auxv_buf, AUXV_DESC_SZ);
154 return buf;
155}
156
157
158
159
160
161static ssize_t read_opalcore(struct file *file, struct kobject *kobj,
162 struct bin_attribute *bin_attr, char *to,
163 loff_t pos, size_t count)
164{
165 struct opalcore *m;
166 ssize_t tsz, avail;
167 loff_t tpos = pos;
168
169 if (pos >= oc_conf->opalcore_size)
170 return 0;
171
172
173 avail = oc_conf->opalcore_size - pos;
174 if (count > avail)
175 count = avail;
176
177 if (count == 0)
178 return 0;
179
180
181 if (tpos < oc_conf->opalcorebuf_sz) {
182 tsz = min_t(size_t, oc_conf->opalcorebuf_sz - tpos, count);
183 memcpy(to, oc_conf->opalcorebuf + tpos, tsz);
184 to += tsz;
185 tpos += tsz;
186 count -= tsz;
187 }
188
189 list_for_each_entry(m, &opalcore_list, list) {
190
191 if (count == 0)
192 break;
193
194 if (tpos < m->offset + m->size) {
195 void *addr;
196
197 tsz = min_t(size_t, m->offset + m->size - tpos, count);
198 addr = (void *)(m->paddr + tpos - m->offset);
199 memcpy(to, __va(addr), tsz);
200 to += tsz;
201 tpos += tsz;
202 count -= tsz;
203 }
204 }
205
206 return (tpos - pos);
207}
208
209static struct bin_attribute opal_core_attr = {
210 .attr = {.name = "core", .mode = 0400},
211 .read = read_opalcore
212};
213
214
215
216
217
218
219
220
221static Elf64_Word * __init opalcore_append_cpu_notes(Elf64_Word *buf)
222{
223 u32 thread_pir, size_per_thread, regs_offset, regs_cnt, reg_esize;
224 struct hdat_fadump_thread_hdr *thdr;
225 struct elf_prstatus prstatus;
226 Elf64_Word *first_cpu_note;
227 struct pt_regs regs;
228 char *bufp;
229 int i;
230
231 size_per_thread = oc_conf->cpu_state_entry_size;
232 bufp = __va(oc_conf->cpu_state_destination_vaddr);
233
234
235
236
237
238
239 thdr = (struct hdat_fadump_thread_hdr *)bufp;
240 regs_offset = (offsetof(struct hdat_fadump_thread_hdr, offset) +
241 be32_to_cpu(thdr->offset));
242 reg_esize = be32_to_cpu(thdr->esize);
243 regs_cnt = be32_to_cpu(thdr->ecnt);
244
245 pr_debug("--------CPU State Data------------\n");
246 pr_debug("NumCpus : %u\n", oc_conf->num_cpus);
247 pr_debug("\tOffset: %u, Entry size: %u, Cnt: %u\n",
248 regs_offset, reg_esize, regs_cnt);
249
250
251
252
253
254 first_cpu_note = buf;
255 buf = append_elf64_note(buf, CRASH_CORE_NOTE_NAME, NT_PRSTATUS,
256 &prstatus, sizeof(prstatus));
257
258 for (i = 0; i < oc_conf->num_cpus; i++, bufp += size_per_thread) {
259 thdr = (struct hdat_fadump_thread_hdr *)bufp;
260 thread_pir = be32_to_cpu(thdr->pir);
261
262 pr_debug("[%04d] PIR: 0x%x, core state: 0x%02x\n",
263 i, thread_pir, thdr->core_state);
264
265
266
267
268
269
270
271 if (thdr->core_state == HDAT_FADUMP_CORE_INACTIVE)
272 continue;
273
274 opal_fadump_read_regs((bufp + regs_offset), regs_cnt,
275 reg_esize, false, ®s);
276
277 pr_debug("PIR 0x%x - R1 : 0x%llx, NIP : 0x%llx\n", thread_pir,
278 be64_to_cpu(regs.gpr[1]), be64_to_cpu(regs.nip));
279 fill_prstatus(&prstatus, thread_pir, ®s);
280
281 if (thread_pir != oc_conf->crashing_cpu) {
282 buf = append_elf64_note(buf, CRASH_CORE_NOTE_NAME,
283 NT_PRSTATUS, &prstatus,
284 sizeof(prstatus));
285 } else {
286
287
288
289
290 append_elf64_note(first_cpu_note, CRASH_CORE_NOTE_NAME,
291 NT_PRSTATUS, &prstatus,
292 sizeof(prstatus));
293 }
294 }
295
296 return buf;
297}
298
299static int __init create_opalcore(void)
300{
301 u64 opal_boot_entry, opal_base_addr, paddr;
302 u32 hdr_size, cpu_notes_size, count;
303 struct device_node *dn;
304 struct opalcore *new;
305 loff_t opalcore_off;
306 struct page *page;
307 Elf64_Phdr *phdr;
308 Elf64_Ehdr *elf;
309 int i, ret;
310 char *bufp;
311
312
313 hdr_size = (sizeof(Elf64_Ehdr) +
314 ((oc_conf->ptload_cnt + 1) * sizeof(Elf64_Phdr)));
315 cpu_notes_size = ((oc_conf->num_cpus * (CRASH_CORE_NOTE_HEAD_BYTES +
316 CRASH_CORE_NOTE_NAME_BYTES +
317 CRASH_CORE_NOTE_DESC_BYTES)) +
318 (CRASH_CORE_NOTE_HEAD_BYTES +
319 CRASH_CORE_NOTE_NAME_BYTES + AUXV_DESC_SZ));
320
321
322 oc_conf->opalcorebuf_sz = PAGE_ALIGN(hdr_size + cpu_notes_size);
323 oc_conf->opalcorebuf = alloc_pages_exact(oc_conf->opalcorebuf_sz,
324 GFP_KERNEL | __GFP_ZERO);
325 if (!oc_conf->opalcorebuf) {
326 pr_err("Not enough memory to setup OPAL core (size: %lu)\n",
327 oc_conf->opalcorebuf_sz);
328 oc_conf->opalcorebuf_sz = 0;
329 return -ENOMEM;
330 }
331 count = oc_conf->opalcorebuf_sz / PAGE_SIZE;
332 page = virt_to_page(oc_conf->opalcorebuf);
333 for (i = 0; i < count; i++)
334 mark_page_reserved(page + i);
335
336 pr_debug("opalcorebuf = 0x%llx\n", (u64)oc_conf->opalcorebuf);
337
338
339 dn = of_find_node_by_name(NULL, "ibm,opal");
340 if (dn) {
341 ret = of_property_read_u64(dn, "opal-base-address",
342 &opal_base_addr);
343 pr_debug("opal-base-address: %llx\n", opal_base_addr);
344 ret |= of_property_read_u64(dn, "opal-boot-address",
345 &opal_boot_entry);
346 pr_debug("opal-boot-address: %llx\n", opal_boot_entry);
347 }
348 if (!dn || ret)
349 pr_warn("WARNING: Failed to read OPAL base & entry values\n");
350
351
352 count = 0;
353
354 bufp = oc_conf->opalcorebuf;
355 elf = (Elf64_Ehdr *)bufp;
356 bufp += sizeof(Elf64_Ehdr);
357 memcpy(elf->e_ident, ELFMAG, SELFMAG);
358 elf->e_ident[EI_CLASS] = ELF_CLASS;
359 elf->e_ident[EI_DATA] = ELFDATA2MSB;
360 elf->e_ident[EI_VERSION] = EV_CURRENT;
361 elf->e_ident[EI_OSABI] = ELF_OSABI;
362 memset(elf->e_ident+EI_PAD, 0, EI_NIDENT-EI_PAD);
363 elf->e_type = cpu_to_be16(ET_CORE);
364 elf->e_machine = cpu_to_be16(ELF_ARCH);
365 elf->e_version = cpu_to_be32(EV_CURRENT);
366 elf->e_entry = 0;
367 elf->e_phoff = cpu_to_be64(sizeof(Elf64_Ehdr));
368 elf->e_shoff = 0;
369 elf->e_flags = 0;
370
371 elf->e_ehsize = cpu_to_be16(sizeof(Elf64_Ehdr));
372 elf->e_phentsize = cpu_to_be16(sizeof(Elf64_Phdr));
373 elf->e_phnum = 0;
374 elf->e_shentsize = 0;
375 elf->e_shnum = 0;
376 elf->e_shstrndx = 0;
377
378 phdr = (Elf64_Phdr *)bufp;
379 bufp += sizeof(Elf64_Phdr);
380 phdr->p_type = cpu_to_be32(PT_NOTE);
381 phdr->p_flags = 0;
382 phdr->p_align = 0;
383 phdr->p_paddr = phdr->p_vaddr = 0;
384 phdr->p_offset = cpu_to_be64(hdr_size);
385 phdr->p_filesz = phdr->p_memsz = cpu_to_be64(cpu_notes_size);
386 count++;
387
388 opalcore_off = oc_conf->opalcorebuf_sz;
389 oc_conf->ptload_phdr = (Elf64_Phdr *)bufp;
390 paddr = 0;
391 for (i = 0; i < oc_conf->ptload_cnt; i++) {
392 phdr = (Elf64_Phdr *)bufp;
393 bufp += sizeof(Elf64_Phdr);
394 phdr->p_type = cpu_to_be32(PT_LOAD);
395 phdr->p_flags = cpu_to_be32(PF_R|PF_W|PF_X);
396 phdr->p_align = 0;
397
398 new = get_new_element();
399 if (!new)
400 return -ENOMEM;
401 new->paddr = oc_conf->ptload_addr[i];
402 new->size = oc_conf->ptload_size[i];
403 new->offset = opalcore_off;
404 list_add_tail(&new->list, &opalcore_list);
405
406 phdr->p_paddr = cpu_to_be64(paddr);
407 phdr->p_vaddr = cpu_to_be64(opal_base_addr + paddr);
408 phdr->p_filesz = phdr->p_memsz =
409 cpu_to_be64(oc_conf->ptload_size[i]);
410 phdr->p_offset = cpu_to_be64(opalcore_off);
411
412 count++;
413 opalcore_off += oc_conf->ptload_size[i];
414 paddr += oc_conf->ptload_size[i];
415 }
416
417 elf->e_phnum = cpu_to_be16(count);
418
419 bufp = (char *)opalcore_append_cpu_notes((Elf64_Word *)bufp);
420 bufp = (char *)auxv_to_elf64_notes((Elf64_Word *)bufp, opal_boot_entry);
421
422 oc_conf->opalcore_size = opalcore_off;
423 return 0;
424}
425
426static void opalcore_cleanup(void)
427{
428 if (oc_conf == NULL)
429 return;
430
431
432 sysfs_remove_bin_file(mpipl_kobj, &opal_core_attr);
433 oc_conf->ptload_phdr = NULL;
434 oc_conf->ptload_cnt = 0;
435
436
437 if (oc_conf->opalcorebuf) {
438 void *end = (void *)((u64)oc_conf->opalcorebuf +
439 oc_conf->opalcorebuf_sz);
440
441 free_reserved_area(oc_conf->opalcorebuf, end, -1, NULL);
442 oc_conf->opalcorebuf = NULL;
443 oc_conf->opalcorebuf_sz = 0;
444 }
445
446 kfree(oc_conf);
447 oc_conf = NULL;
448}
449__exitcall(opalcore_cleanup);
450
451static void __init opalcore_config_init(void)
452{
453 u32 idx, cpu_data_version;
454 struct device_node *np;
455 const __be32 *prop;
456 u64 addr = 0;
457 int i, ret;
458
459 np = of_find_node_by_path("/ibm,opal/dump");
460 if (np == NULL)
461 return;
462
463 if (!of_device_is_compatible(np, "ibm,opal-dump")) {
464 pr_warn("Support missing for this f/w version!\n");
465 return;
466 }
467
468
469 prop = of_get_property(np, "mpipl-boot", NULL);
470 if (!prop) {
471 of_node_put(np);
472 return;
473 }
474
475
476 ret = opal_mpipl_query_tag(OPAL_MPIPL_TAG_OPAL, &addr);
477 if ((ret != OPAL_SUCCESS) || !addr) {
478 pr_err("Failed to get OPAL metadata (%d)\n", ret);
479 goto error_out;
480 }
481
482 addr = be64_to_cpu(addr);
483 pr_debug("OPAL metadata addr: %llx\n", addr);
484 opalc_metadata = __va(addr);
485
486
487 ret = opal_mpipl_query_tag(OPAL_MPIPL_TAG_CPU, &addr);
488 if ((ret != OPAL_SUCCESS) || !addr) {
489 pr_err("Failed to get OPAL CPU metadata (%d)\n", ret);
490 goto error_out;
491 }
492
493 addr = be64_to_cpu(addr);
494 pr_debug("CPU metadata addr: %llx\n", addr);
495 opalc_cpu_metadata = __va(addr);
496
497
498 oc_conf = kzalloc(sizeof(struct opalcore_config), GFP_KERNEL);
499 if (oc_conf == NULL)
500 goto error_out;
501
502
503 if (opalc_metadata->version != OPAL_MPIPL_VERSION) {
504 pr_warn("Supported OPAL metadata version: %u, found: %u!\n",
505 OPAL_MPIPL_VERSION, opalc_metadata->version);
506 pr_warn("WARNING: F/W using newer OPAL metadata format!!\n");
507 }
508
509 oc_conf->ptload_cnt = 0;
510 idx = be32_to_cpu(opalc_metadata->region_cnt);
511 if (idx > MAX_PT_LOAD_CNT) {
512 pr_warn("WARNING: OPAL regions count (%d) adjusted to limit (%d)",
513 idx, MAX_PT_LOAD_CNT);
514 idx = MAX_PT_LOAD_CNT;
515 }
516 for (i = 0; i < idx; i++) {
517 oc_conf->ptload_addr[oc_conf->ptload_cnt] =
518 be64_to_cpu(opalc_metadata->region[i].dest);
519 oc_conf->ptload_size[oc_conf->ptload_cnt++] =
520 be64_to_cpu(opalc_metadata->region[i].size);
521 }
522 oc_conf->ptload_cnt = i;
523 oc_conf->crashing_cpu = be32_to_cpu(opalc_metadata->crashing_pir);
524
525 if (!oc_conf->ptload_cnt) {
526 pr_err("OPAL memory regions not found\n");
527 goto error_out;
528 }
529
530
531 cpu_data_version = be32_to_cpu(opalc_cpu_metadata->cpu_data_version);
532 if (cpu_data_version != HDAT_FADUMP_CPU_DATA_VER) {
533 pr_warn("Supported CPU data version: %u, found: %u!\n",
534 HDAT_FADUMP_CPU_DATA_VER, cpu_data_version);
535 pr_warn("WARNING: F/W using newer CPU state data format!!\n");
536 }
537
538 addr = be64_to_cpu(opalc_cpu_metadata->region[0].dest);
539 if (!addr) {
540 pr_err("CPU state data not found!\n");
541 goto error_out;
542 }
543 oc_conf->cpu_state_destination_vaddr = (u64)__va(addr);
544
545 oc_conf->cpu_state_data_size =
546 be64_to_cpu(opalc_cpu_metadata->region[0].size);
547 oc_conf->cpu_state_entry_size =
548 be32_to_cpu(opalc_cpu_metadata->cpu_data_size);
549
550 if ((oc_conf->cpu_state_entry_size == 0) ||
551 (oc_conf->cpu_state_entry_size > oc_conf->cpu_state_data_size)) {
552 pr_err("CPU state data is invalid.\n");
553 goto error_out;
554 }
555 oc_conf->num_cpus = (oc_conf->cpu_state_data_size /
556 oc_conf->cpu_state_entry_size);
557
558 of_node_put(np);
559 return;
560
561error_out:
562 pr_err("Could not export /sys/firmware/opal/core\n");
563 opalcore_cleanup();
564 of_node_put(np);
565}
566
567static ssize_t release_core_store(struct kobject *kobj,
568 struct kobj_attribute *attr,
569 const char *buf, size_t count)
570{
571 int input = -1;
572
573 if (kstrtoint(buf, 0, &input))
574 return -EINVAL;
575
576 if (input == 1) {
577 if (oc_conf == NULL) {
578 pr_err("'/sys/firmware/opal/core' file not accessible!\n");
579 return -EPERM;
580 }
581
582
583
584
585
586 opalcore_cleanup();
587 } else
588 return -EINVAL;
589
590 return count;
591}
592
593static struct kobj_attribute opalcore_rel_attr = __ATTR_WO(release_core);
594
595static struct attribute *mpipl_attr[] = {
596 &opalcore_rel_attr.attr,
597 NULL,
598};
599
600static struct bin_attribute *mpipl_bin_attr[] = {
601 &opal_core_attr,
602 NULL,
603
604};
605
606static struct attribute_group mpipl_group = {
607 .attrs = mpipl_attr,
608 .bin_attrs = mpipl_bin_attr,
609};
610
611static int __init opalcore_init(void)
612{
613 int rc = -1;
614
615 opalcore_config_init();
616
617 if (oc_conf == NULL)
618 return rc;
619
620 create_opalcore();
621
622
623
624
625
626 if (!(is_opalcore_usable())) {
627 pr_err("Failed to export /sys/firmware/opal/mpipl/core\n");
628 opalcore_cleanup();
629 return rc;
630 }
631
632
633 opal_core_attr.size = oc_conf->opalcore_size;
634
635 mpipl_kobj = kobject_create_and_add("mpipl", opal_kobj);
636 if (!mpipl_kobj) {
637 pr_err("unable to create mpipl kobject\n");
638 return -ENOMEM;
639 }
640
641
642 rc = sysfs_create_group(mpipl_kobj, &mpipl_group);
643 if (rc) {
644 pr_err("mpipl sysfs group creation failed (%d)", rc);
645 opalcore_cleanup();
646 return rc;
647 }
648
649
650
651
652 rc = compat_only_sysfs_link_entry_to_kobj(opal_kobj, mpipl_kobj,
653 "core", NULL);
654 if (rc) {
655 pr_err("unable to create core symlink (%d)\n", rc);
656 return rc;
657 }
658
659 return 0;
660}
661fs_initcall(opalcore_init);
662