1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27#include "qemu/osdep.h"
28#include "qapi/error.h"
29#include "qapi/visitor.h"
30#include "sysemu/sysemu.h"
31#include "sysemu/numa.h"
32#include "hw/hw.h"
33#include "qemu/log.h"
34#include "hw/fw-path-provider.h"
35#include "elf.h"
36#include "net/net.h"
37#include "sysemu/device_tree.h"
38#include "sysemu/block-backend.h"
39#include "sysemu/cpus.h"
40#include "sysemu/hw_accel.h"
41#include "kvm_ppc.h"
42#include "migration/misc.h"
43#include "migration/global_state.h"
44#include "migration/register.h"
45#include "mmu-hash64.h"
46#include "mmu-book3s-v3.h"
47#include "cpu-models.h"
48#include "qom/cpu.h"
49
50#include "hw/boards.h"
51#include "hw/ppc/ppc.h"
52#include "hw/loader.h"
53
54#include "hw/ppc/fdt.h"
55#include "hw/ppc/spapr.h"
56#include "hw/ppc/spapr_vio.h"
57#include "hw/pci-host/spapr.h"
58#include "hw/ppc/xics.h"
59#include "hw/pci/msi.h"
60
61#include "hw/pci/pci.h"
62#include "hw/scsi/scsi.h"
63#include "hw/virtio/virtio-scsi.h"
64#include "hw/virtio/vhost-scsi-common.h"
65
66#include "exec/address-spaces.h"
67#include "hw/usb.h"
68#include "qemu/config-file.h"
69#include "qemu/error-report.h"
70#include "trace.h"
71#include "hw/nmi.h"
72#include "hw/intc/intc.h"
73
74#include "hw/compat.h"
75#include "qemu/cutils.h"
76#include "hw/ppc/spapr_cpu_core.h"
77#include "qmp-commands.h"
78
79#include <libfdt.h>
80
81
82
83
84
85
86
87
88
89
90
/* Size of the buffer allocated for the flattened device tree */
#define FDT_MAX_SIZE 0x100000
#define RTAS_MAX_SIZE 0x10000
#define RTAS_MAX_ADDR 0x80000000
/* Also used as the upper bound for the CAS buffer size accepted from SLOF */
#define FW_MAX_SIZE 0x400000
#define FW_FILE_NAME "slof.bin"
#define FW_OVERHEAD 0x2800000
/* The kernel is loaded at the address FW_MAX_SIZE */
#define KERNEL_LOAD_ADDR FW_MAX_SIZE

/* NOTE(review): unit is not visible here, presumably MiB - confirm */
#define MIN_RMA_SLOF 128UL

/* phandle passed to the XICS and PCI device tree builders */
#define PHANDLE_XICP 0x00001111
102
103static ICSState *spapr_ics_create(sPAPRMachineState *spapr,
104 const char *type_ics,
105 int nr_irqs, Error **errp)
106{
107 Error *local_err = NULL;
108 Object *obj;
109
110 obj = object_new(type_ics);
111 object_property_add_child(OBJECT(spapr), "ics", obj, &error_abort);
112 object_property_add_const_link(obj, ICS_PROP_XICS, OBJECT(spapr),
113 &error_abort);
114 object_property_set_int(obj, nr_irqs, "nr-irqs", &local_err);
115 if (local_err) {
116 goto error;
117 }
118 object_property_set_bool(obj, true, "realized", &local_err);
119 if (local_err) {
120 goto error;
121 }
122
123 return ICS_SIMPLE(obj);
124
125error:
126 error_propagate(errp, local_err);
127 return NULL;
128}
129
/*
 * .needed callback of the dummy "icp/server" vmstate entry.
 *
 * Unconditionally reports the entry as not needed; @opaque is ignored.
 */
static bool pre_2_10_vmstate_dummy_icp_needed(void *opaque)
{
    (void)opaque;

    return false;
}
138
/*
 * Placeholder vmstate description registered under the name "icp/server".
 * It carries no real state: all its fields are VMSTATE_UNUSED padding
 * (4 + 1 + 1 bytes) and its .needed callback always returns false.
 */
static const VMStateDescription pre_2_10_vmstate_dummy_icp = {
    .name = "icp/server",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = pre_2_10_vmstate_dummy_icp_needed,
    .fields = (VMStateField[]) {
        VMSTATE_UNUSED(4),
        VMSTATE_UNUSED(1),
        VMSTATE_UNUSED(1),
        VMSTATE_END_OF_LIST()
    },
};
151
152static void pre_2_10_vmstate_register_dummy_icp(int i)
153{
154 vmstate_register(NULL, i, &pre_2_10_vmstate_dummy_icp,
155 (void *)(uintptr_t) i);
156}
157
158static void pre_2_10_vmstate_unregister_dummy_icp(int i)
159{
160 vmstate_unregister(NULL, &pre_2_10_vmstate_dummy_icp,
161 (void *)(uintptr_t) i);
162}
163
164static inline int xics_max_server_number(void)
165{
166 return DIV_ROUND_UP(max_cpus * kvmppc_smt_threads(), smp_threads);
167}
168
169static void xics_system_init(MachineState *machine, int nr_irqs, Error **errp)
170{
171 sPAPRMachineState *spapr = SPAPR_MACHINE(machine);
172 sPAPRMachineClass *smc = SPAPR_MACHINE_GET_CLASS(machine);
173
174 if (kvm_enabled()) {
175 if (machine_kernel_irqchip_allowed(machine) &&
176 !xics_kvm_init(spapr, errp)) {
177 spapr->icp_type = TYPE_KVM_ICP;
178 spapr->ics = spapr_ics_create(spapr, TYPE_ICS_KVM, nr_irqs, errp);
179 }
180 if (machine_kernel_irqchip_required(machine) && !spapr->ics) {
181 error_prepend(errp, "kernel_irqchip requested but unavailable: ");
182 return;
183 }
184 }
185
186 if (!spapr->ics) {
187 xics_spapr_init(spapr);
188 spapr->icp_type = TYPE_ICP;
189 spapr->ics = spapr_ics_create(spapr, TYPE_ICS_SIMPLE, nr_irqs, errp);
190 if (!spapr->ics) {
191 return;
192 }
193 }
194
195 if (smc->pre_2_10_has_unused_icps) {
196 int i;
197
198 for (i = 0; i < xics_max_server_number(); i++) {
199
200
201
202 pre_2_10_vmstate_register_dummy_icp(i);
203 }
204 }
205}
206
207static int spapr_fixup_cpu_smt_dt(void *fdt, int offset, PowerPCCPU *cpu,
208 int smt_threads)
209{
210 int i, ret = 0;
211 uint32_t servers_prop[smt_threads];
212 uint32_t gservers_prop[smt_threads * 2];
213 int index = spapr_vcpu_id(cpu);
214
215 if (cpu->compat_pvr) {
216 ret = fdt_setprop_cell(fdt, offset, "cpu-version", cpu->compat_pvr);
217 if (ret < 0) {
218 return ret;
219 }
220 }
221
222
223 for (i = 0; i < smt_threads; i++) {
224 servers_prop[i] = cpu_to_be32(index + i);
225
226 gservers_prop[i*2] = cpu_to_be32(index + i);
227 gservers_prop[i*2 + 1] = 0;
228 }
229 ret = fdt_setprop(fdt, offset, "ibm,ppc-interrupt-server#s",
230 servers_prop, sizeof(servers_prop));
231 if (ret < 0) {
232 return ret;
233 }
234 ret = fdt_setprop(fdt, offset, "ibm,ppc-interrupt-gserver#s",
235 gservers_prop, sizeof(gservers_prop));
236
237 return ret;
238}
239
240static int spapr_fixup_cpu_numa_dt(void *fdt, int offset, PowerPCCPU *cpu)
241{
242 int index = spapr_vcpu_id(cpu);
243 uint32_t associativity[] = {cpu_to_be32(0x5),
244 cpu_to_be32(0x0),
245 cpu_to_be32(0x0),
246 cpu_to_be32(0x0),
247 cpu_to_be32(cpu->node_id),
248 cpu_to_be32(index)};
249
250
251 return fdt_setprop(fdt, offset, "ibm,associativity", associativity,
252 sizeof(associativity));
253}
254
255
256static void spapr_populate_pa_features(PowerPCCPU *cpu, void *fdt, int offset,
257 bool legacy_guest)
258{
259 CPUPPCState *env = &cpu->env;
260 uint8_t pa_features_206[] = { 6, 0,
261 0xf6, 0x1f, 0xc7, 0x00, 0x80, 0xc0 };
262 uint8_t pa_features_207[] = { 24, 0,
263 0xf6, 0x1f, 0xc7, 0xc0, 0x80, 0xf0,
264 0x80, 0x00, 0x00, 0x00, 0x00, 0x00,
265 0x00, 0x00, 0x00, 0x00, 0x80, 0x00,
266 0x80, 0x00, 0x80, 0x00, 0x00, 0x00 };
267 uint8_t pa_features_300[] = { 66, 0,
268
269
270 0xf6, 0x1f, 0xc7, 0xc0, 0x80, 0xf0,
271
272 0x80, 0x00, 0x00, 0x00, 0x00, 0x00,
273
274 0x00, 0x00, 0x00, 0x00, 0x80, 0x00,
275
276 0x80, 0x00, 0x80, 0x00, 0x00, 0x00,
277
278 0x80, 0x00, 0x80, 0x00, 0x80, 0x00,
279
280 0x80, 0x00, 0x80, 0x00, 0xC0, 0x00,
281
282 0x80, 0x00, 0x80, 0x00, 0x80, 0x00,
283
284 0x80, 0x00, 0x80, 0x00, 0x80, 0x00,
285
286 0x80, 0x00, 0x80, 0x00, 0x80, 0x00,
287
288 0x80, 0x00, 0x80, 0x00, 0x80, 0x00,
289
290 0x80, 0x00, 0x80, 0x00, 0x00, 0x00,
291 };
292 uint8_t *pa_features = NULL;
293 size_t pa_size;
294
295 if (ppc_check_compat(cpu, CPU_POWERPC_LOGICAL_2_06, 0, cpu->compat_pvr)) {
296 pa_features = pa_features_206;
297 pa_size = sizeof(pa_features_206);
298 }
299 if (ppc_check_compat(cpu, CPU_POWERPC_LOGICAL_2_07, 0, cpu->compat_pvr)) {
300 pa_features = pa_features_207;
301 pa_size = sizeof(pa_features_207);
302 }
303 if (ppc_check_compat(cpu, CPU_POWERPC_LOGICAL_3_00, 0, cpu->compat_pvr)) {
304 pa_features = pa_features_300;
305 pa_size = sizeof(pa_features_300);
306 }
307 if (!pa_features) {
308 return;
309 }
310
311 if (env->ci_large_pages) {
312
313
314
315
316
317
318
319 pa_features[3] |= 0x20;
320 }
321 if (kvmppc_has_cap_htm() && pa_size > 24) {
322 pa_features[24] |= 0x80;
323 }
324 if (legacy_guest && pa_size > 40) {
325
326
327
328 pa_features[40 + 2] &= ~0x80;
329 }
330
331 _FDT((fdt_setprop(fdt, offset, "ibm,pa-features", pa_features, pa_size)));
332}
333
334static int spapr_fixup_cpu_dt(void *fdt, sPAPRMachineState *spapr)
335{
336 int ret = 0, offset, cpus_offset;
337 CPUState *cs;
338 char cpu_model[32];
339 int smt = kvmppc_smt_threads();
340 uint32_t pft_size_prop[] = {0, cpu_to_be32(spapr->htab_shift)};
341
342 CPU_FOREACH(cs) {
343 PowerPCCPU *cpu = POWERPC_CPU(cs);
344 DeviceClass *dc = DEVICE_GET_CLASS(cs);
345 int index = spapr_vcpu_id(cpu);
346 int compat_smt = MIN(smp_threads, ppc_compat_max_threads(cpu));
347
348 if ((index % smt) != 0) {
349 continue;
350 }
351
352 snprintf(cpu_model, 32, "%s@%x", dc->fw_name, index);
353
354 cpus_offset = fdt_path_offset(fdt, "/cpus");
355 if (cpus_offset < 0) {
356 cpus_offset = fdt_add_subnode(fdt, 0, "cpus");
357 if (cpus_offset < 0) {
358 return cpus_offset;
359 }
360 }
361 offset = fdt_subnode_offset(fdt, cpus_offset, cpu_model);
362 if (offset < 0) {
363 offset = fdt_add_subnode(fdt, cpus_offset, cpu_model);
364 if (offset < 0) {
365 return offset;
366 }
367 }
368
369 ret = fdt_setprop(fdt, offset, "ibm,pft-size",
370 pft_size_prop, sizeof(pft_size_prop));
371 if (ret < 0) {
372 return ret;
373 }
374
375 if (nb_numa_nodes > 1) {
376 ret = spapr_fixup_cpu_numa_dt(fdt, offset, cpu);
377 if (ret < 0) {
378 return ret;
379 }
380 }
381
382 ret = spapr_fixup_cpu_smt_dt(fdt, offset, cpu, compat_smt);
383 if (ret < 0) {
384 return ret;
385 }
386
387 spapr_populate_pa_features(cpu, fdt, offset,
388 spapr->cas_legacy_guest_workaround);
389 }
390 return ret;
391}
392
393static hwaddr spapr_node0_size(MachineState *machine)
394{
395 if (nb_numa_nodes) {
396 int i;
397 for (i = 0; i < nb_numa_nodes; ++i) {
398 if (numa_info[i].node_mem) {
399 return MIN(pow2floor(numa_info[i].node_mem),
400 machine->ram_size);
401 }
402 }
403 }
404 return machine->ram_size;
405}
406
407static void add_str(GString *s, const gchar *s1)
408{
409 g_string_append_len(s, s1, strlen(s1) + 1);
410}
411
412static int spapr_populate_memory_node(void *fdt, int nodeid, hwaddr start,
413 hwaddr size)
414{
415 uint32_t associativity[] = {
416 cpu_to_be32(0x4),
417 cpu_to_be32(0x0), cpu_to_be32(0x0),
418 cpu_to_be32(0x0), cpu_to_be32(nodeid)
419 };
420 char mem_name[32];
421 uint64_t mem_reg_property[2];
422 int off;
423
424 mem_reg_property[0] = cpu_to_be64(start);
425 mem_reg_property[1] = cpu_to_be64(size);
426
427 sprintf(mem_name, "memory@" TARGET_FMT_lx, start);
428 off = fdt_add_subnode(fdt, 0, mem_name);
429 _FDT(off);
430 _FDT((fdt_setprop_string(fdt, off, "device_type", "memory")));
431 _FDT((fdt_setprop(fdt, off, "reg", mem_reg_property,
432 sizeof(mem_reg_property))));
433 _FDT((fdt_setprop(fdt, off, "ibm,associativity", associativity,
434 sizeof(associativity))));
435 return off;
436}
437
438static int spapr_populate_memory(sPAPRMachineState *spapr, void *fdt)
439{
440 MachineState *machine = MACHINE(spapr);
441 hwaddr mem_start, node_size;
442 int i, nb_nodes = nb_numa_nodes;
443 NodeInfo *nodes = numa_info;
444 NodeInfo ramnode;
445
446
447 if (!nb_numa_nodes) {
448 nb_nodes = 1;
449 ramnode.node_mem = machine->ram_size;
450 nodes = &ramnode;
451 }
452
453 for (i = 0, mem_start = 0; i < nb_nodes; ++i) {
454 if (!nodes[i].node_mem) {
455 continue;
456 }
457 if (mem_start >= machine->ram_size) {
458 node_size = 0;
459 } else {
460 node_size = nodes[i].node_mem;
461 if (node_size > machine->ram_size - mem_start) {
462 node_size = machine->ram_size - mem_start;
463 }
464 }
465 if (!mem_start) {
466
467 spapr_populate_memory_node(fdt, i, 0, spapr->rma_size);
468 mem_start += spapr->rma_size;
469 node_size -= spapr->rma_size;
470 }
471 for ( ; node_size; ) {
472 hwaddr sizetmp = pow2floor(node_size);
473
474
475 if (ctzl(mem_start) < ctzl(sizetmp)) {
476 sizetmp = 1ULL << ctzl(mem_start);
477 }
478
479 spapr_populate_memory_node(fdt, i, mem_start, sizetmp);
480 node_size -= sizetmp;
481 mem_start += sizetmp;
482 }
483 }
484
485 return 0;
486}
487
488static void spapr_populate_cpu_dt(CPUState *cs, void *fdt, int offset,
489 sPAPRMachineState *spapr)
490{
491 PowerPCCPU *cpu = POWERPC_CPU(cs);
492 CPUPPCState *env = &cpu->env;
493 PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(cs);
494 int index = spapr_vcpu_id(cpu);
495 uint32_t segs[] = {cpu_to_be32(28), cpu_to_be32(40),
496 0xffffffff, 0xffffffff};
497 uint32_t tbfreq = kvm_enabled() ? kvmppc_get_tbfreq()
498 : SPAPR_TIMEBASE_FREQ;
499 uint32_t cpufreq = kvm_enabled() ? kvmppc_get_clockfreq() : 1000000000;
500 uint32_t page_sizes_prop[64];
501 size_t page_sizes_prop_size;
502 uint32_t vcpus_per_socket = smp_threads * smp_cores;
503 uint32_t pft_size_prop[] = {0, cpu_to_be32(spapr->htab_shift)};
504 int compat_smt = MIN(smp_threads, ppc_compat_max_threads(cpu));
505 sPAPRDRConnector *drc;
506 int drc_index;
507 uint32_t radix_AP_encodings[PPC_PAGE_SIZES_MAX_SZ];
508 int i;
509
510 drc = spapr_drc_by_id(TYPE_SPAPR_DRC_CPU, index);
511 if (drc) {
512 drc_index = spapr_drc_index(drc);
513 _FDT((fdt_setprop_cell(fdt, offset, "ibm,my-drc-index", drc_index)));
514 }
515
516 _FDT((fdt_setprop_cell(fdt, offset, "reg", index)));
517 _FDT((fdt_setprop_string(fdt, offset, "device_type", "cpu")));
518
519 _FDT((fdt_setprop_cell(fdt, offset, "cpu-version", env->spr[SPR_PVR])));
520 _FDT((fdt_setprop_cell(fdt, offset, "d-cache-block-size",
521 env->dcache_line_size)));
522 _FDT((fdt_setprop_cell(fdt, offset, "d-cache-line-size",
523 env->dcache_line_size)));
524 _FDT((fdt_setprop_cell(fdt, offset, "i-cache-block-size",
525 env->icache_line_size)));
526 _FDT((fdt_setprop_cell(fdt, offset, "i-cache-line-size",
527 env->icache_line_size)));
528
529 if (pcc->l1_dcache_size) {
530 _FDT((fdt_setprop_cell(fdt, offset, "d-cache-size",
531 pcc->l1_dcache_size)));
532 } else {
533 warn_report("Unknown L1 dcache size for cpu");
534 }
535 if (pcc->l1_icache_size) {
536 _FDT((fdt_setprop_cell(fdt, offset, "i-cache-size",
537 pcc->l1_icache_size)));
538 } else {
539 warn_report("Unknown L1 icache size for cpu");
540 }
541
542 _FDT((fdt_setprop_cell(fdt, offset, "timebase-frequency", tbfreq)));
543 _FDT((fdt_setprop_cell(fdt, offset, "clock-frequency", cpufreq)));
544 _FDT((fdt_setprop_cell(fdt, offset, "slb-size", env->slb_nr)));
545 _FDT((fdt_setprop_cell(fdt, offset, "ibm,slb-size", env->slb_nr)));
546 _FDT((fdt_setprop_string(fdt, offset, "status", "okay")));
547 _FDT((fdt_setprop(fdt, offset, "64-bit", NULL, 0)));
548
549 if (env->spr_cb[SPR_PURR].oea_read) {
550 _FDT((fdt_setprop(fdt, offset, "ibm,purr", NULL, 0)));
551 }
552
553 if (env->mmu_model & POWERPC_MMU_1TSEG) {
554 _FDT((fdt_setprop(fdt, offset, "ibm,processor-segment-sizes",
555 segs, sizeof(segs))));
556 }
557
558
559
560
561
562 if (env->insns_flags & PPC_ALTIVEC) {
563 uint32_t vmx = (env->insns_flags2 & PPC2_VSX) ? 2 : 1;
564
565 _FDT((fdt_setprop_cell(fdt, offset, "ibm,vmx", vmx)));
566 }
567
568
569
570
571 if (env->insns_flags2 & PPC2_DFP) {
572 _FDT((fdt_setprop_cell(fdt, offset, "ibm,dfp", 1)));
573 }
574
575 page_sizes_prop_size = ppc_create_page_sizes_prop(env, page_sizes_prop,
576 sizeof(page_sizes_prop));
577 if (page_sizes_prop_size) {
578 _FDT((fdt_setprop(fdt, offset, "ibm,segment-page-sizes",
579 page_sizes_prop, page_sizes_prop_size)));
580 }
581
582 spapr_populate_pa_features(cpu, fdt, offset, false);
583
584 _FDT((fdt_setprop_cell(fdt, offset, "ibm,chip-id",
585 cs->cpu_index / vcpus_per_socket)));
586
587 _FDT((fdt_setprop(fdt, offset, "ibm,pft-size",
588 pft_size_prop, sizeof(pft_size_prop))));
589
590 if (nb_numa_nodes > 1) {
591 _FDT(spapr_fixup_cpu_numa_dt(fdt, offset, cpu));
592 }
593
594 _FDT(spapr_fixup_cpu_smt_dt(fdt, offset, cpu, compat_smt));
595
596 if (pcc->radix_page_info) {
597 for (i = 0; i < pcc->radix_page_info->count; i++) {
598 radix_AP_encodings[i] =
599 cpu_to_be32(pcc->radix_page_info->entries[i]);
600 }
601 _FDT((fdt_setprop(fdt, offset, "ibm,processor-radix-AP-encodings",
602 radix_AP_encodings,
603 pcc->radix_page_info->count *
604 sizeof(radix_AP_encodings[0]))));
605 }
606}
607
608static void spapr_populate_cpus_dt_node(void *fdt, sPAPRMachineState *spapr)
609{
610 CPUState *cs;
611 int cpus_offset;
612 char *nodename;
613 int smt = kvmppc_smt_threads();
614
615 cpus_offset = fdt_add_subnode(fdt, 0, "cpus");
616 _FDT(cpus_offset);
617 _FDT((fdt_setprop_cell(fdt, cpus_offset, "#address-cells", 0x1)));
618 _FDT((fdt_setprop_cell(fdt, cpus_offset, "#size-cells", 0x0)));
619
620
621
622
623
624
625 CPU_FOREACH_REVERSE(cs) {
626 PowerPCCPU *cpu = POWERPC_CPU(cs);
627 int index = spapr_vcpu_id(cpu);
628 DeviceClass *dc = DEVICE_GET_CLASS(cs);
629 int offset;
630
631 if ((index % smt) != 0) {
632 continue;
633 }
634
635 nodename = g_strdup_printf("%s@%x", dc->fw_name, index);
636 offset = fdt_add_subnode(fdt, cpus_offset, nodename);
637 g_free(nodename);
638 _FDT(offset);
639 spapr_populate_cpu_dt(cs, fdt, offset, spapr);
640 }
641
642}
643
644
645
646
647
648
649static int spapr_populate_drconf_memory(sPAPRMachineState *spapr, void *fdt)
650{
651 MachineState *machine = MACHINE(spapr);
652 int ret, i, offset;
653 uint64_t lmb_size = SPAPR_MEMORY_BLOCK_SIZE;
654 uint32_t prop_lmb_size[] = {0, cpu_to_be32(lmb_size)};
655 uint32_t hotplug_lmb_start = spapr->hotplug_memory.base / lmb_size;
656 uint32_t nr_lmbs = (spapr->hotplug_memory.base +
657 memory_region_size(&spapr->hotplug_memory.mr)) /
658 lmb_size;
659 uint32_t *int_buf, *cur_index, buf_len;
660 int nr_nodes = nb_numa_nodes ? nb_numa_nodes : 1;
661
662
663
664
665 if (machine->ram_size == machine->maxram_size) {
666 return 0;
667 }
668
669
670
671
672
673 buf_len = MAX(nr_lmbs * SPAPR_DR_LMB_LIST_ENTRY_SIZE + 1, nr_nodes * 4 + 2)
674 * sizeof(uint32_t);
675 cur_index = int_buf = g_malloc0(buf_len);
676
677 offset = fdt_add_subnode(fdt, 0, "ibm,dynamic-reconfiguration-memory");
678
679 ret = fdt_setprop(fdt, offset, "ibm,lmb-size", prop_lmb_size,
680 sizeof(prop_lmb_size));
681 if (ret < 0) {
682 goto out;
683 }
684
685 ret = fdt_setprop_cell(fdt, offset, "ibm,memory-flags-mask", 0xff);
686 if (ret < 0) {
687 goto out;
688 }
689
690 ret = fdt_setprop_cell(fdt, offset, "ibm,memory-preservation-time", 0x0);
691 if (ret < 0) {
692 goto out;
693 }
694
695
696 int_buf[0] = cpu_to_be32(nr_lmbs);
697 cur_index++;
698 for (i = 0; i < nr_lmbs; i++) {
699 uint64_t addr = i * lmb_size;
700 uint32_t *dynamic_memory = cur_index;
701
702 if (i >= hotplug_lmb_start) {
703 sPAPRDRConnector *drc;
704
705 drc = spapr_drc_by_id(TYPE_SPAPR_DRC_LMB, i);
706 g_assert(drc);
707
708 dynamic_memory[0] = cpu_to_be32(addr >> 32);
709 dynamic_memory[1] = cpu_to_be32(addr & 0xffffffff);
710 dynamic_memory[2] = cpu_to_be32(spapr_drc_index(drc));
711 dynamic_memory[3] = cpu_to_be32(0);
712 dynamic_memory[4] = cpu_to_be32(numa_get_node(addr, NULL));
713 if (memory_region_present(get_system_memory(), addr)) {
714 dynamic_memory[5] = cpu_to_be32(SPAPR_LMB_FLAGS_ASSIGNED);
715 } else {
716 dynamic_memory[5] = cpu_to_be32(0);
717 }
718 } else {
719
720
721
722
723
724 dynamic_memory[0] = cpu_to_be32(addr >> 32);
725 dynamic_memory[1] = cpu_to_be32(addr & 0xffffffff);
726 dynamic_memory[2] = cpu_to_be32(0);
727 dynamic_memory[3] = cpu_to_be32(0);
728 dynamic_memory[4] = cpu_to_be32(-1);
729 dynamic_memory[5] = cpu_to_be32(SPAPR_LMB_FLAGS_RESERVED |
730 SPAPR_LMB_FLAGS_DRC_INVALID);
731 }
732
733 cur_index += SPAPR_DR_LMB_LIST_ENTRY_SIZE;
734 }
735 ret = fdt_setprop(fdt, offset, "ibm,dynamic-memory", int_buf, buf_len);
736 if (ret < 0) {
737 goto out;
738 }
739
740
741 cur_index = int_buf;
742 int_buf[0] = cpu_to_be32(nr_nodes);
743 int_buf[1] = cpu_to_be32(4);
744 cur_index += 2;
745 for (i = 0; i < nr_nodes; i++) {
746 uint32_t associativity[] = {
747 cpu_to_be32(0x0),
748 cpu_to_be32(0x0),
749 cpu_to_be32(0x0),
750 cpu_to_be32(i)
751 };
752 memcpy(cur_index, associativity, sizeof(associativity));
753 cur_index += 4;
754 }
755 ret = fdt_setprop(fdt, offset, "ibm,associativity-lookup-arrays", int_buf,
756 (cur_index - int_buf) * sizeof(uint32_t));
757out:
758 g_free(int_buf);
759 return ret;
760}
761
762static int spapr_dt_cas_updates(sPAPRMachineState *spapr, void *fdt,
763 sPAPROptionVector *ov5_updates)
764{
765 sPAPRMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr);
766 int ret = 0, offset;
767
768
769 if (spapr_ovec_test(ov5_updates, OV5_DRCONF_MEMORY)) {
770 g_assert(smc->dr_lmb_enabled);
771 ret = spapr_populate_drconf_memory(spapr, fdt);
772 if (ret) {
773 goto out;
774 }
775 }
776
777 offset = fdt_path_offset(fdt, "/chosen");
778 if (offset < 0) {
779 offset = fdt_add_subnode(fdt, 0, "chosen");
780 if (offset < 0) {
781 return offset;
782 }
783 }
784 ret = spapr_ovec_populate_dt(fdt, offset, spapr->ov5_cas,
785 "ibm,architecture-vec-5");
786
787out:
788 return ret;
789}
790
791static bool spapr_hotplugged_dev_before_cas(void)
792{
793 Object *drc_container, *obj;
794 ObjectProperty *prop;
795 ObjectPropertyIterator iter;
796
797 drc_container = container_get(object_get_root(), "/dr-connector");
798 object_property_iter_init(&iter, drc_container);
799 while ((prop = object_property_iter_next(&iter))) {
800 if (!strstart(prop->type, "link<", NULL)) {
801 continue;
802 }
803 obj = object_property_get_link(drc_container, prop->name, NULL);
804 if (spapr_drc_needed(obj)) {
805 return true;
806 }
807 }
808 return false;
809}
810
811int spapr_h_cas_compose_response(sPAPRMachineState *spapr,
812 target_ulong addr, target_ulong size,
813 sPAPROptionVector *ov5_updates)
814{
815 void *fdt, *fdt_skel;
816 sPAPRDeviceTreeUpdateHeader hdr = { .version_id = 1 };
817
818 if (spapr_hotplugged_dev_before_cas()) {
819 return 1;
820 }
821
822 if (size < sizeof(hdr) || size > FW_MAX_SIZE) {
823 error_report("SLOF provided an unexpected CAS buffer size "
824 TARGET_FMT_lu " (min: %zu, max: %u)",
825 size, sizeof(hdr), FW_MAX_SIZE);
826 exit(EXIT_FAILURE);
827 }
828
829 size -= sizeof(hdr);
830
831
832 fdt_skel = g_malloc0(size);
833 _FDT((fdt_create(fdt_skel, size)));
834 _FDT((fdt_begin_node(fdt_skel, "")));
835 _FDT((fdt_end_node(fdt_skel)));
836 _FDT((fdt_finish(fdt_skel)));
837 fdt = g_malloc0(size);
838 _FDT((fdt_open_into(fdt_skel, fdt, size)));
839 g_free(fdt_skel);
840
841
842 _FDT((spapr_fixup_cpu_dt(fdt, spapr)));
843
844 if (spapr_dt_cas_updates(spapr, fdt, ov5_updates)) {
845 return -1;
846 }
847
848
849 _FDT((fdt_pack(fdt)));
850
851 if (fdt_totalsize(fdt) + sizeof(hdr) > size) {
852 trace_spapr_cas_failed(size);
853 return -1;
854 }
855
856 cpu_physical_memory_write(addr, &hdr, sizeof(hdr));
857 cpu_physical_memory_write(addr + sizeof(hdr), fdt, fdt_totalsize(fdt));
858 trace_spapr_cas_continue(fdt_totalsize(fdt) + sizeof(hdr));
859 g_free(fdt);
860
861 return 0;
862}
863
864static void spapr_dt_rtas(sPAPRMachineState *spapr, void *fdt)
865{
866 int rtas;
867 GString *hypertas = g_string_sized_new(256);
868 GString *qemu_hypertas = g_string_sized_new(256);
869 uint32_t refpoints[] = { cpu_to_be32(0x4), cpu_to_be32(0x4) };
870 uint64_t max_hotplug_addr = spapr->hotplug_memory.base +
871 memory_region_size(&spapr->hotplug_memory.mr);
872 uint32_t lrdr_capacity[] = {
873 cpu_to_be32(max_hotplug_addr >> 32),
874 cpu_to_be32(max_hotplug_addr & 0xffffffff),
875 0, cpu_to_be32(SPAPR_MEMORY_BLOCK_SIZE),
876 cpu_to_be32(max_cpus / smp_threads),
877 };
878
879 _FDT(rtas = fdt_add_subnode(fdt, 0, "rtas"));
880
881
882 add_str(hypertas, "hcall-pft");
883 add_str(hypertas, "hcall-term");
884 add_str(hypertas, "hcall-dabr");
885 add_str(hypertas, "hcall-interrupt");
886 add_str(hypertas, "hcall-tce");
887 add_str(hypertas, "hcall-vio");
888 add_str(hypertas, "hcall-splpar");
889 add_str(hypertas, "hcall-bulk");
890 add_str(hypertas, "hcall-set-mode");
891 add_str(hypertas, "hcall-sprg0");
892 add_str(hypertas, "hcall-copy");
893 add_str(hypertas, "hcall-debug");
894 add_str(qemu_hypertas, "hcall-memop1");
895
896 if (!kvm_enabled() || kvmppc_spapr_use_multitce()) {
897 add_str(hypertas, "hcall-multi-tce");
898 }
899
900 if (spapr->resize_hpt != SPAPR_RESIZE_HPT_DISABLED) {
901 add_str(hypertas, "hcall-hpt-resize");
902 }
903
904 _FDT(fdt_setprop(fdt, rtas, "ibm,hypertas-functions",
905 hypertas->str, hypertas->len));
906 g_string_free(hypertas, TRUE);
907 _FDT(fdt_setprop(fdt, rtas, "qemu,hypertas-functions",
908 qemu_hypertas->str, qemu_hypertas->len));
909 g_string_free(qemu_hypertas, TRUE);
910
911 _FDT(fdt_setprop(fdt, rtas, "ibm,associativity-reference-points",
912 refpoints, sizeof(refpoints)));
913
914 _FDT(fdt_setprop_cell(fdt, rtas, "rtas-error-log-max",
915 RTAS_ERROR_LOG_MAX));
916 _FDT(fdt_setprop_cell(fdt, rtas, "rtas-event-scan-rate",
917 RTAS_EVENT_SCAN_RATE));
918
919 if (msi_nonbroken) {
920 _FDT(fdt_setprop(fdt, rtas, "ibm,change-msix-capable", NULL, 0));
921 }
922
923
924
925
926
927
928
929
930 _FDT(fdt_setprop(fdt, rtas, "ibm,extended-os-term", NULL, 0));
931
932 _FDT(fdt_setprop(fdt, rtas, "ibm,lrdr-capacity",
933 lrdr_capacity, sizeof(lrdr_capacity)));
934
935 spapr_dt_rtas_tokens(fdt, rtas);
936}
937
938
939
940
941static void spapr_dt_ov5_platform_support(void *fdt, int chosen)
942{
943 PowerPCCPU *first_ppc_cpu = POWERPC_CPU(first_cpu);
944
945 char val[2 * 4] = {
946 23, 0x00,
947 24, 0x00,
948 25, 0x00,
949 26, 0x40,
950 };
951
952 if (!ppc_check_compat(first_ppc_cpu, CPU_POWERPC_LOGICAL_3_00, 0,
953 first_ppc_cpu->compat_pvr)) {
954
955 val[3] = 0x00;
956 } else if (kvm_enabled()) {
957 if (kvmppc_has_cap_mmu_radix() && kvmppc_has_cap_mmu_hash_v3()) {
958 val[3] = 0x80;
959 } else if (kvmppc_has_cap_mmu_radix()) {
960 val[3] = 0x40;
961 } else {
962 val[3] = 0x00;
963 }
964 } else {
965
966 val[3] = 0xC0;
967 }
968 _FDT(fdt_setprop(fdt, chosen, "ibm,arch-vec-5-platform-support",
969 val, sizeof(val)));
970}
971
972static void spapr_dt_chosen(sPAPRMachineState *spapr, void *fdt)
973{
974 MachineState *machine = MACHINE(spapr);
975 int chosen;
976 const char *boot_device = machine->boot_order;
977 char *stdout_path = spapr_vio_stdout_path(spapr->vio_bus);
978 size_t cb = 0;
979 char *bootlist = get_boot_devices_list(&cb, true);
980
981 _FDT(chosen = fdt_add_subnode(fdt, 0, "chosen"));
982
983 _FDT(fdt_setprop_string(fdt, chosen, "bootargs", machine->kernel_cmdline));
984 _FDT(fdt_setprop_cell(fdt, chosen, "linux,initrd-start",
985 spapr->initrd_base));
986 _FDT(fdt_setprop_cell(fdt, chosen, "linux,initrd-end",
987 spapr->initrd_base + spapr->initrd_size));
988
989 if (spapr->kernel_size) {
990 uint64_t kprop[2] = { cpu_to_be64(KERNEL_LOAD_ADDR),
991 cpu_to_be64(spapr->kernel_size) };
992
993 _FDT(fdt_setprop(fdt, chosen, "qemu,boot-kernel",
994 &kprop, sizeof(kprop)));
995 if (spapr->kernel_le) {
996 _FDT(fdt_setprop(fdt, chosen, "qemu,boot-kernel-le", NULL, 0));
997 }
998 }
999 if (boot_menu) {
1000 _FDT((fdt_setprop_cell(fdt, chosen, "qemu,boot-menu", boot_menu)));
1001 }
1002 _FDT(fdt_setprop_cell(fdt, chosen, "qemu,graphic-width", graphic_width));
1003 _FDT(fdt_setprop_cell(fdt, chosen, "qemu,graphic-height", graphic_height));
1004 _FDT(fdt_setprop_cell(fdt, chosen, "qemu,graphic-depth", graphic_depth));
1005
1006 if (cb && bootlist) {
1007 int i;
1008
1009 for (i = 0; i < cb; i++) {
1010 if (bootlist[i] == '\n') {
1011 bootlist[i] = ' ';
1012 }
1013 }
1014 _FDT(fdt_setprop_string(fdt, chosen, "qemu,boot-list", bootlist));
1015 }
1016
1017 if (boot_device && strlen(boot_device)) {
1018 _FDT(fdt_setprop_string(fdt, chosen, "qemu,boot-device", boot_device));
1019 }
1020
1021 if (!spapr->has_graphics && stdout_path) {
1022 _FDT(fdt_setprop_string(fdt, chosen, "linux,stdout-path", stdout_path));
1023 }
1024
1025 spapr_dt_ov5_platform_support(fdt, chosen);
1026
1027 g_free(stdout_path);
1028 g_free(bootlist);
1029}
1030
1031static void spapr_dt_hypervisor(sPAPRMachineState *spapr, void *fdt)
1032{
1033
1034
1035 int hypervisor;
1036 uint8_t hypercall[16];
1037
1038 _FDT(hypervisor = fdt_add_subnode(fdt, 0, "hypervisor"));
1039
1040 _FDT(fdt_setprop_string(fdt, hypervisor, "compatible", "linux,kvm"));
1041 if (kvmppc_has_cap_fixup_hcalls()) {
1042
1043
1044
1045
1046 if (!kvmppc_get_hypercall(first_cpu->env_ptr, hypercall,
1047 sizeof(hypercall))) {
1048 _FDT(fdt_setprop(fdt, hypervisor, "hcall-instructions",
1049 hypercall, sizeof(hypercall)));
1050 }
1051 }
1052}
1053
1054static void *spapr_build_fdt(sPAPRMachineState *spapr,
1055 hwaddr rtas_addr,
1056 hwaddr rtas_size)
1057{
1058 MachineState *machine = MACHINE(spapr);
1059 MachineClass *mc = MACHINE_GET_CLASS(machine);
1060 sPAPRMachineClass *smc = SPAPR_MACHINE_GET_CLASS(machine);
1061 int ret;
1062 void *fdt;
1063 sPAPRPHBState *phb;
1064 char *buf;
1065
1066 fdt = g_malloc0(FDT_MAX_SIZE);
1067 _FDT((fdt_create_empty_tree(fdt, FDT_MAX_SIZE)));
1068
1069
1070 _FDT(fdt_setprop_string(fdt, 0, "device_type", "chrp"));
1071 _FDT(fdt_setprop_string(fdt, 0, "model", "IBM pSeries (emulated by qemu)"));
1072 _FDT(fdt_setprop_string(fdt, 0, "compatible", "qemu,pseries"));
1073
1074
1075
1076
1077
1078 if (kvmppc_get_host_model(&buf)) {
1079 _FDT(fdt_setprop_string(fdt, 0, "host-model", buf));
1080 g_free(buf);
1081 }
1082 if (kvmppc_get_host_serial(&buf)) {
1083 _FDT(fdt_setprop_string(fdt, 0, "host-serial", buf));
1084 g_free(buf);
1085 }
1086
1087 buf = qemu_uuid_unparse_strdup(&qemu_uuid);
1088
1089 _FDT(fdt_setprop_string(fdt, 0, "vm,uuid", buf));
1090 if (qemu_uuid_set) {
1091 _FDT(fdt_setprop_string(fdt, 0, "system-id", buf));
1092 }
1093 g_free(buf);
1094
1095 if (qemu_get_vm_name()) {
1096 _FDT(fdt_setprop_string(fdt, 0, "ibm,partition-name",
1097 qemu_get_vm_name()));
1098 }
1099
1100 _FDT(fdt_setprop_cell(fdt, 0, "#address-cells", 2));
1101 _FDT(fdt_setprop_cell(fdt, 0, "#size-cells", 2));
1102
1103
1104 spapr_dt_xics(xics_max_server_number(), fdt, PHANDLE_XICP);
1105
1106 ret = spapr_populate_memory(spapr, fdt);
1107 if (ret < 0) {
1108 error_report("couldn't setup memory nodes in fdt");
1109 exit(1);
1110 }
1111
1112
1113 spapr_dt_vdevice(spapr->vio_bus, fdt);
1114
1115 if (object_resolve_path_type("", TYPE_SPAPR_RNG, NULL)) {
1116 ret = spapr_rng_populate_dt(fdt);
1117 if (ret < 0) {
1118 error_report("could not set up rng device in the fdt");
1119 exit(1);
1120 }
1121 }
1122
1123 QLIST_FOREACH(phb, &spapr->phbs, list) {
1124 ret = spapr_populate_pci_dt(phb, PHANDLE_XICP, fdt);
1125 if (ret < 0) {
1126 error_report("couldn't setup PCI devices in fdt");
1127 exit(1);
1128 }
1129 }
1130
1131
1132 spapr_populate_cpus_dt_node(fdt, spapr);
1133
1134 if (smc->dr_lmb_enabled) {
1135 _FDT(spapr_drc_populate_dt(fdt, 0, NULL, SPAPR_DR_CONNECTOR_TYPE_LMB));
1136 }
1137
1138 if (mc->has_hotpluggable_cpus) {
1139 int offset = fdt_path_offset(fdt, "/cpus");
1140 ret = spapr_drc_populate_dt(fdt, offset, NULL,
1141 SPAPR_DR_CONNECTOR_TYPE_CPU);
1142 if (ret < 0) {
1143 error_report("Couldn't set up CPU DR device tree properties");
1144 exit(1);
1145 }
1146 }
1147
1148
1149 spapr_dt_events(spapr, fdt);
1150
1151
1152 spapr_dt_rtas(spapr, fdt);
1153
1154
1155 spapr_dt_chosen(spapr, fdt);
1156
1157
1158 if (kvm_enabled()) {
1159 spapr_dt_hypervisor(spapr, fdt);
1160 }
1161
1162
1163 if (spapr->kernel_size) {
1164 _FDT((fdt_add_mem_rsv(fdt, KERNEL_LOAD_ADDR, spapr->kernel_size)));
1165 }
1166 if (spapr->initrd_size) {
1167 _FDT((fdt_add_mem_rsv(fdt, spapr->initrd_base, spapr->initrd_size)));
1168 }
1169
1170
1171 ret = spapr_dt_cas_updates(spapr, fdt, spapr->ov5_cas);
1172 if (ret < 0) {
1173 error_report("couldn't setup CAS properties fdt");
1174 exit(1);
1175 }
1176
1177 return fdt;
1178}
1179
1180static uint64_t translate_kernel_address(void *opaque, uint64_t addr)
1181{
1182 return (addr & 0x0fffffff) + KERNEL_LOAD_ADDR;
1183}
1184
1185static void emulate_spapr_hypercall(PPCVirtualHypervisor *vhyp,
1186 PowerPCCPU *cpu)
1187{
1188 CPUPPCState *env = &cpu->env;
1189
1190
1191 g_assert(qemu_mutex_iothread_locked());
1192
1193 if (msr_pr) {
1194 hcall_dprintf("Hypercall made with MSR[PR]=1\n");
1195 env->gpr[3] = H_PRIVILEGE;
1196 } else {
1197 env->gpr[3] = spapr_hypercall(cpu, env->gpr[3], &env->gpr[4]);
1198 }
1199}
1200
1201static uint64_t spapr_get_patbe(PPCVirtualHypervisor *vhyp)
1202{
1203 sPAPRMachineState *spapr = SPAPR_MACHINE(vhyp);
1204
1205 return spapr->patb_entry;
1206}
1207
/*
 * Helpers for a hash page table viewed as an array of entries made of two
 * uint64_t each.  HPTE() yields a pointer to entry _i of _table.  The other
 * macros test or modify the VALID / HPTE_DIRTY bits in the first uint64_t
 * of an entry, using tswap64() to convert between stored and host byte
 * order.
 */
#define HPTE(_table, _i) (void *)(((uint64_t *)(_table)) + ((_i) * 2))
#define HPTE_VALID(_hpte) (tswap64(*((uint64_t *)(_hpte))) & HPTE64_V_VALID)
#define HPTE_DIRTY(_hpte) (tswap64(*((uint64_t *)(_hpte))) & HPTE64_V_HPTE_DIRTY)
#define CLEAN_HPTE(_hpte) ((*(uint64_t *)(_hpte)) &= tswap64(~HPTE64_V_HPTE_DIRTY))
#define DIRTY_HPTE(_hpte) ((*(uint64_t *)(_hpte)) |= tswap64(HPTE64_V_HPTE_DIRTY))
1213
1214
1215
1216
1217static int get_htab_fd(sPAPRMachineState *spapr)
1218{
1219 Error *local_err = NULL;
1220
1221 if (spapr->htab_fd >= 0) {
1222 return spapr->htab_fd;
1223 }
1224
1225 spapr->htab_fd = kvmppc_get_htab_fd(false, 0, &local_err);
1226 if (spapr->htab_fd < 0) {
1227 error_report_err(local_err);
1228 }
1229
1230 return spapr->htab_fd;
1231}
1232
1233void close_htab_fd(sPAPRMachineState *spapr)
1234{
1235 if (spapr->htab_fd >= 0) {
1236 close(spapr->htab_fd);
1237 }
1238 spapr->htab_fd = -1;
1239}
1240
1241static hwaddr spapr_hpt_mask(PPCVirtualHypervisor *vhyp)
1242{
1243 sPAPRMachineState *spapr = SPAPR_MACHINE(vhyp);
1244
1245 return HTAB_SIZE(spapr) / HASH_PTEG_SIZE_64 - 1;
1246}
1247
1248static target_ulong spapr_encode_hpt_for_kvm_pr(PPCVirtualHypervisor *vhyp)
1249{
1250 sPAPRMachineState *spapr = SPAPR_MACHINE(vhyp);
1251
1252 assert(kvm_enabled());
1253
1254 if (!spapr->htab) {
1255 return 0;
1256 }
1257
1258 return (target_ulong)(uintptr_t)spapr->htab | (spapr->htab_shift - 18);
1259}
1260
1261static const ppc_hash_pte64_t *spapr_map_hptes(PPCVirtualHypervisor *vhyp,
1262 hwaddr ptex, int n)
1263{
1264 sPAPRMachineState *spapr = SPAPR_MACHINE(vhyp);
1265 hwaddr pte_offset = ptex * HASH_PTE_SIZE_64;
1266
1267 if (!spapr->htab) {
1268
1269
1270
1271 ppc_hash_pte64_t *hptes = g_malloc(n * HASH_PTE_SIZE_64);
1272 kvmppc_read_hptes(hptes, ptex, n);
1273 return hptes;
1274 }
1275
1276
1277
1278
1279
1280 return (const ppc_hash_pte64_t *)(spapr->htab + pte_offset);
1281}
1282
1283static void spapr_unmap_hptes(PPCVirtualHypervisor *vhyp,
1284 const ppc_hash_pte64_t *hptes,
1285 hwaddr ptex, int n)
1286{
1287 sPAPRMachineState *spapr = SPAPR_MACHINE(vhyp);
1288
1289 if (!spapr->htab) {
1290 g_free((void *)hptes);
1291 }
1292
1293
1294}
1295
1296static void spapr_store_hpte(PPCVirtualHypervisor *vhyp, hwaddr ptex,
1297 uint64_t pte0, uint64_t pte1)
1298{
1299 sPAPRMachineState *spapr = SPAPR_MACHINE(vhyp);
1300 hwaddr offset = ptex * HASH_PTE_SIZE_64;
1301
1302 if (!spapr->htab) {
1303 kvmppc_write_hpte(ptex, pte0, pte1);
1304 } else {
1305 stq_p(spapr->htab + offset, pte0);
1306 stq_p(spapr->htab + offset + HASH_PTE_SIZE_64 / 2, pte1);
1307 }
1308}
1309
/*
 * Pick the HPT order (log2 of its size in bytes) for a guest with
 * @ramsize bytes of RAM: ctz64(pow2ceil(ramsize)) - 7, clamped to the
 * range [18, 46].
 */
int spapr_hpt_shift_for_ramsize(uint64_t ramsize)
{
    int order = ctz64(pow2ceil(ramsize)) - 7;

    order = MAX(order, 18); /* lower bound */
    return MIN(order, 46);  /* upper bound */
}
1322
1323void spapr_free_hpt(sPAPRMachineState *spapr)
1324{
1325 g_free(spapr->htab);
1326 spapr->htab = NULL;
1327 spapr->htab_shift = 0;
1328 close_htab_fd(spapr);
1329}
1330
1331void spapr_reallocate_hpt(sPAPRMachineState *spapr, int shift,
1332 Error **errp)
1333{
1334 long rc;
1335
1336
1337 spapr_free_hpt(spapr);
1338
1339 rc = kvmppc_reset_htab(shift);
1340 if (rc < 0) {
1341
1342 error_setg_errno(errp, errno,
1343 "Failed to allocate KVM HPT of order %d (try smaller maxmem?)",
1344 shift);
1345
1346
1347 } else if (rc > 0) {
1348
1349 if (rc != shift) {
1350 error_setg(errp,
1351 "Requested order %d HPT, but kernel allocated order %ld (try smaller maxmem?)",
1352 shift, rc);
1353 }
1354
1355 spapr->htab_shift = shift;
1356 spapr->htab = NULL;
1357 } else {
1358
1359 size_t size = 1ULL << shift;
1360 int i;
1361
1362 spapr->htab = qemu_memalign(size, size);
1363 if (!spapr->htab) {
1364 error_setg_errno(errp, errno,
1365 "Could not allocate HPT of order %d", shift);
1366 return;
1367 }
1368
1369 memset(spapr->htab, 0, size);
1370 spapr->htab_shift = shift;
1371
1372 for (i = 0; i < size / HASH_PTE_SIZE_64; i++) {
1373 DIRTY_HPTE(HPTE(spapr->htab, i));
1374 }
1375 }
1376
1377 spapr->patb_entry = 0;
1378}
1379
1380void spapr_setup_hpt_and_vrma(sPAPRMachineState *spapr)
1381{
1382 int hpt_shift;
1383
1384 if ((spapr->resize_hpt == SPAPR_RESIZE_HPT_DISABLED)
1385 || (spapr->cas_reboot
1386 && !spapr_ovec_test(spapr->ov5_cas, OV5_HPT_RESIZE))) {
1387 hpt_shift = spapr_hpt_shift_for_ramsize(MACHINE(spapr)->maxram_size);
1388 } else {
1389 uint64_t current_ram_size;
1390
1391 current_ram_size = MACHINE(spapr)->ram_size + get_plugged_memory_size();
1392 hpt_shift = spapr_hpt_shift_for_ramsize(current_ram_size);
1393 }
1394 spapr_reallocate_hpt(spapr, hpt_shift, &error_fatal);
1395
1396 if (spapr->vrma_adjust) {
1397 spapr->rma_size = kvmppc_rma_size(spapr_node0_size(MACHINE(spapr)),
1398 spapr->htab_shift);
1399 }
1400}
1401
1402static void find_unknown_sysbus_device(SysBusDevice *sbdev, void *opaque)
1403{
1404 bool matched = false;
1405
1406 if (object_dynamic_cast(OBJECT(sbdev), TYPE_SPAPR_PCI_HOST_BRIDGE)) {
1407 matched = true;
1408 }
1409
1410 if (!matched) {
1411 error_report("Device %s is not supported by this machine yet.",
1412 qdev_fw_name(DEVICE(sbdev)));
1413 exit(1);
1414 }
1415}
1416
1417static int spapr_reset_drcs(Object *child, void *opaque)
1418{
1419 sPAPRDRConnector *drc =
1420 (sPAPRDRConnector *) object_dynamic_cast(child,
1421 TYPE_SPAPR_DR_CONNECTOR);
1422
1423 if (drc) {
1424 spapr_drc_reset(drc);
1425 }
1426
1427 return 0;
1428}
1429
/*
 * Machine reset handler.
 *
 * Re-establishes the MMU setup (radix partition table entry or a fresh
 * HPT), resets all devices and DR connectors, rebuilds the device tree,
 * copies RTAS and the device tree into guest memory, and prepares the
 * first CPU to start at SPAPR_ENTRY_POINT.
 */
static void ppc_spapr_reset(void)
{
    MachineState *machine = MACHINE(qdev_get_machine());
    sPAPRMachineState *spapr = SPAPR_MACHINE(machine);
    PowerPCCPU *first_ppc_cpu;
    uint32_t rtas_limit;
    hwaddr rtas_addr, fdt_addr;
    void *fdt;
    int rc;

    /* Exits if a dynamic sysbus device of an unsupported type is present */
    foreach_dynamic_sysbus_device(find_unknown_sysbus_device, NULL);

    if (kvm_enabled() && kvmppc_has_cap_mmu_radix()) {
        /*
         * KVM with the radix capability: start with the PATBE1_GR bit set
         * in patb_entry instead of allocating an HPT.
         */
        spapr->patb_entry = PATBE1_GR;
    } else {
        spapr_setup_hpt_and_vrma(spapr);
    }

    qemu_devices_reset();

    /*
     * After the generic device reset, walk the whole QOM tree from the
     * root and reset every DR connector found.
     */
    object_child_foreach_recursive(object_get_root(), spapr_reset_drcs, NULL);

    spapr_clear_pending_events(spapr);

    /*
     * RTAS goes RTAS_MAX_SIZE bytes below the lower of rma_size and
     * RTAS_MAX_ADDR; the device tree goes FDT_MAX_SIZE bytes below RTAS.
     */
    rtas_limit = MIN(spapr->rma_size, RTAS_MAX_ADDR);
    rtas_addr = rtas_limit - RTAS_MAX_SIZE;
    fdt_addr = rtas_addr - FDT_MAX_SIZE;

    /*
     * Unless this reset is a CAS reboot, start over with an empty ov5_cas
     * vector and put all CPUs back to max_compat_pvr.
     */
    if (!spapr->cas_reboot) {
        spapr_ovec_cleanup(spapr->ov5_cas);
        spapr->ov5_cas = spapr_ovec_new();

        ppc_set_compat_all(spapr->max_compat_pvr, &error_fatal);
    }

    fdt = spapr_build_fdt(spapr, rtas_addr, spapr->rtas_size);

    spapr_load_rtas(spapr, fdt, rtas_addr);

    rc = fdt_pack(fdt);

    /* Packing is not expected to fail */
    assert(rc == 0);

    if (fdt_totalsize(fdt) > FDT_MAX_SIZE) {
        error_report("FDT too big ! 0x%x bytes (max is 0x%x)",
                     fdt_totalsize(fdt), FDT_MAX_SIZE);
        exit(1);
    }

    /* Hand the tree to qemu_fdt_dumpdtb(), then copy it into guest memory */
    qemu_fdt_dumpdtb(fdt, fdt_totalsize(fdt));
    cpu_physical_memory_write(fdt_addr, fdt, fdt_totalsize(fdt));
    g_free(fdt);

    /*
     * Boot state of the first CPU: GPR3 holds the device tree address,
     * GPR5 is zero, the CPU is un-halted and starts at SPAPR_ENTRY_POINT.
     */
    first_ppc_cpu = POWERPC_CPU(first_cpu);
    first_ppc_cpu->env.gpr[3] = fdt_addr;
    first_ppc_cpu->env.gpr[5] = 0;
    first_cpu->halted = 0;
    first_ppc_cpu->env.nip = SPAPR_ENTRY_POINT;

    /* The CAS-reboot flag only applies to the reset that just ran */
    spapr->cas_reboot = false;
}
1510
1511static void spapr_create_nvram(sPAPRMachineState *spapr)
1512{
1513 DeviceState *dev = qdev_create(&spapr->vio_bus->bus, "spapr-nvram");
1514 DriveInfo *dinfo = drive_get(IF_PFLASH, 0, 0);
1515
1516 if (dinfo) {
1517 qdev_prop_set_drive(dev, "drive", blk_by_legacy_dinfo(dinfo),
1518 &error_fatal);
1519 }
1520
1521 qdev_init_nofail(dev);
1522
1523 spapr->nvram = (struct sPAPRNVRAM *)dev;
1524}
1525
1526static void spapr_rtc_create(sPAPRMachineState *spapr)
1527{
1528 object_initialize(&spapr->rtc, sizeof(spapr->rtc), TYPE_SPAPR_RTC);
1529 object_property_add_child(OBJECT(spapr), "rtc", OBJECT(&spapr->rtc),
1530 &error_fatal);
1531 object_property_set_bool(OBJECT(&spapr->rtc), true, "realized",
1532 &error_fatal);
1533 object_property_add_alias(OBJECT(spapr), "rtc-time", OBJECT(&spapr->rtc),
1534 "date", &error_fatal);
1535}
1536
1537
1538static bool spapr_vga_init(PCIBus *pci_bus, Error **errp)
1539{
1540 switch (vga_interface_type) {
1541 case VGA_NONE:
1542 return false;
1543 case VGA_DEVICE:
1544 return true;
1545 case VGA_STD:
1546 case VGA_VIRTIO:
1547 return pci_vga_init(pci_bus) != NULL;
1548 default:
1549 error_setg(errp,
1550 "Unsupported VGA mode, only -vga std or -vga virtio is supported");
1551 return false;
1552 }
1553}
1554
1555static int spapr_post_load(void *opaque, int version_id)
1556{
1557 sPAPRMachineState *spapr = (sPAPRMachineState *)opaque;
1558 int err = 0;
1559
1560 if (!object_dynamic_cast(OBJECT(spapr->ics), TYPE_ICS_KVM)) {
1561 CPUState *cs;
1562 CPU_FOREACH(cs) {
1563 PowerPCCPU *cpu = POWERPC_CPU(cs);
1564 icp_resend(ICP(cpu->intc));
1565 }
1566 }
1567
1568
1569
1570
1571
1572 if (version_id < 3) {
1573 err = spapr_rtc_import_offset(&spapr->rtc, spapr->rtc_offset);
1574 }
1575
1576 if (kvm_enabled() && spapr->patb_entry) {
1577 PowerPCCPU *cpu = POWERPC_CPU(first_cpu);
1578 bool radix = !!(spapr->patb_entry & PATBE1_GR);
1579 bool gtse = !!(cpu->env.spr[SPR_LPCR] & LPCR_GTSE);
1580
1581 err = kvmppc_configure_v3_mmu(cpu, radix, gtse, spapr->patb_entry);
1582 if (err) {
1583 error_report("Process table config unsupported by the host");
1584 return -EINVAL;
1585 }
1586 }
1587
1588 return err;
1589}
1590
/*
 * VMState field test: true when the incoming stream version is older
 * than 3.  @opaque is not used.
 */
static bool version_before_3(void *opaque, int version_id)
{
    const int first_new_version = 3;

    return version_id < first_new_version;
}
1595
1596static bool spapr_pending_events_needed(void *opaque)
1597{
1598 sPAPRMachineState *spapr = (sPAPRMachineState *)opaque;
1599 return !QTAILQ_EMPTY(&spapr->pending_events);
1600}
1601
/*
 * Migration layout of one sPAPREventLogEntry: the summary word, the
 * length of the extended log, and the extended log itself as a
 * variable-sized buffer whose size comes from extended_length.
 */
static const VMStateDescription vmstate_spapr_event_entry = {
    .name = "spapr_event_log_entry",
    .version_id = 1,
    .minimum_version_id = 1,
    .fields = (VMStateField[]) {
        VMSTATE_UINT32(summary, sPAPREventLogEntry),
        VMSTATE_UINT32(extended_length, sPAPREventLogEntry),
        /* NOTE(review): presumably allocated on load, per the macro name */
        VMSTATE_VBUFFER_ALLOC_UINT32(extended_log, sPAPREventLogEntry, 0,
                                     NULL, extended_length),
        VMSTATE_END_OF_LIST()
    },
};
1614
/*
 * Optional subsection carrying the machine's pending_events queue, one
 * vmstate_spapr_event_entry per element.  Whether it is needed is decided
 * by spapr_pending_events_needed().
 */
static const VMStateDescription vmstate_spapr_pending_events = {
    .name = "spapr_pending_events",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = spapr_pending_events_needed,
    .fields = (VMStateField[]) {
        VMSTATE_QTAILQ_V(pending_events, sPAPRMachineState, 1,
                         vmstate_spapr_event_entry, sPAPREventLogEntry, next),
        VMSTATE_END_OF_LIST()
    },
};
1626
1627static bool spapr_ov5_cas_needed(void *opaque)
1628{
1629 sPAPRMachineState *spapr = opaque;
1630 sPAPROptionVector *ov5_mask = spapr_ovec_new();
1631 sPAPROptionVector *ov5_legacy = spapr_ovec_new();
1632 sPAPROptionVector *ov5_removed = spapr_ovec_new();
1633 bool cas_needed;
1634
1635
1636
1637
1638
1639
1640
1641
1642
1643
1644
1645
1646
1647
1648
1649
1650
1651
1652
1653
1654
1655
1656
1657
1658
1659 spapr_ovec_set(ov5_mask, OV5_FORM1_AFFINITY);
1660 spapr_ovec_set(ov5_mask, OV5_DRCONF_MEMORY);
1661
1662
1663
1664
1665
1666
1667 spapr_ovec_intersect(ov5_legacy, spapr->ov5, ov5_mask);
1668 cas_needed = spapr_ovec_diff(ov5_removed, spapr->ov5, ov5_legacy);
1669
1670 spapr_ovec_cleanup(ov5_mask);
1671 spapr_ovec_cleanup(ov5_legacy);
1672 spapr_ovec_cleanup(ov5_removed);
1673
1674 return cas_needed;
1675}
1676
/*
 * Optional subsection carrying the ov5_cas option vector, described by
 * vmstate_spapr_ovec.  Whether it is needed is decided by
 * spapr_ov5_cas_needed().
 */
static const VMStateDescription vmstate_spapr_ov5_cas = {
    .name = "spapr_option_vector_ov5_cas",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = spapr_ov5_cas_needed,
    .fields = (VMStateField[]) {
        VMSTATE_STRUCT_POINTER_V(ov5_cas, sPAPRMachineState, 1,
                                 vmstate_spapr_ovec, sPAPROptionVector),
        VMSTATE_END_OF_LIST()
    },
};
1688
1689static bool spapr_patb_entry_needed(void *opaque)
1690{
1691 sPAPRMachineState *spapr = opaque;
1692
1693 return !!spapr->patb_entry;
1694}
1695
/*
 * Optional subsection carrying the 64-bit patb_entry value.  Whether it
 * is needed is decided by spapr_patb_entry_needed().
 */
static const VMStateDescription vmstate_spapr_patb_entry = {
    .name = "spapr_patb_entry",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = spapr_patb_entry_needed,
    .fields = (VMStateField[]) {
        VMSTATE_UINT64(patb_entry, sPAPRMachineState),
        VMSTATE_END_OF_LIST()
    },
};
1706
/*
 * Top-level migration description of the sPAPR machine state (current
 * version 3, accepting streams back to version 1).  spapr_post_load()
 * runs after the fields have been loaded.
 */
static const VMStateDescription vmstate_spapr = {
    .name = "spapr",
    .version_id = 3,
    .minimum_version_id = 1,
    .post_load = spapr_post_load,
    .fields = (VMStateField[]) {
        /* 4 unused bytes, only present in streams older than version 3 */
        VMSTATE_UNUSED_BUFFER(version_before_3, 0, 4),

        /* RTC offset, only present in streams older than version 3 */
        VMSTATE_UINT64_TEST(rtc_offset, sPAPRMachineState, version_before_3),

        /* Timebase state, present from version 2 onwards */
        VMSTATE_PPC_TIMEBASE_V(tb, sPAPRMachineState, 2),
        VMSTATE_END_OF_LIST()
    },
    /* Optional sections; each one has its own .needed predicate */
    .subsections = (const VMStateDescription*[]) {
        &vmstate_spapr_ov5_cas,
        &vmstate_spapr_patb_entry,
        &vmstate_spapr_pending_events,
        NULL
    }
};
1729
1730static int htab_save_setup(QEMUFile *f, void *opaque)
1731{
1732 sPAPRMachineState *spapr = opaque;
1733
1734
1735 if (!spapr->htab_shift) {
1736 qemu_put_be32(f, -1);
1737 } else {
1738 qemu_put_be32(f, spapr->htab_shift);
1739 }
1740
1741 if (spapr->htab) {
1742 spapr->htab_save_index = 0;
1743 spapr->htab_first_pass = true;
1744 } else {
1745 if (spapr->htab_shift) {
1746 assert(kvm_enabled());
1747 }
1748 }
1749
1750
1751 return 0;
1752}
1753
1754static void htab_save_chunk(QEMUFile *f, sPAPRMachineState *spapr,
1755 int chunkstart, int n_valid, int n_invalid)
1756{
1757 qemu_put_be32(f, chunkstart);
1758 qemu_put_be16(f, n_valid);
1759 qemu_put_be16(f, n_invalid);
1760 qemu_put_buffer(f, HPTE(spapr->htab, chunkstart),
1761 HASH_PTE_SIZE_64 * n_valid);
1762}
1763
1764static void htab_save_end_marker(QEMUFile *f)
1765{
1766 qemu_put_be32(f, 0);
1767 qemu_put_be16(f, 0);
1768 qemu_put_be16(f, 0);
1769}
1770
/*
 * First pass over a QEMU-side HPT: send every valid entry once.
 *
 * Scanning resumes at spapr->htab_save_index.  Each loop iteration skips a
 * run of invalid entries and then sends a run of valid entries as one
 * chunk; the dirty bit is cleared on every entry visited.  The loop stops
 * when the table end is reached, when qemu_file_rate_limit() reports the
 * limit, or, if @max_ns is not -1, when more than @max_ns nanoseconds have
 * elapsed after sending a chunk.
 *
 * When the end of the table is reached, the cursor wraps to 0 and
 * htab_first_pass is cleared.
 */
static void htab_save_first_pass(QEMUFile *f, sPAPRMachineState *spapr,
                                 int64_t max_ns)
{
    bool has_timeout = max_ns != -1;
    int htabslots = HTAB_SIZE(spapr) / HASH_PTE_SIZE_64;
    int index = spapr->htab_save_index;
    int64_t starttime = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);

    assert(spapr->htab_first_pass);

    do {
        int chunkstart;

        /* Skip invalid entries; nothing is sent for them in this pass */
        while ((index < htabslots)
               && !HPTE_VALID(HPTE(spapr->htab, index))) {
            CLEAN_HPTE(HPTE(spapr->htab, index));
            index++;
        }

        /* Collect a run of valid entries; the count must fit in 16 bits */
        chunkstart = index;
        while ((index < htabslots) && (index - chunkstart < USHRT_MAX)
               && HPTE_VALID(HPTE(spapr->htab, index))) {
            CLEAN_HPTE(HPTE(spapr->htab, index));
            index++;
        }

        if (index > chunkstart) {
            int n_valid = index - chunkstart;

            htab_save_chunk(f, spapr, chunkstart, n_valid, 0);

            /* The time budget is only checked after a chunk was sent */
            if (has_timeout &&
                (qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - starttime) > max_ns) {
                break;
            }
        }
    } while ((index < htabslots) && !qemu_file_rate_limit(f));

    /* Whole table covered: later calls go through the later-pass path */
    if (index >= htabslots) {
        assert(index == htabslots);
        index = 0;
        spapr->htab_first_pass = false;
    }
    spapr->htab_save_index = index;
}
1818
/*
 * Subsequent pass over a QEMU-side HPT: send only entries marked dirty.
 *
 * Scanning resumes at spapr->htab_save_index and wraps around at the end
 * of the table.  Each loop iteration skips clean entries, then gathers a
 * run of dirty+valid entries followed by a run of dirty+invalid entries,
 * clears their dirty bits and sends both runs as one chunk.
 *
 * A negative @max_ns marks the final pass: neither the time budget nor
 * qemu_file_rate_limit() stops the loop, which then runs until every slot
 * has been examined.
 *
 * Returns 1 when the whole table was examined and nothing was sent,
 * otherwise 0.
 */
static int htab_save_later_pass(QEMUFile *f, sPAPRMachineState *spapr,
                                int64_t max_ns)
{
    bool final = max_ns < 0;
    int htabslots = HTAB_SIZE(spapr) / HASH_PTE_SIZE_64;
    int examined = 0, sent = 0;
    int index = spapr->htab_save_index;
    int64_t starttime = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);

    assert(!spapr->htab_first_pass);

    do {
        int chunkstart, invalidstart;

        /* Skip entries that have not been dirtied */
        while ((index < htabslots)
               && !HPTE_DIRTY(HPTE(spapr->htab, index))) {
            index++;
            examined++;
        }

        chunkstart = index;
        /* Run of dirty entries that are valid; count must fit in 16 bits */
        while ((index < htabslots) && (index - chunkstart < USHRT_MAX)
               && HPTE_DIRTY(HPTE(spapr->htab, index))
               && HPTE_VALID(HPTE(spapr->htab, index))) {
            CLEAN_HPTE(HPTE(spapr->htab, index));
            index++;
            examined++;
        }

        invalidstart = index;
        /* Run of dirty entries that are invalid; same 16-bit limit */
        while ((index < htabslots) && (index - invalidstart < USHRT_MAX)
               && HPTE_DIRTY(HPTE(spapr->htab, index))
               && !HPTE_VALID(HPTE(spapr->htab, index))) {
            CLEAN_HPTE(HPTE(spapr->htab, index));
            index++;
            examined++;
        }

        if (index > chunkstart) {
            int n_valid = invalidstart - chunkstart;
            int n_invalid = index - invalidstart;

            htab_save_chunk(f, spapr, chunkstart, n_valid, n_invalid);
            sent += index - chunkstart;

            /* The time budget is only checked after a chunk was sent */
            if (!final && (qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - starttime) > max_ns) {
                break;
            }
        }

        /* Every slot has been looked at once in this call */
        if (examined >= htabslots) {
            break;
        }

        /* Wrap around to the start of the table */
        if (index >= htabslots) {
            assert(index == htabslots);
            index = 0;
        }
    } while ((examined < htabslots) && (!qemu_file_rate_limit(f) || final));

    /* The loop may have been left with the cursor at the table end */
    if (index >= htabslots) {
        assert(index == htabslots);
        index = 0;
    }

    spapr->htab_save_index = index;

    return (examined >= htabslots) && (sent == 0) ? 1 : 0;
}
1891
/* Time budget, in nanoseconds, passed to the HPT save helpers per iteration */
#define MAX_ITERATION_NS 5000000
/* Buffer size argument passed to kvmppc_save_htab() */
#define MAX_KVM_BUF_SIZE 2048
1894
1895static int htab_save_iterate(QEMUFile *f, void *opaque)
1896{
1897 sPAPRMachineState *spapr = opaque;
1898 int fd;
1899 int rc = 0;
1900
1901
1902 if (!spapr->htab_shift) {
1903 qemu_put_be32(f, -1);
1904 return 1;
1905 } else {
1906 qemu_put_be32(f, 0);
1907 }
1908
1909 if (!spapr->htab) {
1910 assert(kvm_enabled());
1911
1912 fd = get_htab_fd(spapr);
1913 if (fd < 0) {
1914 return fd;
1915 }
1916
1917 rc = kvmppc_save_htab(f, fd, MAX_KVM_BUF_SIZE, MAX_ITERATION_NS);
1918 if (rc < 0) {
1919 return rc;
1920 }
1921 } else if (spapr->htab_first_pass) {
1922 htab_save_first_pass(f, spapr, MAX_ITERATION_NS);
1923 } else {
1924 rc = htab_save_later_pass(f, spapr, MAX_ITERATION_NS);
1925 }
1926
1927 htab_save_end_marker(f);
1928
1929 return rc;
1930}
1931
1932static int htab_save_complete(QEMUFile *f, void *opaque)
1933{
1934 sPAPRMachineState *spapr = opaque;
1935 int fd;
1936
1937
1938 if (!spapr->htab_shift) {
1939 qemu_put_be32(f, -1);
1940 return 0;
1941 } else {
1942 qemu_put_be32(f, 0);
1943 }
1944
1945 if (!spapr->htab) {
1946 int rc;
1947
1948 assert(kvm_enabled());
1949
1950 fd = get_htab_fd(spapr);
1951 if (fd < 0) {
1952 return fd;
1953 }
1954
1955 rc = kvmppc_save_htab(f, fd, MAX_KVM_BUF_SIZE, -1);
1956 if (rc < 0) {
1957 return rc;
1958 }
1959 } else {
1960 if (spapr->htab_first_pass) {
1961 htab_save_first_pass(f, spapr, -1);
1962 }
1963 htab_save_later_pass(f, spapr, -1);
1964 }
1965
1966
1967 htab_save_end_marker(f);
1968
1969 return 0;
1970}
1971
1972static int htab_load(QEMUFile *f, void *opaque, int version_id)
1973{
1974 sPAPRMachineState *spapr = opaque;
1975 uint32_t section_hdr;
1976 int fd = -1;
1977 Error *local_err = NULL;
1978
1979 if (version_id < 1 || version_id > 1) {
1980 error_report("htab_load() bad version");
1981 return -EINVAL;
1982 }
1983
1984 section_hdr = qemu_get_be32(f);
1985
1986 if (section_hdr == -1) {
1987 spapr_free_hpt(spapr);
1988 return 0;
1989 }
1990
1991 if (section_hdr) {
1992
1993 spapr_reallocate_hpt(spapr, section_hdr, &local_err);
1994 if (local_err) {
1995 error_report_err(local_err);
1996 return -EINVAL;
1997 }
1998 return 0;
1999 }
2000
2001 if (!spapr->htab) {
2002 assert(kvm_enabled());
2003
2004 fd = kvmppc_get_htab_fd(true, 0, &local_err);
2005 if (fd < 0) {
2006 error_report_err(local_err);
2007 return fd;
2008 }
2009 }
2010
2011 while (true) {
2012 uint32_t index;
2013 uint16_t n_valid, n_invalid;
2014
2015 index = qemu_get_be32(f);
2016 n_valid = qemu_get_be16(f);
2017 n_invalid = qemu_get_be16(f);
2018
2019 if ((index == 0) && (n_valid == 0) && (n_invalid == 0)) {
2020
2021 break;
2022 }
2023
2024 if ((index + n_valid + n_invalid) >
2025 (HTAB_SIZE(spapr) / HASH_PTE_SIZE_64)) {
2026
2027 error_report(
2028 "htab_load() bad index %d (%hd+%hd entries) in htab stream (htab_shift=%d)",
2029 index, n_valid, n_invalid, spapr->htab_shift);
2030 return -EINVAL;
2031 }
2032
2033 if (spapr->htab) {
2034 if (n_valid) {
2035 qemu_get_buffer(f, HPTE(spapr->htab, index),
2036 HASH_PTE_SIZE_64 * n_valid);
2037 }
2038 if (n_invalid) {
2039 memset(HPTE(spapr->htab, index + n_valid), 0,
2040 HASH_PTE_SIZE_64 * n_invalid);
2041 }
2042 } else {
2043 int rc;
2044
2045 assert(fd >= 0);
2046
2047 rc = kvmppc_load_htab_chunk(f, fd, index, n_valid, n_invalid);
2048 if (rc < 0) {
2049 return rc;
2050 }
2051 }
2052 }
2053
2054 if (!spapr->htab) {
2055 assert(fd >= 0);
2056 close(fd);
2057 }
2058
2059 return 0;
2060}
2061
2062static void htab_save_cleanup(void *opaque)
2063{
2064 sPAPRMachineState *spapr = opaque;
2065
2066 close_htab_fd(spapr);
2067}
2068
/*
 * Migration handlers for the hash page table: setup, iterative save,
 * final save, cleanup of the cached fd, and load on the destination.
 */
static SaveVMHandlers savevm_htab_handlers = {
    .save_setup = htab_save_setup,
    .save_live_iterate = htab_save_iterate,
    .save_live_complete_precopy = htab_save_complete,
    .save_cleanup = htab_save_cleanup,
    .load_state = htab_load,
};
2076
2077static void spapr_boot_set(void *opaque, const char *boot_device,
2078 Error **errp)
2079{
2080 MachineState *machine = MACHINE(opaque);
2081 machine->boot_order = g_strdup(boot_device);
2082}
2083
2084static void spapr_create_lmb_dr_connectors(sPAPRMachineState *spapr)
2085{
2086 MachineState *machine = MACHINE(spapr);
2087 uint64_t lmb_size = SPAPR_MEMORY_BLOCK_SIZE;
2088 uint32_t nr_lmbs = (machine->maxram_size - machine->ram_size)/lmb_size;
2089 int i;
2090
2091 for (i = 0; i < nr_lmbs; i++) {
2092 uint64_t addr;
2093
2094 addr = i * lmb_size + spapr->hotplug_memory.base;
2095 spapr_dr_connector_new(OBJECT(spapr), TYPE_SPAPR_DRC_LMB,
2096 addr / lmb_size);
2097 }
2098}
2099
2100
2101
2102
2103
2104
2105static void spapr_validate_node_memory(MachineState *machine, Error **errp)
2106{
2107 int i;
2108
2109 if (machine->ram_size % SPAPR_MEMORY_BLOCK_SIZE) {
2110 error_setg(errp, "Memory size 0x" RAM_ADDR_FMT
2111 " is not aligned to %llu MiB",
2112 machine->ram_size,
2113 SPAPR_MEMORY_BLOCK_SIZE / M_BYTE);
2114 return;
2115 }
2116
2117 if (machine->maxram_size % SPAPR_MEMORY_BLOCK_SIZE) {
2118 error_setg(errp, "Maximum memory size 0x" RAM_ADDR_FMT
2119 " is not aligned to %llu MiB",
2120 machine->ram_size,
2121 SPAPR_MEMORY_BLOCK_SIZE / M_BYTE);
2122 return;
2123 }
2124
2125 for (i = 0; i < nb_numa_nodes; i++) {
2126 if (numa_info[i].node_mem % SPAPR_MEMORY_BLOCK_SIZE) {
2127 error_setg(errp,
2128 "Node %d memory size 0x%" PRIx64
2129 " is not aligned to %llu MiB",
2130 i, numa_info[i].node_mem,
2131 SPAPR_MEMORY_BLOCK_SIZE / M_BYTE);
2132 return;
2133 }
2134 }
2135}
2136
2137
2138static CPUArchId *spapr_find_cpu_slot(MachineState *ms, uint32_t id, int *idx)
2139{
2140 int index = id / smp_threads;
2141
2142 if (index >= ms->possible_cpus->len) {
2143 return NULL;
2144 }
2145 if (idx) {
2146 *idx = index;
2147 }
2148 return &ms->possible_cpus->cpus[index];
2149}
2150
2151static void spapr_init_cpus(sPAPRMachineState *spapr)
2152{
2153 MachineState *machine = MACHINE(spapr);
2154 MachineClass *mc = MACHINE_GET_CLASS(machine);
2155 const char *type = spapr_get_cpu_core_type(machine->cpu_type);
2156 int smt = kvmppc_smt_threads();
2157 const CPUArchIdList *possible_cpus;
2158 int boot_cores_nr = smp_cpus / smp_threads;
2159 int i;
2160
2161 if (!type) {
2162 error_report("Unable to find sPAPR CPU Core definition");
2163 exit(1);
2164 }
2165
2166 possible_cpus = mc->possible_cpu_arch_ids(machine);
2167 if (mc->has_hotpluggable_cpus) {
2168 if (smp_cpus % smp_threads) {
2169 error_report("smp_cpus (%u) must be multiple of threads (%u)",
2170 smp_cpus, smp_threads);
2171 exit(1);
2172 }
2173 if (max_cpus % smp_threads) {
2174 error_report("max_cpus (%u) must be multiple of threads (%u)",
2175 max_cpus, smp_threads);
2176 exit(1);
2177 }
2178 } else {
2179 if (max_cpus != smp_cpus) {
2180 error_report("This machine version does not support CPU hotplug");
2181 exit(1);
2182 }
2183 boot_cores_nr = possible_cpus->len;
2184 }
2185
2186 for (i = 0; i < possible_cpus->len; i++) {
2187 int core_id = i * smp_threads;
2188
2189 if (mc->has_hotpluggable_cpus) {
2190 spapr_dr_connector_new(OBJECT(spapr), TYPE_SPAPR_DRC_CPU,
2191 (core_id / smp_threads) * smt);
2192 }
2193
2194 if (i < boot_cores_nr) {
2195 Object *core = object_new(type);
2196 int nr_threads = smp_threads;
2197
2198
2199 if ((i + 1) * smp_threads >= smp_cpus) {
2200 nr_threads = smp_cpus - i * smp_threads;
2201 }
2202
2203 object_property_set_int(core, nr_threads, "nr-threads",
2204 &error_fatal);
2205 object_property_set_int(core, core_id, CPU_CORE_PROP_CORE_ID,
2206 &error_fatal);
2207 object_property_set_bool(core, true, "realized", &error_fatal);
2208 }
2209 }
2210}
2211
2212static void spapr_set_vsmt_mode(sPAPRMachineState *spapr, Error **errp)
2213{
2214 Error *local_err = NULL;
2215 bool vsmt_user = !!spapr->vsmt;
2216 int kvm_smt = kvmppc_smt_threads();
2217 int ret;
2218
2219 if (!kvm_enabled() && (smp_threads > 1)) {
2220 error_setg(&local_err, "TCG cannot support more than 1 thread/core "
2221 "on a pseries machine");
2222 goto out;
2223 }
2224 if (!is_power_of_2(smp_threads)) {
2225 error_setg(&local_err, "Cannot support %d threads/core on a pseries "
2226 "machine because it must be a power of 2", smp_threads);
2227 goto out;
2228 }
2229
2230
2231 if (vsmt_user) {
2232 if (spapr->vsmt < smp_threads) {
2233 error_setg(&local_err, "Cannot support VSMT mode %d"
2234 " because it must be >= threads/core (%d)",
2235 spapr->vsmt, smp_threads);
2236 goto out;
2237 }
2238
2239 } else {
2240
2241
2242 spapr->vsmt = MAX(kvm_smt, smp_threads);
2243 }
2244
2245
2246 if (kvm_enabled() && (spapr->vsmt != kvm_smt)) {
2247 ret = kvmppc_set_smt_threads(spapr->vsmt);
2248 if (ret) {
2249 error_setg(&local_err,
2250 "Failed to set KVM's VSMT mode to %d (errno %d)",
2251 spapr->vsmt, ret);
2252 if (!vsmt_user) {
2253 error_append_hint(&local_err, "On PPC, a VM with %d threads/"
2254 "core on a host with %d threads/core requires "
2255 " the use of VSMT mode %d.\n",
2256 smp_threads, kvm_smt, spapr->vsmt);
2257 }
2258 kvmppc_hint_smt_possible(&local_err);
2259 goto out;
2260 }
2261 }
2262
2263out:
2264 error_propagate(errp, local_err);
2265}
2266
2267
2268static void ppc_spapr_init(MachineState *machine)
2269{
2270 sPAPRMachineState *spapr = SPAPR_MACHINE(machine);
2271 sPAPRMachineClass *smc = SPAPR_MACHINE_GET_CLASS(machine);
2272 const char *kernel_filename = machine->kernel_filename;
2273 const char *initrd_filename = machine->initrd_filename;
2274 PCIHostState *phb;
2275 int i;
2276 MemoryRegion *sysmem = get_system_memory();
2277 MemoryRegion *ram = g_new(MemoryRegion, 1);
2278 MemoryRegion *rma_region;
2279 void *rma = NULL;
2280 hwaddr rma_alloc_size;
2281 hwaddr node0_size = spapr_node0_size(machine);
2282 long load_limit, fw_size;
2283 char *filename;
2284 Error *resize_hpt_err = NULL;
2285
2286 msi_nonbroken = true;
2287
2288 QLIST_INIT(&spapr->phbs);
2289 QTAILQ_INIT(&spapr->pending_dimm_unplugs);
2290
2291
2292 kvmppc_check_papr_resize_hpt(&resize_hpt_err);
2293 if (spapr->resize_hpt == SPAPR_RESIZE_HPT_DEFAULT) {
2294
2295
2296
2297
2298
2299
2300 if (resize_hpt_err) {
2301 spapr->resize_hpt = SPAPR_RESIZE_HPT_DISABLED;
2302 error_free(resize_hpt_err);
2303 resize_hpt_err = NULL;
2304 } else {
2305 spapr->resize_hpt = smc->resize_hpt_default;
2306 }
2307 }
2308
2309 assert(spapr->resize_hpt != SPAPR_RESIZE_HPT_DEFAULT);
2310
2311 if ((spapr->resize_hpt != SPAPR_RESIZE_HPT_DISABLED) && resize_hpt_err) {
2312
2313
2314
2315 error_report_err(resize_hpt_err);
2316 exit(1);
2317 }
2318
2319
2320 rma_alloc_size = kvmppc_alloc_rma(&rma);
2321
2322 if (rma_alloc_size == -1) {
2323 error_report("Unable to create RMA");
2324 exit(1);
2325 }
2326
2327 if (rma_alloc_size && (rma_alloc_size < node0_size)) {
2328 spapr->rma_size = rma_alloc_size;
2329 } else {
2330 spapr->rma_size = node0_size;
2331
2332
2333
2334
2335
2336
2337
2338
2339
2340
2341 if (kvm_enabled()) {
2342 spapr->vrma_adjust = 1;
2343 spapr->rma_size = MIN(spapr->rma_size, 0x10000000);
2344 }
2345
2346
2347
2348
2349
2350
2351 spapr->rma_size = MIN(spapr->rma_size, 0x400000000ull);
2352 }
2353
2354 if (spapr->rma_size > node0_size) {
2355 error_report("Numa node 0 has to span the RMA (%#08"HWADDR_PRIx")",
2356 spapr->rma_size);
2357 exit(1);
2358 }
2359
2360
2361 load_limit = MIN(spapr->rma_size, RTAS_MAX_ADDR) - FW_OVERHEAD;
2362
2363
2364 xics_system_init(machine, XICS_IRQS_SPAPR, &error_fatal);
2365
2366
2367
2368 spapr->ov5 = spapr_ovec_new();
2369 spapr->ov5_cas = spapr_ovec_new();
2370
2371 if (smc->dr_lmb_enabled) {
2372 spapr_ovec_set(spapr->ov5, OV5_DRCONF_MEMORY);
2373 spapr_validate_node_memory(machine, &error_fatal);
2374 }
2375
2376 spapr_ovec_set(spapr->ov5, OV5_FORM1_AFFINITY);
2377 if (!kvm_enabled() || kvmppc_has_cap_mmu_radix()) {
2378
2379 spapr_ovec_set(spapr->ov5, OV5_MMU_RADIX_GTSE);
2380 }
2381
2382
2383
2384 if (spapr->use_hotplug_event_source) {
2385 spapr_ovec_set(spapr->ov5, OV5_HP_EVT);
2386 }
2387
2388
2389 if (spapr->resize_hpt != SPAPR_RESIZE_HPT_DISABLED) {
2390 spapr_ovec_set(spapr->ov5, OV5_HPT_RESIZE);
2391 }
2392
2393
2394 spapr_set_vsmt_mode(spapr, &error_fatal);
2395
2396 spapr_init_cpus(spapr);
2397
2398 if (kvm_enabled()) {
2399
2400 kvmppc_enable_logical_ci_hcalls();
2401 kvmppc_enable_set_mode_hcall();
2402
2403
2404 kvmppc_enable_clear_ref_mod_hcalls();
2405 }
2406
2407
2408 memory_region_allocate_system_memory(ram, NULL, "ppc_spapr.ram",
2409 machine->ram_size);
2410 memory_region_add_subregion(sysmem, 0, ram);
2411
2412 if (rma_alloc_size && rma) {
2413 rma_region = g_new(MemoryRegion, 1);
2414 memory_region_init_ram_ptr(rma_region, NULL, "ppc_spapr.rma",
2415 rma_alloc_size, rma);
2416 vmstate_register_ram_global(rma_region);
2417 memory_region_add_subregion(sysmem, 0, rma_region);
2418 }
2419
2420
2421 if (machine->ram_size < machine->maxram_size) {
2422 ram_addr_t hotplug_mem_size = machine->maxram_size - machine->ram_size;
2423
2424
2425
2426
2427
2428 int max_memslots = kvm_enabled() ? kvm_get_max_memslots() / 2 :
2429 SPAPR_MAX_RAM_SLOTS;
2430
2431 if (max_memslots < SPAPR_MAX_RAM_SLOTS) {
2432 max_memslots = SPAPR_MAX_RAM_SLOTS;
2433 }
2434 if (machine->ram_slots > max_memslots) {
2435 error_report("Specified number of memory slots %"
2436 PRIu64" exceeds max supported %d",
2437 machine->ram_slots, max_memslots);
2438 exit(1);
2439 }
2440
2441 spapr->hotplug_memory.base = ROUND_UP(machine->ram_size,
2442 SPAPR_HOTPLUG_MEM_ALIGN);
2443 memory_region_init(&spapr->hotplug_memory.mr, OBJECT(spapr),
2444 "hotplug-memory", hotplug_mem_size);
2445 memory_region_add_subregion(sysmem, spapr->hotplug_memory.base,
2446 &spapr->hotplug_memory.mr);
2447 }
2448
2449 if (smc->dr_lmb_enabled) {
2450 spapr_create_lmb_dr_connectors(spapr);
2451 }
2452
2453 filename = qemu_find_file(QEMU_FILE_TYPE_BIOS, "spapr-rtas.bin");
2454 if (!filename) {
2455 error_report("Could not find LPAR rtas '%s'", "spapr-rtas.bin");
2456 exit(1);
2457 }
2458 spapr->rtas_size = get_image_size(filename);
2459 if (spapr->rtas_size < 0) {
2460 error_report("Could not get size of LPAR rtas '%s'", filename);
2461 exit(1);
2462 }
2463 spapr->rtas_blob = g_malloc(spapr->rtas_size);
2464 if (load_image_size(filename, spapr->rtas_blob, spapr->rtas_size) < 0) {
2465 error_report("Could not load LPAR rtas '%s'", filename);
2466 exit(1);
2467 }
2468 if (spapr->rtas_size > RTAS_MAX_SIZE) {
2469 error_report("RTAS too big ! 0x%zx bytes (max is 0x%x)",
2470 (size_t)spapr->rtas_size, RTAS_MAX_SIZE);
2471 exit(1);
2472 }
2473 g_free(filename);
2474
2475
2476 spapr_events_init(spapr);
2477
2478
2479 spapr_rtc_create(spapr);
2480
2481
2482 spapr->vio_bus = spapr_vio_bus_init();
2483
2484 for (i = 0; i < MAX_SERIAL_PORTS; i++) {
2485 if (serial_hds[i]) {
2486 spapr_vty_create(spapr->vio_bus, serial_hds[i]);
2487 }
2488 }
2489
2490
2491 spapr_create_nvram(spapr);
2492
2493
2494 spapr_pci_rtas_init();
2495
2496 phb = spapr_create_phb(spapr, 0);
2497
2498 for (i = 0; i < nb_nics; i++) {
2499 NICInfo *nd = &nd_table[i];
2500
2501 if (!nd->model) {
2502 nd->model = g_strdup("ibmveth");
2503 }
2504
2505 if (strcmp(nd->model, "ibmveth") == 0) {
2506 spapr_vlan_create(spapr->vio_bus, nd);
2507 } else {
2508 pci_nic_init_nofail(&nd_table[i], phb->bus, nd->model, NULL);
2509 }
2510 }
2511
2512 for (i = 0; i <= drive_get_max_bus(IF_SCSI); i++) {
2513 spapr_vscsi_create(spapr->vio_bus);
2514 }
2515
2516
2517 if (spapr_vga_init(phb->bus, &error_fatal)) {
2518 spapr->has_graphics = true;
2519 machine->usb |= defaults_enabled() && !machine->usb_disabled;
2520 }
2521
2522 if (machine->usb) {
2523 if (smc->use_ohci_by_default) {
2524 pci_create_simple(phb->bus, -1, "pci-ohci");
2525 } else {
2526 pci_create_simple(phb->bus, -1, "nec-usb-xhci");
2527 }
2528
2529 if (spapr->has_graphics) {
2530 USBBus *usb_bus = usb_bus_find(-1);
2531
2532 usb_create_simple(usb_bus, "usb-kbd");
2533 usb_create_simple(usb_bus, "usb-mouse");
2534 }
2535 }
2536
2537 if (spapr->rma_size < (MIN_RMA_SLOF << 20)) {
2538 error_report(
2539 "pSeries SLOF firmware requires >= %ldM guest RMA (Real Mode Area memory)",
2540 MIN_RMA_SLOF);
2541 exit(1);
2542 }
2543
2544 if (kernel_filename) {
2545 uint64_t lowaddr = 0;
2546
2547 spapr->kernel_size = load_elf(kernel_filename, translate_kernel_address,
2548 NULL, NULL, &lowaddr, NULL, 1,
2549 PPC_ELF_MACHINE, 0, 0);
2550 if (spapr->kernel_size == ELF_LOAD_WRONG_ENDIAN) {
2551 spapr->kernel_size = load_elf(kernel_filename,
2552 translate_kernel_address, NULL, NULL,
2553 &lowaddr, NULL, 0, PPC_ELF_MACHINE,
2554 0, 0);
2555 spapr->kernel_le = spapr->kernel_size > 0;
2556 }
2557 if (spapr->kernel_size < 0) {
2558 error_report("error loading %s: %s", kernel_filename,
2559 load_elf_strerror(spapr->kernel_size));
2560 exit(1);
2561 }
2562
2563
2564 if (initrd_filename) {
2565
2566
2567
2568 spapr->initrd_base = (KERNEL_LOAD_ADDR + spapr->kernel_size
2569 + 0x1ffff) & ~0xffff;
2570 spapr->initrd_size = load_image_targphys(initrd_filename,
2571 spapr->initrd_base,
2572 load_limit
2573 - spapr->initrd_base);
2574 if (spapr->initrd_size < 0) {
2575 error_report("could not load initial ram disk '%s'",
2576 initrd_filename);
2577 exit(1);
2578 }
2579 }
2580 }
2581
2582 if (bios_name == NULL) {
2583 bios_name = FW_FILE_NAME;
2584 }
2585 filename = qemu_find_file(QEMU_FILE_TYPE_BIOS, bios_name);
2586 if (!filename) {
2587 error_report("Could not find LPAR firmware '%s'", bios_name);
2588 exit(1);
2589 }
2590 fw_size = load_image_targphys(filename, 0, FW_MAX_SIZE);
2591 if (fw_size <= 0) {
2592 error_report("Could not load LPAR firmware '%s'", filename);
2593 exit(1);
2594 }
2595 g_free(filename);
2596
2597
2598
2599
2600 vmstate_register(NULL, 0, &vmstate_spapr, spapr);
2601 register_savevm_live(NULL, "spapr/htab", -1, 1,
2602 &savevm_htab_handlers, spapr);
2603
2604 qemu_register_boot_set(spapr_boot_set, spapr);
2605
2606 if (kvm_enabled()) {
2607
2608 qemu_add_vm_change_state_handler(cpu_ppc_clock_vm_state_change,
2609 &spapr->tb);
2610
2611 kvmppc_spapr_enable_inkernel_multitce();
2612 }
2613}
2614
/*
 * Map the "kvm-type" machine option string to a numeric VM type.
 *
 * Returns 0 when no type was given, 1 for "HV" and 2 for "PR".
 * Any other string is reported with error_report() and QEMU exits.
 */
static int spapr_kvm_type(const char *vm_type)
{
    if (vm_type == NULL) {
        return 0;
    }
    if (strcmp(vm_type, "HV") == 0) {
        return 1;
    }
    if (strcmp(vm_type, "PR") == 0) {
        return 2;
    }

    error_report("Unknown kvm-type specified '%s'", vm_type);
    exit(1);
}
2632
2633
2634
2635
2636
2637static char *spapr_get_fw_dev_path(FWPathProvider *p, BusState *bus,
2638 DeviceState *dev)
2639{
2640#define CAST(type, obj, name) \
2641 ((type *)object_dynamic_cast(OBJECT(obj), (name)))
2642 SCSIDevice *d = CAST(SCSIDevice, dev, TYPE_SCSI_DEVICE);
2643 sPAPRPHBState *phb = CAST(sPAPRPHBState, dev, TYPE_SPAPR_PCI_HOST_BRIDGE);
2644 VHostSCSICommon *vsc = CAST(VHostSCSICommon, dev, TYPE_VHOST_SCSI_COMMON);
2645
2646 if (d) {
2647 void *spapr = CAST(void, bus->parent, "spapr-vscsi");
2648 VirtIOSCSI *virtio = CAST(VirtIOSCSI, bus->parent, TYPE_VIRTIO_SCSI);
2649 USBDevice *usb = CAST(USBDevice, bus->parent, TYPE_USB_DEVICE);
2650
2651 if (spapr) {
2652
2653
2654
2655
2656
2657 unsigned id = 0x8000 | (d->id << 8) | d->lun;
2658 return g_strdup_printf("%s@%"PRIX64, qdev_fw_name(dev),
2659 (uint64_t)id << 48);
2660 } else if (virtio) {
2661
2662
2663
2664
2665
2666
2667
2668 unsigned id = 0x1000000 | (d->id << 16) | d->lun;
2669 if (d->lun >= 256) {
2670
2671 id |= 0x4000;
2672 }
2673 return g_strdup_printf("%s@%"PRIX64, qdev_fw_name(dev),
2674 (uint64_t)id << 32);
2675 } else if (usb) {
2676
2677
2678
2679
2680 unsigned usb_port = atoi(usb->port->path);
2681 unsigned id = 0x1000000 | (usb_port << 16) | d->lun;
2682 return g_strdup_printf("%s@%"PRIX64, qdev_fw_name(dev),
2683 (uint64_t)id << 32);
2684 }
2685 }
2686
2687
2688
2689
2690
2691
2692
2693 if (strcmp("usb-host", qdev_fw_name(dev)) == 0) {
2694 USBDevice *usbdev = CAST(USBDevice, dev, TYPE_USB_DEVICE);
2695 if (usb_host_dev_is_scsi_storage(usbdev)) {
2696 return g_strdup_printf("storage@%s/disk", usbdev->port->path);
2697 }
2698 }
2699
2700 if (phb) {
2701
2702 return g_strdup_printf("pci@%"PRIX64, phb->buid);
2703 }
2704
2705 if (vsc) {
2706
2707 unsigned id = 0x1000000 | (vsc->target << 16) | vsc->lun;
2708 return g_strdup_printf("disk@%"PRIX64, (uint64_t)id << 32);
2709 }
2710
2711 if (g_str_equal("pci-bridge", qdev_fw_name(dev))) {
2712
2713 PCIDevice *pcidev = CAST(PCIDevice, dev, TYPE_PCI_DEVICE);
2714 return g_strdup_printf("pci@%x", PCI_SLOT(pcidev->devfn));
2715 }
2716
2717 return NULL;
2718}
2719
2720static char *spapr_get_kvm_type(Object *obj, Error **errp)
2721{
2722 sPAPRMachineState *spapr = SPAPR_MACHINE(obj);
2723
2724 return g_strdup(spapr->kvm_type);
2725}
2726
2727static void spapr_set_kvm_type(Object *obj, const char *value, Error **errp)
2728{
2729 sPAPRMachineState *spapr = SPAPR_MACHINE(obj);
2730
2731 g_free(spapr->kvm_type);
2732 spapr->kvm_type = g_strdup(value);
2733}
2734
2735static bool spapr_get_modern_hotplug_events(Object *obj, Error **errp)
2736{
2737 sPAPRMachineState *spapr = SPAPR_MACHINE(obj);
2738
2739 return spapr->use_hotplug_event_source;
2740}
2741
2742static void spapr_set_modern_hotplug_events(Object *obj, bool value,
2743 Error **errp)
2744{
2745 sPAPRMachineState *spapr = SPAPR_MACHINE(obj);
2746
2747 spapr->use_hotplug_event_source = value;
2748}
2749
2750static char *spapr_get_resize_hpt(Object *obj, Error **errp)
2751{
2752 sPAPRMachineState *spapr = SPAPR_MACHINE(obj);
2753
2754 switch (spapr->resize_hpt) {
2755 case SPAPR_RESIZE_HPT_DEFAULT:
2756 return g_strdup("default");
2757 case SPAPR_RESIZE_HPT_DISABLED:
2758 return g_strdup("disabled");
2759 case SPAPR_RESIZE_HPT_ENABLED:
2760 return g_strdup("enabled");
2761 case SPAPR_RESIZE_HPT_REQUIRED:
2762 return g_strdup("required");
2763 }
2764 g_assert_not_reached();
2765}
2766
2767static void spapr_set_resize_hpt(Object *obj, const char *value, Error **errp)
2768{
2769 sPAPRMachineState *spapr = SPAPR_MACHINE(obj);
2770
2771 if (strcmp(value, "default") == 0) {
2772 spapr->resize_hpt = SPAPR_RESIZE_HPT_DEFAULT;
2773 } else if (strcmp(value, "disabled") == 0) {
2774 spapr->resize_hpt = SPAPR_RESIZE_HPT_DISABLED;
2775 } else if (strcmp(value, "enabled") == 0) {
2776 spapr->resize_hpt = SPAPR_RESIZE_HPT_ENABLED;
2777 } else if (strcmp(value, "required") == 0) {
2778 spapr->resize_hpt = SPAPR_RESIZE_HPT_REQUIRED;
2779 } else {
2780 error_setg(errp, "Bad value for \"resize-hpt\" property");
2781 }
2782}
2783
2784static void spapr_get_vsmt(Object *obj, Visitor *v, const char *name,
2785 void *opaque, Error **errp)
2786{
2787 visit_type_uint32(v, name, (uint32_t *)opaque, errp);
2788}
2789
2790static void spapr_set_vsmt(Object *obj, Visitor *v, const char *name,
2791 void *opaque, Error **errp)
2792{
2793 visit_type_uint32(v, name, (uint32_t *)opaque, errp);
2794}
2795
2796static void spapr_machine_initfn(Object *obj)
2797{
2798 sPAPRMachineState *spapr = SPAPR_MACHINE(obj);
2799
2800 spapr->htab_fd = -1;
2801 spapr->use_hotplug_event_source = true;
2802 object_property_add_str(obj, "kvm-type",
2803 spapr_get_kvm_type, spapr_set_kvm_type, NULL);
2804 object_property_set_description(obj, "kvm-type",
2805 "Specifies the KVM virtualization mode (HV, PR)",
2806 NULL);
2807 object_property_add_bool(obj, "modern-hotplug-events",
2808 spapr_get_modern_hotplug_events,
2809 spapr_set_modern_hotplug_events,
2810 NULL);
2811 object_property_set_description(obj, "modern-hotplug-events",
2812 "Use dedicated hotplug event mechanism in"
2813 " place of standard EPOW events when possible"
2814 " (required for memory hot-unplug support)",
2815 NULL);
2816
2817 ppc_compat_add_property(obj, "max-cpu-compat", &spapr->max_compat_pvr,
2818 "Maximum permitted CPU compatibility mode",
2819 &error_fatal);
2820
2821 object_property_add_str(obj, "resize-hpt",
2822 spapr_get_resize_hpt, spapr_set_resize_hpt, NULL);
2823 object_property_set_description(obj, "resize-hpt",
2824 "Resizing of the Hash Page Table (enabled, disabled, required)",
2825 NULL);
2826 object_property_add(obj, "vsmt", "uint32", spapr_get_vsmt,
2827 spapr_set_vsmt, NULL, &spapr->vsmt, &error_abort);
2828 object_property_set_description(obj, "vsmt",
2829 "Virtual SMT: KVM behaves as if this were"
2830 " the host's SMT mode", &error_abort);
2831}
2832
2833static void spapr_machine_finalizefn(Object *obj)
2834{
2835 sPAPRMachineState *spapr = SPAPR_MACHINE(obj);
2836
2837 g_free(spapr->kvm_type);
2838}
2839
2840void spapr_do_system_reset_on_cpu(CPUState *cs, run_on_cpu_data arg)
2841{
2842 cpu_synchronize_state(cs);
2843 ppc_cpu_do_system_reset(cs);
2844}
2845
2846static void spapr_nmi(NMIState *n, int cpu_index, Error **errp)
2847{
2848 CPUState *cs;
2849
2850 CPU_FOREACH(cs) {
2851 async_run_on_cpu(cs, spapr_do_system_reset_on_cpu, RUN_ON_CPU_NULL);
2852 }
2853}
2854
2855static void spapr_add_lmbs(DeviceState *dev, uint64_t addr_start, uint64_t size,
2856 uint32_t node, bool dedicated_hp_event_source,
2857 Error **errp)
2858{
2859 sPAPRDRConnector *drc;
2860 uint32_t nr_lmbs = size/SPAPR_MEMORY_BLOCK_SIZE;
2861 int i, fdt_offset, fdt_size;
2862 void *fdt;
2863 uint64_t addr = addr_start;
2864 bool hotplugged = spapr_drc_hotplugged(dev);
2865 Error *local_err = NULL;
2866
2867 for (i = 0; i < nr_lmbs; i++) {
2868 drc = spapr_drc_by_id(TYPE_SPAPR_DRC_LMB,
2869 addr / SPAPR_MEMORY_BLOCK_SIZE);
2870 g_assert(drc);
2871
2872 fdt = create_device_tree(&fdt_size);
2873 fdt_offset = spapr_populate_memory_node(fdt, node, addr,
2874 SPAPR_MEMORY_BLOCK_SIZE);
2875
2876 spapr_drc_attach(drc, dev, fdt, fdt_offset, &local_err);
2877 if (local_err) {
2878 while (addr > addr_start) {
2879 addr -= SPAPR_MEMORY_BLOCK_SIZE;
2880 drc = spapr_drc_by_id(TYPE_SPAPR_DRC_LMB,
2881 addr / SPAPR_MEMORY_BLOCK_SIZE);
2882 spapr_drc_detach(drc);
2883 }
2884 g_free(fdt);
2885 error_propagate(errp, local_err);
2886 return;
2887 }
2888 if (!hotplugged) {
2889 spapr_drc_reset(drc);
2890 }
2891 addr += SPAPR_MEMORY_BLOCK_SIZE;
2892 }
2893
2894
2895
2896 if (hotplugged) {
2897 if (dedicated_hp_event_source) {
2898 drc = spapr_drc_by_id(TYPE_SPAPR_DRC_LMB,
2899 addr_start / SPAPR_MEMORY_BLOCK_SIZE);
2900 spapr_hotplug_req_add_by_count_indexed(SPAPR_DR_CONNECTOR_TYPE_LMB,
2901 nr_lmbs,
2902 spapr_drc_index(drc));
2903 } else {
2904 spapr_hotplug_req_add_by_count(SPAPR_DR_CONNECTOR_TYPE_LMB,
2905 nr_lmbs);
2906 }
2907 }
2908}
2909
2910static void spapr_memory_plug(HotplugHandler *hotplug_dev, DeviceState *dev,
2911 uint32_t node, Error **errp)
2912{
2913 Error *local_err = NULL;
2914 sPAPRMachineState *ms = SPAPR_MACHINE(hotplug_dev);
2915 PCDIMMDevice *dimm = PC_DIMM(dev);
2916 PCDIMMDeviceClass *ddc = PC_DIMM_GET_CLASS(dimm);
2917 MemoryRegion *mr;
2918 uint64_t align, size, addr;
2919
2920 mr = ddc->get_memory_region(dimm, &local_err);
2921 if (local_err) {
2922 goto out;
2923 }
2924 align = memory_region_get_alignment(mr);
2925 size = memory_region_size(mr);
2926
2927 pc_dimm_memory_plug(dev, &ms->hotplug_memory, mr, align, &local_err);
2928 if (local_err) {
2929 goto out;
2930 }
2931
2932 addr = object_property_get_uint(OBJECT(dimm),
2933 PC_DIMM_ADDR_PROP, &local_err);
2934 if (local_err) {
2935 goto out_unplug;
2936 }
2937
2938 spapr_add_lmbs(dev, addr, size, node,
2939 spapr_ovec_test(ms->ov5_cas, OV5_HP_EVT),
2940 &local_err);
2941 if (local_err) {
2942 goto out_unplug;
2943 }
2944
2945 return;
2946
2947out_unplug:
2948 pc_dimm_memory_unplug(dev, &ms->hotplug_memory, mr);
2949out:
2950 error_propagate(errp, local_err);
2951}
2952
2953static void spapr_memory_pre_plug(HotplugHandler *hotplug_dev, DeviceState *dev,
2954 Error **errp)
2955{
2956 PCDIMMDevice *dimm = PC_DIMM(dev);
2957 PCDIMMDeviceClass *ddc = PC_DIMM_GET_CLASS(dimm);
2958 MemoryRegion *mr;
2959 uint64_t size;
2960 char *mem_dev;
2961
2962 mr = ddc->get_memory_region(dimm, errp);
2963 if (!mr) {
2964 return;
2965 }
2966 size = memory_region_size(mr);
2967
2968 if (size % SPAPR_MEMORY_BLOCK_SIZE) {
2969 error_setg(errp, "Hotplugged memory size must be a multiple of "
2970 "%lld MB", SPAPR_MEMORY_BLOCK_SIZE / M_BYTE);
2971 return;
2972 }
2973
2974 mem_dev = object_property_get_str(OBJECT(dimm), PC_DIMM_MEMDEV_PROP, NULL);
2975 if (mem_dev && !kvmppc_is_mem_backend_page_size_ok(mem_dev)) {
2976 error_setg(errp, "Memory backend has bad page size. "
2977 "Use 'memory-backend-file' with correct mem-path.");
2978 goto out;
2979 }
2980
2981out:
2982 g_free(mem_dev);
2983}
2984
2985struct sPAPRDIMMState {
2986 PCDIMMDevice *dimm;
2987 uint32_t nr_lmbs;
2988 QTAILQ_ENTRY(sPAPRDIMMState) next;
2989};
2990
2991static sPAPRDIMMState *spapr_pending_dimm_unplugs_find(sPAPRMachineState *s,
2992 PCDIMMDevice *dimm)
2993{
2994 sPAPRDIMMState *dimm_state = NULL;
2995
2996 QTAILQ_FOREACH(dimm_state, &s->pending_dimm_unplugs, next) {
2997 if (dimm_state->dimm == dimm) {
2998 break;
2999 }
3000 }
3001 return dimm_state;
3002}
3003
3004static sPAPRDIMMState *spapr_pending_dimm_unplugs_add(sPAPRMachineState *spapr,
3005 uint32_t nr_lmbs,
3006 PCDIMMDevice *dimm)
3007{
3008 sPAPRDIMMState *ds = NULL;
3009
3010
3011
3012
3013
3014
3015
3016 ds = spapr_pending_dimm_unplugs_find(spapr, dimm);
3017 if (!ds) {
3018 ds = g_malloc0(sizeof(sPAPRDIMMState));
3019 ds->nr_lmbs = nr_lmbs;
3020 ds->dimm = dimm;
3021 QTAILQ_INSERT_HEAD(&spapr->pending_dimm_unplugs, ds, next);
3022 }
3023 return ds;
3024}
3025
3026static void spapr_pending_dimm_unplugs_remove(sPAPRMachineState *spapr,
3027 sPAPRDIMMState *dimm_state)
3028{
3029 QTAILQ_REMOVE(&spapr->pending_dimm_unplugs, dimm_state, next);
3030 g_free(dimm_state);
3031}
3032
3033static sPAPRDIMMState *spapr_recover_pending_dimm_state(sPAPRMachineState *ms,
3034 PCDIMMDevice *dimm)
3035{
3036 sPAPRDRConnector *drc;
3037 PCDIMMDeviceClass *ddc = PC_DIMM_GET_CLASS(dimm);
3038 MemoryRegion *mr = ddc->get_memory_region(dimm, &error_abort);
3039 uint64_t size = memory_region_size(mr);
3040 uint32_t nr_lmbs = size / SPAPR_MEMORY_BLOCK_SIZE;
3041 uint32_t avail_lmbs = 0;
3042 uint64_t addr_start, addr;
3043 int i;
3044
3045 addr_start = object_property_get_int(OBJECT(dimm), PC_DIMM_ADDR_PROP,
3046 &error_abort);
3047
3048 addr = addr_start;
3049 for (i = 0; i < nr_lmbs; i++) {
3050 drc = spapr_drc_by_id(TYPE_SPAPR_DRC_LMB,
3051 addr / SPAPR_MEMORY_BLOCK_SIZE);
3052 g_assert(drc);
3053 if (drc->dev) {
3054 avail_lmbs++;
3055 }
3056 addr += SPAPR_MEMORY_BLOCK_SIZE;
3057 }
3058
3059 return spapr_pending_dimm_unplugs_add(ms, avail_lmbs, dimm);
3060}
3061
3062
3063void spapr_lmb_release(DeviceState *dev)
3064{
3065 sPAPRMachineState *spapr = SPAPR_MACHINE(qdev_get_hotplug_handler(dev));
3066 PCDIMMDevice *dimm = PC_DIMM(dev);
3067 PCDIMMDeviceClass *ddc = PC_DIMM_GET_CLASS(dimm);
3068 MemoryRegion *mr = ddc->get_memory_region(dimm, &error_abort);
3069 sPAPRDIMMState *ds = spapr_pending_dimm_unplugs_find(spapr, PC_DIMM(dev));
3070
3071
3072
3073 if (ds == NULL) {
3074 ds = spapr_recover_pending_dimm_state(spapr, PC_DIMM(dev));
3075 g_assert(ds);
3076
3077 g_assert(ds->nr_lmbs);
3078 }
3079
3080 if (--ds->nr_lmbs) {
3081 return;
3082 }
3083
3084
3085
3086
3087
3088 pc_dimm_memory_unplug(dev, &spapr->hotplug_memory, mr);
3089 object_unparent(OBJECT(dev));
3090 spapr_pending_dimm_unplugs_remove(spapr, ds);
3091}
3092
3093static void spapr_memory_unplug_request(HotplugHandler *hotplug_dev,
3094 DeviceState *dev, Error **errp)
3095{
3096 sPAPRMachineState *spapr = SPAPR_MACHINE(hotplug_dev);
3097 Error *local_err = NULL;
3098 PCDIMMDevice *dimm = PC_DIMM(dev);
3099 PCDIMMDeviceClass *ddc = PC_DIMM_GET_CLASS(dimm);
3100 MemoryRegion *mr;
3101 uint32_t nr_lmbs;
3102 uint64_t size, addr_start, addr;
3103 int i;
3104 sPAPRDRConnector *drc;
3105
3106 mr = ddc->get_memory_region(dimm, &local_err);
3107 if (local_err) {
3108 goto out;
3109 }
3110 size = memory_region_size(mr);
3111 nr_lmbs = size / SPAPR_MEMORY_BLOCK_SIZE;
3112
3113 addr_start = object_property_get_uint(OBJECT(dimm), PC_DIMM_ADDR_PROP,
3114 &local_err);
3115 if (local_err) {
3116 goto out;
3117 }
3118
3119
3120
3121
3122
3123
3124
3125 if (spapr_pending_dimm_unplugs_find(spapr, dimm)) {
3126 error_setg(&local_err,
3127 "Memory unplug already in progress for device %s",
3128 dev->id);
3129 goto out;
3130 }
3131
3132 spapr_pending_dimm_unplugs_add(spapr, nr_lmbs, dimm);
3133
3134 addr = addr_start;
3135 for (i = 0; i < nr_lmbs; i++) {
3136 drc = spapr_drc_by_id(TYPE_SPAPR_DRC_LMB,
3137 addr / SPAPR_MEMORY_BLOCK_SIZE);
3138 g_assert(drc);
3139
3140 spapr_drc_detach(drc);
3141 addr += SPAPR_MEMORY_BLOCK_SIZE;
3142 }
3143
3144 drc = spapr_drc_by_id(TYPE_SPAPR_DRC_LMB,
3145 addr_start / SPAPR_MEMORY_BLOCK_SIZE);
3146 spapr_hotplug_req_remove_by_count_indexed(SPAPR_DR_CONNECTOR_TYPE_LMB,
3147 nr_lmbs, spapr_drc_index(drc));
3148out:
3149 error_propagate(errp, local_err);
3150}
3151
3152static void *spapr_populate_hotplug_cpu_dt(CPUState *cs, int *fdt_offset,
3153 sPAPRMachineState *spapr)
3154{
3155 PowerPCCPU *cpu = POWERPC_CPU(cs);
3156 DeviceClass *dc = DEVICE_GET_CLASS(cs);
3157 int id = spapr_vcpu_id(cpu);
3158 void *fdt;
3159 int offset, fdt_size;
3160 char *nodename;
3161
3162 fdt = create_device_tree(&fdt_size);
3163 nodename = g_strdup_printf("%s@%x", dc->fw_name, id);
3164 offset = fdt_add_subnode(fdt, 0, nodename);
3165
3166 spapr_populate_cpu_dt(cs, fdt, offset, spapr);
3167 g_free(nodename);
3168
3169 *fdt_offset = offset;
3170 return fdt;
3171}
3172
3173
3174void spapr_core_release(DeviceState *dev)
3175{
3176 MachineState *ms = MACHINE(qdev_get_hotplug_handler(dev));
3177 sPAPRMachineClass *smc = SPAPR_MACHINE_GET_CLASS(ms);
3178 CPUCore *cc = CPU_CORE(dev);
3179 CPUArchId *core_slot = spapr_find_cpu_slot(ms, cc->core_id, NULL);
3180
3181 if (smc->pre_2_10_has_unused_icps) {
3182 sPAPRCPUCore *sc = SPAPR_CPU_CORE(OBJECT(dev));
3183 sPAPRCPUCoreClass *scc = SPAPR_CPU_CORE_GET_CLASS(OBJECT(cc));
3184 size_t size = object_type_get_instance_size(scc->cpu_type);
3185 int i;
3186
3187 for (i = 0; i < cc->nr_threads; i++) {
3188 CPUState *cs = CPU(sc->threads + i * size);
3189
3190 pre_2_10_vmstate_register_dummy_icp(cs->cpu_index);
3191 }
3192 }
3193
3194 assert(core_slot);
3195 core_slot->cpu = NULL;
3196 object_unparent(OBJECT(dev));
3197}
3198
3199static
3200void spapr_core_unplug_request(HotplugHandler *hotplug_dev, DeviceState *dev,
3201 Error **errp)
3202{
3203 int index;
3204 sPAPRDRConnector *drc;
3205 CPUCore *cc = CPU_CORE(dev);
3206 int smt = kvmppc_smt_threads();
3207
3208 if (!spapr_find_cpu_slot(MACHINE(hotplug_dev), cc->core_id, &index)) {
3209 error_setg(errp, "Unable to find CPU core with core-id: %d",
3210 cc->core_id);
3211 return;
3212 }
3213 if (index == 0) {
3214 error_setg(errp, "Boot CPU core may not be unplugged");
3215 return;
3216 }
3217
3218 drc = spapr_drc_by_id(TYPE_SPAPR_DRC_CPU, index * smt);
3219 g_assert(drc);
3220
3221 spapr_drc_detach(drc);
3222
3223 spapr_hotplug_req_remove_by_index(drc);
3224}
3225
3226static void spapr_core_plug(HotplugHandler *hotplug_dev, DeviceState *dev,
3227 Error **errp)
3228{
3229 sPAPRMachineState *spapr = SPAPR_MACHINE(OBJECT(hotplug_dev));
3230 MachineClass *mc = MACHINE_GET_CLASS(spapr);
3231 sPAPRMachineClass *smc = SPAPR_MACHINE_CLASS(mc);
3232 sPAPRCPUCore *core = SPAPR_CPU_CORE(OBJECT(dev));
3233 CPUCore *cc = CPU_CORE(dev);
3234 CPUState *cs = CPU(core->threads);
3235 sPAPRDRConnector *drc;
3236 Error *local_err = NULL;
3237 int smt = kvmppc_smt_threads();
3238 CPUArchId *core_slot;
3239 int index;
3240 bool hotplugged = spapr_drc_hotplugged(dev);
3241
3242 core_slot = spapr_find_cpu_slot(MACHINE(hotplug_dev), cc->core_id, &index);
3243 if (!core_slot) {
3244 error_setg(errp, "Unable to find CPU core with core-id: %d",
3245 cc->core_id);
3246 return;
3247 }
3248 drc = spapr_drc_by_id(TYPE_SPAPR_DRC_CPU, index * smt);
3249
3250 g_assert(drc || !mc->has_hotpluggable_cpus);
3251
3252 if (drc) {
3253 void *fdt;
3254 int fdt_offset;
3255
3256 fdt = spapr_populate_hotplug_cpu_dt(cs, &fdt_offset, spapr);
3257
3258 spapr_drc_attach(drc, dev, fdt, fdt_offset, &local_err);
3259 if (local_err) {
3260 g_free(fdt);
3261 error_propagate(errp, local_err);
3262 return;
3263 }
3264
3265 if (hotplugged) {
3266
3267
3268
3269
3270 spapr_hotplug_req_add_by_index(drc);
3271 } else {
3272 spapr_drc_reset(drc);
3273 }
3274 }
3275
3276 core_slot->cpu = OBJECT(dev);
3277
3278 if (smc->pre_2_10_has_unused_icps) {
3279 sPAPRCPUCoreClass *scc = SPAPR_CPU_CORE_GET_CLASS(OBJECT(cc));
3280 size_t size = object_type_get_instance_size(scc->cpu_type);
3281 int i;
3282
3283 for (i = 0; i < cc->nr_threads; i++) {
3284 sPAPRCPUCore *sc = SPAPR_CPU_CORE(dev);
3285 void *obj = sc->threads + i * size;
3286
3287 cs = CPU(obj);
3288 pre_2_10_vmstate_unregister_dummy_icp(cs->cpu_index);
3289 }
3290 }
3291}
3292
3293static void spapr_core_pre_plug(HotplugHandler *hotplug_dev, DeviceState *dev,
3294 Error **errp)
3295{
3296 MachineState *machine = MACHINE(OBJECT(hotplug_dev));
3297 MachineClass *mc = MACHINE_GET_CLASS(hotplug_dev);
3298 Error *local_err = NULL;
3299 CPUCore *cc = CPU_CORE(dev);
3300 const char *base_core_type = spapr_get_cpu_core_type(machine->cpu_type);
3301 const char *type = object_get_typename(OBJECT(dev));
3302 CPUArchId *core_slot;
3303 int index;
3304
3305 if (dev->hotplugged && !mc->has_hotpluggable_cpus) {
3306 error_setg(&local_err, "CPU hotplug not supported for this machine");
3307 goto out;
3308 }
3309
3310 if (strcmp(base_core_type, type)) {
3311 error_setg(&local_err, "CPU core type should be %s", base_core_type);
3312 goto out;
3313 }
3314
3315 if (cc->core_id % smp_threads) {
3316 error_setg(&local_err, "invalid core id %d", cc->core_id);
3317 goto out;
3318 }
3319
3320
3321
3322
3323
3324
3325
3326 if (mc->has_hotpluggable_cpus && (cc->nr_threads != smp_threads)) {
3327 error_setg(&local_err, "invalid nr-threads %d, must be %d",
3328 cc->nr_threads, smp_threads);
3329 goto out;
3330 }
3331
3332 core_slot = spapr_find_cpu_slot(MACHINE(hotplug_dev), cc->core_id, &index);
3333 if (!core_slot) {
3334 error_setg(&local_err, "core id %d out of range", cc->core_id);
3335 goto out;
3336 }
3337
3338 if (core_slot->cpu) {
3339 error_setg(&local_err, "core %d already populated", cc->core_id);
3340 goto out;
3341 }
3342
3343 numa_cpu_pre_plug(core_slot, dev, &local_err);
3344
3345out:
3346 error_propagate(errp, local_err);
3347}
3348
3349static void spapr_machine_device_plug(HotplugHandler *hotplug_dev,
3350 DeviceState *dev, Error **errp)
3351{
3352 MachineState *ms = MACHINE(hotplug_dev);
3353 sPAPRMachineClass *smc = SPAPR_MACHINE_GET_CLASS(ms);
3354
3355 if (object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM)) {
3356 int node;
3357
3358 if (!smc->dr_lmb_enabled) {
3359 error_setg(errp, "Memory hotplug not supported for this machine");
3360 return;
3361 }
3362 node = object_property_get_uint(OBJECT(dev), PC_DIMM_NODE_PROP, errp);
3363 if (*errp) {
3364 return;
3365 }
3366 if (node < 0 || node >= MAX_NODES) {
3367 error_setg(errp, "Invaild node %d", node);
3368 return;
3369 }
3370
3371
3372
3373
3374
3375
3376
3377
3378
3379
3380
3381
3382
3383
3384
3385
3386
3387 if (nb_numa_nodes && !numa_info[node].node_mem) {
3388 error_setg(errp, "Can't hotplug memory to memory-less node %d",
3389 node);
3390 return;
3391 }
3392
3393 spapr_memory_plug(hotplug_dev, dev, node, errp);
3394 } else if (object_dynamic_cast(OBJECT(dev), TYPE_SPAPR_CPU_CORE)) {
3395 spapr_core_plug(hotplug_dev, dev, errp);
3396 }
3397}
3398
3399static void spapr_machine_device_unplug_request(HotplugHandler *hotplug_dev,
3400 DeviceState *dev, Error **errp)
3401{
3402 sPAPRMachineState *sms = SPAPR_MACHINE(OBJECT(hotplug_dev));
3403 MachineClass *mc = MACHINE_GET_CLASS(sms);
3404
3405 if (object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM)) {
3406 if (spapr_ovec_test(sms->ov5_cas, OV5_HP_EVT)) {
3407 spapr_memory_unplug_request(hotplug_dev, dev, errp);
3408 } else {
3409
3410
3411
3412
3413
3414
3415 error_setg(errp, "Memory hot unplug not supported for this guest");
3416 }
3417 } else if (object_dynamic_cast(OBJECT(dev), TYPE_SPAPR_CPU_CORE)) {
3418 if (!mc->has_hotpluggable_cpus) {
3419 error_setg(errp, "CPU hot unplug not supported on this machine");
3420 return;
3421 }
3422 spapr_core_unplug_request(hotplug_dev, dev, errp);
3423 }
3424}
3425
3426static void spapr_machine_device_pre_plug(HotplugHandler *hotplug_dev,
3427 DeviceState *dev, Error **errp)
3428{
3429 if (object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM)) {
3430 spapr_memory_pre_plug(hotplug_dev, dev, errp);
3431 } else if (object_dynamic_cast(OBJECT(dev), TYPE_SPAPR_CPU_CORE)) {
3432 spapr_core_pre_plug(hotplug_dev, dev, errp);
3433 }
3434}
3435
3436static HotplugHandler *spapr_get_hotplug_handler(MachineState *machine,
3437 DeviceState *dev)
3438{
3439 if (object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM) ||
3440 object_dynamic_cast(OBJECT(dev), TYPE_SPAPR_CPU_CORE)) {
3441 return HOTPLUG_HANDLER(machine);
3442 }
3443 return NULL;
3444}
3445
3446static CpuInstanceProperties
3447spapr_cpu_index_to_props(MachineState *machine, unsigned cpu_index)
3448{
3449 CPUArchId *core_slot;
3450 MachineClass *mc = MACHINE_GET_CLASS(machine);
3451
3452
3453 mc->possible_cpu_arch_ids(machine);
3454
3455 core_slot = spapr_find_cpu_slot(machine, cpu_index, NULL);
3456 assert(core_slot);
3457 return core_slot->props;
3458}
3459
3460static int64_t spapr_get_default_cpu_node_id(const MachineState *ms, int idx)
3461{
3462 return idx / smp_cores % nb_numa_nodes;
3463}
3464
3465static const CPUArchIdList *spapr_possible_cpu_arch_ids(MachineState *machine)
3466{
3467 int i;
3468 int spapr_max_cores = max_cpus / smp_threads;
3469 MachineClass *mc = MACHINE_GET_CLASS(machine);
3470
3471 if (!mc->has_hotpluggable_cpus) {
3472 spapr_max_cores = QEMU_ALIGN_UP(smp_cpus, smp_threads) / smp_threads;
3473 }
3474 if (machine->possible_cpus) {
3475 assert(machine->possible_cpus->len == spapr_max_cores);
3476 return machine->possible_cpus;
3477 }
3478
3479 machine->possible_cpus = g_malloc0(sizeof(CPUArchIdList) +
3480 sizeof(CPUArchId) * spapr_max_cores);
3481 machine->possible_cpus->len = spapr_max_cores;
3482 for (i = 0; i < machine->possible_cpus->len; i++) {
3483 int core_id = i * smp_threads;
3484
3485 machine->possible_cpus->cpus[i].vcpus_count = smp_threads;
3486 machine->possible_cpus->cpus[i].arch_id = core_id;
3487 machine->possible_cpus->cpus[i].props.has_core_id = true;
3488 machine->possible_cpus->cpus[i].props.core_id = core_id;
3489 }
3490 return machine->possible_cpus;
3491}
3492
3493static void spapr_phb_placement(sPAPRMachineState *spapr, uint32_t index,
3494 uint64_t *buid, hwaddr *pio,
3495 hwaddr *mmio32, hwaddr *mmio64,
3496 unsigned n_dma, uint32_t *liobns, Error **errp)
3497{
3498
3499
3500
3501
3502
3503
3504
3505
3506
3507
3508
3509
3510
3511
3512
3513 const uint64_t base_buid = 0x800000020000000ULL;
3514#define SPAPR_MAX_PHBS ((SPAPR_PCI_LIMIT - SPAPR_PCI_BASE) / \
3515 SPAPR_PCI_MEM64_WIN_SIZE - 1)
3516 int i;
3517
3518
3519 QEMU_BUILD_BUG_ON((SPAPR_PCI_BASE % SPAPR_PCI_MEM64_WIN_SIZE) != 0);
3520 QEMU_BUILD_BUG_ON((SPAPR_PCI_LIMIT % SPAPR_PCI_MEM64_WIN_SIZE) != 0);
3521 QEMU_BUILD_BUG_ON((SPAPR_PCI_MEM64_WIN_SIZE % SPAPR_PCI_MEM32_WIN_SIZE) != 0);
3522 QEMU_BUILD_BUG_ON((SPAPR_PCI_MEM32_WIN_SIZE % SPAPR_PCI_IO_WIN_SIZE) != 0);
3523
3524 QEMU_BUILD_BUG_ON((SPAPR_MAX_PHBS * SPAPR_PCI_IO_WIN_SIZE) >
3525 SPAPR_PCI_MEM32_WIN_SIZE);
3526 QEMU_BUILD_BUG_ON((SPAPR_MAX_PHBS * SPAPR_PCI_MEM32_WIN_SIZE) >
3527 SPAPR_PCI_MEM64_WIN_SIZE);
3528
3529 if (index >= SPAPR_MAX_PHBS) {
3530 error_setg(errp, "\"index\" for PAPR PHB is too large (max %llu)",
3531 SPAPR_MAX_PHBS - 1);
3532 return;
3533 }
3534
3535 *buid = base_buid + index;
3536 for (i = 0; i < n_dma; ++i) {
3537 liobns[i] = SPAPR_PCI_LIOBN(index, i);
3538 }
3539
3540 *pio = SPAPR_PCI_BASE + index * SPAPR_PCI_IO_WIN_SIZE;
3541 *mmio32 = SPAPR_PCI_BASE + (index + 1) * SPAPR_PCI_MEM32_WIN_SIZE;
3542 *mmio64 = SPAPR_PCI_BASE + (index + 1) * SPAPR_PCI_MEM64_WIN_SIZE;
3543}
3544
3545static ICSState *spapr_ics_get(XICSFabric *dev, int irq)
3546{
3547 sPAPRMachineState *spapr = SPAPR_MACHINE(dev);
3548
3549 return ics_valid_irq(spapr->ics, irq) ? spapr->ics : NULL;
3550}
3551
3552static void spapr_ics_resend(XICSFabric *dev)
3553{
3554 sPAPRMachineState *spapr = SPAPR_MACHINE(dev);
3555
3556 ics_resend(spapr->ics);
3557}
3558
3559static ICPState *spapr_icp_get(XICSFabric *xi, int vcpu_id)
3560{
3561 PowerPCCPU *cpu = spapr_find_cpu(vcpu_id);
3562
3563 return cpu ? ICP(cpu->intc) : NULL;
3564}
3565
3566static void spapr_pic_print_info(InterruptStatsProvider *obj,
3567 Monitor *mon)
3568{
3569 sPAPRMachineState *spapr = SPAPR_MACHINE(obj);
3570 CPUState *cs;
3571
3572 CPU_FOREACH(cs) {
3573 PowerPCCPU *cpu = POWERPC_CPU(cs);
3574
3575 icp_pic_print_info(ICP(cpu->intc), mon);
3576 }
3577
3578 ics_pic_print_info(spapr->ics, mon);
3579}
3580
3581int spapr_vcpu_id(PowerPCCPU *cpu)
3582{
3583 CPUState *cs = CPU(cpu);
3584
3585 if (kvm_enabled()) {
3586 return kvm_arch_vcpu_id(cs);
3587 } else {
3588 return cs->cpu_index;
3589 }
3590}
3591
3592PowerPCCPU *spapr_find_cpu(int vcpu_id)
3593{
3594 CPUState *cs;
3595
3596 CPU_FOREACH(cs) {
3597 PowerPCCPU *cpu = POWERPC_CPU(cs);
3598
3599 if (spapr_vcpu_id(cpu) == vcpu_id) {
3600 return cpu;
3601 }
3602 }
3603
3604 return NULL;
3605}
3606
3607static void spapr_machine_class_init(ObjectClass *oc, void *data)
3608{
3609 MachineClass *mc = MACHINE_CLASS(oc);
3610 sPAPRMachineClass *smc = SPAPR_MACHINE_CLASS(oc);
3611 FWPathProviderClass *fwc = FW_PATH_PROVIDER_CLASS(oc);
3612 NMIClass *nc = NMI_CLASS(oc);
3613 HotplugHandlerClass *hc = HOTPLUG_HANDLER_CLASS(oc);
3614 PPCVirtualHypervisorClass *vhc = PPC_VIRTUAL_HYPERVISOR_CLASS(oc);
3615 XICSFabricClass *xic = XICS_FABRIC_CLASS(oc);
3616 InterruptStatsProviderClass *ispc = INTERRUPT_STATS_PROVIDER_CLASS(oc);
3617
3618 mc->desc = "pSeries Logical Partition (PAPR compliant)";
3619
3620
3621
3622
3623
3624
3625 mc->init = ppc_spapr_init;
3626 mc->reset = ppc_spapr_reset;
3627 mc->block_default_type = IF_SCSI;
3628 mc->max_cpus = 1024;
3629 mc->no_parallel = 1;
3630 mc->default_boot_order = "";
3631 mc->default_ram_size = 512 * M_BYTE;
3632 mc->kvm_type = spapr_kvm_type;
3633 mc->has_dynamic_sysbus = true;
3634 mc->pci_allow_0_address = true;
3635 mc->get_hotplug_handler = spapr_get_hotplug_handler;
3636 hc->pre_plug = spapr_machine_device_pre_plug;
3637 hc->plug = spapr_machine_device_plug;
3638 mc->cpu_index_to_instance_props = spapr_cpu_index_to_props;
3639 mc->get_default_cpu_node_id = spapr_get_default_cpu_node_id;
3640 mc->possible_cpu_arch_ids = spapr_possible_cpu_arch_ids;
3641 hc->unplug_request = spapr_machine_device_unplug_request;
3642
3643 smc->dr_lmb_enabled = true;
3644 mc->default_cpu_type = POWERPC_CPU_TYPE_NAME("power8_v2.0");
3645 mc->has_hotpluggable_cpus = true;
3646 smc->resize_hpt_default = SPAPR_RESIZE_HPT_ENABLED;
3647 fwc->get_dev_path = spapr_get_fw_dev_path;
3648 nc->nmi_monitor_handler = spapr_nmi;
3649 smc->phb_placement = spapr_phb_placement;
3650 vhc->hypercall = emulate_spapr_hypercall;
3651 vhc->hpt_mask = spapr_hpt_mask;
3652 vhc->map_hptes = spapr_map_hptes;
3653 vhc->unmap_hptes = spapr_unmap_hptes;
3654 vhc->store_hpte = spapr_store_hpte;
3655 vhc->get_patbe = spapr_get_patbe;
3656 vhc->encode_hpt_for_kvm_pr = spapr_encode_hpt_for_kvm_pr;
3657 xic->ics_get = spapr_ics_get;
3658 xic->ics_resend = spapr_ics_resend;
3659 xic->icp_get = spapr_icp_get;
3660 ispc->print_info = spapr_pic_print_info;
3661
3662
3663
3664
3665 mc->numa_mem_align_shift = 28;
3666}
3667
3668static const TypeInfo spapr_machine_info = {
3669 .name = TYPE_SPAPR_MACHINE,
3670 .parent = TYPE_MACHINE,
3671 .abstract = true,
3672 .instance_size = sizeof(sPAPRMachineState),
3673 .instance_init = spapr_machine_initfn,
3674 .instance_finalize = spapr_machine_finalizefn,
3675 .class_size = sizeof(sPAPRMachineClass),
3676 .class_init = spapr_machine_class_init,
3677 .interfaces = (InterfaceInfo[]) {
3678 { TYPE_FW_PATH_PROVIDER },
3679 { TYPE_NMI },
3680 { TYPE_HOTPLUG_HANDLER },
3681 { TYPE_PPC_VIRTUAL_HYPERVISOR },
3682 { TYPE_XICS_FABRIC },
3683 { TYPE_INTERRUPT_STATS_PROVIDER },
3684 { }
3685 },
3686};
3687
/*
 * DEFINE_SPAPR_MACHINE(suffix, verstr, latest)
 *
 * Emit the QOM boilerplate for one versioned machine type whose name is
 * MACHINE_TYPE_NAME("pseries-" verstr) and whose parent is
 * TYPE_SPAPR_MACHINE:
 *   - a class_init that calls spapr_machine_<suffix>_class_options() and,
 *     when @latest is true, also sets the "pseries" alias and flags the
 *     machine as the default one;
 *   - an instance_init that calls
 *     spapr_machine_<suffix>_instance_options();
 *   - the TypeInfo tying both together, plus a type_init() hook that
 *     registers it.
 *
 * Both spapr_machine_<suffix>_*_options() functions must be declared
 * before the macro is used.
 */
#define DEFINE_SPAPR_MACHINE(suffix, verstr, latest)                    \
    static void spapr_machine_##suffix##_class_init(ObjectClass *oc,    \
                                                    void *data)         \
    {                                                                   \
        MachineClass *machine_class = MACHINE_CLASS(oc);                \
                                                                        \
        spapr_machine_##suffix##_class_options(machine_class);          \
        if (latest) {                                                   \
            machine_class->alias = "pseries";                           \
            machine_class->is_default = 1;                              \
        }                                                               \
    }                                                                   \
    static void spapr_machine_##suffix##_instance_init(Object *obj)     \
    {                                                                   \
        spapr_machine_##suffix##_instance_options(MACHINE(obj));        \
    }                                                                   \
    static const TypeInfo spapr_machine_##suffix##_info = {             \
        .name = MACHINE_TYPE_NAME("pseries-" verstr),                   \
        .parent = TYPE_SPAPR_MACHINE,                                   \
        .class_init = spapr_machine_##suffix##_class_init,              \
        .instance_init = spapr_machine_##suffix##_instance_init,        \
    };                                                                  \
    static void spapr_machine_register_##suffix(void)                   \
    {                                                                   \
        type_register(&spapr_machine_##suffix##_info);                  \
    }                                                                   \
    type_init(spapr_machine_register_##suffix)
3715
3716
3717
3718
3719static void spapr_machine_2_11_instance_options(MachineState *machine)
3720{
3721}
3722
3723static void spapr_machine_2_11_class_options(MachineClass *mc)
3724{
3725
3726}
3727
3728DEFINE_SPAPR_MACHINE(2_11, "2.11", true);
3729
3730
3731
3732
3733#define SPAPR_COMPAT_2_10 \
3734 HW_COMPAT_2_10 \
3735
3736static void spapr_machine_2_10_instance_options(MachineState *machine)
3737{
3738}
3739
3740static void spapr_machine_2_10_class_options(MachineClass *mc)
3741{
3742 spapr_machine_2_11_class_options(mc);
3743 SET_MACHINE_COMPAT(mc, SPAPR_COMPAT_2_10);
3744}
3745
3746DEFINE_SPAPR_MACHINE(2_10, "2.10", false);
3747
3748
3749
3750
3751#define SPAPR_COMPAT_2_9 \
3752 HW_COMPAT_2_9 \
3753 { \
3754 .driver = TYPE_POWERPC_CPU, \
3755 .property = "pre-2.10-migration", \
3756 .value = "on", \
3757 }, \
3758
3759static void spapr_machine_2_9_instance_options(MachineState *machine)
3760{
3761 spapr_machine_2_10_instance_options(machine);
3762}
3763
3764static void spapr_machine_2_9_class_options(MachineClass *mc)
3765{
3766 sPAPRMachineClass *smc = SPAPR_MACHINE_CLASS(mc);
3767
3768 spapr_machine_2_10_class_options(mc);
3769 SET_MACHINE_COMPAT(mc, SPAPR_COMPAT_2_9);
3770 mc->numa_auto_assign_ram = numa_legacy_auto_assign_ram;
3771 smc->pre_2_10_has_unused_icps = true;
3772 smc->resize_hpt_default = SPAPR_RESIZE_HPT_DISABLED;
3773}
3774
3775DEFINE_SPAPR_MACHINE(2_9, "2.9", false);
3776
3777
3778
3779
3780#define SPAPR_COMPAT_2_8 \
3781 HW_COMPAT_2_8 \
3782 { \
3783 .driver = TYPE_SPAPR_PCI_HOST_BRIDGE, \
3784 .property = "pcie-extended-configuration-space", \
3785 .value = "off", \
3786 },
3787
3788static void spapr_machine_2_8_instance_options(MachineState *machine)
3789{
3790 spapr_machine_2_9_instance_options(machine);
3791}
3792
3793static void spapr_machine_2_8_class_options(MachineClass *mc)
3794{
3795 spapr_machine_2_9_class_options(mc);
3796 SET_MACHINE_COMPAT(mc, SPAPR_COMPAT_2_8);
3797 mc->numa_mem_align_shift = 23;
3798}
3799
3800DEFINE_SPAPR_MACHINE(2_8, "2.8", false);
3801
3802
3803
3804
/*
 * pseries-2.7
 *
 * Compat properties for this version: the PCI host bridge gets a
 * "mem_win_size" of SPAPR_PCI_2_7_MMIO_WIN_SIZE and a "mem64_win_size" of
 * "0", and "pre-2.8-migration" is switched on for both the CPU type and
 * the PCI host bridge.
 */
#define SPAPR_COMPAT_2_7 \
    HW_COMPAT_2_7 \
    { \
        .driver = TYPE_SPAPR_PCI_HOST_BRIDGE, \
        .property = "mem_win_size", \
        .value = stringify(SPAPR_PCI_2_7_MMIO_WIN_SIZE),\
    }, \
    { \
        .driver = TYPE_SPAPR_PCI_HOST_BRIDGE, \
        .property = "mem64_win_size", \
        .value = "0", \
    }, \
    { \
        .driver = TYPE_POWERPC_CPU, \
        .property = "pre-2.8-migration", \
        .value = "on", \
    }, \
    { \
        .driver = TYPE_SPAPR_PCI_HOST_BRIDGE, \
        .property = "pre-2.8-migration", \
        .value = "on", \
    },
3827
3828static void phb_placement_2_7(sPAPRMachineState *spapr, uint32_t index,
3829 uint64_t *buid, hwaddr *pio,
3830 hwaddr *mmio32, hwaddr *mmio64,
3831 unsigned n_dma, uint32_t *liobns, Error **errp)
3832{
3833
3834 const uint64_t base_buid = 0x800000020000000ULL;
3835 const hwaddr phb_spacing = 0x1000000000ULL;
3836 const hwaddr mmio_offset = 0xa0000000;
3837 const hwaddr pio_offset = 0x80000000;
3838 const uint32_t max_index = 255;
3839 const hwaddr phb0_alignment = 0x10000000000ULL;
3840
3841 uint64_t ram_top = MACHINE(spapr)->ram_size;
3842 hwaddr phb0_base, phb_base;
3843 int i;
3844
3845
3846 if (MACHINE(spapr)->maxram_size > ram_top) {
3847
3848
3849
3850 ram_top = spapr->hotplug_memory.base +
3851 memory_region_size(&spapr->hotplug_memory.mr);
3852 }
3853
3854 phb0_base = QEMU_ALIGN_UP(ram_top, phb0_alignment);
3855
3856 if (index > max_index) {
3857 error_setg(errp, "\"index\" for PAPR PHB is too large (max %u)",
3858 max_index);
3859 return;
3860 }
3861
3862 *buid = base_buid + index;
3863 for (i = 0; i < n_dma; ++i) {
3864 liobns[i] = SPAPR_PCI_LIOBN(index, i);
3865 }
3866
3867 phb_base = phb0_base + index * phb_spacing;
3868 *pio = phb_base + pio_offset;
3869 *mmio32 = phb_base + mmio_offset;
3870
3871
3872
3873
3874
3875}
3876
3877static void spapr_machine_2_7_instance_options(MachineState *machine)
3878{
3879 sPAPRMachineState *spapr = SPAPR_MACHINE(machine);
3880
3881 spapr_machine_2_8_instance_options(machine);
3882 spapr->use_hotplug_event_source = false;
3883}
3884
3885static void spapr_machine_2_7_class_options(MachineClass *mc)
3886{
3887 sPAPRMachineClass *smc = SPAPR_MACHINE_CLASS(mc);
3888
3889 spapr_machine_2_8_class_options(mc);
3890 mc->default_cpu_type = POWERPC_CPU_TYPE_NAME("power7_v2.3");
3891 SET_MACHINE_COMPAT(mc, SPAPR_COMPAT_2_7);
3892 smc->phb_placement = phb_placement_2_7;
3893}
3894
3895DEFINE_SPAPR_MACHINE(2_7, "2.7", false);
3896
3897
3898
3899
3900#define SPAPR_COMPAT_2_6 \
3901 HW_COMPAT_2_6 \
3902 { \
3903 .driver = TYPE_SPAPR_PCI_HOST_BRIDGE,\
3904 .property = "ddw",\
3905 .value = stringify(off),\
3906 },
3907
3908static void spapr_machine_2_6_instance_options(MachineState *machine)
3909{
3910 spapr_machine_2_7_instance_options(machine);
3911}
3912
3913static void spapr_machine_2_6_class_options(MachineClass *mc)
3914{
3915 spapr_machine_2_7_class_options(mc);
3916 mc->has_hotpluggable_cpus = false;
3917 SET_MACHINE_COMPAT(mc, SPAPR_COMPAT_2_6);
3918}
3919
3920DEFINE_SPAPR_MACHINE(2_6, "2.6", false);
3921
3922
3923
3924
3925#define SPAPR_COMPAT_2_5 \
3926 HW_COMPAT_2_5 \
3927 { \
3928 .driver = "spapr-vlan", \
3929 .property = "use-rx-buffer-pools", \
3930 .value = "off", \
3931 },
3932
3933static void spapr_machine_2_5_instance_options(MachineState *machine)
3934{
3935 spapr_machine_2_6_instance_options(machine);
3936}
3937
3938static void spapr_machine_2_5_class_options(MachineClass *mc)
3939{
3940 sPAPRMachineClass *smc = SPAPR_MACHINE_CLASS(mc);
3941
3942 spapr_machine_2_6_class_options(mc);
3943 smc->use_ohci_by_default = true;
3944 SET_MACHINE_COMPAT(mc, SPAPR_COMPAT_2_5);
3945}
3946
3947DEFINE_SPAPR_MACHINE(2_5, "2.5", false);
3948
3949
3950
3951
3952#define SPAPR_COMPAT_2_4 \
3953 HW_COMPAT_2_4
3954
3955static void spapr_machine_2_4_instance_options(MachineState *machine)
3956{
3957 spapr_machine_2_5_instance_options(machine);
3958}
3959
3960static void spapr_machine_2_4_class_options(MachineClass *mc)
3961{
3962 sPAPRMachineClass *smc = SPAPR_MACHINE_CLASS(mc);
3963
3964 spapr_machine_2_5_class_options(mc);
3965 smc->dr_lmb_enabled = false;
3966 SET_MACHINE_COMPAT(mc, SPAPR_COMPAT_2_4);
3967}
3968
3969DEFINE_SPAPR_MACHINE(2_4, "2.4", false);
3970
3971
3972
3973
/*
 * pseries-2.3
 *
 * Compat: switches "dynamic-reconfiguration" off on the
 * "spapr-pci-host-bridge" driver.
 *
 * NOTE(review): the driver is spelled as a string literal here, while the
 * other compat tables in this file use TYPE_SPAPR_PCI_HOST_BRIDGE.
 * Presumably both name the same type - confirm before unifying.
 */
#define SPAPR_COMPAT_2_3 \
    HW_COMPAT_2_3 \
    {\
        .driver = "spapr-pci-host-bridge",\
        .property = "dynamic-reconfiguration",\
        .value = "off",\
    },

/* Nothing 2.3-specific per instance; inherit the pseries-2.4 settings. */
static void spapr_machine_2_3_instance_options(MachineState *machine)
{
    spapr_machine_2_4_instance_options(machine);
}

/* Inherit the pseries-2.4 class settings, then add the 2.3 compat props. */
static void spapr_machine_2_3_class_options(MachineClass *mc)
{
    spapr_machine_2_4_class_options(mc);
    SET_MACHINE_COMPAT(mc, SPAPR_COMPAT_2_3);
}
DEFINE_SPAPR_MACHINE(2_3, "2.3", false);
3993
3994
3995
3996
3997
/*
 * pseries-2.2
 *
 * Compat: sets the PCI host bridge "mem_win_size" property to
 * "0x20000000".
 */
#define SPAPR_COMPAT_2_2 \
    HW_COMPAT_2_2 \
    {\
        .driver = TYPE_SPAPR_PCI_HOST_BRIDGE,\
        .property = "mem_win_size",\
        .value = "0x20000000",\
    },

/*
 * Inherit the pseries-2.3 instance settings, then set suppress_vmdesc.
 * pseries-2.1 picks this up too, because its instance options chain here.
 */
static void spapr_machine_2_2_instance_options(MachineState *machine)
{
    spapr_machine_2_3_instance_options(machine);
    machine->suppress_vmdesc = true;
}

/* Inherit the pseries-2.3 class settings, then add the 2.2 compat props. */
static void spapr_machine_2_2_class_options(MachineClass *mc)
{
    spapr_machine_2_3_class_options(mc);
    SET_MACHINE_COMPAT(mc, SPAPR_COMPAT_2_2);
}
DEFINE_SPAPR_MACHINE(2_2, "2.2", false);
4018
4019
4020
4021
/*
 * pseries-2.1
 *
 * Oldest version defined in this file.  Only the generic HW_COMPAT_2_1
 * properties apply; there are no sPAPR-specific compat entries.
 */
#define SPAPR_COMPAT_2_1 \
    HW_COMPAT_2_1

/* Nothing 2.1-specific per instance; inherit the pseries-2.2 settings. */
static void spapr_machine_2_1_instance_options(MachineState *machine)
{
    spapr_machine_2_2_instance_options(machine);
}

/* Inherit the pseries-2.2 class settings, then add the 2.1 compat props. */
static void spapr_machine_2_1_class_options(MachineClass *mc)
{
    spapr_machine_2_2_class_options(mc);
    SET_MACHINE_COMPAT(mc, SPAPR_COMPAT_2_1);
}
DEFINE_SPAPR_MACHINE(2_1, "2.1", false);
4036
/*
 * Register the abstract sPAPR machine base type described by
 * spapr_machine_info.  The versioned machine types are registered
 * separately, by the hooks that DEFINE_SPAPR_MACHINE() generates.
 */
static void spapr_machine_register_types(void)
{
    type_register_static(&spapr_machine_info);
}

type_init(spapr_machine_register_types)
4043