1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27#include "qemu/osdep.h"
28#include "qapi/error.h"
29#include "sysemu/sysemu.h"
30#include "sysemu/numa.h"
31#include "hw/hw.h"
32#include "qemu/log.h"
33#include "hw/fw-path-provider.h"
34#include "elf.h"
35#include "net/net.h"
36#include "sysemu/device_tree.h"
37#include "sysemu/block-backend.h"
38#include "sysemu/cpus.h"
39#include "sysemu/kvm.h"
40#include "sysemu/device_tree.h"
41#include "kvm_ppc.h"
42#include "migration/migration.h"
43#include "mmu-hash64.h"
44#include "qom/cpu.h"
45
46#include "hw/boards.h"
47#include "hw/ppc/ppc.h"
48#include "hw/loader.h"
49
50#include "hw/ppc/spapr.h"
51#include "hw/ppc/spapr_vio.h"
52#include "hw/pci-host/spapr.h"
53#include "hw/ppc/xics.h"
54#include "hw/pci/msi.h"
55
56#include "hw/pci/pci.h"
57#include "hw/scsi/scsi.h"
58#include "hw/virtio/virtio-scsi.h"
59
60#include "exec/address-spaces.h"
61#include "hw/usb.h"
62#include "qemu/config-file.h"
63#include "qemu/error-report.h"
64#include "trace.h"
65#include "hw/nmi.h"
66
67#include "hw/compat.h"
68#include "qemu/cutils.h"
69#include "hw/ppc/spapr_cpu_core.h"
70#include "qmp-commands.h"
71
72#include <libfdt.h>
73
74
75
76
77
78
79
80
81
82
83
/* Size of the buffers allocated for the flattened device tree in this file */
#define FDT_MAX_SIZE 0x100000
/* Space kept for the RTAS blob directly below the RTAS limit address */
#define RTAS_MAX_SIZE 0x10000
/* Upper bound applied to the RTAS limit (see ppc_spapr_reset()) */
#define RTAS_MAX_ADDR 0x80000000
/*
 * Firmware parameters.  NOTE(review): FW_MAX_SIZE is only visible here as
 * the value of KERNEL_LOAD_ADDR; FW_FILE_NAME and FW_OVERHEAD are not
 * referenced in this part of the file.
 */
#define FW_MAX_SIZE 0x400000
#define FW_FILE_NAME "slof.bin"
#define FW_OVERHEAD 0x2800000
/* Address recorded for the kernel in the reserve map and "qemu,boot-kernel" */
#define KERNEL_LOAD_ADDR FW_MAX_SIZE

/* NOTE(review): presumably a minimum RMA size for SLOF; unit not shown here */
#define MIN_RMA_SLOF 128UL

/* phandle value given to the interrupt-controller node */
#define PHANDLE_XICP 0x00001111

/* 2^htab_shift, with the shift taken from the machine state */
#define HTAB_SIZE(spapr) (1ULL << ((spapr)->htab_shift))
97
98static XICSState *try_create_xics(const char *type, int nr_servers,
99 int nr_irqs, Error **errp)
100{
101 Error *err = NULL;
102 DeviceState *dev;
103
104 dev = qdev_create(NULL, type);
105 qdev_prop_set_uint32(dev, "nr_servers", nr_servers);
106 qdev_prop_set_uint32(dev, "nr_irqs", nr_irqs);
107 object_property_set_bool(OBJECT(dev), true, "realized", &err);
108 if (err) {
109 error_propagate(errp, err);
110 object_unparent(OBJECT(dev));
111 return NULL;
112 }
113 return XICS_COMMON(dev);
114}
115
116static XICSState *xics_system_init(MachineState *machine,
117 int nr_servers, int nr_irqs, Error **errp)
118{
119 XICSState *xics = NULL;
120
121 if (kvm_enabled()) {
122 Error *err = NULL;
123
124 if (machine_kernel_irqchip_allowed(machine)) {
125 xics = try_create_xics(TYPE_XICS_SPAPR_KVM, nr_servers, nr_irqs,
126 &err);
127 }
128 if (machine_kernel_irqchip_required(machine) && !xics) {
129 error_reportf_err(err,
130 "kernel_irqchip requested but unavailable: ");
131 } else {
132 error_free(err);
133 }
134 }
135
136 if (!xics) {
137 xics = try_create_xics(TYPE_XICS_SPAPR, nr_servers, nr_irqs, errp);
138 }
139
140 return xics;
141}
142
143static int spapr_fixup_cpu_smt_dt(void *fdt, int offset, PowerPCCPU *cpu,
144 int smt_threads)
145{
146 int i, ret = 0;
147 uint32_t servers_prop[smt_threads];
148 uint32_t gservers_prop[smt_threads * 2];
149 int index = ppc_get_vcpu_dt_id(cpu);
150
151 if (cpu->cpu_version) {
152 ret = fdt_setprop_cell(fdt, offset, "cpu-version", cpu->cpu_version);
153 if (ret < 0) {
154 return ret;
155 }
156 }
157
158
159 for (i = 0; i < smt_threads; i++) {
160 servers_prop[i] = cpu_to_be32(index + i);
161
162 gservers_prop[i*2] = cpu_to_be32(index + i);
163 gservers_prop[i*2 + 1] = 0;
164 }
165 ret = fdt_setprop(fdt, offset, "ibm,ppc-interrupt-server#s",
166 servers_prop, sizeof(servers_prop));
167 if (ret < 0) {
168 return ret;
169 }
170 ret = fdt_setprop(fdt, offset, "ibm,ppc-interrupt-gserver#s",
171 gservers_prop, sizeof(gservers_prop));
172
173 return ret;
174}
175
176static int spapr_fixup_cpu_numa_dt(void *fdt, int offset, CPUState *cs)
177{
178 int ret = 0;
179 PowerPCCPU *cpu = POWERPC_CPU(cs);
180 int index = ppc_get_vcpu_dt_id(cpu);
181 uint32_t associativity[] = {cpu_to_be32(0x5),
182 cpu_to_be32(0x0),
183 cpu_to_be32(0x0),
184 cpu_to_be32(0x0),
185 cpu_to_be32(cs->numa_node),
186 cpu_to_be32(index)};
187
188
189 if (nb_numa_nodes > 1) {
190 ret = fdt_setprop(fdt, offset, "ibm,associativity", associativity,
191 sizeof(associativity));
192 }
193
194 return ret;
195}
196
197static int spapr_fixup_cpu_dt(void *fdt, sPAPRMachineState *spapr)
198{
199 int ret = 0, offset, cpus_offset;
200 CPUState *cs;
201 char cpu_model[32];
202 int smt = kvmppc_smt_threads();
203 uint32_t pft_size_prop[] = {0, cpu_to_be32(spapr->htab_shift)};
204
205 CPU_FOREACH(cs) {
206 PowerPCCPU *cpu = POWERPC_CPU(cs);
207 DeviceClass *dc = DEVICE_GET_CLASS(cs);
208 int index = ppc_get_vcpu_dt_id(cpu);
209
210 if ((index % smt) != 0) {
211 continue;
212 }
213
214 snprintf(cpu_model, 32, "%s@%x", dc->fw_name, index);
215
216 cpus_offset = fdt_path_offset(fdt, "/cpus");
217 if (cpus_offset < 0) {
218 cpus_offset = fdt_add_subnode(fdt, fdt_path_offset(fdt, "/"),
219 "cpus");
220 if (cpus_offset < 0) {
221 return cpus_offset;
222 }
223 }
224 offset = fdt_subnode_offset(fdt, cpus_offset, cpu_model);
225 if (offset < 0) {
226 offset = fdt_add_subnode(fdt, cpus_offset, cpu_model);
227 if (offset < 0) {
228 return offset;
229 }
230 }
231
232 ret = fdt_setprop(fdt, offset, "ibm,pft-size",
233 pft_size_prop, sizeof(pft_size_prop));
234 if (ret < 0) {
235 return ret;
236 }
237
238 ret = spapr_fixup_cpu_numa_dt(fdt, offset, cs);
239 if (ret < 0) {
240 return ret;
241 }
242
243 ret = spapr_fixup_cpu_smt_dt(fdt, offset, cpu,
244 ppc_get_compat_smt_threads(cpu));
245 if (ret < 0) {
246 return ret;
247 }
248 }
249 return ret;
250}
251
252
253static size_t create_page_sizes_prop(CPUPPCState *env, uint32_t *prop,
254 size_t maxsize)
255{
256 size_t maxcells = maxsize / sizeof(uint32_t);
257 int i, j, count;
258 uint32_t *p = prop;
259
260 for (i = 0; i < PPC_PAGE_SIZES_MAX_SZ; i++) {
261 struct ppc_one_seg_page_size *sps = &env->sps.sps[i];
262
263 if (!sps->page_shift) {
264 break;
265 }
266 for (count = 0; count < PPC_PAGE_SIZES_MAX_SZ; count++) {
267 if (sps->enc[count].page_shift == 0) {
268 break;
269 }
270 }
271 if ((p - prop) >= (maxcells - 3 - count * 2)) {
272 break;
273 }
274 *(p++) = cpu_to_be32(sps->page_shift);
275 *(p++) = cpu_to_be32(sps->slb_enc);
276 *(p++) = cpu_to_be32(count);
277 for (j = 0; j < count; j++) {
278 *(p++) = cpu_to_be32(sps->enc[j].page_shift);
279 *(p++) = cpu_to_be32(sps->enc[j].pte_enc);
280 }
281 }
282
283 return (p - prop) * sizeof(uint32_t);
284}
285
286static hwaddr spapr_node0_size(void)
287{
288 MachineState *machine = MACHINE(qdev_get_machine());
289
290 if (nb_numa_nodes) {
291 int i;
292 for (i = 0; i < nb_numa_nodes; ++i) {
293 if (numa_info[i].node_mem) {
294 return MIN(pow2floor(numa_info[i].node_mem),
295 machine->ram_size);
296 }
297 }
298 }
299 return machine->ram_size;
300}
301
/*
 * Evaluate a libfdt call once; if it returns a negative value, print the
 * failing expression together with fdt_strerror() and exit(1).
 *
 * The result is stored in a local named "ret", so @exp must not itself
 * refer to a variable called "ret" from the enclosing scope.
 */
#define _FDT(exp) \
    do { \
        int ret = (exp); \
        if (ret < 0) { \
            fprintf(stderr, "qemu: error creating device tree: %s: %s\n", \
                    #exp, fdt_strerror(ret)); \
            exit(1); \
        } \
    } while (0)
311
312static void add_str(GString *s, const gchar *s1)
313{
314 g_string_append_len(s, s1, strlen(s1) + 1);
315}
316
/*
 * Build the skeleton flattened device tree for the machine.
 *
 * The tree is produced with libfdt's sequential-write calls (fdt_create,
 * fdt_begin_node, fdt_property*, fdt_end_node, fdt_finish), so the order of
 * the calls below defines the layout of the resulting blob.
 *
 * @initrd_base, @initrd_size: initrd location; reserved in the memory
 *                             reserve map when the size is non-zero
 * @kernel_size:    size of the kernel at KERNEL_LOAD_ADDR; 0 means none
 * @little_endian:  adds "qemu,boot-kernel-le" when a kernel is present
 * @kernel_cmdline: stored as /chosen "bootargs"
 * @epow_irq:       forwarded to spapr_events_fdt_skel()
 *
 * Any libfdt failure terminates the process through _FDT().
 *
 * Returns a buffer of FDT_MAX_SIZE bytes obtained from g_malloc0(); it is
 * not freed by this function.
 */
static void *spapr_create_fdt_skel(hwaddr initrd_base,
                                   hwaddr initrd_size,
                                   hwaddr kernel_size,
                                   bool little_endian,
                                   const char *kernel_cmdline,
                                   uint32_t epow_irq)
{
    void *fdt;
    /* Note: the initrd bounds are stored as 32-bit cells */
    uint32_t start_prop = cpu_to_be32(initrd_base);
    uint32_t end_prop = cpu_to_be32(initrd_base + initrd_size);
    GString *hypertas = g_string_sized_new(256);
    GString *qemu_hypertas = g_string_sized_new(256);
    uint32_t refpoints[] = {cpu_to_be32(0x4), cpu_to_be32(0x4)};
    uint32_t interrupt_server_ranges_prop[] = {0, cpu_to_be32(max_cpus)};
    unsigned char vec5[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x80};
    char *buf;

    /* NUL-separated feature lists, emitted later under /rtas */
    add_str(hypertas, "hcall-pft");
    add_str(hypertas, "hcall-term");
    add_str(hypertas, "hcall-dabr");
    add_str(hypertas, "hcall-interrupt");
    add_str(hypertas, "hcall-tce");
    add_str(hypertas, "hcall-vio");
    add_str(hypertas, "hcall-splpar");
    add_str(hypertas, "hcall-bulk");
    add_str(hypertas, "hcall-set-mode");
    add_str(hypertas, "hcall-sprg0");
    add_str(hypertas, "hcall-copy");
    add_str(hypertas, "hcall-debug");
    add_str(qemu_hypertas, "hcall-memop1");

    fdt = g_malloc0(FDT_MAX_SIZE);
    _FDT((fdt_create(fdt, FDT_MAX_SIZE)));

    /* Memory reserve map: kernel and initrd, when present */
    if (kernel_size) {
        _FDT((fdt_add_reservemap_entry(fdt, KERNEL_LOAD_ADDR, kernel_size)));
    }
    if (initrd_size) {
        _FDT((fdt_add_reservemap_entry(fdt, initrd_base, initrd_size)));
    }
    _FDT((fdt_finish_reservemap(fdt)));

    /* Root node */
    _FDT((fdt_begin_node(fdt, "")));
    _FDT((fdt_property_string(fdt, "device_type", "chrp")));
    _FDT((fdt_property_string(fdt, "model", "IBM pSeries (emulated by qemu)")));
    _FDT((fdt_property_string(fdt, "compatible", "qemu,pseries")));

    /*
     * "host-model" / "host-serial" are only added when the kvmppc helpers
     * report success; the returned string is freed after use.
     */
    if (kvmppc_get_host_model(&buf)) {
        _FDT((fdt_property_string(fdt, "host-model", buf)));
        g_free(buf);
    }
    if (kvmppc_get_host_serial(&buf)) {
        _FDT((fdt_property_string(fdt, "host-serial", buf)));
        g_free(buf);
    }

    buf = g_strdup_printf(UUID_FMT, qemu_uuid[0], qemu_uuid[1],
                          qemu_uuid[2], qemu_uuid[3], qemu_uuid[4],
                          qemu_uuid[5], qemu_uuid[6], qemu_uuid[7],
                          qemu_uuid[8], qemu_uuid[9], qemu_uuid[10],
                          qemu_uuid[11], qemu_uuid[12], qemu_uuid[13],
                          qemu_uuid[14], qemu_uuid[15]);

    /* "vm,uuid" is always written; "system-id" only if a UUID was set */
    _FDT((fdt_property_string(fdt, "vm,uuid", buf)));
    if (qemu_uuid_set) {
        _FDT((fdt_property_string(fdt, "system-id", buf)));
    }
    g_free(buf);

    if (qemu_get_vm_name()) {
        _FDT((fdt_property_string(fdt, "ibm,partition-name",
                                  qemu_get_vm_name())));
    }

    _FDT((fdt_property_cell(fdt, "#address-cells", 0x2)));
    _FDT((fdt_property_cell(fdt, "#size-cells", 0x2)));

    /* /chosen */
    _FDT((fdt_begin_node(fdt, "chosen")));

    _FDT((fdt_property(fdt, "ibm,architecture-vec-5", vec5, sizeof(vec5))));

    _FDT((fdt_property_string(fdt, "bootargs", kernel_cmdline)));
    _FDT((fdt_property(fdt, "linux,initrd-start",
                       &start_prop, sizeof(start_prop))));
    _FDT((fdt_property(fdt, "linux,initrd-end",
                       &end_prop, sizeof(end_prop))));
    if (kernel_size) {
        /* (load address, size) as two 64-bit big-endian values */
        uint64_t kprop[2] = { cpu_to_be64(KERNEL_LOAD_ADDR),
                              cpu_to_be64(kernel_size) };

        _FDT((fdt_property(fdt, "qemu,boot-kernel", &kprop, sizeof(kprop))));
        if (little_endian) {
            _FDT((fdt_property(fdt, "qemu,boot-kernel-le", NULL, 0)));
        }
    }
    if (boot_menu) {
        _FDT((fdt_property_cell(fdt, "qemu,boot-menu", boot_menu)));
    }
    _FDT((fdt_property_cell(fdt, "qemu,graphic-width", graphic_width)));
    _FDT((fdt_property_cell(fdt, "qemu,graphic-height", graphic_height)));
    _FDT((fdt_property_cell(fdt, "qemu,graphic-depth", graphic_depth)));

    _FDT((fdt_end_node(fdt)));

    /* /rtas */
    _FDT((fdt_begin_node(fdt, "rtas")));

    /* Advertised without KVM, or when KVM reports multi-TCE support */
    if (!kvm_enabled() || kvmppc_spapr_use_multitce()) {
        add_str(hypertas, "hcall-multi-tce");
    }
    _FDT((fdt_property(fdt, "ibm,hypertas-functions", hypertas->str,
                       hypertas->len)));
    g_string_free(hypertas, TRUE);
    _FDT((fdt_property(fdt, "qemu,hypertas-functions", qemu_hypertas->str,
                       qemu_hypertas->len)));
    g_string_free(qemu_hypertas, TRUE);

    _FDT((fdt_property(fdt, "ibm,associativity-reference-points",
                       refpoints, sizeof(refpoints))));

    _FDT((fdt_property_cell(fdt, "rtas-error-log-max", RTAS_ERROR_LOG_MAX)));
    _FDT((fdt_property_cell(fdt, "rtas-event-scan-rate",
                            RTAS_EVENT_SCAN_RATE)));

    if (msi_nonbroken) {
        _FDT((fdt_property(fdt, "ibm,change-msix-capable", NULL, 0)));
    }

    /* Empty (boolean) property, always present */
    _FDT((fdt_property(fdt, "ibm,extended-os-term", NULL, 0)));

    _FDT((fdt_end_node(fdt)));

    /* /interrupt-controller */
    _FDT((fdt_begin_node(fdt, "interrupt-controller")));

    _FDT((fdt_property_string(fdt, "device_type",
                              "PowerPC-External-Interrupt-Presentation")));
    _FDT((fdt_property_string(fdt, "compatible", "IBM,ppc-xicp")));
    _FDT((fdt_property(fdt, "interrupt-controller", NULL, 0)));
    _FDT((fdt_property(fdt, "ibm,interrupt-server-ranges",
                       interrupt_server_ranges_prop,
                       sizeof(interrupt_server_ranges_prop))));
    _FDT((fdt_property_cell(fdt, "#interrupt-cells", 2)));
    _FDT((fdt_property_cell(fdt, "linux,phandle", PHANDLE_XICP)));
    _FDT((fdt_property_cell(fdt, "phandle", PHANDLE_XICP)));

    _FDT((fdt_end_node(fdt)));

    /* /vdevice */
    _FDT((fdt_begin_node(fdt, "vdevice")));

    _FDT((fdt_property_string(fdt, "device_type", "vdevice")));
    _FDT((fdt_property_string(fdt, "compatible", "IBM,vdevice")));
    _FDT((fdt_property_cell(fdt, "#address-cells", 0x1)));
    _FDT((fdt_property_cell(fdt, "#size-cells", 0x0)));
    _FDT((fdt_property_cell(fdt, "#interrupt-cells", 0x2)));
    _FDT((fdt_property(fdt, "interrupt-controller", NULL, 0)));

    _FDT((fdt_end_node(fdt)));

    /* Event source nodes are emitted by the events code */
    spapr_events_fdt_skel(fdt, epow_irq);

    /* /hypervisor node, only under KVM */
    if (kvm_enabled()) {
        uint8_t hypercall[16];

        _FDT((fdt_begin_node(fdt, "hypervisor")));
        _FDT((fdt_property_string(fdt, "compatible", "linux,kvm")));
        if (kvmppc_has_cap_fixup_hcalls()) {
            /*
             * "hcall-instructions" is only added when the hypercall
             * sequence could be fetched (helper returned 0).
             */
            if (!kvmppc_get_hypercall(first_cpu->env_ptr, hypercall,
                                      sizeof(hypercall))) {
                _FDT((fdt_property(fdt, "hcall-instructions", hypercall,
                                   sizeof(hypercall))));
            }
        }
        _FDT((fdt_end_node(fdt)));
    }

    _FDT((fdt_end_node(fdt))); /* close root node */
    _FDT((fdt_finish(fdt)));

    return fdt;
}
520
521static int spapr_populate_memory_node(void *fdt, int nodeid, hwaddr start,
522 hwaddr size)
523{
524 uint32_t associativity[] = {
525 cpu_to_be32(0x4),
526 cpu_to_be32(0x0), cpu_to_be32(0x0),
527 cpu_to_be32(0x0), cpu_to_be32(nodeid)
528 };
529 char mem_name[32];
530 uint64_t mem_reg_property[2];
531 int off;
532
533 mem_reg_property[0] = cpu_to_be64(start);
534 mem_reg_property[1] = cpu_to_be64(size);
535
536 sprintf(mem_name, "memory@" TARGET_FMT_lx, start);
537 off = fdt_add_subnode(fdt, 0, mem_name);
538 _FDT(off);
539 _FDT((fdt_setprop_string(fdt, off, "device_type", "memory")));
540 _FDT((fdt_setprop(fdt, off, "reg", mem_reg_property,
541 sizeof(mem_reg_property))));
542 _FDT((fdt_setprop(fdt, off, "ibm,associativity", associativity,
543 sizeof(associativity))));
544 return off;
545}
546
547static int spapr_populate_memory(sPAPRMachineState *spapr, void *fdt)
548{
549 MachineState *machine = MACHINE(spapr);
550 hwaddr mem_start, node_size;
551 int i, nb_nodes = nb_numa_nodes;
552 NodeInfo *nodes = numa_info;
553 NodeInfo ramnode;
554
555
556 if (!nb_numa_nodes) {
557 nb_nodes = 1;
558 ramnode.node_mem = machine->ram_size;
559 nodes = &ramnode;
560 }
561
562 for (i = 0, mem_start = 0; i < nb_nodes; ++i) {
563 if (!nodes[i].node_mem) {
564 continue;
565 }
566 if (mem_start >= machine->ram_size) {
567 node_size = 0;
568 } else {
569 node_size = nodes[i].node_mem;
570 if (node_size > machine->ram_size - mem_start) {
571 node_size = machine->ram_size - mem_start;
572 }
573 }
574 if (!mem_start) {
575
576 spapr_populate_memory_node(fdt, i, 0, spapr->rma_size);
577 mem_start += spapr->rma_size;
578 node_size -= spapr->rma_size;
579 }
580 for ( ; node_size; ) {
581 hwaddr sizetmp = pow2floor(node_size);
582
583
584 if (ctzl(mem_start) < ctzl(sizetmp)) {
585 sizetmp = 1ULL << ctzl(mem_start);
586 }
587
588 spapr_populate_memory_node(fdt, i, mem_start, sizetmp);
589 node_size -= sizetmp;
590 mem_start += sizetmp;
591 }
592 }
593
594 return 0;
595}
596
/*
 * Fill in the properties of one CPU node.
 *
 * @cs:     CPU being described
 * @fdt:    device tree blob being edited
 * @offset: offset of the (already created) CPU node
 * @spapr:  machine state, used for the hash table shift
 *
 * Properties are added with fdt_setprop*(), so the order of the calls
 * below is the order in which they appear in the node.  Any libfdt
 * failure terminates the process through _FDT().
 */
static void spapr_populate_cpu_dt(CPUState *cs, void *fdt, int offset,
                                  sPAPRMachineState *spapr)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(cs);
    int index = ppc_get_vcpu_dt_id(cpu);
    uint32_t segs[] = {cpu_to_be32(28), cpu_to_be32(40),
                       0xffffffff, 0xffffffff};
    /* Under KVM the frequencies are queried from kvmppc helpers */
    uint32_t tbfreq = kvm_enabled() ? kvmppc_get_tbfreq()
        : SPAPR_TIMEBASE_FREQ;
    uint32_t cpufreq = kvm_enabled() ? kvmppc_get_clockfreq() : 1000000000;
    uint32_t page_sizes_prop[64];
    size_t page_sizes_prop_size;
    uint32_t vcpus_per_socket = smp_threads * smp_cores;
    uint32_t pft_size_prop[] = {0, cpu_to_be32(spapr->htab_shift)};
    sPAPRDRConnector *drc;
    sPAPRDRConnectorClass *drck;
    int drc_index;

    /* If a CPU DR connector exists for this id, publish its index */
    drc = spapr_dr_connector_by_id(SPAPR_DR_CONNECTOR_TYPE_CPU, index);
    if (drc) {
        drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc);
        drc_index = drck->get_index(drc);
        _FDT((fdt_setprop_cell(fdt, offset, "ibm,my-drc-index", drc_index)));
    }

    /*
     * Candidate values for "ibm,pa-features".  Each array starts with a
     * two-byte header whose first byte equals the number of bytes that
     * follow the header (6 and 24).  The arrays are local copies, so the
     * in-place update further down does not leak between CPUs.
     */
    uint8_t pa_features_206[] = { 6, 0,
        0xf6, 0x1f, 0xc7, 0x00, 0x80, 0xc0 };
    uint8_t pa_features_207[] = { 24, 0,
        0xf6, 0x1f, 0xc7, 0xc0, 0x80, 0xf0,
        0x80, 0x00, 0x00, 0x00, 0x00, 0x00,
        0x00, 0x00, 0x00, 0x00, 0x80, 0x00,
        0x80, 0x00, 0x80, 0x00, 0x80, 0x00 };
    uint8_t *pa_features;
    size_t pa_size;

    _FDT((fdt_setprop_cell(fdt, offset, "reg", index)));
    _FDT((fdt_setprop_string(fdt, offset, "device_type", "cpu")));

    _FDT((fdt_setprop_cell(fdt, offset, "cpu-version", env->spr[SPR_PVR])));
    /* Block size and line size are both taken from the same field */
    _FDT((fdt_setprop_cell(fdt, offset, "d-cache-block-size",
                           env->dcache_line_size)));
    _FDT((fdt_setprop_cell(fdt, offset, "d-cache-line-size",
                           env->dcache_line_size)));
    _FDT((fdt_setprop_cell(fdt, offset, "i-cache-block-size",
                           env->icache_line_size)));
    _FDT((fdt_setprop_cell(fdt, offset, "i-cache-line-size",
                           env->icache_line_size)));

    /* L1 cache sizes are optional: warn and omit when the class has none */
    if (pcc->l1_dcache_size) {
        _FDT((fdt_setprop_cell(fdt, offset, "d-cache-size",
                               pcc->l1_dcache_size)));
    } else {
        fprintf(stderr, "Warning: Unknown L1 dcache size for cpu\n");
    }
    if (pcc->l1_icache_size) {
        _FDT((fdt_setprop_cell(fdt, offset, "i-cache-size",
                               pcc->l1_icache_size)));
    } else {
        fprintf(stderr, "Warning: Unknown L1 icache size for cpu\n");
    }

    _FDT((fdt_setprop_cell(fdt, offset, "timebase-frequency", tbfreq)));
    _FDT((fdt_setprop_cell(fdt, offset, "clock-frequency", cpufreq)));
    _FDT((fdt_setprop_cell(fdt, offset, "slb-size", env->slb_nr)));
    _FDT((fdt_setprop_cell(fdt, offset, "ibm,slb-size", env->slb_nr)));
    _FDT((fdt_setprop_string(fdt, offset, "status", "okay")));
    _FDT((fdt_setprop(fdt, offset, "64-bit", NULL, 0)));

    /* Advertise the PURR only if the SPR has a read callback */
    if (env->spr_cb[SPR_PURR].oea_read) {
        _FDT((fdt_setprop(fdt, offset, "ibm,purr", NULL, 0)));
    }

    if (env->mmu_model & POWERPC_MMU_1TSEG) {
        _FDT((fdt_setprop(fdt, offset, "ibm,processor-segment-sizes",
                          segs, sizeof(segs))));
    }

    /* "ibm,vmx" is 2 when VSX is available, 1 for Altivec alone */
    if (env->insns_flags & PPC_ALTIVEC) {
        uint32_t vmx = (env->insns_flags2 & PPC2_VSX) ? 2 : 1;

        _FDT((fdt_setprop_cell(fdt, offset, "ibm,vmx", vmx)));
    }

    /* "ibm,dfp" is only present when the DFP instruction flag is set */
    if (env->insns_flags2 & PPC2_DFP) {
        _FDT((fdt_setprop_cell(fdt, offset, "ibm,dfp", 1)));
    }

    page_sizes_prop_size = create_page_sizes_prop(env, page_sizes_prop,
                                                  sizeof(page_sizes_prop));
    if (page_sizes_prop_size) {
        _FDT((fdt_setprop(fdt, offset, "ibm,segment-page-sizes",
                          page_sizes_prop, page_sizes_prop_size)));
    }

    /* Pick the pa-features table by MMU model; 2.07 is the fallback */
    if (env->mmu_model == POWERPC_MMU_2_06) {
        pa_features = pa_features_206;
        pa_size = sizeof(pa_features_206);
    } else {
        pa_features = pa_features_207;
        pa_size = sizeof(pa_features_207);
    }
    /* ci_large_pages sets bit 0x20 in the byte at index 3 */
    if (env->ci_large_pages) {
        pa_features[3] |= 0x20;
    }
    _FDT((fdt_setprop(fdt, offset, "ibm,pa-features", pa_features, pa_size)));

    _FDT((fdt_setprop_cell(fdt, offset, "ibm,chip-id",
                           cs->cpu_index / vcpus_per_socket)));

    _FDT((fdt_setprop(fdt, offset, "ibm,pft-size",
                      pft_size_prop, sizeof(pft_size_prop))));

    _FDT(spapr_fixup_cpu_numa_dt(fdt, offset, cs));

    _FDT(spapr_fixup_cpu_smt_dt(fdt, offset, cpu,
                                ppc_get_compat_smt_threads(cpu)));
}
732
733static void spapr_populate_cpus_dt_node(void *fdt, sPAPRMachineState *spapr)
734{
735 CPUState *cs;
736 int cpus_offset;
737 char *nodename;
738 int smt = kvmppc_smt_threads();
739
740 cpus_offset = fdt_add_subnode(fdt, 0, "cpus");
741 _FDT(cpus_offset);
742 _FDT((fdt_setprop_cell(fdt, cpus_offset, "#address-cells", 0x1)));
743 _FDT((fdt_setprop_cell(fdt, cpus_offset, "#size-cells", 0x0)));
744
745
746
747
748
749
750 CPU_FOREACH_REVERSE(cs) {
751 PowerPCCPU *cpu = POWERPC_CPU(cs);
752 int index = ppc_get_vcpu_dt_id(cpu);
753 DeviceClass *dc = DEVICE_GET_CLASS(cs);
754 int offset;
755
756 if ((index % smt) != 0) {
757 continue;
758 }
759
760 nodename = g_strdup_printf("%s@%x", dc->fw_name, index);
761 offset = fdt_add_subnode(fdt, cpus_offset, nodename);
762 g_free(nodename);
763 _FDT(offset);
764 spapr_populate_cpu_dt(cs, fdt, offset, spapr);
765 }
766
767}
768
769
770
771
772
773
774static int spapr_populate_drconf_memory(sPAPRMachineState *spapr, void *fdt)
775{
776 MachineState *machine = MACHINE(spapr);
777 int ret, i, offset;
778 uint64_t lmb_size = SPAPR_MEMORY_BLOCK_SIZE;
779 uint32_t prop_lmb_size[] = {0, cpu_to_be32(lmb_size)};
780 uint32_t hotplug_lmb_start = spapr->hotplug_memory.base / lmb_size;
781 uint32_t nr_lmbs = (spapr->hotplug_memory.base +
782 memory_region_size(&spapr->hotplug_memory.mr)) /
783 lmb_size;
784 uint32_t *int_buf, *cur_index, buf_len;
785 int nr_nodes = nb_numa_nodes ? nb_numa_nodes : 1;
786
787
788
789
790 if (machine->ram_size == machine->maxram_size) {
791 return 0;
792 }
793
794
795
796
797
798 buf_len = MAX(nr_lmbs * SPAPR_DR_LMB_LIST_ENTRY_SIZE + 1, nr_nodes * 4 + 2)
799 * sizeof(uint32_t);
800 cur_index = int_buf = g_malloc0(buf_len);
801
802 offset = fdt_add_subnode(fdt, 0, "ibm,dynamic-reconfiguration-memory");
803
804 ret = fdt_setprop(fdt, offset, "ibm,lmb-size", prop_lmb_size,
805 sizeof(prop_lmb_size));
806 if (ret < 0) {
807 goto out;
808 }
809
810 ret = fdt_setprop_cell(fdt, offset, "ibm,memory-flags-mask", 0xff);
811 if (ret < 0) {
812 goto out;
813 }
814
815 ret = fdt_setprop_cell(fdt, offset, "ibm,memory-preservation-time", 0x0);
816 if (ret < 0) {
817 goto out;
818 }
819
820
821 int_buf[0] = cpu_to_be32(nr_lmbs);
822 cur_index++;
823 for (i = 0; i < nr_lmbs; i++) {
824 uint64_t addr = i * lmb_size;
825 uint32_t *dynamic_memory = cur_index;
826
827 if (i >= hotplug_lmb_start) {
828 sPAPRDRConnector *drc;
829 sPAPRDRConnectorClass *drck;
830
831 drc = spapr_dr_connector_by_id(SPAPR_DR_CONNECTOR_TYPE_LMB, i);
832 g_assert(drc);
833 drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc);
834
835 dynamic_memory[0] = cpu_to_be32(addr >> 32);
836 dynamic_memory[1] = cpu_to_be32(addr & 0xffffffff);
837 dynamic_memory[2] = cpu_to_be32(drck->get_index(drc));
838 dynamic_memory[3] = cpu_to_be32(0);
839 dynamic_memory[4] = cpu_to_be32(numa_get_node(addr, NULL));
840 if (memory_region_present(get_system_memory(), addr)) {
841 dynamic_memory[5] = cpu_to_be32(SPAPR_LMB_FLAGS_ASSIGNED);
842 } else {
843 dynamic_memory[5] = cpu_to_be32(0);
844 }
845 } else {
846
847
848
849
850
851 dynamic_memory[0] = cpu_to_be32(addr >> 32);
852 dynamic_memory[1] = cpu_to_be32(addr & 0xffffffff);
853 dynamic_memory[2] = cpu_to_be32(0);
854 dynamic_memory[3] = cpu_to_be32(0);
855 dynamic_memory[4] = cpu_to_be32(-1);
856 dynamic_memory[5] = cpu_to_be32(SPAPR_LMB_FLAGS_RESERVED |
857 SPAPR_LMB_FLAGS_DRC_INVALID);
858 }
859
860 cur_index += SPAPR_DR_LMB_LIST_ENTRY_SIZE;
861 }
862 ret = fdt_setprop(fdt, offset, "ibm,dynamic-memory", int_buf, buf_len);
863 if (ret < 0) {
864 goto out;
865 }
866
867
868 cur_index = int_buf;
869 int_buf[0] = cpu_to_be32(nr_nodes);
870 int_buf[1] = cpu_to_be32(4);
871 cur_index += 2;
872 for (i = 0; i < nr_nodes; i++) {
873 uint32_t associativity[] = {
874 cpu_to_be32(0x0),
875 cpu_to_be32(0x0),
876 cpu_to_be32(0x0),
877 cpu_to_be32(i)
878 };
879 memcpy(cur_index, associativity, sizeof(associativity));
880 cur_index += 4;
881 }
882 ret = fdt_setprop(fdt, offset, "ibm,associativity-lookup-arrays", int_buf,
883 (cur_index - int_buf) * sizeof(uint32_t));
884out:
885 g_free(int_buf);
886 return ret;
887}
888
889int spapr_h_cas_compose_response(sPAPRMachineState *spapr,
890 target_ulong addr, target_ulong size,
891 bool cpu_update, bool memory_update)
892{
893 void *fdt, *fdt_skel;
894 sPAPRDeviceTreeUpdateHeader hdr = { .version_id = 1 };
895 sPAPRMachineClass *smc = SPAPR_MACHINE_GET_CLASS(qdev_get_machine());
896
897 size -= sizeof(hdr);
898
899
900 fdt_skel = g_malloc0(size);
901 _FDT((fdt_create(fdt_skel, size)));
902 _FDT((fdt_begin_node(fdt_skel, "")));
903 _FDT((fdt_end_node(fdt_skel)));
904 _FDT((fdt_finish(fdt_skel)));
905 fdt = g_malloc0(size);
906 _FDT((fdt_open_into(fdt_skel, fdt, size)));
907 g_free(fdt_skel);
908
909
910 if (cpu_update) {
911 _FDT((spapr_fixup_cpu_dt(fdt, spapr)));
912 }
913
914
915 if (memory_update && smc->dr_lmb_enabled) {
916 _FDT((spapr_populate_drconf_memory(spapr, fdt)));
917 }
918
919
920 _FDT((fdt_pack(fdt)));
921
922 if (fdt_totalsize(fdt) + sizeof(hdr) > size) {
923 trace_spapr_cas_failed(size);
924 return -1;
925 }
926
927 cpu_physical_memory_write(addr, &hdr, sizeof(hdr));
928 cpu_physical_memory_write(addr + sizeof(hdr), fdt, fdt_totalsize(fdt));
929 trace_spapr_cas_continue(fdt_totalsize(fdt) + sizeof(hdr));
930 g_free(fdt);
931
932 return 0;
933}
934
935static void spapr_finalize_fdt(sPAPRMachineState *spapr,
936 hwaddr fdt_addr,
937 hwaddr rtas_addr,
938 hwaddr rtas_size)
939{
940 MachineState *machine = MACHINE(qdev_get_machine());
941 MachineClass *mc = MACHINE_GET_CLASS(machine);
942 sPAPRMachineClass *smc = SPAPR_MACHINE_GET_CLASS(machine);
943 const char *boot_device = machine->boot_order;
944 int ret, i;
945 size_t cb = 0;
946 char *bootlist;
947 void *fdt;
948 sPAPRPHBState *phb;
949
950 fdt = g_malloc(FDT_MAX_SIZE);
951
952
953 _FDT((fdt_open_into(spapr->fdt_skel, fdt, FDT_MAX_SIZE)));
954
955 ret = spapr_populate_memory(spapr, fdt);
956 if (ret < 0) {
957 fprintf(stderr, "couldn't setup memory nodes in fdt\n");
958 exit(1);
959 }
960
961 ret = spapr_populate_vdevice(spapr->vio_bus, fdt);
962 if (ret < 0) {
963 fprintf(stderr, "couldn't setup vio devices in fdt\n");
964 exit(1);
965 }
966
967 if (object_resolve_path_type("", TYPE_SPAPR_RNG, NULL)) {
968 ret = spapr_rng_populate_dt(fdt);
969 if (ret < 0) {
970 fprintf(stderr, "could not set up rng device in the fdt\n");
971 exit(1);
972 }
973 }
974
975 QLIST_FOREACH(phb, &spapr->phbs, list) {
976 ret = spapr_populate_pci_dt(phb, PHANDLE_XICP, fdt);
977 if (ret < 0) {
978 error_report("couldn't setup PCI devices in fdt");
979 exit(1);
980 }
981 }
982
983
984 ret = spapr_rtas_device_tree_setup(fdt, rtas_addr, rtas_size);
985 if (ret < 0) {
986 fprintf(stderr, "Couldn't set up RTAS device tree properties\n");
987 }
988
989
990 spapr_populate_cpus_dt_node(fdt, spapr);
991
992 bootlist = get_boot_devices_list(&cb, true);
993 if (cb && bootlist) {
994 int offset = fdt_path_offset(fdt, "/chosen");
995 if (offset < 0) {
996 exit(1);
997 }
998 for (i = 0; i < cb; i++) {
999 if (bootlist[i] == '\n') {
1000 bootlist[i] = ' ';
1001 }
1002
1003 }
1004 ret = fdt_setprop_string(fdt, offset, "qemu,boot-list", bootlist);
1005 }
1006
1007 if (boot_device && strlen(boot_device)) {
1008 int offset = fdt_path_offset(fdt, "/chosen");
1009
1010 if (offset < 0) {
1011 exit(1);
1012 }
1013 fdt_setprop_string(fdt, offset, "qemu,boot-device", boot_device);
1014 }
1015
1016 if (!spapr->has_graphics) {
1017 spapr_populate_chosen_stdout(fdt, spapr->vio_bus);
1018 }
1019
1020 if (smc->dr_lmb_enabled) {
1021 _FDT(spapr_drc_populate_dt(fdt, 0, NULL, SPAPR_DR_CONNECTOR_TYPE_LMB));
1022 }
1023
1024 if (mc->query_hotpluggable_cpus) {
1025 int offset = fdt_path_offset(fdt, "/cpus");
1026 ret = spapr_drc_populate_dt(fdt, offset, NULL,
1027 SPAPR_DR_CONNECTOR_TYPE_CPU);
1028 if (ret < 0) {
1029 error_report("Couldn't set up CPU DR device tree properties");
1030 exit(1);
1031 }
1032 }
1033
1034 _FDT((fdt_pack(fdt)));
1035
1036 if (fdt_totalsize(fdt) > FDT_MAX_SIZE) {
1037 error_report("FDT too big ! 0x%x bytes (max is 0x%x)",
1038 fdt_totalsize(fdt), FDT_MAX_SIZE);
1039 exit(1);
1040 }
1041
1042 qemu_fdt_dumpdtb(fdt, fdt_totalsize(fdt));
1043 cpu_physical_memory_write(fdt_addr, fdt, fdt_totalsize(fdt));
1044
1045 g_free(bootlist);
1046 g_free(fdt);
1047}
1048
1049static uint64_t translate_kernel_address(void *opaque, uint64_t addr)
1050{
1051 return (addr & 0x0fffffff) + KERNEL_LOAD_ADDR;
1052}
1053
1054static void emulate_spapr_hypercall(PowerPCCPU *cpu)
1055{
1056 CPUPPCState *env = &cpu->env;
1057
1058 if (msr_pr) {
1059 hcall_dprintf("Hypercall made with MSR[PR]=1\n");
1060 env->gpr[3] = H_PRIVILEGE;
1061 } else {
1062 env->gpr[3] = spapr_hypercall(cpu, env->gpr[3], &env->gpr[4]);
1063 }
1064}
1065
/*
 * Hash page table entry helpers.  An entry occupies two consecutive
 * uint64_t slots; the flag tests and updates below operate on the first
 * one, converting with tswap64().
 */
/* Pointer to entry _i of the table starting at _table */
#define HPTE(_table, _i) (void *)(((uint64_t *)(_table)) + ((_i) * 2))
/* Non-zero if the entry has HPTE64_V_VALID set */
#define HPTE_VALID(_hpte) (tswap64(*((uint64_t *)(_hpte))) & HPTE64_V_VALID)
/* Non-zero if the entry has HPTE64_V_HPTE_DIRTY set */
#define HPTE_DIRTY(_hpte) (tswap64(*((uint64_t *)(_hpte))) & HPTE64_V_HPTE_DIRTY)
/* Clear / set HPTE64_V_HPTE_DIRTY in place */
#define CLEAN_HPTE(_hpte) ((*(uint64_t *)(_hpte)) &= tswap64(~HPTE64_V_HPTE_DIRTY))
#define DIRTY_HPTE(_hpte) ((*(uint64_t *)(_hpte)) |= tswap64(HPTE64_V_HPTE_DIRTY))
1071
1072
1073
1074
1075static int get_htab_fd(sPAPRMachineState *spapr)
1076{
1077 if (spapr->htab_fd >= 0) {
1078 return spapr->htab_fd;
1079 }
1080
1081 spapr->htab_fd = kvmppc_get_htab_fd(false);
1082 if (spapr->htab_fd < 0) {
1083 error_report("Unable to open fd for reading hash table from KVM: %s",
1084 strerror(errno));
1085 }
1086
1087 return spapr->htab_fd;
1088}
1089
1090static void close_htab_fd(sPAPRMachineState *spapr)
1091{
1092 if (spapr->htab_fd >= 0) {
1093 close(spapr->htab_fd);
1094 }
1095 spapr->htab_fd = -1;
1096}
1097
/*
 * Choose the hash page table order (log2 of its size in bytes) for a given
 * RAM size: log2 of @ramsize rounded up to a power of two, minus 7,
 * clamped to the range [18, 46].
 */
static int spapr_hpt_shift_for_ramsize(uint64_t ramsize)
{
    int shift = ctz64(pow2ceil(ramsize)) - 7;

    if (shift < 18) {
        shift = 18;
    }
    if (shift > 46) {
        shift = 46;
    }
    return shift;
}
1110
1111static void spapr_reallocate_hpt(sPAPRMachineState *spapr, int shift,
1112 Error **errp)
1113{
1114 long rc;
1115
1116
1117 g_free(spapr->htab);
1118 spapr->htab = NULL;
1119 spapr->htab_shift = 0;
1120 close_htab_fd(spapr);
1121
1122 rc = kvmppc_reset_htab(shift);
1123 if (rc < 0) {
1124
1125 error_setg_errno(errp, errno,
1126 "Failed to allocate KVM HPT of order %d (try smaller maxmem?)",
1127 shift);
1128
1129
1130 } else if (rc > 0) {
1131
1132 if (rc != shift) {
1133 error_setg(errp,
1134 "Requested order %d HPT, but kernel allocated order %ld (try smaller maxmem?)",
1135 shift, rc);
1136 }
1137
1138 spapr->htab_shift = shift;
1139 spapr->htab = NULL;
1140 } else {
1141
1142 size_t size = 1ULL << shift;
1143 int i;
1144
1145 spapr->htab = qemu_memalign(size, size);
1146 if (!spapr->htab) {
1147 error_setg_errno(errp, errno,
1148 "Could not allocate HPT of order %d", shift);
1149 return;
1150 }
1151
1152 memset(spapr->htab, 0, size);
1153 spapr->htab_shift = shift;
1154
1155 for (i = 0; i < size / HASH_PTE_SIZE_64; i++) {
1156 DIRTY_HPTE(HPTE(spapr->htab, i));
1157 }
1158 }
1159}
1160
1161static int find_unknown_sysbus_device(SysBusDevice *sbdev, void *opaque)
1162{
1163 bool matched = false;
1164
1165 if (object_dynamic_cast(OBJECT(sbdev), TYPE_SPAPR_PCI_HOST_BRIDGE)) {
1166 matched = true;
1167 }
1168
1169 if (!matched) {
1170 error_report("Device %s is not supported by this machine yet.",
1171 qdev_fw_name(DEVICE(sbdev)));
1172 exit(1);
1173 }
1174
1175 return 0;
1176}
1177
/*
 * Machine reset handler (installed as mc->reset).
 *
 * Rejects unsupported dynamic sysbus devices, (re)allocates the hash page
 * table, resets all devices, places the RTAS blob and the device tree at
 * the top of the usable real-mode area and prepares the first CPU to start
 * executing at SPAPR_ENTRY_POINT.
 */
static void ppc_spapr_reset(void)
{
    MachineState *machine = MACHINE(qdev_get_machine());
    sPAPRMachineState *spapr = SPAPR_MACHINE(machine);
    PowerPCCPU *first_ppc_cpu;
    uint32_t rtas_limit;

    /* Exits QEMU if a dynamic sysbus device is not a PCI host bridge */
    foreach_dynamic_sysbus_device(find_unknown_sysbus_device, NULL);

    /* Size the hash page table from maxram_size; any failure is fatal */
    spapr_reallocate_hpt(spapr,
                         spapr_hpt_shift_for_ramsize(machine->maxram_size),
                         &error_fatal);

    /* With vrma_adjust set, rma_size is recomputed from the new HPT shift */
    if (spapr->vrma_adjust) {
        spapr->rma_size = kvmppc_rma_size(spapr_node0_size(),
                                          spapr->htab_shift);
    }

    qemu_devices_reset();

    /*
     * RTAS occupies the last RTAS_MAX_SIZE bytes below
     * min(rma_size, RTAS_MAX_ADDR); the device tree gets the FDT_MAX_SIZE
     * bytes immediately below RTAS.
     */
    rtas_limit = MIN(spapr->rma_size, RTAS_MAX_ADDR);
    spapr->rtas_addr = rtas_limit - RTAS_MAX_SIZE;
    spapr->fdt_addr = spapr->rtas_addr - FDT_MAX_SIZE;

    /* Finalize the device tree for the addresses chosen above */
    spapr_finalize_fdt(spapr, spapr->fdt_addr, spapr->rtas_addr,
                       spapr->rtas_size);

    /* Copy the RTAS blob into guest memory */
    cpu_physical_memory_write(spapr->rtas_addr, spapr->rtas_blob,
                              spapr->rtas_size);

    /* Entry state of the first CPU: r3 = FDT address, r5 = 0, running */
    first_ppc_cpu = POWERPC_CPU(first_cpu);
    first_ppc_cpu->env.gpr[3] = spapr->fdt_addr;
    first_ppc_cpu->env.gpr[5] = 0;
    first_cpu->halted = 0;
    first_ppc_cpu->env.nip = SPAPR_ENTRY_POINT;

}
1226
1227static void spapr_create_nvram(sPAPRMachineState *spapr)
1228{
1229 DeviceState *dev = qdev_create(&spapr->vio_bus->bus, "spapr-nvram");
1230 DriveInfo *dinfo = drive_get(IF_PFLASH, 0, 0);
1231
1232 if (dinfo) {
1233 qdev_prop_set_drive(dev, "drive", blk_by_legacy_dinfo(dinfo),
1234 &error_fatal);
1235 }
1236
1237 qdev_init_nofail(dev);
1238
1239 spapr->nvram = (struct sPAPRNVRAM *)dev;
1240}
1241
1242static void spapr_rtc_create(sPAPRMachineState *spapr)
1243{
1244 DeviceState *dev = qdev_create(NULL, TYPE_SPAPR_RTC);
1245
1246 qdev_init_nofail(dev);
1247 spapr->rtc = dev;
1248
1249 object_property_add_alias(qdev_get_machine(), "rtc-time",
1250 OBJECT(spapr->rtc), "date", NULL);
1251}
1252
1253
1254static bool spapr_vga_init(PCIBus *pci_bus, Error **errp)
1255{
1256 switch (vga_interface_type) {
1257 case VGA_NONE:
1258 return false;
1259 case VGA_DEVICE:
1260 return true;
1261 case VGA_STD:
1262 case VGA_VIRTIO:
1263 return pci_vga_init(pci_bus) != NULL;
1264 default:
1265 error_setg(errp,
1266 "Unsupported VGA mode, only -vga std or -vga virtio is supported");
1267 return false;
1268 }
1269}
1270
1271static int spapr_post_load(void *opaque, int version_id)
1272{
1273 sPAPRMachineState *spapr = (sPAPRMachineState *)opaque;
1274 int err = 0;
1275
1276
1277
1278
1279
1280 if (version_id < 3) {
1281 err = spapr_rtc_import_offset(spapr->rtc, spapr->rtc_offset);
1282 }
1283
1284 return err;
1285}
1286
/*
 * VMState field test: true when the incoming stream's version is
 * lower than 3.  @opaque is unused.
 */
static bool version_before_3(void *opaque, int version_id)
{
    return version_id <= 2;
}
1291
/*
 * Migration description of the machine-level "spapr" section.
 * The current stream version is 3; versions down to 1 are accepted,
 * with spapr_post_load() run after the fields have been loaded.
 */
static const VMStateDescription vmstate_spapr = {
    .name = "spapr",
    .version_id = 3,
    .minimum_version_id = 1,
    .post_load = spapr_post_load,
    .fields = (VMStateField[]) {
        /* 4 unused bytes, only present in streams older than version 3 */
        VMSTATE_UNUSED_BUFFER(version_before_3, 0, 4),

        /* rtc_offset is only transferred for streams older than version 3 */
        VMSTATE_UINT64_TEST(rtc_offset, sPAPRMachineState, version_before_3),

        /* Timebase field "tb", versioned at 2 */
        VMSTATE_PPC_TIMEBASE_V(tb, sPAPRMachineState, 2),
        VMSTATE_END_OF_LIST()
    },
};
1308
1309static int htab_save_setup(QEMUFile *f, void *opaque)
1310{
1311 sPAPRMachineState *spapr = opaque;
1312
1313
1314 qemu_put_be32(f, spapr->htab_shift);
1315
1316 if (spapr->htab) {
1317 spapr->htab_save_index = 0;
1318 spapr->htab_first_pass = true;
1319 } else {
1320 assert(kvm_enabled());
1321 }
1322
1323
1324 return 0;
1325}
1326
1327static void htab_save_first_pass(QEMUFile *f, sPAPRMachineState *spapr,
1328 int64_t max_ns)
1329{
1330 bool has_timeout = max_ns != -1;
1331 int htabslots = HTAB_SIZE(spapr) / HASH_PTE_SIZE_64;
1332 int index = spapr->htab_save_index;
1333 int64_t starttime = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
1334
1335 assert(spapr->htab_first_pass);
1336
1337 do {
1338 int chunkstart;
1339
1340
1341 while ((index < htabslots)
1342 && !HPTE_VALID(HPTE(spapr->htab, index))) {
1343 index++;
1344 CLEAN_HPTE(HPTE(spapr->htab, index));
1345 }
1346
1347
1348 chunkstart = index;
1349 while ((index < htabslots) && (index - chunkstart < USHRT_MAX)
1350 && HPTE_VALID(HPTE(spapr->htab, index))) {
1351 index++;
1352 CLEAN_HPTE(HPTE(spapr->htab, index));
1353 }
1354
1355 if (index > chunkstart) {
1356 int n_valid = index - chunkstart;
1357
1358 qemu_put_be32(f, chunkstart);
1359 qemu_put_be16(f, n_valid);
1360 qemu_put_be16(f, 0);
1361 qemu_put_buffer(f, HPTE(spapr->htab, chunkstart),
1362 HASH_PTE_SIZE_64 * n_valid);
1363
1364 if (has_timeout &&
1365 (qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - starttime) > max_ns) {
1366 break;
1367 }
1368 }
1369 } while ((index < htabslots) && !qemu_file_rate_limit(f));
1370
1371 if (index >= htabslots) {
1372 assert(index == htabslots);
1373 index = 0;
1374 spapr->htab_first_pass = false;
1375 }
1376 spapr->htab_save_index = index;
1377}
1378
/*
 * Second and subsequent passes over a QEMU-allocated hash page table:
 * only entries still marked dirty are transferred.
 *
 * Each emitted chunk consists of
 *     be32 index of first entry, be16 n_valid, be16 n_invalid,
 *     raw data of the n_valid valid entries.
 * Invalid dirty entries are only counted, no data is sent for them.
 *
 * @f:      migration stream to write to
 * @spapr:  machine state; htab must be non-NULL, htab_first_pass cleared
 * @max_ns: time budget in nanoseconds; a negative value selects the final
 *          pass, which ignores both the time budget and the rate limit
 *
 * The scan starts at spapr->htab_save_index, wraps around at the end of the
 * table and stops after at most one full lap.  The resume position is
 * written back to spapr->htab_save_index.
 *
 * Returns 1 if every slot was examined and nothing had to be sent,
 * 0 otherwise.
 */
static int htab_save_later_pass(QEMUFile *f, sPAPRMachineState *spapr,
                                int64_t max_ns)
{
    bool final = max_ns < 0;
    int htabslots = HTAB_SIZE(spapr) / HASH_PTE_SIZE_64;
    int examined = 0, sent = 0;
    int index = spapr->htab_save_index;
    int64_t starttime = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);

    assert(!spapr->htab_first_pass);

    do {
        int chunkstart, invalidstart;

        /* Skip over entries that are not dirty */
        while ((index < htabslots)
               && !HPTE_DIRTY(HPTE(spapr->htab, index))) {
            index++;
            examined++;
        }

        chunkstart = index;
        /* Collect a run of dirty, valid entries (count fits in 16 bits) */
        while ((index < htabslots) && (index - chunkstart < USHRT_MAX)
               && HPTE_DIRTY(HPTE(spapr->htab, index))
               && HPTE_VALID(HPTE(spapr->htab, index))) {
            CLEAN_HPTE(HPTE(spapr->htab, index));
            index++;
            examined++;
        }

        invalidstart = index;
        /* Collect the directly following run of dirty, invalid entries */
        while ((index < htabslots) && (index - invalidstart < USHRT_MAX)
               && HPTE_DIRTY(HPTE(spapr->htab, index))
               && !HPTE_VALID(HPTE(spapr->htab, index))) {
            CLEAN_HPTE(HPTE(spapr->htab, index));
            index++;
            examined++;
        }

        if (index > chunkstart) {
            int n_valid = invalidstart - chunkstart;
            int n_invalid = index - invalidstart;

            qemu_put_be32(f, chunkstart);
            qemu_put_be16(f, n_valid);
            qemu_put_be16(f, n_invalid);
            /* Only the valid entries carry data */
            qemu_put_buffer(f, HPTE(spapr->htab, chunkstart),
                            HASH_PTE_SIZE_64 * n_valid);
            sent += index - chunkstart;

            if (!final && (qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - starttime) > max_ns) {
                break;
            }
        }

        /* A complete lap around the table has been made */
        if (examined >= htabslots) {
            break;
        }

        /* Wrap around to the start of the table */
        if (index >= htabslots) {
            assert(index == htabslots);
            index = 0;
        }
    } while ((examined < htabslots) && (!qemu_file_rate_limit(f) || final));

    /* The loop may have been left with the cursor at the table end */
    if (index >= htabslots) {
        assert(index == htabslots);
        index = 0;
    }

    spapr->htab_save_index = index;

    return (examined >= htabslots) && (sent == 0) ? 1 : 0;
}
1455
/* Time budget passed to the HPT save passes per iteration: 5000000 ns (5 ms) */
#define MAX_ITERATION_NS    5000000
/* Buffer size argument passed to kvmppc_save_htab() */
#define MAX_KVM_BUF_SIZE    2048
1458
1459static int htab_save_iterate(QEMUFile *f, void *opaque)
1460{
1461 sPAPRMachineState *spapr = opaque;
1462 int fd;
1463 int rc = 0;
1464
1465
1466 qemu_put_be32(f, 0);
1467
1468 if (!spapr->htab) {
1469 assert(kvm_enabled());
1470
1471 fd = get_htab_fd(spapr);
1472 if (fd < 0) {
1473 return fd;
1474 }
1475
1476 rc = kvmppc_save_htab(f, fd, MAX_KVM_BUF_SIZE, MAX_ITERATION_NS);
1477 if (rc < 0) {
1478 return rc;
1479 }
1480 } else if (spapr->htab_first_pass) {
1481 htab_save_first_pass(f, spapr, MAX_ITERATION_NS);
1482 } else {
1483 rc = htab_save_later_pass(f, spapr, MAX_ITERATION_NS);
1484 }
1485
1486
1487 qemu_put_be32(f, 0);
1488 qemu_put_be16(f, 0);
1489 qemu_put_be16(f, 0);
1490
1491 return rc;
1492}
1493
1494static int htab_save_complete(QEMUFile *f, void *opaque)
1495{
1496 sPAPRMachineState *spapr = opaque;
1497 int fd;
1498
1499
1500 qemu_put_be32(f, 0);
1501
1502 if (!spapr->htab) {
1503 int rc;
1504
1505 assert(kvm_enabled());
1506
1507 fd = get_htab_fd(spapr);
1508 if (fd < 0) {
1509 return fd;
1510 }
1511
1512 rc = kvmppc_save_htab(f, fd, MAX_KVM_BUF_SIZE, -1);
1513 if (rc < 0) {
1514 return rc;
1515 }
1516 } else {
1517 if (spapr->htab_first_pass) {
1518 htab_save_first_pass(f, spapr, -1);
1519 }
1520 htab_save_later_pass(f, spapr, -1);
1521 }
1522
1523
1524 qemu_put_be32(f, 0);
1525 qemu_put_be16(f, 0);
1526 qemu_put_be16(f, 0);
1527
1528 return 0;
1529}
1530
1531static int htab_load(QEMUFile *f, void *opaque, int version_id)
1532{
1533 sPAPRMachineState *spapr = opaque;
1534 uint32_t section_hdr;
1535 int fd = -1;
1536
1537 if (version_id < 1 || version_id > 1) {
1538 error_report("htab_load() bad version");
1539 return -EINVAL;
1540 }
1541
1542 section_hdr = qemu_get_be32(f);
1543
1544 if (section_hdr) {
1545 Error *local_err = NULL;
1546
1547
1548 spapr_reallocate_hpt(spapr, section_hdr, &local_err);
1549 if (local_err) {
1550 error_report_err(local_err);
1551 return -EINVAL;
1552 }
1553 return 0;
1554 }
1555
1556 if (!spapr->htab) {
1557 assert(kvm_enabled());
1558
1559 fd = kvmppc_get_htab_fd(true);
1560 if (fd < 0) {
1561 error_report("Unable to open fd to restore KVM hash table: %s",
1562 strerror(errno));
1563 }
1564 }
1565
1566 while (true) {
1567 uint32_t index;
1568 uint16_t n_valid, n_invalid;
1569
1570 index = qemu_get_be32(f);
1571 n_valid = qemu_get_be16(f);
1572 n_invalid = qemu_get_be16(f);
1573
1574 if ((index == 0) && (n_valid == 0) && (n_invalid == 0)) {
1575
1576 break;
1577 }
1578
1579 if ((index + n_valid + n_invalid) >
1580 (HTAB_SIZE(spapr) / HASH_PTE_SIZE_64)) {
1581
1582 error_report(
1583 "htab_load() bad index %d (%hd+%hd entries) in htab stream (htab_shift=%d)",
1584 index, n_valid, n_invalid, spapr->htab_shift);
1585 return -EINVAL;
1586 }
1587
1588 if (spapr->htab) {
1589 if (n_valid) {
1590 qemu_get_buffer(f, HPTE(spapr->htab, index),
1591 HASH_PTE_SIZE_64 * n_valid);
1592 }
1593 if (n_invalid) {
1594 memset(HPTE(spapr->htab, index + n_valid), 0,
1595 HASH_PTE_SIZE_64 * n_invalid);
1596 }
1597 } else {
1598 int rc;
1599
1600 assert(fd >= 0);
1601
1602 rc = kvmppc_load_htab_chunk(f, fd, index, n_valid, n_invalid);
1603 if (rc < 0) {
1604 return rc;
1605 }
1606 }
1607 }
1608
1609 if (!spapr->htab) {
1610 assert(fd >= 0);
1611 close(fd);
1612 }
1613
1614 return 0;
1615}
1616
1617static void htab_cleanup(void *opaque)
1618{
1619 sPAPRMachineState *spapr = opaque;
1620
1621 close_htab_fd(spapr);
1622}
1623
/*
 * Live-migration callbacks for the hash page table; registered for the
 * "spapr/htab" section by ppc_spapr_init().
 */
static SaveVMHandlers savevm_htab_handlers = {
    .save_live_setup = htab_save_setup,
    .save_live_iterate = htab_save_iterate,
    .save_live_complete_precopy = htab_save_complete,
    .cleanup = htab_cleanup,
    .load_state = htab_load,
};
1631
1632static void spapr_boot_set(void *opaque, const char *boot_device,
1633 Error **errp)
1634{
1635 MachineState *machine = MACHINE(qdev_get_machine());
1636 machine->boot_order = g_strdup(boot_device);
1637}
1638
1639
1640
1641
1642
1643
1644
1645
1646
1647static void spapr_drc_reset(void *opaque)
1648{
1649 sPAPRDRConnector *drc = opaque;
1650 DeviceState *d = DEVICE(drc);
1651
1652 if (d) {
1653 device_reset(d);
1654 }
1655}
1656
1657static void spapr_create_lmb_dr_connectors(sPAPRMachineState *spapr)
1658{
1659 MachineState *machine = MACHINE(spapr);
1660 uint64_t lmb_size = SPAPR_MEMORY_BLOCK_SIZE;
1661 uint32_t nr_lmbs = (machine->maxram_size - machine->ram_size)/lmb_size;
1662 int i;
1663
1664 for (i = 0; i < nr_lmbs; i++) {
1665 sPAPRDRConnector *drc;
1666 uint64_t addr;
1667
1668 addr = i * lmb_size + spapr->hotplug_memory.base;
1669 drc = spapr_dr_connector_new(OBJECT(spapr), SPAPR_DR_CONNECTOR_TYPE_LMB,
1670 addr/lmb_size);
1671 qemu_register_reset(spapr_drc_reset, drc);
1672 }
1673}
1674
1675
1676
1677
1678
1679
1680static void spapr_validate_node_memory(MachineState *machine, Error **errp)
1681{
1682 int i;
1683
1684 if (machine->ram_size % SPAPR_MEMORY_BLOCK_SIZE) {
1685 error_setg(errp, "Memory size 0x" RAM_ADDR_FMT
1686 " is not aligned to %llu MiB",
1687 machine->ram_size,
1688 SPAPR_MEMORY_BLOCK_SIZE / M_BYTE);
1689 return;
1690 }
1691
1692 if (machine->maxram_size % SPAPR_MEMORY_BLOCK_SIZE) {
1693 error_setg(errp, "Maximum memory size 0x" RAM_ADDR_FMT
1694 " is not aligned to %llu MiB",
1695 machine->ram_size,
1696 SPAPR_MEMORY_BLOCK_SIZE / M_BYTE);
1697 return;
1698 }
1699
1700 for (i = 0; i < nb_numa_nodes; i++) {
1701 if (numa_info[i].node_mem % SPAPR_MEMORY_BLOCK_SIZE) {
1702 error_setg(errp,
1703 "Node %d memory size 0x%" PRIx64
1704 " is not aligned to %llu MiB",
1705 i, numa_info[i].node_mem,
1706 SPAPR_MEMORY_BLOCK_SIZE / M_BYTE);
1707 return;
1708 }
1709 }
1710}
1711
1712
/*
 * Machine init callback of the pSeries machine (installed as mc->init).
 *
 * In order, this sets up the real mode area (RMA) size, the interrupt
 * controller, the CPUs (as hotpluggable cores where supported), RAM and
 * the hotplug memory region, loads the RTAS blob, creates the event source,
 * RTC, VIO bus with its default devices, the first PCI host bridge with
 * NICs, SCSI adapters, graphics and USB, loads the optional kernel/initrd
 * and the firmware image, registers the migration state and finally builds
 * the device tree skeleton.  Any failure terminates QEMU.
 */
static void ppc_spapr_init(MachineState *machine)
{
    sPAPRMachineState *spapr = SPAPR_MACHINE(machine);
    MachineClass *mc = MACHINE_GET_CLASS(machine);
    sPAPRMachineClass *smc = SPAPR_MACHINE_GET_CLASS(machine);
    const char *kernel_filename = machine->kernel_filename;
    const char *kernel_cmdline = machine->kernel_cmdline;
    const char *initrd_filename = machine->initrd_filename;
    PCIHostState *phb;
    int i;
    MemoryRegion *sysmem = get_system_memory();
    MemoryRegion *ram = g_new(MemoryRegion, 1);
    MemoryRegion *rma_region;
    void *rma = NULL;
    hwaddr rma_alloc_size;
    hwaddr node0_size = spapr_node0_size();
    uint32_t initrd_base = 0;
    long kernel_size = 0, initrd_size = 0;
    long load_limit, fw_size;
    bool kernel_le = false;
    char *filename;
    int smt = kvmppc_smt_threads();
    int spapr_cores = smp_cpus / smp_threads;
    int spapr_max_cores = max_cpus / smp_threads;

    /* Core-granular CPU hotplug requires whole cores only */
    if (mc->query_hotpluggable_cpus) {
        if (smp_cpus % smp_threads) {
            error_report("smp_cpus (%u) must be multiple of threads (%u)",
                         smp_cpus, smp_threads);
            exit(1);
        }
        if (max_cpus % smp_threads) {
            error_report("max_cpus (%u) must be multiple of threads (%u)",
                         max_cpus, smp_threads);
            exit(1);
        }
    }

    msi_nonbroken = true;

    QLIST_INIT(&spapr->phbs);

    cpu_ppc_hypercall = emulate_spapr_hypercall;

    /* Try to have KVM allocate a contiguous RMA; 0 means none was allocated */
    rma_alloc_size = kvmppc_alloc_rma(&rma);

    if (rma_alloc_size == -1) {
        error_report("Unable to create RMA");
        exit(1);
    }

    if (rma_alloc_size && (rma_alloc_size < node0_size)) {
        spapr->rma_size = rma_alloc_size;
    } else {
        spapr->rma_size = node0_size;

        /*
         * Under KVM the RMA is capped at 0x10000000 (256 MiB) here and
         * vrma_adjust is set, so that ppc_spapr_reset() recomputes the
         * size once the hash table shift is known.
         */
        if (kvm_enabled()) {
            spapr->vrma_adjust = 1;
            spapr->rma_size = MIN(spapr->rma_size, 0x10000000);
        }

        /* In all cases the RMA is limited to 0x400000000 (16 GiB) */
        spapr->rma_size = MIN(spapr->rma_size, 0x400000000ull);
    }

    if (spapr->rma_size > node0_size) {
        error_report("Numa node 0 has to span the RMA (%#08"HWADDR_PRIx")",
                     spapr->rma_size);
        exit(1);
    }

    /* Upper bound for loaded images: leave FW_OVERHEAD below the RTAS limit */
    load_limit = MIN(spapr->rma_size, RTAS_MAX_ADDR) - FW_OVERHEAD;

    /* Set up the interrupt controller before the CPUs are created */
    spapr->xics = xics_system_init(machine,
                                   DIV_ROUND_UP(max_cpus * smt, smp_threads),
                                   XICS_IRQS_SPAPR, &error_fatal);

    if (smc->dr_lmb_enabled) {
        spapr_validate_node_memory(machine, &error_fatal);
    }

    /* Default CPU model: "host" with KVM, "POWER7" otherwise */
    if (machine->cpu_model == NULL) {
        machine->cpu_model = kvm_enabled() ? "host" : "POWER7";
    }

    ppc_cpu_parse_features(machine->cpu_model);

    if (mc->query_hotpluggable_cpus) {
        char *type = spapr_get_cpu_core_type(machine->cpu_model);

        if (type == NULL) {
            error_report("Unable to find sPAPR CPU Core definition");
            exit(1);
        }

        /*
         * One DR connector per possible core; only the first spapr_cores
         * cores are created and realized at startup.
         */
        spapr->cores = g_new0(Object *, spapr_max_cores);
        for (i = 0; i < spapr_max_cores; i++) {
            int core_id = i * smp_threads;
            sPAPRDRConnector *drc =
                spapr_dr_connector_new(OBJECT(spapr),
                                       SPAPR_DR_CONNECTOR_TYPE_CPU,
                                       (core_id / smp_threads) * smt);

            qemu_register_reset(spapr_drc_reset, drc);

            if (i < spapr_cores) {
                Object *core = object_new(type);
                object_property_set_int(core, smp_threads, "nr-threads",
                                        &error_fatal);
                object_property_set_int(core, core_id, CPU_CORE_PROP_CORE_ID,
                                        &error_fatal);
                object_property_set_bool(core, true, "realized", &error_fatal);
            }
        }
        g_free(type);
    } else {
        /* Without core hotplug support, create the CPUs one thread at a time */
        for (i = 0; i < smp_cpus; i++) {
            PowerPCCPU *cpu = cpu_ppc_init(machine->cpu_model);
            if (cpu == NULL) {
                error_report("Unable to find PowerPC CPU definition");
                exit(1);
            }
            spapr_cpu_init(spapr, cpu, &error_fatal);
        }
    }

    if (kvm_enabled()) {
        /* Enable the KVM-side logical CI and set-mode hypercalls */
        kvmppc_enable_logical_ci_hcalls();
        kvmppc_enable_set_mode_hcall();
    }

    /* Allocate RAM and map it at guest physical address 0 */
    memory_region_allocate_system_memory(ram, NULL, "ppc_spapr.ram",
                                         machine->ram_size);
    memory_region_add_subregion(sysmem, 0, ram);

    /* A KVM-allocated RMA is mapped at address 0 as its own region */
    if (rma_alloc_size && rma) {
        rma_region = g_new(MemoryRegion, 1);
        memory_region_init_ram_ptr(rma_region, NULL, "ppc_spapr.rma",
                                   rma_alloc_size, rma);
        vmstate_register_ram_global(rma_region);
        memory_region_add_subregion(sysmem, 0, rma_region);
    }

    /* Initialize the hotplug memory address space */
    if (machine->ram_size < machine->maxram_size) {
        ram_addr_t hotplug_mem_size = machine->maxram_size - machine->ram_size;

        /*
         * Slot limit: half of KVM's memslots when KVM is enabled, but
         * never less than SPAPR_MAX_RAM_SLOTS.
         */
        int max_memslots = kvm_enabled() ? kvm_get_max_memslots() / 2 :
                           SPAPR_MAX_RAM_SLOTS;

        if (max_memslots < SPAPR_MAX_RAM_SLOTS) {
            max_memslots = SPAPR_MAX_RAM_SLOTS;
        }
        if (machine->ram_slots > max_memslots) {
            error_report("Specified number of memory slots %"
                         PRIu64" exceeds max supported %d",
                         machine->ram_slots, max_memslots);
            exit(1);
        }

        /* The hotplug region starts at the aligned end of boot RAM */
        spapr->hotplug_memory.base = ROUND_UP(machine->ram_size,
                                              SPAPR_HOTPLUG_MEM_ALIGN);
        memory_region_init(&spapr->hotplug_memory.mr, OBJECT(spapr),
                           "hotplug-memory", hotplug_mem_size);
        memory_region_add_subregion(sysmem, spapr->hotplug_memory.base,
                                    &spapr->hotplug_memory.mr);
    }

    if (smc->dr_lmb_enabled) {
        spapr_create_lmb_dr_connectors(spapr);
    }

    /* Load the RTAS blob into host memory; it is copied to the guest on reset */
    filename = qemu_find_file(QEMU_FILE_TYPE_BIOS, "spapr-rtas.bin");
    if (!filename) {
        error_report("Could not find LPAR rtas '%s'", "spapr-rtas.bin");
        exit(1);
    }
    spapr->rtas_size = get_image_size(filename);
    if (spapr->rtas_size < 0) {
        error_report("Could not get size of LPAR rtas '%s'", filename);
        exit(1);
    }
    spapr->rtas_blob = g_malloc(spapr->rtas_size);
    if (load_image_size(filename, spapr->rtas_blob, spapr->rtas_size) < 0) {
        error_report("Could not load LPAR rtas '%s'", filename);
        exit(1);
    }
    if (spapr->rtas_size > RTAS_MAX_SIZE) {
        error_report("RTAS too big ! 0x%zx bytes (max is 0x%x)",
                     (size_t)spapr->rtas_size, RTAS_MAX_SIZE);
        exit(1);
    }
    g_free(filename);

    /* Set up the event source */
    spapr_events_init(spapr);

    /* Set up the RTC */
    spapr_rtc_create(spapr);

    /* Set up the VIO bus and one VTY per configured serial port */
    spapr->vio_bus = spapr_vio_bus_init();

    for (i = 0; i < MAX_SERIAL_PORTS; i++) {
        if (serial_hds[i]) {
            spapr_vty_create(spapr->vio_bus, serial_hds[i]);
        }
    }

    /* NVRAM lives on the VIO bus, so it is created after the bus */
    spapr_create_nvram(spapr);

    /* Set up PCI: RTAS calls and the first host bridge (index 0) */
    spapr_pci_rtas_init();

    phb = spapr_create_phb(spapr, 0);

    /* NICs default to "ibmveth" on the VIO bus; other models go on PCI */
    for (i = 0; i < nb_nics; i++) {
        NICInfo *nd = &nd_table[i];

        if (!nd->model) {
            nd->model = g_strdup("ibmveth");
        }

        if (strcmp(nd->model, "ibmveth") == 0) {
            spapr_vlan_create(spapr->vio_bus, nd);
        } else {
            pci_nic_init_nofail(&nd_table[i], phb->bus, nd->model, NULL);
        }
    }

    /* One VSCSI adapter per SCSI bus index in use (0..max) */
    for (i = 0; i <= drive_get_max_bus(IF_SCSI); i++) {
        spapr_vscsi_create(spapr->vio_bus);
    }

    /* Graphics; having graphics turns USB on unless it was disabled */
    if (spapr_vga_init(phb->bus, &error_fatal)) {
        spapr->has_graphics = true;
        machine->usb |= defaults_enabled() && !machine->usb_disabled;
    }

    if (machine->usb) {
        if (smc->use_ohci_by_default) {
            pci_create_simple(phb->bus, -1, "pci-ohci");
        } else {
            pci_create_simple(phb->bus, -1, "nec-usb-xhci");
        }

        /* With graphics, also provide a USB keyboard and mouse */
        if (spapr->has_graphics) {
            USBBus *usb_bus = usb_bus_find(-1);

            usb_create_simple(usb_bus, "usb-kbd");
            usb_create_simple(usb_bus, "usb-mouse");
        }
    }

    /* MIN_RMA_SLOF is given in MiB, hence the shift by 20 */
    if (spapr->rma_size < (MIN_RMA_SLOF << 20)) {
        error_report(
            "pSeries SLOF firmware requires >= %ldM guest RMA (Real Mode Area memory)",
            MIN_RMA_SLOF);
        exit(1);
    }

    if (kernel_filename) {
        uint64_t lowaddr = 0;

        /* Try a big-endian ELF first, then retry as little-endian */
        kernel_size = load_elf(kernel_filename, translate_kernel_address, NULL,
                               NULL, &lowaddr, NULL, 1, PPC_ELF_MACHINE,
                               0, 0);
        if (kernel_size == ELF_LOAD_WRONG_ENDIAN) {
            kernel_size = load_elf(kernel_filename,
                                   translate_kernel_address, NULL,
                                   NULL, &lowaddr, NULL, 0, PPC_ELF_MACHINE,
                                   0, 0);
            kernel_le = kernel_size > 0;
        }
        if (kernel_size < 0) {
            error_report("error loading %s: %s",
                         kernel_filename, load_elf_strerror(kernel_size));
            exit(1);
        }

        /* Load the initrd, if any */
        if (initrd_filename) {
            /*
             * The initrd goes behind the kernel: the end of the kernel plus
             * 0x1ffff, rounded down to a 64 KiB boundary.
             */
            initrd_base = (KERNEL_LOAD_ADDR + kernel_size + 0x1ffff) & ~0xffff;
            initrd_size = load_image_targphys(initrd_filename, initrd_base,
                                              load_limit - initrd_base);
            if (initrd_size < 0) {
                error_report("could not load initial ram disk '%s'",
                             initrd_filename);
                exit(1);
            }
        } else {
            initrd_base = 0;
            initrd_size = 0;
        }
    }

    /* Load the firmware image at guest address 0 (FW_FILE_NAME by default) */
    if (bios_name == NULL) {
        bios_name = FW_FILE_NAME;
    }
    filename = qemu_find_file(QEMU_FILE_TYPE_BIOS, bios_name);
    if (!filename) {
        error_report("Could not find LPAR firmware '%s'", bios_name);
        exit(1);
    }
    fw_size = load_image_targphys(filename, 0, FW_MAX_SIZE);
    if (fw_size <= 0) {
        error_report("Could not load LPAR firmware '%s'", filename);
        exit(1);
    }
    g_free(filename);

    /* Register the machine state and the hash table for migration */
    vmstate_register(NULL, 0, &vmstate_spapr, spapr);
    register_savevm_live(NULL, "spapr/htab", -1, 1,
                         &savevm_htab_handlers, spapr);

    /* Prepare the device tree skeleton; it is finalized on reset */
    spapr->fdt_skel = spapr_create_fdt_skel(initrd_base, initrd_size,
                                            kernel_size, kernel_le,
                                            kernel_cmdline,
                                            spapr->check_exception_irq);
    assert(spapr->fdt_skel != NULL);

    /* Configure-connector state list and its reset hook */
    QTAILQ_INIT(&spapr->ccs_list);
    qemu_register_reset(spapr_ccs_reset_hook, spapr);

    qemu_register_boot_set(spapr_boot_set, spapr);
}
2072
/*
 * Translate the "kvm-type" machine option into the numeric KVM VM type.
 *
 * Returns 0 when no type was given, 1 for "HV" and 2 for "PR".  Any other
 * string is reported as an error and terminates QEMU.
 */
static int spapr_kvm_type(const char *vm_type)
{
    if (vm_type == NULL) {
        return 0;
    }

    if (strcmp(vm_type, "HV") == 0) {
        return 1;
    }

    if (strcmp(vm_type, "PR") == 0) {
        return 2;
    }

    error_report("Unknown kvm-type specified '%s'", vm_type);
    exit(1);
}
2090
2091
2092
2093
2094
/*
 * FWPathProvider callback (installed as fwc->get_dev_path): builds the
 * firmware device path component for SCSI disks and PCI host bridges.
 *
 * Returns a newly allocated string, or NULL when this function has no
 * special path for @dev.
 */
static char *spapr_get_fw_dev_path(FWPathProvider *p, BusState *bus,
                                   DeviceState *dev)
{
/* Dynamic cast that yields NULL when obj is not of the named QOM type */
#define CAST(type, obj, name) \
    ((type *)object_dynamic_cast(OBJECT(obj), (name)))
    SCSIDevice *d = CAST(SCSIDevice, dev, TYPE_SCSI_DEVICE);
    sPAPRPHBState *phb = CAST(sPAPRPHBState, dev, TYPE_SPAPR_PCI_HOST_BRIDGE);

    if (d) {
        /* The unit address format depends on what the SCSI bus hangs off */
        void *spapr = CAST(void, bus->parent, "spapr-vscsi");
        VirtIOSCSI *virtio = CAST(VirtIOSCSI, bus->parent, TYPE_VIRTIO_SCSI);
        USBDevice *usb = CAST(USBDevice, bus->parent, TYPE_USB_DEVICE);

        if (spapr) {
            /*
             * spapr-vscsi: 0x8000 | (id << 8) | lun, placed in the top
             * 16 bits of a 64-bit value.
             */
            unsigned id = 0x8000 | (d->id << 8) | d->lun;
            return g_strdup_printf("%s@%"PRIX64, qdev_fw_name(dev),
                                   (uint64_t)id << 48);
        } else if (virtio) {
            /*
             * virtio-scsi: 0x1000000 | (id << 16) | lun, placed in the top
             * 32 bits of a 64-bit value.
             */
            unsigned id = 0x1000000 | (d->id << 16) | d->lun;
            return g_strdup_printf("%s@%"PRIX64, qdev_fw_name(dev),
                                   (uint64_t)id << 32);
        } else if (usb) {
            /*
             * USB storage: same layout as virtio-scsi, with the number
             * parsed from the USB port path in place of the SCSI id.
             */
            unsigned usb_port = atoi(usb->port->path);
            unsigned id = 0x1000000 | (usb_port << 16) | d->lun;
            return g_strdup_printf("%s@%"PRIX64, qdev_fw_name(dev),
                                   (uint64_t)id << 32);
        }
    }

    if (phb) {
        /* PCI host bridges are addressed by their BUID */
        return g_strdup_printf("pci@%"PRIX64, phb->buid);
    }

    return NULL;
}
2147
2148static char *spapr_get_kvm_type(Object *obj, Error **errp)
2149{
2150 sPAPRMachineState *spapr = SPAPR_MACHINE(obj);
2151
2152 return g_strdup(spapr->kvm_type);
2153}
2154
2155static void spapr_set_kvm_type(Object *obj, const char *value, Error **errp)
2156{
2157 sPAPRMachineState *spapr = SPAPR_MACHINE(obj);
2158
2159 g_free(spapr->kvm_type);
2160 spapr->kvm_type = g_strdup(value);
2161}
2162
2163static void spapr_machine_initfn(Object *obj)
2164{
2165 sPAPRMachineState *spapr = SPAPR_MACHINE(obj);
2166
2167 spapr->htab_fd = -1;
2168 object_property_add_str(obj, "kvm-type",
2169 spapr_get_kvm_type, spapr_set_kvm_type, NULL);
2170 object_property_set_description(obj, "kvm-type",
2171 "Specifies the KVM virtualization mode (HV, PR)",
2172 NULL);
2173}
2174
2175static void spapr_machine_finalizefn(Object *obj)
2176{
2177 sPAPRMachineState *spapr = SPAPR_MACHINE(obj);
2178
2179 g_free(spapr->kvm_type);
2180}
2181
2182static void ppc_cpu_do_nmi_on_cpu(void *arg)
2183{
2184 CPUState *cs = arg;
2185
2186 cpu_synchronize_state(cs);
2187 ppc_cpu_do_system_reset(cs);
2188}
2189
2190static void spapr_nmi(NMIState *n, int cpu_index, Error **errp)
2191{
2192 CPUState *cs;
2193
2194 CPU_FOREACH(cs) {
2195 async_run_on_cpu(cs, ppc_cpu_do_nmi_on_cpu, cs);
2196 }
2197}
2198
2199static void spapr_add_lmbs(DeviceState *dev, uint64_t addr, uint64_t size,
2200 uint32_t node, Error **errp)
2201{
2202 sPAPRDRConnector *drc;
2203 sPAPRDRConnectorClass *drck;
2204 uint32_t nr_lmbs = size/SPAPR_MEMORY_BLOCK_SIZE;
2205 int i, fdt_offset, fdt_size;
2206 void *fdt;
2207
2208 for (i = 0; i < nr_lmbs; i++) {
2209 drc = spapr_dr_connector_by_id(SPAPR_DR_CONNECTOR_TYPE_LMB,
2210 addr/SPAPR_MEMORY_BLOCK_SIZE);
2211 g_assert(drc);
2212
2213 fdt = create_device_tree(&fdt_size);
2214 fdt_offset = spapr_populate_memory_node(fdt, node, addr,
2215 SPAPR_MEMORY_BLOCK_SIZE);
2216
2217 drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc);
2218 drck->attach(drc, dev, fdt, fdt_offset, !dev->hotplugged, errp);
2219 addr += SPAPR_MEMORY_BLOCK_SIZE;
2220 }
2221
2222
2223
2224 if (dev->hotplugged) {
2225 spapr_hotplug_req_add_by_count(SPAPR_DR_CONNECTOR_TYPE_LMB, nr_lmbs);
2226 }
2227}
2228
2229static void spapr_memory_plug(HotplugHandler *hotplug_dev, DeviceState *dev,
2230 uint32_t node, Error **errp)
2231{
2232 Error *local_err = NULL;
2233 sPAPRMachineState *ms = SPAPR_MACHINE(hotplug_dev);
2234 PCDIMMDevice *dimm = PC_DIMM(dev);
2235 PCDIMMDeviceClass *ddc = PC_DIMM_GET_CLASS(dimm);
2236 MemoryRegion *mr = ddc->get_memory_region(dimm);
2237 uint64_t align = memory_region_get_alignment(mr);
2238 uint64_t size = memory_region_size(mr);
2239 uint64_t addr;
2240
2241 if (size % SPAPR_MEMORY_BLOCK_SIZE) {
2242 error_setg(&local_err, "Hotplugged memory size must be a multiple of "
2243 "%lld MB", SPAPR_MEMORY_BLOCK_SIZE/M_BYTE);
2244 goto out;
2245 }
2246
2247 pc_dimm_memory_plug(dev, &ms->hotplug_memory, mr, align, &local_err);
2248 if (local_err) {
2249 goto out;
2250 }
2251
2252 addr = object_property_get_int(OBJECT(dimm), PC_DIMM_ADDR_PROP, &local_err);
2253 if (local_err) {
2254 pc_dimm_memory_unplug(dev, &ms->hotplug_memory, mr);
2255 goto out;
2256 }
2257
2258 spapr_add_lmbs(dev, addr, size, node, &error_abort);
2259
2260out:
2261 error_propagate(errp, local_err);
2262}
2263
2264void *spapr_populate_hotplug_cpu_dt(CPUState *cs, int *fdt_offset,
2265 sPAPRMachineState *spapr)
2266{
2267 PowerPCCPU *cpu = POWERPC_CPU(cs);
2268 DeviceClass *dc = DEVICE_GET_CLASS(cs);
2269 int id = ppc_get_vcpu_dt_id(cpu);
2270 void *fdt;
2271 int offset, fdt_size;
2272 char *nodename;
2273
2274 fdt = create_device_tree(&fdt_size);
2275 nodename = g_strdup_printf("%s@%x", dc->fw_name, id);
2276 offset = fdt_add_subnode(fdt, 0, nodename);
2277
2278 spapr_populate_cpu_dt(cs, fdt, offset, spapr);
2279 g_free(nodename);
2280
2281 *fdt_offset = offset;
2282 return fdt;
2283}
2284
2285static void spapr_machine_device_plug(HotplugHandler *hotplug_dev,
2286 DeviceState *dev, Error **errp)
2287{
2288 sPAPRMachineClass *smc = SPAPR_MACHINE_GET_CLASS(qdev_get_machine());
2289
2290 if (object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM)) {
2291 int node;
2292
2293 if (!smc->dr_lmb_enabled) {
2294 error_setg(errp, "Memory hotplug not supported for this machine");
2295 return;
2296 }
2297 node = object_property_get_int(OBJECT(dev), PC_DIMM_NODE_PROP, errp);
2298 if (*errp) {
2299 return;
2300 }
2301 if (node < 0 || node >= MAX_NODES) {
2302 error_setg(errp, "Invaild node %d", node);
2303 return;
2304 }
2305
2306
2307
2308
2309
2310
2311
2312
2313
2314
2315
2316
2317
2318
2319
2320
2321
2322 if (nb_numa_nodes && !numa_info[node].node_mem) {
2323 error_setg(errp, "Can't hotplug memory to memory-less node %d",
2324 node);
2325 return;
2326 }
2327
2328 spapr_memory_plug(hotplug_dev, dev, node, errp);
2329 } else if (object_dynamic_cast(OBJECT(dev), TYPE_SPAPR_CPU_CORE)) {
2330 spapr_core_plug(hotplug_dev, dev, errp);
2331 }
2332}
2333
2334static void spapr_machine_device_unplug(HotplugHandler *hotplug_dev,
2335 DeviceState *dev, Error **errp)
2336{
2337 MachineClass *mc = MACHINE_GET_CLASS(qdev_get_machine());
2338
2339 if (object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM)) {
2340 error_setg(errp, "Memory hot unplug not supported by sPAPR");
2341 } else if (object_dynamic_cast(OBJECT(dev), TYPE_SPAPR_CPU_CORE)) {
2342 if (!mc->query_hotpluggable_cpus) {
2343 error_setg(errp, "CPU hot unplug not supported on this machine");
2344 return;
2345 }
2346 spapr_core_unplug(hotplug_dev, dev, errp);
2347 }
2348}
2349
2350static void spapr_machine_device_pre_plug(HotplugHandler *hotplug_dev,
2351 DeviceState *dev, Error **errp)
2352{
2353 if (object_dynamic_cast(OBJECT(dev), TYPE_SPAPR_CPU_CORE)) {
2354 spapr_core_pre_plug(hotplug_dev, dev, errp);
2355 }
2356}
2357
2358static HotplugHandler *spapr_get_hotpug_handler(MachineState *machine,
2359 DeviceState *dev)
2360{
2361 if (object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM) ||
2362 object_dynamic_cast(OBJECT(dev), TYPE_SPAPR_CPU_CORE)) {
2363 return HOTPLUG_HANDLER(machine);
2364 }
2365 return NULL;
2366}
2367
2368static unsigned spapr_cpu_index_to_socket_id(unsigned cpu_index)
2369{
2370
2371
2372 return cpu_index / smp_threads / smp_cores;
2373}
2374
2375static HotpluggableCPUList *spapr_query_hotpluggable_cpus(MachineState *machine)
2376{
2377 int i;
2378 HotpluggableCPUList *head = NULL;
2379 sPAPRMachineState *spapr = SPAPR_MACHINE(machine);
2380 int spapr_max_cores = max_cpus / smp_threads;
2381
2382 for (i = 0; i < spapr_max_cores; i++) {
2383 HotpluggableCPUList *list_item = g_new0(typeof(*list_item), 1);
2384 HotpluggableCPU *cpu_item = g_new0(typeof(*cpu_item), 1);
2385 CpuInstanceProperties *cpu_props = g_new0(typeof(*cpu_props), 1);
2386
2387 cpu_item->type = spapr_get_cpu_core_type(machine->cpu_model);
2388 cpu_item->vcpus_count = smp_threads;
2389 cpu_props->has_core_id = true;
2390 cpu_props->core_id = i * smp_threads;
2391
2392
2393
2394 cpu_item->props = cpu_props;
2395 if (spapr->cores[i]) {
2396 cpu_item->has_qom_path = true;
2397 cpu_item->qom_path = object_get_canonical_path(spapr->cores[i]);
2398 }
2399 list_item->value = cpu_item;
2400 list_item->next = head;
2401 head = list_item;
2402 }
2403 return head;
2404}
2405
2406static void spapr_machine_class_init(ObjectClass *oc, void *data)
2407{
2408 MachineClass *mc = MACHINE_CLASS(oc);
2409 sPAPRMachineClass *smc = SPAPR_MACHINE_CLASS(oc);
2410 FWPathProviderClass *fwc = FW_PATH_PROVIDER_CLASS(oc);
2411 NMIClass *nc = NMI_CLASS(oc);
2412 HotplugHandlerClass *hc = HOTPLUG_HANDLER_CLASS(oc);
2413
2414 mc->desc = "pSeries Logical Partition (PAPR compliant)";
2415
2416
2417
2418
2419
2420
2421 mc->init = ppc_spapr_init;
2422 mc->reset = ppc_spapr_reset;
2423 mc->block_default_type = IF_SCSI;
2424 mc->max_cpus = MAX_CPUMASK_BITS;
2425 mc->no_parallel = 1;
2426 mc->default_boot_order = "";
2427 mc->default_ram_size = 512 * M_BYTE;
2428 mc->kvm_type = spapr_kvm_type;
2429 mc->has_dynamic_sysbus = true;
2430 mc->pci_allow_0_address = true;
2431 mc->get_hotplug_handler = spapr_get_hotpug_handler;
2432 hc->pre_plug = spapr_machine_device_pre_plug;
2433 hc->plug = spapr_machine_device_plug;
2434 hc->unplug = spapr_machine_device_unplug;
2435 mc->cpu_index_to_socket_id = spapr_cpu_index_to_socket_id;
2436
2437 smc->dr_lmb_enabled = true;
2438 mc->query_hotpluggable_cpus = spapr_query_hotpluggable_cpus;
2439 fwc->get_dev_path = spapr_get_fw_dev_path;
2440 nc->nmi_monitor_handler = spapr_nmi;
2441}
2442
/*
 * QOM type description of the abstract sPAPR machine.  Concrete, versioned
 * machine types are derived from it with DEFINE_SPAPR_MACHINE().  The type
 * implements the firmware path provider, NMI and hotplug handler
 * interfaces.
 */
static const TypeInfo spapr_machine_info = {
    .name          = TYPE_SPAPR_MACHINE,
    .parent        = TYPE_MACHINE,
    .abstract      = true,
    .instance_size = sizeof(sPAPRMachineState),
    .instance_init = spapr_machine_initfn,
    .instance_finalize = spapr_machine_finalizefn,
    .class_size    = sizeof(sPAPRMachineClass),
    .class_init    = spapr_machine_class_init,
    .interfaces = (InterfaceInfo[]) {
        { TYPE_FW_PATH_PROVIDER },
        { TYPE_NMI },
        { TYPE_HOTPLUG_HANDLER },
        { }
    },
};
2459
/*
 * DEFINE_SPAPR_MACHINE(suffix, verstr, latest)
 *
 * Generates everything needed for one versioned pseries machine type:
 *
 *  - spapr_machine_<suffix>_class_init(): calls
 *    spapr_machine_<suffix>_class_options(mc) and, when @latest is true,
 *    additionally sets the "pseries" alias and marks the machine as default;
 *  - spapr_machine_<suffix>_instance_init(): calls
 *    spapr_machine_<suffix>_instance_options(machine);
 *  - a TypeInfo named MACHINE_TYPE_NAME("pseries-" verstr) whose parent is
 *    TYPE_SPAPR_MACHINE;
 *  - spapr_machine_register_<suffix>(), which registers that TypeInfo and is
 *    itself handed to type_init().
 *
 * Both spapr_machine_<suffix>_class_options() and
 * spapr_machine_<suffix>_instance_options() must be defined before the macro
 * is used.
 *
 * @suffix: identifier fragment pasted into the generated names (e.g. 2_7)
 * @verstr: version string literal appended to "pseries-" (e.g. "2.7")
 * @latest: true for the one version that gets the alias and default flag
 */
#define DEFINE_SPAPR_MACHINE(suffix, verstr, latest)                 \
    static void spapr_machine_##suffix##_class_init(ObjectClass *oc, \
                                                    void *data)      \
    {                                                                \
        MachineClass *mc = MACHINE_CLASS(oc);                        \
        spapr_machine_##suffix##_class_options(mc);                  \
        if (latest) {                                                \
            mc->alias = "pseries";                                   \
            mc->is_default = 1;                                      \
        }                                                            \
    }                                                                \
    static void spapr_machine_##suffix##_instance_init(Object *obj)  \
    {                                                                \
        MachineState *machine = MACHINE(obj);                        \
        spapr_machine_##suffix##_instance_options(machine);          \
    }                                                                \
    static const TypeInfo spapr_machine_##suffix##_info = {          \
        .name = MACHINE_TYPE_NAME("pseries-" verstr),                \
        .parent = TYPE_SPAPR_MACHINE,                                \
        .class_init = spapr_machine_##suffix##_class_init,           \
        .instance_init = spapr_machine_##suffix##_instance_init,     \
    };                                                               \
    static void spapr_machine_register_##suffix(void)                \
    {                                                                \
        type_register(&spapr_machine_##suffix##_info);               \
    }                                                                \
    type_init(spapr_machine_register_##suffix)
2487
2488
2489
2490
/*
 * pseries-2.7 instance options.
 *
 * Intentionally empty.  The function has to exist because
 * DEFINE_SPAPR_MACHINE(2_7, ...) calls it from the instance_init it
 * generates.
 */
static void spapr_machine_2_7_instance_options(MachineState *machine)
{
}
2494
/*
 * pseries-2.7 class options.
 *
 * Intentionally empty: nothing is overridden for this version.  Older
 * versions call this function first and then apply their own overrides.
 */
static void spapr_machine_2_7_class_options(MachineClass *mc)
{
    /* No overrides for pseries-2.7 */
}

/* Passing true for 'latest' gives 2.7 the "pseries" alias and default flag */
DEFINE_SPAPR_MACHINE(2_7, "2.7", true);
2501
2502
2503
2504
/*
 * Compat property list for pseries-2.6: HW_COMPAT_2_6 followed by one extra
 * entry that sets the "ddw" property of TYPE_SPAPR_PCI_HOST_BRIDGE to off.
 * Passed to SET_MACHINE_COMPAT() in spapr_machine_2_6_class_options().
 */
#define SPAPR_COMPAT_2_6 \
    HW_COMPAT_2_6 \
    { \
        .driver   = TYPE_SPAPR_PCI_HOST_BRIDGE,\
        .property = "ddw",\
        .value    = stringify(off),\
    },
2512
2513static void spapr_machine_2_6_instance_options(MachineState *machine)
2514{
2515}
2516
/*
 * pseries-2.6 class options.
 *
 * Starts from the 2.7 class options, then clears the
 * query_hotpluggable_cpus callback that spapr_machine_class_init() installs
 * and passes the 2.6 compat properties to SET_MACHINE_COMPAT().
 *
 * @mc: machine class being set up
 */
static void spapr_machine_2_6_class_options(MachineClass *mc)
{
    /* Inherit the newer version's settings before overriding any of them */
    spapr_machine_2_7_class_options(mc);
    mc->query_hotpluggable_cpus = NULL;
    SET_MACHINE_COMPAT(mc, SPAPR_COMPAT_2_6);
}

/* Not the latest version: no alias, not the default machine */
DEFINE_SPAPR_MACHINE(2_6, "2.6", false);
2525
2526
2527
2528
/*
 * Compat property list for pseries-2.5: HW_COMPAT_2_5 followed by one extra
 * entry that sets the "use-rx-buffer-pools" property of the "spapr-vlan"
 * driver to "off".  Passed to SET_MACHINE_COMPAT() in
 * spapr_machine_2_5_class_options().
 */
#define SPAPR_COMPAT_2_5 \
    HW_COMPAT_2_5 \
    { \
        .driver   = "spapr-vlan", \
        .property = "use-rx-buffer-pools", \
        .value    = "off", \
    },
2536
2537static void spapr_machine_2_5_instance_options(MachineState *machine)
2538{
2539}
2540
2541static void spapr_machine_2_5_class_options(MachineClass *mc)
2542{
2543 sPAPRMachineClass *smc = SPAPR_MACHINE_CLASS(mc);
2544
2545 spapr_machine_2_6_class_options(mc);
2546 smc->use_ohci_by_default = true;
2547 SET_MACHINE_COMPAT(mc, SPAPR_COMPAT_2_5);
2548}
2549
2550DEFINE_SPAPR_MACHINE(2_5, "2.5", false);
2551
2552
2553
2554
/*
 * Compat property list for pseries-2.4: just HW_COMPAT_2_4, with no
 * sPAPR-specific entries added.  Passed to SET_MACHINE_COMPAT() in
 * spapr_machine_2_4_class_options().
 */
#define SPAPR_COMPAT_2_4 \
    HW_COMPAT_2_4
2557
/*
 * pseries-2.4 instance options: nothing of its own, only chains to the 2.5
 * instance options.
 *
 * @machine: machine instance being initialised
 */
static void spapr_machine_2_4_instance_options(MachineState *machine)
{
    spapr_machine_2_5_instance_options(machine);
}
2562
2563static void spapr_machine_2_4_class_options(MachineClass *mc)
2564{
2565 sPAPRMachineClass *smc = SPAPR_MACHINE_CLASS(mc);
2566
2567 spapr_machine_2_5_class_options(mc);
2568 smc->dr_lmb_enabled = false;
2569 SET_MACHINE_COMPAT(mc, SPAPR_COMPAT_2_4);
2570}
2571
2572DEFINE_SPAPR_MACHINE(2_4, "2.4", false);
2573
2574
2575
2576
/*
 * Compat property list for pseries-2.3: HW_COMPAT_2_3 followed by one extra
 * entry that sets the "dynamic-reconfiguration" property of the
 * "spapr-pci-host-bridge" driver to "off".  Passed to SET_MACHINE_COMPAT()
 * in spapr_machine_2_3_class_options().
 *
 * NOTE(review): the driver is spelled as a string literal here, while
 * SPAPR_COMPAT_2_2 and SPAPR_COMPAT_2_6 use TYPE_SPAPR_PCI_HOST_BRIDGE;
 * presumably both name the same type - confirm before unifying.
 */
#define SPAPR_COMPAT_2_3 \
    HW_COMPAT_2_3 \
    {\
        .driver   = "spapr-pci-host-bridge",\
        .property = "dynamic-reconfiguration",\
        .value    = "off",\
    },
2584
/*
 * pseries-2.3 instance options.
 *
 * Chains to the 2.4 instance options and then calls three helpers defined
 * outside this file.
 *
 * NOTE(review): judging by their names, the helpers relax the migration
 * stream format (section footers, global state, configuration section) so
 * that it matches what a 2.3 machine expects - confirm against their
 * definitions.
 *
 * @machine: machine instance being initialised
 */
static void spapr_machine_2_3_instance_options(MachineState *machine)
{
    spapr_machine_2_4_instance_options(machine);
    savevm_skip_section_footers();
    global_state_set_optional();
    savevm_skip_configuration();
}
2592
/*
 * pseries-2.3 class options: applies the 2.4 class options, then passes the
 * 2.3 compat properties to SET_MACHINE_COMPAT().
 *
 * @mc: machine class being set up
 */
static void spapr_machine_2_3_class_options(MachineClass *mc)
{
    /* Inherit the newer version's settings before adding the 2.3 compat set */
    spapr_machine_2_4_class_options(mc);
    SET_MACHINE_COMPAT(mc, SPAPR_COMPAT_2_3);
}
/* Not the latest version: no alias, not the default machine */
DEFINE_SPAPR_MACHINE(2_3, "2.3", false);
2599
2600
2601
2602
2603
/*
 * Compat property list for pseries-2.2: HW_COMPAT_2_2 followed by one extra
 * entry that sets the "mem_win_size" property of TYPE_SPAPR_PCI_HOST_BRIDGE
 * to "0x20000000".  Passed to SET_MACHINE_COMPAT() in
 * spapr_machine_2_2_class_options().
 */
#define SPAPR_COMPAT_2_2 \
    HW_COMPAT_2_2 \
    {\
        .driver   = TYPE_SPAPR_PCI_HOST_BRIDGE,\
        .property = "mem_win_size",\
        .value    = "0x20000000",\
    },
2611
/*
 * pseries-2.2 instance options: chains to the 2.3 instance options, then
 * sets the suppress_vmdesc flag on the machine.
 *
 * @machine: machine instance being initialised
 */
static void spapr_machine_2_2_instance_options(MachineState *machine)
{
    spapr_machine_2_3_instance_options(machine);
    machine->suppress_vmdesc = true;
}
2617
/*
 * pseries-2.2 class options: applies the 2.3 class options, then passes the
 * 2.2 compat properties to SET_MACHINE_COMPAT().
 *
 * @mc: machine class being set up
 */
static void spapr_machine_2_2_class_options(MachineClass *mc)
{
    /* Inherit the newer version's settings before adding the 2.2 compat set */
    spapr_machine_2_3_class_options(mc);
    SET_MACHINE_COMPAT(mc, SPAPR_COMPAT_2_2);
}
/* Not the latest version: no alias, not the default machine */
DEFINE_SPAPR_MACHINE(2_2, "2.2", false);
2624
2625
2626
2627
/*
 * Compat property list for pseries-2.1: just HW_COMPAT_2_1, with no
 * sPAPR-specific entries added.  Passed to SET_MACHINE_COMPAT() in
 * spapr_machine_2_1_class_options().
 */
#define SPAPR_COMPAT_2_1 \
    HW_COMPAT_2_1
2630
/*
 * pseries-2.1 instance options: nothing of its own, only chains to the 2.2
 * instance options.
 *
 * @machine: machine instance being initialised
 */
static void spapr_machine_2_1_instance_options(MachineState *machine)
{
    spapr_machine_2_2_instance_options(machine);
}
2635
/*
 * pseries-2.1 class options: applies the 2.2 class options, then passes the
 * 2.1 compat properties to SET_MACHINE_COMPAT().
 *
 * @mc: machine class being set up
 */
static void spapr_machine_2_1_class_options(MachineClass *mc)
{
    /* Inherit the newer version's settings before adding the 2.1 compat set */
    spapr_machine_2_2_class_options(mc);
    SET_MACHINE_COMPAT(mc, SPAPR_COMPAT_2_1);
}
/* Not the latest version: no alias, not the default machine */
DEFINE_SPAPR_MACHINE(2_1, "2.1", false);
2642
/*
 * Registers the abstract sPAPR machine base type described by
 * spapr_machine_info.  The versioned pseries-x.y types are registered
 * separately, by the functions that DEFINE_SPAPR_MACHINE generates.
 */
static void spapr_machine_register_types(void)
{
    type_register_static(&spapr_machine_info);
}

/* Hand the registration function to QEMU's type_init() hook */
type_init(spapr_machine_register_types)
2649