1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25#include "qemu/osdep.h"
26#include "qemu-common.h"
27#include "qemu/config-file.h"
28#include "monitor/monitor.h"
29#include "qapi/error.h"
30#include "qapi/qapi-commands-misc.h"
31#include "qapi/qapi-events-run-state.h"
32#include "qapi/qmp/qerror.h"
33#include "qemu/error-report.h"
34#include "qemu/qemu-print.h"
35#include "sysemu/tcg.h"
36#include "sysemu/block-backend.h"
37#include "exec/gdbstub.h"
38#include "sysemu/dma.h"
39#include "sysemu/hw_accel.h"
40#include "sysemu/kvm.h"
41#include "sysemu/hax.h"
42#include "sysemu/hvf.h"
43#include "sysemu/whpx.h"
44#include "exec/exec-all.h"
45
46#include "qemu/thread.h"
47#include "sysemu/cpus.h"
48#include "sysemu/qtest.h"
49#include "qemu/main-loop.h"
50#include "qemu/option.h"
51#include "qemu/bitmap.h"
52#include "qemu/seqlock.h"
53#include "qemu/guest-random.h"
54#include "tcg.h"
55#include "hw/nmi.h"
56#include "sysemu/replay.h"
57#include "hw/boards.h"
58
59#ifdef CONFIG_LINUX
60
61#include <sys/prctl.h>
62
63#ifndef PR_MCE_KILL
64#define PR_MCE_KILL 33
65#endif
66
67#ifndef PR_MCE_KILL_SET
68#define PR_MCE_KILL_SET 1
69#endif
70
71#ifndef PR_MCE_KILL_EARLY
72#define PR_MCE_KILL_EARLY 1
73#endif
74
75#endif
76
77int64_t max_delay;
78int64_t max_advance;
79
80
81static QEMUTimer *throttle_timer;
82static unsigned int throttle_percentage;
83
84#define CPU_THROTTLE_PCT_MIN 1
85#define CPU_THROTTLE_PCT_MAX 99
86#define CPU_THROTTLE_TIMESLICE_NS 10000000
87
88bool cpu_is_stopped(CPUState *cpu)
89{
90 return cpu->stopped || !runstate_is_running();
91}
92
93static bool cpu_thread_is_idle(CPUState *cpu)
94{
95 if (cpu->stop || cpu->queued_work_first) {
96 return false;
97 }
98 if (cpu_is_stopped(cpu)) {
99 return true;
100 }
101 if (!cpu->halted || cpu_has_work(cpu) ||
102 kvm_halt_in_kernel()) {
103 return false;
104 }
105 return true;
106}
107
108static bool all_cpu_threads_idle(void)
109{
110 CPUState *cpu;
111
112 CPU_FOREACH(cpu) {
113 if (!cpu_thread_is_idle(cpu)) {
114 return false;
115 }
116 }
117 return true;
118}
119
120
121
122
123
124
125static bool icount_sleep = true;
126
127#define MAX_ICOUNT_SHIFT 10
128
/* Global timekeeping state.  Readers use vm_clock_seqlock; writers must
 * additionally hold vm_clock_lock (see seqlock_write_lock call sites). */
typedef struct TimersState {
    /* Last value returned by cpu_get_ticks_locked(), used to keep the
     * tick counter monotonic across host TSC anomalies. */
    int64_t cpu_ticks_prev;
    /* Offset added to the raw host tick counter; accumulates time spent
     * with ticks disabled (VM stopped). */
    int64_t cpu_ticks_offset;

    /* Seqlock for lock-free reads of the clock/tick state; writers
     * serialize on the spinlock below. */
    QemuSeqLock vm_clock_seqlock;
    QemuSpin vm_clock_lock;

    /* Non-zero while cpu ticks and the cpu clock are advancing. */
    int16_t cpu_ticks_enabled;

    /* icount scale: one executed instruction ~= 2^icount_time_shift ns. */
    int16_t icount_time_shift;

    /* Compensation (in ns) applied on top of scaled icount to track
     * real/virtual clock drift. */
    int64_t qemu_icount_bias;

    /* Start of the current clock warp, or -1 when no warp is in progress. */
    int64_t vm_clock_warp_start;
    /* Offset added to the host monotonic clock; accumulates stopped time. */
    int64_t cpu_clock_offset;

    /* Total number of guest instructions executed so far. */
    int64_t qemu_icount;

    /* Timers driving icount speed adjustment and clock warping. */
    QEMUTimer *icount_rt_timer;
    QEMUTimer *icount_vm_timer;
    QEMUTimer *icount_warp_timer;
} TimersState;
159
160static TimersState timers_state;
161bool mttcg_enabled;
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
/* Return true if the host's TCG backend provides at least as strong a
 * memory ordering as the guest architecture requires; if either constant
 * is undefined we conservatively report incompatibility. */
static bool check_tcg_memory_orders_compatible(void)
{
#if defined(TCG_GUEST_DEFAULT_MO) && defined(TCG_TARGET_DEFAULT_MO)
    /* Guest must not demand ordering bits the host target cannot honor. */
    return (TCG_GUEST_DEFAULT_MO & ~TCG_TARGET_DEFAULT_MO) == 0;
#else
    return false;
#endif
}
191
192static bool default_mttcg_enabled(void)
193{
194 if (use_icount || TCG_OVERSIZED_GUEST) {
195 return false;
196 } else {
197#ifdef TARGET_SUPPORTS_MTTCG
198 return check_tcg_memory_orders_compatible();
199#else
200 return false;
201#endif
202 }
203}
204
/* Parse the -accel tcg "thread" option ("multi"/"single") and set
 * mttcg_enabled accordingly; with no option, fall back to the default. */
void qemu_tcg_configure(QemuOpts *opts, Error **errp)
{
    const char *t = qemu_opt_get(opts, "thread");
    if (t) {
        if (strcmp(t, "multi") == 0) {
            if (TCG_OVERSIZED_GUEST) {
                error_setg(errp, "No MTTCG when guest word size > hosts");
            } else if (use_icount) {
                error_setg(errp, "No MTTCG when icount is enabled");
            } else {
#ifndef TARGET_SUPPORTS_MTTCG
                /* User explicitly forced MTTCG on an unconverted guest:
                 * allow it, but warn. */
                warn_report("Guest not yet converted to MTTCG - "
                            "you may get unexpected results");
#endif
                if (!check_tcg_memory_orders_compatible()) {
                    warn_report("Guest expects a stronger memory ordering "
                                "than the host provides");
                    error_printf("This may cause strange/hard to debug errors\n");
                }
                mttcg_enabled = true;
            }
        } else if (strcmp(t, "single") == 0) {
            mttcg_enabled = false;
        } else {
            error_setg(errp, "Invalid 'thread' setting %s", t);
        }
    } else {
        mttcg_enabled = default_mttcg_enabled();
    }
}
235
236
237
238
239
/* Number of instructions @cpu has executed out of its current budget:
 * budget minus what is still pending in the decrementer and the extra
 * overflow counter. */
static int64_t cpu_get_icount_executed(CPUState *cpu)
{
    return (cpu->icount_budget -
            (cpu_neg(cpu)->icount_decr.u16.low + cpu->icount_extra));
}
245
246
247
248
249
250
/* Fold the instructions @cpu has executed so far into the global
 * qemu_icount counter and shrink the per-CPU budget to match.
 * Caller must hold the vm_clock seqlock write side (or otherwise
 * exclude concurrent writers). */
static void cpu_update_icount_locked(CPUState *cpu)
{
    int64_t executed = cpu_get_icount_executed(cpu);
    cpu->icount_budget -= executed;

    /* atomic store so lock-free readers see a consistent 64-bit value */
    atomic_set_i64(&timers_state.qemu_icount,
                   timers_state.qemu_icount + executed);
}
259
260
261
262
263
264
/* Public wrapper for cpu_update_icount_locked() that takes the
 * vm_clock seqlock/spinlock pair around the update. */
void cpu_update_icount(CPUState *cpu)
{
    seqlock_write_lock(&timers_state.vm_clock_seqlock,
                       &timers_state.vm_clock_lock);
    cpu_update_icount_locked(cpu);
    seqlock_write_unlock(&timers_state.vm_clock_seqlock,
                         &timers_state.vm_clock_lock);
}
273
/* Return the total instruction count, folding in whatever the currently
 * running vCPU has executed.  Caller must exclude concurrent writers. */
static int64_t cpu_get_icount_raw_locked(void)
{
    CPUState *cpu = current_cpu;

    if (cpu && cpu->running) {
        /* icount may only be read at deterministic points; a read while
         * I/O is forbidden indicates a translation/replay bug. */
        if (!cpu->can_do_io) {
            error_report("Bad icount read");
            exit(1);
        }
        /* Take into account what has run since the last sync. */
        cpu_update_icount_locked(cpu);
    }

    return atomic_read_i64(&timers_state.qemu_icount);
}
289
/* icount-based virtual time in ns: scaled instruction count plus the
 * drift-compensation bias.  Caller must hold the seqlock write side or
 * retry via the read side. */
static int64_t cpu_get_icount_locked(void)
{
    int64_t icount = cpu_get_icount_raw_locked();
    return atomic_read_i64(&timers_state.qemu_icount_bias) +
        cpu_icount_to_ns(icount);
}
296
/* Lock-free read of the raw instruction count using the seqlock retry
 * protocol. */
int64_t cpu_get_icount_raw(void)
{
    int64_t icount;
    unsigned start;

    do {
        start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
        icount = cpu_get_icount_raw_locked();
    } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));

    return icount;
}
309
310
/* Lock-free read of icount-based virtual time (ns), seqlock retry loop. */
int64_t cpu_get_icount(void)
{
    int64_t icount;
    unsigned start;

    do {
        start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
        icount = cpu_get_icount_locked();
    } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));

    return icount;
}
323
/* Convert an instruction count to nanoseconds using the current
 * (atomically read) icount shift: ns = icount << shift. */
int64_t cpu_icount_to_ns(int64_t icount)
{
    return icount << atomic_read(&timers_state.icount_time_shift);
}
328
/* Guest "CPU tick" counter while holding vm_clock_lock.  Enforces
 * monotonicity: if the host tick source jumps backwards (e.g. TSC
 * weirdness), fold the difference into the offset instead of letting
 * the returned value decrease. */
static int64_t cpu_get_ticks_locked(void)
{
    int64_t ticks = timers_state.cpu_ticks_offset;
    if (timers_state.cpu_ticks_enabled) {
        ticks += cpu_get_host_ticks();
    }

    if (timers_state.cpu_ticks_prev > ticks) {
        /* Non-monotonic host ticks: compensate and return the old value. */
        timers_state.cpu_ticks_offset += timers_state.cpu_ticks_prev - ticks;
        ticks = timers_state.cpu_ticks_prev;
    }

    timers_state.cpu_ticks_prev = ticks;
    return ticks;
}
345
346
347
348
349
/* Public tick counter.  With icount enabled, virtual time stands in for
 * ticks; otherwise take the spinlock (writers mutate prev/offset) and
 * read the real counter. */
int64_t cpu_get_ticks(void)
{
    int64_t ticks;

    if (use_icount) {
        return cpu_get_icount();
    }

    qemu_spin_lock(&timers_state.vm_clock_lock);
    ticks = cpu_get_ticks_locked();
    qemu_spin_unlock(&timers_state.vm_clock_lock);
    return ticks;
}
363
364static int64_t cpu_get_clock_locked(void)
365{
366 int64_t time;
367
368 time = timers_state.cpu_clock_offset;
369 if (timers_state.cpu_ticks_enabled) {
370 time += get_clock();
371 }
372
373 return time;
374}
375
376
377
378
/* Lock-free read of the VM clock (ns) via the seqlock retry protocol. */
int64_t cpu_get_clock(void)
{
    int64_t ti;
    unsigned start;

    do {
        start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
        ti = cpu_get_clock_locked();
    } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));

    return ti;
}
391
392
393
394
/* Resume the tick counter and VM clock (e.g. when the VM starts running).
 * The offsets are rebased so both continue from where they stopped. */
void cpu_enable_ticks(void)
{
    seqlock_write_lock(&timers_state.vm_clock_seqlock,
                       &timers_state.vm_clock_lock);
    if (!timers_state.cpu_ticks_enabled) {
        timers_state.cpu_ticks_offset -= cpu_get_host_ticks();
        timers_state.cpu_clock_offset -= get_clock();
        timers_state.cpu_ticks_enabled = 1;
    }
    seqlock_write_unlock(&timers_state.vm_clock_seqlock,
                         &timers_state.vm_clock_lock);
}
407
408
409
410
411
/* Freeze the tick counter and VM clock (e.g. when the VM stops).  The
 * current values are captured into the offsets so later reads stay
 * constant until cpu_enable_ticks(). */
void cpu_disable_ticks(void)
{
    seqlock_write_lock(&timers_state.vm_clock_seqlock,
                       &timers_state.vm_clock_lock);
    if (timers_state.cpu_ticks_enabled) {
        timers_state.cpu_ticks_offset += cpu_get_host_ticks();
        timers_state.cpu_clock_offset = cpu_get_clock_locked();
        timers_state.cpu_ticks_enabled = 0;
    }
    seqlock_write_unlock(&timers_state.vm_clock_seqlock,
                         &timers_state.vm_clock_lock);
}
424
425
426
427
428
429#define ICOUNT_WOBBLE (NANOSECONDS_PER_SECOND / 10)
430
/* Periodically re-tune icount_time_shift so that icount-based virtual
 * time tracks real time: speed virtual time up (smaller shift) when it
 * runs ahead, slow it down (larger shift) when it lags.  ICOUNT_WOBBLE
 * hysteresis avoids oscillating on small deltas. */
static void icount_adjust(void)
{
    int64_t cur_time;
    int64_t cur_icount;
    int64_t delta;

    /* Previous delta, kept across calls for the hysteresis check. */
    static int64_t last_delta;

    /* Only adjust while the VM is actually executing. */
    if (!runstate_is_running()) {
        return;
    }

    seqlock_write_lock(&timers_state.vm_clock_seqlock,
                       &timers_state.vm_clock_lock);
    cur_time = cpu_get_clock_locked();
    cur_icount = cpu_get_icount_locked();

    delta = cur_icount - cur_time;
    /* Virtual time ahead of real time => make instructions "cheaper". */
    if (delta > 0
        && last_delta + ICOUNT_WOBBLE < delta * 2
        && timers_state.icount_time_shift > 0) {
        /* atomic_set: cpu_icount_to_ns() reads the shift lock-free */
        atomic_set(&timers_state.icount_time_shift,
                   timers_state.icount_time_shift - 1);
    }
    /* Virtual time behind real time => make instructions "costlier". */
    if (delta < 0
        && last_delta - ICOUNT_WOBBLE > delta * 2
        && timers_state.icount_time_shift < MAX_ICOUNT_SHIFT) {
        atomic_set(&timers_state.icount_time_shift,
                   timers_state.icount_time_shift + 1);
    }
    last_delta = delta;
    /* Rebase the bias so the shift change does not step virtual time. */
    atomic_set_i64(&timers_state.qemu_icount_bias,
                   cur_icount - (timers_state.qemu_icount
                                 << timers_state.icount_time_shift));
    seqlock_write_unlock(&timers_state.vm_clock_seqlock,
                         &timers_state.vm_clock_lock);
}
473
/* Realtime-driven adjustment tick: rearm for one second ahead, then
 * adjust.  Catches virtual time running too slowly. */
static void icount_adjust_rt(void *opaque)
{
    timer_mod(timers_state.icount_rt_timer,
              qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000);
    icount_adjust();
}
480
/* Virtual-time-driven adjustment tick (every 100ms of virtual time).
 * Catches virtual time running too fast. */
static void icount_adjust_vm(void *opaque)
{
    timer_mod(timers_state.icount_vm_timer,
                   qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
                   NANOSECONDS_PER_SECOND / 10);
    icount_adjust();
}
488
/* Convert a ns duration to an instruction count, rounding up so we never
 * undershoot a deadline. */
static int64_t qemu_icount_round(int64_t count)
{
    int shift = atomic_read(&timers_state.icount_time_shift);
    return (count + (1 << shift) - 1) >> shift;
}
494
/* Complete a pending clock warp: advance icount-based virtual time by the
 * real time that elapsed since the warp started (capped so virtual time
 * never overtakes real time in adaptive mode). */
static void icount_warp_rt(void)
{
    unsigned seq;
    int64_t warp_start;

    /* Cheap seqlock read first; only take the write side if a warp is
     * actually in progress. */
    do {
        seq = seqlock_read_begin(&timers_state.vm_clock_seqlock);
        warp_start = timers_state.vm_clock_warp_start;
    } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, seq));

    if (warp_start == -1) {
        return;
    }

    seqlock_write_lock(&timers_state.vm_clock_seqlock,
                       &timers_state.vm_clock_lock);
    if (runstate_is_running()) {
        /* In record/replay mode the clock value comes from the log. */
        int64_t clock = REPLAY_CLOCK_LOCKED(REPLAY_CLOCK_VIRTUAL_RT,
                                            cpu_get_clock_locked());
        int64_t warp_delta;

        warp_delta = clock - timers_state.vm_clock_warp_start;
        if (use_icount == 2) {
            /* Adaptive mode: don't let the warp push virtual time past
             * real time. */
            int64_t cur_icount = cpu_get_icount_locked();
            int64_t delta = clock - cur_icount;
            warp_delta = MIN(warp_delta, delta);
        }
        atomic_set_i64(&timers_state.qemu_icount_bias,
                       timers_state.qemu_icount_bias + warp_delta);
    }
    timers_state.vm_clock_warp_start = -1;
    seqlock_write_unlock(&timers_state.vm_clock_seqlock,
                         &timers_state.vm_clock_lock);

    /* The warp may have made virtual timers due; fire them. */
    if (qemu_clock_expired(QEMU_CLOCK_VIRTUAL)) {
        qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
    }
}
540
/* Warp-timer callback: the host slept long enough, apply the warp. */
static void icount_timer_cb(void *opaque)
{
    icount_warp_rt();
}
548
/* qtest-only: jump the virtual clock forward to @dest, running any timers
 * that become due along the way (deadline by deadline, so timer callbacks
 * observe consistent intermediate times). */
void qtest_clock_warp(int64_t dest)
{
    int64_t clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
    AioContext *aio_context;
    assert(qtest_enabled());
    aio_context = qemu_get_aio_context();
    while (clock < dest) {
        int64_t deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
        /* Advance to the next timer deadline or to dest, whichever is
         * sooner. */
        int64_t warp = qemu_soonest_timeout(dest - clock, deadline);

        seqlock_write_lock(&timers_state.vm_clock_seqlock,
                           &timers_state.vm_clock_lock);
        atomic_set_i64(&timers_state.qemu_icount_bias,
                       timers_state.qemu_icount_bias + warp);
        seqlock_write_unlock(&timers_state.vm_clock_seqlock,
                             &timers_state.vm_clock_lock);

        qemu_clock_run_timers(QEMU_CLOCK_VIRTUAL);
        timerlist_run_timers(aio_context->tlg.tl[QEMU_CLOCK_VIRTUAL]);
        clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
    }
    qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
}
572
/* When all vCPUs are idle under icount, virtual time would stand still
 * and no timer would ever fire.  This either warps virtual time directly
 * to the next timer deadline (sleep=off) or arms the realtime warp timer
 * so the host can genuinely sleep until then (sleep=on). */
void qemu_start_warp_timer(void)
{
    int64_t clock;
    int64_t deadline;

    if (!use_icount) {
        return;
    }

    /* Nothing to warp while the VM is stopped; timers resume with it. */
    if (!runstate_is_running()) {
        return;
    }

    if (replay_mode != REPLAY_MODE_PLAY) {
        if (!all_cpu_threads_idle()) {
            return;
        }

        if (qtest_enabled()) {
            /* qtest drives the clock explicitly via qtest_clock_warp(). */
            return;
        }

        replay_checkpoint(CHECKPOINT_CLOCK_WARP_START);
    } else {
        /* Replay: only warp where the recording did. */
        if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP_START)) {
            /* vCPU is sleeping and the warp can't be started.
             * It is probably a race condition: notification sent
             * to vCPU was processed in advance and vCPU went to sleep.
             * Therefore we have to wake it up for doing something. */
            if (replay_has_checkpoint()) {
                qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
            }
            return;
        }
    }

    /* Deadline of the earliest pending virtual timer, relative to now. */
    clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT);
    deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
    if (deadline < 0) {
        /* No timers at all: with sleep disabled the VM would hang with
         * time frozen, so warn (once). */
        static bool notified;
        if (!icount_sleep && !notified) {
            warn_report("icount sleep disabled and no active timers");
            notified = true;
        }
        return;
    }

    if (deadline > 0) {
        if (!icount_sleep) {
            /* sleep=off: jump virtual time straight to the deadline; the
             * timer fires immediately and execution continues. */
            seqlock_write_lock(&timers_state.vm_clock_seqlock,
                               &timers_state.vm_clock_lock);
            atomic_set_i64(&timers_state.qemu_icount_bias,
                           timers_state.qemu_icount_bias + deadline);
            seqlock_write_unlock(&timers_state.vm_clock_seqlock,
                                 &timers_state.vm_clock_lock);
            qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
        } else {
            /* sleep=on: record when the warp started and let the
             * realtime warp timer wake us at the deadline; the elapsed
             * real time is credited in icount_warp_rt().  Keep the
             * earliest warp start if one is already recorded. */
            seqlock_write_lock(&timers_state.vm_clock_seqlock,
                               &timers_state.vm_clock_lock);
            if (timers_state.vm_clock_warp_start == -1
                || timers_state.vm_clock_warp_start > clock) {
                timers_state.vm_clock_warp_start = clock;
            }
            seqlock_write_unlock(&timers_state.vm_clock_seqlock,
                                 &timers_state.vm_clock_lock);
            timer_mod_anticipate(timers_state.icount_warp_timer,
                                 clock + deadline);
        }
    } else if (deadline == 0) {
        /* A timer is already due: just poke the virtual clock. */
        qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
    }
}
673
/* Called when a vCPU is about to run: cancel any armed warp timer and
 * account the real time slept so far into virtual time. */
static void qemu_account_warp_timer(void)
{
    if (!use_icount || !icount_sleep) {
        return;
    }

    /* Timers resume together with the VM; nothing to account while
     * stopped. */
    if (!runstate_is_running()) {
        return;
    }

    /* In replay mode, only account where the recording has a checkpoint. */
    if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP_ACCOUNT)) {
        return;
    }

    timer_del(timers_state.icount_warp_timer);
    icount_warp_rt();
}
695
696static bool icount_state_needed(void *opaque)
697{
698 return use_icount;
699}
700
701static bool warp_timer_state_needed(void *opaque)
702{
703 TimersState *s = opaque;
704 return s->icount_warp_timer != NULL;
705}
706
707static bool adjust_timers_state_needed(void *opaque)
708{
709 TimersState *s = opaque;
710 return s->icount_rt_timer != NULL;
711}
712
713
714
715
/* Subsection migrating the clock-warp state (only with sleep=on). */
static const VMStateDescription icount_vmstate_warp_timer = {
    .name = "timer/icount/warp_timer",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = warp_timer_state_needed,
    .fields = (VMStateField[]) {
        VMSTATE_INT64(vm_clock_warp_start, TimersState),
        VMSTATE_TIMER_PTR(icount_warp_timer, TimersState),
        VMSTATE_END_OF_LIST()
    }
};

/* Subsection migrating the adaptive-shift adjustment timers. */
static const VMStateDescription icount_vmstate_adjust_timers = {
    .name = "timer/icount/timers",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = adjust_timers_state_needed,
    .fields = (VMStateField[]) {
        VMSTATE_TIMER_PTR(icount_rt_timer, TimersState),
        VMSTATE_TIMER_PTR(icount_vm_timer, TimersState),
        VMSTATE_END_OF_LIST()
    }
};

/* icount state proper (bias + instruction count), present only when
 * icount is enabled; the timer subsections hang off it. */
static const VMStateDescription icount_vmstate_timers = {
    .name = "timer/icount",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = icount_state_needed,
    .fields = (VMStateField[]) {
        VMSTATE_INT64(qemu_icount_bias, TimersState),
        VMSTATE_INT64(qemu_icount, TimersState),
        VMSTATE_END_OF_LIST()
    },
    .subsections = (const VMStateDescription*[]) {
        &icount_vmstate_warp_timer,
        &icount_vmstate_adjust_timers,
        NULL
    }
};

/* Top-level migration description for TimersState.  The VMSTATE_UNUSED(8)
 * slot preserves wire compatibility with an old dropped field. */
static const VMStateDescription vmstate_timers = {
    .name = "timer",
    .version_id = 2,
    .minimum_version_id = 1,
    .fields = (VMStateField[]) {
        VMSTATE_INT64(cpu_ticks_offset, TimersState),
        VMSTATE_UNUSED(8),
        VMSTATE_INT64_V(cpu_clock_offset, TimersState, 2),
        VMSTATE_END_OF_LIST()
    },
    .subsections = (const VMStateDescription*[]) {
        &icount_vmstate_timers,
        NULL
    }
};
775
/* Work item run on a vCPU: sleep (with the BQL dropped) for a fraction of
 * the timeslice proportional to the configured throttle percentage. */
static void cpu_throttle_thread(CPUState *cpu, run_on_cpu_data opaque)
{
    double pct;
    double throttle_ratio;
    long sleeptime_ns;

    /* Throttling may have been switched off since this was scheduled. */
    if (!cpu_throttle_get_percentage()) {
        return;
    }

    /* pct of each period is spent sleeping: sleep = ratio * run-slice. */
    pct = (double)cpu_throttle_get_percentage()/100;
    throttle_ratio = pct / (1 - pct);
    sleeptime_ns = (long)(throttle_ratio * CPU_THROTTLE_TIMESLICE_NS);

    /* Drop the BQL so other threads can make progress while we sleep. */
    qemu_mutex_unlock_iothread();
    g_usleep(sleeptime_ns / 1000); /* Convert ns to us for usleep call */
    qemu_mutex_lock_iothread();
    atomic_set(&cpu->throttle_thread_scheduled, 0);
}
795
/* Periodic throttle tick: schedule a sleep work item on each vCPU that
 * doesn't already have one pending, then rearm so that the runnable
 * portion of each period stays one timeslice long. */
static void cpu_throttle_timer_tick(void *opaque)
{
    CPUState *cpu;
    double pct;

    /* Stop the timer chain if the user disabled throttling. */
    if (!cpu_throttle_get_percentage()) {
        return;
    }
    CPU_FOREACH(cpu) {
        /* atomic_xchg ensures at most one pending work item per vCPU. */
        if (!atomic_xchg(&cpu->throttle_thread_scheduled, 1)) {
            async_run_on_cpu(cpu, cpu_throttle_thread,
                             RUN_ON_CPU_NULL);
        }
    }

    pct = (double)cpu_throttle_get_percentage()/100;
    timer_mod(throttle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT) +
                                   CPU_THROTTLE_TIMESLICE_NS / (1-pct));
}
816
/* Enable throttling at @new_throttle_pct percent, clamped to the
 * supported 1..99 range, and (re)arm the throttle timer. */
void cpu_throttle_set(int new_throttle_pct)
{
    /* Ensure throttle percentage is within valid range */
    new_throttle_pct = MIN(new_throttle_pct, CPU_THROTTLE_PCT_MAX);
    new_throttle_pct = MAX(new_throttle_pct, CPU_THROTTLE_PCT_MIN);

    atomic_set(&throttle_percentage, new_throttle_pct);

    timer_mod(throttle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT) +
                                       CPU_THROTTLE_TIMESLICE_NS);
}
828
/* Disable throttling; the timer callback notices the zero percentage and
 * stops rearming itself. */
void cpu_throttle_stop(void)
{
    atomic_set(&throttle_percentage, 0);
}
833
834bool cpu_throttle_active(void)
835{
836 return (cpu_throttle_get_percentage() != 0);
837}
838
/* Current throttle percentage (0 when disabled); lock-free read. */
int cpu_throttle_get_percentage(void)
{
    return atomic_read(&throttle_percentage);
}
843
/* One-time init of the timekeeping locks, migration registration and the
 * throttle timer. */
void cpu_ticks_init(void)
{
    seqlock_init(&timers_state.vm_clock_seqlock);
    qemu_spin_init(&timers_state.vm_clock_lock);
    vmstate_register(NULL, 0, &vmstate_timers, &timers_state);
    throttle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL_RT,
                                           cpu_throttle_timer_tick, NULL);
}
852
853void configure_icount(QemuOpts *opts, Error **errp)
854{
855 const char *option;
856 char *rem_str = NULL;
857
858 option = qemu_opt_get(opts, "shift");
859 if (!option) {
860 if (qemu_opt_get(opts, "align") != NULL) {
861 error_setg(errp, "Please specify shift option when using align");
862 }
863 return;
864 }
865
866 icount_sleep = qemu_opt_get_bool(opts, "sleep", true);
867 if (icount_sleep) {
868 timers_state.icount_warp_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL_RT,
869 icount_timer_cb, NULL);
870 }
871
872 icount_align_option = qemu_opt_get_bool(opts, "align", false);
873
874 if (icount_align_option && !icount_sleep) {
875 error_setg(errp, "align=on and sleep=off are incompatible");
876 }
877 if (strcmp(option, "auto") != 0) {
878 errno = 0;
879 timers_state.icount_time_shift = strtol(option, &rem_str, 0);
880 if (errno != 0 || *rem_str != '\0' || !strlen(option)) {
881 error_setg(errp, "icount: Invalid shift value");
882 }
883 use_icount = 1;
884 return;
885 } else if (icount_align_option) {
886 error_setg(errp, "shift=auto and align=on are incompatible");
887 } else if (!icount_sleep) {
888 error_setg(errp, "shift=auto and sleep=off are incompatible");
889 }
890
891 use_icount = 2;
892
893
894
895 timers_state.icount_time_shift = 3;
896
897
898
899
900
901
902 timers_state.vm_clock_warp_start = -1;
903 timers_state.icount_rt_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL_RT,
904 icount_adjust_rt, NULL);
905 timer_mod(timers_state.icount_rt_timer,
906 qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000);
907 timers_state.icount_vm_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
908 icount_adjust_vm, NULL);
909 timer_mod(timers_state.icount_vm_timer,
910 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
911 NANOSECONDS_PER_SECOND / 10);
912}
913
914
915
916
917
918
919
920
921
922
923
924
925
926static QEMUTimer *tcg_kick_vcpu_timer;
927static CPUState *tcg_current_rr_cpu;
928
929#define TCG_KICK_PERIOD (NANOSECONDS_PER_SECOND / 10)
930
/* Deadline for the next round-robin kick, TCG_KICK_PERIOD from now. */
static inline int64_t qemu_tcg_next_kick(void)
{
    return qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + TCG_KICK_PERIOD;
}
935
936
/* Kick whichever vCPU currently holds the round-robin slot.  The loop
 * re-reads tcg_current_rr_cpu to close the race where the slot changes
 * hands between the read and the cpu_exit(). */
static void qemu_cpu_kick_rr_cpu(void)
{
    CPUState *cpu;
    do {
        cpu = atomic_mb_read(&tcg_current_rr_cpu);
        if (cpu) {
            cpu_exit(cpu);
        }
    } while (cpu != atomic_mb_read(&tcg_current_rr_cpu));
}
947
/* No-op work item; queuing it is enough to kick a vCPU out of its
 * execution loop (see qemu_timer_notify_cb). */
static void do_nothing(CPUState *cpu, run_on_cpu_data unused)
{
}
951
/* Clock-change notification hook.  Without icount (or for non-virtual
 * clocks) a plain event-loop wakeup suffices; with icount the vCPU must
 * be interrupted so it recomputes its instruction budget. */
void qemu_timer_notify_cb(void *opaque, QEMUClockType type)
{
    if (!use_icount || type != QEMU_CLOCK_VIRTUAL) {
        qemu_notify_event();
        return;
    }

    if (qemu_in_vcpu_thread()) {
        /* A CPU is currently running; kick it back out to the main
         * loop so the deadline is re-evaluated. */
        qemu_cpu_kick(current_cpu);
    } else if (first_cpu) {
        /* qemu_cpu_kick is not enough to kick a halted CPU out of
         * qemu_tcg_wait_io_event; async work does the trick.  first_cpu
         * is an arbitrary but deterministic choice (matters for replay). */
        async_run_on_cpu(first_cpu, do_nothing, RUN_ON_CPU_NULL);
    }
}
976
/* Periodic kick so no vCPU can hog the single round-robin TCG thread. */
static void kick_tcg_thread(void *opaque)
{
    timer_mod(tcg_kick_vcpu_timer, qemu_tcg_next_kick());
    qemu_cpu_kick_rr_cpu();
}
982
/* Arm the round-robin kick timer.  Only needed with more than one vCPU
 * (CPU_NEXT(first_cpu) non-NULL); single-vCPU guests never starve. */
static void start_tcg_kick_timer(void)
{
    assert(!mttcg_enabled);
    if (!tcg_kick_vcpu_timer && CPU_NEXT(first_cpu)) {
        tcg_kick_vcpu_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
                                           kick_tcg_thread, NULL);
    }
    if (tcg_kick_vcpu_timer && !timer_pending(tcg_kick_vcpu_timer)) {
        timer_mod(tcg_kick_vcpu_timer, qemu_tcg_next_kick());
    }
}
994
/* Pause the round-robin kick timer (e.g. while all vCPUs are idle); the
 * timer object itself is kept for reuse. */
static void stop_tcg_kick_timer(void)
{
    assert(!mttcg_enabled);
    if (tcg_kick_vcpu_timer && timer_pending(tcg_kick_vcpu_timer)) {
        timer_del(tcg_kick_vcpu_timer);
    }
}
1002
1003
/* Fatal emulated-hardware error: print the message and a register dump of
 * every vCPU to stderr, then abort.  Never returns. */
void hw_error(const char *fmt, ...)
{
    va_list ap;
    CPUState *cpu;

    va_start(ap, fmt);
    fprintf(stderr, "qemu: hardware error: ");
    vfprintf(stderr, fmt, ap);
    fprintf(stderr, "\n");
    CPU_FOREACH(cpu) {
        fprintf(stderr, "CPU #%d:\n", cpu->cpu_index);
        cpu_dump_state(cpu, stderr, CPU_DUMP_FPU);
    }
    va_end(ap);
    abort();
}
1020
/* Pull the register state of every vCPU from the accelerator into QEMU's
 * CPUState (e.g. before inspecting or migrating it). */
void cpu_synchronize_all_states(void)
{
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        cpu_synchronize_state(cpu);
        /* TODO: move to cpu_synchronize_state() — HVF has its own
         * explicit sync call here for now. */
        if (hvf_enabled()) {
            hvf_cpu_synchronize_state(cpu);
        }
    }
}
1033
/* Push QEMU's (reset) register state of every vCPU back into the
 * accelerator after a system reset. */
void cpu_synchronize_all_post_reset(void)
{
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        cpu_synchronize_post_reset(cpu);
        /* TODO: move to cpu_synchronize_post_reset() — HVF special case. */
        if (hvf_enabled()) {
            hvf_cpu_synchronize_post_reset(cpu);
        }
    }
}
1046
/* Push QEMU's register state of every vCPU into the accelerator after
 * machine init / incoming migration. */
void cpu_synchronize_all_post_init(void)
{
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        cpu_synchronize_post_init(cpu);
        /* TODO: move to cpu_synchronize_post_init() — HVF special case. */
        if (hvf_enabled()) {
            hvf_cpu_synchronize_post_init(cpu);
        }
    }
}
1059
1060void cpu_synchronize_all_pre_loadvm(void)
1061{
1062 CPUState *cpu;
1063
1064 CPU_FOREACH(cpu) {
1065 cpu_synchronize_pre_loadvm(cpu);
1066 }
1067}
1068
/* Stop the VM: pause vCPUs, switch to @state, optionally emit the QMP
 * STOP event, then drain and flush all block devices.  Returns the
 * bdrv_flush_all() result (0 on success). */
static int do_vm_stop(RunState state, bool send_stop)
{
    int ret = 0;

    if (runstate_is_running()) {
        cpu_disable_ticks();
        pause_all_vcpus();
        runstate_set(state);
        vm_state_notify(0, state);
        if (send_stop) {
            qapi_event_send_stop();
        }
    }

    /* Flush storage even if we were already stopped. */
    bdrv_drain_all();
    replay_disable_events();
    ret = bdrv_flush_all();

    return ret;
}
1089
1090
1091
1092
/* Stop the VM for shutdown; no STOP event is sent since the guest (or
 * user) initiated the shutdown. */
int vm_shutdown(void)
{
    return do_vm_stop(RUN_STATE_SHUTDOWN, false);
}
1097
1098static bool cpu_can_run(CPUState *cpu)
1099{
1100 if (cpu->stop) {
1101 return false;
1102 }
1103 if (cpu_is_stopped(cpu)) {
1104 return false;
1105 }
1106 return true;
1107}
1108
/* A vCPU hit a debug event: park it and hand control to the gdbstub. */
static void cpu_handle_guest_debug(CPUState *cpu)
{
    gdb_set_stop_cpu(cpu);
    qemu_system_debug_request();
    cpu->stopped = true;
}
1115
1116#ifdef CONFIG_LINUX
/* Restore the default SIGBUS disposition and re-raise it so the process
 * dies with the original signal (produces a proper core/exit status). */
static void sigbus_reraise(void)
{
    sigset_t set;
    struct sigaction action;

    memset(&action, 0, sizeof(action));
    action.sa_handler = SIG_DFL;
    if (!sigaction(SIGBUS, &action, NULL)) {
        raise(SIGBUS);
        /* Unblock SIGBUS in case it was blocked in this thread. */
        sigemptyset(&set);
        sigaddset(&set, SIGBUS);
        pthread_sigmask(SIG_UNBLOCK, &set, NULL);
    }
    /* Only reached if re-raising somehow failed. */
    perror("Failed to re-raise SIGBUS!\n");
    abort();
}
1133
/* SIGBUS handler: forward hardware memory errors (MCE) to KVM so they can
 * be injected into the guest; anything else, or a failed injection, is
 * fatal via sigbus_reraise(). */
static void sigbus_handler(int n, siginfo_t *siginfo, void *ctx)
{
    /* Only machine-check originated SIGBUS is recoverable here. */
    if (siginfo->si_code != BUS_MCEERR_AO && siginfo->si_code != BUS_MCEERR_AR) {
        sigbus_reraise();
    }

    if (current_cpu) {
        /* Called asynchronously in VCPU thread.  */
        if (kvm_on_sigbus_vcpu(current_cpu, siginfo->si_code, siginfo->si_addr)) {
            sigbus_reraise();
        }
    } else {
        /* Called synchronously (via signalfd) in main thread.  */
        if (kvm_on_sigbus(siginfo->si_code, siginfo->si_addr)) {
            sigbus_reraise();
        }
    }
}
1152
/* Install the SIGBUS handler and ask the kernel for early machine-check
 * delivery (PR_MCE_KILL_EARLY) so memory errors reach us promptly. */
static void qemu_init_sigbus(void)
{
    struct sigaction action;

    memset(&action, 0, sizeof(action));
    action.sa_flags = SA_SIGINFO;
    action.sa_sigaction = sigbus_handler;
    sigaction(SIGBUS, &action, NULL);

    prctl(PR_MCE_KILL, PR_MCE_KILL_SET, PR_MCE_KILL_EARLY, 0, 0);
}
1164#else
/* Non-Linux hosts: no SIGBUS/MCE handling. */
static void qemu_init_sigbus(void)
{
}
1168#endif
1169
1170static QemuMutex qemu_global_mutex;
1171
1172static QemuThread io_thread;
1173
1174
1175static QemuCond qemu_cpu_cond;
1176
1177static QemuCond qemu_pause_cond;
1178
/* One-time init of the vCPU infrastructure: signal handling, condition
 * variables, the BQL, and record which thread is the I/O thread. */
void qemu_init_cpu_loop(void)
{
    qemu_init_sigbus();
    qemu_cond_init(&qemu_cpu_cond);
    qemu_cond_init(&qemu_pause_cond);
    qemu_mutex_init(&qemu_global_mutex);

    qemu_thread_get_self(&io_thread);
}
1188
/* Run @func synchronously on @cpu's thread, dropping the BQL while
 * waiting for completion. */
void run_on_cpu(CPUState *cpu, run_on_cpu_func func, run_on_cpu_data data)
{
    do_run_on_cpu(cpu, func, data, &qemu_global_mutex);
}
1193
1194static void qemu_kvm_destroy_vcpu(CPUState *cpu)
1195{
1196 if (kvm_destroy_vcpu(cpu) < 0) {
1197 error_report("kvm_destroy_vcpu failed");
1198 exit(EXIT_FAILURE);
1199 }
1200}
1201
/* TCG vCPUs need no accelerator-side teardown on unplug. */
static void qemu_tcg_destroy_vcpu(CPUState *cpu)
{
}
1205
/* Mark @cpu stopped (must run on its own thread), optionally force it out
 * of the execution loop, and wake anyone waiting in pause_all_vcpus(). */
static void qemu_cpu_stop(CPUState *cpu, bool exit)
{
    g_assert(qemu_cpu_is_self(cpu));
    cpu->stop = false;
    cpu->stopped = true;
    if (exit) {
        cpu_exit(cpu);
    }
    qemu_cond_broadcast(&qemu_pause_cond);
}
1216
/* Per-iteration housekeeping shared by all vCPU loops: clear the kick
 * flag, honor stop requests, and run queued work items. */
static void qemu_wait_io_event_common(CPUState *cpu)
{
    atomic_mb_set(&cpu->thread_kicked, false);
    if (cpu->stop) {
        qemu_cpu_stop(cpu, false);
    }
    process_queued_cpu_work(cpu);
}
1225
/* Round-robin TCG idle wait: sleep on first_cpu's halt condition while
 * every vCPU is idle (kick timer paused meanwhile), then restart the kick
 * timer and do the common housekeeping for all vCPUs. */
static void qemu_tcg_rr_wait_io_event(void)
{
    CPUState *cpu;

    while (all_cpu_threads_idle()) {
        stop_tcg_kick_timer();
        qemu_cond_wait(first_cpu->halt_cond, &qemu_global_mutex);
    }

    start_tcg_kick_timer();

    CPU_FOREACH(cpu) {
        qemu_wait_io_event_common(cpu);
    }
}
1241
/* Per-vCPU idle wait: block on the halt condition (BQL released by the
 * cond wait) until there is something to do, then run housekeeping. */
static void qemu_wait_io_event(CPUState *cpu)
{
    while (cpu_thread_is_idle(cpu)) {
        qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
    }

#ifdef _WIN32
    /* Eat dummy APC queued by qemu_cpu_kick_thread.  */
    if (!tcg_enabled()) {
        SleepEx(0, TRUE);
    }
#endif
    qemu_wait_io_event_common(cpu);
}
1256
/* Thread function for a KVM vCPU: initialize the vCPU, then loop running
 * guest code and sleeping when idle until the CPU is unplugged. */
static void *qemu_kvm_cpu_thread_fn(void *arg)
{
    CPUState *cpu = arg;
    int r;

    rcu_register_thread();

    qemu_mutex_lock_iothread();
    qemu_thread_get_self(cpu->thread);
    cpu->thread_id = qemu_get_thread_id();
    cpu->can_do_io = 1;
    current_cpu = cpu;

    r = kvm_init_vcpu(cpu);
    if (r < 0) {
        error_report("kvm_init_vcpu failed: %s", strerror(-r));
        exit(1);
    }

    kvm_init_cpu_signals(cpu);

    /* signal CPU creation */
    cpu->created = true;
    qemu_cond_signal(&qemu_cpu_cond);
    qemu_guest_random_seed_thread_part2(cpu->random_seed);

    do {
        if (cpu_can_run(cpu)) {
            r = kvm_cpu_exec(cpu);
            if (r == EXCP_DEBUG) {
                cpu_handle_guest_debug(cpu);
            }
        }
        qemu_wait_io_event(cpu);
    } while (!cpu->unplug || cpu_can_run(cpu));

    /* Unplugged: tear down the vCPU and signal completion. */
    qemu_kvm_destroy_vcpu(cpu);
    cpu->created = false;
    qemu_cond_signal(&qemu_cpu_cond);
    qemu_mutex_unlock_iothread();
    rcu_unregister_thread();
    return NULL;
}
1300
/* Thread function for the "dummy" accelerator (qtest): never executes
 * guest code, just waits for IPIs and processes queued work. */
static void *qemu_dummy_cpu_thread_fn(void *arg)
{
#ifdef _WIN32
    error_report("qtest is not supported under Windows");
    exit(1);
#else
    CPUState *cpu = arg;
    sigset_t waitset;
    int r;

    rcu_register_thread();

    qemu_mutex_lock_iothread();
    qemu_thread_get_self(cpu->thread);
    cpu->thread_id = qemu_get_thread_id();
    cpu->can_do_io = 1;
    current_cpu = cpu;

    sigemptyset(&waitset);
    sigaddset(&waitset, SIG_IPI);

    /* signal CPU creation */
    cpu->created = true;
    qemu_cond_signal(&qemu_cpu_cond);
    qemu_guest_random_seed_thread_part2(cpu->random_seed);

    do {
        /* Wait for a kick with the BQL dropped. */
        qemu_mutex_unlock_iothread();
        do {
            int sig;
            r = sigwait(&waitset, &sig);
        } while (r == -1 && (errno == EAGAIN || errno == EINTR));
        if (r == -1) {
            perror("sigwait");
            exit(1);
        }
        qemu_mutex_lock_iothread();
        qemu_wait_io_event(cpu);
    } while (!cpu->unplug);

    qemu_mutex_unlock_iothread();
    rcu_unregister_thread();
    return NULL;
#endif
}
1346
/* How many instructions the next TCG run may execute: up to the next
 * virtual-timer deadline, or — during replay — whatever the log says. */
static int64_t tcg_get_icount_limit(void)
{
    int64_t deadline;

    if (replay_mode != REPLAY_MODE_PLAY) {
        deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);

        /* Clamp: no deadline (-1) or a huge one both become INT32_MAX so
         * the count stays well inside what the 16-bit decrementer plus
         * icount_extra can represent per slice. */
        if ((deadline < 0) || (deadline > INT32_MAX)) {
            deadline = INT32_MAX;
        }

        return qemu_icount_round(deadline);
    } else {
        return replay_get_instructions();
    }
}
1368
/* After a TCG run, fire virtual timers whose deadline has arrived (the
 * run was sized so that at most the deadline was reached). */
static void handle_icount_deadline(void)
{
    assert(qemu_in_vcpu_thread());
    if (use_icount) {
        int64_t deadline =
            qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);

        if (deadline == 0) {
            /* Wake up other AioContexts.  */
            qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
            qemu_clock_run_timers(QEMU_CLOCK_VIRTUAL);
        }
    }
}
1383
/* Before running a vCPU under icount: hand it an instruction budget,
 * split between the 16-bit decrementer and icount_extra, and take the
 * replay lock for the duration of the run. */
static void prepare_icount_for_run(CPUState *cpu)
{
    if (use_icount) {
        int insns_left;

        /* These should always be cleared by process_icount_data after
         * each vCPU execution. However u16.high can be raised
         * asynchronously by cpu_exit/cpu_interrupt/tcg_handle_interrupt
         */
        g_assert(cpu_neg(cpu)->icount_decr.u16.low == 0);
        g_assert(cpu->icount_extra == 0);

        cpu->icount_budget = tcg_get_icount_limit();
        /* At most 0xffff fits in the low half of the decrementer. */
        insns_left = MIN(0xffff, cpu->icount_budget);
        cpu_neg(cpu)->icount_decr.u16.low = insns_left;
        cpu->icount_extra = cpu->icount_budget - insns_left;

        replay_mutex_lock();
    }
}
1404
/* After a vCPU run under icount: account the executed instructions,
 * clear the budget/decrementer, sync replay, and drop the replay lock. */
static void process_icount_data(CPUState *cpu)
{
    if (use_icount) {
        /* Account for executed instructions */
        cpu_update_icount(cpu);

        /* Reset the counters */
        cpu_neg(cpu)->icount_decr.u16.low = 0;
        cpu->icount_extra = 0;
        cpu->icount_budget = 0;

        replay_account_executed_instructions();

        replay_mutex_unlock();
    }
}
1421
1422
/* Execute guest code on @cpu via TCG, bracketed by exec start/end and
 * (optionally) profiling accounting.  Returns the cpu_exec() exit code. */
static int tcg_cpu_exec(CPUState *cpu)
{
    int ret;
#ifdef CONFIG_PROFILER
    int64_t ti;
#endif

    assert(tcg_enabled());
#ifdef CONFIG_PROFILER
    ti = profile_getclock();
#endif
    cpu_exec_start(cpu);
    ret = cpu_exec(cpu);
    cpu_exec_end(cpu);
#ifdef CONFIG_PROFILER
    atomic_set(&tcg_ctx->prof.cpu_exec_time,
               tcg_ctx->prof.cpu_exec_time + profile_getclock() - ti);
#endif
    return ret;
}
1443
1444
1445
1446
/* Destroy any vCPU whose unplug has been requested and which can no
 * longer run; at most one per call, signalling the waiter each time. */
static void deal_with_unplugged_cpus(void)
{
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        if (cpu->unplug && !cpu_can_run(cpu)) {
            qemu_tcg_destroy_vcpu(cpu);
            cpu->created = false;
            qemu_cond_signal(&qemu_cpu_cond);
            break;
        }
    }
}
1460
1461
1462
1463
1464
1465
1466
1467
1468
1469
/*
 * Single-threaded (round-robin) TCG: one thread simulates every vCPU in
 * turn.  The thread is created for first_cpu; later vCPUs share it (see
 * qemu_tcg_init_vcpu()).  The kick timer started below periodically
 * forces a switch between vCPUs.
 */
static void *qemu_tcg_rr_cpu_thread_fn(void *arg)
{
    CPUState *cpu = arg;

    assert(tcg_enabled());
    rcu_register_thread();
    tcg_register_thread();

    qemu_mutex_lock_iothread();
    qemu_thread_get_self(cpu->thread);

    cpu->thread_id = qemu_get_thread_id();
    cpu->created = true;
    cpu->can_do_io = 1;
    qemu_cond_signal(&qemu_cpu_cond);
    qemu_guest_random_seed_thread_part2(cpu->random_seed);

    /* wait for initial kick-off after machine start */
    while (first_cpu->stopped) {
        qemu_cond_wait(first_cpu->halt_cond, &qemu_global_mutex);

        /* process any pending work */
        CPU_FOREACH(cpu) {
            current_cpu = cpu;
            qemu_wait_io_event_common(cpu);
        }
    }

    start_tcg_kick_timer();

    cpu = first_cpu;

    /* process any pending work */
    cpu->exit_request = 1;

    while (1) {
        /* Take the replay mutex before the BQL: this is the lock order
         * also used by prepare_icount_for_run()/process_icount_data().
         */
        qemu_mutex_unlock_iothread();
        replay_mutex_lock();
        qemu_mutex_lock_iothread();

        qemu_account_warp_timer();

        /* Run the timers here.  This is much more efficient than
         * waking up the I/O thread and waiting for completion.
         */
        handle_icount_deadline();

        replay_mutex_unlock();

        if (!cpu) {
            cpu = first_cpu;
        }

        /* Round-robin over the vCPUs until one requests an exit or has
         * queued work for this thread.
         */
        while (cpu && !cpu->queued_work_first && !cpu->exit_request) {

            atomic_mb_set(&tcg_current_rr_cpu, cpu);
            current_cpu = cpu;

            qemu_clock_enable(QEMU_CLOCK_VIRTUAL,
                              (cpu->singlestep_enabled & SSTEP_NOTIMER) == 0);

            if (cpu_can_run(cpu)) {
                int r;

                qemu_mutex_unlock_iothread();
                prepare_icount_for_run(cpu);

                r = tcg_cpu_exec(cpu);

                process_icount_data(cpu);
                qemu_mutex_lock_iothread();

                if (r == EXCP_DEBUG) {
                    cpu_handle_guest_debug(cpu);
                    break;
                } else if (r == EXCP_ATOMIC) {
                    /* Run the exclusive step outside the BQL. */
                    qemu_mutex_unlock_iothread();
                    cpu_exec_step_atomic(cpu);
                    qemu_mutex_lock_iothread();
                    break;
                }
            } else if (cpu->stop) {
                if (cpu->unplug) {
                    cpu = CPU_NEXT(cpu);
                }
                break;
            }

            cpu = CPU_NEXT(cpu);
        }

        /* Does not need atomic_mb_set because a spurious wakeup is okay. */
        atomic_set(&tcg_current_rr_cpu, NULL);

        if (cpu && cpu->exit_request) {
            atomic_mb_set(&cpu->exit_request, 0);
        }

        if (use_icount && all_cpu_threads_idle()) {
            /*
             * When all cpus are sleeping (e.g. in WFI), to avoid a
             * deadlock in the main_loop, wake it up in order to start
             * the warp timer.
             */
            qemu_notify_event();
        }

        qemu_tcg_rr_wait_io_event();
        deal_with_unplugged_cpus();
    }

    rcu_unregister_thread();
    return NULL;
}
1583
1584static void *qemu_hax_cpu_thread_fn(void *arg)
1585{
1586 CPUState *cpu = arg;
1587 int r;
1588
1589 rcu_register_thread();
1590 qemu_mutex_lock_iothread();
1591 qemu_thread_get_self(cpu->thread);
1592
1593 cpu->thread_id = qemu_get_thread_id();
1594 cpu->created = true;
1595 current_cpu = cpu;
1596
1597 hax_init_vcpu(cpu);
1598 qemu_cond_signal(&qemu_cpu_cond);
1599 qemu_guest_random_seed_thread_part2(cpu->random_seed);
1600
1601 do {
1602 if (cpu_can_run(cpu)) {
1603 r = hax_smp_cpu_exec(cpu);
1604 if (r == EXCP_DEBUG) {
1605 cpu_handle_guest_debug(cpu);
1606 }
1607 }
1608
1609 qemu_wait_io_event(cpu);
1610 } while (!cpu->unplug || cpu_can_run(cpu));
1611 rcu_unregister_thread();
1612 return NULL;
1613}
1614
1615
1616
1617static void *qemu_hvf_cpu_thread_fn(void *arg)
1618{
1619 CPUState *cpu = arg;
1620
1621 int r;
1622
1623 assert(hvf_enabled());
1624
1625 rcu_register_thread();
1626
1627 qemu_mutex_lock_iothread();
1628 qemu_thread_get_self(cpu->thread);
1629
1630 cpu->thread_id = qemu_get_thread_id();
1631 cpu->can_do_io = 1;
1632 current_cpu = cpu;
1633
1634 hvf_init_vcpu(cpu);
1635
1636
1637 cpu->created = true;
1638 qemu_cond_signal(&qemu_cpu_cond);
1639 qemu_guest_random_seed_thread_part2(cpu->random_seed);
1640
1641 do {
1642 if (cpu_can_run(cpu)) {
1643 r = hvf_vcpu_exec(cpu);
1644 if (r == EXCP_DEBUG) {
1645 cpu_handle_guest_debug(cpu);
1646 }
1647 }
1648 qemu_wait_io_event(cpu);
1649 } while (!cpu->unplug || cpu_can_run(cpu));
1650
1651 hvf_vcpu_destroy(cpu);
1652 cpu->created = false;
1653 qemu_cond_signal(&qemu_cpu_cond);
1654 qemu_mutex_unlock_iothread();
1655 rcu_unregister_thread();
1656 return NULL;
1657}
1658
/*
 * WHPX (Windows Hypervisor Platform) vCPU thread, one per vCPU:
 * initialize the WHPX vCPU (fatal on failure), then loop running guest
 * code and servicing I/O events until the vCPU is unplugged.
 */
static void *qemu_whpx_cpu_thread_fn(void *arg)
{
    CPUState *cpu = arg;
    int r;

    rcu_register_thread();

    qemu_mutex_lock_iothread();
    qemu_thread_get_self(cpu->thread);
    cpu->thread_id = qemu_get_thread_id();
    current_cpu = cpu;

    r = whpx_init_vcpu(cpu);
    if (r < 0) {
        fprintf(stderr, "whpx_init_vcpu failed: %s\n", strerror(-r));
        exit(1);
    }

    /* Signal CPU creation so qemu_init_vcpu() stops waiting. */
    cpu->created = true;
    qemu_cond_signal(&qemu_cpu_cond);
    qemu_guest_random_seed_thread_part2(cpu->random_seed);

    do {
        if (cpu_can_run(cpu)) {
            r = whpx_vcpu_exec(cpu);
            if (r == EXCP_DEBUG) {
                cpu_handle_guest_debug(cpu);
            }
        }
        /* Sleep while idle; cond_wait releases the iothread lock. */
        while (cpu_thread_is_idle(cpu)) {
            qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
        }
        qemu_wait_io_event_common(cpu);
    } while (!cpu->unplug || cpu_can_run(cpu));

    whpx_destroy_vcpu(cpu);
    cpu->created = false;
    qemu_cond_signal(&qemu_cpu_cond);
    qemu_mutex_unlock_iothread();
    rcu_unregister_thread();
    return NULL;
}
1702
#ifdef _WIN32
/* No-op APC routine: queued via QueueUserAPC() in qemu_cpu_kick_thread()
 * purely to wake the target vCPU thread; the callback does nothing. */
static void CALLBACK dummy_apc_func(ULONG_PTR unused)
{
}
#endif
1708
1709
1710
1711
1712
1713
1714
1715
/*
 * Multi-threaded TCG (MTTCG) vCPU thread: one thread per vCPU.  Runs
 * guest code via tcg_cpu_exec() and sleeps in qemu_wait_io_event() when
 * idle, until the vCPU is unplugged.  icount is not supported in this
 * mode (asserted below).
 */
static void *qemu_tcg_cpu_thread_fn(void *arg)
{
    CPUState *cpu = arg;

    assert(tcg_enabled());
    g_assert(!use_icount);

    rcu_register_thread();
    tcg_register_thread();

    qemu_mutex_lock_iothread();
    qemu_thread_get_self(cpu->thread);

    cpu->thread_id = qemu_get_thread_id();
    cpu->created = true;
    cpu->can_do_io = 1;
    current_cpu = cpu;
    qemu_cond_signal(&qemu_cpu_cond);
    qemu_guest_random_seed_thread_part2(cpu->random_seed);

    /* process any pending work */
    cpu->exit_request = 1;

    do {
        if (cpu_can_run(cpu)) {
            int r;
            qemu_mutex_unlock_iothread();
            r = tcg_cpu_exec(cpu);
            qemu_mutex_lock_iothread();
            switch (r) {
            case EXCP_DEBUG:
                cpu_handle_guest_debug(cpu);
                break;
            case EXCP_HALTED:
                /*
                 * During start-up the vCPU is reset and the thread is
                 * kicked several times.  If we don't ensure we go back
                 * to sleep in the halted state we won't cleanly
                 * start up when the vCPU is enabled.
                 *
                 * cpu->halted should ensure we sleep in wait_io_event.
                 */
                g_assert(cpu->halted);
                break;
            case EXCP_ATOMIC:
                /* Run the exclusive step outside the BQL. */
                qemu_mutex_unlock_iothread();
                cpu_exec_step_atomic(cpu);
                qemu_mutex_lock_iothread();
                /* fall through */
            default:
                /* Ignore everything else. */
                break;
            }
        }

        atomic_mb_set(&cpu->exit_request, 0);
        qemu_wait_io_event(cpu);
    } while (!cpu->unplug || cpu_can_run(cpu));

    qemu_tcg_destroy_vcpu(cpu);
    cpu->created = false;
    qemu_cond_signal(&qemu_cpu_cond);
    qemu_mutex_unlock_iothread();
    rcu_unregister_thread();
    return NULL;
}
1780
/*
 * Wake the host thread running @cpu so it notices pending work or an
 * exit request.  POSIX: send SIG_IPI, debounced via cpu->thread_kicked.
 * Windows: kick through WHPX, or queue a dummy APC (dummy_apc_func)
 * to wake the thread.
 */
static void qemu_cpu_kick_thread(CPUState *cpu)
{
#ifndef _WIN32
    int err;

    /* Debounce: only one kick in flight until the target re-arms. */
    if (cpu->thread_kicked) {
        return;
    }
    cpu->thread_kicked = true;
    err = pthread_kill(cpu->thread->thread, SIG_IPI);
    if (err && err != ESRCH) {
        /* ESRCH (thread already gone) is tolerated; anything else is fatal. */
        fprintf(stderr, "qemu:%s: %s", __func__, strerror(err));
        exit(1);
    }
#else
    if (!qemu_cpu_is_self(cpu)) {
        if (whpx_enabled()) {
            whpx_vcpu_kick(cpu);
        } else if (!QueueUserAPC(dummy_apc_func, cpu->hThread, 0)) {
            fprintf(stderr, "%s: QueueUserAPC failed with error %lu\n",
                    __func__, GetLastError());
            exit(1);
        }
    }
#endif
}
1807
1808void qemu_cpu_kick(CPUState *cpu)
1809{
1810 qemu_cond_broadcast(cpu->halt_cond);
1811 if (tcg_enabled()) {
1812 cpu_exit(cpu);
1813
1814 qemu_cpu_kick_rr_cpu();
1815 } else {
1816 if (hax_enabled()) {
1817
1818
1819
1820
1821 cpu->exit_request = 1;
1822 }
1823 qemu_cpu_kick_thread(cpu);
1824 }
1825}
1826
/* Kick the vCPU thread we are currently running on; must be called
 * from a vCPU thread (current_cpu set). */
void qemu_cpu_kick_self(void)
{
    assert(current_cpu);
    qemu_cpu_kick_thread(current_cpu);
}
1832
/* Return true if the calling thread is the one running @cpu. */
bool qemu_cpu_is_self(CPUState *cpu)
{
    return qemu_thread_is_self(cpu->thread);
}
1837
/* Return true if the calling thread is a vCPU thread. */
bool qemu_in_vcpu_thread(void)
{
    return current_cpu && qemu_cpu_is_self(current_cpu);
}
1842
/* Per-thread flag tracking whether this thread holds the BQL
 * (qemu_global_mutex); maintained by the lock/unlock helpers below. */
static __thread bool iothread_locked = false;

/* Return true if the calling thread holds the big QEMU lock. */
bool qemu_mutex_iothread_locked(void)
{
    return iothread_locked;
}
1849
1850
1851
1852
1853
/*
 * Acquire the big QEMU lock (BQL).  The lock function is read through
 * qemu_bql_mutex_lock_func so an alternative implementation can be
 * interposed (NOTE(review): presumably for lock tracing/profiling —
 * definition not visible here); @file/@line are forwarded to it.
 * The lock is not recursive: asserts it is not already held.
 */
void qemu_mutex_lock_iothread_impl(const char *file, int line)
{
    QemuMutexLockFunc bql_lock = atomic_read(&qemu_bql_mutex_lock_func);

    g_assert(!qemu_mutex_iothread_locked());
    bql_lock(&qemu_global_mutex, file, line);
    iothread_locked = true;
}
1862
/* Release the big QEMU lock; asserts the calling thread holds it.
 * The flag is cleared before unlocking so a racing qemu_mutex_iothread_locked()
 * in this thread never reports a lock we no longer hold. */
void qemu_mutex_unlock_iothread(void)
{
    g_assert(qemu_mutex_iothread_locked());
    iothread_locked = false;
    qemu_mutex_unlock(&qemu_global_mutex);
}
1869
1870static bool all_vcpus_paused(void)
1871{
1872 CPUState *cpu;
1873
1874 CPU_FOREACH(cpu) {
1875 if (!cpu->stopped) {
1876 return false;
1877 }
1878 }
1879
1880 return true;
1881}
1882
/*
 * Stop every vCPU and wait until all of them are in the stopped state.
 * Called with the iothread lock held.  A vCPU calling this pauses
 * itself synchronously; all others are flagged and kicked.
 */
void pause_all_vcpus(void)
{
    CPUState *cpu;

    qemu_clock_enable(QEMU_CLOCK_VIRTUAL, false);
    CPU_FOREACH(cpu) {
        if (qemu_cpu_is_self(cpu)) {
            qemu_cpu_stop(cpu, true);
        } else {
            cpu->stop = true;
            qemu_cpu_kick(cpu);
        }
    }

    /* We need to drop the replay_lock so any vCPU threads woken up
     * can finish their replay tasks
     */
    replay_mutex_unlock();

    while (!all_vcpus_paused()) {
        qemu_cond_wait(&qemu_pause_cond, &qemu_global_mutex);
        /* Kick again in case a vCPU re-entered its run loop before
         * observing cpu->stop. */
        CPU_FOREACH(cpu) {
            qemu_cpu_kick(cpu);
        }
    }

    /* Reacquire in lock order: replay mutex outside the iothread lock
     * (matching qemu_tcg_rr_cpu_thread_fn()). */
    qemu_mutex_unlock_iothread();
    replay_mutex_lock();
    qemu_mutex_lock_iothread();
}
1913
/* Clear the stop/stopped flags on @cpu and kick its thread so it
 * resumes execution. */
void cpu_resume(CPUState *cpu)
{
    cpu->stop = false;
    cpu->stopped = false;
    qemu_cpu_kick(cpu);
}
1920
/* Re-enable the virtual clock (disabled by pause_all_vcpus()) and
 * resume every vCPU. */
void resume_all_vcpus(void)
{
    CPUState *cpu;

    qemu_clock_enable(QEMU_CLOCK_VIRTUAL, true);
    CPU_FOREACH(cpu) {
        cpu_resume(cpu);
    }
}
1930
/*
 * Request unplug of @cpu and synchronously join its thread.  The
 * iothread lock is dropped across the join because the vCPU thread
 * needs it to finish its teardown (see the *_cpu_thread_fn exits).
 */
void cpu_remove_sync(CPUState *cpu)
{
    cpu->stop = true;
    cpu->unplug = true;
    qemu_cpu_kick(cpu);
    qemu_mutex_unlock_iothread();
    qemu_thread_join(cpu->thread);
    qemu_mutex_lock_iothread();
}
1940
1941
1942#define VCPU_THREAD_NAME_SIZE 16
1943
/*
 * Create (or attach to) the TCG execution thread(s) for @cpu.
 *
 * MTTCG: every vCPU gets its own thread (qemu_tcg_cpu_thread_fn).
 * Round-robin: the first vCPU creates the single shared thread
 * (qemu_tcg_rr_cpu_thread_fn); subsequent vCPUs reuse its QemuThread
 * and halt condition, tracked in the function-local statics.
 */
static void qemu_tcg_init_vcpu(CPUState *cpu)
{
    char thread_name[VCPU_THREAD_NAME_SIZE];
    static QemuCond *single_tcg_halt_cond;
    static QemuThread *single_tcg_cpu_thread;
    static int tcg_region_inited;

    assert(tcg_enabled());
    /* Initialize TCG regions exactly once, before the first vCPU
     * thread is created. */
    if (!tcg_region_inited) {
        tcg_region_inited = 1;
        tcg_region_init();
    }

    if (qemu_tcg_mttcg_enabled() || !single_tcg_cpu_thread) {
        cpu->thread = g_malloc0(sizeof(QemuThread));
        cpu->halt_cond = g_malloc0(sizeof(QemuCond));
        qemu_cond_init(cpu->halt_cond);

        if (qemu_tcg_mttcg_enabled()) {
            /* create a thread per vCPU with TCG (MTTCG) */
            parallel_cpus = true;
            snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/TCG",
                 cpu->cpu_index);

            qemu_thread_create(cpu->thread, thread_name, qemu_tcg_cpu_thread_fn,
                               cpu, QEMU_THREAD_JOINABLE);

        } else {
            /* share a single thread for all cpus with TCG */
            snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "ALL CPUs/TCG");
            qemu_thread_create(cpu->thread, thread_name,
                               qemu_tcg_rr_cpu_thread_fn,
                               cpu, QEMU_THREAD_JOINABLE);

            single_tcg_halt_cond = cpu->halt_cond;
            single_tcg_cpu_thread = cpu->thread;
        }
#ifdef _WIN32
        cpu->hThread = qemu_thread_get_handle(cpu->thread);
#endif
    } else {
        /* Round-robin with an existing thread: just attach to it.  The
         * shared thread is already running, so mark this vCPU created
         * ourselves (no signal on qemu_cpu_cond will come for it). */
        cpu->thread = single_tcg_cpu_thread;
        cpu->halt_cond = single_tcg_halt_cond;
        cpu->thread_id = first_cpu->thread_id;
        cpu->can_do_io = 1;
        cpu->created = true;
    }
}
1999
2000static void qemu_hax_start_vcpu(CPUState *cpu)
2001{
2002 char thread_name[VCPU_THREAD_NAME_SIZE];
2003
2004 cpu->thread = g_malloc0(sizeof(QemuThread));
2005 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
2006 qemu_cond_init(cpu->halt_cond);
2007
2008 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/HAX",
2009 cpu->cpu_index);
2010 qemu_thread_create(cpu->thread, thread_name, qemu_hax_cpu_thread_fn,
2011 cpu, QEMU_THREAD_JOINABLE);
2012#ifdef _WIN32
2013 cpu->hThread = qemu_thread_get_handle(cpu->thread);
2014#endif
2015}
2016
2017static void qemu_kvm_start_vcpu(CPUState *cpu)
2018{
2019 char thread_name[VCPU_THREAD_NAME_SIZE];
2020
2021 cpu->thread = g_malloc0(sizeof(QemuThread));
2022 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
2023 qemu_cond_init(cpu->halt_cond);
2024 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/KVM",
2025 cpu->cpu_index);
2026 qemu_thread_create(cpu->thread, thread_name, qemu_kvm_cpu_thread_fn,
2027 cpu, QEMU_THREAD_JOINABLE);
2028}
2029
2030static void qemu_hvf_start_vcpu(CPUState *cpu)
2031{
2032 char thread_name[VCPU_THREAD_NAME_SIZE];
2033
2034
2035
2036 assert(hvf_enabled());
2037
2038 cpu->thread = g_malloc0(sizeof(QemuThread));
2039 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
2040 qemu_cond_init(cpu->halt_cond);
2041
2042 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/HVF",
2043 cpu->cpu_index);
2044 qemu_thread_create(cpu->thread, thread_name, qemu_hvf_cpu_thread_fn,
2045 cpu, QEMU_THREAD_JOINABLE);
2046}
2047
/* Spawn the per-vCPU WHPX thread; keep the Win32 handle used by
 * qemu_cpu_kick_thread(). */
static void qemu_whpx_start_vcpu(CPUState *cpu)
{
    char thread_name[VCPU_THREAD_NAME_SIZE];

    cpu->thread = g_malloc0(sizeof(QemuThread));
    cpu->halt_cond = g_malloc0(sizeof(QemuCond));
    qemu_cond_init(cpu->halt_cond);
    snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/WHPX",
             cpu->cpu_index);
    qemu_thread_create(cpu->thread, thread_name, qemu_whpx_cpu_thread_fn,
                       cpu, QEMU_THREAD_JOINABLE);
#ifdef _WIN32
    cpu->hThread = qemu_thread_get_handle(cpu->thread);
#endif
}
2063
2064static void qemu_dummy_start_vcpu(CPUState *cpu)
2065{
2066 char thread_name[VCPU_THREAD_NAME_SIZE];
2067
2068 cpu->thread = g_malloc0(sizeof(QemuThread));
2069 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
2070 qemu_cond_init(cpu->halt_cond);
2071 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/DUMMY",
2072 cpu->cpu_index);
2073 qemu_thread_create(cpu->thread, thread_name, qemu_dummy_cpu_thread_fn, cpu,
2074 QEMU_THREAD_JOINABLE);
2075}
2076
/*
 * Common vCPU bring-up: record the SMP topology on @cpu, prepare the
 * guest RNG seed handshake, give the CPU an address space if it has
 * none, start the accelerator-specific vCPU thread, and wait until that
 * thread signals cpu->created via qemu_cpu_cond.
 */
void qemu_init_vcpu(CPUState *cpu)
{
    MachineState *ms = MACHINE(qdev_get_machine());

    cpu->nr_cores = ms->smp.cores;
    cpu->nr_threads = ms->smp.threads;
    cpu->stopped = true;
    cpu->random_seed = qemu_guest_random_seed_thread_part1();

    if (!cpu->as) {
        /* If the target cpu hasn't set up any address spaces itself,
         * give it a single one covering system memory (cpu->memory). */
        cpu->num_ases = 1;
        cpu_address_space_init(cpu, 0, "cpu-memory", cpu->memory);
    }

    if (kvm_enabled()) {
        qemu_kvm_start_vcpu(cpu);
    } else if (hax_enabled()) {
        qemu_hax_start_vcpu(cpu);
    } else if (hvf_enabled()) {
        qemu_hvf_start_vcpu(cpu);
    } else if (tcg_enabled()) {
        qemu_tcg_init_vcpu(cpu);
    } else if (whpx_enabled()) {
        qemu_whpx_start_vcpu(cpu);
    } else {
        qemu_dummy_start_vcpu(cpu);
    }

    while (!cpu->created) {
        qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
    }
}
2112
2113void cpu_stop_current(void)
2114{
2115 if (current_cpu) {
2116 current_cpu->stop = true;
2117 cpu_exit(current_cpu);
2118 }
2119}
2120
/*
 * Stop the VM, entering RunState @state.  From a vCPU thread the stop
 * is only requested (and this vCPU stopped); the main loop completes
 * it asynchronously.  Otherwise performs the stop synchronously via
 * do_vm_stop().  Returns 0 on the asynchronous path, else the
 * do_vm_stop() result.
 */
int vm_stop(RunState state)
{
    if (qemu_in_vcpu_thread()) {
        qemu_system_vmstop_request_prepare();
        qemu_system_vmstop_request(state);
        /*
         * FIXME: should not return to device code in case
         * vm_stop() has been requested.
         */
        cpu_stop_current();
        return 0;
    }

    return do_vm_stop(state, true);
}
2136
2137
2138
2139
2140
2141
/*
 * Prepare for (re)starting the VM: emit the RESUME event, enable the
 * ticks, and move the runstate to RUNNING.  Returns -1 if the vCPUs
 * should not be resumed (already running with no stop request pending,
 * or a stop request raced in); 0 when the caller should resume them.
 */
int vm_prepare_start(void)
{
    RunState requested;

    qemu_vmstop_requested(&requested);
    if (runstate_is_running() && requested == RUN_STATE__MAX) {
        return -1;
    }

    /* Ensure that a STOP/RESUME pair of events is emitted if a
     * vmstop request was pending.  The BLOCK_IO_ERROR event, for
     * example, is documented to always be followed by the STOP event.
     */
    if (runstate_is_running()) {
        qapi_event_send_stop();
        qapi_event_send_resume();
        return -1;
    }

    /* We are sending this now, but the CPUs will be resumed shortly later */
    qapi_event_send_resume();

    replay_enable_events();
    cpu_enable_ticks();
    runstate_set(RUN_STATE_RUNNING);
    vm_state_notify(1, RUN_STATE_RUNNING);
    return 0;
}
2171
/* Start the VM: resume the vCPUs when vm_prepare_start() allows it. */
void vm_start(void)
{
    if (vm_prepare_start() == 0) {
        resume_all_vcpus();
    }
}
2178
2179
2180
/* Perform a state transition to @state even if the VM is already
 * stopped; in that case the block layer is still drained and flushed
 * so on-disk state is consistent.  Returns 0 or a negative error. */
int vm_stop_force_state(RunState state)
{
    if (runstate_is_running()) {
        return vm_stop(state);
    } else {
        runstate_set(state);

        bdrv_drain_all();
        /* Make sure to return an error if the flush in a previous vm_stop()
         * failed. */
        return bdrv_flush_all();
    }
}
2194
/* Print the supported CPU models when the target defines cpu_list().
 * @optarg is currently unused (NOTE(review): kept for the caller's
 * signature — callers not visible in this file). */
void list_cpus(const char *optarg)
{
    /* XXX: implement xxx_cpu_list for targets that still miss it */
#if defined(cpu_list)
    cpu_list();
#endif
}
2202
2203void qmp_memsave(int64_t addr, int64_t size, const char *filename,
2204 bool has_cpu, int64_t cpu_index, Error **errp)
2205{
2206 FILE *f;
2207 uint32_t l;
2208 CPUState *cpu;
2209 uint8_t buf[1024];
2210 int64_t orig_addr = addr, orig_size = size;
2211
2212 if (!has_cpu) {
2213 cpu_index = 0;
2214 }
2215
2216 cpu = qemu_get_cpu(cpu_index);
2217 if (cpu == NULL) {
2218 error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "cpu-index",
2219 "a CPU number");
2220 return;
2221 }
2222
2223 f = fopen(filename, "wb");
2224 if (!f) {
2225 error_setg_file_open(errp, errno, filename);
2226 return;
2227 }
2228
2229 while (size != 0) {
2230 l = sizeof(buf);
2231 if (l > size)
2232 l = size;
2233 if (cpu_memory_rw_debug(cpu, addr, buf, l, 0) != 0) {
2234 error_setg(errp, "Invalid addr 0x%016" PRIx64 "/size %" PRId64
2235 " specified", orig_addr, orig_size);
2236 goto exit;
2237 }
2238 if (fwrite(buf, 1, l, f) != l) {
2239 error_setg(errp, QERR_IO_ERROR);
2240 goto exit;
2241 }
2242 addr += l;
2243 size -= l;
2244 }
2245
2246exit:
2247 fclose(f);
2248}
2249
2250void qmp_pmemsave(int64_t addr, int64_t size, const char *filename,
2251 Error **errp)
2252{
2253 FILE *f;
2254 uint32_t l;
2255 uint8_t buf[1024];
2256
2257 f = fopen(filename, "wb");
2258 if (!f) {
2259 error_setg_file_open(errp, errno, filename);
2260 return;
2261 }
2262
2263 while (size != 0) {
2264 l = sizeof(buf);
2265 if (l > size)
2266 l = size;
2267 cpu_physical_memory_read(addr, buf, l);
2268 if (fwrite(buf, 1, l, f) != l) {
2269 error_setg(errp, QERR_IO_ERROR);
2270 goto exit;
2271 }
2272 addr += l;
2273 size -= l;
2274 }
2275
2276exit:
2277 fclose(f);
2278}
2279
/* QMP 'inject-nmi': deliver an NMI to the monitor's current CPU. */
void qmp_inject_nmi(Error **errp)
{
    nmi_monitor_handle(monitor_get_cpu_index(), errp);
}
2284
2285void dump_drift_info(void)
2286{
2287 if (!use_icount) {
2288 return;
2289 }
2290
2291 qemu_printf("Host - Guest clock %"PRIi64" ms\n",
2292 (cpu_get_clock() - cpu_get_icount())/SCALE_MS);
2293 if (icount_align_option) {
2294 qemu_printf("Max guest delay %"PRIi64" ms\n",
2295 -max_delay / SCALE_MS);
2296 qemu_printf("Max guest advance %"PRIi64" ms\n",
2297 max_advance / SCALE_MS);
2298 } else {
2299 qemu_printf("Max guest delay NA\n");
2300 qemu_printf("Max guest advance NA\n");
2301 }
2302}
2303