1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26#include "config-host.h"
27
28#include "monitor/monitor.h"
29#include "qapi/qmp/qerror.h"
30#include "qemu/error-report.h"
31#include "sysemu/sysemu.h"
32#include "exec/gdbstub.h"
33#include "sysemu/dma.h"
34#include "sysemu/kvm.h"
35#include "qmp-commands.h"
36
37#include "qemu/thread.h"
38#include "sysemu/cpus.h"
39#include "sysemu/qtest.h"
40#include "qemu/main-loop.h"
41#include "qemu/bitmap.h"
42#include "qemu/seqlock.h"
43#include "qapi-event.h"
44#include "hw/nmi.h"
45
46#ifndef _WIN32
47#include "qemu/compatfd.h"
48#endif
49
50#ifdef CONFIG_LINUX
51
52#include <sys/prctl.h>
53
54#ifndef PR_MCE_KILL
55#define PR_MCE_KILL 33
56#endif
57
58#ifndef PR_MCE_KILL_SET
59#define PR_MCE_KILL_SET 1
60#endif
61
62#ifndef PR_MCE_KILL_EARLY
63#define PR_MCE_KILL_EARLY 1
64#endif
65
66#endif
67
/* Cursor used by tcg_exec_all() to remember which vCPU to run next. */
static CPUState *next_cpu;
/* Printed by dump_drift_info() as "Max guest delay"/"Max guest advance". */
int64_t max_delay;
int64_t max_advance;
71
72bool cpu_is_stopped(CPUState *cpu)
73{
74 return cpu->stopped || !runstate_is_running();
75}
76
77static bool cpu_thread_is_idle(CPUState *cpu)
78{
79 if (cpu->stop || cpu->queued_work_first) {
80 return false;
81 }
82 if (cpu_is_stopped(cpu)) {
83 return true;
84 }
85 if (!cpu->halted || cpu_has_work(cpu) ||
86 kvm_halt_in_kernel()) {
87 return false;
88 }
89 return true;
90}
91
92static bool all_cpu_threads_idle(void)
93{
94 CPUState *cpu;
95
96 CPU_FOREACH(cpu) {
97 if (!cpu_thread_is_idle(cpu)) {
98 return false;
99 }
100 }
101 return true;
102}
103
104
105
106
107
108
/* Value of the "sleep" icount option (see configure_icount()); on by default. */
static bool icount_sleep = true;
/* Start time of a pending clock warp, or -1 when no warp is pending. */
static int64_t vm_clock_warp_start = -1;

/* Left shift applied to an instruction count by cpu_icount_to_ns(). */
static int icount_time_shift;

/* Largest value icount_adjust() will raise icount_time_shift to. */
#define MAX_ICOUNT_SHIFT 10

/* Timers created in configure_icount(). */
static QEMUTimer *icount_rt_timer;
static QEMUTimer *icount_vm_timer;
static QEMUTimer *icount_warp_timer;
119
/* Bookkeeping for the guest tick counter, guest clock and icount clock. */
typedef struct TimersState {
    /* Last value returned by cpu_get_ticks(); used to keep it non-decreasing. */
    int64_t cpu_ticks_prev;
    /* Offset added to the host tick counter in cpu_get_ticks(). */
    int64_t cpu_ticks_offset;

    /*
     * Sequence lock taken by the writers in this file that update the clock
     * and icount fields below; cpu_get_clock() and cpu_get_icount() retry
     * their reads against it.
     */
    QemuSeqLock vm_clock_seqlock;
    /* Offset added to get_clock() in cpu_get_clock_locked(). */
    int64_t cpu_clock_offset;
    /* Non-zero while ticks are enabled (cpu_enable_ticks/cpu_disable_ticks). */
    int32_t cpu_ticks_enabled;
    /* Only referenced by vmstate_timers in the visible code. */
    int64_t dummy;

    /* Added to the scaled instruction count to form the icount clock. */
    int64_t qemu_icount_bias;
    /* Instruction counter; adjusted in tcg_cpu_exec(). */
    int64_t qemu_icount;
} TimersState;

/* The single global instance. */
static TimersState timers_state;
140
141int64_t cpu_get_icount_raw(void)
142{
143 int64_t icount;
144 CPUState *cpu = current_cpu;
145
146 icount = timers_state.qemu_icount;
147 if (cpu) {
148 if (!cpu_can_do_io(cpu)) {
149 fprintf(stderr, "Bad icount read\n");
150 exit(1);
151 }
152 icount -= (cpu->icount_decr.u16.low + cpu->icount_extra);
153 }
154 return icount;
155}
156
157
158static int64_t cpu_get_icount_locked(void)
159{
160 int64_t icount = cpu_get_icount_raw();
161 return timers_state.qemu_icount_bias + cpu_icount_to_ns(icount);
162}
163
164int64_t cpu_get_icount(void)
165{
166 int64_t icount;
167 unsigned start;
168
169 do {
170 start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
171 icount = cpu_get_icount_locked();
172 } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));
173
174 return icount;
175}
176
177int64_t cpu_icount_to_ns(int64_t icount)
178{
179 return icount << icount_time_shift;
180}
181
182
183
184int64_t cpu_get_ticks(void)
185{
186 int64_t ticks;
187
188 if (use_icount) {
189 return cpu_get_icount();
190 }
191
192 ticks = timers_state.cpu_ticks_offset;
193 if (timers_state.cpu_ticks_enabled) {
194 ticks += cpu_get_real_ticks();
195 }
196
197 if (timers_state.cpu_ticks_prev > ticks) {
198
199
200 timers_state.cpu_ticks_offset += timers_state.cpu_ticks_prev - ticks;
201 ticks = timers_state.cpu_ticks_prev;
202 }
203
204 timers_state.cpu_ticks_prev = ticks;
205 return ticks;
206}
207
208static int64_t cpu_get_clock_locked(void)
209{
210 int64_t ticks;
211
212 ticks = timers_state.cpu_clock_offset;
213 if (timers_state.cpu_ticks_enabled) {
214 ticks += get_clock();
215 }
216
217 return ticks;
218}
219
220
221int64_t cpu_get_clock(void)
222{
223 int64_t ti;
224 unsigned start;
225
226 do {
227 start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
228 ti = cpu_get_clock_locked();
229 } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));
230
231 return ti;
232}
233
234
235
236
237void cpu_enable_ticks(void)
238{
239
240 seqlock_write_lock(&timers_state.vm_clock_seqlock);
241 if (!timers_state.cpu_ticks_enabled) {
242 timers_state.cpu_ticks_offset -= cpu_get_real_ticks();
243 timers_state.cpu_clock_offset -= get_clock();
244 timers_state.cpu_ticks_enabled = 1;
245 }
246 seqlock_write_unlock(&timers_state.vm_clock_seqlock);
247}
248
249
250
251
252
253void cpu_disable_ticks(void)
254{
255
256 seqlock_write_lock(&timers_state.vm_clock_seqlock);
257 if (timers_state.cpu_ticks_enabled) {
258 timers_state.cpu_ticks_offset += cpu_get_real_ticks();
259 timers_state.cpu_clock_offset = cpu_get_clock_locked();
260 timers_state.cpu_ticks_enabled = 0;
261 }
262 seqlock_write_unlock(&timers_state.vm_clock_seqlock);
263}
264
265
266
267
268
/* Drift threshold used by icount_adjust(): one tenth of get_ticks_per_sec(). */
#define ICOUNT_WOBBLE (get_ticks_per_sec() / 10)

/*
 * Compare the icount clock with the guest clock and move icount_time_shift
 * by at most one step in the direction that reduces the difference, then
 * recompute qemu_icount_bias from the sampled icount value and the
 * (possibly new) shift.  Does nothing while the VM is not running.
 */
static void icount_adjust(void)
{
    int64_t cur_time;
    int64_t cur_icount;
    int64_t delta;

    /* Difference seen on the previous call; persists across calls. */
    static int64_t last_delta;

    if (!runstate_is_running()) {
        return;
    }

    seqlock_write_lock(&timers_state.vm_clock_seqlock);
    cur_time = cpu_get_clock_locked();
    cur_icount = cpu_get_icount_locked();

    delta = cur_icount - cur_time;

    if (delta > 0
        && last_delta + ICOUNT_WOBBLE < delta * 2
        && icount_time_shift > 0) {
        /* icount clock is ahead: each instruction should count for less. */
        icount_time_shift--;
    }
    if (delta < 0
        && last_delta - ICOUNT_WOBBLE > delta * 2
        && icount_time_shift < MAX_ICOUNT_SHIFT) {
        /* icount clock is behind: each instruction should count for more. */
        icount_time_shift++;
    }
    last_delta = delta;
    timers_state.qemu_icount_bias = cur_icount
                              - (timers_state.qemu_icount << icount_time_shift);
    seqlock_write_unlock(&timers_state.vm_clock_seqlock);
}
308
309static void icount_adjust_rt(void *opaque)
310{
311 timer_mod(icount_rt_timer,
312 qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000);
313 icount_adjust();
314}
315
316static void icount_adjust_vm(void *opaque)
317{
318 timer_mod(icount_vm_timer,
319 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
320 get_ticks_per_sec() / 10);
321 icount_adjust();
322}
323
324static int64_t qemu_icount_round(int64_t count)
325{
326 return (count + (1 << icount_time_shift) - 1) >> icount_time_shift;
327}
328
/*
 * Apply a pending clock warp: advance qemu_icount_bias by the time elapsed
 * since vm_clock_warp_start and clear the pending warp.  Also used as the
 * callback of icount_warp_timer; @opaque is unused.
 */
static void icount_warp_rt(void *opaque)
{
    /* Cheap check outside the seqlock: nothing to do if no warp is pending. */
    if (atomic_read(&vm_clock_warp_start) == -1) {
        return;
    }

    seqlock_write_lock(&timers_state.vm_clock_seqlock);
    if (runstate_is_running()) {
        int64_t clock = cpu_get_clock_locked();
        int64_t warp_delta;

        warp_delta = clock - vm_clock_warp_start;
        if (use_icount == 2) {
            /*
             * use_icount == 2 is the mode selected by shift=auto in
             * configure_icount().  Cap the warp so the icount clock does
             * not move past the guest clock sampled above.
             */
            int64_t cur_icount = cpu_get_icount_locked();
            int64_t delta = clock - cur_icount;
            warp_delta = MIN(warp_delta, delta);
        }
        timers_state.qemu_icount_bias += warp_delta;
    }
    vm_clock_warp_start = -1;
    seqlock_write_unlock(&timers_state.vm_clock_seqlock);

    /* The warp may have made QEMU_CLOCK_VIRTUAL timers due. */
    if (qemu_clock_expired(QEMU_CLOCK_VIRTUAL)) {
        qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
    }
}
362
/*
 * Advance QEMU_CLOCK_VIRTUAL to @dest under qtest, stepping from timer
 * deadline to timer deadline and running the expired timers after each step.
 * Asserts that qtest is enabled.
 */
void qtest_clock_warp(int64_t dest)
{
    int64_t clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
    AioContext *aio_context;
    assert(qtest_enabled());
    aio_context = qemu_get_aio_context();
    while (clock < dest) {
        int64_t deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
        /* Step to the next deadline, but never beyond @dest. */
        int64_t warp = qemu_soonest_timeout(dest - clock, deadline);

        seqlock_write_lock(&timers_state.vm_clock_seqlock);
        timers_state.qemu_icount_bias += warp;
        seqlock_write_unlock(&timers_state.vm_clock_seqlock);

        /* Run timers on the clock itself and on the AioContext's timer list. */
        qemu_clock_run_timers(QEMU_CLOCK_VIRTUAL);
        timerlist_run_timers(aio_context->tlg.tl[QEMU_CLOCK_VIRTUAL]);
        clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
    }
    qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
}
383
/*
 * Handle warping of QEMU_CLOCK_VIRTUAL in icount mode when all vCPU threads
 * are idle.  No-op for any other clock type or when icount is disabled.
 *
 * With icount_sleep off the clock jumps straight to the next deadline; with
 * it on, a warp is recorded and icount_warp_timer is armed so the jump is
 * applied later by icount_warp_rt().
 */
void qemu_clock_warp(QEMUClockType type)
{
    int64_t clock;
    int64_t deadline;

    if (type != QEMU_CLOCK_VIRTUAL || !use_icount) {
        return;
    }

    if (icount_sleep) {
        /*
         * Apply any warp that is still pending and cancel its timer before
         * deciding below whether a new warp has to be started.
         */
        icount_warp_rt(NULL);
        timer_del(icount_warp_timer);
    }
    if (!all_cpu_threads_idle()) {
        return;
    }

    if (qtest_enabled()) {
        /* Nothing to do under qtest; see qtest_clock_warp() for that mode. */
        return;
    }

    /* Warp start is taken from QEMU_CLOCK_VIRTUAL_RT, deadline from QEMU_CLOCK_VIRTUAL. */
    clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT);
    deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
    if (deadline < 0) {
        /*
         * NOTE(review): a negative deadline presumably means "no active
         * timer", matching the warning text below.  Warn once only.
         */
        static bool notified;
        if (!icount_sleep && !notified) {
            error_report("WARNING: icount sleep disabled and no active timers");
            notified = true;
        }
        return;
    }

    if (deadline > 0) {
        if (!icount_sleep) {
            /* Not sleeping: jump the icount clock to the deadline right away. */
            seqlock_write_lock(&timers_state.vm_clock_seqlock);
            timers_state.qemu_icount_bias += deadline;
            seqlock_write_unlock(&timers_state.vm_clock_seqlock);
            qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
        } else {
            /*
             * Sleeping: record when the warp started (keeping the earliest
             * start if one is already recorded) and let icount_warp_timer
             * fire at the deadline to apply it.
             */
            seqlock_write_lock(&timers_state.vm_clock_seqlock);
            if (vm_clock_warp_start == -1 || vm_clock_warp_start > clock) {
                vm_clock_warp_start = clock;
            }
            seqlock_write_unlock(&timers_state.vm_clock_seqlock);
            timer_mod_anticipate(icount_warp_timer, clock + deadline);
        }
    } else if (deadline == 0) {
        /* A timer is already due. */
        qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
    }
}
471
472static bool icount_state_needed(void *opaque)
473{
474 return use_icount;
475}
476
477
478
479
/*
 * Subsection of vmstate_timers carrying the icount fields of TimersState;
 * only needed when icount_state_needed() returns true.
 */
static const VMStateDescription icount_vmstate_timers = {
    .name = "timer/icount",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = icount_state_needed,
    .fields = (VMStateField[]) {
        VMSTATE_INT64(qemu_icount_bias, TimersState),
        VMSTATE_INT64(qemu_icount, TimersState),
        VMSTATE_END_OF_LIST()
    }
};

/*
 * Description of the TimersState fields registered by cpu_ticks_init().
 * cpu_clock_offset is tagged as a version 2 field.
 */
static const VMStateDescription vmstate_timers = {
    .name = "timer",
    .version_id = 2,
    .minimum_version_id = 1,
    .fields = (VMStateField[]) {
        VMSTATE_INT64(cpu_ticks_offset, TimersState),
        VMSTATE_INT64(dummy, TimersState),
        VMSTATE_INT64_V(cpu_clock_offset, TimersState, 2),
        VMSTATE_END_OF_LIST()
    },
    .subsections = (const VMStateDescription*[]) {
        &icount_vmstate_timers,
        NULL
    }
};
507
/*
 * Initialise vm_clock_seqlock and register timers_state with the vmstate
 * machinery under vmstate_timers.
 */
void cpu_ticks_init(void)
{
    seqlock_init(&timers_state.vm_clock_seqlock, NULL);
    vmstate_register(NULL, 0, &vmstate_timers, &timers_state);
}
513
514void configure_icount(QemuOpts *opts, Error **errp)
515{
516 const char *option;
517 char *rem_str = NULL;
518
519 option = qemu_opt_get(opts, "shift");
520 if (!option) {
521 if (qemu_opt_get(opts, "align") != NULL) {
522 error_setg(errp, "Please specify shift option when using align");
523 }
524 return;
525 }
526
527 icount_sleep = qemu_opt_get_bool(opts, "sleep", true);
528 if (icount_sleep) {
529 icount_warp_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL_RT,
530 icount_warp_rt, NULL);
531 }
532
533 icount_align_option = qemu_opt_get_bool(opts, "align", false);
534
535 if (icount_align_option && !icount_sleep) {
536 error_setg(errp, "align=on and sleep=no are incompatible");
537 }
538 if (strcmp(option, "auto") != 0) {
539 errno = 0;
540 icount_time_shift = strtol(option, &rem_str, 0);
541 if (errno != 0 || *rem_str != '\0' || !strlen(option)) {
542 error_setg(errp, "icount: Invalid shift value");
543 }
544 use_icount = 1;
545 return;
546 } else if (icount_align_option) {
547 error_setg(errp, "shift=auto and align=on are incompatible");
548 } else if (!icount_sleep) {
549 error_setg(errp, "shift=auto and sleep=no are incompatible");
550 }
551
552 use_icount = 2;
553
554
555
556 icount_time_shift = 3;
557
558
559
560
561
562
563 icount_rt_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL_RT,
564 icount_adjust_rt, NULL);
565 timer_mod(icount_rt_timer,
566 qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000);
567 icount_vm_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
568 icount_adjust_vm, NULL);
569 timer_mod(icount_vm_timer,
570 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
571 get_ticks_per_sec() / 10);
572}
573
574
/*
 * Report a fatal hardware emulation error: print the printf-style message
 * @fmt to stderr, dump the state of every vCPU (including FPU state), and
 * abort().  Does not return.
 */
void hw_error(const char *fmt, ...)
{
    va_list ap;
    CPUState *cpu;

    va_start(ap, fmt);
    fprintf(stderr, "qemu: hardware error: ");
    vfprintf(stderr, fmt, ap);
    fprintf(stderr, "\n");
    CPU_FOREACH(cpu) {
        fprintf(stderr, "CPU #%d:\n", cpu->cpu_index);
        cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_FPU);
    }
    va_end(ap);
    abort();
}
591
592void cpu_synchronize_all_states(void)
593{
594 CPUState *cpu;
595
596 CPU_FOREACH(cpu) {
597 cpu_synchronize_state(cpu);
598 }
599}
600
601void cpu_synchronize_all_post_reset(void)
602{
603 CPUState *cpu;
604
605 CPU_FOREACH(cpu) {
606 cpu_synchronize_post_reset(cpu);
607 }
608}
609
610void cpu_synchronize_all_post_init(void)
611{
612 CPUState *cpu;
613
614 CPU_FOREACH(cpu) {
615 cpu_synchronize_post_init(cpu);
616 }
617}
618
619void cpu_clean_all_dirty(void)
620{
621 CPUState *cpu;
622
623 CPU_FOREACH(cpu) {
624 cpu_clean_state(cpu);
625 }
626}
627
628static int do_vm_stop(RunState state)
629{
630 int ret = 0;
631
632 if (runstate_is_running()) {
633 cpu_disable_ticks();
634 pause_all_vcpus();
635 runstate_set(state);
636 vm_state_notify(0, state);
637 qapi_event_send_stop(&error_abort);
638 }
639
640 bdrv_drain_all();
641 ret = bdrv_flush_all();
642
643 return ret;
644}
645
646static bool cpu_can_run(CPUState *cpu)
647{
648 if (cpu->stop) {
649 return false;
650 }
651 if (cpu_is_stopped(cpu)) {
652 return false;
653 }
654 return true;
655}
656
/*
 * Called when executing @cpu returned EXCP_DEBUG: tell the gdbstub which
 * vCPU stopped, raise a system debug request and mark the vCPU stopped.
 */
static void cpu_handle_guest_debug(CPUState *cpu)
{
    gdb_set_stop_cpu(cpu);
    qemu_system_debug_request();
    cpu->stopped = true;
}
663
664static void cpu_signal(int sig)
665{
666 if (current_cpu) {
667 cpu_exit(current_cpu);
668 }
669 exit_request = 1;
670}
671
672#ifdef CONFIG_LINUX
673static void sigbus_reraise(void)
674{
675 sigset_t set;
676 struct sigaction action;
677
678 memset(&action, 0, sizeof(action));
679 action.sa_handler = SIG_DFL;
680 if (!sigaction(SIGBUS, &action, NULL)) {
681 raise(SIGBUS);
682 sigemptyset(&set);
683 sigaddset(&set, SIGBUS);
684 sigprocmask(SIG_UNBLOCK, &set, NULL);
685 }
686 perror("Failed to re-raise SIGBUS!\n");
687 abort();
688}
689
690static void sigbus_handler(int n, struct qemu_signalfd_siginfo *siginfo,
691 void *ctx)
692{
693 if (kvm_on_sigbus(siginfo->ssi_code,
694 (void *)(intptr_t)siginfo->ssi_addr)) {
695 sigbus_reraise();
696 }
697}
698
/*
 * Install sigbus_handler() for SIGBUS (with SA_SIGINFO) and set the
 * process's machine-check kill policy to PR_MCE_KILL_EARLY via prctl().
 */
static void qemu_init_sigbus(void)
{
    struct sigaction action;

    memset(&action, 0, sizeof(action));
    action.sa_flags = SA_SIGINFO;
    /* sigbus_handler takes a qemu_signalfd_siginfo pointer, hence the cast. */
    action.sa_sigaction = (void (*)(int, siginfo_t*, void*))sigbus_handler;
    sigaction(SIGBUS, &action, NULL);

    prctl(PR_MCE_KILL, PR_MCE_KILL_SET, PR_MCE_KILL_EARLY, 0, 0);
}
710
/*
 * Consume, without blocking, all pending SIG_IPI and SIGBUS signals of the
 * calling thread.  A consumed SIGBUS is passed to kvm_on_sigbus_vcpu() for
 * @cpu and re-raised if that returns non-zero.  Exits the process on an
 * unexpected sigtimedwait()/sigpending() failure.
 */
static void qemu_kvm_eat_signals(CPUState *cpu)
{
    /* Zero timeout: sigtimedwait() only polls. */
    struct timespec ts = { 0, 0 };
    siginfo_t siginfo;
    sigset_t waitset;
    sigset_t chkset;
    int r;

    sigemptyset(&waitset);
    sigaddset(&waitset, SIG_IPI);
    sigaddset(&waitset, SIGBUS);

    do {
        r = sigtimedwait(&waitset, &siginfo, &ts);
        /* EAGAIN (nothing pending) and EINTR are not errors here. */
        if (r == -1 && !(errno == EAGAIN || errno == EINTR)) {
            perror("sigtimedwait");
            exit(1);
        }

        switch (r) {
        case SIGBUS:
            if (kvm_on_sigbus_vcpu(cpu, siginfo.si_code, siginfo.si_addr)) {
                sigbus_reraise();
            }
            break;
        default:
            break;
        }

        /* Loop again while either signal is still pending. */
        r = sigpending(&chkset);
        if (r == -1) {
            perror("sigpending");
            exit(1);
        }
    } while (sigismember(&chkset, SIG_IPI) || sigismember(&chkset, SIGBUS));
}
747
748#else
749
/* Without CONFIG_LINUX there is no SIGBUS setup to do. */
static void qemu_init_sigbus(void)
{
}
753
/* Without CONFIG_LINUX this is a no-op. */
static void qemu_kvm_eat_signals(CPUState *cpu)
{
}
757#endif
758
759#ifndef _WIN32
/* Empty handler installed for SIG_IPI by qemu_kvm_init_cpu_signals(). */
static void dummy_signal(int sig)
{
}
763
/*
 * Set up signal handling for the KVM vCPU thread of @cpu: install an empty
 * SIG_IPI handler, then pass KVM a signal mask equal to the thread's current
 * mask with SIG_IPI and SIGBUS removed.  Exits the process if
 * kvm_set_signal_mask() fails.
 */
static void qemu_kvm_init_cpu_signals(CPUState *cpu)
{
    int r;
    sigset_t set;
    struct sigaction sigact;

    memset(&sigact, 0, sizeof(sigact));
    sigact.sa_handler = dummy_signal;
    sigaction(SIG_IPI, &sigact, NULL);

    /* A NULL new set changes nothing; this only fetches the current mask. */
    pthread_sigmask(SIG_BLOCK, NULL, &set);
    sigdelset(&set, SIG_IPI);
    sigdelset(&set, SIGBUS);
    r = kvm_set_signal_mask(cpu, &set);
    if (r) {
        fprintf(stderr, "kvm_set_signal_mask: %s\n", strerror(-r));
        exit(1);
    }
}
783
784static void qemu_tcg_init_cpu_signals(void)
785{
786 sigset_t set;
787 struct sigaction sigact;
788
789 memset(&sigact, 0, sizeof(sigact));
790 sigact.sa_handler = cpu_signal;
791 sigaction(SIG_IPI, &sigact, NULL);
792
793 sigemptyset(&set);
794 sigaddset(&set, SIG_IPI);
795 pthread_sigmask(SIG_UNBLOCK, &set, NULL);
796}
797
798#else
/* Win32 build: this function must never be called; abort if it is. */
static void qemu_kvm_init_cpu_signals(CPUState *cpu)
{
    abort();
}
803
/* Win32 build: no signal setup is done for the TCG thread. */
static void qemu_tcg_init_cpu_signals(void)
{
}
807#endif
808
/* The lock taken by qemu_mutex_lock_iothread(). */
static QemuMutex qemu_global_mutex;
/* Broadcast by qemu_mutex_lock_iothread() once it holds the lock. */
static QemuCond qemu_io_proceeded_cond;
/* Count of threads currently inside qemu_mutex_lock_iothread(). */
static unsigned iothread_requesting_mutex;

/* Thread that called qemu_init_cpu_loop(). */
static QemuThread io_thread;

/* Thread and halt condition shared by all vCPUs under TCG. */
static QemuThread *tcg_cpu_thread;
static QemuCond *tcg_halt_cond;

/* Signalled by a vCPU thread once it has set cpu->created. */
static QemuCond qemu_cpu_cond;

/* Signalled when a vCPU becomes stopped; waited on by pause_all_vcpus(). */
static QemuCond qemu_pause_cond;
/* Broadcast by flush_queued_work(); waited on by run_on_cpu(). */
static QemuCond qemu_work_cond;
823
/*
 * One-time initialisation of the vCPU loop infrastructure: SIGBUS handling,
 * the condition variables and global mutex defined above, and io_thread
 * (set to the calling thread).
 */
void qemu_init_cpu_loop(void)
{
    qemu_init_sigbus();
    qemu_cond_init(&qemu_cpu_cond);
    qemu_cond_init(&qemu_pause_cond);
    qemu_cond_init(&qemu_work_cond);
    qemu_cond_init(&qemu_io_proceeded_cond);
    qemu_mutex_init(&qemu_global_mutex);

    qemu_thread_get_self(&io_thread);
}
835
/*
 * Run @func(@data) on the thread of @cpu and wait for it to finish.
 *
 * If the caller already is that thread the function is called directly.
 * Otherwise a stack-allocated work item is appended to the vCPU's queue, the
 * vCPU is kicked, and the caller waits on qemu_work_cond (releasing
 * qemu_global_mutex while waiting) until the item is marked done.
 */
void run_on_cpu(CPUState *cpu, void (*func)(void *data), void *data)
{
    struct qemu_work_item wi;

    if (qemu_cpu_is_self(cpu)) {
        func(data);
        return;
    }

    wi.func = func;
    wi.data = data;
    /* The item lives on this stack frame; the worker must not free it. */
    wi.free = false;
    if (cpu->queued_work_first == NULL) {
        cpu->queued_work_first = &wi;
    } else {
        cpu->queued_work_last->next = &wi;
    }
    cpu->queued_work_last = &wi;
    wi.next = NULL;
    wi.done = false;

    qemu_cpu_kick(cpu);
    while (!wi.done) {
        /* Save and restore current_cpu around the wait. */
        CPUState *self_cpu = current_cpu;

        qemu_cond_wait(&qemu_work_cond, &qemu_global_mutex);
        current_cpu = self_cpu;
    }
}
865
/*
 * Schedule @func(@data) to run on the thread of @cpu without waiting.
 *
 * If the caller already is that thread the function is called directly.
 * Otherwise a heap-allocated work item is appended to the vCPU's queue and
 * the vCPU is kicked; flush_queued_work() frees the item after running it.
 */
void async_run_on_cpu(CPUState *cpu, void (*func)(void *data), void *data)
{
    struct qemu_work_item *wi;

    if (qemu_cpu_is_self(cpu)) {
        func(data);
        return;
    }

    wi = g_malloc0(sizeof(struct qemu_work_item));
    wi->func = func;
    wi->data = data;
    /* Ownership passes to the worker, which frees the item. */
    wi->free = true;
    if (cpu->queued_work_first == NULL) {
        cpu->queued_work_first = wi;
    } else {
        cpu->queued_work_last->next = wi;
    }
    cpu->queued_work_last = wi;
    wi->next = NULL;
    wi->done = false;

    qemu_cpu_kick(cpu);
}
890
/*
 * Run and dequeue every work item queued on @cpu, in order, then wake all
 * waiters on qemu_work_cond.  Items flagged ->free are released here.
 */
static void flush_queued_work(CPUState *cpu)
{
    struct qemu_work_item *wi;

    if (cpu->queued_work_first == NULL) {
        return;
    }

    while ((wi = cpu->queued_work_first)) {
        /* Unlink before running the item. */
        cpu->queued_work_first = wi->next;
        wi->func(wi->data);
        wi->done = true;
        if (wi->free) {
            g_free(wi);
        }
    }
    cpu->queued_work_last = NULL;
    qemu_cond_broadcast(&qemu_work_cond);
}
910
/*
 * Housekeeping shared by all vCPU thread types after waking up: turn a
 * pending stop request into the stopped state (signalling qemu_pause_cond),
 * run queued work, and clear the thread_kicked flag.
 */
static void qemu_wait_io_event_common(CPUState *cpu)
{
    if (cpu->stop) {
        cpu->stop = false;
        cpu->stopped = true;
        qemu_cond_signal(&qemu_pause_cond);
    }
    flush_queued_work(cpu);
    cpu->thread_kicked = false;
}
921
/*
 * Wait point of the TCG vCPU thread: sleep on tcg_halt_cond while all vCPUs
 * are idle, let any thread that is requesting the global mutex proceed, then
 * run the common housekeeping for every vCPU.
 */
static void qemu_tcg_wait_io_event(void)
{
    CPUState *cpu;

    while (all_cpu_threads_idle()) {
        /* Give the icount clock a chance to warp before sleeping. */
        qemu_clock_warp(QEMU_CLOCK_VIRTUAL);
        qemu_cond_wait(tcg_halt_cond, &qemu_global_mutex);
    }

    /* Wait until no thread is inside qemu_mutex_lock_iothread() any more. */
    while (iothread_requesting_mutex) {
        qemu_cond_wait(&qemu_io_proceeded_cond, &qemu_global_mutex);
    }

    CPU_FOREACH(cpu) {
        qemu_wait_io_event_common(cpu);
    }
}
941
/*
 * Wait point of a KVM vCPU thread: sleep on the vCPU's halt condition while
 * it is idle, then consume pending signals and run the common housekeeping.
 */
static void qemu_kvm_wait_io_event(CPUState *cpu)
{
    while (cpu_thread_is_idle(cpu)) {
        qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
    }

    qemu_kvm_eat_signals(cpu);
    qemu_wait_io_event_common(cpu);
}
951
/*
 * Thread function of a KVM vCPU.  @arg is the CPUState.  Initialises the
 * vCPU, announces its creation, then alternates forever between running the
 * guest and waiting for events.  Never returns in practice.
 */
static void *qemu_kvm_cpu_thread_fn(void *arg)
{
    CPUState *cpu = arg;
    int r;

    rcu_register_thread();

    qemu_mutex_lock_iothread();
    qemu_thread_get_self(cpu->thread);
    cpu->thread_id = qemu_get_thread_id();
    cpu->can_do_io = 1;
    current_cpu = cpu;

    r = kvm_init_vcpu(cpu);
    if (r < 0) {
        fprintf(stderr, "kvm_init_vcpu failed: %s\n", strerror(-r));
        exit(1);
    }

    qemu_kvm_init_cpu_signals(cpu);

    /* Tell the creator (waiting on qemu_cpu_cond) that the vCPU is ready. */
    cpu->created = true;
    qemu_cond_signal(&qemu_cpu_cond);

    while (1) {
        if (cpu_can_run(cpu)) {
            r = kvm_cpu_exec(cpu);
            if (r == EXCP_DEBUG) {
                cpu_handle_guest_debug(cpu);
            }
        }
        qemu_kvm_wait_io_event(cpu);
    }

    return NULL;
}
989
990static void *qemu_dummy_cpu_thread_fn(void *arg)
991{
992#ifdef _WIN32
993 fprintf(stderr, "qtest is not supported under Windows\n");
994 exit(1);
995#else
996 CPUState *cpu = arg;
997 sigset_t waitset;
998 int r;
999
1000 rcu_register_thread();
1001
1002 qemu_mutex_lock_iothread();
1003 qemu_thread_get_self(cpu->thread);
1004 cpu->thread_id = qemu_get_thread_id();
1005 cpu->can_do_io = 1;
1006
1007 sigemptyset(&waitset);
1008 sigaddset(&waitset, SIG_IPI);
1009
1010
1011 cpu->created = true;
1012 qemu_cond_signal(&qemu_cpu_cond);
1013
1014 current_cpu = cpu;
1015 while (1) {
1016 current_cpu = NULL;
1017 qemu_mutex_unlock_iothread();
1018 do {
1019 int sig;
1020 r = sigwait(&waitset, &sig);
1021 } while (r == -1 && (errno == EAGAIN || errno == EINTR));
1022 if (r == -1) {
1023 perror("sigwait");
1024 exit(1);
1025 }
1026 qemu_mutex_lock_iothread();
1027 current_cpu = cpu;
1028 qemu_wait_io_event_common(cpu);
1029 }
1030
1031 return NULL;
1032#endif
1033}
1034
/* Defined further down; needed by the TCG thread function. */
static void tcg_exec_all(void);

/*
 * Thread function of the single TCG thread that executes all vCPUs.  @arg is
 * the CPUState the thread was created for.  Marks every vCPU as created,
 * waits until the first vCPU is no longer stopped, then loops forever
 * between executing vCPUs and waiting for events.
 */
static void *qemu_tcg_cpu_thread_fn(void *arg)
{
    CPUState *cpu = arg;

    rcu_register_thread();

    qemu_mutex_lock_iothread();
    qemu_tcg_init_cpu_signals();
    qemu_thread_get_self(cpu->thread);

    /* One thread serves all vCPUs, so flag each of them as created. */
    CPU_FOREACH(cpu) {
        cpu->thread_id = qemu_get_thread_id();
        cpu->created = true;
        cpu->can_do_io = 1;
    }
    qemu_cond_signal(&qemu_cpu_cond);

    /* Wait until the machine has been started. */
    while (first_cpu->stopped) {
        qemu_cond_wait(tcg_halt_cond, &qemu_global_mutex);

        /* Handle stop requests and queued work that arrive meanwhile. */
        CPU_FOREACH(cpu) {
            qemu_wait_io_event_common(cpu);
        }
    }

    /* Makes the first tcg_exec_all() return without executing any vCPU. */
    exit_request = 1;

    while (1) {
        tcg_exec_all();

        if (use_icount) {
            int64_t deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);

            /* A virtual-clock timer is already due: get it processed. */
            if (deadline == 0) {
                qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
            }
        }
        qemu_tcg_wait_io_event();
    }

    return NULL;
}
1082
1083static void qemu_cpu_kick_thread(CPUState *cpu)
1084{
1085#ifndef _WIN32
1086 int err;
1087
1088 err = pthread_kill(cpu->thread->thread, SIG_IPI);
1089 if (err) {
1090 fprintf(stderr, "qemu:%s: %s", __func__, strerror(err));
1091 exit(1);
1092 }
1093#else
1094 if (!qemu_cpu_is_self(cpu)) {
1095 CONTEXT tcgContext;
1096
1097 if (SuspendThread(cpu->hThread) == (DWORD)-1) {
1098 fprintf(stderr, "qemu:%s: GetLastError:%lu\n", __func__,
1099 GetLastError());
1100 exit(1);
1101 }
1102
1103
1104
1105
1106 tcgContext.ContextFlags = CONTEXT_CONTROL;
1107 while (GetThreadContext(cpu->hThread, &tcgContext) != 0) {
1108 continue;
1109 }
1110
1111 cpu_signal(0);
1112
1113 if (ResumeThread(cpu->hThread) == (DWORD)-1) {
1114 fprintf(stderr, "qemu:%s: GetLastError:%lu\n", __func__,
1115 GetLastError());
1116 exit(1);
1117 }
1118 }
1119#endif
1120}
1121
/*
 * Wake up the thread of @cpu: broadcast its halt condition and, when TCG is
 * not enabled, also kick the thread itself unless a kick is already
 * outstanding (thread_kicked).
 */
void qemu_cpu_kick(CPUState *cpu)
{
    qemu_cond_broadcast(cpu->halt_cond);
    if (!tcg_enabled() && !cpu->thread_kicked) {
        qemu_cpu_kick_thread(cpu);
        cpu->thread_kicked = true;
    }
}
1130
/*
 * Kick the calling vCPU thread itself unless a kick is already outstanding.
 * Requires current_cpu to be set.  Aborts on Win32.
 */
void qemu_cpu_kick_self(void)
{
#ifndef _WIN32
    assert(current_cpu);

    if (!current_cpu->thread_kicked) {
        qemu_cpu_kick_thread(current_cpu);
        current_cpu->thread_kicked = true;
    }
#else
    abort();
#endif
}
1144
/* Return true if the calling thread is the thread of @cpu. */
bool qemu_cpu_is_self(CPUState *cpu)
{
    return qemu_thread_is_self(cpu->thread);
}
1149
1150bool qemu_in_vcpu_thread(void)
1151{
1152 return current_cpu && qemu_cpu_is_self(current_cpu);
1153}
1154
/*
 * Per-thread flag: set by qemu_mutex_lock_iothread() and cleared by
 * qemu_mutex_unlock_iothread().
 */
static __thread bool iothread_locked = false;

/* Return true if the calling thread took the global mutex via the lock helper. */
bool qemu_mutex_iothread_locked(void)
{
    return iothread_locked;
}
1161
/*
 * Acquire the global mutex.
 *
 * While the call is in progress iothread_requesting_mutex is raised, which
 * qemu_tcg_wait_io_event() checks before continuing.  When TCG is enabled,
 * the caller is not a vCPU thread, and the first vCPU has been created, a
 * failed trylock kicks the first vCPU's thread before blocking on the lock.
 */
void qemu_mutex_lock_iothread(void)
{
    atomic_inc(&iothread_requesting_mutex);

    /* Plain lock: no TCG, caller is a vCPU thread, or no vCPU created yet. */
    if (!tcg_enabled() || qemu_in_vcpu_thread() ||
        !first_cpu || !first_cpu->created) {
        qemu_mutex_lock(&qemu_global_mutex);
        atomic_dec(&iothread_requesting_mutex);
    } else {
        /* Non-zero trylock result: lock is busy, so kick before blocking. */
        if (qemu_mutex_trylock(&qemu_global_mutex)) {
            qemu_cpu_kick_thread(first_cpu);
            qemu_mutex_lock(&qemu_global_mutex);
        }
        atomic_dec(&iothread_requesting_mutex);
        /* Let waiters in qemu_tcg_wait_io_event() re-check the counter. */
        qemu_cond_broadcast(&qemu_io_proceeded_cond);
    }
    iothread_locked = true;
}
1182
/* Release the global mutex and clear the calling thread's locked flag. */
void qemu_mutex_unlock_iothread(void)
{
    iothread_locked = false;
    qemu_mutex_unlock(&qemu_global_mutex);
}
1188
1189static int all_vcpus_paused(void)
1190{
1191 CPUState *cpu;
1192
1193 CPU_FOREACH(cpu) {
1194 if (!cpu->stopped) {
1195 return 0;
1196 }
1197 }
1198
1199 return 1;
1200}
1201
/*
 * Stop all vCPUs: disable QEMU_CLOCK_VIRTUAL, request every vCPU to stop and
 * wait until all of them report stopped.
 *
 * When called from a vCPU thread the current vCPU is stopped directly; if
 * KVM is not enabled all vCPUs are then marked stopped immediately and the
 * function returns without waiting.
 */
void pause_all_vcpus(void)
{
    CPUState *cpu;

    qemu_clock_enable(QEMU_CLOCK_VIRTUAL, false);
    CPU_FOREACH(cpu) {
        cpu->stop = true;
        qemu_cpu_kick(cpu);
    }

    if (qemu_in_vcpu_thread()) {
        cpu_stop_current();
        if (!kvm_enabled()) {
            CPU_FOREACH(cpu) {
                cpu->stop = false;
                cpu->stopped = true;
            }
            return;
        }
    }

    while (!all_vcpus_paused()) {
        qemu_cond_wait(&qemu_pause_cond, &qemu_global_mutex);
        /* Kick again after each wake-up. */
        CPU_FOREACH(cpu) {
            qemu_cpu_kick(cpu);
        }
    }
}
1230
/* Clear the stop request and stopped state of @cpu and wake its thread. */
void cpu_resume(CPUState *cpu)
{
    cpu->stop = false;
    cpu->stopped = false;
    qemu_cpu_kick(cpu);
}
1237
1238void resume_all_vcpus(void)
1239{
1240 CPUState *cpu;
1241
1242 qemu_clock_enable(QEMU_CLOCK_VIRTUAL, true);
1243 CPU_FOREACH(cpu) {
1244 cpu_resume(cpu);
1245 }
1246}
1247
1248
/* Size of the buffer used to format vCPU thread names. */
#define VCPU_THREAD_NAME_SIZE 16

/*
 * Set up @cpu for TCG.  The first vCPU creates the TCG thread and halt
 * condition and waits until the thread has marked it created; every later
 * vCPU reuses that same thread and condition.
 */
static void qemu_tcg_init_vcpu(CPUState *cpu)
{
    char thread_name[VCPU_THREAD_NAME_SIZE];

    tcg_cpu_address_space_init(cpu, cpu->as);

    /* tcg_cpu_thread is still NULL for the first vCPU only. */
    if (!tcg_cpu_thread) {
        cpu->thread = g_malloc0(sizeof(QemuThread));
        cpu->halt_cond = g_malloc0(sizeof(QemuCond));
        qemu_cond_init(cpu->halt_cond);
        tcg_halt_cond = cpu->halt_cond;
        snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/TCG",
                 cpu->cpu_index);
        qemu_thread_create(cpu->thread, thread_name, qemu_tcg_cpu_thread_fn,
                           cpu, QEMU_THREAD_JOINABLE);
#ifdef _WIN32
        cpu->hThread = qemu_thread_get_handle(cpu->thread);
#endif
        while (!cpu->created) {
            qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
        }
        tcg_cpu_thread = cpu->thread;
    } else {
        cpu->thread = tcg_cpu_thread;
        cpu->halt_cond = tcg_halt_cond;
    }
}
1279
/*
 * Create the dedicated thread and halt condition for KVM vCPU @cpu and wait
 * until the thread has marked the vCPU created.
 */
static void qemu_kvm_start_vcpu(CPUState *cpu)
{
    char thread_name[VCPU_THREAD_NAME_SIZE];

    cpu->thread = g_malloc0(sizeof(QemuThread));
    cpu->halt_cond = g_malloc0(sizeof(QemuCond));
    qemu_cond_init(cpu->halt_cond);
    snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/KVM",
             cpu->cpu_index);
    qemu_thread_create(cpu->thread, thread_name, qemu_kvm_cpu_thread_fn,
                       cpu, QEMU_THREAD_JOINABLE);
    while (!cpu->created) {
        qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
    }
}
1295
/*
 * Create the dedicated thread and halt condition for dummy vCPU @cpu and
 * wait until the thread has marked the vCPU created.
 */
static void qemu_dummy_start_vcpu(CPUState *cpu)
{
    char thread_name[VCPU_THREAD_NAME_SIZE];

    cpu->thread = g_malloc0(sizeof(QemuThread));
    cpu->halt_cond = g_malloc0(sizeof(QemuCond));
    qemu_cond_init(cpu->halt_cond);
    snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/DUMMY",
             cpu->cpu_index);
    qemu_thread_create(cpu->thread, thread_name, qemu_dummy_cpu_thread_fn, cpu,
                       QEMU_THREAD_JOINABLE);
    while (!cpu->created) {
        qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
    }
}
1311
1312void qemu_init_vcpu(CPUState *cpu)
1313{
1314 cpu->nr_cores = smp_cores;
1315 cpu->nr_threads = smp_threads;
1316 cpu->stopped = true;
1317 if (kvm_enabled()) {
1318 qemu_kvm_start_vcpu(cpu);
1319 } else if (tcg_enabled()) {
1320 qemu_tcg_init_vcpu(cpu);
1321 } else {
1322 qemu_dummy_start_vcpu(cpu);
1323 }
1324}
1325
1326void cpu_stop_current(void)
1327{
1328 if (current_cpu) {
1329 current_cpu->stop = false;
1330 current_cpu->stopped = true;
1331 cpu_exit(current_cpu);
1332 qemu_cond_signal(&qemu_pause_cond);
1333 }
1334}
1335
1336int vm_stop(RunState state)
1337{
1338 if (qemu_in_vcpu_thread()) {
1339 qemu_system_vmstop_request_prepare();
1340 qemu_system_vmstop_request(state);
1341
1342
1343
1344
1345 cpu_stop_current();
1346 return 0;
1347 }
1348
1349 return do_vm_stop(state);
1350}
1351
1352
1353
1354int vm_stop_force_state(RunState state)
1355{
1356 if (runstate_is_running()) {
1357 return vm_stop(state);
1358 } else {
1359 runstate_set(state);
1360
1361
1362 return bdrv_flush_all();
1363 }
1364}
1365
/*
 * Execute @cpu under TCG and return the result of cpu_exec().
 *
 * In icount mode the vCPU is first given an instruction budget derived from
 * the next QEMU_CLOCK_VIRTUAL deadline; whatever is left of the budget after
 * execution is returned to the global counter.
 */
static int tcg_cpu_exec(CPUState *cpu)
{
    int ret;
#ifdef CONFIG_PROFILER
    int64_t ti;
#endif

#ifdef CONFIG_PROFILER
    ti = profile_getclock();
#endif
    if (use_icount) {
        int64_t count;
        int64_t deadline;
        int decr;
        /* Take back any budget still recorded on the vCPU. */
        timers_state.qemu_icount -= (cpu->icount_decr.u16.low
                                    + cpu->icount_extra);
        cpu->icount_decr.u16.low = 0;
        cpu->icount_extra = 0;
        deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);

        /* Clamp a negative or very large deadline to INT32_MAX. */
        if ((deadline < 0) || (deadline > INT32_MAX)) {
            deadline = INT32_MAX;
        }

        count = qemu_icount_round(deadline);
        timers_state.qemu_icount += count;
        /* Split the budget: up to 0xffff in the 16-bit decrementer, rest in icount_extra. */
        decr = (count > 0xffff) ? 0xffff : count;
        count -= decr;
        cpu->icount_decr.u16.low = decr;
        cpu->icount_extra = count;
    }
    ret = cpu_exec(cpu);
#ifdef CONFIG_PROFILER
    tcg_time += profile_getclock() - ti;
#endif
    if (use_icount) {
        /* Return the unused part of the budget to the global counter. */
        timers_state.qemu_icount -= (cpu->icount_decr.u16.low
                        + cpu->icount_extra);
        cpu->icount_decr.u32 = 0;
        cpu->icount_extra = 0;
    }
    return ret;
}
1416
/*
 * Run vCPUs round-robin under TCG, starting at next_cpu, until the list is
 * exhausted, exit_request is set, a vCPU hits a debug exception, or a vCPU
 * that cannot run is stopping or stopped.  Clears exit_request on return.
 */
static void tcg_exec_all(void)
{
    int r;

    /* Account for a pending clock warp before executing anything. */
    qemu_clock_warp(QEMU_CLOCK_VIRTUAL);

    if (next_cpu == NULL) {
        next_cpu = first_cpu;
    }
    for (; next_cpu != NULL && !exit_request; next_cpu = CPU_NEXT(next_cpu)) {
        CPUState *cpu = next_cpu;

        /* The virtual clock is disabled while single-stepping with SSTEP_NOTIMER. */
        qemu_clock_enable(QEMU_CLOCK_VIRTUAL,
                          (cpu->singlestep_enabled & SSTEP_NOTIMER) == 0);

        if (cpu_can_run(cpu)) {
            r = tcg_cpu_exec(cpu);
            if (r == EXCP_DEBUG) {
                cpu_handle_guest_debug(cpu);
                break;
            }
        } else if (cpu->stop || cpu->stopped) {
            break;
        }
    }
    exit_request = 0;
}
1445
/*
 * Print the list of supported CPU models to @f using @cpu_fprintf, when the
 * target defines cpu_list; otherwise do nothing.  @optarg is unused.
 */
void list_cpus(FILE *f, fprintf_function cpu_fprintf, const char *optarg)
{
    /* cpu_list is an optional, target-provided macro. */
#if defined(cpu_list)
    cpu_list(f, cpu_fprintf);
#endif
}
1453
/*
 * QMP handler: build and return a list with one CpuInfo entry per vCPU, in
 * vCPU list order.  Each entry carries the index, whether it is the first
 * vCPU, the halted flag, the QOM path, the thread id and, for the targets
 * handled below, the program counter.  @errp is not used.
 */
CpuInfoList *qmp_query_cpus(Error **errp)
{
    CpuInfoList *head = NULL, *cur_item = NULL;
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        CpuInfoList *info;
        /* Target-specific view of the vCPU, needed to read the PC below. */
#if defined(TARGET_I386)
        X86CPU *x86_cpu = X86_CPU(cpu);
        CPUX86State *env = &x86_cpu->env;
#elif defined(TARGET_PPC)
        PowerPCCPU *ppc_cpu = POWERPC_CPU(cpu);
        CPUPPCState *env = &ppc_cpu->env;
#elif defined(TARGET_SPARC)
        SPARCCPU *sparc_cpu = SPARC_CPU(cpu);
        CPUSPARCState *env = &sparc_cpu->env;
#elif defined(TARGET_MIPS)
        MIPSCPU *mips_cpu = MIPS_CPU(cpu);
        CPUMIPSState *env = &mips_cpu->env;
#elif defined(TARGET_TRICORE)
        TriCoreCPU *tricore_cpu = TRICORE_CPU(cpu);
        CPUTriCoreState *env = &tricore_cpu->env;
#endif

        /* Synchronise the vCPU state before reading registers. */
        cpu_synchronize_state(cpu);

        info = g_malloc0(sizeof(*info));
        info->value = g_malloc0(sizeof(*info->value));
        info->value->CPU = cpu->cpu_index;
        info->value->current = (cpu == first_cpu);
        info->value->halted = cpu->halted;
        info->value->qom_path = object_get_canonical_path(OBJECT(cpu));
        info->value->thread_id = cpu->thread_id;
#if defined(TARGET_I386)
        info->value->has_pc = true;
        info->value->pc = env->eip + env->segs[R_CS].base;
#elif defined(TARGET_PPC)
        info->value->has_nip = true;
        info->value->nip = env->nip;
#elif defined(TARGET_SPARC)
        info->value->has_pc = true;
        info->value->pc = env->pc;
        info->value->has_npc = true;
        info->value->npc = env->npc;
#elif defined(TARGET_MIPS)
        info->value->has_PC = true;
        info->value->PC = env->active_tc.PC;
#elif defined(TARGET_TRICORE)
        info->value->has_PC = true;
        info->value->PC = env->PC;
#endif

        /* Append to the tail so the list keeps vCPU order. */
        if (!cur_item) {
            head = cur_item = info;
        } else {
            cur_item->next = info;
            cur_item = info;
        }
    }

    return head;
}
1517
1518void qmp_memsave(int64_t addr, int64_t size, const char *filename,
1519 bool has_cpu, int64_t cpu_index, Error **errp)
1520{
1521 FILE *f;
1522 uint32_t l;
1523 CPUState *cpu;
1524 uint8_t buf[1024];
1525 int64_t orig_addr = addr, orig_size = size;
1526
1527 if (!has_cpu) {
1528 cpu_index = 0;
1529 }
1530
1531 cpu = qemu_get_cpu(cpu_index);
1532 if (cpu == NULL) {
1533 error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "cpu-index",
1534 "a CPU number");
1535 return;
1536 }
1537
1538 f = fopen(filename, "wb");
1539 if (!f) {
1540 error_setg_file_open(errp, errno, filename);
1541 return;
1542 }
1543
1544 while (size != 0) {
1545 l = sizeof(buf);
1546 if (l > size)
1547 l = size;
1548 if (cpu_memory_rw_debug(cpu, addr, buf, l, 0) != 0) {
1549 error_setg(errp, "Invalid addr 0x%016" PRIx64 "/size %" PRId64
1550 " specified", orig_addr, orig_size);
1551 goto exit;
1552 }
1553 if (fwrite(buf, 1, l, f) != l) {
1554 error_setg(errp, QERR_IO_ERROR);
1555 goto exit;
1556 }
1557 addr += l;
1558 size -= l;
1559 }
1560
1561exit:
1562 fclose(f);
1563}
1564
1565void qmp_pmemsave(int64_t addr, int64_t size, const char *filename,
1566 Error **errp)
1567{
1568 FILE *f;
1569 uint32_t l;
1570 uint8_t buf[1024];
1571
1572 f = fopen(filename, "wb");
1573 if (!f) {
1574 error_setg_file_open(errp, errno, filename);
1575 return;
1576 }
1577
1578 while (size != 0) {
1579 l = sizeof(buf);
1580 if (l > size)
1581 l = size;
1582 cpu_physical_memory_read(addr, buf, l);
1583 if (fwrite(buf, 1, l, f) != l) {
1584 error_setg(errp, QERR_IO_ERROR);
1585 goto exit;
1586 }
1587 addr += l;
1588 size -= l;
1589 }
1590
1591exit:
1592 fclose(f);
1593}
1594
/*
 * QMP handler: inject an NMI.
 *
 * On x86 targets every vCPU receives one, through its APIC when it has one
 * and as a direct CPU interrupt otherwise.  On other targets the request is
 * forwarded to nmi_monitor_handle() for the monitor's current CPU index,
 * which reports errors through @errp.
 */
void qmp_inject_nmi(Error **errp)
{
#if defined(TARGET_I386)
    CPUState *cs;

    CPU_FOREACH(cs) {
        X86CPU *cpu = X86_CPU(cs);

        if (!cpu->apic_state) {
            cpu_interrupt(cs, CPU_INTERRUPT_NMI);
        } else {
            apic_deliver_nmi(cpu->apic_state);
        }
    }
#else
    nmi_monitor_handle(monitor_get_cpu_index(), errp);
#endif
}
1613
/*
 * Print icount drift statistics to @f using @cpu_fprintf: the difference
 * between the guest clock and the icount clock in ms and, when the align
 * option is on, the recorded maximum guest delay and advance.  Prints
 * nothing when icount is disabled.
 */
void dump_drift_info(FILE *f, fprintf_function cpu_fprintf)
{
    if (!use_icount) {
        return;
    }

    cpu_fprintf(f, "Host - Guest clock  %"PRIi64" ms\n",
                (cpu_get_clock() - cpu_get_icount())/SCALE_MS);
    if (icount_align_option) {
        /* max_delay is printed with its sign flipped. */
        cpu_fprintf(f, "Max guest delay     %"PRIi64" ms\n", -max_delay/SCALE_MS);
        cpu_fprintf(f, "Max guest advance   %"PRIi64" ms\n", max_advance/SCALE_MS);
    } else {
        cpu_fprintf(f, "Max guest delay     NA\n");
        cpu_fprintf(f, "Max guest advance   NA\n");
    }
}
1630