1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26#include "config-host.h"
27
28#include "monitor/monitor.h"
29#include "qapi/qmp/qerror.h"
30#include "sysemu/sysemu.h"
31#include "exec/gdbstub.h"
32#include "sysemu/dma.h"
33#include "sysemu/kvm.h"
34#include "qmp-commands.h"
35
36#include "qemu/thread.h"
37#include "sysemu/cpus.h"
38#include "sysemu/qtest.h"
39#include "qemu/main-loop.h"
40#include "qemu/bitmap.h"
41#include "qemu/seqlock.h"
42#include "qapi-event.h"
43#include "hw/nmi.h"
44
45#ifndef _WIN32
46#include "qemu/compatfd.h"
47#endif
48
#ifdef CONFIG_LINUX

#include <sys/prctl.h>

/*
 * Fallback values for the prctl() constants passed by qemu_init_sigbus(),
 * for builds whose headers do not define them.
 */
#if !defined(PR_MCE_KILL)
#define PR_MCE_KILL 33
#endif

#if !defined(PR_MCE_KILL_SET)
#define PR_MCE_KILL_SET 1
#endif

#if !defined(PR_MCE_KILL_EARLY)
#define PR_MCE_KILL_EARLY 1
#endif

#endif /* CONFIG_LINUX */
66
67static CPUState *next_cpu;
68int64_t max_delay;
69int64_t max_advance;
70
71bool cpu_is_stopped(CPUState *cpu)
72{
73 return cpu->stopped || !runstate_is_running();
74}
75
76static bool cpu_thread_is_idle(CPUState *cpu)
77{
78 if (cpu->stop || cpu->queued_work_first) {
79 return false;
80 }
81 if (cpu_is_stopped(cpu)) {
82 return true;
83 }
84 if (!cpu->halted || cpu_has_work(cpu) ||
85 kvm_halt_in_kernel()) {
86 return false;
87 }
88 return true;
89}
90
91static bool all_cpu_threads_idle(void)
92{
93 CPUState *cpu;
94
95 CPU_FOREACH(cpu) {
96 if (!cpu_thread_is_idle(cpu)) {
97 return false;
98 }
99 }
100 return true;
101}
102
103
104
105
106
107
108static int64_t vm_clock_warp_start = -1;
109
110static int icount_time_shift;
111
112#define MAX_ICOUNT_SHIFT 10
113
114static QEMUTimer *icount_rt_timer;
115static QEMUTimer *icount_vm_timer;
116static QEMUTimer *icount_warp_timer;
117
118typedef struct TimersState {
119
120 int64_t cpu_ticks_prev;
121 int64_t cpu_ticks_offset;
122
123
124
125
126 QemuSeqLock vm_clock_seqlock;
127 int64_t cpu_clock_offset;
128 int32_t cpu_ticks_enabled;
129 int64_t dummy;
130
131
132 int64_t qemu_icount_bias;
133
134 int64_t qemu_icount;
135} TimersState;
136
137static TimersState timers_state;
138
139
140static int64_t cpu_get_icount_locked(void)
141{
142 int64_t icount;
143 CPUState *cpu = current_cpu;
144
145 icount = timers_state.qemu_icount;
146 if (cpu) {
147 if (!cpu_can_do_io(cpu)) {
148 fprintf(stderr, "Bad clock read\n");
149 }
150 icount -= (cpu->icount_decr.u16.low + cpu->icount_extra);
151 }
152 return timers_state.qemu_icount_bias + cpu_icount_to_ns(icount);
153}
154
155int64_t cpu_get_icount(void)
156{
157 int64_t icount;
158 unsigned start;
159
160 do {
161 start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
162 icount = cpu_get_icount_locked();
163 } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));
164
165 return icount;
166}
167
168int64_t cpu_icount_to_ns(int64_t icount)
169{
170 return icount << icount_time_shift;
171}
172
173
174
175int64_t cpu_get_ticks(void)
176{
177 int64_t ticks;
178
179 if (use_icount) {
180 return cpu_get_icount();
181 }
182
183 ticks = timers_state.cpu_ticks_offset;
184 if (timers_state.cpu_ticks_enabled) {
185 ticks += cpu_get_real_ticks();
186 }
187
188 if (timers_state.cpu_ticks_prev > ticks) {
189
190
191 timers_state.cpu_ticks_offset += timers_state.cpu_ticks_prev - ticks;
192 ticks = timers_state.cpu_ticks_prev;
193 }
194
195 timers_state.cpu_ticks_prev = ticks;
196 return ticks;
197}
198
199static int64_t cpu_get_clock_locked(void)
200{
201 int64_t ticks;
202
203 ticks = timers_state.cpu_clock_offset;
204 if (timers_state.cpu_ticks_enabled) {
205 ticks += get_clock();
206 }
207
208 return ticks;
209}
210
211
212int64_t cpu_get_clock(void)
213{
214 int64_t ti;
215 unsigned start;
216
217 do {
218 start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
219 ti = cpu_get_clock_locked();
220 } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));
221
222 return ti;
223}
224
225
226int64_t cpu_get_clock_offset(void)
227{
228 int64_t ti;
229 unsigned start;
230
231 do {
232 start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
233 ti = timers_state.cpu_clock_offset;
234 if (!timers_state.cpu_ticks_enabled) {
235 ti -= get_clock();
236 }
237 } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));
238
239 return -ti;
240}
241
242
243
244
245void cpu_enable_ticks(void)
246{
247
248 seqlock_write_lock(&timers_state.vm_clock_seqlock);
249 if (!timers_state.cpu_ticks_enabled) {
250 timers_state.cpu_ticks_offset -= cpu_get_real_ticks();
251 timers_state.cpu_clock_offset -= get_clock();
252 timers_state.cpu_ticks_enabled = 1;
253 }
254 seqlock_write_unlock(&timers_state.vm_clock_seqlock);
255}
256
257
258
259
260
261void cpu_disable_ticks(void)
262{
263
264 seqlock_write_lock(&timers_state.vm_clock_seqlock);
265 if (timers_state.cpu_ticks_enabled) {
266 timers_state.cpu_ticks_offset += cpu_get_real_ticks();
267 timers_state.cpu_clock_offset = cpu_get_clock_locked();
268 timers_state.cpu_ticks_enabled = 0;
269 }
270 seqlock_write_unlock(&timers_state.vm_clock_seqlock);
271}
272
273
274
275
276
277#define ICOUNT_WOBBLE (get_ticks_per_sec() / 10)
278
279static void icount_adjust(void)
280{
281 int64_t cur_time;
282 int64_t cur_icount;
283 int64_t delta;
284
285
286 static int64_t last_delta;
287
288
289 if (!runstate_is_running()) {
290 return;
291 }
292
293 seqlock_write_lock(&timers_state.vm_clock_seqlock);
294 cur_time = cpu_get_clock_locked();
295 cur_icount = cpu_get_icount_locked();
296
297 delta = cur_icount - cur_time;
298
299 if (delta > 0
300 && last_delta + ICOUNT_WOBBLE < delta * 2
301 && icount_time_shift > 0) {
302
303 icount_time_shift--;
304 }
305 if (delta < 0
306 && last_delta - ICOUNT_WOBBLE > delta * 2
307 && icount_time_shift < MAX_ICOUNT_SHIFT) {
308
309 icount_time_shift++;
310 }
311 last_delta = delta;
312 timers_state.qemu_icount_bias = cur_icount
313 - (timers_state.qemu_icount << icount_time_shift);
314 seqlock_write_unlock(&timers_state.vm_clock_seqlock);
315}
316
317static void icount_adjust_rt(void *opaque)
318{
319 timer_mod(icount_rt_timer,
320 qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + 1000);
321 icount_adjust();
322}
323
324static void icount_adjust_vm(void *opaque)
325{
326 timer_mod(icount_vm_timer,
327 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
328 get_ticks_per_sec() / 10);
329 icount_adjust();
330}
331
332static int64_t qemu_icount_round(int64_t count)
333{
334 return (count + (1 << icount_time_shift) - 1) >> icount_time_shift;
335}
336
337static void icount_warp_rt(void *opaque)
338{
339
340
341
342 if (atomic_read(&vm_clock_warp_start) == -1) {
343 return;
344 }
345
346 seqlock_write_lock(&timers_state.vm_clock_seqlock);
347 if (runstate_is_running()) {
348 int64_t clock = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
349 int64_t warp_delta;
350
351 warp_delta = clock - vm_clock_warp_start;
352 if (use_icount == 2) {
353
354
355
356
357 int64_t cur_time = cpu_get_clock_locked();
358 int64_t cur_icount = cpu_get_icount_locked();
359 int64_t delta = cur_time - cur_icount;
360 warp_delta = MIN(warp_delta, delta);
361 }
362 timers_state.qemu_icount_bias += warp_delta;
363 }
364 vm_clock_warp_start = -1;
365 seqlock_write_unlock(&timers_state.vm_clock_seqlock);
366
367 if (qemu_clock_expired(QEMU_CLOCK_VIRTUAL)) {
368 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
369 }
370}
371
372void qtest_clock_warp(int64_t dest)
373{
374 int64_t clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
375 assert(qtest_enabled());
376 while (clock < dest) {
377 int64_t deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
378 int64_t warp = qemu_soonest_timeout(dest - clock, deadline);
379 seqlock_write_lock(&timers_state.vm_clock_seqlock);
380 timers_state.qemu_icount_bias += warp;
381 seqlock_write_unlock(&timers_state.vm_clock_seqlock);
382
383 qemu_clock_run_timers(QEMU_CLOCK_VIRTUAL);
384 clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
385 }
386 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
387}
388
389void qemu_clock_warp(QEMUClockType type)
390{
391 int64_t clock;
392 int64_t deadline;
393
394
395
396
397
398
399 if (type != QEMU_CLOCK_VIRTUAL || !use_icount) {
400 return;
401 }
402
403
404
405
406
407
408
409
410 icount_warp_rt(NULL);
411 timer_del(icount_warp_timer);
412 if (!all_cpu_threads_idle()) {
413 return;
414 }
415
416 if (qtest_enabled()) {
417
418 return;
419 }
420
421
422 clock = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
423 deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
424 if (deadline < 0) {
425 return;
426 }
427
428 if (deadline > 0) {
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446 seqlock_write_lock(&timers_state.vm_clock_seqlock);
447 if (vm_clock_warp_start == -1 || vm_clock_warp_start > clock) {
448 vm_clock_warp_start = clock;
449 }
450 seqlock_write_unlock(&timers_state.vm_clock_seqlock);
451 timer_mod_anticipate(icount_warp_timer, clock + deadline);
452 } else if (deadline == 0) {
453 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
454 }
455}
456
457static bool icount_state_needed(void *opaque)
458{
459 return use_icount;
460}
461
462
463
464
465static const VMStateDescription icount_vmstate_timers = {
466 .name = "timer/icount",
467 .version_id = 1,
468 .minimum_version_id = 1,
469 .fields = (VMStateField[]) {
470 VMSTATE_INT64(qemu_icount_bias, TimersState),
471 VMSTATE_INT64(qemu_icount, TimersState),
472 VMSTATE_END_OF_LIST()
473 }
474};
475
476static const VMStateDescription vmstate_timers = {
477 .name = "timer",
478 .version_id = 2,
479 .minimum_version_id = 1,
480 .fields = (VMStateField[]) {
481 VMSTATE_INT64(cpu_ticks_offset, TimersState),
482 VMSTATE_INT64(dummy, TimersState),
483 VMSTATE_INT64_V(cpu_clock_offset, TimersState, 2),
484 VMSTATE_END_OF_LIST()
485 },
486 .subsections = (VMStateSubsection[]) {
487 {
488 .vmsd = &icount_vmstate_timers,
489 .needed = icount_state_needed,
490 }, {
491
492 }
493 }
494};
495
496void cpu_ticks_init(void)
497{
498 seqlock_init(&timers_state.vm_clock_seqlock, NULL);
499 vmstate_register(NULL, 0, &vmstate_timers, &timers_state);
500}
501
502void configure_icount(QemuOpts *opts, Error **errp)
503{
504 const char *option;
505 char *rem_str = NULL;
506
507 option = qemu_opt_get(opts, "shift");
508 if (!option) {
509 if (qemu_opt_get(opts, "align") != NULL) {
510 error_setg(errp, "Please specify shift option when using align");
511 }
512 return;
513 }
514 icount_align_option = qemu_opt_get_bool(opts, "align", false);
515 icount_warp_timer = timer_new_ns(QEMU_CLOCK_REALTIME,
516 icount_warp_rt, NULL);
517 if (strcmp(option, "auto") != 0) {
518 errno = 0;
519 icount_time_shift = strtol(option, &rem_str, 0);
520 if (errno != 0 || *rem_str != '\0' || !strlen(option)) {
521 error_setg(errp, "icount: Invalid shift value");
522 }
523 use_icount = 1;
524 return;
525 } else if (icount_align_option) {
526 error_setg(errp, "shift=auto and align=on are incompatible");
527 }
528
529 use_icount = 2;
530
531
532
533 icount_time_shift = 3;
534
535
536
537
538
539
540 icount_rt_timer = timer_new_ms(QEMU_CLOCK_REALTIME,
541 icount_adjust_rt, NULL);
542 timer_mod(icount_rt_timer,
543 qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + 1000);
544 icount_vm_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
545 icount_adjust_vm, NULL);
546 timer_mod(icount_vm_timer,
547 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
548 get_ticks_per_sec() / 10);
549}
550
551
552void hw_error(const char *fmt, ...)
553{
554 va_list ap;
555 CPUState *cpu;
556
557 va_start(ap, fmt);
558 fprintf(stderr, "qemu: hardware error: ");
559 vfprintf(stderr, fmt, ap);
560 fprintf(stderr, "\n");
561 CPU_FOREACH(cpu) {
562 fprintf(stderr, "CPU #%d:\n", cpu->cpu_index);
563 cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_FPU);
564 }
565 va_end(ap);
566 abort();
567}
568
569void cpu_synchronize_all_states(void)
570{
571 CPUState *cpu;
572
573 CPU_FOREACH(cpu) {
574 cpu_synchronize_state(cpu);
575 }
576}
577
578void cpu_synchronize_all_post_reset(void)
579{
580 CPUState *cpu;
581
582 CPU_FOREACH(cpu) {
583 cpu_synchronize_post_reset(cpu);
584 }
585}
586
587void cpu_synchronize_all_post_init(void)
588{
589 CPUState *cpu;
590
591 CPU_FOREACH(cpu) {
592 cpu_synchronize_post_init(cpu);
593 }
594}
595
596void cpu_clean_all_dirty(void)
597{
598 CPUState *cpu;
599
600 CPU_FOREACH(cpu) {
601 cpu_clean_state(cpu);
602 }
603}
604
605static int do_vm_stop(RunState state)
606{
607 int ret = 0;
608
609 if (runstate_is_running()) {
610 cpu_disable_ticks();
611 pause_all_vcpus();
612 runstate_set(state);
613 vm_state_notify(0, state);
614 qapi_event_send_stop(&error_abort);
615 }
616
617 bdrv_drain_all();
618 ret = bdrv_flush_all();
619
620 return ret;
621}
622
623static bool cpu_can_run(CPUState *cpu)
624{
625 if (cpu->stop) {
626 return false;
627 }
628 if (cpu_is_stopped(cpu)) {
629 return false;
630 }
631 return true;
632}
633
634static void cpu_handle_guest_debug(CPUState *cpu)
635{
636 gdb_set_stop_cpu(cpu);
637 qemu_system_debug_request();
638 cpu->stopped = true;
639}
640
641static void cpu_signal(int sig)
642{
643 if (current_cpu) {
644 cpu_exit(current_cpu);
645 }
646 exit_request = 1;
647}
648
649#ifdef CONFIG_LINUX
650static void sigbus_reraise(void)
651{
652 sigset_t set;
653 struct sigaction action;
654
655 memset(&action, 0, sizeof(action));
656 action.sa_handler = SIG_DFL;
657 if (!sigaction(SIGBUS, &action, NULL)) {
658 raise(SIGBUS);
659 sigemptyset(&set);
660 sigaddset(&set, SIGBUS);
661 sigprocmask(SIG_UNBLOCK, &set, NULL);
662 }
663 perror("Failed to re-raise SIGBUS!\n");
664 abort();
665}
666
667static void sigbus_handler(int n, struct qemu_signalfd_siginfo *siginfo,
668 void *ctx)
669{
670 if (kvm_on_sigbus(siginfo->ssi_code,
671 (void *)(intptr_t)siginfo->ssi_addr)) {
672 sigbus_reraise();
673 }
674}
675
676static void qemu_init_sigbus(void)
677{
678 struct sigaction action;
679
680 memset(&action, 0, sizeof(action));
681 action.sa_flags = SA_SIGINFO;
682 action.sa_sigaction = (void (*)(int, siginfo_t*, void*))sigbus_handler;
683 sigaction(SIGBUS, &action, NULL);
684
685 prctl(PR_MCE_KILL, PR_MCE_KILL_SET, PR_MCE_KILL_EARLY, 0, 0);
686}
687
688static void qemu_kvm_eat_signals(CPUState *cpu)
689{
690 struct timespec ts = { 0, 0 };
691 siginfo_t siginfo;
692 sigset_t waitset;
693 sigset_t chkset;
694 int r;
695
696 sigemptyset(&waitset);
697 sigaddset(&waitset, SIG_IPI);
698 sigaddset(&waitset, SIGBUS);
699
700 do {
701 r = sigtimedwait(&waitset, &siginfo, &ts);
702 if (r == -1 && !(errno == EAGAIN || errno == EINTR)) {
703 perror("sigtimedwait");
704 exit(1);
705 }
706
707 switch (r) {
708 case SIGBUS:
709 if (kvm_on_sigbus_vcpu(cpu, siginfo.si_code, siginfo.si_addr)) {
710 sigbus_reraise();
711 }
712 break;
713 default:
714 break;
715 }
716
717 r = sigpending(&chkset);
718 if (r == -1) {
719 perror("sigpending");
720 exit(1);
721 }
722 } while (sigismember(&chkset, SIG_IPI) || sigismember(&chkset, SIGBUS));
723}
724
725#else
726
/* Variant used when CONFIG_LINUX is not defined: nothing to set up. */
static void qemu_init_sigbus(void)
{
    /* intentionally empty */
}
730
731static void qemu_kvm_eat_signals(CPUState *cpu)
732{
733}
734#endif
735
736#ifndef _WIN32
/*
 * No-op handler installed for SIG_IPI by qemu_kvm_init_cpu_signals().
 * @sig is unused.
 */
static void dummy_signal(int sig)
{
    /* intentionally empty */
}
740
741static void qemu_kvm_init_cpu_signals(CPUState *cpu)
742{
743 int r;
744 sigset_t set;
745 struct sigaction sigact;
746
747 memset(&sigact, 0, sizeof(sigact));
748 sigact.sa_handler = dummy_signal;
749 sigaction(SIG_IPI, &sigact, NULL);
750
751 pthread_sigmask(SIG_BLOCK, NULL, &set);
752 sigdelset(&set, SIG_IPI);
753 sigdelset(&set, SIGBUS);
754 r = kvm_set_signal_mask(cpu, &set);
755 if (r) {
756 fprintf(stderr, "kvm_set_signal_mask: %s\n", strerror(-r));
757 exit(1);
758 }
759}
760
761static void qemu_tcg_init_cpu_signals(void)
762{
763 sigset_t set;
764 struct sigaction sigact;
765
766 memset(&sigact, 0, sizeof(sigact));
767 sigact.sa_handler = cpu_signal;
768 sigaction(SIG_IPI, &sigact, NULL);
769
770 sigemptyset(&set);
771 sigaddset(&set, SIG_IPI);
772 pthread_sigmask(SIG_UNBLOCK, &set, NULL);
773}
774
775#else
776static void qemu_kvm_init_cpu_signals(CPUState *cpu)
777{
778 abort();
779}
780
/* _WIN32 variant: there is no signal setup to perform. */
static void qemu_tcg_init_cpu_signals(void)
{
    /* intentionally empty */
}
784#endif
785
786static QemuMutex qemu_global_mutex;
787static QemuCond qemu_io_proceeded_cond;
788static bool iothread_requesting_mutex;
789
790static QemuThread io_thread;
791
792static QemuThread *tcg_cpu_thread;
793static QemuCond *tcg_halt_cond;
794
795
796static QemuCond qemu_cpu_cond;
797
798static QemuCond qemu_pause_cond;
799static QemuCond qemu_work_cond;
800
801void qemu_init_cpu_loop(void)
802{
803 qemu_init_sigbus();
804 qemu_cond_init(&qemu_cpu_cond);
805 qemu_cond_init(&qemu_pause_cond);
806 qemu_cond_init(&qemu_work_cond);
807 qemu_cond_init(&qemu_io_proceeded_cond);
808 qemu_mutex_init(&qemu_global_mutex);
809
810 qemu_thread_get_self(&io_thread);
811}
812
813void run_on_cpu(CPUState *cpu, void (*func)(void *data), void *data)
814{
815 struct qemu_work_item wi;
816
817 if (qemu_cpu_is_self(cpu)) {
818 func(data);
819 return;
820 }
821
822 wi.func = func;
823 wi.data = data;
824 wi.free = false;
825 if (cpu->queued_work_first == NULL) {
826 cpu->queued_work_first = &wi;
827 } else {
828 cpu->queued_work_last->next = &wi;
829 }
830 cpu->queued_work_last = &wi;
831 wi.next = NULL;
832 wi.done = false;
833
834 qemu_cpu_kick(cpu);
835 while (!wi.done) {
836 CPUState *self_cpu = current_cpu;
837
838 qemu_cond_wait(&qemu_work_cond, &qemu_global_mutex);
839 current_cpu = self_cpu;
840 }
841}
842
843void async_run_on_cpu(CPUState *cpu, void (*func)(void *data), void *data)
844{
845 struct qemu_work_item *wi;
846
847 if (qemu_cpu_is_self(cpu)) {
848 func(data);
849 return;
850 }
851
852 wi = g_malloc0(sizeof(struct qemu_work_item));
853 wi->func = func;
854 wi->data = data;
855 wi->free = true;
856 if (cpu->queued_work_first == NULL) {
857 cpu->queued_work_first = wi;
858 } else {
859 cpu->queued_work_last->next = wi;
860 }
861 cpu->queued_work_last = wi;
862 wi->next = NULL;
863 wi->done = false;
864
865 qemu_cpu_kick(cpu);
866}
867
868static void flush_queued_work(CPUState *cpu)
869{
870 struct qemu_work_item *wi;
871
872 if (cpu->queued_work_first == NULL) {
873 return;
874 }
875
876 while ((wi = cpu->queued_work_first)) {
877 cpu->queued_work_first = wi->next;
878 wi->func(wi->data);
879 wi->done = true;
880 if (wi->free) {
881 g_free(wi);
882 }
883 }
884 cpu->queued_work_last = NULL;
885 qemu_cond_broadcast(&qemu_work_cond);
886}
887
888static void qemu_wait_io_event_common(CPUState *cpu)
889{
890 if (cpu->stop) {
891 cpu->stop = false;
892 cpu->stopped = true;
893 qemu_cond_signal(&qemu_pause_cond);
894 }
895 flush_queued_work(cpu);
896 cpu->thread_kicked = false;
897}
898
899static void qemu_tcg_wait_io_event(void)
900{
901 CPUState *cpu;
902
903 while (all_cpu_threads_idle()) {
904
905
906 qemu_clock_warp(QEMU_CLOCK_VIRTUAL);
907 qemu_cond_wait(tcg_halt_cond, &qemu_global_mutex);
908 }
909
910 while (iothread_requesting_mutex) {
911 qemu_cond_wait(&qemu_io_proceeded_cond, &qemu_global_mutex);
912 }
913
914 CPU_FOREACH(cpu) {
915 qemu_wait_io_event_common(cpu);
916 }
917}
918
919static void qemu_kvm_wait_io_event(CPUState *cpu)
920{
921 while (cpu_thread_is_idle(cpu)) {
922 qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
923 }
924
925 qemu_kvm_eat_signals(cpu);
926 qemu_wait_io_event_common(cpu);
927}
928
929static void *qemu_kvm_cpu_thread_fn(void *arg)
930{
931 CPUState *cpu = arg;
932 int r;
933
934 qemu_mutex_lock(&qemu_global_mutex);
935 qemu_thread_get_self(cpu->thread);
936 cpu->thread_id = qemu_get_thread_id();
937 current_cpu = cpu;
938
939 r = kvm_init_vcpu(cpu);
940 if (r < 0) {
941 fprintf(stderr, "kvm_init_vcpu failed: %s\n", strerror(-r));
942 exit(1);
943 }
944
945 qemu_kvm_init_cpu_signals(cpu);
946
947
948 cpu->created = true;
949 qemu_cond_signal(&qemu_cpu_cond);
950
951 while (1) {
952 if (cpu_can_run(cpu)) {
953 r = kvm_cpu_exec(cpu);
954 if (r == EXCP_DEBUG) {
955 cpu_handle_guest_debug(cpu);
956 }
957 }
958 qemu_kvm_wait_io_event(cpu);
959 }
960
961 return NULL;
962}
963
964static void *qemu_dummy_cpu_thread_fn(void *arg)
965{
966#ifdef _WIN32
967 fprintf(stderr, "qtest is not supported under Windows\n");
968 exit(1);
969#else
970 CPUState *cpu = arg;
971 sigset_t waitset;
972 int r;
973
974 qemu_mutex_lock_iothread();
975 qemu_thread_get_self(cpu->thread);
976 cpu->thread_id = qemu_get_thread_id();
977
978 sigemptyset(&waitset);
979 sigaddset(&waitset, SIG_IPI);
980
981
982 cpu->created = true;
983 qemu_cond_signal(&qemu_cpu_cond);
984
985 current_cpu = cpu;
986 while (1) {
987 current_cpu = NULL;
988 qemu_mutex_unlock_iothread();
989 do {
990 int sig;
991 r = sigwait(&waitset, &sig);
992 } while (r == -1 && (errno == EAGAIN || errno == EINTR));
993 if (r == -1) {
994 perror("sigwait");
995 exit(1);
996 }
997 qemu_mutex_lock_iothread();
998 current_cpu = cpu;
999 qemu_wait_io_event_common(cpu);
1000 }
1001
1002 return NULL;
1003#endif
1004}
1005
1006static void tcg_exec_all(void);
1007
1008static void *qemu_tcg_cpu_thread_fn(void *arg)
1009{
1010 CPUState *cpu = arg;
1011
1012 qemu_tcg_init_cpu_signals();
1013 qemu_thread_get_self(cpu->thread);
1014
1015 qemu_mutex_lock(&qemu_global_mutex);
1016 CPU_FOREACH(cpu) {
1017 cpu->thread_id = qemu_get_thread_id();
1018 cpu->created = true;
1019 }
1020 qemu_cond_signal(&qemu_cpu_cond);
1021
1022
1023 while (QTAILQ_FIRST(&cpus)->stopped) {
1024 qemu_cond_wait(tcg_halt_cond, &qemu_global_mutex);
1025
1026
1027 CPU_FOREACH(cpu) {
1028 qemu_wait_io_event_common(cpu);
1029 }
1030 }
1031
1032 while (1) {
1033 tcg_exec_all();
1034
1035 if (use_icount) {
1036 int64_t deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
1037
1038 if (deadline == 0) {
1039 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
1040 }
1041 }
1042 qemu_tcg_wait_io_event();
1043 }
1044
1045 return NULL;
1046}
1047
1048static void qemu_cpu_kick_thread(CPUState *cpu)
1049{
1050#ifndef _WIN32
1051 int err;
1052
1053 err = pthread_kill(cpu->thread->thread, SIG_IPI);
1054 if (err) {
1055 fprintf(stderr, "qemu:%s: %s", __func__, strerror(err));
1056 exit(1);
1057 }
1058#else
1059 if (!qemu_cpu_is_self(cpu)) {
1060 CONTEXT tcgContext;
1061
1062 if (SuspendThread(cpu->hThread) == (DWORD)-1) {
1063 fprintf(stderr, "qemu:%s: GetLastError:%lu\n", __func__,
1064 GetLastError());
1065 exit(1);
1066 }
1067
1068
1069
1070
1071 tcgContext.ContextFlags = CONTEXT_CONTROL;
1072 while (GetThreadContext(cpu->hThread, &tcgContext) != 0) {
1073 continue;
1074 }
1075
1076 cpu_signal(0);
1077
1078 if (ResumeThread(cpu->hThread) == (DWORD)-1) {
1079 fprintf(stderr, "qemu:%s: GetLastError:%lu\n", __func__,
1080 GetLastError());
1081 exit(1);
1082 }
1083 }
1084#endif
1085}
1086
1087void qemu_cpu_kick(CPUState *cpu)
1088{
1089 qemu_cond_broadcast(cpu->halt_cond);
1090 if (!tcg_enabled() && !cpu->thread_kicked) {
1091 qemu_cpu_kick_thread(cpu);
1092 cpu->thread_kicked = true;
1093 }
1094}
1095
1096void qemu_cpu_kick_self(void)
1097{
1098#ifndef _WIN32
1099 assert(current_cpu);
1100
1101 if (!current_cpu->thread_kicked) {
1102 qemu_cpu_kick_thread(current_cpu);
1103 current_cpu->thread_kicked = true;
1104 }
1105#else
1106 abort();
1107#endif
1108}
1109
1110bool qemu_cpu_is_self(CPUState *cpu)
1111{
1112 return qemu_thread_is_self(cpu->thread);
1113}
1114
1115static bool qemu_in_vcpu_thread(void)
1116{
1117 return current_cpu && qemu_cpu_is_self(current_cpu);
1118}
1119
1120void qemu_mutex_lock_iothread(void)
1121{
1122 if (!tcg_enabled()) {
1123 qemu_mutex_lock(&qemu_global_mutex);
1124 } else {
1125 iothread_requesting_mutex = true;
1126 if (qemu_mutex_trylock(&qemu_global_mutex)) {
1127 qemu_cpu_kick_thread(first_cpu);
1128 qemu_mutex_lock(&qemu_global_mutex);
1129 }
1130 iothread_requesting_mutex = false;
1131 qemu_cond_broadcast(&qemu_io_proceeded_cond);
1132 }
1133}
1134
1135void qemu_mutex_unlock_iothread(void)
1136{
1137 qemu_mutex_unlock(&qemu_global_mutex);
1138}
1139
1140static int all_vcpus_paused(void)
1141{
1142 CPUState *cpu;
1143
1144 CPU_FOREACH(cpu) {
1145 if (!cpu->stopped) {
1146 return 0;
1147 }
1148 }
1149
1150 return 1;
1151}
1152
1153void pause_all_vcpus(void)
1154{
1155 CPUState *cpu;
1156
1157 qemu_clock_enable(QEMU_CLOCK_VIRTUAL, false);
1158 CPU_FOREACH(cpu) {
1159 cpu->stop = true;
1160 qemu_cpu_kick(cpu);
1161 }
1162
1163 if (qemu_in_vcpu_thread()) {
1164 cpu_stop_current();
1165 if (!kvm_enabled()) {
1166 CPU_FOREACH(cpu) {
1167 cpu->stop = false;
1168 cpu->stopped = true;
1169 }
1170 return;
1171 }
1172 }
1173
1174 while (!all_vcpus_paused()) {
1175 qemu_cond_wait(&qemu_pause_cond, &qemu_global_mutex);
1176 CPU_FOREACH(cpu) {
1177 qemu_cpu_kick(cpu);
1178 }
1179 }
1180}
1181
1182void cpu_resume(CPUState *cpu)
1183{
1184 cpu->stop = false;
1185 cpu->stopped = false;
1186 qemu_cpu_kick(cpu);
1187}
1188
1189void resume_all_vcpus(void)
1190{
1191 CPUState *cpu;
1192
1193 qemu_clock_enable(QEMU_CLOCK_VIRTUAL, true);
1194 CPU_FOREACH(cpu) {
1195 cpu_resume(cpu);
1196 }
1197}
1198
1199
1200#define VCPU_THREAD_NAME_SIZE 16
1201
1202static void qemu_tcg_init_vcpu(CPUState *cpu)
1203{
1204 char thread_name[VCPU_THREAD_NAME_SIZE];
1205
1206 tcg_cpu_address_space_init(cpu, cpu->as);
1207
1208
1209 if (!tcg_cpu_thread) {
1210 cpu->thread = g_malloc0(sizeof(QemuThread));
1211 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
1212 qemu_cond_init(cpu->halt_cond);
1213 tcg_halt_cond = cpu->halt_cond;
1214 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/TCG",
1215 cpu->cpu_index);
1216 qemu_thread_create(cpu->thread, thread_name, qemu_tcg_cpu_thread_fn,
1217 cpu, QEMU_THREAD_JOINABLE);
1218#ifdef _WIN32
1219 cpu->hThread = qemu_thread_get_handle(cpu->thread);
1220#endif
1221 while (!cpu->created) {
1222 qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
1223 }
1224 tcg_cpu_thread = cpu->thread;
1225 } else {
1226 cpu->thread = tcg_cpu_thread;
1227 cpu->halt_cond = tcg_halt_cond;
1228 }
1229}
1230
1231static void qemu_kvm_start_vcpu(CPUState *cpu)
1232{
1233 char thread_name[VCPU_THREAD_NAME_SIZE];
1234
1235 cpu->thread = g_malloc0(sizeof(QemuThread));
1236 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
1237 qemu_cond_init(cpu->halt_cond);
1238 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/KVM",
1239 cpu->cpu_index);
1240 qemu_thread_create(cpu->thread, thread_name, qemu_kvm_cpu_thread_fn,
1241 cpu, QEMU_THREAD_JOINABLE);
1242 while (!cpu->created) {
1243 qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
1244 }
1245}
1246
1247static void qemu_dummy_start_vcpu(CPUState *cpu)
1248{
1249 char thread_name[VCPU_THREAD_NAME_SIZE];
1250
1251 cpu->thread = g_malloc0(sizeof(QemuThread));
1252 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
1253 qemu_cond_init(cpu->halt_cond);
1254 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/DUMMY",
1255 cpu->cpu_index);
1256 qemu_thread_create(cpu->thread, thread_name, qemu_dummy_cpu_thread_fn, cpu,
1257 QEMU_THREAD_JOINABLE);
1258 while (!cpu->created) {
1259 qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
1260 }
1261}
1262
1263void qemu_init_vcpu(CPUState *cpu)
1264{
1265 cpu->nr_cores = smp_cores;
1266 cpu->nr_threads = smp_threads;
1267 cpu->stopped = true;
1268 if (kvm_enabled()) {
1269 qemu_kvm_start_vcpu(cpu);
1270 } else if (tcg_enabled()) {
1271 qemu_tcg_init_vcpu(cpu);
1272 } else {
1273 qemu_dummy_start_vcpu(cpu);
1274 }
1275}
1276
1277void cpu_stop_current(void)
1278{
1279 if (current_cpu) {
1280 current_cpu->stop = false;
1281 current_cpu->stopped = true;
1282 cpu_exit(current_cpu);
1283 qemu_cond_signal(&qemu_pause_cond);
1284 }
1285}
1286
1287int vm_stop(RunState state)
1288{
1289 if (qemu_in_vcpu_thread()) {
1290 qemu_system_vmstop_request_prepare();
1291 qemu_system_vmstop_request(state);
1292
1293
1294
1295
1296 cpu_stop_current();
1297 return 0;
1298 }
1299
1300 return do_vm_stop(state);
1301}
1302
1303
1304
1305int vm_stop_force_state(RunState state)
1306{
1307 if (runstate_is_running()) {
1308 return vm_stop(state);
1309 } else {
1310 runstate_set(state);
1311
1312
1313 return bdrv_flush_all();
1314 }
1315}
1316
1317static int tcg_cpu_exec(CPUArchState *env)
1318{
1319 CPUState *cpu = ENV_GET_CPU(env);
1320 int ret;
1321#ifdef CONFIG_PROFILER
1322 int64_t ti;
1323#endif
1324
1325#ifdef CONFIG_PROFILER
1326 ti = profile_getclock();
1327#endif
1328 if (use_icount) {
1329 int64_t count;
1330 int64_t deadline;
1331 int decr;
1332 timers_state.qemu_icount -= (cpu->icount_decr.u16.low
1333 + cpu->icount_extra);
1334 cpu->icount_decr.u16.low = 0;
1335 cpu->icount_extra = 0;
1336 deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
1337
1338
1339
1340
1341
1342
1343 if ((deadline < 0) || (deadline > INT32_MAX)) {
1344 deadline = INT32_MAX;
1345 }
1346
1347 count = qemu_icount_round(deadline);
1348 timers_state.qemu_icount += count;
1349 decr = (count > 0xffff) ? 0xffff : count;
1350 count -= decr;
1351 cpu->icount_decr.u16.low = decr;
1352 cpu->icount_extra = count;
1353 }
1354 ret = cpu_exec(env);
1355#ifdef CONFIG_PROFILER
1356 qemu_time += profile_getclock() - ti;
1357#endif
1358 if (use_icount) {
1359
1360
1361 timers_state.qemu_icount -= (cpu->icount_decr.u16.low
1362 + cpu->icount_extra);
1363 cpu->icount_decr.u32 = 0;
1364 cpu->icount_extra = 0;
1365 }
1366 return ret;
1367}
1368
1369static void tcg_exec_all(void)
1370{
1371 int r;
1372
1373
1374 qemu_clock_warp(QEMU_CLOCK_VIRTUAL);
1375
1376 if (next_cpu == NULL) {
1377 next_cpu = first_cpu;
1378 }
1379 for (; next_cpu != NULL && !exit_request; next_cpu = CPU_NEXT(next_cpu)) {
1380 CPUState *cpu = next_cpu;
1381 CPUArchState *env = cpu->env_ptr;
1382
1383 qemu_clock_enable(QEMU_CLOCK_VIRTUAL,
1384 (cpu->singlestep_enabled & SSTEP_NOTIMER) == 0);
1385
1386 if (cpu_can_run(cpu)) {
1387 r = tcg_cpu_exec(env);
1388 if (r == EXCP_DEBUG) {
1389 cpu_handle_guest_debug(cpu);
1390 break;
1391 }
1392 } else if (cpu->stop || cpu->stopped) {
1393 break;
1394 }
1395 }
1396 exit_request = 0;
1397}
1398
1399void list_cpus(FILE *f, fprintf_function cpu_fprintf, const char *optarg)
1400{
1401
1402#if defined(cpu_list)
1403 cpu_list(f, cpu_fprintf);
1404#endif
1405}
1406
1407CpuInfoList *qmp_query_cpus(Error **errp)
1408{
1409 CpuInfoList *head = NULL, *cur_item = NULL;
1410 CPUState *cpu;
1411
1412 CPU_FOREACH(cpu) {
1413 CpuInfoList *info;
1414#if defined(TARGET_I386)
1415 X86CPU *x86_cpu = X86_CPU(cpu);
1416 CPUX86State *env = &x86_cpu->env;
1417#elif defined(TARGET_PPC)
1418 PowerPCCPU *ppc_cpu = POWERPC_CPU(cpu);
1419 CPUPPCState *env = &ppc_cpu->env;
1420#elif defined(TARGET_SPARC)
1421 SPARCCPU *sparc_cpu = SPARC_CPU(cpu);
1422 CPUSPARCState *env = &sparc_cpu->env;
1423#elif defined(TARGET_MIPS)
1424 MIPSCPU *mips_cpu = MIPS_CPU(cpu);
1425 CPUMIPSState *env = &mips_cpu->env;
1426#elif defined(TARGET_TRICORE)
1427 TriCoreCPU *tricore_cpu = TRICORE_CPU(cpu);
1428 CPUTriCoreState *env = &tricore_cpu->env;
1429#endif
1430
1431 cpu_synchronize_state(cpu);
1432
1433 info = g_malloc0(sizeof(*info));
1434 info->value = g_malloc0(sizeof(*info->value));
1435 info->value->CPU = cpu->cpu_index;
1436 info->value->current = (cpu == first_cpu);
1437 info->value->halted = cpu->halted;
1438 info->value->thread_id = cpu->thread_id;
1439#if defined(TARGET_I386)
1440 info->value->has_pc = true;
1441 info->value->pc = env->eip + env->segs[R_CS].base;
1442#elif defined(TARGET_PPC)
1443 info->value->has_nip = true;
1444 info->value->nip = env->nip;
1445#elif defined(TARGET_SPARC)
1446 info->value->has_pc = true;
1447 info->value->pc = env->pc;
1448 info->value->has_npc = true;
1449 info->value->npc = env->npc;
1450#elif defined(TARGET_MIPS)
1451 info->value->has_PC = true;
1452 info->value->PC = env->active_tc.PC;
1453#elif defined(TARGET_TRICORE)
1454 info->value->has_PC = true;
1455 info->value->PC = env->PC;
1456#endif
1457
1458
1459 if (!cur_item) {
1460 head = cur_item = info;
1461 } else {
1462 cur_item->next = info;
1463 cur_item = info;
1464 }
1465 }
1466
1467 return head;
1468}
1469
1470void qmp_memsave(int64_t addr, int64_t size, const char *filename,
1471 bool has_cpu, int64_t cpu_index, Error **errp)
1472{
1473 FILE *f;
1474 uint32_t l;
1475 CPUState *cpu;
1476 uint8_t buf[1024];
1477
1478 if (!has_cpu) {
1479 cpu_index = 0;
1480 }
1481
1482 cpu = qemu_get_cpu(cpu_index);
1483 if (cpu == NULL) {
1484 error_set(errp, QERR_INVALID_PARAMETER_VALUE, "cpu-index",
1485 "a CPU number");
1486 return;
1487 }
1488
1489 f = fopen(filename, "wb");
1490 if (!f) {
1491 error_setg_file_open(errp, errno, filename);
1492 return;
1493 }
1494
1495 while (size != 0) {
1496 l = sizeof(buf);
1497 if (l > size)
1498 l = size;
1499 if (cpu_memory_rw_debug(cpu, addr, buf, l, 0) != 0) {
1500 error_setg(errp, "Invalid addr 0x%016" PRIx64 "specified", addr);
1501 goto exit;
1502 }
1503 if (fwrite(buf, 1, l, f) != l) {
1504 error_set(errp, QERR_IO_ERROR);
1505 goto exit;
1506 }
1507 addr += l;
1508 size -= l;
1509 }
1510
1511exit:
1512 fclose(f);
1513}
1514
1515void qmp_pmemsave(int64_t addr, int64_t size, const char *filename,
1516 Error **errp)
1517{
1518 FILE *f;
1519 uint32_t l;
1520 uint8_t buf[1024];
1521
1522 f = fopen(filename, "wb");
1523 if (!f) {
1524 error_setg_file_open(errp, errno, filename);
1525 return;
1526 }
1527
1528 while (size != 0) {
1529 l = sizeof(buf);
1530 if (l > size)
1531 l = size;
1532 cpu_physical_memory_read(addr, buf, l);
1533 if (fwrite(buf, 1, l, f) != l) {
1534 error_set(errp, QERR_IO_ERROR);
1535 goto exit;
1536 }
1537 addr += l;
1538 size -= l;
1539 }
1540
1541exit:
1542 fclose(f);
1543}
1544
1545void qmp_inject_nmi(Error **errp)
1546{
1547#if defined(TARGET_I386)
1548 CPUState *cs;
1549
1550 CPU_FOREACH(cs) {
1551 X86CPU *cpu = X86_CPU(cs);
1552
1553 if (!cpu->apic_state) {
1554 cpu_interrupt(cs, CPU_INTERRUPT_NMI);
1555 } else {
1556 apic_deliver_nmi(cpu->apic_state);
1557 }
1558 }
1559#else
1560 nmi_monitor_handle(monitor_get_cpu_index(), errp);
1561#endif
1562}
1563
1564void dump_drift_info(FILE *f, fprintf_function cpu_fprintf)
1565{
1566 if (!use_icount) {
1567 return;
1568 }
1569
1570 cpu_fprintf(f, "Host - Guest clock %"PRIi64" ms\n",
1571 (cpu_get_clock() - cpu_get_icount())/SCALE_MS);
1572 if (icount_align_option) {
1573 cpu_fprintf(f, "Max guest delay %"PRIi64" ms\n", -max_delay/SCALE_MS);
1574 cpu_fprintf(f, "Max guest advance %"PRIi64" ms\n", max_advance/SCALE_MS);
1575 } else {
1576 cpu_fprintf(f, "Max guest delay NA\n");
1577 cpu_fprintf(f, "Max guest advance NA\n");
1578 }
1579}
1580