/*
 * QEMU System Emulator
 *
 * Copyright (c) 2003-2008 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */
/* Needed early for CONFIG_BSD etc. */
#include "config-host.h"

#include "monitor/monitor.h"
#include "qapi/qmp/qerror.h"
#include "qemu/error-report.h"
#include "sysemu/sysemu.h"
#include "exec/gdbstub.h"
#include "sysemu/dma.h"
#include "sysemu/kvm.h"
#include "sysemu/hax.h"
#include "qmp-commands.h"

#include "qemu/thread.h"
#include "sysemu/cpus.h"
#include "sysemu/qtest.h"
#include "qemu/main-loop.h"
#include "qemu/bitmap.h"
#include "qemu/seqlock.h"
#include "qapi-event.h"

#include "qemu/compatfd.h"
#ifdef CONFIG_LINUX

#include <sys/prctl.h>

#ifndef PR_MCE_KILL
#define PR_MCE_KILL 33
#endif

#ifndef PR_MCE_KILL_SET
#define PR_MCE_KILL_SET 1
#endif

#ifndef PR_MCE_KILL_EARLY
#define PR_MCE_KILL_EARLY 1
#endif

#endif /* CONFIG_LINUX */
static CPUState *next_cpu;

bool cpu_is_stopped(CPUState *cpu)
    return cpu->stopped || !runstate_is_running();

static bool cpu_thread_is_idle(CPUState *cpu)
    if (cpu->stop || cpu->queued_work_first) {
    if (cpu_is_stopped(cpu)) {
    if (!cpu->halted || cpu_has_work(cpu) ||
        kvm_halt_in_kernel()) {

static bool all_cpu_threads_idle(void)
        if (!cpu_thread_is_idle(cpu)) {

/***********************************************************/
/* guest cycle counter */

/* Protected by TimersState seqlock */
static bool icount_sleep = true;
static int64_t vm_clock_warp_start = -1;
/* Conversion factor from emulated instructions to virtual clock ticks.  */
static int icount_time_shift;
/* Arbitrarily pick 1MIPS as the minimum allowable speed.  */
#define MAX_ICOUNT_SHIFT 10
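/* Worked example (illustrative): at the maximum shift of 10, one
 * emulated instruction accounts for 1 << 10 == 1024 ns of virtual
 * time, i.e. about 10^9 / 1024 ~= 976k instructions per second,
 * which is the ~1 MIPS floor mentioned above. */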
static QEMUTimer *icount_rt_timer;
static QEMUTimer *icount_vm_timer;
static QEMUTimer *icount_warp_timer;

typedef struct TimersState {
    /* Protected by BQL.  */
    int64_t cpu_ticks_prev;
    int64_t cpu_ticks_offset;

    /* cpu_clock_offset can be read out of BQL, so protect it with
     * this lock.
     */
    QemuSeqLock vm_clock_seqlock;
    int64_t cpu_clock_offset;
    int32_t cpu_ticks_enabled;

    /* Compensate for varying guest execution speed.  */
    int64_t qemu_icount_bias;
    /* Only written by TCG thread */
    int64_t qemu_icount;
} TimersState;

static TimersState timers_state;
int64_t cpu_get_icount_raw(void)
    int64_t icount;
    CPUState *cpu = current_cpu;

    icount = timers_state.qemu_icount;
    if (cpu) {
        if (!cpu_can_do_io(cpu)) {
            fprintf(stderr, "Bad icount read\n");
            exit(1);
        }
        icount -= (cpu->icount_decr.u16.low + cpu->icount_extra);
    }
    return icount;

/* Return the virtual CPU time, based on the instruction counter.  */
static int64_t cpu_get_icount_locked(void)
    int64_t icount = cpu_get_icount_raw();
    return timers_state.qemu_icount_bias + cpu_icount_to_ns(icount);

int64_t cpu_get_icount(void)
    int64_t icount;
    unsigned start;

    do {
        start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
        icount = cpu_get_icount_locked();
    } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));

    return icount;
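/* The loop above is the seqlock reader idiom: readers take no lock and
 * simply retry if a writer raced with them.  A minimal sketch of the
 * pattern, assuming some seqlock-protected value "v":
 *
 *     unsigned start;
 *     int64_t val;
 *     do {
 *         start = seqlock_read_begin(&sl);
 *         val = v;
 *     } while (seqlock_read_retry(&sl, start));
 *
 * Writers (see cpu_enable_ticks() below) serialize against each other
 * with the BQL and bump the sequence count around their update. */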
int64_t cpu_icount_to_ns(int64_t icount)
    return icount << icount_time_shift;
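/* Worked example (illustrative): with icount_time_shift == 3 each
 * emulated instruction accounts for 1 << 3 == 8 ns of virtual time,
 * a nominal guest speed of 125 MIPS.  Keeping the conversion factor a
 * power of two makes it a cheap shift instead of a multiply/divide. */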
/* return the host CPU cycle counter and handle stop/restart */
/* Caller must hold the BQL */
int64_t cpu_get_ticks(void)
    int64_t ticks;

    if (use_icount) {
        return cpu_get_icount();
    }

    ticks = timers_state.cpu_ticks_offset;
    if (timers_state.cpu_ticks_enabled) {
        ticks += cpu_get_real_ticks();
    }

    if (timers_state.cpu_ticks_prev > ticks) {
        /* Note: non increasing ticks may happen, for example,
           if the host uses software suspend.  */
        timers_state.cpu_ticks_offset += timers_state.cpu_ticks_prev - ticks;
        ticks = timers_state.cpu_ticks_prev;
    }

    timers_state.cpu_ticks_prev = ticks;
    return ticks;
static int64_t cpu_get_clock_locked(void)
    int64_t ticks;

    ticks = timers_state.cpu_clock_offset;
    if (timers_state.cpu_ticks_enabled) {
        ticks += get_clock();
    }

    return ticks;

/* return the host CPU monotonic timer and handle stop/restart */
int64_t cpu_get_clock(void)
    int64_t ti;
    unsigned start;

    do {
        start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
        ti = cpu_get_clock_locked();
    } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));

    return ti;
/* enable cpu_get_ticks()
 * Caller must hold BQL which serves as mutex for vm_clock_seqlock.
 */
void cpu_enable_ticks(void)
    /* Here, the real thing protected by seqlock is cpu_clock_offset.  */
    seqlock_write_lock(&timers_state.vm_clock_seqlock);
    if (!timers_state.cpu_ticks_enabled) {
        timers_state.cpu_ticks_offset -= cpu_get_real_ticks();
        timers_state.cpu_clock_offset -= get_clock();
        timers_state.cpu_ticks_enabled = 1;
    }
    seqlock_write_unlock(&timers_state.vm_clock_seqlock);

/* disable cpu_get_ticks(): the clock is stopped.  You must not call
 * cpu_get_ticks() after that.
 * Caller must hold BQL which serves as mutex for vm_clock_seqlock.
 */
void cpu_disable_ticks(void)
    /* Here, the real thing protected by seqlock is cpu_clock_offset.  */
    seqlock_write_lock(&timers_state.vm_clock_seqlock);
    if (timers_state.cpu_ticks_enabled) {
        timers_state.cpu_ticks_offset += cpu_get_real_ticks();
        timers_state.cpu_clock_offset = cpu_get_clock_locked();
        timers_state.cpu_ticks_enabled = 0;
    }
    seqlock_write_unlock(&timers_state.vm_clock_seqlock);
/* Correlation between real and virtual time is always going to be
   fairly approximate, so ignore small variation.
   When the guest is idle real and virtual time will be aligned in
   the IO wait loop.  */
#define ICOUNT_WOBBLE (get_ticks_per_sec() / 10)
static void icount_adjust(void)
    int64_t cur_time;
    int64_t cur_icount;
    int64_t delta;

    /* Protected by TimersState mutex.  */
    static int64_t last_delta;

    /* If the VM is not running, then do nothing.  */
    if (!runstate_is_running()) {
        return;
    }

    seqlock_write_lock(&timers_state.vm_clock_seqlock);
    cur_time = cpu_get_clock_locked();
    cur_icount = cpu_get_icount_locked();

    delta = cur_icount - cur_time;
    /* FIXME: This is a very crude algorithm, somewhat prone to oscillation.  */
    if (delta > 0
        && last_delta + ICOUNT_WOBBLE < delta * 2
        && icount_time_shift > 0) {
        /* The guest is getting too far ahead.  Slow time down.  */
        icount_time_shift--;
    }
    if (delta < 0
        && last_delta - ICOUNT_WOBBLE > delta * 2
        && icount_time_shift < MAX_ICOUNT_SHIFT) {
        /* The guest is getting too far behind.  Speed time up.  */
        icount_time_shift++;
    }
    last_delta = delta;
    timers_state.qemu_icount_bias = cur_icount
                              - (timers_state.qemu_icount << icount_time_shift);
    seqlock_write_unlock(&timers_state.vm_clock_seqlock);
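/* Illustrative run of the feedback loop above: if virtual time stays
 * ahead of real time (delta > 0 two rounds in a row by more than the
 * wobble), icount_time_shift drops by one, halving the nanoseconds
 * charged per instruction; the bias is then recomputed so the scaled
 * clock stays continuous across the shift change. */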
static void icount_adjust_rt(void *opaque)
    timer_mod(icount_rt_timer,
              qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000);
    icount_adjust();

static void icount_adjust_vm(void *opaque)
    timer_mod(icount_vm_timer,
              qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
              get_ticks_per_sec() / 10);
    icount_adjust();

static int64_t qemu_icount_round(int64_t count)
    return (count + (1 << icount_time_shift) - 1) >> icount_time_shift;
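/* Example (illustrative): with icount_time_shift == 3, a 20 ns deadline
 * rounds up to (20 + 7) >> 3 == 3 instructions, i.e. 24 ns of budget,
 * so the vCPU never stops short of the next QEMU_CLOCK_VIRTUAL event. */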
static void icount_warp_rt(void *opaque)
    /* The icount_warp_timer is rescheduled soon after vm_clock_warp_start
     * changes from -1 to another value, so the race here is okay.
     */
    if (atomic_read(&vm_clock_warp_start) == -1) {
        return;
    }

    seqlock_write_lock(&timers_state.vm_clock_seqlock);
    if (runstate_is_running()) {
        int64_t clock = cpu_get_clock_locked();
        int64_t warp_delta;

        warp_delta = clock - vm_clock_warp_start;
        if (use_icount == 2) {
            /*
             * In adaptive mode, do not let QEMU_CLOCK_VIRTUAL run too
             * far ahead of real time.
             */
            int64_t cur_icount = cpu_get_icount_locked();
            int64_t delta = clock - cur_icount;
            warp_delta = MIN(warp_delta, delta);
        }
        timers_state.qemu_icount_bias += warp_delta;
    }
    vm_clock_warp_start = -1;
    seqlock_write_unlock(&timers_state.vm_clock_seqlock);

    if (qemu_clock_expired(QEMU_CLOCK_VIRTUAL)) {
        qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
    }

void qtest_clock_warp(int64_t dest)
    int64_t clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
    AioContext *aio_context;
    assert(qtest_enabled());
    aio_context = qemu_get_aio_context();
    while (clock < dest) {
        int64_t deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
        int64_t warp = qemu_soonest_timeout(dest - clock, deadline);

        seqlock_write_lock(&timers_state.vm_clock_seqlock);
        timers_state.qemu_icount_bias += warp;
        seqlock_write_unlock(&timers_state.vm_clock_seqlock);

        qemu_clock_run_timers(QEMU_CLOCK_VIRTUAL);
        timerlist_run_timers(aio_context->tlg.tl[QEMU_CLOCK_VIRTUAL]);
        clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
    }
    qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
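/* Driven from the qtest protocol; a hypothetical session advancing the
 * virtual clock by 1 ms might look like:
 *
 *     >>> clock_step 1000000
 *     <<< OK 1000000
 *
 * (the exact reply format depends on the qtest version).  Each step
 * runs every QEMU_CLOCK_VIRTUAL timer that falls inside the warped
 * span before reading the clock again. */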
void qemu_clock_warp(QEMUClockType type)
    /*
     * There are too many global variables to make the "warp" behavior
     * applicable to other clocks.  But a clock argument removes the
     * need for if statements all over the place.
     */
    if (type != QEMU_CLOCK_VIRTUAL || !use_icount) {
        return;
    }

    /*
     * If the CPUs have been sleeping, advance QEMU_CLOCK_VIRTUAL timer now.
     * This ensures that the deadline for the timer is computed correctly
     * below.
     * This also makes sure that the insn counter is synchronized before
     * the CPU starts running, in case the CPU is woken by an event other
     * than the earliest QEMU_CLOCK_VIRTUAL timer.
     */
    icount_warp_rt(NULL);
    timer_del(icount_warp_timer);
    if (!all_cpu_threads_idle()) {
        return;
    }

    if (qtest_enabled()) {
        /* When testing, qtest commands advance icount.  */
        return;
    }

    /* We want to use the earliest deadline from ALL vm_clocks */
    clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT);
    deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
    if (deadline < 0) {
        static bool notified;
        if (!icount_sleep && !notified) {
            error_report("WARNING: icount sleep disabled and no active timers");
            notified = true;
        }
        return;
    }

    if (deadline > 0) {
        /*
         * Ensure QEMU_CLOCK_VIRTUAL proceeds even when the virtual CPU goes to
         * sleep.  Otherwise, the CPU might be waiting for a future timer
         * interrupt to wake it up, but the interrupt never comes because
         * the vCPU isn't running any insns and thus doesn't advance the
         * QEMU_CLOCK_VIRTUAL.
         */
        if (!icount_sleep) {
            /*
             * We never let VCPUs sleep in no sleep icount mode.
             * If there is a pending QEMU_CLOCK_VIRTUAL timer we just advance
             * to the next QEMU_CLOCK_VIRTUAL event and notify it.
             * It is useful when we want a deterministic execution time,
             * isolated from host latencies.
             */
            seqlock_write_lock(&timers_state.vm_clock_seqlock);
            timers_state.qemu_icount_bias += deadline;
            seqlock_write_unlock(&timers_state.vm_clock_seqlock);
            qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
        } else {
            /*
             * We do stop VCPUs and only advance QEMU_CLOCK_VIRTUAL after some
             * "real" time (related to the time left until the next event) has
             * passed.  The QEMU_CLOCK_VIRTUAL_RT clock will do this.
             * This avoids that the warps are visible externally; for example,
             * you will not be sending network packets continuously instead of
             * every 100ms.
             */
            seqlock_write_lock(&timers_state.vm_clock_seqlock);
            if (vm_clock_warp_start == -1 || vm_clock_warp_start > clock) {
                vm_clock_warp_start = clock;
            }
            seqlock_write_unlock(&timers_state.vm_clock_seqlock);
            timer_mod_anticipate(icount_warp_timer, clock + deadline);
        }
    } else if (deadline == 0) {
        qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
    }
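/* Three outcomes above, in order: with sleep disabled the bias jumps
 * straight to the next deadline; with sleep enabled a timer on
 * QEMU_CLOCK_VIRTUAL_RT lets a comparable amount of real time pass
 * before icount_warp_rt() applies the warp; and deadline == 0 means a
 * timer is already due, so only a notify is needed. */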
static bool icount_state_needed(void *opaque)
    return use_icount;

/*
 * This is a subsection for icount migration.
 */
static const VMStateDescription icount_vmstate_timers = {
    .name = "timer/icount",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = icount_state_needed,
    .fields = (VMStateField[]) {
        VMSTATE_INT64(qemu_icount_bias, TimersState),
        VMSTATE_INT64(qemu_icount, TimersState),
        VMSTATE_END_OF_LIST()
    }
};

static const VMStateDescription vmstate_timers = {
    .name = "timer",
    .version_id = 2,
    .minimum_version_id = 1,
    .fields = (VMStateField[]) {
        VMSTATE_INT64(cpu_ticks_offset, TimersState),
        VMSTATE_INT64(dummy, TimersState),
        VMSTATE_INT64_V(cpu_clock_offset, TimersState, 2),
        VMSTATE_END_OF_LIST()
    },
    .subsections = (const VMStateDescription*[]) {
        &icount_vmstate_timers,
        NULL
    }
};
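/* The subsection travels with the migration stream only when
 * icount_state_needed() returns true, so streams from non-icount runs
 * stay loadable by destinations that predate the icount fields. */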
void cpu_ticks_init(void)
    seqlock_init(&timers_state.vm_clock_seqlock, NULL);
    vmstate_register(NULL, 0, &vmstate_timers, &timers_state);

void configure_icount(QemuOpts *opts, Error **errp)
    const char *option;
    char *rem_str = NULL;

    option = qemu_opt_get(opts, "shift");
    if (!option) {
        if (qemu_opt_get(opts, "align") != NULL) {
            error_setg(errp, "Please specify shift option when using align");
        }
        return;
    }

    icount_sleep = qemu_opt_get_bool(opts, "sleep", true);
    if (icount_sleep) {
        icount_warp_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL_RT,
                                         icount_warp_rt, NULL);
    }

    icount_align_option = qemu_opt_get_bool(opts, "align", false);

    if (icount_align_option && !icount_sleep) {
        error_setg(errp, "align=on and sleep=no are incompatible");
    }
    if (strcmp(option, "auto") != 0) {
        errno = 0;
        icount_time_shift = strtol(option, &rem_str, 0);
        if (errno != 0 || *rem_str != '\0' || !strlen(option)) {
            error_setg(errp, "icount: Invalid shift value");
        }
        use_icount = 1;
        return;
    } else if (icount_align_option) {
        error_setg(errp, "shift=auto and align=on are incompatible");
    } else if (!icount_sleep) {
        error_setg(errp, "shift=auto and sleep=no are incompatible");
    }

    use_icount = 2;

    /* 125MIPS seems a reasonable initial guess at the guest speed.
       It will be corrected fairly quickly anyway.  */
    icount_time_shift = 3;

    /* Have both realtime and virtual time triggers for speed adjustment.
       The realtime trigger catches emulated time passing too slowly,
       the virtual time trigger catches emulated time passing too fast.
       Realtime triggers occur even when idle, so use them less frequently
       than VM triggers.  */
    icount_rt_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL_RT,
                                   icount_adjust_rt, NULL);
    timer_mod(icount_rt_timer,
              qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000);
    icount_vm_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
                                   icount_adjust_vm, NULL);
    timer_mod(icount_vm_timer,
              qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
              get_ticks_per_sec() / 10);
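/* Typical command lines exercising the paths above (illustrative):
 *
 *     qemu-system-x86_64 -icount shift=7              (fixed 128 ns/insn)
 *     qemu-system-x86_64 -icount shift=auto,sleep=off
 *
 * "auto" starts from the 125 MIPS guess and lets icount_adjust_rt()
 * (every 1000 ms of real time) and icount_adjust_vm() (every 100 ms of
 * virtual time) re-tune icount_time_shift as the guest runs. */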
/***********************************************************/
void hw_error(const char *fmt, ...)
    fprintf(stderr, "qemu: hardware error: ");
    vfprintf(stderr, fmt, ap);
    fprintf(stderr, "\n");
    fprintf(stderr, "CPU #%d:\n", cpu->cpu_index);
    cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_FPU);

void cpu_synchronize_all_states(void)
    cpu_synchronize_state(cpu);

void cpu_synchronize_all_post_reset(void)
    cpu_synchronize_post_reset(cpu);
    if (hax_enabled() && hax_ug_platform()) {
        hax_cpu_synchronize_post_reset(cpu);
    }

void cpu_synchronize_all_post_init(void)
    cpu_synchronize_post_init(cpu);
    if (hax_enabled() && hax_ug_platform()) {
        hax_cpu_synchronize_post_init(cpu);
    }

void cpu_clean_all_dirty(void)
    cpu_clean_state(cpu);
static int do_vm_stop(RunState state)
    if (runstate_is_running()) {
        vm_state_notify(0, state);
        qapi_event_send_stop(&error_abort);
    }

    ret = bdrv_flush_all();

static bool cpu_can_run(CPUState *cpu)
    if (cpu_is_stopped(cpu)) {
        return false;
    }
    return true;

static void cpu_handle_guest_debug(CPUState *cpu)
    gdb_set_stop_cpu(cpu);
    qemu_system_debug_request();

static void cpu_signal(int sig)
    cpu_exit(current_cpu);

static void sigbus_reraise(void)
    sigset_t set;
    struct sigaction action;

    memset(&action, 0, sizeof(action));
    action.sa_handler = SIG_DFL;
    if (!sigaction(SIGBUS, &action, NULL)) {
        raise(SIGBUS);
        sigemptyset(&set);
        sigaddset(&set, SIGBUS);
        sigprocmask(SIG_UNBLOCK, &set, NULL);
    }
    perror("Failed to re-raise SIGBUS!");
    abort();
static void sigbus_handler(int n, struct qemu_signalfd_siginfo *siginfo,
                           void *ctx)
    if (kvm_on_sigbus(siginfo->ssi_code,
                      (void *)(intptr_t)siginfo->ssi_addr)) {
        sigbus_reraise();
    }

static void qemu_init_sigbus(void)
    struct sigaction action;

    memset(&action, 0, sizeof(action));
    action.sa_flags = SA_SIGINFO;
    action.sa_sigaction = (void (*)(int, siginfo_t*, void*))sigbus_handler;
    sigaction(SIGBUS, &action, NULL);

    prctl(PR_MCE_KILL, PR_MCE_KILL_SET, PR_MCE_KILL_EARLY, 0, 0);
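/* PR_MCE_KILL with PR_MCE_KILL_SET/PR_MCE_KILL_EARLY opts this process
 * in to early machine-check delivery: the kernel raises SIGBUS as soon
 * as it finds a corrupted page mapped by QEMU, so the handler above can
 * forward the error to the guest instead of the process dying late. */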
static void qemu_kvm_eat_signals(CPUState *cpu)
    struct timespec ts = { 0, 0 };

    sigemptyset(&waitset);
    sigaddset(&waitset, SIG_IPI);
    sigaddset(&waitset, SIGBUS);

    do {
        r = sigtimedwait(&waitset, &siginfo, &ts);
        if (r == -1 && !(errno == EAGAIN || errno == EINTR)) {
            perror("sigtimedwait");
            exit(1);
        }

        if (kvm_on_sigbus_vcpu(cpu, siginfo.si_code, siginfo.si_addr)) {

        r = sigpending(&chkset);
        if (r == -1) {
            perror("sigpending");
            exit(1);
        }
    } while (sigismember(&chkset, SIG_IPI) || sigismember(&chkset, SIGBUS));

#else /* !CONFIG_LINUX */

static void qemu_init_sigbus(void)
{
}

static void qemu_kvm_eat_signals(CPUState *cpu)
{
}
#endif /* !CONFIG_LINUX */
#ifndef _WIN32
static void dummy_signal(int sig)
{
}

static void qemu_kvm_init_cpu_signals(CPUState *cpu)
    struct sigaction sigact;

    memset(&sigact, 0, sizeof(sigact));
    sigact.sa_handler = dummy_signal;
    sigaction(SIG_IPI, &sigact, NULL);

    pthread_sigmask(SIG_BLOCK, NULL, &set);
    sigdelset(&set, SIG_IPI);
    sigdelset(&set, SIGBUS);
    r = kvm_set_signal_mask(cpu, &set);
    if (r) {
        fprintf(stderr, "kvm_set_signal_mask: %s\n", strerror(-r));
        exit(1);
    }

static void qemu_tcg_init_cpu_signals(void)
    struct sigaction sigact;

    memset(&sigact, 0, sizeof(sigact));
    sigact.sa_handler = cpu_signal;
    sigaction(SIG_IPI, &sigact, NULL);

    sigemptyset(&set);
    sigaddset(&set, SIG_IPI);
    pthread_sigmask(SIG_UNBLOCK, &set, NULL);

#else /* _WIN32 */
static void qemu_kvm_init_cpu_signals(CPUState *cpu)
    abort();

static void qemu_tcg_init_cpu_signals(void)
{
}
#endif /* _WIN32 */
static QemuMutex qemu_global_mutex;
static QemuCond qemu_io_proceeded_cond;
static unsigned iothread_requesting_mutex;

static QemuThread io_thread;

static QemuThread *tcg_cpu_thread;
static QemuCond *tcg_halt_cond;

static QemuCond qemu_cpu_cond;
static QemuCond qemu_pause_cond;
static QemuCond qemu_work_cond;

void qemu_init_cpu_loop(void)
    qemu_cond_init(&qemu_cpu_cond);
    qemu_cond_init(&qemu_pause_cond);
    qemu_cond_init(&qemu_work_cond);
    qemu_cond_init(&qemu_io_proceeded_cond);
    qemu_mutex_init(&qemu_global_mutex);

    qemu_thread_get_self(&io_thread);
void run_on_cpu(CPUState *cpu, void (*func)(void *data), void *data)
    struct qemu_work_item wi;

    if (qemu_cpu_is_self(cpu)) {
        func(data);
        return;
    }

    if (cpu->queued_work_first == NULL) {
        cpu->queued_work_first = &wi;
    } else {
        cpu->queued_work_last->next = &wi;
    }
    cpu->queued_work_last = &wi;

    while (!wi.done) {
        CPUState *self_cpu = current_cpu;

        qemu_cond_wait(&qemu_work_cond, &qemu_global_mutex);
        current_cpu = self_cpu;
    }
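/* Usage sketch (hypothetical caller): run_on_cpu() executes "func" on
 * the target vCPU's thread and blocks until it completes, which is why
 * the work item can live on the caller's stack:
 *
 *     static void do_reset_one(void *data)
 *     {
 *         cpu_reset(data);    // runs on the vCPU thread, BQL held
 *     }
 *     run_on_cpu(cpu, do_reset_one, cpu);
 *
 * async_run_on_cpu() below is the fire-and-forget variant; it must
 * heap-allocate its work item because the caller does not wait. */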
void async_run_on_cpu(CPUState *cpu, void (*func)(void *data), void *data)
    struct qemu_work_item *wi;

    if (qemu_cpu_is_self(cpu)) {
        func(data);
        return;
    }

    wi = g_malloc0(sizeof(struct qemu_work_item));

    if (cpu->queued_work_first == NULL) {
        cpu->queued_work_first = wi;
    } else {
        cpu->queued_work_last->next = wi;
    }
    cpu->queued_work_last = wi;

static void flush_queued_work(CPUState *cpu)
    struct qemu_work_item *wi;

    if (cpu->queued_work_first == NULL) {
        return;
    }

    while ((wi = cpu->queued_work_first)) {
        cpu->queued_work_first = wi->next;
        wi->func(wi->data);
        wi->done = true;
        if (wi->free) {
            g_free(wi);
        }
    }
    cpu->queued_work_last = NULL;
    qemu_cond_broadcast(&qemu_work_cond);
static void qemu_wait_io_event_common(CPUState *cpu)
    if (cpu->stop) {
        cpu->stop = false;
        cpu->stopped = true;
        qemu_cond_signal(&qemu_pause_cond);
    }
    flush_queued_work(cpu);
    cpu->thread_kicked = false;

static void qemu_tcg_wait_io_event(void)
    CPUState *cpu;

    while (all_cpu_threads_idle()) {
        /* Start accounting real time to the virtual clock if the CPUs
           are idle.  */
        qemu_clock_warp(QEMU_CLOCK_VIRTUAL);
        qemu_cond_wait(tcg_halt_cond, &qemu_global_mutex);
    }

    while (iothread_requesting_mutex) {
        qemu_cond_wait(&qemu_io_proceeded_cond, &qemu_global_mutex);
    }

    CPU_FOREACH(cpu) {
        qemu_wait_io_event_common(cpu);
    }

static void qemu_hax_wait_io_event(CPUState *cpu)
    while (cpu_thread_is_idle(cpu)) {
        qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
    }

    qemu_wait_io_event_common(cpu);

static void qemu_kvm_wait_io_event(CPUState *cpu)
    while (cpu_thread_is_idle(cpu)) {
        qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
    }

    qemu_kvm_eat_signals(cpu);
    qemu_wait_io_event_common(cpu);
static void *qemu_kvm_cpu_thread_fn(void *arg)
    CPUState *cpu = arg;
    int r;

    rcu_register_thread();

    qemu_mutex_lock_iothread();
    qemu_thread_get_self(cpu->thread);
    cpu->thread_id = qemu_get_thread_id();

    r = kvm_init_vcpu(cpu);
    if (r < 0) {
        fprintf(stderr, "kvm_init_vcpu failed: %s\n", strerror(-r));
        exit(1);
    }

    qemu_kvm_init_cpu_signals(cpu);

    /* signal CPU creation */
    cpu->created = true;
    qemu_cond_signal(&qemu_cpu_cond);

    while (1) {
        if (cpu_can_run(cpu)) {
            r = kvm_cpu_exec(cpu);
            if (r == EXCP_DEBUG) {
                cpu_handle_guest_debug(cpu);
            }
        }
        qemu_kvm_wait_io_event(cpu);
    }

static void *qemu_dummy_cpu_thread_fn(void *arg)
#ifdef _WIN32
    fprintf(stderr, "qtest is not supported under Windows\n");
    exit(1);
#else
    CPUState *cpu = arg;
    sigset_t waitset;
    int r;

    rcu_register_thread();

    qemu_mutex_lock_iothread();
    qemu_thread_get_self(cpu->thread);
    cpu->thread_id = qemu_get_thread_id();

    sigemptyset(&waitset);
    sigaddset(&waitset, SIG_IPI);

    /* signal CPU creation */
    cpu->created = true;
    qemu_cond_signal(&qemu_cpu_cond);

    while (1) {
        int sig;

        qemu_mutex_unlock_iothread();
        do {
            r = sigwait(&waitset, &sig);
        } while (r == -1 && (errno == EAGAIN || errno == EINTR));
        qemu_mutex_lock_iothread();

        qemu_wait_io_event_common(cpu);
    }
#endif

static void tcg_exec_all(void);

static void *qemu_tcg_cpu_thread_fn(void *arg)
    CPUState *cpu = arg;

    rcu_register_thread();

    qemu_mutex_lock_iothread();
    qemu_tcg_init_cpu_signals();
    qemu_thread_get_self(cpu->thread);

    CPU_FOREACH(cpu) {
        cpu->thread_id = qemu_get_thread_id();
        cpu->created = true;
    }
    qemu_cond_signal(&qemu_cpu_cond);

    /* wait for initial kick-off after machine start */
    while (first_cpu->stopped) {
        qemu_cond_wait(tcg_halt_cond, &qemu_global_mutex);

        /* process any pending work */
        CPU_FOREACH(cpu) {
            qemu_wait_io_event_common(cpu);
        }
    }

    /* process any pending work */
    while (1) {
        tcg_exec_all();

        if (use_icount) {
            int64_t deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);

            if (deadline == 0) {
                qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
            }
        }
        qemu_tcg_wait_io_event();
    }
static void *qemu_hax_cpu_thread_fn(void *arg)
    CPUState *cpu = arg;
    int r;

    qemu_thread_get_self(cpu->thread);
    qemu_mutex_lock(&qemu_global_mutex);

    cpu->thread_id = qemu_get_thread_id();
    cpu->created = true;

    qemu_cond_signal(&qemu_cpu_cond);

    while (1) {
        if (cpu_can_run(cpu)) {
            r = hax_smp_cpu_exec(cpu);
            if (r == EXCP_DEBUG) {
                cpu_handle_guest_debug(cpu);
            }
        }
        qemu_hax_wait_io_event(cpu);
    }
static void qemu_cpu_kick_thread(CPUState *cpu)
#ifndef _WIN32
    int err;

    err = pthread_kill(cpu->thread->thread, SIG_IPI);
    if (err) {
        fprintf(stderr, "qemu:%s: %s", __func__, strerror(err));
        exit(1);
    }

    /* The CPU thread cannot catch the signal reliably when the guest is
     * shut down on Mac, so double-check and resend it if necessary.
     */
#ifdef CONFIG_DARWIN
    if (hax_enabled() && hax_ug_platform()) {
        cpu->exit_request = 1;
    }
#endif
#else /* _WIN32 */
    if (!qemu_cpu_is_self(cpu)) {
        CONTEXT tcgContext;

        if (SuspendThread(cpu->hThread) == (DWORD)-1) {
            fprintf(stderr, "qemu:%s: GetLastError:%lu\n", __func__,
                    GetLastError());
            exit(1);
        }

        /* On multi-core systems, we are not sure that the thread is actually
         * suspended until we can get the context.
         */
        tcgContext.ContextFlags = CONTEXT_CONTROL;
        while (GetThreadContext(cpu->hThread, &tcgContext) != 0) {
            continue;
        }

        if (hax_enabled() && hax_ug_platform()) {
            cpu->exit_request = 1;
        }

        if (ResumeThread(cpu->hThread) == (DWORD)-1) {
            fprintf(stderr, "qemu:%s: GetLastError:%lu\n", __func__,
                    GetLastError());
            exit(1);
        }
    }
#endif
void qemu_cpu_kick(CPUState *cpu)
    qemu_cond_broadcast(cpu->halt_cond);
    if (((hax_enabled() && hax_ug_platform()) ||
         !tcg_enabled()) && !cpu->thread_kicked) {
    if (!tcg_enabled() && !cpu->thread_kicked) {
        qemu_cpu_kick_thread(cpu);
        cpu->thread_kicked = true;
    }

void qemu_cpu_kick_self(void)
    assert(current_cpu);

    if (!current_cpu->thread_kicked) {
        qemu_cpu_kick_thread(current_cpu);
        current_cpu->thread_kicked = true;
    }
bool qemu_cpu_is_self(CPUState *cpu)
    return qemu_thread_is_self(cpu->thread);

bool qemu_in_vcpu_thread(void)
    return current_cpu && qemu_cpu_is_self(current_cpu);

static __thread bool iothread_locked = false;

bool qemu_mutex_iothread_locked(void)
    return iothread_locked;
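/* iothread_locked is a per-thread flag rather than a query on the mutex
 * itself because pthreads offers no portable "do I hold this mutex"
 * test; the lock/unlock helpers below keep the flag in sync. */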
void qemu_mutex_lock_iothread(void)
    atomic_inc(&iothread_requesting_mutex);
    /* In the simple case there is no need to bump the VCPU thread out of
     * TCG code execution.
     */
    if (!tcg_enabled() || qemu_in_vcpu_thread() ||
        (hax_enabled() && hax_ug_platform()) ||
        !first_cpu || !first_cpu->created) {
        qemu_mutex_lock(&qemu_global_mutex);
        atomic_dec(&iothread_requesting_mutex);
    } else {
        if (qemu_mutex_trylock(&qemu_global_mutex)) {
            qemu_cpu_kick_thread(first_cpu);
            qemu_mutex_lock(&qemu_global_mutex);
        }
        atomic_dec(&iothread_requesting_mutex);
        qemu_cond_broadcast(&qemu_io_proceeded_cond);
    }
    iothread_locked = true;
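/* The trylock path above bounds the wait: if a TCG vCPU currently owns
 * the BQL, qemu_cpu_kick_thread() interrupts it so the lock is dropped
 * at the next safe point instead of at the end of a long TB chain. */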
void qemu_mutex_unlock_iothread(void)
    iothread_locked = false;
    qemu_mutex_unlock(&qemu_global_mutex);

static int all_vcpus_paused(void)
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        if (!cpu->stopped) {
            return 0;
        }
    }

    return 1;

void pause_all_vcpus(void)
    CPUState *cpu;

    qemu_clock_enable(QEMU_CLOCK_VIRTUAL, false);

    if (qemu_in_vcpu_thread()) {
        cpu_stop_current();
        if (!kvm_enabled()) {
            CPU_FOREACH(cpu) {
                cpu->stop = false;
                cpu->stopped = true;
            }
            return;
        }
    }

    while (!all_vcpus_paused()) {
        qemu_cond_wait(&qemu_pause_cond, &qemu_global_mutex);
    }

void cpu_resume(CPUState *cpu)
    cpu->stop = false;
    cpu->stopped = false;
    qemu_cpu_kick(cpu);

void resume_all_vcpus(void)
    CPUState *cpu;

    qemu_clock_enable(QEMU_CLOCK_VIRTUAL, true);
    CPU_FOREACH(cpu) {
        cpu_resume(cpu);
    }

/* For temporary buffers for forming a name */
#define VCPU_THREAD_NAME_SIZE 16

static void qemu_tcg_init_vcpu(CPUState *cpu)
    char thread_name[VCPU_THREAD_NAME_SIZE];

    tcg_cpu_address_space_init(cpu, cpu->as);

    /* share a single thread for all cpus with TCG */
    if (!tcg_cpu_thread) {
        cpu->thread = g_malloc0(sizeof(QemuThread));
        cpu->halt_cond = g_malloc0(sizeof(QemuCond));
        qemu_cond_init(cpu->halt_cond);
        tcg_halt_cond = cpu->halt_cond;
        snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/TCG",
                 cpu->cpu_index);
        qemu_thread_create(cpu->thread, thread_name, qemu_tcg_cpu_thread_fn,
                           cpu, QEMU_THREAD_JOINABLE);
#ifdef _WIN32
        cpu->hThread = qemu_thread_get_handle(cpu->thread);
#endif
        while (!cpu->created) {
            qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
        }
        tcg_cpu_thread = cpu->thread;
    } else {
        cpu->thread = tcg_cpu_thread;
        cpu->halt_cond = tcg_halt_cond;
    }

static void qemu_hax_start_vcpu(CPUState *cpu)
    char thread_name[VCPU_THREAD_NAME_SIZE];

    cpu->thread = g_malloc0(sizeof(QemuThread));
    cpu->halt_cond = g_malloc0(sizeof(QemuCond));
    qemu_cond_init(cpu->halt_cond);

    snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/HAX",
             cpu->cpu_index);

    qemu_thread_create(cpu->thread, thread_name, qemu_hax_cpu_thread_fn,
                       cpu, QEMU_THREAD_JOINABLE);
#ifdef _WIN32
    cpu->hThread = qemu_thread_get_handle(cpu->thread);
#endif
    while (!cpu->created) {
        qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
    }

static void qemu_kvm_start_vcpu(CPUState *cpu)
    char thread_name[VCPU_THREAD_NAME_SIZE];

    cpu->thread = g_malloc0(sizeof(QemuThread));
    cpu->halt_cond = g_malloc0(sizeof(QemuCond));
    qemu_cond_init(cpu->halt_cond);
    snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/KVM",
             cpu->cpu_index);
    qemu_thread_create(cpu->thread, thread_name, qemu_kvm_cpu_thread_fn,
                       cpu, QEMU_THREAD_JOINABLE);
    while (!cpu->created) {
        qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
    }

static void qemu_dummy_start_vcpu(CPUState *cpu)
    char thread_name[VCPU_THREAD_NAME_SIZE];

    cpu->thread = g_malloc0(sizeof(QemuThread));
    cpu->halt_cond = g_malloc0(sizeof(QemuCond));
    qemu_cond_init(cpu->halt_cond);
    snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/DUMMY",
             cpu->cpu_index);
    qemu_thread_create(cpu->thread, thread_name, qemu_dummy_cpu_thread_fn, cpu,
                       QEMU_THREAD_JOINABLE);
    while (!cpu->created) {
        qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
    }
void qemu_init_vcpu(CPUState *cpu)
    cpu->nr_cores = smp_cores;
    cpu->nr_threads = smp_threads;
    cpu->stopped = true;

    if (kvm_enabled()) {
        qemu_kvm_start_vcpu(cpu);
    } else if (hax_enabled() && hax_ug_platform()) {
        qemu_hax_start_vcpu(cpu);
    } else if (tcg_enabled()) {
        qemu_tcg_init_vcpu(cpu);
    } else {
        qemu_dummy_start_vcpu(cpu);
    }

void cpu_stop_current(void)
    if (current_cpu) {
        current_cpu->stop = false;
        current_cpu->stopped = true;
        cpu_exit(current_cpu);
        qemu_cond_signal(&qemu_pause_cond);
    }

int vm_stop(RunState state)
    if (qemu_in_vcpu_thread()) {
        qemu_system_vmstop_request_prepare();
        qemu_system_vmstop_request(state);
        /*
         * FIXME: should not return to device code in case
         * vm_stop() has been requested.
         */
        cpu_stop_current();
        return 0;
    }

    return do_vm_stop(state);
/* does a state transition even if the VM is already stopped;
   the current state is forgotten forever */
int vm_stop_force_state(RunState state)
    if (runstate_is_running()) {
        return vm_stop(state);
    } else {
        runstate_set(state);
        /* Make sure to return an error if the flush in a previous vm_stop()
         * failed. */
        return bdrv_flush_all();
    }
static int tcg_cpu_exec(CPUState *cpu)
    int ret;
#ifdef CONFIG_PROFILER
    int64_t ti;
#endif

#ifdef CONFIG_PROFILER
    ti = profile_getclock();
#endif
    if (use_icount) {
        int64_t count;
        int64_t deadline = 0;
        int decr;
        timers_state.qemu_icount -= (cpu->icount_decr.u16.low
                                     + cpu->icount_extra);
        cpu->icount_decr.u16.low = 0;
        cpu->icount_extra = 0;
        deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);

        /* Maintain prior (possibly buggy) behaviour where if no deadline
         * was set (as there is no QEMU_CLOCK_VIRTUAL timer) or it is more than
         * INT32_MAX nanoseconds ahead, we still use INT32_MAX
         * nanoseconds.
         */
        if ((deadline < 0) || (deadline > INT32_MAX)) {
            deadline = INT32_MAX;
        }

        count = qemu_icount_round(deadline);
        timers_state.qemu_icount += count;
        decr = (count > 0xffff) ? 0xffff : count;
        count -= decr;
        cpu->icount_decr.u16.low = decr;
        cpu->icount_extra = count;
    }
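    /* Example (illustrative): a 1 ms deadline with icount_time_shift == 3
     * rounds to 125000 instructions; 65535 of them go into the 16-bit
     * icount_decr.u16.low that the TCG fast path decrements, and the
     * remaining 59465 wait in icount_extra until the low part drains. */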
    ret = cpu_exec(cpu);
#ifdef CONFIG_PROFILER
    tcg_time += profile_getclock() - ti;
#endif
    if (use_icount) {
        /* Fold pending instructions back into the
           instruction counter, and clear the interrupt flag.  */
        timers_state.qemu_icount -= (cpu->icount_decr.u16.low
                                     + cpu->icount_extra);
        cpu->icount_decr.u32 = 0;
        cpu->icount_extra = 0;
    }
    return ret;
static void tcg_exec_all(void)
    int r;

    /* Account partial waits to QEMU_CLOCK_VIRTUAL.  */
    qemu_clock_warp(QEMU_CLOCK_VIRTUAL);

    if (next_cpu == NULL) {
        next_cpu = first_cpu;
    }
    for (; next_cpu != NULL && !exit_request; next_cpu = CPU_NEXT(next_cpu)) {
        CPUState *cpu = next_cpu;

        qemu_clock_enable(QEMU_CLOCK_VIRTUAL,
                          (cpu->singlestep_enabled & SSTEP_NOTIMER) == 0);

        if (cpu_can_run(cpu)) {
            r = tcg_cpu_exec(cpu);
            if (r == EXCP_DEBUG) {
                cpu_handle_guest_debug(cpu);
                break;
            }
        } else if (cpu->stop || cpu->stopped) {
            break;
        }
    }
    exit_request = 0;

void list_cpus(FILE *f, fprintf_function cpu_fprintf, const char *optarg)
    /* XXX: implement xxx_cpu_list for targets that still lack it */
#if defined(cpu_list)
    cpu_list(f, cpu_fprintf);
#endif

CpuInfoList *qmp_query_cpus(Error **errp)
    CpuInfoList *head = NULL, *cur_item = NULL;
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        CpuInfoList *info;
#if defined(TARGET_I386)
        X86CPU *x86_cpu = X86_CPU(cpu);
        CPUX86State *env = &x86_cpu->env;
#elif defined(TARGET_PPC)
        PowerPCCPU *ppc_cpu = POWERPC_CPU(cpu);
        CPUPPCState *env = &ppc_cpu->env;
#elif defined(TARGET_SPARC)
        SPARCCPU *sparc_cpu = SPARC_CPU(cpu);
        CPUSPARCState *env = &sparc_cpu->env;
#elif defined(TARGET_MIPS)
        MIPSCPU *mips_cpu = MIPS_CPU(cpu);
        CPUMIPSState *env = &mips_cpu->env;
#elif defined(TARGET_TRICORE)
        TriCoreCPU *tricore_cpu = TRICORE_CPU(cpu);
        CPUTriCoreState *env = &tricore_cpu->env;
#endif

        cpu_synchronize_state(cpu);

        info = g_malloc0(sizeof(*info));
        info->value = g_malloc0(sizeof(*info->value));
        info->value->CPU = cpu->cpu_index;
        info->value->current = (cpu == first_cpu);
        info->value->halted = cpu->halted;
        info->value->qom_path = object_get_canonical_path(OBJECT(cpu));
        info->value->thread_id = cpu->thread_id;
#if defined(TARGET_I386)
        info->value->has_pc = true;
        info->value->pc = env->eip + env->segs[R_CS].base;
#elif defined(TARGET_PPC)
        info->value->has_nip = true;
        info->value->nip = env->nip;
#elif defined(TARGET_SPARC)
        info->value->has_pc = true;
        info->value->pc = env->pc;
        info->value->has_npc = true;
        info->value->npc = env->npc;
#elif defined(TARGET_MIPS)
        info->value->has_PC = true;
        info->value->PC = env->active_tc.PC;
#elif defined(TARGET_TRICORE)
        info->value->has_PC = true;
        info->value->PC = env->PC;
#endif

        /* XXX: waiting for the qapi to support GSList */
        if (!cur_item) {
            head = cur_item = info;
        } else {
            cur_item->next = info;
            cur_item = info;
        }
    }

    return head;
void qmp_memsave(int64_t addr, int64_t size, const char *filename,
                 bool has_cpu, int64_t cpu_index, Error **errp)
    FILE *f;
    uint32_t l;
    CPUState *cpu;
    uint8_t buf[1024];
    int64_t orig_addr = addr, orig_size = size;

    cpu = qemu_get_cpu(cpu_index);
    if (cpu == NULL) {
        error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "cpu-index",
                   "a CPU number");
        return;
    }

    f = fopen(filename, "wb");
    if (f == NULL) {
        error_setg_file_open(errp, errno, filename);
        return;
    }

    while (size != 0) {
        if (cpu_memory_rw_debug(cpu, addr, buf, l, 0) != 0) {
            error_setg(errp, "Invalid addr 0x%016" PRIx64 "/size %" PRId64
                             " specified", orig_addr, orig_size);
            goto exit;
        }
        if (fwrite(buf, 1, l, f) != l) {
            error_setg(errp, QERR_IO_ERROR);
            goto exit;
        }
    }

void qmp_pmemsave(int64_t addr, int64_t size, const char *filename,
                  Error **errp)
    FILE *f;
    uint32_t l;
    uint8_t buf[1024];

    f = fopen(filename, "wb");
    if (f == NULL) {
        error_setg_file_open(errp, errno, filename);
        return;
    }

    while (size != 0) {
        cpu_physical_memory_read(addr, buf, l);
        if (fwrite(buf, 1, l, f) != l) {
            error_setg(errp, QERR_IO_ERROR);
            goto exit;
        }
    }
void qmp_inject_nmi(Error **errp)
#if defined(TARGET_I386)
    CPUState *cs;

    CPU_FOREACH(cs) {
        X86CPU *cpu = X86_CPU(cs);

        if (!cpu->apic_state) {
            cpu_interrupt(cs, CPU_INTERRUPT_NMI);
        } else {
            apic_deliver_nmi(cpu->apic_state);
        }
    }
#else
    nmi_monitor_handle(monitor_get_cpu_index(), errp);
#endif

void dump_drift_info(FILE *f, fprintf_function cpu_fprintf)
    if (!use_icount) {
        return;
    }

    cpu_fprintf(f, "Host - Guest clock %"PRIi64" ms\n",
                (cpu_get_clock() - cpu_get_icount())/SCALE_MS);
    if (icount_align_option) {
        cpu_fprintf(f, "Max guest delay %"PRIi64" ms\n", -max_delay/SCALE_MS);
        cpu_fprintf(f, "Max guest advance %"PRIi64" ms\n", max_advance/SCALE_MS);
    } else {
        cpu_fprintf(f, "Max guest delay NA\n");
        cpu_fprintf(f, "Max guest advance NA\n");
    }