/*
 * QEMU System Emulator
 *
 * Copyright (c) 2003-2008 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */
/* Needed early for CONFIG_BSD etc. */
#include "config-host.h"

#include "monitor/monitor.h"
#include "qapi/qmp/qerror.h"
#include "qemu/error-report.h"
#include "sysemu/sysemu.h"
#include "exec/gdbstub.h"
#include "sysemu/dma.h"
#include "sysemu/kvm.h"
#include "sysemu/hax.h"
#include "qmp-commands.h"

#include "qemu/thread.h"
#include "sysemu/cpus.h"
#include "sysemu/qtest.h"
#include "qemu/main-loop.h"
#include "qemu/bitmap.h"
#include "qemu/seqlock.h"
#include "qapi-event.h"
#include "hw/nmi.h"

#ifndef _WIN32
#include "qemu/compatfd.h"
#endif

#ifdef CONFIG_LINUX

#include <sys/prctl.h>

#ifndef PR_MCE_KILL
#define PR_MCE_KILL 33
#endif

#ifndef PR_MCE_KILL_SET
#define PR_MCE_KILL_SET 1
#endif

#ifndef PR_MCE_KILL_EARLY
#define PR_MCE_KILL_EARLY 1
#endif

#endif /* CONFIG_LINUX */
static CPUState *next_cpu;
int64_t max_delay;
int64_t max_advance;
bool cpu_is_stopped(CPUState *cpu)
{
    return cpu->stopped || !runstate_is_running();
}
static bool cpu_thread_is_idle(CPUState *cpu)
{
    if (cpu->stop || cpu->queued_work_first) {
        return false;
    }
    if (cpu_is_stopped(cpu)) {
        return true;
    }
    if (!cpu->halted || cpu_has_work(cpu) ||
        kvm_halt_in_kernel()) {
        return false;
    }
    return true;
}
static bool all_cpu_threads_idle(void)
{
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        if (!cpu_thread_is_idle(cpu)) {
            return false;
        }
    }
    return true;
}
/***********************************************************/
/* guest cycle counter */

/* Protected by TimersState seqlock */

static bool icount_sleep = true;
static int64_t vm_clock_warp_start = -1;
/* Conversion factor from emulated instructions to virtual clock ticks. */
static int icount_time_shift;
/* Arbitrarily pick 1MIPS as the minimum allowable speed. */
#define MAX_ICOUNT_SHIFT 10

static QEMUTimer *icount_rt_timer;
static QEMUTimer *icount_vm_timer;
static QEMUTimer *icount_warp_timer;
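/* Illustrative numbers: with icount_time_shift == 3, each emulated
 * instruction accounts for 1 << 3 == 8 ns of virtual time, i.e. roughly
 * 125 MIPS.  At the maximum shift of 10 an instruction costs 1024 ns,
 * i.e. roughly 1 MIPS, which is where the "1MIPS minimum" above comes from.
 */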
typedef struct TimersState {
    /* Protected by BQL. */
    int64_t cpu_ticks_prev;
    int64_t cpu_ticks_offset;

    /* cpu_clock_offset can be read out of BQL, so protect it with
     * this lock.
     */
    QemuSeqLock vm_clock_seqlock;
    int64_t cpu_clock_offset;
    int32_t cpu_ticks_enabled;
    int64_t dummy;

    /* Compensate for varying guest execution speed. */
    int64_t qemu_icount_bias;
    /* Only written by TCG thread */
    int64_t qemu_icount;
} TimersState;

static TimersState timers_state;
int64_t cpu_get_icount_raw(void)
{
    int64_t icount;
    CPUState *cpu = current_cpu;

    icount = timers_state.qemu_icount;
    if (cpu) {
        if (!cpu_can_do_io(cpu)) {
            fprintf(stderr, "Bad icount read\n");
            exit(1);
        }
        icount -= (cpu->icount_decr.u16.low + cpu->icount_extra);
    }
    return icount;
}
/* Return the virtual CPU time, based on the instruction counter. */
static int64_t cpu_get_icount_locked(void)
{
    int64_t icount = cpu_get_icount_raw();
    return timers_state.qemu_icount_bias + cpu_icount_to_ns(icount);
}
int64_t cpu_get_icount(void)
{
    int64_t icount;
    unsigned start;

    do {
        start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
        icount = cpu_get_icount_locked();
    } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));

    return icount;
}
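/* Note on the idiom above: seqlock readers take no lock.  They sample a
 * sequence number, read the protected fields, and retry if a writer raced
 * with them:
 *
 *     do {
 *         start = seqlock_read_begin(&sl);
 *         ... read fields protected by sl ...
 *     } while (seqlock_read_retry(&sl, start));
 *
 * cpu_get_clock() below uses the same pattern.
 */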
int64_t cpu_icount_to_ns(int64_t icount)
{
    return icount << icount_time_shift;
}
/* return the host CPU cycle counter and handle stop/restart */
/* Caller must hold the BQL */
int64_t cpu_get_ticks(void)
{
    int64_t ticks;

    if (use_icount) {
        return cpu_get_icount();
    }

    ticks = timers_state.cpu_ticks_offset;
    if (timers_state.cpu_ticks_enabled) {
        ticks += cpu_get_real_ticks();
    }

    if (timers_state.cpu_ticks_prev > ticks) {
        /* Note: non-increasing ticks may happen if the host uses
           software suspend. */
        timers_state.cpu_ticks_offset += timers_state.cpu_ticks_prev - ticks;
        ticks = timers_state.cpu_ticks_prev;
    }

    timers_state.cpu_ticks_prev = ticks;
    return ticks;
}
static int64_t cpu_get_clock_locked(void)
{
    int64_t ticks;

    ticks = timers_state.cpu_clock_offset;
    if (timers_state.cpu_ticks_enabled) {
        ticks += get_clock();
    }

    return ticks;
}

/* return the host CPU monotonic timer and handle stop/restart */
int64_t cpu_get_clock(void)
{
    int64_t ti;
    unsigned start;

    do {
        start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
        ti = cpu_get_clock_locked();
    } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));

    return ti;
}
/* enable cpu_get_ticks()
 * Caller must hold the BQL, which serves as the mutex for vm_clock_seqlock.
 */
void cpu_enable_ticks(void)
{
    /* Here, the only thing really protected by the seqlock is
     * cpu_clock_offset. */
    seqlock_write_lock(&timers_state.vm_clock_seqlock);
    if (!timers_state.cpu_ticks_enabled) {
        timers_state.cpu_ticks_offset -= cpu_get_real_ticks();
        timers_state.cpu_clock_offset -= get_clock();
        timers_state.cpu_ticks_enabled = 1;
    }
    seqlock_write_unlock(&timers_state.vm_clock_seqlock);
}

/* disable cpu_get_ticks(): the clock is stopped. You must not call
 * cpu_get_ticks() after that.
 * Caller must hold the BQL, which serves as the mutex for vm_clock_seqlock.
 */
void cpu_disable_ticks(void)
{
    /* Here, the only thing really protected by the seqlock is
     * cpu_clock_offset. */
    seqlock_write_lock(&timers_state.vm_clock_seqlock);
    if (timers_state.cpu_ticks_enabled) {
        timers_state.cpu_ticks_offset += cpu_get_real_ticks();
        timers_state.cpu_clock_offset = cpu_get_clock_locked();
        timers_state.cpu_ticks_enabled = 0;
    }
    seqlock_write_unlock(&timers_state.vm_clock_seqlock);
}
/* Correlation between real and virtual time is always going to be
   fairly approximate, so ignore small variation.
   When the guest is idle real and virtual time will be aligned in
   the IO wait loop. */
#define ICOUNT_WOBBLE (get_ticks_per_sec() / 10)
static void icount_adjust(void)
{
    int64_t cur_time;
    int64_t cur_icount;
    int64_t delta;

    /* Protected by TimersState mutex. */
    static int64_t last_delta;

    /* If the VM is not running, then do nothing. */
    if (!runstate_is_running()) {
        return;
    }

    seqlock_write_lock(&timers_state.vm_clock_seqlock);
    cur_time = cpu_get_clock_locked();
    cur_icount = cpu_get_icount_locked();

    delta = cur_icount - cur_time;
    /* FIXME: This is a very crude algorithm, somewhat prone to oscillation. */
    if (delta > 0
        && last_delta + ICOUNT_WOBBLE < delta * 2
        && icount_time_shift > 0) {
        /* The guest is getting too far ahead.  Slow time down. */
        icount_time_shift--;
    }
    if (delta < 0
        && last_delta - ICOUNT_WOBBLE > delta * 2
        && icount_time_shift < MAX_ICOUNT_SHIFT) {
        /* The guest is getting too far behind.  Speed time up. */
        icount_time_shift++;
    }
    last_delta = delta;
    timers_state.qemu_icount_bias = cur_icount
                              - (timers_state.qemu_icount << icount_time_shift);
    seqlock_write_unlock(&timers_state.vm_clock_seqlock);
}
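/* Worked example (illustrative): if virtual time has drifted ahead of real
 * time (delta > 0) and the drift has roughly doubled since the previous
 * sample, beyond the ICOUNT_WOBBLE margin, the shift is decremented: each
 * instruction is then charged half as many nanoseconds, letting real time
 * catch up.  The bias is recomputed afterwards so that
 * cpu_get_icount_locked() stays continuous across the shift change.
 */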
static void icount_adjust_rt(void *opaque)
{
    timer_mod(icount_rt_timer,
              qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000);
    icount_adjust();
}

static void icount_adjust_vm(void *opaque)
{
    timer_mod(icount_vm_timer,
              qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
              get_ticks_per_sec() / 10);
    icount_adjust();
}
static int64_t qemu_icount_round(int64_t count)
{
    return (count + (1 << icount_time_shift) - 1) >> icount_time_shift;
}
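/* Example: with icount_time_shift == 3, qemu_icount_round(20) returns
 * (20 + 7) >> 3 == 3, i.e. a 20 ns deadline is rounded up to a budget of
 * 3 instructions (24 ns) rather than down to 2 (16 ns).
 */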
static void icount_warp_rt(void *opaque)
{
    /* The icount_warp_timer is rescheduled soon after vm_clock_warp_start
     * changes from -1 to another value, so the race here is okay.
     */
    if (atomic_read(&vm_clock_warp_start) == -1) {
        return;
    }

    seqlock_write_lock(&timers_state.vm_clock_seqlock);
    if (runstate_is_running()) {
        int64_t clock = cpu_get_clock_locked();
        int64_t warp_delta;

        warp_delta = clock - vm_clock_warp_start;
        if (use_icount == 2) {
            /*
             * In adaptive mode, do not let QEMU_CLOCK_VIRTUAL run too
             * far ahead of real time.
             */
            int64_t cur_icount = cpu_get_icount_locked();
            int64_t delta = clock - cur_icount;
            warp_delta = MIN(warp_delta, delta);
        }
        timers_state.qemu_icount_bias += warp_delta;
    }
    vm_clock_warp_start = -1;
    seqlock_write_unlock(&timers_state.vm_clock_seqlock);

    if (qemu_clock_expired(QEMU_CLOCK_VIRTUAL)) {
        qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
    }
}
void qtest_clock_warp(int64_t dest)
{
    int64_t clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
    AioContext *aio_context;
    assert(qtest_enabled());
    aio_context = qemu_get_aio_context();
    while (clock < dest) {
        int64_t deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
        int64_t warp = qemu_soonest_timeout(dest - clock, deadline);

        seqlock_write_lock(&timers_state.vm_clock_seqlock);
        timers_state.qemu_icount_bias += warp;
        seqlock_write_unlock(&timers_state.vm_clock_seqlock);

        qemu_clock_run_timers(QEMU_CLOCK_VIRTUAL);
        timerlist_run_timers(aio_context->tlg.tl[QEMU_CLOCK_VIRTUAL]);
        clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
    }
    qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
}
void qemu_clock_warp(QEMUClockType type)
{
    int64_t clock;
    int64_t deadline;

    /*
     * There are too many global variables to make the "warp" behavior
     * applicable to other clocks.  But a clock argument removes the
     * need for if statements all over the place.
     */
    if (type != QEMU_CLOCK_VIRTUAL || !use_icount) {
        return;
    }

    if (icount_sleep) {
        /*
         * If the CPUs have been sleeping, advance QEMU_CLOCK_VIRTUAL timer now.
         * This ensures that the deadline for the timer is computed correctly
         * below.
         * This also makes sure that the insn counter is synchronized before
         * the CPU starts running, in case the CPU is woken by an event other
         * than the earliest QEMU_CLOCK_VIRTUAL timer.
         */
        icount_warp_rt(NULL);
        timer_del(icount_warp_timer);
    }
    if (!all_cpu_threads_idle()) {
        return;
    }

    if (qtest_enabled()) {
        /* When testing, qtest commands advance icount. */
        return;
    }

    /* We want to use the earliest deadline from ALL vm_clocks */
    clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT);
    deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
    if (deadline < 0) {
        static bool notified;
        if (!icount_sleep && !notified) {
            error_report("WARNING: icount sleep disabled and no active timers");
            notified = true;
        }
        return;
    }

    if (deadline > 0) {
        /*
         * Ensure QEMU_CLOCK_VIRTUAL proceeds even when the virtual CPU goes to
         * sleep.  Otherwise, the CPU might be waiting for a future timer
         * interrupt to wake it up, but the interrupt never comes because
         * the vCPU isn't running any insns and thus doesn't advance the
         * QEMU_CLOCK_VIRTUAL.
         */
        if (!icount_sleep) {
            /*
             * We never let VCPUs sleep in no sleep icount mode.
             * If there is a pending QEMU_CLOCK_VIRTUAL timer we just advance
             * to the next QEMU_CLOCK_VIRTUAL event and notify it.
             * It is useful when we want a deterministic execution time,
             * isolated from host latencies.
             */
            seqlock_write_lock(&timers_state.vm_clock_seqlock);
            timers_state.qemu_icount_bias += deadline;
            seqlock_write_unlock(&timers_state.vm_clock_seqlock);
            qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
        } else {
            /*
             * We do stop VCPUs and only advance QEMU_CLOCK_VIRTUAL after some
             * "real" time (related to the time left until the next event) has
             * passed.  The QEMU_CLOCK_VIRTUAL_RT clock will do this.
             * This keeps the warps from being visible externally; for example,
             * you will not be sending network packets continuously instead of
             * every 100ms.
             */
            seqlock_write_lock(&timers_state.vm_clock_seqlock);
            if (vm_clock_warp_start == -1 || vm_clock_warp_start > clock) {
                vm_clock_warp_start = clock;
            }
            seqlock_write_unlock(&timers_state.vm_clock_seqlock);
            timer_mod_anticipate(icount_warp_timer, clock + deadline);
        }
    } else if (deadline == 0) {
        qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
    }
}
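/* Scenario (illustrative): all vCPUs are idle and the nearest
 * QEMU_CLOCK_VIRTUAL timer is 10 ms away.  With icount_sleep enabled,
 * vm_clock_warp_start is recorded and icount_warp_timer fires roughly
 * 10 ms of host time later; icount_warp_rt() then adds the elapsed real
 * time (clamped in adaptive mode) to qemu_icount_bias, so virtual time
 * jumps straight to the timer deadline instead of crawling there one idle
 * instruction at a time.
 */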
static bool icount_state_needed(void *opaque)
{
    return use_icount;
}

/*
 * This is a subsection for icount migration.
 */
static const VMStateDescription icount_vmstate_timers = {
    .name = "timer/icount",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = icount_state_needed,
    .fields = (VMStateField[]) {
        VMSTATE_INT64(qemu_icount_bias, TimersState),
        VMSTATE_INT64(qemu_icount, TimersState),
        VMSTATE_END_OF_LIST()
    }
};

static const VMStateDescription vmstate_timers = {
    .name = "timer",
    .version_id = 2,
    .minimum_version_id = 1,
    .fields = (VMStateField[]) {
        VMSTATE_INT64(cpu_ticks_offset, TimersState),
        VMSTATE_INT64(dummy, TimersState),
        VMSTATE_INT64_V(cpu_clock_offset, TimersState, 2),
        VMSTATE_END_OF_LIST()
    },
    .subsections = (const VMStateDescription*[]) {
        &icount_vmstate_timers,
        NULL
    }
};
void cpu_ticks_init(void)
{
    seqlock_init(&timers_state.vm_clock_seqlock, NULL);
    vmstate_register(NULL, 0, &vmstate_timers, &timers_state);
}
void configure_icount(QemuOpts *opts, Error **errp)
{
    const char *option;
    char *rem_str = NULL;

    option = qemu_opt_get(opts, "shift");
    if (!option) {
        if (qemu_opt_get(opts, "align") != NULL) {
            error_setg(errp, "Please specify shift option when using align");
        }
        return;
    }

    icount_sleep = qemu_opt_get_bool(opts, "sleep", true);
    if (icount_sleep) {
        icount_warp_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL_RT,
                                         icount_warp_rt, NULL);
    }

    icount_align_option = qemu_opt_get_bool(opts, "align", false);

    if (icount_align_option && !icount_sleep) {
        error_setg(errp, "align=on and sleep=no are incompatible");
    }
    if (strcmp(option, "auto") != 0) {
        errno = 0;
        icount_time_shift = strtol(option, &rem_str, 0);
        if (errno != 0 || *rem_str != '\0' || !strlen(option)) {
            error_setg(errp, "icount: Invalid shift value");
        }
        use_icount = 1;
        return;
    } else if (icount_align_option) {
        error_setg(errp, "shift=auto and align=on are incompatible");
    } else if (!icount_sleep) {
        error_setg(errp, "shift=auto and sleep=no are incompatible");
    }

    use_icount = 2;

    /* 125MIPS seems a reasonable initial guess at the guest speed.
       It will be corrected fairly quickly anyway. */
    icount_time_shift = 3;

    /* Have both realtime and virtual time triggers for speed adjustment.
       The realtime trigger catches emulated time passing too slowly,
       the virtual time trigger catches emulated time passing too fast.
       Realtime triggers occur even when idle, so use them less frequently
       than VM triggers. */
    icount_rt_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL_RT,
                                   icount_adjust_rt, NULL);
    timer_mod(icount_rt_timer,
              qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000);
    icount_vm_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
                                   icount_adjust_vm, NULL);
    timer_mod(icount_vm_timer,
              qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
              get_ticks_per_sec() / 10);
}
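/* Command-line reference (examples only, not an exhaustive list): the
 * options parsed above come from the -icount switch, e.g.
 *
 *     -icount shift=7             fixed rate: 2^7 ns of virtual time/insn
 *     -icount shift=auto          adaptive rate, tuned by icount_adjust()
 *     -icount shift=7,sleep=off   deterministic: vCPUs never sleep, virtual
 *                                 time jumps straight to the next deadline
 */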
/***********************************************************/
void hw_error(const char *fmt, ...)
{
    va_list ap;
    CPUState *cpu;

    va_start(ap, fmt);
    fprintf(stderr, "qemu: hardware error: ");
    vfprintf(stderr, fmt, ap);
    fprintf(stderr, "\n");
    CPU_FOREACH(cpu) {
        fprintf(stderr, "CPU #%d:\n", cpu->cpu_index);
        cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_FPU);
    }
    va_end(ap);
    abort();
}
void cpu_synchronize_all_states(void)
{
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        cpu_synchronize_state(cpu);
    }
}

void cpu_synchronize_all_post_reset(void)
{
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        cpu_synchronize_post_reset(cpu);
#ifdef CONFIG_HAX
        if (hax_enabled() && hax_ug_platform()) {
            hax_cpu_synchronize_post_reset(cpu);
        }
#endif
    }
}

void cpu_synchronize_all_post_init(void)
{
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        cpu_synchronize_post_init(cpu);
#ifdef CONFIG_HAX
        if (hax_enabled() && hax_ug_platform()) {
            hax_cpu_synchronize_post_init(cpu);
        }
#endif
    }
}

void cpu_clean_all_dirty(void)
{
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        cpu_clean_state(cpu);
    }
}
static int do_vm_stop(RunState state)
{
    int ret = 0;

    if (runstate_is_running()) {
        cpu_disable_ticks();
        pause_all_vcpus();
        runstate_set(state);
        vm_state_notify(0, state);
        qapi_event_send_stop(&error_abort);
    }

    bdrv_drain_all();
    ret = bdrv_flush_all();

    return ret;
}
static bool cpu_can_run(CPUState *cpu)
{
    if (cpu->stop) {
        return false;
    }
    if (cpu_is_stopped(cpu)) {
        return false;
    }
    return true;
}

static void cpu_handle_guest_debug(CPUState *cpu)
{
    gdb_set_stop_cpu(cpu);
    qemu_system_debug_request();
    cpu->stopped = true;
}
static void cpu_signal(int sig)
{
    if (current_cpu) {
        cpu_exit(current_cpu);
    }
    exit_request = 1;
}
#ifdef CONFIG_LINUX
static void sigbus_reraise(void)
{
    sigset_t set;
    struct sigaction action;

    memset(&action, 0, sizeof(action));
    action.sa_handler = SIG_DFL;
    if (!sigaction(SIGBUS, &action, NULL)) {
        raise(SIGBUS);
        sigemptyset(&set);
        sigaddset(&set, SIGBUS);
        sigprocmask(SIG_UNBLOCK, &set, NULL);
    }
    perror("Failed to re-raise SIGBUS!\n");
    abort();
}

static void sigbus_handler(int n, struct qemu_signalfd_siginfo *siginfo,
                           void *ctx)
{
    if (kvm_on_sigbus(siginfo->ssi_code,
                      (void *)(intptr_t)siginfo->ssi_addr)) {
        sigbus_reraise();
    }
}

static void qemu_init_sigbus(void)
{
    struct sigaction action;

    memset(&action, 0, sizeof(action));
    action.sa_flags = SA_SIGINFO;
    action.sa_sigaction = (void (*)(int, siginfo_t*, void*))sigbus_handler;
    sigaction(SIGBUS, &action, NULL);

    prctl(PR_MCE_KILL, PR_MCE_KILL_SET, PR_MCE_KILL_EARLY, 0, 0);
}

static void qemu_kvm_eat_signals(CPUState *cpu)
{
    struct timespec ts = { 0, 0 };
    siginfo_t siginfo;
    sigset_t waitset;
    sigset_t chkset;
    int r;

    sigemptyset(&waitset);
    sigaddset(&waitset, SIG_IPI);
    sigaddset(&waitset, SIGBUS);

    do {
        r = sigtimedwait(&waitset, &siginfo, &ts);
        if (r == -1 && !(errno == EAGAIN || errno == EINTR)) {
            perror("sigtimedwait");
            exit(1);
        }

        switch (r) {
        case SIGBUS:
            if (kvm_on_sigbus_vcpu(cpu, siginfo.si_code, siginfo.si_addr)) {
                sigbus_reraise();
            }
            break;
        default:
            break;
        }

        r = sigpending(&chkset);
        if (r == -1) {
            perror("sigpending");
            exit(1);
        }
    } while (sigismember(&chkset, SIG_IPI) || sigismember(&chkset, SIGBUS));
}

#else /* !CONFIG_LINUX */

static void qemu_init_sigbus(void)
{
}

static void qemu_kvm_eat_signals(CPUState *cpu)
{
}
#endif /* !CONFIG_LINUX */
#ifndef _WIN32
static void dummy_signal(int sig)
{
}

static void qemu_kvm_init_cpu_signals(CPUState *cpu)
{
    int r;
    sigset_t set;
    struct sigaction sigact;

    memset(&sigact, 0, sizeof(sigact));
    sigact.sa_handler = dummy_signal;
    sigaction(SIG_IPI, &sigact, NULL);

    pthread_sigmask(SIG_BLOCK, NULL, &set);
    sigdelset(&set, SIG_IPI);
    sigdelset(&set, SIGBUS);
    r = kvm_set_signal_mask(cpu, &set);
    if (r) {
        fprintf(stderr, "kvm_set_signal_mask: %s\n", strerror(-r));
        exit(1);
    }
}

static void qemu_tcg_init_cpu_signals(void)
{
    sigset_t set;
    struct sigaction sigact;

    memset(&sigact, 0, sizeof(sigact));
    sigact.sa_handler = cpu_signal;
    sigaction(SIG_IPI, &sigact, NULL);

    sigemptyset(&set);
    sigaddset(&set, SIG_IPI);
    pthread_sigmask(SIG_UNBLOCK, &set, NULL);
}

#else /* _WIN32 */
static void qemu_kvm_init_cpu_signals(CPUState *cpu)
{
    abort();
}

static void qemu_tcg_init_cpu_signals(void)
{
}
#endif /* _WIN32 */
static QemuMutex qemu_global_mutex;
static QemuCond qemu_io_proceeded_cond;
static unsigned iothread_requesting_mutex;

static QemuThread io_thread;

static QemuThread *tcg_cpu_thread;
static QemuCond *tcg_halt_cond;

/* cpu creation */
static QemuCond qemu_cpu_cond;
/* system init */
static QemuCond qemu_pause_cond;
static QemuCond qemu_work_cond;

void qemu_init_cpu_loop(void)
{
    qemu_init_sigbus();
    qemu_cond_init(&qemu_cpu_cond);
    qemu_cond_init(&qemu_pause_cond);
    qemu_cond_init(&qemu_work_cond);
    qemu_cond_init(&qemu_io_proceeded_cond);
    qemu_mutex_init(&qemu_global_mutex);

    qemu_thread_get_self(&io_thread);
}
void run_on_cpu(CPUState *cpu, void (*func)(void *data), void *data)
{
    struct qemu_work_item wi;

    if (qemu_cpu_is_self(cpu)) {
        func(data);
        return;
    }

    wi.func = func;
    wi.data = data;
    wi.free = false;
    if (cpu->queued_work_first == NULL) {
        cpu->queued_work_first = &wi;
    } else {
        cpu->queued_work_last->next = &wi;
    }
    cpu->queued_work_last = &wi;
    wi.next = NULL;
    wi.done = false;

    qemu_cpu_kick(cpu);
    while (!wi.done) {
        CPUState *self_cpu = current_cpu;

        qemu_cond_wait(&qemu_work_cond, &qemu_global_mutex);
        current_cpu = self_cpu;
    }
}
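/* Typical use (a sketch; do_flush_work is a hypothetical callback):
 *
 *     static void do_flush_work(void *data)
 *     {
 *         ... runs on cpu's own thread with the BQL held ...
 *     }
 *
 *     run_on_cpu(cpu, do_flush_work, cpu);   // blocks until done
 *
 * async_run_on_cpu() below is the fire-and-forget variant; its work item
 * is heap-allocated and freed by flush_queued_work().
 */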
void async_run_on_cpu(CPUState *cpu, void (*func)(void *data), void *data)
{
    struct qemu_work_item *wi;

    if (qemu_cpu_is_self(cpu)) {
        func(data);
        return;
    }

    wi = g_malloc0(sizeof(struct qemu_work_item));
    wi->func = func;
    wi->data = data;
    wi->free = true;
    if (cpu->queued_work_first == NULL) {
        cpu->queued_work_first = wi;
    } else {
        cpu->queued_work_last->next = wi;
    }
    cpu->queued_work_last = wi;
    wi->next = NULL;
    wi->done = false;

    qemu_cpu_kick(cpu);
}
static void flush_queued_work(CPUState *cpu)
{
    struct qemu_work_item *wi;

    if (cpu->queued_work_first == NULL) {
        return;
    }

    while ((wi = cpu->queued_work_first)) {
        cpu->queued_work_first = wi->next;
        wi->func(wi->data);
        wi->done = true;
        if (wi->free) {
            g_free(wi);
        }
    }
    cpu->queued_work_last = NULL;
    qemu_cond_broadcast(&qemu_work_cond);
}
static void qemu_wait_io_event_common(CPUState *cpu)
{
    if (cpu->stop) {
        cpu->stop = false;
        cpu->stopped = true;
        qemu_cond_signal(&qemu_pause_cond);
    }
    flush_queued_work(cpu);
    cpu->thread_kicked = false;
}
static void qemu_tcg_wait_io_event(void)
{
    CPUState *cpu;

    while (all_cpu_threads_idle()) {
        /* Start accounting real time to the virtual clock if the CPUs
           are idle. */
        qemu_clock_warp(QEMU_CLOCK_VIRTUAL);
        qemu_cond_wait(tcg_halt_cond, &qemu_global_mutex);
    }

    while (iothread_requesting_mutex) {
        qemu_cond_wait(&qemu_io_proceeded_cond, &qemu_global_mutex);
    }

    CPU_FOREACH(cpu) {
        qemu_wait_io_event_common(cpu);
    }
}
static void qemu_hax_wait_io_event(CPUState *cpu)
{
    while (cpu_thread_is_idle(cpu)) {
        qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
    }

    qemu_wait_io_event_common(cpu);
}

static void qemu_kvm_wait_io_event(CPUState *cpu)
{
    while (cpu_thread_is_idle(cpu)) {
        qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
    }

    qemu_kvm_eat_signals(cpu);
    qemu_wait_io_event_common(cpu);
}
static void *qemu_kvm_cpu_thread_fn(void *arg)
{
    CPUState *cpu = arg;
    int r;

    rcu_register_thread();

    qemu_mutex_lock_iothread();
    qemu_thread_get_self(cpu->thread);
    cpu->thread_id = qemu_get_thread_id();
    cpu->can_do_io = 1;
    current_cpu = cpu;

    r = kvm_init_vcpu(cpu);
    if (r < 0) {
        fprintf(stderr, "kvm_init_vcpu failed: %s\n", strerror(-r));
        exit(1);
    }

    qemu_kvm_init_cpu_signals(cpu);

    /* signal CPU creation */
    cpu->created = true;
    qemu_cond_signal(&qemu_cpu_cond);

    while (1) {
        if (cpu_can_run(cpu)) {
            r = kvm_cpu_exec(cpu);
            if (r == EXCP_DEBUG) {
                cpu_handle_guest_debug(cpu);
            }
        }
        qemu_kvm_wait_io_event(cpu);
    }

    return NULL;
}
static void *qemu_dummy_cpu_thread_fn(void *arg)
{
#ifdef _WIN32
    fprintf(stderr, "qtest is not supported under Windows\n");
    exit(1);
#else
    CPUState *cpu = arg;
    sigset_t waitset;
    int r;

    rcu_register_thread();

    qemu_mutex_lock_iothread();
    qemu_thread_get_self(cpu->thread);
    cpu->thread_id = qemu_get_thread_id();
    cpu->can_do_io = 1;

    sigemptyset(&waitset);
    sigaddset(&waitset, SIG_IPI);

    /* signal CPU creation */
    cpu->created = true;
    qemu_cond_signal(&qemu_cpu_cond);

    current_cpu = cpu;
    while (1) {
        current_cpu = NULL;
        qemu_mutex_unlock_iothread();
        do {
            int sig;
            r = sigwait(&waitset, &sig);
        } while (r == -1 && (errno == EAGAIN || errno == EINTR));
        if (r == -1) {
            perror("sigwait");
            exit(1);
        }
        qemu_mutex_lock_iothread();
        current_cpu = cpu;
        qemu_wait_io_event_common(cpu);
    }

    return NULL;
#endif
}
static void tcg_exec_all(void);

static void *qemu_tcg_cpu_thread_fn(void *arg)
{
    CPUState *cpu = arg;

    rcu_register_thread();

    qemu_mutex_lock_iothread();
    qemu_tcg_init_cpu_signals();
    qemu_thread_get_self(cpu->thread);

    CPU_FOREACH(cpu) {
        cpu->thread_id = qemu_get_thread_id();
        cpu->created = true;
        cpu->can_do_io = 1;
    }
    qemu_cond_signal(&qemu_cpu_cond);

    /* wait for initial kick-off after machine start */
    while (first_cpu->stopped) {
        qemu_cond_wait(tcg_halt_cond, &qemu_global_mutex);

        /* process any pending work */
        CPU_FOREACH(cpu) {
            qemu_wait_io_event_common(cpu);
        }
    }

    /* process any pending work */
    exit_request = 1;

    while (1) {
        tcg_exec_all();

        if (use_icount) {
            int64_t deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);

            if (deadline == 0) {
                qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
            }
        }
        qemu_tcg_wait_io_event();
    }

    return NULL;
}
static void *qemu_hax_cpu_thread_fn(void *arg)
{
    CPUState *cpu = arg;
    int r;

    qemu_thread_get_self(cpu->thread);
    qemu_mutex_lock(&qemu_global_mutex);

    cpu->thread_id = qemu_get_thread_id();
    cpu->created = true;
    cpu->halted = 0;
    current_cpu = cpu;

    hax_init_vcpu(cpu);
    qemu_cond_signal(&qemu_cpu_cond);

    while (1) {
        if (cpu_can_run(cpu)) {
            r = hax_smp_cpu_exec(cpu);
            if (r == EXCP_DEBUG) {
                cpu_handle_guest_debug(cpu);
            }
        }
        qemu_hax_wait_io_event(cpu);
    }
    return NULL;
}
static void qemu_cpu_kick_thread(CPUState *cpu)
{
#ifndef _WIN32
    int err;

    err = pthread_kill(cpu->thread->thread, SIG_IPI);
    if (err) {
        fprintf(stderr, "qemu:%s: %s", __func__, strerror(err));
        exit(1);
    }

    /* The cpu thread cannot catch the signal reliably when shutting down
     * the guest on Mac, so double-check and resend it.
     */
#ifdef CONFIG_DARWIN
    if (!exit_request) {
        cpu_signal(0);
    }

    if (hax_enabled() && hax_ug_platform()) {
        cpu->exit_request = 1;
    }
#endif
#else /* _WIN32 */
    if (!qemu_cpu_is_self(cpu)) {
        CONTEXT tcgContext;

        if (SuspendThread(cpu->hThread) == (DWORD)-1) {
            fprintf(stderr, "qemu:%s: GetLastError:%lu\n", __func__,
                    GetLastError());
            exit(1);
        }

        /* On multi-core systems, we are not sure that the thread is actually
         * suspended until we can get the context.
         */
        tcgContext.ContextFlags = CONTEXT_CONTROL;
        while (GetThreadContext(cpu->hThread, &tcgContext) != 0) {
            continue;
        }

        cpu_signal(0);
        if (hax_enabled() && hax_ug_platform()) {
            cpu->exit_request = 1;
        }

        if (ResumeThread(cpu->hThread) == (DWORD)-1) {
            fprintf(stderr, "qemu:%s: GetLastError:%lu\n", __func__,
                    GetLastError());
            exit(1);
        }
    }
#endif
}
void qemu_cpu_kick(CPUState *cpu)
{
    qemu_cond_broadcast(cpu->halt_cond);
#ifdef CONFIG_HAX
    if (((hax_enabled() && hax_ug_platform()) || !tcg_enabled()) &&
        !cpu->thread_kicked) {
#else
    if (!tcg_enabled() && !cpu->thread_kicked) {
#endif
        qemu_cpu_kick_thread(cpu);
        cpu->thread_kicked = true;
    }
}
void qemu_cpu_kick_self(void)
{
#ifndef _WIN32
    assert(current_cpu);

    if (!current_cpu->thread_kicked) {
        qemu_cpu_kick_thread(current_cpu);
        current_cpu->thread_kicked = true;
    }
#else
    abort();
#endif
}

bool qemu_cpu_is_self(CPUState *cpu)
{
    return qemu_thread_is_self(cpu->thread);
}

bool qemu_in_vcpu_thread(void)
{
    return current_cpu && qemu_cpu_is_self(current_cpu);
}
static __thread bool iothread_locked = false;

bool qemu_mutex_iothread_locked(void)
{
    return iothread_locked;
}
void qemu_mutex_lock_iothread(void)
{
    atomic_inc(&iothread_requesting_mutex);
    /* In the simple case there is no need to bump the VCPU thread out of
     * TCG code execution.
     */
#ifdef CONFIG_HAX
    if (!tcg_enabled() || qemu_in_vcpu_thread() ||
        !first_cpu || !first_cpu->thread ||
        (hax_enabled() && hax_ug_platform())) {
#else
    if (!tcg_enabled() || qemu_in_vcpu_thread() ||
        !first_cpu || !first_cpu->thread) {
#endif
        qemu_mutex_lock(&qemu_global_mutex);
        atomic_dec(&iothread_requesting_mutex);
    } else {
        if (qemu_mutex_trylock(&qemu_global_mutex)) {
            qemu_cpu_kick_thread(first_cpu);
            qemu_mutex_lock(&qemu_global_mutex);
        }
        atomic_dec(&iothread_requesting_mutex);
        qemu_cond_broadcast(&qemu_io_proceeded_cond);
    }
    iothread_locked = true;
}
void qemu_mutex_unlock_iothread(void)
{
    iothread_locked = false;
    qemu_mutex_unlock(&qemu_global_mutex);
}
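/* Usage pattern (a sketch): threads other than the vCPU threads must hold
 * the iothread lock around any access to guest or device state:
 *
 *     qemu_mutex_lock_iothread();
 *     ... touch device/CPU state ...
 *     qemu_mutex_unlock_iothread();
 */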
static int all_vcpus_paused(void)
{
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        if (!cpu->stopped) {
            return 0;
        }
    }

    return 1;
}
void pause_all_vcpus(void)
{
    CPUState *cpu;

    qemu_clock_enable(QEMU_CLOCK_VIRTUAL, false);
    CPU_FOREACH(cpu) {
        cpu->stop = true;
        qemu_cpu_kick(cpu);
    }

    if (qemu_in_vcpu_thread()) {
        cpu_stop_current();
        if (!kvm_enabled()) {
            CPU_FOREACH(cpu) {
                cpu->stop = false;
                cpu->stopped = true;
            }
            return;
        }
    }

    while (!all_vcpus_paused()) {
        qemu_cond_wait(&qemu_pause_cond, &qemu_global_mutex);
        CPU_FOREACH(cpu) {
            qemu_cpu_kick(cpu);
        }
    }
}
void cpu_resume(CPUState *cpu)
{
    cpu->stop = false;
    cpu->stopped = false;
    qemu_cpu_kick(cpu);
}

void resume_all_vcpus(void)
{
    CPUState *cpu;

    qemu_clock_enable(QEMU_CLOCK_VIRTUAL, true);
    CPU_FOREACH(cpu) {
        cpu_resume(cpu);
    }
}
/* For temporary buffers for forming a name */
#define VCPU_THREAD_NAME_SIZE 16

static void qemu_tcg_init_vcpu(CPUState *cpu)
{
#ifdef CONFIG_HAX
    if (hax_enabled()) {
        hax_init_vcpu(cpu);
    }
#endif
    char thread_name[VCPU_THREAD_NAME_SIZE];

    tcg_cpu_address_space_init(cpu, cpu->as);

    /* share a single thread for all cpus with TCG */
    if (!tcg_cpu_thread) {
        cpu->thread = g_malloc0(sizeof(QemuThread));
        cpu->halt_cond = g_malloc0(sizeof(QemuCond));
        qemu_cond_init(cpu->halt_cond);
        tcg_halt_cond = cpu->halt_cond;
        snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/TCG",
                 cpu->cpu_index);
        qemu_thread_create(cpu->thread, thread_name, qemu_tcg_cpu_thread_fn,
                           cpu, QEMU_THREAD_JOINABLE);
#ifdef _WIN32
        cpu->hThread = qemu_thread_get_handle(cpu->thread);
#endif
        while (!cpu->created) {
            qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
        }
        tcg_cpu_thread = cpu->thread;
    } else {
        cpu->thread = tcg_cpu_thread;
        cpu->halt_cond = tcg_halt_cond;
    }
}
static void qemu_hax_start_vcpu(CPUState *cpu)
{
    char thread_name[VCPU_THREAD_NAME_SIZE];

    cpu->thread = g_malloc0(sizeof(QemuThread));
    cpu->halt_cond = g_malloc0(sizeof(QemuCond));
    qemu_cond_init(cpu->halt_cond);

    snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/HAX",
             cpu->cpu_index);

    qemu_thread_create(cpu->thread, thread_name, qemu_hax_cpu_thread_fn,
                       cpu, QEMU_THREAD_JOINABLE);
#ifdef _WIN32
    cpu->hThread = qemu_thread_get_handle(cpu->thread);
#endif
    while (!cpu->created) {
        qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
    }
}
static void qemu_kvm_start_vcpu(CPUState *cpu)
{
    char thread_name[VCPU_THREAD_NAME_SIZE];

    cpu->thread = g_malloc0(sizeof(QemuThread));
    cpu->halt_cond = g_malloc0(sizeof(QemuCond));
    qemu_cond_init(cpu->halt_cond);
    snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/KVM",
             cpu->cpu_index);
    qemu_thread_create(cpu->thread, thread_name, qemu_kvm_cpu_thread_fn,
                       cpu, QEMU_THREAD_JOINABLE);
    while (!cpu->created) {
        qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
    }
}
static void qemu_dummy_start_vcpu(CPUState *cpu)
{
    char thread_name[VCPU_THREAD_NAME_SIZE];

    cpu->thread = g_malloc0(sizeof(QemuThread));
    cpu->halt_cond = g_malloc0(sizeof(QemuCond));
    qemu_cond_init(cpu->halt_cond);
    snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/DUMMY",
             cpu->cpu_index);
    qemu_thread_create(cpu->thread, thread_name, qemu_dummy_cpu_thread_fn, cpu,
                       QEMU_THREAD_JOINABLE);
    while (!cpu->created) {
        qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
    }
}
void qemu_init_vcpu(CPUState *cpu)
{
    cpu->nr_cores = smp_cores;
    cpu->nr_threads = smp_threads;
    cpu->stopped = true;

    if (kvm_enabled()) {
        qemu_kvm_start_vcpu(cpu);
#ifdef CONFIG_HAX
    } else if (hax_enabled() && hax_ug_platform()) {
        qemu_hax_start_vcpu(cpu);
#endif
    } else if (tcg_enabled()) {
        qemu_tcg_init_vcpu(cpu);
    } else {
        qemu_dummy_start_vcpu(cpu);
    }
}
void cpu_stop_current(void)
{
    if (current_cpu) {
        current_cpu->stop = false;
        current_cpu->stopped = true;
        cpu_exit(current_cpu);
        qemu_cond_signal(&qemu_pause_cond);
    }
}
int vm_stop(RunState state)
{
    if (qemu_in_vcpu_thread()) {
        qemu_system_vmstop_request_prepare();
        qemu_system_vmstop_request(state);
        /*
         * FIXME: should not return to device code in case
         * vm_stop() has been requested.
         */
        cpu_stop_current();
        return 0;
    }

    return do_vm_stop(state);
}
/* does a state transition even if the VM is already stopped,
   current state is forgotten forever */
int vm_stop_force_state(RunState state)
{
    if (runstate_is_running()) {
        return vm_stop(state);
    } else {
        runstate_set(state);
        /* Make sure to return an error if the flush in a previous vm_stop()
         * failed. */
        return bdrv_flush_all();
    }
}
static int tcg_cpu_exec(CPUState *cpu)
{
    int ret;
#ifdef CONFIG_PROFILER
    int64_t ti;
#endif

#ifdef CONFIG_PROFILER
    ti = profile_getclock();
#endif
    if (use_icount) {
        int64_t count;
        int64_t deadline;
        int decr;
        timers_state.qemu_icount -= (cpu->icount_decr.u16.low
                                    + cpu->icount_extra);
        cpu->icount_decr.u16.low = 0;
        cpu->icount_extra = 0;
        deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);

        /* Maintain prior (possibly buggy) behaviour where if no deadline
         * was set (as there is no QEMU_CLOCK_VIRTUAL timer) or it is more than
         * INT32_MAX nanoseconds ahead, we still use INT32_MAX
         * nanoseconds.
         */
        if ((deadline < 0) || (deadline > INT32_MAX)) {
            deadline = INT32_MAX;
        }

        count = qemu_icount_round(deadline);
        timers_state.qemu_icount += count;
        decr = (count > 0xffff) ? 0xffff : count;
        count -= decr;
        cpu->icount_decr.u16.low = decr;
        cpu->icount_extra = count;
    }
    ret = cpu_exec(cpu);
#ifdef CONFIG_PROFILER
    tcg_time += profile_getclock() - ti;
#endif
    if (use_icount) {
        /* Fold pending instructions back into the
           instruction counter, and clear the interrupt flag. */
        timers_state.qemu_icount -= (cpu->icount_decr.u16.low
                                    + cpu->icount_extra);
        cpu->icount_decr.u32 = 0;
        cpu->icount_extra = 0;
    }
    return ret;
}
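/* Example budget (illustrative): a 100000 ns deadline with
 * icount_time_shift == 3 gives qemu_icount_round() == 12500 instructions.
 * That fits in the 16-bit decrementer, so icount_decr.u16.low == 12500 and
 * icount_extra == 0; budgets above 0xffff spill the remainder into
 * icount_extra instead.
 */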
static void tcg_exec_all(void)
{
    int r;

    /* Account partial waits to QEMU_CLOCK_VIRTUAL. */
    qemu_clock_warp(QEMU_CLOCK_VIRTUAL);

    if (next_cpu == NULL) {
        next_cpu = first_cpu;
    }
    for (; next_cpu != NULL && !exit_request; next_cpu = CPU_NEXT(next_cpu)) {
        CPUState *cpu = next_cpu;

        qemu_clock_enable(QEMU_CLOCK_VIRTUAL,
                          (cpu->singlestep_enabled & SSTEP_NOTIMER) == 0);

        if (cpu_can_run(cpu)) {
            r = tcg_cpu_exec(cpu);
            if (r == EXCP_DEBUG) {
                cpu_handle_guest_debug(cpu);
                break;
            }
        } else if (cpu->stop || cpu->stopped) {
            break;
        }
    }
    exit_request = 0;
}
void list_cpus(FILE *f, fprintf_function cpu_fprintf, const char *optarg)
{
    /* XXX: implement xxx_cpu_list for targets that still miss it */
#if defined(cpu_list)
    cpu_list(f, cpu_fprintf);
#endif
}
CpuInfoList *qmp_query_cpus(Error **errp)
{
    CpuInfoList *head = NULL, *cur_item = NULL;
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        CpuInfoList *info;
#if defined(TARGET_I386)
        X86CPU *x86_cpu = X86_CPU(cpu);
        CPUX86State *env = &x86_cpu->env;
#elif defined(TARGET_PPC)
        PowerPCCPU *ppc_cpu = POWERPC_CPU(cpu);
        CPUPPCState *env = &ppc_cpu->env;
#elif defined(TARGET_SPARC)
        SPARCCPU *sparc_cpu = SPARC_CPU(cpu);
        CPUSPARCState *env = &sparc_cpu->env;
#elif defined(TARGET_MIPS)
        MIPSCPU *mips_cpu = MIPS_CPU(cpu);
        CPUMIPSState *env = &mips_cpu->env;
#elif defined(TARGET_TRICORE)
        TriCoreCPU *tricore_cpu = TRICORE_CPU(cpu);
        CPUTriCoreState *env = &tricore_cpu->env;
#endif

        cpu_synchronize_state(cpu);

        info = g_malloc0(sizeof(*info));
        info->value = g_malloc0(sizeof(*info->value));
        info->value->CPU = cpu->cpu_index;
        info->value->current = (cpu == first_cpu);
        info->value->halted = cpu->halted;
        info->value->qom_path = object_get_canonical_path(OBJECT(cpu));
        info->value->thread_id = cpu->thread_id;
#if defined(TARGET_I386)
        info->value->has_pc = true;
        info->value->pc = env->eip + env->segs[R_CS].base;
#elif defined(TARGET_PPC)
        info->value->has_nip = true;
        info->value->nip = env->nip;
#elif defined(TARGET_SPARC)
        info->value->has_pc = true;
        info->value->pc = env->pc;
        info->value->has_npc = true;
        info->value->npc = env->npc;
#elif defined(TARGET_MIPS)
        info->value->has_PC = true;
        info->value->PC = env->active_tc.PC;
#elif defined(TARGET_TRICORE)
        info->value->has_PC = true;
        info->value->PC = env->PC;
#endif

        /* XXX: waiting for the qapi to support GSList */
        if (!cur_item) {
            head = cur_item = info;
        } else {
            cur_item->next = info;
            cur_item = info;
        }
    }

    return head;
}
void qmp_memsave(int64_t addr, int64_t size, const char *filename,
                 bool has_cpu, int64_t cpu_index, Error **errp)
{
    FILE *f;
    uint32_t l;
    CPUState *cpu;
    uint8_t buf[1024];
    int64_t orig_addr = addr, orig_size = size;

    if (!has_cpu) {
        cpu_index = 0;
    }

    cpu = qemu_get_cpu(cpu_index);
    if (cpu == NULL) {
        error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "cpu-index",
                   "a CPU number");
        return;
    }

    f = fopen(filename, "wb");
    if (!f) {
        error_setg_file_open(errp, errno, filename);
        return;
    }

    while (size != 0) {
        l = sizeof(buf);
        if (l > size) {
            l = size;
        }
        if (cpu_memory_rw_debug(cpu, addr, buf, l, 0) != 0) {
            error_setg(errp, "Invalid addr 0x%016" PRIx64 "/size %" PRId64
                             " specified", orig_addr, orig_size);
            goto exit;
        }
        if (fwrite(buf, 1, l, f) != l) {
            error_setg(errp, QERR_IO_ERROR);
            goto exit;
        }
        addr += l;
        size -= l;
    }

exit:
    fclose(f);
}
void qmp_pmemsave(int64_t addr, int64_t size, const char *filename,
                  Error **errp)
{
    FILE *f;
    uint32_t l;
    uint8_t buf[1024];

    f = fopen(filename, "wb");
    if (!f) {
        error_setg_file_open(errp, errno, filename);
        return;
    }

    while (size != 0) {
        l = sizeof(buf);
        if (l > size) {
            l = size;
        }
        cpu_physical_memory_read(addr, buf, l);
        if (fwrite(buf, 1, l, f) != l) {
            error_setg(errp, QERR_IO_ERROR);
            goto exit;
        }
        addr += l;
        size -= l;
    }

exit:
    fclose(f);
}
void qmp_inject_nmi(Error **errp)
{
#if defined(TARGET_I386)
    CPUState *cs;

    CPU_FOREACH(cs) {
        X86CPU *cpu = X86_CPU(cs);

        if (!cpu->apic_state) {
            cpu_interrupt(cs, CPU_INTERRUPT_NMI);
        } else {
            apic_deliver_nmi(cpu->apic_state);
        }
    }
#else
    nmi_monitor_handle(monitor_get_cpu_index(), errp);
#endif
}
void dump_drift_info(FILE *f, fprintf_function cpu_fprintf)
{
    if (!use_icount) {
        return;
    }

    cpu_fprintf(f, "Host - Guest clock  %"PRIi64" ms\n",
                (cpu_get_clock() - cpu_get_icount())/SCALE_MS);
    if (icount_align_option) {
        cpu_fprintf(f, "Max guest delay     %"PRIi64" ms\n", -max_delay/SCALE_MS);
        cpu_fprintf(f, "Max guest advance   %"PRIi64" ms\n", max_advance/SCALE_MS);
    } else {
        cpu_fprintf(f, "Max guest delay     NA\n");
        cpu_fprintf(f, "Max guest advance   NA\n");
    }
}