/*
 * Copyright (c) 2003-2008 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

/* Needed early for CONFIG_BSD etc. */
#include "config-host.h"

#include "monitor/monitor.h"
#include "sysemu/sysemu.h"
#include "exec/gdbstub.h"
#include "sysemu/dma.h"
#include "sysemu/kvm.h"
#include "sysemu/hax.h"
#include "qmp-commands.h"

#include "qemu/thread.h"
#include "sysemu/cpus.h"
#include "sysemu/qtest.h"
#include "qemu/main-loop.h"
#include "qemu/bitmap.h"
#include "qemu/seqlock.h"

#include "qemu/compatfd.h"
#ifdef CONFIG_LINUX
#include <sys/prctl.h>

#ifndef PR_MCE_KILL
#define PR_MCE_KILL 33
#endif
#ifndef PR_MCE_KILL_SET
#define PR_MCE_KILL_SET 1
#endif
#ifndef PR_MCE_KILL_EARLY
#define PR_MCE_KILL_EARLY 1
#endif
#endif /* CONFIG_LINUX */
static CPUState *next_cpu;

bool cpu_is_stopped(CPUState *cpu)
{
    return cpu->stopped || !runstate_is_running();
}

static bool cpu_thread_is_idle(CPUState *cpu)
    if (cpu->stop || cpu->queued_work_first) {
    if (cpu_is_stopped(cpu)) {
    if (!cpu->halted || cpu_has_work(cpu) ||
        kvm_halt_in_kernel()) {

static bool all_cpu_threads_idle(void)
        if (!cpu_thread_is_idle(cpu)) {
/***********************************************************/
/* guest cycle counter */

/* Protected by TimersState seqlock */

/* Compensate for varying guest execution speed. */
static int64_t qemu_icount_bias;
static int64_t vm_clock_warp_start;
/* Conversion factor from emulated instructions to virtual clock ticks. */
static int icount_time_shift;
/* Arbitrarily pick 1 MIPS as the minimum allowable speed. */
#define MAX_ICOUNT_SHIFT 10
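/* Note: QEMU_CLOCK_VIRTUAL counts in nanoseconds, so with -icount each
 * executed instruction accounts for 2^icount_time_shift ns of virtual time.
 * For example, a shift of 3 models 8 ns per instruction (125 MIPS), while
 * the maximum shift of 10 models roughly 1 MIPS.
 */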
/* Only written by TCG thread */
static int64_t qemu_icount;

static QEMUTimer *icount_rt_timer;
static QEMUTimer *icount_vm_timer;
static QEMUTimer *icount_warp_timer;
typedef struct TimersState {
    /* Protected by BQL. */
    int64_t cpu_ticks_prev;
    int64_t cpu_ticks_offset;

    /* cpu_clock_offset can be read out of BQL, so protect it with
     * this lock.
     */
    QemuSeqLock vm_clock_seqlock;
    int64_t cpu_clock_offset;
    int32_t cpu_ticks_enabled;

static TimersState timers_state;
/* Return the virtual CPU time, based on the instruction counter. */
static int64_t cpu_get_icount_locked(void)
    CPUState *cpu = current_cpu;

    icount = qemu_icount;
        if (!cpu_can_do_io(cpu)) {
            fprintf(stderr, "Bad clock read\n");
        icount -= (cpu->icount_decr.u16.low + cpu->icount_extra);
    return qemu_icount_bias + (icount << icount_time_shift);
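/* cpu_get_icount() is the lock-free reader side: it retries whenever
 * seqlock_read_retry() reports that a writer updated the icount state
 * concurrently, which is what allows it to be called outside the BQL.
 */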
int64_t cpu_get_icount(void)
        start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
        icount = cpu_get_icount_locked();
    } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));
/* return the host CPU cycle counter and handle stop/restart */
/* Caller must hold the BQL */
int64_t cpu_get_ticks(void)
        return cpu_get_icount();

    ticks = timers_state.cpu_ticks_offset;
    if (timers_state.cpu_ticks_enabled) {
        ticks += cpu_get_real_ticks();

    if (timers_state.cpu_ticks_prev > ticks) {
        /* Note: non-increasing ticks may happen if the host uses
           software suspend. */
        timers_state.cpu_ticks_offset += timers_state.cpu_ticks_prev - ticks;
        ticks = timers_state.cpu_ticks_prev;

    timers_state.cpu_ticks_prev = ticks;
static int64_t cpu_get_clock_locked(void)
    ticks = timers_state.cpu_clock_offset;
    if (timers_state.cpu_ticks_enabled) {
        ticks += get_clock();

/* return the host CPU monotonic timer and handle stop/restart */
int64_t cpu_get_clock(void)
        start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
        ti = cpu_get_clock_locked();
    } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));
/* enable cpu_get_ticks()
 * Caller must hold BQL which serves as mutex for vm_clock_seqlock.
 */
void cpu_enable_ticks(void)
    /* Here, the thing really protected by the seqlock is cpu_clock_offset. */
    seqlock_write_lock(&timers_state.vm_clock_seqlock);
    if (!timers_state.cpu_ticks_enabled) {
        timers_state.cpu_ticks_offset -= cpu_get_real_ticks();
        timers_state.cpu_clock_offset -= get_clock();
        timers_state.cpu_ticks_enabled = 1;
    seqlock_write_unlock(&timers_state.vm_clock_seqlock);

/* disable cpu_get_ticks(): the clock is stopped. You must not call
 * cpu_get_ticks() after that.
 * Caller must hold BQL which serves as mutex for vm_clock_seqlock.
 */
void cpu_disable_ticks(void)
    /* Here, the thing really protected by the seqlock is cpu_clock_offset. */
    seqlock_write_lock(&timers_state.vm_clock_seqlock);
    if (timers_state.cpu_ticks_enabled) {
        timers_state.cpu_ticks_offset += cpu_get_real_ticks();
        timers_state.cpu_clock_offset = cpu_get_clock_locked();
        timers_state.cpu_ticks_enabled = 0;
    seqlock_write_unlock(&timers_state.vm_clock_seqlock);
/* Correlation between real and virtual time is always going to be
   fairly approximate, so ignore small variation.
   When the guest is idle, real and virtual time will be aligned in
   the IO wait loop. */
#define ICOUNT_WOBBLE (get_ticks_per_sec() / 10)
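/* get_ticks_per_sec() is 10^9 here (nanosecond resolution), so ICOUNT_WOBBLE
 * tolerates roughly 100 ms of drift between real and virtual time before
 * icount_adjust() changes the conversion factor.
 */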
static void icount_adjust(void)
    /* Protected by TimersState mutex. */
    static int64_t last_delta;

    /* If the VM is not running, then do nothing. */
    if (!runstate_is_running()) {

    seqlock_write_lock(&timers_state.vm_clock_seqlock);
    cur_time = cpu_get_clock_locked();
    cur_icount = cpu_get_icount_locked();

    delta = cur_icount - cur_time;
    /* FIXME: This is a very crude algorithm, somewhat prone to oscillation. */
        && last_delta + ICOUNT_WOBBLE < delta * 2
        && icount_time_shift > 0) {
        /* The guest is getting too far ahead. Slow time down. */
        && last_delta - ICOUNT_WOBBLE > delta * 2
        && icount_time_shift < MAX_ICOUNT_SHIFT) {
        /* The guest is getting too far behind. Speed time up. */
    qemu_icount_bias = cur_icount - (qemu_icount << icount_time_shift);
    seqlock_write_unlock(&timers_state.vm_clock_seqlock);
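/* The adjustment is driven from two self-re-arming timers below:
 * icount_adjust_rt() on a 1000 ms QEMU_CLOCK_REALTIME timer and
 * icount_adjust_vm() every tenth of a second of virtual time; their
 * (elided) tails invoke icount_adjust().
 */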
static void icount_adjust_rt(void *opaque)
    timer_mod(icount_rt_timer,
              qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + 1000);

static void icount_adjust_vm(void *opaque)
    timer_mod(icount_vm_timer,
              qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
              get_ticks_per_sec() / 10);

static int64_t qemu_icount_round(int64_t count)
    return (count + (1 << icount_time_shift) - 1) >> icount_time_shift;
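/* qemu_icount_round() converts a nanosecond deadline into an instruction
 * budget, rounding up: it is a ceiling division by 2^icount_time_shift.
 */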
static void icount_warp_rt(void *opaque)
    /* The icount_warp_timer is rescheduled soon after vm_clock_warp_start
     * changes from -1 to another value, so the race here is okay.
     */
    if (atomic_read(&vm_clock_warp_start) == -1) {

    seqlock_write_lock(&timers_state.vm_clock_seqlock);
    if (runstate_is_running()) {
        int64_t clock = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);

        warp_delta = clock - vm_clock_warp_start;
        if (use_icount == 2) {
            /*
             * In adaptive mode, do not let QEMU_CLOCK_VIRTUAL run too
             * far ahead of real time.
             */
            int64_t cur_time = cpu_get_clock_locked();
            int64_t cur_icount = cpu_get_icount_locked();
            int64_t delta = cur_time - cur_icount;
            warp_delta = MIN(warp_delta, delta);
        qemu_icount_bias += warp_delta;
    vm_clock_warp_start = -1;
    seqlock_write_unlock(&timers_state.vm_clock_seqlock);

    if (qemu_clock_expired(QEMU_CLOCK_VIRTUAL)) {
        qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
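/* qtest_clock_warp() advances QEMU_CLOCK_VIRTUAL to 'dest' in steps, stopping
 * at every intermediate timer deadline so the corresponding handlers run in
 * order; the qtest harness uses it to single-step virtual time.
 */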
void qtest_clock_warp(int64_t dest)
    int64_t clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
    assert(qtest_enabled());
    while (clock < dest) {
        int64_t deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
        int64_t warp = MIN(dest - clock, deadline);
        seqlock_write_lock(&timers_state.vm_clock_seqlock);
        qemu_icount_bias += warp;
        seqlock_write_unlock(&timers_state.vm_clock_seqlock);

        qemu_clock_run_timers(QEMU_CLOCK_VIRTUAL);
        clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
    qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
void qemu_clock_warp(QEMUClockType type)
    /*
     * There are too many global variables to make the "warp" behavior
     * applicable to other clocks. But a clock argument removes the
     * need for if statements all over the place.
     */
    if (type != QEMU_CLOCK_VIRTUAL || !use_icount) {

    /*
     * If the CPUs have been sleeping, advance QEMU_CLOCK_VIRTUAL timer now.
     * This ensures that the deadline for the timer is computed correctly below.
     * This also makes sure that the insn counter is synchronized before the
     * CPU starts running, in case the CPU is woken by an event other than
     * the earliest QEMU_CLOCK_VIRTUAL timer.
     */
    icount_warp_rt(NULL);
    timer_del(icount_warp_timer);
    if (!all_cpu_threads_idle()) {

    if (qtest_enabled()) {
        /* When testing, qtest commands advance icount. */

    /* We want to use the earliest deadline from ALL vm_clocks */
    clock = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
    deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
        /*
         * Ensure QEMU_CLOCK_VIRTUAL proceeds even when the virtual CPU goes to
         * sleep. Otherwise, the CPU might be waiting for a future timer
         * interrupt to wake it up, but the interrupt never comes because
         * the vCPU isn't running any insns and thus doesn't advance the
         * QEMU_CLOCK_VIRTUAL.
         *
         * An extreme solution for this problem would be to never let VCPUs
         * sleep in icount mode if there is a pending QEMU_CLOCK_VIRTUAL
         * timer; rather time could just advance to the next QEMU_CLOCK_VIRTUAL
         * event. Instead, we do stop VCPUs and only advance QEMU_CLOCK_VIRTUAL
         * after some "real" time (related to the time left until the next
         * event) has passed. The QEMU_CLOCK_REALTIME timer will do this.
         * This avoids that the warps are visible externally; for example,
         * you will not be sending network packets continuously instead of
         * every 100ms.
         */
        seqlock_write_lock(&timers_state.vm_clock_seqlock);
        if (vm_clock_warp_start == -1 || vm_clock_warp_start > clock) {
            vm_clock_warp_start = clock;
        seqlock_write_unlock(&timers_state.vm_clock_seqlock);
        timer_mod_anticipate(icount_warp_timer, clock + deadline);
    } else if (deadline == 0) {
        qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
static const VMStateDescription vmstate_timers = {
    .minimum_version_id = 1,
    .minimum_version_id_old = 1,
    .fields = (VMStateField[]) {
        VMSTATE_INT64(cpu_ticks_offset, TimersState),
        VMSTATE_INT64(dummy, TimersState),
        VMSTATE_INT64_V(cpu_clock_offset, TimersState, 2),
        VMSTATE_END_OF_LIST()
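/* configure_icount() handles the -icount command-line option: a numeric
 * argument fixes icount_time_shift, while "auto" leaves it adaptive and
 * arms the realtime/virtual-time adjustment timers set up below.
 */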
void configure_icount(const char *option)
    seqlock_init(&timers_state.vm_clock_seqlock, NULL);
    vmstate_register(NULL, 0, &vmstate_timers, &timers_state);
    icount_warp_timer = timer_new_ns(QEMU_CLOCK_REALTIME,
                                     icount_warp_rt, NULL);
    if (strcmp(option, "auto") != 0) {
        icount_time_shift = strtol(option, NULL, 0);

    /* 125 MIPS seems a reasonable initial guess at the guest speed.
       It will be corrected fairly quickly anyway. */
    icount_time_shift = 3;

    /* Have both realtime and virtual time triggers for speed adjustment.
       The realtime trigger catches emulated time passing too slowly,
       the virtual time trigger catches emulated time passing too fast.
       Realtime triggers occur even when idle, so use them less frequently
       than VM triggers. */
    icount_rt_timer = timer_new_ms(QEMU_CLOCK_REALTIME,
                                   icount_adjust_rt, NULL);
    timer_mod(icount_rt_timer,
              qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + 1000);
    icount_vm_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
                                   icount_adjust_vm, NULL);
    timer_mod(icount_vm_timer,
              qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
              get_ticks_per_sec() / 10);
/***********************************************************/
void hw_error(const char *fmt, ...)
    fprintf(stderr, "qemu: hardware error: ");
    vfprintf(stderr, fmt, ap);
    fprintf(stderr, "\n");
        fprintf(stderr, "CPU #%d:\n", cpu->cpu_index);
        cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_FPU);
void cpu_synchronize_all_states(void)
        cpu_synchronize_state(cpu);

void cpu_synchronize_all_post_reset(void)
        cpu_synchronize_post_reset(cpu);
        if (hax_enabled() && hax_ug_platform())
            hax_cpu_synchronize_post_reset(cpu);

void cpu_synchronize_all_post_init(void)
        cpu_synchronize_post_init(cpu);
        if (hax_enabled() && hax_ug_platform())
            hax_cpu_synchronize_post_init(cpu);
static int do_vm_stop(RunState state)
    if (runstate_is_running()) {
        vm_state_notify(0, state);
        monitor_protocol_event(QEVENT_STOP, NULL);

    ret = bdrv_flush_all();

static bool cpu_can_run(CPUState *cpu)
    if (cpu_is_stopped(cpu)) {

static void cpu_handle_guest_debug(CPUState *cpu)
    gdb_set_stop_cpu(cpu);
    qemu_system_debug_request();

static void cpu_signal(int sig)
        cpu_exit(current_cpu);

static void sigbus_reraise(void)
    struct sigaction action;

    memset(&action, 0, sizeof(action));
    action.sa_handler = SIG_DFL;
    if (!sigaction(SIGBUS, &action, NULL)) {
        sigaddset(&set, SIGBUS);
        sigprocmask(SIG_UNBLOCK, &set, NULL);
    perror("Failed to re-raise SIGBUS!\n");

static void sigbus_handler(int n, struct qemu_signalfd_siginfo *siginfo,
    if (kvm_on_sigbus(siginfo->ssi_code,
                      (void *)(intptr_t)siginfo->ssi_addr)) {

static void qemu_init_sigbus(void)
    struct sigaction action;

    memset(&action, 0, sizeof(action));
    action.sa_flags = SA_SIGINFO;
    action.sa_sigaction = (void (*)(int, siginfo_t*, void*))sigbus_handler;
    sigaction(SIGBUS, &action, NULL);

    prctl(PR_MCE_KILL, PR_MCE_KILL_SET, PR_MCE_KILL_EARLY, 0, 0);
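    /* PR_MCE_KILL with PR_MCE_KILL_EARLY asks the kernel to deliver SIGBUS
     * for detected memory errors right away rather than on consumption, so
     * the handler installed above can forward them to KVM promptly.
     */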
static void qemu_kvm_eat_signals(CPUState *cpu)
    struct timespec ts = { 0, 0 };

    sigemptyset(&waitset);
    sigaddset(&waitset, SIG_IPI);
    sigaddset(&waitset, SIGBUS);

        r = sigtimedwait(&waitset, &siginfo, &ts);
        if (r == -1 && !(errno == EAGAIN || errno == EINTR)) {
            perror("sigtimedwait");

            if (kvm_on_sigbus_vcpu(cpu, siginfo.si_code, siginfo.si_addr)) {

        r = sigpending(&chkset);
            perror("sigpending");
    } while (sigismember(&chkset, SIG_IPI) || sigismember(&chkset, SIGBUS));
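/* SIG_IPI and SIGBUS stay blocked in the vCPU thread, so
 * qemu_kvm_eat_signals() drains them synchronously with sigtimedwait();
 * a pending SIGBUS (e.g. a host machine-check) is handed to
 * kvm_on_sigbus_vcpu() for the affected vCPU.
 */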
#else /* !CONFIG_LINUX */

static void qemu_init_sigbus(void)

static void qemu_kvm_eat_signals(CPUState *cpu)

#endif /* !CONFIG_LINUX */
static void dummy_signal(int sig)

static void qemu_kvm_init_cpu_signals(CPUState *cpu)
    struct sigaction sigact;

    memset(&sigact, 0, sizeof(sigact));
    sigact.sa_handler = dummy_signal;
    sigaction(SIG_IPI, &sigact, NULL);

    pthread_sigmask(SIG_BLOCK, NULL, &set);
    sigdelset(&set, SIG_IPI);
    sigdelset(&set, SIGBUS);
    r = kvm_set_signal_mask(cpu, &set);
        fprintf(stderr, "kvm_set_signal_mask: %s\n", strerror(-r));

static void qemu_tcg_init_cpu_signals(void)
    struct sigaction sigact;

    memset(&sigact, 0, sizeof(sigact));
    sigact.sa_handler = cpu_signal;
    sigaction(SIG_IPI, &sigact, NULL);

    sigaddset(&set, SIG_IPI);
    pthread_sigmask(SIG_UNBLOCK, &set, NULL);

static void qemu_kvm_init_cpu_signals(CPUState *cpu)

static void qemu_tcg_init_cpu_signals(void)
static QemuMutex qemu_global_mutex;
static QemuCond qemu_io_proceeded_cond;
static bool iothread_requesting_mutex;

static QemuThread io_thread;

static QemuThread *tcg_cpu_thread;
static QemuCond *tcg_halt_cond;

static QemuCond qemu_cpu_cond;
static QemuCond qemu_pause_cond;
static QemuCond qemu_work_cond;

void qemu_init_cpu_loop(void)
    qemu_cond_init(&qemu_cpu_cond);
    qemu_cond_init(&qemu_pause_cond);
    qemu_cond_init(&qemu_work_cond);
    qemu_cond_init(&qemu_io_proceeded_cond);
    qemu_mutex_init(&qemu_global_mutex);

    qemu_thread_get_self(&io_thread);
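/* run_on_cpu() queues a work item on the target vCPU, kicks it, and then
 * waits on qemu_work_cond until that vCPU's thread has run the item;
 * async_run_on_cpu() below is the fire-and-forget variant, which is why its
 * work item is heap-allocated instead of living on the caller's stack.
 */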
void run_on_cpu(CPUState *cpu, void (*func)(void *data), void *data)
    struct qemu_work_item wi;

    if (qemu_cpu_is_self(cpu)) {

    if (cpu->queued_work_first == NULL) {
        cpu->queued_work_first = &wi;
        cpu->queued_work_last->next = &wi;
    cpu->queued_work_last = &wi;

        CPUState *self_cpu = current_cpu;

        qemu_cond_wait(&qemu_work_cond, &qemu_global_mutex);
        current_cpu = self_cpu;

void async_run_on_cpu(CPUState *cpu, void (*func)(void *data), void *data)
    struct qemu_work_item *wi;

    if (qemu_cpu_is_self(cpu)) {

    wi = g_malloc0(sizeof(struct qemu_work_item));

    if (cpu->queued_work_first == NULL) {
        cpu->queued_work_first = wi;
        cpu->queued_work_last->next = wi;
    cpu->queued_work_last = wi;

static void flush_queued_work(CPUState *cpu)
    struct qemu_work_item *wi;

    if (cpu->queued_work_first == NULL) {

    while ((wi = cpu->queued_work_first)) {
        cpu->queued_work_first = wi->next;

    cpu->queued_work_last = NULL;
    qemu_cond_broadcast(&qemu_work_cond);

static void qemu_wait_io_event_common(CPUState *cpu)
        qemu_cond_signal(&qemu_pause_cond);
    flush_queued_work(cpu);
    cpu->thread_kicked = false;
static void qemu_tcg_wait_io_event(void)
    while (all_cpu_threads_idle()) {
        /* Start accounting real time to the virtual clock if the CPUs
           are idle. */
        qemu_clock_warp(QEMU_CLOCK_VIRTUAL);
        qemu_cond_wait(tcg_halt_cond, &qemu_global_mutex);

    while (iothread_requesting_mutex) {
        qemu_cond_wait(&qemu_io_proceeded_cond, &qemu_global_mutex);

        qemu_wait_io_event_common(cpu);
static void qemu_hax_wait_io_event(CPUState *cpu)
    while (cpu_thread_is_idle(cpu)) {
        qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);

    qemu_wait_io_event_common(cpu);

static void qemu_kvm_wait_io_event(CPUState *cpu)
    while (cpu_thread_is_idle(cpu)) {
        qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);

    qemu_kvm_eat_signals(cpu);
    qemu_wait_io_event_common(cpu);
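/* Each accelerator gets its own vCPU thread body below. The common shape is:
 * take the global mutex, record the host thread id, signal qemu_cpu_cond once
 * the vCPU is created, then loop running guest code while cpu_can_run() holds
 * and sleeping in the matching *_wait_io_event() helper otherwise.
 */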
static void *qemu_kvm_cpu_thread_fn(void *arg)
    qemu_mutex_lock(&qemu_global_mutex);
    qemu_thread_get_self(cpu->thread);
    cpu->thread_id = qemu_get_thread_id();

    r = kvm_init_vcpu(cpu);
        fprintf(stderr, "kvm_init_vcpu failed: %s\n", strerror(-r));

    qemu_kvm_init_cpu_signals(cpu);

    /* signal CPU creation */
    qemu_cond_signal(&qemu_cpu_cond);

        if (cpu_can_run(cpu)) {
            r = kvm_cpu_exec(cpu);
            if (r == EXCP_DEBUG) {
                cpu_handle_guest_debug(cpu);
        qemu_kvm_wait_io_event(cpu);

static void *qemu_dummy_cpu_thread_fn(void *arg)
    fprintf(stderr, "qtest is not supported under Windows\n");

    qemu_mutex_lock_iothread();
    qemu_thread_get_self(cpu->thread);
    cpu->thread_id = qemu_get_thread_id();

    sigemptyset(&waitset);
    sigaddset(&waitset, SIG_IPI);

    /* signal CPU creation */
    qemu_cond_signal(&qemu_cpu_cond);

        qemu_mutex_unlock_iothread();
            r = sigwait(&waitset, &sig);
        } while (r == -1 && (errno == EAGAIN || errno == EINTR));
        qemu_mutex_lock_iothread();
        qemu_wait_io_event_common(cpu);

static void tcg_exec_all(void);

static void *qemu_tcg_cpu_thread_fn(void *arg)
    qemu_tcg_init_cpu_signals();
    qemu_thread_get_self(cpu->thread);

    qemu_mutex_lock(&qemu_global_mutex);
        cpu->thread_id = qemu_get_thread_id();
    qemu_cond_signal(&qemu_cpu_cond);

    /* wait for initial kick-off after machine start */
    while (QTAILQ_FIRST(&cpus)->stopped) {
        qemu_cond_wait(tcg_halt_cond, &qemu_global_mutex);

        /* process any pending work */
            qemu_wait_io_event_common(cpu);

            int64_t deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);

                qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
        qemu_tcg_wait_io_event();

static void *qemu_hax_cpu_thread_fn(void *arg)
    qemu_thread_get_self(cpu->thread);
    qemu_mutex_lock(&qemu_global_mutex);

    cpu->thread_id = qemu_get_thread_id();
    qemu_cond_signal(&qemu_cpu_cond);

        if (cpu_can_run(cpu)) {
            r = hax_smp_cpu_exec(cpu);
            if (r == EXCP_DEBUG) {
                cpu_handle_guest_debug(cpu);

        qemu_hax_wait_io_event(cpu);
static void qemu_cpu_kick_thread(CPUState *cpu)
    err = pthread_kill(cpu->thread->thread, SIG_IPI);
        fprintf(stderr, "qemu:%s: %s", __func__, strerror(err));

    /* The CPU thread cannot catch the signal reliably when shutting down the
     * guest on Mac, so double-check and resend the exit request.
     */
#ifdef CONFIG_DARWIN
    if (hax_enabled() && hax_ug_platform())
        cpu->exit_request = 1;

    if (!qemu_cpu_is_self(cpu)) {
        if (SuspendThread(cpu->hThread) == (DWORD)-1) {
            fprintf(stderr, "qemu:%s: GetLastError:%lu\n", __func__,

        /* On multi-core systems, we are not sure that the thread is actually
         * suspended until we can get the context.
         */
        tcgContext.ContextFlags = CONTEXT_CONTROL;
        while (GetThreadContext(cpu->hThread, &tcgContext) != 0) {

        if (hax_enabled() && hax_ug_platform())
            cpu->exit_request = 1;

        if (ResumeThread(cpu->hThread) == (DWORD)-1) {
            fprintf(stderr, "qemu:%s: GetLastError:%lu\n", __func__,
void qemu_cpu_kick(CPUState *cpu)
    qemu_cond_broadcast(cpu->halt_cond);
    if (((hax_enabled() && hax_ug_platform()) || !tcg_enabled()) && !cpu->thread_kicked) {
    if (!tcg_enabled() && !cpu->thread_kicked) {
        qemu_cpu_kick_thread(cpu);
        cpu->thread_kicked = true;
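/* qemu_cpu_kick_thread() forces the vCPU out of guest execution: on POSIX
 * hosts with a SIG_IPI delivered via pthread_kill(), on Windows with a
 * SuspendThread/ResumeThread cycle. thread_kicked suppresses redundant kicks
 * until the vCPU has had a chance to service the previous one.
 */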
void qemu_cpu_kick_self(void)
    assert(current_cpu);

    if (!current_cpu->thread_kicked) {
        qemu_cpu_kick_thread(current_cpu);
        current_cpu->thread_kicked = true;

bool qemu_cpu_is_self(CPUState *cpu)
    return qemu_thread_is_self(cpu->thread);

static bool qemu_in_vcpu_thread(void)
    return current_cpu && qemu_cpu_is_self(current_cpu);

void qemu_mutex_lock_iothread(void)
    if ((hax_enabled() && hax_ug_platform()) || !tcg_enabled()) {
    if (!tcg_enabled()) {
        qemu_mutex_lock(&qemu_global_mutex);
        iothread_requesting_mutex = true;
        if (qemu_mutex_trylock(&qemu_global_mutex)) {
            qemu_cpu_kick_thread(first_cpu);
            qemu_mutex_lock(&qemu_global_mutex);
        iothread_requesting_mutex = false;
        qemu_cond_broadcast(&qemu_io_proceeded_cond);
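/* With TCG the single vCPU thread can hold the global mutex for a long
 * stretch, so qemu_mutex_lock_iothread() does not simply block on it: it sets
 * iothread_requesting_mutex, kicks the vCPU out of the translator, takes the
 * lock, and finally wakes any TCG thread waiting on qemu_io_proceeded_cond.
 */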
void qemu_mutex_unlock_iothread(void)
    qemu_mutex_unlock(&qemu_global_mutex);

static int all_vcpus_paused(void)
        if (!cpu->stopped) {

void pause_all_vcpus(void)
    qemu_clock_enable(QEMU_CLOCK_VIRTUAL, false);

    if (qemu_in_vcpu_thread()) {
        if (!kvm_enabled()) {
                cpu->stopped = true;

    while (!all_vcpus_paused()) {
        qemu_cond_wait(&qemu_pause_cond, &qemu_global_mutex);

void cpu_resume(CPUState *cpu)
    cpu->stopped = false;

void resume_all_vcpus(void)
    qemu_clock_enable(QEMU_CLOCK_VIRTUAL, true);

/* For temporary buffers for forming a name */
#define VCPU_THREAD_NAME_SIZE 16

static void qemu_tcg_init_vcpu(CPUState *cpu)
    char thread_name[VCPU_THREAD_NAME_SIZE];

    tcg_cpu_address_space_init(cpu, cpu->as);

    /* share a single thread for all cpus with TCG */
    if (!tcg_cpu_thread) {
        cpu->thread = g_malloc0(sizeof(QemuThread));
        cpu->halt_cond = g_malloc0(sizeof(QemuCond));
        qemu_cond_init(cpu->halt_cond);
        tcg_halt_cond = cpu->halt_cond;
        snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/TCG",
        qemu_thread_create(cpu->thread, thread_name, qemu_tcg_cpu_thread_fn,
                           cpu, QEMU_THREAD_JOINABLE);
        cpu->hThread = qemu_thread_get_handle(cpu->thread);
        while (!cpu->created) {
            qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
        tcg_cpu_thread = cpu->thread;
        cpu->thread = tcg_cpu_thread;
        cpu->halt_cond = tcg_halt_cond;
static void qemu_hax_start_vcpu(CPUState *cpu)
    char thread_name[VCPU_THREAD_NAME_SIZE];

    cpu->thread = g_malloc0(sizeof(QemuThread));
    cpu->halt_cond = g_malloc0(sizeof(QemuCond));
    qemu_cond_init(cpu->halt_cond);

    snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/HAX",

    qemu_thread_create(cpu->thread, thread_name, qemu_hax_cpu_thread_fn,
                       cpu, QEMU_THREAD_JOINABLE);
    cpu->hThread = qemu_thread_get_handle(cpu->thread);
    while (!cpu->created) {
        qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);

static void qemu_kvm_start_vcpu(CPUState *cpu)
    char thread_name[VCPU_THREAD_NAME_SIZE];

    cpu->thread = g_malloc0(sizeof(QemuThread));
    cpu->halt_cond = g_malloc0(sizeof(QemuCond));
    qemu_cond_init(cpu->halt_cond);
    snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/KVM",
    qemu_thread_create(cpu->thread, thread_name, qemu_kvm_cpu_thread_fn,
                       cpu, QEMU_THREAD_JOINABLE);
    while (!cpu->created) {
        qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);

static void qemu_dummy_start_vcpu(CPUState *cpu)
    char thread_name[VCPU_THREAD_NAME_SIZE];

    cpu->thread = g_malloc0(sizeof(QemuThread));
    cpu->halt_cond = g_malloc0(sizeof(QemuCond));
    qemu_cond_init(cpu->halt_cond);
    snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/DUMMY",
    qemu_thread_create(cpu->thread, thread_name, qemu_dummy_cpu_thread_fn, cpu,
                       QEMU_THREAD_JOINABLE);
    while (!cpu->created) {
        qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
void qemu_init_vcpu(CPUState *cpu)
    cpu->nr_cores = smp_cores;
    cpu->nr_threads = smp_threads;
    cpu->stopped = true;

    if (kvm_enabled()) {
        qemu_kvm_start_vcpu(cpu);
    } else if (hax_enabled() && hax_ug_platform()) {
        qemu_hax_start_vcpu(cpu);
    } else if (tcg_enabled()) {
        qemu_tcg_init_vcpu(cpu);
        qemu_dummy_start_vcpu(cpu);

void cpu_stop_current(void)
        current_cpu->stop = false;
        current_cpu->stopped = true;
        cpu_exit(current_cpu);
        qemu_cond_signal(&qemu_pause_cond);
int vm_stop(RunState state)
    if (qemu_in_vcpu_thread()) {
        qemu_system_vmstop_request(state);
        /*
         * FIXME: should not return to device code in case
         * vm_stop() has been requested.
         */

    return do_vm_stop(state);
/* Does a state transition even if the VM is already stopped;
   the current state is forgotten forever. */
int vm_stop_force_state(RunState state)
    if (runstate_is_running()) {
        return vm_stop(state);
        runstate_set(state);
        /* Make sure to return an error if the flush in a previous vm_stop()
         * failed. */
        return bdrv_flush_all();
static int tcg_cpu_exec(CPUArchState *env)
    CPUState *cpu = ENV_GET_CPU(env);

#ifdef CONFIG_PROFILER

#ifdef CONFIG_PROFILER
    ti = profile_getclock();

        qemu_icount -= (cpu->icount_decr.u16.low + cpu->icount_extra);
        cpu->icount_decr.u16.low = 0;
        cpu->icount_extra = 0;
        deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);

        /* Maintain prior (possibly buggy) behaviour where if no deadline
         * was set (as there is no QEMU_CLOCK_VIRTUAL timer) or it is more than
         * INT32_MAX nanoseconds ahead, we still use INT32_MAX
         * nanoseconds.
         */
        if ((deadline < 0) || (deadline > INT32_MAX)) {
            deadline = INT32_MAX;

        count = qemu_icount_round(deadline);
        qemu_icount += count;
        decr = (count > 0xffff) ? 0xffff : count;

        cpu->icount_decr.u16.low = decr;
        cpu->icount_extra = count;
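        /* The translated code decrements icount_decr.u16.low and exits to
         * refill from icount_extra when it reaches zero, so the instruction
         * budget is handed out as a 16-bit chunk (at most 0xffff instructions)
         * plus the remainder stored in icount_extra.
         */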
    ret = cpu_exec(env);
#ifdef CONFIG_PROFILER
    qemu_time += profile_getclock() - ti;

        /* Fold pending instructions back into the
           instruction counter, and clear the interrupt flag. */
        qemu_icount -= (cpu->icount_decr.u16.low + cpu->icount_extra);
        cpu->icount_decr.u32 = 0;
        cpu->icount_extra = 0;
static void tcg_exec_all(void)
    /* Account partial waits to QEMU_CLOCK_VIRTUAL. */
    qemu_clock_warp(QEMU_CLOCK_VIRTUAL);

    if (next_cpu == NULL) {
        next_cpu = first_cpu;
    for (; next_cpu != NULL && !exit_request; next_cpu = CPU_NEXT(next_cpu)) {
        CPUState *cpu = next_cpu;
        CPUArchState *env = cpu->env_ptr;

        qemu_clock_enable(QEMU_CLOCK_VIRTUAL,
                          (cpu->singlestep_enabled & SSTEP_NOTIMER) == 0);

        if (cpu_can_run(cpu)) {
            r = tcg_cpu_exec(env);
            if (r == EXCP_DEBUG) {
                cpu_handle_guest_debug(cpu);
        } else if (cpu->stop || cpu->stopped) {

void set_numa_modes(void)
        for (i = 0; i < nb_numa_nodes; i++) {
            if (test_bit(cpu->cpu_index, node_cpumask[i])) {

void list_cpus(FILE *f, fprintf_function cpu_fprintf, const char *optarg)
    /* XXX: implement xxx_cpu_list for targets that still miss it */
#if defined(cpu_list)
    cpu_list(f, cpu_fprintf);
CpuInfoList *qmp_query_cpus(Error **errp)
    CpuInfoList *head = NULL, *cur_item = NULL;

#if defined(TARGET_I386)
        X86CPU *x86_cpu = X86_CPU(cpu);
        CPUX86State *env = &x86_cpu->env;
#elif defined(TARGET_PPC)
        PowerPCCPU *ppc_cpu = POWERPC_CPU(cpu);
        CPUPPCState *env = &ppc_cpu->env;
#elif defined(TARGET_SPARC)
        SPARCCPU *sparc_cpu = SPARC_CPU(cpu);
        CPUSPARCState *env = &sparc_cpu->env;
#elif defined(TARGET_MIPS)
        MIPSCPU *mips_cpu = MIPS_CPU(cpu);
        CPUMIPSState *env = &mips_cpu->env;

        cpu_synchronize_state(cpu);

        info = g_malloc0(sizeof(*info));
        info->value = g_malloc0(sizeof(*info->value));
        info->value->CPU = cpu->cpu_index;
        info->value->current = (cpu == first_cpu);
        info->value->halted = cpu->halted;
        info->value->thread_id = cpu->thread_id;
#if defined(TARGET_I386)
        info->value->has_pc = true;
        info->value->pc = env->eip + env->segs[R_CS].base;
#elif defined(TARGET_PPC)
        info->value->has_nip = true;
        info->value->nip = env->nip;
#elif defined(TARGET_SPARC)
        info->value->has_pc = true;
        info->value->pc = env->pc;
        info->value->has_npc = true;
        info->value->npc = env->npc;
#elif defined(TARGET_MIPS)
        info->value->has_PC = true;
        info->value->PC = env->active_tc.PC;

        /* XXX: waiting for the qapi to support GSList */
            head = cur_item = info;
            cur_item->next = info;
void qmp_memsave(int64_t addr, int64_t size, const char *filename,
                 bool has_cpu, int64_t cpu_index, Error **errp)
    cpu = qemu_get_cpu(cpu_index);
        error_set(errp, QERR_INVALID_PARAMETER_VALUE, "cpu-index",

    f = fopen(filename, "wb");
        error_setg_file_open(errp, errno, filename);

        if (cpu_memory_rw_debug(cpu, addr, buf, l, 0) != 0) {
            error_setg(errp, "Invalid addr 0x%016" PRIx64 " specified", addr);
        if (fwrite(buf, 1, l, f) != l) {
            error_set(errp, QERR_IO_ERROR);
void qmp_pmemsave(int64_t addr, int64_t size, const char *filename,

    f = fopen(filename, "wb");
        error_setg_file_open(errp, errno, filename);

        cpu_physical_memory_rw(addr, buf, l, 0);
        if (fwrite(buf, 1, l, f) != l) {
            error_set(errp, QERR_IO_ERROR);

void qmp_inject_nmi(Error **errp)
#if defined(TARGET_I386)
        X86CPU *cpu = X86_CPU(cs);

        if (!cpu->apic_state) {
            cpu_interrupt(cs, CPU_INTERRUPT_NMI);
            apic_deliver_nmi(cpu->apic_state);
#elif defined(TARGET_S390X)
        if (cpu->env.cpu_num == monitor_get_cpu_index()) {
            if (s390_cpu_restart(S390_CPU(cs)) == -1) {
                error_set(errp, QERR_UNSUPPORTED);

    error_set(errp, QERR_UNSUPPORTED);