4 * Copyright (c) 2003-2008 Fabrice Bellard
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
25 /* Needed early for CONFIG_BSD etc. */
26 #include "config-host.h"
28 #include "monitor/monitor.h"
29 #include "sysemu/sysemu.h"
30 #include "exec/gdbstub.h"
31 #include "sysemu/dma.h"
32 #include "sysemu/kvm.h"
33 #include "sysemu/hax.h"
34 #include "qmp-commands.h"
36 #include "qemu/thread.h"
37 #include "sysemu/cpus.h"
38 #include "sysemu/qtest.h"
39 #include "qemu/main-loop.h"
40 #include "qemu/bitmap.h"
43 #include "qemu/compatfd.h"
48 #include <sys/prctl.h>
/* Values for the prctl() machine-check options that qemu_init_sigbus()
 * passes to prctl() further down in this file.
 * NOTE(review): the #ifndef guarding PR_MCE_KILL and the matching #endif
 * lines are not visible in this excerpt. */
51 #define PR_MCE_KILL 33
54 #ifndef PR_MCE_KILL_SET
55 #define PR_MCE_KILL_SET 1
58 #ifndef PR_MCE_KILL_EARLY
59 #define PR_MCE_KILL_EARLY 1
62 #endif /* CONFIG_LINUX */
/* Cursor used by tcg_exec_all(): the next CPU to execute.  When it is NULL,
 * tcg_exec_all() restarts from first_cpu. */
64 static CPUArchState *next_cpu;
/* Idle test for the vCPU behind @env, used by all_cpu_threads_idle() and
 * qemu_kvm_wait_io_event().  Three groups of conditions are examined in
 * order: a pending stop request or queued work; the stopped flag or a
 * non-running VM; and finally a non-halted CPU, pending CPU work,
 * kvm_async_interrupts_enabled() or hax_enabled().
 * NOTE(review): the value returned by each branch is on lines that are not
 * visible in this excerpt -- confirm before relying on this summary. */
66 static bool cpu_thread_is_idle(CPUArchState *env)
68 CPUState *cpu = ENV_GET_CPU(env);
70 if (cpu->stop || cpu->queued_work_first) {
73 if (cpu->stopped || !runstate_is_running()) {
76 if (!cpu->halted || qemu_cpu_has_work(cpu) ||
77 kvm_async_interrupts_enabled() || hax_enabled()) {
/* Test every CPU reachable from first_cpu with cpu_thread_is_idle().
 * NOTE(review): the return statements are not visible here; presumably the
 * result is true only when no CPU fails the test. */
83 static bool all_cpu_threads_idle(void)
87 for (env = first_cpu; env != NULL; env = env->next_cpu) {
88 if (!cpu_thread_is_idle(env)) {
95 /***********************************************************/
96 /* guest cycle counter */
98 /* Conversion factor from emulated instructions to virtual clock ticks. */
99 static int icount_time_shift;
100 /* Arbitrarily pick 1MIPS as the minimum allowable speed. */
101 #define MAX_ICOUNT_SHIFT 10
102 /* Compensate for varying guest execution speed. */
103 static int64_t qemu_icount_bias;
/* rt_clock timer whose callback is icount_adjust_rt(); created in
 * configure_icount(). */
104 static QEMUTimer *icount_rt_timer;
/* vm_clock timer whose callback is icount_adjust_vm(); created in
 * configure_icount(). */
105 static QEMUTimer *icount_vm_timer;
/* rt_clock timer whose callback is icount_warp_rt(); armed and cancelled by
 * qemu_clock_warp(). */
106 static QEMUTimer *icount_warp_timer;
/* rt_clock reading (ns) taken when a warp was started by qemu_clock_warp();
 * icount_warp_rt() resets it to -1 once the warp has been accounted. */
107 static int64_t vm_clock_warp_start;
/* Instruction counter that cpu_get_icount() scales into virtual time and
 * that tcg_cpu_exec() updates around each execution slice. */
108 static int64_t qemu_icount;
/* Host tick/clock bookkeeping shared by cpu_get_ticks(), cpu_get_clock(),
 * cpu_enable_ticks() and cpu_disable_ticks(); the single instance
 * timers_state is also registered for save/restore through vmstate_timers.
 * NOTE(review): some lines of the definition (including the closing brace
 * and the "dummy" member referenced by vmstate_timers) are not visible. */
110 typedef struct TimersState {
/* Last raw value seen by cpu_get_ticks(); used to detect backward jumps. */
111 int64_t cpu_ticks_prev;
/* Offset added to the raw host tick count. */
112 int64_t cpu_ticks_offset;
/* Offset added to the host clock in cpu_get_clock(). */
113 int64_t cpu_clock_offset;
/* Non-zero while ticks run; toggled by cpu_enable_ticks()/cpu_disable_ticks(). */
114 int32_t cpu_ticks_enabled;
118 TimersState timers_state;
/* Virtual time derived from the instruction counter.
 * Starts from qemu_icount, subtracts the instructions still budgeted to the
 * current CPU (icount_decr.u16.low + icount_extra), shifts the result left
 * by icount_time_shift and adds qemu_icount_bias.  "Bad clock read" is
 * printed to stderr when can_do_io() reports false for the current CPU.
 * NOTE(review): the guard around the cpu_single_env accesses is on a line
 * that is not visible in this excerpt. */
120 /* Return the virtual CPU time, based on the instruction counter. */
121 int64_t cpu_get_icount(void)
124 CPUArchState *env = cpu_single_env;
126 icount = qemu_icount;
128 if (!can_do_io(env)) {
129 fprintf(stderr, "Bad clock read\n");
131 icount -= (env->icount_decr.u16.low + env->icount_extra);
133 return qemu_icount_bias + (icount << icount_time_shift);
/* Tick counter with stop/restart support.
 * Visible early exits return cpu_get_icount() (its guarding condition is not
 * visible here) or the frozen cpu_ticks_offset while ticks are disabled.
 * Otherwise the raw cpu_get_real_ticks() value is used; when it is lower
 * than cpu_ticks_prev the difference is folded into cpu_ticks_offset so the
 * returned value does not go backwards. */
136 /* return the host CPU cycle counter and handle stop/restart */
137 int64_t cpu_get_ticks(void)
140 return cpu_get_icount();
142 if (!timers_state.cpu_ticks_enabled) {
143 return timers_state.cpu_ticks_offset;
146 ticks = cpu_get_real_ticks();
147 if (timers_state.cpu_ticks_prev > ticks) {
148 /* Note: non increasing ticks may happen if the host uses
150 timers_state.cpu_ticks_offset += timers_state.cpu_ticks_prev - ticks;
152 timers_state.cpu_ticks_prev = ticks;
153 return ticks + timers_state.cpu_ticks_offset;
/* Clock counterpart of cpu_get_ticks(): returns the frozen cpu_clock_offset
 * while ticks are disabled, otherwise ti + cpu_clock_offset.
 * NOTE(review): the declaration and assignment of ti are not visible here;
 * presumably it is get_clock(), as used by cpu_enable_ticks() -- confirm. */
157 /* return the host CPU monotonic timer and handle stop/restart */
158 int64_t cpu_get_clock(void)
161 if (!timers_state.cpu_ticks_enabled) {
162 return timers_state.cpu_clock_offset;
165 return ti + timers_state.cpu_clock_offset;
/* Restart the tick and clock counters.  The current host readings are
 * subtracted from the stored offsets, so cpu_get_ticks()/cpu_get_clock()
 * continue from the values frozen by cpu_disable_ticks().  Does nothing
 * when ticks are already enabled. */
169 /* enable cpu_get_ticks() */
170 void cpu_enable_ticks(void)
172 if (!timers_state.cpu_ticks_enabled) {
173 timers_state.cpu_ticks_offset -= cpu_get_real_ticks();
174 timers_state.cpu_clock_offset -= get_clock();
175 timers_state.cpu_ticks_enabled = 1;
/* Freeze both counters: their current values are stored in the offsets,
 * which is exactly what cpu_get_ticks()/cpu_get_clock() return while
 * cpu_ticks_enabled is 0.  Does nothing when ticks are already disabled. */
179 /* disable cpu_get_ticks() : the clock is stopped. You must not call
180 cpu_get_ticks() after that. */
181 void cpu_disable_ticks(void)
183 if (timers_state.cpu_ticks_enabled) {
184 timers_state.cpu_ticks_offset = cpu_get_ticks();
185 timers_state.cpu_clock_offset = cpu_get_clock();
186 timers_state.cpu_ticks_enabled = 0;
/* Tolerance used by icount_adjust(): one tenth of get_ticks_per_sec().
 * NOTE(review): the end of the original comment below is not visible in
 * this excerpt. */
190 /* Correlation between real and virtual time is always going to be
191 fairly approximate, so ignore small variation.
192 When the guest is idle real and virtual time will be aligned in
194 #define ICOUNT_WOBBLE (get_ticks_per_sec() / 10)
/* Re-tune the instruction-count scale against real time.
 * Compares vm_clock (cur_icount) with cpu_get_clock() (cur_time) and, when
 * the difference leaves the ICOUNT_WOBBLE tolerance, adjusts
 * icount_time_shift within [0, MAX_ICOUNT_SHIFT].  qemu_icount_bias is then
 * recomputed so that bias + (qemu_icount << icount_time_shift) equals
 * cur_icount under the (possibly changed) shift.  last_delta is static and
 * therefore persists between calls.
 * NOTE(review): the first operand of each comparison, the statements that
 * change icount_time_shift and the update of last_delta are on lines not
 * visible in this excerpt. */
196 static void icount_adjust(void)
201 static int64_t last_delta;
202 /* If the VM is not running, then do nothing. */
203 if (!runstate_is_running()) {
206 cur_time = cpu_get_clock();
207 cur_icount = qemu_get_clock_ns(vm_clock);
208 delta = cur_icount - cur_time;
209 /* FIXME: This is a very crude algorithm, somewhat prone to oscillation. */
211 && last_delta + ICOUNT_WOBBLE < delta * 2
212 && icount_time_shift > 0) {
213 /* The guest is getting too far ahead. Slow time down. */
217 && last_delta - ICOUNT_WOBBLE > delta * 2
218 && icount_time_shift < MAX_ICOUNT_SHIFT) {
219 /* The guest is getting too far behind. Speed time up. */
223 qemu_icount_bias = cur_icount - (qemu_icount << icount_time_shift);
/* Callback of icount_rt_timer (registered in configure_icount()).  Re-arms
 * the timer 1000 ms ahead on rt_clock; @opaque is not used on the visible
 * lines.
 * NOTE(review): the line after this re-arm is not visible; presumably it
 * calls icount_adjust() -- confirm. */
226 static void icount_adjust_rt(void *opaque)
228 qemu_mod_timer(icount_rt_timer,
229 qemu_get_clock_ms(rt_clock) + 1000);
/* Callback of icount_vm_timer (registered in configure_icount()).  Re-arms
 * the timer get_ticks_per_sec() / 10 ahead on vm_clock; @opaque is not used
 * on the visible lines.
 * NOTE(review): the line after this re-arm is not visible; presumably it
 * calls icount_adjust() -- confirm. */
233 static void icount_adjust_vm(void *opaque)
235 qemu_mod_timer(icount_vm_timer,
236 qemu_get_clock_ns(vm_clock) + get_ticks_per_sec() / 10);
240 static int64_t qemu_icount_round(int64_t count)
242 return (count + (1 << icount_time_shift) - 1) >> icount_time_shift;
/* Account a pending "warp" of vm_clock.  Used as the callback of
 * icount_warp_timer and called directly (with NULL) by qemu_clock_warp().
 * When a warp start is recorded and the VM is running, the real time elapsed
 * since vm_clock_warp_start is added to qemu_icount_bias: the full amount
 * when use_icount == 1, otherwise at most cpu_get_clock() minus the current
 * vm_clock.  The start marker is reset to -1 at the end.
 * NOTE(review): the early return for the -1 case and the action taken when
 * qemu_clock_expired(vm_clock) is true are not visible in this excerpt. */
245 static void icount_warp_rt(void *opaque)
247 if (vm_clock_warp_start == -1) {
251 if (runstate_is_running()) {
252 int64_t clock = qemu_get_clock_ns(rt_clock);
253 int64_t warp_delta = clock - vm_clock_warp_start;
254 if (use_icount == 1) {
255 qemu_icount_bias += warp_delta;
258 * In adaptive mode, do not let the vm_clock run too
259 * far ahead of real time.
261 int64_t cur_time = cpu_get_clock();
262 int64_t cur_icount = qemu_get_clock_ns(vm_clock);
263 int64_t delta = cur_time - cur_icount;
264 qemu_icount_bias += MIN(warp_delta, delta);
266 if (qemu_clock_expired(vm_clock)) {
270 vm_clock_warp_start = -1;
/* Advance vm_clock to @dest (ns) on behalf of qtest; asserts that qtest is
 * enabled.  Each iteration adds the smaller of the remaining distance and
 * the next vm_clock deadline to qemu_icount_bias, runs the vm_clock timers
 * and re-reads the clock, until the clock has reached @dest.
 * NOTE(review): any statement following the loop is not visible here. */
273 void qtest_clock_warp(int64_t dest)
275 int64_t clock = qemu_get_clock_ns(vm_clock);
276 assert(qtest_enabled());
277 while (clock < dest) {
278 int64_t deadline = qemu_clock_deadline(vm_clock);
279 int64_t warp = MIN(dest - clock, deadline);
280 qemu_icount_bias += warp;
281 qemu_run_timers(vm_clock);
282 clock = qemu_get_clock_ns(vm_clock);
/* Let vm_clock make progress while every vCPU is asleep in icount mode.
 * Only acts for @clock == vm_clock with use_icount set.  Any pending warp is
 * first accounted through icount_warp_rt(NULL).  If some CPU is not idle or
 * vm_clock has no timers, icount_warp_timer is deleted.  Otherwise the warp
 * start is recorded from rt_clock and icount_warp_timer is armed at
 * start + the next vm_clock deadline.
 * NOTE(review): the early returns, the qtest branch body and any check on
 * the deadline value before arming the timer are on lines not visible in
 * this excerpt; the comment delimiters of the original prose below are
 * missing as well. */
287 void qemu_clock_warp(QEMUClock *clock)
292 * There are too many global variables to make the "warp" behavior
293 * applicable to other clocks. But a clock argument removes the
294 * need for if statements all over the place.
296 if (clock != vm_clock || !use_icount) {
301 * If the CPUs have been sleeping, advance the vm_clock timer now. This
302 * ensures that the deadline for the timer is computed correctly below.
303 * This also makes sure that the insn counter is synchronized before the
304 * CPU starts running, in case the CPU is woken by an event other than
305 * the earliest vm_clock timer.
307 icount_warp_rt(NULL);
308 if (!all_cpu_threads_idle() || !qemu_clock_has_timers(vm_clock)) {
309 qemu_del_timer(icount_warp_timer);
313 if (qtest_enabled()) {
314 /* When testing, qtest commands advance icount. */
318 vm_clock_warp_start = qemu_get_clock_ns(rt_clock);
319 deadline = qemu_clock_deadline(vm_clock);
322 * Ensure the vm_clock proceeds even when the virtual CPU goes to
323 * sleep. Otherwise, the CPU might be waiting for a future timer
324 * interrupt to wake it up, but the interrupt never comes because
325 * the vCPU isn't running any insns and thus doesn't advance the
328 * An extreme solution for this problem would be to never let VCPUs
329 * sleep in icount mode if there is a pending vm_clock timer; rather
330 * time could just advance to the next vm_clock event. Instead, we
331 * do stop VCPUs and only advance vm_clock after some "real" time,
332 * (related to the time left until the next event) has passed. This
333 * rt_clock timer will do this. This avoids that the warps are too
334 * visible externally---for example, you will not be sending network
335 * packets continuously instead of every 100ms.
337 qemu_mod_timer(icount_warp_timer, vm_clock_warp_start + deadline);
/* Save/restore description for timers_state (registered in
 * configure_icount()).  Fields listed: cpu_ticks_offset, dummy, and
 * cpu_clock_offset, the last one through VMSTATE_INT64_V with version 2.
 * NOTE(review): the .name and .version_id initialisers and the closing
 * braces are not visible in this excerpt. */
343 static const VMStateDescription vmstate_timers = {
346 .minimum_version_id = 1,
347 .minimum_version_id_old = 1,
348 .fields = (VMStateField[]) {
349 VMSTATE_INT64(cpu_ticks_offset, TimersState),
350 VMSTATE_INT64(dummy, TimersState),
351 VMSTATE_INT64_V(cpu_clock_offset, TimersState, 2),
352 VMSTATE_END_OF_LIST()
/* Configure instruction counting from @option.
 * Always registers vmstate_timers for timers_state and creates
 * icount_warp_timer.  An @option other than "auto" is parsed with strtol()
 * (base auto-detected) and used as the fixed icount_time_shift.  For "auto"
 * the shift starts at 3 and two adjustment timers are created and armed:
 * icount_rt_timer 1000 ms ahead on rt_clock and icount_vm_timer
 * get_ticks_per_sec() / 10 ahead on vm_clock.
 * NOTE(review): on the visible lines the strtol() result is not validated;
 * a negative or very large shift would make the shifts in
 * cpu_get_icount()/qemu_icount_round() undefined.  The handling of a NULL
 * @option, the use_icount assignments and the early return are on lines not
 * visible in this excerpt. */
356 void configure_icount(const char *option)
358 vmstate_register(NULL, 0, &vmstate_timers, &timers_state);
363 icount_warp_timer = qemu_new_timer_ns(rt_clock, icount_warp_rt, NULL);
364 if (strcmp(option, "auto") != 0) {
365 icount_time_shift = strtol(option, NULL, 0);
372 /* 125MIPS seems a reasonable initial guess at the guest speed.
373 It will be corrected fairly quickly anyway. */
374 icount_time_shift = 3;
376 /* Have both realtime and virtual time triggers for speed adjustment.
377 The realtime trigger catches emulated time passing too slowly,
378 the virtual time trigger catches emulated time passing too fast.
379 Realtime triggers occur even when idle, so use them less frequently
381 icount_rt_timer = qemu_new_timer_ms(rt_clock, icount_adjust_rt, NULL);
382 qemu_mod_timer(icount_rt_timer,
383 qemu_get_clock_ms(rt_clock) + 1000);
384 icount_vm_timer = qemu_new_timer_ns(vm_clock, icount_adjust_vm, NULL);
385 qemu_mod_timer(icount_vm_timer,
386 qemu_get_clock_ns(vm_clock) + get_ticks_per_sec() / 10);
389 /***********************************************************/
/* Report a hardware-emulation error.  Prints "qemu: hardware error: "
 * followed by the printf-style message (@fmt and its arguments) and a
 * newline to stderr, then dumps every CPU on the list with cpu_dump_state()
 * and the CPU_DUMP_FPU flag, each preceded by a "CPU #<index>:" line.
 * NOTE(review): va_start()/va_end() and whatever ends the function (likely
 * process termination) are on lines not visible in this excerpt. */
390 void hw_error(const char *fmt, ...)
397 fprintf(stderr, "qemu: hardware error: ");
398 vfprintf(stderr, fmt, ap);
399 fprintf(stderr, "\n");
400 for (env = first_cpu; env != NULL; env = env->next_cpu) {
401 cpu = ENV_GET_CPU(env);
402 fprintf(stderr, "CPU #%d:\n", cpu->cpu_index);
403 cpu_dump_state(env, stderr, fprintf, CPU_DUMP_FPU);
/* Call cpu_synchronize_state() on every entry of the CPU list, starting at
 * first_cpu.  The declaration of the loop variable is not visible here. */
409 void cpu_synchronize_all_states(void)
413 for (cpu = first_cpu; cpu; cpu = cpu->next_cpu) {
414 cpu_synchronize_state(cpu);
/* Call cpu_synchronize_post_reset() for every entry of the CPU list; each
 * list entry is converted with ENV_GET_CPU() first.  The declaration of the
 * loop variable is not visible here. */
418 void cpu_synchronize_all_post_reset(void)
422 for (cpu = first_cpu; cpu; cpu = cpu->next_cpu) {
423 cpu_synchronize_post_reset(ENV_GET_CPU(cpu));
/* Call cpu_synchronize_post_init() for every entry of the CPU list; each
 * list entry is converted with ENV_GET_CPU() first.  The declaration of the
 * loop variable is not visible here. */
427 void cpu_synchronize_all_post_init(void)
431 for (cpu = first_cpu; cpu; cpu = cpu->next_cpu) {
432 cpu_synchronize_post_init(ENV_GET_CPU(cpu));
436 bool cpu_is_stopped(CPUState *cpu)
438 return !runstate_is_running() || cpu->stopped;
/* Stop the VM and enter @state.  The visible steps, taken when the VM is
 * currently running, are: notify the state-change listeners with
 * vm_state_notify(0, state) and emit the QEVENT_STOP monitor event.
 * NOTE(review): the remaining steps (several lines between and after the
 * visible ones) are not visible in this excerpt. */
441 static void do_vm_stop(RunState state)
443 if (runstate_is_running()) {
447 vm_state_notify(0, state);
450 monitor_protocol_event(QEVENT_STOP, NULL);
/* Tell whether @cpu may execute guest code right now; used by the KVM
 * thread loop and by tcg_exec_all().  The visible check looks at the
 * stopped flag and at the VM run state.
 * NOTE(review): an earlier check and all return statements are on lines not
 * visible in this excerpt. */
454 static bool cpu_can_run(CPUState *cpu)
459 if (cpu->stopped || !runstate_is_running()) {
/* Called when CPU execution returns EXCP_DEBUG: records @env as the CPU the
 * gdbstub should report and requests a debug stop of the system.
 * NOTE(review): the statement after qemu_system_debug_request(), which
 * presumably uses the local cpu, is not visible in this excerpt. */
465 static void cpu_handle_guest_debug(CPUArchState *env)
467 CPUState *cpu = ENV_GET_CPU(env);
469 gdb_set_stop_cpu(env);
470 qemu_system_debug_request();
/* SIG_IPI handler installed by qemu_tcg_init_cpu_signals().  When a CPU is
 * currently recorded in cpu_single_env, cpu_exit() is called on it; @sig is
 * not used on the visible lines.
 * NOTE(review): the lines after the if-block are not visible here. */
474 static void cpu_signal(int sig)
476 if (cpu_single_env) {
477 cpu_exit(cpu_single_env);
483 static void sigbus_reraise(void)
486 struct sigaction action;
488 memset(&action, 0, sizeof(action));
489 action.sa_handler = SIG_DFL;
490 if (!sigaction(SIGBUS, &action, NULL)) {
493 sigaddset(&set, SIGBUS);
494 sigprocmask(SIG_UNBLOCK, &set, NULL);
496 perror("Failed to re-raise SIGBUS!\n");
/* SIGBUS handler; qemu_init_sigbus() installs it with SA_SIGINFO through a
 * cast to the standard three-argument signature.  Forwards the signal code
 * and faulting address from @siginfo to kvm_on_sigbus().
 * NOTE(review): the third parameter and the action taken when
 * kvm_on_sigbus() returns non-zero are on lines not visible here. */
500 static void sigbus_handler(int n, struct qemu_signalfd_siginfo *siginfo,
503 if (kvm_on_sigbus(siginfo->ssi_code,
504 (void *)(intptr_t)siginfo->ssi_addr)) {
/* Install sigbus_handler for SIGBUS (SA_SIGINFO) and call
 * prctl(PR_MCE_KILL, PR_MCE_KILL_SET, PR_MCE_KILL_EARLY, 0, 0) for this
 * process.  The return values of sigaction() and prctl() are not checked on
 * the visible lines. */
509 static void qemu_init_sigbus(void)
511 struct sigaction action;
513 memset(&action, 0, sizeof(action));
514 action.sa_flags = SA_SIGINFO;
515 action.sa_sigaction = (void (*)(int, siginfo_t*, void*))sigbus_handler;
516 sigaction(SIGBUS, &action, NULL);
518 prctl(PR_MCE_KILL, PR_MCE_KILL_SET, PR_MCE_KILL_EARLY, 0, 0);
/* Consume SIG_IPI and SIGBUS signals pending for the calling thread without
 * blocking: sigtimedwait() is used with a zero timeout, and the loop repeats
 * while sigpending() still reports either signal.  Signal details are handed
 * to kvm_on_sigbus_vcpu() for @cpu.
 * NOTE(review): the dispatch on the sigtimedwait() result, the action after
 * each perror() and the result handling of kvm_on_sigbus_vcpu() are on lines
 * not visible in this excerpt. */
521 static void qemu_kvm_eat_signals(CPUState *cpu)
523 struct timespec ts = { 0, 0 };
529 sigemptyset(&waitset);
530 sigaddset(&waitset, SIG_IPI);
531 sigaddset(&waitset, SIGBUS);
534 r = sigtimedwait(&waitset, &siginfo, &ts);
535 if (r == -1 && !(errno == EAGAIN || errno == EINTR)) {
536 perror("sigtimedwait");
542 if (kvm_on_sigbus_vcpu(cpu, siginfo.si_code, siginfo.si_addr)) {
550 r = sigpending(&chkset);
552 perror("sigpending");
555 } while (sigismember(&chkset, SIG_IPI) || sigismember(&chkset, SIGBUS));
558 #else /* !CONFIG_LINUX */
/* Variant of qemu_init_sigbus() for builds without CONFIG_LINUX.
 * NOTE(review): the body is not visible here; presumably empty. */
560 static void qemu_init_sigbus(void)
/* Variant of qemu_kvm_eat_signals() for builds without CONFIG_LINUX.
 * NOTE(review): the body is not visible here; presumably empty. */
564 static void qemu_kvm_eat_signals(CPUState *cpu)
567 #endif /* !CONFIG_LINUX */
/* Handler that qemu_kvm_init_cpu_signals() installs for SIG_IPI.
 * NOTE(review): the body is not visible here; presumably empty. */
570 static void dummy_signal(int sig)
/* Signal setup for a KVM vCPU thread.  Installs dummy_signal for SIG_IPI,
 * reads the thread's current signal mask (pthread_sigmask() with a NULL new
 * set only queries), removes SIG_IPI and SIGBUS from that copy and passes it
 * to kvm_set_signal_mask() for @env.  A failure is reported on stderr using
 * strerror(-r).
 * NOTE(review): the check on r and what follows the error message are on
 * lines not visible in this excerpt. */
574 static void qemu_kvm_init_cpu_signals(CPUArchState *env)
578 struct sigaction sigact;
580 memset(&sigact, 0, sizeof(sigact));
581 sigact.sa_handler = dummy_signal;
582 sigaction(SIG_IPI, &sigact, NULL);
584 pthread_sigmask(SIG_BLOCK, NULL, &set);
585 sigdelset(&set, SIG_IPI);
586 sigdelset(&set, SIGBUS);
587 r = kvm_set_signal_mask(env, &set);
589 fprintf(stderr, "kvm_set_signal_mask: %s\n", strerror(-r));
/* Signal setup for the TCG CPU thread: installs cpu_signal as the SIG_IPI
 * handler and unblocks SIG_IPI in the calling thread.
 * NOTE(review): the declaration and initialisation of set are on lines not
 * visible in this excerpt. */
594 static void qemu_tcg_init_cpu_signals(void)
597 struct sigaction sigact;
599 memset(&sigact, 0, sizeof(sigact));
600 sigact.sa_handler = cpu_signal;
601 sigaction(SIG_IPI, &sigact, NULL);
604 sigaddset(&set, SIG_IPI);
605 pthread_sigmask(SIG_UNBLOCK, &set, NULL);
/* Second definition of qemu_kvm_init_cpu_signals(), selected by a
 * preprocessor conditional that is not visible here.
 * NOTE(review): the body is not visible; presumably a stub. */
609 static void qemu_kvm_init_cpu_signals(CPUArchState *env)
/* Second definition of qemu_tcg_init_cpu_signals(), selected by a
 * preprocessor conditional that is not visible here.
 * NOTE(review): the body is not visible; presumably a stub. */
614 static void qemu_tcg_init_cpu_signals(void)
/* Global mutex taken by qemu_mutex_lock_iothread() and by the vCPU threads. */
619 static QemuMutex qemu_global_mutex;
/* Broadcast by qemu_mutex_lock_iothread() once it owns the mutex; waited on
 * in qemu_tcg_wait_io_event(). */
620 static QemuCond qemu_io_proceeded_cond;
/* True while qemu_mutex_lock_iothread() is trying to take the mutex (TCG). */
621 static bool iothread_requesting_mutex;
/* Thread that called qemu_init_cpu_loop(). */
623 static QemuThread io_thread;
/* Thread and halt condition shared by all CPUs under TCG; set up in
 * qemu_tcg_init_vcpu(). */
625 static QemuThread *tcg_cpu_thread;
626 static QemuCond *tcg_halt_cond;
/* Signalled by a new vCPU thread; the *_start_vcpu()/init functions wait on
 * it until cpu->created is set. */
629 static QemuCond qemu_cpu_cond;
/* Waited on by pause_all_vcpus(); signalled from
 * qemu_wait_io_event_common() and cpu_stop_current(). */
631 static QemuCond qemu_pause_cond;
/* Broadcast by flush_queued_work(); waited on by run_on_cpu(). */
632 static QemuCond qemu_work_cond;
/* One-time initialisation of the condition variables and the global mutex
 * used by the CPU loop; also records the calling thread in io_thread.
 * NOTE(review): the first statement of the body is on a line not visible in
 * this excerpt. */
634 void qemu_init_cpu_loop(void)
637 qemu_cond_init(&qemu_cpu_cond);
638 qemu_cond_init(&qemu_pause_cond);
639 qemu_cond_init(&qemu_work_cond);
640 qemu_cond_init(&qemu_io_proceeded_cond);
641 qemu_mutex_init(&qemu_global_mutex);
643 qemu_thread_get_self(&io_thread);
/* Run @func(@data) in the context of @cpu.  On the visible lines a work item
 * living on this function's stack is appended to the CPU's queue
 * (queued_work_first/queued_work_last) and the caller then waits on
 * qemu_work_cond with the global mutex, restoring cpu_single_env after each
 * wait.  Because the item is on the stack, the wait has to last until the
 * item has been processed.
 * NOTE(review): the branch taken when the caller already is @cpu's thread,
 * the filling of the work item, the kick of @cpu and the wait-loop condition
 * are on lines not visible in this excerpt. */
646 void run_on_cpu(CPUState *cpu, void (*func)(void *data), void *data)
648 struct qemu_work_item wi;
650 if (qemu_cpu_is_self(cpu)) {
657 if (cpu->queued_work_first == NULL) {
658 cpu->queued_work_first = &wi;
660 cpu->queued_work_last->next = &wi;
662 cpu->queued_work_last = &wi;
668 CPUArchState *self_env = cpu_single_env;
670 qemu_cond_wait(&qemu_work_cond, &qemu_global_mutex);
671 cpu_single_env = self_env;
/* Drain the work queue of @cpu: items are unlinked from the head one by one,
 * queued_work_last is cleared afterwards and all waiters on qemu_work_cond
 * (see run_on_cpu()) are woken.  Nothing visible happens when the queue is
 * empty.
 * NOTE(review): the statements that process each unlinked item are on lines
 * not visible in this excerpt. */
675 static void flush_queued_work(CPUState *cpu)
677 struct qemu_work_item *wi;
679 if (cpu->queued_work_first == NULL) {
683 while ((wi = cpu->queued_work_first)) {
684 cpu->queued_work_first = wi->next;
688 cpu->queued_work_last = NULL;
689 qemu_cond_broadcast(&qemu_work_cond);
/* Housekeeping performed by a vCPU thread after waking up: signals
 * qemu_pause_cond, runs the CPU's queued work and clears thread_kicked so
 * that the CPU can be kicked again.
 * NOTE(review): the condition under which qemu_pause_cond is signalled is on
 * lines not visible in this excerpt. */
692 static void qemu_wait_io_event_common(CPUState *cpu)
697 qemu_cond_signal(&qemu_pause_cond);
699 flush_queued_work(cpu);
700 cpu->thread_kicked = false;
/* Wait step of the TCG CPU thread.  While every CPU is idle it starts a
 * vm_clock warp and sleeps on tcg_halt_cond; it then yields to the I/O
 * thread for as long as iothread_requesting_mutex is set, and finally runs
 * qemu_wait_io_event_common() for every CPU on the list. */
703 static void qemu_tcg_wait_io_event(void)
707 while (all_cpu_threads_idle()) {
708 /* Start accounting real time to the virtual clock if the CPUs
710 qemu_clock_warp(vm_clock);
711 qemu_cond_wait(tcg_halt_cond, &qemu_global_mutex);
714 while (iothread_requesting_mutex) {
715 qemu_cond_wait(&qemu_io_proceeded_cond, &qemu_global_mutex);
718 for (env = first_cpu; env != NULL; env = env->next_cpu) {
719 qemu_wait_io_event_common(ENV_GET_CPU(env));
/* Wait step of a KVM vCPU thread: sleeps on the CPU's halt_cond while the
 * CPU is idle, then consumes pending signals and performs the common wake-up
 * housekeeping. */
723 static void qemu_kvm_wait_io_event(CPUArchState *env)
725 CPUState *cpu = ENV_GET_CPU(env);
727 while (cpu_thread_is_idle(env)) {
728 qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
731 qemu_kvm_eat_signals(cpu);
732 qemu_wait_io_event_common(cpu);
/* Entry point of a KVM vCPU thread; @arg is the CPUArchState of the CPU.
 * Under the global mutex it records the thread handle and id, sets
 * cpu_single_env, initialises the KVM vCPU and its signal mask and signals
 * qemu_cpu_cond to the creator.  The main loop then runs kvm_cpu_exec()
 * whenever cpu_can_run() allows it, forwards EXCP_DEBUG to
 * cpu_handle_guest_debug() and waits in qemu_kvm_wait_io_event().
 * NOTE(review): the handling after a kvm_init_vcpu() failure, the statement
 * that marks the CPU as created, the loop construct and the return are on
 * lines not visible in this excerpt. */
735 static void *qemu_kvm_cpu_thread_fn(void *arg)
737 CPUArchState *env = arg;
738 CPUState *cpu = ENV_GET_CPU(env);
741 qemu_mutex_lock(&qemu_global_mutex);
742 qemu_thread_get_self(cpu->thread);
743 cpu->thread_id = qemu_get_thread_id();
744 cpu_single_env = env;
746 r = kvm_init_vcpu(cpu);
748 fprintf(stderr, "kvm_init_vcpu failed: %s\n", strerror(-r));
752 qemu_kvm_init_cpu_signals(env);
754 /* signal CPU creation */
756 qemu_cond_signal(&qemu_cpu_cond);
759 if (cpu_can_run(cpu)) {
760 r = kvm_cpu_exec(env);
761 if (r == EXCP_DEBUG) {
762 cpu_handle_guest_debug(env);
765 qemu_kvm_wait_io_event(env);
771 static void *qemu_dummy_cpu_thread_fn(void *arg)
774 fprintf(stderr, "qtest is not supported under Windows\n");
777 CPUArchState *env = arg;
778 CPUState *cpu = ENV_GET_CPU(env);
782 qemu_mutex_lock_iothread();
783 qemu_thread_get_self(cpu->thread);
784 cpu->thread_id = qemu_get_thread_id();
786 sigemptyset(&waitset);
787 sigaddset(&waitset, SIG_IPI);
789 /* signal CPU creation */
791 qemu_cond_signal(&qemu_cpu_cond);
793 cpu_single_env = env;
795 cpu_single_env = NULL;
796 qemu_mutex_unlock_iothread();
799 r = sigwait(&waitset, &sig);
800 } while (r == -1 && (errno == EAGAIN || errno == EINTR));
805 qemu_mutex_lock_iothread();
806 cpu_single_env = env;
807 qemu_wait_io_event_common(cpu);
/* Forward declaration; tcg_exec_all() is defined near the end of the file. */
814 static void tcg_exec_all(void);
/* qemu_for_each_cpu() callback used by qemu_tcg_cpu_thread_fn(): stores the
 * id of the calling (TCG) thread in @cpu; @data is not used on the visible
 * lines.
 * NOTE(review): the following statement is not visible here; presumably it
 * marks the CPU as created -- confirm. */
816 static void tcg_signal_cpu_creation(CPUState *cpu, void *data)
818 cpu->thread_id = qemu_get_thread_id();
/* Entry point of the single TCG CPU thread.  Sets up the TCG signal
 * handling, records the thread handle, and under the global mutex announces
 * creation for every CPU (tcg_signal_cpu_creation) and signals
 * qemu_cpu_cond.  It then sleeps on tcg_halt_cond while the first CPU is
 * still stopped, processing pending work for all CPUs after each wake-up.
 * The main loop checks the vm_clock deadline in icount mode and waits in
 * qemu_tcg_wait_io_event().
 * NOTE(review): the declarations of cpu/env, the call that executes guest
 * code, the action for an expired deadline and the loop construct are on
 * lines not visible in this excerpt. */
822 static void *qemu_tcg_cpu_thread_fn(void *arg)
827 qemu_tcg_init_cpu_signals();
828 qemu_thread_get_self(cpu->thread);
830 qemu_mutex_lock(&qemu_global_mutex);
831 qemu_for_each_cpu(tcg_signal_cpu_creation, NULL);
832 qemu_cond_signal(&qemu_cpu_cond);
834 /* wait for initial kick-off after machine start */
835 while (ENV_GET_CPU(first_cpu)->stopped) {
836 qemu_cond_wait(tcg_halt_cond, &qemu_global_mutex);
838 /* process any pending work */
839 for (env = first_cpu; env != NULL; env = env->next_cpu) {
840 qemu_wait_io_event_common(ENV_GET_CPU(env));
846 if (use_icount && qemu_clock_deadline(vm_clock) <= 0) {
849 qemu_tcg_wait_io_event();
/* Interrupt the thread that runs @cpu.
 * Visible pthread path: send SIG_IPI with pthread_kill() and print the
 * strerror() text on failure (the message has no trailing newline).
 * Visible Windows path: when the caller is not @cpu's own thread, suspend
 * the thread, poll GetThreadContext(), then resume it, printing
 * GetLastError() if suspend or resume fails.
 * NOTE(review): the preprocessor conditionals that separate the paths, the
 * error check on err, the Mac-specific resend and the statements inside the
 * GetThreadContext() loop are on lines not visible in this excerpt. */
855 static void qemu_cpu_kick_thread(CPUState *cpu)
860 err = pthread_kill(cpu->thread->thread, SIG_IPI);
862 fprintf(stderr, "qemu:%s: %s", __func__, strerror(err));
865 /* The cpu thread cannot catch it reliably when shutdown the guest on Mac.
866 * We can double check it and resend it
873 if (!qemu_cpu_is_self(cpu)) {
876 if (SuspendThread(cpu->hThread) == (DWORD)-1) {
877 fprintf(stderr, "qemu:%s: GetLastError:%lu\n", __func__,
882 /* On multi-core systems, we are not sure that the thread is actually
883 * suspended until we can get the context.
885 tcgContext.ContextFlags = CONTEXT_CONTROL;
886 while (GetThreadContext(cpu->hThread, &tcgContext) != 0) {
892 if (ResumeThread(cpu->hThread) == (DWORD)-1) {
893 fprintf(stderr, "qemu:%s: GetLastError:%lu\n", __func__,
/* Wake @cpu: broadcasts its halt_cond and, when TCG is not in use and the
 * CPU has not been kicked already, interrupts its thread and sets
 * thread_kicked (cleared again in qemu_wait_io_event_common()). */
901 void qemu_cpu_kick(CPUState *cpu)
903 qemu_cond_broadcast(cpu->halt_cond);
904 if (!tcg_enabled() && !cpu->thread_kicked) {
905 qemu_cpu_kick_thread(cpu);
906 cpu->thread_kicked = true;
/* Kick the CPU recorded in cpu_single_env (asserted non-NULL) unless it has
 * already been kicked; sets thread_kicked afterwards.
 * NOTE(review): lines surrounding the visible statements (possibly
 * preprocessor conditionals) are not visible in this excerpt. */
910 void qemu_cpu_kick_self(void)
913 assert(cpu_single_env);
914 CPUState *cpu_single_cpu = ENV_GET_CPU(cpu_single_env);
916 if (!cpu_single_cpu->thread_kicked) {
917 qemu_cpu_kick_thread(cpu_single_cpu);
918 cpu_single_cpu->thread_kicked = true;
925 bool qemu_cpu_is_self(CPUState *cpu)
927 return qemu_thread_is_self(cpu->thread);
930 static bool qemu_in_vcpu_thread(void)
932 return cpu_single_env && qemu_cpu_is_self(ENV_GET_CPU(cpu_single_env));
/* Take the global mutex.  Without TCG this is a plain lock.  With TCG the
 * function first sets iothread_requesting_mutex; if the mutex cannot be
 * taken immediately it kicks the first CPU's thread and then blocks on the
 * lock.  Once the mutex is held the flag is cleared and
 * qemu_io_proceeded_cond is broadcast, which releases the wait in
 * qemu_tcg_wait_io_event(). */
935 void qemu_mutex_lock_iothread(void)
937 if (!tcg_enabled()) {
938 qemu_mutex_lock(&qemu_global_mutex);
940 iothread_requesting_mutex = true;
941 if (qemu_mutex_trylock(&qemu_global_mutex)) {
942 qemu_cpu_kick_thread(ENV_GET_CPU(first_cpu));
943 qemu_mutex_lock(&qemu_global_mutex);
945 iothread_requesting_mutex = false;
946 qemu_cond_broadcast(&qemu_io_proceeded_cond);
950 void qemu_mutex_unlock_iothread(void)
952 qemu_mutex_unlock(&qemu_global_mutex);
/* Walk the CPU list from first_cpu and inspect each CPU's stopped flag;
 * pause_all_vcpus() waits until this function reports non-zero.
 * NOTE(review): the loop construct and the return statements are on lines
 * not visible here; presumably 0 is returned for the first CPU that is not
 * stopped and 1 otherwise. */
955 static int all_vcpus_paused(void)
957 CPUArchState *penv = first_cpu;
960 CPUState *pcpu = ENV_GET_CPU(penv);
961 if (!pcpu->stopped) {
964 penv = penv->next_cpu;
/* Stop every vCPU.  vm_clock is disabled first, then the CPU list is walked
 * once.  When called from a vCPU thread and KVM is not enabled, every CPU is
 * marked stopped directly.  Afterwards the function waits on
 * qemu_pause_cond until all_vcpus_paused() holds, kicking every CPU between
 * waits.
 * NOTE(review): the per-CPU action of the first walk, the remaining
 * statements of the vCPU-thread branch and the loop constructs are on lines
 * not visible in this excerpt. */
970 void pause_all_vcpus(void)
972 CPUArchState *penv = first_cpu;
974 qemu_clock_enable(vm_clock, false);
976 CPUState *pcpu = ENV_GET_CPU(penv);
979 penv = penv->next_cpu;
982 if (qemu_in_vcpu_thread()) {
984 if (!kvm_enabled()) {
987 CPUState *pcpu = ENV_GET_CPU(penv);
989 pcpu->stopped = true;
990 penv = penv->next_cpu;
996 while (!all_vcpus_paused()) {
997 qemu_cond_wait(&qemu_pause_cond, &qemu_global_mutex);
1000 qemu_cpu_kick(ENV_GET_CPU(penv));
1001 penv = penv->next_cpu;
/* Clear the stopped flag of @cpu.
 * NOTE(review): the statements before and after the visible assignment are
 * not visible in this excerpt. */
1006 void cpu_resume(CPUState *cpu)
1009 cpu->stopped = false;
/* Re-enable vm_clock and walk the CPU list from first_cpu.
 * NOTE(review): the per-CPU action and the loop construct are on lines not
 * visible here; presumably each CPU is resumed with cpu_resume(). */
1013 void resume_all_vcpus(void)
1015 CPUArchState *penv = first_cpu;
1017 qemu_clock_enable(vm_clock, true);
1019 CPUState *pcpu = ENV_GET_CPU(penv);
1021 penv = penv->next_cpu;
/* Attach @cpu to the TCG CPU thread.  The first CPU allocates the thread
 * object and halt condition, publishes the condition in tcg_halt_cond,
 * starts qemu_tcg_cpu_thread_fn() and waits on qemu_cpu_cond until
 * cpu->created is set, then publishes the thread in tcg_cpu_thread.  Every
 * later CPU simply reuses tcg_cpu_thread and tcg_halt_cond.
 * NOTE(review): the condition guarding hax_init_vcpu() and the preprocessor
 * conditional around the hThread assignment are on lines not visible in this
 * excerpt. */
1025 static void qemu_tcg_init_vcpu(CPUState *cpu)
1029 hax_init_vcpu(cpu->env_ptr);
1031 /* share a single thread for all cpus with TCG */
1032 if (!tcg_cpu_thread) {
1033 cpu->thread = g_malloc0(sizeof(QemuThread));
1034 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
1035 qemu_cond_init(cpu->halt_cond);
1036 tcg_halt_cond = cpu->halt_cond;
1037 qemu_thread_create(cpu->thread, qemu_tcg_cpu_thread_fn, cpu,
1038 QEMU_THREAD_JOINABLE);
1040 cpu->hThread = qemu_thread_get_handle(cpu->thread);
1042 while (!cpu->created) {
1043 qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
1045 tcg_cpu_thread = cpu->thread;
1047 cpu->thread = tcg_cpu_thread;
1048 cpu->halt_cond = tcg_halt_cond;
/* Start a dedicated KVM thread for the CPU behind @env: allocates the thread
 * object and halt condition, creates a joinable thread running
 * qemu_kvm_cpu_thread_fn(env) and waits on qemu_cpu_cond until the new
 * thread has set cpu->created. */
1052 static void qemu_kvm_start_vcpu(CPUArchState *env)
1054 CPUState *cpu = ENV_GET_CPU(env);
1056 cpu->thread = g_malloc0(sizeof(QemuThread));
1057 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
1058 qemu_cond_init(cpu->halt_cond);
1059 qemu_thread_create(cpu->thread, qemu_kvm_cpu_thread_fn, env,
1060 QEMU_THREAD_JOINABLE);
1061 while (!cpu->created) {
1062 qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
/* Start the placeholder thread for the CPU behind @env: allocates the thread
 * object and halt condition, creates a joinable thread running
 * qemu_dummy_cpu_thread_fn(env) and waits on qemu_cpu_cond until the new
 * thread has set cpu->created. */
1066 static void qemu_dummy_start_vcpu(CPUArchState *env)
1068 CPUState *cpu = ENV_GET_CPU(env);
1070 cpu->thread = g_malloc0(sizeof(QemuThread));
1071 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
1072 qemu_cond_init(cpu->halt_cond);
1073 qemu_thread_create(cpu->thread, qemu_dummy_cpu_thread_fn, env,
1074 QEMU_THREAD_JOINABLE);
1075 while (!cpu->created) {
1076 qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
/* Initialise the execution thread for a new CPU; @_env is its CPUArchState.
 * Copies the SMP topology (smp_cores, smp_threads) into the CPU, marks it
 * stopped and then picks the backend: KVM thread, shared TCG thread, or the
 * placeholder thread when neither is enabled. */
1080 void qemu_init_vcpu(void *_env)
1082 CPUArchState *env = _env;
1083 CPUState *cpu = ENV_GET_CPU(env);
1085 cpu->nr_cores = smp_cores;
1086 cpu->nr_threads = smp_threads;
1087 cpu->stopped = true;
1088 if (kvm_enabled()) {
1089 qemu_kvm_start_vcpu(env);
1090 } else if (tcg_enabled()) {
1091 qemu_tcg_init_vcpu(cpu);
1093 qemu_dummy_start_vcpu(env);
/* Stop the CPU recorded in cpu_single_env, if any: clears its stop request,
 * marks it stopped, calls cpu_exit() on it and signals qemu_pause_cond so a
 * waiter in pause_all_vcpus() can re-check. */
1097 void cpu_stop_current(void)
1099 if (cpu_single_env) {
1100 CPUState *cpu_single_cpu = ENV_GET_CPU(cpu_single_env);
1101 cpu_single_cpu->stop = false;
1102 cpu_single_cpu->stopped = true;
1103 cpu_exit(cpu_single_env);
1104 qemu_cond_signal(&qemu_pause_cond);
/* Stop the VM and enter @state.  When called from a vCPU thread the stop is
 * only requested through qemu_system_vmstop_request().
 * NOTE(review): the rest of the vCPU-thread branch and the path taken from
 * other threads are on lines not visible here (presumably do_vm_stop()). */
1108 void vm_stop(RunState state)
1110 if (qemu_in_vcpu_thread()) {
1111 qemu_system_vmstop_request(state);
1113 * FIXME: should not return to device code in case
1114 * vm_stop() has been requested.
/* Like vm_stop(), but also usable when the VM is already stopped: in that
 * case the run state is overwritten with runstate_set().
 * NOTE(review): the statement executed when the VM is running is on a line
 * not visible here (presumably vm_stop(state)). */
1122 /* does a state transition even if the VM is already stopped,
1123 current state is forgotten forever */
1124 void vm_stop_force_state(RunState state)
1126 if (runstate_is_running()) {
1129 runstate_set(state);
/* Execute guest code on @env through cpu_exec() and return its result.
 * Around the call, in icount mode, an instruction budget is set up: leftover
 * budget is removed from qemu_icount, a new budget is derived from the next
 * vm_clock deadline (qemu_icount_round), added to qemu_icount and split
 * between icount_decr.u16.low (at most 0xffff) and icount_extra.  After
 * execution the unused budget is removed from qemu_icount again and the
 * per-CPU counters are cleared.  With CONFIG_PROFILER the time spent in
 * cpu_exec() is accumulated in qemu_time.
 * NOTE(review): the use_icount conditions, the declarations, the statement
 * that reduces count by decr and the return are on lines not visible in this
 * excerpt. */
1133 static int tcg_cpu_exec(CPUArchState *env)
1136 #ifdef CONFIG_PROFILER
1140 #ifdef CONFIG_PROFILER
1141 ti = profile_getclock();
1146 qemu_icount -= (env->icount_decr.u16.low + env->icount_extra);
1147 env->icount_decr.u16.low = 0;
1148 env->icount_extra = 0;
1149 count = qemu_icount_round(qemu_clock_deadline(vm_clock));
1150 qemu_icount += count;
1151 decr = (count > 0xffff) ? 0xffff : count;
1153 env->icount_decr.u16.low = decr;
1154 env->icount_extra = count;
1156 ret = cpu_exec(env);
1157 #ifdef CONFIG_PROFILER
1158 qemu_time += profile_getclock() - ti;
1161 /* Fold pending instructions back into the
1162 instruction counter, and clear the interrupt flag. */
1163 qemu_icount -= (env->icount_decr.u16.low
1164 + env->icount_extra);
1165 env->icount_decr.u32 = 0;
1166 env->icount_extra = 0;
/* Run one round of TCG execution.  Accounts any pending vm_clock warp, then
 * continues from next_cpu (restarting at first_cpu when it is NULL) and
 * walks the list until it ends or exit_request is set.  For each CPU,
 * vm_clock is enabled unless the CPU single-steps with SSTEP_NOTIMER; a
 * runnable CPU is executed with tcg_cpu_exec() and EXCP_DEBUG is passed to
 * cpu_handle_guest_debug().
 * NOTE(review): the statements following the debug handling, the body of the
 * stop/stopped branch and the code after the loop are on lines not visible
 * in this excerpt. */
1171 static void tcg_exec_all(void)
1175 /* Account partial waits to the vm_clock. */
1176 qemu_clock_warp(vm_clock);
1178 if (next_cpu == NULL) {
1179 next_cpu = first_cpu;
1181 for (; next_cpu != NULL && !exit_request; next_cpu = next_cpu->next_cpu) {
1182 CPUArchState *env = next_cpu;
1183 CPUState *cpu = ENV_GET_CPU(env);
1185 qemu_clock_enable(vm_clock,
1186 (env->singlestep_enabled & SSTEP_NOTIMER) == 0);
1188 if (cpu_can_run(cpu)) {
1189 r = tcg_cpu_exec(env);
1190 if (r == EXCP_DEBUG) {
1191 cpu_handle_guest_debug(env);
1194 } else if (cpu->stop || cpu->stopped) {
/* For every CPU on the list, look through the nb_numa_nodes node masks
 * (node_cpumask[i]) for one that has the CPU's cpu_index bit set.
 * NOTE(review): the statement executed on a match is on a line not visible
 * here; presumably it records node i in the CPU -- confirm. */
1201 void set_numa_modes(void)
1207 for (env = first_cpu; env != NULL; env = env->next_cpu) {
1208 cpu = ENV_GET_CPU(env);
1209 for (i = 0; i < nb_numa_nodes; i++) {
1210 if (test_bit(cpu->cpu_index, node_cpumask[i])) {
/* Print the list of supported CPU models to @f using @cpu_fprintf.  Only
 * does so when the target defines the cpu_list macro; @optarg is not used on
 * the visible lines. */
1217 void list_cpus(FILE *f, fprintf_function cpu_fprintf, const char *optarg)
1219 /* XXX: implement xxx_cpu_list for targets that still miss it */
1220 #if defined(cpu_list)
1221 cpu_list(f, cpu_fprintf);
/* QMP handler building a CpuInfoList with one entry per CPU on the list.
 * Each CPU is synchronised with cpu_synchronize_state() first; the entry
 * carries the CPU index, whether it is the first CPU ("current"), the halted
 * flag and the host thread id, plus a target-specific program counter
 * (i386: eip + CS base; PPC: nip; SPARC: pc and npc; MIPS: active_tc.PC).
 * Entries are appended in list order.
 * NOTE(review): the list-append condition, the update of cur_item and the
 * return statement are on lines not visible here; @errp is not used on the
 * visible lines. */
1225 CpuInfoList *qmp_query_cpus(Error **errp)
1227 CpuInfoList *head = NULL, *cur_item = NULL;
1230 for (env = first_cpu; env != NULL; env = env->next_cpu) {
1231 CPUState *cpu = ENV_GET_CPU(env);
1234 cpu_synchronize_state(env);
1236 info = g_malloc0(sizeof(*info));
1237 info->value = g_malloc0(sizeof(*info->value));
1238 info->value->CPU = cpu->cpu_index;
1239 info->value->current = (env == first_cpu);
1240 info->value->halted = cpu->halted;
1241 info->value->thread_id = cpu->thread_id;
1242 #if defined(TARGET_I386)
1243 info->value->has_pc = true;
1244 info->value->pc = env->eip + env->segs[R_CS].base;
1245 #elif defined(TARGET_PPC)
1246 info->value->has_nip = true;
1247 info->value->nip = env->nip;
1248 #elif defined(TARGET_SPARC)
1249 info->value->has_pc = true;
1250 info->value->pc = env->pc;
1251 info->value->has_npc = true;
1252 info->value->npc = env->npc;
1253 #elif defined(TARGET_MIPS)
1254 info->value->has_PC = true;
1255 info->value->PC = env->active_tc.PC;
1258 /* XXX: waiting for the qapi to support GSList */
1260 head = cur_item = info;
1262 cur_item->next = info;
/* QMP handler that dumps guest memory to @filename.  The CPU is looked up
 * with qemu_get_cpu(@cpu_index); an unknown index yields
 * QERR_INVALID_PARAMETER_VALUE for "cpu-index".  The file is opened with
 * "wb" (QERR_OPEN_FILE_FAILED on failure) and memory is read in chunks with
 * cpu_memory_rw_debug() and written with fwrite(); a short write yields
 * QERR_IO_ERROR.
 * NOTE(review): on the visible line the result of cpu_memory_rw_debug() is
 * not checked, so an unreadable range would be written as whatever buf
 * holds.  The chunking loop, the use of @has_cpu and the cleanup code are on
 * lines not visible in this excerpt. */
1270 void qmp_memsave(int64_t addr, int64_t size, const char *filename,
1271 bool has_cpu, int64_t cpu_index, Error **errp)
1283 cpu = qemu_get_cpu(cpu_index);
1285 error_set(errp, QERR_INVALID_PARAMETER_VALUE, "cpu-index",
1291 f = fopen(filename, "wb");
1293 error_set(errp, QERR_OPEN_FILE_FAILED, filename);
1301 cpu_memory_rw_debug(env, addr, buf, l, 0);
1302 if (fwrite(buf, 1, l, f) != l) {
1303 error_set(errp, QERR_IO_ERROR);
/* QMP handler that dumps guest physical memory to @filename.  The file is
 * opened with "wb" (QERR_OPEN_FILE_FAILED on failure); memory is read in
 * chunks with cpu_physical_memory_rw() and written with fwrite(), a short
 * write yielding QERR_IO_ERROR.
 * NOTE(review): the remaining parameters, the chunking loop and the cleanup
 * code are on lines not visible in this excerpt. */
1314 void qmp_pmemsave(int64_t addr, int64_t size, const char *filename,
1321 f = fopen(filename, "wb");
1323 error_set(errp, QERR_OPEN_FILE_FAILED, filename);
1331 cpu_physical_memory_rw(addr, buf, l, 0);
1332 if (fwrite(buf, 1, l, f) != l) {
1333 error_set(errp, QERR_IO_ERROR);
/* QMP handler that injects an NMI into every CPU.  On TARGET_I386 a CPU
 * without apic_state gets CPU_INTERRUPT_NMI raised directly, otherwise the
 * NMI is delivered through its APIC with apic_deliver_nmi().  The
 * QERR_UNSUPPORTED error is set on the alternative path.
 * NOTE(review): the #else/#endif lines separating the two paths are not
 * visible in this excerpt. */
1344 void qmp_inject_nmi(Error **errp)
1346 #if defined(TARGET_I386)
1349 for (env = first_cpu; env != NULL; env = env->next_cpu) {
1350 if (!env->apic_state) {
1351 cpu_interrupt(CPU(x86_env_get_cpu(env)), CPU_INTERRUPT_NMI);
1353 apic_deliver_nmi(env->apic_state);
1357 error_set(errp, QERR_UNSUPPORTED);