4 * Copyright (c) 2003-2008 Fabrice Bellard
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
25 /* Needed early for CONFIG_BSD etc. */
26 #include "config-host.h"
28 #include "monitor/monitor.h"
29 #include "qapi/qmp/qerror.h"
30 #include "sysemu/sysemu.h"
31 #include "exec/gdbstub.h"
32 #include "sysemu/dma.h"
33 #include "sysemu/kvm.h"
34 #include "qmp-commands.h"
36 #include "qemu/thread.h"
37 #include "sysemu/cpus.h"
38 #include "sysemu/qtest.h"
39 #include "qemu/main-loop.h"
40 #include "qemu/bitmap.h"
41 #include "qemu/seqlock.h"
42 #include "qapi-event.h"
45 #include "qemu/compatfd.h"
50 #include <sys/prctl.h>
/* Fallback definitions for the Linux memory-failure prctl constants, for
 * kernel headers that predate them; values match <linux/prctl.h>. */
53 #define PR_MCE_KILL 33
56 #ifndef PR_MCE_KILL_SET
57 #define PR_MCE_KILL_SET 1
60 #ifndef PR_MCE_KILL_EARLY
61 #define PR_MCE_KILL_EARLY 1
64 #endif /* CONFIG_LINUX */
/* Round-robin scheduling cursor for the single-threaded TCG execution loop
 * (see tcg_exec_all below). */
66 static CPUState *next_cpu;
/* A vCPU counts as stopped if it stopped itself or the whole VM is not in
 * the "running" runstate. */
68 bool cpu_is_stopped(CPUState *cpu)
70 return cpu->stopped || !runstate_is_running();
/* True when this vCPU thread has nothing to do and may sleep: no stop
 * request, no queued work, halted with no pending work (KVM in-kernel halt
 * counts as idle from QEMU's point of view). */
73 static bool cpu_thread_is_idle(CPUState *cpu)
75 if (cpu->stop || cpu->queued_work_first) {
78 if (cpu_is_stopped(cpu)) {
81 if (!cpu->halted || cpu_has_work(cpu) ||
82 kvm_halt_in_kernel()) {
/* True only when every vCPU is idle per cpu_thread_is_idle(). */
88 static bool all_cpu_threads_idle(void)
93 if (!cpu_thread_is_idle(cpu)) {
100 /***********************************************************/
101 /* guest cycle counter */
103 /* Protected by TimersState seqlock */
/* QEMU_CLOCK_REALTIME value at which the current "warp" started, or -1 when
 * no warp is in progress (see icount_warp_rt / qemu_clock_warp). */
105 static int64_t vm_clock_warp_start;
106 /* Conversion factor from emulated instructions to virtual clock ticks. */
107 static int icount_time_shift;
108 /* Arbitrarily pick 1MIPS as the minimum allowable speed. */
109 #define MAX_ICOUNT_SHIFT 10
/* Timers driving icount speed adjustment (realtime + virtual triggers) and
 * clock warping while the vCPUs sleep. */
111 static QEMUTimer *icount_rt_timer;
112 static QEMUTimer *icount_vm_timer;
113 static QEMUTimer *icount_warp_timer;
115 typedef struct TimersState {
116 /* Protected by BQL. */
117 int64_t cpu_ticks_prev;
118 int64_t cpu_ticks_offset;
120 /* cpu_clock_offset can be read out of BQL, so protect it with
123 QemuSeqLock vm_clock_seqlock;
124 int64_t cpu_clock_offset;
125 int32_t cpu_ticks_enabled;
128 /* Compensate for varying guest execution speed. */
129 int64_t qemu_icount_bias;
130 /* Only written by TCG thread */
/* The single global instance; field protection rules are noted above. */
134 static TimersState timers_state;
136 /* Return the virtual CPU time, based on the instruction counter. */
/* Caller must be inside a vm_clock_seqlock read section (or be the writer);
 * subtracts the not-yet-executed portion of the current icount budget. */
137 static int64_t cpu_get_icount_locked(void)
140 CPUState *cpu = current_cpu;
142 icount = timers_state.qemu_icount;
144 if (!cpu_can_do_io(cpu)) {
145 fprintf(stderr, "Bad clock read\n");
147 icount -= (cpu->icount_decr.u16.low + cpu->icount_extra);
149 return timers_state.qemu_icount_bias + (icount << icount_time_shift);
/* Lock-free reader: retry around the seqlock until a consistent snapshot. */
152 int64_t cpu_get_icount(void)
158 start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
159 icount = cpu_get_icount_locked();
160 } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));
165 /* return the host CPU cycle counter and handle stop/restart */
166 /* Caller must hold the BQL */
167 int64_t cpu_get_ticks(void)
172 return cpu_get_icount();
175 ticks = timers_state.cpu_ticks_offset;
176 if (timers_state.cpu_ticks_enabled) {
177 ticks += cpu_get_real_ticks();
/* Never let the returned value go backwards: fold any regression into the
 * offset and keep reporting the previous maximum. */
180 if (timers_state.cpu_ticks_prev > ticks) {
181 /* Note: non increasing ticks may happen if the host uses
183 timers_state.cpu_ticks_offset += timers_state.cpu_ticks_prev - ticks;
184 ticks = timers_state.cpu_ticks_prev;
187 timers_state.cpu_ticks_prev = ticks;
/* Monotonic host clock plus the stop/restart offset; seqlock-protected. */
191 static int64_t cpu_get_clock_locked(void)
195 ticks = timers_state.cpu_clock_offset;
196 if (timers_state.cpu_ticks_enabled) {
197 ticks += get_clock();
203 /* return the host CPU monotonic timer and handle stop/restart */
/* Lock-free reader counterpart of cpu_get_clock_locked(). */
204 int64_t cpu_get_clock(void)
210 start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
211 ti = cpu_get_clock_locked();
212 } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));
217 /* enable cpu_get_ticks()
218 * Caller must hold BQL which serves as mutex for vm_clock_seqlock.
220 void cpu_enable_ticks(void)
222 /* Here, the real thing protected by seqlock is cpu_clock_offset. */
223 seqlock_write_lock(&timers_state.vm_clock_seqlock);
224 if (!timers_state.cpu_ticks_enabled) {
/* Subtract "now" so that offset + current reading restarts from where the
 * clocks stopped. */
225 timers_state.cpu_ticks_offset -= cpu_get_real_ticks();
226 timers_state.cpu_clock_offset -= get_clock();
227 timers_state.cpu_ticks_enabled = 1;
229 seqlock_write_unlock(&timers_state.vm_clock_seqlock);
232 /* disable cpu_get_ticks() : the clock is stopped. You must not call
233 * cpu_get_ticks() after that.
234 * Caller must hold BQL which serves as mutex for vm_clock_seqlock.
236 void cpu_disable_ticks(void)
238 /* Here, the real thing protected by seqlock is cpu_clock_offset. */
239 seqlock_write_lock(&timers_state.vm_clock_seqlock);
240 if (timers_state.cpu_ticks_enabled) {
/* Freeze the offsets at their current values so reads stay constant while
 * the VM is stopped. */
241 timers_state.cpu_ticks_offset += cpu_get_real_ticks();
242 timers_state.cpu_clock_offset = cpu_get_clock_locked();
243 timers_state.cpu_ticks_enabled = 0;
245 seqlock_write_unlock(&timers_state.vm_clock_seqlock);
248 /* Correlation between real and virtual time is always going to be
249 fairly approximate, so ignore small variation.
250 When the guest is idle real and virtual time will be aligned in
252 #define ICOUNT_WOBBLE (get_ticks_per_sec() / 10)
/* Periodically re-tune icount_time_shift so that virtual time tracks real
 * time: compare the two clocks and nudge the shift up or down by one. */
254 static void icount_adjust(void)
260 /* Protected by TimersState mutex. */
261 static int64_t last_delta;
263 /* If the VM is not running, then do nothing. */
264 if (!runstate_is_running()) {
268 seqlock_write_lock(&timers_state.vm_clock_seqlock);
269 cur_time = cpu_get_clock_locked();
270 cur_icount = cpu_get_icount_locked();
272 delta = cur_icount - cur_time;
273 /* FIXME: This is a very crude algorithm, somewhat prone to oscillation. */
275 && last_delta + ICOUNT_WOBBLE < delta * 2
276 && icount_time_shift > 0) {
277 /* The guest is getting too far ahead. Slow time down. */
281 && last_delta - ICOUNT_WOBBLE > delta * 2
282 && icount_time_shift < MAX_ICOUNT_SHIFT) {
283 /* The guest is getting too far behind. Speed time up. */
/* Re-derive the bias so cpu_get_icount_locked() stays continuous across the
 * shift change. */
287 timers_state.qemu_icount_bias = cur_icount
288 - (timers_state.qemu_icount << icount_time_shift);
289 seqlock_write_unlock(&timers_state.vm_clock_seqlock);
/* QEMU_CLOCK_REALTIME trigger: re-arms itself one second ahead. */
292 static void icount_adjust_rt(void *opaque)
294 timer_mod(icount_rt_timer,
295 qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + 1000);
/* QEMU_CLOCK_VIRTUAL trigger: re-arms ~100ms of guest time ahead. */
299 static void icount_adjust_vm(void *opaque)
301 timer_mod(icount_vm_timer,
302 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
303 get_ticks_per_sec() / 10);
/* Round an ns deadline up to a whole number of instructions at the current
 * icount_time_shift. */
307 static int64_t qemu_icount_round(int64_t count)
309 return (count + (1 << icount_time_shift) - 1) >> icount_time_shift;
/* Warp-timer callback: credit real time that elapsed while the vCPUs were
 * sleeping to the virtual clock, by bumping qemu_icount_bias. */
312 static void icount_warp_rt(void *opaque)
314 /* The icount_warp_timer is rescheduled soon after vm_clock_warp_start
315 * changes from -1 to another value, so the race here is okay.
317 if (atomic_read(&vm_clock_warp_start) == -1) {
321 seqlock_write_lock(&timers_state.vm_clock_seqlock);
322 if (runstate_is_running()) {
323 int64_t clock = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
326 warp_delta = clock - vm_clock_warp_start;
327 if (use_icount == 2) {
329 * In adaptive mode, do not let QEMU_CLOCK_VIRTUAL run too
330 * far ahead of real time.
332 int64_t cur_time = cpu_get_clock_locked();
333 int64_t cur_icount = cpu_get_icount_locked();
334 int64_t delta = cur_time - cur_icount;
335 warp_delta = MIN(warp_delta, delta);
337 timers_state.qemu_icount_bias += warp_delta;
339 vm_clock_warp_start = -1;
340 seqlock_write_unlock(&timers_state.vm_clock_seqlock);
342 if (qemu_clock_expired(QEMU_CLOCK_VIRTUAL)) {
343 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
/* qtest only: step the virtual clock directly to 'dest', running every
 * QEMU_CLOCK_VIRTUAL timer that becomes due along the way. */
347 void qtest_clock_warp(int64_t dest)
349 int64_t clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
350 assert(qtest_enabled());
351 while (clock < dest) {
352 int64_t deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
353 int64_t warp = qemu_soonest_timeout(dest - clock, deadline);
354 seqlock_write_lock(&timers_state.vm_clock_seqlock);
355 timers_state.qemu_icount_bias += warp;
356 seqlock_write_unlock(&timers_state.vm_clock_seqlock);
358 qemu_clock_run_timers(QEMU_CLOCK_VIRTUAL);
359 clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
361 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
/* Schedule (or immediately apply) a warp of QEMU_CLOCK_VIRTUAL when all
 * vCPUs go idle in icount mode; no-op for other clocks or without icount. */
364 void qemu_clock_warp(QEMUClockType type)
370 * There are too many global variables to make the "warp" behavior
371 * applicable to other clocks. But a clock argument removes the
372 * need for if statements all over the place.
374 if (type != QEMU_CLOCK_VIRTUAL || !use_icount) {
379 * If the CPUs have been sleeping, advance QEMU_CLOCK_VIRTUAL timer now.
380 * This ensures that the deadline for the timer is computed correctly below.
381 * This also makes sure that the insn counter is synchronized before the
382 * CPU starts running, in case the CPU is woken by an event other than
383 * the earliest QEMU_CLOCK_VIRTUAL timer.
385 icount_warp_rt(NULL);
386 timer_del(icount_warp_timer);
387 if (!all_cpu_threads_idle()) {
391 if (qtest_enabled()) {
392 /* When testing, qtest commands advance icount. */
396 /* We want to use the earliest deadline from ALL vm_clocks */
397 clock = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
398 deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
405 * Ensure QEMU_CLOCK_VIRTUAL proceeds even when the virtual CPU goes to
406 * sleep. Otherwise, the CPU might be waiting for a future timer
407 * interrupt to wake it up, but the interrupt never comes because
408 * the vCPU isn't running any insns and thus doesn't advance the
409 * QEMU_CLOCK_VIRTUAL.
411 * An extreme solution for this problem would be to never let VCPUs
412 * sleep in icount mode if there is a pending QEMU_CLOCK_VIRTUAL
413 * timer; rather time could just advance to the next QEMU_CLOCK_VIRTUAL
414 * event. Instead, we do stop VCPUs and only advance QEMU_CLOCK_VIRTUAL
415 * after some "real" time, (related to the time left until the next
416 * event) has passed. The QEMU_CLOCK_REALTIME timer will do this.
417 * This avoids that the warps are visible externally; for example,
418 * you will not be sending network packets continuously instead of
421 seqlock_write_lock(&timers_state.vm_clock_seqlock);
/* Only start (or move up) the warp window; never push it later. */
422 if (vm_clock_warp_start == -1 || vm_clock_warp_start > clock) {
423 vm_clock_warp_start = clock;
425 seqlock_write_unlock(&timers_state.vm_clock_seqlock);
426 timer_mod_anticipate(icount_warp_timer, clock + deadline);
427 } else if (deadline == 0) {
428 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
/* Migration predicate: send the icount subsection only when relevant. */
432 static bool icount_state_needed(void *opaque)
438 * This is a subsection for icount migration.
440 static const VMStateDescription icount_vmstate_timers = {
441 .name = "timer/icount",
443 .minimum_version_id = 1,
444 .fields = (VMStateField[]) {
445 VMSTATE_INT64(qemu_icount_bias, TimersState),
446 VMSTATE_INT64(qemu_icount, TimersState),
447 VMSTATE_END_OF_LIST()
/* Main "timer" vmstate: cpu_ticks_offset, a legacy placeholder field, and
 * (from version 2) cpu_clock_offset; icount state rides in the subsection. */
451 static const VMStateDescription vmstate_timers = {
454 .minimum_version_id = 1,
455 .fields = (VMStateField[]) {
456 VMSTATE_INT64(cpu_ticks_offset, TimersState),
457 VMSTATE_INT64(dummy, TimersState),
458 VMSTATE_INT64_V(cpu_clock_offset, TimersState, 2),
459 VMSTATE_END_OF_LIST()
461 .subsections = (VMStateSubsection[]) {
463 .vmsd = &icount_vmstate_timers,
464 .needed = icount_state_needed,
/* Parse the -icount option ("auto" or a fixed shift value) and set up the
 * seqlock, migration state, and the warp/adjustment timers. */
471 void configure_icount(const char *option)
473 seqlock_init(&timers_state.vm_clock_seqlock, NULL);
474 vmstate_register(NULL, 0, &vmstate_timers, &timers_state);
479 icount_warp_timer = timer_new_ns(QEMU_CLOCK_REALTIME,
480 icount_warp_rt, NULL);
/* A numeric option fixes the shift; "auto" falls through to adaptive mode. */
481 if (strcmp(option, "auto") != 0) {
482 icount_time_shift = strtol(option, NULL, 0);
489 /* 125MIPS seems a reasonable initial guess at the guest speed.
490 It will be corrected fairly quickly anyway. */
491 icount_time_shift = 3;
493 /* Have both realtime and virtual time triggers for speed adjustment.
494 The realtime trigger catches emulated time passing too slowly,
495 the virtual time trigger catches emulated time passing too fast.
496 Realtime triggers occur even when idle, so use them less frequently
498 icount_rt_timer = timer_new_ms(QEMU_CLOCK_REALTIME,
499 icount_adjust_rt, NULL);
500 timer_mod(icount_rt_timer,
501 qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + 1000);
502 icount_vm_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
503 icount_adjust_vm, NULL);
504 timer_mod(icount_vm_timer,
505 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
506 get_ticks_per_sec() / 10);
509 /***********************************************************/
/* Fatal guest hardware error: print a formatted message and each CPU's
 * register state to stderr (termination path not visible in this listing). */
510 void hw_error(const char *fmt, ...)
516 fprintf(stderr, "qemu: hardware error: ");
517 vfprintf(stderr, fmt, ap);
518 fprintf(stderr, "\n");
520 fprintf(stderr, "CPU #%d:\n", cpu->cpu_index);
521 cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_FPU);
/* Synchronize accelerator (e.g. KVM) register state for every CPU: on state
 * read, after reset, and after machine init respectively. */
527 void cpu_synchronize_all_states(void)
532 cpu_synchronize_state(cpu);
536 void cpu_synchronize_all_post_reset(void)
541 cpu_synchronize_post_reset(cpu);
545 void cpu_synchronize_all_post_init(void)
550 cpu_synchronize_post_init(cpu);
/* Stop a running VM: notify vm-state listeners, emit the QMP STOP event,
 * and flush all block devices; returns the flush status. */
554 static int do_vm_stop(RunState state)
558 if (runstate_is_running()) {
562 vm_state_notify(0, state);
563 qapi_event_send_stop(&error_abort);
567 ret = bdrv_flush_all();
/* A vCPU may execute guest code only when no stop is pending or active. */
572 static bool cpu_can_run(CPUState *cpu)
577 if (cpu_is_stopped(cpu)) {
/* Route a debug exception to the gdbstub and request a VM debug stop. */
583 static void cpu_handle_guest_debug(CPUState *cpu)
585 gdb_set_stop_cpu(cpu);
586 qemu_system_debug_request();
/* SIG_IPI handler for TCG: kick the currently executing CPU out of the
 * translated-code loop. */
590 static void cpu_signal(int sig)
593 cpu_exit(current_cpu);
/* Restore default SIGBUS disposition, unblock it, and re-raise so that a
 * fatal machine-check kills QEMU with the proper signal. */
599 static void sigbus_reraise(void)
602 struct sigaction action;
604 memset(&action, 0, sizeof(action));
605 action.sa_handler = SIG_DFL;
606 if (!sigaction(SIGBUS, &action, NULL)) {
609 sigaddset(&set, SIGBUS);
610 sigprocmask(SIG_UNBLOCK, &set, NULL);
612 perror("Failed to re-raise SIGBUS!\n");
/* signalfd-style SIGBUS handler: hand the fault to KVM; on failure the
 * error path (elided here) re-raises. */
616 static void sigbus_handler(int n, struct qemu_signalfd_siginfo *siginfo,
619 if (kvm_on_sigbus(siginfo->ssi_code,
620 (void *)(intptr_t)siginfo->ssi_addr)) {
/* Install the SIGBUS handler and opt in to early machine-check kills. */
625 static void qemu_init_sigbus(void)
627 struct sigaction action;
629 memset(&action, 0, sizeof(action));
630 action.sa_flags = SA_SIGINFO;
631 action.sa_sigaction = (void (*)(int, siginfo_t*, void*))sigbus_handler;
632 sigaction(SIGBUS, &action, NULL);
634 prctl(PR_MCE_KILL, PR_MCE_KILL_SET, PR_MCE_KILL_EARLY, 0, 0);
/* Drain pending SIG_IPI/SIGBUS with a zero timeout, forwarding any SIGBUS
 * to KVM for the given vCPU; loops until none remain pending. */
637 static void qemu_kvm_eat_signals(CPUState *cpu)
639 struct timespec ts = { 0, 0 };
645 sigemptyset(&waitset);
646 sigaddset(&waitset, SIG_IPI);
647 sigaddset(&waitset, SIGBUS);
650 r = sigtimedwait(&waitset, &siginfo, &ts);
651 if (r == -1 && !(errno == EAGAIN || errno == EINTR)) {
652 perror("sigtimedwait");
658 if (kvm_on_sigbus_vcpu(cpu, siginfo.si_code, siginfo.si_addr)) {
666 r = sigpending(&chkset);
668 perror("sigpending");
671 } while (sigismember(&chkset, SIG_IPI) || sigismember(&chkset, SIGBUS));
674 #else /* !CONFIG_LINUX */
/* Non-Linux stubs: no MCE handling, nothing to drain. */
676 static void qemu_init_sigbus(void)
680 static void qemu_kvm_eat_signals(CPUState *cpu)
683 #endif /* !CONFIG_LINUX */
/* Intentionally empty handler: lets sigwait()/KVM observe SIG_IPI without
 * any side effect from the handler itself. */
686 static void dummy_signal(int sig)
/* Block SIG_IPI/SIGBUS in this KVM vCPU thread and hand the resulting mask
 * to the kernel via kvm_set_signal_mask(). */
690 static void qemu_kvm_init_cpu_signals(CPUState *cpu)
694 struct sigaction sigact;
696 memset(&sigact, 0, sizeof(sigact));
697 sigact.sa_handler = dummy_signal;
698 sigaction(SIG_IPI, &sigact, NULL);
700 pthread_sigmask(SIG_BLOCK, NULL, &set);
701 sigdelset(&set, SIG_IPI);
702 sigdelset(&set, SIGBUS);
703 r = kvm_set_signal_mask(cpu, &set);
705 fprintf(stderr, "kvm_set_signal_mask: %s\n", strerror(-r));
/* TCG: handle SIG_IPI via cpu_signal() and make sure it is unblocked. */
710 static void qemu_tcg_init_cpu_signals(void)
713 struct sigaction sigact;
715 memset(&sigact, 0, sizeof(sigact));
716 sigact.sa_handler = cpu_signal;
717 sigaction(SIG_IPI, &sigact, NULL);
720 sigaddset(&set, SIG_IPI);
721 pthread_sigmask(SIG_UNBLOCK, &set, NULL);
/* NOTE(review): the duplicate definitions below are presumably the non-POSIX
 * (#else, e.g. Win32) stubs — the surrounding preprocessor lines are not
 * visible in this listing; confirm against the full source. */
725 static void qemu_kvm_init_cpu_signals(CPUState *cpu)
730 static void qemu_tcg_init_cpu_signals(void)
/* The Big QEMU Lock (BQL) and the condvars coordinating the iothread with
 * the vCPU threads. */
735 static QemuMutex qemu_global_mutex;
736 static QemuCond qemu_io_proceeded_cond;
737 static bool iothread_requesting_mutex;
739 static QemuThread io_thread;
/* TCG uses one shared thread/halt-cond for all vCPUs (see qemu_tcg_init_vcpu). */
741 static QemuThread *tcg_cpu_thread;
742 static QemuCond *tcg_halt_cond;
745 static QemuCond qemu_cpu_cond;
747 static QemuCond qemu_pause_cond;
748 static QemuCond qemu_work_cond;
/* One-time init of the BQL, the condvars, and the iothread's identity. */
750 void qemu_init_cpu_loop(void)
753 qemu_cond_init(&qemu_cpu_cond);
754 qemu_cond_init(&qemu_pause_cond);
755 qemu_cond_init(&qemu_work_cond);
756 qemu_cond_init(&qemu_io_proceeded_cond);
757 qemu_mutex_init(&qemu_global_mutex);
759 qemu_thread_get_self(&io_thread);
/* Run func(data) on the target vCPU's thread. Synchronous: if not already
 * on that thread, queue a stack-allocated work item and wait on
 * qemu_work_cond until it has been executed. */
762 void run_on_cpu(CPUState *cpu, void (*func)(void *data), void *data)
764 struct qemu_work_item wi;
766 if (qemu_cpu_is_self(cpu)) {
774 if (cpu->queued_work_first == NULL) {
775 cpu->queued_work_first = &wi;
777 cpu->queued_work_last->next = &wi;
779 cpu->queued_work_last = &wi;
785 CPUState *self_cpu = current_cpu;
787 qemu_cond_wait(&qemu_work_cond, &qemu_global_mutex);
/* qemu_cond_wait may run on a vCPU thread; restore current_cpu after it. */
788 current_cpu = self_cpu;
/* Fire-and-forget variant: heap-allocates the work item (presumably freed
 * by flush_queued_work — the free is not visible in this listing). */
792 void async_run_on_cpu(CPUState *cpu, void (*func)(void *data), void *data)
794 struct qemu_work_item *wi;
796 if (qemu_cpu_is_self(cpu)) {
801 wi = g_malloc0(sizeof(struct qemu_work_item));
805 if (cpu->queued_work_first == NULL) {
806 cpu->queued_work_first = wi;
808 cpu->queued_work_last->next = wi;
810 cpu->queued_work_last = wi;
/* Drain this CPU's work queue on its own thread, then wake run_on_cpu()
 * waiters via qemu_work_cond. */
817 static void flush_queued_work(CPUState *cpu)
819 struct qemu_work_item *wi;
821 if (cpu->queued_work_first == NULL) {
825 while ((wi = cpu->queued_work_first)) {
826 cpu->queued_work_first = wi->next;
833 cpu->queued_work_last = NULL;
834 qemu_cond_broadcast(&qemu_work_cond);
/* Per-CPU housekeeping after waking: acknowledge a pause request, run any
 * queued work, clear the kick flag. */
837 static void qemu_wait_io_event_common(CPUState *cpu)
842 qemu_cond_signal(&qemu_pause_cond);
844 flush_queued_work(cpu);
845 cpu->thread_kicked = false;
/* Park the shared TCG thread while every CPU is idle (warping the virtual
 * clock) or while the iothread is asking for the BQL. */
848 static void qemu_tcg_wait_io_event(void)
852 while (all_cpu_threads_idle()) {
853 /* Start accounting real time to the virtual clock if the CPUs
855 qemu_clock_warp(QEMU_CLOCK_VIRTUAL);
856 qemu_cond_wait(tcg_halt_cond, &qemu_global_mutex);
859 while (iothread_requesting_mutex) {
860 qemu_cond_wait(&qemu_io_proceeded_cond, &qemu_global_mutex);
864 qemu_wait_io_event_common(cpu);
/* KVM variant: sleep on the per-CPU halt cond, then drain signals and run
 * queued work. */
868 static void qemu_kvm_wait_io_event(CPUState *cpu)
870 while (cpu_thread_is_idle(cpu)) {
871 qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
874 qemu_kvm_eat_signals(cpu);
875 qemu_wait_io_event_common(cpu);
/* Per-vCPU KVM thread: create the kernel vcpu, set up signals, announce
 * creation, then loop kvm_cpu_exec() / wait-for-event forever. */
878 static void *qemu_kvm_cpu_thread_fn(void *arg)
883 qemu_mutex_lock(&qemu_global_mutex);
884 qemu_thread_get_self(cpu->thread);
885 cpu->thread_id = qemu_get_thread_id();
888 r = kvm_init_vcpu(cpu);
890 fprintf(stderr, "kvm_init_vcpu failed: %s\n", strerror(-r));
894 qemu_kvm_init_cpu_signals(cpu);
896 /* signal CPU creation */
898 qemu_cond_signal(&qemu_cpu_cond);
901 if (cpu_can_run(cpu)) {
902 r = kvm_cpu_exec(cpu);
903 if (r == EXCP_DEBUG) {
904 cpu_handle_guest_debug(cpu);
907 qemu_kvm_wait_io_event(cpu);
/* qtest "vCPU" thread: runs no guest code at all; it parks in sigwait() for
 * SIG_IPI and services queued work when kicked. Not available on Windows. */
913 static void *qemu_dummy_cpu_thread_fn(void *arg)
916 fprintf(stderr, "qtest is not supported under Windows\n");
923 qemu_mutex_lock_iothread();
924 qemu_thread_get_self(cpu->thread);
925 cpu->thread_id = qemu_get_thread_id();
927 sigemptyset(&waitset);
928 sigaddset(&waitset, SIG_IPI);
930 /* signal CPU creation */
932 qemu_cond_signal(&qemu_cpu_cond);
/* Drop the BQL while blocked in sigwait so the iothread can make progress. */
937 qemu_mutex_unlock_iothread();
940 r = sigwait(&waitset, &sig);
941 } while (r == -1 && (errno == EAGAIN || errno == EINTR));
946 qemu_mutex_lock_iothread();
948 qemu_wait_io_event_common(cpu);
/* Forward declaration: the TCG round-robin scheduler defined below. */
955 static void tcg_exec_all(void);
/* The single TCG thread: round-robins every vCPU through tcg_exec_all(),
 * waiting on tcg_halt_cond whenever all CPUs are idle. */
957 static void *qemu_tcg_cpu_thread_fn(void *arg)
961 qemu_tcg_init_cpu_signals();
962 qemu_thread_get_self(cpu->thread);
964 qemu_mutex_lock(&qemu_global_mutex);
966 cpu->thread_id = qemu_get_thread_id();
969 qemu_cond_signal(&qemu_cpu_cond);
971 /* wait for initial kick-off after machine start */
972 while (QTAILQ_FIRST(&cpus)->stopped) {
973 qemu_cond_wait(tcg_halt_cond, &qemu_global_mutex);
975 /* process any pending work */
977 qemu_wait_io_event_common(cpu);
/* With icount, wake virtual-clock timers that became due while executing. */
985 int64_t deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
988 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
991 qemu_tcg_wait_io_event();
/* Interrupt a vCPU thread. POSIX: deliver SIG_IPI via pthread_kill.
 * Win32: suspend the thread, poll GetThreadContext until the suspension is
 * really effective (needed on multi-core hosts), then resume it. */
997 static void qemu_cpu_kick_thread(CPUState *cpu)
1002 err = pthread_kill(cpu->thread->thread, SIG_IPI);
1004 fprintf(stderr, "qemu:%s: %s", __func__, strerror(err));
1008 if (!qemu_cpu_is_self(cpu)) {
1011 if (SuspendThread(cpu->hThread) == (DWORD)-1) {
1012 fprintf(stderr, "qemu:%s: GetLastError:%lu\n", __func__,
1017 /* On multi-core systems, we are not sure that the thread is actually
1018 * suspended until we can get the context.
1020 tcgContext.ContextFlags = CONTEXT_CONTROL;
1021 while (GetThreadContext(cpu->hThread, &tcgContext) != 0) {
1027 if (ResumeThread(cpu->hThread) == (DWORD)-1) {
1028 fprintf(stderr, "qemu:%s: GetLastError:%lu\n", __func__,
/* Wake a halted vCPU; for non-TCG also kick the thread (once, until the
 * flag is cleared in qemu_wait_io_event_common). */
1036 void qemu_cpu_kick(CPUState *cpu)
1038 qemu_cond_broadcast(cpu->halt_cond);
1039 if (!tcg_enabled() && !cpu->thread_kicked) {
1040 qemu_cpu_kick_thread(cpu);
1041 cpu->thread_kicked = true;
/* Kick the vCPU whose thread we are currently running on. */
1045 void qemu_cpu_kick_self(void)
1048 assert(current_cpu);
1050 if (!current_cpu->thread_kicked) {
1051 qemu_cpu_kick_thread(current_cpu);
1052 current_cpu->thread_kicked = true;
/* Is the calling thread the one that runs this vCPU? */
1059 bool qemu_cpu_is_self(CPUState *cpu)
1061 return qemu_thread_is_self(cpu->thread);
/* Is the calling thread any vCPU thread (as opposed to the iothread)? */
1064 static bool qemu_in_vcpu_thread(void)
1066 return current_cpu && qemu_cpu_is_self(current_cpu);
/* Acquire the BQL from the iothread. For TCG, advertise the request and
 * kick the vCPU thread so it drops the lock promptly instead of spinning. */
1069 void qemu_mutex_lock_iothread(void)
1071 if (!tcg_enabled()) {
1072 qemu_mutex_lock(&qemu_global_mutex);
1074 iothread_requesting_mutex = true;
1075 if (qemu_mutex_trylock(&qemu_global_mutex)) {
1076 qemu_cpu_kick_thread(first_cpu);
1077 qemu_mutex_lock(&qemu_global_mutex);
1079 iothread_requesting_mutex = false;
1080 qemu_cond_broadcast(&qemu_io_proceeded_cond);
1084 void qemu_mutex_unlock_iothread(void)
1086 qemu_mutex_unlock(&qemu_global_mutex);
/* True once every vCPU has acknowledged its stop request. */
1089 static int all_vcpus_paused(void)
1094 if (!cpu->stopped) {
/* Request every vCPU to stop and wait (on qemu_pause_cond) until they all
 * have; also disables QEMU_CLOCK_VIRTUAL while paused. */
1102 void pause_all_vcpus(void)
1106 qemu_clock_enable(QEMU_CLOCK_VIRTUAL, false);
1112 if (qemu_in_vcpu_thread()) {
1114 if (!kvm_enabled()) {
1117 cpu->stopped = true;
1123 while (!all_vcpus_paused()) {
1124 qemu_cond_wait(&qemu_pause_cond, &qemu_global_mutex);
/* Clear the stop state for one vCPU (kick elided in this listing). */
1131 void cpu_resume(CPUState *cpu)
1134 cpu->stopped = false;
/* Re-enable the virtual clock and resume every vCPU. */
1138 void resume_all_vcpus(void)
1142 qemu_clock_enable(QEMU_CLOCK_VIRTUAL, true);
1148 /* For temporary buffers for forming a name */
1149 #define VCPU_THREAD_NAME_SIZE 16
/* TCG: all vCPUs share one execution thread; the first CPU creates it and
 * later CPUs just reuse its thread/halt-cond pointers. */
1151 static void qemu_tcg_init_vcpu(CPUState *cpu)
1153 char thread_name[VCPU_THREAD_NAME_SIZE];
1155 tcg_cpu_address_space_init(cpu, cpu->as);
1157 /* share a single thread for all cpus with TCG */
1158 if (!tcg_cpu_thread) {
1159 cpu->thread = g_malloc0(sizeof(QemuThread));
1160 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
1161 qemu_cond_init(cpu->halt_cond);
1162 tcg_halt_cond = cpu->halt_cond;
1163 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/TCG",
1165 qemu_thread_create(cpu->thread, thread_name, qemu_tcg_cpu_thread_fn,
1166 cpu, QEMU_THREAD_JOINABLE);
1168 cpu->hThread = qemu_thread_get_handle(cpu->thread);
1170 while (!cpu->created) {
1171 qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
1173 tcg_cpu_thread = cpu->thread;
1175 cpu->thread = tcg_cpu_thread;
1176 cpu->halt_cond = tcg_halt_cond;
/* KVM: one dedicated thread per vCPU; wait until it reports creation. */
1180 static void qemu_kvm_start_vcpu(CPUState *cpu)
1182 char thread_name[VCPU_THREAD_NAME_SIZE];
1184 cpu->thread = g_malloc0(sizeof(QemuThread));
1185 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
1186 qemu_cond_init(cpu->halt_cond);
1187 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/KVM",
1189 qemu_thread_create(cpu->thread, thread_name, qemu_kvm_cpu_thread_fn,
1190 cpu, QEMU_THREAD_JOINABLE);
1191 while (!cpu->created) {
1192 qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
/* qtest: per-vCPU dummy thread that executes no guest code. */
1196 static void qemu_dummy_start_vcpu(CPUState *cpu)
1198 char thread_name[VCPU_THREAD_NAME_SIZE];
1200 cpu->thread = g_malloc0(sizeof(QemuThread));
1201 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
1202 qemu_cond_init(cpu->halt_cond);
1203 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/DUMMY",
1205 qemu_thread_create(cpu->thread, thread_name, qemu_dummy_cpu_thread_fn, cpu,
1206 QEMU_THREAD_JOINABLE);
1207 while (!cpu->created) {
1208 qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
/* Entry point: record SMP topology, start stopped, and dispatch to the
 * accelerator-specific vCPU start path. */
1212 void qemu_init_vcpu(CPUState *cpu)
1214 cpu->nr_cores = smp_cores;
1215 cpu->nr_threads = smp_threads;
1216 cpu->stopped = true;
1217 if (kvm_enabled()) {
1218 qemu_kvm_start_vcpu(cpu);
1219 } else if (tcg_enabled()) {
1220 qemu_tcg_init_vcpu(cpu);
1222 qemu_dummy_start_vcpu(cpu);
/* Mark the calling vCPU stopped, leave the exec loop, and wake any thread
 * waiting in pause_all_vcpus(). */
1226 void cpu_stop_current(void)
1229 current_cpu->stop = false;
1230 current_cpu->stopped = true;
1231 cpu_exit(current_cpu);
1232 qemu_cond_signal(&qemu_pause_cond);
/* Stop the VM. From a vCPU thread we can only *request* the stop (the main
 * loop performs it); from the iothread we stop synchronously. */
1236 int vm_stop(RunState state)
1238 if (qemu_in_vcpu_thread()) {
1239 qemu_system_vmstop_request_prepare();
1240 qemu_system_vmstop_request(state);
1242 * FIXME: should not return to device code in case
1243 * vm_stop() has been requested.
1249 return do_vm_stop(state);
1252 /* does a state transition even if the VM is already stopped,
1253 current state is forgotten forever */
1254 int vm_stop_force_state(RunState state)
1256 if (runstate_is_running()) {
1257 return vm_stop(state);
1259 runstate_set(state);
1260 /* Make sure to return an error if the flush in a previous vm_stop()
1262 return bdrv_flush_all();
/* Execute translated code for one vCPU. In icount mode, first fold back any
 * unexecuted budget, then hand the CPU a fresh instruction budget derived
 * from the next QEMU_CLOCK_VIRTUAL deadline, and reconcile afterwards. */
1266 static int tcg_cpu_exec(CPUArchState *env)
1268 CPUState *cpu = ENV_GET_CPU(env);
1270 #ifdef CONFIG_PROFILER
1274 #ifdef CONFIG_PROFILER
1275 ti = profile_getclock();
1281 timers_state.qemu_icount -= (cpu->icount_decr.u16.low
1282 + cpu->icount_extra);
1283 cpu->icount_decr.u16.low = 0;
1284 cpu->icount_extra = 0;
1285 deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
1287 /* Maintain prior (possibly buggy) behaviour where if no deadline
1288 * was set (as there is no QEMU_CLOCK_VIRTUAL timer) or it is more than
1289 * INT32_MAX nanoseconds ahead, we still use INT32_MAX
1292 if ((deadline < 0) || (deadline > INT32_MAX)) {
1293 deadline = INT32_MAX;
1296 count = qemu_icount_round(deadline);
1297 timers_state.qemu_icount += count;
/* The low 16-bit decrementer holds at most 0xffff insns; the remainder of
 * the budget goes into icount_extra. */
1298 decr = (count > 0xffff) ? 0xffff : count;
1300 cpu->icount_decr.u16.low = decr;
1301 cpu->icount_extra = count;
1303 ret = cpu_exec(env);
1304 #ifdef CONFIG_PROFILER
1305 qemu_time += profile_getclock() - ti;
1308 /* Fold pending instructions back into the
1309 instruction counter, and clear the interrupt flag. */
1310 timers_state.qemu_icount -= (cpu->icount_decr.u16.low
1311 + cpu->icount_extra);
1312 cpu->icount_decr.u32 = 0;
1313 cpu->icount_extra = 0;
/* Round-robin over all vCPUs (resuming at next_cpu), running each until it
 * hits a debug event, stops, or an exit is requested. */
1318 static void tcg_exec_all(void)
1322 /* Account partial waits to QEMU_CLOCK_VIRTUAL. */
1323 qemu_clock_warp(QEMU_CLOCK_VIRTUAL);
1325 if (next_cpu == NULL) {
1326 next_cpu = first_cpu;
1328 for (; next_cpu != NULL && !exit_request; next_cpu = CPU_NEXT(next_cpu)) {
1329 CPUState *cpu = next_cpu;
1330 CPUArchState *env = cpu->env_ptr;
/* Single-stepping with SSTEP_NOTIMER freezes the virtual clock. */
1332 qemu_clock_enable(QEMU_CLOCK_VIRTUAL,
1333 (cpu->singlestep_enabled & SSTEP_NOTIMER) == 0);
1335 if (cpu_can_run(cpu)) {
1336 r = tcg_cpu_exec(env);
1337 if (r == EXCP_DEBUG) {
1338 cpu_handle_guest_debug(cpu);
1341 } else if (cpu->stop || cpu->stopped) {
/* Monitor "-cpu help": print the target's CPU model list if available. */
1348 void list_cpus(FILE *f, fprintf_function cpu_fprintf, const char *optarg)
1350 /* XXX: implement xxx_cpu_list for targets that still miss it */
1351 #if defined(cpu_list)
1352 cpu_list(f, cpu_fprintf);
/* QMP query-cpus: build a CpuInfoList with per-CPU index/halted/thread-id,
 * plus a target-specific program-counter field. */
1356 CpuInfoList *qmp_query_cpus(Error **errp)
1358 CpuInfoList *head = NULL, *cur_item = NULL;
1363 #if defined(TARGET_I386)
1364 X86CPU *x86_cpu = X86_CPU(cpu);
1365 CPUX86State *env = &x86_cpu->env;
1366 #elif defined(TARGET_PPC)
1367 PowerPCCPU *ppc_cpu = POWERPC_CPU(cpu);
1368 CPUPPCState *env = &ppc_cpu->env;
1369 #elif defined(TARGET_SPARC)
1370 SPARCCPU *sparc_cpu = SPARC_CPU(cpu);
1371 CPUSPARCState *env = &sparc_cpu->env;
1372 #elif defined(TARGET_MIPS)
1373 MIPSCPU *mips_cpu = MIPS_CPU(cpu);
1374 CPUMIPSState *env = &mips_cpu->env;
/* Registers must be synced from the accelerator before they are read. */
1377 cpu_synchronize_state(cpu);
1379 info = g_malloc0(sizeof(*info));
1380 info->value = g_malloc0(sizeof(*info->value));
1381 info->value->CPU = cpu->cpu_index;
1382 info->value->current = (cpu == first_cpu);
1383 info->value->halted = cpu->halted;
1384 info->value->thread_id = cpu->thread_id;
1385 #if defined(TARGET_I386)
1386 info->value->has_pc = true;
1387 info->value->pc = env->eip + env->segs[R_CS].base;
1388 #elif defined(TARGET_PPC)
1389 info->value->has_nip = true;
1390 info->value->nip = env->nip;
1391 #elif defined(TARGET_SPARC)
1392 info->value->has_pc = true;
1393 info->value->pc = env->pc;
1394 info->value->has_npc = true;
1395 info->value->npc = env->npc;
1396 #elif defined(TARGET_MIPS)
1397 info->value->has_PC = true;
1398 info->value->PC = env->active_tc.PC;
1401 /* XXX: waiting for the qapi to support GSList */
1403 head = cur_item = info;
1405 cur_item->next = info;
/* QMP memsave: dump guest *virtual* memory of one CPU to a file, chunk by
 * chunk via cpu_memory_rw_debug(). */
1413 void qmp_memsave(int64_t addr, int64_t size, const char *filename,
1414 bool has_cpu, int64_t cpu_index, Error **errp)
1425 cpu = qemu_get_cpu(cpu_index);
1427 error_set(errp, QERR_INVALID_PARAMETER_VALUE, "cpu-index",
1432 f = fopen(filename, "wb");
1434 error_setg_file_open(errp, errno, filename);
1442 if (cpu_memory_rw_debug(cpu, addr, buf, l, 0) != 0) {
/* NOTE(review): the error string lacks a space before "specified". */
1443 error_setg(errp, "Invalid addr 0x%016" PRIx64 "specified", addr);
1446 if (fwrite(buf, 1, l, f) != l) {
1447 error_set(errp, QERR_IO_ERROR);
/* QMP pmemsave: same, but for guest *physical* memory (no CPU argument). */
1458 void qmp_pmemsave(int64_t addr, int64_t size, const char *filename,
1465 f = fopen(filename, "wb");
1467 error_setg_file_open(errp, errno, filename);
1475 cpu_physical_memory_read(addr, buf, l);
1476 if (fwrite(buf, 1, l, f) != l) {
1477 error_set(errp, QERR_IO_ERROR);
/* QMP inject-nmi: x86 delivers an NMI via the APIC (or directly when there
 * is no APIC); s390x restarts the monitor's current CPU; everything else is
 * unsupported. */
1488 void qmp_inject_nmi(Error **errp)
1490 #if defined(TARGET_I386)
1494 X86CPU *cpu = X86_CPU(cs);
1496 if (!cpu->apic_state) {
1497 cpu_interrupt(cs, CPU_INTERRUPT_NMI);
1499 apic_deliver_nmi(cpu->apic_state);
1502 #elif defined(TARGET_S390X)
1508 if (cpu->env.cpu_num == monitor_get_cpu_index()) {
1509 if (s390_cpu_restart(S390_CPU(cs)) == -1) {
1510 error_set(errp, QERR_UNSUPPORTED);
1517 error_set(errp, QERR_UNSUPPORTED);