/*
 * Copyright (c) 2003-2008 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */
/* Needed early for CONFIG_BSD etc. */
#include "config-host.h"

#include "monitor/monitor.h"
#include "qapi/qmp/qerror.h"
#include "sysemu/sysemu.h"
#include "exec/gdbstub.h"
#include "sysemu/dma.h"
#include "sysemu/kvm.h"
#include "qmp-commands.h"

#include "qemu/thread.h"
#include "sysemu/cpus.h"
#include "sysemu/qtest.h"
#include "qemu/main-loop.h"
#include "qemu/bitmap.h"
#include "qemu/seqlock.h"
#include "qapi-event.h"

#ifndef _WIN32
#include "qemu/compatfd.h"
#endif

#ifdef CONFIG_LINUX

#include <sys/prctl.h>

#ifndef PR_MCE_KILL
#define PR_MCE_KILL 33
#endif

#ifndef PR_MCE_KILL_SET
#define PR_MCE_KILL_SET 1
#endif

#ifndef PR_MCE_KILL_EARLY
#define PR_MCE_KILL_EARLY 1
#endif

#endif /* CONFIG_LINUX */
static CPUState *next_cpu;

bool cpu_is_stopped(CPUState *cpu)
{
    return cpu->stopped || !runstate_is_running();
}

static bool cpu_thread_is_idle(CPUState *cpu)
{
    if (cpu->stop || cpu->queued_work_first) {
        return false;
    }
    if (cpu_is_stopped(cpu)) {
        return true;
    }
    if (!cpu->halted || cpu_has_work(cpu) ||
        kvm_halt_in_kernel()) {
        return false;
    }
    return true;
}

static bool all_cpu_threads_idle(void)
{
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        if (!cpu_thread_is_idle(cpu)) {
            return false;
        }
    }
    return true;
}
/***********************************************************/
/* guest cycle counter */

/* Protected by TimersState seqlock */
static int64_t vm_clock_warp_start = -1;

/* Conversion factor from emulated instructions to virtual clock ticks. */
static int icount_time_shift;
/* Arbitrarily pick 1MIPS as the minimum allowable speed. */
#define MAX_ICOUNT_SHIFT 10
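/*
 * To make the numbers concrete: each emulated instruction accounts for
 * 2^icount_time_shift nanoseconds of virtual time, so the cap of 10 means
 * at most 1024 ns per instruction, i.e. roughly one million instructions
 * per virtual second (1 MIPS), hence the comment above.
 */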
static QEMUTimer *icount_rt_timer;
static QEMUTimer *icount_vm_timer;
static QEMUTimer *icount_warp_timer;

typedef struct TimersState {
    /* Protected by BQL. */
    int64_t cpu_ticks_prev;
    int64_t cpu_ticks_offset;

    /* cpu_clock_offset can be read outside the BQL, so protect it with
     * this lock.
     */
    QemuSeqLock vm_clock_seqlock;
    int64_t cpu_clock_offset;
    int32_t cpu_ticks_enabled;
    int64_t dummy;

    /* Compensate for varying guest execution speed. */
    int64_t qemu_icount_bias;
    /* Only written by TCG thread */
    int64_t qemu_icount;
} TimersState;

static TimersState timers_state;
/* Return the virtual CPU time, based on the instruction counter. */
static int64_t cpu_get_icount_locked(void)
{
    int64_t icount;
    CPUState *cpu = current_cpu;

    icount = timers_state.qemu_icount;
    if (cpu) {
        if (!cpu_can_do_io(cpu)) {
            fprintf(stderr, "Bad clock read\n");
        }
        icount -= (cpu->icount_decr.u16.low + cpu->icount_extra);
    }
    return timers_state.qemu_icount_bias + cpu_icount_to_ns(icount);
}

int64_t cpu_get_icount(void)
{
    int64_t icount;
    unsigned start;

    do {
        start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
        icount = cpu_get_icount_locked();
    } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));

    return icount;
}
int64_t cpu_icount_to_ns(int64_t icount)
{
    return icount << icount_time_shift;
}
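/*
 * With the initial shift of 3 chosen in configure_icount() below, each
 * instruction is charged 2^3 = 8 ns of virtual time, i.e. 125 million
 * instructions per virtual second: the "125MIPS" initial guess mentioned
 * there.
 */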
/* return the host CPU cycle counter and handle stop/restart */
/* Caller must hold the BQL */
int64_t cpu_get_ticks(void)
{
    int64_t ticks;

    if (use_icount) {
        return cpu_get_icount();
    }

    ticks = timers_state.cpu_ticks_offset;
    if (timers_state.cpu_ticks_enabled) {
        ticks += cpu_get_real_ticks();
    }

    if (timers_state.cpu_ticks_prev > ticks) {
        /* Note: non-increasing ticks may happen if the host uses
           software suspend. */
        timers_state.cpu_ticks_offset += timers_state.cpu_ticks_prev - ticks;
        ticks = timers_state.cpu_ticks_prev;
    }

    timers_state.cpu_ticks_prev = ticks;
    return ticks;
}
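/*
 * Worked example of the clamp above: if cpu_ticks_prev was 1000 but the new
 * reading comes back as 900 (the host counter appeared to go backwards,
 * e.g. across a software suspend), the offset is bumped by 100 and 1000 is
 * returned again, so the guest never observes a non-monotonic cycle counter.
 */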
static int64_t cpu_get_clock_locked(void)
{
    int64_t ticks;

    ticks = timers_state.cpu_clock_offset;
    if (timers_state.cpu_ticks_enabled) {
        ticks += get_clock();
    }

    return ticks;
}

/* return the host CPU monotonic timer and handle stop/restart */
int64_t cpu_get_clock(void)
{
    int64_t ti;
    unsigned start;

    do {
        start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
        ti = cpu_get_clock_locked();
    } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));

    return ti;
}
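/*
 * Both cpu_get_icount() and cpu_get_clock() use the lock-free seqlock
 * reader pattern: sample the sequence count, read, and retry if a writer
 * raced with us. Writers serialize on the BQL and bracket their updates
 * with seqlock_write_lock()/seqlock_write_unlock() below.
 */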
/* enable cpu_get_ticks()
 * Caller must hold BQL which serves as mutex for vm_clock_seqlock.
 */
void cpu_enable_ticks(void)
{
    /* Here, the real thing protected by the seqlock is cpu_clock_offset. */
    seqlock_write_lock(&timers_state.vm_clock_seqlock);
    if (!timers_state.cpu_ticks_enabled) {
        timers_state.cpu_ticks_offset -= cpu_get_real_ticks();
        timers_state.cpu_clock_offset -= get_clock();
        timers_state.cpu_ticks_enabled = 1;
    }
    seqlock_write_unlock(&timers_state.vm_clock_seqlock);
}

/* disable cpu_get_ticks(): the clock is stopped. You must not call
 * cpu_get_ticks() after that.
 * Caller must hold BQL which serves as mutex for vm_clock_seqlock.
 */
void cpu_disable_ticks(void)
{
    /* Here, the real thing protected by the seqlock is cpu_clock_offset. */
    seqlock_write_lock(&timers_state.vm_clock_seqlock);
    if (timers_state.cpu_ticks_enabled) {
        timers_state.cpu_ticks_offset += cpu_get_real_ticks();
        timers_state.cpu_clock_offset = cpu_get_clock_locked();
        timers_state.cpu_ticks_enabled = 0;
    }
    seqlock_write_unlock(&timers_state.vm_clock_seqlock);
}
/* Correlation between real and virtual time is always going to be
   fairly approximate, so ignore small variation.
   When the guest is idle real and virtual time will be aligned in
   the IO wait loop. */
#define ICOUNT_WOBBLE (get_ticks_per_sec() / 10)

static void icount_adjust(void)
{
    int64_t cur_time;
    int64_t cur_icount;
    int64_t delta;

    /* Protected by TimersState mutex. */
    static int64_t last_delta;

    /* If the VM is not running, then do nothing. */
    if (!runstate_is_running()) {
        return;
    }

    seqlock_write_lock(&timers_state.vm_clock_seqlock);
    cur_time = cpu_get_clock_locked();
    cur_icount = cpu_get_icount_locked();

    delta = cur_icount - cur_time;
    /* FIXME: This is a very crude algorithm, somewhat prone to oscillation. */
    if (delta > 0
        && last_delta + ICOUNT_WOBBLE < delta * 2
        && icount_time_shift > 0) {
        /* The guest is getting too far ahead. Slow time down. */
        icount_time_shift--;
    }
    if (delta < 0
        && last_delta - ICOUNT_WOBBLE > delta * 2
        && icount_time_shift < MAX_ICOUNT_SHIFT) {
        /* The guest is getting too far behind. Speed time up. */
        icount_time_shift++;
    }
    last_delta = delta;
    timers_state.qemu_icount_bias = cur_icount
                              - (timers_state.qemu_icount << icount_time_shift);
    seqlock_write_unlock(&timers_state.vm_clock_seqlock);
}
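/*
 * Example of one adjustment step: if virtual time has drifted ahead of real
 * time (delta > 0) and keeps growing, icount_time_shift is decremented,
 * halving the nanoseconds charged per instruction. The bias is then
 * recomputed so that cpu_get_icount_locked() returns the same value
 * immediately before and after the shift change, i.e. the virtual clock
 * never jumps at the moment the rate changes.
 */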
static void icount_adjust_rt(void *opaque)
{
    timer_mod(icount_rt_timer,
              qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + 1000);
    icount_adjust();
}

static void icount_adjust_vm(void *opaque)
{
    timer_mod(icount_vm_timer,
              qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
              get_ticks_per_sec() / 10);
    icount_adjust();
}
static int64_t qemu_icount_round(int64_t count)
{
    return (count + (1 << icount_time_shift) - 1) >> icount_time_shift;
}
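/*
 * This is a division by 2^icount_time_shift that rounds up: with a shift of
 * 3, a 100 ns deadline becomes (100 + 7) >> 3 = 13 instructions, so the
 * vCPU is always budgeted enough instructions to cover the deadline.
 */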
static void icount_warp_rt(void *opaque)
{
    /* The icount_warp_timer is rescheduled soon after vm_clock_warp_start
     * changes from -1 to another value, so the race here is okay.
     */
    if (atomic_read(&vm_clock_warp_start) == -1) {
        return;
    }

    seqlock_write_lock(&timers_state.vm_clock_seqlock);
    if (runstate_is_running()) {
        int64_t clock = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
        int64_t warp_delta;

        warp_delta = clock - vm_clock_warp_start;
        if (use_icount == 2) {
            /*
             * In adaptive mode, do not let QEMU_CLOCK_VIRTUAL run too
             * far ahead of real time.
             */
            int64_t cur_time = cpu_get_clock_locked();
            int64_t cur_icount = cpu_get_icount_locked();
            int64_t delta = cur_time - cur_icount;
            warp_delta = MIN(warp_delta, delta);
        }
        timers_state.qemu_icount_bias += warp_delta;
    }
    vm_clock_warp_start = -1;
    seqlock_write_unlock(&timers_state.vm_clock_seqlock);

    if (qemu_clock_expired(QEMU_CLOCK_VIRTUAL)) {
        qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
    }
}
void qtest_clock_warp(int64_t dest)
{
    int64_t clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
    assert(qtest_enabled());
    while (clock < dest) {
        int64_t deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
        int64_t warp = qemu_soonest_timeout(dest - clock, deadline);
        seqlock_write_lock(&timers_state.vm_clock_seqlock);
        timers_state.qemu_icount_bias += warp;
        seqlock_write_unlock(&timers_state.vm_clock_seqlock);

        qemu_clock_run_timers(QEMU_CLOCK_VIRTUAL);
        clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
    }
    qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
}
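/*
 * In effect, a qtest clock request does not wait for real time: virtual
 * time is pushed forward one timer deadline at a time, running whatever
 * timers become due, until the requested destination is reached.
 */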
void qemu_clock_warp(QEMUClockType type)
{
    int64_t clock;
    int64_t deadline;

    /*
     * There are too many global variables to make the "warp" behavior
     * applicable to other clocks. But a clock argument removes the
     * need for if statements all over the place.
     */
    if (type != QEMU_CLOCK_VIRTUAL || !use_icount) {
        return;
    }

    /*
     * If the CPUs have been sleeping, advance the QEMU_CLOCK_VIRTUAL timer now.
     * This ensures that the deadline for the timer is computed correctly below.
     * This also makes sure that the insn counter is synchronized before the
     * CPU starts running, in case the CPU is woken by an event other than
     * the earliest QEMU_CLOCK_VIRTUAL timer.
     */
    icount_warp_rt(NULL);
    timer_del(icount_warp_timer);
    if (!all_cpu_threads_idle()) {
        return;
    }

    if (qtest_enabled()) {
        /* When testing, qtest commands advance icount. */
        return;
    }

    /* We want to use the earliest deadline from ALL vm_clocks */
    clock = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
    deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
    if (deadline < 0) {
        return;
    }

    if (deadline > 0) {
        /*
         * Ensure QEMU_CLOCK_VIRTUAL proceeds even when the virtual CPU goes to
         * sleep. Otherwise, the CPU might be waiting for a future timer
         * interrupt to wake it up, but the interrupt never comes because
         * the vCPU isn't running any insns and thus doesn't advance the
         * QEMU_CLOCK_VIRTUAL.
         *
         * An extreme solution for this problem would be to never let VCPUs
         * sleep in icount mode if there is a pending QEMU_CLOCK_VIRTUAL
         * timer; rather time could just advance to the next QEMU_CLOCK_VIRTUAL
         * event. Instead, we do stop VCPUs and only advance QEMU_CLOCK_VIRTUAL
         * after some "real" time (related to the time left until the next
         * event) has passed. The QEMU_CLOCK_REALTIME timer will do this.
         * This avoids making the warps visible externally; for example,
         * you will not be sending network packets continuously instead of
         * every 100ms.
         */
        seqlock_write_lock(&timers_state.vm_clock_seqlock);
        if (vm_clock_warp_start == -1 || vm_clock_warp_start > clock) {
            vm_clock_warp_start = clock;
        }
        seqlock_write_unlock(&timers_state.vm_clock_seqlock);
        timer_mod_anticipate(icount_warp_timer, clock + deadline);
    } else if (deadline == 0) {
        qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
    }
}
static bool icount_state_needed(void *opaque)
{
    return use_icount;
}

/*
 * This is a subsection for icount migration.
 */
static const VMStateDescription icount_vmstate_timers = {
    .name = "timer/icount",
    .version_id = 1,
    .minimum_version_id = 1,
    .fields = (VMStateField[]) {
        VMSTATE_INT64(qemu_icount_bias, TimersState),
        VMSTATE_INT64(qemu_icount, TimersState),
        VMSTATE_END_OF_LIST()
    }
};

static const VMStateDescription vmstate_timers = {
    .name = "timer",
    .version_id = 2,
    .minimum_version_id = 1,
    .fields = (VMStateField[]) {
        VMSTATE_INT64(cpu_ticks_offset, TimersState),
        VMSTATE_INT64(dummy, TimersState),
        VMSTATE_INT64_V(cpu_clock_offset, TimersState, 2),
        VMSTATE_END_OF_LIST()
    },
    .subsections = (VMStateSubsection[]) {
        {
            .vmsd = &icount_vmstate_timers,
            .needed = icount_state_needed,
        }, {
            /* empty */
        }
    }
};
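/*
 * Because .needed is icount_state_needed(), the "timer/icount" subsection
 * only travels in the migration stream when icount is in use, so ordinary
 * migration stays compatible with destinations that know nothing about
 * this subsection.
 */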
void configure_icount(QemuOpts *opts, Error **errp)
{
    const char *option;
    char *rem_str = NULL;

    seqlock_init(&timers_state.vm_clock_seqlock, NULL);
    vmstate_register(NULL, 0, &vmstate_timers, &timers_state);
    option = qemu_opt_get(opts, "shift");
    if (!option) {
        if (qemu_opt_get(opts, "align") != NULL) {
            error_setg(errp, "Please specify shift option when using align");
        }
        return;
    }
    icount_align_option = qemu_opt_get_bool(opts, "align", false);
    icount_warp_timer = timer_new_ns(QEMU_CLOCK_REALTIME,
                                     icount_warp_rt, NULL);
    if (strcmp(option, "auto") != 0) {
        errno = 0;
        icount_time_shift = strtol(option, &rem_str, 0);
        if (errno != 0 || *rem_str != '\0' || !strlen(option)) {
            error_setg(errp, "icount: Invalid shift value");
        }
        use_icount = 1;
        return;
    } else if (icount_align_option) {
        error_setg(errp, "shift=auto and align=on are incompatible");
    }

    use_icount = 2;

    /* 125MIPS seems a reasonable initial guess at the guest speed.
       It will be corrected fairly quickly anyway. */
    icount_time_shift = 3;

    /* Have both realtime and virtual time triggers for speed adjustment.
       The realtime trigger catches emulated time passing too slowly,
       the virtual time trigger catches emulated time passing too fast.
       Realtime triggers occur even when idle, so use them less frequently
       than VM triggers. */
    icount_rt_timer = timer_new_ms(QEMU_CLOCK_REALTIME,
                                   icount_adjust_rt, NULL);
    timer_mod(icount_rt_timer,
              qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + 1000);
    icount_vm_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
                                   icount_adjust_vm, NULL);
    timer_mod(icount_vm_timer,
              qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
              get_ticks_per_sec() / 10);
}
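/*
 * These options come from the -icount command line switch; illustrative
 * invocations (binary name is just an example):
 *
 *     qemu-system-x86_64 -icount shift=7      # fixed rate, 128 ns per insn
 *     qemu-system-x86_64 -icount shift=auto   # adaptive (use_icount == 2)
 */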
/***********************************************************/
void hw_error(const char *fmt, ...)
{
    va_list ap;
    CPUState *cpu;

    va_start(ap, fmt);
    fprintf(stderr, "qemu: hardware error: ");
    vfprintf(stderr, fmt, ap);
    fprintf(stderr, "\n");
    CPU_FOREACH(cpu) {
        fprintf(stderr, "CPU #%d:\n", cpu->cpu_index);
        cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_FPU);
    }
    va_end(ap);
    abort();
}
void cpu_synchronize_all_states(void)
{
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        cpu_synchronize_state(cpu);
    }
}

void cpu_synchronize_all_post_reset(void)
{
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        cpu_synchronize_post_reset(cpu);
    }
}

void cpu_synchronize_all_post_init(void)
{
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        cpu_synchronize_post_init(cpu);
    }
}
static int do_vm_stop(RunState state)
{
    int ret = 0;

    if (runstate_is_running()) {
        cpu_disable_ticks();
        pause_all_vcpus();
        runstate_set(state);
        vm_state_notify(0, state);
        qapi_event_send_stop(&error_abort);
    }

    bdrv_drain_all();
    ret = bdrv_flush_all();

    return ret;
}

static bool cpu_can_run(CPUState *cpu)
{
    if (cpu->stop) {
        return false;
    }
    if (cpu_is_stopped(cpu)) {
        return false;
    }
    return true;
}

static void cpu_handle_guest_debug(CPUState *cpu)
{
    gdb_set_stop_cpu(cpu);
    qemu_system_debug_request();
    cpu->stopped = true;
}
static void cpu_signal(int sig)
{
    if (current_cpu) {
        cpu_exit(current_cpu);
    }
    exit_request = 1;
}
#ifdef CONFIG_LINUX
static void sigbus_reraise(void)
{
    sigset_t set;
    struct sigaction action;

    memset(&action, 0, sizeof(action));
    action.sa_handler = SIG_DFL;
    if (!sigaction(SIGBUS, &action, NULL)) {
        raise(SIGBUS);
        sigemptyset(&set);
        sigaddset(&set, SIGBUS);
        sigprocmask(SIG_UNBLOCK, &set, NULL);
    }
    perror("Failed to re-raise SIGBUS!\n");
    abort();
}
static void sigbus_handler(int n, struct qemu_signalfd_siginfo *siginfo,
                           void *ctx)
{
    if (kvm_on_sigbus(siginfo->ssi_code,
                      (void *)(intptr_t)siginfo->ssi_addr)) {
        sigbus_reraise();
    }
}

static void qemu_init_sigbus(void)
{
    struct sigaction action;

    memset(&action, 0, sizeof(action));
    action.sa_flags = SA_SIGINFO;
    action.sa_sigaction = (void (*)(int, siginfo_t*, void*))sigbus_handler;
    sigaction(SIGBUS, &action, NULL);

    prctl(PR_MCE_KILL, PR_MCE_KILL_SET, PR_MCE_KILL_EARLY, 0, 0);
}
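/*
 * The prctl() above selects "early kill" for hardware memory errors: the
 * kernel delivers SIGBUS as soon as corruption is detected in our address
 * space, rather than waiting until the poisoned page is next touched,
 * which gives the SIGBUS handler (and KVM) a chance to forward the error
 * to the guest.
 */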
static void qemu_kvm_eat_signals(CPUState *cpu)
{
    struct timespec ts = { 0, 0 };
    siginfo_t siginfo;
    sigset_t waitset;
    sigset_t chkset;
    int r;

    sigemptyset(&waitset);
    sigaddset(&waitset, SIG_IPI);
    sigaddset(&waitset, SIGBUS);

    do {
        r = sigtimedwait(&waitset, &siginfo, &ts);
        if (r == -1 && !(errno == EAGAIN || errno == EINTR)) {
            perror("sigtimedwait");
            exit(1);
        }

        switch (r) {
        case SIGBUS:
            if (kvm_on_sigbus_vcpu(cpu, siginfo.si_code, siginfo.si_addr)) {
                sigbus_reraise();
            }
            break;
        default:
            break;
        }

        r = sigpending(&chkset);
        if (r == -1) {
            perror("sigpending");
            exit(1);
        }
    } while (sigismember(&chkset, SIG_IPI) || sigismember(&chkset, SIGBUS));
}

#else /* !CONFIG_LINUX */

static void qemu_init_sigbus(void)
{
}

static void qemu_kvm_eat_signals(CPUState *cpu)
{
}
#endif /* !CONFIG_LINUX */
#ifndef _WIN32
static void dummy_signal(int sig)
{
}

static void qemu_kvm_init_cpu_signals(CPUState *cpu)
{
    int r;
    sigset_t set;
    struct sigaction sigact;

    memset(&sigact, 0, sizeof(sigact));
    sigact.sa_handler = dummy_signal;
    sigaction(SIG_IPI, &sigact, NULL);

    pthread_sigmask(SIG_BLOCK, NULL, &set);
    sigdelset(&set, SIG_IPI);
    sigdelset(&set, SIGBUS);
    r = kvm_set_signal_mask(cpu, &set);
    if (r) {
        fprintf(stderr, "kvm_set_signal_mask: %s\n", strerror(-r));
        exit(1);
    }
}

static void qemu_tcg_init_cpu_signals(void)
{
    sigset_t set;
    struct sigaction sigact;

    memset(&sigact, 0, sizeof(sigact));
    sigact.sa_handler = cpu_signal;
    sigaction(SIG_IPI, &sigact, NULL);

    sigemptyset(&set);
    sigaddset(&set, SIG_IPI);
    pthread_sigmask(SIG_UNBLOCK, &set, NULL);
}

#else /* _WIN32 */
static void qemu_kvm_init_cpu_signals(CPUState *cpu)
{
    abort();
}

static void qemu_tcg_init_cpu_signals(void)
{
}
#endif /* _WIN32 */
static QemuMutex qemu_global_mutex;
static QemuCond qemu_io_proceeded_cond;
static bool iothread_requesting_mutex;

static QemuThread io_thread;

static QemuThread *tcg_cpu_thread;
static QemuCond *tcg_halt_cond;

/* cpu creation */
static QemuCond qemu_cpu_cond;
/* system init */
static QemuCond qemu_pause_cond;
static QemuCond qemu_work_cond;

void qemu_init_cpu_loop(void)
{
    qemu_init_sigbus();
    qemu_cond_init(&qemu_cpu_cond);
    qemu_cond_init(&qemu_pause_cond);
    qemu_cond_init(&qemu_work_cond);
    qemu_cond_init(&qemu_io_proceeded_cond);
    qemu_mutex_init(&qemu_global_mutex);

    qemu_thread_get_self(&io_thread);
}
void run_on_cpu(CPUState *cpu, void (*func)(void *data), void *data)
{
    struct qemu_work_item wi;

    if (qemu_cpu_is_self(cpu)) {
        func(data);
        return;
    }

    wi.func = func;
    wi.data = data;
    wi.free = false;
    if (cpu->queued_work_first == NULL) {
        cpu->queued_work_first = &wi;
    } else {
        cpu->queued_work_last->next = &wi;
    }
    cpu->queued_work_last = &wi;
    wi.next = NULL;
    wi.done = false;

    qemu_cpu_kick(cpu);
    while (!wi.done) {
        CPUState *self_cpu = current_cpu;

        qemu_cond_wait(&qemu_work_cond, &qemu_global_mutex);
        current_cpu = self_cpu;
    }
}
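/*
 * Usage sketch (the callback name is illustrative): a monitor or device
 * thread that needs vCPU-local work done can write
 *
 *     run_on_cpu(cpu, do_work, &args);
 *
 * which either runs do_work() immediately (when already on that vCPU's
 * thread) or queues it and sleeps on qemu_work_cond until the vCPU thread
 * has executed it.
 */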
void async_run_on_cpu(CPUState *cpu, void (*func)(void *data), void *data)
{
    struct qemu_work_item *wi;

    if (qemu_cpu_is_self(cpu)) {
        func(data);
        return;
    }

    wi = g_malloc0(sizeof(struct qemu_work_item));
    wi->func = func;
    wi->data = data;
    wi->free = true;
    if (cpu->queued_work_first == NULL) {
        cpu->queued_work_first = wi;
    } else {
        cpu->queued_work_last->next = wi;
    }
    cpu->queued_work_last = wi;
    wi->next = NULL;
    wi->done = false;

    qemu_cpu_kick(cpu);
}
static void flush_queued_work(CPUState *cpu)
{
    struct qemu_work_item *wi;

    if (cpu->queued_work_first == NULL) {
        return;
    }

    while ((wi = cpu->queued_work_first)) {
        cpu->queued_work_first = wi->next;
        wi->func(wi->data);
        wi->done = true;
        if (wi->free) {
            g_free(wi);
        }
    }
    cpu->queued_work_last = NULL;
    qemu_cond_broadcast(&qemu_work_cond);
}

static void qemu_wait_io_event_common(CPUState *cpu)
{
    if (cpu->stop) {
        cpu->stop = false;
        cpu->stopped = true;
        qemu_cond_signal(&qemu_pause_cond);
    }
    flush_queued_work(cpu);
    cpu->thread_kicked = false;
}
static void qemu_tcg_wait_io_event(void)
{
    CPUState *cpu;

    while (all_cpu_threads_idle()) {
        /* Start accounting real time to the virtual clock if the CPUs
           are idle. */
        qemu_clock_warp(QEMU_CLOCK_VIRTUAL);
        qemu_cond_wait(tcg_halt_cond, &qemu_global_mutex);
    }

    while (iothread_requesting_mutex) {
        qemu_cond_wait(&qemu_io_proceeded_cond, &qemu_global_mutex);
    }

    CPU_FOREACH(cpu) {
        qemu_wait_io_event_common(cpu);
    }
}

static void qemu_kvm_wait_io_event(CPUState *cpu)
{
    while (cpu_thread_is_idle(cpu)) {
        qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
    }

    qemu_kvm_eat_signals(cpu);
    qemu_wait_io_event_common(cpu);
}
static void *qemu_kvm_cpu_thread_fn(void *arg)
{
    CPUState *cpu = arg;
    int r;

    qemu_mutex_lock(&qemu_global_mutex);
    qemu_thread_get_self(cpu->thread);
    cpu->thread_id = qemu_get_thread_id();
    current_cpu = cpu;

    r = kvm_init_vcpu(cpu);
    if (r < 0) {
        fprintf(stderr, "kvm_init_vcpu failed: %s\n", strerror(-r));
        exit(1);
    }

    qemu_kvm_init_cpu_signals(cpu);

    /* signal CPU creation */
    cpu->created = true;
    qemu_cond_signal(&qemu_cpu_cond);

    while (1) {
        if (cpu_can_run(cpu)) {
            r = kvm_cpu_exec(cpu);
            if (r == EXCP_DEBUG) {
                cpu_handle_guest_debug(cpu);
            }
        }
        qemu_kvm_wait_io_event(cpu);
    }

    return NULL;
}
static void *qemu_dummy_cpu_thread_fn(void *arg)
{
#ifdef _WIN32
    fprintf(stderr, "qtest is not supported under Windows\n");
    exit(1);
#else
    CPUState *cpu = arg;
    sigset_t waitset;
    int r;

    qemu_mutex_lock_iothread();
    qemu_thread_get_self(cpu->thread);
    cpu->thread_id = qemu_get_thread_id();

    sigemptyset(&waitset);
    sigaddset(&waitset, SIG_IPI);

    /* signal CPU creation */
    cpu->created = true;
    qemu_cond_signal(&qemu_cpu_cond);

    current_cpu = cpu;
    while (1) {
        current_cpu = NULL;
        qemu_mutex_unlock_iothread();
        do {
            int sig;
            r = sigwait(&waitset, &sig);
        } while (r == -1 && (errno == EAGAIN || errno == EINTR));
        if (r == -1) {
            perror("sigwait");
            exit(1);
        }
        qemu_mutex_lock_iothread();
        current_cpu = cpu;
        qemu_wait_io_event_common(cpu);
    }

    return NULL;
#endif
}
static void tcg_exec_all(void);

static void *qemu_tcg_cpu_thread_fn(void *arg)
{
    CPUState *cpu = arg;

    qemu_tcg_init_cpu_signals();
    qemu_thread_get_self(cpu->thread);

    qemu_mutex_lock(&qemu_global_mutex);
    CPU_FOREACH(cpu) {
        cpu->thread_id = qemu_get_thread_id();
        cpu->created = true;
    }
    qemu_cond_signal(&qemu_cpu_cond);

    /* wait for initial kick-off after machine start */
    while (QTAILQ_FIRST(&cpus)->stopped) {
        qemu_cond_wait(tcg_halt_cond, &qemu_global_mutex);

        /* process any pending work */
        CPU_FOREACH(cpu) {
            qemu_wait_io_event_common(cpu);
        }
    }

    while (1) {
        tcg_exec_all();

        if (use_icount) {
            int64_t deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);

            if (deadline == 0) {
                qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
            }
        }
        qemu_tcg_wait_io_event();
    }

    return NULL;
}
static void qemu_cpu_kick_thread(CPUState *cpu)
{
#ifndef _WIN32
    int err;

    err = pthread_kill(cpu->thread->thread, SIG_IPI);
    if (err) {
        fprintf(stderr, "qemu:%s: %s", __func__, strerror(err));
        exit(1);
    }
#else /* _WIN32 */
    if (!qemu_cpu_is_self(cpu)) {
        CONTEXT tcgContext;

        if (SuspendThread(cpu->hThread) == (DWORD)-1) {
            fprintf(stderr, "qemu:%s: GetLastError:%lu\n", __func__,
                    GetLastError());
            exit(1);
        }

        /* On multi-core systems, we are not sure that the thread is actually
         * suspended until we can get the context.
         */
        tcgContext.ContextFlags = CONTEXT_CONTROL;
        while (GetThreadContext(cpu->hThread, &tcgContext) != 0) {
            continue;
        }

        cpu_signal(0);

        if (ResumeThread(cpu->hThread) == (DWORD)-1) {
            fprintf(stderr, "qemu:%s: GetLastError:%lu\n", __func__,
                    GetLastError());
            exit(1);
        }
    }
#endif
}

void qemu_cpu_kick(CPUState *cpu)
{
    qemu_cond_broadcast(cpu->halt_cond);
    if (!tcg_enabled() && !cpu->thread_kicked) {
        qemu_cpu_kick_thread(cpu);
        cpu->thread_kicked = true;
    }
}
void qemu_cpu_kick_self(void)
{
#ifndef _WIN32
    assert(current_cpu);

    if (!current_cpu->thread_kicked) {
        qemu_cpu_kick_thread(current_cpu);
        current_cpu->thread_kicked = true;
    }
#else
    abort();
#endif
}

bool qemu_cpu_is_self(CPUState *cpu)
{
    return qemu_thread_is_self(cpu->thread);
}

static bool qemu_in_vcpu_thread(void)
{
    return current_cpu && qemu_cpu_is_self(current_cpu);
}
void qemu_mutex_lock_iothread(void)
{
    if (!tcg_enabled()) {
        qemu_mutex_lock(&qemu_global_mutex);
    } else {
        iothread_requesting_mutex = true;
        if (qemu_mutex_trylock(&qemu_global_mutex)) {
            qemu_cpu_kick_thread(first_cpu);
            qemu_mutex_lock(&qemu_global_mutex);
        }
        iothread_requesting_mutex = false;
        qemu_cond_broadcast(&qemu_io_proceeded_cond);
    }
}

void qemu_mutex_unlock_iothread(void)
{
    qemu_mutex_unlock(&qemu_global_mutex);
}
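/*
 * Rationale for the trylock dance above: with TCG, the lock holder is
 * typically the vCPU thread running guest code, which may not reach a
 * natural exit point for a while; kicking first_cpu forces it out of the
 * guest so the iothread does not block on the mutex indefinitely.
 */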
static int all_vcpus_paused(void)
{
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        if (!cpu->stopped) {
            return 0;
        }
    }

    return 1;
}

void pause_all_vcpus(void)
{
    CPUState *cpu;

    qemu_clock_enable(QEMU_CLOCK_VIRTUAL, false);
    CPU_FOREACH(cpu) {
        cpu->stop = true;
        qemu_cpu_kick(cpu);
    }

    if (qemu_in_vcpu_thread()) {
        cpu_stop_current();
        if (!kvm_enabled()) {
            CPU_FOREACH(cpu) {
                cpu->stop = false;
                cpu->stopped = true;
            }
        }
        return;
    }

    while (!all_vcpus_paused()) {
        qemu_cond_wait(&qemu_pause_cond, &qemu_global_mutex);
        CPU_FOREACH(cpu) {
            qemu_cpu_kick(cpu);
        }
    }
}
void cpu_resume(CPUState *cpu)
{
    cpu->stop = false;
    cpu->stopped = false;
    qemu_cpu_kick(cpu);
}

void resume_all_vcpus(void)
{
    CPUState *cpu;

    qemu_clock_enable(QEMU_CLOCK_VIRTUAL, true);
    CPU_FOREACH(cpu) {
        cpu_resume(cpu);
    }
}
/* For temporary buffers for forming a name */
#define VCPU_THREAD_NAME_SIZE 16

static void qemu_tcg_init_vcpu(CPUState *cpu)
{
    char thread_name[VCPU_THREAD_NAME_SIZE];

    tcg_cpu_address_space_init(cpu, cpu->as);

    /* share a single thread for all cpus with TCG */
    if (!tcg_cpu_thread) {
        cpu->thread = g_malloc0(sizeof(QemuThread));
        cpu->halt_cond = g_malloc0(sizeof(QemuCond));
        qemu_cond_init(cpu->halt_cond);
        tcg_halt_cond = cpu->halt_cond;
        snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/TCG",
                 cpu->cpu_index);
        qemu_thread_create(cpu->thread, thread_name, qemu_tcg_cpu_thread_fn,
                           cpu, QEMU_THREAD_JOINABLE);
#ifdef _WIN32
        cpu->hThread = qemu_thread_get_handle(cpu->thread);
#endif
        while (!cpu->created) {
            qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
        }
        tcg_cpu_thread = cpu->thread;
    } else {
        cpu->thread = tcg_cpu_thread;
        cpu->halt_cond = tcg_halt_cond;
    }
}
static void qemu_kvm_start_vcpu(CPUState *cpu)
{
    char thread_name[VCPU_THREAD_NAME_SIZE];

    cpu->thread = g_malloc0(sizeof(QemuThread));
    cpu->halt_cond = g_malloc0(sizeof(QemuCond));
    qemu_cond_init(cpu->halt_cond);
    snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/KVM",
             cpu->cpu_index);
    qemu_thread_create(cpu->thread, thread_name, qemu_kvm_cpu_thread_fn,
                       cpu, QEMU_THREAD_JOINABLE);
    while (!cpu->created) {
        qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
    }
}

static void qemu_dummy_start_vcpu(CPUState *cpu)
{
    char thread_name[VCPU_THREAD_NAME_SIZE];

    cpu->thread = g_malloc0(sizeof(QemuThread));
    cpu->halt_cond = g_malloc0(sizeof(QemuCond));
    qemu_cond_init(cpu->halt_cond);
    snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/DUMMY",
             cpu->cpu_index);
    qemu_thread_create(cpu->thread, thread_name, qemu_dummy_cpu_thread_fn, cpu,
                       QEMU_THREAD_JOINABLE);
    while (!cpu->created) {
        qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
    }
}
void qemu_init_vcpu(CPUState *cpu)
{
    cpu->nr_cores = smp_cores;
    cpu->nr_threads = smp_threads;
    cpu->stopped = true;
    if (kvm_enabled()) {
        qemu_kvm_start_vcpu(cpu);
    } else if (tcg_enabled()) {
        qemu_tcg_init_vcpu(cpu);
    } else {
        qemu_dummy_start_vcpu(cpu);
    }
}
void cpu_stop_current(void)
{
    if (current_cpu) {
        current_cpu->stop = false;
        current_cpu->stopped = true;
        cpu_exit(current_cpu);
        qemu_cond_signal(&qemu_pause_cond);
    }
}
int vm_stop(RunState state)
{
    if (qemu_in_vcpu_thread()) {
        qemu_system_vmstop_request_prepare();
        qemu_system_vmstop_request(state);
        /*
         * FIXME: should not return to device code in case
         * vm_stop() has been requested.
         */
        cpu_stop_current();
        return 0;
    }

    return do_vm_stop(state);
}

/* does a state transition even if the VM is already stopped,
   current state is forgotten forever */
int vm_stop_force_state(RunState state)
{
    if (runstate_is_running()) {
        return vm_stop(state);
    } else {
        runstate_set(state);
        /* Make sure to return an error if the flush in a previous vm_stop()
         * failed. */
        return bdrv_flush_all();
    }
}
static int tcg_cpu_exec(CPUArchState *env)
{
    CPUState *cpu = ENV_GET_CPU(env);
    int ret;
#ifdef CONFIG_PROFILER
    int64_t ti;
#endif

#ifdef CONFIG_PROFILER
    ti = profile_getclock();
#endif
    if (use_icount) {
        int64_t count;
        int64_t deadline;
        int decr;

        timers_state.qemu_icount -= (cpu->icount_decr.u16.low
                                     + cpu->icount_extra);
        cpu->icount_decr.u16.low = 0;
        cpu->icount_extra = 0;
        deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);

        /* Maintain prior (possibly buggy) behaviour where if no deadline
         * was set (as there is no QEMU_CLOCK_VIRTUAL timer) or it is more than
         * INT32_MAX nanoseconds ahead, we still use INT32_MAX
         * nanoseconds.
         */
        if ((deadline < 0) || (deadline > INT32_MAX)) {
            deadline = INT32_MAX;
        }

        count = qemu_icount_round(deadline);
        timers_state.qemu_icount += count;
        decr = (count > 0xffff) ? 0xffff : count;
        count -= decr;
        cpu->icount_decr.u16.low = decr;
        cpu->icount_extra = count;
    }
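    /*
     * Worked example of the split above: a 1 ms deadline with
     * icount_time_shift == 3 rounds to 125000 instructions; the 16-bit
     * icount_decr counter takes 0xffff (65535) of them and icount_extra
     * carries the remaining 59465 for later refills.
     */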
    ret = cpu_exec(env);
#ifdef CONFIG_PROFILER
    qemu_time += profile_getclock() - ti;
#endif
    if (use_icount) {
        /* Fold pending instructions back into the
           instruction counter, and clear the interrupt flag. */
        timers_state.qemu_icount -= (cpu->icount_decr.u16.low
                                     + cpu->icount_extra);
        cpu->icount_decr.u32 = 0;
        cpu->icount_extra = 0;
    }
    return ret;
}
static void tcg_exec_all(void)
{
    int r;

    /* Account partial waits to QEMU_CLOCK_VIRTUAL. */
    qemu_clock_warp(QEMU_CLOCK_VIRTUAL);

    if (next_cpu == NULL) {
        next_cpu = first_cpu;
    }
    for (; next_cpu != NULL && !exit_request; next_cpu = CPU_NEXT(next_cpu)) {
        CPUState *cpu = next_cpu;
        CPUArchState *env = cpu->env_ptr;

        qemu_clock_enable(QEMU_CLOCK_VIRTUAL,
                          (cpu->singlestep_enabled & SSTEP_NOTIMER) == 0);

        if (cpu_can_run(cpu)) {
            r = tcg_cpu_exec(env);
            if (r == EXCP_DEBUG) {
                cpu_handle_guest_debug(cpu);
                break;
            }
        } else if (cpu->stop || cpu->stopped) {
            break;
        }
    }
    exit_request = 0;
}
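/*
 * Scheduling note: with TCG all vCPUs share one host thread, and
 * tcg_exec_all() round-robins through them; next_cpu persists across calls,
 * so each invocation resumes where the previous one left off.
 */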
void list_cpus(FILE *f, fprintf_function cpu_fprintf, const char *optarg)
{
    /* XXX: implement xxx_cpu_list for targets that still miss it */
#if defined(cpu_list)
    cpu_list(f, cpu_fprintf);
#endif
}
CpuInfoList *qmp_query_cpus(Error **errp)
{
    CpuInfoList *head = NULL, *cur_item = NULL;
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        CpuInfoList *info;
#if defined(TARGET_I386)
        X86CPU *x86_cpu = X86_CPU(cpu);
        CPUX86State *env = &x86_cpu->env;
#elif defined(TARGET_PPC)
        PowerPCCPU *ppc_cpu = POWERPC_CPU(cpu);
        CPUPPCState *env = &ppc_cpu->env;
#elif defined(TARGET_SPARC)
        SPARCCPU *sparc_cpu = SPARC_CPU(cpu);
        CPUSPARCState *env = &sparc_cpu->env;
#elif defined(TARGET_MIPS)
        MIPSCPU *mips_cpu = MIPS_CPU(cpu);
        CPUMIPSState *env = &mips_cpu->env;
#endif

        cpu_synchronize_state(cpu);

        info = g_malloc0(sizeof(*info));
        info->value = g_malloc0(sizeof(*info->value));
        info->value->CPU = cpu->cpu_index;
        info->value->current = (cpu == first_cpu);
        info->value->halted = cpu->halted;
        info->value->thread_id = cpu->thread_id;
#if defined(TARGET_I386)
        info->value->has_pc = true;
        info->value->pc = env->eip + env->segs[R_CS].base;
#elif defined(TARGET_PPC)
        info->value->has_nip = true;
        info->value->nip = env->nip;
#elif defined(TARGET_SPARC)
        info->value->has_pc = true;
        info->value->pc = env->pc;
        info->value->has_npc = true;
        info->value->npc = env->npc;
#elif defined(TARGET_MIPS)
        info->value->has_PC = true;
        info->value->PC = env->active_tc.PC;
#endif

        /* XXX: waiting for the qapi to support GSList */
        if (!cur_item) {
            head = cur_item = info;
        } else {
            cur_item->next = info;
            cur_item = info;
        }
    }

    return head;
}
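/*
 * Wire-level sketch of what the above produces over QMP (x86 guest; the
 * field values are illustrative):
 *
 *   -> { "execute": "query-cpus" }
 *   <- { "return": [ { "CPU": 0, "current": true, "halted": false,
 *                      "pc": 1048576, "thread_id": 12345 } ] }
 */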
void qmp_memsave(int64_t addr, int64_t size, const char *filename,
                 bool has_cpu, int64_t cpu_index, Error **errp)
{
    FILE *f;
    uint32_t l;
    CPUState *cpu;
    uint8_t buf[1024];

    if (!has_cpu) {
        cpu_index = 0;
    }

    cpu = qemu_get_cpu(cpu_index);
    if (cpu == NULL) {
        error_set(errp, QERR_INVALID_PARAMETER_VALUE, "cpu-index",
                  "a CPU number");
        return;
    }

    f = fopen(filename, "wb");
    if (f == NULL) {
        error_setg_file_open(errp, errno, filename);
        return;
    }

    while (size != 0) {
        l = sizeof(buf);
        if (l > size) {
            l = size;
        }
        if (cpu_memory_rw_debug(cpu, addr, buf, l, 0) != 0) {
            error_setg(errp, "Invalid addr 0x%016" PRIx64 " specified", addr);
            goto exit;
        }
        if (fwrite(buf, 1, l, f) != l) {
            error_set(errp, QERR_IO_ERROR);
            goto exit;
        }
        addr += l;
        size -= l;
    }

exit:
    fclose(f);
}
void qmp_pmemsave(int64_t addr, int64_t size, const char *filename,
                  Error **errp)
{
    FILE *f;
    uint32_t l;
    uint8_t buf[1024];

    f = fopen(filename, "wb");
    if (f == NULL) {
        error_setg_file_open(errp, errno, filename);
        return;
    }

    while (size != 0) {
        l = sizeof(buf);
        if (l > size) {
            l = size;
        }
        cpu_physical_memory_read(addr, buf, l);
        if (fwrite(buf, 1, l, f) != l) {
            error_set(errp, QERR_IO_ERROR);
            goto exit;
        }
        addr += l;
        size -= l;
    }

exit:
    fclose(f);
}
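/*
 * QMP usage sketch for the command above (argument values illustrative;
 * the schema names the address parameter "val"):
 *
 *   -> { "execute": "pmemsave",
 *        "arguments": { "val": 1048576, "size": 4096,
 *                       "filename": "/tmp/guest-mem.bin" } }
 *   <- { "return": {} }
 */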
void qmp_inject_nmi(Error **errp)
{
#if defined(TARGET_I386)
    CPUState *cs;

    CPU_FOREACH(cs) {
        X86CPU *cpu = X86_CPU(cs);

        if (!cpu->apic_state) {
            cpu_interrupt(cs, CPU_INTERRUPT_NMI);
        } else {
            apic_deliver_nmi(cpu->apic_state);
        }
    }
#elif defined(TARGET_S390X)
    CPUState *cs;
    S390CPU *cpu;

    CPU_FOREACH(cs) {
        cpu = S390_CPU(cs);
        if (cpu->env.cpu_num == monitor_get_cpu_index()) {
            if (s390_cpu_restart(S390_CPU(cs)) == -1) {
                error_set(errp, QERR_UNSUPPORTED);
                return;
            }
            break;
        }
    }
#else
    error_set(errp, QERR_UNSUPPORTED);
#endif
}