4 * Copyright (c) 2003-2008 Fabrice Bellard
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
25 /* Needed early for CONFIG_BSD etc. */
26 #include "config-host.h"
28 #include "monitor/monitor.h"
29 #include "qapi/qmp/qerror.h"
30 #include "sysemu/sysemu.h"
31 #include "exec/gdbstub.h"
32 #include "sysemu/dma.h"
33 #include "sysemu/kvm.h"
34 #include "sysemu/hax.h"
35 #include "qmp-commands.h"
37 #include "qemu/thread.h"
38 #include "sysemu/cpus.h"
39 #include "sysemu/qtest.h"
40 #include "qemu/main-loop.h"
41 #include "qemu/bitmap.h"
42 #include "qemu/seqlock.h"
43 #include "qapi-event.h"
47 #include "qemu/compatfd.h"
52 #include <sys/prctl.h>
55 #define PR_MCE_KILL 33
58 #ifndef PR_MCE_KILL_SET
59 #define PR_MCE_KILL_SET 1
62 #ifndef PR_MCE_KILL_EARLY
63 #define PR_MCE_KILL_EARLY 1
66 #endif /* CONFIG_LINUX */
68 static CPUState *next_cpu;
72 bool cpu_is_stopped(CPUState *cpu)
74 return cpu->stopped || !runstate_is_running();
77 static bool cpu_thread_is_idle(CPUState *cpu)
79 if (cpu->stop || cpu->queued_work_first) {
82 if (cpu_is_stopped(cpu)) {
85 if (!cpu->halted || cpu_has_work(cpu) ||
86 kvm_halt_in_kernel()) {
92 static bool all_cpu_threads_idle(void)
97 if (!cpu_thread_is_idle(cpu)) {
104 /***********************************************************/
105 /* guest cycle counter */
107 /* Protected by TimersState seqlock */
109 static int64_t vm_clock_warp_start = -1;
110 /* Conversion factor from emulated instructions to virtual clock ticks. */
111 static int icount_time_shift;
112 /* Arbitrarily pick 1MIPS as the minimum allowable speed. */
113 #define MAX_ICOUNT_SHIFT 10
115 static QEMUTimer *icount_rt_timer;
116 static QEMUTimer *icount_vm_timer;
117 static QEMUTimer *icount_warp_timer;
119 typedef struct TimersState {
120 /* Protected by BQL. */
121 int64_t cpu_ticks_prev;
122 int64_t cpu_ticks_offset;
124 /* cpu_clock_offset can be read out of BQL, so protect it with
127 QemuSeqLock vm_clock_seqlock;
128 int64_t cpu_clock_offset;
129 int32_t cpu_ticks_enabled;
132 /* Compensate for varying guest execution speed. */
133 int64_t qemu_icount_bias;
134 /* Only written by TCG thread */
138 static TimersState timers_state;
140 /* Return the virtual CPU time, based on the instruction counter. */
141 static int64_t cpu_get_icount_locked(void)
144 CPUState *cpu = current_cpu;
146 icount = timers_state.qemu_icount;
148 if (!cpu_can_do_io(cpu)) {
149 fprintf(stderr, "Bad clock read\n");
151 icount -= (cpu->icount_decr.u16.low + cpu->icount_extra);
153 return timers_state.qemu_icount_bias + cpu_icount_to_ns(icount);
156 int64_t cpu_get_icount(void)
162 start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
163 icount = cpu_get_icount_locked();
164 } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));
169 int64_t cpu_icount_to_ns(int64_t icount)
171 return icount << icount_time_shift;
174 /* return the host CPU cycle counter and handle stop/restart */
175 /* Caller must hold the BQL */
176 int64_t cpu_get_ticks(void)
181 return cpu_get_icount();
184 ticks = timers_state.cpu_ticks_offset;
185 if (timers_state.cpu_ticks_enabled) {
186 ticks += cpu_get_real_ticks();
189 if (timers_state.cpu_ticks_prev > ticks) {
190 /* Note: non increasing ticks may happen if the host uses
192 timers_state.cpu_ticks_offset += timers_state.cpu_ticks_prev - ticks;
193 ticks = timers_state.cpu_ticks_prev;
196 timers_state.cpu_ticks_prev = ticks;
200 static int64_t cpu_get_clock_locked(void)
204 ticks = timers_state.cpu_clock_offset;
205 if (timers_state.cpu_ticks_enabled) {
206 ticks += get_clock();
212 /* return the host CPU monotonic timer and handle stop/restart */
213 int64_t cpu_get_clock(void)
219 start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
220 ti = cpu_get_clock_locked();
221 } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));
226 /* return the offset between the host clock and virtual CPU clock */
227 int64_t cpu_get_clock_offset(void)
233 start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
234 ti = timers_state.cpu_clock_offset;
235 if (!timers_state.cpu_ticks_enabled) {
238 } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));
243 /* enable cpu_get_ticks()
244 * Caller must hold BQL which serves as the mutex for vm_clock_seqlock.
246 void cpu_enable_ticks(void)
248 /* Here, the thing really protected by seqlock is cpu_clock_offset. */
249 seqlock_write_lock(&timers_state.vm_clock_seqlock);
250 if (!timers_state.cpu_ticks_enabled) {
251 timers_state.cpu_ticks_offset -= cpu_get_real_ticks();
252 timers_state.cpu_clock_offset -= get_clock();
253 timers_state.cpu_ticks_enabled = 1;
255 seqlock_write_unlock(&timers_state.vm_clock_seqlock);
258 /* disable cpu_get_ticks() : the clock is stopped. You must not call
259 * cpu_get_ticks() after that.
260 * Caller must hold BQL which serves as the mutex for vm_clock_seqlock.
262 void cpu_disable_ticks(void)
264 /* Here, the thing really protected by seqlock is cpu_clock_offset. */
265 seqlock_write_lock(&timers_state.vm_clock_seqlock);
266 if (timers_state.cpu_ticks_enabled) {
267 timers_state.cpu_ticks_offset += cpu_get_real_ticks();
268 timers_state.cpu_clock_offset = cpu_get_clock_locked();
269 timers_state.cpu_ticks_enabled = 0;
271 seqlock_write_unlock(&timers_state.vm_clock_seqlock);
274 /* Correlation between real and virtual time is always going to be
275 fairly approximate, so ignore small variation.
276 When the guest is idle real and virtual time will be aligned in
278 #define ICOUNT_WOBBLE (get_ticks_per_sec() / 10)
280 static void icount_adjust(void)
286 /* Protected by TimersState mutex. */
287 static int64_t last_delta;
289 /* If the VM is not running, then do nothing. */
290 if (!runstate_is_running()) {
294 seqlock_write_lock(&timers_state.vm_clock_seqlock);
295 cur_time = cpu_get_clock_locked();
296 cur_icount = cpu_get_icount_locked();
298 delta = cur_icount - cur_time;
299 /* FIXME: This is a very crude algorithm, somewhat prone to oscillation. */
301 && last_delta + ICOUNT_WOBBLE < delta * 2
302 && icount_time_shift > 0) {
303 /* The guest is getting too far ahead. Slow time down. */
307 && last_delta - ICOUNT_WOBBLE > delta * 2
308 && icount_time_shift < MAX_ICOUNT_SHIFT) {
309 /* The guest is getting too far behind. Speed time up. */
313 timers_state.qemu_icount_bias = cur_icount
314 - (timers_state.qemu_icount << icount_time_shift);
315 seqlock_write_unlock(&timers_state.vm_clock_seqlock);
318 static void icount_adjust_rt(void *opaque)
320 timer_mod(icount_rt_timer,
321 qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + 1000);
325 static void icount_adjust_vm(void *opaque)
327 timer_mod(icount_vm_timer,
328 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
329 get_ticks_per_sec() / 10);
333 static int64_t qemu_icount_round(int64_t count)
335 return (count + (1 << icount_time_shift) - 1) >> icount_time_shift;
338 static void icount_warp_rt(void *opaque)
340 /* The icount_warp_timer is rescheduled soon after vm_clock_warp_start
341 * changes from -1 to another value, so the race here is okay.
343 if (atomic_read(&vm_clock_warp_start) == -1) {
347 seqlock_write_lock(&timers_state.vm_clock_seqlock);
348 if (runstate_is_running()) {
349 int64_t clock = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
352 warp_delta = clock - vm_clock_warp_start;
353 if (use_icount == 2) {
355 * In adaptive mode, do not let QEMU_CLOCK_VIRTUAL run too
356 * far ahead of real time.
358 int64_t cur_time = cpu_get_clock_locked();
359 int64_t cur_icount = cpu_get_icount_locked();
360 int64_t delta = cur_time - cur_icount;
361 warp_delta = MIN(warp_delta, delta);
363 timers_state.qemu_icount_bias += warp_delta;
365 vm_clock_warp_start = -1;
366 seqlock_write_unlock(&timers_state.vm_clock_seqlock);
368 if (qemu_clock_expired(QEMU_CLOCK_VIRTUAL)) {
369 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
373 void qtest_clock_warp(int64_t dest)
375 int64_t clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
376 assert(qtest_enabled());
377 while (clock < dest) {
378 int64_t deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
379 int64_t warp = qemu_soonest_timeout(dest - clock, deadline);
380 seqlock_write_lock(&timers_state.vm_clock_seqlock);
381 timers_state.qemu_icount_bias += warp;
382 seqlock_write_unlock(&timers_state.vm_clock_seqlock);
384 qemu_clock_run_timers(QEMU_CLOCK_VIRTUAL);
385 clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
387 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
390 void qemu_clock_warp(QEMUClockType type)
396 * There are too many global variables to make the "warp" behavior
397 * applicable to other clocks. But a clock argument removes the
398 * need for if statements all over the place.
400 if (type != QEMU_CLOCK_VIRTUAL || !use_icount) {
405 * If the CPUs have been sleeping, advance QEMU_CLOCK_VIRTUAL timer now.
406 * This ensures that the deadline for the timer is computed correctly below.
407 * This also makes sure that the insn counter is synchronized before the
408 * CPU starts running, in case the CPU is woken by an event other than
409 * the earliest QEMU_CLOCK_VIRTUAL timer.
411 icount_warp_rt(NULL);
412 timer_del(icount_warp_timer);
413 if (!all_cpu_threads_idle()) {
417 if (qtest_enabled()) {
418 /* When testing, qtest commands advance icount. */
422 /* We want to use the earliest deadline from ALL vm_clocks */
423 clock = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
424 deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
431 * Ensure QEMU_CLOCK_VIRTUAL proceeds even when the virtual CPU goes to
432 * sleep. Otherwise, the CPU might be waiting for a future timer
433 * interrupt to wake it up, but the interrupt never comes because
434 * the vCPU isn't running any insns and thus doesn't advance the
435 * QEMU_CLOCK_VIRTUAL.
437 * An extreme solution for this problem would be to never let VCPUs
438 * sleep in icount mode if there is a pending QEMU_CLOCK_VIRTUAL
439 * timer; rather time could just advance to the next QEMU_CLOCK_VIRTUAL
440 * event. Instead, we do stop VCPUs and only advance QEMU_CLOCK_VIRTUAL
441 * after some "real" time (related to the time left until the next
442 * event) has passed. The QEMU_CLOCK_REALTIME timer will do this.
443 * This prevents the warps from being visible externally; for example,
444 * you will not be sending network packets continuously instead of
447 seqlock_write_lock(&timers_state.vm_clock_seqlock);
448 if (vm_clock_warp_start == -1 || vm_clock_warp_start > clock) {
449 vm_clock_warp_start = clock;
451 seqlock_write_unlock(&timers_state.vm_clock_seqlock);
452 timer_mod_anticipate(icount_warp_timer, clock + deadline);
453 } else if (deadline == 0) {
454 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
458 static bool icount_state_needed(void *opaque)
464 * This is a subsection for icount migration.
466 static const VMStateDescription icount_vmstate_timers = {
467 .name = "timer/icount",
469 .minimum_version_id = 1,
470 .fields = (VMStateField[]) {
471 VMSTATE_INT64(qemu_icount_bias, TimersState),
472 VMSTATE_INT64(qemu_icount, TimersState),
473 VMSTATE_END_OF_LIST()
477 static const VMStateDescription vmstate_timers = {
480 .minimum_version_id = 1,
481 .fields = (VMStateField[]) {
482 VMSTATE_INT64(cpu_ticks_offset, TimersState),
483 VMSTATE_INT64(dummy, TimersState),
484 VMSTATE_INT64_V(cpu_clock_offset, TimersState, 2),
485 VMSTATE_END_OF_LIST()
487 .subsections = (VMStateSubsection[]) {
489 .vmsd = &icount_vmstate_timers,
490 .needed = icount_state_needed,
497 void cpu_ticks_init(void)
499 seqlock_init(&timers_state.vm_clock_seqlock, NULL);
500 vmstate_register(NULL, 0, &vmstate_timers, &timers_state);
503 void configure_icount(QemuOpts *opts, Error **errp)
506 char *rem_str = NULL;
508 option = qemu_opt_get(opts, "shift");
510 if (qemu_opt_get(opts, "align") != NULL) {
511 error_setg(errp, "Please specify shift option when using align");
515 icount_align_option = qemu_opt_get_bool(opts, "align", false);
516 icount_warp_timer = timer_new_ns(QEMU_CLOCK_REALTIME,
517 icount_warp_rt, NULL);
518 if (strcmp(option, "auto") != 0) {
520 icount_time_shift = strtol(option, &rem_str, 0);
521 if (errno != 0 || *rem_str != '\0' || !strlen(option)) {
522 error_setg(errp, "icount: Invalid shift value");
526 } else if (icount_align_option) {
527 error_setg(errp, "shift=auto and align=on are incompatible");
532 /* 125MIPS seems a reasonable initial guess at the guest speed.
533 It will be corrected fairly quickly anyway. */
534 icount_time_shift = 3;
536 /* Have both realtime and virtual time triggers for speed adjustment.
537 The realtime trigger catches emulated time passing too slowly,
538 the virtual time trigger catches emulated time passing too fast.
539 Realtime triggers occur even when idle, so use them less frequently
541 icount_rt_timer = timer_new_ms(QEMU_CLOCK_REALTIME,
542 icount_adjust_rt, NULL);
543 timer_mod(icount_rt_timer,
544 qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + 1000);
545 icount_vm_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
546 icount_adjust_vm, NULL);
547 timer_mod(icount_vm_timer,
548 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
549 get_ticks_per_sec() / 10);
552 /***********************************************************/
553 void hw_error(const char *fmt, ...)
559 fprintf(stderr, "qemu: hardware error: ");
560 vfprintf(stderr, fmt, ap);
561 fprintf(stderr, "\n");
563 fprintf(stderr, "CPU #%d:\n", cpu->cpu_index);
564 cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_FPU);
570 void cpu_synchronize_all_states(void)
575 cpu_synchronize_state(cpu);
579 void cpu_synchronize_all_post_reset(void)
584 cpu_synchronize_post_reset(cpu);
586 if (hax_enabled() && hax_ug_platform())
587 hax_cpu_synchronize_post_reset(cpu);
592 void cpu_synchronize_all_post_init(void)
597 cpu_synchronize_post_init(cpu);
599 if (hax_enabled() && hax_ug_platform())
600 hax_cpu_synchronize_post_init(cpu);
605 void cpu_clean_all_dirty(void)
610 cpu_clean_state(cpu);
614 static int do_vm_stop(RunState state)
618 if (runstate_is_running()) {
622 vm_state_notify(0, state);
623 qapi_event_send_stop(&error_abort);
627 ret = bdrv_flush_all();
632 static bool cpu_can_run(CPUState *cpu)
637 if (cpu_is_stopped(cpu)) {
643 static void cpu_handle_guest_debug(CPUState *cpu)
645 gdb_set_stop_cpu(cpu);
646 qemu_system_debug_request();
650 static void cpu_signal(int sig)
653 cpu_exit(current_cpu);
659 static void sigbus_reraise(void)
662 struct sigaction action;
664 memset(&action, 0, sizeof(action));
665 action.sa_handler = SIG_DFL;
666 if (!sigaction(SIGBUS, &action, NULL)) {
669 sigaddset(&set, SIGBUS);
670 sigprocmask(SIG_UNBLOCK, &set, NULL);
672 perror("Failed to re-raise SIGBUS!\n");
676 static void sigbus_handler(int n, struct qemu_signalfd_siginfo *siginfo,
679 if (kvm_on_sigbus(siginfo->ssi_code,
680 (void *)(intptr_t)siginfo->ssi_addr)) {
685 static void qemu_init_sigbus(void)
687 struct sigaction action;
689 memset(&action, 0, sizeof(action));
690 action.sa_flags = SA_SIGINFO;
691 action.sa_sigaction = (void (*)(int, siginfo_t*, void*))sigbus_handler;
692 sigaction(SIGBUS, &action, NULL);
694 prctl(PR_MCE_KILL, PR_MCE_KILL_SET, PR_MCE_KILL_EARLY, 0, 0);
697 static void qemu_kvm_eat_signals(CPUState *cpu)
699 struct timespec ts = { 0, 0 };
705 sigemptyset(&waitset);
706 sigaddset(&waitset, SIG_IPI);
707 sigaddset(&waitset, SIGBUS);
710 r = sigtimedwait(&waitset, &siginfo, &ts);
711 if (r == -1 && !(errno == EAGAIN || errno == EINTR)) {
712 perror("sigtimedwait");
718 if (kvm_on_sigbus_vcpu(cpu, siginfo.si_code, siginfo.si_addr)) {
726 r = sigpending(&chkset);
728 perror("sigpending");
731 } while (sigismember(&chkset, SIG_IPI) || sigismember(&chkset, SIGBUS));
734 #else /* !CONFIG_LINUX */
736 static void qemu_init_sigbus(void)
740 static void qemu_kvm_eat_signals(CPUState *cpu)
743 #endif /* !CONFIG_LINUX */
746 static void dummy_signal(int sig)
750 static void qemu_kvm_init_cpu_signals(CPUState *cpu)
754 struct sigaction sigact;
756 memset(&sigact, 0, sizeof(sigact));
757 sigact.sa_handler = dummy_signal;
758 sigaction(SIG_IPI, &sigact, NULL);
760 pthread_sigmask(SIG_BLOCK, NULL, &set);
761 sigdelset(&set, SIG_IPI);
762 sigdelset(&set, SIGBUS);
763 r = kvm_set_signal_mask(cpu, &set);
765 fprintf(stderr, "kvm_set_signal_mask: %s\n", strerror(-r));
770 static void qemu_tcg_init_cpu_signals(void)
773 struct sigaction sigact;
775 memset(&sigact, 0, sizeof(sigact));
776 sigact.sa_handler = cpu_signal;
777 sigaction(SIG_IPI, &sigact, NULL);
780 sigaddset(&set, SIG_IPI);
781 pthread_sigmask(SIG_UNBLOCK, &set, NULL);
785 static void qemu_kvm_init_cpu_signals(CPUState *cpu)
790 static void qemu_tcg_init_cpu_signals(void)
795 static QemuMutex qemu_global_mutex;
796 static QemuCond qemu_io_proceeded_cond;
797 static bool iothread_requesting_mutex;
799 static QemuThread io_thread;
801 static QemuThread *tcg_cpu_thread;
802 static QemuCond *tcg_halt_cond;
805 static QemuCond qemu_cpu_cond;
807 static QemuCond qemu_pause_cond;
808 static QemuCond qemu_work_cond;
810 void qemu_init_cpu_loop(void)
813 qemu_cond_init(&qemu_cpu_cond);
814 qemu_cond_init(&qemu_pause_cond);
815 qemu_cond_init(&qemu_work_cond);
816 qemu_cond_init(&qemu_io_proceeded_cond);
817 qemu_mutex_init(&qemu_global_mutex);
819 qemu_thread_get_self(&io_thread);
822 void run_on_cpu(CPUState *cpu, void (*func)(void *data), void *data)
824 struct qemu_work_item wi;
826 if (qemu_cpu_is_self(cpu)) {
834 if (cpu->queued_work_first == NULL) {
835 cpu->queued_work_first = &wi;
837 cpu->queued_work_last->next = &wi;
839 cpu->queued_work_last = &wi;
845 CPUState *self_cpu = current_cpu;
847 qemu_cond_wait(&qemu_work_cond, &qemu_global_mutex);
848 current_cpu = self_cpu;
852 void async_run_on_cpu(CPUState *cpu, void (*func)(void *data), void *data)
854 struct qemu_work_item *wi;
856 if (qemu_cpu_is_self(cpu)) {
861 wi = g_malloc0(sizeof(struct qemu_work_item));
865 if (cpu->queued_work_first == NULL) {
866 cpu->queued_work_first = wi;
868 cpu->queued_work_last->next = wi;
870 cpu->queued_work_last = wi;
877 static void flush_queued_work(CPUState *cpu)
879 struct qemu_work_item *wi;
881 if (cpu->queued_work_first == NULL) {
885 while ((wi = cpu->queued_work_first)) {
886 cpu->queued_work_first = wi->next;
893 cpu->queued_work_last = NULL;
894 qemu_cond_broadcast(&qemu_work_cond);
897 static void qemu_wait_io_event_common(CPUState *cpu)
902 qemu_cond_signal(&qemu_pause_cond);
904 flush_queued_work(cpu);
905 cpu->thread_kicked = false;
908 static void qemu_tcg_wait_io_event(void)
912 while (all_cpu_threads_idle()) {
913 /* Start accounting real time to the virtual clock if the CPUs
915 qemu_clock_warp(QEMU_CLOCK_VIRTUAL);
916 qemu_cond_wait(tcg_halt_cond, &qemu_global_mutex);
919 while (iothread_requesting_mutex) {
920 qemu_cond_wait(&qemu_io_proceeded_cond, &qemu_global_mutex);
924 qemu_wait_io_event_common(cpu);
929 static void qemu_hax_wait_io_event(CPUState *cpu)
931 while (cpu_thread_is_idle(cpu)) {
932 qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
935 qemu_wait_io_event_common(cpu);
939 static void qemu_kvm_wait_io_event(CPUState *cpu)
941 while (cpu_thread_is_idle(cpu)) {
942 qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
945 qemu_kvm_eat_signals(cpu);
946 qemu_wait_io_event_common(cpu);
949 static void *qemu_kvm_cpu_thread_fn(void *arg)
954 qemu_mutex_lock(&qemu_global_mutex);
955 qemu_thread_get_self(cpu->thread);
956 cpu->thread_id = qemu_get_thread_id();
959 r = kvm_init_vcpu(cpu);
961 fprintf(stderr, "kvm_init_vcpu failed: %s\n", strerror(-r));
965 qemu_kvm_init_cpu_signals(cpu);
967 /* signal CPU creation */
969 qemu_cond_signal(&qemu_cpu_cond);
972 if (cpu_can_run(cpu)) {
973 r = kvm_cpu_exec(cpu);
974 if (r == EXCP_DEBUG) {
975 cpu_handle_guest_debug(cpu);
978 qemu_kvm_wait_io_event(cpu);
984 static void *qemu_dummy_cpu_thread_fn(void *arg)
987 fprintf(stderr, "qtest is not supported under Windows\n");
994 qemu_mutex_lock_iothread();
995 qemu_thread_get_self(cpu->thread);
996 cpu->thread_id = qemu_get_thread_id();
998 sigemptyset(&waitset);
999 sigaddset(&waitset, SIG_IPI);
1001 /* signal CPU creation */
1002 cpu->created = true;
1003 qemu_cond_signal(&qemu_cpu_cond);
1008 qemu_mutex_unlock_iothread();
1011 r = sigwait(&waitset, &sig);
1012 } while (r == -1 && (errno == EAGAIN || errno == EINTR));
1017 qemu_mutex_lock_iothread();
1019 qemu_wait_io_event_common(cpu);
1026 static void tcg_exec_all(void);
1028 static void *qemu_tcg_cpu_thread_fn(void *arg)
1030 CPUState *cpu = arg;
1032 qemu_tcg_init_cpu_signals();
1033 qemu_thread_get_self(cpu->thread);
1035 qemu_mutex_lock(&qemu_global_mutex);
1037 cpu->thread_id = qemu_get_thread_id();
1038 cpu->created = true;
1040 qemu_cond_signal(&qemu_cpu_cond);
1042 /* wait for initial kick-off after machine start */
1043 while (QTAILQ_FIRST(&cpus)->stopped) {
1044 qemu_cond_wait(tcg_halt_cond, &qemu_global_mutex);
1046 /* process any pending work */
1048 qemu_wait_io_event_common(cpu);
1056 int64_t deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
1058 if (deadline == 0) {
1059 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
1062 qemu_tcg_wait_io_event();
1069 static void *qemu_hax_cpu_thread_fn(void *arg)
1071 CPUState *cpu = arg;
1073 qemu_thread_get_self(cpu->thread);
1074 qemu_mutex_lock(&qemu_global_mutex);
1076 cpu->thread_id = qemu_get_thread_id();
1077 cpu->created = true;
1082 qemu_cond_signal(&qemu_cpu_cond);
1085 if (cpu_can_run(cpu)) {
1086 r = hax_smp_cpu_exec(cpu);
1087 if (r == EXCP_DEBUG) {
1088 cpu_handle_guest_debug(cpu);
1091 qemu_hax_wait_io_event(cpu);
1097 static void qemu_cpu_kick_thread(CPUState *cpu)
1102 err = pthread_kill(cpu->thread->thread, SIG_IPI);
1104 fprintf(stderr, "qemu:%s: %s", __func__, strerror(err));
1107 /* The cpu thread cannot catch it reliably when shutting down the guest on Mac.
1108 * We can double check it and resend it
1111 #ifdef CONFIG_DARWIN
1115 if (hax_enabled() && hax_ug_platform())
1116 cpu->exit_request = 1;
1119 if (!qemu_cpu_is_self(cpu)) {
1122 if (SuspendThread(cpu->hThread) == (DWORD)-1) {
1123 fprintf(stderr, "qemu:%s: GetLastError:%lu\n", __func__,
1128 /* On multi-core systems, we are not sure that the thread is actually
1129 * suspended until we can get the context.
1131 tcgContext.ContextFlags = CONTEXT_CONTROL;
1132 while (GetThreadContext(cpu->hThread, &tcgContext) != 0) {
1137 if(hax_enabled() && hax_ug_platform())
1138 cpu->exit_request = 1;
1140 if (ResumeThread(cpu->hThread) == (DWORD)-1) {
1141 fprintf(stderr, "qemu:%s: GetLastError:%lu\n", __func__,
1149 void qemu_cpu_kick(CPUState *cpu)
1151 qemu_cond_broadcast(cpu->halt_cond);
1153 if (((hax_enabled() && hax_ug_platform()) || !tcg_enabled()) && !cpu->thread_kicked) {
1155 if (!tcg_enabled() && !cpu->thread_kicked) {
1157 qemu_cpu_kick_thread(cpu);
1158 cpu->thread_kicked = true;
1162 void qemu_cpu_kick_self(void)
1165 assert(current_cpu);
1167 if (!current_cpu->thread_kicked) {
1168 qemu_cpu_kick_thread(current_cpu);
1169 current_cpu->thread_kicked = true;
1176 bool qemu_cpu_is_self(CPUState *cpu)
1178 return qemu_thread_is_self(cpu->thread);
1181 static bool qemu_in_vcpu_thread(void)
1183 return current_cpu && qemu_cpu_is_self(current_cpu);
1186 void qemu_mutex_lock_iothread(void)
1189 if ((hax_enabled() && hax_ug_platform()) || !tcg_enabled()) {
1191 if (!tcg_enabled()) {
1193 qemu_mutex_lock(&qemu_global_mutex);
1195 iothread_requesting_mutex = true;
1196 if (qemu_mutex_trylock(&qemu_global_mutex)) {
1197 qemu_cpu_kick_thread(first_cpu);
1198 qemu_mutex_lock(&qemu_global_mutex);
1200 iothread_requesting_mutex = false;
1201 qemu_cond_broadcast(&qemu_io_proceeded_cond);
1205 void qemu_mutex_unlock_iothread(void)
1207 qemu_mutex_unlock(&qemu_global_mutex);
1210 static int all_vcpus_paused(void)
1215 if (!cpu->stopped) {
1223 void pause_all_vcpus(void)
1227 qemu_clock_enable(QEMU_CLOCK_VIRTUAL, false);
1233 if (qemu_in_vcpu_thread()) {
1235 if (!kvm_enabled()) {
1238 cpu->stopped = true;
1244 while (!all_vcpus_paused()) {
1245 qemu_cond_wait(&qemu_pause_cond, &qemu_global_mutex);
1252 void cpu_resume(CPUState *cpu)
1255 cpu->stopped = false;
1259 void resume_all_vcpus(void)
1263 qemu_clock_enable(QEMU_CLOCK_VIRTUAL, true);
1269 /* For temporary buffers for forming a name */
1270 #define VCPU_THREAD_NAME_SIZE 16
1272 static void qemu_tcg_init_vcpu(CPUState *cpu)
1278 char thread_name[VCPU_THREAD_NAME_SIZE];
1280 tcg_cpu_address_space_init(cpu, cpu->as);
1282 /* share a single thread for all cpus with TCG */
1283 if (!tcg_cpu_thread) {
1284 cpu->thread = g_malloc0(sizeof(QemuThread));
1285 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
1286 qemu_cond_init(cpu->halt_cond);
1287 tcg_halt_cond = cpu->halt_cond;
1288 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/TCG",
1290 qemu_thread_create(cpu->thread, thread_name, qemu_tcg_cpu_thread_fn,
1291 cpu, QEMU_THREAD_JOINABLE);
1293 cpu->hThread = qemu_thread_get_handle(cpu->thread);
1295 while (!cpu->created) {
1296 qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
1298 tcg_cpu_thread = cpu->thread;
1300 cpu->thread = tcg_cpu_thread;
1301 cpu->halt_cond = tcg_halt_cond;
1306 static void qemu_hax_start_vcpu(CPUState *cpu)
1308 char thread_name[VCPU_THREAD_NAME_SIZE];
1310 cpu->thread = g_malloc0(sizeof(QemuThread));
1311 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
1312 qemu_cond_init(cpu->halt_cond);
1314 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/HAX",
1317 qemu_thread_create(cpu->thread, thread_name, qemu_hax_cpu_thread_fn,
1318 cpu, QEMU_THREAD_JOINABLE);
1320 cpu->hThread = qemu_thread_get_handle(cpu->thread);
1322 while (!cpu->created) {
1323 qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
1328 static void qemu_kvm_start_vcpu(CPUState *cpu)
1330 char thread_name[VCPU_THREAD_NAME_SIZE];
1332 cpu->thread = g_malloc0(sizeof(QemuThread));
1333 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
1334 qemu_cond_init(cpu->halt_cond);
1335 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/KVM",
1337 qemu_thread_create(cpu->thread, thread_name, qemu_kvm_cpu_thread_fn,
1338 cpu, QEMU_THREAD_JOINABLE);
1339 while (!cpu->created) {
1340 qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
1344 static void qemu_dummy_start_vcpu(CPUState *cpu)
1346 char thread_name[VCPU_THREAD_NAME_SIZE];
1348 cpu->thread = g_malloc0(sizeof(QemuThread));
1349 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
1350 qemu_cond_init(cpu->halt_cond);
1351 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/DUMMY",
1353 qemu_thread_create(cpu->thread, thread_name, qemu_dummy_cpu_thread_fn, cpu,
1354 QEMU_THREAD_JOINABLE);
1355 while (!cpu->created) {
1356 qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
1360 void qemu_init_vcpu(CPUState *cpu)
1362 cpu->nr_cores = smp_cores;
1363 cpu->nr_threads = smp_threads;
1364 cpu->stopped = true;
1366 if (kvm_enabled()) {
1367 qemu_kvm_start_vcpu(cpu);
1369 } else if (hax_enabled() && hax_ug_platform()) {
1370 qemu_hax_start_vcpu(cpu);
1372 } else if (tcg_enabled()) {
1373 qemu_tcg_init_vcpu(cpu);
1375 qemu_dummy_start_vcpu(cpu);
1379 void cpu_stop_current(void)
1382 current_cpu->stop = false;
1383 current_cpu->stopped = true;
1384 cpu_exit(current_cpu);
1385 qemu_cond_signal(&qemu_pause_cond);
1389 int vm_stop(RunState state)
1391 if (qemu_in_vcpu_thread()) {
1392 qemu_system_vmstop_request_prepare();
1393 qemu_system_vmstop_request(state);
1395 * FIXME: should not return to device code in case
1396 * vm_stop() has been requested.
1402 return do_vm_stop(state);
1405 /* does a state transition even if the VM is already stopped,
1406 current state is forgotten forever */
1407 int vm_stop_force_state(RunState state)
1409 if (runstate_is_running()) {
1410 return vm_stop(state);
1412 runstate_set(state);
1413 /* Make sure to return an error if the flush in a previous vm_stop()
1415 return bdrv_flush_all();
1419 static int tcg_cpu_exec(CPUArchState *env)
1421 CPUState *cpu = ENV_GET_CPU(env);
1423 #ifdef CONFIG_PROFILER
1427 #ifdef CONFIG_PROFILER
1428 ti = profile_getclock();
1434 timers_state.qemu_icount -= (cpu->icount_decr.u16.low
1435 + cpu->icount_extra);
1436 cpu->icount_decr.u16.low = 0;
1437 cpu->icount_extra = 0;
1438 deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
1440 /* Maintain prior (possibly buggy) behaviour where if no deadline
1441 * was set (as there is no QEMU_CLOCK_VIRTUAL timer) or it is more than
1442 * INT32_MAX nanoseconds ahead, we still use INT32_MAX
1445 if ((deadline < 0) || (deadline > INT32_MAX)) {
1446 deadline = INT32_MAX;
1449 count = qemu_icount_round(deadline);
1450 timers_state.qemu_icount += count;
1451 decr = (count > 0xffff) ? 0xffff : count;
1453 cpu->icount_decr.u16.low = decr;
1454 cpu->icount_extra = count;
1456 ret = cpu_exec(env);
1457 #ifdef CONFIG_PROFILER
1458 qemu_time += profile_getclock() - ti;
1461 /* Fold pending instructions back into the
1462 instruction counter, and clear the interrupt flag. */
1463 timers_state.qemu_icount -= (cpu->icount_decr.u16.low
1464 + cpu->icount_extra);
1465 cpu->icount_decr.u32 = 0;
1466 cpu->icount_extra = 0;
1471 static void tcg_exec_all(void)
1475 /* Account partial waits to QEMU_CLOCK_VIRTUAL. */
1476 qemu_clock_warp(QEMU_CLOCK_VIRTUAL);
1478 if (next_cpu == NULL) {
1479 next_cpu = first_cpu;
1481 for (; next_cpu != NULL && !exit_request; next_cpu = CPU_NEXT(next_cpu)) {
1482 CPUState *cpu = next_cpu;
1483 CPUArchState *env = cpu->env_ptr;
1485 qemu_clock_enable(QEMU_CLOCK_VIRTUAL,
1486 (cpu->singlestep_enabled & SSTEP_NOTIMER) == 0);
1488 if (cpu_can_run(cpu)) {
1489 r = tcg_cpu_exec(env);
1490 if (r == EXCP_DEBUG) {
1491 cpu_handle_guest_debug(cpu);
1494 } else if (cpu->stop || cpu->stopped) {
1501 void list_cpus(FILE *f, fprintf_function cpu_fprintf, const char *optarg)
1503 /* XXX: implement xxx_cpu_list for targets that still miss it */
1504 #if defined(cpu_list)
1505 cpu_list(f, cpu_fprintf);
1509 CpuInfoList *qmp_query_cpus(Error **errp)
1511 CpuInfoList *head = NULL, *cur_item = NULL;
1516 #if defined(TARGET_I386)
1517 X86CPU *x86_cpu = X86_CPU(cpu);
1518 CPUX86State *env = &x86_cpu->env;
1519 #elif defined(TARGET_PPC)
1520 PowerPCCPU *ppc_cpu = POWERPC_CPU(cpu);
1521 CPUPPCState *env = &ppc_cpu->env;
1522 #elif defined(TARGET_SPARC)
1523 SPARCCPU *sparc_cpu = SPARC_CPU(cpu);
1524 CPUSPARCState *env = &sparc_cpu->env;
1525 #elif defined(TARGET_MIPS)
1526 MIPSCPU *mips_cpu = MIPS_CPU(cpu);
1527 CPUMIPSState *env = &mips_cpu->env;
1528 #elif defined(TARGET_TRICORE)
1529 TriCoreCPU *tricore_cpu = TRICORE_CPU(cpu);
1530 CPUTriCoreState *env = &tricore_cpu->env;
1533 cpu_synchronize_state(cpu);
1535 info = g_malloc0(sizeof(*info));
1536 info->value = g_malloc0(sizeof(*info->value));
1537 info->value->CPU = cpu->cpu_index;
1538 info->value->current = (cpu == first_cpu);
1539 info->value->halted = cpu->halted;
1540 info->value->thread_id = cpu->thread_id;
1541 #if defined(TARGET_I386)
1542 info->value->has_pc = true;
1543 info->value->pc = env->eip + env->segs[R_CS].base;
1544 #elif defined(TARGET_PPC)
1545 info->value->has_nip = true;
1546 info->value->nip = env->nip;
1547 #elif defined(TARGET_SPARC)
1548 info->value->has_pc = true;
1549 info->value->pc = env->pc;
1550 info->value->has_npc = true;
1551 info->value->npc = env->npc;
1552 #elif defined(TARGET_MIPS)
1553 info->value->has_PC = true;
1554 info->value->PC = env->active_tc.PC;
1555 #elif defined(TARGET_TRICORE)
1556 info->value->has_PC = true;
1557 info->value->PC = env->PC;
1560 /* XXX: waiting for the qapi to support GSList */
1562 head = cur_item = info;
1564 cur_item->next = info;
1572 void qmp_memsave(int64_t addr, int64_t size, const char *filename,
1573 bool has_cpu, int64_t cpu_index, Error **errp)
1584 cpu = qemu_get_cpu(cpu_index);
1586 error_set(errp, QERR_INVALID_PARAMETER_VALUE, "cpu-index",
1591 f = fopen(filename, "wb");
1593 error_setg_file_open(errp, errno, filename);
1601 if (cpu_memory_rw_debug(cpu, addr, buf, l, 0) != 0) {
1602 error_setg(errp, "Invalid addr 0x%016" PRIx64 "specified", addr);
1605 if (fwrite(buf, 1, l, f) != l) {
1606 error_set(errp, QERR_IO_ERROR);
1617 void qmp_pmemsave(int64_t addr, int64_t size, const char *filename,
1624 f = fopen(filename, "wb");
1626 error_setg_file_open(errp, errno, filename);
1634 cpu_physical_memory_read(addr, buf, l);
1635 if (fwrite(buf, 1, l, f) != l) {
1636 error_set(errp, QERR_IO_ERROR);
1647 void qmp_inject_nmi(Error **errp)
1649 #if defined(TARGET_I386)
1653 X86CPU *cpu = X86_CPU(cs);
1655 if (!cpu->apic_state) {
1656 cpu_interrupt(cs, CPU_INTERRUPT_NMI);
1658 apic_deliver_nmi(cpu->apic_state);
1662 nmi_monitor_handle(monitor_get_cpu_index(), errp);
1666 void dump_drift_info(FILE *f, fprintf_function cpu_fprintf)
1672 cpu_fprintf(f, "Host - Guest clock %"PRIi64" ms\n",
1673 (cpu_get_clock() - cpu_get_icount())/SCALE_MS);
1674 if (icount_align_option) {
1675 cpu_fprintf(f, "Max guest delay %"PRIi64" ms\n", -max_delay/SCALE_MS);
1676 cpu_fprintf(f, "Max guest advance %"PRIi64" ms\n", max_advance/SCALE_MS);
1678 cpu_fprintf(f, "Max guest delay NA\n");
1679 cpu_fprintf(f, "Max guest advance NA\n");