/*
 * QEMU System Emulator
 *
 * Copyright (c) 2003-2008 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

/* Needed early for CONFIG_BSD etc. */
#include "config-host.h"

#include "monitor/monitor.h"
#include "qapi/qmp/qerror.h"
#include "qemu/error-report.h"
#include "sysemu/sysemu.h"
#include "exec/gdbstub.h"
#include "sysemu/dma.h"
#include "sysemu/kvm.h"
#include "qmp-commands.h"

#include "qemu/thread.h"
#include "sysemu/cpus.h"
#include "sysemu/qtest.h"
#include "qemu/main-loop.h"
#include "qemu/bitmap.h"
#include "qemu/seqlock.h"
#include "qapi-event.h"
#include "hw/nmi.h"
#include "sysemu/replay.h"

#ifndef _WIN32
#include "qemu/compatfd.h"
#endif

#ifdef CONFIG_LINUX

#include <sys/prctl.h>

#ifndef PR_MCE_KILL
#define PR_MCE_KILL 33
#endif

#ifndef PR_MCE_KILL_SET
#define PR_MCE_KILL_SET 1
#endif

#ifndef PR_MCE_KILL_EARLY
#define PR_MCE_KILL_EARLY 1
#endif

#endif /* CONFIG_LINUX */

static CPUState *next_cpu;
int64_t max_delay;
int64_t max_advance;

/* vcpu throttling controls */
static QEMUTimer *throttle_timer;
static unsigned int throttle_percentage;

#define CPU_THROTTLE_PCT_MIN 1
#define CPU_THROTTLE_PCT_MAX 99
#define CPU_THROTTLE_TIMESLICE_NS 10000000

bool cpu_is_stopped(CPUState *cpu)
{
    return cpu->stopped || !runstate_is_running();
}

static bool cpu_thread_is_idle(CPUState *cpu)
{
    if (cpu->stop || cpu->queued_work_first) {
        return false;
    }
    if (cpu_is_stopped(cpu)) {
        return true;
    }
    if (!cpu->halted || cpu_has_work(cpu) ||
        kvm_halt_in_kernel()) {
        return false;
    }
    return true;
}

static bool all_cpu_threads_idle(void)
{
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        if (!cpu_thread_is_idle(cpu)) {
            return false;
        }
    }
    return true;
}

/***********************************************************/
/* guest cycle counter */

/* Protected by TimersState seqlock */
static bool icount_sleep = true;
static int64_t vm_clock_warp_start = -1;
/* Conversion factor from emulated instructions to virtual clock ticks. */
static int icount_time_shift;
/* Arbitrarily pick 1MIPS as the minimum allowable speed. */
#define MAX_ICOUNT_SHIFT 10

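/* Each extra unit of icount_time_shift doubles the nanoseconds charged per
 * instruction: for example, shift 3 models 8 ns/insn (125 MIPS), while the
 * cap of 10 models 1024 ns/insn, roughly the 1 MIPS floor mentioned above. */
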
static QEMUTimer *icount_rt_timer;
static QEMUTimer *icount_vm_timer;
static QEMUTimer *icount_warp_timer;

typedef struct TimersState {
    /* Protected by BQL. */
    int64_t cpu_ticks_prev;
    int64_t cpu_ticks_offset;

    /* cpu_clock_offset can be read out of BQL, so protect it with
     * this lock.
     */
    QemuSeqLock vm_clock_seqlock;
    int64_t cpu_clock_offset;
    int32_t cpu_ticks_enabled;
    int64_t dummy;

    /* Compensate for varying guest execution speed. */
    int64_t qemu_icount_bias;
    /* Only written by TCG thread */
    int64_t qemu_icount;
} TimersState;

static TimersState timers_state;

int64_t cpu_get_icount_raw(void)
{
    int64_t icount;
    CPUState *cpu = current_cpu;

    icount = timers_state.qemu_icount;
    if (cpu) {
        if (!cpu->can_do_io) {
            fprintf(stderr, "Bad icount read\n");
            exit(1);
        }
        icount -= (cpu->icount_decr.u16.low + cpu->icount_extra);
    }
    return icount;
}

/* Return the virtual CPU time, based on the instruction counter. */
static int64_t cpu_get_icount_locked(void)
{
    int64_t icount = cpu_get_icount_raw();
    return timers_state.qemu_icount_bias + cpu_icount_to_ns(icount);
}

int64_t cpu_get_icount(void)
{
    int64_t icount;
    unsigned start;

    do {
        start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
        icount = cpu_get_icount_locked();
    } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));

    return icount;
}

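/* Lock-free reader idiom: if a writer touched vm_clock_seqlock between
 * seqlock_read_begin() and seqlock_read_retry(), the loop above simply
 * samples the clock again, so readers never block the writing thread. */
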
int64_t cpu_icount_to_ns(int64_t icount)
{
    return icount << icount_time_shift;
}

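/* For example, with icount_time_shift == 3 each instruction accounts for
 * 1 << 3 = 8 ns of virtual time, so cpu_icount_to_ns(1000) == 8000. */
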
/* return the host CPU cycle counter and handle stop/restart */
/* Caller must hold the BQL */
int64_t cpu_get_ticks(void)
{
    int64_t ticks;

    if (use_icount) {
        return cpu_get_icount();
    }

    ticks = timers_state.cpu_ticks_offset;
    if (timers_state.cpu_ticks_enabled) {
        ticks += cpu_get_host_ticks();
    }

    if (timers_state.cpu_ticks_prev > ticks) {
        /* Note: non-increasing ticks may happen if the host uses
           software suspend. */
        timers_state.cpu_ticks_offset += timers_state.cpu_ticks_prev - ticks;
        ticks = timers_state.cpu_ticks_prev;
    }

    timers_state.cpu_ticks_prev = ticks;
    return ticks;
}

static int64_t cpu_get_clock_locked(void)
{
    int64_t ticks;

    ticks = timers_state.cpu_clock_offset;
    if (timers_state.cpu_ticks_enabled) {
        ticks += get_clock();
    }

    return ticks;
}

/* return the host CPU monotonic timer and handle stop/restart */
int64_t cpu_get_clock(void)
{
    int64_t ti;
    unsigned start;

    do {
        start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
        ti = cpu_get_clock_locked();
    } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));

    return ti;
}

/* enable cpu_get_ticks()
 * Caller must hold BQL which serves as mutex for vm_clock_seqlock.
 */
void cpu_enable_ticks(void)
{
    /* Here, what is really protected by the seqlock is cpu_clock_offset. */
    seqlock_write_lock(&timers_state.vm_clock_seqlock);
    if (!timers_state.cpu_ticks_enabled) {
        timers_state.cpu_ticks_offset -= cpu_get_host_ticks();
        timers_state.cpu_clock_offset -= get_clock();
        timers_state.cpu_ticks_enabled = 1;
    }
    seqlock_write_unlock(&timers_state.vm_clock_seqlock);
}

/* disable cpu_get_ticks() : the clock is stopped. You must not call
 * cpu_get_ticks() after that.
 * Caller must hold BQL which serves as mutex for vm_clock_seqlock.
 */
void cpu_disable_ticks(void)
{
    /* Here, what is really protected by the seqlock is cpu_clock_offset. */
    seqlock_write_lock(&timers_state.vm_clock_seqlock);
    if (timers_state.cpu_ticks_enabled) {
        timers_state.cpu_ticks_offset += cpu_get_host_ticks();
        timers_state.cpu_clock_offset = cpu_get_clock_locked();
        timers_state.cpu_ticks_enabled = 0;
    }
    seqlock_write_unlock(&timers_state.vm_clock_seqlock);
}

/* Correlation between real and virtual time is always going to be
   fairly approximate, so ignore small variation.
   When the guest is idle real and virtual time will be aligned in
   the IO wait loop. */
#define ICOUNT_WOBBLE (get_ticks_per_sec() / 10)

static void icount_adjust(void)
{
    int64_t cur_time;
    int64_t cur_icount;
    int64_t delta;

    /* Protected by TimersState mutex. */
    static int64_t last_delta;

    /* If the VM is not running, then do nothing. */
    if (!runstate_is_running()) {
        return;
    }

    seqlock_write_lock(&timers_state.vm_clock_seqlock);
    cur_time = cpu_get_clock_locked();
    cur_icount = cpu_get_icount_locked();

    delta = cur_icount - cur_time;
    /* FIXME: This is a very crude algorithm, somewhat prone to oscillation. */
    if (delta > 0
        && last_delta + ICOUNT_WOBBLE < delta * 2
        && icount_time_shift > 0) {
        /* The guest is getting too far ahead. Slow time down. */
        icount_time_shift--;
    }
    if (delta < 0
        && last_delta - ICOUNT_WOBBLE > delta * 2
        && icount_time_shift < MAX_ICOUNT_SHIFT) {
        /* The guest is getting too far behind. Speed time up. */
        icount_time_shift++;
    }
    last_delta = delta;
    timers_state.qemu_icount_bias = cur_icount
                              - (timers_state.qemu_icount << icount_time_shift);
    seqlock_write_unlock(&timers_state.vm_clock_seqlock);
}

static void icount_adjust_rt(void *opaque)
{
    timer_mod(icount_rt_timer,
              qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000);
    icount_adjust();
}

static void icount_adjust_vm(void *opaque)
{
    timer_mod(icount_vm_timer,
              qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
              get_ticks_per_sec() / 10);
    icount_adjust();
}

static int64_t qemu_icount_round(int64_t count)
{
    return (count + (1 << icount_time_shift) - 1) >> icount_time_shift;
}

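/* Rounds a nanosecond deadline up to a whole number of instructions, e.g.
 * with icount_time_shift == 3 a 20 ns deadline becomes (20 + 7) >> 3 = 3
 * instructions rather than truncating down to 2. */
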
static void icount_warp_rt(void)
{
    /* The icount_warp_timer is rescheduled soon after vm_clock_warp_start
     * changes from -1 to another value, so the race here is okay.
     */
    if (atomic_read(&vm_clock_warp_start) == -1) {
        return;
    }

    seqlock_write_lock(&timers_state.vm_clock_seqlock);
    if (runstate_is_running()) {
        int64_t clock = REPLAY_CLOCK(REPLAY_CLOCK_VIRTUAL_RT,
                                     cpu_get_clock_locked());
        int64_t warp_delta;

        warp_delta = clock - vm_clock_warp_start;
        if (use_icount == 2) {
            /*
             * In adaptive mode, do not let QEMU_CLOCK_VIRTUAL run too
             * far ahead of real time.
             */
            int64_t cur_icount = cpu_get_icount_locked();
            int64_t delta = clock - cur_icount;
            warp_delta = MIN(warp_delta, delta);
        }
        timers_state.qemu_icount_bias += warp_delta;
    }
    vm_clock_warp_start = -1;
    seqlock_write_unlock(&timers_state.vm_clock_seqlock);

    if (qemu_clock_expired(QEMU_CLOCK_VIRTUAL)) {
        qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
    }
}

static void icount_dummy_timer(void *opaque)
{
    (void)opaque;
}

void qtest_clock_warp(int64_t dest)
{
    int64_t clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
    AioContext *aio_context;
    assert(qtest_enabled());
    aio_context = qemu_get_aio_context();
    while (clock < dest) {
        int64_t deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
        int64_t warp = qemu_soonest_timeout(dest - clock, deadline);

        seqlock_write_lock(&timers_state.vm_clock_seqlock);
        timers_state.qemu_icount_bias += warp;
        seqlock_write_unlock(&timers_state.vm_clock_seqlock);

        qemu_clock_run_timers(QEMU_CLOCK_VIRTUAL);
        timerlist_run_timers(aio_context->tlg.tl[QEMU_CLOCK_VIRTUAL]);
        clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
    }
    qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
}

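/* Note that qtest_clock_warp() advances in deadline-sized steps rather than
 * jumping straight to 'dest', so every timer that would have fired along
 * the way runs with the clock value it expected. */
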
void qemu_clock_warp(QEMUClockType type)
{
    int64_t clock;
    int64_t deadline;

    /*
     * There are too many global variables to make the "warp" behavior
     * applicable to other clocks.  But a clock argument removes the
     * need for if statements all over the place.
     */
    if (type != QEMU_CLOCK_VIRTUAL || !use_icount) {
        return;
    }

    if (icount_sleep) {
        /*
         * If the CPUs have been sleeping, advance QEMU_CLOCK_VIRTUAL timer now.
         * This ensures that the deadline for the timer is computed correctly
         * below.
         * This also makes sure that the insn counter is synchronized before
         * the CPU starts running, in case the CPU is woken by an event other
         * than the earliest QEMU_CLOCK_VIRTUAL timer.
         */
        icount_warp_rt();
        timer_del(icount_warp_timer);
    }
    if (!all_cpu_threads_idle()) {
        return;
    }

    if (qtest_enabled()) {
        /* When testing, qtest commands advance icount. */
        return;
    }

    /* We want to use the earliest deadline from ALL vm_clocks */
    clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT);
    deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
    if (deadline < 0) {
        static bool notified;
        if (!icount_sleep && !notified) {
            error_report("WARNING: icount sleep disabled and no active timers");
            notified = true;
        }
        return;
    }

    if (deadline > 0) {
        /*
         * Ensure QEMU_CLOCK_VIRTUAL proceeds even when the virtual CPU goes to
         * sleep.  Otherwise, the CPU might be waiting for a future timer
         * interrupt to wake it up, but the interrupt never comes because
         * the vCPU isn't running any insns and thus doesn't advance the
         * QEMU_CLOCK_VIRTUAL.
         */
        if (!icount_sleep) {
            /*
             * We never let VCPUs sleep in no sleep icount mode.
             * If there is a pending QEMU_CLOCK_VIRTUAL timer we just advance
             * to the next QEMU_CLOCK_VIRTUAL event and notify it.
             * It is useful when we want a deterministic execution time,
             * isolated from host latencies.
             */
            seqlock_write_lock(&timers_state.vm_clock_seqlock);
            timers_state.qemu_icount_bias += deadline;
            seqlock_write_unlock(&timers_state.vm_clock_seqlock);
            qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
        } else {
            /*
             * We do stop VCPUs and only advance QEMU_CLOCK_VIRTUAL after some
             * "real" time, (related to the time left until the next event) has
             * passed.  The QEMU_CLOCK_VIRTUAL_RT clock will do this.
             * This avoids making the warps visible externally; for example,
             * you will not be sending network packets continuously instead of
             * every 100ms.
             */
            seqlock_write_lock(&timers_state.vm_clock_seqlock);
            if (vm_clock_warp_start == -1 || vm_clock_warp_start > clock) {
                vm_clock_warp_start = clock;
            }
            seqlock_write_unlock(&timers_state.vm_clock_seqlock);
            timer_mod_anticipate(icount_warp_timer, clock + deadline);
        }
    } else if (deadline == 0) {
        qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
    }
}

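/* timer_mod_anticipate() only ever moves the warp timer's expiry earlier,
 * so concurrent calls to qemu_clock_warp() converge on the soonest wakeup
 * instead of repeatedly pushing it back. */
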
static bool icount_state_needed(void *opaque)
{
    return use_icount;
}

/*
 * This is a subsection for icount migration.
 */
static const VMStateDescription icount_vmstate_timers = {
    .name = "timer/icount",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = icount_state_needed,
    .fields = (VMStateField[]) {
        VMSTATE_INT64(qemu_icount_bias, TimersState),
        VMSTATE_INT64(qemu_icount, TimersState),
        VMSTATE_END_OF_LIST()
    }
};

static const VMStateDescription vmstate_timers = {
    .name = "timer",
    .version_id = 2,
    .minimum_version_id = 1,
    .fields = (VMStateField[]) {
        VMSTATE_INT64(cpu_ticks_offset, TimersState),
        VMSTATE_INT64(dummy, TimersState),
        VMSTATE_INT64_V(cpu_clock_offset, TimersState, 2),
        VMSTATE_END_OF_LIST()
    },
    .subsections = (const VMStateDescription*[]) {
        &icount_vmstate_timers,
        NULL
    }
};

static void cpu_throttle_thread(void *opaque)
{
    CPUState *cpu = opaque;
    double pct;
    double throttle_ratio;
    long sleeptime_ns;

    if (!cpu_throttle_get_percentage()) {
        return;
    }

    pct = (double)cpu_throttle_get_percentage()/100;
    throttle_ratio = pct / (1 - pct);
    sleeptime_ns = (long)(throttle_ratio * CPU_THROTTLE_TIMESLICE_NS);

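    /* For example, at a 60% throttle pct is 0.6 and throttle_ratio is 1.5,
     * so the vCPU sleeps 15 ms for every 10 ms timeslice it runs: sleep
     * accounts for 15 / (10 + 15) = 60% of each period. */
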
    qemu_mutex_unlock_iothread();
    atomic_set(&cpu->throttle_thread_scheduled, 0);
    g_usleep(sleeptime_ns / 1000); /* Convert ns to us for usleep call */
    qemu_mutex_lock_iothread();
}

static void cpu_throttle_timer_tick(void *opaque)
{
    CPUState *cpu;
    double pct;

    /* Stop the timer if needed */
    if (!cpu_throttle_get_percentage()) {
        return;
    }
    CPU_FOREACH(cpu) {
        if (!atomic_xchg(&cpu->throttle_thread_scheduled, 1)) {
            async_run_on_cpu(cpu, cpu_throttle_thread, cpu);
        }
    }

    pct = (double)cpu_throttle_get_percentage()/100;
    timer_mod(throttle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT) +
                                   CPU_THROTTLE_TIMESLICE_NS / (1-pct));
}

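/* The tick period stretches with the throttle: at 50% the timer fires every
 * CPU_THROTTLE_TIMESLICE_NS / 0.5 = 20 ms, giving each vCPU a 10 ms run
 * window between its 10 ms sleeps. */
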
void cpu_throttle_set(int new_throttle_pct)
{
    /* Ensure throttle percentage is within valid range */
    new_throttle_pct = MIN(new_throttle_pct, CPU_THROTTLE_PCT_MAX);
    new_throttle_pct = MAX(new_throttle_pct, CPU_THROTTLE_PCT_MIN);

    atomic_set(&throttle_percentage, new_throttle_pct);

    timer_mod(throttle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT) +
                                       CPU_THROTTLE_TIMESLICE_NS);
}

void cpu_throttle_stop(void)
{
    atomic_set(&throttle_percentage, 0);
}

bool cpu_throttle_active(void)
{
    return (cpu_throttle_get_percentage() != 0);
}

int cpu_throttle_get_percentage(void)
{
    return atomic_read(&throttle_percentage);
}

void cpu_ticks_init(void)
{
    seqlock_init(&timers_state.vm_clock_seqlock, NULL);
    vmstate_register(NULL, 0, &vmstate_timers, &timers_state);
    throttle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL_RT,
                                  cpu_throttle_timer_tick, NULL);
}

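/* Illustrative -icount usages (not exhaustive): "-icount shift=3,sleep=off"
 * fixes virtual time at 8 ns per instruction and never lets vCPUs sleep,
 * while "-icount shift=auto" starts from the shift=3 guess below and lets
 * icount_adjust() retune it against real time. */
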
void configure_icount(QemuOpts *opts, Error **errp)
{
    const char *option;
    char *rem_str = NULL;

    option = qemu_opt_get(opts, "shift");
    if (!option) {
        if (qemu_opt_get(opts, "align") != NULL) {
            error_setg(errp, "Please specify shift option when using align");
        }
        return;
    }

    icount_sleep = qemu_opt_get_bool(opts, "sleep", true);
    if (icount_sleep) {
        icount_warp_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL_RT,
                                         icount_dummy_timer, NULL);
    }

    icount_align_option = qemu_opt_get_bool(opts, "align", false);

    if (icount_align_option && !icount_sleep) {
        error_setg(errp, "align=on and sleep=no are incompatible");
    }
    if (strcmp(option, "auto") != 0) {
        errno = 0;
        icount_time_shift = strtol(option, &rem_str, 0);
        if (errno != 0 || *rem_str != '\0' || !strlen(option)) {
            error_setg(errp, "icount: Invalid shift value");
        }
        use_icount = 1;
        return;
    } else if (icount_align_option) {
        error_setg(errp, "shift=auto and align=on are incompatible");
    } else if (!icount_sleep) {
        error_setg(errp, "shift=auto and sleep=no are incompatible");
    }

    use_icount = 2;

    /* 125MIPS seems a reasonable initial guess at the guest speed.
       It will be corrected fairly quickly anyway. */
    icount_time_shift = 3;

    /* Have both realtime and virtual time triggers for speed adjustment.
       The realtime trigger catches emulated time passing too slowly,
       the virtual time trigger catches emulated time passing too fast.
       Realtime triggers occur even when idle, so use them less frequently
       than VM triggers. */
    icount_rt_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL_RT,
                                   icount_adjust_rt, NULL);
    timer_mod(icount_rt_timer,
              qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000);
    icount_vm_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
                                   icount_adjust_vm, NULL);
    timer_mod(icount_vm_timer,
              qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
              get_ticks_per_sec() / 10);
}

/***********************************************************/
void hw_error(const char *fmt, ...)
{
    va_list ap;
    CPUState *cpu;

    va_start(ap, fmt);
    fprintf(stderr, "qemu: hardware error: ");
    vfprintf(stderr, fmt, ap);
    fprintf(stderr, "\n");
    CPU_FOREACH(cpu) {
        fprintf(stderr, "CPU #%d:\n", cpu->cpu_index);
        cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_FPU);
    }
    va_end(ap);
    abort();
}

void cpu_synchronize_all_states(void)
{
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        cpu_synchronize_state(cpu);
    }
}

void cpu_synchronize_all_post_reset(void)
{
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        cpu_synchronize_post_reset(cpu);
    }
}

void cpu_synchronize_all_post_init(void)
{
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        cpu_synchronize_post_init(cpu);
    }
}

void cpu_clean_all_dirty(void)
{
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        cpu_clean_state(cpu);
    }
}

static int do_vm_stop(RunState state)
{
    int ret = 0;

    if (runstate_is_running()) {
        cpu_disable_ticks();
        pause_all_vcpus();
        runstate_set(state);
        vm_state_notify(0, state);
        qapi_event_send_stop(&error_abort);
    }

    bdrv_drain_all();
    ret = bdrv_flush_all();

    return ret;
}

static bool cpu_can_run(CPUState *cpu)
{
    if (cpu->stop) {
        return false;
    }
    if (cpu_is_stopped(cpu)) {
        return false;
    }
    return true;
}

static void cpu_handle_guest_debug(CPUState *cpu)
{
    gdb_set_stop_cpu(cpu);
    qemu_system_debug_request();
    cpu->stopped = true;
}

#ifdef CONFIG_LINUX
static void sigbus_reraise(void)
{
    sigset_t set;
    struct sigaction action;

    memset(&action, 0, sizeof(action));
    action.sa_handler = SIG_DFL;
    if (!sigaction(SIGBUS, &action, NULL)) {
        raise(SIGBUS);
        sigemptyset(&set);
        sigaddset(&set, SIGBUS);
        sigprocmask(SIG_UNBLOCK, &set, NULL);
    }
    perror("Failed to re-raise SIGBUS!\n");
    abort();
}

static void sigbus_handler(int n, struct qemu_signalfd_siginfo *siginfo,
                           void *ctx)
{
    if (kvm_on_sigbus(siginfo->ssi_code,
                      (void *)(intptr_t)siginfo->ssi_addr)) {
        sigbus_reraise();
    }
}

static void qemu_init_sigbus(void)
{
    struct sigaction action;

    memset(&action, 0, sizeof(action));
    action.sa_flags = SA_SIGINFO;
    action.sa_sigaction = (void (*)(int, siginfo_t*, void*))sigbus_handler;
    sigaction(SIGBUS, &action, NULL);

    prctl(PR_MCE_KILL, PR_MCE_KILL_SET, PR_MCE_KILL_EARLY, 0, 0);
}

static void qemu_kvm_eat_signals(CPUState *cpu)
{
    struct timespec ts = { 0, 0 };
    siginfo_t siginfo;
    sigset_t waitset;
    sigset_t chkset;
    int r;

    sigemptyset(&waitset);
    sigaddset(&waitset, SIG_IPI);
    sigaddset(&waitset, SIGBUS);

    do {
        r = sigtimedwait(&waitset, &siginfo, &ts);
        if (r == -1 && !(errno == EAGAIN || errno == EINTR)) {
            perror("sigtimedwait");
            exit(1);
        }

        switch (r) {
        case SIGBUS:
            if (kvm_on_sigbus_vcpu(cpu, siginfo.si_code, siginfo.si_addr)) {
                sigbus_reraise();
            }
            break;
        default:
            break;
        }

        r = sigpending(&chkset);
        if (r == -1) {
            perror("sigpending");
            exit(1);
        }
    } while (sigismember(&chkset, SIG_IPI) || sigismember(&chkset, SIGBUS));
}

#else /* !CONFIG_LINUX */

static void qemu_init_sigbus(void)
{
}

static void qemu_kvm_eat_signals(CPUState *cpu)
{
}
#endif /* !CONFIG_LINUX */

#ifndef _WIN32
static void dummy_signal(int sig)
{
}

static void qemu_kvm_init_cpu_signals(CPUState *cpu)
{
    int r;
    sigset_t set;
    struct sigaction sigact;

    memset(&sigact, 0, sizeof(sigact));
    sigact.sa_handler = dummy_signal;
    sigaction(SIG_IPI, &sigact, NULL);

    pthread_sigmask(SIG_BLOCK, NULL, &set);
    sigdelset(&set, SIG_IPI);
    sigdelset(&set, SIGBUS);
    r = kvm_set_signal_mask(cpu, &set);
    if (r) {
        fprintf(stderr, "kvm_set_signal_mask: %s\n", strerror(-r));
        exit(1);
    }
}

#else /* _WIN32 */
static void qemu_kvm_init_cpu_signals(CPUState *cpu)
{
    abort();
}
#endif /* _WIN32 */

static QemuMutex qemu_global_mutex;
static QemuCond qemu_io_proceeded_cond;
static unsigned iothread_requesting_mutex;

static QemuThread io_thread;

/* cpu creation */
static QemuCond qemu_cpu_cond;
/* system init */
static QemuCond qemu_pause_cond;
static QemuCond qemu_work_cond;

void qemu_init_cpu_loop(void)
{
    qemu_init_sigbus();
    qemu_cond_init(&qemu_cpu_cond);
    qemu_cond_init(&qemu_pause_cond);
    qemu_cond_init(&qemu_work_cond);
    qemu_cond_init(&qemu_io_proceeded_cond);
    qemu_mutex_init(&qemu_global_mutex);

    qemu_thread_get_self(&io_thread);
}

void run_on_cpu(CPUState *cpu, void (*func)(void *data), void *data)
{
    struct qemu_work_item wi;

    if (qemu_cpu_is_self(cpu)) {
        func(data);
        return;
    }

    wi.func = func;
    wi.data = data;
    wi.free = false;

    qemu_mutex_lock(&cpu->work_mutex);
    if (cpu->queued_work_first == NULL) {
        cpu->queued_work_first = &wi;
    } else {
        cpu->queued_work_last->next = &wi;
    }
    cpu->queued_work_last = &wi;
    wi.next = NULL;
    wi.done = false;
    qemu_mutex_unlock(&cpu->work_mutex);

    qemu_cpu_kick(cpu);
    while (!atomic_mb_read(&wi.done)) {
        CPUState *self_cpu = current_cpu;

        qemu_cond_wait(&qemu_work_cond, &qemu_global_mutex);
        current_cpu = self_cpu;
    }
}

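/* run_on_cpu() can keep the work item on the caller's stack because the
 * loop above blocks on qemu_work_cond until flush_queued_work() on the
 * target vCPU sets wi.done, so the item cannot outlive this frame. */
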
void async_run_on_cpu(CPUState *cpu, void (*func)(void *data), void *data)
{
    struct qemu_work_item *wi;

    if (qemu_cpu_is_self(cpu)) {
        func(data);
        return;
    }

    wi = g_malloc0(sizeof(struct qemu_work_item));
    wi->func = func;
    wi->data = data;
    wi->free = true;

    qemu_mutex_lock(&cpu->work_mutex);
    if (cpu->queued_work_first == NULL) {
        cpu->queued_work_first = wi;
    } else {
        cpu->queued_work_last->next = wi;
    }
    cpu->queued_work_last = wi;
    wi->next = NULL;
    wi->done = false;
    qemu_mutex_unlock(&cpu->work_mutex);

    qemu_cpu_kick(cpu);
}

static void flush_queued_work(CPUState *cpu)
{
    struct qemu_work_item *wi;

    if (cpu->queued_work_first == NULL) {
        return;
    }

    qemu_mutex_lock(&cpu->work_mutex);
    while (cpu->queued_work_first != NULL) {
        wi = cpu->queued_work_first;
        cpu->queued_work_first = wi->next;
        if (!cpu->queued_work_first) {
            cpu->queued_work_last = NULL;
        }
        qemu_mutex_unlock(&cpu->work_mutex);
        wi->func(wi->data);
        qemu_mutex_lock(&cpu->work_mutex);
        if (wi->free) {
            g_free(wi);
        } else {
            atomic_mb_set(&wi->done, true);
        }
    }
    qemu_mutex_unlock(&cpu->work_mutex);
    qemu_cond_broadcast(&qemu_work_cond);
}

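/* flush_queued_work() drops work_mutex around each wi->func() call so the
 * callback itself may queue further work items without deadlocking on the
 * same lock. */
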
static void qemu_wait_io_event_common(CPUState *cpu)
{
    if (cpu->stop) {
        cpu->stop = false;
        cpu->stopped = true;
        qemu_cond_signal(&qemu_pause_cond);
    }
    flush_queued_work(cpu);
    cpu->thread_kicked = false;
}

static void qemu_tcg_wait_io_event(CPUState *cpu)
{
    while (all_cpu_threads_idle()) {
        /* Start accounting real time to the virtual clock if the CPUs
           are idle. */
        qemu_clock_warp(QEMU_CLOCK_VIRTUAL);
        qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
    }

    while (iothread_requesting_mutex) {
        qemu_cond_wait(&qemu_io_proceeded_cond, &qemu_global_mutex);
    }

    CPU_FOREACH(cpu) {
        qemu_wait_io_event_common(cpu);
    }
}

static void qemu_kvm_wait_io_event(CPUState *cpu)
{
    while (cpu_thread_is_idle(cpu)) {
        qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
    }

    qemu_kvm_eat_signals(cpu);
    qemu_wait_io_event_common(cpu);
}

static void *qemu_kvm_cpu_thread_fn(void *arg)
{
    CPUState *cpu = arg;
    int r;

    rcu_register_thread();

    qemu_mutex_lock_iothread();
    qemu_thread_get_self(cpu->thread);
    cpu->thread_id = qemu_get_thread_id();
    cpu->can_do_io = 1;
    current_cpu = cpu;

    r = kvm_init_vcpu(cpu);
    if (r < 0) {
        fprintf(stderr, "kvm_init_vcpu failed: %s\n", strerror(-r));
        exit(1);
    }

    qemu_kvm_init_cpu_signals(cpu);

    /* signal CPU creation */
    cpu->created = true;
    qemu_cond_signal(&qemu_cpu_cond);

    while (1) {
        if (cpu_can_run(cpu)) {
            r = kvm_cpu_exec(cpu);
            if (r == EXCP_DEBUG) {
                cpu_handle_guest_debug(cpu);
            }
        }
        qemu_kvm_wait_io_event(cpu);
    }

    return NULL;
}

static void *qemu_dummy_cpu_thread_fn(void *arg)
{
#ifdef _WIN32
    fprintf(stderr, "qtest is not supported under Windows\n");
    exit(1);
#else
    CPUState *cpu = arg;
    sigset_t waitset;
    int r;

    rcu_register_thread();

    qemu_mutex_lock_iothread();
    qemu_thread_get_self(cpu->thread);
    cpu->thread_id = qemu_get_thread_id();
    cpu->can_do_io = 1;

    sigemptyset(&waitset);
    sigaddset(&waitset, SIG_IPI);

    /* signal CPU creation */
    cpu->created = true;
    qemu_cond_signal(&qemu_cpu_cond);

    current_cpu = cpu;
    while (1) {
        current_cpu = NULL;
        qemu_mutex_unlock_iothread();
        do {
            int sig;
            r = sigwait(&waitset, &sig);
        } while (r == -1 && (errno == EAGAIN || errno == EINTR));
        if (r == -1) {
            perror("sigwait");
            exit(1);
        }
        qemu_mutex_lock_iothread();
        current_cpu = cpu;
        qemu_wait_io_event_common(cpu);
    }

    return NULL;
#endif
}

static void tcg_exec_all(void);

static void *qemu_tcg_cpu_thread_fn(void *arg)
{
    CPUState *cpu = arg;

    rcu_register_thread();

    qemu_mutex_lock_iothread();
    qemu_thread_get_self(cpu->thread);

    CPU_FOREACH(cpu) {
        cpu->thread_id = qemu_get_thread_id();
        cpu->created = true;
        cpu->can_do_io = 1;
    }
    qemu_cond_signal(&qemu_cpu_cond);

    /* wait for initial kick-off after machine start */
    while (first_cpu->stopped) {
        qemu_cond_wait(first_cpu->halt_cond, &qemu_global_mutex);

        /* process any pending work */
        CPU_FOREACH(cpu) {
            qemu_wait_io_event_common(cpu);
        }
    }

    /* process any pending work */
    atomic_mb_set(&exit_request, 1);

    while (1) {
        tcg_exec_all();

        if (use_icount) {
            int64_t deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);

            if (deadline == 0) {
                qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
            }
        }
        qemu_tcg_wait_io_event(QTAILQ_FIRST(&cpus));
    }

    return NULL;
}

static void qemu_cpu_kick_thread(CPUState *cpu)
{
#ifndef _WIN32
    int err;

    if (cpu->thread_kicked) {
        return;
    }
    cpu->thread_kicked = true;
    err = pthread_kill(cpu->thread->thread, SIG_IPI);
    if (err) {
        fprintf(stderr, "qemu:%s: %s", __func__, strerror(err));
        exit(1);
    }
#else /* _WIN32 */
    abort();
#endif
}

static void qemu_cpu_kick_no_halt(void)
{
    CPUState *cpu;
    /* Ensure whatever caused the exit has reached the CPU threads before
     * writing exit_request.
     */
    atomic_mb_set(&exit_request, 1);
    cpu = atomic_mb_read(&tcg_current_cpu);
    if (cpu) {
        cpu_exit(cpu);
    }
}

void qemu_cpu_kick(CPUState *cpu)
{
    qemu_cond_broadcast(cpu->halt_cond);
    if (tcg_enabled()) {
        qemu_cpu_kick_no_halt();
    } else {
        qemu_cpu_kick_thread(cpu);
    }
}

void qemu_cpu_kick_self(void)
{
    assert(current_cpu);
    qemu_cpu_kick_thread(current_cpu);
}

bool qemu_cpu_is_self(CPUState *cpu)
{
    return qemu_thread_is_self(cpu->thread);
}

bool qemu_in_vcpu_thread(void)
{
    return current_cpu && qemu_cpu_is_self(current_cpu);
}

static __thread bool iothread_locked = false;

bool qemu_mutex_iothread_locked(void)
{
    return iothread_locked;
}

void qemu_mutex_lock_iothread(void)
{
    atomic_inc(&iothread_requesting_mutex);
    /* In the simple case there is no need to bump the VCPU thread out of
     * TCG code execution.
     */
    if (!tcg_enabled() || qemu_in_vcpu_thread() ||
        !first_cpu || !first_cpu->created) {
        qemu_mutex_lock(&qemu_global_mutex);
        atomic_dec(&iothread_requesting_mutex);
    } else {
        if (qemu_mutex_trylock(&qemu_global_mutex)) {
            qemu_cpu_kick_no_halt();
            qemu_mutex_lock(&qemu_global_mutex);
        }
        atomic_dec(&iothread_requesting_mutex);
        qemu_cond_broadcast(&qemu_io_proceeded_cond);
    }
    iothread_locked = true;
}

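/* If the trylock above fails, the BQL holder is almost certainly a vCPU
 * executing TCG code; qemu_cpu_kick_no_halt() raises exit_request so that
 * vCPU leaves cpu_exec() and releases the BQL, bounding the blocking
 * qemu_mutex_lock() that follows. */
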
void qemu_mutex_unlock_iothread(void)
{
    iothread_locked = false;
    qemu_mutex_unlock(&qemu_global_mutex);
}

static int all_vcpus_paused(void)
{
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        if (!cpu->stopped) {
            return 0;
        }
    }

    return 1;
}

void pause_all_vcpus(void)
{
    CPUState *cpu;

    qemu_clock_enable(QEMU_CLOCK_VIRTUAL, false);
    CPU_FOREACH(cpu) {
        cpu->stop = true;
        qemu_cpu_kick(cpu);
    }

    if (qemu_in_vcpu_thread()) {
        cpu_stop_current();
        if (!kvm_enabled()) {
            CPU_FOREACH(cpu) {
                cpu->stop = false;
                cpu->stopped = true;
            }
            return;
        }
    }

    while (!all_vcpus_paused()) {
        qemu_cond_wait(&qemu_pause_cond, &qemu_global_mutex);
        CPU_FOREACH(cpu) {
            qemu_cpu_kick(cpu);
        }
    }
}

void cpu_resume(CPUState *cpu)
{
    cpu->stop = false;
    cpu->stopped = false;
    qemu_cpu_kick(cpu);
}

void resume_all_vcpus(void)
{
    CPUState *cpu;

    qemu_clock_enable(QEMU_CLOCK_VIRTUAL, true);
    CPU_FOREACH(cpu) {
        cpu_resume(cpu);
    }
}

/* For temporary buffers for forming a name */
#define VCPU_THREAD_NAME_SIZE 16

static void qemu_tcg_init_vcpu(CPUState *cpu)
{
    char thread_name[VCPU_THREAD_NAME_SIZE];
    static QemuCond *tcg_halt_cond;
    static QemuThread *tcg_cpu_thread;

    tcg_cpu_address_space_init(cpu, cpu->as);

    /* share a single thread for all cpus with TCG */
    if (!tcg_cpu_thread) {
        cpu->thread = g_malloc0(sizeof(QemuThread));
        cpu->halt_cond = g_malloc0(sizeof(QemuCond));
        qemu_cond_init(cpu->halt_cond);
        tcg_halt_cond = cpu->halt_cond;
        snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/TCG",
                 cpu->cpu_index);
        qemu_thread_create(cpu->thread, thread_name, qemu_tcg_cpu_thread_fn,
                           cpu, QEMU_THREAD_JOINABLE);
#ifdef _WIN32
        cpu->hThread = qemu_thread_get_handle(cpu->thread);
#endif
        while (!cpu->created) {
            qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
        }
        tcg_cpu_thread = cpu->thread;
    } else {
        cpu->thread = tcg_cpu_thread;
        cpu->halt_cond = tcg_halt_cond;
    }
}

static void qemu_kvm_start_vcpu(CPUState *cpu)
{
    char thread_name[VCPU_THREAD_NAME_SIZE];

    cpu->thread = g_malloc0(sizeof(QemuThread));
    cpu->halt_cond = g_malloc0(sizeof(QemuCond));
    qemu_cond_init(cpu->halt_cond);
    snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/KVM",
             cpu->cpu_index);
    qemu_thread_create(cpu->thread, thread_name, qemu_kvm_cpu_thread_fn,
                       cpu, QEMU_THREAD_JOINABLE);
    while (!cpu->created) {
        qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
    }
}

static void qemu_dummy_start_vcpu(CPUState *cpu)
{
    char thread_name[VCPU_THREAD_NAME_SIZE];

    cpu->thread = g_malloc0(sizeof(QemuThread));
    cpu->halt_cond = g_malloc0(sizeof(QemuCond));
    qemu_cond_init(cpu->halt_cond);
    snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/DUMMY",
             cpu->cpu_index);
    qemu_thread_create(cpu->thread, thread_name, qemu_dummy_cpu_thread_fn, cpu,
                       QEMU_THREAD_JOINABLE);
    while (!cpu->created) {
        qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
    }
}

void qemu_init_vcpu(CPUState *cpu)
{
    cpu->nr_cores = smp_cores;
    cpu->nr_threads = smp_threads;
    cpu->stopped = true;
    if (kvm_enabled()) {
        qemu_kvm_start_vcpu(cpu);
    } else if (tcg_enabled()) {
        qemu_tcg_init_vcpu(cpu);
    } else {
        qemu_dummy_start_vcpu(cpu);
    }
}

void cpu_stop_current(void)
{
    if (current_cpu) {
        current_cpu->stop = false;
        current_cpu->stopped = true;
        cpu_exit(current_cpu);
        qemu_cond_signal(&qemu_pause_cond);
    }
}

int vm_stop(RunState state)
{
    if (qemu_in_vcpu_thread()) {
        qemu_system_vmstop_request_prepare();
        qemu_system_vmstop_request(state);
        /*
         * FIXME: should not return to device code in case
         * vm_stop() has been requested.
         */
        cpu_stop_current();
        return 0;
    }

    return do_vm_stop(state);
}

/* does a state transition even if the VM is already stopped,
   current state is forgotten forever */
int vm_stop_force_state(RunState state)
{
    if (runstate_is_running()) {
        return vm_stop(state);
    } else {
        runstate_set(state);
        /* Make sure to return an error if the flush in a previous vm_stop()
         * failed. */
        return bdrv_flush_all();
    }
}

static int64_t tcg_get_icount_limit(void)
{
    int64_t deadline;

    if (replay_mode != REPLAY_MODE_PLAY) {
        deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);

        /* Maintain prior (possibly buggy) behaviour where if no deadline
         * was set (as there is no QEMU_CLOCK_VIRTUAL timer) or it is more than
         * INT32_MAX nanoseconds ahead, we still use INT32_MAX
         * nanoseconds.
         */
        if ((deadline < 0) || (deadline > INT32_MAX)) {
            deadline = INT32_MAX;
        }

        return qemu_icount_round(deadline);
    } else {
        return replay_get_instructions();
    }
}

static int tcg_cpu_exec(CPUState *cpu)
{
    int ret;
#ifdef CONFIG_PROFILER
    int64_t ti;
#endif

#ifdef CONFIG_PROFILER
    ti = profile_getclock();
#endif
    if (use_icount) {
        int64_t count;
        int decr;
        timers_state.qemu_icount -= (cpu->icount_decr.u16.low
                                    + cpu->icount_extra);
        cpu->icount_decr.u16.low = 0;
        cpu->icount_extra = 0;
        count = tcg_get_icount_limit();
        timers_state.qemu_icount += count;
        decr = (count > 0xffff) ? 0xffff : count;
        count -= decr;
        cpu->icount_decr.u16.low = decr;
        cpu->icount_extra = count;
    }
    ret = cpu_exec(cpu);
#ifdef CONFIG_PROFILER
    tcg_time += profile_getclock() - ti;
#endif
    if (use_icount) {
        /* Fold pending instructions back into the
           instruction counter, and clear the interrupt flag. */
        timers_state.qemu_icount -= (cpu->icount_decr.u16.low
                        + cpu->icount_extra);
        cpu->icount_decr.u32 = 0;
        cpu->icount_extra = 0;
        replay_account_executed_instructions();
    }
    return ret;
}

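/* The instruction budget is split across a 16-bit countdown plus an extra
 * field: a limit of, say, 70000 instructions loads icount_decr.u16.low with
 * 0xffff (65535) and icount_extra with the remaining 4465, to be refilled
 * when the fast-path counter underflows. */
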
static void tcg_exec_all(void)
{
    int r;

    /* Account partial waits to QEMU_CLOCK_VIRTUAL. */
    qemu_clock_warp(QEMU_CLOCK_VIRTUAL);

    if (next_cpu == NULL) {
        next_cpu = first_cpu;
    }
    for (; next_cpu != NULL && !exit_request; next_cpu = CPU_NEXT(next_cpu)) {
        CPUState *cpu = next_cpu;

        qemu_clock_enable(QEMU_CLOCK_VIRTUAL,
                          (cpu->singlestep_enabled & SSTEP_NOTIMER) == 0);

        if (cpu_can_run(cpu)) {
            r = tcg_cpu_exec(cpu);
            if (r == EXCP_DEBUG) {
                cpu_handle_guest_debug(cpu);
                break;
            }
        } else if (cpu->stop || cpu->stopped) {
            break;
        }
    }

    /* Pairs with smp_wmb in qemu_cpu_kick. */
    atomic_mb_set(&exit_request, 0);
}

void list_cpus(FILE *f, fprintf_function cpu_fprintf, const char *optarg)
{
    /* XXX: implement xxx_cpu_list for targets that still miss it */
#if defined(cpu_list)
    cpu_list(f, cpu_fprintf);
#endif
}

CpuInfoList *qmp_query_cpus(Error **errp)
{
    CpuInfoList *head = NULL, *cur_item = NULL;
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        CpuInfoList *info;
#if defined(TARGET_I386)
        X86CPU *x86_cpu = X86_CPU(cpu);
        CPUX86State *env = &x86_cpu->env;
#elif defined(TARGET_PPC)
        PowerPCCPU *ppc_cpu = POWERPC_CPU(cpu);
        CPUPPCState *env = &ppc_cpu->env;
#elif defined(TARGET_SPARC)
        SPARCCPU *sparc_cpu = SPARC_CPU(cpu);
        CPUSPARCState *env = &sparc_cpu->env;
#elif defined(TARGET_MIPS)
        MIPSCPU *mips_cpu = MIPS_CPU(cpu);
        CPUMIPSState *env = &mips_cpu->env;
#elif defined(TARGET_TRICORE)
        TriCoreCPU *tricore_cpu = TRICORE_CPU(cpu);
        CPUTriCoreState *env = &tricore_cpu->env;
#endif

        cpu_synchronize_state(cpu);

        info = g_malloc0(sizeof(*info));
        info->value = g_malloc0(sizeof(*info->value));
        info->value->CPU = cpu->cpu_index;
        info->value->current = (cpu == first_cpu);
        info->value->halted = cpu->halted;
        info->value->qom_path = object_get_canonical_path(OBJECT(cpu));
        info->value->thread_id = cpu->thread_id;
#if defined(TARGET_I386)
        info->value->has_pc = true;
        info->value->pc = env->eip + env->segs[R_CS].base;
#elif defined(TARGET_PPC)
        info->value->has_nip = true;
        info->value->nip = env->nip;
#elif defined(TARGET_SPARC)
        info->value->has_pc = true;
        info->value->pc = env->pc;
        info->value->has_npc = true;
        info->value->npc = env->npc;
#elif defined(TARGET_MIPS)
        info->value->has_PC = true;
        info->value->PC = env->active_tc.PC;
#elif defined(TARGET_TRICORE)
        info->value->has_PC = true;
        info->value->PC = env->PC;
#endif

        /* XXX: waiting for the qapi to support GSList */
        if (!cur_item) {
            head = cur_item = info;
        } else {
            cur_item->next = info;
            cur_item = info;
        }
    }

    return head;
}

void qmp_memsave(int64_t addr, int64_t size, const char *filename,
                 bool has_cpu, int64_t cpu_index, Error **errp)
{
    FILE *f;
    uint32_t l;
    CPUState *cpu;
    uint8_t buf[1024];
    int64_t orig_addr = addr, orig_size = size;

    if (!has_cpu) {
        cpu_index = 0;
    }

    cpu = qemu_get_cpu(cpu_index);
    if (cpu == NULL) {
        error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "cpu-index",
                   "a CPU number");
        return;
    }

    f = fopen(filename, "wb");
    if (!f) {
        error_setg_file_open(errp, errno, filename);
        return;
    }

    while (size != 0) {
        l = sizeof(buf);
        if (l > size) {
            l = size;
        }
        if (cpu_memory_rw_debug(cpu, addr, buf, l, 0) != 0) {
            error_setg(errp, "Invalid addr 0x%016" PRIx64 "/size %" PRId64
                             " specified", orig_addr, orig_size);
            goto exit;
        }
        if (fwrite(buf, 1, l, f) != l) {
            error_setg(errp, QERR_IO_ERROR);
            goto exit;
        }
        addr += l;
        size -= l;
    }

exit:
    fclose(f);
}

void qmp_pmemsave(int64_t addr, int64_t size, const char *filename,
                  Error **errp)
{
    FILE *f;
    uint32_t l;
    uint8_t buf[1024];

    f = fopen(filename, "wb");
    if (!f) {
        error_setg_file_open(errp, errno, filename);
        return;
    }

    while (size != 0) {
        l = sizeof(buf);
        if (l > size) {
            l = size;
        }
        cpu_physical_memory_read(addr, buf, l);
        if (fwrite(buf, 1, l, f) != l) {
            error_setg(errp, QERR_IO_ERROR);
            goto exit;
        }
        addr += l;
        size -= l;
    }

exit:
    fclose(f);
}

void qmp_inject_nmi(Error **errp)
{
#if defined(TARGET_I386)
    CPUState *cs;

    CPU_FOREACH(cs) {
        X86CPU *cpu = X86_CPU(cs);

        if (!cpu->apic_state) {
            cpu_interrupt(cs, CPU_INTERRUPT_NMI);
        } else {
            apic_deliver_nmi(cpu->apic_state);
        }
    }
#else
    nmi_monitor_handle(monitor_get_cpu_index(), errp);
#endif
}

void dump_drift_info(FILE *f, fprintf_function cpu_fprintf)
{
    if (!use_icount) {
        return;
    }

    cpu_fprintf(f, "Host - Guest clock  %"PRIi64" ms\n",
                (cpu_get_clock() - cpu_get_icount())/SCALE_MS);
    if (icount_align_option) {
        cpu_fprintf(f, "Max guest delay     %"PRIi64" ms\n", -max_delay/SCALE_MS);
        cpu_fprintf(f, "Max guest advance   %"PRIi64" ms\n", max_advance/SCALE_MS);
    } else {
        cpu_fprintf(f, "Max guest delay     NA\n");
        cpu_fprintf(f, "Max guest advance   NA\n");
    }
}