4 * Copyright (c) 2003-2008 Fabrice Bellard
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
25 /* Needed early for CONFIG_BSD etc. */
26 #include "config-host.h"
28 #include "monitor/monitor.h"
29 #include "qapi/qmp/qerror.h"
30 #include "sysemu/sysemu.h"
31 #include "exec/gdbstub.h"
32 #include "sysemu/dma.h"
33 #include "sysemu/kvm.h"
34 #include "qmp-commands.h"
36 #include "qemu/thread.h"
37 #include "sysemu/cpus.h"
38 #include "sysemu/qtest.h"
39 #include "qemu/main-loop.h"
40 #include "qemu/bitmap.h"
41 #include "qemu/seqlock.h"
42 #include "qapi-event.h"
45 #include "qemu/compatfd.h"
50 #include <sys/prctl.h>
53 #define PR_MCE_KILL 33
56 #ifndef PR_MCE_KILL_SET
57 #define PR_MCE_KILL_SET 1
60 #ifndef PR_MCE_KILL_EARLY
61 #define PR_MCE_KILL_EARLY 1
64 #endif /* CONFIG_LINUX */
66 static CPUState *next_cpu;
/*
 * Report whether @cpu has to be treated as stopped.
 * Returns true when the per-CPU "stopped" flag is set, or when
 * runstate_is_running() reports that the VM as a whole is not running.
 */
68 bool cpu_is_stopped(CPUState *cpu)
70 return cpu->stopped || !runstate_is_running();
/*
 * Decide whether the thread driving @cpu currently has nothing to do.
 * Three conditions are examined in order:
 *   1. cpu->stop is set or a work item is queued (queued_work_first);
 *   2. cpu_is_stopped() is true for this CPU;
 *   3. the CPU is not halted, or cpu_has_work() is true, or
 *      kvm_halt_in_kernel() is true.
 * NOTE(review): the value returned from each branch is not visible in this
 * excerpt -- confirm against the full function body.
 */
73 static bool cpu_thread_is_idle(CPUState *cpu)
75 if (cpu->stop || cpu->queued_work_first) {
78 if (cpu_is_stopped(cpu)) {
81 if (!cpu->halted || cpu_has_work(cpu) ||
82 kvm_halt_in_kernel()) {
88 static bool all_cpu_threads_idle(void)
93 if (!cpu_thread_is_idle(cpu)) {
100 /***********************************************************/
101 /* guest cycle counter */
103 /* Protected by TimersState seqlock */
105 static int64_t vm_clock_warp_start = -1;
106 /* Conversion factor from emulated instructions to virtual clock ticks. */
107 static int icount_time_shift;
108 /* Arbitrarily pick 1MIPS as the minimum allowable speed. */
109 #define MAX_ICOUNT_SHIFT 10
111 static QEMUTimer *icount_rt_timer;
112 static QEMUTimer *icount_vm_timer;
113 static QEMUTimer *icount_warp_timer;
115 typedef struct TimersState {
116 /* Protected by BQL. */
117 int64_t cpu_ticks_prev;
118 int64_t cpu_ticks_offset;
120 /* cpu_clock_offset can be read out of BQL, so protect it with
123 QemuSeqLock vm_clock_seqlock;
124 int64_t cpu_clock_offset;
125 int32_t cpu_ticks_enabled;
128 /* Compensate for varying guest execution speed. */
129 int64_t qemu_icount_bias;
130 /* Only written by TCG thread */
134 static TimersState timers_state;
136 /* Return the virtual CPU time, based on the instruction counter. */
/*
 * Starts from timers_state.qemu_icount and, using current_cpu, subtracts
 * that CPU's icount_decr.u16.low + icount_extra before converting.
 * "Bad clock read" is printed to stderr when cpu_can_do_io() is false.
 * The result is qemu_icount_bias + cpu_icount_to_ns(icount).
 * No locking is done here: callers in this file invoke it either inside a
 * seqlock read/retry loop (cpu_get_icount) or with vm_clock_seqlock held
 * for writing (icount_adjust, icount_warp_rt).
 */
137 static int64_t cpu_get_icount_locked(void)
140 CPUState *cpu = current_cpu;
142 icount = timers_state.qemu_icount;
144 if (!cpu_can_do_io(cpu)) {
145 fprintf(stderr, "Bad clock read\n");
147 icount -= (cpu->icount_decr.u16.low + cpu->icount_extra);
149 return timers_state.qemu_icount_bias + cpu_icount_to_ns(icount);
/*
 * Seqlock-protected wrapper around cpu_get_icount_locked().
 * The read is repeated for as long as seqlock_read_retry() reports that
 * timers_state.vm_clock_seqlock changed while the value was being computed.
 */
152 int64_t cpu_get_icount(void)
158 start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
159 icount = cpu_get_icount_locked();
160 } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));
/*
 * Convert an instruction count to nanoseconds by shifting it left by
 * icount_time_shift (i.e. multiplying by 2^icount_time_shift).
 */
165 int64_t cpu_icount_to_ns(int64_t icount)
167 return icount << icount_time_shift;
170 /* return the host CPU cycle counter and handle stop/restart */
171 /* Caller must hold the BQL */
/*
 * The value is timers_state.cpu_ticks_offset, plus cpu_get_real_ticks()
 * while cpu_ticks_enabled is set.  If that is lower than the previously
 * returned value (cpu_ticks_prev), cpu_ticks_offset is raised by the
 * difference and the previous value is reused, so the result does not go
 * backwards.  cpu_ticks_prev is then updated with the result.
 * One early path returns cpu_get_icount() instead.
 * NOTE(review): the condition guarding that early return and the final
 * return statement are not visible in this excerpt.
 */
172 int64_t cpu_get_ticks(void)
177 return cpu_get_icount();
180 ticks = timers_state.cpu_ticks_offset;
181 if (timers_state.cpu_ticks_enabled) {
182 ticks += cpu_get_real_ticks();
185 if (timers_state.cpu_ticks_prev > ticks) {
186 /* Note: non increasing ticks may happen if the host uses
188 timers_state.cpu_ticks_offset += timers_state.cpu_ticks_prev - ticks;
189 ticks = timers_state.cpu_ticks_prev;
192 timers_state.cpu_ticks_prev = ticks;
/*
 * Compute the CPU clock value without touching the seqlock itself:
 * starts from timers_state.cpu_clock_offset and adds get_clock() while
 * cpu_ticks_enabled is set.
 * Callers in this file use it inside a seqlock read/retry loop
 * (cpu_get_clock) or with vm_clock_seqlock held for writing.
 */
196 static int64_t cpu_get_clock_locked(void)
200 ticks = timers_state.cpu_clock_offset;
201 if (timers_state.cpu_ticks_enabled) {
202 ticks += get_clock();
208 /* return the host CPU monotonic timer and handle stop/restart */
/*
 * Seqlock reader: cpu_get_clock_locked() is re-evaluated for as long as
 * seqlock_read_retry() reports a concurrent update of
 * timers_state.vm_clock_seqlock.
 */
209 int64_t cpu_get_clock(void)
215 start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
216 ti = cpu_get_clock_locked();
217 } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));
222 /* enable cpu_get_ticks()
223 * Caller must hold BQL which serves as mutex for vm_clock_seqlock.
 * When ticks are currently disabled, the present cpu_get_real_ticks() and
 * get_clock() readings are subtracted from cpu_ticks_offset and
 * cpu_clock_offset respectively and cpu_ticks_enabled is set to 1.
 * When ticks are already enabled only the seqlock is taken and released.
225 void cpu_enable_ticks(void)
227 /* Here, the real thing protected by seqlock is cpu_clock_offset. */
228 seqlock_write_lock(&timers_state.vm_clock_seqlock);
229 if (!timers_state.cpu_ticks_enabled) {
230 timers_state.cpu_ticks_offset -= cpu_get_real_ticks();
231 timers_state.cpu_clock_offset -= get_clock();
232 timers_state.cpu_ticks_enabled = 1;
234 seqlock_write_unlock(&timers_state.vm_clock_seqlock);
237 /* disable cpu_get_ticks() : the clock is stopped. You must not call
238 * cpu_get_ticks() after that.
239 * Caller must hold BQL which serves as mutex for vm_clock_seqlock.
 * When ticks are currently enabled, the present cpu_get_real_ticks()
 * reading is added to cpu_ticks_offset, cpu_clock_offset is set to the
 * value of cpu_get_clock_locked(), and cpu_ticks_enabled is cleared.
 * When ticks are already disabled only the seqlock is taken and released.
241 void cpu_disable_ticks(void)
243 /* Here, the real thing protected by seqlock is cpu_clock_offset. */
244 seqlock_write_lock(&timers_state.vm_clock_seqlock);
245 if (timers_state.cpu_ticks_enabled) {
246 timers_state.cpu_ticks_offset += cpu_get_real_ticks();
247 timers_state.cpu_clock_offset = cpu_get_clock_locked();
248 timers_state.cpu_ticks_enabled = 0;
250 seqlock_write_unlock(&timers_state.vm_clock_seqlock);
253 /* Correlation between real and virtual time is always going to be
254 fairly approximate, so ignore small variation.
255 When the guest is idle real and virtual time will be aligned in
257 #define ICOUNT_WOBBLE (get_ticks_per_sec() / 10)
/*
 * Re-tune the icount clock against the CPU clock.
 * Does nothing unless the VM is running.  With vm_clock_seqlock held for
 * writing it samples cpu_get_clock_locked() and cpu_get_icount_locked(),
 * compares their difference (delta) with the previous one using an
 * ICOUNT_WOBBLE tolerance to tell whether the guest is running ahead or
 * behind, and finally recomputes
 *   qemu_icount_bias = cur_icount - (qemu_icount << icount_time_shift).
 * NOTE(review): the statements that change icount_time_shift and update
 * last_delta, and the first term of each condition, are not visible in
 * this excerpt.
 */
259 static void icount_adjust(void)
265 /* Protected by TimersState mutex. */
266 static int64_t last_delta;
268 /* If the VM is not running, then do nothing. */
269 if (!runstate_is_running()) {
273 seqlock_write_lock(&timers_state.vm_clock_seqlock);
274 cur_time = cpu_get_clock_locked();
275 cur_icount = cpu_get_icount_locked();
277 delta = cur_icount - cur_time;
278 /* FIXME: This is a very crude algorithm, somewhat prone to oscillation. */
280 && last_delta + ICOUNT_WOBBLE < delta * 2
281 && icount_time_shift > 0) {
282 /* The guest is getting too far ahead. Slow time down. */
286 && last_delta - ICOUNT_WOBBLE > delta * 2
287 && icount_time_shift < MAX_ICOUNT_SHIFT) {
288 /* The guest is getting too far behind. Speed time up. */
292 timers_state.qemu_icount_bias = cur_icount
293 - (timers_state.qemu_icount << icount_time_shift);
294 seqlock_write_unlock(&timers_state.vm_clock_seqlock);
/*
 * Handler installed on icount_rt_timer by configure_icount().
 * The visible statement re-arms that timer 1000 ms of QEMU_CLOCK_REALTIME
 * from now; @opaque is not used by the visible code.
 */
297 static void icount_adjust_rt(void *opaque)
299 timer_mod(icount_rt_timer,
300 qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + 1000);
/*
 * Handler installed on icount_vm_timer by configure_icount().
 * The visible statement re-arms that timer get_ticks_per_sec() / 10
 * nanoseconds of QEMU_CLOCK_VIRTUAL from now; @opaque is not used by the
 * visible code.
 */
304 static void icount_adjust_vm(void *opaque)
306 timer_mod(icount_vm_timer,
307 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
308 get_ticks_per_sec() / 10);
/*
 * Divide @count by 2^icount_time_shift, rounding up: (1 << shift) - 1 is
 * added before the right shift.  tcg_cpu_exec() uses it to turn a
 * nanosecond deadline into an instruction count.
 */
312 static int64_t qemu_icount_round(int64_t count)
314 return (count + (1 << icount_time_shift) - 1) >> icount_time_shift;
/*
 * Apply a pending clock warp; also the handler of icount_warp_timer
 * (see configure_icount()) and called directly from qemu_clock_warp().
 * vm_clock_warp_start == -1 is tested first, outside the seqlock.
 * With vm_clock_seqlock held for writing and the VM running, the
 * QEMU_CLOCK_REALTIME time elapsed since vm_clock_warp_start is added to
 * qemu_icount_bias; when use_icount == 2 the amount is capped at
 * cpu_get_clock_locked() - cpu_get_icount_locked().
 * vm_clock_warp_start is then reset to -1, and QEMU_CLOCK_VIRTUAL is
 * notified if qemu_clock_expired() reports expired timers.
 */
317 static void icount_warp_rt(void *opaque)
319 /* The icount_warp_timer is rescheduled soon after vm_clock_warp_start
320 * changes from -1 to another value, so the race here is okay.
322 if (atomic_read(&vm_clock_warp_start) == -1) {
326 seqlock_write_lock(&timers_state.vm_clock_seqlock);
327 if (runstate_is_running()) {
328 int64_t clock = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
331 warp_delta = clock - vm_clock_warp_start;
332 if (use_icount == 2) {
334 * In adaptive mode, do not let QEMU_CLOCK_VIRTUAL run too
335 * far ahead of real time.
337 int64_t cur_time = cpu_get_clock_locked();
338 int64_t cur_icount = cpu_get_icount_locked();
339 int64_t delta = cur_time - cur_icount;
340 warp_delta = MIN(warp_delta, delta);
342 timers_state.qemu_icount_bias += warp_delta;
344 vm_clock_warp_start = -1;
345 seqlock_write_unlock(&timers_state.vm_clock_seqlock);
347 if (qemu_clock_expired(QEMU_CLOCK_VIRTUAL)) {
348 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
/*
 * Advance QEMU_CLOCK_VIRTUAL until it reaches @dest; asserts that qtest
 * is enabled.
 * Each iteration warps by the smaller of the remaining distance and the
 * next QEMU_CLOCK_VIRTUAL deadline (qemu_soonest_timeout()), adds that
 * amount to qemu_icount_bias under the vm_clock_seqlock write lock, runs
 * the virtual-clock timers and re-reads the clock.
 * The clock is notified once the loop has finished.
 */
352 void qtest_clock_warp(int64_t dest)
354 int64_t clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
355 assert(qtest_enabled());
356 while (clock < dest) {
357 int64_t deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
358 int64_t warp = qemu_soonest_timeout(dest - clock, deadline);
359 seqlock_write_lock(&timers_state.vm_clock_seqlock);
360 timers_state.qemu_icount_bias += warp;
361 seqlock_write_unlock(&timers_state.vm_clock_seqlock);
363 qemu_clock_run_timers(QEMU_CLOCK_VIRTUAL);
364 clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
366 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
369 void qemu_clock_warp(QEMUClockType type)
375 * There are too many global variables to make the "warp" behavior
376 * applicable to other clocks. But a clock argument removes the
377 * need for if statements all over the place.
379 if (type != QEMU_CLOCK_VIRTUAL || !use_icount) {
384 * If the CPUs have been sleeping, advance QEMU_CLOCK_VIRTUAL timer now.
385 * This ensures that the deadline for the timer is computed correctly below.
386 * This also makes sure that the insn counter is synchronized before the
387 * CPU starts running, in case the CPU is woken by an event other than
388 * the earliest QEMU_CLOCK_VIRTUAL timer.
390 icount_warp_rt(NULL);
391 timer_del(icount_warp_timer);
392 if (!all_cpu_threads_idle()) {
396 if (qtest_enabled()) {
397 /* When testing, qtest commands advance icount. */
401 /* We want to use the earliest deadline from ALL vm_clocks */
402 clock = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
403 deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
410 * Ensure QEMU_CLOCK_VIRTUAL proceeds even when the virtual CPU goes to
411 * sleep. Otherwise, the CPU might be waiting for a future timer
412 * interrupt to wake it up, but the interrupt never comes because
413 * the vCPU isn't running any insns and thus doesn't advance the
414 * QEMU_CLOCK_VIRTUAL.
416 * An extreme solution for this problem would be to never let VCPUs
417 * sleep in icount mode if there is a pending QEMU_CLOCK_VIRTUAL
418 * timer; rather time could just advance to the next QEMU_CLOCK_VIRTUAL
419 * event. Instead, we do stop VCPUs and only advance QEMU_CLOCK_VIRTUAL
420 * after some "real" time (related to the time left until the next
421 * event) has passed. The QEMU_CLOCK_REALTIME timer will do this.
422 * This avoids that the warps are visible externally; for example,
423 * you will not be sending network packets continuously instead of
426 seqlock_write_lock(&timers_state.vm_clock_seqlock);
427 if (vm_clock_warp_start == -1 || vm_clock_warp_start > clock) {
428 vm_clock_warp_start = clock;
430 seqlock_write_unlock(&timers_state.vm_clock_seqlock);
431 timer_mod_anticipate(icount_warp_timer, clock + deadline);
432 } else if (deadline == 0) {
433 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
437 static bool icount_state_needed(void *opaque)
443 * This is a subsection for icount migration.
445 static const VMStateDescription icount_vmstate_timers = {
446 .name = "timer/icount",
448 .minimum_version_id = 1,
449 .fields = (VMStateField[]) {
450 VMSTATE_INT64(qemu_icount_bias, TimersState),
451 VMSTATE_INT64(qemu_icount, TimersState),
452 VMSTATE_END_OF_LIST()
456 static const VMStateDescription vmstate_timers = {
459 .minimum_version_id = 1,
460 .fields = (VMStateField[]) {
461 VMSTATE_INT64(cpu_ticks_offset, TimersState),
462 VMSTATE_INT64(dummy, TimersState),
463 VMSTATE_INT64_V(cpu_clock_offset, TimersState, 2),
464 VMSTATE_END_OF_LIST()
466 .subsections = (VMStateSubsection[]) {
468 .vmsd = &icount_vmstate_timers,
469 .needed = icount_state_needed,
476 void configure_icount(const char *option)
478 seqlock_init(&timers_state.vm_clock_seqlock, NULL);
479 vmstate_register(NULL, 0, &vmstate_timers, &timers_state);
484 icount_warp_timer = timer_new_ns(QEMU_CLOCK_REALTIME,
485 icount_warp_rt, NULL);
486 if (strcmp(option, "auto") != 0) {
487 icount_time_shift = strtol(option, NULL, 0);
494 /* 125MIPS seems a reasonable initial guess at the guest speed.
495 It will be corrected fairly quickly anyway. */
496 icount_time_shift = 3;
498 /* Have both realtime and virtual time triggers for speed adjustment.
499 The realtime trigger catches emulated time passing too slowly,
500 the virtual time trigger catches emulated time passing too fast.
501 Realtime triggers occur even when idle, so use them less frequently
503 icount_rt_timer = timer_new_ms(QEMU_CLOCK_REALTIME,
504 icount_adjust_rt, NULL);
505 timer_mod(icount_rt_timer,
506 qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + 1000);
507 icount_vm_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
508 icount_adjust_vm, NULL);
509 timer_mod(icount_vm_timer,
510 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
511 get_ticks_per_sec() / 10);
514 /***********************************************************/
515 void hw_error(const char *fmt, ...)
521 fprintf(stderr, "qemu: hardware error: ");
522 vfprintf(stderr, fmt, ap);
523 fprintf(stderr, "\n");
525 fprintf(stderr, "CPU #%d:\n", cpu->cpu_index);
526 cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_FPU);
532 void cpu_synchronize_all_states(void)
537 cpu_synchronize_state(cpu);
541 void cpu_synchronize_all_post_reset(void)
546 cpu_synchronize_post_reset(cpu);
550 void cpu_synchronize_all_post_init(void)
555 cpu_synchronize_post_init(cpu);
559 static int do_vm_stop(RunState state)
563 if (runstate_is_running()) {
567 vm_state_notify(0, state);
568 qapi_event_send_stop(&error_abort);
572 ret = bdrv_flush_all();
577 static bool cpu_can_run(CPUState *cpu)
582 if (cpu_is_stopped(cpu)) {
588 static void cpu_handle_guest_debug(CPUState *cpu)
590 gdb_set_stop_cpu(cpu);
591 qemu_system_debug_request();
595 static void cpu_signal(int sig)
598 cpu_exit(current_cpu);
604 static void sigbus_reraise(void)
607 struct sigaction action;
609 memset(&action, 0, sizeof(action));
610 action.sa_handler = SIG_DFL;
611 if (!sigaction(SIGBUS, &action, NULL)) {
614 sigaddset(&set, SIGBUS);
615 sigprocmask(SIG_UNBLOCK, &set, NULL);
617 perror("Failed to re-raise SIGBUS!\n");
621 static void sigbus_handler(int n, struct qemu_signalfd_siginfo *siginfo,
624 if (kvm_on_sigbus(siginfo->ssi_code,
625 (void *)(intptr_t)siginfo->ssi_addr)) {
630 static void qemu_init_sigbus(void)
632 struct sigaction action;
634 memset(&action, 0, sizeof(action));
635 action.sa_flags = SA_SIGINFO;
636 action.sa_sigaction = (void (*)(int, siginfo_t*, void*))sigbus_handler;
637 sigaction(SIGBUS, &action, NULL);
639 prctl(PR_MCE_KILL, PR_MCE_KILL_SET, PR_MCE_KILL_EARLY, 0, 0);
642 static void qemu_kvm_eat_signals(CPUState *cpu)
644 struct timespec ts = { 0, 0 };
650 sigemptyset(&waitset);
651 sigaddset(&waitset, SIG_IPI);
652 sigaddset(&waitset, SIGBUS);
655 r = sigtimedwait(&waitset, &siginfo, &ts);
656 if (r == -1 && !(errno == EAGAIN || errno == EINTR)) {
657 perror("sigtimedwait");
663 if (kvm_on_sigbus_vcpu(cpu, siginfo.si_code, siginfo.si_addr)) {
671 r = sigpending(&chkset);
673 perror("sigpending");
676 } while (sigismember(&chkset, SIG_IPI) || sigismember(&chkset, SIGBUS));
679 #else /* !CONFIG_LINUX */
681 static void qemu_init_sigbus(void)
685 static void qemu_kvm_eat_signals(CPUState *cpu)
688 #endif /* !CONFIG_LINUX */
691 static void dummy_signal(int sig)
695 static void qemu_kvm_init_cpu_signals(CPUState *cpu)
699 struct sigaction sigact;
701 memset(&sigact, 0, sizeof(sigact));
702 sigact.sa_handler = dummy_signal;
703 sigaction(SIG_IPI, &sigact, NULL);
705 pthread_sigmask(SIG_BLOCK, NULL, &set);
706 sigdelset(&set, SIG_IPI);
707 sigdelset(&set, SIGBUS);
708 r = kvm_set_signal_mask(cpu, &set);
710 fprintf(stderr, "kvm_set_signal_mask: %s\n", strerror(-r));
715 static void qemu_tcg_init_cpu_signals(void)
718 struct sigaction sigact;
720 memset(&sigact, 0, sizeof(sigact));
721 sigact.sa_handler = cpu_signal;
722 sigaction(SIG_IPI, &sigact, NULL);
725 sigaddset(&set, SIG_IPI);
726 pthread_sigmask(SIG_UNBLOCK, &set, NULL);
730 static void qemu_kvm_init_cpu_signals(CPUState *cpu)
735 static void qemu_tcg_init_cpu_signals(void)
740 static QemuMutex qemu_global_mutex;
741 static QemuCond qemu_io_proceeded_cond;
742 static bool iothread_requesting_mutex;
744 static QemuThread io_thread;
746 static QemuThread *tcg_cpu_thread;
747 static QemuCond *tcg_halt_cond;
750 static QemuCond qemu_cpu_cond;
752 static QemuCond qemu_pause_cond;
753 static QemuCond qemu_work_cond;
/*
 * Initialise the synchronisation objects used by the CPU loop: the
 * qemu_cpu_cond, qemu_pause_cond, qemu_work_cond and
 * qemu_io_proceeded_cond condition variables and qemu_global_mutex.
 * The calling thread is recorded in io_thread.
 */
755 void qemu_init_cpu_loop(void)
758 qemu_cond_init(&qemu_cpu_cond);
759 qemu_cond_init(&qemu_pause_cond);
760 qemu_cond_init(&qemu_work_cond);
761 qemu_cond_init(&qemu_io_proceeded_cond);
762 qemu_mutex_init(&qemu_global_mutex);
764 qemu_thread_get_self(&io_thread);
767 void run_on_cpu(CPUState *cpu, void (*func)(void *data), void *data)
769 struct qemu_work_item wi;
771 if (qemu_cpu_is_self(cpu)) {
779 if (cpu->queued_work_first == NULL) {
780 cpu->queued_work_first = &wi;
782 cpu->queued_work_last->next = &wi;
784 cpu->queued_work_last = &wi;
790 CPUState *self_cpu = current_cpu;
792 qemu_cond_wait(&qemu_work_cond, &qemu_global_mutex);
793 current_cpu = self_cpu;
797 void async_run_on_cpu(CPUState *cpu, void (*func)(void *data), void *data)
799 struct qemu_work_item *wi;
801 if (qemu_cpu_is_self(cpu)) {
806 wi = g_malloc0(sizeof(struct qemu_work_item));
810 if (cpu->queued_work_first == NULL) {
811 cpu->queued_work_first = wi;
813 cpu->queued_work_last->next = wi;
815 cpu->queued_work_last = wi;
822 static void flush_queued_work(CPUState *cpu)
824 struct qemu_work_item *wi;
826 if (cpu->queued_work_first == NULL) {
830 while ((wi = cpu->queued_work_first)) {
831 cpu->queued_work_first = wi->next;
838 cpu->queued_work_last = NULL;
839 qemu_cond_broadcast(&qemu_work_cond);
842 static void qemu_wait_io_event_common(CPUState *cpu)
847 qemu_cond_signal(&qemu_pause_cond);
849 flush_queued_work(cpu);
850 cpu->thread_kicked = false;
853 static void qemu_tcg_wait_io_event(void)
857 while (all_cpu_threads_idle()) {
858 /* Start accounting real time to the virtual clock if the CPUs
860 qemu_clock_warp(QEMU_CLOCK_VIRTUAL);
861 qemu_cond_wait(tcg_halt_cond, &qemu_global_mutex);
864 while (iothread_requesting_mutex) {
865 qemu_cond_wait(&qemu_io_proceeded_cond, &qemu_global_mutex);
869 qemu_wait_io_event_common(cpu);
873 static void qemu_kvm_wait_io_event(CPUState *cpu)
875 while (cpu_thread_is_idle(cpu)) {
876 qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
879 qemu_kvm_eat_signals(cpu);
880 qemu_wait_io_event_common(cpu);
883 static void *qemu_kvm_cpu_thread_fn(void *arg)
888 qemu_mutex_lock(&qemu_global_mutex);
889 qemu_thread_get_self(cpu->thread);
890 cpu->thread_id = qemu_get_thread_id();
893 r = kvm_init_vcpu(cpu);
895 fprintf(stderr, "kvm_init_vcpu failed: %s\n", strerror(-r));
899 qemu_kvm_init_cpu_signals(cpu);
901 /* signal CPU creation */
903 qemu_cond_signal(&qemu_cpu_cond);
906 if (cpu_can_run(cpu)) {
907 r = kvm_cpu_exec(cpu);
908 if (r == EXCP_DEBUG) {
909 cpu_handle_guest_debug(cpu);
912 qemu_kvm_wait_io_event(cpu);
918 static void *qemu_dummy_cpu_thread_fn(void *arg)
921 fprintf(stderr, "qtest is not supported under Windows\n");
928 qemu_mutex_lock_iothread();
929 qemu_thread_get_self(cpu->thread);
930 cpu->thread_id = qemu_get_thread_id();
932 sigemptyset(&waitset);
933 sigaddset(&waitset, SIG_IPI);
935 /* signal CPU creation */
937 qemu_cond_signal(&qemu_cpu_cond);
942 qemu_mutex_unlock_iothread();
945 r = sigwait(&waitset, &sig);
946 } while (r == -1 && (errno == EAGAIN || errno == EINTR));
951 qemu_mutex_lock_iothread();
953 qemu_wait_io_event_common(cpu);
960 static void tcg_exec_all(void);
962 static void *qemu_tcg_cpu_thread_fn(void *arg)
966 qemu_tcg_init_cpu_signals();
967 qemu_thread_get_self(cpu->thread);
969 qemu_mutex_lock(&qemu_global_mutex);
971 cpu->thread_id = qemu_get_thread_id();
974 qemu_cond_signal(&qemu_cpu_cond);
976 /* wait for initial kick-off after machine start */
977 while (QTAILQ_FIRST(&cpus)->stopped) {
978 qemu_cond_wait(tcg_halt_cond, &qemu_global_mutex);
980 /* process any pending work */
982 qemu_wait_io_event_common(cpu);
990 int64_t deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
993 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
996 qemu_tcg_wait_io_event();
1002 static void qemu_cpu_kick_thread(CPUState *cpu)
1007 err = pthread_kill(cpu->thread->thread, SIG_IPI);
1009 fprintf(stderr, "qemu:%s: %s", __func__, strerror(err));
1013 if (!qemu_cpu_is_self(cpu)) {
1016 if (SuspendThread(cpu->hThread) == (DWORD)-1) {
1017 fprintf(stderr, "qemu:%s: GetLastError:%lu\n", __func__,
1022 /* On multi-core systems, we are not sure that the thread is actually
1023 * suspended until we can get the context.
1025 tcgContext.ContextFlags = CONTEXT_CONTROL;
1026 while (GetThreadContext(cpu->hThread, &tcgContext) != 0) {
1032 if (ResumeThread(cpu->hThread) == (DWORD)-1) {
1033 fprintf(stderr, "qemu:%s: GetLastError:%lu\n", __func__,
/*
 * Wake up the thread that runs @cpu.
 * Always broadcasts on cpu->halt_cond.  When TCG is not enabled and the
 * CPU has not been kicked yet, it additionally calls
 * qemu_cpu_kick_thread() and sets thread_kicked; that flag is cleared
 * again in qemu_wait_io_event_common().
 */
1041 void qemu_cpu_kick(CPUState *cpu)
1043 qemu_cond_broadcast(cpu->halt_cond);
1044 if (!tcg_enabled() && !cpu->thread_kicked) {
1045 qemu_cpu_kick_thread(cpu);
1046 cpu->thread_kicked = true;
/*
 * Kick the thread of the CPU that is currently executing (current_cpu,
 * asserted to be non-NULL).  The kick is sent through
 * qemu_cpu_kick_thread() only if thread_kicked is not already set, and
 * the flag is set afterwards.
 * NOTE(review): the excerpt omits lines around these statements (likely
 * platform conditionals) -- confirm against the full file.
 */
1050 void qemu_cpu_kick_self(void)
1053 assert(current_cpu);
1055 if (!current_cpu->thread_kicked) {
1056 qemu_cpu_kick_thread(current_cpu);
1057 current_cpu->thread_kicked = true;
/*
 * Return whether the calling thread is the thread recorded in
 * cpu->thread, as reported by qemu_thread_is_self().
 */
1064 bool qemu_cpu_is_self(CPUState *cpu)
1066 return qemu_thread_is_self(cpu->thread);
/*
 * Return whether the caller runs in a vCPU thread: current_cpu must be
 * non-NULL and qemu_cpu_is_self() must be true for it.
 */
1069 static bool qemu_in_vcpu_thread(void)
1071 return current_cpu && qemu_cpu_is_self(current_cpu);
/*
 * Acquire qemu_global_mutex.
 * When TCG is not enabled this is a plain qemu_mutex_lock().
 * The remaining statements set iothread_requesting_mutex, try the lock
 * without blocking and, if qemu_mutex_trylock() returns non-zero, kick the
 * thread of first_cpu before blocking on the mutex.  Afterwards the flag
 * is cleared and qemu_io_proceeded_cond is broadcast;
 * qemu_tcg_wait_io_event() waits on that condition while the flag is set.
 * NOTE(review): the line separating the two paths is not visible in this
 * excerpt; presumably the second path is the TCG case -- confirm.
 */
1074 void qemu_mutex_lock_iothread(void)
1076 if (!tcg_enabled()) {
1077 qemu_mutex_lock(&qemu_global_mutex);
1079 iothread_requesting_mutex = true;
1080 if (qemu_mutex_trylock(&qemu_global_mutex)) {
1081 qemu_cpu_kick_thread(first_cpu);
1082 qemu_mutex_lock(&qemu_global_mutex);
1084 iothread_requesting_mutex = false;
1085 qemu_cond_broadcast(&qemu_io_proceeded_cond);
/* Release qemu_global_mutex (counterpart of qemu_mutex_lock_iothread()). */
1089 void qemu_mutex_unlock_iothread(void)
1091 qemu_mutex_unlock(&qemu_global_mutex);
1094 static int all_vcpus_paused(void)
1099 if (!cpu->stopped) {
1107 void pause_all_vcpus(void)
1111 qemu_clock_enable(QEMU_CLOCK_VIRTUAL, false);
1117 if (qemu_in_vcpu_thread()) {
1119 if (!kvm_enabled()) {
1122 cpu->stopped = true;
1128 while (!all_vcpus_paused()) {
1129 qemu_cond_wait(&qemu_pause_cond, &qemu_global_mutex);
1136 void cpu_resume(CPUState *cpu)
1139 cpu->stopped = false;
1143 void resume_all_vcpus(void)
1147 qemu_clock_enable(QEMU_CLOCK_VIRTUAL, true);
1153 /* For temporary buffers for forming a name */
1154 #define VCPU_THREAD_NAME_SIZE 16
/*
 * Set up @cpu for TCG execution.
 * After initialising the CPU's address space, the first call (while
 * tcg_cpu_thread is still NULL) allocates the thread object and halt
 * condition, publishes the condition as tcg_halt_cond, starts
 * qemu_tcg_cpu_thread_fn in a joinable thread, waits on qemu_cpu_cond
 * until cpu->created is set and records the thread in tcg_cpu_thread.
 * The last two assignments make @cpu reuse that shared thread and
 * condition.
 * NOTE(review): the branch and conditional-compilation lines are not
 * visible in this excerpt; the hThread assignment is presumably
 * Windows-only -- confirm.
 */
1156 static void qemu_tcg_init_vcpu(CPUState *cpu)
1158 char thread_name[VCPU_THREAD_NAME_SIZE];
1160 tcg_cpu_address_space_init(cpu, cpu->as);
1162 /* share a single thread for all cpus with TCG */
1163 if (!tcg_cpu_thread) {
1164 cpu->thread = g_malloc0(sizeof(QemuThread));
1165 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
1166 qemu_cond_init(cpu->halt_cond);
1167 tcg_halt_cond = cpu->halt_cond;
1168 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/TCG",
1170 qemu_thread_create(cpu->thread, thread_name, qemu_tcg_cpu_thread_fn,
1171 cpu, QEMU_THREAD_JOINABLE);
1173 cpu->hThread = qemu_thread_get_handle(cpu->thread);
1175 while (!cpu->created) {
1176 qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
1178 tcg_cpu_thread = cpu->thread;
1180 cpu->thread = tcg_cpu_thread;
1181 cpu->halt_cond = tcg_halt_cond;
/*
 * Create the dedicated thread for a KVM vCPU.
 * Allocates and initialises cpu->thread and cpu->halt_cond, builds a
 * thread name from the "CPU %d/KVM" format, starts
 * qemu_kvm_cpu_thread_fn as a joinable thread and then waits on
 * qemu_cpu_cond (with qemu_global_mutex) until cpu->created is set.
 */
1185 static void qemu_kvm_start_vcpu(CPUState *cpu)
1187 char thread_name[VCPU_THREAD_NAME_SIZE];
1189 cpu->thread = g_malloc0(sizeof(QemuThread));
1190 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
1191 qemu_cond_init(cpu->halt_cond);
1192 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/KVM",
1194 qemu_thread_create(cpu->thread, thread_name, qemu_kvm_cpu_thread_fn,
1195 cpu, QEMU_THREAD_JOINABLE);
1196 while (!cpu->created) {
1197 qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
/*
 * Create the thread for a vCPU that uses neither KVM nor TCG (the
 * fallback chosen by qemu_init_vcpu()).
 * Allocates and initialises cpu->thread and cpu->halt_cond, builds a
 * thread name from the "CPU %d/DUMMY" format, starts
 * qemu_dummy_cpu_thread_fn as a joinable thread and then waits on
 * qemu_cpu_cond (with qemu_global_mutex) until cpu->created is set.
 */
1201 static void qemu_dummy_start_vcpu(CPUState *cpu)
1203 char thread_name[VCPU_THREAD_NAME_SIZE];
1205 cpu->thread = g_malloc0(sizeof(QemuThread));
1206 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
1207 qemu_cond_init(cpu->halt_cond);
1208 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/DUMMY",
1210 qemu_thread_create(cpu->thread, thread_name, qemu_dummy_cpu_thread_fn, cpu,
1211 QEMU_THREAD_JOINABLE);
1212 while (!cpu->created) {
1213 qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
/*
 * Common vCPU start-up entry point.
 * Copies the global smp_cores / smp_threads topology into @cpu, marks the
 * CPU as stopped, and then dispatches on the accelerator in use:
 * qemu_kvm_start_vcpu() for KVM, qemu_tcg_init_vcpu() for TCG, and
 * qemu_dummy_start_vcpu() otherwise.
 */
1217 void qemu_init_vcpu(CPUState *cpu)
1219 cpu->nr_cores = smp_cores;
1220 cpu->nr_threads = smp_threads;
1221 cpu->stopped = true;
1222 if (kvm_enabled()) {
1223 qemu_kvm_start_vcpu(cpu);
1224 } else if (tcg_enabled()) {
1225 qemu_tcg_init_vcpu(cpu);
1227 qemu_dummy_start_vcpu(cpu);
/*
 * Put the currently executing CPU into the stopped state: clears its
 * pending stop request, sets "stopped", calls cpu_exit() on it and
 * signals qemu_pause_cond, the condition pause_all_vcpus() waits on.
 * NOTE(review): the excerpt omits lines around these statements; confirm
 * whether current_cpu is checked for NULL first.
 */
1231 void cpu_stop_current(void)
1234 current_cpu->stop = false;
1235 current_cpu->stopped = true;
1236 cpu_exit(current_cpu);
1237 qemu_cond_signal(&qemu_pause_cond);
/*
 * Stop the VM and move it to run state @state.
 * When called from a vCPU thread the stop is only requested, through
 * qemu_system_vmstop_request_prepare() and qemu_system_vmstop_request().
 * The last visible statement returns the result of do_vm_stop(state).
 * NOTE(review): the lines between the two paths are not visible in this
 * excerpt; confirm that the vCPU-thread path returns before do_vm_stop().
 */
1241 int vm_stop(RunState state)
1243 if (qemu_in_vcpu_thread()) {
1244 qemu_system_vmstop_request_prepare();
1245 qemu_system_vmstop_request(state);
1247 * FIXME: should not return to device code in case
1248 * vm_stop() has been requested.
1254 return do_vm_stop(state);
/*
 * Force the VM into run state @state.
 * If the VM is running this is the same as vm_stop(state).  Otherwise the
 * run state is overwritten with runstate_set() and the result of
 * bdrv_flush_all() is returned.
 */
1257 /* does a state transition even if the VM is already stopped,
1258 current state is forgotten forever */
1259 int vm_stop_force_state(RunState state)
1261 if (runstate_is_running()) {
1262 return vm_stop(state);
1264 runstate_set(state);
1265 /* Make sure to return an error if the flush in a previous vm_stop()
1267 return bdrv_flush_all();
1271 static int tcg_cpu_exec(CPUArchState *env)
1273 CPUState *cpu = ENV_GET_CPU(env);
1275 #ifdef CONFIG_PROFILER
1279 #ifdef CONFIG_PROFILER
1280 ti = profile_getclock();
1286 timers_state.qemu_icount -= (cpu->icount_decr.u16.low
1287 + cpu->icount_extra);
1288 cpu->icount_decr.u16.low = 0;
1289 cpu->icount_extra = 0;
1290 deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
1292 /* Maintain prior (possibly buggy) behaviour where if no deadline
1293 * was set (as there is no QEMU_CLOCK_VIRTUAL timer) or it is more than
1294 * INT32_MAX nanoseconds ahead, we still use INT32_MAX
1297 if ((deadline < 0) || (deadline > INT32_MAX)) {
1298 deadline = INT32_MAX;
1301 count = qemu_icount_round(deadline);
1302 timers_state.qemu_icount += count;
1303 decr = (count > 0xffff) ? 0xffff : count;
1305 cpu->icount_decr.u16.low = decr;
1306 cpu->icount_extra = count;
1308 ret = cpu_exec(env);
1309 #ifdef CONFIG_PROFILER
1310 qemu_time += profile_getclock() - ti;
1313 /* Fold pending instructions back into the
1314 instruction counter, and clear the interrupt flag. */
1315 timers_state.qemu_icount -= (cpu->icount_decr.u16.low
1316 + cpu->icount_extra);
1317 cpu->icount_decr.u32 = 0;
1318 cpu->icount_extra = 0;
1323 static void tcg_exec_all(void)
1327 /* Account partial waits to QEMU_CLOCK_VIRTUAL. */
1328 qemu_clock_warp(QEMU_CLOCK_VIRTUAL);
1330 if (next_cpu == NULL) {
1331 next_cpu = first_cpu;
1333 for (; next_cpu != NULL && !exit_request; next_cpu = CPU_NEXT(next_cpu)) {
1334 CPUState *cpu = next_cpu;
1335 CPUArchState *env = cpu->env_ptr;
1337 qemu_clock_enable(QEMU_CLOCK_VIRTUAL,
1338 (cpu->singlestep_enabled & SSTEP_NOTIMER) == 0);
1340 if (cpu_can_run(cpu)) {
1341 r = tcg_cpu_exec(env);
1342 if (r == EXCP_DEBUG) {
1343 cpu_handle_guest_debug(cpu);
1346 } else if (cpu->stop || cpu->stopped) {
1353 void list_cpus(FILE *f, fprintf_function cpu_fprintf, const char *optarg)
1355 /* XXX: implement xxx_cpu_list for targets that still miss it */
1356 #if defined(cpu_list)
1357 cpu_list(f, cpu_fprintf);
1361 CpuInfoList *qmp_query_cpus(Error **errp)
1363 CpuInfoList *head = NULL, *cur_item = NULL;
1368 #if defined(TARGET_I386)
1369 X86CPU *x86_cpu = X86_CPU(cpu);
1370 CPUX86State *env = &x86_cpu->env;
1371 #elif defined(TARGET_PPC)
1372 PowerPCCPU *ppc_cpu = POWERPC_CPU(cpu);
1373 CPUPPCState *env = &ppc_cpu->env;
1374 #elif defined(TARGET_SPARC)
1375 SPARCCPU *sparc_cpu = SPARC_CPU(cpu);
1376 CPUSPARCState *env = &sparc_cpu->env;
1377 #elif defined(TARGET_MIPS)
1378 MIPSCPU *mips_cpu = MIPS_CPU(cpu);
1379 CPUMIPSState *env = &mips_cpu->env;
1382 cpu_synchronize_state(cpu);
1384 info = g_malloc0(sizeof(*info));
1385 info->value = g_malloc0(sizeof(*info->value));
1386 info->value->CPU = cpu->cpu_index;
1387 info->value->current = (cpu == first_cpu);
1388 info->value->halted = cpu->halted;
1389 info->value->thread_id = cpu->thread_id;
1390 #if defined(TARGET_I386)
1391 info->value->has_pc = true;
1392 info->value->pc = env->eip + env->segs[R_CS].base;
1393 #elif defined(TARGET_PPC)
1394 info->value->has_nip = true;
1395 info->value->nip = env->nip;
1396 #elif defined(TARGET_SPARC)
1397 info->value->has_pc = true;
1398 info->value->pc = env->pc;
1399 info->value->has_npc = true;
1400 info->value->npc = env->npc;
1401 #elif defined(TARGET_MIPS)
1402 info->value->has_PC = true;
1403 info->value->PC = env->active_tc.PC;
1406 /* XXX: waiting for the qapi to support GSList */
1408 head = cur_item = info;
1410 cur_item->next = info;
1418 void qmp_memsave(int64_t addr, int64_t size, const char *filename,
1419 bool has_cpu, int64_t cpu_index, Error **errp)
1430 cpu = qemu_get_cpu(cpu_index);
1432 error_set(errp, QERR_INVALID_PARAMETER_VALUE, "cpu-index",
1437 f = fopen(filename, "wb");
1439 error_setg_file_open(errp, errno, filename);
1447 if (cpu_memory_rw_debug(cpu, addr, buf, l, 0) != 0) {
1448 error_setg(errp, "Invalid addr 0x%016" PRIx64 "specified", addr);
1451 if (fwrite(buf, 1, l, f) != l) {
1452 error_set(errp, QERR_IO_ERROR);
1463 void qmp_pmemsave(int64_t addr, int64_t size, const char *filename,
1470 f = fopen(filename, "wb");
1472 error_setg_file_open(errp, errno, filename);
1480 cpu_physical_memory_read(addr, buf, l);
1481 if (fwrite(buf, 1, l, f) != l) {
1482 error_set(errp, QERR_IO_ERROR);
1493 void qmp_inject_nmi(Error **errp)
1495 #if defined(TARGET_I386)
1499 X86CPU *cpu = X86_CPU(cs);
1501 if (!cpu->apic_state) {
1502 cpu_interrupt(cs, CPU_INTERRUPT_NMI);
1504 apic_deliver_nmi(cpu->apic_state);
1507 #elif defined(TARGET_S390X)
1513 if (cpu->env.cpu_num == monitor_get_cpu_index()) {
1514 if (s390_cpu_restart(S390_CPU(cs)) == -1) {
1515 error_set(errp, QERR_UNSUPPORTED);
1522 error_set(errp, QERR_UNSUPPORTED);