Revert "display: move display functionality to Qt5 GUI"
[sdk/emulator/qemu.git] / cpus.c
/*
 * QEMU System Emulator
 *
 * Copyright (c) 2003-2008 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

/* Needed early for CONFIG_BSD etc. */
#include "qemu/osdep.h"

#include "monitor/monitor.h"
#include "qapi/qmp/qerror.h"
#include "qemu/error-report.h"
#include "sysemu/sysemu.h"
#include "sysemu/block-backend.h"
#include "exec/gdbstub.h"
#include "sysemu/dma.h"
#include "sysemu/kvm.h"
#include "sysemu/hax.h"
#include "qmp-commands.h"

#include "qemu/thread.h"
#include "sysemu/cpus.h"
#include "sysemu/qtest.h"
#include "qemu/main-loop.h"
#include "qemu/bitmap.h"
#include "qemu/seqlock.h"
#include "qapi-event.h"
#include "hw/nmi.h"
#include "sysemu/replay.h"

#ifndef _WIN32
#include "qemu/compatfd.h"
#endif

#ifdef CONFIG_LINUX

#include <sys/prctl.h>

#ifndef PR_MCE_KILL
#define PR_MCE_KILL 33
#endif

#ifndef PR_MCE_KILL_SET
#define PR_MCE_KILL_SET 1
#endif

#ifndef PR_MCE_KILL_EARLY
#define PR_MCE_KILL_EARLY 1
#endif

#endif /* CONFIG_LINUX */

static CPUState *next_cpu;
int64_t max_delay;
int64_t max_advance;

/* vcpu throttling controls */
static QEMUTimer *throttle_timer;
static unsigned int throttle_percentage;

#define CPU_THROTTLE_PCT_MIN 1
#define CPU_THROTTLE_PCT_MAX 99
#define CPU_THROTTLE_TIMESLICE_NS 10000000
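
/* Throttling below works by periodically forcing each vCPU off the host
 * CPU: every timer tick schedules cpu_throttle_thread(), which sleeps for
 * pct / (1 - pct) of a 10 ms timeslice, while the tick itself repeats every
 * timeslice / (1 - pct) ns, so a vCPU ends up sleeping for pct of real time.
 */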

bool cpu_is_stopped(CPUState *cpu)
{
    return cpu->stopped || !runstate_is_running();
}

static bool cpu_thread_is_idle(CPUState *cpu)
{
    if (cpu->stop || cpu->queued_work_first) {
        return false;
    }
    if (cpu_is_stopped(cpu)) {
        return true;
    }
    if (!cpu->halted || cpu_has_work(cpu) ||
        kvm_halt_in_kernel()) {
        return false;
    }
    return true;
}

static bool all_cpu_threads_idle(void)
{
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        if (!cpu_thread_is_idle(cpu)) {
            return false;
        }
    }
    return true;
}

/***********************************************************/
/* guest cycle counter */

/* Protected by TimersState seqlock */

static bool icount_sleep = true;
static int64_t vm_clock_warp_start = -1;
/* Conversion factor from emulated instructions to virtual clock ticks.  */
static int icount_time_shift;
/* Arbitrarily pick 1MIPS as the minimum allowable speed.  */
#define MAX_ICOUNT_SHIFT 10
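
/* With shift = N, each emulated instruction accounts for 2^N ns of virtual
 * time, i.e. about 1000 / 2^N MIPS, so shift = 10 caps the guest at roughly
 * 1 MIPS; icount_adjust() below moves the shift within [0, MAX_ICOUNT_SHIFT]
 * to keep virtual time tracking real time.
 */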

static QEMUTimer *icount_rt_timer;
static QEMUTimer *icount_vm_timer;
static QEMUTimer *icount_warp_timer;

typedef struct TimersState {
    /* Protected by BQL.  */
    int64_t cpu_ticks_prev;
    int64_t cpu_ticks_offset;

    /* cpu_clock_offset can be read out of BQL, so protect it with
     * this lock.
     */
    QemuSeqLock vm_clock_seqlock;
    int64_t cpu_clock_offset;
    int32_t cpu_ticks_enabled;
    int64_t dummy;

    /* Compensate for varying guest execution speed.  */
    int64_t qemu_icount_bias;
    /* Only written by TCG thread */
    int64_t qemu_icount;
} TimersState;

static TimersState timers_state;
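
/* Writers of vm_clock_seqlock hold the BQL and bracket their updates with
 * seqlock_write_lock()/seqlock_write_unlock(); lock-free readers use a
 * seqlock_read_begin()/seqlock_read_retry() loop, as in cpu_get_icount()
 * below.
 */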

int64_t cpu_get_icount_raw(void)
{
    int64_t icount;
    CPUState *cpu = current_cpu;

    icount = timers_state.qemu_icount;
    if (cpu) {
        if (!cpu->can_do_io) {
            fprintf(stderr, "Bad icount read\n");
            exit(1);
        }
        icount -= (cpu->icount_decr.u16.low + cpu->icount_extra);
    }
    return icount;
}

/* Return the virtual CPU time, based on the instruction counter.  */
static int64_t cpu_get_icount_locked(void)
{
    int64_t icount = cpu_get_icount_raw();
    return timers_state.qemu_icount_bias + cpu_icount_to_ns(icount);
}

int64_t cpu_get_icount(void)
{
    int64_t icount;
    unsigned start;

    do {
        start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
        icount = cpu_get_icount_locked();
    } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));

    return icount;
}

int64_t cpu_icount_to_ns(int64_t icount)
{
    return icount << icount_time_shift;
}

/* return the host CPU cycle counter and handle stop/restart */
/* Caller must hold the BQL */
int64_t cpu_get_ticks(void)
{
    int64_t ticks;

    if (use_icount) {
        return cpu_get_icount();
    }

    ticks = timers_state.cpu_ticks_offset;
    if (timers_state.cpu_ticks_enabled) {
        ticks += cpu_get_host_ticks();
    }

    if (timers_state.cpu_ticks_prev > ticks) {
        /* Note: non-increasing ticks may happen if the host uses
           software suspend.  */
        timers_state.cpu_ticks_offset += timers_state.cpu_ticks_prev - ticks;
        ticks = timers_state.cpu_ticks_prev;
    }

    timers_state.cpu_ticks_prev = ticks;
    return ticks;
}

static int64_t cpu_get_clock_locked(void)
{
    int64_t ticks;

    ticks = timers_state.cpu_clock_offset;
    if (timers_state.cpu_ticks_enabled) {
        ticks += get_clock();
    }

    return ticks;
}

/* return the host CPU monotonic timer and handle stop/restart */
int64_t cpu_get_clock(void)
{
    int64_t ti;
    unsigned start;

    do {
        start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
        ti = cpu_get_clock_locked();
    } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));

    return ti;
}

/* enable cpu_get_ticks()
 * Caller must hold BQL which serves as mutex for vm_clock_seqlock.
 */
void cpu_enable_ticks(void)
{
    /* Here, the thing really protected by the seqlock is cpu_clock_offset. */
    seqlock_write_lock(&timers_state.vm_clock_seqlock);
    if (!timers_state.cpu_ticks_enabled) {
        timers_state.cpu_ticks_offset -= cpu_get_host_ticks();
        timers_state.cpu_clock_offset -= get_clock();
        timers_state.cpu_ticks_enabled = 1;
    }
    seqlock_write_unlock(&timers_state.vm_clock_seqlock);
}

/* disable cpu_get_ticks() : the clock is stopped. You must not call
 * cpu_get_ticks() after that.
 * Caller must hold BQL which serves as mutex for vm_clock_seqlock.
 */
void cpu_disable_ticks(void)
{
    /* Here, the thing really protected by the seqlock is cpu_clock_offset. */
    seqlock_write_lock(&timers_state.vm_clock_seqlock);
    if (timers_state.cpu_ticks_enabled) {
        timers_state.cpu_ticks_offset += cpu_get_host_ticks();
        timers_state.cpu_clock_offset = cpu_get_clock_locked();
        timers_state.cpu_ticks_enabled = 0;
    }
    seqlock_write_unlock(&timers_state.vm_clock_seqlock);
}

/* Correlation between real and virtual time is always going to be
   fairly approximate, so ignore small variation.
   When the guest is idle real and virtual time will be aligned in
   the IO wait loop.  */
#define ICOUNT_WOBBLE (NANOSECONDS_PER_SECOND / 10)

static void icount_adjust(void)
{
    int64_t cur_time;
    int64_t cur_icount;
    int64_t delta;

    /* Protected by TimersState mutex.  */
    static int64_t last_delta;

    /* If the VM is not running, then do nothing.  */
    if (!runstate_is_running()) {
        return;
    }

    seqlock_write_lock(&timers_state.vm_clock_seqlock);
    cur_time = cpu_get_clock_locked();
    cur_icount = cpu_get_icount_locked();

    delta = cur_icount - cur_time;
    /* FIXME: This is a very crude algorithm, somewhat prone to oscillation.  */
    if (delta > 0
        && last_delta + ICOUNT_WOBBLE < delta * 2
        && icount_time_shift > 0) {
        /* The guest is getting too far ahead.  Slow time down.  */
        icount_time_shift--;
    }
    if (delta < 0
        && last_delta - ICOUNT_WOBBLE > delta * 2
        && icount_time_shift < MAX_ICOUNT_SHIFT) {
        /* The guest is getting too far behind.  Speed time up.  */
        icount_time_shift++;
    }
    last_delta = delta;
    timers_state.qemu_icount_bias = cur_icount
                              - (timers_state.qemu_icount << icount_time_shift);
    seqlock_write_unlock(&timers_state.vm_clock_seqlock);
}

static void icount_adjust_rt(void *opaque)
{
    timer_mod(icount_rt_timer,
              qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000);
    icount_adjust();
}

static void icount_adjust_vm(void *opaque)
{
    timer_mod(icount_vm_timer,
                   qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
                   NANOSECONDS_PER_SECOND / 10);
    icount_adjust();
}

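/* Round a QEMU_CLOCK_VIRTUAL deadline in ns up to a whole number of
 * instructions at the current shift, i.e. a ceiling division by 2^shift.
 */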
static int64_t qemu_icount_round(int64_t count)
{
    return (count + (1 << icount_time_shift) - 1) >> icount_time_shift;
}

static void icount_warp_rt(void)
{
    unsigned seq;
    int64_t warp_start;

    /* The icount_warp_timer is rescheduled soon after vm_clock_warp_start
     * changes from -1 to another value, so the race here is okay.
     */
    do {
        seq = seqlock_read_begin(&timers_state.vm_clock_seqlock);
        warp_start = vm_clock_warp_start;
    } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, seq));

    if (warp_start == -1) {
        return;
    }

    seqlock_write_lock(&timers_state.vm_clock_seqlock);
    if (runstate_is_running()) {
        int64_t clock = REPLAY_CLOCK(REPLAY_CLOCK_VIRTUAL_RT,
                                     cpu_get_clock_locked());
        int64_t warp_delta;

        warp_delta = clock - vm_clock_warp_start;
        if (use_icount == 2) {
            /*
             * In adaptive mode, do not let QEMU_CLOCK_VIRTUAL run too
             * far ahead of real time.
             */
            int64_t cur_icount = cpu_get_icount_locked();
            int64_t delta = clock - cur_icount;
            warp_delta = MIN(warp_delta, delta);
        }
        timers_state.qemu_icount_bias += warp_delta;
    }
    vm_clock_warp_start = -1;
    seqlock_write_unlock(&timers_state.vm_clock_seqlock);

    if (qemu_clock_expired(QEMU_CLOCK_VIRTUAL)) {
        qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
    }
}

static void icount_timer_cb(void *opaque)
{
    /* No need for a checkpoint because the timer already synchronizes
     * with CHECKPOINT_CLOCK_VIRTUAL_RT.
     */
    icount_warp_rt();
}

void qtest_clock_warp(int64_t dest)
{
    int64_t clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
    AioContext *aio_context;
    assert(qtest_enabled());
    aio_context = qemu_get_aio_context();
    while (clock < dest) {
        int64_t deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
        int64_t warp = qemu_soonest_timeout(dest - clock, deadline);

        seqlock_write_lock(&timers_state.vm_clock_seqlock);
        timers_state.qemu_icount_bias += warp;
        seqlock_write_unlock(&timers_state.vm_clock_seqlock);

        qemu_clock_run_timers(QEMU_CLOCK_VIRTUAL);
        timerlist_run_timers(aio_context->tlg.tl[QEMU_CLOCK_VIRTUAL]);
        clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
    }
    qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
}

void qemu_start_warp_timer(void)
{
    int64_t clock;
    int64_t deadline;

    if (!use_icount) {
        return;
    }

    /* Nothing to do if the VM is stopped: QEMU_CLOCK_VIRTUAL timers
     * do not fire, so computing the deadline does not make sense.
     */
    if (!runstate_is_running()) {
        return;
    }

    /* warp clock deterministically in record/replay mode */
    if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP_START)) {
        return;
    }

    if (!all_cpu_threads_idle()) {
        return;
    }

    if (qtest_enabled()) {
        /* When testing, qtest commands advance icount.  */
        return;
    }

    /* We want to use the earliest deadline from ALL vm_clocks */
    clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT);
    deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
    if (deadline < 0) {
        static bool notified;
        if (!icount_sleep && !notified) {
            error_report("WARNING: icount sleep disabled and no active timers");
            notified = true;
        }
        return;
    }

    if (deadline > 0) {
        /*
         * Ensure QEMU_CLOCK_VIRTUAL proceeds even when the virtual CPU goes to
         * sleep.  Otherwise, the CPU might be waiting for a future timer
         * interrupt to wake it up, but the interrupt never comes because
         * the vCPU isn't running any insns and thus doesn't advance the
         * QEMU_CLOCK_VIRTUAL.
         */
        if (!icount_sleep) {
            /*
             * We never let VCPUs sleep in no-sleep icount mode.
             * If there is a pending QEMU_CLOCK_VIRTUAL timer we just advance
             * to the next QEMU_CLOCK_VIRTUAL event and notify it.
             * This is useful when we want a deterministic execution time,
             * isolated from host latencies.
             */
            seqlock_write_lock(&timers_state.vm_clock_seqlock);
            timers_state.qemu_icount_bias += deadline;
            seqlock_write_unlock(&timers_state.vm_clock_seqlock);
            qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
        } else {
            /*
             * We do stop VCPUs and only advance QEMU_CLOCK_VIRTUAL after some
             * "real" time (related to the time left until the next event) has
             * passed.  The QEMU_CLOCK_VIRTUAL_RT clock will do this.
             * This keeps the warps from being visible externally; for example,
             * you will not be sending network packets continuously instead of
             * every 100ms.
             */
            seqlock_write_lock(&timers_state.vm_clock_seqlock);
            if (vm_clock_warp_start == -1 || vm_clock_warp_start > clock) {
                vm_clock_warp_start = clock;
            }
            seqlock_write_unlock(&timers_state.vm_clock_seqlock);
            timer_mod_anticipate(icount_warp_timer, clock + deadline);
        }
    } else if (deadline == 0) {
        qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
    }
}

static void qemu_account_warp_timer(void)
{
    if (!use_icount || !icount_sleep) {
        return;
    }

    /* Nothing to do if the VM is stopped: QEMU_CLOCK_VIRTUAL timers
     * do not fire, so computing the deadline does not make sense.
     */
    if (!runstate_is_running()) {
        return;
    }

    /* warp clock deterministically in record/replay mode */
    if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP_ACCOUNT)) {
        return;
    }

    timer_del(icount_warp_timer);
    icount_warp_rt();
}

static bool icount_state_needed(void *opaque)
{
    return use_icount;
}

/*
 * This is a subsection for icount migration.
 */
static const VMStateDescription icount_vmstate_timers = {
    .name = "timer/icount",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = icount_state_needed,
    .fields = (VMStateField[]) {
        VMSTATE_INT64(qemu_icount_bias, TimersState),
        VMSTATE_INT64(qemu_icount, TimersState),
        VMSTATE_END_OF_LIST()
    }
};

static const VMStateDescription vmstate_timers = {
    .name = "timer",
    .version_id = 2,
    .minimum_version_id = 1,
    .fields = (VMStateField[]) {
        VMSTATE_INT64(cpu_ticks_offset, TimersState),
        VMSTATE_INT64(dummy, TimersState),
        VMSTATE_INT64_V(cpu_clock_offset, TimersState, 2),
        VMSTATE_END_OF_LIST()
    },
    .subsections = (const VMStateDescription*[]) {
        &icount_vmstate_timers,
        NULL
    }
};

static void cpu_throttle_thread(void *opaque)
{
    CPUState *cpu = opaque;
    double pct;
    double throttle_ratio;
    long sleeptime_ns;

    if (!cpu_throttle_get_percentage()) {
        return;
    }

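    /* Sleep for pct / (1 - pct) of one 10 ms timeslice; combined with the
     * timer period of timeslice / (1 - pct) in cpu_throttle_timer_tick(),
     * the vCPU is off the host CPU for pct of real time.  At 75%, for
     * example, the thread sleeps 30 ms for every 10 ms of execution.
     */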
    pct = (double)cpu_throttle_get_percentage()/100;
    throttle_ratio = pct / (1 - pct);
    sleeptime_ns = (long)(throttle_ratio * CPU_THROTTLE_TIMESLICE_NS);

    qemu_mutex_unlock_iothread();
    atomic_set(&cpu->throttle_thread_scheduled, 0);
    g_usleep(sleeptime_ns / 1000); /* Convert ns to us for usleep call */
    qemu_mutex_lock_iothread();
}

static void cpu_throttle_timer_tick(void *opaque)
{
    CPUState *cpu;
    double pct;

    /* Stop the timer if needed */
    if (!cpu_throttle_get_percentage()) {
        return;
    }
    CPU_FOREACH(cpu) {
        if (!atomic_xchg(&cpu->throttle_thread_scheduled, 1)) {
            async_run_on_cpu(cpu, cpu_throttle_thread, cpu);
        }
    }

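    /* Re-arm with period timeslice / (1 - pct) so that one execution-plus-
     * sleep cycle fits exactly in each period (see cpu_throttle_thread).  */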
    pct = (double)cpu_throttle_get_percentage()/100;
    timer_mod(throttle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT) +
                                   CPU_THROTTLE_TIMESLICE_NS / (1-pct));
}

void cpu_throttle_set(int new_throttle_pct)
{
    /* Ensure throttle percentage is within valid range */
    new_throttle_pct = MIN(new_throttle_pct, CPU_THROTTLE_PCT_MAX);
    new_throttle_pct = MAX(new_throttle_pct, CPU_THROTTLE_PCT_MIN);

    atomic_set(&throttle_percentage, new_throttle_pct);

    timer_mod(throttle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT) +
                                       CPU_THROTTLE_TIMESLICE_NS);
}

void cpu_throttle_stop(void)
{
    atomic_set(&throttle_percentage, 0);
}

bool cpu_throttle_active(void)
{
    return (cpu_throttle_get_percentage() != 0);
}

int cpu_throttle_get_percentage(void)
{
    return atomic_read(&throttle_percentage);
}

void cpu_ticks_init(void)
{
    seqlock_init(&timers_state.vm_clock_seqlock, NULL);
    vmstate_register(NULL, 0, &vmstate_timers, &timers_state);
    throttle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL_RT,
                                           cpu_throttle_timer_tick, NULL);
}

void configure_icount(QemuOpts *opts, Error **errp)
{
    const char *option;
    char *rem_str = NULL;

    option = qemu_opt_get(opts, "shift");
    if (!option) {
        if (qemu_opt_get(opts, "align") != NULL) {
            error_setg(errp, "Please specify shift option when using align");
        }
        return;
    }

    icount_sleep = qemu_opt_get_bool(opts, "sleep", true);
    if (icount_sleep) {
        icount_warp_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL_RT,
                                         icount_timer_cb, NULL);
    }

    icount_align_option = qemu_opt_get_bool(opts, "align", false);

    if (icount_align_option && !icount_sleep) {
        error_setg(errp, "align=on and sleep=off are incompatible");
    }
    if (strcmp(option, "auto") != 0) {
        errno = 0;
        icount_time_shift = strtol(option, &rem_str, 0);
        if (errno != 0 || *rem_str != '\0' || !strlen(option)) {
            error_setg(errp, "icount: Invalid shift value");
        }
        use_icount = 1;
        return;
    } else if (icount_align_option) {
        error_setg(errp, "shift=auto and align=on are incompatible");
    } else if (!icount_sleep) {
        error_setg(errp, "shift=auto and sleep=off are incompatible");
    }

    use_icount = 2;

    /* 125MIPS seems a reasonable initial guess at the guest speed.
       It will be corrected fairly quickly anyway.  */
    icount_time_shift = 3;

    /* Have both realtime and virtual time triggers for speed adjustment.
       The realtime trigger catches emulated time passing too slowly,
       the virtual time trigger catches emulated time passing too fast.
       Realtime triggers occur even when idle, so use them less frequently
       than VM triggers.  */
    icount_rt_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL_RT,
                                   icount_adjust_rt, NULL);
    timer_mod(icount_rt_timer,
                   qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000);
    icount_vm_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
                                        icount_adjust_vm, NULL);
    timer_mod(icount_vm_timer,
                   qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
                   NANOSECONDS_PER_SECOND / 10);
}

/***********************************************************/
void hw_error(const char *fmt, ...)
{
    va_list ap;
    CPUState *cpu;

    va_start(ap, fmt);
    fprintf(stderr, "qemu: hardware error: ");
    vfprintf(stderr, fmt, ap);
    fprintf(stderr, "\n");
    CPU_FOREACH(cpu) {
        fprintf(stderr, "CPU #%d:\n", cpu->cpu_index);
        cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_FPU);
    }
    va_end(ap);
    abort();
}

void cpu_synchronize_all_states(void)
{
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        cpu_synchronize_state(cpu);
    }
}

void cpu_synchronize_all_post_reset(void)
{
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        cpu_synchronize_post_reset(cpu);
#ifdef CONFIG_HAX
        if (hax_enabled() && hax_ug_platform()) {
            hax_cpu_synchronize_post_reset(cpu);
        }
#endif
    }
}

void cpu_synchronize_all_post_init(void)
{
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        cpu_synchronize_post_init(cpu);
#ifdef CONFIG_HAX
        if (hax_enabled() && hax_ug_platform()) {
            hax_cpu_synchronize_post_init(cpu);
        }
#endif
    }
}

static int do_vm_stop(RunState state)
{
    int ret = 0;

    if (runstate_is_running()) {
        cpu_disable_ticks();
        pause_all_vcpus();
        runstate_set(state);
        vm_state_notify(0, state);
        qapi_event_send_stop(&error_abort);
    }

    bdrv_drain_all();
    ret = blk_flush_all();

    return ret;
}

static bool cpu_can_run(CPUState *cpu)
{
    if (cpu->stop) {
        return false;
    }
    if (cpu_is_stopped(cpu)) {
        return false;
    }
    return true;
}

static void cpu_handle_guest_debug(CPUState *cpu)
{
    gdb_set_stop_cpu(cpu);
    qemu_system_debug_request();
    cpu->stopped = true;
}

#ifdef CONFIG_LINUX
static void sigbus_reraise(void)
{
    sigset_t set;
    struct sigaction action;

    memset(&action, 0, sizeof(action));
    action.sa_handler = SIG_DFL;
    if (!sigaction(SIGBUS, &action, NULL)) {
        raise(SIGBUS);
        sigemptyset(&set);
        sigaddset(&set, SIGBUS);
        sigprocmask(SIG_UNBLOCK, &set, NULL);
    }
    perror("Failed to re-raise SIGBUS!\n");
    abort();
}

static void sigbus_handler(int n, struct qemu_signalfd_siginfo *siginfo,
                           void *ctx)
{
    if (kvm_on_sigbus(siginfo->ssi_code,
                      (void *)(intptr_t)siginfo->ssi_addr)) {
        sigbus_reraise();
    }
}

static void qemu_init_sigbus(void)
{
    struct sigaction action;

    memset(&action, 0, sizeof(action));
    action.sa_flags = SA_SIGINFO;
    action.sa_sigaction = (void (*)(int, siginfo_t*, void*))sigbus_handler;
    sigaction(SIGBUS, &action, NULL);

    prctl(PR_MCE_KILL, PR_MCE_KILL_SET, PR_MCE_KILL_EARLY, 0, 0);
}

static void qemu_kvm_eat_signals(CPUState *cpu)
{
    struct timespec ts = { 0, 0 };
    siginfo_t siginfo;
    sigset_t waitset;
    sigset_t chkset;
    int r;

    sigemptyset(&waitset);
    sigaddset(&waitset, SIG_IPI);
    sigaddset(&waitset, SIGBUS);

    do {
        r = sigtimedwait(&waitset, &siginfo, &ts);
        if (r == -1 && !(errno == EAGAIN || errno == EINTR)) {
            perror("sigtimedwait");
            exit(1);
        }

        switch (r) {
        case SIGBUS:
            if (kvm_on_sigbus_vcpu(cpu, siginfo.si_code, siginfo.si_addr)) {
                sigbus_reraise();
            }
            break;
        default:
            break;
        }

        r = sigpending(&chkset);
        if (r == -1) {
            perror("sigpending");
            exit(1);
        }
    } while (sigismember(&chkset, SIG_IPI) || sigismember(&chkset, SIGBUS));
}

#else /* !CONFIG_LINUX */

static void qemu_init_sigbus(void)
{
}

static void qemu_kvm_eat_signals(CPUState *cpu)
{
}
#endif /* !CONFIG_LINUX */

#ifndef _WIN32
static void dummy_signal(int sig)
{
}

static void qemu_kvm_init_cpu_signals(CPUState *cpu)
{
    int r;
    sigset_t set;
    struct sigaction sigact;

    memset(&sigact, 0, sizeof(sigact));
    sigact.sa_handler = dummy_signal;
    sigaction(SIG_IPI, &sigact, NULL);

    pthread_sigmask(SIG_BLOCK, NULL, &set);
    sigdelset(&set, SIG_IPI);
    sigdelset(&set, SIGBUS);
    r = kvm_set_signal_mask(cpu, &set);
    if (r) {
        fprintf(stderr, "kvm_set_signal_mask: %s\n", strerror(-r));
        exit(1);
    }
}

#else /* _WIN32 */
static void qemu_kvm_init_cpu_signals(CPUState *cpu)
{
    abort();
}
#endif /* _WIN32 */

static QemuMutex qemu_global_mutex;
static QemuCond qemu_io_proceeded_cond;
static unsigned iothread_requesting_mutex;

static QemuThread io_thread;

/* cpu creation */
static QemuCond qemu_cpu_cond;
/* system init */
static QemuCond qemu_pause_cond;
static QemuCond qemu_work_cond;

void qemu_init_cpu_loop(void)
{
    qemu_init_sigbus();
    qemu_cond_init(&qemu_cpu_cond);
    qemu_cond_init(&qemu_pause_cond);
    qemu_cond_init(&qemu_work_cond);
    qemu_cond_init(&qemu_io_proceeded_cond);
    qemu_mutex_init(&qemu_global_mutex);

    qemu_thread_get_self(&io_thread);
}

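/* Run func(data) on cpu's thread and wait for it to finish.  The work item
 * can live on this stack because qemu_cond_wait() drops the BQL while we
 * block on qemu_work_cond until the vCPU thread has executed it;
 * async_run_on_cpu() below is the fire-and-forget variant, with a
 * heap-allocated work item that frees itself.
 */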
void run_on_cpu(CPUState *cpu, void (*func)(void *data), void *data)
{
    struct qemu_work_item wi;

    if (qemu_cpu_is_self(cpu)) {
        func(data);
        return;
    }

    wi.func = func;
    wi.data = data;
    wi.free = false;

    qemu_mutex_lock(&cpu->work_mutex);
    if (cpu->queued_work_first == NULL) {
        cpu->queued_work_first = &wi;
    } else {
        cpu->queued_work_last->next = &wi;
    }
    cpu->queued_work_last = &wi;
    wi.next = NULL;
    wi.done = false;
    qemu_mutex_unlock(&cpu->work_mutex);

    qemu_cpu_kick(cpu);
    while (!atomic_mb_read(&wi.done)) {
        CPUState *self_cpu = current_cpu;

        qemu_cond_wait(&qemu_work_cond, &qemu_global_mutex);
        current_cpu = self_cpu;
    }
}

void async_run_on_cpu(CPUState *cpu, void (*func)(void *data), void *data)
{
    struct qemu_work_item *wi;

    if (qemu_cpu_is_self(cpu)) {
        func(data);
        return;
    }

    wi = g_malloc0(sizeof(struct qemu_work_item));
    wi->func = func;
    wi->data = data;
    wi->free = true;

    qemu_mutex_lock(&cpu->work_mutex);
    if (cpu->queued_work_first == NULL) {
        cpu->queued_work_first = wi;
    } else {
        cpu->queued_work_last->next = wi;
    }
    cpu->queued_work_last = wi;
    wi->next = NULL;
    wi->done = false;
    qemu_mutex_unlock(&cpu->work_mutex);

    qemu_cpu_kick(cpu);
}

static void flush_queued_work(CPUState *cpu)
{
    struct qemu_work_item *wi;

    if (cpu->queued_work_first == NULL) {
        return;
    }

    qemu_mutex_lock(&cpu->work_mutex);
    while (cpu->queued_work_first != NULL) {
        wi = cpu->queued_work_first;
        cpu->queued_work_first = wi->next;
        if (!cpu->queued_work_first) {
            cpu->queued_work_last = NULL;
        }
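        /* Drop work_mutex while running the callback: it executes under the
         * BQL and may itself queue more work on this or another vCPU.  */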
        qemu_mutex_unlock(&cpu->work_mutex);
        wi->func(wi->data);
        qemu_mutex_lock(&cpu->work_mutex);
        if (wi->free) {
            g_free(wi);
        } else {
            atomic_mb_set(&wi->done, true);
        }
    }
    qemu_mutex_unlock(&cpu->work_mutex);
    qemu_cond_broadcast(&qemu_work_cond);
}

static void qemu_wait_io_event_common(CPUState *cpu)
{
    if (cpu->stop) {
        cpu->stop = false;
        cpu->stopped = true;
        qemu_cond_broadcast(&qemu_pause_cond);
    }
    flush_queued_work(cpu);
    cpu->thread_kicked = false;
}

static void qemu_tcg_wait_io_event(CPUState *cpu)
{
    while (all_cpu_threads_idle()) {
        qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
    }

    while (iothread_requesting_mutex) {
        qemu_cond_wait(&qemu_io_proceeded_cond, &qemu_global_mutex);
    }

    CPU_FOREACH(cpu) {
        qemu_wait_io_event_common(cpu);
    }
}

#ifdef CONFIG_HAX
static void qemu_hax_wait_io_event(CPUState *cpu)
{
    while (cpu_thread_is_idle(cpu)) {
        qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
    }

    qemu_wait_io_event_common(cpu);
}
#endif

static void qemu_kvm_wait_io_event(CPUState *cpu)
{
    while (cpu_thread_is_idle(cpu)) {
        qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
    }

    qemu_kvm_eat_signals(cpu);
    qemu_wait_io_event_common(cpu);
}

static void *qemu_kvm_cpu_thread_fn(void *arg)
{
    CPUState *cpu = arg;
    int r;

    rcu_register_thread();

    qemu_mutex_lock_iothread();
    qemu_thread_get_self(cpu->thread);
    cpu->thread_id = qemu_get_thread_id();
    cpu->can_do_io = 1;
    current_cpu = cpu;

    r = kvm_init_vcpu(cpu);
    if (r < 0) {
        fprintf(stderr, "kvm_init_vcpu failed: %s\n", strerror(-r));
        exit(1);
    }

    qemu_kvm_init_cpu_signals(cpu);

    /* signal CPU creation */
    cpu->created = true;
    qemu_cond_signal(&qemu_cpu_cond);

    while (1) {
        if (cpu_can_run(cpu)) {
            r = kvm_cpu_exec(cpu);
            if (r == EXCP_DEBUG) {
                cpu_handle_guest_debug(cpu);
            }
        }
        qemu_kvm_wait_io_event(cpu);
    }

    return NULL;
}

static void *qemu_dummy_cpu_thread_fn(void *arg)
{
#ifdef _WIN32
    fprintf(stderr, "qtest is not supported under Windows\n");
    exit(1);
#else
    CPUState *cpu = arg;
    sigset_t waitset;
    int r;

    rcu_register_thread();

    qemu_mutex_lock_iothread();
    qemu_thread_get_self(cpu->thread);
    cpu->thread_id = qemu_get_thread_id();
    cpu->can_do_io = 1;

    sigemptyset(&waitset);
    sigaddset(&waitset, SIG_IPI);

    /* signal CPU creation */
    cpu->created = true;
    qemu_cond_signal(&qemu_cpu_cond);

    current_cpu = cpu;
    while (1) {
        current_cpu = NULL;
        qemu_mutex_unlock_iothread();
        do {
            int sig;
            r = sigwait(&waitset, &sig);
        } while (r == -1 && (errno == EAGAIN || errno == EINTR));
        if (r == -1) {
            perror("sigwait");
            exit(1);
        }
        qemu_mutex_lock_iothread();
        current_cpu = cpu;
        qemu_wait_io_event_common(cpu);
    }

    return NULL;
#endif
}

static void tcg_exec_all(void);

static void *qemu_tcg_cpu_thread_fn(void *arg)
{
    CPUState *cpu = arg;

    rcu_register_thread();

    qemu_mutex_lock_iothread();
    qemu_thread_get_self(cpu->thread);

    CPU_FOREACH(cpu) {
        cpu->thread_id = qemu_get_thread_id();
        cpu->created = true;
        cpu->can_do_io = 1;
    }
    qemu_cond_signal(&qemu_cpu_cond);

    /* wait for initial kick-off after machine start */
    while (first_cpu->stopped) {
        qemu_cond_wait(first_cpu->halt_cond, &qemu_global_mutex);

        /* process any pending work */
        CPU_FOREACH(cpu) {
            qemu_wait_io_event_common(cpu);
        }
    }

    /* process any pending work */
    atomic_mb_set(&exit_request, 1);

    while (1) {
        tcg_exec_all();

        if (use_icount) {
            int64_t deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);

            if (deadline == 0) {
                qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
            }
        }
        qemu_tcg_wait_io_event(QTAILQ_FIRST(&cpus));
    }

    return NULL;
}

#ifdef CONFIG_HAX
static void *qemu_hax_cpu_thread_fn(void *arg)
{
    CPUState *cpu = arg;
    int r;
    qemu_thread_get_self(cpu->thread);
    qemu_mutex_lock(&qemu_global_mutex);

    cpu->thread_id = qemu_get_thread_id();
    cpu->created = true;
    cpu->halted = 0;
    current_cpu = cpu;

    hax_init_vcpu(cpu);
    qemu_cond_signal(&qemu_cpu_cond);

    while (1) {
        if (cpu_can_run(cpu)) {
            r = hax_smp_cpu_exec(cpu);
            if (r == EXCP_DEBUG) {
                cpu_handle_guest_debug(cpu);
            }
        }
        qemu_hax_wait_io_event(cpu);
    }
    return NULL;
}
#endif

static void qemu_cpu_kick_thread(CPUState *cpu)
{
#ifndef _WIN32
    int err;

    if (cpu->thread_kicked) {
        return;
    }
    cpu->thread_kicked = true;
    err = pthread_kill(cpu->thread->thread, SIG_IPI);
    if (err) {
        fprintf(stderr, "qemu:%s: %s", __func__, strerror(err));
        exit(1);
    }

# ifdef CONFIG_DARWIN
    /* On Mac, the CPU thread may not catch the signal reliably while the
     * guest is being shut down, so double-check and resend it.
     */
    if (!exit_request) {
    // FIXME: check it soon
//        cpu_signal(0);
    }

    if (hax_enabled() && hax_ug_platform()) {
        cpu->exit_request = 1;
    }
# endif
#else /* _WIN32 */
# ifndef CONFIG_HAX
    abort();
# else
    // FIXME: check it soon
#if 0
    if (!qemu_cpu_is_self(cpu)) {
        CONTEXT tcgContext;

        if (SuspendThread(cpu->hThread) == (DWORD)-1) {
            fprintf(stderr, "qemu:%s: GetLastError:%lu\n", __func__,
                    GetLastError());
            exit(1);
        }

        /* On multi-core systems, we are not sure that the thread is actually
         * suspended until we can get the context.
         */
        tcgContext.ContextFlags = CONTEXT_CONTROL;
        while (GetThreadContext(cpu->hThread, &tcgContext) != 0) {
            continue;
        }

//        cpu_signal(0);

        if (hax_enabled() && hax_ug_platform()) {
            cpu->exit_request = 1;
        } else {
            abort();
        }
        if (ResumeThread(cpu->hThread) == (DWORD)-1) {
            fprintf(stderr, "qemu:%s: GetLastError:%lu\n", __func__,
                    GetLastError());
            exit(1);
        }
    }
#endif
    if (!qemu_cpu_is_self(cpu)) {
        if (hax_enabled() && hax_ug_platform()) {
            cpu->exit_request = 1;
        }
    }
# endif
#endif
}

static void qemu_cpu_kick_no_halt(void)
{
    CPUState *cpu;
    /* Ensure whatever caused the exit has reached the CPU threads before
     * writing exit_request.
     */
    atomic_mb_set(&exit_request, 1);
    cpu = atomic_mb_read(&tcg_current_cpu);
    if (cpu) {
        cpu_exit(cpu);
    }
}

void qemu_cpu_kick(CPUState *cpu)
{
    qemu_cond_broadcast(cpu->halt_cond);
    if (tcg_enabled()) {
        qemu_cpu_kick_no_halt();
    } else {
        qemu_cpu_kick_thread(cpu);
    }
}

void qemu_cpu_kick_self(void)
{
    assert(current_cpu);
    qemu_cpu_kick_thread(current_cpu);
}

bool qemu_cpu_is_self(CPUState *cpu)
{
    return qemu_thread_is_self(cpu->thread);
}

bool qemu_in_vcpu_thread(void)
{
    return current_cpu && qemu_cpu_is_self(current_cpu);
}

static __thread bool iothread_locked = false;

bool qemu_mutex_iothread_locked(void)
{
    return iothread_locked;
}

void qemu_mutex_lock_iothread(void)
{
    atomic_inc(&iothread_requesting_mutex);
    /* In the simple case there is no need to bump the VCPU thread out of
     * TCG code execution.
     */
    if (!tcg_enabled() || qemu_in_vcpu_thread() ||
        !first_cpu || !first_cpu->created) {
        qemu_mutex_lock(&qemu_global_mutex);
        atomic_dec(&iothread_requesting_mutex);
    } else {
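        /* Contended case: a TCG vCPU thread can hold the BQL for a long
         * stretch of translated code, so if trylock fails, kick it out of
         * the execution loop before blocking on the mutex.  */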
        if (qemu_mutex_trylock(&qemu_global_mutex)) {
            qemu_cpu_kick_no_halt();
            qemu_mutex_lock(&qemu_global_mutex);
        }
        atomic_dec(&iothread_requesting_mutex);
        qemu_cond_broadcast(&qemu_io_proceeded_cond);
    }
    iothread_locked = true;
}

void qemu_mutex_unlock_iothread(void)
{
    iothread_locked = false;
    qemu_mutex_unlock(&qemu_global_mutex);
}

static int all_vcpus_paused(void)
{
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        if (!cpu->stopped) {
            return 0;
        }
    }

    return 1;
}

void pause_all_vcpus(void)
{
    CPUState *cpu;

    qemu_clock_enable(QEMU_CLOCK_VIRTUAL, false);
    CPU_FOREACH(cpu) {
        cpu->stop = true;
        qemu_cpu_kick(cpu);
    }

    if (qemu_in_vcpu_thread()) {
        cpu_stop_current();
        if (!kvm_enabled()) {
            CPU_FOREACH(cpu) {
                cpu->stop = false;
                cpu->stopped = true;
            }
            return;
        }
    }

    while (!all_vcpus_paused()) {
        qemu_cond_wait(&qemu_pause_cond, &qemu_global_mutex);
        CPU_FOREACH(cpu) {
            qemu_cpu_kick(cpu);
        }
    }
}

void cpu_resume(CPUState *cpu)
{
    cpu->stop = false;
    cpu->stopped = false;
    qemu_cpu_kick(cpu);
}

void resume_all_vcpus(void)
{
    CPUState *cpu;

    qemu_clock_enable(QEMU_CLOCK_VIRTUAL, true);
    CPU_FOREACH(cpu) {
        cpu_resume(cpu);
    }
}

/* For temporary buffers for forming a name */
#define VCPU_THREAD_NAME_SIZE 16

static void qemu_tcg_init_vcpu(CPUState *cpu)
{
#ifdef CONFIG_HAX
    if (hax_enabled()) {
        hax_init_vcpu(cpu);
    }
#endif
    char thread_name[VCPU_THREAD_NAME_SIZE];
    static QemuCond *tcg_halt_cond;
    static QemuThread *tcg_cpu_thread;

    /* share a single thread for all cpus with TCG */
    if (!tcg_cpu_thread) {
        cpu->thread = g_malloc0(sizeof(QemuThread));
        cpu->halt_cond = g_malloc0(sizeof(QemuCond));
        qemu_cond_init(cpu->halt_cond);
        tcg_halt_cond = cpu->halt_cond;
        snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/TCG",
                 cpu->cpu_index);
        qemu_thread_create(cpu->thread, thread_name, qemu_tcg_cpu_thread_fn,
                           cpu, QEMU_THREAD_JOINABLE);
#ifdef _WIN32
        cpu->hThread = qemu_thread_get_handle(cpu->thread);
#endif
        while (!cpu->created) {
            qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
        }
        tcg_cpu_thread = cpu->thread;
    } else {
        cpu->thread = tcg_cpu_thread;
        cpu->halt_cond = tcg_halt_cond;
    }
}

#ifdef CONFIG_HAX
static void qemu_hax_start_vcpu(CPUState *cpu)
{
    char thread_name[VCPU_THREAD_NAME_SIZE];

    cpu->thread = g_malloc0(sizeof(QemuThread));
    cpu->halt_cond = g_malloc0(sizeof(QemuCond));
    qemu_cond_init(cpu->halt_cond);

    snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/HAX",
             cpu->cpu_index);

    qemu_thread_create(cpu->thread, thread_name, qemu_hax_cpu_thread_fn,
                       cpu, QEMU_THREAD_JOINABLE);
#ifdef _WIN32
    cpu->hThread = qemu_thread_get_handle(cpu->thread);
#endif
    while (!cpu->created) {
        qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
    }
}
#endif

static void qemu_kvm_start_vcpu(CPUState *cpu)
{
    char thread_name[VCPU_THREAD_NAME_SIZE];

    cpu->thread = g_malloc0(sizeof(QemuThread));
    cpu->halt_cond = g_malloc0(sizeof(QemuCond));
    qemu_cond_init(cpu->halt_cond);
    snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/KVM",
             cpu->cpu_index);
    qemu_thread_create(cpu->thread, thread_name, qemu_kvm_cpu_thread_fn,
                       cpu, QEMU_THREAD_JOINABLE);
    while (!cpu->created) {
        qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
    }
}

static void qemu_dummy_start_vcpu(CPUState *cpu)
{
    char thread_name[VCPU_THREAD_NAME_SIZE];

    cpu->thread = g_malloc0(sizeof(QemuThread));
    cpu->halt_cond = g_malloc0(sizeof(QemuCond));
    qemu_cond_init(cpu->halt_cond);
    snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/DUMMY",
             cpu->cpu_index);
    qemu_thread_create(cpu->thread, thread_name, qemu_dummy_cpu_thread_fn, cpu,
                       QEMU_THREAD_JOINABLE);
    while (!cpu->created) {
        qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
    }
}

void qemu_init_vcpu(CPUState *cpu)
{
    cpu->nr_cores = smp_cores;
    cpu->nr_threads = smp_threads;
    cpu->stopped = true;

    if (!cpu->as) {
        /* If the target cpu hasn't set up any address spaces itself,
         * give it the default one.
         */
        AddressSpace *as = address_space_init_shareable(cpu->memory,
                                                        "cpu-memory");
        cpu->num_ases = 1;
        cpu_address_space_init(cpu, as, 0);
    }

    if (kvm_enabled()) {
        qemu_kvm_start_vcpu(cpu);
#ifdef CONFIG_HAX
    } else if (hax_enabled() && hax_ug_platform()) {
        qemu_hax_start_vcpu(cpu);
#endif
    } else if (tcg_enabled()) {
        qemu_tcg_init_vcpu(cpu);
    } else {
        qemu_dummy_start_vcpu(cpu);
    }
}

void cpu_stop_current(void)
{
    if (current_cpu) {
        current_cpu->stop = false;
        current_cpu->stopped = true;
        cpu_exit(current_cpu);
        qemu_cond_broadcast(&qemu_pause_cond);
    }
}

int vm_stop(RunState state)
{
    if (qemu_in_vcpu_thread()) {
        qemu_system_vmstop_request_prepare();
        qemu_system_vmstop_request(state);
        /*
         * FIXME: should not return to device code in case
         * vm_stop() has been requested.
         */
        cpu_stop_current();
        return 0;
    }

    return do_vm_stop(state);
}

/* does a state transition even if the VM is already stopped,
   current state is forgotten forever */
int vm_stop_force_state(RunState state)
{
    if (runstate_is_running()) {
        return vm_stop(state);
    } else {
        runstate_set(state);

        bdrv_drain_all();
        /* Make sure to return an error if the flush in a previous vm_stop()
         * failed. */
        return blk_flush_all();
    }
}

static int64_t tcg_get_icount_limit(void)
{
    int64_t deadline;

    if (replay_mode != REPLAY_MODE_PLAY) {
        deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);

        /* Maintain prior (possibly buggy) behaviour where if no deadline
         * was set (as there is no QEMU_CLOCK_VIRTUAL timer) or it is more than
         * INT32_MAX nanoseconds ahead, we still use INT32_MAX
         * nanoseconds.
         */
        if ((deadline < 0) || (deadline > INT32_MAX)) {
            deadline = INT32_MAX;
        }

        return qemu_icount_round(deadline);
    } else {
        return replay_get_instructions();
    }
}

static int tcg_cpu_exec(CPUState *cpu)
{
    int ret;
#ifdef CONFIG_PROFILER
    int64_t ti;
#endif

#ifdef CONFIG_PROFILER
    ti = profile_getclock();
#endif
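    /* Hand the vCPU an instruction budget up to the next virtual-clock
     * deadline: the low 16 bits go in the icount_decr decrementer and the
     * remainder in icount_extra, as the decrementer is only 16 bits wide.
     */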
    if (use_icount) {
        int64_t count;
        int decr;
        timers_state.qemu_icount -= (cpu->icount_decr.u16.low
                                    + cpu->icount_extra);
        cpu->icount_decr.u16.low = 0;
        cpu->icount_extra = 0;
        count = tcg_get_icount_limit();
        timers_state.qemu_icount += count;
        decr = (count > 0xffff) ? 0xffff : count;
        count -= decr;
        cpu->icount_decr.u16.low = decr;
        cpu->icount_extra = count;
    }
    ret = cpu_exec(cpu);
#ifdef CONFIG_PROFILER
    tcg_time += profile_getclock() - ti;
#endif
    if (use_icount) {
        /* Fold pending instructions back into the
           instruction counter, and clear the interrupt flag.  */
        timers_state.qemu_icount -= (cpu->icount_decr.u16.low
                        + cpu->icount_extra);
        cpu->icount_decr.u32 = 0;
        cpu->icount_extra = 0;
        replay_account_executed_instructions();
    }
    return ret;
}

static void tcg_exec_all(void)
{
    int r;

    /* Account partial waits to QEMU_CLOCK_VIRTUAL.  */
    qemu_account_warp_timer();

    if (next_cpu == NULL) {
        next_cpu = first_cpu;
    }
    for (; next_cpu != NULL && !exit_request; next_cpu = CPU_NEXT(next_cpu)) {
        CPUState *cpu = next_cpu;

        qemu_clock_enable(QEMU_CLOCK_VIRTUAL,
                          (cpu->singlestep_enabled & SSTEP_NOTIMER) == 0);

        if (cpu_can_run(cpu)) {
            r = tcg_cpu_exec(cpu);
            if (r == EXCP_DEBUG) {
                cpu_handle_guest_debug(cpu);
                break;
            }
        } else if (cpu->stop || cpu->stopped) {
            break;
        }
    }

    /* Pairs with smp_wmb in qemu_cpu_kick.  */
    atomic_mb_set(&exit_request, 0);
}

void list_cpus(FILE *f, fprintf_function cpu_fprintf, const char *optarg)
{
    /* XXX: implement xxx_cpu_list for targets that still miss it */
#if defined(cpu_list)
    cpu_list(f, cpu_fprintf);
#endif
}

CpuInfoList *qmp_query_cpus(Error **errp)
{
    CpuInfoList *head = NULL, *cur_item = NULL;
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        CpuInfoList *info;
#if defined(TARGET_I386)
        X86CPU *x86_cpu = X86_CPU(cpu);
        CPUX86State *env = &x86_cpu->env;
#elif defined(TARGET_PPC)
        PowerPCCPU *ppc_cpu = POWERPC_CPU(cpu);
        CPUPPCState *env = &ppc_cpu->env;
#elif defined(TARGET_SPARC)
        SPARCCPU *sparc_cpu = SPARC_CPU(cpu);
        CPUSPARCState *env = &sparc_cpu->env;
#elif defined(TARGET_MIPS)
        MIPSCPU *mips_cpu = MIPS_CPU(cpu);
        CPUMIPSState *env = &mips_cpu->env;
#elif defined(TARGET_TRICORE)
        TriCoreCPU *tricore_cpu = TRICORE_CPU(cpu);
        CPUTriCoreState *env = &tricore_cpu->env;
#endif

        cpu_synchronize_state(cpu);

        info = g_malloc0(sizeof(*info));
        info->value = g_malloc0(sizeof(*info->value));
        info->value->CPU = cpu->cpu_index;
        info->value->current = (cpu == first_cpu);
        info->value->halted = cpu->halted;
        info->value->qom_path = object_get_canonical_path(OBJECT(cpu));
        info->value->thread_id = cpu->thread_id;
#if defined(TARGET_I386)
        info->value->arch = CPU_INFO_ARCH_X86;
        info->value->u.x86.pc = env->eip + env->segs[R_CS].base;
#elif defined(TARGET_PPC)
        info->value->arch = CPU_INFO_ARCH_PPC;
        info->value->u.ppc.nip = env->nip;
#elif defined(TARGET_SPARC)
        info->value->arch = CPU_INFO_ARCH_SPARC;
        info->value->u.q_sparc.pc = env->pc;
        info->value->u.q_sparc.npc = env->npc;
#elif defined(TARGET_MIPS)
        info->value->arch = CPU_INFO_ARCH_MIPS;
        info->value->u.q_mips.PC = env->active_tc.PC;
#elif defined(TARGET_TRICORE)
        info->value->arch = CPU_INFO_ARCH_TRICORE;
        info->value->u.tricore.PC = env->PC;
#else
        info->value->arch = CPU_INFO_ARCH_OTHER;
#endif

        /* XXX: waiting for the qapi to support GSList */
        if (!cur_item) {
            head = cur_item = info;
        } else {
            cur_item->next = info;
            cur_item = info;
        }
    }

    return head;
}

void qmp_memsave(int64_t addr, int64_t size, const char *filename,
                 bool has_cpu, int64_t cpu_index, Error **errp)
{
    FILE *f;
    uint32_t l;
    CPUState *cpu;
    uint8_t buf[1024];
    int64_t orig_addr = addr, orig_size = size;

    if (!has_cpu) {
        cpu_index = 0;
    }

    cpu = qemu_get_cpu(cpu_index);
    if (cpu == NULL) {
        error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "cpu-index",
                   "a CPU number");
        return;
    }

    f = fopen(filename, "wb");
    if (!f) {
        error_setg_file_open(errp, errno, filename);
        return;
    }

    while (size != 0) {
        l = sizeof(buf);
        if (l > size) {
            l = size;
        }
        if (cpu_memory_rw_debug(cpu, addr, buf, l, 0) != 0) {
            error_setg(errp, "Invalid addr 0x%016" PRIx64 "/size %" PRId64
                             " specified", orig_addr, orig_size);
            goto exit;
        }
        if (fwrite(buf, 1, l, f) != l) {
            error_setg(errp, QERR_IO_ERROR);
            goto exit;
        }
        addr += l;
        size -= l;
    }

exit:
    fclose(f);
}

void qmp_pmemsave(int64_t addr, int64_t size, const char *filename,
                  Error **errp)
{
    FILE *f;
    uint32_t l;
    uint8_t buf[1024];

    f = fopen(filename, "wb");
    if (!f) {
        error_setg_file_open(errp, errno, filename);
        return;
    }

    while (size != 0) {
        l = sizeof(buf);
        if (l > size) {
            l = size;
        }
        cpu_physical_memory_read(addr, buf, l);
        if (fwrite(buf, 1, l, f) != l) {
            error_setg(errp, QERR_IO_ERROR);
            goto exit;
        }
        addr += l;
        size -= l;
    }

exit:
    fclose(f);
}

void qmp_inject_nmi(Error **errp)
{
#if defined(TARGET_I386)
    CPUState *cs;

    CPU_FOREACH(cs) {
        X86CPU *cpu = X86_CPU(cs);

        if (!cpu->apic_state) {
            cpu_interrupt(cs, CPU_INTERRUPT_NMI);
        } else {
            apic_deliver_nmi(cpu->apic_state);
        }
    }
#else
    nmi_monitor_handle(monitor_get_cpu_index(), errp);
#endif
}

void dump_drift_info(FILE *f, fprintf_function cpu_fprintf)
{
    if (!use_icount) {
        return;
    }

    cpu_fprintf(f, "Host - Guest clock  %"PRIi64" ms\n",
                (cpu_get_clock() - cpu_get_icount())/SCALE_MS);
    if (icount_align_option) {
        cpu_fprintf(f, "Max guest delay     %"PRIi64" ms\n", -max_delay/SCALE_MS);
        cpu_fprintf(f, "Max guest advance   %"PRIi64" ms\n", max_advance/SCALE_MS);
    } else {
        cpu_fprintf(f, "Max guest delay     NA\n");
        cpu_fprintf(f, "Max guest advance   NA\n");
    }
}