/*
 * QEMU System Emulator
 *
 * Copyright (c) 2003-2008 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

/* Needed early for CONFIG_BSD etc. */
#include "qemu/osdep.h"
#include "qemu-common.h"
#include "cpu.h"
#include "monitor/monitor.h"
#include "qapi/qmp/qerror.h"
#include "qemu/error-report.h"
#include "sysemu/sysemu.h"
#include "sysemu/block-backend.h"
#include "exec/gdbstub.h"
#include "sysemu/dma.h"
#include "sysemu/kvm.h"
#include "sysemu/hax.h"
#include "qmp-commands.h"
#include "exec/exec-all.h"

#include "qemu/thread.h"
#include "sysemu/cpus.h"
#include "sysemu/qtest.h"
#include "qemu/main-loop.h"
#include "qemu/bitmap.h"
#include "qemu/seqlock.h"
#include "qapi-event.h"
#include "hw/nmi.h"
#include "sysemu/replay.h"

#ifndef _WIN32
#include "qemu/compatfd.h"
#endif

#ifdef CONFIG_LINUX

#include <sys/prctl.h>

#ifndef PR_MCE_KILL
#define PR_MCE_KILL 33
#endif

#ifndef PR_MCE_KILL_SET
#define PR_MCE_KILL_SET 1
#endif

#ifndef PR_MCE_KILL_EARLY
#define PR_MCE_KILL_EARLY 1
#endif

#endif /* CONFIG_LINUX */

int64_t max_delay;
int64_t max_advance;

/* vcpu throttling controls */
static QEMUTimer *throttle_timer;
static unsigned int throttle_percentage;

#define CPU_THROTTLE_PCT_MIN 1
#define CPU_THROTTLE_PCT_MAX 99
#define CPU_THROTTLE_TIMESLICE_NS 10000000

bool cpu_is_stopped(CPUState *cpu)
{
    return cpu->stopped || !runstate_is_running();
}

static bool cpu_thread_is_idle(CPUState *cpu)
{
    if (cpu->stop || cpu->queued_work_first) {
        return false;
    }
    if (cpu_is_stopped(cpu)) {
        return true;
    }
    if (!cpu->halted || cpu_has_work(cpu) ||
        kvm_halt_in_kernel()) {
        return false;
    }
    return true;
}

static bool all_cpu_threads_idle(void)
{
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        if (!cpu_thread_is_idle(cpu)) {
            return false;
        }
    }
    return true;
}

/***********************************************************/
/* guest cycle counter */

/* Protected by TimersState seqlock */

static bool icount_sleep = true;
static int64_t vm_clock_warp_start = -1;
/* Conversion factor from emulated instructions to virtual clock ticks.  */
static int icount_time_shift;
/* Arbitrarily pick 1MIPS as the minimum allowable speed.  */
#define MAX_ICOUNT_SHIFT 10

static QEMUTimer *icount_rt_timer;
static QEMUTimer *icount_vm_timer;
static QEMUTimer *icount_warp_timer;

typedef struct TimersState {
    /* Protected by BQL.  */
    int64_t cpu_ticks_prev;
    int64_t cpu_ticks_offset;

    /* cpu_clock_offset can be read out of BQL, so protect it with
     * this lock.
     */
    QemuSeqLock vm_clock_seqlock;
    int64_t cpu_clock_offset;
    int32_t cpu_ticks_enabled;
    int64_t dummy;

    /* Compensate for varying guest execution speed.  */
    int64_t qemu_icount_bias;
    /* Only written by TCG thread */
    int64_t qemu_icount;
} TimersState;

static TimersState timers_state;

int64_t cpu_get_icount_raw(void)
{
    int64_t icount;
    CPUState *cpu = current_cpu;

    icount = timers_state.qemu_icount;
    if (cpu) {
        if (!cpu->can_do_io) {
            fprintf(stderr, "Bad icount read\n");
            exit(1);
        }
        icount -= (cpu->icount_decr.u16.low + cpu->icount_extra);
    }
    return icount;
}

/* Return the virtual CPU time, based on the instruction counter.  */
static int64_t cpu_get_icount_locked(void)
{
    int64_t icount = cpu_get_icount_raw();
    return timers_state.qemu_icount_bias + cpu_icount_to_ns(icount);
}

int64_t cpu_get_icount(void)
{
    int64_t icount;
    unsigned start;

    do {
        start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
        icount = cpu_get_icount_locked();
    } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));

    return icount;
}
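
/* The loop above is the standard seqlock reader pattern: a writer bumps
 * the sequence count around its update, and if the count changed while
 * we were reading, seqlock_read_retry() returns true and we re-read.
 * The same pattern appears in cpu_get_clock() below.
 */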

int64_t cpu_icount_to_ns(int64_t icount)
{
    return icount << icount_time_shift;
}
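
/* Worked example: with icount_time_shift == 3, each emulated instruction
 * accounts for 2^3 = 8 ns of virtual time, i.e. 125 MIPS.  At the
 * MAX_ICOUNT_SHIFT of 10 an instruction costs 1024 ns, roughly the
 * 1 MIPS floor mentioned above.
 */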

/* return the time elapsed in VM between vm_start and vm_stop.  Unless
 * icount is active, cpu_get_ticks() uses units of the host CPU cycle
 * counter.
 *
 * Caller must hold the BQL
 */
int64_t cpu_get_ticks(void)
{
    int64_t ticks;

    if (use_icount) {
        return cpu_get_icount();
    }

    ticks = timers_state.cpu_ticks_offset;
    if (timers_state.cpu_ticks_enabled) {
        ticks += cpu_get_host_ticks();
    }

    if (timers_state.cpu_ticks_prev > ticks) {
        /* Note: non-increasing ticks may happen if the host uses
           software suspend.  */
        timers_state.cpu_ticks_offset += timers_state.cpu_ticks_prev - ticks;
        ticks = timers_state.cpu_ticks_prev;
    }

    timers_state.cpu_ticks_prev = ticks;
    return ticks;
}

static int64_t cpu_get_clock_locked(void)
{
    int64_t time;

    time = timers_state.cpu_clock_offset;
    if (timers_state.cpu_ticks_enabled) {
        time += get_clock();
    }

    return time;
}

/* Return the monotonic time elapsed in VM, i.e.,
 * the time between vm_start and vm_stop
 */
int64_t cpu_get_clock(void)
{
    int64_t ti;
    unsigned start;

    do {
        start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
        ti = cpu_get_clock_locked();
    } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));

    return ti;
}

/* enable cpu_get_ticks()
 * Caller must hold BQL which serves as mutex for vm_clock_seqlock.
 */
void cpu_enable_ticks(void)
{
    /* Here, the real thing protected by the seqlock is cpu_clock_offset. */
    seqlock_write_begin(&timers_state.vm_clock_seqlock);
    if (!timers_state.cpu_ticks_enabled) {
        timers_state.cpu_ticks_offset -= cpu_get_host_ticks();
        timers_state.cpu_clock_offset -= get_clock();
        timers_state.cpu_ticks_enabled = 1;
    }
    seqlock_write_end(&timers_state.vm_clock_seqlock);
}

/* disable cpu_get_ticks() : the clock is stopped. You must not call
 * cpu_get_ticks() after that.
 * Caller must hold BQL which serves as mutex for vm_clock_seqlock.
 */
void cpu_disable_ticks(void)
{
    /* Here, the real thing protected by the seqlock is cpu_clock_offset. */
    seqlock_write_begin(&timers_state.vm_clock_seqlock);
    if (timers_state.cpu_ticks_enabled) {
        timers_state.cpu_ticks_offset += cpu_get_host_ticks();
        timers_state.cpu_clock_offset = cpu_get_clock_locked();
        timers_state.cpu_ticks_enabled = 0;
    }
    seqlock_write_end(&timers_state.vm_clock_seqlock);
}

/* Correlation between real and virtual time is always going to be
   fairly approximate, so ignore small variation.
   When the guest is idle real and virtual time will be aligned in
   the IO wait loop.  */
#define ICOUNT_WOBBLE (NANOSECONDS_PER_SECOND / 10)

static void icount_adjust(void)
{
    int64_t cur_time;
    int64_t cur_icount;
    int64_t delta;

    /* Protected by TimersState mutex.  */
    static int64_t last_delta;

    /* If the VM is not running, then do nothing.  */
    if (!runstate_is_running()) {
        return;
    }

    seqlock_write_begin(&timers_state.vm_clock_seqlock);
    cur_time = cpu_get_clock_locked();
    cur_icount = cpu_get_icount_locked();

    delta = cur_icount - cur_time;
    /* FIXME: This is a very crude algorithm, somewhat prone to oscillation.  */
    if (delta > 0
        && last_delta + ICOUNT_WOBBLE < delta * 2
        && icount_time_shift > 0) {
        /* The guest is getting too far ahead.  Slow time down.  */
        icount_time_shift--;
    }
    if (delta < 0
        && last_delta - ICOUNT_WOBBLE > delta * 2
        && icount_time_shift < MAX_ICOUNT_SHIFT) {
        /* The guest is getting too far behind.  Speed time up.  */
        icount_time_shift++;
    }
    last_delta = delta;
    timers_state.qemu_icount_bias = cur_icount
                              - (timers_state.qemu_icount << icount_time_shift);
    seqlock_write_end(&timers_state.vm_clock_seqlock);
}

static void icount_adjust_rt(void *opaque)
{
    timer_mod(icount_rt_timer,
              qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000);
    icount_adjust();
}

static void icount_adjust_vm(void *opaque)
{
    timer_mod(icount_vm_timer,
                   qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
                   NANOSECONDS_PER_SECOND / 10);
    icount_adjust();
}

static int64_t qemu_icount_round(int64_t count)
{
    return (count + (1 << icount_time_shift) - 1) >> icount_time_shift;
}
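
/* Worked example: qemu_icount_round() divides by 2^icount_time_shift,
 * rounding up.  With a shift of 3, a 1 ms (1000000 ns) deadline becomes
 * (1000000 + 7) >> 3 = 125000 instructions of execution budget.
 */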

static void icount_warp_rt(void)
{
    unsigned seq;
    int64_t warp_start;

    /* The icount_warp_timer is rescheduled soon after vm_clock_warp_start
     * changes from -1 to another value, so the race here is okay.
     */
    do {
        seq = seqlock_read_begin(&timers_state.vm_clock_seqlock);
        warp_start = vm_clock_warp_start;
    } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, seq));

    if (warp_start == -1) {
        return;
    }

    seqlock_write_begin(&timers_state.vm_clock_seqlock);
    if (runstate_is_running()) {
        int64_t clock = REPLAY_CLOCK(REPLAY_CLOCK_VIRTUAL_RT,
                                     cpu_get_clock_locked());
        int64_t warp_delta;

        warp_delta = clock - vm_clock_warp_start;
        if (use_icount == 2) {
            /*
             * In adaptive mode, do not let QEMU_CLOCK_VIRTUAL run too
             * far ahead of real time.
             */
            int64_t cur_icount = cpu_get_icount_locked();
            int64_t delta = clock - cur_icount;
            warp_delta = MIN(warp_delta, delta);
        }
        timers_state.qemu_icount_bias += warp_delta;
    }
    vm_clock_warp_start = -1;
    seqlock_write_end(&timers_state.vm_clock_seqlock);

    if (qemu_clock_expired(QEMU_CLOCK_VIRTUAL)) {
        qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
    }
}

static void icount_timer_cb(void *opaque)
{
    /* No need for a checkpoint because the timer already synchronizes
     * with CHECKPOINT_CLOCK_VIRTUAL_RT.
     */
    icount_warp_rt();
}

void qtest_clock_warp(int64_t dest)
{
    int64_t clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
    AioContext *aio_context;
    assert(qtest_enabled());
    aio_context = qemu_get_aio_context();
    while (clock < dest) {
        int64_t deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
        int64_t warp = qemu_soonest_timeout(dest - clock, deadline);

        seqlock_write_begin(&timers_state.vm_clock_seqlock);
        timers_state.qemu_icount_bias += warp;
        seqlock_write_end(&timers_state.vm_clock_seqlock);

        qemu_clock_run_timers(QEMU_CLOCK_VIRTUAL);
        timerlist_run_timers(aio_context->tlg.tl[QEMU_CLOCK_VIRTUAL]);
        clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
    }
    qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
}

void qemu_start_warp_timer(void)
{
    int64_t clock;
    int64_t deadline;

    if (!use_icount) {
        return;
    }

    /* Nothing to do if the VM is stopped: QEMU_CLOCK_VIRTUAL timers
     * do not fire, so computing the deadline does not make sense.
     */
    if (!runstate_is_running()) {
        return;
    }

    /* warp clock deterministically in record/replay mode */
    if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP_START)) {
        return;
    }

    if (!all_cpu_threads_idle()) {
        return;
    }

    if (qtest_enabled()) {
        /* When testing, qtest commands advance icount.  */
        return;
    }

    /* We want to use the earliest deadline from ALL vm_clocks */
    clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT);
    deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
    if (deadline < 0) {
        static bool notified;
        if (!icount_sleep && !notified) {
            error_report("WARNING: icount sleep disabled and no active timers");
            notified = true;
        }
        return;
    }

    if (deadline > 0) {
        /*
         * Ensure QEMU_CLOCK_VIRTUAL proceeds even when the virtual CPU goes to
         * sleep.  Otherwise, the CPU might be waiting for a future timer
         * interrupt to wake it up, but the interrupt never comes because
         * the vCPU isn't running any insns and thus doesn't advance the
         * QEMU_CLOCK_VIRTUAL.
         */
        if (!icount_sleep) {
            /*
             * We never let VCPUs sleep in no-sleep icount mode.
             * If there is a pending QEMU_CLOCK_VIRTUAL timer we just advance
             * to the next QEMU_CLOCK_VIRTUAL event and notify it.
             * This is useful when we want a deterministic execution time,
             * isolated from host latencies.
             */
            seqlock_write_begin(&timers_state.vm_clock_seqlock);
            timers_state.qemu_icount_bias += deadline;
            seqlock_write_end(&timers_state.vm_clock_seqlock);
            qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
        } else {
            /*
             * We do stop VCPUs and only advance QEMU_CLOCK_VIRTUAL after some
             * "real" time (related to the time left until the next event) has
             * passed.  The QEMU_CLOCK_VIRTUAL_RT clock will do this.
             * This keeps the warps from being visible externally; for example,
             * you will not be sending network packets continuously instead of
             * every 100ms.
             */
            seqlock_write_begin(&timers_state.vm_clock_seqlock);
            if (vm_clock_warp_start == -1 || vm_clock_warp_start > clock) {
                vm_clock_warp_start = clock;
            }
            seqlock_write_end(&timers_state.vm_clock_seqlock);
            timer_mod_anticipate(icount_warp_timer, clock + deadline);
        }
    } else if (deadline == 0) {
        qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
    }
}
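
/* To summarize the three cases above: a negative deadline means no
 * QEMU_CLOCK_VIRTUAL timer is pending at all; a positive deadline either
 * warps the clock immediately (sleep=off) or arms icount_warp_timer to
 * warp it once the equivalent real time has passed; a zero deadline
 * means a timer is already due, so the clock is simply notified.
 */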

static void qemu_account_warp_timer(void)
{
    if (!use_icount || !icount_sleep) {
        return;
    }

    /* Nothing to do if the VM is stopped: QEMU_CLOCK_VIRTUAL timers
     * do not fire, so computing the deadline does not make sense.
     */
    if (!runstate_is_running()) {
        return;
    }

    /* warp clock deterministically in record/replay mode */
    if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP_ACCOUNT)) {
        return;
    }

    timer_del(icount_warp_timer);
    icount_warp_rt();
}

static bool icount_state_needed(void *opaque)
{
    return use_icount;
}

/*
 * This is a subsection for icount migration.
 */
static const VMStateDescription icount_vmstate_timers = {
    .name = "timer/icount",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = icount_state_needed,
    .fields = (VMStateField[]) {
        VMSTATE_INT64(qemu_icount_bias, TimersState),
        VMSTATE_INT64(qemu_icount, TimersState),
        VMSTATE_END_OF_LIST()
    }
};

static const VMStateDescription vmstate_timers = {
    .name = "timer",
    .version_id = 2,
    .minimum_version_id = 1,
    .fields = (VMStateField[]) {
        VMSTATE_INT64(cpu_ticks_offset, TimersState),
        VMSTATE_INT64(dummy, TimersState),
        VMSTATE_INT64_V(cpu_clock_offset, TimersState, 2),
        VMSTATE_END_OF_LIST()
    },
    .subsections = (const VMStateDescription*[]) {
        &icount_vmstate_timers,
        NULL
    }
};

static void cpu_throttle_thread(CPUState *cpu, run_on_cpu_data opaque)
{
    double pct;
    double throttle_ratio;
    long sleeptime_ns;

    if (!cpu_throttle_get_percentage()) {
        return;
    }

    pct = (double)cpu_throttle_get_percentage()/100;
    throttle_ratio = pct / (1 - pct);
    sleeptime_ns = (long)(throttle_ratio * CPU_THROTTLE_TIMESLICE_NS);

    qemu_mutex_unlock_iothread();
    atomic_set(&cpu->throttle_thread_scheduled, 0);
    g_usleep(sleeptime_ns / 1000); /* Convert ns to us for usleep call */
    qemu_mutex_lock_iothread();
}
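
/* Worked example: at a 75% throttle, pct = 0.75 and throttle_ratio =
 * 0.75 / 0.25 = 3, so the vCPU sleeps 3 * 10 ms = 30 ms for every 10 ms
 * timeslice it runs, leaving it 25% of wall-clock time.
 */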

static void cpu_throttle_timer_tick(void *opaque)
{
    CPUState *cpu;
    double pct;

    /* Stop the timer if needed */
    if (!cpu_throttle_get_percentage()) {
        return;
    }
    CPU_FOREACH(cpu) {
        if (!atomic_xchg(&cpu->throttle_thread_scheduled, 1)) {
            async_run_on_cpu(cpu, cpu_throttle_thread,
                             RUN_ON_CPU_NULL);
        }
    }

    pct = (double)cpu_throttle_get_percentage()/100;
    timer_mod(throttle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT) +
                                   CPU_THROTTLE_TIMESLICE_NS / (1-pct));
}
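
/* The tick period is CPU_THROTTLE_TIMESLICE_NS / (1 - pct): one run
 * timeslice plus the matching sleep.  At 50% that is 20 ms (10 ms of
 * execution, 10 ms of sleep); as pct approaches 99% the period grows
 * toward 1 s, almost all of it spent sleeping.
 */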

void cpu_throttle_set(int new_throttle_pct)
{
    /* Ensure throttle percentage is within valid range */
    new_throttle_pct = MIN(new_throttle_pct, CPU_THROTTLE_PCT_MAX);
    new_throttle_pct = MAX(new_throttle_pct, CPU_THROTTLE_PCT_MIN);

    atomic_set(&throttle_percentage, new_throttle_pct);

    timer_mod(throttle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT) +
                                       CPU_THROTTLE_TIMESLICE_NS);
}

void cpu_throttle_stop(void)
{
    atomic_set(&throttle_percentage, 0);
}

bool cpu_throttle_active(void)
{
    return (cpu_throttle_get_percentage() != 0);
}

int cpu_throttle_get_percentage(void)
{
    return atomic_read(&throttle_percentage);
}

void cpu_ticks_init(void)
{
    seqlock_init(&timers_state.vm_clock_seqlock);
    vmstate_register(NULL, 0, &vmstate_timers, &timers_state);
    throttle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL_RT,
                                           cpu_throttle_timer_tick, NULL);
}

void configure_icount(QemuOpts *opts, Error **errp)
{
    const char *option;
    char *rem_str = NULL;

    option = qemu_opt_get(opts, "shift");
    if (!option) {
        if (qemu_opt_get(opts, "align") != NULL) {
            error_setg(errp, "Please specify shift option when using align");
        }
        return;
    }

    icount_sleep = qemu_opt_get_bool(opts, "sleep", true);
    if (icount_sleep) {
        icount_warp_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL_RT,
                                         icount_timer_cb, NULL);
    }

    icount_align_option = qemu_opt_get_bool(opts, "align", false);

    if (icount_align_option && !icount_sleep) {
        error_setg(errp, "align=on and sleep=off are incompatible");
    }
    if (strcmp(option, "auto") != 0) {
        errno = 0;
        icount_time_shift = strtol(option, &rem_str, 0);
        if (errno != 0 || *rem_str != '\0' || !strlen(option)) {
            error_setg(errp, "icount: Invalid shift value");
        }
        use_icount = 1;
        return;
    } else if (icount_align_option) {
        error_setg(errp, "shift=auto and align=on are incompatible");
    } else if (!icount_sleep) {
        error_setg(errp, "shift=auto and sleep=off are incompatible");
    }

    use_icount = 2;

    /* 125MIPS seems a reasonable initial guess at the guest speed.
       It will be corrected fairly quickly anyway.  */
    icount_time_shift = 3;

    /* Have both realtime and virtual time triggers for speed adjustment.
       The realtime trigger catches emulated time passing too slowly,
       the virtual time trigger catches emulated time passing too fast.
       Realtime triggers occur even when idle, so use them less frequently
       than VM triggers.  */
    icount_rt_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL_RT,
                                   icount_adjust_rt, NULL);
    timer_mod(icount_rt_timer,
                   qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000);
    icount_vm_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
                                        icount_adjust_vm, NULL);
    timer_mod(icount_vm_timer,
                   qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
                   NANOSECONDS_PER_SECOND / 10);
}

/***********************************************************/
void hw_error(const char *fmt, ...)
{
    va_list ap;
    CPUState *cpu;

    va_start(ap, fmt);
    fprintf(stderr, "qemu: hardware error: ");
    vfprintf(stderr, fmt, ap);
    fprintf(stderr, "\n");
    CPU_FOREACH(cpu) {
        fprintf(stderr, "CPU #%d:\n", cpu->cpu_index);
        cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_FPU);
    }
    va_end(ap);
    abort();
}

void cpu_synchronize_all_states(void)
{
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        cpu_synchronize_state(cpu);
#ifdef CONFIG_HAX
        if (hax_enabled() && hax_ug_platform()) {
            hax_cpu_synchronize_state(cpu);
        }
#endif
    }
}

void cpu_synchronize_all_post_reset(void)
{
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        cpu_synchronize_post_reset(cpu);
#ifdef CONFIG_HAX
        if (hax_enabled() && hax_ug_platform()) {
            hax_cpu_synchronize_post_reset(cpu);
        }
#endif
    }
}

void cpu_synchronize_all_post_init(void)
{
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        cpu_synchronize_post_init(cpu);
#ifdef CONFIG_HAX
        if (hax_enabled() && hax_ug_platform()) {
            hax_cpu_synchronize_post_init(cpu);
        }
#endif
    }
}

static int do_vm_stop(RunState state)
{
    int ret = 0;

    if (runstate_is_running()) {
        cpu_disable_ticks();
        pause_all_vcpus();
        runstate_set(state);
        vm_state_notify(0, state);
        qapi_event_send_stop(&error_abort);
    }

    bdrv_drain_all();
    replay_disable_events();
    ret = bdrv_flush_all();

    return ret;
}

static bool cpu_can_run(CPUState *cpu)
{
    if (cpu->stop) {
        return false;
    }
    if (cpu_is_stopped(cpu)) {
        return false;
    }
    return true;
}

static void cpu_handle_guest_debug(CPUState *cpu)
{
    gdb_set_stop_cpu(cpu);
    qemu_system_debug_request();
    cpu->stopped = true;
}

#ifdef CONFIG_LINUX
static void sigbus_reraise(void)
{
    sigset_t set;
    struct sigaction action;

    memset(&action, 0, sizeof(action));
    action.sa_handler = SIG_DFL;
    if (!sigaction(SIGBUS, &action, NULL)) {
        raise(SIGBUS);
        sigemptyset(&set);
        sigaddset(&set, SIGBUS);
        pthread_sigmask(SIG_UNBLOCK, &set, NULL);
    }
    perror("Failed to re-raise SIGBUS!\n");
    abort();
}

static void sigbus_handler(int n, struct qemu_signalfd_siginfo *siginfo,
                           void *ctx)
{
    if (kvm_on_sigbus(siginfo->ssi_code,
                      (void *)(intptr_t)siginfo->ssi_addr)) {
        sigbus_reraise();
    }
}

static void qemu_init_sigbus(void)
{
    struct sigaction action;

    memset(&action, 0, sizeof(action));
    action.sa_flags = SA_SIGINFO;
    action.sa_sigaction = (void (*)(int, siginfo_t*, void*))sigbus_handler;
    sigaction(SIGBUS, &action, NULL);

    prctl(PR_MCE_KILL, PR_MCE_KILL_SET, PR_MCE_KILL_EARLY, 0, 0);
}

static void qemu_kvm_eat_signals(CPUState *cpu)
{
    struct timespec ts = { 0, 0 };
    siginfo_t siginfo;
    sigset_t waitset;
    sigset_t chkset;
    int r;

    sigemptyset(&waitset);
    sigaddset(&waitset, SIG_IPI);
    sigaddset(&waitset, SIGBUS);

    do {
        r = sigtimedwait(&waitset, &siginfo, &ts);
        if (r == -1 && !(errno == EAGAIN || errno == EINTR)) {
            perror("sigtimedwait");
            exit(1);
        }

        switch (r) {
        case SIGBUS:
            if (kvm_on_sigbus_vcpu(cpu, siginfo.si_code, siginfo.si_addr)) {
                sigbus_reraise();
            }
            break;
        default:
            break;
        }

        r = sigpending(&chkset);
        if (r == -1) {
            perror("sigpending");
            exit(1);
        }
    } while (sigismember(&chkset, SIG_IPI) || sigismember(&chkset, SIGBUS));
}

#else /* !CONFIG_LINUX */

static void qemu_init_sigbus(void)
{
}

static void qemu_kvm_eat_signals(CPUState *cpu)
{
}
#endif /* !CONFIG_LINUX */

#ifndef _WIN32
static void dummy_signal(int sig)
{
}

static void qemu_kvm_init_cpu_signals(CPUState *cpu)
{
    int r;
    sigset_t set;
    struct sigaction sigact;

    memset(&sigact, 0, sizeof(sigact));
    sigact.sa_handler = dummy_signal;
    sigaction(SIG_IPI, &sigact, NULL);

    pthread_sigmask(SIG_BLOCK, NULL, &set);
    sigdelset(&set, SIG_IPI);
    sigdelset(&set, SIGBUS);
    r = kvm_set_signal_mask(cpu, &set);
    if (r) {
        fprintf(stderr, "kvm_set_signal_mask: %s\n", strerror(-r));
        exit(1);
    }
}

#else /* _WIN32 */
static void qemu_kvm_init_cpu_signals(CPUState *cpu)
{
    abort();
}
#endif /* _WIN32 */

static QemuMutex qemu_global_mutex;
static QemuCond qemu_io_proceeded_cond;
static unsigned iothread_requesting_mutex;

static QemuThread io_thread;

/* cpu creation */
static QemuCond qemu_cpu_cond;
/* system init */
static QemuCond qemu_pause_cond;

void qemu_init_cpu_loop(void)
{
    qemu_init_sigbus();
    qemu_cond_init(&qemu_cpu_cond);
    qemu_cond_init(&qemu_pause_cond);
    qemu_cond_init(&qemu_io_proceeded_cond);
    qemu_mutex_init(&qemu_global_mutex);

    qemu_thread_get_self(&io_thread);
}

void run_on_cpu(CPUState *cpu, run_on_cpu_func func, run_on_cpu_data data)
{
    do_run_on_cpu(cpu, func, data, &qemu_global_mutex);
}

static void qemu_kvm_destroy_vcpu(CPUState *cpu)
{
    if (kvm_destroy_vcpu(cpu) < 0) {
        error_report("kvm_destroy_vcpu failed");
        exit(EXIT_FAILURE);
    }
}

static void qemu_tcg_destroy_vcpu(CPUState *cpu)
{
}

static void qemu_wait_io_event_common(CPUState *cpu)
{
    if (cpu->stop) {
        cpu->stop = false;
        cpu->stopped = true;
        qemu_cond_broadcast(&qemu_pause_cond);
    }
    process_queued_cpu_work(cpu);
    cpu->thread_kicked = false;
}

static void qemu_tcg_wait_io_event(CPUState *cpu)
{
    while (all_cpu_threads_idle()) {
        qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
    }

    while (iothread_requesting_mutex) {
        qemu_cond_wait(&qemu_io_proceeded_cond, &qemu_global_mutex);
    }

    CPU_FOREACH(cpu) {
        qemu_wait_io_event_common(cpu);
    }
}

#ifdef CONFIG_HAX
static void qemu_hax_wait_io_event(CPUState *cpu)
{
    while (cpu_thread_is_idle(cpu)) {
        qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
    }

    qemu_wait_io_event_common(cpu);
}
#endif

static void qemu_kvm_wait_io_event(CPUState *cpu)
{
    while (cpu_thread_is_idle(cpu)) {
        qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
    }

    qemu_kvm_eat_signals(cpu);
    qemu_wait_io_event_common(cpu);
}

static void *qemu_kvm_cpu_thread_fn(void *arg)
{
    CPUState *cpu = arg;
    int r;

    rcu_register_thread();

    qemu_mutex_lock_iothread();
    qemu_thread_get_self(cpu->thread);
    cpu->thread_id = qemu_get_thread_id();
    cpu->can_do_io = 1;
    current_cpu = cpu;

    r = kvm_init_vcpu(cpu);
    if (r < 0) {
        fprintf(stderr, "kvm_init_vcpu failed: %s\n", strerror(-r));
        exit(1);
    }

    qemu_kvm_init_cpu_signals(cpu);

    /* signal CPU creation */
    cpu->created = true;
    qemu_cond_signal(&qemu_cpu_cond);

    do {
        if (cpu_can_run(cpu)) {
            r = kvm_cpu_exec(cpu);
            if (r == EXCP_DEBUG) {
                cpu_handle_guest_debug(cpu);
            }
        }
        qemu_kvm_wait_io_event(cpu);
    } while (!cpu->unplug || cpu_can_run(cpu));

    qemu_kvm_destroy_vcpu(cpu);
    cpu->created = false;
    qemu_cond_signal(&qemu_cpu_cond);
    qemu_mutex_unlock_iothread();
    return NULL;
}

static void *qemu_dummy_cpu_thread_fn(void *arg)
{
#ifdef _WIN32
    fprintf(stderr, "qtest is not supported under Windows\n");
    exit(1);
#else
    CPUState *cpu = arg;
    sigset_t waitset;
    int r;

    rcu_register_thread();

    qemu_mutex_lock_iothread();
    qemu_thread_get_self(cpu->thread);
    cpu->thread_id = qemu_get_thread_id();
    cpu->can_do_io = 1;

    sigemptyset(&waitset);
    sigaddset(&waitset, SIG_IPI);

    /* signal CPU creation */
    cpu->created = true;
    qemu_cond_signal(&qemu_cpu_cond);

    current_cpu = cpu;
    while (1) {
        current_cpu = NULL;
        qemu_mutex_unlock_iothread();
        do {
            int sig;
            r = sigwait(&waitset, &sig);
        } while (r == -1 && (errno == EAGAIN || errno == EINTR));
        if (r == -1) {
            perror("sigwait");
            exit(1);
        }
        qemu_mutex_lock_iothread();
        current_cpu = cpu;
        qemu_wait_io_event_common(cpu);
    }

    return NULL;
#endif
}

static int64_t tcg_get_icount_limit(void)
{
    int64_t deadline;

    if (replay_mode != REPLAY_MODE_PLAY) {
        deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);

        /* Maintain prior (possibly buggy) behaviour where if no deadline
         * was set (as there is no QEMU_CLOCK_VIRTUAL timer) or it is more than
         * INT32_MAX nanoseconds ahead, we still use INT32_MAX
         * nanoseconds.
         */
        if ((deadline < 0) || (deadline > INT32_MAX)) {
            deadline = INT32_MAX;
        }

        return qemu_icount_round(deadline);
    } else {
        return replay_get_instructions();
    }
}

static void handle_icount_deadline(void)
{
    if (use_icount) {
        int64_t deadline =
            qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);

        if (deadline == 0) {
            qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
        }
    }
}

static int tcg_cpu_exec(CPUState *cpu)
{
    int ret;
#ifdef CONFIG_PROFILER
    int64_t ti;
#endif

#ifdef CONFIG_PROFILER
    ti = profile_getclock();
#endif
    if (use_icount) {
        int64_t count;
        int decr;
        timers_state.qemu_icount -= (cpu->icount_decr.u16.low
                                    + cpu->icount_extra);
        cpu->icount_decr.u16.low = 0;
        cpu->icount_extra = 0;
        count = tcg_get_icount_limit();
        timers_state.qemu_icount += count;
        decr = (count > 0xffff) ? 0xffff : count;
        count -= decr;
        cpu->icount_decr.u16.low = decr;
        cpu->icount_extra = count;
    }
    cpu_exec_start(cpu);
    ret = cpu_exec(cpu);
    cpu_exec_end(cpu);
#ifdef CONFIG_PROFILER
    tcg_time += profile_getclock() - ti;
#endif
    if (use_icount) {
        /* Fold pending instructions back into the
           instruction counter, and clear the interrupt flag.  */
        timers_state.qemu_icount -= (cpu->icount_decr.u16.low
                        + cpu->icount_extra);
        cpu->icount_decr.u32 = 0;
        cpu->icount_extra = 0;
        replay_account_executed_instructions();
    }
    return ret;
}
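
/* Worked example of the budget split above: for a 200000-instruction
 * limit, decr = 0xffff (65535) goes into the 16-bit counter that the
 * generated code decrements, and the remaining 134465 instructions are
 * parked in icount_extra to refill it when it reaches zero.
 */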

/* Destroy any remaining vCPUs which have been unplugged and have
 * finished running
 */
static void deal_with_unplugged_cpus(void)
{
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        if (cpu->unplug && !cpu_can_run(cpu)) {
            qemu_tcg_destroy_vcpu(cpu);
            cpu->created = false;
            qemu_cond_signal(&qemu_cpu_cond);
            break;
        }
    }
}

static void *qemu_tcg_cpu_thread_fn(void *arg)
{
    CPUState *cpu = arg;

    rcu_register_thread();

    qemu_mutex_lock_iothread();
    qemu_thread_get_self(cpu->thread);

    CPU_FOREACH(cpu) {
        cpu->thread_id = qemu_get_thread_id();
        cpu->created = true;
        cpu->can_do_io = 1;
    }
    qemu_cond_signal(&qemu_cpu_cond);

    /* wait for initial kick-off after machine start */
    while (first_cpu->stopped) {
        qemu_cond_wait(first_cpu->halt_cond, &qemu_global_mutex);

        /* process any pending work */
        CPU_FOREACH(cpu) {
            qemu_wait_io_event_common(cpu);
        }
    }

    /* Start with exit_request set so the first pass through the loop
     * falls through to the wait-io-event handling below and processes
     * any pending work before running guest code.
     */
    atomic_mb_set(&exit_request, 1);

    cpu = first_cpu;

    while (1) {
        /* Account partial waits to QEMU_CLOCK_VIRTUAL.  */
        qemu_account_warp_timer();

        if (!cpu) {
            cpu = first_cpu;
        }

        for (; cpu != NULL && !exit_request; cpu = CPU_NEXT(cpu)) {

            qemu_clock_enable(QEMU_CLOCK_VIRTUAL,
                              (cpu->singlestep_enabled & SSTEP_NOTIMER) == 0);

            if (cpu_can_run(cpu)) {
                int r;
                r = tcg_cpu_exec(cpu);
                if (r == EXCP_DEBUG) {
                    cpu_handle_guest_debug(cpu);
                    break;
                }
            } else if (cpu->stop || cpu->stopped) {
                if (cpu->unplug) {
                    cpu = CPU_NEXT(cpu);
                }
                break;
            }

        } /* for cpu.. */

        /* Pairs with smp_wmb in qemu_cpu_kick.  */
        atomic_mb_set(&exit_request, 0);

        handle_icount_deadline();

        qemu_tcg_wait_io_event(QTAILQ_FIRST(&cpus));
        deal_with_unplugged_cpus();
    }

    return NULL;
}

#ifdef CONFIG_HAX
static void *qemu_hax_cpu_thread_fn(void *arg)
{
    CPUState *cpu = arg;
    int r;
    qemu_thread_get_self(cpu->thread);
    qemu_mutex_lock(&qemu_global_mutex);

    cpu->thread_id = qemu_get_thread_id();
    cpu->created = true;
    cpu->halted = 0;
    current_cpu = cpu;

    hax_init_vcpu(cpu);
    qemu_cond_signal(&qemu_cpu_cond);

    while (1) {
        if (cpu_can_run(cpu)) {
            r = hax_smp_cpu_exec(cpu);
            if (r == EXCP_DEBUG) {
                cpu_handle_guest_debug(cpu);
            }
        }
        qemu_hax_wait_io_event(cpu);
    }
    return NULL;
}
#endif

static void qemu_cpu_kick_thread(CPUState *cpu)
{
#ifndef _WIN32
    int err;

    if (cpu->thread_kicked) {
        return;
    }
    cpu->thread_kicked = true;
    err = pthread_kill(cpu->thread->thread, SIG_IPI);
    if (err) {
        fprintf(stderr, "qemu:%s: %s", __func__, strerror(err));
        exit(1);
    }

# ifdef CONFIG_DARWIN
    /* On Mac the CPU thread may not catch the signal reliably while the
     * guest is shutting down, so double-check and resend it.
     */
    if (!exit_request) {
    // FIXME: check it soon
//        cpu_signal(0);
    }

    if (hax_enabled() && hax_ug_platform()) {
        cpu->exit_request = 1;
    }
# endif
#else /* _WIN32 */
# ifndef CONFIG_HAX
    abort();
# else
    // FIXME: check it soon
#if 0
    if (!qemu_cpu_is_self(cpu)) {
        CONTEXT tcgContext;

        if (SuspendThread(cpu->hThread) == (DWORD)-1) {
            fprintf(stderr, "qemu:%s: GetLastError:%lu\n", __func__,
                    GetLastError());
            exit(1);
        }

        /* On multi-core systems, we are not sure that the thread is actually
         * suspended until we can get the context.
         */
        tcgContext.ContextFlags = CONTEXT_CONTROL;
        while (GetThreadContext(cpu->hThread, &tcgContext) != 0) {
            continue;
        }

//        cpu_signal(0);

        if (hax_enabled() && hax_ug_platform()) {
            cpu->exit_request = 1;
        } else {
            abort();
        }
        if (ResumeThread(cpu->hThread) == (DWORD)-1) {
            fprintf(stderr, "qemu:%s: GetLastError:%lu\n", __func__,
                    GetLastError());
            exit(1);
        }
    }
#endif
    if (!qemu_cpu_is_self(cpu)) {
        if (hax_enabled() && hax_ug_platform()) {
            cpu->exit_request = 1;
        }
    }
# endif
#endif
}

static void qemu_cpu_kick_no_halt(void)
{
    CPUState *cpu;
    /* Ensure whatever caused the exit has reached the CPU threads before
     * writing exit_request.
     */
    atomic_mb_set(&exit_request, 1);
    cpu = atomic_mb_read(&tcg_current_cpu);
    if (cpu) {
        cpu_exit(cpu);
    }
}

void qemu_cpu_kick(CPUState *cpu)
{
    qemu_cond_broadcast(cpu->halt_cond);
    if (tcg_enabled()) {
        qemu_cpu_kick_no_halt();
    } else {
        qemu_cpu_kick_thread(cpu);
    }
}

void qemu_cpu_kick_self(void)
{
    assert(current_cpu);
    qemu_cpu_kick_thread(current_cpu);
}

bool qemu_cpu_is_self(CPUState *cpu)
{
    return qemu_thread_is_self(cpu->thread);
}

bool qemu_in_vcpu_thread(void)
{
    return current_cpu && qemu_cpu_is_self(current_cpu);
}

static __thread bool iothread_locked = false;

bool qemu_mutex_iothread_locked(void)
{
    return iothread_locked;
}

void qemu_mutex_lock_iothread(void)
{
    atomic_inc(&iothread_requesting_mutex);
    /* In the simple case there is no need to bump the VCPU thread out of
     * TCG code execution.
     */
    if (!tcg_enabled() || qemu_in_vcpu_thread() ||
        !first_cpu || !first_cpu->created) {
        qemu_mutex_lock(&qemu_global_mutex);
        atomic_dec(&iothread_requesting_mutex);
    } else {
        if (qemu_mutex_trylock(&qemu_global_mutex)) {
            qemu_cpu_kick_no_halt();
            qemu_mutex_lock(&qemu_global_mutex);
        }
        atomic_dec(&iothread_requesting_mutex);
        qemu_cond_broadcast(&qemu_io_proceeded_cond);
    }
    iothread_locked = true;
}
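
/* The trylock/kick dance above: the TCG thread holds the global mutex
 * while it executes guest code, so a plain lock could block for a long
 * time.  If trylock fails, qemu_cpu_kick_no_halt() forces the running
 * vCPU out of the translation loop, the TCG thread releases the mutex,
 * and the blocking lock then succeeds quickly.  The raised
 * iothread_requesting_mutex count keeps the TCG thread from re-taking
 * the lock until qemu_io_proceeded_cond is broadcast.
 */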

void qemu_mutex_unlock_iothread(void)
{
    iothread_locked = false;
    qemu_mutex_unlock(&qemu_global_mutex);
}

static bool all_vcpus_paused(void)
{
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        if (!cpu->stopped) {
            return false;
        }
    }

    return true;
}

void pause_all_vcpus(void)
{
    CPUState *cpu;

    qemu_clock_enable(QEMU_CLOCK_VIRTUAL, false);
    CPU_FOREACH(cpu) {
        cpu->stop = true;
        qemu_cpu_kick(cpu);
    }

    if (qemu_in_vcpu_thread()) {
        cpu_stop_current();
        if (!kvm_enabled()) {
            CPU_FOREACH(cpu) {
                cpu->stop = false;
                cpu->stopped = true;
            }
            return;
        }
    }

    while (!all_vcpus_paused()) {
        qemu_cond_wait(&qemu_pause_cond, &qemu_global_mutex);
        CPU_FOREACH(cpu) {
            qemu_cpu_kick(cpu);
        }
    }
}
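
/* Note on the vCPU-thread special case above: without KVM all vCPUs run
 * in the single TCG thread, so when pause_all_vcpus() is called from
 * that thread no other thread is left to acknowledge the stop; the
 * vCPUs can simply be marked stopped, and waiting on qemu_pause_cond
 * here would deadlock.
 */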

void cpu_resume(CPUState *cpu)
{
    cpu->stop = false;
    cpu->stopped = false;
    qemu_cpu_kick(cpu);
}

void resume_all_vcpus(void)
{
    CPUState *cpu;

    qemu_clock_enable(QEMU_CLOCK_VIRTUAL, true);
    CPU_FOREACH(cpu) {
        cpu_resume(cpu);
    }
}

void cpu_remove(CPUState *cpu)
{
    cpu->stop = true;
    cpu->unplug = true;
    qemu_cpu_kick(cpu);
}

void cpu_remove_sync(CPUState *cpu)
{
    cpu_remove(cpu);
    while (cpu->created) {
        qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
    }
}

/* For temporary buffers for forming a name */
#define VCPU_THREAD_NAME_SIZE 16

static void qemu_tcg_init_vcpu(CPUState *cpu)
{
#ifdef CONFIG_HAX
    if (hax_enabled()) {
        hax_init_vcpu(cpu);
    }
#endif
    char thread_name[VCPU_THREAD_NAME_SIZE];
    static QemuCond *tcg_halt_cond;
    static QemuThread *tcg_cpu_thread;

    /* share a single thread for all cpus with TCG */
    if (!tcg_cpu_thread) {
        cpu->thread = g_malloc0(sizeof(QemuThread));
        cpu->halt_cond = g_malloc0(sizeof(QemuCond));
        qemu_cond_init(cpu->halt_cond);
        tcg_halt_cond = cpu->halt_cond;
        snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/TCG",
                 cpu->cpu_index);
        qemu_thread_create(cpu->thread, thread_name, qemu_tcg_cpu_thread_fn,
                           cpu, QEMU_THREAD_JOINABLE);
#ifdef _WIN32
        cpu->hThread = qemu_thread_get_handle(cpu->thread);
#endif
        while (!cpu->created) {
            qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
        }
        tcg_cpu_thread = cpu->thread;
    } else {
        cpu->thread = tcg_cpu_thread;
        cpu->halt_cond = tcg_halt_cond;
    }
}

#ifdef CONFIG_HAX
static void qemu_hax_start_vcpu(CPUState *cpu)
{
    char thread_name[VCPU_THREAD_NAME_SIZE];

    cpu->thread = g_malloc0(sizeof(QemuThread));
    cpu->halt_cond = g_malloc0(sizeof(QemuCond));
    qemu_cond_init(cpu->halt_cond);

    snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/HAX",
             cpu->cpu_index);

    qemu_thread_create(cpu->thread, thread_name, qemu_hax_cpu_thread_fn,
                       cpu, QEMU_THREAD_JOINABLE);
#ifdef _WIN32
    cpu->hThread = qemu_thread_get_handle(cpu->thread);
#endif
    while (!cpu->created) {
        qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
    }
}
#endif

static void qemu_kvm_start_vcpu(CPUState *cpu)
{
    char thread_name[VCPU_THREAD_NAME_SIZE];

    cpu->thread = g_malloc0(sizeof(QemuThread));
    cpu->halt_cond = g_malloc0(sizeof(QemuCond));
    qemu_cond_init(cpu->halt_cond);
    snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/KVM",
             cpu->cpu_index);
    qemu_thread_create(cpu->thread, thread_name, qemu_kvm_cpu_thread_fn,
                       cpu, QEMU_THREAD_JOINABLE);
    while (!cpu->created) {
        qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
    }
}

static void qemu_dummy_start_vcpu(CPUState *cpu)
{
    char thread_name[VCPU_THREAD_NAME_SIZE];

    cpu->thread = g_malloc0(sizeof(QemuThread));
    cpu->halt_cond = g_malloc0(sizeof(QemuCond));
    qemu_cond_init(cpu->halt_cond);
    snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/DUMMY",
             cpu->cpu_index);
    qemu_thread_create(cpu->thread, thread_name, qemu_dummy_cpu_thread_fn, cpu,
                       QEMU_THREAD_JOINABLE);
    while (!cpu->created) {
        qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
    }
}

void qemu_init_vcpu(CPUState *cpu)
{
    cpu->nr_cores = smp_cores;
    cpu->nr_threads = smp_threads;
    cpu->stopped = true;

    if (!cpu->as) {
        /* If the target cpu hasn't set up any address spaces itself,
         * give it the default one.
         */
        AddressSpace *as = address_space_init_shareable(cpu->memory,
                                                        "cpu-memory");
        cpu->num_ases = 1;
        cpu_address_space_init(cpu, as, 0);
    }

    if (kvm_enabled()) {
        qemu_kvm_start_vcpu(cpu);
#ifdef CONFIG_HAX
    } else if (hax_enabled() && hax_ug_platform()) {
        qemu_hax_start_vcpu(cpu);
#endif
    } else if (tcg_enabled()) {
        qemu_tcg_init_vcpu(cpu);
    } else {
        qemu_dummy_start_vcpu(cpu);
    }
}

void cpu_stop_current(void)
{
    if (current_cpu) {
        current_cpu->stop = false;
        current_cpu->stopped = true;
        cpu_exit(current_cpu);
        qemu_cond_broadcast(&qemu_pause_cond);
    }
}

int vm_stop(RunState state)
{
    if (qemu_in_vcpu_thread()) {
        qemu_system_vmstop_request_prepare();
        qemu_system_vmstop_request(state);
        /*
         * FIXME: should not return to device code in case
         * vm_stop() has been requested.
         */
        cpu_stop_current();
        return 0;
    }

    return do_vm_stop(state);
}

/* Does a state transition even if the VM is already stopped; the
   current state is forgotten forever.  */
int vm_stop_force_state(RunState state)
{
    if (runstate_is_running()) {
        return vm_stop(state);
    } else {
        runstate_set(state);

        bdrv_drain_all();
        /* Make sure to return an error if the flush in a previous vm_stop()
         * failed. */
        return bdrv_flush_all();
    }
}

void list_cpus(FILE *f, fprintf_function cpu_fprintf, const char *optarg)
{
    /* XXX: implement xxx_cpu_list for targets that still miss it */
#if defined(cpu_list)
    cpu_list(f, cpu_fprintf);
#endif
}

CpuInfoList *qmp_query_cpus(Error **errp)
{
    CpuInfoList *head = NULL, *cur_item = NULL;
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        CpuInfoList *info;
#if defined(TARGET_I386)
        X86CPU *x86_cpu = X86_CPU(cpu);
        CPUX86State *env = &x86_cpu->env;
#elif defined(TARGET_PPC)
        PowerPCCPU *ppc_cpu = POWERPC_CPU(cpu);
        CPUPPCState *env = &ppc_cpu->env;
#elif defined(TARGET_SPARC)
        SPARCCPU *sparc_cpu = SPARC_CPU(cpu);
        CPUSPARCState *env = &sparc_cpu->env;
#elif defined(TARGET_MIPS)
        MIPSCPU *mips_cpu = MIPS_CPU(cpu);
        CPUMIPSState *env = &mips_cpu->env;
#elif defined(TARGET_TRICORE)
        TriCoreCPU *tricore_cpu = TRICORE_CPU(cpu);
        CPUTriCoreState *env = &tricore_cpu->env;
#endif

        cpu_synchronize_state(cpu);

        info = g_malloc0(sizeof(*info));
        info->value = g_malloc0(sizeof(*info->value));
        info->value->CPU = cpu->cpu_index;
        info->value->current = (cpu == first_cpu);
        info->value->halted = cpu->halted;
        info->value->qom_path = object_get_canonical_path(OBJECT(cpu));
        info->value->thread_id = cpu->thread_id;
#if defined(TARGET_I386)
        info->value->arch = CPU_INFO_ARCH_X86;
        info->value->u.x86.pc = env->eip + env->segs[R_CS].base;
#elif defined(TARGET_PPC)
        info->value->arch = CPU_INFO_ARCH_PPC;
        info->value->u.ppc.nip = env->nip;
#elif defined(TARGET_SPARC)
        info->value->arch = CPU_INFO_ARCH_SPARC;
        info->value->u.q_sparc.pc = env->pc;
        info->value->u.q_sparc.npc = env->npc;
#elif defined(TARGET_MIPS)
        info->value->arch = CPU_INFO_ARCH_MIPS;
        info->value->u.q_mips.PC = env->active_tc.PC;
#elif defined(TARGET_TRICORE)
        info->value->arch = CPU_INFO_ARCH_TRICORE;
        info->value->u.tricore.PC = env->PC;
#else
        info->value->arch = CPU_INFO_ARCH_OTHER;
#endif

        /* XXX: waiting for the qapi to support GSList */
        if (!cur_item) {
            head = cur_item = info;
        } else {
            cur_item->next = info;
            cur_item = info;
        }
    }

    return head;
}

void qmp_memsave(int64_t addr, int64_t size, const char *filename,
                 bool has_cpu, int64_t cpu_index, Error **errp)
{
    FILE *f;
    uint32_t l;
    CPUState *cpu;
    uint8_t buf[1024];
    int64_t orig_addr = addr, orig_size = size;

    if (!has_cpu) {
        cpu_index = 0;
    }

    cpu = qemu_get_cpu(cpu_index);
    if (cpu == NULL) {
        error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "cpu-index",
                   "a CPU number");
        return;
    }

    f = fopen(filename, "wb");
    if (!f) {
        error_setg_file_open(errp, errno, filename);
        return;
    }

    while (size != 0) {
        l = sizeof(buf);
        if (l > size) {
            l = size;
        }
        if (cpu_memory_rw_debug(cpu, addr, buf, l, 0) != 0) {
            error_setg(errp, "Invalid addr 0x%016" PRIx64 "/size %" PRId64
                             " specified", orig_addr, orig_size);
            goto exit;
        }
        if (fwrite(buf, 1, l, f) != l) {
            error_setg(errp, QERR_IO_ERROR);
            goto exit;
        }
        addr += l;
        size -= l;
    }

exit:
    fclose(f);
}

void qmp_pmemsave(int64_t addr, int64_t size, const char *filename,
                  Error **errp)
{
    FILE *f;
    uint32_t l;
    uint8_t buf[1024];

    f = fopen(filename, "wb");
    if (!f) {
        error_setg_file_open(errp, errno, filename);
        return;
    }

    while (size != 0) {
        l = sizeof(buf);
        if (l > size) {
            l = size;
        }
        cpu_physical_memory_read(addr, buf, l);
        if (fwrite(buf, 1, l, f) != l) {
            error_setg(errp, QERR_IO_ERROR);
            goto exit;
        }
        addr += l;
        size -= l;
    }

exit:
    fclose(f);
}

void qmp_inject_nmi(Error **errp)
{
    nmi_monitor_handle(monitor_get_cpu_index(), errp);
}

void dump_drift_info(FILE *f, fprintf_function cpu_fprintf)
{
    if (!use_icount) {
        return;
    }

    cpu_fprintf(f, "Host - Guest clock  %"PRIi64" ms\n",
                (cpu_get_clock() - cpu_get_icount())/SCALE_MS);
    if (icount_align_option) {
        cpu_fprintf(f, "Max guest delay     %"PRIi64" ms\n", -max_delay/SCALE_MS);
        cpu_fprintf(f, "Max guest advance   %"PRIi64" ms\n", max_advance/SCALE_MS);
    } else {
        cpu_fprintf(f, "Max guest delay     NA\n");
        cpu_fprintf(f, "Max guest advance   NA\n");
    }
}