9pfs: add cleanup operation in FileOperations
[sdk/emulator/qemu.git] / cpus.c
1 /*
2  * QEMU System Emulator
3  *
4  * Copyright (c) 2003-2008 Fabrice Bellard
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to deal
8  * in the Software without restriction, including without limitation the rights
9  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10  * copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22  * THE SOFTWARE.
23  */
24
25 /* Needed early for CONFIG_BSD etc. */
26 #include "qemu/osdep.h"
27 #include "qemu-common.h"
28 #include "cpu.h"
29 #include "monitor/monitor.h"
30 #include "qapi/qmp/qerror.h"
31 #include "qemu/error-report.h"
32 #include "sysemu/sysemu.h"
33 #include "sysemu/block-backend.h"
34 #include "exec/gdbstub.h"
35 #include "sysemu/dma.h"
36 #include "sysemu/kvm.h"
37 #include "qmp-commands.h"
38 #include "exec/exec-all.h"
39
40 #include "qemu/thread.h"
41 #include "sysemu/cpus.h"
42 #include "sysemu/qtest.h"
43 #include "qemu/main-loop.h"
44 #include "qemu/bitmap.h"
45 #include "qemu/seqlock.h"
46 #include "qapi-event.h"
47 #include "hw/nmi.h"
48 #include "sysemu/replay.h"
49
50 #ifndef _WIN32
51 #include "qemu/compatfd.h"
52 #endif
53
54 #ifdef CONFIG_LINUX
55
56 #include <sys/prctl.h>
57
58 #ifndef PR_MCE_KILL
59 #define PR_MCE_KILL 33
60 #endif
61
62 #ifndef PR_MCE_KILL_SET
63 #define PR_MCE_KILL_SET 1
64 #endif
65
66 #ifndef PR_MCE_KILL_EARLY
67 #define PR_MCE_KILL_EARLY 1
68 #endif
69
70 #endif /* CONFIG_LINUX */
71
72 int64_t max_delay;
73 int64_t max_advance;
74
75 /* vcpu throttling controls */
76 static QEMUTimer *throttle_timer;
77 static unsigned int throttle_percentage;
78
79 #define CPU_THROTTLE_PCT_MIN 1
80 #define CPU_THROTTLE_PCT_MAX 99
81 #define CPU_THROTTLE_TIMESLICE_NS 10000000
82
83 bool cpu_is_stopped(CPUState *cpu)
84 {
85     return cpu->stopped || !runstate_is_running();
86 }
87
88 static bool cpu_thread_is_idle(CPUState *cpu)
89 {
90     if (cpu->stop || cpu->queued_work_first) {
91         return false;
92     }
93     if (cpu_is_stopped(cpu)) {
94         return true;
95     }
96     if (!cpu->halted || cpu_has_work(cpu) ||
97         kvm_halt_in_kernel()) {
98         return false;
99     }
100     return true;
101 }
102
103 static bool all_cpu_threads_idle(void)
104 {
105     CPUState *cpu;
106
107     CPU_FOREACH(cpu) {
108         if (!cpu_thread_is_idle(cpu)) {
109             return false;
110         }
111     }
112     return true;
113 }
114
115 /***********************************************************/
116 /* guest cycle counter */
117
118 /* Protected by TimersState seqlock */
119
120 static bool icount_sleep = true;
121 static int64_t vm_clock_warp_start = -1;
122 /* Conversion factor from emulated instructions to virtual clock ticks.  */
123 static int icount_time_shift;
124 /* Arbitrarily pick 1MIPS as the minimum allowable speed.  */
125 #define MAX_ICOUNT_SHIFT 10
126
127 static QEMUTimer *icount_rt_timer;
128 static QEMUTimer *icount_vm_timer;
129 static QEMUTimer *icount_warp_timer;
130
/*
 * Bookkeeping for the guest-visible tick counter, the VM clock and the
 * instruction counter.  A single instance, timers_state, is defined in this
 * file and registered for migration through vmstate_timers.
 */
131 typedef struct TimersState {
132     /* Protected by BQL.  */
133     int64_t cpu_ticks_prev;   /* last value handed out by cpu_get_ticks() */
134     int64_t cpu_ticks_offset; /* added to the host tick count by cpu_get_ticks() */
135
136     /* cpu_clock_offset can be read out of BQL, so protect it with
137      * this lock.
138      */
139     QemuSeqLock vm_clock_seqlock;
140     int64_t cpu_clock_offset;
141     int32_t cpu_ticks_enabled; /* set by cpu_enable_ticks(), cleared by cpu_disable_ticks() */
142     int64_t dummy;             /* only referenced by the vmstate_timers field list */
143
144     /* Compensate for varying guest execution speed.  */
145     int64_t qemu_icount_bias;
146     /* Only written by TCG thread */
147     int64_t qemu_icount;
148 } TimersState;
149
150 static TimersState timers_state;
151
152 int64_t cpu_get_icount_raw(void)
153 {
154     int64_t icount;
155     CPUState *cpu = current_cpu;
156
157     icount = timers_state.qemu_icount;
158     if (cpu) {
159         if (!cpu->can_do_io) {
160             fprintf(stderr, "Bad icount read\n");
161             exit(1);
162         }
163         icount -= (cpu->icount_decr.u16.low + cpu->icount_extra);
164     }
165     return icount;
166 }
167
168 /* Return the virtual CPU time, based on the instruction counter.  */
169 static int64_t cpu_get_icount_locked(void)
170 {
171     int64_t icount = cpu_get_icount_raw();
172     return timers_state.qemu_icount_bias + cpu_icount_to_ns(icount);
173 }
174
175 int64_t cpu_get_icount(void)
176 {
177     int64_t icount;
178     unsigned start;
179
180     do {
181         start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
182         icount = cpu_get_icount_locked();
183     } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));
184
185     return icount;
186 }
187
188 int64_t cpu_icount_to_ns(int64_t icount)
189 {
190     return icount << icount_time_shift;
191 }
192
193 /* return the time elapsed in VM between vm_start and vm_stop.  Unless
194  * icount is active, cpu_get_ticks() uses units of the host CPU cycle
195  * counter.
196  *
197  * Caller must hold the BQL
198  */
199 int64_t cpu_get_ticks(void)
200 {
201     int64_t ticks;
202
203     if (use_icount) {
204         return cpu_get_icount();
205     }
206
207     ticks = timers_state.cpu_ticks_offset;
208     if (timers_state.cpu_ticks_enabled) {
209         ticks += cpu_get_host_ticks();
210     }
211
212     if (timers_state.cpu_ticks_prev > ticks) {
213         /* Note: non increasing ticks may happen if the host uses
214            software suspend */
215         timers_state.cpu_ticks_offset += timers_state.cpu_ticks_prev - ticks;
216         ticks = timers_state.cpu_ticks_prev;
217     }
218
219     timers_state.cpu_ticks_prev = ticks;
220     return ticks;
221 }
222
223 static int64_t cpu_get_clock_locked(void)
224 {
225     int64_t time;
226
227     time = timers_state.cpu_clock_offset;
228     if (timers_state.cpu_ticks_enabled) {
229         time += get_clock();
230     }
231
232     return time;
233 }
234
235 /* Return the monotonic time elapsed in VM, i.e.,
236  * the time between vm_start and vm_stop
237  */
238 int64_t cpu_get_clock(void)
239 {
240     int64_t ti;
241     unsigned start;
242
243     do {
244         start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
245         ti = cpu_get_clock_locked();
246     } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));
247
248     return ti;
249 }
250
251 /* enable cpu_get_ticks()
252  * Caller must hold BQL which serves as mutex for vm_clock_seqlock.
253  */
254 void cpu_enable_ticks(void)
255 {
256     /* Here, the really thing protected by seqlock is cpu_clock_offset. */
257     seqlock_write_begin(&timers_state.vm_clock_seqlock);
258     if (!timers_state.cpu_ticks_enabled) {
259         timers_state.cpu_ticks_offset -= cpu_get_host_ticks();
260         timers_state.cpu_clock_offset -= get_clock();
261         timers_state.cpu_ticks_enabled = 1;
262     }
263     seqlock_write_end(&timers_state.vm_clock_seqlock);
264 }
265
266 /* disable cpu_get_ticks() : the clock is stopped. You must not call
267  * cpu_get_ticks() after that.
268  * Caller must hold BQL which serves as mutex for vm_clock_seqlock.
269  */
270 void cpu_disable_ticks(void)
271 {
272     /* Here, the really thing protected by seqlock is cpu_clock_offset. */
273     seqlock_write_begin(&timers_state.vm_clock_seqlock);
274     if (timers_state.cpu_ticks_enabled) {
275         timers_state.cpu_ticks_offset += cpu_get_host_ticks();
276         timers_state.cpu_clock_offset = cpu_get_clock_locked();
277         timers_state.cpu_ticks_enabled = 0;
278     }
279     seqlock_write_end(&timers_state.vm_clock_seqlock);
280 }
281
282 /* Correlation between real and virtual time is always going to be
283    fairly approximate, so ignore small variation.
284    When the guest is idle real and virtual time will be aligned in
285    the IO wait loop.  */
286 #define ICOUNT_WOBBLE (NANOSECONDS_PER_SECOND / 10)
287
288 static void icount_adjust(void)
289 {
290     int64_t cur_time;
291     int64_t cur_icount;
292     int64_t delta;
293
294     /* Protected by TimersState mutex.  */
295     static int64_t last_delta;
296
297     /* If the VM is not running, then do nothing.  */
298     if (!runstate_is_running()) {
299         return;
300     }
301
302     seqlock_write_begin(&timers_state.vm_clock_seqlock);
303     cur_time = cpu_get_clock_locked();
304     cur_icount = cpu_get_icount_locked();
305
306     delta = cur_icount - cur_time;
307     /* FIXME: This is a very crude algorithm, somewhat prone to oscillation.  */
308     if (delta > 0
309         && last_delta + ICOUNT_WOBBLE < delta * 2
310         && icount_time_shift > 0) {
311         /* The guest is getting too far ahead.  Slow time down.  */
312         icount_time_shift--;
313     }
314     if (delta < 0
315         && last_delta - ICOUNT_WOBBLE > delta * 2
316         && icount_time_shift < MAX_ICOUNT_SHIFT) {
317         /* The guest is getting too far behind.  Speed time up.  */
318         icount_time_shift++;
319     }
320     last_delta = delta;
321     timers_state.qemu_icount_bias = cur_icount
322                               - (timers_state.qemu_icount << icount_time_shift);
323     seqlock_write_end(&timers_state.vm_clock_seqlock);
324 }
325
326 static void icount_adjust_rt(void *opaque)
327 {
328     timer_mod(icount_rt_timer,
329               qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000);
330     icount_adjust();
331 }
332
333 static void icount_adjust_vm(void *opaque)
334 {
335     timer_mod(icount_vm_timer,
336                    qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
337                    NANOSECONDS_PER_SECOND / 10);
338     icount_adjust();
339 }
340
341 static int64_t qemu_icount_round(int64_t count)
342 {
343     return (count + (1 << icount_time_shift) - 1) >> icount_time_shift;
344 }
345
346 static void icount_warp_rt(void)
347 {
348     unsigned seq;
349     int64_t warp_start;
350
351     /* The icount_warp_timer is rescheduled soon after vm_clock_warp_start
352      * changes from -1 to another value, so the race here is okay.
353      */
354     do {
355         seq = seqlock_read_begin(&timers_state.vm_clock_seqlock);
356         warp_start = vm_clock_warp_start;
357     } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, seq));
358
359     if (warp_start == -1) {
360         return;
361     }
362
363     seqlock_write_begin(&timers_state.vm_clock_seqlock);
364     if (runstate_is_running()) {
365         int64_t clock = REPLAY_CLOCK(REPLAY_CLOCK_VIRTUAL_RT,
366                                      cpu_get_clock_locked());
367         int64_t warp_delta;
368
369         warp_delta = clock - vm_clock_warp_start;
370         if (use_icount == 2) {
371             /*
372              * In adaptive mode, do not let QEMU_CLOCK_VIRTUAL run too
373              * far ahead of real time.
374              */
375             int64_t cur_icount = cpu_get_icount_locked();
376             int64_t delta = clock - cur_icount;
377             warp_delta = MIN(warp_delta, delta);
378         }
379         timers_state.qemu_icount_bias += warp_delta;
380     }
381     vm_clock_warp_start = -1;
382     seqlock_write_end(&timers_state.vm_clock_seqlock);
383
384     if (qemu_clock_expired(QEMU_CLOCK_VIRTUAL)) {
385         qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
386     }
387 }
388
/*
 * Timer callback installed on icount_warp_timer by configure_icount();
 * @opaque is unused.  It simply accounts for the pending warp.
 */
389 static void icount_timer_cb(void *opaque)
390 {
391     /* No need for a checkpoint because the timer already synchronizes
392      * with CHECKPOINT_CLOCK_VIRTUAL_RT.
393      */
394     icount_warp_rt();
395 }
396
397 void qtest_clock_warp(int64_t dest)
398 {
399     int64_t clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
400     AioContext *aio_context;
401     assert(qtest_enabled());
402     aio_context = qemu_get_aio_context();
403     while (clock < dest) {
404         int64_t deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
405         int64_t warp = qemu_soonest_timeout(dest - clock, deadline);
406
407         seqlock_write_begin(&timers_state.vm_clock_seqlock);
408         timers_state.qemu_icount_bias += warp;
409         seqlock_write_end(&timers_state.vm_clock_seqlock);
410
411         qemu_clock_run_timers(QEMU_CLOCK_VIRTUAL);
412         timerlist_run_timers(aio_context->tlg.tl[QEMU_CLOCK_VIRTUAL]);
413         clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
414     }
415     qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
416 }
417
418 void qemu_start_warp_timer(void)
419 {
420     int64_t clock;
421     int64_t deadline;
422
423     if (!use_icount) {
424         return;
425     }
426
427     /* Nothing to do if the VM is stopped: QEMU_CLOCK_VIRTUAL timers
428      * do not fire, so computing the deadline does not make sense.
429      */
430     if (!runstate_is_running()) {
431         return;
432     }
433
434     /* warp clock deterministically in record/replay mode */
435     if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP_START)) {
436         return;
437     }
438
439     if (!all_cpu_threads_idle()) {
440         return;
441     }
442
443     if (qtest_enabled()) {
444         /* When testing, qtest commands advance icount.  */
445         return;
446     }
447
448     /* We want to use the earliest deadline from ALL vm_clocks */
449     clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT);
450     deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
451     if (deadline < 0) {
452         static bool notified;
453         if (!icount_sleep && !notified) {
454             error_report("WARNING: icount sleep disabled and no active timers");
455             notified = true;
456         }
457         return;
458     }
459
460     if (deadline > 0) {
461         /*
462          * Ensure QEMU_CLOCK_VIRTUAL proceeds even when the virtual CPU goes to
463          * sleep.  Otherwise, the CPU might be waiting for a future timer
464          * interrupt to wake it up, but the interrupt never comes because
465          * the vCPU isn't running any insns and thus doesn't advance the
466          * QEMU_CLOCK_VIRTUAL.
467          */
468         if (!icount_sleep) {
469             /*
470              * We never let VCPUs sleep in no sleep icount mode.
471              * If there is a pending QEMU_CLOCK_VIRTUAL timer we just advance
472              * to the next QEMU_CLOCK_VIRTUAL event and notify it.
473              * It is useful when we want a deterministic execution time,
474              * isolated from host latencies.
475              */
476             seqlock_write_begin(&timers_state.vm_clock_seqlock);
477             timers_state.qemu_icount_bias += deadline;
478             seqlock_write_end(&timers_state.vm_clock_seqlock);
479             qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
480         } else {
481             /*
482              * We do stop VCPUs and only advance QEMU_CLOCK_VIRTUAL after some
483              * "real" time, (related to the time left until the next event) has
484              * passed. The QEMU_CLOCK_VIRTUAL_RT clock will do this.
485              * This avoids that the warps are visible externally; for example,
486              * you will not be sending network packets continuously instead of
487              * every 100ms.
488              */
489             seqlock_write_begin(&timers_state.vm_clock_seqlock);
490             if (vm_clock_warp_start == -1 || vm_clock_warp_start > clock) {
491                 vm_clock_warp_start = clock;
492             }
493             seqlock_write_end(&timers_state.vm_clock_seqlock);
494             timer_mod_anticipate(icount_warp_timer, clock + deadline);
495         }
496     } else if (deadline == 0) {
497         qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
498     }
499 }
500
501 static void qemu_account_warp_timer(void)
502 {
503     if (!use_icount || !icount_sleep) {
504         return;
505     }
506
507     /* Nothing to do if the VM is stopped: QEMU_CLOCK_VIRTUAL timers
508      * do not fire, so computing the deadline does not make sense.
509      */
510     if (!runstate_is_running()) {
511         return;
512     }
513
514     /* warp clock deterministically in record/replay mode */
515     if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP_ACCOUNT)) {
516         return;
517     }
518
519     timer_del(icount_warp_timer);
520     icount_warp_rt();
521 }
522
523 static bool icount_state_needed(void *opaque)
524 {
525     return use_icount;
526 }
527
528 /*
 * This is a subsection for icount migration.  It carries the two
 * instruction-counter fields of TimersState (qemu_icount_bias and
 * qemu_icount) and is gated by icount_state_needed().
 */
531 static const VMStateDescription icount_vmstate_timers = {
532     .name = "timer/icount",
533     .version_id = 1,
534     .minimum_version_id = 1,
535     .needed = icount_state_needed,
536     .fields = (VMStateField[]) {
537         VMSTATE_INT64(qemu_icount_bias, TimersState),
538         VMSTATE_INT64(qemu_icount, TimersState),
539         VMSTATE_END_OF_LIST()
540     }
541 };
542
/*
 * Migration description for TimersState, registered by cpu_ticks_init().
 * cpu_clock_offset is declared with VMSTATE_INT64_V(..., 2), i.e. it is
 * tied to version 2 of the section.  The icount state travels in the
 * optional "timer/icount" subsection.
 */
543 static const VMStateDescription vmstate_timers = {
544     .name = "timer",
545     .version_id = 2,
546     .minimum_version_id = 1,
547     .fields = (VMStateField[]) {
548         VMSTATE_INT64(cpu_ticks_offset, TimersState),
549         VMSTATE_INT64(dummy, TimersState),
550         VMSTATE_INT64_V(cpu_clock_offset, TimersState, 2),
551         VMSTATE_END_OF_LIST()
552     },
553     .subsections = (const VMStateDescription*[]) {
554         &icount_vmstate_timers,
555         NULL
556     }
557 };
558
559 static void cpu_throttle_thread(CPUState *cpu, run_on_cpu_data opaque)
560 {
561     double pct;
562     double throttle_ratio;
563     long sleeptime_ns;
564
565     if (!cpu_throttle_get_percentage()) {
566         return;
567     }
568
569     pct = (double)cpu_throttle_get_percentage()/100;
570     throttle_ratio = pct / (1 - pct);
571     sleeptime_ns = (long)(throttle_ratio * CPU_THROTTLE_TIMESLICE_NS);
572
573     qemu_mutex_unlock_iothread();
574     atomic_set(&cpu->throttle_thread_scheduled, 0);
575     g_usleep(sleeptime_ns / 1000); /* Convert ns to us for usleep call */
576     qemu_mutex_lock_iothread();
577 }
578
579 static void cpu_throttle_timer_tick(void *opaque)
580 {
581     CPUState *cpu;
582     double pct;
583
584     /* Stop the timer if needed */
585     if (!cpu_throttle_get_percentage()) {
586         return;
587     }
588     CPU_FOREACH(cpu) {
589         if (!atomic_xchg(&cpu->throttle_thread_scheduled, 1)) {
590             async_run_on_cpu(cpu, cpu_throttle_thread,
591                              RUN_ON_CPU_NULL);
592         }
593     }
594
595     pct = (double)cpu_throttle_get_percentage()/100;
596     timer_mod(throttle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT) +
597                                    CPU_THROTTLE_TIMESLICE_NS / (1-pct));
598 }
599
600 void cpu_throttle_set(int new_throttle_pct)
601 {
602     /* Ensure throttle percentage is within valid range */
603     new_throttle_pct = MIN(new_throttle_pct, CPU_THROTTLE_PCT_MAX);
604     new_throttle_pct = MAX(new_throttle_pct, CPU_THROTTLE_PCT_MIN);
605
606     atomic_set(&throttle_percentage, new_throttle_pct);
607
608     timer_mod(throttle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT) +
609                                        CPU_THROTTLE_TIMESLICE_NS);
610 }
611
/*
 * Turn throttling off by storing a percentage of 0.  The timer itself is not
 * touched here: cpu_throttle_timer_tick() returns without re-arming once it
 * reads 0.
 */
612 void cpu_throttle_stop(void)
613 {
614     atomic_set(&throttle_percentage, 0);
615 }
616
/* Throttling counts as active whenever the stored percentage is non-zero. */
bool cpu_throttle_active(void)
{
    int pct = cpu_throttle_get_percentage();

    return pct != 0;
}
621
622 int cpu_throttle_get_percentage(void)
623 {
624     return atomic_read(&throttle_percentage);
625 }
626
627 void cpu_ticks_init(void)
628 {
629     seqlock_init(&timers_state.vm_clock_seqlock);
630     vmstate_register(NULL, 0, &vmstate_timers, &timers_state);
631     throttle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL_RT,
632                                            cpu_throttle_timer_tick, NULL);
633 }
634
635 void configure_icount(QemuOpts *opts, Error **errp)
636 {
637     const char *option;
638     char *rem_str = NULL;
639
640     option = qemu_opt_get(opts, "shift");
641     if (!option) {
642         if (qemu_opt_get(opts, "align") != NULL) {
643             error_setg(errp, "Please specify shift option when using align");
644         }
645         return;
646     }
647
648     icount_sleep = qemu_opt_get_bool(opts, "sleep", true);
649     if (icount_sleep) {
650         icount_warp_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL_RT,
651                                          icount_timer_cb, NULL);
652     }
653
654     icount_align_option = qemu_opt_get_bool(opts, "align", false);
655
656     if (icount_align_option && !icount_sleep) {
657         error_setg(errp, "align=on and sleep=off are incompatible");
658     }
659     if (strcmp(option, "auto") != 0) {
660         errno = 0;
661         icount_time_shift = strtol(option, &rem_str, 0);
662         if (errno != 0 || *rem_str != '\0' || !strlen(option)) {
663             error_setg(errp, "icount: Invalid shift value");
664         }
665         use_icount = 1;
666         return;
667     } else if (icount_align_option) {
668         error_setg(errp, "shift=auto and align=on are incompatible");
669     } else if (!icount_sleep) {
670         error_setg(errp, "shift=auto and sleep=off are incompatible");
671     }
672
673     use_icount = 2;
674
675     /* 125MIPS seems a reasonable initial guess at the guest speed.
676        It will be corrected fairly quickly anyway.  */
677     icount_time_shift = 3;
678
679     /* Have both realtime and virtual time triggers for speed adjustment.
680        The realtime trigger catches emulated time passing too slowly,
681        the virtual time trigger catches emulated time passing too fast.
682        Realtime triggers occur even when idle, so use them less frequently
683        than VM triggers.  */
684     icount_rt_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL_RT,
685                                    icount_adjust_rt, NULL);
686     timer_mod(icount_rt_timer,
687                    qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000);
688     icount_vm_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
689                                         icount_adjust_vm, NULL);
690     timer_mod(icount_vm_timer,
691                    qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
692                    NANOSECONDS_PER_SECOND / 10);
693 }
694
695 /***********************************************************/
696 void hw_error(const char *fmt, ...)
697 {
698     va_list ap;
699     CPUState *cpu;
700
701     va_start(ap, fmt);
702     fprintf(stderr, "qemu: hardware error: ");
703     vfprintf(stderr, fmt, ap);
704     fprintf(stderr, "\n");
705     CPU_FOREACH(cpu) {
706         fprintf(stderr, "CPU #%d:\n", cpu->cpu_index);
707         cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_FPU);
708     }
709     va_end(ap);
710     abort();
711 }
712
713 void cpu_synchronize_all_states(void)
714 {
715     CPUState *cpu;
716
717     CPU_FOREACH(cpu) {
718         cpu_synchronize_state(cpu);
719     }
720 }
721
722 void cpu_synchronize_all_post_reset(void)
723 {
724     CPUState *cpu;
725
726     CPU_FOREACH(cpu) {
727         cpu_synchronize_post_reset(cpu);
728     }
729 }
730
731 void cpu_synchronize_all_post_init(void)
732 {
733     CPUState *cpu;
734
735     CPU_FOREACH(cpu) {
736         cpu_synchronize_post_init(cpu);
737     }
738 }
739
740 static int do_vm_stop(RunState state)
741 {
742     int ret = 0;
743
744     if (runstate_is_running()) {
745         cpu_disable_ticks();
746         pause_all_vcpus();
747         runstate_set(state);
748         vm_state_notify(0, state);
749         qapi_event_send_stop(&error_abort);
750     }
751
752     bdrv_drain_all();
753     replay_disable_events();
754     ret = bdrv_flush_all();
755
756     return ret;
757 }
758
759 static bool cpu_can_run(CPUState *cpu)
760 {
761     if (cpu->stop) {
762         return false;
763     }
764     if (cpu_is_stopped(cpu)) {
765         return false;
766     }
767     return true;
768 }
769
/*
 * React to a debug exception on @cpu: record it as the CPU gdb should report,
 * raise a system debug request, and mark the vCPU as stopped.
 */
770 static void cpu_handle_guest_debug(CPUState *cpu)
771 {
772     gdb_set_stop_cpu(cpu);
773     qemu_system_debug_request();
774     cpu->stopped = true;
775 }
776
777 #ifdef CONFIG_LINUX
778 static void sigbus_reraise(void)
779 {
780     sigset_t set;
781     struct sigaction action;
782
783     memset(&action, 0, sizeof(action));
784     action.sa_handler = SIG_DFL;
785     if (!sigaction(SIGBUS, &action, NULL)) {
786         raise(SIGBUS);
787         sigemptyset(&set);
788         sigaddset(&set, SIGBUS);
789         pthread_sigmask(SIG_UNBLOCK, &set, NULL);
790     }
791     perror("Failed to re-raise SIGBUS!\n");
792     abort();
793 }
794
795 static void sigbus_handler(int n, struct qemu_signalfd_siginfo *siginfo,
796                            void *ctx)
797 {
798     if (kvm_on_sigbus(siginfo->ssi_code,
799                       (void *)(intptr_t)siginfo->ssi_addr)) {
800         sigbus_reraise();
801     }
802 }
803
804 static void qemu_init_sigbus(void)
805 {
806     struct sigaction action;
807
808     memset(&action, 0, sizeof(action));
809     action.sa_flags = SA_SIGINFO;
810     action.sa_sigaction = (void (*)(int, siginfo_t*, void*))sigbus_handler;
811     sigaction(SIGBUS, &action, NULL);
812
813     prctl(PR_MCE_KILL, PR_MCE_KILL_SET, PR_MCE_KILL_EARLY, 0, 0);
814 }
815
816 static void qemu_kvm_eat_signals(CPUState *cpu)
817 {
818     struct timespec ts = { 0, 0 };
819     siginfo_t siginfo;
820     sigset_t waitset;
821     sigset_t chkset;
822     int r;
823
824     sigemptyset(&waitset);
825     sigaddset(&waitset, SIG_IPI);
826     sigaddset(&waitset, SIGBUS);
827
828     do {
829         r = sigtimedwait(&waitset, &siginfo, &ts);
830         if (r == -1 && !(errno == EAGAIN || errno == EINTR)) {
831             perror("sigtimedwait");
832             exit(1);
833         }
834
835         switch (r) {
836         case SIGBUS:
837             if (kvm_on_sigbus_vcpu(cpu, siginfo.si_code, siginfo.si_addr)) {
838                 sigbus_reraise();
839             }
840             break;
841         default:
842             break;
843         }
844
845         r = sigpending(&chkset);
846         if (r == -1) {
847             perror("sigpending");
848             exit(1);
849         }
850     } while (sigismember(&chkset, SIG_IPI) || sigismember(&chkset, SIGBUS));
851 }
852
853 #else /* !CONFIG_LINUX */
854
/* Non-Linux build: no SIGBUS setup is performed. */
855 static void qemu_init_sigbus(void)
856 {
857 }
858
/* Non-Linux build: there are no signals to consume, so this is a no-op. */
859 static void qemu_kvm_eat_signals(CPUState *cpu)
860 {
861 }
862 #endif /* !CONFIG_LINUX */
863
864 #ifndef _WIN32
/*
 * Deliberately empty signal handler; installed for SIG_IPI by
 * qemu_kvm_init_cpu_signals().
 */
static void dummy_signal(int sig)
{
    (void)sig;
}
868
869 static void qemu_kvm_init_cpu_signals(CPUState *cpu)
870 {
871     int r;
872     sigset_t set;
873     struct sigaction sigact;
874
875     memset(&sigact, 0, sizeof(sigact));
876     sigact.sa_handler = dummy_signal;
877     sigaction(SIG_IPI, &sigact, NULL);
878
879     pthread_sigmask(SIG_BLOCK, NULL, &set);
880     sigdelset(&set, SIG_IPI);
881     sigdelset(&set, SIGBUS);
882     r = kvm_set_signal_mask(cpu, &set);
883     if (r) {
884         fprintf(stderr, "kvm_set_signal_mask: %s\n", strerror(-r));
885         exit(1);
886     }
887 }
888
889 #else /* _WIN32 */
/* Windows build: there is no implementation; reaching this aborts. */
890 static void qemu_kvm_init_cpu_signals(CPUState *cpu)
891 {
892     abort();
893 }
894 #endif /* _WIN32 */
895
896 static QemuMutex qemu_global_mutex;
897 static QemuCond qemu_io_proceeded_cond;
898 static unsigned iothread_requesting_mutex;
899
900 static QemuThread io_thread;
901
902 /* cpu creation */
903 static QemuCond qemu_cpu_cond;
904 /* system init */
905 static QemuCond qemu_pause_cond;
906
907 void qemu_init_cpu_loop(void)
908 {
909     qemu_init_sigbus();
910     qemu_cond_init(&qemu_cpu_cond);
911     qemu_cond_init(&qemu_pause_cond);
912     qemu_cond_init(&qemu_io_proceeded_cond);
913     qemu_mutex_init(&qemu_global_mutex);
914
915     qemu_thread_get_self(&io_thread);
916 }
917
/*
 * Run @func with @data on @cpu.  Thin wrapper that delegates to
 * do_run_on_cpu(), passing qemu_global_mutex as the mutex argument.
 */
918 void run_on_cpu(CPUState *cpu, run_on_cpu_func func, run_on_cpu_data data)
919 {
920     do_run_on_cpu(cpu, func, data, &qemu_global_mutex);
921 }
922
923 static void qemu_kvm_destroy_vcpu(CPUState *cpu)
924 {
925     if (kvm_destroy_vcpu(cpu) < 0) {
926         error_report("kvm_destroy_vcpu failed");
927         exit(EXIT_FAILURE);
928     }
929 }
930
/* TCG counterpart of qemu_kvm_destroy_vcpu(); currently does nothing. */
931 static void qemu_tcg_destroy_vcpu(CPUState *cpu)
932 {
933 }
934
935 static void qemu_wait_io_event_common(CPUState *cpu)
936 {
937     if (cpu->stop) {
938         cpu->stop = false;
939         cpu->stopped = true;
940         qemu_cond_broadcast(&qemu_pause_cond);
941     }
942     process_queued_cpu_work(cpu);
943     cpu->thread_kicked = false;
944 }
945
946 static void qemu_tcg_wait_io_event(CPUState *cpu)
947 {
948     while (all_cpu_threads_idle()) {
949         qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
950     }
951
952     while (iothread_requesting_mutex) {
953         qemu_cond_wait(&qemu_io_proceeded_cond, &qemu_global_mutex);
954     }
955
956     CPU_FOREACH(cpu) {
957         qemu_wait_io_event_common(cpu);
958     }
959 }
960
961 static void qemu_kvm_wait_io_event(CPUState *cpu)
962 {
963     while (cpu_thread_is_idle(cpu)) {
964         qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
965     }
966
967     qemu_kvm_eat_signals(cpu);
968     qemu_wait_io_event_common(cpu);
969 }
970
971 static void *qemu_kvm_cpu_thread_fn(void *arg)
972 {
973     CPUState *cpu = arg;
974     int r;
975
976     rcu_register_thread();
977
978     qemu_mutex_lock_iothread();
979     qemu_thread_get_self(cpu->thread);
980     cpu->thread_id = qemu_get_thread_id();
981     cpu->can_do_io = 1;
982     current_cpu = cpu;
983
984     r = kvm_init_vcpu(cpu);
985     if (r < 0) {
986         fprintf(stderr, "kvm_init_vcpu failed: %s\n", strerror(-r));
987         exit(1);
988     }
989
990     qemu_kvm_init_cpu_signals(cpu);
991
992     /* signal CPU creation */
993     cpu->created = true;
994     qemu_cond_signal(&qemu_cpu_cond);
995
996     do {
997         if (cpu_can_run(cpu)) {
998             r = kvm_cpu_exec(cpu);
999             if (r == EXCP_DEBUG) {
1000                 cpu_handle_guest_debug(cpu);
1001             }
1002         }
1003         qemu_kvm_wait_io_event(cpu);
1004     } while (!cpu->unplug || cpu_can_run(cpu));
1005
1006     qemu_kvm_destroy_vcpu(cpu);
1007     cpu->created = false;
1008     qemu_cond_signal(&qemu_cpu_cond);
1009     qemu_mutex_unlock_iothread();
1010     return NULL;
1011 }
1012
1013 static void *qemu_dummy_cpu_thread_fn(void *arg)
1014 {
1015 #ifdef _WIN32
1016     fprintf(stderr, "qtest is not supported under Windows\n");
1017     exit(1);
1018 #else
1019     CPUState *cpu = arg;
1020     sigset_t waitset;
1021     int r;
1022
1023     rcu_register_thread();
1024
1025     qemu_mutex_lock_iothread();
1026     qemu_thread_get_self(cpu->thread);
1027     cpu->thread_id = qemu_get_thread_id();
1028     cpu->can_do_io = 1;
1029
1030     sigemptyset(&waitset);
1031     sigaddset(&waitset, SIG_IPI);
1032
1033     /* signal CPU creation */
1034     cpu->created = true;
1035     qemu_cond_signal(&qemu_cpu_cond);
1036
1037     current_cpu = cpu;
1038     while (1) {
1039         current_cpu = NULL;
1040         qemu_mutex_unlock_iothread();
1041         do {
1042             int sig;
1043             r = sigwait(&waitset, &sig);
1044         } while (r == -1 && (errno == EAGAIN || errno == EINTR));
1045         if (r == -1) {
1046             perror("sigwait");
1047             exit(1);
1048         }
1049         qemu_mutex_lock_iothread();
1050         current_cpu = cpu;
1051         qemu_wait_io_event_common(cpu);
1052     }
1053
1054     return NULL;
1055 #endif
1056 }
1057
1058 static int64_t tcg_get_icount_limit(void)
1059 {
1060     int64_t deadline;
1061
1062     if (replay_mode != REPLAY_MODE_PLAY) {
1063         deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
1064
1065         /* Maintain prior (possibly buggy) behaviour where if no deadline
1066          * was set (as there is no QEMU_CLOCK_VIRTUAL timer) or it is more than
1067          * INT32_MAX nanoseconds ahead, we still use INT32_MAX
1068          * nanoseconds.
1069          */
1070         if ((deadline < 0) || (deadline > INT32_MAX)) {
1071             deadline = INT32_MAX;
1072         }
1073
1074         return qemu_icount_round(deadline);
1075     } else {
1076         return replay_get_instructions();
1077     }
1078 }
1079
1080 static void handle_icount_deadline(void)
1081 {
1082     if (use_icount) {
1083         int64_t deadline =
1084             qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
1085
1086         if (deadline == 0) {
1087             qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
1088         }
1089     }
1090 }
1091
1092 static int tcg_cpu_exec(CPUState *cpu)
1093 {
1094     int ret;
1095 #ifdef CONFIG_PROFILER
1096     int64_t ti;
1097 #endif
1098
1099 #ifdef CONFIG_PROFILER
1100     ti = profile_getclock();
1101 #endif
1102     if (use_icount) {
1103         int64_t count;
1104         int decr;
1105         timers_state.qemu_icount -= (cpu->icount_decr.u16.low
1106                                     + cpu->icount_extra);
1107         cpu->icount_decr.u16.low = 0;
1108         cpu->icount_extra = 0;
1109         count = tcg_get_icount_limit();
1110         timers_state.qemu_icount += count;
1111         decr = (count > 0xffff) ? 0xffff : count;
1112         count -= decr;
1113         cpu->icount_decr.u16.low = decr;
1114         cpu->icount_extra = count;
1115     }
1116     cpu_exec_start(cpu);
1117     ret = cpu_exec(cpu);
1118     cpu_exec_end(cpu);
1119 #ifdef CONFIG_PROFILER
1120     tcg_time += profile_getclock() - ti;
1121 #endif
1122     if (use_icount) {
1123         /* Fold pending instructions back into the
1124            instruction counter, and clear the interrupt flag.  */
1125         timers_state.qemu_icount -= (cpu->icount_decr.u16.low
1126                         + cpu->icount_extra);
1127         cpu->icount_decr.u32 = 0;
1128         cpu->icount_extra = 0;
1129         replay_account_executed_instructions();
1130     }
1131     return ret;
1132 }
1133
1134 /* Destroy any remaining vCPUs which have been unplugged and have
1135  * finished running
1136  */
1137 static void deal_with_unplugged_cpus(void)
1138 {
1139     CPUState *cpu;
1140
1141     CPU_FOREACH(cpu) {
1142         if (cpu->unplug && !cpu_can_run(cpu)) {
1143             qemu_tcg_destroy_vcpu(cpu);
1144             cpu->created = false;
1145             qemu_cond_signal(&qemu_cpu_cond);
1146             break;
1147         }
1148     }
1149 }
1150
1151 static void *qemu_tcg_cpu_thread_fn(void *arg)
1152 {
1153     CPUState *cpu = arg;
1154
1155     rcu_register_thread();
1156
1157     qemu_mutex_lock_iothread();
1158     qemu_thread_get_self(cpu->thread);
1159
1160     CPU_FOREACH(cpu) {
1161         cpu->thread_id = qemu_get_thread_id();
1162         cpu->created = true;
1163         cpu->can_do_io = 1;
1164     }
1165     qemu_cond_signal(&qemu_cpu_cond);
1166
1167     /* wait for initial kick-off after machine start */
1168     while (first_cpu->stopped) {
1169         qemu_cond_wait(first_cpu->halt_cond, &qemu_global_mutex);
1170
1171         /* process any pending work */
1172         CPU_FOREACH(cpu) {
1173             qemu_wait_io_event_common(cpu);
1174         }
1175     }
1176
1177     /* process any pending work */
1178     atomic_mb_set(&exit_request, 1);
1179
1180     cpu = first_cpu;
1181
1182     while (1) {
1183         /* Account partial waits to QEMU_CLOCK_VIRTUAL.  */
1184         qemu_account_warp_timer();
1185
1186         if (!cpu) {
1187             cpu = first_cpu;
1188         }
1189
1190         for (; cpu != NULL && !exit_request; cpu = CPU_NEXT(cpu)) {
1191
1192             qemu_clock_enable(QEMU_CLOCK_VIRTUAL,
1193                               (cpu->singlestep_enabled & SSTEP_NOTIMER) == 0);
1194
1195             if (cpu_can_run(cpu)) {
1196                 int r;
1197                 r = tcg_cpu_exec(cpu);
1198                 if (r == EXCP_DEBUG) {
1199                     cpu_handle_guest_debug(cpu);
1200                     break;
1201                 }
1202             } else if (cpu->stop || cpu->stopped) {
1203                 if (cpu->unplug) {
1204                     cpu = CPU_NEXT(cpu);
1205                 }
1206                 break;
1207             }
1208
1209         } /* for cpu.. */
1210
1211         /* Pairs with smp_wmb in qemu_cpu_kick.  */
1212         atomic_mb_set(&exit_request, 0);
1213
1214         handle_icount_deadline();
1215
1216         qemu_tcg_wait_io_event(QTAILQ_FIRST(&cpus));
1217         deal_with_unplugged_cpus();
1218     }
1219
1220     return NULL;
1221 }
1222
1223 static void qemu_cpu_kick_thread(CPUState *cpu)
1224 {
1225 #ifndef _WIN32
1226     int err;
1227
1228     if (cpu->thread_kicked) {
1229         return;
1230     }
1231     cpu->thread_kicked = true;
1232     err = pthread_kill(cpu->thread->thread, SIG_IPI);
1233     if (err) {
1234         fprintf(stderr, "qemu:%s: %s", __func__, strerror(err));
1235         exit(1);
1236     }
1237 #else /* _WIN32 */
1238     abort();
1239 #endif
1240 }
1241
1242 static void qemu_cpu_kick_no_halt(void)
1243 {
1244     CPUState *cpu;
1245     /* Ensure whatever caused the exit has reached the CPU threads before
1246      * writing exit_request.
1247      */
1248     atomic_mb_set(&exit_request, 1);
1249     cpu = atomic_mb_read(&tcg_current_cpu);
1250     if (cpu) {
1251         cpu_exit(cpu);
1252     }
1253 }
1254
1255 void qemu_cpu_kick(CPUState *cpu)
1256 {
1257     qemu_cond_broadcast(cpu->halt_cond);
1258     if (tcg_enabled()) {
1259         qemu_cpu_kick_no_halt();
1260     } else {
1261         qemu_cpu_kick_thread(cpu);
1262     }
1263 }
1264
1265 void qemu_cpu_kick_self(void)
1266 {
1267     assert(current_cpu);
1268     qemu_cpu_kick_thread(current_cpu);
1269 }
1270
1271 bool qemu_cpu_is_self(CPUState *cpu)
1272 {
1273     return qemu_thread_is_self(cpu->thread);
1274 }
1275
1276 bool qemu_in_vcpu_thread(void)
1277 {
1278     return current_cpu && qemu_cpu_is_self(current_cpu);
1279 }
1280
/* Per-thread flag, set by qemu_mutex_lock_iothread() and cleared by
 * qemu_mutex_unlock_iothread().  Static storage starts out as false.
 */
static __thread bool iothread_locked;

/* Return the calling thread's iothread_locked flag. */
bool qemu_mutex_iothread_locked(void)
{
    return iothread_locked;
}
1287
1288 void qemu_mutex_lock_iothread(void)
1289 {
1290     atomic_inc(&iothread_requesting_mutex);
1291     /* In the simple case there is no need to bump the VCPU thread out of
1292      * TCG code execution.
1293      */
1294     if (!tcg_enabled() || qemu_in_vcpu_thread() ||
1295         !first_cpu || !first_cpu->created) {
1296         qemu_mutex_lock(&qemu_global_mutex);
1297         atomic_dec(&iothread_requesting_mutex);
1298     } else {
1299         if (qemu_mutex_trylock(&qemu_global_mutex)) {
1300             qemu_cpu_kick_no_halt();
1301             qemu_mutex_lock(&qemu_global_mutex);
1302         }
1303         atomic_dec(&iothread_requesting_mutex);
1304         qemu_cond_broadcast(&qemu_io_proceeded_cond);
1305     }
1306     iothread_locked = true;
1307 }
1308
1309 void qemu_mutex_unlock_iothread(void)
1310 {
1311     iothread_locked = false;
1312     qemu_mutex_unlock(&qemu_global_mutex);
1313 }
1314
1315 static bool all_vcpus_paused(void)
1316 {
1317     CPUState *cpu;
1318
1319     CPU_FOREACH(cpu) {
1320         if (!cpu->stopped) {
1321             return false;
1322         }
1323     }
1324
1325     return true;
1326 }
1327
1328 void pause_all_vcpus(void)
1329 {
1330     CPUState *cpu;
1331
1332     qemu_clock_enable(QEMU_CLOCK_VIRTUAL, false);
1333     CPU_FOREACH(cpu) {
1334         cpu->stop = true;
1335         qemu_cpu_kick(cpu);
1336     }
1337
1338     if (qemu_in_vcpu_thread()) {
1339         cpu_stop_current();
1340         if (!kvm_enabled()) {
1341             CPU_FOREACH(cpu) {
1342                 cpu->stop = false;
1343                 cpu->stopped = true;
1344             }
1345             return;
1346         }
1347     }
1348
1349     while (!all_vcpus_paused()) {
1350         qemu_cond_wait(&qemu_pause_cond, &qemu_global_mutex);
1351         CPU_FOREACH(cpu) {
1352             qemu_cpu_kick(cpu);
1353         }
1354     }
1355 }
1356
1357 void cpu_resume(CPUState *cpu)
1358 {
1359     cpu->stop = false;
1360     cpu->stopped = false;
1361     qemu_cpu_kick(cpu);
1362 }
1363
1364 void resume_all_vcpus(void)
1365 {
1366     CPUState *cpu;
1367
1368     qemu_clock_enable(QEMU_CLOCK_VIRTUAL, true);
1369     CPU_FOREACH(cpu) {
1370         cpu_resume(cpu);
1371     }
1372 }
1373
1374 void cpu_remove(CPUState *cpu)
1375 {
1376     cpu->stop = true;
1377     cpu->unplug = true;
1378     qemu_cpu_kick(cpu);
1379 }
1380
1381 void cpu_remove_sync(CPUState *cpu)
1382 {
1383     cpu_remove(cpu);
1384     while (cpu->created) {
1385         qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
1386     }
1387 }
1388
1389 /* For temporary buffers for forming a name */
1390 #define VCPU_THREAD_NAME_SIZE 16
1391
1392 static void qemu_tcg_init_vcpu(CPUState *cpu)
1393 {
1394     char thread_name[VCPU_THREAD_NAME_SIZE];
1395     static QemuCond *tcg_halt_cond;
1396     static QemuThread *tcg_cpu_thread;
1397
1398     /* share a single thread for all cpus with TCG */
1399     if (!tcg_cpu_thread) {
1400         cpu->thread = g_malloc0(sizeof(QemuThread));
1401         cpu->halt_cond = g_malloc0(sizeof(QemuCond));
1402         qemu_cond_init(cpu->halt_cond);
1403         tcg_halt_cond = cpu->halt_cond;
1404         snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/TCG",
1405                  cpu->cpu_index);
1406         qemu_thread_create(cpu->thread, thread_name, qemu_tcg_cpu_thread_fn,
1407                            cpu, QEMU_THREAD_JOINABLE);
1408 #ifdef _WIN32
1409         cpu->hThread = qemu_thread_get_handle(cpu->thread);
1410 #endif
1411         while (!cpu->created) {
1412             qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
1413         }
1414         tcg_cpu_thread = cpu->thread;
1415     } else {
1416         cpu->thread = tcg_cpu_thread;
1417         cpu->halt_cond = tcg_halt_cond;
1418     }
1419 }
1420
1421 static void qemu_kvm_start_vcpu(CPUState *cpu)
1422 {
1423     char thread_name[VCPU_THREAD_NAME_SIZE];
1424
1425     cpu->thread = g_malloc0(sizeof(QemuThread));
1426     cpu->halt_cond = g_malloc0(sizeof(QemuCond));
1427     qemu_cond_init(cpu->halt_cond);
1428     snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/KVM",
1429              cpu->cpu_index);
1430     qemu_thread_create(cpu->thread, thread_name, qemu_kvm_cpu_thread_fn,
1431                        cpu, QEMU_THREAD_JOINABLE);
1432     while (!cpu->created) {
1433         qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
1434     }
1435 }
1436
1437 static void qemu_dummy_start_vcpu(CPUState *cpu)
1438 {
1439     char thread_name[VCPU_THREAD_NAME_SIZE];
1440
1441     cpu->thread = g_malloc0(sizeof(QemuThread));
1442     cpu->halt_cond = g_malloc0(sizeof(QemuCond));
1443     qemu_cond_init(cpu->halt_cond);
1444     snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/DUMMY",
1445              cpu->cpu_index);
1446     qemu_thread_create(cpu->thread, thread_name, qemu_dummy_cpu_thread_fn, cpu,
1447                        QEMU_THREAD_JOINABLE);
1448     while (!cpu->created) {
1449         qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
1450     }
1451 }
1452
1453 void qemu_init_vcpu(CPUState *cpu)
1454 {
1455     cpu->nr_cores = smp_cores;
1456     cpu->nr_threads = smp_threads;
1457     cpu->stopped = true;
1458
1459     if (!cpu->as) {
1460         /* If the target cpu hasn't set up any address spaces itself,
1461          * give it the default one.
1462          */
1463         AddressSpace *as = address_space_init_shareable(cpu->memory,
1464                                                         "cpu-memory");
1465         cpu->num_ases = 1;
1466         cpu_address_space_init(cpu, as, 0);
1467     }
1468
1469     if (kvm_enabled()) {
1470         qemu_kvm_start_vcpu(cpu);
1471     } else if (tcg_enabled()) {
1472         qemu_tcg_init_vcpu(cpu);
1473     } else {
1474         qemu_dummy_start_vcpu(cpu);
1475     }
1476 }
1477
1478 void cpu_stop_current(void)
1479 {
1480     if (current_cpu) {
1481         current_cpu->stop = false;
1482         current_cpu->stopped = true;
1483         cpu_exit(current_cpu);
1484         qemu_cond_broadcast(&qemu_pause_cond);
1485     }
1486 }
1487
1488 int vm_stop(RunState state)
1489 {
1490     if (qemu_in_vcpu_thread()) {
1491         qemu_system_vmstop_request_prepare();
1492         qemu_system_vmstop_request(state);
1493         /*
1494          * FIXME: should not return to device code in case
1495          * vm_stop() has been requested.
1496          */
1497         cpu_stop_current();
1498         return 0;
1499     }
1500
1501     return do_vm_stop(state);
1502 }
1503
1504 /* does a state transition even if the VM is already stopped,
1505    current state is forgotten forever */
1506 int vm_stop_force_state(RunState state)
1507 {
1508     if (runstate_is_running()) {
1509         return vm_stop(state);
1510     } else {
1511         runstate_set(state);
1512
1513         bdrv_drain_all();
1514         /* Make sure to return an error if the flush in a previous vm_stop()
1515          * failed. */
1516         return bdrv_flush_all();
1517     }
1518 }
1519
1520 void list_cpus(FILE *f, fprintf_function cpu_fprintf, const char *optarg)
1521 {
1522     /* XXX: implement xxx_cpu_list for targets that still miss it */
1523 #if defined(cpu_list)
1524     cpu_list(f, cpu_fprintf);
1525 #endif
1526 }
1527
1528 CpuInfoList *qmp_query_cpus(Error **errp)
1529 {
1530     CpuInfoList *head = NULL, *cur_item = NULL;
1531     CPUState *cpu;
1532
1533     CPU_FOREACH(cpu) {
1534         CpuInfoList *info;
1535 #if defined(TARGET_I386)
1536         X86CPU *x86_cpu = X86_CPU(cpu);
1537         CPUX86State *env = &x86_cpu->env;
1538 #elif defined(TARGET_PPC)
1539         PowerPCCPU *ppc_cpu = POWERPC_CPU(cpu);
1540         CPUPPCState *env = &ppc_cpu->env;
1541 #elif defined(TARGET_SPARC)
1542         SPARCCPU *sparc_cpu = SPARC_CPU(cpu);
1543         CPUSPARCState *env = &sparc_cpu->env;
1544 #elif defined(TARGET_MIPS)
1545         MIPSCPU *mips_cpu = MIPS_CPU(cpu);
1546         CPUMIPSState *env = &mips_cpu->env;
1547 #elif defined(TARGET_TRICORE)
1548         TriCoreCPU *tricore_cpu = TRICORE_CPU(cpu);
1549         CPUTriCoreState *env = &tricore_cpu->env;
1550 #endif
1551
1552         cpu_synchronize_state(cpu);
1553
1554         info = g_malloc0(sizeof(*info));
1555         info->value = g_malloc0(sizeof(*info->value));
1556         info->value->CPU = cpu->cpu_index;
1557         info->value->current = (cpu == first_cpu);
1558         info->value->halted = cpu->halted;
1559         info->value->qom_path = object_get_canonical_path(OBJECT(cpu));
1560         info->value->thread_id = cpu->thread_id;
1561 #if defined(TARGET_I386)
1562         info->value->arch = CPU_INFO_ARCH_X86;
1563         info->value->u.x86.pc = env->eip + env->segs[R_CS].base;
1564 #elif defined(TARGET_PPC)
1565         info->value->arch = CPU_INFO_ARCH_PPC;
1566         info->value->u.ppc.nip = env->nip;
1567 #elif defined(TARGET_SPARC)
1568         info->value->arch = CPU_INFO_ARCH_SPARC;
1569         info->value->u.q_sparc.pc = env->pc;
1570         info->value->u.q_sparc.npc = env->npc;
1571 #elif defined(TARGET_MIPS)
1572         info->value->arch = CPU_INFO_ARCH_MIPS;
1573         info->value->u.q_mips.PC = env->active_tc.PC;
1574 #elif defined(TARGET_TRICORE)
1575         info->value->arch = CPU_INFO_ARCH_TRICORE;
1576         info->value->u.tricore.PC = env->PC;
1577 #else
1578         info->value->arch = CPU_INFO_ARCH_OTHER;
1579 #endif
1580
1581         /* XXX: waiting for the qapi to support GSList */
1582         if (!cur_item) {
1583             head = cur_item = info;
1584         } else {
1585             cur_item->next = info;
1586             cur_item = info;
1587         }
1588     }
1589
1590     return head;
1591 }
1592
1593 void qmp_memsave(int64_t addr, int64_t size, const char *filename,
1594                  bool has_cpu, int64_t cpu_index, Error **errp)
1595 {
1596     FILE *f;
1597     uint32_t l;
1598     CPUState *cpu;
1599     uint8_t buf[1024];
1600     int64_t orig_addr = addr, orig_size = size;
1601
1602     if (!has_cpu) {
1603         cpu_index = 0;
1604     }
1605
1606     cpu = qemu_get_cpu(cpu_index);
1607     if (cpu == NULL) {
1608         error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "cpu-index",
1609                    "a CPU number");
1610         return;
1611     }
1612
1613     f = fopen(filename, "wb");
1614     if (!f) {
1615         error_setg_file_open(errp, errno, filename);
1616         return;
1617     }
1618
1619     while (size != 0) {
1620         l = sizeof(buf);
1621         if (l > size)
1622             l = size;
1623         if (cpu_memory_rw_debug(cpu, addr, buf, l, 0) != 0) {
1624             error_setg(errp, "Invalid addr 0x%016" PRIx64 "/size %" PRId64
1625                              " specified", orig_addr, orig_size);
1626             goto exit;
1627         }
1628         if (fwrite(buf, 1, l, f) != l) {
1629             error_setg(errp, QERR_IO_ERROR);
1630             goto exit;
1631         }
1632         addr += l;
1633         size -= l;
1634     }
1635
1636 exit:
1637     fclose(f);
1638 }
1639
1640 void qmp_pmemsave(int64_t addr, int64_t size, const char *filename,
1641                   Error **errp)
1642 {
1643     FILE *f;
1644     uint32_t l;
1645     uint8_t buf[1024];
1646
1647     f = fopen(filename, "wb");
1648     if (!f) {
1649         error_setg_file_open(errp, errno, filename);
1650         return;
1651     }
1652
1653     while (size != 0) {
1654         l = sizeof(buf);
1655         if (l > size)
1656             l = size;
1657         cpu_physical_memory_read(addr, buf, l);
1658         if (fwrite(buf, 1, l, f) != l) {
1659             error_setg(errp, QERR_IO_ERROR);
1660             goto exit;
1661         }
1662         addr += l;
1663         size -= l;
1664     }
1665
1666 exit:
1667     fclose(f);
1668 }
1669
1670 void qmp_inject_nmi(Error **errp)
1671 {
1672     nmi_monitor_handle(monitor_get_cpu_index(), errp);
1673 }
1674
1675 void dump_drift_info(FILE *f, fprintf_function cpu_fprintf)
1676 {
1677     if (!use_icount) {
1678         return;
1679     }
1680
1681     cpu_fprintf(f, "Host - Guest clock  %"PRIi64" ms\n",
1682                 (cpu_get_clock() - cpu_get_icount())/SCALE_MS);
1683     if (icount_align_option) {
1684         cpu_fprintf(f, "Max guest delay     %"PRIi64" ms\n", -max_delay/SCALE_MS);
1685         cpu_fprintf(f, "Max guest advance   %"PRIi64" ms\n", max_advance/SCALE_MS);
1686     } else {
1687         cpu_fprintf(f, "Max guest delay     NA\n");
1688         cpu_fprintf(f, "Max guest advance   NA\n");
1689     }
1690 }