1 /*
2  *  Virtual page mapping
3  *
4  *  Copyright (c) 2003 Fabrice Bellard
5  *
6  * This library is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2 of the License, or (at your option) any later version.
10  *
11  * This library is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18  */
19 #include "config.h"
20 #ifdef _WIN32
21 #include <windows.h>
22 #else
23 #include <sys/types.h>
24 #include <sys/mman.h>
25 #endif
26
27 #include "qemu-common.h"
28 #include "cpu.h"
29 #include "tcg.h"
30 #include "hw/hw.h"
31 #include "hw/qdev.h"
32 #include "qemu/osdep.h"
33 #include "sysemu/kvm.h"
34 #include "hw/xen.h"
35 #include "qemu/timer.h"
36 #include "qemu/config-file.h"
37 #include "exec/memory.h"
38 #include "sysemu/dma.h"
39 #include "exec/address-spaces.h"
40 #if defined(CONFIG_USER_ONLY)
41 #include <qemu.h>
42 #else /* !CONFIG_USER_ONLY */
43 #include "sysemu/xen-mapcache.h"
44 #include "trace.h"
45 #endif
46 #include "exec/cpu-all.h"
47
48 #include "exec/cputlb.h"
49 #include "translate-all.h"
50
51 #include "exec/memory-internal.h"
52
53 //#define DEBUG_UNASSIGNED
54 //#define DEBUG_SUBPAGE
55
56 #if !defined(CONFIG_USER_ONLY)
57 int phys_ram_fd;
58 static int in_migration;
59
60 RAMList ram_list = { .blocks = QTAILQ_HEAD_INITIALIZER(ram_list.blocks) };
61
62 static MemoryRegion *system_memory;
63 static MemoryRegion *system_io;
64
65 AddressSpace address_space_io;
66 AddressSpace address_space_memory;
67 DMAContext dma_context_memory;
68
69 MemoryRegion io_mem_ram, io_mem_rom, io_mem_unassigned, io_mem_notdirty;
70 static MemoryRegion io_mem_subpage_ram;
71
72 #endif
73
74 CPUArchState *first_cpu;
75 /* current CPU in the current thread. It is only valid inside
76    cpu_exec() */
77 DEFINE_TLS(CPUArchState *,cpu_single_env);
78 /* 0 = Do not count executed instructions.
79    1 = Precise instruction counting.
80    2 = Adaptive rate instruction counting.  */
81 int use_icount;
82
83 #if !defined(CONFIG_USER_ONLY)
84
85 static MemoryRegionSection *phys_sections;
86 static unsigned phys_sections_nb, phys_sections_nb_alloc;
87 static uint16_t phys_section_unassigned;
88 static uint16_t phys_section_notdirty;
89 static uint16_t phys_section_rom;
90 static uint16_t phys_section_watch;
91
92 /* Simple allocator for PhysPageEntry nodes */
93 static PhysPageEntry (*phys_map_nodes)[L2_SIZE];
94 static unsigned phys_map_nodes_nb, phys_map_nodes_nb_alloc;
95
96 #define PHYS_MAP_NODE_NIL (((uint16_t)~0) >> 1)
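/* A sketch of the data structure, as implied by the declarations above:
 * phys_map_nodes backs a P_L2_LEVELS-deep radix tree over guest page
 * numbers.  Every node is an array of L2_SIZE PhysPageEntry slots; inner
 * entries hold a 15-bit index into phys_map_nodes (PHYS_MAP_NODE_NIL means
 * "no child"), while leaf entries hold an index into phys_sections.
 */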
97
98 static void io_mem_init(void);
99 static void memory_map_init(void);
100 static void *qemu_safe_ram_ptr(ram_addr_t addr);
101
102 static MemoryRegion io_mem_watch;
103 #endif
104
105 #if !defined(CONFIG_USER_ONLY)
106
107 static void phys_map_node_reserve(unsigned nodes)
108 {
109     if (phys_map_nodes_nb + nodes > phys_map_nodes_nb_alloc) {
110         typedef PhysPageEntry Node[L2_SIZE];
111         phys_map_nodes_nb_alloc = MAX(phys_map_nodes_nb_alloc * 2, 16);
112         phys_map_nodes_nb_alloc = MAX(phys_map_nodes_nb_alloc,
113                                       phys_map_nodes_nb + nodes);
114         phys_map_nodes = g_renew(Node, phys_map_nodes,
115                                  phys_map_nodes_nb_alloc);
116     }
117 }
118
119 static uint16_t phys_map_node_alloc(void)
120 {
121     unsigned i;
122     uint16_t ret;
123
124     ret = phys_map_nodes_nb++;
125     assert(ret != PHYS_MAP_NODE_NIL);
126     assert(ret != phys_map_nodes_nb_alloc);
127     for (i = 0; i < L2_SIZE; ++i) {
128         phys_map_nodes[ret][i].is_leaf = 0;
129         phys_map_nodes[ret][i].ptr = PHYS_MAP_NODE_NIL;
130     }
131     return ret;
132 }
133
134 static void phys_map_nodes_reset(void)
135 {
136     phys_map_nodes_nb = 0;
137 }
138
139
140 static void phys_page_set_level(PhysPageEntry *lp, hwaddr *index,
141                                 hwaddr *nb, uint16_t leaf,
142                                 int level)
143 {
144     PhysPageEntry *p;
145     int i;
146     hwaddr step = (hwaddr)1 << (level * L2_BITS);
147
148     if (!lp->is_leaf && lp->ptr == PHYS_MAP_NODE_NIL) {
149         lp->ptr = phys_map_node_alloc();
150         p = phys_map_nodes[lp->ptr];
151         if (level == 0) {
152             for (i = 0; i < L2_SIZE; i++) {
153                 p[i].is_leaf = 1;
154                 p[i].ptr = phys_section_unassigned;
155             }
156         }
157     } else {
158         p = phys_map_nodes[lp->ptr];
159     }
160     lp = &p[(*index >> (level * L2_BITS)) & (L2_SIZE - 1)];
161
162     while (*nb && lp < &p[L2_SIZE]) {
163         if ((*index & (step - 1)) == 0 && *nb >= step) {
164             lp->is_leaf = true;
165             lp->ptr = leaf;
166             *index += step;
167             *nb -= step;
168         } else {
169             phys_page_set_level(lp, index, nb, leaf, level - 1);
170         }
171         ++lp;
172     }
173 }
174
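/* Map the guest page range [index, index + nb) to the phys_sections entry
 * 'leaf'.  The tree is descended level by level; ranges that cover a whole
 * aligned subtree are recorded as a single leaf at that level instead of
 * being expanded further down.
 */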
175 static void phys_page_set(AddressSpaceDispatch *d,
176                           hwaddr index, hwaddr nb,
177                           uint16_t leaf)
178 {
179     /* Wildly overreserve - it doesn't matter much. */
180     phys_map_node_reserve(3 * P_L2_LEVELS);
181
182     phys_page_set_level(&d->phys_map, &index, &nb, leaf, P_L2_LEVELS - 1);
183 }
184
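/* Look up the section covering one guest page: walk the tree from the
 * root, and treat any missing node as a hit on the "unassigned" section.
 */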
185 MemoryRegionSection *phys_page_find(AddressSpaceDispatch *d, hwaddr index)
186 {
187     PhysPageEntry lp = d->phys_map;
188     PhysPageEntry *p;
189     int i;
190     uint16_t s_index = phys_section_unassigned;
191
192     for (i = P_L2_LEVELS - 1; i >= 0 && !lp.is_leaf; i--) {
193         if (lp.ptr == PHYS_MAP_NODE_NIL) {
194             goto not_found;
195         }
196         p = phys_map_nodes[lp.ptr];
197         lp = p[(index >> (i * L2_BITS)) & (L2_SIZE - 1)];
198     }
199
200     s_index = lp.ptr;
201 not_found:
202     return &phys_sections[s_index];
203 }
204
205 bool memory_region_is_unassigned(MemoryRegion *mr)
206 {
207     return mr != &io_mem_ram && mr != &io_mem_rom
208         && mr != &io_mem_notdirty && !mr->rom_device
209         && mr != &io_mem_watch;
210 }
211 #endif
212
213 void cpu_exec_init_all(void)
214 {
215 #if !defined(CONFIG_USER_ONLY)
216     qemu_mutex_init(&ram_list.mutex);
217     memory_map_init();
218     io_mem_init();
219 #endif
220 }
221
222 #if !defined(CONFIG_USER_ONLY)
223
224 static int cpu_common_post_load(void *opaque, int version_id)
225 {
226     CPUArchState *env = opaque;
227
228     /* 0x01 was CPU_INTERRUPT_EXIT. This line can be removed when the
229        version_id is increased. */
230     env->interrupt_request &= ~0x01;
231     tlb_flush(env, 1);
232
233     return 0;
234 }
235
236 static const VMStateDescription vmstate_cpu_common = {
237     .name = "cpu_common",
238     .version_id = 1,
239     .minimum_version_id = 1,
240     .minimum_version_id_old = 1,
241     .post_load = cpu_common_post_load,
242     .fields      = (VMStateField []) {
243         VMSTATE_UINT32(halted, CPUArchState),
244         VMSTATE_UINT32(interrupt_request, CPUArchState),
245         VMSTATE_END_OF_LIST()
246     }
247 };
248 #else
249 #define vmstate_cpu_common vmstate_dummy
250 #endif
251
252 CPUState *qemu_get_cpu(int index)
253 {
254     CPUArchState *env = first_cpu;
255     CPUState *cpu = NULL;
256
257     while (env) {
258         cpu = ENV_GET_CPU(env);
259         if (cpu->cpu_index == index) {
260             break;
261         }
262         env = env->next_cpu;
263     }
264
265     return env ? cpu : NULL;
266 }
267
268 void cpu_exec_init(CPUArchState *env)
269 {
270     CPUState *cpu = ENV_GET_CPU(env);
271     CPUClass *cc = CPU_GET_CLASS(cpu);
272     CPUArchState **penv;
273     int cpu_index;
274
275 #if defined(CONFIG_USER_ONLY)
276     cpu_list_lock();
277 #endif
278     env->next_cpu = NULL;
279     penv = &first_cpu;
280     cpu_index = 0;
281     while (*penv != NULL) {
282         penv = &(*penv)->next_cpu;
283         cpu_index++;
284     }
285     cpu->cpu_index = cpu_index;
286     cpu->numa_node = 0;
287     QTAILQ_INIT(&env->breakpoints);
288     QTAILQ_INIT(&env->watchpoints);
289 #ifndef CONFIG_USER_ONLY
290     cpu->thread_id = qemu_get_thread_id();
291 #endif
292     *penv = env;
293 #if defined(CONFIG_USER_ONLY)
294     cpu_list_unlock();
295 #endif
296     vmstate_register(NULL, cpu_index, &vmstate_cpu_common, env);
297 #if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
298     register_savevm(NULL, "cpu", cpu_index, CPU_SAVE_VERSION,
299                     cpu_save, cpu_load, env);
300     assert(cc->vmsd == NULL);
301 #endif
302     if (cc->vmsd != NULL) {
303         vmstate_register(NULL, cpu_index, cc->vmsd, cpu);
304     }
305 }
306
307 #if defined(TARGET_HAS_ICE)
308 #if defined(CONFIG_USER_ONLY)
309 static void breakpoint_invalidate(CPUArchState *env, target_ulong pc)
310 {
311     tb_invalidate_phys_page_range(pc, pc + 1, 0);
312 }
313 #else
314 static void breakpoint_invalidate(CPUArchState *env, target_ulong pc)
315 {
316     tb_invalidate_phys_addr(cpu_get_phys_page_debug(env, pc) |
317             (pc & ~TARGET_PAGE_MASK));
318 }
319 #endif
320 #endif /* TARGET_HAS_ICE */
321
322 #if defined(CONFIG_USER_ONLY)
323 void cpu_watchpoint_remove_all(CPUArchState *env, int mask)
324 {
326 }
327
328 int cpu_watchpoint_insert(CPUArchState *env, target_ulong addr, target_ulong len,
329                           int flags, CPUWatchpoint **watchpoint)
330 {
331     return -ENOSYS;
332 }
333 #else
334 /* Add a watchpoint.  */
335 int cpu_watchpoint_insert(CPUArchState *env, target_ulong addr, target_ulong len,
336                           int flags, CPUWatchpoint **watchpoint)
337 {
338     target_ulong len_mask = ~(len - 1);
339     CPUWatchpoint *wp;
340
341     /* sanity checks: allow power-of-2 lengths, deny unaligned watchpoints */
342     if ((len & (len - 1)) || (addr & ~len_mask) ||
343             len == 0 || len > TARGET_PAGE_SIZE) {
344         fprintf(stderr, "qemu: tried to set invalid watchpoint at "
345                 TARGET_FMT_lx ", len=" TARGET_FMT_lu "\n", addr, len);
346         return -EINVAL;
347     }
348     wp = g_malloc(sizeof(*wp));
349
350     wp->vaddr = addr;
351     wp->len_mask = len_mask;
352     wp->flags = flags;
353
354     /* keep all GDB-injected watchpoints in front */
355     if (flags & BP_GDB)
356         QTAILQ_INSERT_HEAD(&env->watchpoints, wp, entry);
357     else
358         QTAILQ_INSERT_TAIL(&env->watchpoints, wp, entry);
359
360     tlb_flush_page(env, addr);
361
362     if (watchpoint)
363         *watchpoint = wp;
364     return 0;
365 }
366
367 /* Remove a specific watchpoint.  */
368 int cpu_watchpoint_remove(CPUArchState *env, target_ulong addr, target_ulong len,
369                           int flags)
370 {
371     target_ulong len_mask = ~(len - 1);
372     CPUWatchpoint *wp;
373
374     QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
375         if (addr == wp->vaddr && len_mask == wp->len_mask
376                 && flags == (wp->flags & ~BP_WATCHPOINT_HIT)) {
377             cpu_watchpoint_remove_by_ref(env, wp);
378             return 0;
379         }
380     }
381     return -ENOENT;
382 }
383
384 /* Remove a specific watchpoint by reference.  */
385 void cpu_watchpoint_remove_by_ref(CPUArchState *env, CPUWatchpoint *watchpoint)
386 {
387     QTAILQ_REMOVE(&env->watchpoints, watchpoint, entry);
388
389     tlb_flush_page(env, watchpoint->vaddr);
390
391     g_free(watchpoint);
392 }
393
394 /* Remove all matching watchpoints.  */
395 void cpu_watchpoint_remove_all(CPUArchState *env, int mask)
396 {
397     CPUWatchpoint *wp, *next;
398
399     QTAILQ_FOREACH_SAFE(wp, &env->watchpoints, entry, next) {
400         if (wp->flags & mask)
401             cpu_watchpoint_remove_by_ref(env, wp);
402     }
403 }
404 #endif
405
406 /* Add a breakpoint.  */
407 int cpu_breakpoint_insert(CPUArchState *env, target_ulong pc, int flags,
408                           CPUBreakpoint **breakpoint)
409 {
410 #if defined(TARGET_HAS_ICE)
411     CPUBreakpoint *bp;
412
413     bp = g_malloc(sizeof(*bp));
414
415     bp->pc = pc;
416     bp->flags = flags;
417
418     /* keep all GDB-injected breakpoints in front */
419     if (flags & BP_GDB)
420         QTAILQ_INSERT_HEAD(&env->breakpoints, bp, entry);
421     else
422         QTAILQ_INSERT_TAIL(&env->breakpoints, bp, entry);
423
424     breakpoint_invalidate(env, pc);
425
426     if (breakpoint)
427         *breakpoint = bp;
428     return 0;
429 #else
430     return -ENOSYS;
431 #endif
432 }
433
434 /* Remove a specific breakpoint.  */
435 int cpu_breakpoint_remove(CPUArchState *env, target_ulong pc, int flags)
436 {
437 #if defined(TARGET_HAS_ICE)
438     CPUBreakpoint *bp;
439
440     QTAILQ_FOREACH(bp, &env->breakpoints, entry) {
441         if (bp->pc == pc && bp->flags == flags) {
442             cpu_breakpoint_remove_by_ref(env, bp);
443             return 0;
444         }
445     }
446     return -ENOENT;
447 #else
448     return -ENOSYS;
449 #endif
450 }
451
452 /* Remove a specific breakpoint by reference.  */
453 void cpu_breakpoint_remove_by_ref(CPUArchState *env, CPUBreakpoint *breakpoint)
454 {
455 #if defined(TARGET_HAS_ICE)
456     QTAILQ_REMOVE(&env->breakpoints, breakpoint, entry);
457
458     breakpoint_invalidate(env, breakpoint->pc);
459
460     g_free(breakpoint);
461 #endif
462 }
463
464 /* Remove all matching breakpoints. */
465 void cpu_breakpoint_remove_all(CPUArchState *env, int mask)
466 {
467 #if defined(TARGET_HAS_ICE)
468     CPUBreakpoint *bp, *next;
469
470     QTAILQ_FOREACH_SAFE(bp, &env->breakpoints, entry, next) {
471         if (bp->flags & mask)
472             cpu_breakpoint_remove_by_ref(env, bp);
473     }
474 #endif
475 }
476
477 /* enable or disable single step mode. EXCP_DEBUG is returned by the
478    CPU loop after each instruction */
479 void cpu_single_step(CPUArchState *env, int enabled)
480 {
481 #if defined(TARGET_HAS_ICE)
482     if (env->singlestep_enabled != enabled) {
483         env->singlestep_enabled = enabled;
484         if (kvm_enabled())
485             kvm_update_guest_debug(env, 0);
486         else {
487             /* must flush all the translated code to avoid inconsistencies */
488             /* XXX: only flush what is necessary */
489             tb_flush(env);
490         }
491     }
492 #endif
493 }
494
495 void cpu_reset_interrupt(CPUArchState *env, int mask)
496 {
497     env->interrupt_request &= ~mask;
498 }
499
500 void cpu_exit(CPUArchState *env)
501 {
502     CPUState *cpu = ENV_GET_CPU(env);
503
504     cpu->exit_request = 1;
505     cpu->tcg_exit_req = 1;
506 }
507
508 void cpu_abort(CPUArchState *env, const char *fmt, ...)
509 {
510     va_list ap;
511     va_list ap2;
512
513     va_start(ap, fmt);
514     va_copy(ap2, ap);
515     fprintf(stderr, "qemu: fatal: ");
516     vfprintf(stderr, fmt, ap);
517     fprintf(stderr, "\n");
518     cpu_dump_state(env, stderr, fprintf, CPU_DUMP_FPU | CPU_DUMP_CCOP);
519     if (qemu_log_enabled()) {
520         qemu_log("qemu: fatal: ");
521         qemu_log_vprintf(fmt, ap2);
522         qemu_log("\n");
523         log_cpu_state(env, CPU_DUMP_FPU | CPU_DUMP_CCOP);
524         qemu_log_flush();
525         qemu_log_close();
526     }
527     va_end(ap2);
528     va_end(ap);
529 #if defined(CONFIG_USER_ONLY)
530     {
531         struct sigaction act;
532         sigfillset(&act.sa_mask);
533         act.sa_handler = SIG_DFL;
534         sigaction(SIGABRT, &act, NULL);
535     }
536 #endif
537     abort();
538 }
539
540 CPUArchState *cpu_copy(CPUArchState *env)
541 {
542     CPUArchState *new_env = cpu_init(env->cpu_model_str);
543     CPUArchState *next_cpu = new_env->next_cpu;
544 #if defined(TARGET_HAS_ICE)
545     CPUBreakpoint *bp;
546     CPUWatchpoint *wp;
547 #endif
548
549     memcpy(new_env, env, sizeof(CPUArchState));
550
551     /* Preserve chaining. */
552     new_env->next_cpu = next_cpu;
553
554     /* Clone all break/watchpoints.
555        Note: Once we support ptrace with hw-debug register access, make sure
556        BP_CPU break/watchpoints are handled correctly on clone. */
557     QTAILQ_INIT(&new_env->breakpoints);
558     QTAILQ_INIT(&new_env->watchpoints);
559 #if defined(TARGET_HAS_ICE)
560     QTAILQ_FOREACH(bp, &env->breakpoints, entry) {
561         cpu_breakpoint_insert(new_env, bp->pc, bp->flags, NULL);
562     }
563     QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
564         cpu_watchpoint_insert(new_env, wp->vaddr, (~wp->len_mask) + 1,
565                               wp->flags, NULL);
566     }
567 #endif
568
569     return new_env;
570 }
571
572 #if !defined(CONFIG_USER_ONLY)
573 static void tlb_reset_dirty_range_all(ram_addr_t start, ram_addr_t end,
574                                       uintptr_t length)
575 {
576     uintptr_t start1;
577
578     /* we modify the TLB cache so that the dirty bit will be set again
579        when accessing the range */
580     start1 = (uintptr_t)qemu_safe_ram_ptr(start);
581     /* Check that we don't span multiple blocks - this breaks the
582        address comparisons below.  */
583     if ((uintptr_t)qemu_safe_ram_ptr(end - 1) - start1
584             != (end - 1) - start) {
585         abort();
586     }
587     cpu_tlb_reset_dirty_all(start1, length);
588
589 }
590
591 /* Note: start and end must be within the same ram block.  */
592 void cpu_physical_memory_reset_dirty(ram_addr_t start, ram_addr_t end,
593                                      int dirty_flags)
594 {
595     uintptr_t length;
596
597     start &= TARGET_PAGE_MASK;
598     end = TARGET_PAGE_ALIGN(end);
599
600     length = end - start;
601     if (length == 0)
602         return;
603     cpu_physical_memory_mask_dirty_range(start, length, dirty_flags);
604
605     if (tcg_enabled()) {
606         tlb_reset_dirty_range_all(start, end, length);
607     }
608 }
609
610 static int cpu_physical_memory_set_dirty_tracking(int enable)
611 {
612     int ret = 0;
613     in_migration = enable;
614     return ret;
615 }
616
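/* Compute the iotlb value that the TLB code stores for a page.  RAM pages
 * encode the page's ram_addr combined with a special section index
 * (notdirty for writable RAM, rom for read-only RAM) in the low bits;
 * MMIO pages encode the index of their MemoryRegionSection so that
 * iotlb_to_region() can recover the target region.  Accesses to pages with
 * a matching watchpoint are forced through the watch_mem_ops handlers by
 * setting TLB_MMIO.
 */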
617 hwaddr memory_region_section_get_iotlb(CPUArchState *env,
618                                                    MemoryRegionSection *section,
619                                                    target_ulong vaddr,
620                                                    hwaddr paddr,
621                                                    int prot,
622                                                    target_ulong *address)
623 {
624     hwaddr iotlb;
625     CPUWatchpoint *wp;
626
627     if (memory_region_is_ram(section->mr)) {
628         /* Normal RAM.  */
629         iotlb = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
630             + memory_region_section_addr(section, paddr);
631         if (!section->readonly) {
632             iotlb |= phys_section_notdirty;
633         } else {
634             iotlb |= phys_section_rom;
635         }
636     } else {
637         /* IO handlers are currently passed a physical address.
638            It would be nice to pass an offset from the base address
639            of that region.  This would avoid having to special case RAM,
640            and avoid full address decoding in every device.
641            We can't use the high bits of pd for this because
642            IO_MEM_ROMD uses these as a ram address.  */
643         iotlb = section - phys_sections;
644         iotlb += memory_region_section_addr(section, paddr);
645     }
646
647     /* Make accesses to pages with watchpoints go via the
648        watchpoint trap routines.  */
649     QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
650         if (vaddr == (wp->vaddr & TARGET_PAGE_MASK)) {
651             /* Avoid trapping reads of pages with a write breakpoint. */
652             if ((prot & PAGE_WRITE) || (wp->flags & BP_MEM_READ)) {
653                 iotlb = phys_section_watch + paddr;
654                 *address |= TLB_MMIO;
655                 break;
656             }
657         }
658     }
659
660     return iotlb;
661 }
662 #endif /* defined(CONFIG_USER_ONLY) */
663
664 #if !defined(CONFIG_USER_ONLY)
665
666 #define SUBPAGE_IDX(addr) ((addr) & ~TARGET_PAGE_MASK)
667 typedef struct subpage_t {
668     MemoryRegion iomem;
669     hwaddr base;
670     uint16_t sub_section[TARGET_PAGE_SIZE];
671 } subpage_t;
672
673 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
674                              uint16_t section);
675 static subpage_t *subpage_init(hwaddr base);
676 static void destroy_page_desc(uint16_t section_index)
677 {
678     MemoryRegionSection *section = &phys_sections[section_index];
679     MemoryRegion *mr = section->mr;
680
681     if (mr->subpage) {
682         subpage_t *subpage = container_of(mr, subpage_t, iomem);
683         memory_region_destroy(&subpage->iomem);
684         g_free(subpage);
685     }
686 }
687
688 static void destroy_l2_mapping(PhysPageEntry *lp, unsigned level)
689 {
690     unsigned i;
691     PhysPageEntry *p;
692
693     if (lp->ptr == PHYS_MAP_NODE_NIL) {
694         return;
695     }
696
697     p = phys_map_nodes[lp->ptr];
698     for (i = 0; i < L2_SIZE; ++i) {
699         if (!p[i].is_leaf) {
700             destroy_l2_mapping(&p[i], level - 1);
701         } else {
702             destroy_page_desc(p[i].ptr);
703         }
704     }
705     lp->is_leaf = 0;
706     lp->ptr = PHYS_MAP_NODE_NIL;
707 }
708
709 static void destroy_all_mappings(AddressSpaceDispatch *d)
710 {
711     destroy_l2_mapping(&d->phys_map, P_L2_LEVELS - 1);
712     phys_map_nodes_reset();
713 }
714
715 static uint16_t phys_section_add(MemoryRegionSection *section)
716 {
717     if (phys_sections_nb == phys_sections_nb_alloc) {
718         phys_sections_nb_alloc = MAX(phys_sections_nb_alloc * 2, 16);
719         phys_sections = g_renew(MemoryRegionSection, phys_sections,
720                                 phys_sections_nb_alloc);
721     }
722     phys_sections[phys_sections_nb] = *section;
723     return phys_sections_nb++;
724 }
725
726 static void phys_sections_clear(void)
727 {
728     phys_sections_nb = 0;
729 }
730
731 static void register_subpage(AddressSpaceDispatch *d, MemoryRegionSection *section)
732 {
733     subpage_t *subpage;
734     hwaddr base = section->offset_within_address_space
735         & TARGET_PAGE_MASK;
736     MemoryRegionSection *existing = phys_page_find(d, base >> TARGET_PAGE_BITS);
737     MemoryRegionSection subsection = {
738         .offset_within_address_space = base,
739         .size = TARGET_PAGE_SIZE,
740     };
741     hwaddr start, end;
742
743     assert(existing->mr->subpage || existing->mr == &io_mem_unassigned);
744
745     if (!(existing->mr->subpage)) {
746         subpage = subpage_init(base);
747         subsection.mr = &subpage->iomem;
748         phys_page_set(d, base >> TARGET_PAGE_BITS, 1,
749                       phys_section_add(&subsection));
750     } else {
751         subpage = container_of(existing->mr, subpage_t, iomem);
752     }
753     start = section->offset_within_address_space & ~TARGET_PAGE_MASK;
754     end = start + section->size - 1;
755     subpage_register(subpage, start, end, phys_section_add(section));
756 }
757
758
759 static void register_multipage(AddressSpaceDispatch *d, MemoryRegionSection *section)
760 {
761     hwaddr start_addr = section->offset_within_address_space;
762     ram_addr_t size = section->size;
763     hwaddr addr;
764     uint16_t section_index = phys_section_add(section);
765
766     assert(size);
767
768     addr = start_addr;
769     phys_page_set(d, addr >> TARGET_PAGE_BITS, size >> TARGET_PAGE_BITS,
770                   section_index);
771 }
772
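/* Register one MemoryRegionSection with the dispatch tree.  The section is
 * split into an unaligned head, a run of whole target pages, and an
 * unaligned tail: partial pages go through a subpage_t, whole pages are
 * mapped directly to the section.
 */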
773 static void mem_add(MemoryListener *listener, MemoryRegionSection *section)
774 {
775     AddressSpaceDispatch *d = container_of(listener, AddressSpaceDispatch, listener);
776     MemoryRegionSection now = *section, remain = *section;
777
778     if ((now.offset_within_address_space & ~TARGET_PAGE_MASK)
779         || (now.size < TARGET_PAGE_SIZE)) {
780         now.size = MIN(TARGET_PAGE_ALIGN(now.offset_within_address_space)
781                        - now.offset_within_address_space,
782                        now.size);
783         register_subpage(d, &now);
784         remain.size -= now.size;
785         remain.offset_within_address_space += now.size;
786         remain.offset_within_region += now.size;
787     }
788     while (remain.size >= TARGET_PAGE_SIZE) {
789         now = remain;
790         if (remain.offset_within_region & ~TARGET_PAGE_MASK) {
791             now.size = TARGET_PAGE_SIZE;
792             register_subpage(d, &now);
793         } else {
794             now.size &= TARGET_PAGE_MASK;
795             register_multipage(d, &now);
796         }
797         remain.size -= now.size;
798         remain.offset_within_address_space += now.size;
799         remain.offset_within_region += now.size;
800     }
801     now = remain;
802     if (now.size) {
803         register_subpage(d, &now);
804     }
805 }
806
807 void qemu_flush_coalesced_mmio_buffer(void)
808 {
809     if (kvm_enabled())
810         kvm_flush_coalesced_mmio_buffer();
811 }
812
813 void qemu_mutex_lock_ramlist(void)
814 {
815     qemu_mutex_lock(&ram_list.mutex);
816 }
817
818 void qemu_mutex_unlock_ramlist(void)
819 {
820     qemu_mutex_unlock(&ram_list.mutex);
821 }
822
823 #if defined(__linux__) && !defined(TARGET_S390X)
824
825 #include <sys/vfs.h>
826
827 #define HUGETLBFS_MAGIC       0x958458f6
828
829 static long gethugepagesize(const char *path)
830 {
831     struct statfs fs;
832     int ret;
833
834     do {
835         ret = statfs(path, &fs);
836     } while (ret != 0 && errno == EINTR);
837
838     if (ret != 0) {
839         perror(path);
840         return 0;
841     }
842
843     if (fs.f_type != HUGETLBFS_MAGIC)
844         fprintf(stderr, "Warning: path not on HugeTLBFS: %s\n", path);
845
846     return fs.f_bsize;
847 }
848
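/* Back a RAM block with a file on a hugetlbfs mount (-mem-path).  A unique
 * temporary file is created and unlinked right away, extended to a
 * multiple of the huge page size, and mmap'ed shared when mem_prealloc is
 * set (so MAP_POPULATE really touches every page) or private otherwise.
 */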
849 static void *file_ram_alloc(RAMBlock *block,
850                             ram_addr_t memory,
851                             const char *path)
852 {
853     char *filename;
854     void *area;
855     int fd;
856 #ifdef MAP_POPULATE
857     int flags;
858 #endif
859     unsigned long hpagesize;
860
861     hpagesize = gethugepagesize(path);
862     if (!hpagesize) {
863         return NULL;
864     }
865
866     if (memory < hpagesize) {
867         return NULL;
868     }
869
870     if (kvm_enabled() && !kvm_has_sync_mmu()) {
871         fprintf(stderr, "host lacks kvm mmu notifiers, -mem-path unsupported\n");
872         return NULL;
873     }
874
875     filename = g_strdup_printf("%s/qemu_back_mem.XXXXXX", path);
876
877     fd = mkstemp(filename);
878     if (fd < 0) {
879         perror("unable to create backing store for hugepages");
880         g_free(filename);
881         return NULL;
882     }
883     unlink(filename);
884     g_free(filename);
885
886     memory = (memory+hpagesize-1) & ~(hpagesize-1);
887
888     /*
889      * ftruncate is not supported by hugetlbfs in older
890      * hosts, so don't bother bailing out on errors.
891      * If anything goes wrong with it under other filesystems,
892      * mmap will fail.
893      */
894     if (ftruncate(fd, memory))
895         perror("ftruncate");
896
897 #ifdef MAP_POPULATE
898     /* NB: MAP_POPULATE won't exhaustively allocate all physical pages when
899      * MAP_PRIVATE is requested.  For mem_prealloc we therefore mmap as
900      * MAP_SHARED to sidestep this quirk.
901      */
902     flags = mem_prealloc ? MAP_POPULATE | MAP_SHARED : MAP_PRIVATE;
903     area = mmap(0, memory, PROT_READ | PROT_WRITE, flags, fd, 0);
904 #else
905     area = mmap(0, memory, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
906 #endif
907     if (area == MAP_FAILED) {
908         perror("file_ram_alloc: can't mmap RAM pages");
909         close(fd);
910         return (NULL);
911     }
912     block->fd = fd;
913     return area;
914 }
915 #endif
916
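/* Choose a ram_addr_t offset for a new block: scan the existing blocks and
 * pick the smallest gap that still fits the requested size (best fit),
 * aborting if no suitable gap exists.
 */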
917 static ram_addr_t find_ram_offset(ram_addr_t size)
918 {
919     RAMBlock *block, *next_block;
920     ram_addr_t offset = RAM_ADDR_MAX, mingap = RAM_ADDR_MAX;
921
922     if (QTAILQ_EMPTY(&ram_list.blocks))
923         return 0;
924
925     QTAILQ_FOREACH(block, &ram_list.blocks, next) {
926         ram_addr_t end, next = RAM_ADDR_MAX;
927
928         end = block->offset + block->length;
929
930         QTAILQ_FOREACH(next_block, &ram_list.blocks, next) {
931             if (next_block->offset >= end) {
932                 next = MIN(next, next_block->offset);
933             }
934         }
935         if (next - end >= size && next - end < mingap) {
936             offset = end;
937             mingap = next - end;
938         }
939     }
940
941     if (offset == RAM_ADDR_MAX) {
942         fprintf(stderr, "Failed to find gap of requested size: %" PRIu64 "\n",
943                 (uint64_t)size);
944         abort();
945     }
946
947     return offset;
948 }
949
950 ram_addr_t last_ram_offset(void)
951 {
952     RAMBlock *block;
953     ram_addr_t last = 0;
954
955     QTAILQ_FOREACH(block, &ram_list.blocks, next)
956         last = MAX(last, block->offset + block->length);
957
958     return last;
959 }
960
961 static void qemu_ram_setup_dump(void *addr, ram_addr_t size)
962 {
963     int ret;
964     QemuOpts *machine_opts;
965
966     /* Use MADV_DONTDUMP if the user doesn't want the guest memory in the core dump */
967     machine_opts = qemu_opts_find(qemu_find_opts("machine"), 0);
968     if (machine_opts &&
969         !qemu_opt_get_bool(machine_opts, "dump-guest-core", true)) {
970         ret = qemu_madvise(addr, size, QEMU_MADV_DONTDUMP);
971         if (ret) {
972             perror("qemu_madvise");
973             fprintf(stderr, "madvise doesn't support MADV_DONTDUMP, "
974                             "but dump_guest_core=off specified\n");
975         }
976     }
977 }
978
979 void qemu_ram_set_idstr(ram_addr_t addr, const char *name, DeviceState *dev)
980 {
981     RAMBlock *new_block, *block;
982
983     new_block = NULL;
984     QTAILQ_FOREACH(block, &ram_list.blocks, next) {
985         if (block->offset == addr) {
986             new_block = block;
987             break;
988         }
989     }
990     assert(new_block);
991     assert(!new_block->idstr[0]);
992
993     if (dev) {
994         char *id = qdev_get_dev_path(dev);
995         if (id) {
996             snprintf(new_block->idstr, sizeof(new_block->idstr), "%s/", id);
997             g_free(id);
998         }
999     }
1000     pstrcat(new_block->idstr, sizeof(new_block->idstr), name);
1001
1002     /* This assumes the iothread lock is taken here too.  */
1003     qemu_mutex_lock_ramlist();
1004     QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1005         if (block != new_block && !strcmp(block->idstr, new_block->idstr)) {
1006             fprintf(stderr, "RAMBlock \"%s\" already registered, abort!\n",
1007                     new_block->idstr);
1008             abort();
1009         }
1010     }
1011     qemu_mutex_unlock_ramlist();
1012 }
1013
1014 static int memory_try_enable_merging(void *addr, size_t len)
1015 {
1016     QemuOpts *opts;
1017
1018     opts = qemu_opts_find(qemu_find_opts("machine"), 0);
1019     if (opts && !qemu_opt_get_bool(opts, "mem-merge", true)) {
1020         /* disabled by the user */
1021         return 0;
1022     }
1023
1024     return qemu_madvise(addr, len, QEMU_MADV_MERGEABLE);
1025 }
1026
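/* Create a new RAM block (or adopt a caller-provided buffer when 'host' is
 * non-NULL).  The backing memory comes from hugetlbfs when -mem-path is
 * given, from Xen or kvm_vmalloc() when those are in use, and from plain
 * qemu_vmalloc() otherwise.  The block is inserted into ram_list sorted by
 * size (largest first) and the dirty bitmap is grown to cover it.
 */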
1027 ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
1028                                    MemoryRegion *mr)
1029 {
1030     RAMBlock *block, *new_block;
1031
1032     size = TARGET_PAGE_ALIGN(size);
1033     new_block = g_malloc0(sizeof(*new_block));
1034
1035     /* This assumes the iothread lock is taken here too.  */
1036     qemu_mutex_lock_ramlist();
1037     new_block->mr = mr;
1038     new_block->offset = find_ram_offset(size);
1039     if (host) {
1040         new_block->host = host;
1041         new_block->flags |= RAM_PREALLOC_MASK;
1042     } else {
1043         if (mem_path) {
1044 #if defined (__linux__) && !defined(TARGET_S390X)
1045             new_block->host = file_ram_alloc(new_block, size, mem_path);
1046             if (!new_block->host) {
1047                 new_block->host = qemu_vmalloc(size);
1048                 memory_try_enable_merging(new_block->host, size);
1049             }
1050 #else
1051             fprintf(stderr, "-mem-path option unsupported\n");
1052             exit(1);
1053 #endif
1054         } else {
1055             if (xen_enabled()) {
1056                 xen_ram_alloc(new_block->offset, size, mr);
1057             } else if (kvm_enabled()) {
1058                 /* some s390/kvm configurations have special constraints */
1059                 new_block->host = kvm_vmalloc(size);
1060             } else {
1061                 new_block->host = qemu_vmalloc(size);
1062             }
1063             memory_try_enable_merging(new_block->host, size);
1064         }
1065     }
1066     new_block->length = size;
1067
1068     /* Keep the list sorted from biggest to smallest block.  */
1069     QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1070         if (block->length < new_block->length) {
1071             break;
1072         }
1073     }
1074     if (block) {
1075         QTAILQ_INSERT_BEFORE(block, new_block, next);
1076     } else {
1077         QTAILQ_INSERT_TAIL(&ram_list.blocks, new_block, next);
1078     }
1079     ram_list.mru_block = NULL;
1080
1081     ram_list.version++;
1082     qemu_mutex_unlock_ramlist();
1083
1084     ram_list.phys_dirty = g_realloc(ram_list.phys_dirty,
1085                                        last_ram_offset() >> TARGET_PAGE_BITS);
1086     memset(ram_list.phys_dirty + (new_block->offset >> TARGET_PAGE_BITS),
1087            0, size >> TARGET_PAGE_BITS);
1088     cpu_physical_memory_set_dirty_range(new_block->offset, size, 0xff);
1089
1090     qemu_ram_setup_dump(new_block->host, size);
1091     qemu_madvise(new_block->host, size, QEMU_MADV_HUGEPAGE);
1092
1093     if (kvm_enabled())
1094         kvm_setup_guest_memory(new_block->host, size);
1095
1096     return new_block->offset;
1097 }
1098
1099 ram_addr_t qemu_ram_alloc(ram_addr_t size, MemoryRegion *mr)
1100 {
1101     return qemu_ram_alloc_from_ptr(size, NULL, mr);
1102 }
1103
1104 void qemu_ram_free_from_ptr(ram_addr_t addr)
1105 {
1106     RAMBlock *block;
1107
1108     /* This assumes the iothread lock is taken here too.  */
1109     qemu_mutex_lock_ramlist();
1110     QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1111         if (addr == block->offset) {
1112             QTAILQ_REMOVE(&ram_list.blocks, block, next);
1113             ram_list.mru_block = NULL;
1114             ram_list.version++;
1115             g_free(block);
1116             break;
1117         }
1118     }
1119     qemu_mutex_unlock_ramlist();
1120 }
1121
1122 void qemu_ram_free(ram_addr_t addr)
1123 {
1124     RAMBlock *block;
1125
1126     /* This assumes the iothread lock is taken here too.  */
1127     qemu_mutex_lock_ramlist();
1128     QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1129         if (addr == block->offset) {
1130             QTAILQ_REMOVE(&ram_list.blocks, block, next);
1131             ram_list.mru_block = NULL;
1132             ram_list.version++;
1133             if (block->flags & RAM_PREALLOC_MASK) {
1134                 ;
1135             } else if (mem_path) {
1136 #if defined (__linux__) && !defined(TARGET_S390X)
1137                 if (block->fd) {
1138                     munmap(block->host, block->length);
1139                     close(block->fd);
1140                 } else {
1141                     qemu_vfree(block->host);
1142                 }
1143 #else
1144                 abort();
1145 #endif
1146             } else {
1147 #if defined(TARGET_S390X) && defined(CONFIG_KVM)
1148                 munmap(block->host, block->length);
1149 #else
1150                 if (xen_enabled()) {
1151                     xen_invalidate_map_cache_entry(block->host);
1152                 } else {
1153                     qemu_vfree(block->host);
1154                 }
1155 #endif
1156             }
1157             g_free(block);
1158             break;
1159         }
1160     }
1161     qemu_mutex_unlock_ramlist();
1162
1163 }
1164
1165 #ifndef _WIN32
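/* Re-establish the host mapping for a range of guest RAM in place, using
 * the same kind of backing the block was originally allocated with.  This
 * is used when the existing pages have become unusable, for example after
 * a hardware memory error reported by the host.
 */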
1166 void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
1167 {
1168     RAMBlock *block;
1169     ram_addr_t offset;
1170     int flags;
1171     void *area, *vaddr;
1172
1173     QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1174         offset = addr - block->offset;
1175         if (offset < block->length) {
1176             vaddr = block->host + offset;
1177             if (block->flags & RAM_PREALLOC_MASK) {
1178                 ;
1179             } else {
1180                 flags = MAP_FIXED;
1181                 munmap(vaddr, length);
1182                 if (mem_path) {
1183 #if defined(__linux__) && !defined(TARGET_S390X)
1184                     if (block->fd) {
1185 #ifdef MAP_POPULATE
1186                         flags |= mem_prealloc ? MAP_POPULATE | MAP_SHARED :
1187                             MAP_PRIVATE;
1188 #else
1189                         flags |= MAP_PRIVATE;
1190 #endif
1191                         area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1192                                     flags, block->fd, offset);
1193                     } else {
1194                         flags |= MAP_PRIVATE | MAP_ANONYMOUS;
1195                         area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1196                                     flags, -1, 0);
1197                     }
1198 #else
1199                     abort();
1200 #endif
1201                 } else {
1202 #if defined(TARGET_S390X) && defined(CONFIG_KVM)
1203                     flags |= MAP_SHARED | MAP_ANONYMOUS;
1204                     area = mmap(vaddr, length, PROT_EXEC|PROT_READ|PROT_WRITE,
1205                                 flags, -1, 0);
1206 #else
1207                     flags |= MAP_PRIVATE | MAP_ANONYMOUS;
1208                     area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1209                                 flags, -1, 0);
1210 #endif
1211                 }
1212                 if (area != vaddr) {
1213                     fprintf(stderr, "Could not remap addr: "
1214                             RAM_ADDR_FMT "@" RAM_ADDR_FMT "\n",
1215                             length, addr);
1216                     exit(1);
1217                 }
1218                 memory_try_enable_merging(vaddr, length);
1219                 qemu_ram_setup_dump(vaddr, length);
1220             }
1221             return;
1222         }
1223     }
1224 }
1225 #endif /* !_WIN32 */
1226
1227 /* Return a host pointer to ram allocated with qemu_ram_alloc.
1228    With the exception of the softmmu code in this file, this should
1229    only be used for local memory (e.g. video ram) that the device owns,
1230    and knows it isn't going to access beyond the end of the block.
1231
1232    It should not be used for general purpose DMA.
1233    Use cpu_physical_memory_map/cpu_physical_memory_rw instead.
1234  */
1235 void *qemu_get_ram_ptr(ram_addr_t addr)
1236 {
1237     RAMBlock *block;
1238
1239     /* The list is protected by the iothread lock here.  */
1240     block = ram_list.mru_block;
1241     if (block && addr - block->offset < block->length) {
1242         goto found;
1243     }
1244     QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1245         if (addr - block->offset < block->length) {
1246             goto found;
1247         }
1248     }
1249
1250     fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
1251     abort();
1252
1253 found:
1254     ram_list.mru_block = block;
1255     if (xen_enabled()) {
1256         /* We need to check whether the requested address is in RAM,
1257          * because we don't want to map all of guest memory in QEMU.
1258          * For the main RAM block, map just the requested page; other
1259          * blocks are mapped in full on first use. */
1260         if (block->offset == 0) {
1261             return xen_map_cache(addr, 0, 0);
1262         } else if (block->host == NULL) {
1263             block->host =
1264                 xen_map_cache(block->offset, block->length, 1);
1265         }
1266     }
1267     return block->host + (addr - block->offset);
1268 }
1269
1270 /* Return a host pointer to ram allocated with qemu_ram_alloc.  Same as
1271  * qemu_get_ram_ptr but do not touch ram_list.mru_block.
1272  *
1273  * ??? Is this still necessary?
1274  */
1275 static void *qemu_safe_ram_ptr(ram_addr_t addr)
1276 {
1277     RAMBlock *block;
1278
1279     /* The list is protected by the iothread lock here.  */
1280     QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1281         if (addr - block->offset < block->length) {
1282             if (xen_enabled()) {
1283                 /* We need to check whether the requested address is in RAM,
1284                  * because we don't want to map all of guest memory in QEMU.
1285                  * For the main RAM block, map just the requested page;
1286                  * other blocks are mapped in full on first use. */
1287                 if (block->offset == 0) {
1288                     return xen_map_cache(addr, 0, 0);
1289                 } else if (block->host == NULL) {
1290                     block->host =
1291                         xen_map_cache(block->offset, block->length, 1);
1292                 }
1293             }
1294             return block->host + (addr - block->offset);
1295         }
1296     }
1297
1298     fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
1299     abort();
1300
1301     return NULL;
1302 }
1303
1304 /* Return a host pointer to guest's ram. Similar to qemu_get_ram_ptr
1305  * but takes a size argument */
1306 static void *qemu_ram_ptr_length(ram_addr_t addr, ram_addr_t *size)
1307 {
1308     if (*size == 0) {
1309         return NULL;
1310     }
1311     if (xen_enabled()) {
1312         return xen_map_cache(addr, *size, 1);
1313     } else {
1314         RAMBlock *block;
1315
1316         QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1317             if (addr - block->offset < block->length) {
1318                 if (addr - block->offset + *size > block->length)
1319                     *size = block->length - addr + block->offset;
1320                 return block->host + (addr - block->offset);
1321             }
1322         }
1323
1324         fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
1325         abort();
1326     }
1327 }
1328
1329 void qemu_put_ram_ptr(void *addr)
1330 {
1331     trace_qemu_put_ram_ptr(addr);
1332 }
1333
1334 int qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr)
1335 {
1336     RAMBlock *block;
1337     uint8_t *host = ptr;
1338
1339     if (xen_enabled()) {
1340         *ram_addr = xen_ram_addr_from_mapcache(ptr);
1341         return 0;
1342     }
1343
1344     QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1345         /* This case happens when the block is not mapped. */
1346         if (block->host == NULL) {
1347             continue;
1348         }
1349         if (host - block->host < block->length) {
1350             *ram_addr = block->offset + (host - block->host);
1351             return 0;
1352         }
1353     }
1354
1355     return -1;
1356 }
1357
1358 /* Some of the softmmu routines need to translate from a host pointer
1359    (typically a TLB entry) back to a ram offset.  */
1360 ram_addr_t qemu_ram_addr_from_host_nofail(void *ptr)
1361 {
1362     ram_addr_t ram_addr;
1363
1364     if (qemu_ram_addr_from_host(ptr, &ram_addr)) {
1365         fprintf(stderr, "Bad ram pointer %p\n", ptr);
1366         abort();
1367     }
1368     return ram_addr;
1369 }
1370
1371 static uint64_t unassigned_mem_read(void *opaque, hwaddr addr,
1372                                     unsigned size)
1373 {
1374 #ifdef DEBUG_UNASSIGNED
1375     printf("Unassigned mem read " TARGET_FMT_plx "\n", addr);
1376 #endif
1377 #if defined(TARGET_ALPHA) || defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
1378     cpu_unassigned_access(cpu_single_env, addr, 0, 0, 0, size);
1379 #endif
1380     return 0;
1381 }
1382
1383 static void unassigned_mem_write(void *opaque, hwaddr addr,
1384                                  uint64_t val, unsigned size)
1385 {
1386 #ifdef DEBUG_UNASSIGNED
1387     printf("Unassigned mem write " TARGET_FMT_plx " = 0x%"PRIx64"\n", addr, val);
1388 #endif
1389 #if defined(TARGET_ALPHA) || defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
1390     cpu_unassigned_access(cpu_single_env, addr, 1, 0, 0, size);
1391 #endif
1392 }
1393
1394 static const MemoryRegionOps unassigned_mem_ops = {
1395     .read = unassigned_mem_read,
1396     .write = unassigned_mem_write,
1397     .endianness = DEVICE_NATIVE_ENDIAN,
1398 };
1399
1400 static uint64_t error_mem_read(void *opaque, hwaddr addr,
1401                                unsigned size)
1402 {
1403     abort();
1404 }
1405
1406 static void error_mem_write(void *opaque, hwaddr addr,
1407                             uint64_t value, unsigned size)
1408 {
1409     abort();
1410 }
1411
1412 static const MemoryRegionOps error_mem_ops = {
1413     .read = error_mem_read,
1414     .write = error_mem_write,
1415     .endianness = DEVICE_NATIVE_ENDIAN,
1416 };
1417
1418 static const MemoryRegionOps rom_mem_ops = {
1419     .read = error_mem_read,
1420     .write = unassigned_mem_write,
1421     .endianness = DEVICE_NATIVE_ENDIAN,
1422 };
1423
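/* Write handler for RAM pages whose CODE_DIRTY_FLAG is clear: translated
 * code may exist for the page, so the affected TBs are invalidated before
 * the store is performed.  The dirty flags are then raised, and once the
 * page is fully dirty again the TLB entry is switched back to a plain RAM
 * mapping so later writes skip this slow path.
 */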
1424 static void notdirty_mem_write(void *opaque, hwaddr ram_addr,
1425                                uint64_t val, unsigned size)
1426 {
1427     int dirty_flags;
1428     dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
1429     if (!(dirty_flags & CODE_DIRTY_FLAG)) {
1430 #if !defined(CONFIG_USER_ONLY)
1431         tb_invalidate_phys_page_fast(ram_addr, size);
1432         dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
1433 #endif
1434     }
1435     switch (size) {
1436     case 1:
1437         stb_p(qemu_get_ram_ptr(ram_addr), val);
1438         break;
1439     case 2:
1440         stw_p(qemu_get_ram_ptr(ram_addr), val);
1441         break;
1442     case 4:
1443         stl_p(qemu_get_ram_ptr(ram_addr), val);
1444         break;
1445     default:
1446         abort();
1447     }
1448     dirty_flags |= (0xff & ~CODE_DIRTY_FLAG);
1449     cpu_physical_memory_set_dirty_flags(ram_addr, dirty_flags);
1450     /* we remove the notdirty callback only if the code has been
1451        flushed */
1452     if (dirty_flags == 0xff)
1453         tlb_set_dirty(cpu_single_env, cpu_single_env->mem_io_vaddr);
1454 }
1455
1456 static const MemoryRegionOps notdirty_mem_ops = {
1457     .read = error_mem_read,
1458     .write = notdirty_mem_write,
1459     .endianness = DEVICE_NATIVE_ENDIAN,
1460 };
1461
1462 /* Generate a debug exception if a watchpoint has been hit.  */
1463 static void check_watchpoint(int offset, int len_mask, int flags)
1464 {
1465     CPUArchState *env = cpu_single_env;
1466     target_ulong pc, cs_base;
1467     target_ulong vaddr;
1468     CPUWatchpoint *wp;
1469     int cpu_flags;
1470
1471     if (env->watchpoint_hit) {
1472         /* We re-entered the check after replacing the TB. Now raise
1473          * the debug interrupt so that it will trigger after the
1474          * current instruction. */
1475         cpu_interrupt(env, CPU_INTERRUPT_DEBUG);
1476         return;
1477     }
1478     vaddr = (env->mem_io_vaddr & TARGET_PAGE_MASK) + offset;
1479     QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
1480         if ((vaddr == (wp->vaddr & len_mask) ||
1481              (vaddr & wp->len_mask) == wp->vaddr) && (wp->flags & flags)) {
1482             wp->flags |= BP_WATCHPOINT_HIT;
1483             if (!env->watchpoint_hit) {
1484                 env->watchpoint_hit = wp;
1485                 tb_check_watchpoint(env);
1486                 if (wp->flags & BP_STOP_BEFORE_ACCESS) {
1487                     env->exception_index = EXCP_DEBUG;
1488                     cpu_loop_exit(env);
1489                 } else {
1490                     cpu_get_tb_cpu_state(env, &pc, &cs_base, &cpu_flags);
1491                     tb_gen_code(env, pc, cs_base, cpu_flags, 1);
1492                     cpu_resume_from_signal(env, NULL);
1493                 }
1494             }
1495         } else {
1496             wp->flags &= ~BP_WATCHPOINT_HIT;
1497         }
1498     }
1499 }
1500
1501 /* Watchpoint access routines.  Watchpoints are inserted using TLB tricks,
1502    so these check for a hit then pass through to the normal out-of-line
1503    phys routines.  */
1504 static uint64_t watch_mem_read(void *opaque, hwaddr addr,
1505                                unsigned size)
1506 {
1507     check_watchpoint(addr & ~TARGET_PAGE_MASK, ~(size - 1), BP_MEM_READ);
1508     switch (size) {
1509     case 1: return ldub_phys(addr);
1510     case 2: return lduw_phys(addr);
1511     case 4: return ldl_phys(addr);
1512     default: abort();
1513     }
1514 }
1515
1516 static void watch_mem_write(void *opaque, hwaddr addr,
1517                             uint64_t val, unsigned size)
1518 {
1519     check_watchpoint(addr & ~TARGET_PAGE_MASK, ~(size - 1), BP_MEM_WRITE);
1520     switch (size) {
1521     case 1:
1522         stb_phys(addr, val);
1523         break;
1524     case 2:
1525         stw_phys(addr, val);
1526         break;
1527     case 4:
1528         stl_phys(addr, val);
1529         break;
1530     default: abort();
1531     }
1532 }
1533
1534 static const MemoryRegionOps watch_mem_ops = {
1535     .read = watch_mem_read,
1536     .write = watch_mem_write,
1537     .endianness = DEVICE_NATIVE_ENDIAN,
1538 };
1539
1540 static uint64_t subpage_read(void *opaque, hwaddr addr,
1541                              unsigned len)
1542 {
1543     subpage_t *mmio = opaque;
1544     unsigned int idx = SUBPAGE_IDX(addr);
1545     MemoryRegionSection *section;
1546 #if defined(DEBUG_SUBPAGE)
1547     printf("%s: subpage %p len %d addr " TARGET_FMT_plx " idx %d\n", __func__,
1548            mmio, len, addr, idx);
1549 #endif
1550
1551     section = &phys_sections[mmio->sub_section[idx]];
1552     addr += mmio->base;
1553     addr -= section->offset_within_address_space;
1554     addr += section->offset_within_region;
1555     return io_mem_read(section->mr, addr, len);
1556 }
1557
1558 static void subpage_write(void *opaque, hwaddr addr,
1559                           uint64_t value, unsigned len)
1560 {
1561     subpage_t *mmio = opaque;
1562     unsigned int idx = SUBPAGE_IDX(addr);
1563     MemoryRegionSection *section;
1564 #if defined(DEBUG_SUBPAGE)
1565     printf("%s: subpage %p len %d addr " TARGET_FMT_plx
1566            " idx %d value %"PRIx64"\n",
1567            __func__, mmio, len, addr, idx, value);
1568 #endif
1569
1570     section = &phys_sections[mmio->sub_section[idx]];
1571     addr += mmio->base;
1572     addr -= section->offset_within_address_space;
1573     addr += section->offset_within_region;
1574     io_mem_write(section->mr, addr, value, len);
1575 }
1576
1577 static const MemoryRegionOps subpage_ops = {
1578     .read = subpage_read,
1579     .write = subpage_write,
1580     .endianness = DEVICE_NATIVE_ENDIAN,
1581 };
1582
1583 static uint64_t subpage_ram_read(void *opaque, hwaddr addr,
1584                                  unsigned size)
1585 {
1586     ram_addr_t raddr = addr;
1587     void *ptr = qemu_get_ram_ptr(raddr);
1588     switch (size) {
1589     case 1: return ldub_p(ptr);
1590     case 2: return lduw_p(ptr);
1591     case 4: return ldl_p(ptr);
1592     default: abort();
1593     }
1594 }
1595
1596 static void subpage_ram_write(void *opaque, hwaddr addr,
1597                               uint64_t value, unsigned size)
1598 {
1599     ram_addr_t raddr = addr;
1600     void *ptr = qemu_get_ram_ptr(raddr);
1601     switch (size) {
1602     case 1: return stb_p(ptr, value);
1603     case 2: return stw_p(ptr, value);
1604     case 4: return stl_p(ptr, value);
1605     default: abort();
1606     }
1607 }
1608
1609 static const MemoryRegionOps subpage_ram_ops = {
1610     .read = subpage_ram_read,
1611     .write = subpage_ram_write,
1612     .endianness = DEVICE_NATIVE_ENDIAN,
1613 };
1614
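/* A subpage_t handles guest pages that are shared by several regions:
 * sub_section[] maps every byte offset inside the page to a phys_sections
 * index, and subpage_read()/subpage_write() forward each access to the
 * region that owns that offset.
 */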
1615 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
1616                              uint16_t section)
1617 {
1618     int idx, eidx;
1619
1620     if (start >= TARGET_PAGE_SIZE || end >= TARGET_PAGE_SIZE)
1621         return -1;
1622     idx = SUBPAGE_IDX(start);
1623     eidx = SUBPAGE_IDX(end);
1624 #if defined(DEBUG_SUBPAGE)
1625     printf("%s: %p start %08x end %08x idx %08x eidx %08x section %d\n", __func__,
1626            mmio, start, end, idx, eidx, section);
1627 #endif
1628     if (memory_region_is_ram(phys_sections[section].mr)) {
1629         MemoryRegionSection new_section = phys_sections[section];
1630         new_section.mr = &io_mem_subpage_ram;
1631         section = phys_section_add(&new_section);
1632     }
1633     for (; idx <= eidx; idx++) {
1634         mmio->sub_section[idx] = section;
1635     }
1636
1637     return 0;
1638 }
1639
1640 static subpage_t *subpage_init(hwaddr base)
1641 {
1642     subpage_t *mmio;
1643
1644     mmio = g_malloc0(sizeof(subpage_t));
1645
1646     mmio->base = base;
1647     memory_region_init_io(&mmio->iomem, &subpage_ops, mmio,
1648                           "subpage", TARGET_PAGE_SIZE);
1649     mmio->iomem.subpage = true;
1650 #if defined(DEBUG_SUBPAGE)
1651     printf("%s: %p base " TARGET_FMT_plx " len %08x\n", __func__,
1652            mmio, base, TARGET_PAGE_SIZE);
1653 #endif
1654     subpage_register(mmio, 0, TARGET_PAGE_SIZE-1, phys_section_unassigned);
1655
1656     return mmio;
1657 }
1658
1659 static uint16_t dummy_section(MemoryRegion *mr)
1660 {
1661     MemoryRegionSection section = {
1662         .mr = mr,
1663         .offset_within_address_space = 0,
1664         .offset_within_region = 0,
1665         .size = UINT64_MAX,
1666     };
1667
1668     return phys_section_add(&section);
1669 }
1670
1671 MemoryRegion *iotlb_to_region(hwaddr index)
1672 {
1673     return phys_sections[index & ~TARGET_PAGE_MASK].mr;
1674 }
1675
1676 static void io_mem_init(void)
1677 {
1678     memory_region_init_io(&io_mem_ram, &error_mem_ops, NULL, "ram", UINT64_MAX);
1679     memory_region_init_io(&io_mem_rom, &rom_mem_ops, NULL, "rom", UINT64_MAX);
1680     memory_region_init_io(&io_mem_unassigned, &unassigned_mem_ops, NULL,
1681                           "unassigned", UINT64_MAX);
1682     memory_region_init_io(&io_mem_notdirty, &notdirty_mem_ops, NULL,
1683                           "notdirty", UINT64_MAX);
1684     memory_region_init_io(&io_mem_subpage_ram, &subpage_ram_ops, NULL,
1685                           "subpage-ram", UINT64_MAX);
1686     memory_region_init_io(&io_mem_watch, &watch_mem_ops, NULL,
1687                           "watch", UINT64_MAX);
1688 }
1689
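/* Memory listener callbacks.  On every topology change core_begin() resets
 * the shared phys_sections table and re-creates the special sections,
 * mem_begin() and mem_add() rebuild each address space's dispatch tree
 * from scratch, and tcg_commit() flushes all CPU TLBs so no stale
 * translations survive the update.
 */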
1690 static void mem_begin(MemoryListener *listener)
1691 {
1692     AddressSpaceDispatch *d = container_of(listener, AddressSpaceDispatch, listener);
1693
1694     destroy_all_mappings(d);
1695     d->phys_map.ptr = PHYS_MAP_NODE_NIL;
1696 }
1697
1698 static void core_begin(MemoryListener *listener)
1699 {
1700     phys_sections_clear();
1701     phys_section_unassigned = dummy_section(&io_mem_unassigned);
1702     phys_section_notdirty = dummy_section(&io_mem_notdirty);
1703     phys_section_rom = dummy_section(&io_mem_rom);
1704     phys_section_watch = dummy_section(&io_mem_watch);
1705 }
1706
1707 static void tcg_commit(MemoryListener *listener)
1708 {
1709     CPUArchState *env;
1710
1711     /* since each CPU stores ram addresses in its TLB cache, we must
1712        reset the modified entries */
1713     /* XXX: slow ! */
1714     for(env = first_cpu; env != NULL; env = env->next_cpu) {
1715         tlb_flush(env, 1);
1716     }
1717 }
1718
1719 static void core_log_global_start(MemoryListener *listener)
1720 {
1721     cpu_physical_memory_set_dirty_tracking(1);
1722 }
1723
1724 static void core_log_global_stop(MemoryListener *listener)
1725 {
1726     cpu_physical_memory_set_dirty_tracking(0);
1727 }
1728
1729 static void io_region_add(MemoryListener *listener,
1730                           MemoryRegionSection *section)
1731 {
1732     MemoryRegionIORange *mrio = g_new(MemoryRegionIORange, 1);
1733
1734     mrio->mr = section->mr;
1735     mrio->offset = section->offset_within_region;
1736     iorange_init(&mrio->iorange, &memory_region_iorange_ops,
1737                  section->offset_within_address_space, section->size);
1738     ioport_register(&mrio->iorange);
1739 }
1740
1741 static void io_region_del(MemoryListener *listener,
1742                           MemoryRegionSection *section)
1743 {
1744     isa_unassign_ioport(section->offset_within_address_space, section->size);
1745 }
1746
1747 static MemoryListener core_memory_listener = {
1748     .begin = core_begin,
1749     .log_global_start = core_log_global_start,
1750     .log_global_stop = core_log_global_stop,
1751     .priority = 1,
1752 };
1753
1754 static MemoryListener io_memory_listener = {
1755     .region_add = io_region_add,
1756     .region_del = io_region_del,
1757     .priority = 0,
1758 };
1759
1760 static MemoryListener tcg_memory_listener = {
1761     .commit = tcg_commit,
1762 };
1763
1764 void address_space_init_dispatch(AddressSpace *as)
1765 {
1766     AddressSpaceDispatch *d = g_new(AddressSpaceDispatch, 1);
1767
1768     d->phys_map  = (PhysPageEntry) { .ptr = PHYS_MAP_NODE_NIL, .is_leaf = 0 };
1769     d->listener = (MemoryListener) {
1770         .begin = mem_begin,
1771         .region_add = mem_add,
1772         .region_nop = mem_add,
1773         .priority = 0,
1774     };
1775     as->dispatch = d;
1776     memory_listener_register(&d->listener, as);
1777 }
1778
1779 void address_space_destroy_dispatch(AddressSpace *as)
1780 {
1781     AddressSpaceDispatch *d = as->dispatch;
1782
1783     memory_listener_unregister(&d->listener);
1784     destroy_l2_mapping(&d->phys_map, P_L2_LEVELS - 1);
1785     g_free(d);
1786     as->dispatch = NULL;
1787 }
1788
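/* Create the global "system" and "io" address spaces and register the
 * core, I/O and TCG memory listeners on them.  */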
1789 static void memory_map_init(void)
1790 {
1791     system_memory = g_malloc(sizeof(*system_memory));
1792     memory_region_init(system_memory, "system", INT64_MAX);
1793     address_space_init(&address_space_memory, system_memory);
1794     address_space_memory.name = "memory";
1795
1796     system_io = g_malloc(sizeof(*system_io));
1797     memory_region_init(system_io, "io", 65536);
1798     address_space_init(&address_space_io, system_io);
1799     address_space_io.name = "I/O";
1800
1801     memory_listener_register(&core_memory_listener, &address_space_memory);
1802     memory_listener_register(&io_memory_listener, &address_space_io);
1803     memory_listener_register(&tcg_memory_listener, &address_space_memory);
1804
1805     dma_context_init(&dma_context_memory, &address_space_memory,
1806                      NULL, NULL, NULL);
1807 }
1808
1809 MemoryRegion *get_system_memory(void)
1810 {
1811     return system_memory;
1812 }
1813
1814 MemoryRegion *get_system_io(void)
1815 {
1816     return system_io;
1817 }
1818
1819 #endif /* !defined(CONFIG_USER_ONLY) */
1820
1821 /* physical memory access (slow version, mainly for debug) */
1822 #if defined(CONFIG_USER_ONLY)
1823 int cpu_memory_rw_debug(CPUArchState *env, target_ulong addr,
1824                         uint8_t *buf, int len, int is_write)
1825 {
1826     int l, flags;
1827     target_ulong page;
1828     void * p;
1829
1830     while (len > 0) {
1831         page = addr & TARGET_PAGE_MASK;
1832         l = (page + TARGET_PAGE_SIZE) - addr;
1833         if (l > len)
1834             l = len;
1835         flags = page_get_flags(page);
1836         if (!(flags & PAGE_VALID))
1837             return -1;
1838         if (is_write) {
1839             if (!(flags & PAGE_WRITE))
1840                 return -1;
1841             /* XXX: this code should not depend on lock_user */
1842             if (!(p = lock_user(VERIFY_WRITE, addr, l, 0)))
1843                 return -1;
1844             memcpy(p, buf, l);
1845             unlock_user(p, addr, l);
1846         } else {
1847             if (!(flags & PAGE_READ))
1848                 return -1;
1849             /* XXX: this code should not depend on lock_user */
1850             if (!(p = lock_user(VERIFY_READ, addr, l, 1)))
1851                 return -1;
1852             memcpy(buf, p, l);
1853             unlock_user(p, addr, 0);
1854         }
1855         len -= l;
1856         buf += l;
1857         addr += l;
1858     }
1859     return 0;
1860 }
1861
1862 #else
1863
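/* Called after guest RAM has been written by the host: invalidate any
 * translated code derived from the range, update the dirty bitmap and
 * notify Xen of the modification.  */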
1864 static void invalidate_and_set_dirty(hwaddr addr,
1865                                      hwaddr length)
1866 {
1867     if (!cpu_physical_memory_is_dirty(addr)) {
1868         /* invalidate code */
1869         tb_invalidate_phys_page_range(addr, addr + length, 0);
1870         /* set dirty bit */
1871         cpu_physical_memory_set_dirty_flags(addr, (0xff & ~CODE_DIRTY_FLAG));
1872     }
1873     xen_modified_memory(addr, length);
1874 }
1875
1876 void address_space_rw(AddressSpace *as, hwaddr addr, uint8_t *buf,
1877                       int len, bool is_write)
1878 {
1879     AddressSpaceDispatch *d = as->dispatch;
1880     int l;
1881     uint8_t *ptr;
1882     uint32_t val;
1883     hwaddr page;
1884     MemoryRegionSection *section;
1885
1886     while (len > 0) {
1887         page = addr & TARGET_PAGE_MASK;
1888         l = (page + TARGET_PAGE_SIZE) - addr;
1889         if (l > len)
1890             l = len;
1891         section = phys_page_find(d, page >> TARGET_PAGE_BITS);
1892
1893         if (is_write) {
1894             if (!memory_region_is_ram(section->mr)) {
1895                 hwaddr addr1;
1896                 addr1 = memory_region_section_addr(section, addr);
1897                 /* XXX: could force cpu_single_env to NULL to avoid
1898                    potential bugs */
1899                 if (l >= 4 && ((addr1 & 3) == 0)) {
1900                     /* 32 bit write access */
1901                     val = ldl_p(buf);
1902                     io_mem_write(section->mr, addr1, val, 4);
1903                     l = 4;
1904                 } else if (l >= 2 && ((addr1 & 1) == 0)) {
1905                     /* 16 bit write access */
1906                     val = lduw_p(buf);
1907                     io_mem_write(section->mr, addr1, val, 2);
1908                     l = 2;
1909                 } else {
1910                     /* 8 bit write access */
1911                     val = ldub_p(buf);
1912                     io_mem_write(section->mr, addr1, val, 1);
1913                     l = 1;
1914                 }
1915             } else if (!section->readonly) {
1916                 ram_addr_t addr1;
1917                 addr1 = memory_region_get_ram_addr(section->mr)
1918                     + memory_region_section_addr(section, addr);
1919                 /* RAM case */
1920                 ptr = qemu_get_ram_ptr(addr1);
1921                 memcpy(ptr, buf, l);
1922                 invalidate_and_set_dirty(addr1, l);
1923                 qemu_put_ram_ptr(ptr);
1924             }
1925         } else {
1926             if (!(memory_region_is_ram(section->mr) ||
1927                   memory_region_is_romd(section->mr))) {
1928                 hwaddr addr1;
1929                 /* I/O case */
1930                 addr1 = memory_region_section_addr(section, addr);
1931                 if (l >= 4 && ((addr1 & 3) == 0)) {
1932                     /* 32 bit read access */
1933                     val = io_mem_read(section->mr, addr1, 4);
1934                     stl_p(buf, val);
1935                     l = 4;
1936                 } else if (l >= 2 && ((addr1 & 1) == 0)) {
1937                     /* 16 bit read access */
1938                     val = io_mem_read(section->mr, addr1, 2);
1939                     stw_p(buf, val);
1940                     l = 2;
1941                 } else {
1942                     /* 8 bit read access */
1943                     val = io_mem_read(section->mr, addr1, 1);
1944                     stb_p(buf, val);
1945                     l = 1;
1946                 }
1947             } else {
1948                 /* RAM case */
1949                 ptr = qemu_get_ram_ptr(section->mr->ram_addr
1950                                        + memory_region_section_addr(section,
1951                                                                     addr));
1952                 memcpy(buf, ptr, l);
1953                 qemu_put_ram_ptr(ptr);
1954             }
1955         }
1956         len -= l;
1957         buf += l;
1958         addr += l;
1959     }
1960 }
1961
1962 void address_space_write(AddressSpace *as, hwaddr addr,
1963                          const uint8_t *buf, int len)
1964 {
1965     address_space_rw(as, addr, (uint8_t *)buf, len, true);
1966 }
1967
1968 /**
1969  * address_space_read: read from an address space.
1970  *
1971  * @as: #AddressSpace to be accessed
1972  * @addr: address within that address space
1973  * @buf: buffer to hold the data read
      * @len: length of the transfer in bytes
1974  */
1975 void address_space_read(AddressSpace *as, hwaddr addr, uint8_t *buf, int len)
1976 {
1977     address_space_rw(as, addr, buf, len, false);
1978 }
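
/*
 * Illustrative sketch, not part of the original file and not compiled:
 * how a device model might use address_space_read()/address_space_write()
 * to copy a descriptor out of guest memory and write back a status byte.
 * The descriptor layout and the desc_pa parameter are assumptions made
 * for the example.
 */
#if 0
static void example_copy_descriptor(AddressSpace *as, hwaddr desc_pa)
{
    uint8_t desc[16];
    uint8_t status = 1;

    /* Pull the raw descriptor bytes out of guest physical memory. */
    address_space_read(as, desc_pa, desc, sizeof(desc));
    /* ... decode the descriptor here ... */

    /* Write a completion status just past the descriptor. */
    address_space_write(as, desc_pa + sizeof(desc), &status, sizeof(status));
}
#endif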
1979
1980
1981 void cpu_physical_memory_rw(hwaddr addr, uint8_t *buf,
1982                             int len, int is_write)
1983 {
1984     return address_space_rw(&address_space_memory, addr, buf, len, is_write);
1985 }
1986
1987 /* Used for ROM loading: can write to both RAM and ROM. */
1988 void cpu_physical_memory_write_rom(hwaddr addr,
1989                                    const uint8_t *buf, int len)
1990 {
1991     AddressSpaceDispatch *d = address_space_memory.dispatch;
1992     int l;
1993     uint8_t *ptr;
1994     hwaddr page;
1995     MemoryRegionSection *section;
1996
1997     while (len > 0) {
1998         page = addr & TARGET_PAGE_MASK;
1999         l = (page + TARGET_PAGE_SIZE) - addr;
2000         if (l > len)
2001             l = len;
2002         section = phys_page_find(d, page >> TARGET_PAGE_BITS);
2003
2004         if (!(memory_region_is_ram(section->mr) ||
2005               memory_region_is_romd(section->mr))) {
2006             /* do nothing */
2007         } else {
2008             unsigned long addr1;
2009             addr1 = memory_region_get_ram_addr(section->mr)
2010                 + memory_region_section_addr(section, addr);
2011             /* ROM/RAM case */
2012             ptr = qemu_get_ram_ptr(addr1);
2013             memcpy(ptr, buf, l);
2014             invalidate_and_set_dirty(addr1, l);
2015             qemu_put_ram_ptr(ptr);
2016         }
2017         len -= l;
2018         buf += l;
2019         addr += l;
2020     }
2021 }
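
/*
 * Illustrative sketch, not compiled: loading a firmware blob with
 * cpu_physical_memory_write_rom(), which, unlike a plain write, also
 * succeeds for read-only (ROM) regions.  The load address, blob pointer
 * and size are assumptions made for the example.
 */
#if 0
static void example_load_firmware(const uint8_t *blob, int size)
{
    cpu_physical_memory_write_rom(0xfffc0000, blob, size);
}
#endif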
2022
2023 typedef struct {
2024     void *buffer;
2025     hwaddr addr;
2026     hwaddr len;
2027 } BounceBuffer;
2028
2029 static BounceBuffer bounce;
2030
2031 typedef struct MapClient {
2032     void *opaque;
2033     void (*callback)(void *opaque);
2034     QLIST_ENTRY(MapClient) link;
2035 } MapClient;
2036
2037 static QLIST_HEAD(map_client_list, MapClient) map_client_list
2038     = QLIST_HEAD_INITIALIZER(map_client_list);
2039
2040 void *cpu_register_map_client(void *opaque, void (*callback)(void *opaque))
2041 {
2042     MapClient *client = g_malloc(sizeof(*client));
2043
2044     client->opaque = opaque;
2045     client->callback = callback;
2046     QLIST_INSERT_HEAD(&map_client_list, client, link);
2047     return client;
2048 }
2049
2050 static void cpu_unregister_map_client(void *_client)
2051 {
2052     MapClient *client = (MapClient *)_client;
2053
2054     QLIST_REMOVE(client, link);
2055     g_free(client);
2056 }
2057
2058 static void cpu_notify_map_clients(void)
2059 {
2060     MapClient *client;
2061
2062     while (!QLIST_EMPTY(&map_client_list)) {
2063         client = QLIST_FIRST(&map_client_list);
2064         client->callback(client->opaque);
2065         cpu_unregister_map_client(client);
2066     }
2067 }
2068
2069 /* Map a physical memory region into a host virtual address.
2070  * May map a subset of the requested range, given by and returned in *plen.
2071  * May return NULL if resources needed to perform the mapping are exhausted.
2072  * Use only for reads OR writes - not for read-modify-write operations.
2073  * Use cpu_register_map_client() to know when retrying the map operation is
2074  * likely to succeed.
2075  */
2076 void *address_space_map(AddressSpace *as,
2077                         hwaddr addr,
2078                         hwaddr *plen,
2079                         bool is_write)
2080 {
2081     AddressSpaceDispatch *d = as->dispatch;
2082     hwaddr len = *plen;
2083     hwaddr todo = 0;
2084     int l;
2085     hwaddr page;
2086     MemoryRegionSection *section;
2087     ram_addr_t raddr = RAM_ADDR_MAX;
2088     ram_addr_t rlen;
2089     void *ret;
2090
2091     while (len > 0) {
2092         page = addr & TARGET_PAGE_MASK;
2093         l = (page + TARGET_PAGE_SIZE) - addr;
2094         if (l > len)
2095             l = len;
2096         section = phys_page_find(d, page >> TARGET_PAGE_BITS);
2097
2098         if (!(memory_region_is_ram(section->mr) && !section->readonly)) {
2099             if (todo || bounce.buffer) {
2100                 break;
2101             }
2102             bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, TARGET_PAGE_SIZE);
2103             bounce.addr = addr;
2104             bounce.len = l;
2105             if (!is_write) {
2106                 address_space_read(as, addr, bounce.buffer, l);
2107             }
2108
2109             *plen = l;
2110             return bounce.buffer;
2111         }
2112         if (!todo) {
2113             raddr = memory_region_get_ram_addr(section->mr)
2114                 + memory_region_section_addr(section, addr);
2115         }
2116
2117         len -= l;
2118         addr += l;
2119         todo += l;
2120     }
2121     rlen = todo;
2122     ret = qemu_ram_ptr_length(raddr, &rlen);
2123     *plen = rlen;
2124     return ret;
2125 }
2126
2127 /* Unmaps a memory region previously mapped by address_space_map().
2128  * Will also mark the memory as dirty if is_write == 1.  access_len gives
2129  * the amount of memory that was actually read or written by the caller.
2130  */
2131 void address_space_unmap(AddressSpace *as, void *buffer, hwaddr len,
2132                          int is_write, hwaddr access_len)
2133 {
2134     if (buffer != bounce.buffer) {
2135         if (is_write) {
2136             ram_addr_t addr1 = qemu_ram_addr_from_host_nofail(buffer);
2137             while (access_len) {
2138                 unsigned l;
2139                 l = TARGET_PAGE_SIZE;
2140                 if (l > access_len)
2141                     l = access_len;
2142                 invalidate_and_set_dirty(addr1, l);
2143                 addr1 += l;
2144                 access_len -= l;
2145             }
2146         }
2147         if (xen_enabled()) {
2148             xen_invalidate_map_cache_entry(buffer);
2149         }
2150         return;
2151     }
2152     if (is_write) {
2153         address_space_write(as, bounce.addr, bounce.buffer, access_len);
2154     }
2155     qemu_vfree(bounce.buffer);
2156     bounce.buffer = NULL;
2157     cpu_notify_map_clients();
2158 }
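
/*
 * Illustrative sketch, not compiled: a DMA helper following the protocol
 * described in the comment above address_space_map().  It maps guest memory
 * directly when possible and, when the single bounce buffer is busy and the
 * map fails, registers a map client to retry later.  The ExampleDMA
 * structure and the callback names are assumptions made for the example.
 */
#if 0
typedef struct ExampleDMA {
    AddressSpace *as;
    hwaddr addr;
    hwaddr len;
} ExampleDMA;

static void example_dma_retry(void *opaque);

static void example_dma_start(ExampleDMA *dma)
{
    hwaddr plen = dma->len;
    void *host = address_space_map(dma->as, dma->addr, &plen, true);

    if (!host) {
        /* Bounce buffer in use: ask to be called back when it is freed. */
        cpu_register_map_client(dma, example_dma_retry);
        return;
    }
    /* ... produce up to plen bytes into host ... */
    address_space_unmap(dma->as, host, plen, true, plen);
}

static void example_dma_retry(void *opaque)
{
    example_dma_start(opaque);
}
#endif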
2159
2160 void *cpu_physical_memory_map(hwaddr addr,
2161                               hwaddr *plen,
2162                               int is_write)
2163 {
2164     return address_space_map(&address_space_memory, addr, plen, is_write);
2165 }
2166
2167 void cpu_physical_memory_unmap(void *buffer, hwaddr len,
2168                                int is_write, hwaddr access_len)
2169 {
2170     return address_space_unmap(&address_space_memory, buffer, len, is_write, access_len);
2171 }
2172
2173 /* warning: addr must be aligned */
2174 static inline uint32_t ldl_phys_internal(hwaddr addr,
2175                                          enum device_endian endian)
2176 {
2177     uint8_t *ptr;
2178     uint32_t val;
2179     MemoryRegionSection *section;
2180
2181     section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
2182
2183     if (!(memory_region_is_ram(section->mr) ||
2184           memory_region_is_romd(section->mr))) {
2185         /* I/O case */
2186         addr = memory_region_section_addr(section, addr);
2187         val = io_mem_read(section->mr, addr, 4);
2188 #if defined(TARGET_WORDS_BIGENDIAN)
2189         if (endian == DEVICE_LITTLE_ENDIAN) {
2190             val = bswap32(val);
2191         }
2192 #else
2193         if (endian == DEVICE_BIG_ENDIAN) {
2194             val = bswap32(val);
2195         }
2196 #endif
2197     } else {
2198         /* RAM case */
2199         ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section->mr)
2200                                 & TARGET_PAGE_MASK)
2201                                + memory_region_section_addr(section, addr));
2202         switch (endian) {
2203         case DEVICE_LITTLE_ENDIAN:
2204             val = ldl_le_p(ptr);
2205             break;
2206         case DEVICE_BIG_ENDIAN:
2207             val = ldl_be_p(ptr);
2208             break;
2209         default:
2210             val = ldl_p(ptr);
2211             break;
2212         }
2213     }
2214     return val;
2215 }
2216
2217 uint32_t ldl_phys(hwaddr addr)
2218 {
2219     return ldl_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
2220 }
2221
2222 uint32_t ldl_le_phys(hwaddr addr)
2223 {
2224     return ldl_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
2225 }
2226
2227 uint32_t ldl_be_phys(hwaddr addr)
2228 {
2229     return ldl_phys_internal(addr, DEVICE_BIG_ENDIAN);
2230 }
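
/*
 * Illustrative sketch, not compiled: reading 32-bit MMIO registers with an
 * explicit byte order, independent of the target CPU's endianness.  The
 * register addresses are assumptions made for the example.
 */
#if 0
static uint32_t example_read_registers(void)
{
    uint32_t le_reg = ldl_le_phys(0xfe000000); /* little-endian device */
    uint32_t be_reg = ldl_be_phys(0xfe000004); /* big-endian device */

    return le_reg ^ be_reg;
}
#endif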
2231
2232 /* warning: addr must be aligned */
2233 static inline uint64_t ldq_phys_internal(hwaddr addr,
2234                                          enum device_endian endian)
2235 {
2236     uint8_t *ptr;
2237     uint64_t val;
2238     MemoryRegionSection *section;
2239
2240     section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
2241
2242     if (!(memory_region_is_ram(section->mr) ||
2243           memory_region_is_romd(section->mr))) {
2244         /* I/O case */
2245         addr = memory_region_section_addr(section, addr);
2246
2247         /* XXX: This is broken when the device endianness differs from the
2248                CPU endianness.  Fix this and honour the "endian" argument. */
2249 #ifdef TARGET_WORDS_BIGENDIAN
2250         val = io_mem_read(section->mr, addr, 4) << 32;
2251         val |= io_mem_read(section->mr, addr + 4, 4);
2252 #else
2253         val = io_mem_read(section->mr, addr, 4);
2254         val |= io_mem_read(section->mr, addr + 4, 4) << 32;
2255 #endif
2256     } else {
2257         /* RAM case */
2258         ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section->mr)
2259                                 & TARGET_PAGE_MASK)
2260                                + memory_region_section_addr(section, addr));
2261         switch (endian) {
2262         case DEVICE_LITTLE_ENDIAN:
2263             val = ldq_le_p(ptr);
2264             break;
2265         case DEVICE_BIG_ENDIAN:
2266             val = ldq_be_p(ptr);
2267             break;
2268         default:
2269             val = ldq_p(ptr);
2270             break;
2271         }
2272     }
2273     return val;
2274 }
2275
2276 uint64_t ldq_phys(hwaddr addr)
2277 {
2278     return ldq_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
2279 }
2280
2281 uint64_t ldq_le_phys(hwaddr addr)
2282 {
2283     return ldq_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
2284 }
2285
2286 uint64_t ldq_be_phys(hwaddr addr)
2287 {
2288     return ldq_phys_internal(addr, DEVICE_BIG_ENDIAN);
2289 }
2290
2291 /* XXX: optimize */
2292 uint32_t ldub_phys(hwaddr addr)
2293 {
2294     uint8_t val;
2295     cpu_physical_memory_read(addr, &val, 1);
2296     return val;
2297 }
2298
2299 /* warning: addr must be aligned */
2300 static inline uint32_t lduw_phys_internal(hwaddr addr,
2301                                           enum device_endian endian)
2302 {
2303     uint8_t *ptr;
2304     uint64_t val;
2305     MemoryRegionSection *section;
2306
2307     section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
2308
2309     if (!(memory_region_is_ram(section->mr) ||
2310           memory_region_is_romd(section->mr))) {
2311         /* I/O case */
2312         addr = memory_region_section_addr(section, addr);
2313         val = io_mem_read(section->mr, addr, 2);
2314 #if defined(TARGET_WORDS_BIGENDIAN)
2315         if (endian == DEVICE_LITTLE_ENDIAN) {
2316             val = bswap16(val);
2317         }
2318 #else
2319         if (endian == DEVICE_BIG_ENDIAN) {
2320             val = bswap16(val);
2321         }
2322 #endif
2323     } else {
2324         /* RAM case */
2325         ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section->mr)
2326                                 & TARGET_PAGE_MASK)
2327                                + memory_region_section_addr(section, addr));
2328         switch (endian) {
2329         case DEVICE_LITTLE_ENDIAN:
2330             val = lduw_le_p(ptr);
2331             break;
2332         case DEVICE_BIG_ENDIAN:
2333             val = lduw_be_p(ptr);
2334             break;
2335         default:
2336             val = lduw_p(ptr);
2337             break;
2338         }
2339     }
2340     return val;
2341 }
2342
2343 uint32_t lduw_phys(hwaddr addr)
2344 {
2345     return lduw_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
2346 }
2347
2348 uint32_t lduw_le_phys(hwaddr addr)
2349 {
2350     return lduw_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
2351 }
2352
2353 uint32_t lduw_be_phys(hwaddr addr)
2354 {
2355     return lduw_phys_internal(addr, DEVICE_BIG_ENDIAN);
2356 }
2357
2358 /* warning: addr must be aligned.  The RAM page is not marked as dirty
2359    and the code inside is not invalidated.  This is useful if the dirty
2360    bits are used to track modified PTEs. */
2361 void stl_phys_notdirty(hwaddr addr, uint32_t val)
2362 {
2363     uint8_t *ptr;
2364     MemoryRegionSection *section;
2365
2366     section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
2367
2368     if (!memory_region_is_ram(section->mr) || section->readonly) {
2369         addr = memory_region_section_addr(section, addr);
2370         if (memory_region_is_ram(section->mr)) {
2371             section = &phys_sections[phys_section_rom];
2372         }
2373         io_mem_write(section->mr, addr, val, 4);
2374     } else {
2375         unsigned long addr1 = (memory_region_get_ram_addr(section->mr)
2376                                & TARGET_PAGE_MASK)
2377             + memory_region_section_addr(section, addr);
2378         ptr = qemu_get_ram_ptr(addr1);
2379         stl_p(ptr, val);
2380
2381         if (unlikely(in_migration)) {
2382             if (!cpu_physical_memory_is_dirty(addr1)) {
2383                 /* invalidate code */
2384                 tb_invalidate_phys_page_range(addr1, addr1 + 4, 0);
2385                 /* set dirty bit */
2386                 cpu_physical_memory_set_dirty_flags(
2387                     addr1, (0xff & ~CODE_DIRTY_FLAG));
2388             }
2389         }
2390     }
2391 }
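
/*
 * Illustrative sketch, not compiled: an MMU helper setting the "accessed"
 * bit in a 32-bit page table entry.  Using stl_phys_notdirty() keeps the
 * dirty bits of the page-table page usable for PTE-modification tracking,
 * as described in the comment above.  The PTE address and bit layout are
 * assumptions made for the example.
 */
#if 0
static void example_set_pte_accessed(hwaddr pte_addr)
{
    uint32_t pte = ldl_phys(pte_addr);

    stl_phys_notdirty(pte_addr, pte | 0x20 /* accessed */);
}
#endif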
2392
2393 void stq_phys_notdirty(hwaddr addr, uint64_t val)
2394 {
2395     uint8_t *ptr;
2396     MemoryRegionSection *section;
2397
2398     section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
2399
2400     if (!memory_region_is_ram(section->mr) || section->readonly) {
2401         addr = memory_region_section_addr(section, addr);
2402         if (memory_region_is_ram(section->mr)) {
2403             section = &phys_sections[phys_section_rom];
2404         }
2405 #ifdef TARGET_WORDS_BIGENDIAN
2406         io_mem_write(section->mr, addr, val >> 32, 4);
2407         io_mem_write(section->mr, addr + 4, (uint32_t)val, 4);
2408 #else
2409         io_mem_write(section->mr, addr, (uint32_t)val, 4);
2410         io_mem_write(section->mr, addr + 4, val >> 32, 4);
2411 #endif
2412     } else {
2413         ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section->mr)
2414                                 & TARGET_PAGE_MASK)
2415                                + memory_region_section_addr(section, addr));
2416         stq_p(ptr, val);
2417     }
2418 }
2419
2420 /* warning: addr must be aligned */
2421 static inline void stl_phys_internal(hwaddr addr, uint32_t val,
2422                                      enum device_endian endian)
2423 {
2424     uint8_t *ptr;
2425     MemoryRegionSection *section;
2426
2427     section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
2428
2429     if (!memory_region_is_ram(section->mr) || section->readonly) {
2430         addr = memory_region_section_addr(section, addr);
2431         if (memory_region_is_ram(section->mr)) {
2432             section = &phys_sections[phys_section_rom];
2433         }
2434 #if defined(TARGET_WORDS_BIGENDIAN)
2435         if (endian == DEVICE_LITTLE_ENDIAN) {
2436             val = bswap32(val);
2437         }
2438 #else
2439         if (endian == DEVICE_BIG_ENDIAN) {
2440             val = bswap32(val);
2441         }
2442 #endif
2443         io_mem_write(section->mr, addr, val, 4);
2444     } else {
2445         unsigned long addr1;
2446         addr1 = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
2447             + memory_region_section_addr(section, addr);
2448         /* RAM case */
2449         ptr = qemu_get_ram_ptr(addr1);
2450         switch (endian) {
2451         case DEVICE_LITTLE_ENDIAN:
2452             stl_le_p(ptr, val);
2453             break;
2454         case DEVICE_BIG_ENDIAN:
2455             stl_be_p(ptr, val);
2456             break;
2457         default:
2458             stl_p(ptr, val);
2459             break;
2460         }
2461         invalidate_and_set_dirty(addr1, 4);
2462     }
2463 }
2464
2465 void stl_phys(hwaddr addr, uint32_t val)
2466 {
2467     stl_phys_internal(addr, val, DEVICE_NATIVE_ENDIAN);
2468 }
2469
2470 void stl_le_phys(hwaddr addr, uint32_t val)
2471 {
2472     stl_phys_internal(addr, val, DEVICE_LITTLE_ENDIAN);
2473 }
2474
2475 void stl_be_phys(hwaddr addr, uint32_t val)
2476 {
2477     stl_phys_internal(addr, val, DEVICE_BIG_ENDIAN);
2478 }
2479
2480 /* XXX: optimize */
2481 void stb_phys(hwaddr addr, uint32_t val)
2482 {
2483     uint8_t v = val;
2484     cpu_physical_memory_write(addr, &v, 1);
2485 }
2486
2487 /* warning: addr must be aligned */
2488 static inline void stw_phys_internal(hwaddr addr, uint32_t val,
2489                                      enum device_endian endian)
2490 {
2491     uint8_t *ptr;
2492     MemoryRegionSection *section;
2493
2494     section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
2495
2496     if (!memory_region_is_ram(section->mr) || section->readonly) {
2497         addr = memory_region_section_addr(section, addr);
2498         if (memory_region_is_ram(section->mr)) {
2499             section = &phys_sections[phys_section_rom];
2500         }
2501 #if defined(TARGET_WORDS_BIGENDIAN)
2502         if (endian == DEVICE_LITTLE_ENDIAN) {
2503             val = bswap16(val);
2504         }
2505 #else
2506         if (endian == DEVICE_BIG_ENDIAN) {
2507             val = bswap16(val);
2508         }
2509 #endif
2510         io_mem_write(section->mr, addr, val, 2);
2511     } else {
2512         unsigned long addr1;
2513         addr1 = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
2514             + memory_region_section_addr(section, addr);
2515         /* RAM case */
2516         ptr = qemu_get_ram_ptr(addr1);
2517         switch (endian) {
2518         case DEVICE_LITTLE_ENDIAN:
2519             stw_le_p(ptr, val);
2520             break;
2521         case DEVICE_BIG_ENDIAN:
2522             stw_be_p(ptr, val);
2523             break;
2524         default:
2525             stw_p(ptr, val);
2526             break;
2527         }
2528         invalidate_and_set_dirty(addr1, 2);
2529     }
2530 }
2531
2532 void stw_phys(hwaddr addr, uint32_t val)
2533 {
2534     stw_phys_internal(addr, val, DEVICE_NATIVE_ENDIAN);
2535 }
2536
2537 void stw_le_phys(hwaddr addr, uint32_t val)
2538 {
2539     stw_phys_internal(addr, val, DEVICE_LITTLE_ENDIAN);
2540 }
2541
2542 void stw_be_phys(hwaddr addr, uint32_t val)
2543 {
2544     stw_phys_internal(addr, val, DEVICE_BIG_ENDIAN);
2545 }
2546
2547 /* XXX: optimize */
2548 void stq_phys(hwaddr addr, uint64_t val)
2549 {
2550     val = tswap64(val);
2551     cpu_physical_memory_write(addr, &val, 8);
2552 }
2553
2554 void stq_le_phys(hwaddr addr, uint64_t val)
2555 {
2556     val = cpu_to_le64(val);
2557     cpu_physical_memory_write(addr, &val, 8);
2558 }
2559
2560 void stq_be_phys(hwaddr addr, uint64_t val)
2561 {
2562     val = cpu_to_be64(val);
2563     cpu_physical_memory_write(addr, &val, 8);
2564 }
2565
2566 /* virtual memory access for debug (includes writing to ROM) */
2567 int cpu_memory_rw_debug(CPUArchState *env, target_ulong addr,
2568                         uint8_t *buf, int len, int is_write)
2569 {
2570     int l;
2571     hwaddr phys_addr;
2572     target_ulong page;
2573
2574     while (len > 0) {
2575         page = addr & TARGET_PAGE_MASK;
2576         phys_addr = cpu_get_phys_page_debug(env, page);
2577         /* if no physical page mapped, return an error */
2578         if (phys_addr == -1)
2579             return -1;
2580         l = (page + TARGET_PAGE_SIZE) - addr;
2581         if (l > len)
2582             l = len;
2583         phys_addr += (addr & ~TARGET_PAGE_MASK);
2584         if (is_write)
2585             cpu_physical_memory_write_rom(phys_addr, buf, l);
2586         else
2587             cpu_physical_memory_rw(phys_addr, buf, l, is_write);
2588         len -= l;
2589         buf += l;
2590         addr += l;
2591     }
2592     return 0;
2593 }
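
/*
 * Illustrative sketch, not compiled: how a debugger stub might read a range
 * of guest virtual memory through cpu_memory_rw_debug().  The wrapper name
 * and its arguments are assumptions made for the example.
 */
#if 0
static bool example_debug_peek(CPUArchState *env, target_ulong vaddr,
                               uint8_t *out, int len)
{
    return cpu_memory_rw_debug(env, vaddr, out, len, 0) == 0;
}
#endif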
2594 #endif
2595
2596 #if !defined(CONFIG_USER_ONLY)
2597
2598 /*
2599  * A helper function for the _utterly broken_ virtio device model to find
2600  * out whether it is running on a big-endian machine.  Don't do this at home, kids!
2601  */
2602 bool virtio_is_big_endian(void);
2603 bool virtio_is_big_endian(void)
2604 {
2605 #if defined(TARGET_WORDS_BIGENDIAN)
2606     return true;
2607 #else
2608     return false;
2609 #endif
2610 }
2611
2612 #endif
2613
2614 #ifndef CONFIG_USER_ONLY
2615 bool cpu_physical_memory_is_io(hwaddr phys_addr)
2616 {
2617     MemoryRegionSection *section;
2618
2619     section = phys_page_find(address_space_memory.dispatch,
2620                              phys_addr >> TARGET_PAGE_BITS);
2621
2622     return !(memory_region_is_ram(section->mr) ||
2623              memory_region_is_romd(section->mr));
2624 }
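
/*
 * Illustrative sketch, not compiled: counting the device (non-RAM/ROM)
 * pages in a physical address range with cpu_physical_memory_is_io().
 * The range bounds are assumptions made for the example.
 */
#if 0
static int example_count_io_pages(hwaddr start, hwaddr end)
{
    int n = 0;
    hwaddr a;

    for (a = start; a < end; a += TARGET_PAGE_SIZE) {
        if (cpu_physical_memory_is_io(a)) {
            n++;
        }
    }
    return n;
}
#endif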
2625 #endif