exec: move include files to include/exec/
1 /*
2  *  Virtual page mapping
3  *
4  *  Copyright (c) 2003 Fabrice Bellard
5  *
6  * This library is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2 of the License, or (at your option) any later version.
10  *
11  * This library is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18  */
19 #include "config.h"
20 #ifdef _WIN32
21 #include <windows.h>
22 #else
23 #include <sys/types.h>
24 #include <sys/mman.h>
25 #endif
26
27 #include "qemu-common.h"
28 #include "cpu.h"
29 #include "tcg.h"
30 #include "hw/hw.h"
31 #include "hw/qdev.h"
32 #include "osdep.h"
33 #include "kvm.h"
34 #include "hw/xen.h"
35 #include "qemu-timer.h"
36 #include "qemu-config.h"
37 #include "exec/memory.h"
38 #include "dma.h"
39 #include "exec/address-spaces.h"
40 #if defined(CONFIG_USER_ONLY)
41 #include <qemu.h>
42 #else /* !CONFIG_USER_ONLY */
43 #include "xen-mapcache.h"
44 #include "trace.h"
45 #endif
46
47 #include "exec/cputlb.h"
48 #include "translate-all.h"
49
50 #include "exec/memory-internal.h"
51
52 //#define DEBUG_UNASSIGNED
53 //#define DEBUG_SUBPAGE
54
55 #if !defined(CONFIG_USER_ONLY)
56 int phys_ram_fd;
57 static int in_migration;
58
59 RAMList ram_list = { .blocks = QLIST_HEAD_INITIALIZER(ram_list.blocks) };
60
61 static MemoryRegion *system_memory;
62 static MemoryRegion *system_io;
63
64 AddressSpace address_space_io;
65 AddressSpace address_space_memory;
66 DMAContext dma_context_memory;
67
68 MemoryRegion io_mem_ram, io_mem_rom, io_mem_unassigned, io_mem_notdirty;
69 static MemoryRegion io_mem_subpage_ram;
70
71 #endif
72
73 CPUArchState *first_cpu;
74 /* current CPU in the current thread. It is only valid inside
75    cpu_exec() */
76 DEFINE_TLS(CPUArchState *,cpu_single_env);
77 /* 0 = Do not count executed instructions.
78    1 = Precise instruction counting.
79    2 = Adaptive rate instruction counting.  */
80 int use_icount = 0;
81
82 #if !defined(CONFIG_USER_ONLY)
83
84 static MemoryRegionSection *phys_sections;
85 static unsigned phys_sections_nb, phys_sections_nb_alloc;
86 static uint16_t phys_section_unassigned;
87 static uint16_t phys_section_notdirty;
88 static uint16_t phys_section_rom;
89 static uint16_t phys_section_watch;
90
91 /* Simple allocator for PhysPageEntry nodes */
92 static PhysPageEntry (*phys_map_nodes)[L2_SIZE];
93 static unsigned phys_map_nodes_nb, phys_map_nodes_nb_alloc;
94
95 #define PHYS_MAP_NODE_NIL (((uint16_t)~0) >> 1)
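/* PHYS_MAP_NODE_NIL is the all-ones value of what is presumably a 15-bit
 * ptr field in PhysPageEntry (see the (uint16_t)~0 >> 1 encoding above),
 * i.e. 0x7fff marks "no node allocated". */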
96
97 static void io_mem_init(void);
98 static void memory_map_init(void);
99 static void *qemu_safe_ram_ptr(ram_addr_t addr);
100
101 static MemoryRegion io_mem_watch;
102 #endif
103
104 #if !defined(CONFIG_USER_ONLY)
105
106 static void phys_map_node_reserve(unsigned nodes)
107 {
108     if (phys_map_nodes_nb + nodes > phys_map_nodes_nb_alloc) {
109         typedef PhysPageEntry Node[L2_SIZE];
110         phys_map_nodes_nb_alloc = MAX(phys_map_nodes_nb_alloc * 2, 16);
111         phys_map_nodes_nb_alloc = MAX(phys_map_nodes_nb_alloc,
112                                       phys_map_nodes_nb + nodes);
113         phys_map_nodes = g_renew(Node, phys_map_nodes,
114                                  phys_map_nodes_nb_alloc);
115     }
116 }
117
118 static uint16_t phys_map_node_alloc(void)
119 {
120     unsigned i;
121     uint16_t ret;
122
123     ret = phys_map_nodes_nb++;
124     assert(ret != PHYS_MAP_NODE_NIL);
125     assert(ret != phys_map_nodes_nb_alloc);
126     for (i = 0; i < L2_SIZE; ++i) {
127         phys_map_nodes[ret][i].is_leaf = 0;
128         phys_map_nodes[ret][i].ptr = PHYS_MAP_NODE_NIL;
129     }
130     return ret;
131 }
132
133 static void phys_map_nodes_reset(void)
134 {
135     phys_map_nodes_nb = 0;
136 }
137
138
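/* Fill [*index, *index + *nb) pages with 'leaf' in the radix tree rooted at
 * *lp, allocating intermediate nodes on demand.  Each level covers L2_BITS
 * bits of the page index; a whole aligned sub-tree is collapsed into a single
 * entry at the current level, otherwise we recurse one level down. */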
139 static void phys_page_set_level(PhysPageEntry *lp, hwaddr *index,
140                                 hwaddr *nb, uint16_t leaf,
141                                 int level)
142 {
143     PhysPageEntry *p;
144     int i;
145     hwaddr step = (hwaddr)1 << (level * L2_BITS);
146
147     if (!lp->is_leaf && lp->ptr == PHYS_MAP_NODE_NIL) {
148         lp->ptr = phys_map_node_alloc();
149         p = phys_map_nodes[lp->ptr];
150         if (level == 0) {
151             for (i = 0; i < L2_SIZE; i++) {
152                 p[i].is_leaf = 1;
153                 p[i].ptr = phys_section_unassigned;
154             }
155         }
156     } else {
157         p = phys_map_nodes[lp->ptr];
158     }
159     lp = &p[(*index >> (level * L2_BITS)) & (L2_SIZE - 1)];
160
161     while (*nb && lp < &p[L2_SIZE]) {
162         if ((*index & (step - 1)) == 0 && *nb >= step) {
163             lp->is_leaf = true;
164             lp->ptr = leaf;
165             *index += step;
166             *nb -= step;
167         } else {
168             phys_page_set_level(lp, index, nb, leaf, level - 1);
169         }
170         ++lp;
171     }
172 }
173
174 static void phys_page_set(AddressSpaceDispatch *d,
175                           hwaddr index, hwaddr nb,
176                           uint16_t leaf)
177 {
178     /* Wildly overreserve - it doesn't matter much. */
179     phys_map_node_reserve(3 * P_L2_LEVELS);
180
181     phys_page_set_level(&d->phys_map, &index, &nb, leaf, P_L2_LEVELS - 1);
182 }
183
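/* Look up the MemoryRegionSection covering page 'index' by walking the
 * P_L2_LEVELS-deep radix tree; pages that were never mapped resolve to the
 * phys_section_unassigned entry. */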
184 MemoryRegionSection *phys_page_find(AddressSpaceDispatch *d, hwaddr index)
185 {
186     PhysPageEntry lp = d->phys_map;
187     PhysPageEntry *p;
188     int i;
189     uint16_t s_index = phys_section_unassigned;
190
191     for (i = P_L2_LEVELS - 1; i >= 0 && !lp.is_leaf; i--) {
192         if (lp.ptr == PHYS_MAP_NODE_NIL) {
193             goto not_found;
194         }
195         p = phys_map_nodes[lp.ptr];
196         lp = p[(index >> (i * L2_BITS)) & (L2_SIZE - 1)];
197     }
198
199     s_index = lp.ptr;
200 not_found:
201     return &phys_sections[s_index];
202 }
203
204 bool memory_region_is_unassigned(MemoryRegion *mr)
205 {
206     return mr != &io_mem_ram && mr != &io_mem_rom
207         && mr != &io_mem_notdirty && !mr->rom_device
208         && mr != &io_mem_watch;
209 }
210 #endif
211
212 void cpu_exec_init_all(void)
213 {
214 #if !defined(CONFIG_USER_ONLY)
215     memory_map_init();
216     io_mem_init();
217 #endif
218 }
219
220 #if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
221
222 static int cpu_common_post_load(void *opaque, int version_id)
223 {
224     CPUArchState *env = opaque;
225
226     /* 0x01 was CPU_INTERRUPT_EXIT. This line can be removed when the
227        version_id is increased. */
228     env->interrupt_request &= ~0x01;
229     tlb_flush(env, 1);
230
231     return 0;
232 }
233
234 static const VMStateDescription vmstate_cpu_common = {
235     .name = "cpu_common",
236     .version_id = 1,
237     .minimum_version_id = 1,
238     .minimum_version_id_old = 1,
239     .post_load = cpu_common_post_load,
240     .fields      = (VMStateField []) {
241         VMSTATE_UINT32(halted, CPUArchState),
242         VMSTATE_UINT32(interrupt_request, CPUArchState),
243         VMSTATE_END_OF_LIST()
244     }
245 };
246 #endif
247
248 CPUArchState *qemu_get_cpu(int cpu)
249 {
250     CPUArchState *env = first_cpu;
251
252     while (env) {
253         if (env->cpu_index == cpu)
254             break;
255         env = env->next_cpu;
256     }
257
258     return env;
259 }
260
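/* Register a freshly created CPU: assign it the next free cpu_index, append
 * it at the tail of the first_cpu list and, for softmmu targets that define
 * CPU_SAVE_VERSION, hook up its VMState and savevm handlers. */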
261 void cpu_exec_init(CPUArchState *env)
262 {
263 #ifndef CONFIG_USER_ONLY
264     CPUState *cpu = ENV_GET_CPU(env);
265 #endif
266     CPUArchState **penv;
267     int cpu_index;
268
269 #if defined(CONFIG_USER_ONLY)
270     cpu_list_lock();
271 #endif
272     env->next_cpu = NULL;
273     penv = &first_cpu;
274     cpu_index = 0;
275     while (*penv != NULL) {
276         penv = &(*penv)->next_cpu;
277         cpu_index++;
278     }
279     env->cpu_index = cpu_index;
280     env->numa_node = 0;
281     QTAILQ_INIT(&env->breakpoints);
282     QTAILQ_INIT(&env->watchpoints);
283 #ifndef CONFIG_USER_ONLY
284     cpu->thread_id = qemu_get_thread_id();
285 #endif
286     *penv = env;
287 #if defined(CONFIG_USER_ONLY)
288     cpu_list_unlock();
289 #endif
290 #if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
291     vmstate_register(NULL, cpu_index, &vmstate_cpu_common, env);
292     register_savevm(NULL, "cpu", cpu_index, CPU_SAVE_VERSION,
293                     cpu_save, cpu_load, env);
294 #endif
295 }
296
297 #if defined(TARGET_HAS_ICE)
298 #if defined(CONFIG_USER_ONLY)
299 static void breakpoint_invalidate(CPUArchState *env, target_ulong pc)
300 {
301     tb_invalidate_phys_page_range(pc, pc + 1, 0);
302 }
303 #else
304 static void breakpoint_invalidate(CPUArchState *env, target_ulong pc)
305 {
306     tb_invalidate_phys_addr(cpu_get_phys_page_debug(env, pc) |
307             (pc & ~TARGET_PAGE_MASK));
308 }
309 #endif
310 #endif /* TARGET_HAS_ICE */
311
312 #if defined(CONFIG_USER_ONLY)
313 void cpu_watchpoint_remove_all(CPUArchState *env, int mask)
314
315 {
316 }
317
318 int cpu_watchpoint_insert(CPUArchState *env, target_ulong addr, target_ulong len,
319                           int flags, CPUWatchpoint **watchpoint)
320 {
321     return -ENOSYS;
322 }
323 #else
324 /* Add a watchpoint.  */
325 int cpu_watchpoint_insert(CPUArchState *env, target_ulong addr, target_ulong len,
326                           int flags, CPUWatchpoint **watchpoint)
327 {
328     target_ulong len_mask = ~(len - 1);
329     CPUWatchpoint *wp;
330
331     /* sanity checks: allow power-of-2 lengths, deny unaligned watchpoints */
332     if ((len & (len - 1)) || (addr & ~len_mask) ||
333             len == 0 || len > TARGET_PAGE_SIZE) {
334         fprintf(stderr, "qemu: tried to set invalid watchpoint at "
335                 TARGET_FMT_lx ", len=" TARGET_FMT_lu "\n", addr, len);
336         return -EINVAL;
337     }
338     wp = g_malloc(sizeof(*wp));
339
340     wp->vaddr = addr;
341     wp->len_mask = len_mask;
342     wp->flags = flags;
343
344     /* keep all GDB-injected watchpoints in front */
345     if (flags & BP_GDB)
346         QTAILQ_INSERT_HEAD(&env->watchpoints, wp, entry);
347     else
348         QTAILQ_INSERT_TAIL(&env->watchpoints, wp, entry);
349
350     tlb_flush_page(env, addr);
351
352     if (watchpoint)
353         *watchpoint = wp;
354     return 0;
355 }
356
357 /* Remove a specific watchpoint.  */
358 int cpu_watchpoint_remove(CPUArchState *env, target_ulong addr, target_ulong len,
359                           int flags)
360 {
361     target_ulong len_mask = ~(len - 1);
362     CPUWatchpoint *wp;
363
364     QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
365         if (addr == wp->vaddr && len_mask == wp->len_mask
366                 && flags == (wp->flags & ~BP_WATCHPOINT_HIT)) {
367             cpu_watchpoint_remove_by_ref(env, wp);
368             return 0;
369         }
370     }
371     return -ENOENT;
372 }
373
374 /* Remove a specific watchpoint by reference.  */
375 void cpu_watchpoint_remove_by_ref(CPUArchState *env, CPUWatchpoint *watchpoint)
376 {
377     QTAILQ_REMOVE(&env->watchpoints, watchpoint, entry);
378
379     tlb_flush_page(env, watchpoint->vaddr);
380
381     g_free(watchpoint);
382 }
383
384 /* Remove all matching watchpoints.  */
385 void cpu_watchpoint_remove_all(CPUArchState *env, int mask)
386 {
387     CPUWatchpoint *wp, *next;
388
389     QTAILQ_FOREACH_SAFE(wp, &env->watchpoints, entry, next) {
390         if (wp->flags & mask)
391             cpu_watchpoint_remove_by_ref(env, wp);
392     }
393 }
394 #endif
395
396 /* Add a breakpoint.  */
397 int cpu_breakpoint_insert(CPUArchState *env, target_ulong pc, int flags,
398                           CPUBreakpoint **breakpoint)
399 {
400 #if defined(TARGET_HAS_ICE)
401     CPUBreakpoint *bp;
402
403     bp = g_malloc(sizeof(*bp));
404
405     bp->pc = pc;
406     bp->flags = flags;
407
408     /* keep all GDB-injected breakpoints in front */
409     if (flags & BP_GDB)
410         QTAILQ_INSERT_HEAD(&env->breakpoints, bp, entry);
411     else
412         QTAILQ_INSERT_TAIL(&env->breakpoints, bp, entry);
413
414     breakpoint_invalidate(env, pc);
415
416     if (breakpoint)
417         *breakpoint = bp;
418     return 0;
419 #else
420     return -ENOSYS;
421 #endif
422 }
423
424 /* Remove a specific breakpoint.  */
425 int cpu_breakpoint_remove(CPUArchState *env, target_ulong pc, int flags)
426 {
427 #if defined(TARGET_HAS_ICE)
428     CPUBreakpoint *bp;
429
430     QTAILQ_FOREACH(bp, &env->breakpoints, entry) {
431         if (bp->pc == pc && bp->flags == flags) {
432             cpu_breakpoint_remove_by_ref(env, bp);
433             return 0;
434         }
435     }
436     return -ENOENT;
437 #else
438     return -ENOSYS;
439 #endif
440 }
441
442 /* Remove a specific breakpoint by reference.  */
443 void cpu_breakpoint_remove_by_ref(CPUArchState *env, CPUBreakpoint *breakpoint)
444 {
445 #if defined(TARGET_HAS_ICE)
446     QTAILQ_REMOVE(&env->breakpoints, breakpoint, entry);
447
448     breakpoint_invalidate(env, breakpoint->pc);
449
450     g_free(breakpoint);
451 #endif
452 }
453
454 /* Remove all matching breakpoints. */
455 void cpu_breakpoint_remove_all(CPUArchState *env, int mask)
456 {
457 #if defined(TARGET_HAS_ICE)
458     CPUBreakpoint *bp, *next;
459
460     QTAILQ_FOREACH_SAFE(bp, &env->breakpoints, entry, next) {
461         if (bp->flags & mask)
462             cpu_breakpoint_remove_by_ref(env, bp);
463     }
464 #endif
465 }
466
467 /* enable or disable single step mode. EXCP_DEBUG is returned by the
468    CPU loop after each instruction */
469 void cpu_single_step(CPUArchState *env, int enabled)
470 {
471 #if defined(TARGET_HAS_ICE)
472     if (env->singlestep_enabled != enabled) {
473         env->singlestep_enabled = enabled;
474         if (kvm_enabled())
475             kvm_update_guest_debug(env, 0);
476         else {
477             /* must flush all the translated code to avoid inconsistencies */
478             /* XXX: only flush what is necessary */
479             tb_flush(env);
480         }
481     }
482 #endif
483 }
484
485 void cpu_reset_interrupt(CPUArchState *env, int mask)
486 {
487     env->interrupt_request &= ~mask;
488 }
489
490 void cpu_exit(CPUArchState *env)
491 {
492     env->exit_request = 1;
493     cpu_unlink_tb(env);
494 }
495
496 void cpu_abort(CPUArchState *env, const char *fmt, ...)
497 {
498     va_list ap;
499     va_list ap2;
500
501     va_start(ap, fmt);
502     va_copy(ap2, ap);
503     fprintf(stderr, "qemu: fatal: ");
504     vfprintf(stderr, fmt, ap);
505     fprintf(stderr, "\n");
506     cpu_dump_state(env, stderr, fprintf, CPU_DUMP_FPU | CPU_DUMP_CCOP);
507     if (qemu_log_enabled()) {
508         qemu_log("qemu: fatal: ");
509         qemu_log_vprintf(fmt, ap2);
510         qemu_log("\n");
511         log_cpu_state(env, CPU_DUMP_FPU | CPU_DUMP_CCOP);
512         qemu_log_flush();
513         qemu_log_close();
514     }
515     va_end(ap2);
516     va_end(ap);
517 #if defined(CONFIG_USER_ONLY)
518     {
519         struct sigaction act;
520         sigfillset(&act.sa_mask);
521         act.sa_handler = SIG_DFL;
522         sigaction(SIGABRT, &act, NULL);
523     }
524 #endif
525     abort();
526 }
527
528 CPUArchState *cpu_copy(CPUArchState *env)
529 {
530     CPUArchState *new_env = cpu_init(env->cpu_model_str);
531     CPUArchState *next_cpu = new_env->next_cpu;
532     int cpu_index = new_env->cpu_index;
533 #if defined(TARGET_HAS_ICE)
534     CPUBreakpoint *bp;
535     CPUWatchpoint *wp;
536 #endif
537
538     memcpy(new_env, env, sizeof(CPUArchState));
539
540     /* Preserve chaining and index. */
541     new_env->next_cpu = next_cpu;
542     new_env->cpu_index = cpu_index;
543
544     /* Clone all break/watchpoints.
545        Note: Once we support ptrace with hw-debug register access, make sure
546        BP_CPU break/watchpoints are handled correctly on clone. */
547     QTAILQ_INIT(&new_env->breakpoints);
548     QTAILQ_INIT(&new_env->watchpoints);
549 #if defined(TARGET_HAS_ICE)
550     QTAILQ_FOREACH(bp, &env->breakpoints, entry) {
551         cpu_breakpoint_insert(new_env, bp->pc, bp->flags, NULL);
552     }
553     QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
554         cpu_watchpoint_insert(new_env, wp->vaddr, (~wp->len_mask) + 1,
555                               wp->flags, NULL);
556     }
557 #endif
558
559     return new_env;
560 }
561
562 #if !defined(CONFIG_USER_ONLY)
563 static void tlb_reset_dirty_range_all(ram_addr_t start, ram_addr_t end,
564                                       uintptr_t length)
565 {
566     uintptr_t start1;
567
568     /* we modify the TLB cache so that the dirty bit will be set again
569        when accessing the range */
570     start1 = (uintptr_t)qemu_safe_ram_ptr(start);
571     /* Check that we don't span multiple blocks - this breaks the
572        address comparisons below.  */
573     if ((uintptr_t)qemu_safe_ram_ptr(end - 1) - start1
574             != (end - 1) - start) {
575         abort();
576     }
577     cpu_tlb_reset_dirty_all(start1, length);
578
579 }
580
581 /* Note: start and end must be within the same ram block.  */
582 void cpu_physical_memory_reset_dirty(ram_addr_t start, ram_addr_t end,
583                                      int dirty_flags)
584 {
585     uintptr_t length;
586
587     start &= TARGET_PAGE_MASK;
588     end = TARGET_PAGE_ALIGN(end);
589
590     length = end - start;
591     if (length == 0)
592         return;
593     cpu_physical_memory_mask_dirty_range(start, length, dirty_flags);
594
595     if (tcg_enabled()) {
596         tlb_reset_dirty_range_all(start, end, length);
597     }
598 }
599
600 static int cpu_physical_memory_set_dirty_tracking(int enable)
601 {
602     int ret = 0;
603     in_migration = enable;
604     return ret;
605 }
606
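/* Compute the iotlb value that the TLB will store for a page of 'section':
 * for RAM it is the ram address of the page, or'ed with the notdirty/rom
 * pseudo-section index so writes can be intercepted; for MMIO it is the
 * section index plus the offset within the section.  Pages covered by a
 * watchpoint are additionally redirected to the watch section and flagged
 * TLB_MMIO so accesses take the slow path. */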
607 hwaddr memory_region_section_get_iotlb(CPUArchState *env,
608                                                    MemoryRegionSection *section,
609                                                    target_ulong vaddr,
610                                                    hwaddr paddr,
611                                                    int prot,
612                                                    target_ulong *address)
613 {
614     hwaddr iotlb;
615     CPUWatchpoint *wp;
616
617     if (memory_region_is_ram(section->mr)) {
618         /* Normal RAM.  */
619         iotlb = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
620             + memory_region_section_addr(section, paddr);
621         if (!section->readonly) {
622             iotlb |= phys_section_notdirty;
623         } else {
624             iotlb |= phys_section_rom;
625         }
626     } else {
627         /* IO handlers are currently passed a physical address.
628            It would be nice to pass an offset from the base address
629            of that region.  This would avoid having to special case RAM,
630            and avoid full address decoding in every device.
631            We can't use the high bits of pd for this because
632            IO_MEM_ROMD uses these as a ram address.  */
633         iotlb = section - phys_sections;
634         iotlb += memory_region_section_addr(section, paddr);
635     }
636
637     /* Make accesses to pages with watchpoints go via the
638        watchpoint trap routines.  */
639     QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
640         if (vaddr == (wp->vaddr & TARGET_PAGE_MASK)) {
641             /* Avoid trapping reads of pages with a write breakpoint. */
642             if ((prot & PAGE_WRITE) || (wp->flags & BP_MEM_READ)) {
643                 iotlb = phys_section_watch + paddr;
644                 *address |= TLB_MMIO;
645                 break;
646             }
647         }
648     }
649
650     return iotlb;
651 }
652 #endif /* !defined(CONFIG_USER_ONLY) */
653
654 #if !defined(CONFIG_USER_ONLY)
655
656 #define SUBPAGE_IDX(addr) ((addr) & ~TARGET_PAGE_MASK)
657 typedef struct subpage_t {
658     MemoryRegion iomem;
659     hwaddr base;
660     uint16_t sub_section[TARGET_PAGE_SIZE];
661 } subpage_t;
662
663 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
664                              uint16_t section);
665 static subpage_t *subpage_init(hwaddr base);
666 static void destroy_page_desc(uint16_t section_index)
667 {
668     MemoryRegionSection *section = &phys_sections[section_index];
669     MemoryRegion *mr = section->mr;
670
671     if (mr->subpage) {
672         subpage_t *subpage = container_of(mr, subpage_t, iomem);
673         memory_region_destroy(&subpage->iomem);
674         g_free(subpage);
675     }
676 }
677
678 static void destroy_l2_mapping(PhysPageEntry *lp, unsigned level)
679 {
680     unsigned i;
681     PhysPageEntry *p;
682
683     if (lp->ptr == PHYS_MAP_NODE_NIL) {
684         return;
685     }
686
687     p = phys_map_nodes[lp->ptr];
688     for (i = 0; i < L2_SIZE; ++i) {
689         if (!p[i].is_leaf) {
690             destroy_l2_mapping(&p[i], level - 1);
691         } else {
692             destroy_page_desc(p[i].ptr);
693         }
694     }
695     lp->is_leaf = 0;
696     lp->ptr = PHYS_MAP_NODE_NIL;
697 }
698
699 static void destroy_all_mappings(AddressSpaceDispatch *d)
700 {
701     destroy_l2_mapping(&d->phys_map, P_L2_LEVELS - 1);
702     phys_map_nodes_reset();
703 }
704
705 static uint16_t phys_section_add(MemoryRegionSection *section)
706 {
707     if (phys_sections_nb == phys_sections_nb_alloc) {
708         phys_sections_nb_alloc = MAX(phys_sections_nb_alloc * 2, 16);
709         phys_sections = g_renew(MemoryRegionSection, phys_sections,
710                                 phys_sections_nb_alloc);
711     }
712     phys_sections[phys_sections_nb] = *section;
713     return phys_sections_nb++;
714 }
715
716 static void phys_sections_clear(void)
717 {
718     phys_sections_nb = 0;
719 }
720
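/* Map a section that does not cover a whole target page: the page is backed
 * by a subpage_t whose sub_section[] table dispatches each byte offset within
 * the page to the proper MemoryRegionSection. */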
721 static void register_subpage(AddressSpaceDispatch *d, MemoryRegionSection *section)
722 {
723     subpage_t *subpage;
724     hwaddr base = section->offset_within_address_space
725         & TARGET_PAGE_MASK;
726     MemoryRegionSection *existing = phys_page_find(d, base >> TARGET_PAGE_BITS);
727     MemoryRegionSection subsection = {
728         .offset_within_address_space = base,
729         .size = TARGET_PAGE_SIZE,
730     };
731     hwaddr start, end;
732
733     assert(existing->mr->subpage || existing->mr == &io_mem_unassigned);
734
735     if (!(existing->mr->subpage)) {
736         subpage = subpage_init(base);
737         subsection.mr = &subpage->iomem;
738         phys_page_set(d, base >> TARGET_PAGE_BITS, 1,
739                       phys_section_add(&subsection));
740     } else {
741         subpage = container_of(existing->mr, subpage_t, iomem);
742     }
743     start = section->offset_within_address_space & ~TARGET_PAGE_MASK;
744     end = start + section->size - 1;
745     subpage_register(subpage, start, end, phys_section_add(section));
746 }
747
748
749 static void register_multipage(AddressSpaceDispatch *d, MemoryRegionSection *section)
750 {
751     hwaddr start_addr = section->offset_within_address_space;
752     ram_addr_t size = section->size;
753     hwaddr addr;
754     uint16_t section_index = phys_section_add(section);
755
756     assert(size);
757
758     addr = start_addr;
759     phys_page_set(d, addr >> TARGET_PAGE_BITS, size >> TARGET_PAGE_BITS,
760                   section_index);
761 }
762
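/* MemoryListener callback: split an incoming section into an unaligned head,
 * a middle that is registered as multipage runs when the region offset is
 * page aligned (page by page through subpages otherwise), and an unaligned
 * tail handled through the subpage machinery above. */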
763 static void mem_add(MemoryListener *listener, MemoryRegionSection *section)
764 {
765     AddressSpaceDispatch *d = container_of(listener, AddressSpaceDispatch, listener);
766     MemoryRegionSection now = *section, remain = *section;
767
768     if ((now.offset_within_address_space & ~TARGET_PAGE_MASK)
769         || (now.size < TARGET_PAGE_SIZE)) {
770         now.size = MIN(TARGET_PAGE_ALIGN(now.offset_within_address_space)
771                        - now.offset_within_address_space,
772                        now.size);
773         register_subpage(d, &now);
774         remain.size -= now.size;
775         remain.offset_within_address_space += now.size;
776         remain.offset_within_region += now.size;
777     }
778     while (remain.size >= TARGET_PAGE_SIZE) {
779         now = remain;
780         if (remain.offset_within_region & ~TARGET_PAGE_MASK) {
781             now.size = TARGET_PAGE_SIZE;
782             register_subpage(d, &now);
783         } else {
784             now.size &= TARGET_PAGE_MASK;
785             register_multipage(d, &now);
786         }
787         remain.size -= now.size;
788         remain.offset_within_address_space += now.size;
789         remain.offset_within_region += now.size;
790     }
791     now = remain;
792     if (now.size) {
793         register_subpage(d, &now);
794     }
795 }
796
797 void qemu_flush_coalesced_mmio_buffer(void)
798 {
799     if (kvm_enabled())
800         kvm_flush_coalesced_mmio_buffer();
801 }
802
803 #if defined(__linux__) && !defined(TARGET_S390X)
804
805 #include <sys/vfs.h>
806
807 #define HUGETLBFS_MAGIC       0x958458f6
808
809 static long gethugepagesize(const char *path)
810 {
811     struct statfs fs;
812     int ret;
813
814     do {
815         ret = statfs(path, &fs);
816     } while (ret != 0 && errno == EINTR);
817
818     if (ret != 0) {
819         perror(path);
820         return 0;
821     }
822
823     if (fs.f_type != HUGETLBFS_MAGIC)
824         fprintf(stderr, "Warning: path not on HugeTLBFS: %s\n", path);
825
826     return fs.f_bsize;
827 }
828
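/* Back a RAMBlock with an unlinked temporary file under 'path' (normally a
 * hugetlbfs mount) and mmap it; returns NULL on any failure so the caller
 * can fall back to anonymous memory. */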
829 static void *file_ram_alloc(RAMBlock *block,
830                             ram_addr_t memory,
831                             const char *path)
832 {
833     char *filename;
834     void *area;
835     int fd;
836 #ifdef MAP_POPULATE
837     int flags;
838 #endif
839     unsigned long hpagesize;
840
841     hpagesize = gethugepagesize(path);
842     if (!hpagesize) {
843         return NULL;
844     }
845
846     if (memory < hpagesize) {
847         return NULL;
848     }
849
850     if (kvm_enabled() && !kvm_has_sync_mmu()) {
851         fprintf(stderr, "host lacks kvm mmu notifiers, -mem-path unsupported\n");
852         return NULL;
853     }
854
855     if (asprintf(&filename, "%s/qemu_back_mem.XXXXXX", path) == -1) {
856         return NULL;
857     }
858
859     fd = mkstemp(filename);
860     if (fd < 0) {
861         perror("unable to create backing store for hugepages");
862         free(filename);
863         return NULL;
864     }
865     unlink(filename);
866     free(filename);
867
868     memory = (memory+hpagesize-1) & ~(hpagesize-1);
869
870     /*
871      * ftruncate is not supported by hugetlbfs in older
872      * hosts, so don't bother bailing out on errors.
873      * If anything goes wrong with it under other filesystems,
874      * mmap will fail.
875      */
876     if (ftruncate(fd, memory))
877         perror("ftruncate");
878
879 #ifdef MAP_POPULATE
880     /* NB: MAP_POPULATE won't exhaustively allocate all phys pages when
881      * MAP_PRIVATE is requested.  For mem_prealloc we mmap as MAP_SHARED
882      * to sidestep this quirk.
883      */
884     flags = mem_prealloc ? MAP_POPULATE | MAP_SHARED : MAP_PRIVATE;
885     area = mmap(0, memory, PROT_READ | PROT_WRITE, flags, fd, 0);
886 #else
887     area = mmap(0, memory, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
888 #endif
889     if (area == MAP_FAILED) {
890         perror("file_ram_alloc: can't mmap RAM pages");
891         close(fd);
892         return (NULL);
893     }
894     block->fd = fd;
895     return area;
896 }
897 #endif
898
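/* Best-fit search for a free range of 'size' bytes in the ram_addr_t space:
 * for every block, measure the gap to the next-higher block and keep the
 * smallest gap that still fits. */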
899 static ram_addr_t find_ram_offset(ram_addr_t size)
900 {
901     RAMBlock *block, *next_block;
902     ram_addr_t offset = RAM_ADDR_MAX, mingap = RAM_ADDR_MAX;
903
904     if (QLIST_EMPTY(&ram_list.blocks))
905         return 0;
906
907     QLIST_FOREACH(block, &ram_list.blocks, next) {
908         ram_addr_t end, next = RAM_ADDR_MAX;
909
910         end = block->offset + block->length;
911
912         QLIST_FOREACH(next_block, &ram_list.blocks, next) {
913             if (next_block->offset >= end) {
914                 next = MIN(next, next_block->offset);
915             }
916         }
917         if (next - end >= size && next - end < mingap) {
918             offset = end;
919             mingap = next - end;
920         }
921     }
922
923     if (offset == RAM_ADDR_MAX) {
924         fprintf(stderr, "Failed to find gap of requested size: %" PRIu64 "\n",
925                 (uint64_t)size);
926         abort();
927     }
928
929     return offset;
930 }
931
932 ram_addr_t last_ram_offset(void)
933 {
934     RAMBlock *block;
935     ram_addr_t last = 0;
936
937     QLIST_FOREACH(block, &ram_list.blocks, next)
938         last = MAX(last, block->offset + block->length);
939
940     return last;
941 }
942
943 static void qemu_ram_setup_dump(void *addr, ram_addr_t size)
944 {
945     int ret;
946     QemuOpts *machine_opts;
947
948     /* Use MADV_DONTDUMP if the user doesn't want the guest memory in the core dump */
949     machine_opts = qemu_opts_find(qemu_find_opts("machine"), 0);
950     if (machine_opts &&
951         !qemu_opt_get_bool(machine_opts, "dump-guest-core", true)) {
952         ret = qemu_madvise(addr, size, QEMU_MADV_DONTDUMP);
953         if (ret) {
954             perror("qemu_madvise");
955             fprintf(stderr, "madvise doesn't support MADV_DONTDUMP, "
956                             "but dump_guest_core=off specified\n");
957         }
958     }
959 }
960
961 void qemu_ram_set_idstr(ram_addr_t addr, const char *name, DeviceState *dev)
962 {
963     RAMBlock *new_block, *block;
964
965     new_block = NULL;
966     QLIST_FOREACH(block, &ram_list.blocks, next) {
967         if (block->offset == addr) {
968             new_block = block;
969             break;
970         }
971     }
972     assert(new_block);
973     assert(!new_block->idstr[0]);
974
975     if (dev) {
976         char *id = qdev_get_dev_path(dev);
977         if (id) {
978             snprintf(new_block->idstr, sizeof(new_block->idstr), "%s/", id);
979             g_free(id);
980         }
981     }
982     pstrcat(new_block->idstr, sizeof(new_block->idstr), name);
983
984     QLIST_FOREACH(block, &ram_list.blocks, next) {
985         if (block != new_block && !strcmp(block->idstr, new_block->idstr)) {
986             fprintf(stderr, "RAMBlock \"%s\" already registered, abort!\n",
987                     new_block->idstr);
988             abort();
989         }
990     }
991 }
992
993 static int memory_try_enable_merging(void *addr, size_t len)
994 {
995     QemuOpts *opts;
996
997     opts = qemu_opts_find(qemu_find_opts("machine"), 0);
998     if (opts && !qemu_opt_get_bool(opts, "mem-merge", true)) {
999         /* disabled by the user */
1000         return 0;
1001     }
1002
1003     return qemu_madvise(addr, len, QEMU_MADV_MERGEABLE);
1004 }
1005
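/* Register a new RAMBlock of 'size' bytes.  The backing storage is either the
 * caller-supplied 'host' pointer, a -mem-path file, a Xen- or KVM-specific
 * allocation, or plain qemu_vmalloc(); the dirty bitmap is grown and the
 * fresh range marked dirty. */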
1006 ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
1007                                    MemoryRegion *mr)
1008 {
1009     RAMBlock *new_block;
1010
1011     size = TARGET_PAGE_ALIGN(size);
1012     new_block = g_malloc0(sizeof(*new_block));
1013
1014     new_block->mr = mr;
1015     new_block->offset = find_ram_offset(size);
1016     if (host) {
1017         new_block->host = host;
1018         new_block->flags |= RAM_PREALLOC_MASK;
1019     } else {
1020         if (mem_path) {
1021 #if defined (__linux__) && !defined(TARGET_S390X)
1022             new_block->host = file_ram_alloc(new_block, size, mem_path);
1023             if (!new_block->host) {
1024                 new_block->host = qemu_vmalloc(size);
1025                 memory_try_enable_merging(new_block->host, size);
1026             }
1027 #else
1028             fprintf(stderr, "-mem-path option unsupported\n");
1029             exit(1);
1030 #endif
1031         } else {
1032             if (xen_enabled()) {
1033                 xen_ram_alloc(new_block->offset, size, mr);
1034             } else if (kvm_enabled()) {
1035                 /* some s390/kvm configurations have special constraints */
1036                 new_block->host = kvm_vmalloc(size);
1037             } else {
1038                 new_block->host = qemu_vmalloc(size);
1039             }
1040             memory_try_enable_merging(new_block->host, size);
1041         }
1042     }
1043     new_block->length = size;
1044
1045     QLIST_INSERT_HEAD(&ram_list.blocks, new_block, next);
1046
1047     ram_list.phys_dirty = g_realloc(ram_list.phys_dirty,
1048                                        last_ram_offset() >> TARGET_PAGE_BITS);
1049     memset(ram_list.phys_dirty + (new_block->offset >> TARGET_PAGE_BITS),
1050            0, size >> TARGET_PAGE_BITS);
1051     cpu_physical_memory_set_dirty_range(new_block->offset, size, 0xff);
1052
1053     qemu_ram_setup_dump(new_block->host, size);
1054     qemu_madvise(new_block->host, size, QEMU_MADV_HUGEPAGE);
1055
1056     if (kvm_enabled())
1057         kvm_setup_guest_memory(new_block->host, size);
1058
1059     return new_block->offset;
1060 }
1061
1062 ram_addr_t qemu_ram_alloc(ram_addr_t size, MemoryRegion *mr)
1063 {
1064     return qemu_ram_alloc_from_ptr(size, NULL, mr);
1065 }
1066
1067 void qemu_ram_free_from_ptr(ram_addr_t addr)
1068 {
1069     RAMBlock *block;
1070
1071     QLIST_FOREACH(block, &ram_list.blocks, next) {
1072         if (addr == block->offset) {
1073             QLIST_REMOVE(block, next);
1074             g_free(block);
1075             return;
1076         }
1077     }
1078 }
1079
1080 void qemu_ram_free(ram_addr_t addr)
1081 {
1082     RAMBlock *block;
1083
1084     QLIST_FOREACH(block, &ram_list.blocks, next) {
1085         if (addr == block->offset) {
1086             QLIST_REMOVE(block, next);
1087             if (block->flags & RAM_PREALLOC_MASK) {
1088                 ;
1089             } else if (mem_path) {
1090 #if defined (__linux__) && !defined(TARGET_S390X)
1091                 if (block->fd) {
1092                     munmap(block->host, block->length);
1093                     close(block->fd);
1094                 } else {
1095                     qemu_vfree(block->host);
1096                 }
1097 #else
1098                 abort();
1099 #endif
1100             } else {
1101 #if defined(TARGET_S390X) && defined(CONFIG_KVM)
1102                 munmap(block->host, block->length);
1103 #else
1104                 if (xen_enabled()) {
1105                     xen_invalidate_map_cache_entry(block->host);
1106                 } else {
1107                     qemu_vfree(block->host);
1108                 }
1109 #endif
1110             }
1111             g_free(block);
1112             return;
1113         }
1114     }
1115
1116 }
1117
1118 #ifndef _WIN32
1119 void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
1120 {
1121     RAMBlock *block;
1122     ram_addr_t offset;
1123     int flags;
1124     void *area, *vaddr;
1125
1126     QLIST_FOREACH(block, &ram_list.blocks, next) {
1127         offset = addr - block->offset;
1128         if (offset < block->length) {
1129             vaddr = block->host + offset;
1130             if (block->flags & RAM_PREALLOC_MASK) {
1131                 ;
1132             } else {
1133                 flags = MAP_FIXED;
1134                 munmap(vaddr, length);
1135                 if (mem_path) {
1136 #if defined(__linux__) && !defined(TARGET_S390X)
1137                     if (block->fd) {
1138 #ifdef MAP_POPULATE
1139                         flags |= mem_prealloc ? MAP_POPULATE | MAP_SHARED :
1140                             MAP_PRIVATE;
1141 #else
1142                         flags |= MAP_PRIVATE;
1143 #endif
1144                         area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1145                                     flags, block->fd, offset);
1146                     } else {
1147                         flags |= MAP_PRIVATE | MAP_ANONYMOUS;
1148                         area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1149                                     flags, -1, 0);
1150                     }
1151 #else
1152                     abort();
1153 #endif
1154                 } else {
1155 #if defined(TARGET_S390X) && defined(CONFIG_KVM)
1156                     flags |= MAP_SHARED | MAP_ANONYMOUS;
1157                     area = mmap(vaddr, length, PROT_EXEC|PROT_READ|PROT_WRITE,
1158                                 flags, -1, 0);
1159 #else
1160                     flags |= MAP_PRIVATE | MAP_ANONYMOUS;
1161                     area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1162                                 flags, -1, 0);
1163 #endif
1164                 }
1165                 if (area != vaddr) {
1166                     fprintf(stderr, "Could not remap addr: "
1167                             RAM_ADDR_FMT "@" RAM_ADDR_FMT "\n",
1168                             length, addr);
1169                     exit(1);
1170                 }
1171                 memory_try_enable_merging(vaddr, length);
1172                 qemu_ram_setup_dump(vaddr, length);
1173             }
1174             return;
1175         }
1176     }
1177 }
1178 #endif /* !_WIN32 */
1179
1180 /* Return a host pointer to ram allocated with qemu_ram_alloc.
1181    With the exception of the softmmu code in this file, this should
1182    only be used for local memory (e.g. video ram) that the device owns,
1183    and knows it isn't going to access beyond the end of the block.
1184
1185    It should not be used for general purpose DMA.
1186    Use cpu_physical_memory_map/cpu_physical_memory_rw instead.
1187  */
1188 void *qemu_get_ram_ptr(ram_addr_t addr)
1189 {
1190     RAMBlock *block;
1191
1192     QLIST_FOREACH(block, &ram_list.blocks, next) {
1193         if (addr - block->offset < block->length) {
1194             /* Move this entry to the start of the list.  */
1195             if (block != QLIST_FIRST(&ram_list.blocks)) {
1196                 QLIST_REMOVE(block, next);
1197                 QLIST_INSERT_HEAD(&ram_list.blocks, block, next);
1198             }
1199             if (xen_enabled()) {
1200                 /* We need to check if the requested address is in the RAM
1201                  * because we don't want to map the entire memory in QEMU.
1202                  * In that case just map until the end of the page.
1203                  */
1204                 if (block->offset == 0) {
1205                     return xen_map_cache(addr, 0, 0);
1206                 } else if (block->host == NULL) {
1207                     block->host =
1208                         xen_map_cache(block->offset, block->length, 1);
1209                 }
1210             }
1211             return block->host + (addr - block->offset);
1212         }
1213     }
1214
1215     fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
1216     abort();
1217
1218     return NULL;
1219 }
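/* Hypothetical usage sketch (names invented for illustration): a device that
 * owns a RAM block may fill it directly through the returned host pointer,
 *
 *     void *p = qemu_get_ram_ptr(vram_offset);
 *     memset(p, 0, vram_size);
 *
 * whereas guest-driven DMA should go through cpu_physical_memory_map() or
 * cpu_physical_memory_rw(), as noted above. */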
1220
1221 /* Return a host pointer to ram allocated with qemu_ram_alloc.
1222  * Same as qemu_get_ram_ptr but avoid reordering ramblocks.
1223  */
1224 static void *qemu_safe_ram_ptr(ram_addr_t addr)
1225 {
1226     RAMBlock *block;
1227
1228     QLIST_FOREACH(block, &ram_list.blocks, next) {
1229         if (addr - block->offset < block->length) {
1230             if (xen_enabled()) {
1231                 /* We need to check if the requested address is in the RAM
1232                  * because we don't want to map the entire memory in QEMU.
1233                  * In that case just map until the end of the page.
1234                  */
1235                 if (block->offset == 0) {
1236                     return xen_map_cache(addr, 0, 0);
1237                 } else if (block->host == NULL) {
1238                     block->host =
1239                         xen_map_cache(block->offset, block->length, 1);
1240                 }
1241             }
1242             return block->host + (addr - block->offset);
1243         }
1244     }
1245
1246     fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
1247     abort();
1248
1249     return NULL;
1250 }
1251
1252 /* Return a host pointer to guest's ram. Similar to qemu_get_ram_ptr
1253  * but takes a size argument */
1254 static void *qemu_ram_ptr_length(ram_addr_t addr, ram_addr_t *size)
1255 {
1256     if (*size == 0) {
1257         return NULL;
1258     }
1259     if (xen_enabled()) {
1260         return xen_map_cache(addr, *size, 1);
1261     } else {
1262         RAMBlock *block;
1263
1264         QLIST_FOREACH(block, &ram_list.blocks, next) {
1265             if (addr - block->offset < block->length) {
1266                 if (addr - block->offset + *size > block->length)
1267                     *size = block->length - addr + block->offset;
1268                 return block->host + (addr - block->offset);
1269             }
1270         }
1271
1272         fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
1273         abort();
1274     }
1275 }
1276
1277 void qemu_put_ram_ptr(void *addr)
1278 {
1279     trace_qemu_put_ram_ptr(addr);
1280 }
1281
1282 int qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr)
1283 {
1284     RAMBlock *block;
1285     uint8_t *host = ptr;
1286
1287     if (xen_enabled()) {
1288         *ram_addr = xen_ram_addr_from_mapcache(ptr);
1289         return 0;
1290     }
1291
1292     QLIST_FOREACH(block, &ram_list.blocks, next) {
1293         /* This case can happen when the block is not mapped. */
1294         if (block->host == NULL) {
1295             continue;
1296         }
1297         if (host - block->host < block->length) {
1298             *ram_addr = block->offset + (host - block->host);
1299             return 0;
1300         }
1301     }
1302
1303     return -1;
1304 }
1305
1306 /* Some of the softmmu routines need to translate from a host pointer
1307    (typically a TLB entry) back to a ram offset.  */
1308 ram_addr_t qemu_ram_addr_from_host_nofail(void *ptr)
1309 {
1310     ram_addr_t ram_addr;
1311
1312     if (qemu_ram_addr_from_host(ptr, &ram_addr)) {
1313         fprintf(stderr, "Bad ram pointer %p\n", ptr);
1314         abort();
1315     }
1316     return ram_addr;
1317 }
1318
1319 static uint64_t unassigned_mem_read(void *opaque, hwaddr addr,
1320                                     unsigned size)
1321 {
1322 #ifdef DEBUG_UNASSIGNED
1323     printf("Unassigned mem read " TARGET_FMT_plx "\n", addr);
1324 #endif
1325 #if defined(TARGET_ALPHA) || defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
1326     cpu_unassigned_access(cpu_single_env, addr, 0, 0, 0, size);
1327 #endif
1328     return 0;
1329 }
1330
1331 static void unassigned_mem_write(void *opaque, hwaddr addr,
1332                                  uint64_t val, unsigned size)
1333 {
1334 #ifdef DEBUG_UNASSIGNED
1335     printf("Unassigned mem write " TARGET_FMT_plx " = 0x%"PRIx64"\n", addr, val);
1336 #endif
1337 #if defined(TARGET_ALPHA) || defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
1338     cpu_unassigned_access(cpu_single_env, addr, 1, 0, 0, size);
1339 #endif
1340 }
1341
1342 static const MemoryRegionOps unassigned_mem_ops = {
1343     .read = unassigned_mem_read,
1344     .write = unassigned_mem_write,
1345     .endianness = DEVICE_NATIVE_ENDIAN,
1346 };
1347
1348 static uint64_t error_mem_read(void *opaque, hwaddr addr,
1349                                unsigned size)
1350 {
1351     abort();
1352 }
1353
1354 static void error_mem_write(void *opaque, hwaddr addr,
1355                             uint64_t value, unsigned size)
1356 {
1357     abort();
1358 }
1359
1360 static const MemoryRegionOps error_mem_ops = {
1361     .read = error_mem_read,
1362     .write = error_mem_write,
1363     .endianness = DEVICE_NATIVE_ENDIAN,
1364 };
1365
1366 static const MemoryRegionOps rom_mem_ops = {
1367     .read = error_mem_read,
1368     .write = unassigned_mem_write,
1369     .endianness = DEVICE_NATIVE_ENDIAN,
1370 };
1371
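/* Write handler installed for pages whose code-dirty bit is clear: it
 * invalidates any translated code on the page, performs the store into RAM,
 * marks the page dirty, and once the page is fully dirty lets the TLB return
 * to the fast path. */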
1372 static void notdirty_mem_write(void *opaque, hwaddr ram_addr,
1373                                uint64_t val, unsigned size)
1374 {
1375     int dirty_flags;
1376     dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
1377     if (!(dirty_flags & CODE_DIRTY_FLAG)) {
1378 #if !defined(CONFIG_USER_ONLY)
1379         tb_invalidate_phys_page_fast(ram_addr, size);
1380         dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
1381 #endif
1382     }
1383     switch (size) {
1384     case 1:
1385         stb_p(qemu_get_ram_ptr(ram_addr), val);
1386         break;
1387     case 2:
1388         stw_p(qemu_get_ram_ptr(ram_addr), val);
1389         break;
1390     case 4:
1391         stl_p(qemu_get_ram_ptr(ram_addr), val);
1392         break;
1393     default:
1394         abort();
1395     }
1396     dirty_flags |= (0xff & ~CODE_DIRTY_FLAG);
1397     cpu_physical_memory_set_dirty_flags(ram_addr, dirty_flags);
1398     /* we remove the notdirty callback only if the code has been
1399        flushed */
1400     if (dirty_flags == 0xff)
1401         tlb_set_dirty(cpu_single_env, cpu_single_env->mem_io_vaddr);
1402 }
1403
1404 static const MemoryRegionOps notdirty_mem_ops = {
1405     .read = error_mem_read,
1406     .write = notdirty_mem_write,
1407     .endianness = DEVICE_NATIVE_ENDIAN,
1408 };
1409
1410 /* Generate a debug exception if a watchpoint has been hit.  */
1411 static void check_watchpoint(int offset, int len_mask, int flags)
1412 {
1413     CPUArchState *env = cpu_single_env;
1414     target_ulong pc, cs_base;
1415     target_ulong vaddr;
1416     CPUWatchpoint *wp;
1417     int cpu_flags;
1418
1419     if (env->watchpoint_hit) {
1420         /* We re-entered the check after replacing the TB. Now raise
1421          * the debug interrupt so that it will trigger after the
1422          * current instruction. */
1423         cpu_interrupt(env, CPU_INTERRUPT_DEBUG);
1424         return;
1425     }
1426     vaddr = (env->mem_io_vaddr & TARGET_PAGE_MASK) + offset;
1427     QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
1428         if ((vaddr == (wp->vaddr & len_mask) ||
1429              (vaddr & wp->len_mask) == wp->vaddr) && (wp->flags & flags)) {
1430             wp->flags |= BP_WATCHPOINT_HIT;
1431             if (!env->watchpoint_hit) {
1432                 env->watchpoint_hit = wp;
1433                 tb_check_watchpoint(env);
1434                 if (wp->flags & BP_STOP_BEFORE_ACCESS) {
1435                     env->exception_index = EXCP_DEBUG;
1436                     cpu_loop_exit(env);
1437                 } else {
1438                     cpu_get_tb_cpu_state(env, &pc, &cs_base, &cpu_flags);
1439                     tb_gen_code(env, pc, cs_base, cpu_flags, 1);
1440                     cpu_resume_from_signal(env, NULL);
1441                 }
1442             }
1443         } else {
1444             wp->flags &= ~BP_WATCHPOINT_HIT;
1445         }
1446     }
1447 }
1448
1449 /* Watchpoint access routines.  Watchpoints are inserted using TLB tricks,
1450    so these check for a hit then pass through to the normal out-of-line
1451    phys routines.  */
1452 static uint64_t watch_mem_read(void *opaque, hwaddr addr,
1453                                unsigned size)
1454 {
1455     check_watchpoint(addr & ~TARGET_PAGE_MASK, ~(size - 1), BP_MEM_READ);
1456     switch (size) {
1457     case 1: return ldub_phys(addr);
1458     case 2: return lduw_phys(addr);
1459     case 4: return ldl_phys(addr);
1460     default: abort();
1461     }
1462 }
1463
1464 static void watch_mem_write(void *opaque, hwaddr addr,
1465                             uint64_t val, unsigned size)
1466 {
1467     check_watchpoint(addr & ~TARGET_PAGE_MASK, ~(size - 1), BP_MEM_WRITE);
1468     switch (size) {
1469     case 1:
1470         stb_phys(addr, val);
1471         break;
1472     case 2:
1473         stw_phys(addr, val);
1474         break;
1475     case 4:
1476         stl_phys(addr, val);
1477         break;
1478     default: abort();
1479     }
1480 }
1481
1482 static const MemoryRegionOps watch_mem_ops = {
1483     .read = watch_mem_read,
1484     .write = watch_mem_write,
1485     .endianness = DEVICE_NATIVE_ENDIAN,
1486 };
1487
1488 static uint64_t subpage_read(void *opaque, hwaddr addr,
1489                              unsigned len)
1490 {
1491     subpage_t *mmio = opaque;
1492     unsigned int idx = SUBPAGE_IDX(addr);
1493     MemoryRegionSection *section;
1494 #if defined(DEBUG_SUBPAGE)
1495     printf("%s: subpage %p len %d addr " TARGET_FMT_plx " idx %d\n", __func__,
1496            mmio, len, addr, idx);
1497 #endif
1498
1499     section = &phys_sections[mmio->sub_section[idx]];
1500     addr += mmio->base;
1501     addr -= section->offset_within_address_space;
1502     addr += section->offset_within_region;
1503     return io_mem_read(section->mr, addr, len);
1504 }
1505
1506 static void subpage_write(void *opaque, hwaddr addr,
1507                           uint64_t value, unsigned len)
1508 {
1509     subpage_t *mmio = opaque;
1510     unsigned int idx = SUBPAGE_IDX(addr);
1511     MemoryRegionSection *section;
1512 #if defined(DEBUG_SUBPAGE)
1513     printf("%s: subpage %p len %d addr " TARGET_FMT_plx
1514            " idx %d value %"PRIx64"\n",
1515            __func__, mmio, len, addr, idx, value);
1516 #endif
1517
1518     section = &phys_sections[mmio->sub_section[idx]];
1519     addr += mmio->base;
1520     addr -= section->offset_within_address_space;
1521     addr += section->offset_within_region;
1522     io_mem_write(section->mr, addr, value, len);
1523 }
1524
1525 static const MemoryRegionOps subpage_ops = {
1526     .read = subpage_read,
1527     .write = subpage_write,
1528     .endianness = DEVICE_NATIVE_ENDIAN,
1529 };
1530
1531 static uint64_t subpage_ram_read(void *opaque, hwaddr addr,
1532                                  unsigned size)
1533 {
1534     ram_addr_t raddr = addr;
1535     void *ptr = qemu_get_ram_ptr(raddr);
1536     switch (size) {
1537     case 1: return ldub_p(ptr);
1538     case 2: return lduw_p(ptr);
1539     case 4: return ldl_p(ptr);
1540     default: abort();
1541     }
1542 }
1543
1544 static void subpage_ram_write(void *opaque, hwaddr addr,
1545                               uint64_t value, unsigned size)
1546 {
1547     ram_addr_t raddr = addr;
1548     void *ptr = qemu_get_ram_ptr(raddr);
1549     switch (size) {
1550     case 1: return stb_p(ptr, value);
1551     case 2: return stw_p(ptr, value);
1552     case 4: return stl_p(ptr, value);
1553     default: abort();
1554     }
1555 }
1556
1557 static const MemoryRegionOps subpage_ram_ops = {
1558     .read = subpage_ram_read,
1559     .write = subpage_ram_write,
1560     .endianness = DEVICE_NATIVE_ENDIAN,
1561 };
1562
1563 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
1564                              uint16_t section)
1565 {
1566     int idx, eidx;
1567
1568     if (start >= TARGET_PAGE_SIZE || end >= TARGET_PAGE_SIZE)
1569         return -1;
1570     idx = SUBPAGE_IDX(start);
1571     eidx = SUBPAGE_IDX(end);
1572 #if defined(DEBUG_SUBPAGE)
1573     printf("%s: %p start %08x end %08x idx %08x eidx %08x mem %ld\n", __func__,
1574            mmio, start, end, idx, eidx, memory);
1575 #endif
1576     if (memory_region_is_ram(phys_sections[section].mr)) {
1577         MemoryRegionSection new_section = phys_sections[section];
1578         new_section.mr = &io_mem_subpage_ram;
1579         section = phys_section_add(&new_section);
1580     }
1581     for (; idx <= eidx; idx++) {
1582         mmio->sub_section[idx] = section;
1583     }
1584
1585     return 0;
1586 }
1587
1588 static subpage_t *subpage_init(hwaddr base)
1589 {
1590     subpage_t *mmio;
1591
1592     mmio = g_malloc0(sizeof(subpage_t));
1593
1594     mmio->base = base;
1595     memory_region_init_io(&mmio->iomem, &subpage_ops, mmio,
1596                           "subpage", TARGET_PAGE_SIZE);
1597     mmio->iomem.subpage = true;
1598 #if defined(DEBUG_SUBPAGE)
1599     printf("%s: %p base " TARGET_FMT_plx " len %08x %d\n", __func__,
1600            mmio, base, TARGET_PAGE_SIZE, subpage_memory);
1601 #endif
1602     subpage_register(mmio, 0, TARGET_PAGE_SIZE-1, phys_section_unassigned);
1603
1604     return mmio;
1605 }
1606
1607 static uint16_t dummy_section(MemoryRegion *mr)
1608 {
1609     MemoryRegionSection section = {
1610         .mr = mr,
1611         .offset_within_address_space = 0,
1612         .offset_within_region = 0,
1613         .size = UINT64_MAX,
1614     };
1615
1616     return phys_section_add(&section);
1617 }
1618
1619 MemoryRegion *iotlb_to_region(hwaddr index)
1620 {
1621     return phys_sections[index & ~TARGET_PAGE_MASK].mr;
1622 }
1623
1624 static void io_mem_init(void)
1625 {
1626     memory_region_init_io(&io_mem_ram, &error_mem_ops, NULL, "ram", UINT64_MAX);
1627     memory_region_init_io(&io_mem_rom, &rom_mem_ops, NULL, "rom", UINT64_MAX);
1628     memory_region_init_io(&io_mem_unassigned, &unassigned_mem_ops, NULL,
1629                           "unassigned", UINT64_MAX);
1630     memory_region_init_io(&io_mem_notdirty, &notdirty_mem_ops, NULL,
1631                           "notdirty", UINT64_MAX);
1632     memory_region_init_io(&io_mem_subpage_ram, &subpage_ram_ops, NULL,
1633                           "subpage-ram", UINT64_MAX);
1634     memory_region_init_io(&io_mem_watch, &watch_mem_ops, NULL,
1635                           "watch", UINT64_MAX);
1636 }
1637
1638 static void mem_begin(MemoryListener *listener)
1639 {
1640     AddressSpaceDispatch *d = container_of(listener, AddressSpaceDispatch, listener);
1641
1642     destroy_all_mappings(d);
1643     d->phys_map.ptr = PHYS_MAP_NODE_NIL;
1644 }
1645
1646 static void core_begin(MemoryListener *listener)
1647 {
1648     phys_sections_clear();
1649     phys_section_unassigned = dummy_section(&io_mem_unassigned);
1650     phys_section_notdirty = dummy_section(&io_mem_notdirty);
1651     phys_section_rom = dummy_section(&io_mem_rom);
1652     phys_section_watch = dummy_section(&io_mem_watch);
1653 }
1654
1655 static void tcg_commit(MemoryListener *listener)
1656 {
1657     CPUArchState *env;
1658
1659     /* since each CPU stores ram addresses in its TLB cache, we must
1660        reset the modified entries */
1661     /* XXX: slow ! */
1662     for (env = first_cpu; env != NULL; env = env->next_cpu) {
1663         tlb_flush(env, 1);
1664     }
1665 }
1666
1667 static void core_log_global_start(MemoryListener *listener)
1668 {
1669     cpu_physical_memory_set_dirty_tracking(1);
1670 }
1671
1672 static void core_log_global_stop(MemoryListener *listener)
1673 {
1674     cpu_physical_memory_set_dirty_tracking(0);
1675 }
1676
1677 static void io_region_add(MemoryListener *listener,
1678                           MemoryRegionSection *section)
1679 {
1680     MemoryRegionIORange *mrio = g_new(MemoryRegionIORange, 1);
1681
1682     mrio->mr = section->mr;
1683     mrio->offset = section->offset_within_region;
1684     iorange_init(&mrio->iorange, &memory_region_iorange_ops,
1685                  section->offset_within_address_space, section->size);
1686     ioport_register(&mrio->iorange);
1687 }
1688
1689 static void io_region_del(MemoryListener *listener,
1690                           MemoryRegionSection *section)
1691 {
1692     isa_unassign_ioport(section->offset_within_address_space, section->size);
1693 }
1694
1695 static MemoryListener core_memory_listener = {
1696     .begin = core_begin,
1697     .log_global_start = core_log_global_start,
1698     .log_global_stop = core_log_global_stop,
1699     .priority = 1,
1700 };
1701
1702 static MemoryListener io_memory_listener = {
1703     .region_add = io_region_add,
1704     .region_del = io_region_del,
1705     .priority = 0,
1706 };
1707
1708 static MemoryListener tcg_memory_listener = {
1709     .commit = tcg_commit,
1710 };
1711
1712 void address_space_init_dispatch(AddressSpace *as)
1713 {
1714     AddressSpaceDispatch *d = g_new(AddressSpaceDispatch, 1);
1715
1716     d->phys_map  = (PhysPageEntry) { .ptr = PHYS_MAP_NODE_NIL, .is_leaf = 0 };
1717     d->listener = (MemoryListener) {
1718         .begin = mem_begin,
1719         .region_add = mem_add,
1720         .region_nop = mem_add,
1721         .priority = 0,
1722     };
1723     as->dispatch = d;
1724     memory_listener_register(&d->listener, as);
1725 }
1726
1727 void address_space_destroy_dispatch(AddressSpace *as)
1728 {
1729     AddressSpaceDispatch *d = as->dispatch;
1730
1731     memory_listener_unregister(&d->listener);
1732     destroy_l2_mapping(&d->phys_map, P_L2_LEVELS - 1);
1733     g_free(d);
1734     as->dispatch = NULL;
1735 }
1736
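/* Create the global "system" and "io" memory regions, wrap them in the
 * address_space_memory and address_space_io address spaces, register the
 * global listeners declared above and set up the default DMA context. */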
1737 static void memory_map_init(void)
1738 {
1739     system_memory = g_malloc(sizeof(*system_memory));
1740     memory_region_init(system_memory, "system", INT64_MAX);
1741     address_space_init(&address_space_memory, system_memory);
1742     address_space_memory.name = "memory";
1743
1744     system_io = g_malloc(sizeof(*system_io));
1745     memory_region_init(system_io, "io", 65536);
1746     address_space_init(&address_space_io, system_io);
1747     address_space_io.name = "I/O";
1748
1749     memory_listener_register(&core_memory_listener, &address_space_memory);
1750     memory_listener_register(&io_memory_listener, &address_space_io);
1751     memory_listener_register(&tcg_memory_listener, &address_space_memory);
1752
1753     dma_context_init(&dma_context_memory, &address_space_memory,
1754                      NULL, NULL, NULL);
1755 }
1756
1757 MemoryRegion *get_system_memory(void)
1758 {
1759     return system_memory;
1760 }
1761
1762 MemoryRegion *get_system_io(void)
1763 {
1764     return system_io;
1765 }
1766
1767 #endif /* !defined(CONFIG_USER_ONLY) */
1768
1769 /* physical memory access (slow version, mainly for debug) */
1770 #if defined(CONFIG_USER_ONLY)
1771 int cpu_memory_rw_debug(CPUArchState *env, target_ulong addr,
1772                         uint8_t *buf, int len, int is_write)
1773 {
1774     int l, flags;
1775     target_ulong page;
1776     void * p;
1777
1778     while (len > 0) {
1779         page = addr & TARGET_PAGE_MASK;
1780         l = (page + TARGET_PAGE_SIZE) - addr;
1781         if (l > len)
1782             l = len;
1783         flags = page_get_flags(page);
1784         if (!(flags & PAGE_VALID))
1785             return -1;
1786         if (is_write) {
1787             if (!(flags & PAGE_WRITE))
1788                 return -1;
1789             /* XXX: this code should not depend on lock_user */
1790             if (!(p = lock_user(VERIFY_WRITE, addr, l, 0)))
1791                 return -1;
1792             memcpy(p, buf, l);
1793             unlock_user(p, addr, l);
1794         } else {
1795             if (!(flags & PAGE_READ))
1796                 return -1;
1797             /* XXX: this code should not depend on lock_user */
1798             if (!(p = lock_user(VERIFY_READ, addr, l, 1)))
1799                 return -1;
1800             memcpy(buf, p, l);
1801             unlock_user(p, addr, 0);
1802         }
1803         len -= l;
1804         buf += l;
1805         addr += l;
1806     }
1807     return 0;
1808 }
1809
1810 #else
1811
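/* A write to RAM may clobber translated code: if the page is not already
 * dirty, invalidate any TBs covering the range and set every dirty flag
 * except CODE_DIRTY_FLAG.  Xen is always notified of the modified range. */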
1812 static void invalidate_and_set_dirty(hwaddr addr,
1813                                      hwaddr length)
1814 {
1815     if (!cpu_physical_memory_is_dirty(addr)) {
1816         /* invalidate code */
1817         tb_invalidate_phys_page_range(addr, addr + length, 0);
1818         /* set dirty bit */
1819         cpu_physical_memory_set_dirty_flags(addr, (0xff & ~CODE_DIRTY_FLAG));
1820     }
1821     xen_modified_memory(addr, length);
1822 }
1823
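/* Slow-path read/write for @as: walk the range page by page, splitting
 * MMIO accesses into aligned 4/2/1 byte chunks and using memcpy() plus
 * dirty tracking for RAM-backed sections.  Writes to read-only RAM
 * sections are silently discarded. */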
1824 void address_space_rw(AddressSpace *as, hwaddr addr, uint8_t *buf,
1825                       int len, bool is_write)
1826 {
1827     AddressSpaceDispatch *d = as->dispatch;
1828     int l;
1829     uint8_t *ptr;
1830     uint32_t val;
1831     hwaddr page;
1832     MemoryRegionSection *section;
1833
1834     while (len > 0) {
1835         page = addr & TARGET_PAGE_MASK;
1836         l = (page + TARGET_PAGE_SIZE) - addr;
1837         if (l > len)
1838             l = len;
1839         section = phys_page_find(d, page >> TARGET_PAGE_BITS);
1840
1841         if (is_write) {
1842             if (!memory_region_is_ram(section->mr)) {
1843                 hwaddr addr1;
1844                 addr1 = memory_region_section_addr(section, addr);
1845                 /* XXX: could force cpu_single_env to NULL to avoid
1846                    potential bugs */
1847                 if (l >= 4 && ((addr1 & 3) == 0)) {
1848                     /* 32 bit write access */
1849                     val = ldl_p(buf);
1850                     io_mem_write(section->mr, addr1, val, 4);
1851                     l = 4;
1852                 } else if (l >= 2 && ((addr1 & 1) == 0)) {
1853                     /* 16 bit write access */
1854                     val = lduw_p(buf);
1855                     io_mem_write(section->mr, addr1, val, 2);
1856                     l = 2;
1857                 } else {
1858                     /* 8 bit write access */
1859                     val = ldub_p(buf);
1860                     io_mem_write(section->mr, addr1, val, 1);
1861                     l = 1;
1862                 }
1863             } else if (!section->readonly) {
1864                 ram_addr_t addr1;
1865                 addr1 = memory_region_get_ram_addr(section->mr)
1866                     + memory_region_section_addr(section, addr);
1867                 /* RAM case */
1868                 ptr = qemu_get_ram_ptr(addr1);
1869                 memcpy(ptr, buf, l);
1870                 invalidate_and_set_dirty(addr1, l);
1871                 qemu_put_ram_ptr(ptr);
1872             }
1873         } else {
1874             if (!(memory_region_is_ram(section->mr) ||
1875                   memory_region_is_romd(section->mr))) {
1876                 hwaddr addr1;
1877                 /* I/O case */
1878                 addr1 = memory_region_section_addr(section, addr);
1879                 if (l >= 4 && ((addr1 & 3) == 0)) {
1880                     /* 32 bit read access */
1881                     val = io_mem_read(section->mr, addr1, 4);
1882                     stl_p(buf, val);
1883                     l = 4;
1884                 } else if (l >= 2 && ((addr1 & 1) == 0)) {
1885                     /* 16 bit read access */
1886                     val = io_mem_read(section->mr, addr1, 2);
1887                     stw_p(buf, val);
1888                     l = 2;
1889                 } else {
1890                     /* 8 bit read access */
1891                     val = io_mem_read(section->mr, addr1, 1);
1892                     stb_p(buf, val);
1893                     l = 1;
1894                 }
1895             } else {
1896                 /* RAM case */
1897                 ptr = qemu_get_ram_ptr(section->mr->ram_addr
1898                                        + memory_region_section_addr(section,
1899                                                                     addr));
1900                 memcpy(buf, ptr, l);
1901                 qemu_put_ram_ptr(ptr);
1902             }
1903         }
1904         len -= l;
1905         buf += l;
1906         addr += l;
1907     }
1908 }
1909
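/**
 * address_space_write: write to address space.
 *
 * @as: #AddressSpace to be accessed
 * @addr: address within that address space
 * @buf: buffer with the data transferred
 * @len: length of the transfer, in bytes
 */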
1910 void address_space_write(AddressSpace *as, hwaddr addr,
1911                          const uint8_t *buf, int len)
1912 {
1913     address_space_rw(as, addr, (uint8_t *)buf, len, true);
1914 }
1915
1916 /**
1917  * address_space_read: read from an address space.
1918  *
1919  * @as: #AddressSpace to be accessed
1920  * @addr: address within that address space
1921  * @buf: buffer into which the data is transferred
 * @len: length of the transfer, in bytes
1922  */
1923 void address_space_read(AddressSpace *as, hwaddr addr, uint8_t *buf, int len)
1924 {
1925     address_space_rw(as, addr, buf, len, false);
1926 }
1927
1928
1929 void cpu_physical_memory_rw(hwaddr addr, uint8_t *buf,
1930                             int len, int is_write)
1931 {
1932     address_space_rw(&address_space_memory, addr, buf, len, is_write);
1933 }
1934
1935 /* Used for ROM loading: can write to both RAM and ROM. */
1936 void cpu_physical_memory_write_rom(hwaddr addr,
1937                                    const uint8_t *buf, int len)
1938 {
1939     AddressSpaceDispatch *d = address_space_memory.dispatch;
1940     int l;
1941     uint8_t *ptr;
1942     hwaddr page;
1943     MemoryRegionSection *section;
1944
1945     while (len > 0) {
1946         page = addr & TARGET_PAGE_MASK;
1947         l = (page + TARGET_PAGE_SIZE) - addr;
1948         if (l > len)
1949             l = len;
1950         section = phys_page_find(d, page >> TARGET_PAGE_BITS);
1951
1952         if (!(memory_region_is_ram(section->mr) ||
1953               memory_region_is_romd(section->mr))) {
1954             /* do nothing */
1955         } else {
1956             unsigned long addr1;
1957             addr1 = memory_region_get_ram_addr(section->mr)
1958                 + memory_region_section_addr(section, addr);
1959             /* ROM/RAM case */
1960             ptr = qemu_get_ram_ptr(addr1);
1961             memcpy(ptr, buf, l);
1962             invalidate_and_set_dirty(addr1, l);
1963             qemu_put_ram_ptr(ptr);
1964         }
1965         len -= l;
1966         buf += l;
1967         addr += l;
1968     }
1969 }
1970
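/* A single, statically allocated bounce buffer backs address_space_map()
 * for regions that are not directly addressable (MMIO); only one such
 * mapping can therefore be outstanding at a time. */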
1971 typedef struct {
1972     void *buffer;
1973     hwaddr addr;
1974     hwaddr len;
1975 } BounceBuffer;
1976
1977 static BounceBuffer bounce;
1978
1979 typedef struct MapClient {
1980     void *opaque;
1981     void (*callback)(void *opaque);
1982     QLIST_ENTRY(MapClient) link;
1983 } MapClient;
1984
1985 static QLIST_HEAD(map_client_list, MapClient) map_client_list
1986     = QLIST_HEAD_INITIALIZER(map_client_list);
1987
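/* Register @callback to be invoked (once) when the bounce buffer becomes
 * free again, so a caller whose address_space_map() attempt failed knows
 * when a retry is likely to succeed.  Returns an opaque handle. */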
1988 void *cpu_register_map_client(void *opaque, void (*callback)(void *opaque))
1989 {
1990     MapClient *client = g_malloc(sizeof(*client));
1991
1992     client->opaque = opaque;
1993     client->callback = callback;
1994     QLIST_INSERT_HEAD(&map_client_list, client, link);
1995     return client;
1996 }
1997
1998 static void cpu_unregister_map_client(void *_client)
1999 {
2000     MapClient *client = (MapClient *)_client;
2001
2002     QLIST_REMOVE(client, link);
2003     g_free(client);
2004 }
2005
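/* Drain the client list: each registered callback is invoked once and then
 * unregistered, typically to retry a previously failed map operation. */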
2006 static void cpu_notify_map_clients(void)
2007 {
2008     MapClient *client;
2009
2010     while (!QLIST_EMPTY(&map_client_list)) {
2011         client = QLIST_FIRST(&map_client_list);
2012         client->callback(client->opaque);
2013         cpu_unregister_map_client(client);
2014     }
2015 }
2016
2017 /* Map a physical memory region into a host virtual address.
2018  * May map a subset of the requested range, given by and returned in *plen.
2019  * May return NULL if resources needed to perform the mapping are exhausted.
2020  * Use only for reads OR writes - not for read-modify-write operations.
2021  * Use cpu_register_map_client() to know when retrying the map operation is
2022  * likely to succeed.
2023  */
2024 void *address_space_map(AddressSpace *as,
2025                         hwaddr addr,
2026                         hwaddr *plen,
2027                         bool is_write)
2028 {
2029     AddressSpaceDispatch *d = as->dispatch;
2030     hwaddr len = *plen;
2031     hwaddr todo = 0;
2032     int l;
2033     hwaddr page;
2034     MemoryRegionSection *section;
2035     ram_addr_t raddr = RAM_ADDR_MAX;
2036     ram_addr_t rlen;
2037     void *ret;
2038
2039     while (len > 0) {
2040         page = addr & TARGET_PAGE_MASK;
2041         l = (page + TARGET_PAGE_SIZE) - addr;
2042         if (l > len)
2043             l = len;
2044         section = phys_page_find(d, page >> TARGET_PAGE_BITS);
2045
2046         if (!(memory_region_is_ram(section->mr) && !section->readonly)) {
2047             if (todo || bounce.buffer) {
2048                 break;
2049             }
2050             bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, TARGET_PAGE_SIZE);
2051             bounce.addr = addr;
2052             bounce.len = l;
2053             if (!is_write) {
2054                 address_space_read(as, addr, bounce.buffer, l);
2055             }
2056
2057             *plen = l;
2058             return bounce.buffer;
2059         }
2060         if (!todo) {
2061             raddr = memory_region_get_ram_addr(section->mr)
2062                 + memory_region_section_addr(section, addr);
2063         }
2064
2065         len -= l;
2066         addr += l;
2067         todo += l;
2068     }
2069     rlen = todo;
2070     ret = qemu_ram_ptr_length(raddr, &rlen);
2071     *plen = rlen;
2072     return ret;
2073 }
2074
2075 /* Unmaps a memory region previously mapped by address_space_map().
2076  * Will also mark the memory as dirty if is_write == 1.  access_len gives
2077  * the amount of memory that was actually read or written by the caller.
2078  */
2079 void address_space_unmap(AddressSpace *as, void *buffer, hwaddr len,
2080                          int is_write, hwaddr access_len)
2081 {
2082     if (buffer != bounce.buffer) {
2083         if (is_write) {
2084             ram_addr_t addr1 = qemu_ram_addr_from_host_nofail(buffer);
2085             while (access_len) {
2086                 unsigned l;
2087                 l = TARGET_PAGE_SIZE;
2088                 if (l > access_len)
2089                     l = access_len;
2090                 invalidate_and_set_dirty(addr1, l);
2091                 addr1 += l;
2092                 access_len -= l;
2093             }
2094         }
2095         if (xen_enabled()) {
2096             xen_invalidate_map_cache_entry(buffer);
2097         }
2098         return;
2099     }
2100     if (is_write) {
2101         address_space_write(as, bounce.addr, bounce.buffer, access_len);
2102     }
2103     qemu_vfree(bounce.buffer);
2104     bounce.buffer = NULL;
2105     cpu_notify_map_clients();
2106 }
2107
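/* Convenience wrappers that operate on the system memory address space.
 * Illustrative usage sketch only (hypothetical caller; on return *plen may
 * be smaller than the length that was asked for):
 *
 *     hwaddr len = size;
 *     void *p = cpu_physical_memory_map(gpa, &len, 1);
 *     if (p) {
 *         memset(p, 0, len);
 *         cpu_physical_memory_unmap(p, len, 1, len);
 *     }
 */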
2108 void *cpu_physical_memory_map(hwaddr addr,
2109                               hwaddr *plen,
2110                               int is_write)
2111 {
2112     return address_space_map(&address_space_memory, addr, plen, is_write);
2113 }
2114
2115 void cpu_physical_memory_unmap(void *buffer, hwaddr len,
2116                                int is_write, hwaddr access_len)
2117 {
2118     address_space_unmap(&address_space_memory, buffer, len, is_write, access_len);
2119 }
2120
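/* The ld*_phys/st*_phys helpers below load or store a single naturally
 * aligned value in guest physical memory, dispatching to io_mem_read() or
 * io_mem_write() for MMIO sections and to a direct host pointer for RAM,
 * with byte swapping when the requested endianness differs from the
 * target's. */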
2121 /* warning: addr must be aligned */
2122 static inline uint32_t ldl_phys_internal(hwaddr addr,
2123                                          enum device_endian endian)
2124 {
2125     uint8_t *ptr;
2126     uint32_t val;
2127     MemoryRegionSection *section;
2128
2129     section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
2130
2131     if (!(memory_region_is_ram(section->mr) ||
2132           memory_region_is_romd(section->mr))) {
2133         /* I/O case */
2134         addr = memory_region_section_addr(section, addr);
2135         val = io_mem_read(section->mr, addr, 4);
2136 #if defined(TARGET_WORDS_BIGENDIAN)
2137         if (endian == DEVICE_LITTLE_ENDIAN) {
2138             val = bswap32(val);
2139         }
2140 #else
2141         if (endian == DEVICE_BIG_ENDIAN) {
2142             val = bswap32(val);
2143         }
2144 #endif
2145     } else {
2146         /* RAM case */
2147         ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section->mr)
2148                                 & TARGET_PAGE_MASK)
2149                                + memory_region_section_addr(section, addr));
2150         switch (endian) {
2151         case DEVICE_LITTLE_ENDIAN:
2152             val = ldl_le_p(ptr);
2153             break;
2154         case DEVICE_BIG_ENDIAN:
2155             val = ldl_be_p(ptr);
2156             break;
2157         default:
2158             val = ldl_p(ptr);
2159             break;
2160         }
2161     }
2162     return val;
2163 }
2164
2165 uint32_t ldl_phys(hwaddr addr)
2166 {
2167     return ldl_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
2168 }
2169
2170 uint32_t ldl_le_phys(hwaddr addr)
2171 {
2172     return ldl_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
2173 }
2174
2175 uint32_t ldl_be_phys(hwaddr addr)
2176 {
2177     return ldl_phys_internal(addr, DEVICE_BIG_ENDIAN);
2178 }
2179
2180 /* warning: addr must be aligned */
2181 static inline uint64_t ldq_phys_internal(hwaddr addr,
2182                                          enum device_endian endian)
2183 {
2184     uint8_t *ptr;
2185     uint64_t val;
2186     MemoryRegionSection *section;
2187
2188     section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
2189
2190     if (!(memory_region_is_ram(section->mr) ||
2191           memory_region_is_romd(section->mr))) {
2192         /* I/O case */
2193         addr = memory_region_section_addr(section, addr);
2194
2195         /* XXX This is broken when device endian != cpu endian.
2196                Fix and add "endian" variable check */
2197 #ifdef TARGET_WORDS_BIGENDIAN
2198         val = io_mem_read(section->mr, addr, 4) << 32;
2199         val |= io_mem_read(section->mr, addr + 4, 4);
2200 #else
2201         val = io_mem_read(section->mr, addr, 4);
2202         val |= io_mem_read(section->mr, addr + 4, 4) << 32;
2203 #endif
2204     } else {
2205         /* RAM case */
2206         ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section->mr)
2207                                 & TARGET_PAGE_MASK)
2208                                + memory_region_section_addr(section, addr));
2209         switch (endian) {
2210         case DEVICE_LITTLE_ENDIAN:
2211             val = ldq_le_p(ptr);
2212             break;
2213         case DEVICE_BIG_ENDIAN:
2214             val = ldq_be_p(ptr);
2215             break;
2216         default:
2217             val = ldq_p(ptr);
2218             break;
2219         }
2220     }
2221     return val;
2222 }
2223
2224 uint64_t ldq_phys(hwaddr addr)
2225 {
2226     return ldq_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
2227 }
2228
2229 uint64_t ldq_le_phys(hwaddr addr)
2230 {
2231     return ldq_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
2232 }
2233
2234 uint64_t ldq_be_phys(hwaddr addr)
2235 {
2236     return ldq_phys_internal(addr, DEVICE_BIG_ENDIAN);
2237 }
2238
2239 /* XXX: optimize */
2240 uint32_t ldub_phys(hwaddr addr)
2241 {
2242     uint8_t val;
2243     cpu_physical_memory_read(addr, &val, 1);
2244     return val;
2245 }
2246
2247 /* warning: addr must be aligned */
2248 static inline uint32_t lduw_phys_internal(hwaddr addr,
2249                                           enum device_endian endian)
2250 {
2251     uint8_t *ptr;
2252     uint32_t val;
2253     MemoryRegionSection *section;
2254
2255     section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
2256
2257     if (!(memory_region_is_ram(section->mr) ||
2258           memory_region_is_romd(section->mr))) {
2259         /* I/O case */
2260         addr = memory_region_section_addr(section, addr);
2261         val = io_mem_read(section->mr, addr, 2);
2262 #if defined(TARGET_WORDS_BIGENDIAN)
2263         if (endian == DEVICE_LITTLE_ENDIAN) {
2264             val = bswap16(val);
2265         }
2266 #else
2267         if (endian == DEVICE_BIG_ENDIAN) {
2268             val = bswap16(val);
2269         }
2270 #endif
2271     } else {
2272         /* RAM case */
2273         ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section->mr)
2274                                 & TARGET_PAGE_MASK)
2275                                + memory_region_section_addr(section, addr));
2276         switch (endian) {
2277         case DEVICE_LITTLE_ENDIAN:
2278             val = lduw_le_p(ptr);
2279             break;
2280         case DEVICE_BIG_ENDIAN:
2281             val = lduw_be_p(ptr);
2282             break;
2283         default:
2284             val = lduw_p(ptr);
2285             break;
2286         }
2287     }
2288     return val;
2289 }
2290
2291 uint32_t lduw_phys(hwaddr addr)
2292 {
2293     return lduw_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
2294 }
2295
2296 uint32_t lduw_le_phys(hwaddr addr)
2297 {
2298     return lduw_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
2299 }
2300
2301 uint32_t lduw_be_phys(hwaddr addr)
2302 {
2303     return lduw_phys_internal(addr, DEVICE_BIG_ENDIAN);
2304 }
2305
2306 /* warning: addr must be aligned.  The RAM page is not marked as dirty
2307    and the code inside is not invalidated.  This is useful when the dirty
2308    bits are used to track modified PTEs. */
2309 void stl_phys_notdirty(hwaddr addr, uint32_t val)
2310 {
2311     uint8_t *ptr;
2312     MemoryRegionSection *section;
2313
2314     section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
2315
2316     if (!memory_region_is_ram(section->mr) || section->readonly) {
2317         addr = memory_region_section_addr(section, addr);
2318         if (memory_region_is_ram(section->mr)) {
2319             section = &phys_sections[phys_section_rom];
2320         }
2321         io_mem_write(section->mr, addr, val, 4);
2322     } else {
2323         unsigned long addr1 = (memory_region_get_ram_addr(section->mr)
2324                                & TARGET_PAGE_MASK)
2325             + memory_region_section_addr(section, addr);
2326         ptr = qemu_get_ram_ptr(addr1);
2327         stl_p(ptr, val);
2328
2329         if (unlikely(in_migration)) {
2330             if (!cpu_physical_memory_is_dirty(addr1)) {
2331                 /* invalidate code */
2332                 tb_invalidate_phys_page_range(addr1, addr1 + 4, 0);
2333                 /* set dirty bit */
2334                 cpu_physical_memory_set_dirty_flags(
2335                     addr1, (0xff & ~CODE_DIRTY_FLAG));
2336             }
2337         }
2338     }
2339 }
2340
2341 void stq_phys_notdirty(hwaddr addr, uint64_t val)
2342 {
2343     uint8_t *ptr;
2344     MemoryRegionSection *section;
2345
2346     section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
2347
2348     if (!memory_region_is_ram(section->mr) || section->readonly) {
2349         addr = memory_region_section_addr(section, addr);
2350         if (memory_region_is_ram(section->mr)) {
2351             section = &phys_sections[phys_section_rom];
2352         }
2353 #ifdef TARGET_WORDS_BIGENDIAN
2354         io_mem_write(section->mr, addr, val >> 32, 4);
2355         io_mem_write(section->mr, addr + 4, (uint32_t)val, 4);
2356 #else
2357         io_mem_write(section->mr, addr, (uint32_t)val, 4);
2358         io_mem_write(section->mr, addr + 4, val >> 32, 4);
2359 #endif
2360     } else {
2361         ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section->mr)
2362                                 & TARGET_PAGE_MASK)
2363                                + memory_region_section_addr(section, addr));
2364         stq_p(ptr, val);
2365     }
2366 }
2367
2368 /* warning: addr must be aligned */
2369 static inline void stl_phys_internal(hwaddr addr, uint32_t val,
2370                                      enum device_endian endian)
2371 {
2372     uint8_t *ptr;
2373     MemoryRegionSection *section;
2374
2375     section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
2376
2377     if (!memory_region_is_ram(section->mr) || section->readonly) {
2378         addr = memory_region_section_addr(section, addr);
2379         if (memory_region_is_ram(section->mr)) {
2380             section = &phys_sections[phys_section_rom];
2381         }
2382 #if defined(TARGET_WORDS_BIGENDIAN)
2383         if (endian == DEVICE_LITTLE_ENDIAN) {
2384             val = bswap32(val);
2385         }
2386 #else
2387         if (endian == DEVICE_BIG_ENDIAN) {
2388             val = bswap32(val);
2389         }
2390 #endif
2391         io_mem_write(section->mr, addr, val, 4);
2392     } else {
2393         unsigned long addr1;
2394         addr1 = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
2395             + memory_region_section_addr(section, addr);
2396         /* RAM case */
2397         ptr = qemu_get_ram_ptr(addr1);
2398         switch (endian) {
2399         case DEVICE_LITTLE_ENDIAN:
2400             stl_le_p(ptr, val);
2401             break;
2402         case DEVICE_BIG_ENDIAN:
2403             stl_be_p(ptr, val);
2404             break;
2405         default:
2406             stl_p(ptr, val);
2407             break;
2408         }
2409         invalidate_and_set_dirty(addr1, 4);
2410     }
2411 }
2412
2413 void stl_phys(hwaddr addr, uint32_t val)
2414 {
2415     stl_phys_internal(addr, val, DEVICE_NATIVE_ENDIAN);
2416 }
2417
2418 void stl_le_phys(hwaddr addr, uint32_t val)
2419 {
2420     stl_phys_internal(addr, val, DEVICE_LITTLE_ENDIAN);
2421 }
2422
2423 void stl_be_phys(hwaddr addr, uint32_t val)
2424 {
2425     stl_phys_internal(addr, val, DEVICE_BIG_ENDIAN);
2426 }
2427
2428 /* XXX: optimize */
2429 void stb_phys(hwaddr addr, uint32_t val)
2430 {
2431     uint8_t v = val;
2432     cpu_physical_memory_write(addr, &v, 1);
2433 }
2434
2435 /* warning: addr must be aligned */
2436 static inline void stw_phys_internal(hwaddr addr, uint32_t val,
2437                                      enum device_endian endian)
2438 {
2439     uint8_t *ptr;
2440     MemoryRegionSection *section;
2441
2442     section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
2443
2444     if (!memory_region_is_ram(section->mr) || section->readonly) {
2445         addr = memory_region_section_addr(section, addr);
2446         if (memory_region_is_ram(section->mr)) {
2447             section = &phys_sections[phys_section_rom];
2448         }
2449 #if defined(TARGET_WORDS_BIGENDIAN)
2450         if (endian == DEVICE_LITTLE_ENDIAN) {
2451             val = bswap16(val);
2452         }
2453 #else
2454         if (endian == DEVICE_BIG_ENDIAN) {
2455             val = bswap16(val);
2456         }
2457 #endif
2458         io_mem_write(section->mr, addr, val, 2);
2459     } else {
2460         unsigned long addr1;
2461         addr1 = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
2462             + memory_region_section_addr(section, addr);
2463         /* RAM case */
2464         ptr = qemu_get_ram_ptr(addr1);
2465         switch (endian) {
2466         case DEVICE_LITTLE_ENDIAN:
2467             stw_le_p(ptr, val);
2468             break;
2469         case DEVICE_BIG_ENDIAN:
2470             stw_be_p(ptr, val);
2471             break;
2472         default:
2473             stw_p(ptr, val);
2474             break;
2475         }
2476         invalidate_and_set_dirty(addr1, 2);
2477     }
2478 }
2479
2480 void stw_phys(hwaddr addr, uint32_t val)
2481 {
2482     stw_phys_internal(addr, val, DEVICE_NATIVE_ENDIAN);
2483 }
2484
2485 void stw_le_phys(hwaddr addr, uint32_t val)
2486 {
2487     stw_phys_internal(addr, val, DEVICE_LITTLE_ENDIAN);
2488 }
2489
2490 void stw_be_phys(hwaddr addr, uint32_t val)
2491 {
2492     stw_phys_internal(addr, val, DEVICE_BIG_ENDIAN);
2493 }
2494
2495 /* XXX: optimize */
2496 void stq_phys(hwaddr addr, uint64_t val)
2497 {
2498     val = tswap64(val);
2499     cpu_physical_memory_write(addr, &val, 8);
2500 }
2501
2502 void stq_le_phys(hwaddr addr, uint64_t val)
2503 {
2504     val = cpu_to_le64(val);
2505     cpu_physical_memory_write(addr, &val, 8);
2506 }
2507
2508 void stq_be_phys(hwaddr addr, uint64_t val)
2509 {
2510     val = cpu_to_be64(val);
2511     cpu_physical_memory_write(addr, &val, 8);
2512 }
2513
2514 /* virtual memory access for debug (includes writing to ROM) */
2515 int cpu_memory_rw_debug(CPUArchState *env, target_ulong addr,
2516                         uint8_t *buf, int len, int is_write)
2517 {
2518     int l;
2519     hwaddr phys_addr;
2520     target_ulong page;
2521
2522     while (len > 0) {
2523         page = addr & TARGET_PAGE_MASK;
2524         phys_addr = cpu_get_phys_page_debug(env, page);
2525         /* if no physical page mapped, return an error */
2526         if (phys_addr == -1)
2527             return -1;
2528         l = (page + TARGET_PAGE_SIZE) - addr;
2529         if (l > len)
2530             l = len;
2531         phys_addr += (addr & ~TARGET_PAGE_MASK);
2532         if (is_write)
2533             cpu_physical_memory_write_rom(phys_addr, buf, l);
2534         else
2535             cpu_physical_memory_rw(phys_addr, buf, l, is_write);
2536         len -= l;
2537         buf += l;
2538         addr += l;
2539     }
2540     return 0;
2541 }
2542 #endif
2543
2544 #if !defined(CONFIG_USER_ONLY)
2545
2546 /*
2547  * A helper function for the _utterly broken_ virtio device model to find out if
2548  * it's running on a big endian machine. Don't do this at home kids!
2549  */
2550 bool virtio_is_big_endian(void);
2551 bool virtio_is_big_endian(void)
2552 {
2553 #if defined(TARGET_WORDS_BIGENDIAN)
2554     return true;
2555 #else
2556     return false;
2557 #endif
2558 }
2559
2560 #endif
2561
2562 #ifndef CONFIG_USER_ONLY
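/* Return true if the guest physical address is backed by MMIO rather than
 * RAM or a ROM device in its RAM-read mode. */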
2563 bool cpu_physical_memory_is_io(hwaddr phys_addr)
2564 {
2565     MemoryRegionSection *section;
2566
2567     section = phys_page_find(address_space_memory.dispatch,
2568                              phys_addr >> TARGET_PAGE_BITS);
2569
2570     return !(memory_region_is_ram(section->mr) ||
2571              memory_region_is_romd(section->mr));
2572 }
2573 #endif