Refactor some HAX-related code.
[sdk/emulator/qemu.git] / exec.c
1 /*
2  *  Virtual page mapping
3  *
4  *  Copyright (c) 2003 Fabrice Bellard
5  *
6  * This library is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2 of the License, or (at your option) any later version.
10  *
11  * This library is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18  */
19 #include "config.h"
20 #ifdef _WIN32
21 #include <windows.h>
22 #else
23 #include <sys/types.h>
24 #include <sys/mman.h>
25 #endif
26
27 #include "qemu-common.h"
28 #include "cpu.h"
29 #include "tcg.h"
30 #include "hw/hw.h"
31 #include "hw/qdev.h"
32 #include "qemu/osdep.h"
33 #include "sysemu/kvm.h"
34 #include "sysemu/hax.h"
35 #include "hw/xen/xen.h"
36 #include "qemu/timer.h"
37 #include "qemu/config-file.h"
38 #include "exec/memory.h"
39 #include "sysemu/dma.h"
40 #include "exec/address-spaces.h"
41 #if defined(CONFIG_USER_ONLY)
42 #include <qemu.h>
43 #else /* !CONFIG_USER_ONLY */
44 #include "sysemu/xen-mapcache.h"
45 #include "trace.h"
46 #endif
47 #include "exec/cpu-all.h"
48
49 #include "exec/cputlb.h"
50 #include "translate-all.h"
51
52 #include "exec/memory-internal.h"
53
54 //#define DEBUG_UNASSIGNED
55 //#define DEBUG_SUBPAGE
56
57 #if !defined(CONFIG_USER_ONLY)
58 int phys_ram_fd;
59 static int in_migration;
60
61 RAMList ram_list = { .blocks = QTAILQ_HEAD_INITIALIZER(ram_list.blocks) };
62
63 static MemoryRegion *system_memory;
64 static MemoryRegion *system_io;
65
66 AddressSpace address_space_io;
67 AddressSpace address_space_memory;
68 DMAContext dma_context_memory;
69
70 MemoryRegion io_mem_ram, io_mem_rom, io_mem_unassigned, io_mem_notdirty;
71 static MemoryRegion io_mem_subpage_ram;
72
73 #endif
74
75 CPUArchState *first_cpu;
76 /* current CPU in the current thread. It is only valid inside
77    cpu_exec() */
78 DEFINE_TLS(CPUArchState *,cpu_single_env);
79 /* 0 = Do not count executed instructions.
80    1 = Precise instruction counting.
81    2 = Adaptive rate instruction counting.  */
82 int use_icount;
83
84 #if !defined(CONFIG_USER_ONLY)
85
86 static MemoryRegionSection *phys_sections;
87 static unsigned phys_sections_nb, phys_sections_nb_alloc;
88 static uint16_t phys_section_unassigned;
89 static uint16_t phys_section_notdirty;
90 static uint16_t phys_section_rom;
91 static uint16_t phys_section_watch;
92
93 /* Simple allocator for PhysPageEntry nodes */
94 static PhysPageEntry (*phys_map_nodes)[L2_SIZE];
95 static unsigned phys_map_nodes_nb, phys_map_nodes_nb_alloc;
96
97 #define PHYS_MAP_NODE_NIL (((uint16_t)~0) >> 1)
98
99 static void io_mem_init(void);
100 static void memory_map_init(void);
101 static void *qemu_safe_ram_ptr(ram_addr_t addr);
102
103 static MemoryRegion io_mem_watch;
104 #endif
105
106 #if !defined(CONFIG_USER_ONLY)
107
108 static void phys_map_node_reserve(unsigned nodes)
109 {
110     if (phys_map_nodes_nb + nodes > phys_map_nodes_nb_alloc) {
111         typedef PhysPageEntry Node[L2_SIZE];
112         phys_map_nodes_nb_alloc = MAX(phys_map_nodes_nb_alloc * 2, 16);
113         phys_map_nodes_nb_alloc = MAX(phys_map_nodes_nb_alloc,
114                                       phys_map_nodes_nb + nodes);
115         phys_map_nodes = g_renew(Node, phys_map_nodes,
116                                  phys_map_nodes_nb_alloc);
117     }
118 }
119
120 static uint16_t phys_map_node_alloc(void)
121 {
122     unsigned i;
123     uint16_t ret;
124
125     ret = phys_map_nodes_nb++;
126     assert(ret != PHYS_MAP_NODE_NIL);
127     assert(ret != phys_map_nodes_nb_alloc);
128     for (i = 0; i < L2_SIZE; ++i) {
129         phys_map_nodes[ret][i].is_leaf = 0;
130         phys_map_nodes[ret][i].ptr = PHYS_MAP_NODE_NIL;
131     }
132     return ret;
133 }
134
135 static void phys_map_nodes_reset(void)
136 {
137     phys_map_nodes_nb = 0;
138 }
139
140
141 static void phys_page_set_level(PhysPageEntry *lp, hwaddr *index,
142                                 hwaddr *nb, uint16_t leaf,
143                                 int level)
144 {
145     PhysPageEntry *p;
146     int i;
147     hwaddr step = (hwaddr)1 << (level * L2_BITS);
148
149     if (!lp->is_leaf && lp->ptr == PHYS_MAP_NODE_NIL) {
150         lp->ptr = phys_map_node_alloc();
151         p = phys_map_nodes[lp->ptr];
152         if (level == 0) {
153             for (i = 0; i < L2_SIZE; i++) {
154                 p[i].is_leaf = 1;
155                 p[i].ptr = phys_section_unassigned;
156             }
157         }
158     } else {
159         p = phys_map_nodes[lp->ptr];
160     }
161     lp = &p[(*index >> (level * L2_BITS)) & (L2_SIZE - 1)];
162
163     while (*nb && lp < &p[L2_SIZE]) {
164         if ((*index & (step - 1)) == 0 && *nb >= step) {
165             lp->is_leaf = true;
166             lp->ptr = leaf;
167             *index += step;
168             *nb -= step;
169         } else {
170             phys_page_set_level(lp, index, nb, leaf, level - 1);
171         }
172         ++lp;
173     }
174 }
175
176 static void phys_page_set(AddressSpaceDispatch *d,
177                           hwaddr index, hwaddr nb,
178                           uint16_t leaf)
179 {
180     /* Wildly overreserve - it doesn't matter much. */
181     phys_map_node_reserve(3 * P_L2_LEVELS);
182
183     phys_page_set_level(&d->phys_map, &index, &nb, leaf, P_L2_LEVELS - 1);
184 }
185
186 MemoryRegionSection *phys_page_find(AddressSpaceDispatch *d, hwaddr index)
187 {
188     PhysPageEntry lp = d->phys_map;
189     PhysPageEntry *p;
190     int i;
191     uint16_t s_index = phys_section_unassigned;
192
193     for (i = P_L2_LEVELS - 1; i >= 0 && !lp.is_leaf; i--) {
194         if (lp.ptr == PHYS_MAP_NODE_NIL) {
195             goto not_found;
196         }
197         p = phys_map_nodes[lp.ptr];
198         lp = p[(index >> (i * L2_BITS)) & (L2_SIZE - 1)];
199     }
200
201     s_index = lp.ptr;
202 not_found:
203     return &phys_sections[s_index];
204 }
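/* Editorial sketch (not part of the original file): a minimal illustration of
 * how the radix-tree helpers above fit together, assuming the same
 * AddressSpaceDispatch and section-index conventions as the surrounding code.
 * The function name and the guest addresses are invented for the example. */
#if 0
static void example_phys_map_usage(AddressSpaceDispatch *d, uint16_t section)
{
    /* Register two guest pages starting at 0x1000 under 'section'. */
    phys_page_set(d, 0x1000 >> TARGET_PAGE_BITS, 2, section);

    /* A later lookup of any page in that range walks the tree back down
     * to the same MemoryRegionSection. */
    MemoryRegionSection *s = phys_page_find(d, 0x1000 >> TARGET_PAGE_BITS);
    assert(s == &phys_sections[section]);
}
#endif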
205
206 bool memory_region_is_unassigned(MemoryRegion *mr)
207 {
208     return mr != &io_mem_ram && mr != &io_mem_rom
209         && mr != &io_mem_notdirty && !mr->rom_device
210         && mr != &io_mem_watch;
211 }
212 #endif
213
214 void cpu_exec_init_all(void)
215 {
216 #if !defined(CONFIG_USER_ONLY)
217     qemu_mutex_init(&ram_list.mutex);
218     memory_map_init();
219     io_mem_init();
220 #endif
221 }
222
223 #if !defined(CONFIG_USER_ONLY)
224
225 static int cpu_common_post_load(void *opaque, int version_id)
226 {
227     CPUState *cpu = opaque;
228
229     /* 0x01 was CPU_INTERRUPT_EXIT. This line can be removed when the
230        version_id is increased. */
231     cpu->interrupt_request &= ~0x01;
232     tlb_flush(cpu->env_ptr, 1);
233
234     return 0;
235 }
236
237 static const VMStateDescription vmstate_cpu_common = {
238     .name = "cpu_common",
239     .version_id = 1,
240     .minimum_version_id = 1,
241     .minimum_version_id_old = 1,
242     .post_load = cpu_common_post_load,
243     .fields      = (VMStateField []) {
244         VMSTATE_UINT32(halted, CPUState),
245         VMSTATE_UINT32(interrupt_request, CPUState),
246         VMSTATE_END_OF_LIST()
247     }
248 };
249 #else
250 #define vmstate_cpu_common vmstate_dummy
251 #endif
252
253 CPUState *qemu_get_cpu(int index)
254 {
255     CPUArchState *env = first_cpu;
256     CPUState *cpu = NULL;
257
258     while (env) {
259         cpu = ENV_GET_CPU(env);
260         if (cpu->cpu_index == index) {
261             break;
262         }
263         env = env->next_cpu;
264     }
265
266     return env ? cpu : NULL;
267 }
268
269 void qemu_for_each_cpu(void (*func)(CPUState *cpu, void *data), void *data)
270 {
271     CPUArchState *env = first_cpu;
272
273     while (env) {
274         func(ENV_GET_CPU(env), data);
275         env = env->next_cpu;
276     }
277 }
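/* Editorial sketch (not part of the original file): a typical caller of
 * qemu_for_each_cpu() above supplies a callback that receives each CPUState
 * in turn.  The callback and wrapper names are invented for the example. */
#if 0
static void example_count_one_cpu(CPUState *cpu, void *data)
{
    int *count = data;

    (*count)++;
}

static int example_count_cpus(void)
{
    int count = 0;

    qemu_for_each_cpu(example_count_one_cpu, &count);
    return count;
}
#endif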
278
279 void cpu_exec_init(CPUArchState *env)
280 {
281     CPUState *cpu = ENV_GET_CPU(env);
282     CPUClass *cc = CPU_GET_CLASS(cpu);
283     CPUArchState **penv;
284     int cpu_index;
285
286 #if defined(CONFIG_USER_ONLY)
287     cpu_list_lock();
288 #endif
289     env->next_cpu = NULL;
290     penv = &first_cpu;
291     cpu_index = 0;
292     while (*penv != NULL) {
293         penv = &(*penv)->next_cpu;
294         cpu_index++;
295     }
296     cpu->cpu_index = cpu_index;
297     cpu->numa_node = 0;
298     QTAILQ_INIT(&env->breakpoints);
299     QTAILQ_INIT(&env->watchpoints);
300 #ifndef CONFIG_USER_ONLY
301     cpu->thread_id = qemu_get_thread_id();
302 #endif
303     *penv = env;
304 #if defined(CONFIG_USER_ONLY)
305     cpu_list_unlock();
306 #endif
307     vmstate_register(NULL, cpu_index, &vmstate_cpu_common, cpu);
308 #if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
309     register_savevm(NULL, "cpu", cpu_index, CPU_SAVE_VERSION,
310                     cpu_save, cpu_load, env);
311     assert(cc->vmsd == NULL);
312 #endif
313     if (cc->vmsd != NULL) {
314         vmstate_register(NULL, cpu_index, cc->vmsd, cpu);
315     }
316 }
317
318 #if defined(TARGET_HAS_ICE)
319 #if defined(CONFIG_USER_ONLY)
320 static void breakpoint_invalidate(CPUArchState *env, target_ulong pc)
321 {
322     tb_invalidate_phys_page_range(pc, pc + 1, 0);
323 }
324 #else
325 static void breakpoint_invalidate(CPUArchState *env, target_ulong pc)
326 {
327     tb_invalidate_phys_addr(cpu_get_phys_page_debug(env, pc) |
328             (pc & ~TARGET_PAGE_MASK));
329 }
330 #endif
331 #endif /* TARGET_HAS_ICE */
332
333 #if defined(CONFIG_USER_ONLY)
334 void cpu_watchpoint_remove_all(CPUArchState *env, int mask)
335
336 {
337 }
338
339 int cpu_watchpoint_insert(CPUArchState *env, target_ulong addr, target_ulong len,
340                           int flags, CPUWatchpoint **watchpoint)
341 {
342     return -ENOSYS;
343 }
344 #else
345 /* Add a watchpoint.  */
346 int cpu_watchpoint_insert(CPUArchState *env, target_ulong addr, target_ulong len,
347                           int flags, CPUWatchpoint **watchpoint)
348 {
349     target_ulong len_mask = ~(len - 1);
350     CPUWatchpoint *wp;
351
352     /* sanity checks: allow power-of-2 lengths, deny unaligned watchpoints */
353     if ((len & (len - 1)) || (addr & ~len_mask) ||
354             len == 0 || len > TARGET_PAGE_SIZE) {
355         fprintf(stderr, "qemu: tried to set invalid watchpoint at "
356                 TARGET_FMT_lx ", len=" TARGET_FMT_lu "\n", addr, len);
357         return -EINVAL;
358     }
359     wp = g_malloc(sizeof(*wp));
360
361     wp->vaddr = addr;
362     wp->len_mask = len_mask;
363     wp->flags = flags;
364
365     /* keep all GDB-injected watchpoints in front */
366     if (flags & BP_GDB)
367         QTAILQ_INSERT_HEAD(&env->watchpoints, wp, entry);
368     else
369         QTAILQ_INSERT_TAIL(&env->watchpoints, wp, entry);
370
371     tlb_flush_page(env, addr);
372
373     if (watchpoint)
374         *watchpoint = wp;
375     return 0;
376 }
377
378 /* Remove a specific watchpoint.  */
379 int cpu_watchpoint_remove(CPUArchState *env, target_ulong addr, target_ulong len,
380                           int flags)
381 {
382     target_ulong len_mask = ~(len - 1);
383     CPUWatchpoint *wp;
384
385     QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
386         if (addr == wp->vaddr && len_mask == wp->len_mask
387                 && flags == (wp->flags & ~BP_WATCHPOINT_HIT)) {
388             cpu_watchpoint_remove_by_ref(env, wp);
389             return 0;
390         }
391     }
392     return -ENOENT;
393 }
394
395 /* Remove a specific watchpoint by reference.  */
396 void cpu_watchpoint_remove_by_ref(CPUArchState *env, CPUWatchpoint *watchpoint)
397 {
398     QTAILQ_REMOVE(&env->watchpoints, watchpoint, entry);
399
400     tlb_flush_page(env, watchpoint->vaddr);
401
402     g_free(watchpoint);
403 }
404
405 /* Remove all matching watchpoints.  */
406 void cpu_watchpoint_remove_all(CPUArchState *env, int mask)
407 {
408     CPUWatchpoint *wp, *next;
409
410     QTAILQ_FOREACH_SAFE(wp, &env->watchpoints, entry, next) {
411         if (wp->flags & mask)
412             cpu_watchpoint_remove_by_ref(env, wp);
413     }
414 }
415 #endif
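/* Editorial sketch (not part of the original file): setting and clearing a
 * data watchpoint with the helpers above.  The length must be a power of two
 * and the address aligned to it, as checked by cpu_watchpoint_insert().  The
 * function name is invented for the example. */
#if 0
static void example_watch_word(CPUArchState *env, target_ulong addr)
{
    CPUWatchpoint *wp;

    if (cpu_watchpoint_insert(env, addr, 4, BP_MEM_WRITE, &wp) == 0) {
        /* ... let the guest run; a write to addr raises a debug exception ... */
        cpu_watchpoint_remove_by_ref(env, wp);
    }
}
#endif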
416
417 /* Add a breakpoint.  */
418 int cpu_breakpoint_insert(CPUArchState *env, target_ulong pc, int flags,
419                           CPUBreakpoint **breakpoint)
420 {
421 #if defined(TARGET_HAS_ICE)
422     CPUBreakpoint *bp;
423
424     bp = g_malloc(sizeof(*bp));
425
426     bp->pc = pc;
427     bp->flags = flags;
428
429     /* keep all GDB-injected breakpoints in front */
430     if (flags & BP_GDB)
431         QTAILQ_INSERT_HEAD(&env->breakpoints, bp, entry);
432     else
433         QTAILQ_INSERT_TAIL(&env->breakpoints, bp, entry);
434
435     breakpoint_invalidate(env, pc);
436
437     if (breakpoint)
438         *breakpoint = bp;
439     return 0;
440 #else
441     return -ENOSYS;
442 #endif
443 }
444
445 /* Remove a specific breakpoint.  */
446 int cpu_breakpoint_remove(CPUArchState *env, target_ulong pc, int flags)
447 {
448 #if defined(TARGET_HAS_ICE)
449     CPUBreakpoint *bp;
450
451     QTAILQ_FOREACH(bp, &env->breakpoints, entry) {
452         if (bp->pc == pc && bp->flags == flags) {
453             cpu_breakpoint_remove_by_ref(env, bp);
454             return 0;
455         }
456     }
457     return -ENOENT;
458 #else
459     return -ENOSYS;
460 #endif
461 }
462
463 /* Remove a specific breakpoint by reference.  */
464 void cpu_breakpoint_remove_by_ref(CPUArchState *env, CPUBreakpoint *breakpoint)
465 {
466 #if defined(TARGET_HAS_ICE)
467     QTAILQ_REMOVE(&env->breakpoints, breakpoint, entry);
468
469     breakpoint_invalidate(env, breakpoint->pc);
470
471     g_free(breakpoint);
472 #endif
473 }
474
475 /* Remove all matching breakpoints. */
476 void cpu_breakpoint_remove_all(CPUArchState *env, int mask)
477 {
478 #if defined(TARGET_HAS_ICE)
479     CPUBreakpoint *bp, *next;
480
481     QTAILQ_FOREACH_SAFE(bp, &env->breakpoints, entry, next) {
482         if (bp->flags & mask)
483             cpu_breakpoint_remove_by_ref(env, bp);
484     }
485 #endif
486 }
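/* Editorial sketch (not part of the original file): breakpoints follow the
 * same insert/remove-by-reference pattern as watchpoints, keyed on the guest
 * PC and flags.  The function name is invented for the example. */
#if 0
static void example_break_at(CPUArchState *env, target_ulong pc)
{
    CPUBreakpoint *bp;

    if (cpu_breakpoint_insert(env, pc, BP_GDB, &bp) == 0) {
        /* ... the CPU loop reports EXCP_DEBUG when pc is reached ... */
        cpu_breakpoint_remove_by_ref(env, bp);
    }
}
#endif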
487
488 /* enable or disable single step mode. EXCP_DEBUG is returned by the
489    CPU loop after each instruction */
490 void cpu_single_step(CPUArchState *env, int enabled)
491 {
492 #if defined(TARGET_HAS_ICE)
493     if (env->singlestep_enabled != enabled) {
494         env->singlestep_enabled = enabled;
495         if (kvm_enabled())
496             kvm_update_guest_debug(env, 0);
497         else {
498             /* must flush all the translated code to avoid inconsistencies */
499             /* XXX: only flush what is necessary */
500             tb_flush(env);
501         }
502     }
503 #endif
504 }
505
506 void cpu_exit(CPUArchState *env)
507 {
508     CPUState *cpu = ENV_GET_CPU(env);
509
510     cpu->exit_request = 1;
511     cpu->tcg_exit_req = 1;
512 }
513
514 void cpu_abort(CPUArchState *env, const char *fmt, ...)
515 {
516     va_list ap;
517     va_list ap2;
518
519     va_start(ap, fmt);
520     va_copy(ap2, ap);
521     fprintf(stderr, "qemu: fatal: ");
522     vfprintf(stderr, fmt, ap);
523     fprintf(stderr, "\n");
524     cpu_dump_state(env, stderr, fprintf, CPU_DUMP_FPU | CPU_DUMP_CCOP);
525     if (qemu_log_enabled()) {
526         qemu_log("qemu: fatal: ");
527         qemu_log_vprintf(fmt, ap2);
528         qemu_log("\n");
529         log_cpu_state(env, CPU_DUMP_FPU | CPU_DUMP_CCOP);
530         qemu_log_flush();
531         qemu_log_close();
532     }
533     va_end(ap2);
534     va_end(ap);
535 #if defined(CONFIG_USER_ONLY)
536     {
537         struct sigaction act;
538         sigfillset(&act.sa_mask);
539         act.sa_handler = SIG_DFL;
540         sigaction(SIGABRT, &act, NULL);
541     }
542 #endif
543     abort();
544 }
545
546 CPUArchState *cpu_copy(CPUArchState *env)
547 {
548     CPUArchState *new_env = cpu_init(env->cpu_model_str);
549     CPUArchState *next_cpu = new_env->next_cpu;
550 #if defined(TARGET_HAS_ICE)
551     CPUBreakpoint *bp;
552     CPUWatchpoint *wp;
553 #endif
554
555     memcpy(new_env, env, sizeof(CPUArchState));
556
557     /* Preserve chaining. */
558     new_env->next_cpu = next_cpu;
559
560     /* Clone all break/watchpoints.
561        Note: Once we support ptrace with hw-debug register access, make sure
562        BP_CPU break/watchpoints are handled correctly on clone. */
563     QTAILQ_INIT(&env->breakpoints);
564     QTAILQ_INIT(&env->watchpoints);
565 #if defined(TARGET_HAS_ICE)
566     QTAILQ_FOREACH(bp, &env->breakpoints, entry) {
567         cpu_breakpoint_insert(new_env, bp->pc, bp->flags, NULL);
568     }
569     QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
570         cpu_watchpoint_insert(new_env, wp->vaddr, (~wp->len_mask) + 1,
571                               wp->flags, NULL);
572     }
573 #endif
574
575     return new_env;
576 }
577
578 #if !defined(CONFIG_USER_ONLY)
579 static void tlb_reset_dirty_range_all(ram_addr_t start, ram_addr_t end,
580                                       uintptr_t length)
581 {
582     uintptr_t start1;
583
584     /* we modify the TLB cache so that the dirty bit will be set again
585        when accessing the range */
586     start1 = (uintptr_t)qemu_safe_ram_ptr(start);
587     /* Check that we don't span multiple blocks - this breaks the
588        address comparisons below.  */
589     if ((uintptr_t)qemu_safe_ram_ptr(end - 1) - start1
590             != (end - 1) - start) {
591         abort();
592     }
593     cpu_tlb_reset_dirty_all(start1, length);
594
595 }
596
597 /* Note: start and end must be within the same ram block.  */
598 void cpu_physical_memory_reset_dirty(ram_addr_t start, ram_addr_t end,
599                                      int dirty_flags)
600 {
601     uintptr_t length;
602
603     start &= TARGET_PAGE_MASK;
604     end = TARGET_PAGE_ALIGN(end);
605
606     length = end - start;
607     if (length == 0)
608         return;
609     cpu_physical_memory_mask_dirty_range(start, length, dirty_flags);
610
611     if (tcg_enabled()) {
612         tlb_reset_dirty_range_all(start, end, length);
613     }
614 }
615
616 static int cpu_physical_memory_set_dirty_tracking(int enable)
617 {
618     int ret = 0;
619     in_migration = enable;
620     return ret;
621 }
622
623 hwaddr memory_region_section_get_iotlb(CPUArchState *env,
624                                                    MemoryRegionSection *section,
625                                                    target_ulong vaddr,
626                                                    hwaddr paddr,
627                                                    int prot,
628                                                    target_ulong *address)
629 {
630     hwaddr iotlb;
631     CPUWatchpoint *wp;
632
633     if (memory_region_is_ram(section->mr)) {
634         /* Normal RAM.  */
635         iotlb = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
636             + memory_region_section_addr(section, paddr);
637         if (!section->readonly) {
638             iotlb |= phys_section_notdirty;
639         } else {
640             iotlb |= phys_section_rom;
641         }
642     } else {
643         /* IO handlers are currently passed a physical address.
644            It would be nice to pass an offset from the base address
645            of that region.  This would avoid having to special case RAM,
646            and avoid full address decoding in every device.
647            We can't use the high bits of pd for this because
648            IO_MEM_ROMD uses these as a ram address.  */
649         iotlb = section - phys_sections;
650         iotlb += memory_region_section_addr(section, paddr);
651     }
652
653     /* Make accesses to pages with watchpoints go via the
654        watchpoint trap routines.  */
655     QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
656         if (vaddr == (wp->vaddr & TARGET_PAGE_MASK)) {
657             /* Avoid trapping reads of pages with a write breakpoint. */
658             if ((prot & PAGE_WRITE) || (wp->flags & BP_MEM_READ)) {
659                 iotlb = phys_section_watch + paddr;
660                 *address |= TLB_MMIO;
661                 break;
662             }
663         }
664     }
665
666     return iotlb;
667 }
668 #endif /* defined(CONFIG_USER_ONLY) */
669
670 #if !defined(CONFIG_USER_ONLY)
671
672 #define SUBPAGE_IDX(addr) ((addr) & ~TARGET_PAGE_MASK)
673 typedef struct subpage_t {
674     MemoryRegion iomem;
675     hwaddr base;
676     uint16_t sub_section[TARGET_PAGE_SIZE];
677 } subpage_t;
678
679 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
680                              uint16_t section);
681 static subpage_t *subpage_init(hwaddr base);
682 static void destroy_page_desc(uint16_t section_index)
683 {
684     MemoryRegionSection *section = &phys_sections[section_index];
685     MemoryRegion *mr = section->mr;
686
687     if (mr->subpage) {
688         subpage_t *subpage = container_of(mr, subpage_t, iomem);
689         memory_region_destroy(&subpage->iomem);
690         g_free(subpage);
691     }
692 }
693
694 static void destroy_l2_mapping(PhysPageEntry *lp, unsigned level)
695 {
696     unsigned i;
697     PhysPageEntry *p;
698
699     if (lp->ptr == PHYS_MAP_NODE_NIL) {
700         return;
701     }
702
703     p = phys_map_nodes[lp->ptr];
704     for (i = 0; i < L2_SIZE; ++i) {
705         if (!p[i].is_leaf) {
706             destroy_l2_mapping(&p[i], level - 1);
707         } else {
708             destroy_page_desc(p[i].ptr);
709         }
710     }
711     lp->is_leaf = 0;
712     lp->ptr = PHYS_MAP_NODE_NIL;
713 }
714
715 static void destroy_all_mappings(AddressSpaceDispatch *d)
716 {
717     destroy_l2_mapping(&d->phys_map, P_L2_LEVELS - 1);
718     phys_map_nodes_reset();
719 }
720
721 static uint16_t phys_section_add(MemoryRegionSection *section)
722 {
723     if (phys_sections_nb == phys_sections_nb_alloc) {
724         phys_sections_nb_alloc = MAX(phys_sections_nb_alloc * 2, 16);
725         phys_sections = g_renew(MemoryRegionSection, phys_sections,
726                                 phys_sections_nb_alloc);
727     }
728     phys_sections[phys_sections_nb] = *section;
729     return phys_sections_nb++;
730 }
731
732 static void phys_sections_clear(void)
733 {
734     phys_sections_nb = 0;
735 }
736
737 static void register_subpage(AddressSpaceDispatch *d, MemoryRegionSection *section)
738 {
739     subpage_t *subpage;
740     hwaddr base = section->offset_within_address_space
741         & TARGET_PAGE_MASK;
742     MemoryRegionSection *existing = phys_page_find(d, base >> TARGET_PAGE_BITS);
743     MemoryRegionSection subsection = {
744         .offset_within_address_space = base,
745         .size = TARGET_PAGE_SIZE,
746     };
747     hwaddr start, end;
748
749     assert(existing->mr->subpage || existing->mr == &io_mem_unassigned);
750
751     if (!(existing->mr->subpage)) {
752         subpage = subpage_init(base);
753         subsection.mr = &subpage->iomem;
754         phys_page_set(d, base >> TARGET_PAGE_BITS, 1,
755                       phys_section_add(&subsection));
756     } else {
757         subpage = container_of(existing->mr, subpage_t, iomem);
758     }
759     start = section->offset_within_address_space & ~TARGET_PAGE_MASK;
760     end = start + section->size - 1;
761     subpage_register(subpage, start, end, phys_section_add(section));
762 }
763
764
765 static void register_multipage(AddressSpaceDispatch *d, MemoryRegionSection *section)
766 {
767     hwaddr start_addr = section->offset_within_address_space;
768     ram_addr_t size = section->size;
769     hwaddr addr;
770     uint16_t section_index = phys_section_add(section);
771
772     assert(size);
773
774     addr = start_addr;
775     phys_page_set(d, addr >> TARGET_PAGE_BITS, size >> TARGET_PAGE_BITS,
776                   section_index);
777 }
778
779 static void mem_add(MemoryListener *listener, MemoryRegionSection *section)
780 {
781     AddressSpaceDispatch *d = container_of(listener, AddressSpaceDispatch, listener);
782     MemoryRegionSection now = *section, remain = *section;
783
784     if ((now.offset_within_address_space & ~TARGET_PAGE_MASK)
785         || (now.size < TARGET_PAGE_SIZE)) {
786         now.size = MIN(TARGET_PAGE_ALIGN(now.offset_within_address_space)
787                        - now.offset_within_address_space,
788                        now.size);
789         register_subpage(d, &now);
790         remain.size -= now.size;
791         remain.offset_within_address_space += now.size;
792         remain.offset_within_region += now.size;
793     }
794     while (remain.size >= TARGET_PAGE_SIZE) {
795         now = remain;
796         if (remain.offset_within_region & ~TARGET_PAGE_MASK) {
797             now.size = TARGET_PAGE_SIZE;
798             register_subpage(d, &now);
799         } else {
800             now.size &= TARGET_PAGE_MASK;
801             register_multipage(d, &now);
802         }
803         remain.size -= now.size;
804         remain.offset_within_address_space += now.size;
805         remain.offset_within_region += now.size;
806     }
807     now = remain;
808     if (now.size) {
809         register_subpage(d, &now);
810     }
811 }
812
813 void qemu_flush_coalesced_mmio_buffer(void)
814 {
815     if (kvm_enabled())
816         kvm_flush_coalesced_mmio_buffer();
817 }
818
819 void qemu_mutex_lock_ramlist(void)
820 {
821     qemu_mutex_lock(&ram_list.mutex);
822 }
823
824 void qemu_mutex_unlock_ramlist(void)
825 {
826     qemu_mutex_unlock(&ram_list.mutex);
827 }
828
829 #if defined(__linux__) && !defined(TARGET_S390X)
830
831 #include <sys/vfs.h>
832
833 #define HUGETLBFS_MAGIC       0x958458f6
834
835 static long gethugepagesize(const char *path)
836 {
837     struct statfs fs;
838     int ret;
839
840     do {
841         ret = statfs(path, &fs);
842     } while (ret != 0 && errno == EINTR);
843
844     if (ret != 0) {
845         perror(path);
846         return 0;
847     }
848
849     if (fs.f_type != HUGETLBFS_MAGIC)
850         fprintf(stderr, "Warning: path not on HugeTLBFS: %s\n", path);
851
852     return fs.f_bsize;
853 }
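/* Editorial sketch (not part of the original file): file_ram_alloc() below
 * rounds the requested size up to a multiple of the hugepage size with the
 * usual mask trick.  For a 2 MiB hugepage, a 5 MiB request becomes 6 MiB:
 * (5 MiB + 2 MiB - 1) & ~(2 MiB - 1) == 6 MiB. */
#if 0
static ram_addr_t example_round_to_hugepage(ram_addr_t memory,
                                            unsigned long hpagesize)
{
    return (memory + hpagesize - 1) & ~(hpagesize - 1);
}
#endif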
854
855 static void *file_ram_alloc(RAMBlock *block,
856                             ram_addr_t memory,
857                             const char *path)
858 {
859     char *filename;
860     char *sanitized_name;
861     char *c;
862     void *area;
863     int fd;
864 #ifdef MAP_POPULATE
865     int flags;
866 #endif
867     unsigned long hpagesize;
868
869     hpagesize = gethugepagesize(path);
870     if (!hpagesize) {
871         return NULL;
872     }
873
874     if (memory < hpagesize) {
875         return NULL;
876     }
877
878     if (kvm_enabled() && !kvm_has_sync_mmu()) {
879         fprintf(stderr, "host lacks kvm mmu notifiers, -mem-path unsupported\n");
880         return NULL;
881     }
882
883     /* Make name safe to use with mkstemp by replacing '/' with '_'. */
884     sanitized_name = g_strdup(block->mr->name);
885     for (c = sanitized_name; *c != '\0'; c++) {
886         if (*c == '/')
887             *c = '_';
888     }
889
890     filename = g_strdup_printf("%s/qemu_back_mem.%s.XXXXXX", path,
891                                sanitized_name);
892     g_free(sanitized_name);
893
894     fd = mkstemp(filename);
895     if (fd < 0) {
896         perror("unable to create backing store for hugepages");
897         g_free(filename);
898         return NULL;
899     }
900     unlink(filename);
901     g_free(filename);
902
903     memory = (memory+hpagesize-1) & ~(hpagesize-1);
904
905     /*
906      * ftruncate is not supported by hugetlbfs on older
907      * hosts, so don't bother bailing out on errors.
908      * If anything goes wrong with it under other filesystems,
909      * mmap will fail.
910      */
911     if (ftruncate(fd, memory))
912         perror("ftruncate");
913
914 #ifdef MAP_POPULATE
915     /* NB: MAP_POPULATE won't exhaustively allocate all physical pages when
916      * MAP_PRIVATE is requested.  For mem_prealloc we mmap as MAP_SHARED
917      * to sidestep this quirk.
918      */
919     flags = mem_prealloc ? MAP_POPULATE | MAP_SHARED : MAP_PRIVATE;
920     area = mmap(0, memory, PROT_READ | PROT_WRITE, flags, fd, 0);
921 #else
922     area = mmap(0, memory, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
923 #endif
924     if (area == MAP_FAILED) {
925         perror("file_ram_alloc: can't mmap RAM pages");
926         close(fd);
927         return (NULL);
928     }
929     block->fd = fd;
930     return area;
931 }
932 #endif
933
934 static ram_addr_t find_ram_offset(ram_addr_t size)
935 {
936     RAMBlock *block, *next_block;
937     ram_addr_t offset = RAM_ADDR_MAX, mingap = RAM_ADDR_MAX;
938
939     assert(size != 0); /* it would hand out the same offset multiple times */
940
941     if (QTAILQ_EMPTY(&ram_list.blocks))
942         return 0;
943
944     QTAILQ_FOREACH(block, &ram_list.blocks, next) {
945         ram_addr_t end, next = RAM_ADDR_MAX;
946
947         end = block->offset + block->length;
948
949         QTAILQ_FOREACH(next_block, &ram_list.blocks, next) {
950             if (next_block->offset >= end) {
951                 next = MIN(next, next_block->offset);
952             }
953         }
954         if (next - end >= size && next - end < mingap) {
955             offset = end;
956             mingap = next - end;
957         }
958     }
959
960     if (offset == RAM_ADDR_MAX) {
961         fprintf(stderr, "Failed to find gap of requested size: %" PRIu64 "\n",
962                 (uint64_t)size);
963         abort();
964     }
965
966     return offset;
967 }
968
969 ram_addr_t last_ram_offset(void)
970 {
971     RAMBlock *block;
972     ram_addr_t last = 0;
973
974     QTAILQ_FOREACH(block, &ram_list.blocks, next)
975         last = MAX(last, block->offset + block->length);
976
977     return last;
978 }
979
980 static void qemu_ram_setup_dump(void *addr, ram_addr_t size)
981 {
982     int ret;
983     QemuOpts *machine_opts;
984
985     /* Use MADV_DONTDUMP if the user doesn't want the guest memory in the core dump */
986     machine_opts = qemu_opts_find(qemu_find_opts("machine"), 0);
987     if (machine_opts &&
988         !qemu_opt_get_bool(machine_opts, "dump-guest-core", true)) {
989         ret = qemu_madvise(addr, size, QEMU_MADV_DONTDUMP);
990         if (ret) {
991             perror("qemu_madvise");
992             fprintf(stderr, "madvise doesn't support MADV_DONTDUMP, "
993                             "but dump_guest_core=off specified\n");
994         }
995     }
996 }
997
998 void qemu_ram_set_idstr(ram_addr_t addr, const char *name, DeviceState *dev)
999 {
1000     RAMBlock *new_block, *block;
1001
1002     new_block = NULL;
1003     QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1004         if (block->offset == addr) {
1005             new_block = block;
1006             break;
1007         }
1008     }
1009     assert(new_block);
1010     assert(!new_block->idstr[0]);
1011
1012     if (dev) {
1013         char *id = qdev_get_dev_path(dev);
1014         if (id) {
1015             snprintf(new_block->idstr, sizeof(new_block->idstr), "%s/", id);
1016             g_free(id);
1017         }
1018     }
1019     pstrcat(new_block->idstr, sizeof(new_block->idstr), name);
1020
1021     /* This assumes the iothread lock is taken here too.  */
1022     qemu_mutex_lock_ramlist();
1023     QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1024         if (block != new_block && !strcmp(block->idstr, new_block->idstr)) {
1025             fprintf(stderr, "RAMBlock \"%s\" already registered, abort!\n",
1026                     new_block->idstr);
1027             abort();
1028         }
1029     }
1030     qemu_mutex_unlock_ramlist();
1031 }
1032
1033 static int memory_try_enable_merging(void *addr, size_t len)
1034 {
1035     QemuOpts *opts;
1036
1037     opts = qemu_opts_find(qemu_find_opts("machine"), 0);
1038     if (opts && !qemu_opt_get_bool(opts, "mem-merge", true)) {
1039         /* disabled by the user */
1040         return 0;
1041     }
1042
1043     return qemu_madvise(addr, len, QEMU_MADV_MERGEABLE);
1044 }
1045
1046 ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
1047                                    MemoryRegion *mr)
1048 {
1049     RAMBlock *block, *new_block;
1050
1051     size = TARGET_PAGE_ALIGN(size);
1052     new_block = g_malloc0(sizeof(*new_block));
1053
1054     /* This assumes the iothread lock is taken here too.  */
1055     qemu_mutex_lock_ramlist();
1056     new_block->mr = mr;
1057     new_block->offset = find_ram_offset(size);
1058     if (host) {
1059         new_block->host = host;
1060         new_block->flags |= RAM_PREALLOC_MASK;
1061     } else {
1062         if (mem_path) {
1063 #if defined (__linux__) && !defined(TARGET_S390X)
1064             new_block->host = file_ram_alloc(new_block, size, mem_path);
1065             if (!new_block->host) {
1066                 new_block->host = qemu_anon_ram_alloc(size);
1067                 memory_try_enable_merging(new_block->host, size);
1068             }
1069 #else
1070             fprintf(stderr, "-mem-path option unsupported\n");
1071             exit(1);
1072 #endif
1073         } else {
1074             if (xen_enabled()) {
1075                 xen_ram_alloc(new_block->offset, size, mr);
1076             } else if (kvm_enabled()) {
1077                 /* some s390/kvm configurations have special constraints */
1078                 new_block->host = kvm_ram_alloc(size);
1079             } else {
1080                 new_block->host = qemu_anon_ram_alloc(size);
1081 #ifdef CONFIG_HAX
1082                 /*
1083                  * Under HAX, QEMU allocates the virtual address space and the HAX
1084                  * kernel module backs it with physical memory. There is currently no
1085                  * paging, so the user must ensure enough free memory in advance.
1086                  */
1087                 if (hax_enabled()) {
1088                     int ret;
1089                     ret = hax_populate_ram((uint64_t)new_block->host, size);
1090                     if (ret < 0) {
1091                         fprintf(stderr, "Hax failed to populate ram\n");
1092                         exit(-1);
1093                     }
1094                 }
1095 #endif
1096             }
1097             memory_try_enable_merging(new_block->host, size);
1098         }
1099     }
1100     new_block->length = size;
1101
1102     /* Keep the list sorted from biggest to smallest block.  */
1103     QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1104         if (block->length < new_block->length) {
1105             break;
1106         }
1107     }
1108     if (block) {
1109         QTAILQ_INSERT_BEFORE(block, new_block, next);
1110     } else {
1111         QTAILQ_INSERT_TAIL(&ram_list.blocks, new_block, next);
1112     }
1113     ram_list.mru_block = NULL;
1114
1115     ram_list.version++;
1116     qemu_mutex_unlock_ramlist();
1117
1118     ram_list.phys_dirty = g_realloc(ram_list.phys_dirty,
1119                                        last_ram_offset() >> TARGET_PAGE_BITS);
1120     memset(ram_list.phys_dirty + (new_block->offset >> TARGET_PAGE_BITS),
1121            0, size >> TARGET_PAGE_BITS);
1122     cpu_physical_memory_set_dirty_range(new_block->offset, size, 0xff);
1123
1124     qemu_ram_setup_dump(new_block->host, size);
1125     qemu_madvise(new_block->host, size, QEMU_MADV_HUGEPAGE);
1126
1127     if (kvm_enabled())
1128         kvm_setup_guest_memory(new_block->host, size);
1129
1130     return new_block->offset;
1131 }
1132
1133 ram_addr_t qemu_ram_alloc(ram_addr_t size, MemoryRegion *mr)
1134 {
1135     return qemu_ram_alloc_from_ptr(size, NULL, mr);
1136 }
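/* Editorial sketch (not part of the original file): device and board models
 * normally reach qemu_ram_alloc() indirectly through the memory API rather
 * than calling it by hand, but the underlying pattern is to allocate a block
 * and then touch it through qemu_get_ram_ptr() using the returned ram_addr_t
 * offset.  The function name is invented for the example. */
#if 0
static ram_addr_t example_alloc_scratch_ram(MemoryRegion *mr, ram_addr_t size)
{
    ram_addr_t offset = qemu_ram_alloc(size, mr);
    void *host = qemu_get_ram_ptr(offset);

    memset(host, 0, size);      /* block is now backed by host memory */
    return offset;
}
#endif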
1137
1138 void qemu_ram_free_from_ptr(ram_addr_t addr)
1139 {
1140     RAMBlock *block;
1141
1142     /* This assumes the iothread lock is taken here too.  */
1143     qemu_mutex_lock_ramlist();
1144     QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1145         if (addr == block->offset) {
1146             QTAILQ_REMOVE(&ram_list.blocks, block, next);
1147             ram_list.mru_block = NULL;
1148             ram_list.version++;
1149             g_free(block);
1150             break;
1151         }
1152     }
1153     qemu_mutex_unlock_ramlist();
1154 }
1155
1156 void qemu_ram_free(ram_addr_t addr)
1157 {
1158     RAMBlock *block;
1159
1160     /* This assumes the iothread lock is taken here too.  */
1161     qemu_mutex_lock_ramlist();
1162     QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1163         if (addr == block->offset) {
1164             QTAILQ_REMOVE(&ram_list.blocks, block, next);
1165             ram_list.mru_block = NULL;
1166             ram_list.version++;
1167             if (block->flags & RAM_PREALLOC_MASK) {
1168                 ;
1169             } else if (mem_path) {
1170 #if defined (__linux__) && !defined(TARGET_S390X)
1171                 if (block->fd) {
1172                     munmap(block->host, block->length);
1173                     close(block->fd);
1174                 } else {
1175                     qemu_anon_ram_free(block->host, block->length);
1176                 }
1177 #else
1178                 abort();
1179 #endif
1180             } else {
1181                 if (xen_enabled()) {
1182                     xen_invalidate_map_cache_entry(block->host);
1183                 } else {
1184                     qemu_anon_ram_free(block->host, block->length);
1185                 }
1186             }
1187             g_free(block);
1188             break;
1189         }
1190     }
1191     qemu_mutex_unlock_ramlist();
1192
1193 }
1194
1195 #ifndef _WIN32
1196 void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
1197 {
1198     RAMBlock *block;
1199     ram_addr_t offset;
1200     int flags;
1201     void *area, *vaddr;
1202
1203     QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1204         offset = addr - block->offset;
1205         if (offset < block->length) {
1206             vaddr = block->host + offset;
1207             if (block->flags & RAM_PREALLOC_MASK) {
1208                 ;
1209             } else {
1210                 flags = MAP_FIXED;
1211                 munmap(vaddr, length);
1212                 if (mem_path) {
1213 #if defined(__linux__) && !defined(TARGET_S390X)
1214                     if (block->fd) {
1215 #ifdef MAP_POPULATE
1216                         flags |= mem_prealloc ? MAP_POPULATE | MAP_SHARED :
1217                             MAP_PRIVATE;
1218 #else
1219                         flags |= MAP_PRIVATE;
1220 #endif
1221                         area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1222                                     flags, block->fd, offset);
1223                     } else {
1224                         flags |= MAP_PRIVATE | MAP_ANONYMOUS;
1225                         area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1226                                     flags, -1, 0);
1227                     }
1228 #else
1229                     abort();
1230 #endif
1231                 } else {
1232 #if defined(TARGET_S390X) && defined(CONFIG_KVM)
1233                     flags |= MAP_SHARED | MAP_ANONYMOUS;
1234                     area = mmap(vaddr, length, PROT_EXEC|PROT_READ|PROT_WRITE,
1235                                 flags, -1, 0);
1236 #else
1237                     flags |= MAP_PRIVATE | MAP_ANONYMOUS;
1238                     area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1239                                 flags, -1, 0);
1240 #endif
1241                 }
1242                 if (area != vaddr) {
1243                     fprintf(stderr, "Could not remap addr: "
1244                             RAM_ADDR_FMT "@" RAM_ADDR_FMT "\n",
1245                             length, addr);
1246                     exit(1);
1247                 }
1248                 memory_try_enable_merging(vaddr, length);
1249                 qemu_ram_setup_dump(vaddr, length);
1250             }
1251             return;
1252         }
1253     }
1254 }
1255 #endif /* !_WIN32 */
1256
1257 /* Return a host pointer to ram allocated with qemu_ram_alloc.
1258    With the exception of the softmmu code in this file, this should
1259    only be used for local memory (e.g. video ram) that the device owns,
1260    and knows it isn't going to access beyond the end of the block.
1261
1262    It should not be used for general purpose DMA.
1263    Use cpu_physical_memory_map/cpu_physical_memory_rw instead.
1264  */
1265 void *qemu_get_ram_ptr(ram_addr_t addr)
1266 {
1267     RAMBlock *block;
1268
1269     /* The list is protected by the iothread lock here.  */
1270     block = ram_list.mru_block;
1271     if (block && addr - block->offset < block->length) {
1272         goto found;
1273     }
1274     QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1275         if (addr - block->offset < block->length) {
1276             goto found;
1277         }
1278     }
1279
1280     fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
1281     abort();
1282
1283 found:
1284     ram_list.mru_block = block;
1285     if (xen_enabled()) {
1286         /* We need to check if the requested address is in the RAM
1287          * because we don't want to map the entire memory in QEMU.
1288          * In that case just map until the end of the page.
1289          */
1290         if (block->offset == 0) {
1291             return xen_map_cache(addr, 0, 0);
1292         } else if (block->host == NULL) {
1293             block->host =
1294                 xen_map_cache(block->offset, block->length, 1);
1295         }
1296     }
1297     return block->host + (addr - block->offset);
1298 }
1299
1300 /* Return a host pointer to ram allocated with qemu_ram_alloc.  Same as
1301  * qemu_get_ram_ptr but do not touch ram_list.mru_block.
1302  *
1303  * ??? Is this still necessary?
1304  */
1305 static void *qemu_safe_ram_ptr(ram_addr_t addr)
1306 {
1307     RAMBlock *block;
1308
1309     /* The list is protected by the iothread lock here.  */
1310     QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1311         if (addr - block->offset < block->length) {
1312             if (xen_enabled()) {
1313                 /* We need to check if the requested address is in the RAM
1314                  * because we don't want to map the entire memory in QEMU.
1315                  * In that case just map until the end of the page.
1316                  */
1317                 if (block->offset == 0) {
1318                     return xen_map_cache(addr, 0, 0);
1319                 } else if (block->host == NULL) {
1320                     block->host =
1321                         xen_map_cache(block->offset, block->length, 1);
1322                 }
1323             }
1324             return block->host + (addr - block->offset);
1325         }
1326     }
1327
1328     fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
1329     abort();
1330
1331     return NULL;
1332 }
1333
1334 /* Return a host pointer to guest's ram. Similar to qemu_get_ram_ptr
1335  * but takes a size argument */
1336 static void *qemu_ram_ptr_length(ram_addr_t addr, ram_addr_t *size)
1337 {
1338     if (*size == 0) {
1339         return NULL;
1340     }
1341     if (xen_enabled()) {
1342         return xen_map_cache(addr, *size, 1);
1343     } else {
1344         RAMBlock *block;
1345
1346         QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1347             if (addr - block->offset < block->length) {
1348                 if (addr - block->offset + *size > block->length)
1349                     *size = block->length - addr + block->offset;
1350                 return block->host + (addr - block->offset);
1351             }
1352         }
1353
1354         fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
1355         abort();
1356     }
1357 }
1358
1359 void qemu_put_ram_ptr(void *addr)
1360 {
1361     trace_qemu_put_ram_ptr(addr);
1362 }
1363
1364 int qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr)
1365 {
1366     RAMBlock *block;
1367     uint8_t *host = ptr;
1368
1369     if (xen_enabled()) {
1370         *ram_addr = xen_ram_addr_from_mapcache(ptr);
1371         return 0;
1372     }
1373
1374     QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1375         /* This case happens when the block is not mapped. */
1376         if (block->host == NULL) {
1377             continue;
1378         }
1379         if (host - block->host < block->length) {
1380             *ram_addr = block->offset + (host - block->host);
1381             return 0;
1382         }
1383     }
1384
1385     return -1;
1386 }
1387
1388 /* Some of the softmmu routines need to translate from a host pointer
1389    (typically a TLB entry) back to a ram offset.  */
1390 ram_addr_t qemu_ram_addr_from_host_nofail(void *ptr)
1391 {
1392     ram_addr_t ram_addr;
1393
1394     if (qemu_ram_addr_from_host(ptr, &ram_addr)) {
1395         fprintf(stderr, "Bad ram pointer %p\n", ptr);
1396         abort();
1397     }
1398     return ram_addr;
1399 }
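/* Editorial sketch (not part of the original file): for any address inside a
 * mapped RAM block, the host-pointer and ram_addr_t translations above are
 * inverses of each other.  The function name is invented for the example. */
#if 0
static void example_ram_addr_round_trip(ram_addr_t addr)
{
    void *host = qemu_get_ram_ptr(addr);

    assert(qemu_ram_addr_from_host_nofail(host) == addr);
}
#endif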
1400
1401 static uint64_t unassigned_mem_read(void *opaque, hwaddr addr,
1402                                     unsigned size)
1403 {
1404 #ifdef DEBUG_UNASSIGNED
1405     printf("Unassigned mem read " TARGET_FMT_plx "\n", addr);
1406 #endif
1407 #if defined(TARGET_ALPHA) || defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
1408     cpu_unassigned_access(cpu_single_env, addr, 0, 0, 0, size);
1409 #endif
1410     return 0;
1411 }
1412
1413 static void unassigned_mem_write(void *opaque, hwaddr addr,
1414                                  uint64_t val, unsigned size)
1415 {
1416 #ifdef DEBUG_UNASSIGNED
1417     printf("Unassigned mem write " TARGET_FMT_plx " = 0x%"PRIx64"\n", addr, val);
1418 #endif
1419 #if defined(TARGET_ALPHA) || defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
1420     cpu_unassigned_access(cpu_single_env, addr, 1, 0, 0, size);
1421 #endif
1422 }
1423
1424 static const MemoryRegionOps unassigned_mem_ops = {
1425     .read = unassigned_mem_read,
1426     .write = unassigned_mem_write,
1427     .endianness = DEVICE_NATIVE_ENDIAN,
1428 };
1429
1430 static uint64_t error_mem_read(void *opaque, hwaddr addr,
1431                                unsigned size)
1432 {
1433     abort();
1434 }
1435
1436 static void error_mem_write(void *opaque, hwaddr addr,
1437                             uint64_t value, unsigned size)
1438 {
1439     abort();
1440 }
1441
1442 static const MemoryRegionOps error_mem_ops = {
1443     .read = error_mem_read,
1444     .write = error_mem_write,
1445     .endianness = DEVICE_NATIVE_ENDIAN,
1446 };
1447
1448 static const MemoryRegionOps rom_mem_ops = {
1449     .read = error_mem_read,
1450     .write = unassigned_mem_write,
1451     .endianness = DEVICE_NATIVE_ENDIAN,
1452 };
1453
1454 static void notdirty_mem_write(void *opaque, hwaddr ram_addr,
1455                                uint64_t val, unsigned size)
1456 {
1457     int dirty_flags;
1458     dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
1459     if (!(dirty_flags & CODE_DIRTY_FLAG)) {
1460 #if !defined(CONFIG_USER_ONLY)
1461         tb_invalidate_phys_page_fast(ram_addr, size);
1462         dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
1463 #endif
1464     }
1465     switch (size) {
1466     case 1:
1467         stb_p(qemu_get_ram_ptr(ram_addr), val);
1468         break;
1469     case 2:
1470         stw_p(qemu_get_ram_ptr(ram_addr), val);
1471         break;
1472     case 4:
1473         stl_p(qemu_get_ram_ptr(ram_addr), val);
1474         break;
1475     default:
1476         abort();
1477     }
1478     dirty_flags |= (0xff & ~CODE_DIRTY_FLAG);
1479     cpu_physical_memory_set_dirty_flags(ram_addr, dirty_flags);
1480     /* we remove the notdirty callback only if the code has been
1481        flushed */
1482     if (dirty_flags == 0xff)
1483         tlb_set_dirty(cpu_single_env, cpu_single_env->mem_io_vaddr);
1484 }
1485
1486 static const MemoryRegionOps notdirty_mem_ops = {
1487     .read = error_mem_read,
1488     .write = notdirty_mem_write,
1489     .endianness = DEVICE_NATIVE_ENDIAN,
1490 };
1491
1492 /* Generate a debug exception if a watchpoint has been hit.  */
1493 static void check_watchpoint(int offset, int len_mask, int flags)
1494 {
1495     CPUArchState *env = cpu_single_env;
1496     target_ulong pc, cs_base;
1497     target_ulong vaddr;
1498     CPUWatchpoint *wp;
1499     int cpu_flags;
1500
1501     if (env->watchpoint_hit) {
1502         /* We re-entered the check after replacing the TB. Now raise
1503          * the debug interrupt so that it will trigger after the
1504          * current instruction. */
1505         cpu_interrupt(ENV_GET_CPU(env), CPU_INTERRUPT_DEBUG);
1506         return;
1507     }
1508     vaddr = (env->mem_io_vaddr & TARGET_PAGE_MASK) + offset;
1509     QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
1510         if ((vaddr == (wp->vaddr & len_mask) ||
1511              (vaddr & wp->len_mask) == wp->vaddr) && (wp->flags & flags)) {
1512             wp->flags |= BP_WATCHPOINT_HIT;
1513             if (!env->watchpoint_hit) {
1514                 env->watchpoint_hit = wp;
1515                 tb_check_watchpoint(env);
1516                 if (wp->flags & BP_STOP_BEFORE_ACCESS) {
1517                     env->exception_index = EXCP_DEBUG;
1518                     cpu_loop_exit(env);
1519                 } else {
1520                     cpu_get_tb_cpu_state(env, &pc, &cs_base, &cpu_flags);
1521                     tb_gen_code(env, pc, cs_base, cpu_flags, 1);
1522                     cpu_resume_from_signal(env, NULL);
1523                 }
1524             }
1525         } else {
1526             wp->flags &= ~BP_WATCHPOINT_HIT;
1527         }
1528     }
1529 }
1530
1531 /* Watchpoint access routines.  Watchpoints are inserted using TLB tricks,
1532    so these check for a hit then pass through to the normal out-of-line
1533    phys routines.  */
1534 static uint64_t watch_mem_read(void *opaque, hwaddr addr,
1535                                unsigned size)
1536 {
1537     check_watchpoint(addr & ~TARGET_PAGE_MASK, ~(size - 1), BP_MEM_READ);
1538     switch (size) {
1539     case 1: return ldub_phys(addr);
1540     case 2: return lduw_phys(addr);
1541     case 4: return ldl_phys(addr);
1542     default: abort();
1543     }
1544 }
1545
1546 static void watch_mem_write(void *opaque, hwaddr addr,
1547                             uint64_t val, unsigned size)
1548 {
1549     check_watchpoint(addr & ~TARGET_PAGE_MASK, ~(size - 1), BP_MEM_WRITE);
1550     switch (size) {
1551     case 1:
1552         stb_phys(addr, val);
1553         break;
1554     case 2:
1555         stw_phys(addr, val);
1556         break;
1557     case 4:
1558         stl_phys(addr, val);
1559         break;
1560     default: abort();
1561     }
1562 }
1563
1564 static const MemoryRegionOps watch_mem_ops = {
1565     .read = watch_mem_read,
1566     .write = watch_mem_write,
1567     .endianness = DEVICE_NATIVE_ENDIAN,
1568 };
1569
1570 static uint64_t subpage_read(void *opaque, hwaddr addr,
1571                              unsigned len)
1572 {
1573     subpage_t *mmio = opaque;
1574     unsigned int idx = SUBPAGE_IDX(addr);
1575     MemoryRegionSection *section;
1576 #if defined(DEBUG_SUBPAGE)
1577     printf("%s: subpage %p len %d addr " TARGET_FMT_plx " idx %d\n", __func__,
1578            mmio, len, addr, idx);
1579 #endif
1580
1581     section = &phys_sections[mmio->sub_section[idx]];
1582     addr += mmio->base;
1583     addr -= section->offset_within_address_space;
1584     addr += section->offset_within_region;
1585     return io_mem_read(section->mr, addr, len);
1586 }
1587
1588 static void subpage_write(void *opaque, hwaddr addr,
1589                           uint64_t value, unsigned len)
1590 {
1591     subpage_t *mmio = opaque;
1592     unsigned int idx = SUBPAGE_IDX(addr);
1593     MemoryRegionSection *section;
1594 #if defined(DEBUG_SUBPAGE)
1595     printf("%s: subpage %p len %d addr " TARGET_FMT_plx
1596            " idx %d value %"PRIx64"\n",
1597            __func__, mmio, len, addr, idx, value);
1598 #endif
1599
1600     section = &phys_sections[mmio->sub_section[idx]];
1601     addr += mmio->base;
1602     addr -= section->offset_within_address_space;
1603     addr += section->offset_within_region;
1604     io_mem_write(section->mr, addr, value, len);
1605 }
1606
1607 static const MemoryRegionOps subpage_ops = {
1608     .read = subpage_read,
1609     .write = subpage_write,
1610     .endianness = DEVICE_NATIVE_ENDIAN,
1611 };
1612
1613 static uint64_t subpage_ram_read(void *opaque, hwaddr addr,
1614                                  unsigned size)
1615 {
1616     ram_addr_t raddr = addr;
1617     void *ptr = qemu_get_ram_ptr(raddr);
1618     switch (size) {
1619     case 1: return ldub_p(ptr);
1620     case 2: return lduw_p(ptr);
1621     case 4: return ldl_p(ptr);
1622     default: abort();
1623     }
1624 }
1625
1626 static void subpage_ram_write(void *opaque, hwaddr addr,
1627                               uint64_t value, unsigned size)
1628 {
1629     ram_addr_t raddr = addr;
1630     void *ptr = qemu_get_ram_ptr(raddr);
1631     switch (size) {
1632     case 1: return stb_p(ptr, value);
1633     case 2: return stw_p(ptr, value);
1634     case 4: return stl_p(ptr, value);
1635     default: abort();
1636     }
1637 }
1638
1639 static const MemoryRegionOps subpage_ram_ops = {
1640     .read = subpage_ram_read,
1641     .write = subpage_ram_write,
1642     .endianness = DEVICE_NATIVE_ENDIAN,
1643 };
1644
1645 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
1646                              uint16_t section)
1647 {
1648     int idx, eidx;
1649
1650     if (start >= TARGET_PAGE_SIZE || end >= TARGET_PAGE_SIZE)
1651         return -1;
1652     idx = SUBPAGE_IDX(start);
1653     eidx = SUBPAGE_IDX(end);
1654 #if defined(DEBUG_SUBPAGE)
1655     printf("%s: %p start %08x end %08x idx %08x eidx %08x mem %ld\n", __func__,
1656            mmio, start, end, idx, eidx, memory);
1657 #endif
1658     if (memory_region_is_ram(phys_sections[section].mr)) {
1659         MemoryRegionSection new_section = phys_sections[section];
1660         new_section.mr = &io_mem_subpage_ram;
1661         section = phys_section_add(&new_section);
1662     }
1663     for (; idx <= eidx; idx++) {
1664         mmio->sub_section[idx] = section;
1665     }
1666
1667     return 0;
1668 }
1669
1670 static subpage_t *subpage_init(hwaddr base)
1671 {
1672     subpage_t *mmio;
1673
1674     mmio = g_malloc0(sizeof(subpage_t));
1675
1676     mmio->base = base;
1677     memory_region_init_io(&mmio->iomem, &subpage_ops, mmio,
1678                           "subpage", TARGET_PAGE_SIZE);
1679     mmio->iomem.subpage = true;
1680 #if defined(DEBUG_SUBPAGE)
1681     printf("%s: %p base " TARGET_FMT_plx " len %08x %d\n", __func__,
1682            mmio, base, TARGET_PAGE_SIZE, subpage_memory);
1683 #endif
1684     subpage_register(mmio, 0, TARGET_PAGE_SIZE-1, phys_section_unassigned);
1685
1686     return mmio;
1687 }
1688
1689 static uint16_t dummy_section(MemoryRegion *mr)
1690 {
1691     MemoryRegionSection section = {
1692         .mr = mr,
1693         .offset_within_address_space = 0,
1694         .offset_within_region = 0,
1695         .size = UINT64_MAX,
1696     };
1697
1698     return phys_section_add(&section);
1699 }
1700
1701 MemoryRegion *iotlb_to_region(hwaddr index)
1702 {
1703     return phys_sections[index & ~TARGET_PAGE_MASK].mr;
1704 }
1705
1706 static void io_mem_init(void)
1707 {
1708     memory_region_init_io(&io_mem_ram, &error_mem_ops, NULL, "ram", UINT64_MAX);
1709     memory_region_init_io(&io_mem_rom, &rom_mem_ops, NULL, "rom", UINT64_MAX);
1710     memory_region_init_io(&io_mem_unassigned, &unassigned_mem_ops, NULL,
1711                           "unassigned", UINT64_MAX);
1712     memory_region_init_io(&io_mem_notdirty, &notdirty_mem_ops, NULL,
1713                           "notdirty", UINT64_MAX);
1714     memory_region_init_io(&io_mem_subpage_ram, &subpage_ram_ops, NULL,
1715                           "subpage-ram", UINT64_MAX);
1716     memory_region_init_io(&io_mem_watch, &watch_mem_ops, NULL,
1717                           "watch", UINT64_MAX);
1718 }
1719
1720 static void mem_begin(MemoryListener *listener)
1721 {
1722     AddressSpaceDispatch *d = container_of(listener, AddressSpaceDispatch, listener);
1723
1724     destroy_all_mappings(d);
1725     d->phys_map.ptr = PHYS_MAP_NODE_NIL;
1726 }
1727
1728 static void core_begin(MemoryListener *listener)
1729 {
1730     phys_sections_clear();
1731     phys_section_unassigned = dummy_section(&io_mem_unassigned);
1732     phys_section_notdirty = dummy_section(&io_mem_notdirty);
1733     phys_section_rom = dummy_section(&io_mem_rom);
1734     phys_section_watch = dummy_section(&io_mem_watch);
1735 }
1736
1737 static void tcg_commit(MemoryListener *listener)
1738 {
1739     CPUArchState *env;
1740
1741     /* since each CPU stores ram addresses in its TLB cache, we must
1742        reset the modified entries */
1743     /* XXX: slow ! */
1744     for(env = first_cpu; env != NULL; env = env->next_cpu) {
1745         tlb_flush(env, 1);
1746     }
1747 }
1748
1749 static void core_log_global_start(MemoryListener *listener)
1750 {
1751     cpu_physical_memory_set_dirty_tracking(1);
1752 }
1753
1754 static void core_log_global_stop(MemoryListener *listener)
1755 {
1756     cpu_physical_memory_set_dirty_tracking(0);
1757 }
1758
1759 static void io_region_add(MemoryListener *listener,
1760                           MemoryRegionSection *section)
1761 {
1762     MemoryRegionIORange *mrio = g_new(MemoryRegionIORange, 1);
1763
1764     mrio->mr = section->mr;
1765     mrio->offset = section->offset_within_region;
1766     iorange_init(&mrio->iorange, &memory_region_iorange_ops,
1767                  section->offset_within_address_space, section->size);
1768     ioport_register(&mrio->iorange);
1769 }
1770
1771 static void io_region_del(MemoryListener *listener,
1772                           MemoryRegionSection *section)
1773 {
1774     isa_unassign_ioport(section->offset_within_address_space, section->size);
1775 }
1776
1777 static MemoryListener core_memory_listener = {
1778     .begin = core_begin,
1779     .log_global_start = core_log_global_start,
1780     .log_global_stop = core_log_global_stop,
1781     .priority = 1,
1782 };
1783
1784 static MemoryListener io_memory_listener = {
1785     .region_add = io_region_add,
1786     .region_del = io_region_del,
1787     .priority = 0,
1788 };
1789
1790 static MemoryListener tcg_memory_listener = {
1791     .commit = tcg_commit,
1792 };
1793
1794 void address_space_init_dispatch(AddressSpace *as)
1795 {
1796     AddressSpaceDispatch *d = g_new(AddressSpaceDispatch, 1);
1797
1798     d->phys_map  = (PhysPageEntry) { .ptr = PHYS_MAP_NODE_NIL, .is_leaf = 0 };
1799     d->listener = (MemoryListener) {
1800         .begin = mem_begin,
1801         .region_add = mem_add,
1802         .region_nop = mem_add,
1803         .priority = 0,
1804     };
1805     as->dispatch = d;
1806     memory_listener_register(&d->listener, as);
1807 }
1808
1809 void address_space_destroy_dispatch(AddressSpace *as)
1810 {
1811     AddressSpaceDispatch *d = as->dispatch;
1812
1813     memory_listener_unregister(&d->listener);
1814     destroy_l2_mapping(&d->phys_map, P_L2_LEVELS - 1);
1815     g_free(d);
1816     as->dispatch = NULL;
1817 }
1818
1819 static void memory_map_init(void)
1820 {
1821     system_memory = g_malloc(sizeof(*system_memory));
1822     memory_region_init(system_memory, "system", INT64_MAX);
1823     address_space_init(&address_space_memory, system_memory);
1824     address_space_memory.name = "memory";
1825
1826     system_io = g_malloc(sizeof(*system_io));
1827     memory_region_init(system_io, "io", 65536);
1828     address_space_init(&address_space_io, system_io);
1829     address_space_io.name = "I/O";
1830
1831     memory_listener_register(&core_memory_listener, &address_space_memory);
1832     memory_listener_register(&io_memory_listener, &address_space_io);
1833     memory_listener_register(&tcg_memory_listener, &address_space_memory);
1834
1835     dma_context_init(&dma_context_memory, &address_space_memory,
1836                      NULL, NULL, NULL);
1837 }
1838
1839 MemoryRegion *get_system_memory(void)
1840 {
1841     return system_memory;
1842 }
1843
1844 MemoryRegion *get_system_io(void)
1845 {
1846     return system_io;
1847 }
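
/*
 * Example (illustrative sketch only, not part of this file): how board code
 * typically plugs a RAM region into the system memory tree returned by
 * get_system_memory().  The function and region names below are hypothetical;
 * the signatures assumed are the 1.x-era memory API used elsewhere in this
 * tree (memory_region_init_ram, vmstate_register_ram_global,
 * memory_region_add_subregion).
 */
#if 0
static void example_add_ram(hwaddr base, ram_addr_t size)
{
    MemoryRegion *ram = g_malloc(sizeof(*ram));

    memory_region_init_ram(ram, "example.ram", size);
    vmstate_register_ram_global(ram);
    memory_region_add_subregion(get_system_memory(), base, ram);
}
#endif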
1848
1849 #endif /* !defined(CONFIG_USER_ONLY) */
1850
1851 /* physical memory access (slow version, mainly for debug) */
1852 #if defined(CONFIG_USER_ONLY)
1853 int cpu_memory_rw_debug(CPUArchState *env, target_ulong addr,
1854                         uint8_t *buf, int len, int is_write)
1855 {
1856     int l, flags;
1857     target_ulong page;
1858     void * p;
1859
1860     while (len > 0) {
1861         page = addr & TARGET_PAGE_MASK;
1862         l = (page + TARGET_PAGE_SIZE) - addr;
1863         if (l > len)
1864             l = len;
1865         flags = page_get_flags(page);
1866         if (!(flags & PAGE_VALID))
1867             return -1;
1868         if (is_write) {
1869             if (!(flags & PAGE_WRITE))
1870                 return -1;
1871             /* XXX: this code should not depend on lock_user */
1872             if (!(p = lock_user(VERIFY_WRITE, addr, l, 0)))
1873                 return -1;
1874             memcpy(p, buf, l);
1875             unlock_user(p, addr, l);
1876         } else {
1877             if (!(flags & PAGE_READ))
1878                 return -1;
1879             /* XXX: this code should not depend on lock_user */
1880             if (!(p = lock_user(VERIFY_READ, addr, l, 1)))
1881                 return -1;
1882             memcpy(buf, p, l);
1883             unlock_user(p, addr, 0);
1884         }
1885         len -= l;
1886         buf += l;
1887         addr += l;
1888     }
1889     return 0;
1890 }
1891
1892 #else
1893
1894 static void invalidate_and_set_dirty(hwaddr addr,
1895                                      hwaddr length)
1896 {
1897     if (!cpu_physical_memory_is_dirty(addr)) {
1898         /* invalidate code */
1899         tb_invalidate_phys_page_range(addr, addr + length, 0);
1900         /* set dirty bit */
1901         cpu_physical_memory_set_dirty_flags(addr, (0xff & ~CODE_DIRTY_FLAG));
1902     }
1903     xen_modified_memory(addr, length);
1904 }
1905
1906 void address_space_rw(AddressSpace *as, hwaddr addr, uint8_t *buf,
1907                       int len, bool is_write)
1908 {
1909     AddressSpaceDispatch *d = as->dispatch;
1910     int l;
1911     uint8_t *ptr;
1912     uint32_t val;
1913     hwaddr page;
1914     MemoryRegionSection *section;
1915
1916     while (len > 0) {
1917         page = addr & TARGET_PAGE_MASK;
1918         l = (page + TARGET_PAGE_SIZE) - addr;
1919         if (l > len)
1920             l = len;
1921         section = phys_page_find(d, page >> TARGET_PAGE_BITS);
1922
1923         if (is_write) {
1924             if (!memory_region_is_ram(section->mr)) {
1925                 hwaddr addr1;
1926                 addr1 = memory_region_section_addr(section, addr);
1927                 /* XXX: could force cpu_single_env to NULL to avoid
1928                    potential bugs */
1929                 if (l >= 4 && ((addr1 & 3) == 0)) {
1930                     /* 32 bit write access */
1931                     val = ldl_p(buf);
1932                     io_mem_write(section->mr, addr1, val, 4);
1933                     l = 4;
1934                 } else if (l >= 2 && ((addr1 & 1) == 0)) {
1935                     /* 16 bit write access */
1936                     val = lduw_p(buf);
1937                     io_mem_write(section->mr, addr1, val, 2);
1938                     l = 2;
1939                 } else {
1940                     /* 8 bit write access */
1941                     val = ldub_p(buf);
1942                     io_mem_write(section->mr, addr1, val, 1);
1943                     l = 1;
1944                 }
1945             } else if (!section->readonly) {
1946                 ram_addr_t addr1;
1947                 addr1 = memory_region_get_ram_addr(section->mr)
1948                     + memory_region_section_addr(section, addr);
1949                 /* RAM case */
1950                 ptr = qemu_get_ram_ptr(addr1);
1951                 memcpy(ptr, buf, l);
1952                 invalidate_and_set_dirty(addr1, l);
1953                 qemu_put_ram_ptr(ptr);
1954             }
1955         } else {
1956             if (!(memory_region_is_ram(section->mr) ||
1957                   memory_region_is_romd(section->mr))) {
1958                 hwaddr addr1;
1959                 /* I/O case */
1960                 addr1 = memory_region_section_addr(section, addr);
1961                 if (l >= 4 && ((addr1 & 3) == 0)) {
1962                     /* 32 bit read access */
1963                     val = io_mem_read(section->mr, addr1, 4);
1964                     stl_p(buf, val);
1965                     l = 4;
1966                 } else if (l >= 2 && ((addr1 & 1) == 0)) {
1967                     /* 16 bit read access */
1968                     val = io_mem_read(section->mr, addr1, 2);
1969                     stw_p(buf, val);
1970                     l = 2;
1971                 } else {
1972                     /* 8 bit read access */
1973                     val = io_mem_read(section->mr, addr1, 1);
1974                     stb_p(buf, val);
1975                     l = 1;
1976                 }
1977             } else {
1978                 /* RAM case */
1979                 ptr = qemu_get_ram_ptr(section->mr->ram_addr
1980                                        + memory_region_section_addr(section,
1981                                                                     addr));
1982                 memcpy(buf, ptr, l);
1983                 qemu_put_ram_ptr(ptr);
1984             }
1985         }
1986         len -= l;
1987         buf += l;
1988         addr += l;
1989     }
1990 }
1991
1992 void address_space_write(AddressSpace *as, hwaddr addr,
1993                          const uint8_t *buf, int len)
1994 {
1995     address_space_rw(as, addr, (uint8_t *)buf, len, true);
1996 }
1997
1998 /**
1999  * address_space_read: read from an address space.
2000  *
2001  * @as: #AddressSpace to be accessed
2002  * @addr: address within that address space
2003  * @buf: buffer with the data transferred
 * @len: length of the data transferred, in bytes
2004  */
2005 void address_space_read(AddressSpace *as, hwaddr addr, uint8_t *buf, int len)
2006 {
2007     address_space_rw(as, addr, buf, len, false);
2008 }
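
/*
 * Example (sketch, not part of this file): reading a small guest-physical
 * buffer through the accessors above.  The address and function name are
 * hypothetical; address_space_memory is the global system address space set
 * up in memory_map_init(), and cpu_physical_memory_read() is the convenience
 * wrapper declared in cpu-common.h.
 */
#if 0
static void example_peek_guest_memory(hwaddr gpa)
{
    uint8_t data[16];

    address_space_read(&address_space_memory, gpa, data, sizeof(data));
    /* equivalent shorthand for the system address space: */
    cpu_physical_memory_read(gpa, data, sizeof(data));
}
#endif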
2009
2010
2011 void cpu_physical_memory_rw(hwaddr addr, uint8_t *buf,
2012                             int len, int is_write)
2013 {
2014     return address_space_rw(&address_space_memory, addr, buf, len, is_write);
2015 }
2016
2017 /* used for ROM loading: can write in RAM and ROM */
2018 void cpu_physical_memory_write_rom(hwaddr addr,
2019                                    const uint8_t *buf, int len)
2020 {
2021     AddressSpaceDispatch *d = address_space_memory.dispatch;
2022     int l;
2023     uint8_t *ptr;
2024     hwaddr page;
2025     MemoryRegionSection *section;
2026
2027     while (len > 0) {
2028         page = addr & TARGET_PAGE_MASK;
2029         l = (page + TARGET_PAGE_SIZE) - addr;
2030         if (l > len)
2031             l = len;
2032         section = phys_page_find(d, page >> TARGET_PAGE_BITS);
2033
2034         if (!(memory_region_is_ram(section->mr) ||
2035               memory_region_is_romd(section->mr))) {
2036             /* do nothing */
2037         } else {
2038             unsigned long addr1;
2039             addr1 = memory_region_get_ram_addr(section->mr)
2040                 + memory_region_section_addr(section, addr);
2041             /* ROM/RAM case */
2042             ptr = qemu_get_ram_ptr(addr1);
2043             memcpy(ptr, buf, l);
2044             invalidate_and_set_dirty(addr1, l);
2045             qemu_put_ram_ptr(ptr);
2046         }
2047         len -= l;
2048         buf += l;
2049         addr += l;
2050     }
2051 }
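
/*
 * Example (sketch, not part of this file): firmware loaders go through
 * cpu_physical_memory_write_rom() so an image can land in a read-only
 * region as well as in RAM.  The blob and load address are hypothetical.
 */
#if 0
static void example_load_firmware(const uint8_t *blob, int size, hwaddr base)
{
    /* unlike a plain write, this also patches ROM/ROMD regions */
    cpu_physical_memory_write_rom(base, blob, size);
}
#endif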
2052
2053 typedef struct {
2054     void *buffer;
2055     hwaddr addr;
2056     hwaddr len;
2057 } BounceBuffer;
2058
2059 static BounceBuffer bounce;
2060
2061 typedef struct MapClient {
2062     void *opaque;
2063     void (*callback)(void *opaque);
2064     QLIST_ENTRY(MapClient) link;
2065 } MapClient;
2066
2067 static QLIST_HEAD(map_client_list, MapClient) map_client_list
2068     = QLIST_HEAD_INITIALIZER(map_client_list);
2069
2070 void *cpu_register_map_client(void *opaque, void (*callback)(void *opaque))
2071 {
2072     MapClient *client = g_malloc(sizeof(*client));
2073
2074     client->opaque = opaque;
2075     client->callback = callback;
2076     QLIST_INSERT_HEAD(&map_client_list, client, link);
2077     return client;
2078 }
2079
2080 static void cpu_unregister_map_client(void *_client)
2081 {
2082     MapClient *client = (MapClient *)_client;
2083
2084     QLIST_REMOVE(client, link);
2085     g_free(client);
2086 }
2087
2088 static void cpu_notify_map_clients(void)
2089 {
2090     MapClient *client;
2091
2092     while (!QLIST_EMPTY(&map_client_list)) {
2093         client = QLIST_FIRST(&map_client_list);
2094         client->callback(client->opaque);
2095         cpu_unregister_map_client(client);
2096     }
2097 }
2098
2099 /* Map a physical memory region into a host virtual address.
2100  * May map a subset of the requested range, given by and returned in *plen.
2101  * May return NULL if resources needed to perform the mapping are exhausted.
2102  * Use only for reads OR writes - not for read-modify-write operations.
2103  * Use cpu_register_map_client() to know when retrying the map operation is
2104  * likely to succeed.
2105  */
2106 void *address_space_map(AddressSpace *as,
2107                         hwaddr addr,
2108                         hwaddr *plen,
2109                         bool is_write)
2110 {
2111     AddressSpaceDispatch *d = as->dispatch;
2112     hwaddr len = *plen;
2113     hwaddr todo = 0;
2114     int l;
2115     hwaddr page;
2116     MemoryRegionSection *section;
2117     ram_addr_t raddr = RAM_ADDR_MAX;
2118     ram_addr_t rlen;
2119     void *ret;
2120
2121     while (len > 0) {
2122         page = addr & TARGET_PAGE_MASK;
2123         l = (page + TARGET_PAGE_SIZE) - addr;
2124         if (l > len)
2125             l = len;
2126         section = phys_page_find(d, page >> TARGET_PAGE_BITS);
2127
2128         if (!(memory_region_is_ram(section->mr) && !section->readonly)) {
2129             if (todo || bounce.buffer) {
2130                 break;
2131             }
2132             bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, TARGET_PAGE_SIZE);
2133             bounce.addr = addr;
2134             bounce.len = l;
2135             if (!is_write) {
2136                 address_space_read(as, addr, bounce.buffer, l);
2137             }
2138
2139             *plen = l;
2140             return bounce.buffer;
2141         }
2142         if (!todo) {
2143             raddr = memory_region_get_ram_addr(section->mr)
2144                 + memory_region_section_addr(section, addr);
2145         }
2146
2147         len -= l;
2148         addr += l;
2149         todo += l;
2150     }
2151     rlen = todo;
2152     ret = qemu_ram_ptr_length(raddr, &rlen);
2153     *plen = rlen;
2154     return ret;
2155 }
2156
2157 /* Unmaps a memory region previously mapped by address_space_map().
2158  * Will also mark the memory as dirty if is_write == 1.  access_len gives
2159  * the amount of memory that was actually read or written by the caller.
2160  */
2161 void address_space_unmap(AddressSpace *as, void *buffer, hwaddr len,
2162                          int is_write, hwaddr access_len)
2163 {
2164     if (buffer != bounce.buffer) {
2165         if (is_write) {
2166             ram_addr_t addr1 = qemu_ram_addr_from_host_nofail(buffer);
2167             while (access_len) {
2168                 unsigned l;
2169                 l = TARGET_PAGE_SIZE;
2170                 if (l > access_len)
2171                     l = access_len;
2172                 invalidate_and_set_dirty(addr1, l);
2173                 addr1 += l;
2174                 access_len -= l;
2175             }
2176         }
2177         if (xen_enabled()) {
2178             xen_invalidate_map_cache_entry(buffer);
2179         }
2180         return;
2181     }
2182     if (is_write) {
2183         address_space_write(as, bounce.addr, bounce.buffer, access_len);
2184     }
2185     qemu_vfree(bounce.buffer);
2186     bounce.buffer = NULL;
2187     cpu_notify_map_clients();
2188 }
2189
2190 void *cpu_physical_memory_map(hwaddr addr,
2191                               hwaddr *plen,
2192                               int is_write)
2193 {
2194     return address_space_map(&address_space_memory, addr, plen, is_write);
2195 }
2196
2197 void cpu_physical_memory_unmap(void *buffer, hwaddr len,
2198                                int is_write, hwaddr access_len)
2199 {
2200     return address_space_unmap(&address_space_memory, buffer, len, is_write, access_len);
2201 }
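
/*
 * Example (sketch, not part of this file): the usual map/modify/unmap cycle
 * for zero-copy access to guest memory.  If address_space_map() has to fall
 * back to the single bounce buffer and that buffer is busy, it returns NULL;
 * callers can use cpu_register_map_client() to learn when retrying is likely
 * to succeed.  The function name and pattern below are hypothetical.
 */
#if 0
static void example_fill_guest_buffer(hwaddr gpa, hwaddr size, uint8_t pattern)
{
    hwaddr len = size;
    void *host = cpu_physical_memory_map(gpa, &len, 1 /* is_write */);

    if (!host) {
        return; /* retry later, e.g. from a map-client callback */
    }
    memset(host, pattern, len);                     /* len may be < size */
    cpu_physical_memory_unmap(host, len, 1 /* is_write */, len);
}
#endif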
2202
2203 /* warning: addr must be aligned */
2204 static inline uint32_t ldl_phys_internal(hwaddr addr,
2205                                          enum device_endian endian)
2206 {
2207     uint8_t *ptr;
2208     uint32_t val;
2209     MemoryRegionSection *section;
2210
2211     section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
2212
2213     if (!(memory_region_is_ram(section->mr) ||
2214           memory_region_is_romd(section->mr))) {
2215         /* I/O case */
2216         addr = memory_region_section_addr(section, addr);
2217         val = io_mem_read(section->mr, addr, 4);
2218 #if defined(TARGET_WORDS_BIGENDIAN)
2219         if (endian == DEVICE_LITTLE_ENDIAN) {
2220             val = bswap32(val);
2221         }
2222 #else
2223         if (endian == DEVICE_BIG_ENDIAN) {
2224             val = bswap32(val);
2225         }
2226 #endif
2227     } else {
2228         /* RAM case */
2229         ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section->mr)
2230                                 & TARGET_PAGE_MASK)
2231                                + memory_region_section_addr(section, addr));
2232         switch (endian) {
2233         case DEVICE_LITTLE_ENDIAN:
2234             val = ldl_le_p(ptr);
2235             break;
2236         case DEVICE_BIG_ENDIAN:
2237             val = ldl_be_p(ptr);
2238             break;
2239         default:
2240             val = ldl_p(ptr);
2241             break;
2242         }
2243     }
2244     return val;
2245 }
2246
2247 uint32_t ldl_phys(hwaddr addr)
2248 {
2249     return ldl_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
2250 }
2251
2252 uint32_t ldl_le_phys(hwaddr addr)
2253 {
2254     return ldl_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
2255 }
2256
2257 uint32_t ldl_be_phys(hwaddr addr)
2258 {
2259     return ldl_phys_internal(addr, DEVICE_BIG_ENDIAN);
2260 }
2261
2262 /* warning: addr must be aligned */
2263 static inline uint64_t ldq_phys_internal(hwaddr addr,
2264                                          enum device_endian endian)
2265 {
2266     uint8_t *ptr;
2267     uint64_t val;
2268     MemoryRegionSection *section;
2269
2270     section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
2271
2272     if (!(memory_region_is_ram(section->mr) ||
2273           memory_region_is_romd(section->mr))) {
2274         /* I/O case */
2275         addr = memory_region_section_addr(section, addr);
2276
2277         /* XXX This is broken when device endian != cpu endian.
2278                Fix and add "endian" variable check */
2279 #ifdef TARGET_WORDS_BIGENDIAN
2280         val = io_mem_read(section->mr, addr, 4) << 32;
2281         val |= io_mem_read(section->mr, addr + 4, 4);
2282 #else
2283         val = io_mem_read(section->mr, addr, 4);
2284         val |= io_mem_read(section->mr, addr + 4, 4) << 32;
2285 #endif
2286     } else {
2287         /* RAM case */
2288         ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section->mr)
2289                                 & TARGET_PAGE_MASK)
2290                                + memory_region_section_addr(section, addr));
2291         switch (endian) {
2292         case DEVICE_LITTLE_ENDIAN:
2293             val = ldq_le_p(ptr);
2294             break;
2295         case DEVICE_BIG_ENDIAN:
2296             val = ldq_be_p(ptr);
2297             break;
2298         default:
2299             val = ldq_p(ptr);
2300             break;
2301         }
2302     }
2303     return val;
2304 }
2305
2306 uint64_t ldq_phys(hwaddr addr)
2307 {
2308     return ldq_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
2309 }
2310
2311 uint64_t ldq_le_phys(hwaddr addr)
2312 {
2313     return ldq_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
2314 }
2315
2316 uint64_t ldq_be_phys(hwaddr addr)
2317 {
2318     return ldq_phys_internal(addr, DEVICE_BIG_ENDIAN);
2319 }
2320
2321 /* XXX: optimize */
2322 uint32_t ldub_phys(hwaddr addr)
2323 {
2324     uint8_t val;
2325     cpu_physical_memory_read(addr, &val, 1);
2326     return val;
2327 }
2328
2329 /* warning: addr must be aligned */
2330 static inline uint32_t lduw_phys_internal(hwaddr addr,
2331                                           enum device_endian endian)
2332 {
2333     uint8_t *ptr;
2334     uint64_t val;
2335     MemoryRegionSection *section;
2336
2337     section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
2338
2339     if (!(memory_region_is_ram(section->mr) ||
2340           memory_region_is_romd(section->mr))) {
2341         /* I/O case */
2342         addr = memory_region_section_addr(section, addr);
2343         val = io_mem_read(section->mr, addr, 2);
2344 #if defined(TARGET_WORDS_BIGENDIAN)
2345         if (endian == DEVICE_LITTLE_ENDIAN) {
2346             val = bswap16(val);
2347         }
2348 #else
2349         if (endian == DEVICE_BIG_ENDIAN) {
2350             val = bswap16(val);
2351         }
2352 #endif
2353     } else {
2354         /* RAM case */
2355         ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section->mr)
2356                                 & TARGET_PAGE_MASK)
2357                                + memory_region_section_addr(section, addr));
2358         switch (endian) {
2359         case DEVICE_LITTLE_ENDIAN:
2360             val = lduw_le_p(ptr);
2361             break;
2362         case DEVICE_BIG_ENDIAN:
2363             val = lduw_be_p(ptr);
2364             break;
2365         default:
2366             val = lduw_p(ptr);
2367             break;
2368         }
2369     }
2370     return val;
2371 }
2372
2373 uint32_t lduw_phys(hwaddr addr)
2374 {
2375     return lduw_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
2376 }
2377
2378 uint32_t lduw_le_phys(hwaddr addr)
2379 {
2380     return lduw_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
2381 }
2382
2383 uint32_t lduw_be_phys(hwaddr addr)
2384 {
2385     return lduw_phys_internal(addr, DEVICE_BIG_ENDIAN);
2386 }
2387
2388 /* warning: addr must be aligned. The RAM page is not marked as dirty
2389    and the code inside is not invalidated. It is useful if the dirty
2390    bits are used to track modified PTEs */
2391 void stl_phys_notdirty(hwaddr addr, uint32_t val)
2392 {
2393     uint8_t *ptr;
2394     MemoryRegionSection *section;
2395
2396     section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
2397
2398     if (!memory_region_is_ram(section->mr) || section->readonly) {
2399         addr = memory_region_section_addr(section, addr);
2400         if (memory_region_is_ram(section->mr)) {
2401             section = &phys_sections[phys_section_rom];
2402         }
2403         io_mem_write(section->mr, addr, val, 4);
2404     } else {
2405         unsigned long addr1 = (memory_region_get_ram_addr(section->mr)
2406                                & TARGET_PAGE_MASK)
2407             + memory_region_section_addr(section, addr);
2408         ptr = qemu_get_ram_ptr(addr1);
2409         stl_p(ptr, val);
2410
2411         if (unlikely(in_migration)) {
2412             if (!cpu_physical_memory_is_dirty(addr1)) {
2413                 /* invalidate code */
2414                 tb_invalidate_phys_page_range(addr1, addr1 + 4, 0);
2415                 /* set dirty bit */
2416                 cpu_physical_memory_set_dirty_flags(
2417                     addr1, (0xff & ~CODE_DIRTY_FLAG));
2418             }
2419         }
2420     }
2421 }
2422
2423 void stq_phys_notdirty(hwaddr addr, uint64_t val)
2424 {
2425     uint8_t *ptr;
2426     MemoryRegionSection *section;
2427
2428     section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
2429
2430     if (!memory_region_is_ram(section->mr) || section->readonly) {
2431         addr = memory_region_section_addr(section, addr);
2432         if (memory_region_is_ram(section->mr)) {
2433             section = &phys_sections[phys_section_rom];
2434         }
2435 #ifdef TARGET_WORDS_BIGENDIAN
2436         io_mem_write(section->mr, addr, val >> 32, 4);
2437         io_mem_write(section->mr, addr + 4, (uint32_t)val, 4);
2438 #else
2439         io_mem_write(section->mr, addr, (uint32_t)val, 4);
2440         io_mem_write(section->mr, addr + 4, val >> 32, 4);
2441 #endif
2442     } else {
2443         ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section->mr)
2444                                 & TARGET_PAGE_MASK)
2445                                + memory_region_section_addr(section, addr));
2446         stq_p(ptr, val);
2447     }
2448 }
2449
2450 /* warning: addr must be aligned */
2451 static inline void stl_phys_internal(hwaddr addr, uint32_t val,
2452                                      enum device_endian endian)
2453 {
2454     uint8_t *ptr;
2455     MemoryRegionSection *section;
2456
2457     section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
2458
2459     if (!memory_region_is_ram(section->mr) || section->readonly) {
2460         addr = memory_region_section_addr(section, addr);
2461         if (memory_region_is_ram(section->mr)) {
2462             section = &phys_sections[phys_section_rom];
2463         }
2464 #if defined(TARGET_WORDS_BIGENDIAN)
2465         if (endian == DEVICE_LITTLE_ENDIAN) {
2466             val = bswap32(val);
2467         }
2468 #else
2469         if (endian == DEVICE_BIG_ENDIAN) {
2470             val = bswap32(val);
2471         }
2472 #endif
2473         io_mem_write(section->mr, addr, val, 4);
2474     } else {
2475         unsigned long addr1;
2476         addr1 = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
2477             + memory_region_section_addr(section, addr);
2478         /* RAM case */
2479         ptr = qemu_get_ram_ptr(addr1);
2480         switch (endian) {
2481         case DEVICE_LITTLE_ENDIAN:
2482             stl_le_p(ptr, val);
2483             break;
2484         case DEVICE_BIG_ENDIAN:
2485             stl_be_p(ptr, val);
2486             break;
2487         default:
2488             stl_p(ptr, val);
2489             break;
2490         }
2491         invalidate_and_set_dirty(addr1, 4);
2492     }
2493 }
2494
2495 void stl_phys(hwaddr addr, uint32_t val)
2496 {
2497     stl_phys_internal(addr, val, DEVICE_NATIVE_ENDIAN);
2498 }
2499
2500 void stl_le_phys(hwaddr addr, uint32_t val)
2501 {
2502     stl_phys_internal(addr, val, DEVICE_LITTLE_ENDIAN);
2503 }
2504
2505 void stl_be_phys(hwaddr addr, uint32_t val)
2506 {
2507     stl_phys_internal(addr, val, DEVICE_BIG_ENDIAN);
2508 }
2509
2510 /* XXX: optimize */
2511 void stb_phys(hwaddr addr, uint32_t val)
2512 {
2513     uint8_t v = val;
2514     cpu_physical_memory_write(addr, &v, 1);
2515 }
2516
2517 /* warning: addr must be aligned */
2518 static inline void stw_phys_internal(hwaddr addr, uint32_t val,
2519                                      enum device_endian endian)
2520 {
2521     uint8_t *ptr;
2522     MemoryRegionSection *section;
2523
2524     section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
2525
2526     if (!memory_region_is_ram(section->mr) || section->readonly) {
2527         addr = memory_region_section_addr(section, addr);
2528         if (memory_region_is_ram(section->mr)) {
2529             section = &phys_sections[phys_section_rom];
2530         }
2531 #if defined(TARGET_WORDS_BIGENDIAN)
2532         if (endian == DEVICE_LITTLE_ENDIAN) {
2533             val = bswap16(val);
2534         }
2535 #else
2536         if (endian == DEVICE_BIG_ENDIAN) {
2537             val = bswap16(val);
2538         }
2539 #endif
2540         io_mem_write(section->mr, addr, val, 2);
2541     } else {
2542         unsigned long addr1;
2543         addr1 = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
2544             + memory_region_section_addr(section, addr);
2545         /* RAM case */
2546         ptr = qemu_get_ram_ptr(addr1);
2547         switch (endian) {
2548         case DEVICE_LITTLE_ENDIAN:
2549             stw_le_p(ptr, val);
2550             break;
2551         case DEVICE_BIG_ENDIAN:
2552             stw_be_p(ptr, val);
2553             break;
2554         default:
2555             stw_p(ptr, val);
2556             break;
2557         }
2558         invalidate_and_set_dirty(addr1, 2);
2559     }
2560 }
2561
2562 void stw_phys(hwaddr addr, uint32_t val)
2563 {
2564     stw_phys_internal(addr, val, DEVICE_NATIVE_ENDIAN);
2565 }
2566
2567 void stw_le_phys(hwaddr addr, uint32_t val)
2568 {
2569     stw_phys_internal(addr, val, DEVICE_LITTLE_ENDIAN);
2570 }
2571
2572 void stw_be_phys(hwaddr addr, uint32_t val)
2573 {
2574     stw_phys_internal(addr, val, DEVICE_BIG_ENDIAN);
2575 }
2576
2577 /* XXX: optimize */
2578 void stq_phys(hwaddr addr, uint64_t val)
2579 {
2580     val = tswap64(val);
2581     cpu_physical_memory_write(addr, &val, 8);
2582 }
2583
2584 void stq_le_phys(hwaddr addr, uint64_t val)
2585 {
2586     val = cpu_to_le64(val);
2587     cpu_physical_memory_write(addr, &val, 8);
2588 }
2589
2590 void stq_be_phys(hwaddr addr, uint64_t val)
2591 {
2592     val = cpu_to_be64(val);
2593     cpu_physical_memory_write(addr, &val, 8);
2594 }
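
/*
 * Example (sketch, not part of this file): the fixed-endian helpers above are
 * what device models use for guest-physical structures with a defined byte
 * order, e.g. a little-endian descriptor.  The descriptor layout and address
 * are hypothetical.
 */
#if 0
static void example_update_le_descriptor(hwaddr desc)
{
    uint32_t flags = ldl_le_phys(desc);     /* 32-bit little-endian load */

    stl_le_phys(desc, flags | 1);           /* 32-bit little-endian store */
    stq_le_phys(desc + 8, 0);               /* 64-bit little-endian store */
}
#endif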
2595
2596 /* virtual memory access for debug (includes writing to ROM) */
2597 int cpu_memory_rw_debug(CPUArchState *env, target_ulong addr,
2598                         uint8_t *buf, int len, int is_write)
2599 {
2600     int l;
2601     hwaddr phys_addr;
2602     target_ulong page;
2603
2604     while (len > 0) {
2605         page = addr & TARGET_PAGE_MASK;
2606         phys_addr = cpu_get_phys_page_debug(env, page);
2607         /* if no physical page mapped, return an error */
2608         if (phys_addr == -1)
2609             return -1;
2610         l = (page + TARGET_PAGE_SIZE) - addr;
2611         if (l > len)
2612             l = len;
2613         phys_addr += (addr & ~TARGET_PAGE_MASK);
2614         if (is_write)
2615             cpu_physical_memory_write_rom(phys_addr, buf, l);
2616         else
2617             cpu_physical_memory_rw(phys_addr, buf, l, is_write);
2618         len -= l;
2619         buf += l;
2620         addr += l;
2621     }
2622     return 0;
2623 }
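
/*
 * Example (sketch, not part of this file): the gdb stub and monitor use
 * cpu_memory_rw_debug() to access guest *virtual* memory; it walks the page
 * tables via cpu_get_phys_page_debug() and, for writes, also patches ROM.
 * The wrapper name below is hypothetical.
 */
#if 0
static int example_read_guest_virtual(CPUArchState *env, target_ulong va,
                                      uint8_t *buf, int len)
{
    /* returns 0 on success, -1 if a page in the range is unmapped */
    return cpu_memory_rw_debug(env, va, buf, len, 0 /* is_write */);
}
#endif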
2624 #endif
2625
2626 #if !defined(CONFIG_USER_ONLY)
2627
2628 /*
2629  * A helper function for the _utterly broken_ virtio device model to find out if
2630  * it's running on a big endian machine. Don't do this at home kids!
2631  */
2632 bool virtio_is_big_endian(void);
2633 bool virtio_is_big_endian(void)
2634 {
2635 #if defined(TARGET_WORDS_BIGENDIAN)
2636     return true;
2637 #else
2638     return false;
2639 #endif
2640 }
2641
2642 #endif
2643
2644 #ifndef CONFIG_USER_ONLY
2645 bool cpu_physical_memory_is_io(hwaddr phys_addr)
2646 {
2647     MemoryRegionSection *section;
2648
2649     section = phys_page_find(address_space_memory.dispatch,
2650                              phys_addr >> TARGET_PAGE_BITS);
2651
2652     return !(memory_region_is_ram(section->mr) ||
2653              memory_region_is_romd(section->mr));
2654 }
2655 #endif