4 * Copyright (c) 2003 Fabrice Bellard
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
23 #include <sys/types.h>
27 #include "qemu-common.h"
32 #include "qemu/osdep.h"
33 #include "sysemu/kvm.h"
34 #include "sysemu/sysemu.h"
35 #include "hw/xen/xen.h"
36 #include "qemu/timer.h"
37 #include "qemu/config-file.h"
38 #include "exec/memory.h"
39 #include "sysemu/dma.h"
40 #include "exec/address-spaces.h"
41 #if defined(CONFIG_USER_ONLY)
43 #else /* !CONFIG_USER_ONLY */
44 #include "sysemu/xen-mapcache.h"
47 #include "exec/cpu-all.h"
49 #include "exec/cputlb.h"
50 #include "translate-all.h"
52 #include "exec/memory-internal.h"
54 //#define DEBUG_SUBPAGE
56 #if !defined(CONFIG_USER_ONLY)
57 static int in_migration;
59 RAMList ram_list = { .blocks = QTAILQ_HEAD_INITIALIZER(ram_list.blocks) };
61 static MemoryRegion *system_memory;
62 static MemoryRegion *system_io;
64 AddressSpace address_space_io;
65 AddressSpace address_space_memory;
67 MemoryRegion io_mem_rom, io_mem_notdirty;
68 static MemoryRegion io_mem_unassigned;
73 /* current CPU in the current thread. It is only valid inside
75 DEFINE_TLS(CPUState *, current_cpu);
76 /* 0 = Do not count executed instructions.
77 1 = Precise instruction counting.
78 2 = Adaptive rate instruction counting. */
81 #if !defined(CONFIG_USER_ONLY)
83 typedef struct PhysPageEntry PhysPageEntry;
85 struct PhysPageEntry {
87 /* index into phys_sections (is_leaf) or phys_map_nodes (!is_leaf) */
91 typedef PhysPageEntry Node[L2_SIZE];
93 struct AddressSpaceDispatch {
94 /* This is a multi-level map on the physical address space.
95 * The bottom level has pointers to MemoryRegionSections.
97 PhysPageEntry phys_map;
99 MemoryRegionSection *sections;
103 #define SUBPAGE_IDX(addr) ((addr) & ~TARGET_PAGE_MASK)
104 typedef struct subpage_t {
108 uint16_t sub_section[TARGET_PAGE_SIZE];
111 #define PHYS_SECTION_UNASSIGNED 0
112 #define PHYS_SECTION_NOTDIRTY 1
113 #define PHYS_SECTION_ROM 2
114 #define PHYS_SECTION_WATCH 3
116 typedef struct PhysPageMap {
117 unsigned sections_nb;
118 unsigned sections_nb_alloc;
120 unsigned nodes_nb_alloc;
122 MemoryRegionSection *sections;
125 static PhysPageMap *prev_map;
126 static PhysPageMap next_map;
128 #define PHYS_MAP_NODE_NIL (((uint16_t)~0) >> 1)
130 static void io_mem_init(void);
131 static void memory_map_init(void);
132 static void *qemu_safe_ram_ptr(ram_addr_t addr);
134 static MemoryRegion io_mem_watch;
137 #if !defined(CONFIG_USER_ONLY)
/*
 * Ensure next_map.nodes can hold at least @nodes additional entries.
 *
 * Nothing happens while next_map.nodes_nb + @nodes still fits in
 * next_map.nodes_nb_alloc.  Otherwise the allocation count is raised to at
 * least twice its previous value and to at least nodes_nb + @nodes, and the
 * array is resized with g_renew().
 */
139 static void phys_map_node_reserve(unsigned nodes)
141 if (next_map.nodes_nb + nodes > next_map.nodes_nb_alloc) {
142 next_map.nodes_nb_alloc = MAX(next_map.nodes_nb_alloc * 2,
144 next_map.nodes_nb_alloc = MAX(next_map.nodes_nb_alloc,
145 next_map.nodes_nb + nodes);
146 next_map.nodes = g_renew(Node, next_map.nodes,
147 next_map.nodes_nb_alloc);
/*
 * Take the next unused slot of next_map.nodes and initialise it.
 *
 * The slot index is next_map.nodes_nb before it is incremented.  The asserts
 * check that the index is not the PHYS_MAP_NODE_NIL sentinel and that it is
 * not equal to the allocated count, i.e. that capacity was reserved
 * beforehand.  Each of the node's L2_SIZE entries is set to a non-leaf entry
 * whose ptr is PHYS_MAP_NODE_NIL.
 * NOTE(review): the return statement is not visible here; presumably the
 * slot index is returned - confirm.
 */
151 static uint16_t phys_map_node_alloc(void)
156 ret = next_map.nodes_nb++;
157 assert(ret != PHYS_MAP_NODE_NIL);
158 assert(ret != next_map.nodes_nb_alloc);
159 for (i = 0; i < L2_SIZE; ++i) {
160 next_map.nodes[ret][i].is_leaf = 0;
161 next_map.nodes[ret][i].ptr = PHYS_MAP_NODE_NIL;
166 static void phys_page_set_level(PhysPageEntry *lp, hwaddr *index,
167 hwaddr *nb, uint16_t leaf,
172 hwaddr step = (hwaddr)1 << (level * L2_BITS);
174 if (!lp->is_leaf && lp->ptr == PHYS_MAP_NODE_NIL) {
175 lp->ptr = phys_map_node_alloc();
176 p = next_map.nodes[lp->ptr];
178 for (i = 0; i < L2_SIZE; i++) {
180 p[i].ptr = PHYS_SECTION_UNASSIGNED;
184 p = next_map.nodes[lp->ptr];
186 lp = &p[(*index >> (level * L2_BITS)) & (L2_SIZE - 1)];
188 while (*nb && lp < &p[L2_SIZE]) {
189 if ((*index & (step - 1)) == 0 && *nb >= step) {
195 phys_page_set_level(lp, index, nb, leaf, level - 1);
/*
 * Point @nb consecutive page entries of @d's phys_map, starting at page
 * number @index, at the section index given by the leaf argument.
 *
 * Node capacity is reserved first (3 * P_L2_LEVELS nodes), then the work is
 * handed to phys_page_set_level() starting at the top level,
 * P_L2_LEVELS - 1.  @index and @nb are passed by address to that helper.
 */
201 static void phys_page_set(AddressSpaceDispatch *d,
202 hwaddr index, hwaddr nb,
205 /* Wildly overreserve - it doesn't matter much. */
206 phys_map_node_reserve(3 * P_L2_LEVELS);
208 phys_page_set_level(&d->phys_map, &index, &nb, leaf, P_L2_LEVELS - 1);
211 static MemoryRegionSection *phys_page_find(PhysPageEntry lp, hwaddr index,
212 Node *nodes, MemoryRegionSection *sections)
217 for (i = P_L2_LEVELS - 1; i >= 0 && !lp.is_leaf; i--) {
218 if (lp.ptr == PHYS_MAP_NODE_NIL) {
219 return §ions[PHYS_SECTION_UNASSIGNED];
222 lp = p[(index >> (i * L2_BITS)) & (L2_SIZE - 1)];
224 return §ions[lp.ptr];
227 bool memory_region_is_unassigned(MemoryRegion *mr)
229 return mr != &io_mem_rom && mr != &io_mem_notdirty && !mr->rom_device
230 && mr != &io_mem_watch;
/*
 * Find the section of dispatch @d that covers an address.
 *
 * The page number (addr >> TARGET_PAGE_BITS) is looked up with
 * phys_page_find() in @d's phys_map, nodes and sections.  If
 * @resolve_subpage is true and the section found belongs to a subpage
 * region (section->mr->subpage), the enclosing subpage_t is recovered with
 * container_of() and its sub_section[] table, indexed by SUBPAGE_IDX(addr),
 * selects the final entry of d->sections.
 */
233 static MemoryRegionSection *address_space_lookup_region(AddressSpaceDispatch *d,
235 bool resolve_subpage)
237 MemoryRegionSection *section;
240 section = phys_page_find(d->phys_map, addr >> TARGET_PAGE_BITS,
241 d->nodes, d->sections);
242 if (resolve_subpage && section->mr->subpage) {
243 subpage = container_of(section->mr, subpage_t, iomem);
244 section = &d->sections[subpage->sub_section[SUBPAGE_IDX(addr)]];
249 static MemoryRegionSection *
250 address_space_translate_internal(AddressSpaceDispatch *d, hwaddr addr, hwaddr *xlat,
251 hwaddr *plen, bool resolve_subpage)
253 MemoryRegionSection *section;
256 section = address_space_lookup_region(d, addr, resolve_subpage);
257 /* Compute offset within MemoryRegionSection */
258 addr -= section->offset_within_address_space;
260 /* Compute offset within MemoryRegion */
261 *xlat = addr + section->offset_within_region;
263 diff = int128_sub(section->mr->size, int128_make64(addr));
264 *plen = int128_get64(int128_min(diff, int128_make64(*plen)));
268 MemoryRegion *address_space_translate(AddressSpace *as, hwaddr addr,
269 hwaddr *xlat, hwaddr *plen,
273 MemoryRegionSection *section;
278 section = address_space_translate_internal(as->dispatch, addr, &addr, plen, true);
281 if (!mr->iommu_ops) {
285 iotlb = mr->iommu_ops->translate(mr, addr);
286 addr = ((iotlb.translated_addr & ~iotlb.addr_mask)
287 | (addr & iotlb.addr_mask));
288 len = MIN(len, (addr | iotlb.addr_mask) - addr + 1);
289 if (!(iotlb.perm & (1 << is_write))) {
290 mr = &io_mem_unassigned;
294 as = iotlb.target_as;
302 MemoryRegionSection *
303 address_space_translate_for_iotlb(AddressSpace *as, hwaddr addr, hwaddr *xlat,
306 MemoryRegionSection *section;
307 section = address_space_translate_internal(as->dispatch, addr, xlat, plen, false);
309 assert(!section->mr->iommu_ops);
314 void cpu_exec_init_all(void)
316 #if !defined(CONFIG_USER_ONLY)
317 qemu_mutex_init(&ram_list.mutex);
323 #if !defined(CONFIG_USER_ONLY)
/*
 * post_load hook of vmstate_cpu_common; @opaque is the CPUState that was
 * just loaded.
 *
 * Clears bit 0x01 of cpu->interrupt_request and then calls
 * tlb_flush(cpu->env_ptr, 1).
 * NOTE(review): the return statement is not visible here - confirm the
 * value returned.
 */
325 static int cpu_common_post_load(void *opaque, int version_id)
327 CPUState *cpu = opaque;
329 /* 0x01 was CPU_INTERRUPT_EXIT. This line can be removed when the
330 version_id is increased. */
331 cpu->interrupt_request &= ~0x01;
332 tlb_flush(cpu->env_ptr, 1);
337 const VMStateDescription vmstate_cpu_common = {
338 .name = "cpu_common",
340 .minimum_version_id = 1,
341 .minimum_version_id_old = 1,
342 .post_load = cpu_common_post_load,
343 .fields = (VMStateField []) {
344 VMSTATE_UINT32(halted, CPUState),
345 VMSTATE_UINT32(interrupt_request, CPUState),
346 VMSTATE_END_OF_LIST()
352 CPUState *qemu_get_cpu(int index)
354 CPUState *cpu = first_cpu;
357 if (cpu->cpu_index == index) {
366 void qemu_for_each_cpu(void (*func)(CPUState *cpu, void *data), void *data)
377 void cpu_exec_init(CPUArchState *env)
379 CPUState *cpu = ENV_GET_CPU(env);
380 CPUClass *cc = CPU_GET_CLASS(cpu);
384 #if defined(CONFIG_USER_ONLY)
387 cpu->next_cpu = NULL;
390 while (*pcpu != NULL) {
391 pcpu = &(*pcpu)->next_cpu;
394 cpu->cpu_index = cpu_index;
396 QTAILQ_INIT(&env->breakpoints);
397 QTAILQ_INIT(&env->watchpoints);
398 #ifndef CONFIG_USER_ONLY
399 cpu->thread_id = qemu_get_thread_id();
402 #if defined(CONFIG_USER_ONLY)
405 vmstate_register(NULL, cpu_index, &vmstate_cpu_common, cpu);
406 #if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
407 register_savevm(NULL, "cpu", cpu_index, CPU_SAVE_VERSION,
408 cpu_save, cpu_load, env);
409 assert(cc->vmsd == NULL);
411 if (cc->vmsd != NULL) {
412 vmstate_register(NULL, cpu_index, cc->vmsd, cpu);
416 #if defined(TARGET_HAS_ICE)
417 #if defined(CONFIG_USER_ONLY)
418 static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
420 tb_invalidate_phys_page_range(pc, pc + 1, 0);
423 static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
425 tb_invalidate_phys_addr(cpu_get_phys_page_debug(cpu, pc) |
426 (pc & ~TARGET_PAGE_MASK));
429 #endif /* TARGET_HAS_ICE */
431 #if defined(CONFIG_USER_ONLY)
432 void cpu_watchpoint_remove_all(CPUArchState *env, int mask)
437 int cpu_watchpoint_insert(CPUArchState *env, target_ulong addr, target_ulong len,
438 int flags, CPUWatchpoint **watchpoint)
443 /* Add a watchpoint. */
/*
 * Validate and register a watchpoint covering @len bytes at @addr on @env.
 *
 * The request is rejected, with a message on stderr, when @len is zero, not
 * a power of two, or larger than TARGET_PAGE_SIZE, or when @addr is not
 * aligned to @len.  Otherwise a CPUWatchpoint is allocated with g_malloc(),
 * its len_mask is set to ~(len - 1), it is linked into env->watchpoints at
 * the head or at the tail, and the TLB entry for @addr is flushed with
 * tlb_flush_page().
 * NOTE(review): the head/tail condition, the remaining field assignments,
 * the use of @watchpoint and the return values are on lines not visible
 * here - confirm against the full file.
 */
444 int cpu_watchpoint_insert(CPUArchState *env, target_ulong addr, target_ulong len,
445 int flags, CPUWatchpoint **watchpoint)
447 target_ulong len_mask = ~(len - 1);
450 /* sanity checks: allow power-of-2 lengths, deny unaligned watchpoints */
451 if ((len & (len - 1)) || (addr & ~len_mask) ||
452 len == 0 || len > TARGET_PAGE_SIZE) {
453 fprintf(stderr, "qemu: tried to set invalid watchpoint at "
454 TARGET_FMT_lx ", len=" TARGET_FMT_lu "\n", addr, len);
457 wp = g_malloc(sizeof(*wp));
460 wp->len_mask = len_mask;
463 /* keep all GDB-injected watchpoints in front */
465 QTAILQ_INSERT_HEAD(&env->watchpoints, wp, entry);
467 QTAILQ_INSERT_TAIL(&env->watchpoints, wp, entry);
469 tlb_flush_page(env, addr);
476 /* Remove a specific watchpoint. */
477 int cpu_watchpoint_remove(CPUArchState *env, target_ulong addr, target_ulong len,
480 target_ulong len_mask = ~(len - 1);
483 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
484 if (addr == wp->vaddr && len_mask == wp->len_mask
485 && flags == (wp->flags & ~BP_WATCHPOINT_HIT)) {
486 cpu_watchpoint_remove_by_ref(env, wp);
493 /* Remove a specific watchpoint by reference. */
494 void cpu_watchpoint_remove_by_ref(CPUArchState *env, CPUWatchpoint *watchpoint)
496 QTAILQ_REMOVE(&env->watchpoints, watchpoint, entry);
498 tlb_flush_page(env, watchpoint->vaddr);
503 /* Remove all matching watchpoints. */
504 void cpu_watchpoint_remove_all(CPUArchState *env, int mask)
506 CPUWatchpoint *wp, *next;
508 QTAILQ_FOREACH_SAFE(wp, &env->watchpoints, entry, next) {
509 if (wp->flags & mask)
510 cpu_watchpoint_remove_by_ref(env, wp);
515 /* Add a breakpoint. */
516 int cpu_breakpoint_insert(CPUArchState *env, target_ulong pc, int flags,
517 CPUBreakpoint **breakpoint)
519 #if defined(TARGET_HAS_ICE)
522 bp = g_malloc(sizeof(*bp));
527 /* keep all GDB-injected breakpoints in front */
528 if (flags & BP_GDB) {
529 QTAILQ_INSERT_HEAD(&env->breakpoints, bp, entry);
531 QTAILQ_INSERT_TAIL(&env->breakpoints, bp, entry);
534 breakpoint_invalidate(ENV_GET_CPU(env), pc);
545 /* Remove a specific breakpoint. */
546 int cpu_breakpoint_remove(CPUArchState *env, target_ulong pc, int flags)
548 #if defined(TARGET_HAS_ICE)
551 QTAILQ_FOREACH(bp, &env->breakpoints, entry) {
552 if (bp->pc == pc && bp->flags == flags) {
553 cpu_breakpoint_remove_by_ref(env, bp);
563 /* Remove a specific breakpoint by reference. */
564 void cpu_breakpoint_remove_by_ref(CPUArchState *env, CPUBreakpoint *breakpoint)
566 #if defined(TARGET_HAS_ICE)
567 QTAILQ_REMOVE(&env->breakpoints, breakpoint, entry);
569 breakpoint_invalidate(ENV_GET_CPU(env), breakpoint->pc);
575 /* Remove all matching breakpoints. */
576 void cpu_breakpoint_remove_all(CPUArchState *env, int mask)
578 #if defined(TARGET_HAS_ICE)
579 CPUBreakpoint *bp, *next;
581 QTAILQ_FOREACH_SAFE(bp, &env->breakpoints, entry, next) {
582 if (bp->flags & mask)
583 cpu_breakpoint_remove_by_ref(env, bp);
588 /* enable or disable single step mode. EXCP_DEBUG is returned by the
589 CPU loop after each instruction */
590 void cpu_single_step(CPUState *cpu, int enabled)
592 #if defined(TARGET_HAS_ICE)
593 CPUArchState *env = cpu->env_ptr;
595 if (cpu->singlestep_enabled != enabled) {
596 cpu->singlestep_enabled = enabled;
598 kvm_update_guest_debug(env, 0);
600 /* must flush all the translated code to avoid inconsistencies */
601 /* XXX: only flush what is necessary */
608 void cpu_abort(CPUArchState *env, const char *fmt, ...)
610 CPUState *cpu = ENV_GET_CPU(env);
616 fprintf(stderr, "qemu: fatal: ");
617 vfprintf(stderr, fmt, ap);
618 fprintf(stderr, "\n");
619 cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_FPU | CPU_DUMP_CCOP);
620 if (qemu_log_enabled()) {
621 qemu_log("qemu: fatal: ");
622 qemu_log_vprintf(fmt, ap2);
624 log_cpu_state(cpu, CPU_DUMP_FPU | CPU_DUMP_CCOP);
630 #if defined(CONFIG_USER_ONLY)
632 struct sigaction act;
633 sigfillset(&act.sa_mask);
634 act.sa_handler = SIG_DFL;
635 sigaction(SIGABRT, &act, NULL);
641 CPUArchState *cpu_copy(CPUArchState *env)
643 CPUArchState *new_env = cpu_init(env->cpu_model_str);
644 #if defined(TARGET_HAS_ICE)
649 memcpy(new_env, env, sizeof(CPUArchState));
651 /* Clone all break/watchpoints.
652 Note: Once we support ptrace with hw-debug register access, make sure
653 BP_CPU break/watchpoints are handled correctly on clone. */
654 QTAILQ_INIT(&env->breakpoints);
655 QTAILQ_INIT(&env->watchpoints);
656 #if defined(TARGET_HAS_ICE)
657 QTAILQ_FOREACH(bp, &env->breakpoints, entry) {
658 cpu_breakpoint_insert(new_env, bp->pc, bp->flags, NULL);
660 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
661 cpu_watchpoint_insert(new_env, wp->vaddr, (~wp->len_mask) + 1,
669 #if !defined(CONFIG_USER_ONLY)
670 static void tlb_reset_dirty_range_all(ram_addr_t start, ram_addr_t end,
675 /* we modify the TLB cache so that the dirty bit will be set again
676 when accessing the range */
677 start1 = (uintptr_t)qemu_safe_ram_ptr(start);
678 /* Check that we don't span multiple blocks - this breaks the
679 address comparisons below. */
680 if ((uintptr_t)qemu_safe_ram_ptr(end - 1) - start1
681 != (end - 1) - start) {
684 cpu_tlb_reset_dirty_all(start1, length);
688 /* Note: start and end must be within the same ram block. */
689 void cpu_physical_memory_reset_dirty(ram_addr_t start, ram_addr_t end,
694 start &= TARGET_PAGE_MASK;
695 end = TARGET_PAGE_ALIGN(end);
697 length = end - start;
700 cpu_physical_memory_mask_dirty_range(start, length, dirty_flags);
703 tlb_reset_dirty_range_all(start, end, length);
707 static int cpu_physical_memory_set_dirty_tracking(int enable)
710 in_migration = enable;
714 hwaddr memory_region_section_get_iotlb(CPUArchState *env,
715 MemoryRegionSection *section,
717 hwaddr paddr, hwaddr xlat,
719 target_ulong *address)
724 if (memory_region_is_ram(section->mr)) {
726 iotlb = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
728 if (!section->readonly) {
729 iotlb |= PHYS_SECTION_NOTDIRTY;
731 iotlb |= PHYS_SECTION_ROM;
734 iotlb = section - address_space_memory.dispatch->sections;
738 /* Make accesses to pages with watchpoints go via the
739 watchpoint trap routines. */
740 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
741 if (vaddr == (wp->vaddr & TARGET_PAGE_MASK)) {
742 /* Avoid trapping reads of pages with a write breakpoint. */
743 if ((prot & PAGE_WRITE) || (wp->flags & BP_MEM_READ)) {
744 iotlb = PHYS_SECTION_WATCH + paddr;
745 *address |= TLB_MMIO;
753 #endif /* !defined(CONFIG_USER_ONLY) */
755 #if !defined(CONFIG_USER_ONLY)
757 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
759 static subpage_t *subpage_init(AddressSpace *as, hwaddr base);
/*
 * Append a copy of *@section to next_map.sections and return its index.
 *
 * Asserts that the current section count is below TARGET_PAGE_SIZE.  When
 * the array is full (sections_nb == sections_nb_alloc) the allocation count
 * is raised to at least twice its previous value and the array is resized
 * with g_renew().  The section is stored by value, a reference is taken on
 * section->mr with memory_region_ref(), and the index used is the value of
 * next_map.sections_nb before it is incremented.
 */
761 static uint16_t phys_section_add(MemoryRegionSection *section)
763 /* The physical section number is ORed with a page-aligned
764 * pointer to produce the iotlb entries. Thus it should
765 * never overflow into the page-aligned value.
767 assert(next_map.sections_nb < TARGET_PAGE_SIZE);
769 if (next_map.sections_nb == next_map.sections_nb_alloc) {
770 next_map.sections_nb_alloc = MAX(next_map.sections_nb_alloc * 2,
772 next_map.sections = g_renew(MemoryRegionSection, next_map.sections,
773 next_map.sections_nb_alloc);
775 next_map.sections[next_map.sections_nb] = *section;
776 memory_region_ref(section->mr);
777 return next_map.sections_nb++;
780 static void phys_section_destroy(MemoryRegion *mr)
782 memory_region_unref(mr);
785 subpage_t *subpage = container_of(mr, subpage_t, iomem);
786 memory_region_destroy(&subpage->iomem);
791 static void phys_sections_free(PhysPageMap *map)
793 while (map->sections_nb > 0) {
794 MemoryRegionSection *section = &map->sections[--map->sections_nb];
795 phys_section_destroy(section->mr);
797 g_free(map->sections);
802 static void register_subpage(AddressSpaceDispatch *d, MemoryRegionSection *section)
805 hwaddr base = section->offset_within_address_space
807 MemoryRegionSection *existing = phys_page_find(d->phys_map, base >> TARGET_PAGE_BITS,
808 next_map.nodes, next_map.sections);
809 MemoryRegionSection subsection = {
810 .offset_within_address_space = base,
811 .size = int128_make64(TARGET_PAGE_SIZE),
815 assert(existing->mr->subpage || existing->mr == &io_mem_unassigned);
817 if (!(existing->mr->subpage)) {
818 subpage = subpage_init(d->as, base);
819 subsection.mr = &subpage->iomem;
820 phys_page_set(d, base >> TARGET_PAGE_BITS, 1,
821 phys_section_add(&subsection));
823 subpage = container_of(existing->mr, subpage_t, iomem);
825 start = section->offset_within_address_space & ~TARGET_PAGE_MASK;
826 end = start + int128_get64(section->size) - 1;
827 subpage_register(subpage, start, end, phys_section_add(section));
/*
 * Register @section in dispatch @d as a run of whole pages.
 *
 * The section is added with phys_section_add(), and the resulting index is
 * installed with phys_page_set() for num_pages pages starting at page
 * number start_addr >> TARGET_PAGE_BITS, where start_addr is
 * section->offset_within_address_space.  num_pages is derived from
 * section->size by a right shift.
 * NOTE(review): the shift amount is on a line not visible here; presumably
 * TARGET_PAGE_BITS - confirm.
 */
831 static void register_multipage(AddressSpaceDispatch *d,
832 MemoryRegionSection *section)
834 hwaddr start_addr = section->offset_within_address_space;
835 uint16_t section_index = phys_section_add(section);
836 uint64_t num_pages = int128_get64(int128_rshift(section->size,
840 phys_page_set(d, start_addr >> TARGET_PAGE_BITS, num_pages, section_index);
843 static void mem_add(MemoryListener *listener, MemoryRegionSection *section)
845 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
846 AddressSpaceDispatch *d = as->next_dispatch;
847 MemoryRegionSection now = *section, remain = *section;
848 Int128 page_size = int128_make64(TARGET_PAGE_SIZE);
850 if (now.offset_within_address_space & ~TARGET_PAGE_MASK) {
851 uint64_t left = TARGET_PAGE_ALIGN(now.offset_within_address_space)
852 - now.offset_within_address_space;
854 now.size = int128_min(int128_make64(left), now.size);
855 register_subpage(d, &now);
857 now.size = int128_zero();
859 while (int128_ne(remain.size, now.size)) {
860 remain.size = int128_sub(remain.size, now.size);
861 remain.offset_within_address_space += int128_get64(now.size);
862 remain.offset_within_region += int128_get64(now.size);
864 if (int128_lt(remain.size, page_size)) {
865 register_subpage(d, &now);
866 } else if (remain.offset_within_region & ~TARGET_PAGE_MASK) {
867 now.size = page_size;
868 register_subpage(d, &now);
870 now.size = int128_and(now.size, int128_neg(page_size));
871 register_multipage(d, &now);
876 void qemu_flush_coalesced_mmio_buffer(void)
879 kvm_flush_coalesced_mmio_buffer();
882 void qemu_mutex_lock_ramlist(void)
884 qemu_mutex_lock(&ram_list.mutex);
887 void qemu_mutex_unlock_ramlist(void)
889 qemu_mutex_unlock(&ram_list.mutex);
892 #if defined(__linux__) && !defined(TARGET_S390X)
896 #define HUGETLBFS_MAGIC 0x958458f6
898 static long gethugepagesize(const char *path)
904 ret = statfs(path, &fs);
905 } while (ret != 0 && errno == EINTR);
912 if (fs.f_type != HUGETLBFS_MAGIC)
913 fprintf(stderr, "Warning: path not on HugeTLBFS: %s\n", path);
918 static void *file_ram_alloc(RAMBlock *block,
923 char *sanitized_name;
930 unsigned long hpagesize;
932 hpagesize = gethugepagesize(path);
937 if (memory < hpagesize) {
941 if (kvm_enabled() && !kvm_has_sync_mmu()) {
942 fprintf(stderr, "host lacks kvm mmu notifiers, -mem-path unsupported\n");
946 /* Make name safe to use with mkstemp by replacing '/' with '_'. */
947 sanitized_name = g_strdup(block->mr->name);
948 for (c = sanitized_name; *c != '\0'; c++) {
953 filename = g_strdup_printf("%s/qemu_back_mem.%s.XXXXXX", path,
955 g_free(sanitized_name);
957 fd = mkstemp(filename);
959 perror("unable to create backing store for hugepages");
966 memory = (memory+hpagesize-1) & ~(hpagesize-1);
969 * ftruncate is not supported by hugetlbfs in older
970 * hosts, so don't bother bailing out on errors.
971 * If anything goes wrong with it under other filesystems,
974 if (ftruncate(fd, memory))
978 /* NB: MAP_POPULATE won't exhaustively alloc all phys pages in the case
979 * MAP_PRIVATE is requested. For mem_prealloc we mmap as MAP_SHARED
980 * to sidestep this quirk.
982 flags = mem_prealloc ? MAP_POPULATE | MAP_SHARED : MAP_PRIVATE;
983 area = mmap(0, memory, PROT_READ | PROT_WRITE, flags, fd, 0);
985 area = mmap(0, memory, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
987 if (area == MAP_FAILED) {
988 perror("file_ram_alloc: can't mmap RAM pages");
/*
 * Choose an offset for a new RAM block of @size bytes.
 *
 * @size must be non-zero (asserted).  An empty ram_list.blocks is handled as
 * a special case.  Otherwise, for every existing block the end of the block
 * (offset + length) is computed, and the inner loop finds the lowest offset
 * of any block starting at or after that end (RAM_ADDR_MAX if there is
 * none).  The gap between the two is a candidate when it is at least @size
 * and smaller than the best gap seen so far (mingap), so the tightest
 * fitting gap wins.  If no candidate was found (offset is still
 * RAM_ADDR_MAX) an error is printed to stderr.
 * NOTE(review): the value returned for an empty list, the assignment to
 * offset for a candidate gap, and what follows the error message are on
 * lines not visible here - confirm against the full file.
 */
997 static ram_addr_t find_ram_offset(ram_addr_t size)
999 RAMBlock *block, *next_block;
1000 ram_addr_t offset = RAM_ADDR_MAX, mingap = RAM_ADDR_MAX;
1002 assert(size != 0); /* it would hand out same offset multiple times */
1004 if (QTAILQ_EMPTY(&ram_list.blocks))
1007 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1008 ram_addr_t end, next = RAM_ADDR_MAX;
1010 end = block->offset + block->length;
1012 QTAILQ_FOREACH(next_block, &ram_list.blocks, next) {
1013 if (next_block->offset >= end) {
1014 next = MIN(next, next_block->offset);
1017 if (next - end >= size && next - end < mingap) {
1019 mingap = next - end;
1023 if (offset == RAM_ADDR_MAX) {
1024 fprintf(stderr, "Failed to find gap of requested size: %" PRIu64 "\n",
1032 ram_addr_t last_ram_offset(void)
1035 ram_addr_t last = 0;
1037 QTAILQ_FOREACH(block, &ram_list.blocks, next)
1038 last = MAX(last, block->offset + block->length);
1043 static void qemu_ram_setup_dump(void *addr, ram_addr_t size)
1047 /* Use MADV_DONTDUMP, if user doesn't want the guest memory in the core */
1048 if (!qemu_opt_get_bool(qemu_get_machine_opts(),
1049 "dump-guest-core", true)) {
1050 ret = qemu_madvise(addr, size, QEMU_MADV_DONTDUMP);
1052 perror("qemu_madvise");
1053 fprintf(stderr, "madvise doesn't support MADV_DONTDUMP, "
1054 "but dump_guest_core=off specified\n");
1059 void qemu_ram_set_idstr(ram_addr_t addr, const char *name, DeviceState *dev)
1061 RAMBlock *new_block, *block;
1064 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1065 if (block->offset == addr) {
1071 assert(!new_block->idstr[0]);
1074 char *id = qdev_get_dev_path(dev);
1076 snprintf(new_block->idstr, sizeof(new_block->idstr), "%s/", id);
1080 pstrcat(new_block->idstr, sizeof(new_block->idstr), name);
1082 /* This assumes the iothread lock is taken here too. */
1083 qemu_mutex_lock_ramlist();
1084 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1085 if (block != new_block && !strcmp(block->idstr, new_block->idstr)) {
1086 fprintf(stderr, "RAMBlock \"%s\" already registered, abort!\n",
1091 qemu_mutex_unlock_ramlist();
/*
 * Advise the host that the @len bytes at @addr may be merged, by calling
 * qemu_madvise() with QEMU_MADV_MERGEABLE, and return that call's result.
 *
 * The advice is not issued when the machine option "mem-merge" (default
 * true) reads as false.
 * NOTE(review): the value returned in the disabled case is on a line not
 * visible here - confirm.
 */
1094 static int memory_try_enable_merging(void *addr, size_t len)
1096 if (!qemu_opt_get_bool(qemu_get_machine_opts(), "mem-merge", true)) {
1097 /* disabled by the user */
1101 return qemu_madvise(addr, len, QEMU_MADV_MERGEABLE);
1104 ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
1107 RAMBlock *block, *new_block;
1109 size = TARGET_PAGE_ALIGN(size);
1110 new_block = g_malloc0(sizeof(*new_block));
1112 /* This assumes the iothread lock is taken here too. */
1113 qemu_mutex_lock_ramlist();
1115 new_block->offset = find_ram_offset(size);
1117 new_block->host = host;
1118 new_block->flags |= RAM_PREALLOC_MASK;
1121 #if defined (__linux__) && !defined(TARGET_S390X)
1122 new_block->host = file_ram_alloc(new_block, size, mem_path);
1123 if (!new_block->host) {
1124 new_block->host = qemu_anon_ram_alloc(size);
1125 memory_try_enable_merging(new_block->host, size);
1128 fprintf(stderr, "-mem-path option unsupported\n");
1132 if (xen_enabled()) {
1133 xen_ram_alloc(new_block->offset, size, mr);
1134 } else if (kvm_enabled()) {
1135 /* some s390/kvm configurations have special constraints */
1136 new_block->host = kvm_ram_alloc(size);
1138 new_block->host = qemu_anon_ram_alloc(size);
1140 memory_try_enable_merging(new_block->host, size);
1143 new_block->length = size;
1145 /* Keep the list sorted from biggest to smallest block. */
1146 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1147 if (block->length < new_block->length) {
1152 QTAILQ_INSERT_BEFORE(block, new_block, next);
1154 QTAILQ_INSERT_TAIL(&ram_list.blocks, new_block, next);
1156 ram_list.mru_block = NULL;
1159 qemu_mutex_unlock_ramlist();
1161 ram_list.phys_dirty = g_realloc(ram_list.phys_dirty,
1162 last_ram_offset() >> TARGET_PAGE_BITS);
1163 memset(ram_list.phys_dirty + (new_block->offset >> TARGET_PAGE_BITS),
1164 0, size >> TARGET_PAGE_BITS);
1165 cpu_physical_memory_set_dirty_range(new_block->offset, size, 0xff);
1167 qemu_ram_setup_dump(new_block->host, size);
1168 qemu_madvise(new_block->host, size, QEMU_MADV_HUGEPAGE);
1171 kvm_setup_guest_memory(new_block->host, size);
1173 return new_block->offset;
/*
 * Convenience wrapper: forwards to qemu_ram_alloc_from_ptr() with a NULL
 * host pointer and returns its result unchanged.
 */
1176 ram_addr_t qemu_ram_alloc(ram_addr_t size, MemoryRegion *mr)
1178 return qemu_ram_alloc_from_ptr(size, NULL, mr);
1181 void qemu_ram_free_from_ptr(ram_addr_t addr)
1185 /* This assumes the iothread lock is taken here too. */
1186 qemu_mutex_lock_ramlist();
1187 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1188 if (addr == block->offset) {
1189 QTAILQ_REMOVE(&ram_list.blocks, block, next);
1190 ram_list.mru_block = NULL;
1196 qemu_mutex_unlock_ramlist();
1199 void qemu_ram_free(ram_addr_t addr)
1203 /* This assumes the iothread lock is taken here too. */
1204 qemu_mutex_lock_ramlist();
1205 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1206 if (addr == block->offset) {
1207 QTAILQ_REMOVE(&ram_list.blocks, block, next);
1208 ram_list.mru_block = NULL;
1210 if (block->flags & RAM_PREALLOC_MASK) {
1212 } else if (mem_path) {
1213 #if defined (__linux__) && !defined(TARGET_S390X)
1215 munmap(block->host, block->length);
1218 qemu_anon_ram_free(block->host, block->length);
1224 if (xen_enabled()) {
1225 xen_invalidate_map_cache_entry(block->host);
1227 qemu_anon_ram_free(block->host, block->length);
1234 qemu_mutex_unlock_ramlist();
1239 void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
1246 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1247 offset = addr - block->offset;
1248 if (offset < block->length) {
1249 vaddr = block->host + offset;
1250 if (block->flags & RAM_PREALLOC_MASK) {
1254 munmap(vaddr, length);
1256 #if defined(__linux__) && !defined(TARGET_S390X)
1259 flags |= mem_prealloc ? MAP_POPULATE | MAP_SHARED :
1262 flags |= MAP_PRIVATE;
1264 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1265 flags, block->fd, offset);
1267 flags |= MAP_PRIVATE | MAP_ANONYMOUS;
1268 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1275 #if defined(TARGET_S390X) && defined(CONFIG_KVM)
1276 flags |= MAP_SHARED | MAP_ANONYMOUS;
1277 area = mmap(vaddr, length, PROT_EXEC|PROT_READ|PROT_WRITE,
1280 flags |= MAP_PRIVATE | MAP_ANONYMOUS;
1281 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1285 if (area != vaddr) {
1286 fprintf(stderr, "Could not remap addr: "
1287 RAM_ADDR_FMT "@" RAM_ADDR_FMT "\n",
1291 memory_try_enable_merging(vaddr, length);
1292 qemu_ram_setup_dump(vaddr, length);
1298 #endif /* !_WIN32 */
/*
 * Find the RAMBlock whose [offset, offset + length) range contains @addr.
 *
 * ram_list.mru_block is tested first; if it does not contain @addr, every
 * block in ram_list.blocks is tested in list order.  The containment test
 * is the single comparison addr - block->offset < block->length.
 * NOTE(review): that comparison covers the lower bound only if ram_addr_t
 * is unsigned - presumably so, confirm.
 * When no block matches, "Bad ram offset" is printed to stderr.  The block
 * that was found is stored in ram_list.mru_block.
 * NOTE(review): what follows the error message and the return statement are
 * on lines not visible here - confirm against the full file.
 */
1300 static RAMBlock *qemu_get_ram_block(ram_addr_t addr)
1304 /* The list is protected by the iothread lock here. */
1305 block = ram_list.mru_block;
1306 if (block && addr - block->offset < block->length) {
1309 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1310 if (addr - block->offset < block->length) {
1315 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
1319 ram_list.mru_block = block;
1323 /* Return a host pointer to ram allocated with qemu_ram_alloc.
1324 With the exception of the softmmu code in this file, this should
1325 only be used for local memory (e.g. video ram) that the device owns,
1326 and knows it isn't going to access beyond the end of the block.
1328 It should not be used for general purpose DMA.
1329 Use cpu_physical_memory_map/cpu_physical_memory_rw instead.
1331 void *qemu_get_ram_ptr(ram_addr_t addr)
1333 RAMBlock *block = qemu_get_ram_block(addr);
1335 if (xen_enabled()) {
1336 /* We need to check if the requested address is in the RAM
1337 * because we don't want to map the entire memory in QEMU.
1338 * In that case just map until the end of the page.
1340 if (block->offset == 0) {
1341 return xen_map_cache(addr, 0, 0);
1342 } else if (block->host == NULL) {
1344 xen_map_cache(block->offset, block->length, 1);
1347 return block->host + (addr - block->offset);
1350 /* Return a host pointer to ram allocated with qemu_ram_alloc. Same as
1351 * qemu_get_ram_ptr but do not touch ram_list.mru_block.
1353 * ??? Is this still necessary?
1355 static void *qemu_safe_ram_ptr(ram_addr_t addr)
1359 /* The list is protected by the iothread lock here. */
1360 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1361 if (addr - block->offset < block->length) {
1362 if (xen_enabled()) {
1363 /* We need to check if the requested address is in the RAM
1364 * because we don't want to map the entire memory in QEMU.
1365 * In that case just map until the end of the page.
1367 if (block->offset == 0) {
1368 return xen_map_cache(addr, 0, 0);
1369 } else if (block->host == NULL) {
1371 xen_map_cache(block->offset, block->length, 1);
1374 return block->host + (addr - block->offset);
1378 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
1384 /* Return a host pointer to guest's ram. Similar to qemu_get_ram_ptr
1385 * but takes a size argument */
/*
 * @addr: ram offset to translate.
 * @size: in/out; on entry the number of bytes wanted.  On the non-Xen path
 *        it is reduced to the number of bytes left in the containing block
 *        when the request would run past that block's end.
 *
 * Under Xen the request is passed straight to xen_map_cache(addr, *size, 1).
 * Otherwise ram_list.blocks is scanned for the block containing @addr and
 * block->host + (addr - block->offset) is returned.  When no block matches,
 * "Bad ram offset" is printed to stderr.
 * NOTE(review): lines between the signature and the Xen check, and after
 * the error message, are not visible here - confirm against the full file.
 */
1386 static void *qemu_ram_ptr_length(ram_addr_t addr, hwaddr *size)
1391 if (xen_enabled()) {
1392 return xen_map_cache(addr, *size, 1);
1396 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1397 if (addr - block->offset < block->length) {
1398 if (addr - block->offset + *size > block->length)
1399 *size = block->length - addr + block->offset;
1400 return block->host + (addr - block->offset);
1404 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
1409 /* Some of the softmmu routines need to translate from a host pointer
1410 (typically a TLB entry) back to a ram offset. */
1411 MemoryRegion *qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr)
1414 uint8_t *host = ptr;
1416 if (xen_enabled()) {
1417 *ram_addr = xen_ram_addr_from_mapcache(ptr);
1418 return qemu_get_ram_block(*ram_addr)->mr;
1421 block = ram_list.mru_block;
1422 if (block && block->host && host - block->host < block->length) {
1426 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1427 /* This case happens when the block is not mapped. */
1428 if (block->host == NULL) {
1431 if (host - block->host < block->length) {
1439 *ram_addr = block->offset + (host - block->host);
1443 static void notdirty_mem_write(void *opaque, hwaddr ram_addr,
1444 uint64_t val, unsigned size)
1447 dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
1448 if (!(dirty_flags & CODE_DIRTY_FLAG)) {
1449 tb_invalidate_phys_page_fast(ram_addr, size);
1450 dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
1454 stb_p(qemu_get_ram_ptr(ram_addr), val);
1457 stw_p(qemu_get_ram_ptr(ram_addr), val);
1460 stl_p(qemu_get_ram_ptr(ram_addr), val);
1465 dirty_flags |= (0xff & ~CODE_DIRTY_FLAG);
1466 cpu_physical_memory_set_dirty_flags(ram_addr, dirty_flags);
1467 /* we remove the notdirty callback only if the code has been
1469 if (dirty_flags == 0xff) {
1470 CPUArchState *env = current_cpu->env_ptr;
1471 tlb_set_dirty(env, env->mem_io_vaddr);
/* .valid.accepts callback of notdirty_mem_ops.
 * NOTE(review): the body is elided in this listing. */
1475 static bool notdirty_mem_accepts(void *opaque, hwaddr addr,
1476 unsigned size, bool is_write)
/* Ops table used for io_mem_notdirty: a write handler and an accepts
 * filter, native endianness.  No .read handler appears in the visible lines. */
1481 static const MemoryRegionOps notdirty_mem_ops = {
1482 .write = notdirty_mem_write,
1483 .valid.accepts = notdirty_mem_accepts,
1484 .endianness = DEVICE_NATIVE_ENDIAN,
1487 /* Generate a debug exception if a watchpoint has been hit. */
/* offset:   offset of the access inside its page
 * len_mask: mask derived from the access size (callers pass ~(size - 1))
 * flags:    access type bits, tested against each watchpoint's flags */
1488 static void check_watchpoint(int offset, int len_mask, int flags)
1490 CPUArchState *env = current_cpu->env_ptr;
1491 target_ulong pc, cs_base;
1496 if (env->watchpoint_hit) {
1497 /* We re-entered the check after replacing the TB. Now raise
1498 * the debug interrupt so that it will trigger after the
1499 * current instruction. */
1500 cpu_interrupt(ENV_GET_CPU(env), CPU_INTERRUPT_DEBUG);
/* Rebuild the accessed address: page of env->mem_io_vaddr plus offset. */
1503 vaddr = (env->mem_io_vaddr & TARGET_PAGE_MASK) + offset;
1504 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
/* Hit when the two addresses agree after masking with either the access
 * mask or the watchpoint's own mask, and the access type is watched. */
1505 if ((vaddr == (wp->vaddr & len_mask) ||
1506 (vaddr & wp->len_mask) == wp->vaddr) && (wp->flags & flags)) {
1507 wp->flags |= BP_WATCHPOINT_HIT;
/* Only the first hit is recorded in env->watchpoint_hit. */
1508 if (!env->watchpoint_hit) {
1509 env->watchpoint_hit = wp;
1510 tb_check_watchpoint(env);
1511 if (wp->flags & BP_STOP_BEFORE_ACCESS) {
1512 env->exception_index = EXCP_DEBUG;
/* Regenerate code for the current CPU state and resume from there.
 * NOTE(review): the branch structure around these lines is elided. */
1515 cpu_get_tb_cpu_state(env, &pc, &cs_base, &cpu_flags);
1516 tb_gen_code(env, pc, cs_base, cpu_flags, 1);
1517 cpu_resume_from_signal(env, NULL);
/* Clear the hit marker (presumably on the non-matching branch; the
 * enclosing else line is not visible here). */
1521 wp->flags &= ~BP_WATCHPOINT_HIT;
1526 /* Watchpoint access routines. Watchpoints are inserted using TLB tricks,
1527 so these check for a hit then pass through to the normal out-of-line phys routines. */
/* Read callback of watch_mem_ops. */
1529 static uint64_t watch_mem_read(void *opaque, hwaddr addr,
/* Check watchpoints using the in-page offset of addr and a mask built from
 * the access size, then do the load with the matching *_phys accessor. */
1532 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~(size - 1), BP_MEM_READ);
1534 case 1: return ldub_phys(addr);
1535 case 2: return lduw_phys(addr);
1536 case 4: return ldl_phys(addr);
/* Write callback of watch_mem_ops: check watchpoints, then store val with
 * one of stb_phys/stw_phys/stl_phys.
 * NOTE(review): the size dispatch between the stores is elided here. */
1541 static void watch_mem_write(void *opaque, hwaddr addr,
1542 uint64_t val, unsigned size)
1544 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~(size - 1), BP_MEM_WRITE);
1547 stb_phys(addr, val);
1550 stw_phys(addr, val);
1553 stl_phys(addr, val);
/* Ops table used for io_mem_watch: watchpoint-checking read and write
 * handlers, native endianness. */
1559 static const MemoryRegionOps watch_mem_ops = {
1560 .read = watch_mem_read,
1561 .write = watch_mem_write,
1562 .endianness = DEVICE_NATIVE_ENDIAN,
/* Read callback of subpage_ops; opaque is the subpage_t. */
1565 static uint64_t subpage_read(void *opaque, hwaddr addr,
1568 subpage_t *subpage = opaque;
1571 #if defined(DEBUG_SUBPAGE)
1572 printf("%s: subpage %p len %d addr " TARGET_FMT_plx "\n", __func__,
1573 subpage, len, addr);
/* Forward the read to the owning address space at base + addr.
 * NOTE(review): conversion of buf into the return value is elided here. */
1575 address_space_read(subpage->as, addr + subpage->base, buf, len);
/* Write callback of subpage_ops; opaque is the subpage_t. */
1588 static void subpage_write(void *opaque, hwaddr addr,
1589 uint64_t value, unsigned len)
1591 subpage_t *subpage = opaque;
1594 #if defined(DEBUG_SUBPAGE)
1595 printf("%s: subpage %p len %d addr " TARGET_FMT_plx
1596 " value %"PRIx64"\n",
1597 __func__, subpage, len, addr, value);
/* Forward the write to the owning address space at base + addr.
 * NOTE(review): the code that fills buf from value is elided here. */
1612 address_space_write(subpage->as, addr + subpage->base, buf, len);
1615 static bool subpage_accepts(void *opaque, hwaddr addr,
1616 unsigned size, bool is_write)
1618 subpage_t *subpage = opaque;
1619 #if defined(DEBUG_SUBPAGE)
1620 printf("%s: subpage %p %c len %d addr " TARGET_FMT_plx "\n",
1621 __func__, subpage, is_write ? 'w' : 'r', len, addr);
1624 return address_space_access_valid(subpage->as, addr + subpage->base,
/* Ops table installed on every subpage region by subpage_init(). */
1628 static const MemoryRegionOps subpage_ops = {
1629 .read = subpage_read,
1630 .write = subpage_write,
1631 .valid.accepts = subpage_accepts,
1632 .endianness = DEVICE_NATIVE_ENDIAN,
/* Assign `section` to every sub_section slot from SUBPAGE_IDX(start) to
 * SUBPAGE_IDX(end), inclusive.  start and end are offsets inside the page.
 * NOTE(review): the rest of the parameter list and the return statements
 * are elided in this listing. */
1635 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
/* Both bounds must lie inside one target page. */
1640 if (start >= TARGET_PAGE_SIZE || end >= TARGET_PAGE_SIZE)
1642 idx = SUBPAGE_IDX(start);
1643 eidx = SUBPAGE_IDX(end);
/* NOTE(review): the trace below prints `memory`, while the value actually
 * stored is `section`; confirm this still builds with DEBUG_SUBPAGE. */
1644 #if defined(DEBUG_SUBPAGE)
1645 printf("%s: %p start %08x end %08x idx %08x eidx %08x mem %ld\n", __func__,
1646 mmio, start, end, idx, eidx, memory);
1648 for (; idx <= eidx; idx++) {
1649 mmio->sub_section[idx] = section;
/* Allocate a zeroed subpage_t, set up its page-sized I/O region backed by
 * subpage_ops, and point every slot at PHYS_SECTION_UNASSIGNED.
 * NOTE(review): the stores of as/base and the return are elided here. */
1655 static subpage_t *subpage_init(AddressSpace *as, hwaddr base)
1659 mmio = g_malloc0(sizeof(subpage_t));
1663 memory_region_init_io(&mmio->iomem, NULL, &subpage_ops, mmio,
1664 "subpage", TARGET_PAGE_SIZE);
1665 mmio->iomem.subpage = true;
/* NOTE(review): `subpage_memory` is not declared in the visible code;
 * confirm this trace still builds with DEBUG_SUBPAGE. */
1666 #if defined(DEBUG_SUBPAGE)
1667 printf("%s: %p base " TARGET_FMT_plx " len %08x %d\n", __func__,
1668 mmio, base, TARGET_PAGE_SIZE, subpage_memory);
/* Start with the whole page unassigned. */
1670 subpage_register(mmio, 0, TARGET_PAGE_SIZE-1, PHYS_SECTION_UNASSIGNED);
1675 static uint16_t dummy_section(MemoryRegion *mr)
1677 MemoryRegionSection section = {
1679 .offset_within_address_space = 0,
1680 .offset_within_region = 0,
1681 .size = int128_2_64(),
1684 return phys_section_add(§ion);
1687 MemoryRegion *iotlb_to_region(hwaddr index)
1689 return address_space_memory.dispatch->sections[index & ~TARGET_PAGE_MASK].mr;
1692 static void io_mem_init(void)
1694 memory_region_init_io(&io_mem_rom, NULL, &unassigned_mem_ops, NULL, "rom", UINT64_MAX);
1695 memory_region_init_io(&io_mem_unassigned, NULL, &unassigned_mem_ops, NULL,
1696 "unassigned", UINT64_MAX);
1697 memory_region_init_io(&io_mem_notdirty, NULL, ¬dirty_mem_ops, NULL,
1698 "notdirty", UINT64_MAX);
1699 memory_region_init_io(&io_mem_watch, NULL, &watch_mem_ops, NULL,
1700 "watch", UINT64_MAX);
/* Allocate a fresh AddressSpaceDispatch whose root entry is an empty
 * non-leaf (PHYS_MAP_NODE_NIL), and park it in as->next_dispatch. */
1703 static void mem_begin(MemoryListener *listener)
1705 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
1706 AddressSpaceDispatch *d = g_new(AddressSpaceDispatch, 1);
1708 d->phys_map = (PhysPageEntry) { .ptr = PHYS_MAP_NODE_NIL, .is_leaf = 0 };
1710 as->next_dispatch = d;
/* Publish as->next_dispatch as the live dispatch after pointing it at the
 * node and section arrays of next_map.
 * NOTE(review): what is done with the previous dispatch (cur) is elided. */
1713 static void mem_commit(MemoryListener *listener)
1715 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
1716 AddressSpaceDispatch *cur = as->dispatch;
1717 AddressSpaceDispatch *next = as->next_dispatch;
1719 next->nodes = next_map.nodes;
1720 next->sections = next_map.sections;
1722 as->dispatch = next;
/* Save the current next_map in a newly allocated prev_map, clear next_map,
 * then re-register the four fixed sections. */
1726 static void core_begin(MemoryListener *listener)
1730 prev_map = g_new(PhysPageMap, 1);
1731 *prev_map = next_map;
1733 memset(&next_map, 0, sizeof(next_map));
/* Registration order matters: each assert checks that the index returned
 * equals the corresponding PHYS_SECTION_* constant. */
1734 n = dummy_section(&io_mem_unassigned);
1735 assert(n == PHYS_SECTION_UNASSIGNED);
1736 n = dummy_section(&io_mem_notdirty);
1737 assert(n == PHYS_SECTION_NOTDIRTY);
1738 n = dummy_section(&io_mem_rom);
1739 assert(n == PHYS_SECTION_ROM);
1740 n = dummy_section(&io_mem_watch);
1741 assert(n == PHYS_SECTION_WATCH);
1744 /* This listener's commit runs after the other AddressSpaceDispatch listeners'.
1745 * All AddressSpaceDispatch instances have switched to the next map. */
/* Commit hook of core_memory_listener: release the map saved by core_begin(). */
1747 static void core_commit(MemoryListener *listener)
1749 phys_sections_free(prev_map);
/* Commit hook of tcg_memory_listener. */
1752 static void tcg_commit(MemoryListener *listener)
1756 /* since each CPU stores ram addresses in its TLB cache, we must
1757 reset the modified entries */
/* Walk every CPU starting at first_cpu.
 * NOTE(review): the per-CPU action on env is elided in this listing. */
1759 for (cpu = first_cpu; cpu != NULL; cpu = cpu->next_cpu) {
1760 CPUArchState *env = cpu->env_ptr;
/* log_global_start hook: switch dirty tracking on (argument 1). */
1766 static void core_log_global_start(MemoryListener *listener)
1768 cpu_physical_memory_set_dirty_tracking(1);
/* log_global_stop hook: switch dirty tracking off (argument 0). */
1771 static void core_log_global_stop(MemoryListener *listener)
1773 cpu_physical_memory_set_dirty_tracking(0);
/* Listener that memory_map_init() registers on address_space_memory: map
 * rebuild hooks plus global dirty-logging start/stop.
 * NOTE(review): a line following .log_global_stop is elided in this listing. */
1776 static MemoryListener core_memory_listener = {
1777 .begin = core_begin,
1778 .commit = core_commit,
1779 .log_global_start = core_log_global_start,
1780 .log_global_stop = core_log_global_stop,
/* Listener with only a commit hook; registered on address_space_memory by
 * memory_map_init(). */
1784 static MemoryListener tcg_memory_listener = {
1785 .commit = tcg_commit,
/* Reset as->dispatch, fill in the per-address-space dispatch listener and
 * register it for as.  mem_add handles both region_add and region_nop.
 * NOTE(review): some initializer lines of the listener are elided here. */
1788 void address_space_init_dispatch(AddressSpace *as)
1790 as->dispatch = NULL;
1791 as->dispatch_listener = (MemoryListener) {
1793 .commit = mem_commit,
1794 .region_add = mem_add,
1795 .region_nop = mem_add,
1798 memory_listener_register(&as->dispatch_listener, as);
/* Unregister the dispatch listener and clear as->dispatch.
 * NOTE(review): the use of d (presumably releasing it) is elided here. */
1801 void address_space_destroy_dispatch(AddressSpace *as)
1803 AddressSpaceDispatch *d = as->dispatch;
1805 memory_listener_unregister(&as->dispatch_listener);
1807 as->dispatch = NULL;
/* Create the root regions and address spaces for memory and I/O, then
 * attach the core and TCG listeners to the memory address space. */
1810 static void memory_map_init(void)
/* "system" region of size INT64_MAX backs address_space_memory. */
1812 system_memory = g_malloc(sizeof(*system_memory));
1813 memory_region_init(system_memory, NULL, "system", INT64_MAX);
1814 address_space_init(&address_space_memory, system_memory, "memory");
/* "io" region of 65536 bytes backs address_space_io. */
1816 system_io = g_malloc(sizeof(*system_io));
1817 memory_region_init(system_io, NULL, "io", 65536);
1818 address_space_init(&address_space_io, system_io, "I/O");
1820 memory_listener_register(&core_memory_listener, &address_space_memory);
1821 memory_listener_register(&tcg_memory_listener, &address_space_memory);
/* Accessor for the file-local system_memory root region. */
1824 MemoryRegion *get_system_memory(void)
1826 return system_memory;
/* Accessor counterpart for the I/O root region.
 * NOTE(review): the body is elided in this listing. */
1829 MemoryRegion *get_system_io(void)
1834 #endif /* !defined(CONFIG_USER_ONLY) */
1836 /* physical memory access (slow version, mainly for debug) */
1837 #if defined(CONFIG_USER_ONLY)
/* CONFIG_USER_ONLY variant: access guest memory at addr one page at a time,
 * checking the page flags before locking the range with lock_user().
 * NOTE(review): the loop, the copies and the return values are elided. */
1838 int cpu_memory_rw_debug(CPUArchState *env, target_ulong addr,
1839 uint8_t *buf, int len, int is_write)
/* l = number of bytes from addr up to the end of its page. */
1846 page = addr & TARGET_PAGE_MASK;
1847 l = (page + TARGET_PAGE_SIZE) - addr;
1850 flags = page_get_flags(page);
1851 if (!(flags & PAGE_VALID))
/* Write direction: page must be writable. */
1854 if (!(flags & PAGE_WRITE))
1856 /* XXX: this code should not depend on lock_user */
1857 if (!(p = lock_user(VERIFY_WRITE, addr, l, 0)))
1860 unlock_user(p, addr, l);
/* Read direction: page must be readable. */
1862 if (!(flags & PAGE_READ))
1864 /* XXX: this code should not depend on lock_user */
1865 if (!(p = lock_user(VERIFY_READ, addr, l, 1)))
1868 unlock_user(p, addr, 0);
/* After a write of `length` bytes at RAM address addr: if the page is not
 * already dirty, invalidate code in the range and set all dirty bits except
 * CODE_DIRTY_FLAG; then notify Xen of the modified range.
 * NOTE(review): the second parameter line and closing braces are elided. */
1879 static void invalidate_and_set_dirty(hwaddr addr,
1882 if (!cpu_physical_memory_is_dirty(addr)) {
1883 /* invalidate code */
1884 tb_invalidate_phys_page_range(addr, addr + length, 0);
1886 cpu_physical_memory_set_dirty_flags(addr, (0xff & ~CODE_DIRTY_FLAG));
1888 xen_modified_memory(addr, length);
/* Decide whether mr can be accessed through a host pointer rather than its
 * I/O callbacks.  RAM qualifies unless this is a write to a read-only region.
 * NOTE(review): the ROM-device result and the fallback return are elided. */
1891 static inline bool memory_access_is_direct(MemoryRegion *mr, bool is_write)
1893 if (memory_region_is_ram(mr)) {
1894 return !(is_write && mr->readonly);
1896 if (memory_region_is_romd(mr)) {
/* Limit an access of l bytes at addr to what mr's ops allow.
 * NOTE(review): the return statement is elided in this listing. */
1903 static int memory_access_size(MemoryRegion *mr, unsigned l, hwaddr addr)
1905 unsigned access_size_max = mr->ops->valid.max_access_size;
1907 /* Regions are assumed to support 1-4 byte accesses unless
1908 otherwise specified. */
1909 if (access_size_max == 0) {
1910 access_size_max = 4;
1913 /* Bound the maximum access by the alignment of the address. */
1914 if (!mr->ops->impl.unaligned) {
/* addr & -addr isolates the lowest set bit of addr, i.e. its alignment;
 * it is 0 for addr == 0, which the check below leaves unrestricted. */
1915 unsigned align_size_max = addr & -addr;
1916 if (align_size_max != 0 && align_size_max < access_size_max) {
1917 access_size_max = align_size_max;
1921 /* Don't attempt accesses larger than the maximum. */
1922 if (l > access_size_max) {
1923 l = access_size_max;
/* Transfer len bytes between buf and address space as, starting at addr;
 * is_write selects the direction.  Results of io_mem_read()/io_mem_write()
 * are OR-ed into `error`.
 * NOTE(review): the loop, the size dispatch, the buf<->val conversions and
 * the return statement are elided in this listing. */
1929 bool address_space_rw(AddressSpace *as, hwaddr addr, uint8_t *buf,
1930 int len, bool is_write)
/* Resolve addr to a region (mr), an offset inside it (addr1) and a length (l). */
1941 mr = address_space_translate(as, addr, &addr1, &l, is_write);
/* Write to a region without direct access: use its write callback with the
 * width chosen by memory_access_size(). */
1944 if (!memory_access_is_direct(mr, is_write)) {
1945 l = memory_access_size(mr, l, addr1);
1946 /* XXX: could force current_cpu to NULL to avoid
1950 /* 64 bit write access */
1952 error |= io_mem_write(mr, addr1, val, 8);
1955 /* 32 bit write access */
1957 error |= io_mem_write(mr, addr1, val, 4);
1960 /* 16 bit write access */
1962 error |= io_mem_write(mr, addr1, val, 2);
1965 /* 8 bit write access */
1967 error |= io_mem_write(mr, addr1, val, 1);
/* Direct write: copy into host RAM, then invalidate code / mark dirty. */
1973 addr1 += memory_region_get_ram_addr(mr);
1975 ptr = qemu_get_ram_ptr(addr1);
1976 memcpy(ptr, buf, l);
1977 invalidate_and_set_dirty(addr1, l);
/* Read side, same split between callback and direct access. */
1980 if (!memory_access_is_direct(mr, is_write)) {
1982 l = memory_access_size(mr, l, addr1);
1985 /* 64 bit read access */
1986 error |= io_mem_read(mr, addr1, &val, 8);
1990 /* 32 bit read access */
1991 error |= io_mem_read(mr, addr1, &val, 4);
1995 /* 16 bit read access */
1996 error |= io_mem_read(mr, addr1, &val, 2);
2000 /* 8 bit read access */
2001 error |= io_mem_read(mr, addr1, &val, 1);
/* Direct read: copy straight out of host RAM. */
2009 ptr = qemu_get_ram_ptr(mr->ram_addr + addr1);
2010 memcpy(buf, ptr, l);
/* Write wrapper around address_space_rw(); the const qualifier on buf is
 * cast away because the shared helper takes a non-const buffer. */
2021 bool address_space_write(AddressSpace *as, hwaddr addr,
2022 const uint8_t *buf, int len)
2024 return address_space_rw(as, addr, (uint8_t *)buf, len, true);
/* Read wrapper around address_space_rw() (is_write = false). */
2027 bool address_space_read(AddressSpace *as, hwaddr addr, uint8_t *buf, int len)
2029 return address_space_rw(as, addr, buf, len, false);
/* address_space_rw() on address_space_memory; the result is discarded. */
2033 void cpu_physical_memory_rw(hwaddr addr, uint8_t *buf,
2034 int len, int is_write)
2036 address_space_rw(&address_space_memory, addr, buf, len, is_write);
2039 /* used for ROM loading : can write in RAM and ROM */
/* NOTE(review): the loop over len and what happens for regions that are
 * neither RAM nor ROM-device are elided in this listing. */
2040 void cpu_physical_memory_write_rom(hwaddr addr,
2041 const uint8_t *buf, int len)
2050 mr = address_space_translate(&address_space_memory,
2051 addr, &addr1, &l, true);
2053 if (!(memory_region_is_ram(mr) ||
2054 memory_region_is_romd(mr))) {
/* RAM or ROM-device: copy through the host pointer regardless of the
 * region's read-only state, then invalidate code / mark dirty. */
2057 addr1 += memory_region_get_ram_addr(mr);
2059 ptr = qemu_get_ram_ptr(addr1);
2060 memcpy(ptr, buf, l);
2061 invalidate_and_set_dirty(addr1, l);
/* The single bounce buffer used by address_space_map()/address_space_unmap()
 * when the target region cannot be accessed directly. */
2076 static BounceBuffer bounce;
/* One registered callback, linked into map_client_list.
 * NOTE(review): some member lines are elided in this listing. */
2078 typedef struct MapClient {
2080 void (*callback)(void *opaque);
2081 QLIST_ENTRY(MapClient) link;
/* List of clients to call back from cpu_notify_map_clients(). */
2084 static QLIST_HEAD(map_client_list, MapClient) map_client_list
2085 = QLIST_HEAD_INITIALIZER(map_client_list);
/* Allocate a MapClient for (opaque, callback) and put it at the head of
 * map_client_list.
 * NOTE(review): the return statement is elided in this listing. */
2087 void *cpu_register_map_client(void *opaque, void (*callback)(void *opaque))
2089 MapClient *client = g_malloc(sizeof(*client));
2091 client->opaque = opaque;
2092 client->callback = callback;
2093 QLIST_INSERT_HEAD(&map_client_list, client, link);
/* Unlink a client from map_client_list.
 * NOTE(review): anything after the unlink (e.g. freeing) is elided here. */
2097 static void cpu_unregister_map_client(void *_client)
2099 MapClient *client = (MapClient *)_client;
2101 QLIST_REMOVE(client, link);
/* Drain map_client_list: call each client's callback, then unregister it. */
2105 static void cpu_notify_map_clients(void)
2109 while (!QLIST_EMPTY(&map_client_list)) {
2110 client = QLIST_FIRST(&map_client_list);
2111 client->callback(client->opaque);
2112 cpu_unregister_map_client(client);
/* Check whether an access of len bytes at addr would be accepted.
 * NOTE(review): the loop and the return statements are elided here. */
2116 bool address_space_access_valid(AddressSpace *as, hwaddr addr, int len, bool is_write)
2123 mr = address_space_translate(as, addr, &xlat, &l, is_write);
/* Only regions without direct access are asked for validity. */
2124 if (!memory_access_is_direct(mr, is_write)) {
/* NOTE(review): this passes the untranslated addr, while address_space_rw()
 * passes the translated offset to memory_access_size(); confirm intent. */
2125 l = memory_access_size(mr, l, addr);
2126 if (!memory_region_access_valid(mr, xlat, l, is_write)) {
2137 /* Map a physical memory region into a host virtual address.
2138 * May map a subset of the requested range, given by and returned in *plen.
2139 * May return NULL if resources needed to perform the mapping are exhausted.
2140 * Use only for reads OR writes - not for read-modify-write operations.
2141 * Use cpu_register_map_client() to know when retrying the map operation is
2142 * likely to succeed. */
/* NOTE(review): the remaining parameters, the loop structure and several
 * assignments are elided in this listing. */
2144 void *address_space_map(AddressSpace *as,
2151 hwaddr l, xlat, base;
2152 MemoryRegion *mr, *this_mr;
2160 mr = address_space_translate(as, addr, &xlat, &l, is_write);
/* No direct access: fall back to the single bounce buffer, which must be
 * free (bounce.buffer is tested first). */
2161 if (!memory_access_is_direct(mr, is_write)) {
2162 if (bounce.buffer) {
2165 bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, TARGET_PAGE_SIZE);
2169 memory_region_ref(mr);
/* Fill the bounce buffer from the address space. */
2172 address_space_read(as, addr, bounce.buffer, l);
2176 return bounce.buffer;
/* Direct access: keep translating while the result stays in the same
 * region and contiguous with what has been covered so far. */
2180 raddr = memory_region_get_ram_addr(mr);
2191 this_mr = address_space_translate(as, addr, &xlat, &l, is_write);
2192 if (this_mr != mr || xlat != base + done) {
/* Take a reference on the region and hand back the host pointer. */
2197 memory_region_ref(mr);
2199 return qemu_ram_ptr_length(raddr + base, plen);
2202 /* Unmaps a memory region previously mapped by address_space_map().
2203 * Will also mark the memory as dirty if is_write == 1. access_len gives
2204 * the amount of memory that was actually read or written by the caller. */
/* NOTE(review): conditions on is_write and parts of the dirty-marking loop
 * are elided in this listing. */
2206 void address_space_unmap(AddressSpace *as, void *buffer, hwaddr len,
2207 int is_write, hwaddr access_len)
/* Direct mapping: recover the region and RAM address from the host pointer. */
2209 if (buffer != bounce.buffer) {
2213 mr = qemu_ram_addr_from_host(buffer, &addr1);
/* Mark the accessed range dirty in steps of at most TARGET_PAGE_SIZE. */
2216 while (access_len) {
2218 l = TARGET_PAGE_SIZE;
2221 invalidate_and_set_dirty(addr1, l);
2226 if (xen_enabled()) {
2227 xen_invalidate_map_cache_entry(buffer);
2229 memory_region_unref(mr);
/* Bounce buffer: copy it back to the address space, free it, drop the
 * region reference, and wake clients waiting for the buffer. */
2233 address_space_write(as, bounce.addr, bounce.buffer, access_len);
2235 qemu_vfree(bounce.buffer);
2236 bounce.buffer = NULL;
2237 memory_region_unref(bounce.mr);
2238 cpu_notify_map_clients();
/* address_space_map() on address_space_memory.
 * NOTE(review): the remaining parameter lines are elided in this listing. */
2241 void *cpu_physical_memory_map(hwaddr addr,
2245 return address_space_map(&address_space_memory, addr, plen, is_write);
2248 void cpu_physical_memory_unmap(void *buffer, hwaddr len,
2249 int is_write, hwaddr access_len)
2251 return address_space_unmap(&address_space_memory, buffer, len, is_write, access_len);
2254 /* warning: addr must be aligned */
/* Load 4 bytes from system memory at addr, in the requested byte order.
 * NOTE(review): declarations, byte swaps, the native-endian case and the
 * return are elided in this listing. */
2255 static inline uint32_t ldl_phys_internal(hwaddr addr,
2256 enum device_endian endian)
2264 mr = address_space_translate(&address_space_memory, addr, &addr1, &l,
/* Fewer than 4 bytes available, or no direct access: use the I/O callback. */
2266 if (l < 4 || !memory_access_is_direct(mr, false)) {
2268 io_mem_read(mr, addr1, &val, 4);
2269 #if defined(TARGET_WORDS_BIGENDIAN)
2270 if (endian == DEVICE_LITTLE_ENDIAN) {
2274 if (endian == DEVICE_BIG_ENDIAN) {
/* Direct access: read through the host pointer with the matching helper. */
2280 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
2284 case DEVICE_LITTLE_ENDIAN:
2285 val = ldl_le_p(ptr);
2287 case DEVICE_BIG_ENDIAN:
2288 val = ldl_be_p(ptr);
/* 4-byte physical load with DEVICE_NATIVE_ENDIAN. */
2298 uint32_t ldl_phys(hwaddr addr)
2300 return ldl_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
/* 4-byte physical load, little-endian. */
2303 uint32_t ldl_le_phys(hwaddr addr)
2305 return ldl_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
/* 4-byte physical load, big-endian. */
2308 uint32_t ldl_be_phys(hwaddr addr)
2310 return ldl_phys_internal(addr, DEVICE_BIG_ENDIAN);
2313 /* warning: addr must be aligned */
/* Load 8 bytes from system memory at addr, in the requested byte order.
 * NOTE(review): declarations, byte swaps, the native-endian case and the
 * return are elided in this listing. */
2314 static inline uint64_t ldq_phys_internal(hwaddr addr,
2315 enum device_endian endian)
2323 mr = address_space_translate(&address_space_memory, addr, &addr1, &l,
/* Fewer than 8 bytes available, or no direct access: use the I/O callback. */
2325 if (l < 8 || !memory_access_is_direct(mr, false)) {
2327 io_mem_read(mr, addr1, &val, 8);
2328 #if defined(TARGET_WORDS_BIGENDIAN)
2329 if (endian == DEVICE_LITTLE_ENDIAN) {
2333 if (endian == DEVICE_BIG_ENDIAN) {
/* Direct access: read through the host pointer with the matching helper. */
2339 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
2343 case DEVICE_LITTLE_ENDIAN:
2344 val = ldq_le_p(ptr);
2346 case DEVICE_BIG_ENDIAN:
2347 val = ldq_be_p(ptr);
/* 8-byte physical load with DEVICE_NATIVE_ENDIAN. */
2357 uint64_t ldq_phys(hwaddr addr)
2359 return ldq_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
/* 8-byte physical load, little-endian. */
2362 uint64_t ldq_le_phys(hwaddr addr)
2364 return ldq_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
/* 8-byte physical load, big-endian. */
2367 uint64_t ldq_be_phys(hwaddr addr)
2369 return ldq_phys_internal(addr, DEVICE_BIG_ENDIAN);
/* Read one byte at addr via cpu_physical_memory_read().
 * NOTE(review): the declaration of val and the return are elided here. */
2373 uint32_t ldub_phys(hwaddr addr)
2376 cpu_physical_memory_read(addr, &val, 1);
2380 /* warning: addr must be aligned */
/* Load 2 bytes from system memory at addr, in the requested byte order.
 * NOTE(review): declarations, byte swaps, the native-endian case and the
 * return are elided in this listing. */
2381 static inline uint32_t lduw_phys_internal(hwaddr addr,
2382 enum device_endian endian)
2390 mr = address_space_translate(&address_space_memory, addr, &addr1, &l,
/* Fewer than 2 bytes available, or no direct access: use the I/O callback. */
2392 if (l < 2 || !memory_access_is_direct(mr, false)) {
2394 io_mem_read(mr, addr1, &val, 2);
2395 #if defined(TARGET_WORDS_BIGENDIAN)
2396 if (endian == DEVICE_LITTLE_ENDIAN) {
2400 if (endian == DEVICE_BIG_ENDIAN) {
/* Direct access: read through the host pointer with the matching helper. */
2406 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
2410 case DEVICE_LITTLE_ENDIAN:
2411 val = lduw_le_p(ptr);
2413 case DEVICE_BIG_ENDIAN:
2414 val = lduw_be_p(ptr);
/* 2-byte physical load with DEVICE_NATIVE_ENDIAN. */
2424 uint32_t lduw_phys(hwaddr addr)
2426 return lduw_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
/* 2-byte physical load, little-endian. */
2429 uint32_t lduw_le_phys(hwaddr addr)
2431 return lduw_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
/* 2-byte physical load, big-endian. */
2434 uint32_t lduw_be_phys(hwaddr addr)
2436 return lduw_phys_internal(addr, DEVICE_BIG_ENDIAN);
2439 /* warning: addr must be aligned. The ram page is not marked as dirty
2440 and the code inside is not invalidated. It is useful if the dirty
2441 bits are used to track modified PTEs */
/* NOTE(review): declarations and the direct store itself are elided here. */
2442 void stl_phys_notdirty(hwaddr addr, uint32_t val)
2449 mr = address_space_translate(&address_space_memory, addr, &addr1, &l,
/* Fewer than 4 bytes available, or no direct access: use the I/O callback. */
2451 if (l < 4 || !memory_access_is_direct(mr, true)) {
2452 io_mem_write(mr, addr1, val, 4);
2454 addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
2455 ptr = qemu_get_ram_ptr(addr1);
/* Exception to the rule above: while in_migration is set, the page is
 * still invalidated and flagged dirty. */
2458 if (unlikely(in_migration)) {
2459 if (!cpu_physical_memory_is_dirty(addr1)) {
2460 /* invalidate code */
2461 tb_invalidate_phys_page_range(addr1, addr1 + 4, 0);
2463 cpu_physical_memory_set_dirty_flags(
2464 addr1, (0xff & ~CODE_DIRTY_FLAG));
2470 /* warning: addr must be aligned */
/* Store 4 bytes to system memory at addr, in the requested byte order.
 * NOTE(review): declarations, byte swaps and the direct stores are elided. */
2471 static inline void stl_phys_internal(hwaddr addr, uint32_t val,
2472 enum device_endian endian)
2479 mr = address_space_translate(&address_space_memory, addr, &addr1, &l,
/* Fewer than 4 bytes available, or no direct access: use the I/O callback. */
2481 if (l < 4 || !memory_access_is_direct(mr, true)) {
2482 #if defined(TARGET_WORDS_BIGENDIAN)
2483 if (endian == DEVICE_LITTLE_ENDIAN) {
2487 if (endian == DEVICE_BIG_ENDIAN) {
2491 io_mem_write(mr, addr1, val, 4);
/* Direct access: store through the host pointer, then invalidate / dirty. */
2494 addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
2495 ptr = qemu_get_ram_ptr(addr1);
2497 case DEVICE_LITTLE_ENDIAN:
2500 case DEVICE_BIG_ENDIAN:
2507 invalidate_and_set_dirty(addr1, 4);
/* 4-byte physical store with DEVICE_NATIVE_ENDIAN. */
2511 void stl_phys(hwaddr addr, uint32_t val)
2513 stl_phys_internal(addr, val, DEVICE_NATIVE_ENDIAN);
/* 4-byte physical store, little-endian. */
2516 void stl_le_phys(hwaddr addr, uint32_t val)
2518 stl_phys_internal(addr, val, DEVICE_LITTLE_ENDIAN);
/* 4-byte physical store, big-endian. */
2521 void stl_be_phys(hwaddr addr, uint32_t val)
2523 stl_phys_internal(addr, val, DEVICE_BIG_ENDIAN);
/* Write one byte (local v) at addr via cpu_physical_memory_write().
 * NOTE(review): the declaration/assignment of v from val is elided here. */
2527 void stb_phys(hwaddr addr, uint32_t val)
2530 cpu_physical_memory_write(addr, &v, 1);
2533 /* warning: addr must be aligned */
/* Store 2 bytes to system memory at addr, in the requested byte order.
 * NOTE(review): declarations, byte swaps and the direct stores are elided. */
2534 static inline void stw_phys_internal(hwaddr addr, uint32_t val,
2535 enum device_endian endian)
2542 mr = address_space_translate(&address_space_memory, addr, &addr1, &l,
/* Fewer than 2 bytes available, or no direct access: use the I/O callback. */
2544 if (l < 2 || !memory_access_is_direct(mr, true)) {
2545 #if defined(TARGET_WORDS_BIGENDIAN)
2546 if (endian == DEVICE_LITTLE_ENDIAN) {
2550 if (endian == DEVICE_BIG_ENDIAN) {
2554 io_mem_write(mr, addr1, val, 2);
/* Direct access: store through the host pointer, then invalidate / dirty. */
2557 addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
2558 ptr = qemu_get_ram_ptr(addr1);
2560 case DEVICE_LITTLE_ENDIAN:
2563 case DEVICE_BIG_ENDIAN:
2570 invalidate_and_set_dirty(addr1, 2);
/* 2-byte physical store with DEVICE_NATIVE_ENDIAN. */
2574 void stw_phys(hwaddr addr, uint32_t val)
2576 stw_phys_internal(addr, val, DEVICE_NATIVE_ENDIAN);
/* 2-byte physical store, little-endian. */
2579 void stw_le_phys(hwaddr addr, uint32_t val)
2581 stw_phys_internal(addr, val, DEVICE_LITTLE_ENDIAN);
/* 2-byte physical store, big-endian. */
2584 void stw_be_phys(hwaddr addr, uint32_t val)
2586 stw_phys_internal(addr, val, DEVICE_BIG_ENDIAN);
/* 8-byte physical store through cpu_physical_memory_write().
 * NOTE(review): a line before the write (presumably a byte-order
 * conversion of val) is elided in this listing. */
2590 void stq_phys(hwaddr addr, uint64_t val)
2593 cpu_physical_memory_write(addr, &val, 8);
2596 void stq_le_phys(hwaddr addr, uint64_t val)
2598 val = cpu_to_le64(val);
2599 cpu_physical_memory_write(addr, &val, 8);
2602 void stq_be_phys(hwaddr addr, uint64_t val)
2604 val = cpu_to_be64(val);
2605 cpu_physical_memory_write(addr, &val, 8);
2608 /* virtual memory access for debug (includes writing to ROM) */
/* NOTE(review): the loop over len, the is_write test and the return values
 * are elided in this listing. */
2609 int cpu_memory_rw_debug(CPUArchState *env, target_ulong addr,
2610 uint8_t *buf, int len, int is_write)
/* Translate the page containing addr to a physical page. */
2617 page = addr & TARGET_PAGE_MASK;
2618 phys_addr = cpu_get_phys_page_debug(ENV_GET_CPU(env), page);
2619 /* if no physical page mapped, return an error */
2620 if (phys_addr == -1)
/* l = bytes from addr to the end of its page; add the in-page offset back. */
2622 l = (page + TARGET_PAGE_SIZE) - addr;
2625 phys_addr += (addr & ~TARGET_PAGE_MASK);
/* One path goes through the ROM-capable writer, the other through the
 * ordinary read/write helper. */
2627 cpu_physical_memory_write_rom(phys_addr, buf, l);
2629 cpu_physical_memory_rw(phys_addr, buf, l, is_write);
2638 #if !defined(CONFIG_USER_ONLY)
2641 /* A helper function for the _utterly broken_ virtio device model to find out if
2642 * it's running on a big endian machine. Don't do this at home, kids! */
/* The prototype directly precedes the definition.  The result is chosen at
 * compile time by TARGET_WORDS_BIGENDIAN.
 * NOTE(review): the return statements are elided in this listing. */
2644 bool virtio_is_big_endian(void);
2645 bool virtio_is_big_endian(void)
2647 #if defined(TARGET_WORDS_BIGENDIAN)
2656 #ifndef CONFIG_USER_ONLY
/* True when phys_addr resolves to a region that is neither RAM nor a
 * ROM device. */
2657 bool cpu_physical_memory_is_io(hwaddr phys_addr)
/* phys_addr is reused as the output offset; only mr is needed afterwards. */
2662 mr = address_space_translate(&address_space_memory,
2663 phys_addr, &phys_addr, &l, false);
2665 return !(memory_region_is_ram(mr) ||
2666 memory_region_is_romd(mr));
2669 void qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque)
2673 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
2674 func(block->host, block->offset, block->length, opaque);