4 * Copyright (c) 2003 Fabrice Bellard
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
23 #include <sys/types.h>
27 #include "qemu-common.h"
32 #include "qemu/osdep.h"
33 #include "sysemu/kvm.h"
34 #include "sysemu/hax.h"
35 #include "hw/xen/xen.h"
36 #include "qemu/timer.h"
37 #include "qemu/config-file.h"
38 #include "exec/memory.h"
39 #include "sysemu/dma.h"
40 #include "exec/address-spaces.h"
41 #if defined(CONFIG_USER_ONLY)
43 #else /* !CONFIG_USER_ONLY */
44 #include "sysemu/xen-mapcache.h"
47 #include "exec/cpu-all.h"
49 #include "exec/cputlb.h"
50 #include "translate-all.h"
52 #include "exec/memory-internal.h"
54 //#define DEBUG_UNASSIGNED
55 //#define DEBUG_SUBPAGE
57 #if !defined(CONFIG_USER_ONLY)
59 static int in_migration;
61 RAMList ram_list = { .blocks = QTAILQ_HEAD_INITIALIZER(ram_list.blocks) };
63 static MemoryRegion *system_memory;
64 static MemoryRegion *system_io;
66 AddressSpace address_space_io;
67 AddressSpace address_space_memory;
68 DMAContext dma_context_memory;
70 MemoryRegion io_mem_ram, io_mem_rom, io_mem_unassigned, io_mem_notdirty;
71 static MemoryRegion io_mem_subpage_ram;
75 CPUArchState *first_cpu;
76 /* current CPU in the current thread. It is only valid inside
78 DEFINE_TLS(CPUArchState *,cpu_single_env);
79 /* 0 = Do not count executed instructions.
80 1 = Precise instruction counting.
81 2 = Adaptive rate instruction counting. */
84 #if !defined(CONFIG_USER_ONLY)
86 static MemoryRegionSection *phys_sections;
87 static unsigned phys_sections_nb, phys_sections_nb_alloc;
88 static uint16_t phys_section_unassigned;
89 static uint16_t phys_section_notdirty;
90 static uint16_t phys_section_rom;
91 static uint16_t phys_section_watch;
93 /* Simple allocator for PhysPageEntry nodes */
94 static PhysPageEntry (*phys_map_nodes)[L2_SIZE];
95 static unsigned phys_map_nodes_nb, phys_map_nodes_nb_alloc;
97 #define PHYS_MAP_NODE_NIL (((uint16_t)~0) >> 1)
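/* PHYS_MAP_NODE_NIL (0x7fff) marks an unallocated pointer in the phys map;
 * phys_map_node_alloc() asserts that a real node index never reaches it. */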
99 static void io_mem_init(void);
100 static void memory_map_init(void);
101 static void *qemu_safe_ram_ptr(ram_addr_t addr);
103 static MemoryRegion io_mem_watch;
106 #if !defined(CONFIG_USER_ONLY)
108 static void phys_map_node_reserve(unsigned nodes)
110 if (phys_map_nodes_nb + nodes > phys_map_nodes_nb_alloc) {
111 typedef PhysPageEntry Node[L2_SIZE];
112 phys_map_nodes_nb_alloc = MAX(phys_map_nodes_nb_alloc * 2, 16);
113 phys_map_nodes_nb_alloc = MAX(phys_map_nodes_nb_alloc,
114 phys_map_nodes_nb + nodes);
115 phys_map_nodes = g_renew(Node, phys_map_nodes,
116 phys_map_nodes_nb_alloc);
120 static uint16_t phys_map_node_alloc(void)
125 ret = phys_map_nodes_nb++;
126 assert(ret != PHYS_MAP_NODE_NIL);
127 assert(ret != phys_map_nodes_nb_alloc);
128 for (i = 0; i < L2_SIZE; ++i) {
129 phys_map_nodes[ret][i].is_leaf = 0;
130 phys_map_nodes[ret][i].ptr = PHYS_MAP_NODE_NIL;
135 static void phys_map_nodes_reset(void)
137 phys_map_nodes_nb = 0;
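/* Descriptive note: phys_page_set_level() handles one level of the phys map,
 * allocating intermediate nodes on demand.  Aligned runs of 'step' pages are
 * turned into leaves pointing at 'leaf'; smaller remainders recurse one level
 * further down. */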
141 static void phys_page_set_level(PhysPageEntry *lp, hwaddr *index,
142 hwaddr *nb, uint16_t leaf,
147 hwaddr step = (hwaddr)1 << (level * L2_BITS);
149 if (!lp->is_leaf && lp->ptr == PHYS_MAP_NODE_NIL) {
150 lp->ptr = phys_map_node_alloc();
151 p = phys_map_nodes[lp->ptr];
153 for (i = 0; i < L2_SIZE; i++) {
155 p[i].ptr = phys_section_unassigned;
159 p = phys_map_nodes[lp->ptr];
161 lp = &p[(*index >> (level * L2_BITS)) & (L2_SIZE - 1)];
163 while (*nb && lp < &p[L2_SIZE]) {
164 if ((*index & (step - 1)) == 0 && *nb >= step) {
170 phys_page_set_level(lp, index, nb, leaf, level - 1);
176 static void phys_page_set(AddressSpaceDispatch *d,
177 hwaddr index, hwaddr nb,
180 /* Wildly overreserve - it doesn't matter much. */
181 phys_map_node_reserve(3 * P_L2_LEVELS);
183 phys_page_set_level(&d->phys_map, &index, &nb, leaf, P_L2_LEVELS - 1);
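/* phys_page_find() walks the map from the top level down.  Hitting a NIL
 * pointer on the way down means the range was never registered, so the
 * lookup falls back to the phys_section_unassigned section. */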
186 MemoryRegionSection *phys_page_find(AddressSpaceDispatch *d, hwaddr index)
188 PhysPageEntry lp = d->phys_map;
191 uint16_t s_index = phys_section_unassigned;
193 for (i = P_L2_LEVELS - 1; i >= 0 && !lp.is_leaf; i--) {
194 if (lp.ptr == PHYS_MAP_NODE_NIL) {
197 p = phys_map_nodes[lp.ptr];
198 lp = p[(index >> (i * L2_BITS)) & (L2_SIZE - 1)];
203 return &phys_sections[s_index];
206 bool memory_region_is_unassigned(MemoryRegion *mr)
208 return mr != &io_mem_ram && mr != &io_mem_rom
209 && mr != &io_mem_notdirty && !mr->rom_device
210 && mr != &io_mem_watch;
214 void cpu_exec_init_all(void)
216 #if !defined(CONFIG_USER_ONLY)
217 qemu_mutex_init(&ram_list.mutex);
223 #if !defined(CONFIG_USER_ONLY)
225 static int cpu_common_post_load(void *opaque, int version_id)
227 CPUState *cpu = opaque;
229 /* 0x01 was CPU_INTERRUPT_EXIT. This line can be removed when the
230 version_id is increased. */
231 cpu->interrupt_request &= ~0x01;
232 tlb_flush(cpu->env_ptr, 1);
237 static const VMStateDescription vmstate_cpu_common = {
238 .name = "cpu_common",
240 .minimum_version_id = 1,
241 .minimum_version_id_old = 1,
242 .post_load = cpu_common_post_load,
243 .fields = (VMStateField []) {
244 VMSTATE_UINT32(halted, CPUState),
245 VMSTATE_UINT32(interrupt_request, CPUState),
246 VMSTATE_END_OF_LIST()
250 #define vmstate_cpu_common vmstate_dummy
253 CPUState *qemu_get_cpu(int index)
255 CPUArchState *env = first_cpu;
256 CPUState *cpu = NULL;
259 cpu = ENV_GET_CPU(env);
260 if (cpu->cpu_index == index) {
266 return env ? cpu : NULL;
269 void qemu_for_each_cpu(void (*func)(CPUState *cpu, void *data), void *data)
271 CPUArchState *env = first_cpu;
274 func(ENV_GET_CPU(env), data);
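/* cpu_exec_init() links the new CPU onto the first_cpu list, assigns it the
 * next free cpu_index, initializes its breakpoint/watchpoint queues, and
 * registers its vmstate/savevm handlers (a dummy vmstate in the user-mode
 * build) so CPU state is saved and migrated with the machine. */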
279 void cpu_exec_init(CPUArchState *env)
281 CPUState *cpu = ENV_GET_CPU(env);
282 CPUClass *cc = CPU_GET_CLASS(cpu);
286 #if defined(CONFIG_USER_ONLY)
289 env->next_cpu = NULL;
292 while (*penv != NULL) {
293 penv = &(*penv)->next_cpu;
296 cpu->cpu_index = cpu_index;
298 QTAILQ_INIT(&env->breakpoints);
299 QTAILQ_INIT(&env->watchpoints);
300 #ifndef CONFIG_USER_ONLY
301 cpu->thread_id = qemu_get_thread_id();
304 #if defined(CONFIG_USER_ONLY)
307 vmstate_register(NULL, cpu_index, &vmstate_cpu_common, cpu);
308 #if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
309 register_savevm(NULL, "cpu", cpu_index, CPU_SAVE_VERSION,
310 cpu_save, cpu_load, env);
311 assert(cc->vmsd == NULL);
313 if (cc->vmsd != NULL) {
314 vmstate_register(NULL, cpu_index, cc->vmsd, cpu);
318 #if defined(TARGET_HAS_ICE)
319 #if defined(CONFIG_USER_ONLY)
320 static void breakpoint_invalidate(CPUArchState *env, target_ulong pc)
322 tb_invalidate_phys_page_range(pc, pc + 1, 0);
325 static void breakpoint_invalidate(CPUArchState *env, target_ulong pc)
327 tb_invalidate_phys_addr(cpu_get_phys_page_debug(env, pc) |
328 (pc & ~TARGET_PAGE_MASK));
331 #endif /* TARGET_HAS_ICE */
333 #if defined(CONFIG_USER_ONLY)
334 void cpu_watchpoint_remove_all(CPUArchState *env, int mask)
339 int cpu_watchpoint_insert(CPUArchState *env, target_ulong addr, target_ulong len,
340 int flags, CPUWatchpoint **watchpoint)
345 /* Add a watchpoint. */
346 int cpu_watchpoint_insert(CPUArchState *env, target_ulong addr, target_ulong len,
347 int flags, CPUWatchpoint **watchpoint)
349 target_ulong len_mask = ~(len - 1);
352 /* sanity checks: allow power-of-2 lengths, deny unaligned watchpoints */
353 if ((len & (len - 1)) || (addr & ~len_mask) ||
354 len == 0 || len > TARGET_PAGE_SIZE) {
355 fprintf(stderr, "qemu: tried to set invalid watchpoint at "
356 TARGET_FMT_lx ", len=" TARGET_FMT_lu "\n", addr, len);
359 wp = g_malloc(sizeof(*wp));
362 wp->len_mask = len_mask;
365 /* keep all GDB-injected watchpoints in front */
367 QTAILQ_INSERT_HEAD(&env->watchpoints, wp, entry);
369 QTAILQ_INSERT_TAIL(&env->watchpoints, wp, entry);
371 tlb_flush_page(env, addr);
378 /* Remove a specific watchpoint. */
379 int cpu_watchpoint_remove(CPUArchState *env, target_ulong addr, target_ulong len,
382 target_ulong len_mask = ~(len - 1);
385 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
386 if (addr == wp->vaddr && len_mask == wp->len_mask
387 && flags == (wp->flags & ~BP_WATCHPOINT_HIT)) {
388 cpu_watchpoint_remove_by_ref(env, wp);
395 /* Remove a specific watchpoint by reference. */
396 void cpu_watchpoint_remove_by_ref(CPUArchState *env, CPUWatchpoint *watchpoint)
398 QTAILQ_REMOVE(&env->watchpoints, watchpoint, entry);
400 tlb_flush_page(env, watchpoint->vaddr);
405 /* Remove all matching watchpoints. */
406 void cpu_watchpoint_remove_all(CPUArchState *env, int mask)
408 CPUWatchpoint *wp, *next;
410 QTAILQ_FOREACH_SAFE(wp, &env->watchpoints, entry, next) {
411 if (wp->flags & mask)
412 cpu_watchpoint_remove_by_ref(env, wp);
417 /* Add a breakpoint. */
418 int cpu_breakpoint_insert(CPUArchState *env, target_ulong pc, int flags,
419 CPUBreakpoint **breakpoint)
421 #if defined(TARGET_HAS_ICE)
424 bp = g_malloc(sizeof(*bp));
429 /* keep all GDB-injected breakpoints in front */
431 QTAILQ_INSERT_HEAD(&env->breakpoints, bp, entry);
433 QTAILQ_INSERT_TAIL(&env->breakpoints, bp, entry);
435 breakpoint_invalidate(env, pc);
445 /* Remove a specific breakpoint. */
446 int cpu_breakpoint_remove(CPUArchState *env, target_ulong pc, int flags)
448 #if defined(TARGET_HAS_ICE)
451 QTAILQ_FOREACH(bp, &env->breakpoints, entry) {
452 if (bp->pc == pc && bp->flags == flags) {
453 cpu_breakpoint_remove_by_ref(env, bp);
463 /* Remove a specific breakpoint by reference. */
464 void cpu_breakpoint_remove_by_ref(CPUArchState *env, CPUBreakpoint *breakpoint)
466 #if defined(TARGET_HAS_ICE)
467 QTAILQ_REMOVE(&env->breakpoints, breakpoint, entry);
469 breakpoint_invalidate(env, breakpoint->pc);
475 /* Remove all matching breakpoints. */
476 void cpu_breakpoint_remove_all(CPUArchState *env, int mask)
478 #if defined(TARGET_HAS_ICE)
479 CPUBreakpoint *bp, *next;
481 QTAILQ_FOREACH_SAFE(bp, &env->breakpoints, entry, next) {
482 if (bp->flags & mask)
483 cpu_breakpoint_remove_by_ref(env, bp);
488 /* enable or disable single step mode. EXCP_DEBUG is returned by the
489 CPU loop after each instruction */
490 void cpu_single_step(CPUArchState *env, int enabled)
492 #if defined(TARGET_HAS_ICE)
493 if (env->singlestep_enabled != enabled) {
494 env->singlestep_enabled = enabled;
496 kvm_update_guest_debug(env, 0);
498 /* must flush all the translated code to avoid inconsistencies */
499 /* XXX: only flush what is necessary */
506 void cpu_exit(CPUArchState *env)
508 CPUState *cpu = ENV_GET_CPU(env);
510 cpu->exit_request = 1;
511 cpu->tcg_exit_req = 1;
514 void cpu_abort(CPUArchState *env, const char *fmt, ...)
521 fprintf(stderr, "qemu: fatal: ");
522 vfprintf(stderr, fmt, ap);
523 fprintf(stderr, "\n");
524 cpu_dump_state(env, stderr, fprintf, CPU_DUMP_FPU | CPU_DUMP_CCOP);
525 if (qemu_log_enabled()) {
526 qemu_log("qemu: fatal: ");
527 qemu_log_vprintf(fmt, ap2);
529 log_cpu_state(env, CPU_DUMP_FPU | CPU_DUMP_CCOP);
535 #if defined(CONFIG_USER_ONLY)
537 struct sigaction act;
538 sigfillset(&act.sa_mask);
539 act.sa_handler = SIG_DFL;
540 sigaction(SIGABRT, &act, NULL);
546 CPUArchState *cpu_copy(CPUArchState *env)
548 CPUArchState *new_env = cpu_init(env->cpu_model_str);
549 CPUArchState *next_cpu = new_env->next_cpu;
550 #if defined(TARGET_HAS_ICE)
555 memcpy(new_env, env, sizeof(CPUArchState));
557 /* Preserve chaining. */
558 new_env->next_cpu = next_cpu;
560 /* Clone all break/watchpoints.
561 Note: Once we support ptrace with hw-debug register access, make sure
562 BP_CPU break/watchpoints are handled correctly on clone. */
563 QTAILQ_INIT(&env->breakpoints);
564 QTAILQ_INIT(&env->watchpoints);
565 #if defined(TARGET_HAS_ICE)
566 QTAILQ_FOREACH(bp, &env->breakpoints, entry) {
567 cpu_breakpoint_insert(new_env, bp->pc, bp->flags, NULL);
569 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
570 cpu_watchpoint_insert(new_env, wp->vaddr, (~wp->len_mask) + 1,
578 #if !defined(CONFIG_USER_ONLY)
579 static void tlb_reset_dirty_range_all(ram_addr_t start, ram_addr_t end,
584 /* we modify the TLB cache so that the dirty bit will be set again
585 when accessing the range */
586 start1 = (uintptr_t)qemu_safe_ram_ptr(start);
587 /* Check that we don't span multiple blocks - this breaks the
588 address comparisons below. */
589 if ((uintptr_t)qemu_safe_ram_ptr(end - 1) - start1
590 != (end - 1) - start) {
593 cpu_tlb_reset_dirty_all(start1, length);
597 /* Note: start and end must be within the same ram block. */
598 void cpu_physical_memory_reset_dirty(ram_addr_t start, ram_addr_t end,
603 start &= TARGET_PAGE_MASK;
604 end = TARGET_PAGE_ALIGN(end);
606 length = end - start;
609 cpu_physical_memory_mask_dirty_range(start, length, dirty_flags);
612 tlb_reset_dirty_range_all(start, end, length);
616 static int cpu_physical_memory_set_dirty_tracking(int enable)
619 in_migration = enable;
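/* memory_region_section_get_iotlb() computes the value stored in a TLB
 * entry's iotlb field: for RAM it is the page's ram_addr combined with the
 * notdirty or rom section index so that writes can be trapped; for MMIO it
 * is the section index plus the offset within the section.  Pages covered by
 * a watchpoint are redirected to the watch section and flagged TLB_MMIO. */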
623 hwaddr memory_region_section_get_iotlb(CPUArchState *env,
624 MemoryRegionSection *section,
628 target_ulong *address)
633 if (memory_region_is_ram(section->mr)) {
635 iotlb = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
636 + memory_region_section_addr(section, paddr);
637 if (!section->readonly) {
638 iotlb |= phys_section_notdirty;
640 iotlb |= phys_section_rom;
643 /* IO handlers are currently passed a physical address.
644 It would be nice to pass an offset from the base address
645 of that region. This would avoid having to special case RAM,
646 and avoid full address decoding in every device.
647 We can't use the high bits of pd for this because
648 IO_MEM_ROMD uses these as a ram address. */
649 iotlb = section - phys_sections;
650 iotlb += memory_region_section_addr(section, paddr);
653 /* Make accesses to pages with watchpoints go via the
654 watchpoint trap routines. */
655 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
656 if (vaddr == (wp->vaddr & TARGET_PAGE_MASK)) {
657 /* Avoid trapping reads of pages with a write breakpoint. */
658 if ((prot & PAGE_WRITE) || (wp->flags & BP_MEM_READ)) {
659 iotlb = phys_section_watch + paddr;
660 *address |= TLB_MMIO;
668 #endif /* defined(CONFIG_USER_ONLY) */
670 #if !defined(CONFIG_USER_ONLY)
672 #define SUBPAGE_IDX(addr) ((addr) & ~TARGET_PAGE_MASK)
673 typedef struct subpage_t {
676 uint16_t sub_section[TARGET_PAGE_SIZE];
679 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
681 static subpage_t *subpage_init(hwaddr base);
682 static void destroy_page_desc(uint16_t section_index)
684 MemoryRegionSection *section = &phys_sections[section_index];
685 MemoryRegion *mr = section->mr;
688 subpage_t *subpage = container_of(mr, subpage_t, iomem);
689 memory_region_destroy(&subpage->iomem);
694 static void destroy_l2_mapping(PhysPageEntry *lp, unsigned level)
699 if (lp->ptr == PHYS_MAP_NODE_NIL) {
703 p = phys_map_nodes[lp->ptr];
704 for (i = 0; i < L2_SIZE; ++i) {
706 destroy_l2_mapping(&p[i], level - 1);
708 destroy_page_desc(p[i].ptr);
712 lp->ptr = PHYS_MAP_NODE_NIL;
715 static void destroy_all_mappings(AddressSpaceDispatch *d)
717 destroy_l2_mapping(&d->phys_map, P_L2_LEVELS - 1);
718 phys_map_nodes_reset();
721 static uint16_t phys_section_add(MemoryRegionSection *section)
723 if (phys_sections_nb == phys_sections_nb_alloc) {
724 phys_sections_nb_alloc = MAX(phys_sections_nb_alloc * 2, 16);
725 phys_sections = g_renew(MemoryRegionSection, phys_sections,
726 phys_sections_nb_alloc);
728 phys_sections[phys_sections_nb] = *section;
729 return phys_sections_nb++;
732 static void phys_sections_clear(void)
734 phys_sections_nb = 0;
737 static void register_subpage(AddressSpaceDispatch *d, MemoryRegionSection *section)
740 hwaddr base = section->offset_within_address_space
742 MemoryRegionSection *existing = phys_page_find(d, base >> TARGET_PAGE_BITS);
743 MemoryRegionSection subsection = {
744 .offset_within_address_space = base,
745 .size = TARGET_PAGE_SIZE,
749 assert(existing->mr->subpage || existing->mr == &io_mem_unassigned);
751 if (!(existing->mr->subpage)) {
752 subpage = subpage_init(base);
753 subsection.mr = &subpage->iomem;
754 phys_page_set(d, base >> TARGET_PAGE_BITS, 1,
755 phys_section_add(&subsection));
757 subpage = container_of(existing->mr, subpage_t, iomem);
759 start = section->offset_within_address_space & ~TARGET_PAGE_MASK;
760 end = start + section->size - 1;
761 subpage_register(subpage, start, end, phys_section_add(section));
765 static void register_multipage(AddressSpaceDispatch *d, MemoryRegionSection *section)
767 hwaddr start_addr = section->offset_within_address_space;
768 ram_addr_t size = section->size;
770 uint16_t section_index = phys_section_add(section);
775 phys_page_set(d, addr >> TARGET_PAGE_BITS, size >> TARGET_PAGE_BITS,
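/* mem_add() is the listener callback that (re)populates the dispatch map for
 * a section: an unaligned head or tail is routed through subpages, while the
 * page-aligned middle is registered directly as full pages. */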
779 static void mem_add(MemoryListener *listener, MemoryRegionSection *section)
781 AddressSpaceDispatch *d = container_of(listener, AddressSpaceDispatch, listener);
782 MemoryRegionSection now = *section, remain = *section;
784 if ((now.offset_within_address_space & ~TARGET_PAGE_MASK)
785 || (now.size < TARGET_PAGE_SIZE)) {
786 now.size = MIN(TARGET_PAGE_ALIGN(now.offset_within_address_space)
787 - now.offset_within_address_space,
789 register_subpage(d, &now);
790 remain.size -= now.size;
791 remain.offset_within_address_space += now.size;
792 remain.offset_within_region += now.size;
794 while (remain.size >= TARGET_PAGE_SIZE) {
796 if (remain.offset_within_region & ~TARGET_PAGE_MASK) {
797 now.size = TARGET_PAGE_SIZE;
798 register_subpage(d, &now);
800 now.size &= TARGET_PAGE_MASK;
801 register_multipage(d, &now);
803 remain.size -= now.size;
804 remain.offset_within_address_space += now.size;
805 remain.offset_within_region += now.size;
809 register_subpage(d, &now);
813 void qemu_flush_coalesced_mmio_buffer(void)
816 kvm_flush_coalesced_mmio_buffer();
819 void qemu_mutex_lock_ramlist(void)
821 qemu_mutex_lock(&ram_list.mutex);
824 void qemu_mutex_unlock_ramlist(void)
826 qemu_mutex_unlock(&ram_list.mutex);
829 #if defined(__linux__) && !defined(TARGET_S390X)
833 #define HUGETLBFS_MAGIC 0x958458f6
835 static long gethugepagesize(const char *path)
841 ret = statfs(path, &fs);
842 } while (ret != 0 && errno == EINTR);
849 if (fs.f_type != HUGETLBFS_MAGIC)
850 fprintf(stderr, "Warning: path not on HugeTLBFS: %s\n", path);
855 static void *file_ram_alloc(RAMBlock *block,
860 char *sanitized_name;
867 unsigned long hpagesize;
869 hpagesize = gethugepagesize(path);
874 if (memory < hpagesize) {
878 if (kvm_enabled() && !kvm_has_sync_mmu()) {
879 fprintf(stderr, "host lacks kvm mmu notifiers, -mem-path unsupported\n");
883 /* Make name safe to use with mkstemp by replacing '/' with '_'. */
884 sanitized_name = g_strdup(block->mr->name);
885 for (c = sanitized_name; *c != '\0'; c++) {
890 filename = g_strdup_printf("%s/qemu_back_mem.%s.XXXXXX", path,
892 g_free(sanitized_name);
894 fd = mkstemp(filename);
896 perror("unable to create backing store for hugepages");
903 memory = (memory+hpagesize-1) & ~(hpagesize-1);
906 * ftruncate is not supported by hugetlbfs in older
907 * hosts, so don't bother bailing out on errors.
908 * If anything goes wrong with it under other filesystems,
911 if (ftruncate(fd, memory))
915 /* NB: MAP_POPULATE won't exhaustively allocate all physical pages when
916 * MAP_PRIVATE is requested. For mem_prealloc we mmap as MAP_SHARED
917 * to sidestep this quirk.
919 flags = mem_prealloc ? MAP_POPULATE | MAP_SHARED : MAP_PRIVATE;
920 area = mmap(0, memory, PROT_READ | PROT_WRITE, flags, fd, 0);
922 area = mmap(0, memory, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
924 if (area == MAP_FAILED) {
925 perror("file_ram_alloc: can't mmap RAM pages");
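/* find_ram_offset() does a best-fit search over the existing RAM blocks: for
 * each block it measures the gap to the next block and keeps the smallest
 * gap that can still hold 'size', reporting an error if none exists. */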
934 static ram_addr_t find_ram_offset(ram_addr_t size)
936 RAMBlock *block, *next_block;
937 ram_addr_t offset = RAM_ADDR_MAX, mingap = RAM_ADDR_MAX;
939 assert(size != 0); /* it would hand out same offset multiple times */
941 if (QTAILQ_EMPTY(&ram_list.blocks))
944 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
945 ram_addr_t end, next = RAM_ADDR_MAX;
947 end = block->offset + block->length;
949 QTAILQ_FOREACH(next_block, &ram_list.blocks, next) {
950 if (next_block->offset >= end) {
951 next = MIN(next, next_block->offset);
954 if (next - end >= size && next - end < mingap) {
960 if (offset == RAM_ADDR_MAX) {
961 fprintf(stderr, "Failed to find gap of requested size: %" PRIu64 "\n",
969 ram_addr_t last_ram_offset(void)
974 QTAILQ_FOREACH(block, &ram_list.blocks, next)
975 last = MAX(last, block->offset + block->length);
980 static void qemu_ram_setup_dump(void *addr, ram_addr_t size)
983 QemuOpts *machine_opts;
985 /* Use MADV_DONTDUMP if the user doesn't want the guest memory in the core dump */
986 machine_opts = qemu_opts_find(qemu_find_opts("machine"), 0);
988 !qemu_opt_get_bool(machine_opts, "dump-guest-core", true)) {
989 ret = qemu_madvise(addr, size, QEMU_MADV_DONTDUMP);
991 perror("qemu_madvise");
992 fprintf(stderr, "madvise doesn't support MADV_DONTDUMP, "
993 "but dump_guest_core=off specified\n");
998 void qemu_ram_set_idstr(ram_addr_t addr, const char *name, DeviceState *dev)
1000 RAMBlock *new_block, *block;
1003 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1004 if (block->offset == addr) {
1010 assert(!new_block->idstr[0]);
1013 char *id = qdev_get_dev_path(dev);
1015 snprintf(new_block->idstr, sizeof(new_block->idstr), "%s/", id);
1019 pstrcat(new_block->idstr, sizeof(new_block->idstr), name);
1021 /* This assumes the iothread lock is taken here too. */
1022 qemu_mutex_lock_ramlist();
1023 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1024 if (block != new_block && !strcmp(block->idstr, new_block->idstr)) {
1025 fprintf(stderr, "RAMBlock \"%s\" already registered, abort!\n",
1030 qemu_mutex_unlock_ramlist();
1033 static int memory_try_enable_merging(void *addr, size_t len)
1037 opts = qemu_opts_find(qemu_find_opts("machine"), 0);
1038 if (opts && !qemu_opt_get_bool(opts, "mem-merge", true)) {
1039 /* disabled by the user */
1043 return qemu_madvise(addr, len, QEMU_MADV_MERGEABLE);
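/* qemu_ram_alloc_from_ptr() creates a new RAMBlock.  If 'host' is given, the
 * block simply adopts that memory (RAM_PREALLOC_MASK); otherwise the memory
 * comes from -mem-path hugepages, Xen, KVM or plain anonymous mmap.  The
 * block gets an offset from find_ram_offset(), is inserted into ram_list
 * (kept sorted from biggest to smallest), and the dirty bitmap is grown to
 * cover it before the offset is returned as the ram_addr_t handle. */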
1046 ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
1049 RAMBlock *block, *new_block;
1051 size = TARGET_PAGE_ALIGN(size);
1052 new_block = g_malloc0(sizeof(*new_block));
1054 /* This assumes the iothread lock is taken here too. */
1055 qemu_mutex_lock_ramlist();
1057 new_block->offset = find_ram_offset(size);
1059 new_block->host = host;
1060 new_block->flags |= RAM_PREALLOC_MASK;
1063 #if defined (__linux__) && !defined(TARGET_S390X)
1064 new_block->host = file_ram_alloc(new_block, size, mem_path);
1065 if (!new_block->host) {
1066 new_block->host = qemu_anon_ram_alloc(size);
1067 memory_try_enable_merging(new_block->host, size);
1070 fprintf(stderr, "-mem-path option unsupported\n");
1074 if (xen_enabled()) {
1075 xen_ram_alloc(new_block->offset, size, mr);
1076 } else if (kvm_enabled()) {
1077 /* some s390/kvm configurations have special constraints */
1078 new_block->host = kvm_ram_alloc(size);
1080 new_block->host = qemu_anon_ram_alloc(size);
1083 * Under HAX, QEMU allocates the virtual address space and the HAX
1084 * kernel module populates it with physical memory. There is currently
1085 * no paging, so the user should make sure enough free memory is available in advance.
1087 if (hax_enabled()) {
1089 ret = hax_populate_ram((uint64_t)new_block->host, size);
1091 fprintf(stderr, "Hax failed to populate ram\n");
1097 memory_try_enable_merging(new_block->host, size);
1100 new_block->length = size;
1102 /* Keep the list sorted from biggest to smallest block. */
1103 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1104 if (block->length < new_block->length) {
1109 QTAILQ_INSERT_BEFORE(block, new_block, next);
1111 QTAILQ_INSERT_TAIL(&ram_list.blocks, new_block, next);
1113 ram_list.mru_block = NULL;
1116 qemu_mutex_unlock_ramlist();
1118 ram_list.phys_dirty = g_realloc(ram_list.phys_dirty,
1119 last_ram_offset() >> TARGET_PAGE_BITS);
1120 memset(ram_list.phys_dirty + (new_block->offset >> TARGET_PAGE_BITS),
1121 0, size >> TARGET_PAGE_BITS);
1122 cpu_physical_memory_set_dirty_range(new_block->offset, size, 0xff);
1124 qemu_ram_setup_dump(new_block->host, size);
1125 qemu_madvise(new_block->host, size, QEMU_MADV_HUGEPAGE);
1128 kvm_setup_guest_memory(new_block->host, size);
1130 return new_block->offset;
1133 ram_addr_t qemu_ram_alloc(ram_addr_t size, MemoryRegion *mr)
1135 return qemu_ram_alloc_from_ptr(size, NULL, mr);
1138 void qemu_ram_free_from_ptr(ram_addr_t addr)
1142 /* This assumes the iothread lock is taken here too. */
1143 qemu_mutex_lock_ramlist();
1144 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1145 if (addr == block->offset) {
1146 QTAILQ_REMOVE(&ram_list.blocks, block, next);
1147 ram_list.mru_block = NULL;
1153 qemu_mutex_unlock_ramlist();
1156 void qemu_ram_free(ram_addr_t addr)
1160 /* This assumes the iothread lock is taken here too. */
1161 qemu_mutex_lock_ramlist();
1162 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1163 if (addr == block->offset) {
1164 QTAILQ_REMOVE(&ram_list.blocks, block, next);
1165 ram_list.mru_block = NULL;
1167 if (block->flags & RAM_PREALLOC_MASK) {
1169 } else if (mem_path) {
1170 #if defined (__linux__) && !defined(TARGET_S390X)
1172 munmap(block->host, block->length);
1175 qemu_anon_ram_free(block->host, block->length);
1181 if (xen_enabled()) {
1182 xen_invalidate_map_cache_entry(block->host);
1184 qemu_anon_ram_free(block->host, block->length);
1191 qemu_mutex_unlock_ramlist();
1196 void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
1203 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1204 offset = addr - block->offset;
1205 if (offset < block->length) {
1206 vaddr = block->host + offset;
1207 if (block->flags & RAM_PREALLOC_MASK) {
1211 munmap(vaddr, length);
1213 #if defined(__linux__) && !defined(TARGET_S390X)
1216 flags |= mem_prealloc ? MAP_POPULATE | MAP_SHARED :
1219 flags |= MAP_PRIVATE;
1221 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1222 flags, block->fd, offset);
1224 flags |= MAP_PRIVATE | MAP_ANONYMOUS;
1225 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1232 #if defined(TARGET_S390X) && defined(CONFIG_KVM)
1233 flags |= MAP_SHARED | MAP_ANONYMOUS;
1234 area = mmap(vaddr, length, PROT_EXEC|PROT_READ|PROT_WRITE,
1237 flags |= MAP_PRIVATE | MAP_ANONYMOUS;
1238 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1242 if (area != vaddr) {
1243 fprintf(stderr, "Could not remap addr: "
1244 RAM_ADDR_FMT "@" RAM_ADDR_FMT "\n",
1248 memory_try_enable_merging(vaddr, length);
1249 qemu_ram_setup_dump(vaddr, length);
1255 #endif /* !_WIN32 */
1257 /* Return a host pointer to ram allocated with qemu_ram_alloc.
1258 With the exception of the softmmu code in this file, this should
1259 only be used for local memory (e.g. video ram) that the device owns,
1260 and knows it isn't going to access beyond the end of the block.
1262 It should not be used for general purpose DMA.
1263 Use cpu_physical_memory_map/cpu_physical_memory_rw instead.
1265 void *qemu_get_ram_ptr(ram_addr_t addr)
1269 /* The list is protected by the iothread lock here. */
1270 block = ram_list.mru_block;
1271 if (block && addr - block->offset < block->length) {
1274 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1275 if (addr - block->offset < block->length) {
1280 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
1284 ram_list.mru_block = block;
1285 if (xen_enabled()) {
1286 /* We need to check whether the requested address is in RAM,
1287 * because we don't want to map the entire guest memory in QEMU.
1288 * In that case just map until the end of the page.
1290 if (block->offset == 0) {
1291 return xen_map_cache(addr, 0, 0);
1292 } else if (block->host == NULL) {
1294 xen_map_cache(block->offset, block->length, 1);
1297 return block->host + (addr - block->offset);
1300 /* Return a host pointer to ram allocated with qemu_ram_alloc. Same as
1301 * qemu_get_ram_ptr but do not touch ram_list.mru_block.
1303 * ??? Is this still necessary?
1305 static void *qemu_safe_ram_ptr(ram_addr_t addr)
1309 /* The list is protected by the iothread lock here. */
1310 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1311 if (addr - block->offset < block->length) {
1312 if (xen_enabled()) {
1313 /* We need to check whether the requested address is in RAM,
1314 * because we don't want to map the entire guest memory in QEMU.
1315 * In that case just map until the end of the page.
1317 if (block->offset == 0) {
1318 return xen_map_cache(addr, 0, 0);
1319 } else if (block->host == NULL) {
1321 xen_map_cache(block->offset, block->length, 1);
1324 return block->host + (addr - block->offset);
1328 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
1334 /* Return a host pointer to guest's ram. Similar to qemu_get_ram_ptr
1335 * but takes a size argument */
1336 static void *qemu_ram_ptr_length(ram_addr_t addr, ram_addr_t *size)
1341 if (xen_enabled()) {
1342 return xen_map_cache(addr, *size, 1);
1346 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1347 if (addr - block->offset < block->length) {
1348 if (addr - block->offset + *size > block->length)
1349 *size = block->length - addr + block->offset;
1350 return block->host + (addr - block->offset);
1354 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
1359 void qemu_put_ram_ptr(void *addr)
1361 trace_qemu_put_ram_ptr(addr);
1364 int qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr)
1367 uint8_t *host = ptr;
1369 if (xen_enabled()) {
1370 *ram_addr = xen_ram_addr_from_mapcache(ptr);
1374 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1375 /* This case appears when the block is not mapped. */
1376 if (block->host == NULL) {
1379 if (host - block->host < block->length) {
1380 *ram_addr = block->offset + (host - block->host);
1388 /* Some of the softmmu routines need to translate from a host pointer
1389 (typically a TLB entry) back to a ram offset. */
1390 ram_addr_t qemu_ram_addr_from_host_nofail(void *ptr)
1392 ram_addr_t ram_addr;
1394 if (qemu_ram_addr_from_host(ptr, &ram_addr)) {
1395 fprintf(stderr, "Bad ram pointer %p\n", ptr);
1401 static uint64_t unassigned_mem_read(void *opaque, hwaddr addr,
1404 #ifdef DEBUG_UNASSIGNED
1405 printf("Unassigned mem read " TARGET_FMT_plx "\n", addr);
1407 #if defined(TARGET_ALPHA) || defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
1408 cpu_unassigned_access(cpu_single_env, addr, 0, 0, 0, size);
1413 static void unassigned_mem_write(void *opaque, hwaddr addr,
1414 uint64_t val, unsigned size)
1416 #ifdef DEBUG_UNASSIGNED
1417 printf("Unassigned mem write " TARGET_FMT_plx " = 0x%"PRIx64"\n", addr, val);
1419 #if defined(TARGET_ALPHA) || defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
1420 cpu_unassigned_access(cpu_single_env, addr, 1, 0, 0, size);
1424 static const MemoryRegionOps unassigned_mem_ops = {
1425 .read = unassigned_mem_read,
1426 .write = unassigned_mem_write,
1427 .endianness = DEVICE_NATIVE_ENDIAN,
1430 static uint64_t error_mem_read(void *opaque, hwaddr addr,
1436 static void error_mem_write(void *opaque, hwaddr addr,
1437 uint64_t value, unsigned size)
1442 static const MemoryRegionOps error_mem_ops = {
1443 .read = error_mem_read,
1444 .write = error_mem_write,
1445 .endianness = DEVICE_NATIVE_ENDIAN,
1448 static const MemoryRegionOps rom_mem_ops = {
1449 .read = error_mem_read,
1450 .write = unassigned_mem_write,
1451 .endianness = DEVICE_NATIVE_ENDIAN,
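/* notdirty_mem_write() is the write slow path installed for clean RAM pages:
 * it invalidates any translated code on the page, performs the store through
 * the host pointer, sets the dirty flags and, once the page is fully dirty,
 * drops the slow path again via tlb_set_dirty(). */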
1454 static void notdirty_mem_write(void *opaque, hwaddr ram_addr,
1455 uint64_t val, unsigned size)
1458 dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
1459 if (!(dirty_flags & CODE_DIRTY_FLAG)) {
1460 #if !defined(CONFIG_USER_ONLY)
1461 tb_invalidate_phys_page_fast(ram_addr, size);
1462 dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
1467 stb_p(qemu_get_ram_ptr(ram_addr), val);
1470 stw_p(qemu_get_ram_ptr(ram_addr), val);
1473 stl_p(qemu_get_ram_ptr(ram_addr), val);
1478 dirty_flags |= (0xff & ~CODE_DIRTY_FLAG);
1479 cpu_physical_memory_set_dirty_flags(ram_addr, dirty_flags);
1480 /* we remove the notdirty callback only if the code has been
1482 if (dirty_flags == 0xff)
1483 tlb_set_dirty(cpu_single_env, cpu_single_env->mem_io_vaddr);
1486 static const MemoryRegionOps notdirty_mem_ops = {
1487 .read = error_mem_read,
1488 .write = notdirty_mem_write,
1489 .endianness = DEVICE_NATIVE_ENDIAN,
1492 /* Generate a debug exception if a watchpoint has been hit. */
1493 static void check_watchpoint(int offset, int len_mask, int flags)
1495 CPUArchState *env = cpu_single_env;
1496 target_ulong pc, cs_base;
1501 if (env->watchpoint_hit) {
1502 /* We re-entered the check after replacing the TB. Now raise
1503 * the debug interrupt so that it will trigger after the
1504 * current instruction. */
1505 cpu_interrupt(ENV_GET_CPU(env), CPU_INTERRUPT_DEBUG);
1508 vaddr = (env->mem_io_vaddr & TARGET_PAGE_MASK) + offset;
1509 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
1510 if ((vaddr == (wp->vaddr & len_mask) ||
1511 (vaddr & wp->len_mask) == wp->vaddr) && (wp->flags & flags)) {
1512 wp->flags |= BP_WATCHPOINT_HIT;
1513 if (!env->watchpoint_hit) {
1514 env->watchpoint_hit = wp;
1515 tb_check_watchpoint(env);
1516 if (wp->flags & BP_STOP_BEFORE_ACCESS) {
1517 env->exception_index = EXCP_DEBUG;
1520 cpu_get_tb_cpu_state(env, &pc, &cs_base, &cpu_flags);
1521 tb_gen_code(env, pc, cs_base, cpu_flags, 1);
1522 cpu_resume_from_signal(env, NULL);
1526 wp->flags &= ~BP_WATCHPOINT_HIT;
1531 /* Watchpoint access routines. Watchpoints are inserted using TLB tricks,
1532 so these check for a hit then pass through to the normal out-of-line
1534 static uint64_t watch_mem_read(void *opaque, hwaddr addr,
1537 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~(size - 1), BP_MEM_READ);
1539 case 1: return ldub_phys(addr);
1540 case 2: return lduw_phys(addr);
1541 case 4: return ldl_phys(addr);
1546 static void watch_mem_write(void *opaque, hwaddr addr,
1547 uint64_t val, unsigned size)
1549 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~(size - 1), BP_MEM_WRITE);
1552 stb_phys(addr, val);
1555 stw_phys(addr, val);
1558 stl_phys(addr, val);
1564 static const MemoryRegionOps watch_mem_ops = {
1565 .read = watch_mem_read,
1566 .write = watch_mem_write,
1567 .endianness = DEVICE_NATIVE_ENDIAN,
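/* A subpage_t splits one target page into byte-granular regions: sub_section[]
 * holds a section index per offset inside the page, so several memory regions
 * smaller than a page can share it.  subpage_read/write look up the owning
 * section and forward the access with the address rebased into that section. */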
1570 static uint64_t subpage_read(void *opaque, hwaddr addr,
1573 subpage_t *mmio = opaque;
1574 unsigned int idx = SUBPAGE_IDX(addr);
1575 MemoryRegionSection *section;
1576 #if defined(DEBUG_SUBPAGE)
1577 printf("%s: subpage %p len %d addr " TARGET_FMT_plx " idx %d\n", __func__,
1578 mmio, len, addr, idx);
1581 section = &phys_sections[mmio->sub_section[idx]];
1583 addr -= section->offset_within_address_space;
1584 addr += section->offset_within_region;
1585 return io_mem_read(section->mr, addr, len);
1588 static void subpage_write(void *opaque, hwaddr addr,
1589 uint64_t value, unsigned len)
1591 subpage_t *mmio = opaque;
1592 unsigned int idx = SUBPAGE_IDX(addr);
1593 MemoryRegionSection *section;
1594 #if defined(DEBUG_SUBPAGE)
1595 printf("%s: subpage %p len %d addr " TARGET_FMT_plx
1596 " idx %d value %"PRIx64"\n",
1597 __func__, mmio, len, addr, idx, value);
1600 section = &phys_sections[mmio->sub_section[idx]];
1602 addr -= section->offset_within_address_space;
1603 addr += section->offset_within_region;
1604 io_mem_write(section->mr, addr, value, len);
1607 static const MemoryRegionOps subpage_ops = {
1608 .read = subpage_read,
1609 .write = subpage_write,
1610 .endianness = DEVICE_NATIVE_ENDIAN,
1613 static uint64_t subpage_ram_read(void *opaque, hwaddr addr,
1616 ram_addr_t raddr = addr;
1617 void *ptr = qemu_get_ram_ptr(raddr);
1619 case 1: return ldub_p(ptr);
1620 case 2: return lduw_p(ptr);
1621 case 4: return ldl_p(ptr);
1626 static void subpage_ram_write(void *opaque, hwaddr addr,
1627 uint64_t value, unsigned size)
1629 ram_addr_t raddr = addr;
1630 void *ptr = qemu_get_ram_ptr(raddr);
1632 case 1: return stb_p(ptr, value);
1633 case 2: return stw_p(ptr, value);
1634 case 4: return stl_p(ptr, value);
1639 static const MemoryRegionOps subpage_ram_ops = {
1640 .read = subpage_ram_read,
1641 .write = subpage_ram_write,
1642 .endianness = DEVICE_NATIVE_ENDIAN,
1645 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
1650 if (start >= TARGET_PAGE_SIZE || end >= TARGET_PAGE_SIZE)
1652 idx = SUBPAGE_IDX(start);
1653 eidx = SUBPAGE_IDX(end);
1654 #if defined(DEBUG_SUBPAGE)
1655 printf("%s: %p start %08x end %08x idx %08x eidx %08x mem %ld\n", __func__,
1656 mmio, start, end, idx, eidx, memory);
1658 if (memory_region_is_ram(phys_sections[section].mr)) {
1659 MemoryRegionSection new_section = phys_sections[section];
1660 new_section.mr = &io_mem_subpage_ram;
1661 section = phys_section_add(&new_section);
1663 for (; idx <= eidx; idx++) {
1664 mmio->sub_section[idx] = section;
1670 static subpage_t *subpage_init(hwaddr base)
1674 mmio = g_malloc0(sizeof(subpage_t));
1677 memory_region_init_io(&mmio->iomem, &subpage_ops, mmio,
1678 "subpage", TARGET_PAGE_SIZE);
1679 mmio->iomem.subpage = true;
1680 #if defined(DEBUG_SUBPAGE)
1681 printf("%s: %p base " TARGET_FMT_plx " len %08x %d\n", __func__,
1682 mmio, base, TARGET_PAGE_SIZE, subpage_memory);
1684 subpage_register(mmio, 0, TARGET_PAGE_SIZE-1, phys_section_unassigned);
1689 static uint16_t dummy_section(MemoryRegion *mr)
1691 MemoryRegionSection section = {
1693 .offset_within_address_space = 0,
1694 .offset_within_region = 0,
1698 return phys_section_add(&section);
1701 MemoryRegion *iotlb_to_region(hwaddr index)
1703 return phys_sections[index & ~TARGET_PAGE_MASK].mr;
1706 static void io_mem_init(void)
1708 memory_region_init_io(&io_mem_ram, &error_mem_ops, NULL, "ram", UINT64_MAX);
1709 memory_region_init_io(&io_mem_rom, &rom_mem_ops, NULL, "rom", UINT64_MAX);
1710 memory_region_init_io(&io_mem_unassigned, &unassigned_mem_ops, NULL,
1711 "unassigned", UINT64_MAX);
1712 memory_region_init_io(&io_mem_notdirty, &notdirty_mem_ops, NULL,
1713 "notdirty", UINT64_MAX);
1714 memory_region_init_io(&io_mem_subpage_ram, &subpage_ram_ops, NULL,
1715 "subpage-ram", UINT64_MAX);
1716 memory_region_init_io(&io_mem_watch, &watch_mem_ops, NULL,
1717 "watch", UINT64_MAX);
1720 static void mem_begin(MemoryListener *listener)
1722 AddressSpaceDispatch *d = container_of(listener, AddressSpaceDispatch, listener);
1724 destroy_all_mappings(d);
1725 d->phys_map.ptr = PHYS_MAP_NODE_NIL;
1728 static void core_begin(MemoryListener *listener)
1730 phys_sections_clear();
1731 phys_section_unassigned = dummy_section(&io_mem_unassigned);
1732 phys_section_notdirty = dummy_section(&io_mem_notdirty);
1733 phys_section_rom = dummy_section(&io_mem_rom);
1734 phys_section_watch = dummy_section(&io_mem_watch);
1737 static void tcg_commit(MemoryListener *listener)
1741 /* since each CPU stores ram addresses in its TLB cache, we must
1742 reset the modified entries */
1744 for(env = first_cpu; env != NULL; env = env->next_cpu) {
1749 static void core_log_global_start(MemoryListener *listener)
1751 cpu_physical_memory_set_dirty_tracking(1);
1754 static void core_log_global_stop(MemoryListener *listener)
1756 cpu_physical_memory_set_dirty_tracking(0);
1759 static void io_region_add(MemoryListener *listener,
1760 MemoryRegionSection *section)
1762 MemoryRegionIORange *mrio = g_new(MemoryRegionIORange, 1);
1764 mrio->mr = section->mr;
1765 mrio->offset = section->offset_within_region;
1766 iorange_init(&mrio->iorange, &memory_region_iorange_ops,
1767 section->offset_within_address_space, section->size);
1768 ioport_register(&mrio->iorange);
1771 static void io_region_del(MemoryListener *listener,
1772 MemoryRegionSection *section)
1774 isa_unassign_ioport(section->offset_within_address_space, section->size);
1777 static MemoryListener core_memory_listener = {
1778 .begin = core_begin,
1779 .log_global_start = core_log_global_start,
1780 .log_global_stop = core_log_global_stop,
1784 static MemoryListener io_memory_listener = {
1785 .region_add = io_region_add,
1786 .region_del = io_region_del,
1790 static MemoryListener tcg_memory_listener = {
1791 .commit = tcg_commit,
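/* The dispatch map is rebuilt on every memory topology change: mem_begin()
 * throws away the old mappings, mem_add() re-registers each section (it is
 * used for both region_add and region_nop), and tcg_commit() flushes the CPU
 * TLBs afterwards since they cache ram addresses from the old map. */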
1794 void address_space_init_dispatch(AddressSpace *as)
1796 AddressSpaceDispatch *d = g_new(AddressSpaceDispatch, 1);
1798 d->phys_map = (PhysPageEntry) { .ptr = PHYS_MAP_NODE_NIL, .is_leaf = 0 };
1799 d->listener = (MemoryListener) {
1801 .region_add = mem_add,
1802 .region_nop = mem_add,
1806 memory_listener_register(&d->listener, as);
1809 void address_space_destroy_dispatch(AddressSpace *as)
1811 AddressSpaceDispatch *d = as->dispatch;
1813 memory_listener_unregister(&d->listener);
1814 destroy_l2_mapping(&d->phys_map, P_L2_LEVELS - 1);
1816 as->dispatch = NULL;
1819 static void memory_map_init(void)
1821 system_memory = g_malloc(sizeof(*system_memory));
1822 memory_region_init(system_memory, "system", INT64_MAX);
1823 address_space_init(&address_space_memory, system_memory);
1824 address_space_memory.name = "memory";
1826 system_io = g_malloc(sizeof(*system_io));
1827 memory_region_init(system_io, "io", 65536);
1828 address_space_init(&address_space_io, system_io);
1829 address_space_io.name = "I/O";
1831 memory_listener_register(&core_memory_listener, &address_space_memory);
1832 memory_listener_register(&io_memory_listener, &address_space_io);
1833 memory_listener_register(&tcg_memory_listener, &address_space_memory);
1835 dma_context_init(&dma_context_memory, &address_space_memory,
1839 MemoryRegion *get_system_memory(void)
1841 return system_memory;
1844 MemoryRegion *get_system_io(void)
1849 #endif /* !defined(CONFIG_USER_ONLY) */
1851 /* physical memory access (slow version, mainly for debug) */
1852 #if defined(CONFIG_USER_ONLY)
1853 int cpu_memory_rw_debug(CPUArchState *env, target_ulong addr,
1854 uint8_t *buf, int len, int is_write)
1861 page = addr & TARGET_PAGE_MASK;
1862 l = (page + TARGET_PAGE_SIZE) - addr;
1865 flags = page_get_flags(page);
1866 if (!(flags & PAGE_VALID))
1869 if (!(flags & PAGE_WRITE))
1871 /* XXX: this code should not depend on lock_user */
1872 if (!(p = lock_user(VERIFY_WRITE, addr, l, 0)))
1875 unlock_user(p, addr, l);
1877 if (!(flags & PAGE_READ))
1879 /* XXX: this code should not depend on lock_user */
1880 if (!(p = lock_user(VERIFY_READ, addr, l, 1)))
1883 unlock_user(p, addr, 0);
1894 static void invalidate_and_set_dirty(hwaddr addr,
1897 if (!cpu_physical_memory_is_dirty(addr)) {
1898 /* invalidate code */
1899 tb_invalidate_phys_page_range(addr, addr + length, 0);
1901 cpu_physical_memory_set_dirty_flags(addr, (0xff & ~CODE_DIRTY_FLAG));
1903 xen_modified_memory(addr, length);
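/* address_space_rw() is the generic slow-path copy between a host buffer and
 * an address space.  The transfer is split at page boundaries; RAM pages are
 * handled with memcpy on the host pointer (invalidating translated code on
 * writes), while everything else goes through io_mem_read/io_mem_write in
 * 4-, 2- or 1-byte pieces depending on alignment and remaining length. */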
1906 void address_space_rw(AddressSpace *as, hwaddr addr, uint8_t *buf,
1907 int len, bool is_write)
1909 AddressSpaceDispatch *d = as->dispatch;
1914 MemoryRegionSection *section;
1917 page = addr & TARGET_PAGE_MASK;
1918 l = (page + TARGET_PAGE_SIZE) - addr;
1921 section = phys_page_find(d, page >> TARGET_PAGE_BITS);
1924 if (!memory_region_is_ram(section->mr)) {
1926 addr1 = memory_region_section_addr(section, addr);
1927 /* XXX: could force cpu_single_env to NULL to avoid
1929 if (l >= 4 && ((addr1 & 3) == 0)) {
1930 /* 32 bit write access */
1932 io_mem_write(section->mr, addr1, val, 4);
1934 } else if (l >= 2 && ((addr1 & 1) == 0)) {
1935 /* 16 bit write access */
1937 io_mem_write(section->mr, addr1, val, 2);
1940 /* 8 bit write access */
1942 io_mem_write(section->mr, addr1, val, 1);
1945 } else if (!section->readonly) {
1947 addr1 = memory_region_get_ram_addr(section->mr)
1948 + memory_region_section_addr(section, addr);
1950 ptr = qemu_get_ram_ptr(addr1);
1951 memcpy(ptr, buf, l);
1952 invalidate_and_set_dirty(addr1, l);
1953 qemu_put_ram_ptr(ptr);
1956 if (!(memory_region_is_ram(section->mr) ||
1957 memory_region_is_romd(section->mr))) {
1960 addr1 = memory_region_section_addr(section, addr);
1961 if (l >= 4 && ((addr1 & 3) == 0)) {
1962 /* 32 bit read access */
1963 val = io_mem_read(section->mr, addr1, 4);
1966 } else if (l >= 2 && ((addr1 & 1) == 0)) {
1967 /* 16 bit read access */
1968 val = io_mem_read(section->mr, addr1, 2);
1972 /* 8 bit read access */
1973 val = io_mem_read(section->mr, addr1, 1);
1979 ptr = qemu_get_ram_ptr(section->mr->ram_addr
1980 + memory_region_section_addr(section,
1982 memcpy(buf, ptr, l);
1983 qemu_put_ram_ptr(ptr);
1992 void address_space_write(AddressSpace *as, hwaddr addr,
1993 const uint8_t *buf, int len)
1995 address_space_rw(as, addr, (uint8_t *)buf, len, true);
1999 * address_space_read: read from an address space.
2001 * @as: #AddressSpace to be accessed
2002 * @addr: address within that address space
2003 * @buf: buffer with the data transferred
2005 void address_space_read(AddressSpace *as, hwaddr addr, uint8_t *buf, int len)
2007 address_space_rw(as, addr, buf, len, false);
2011 void cpu_physical_memory_rw(hwaddr addr, uint8_t *buf,
2012 int len, int is_write)
2014 return address_space_rw(&address_space_memory, addr, buf, len, is_write);
2017 /* used for ROM loading : can write in RAM and ROM */
2018 void cpu_physical_memory_write_rom(hwaddr addr,
2019 const uint8_t *buf, int len)
2021 AddressSpaceDispatch *d = address_space_memory.dispatch;
2025 MemoryRegionSection *section;
2028 page = addr & TARGET_PAGE_MASK;
2029 l = (page + TARGET_PAGE_SIZE) - addr;
2032 section = phys_page_find(d, page >> TARGET_PAGE_BITS);
2034 if (!(memory_region_is_ram(section->mr) ||
2035 memory_region_is_romd(section->mr))) {
2038 unsigned long addr1;
2039 addr1 = memory_region_get_ram_addr(section->mr)
2040 + memory_region_section_addr(section, addr);
2042 ptr = qemu_get_ram_ptr(addr1);
2043 memcpy(ptr, buf, l);
2044 invalidate_and_set_dirty(addr1, l);
2045 qemu_put_ram_ptr(ptr);
2059 static BounceBuffer bounce;
2061 typedef struct MapClient {
2063 void (*callback)(void *opaque);
2064 QLIST_ENTRY(MapClient) link;
2067 static QLIST_HEAD(map_client_list, MapClient) map_client_list
2068 = QLIST_HEAD_INITIALIZER(map_client_list);
2070 void *cpu_register_map_client(void *opaque, void (*callback)(void *opaque))
2072 MapClient *client = g_malloc(sizeof(*client));
2074 client->opaque = opaque;
2075 client->callback = callback;
2076 QLIST_INSERT_HEAD(&map_client_list, client, link);
2080 static void cpu_unregister_map_client(void *_client)
2082 MapClient *client = (MapClient *)_client;
2084 QLIST_REMOVE(client, link);
2088 static void cpu_notify_map_clients(void)
2092 while (!QLIST_EMPTY(&map_client_list)) {
2093 client = QLIST_FIRST(&map_client_list);
2094 client->callback(client->opaque);
2095 cpu_unregister_map_client(client);
2099 /* Map a physical memory region into a host virtual address.
2100 * May map a subset of the requested range, given by and returned in *plen.
2101 * May return NULL if resources needed to perform the mapping are exhausted.
2102 * Use only for reads OR writes - not for read-modify-write operations.
2103 * Use cpu_register_map_client() to know when retrying the map operation is
2104 * likely to succeed.
2106 void *address_space_map(AddressSpace *as,
2111 AddressSpaceDispatch *d = as->dispatch;
2116 MemoryRegionSection *section;
2117 ram_addr_t raddr = RAM_ADDR_MAX;
2122 page = addr & TARGET_PAGE_MASK;
2123 l = (page + TARGET_PAGE_SIZE) - addr;
2126 section = phys_page_find(d, page >> TARGET_PAGE_BITS);
2128 if (!(memory_region_is_ram(section->mr) && !section->readonly)) {
2129 if (todo || bounce.buffer) {
2132 bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, TARGET_PAGE_SIZE);
2136 address_space_read(as, addr, bounce.buffer, l);
2140 return bounce.buffer;
2143 raddr = memory_region_get_ram_addr(section->mr)
2144 + memory_region_section_addr(section, addr);
2152 ret = qemu_ram_ptr_length(raddr, &rlen);
2157 /* Unmaps a memory region previously mapped by address_space_map().
2158 * Will also mark the memory as dirty if is_write == 1. access_len gives
2159 * the amount of memory that was actually read or written by the caller.
2161 void address_space_unmap(AddressSpace *as, void *buffer, hwaddr len,
2162 int is_write, hwaddr access_len)
2164 if (buffer != bounce.buffer) {
2166 ram_addr_t addr1 = qemu_ram_addr_from_host_nofail(buffer);
2167 while (access_len) {
2169 l = TARGET_PAGE_SIZE;
2172 invalidate_and_set_dirty(addr1, l);
2177 if (xen_enabled()) {
2178 xen_invalidate_map_cache_entry(buffer);
2183 address_space_write(as, bounce.addr, bounce.buffer, access_len);
2185 qemu_vfree(bounce.buffer);
2186 bounce.buffer = NULL;
2187 cpu_notify_map_clients();
2190 void *cpu_physical_memory_map(hwaddr addr,
2194 return address_space_map(&address_space_memory, addr, plen, is_write);
2197 void cpu_physical_memory_unmap(void *buffer, hwaddr len,
2198 int is_write, hwaddr access_len)
2200 return address_space_unmap(&address_space_memory, buffer, len, is_write, access_len);
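/* Illustrative usage sketch (not part of this file): map a guest-physical
 * range for reading, copy it out, and unmap it.  The helper name and its
 * arguments are hypothetical; error handling is reduced to a bail-out. */
#if 0
static void example_copy_from_guest(hwaddr gpa, uint8_t *dst, hwaddr need)
{
    hwaddr plen = need;
    void *p = cpu_physical_memory_map(gpa, &plen, 0 /* is_write */);

    if (!p) {
        return;               /* bounce buffer busy or region not mappable */
    }
    memcpy(dst, p, plen);     /* plen may come back smaller than 'need' */
    cpu_physical_memory_unmap(p, plen, 0 /* is_write */, plen);
}
#endif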
2203 /* warning: addr must be aligned */
2204 static inline uint32_t ldl_phys_internal(hwaddr addr,
2205 enum device_endian endian)
2209 MemoryRegionSection *section;
2211 section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
2213 if (!(memory_region_is_ram(section->mr) ||
2214 memory_region_is_romd(section->mr))) {
2216 addr = memory_region_section_addr(section, addr);
2217 val = io_mem_read(section->mr, addr, 4);
2218 #if defined(TARGET_WORDS_BIGENDIAN)
2219 if (endian == DEVICE_LITTLE_ENDIAN) {
2223 if (endian == DEVICE_BIG_ENDIAN) {
2229 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section->mr)
2231 + memory_region_section_addr(section, addr));
2233 case DEVICE_LITTLE_ENDIAN:
2234 val = ldl_le_p(ptr);
2236 case DEVICE_BIG_ENDIAN:
2237 val = ldl_be_p(ptr);
2247 uint32_t ldl_phys(hwaddr addr)
2249 return ldl_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
2252 uint32_t ldl_le_phys(hwaddr addr)
2254 return ldl_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
2257 uint32_t ldl_be_phys(hwaddr addr)
2259 return ldl_phys_internal(addr, DEVICE_BIG_ENDIAN);
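/* The lduw/stl/stw _internal helpers below follow the same pattern as
 * ldl_phys_internal: MMIO goes through io_mem_read/io_mem_write with a byte
 * swap when the requested endianness differs from the target's, and RAM is
 * accessed through a host pointer with the matching *_le_p, *_be_p or native
 * accessor.  The single-byte and 64-bit store variants simply go through
 * cpu_physical_memory_read/write. */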
2262 /* warning: addr must be aligned */
2263 static inline uint64_t ldq_phys_internal(hwaddr addr,
2264 enum device_endian endian)
2268 MemoryRegionSection *section;
2270 section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
2272 if (!(memory_region_is_ram(section->mr) ||
2273 memory_region_is_romd(section->mr))) {
2275 addr = memory_region_section_addr(section, addr);
2277 /* XXX This is broken when device endian != cpu endian.
2278 Fix and add "endian" variable check */
2279 #ifdef TARGET_WORDS_BIGENDIAN
2280 val = io_mem_read(section->mr, addr, 4) << 32;
2281 val |= io_mem_read(section->mr, addr + 4, 4);
2283 val = io_mem_read(section->mr, addr, 4);
2284 val |= io_mem_read(section->mr, addr + 4, 4) << 32;
2288 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section->mr)
2290 + memory_region_section_addr(section, addr));
2292 case DEVICE_LITTLE_ENDIAN:
2293 val = ldq_le_p(ptr);
2295 case DEVICE_BIG_ENDIAN:
2296 val = ldq_be_p(ptr);
2306 uint64_t ldq_phys(hwaddr addr)
2308 return ldq_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
2311 uint64_t ldq_le_phys(hwaddr addr)
2313 return ldq_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
2316 uint64_t ldq_be_phys(hwaddr addr)
2318 return ldq_phys_internal(addr, DEVICE_BIG_ENDIAN);
2322 uint32_t ldub_phys(hwaddr addr)
2325 cpu_physical_memory_read(addr, &val, 1);
2329 /* warning: addr must be aligned */
2330 static inline uint32_t lduw_phys_internal(hwaddr addr,
2331 enum device_endian endian)
2335 MemoryRegionSection *section;
2337 section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
2339 if (!(memory_region_is_ram(section->mr) ||
2340 memory_region_is_romd(section->mr))) {
2342 addr = memory_region_section_addr(section, addr);
2343 val = io_mem_read(section->mr, addr, 2);
2344 #if defined(TARGET_WORDS_BIGENDIAN)
2345 if (endian == DEVICE_LITTLE_ENDIAN) {
2349 if (endian == DEVICE_BIG_ENDIAN) {
2355 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section->mr)
2357 + memory_region_section_addr(section, addr));
2359 case DEVICE_LITTLE_ENDIAN:
2360 val = lduw_le_p(ptr);
2362 case DEVICE_BIG_ENDIAN:
2363 val = lduw_be_p(ptr);
2373 uint32_t lduw_phys(hwaddr addr)
2375 return lduw_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
2378 uint32_t lduw_le_phys(hwaddr addr)
2380 return lduw_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
2383 uint32_t lduw_be_phys(hwaddr addr)
2385 return lduw_phys_internal(addr, DEVICE_BIG_ENDIAN);
2388 /* warning: addr must be aligned. The RAM page is not marked as dirty
2389 and the code inside is not invalidated. It is useful if the dirty
2390 bits are used to track modified PTEs */
2391 void stl_phys_notdirty(hwaddr addr, uint32_t val)
2394 MemoryRegionSection *section;
2396 section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
2398 if (!memory_region_is_ram(section->mr) || section->readonly) {
2399 addr = memory_region_section_addr(section, addr);
2400 if (memory_region_is_ram(section->mr)) {
2401 section = &phys_sections[phys_section_rom];
2403 io_mem_write(section->mr, addr, val, 4);
2405 unsigned long addr1 = (memory_region_get_ram_addr(section->mr)
2407 + memory_region_section_addr(section, addr);
2408 ptr = qemu_get_ram_ptr(addr1);
2411 if (unlikely(in_migration)) {
2412 if (!cpu_physical_memory_is_dirty(addr1)) {
2413 /* invalidate code */
2414 tb_invalidate_phys_page_range(addr1, addr1 + 4, 0);
2416 cpu_physical_memory_set_dirty_flags(
2417 addr1, (0xff & ~CODE_DIRTY_FLAG));
2423 void stq_phys_notdirty(hwaddr addr, uint64_t val)
2426 MemoryRegionSection *section;
2428 section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
2430 if (!memory_region_is_ram(section->mr) || section->readonly) {
2431 addr = memory_region_section_addr(section, addr);
2432 if (memory_region_is_ram(section->mr)) {
2433 section = &phys_sections[phys_section_rom];
2435 #ifdef TARGET_WORDS_BIGENDIAN
2436 io_mem_write(section->mr, addr, val >> 32, 4);
2437 io_mem_write(section->mr, addr + 4, (uint32_t)val, 4);
2439 io_mem_write(section->mr, addr, (uint32_t)val, 4);
2440 io_mem_write(section->mr, addr + 4, val >> 32, 4);
2443 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section->mr)
2445 + memory_region_section_addr(section, addr));
2450 /* warning: addr must be aligned */
2451 static inline void stl_phys_internal(hwaddr addr, uint32_t val,
2452 enum device_endian endian)
2455 MemoryRegionSection *section;
2457 section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
2459 if (!memory_region_is_ram(section->mr) || section->readonly) {
2460 addr = memory_region_section_addr(section, addr);
2461 if (memory_region_is_ram(section->mr)) {
2462 section = &phys_sections[phys_section_rom];
2464 #if defined(TARGET_WORDS_BIGENDIAN)
2465 if (endian == DEVICE_LITTLE_ENDIAN) {
2469 if (endian == DEVICE_BIG_ENDIAN) {
2473 io_mem_write(section->mr, addr, val, 4);
2475 unsigned long addr1;
2476 addr1 = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
2477 + memory_region_section_addr(section, addr);
2479 ptr = qemu_get_ram_ptr(addr1);
2481 case DEVICE_LITTLE_ENDIAN:
2484 case DEVICE_BIG_ENDIAN:
2491 invalidate_and_set_dirty(addr1, 4);
2495 void stl_phys(hwaddr addr, uint32_t val)
2497 stl_phys_internal(addr, val, DEVICE_NATIVE_ENDIAN);
2500 void stl_le_phys(hwaddr addr, uint32_t val)
2502 stl_phys_internal(addr, val, DEVICE_LITTLE_ENDIAN);
2505 void stl_be_phys(hwaddr addr, uint32_t val)
2507 stl_phys_internal(addr, val, DEVICE_BIG_ENDIAN);
2511 void stb_phys(hwaddr addr, uint32_t val)
2514 cpu_physical_memory_write(addr, &v, 1);
2517 /* warning: addr must be aligned */
2518 static inline void stw_phys_internal(hwaddr addr, uint32_t val,
2519 enum device_endian endian)
2522 MemoryRegionSection *section;
2524 section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
2526 if (!memory_region_is_ram(section->mr) || section->readonly) {
2527 addr = memory_region_section_addr(section, addr);
2528 if (memory_region_is_ram(section->mr)) {
2529 section = &phys_sections[phys_section_rom];
2531 #if defined(TARGET_WORDS_BIGENDIAN)
2532 if (endian == DEVICE_LITTLE_ENDIAN) {
2536 if (endian == DEVICE_BIG_ENDIAN) {
2540 io_mem_write(section->mr, addr, val, 2);
2542 unsigned long addr1;
2543 addr1 = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
2544 + memory_region_section_addr(section, addr);
2546 ptr = qemu_get_ram_ptr(addr1);
2548 case DEVICE_LITTLE_ENDIAN:
2551 case DEVICE_BIG_ENDIAN:
2558 invalidate_and_set_dirty(addr1, 2);
2562 void stw_phys(hwaddr addr, uint32_t val)
2564 stw_phys_internal(addr, val, DEVICE_NATIVE_ENDIAN);
2567 void stw_le_phys(hwaddr addr, uint32_t val)
2569 stw_phys_internal(addr, val, DEVICE_LITTLE_ENDIAN);
2572 void stw_be_phys(hwaddr addr, uint32_t val)
2574 stw_phys_internal(addr, val, DEVICE_BIG_ENDIAN);
2578 void stq_phys(hwaddr addr, uint64_t val)
2581 cpu_physical_memory_write(addr, &val, 8);
2584 void stq_le_phys(hwaddr addr, uint64_t val)
2586 val = cpu_to_le64(val);
2587 cpu_physical_memory_write(addr, &val, 8);
2590 void stq_be_phys(hwaddr addr, uint64_t val)
2592 val = cpu_to_be64(val);
2593 cpu_physical_memory_write(addr, &val, 8);
2596 /* virtual memory access for debug (includes writing to ROM) */
2597 int cpu_memory_rw_debug(CPUArchState *env, target_ulong addr,
2598 uint8_t *buf, int len, int is_write)
2605 page = addr & TARGET_PAGE_MASK;
2606 phys_addr = cpu_get_phys_page_debug(env, page);
2607 /* if no physical page mapped, return an error */
2608 if (phys_addr == -1)
2610 l = (page + TARGET_PAGE_SIZE) - addr;
2613 phys_addr += (addr & ~TARGET_PAGE_MASK);
2615 cpu_physical_memory_write_rom(phys_addr, buf, l);
2617 cpu_physical_memory_rw(phys_addr, buf, l, is_write);
2626 #if !defined(CONFIG_USER_ONLY)
2629 * A helper function for the _utterly broken_ virtio device model to find out if
2630 * it's running on a big endian machine. Don't do this at home kids!
2632 bool virtio_is_big_endian(void);
2633 bool virtio_is_big_endian(void)
2635 #if defined(TARGET_WORDS_BIGENDIAN)
2644 #ifndef CONFIG_USER_ONLY
2645 bool cpu_physical_memory_is_io(hwaddr phys_addr)
2647 MemoryRegionSection *section;
2649 section = phys_page_find(address_space_memory.dispatch,
2650 phys_addr >> TARGET_PAGE_BITS);
2652 return !(memory_region_is_ram(section->mr) ||
2653 memory_region_is_romd(section->mr));