4 * Copyright (c) 2003 Fabrice Bellard
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
23 #include <sys/types.h>
27 #include "qemu-common.h"
32 #include "qemu/osdep.h"
33 #include "sysemu/kvm.h"
34 #include "hw/xen/xen.h"
35 #include "qemu/timer.h"
36 #include "qemu/config-file.h"
37 #include "exec/memory.h"
38 #include "sysemu/dma.h"
39 #include "exec/address-spaces.h"
40 #if defined(CONFIG_USER_ONLY)
42 #else /* !CONFIG_USER_ONLY */
43 #include "sysemu/xen-mapcache.h"
46 #include "exec/cpu-all.h"
48 #include "exec/cputlb.h"
49 #include "translate-all.h"
51 #include "exec/memory-internal.h"
53 //#define DEBUG_SUBPAGE
55 #if !defined(CONFIG_USER_ONLY)
57 static int in_migration;
59 RAMList ram_list = { .blocks = QTAILQ_HEAD_INITIALIZER(ram_list.blocks) };
61 static MemoryRegion *system_memory;
62 static MemoryRegion *system_io;
64 AddressSpace address_space_io;
65 AddressSpace address_space_memory;
66 DMAContext dma_context_memory;
68 MemoryRegion io_mem_rom, io_mem_notdirty;
69 static MemoryRegion io_mem_unassigned, io_mem_subpage_ram;
73 CPUArchState *first_cpu;
74 /* current CPU in the current thread. It is only valid inside cpu_exec(). */
76 DEFINE_TLS(CPUArchState *,cpu_single_env);
77 /* 0 = Do not count executed instructions.
78 1 = Precise instruction counting.
79 2 = Adaptive rate instruction counting. */
82 #if !defined(CONFIG_USER_ONLY)
84 typedef struct PhysPageEntry PhysPageEntry;
86 struct PhysPageEntry {
88 /* index into phys_sections (is_leaf) or phys_map_nodes (!is_leaf) */
92 struct AddressSpaceDispatch {
93 /* This is a multi-level map on the physical address space.
94 * The bottom level has pointers to MemoryRegionSections.
96 PhysPageEntry phys_map;
97 MemoryListener listener;
100 static MemoryRegionSection *phys_sections;
101 static unsigned phys_sections_nb, phys_sections_nb_alloc;
102 static uint16_t phys_section_unassigned;
103 static uint16_t phys_section_notdirty;
104 static uint16_t phys_section_rom;
105 static uint16_t phys_section_watch;
107 /* Simple allocator for PhysPageEntry nodes */
108 static PhysPageEntry (*phys_map_nodes)[L2_SIZE];
109 static unsigned phys_map_nodes_nb, phys_map_nodes_nb_alloc;
111 #define PHYS_MAP_NODE_NIL (((uint16_t)~0) >> 1)
113 static void io_mem_init(void);
114 static void memory_map_init(void);
115 static void *qemu_safe_ram_ptr(ram_addr_t addr);
117 static MemoryRegion io_mem_watch;
120 #if !defined(CONFIG_USER_ONLY)
122 static void phys_map_node_reserve(unsigned nodes)
124 if (phys_map_nodes_nb + nodes > phys_map_nodes_nb_alloc) {
125 typedef PhysPageEntry Node[L2_SIZE];
126 phys_map_nodes_nb_alloc = MAX(phys_map_nodes_nb_alloc * 2, 16);
127 phys_map_nodes_nb_alloc = MAX(phys_map_nodes_nb_alloc,
128 phys_map_nodes_nb + nodes);
129 phys_map_nodes = g_renew(Node, phys_map_nodes,
130 phys_map_nodes_nb_alloc);
134 static uint16_t phys_map_node_alloc(void)
139 ret = phys_map_nodes_nb++;
140 assert(ret != PHYS_MAP_NODE_NIL);
141 assert(ret != phys_map_nodes_nb_alloc);
142 for (i = 0; i < L2_SIZE; ++i) {
143 phys_map_nodes[ret][i].is_leaf = 0;
144 phys_map_nodes[ret][i].ptr = PHYS_MAP_NODE_NIL;
149 static void phys_map_nodes_reset(void)
151 phys_map_nodes_nb = 0;
155 static void phys_page_set_level(PhysPageEntry *lp, hwaddr *index,
156 hwaddr *nb, uint16_t leaf,
161 hwaddr step = (hwaddr)1 << (level * L2_BITS);
163 if (!lp->is_leaf && lp->ptr == PHYS_MAP_NODE_NIL) {
164 lp->ptr = phys_map_node_alloc();
165 p = phys_map_nodes[lp->ptr];
167 for (i = 0; i < L2_SIZE; i++) {
169 p[i].ptr = phys_section_unassigned;
173 p = phys_map_nodes[lp->ptr];
175 lp = &p[(*index >> (level * L2_BITS)) & (L2_SIZE - 1)];
177 while (*nb && lp < &p[L2_SIZE]) {
178 if ((*index & (step - 1)) == 0 && *nb >= step) {
184 phys_page_set_level(lp, index, nb, leaf, level - 1);
190 static void phys_page_set(AddressSpaceDispatch *d,
191 hwaddr index, hwaddr nb,
194 /* Wildly overreserve - it doesn't matter much. */
195 phys_map_node_reserve(3 * P_L2_LEVELS);
197 phys_page_set_level(&d->phys_map, &index, &nb, leaf, P_L2_LEVELS - 1);
200 static MemoryRegionSection *phys_page_find(AddressSpaceDispatch *d, hwaddr index)
202 PhysPageEntry lp = d->phys_map;
206 for (i = P_L2_LEVELS - 1; i >= 0 && !lp.is_leaf; i--) {
207 if (lp.ptr == PHYS_MAP_NODE_NIL) {
208 return &phys_sections[phys_section_unassigned];
210 p = phys_map_nodes[lp.ptr];
211 lp = p[(index >> (i * L2_BITS)) & (L2_SIZE - 1)];
213 return &phys_sections[lp.ptr];
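/*
 * A minimal illustration of how phys_page_find() above walks the radix
 * tree: the page index is split into one L2_BITS-wide slot per level,
 * starting from the top level.  L2_BITS, L2_SIZE and P_L2_LEVELS are the
 * constants defined earlier in this file (elided from this excerpt), and
 * phys_map_print_path() is a made-up debugging helper, not an API.
 */
static void __attribute__((unused)) phys_map_print_path(hwaddr addr)
{
    hwaddr index = addr >> TARGET_PAGE_BITS;
    int i;

    for (i = P_L2_LEVELS - 1; i >= 0; i--) {
        /* Same field extraction as the lookup loop in phys_page_find(). */
        unsigned slot = (index >> (i * L2_BITS)) & (L2_SIZE - 1);
        printf("level %d -> slot %u\n", i, slot);
    }
}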
216 bool memory_region_is_unassigned(MemoryRegion *mr)
218 return mr != &io_mem_rom && mr != &io_mem_notdirty && !mr->rom_device
219 && mr != &io_mem_watch;
222 static MemoryRegionSection *address_space_lookup_region(AddressSpace *as,
225 return phys_page_find(as->dispatch, addr >> TARGET_PAGE_BITS);
228 MemoryRegionSection *address_space_translate(AddressSpace *as, hwaddr addr,
229 hwaddr *xlat, hwaddr *plen,
232 MemoryRegionSection *section;
235 section = address_space_lookup_region(as, addr);
236 /* Compute offset within MemoryRegionSection */
237 addr -= section->offset_within_address_space;
239 /* Compute offset within MemoryRegion */
240 *xlat = addr + section->offset_within_region;
242 diff = int128_sub(section->mr->size, int128_make64(addr));
243 *plen = int128_get64(int128_min(diff, int128_make64(*plen)));
248 void cpu_exec_init_all(void)
250 #if !defined(CONFIG_USER_ONLY)
251 qemu_mutex_init(&ram_list.mutex);
257 #if !defined(CONFIG_USER_ONLY)
259 static int cpu_common_post_load(void *opaque, int version_id)
261 CPUState *cpu = opaque;
263 /* 0x01 was CPU_INTERRUPT_EXIT. This line can be removed when the
264 version_id is increased. */
265 cpu->interrupt_request &= ~0x01;
266 tlb_flush(cpu->env_ptr, 1);
271 static const VMStateDescription vmstate_cpu_common = {
272 .name = "cpu_common",
274 .minimum_version_id = 1,
275 .minimum_version_id_old = 1,
276 .post_load = cpu_common_post_load,
277 .fields = (VMStateField []) {
278 VMSTATE_UINT32(halted, CPUState),
279 VMSTATE_UINT32(interrupt_request, CPUState),
280 VMSTATE_END_OF_LIST()
284 #define vmstate_cpu_common vmstate_dummy
287 CPUState *qemu_get_cpu(int index)
289 CPUArchState *env = first_cpu;
290 CPUState *cpu = NULL;
293 cpu = ENV_GET_CPU(env);
294 if (cpu->cpu_index == index) {
300 return env ? cpu : NULL;
303 void qemu_for_each_cpu(void (*func)(CPUState *cpu, void *data), void *data)
305 CPUArchState *env = first_cpu;
308 func(ENV_GET_CPU(env), data);
313 void cpu_exec_init(CPUArchState *env)
315 CPUState *cpu = ENV_GET_CPU(env);
316 CPUClass *cc = CPU_GET_CLASS(cpu);
320 #if defined(CONFIG_USER_ONLY)
323 env->next_cpu = NULL;
326 while (*penv != NULL) {
327 penv = &(*penv)->next_cpu;
330 cpu->cpu_index = cpu_index;
332 QTAILQ_INIT(&env->breakpoints);
333 QTAILQ_INIT(&env->watchpoints);
334 #ifndef CONFIG_USER_ONLY
335 cpu->thread_id = qemu_get_thread_id();
338 #if defined(CONFIG_USER_ONLY)
341 vmstate_register(NULL, cpu_index, &vmstate_cpu_common, cpu);
342 #if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
343 register_savevm(NULL, "cpu", cpu_index, CPU_SAVE_VERSION,
344 cpu_save, cpu_load, env);
345 assert(cc->vmsd == NULL);
347 if (cc->vmsd != NULL) {
348 vmstate_register(NULL, cpu_index, cc->vmsd, cpu);
352 #if defined(TARGET_HAS_ICE)
353 #if defined(CONFIG_USER_ONLY)
354 static void breakpoint_invalidate(CPUArchState *env, target_ulong pc)
356 tb_invalidate_phys_page_range(pc, pc + 1, 0);
359 static void breakpoint_invalidate(CPUArchState *env, target_ulong pc)
361 tb_invalidate_phys_addr(cpu_get_phys_page_debug(env, pc) |
362 (pc & ~TARGET_PAGE_MASK));
365 #endif /* TARGET_HAS_ICE */
367 #if defined(CONFIG_USER_ONLY)
368 void cpu_watchpoint_remove_all(CPUArchState *env, int mask)
373 int cpu_watchpoint_insert(CPUArchState *env, target_ulong addr, target_ulong len,
374 int flags, CPUWatchpoint **watchpoint)
379 /* Add a watchpoint. */
380 int cpu_watchpoint_insert(CPUArchState *env, target_ulong addr, target_ulong len,
381 int flags, CPUWatchpoint **watchpoint)
383 target_ulong len_mask = ~(len - 1);
386 /* sanity checks: allow power-of-2 lengths, deny unaligned watchpoints */
387 if ((len & (len - 1)) || (addr & ~len_mask) ||
388 len == 0 || len > TARGET_PAGE_SIZE) {
389 fprintf(stderr, "qemu: tried to set invalid watchpoint at "
390 TARGET_FMT_lx ", len=" TARGET_FMT_lu "\n", addr, len);
393 wp = g_malloc(sizeof(*wp));
396 wp->len_mask = len_mask;
399 /* keep all GDB-injected watchpoints in front */
401 QTAILQ_INSERT_HEAD(&env->watchpoints, wp, entry);
403 QTAILQ_INSERT_TAIL(&env->watchpoints, wp, entry);
405 tlb_flush_page(env, addr);
412 /* Remove a specific watchpoint. */
413 int cpu_watchpoint_remove(CPUArchState *env, target_ulong addr, target_ulong len,
416 target_ulong len_mask = ~(len - 1);
419 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
420 if (addr == wp->vaddr && len_mask == wp->len_mask
421 && flags == (wp->flags & ~BP_WATCHPOINT_HIT)) {
422 cpu_watchpoint_remove_by_ref(env, wp);
429 /* Remove a specific watchpoint by reference. */
430 void cpu_watchpoint_remove_by_ref(CPUArchState *env, CPUWatchpoint *watchpoint)
432 QTAILQ_REMOVE(&env->watchpoints, watchpoint, entry);
434 tlb_flush_page(env, watchpoint->vaddr);
439 /* Remove all matching watchpoints. */
440 void cpu_watchpoint_remove_all(CPUArchState *env, int mask)
442 CPUWatchpoint *wp, *next;
444 QTAILQ_FOREACH_SAFE(wp, &env->watchpoints, entry, next) {
445 if (wp->flags & mask)
446 cpu_watchpoint_remove_by_ref(env, wp);
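/*
 * A minimal usage sketch for the watchpoint API above (e.g. what a
 * gdbstub-style client would do).  The length must be a power of two no
 * larger than TARGET_PAGE_SIZE and the address must be aligned to it, per
 * the sanity checks in cpu_watchpoint_insert().  BP_GDB is assumed to be
 * the "GDB-injected" flag referred to in the comments above;
 * example_set_write_watchpoint() itself is a made-up helper.
 */
static int __attribute__((unused))
example_set_write_watchpoint(CPUArchState *env, target_ulong addr)
{
    CPUWatchpoint *wp;
    int ret;

    /* Watch 4 bytes for writes; addr must therefore be 4-byte aligned. */
    ret = cpu_watchpoint_insert(env, addr, 4, BP_MEM_WRITE | BP_GDB, &wp);
    if (ret != 0) {
        return ret;
    }
    /* ... run the guest; a hit sets BP_WATCHPOINT_HIT in wp->flags ... */
    cpu_watchpoint_remove_by_ref(env, wp);
    return 0;
}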
451 /* Add a breakpoint. */
452 int cpu_breakpoint_insert(CPUArchState *env, target_ulong pc, int flags,
453 CPUBreakpoint **breakpoint)
455 #if defined(TARGET_HAS_ICE)
458 bp = g_malloc(sizeof(*bp));
463 /* keep all GDB-injected breakpoints in front */
465 QTAILQ_INSERT_HEAD(&env->breakpoints, bp, entry);
467 QTAILQ_INSERT_TAIL(&env->breakpoints, bp, entry);
469 breakpoint_invalidate(env, pc);
479 /* Remove a specific breakpoint. */
480 int cpu_breakpoint_remove(CPUArchState *env, target_ulong pc, int flags)
482 #if defined(TARGET_HAS_ICE)
485 QTAILQ_FOREACH(bp, &env->breakpoints, entry) {
486 if (bp->pc == pc && bp->flags == flags) {
487 cpu_breakpoint_remove_by_ref(env, bp);
497 /* Remove a specific breakpoint by reference. */
498 void cpu_breakpoint_remove_by_ref(CPUArchState *env, CPUBreakpoint *breakpoint)
500 #if defined(TARGET_HAS_ICE)
501 QTAILQ_REMOVE(&env->breakpoints, breakpoint, entry);
503 breakpoint_invalidate(env, breakpoint->pc);
509 /* Remove all matching breakpoints. */
510 void cpu_breakpoint_remove_all(CPUArchState *env, int mask)
512 #if defined(TARGET_HAS_ICE)
513 CPUBreakpoint *bp, *next;
515 QTAILQ_FOREACH_SAFE(bp, &env->breakpoints, entry, next) {
516 if (bp->flags & mask)
517 cpu_breakpoint_remove_by_ref(env, bp);
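/*
 * The matching sketch for the breakpoint API above.  BP_GDB is again
 * assumed to be the flag used for GDB-injected breakpoints, and
 * example_toggle_breakpoint() is a made-up helper.
 */
static int __attribute__((unused))
example_toggle_breakpoint(CPUArchState *env, target_ulong pc, bool set)
{
    if (set) {
        /* Passing NULL: the caller does not need the CPUBreakpoint back. */
        return cpu_breakpoint_insert(env, pc, BP_GDB, NULL);
    }
    return cpu_breakpoint_remove(env, pc, BP_GDB);
}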
522 /* enable or disable single step mode. EXCP_DEBUG is returned by the
523 CPU loop after each instruction */
524 void cpu_single_step(CPUArchState *env, int enabled)
526 #if defined(TARGET_HAS_ICE)
527 if (env->singlestep_enabled != enabled) {
528 env->singlestep_enabled = enabled;
530 kvm_update_guest_debug(env, 0);
532 /* must flush all the translated code to avoid inconsistencies */
533 /* XXX: only flush what is necessary */
540 void cpu_exit(CPUArchState *env)
542 CPUState *cpu = ENV_GET_CPU(env);
544 cpu->exit_request = 1;
545 cpu->tcg_exit_req = 1;
548 void cpu_abort(CPUArchState *env, const char *fmt, ...)
555 fprintf(stderr, "qemu: fatal: ");
556 vfprintf(stderr, fmt, ap);
557 fprintf(stderr, "\n");
558 cpu_dump_state(env, stderr, fprintf, CPU_DUMP_FPU | CPU_DUMP_CCOP);
559 if (qemu_log_enabled()) {
560 qemu_log("qemu: fatal: ");
561 qemu_log_vprintf(fmt, ap2);
563 log_cpu_state(env, CPU_DUMP_FPU | CPU_DUMP_CCOP);
569 #if defined(CONFIG_USER_ONLY)
571 struct sigaction act;
572 sigfillset(&act.sa_mask);
573 act.sa_handler = SIG_DFL;
574 sigaction(SIGABRT, &act, NULL);
580 CPUArchState *cpu_copy(CPUArchState *env)
582 CPUArchState *new_env = cpu_init(env->cpu_model_str);
583 CPUArchState *next_cpu = new_env->next_cpu;
584 #if defined(TARGET_HAS_ICE)
589 memcpy(new_env, env, sizeof(CPUArchState));
591 /* Preserve chaining. */
592 new_env->next_cpu = next_cpu;
594 /* Clone all break/watchpoints.
595 Note: Once we support ptrace with hw-debug register access, make sure
596 BP_CPU break/watchpoints are handled correctly on clone. */
597 QTAILQ_INIT(&env->breakpoints);
598 QTAILQ_INIT(&env->watchpoints);
599 #if defined(TARGET_HAS_ICE)
600 QTAILQ_FOREACH(bp, &env->breakpoints, entry) {
601 cpu_breakpoint_insert(new_env, bp->pc, bp->flags, NULL);
603 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
604 cpu_watchpoint_insert(new_env, wp->vaddr, (~wp->len_mask) + 1,
612 #if !defined(CONFIG_USER_ONLY)
613 static void tlb_reset_dirty_range_all(ram_addr_t start, ram_addr_t end,
618 /* we modify the TLB cache so that the dirty bit will be set again
619 when accessing the range */
620 start1 = (uintptr_t)qemu_safe_ram_ptr(start);
621 /* Check that we don't span multiple blocks - this breaks the
622 address comparisons below. */
623 if ((uintptr_t)qemu_safe_ram_ptr(end - 1) - start1
624 != (end - 1) - start) {
627 cpu_tlb_reset_dirty_all(start1, length);
631 /* Note: start and end must be within the same ram block. */
632 void cpu_physical_memory_reset_dirty(ram_addr_t start, ram_addr_t end,
637 start &= TARGET_PAGE_MASK;
638 end = TARGET_PAGE_ALIGN(end);
640 length = end - start;
643 cpu_physical_memory_mask_dirty_range(start, length, dirty_flags);
646 tlb_reset_dirty_range_all(start, end, length);
650 static int cpu_physical_memory_set_dirty_tracking(int enable)
653 in_migration = enable;
657 hwaddr memory_region_section_get_iotlb(CPUArchState *env,
658 MemoryRegionSection *section,
660 hwaddr paddr, hwaddr xlat,
662 target_ulong *address)
667 if (memory_region_is_ram(section->mr)) {
669 iotlb = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
671 if (!section->readonly) {
672 iotlb |= phys_section_notdirty;
674 iotlb |= phys_section_rom;
677 iotlb = section - phys_sections;
681 /* Make accesses to pages with watchpoints go via the
682 watchpoint trap routines. */
683 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
684 if (vaddr == (wp->vaddr & TARGET_PAGE_MASK)) {
685 /* Avoid trapping reads of pages with a write breakpoint. */
686 if ((prot & PAGE_WRITE) || (wp->flags & BP_MEM_READ)) {
687 iotlb = phys_section_watch + paddr;
688 *address |= TLB_MMIO;
696 #endif /* defined(CONFIG_USER_ONLY) */
698 #if !defined(CONFIG_USER_ONLY)
700 #define SUBPAGE_IDX(addr) ((addr) & ~TARGET_PAGE_MASK)
701 typedef struct subpage_t {
704 uint16_t sub_section[TARGET_PAGE_SIZE];
707 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
709 static subpage_t *subpage_init(hwaddr base);
710 static void destroy_page_desc(uint16_t section_index)
712 MemoryRegionSection *section = &phys_sections[section_index];
713 MemoryRegion *mr = section->mr;
716 subpage_t *subpage = container_of(mr, subpage_t, iomem);
717 memory_region_destroy(&subpage->iomem);
722 static void destroy_l2_mapping(PhysPageEntry *lp, unsigned level)
727 if (lp->ptr == PHYS_MAP_NODE_NIL) {
731 p = phys_map_nodes[lp->ptr];
732 for (i = 0; i < L2_SIZE; ++i) {
734 destroy_l2_mapping(&p[i], level - 1);
736 destroy_page_desc(p[i].ptr);
740 lp->ptr = PHYS_MAP_NODE_NIL;
743 static void destroy_all_mappings(AddressSpaceDispatch *d)
745 destroy_l2_mapping(&d->phys_map, P_L2_LEVELS - 1);
746 phys_map_nodes_reset();
749 static uint16_t phys_section_add(MemoryRegionSection *section)
751 /* The physical section number is ORed with a page-aligned
752 * pointer to produce the iotlb entries. Thus it should
753 * never overflow into the page-aligned value.
755 assert(phys_sections_nb < TARGET_PAGE_SIZE);
757 if (phys_sections_nb == phys_sections_nb_alloc) {
758 phys_sections_nb_alloc = MAX(phys_sections_nb_alloc * 2, 16);
759 phys_sections = g_renew(MemoryRegionSection, phys_sections,
760 phys_sections_nb_alloc);
762 phys_sections[phys_sections_nb] = *section;
763 return phys_sections_nb++;
766 static void phys_sections_clear(void)
768 phys_sections_nb = 0;
771 static void register_subpage(AddressSpaceDispatch *d, MemoryRegionSection *section)
774 hwaddr base = section->offset_within_address_space
776 MemoryRegionSection *existing = phys_page_find(d, base >> TARGET_PAGE_BITS);
777 MemoryRegionSection subsection = {
778 .offset_within_address_space = base,
779 .size = TARGET_PAGE_SIZE,
783 assert(existing->mr->subpage || existing->mr == &io_mem_unassigned);
785 if (!(existing->mr->subpage)) {
786 subpage = subpage_init(base);
787 subsection.mr = &subpage->iomem;
788 phys_page_set(d, base >> TARGET_PAGE_BITS, 1,
789 phys_section_add(&subsection));
791 subpage = container_of(existing->mr, subpage_t, iomem);
793 start = section->offset_within_address_space & ~TARGET_PAGE_MASK;
794 end = start + section->size - 1;
795 subpage_register(subpage, start, end, phys_section_add(section));
799 static void register_multipage(AddressSpaceDispatch *d, MemoryRegionSection *section)
801 hwaddr start_addr = section->offset_within_address_space;
802 ram_addr_t size = section->size;
804 uint16_t section_index = phys_section_add(section);
809 phys_page_set(d, addr >> TARGET_PAGE_BITS, size >> TARGET_PAGE_BITS,
813 QEMU_BUILD_BUG_ON(TARGET_PHYS_ADDR_SPACE_BITS > MAX_PHYS_ADDR_SPACE_BITS)
815 static MemoryRegionSection limit(MemoryRegionSection section)
817 section.size = MIN(section.offset_within_address_space + section.size,
819 - section.offset_within_address_space;
824 static void mem_add(MemoryListener *listener, MemoryRegionSection *section)
826 AddressSpaceDispatch *d = container_of(listener, AddressSpaceDispatch, listener);
827 MemoryRegionSection now = limit(*section), remain = limit(*section);
829 if ((now.offset_within_address_space & ~TARGET_PAGE_MASK)
830 || (now.size < TARGET_PAGE_SIZE)) {
831 now.size = MIN(TARGET_PAGE_ALIGN(now.offset_within_address_space)
832 - now.offset_within_address_space,
834 register_subpage(d, &now);
835 remain.size -= now.size;
836 remain.offset_within_address_space += now.size;
837 remain.offset_within_region += now.size;
839 while (remain.size >= TARGET_PAGE_SIZE) {
841 if (remain.offset_within_region & ~TARGET_PAGE_MASK) {
842 now.size = TARGET_PAGE_SIZE;
843 register_subpage(d, &now);
845 now.size &= TARGET_PAGE_MASK;
846 register_multipage(d, &now);
848 remain.size -= now.size;
849 remain.offset_within_address_space += now.size;
850 remain.offset_within_region += now.size;
854 register_subpage(d, &now);
858 void qemu_flush_coalesced_mmio_buffer(void)
861 kvm_flush_coalesced_mmio_buffer();
864 void qemu_mutex_lock_ramlist(void)
866 qemu_mutex_lock(&ram_list.mutex);
869 void qemu_mutex_unlock_ramlist(void)
871 qemu_mutex_unlock(&ram_list.mutex);
874 #if defined(__linux__) && !defined(TARGET_S390X)
878 #define HUGETLBFS_MAGIC 0x958458f6
880 static long gethugepagesize(const char *path)
886 ret = statfs(path, &fs);
887 } while (ret != 0 && errno == EINTR);
894 if (fs.f_type != HUGETLBFS_MAGIC)
895 fprintf(stderr, "Warning: path not on HugeTLBFS: %s\n", path);
900 static void *file_ram_alloc(RAMBlock *block,
905 char *sanitized_name;
912 unsigned long hpagesize;
914 hpagesize = gethugepagesize(path);
919 if (memory < hpagesize) {
923 if (kvm_enabled() && !kvm_has_sync_mmu()) {
924 fprintf(stderr, "host lacks kvm mmu notifiers, -mem-path unsupported\n");
928 /* Make name safe to use with mkstemp by replacing '/' with '_'. */
929 sanitized_name = g_strdup(block->mr->name);
930 for (c = sanitized_name; *c != '\0'; c++) {
935 filename = g_strdup_printf("%s/qemu_back_mem.%s.XXXXXX", path,
937 g_free(sanitized_name);
939 fd = mkstemp(filename);
941 perror("unable to create backing store for hugepages");
948 memory = (memory+hpagesize-1) & ~(hpagesize-1);
951 * ftruncate is not supported by hugetlbfs in older
952 * hosts, so don't bother bailing out on errors.
953 * If anything goes wrong with it under other filesystems,
956 if (ftruncate(fd, memory))
960 /* NB: MAP_POPULATE won't exhaustively alloc all phys pages in the case
961 * MAP_PRIVATE is requested. For mem_prealloc we mmap as MAP_SHARED
962 * to sidestep this quirk.
964 flags = mem_prealloc ? MAP_POPULATE | MAP_SHARED : MAP_PRIVATE;
965 area = mmap(0, memory, PROT_READ | PROT_WRITE, flags, fd, 0);
967 area = mmap(0, memory, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
969 if (area == MAP_FAILED) {
970 perror("file_ram_alloc: can't mmap RAM pages");
979 static ram_addr_t find_ram_offset(ram_addr_t size)
981 RAMBlock *block, *next_block;
982 ram_addr_t offset = RAM_ADDR_MAX, mingap = RAM_ADDR_MAX;
984 assert(size != 0); /* it would hand out the same offset multiple times */
986 if (QTAILQ_EMPTY(&ram_list.blocks))
989 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
990 ram_addr_t end, next = RAM_ADDR_MAX;
992 end = block->offset + block->length;
994 QTAILQ_FOREACH(next_block, &ram_list.blocks, next) {
995 if (next_block->offset >= end) {
996 next = MIN(next, next_block->offset);
999 if (next - end >= size && next - end < mingap) {
1001 mingap = next - end;
1005 if (offset == RAM_ADDR_MAX) {
1006 fprintf(stderr, "Failed to find gap of requested size: %" PRIu64 "\n",
1014 ram_addr_t last_ram_offset(void)
1017 ram_addr_t last = 0;
1019 QTAILQ_FOREACH(block, &ram_list.blocks, next)
1020 last = MAX(last, block->offset + block->length);
1025 static void qemu_ram_setup_dump(void *addr, ram_addr_t size)
1028 QemuOpts *machine_opts;
1030 /* Use MADV_DONTDUMP if the user doesn't want the guest memory in the core dump */
1031 machine_opts = qemu_opts_find(qemu_find_opts("machine"), 0);
1033 !qemu_opt_get_bool(machine_opts, "dump-guest-core", true)) {
1034 ret = qemu_madvise(addr, size, QEMU_MADV_DONTDUMP);
1036 perror("qemu_madvise");
1037 fprintf(stderr, "madvise doesn't support MADV_DONTDUMP, "
1038 "but dump_guest_core=off specified\n");
1043 void qemu_ram_set_idstr(ram_addr_t addr, const char *name, DeviceState *dev)
1045 RAMBlock *new_block, *block;
1048 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1049 if (block->offset == addr) {
1055 assert(!new_block->idstr[0]);
1058 char *id = qdev_get_dev_path(dev);
1060 snprintf(new_block->idstr, sizeof(new_block->idstr), "%s/", id);
1064 pstrcat(new_block->idstr, sizeof(new_block->idstr), name);
1066 /* This assumes the iothread lock is taken here too. */
1067 qemu_mutex_lock_ramlist();
1068 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1069 if (block != new_block && !strcmp(block->idstr, new_block->idstr)) {
1070 fprintf(stderr, "RAMBlock \"%s\" already registered, abort!\n",
1075 qemu_mutex_unlock_ramlist();
1078 static int memory_try_enable_merging(void *addr, size_t len)
1082 opts = qemu_opts_find(qemu_find_opts("machine"), 0);
1083 if (opts && !qemu_opt_get_bool(opts, "mem-merge", true)) {
1084 /* disabled by the user */
1088 return qemu_madvise(addr, len, QEMU_MADV_MERGEABLE);
1091 ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
1094 RAMBlock *block, *new_block;
1096 size = TARGET_PAGE_ALIGN(size);
1097 new_block = g_malloc0(sizeof(*new_block));
1099 /* This assumes the iothread lock is taken here too. */
1100 qemu_mutex_lock_ramlist();
1102 new_block->offset = find_ram_offset(size);
1104 new_block->host = host;
1105 new_block->flags |= RAM_PREALLOC_MASK;
1108 #if defined (__linux__) && !defined(TARGET_S390X)
1109 new_block->host = file_ram_alloc(new_block, size, mem_path);
1110 if (!new_block->host) {
1111 new_block->host = qemu_anon_ram_alloc(size);
1112 memory_try_enable_merging(new_block->host, size);
1115 fprintf(stderr, "-mem-path option unsupported\n");
1119 if (xen_enabled()) {
1120 xen_ram_alloc(new_block->offset, size, mr);
1121 } else if (kvm_enabled()) {
1122 /* some s390/kvm configurations have special constraints */
1123 new_block->host = kvm_ram_alloc(size);
1125 new_block->host = qemu_anon_ram_alloc(size);
1127 memory_try_enable_merging(new_block->host, size);
1130 new_block->length = size;
1132 /* Keep the list sorted from biggest to smallest block. */
1133 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1134 if (block->length < new_block->length) {
1139 QTAILQ_INSERT_BEFORE(block, new_block, next);
1141 QTAILQ_INSERT_TAIL(&ram_list.blocks, new_block, next);
1143 ram_list.mru_block = NULL;
1146 qemu_mutex_unlock_ramlist();
1148 ram_list.phys_dirty = g_realloc(ram_list.phys_dirty,
1149 last_ram_offset() >> TARGET_PAGE_BITS);
1150 memset(ram_list.phys_dirty + (new_block->offset >> TARGET_PAGE_BITS),
1151 0, size >> TARGET_PAGE_BITS);
1152 cpu_physical_memory_set_dirty_range(new_block->offset, size, 0xff);
1154 qemu_ram_setup_dump(new_block->host, size);
1155 qemu_madvise(new_block->host, size, QEMU_MADV_HUGEPAGE);
1158 kvm_setup_guest_memory(new_block->host, size);
1160 return new_block->offset;
1163 ram_addr_t qemu_ram_alloc(ram_addr_t size, MemoryRegion *mr)
1165 return qemu_ram_alloc_from_ptr(size, NULL, mr);
1168 void qemu_ram_free_from_ptr(ram_addr_t addr)
1172 /* This assumes the iothread lock is taken here too. */
1173 qemu_mutex_lock_ramlist();
1174 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1175 if (addr == block->offset) {
1176 QTAILQ_REMOVE(&ram_list.blocks, block, next);
1177 ram_list.mru_block = NULL;
1183 qemu_mutex_unlock_ramlist();
1186 void qemu_ram_free(ram_addr_t addr)
1190 /* This assumes the iothread lock is taken here too. */
1191 qemu_mutex_lock_ramlist();
1192 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1193 if (addr == block->offset) {
1194 QTAILQ_REMOVE(&ram_list.blocks, block, next);
1195 ram_list.mru_block = NULL;
1197 if (block->flags & RAM_PREALLOC_MASK) {
1199 } else if (mem_path) {
1200 #if defined (__linux__) && !defined(TARGET_S390X)
1202 munmap(block->host, block->length);
1205 qemu_anon_ram_free(block->host, block->length);
1211 if (xen_enabled()) {
1212 xen_invalidate_map_cache_entry(block->host);
1214 qemu_anon_ram_free(block->host, block->length);
1221 qemu_mutex_unlock_ramlist();
1226 void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
1233 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1234 offset = addr - block->offset;
1235 if (offset < block->length) {
1236 vaddr = block->host + offset;
1237 if (block->flags & RAM_PREALLOC_MASK) {
1241 munmap(vaddr, length);
1243 #if defined(__linux__) && !defined(TARGET_S390X)
1246 flags |= mem_prealloc ? MAP_POPULATE | MAP_SHARED :
1249 flags |= MAP_PRIVATE;
1251 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1252 flags, block->fd, offset);
1254 flags |= MAP_PRIVATE | MAP_ANONYMOUS;
1255 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1262 #if defined(TARGET_S390X) && defined(CONFIG_KVM)
1263 flags |= MAP_SHARED | MAP_ANONYMOUS;
1264 area = mmap(vaddr, length, PROT_EXEC|PROT_READ|PROT_WRITE,
1267 flags |= MAP_PRIVATE | MAP_ANONYMOUS;
1268 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1272 if (area != vaddr) {
1273 fprintf(stderr, "Could not remap addr: "
1274 RAM_ADDR_FMT "@" RAM_ADDR_FMT "\n",
1278 memory_try_enable_merging(vaddr, length);
1279 qemu_ram_setup_dump(vaddr, length);
1285 #endif /* !_WIN32 */
1287 /* Return a host pointer to ram allocated with qemu_ram_alloc.
1288 With the exception of the softmmu code in this file, this should
1289 only be used for local memory (e.g. video ram) that the device owns,
1290 and knows it isn't going to access beyond the end of the block.
1292 It should not be used for general purpose DMA.
1293 Use cpu_physical_memory_map/cpu_physical_memory_rw instead.
1295 void *qemu_get_ram_ptr(ram_addr_t addr)
1299 /* The list is protected by the iothread lock here. */
1300 block = ram_list.mru_block;
1301 if (block && addr - block->offset < block->length) {
1304 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1305 if (addr - block->offset < block->length) {
1310 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
1314 ram_list.mru_block = block;
1315 if (xen_enabled()) {
1316 /* We need to check if the requested address is in the RAM
1317 * because we don't want to map the entire memory in QEMU.
1318 * In that case just map until the end of the page.
1320 if (block->offset == 0) {
1321 return xen_map_cache(addr, 0, 0);
1322 } else if (block->host == NULL) {
1324 xen_map_cache(block->offset, block->length, 1);
1327 return block->host + (addr - block->offset);
1330 /* Return a host pointer to ram allocated with qemu_ram_alloc. Same as
1331 * qemu_get_ram_ptr but does not touch ram_list.mru_block.
1333 * ??? Is this still necessary?
1335 static void *qemu_safe_ram_ptr(ram_addr_t addr)
1339 /* The list is protected by the iothread lock here. */
1340 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1341 if (addr - block->offset < block->length) {
1342 if (xen_enabled()) {
1343 /* We need to check if the requested address is in the RAM
1344 * because we don't want to map the entire memory in QEMU.
1345 * In that case just map until the end of the page.
1347 if (block->offset == 0) {
1348 return xen_map_cache(addr, 0, 0);
1349 } else if (block->host == NULL) {
1351 xen_map_cache(block->offset, block->length, 1);
1354 return block->host + (addr - block->offset);
1358 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
1364 /* Return a host pointer to guest's ram. Similar to qemu_get_ram_ptr
1365 * but takes a size argument */
1366 static void *qemu_ram_ptr_length(ram_addr_t addr, ram_addr_t *size)
1371 if (xen_enabled()) {
1372 return xen_map_cache(addr, *size, 1);
1376 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1377 if (addr - block->offset < block->length) {
1378 if (addr - block->offset + *size > block->length)
1379 *size = block->length - addr + block->offset;
1380 return block->host + (addr - block->offset);
1384 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
1389 int qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr)
1392 uint8_t *host = ptr;
1394 if (xen_enabled()) {
1395 *ram_addr = xen_ram_addr_from_mapcache(ptr);
1399 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1400 /* This case happens when the block is not mapped. */
1401 if (block->host == NULL) {
1404 if (host - block->host < block->length) {
1405 *ram_addr = block->offset + (host - block->host);
1413 /* Some of the softmmu routines need to translate from a host pointer
1414 (typically a TLB entry) back to a ram offset. */
1415 ram_addr_t qemu_ram_addr_from_host_nofail(void *ptr)
1417 ram_addr_t ram_addr;
1419 if (qemu_ram_addr_from_host(ptr, &ram_addr)) {
1420 fprintf(stderr, "Bad ram pointer %p\n", ptr);
1426 static void notdirty_mem_write(void *opaque, hwaddr ram_addr,
1427 uint64_t val, unsigned size)
1430 dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
1431 if (!(dirty_flags & CODE_DIRTY_FLAG)) {
1432 tb_invalidate_phys_page_fast(ram_addr, size);
1433 dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
1437 stb_p(qemu_get_ram_ptr(ram_addr), val);
1440 stw_p(qemu_get_ram_ptr(ram_addr), val);
1443 stl_p(qemu_get_ram_ptr(ram_addr), val);
1448 dirty_flags |= (0xff & ~CODE_DIRTY_FLAG);
1449 cpu_physical_memory_set_dirty_flags(ram_addr, dirty_flags);
1450 /* we remove the notdirty callback only if the code has been
1452 if (dirty_flags == 0xff)
1453 tlb_set_dirty(cpu_single_env, cpu_single_env->mem_io_vaddr);
1456 static bool notdirty_mem_accepts(void *opaque, hwaddr addr,
1457 unsigned size, bool is_write)
1462 static const MemoryRegionOps notdirty_mem_ops = {
1463 .write = notdirty_mem_write,
1464 .valid.accepts = notdirty_mem_accepts,
1465 .endianness = DEVICE_NATIVE_ENDIAN,
1468 /* Generate a debug exception if a watchpoint has been hit. */
1469 static void check_watchpoint(int offset, int len_mask, int flags)
1471 CPUArchState *env = cpu_single_env;
1472 target_ulong pc, cs_base;
1477 if (env->watchpoint_hit) {
1478 /* We re-entered the check after replacing the TB. Now raise
1479 * the debug interrupt so that it will trigger after the
1480 * current instruction. */
1481 cpu_interrupt(ENV_GET_CPU(env), CPU_INTERRUPT_DEBUG);
1484 vaddr = (env->mem_io_vaddr & TARGET_PAGE_MASK) + offset;
1485 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
1486 if ((vaddr == (wp->vaddr & len_mask) ||
1487 (vaddr & wp->len_mask) == wp->vaddr) && (wp->flags & flags)) {
1488 wp->flags |= BP_WATCHPOINT_HIT;
1489 if (!env->watchpoint_hit) {
1490 env->watchpoint_hit = wp;
1491 tb_check_watchpoint(env);
1492 if (wp->flags & BP_STOP_BEFORE_ACCESS) {
1493 env->exception_index = EXCP_DEBUG;
1496 cpu_get_tb_cpu_state(env, &pc, &cs_base, &cpu_flags);
1497 tb_gen_code(env, pc, cs_base, cpu_flags, 1);
1498 cpu_resume_from_signal(env, NULL);
1502 wp->flags &= ~BP_WATCHPOINT_HIT;
1507 /* Watchpoint access routines. Watchpoints are inserted using TLB tricks,
1508 so these check for a hit then pass through to the normal out-of-line
1510 static uint64_t watch_mem_read(void *opaque, hwaddr addr,
1513 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~(size - 1), BP_MEM_READ);
1515 case 1: return ldub_phys(addr);
1516 case 2: return lduw_phys(addr);
1517 case 4: return ldl_phys(addr);
1522 static void watch_mem_write(void *opaque, hwaddr addr,
1523 uint64_t val, unsigned size)
1525 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~(size - 1), BP_MEM_WRITE);
1528 stb_phys(addr, val);
1531 stw_phys(addr, val);
1534 stl_phys(addr, val);
1540 static const MemoryRegionOps watch_mem_ops = {
1541 .read = watch_mem_read,
1542 .write = watch_mem_write,
1543 .endianness = DEVICE_NATIVE_ENDIAN,
1546 static uint64_t subpage_read(void *opaque, hwaddr addr,
1549 subpage_t *mmio = opaque;
1550 unsigned int idx = SUBPAGE_IDX(addr);
1553 MemoryRegionSection *section;
1554 #if defined(DEBUG_SUBPAGE)
1555 printf("%s: subpage %p len %d addr " TARGET_FMT_plx " idx %d\n", __func__,
1556 mmio, len, addr, idx);
1559 section = &phys_sections[mmio->sub_section[idx]];
1561 addr -= section->offset_within_address_space;
1562 addr += section->offset_within_region;
1563 io_mem_read(section->mr, addr, &val, len);
1567 static void subpage_write(void *opaque, hwaddr addr,
1568 uint64_t value, unsigned len)
1570 subpage_t *mmio = opaque;
1571 unsigned int idx = SUBPAGE_IDX(addr);
1572 MemoryRegionSection *section;
1573 #if defined(DEBUG_SUBPAGE)
1574 printf("%s: subpage %p len %d addr " TARGET_FMT_plx
1575 " idx %d value %"PRIx64"\n",
1576 __func__, mmio, len, addr, idx, value);
1579 section = &phys_sections[mmio->sub_section[idx]];
1581 addr -= section->offset_within_address_space;
1582 addr += section->offset_within_region;
1583 io_mem_write(section->mr, addr, value, len);
1586 static bool subpage_accepts(void *opaque, hwaddr addr,
1587 unsigned size, bool is_write)
1589 subpage_t *mmio = opaque;
1590 unsigned int idx = SUBPAGE_IDX(addr);
1591 MemoryRegionSection *section;
1592 #if defined(DEBUG_SUBPAGE)
1593 printf("%s: subpage %p %c len %d addr " TARGET_FMT_plx
1594 " idx %d\n", __func__, mmio,
1595 is_write ? 'w' : 'r', size, addr, idx);
1598 section = &phys_sections[mmio->sub_section[idx]];
1600 addr -= section->offset_within_address_space;
1601 addr += section->offset_within_region;
1602 return memory_region_access_valid(section->mr, addr, size, is_write);
1605 static const MemoryRegionOps subpage_ops = {
1606 .read = subpage_read,
1607 .write = subpage_write,
1608 .valid.accepts = subpage_accepts,
1609 .endianness = DEVICE_NATIVE_ENDIAN,
1612 static uint64_t subpage_ram_read(void *opaque, hwaddr addr,
1615 ram_addr_t raddr = addr;
1616 void *ptr = qemu_get_ram_ptr(raddr);
1618 case 1: return ldub_p(ptr);
1619 case 2: return lduw_p(ptr);
1620 case 4: return ldl_p(ptr);
1625 static void subpage_ram_write(void *opaque, hwaddr addr,
1626 uint64_t value, unsigned size)
1628 ram_addr_t raddr = addr;
1629 void *ptr = qemu_get_ram_ptr(raddr);
1631 case 1: return stb_p(ptr, value);
1632 case 2: return stw_p(ptr, value);
1633 case 4: return stl_p(ptr, value);
1638 static const MemoryRegionOps subpage_ram_ops = {
1639 .read = subpage_ram_read,
1640 .write = subpage_ram_write,
1641 .endianness = DEVICE_NATIVE_ENDIAN,
1644 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
1649 if (start >= TARGET_PAGE_SIZE || end >= TARGET_PAGE_SIZE)
1651 idx = SUBPAGE_IDX(start);
1652 eidx = SUBPAGE_IDX(end);
1653 #if defined(DEBUG_SUBPAGE)
1654 printf("%s: %p start %08x end %08x idx %08x eidx %08x section %d\n", __func__,
1655 mmio, start, end, idx, eidx, section);
1657 if (memory_region_is_ram(phys_sections[section].mr)) {
1658 MemoryRegionSection new_section = phys_sections[section];
1659 new_section.mr = &io_mem_subpage_ram;
1660 section = phys_section_add(&new_section);
1662 for (; idx <= eidx; idx++) {
1663 mmio->sub_section[idx] = section;
1669 static subpage_t *subpage_init(hwaddr base)
1673 mmio = g_malloc0(sizeof(subpage_t));
1676 memory_region_init_io(&mmio->iomem, &subpage_ops, mmio,
1677 "subpage", TARGET_PAGE_SIZE);
1678 mmio->iomem.subpage = true;
1679 #if defined(DEBUG_SUBPAGE)
1680 printf("%s: %p base " TARGET_FMT_plx " len %08x\n", __func__,
1681 mmio, base, TARGET_PAGE_SIZE);
1683 subpage_register(mmio, 0, TARGET_PAGE_SIZE-1, phys_section_unassigned);
1688 static uint16_t dummy_section(MemoryRegion *mr)
1690 MemoryRegionSection section = {
1692 .offset_within_address_space = 0,
1693 .offset_within_region = 0,
1697 return phys_section_add(&section);
1700 MemoryRegion *iotlb_to_region(hwaddr index)
1702 return phys_sections[index & ~TARGET_PAGE_MASK].mr;
1705 static void io_mem_init(void)
1707 memory_region_init_io(&io_mem_rom, &unassigned_mem_ops, NULL, "rom", UINT64_MAX);
1708 memory_region_init_io(&io_mem_unassigned, &unassigned_mem_ops, NULL,
1709 "unassigned", UINT64_MAX);
1710 memory_region_init_io(&io_mem_notdirty, &notdirty_mem_ops, NULL,
1711 "notdirty", UINT64_MAX);
1712 memory_region_init_io(&io_mem_subpage_ram, &subpage_ram_ops, NULL,
1713 "subpage-ram", UINT64_MAX);
1714 memory_region_init_io(&io_mem_watch, &watch_mem_ops, NULL,
1715 "watch", UINT64_MAX);
1718 static void mem_begin(MemoryListener *listener)
1720 AddressSpaceDispatch *d = container_of(listener, AddressSpaceDispatch, listener);
1722 destroy_all_mappings(d);
1723 d->phys_map.ptr = PHYS_MAP_NODE_NIL;
1726 static void core_begin(MemoryListener *listener)
1728 phys_sections_clear();
1729 phys_section_unassigned = dummy_section(&io_mem_unassigned);
1730 phys_section_notdirty = dummy_section(&io_mem_notdirty);
1731 phys_section_rom = dummy_section(&io_mem_rom);
1732 phys_section_watch = dummy_section(&io_mem_watch);
1735 static void tcg_commit(MemoryListener *listener)
1739 /* since each CPU stores ram addresses in its TLB cache, we must
1740 reset the modified entries */
1742 for(env = first_cpu; env != NULL; env = env->next_cpu) {
1747 static void core_log_global_start(MemoryListener *listener)
1749 cpu_physical_memory_set_dirty_tracking(1);
1752 static void core_log_global_stop(MemoryListener *listener)
1754 cpu_physical_memory_set_dirty_tracking(0);
1757 static void io_region_add(MemoryListener *listener,
1758 MemoryRegionSection *section)
1760 MemoryRegionIORange *mrio = g_new(MemoryRegionIORange, 1);
1762 mrio->mr = section->mr;
1763 mrio->offset = section->offset_within_region;
1764 iorange_init(&mrio->iorange, &memory_region_iorange_ops,
1765 section->offset_within_address_space, section->size);
1766 ioport_register(&mrio->iorange);
1769 static void io_region_del(MemoryListener *listener,
1770 MemoryRegionSection *section)
1772 isa_unassign_ioport(section->offset_within_address_space, section->size);
1775 static MemoryListener core_memory_listener = {
1776 .begin = core_begin,
1777 .log_global_start = core_log_global_start,
1778 .log_global_stop = core_log_global_stop,
1782 static MemoryListener io_memory_listener = {
1783 .region_add = io_region_add,
1784 .region_del = io_region_del,
1788 static MemoryListener tcg_memory_listener = {
1789 .commit = tcg_commit,
1792 void address_space_init_dispatch(AddressSpace *as)
1794 AddressSpaceDispatch *d = g_new(AddressSpaceDispatch, 1);
1796 d->phys_map = (PhysPageEntry) { .ptr = PHYS_MAP_NODE_NIL, .is_leaf = 0 };
1797 d->listener = (MemoryListener) {
1799 .region_add = mem_add,
1800 .region_nop = mem_add,
1804 memory_listener_register(&d->listener, as);
1807 void address_space_destroy_dispatch(AddressSpace *as)
1809 AddressSpaceDispatch *d = as->dispatch;
1811 memory_listener_unregister(&d->listener);
1812 destroy_l2_mapping(&d->phys_map, P_L2_LEVELS - 1);
1814 as->dispatch = NULL;
1817 static void memory_map_init(void)
1819 system_memory = g_malloc(sizeof(*system_memory));
1820 memory_region_init(system_memory, "system", INT64_MAX);
1821 address_space_init(&address_space_memory, system_memory);
1822 address_space_memory.name = "memory";
1824 system_io = g_malloc(sizeof(*system_io));
1825 memory_region_init(system_io, "io", 65536);
1826 address_space_init(&address_space_io, system_io);
1827 address_space_io.name = "I/O";
1829 memory_listener_register(&core_memory_listener, &address_space_memory);
1830 memory_listener_register(&io_memory_listener, &address_space_io);
1831 memory_listener_register(&tcg_memory_listener, &address_space_memory);
1833 dma_context_init(&dma_context_memory, &address_space_memory,
1837 MemoryRegion *get_system_memory(void)
1839 return system_memory;
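/*
 * A minimal sketch of how board code is expected to plug guest RAM into
 * the "system" region created by memory_map_init() above.  It assumes the
 * memory_region_init_ram() and memory_region_add_subregion() helpers
 * declared in exec/memory.h; the region name and the base address 0 are
 * made up for the example.  memory_region_init_ram() ends up in
 * qemu_ram_alloc() in this file for the actual backing storage.
 */
static void __attribute__((unused)) example_map_board_ram(ram_addr_t ram_size)
{
    /* Static so the region outlives this function; boards normally keep
     * it in their machine state instead. */
    static MemoryRegion board_ram;

    memory_region_init_ram(&board_ram, "example.ram", ram_size);
    /* Make it guest-visible starting at physical address 0. */
    memory_region_add_subregion(get_system_memory(), 0, &board_ram);
}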
1842 MemoryRegion *get_system_io(void)
1847 #endif /* !defined(CONFIG_USER_ONLY) */
1849 /* physical memory access (slow version, mainly for debug) */
1850 #if defined(CONFIG_USER_ONLY)
1851 int cpu_memory_rw_debug(CPUArchState *env, target_ulong addr,
1852 uint8_t *buf, int len, int is_write)
1859 page = addr & TARGET_PAGE_MASK;
1860 l = (page + TARGET_PAGE_SIZE) - addr;
1863 flags = page_get_flags(page);
1864 if (!(flags & PAGE_VALID))
1867 if (!(flags & PAGE_WRITE))
1869 /* XXX: this code should not depend on lock_user */
1870 if (!(p = lock_user(VERIFY_WRITE, addr, l, 0)))
1873 unlock_user(p, addr, l);
1875 if (!(flags & PAGE_READ))
1877 /* XXX: this code should not depend on lock_user */
1878 if (!(p = lock_user(VERIFY_READ, addr, l, 1)))
1881 unlock_user(p, addr, 0);
1892 static void invalidate_and_set_dirty(hwaddr addr,
1895 if (!cpu_physical_memory_is_dirty(addr)) {
1896 /* invalidate code */
1897 tb_invalidate_phys_page_range(addr, addr + length, 0);
1899 cpu_physical_memory_set_dirty_flags(addr, (0xff & ~CODE_DIRTY_FLAG));
1901 xen_modified_memory(addr, length);
1904 static inline bool memory_access_is_direct(MemoryRegion *mr, bool is_write)
1906 if (memory_region_is_ram(mr)) {
1907 return !(is_write && mr->readonly);
1909 if (memory_region_is_romd(mr)) {
1916 static inline int memory_access_size(MemoryRegion *mr, int l, hwaddr addr)
1918 if (l >= 4 && (((addr & 3) == 0 || mr->ops->impl.unaligned))) {
1921 if (l >= 2 && (((addr & 1) == 0) || mr->ops->impl.unaligned)) {
1927 bool address_space_rw(AddressSpace *as, hwaddr addr, uint8_t *buf,
1928 int len, bool is_write)
1934 MemoryRegionSection *section;
1939 section = address_space_translate(as, addr, &addr1, &l, is_write);
1942 if (!memory_access_is_direct(section->mr, is_write)) {
1943 l = memory_access_size(section->mr, l, addr1);
1944 /* XXX: could force cpu_single_env to NULL to avoid
1947 /* 32 bit write access */
1949 error |= io_mem_write(section->mr, addr1, val, 4);
1950 } else if (l == 2) {
1951 /* 16 bit write access */
1953 error |= io_mem_write(section->mr, addr1, val, 2);
1955 /* 8 bit write access */
1957 error |= io_mem_write(section->mr, addr1, val, 1);
1960 addr1 += memory_region_get_ram_addr(section->mr);
1962 ptr = qemu_get_ram_ptr(addr1);
1963 memcpy(ptr, buf, l);
1964 invalidate_and_set_dirty(addr1, l);
1967 if (!memory_access_is_direct(section->mr, is_write)) {
1969 l = memory_access_size(section->mr, l, addr1);
1971 /* 32 bit read access */
1972 error |= io_mem_read(section->mr, addr1, &val, 4);
1974 } else if (l == 2) {
1975 /* 16 bit read access */
1976 error |= io_mem_read(section->mr, addr1, &val, 2);
1979 /* 8 bit read access */
1980 error |= io_mem_read(section->mr, addr1, &val, 1);
1985 ptr = qemu_get_ram_ptr(section->mr->ram_addr + addr1);
1986 memcpy(buf, ptr, l);
1997 bool address_space_write(AddressSpace *as, hwaddr addr,
1998 const uint8_t *buf, int len)
2000 return address_space_rw(as, addr, (uint8_t *)buf, len, true);
2003 bool address_space_read(AddressSpace *as, hwaddr addr, uint8_t *buf, int len)
2005 return address_space_rw(as, addr, buf, len, false);
2009 void cpu_physical_memory_rw(hwaddr addr, uint8_t *buf,
2010 int len, int is_write)
2012 address_space_rw(&address_space_memory, addr, buf, len, is_write);
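/*
 * A minimal sketch of DMA-style guest memory access through the
 * cpu_physical_memory_read()/write() wrappers around the function above
 * (assumed to be the inline helpers from exec/cpu-common.h, as used
 * elsewhere in this file).  The "set a status bit" operation is made up.
 */
static void __attribute__((unused)) example_patch_guest_word(hwaddr addr)
{
    uint32_t word;

    cpu_physical_memory_read(addr, &word, sizeof(word));
    word |= 1;                                 /* e.g. set a status bit */
    cpu_physical_memory_write(addr, &word, sizeof(word));
}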
2015 /* used for ROM loading : can write in RAM and ROM */
2016 void cpu_physical_memory_write_rom(hwaddr addr,
2017 const uint8_t *buf, int len)
2022 MemoryRegionSection *section;
2026 section = address_space_translate(&address_space_memory,
2027 addr, &addr1, &l, true);
2029 if (!(memory_region_is_ram(section->mr) ||
2030 memory_region_is_romd(section->mr))) {
2033 addr1 += memory_region_get_ram_addr(section->mr);
2035 ptr = qemu_get_ram_ptr(addr1);
2036 memcpy(ptr, buf, l);
2037 invalidate_and_set_dirty(addr1, l);
2051 static BounceBuffer bounce;
2053 typedef struct MapClient {
2055 void (*callback)(void *opaque);
2056 QLIST_ENTRY(MapClient) link;
2059 static QLIST_HEAD(map_client_list, MapClient) map_client_list
2060 = QLIST_HEAD_INITIALIZER(map_client_list);
2062 void *cpu_register_map_client(void *opaque, void (*callback)(void *opaque))
2064 MapClient *client = g_malloc(sizeof(*client));
2066 client->opaque = opaque;
2067 client->callback = callback;
2068 QLIST_INSERT_HEAD(&map_client_list, client, link);
2072 static void cpu_unregister_map_client(void *_client)
2074 MapClient *client = (MapClient *)_client;
2076 QLIST_REMOVE(client, link);
2080 static void cpu_notify_map_clients(void)
2084 while (!QLIST_EMPTY(&map_client_list)) {
2085 client = QLIST_FIRST(&map_client_list);
2086 client->callback(client->opaque);
2087 cpu_unregister_map_client(client);
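/*
 * A minimal sketch of how cpu_register_map_client() is meant to be used:
 * when address_space_map() has to fall back to the single bounce buffer
 * and that buffer is busy, the map returns NULL; the registered callback
 * is invoked from cpu_notify_map_clients() above once the buffer is
 * released, and the caller retries the mapping then.  example_retry_cb()
 * and the dma_state argument are made up.
 */
static void __attribute__((unused)) example_retry_cb(void *opaque)
{
    /* A real client would redo its failed cpu_physical_memory_map() here,
     * using the state passed as opaque. */
}

static void __attribute__((unused))
example_map_or_wait(void *dma_state, hwaddr addr, hwaddr *plen, int is_write)
{
    void *p = cpu_physical_memory_map(addr, plen, is_write);

    if (p == NULL) {
        /* Bounce buffer in use: ask to be notified when it frees up. */
        cpu_register_map_client(dma_state, example_retry_cb);
        return;
    }
    /* ... use p for up to *plen bytes, then cpu_physical_memory_unmap() ... */
}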
2091 bool address_space_access_valid(AddressSpace *as, hwaddr addr, int len, bool is_write)
2093 MemoryRegionSection *section;
2098 section = address_space_translate(as, addr, &xlat, &l, is_write);
2099 if (!memory_access_is_direct(section->mr, is_write)) {
2100 l = memory_access_size(section->mr, l, addr);
2101 if (!memory_region_access_valid(section->mr, xlat, l, is_write)) {
2112 /* Map a physical memory region into a host virtual address.
2113 * May map a subset of the requested range, given by and returned in *plen.
2114 * May return NULL if resources needed to perform the mapping are exhausted.
2115 * Use only for reads OR writes - not for read-modify-write operations.
2116 * Use cpu_register_map_client() to know when retrying the map operation is
2117 * likely to succeed.
2119 void *address_space_map(AddressSpace *as,
2127 MemoryRegionSection *section;
2128 ram_addr_t raddr = RAM_ADDR_MAX;
2134 section = address_space_translate(as, addr, &xlat, &l, is_write);
2136 if (!memory_access_is_direct(section->mr, is_write)) {
2137 if (todo || bounce.buffer) {
2140 bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, TARGET_PAGE_SIZE);
2144 address_space_read(as, addr, bounce.buffer, l);
2148 return bounce.buffer;
2151 raddr = memory_region_get_ram_addr(section->mr) + xlat;
2153 if (memory_region_get_ram_addr(section->mr) + xlat != raddr + todo) {
2163 ret = qemu_ram_ptr_length(raddr, &rlen);
2168 /* Unmaps a memory region previously mapped by address_space_map().
2169 * Will also mark the memory as dirty if is_write == 1. access_len gives
2170 * the amount of memory that was actually read or written by the caller.
2172 void address_space_unmap(AddressSpace *as, void *buffer, hwaddr len,
2173 int is_write, hwaddr access_len)
2175 if (buffer != bounce.buffer) {
2177 ram_addr_t addr1 = qemu_ram_addr_from_host_nofail(buffer);
2178 while (access_len) {
2180 l = TARGET_PAGE_SIZE;
2183 invalidate_and_set_dirty(addr1, l);
2188 if (xen_enabled()) {
2189 xen_invalidate_map_cache_entry(buffer);
2194 address_space_write(as, bounce.addr, bounce.buffer, access_len);
2196 qemu_vfree(bounce.buffer);
2197 bounce.buffer = NULL;
2198 cpu_notify_map_clients();
2201 void *cpu_physical_memory_map(hwaddr addr,
2205 return address_space_map(&address_space_memory, addr, plen, is_write);
2208 void cpu_physical_memory_unmap(void *buffer, hwaddr len,
2209 int is_write, hwaddr access_len)
2211 return address_space_unmap(&address_space_memory, buffer, len, is_write, access_len);
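/*
 * A minimal sketch of the canonical map -> access -> unmap pattern for the
 * wrappers above.  The mapped length can come back smaller than requested,
 * and NULL means the caller should fall back to the copying path (or wait
 * via cpu_register_map_client(), see above).  Zeroing a guest buffer is a
 * made-up workload.
 */
static void __attribute__((unused))
example_zero_guest_buffer(hwaddr addr, hwaddr len)
{
    while (len > 0) {
        hwaddr plen = len;
        void *p = cpu_physical_memory_map(addr, &plen, 1 /* is_write */);

        if (p == NULL) {
            /* Resources exhausted: take the slow, copying path instead. */
            uint8_t zero = 0;
            cpu_physical_memory_write(addr, &zero, 1);
            plen = 1;
        } else {
            memset(p, 0, plen);
            /* access_len == plen: everything we mapped was written. */
            cpu_physical_memory_unmap(p, plen, 1, plen);
        }
        addr += plen;
        len -= plen;
    }
}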
2214 /* warning: addr must be aligned */
2215 static inline uint32_t ldl_phys_internal(hwaddr addr,
2216 enum device_endian endian)
2220 MemoryRegionSection *section;
2224 section = address_space_translate(&address_space_memory, addr, &addr1, &l,
2226 if (l < 4 || !memory_access_is_direct(section->mr, false)) {
2228 io_mem_read(section->mr, addr1, &val, 4);
2229 #if defined(TARGET_WORDS_BIGENDIAN)
2230 if (endian == DEVICE_LITTLE_ENDIAN) {
2234 if (endian == DEVICE_BIG_ENDIAN) {
2240 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section->mr)
2244 case DEVICE_LITTLE_ENDIAN:
2245 val = ldl_le_p(ptr);
2247 case DEVICE_BIG_ENDIAN:
2248 val = ldl_be_p(ptr);
2258 uint32_t ldl_phys(hwaddr addr)
2260 return ldl_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
2263 uint32_t ldl_le_phys(hwaddr addr)
2265 return ldl_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
2268 uint32_t ldl_be_phys(hwaddr addr)
2270 return ldl_phys_internal(addr, DEVICE_BIG_ENDIAN);
2273 /* warning: addr must be aligned */
2274 static inline uint64_t ldq_phys_internal(hwaddr addr,
2275 enum device_endian endian)
2279 MemoryRegionSection *section;
2283 section = address_space_translate(&address_space_memory, addr, &addr1, &l,
2285 if (l < 8 || !memory_access_is_direct(section->mr, false)) {
2287 io_mem_read(section->mr, addr1, &val, 8);
2288 #if defined(TARGET_WORDS_BIGENDIAN)
2289 if (endian == DEVICE_LITTLE_ENDIAN) {
2293 if (endian == DEVICE_BIG_ENDIAN) {
2299 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section->mr)
2303 case DEVICE_LITTLE_ENDIAN:
2304 val = ldq_le_p(ptr);
2306 case DEVICE_BIG_ENDIAN:
2307 val = ldq_be_p(ptr);
2317 uint64_t ldq_phys(hwaddr addr)
2319 return ldq_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
2322 uint64_t ldq_le_phys(hwaddr addr)
2324 return ldq_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
2327 uint64_t ldq_be_phys(hwaddr addr)
2329 return ldq_phys_internal(addr, DEVICE_BIG_ENDIAN);
2333 uint32_t ldub_phys(hwaddr addr)
2336 cpu_physical_memory_read(addr, &val, 1);
2340 /* warning: addr must be aligned */
2341 static inline uint32_t lduw_phys_internal(hwaddr addr,
2342 enum device_endian endian)
2346 MemoryRegionSection *section;
2350 section = address_space_translate(&address_space_memory, addr, &addr1, &l,
2352 if (l < 2 || !memory_access_is_direct(section->mr, false)) {
2354 io_mem_read(section->mr, addr1, &val, 2);
2355 #if defined(TARGET_WORDS_BIGENDIAN)
2356 if (endian == DEVICE_LITTLE_ENDIAN) {
2360 if (endian == DEVICE_BIG_ENDIAN) {
2366 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section->mr)
2370 case DEVICE_LITTLE_ENDIAN:
2371 val = lduw_le_p(ptr);
2373 case DEVICE_BIG_ENDIAN:
2374 val = lduw_be_p(ptr);
2384 uint32_t lduw_phys(hwaddr addr)
2386 return lduw_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
2389 uint32_t lduw_le_phys(hwaddr addr)
2391 return lduw_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
2394 uint32_t lduw_be_phys(hwaddr addr)
2396 return lduw_phys_internal(addr, DEVICE_BIG_ENDIAN);
2399 /* warning: addr must be aligned. The ram page is not masked as dirty
2400 and the code inside is not invalidated. It is useful if the dirty
2401 bits are used to track modified PTEs */
2402 void stl_phys_notdirty(hwaddr addr, uint32_t val)
2405 MemoryRegionSection *section;
2409 section = address_space_translate(&address_space_memory, addr, &addr1, &l,
2411 if (l < 4 || !memory_access_is_direct(section->mr, true)) {
2412 io_mem_write(section->mr, addr1, val, 4);
2414 addr1 += memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK;
2415 ptr = qemu_get_ram_ptr(addr1);
2418 if (unlikely(in_migration)) {
2419 if (!cpu_physical_memory_is_dirty(addr1)) {
2420 /* invalidate code */
2421 tb_invalidate_phys_page_range(addr1, addr1 + 4, 0);
2423 cpu_physical_memory_set_dirty_flags(
2424 addr1, (0xff & ~CODE_DIRTY_FLAG));
2430 /* warning: addr must be aligned */
2431 static inline void stl_phys_internal(hwaddr addr, uint32_t val,
2432 enum device_endian endian)
2435 MemoryRegionSection *section;
2439 section = address_space_translate(&address_space_memory, addr, &addr1, &l,
2441 if (l < 4 || !memory_access_is_direct(section->mr, true)) {
2442 #if defined(TARGET_WORDS_BIGENDIAN)
2443 if (endian == DEVICE_LITTLE_ENDIAN) {
2447 if (endian == DEVICE_BIG_ENDIAN) {
2451 io_mem_write(section->mr, addr1, val, 4);
2454 addr1 += memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK;
2455 ptr = qemu_get_ram_ptr(addr1);
2457 case DEVICE_LITTLE_ENDIAN:
2460 case DEVICE_BIG_ENDIAN:
2467 invalidate_and_set_dirty(addr1, 4);
2471 void stl_phys(hwaddr addr, uint32_t val)
2473 stl_phys_internal(addr, val, DEVICE_NATIVE_ENDIAN);
2476 void stl_le_phys(hwaddr addr, uint32_t val)
2478 stl_phys_internal(addr, val, DEVICE_LITTLE_ENDIAN);
2481 void stl_be_phys(hwaddr addr, uint32_t val)
2483 stl_phys_internal(addr, val, DEVICE_BIG_ENDIAN);
2487 void stb_phys(hwaddr addr, uint32_t val)
2490 cpu_physical_memory_write(addr, &v, 1);
2493 /* warning: addr must be aligned */
2494 static inline void stw_phys_internal(hwaddr addr, uint32_t val,
2495 enum device_endian endian)
2498 MemoryRegionSection *section;
2502 section = address_space_translate(&address_space_memory, addr, &addr1, &l,
2504 if (l < 2 || !memory_access_is_direct(section->mr, true)) {
2505 #if defined(TARGET_WORDS_BIGENDIAN)
2506 if (endian == DEVICE_LITTLE_ENDIAN) {
2510 if (endian == DEVICE_BIG_ENDIAN) {
2514 io_mem_write(section->mr, addr1, val, 2);
2517 addr1 += memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK;
2518 ptr = qemu_get_ram_ptr(addr1);
2520 case DEVICE_LITTLE_ENDIAN:
2523 case DEVICE_BIG_ENDIAN:
2530 invalidate_and_set_dirty(addr1, 2);
2534 void stw_phys(hwaddr addr, uint32_t val)
2536 stw_phys_internal(addr, val, DEVICE_NATIVE_ENDIAN);
2539 void stw_le_phys(hwaddr addr, uint32_t val)
2541 stw_phys_internal(addr, val, DEVICE_LITTLE_ENDIAN);
2544 void stw_be_phys(hwaddr addr, uint32_t val)
2546 stw_phys_internal(addr, val, DEVICE_BIG_ENDIAN);
2550 void stq_phys(hwaddr addr, uint64_t val)
2553 cpu_physical_memory_write(addr, &val, 8);
2556 void stq_le_phys(hwaddr addr, uint64_t val)
2558 val = cpu_to_le64(val);
2559 cpu_physical_memory_write(addr, &val, 8);
2562 void stq_be_phys(hwaddr addr, uint64_t val)
2564 val = cpu_to_be64(val);
2565 cpu_physical_memory_write(addr, &val, 8);
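/*
 * A minimal sketch of why the fixed-endian helpers above exist: device
 * models use them so that guest-visible structures keep their defined byte
 * order regardless of host endianness.  The descriptor layout (a 32-bit
 * little-endian "status" field at offset 8) is made up for the example.
 */
static uint32_t __attribute__((unused))
example_update_le_desc_status(hwaddr desc_base, uint32_t status)
{
    stl_le_phys(desc_base + 8, status);
    return ldl_le_phys(desc_base + 8);
}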
2568 /* virtual memory access for debug (includes writing to ROM) */
2569 int cpu_memory_rw_debug(CPUArchState *env, target_ulong addr,
2570 uint8_t *buf, int len, int is_write)
2577 page = addr & TARGET_PAGE_MASK;
2578 phys_addr = cpu_get_phys_page_debug(env, page);
2579 /* if no physical page mapped, return an error */
2580 if (phys_addr == -1)
2582 l = (page + TARGET_PAGE_SIZE) - addr;
2585 phys_addr += (addr & ~TARGET_PAGE_MASK);
2587 cpu_physical_memory_write_rom(phys_addr, buf, l);
2589 cpu_physical_memory_rw(phys_addr, buf, l, is_write);
2598 #if !defined(CONFIG_USER_ONLY)
2601 * A helper function for the _utterly broken_ virtio device model to find out if
2602 * it's running on a big endian machine. Don't do this at home kids!
2604 bool virtio_is_big_endian(void);
2605 bool virtio_is_big_endian(void)
2607 #if defined(TARGET_WORDS_BIGENDIAN)
2616 #ifndef CONFIG_USER_ONLY
2617 bool cpu_physical_memory_is_io(hwaddr phys_addr)
2619 MemoryRegionSection *section;
2622 section = address_space_translate(&address_space_memory,
2623 phys_addr, &phys_addr, &l, false);
2625 return !(memory_region_is_ram(section->mr) ||
2626 memory_region_is_romd(section->mr));