4 * Copyright (c) 2003 Fabrice Bellard
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
23 #include <sys/types.h>
27 #include "qemu-common.h"
32 #include "qemu/osdep.h"
33 #include "sysemu/kvm.h"
34 #include "hw/xen/xen.h"
35 #include "qemu/timer.h"
36 #include "qemu/config-file.h"
37 #include "exec/memory.h"
38 #include "sysemu/dma.h"
39 #include "exec/address-spaces.h"
40 #if defined(CONFIG_USER_ONLY)
42 #else /* !CONFIG_USER_ONLY */
43 #include "sysemu/xen-mapcache.h"
46 #include "exec/cpu-all.h"
48 #include "exec/cputlb.h"
49 #include "translate-all.h"
51 #include "exec/memory-internal.h"
53 //#define DEBUG_SUBPAGE
55 #if !defined(CONFIG_USER_ONLY)
57 static int in_migration;
59 RAMList ram_list = { .blocks = QTAILQ_HEAD_INITIALIZER(ram_list.blocks) };
61 static MemoryRegion *system_memory;
62 static MemoryRegion *system_io;
64 AddressSpace address_space_io;
65 AddressSpace address_space_memory;
66 DMAContext dma_context_memory;
68 MemoryRegion io_mem_rom, io_mem_notdirty;
69 static MemoryRegion io_mem_unassigned, io_mem_subpage_ram;
73 CPUArchState *first_cpu;
74 /* current CPU in the current thread. It is only valid inside cpu_exec() */
76 DEFINE_TLS(CPUArchState *,cpu_single_env);
77 /* 0 = Do not count executed instructions.
78 1 = Precise instruction counting.
79 2 = Adaptive rate instruction counting. */
82 #if !defined(CONFIG_USER_ONLY)
84 static MemoryRegionSection *phys_sections;
85 static unsigned phys_sections_nb, phys_sections_nb_alloc;
86 static uint16_t phys_section_unassigned;
87 static uint16_t phys_section_notdirty;
88 static uint16_t phys_section_rom;
89 static uint16_t phys_section_watch;
91 /* Simple allocator for PhysPageEntry nodes */
92 static PhysPageEntry (*phys_map_nodes)[L2_SIZE];
93 static unsigned phys_map_nodes_nb, phys_map_nodes_nb_alloc;
95 #define PHYS_MAP_NODE_NIL (((uint16_t)~0) >> 1)
97 static void io_mem_init(void);
98 static void memory_map_init(void);
99 static void *qemu_safe_ram_ptr(ram_addr_t addr);
101 static MemoryRegion io_mem_watch;
104 #if !defined(CONFIG_USER_ONLY)
106 static void phys_map_node_reserve(unsigned nodes)
108 if (phys_map_nodes_nb + nodes > phys_map_nodes_nb_alloc) {
109 typedef PhysPageEntry Node[L2_SIZE];
110 phys_map_nodes_nb_alloc = MAX(phys_map_nodes_nb_alloc * 2, 16);
111 phys_map_nodes_nb_alloc = MAX(phys_map_nodes_nb_alloc,
112 phys_map_nodes_nb + nodes);
113 phys_map_nodes = g_renew(Node, phys_map_nodes,
114 phys_map_nodes_nb_alloc);
118 static uint16_t phys_map_node_alloc(void)
123 ret = phys_map_nodes_nb++;
124 assert(ret != PHYS_MAP_NODE_NIL);
125 assert(ret != phys_map_nodes_nb_alloc);
126 for (i = 0; i < L2_SIZE; ++i) {
127 phys_map_nodes[ret][i].is_leaf = 0;
128 phys_map_nodes[ret][i].ptr = PHYS_MAP_NODE_NIL;
133 static void phys_map_nodes_reset(void)
135 phys_map_nodes_nb = 0;
139 static void phys_page_set_level(PhysPageEntry *lp, hwaddr *index,
140 hwaddr *nb, uint16_t leaf,
145 hwaddr step = (hwaddr)1 << (level * L2_BITS);
147 if (!lp->is_leaf && lp->ptr == PHYS_MAP_NODE_NIL) {
148 lp->ptr = phys_map_node_alloc();
149 p = phys_map_nodes[lp->ptr];
151 for (i = 0; i < L2_SIZE; i++) {
153 p[i].ptr = phys_section_unassigned;
157 p = phys_map_nodes[lp->ptr];
159 lp = &p[(*index >> (level * L2_BITS)) & (L2_SIZE - 1)];
161 while (*nb && lp < &p[L2_SIZE]) {
162 if ((*index & (step - 1)) == 0 && *nb >= step) {
168 phys_page_set_level(lp, index, nb, leaf, level - 1);
174 static void phys_page_set(AddressSpaceDispatch *d,
175 hwaddr index, hwaddr nb,
178 /* Wildly overreserve - it doesn't matter much. */
179 phys_map_node_reserve(3 * P_L2_LEVELS);
181 phys_page_set_level(&d->phys_map, &index, &nb, leaf, P_L2_LEVELS - 1);
184 static MemoryRegionSection *phys_page_find(AddressSpaceDispatch *d, hwaddr index)
186 PhysPageEntry lp = d->phys_map;
190 for (i = P_L2_LEVELS - 1; i >= 0 && !lp.is_leaf; i--) {
191 if (lp.ptr == PHYS_MAP_NODE_NIL) {
192 return &phys_sections[phys_section_unassigned];
194 p = phys_map_nodes[lp.ptr];
195 lp = p[(index >> (i * L2_BITS)) & (L2_SIZE - 1)];
197 return &phys_sections[lp.ptr];
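/*
 * Added note: phys_page_find() walks a small radix tree.  Each node holds
 * L2_SIZE entries and each level consumes L2_BITS bits of the page index;
 * assuming L2_BITS were 10, a page index of 0x12345 would be split into the
 * slices (0x12345 >> 10) & 0x3ff and 0x12345 & 0x3ff.  A leaf's ptr field is
 * an index into phys_sections[], and missing nodes fall back to
 * phys_section_unassigned.
 */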
200 bool memory_region_is_unassigned(MemoryRegion *mr)
202 return mr != &io_mem_rom && mr != &io_mem_notdirty && !mr->rom_device
203 && mr != &io_mem_watch;
206 static MemoryRegionSection *address_space_lookup_region(AddressSpace *as,
209 return phys_page_find(as->dispatch, addr >> TARGET_PAGE_BITS);
212 MemoryRegionSection *address_space_translate(AddressSpace *as, hwaddr addr,
213 hwaddr *xlat, hwaddr *plen,
216 MemoryRegionSection *section;
219 section = address_space_lookup_region(as, addr);
220 /* Compute offset within MemoryRegionSection */
221 addr -= section->offset_within_address_space;
223 /* Compute offset within MemoryRegion */
224 *xlat = addr + section->offset_within_region;
226 diff = int128_sub(section->mr->size, int128_make64(addr));
227 *plen = int128_get64(int128_min(diff, int128_make64(*plen)));
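/*
 * Usage sketch (added, illustrative only), mirroring address_space_rw()
 * further down in this file:
 *
 *     hwaddr xlat, l = len;
 *     MemoryRegionSection *sec =
 *         address_space_translate(as, addr, &xlat, &l, is_write);
 *     if (memory_access_is_direct(sec->mr, is_write)) {
 *         ... access qemu_get_ram_ptr(memory_region_get_ram_addr(sec->mr)
 *                                     + xlat) directly ...
 *     } else {
 *         ... io_mem_read()/io_mem_write() on sec->mr at offset xlat ...
 *     }
 *
 * On return *plen is clamped so the access does not run past the translated
 * MemoryRegion.
 */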
232 void cpu_exec_init_all(void)
234 #if !defined(CONFIG_USER_ONLY)
235 qemu_mutex_init(&ram_list.mutex);
241 #if !defined(CONFIG_USER_ONLY)
243 static int cpu_common_post_load(void *opaque, int version_id)
245 CPUState *cpu = opaque;
247 /* 0x01 was CPU_INTERRUPT_EXIT. This line can be removed when the
248 version_id is increased. */
249 cpu->interrupt_request &= ~0x01;
250 tlb_flush(cpu->env_ptr, 1);
255 static const VMStateDescription vmstate_cpu_common = {
256 .name = "cpu_common",
258 .minimum_version_id = 1,
259 .minimum_version_id_old = 1,
260 .post_load = cpu_common_post_load,
261 .fields = (VMStateField []) {
262 VMSTATE_UINT32(halted, CPUState),
263 VMSTATE_UINT32(interrupt_request, CPUState),
264 VMSTATE_END_OF_LIST()
268 #define vmstate_cpu_common vmstate_dummy
271 CPUState *qemu_get_cpu(int index)
273 CPUArchState *env = first_cpu;
274 CPUState *cpu = NULL;
277 cpu = ENV_GET_CPU(env);
278 if (cpu->cpu_index == index) {
284 return env ? cpu : NULL;
287 void qemu_for_each_cpu(void (*func)(CPUState *cpu, void *data), void *data)
289 CPUArchState *env = first_cpu;
292 func(ENV_GET_CPU(env), data);
297 void cpu_exec_init(CPUArchState *env)
299 CPUState *cpu = ENV_GET_CPU(env);
300 CPUClass *cc = CPU_GET_CLASS(cpu);
304 #if defined(CONFIG_USER_ONLY)
307 env->next_cpu = NULL;
310 while (*penv != NULL) {
311 penv = &(*penv)->next_cpu;
314 cpu->cpu_index = cpu_index;
316 QTAILQ_INIT(&env->breakpoints);
317 QTAILQ_INIT(&env->watchpoints);
318 #ifndef CONFIG_USER_ONLY
319 cpu->thread_id = qemu_get_thread_id();
322 #if defined(CONFIG_USER_ONLY)
325 vmstate_register(NULL, cpu_index, &vmstate_cpu_common, cpu);
326 #if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
327 register_savevm(NULL, "cpu", cpu_index, CPU_SAVE_VERSION,
328 cpu_save, cpu_load, env);
329 assert(cc->vmsd == NULL);
331 if (cc->vmsd != NULL) {
332 vmstate_register(NULL, cpu_index, cc->vmsd, cpu);
336 #if defined(TARGET_HAS_ICE)
337 #if defined(CONFIG_USER_ONLY)
338 static void breakpoint_invalidate(CPUArchState *env, target_ulong pc)
340 tb_invalidate_phys_page_range(pc, pc + 1, 0);
343 static void breakpoint_invalidate(CPUArchState *env, target_ulong pc)
345 tb_invalidate_phys_addr(cpu_get_phys_page_debug(env, pc) |
346 (pc & ~TARGET_PAGE_MASK));
349 #endif /* TARGET_HAS_ICE */
351 #if defined(CONFIG_USER_ONLY)
352 void cpu_watchpoint_remove_all(CPUArchState *env, int mask)
357 int cpu_watchpoint_insert(CPUArchState *env, target_ulong addr, target_ulong len,
358 int flags, CPUWatchpoint **watchpoint)
363 /* Add a watchpoint. */
364 int cpu_watchpoint_insert(CPUArchState *env, target_ulong addr, target_ulong len,
365 int flags, CPUWatchpoint **watchpoint)
367 target_ulong len_mask = ~(len - 1);
370 /* sanity checks: allow power-of-2 lengths, deny unaligned watchpoints */
371 if ((len & (len - 1)) || (addr & ~len_mask) ||
372 len == 0 || len > TARGET_PAGE_SIZE) {
373 fprintf(stderr, "qemu: tried to set invalid watchpoint at "
374 TARGET_FMT_lx ", len=" TARGET_FMT_lu "\n", addr, len);
377 wp = g_malloc(sizeof(*wp));
380 wp->len_mask = len_mask;
383 /* keep all GDB-injected watchpoints in front */
385 QTAILQ_INSERT_HEAD(&env->watchpoints, wp, entry);
387 QTAILQ_INSERT_TAIL(&env->watchpoints, wp, entry);
389 tlb_flush_page(env, addr);
396 /* Remove a specific watchpoint. */
397 int cpu_watchpoint_remove(CPUArchState *env, target_ulong addr, target_ulong len,
400 target_ulong len_mask = ~(len - 1);
403 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
404 if (addr == wp->vaddr && len_mask == wp->len_mask
405 && flags == (wp->flags & ~BP_WATCHPOINT_HIT)) {
406 cpu_watchpoint_remove_by_ref(env, wp);
413 /* Remove a specific watchpoint by reference. */
414 void cpu_watchpoint_remove_by_ref(CPUArchState *env, CPUWatchpoint *watchpoint)
416 QTAILQ_REMOVE(&env->watchpoints, watchpoint, entry);
418 tlb_flush_page(env, watchpoint->vaddr);
423 /* Remove all matching watchpoints. */
424 void cpu_watchpoint_remove_all(CPUArchState *env, int mask)
426 CPUWatchpoint *wp, *next;
428 QTAILQ_FOREACH_SAFE(wp, &env->watchpoints, entry, next) {
429 if (wp->flags & mask)
430 cpu_watchpoint_remove_by_ref(env, wp);
435 /* Add a breakpoint. */
436 int cpu_breakpoint_insert(CPUArchState *env, target_ulong pc, int flags,
437 CPUBreakpoint **breakpoint)
439 #if defined(TARGET_HAS_ICE)
442 bp = g_malloc(sizeof(*bp));
447 /* keep all GDB-injected breakpoints in front */
449 QTAILQ_INSERT_HEAD(&env->breakpoints, bp, entry);
451 QTAILQ_INSERT_TAIL(&env->breakpoints, bp, entry);
453 breakpoint_invalidate(env, pc);
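/*
 * Added usage sketch (illustrative, not part of the original code): the
 * gdbstub-style pattern for a temporary breakpoint would be roughly
 *
 *     CPUBreakpoint *bp;
 *     if (cpu_breakpoint_insert(env, pc, BP_GDB, &bp) == 0) {
 *         ...
 *         cpu_breakpoint_remove_by_ref(env, bp);
 *     }
 *
 * BP_GDB entries are kept at the head of the list so they take priority over
 * BP_CPU (architecturally generated) breakpoints.
 */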
463 /* Remove a specific breakpoint. */
464 int cpu_breakpoint_remove(CPUArchState *env, target_ulong pc, int flags)
466 #if defined(TARGET_HAS_ICE)
469 QTAILQ_FOREACH(bp, &env->breakpoints, entry) {
470 if (bp->pc == pc && bp->flags == flags) {
471 cpu_breakpoint_remove_by_ref(env, bp);
481 /* Remove a specific breakpoint by reference. */
482 void cpu_breakpoint_remove_by_ref(CPUArchState *env, CPUBreakpoint *breakpoint)
484 #if defined(TARGET_HAS_ICE)
485 QTAILQ_REMOVE(&env->breakpoints, breakpoint, entry);
487 breakpoint_invalidate(env, breakpoint->pc);
493 /* Remove all matching breakpoints. */
494 void cpu_breakpoint_remove_all(CPUArchState *env, int mask)
496 #if defined(TARGET_HAS_ICE)
497 CPUBreakpoint *bp, *next;
499 QTAILQ_FOREACH_SAFE(bp, &env->breakpoints, entry, next) {
500 if (bp->flags & mask)
501 cpu_breakpoint_remove_by_ref(env, bp);
506 /* enable or disable single step mode. EXCP_DEBUG is returned by the
507 CPU loop after each instruction */
508 void cpu_single_step(CPUArchState *env, int enabled)
510 #if defined(TARGET_HAS_ICE)
511 if (env->singlestep_enabled != enabled) {
512 env->singlestep_enabled = enabled;
514 kvm_update_guest_debug(env, 0);
516 /* must flush all the translated code to avoid inconsistencies */
517 /* XXX: only flush what is necessary */
524 void cpu_exit(CPUArchState *env)
526 CPUState *cpu = ENV_GET_CPU(env);
528 cpu->exit_request = 1;
529 cpu->tcg_exit_req = 1;
532 void cpu_abort(CPUArchState *env, const char *fmt, ...)
539 fprintf(stderr, "qemu: fatal: ");
540 vfprintf(stderr, fmt, ap);
541 fprintf(stderr, "\n");
542 cpu_dump_state(env, stderr, fprintf, CPU_DUMP_FPU | CPU_DUMP_CCOP);
543 if (qemu_log_enabled()) {
544 qemu_log("qemu: fatal: ");
545 qemu_log_vprintf(fmt, ap2);
547 log_cpu_state(env, CPU_DUMP_FPU | CPU_DUMP_CCOP);
553 #if defined(CONFIG_USER_ONLY)
555 struct sigaction act;
556 sigfillset(&act.sa_mask);
557 act.sa_handler = SIG_DFL;
558 sigaction(SIGABRT, &act, NULL);
564 CPUArchState *cpu_copy(CPUArchState *env)
566 CPUArchState *new_env = cpu_init(env->cpu_model_str);
567 CPUArchState *next_cpu = new_env->next_cpu;
568 #if defined(TARGET_HAS_ICE)
573 memcpy(new_env, env, sizeof(CPUArchState));
575 /* Preserve chaining. */
576 new_env->next_cpu = next_cpu;
578 /* Clone all break/watchpoints.
579 Note: Once we support ptrace with hw-debug register access, make sure
580 BP_CPU break/watchpoints are handled correctly on clone. */
581 QTAILQ_INIT(&env->breakpoints);
582 QTAILQ_INIT(&env->watchpoints);
583 #if defined(TARGET_HAS_ICE)
584 QTAILQ_FOREACH(bp, &env->breakpoints, entry) {
585 cpu_breakpoint_insert(new_env, bp->pc, bp->flags, NULL);
587 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
588 cpu_watchpoint_insert(new_env, wp->vaddr, (~wp->len_mask) + 1,
596 #if !defined(CONFIG_USER_ONLY)
597 static void tlb_reset_dirty_range_all(ram_addr_t start, ram_addr_t end,
602 /* we modify the TLB cache so that the dirty bit will be set again
603 when accessing the range */
604 start1 = (uintptr_t)qemu_safe_ram_ptr(start);
605 /* Check that we don't span multiple blocks - this breaks the
606 address comparisons below. */
607 if ((uintptr_t)qemu_safe_ram_ptr(end - 1) - start1
608 != (end - 1) - start) {
611 cpu_tlb_reset_dirty_all(start1, length);
615 /* Note: start and end must be within the same ram block. */
616 void cpu_physical_memory_reset_dirty(ram_addr_t start, ram_addr_t end,
621 start &= TARGET_PAGE_MASK;
622 end = TARGET_PAGE_ALIGN(end);
624 length = end - start;
627 cpu_physical_memory_mask_dirty_range(start, length, dirty_flags);
630 tlb_reset_dirty_range_all(start, end, length);
634 static int cpu_physical_memory_set_dirty_tracking(int enable)
637 in_migration = enable;
641 hwaddr memory_region_section_get_iotlb(CPUArchState *env,
642 MemoryRegionSection *section,
644 hwaddr paddr, hwaddr xlat,
646 target_ulong *address)
651 if (memory_region_is_ram(section->mr)) {
653 iotlb = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
655 if (!section->readonly) {
656 iotlb |= phys_section_notdirty;
658 iotlb |= phys_section_rom;
661 iotlb = section - phys_sections;
665 /* Make accesses to pages with watchpoints go via the
666 watchpoint trap routines. */
667 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
668 if (vaddr == (wp->vaddr & TARGET_PAGE_MASK)) {
669 /* Avoid trapping reads of pages with a write breakpoint. */
670 if ((prot & PAGE_WRITE) || (wp->flags & BP_MEM_READ)) {
671 iotlb = phys_section_watch + paddr;
672 *address |= TLB_MMIO;
680 #endif /* defined(CONFIG_USER_ONLY) */
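/*
 * Added note: the iotlb value built above is either a page-aligned RAM
 * address OR'd with the notdirty/rom section number, or, for MMIO, a plain
 * index into phys_sections[] (or the watch section plus the physical
 * address).  The section number always fits in the low TARGET_PAGE_BITS bits
 * (see the assertion in phys_section_add()), and iotlb_to_region() below
 * recovers the section by masking off the page-aligned part.
 */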
682 #if !defined(CONFIG_USER_ONLY)
684 #define SUBPAGE_IDX(addr) ((addr) & ~TARGET_PAGE_MASK)
685 typedef struct subpage_t {
688 uint16_t sub_section[TARGET_PAGE_SIZE];
691 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
693 static subpage_t *subpage_init(hwaddr base);
694 static void destroy_page_desc(uint16_t section_index)
696 MemoryRegionSection *section = &phys_sections[section_index];
697 MemoryRegion *mr = section->mr;
700 subpage_t *subpage = container_of(mr, subpage_t, iomem);
701 memory_region_destroy(&subpage->iomem);
706 static void destroy_l2_mapping(PhysPageEntry *lp, unsigned level)
711 if (lp->ptr == PHYS_MAP_NODE_NIL) {
715 p = phys_map_nodes[lp->ptr];
716 for (i = 0; i < L2_SIZE; ++i) {
718 destroy_l2_mapping(&p[i], level - 1);
720 destroy_page_desc(p[i].ptr);
724 lp->ptr = PHYS_MAP_NODE_NIL;
727 static void destroy_all_mappings(AddressSpaceDispatch *d)
729 destroy_l2_mapping(&d->phys_map, P_L2_LEVELS - 1);
730 phys_map_nodes_reset();
733 static uint16_t phys_section_add(MemoryRegionSection *section)
735 /* The physical section number is ORed with a page-aligned
736 * pointer to produce the iotlb entries. Thus it should
737 * never overflow into the page-aligned value.
739 assert(phys_sections_nb < TARGET_PAGE_SIZE);
741 if (phys_sections_nb == phys_sections_nb_alloc) {
742 phys_sections_nb_alloc = MAX(phys_sections_nb_alloc * 2, 16);
743 phys_sections = g_renew(MemoryRegionSection, phys_sections,
744 phys_sections_nb_alloc);
746 phys_sections[phys_sections_nb] = *section;
747 return phys_sections_nb++;
750 static void phys_sections_clear(void)
752 phys_sections_nb = 0;
755 static void register_subpage(AddressSpaceDispatch *d, MemoryRegionSection *section)
758 hwaddr base = section->offset_within_address_space
760 MemoryRegionSection *existing = phys_page_find(d, base >> TARGET_PAGE_BITS);
761 MemoryRegionSection subsection = {
762 .offset_within_address_space = base,
763 .size = TARGET_PAGE_SIZE,
767 assert(existing->mr->subpage || existing->mr == &io_mem_unassigned);
769 if (!(existing->mr->subpage)) {
770 subpage = subpage_init(base);
771 subsection.mr = &subpage->iomem;
772 phys_page_set(d, base >> TARGET_PAGE_BITS, 1,
773 phys_section_add(&subsection));
775 subpage = container_of(existing->mr, subpage_t, iomem);
777 start = section->offset_within_address_space & ~TARGET_PAGE_MASK;
778 end = start + section->size - 1;
779 subpage_register(subpage, start, end, phys_section_add(section));
783 static void register_multipage(AddressSpaceDispatch *d, MemoryRegionSection *section)
785 hwaddr start_addr = section->offset_within_address_space;
786 ram_addr_t size = section->size;
788 uint16_t section_index = phys_section_add(section);
793 phys_page_set(d, addr >> TARGET_PAGE_BITS, size >> TARGET_PAGE_BITS,
797 QEMU_BUILD_BUG_ON(TARGET_PHYS_ADDR_SPACE_BITS > MAX_PHYS_ADDR_SPACE_BITS)
799 static MemoryRegionSection limit(MemoryRegionSection section)
801 section.size = MIN(section.offset_within_address_space + section.size,
803 - section.offset_within_address_space;
808 static void mem_add(MemoryListener *listener, MemoryRegionSection *section)
810 AddressSpaceDispatch *d = container_of(listener, AddressSpaceDispatch, listener);
811 MemoryRegionSection now = limit(*section), remain = limit(*section);
813 if ((now.offset_within_address_space & ~TARGET_PAGE_MASK)
814 || (now.size < TARGET_PAGE_SIZE)) {
815 now.size = MIN(TARGET_PAGE_ALIGN(now.offset_within_address_space)
816 - now.offset_within_address_space,
818 register_subpage(d, &now);
819 remain.size -= now.size;
820 remain.offset_within_address_space += now.size;
821 remain.offset_within_region += now.size;
823 while (remain.size >= TARGET_PAGE_SIZE) {
825 if (remain.offset_within_region & ~TARGET_PAGE_MASK) {
826 now.size = TARGET_PAGE_SIZE;
827 register_subpage(d, &now);
829 now.size &= TARGET_PAGE_MASK;
830 register_multipage(d, &now);
832 remain.size -= now.size;
833 remain.offset_within_address_space += now.size;
834 remain.offset_within_region += now.size;
838 register_subpage(d, &now);
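/*
 * Worked example (added): with a 4K TARGET_PAGE_SIZE and a page-aligned
 * offset_within_region, a section covering [0x10000800, 0x100027ff] is
 * registered in three pieces: a subpage for [0x10000800, 0x10000fff], one
 * full page for [0x10001000, 0x10001fff], and a trailing subpage for
 * [0x10002000, 0x100027ff].
 */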
842 void qemu_flush_coalesced_mmio_buffer(void)
845 kvm_flush_coalesced_mmio_buffer();
848 void qemu_mutex_lock_ramlist(void)
850 qemu_mutex_lock(&ram_list.mutex);
853 void qemu_mutex_unlock_ramlist(void)
855 qemu_mutex_unlock(&ram_list.mutex);
858 #if defined(__linux__) && !defined(TARGET_S390X)
862 #define HUGETLBFS_MAGIC 0x958458f6
864 static long gethugepagesize(const char *path)
870 ret = statfs(path, &fs);
871 } while (ret != 0 && errno == EINTR);
878 if (fs.f_type != HUGETLBFS_MAGIC)
879 fprintf(stderr, "Warning: path not on HugeTLBFS: %s\n", path);
884 static void *file_ram_alloc(RAMBlock *block,
889 char *sanitized_name;
896 unsigned long hpagesize;
898 hpagesize = gethugepagesize(path);
903 if (memory < hpagesize) {
907 if (kvm_enabled() && !kvm_has_sync_mmu()) {
908 fprintf(stderr, "host lacks kvm mmu notifiers, -mem-path unsupported\n");
912 /* Make name safe to use with mkstemp by replacing '/' with '_'. */
913 sanitized_name = g_strdup(block->mr->name);
914 for (c = sanitized_name; *c != '\0'; c++) {
919 filename = g_strdup_printf("%s/qemu_back_mem.%s.XXXXXX", path,
921 g_free(sanitized_name);
923 fd = mkstemp(filename);
925 perror("unable to create backing store for hugepages");
932 memory = (memory+hpagesize-1) & ~(hpagesize-1);
935 * ftruncate is not supported by hugetlbfs in older
936 * hosts, so don't bother bailing out on errors.
937 * If anything goes wrong with it under other filesystems, mmap will fail.
940 if (ftruncate(fd, memory))
944 /* NB: MAP_POPULATE won't exhaustively alloc all phys pages in the case
945 * MAP_PRIVATE is requested. For mem_prealloc we mmap as MAP_SHARED
946 * to sidestep this quirk.
948 flags = mem_prealloc ? MAP_POPULATE | MAP_SHARED : MAP_PRIVATE;
949 area = mmap(0, memory, PROT_READ | PROT_WRITE, flags, fd, 0);
951 area = mmap(0, memory, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
953 if (area == MAP_FAILED) {
954 perror("file_ram_alloc: can't mmap RAM pages");
963 static ram_addr_t find_ram_offset(ram_addr_t size)
965 RAMBlock *block, *next_block;
966 ram_addr_t offset = RAM_ADDR_MAX, mingap = RAM_ADDR_MAX;
968 assert(size != 0); /* it would hand out same offset multiple times */
970 if (QTAILQ_EMPTY(&ram_list.blocks))
973 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
974 ram_addr_t end, next = RAM_ADDR_MAX;
976 end = block->offset + block->length;
978 QTAILQ_FOREACH(next_block, &ram_list.blocks, next) {
979 if (next_block->offset >= end) {
980 next = MIN(next, next_block->offset);
983 if (next - end >= size && next - end < mingap) {
989 if (offset == RAM_ADDR_MAX) {
990 fprintf(stderr, "Failed to find gap of requested size: %" PRIu64 "\n",
998 ram_addr_t last_ram_offset(void)
1001 ram_addr_t last = 0;
1003 QTAILQ_FOREACH(block, &ram_list.blocks, next)
1004 last = MAX(last, block->offset + block->length);
1009 static void qemu_ram_setup_dump(void *addr, ram_addr_t size)
1012 QemuOpts *machine_opts;
1014 /* Use MADV_DONTDUMP if the user doesn't want the guest memory in the core dump */
1015 machine_opts = qemu_opts_find(qemu_find_opts("machine"), 0);
1017 !qemu_opt_get_bool(machine_opts, "dump-guest-core", true)) {
1018 ret = qemu_madvise(addr, size, QEMU_MADV_DONTDUMP);
1020 perror("qemu_madvise");
1021 fprintf(stderr, "madvise doesn't support MADV_DONTDUMP, "
1022 "but dump_guest_core=off specified\n");
1027 void qemu_ram_set_idstr(ram_addr_t addr, const char *name, DeviceState *dev)
1029 RAMBlock *new_block, *block;
1032 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1033 if (block->offset == addr) {
1039 assert(!new_block->idstr[0]);
1042 char *id = qdev_get_dev_path(dev);
1044 snprintf(new_block->idstr, sizeof(new_block->idstr), "%s/", id);
1048 pstrcat(new_block->idstr, sizeof(new_block->idstr), name);
1050 /* This assumes the iothread lock is taken here too. */
1051 qemu_mutex_lock_ramlist();
1052 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1053 if (block != new_block && !strcmp(block->idstr, new_block->idstr)) {
1054 fprintf(stderr, "RAMBlock \"%s\" already registered, abort!\n",
1059 qemu_mutex_unlock_ramlist();
1062 static int memory_try_enable_merging(void *addr, size_t len)
1066 opts = qemu_opts_find(qemu_find_opts("machine"), 0);
1067 if (opts && !qemu_opt_get_bool(opts, "mem-merge", true)) {
1068 /* disabled by the user */
1072 return qemu_madvise(addr, len, QEMU_MADV_MERGEABLE);
1075 ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
1078 RAMBlock *block, *new_block;
1080 size = TARGET_PAGE_ALIGN(size);
1081 new_block = g_malloc0(sizeof(*new_block));
1083 /* This assumes the iothread lock is taken here too. */
1084 qemu_mutex_lock_ramlist();
1086 new_block->offset = find_ram_offset(size);
1088 new_block->host = host;
1089 new_block->flags |= RAM_PREALLOC_MASK;
1092 #if defined (__linux__) && !defined(TARGET_S390X)
1093 new_block->host = file_ram_alloc(new_block, size, mem_path);
1094 if (!new_block->host) {
1095 new_block->host = qemu_anon_ram_alloc(size);
1096 memory_try_enable_merging(new_block->host, size);
1099 fprintf(stderr, "-mem-path option unsupported\n");
1103 if (xen_enabled()) {
1104 xen_ram_alloc(new_block->offset, size, mr);
1105 } else if (kvm_enabled()) {
1106 /* some s390/kvm configurations have special constraints */
1107 new_block->host = kvm_ram_alloc(size);
1109 new_block->host = qemu_anon_ram_alloc(size);
1111 memory_try_enable_merging(new_block->host, size);
1114 new_block->length = size;
1116 /* Keep the list sorted from biggest to smallest block. */
1117 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1118 if (block->length < new_block->length) {
1123 QTAILQ_INSERT_BEFORE(block, new_block, next);
1125 QTAILQ_INSERT_TAIL(&ram_list.blocks, new_block, next);
1127 ram_list.mru_block = NULL;
1130 qemu_mutex_unlock_ramlist();
1132 ram_list.phys_dirty = g_realloc(ram_list.phys_dirty,
1133 last_ram_offset() >> TARGET_PAGE_BITS);
1134 memset(ram_list.phys_dirty + (new_block->offset >> TARGET_PAGE_BITS),
1135 0, size >> TARGET_PAGE_BITS);
1136 cpu_physical_memory_set_dirty_range(new_block->offset, size, 0xff);
1138 qemu_ram_setup_dump(new_block->host, size);
1139 qemu_madvise(new_block->host, size, QEMU_MADV_HUGEPAGE);
1142 kvm_setup_guest_memory(new_block->host, size);
1144 return new_block->offset;
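/*
 * Added note (assumption about the usual call path): device and board code
 * normally reaches this allocator through the MemoryRegion API, e.g.
 * memory_region_init_ram() for QEMU-allocated backing and
 * memory_region_init_ram_ptr() for a caller-supplied host buffer.  The
 * returned ram_addr_t is an offset inside the global RAM list, not a guest
 * physical address; the mapping to guest addresses happens when the region
 * is added to an address space.
 */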
1147 ram_addr_t qemu_ram_alloc(ram_addr_t size, MemoryRegion *mr)
1149 return qemu_ram_alloc_from_ptr(size, NULL, mr);
1152 void qemu_ram_free_from_ptr(ram_addr_t addr)
1156 /* This assumes the iothread lock is taken here too. */
1157 qemu_mutex_lock_ramlist();
1158 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1159 if (addr == block->offset) {
1160 QTAILQ_REMOVE(&ram_list.blocks, block, next);
1161 ram_list.mru_block = NULL;
1167 qemu_mutex_unlock_ramlist();
1170 void qemu_ram_free(ram_addr_t addr)
1174 /* This assumes the iothread lock is taken here too. */
1175 qemu_mutex_lock_ramlist();
1176 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1177 if (addr == block->offset) {
1178 QTAILQ_REMOVE(&ram_list.blocks, block, next);
1179 ram_list.mru_block = NULL;
1181 if (block->flags & RAM_PREALLOC_MASK) {
1183 } else if (mem_path) {
1184 #if defined (__linux__) && !defined(TARGET_S390X)
1186 munmap(block->host, block->length);
1189 qemu_anon_ram_free(block->host, block->length);
1195 if (xen_enabled()) {
1196 xen_invalidate_map_cache_entry(block->host);
1198 qemu_anon_ram_free(block->host, block->length);
1205 qemu_mutex_unlock_ramlist();
1210 void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
1217 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1218 offset = addr - block->offset;
1219 if (offset < block->length) {
1220 vaddr = block->host + offset;
1221 if (block->flags & RAM_PREALLOC_MASK) {
1225 munmap(vaddr, length);
1227 #if defined(__linux__) && !defined(TARGET_S390X)
1230 flags |= mem_prealloc ? MAP_POPULATE | MAP_SHARED :
1233 flags |= MAP_PRIVATE;
1235 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1236 flags, block->fd, offset);
1238 flags |= MAP_PRIVATE | MAP_ANONYMOUS;
1239 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1246 #if defined(TARGET_S390X) && defined(CONFIG_KVM)
1247 flags |= MAP_SHARED | MAP_ANONYMOUS;
1248 area = mmap(vaddr, length, PROT_EXEC|PROT_READ|PROT_WRITE,
1251 flags |= MAP_PRIVATE | MAP_ANONYMOUS;
1252 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1256 if (area != vaddr) {
1257 fprintf(stderr, "Could not remap addr: "
1258 RAM_ADDR_FMT "@" RAM_ADDR_FMT "\n",
1262 memory_try_enable_merging(vaddr, length);
1263 qemu_ram_setup_dump(vaddr, length);
1269 #endif /* !_WIN32 */
1271 /* Return a host pointer to ram allocated with qemu_ram_alloc.
1272 With the exception of the softmmu code in this file, this should
1273 only be used for local memory (e.g. video ram) that the device owns,
1274 and knows it isn't going to access beyond the end of the block.
1276 It should not be used for general purpose DMA.
1277 Use cpu_physical_memory_map/cpu_physical_memory_rw instead.
1279 void *qemu_get_ram_ptr(ram_addr_t addr)
1283 /* The list is protected by the iothread lock here. */
1284 block = ram_list.mru_block;
1285 if (block && addr - block->offset < block->length) {
1288 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1289 if (addr - block->offset < block->length) {
1294 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
1298 ram_list.mru_block = block;
1299 if (xen_enabled()) {
1300 /* We need to check if the requested address is in the RAM
1301 * because we don't want to map the entire memory in QEMU.
1302 * In that case just map until the end of the page.
1304 if (block->offset == 0) {
1305 return xen_map_cache(addr, 0, 0);
1306 } else if (block->host == NULL) {
1308 xen_map_cache(block->offset, block->length, 1);
1311 return block->host + (addr - block->offset);
1314 /* Return a host pointer to ram allocated with qemu_ram_alloc. Same as
1315 * qemu_get_ram_ptr but do not touch ram_list.mru_block.
1317 * ??? Is this still necessary?
1319 static void *qemu_safe_ram_ptr(ram_addr_t addr)
1323 /* The list is protected by the iothread lock here. */
1324 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1325 if (addr - block->offset < block->length) {
1326 if (xen_enabled()) {
1327 /* We need to check if the requested address is in the RAM
1328 * because we don't want to map the entire memory in QEMU.
1329 * In that case just map until the end of the page.
1331 if (block->offset == 0) {
1332 return xen_map_cache(addr, 0, 0);
1333 } else if (block->host == NULL) {
1335 xen_map_cache(block->offset, block->length, 1);
1338 return block->host + (addr - block->offset);
1342 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
1348 /* Return a host pointer to guest's ram. Similar to qemu_get_ram_ptr
1349 * but takes a size argument */
1350 static void *qemu_ram_ptr_length(ram_addr_t addr, ram_addr_t *size)
1355 if (xen_enabled()) {
1356 return xen_map_cache(addr, *size, 1);
1360 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1361 if (addr - block->offset < block->length) {
1362 if (addr - block->offset + *size > block->length)
1363 *size = block->length - addr + block->offset;
1364 return block->host + (addr - block->offset);
1368 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
1373 int qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr)
1376 uint8_t *host = ptr;
1378 if (xen_enabled()) {
1379 *ram_addr = xen_ram_addr_from_mapcache(ptr);
1383 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1384 /* This case appears when the block is not mapped. */
1385 if (block->host == NULL) {
1388 if (host - block->host < block->length) {
1389 *ram_addr = block->offset + (host - block->host);
1397 /* Some of the softmmu routines need to translate from a host pointer
1398 (typically a TLB entry) back to a ram offset. */
1399 ram_addr_t qemu_ram_addr_from_host_nofail(void *ptr)
1401 ram_addr_t ram_addr;
1403 if (qemu_ram_addr_from_host(ptr, &ram_addr)) {
1404 fprintf(stderr, "Bad ram pointer %p\n", ptr);
1410 static void notdirty_mem_write(void *opaque, hwaddr ram_addr,
1411 uint64_t val, unsigned size)
1414 dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
1415 if (!(dirty_flags & CODE_DIRTY_FLAG)) {
1416 tb_invalidate_phys_page_fast(ram_addr, size);
1417 dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
1421 stb_p(qemu_get_ram_ptr(ram_addr), val);
1424 stw_p(qemu_get_ram_ptr(ram_addr), val);
1427 stl_p(qemu_get_ram_ptr(ram_addr), val);
1432 dirty_flags |= (0xff & ~CODE_DIRTY_FLAG);
1433 cpu_physical_memory_set_dirty_flags(ram_addr, dirty_flags);
1434 /* we remove the notdirty callback only if the code has been flushed */
1436 if (dirty_flags == 0xff)
1437 tlb_set_dirty(cpu_single_env, cpu_single_env->mem_io_vaddr);
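/*
 * Added note: 0xff stands for "all dirty bits set".  The store above marks
 * every flag except CODE_DIRTY_FLAG; dirty_flags reads back as 0xff only
 * once CODE_DIRTY_FLAG is also set, i.e. once the translated code for this
 * page has been flushed, and at that point the slow notdirty handler is no
 * longer needed for this virtual address, hence the tlb_set_dirty() call.
 */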
1440 static bool notdirty_mem_accepts(void *opaque, hwaddr addr,
1441 unsigned size, bool is_write)
1446 static const MemoryRegionOps notdirty_mem_ops = {
1447 .write = notdirty_mem_write,
1448 .valid.accepts = notdirty_mem_accepts,
1449 .endianness = DEVICE_NATIVE_ENDIAN,
1452 /* Generate a debug exception if a watchpoint has been hit. */
1453 static void check_watchpoint(int offset, int len_mask, int flags)
1455 CPUArchState *env = cpu_single_env;
1456 target_ulong pc, cs_base;
1461 if (env->watchpoint_hit) {
1462 /* We re-entered the check after replacing the TB. Now raise
1463 * the debug interrupt so that it will trigger after the
1464 * current instruction. */
1465 cpu_interrupt(ENV_GET_CPU(env), CPU_INTERRUPT_DEBUG);
1468 vaddr = (env->mem_io_vaddr & TARGET_PAGE_MASK) + offset;
1469 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
1470 if ((vaddr == (wp->vaddr & len_mask) ||
1471 (vaddr & wp->len_mask) == wp->vaddr) && (wp->flags & flags)) {
1472 wp->flags |= BP_WATCHPOINT_HIT;
1473 if (!env->watchpoint_hit) {
1474 env->watchpoint_hit = wp;
1475 tb_check_watchpoint(env);
1476 if (wp->flags & BP_STOP_BEFORE_ACCESS) {
1477 env->exception_index = EXCP_DEBUG;
1480 cpu_get_tb_cpu_state(env, &pc, &cs_base, &cpu_flags);
1481 tb_gen_code(env, pc, cs_base, cpu_flags, 1);
1482 cpu_resume_from_signal(env, NULL);
1486 wp->flags &= ~BP_WATCHPOINT_HIT;
1491 /* Watchpoint access routines. Watchpoints are inserted using TLB tricks,
1492 so these check for a hit then pass through to the normal out-of-line phys routines. */
1494 static uint64_t watch_mem_read(void *opaque, hwaddr addr,
1497 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~(size - 1), BP_MEM_READ);
1499 case 1: return ldub_phys(addr);
1500 case 2: return lduw_phys(addr);
1501 case 4: return ldl_phys(addr);
1506 static void watch_mem_write(void *opaque, hwaddr addr,
1507 uint64_t val, unsigned size)
1509 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~(size - 1), BP_MEM_WRITE);
1512 stb_phys(addr, val);
1515 stw_phys(addr, val);
1518 stl_phys(addr, val);
1524 static const MemoryRegionOps watch_mem_ops = {
1525 .read = watch_mem_read,
1526 .write = watch_mem_write,
1527 .endianness = DEVICE_NATIVE_ENDIAN,
1530 static uint64_t subpage_read(void *opaque, hwaddr addr,
1533 subpage_t *mmio = opaque;
1534 unsigned int idx = SUBPAGE_IDX(addr);
1537 MemoryRegionSection *section;
1538 #if defined(DEBUG_SUBPAGE)
1539 printf("%s: subpage %p len %d addr " TARGET_FMT_plx " idx %d\n", __func__,
1540 mmio, len, addr, idx);
1543 section = &phys_sections[mmio->sub_section[idx]];
1545 addr -= section->offset_within_address_space;
1546 addr += section->offset_within_region;
1547 io_mem_read(section->mr, addr, &val, len);
1551 static void subpage_write(void *opaque, hwaddr addr,
1552 uint64_t value, unsigned len)
1554 subpage_t *mmio = opaque;
1555 unsigned int idx = SUBPAGE_IDX(addr);
1556 MemoryRegionSection *section;
1557 #if defined(DEBUG_SUBPAGE)
1558 printf("%s: subpage %p len %d addr " TARGET_FMT_plx
1559 " idx %d value %"PRIx64"\n",
1560 __func__, mmio, len, addr, idx, value);
1563 section = &phys_sections[mmio->sub_section[idx]];
1565 addr -= section->offset_within_address_space;
1566 addr += section->offset_within_region;
1567 io_mem_write(section->mr, addr, value, len);
1570 static bool subpage_accepts(void *opaque, hwaddr addr,
1571 unsigned size, bool is_write)
1573 subpage_t *mmio = opaque;
1574 unsigned int idx = SUBPAGE_IDX(addr);
1575 MemoryRegionSection *section;
1576 #if defined(DEBUG_SUBPAGE)
1577 printf("%s: subpage %p %c len %d addr " TARGET_FMT_plx
1578 " idx %d\n", __func__, mmio,
1579 is_write ? 'w' : 'r', size, addr, idx);
1582 section = &phys_sections[mmio->sub_section[idx]];
1584 addr -= section->offset_within_address_space;
1585 addr += section->offset_within_region;
1586 return memory_region_access_valid(section->mr, addr, size, is_write);
1589 static const MemoryRegionOps subpage_ops = {
1590 .read = subpage_read,
1591 .write = subpage_write,
1592 .valid.accepts = subpage_accepts,
1593 .endianness = DEVICE_NATIVE_ENDIAN,
1596 static uint64_t subpage_ram_read(void *opaque, hwaddr addr,
1599 ram_addr_t raddr = addr;
1600 void *ptr = qemu_get_ram_ptr(raddr);
1602 case 1: return ldub_p(ptr);
1603 case 2: return lduw_p(ptr);
1604 case 4: return ldl_p(ptr);
1609 static void subpage_ram_write(void *opaque, hwaddr addr,
1610 uint64_t value, unsigned size)
1612 ram_addr_t raddr = addr;
1613 void *ptr = qemu_get_ram_ptr(raddr);
1615 case 1: return stb_p(ptr, value);
1616 case 2: return stw_p(ptr, value);
1617 case 4: return stl_p(ptr, value);
1622 static const MemoryRegionOps subpage_ram_ops = {
1623 .read = subpage_ram_read,
1624 .write = subpage_ram_write,
1625 .endianness = DEVICE_NATIVE_ENDIAN,
1628 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
1633 if (start >= TARGET_PAGE_SIZE || end >= TARGET_PAGE_SIZE)
1635 idx = SUBPAGE_IDX(start);
1636 eidx = SUBPAGE_IDX(end);
1637 #if defined(DEBUG_SUBPAGE)
1638 printf("%s: %p start %08x end %08x idx %08x eidx %08x section %d\n", __func__,
1639 mmio, start, end, idx, eidx, section);
1641 if (memory_region_is_ram(phys_sections[section].mr)) {
1642 MemoryRegionSection new_section = phys_sections[section];
1643 new_section.mr = &io_mem_subpage_ram;
1644 section = phys_section_add(&new_section);
1646 for (; idx <= eidx; idx++) {
1647 mmio->sub_section[idx] = section;
1653 static subpage_t *subpage_init(hwaddr base)
1657 mmio = g_malloc0(sizeof(subpage_t));
1660 memory_region_init_io(&mmio->iomem, &subpage_ops, mmio,
1661 "subpage", TARGET_PAGE_SIZE);
1662 mmio->iomem.subpage = true;
1663 #if defined(DEBUG_SUBPAGE)
1664 printf("%s: %p base " TARGET_FMT_plx " len %08x\n", __func__,
1665 mmio, base, TARGET_PAGE_SIZE);
1667 subpage_register(mmio, 0, TARGET_PAGE_SIZE-1, phys_section_unassigned);
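/*
 * Added note: a subpage_t dispatches accesses at byte granularity within one
 * page: sub_section[] holds a 16-bit phys_sections[] index per byte offset,
 * so a freshly initialised subpage maps every offset to
 * phys_section_unassigned until subpage_register() fills in a [start, end]
 * range.
 */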
1672 static uint16_t dummy_section(MemoryRegion *mr)
1674 MemoryRegionSection section = {
1676 .offset_within_address_space = 0,
1677 .offset_within_region = 0,
1681 return phys_section_add(&section);
1684 MemoryRegion *iotlb_to_region(hwaddr index)
1686 return phys_sections[index & ~TARGET_PAGE_MASK].mr;
1689 static void io_mem_init(void)
1691 memory_region_init_io(&io_mem_rom, &unassigned_mem_ops, NULL, "rom", UINT64_MAX);
1692 memory_region_init_io(&io_mem_unassigned, &unassigned_mem_ops, NULL,
1693 "unassigned", UINT64_MAX);
1694 memory_region_init_io(&io_mem_notdirty, &notdirty_mem_ops, NULL,
1695 "notdirty", UINT64_MAX);
1696 memory_region_init_io(&io_mem_subpage_ram, &subpage_ram_ops, NULL,
1697 "subpage-ram", UINT64_MAX);
1698 memory_region_init_io(&io_mem_watch, &watch_mem_ops, NULL,
1699 "watch", UINT64_MAX);
1702 static void mem_begin(MemoryListener *listener)
1704 AddressSpaceDispatch *d = container_of(listener, AddressSpaceDispatch, listener);
1706 destroy_all_mappings(d);
1707 d->phys_map.ptr = PHYS_MAP_NODE_NIL;
1710 static void core_begin(MemoryListener *listener)
1712 phys_sections_clear();
1713 phys_section_unassigned = dummy_section(&io_mem_unassigned);
1714 phys_section_notdirty = dummy_section(&io_mem_notdirty);
1715 phys_section_rom = dummy_section(&io_mem_rom);
1716 phys_section_watch = dummy_section(&io_mem_watch);
1719 static void tcg_commit(MemoryListener *listener)
1723 /* since each CPU stores ram addresses in its TLB cache, we must
1724 reset the modified entries */
1726 for(env = first_cpu; env != NULL; env = env->next_cpu) {
1731 static void core_log_global_start(MemoryListener *listener)
1733 cpu_physical_memory_set_dirty_tracking(1);
1736 static void core_log_global_stop(MemoryListener *listener)
1738 cpu_physical_memory_set_dirty_tracking(0);
1741 static void io_region_add(MemoryListener *listener,
1742 MemoryRegionSection *section)
1744 MemoryRegionIORange *mrio = g_new(MemoryRegionIORange, 1);
1746 mrio->mr = section->mr;
1747 mrio->offset = section->offset_within_region;
1748 iorange_init(&mrio->iorange, &memory_region_iorange_ops,
1749 section->offset_within_address_space, section->size);
1750 ioport_register(&mrio->iorange);
1753 static void io_region_del(MemoryListener *listener,
1754 MemoryRegionSection *section)
1756 isa_unassign_ioport(section->offset_within_address_space, section->size);
1759 static MemoryListener core_memory_listener = {
1760 .begin = core_begin,
1761 .log_global_start = core_log_global_start,
1762 .log_global_stop = core_log_global_stop,
1766 static MemoryListener io_memory_listener = {
1767 .region_add = io_region_add,
1768 .region_del = io_region_del,
1772 static MemoryListener tcg_memory_listener = {
1773 .commit = tcg_commit,
1776 void address_space_init_dispatch(AddressSpace *as)
1778 AddressSpaceDispatch *d = g_new(AddressSpaceDispatch, 1);
1780 d->phys_map = (PhysPageEntry) { .ptr = PHYS_MAP_NODE_NIL, .is_leaf = 0 };
1781 d->listener = (MemoryListener) {
1783 .region_add = mem_add,
1784 .region_nop = mem_add,
1788 memory_listener_register(&d->listener, as);
1791 void address_space_destroy_dispatch(AddressSpace *as)
1793 AddressSpaceDispatch *d = as->dispatch;
1795 memory_listener_unregister(&d->listener);
1796 destroy_l2_mapping(&d->phys_map, P_L2_LEVELS - 1);
1798 as->dispatch = NULL;
1801 static void memory_map_init(void)
1803 system_memory = g_malloc(sizeof(*system_memory));
1804 memory_region_init(system_memory, "system", INT64_MAX);
1805 address_space_init(&address_space_memory, system_memory);
1806 address_space_memory.name = "memory";
1808 system_io = g_malloc(sizeof(*system_io));
1809 memory_region_init(system_io, "io", 65536);
1810 address_space_init(&address_space_io, system_io);
1811 address_space_io.name = "I/O";
1813 memory_listener_register(&core_memory_listener, &address_space_memory);
1814 memory_listener_register(&io_memory_listener, &address_space_io);
1815 memory_listener_register(&tcg_memory_listener, &address_space_memory);
1817 dma_context_init(&dma_context_memory, &address_space_memory,
1821 MemoryRegion *get_system_memory(void)
1823 return system_memory;
1826 MemoryRegion *get_system_io(void)
1831 #endif /* !defined(CONFIG_USER_ONLY) */
1833 /* physical memory access (slow version, mainly for debug) */
1834 #if defined(CONFIG_USER_ONLY)
1835 int cpu_memory_rw_debug(CPUArchState *env, target_ulong addr,
1836 uint8_t *buf, int len, int is_write)
1843 page = addr & TARGET_PAGE_MASK;
1844 l = (page + TARGET_PAGE_SIZE) - addr;
1847 flags = page_get_flags(page);
1848 if (!(flags & PAGE_VALID))
1851 if (!(flags & PAGE_WRITE))
1853 /* XXX: this code should not depend on lock_user */
1854 if (!(p = lock_user(VERIFY_WRITE, addr, l, 0)))
1857 unlock_user(p, addr, l);
1859 if (!(flags & PAGE_READ))
1861 /* XXX: this code should not depend on lock_user */
1862 if (!(p = lock_user(VERIFY_READ, addr, l, 1)))
1865 unlock_user(p, addr, 0);
1876 static void invalidate_and_set_dirty(hwaddr addr,
1879 if (!cpu_physical_memory_is_dirty(addr)) {
1880 /* invalidate code */
1881 tb_invalidate_phys_page_range(addr, addr + length, 0);
1883 cpu_physical_memory_set_dirty_flags(addr, (0xff & ~CODE_DIRTY_FLAG));
1885 xen_modified_memory(addr, length);
1888 static inline bool memory_access_is_direct(MemoryRegion *mr, bool is_write)
1890 if (memory_region_is_ram(mr)) {
1891 return !(is_write && mr->readonly);
1893 if (memory_region_is_romd(mr)) {
1900 static inline int memory_access_size(int l, hwaddr addr)
1902 if (l >= 4 && ((addr & 3) == 0)) {
1905 if (l >= 2 && ((addr & 1) == 0)) {
1911 bool address_space_rw(AddressSpace *as, hwaddr addr, uint8_t *buf,
1912 int len, bool is_write)
1918 MemoryRegionSection *section;
1923 section = address_space_translate(as, addr, &addr1, &l, is_write);
1926 if (!memory_access_is_direct(section->mr, is_write)) {
1927 l = memory_access_size(l, addr1);
1928 /* XXX: could force cpu_single_env to NULL to avoid potential bugs */
1931 /* 32 bit write access */
1933 error |= io_mem_write(section->mr, addr1, val, 4);
1934 } else if (l == 2) {
1935 /* 16 bit write access */
1937 error |= io_mem_write(section->mr, addr1, val, 2);
1939 /* 8 bit write access */
1941 error |= io_mem_write(section->mr, addr1, val, 1);
1944 addr1 += memory_region_get_ram_addr(section->mr);
1946 ptr = qemu_get_ram_ptr(addr1);
1947 memcpy(ptr, buf, l);
1948 invalidate_and_set_dirty(addr1, l);
1951 if (!memory_access_is_direct(section->mr, is_write)) {
1953 l = memory_access_size(l, addr1);
1955 /* 32 bit read access */
1956 error |= io_mem_read(section->mr, addr1, &val, 4);
1958 } else if (l == 2) {
1959 /* 16 bit read access */
1960 error |= io_mem_read(section->mr, addr1, &val, 2);
1963 /* 8 bit read access */
1964 error |= io_mem_read(section->mr, addr1, &val, 1);
1969 ptr = qemu_get_ram_ptr(section->mr->ram_addr + addr1);
1970 memcpy(buf, ptr, l);
1981 bool address_space_write(AddressSpace *as, hwaddr addr,
1982 const uint8_t *buf, int len)
1984 return address_space_rw(as, addr, (uint8_t *)buf, len, true);
1987 bool address_space_read(AddressSpace *as, hwaddr addr, uint8_t *buf, int len)
1989 return address_space_rw(as, addr, buf, len, false);
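/*
 * Usage sketch (added, illustrative only):
 *
 *     uint8_t buf[4];
 *     if (address_space_read(&address_space_memory, paddr, buf, sizeof(buf))) {
 *         ... some part of the access hit an unassigned or rejecting region ...
 *     }
 *
 * The boolean result is an accumulated error flag from io_mem_read()/
 * io_mem_write(), not a count of bytes transferred.
 */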
1993 void cpu_physical_memory_rw(hwaddr addr, uint8_t *buf,
1994 int len, int is_write)
1996 address_space_rw(&address_space_memory, addr, buf, len, is_write);
1999 /* used for ROM loading : can write in RAM and ROM */
2000 void cpu_physical_memory_write_rom(hwaddr addr,
2001 const uint8_t *buf, int len)
2006 MemoryRegionSection *section;
2010 section = address_space_translate(&address_space_memory,
2011 addr, &addr1, &l, true);
2013 if (!(memory_region_is_ram(section->mr) ||
2014 memory_region_is_romd(section->mr))) {
2017 addr1 += memory_region_get_ram_addr(section->mr);
2019 ptr = qemu_get_ram_ptr(addr1);
2020 memcpy(ptr, buf, l);
2021 invalidate_and_set_dirty(addr1, l);
2035 static BounceBuffer bounce;
2037 typedef struct MapClient {
2039 void (*callback)(void *opaque);
2040 QLIST_ENTRY(MapClient) link;
2043 static QLIST_HEAD(map_client_list, MapClient) map_client_list
2044 = QLIST_HEAD_INITIALIZER(map_client_list);
2046 void *cpu_register_map_client(void *opaque, void (*callback)(void *opaque))
2048 MapClient *client = g_malloc(sizeof(*client));
2050 client->opaque = opaque;
2051 client->callback = callback;
2052 QLIST_INSERT_HEAD(&map_client_list, client, link);
2056 static void cpu_unregister_map_client(void *_client)
2058 MapClient *client = (MapClient *)_client;
2060 QLIST_REMOVE(client, link);
2064 static void cpu_notify_map_clients(void)
2068 while (!QLIST_EMPTY(&map_client_list)) {
2069 client = QLIST_FIRST(&map_client_list);
2070 client->callback(client->opaque);
2071 cpu_unregister_map_client(client);
2075 bool address_space_access_valid(AddressSpace *as, hwaddr addr, int len, bool is_write)
2077 MemoryRegionSection *section;
2082 section = address_space_translate(as, addr, &xlat, &l, is_write);
2083 if (!memory_access_is_direct(section->mr, is_write)) {
2084 l = memory_access_size(l, addr);
2085 if (!memory_region_access_valid(section->mr, xlat, l, is_write)) {
2096 /* Map a physical memory region into a host virtual address.
2097 * May map a subset of the requested range, given by and returned in *plen.
2098 * May return NULL if resources needed to perform the mapping are exhausted.
2099 * Use only for reads OR writes - not for read-modify-write operations.
2100 * Use cpu_register_map_client() to know when retrying the map operation is
2101 * likely to succeed.
2103 void *address_space_map(AddressSpace *as,
2111 MemoryRegionSection *section;
2112 ram_addr_t raddr = RAM_ADDR_MAX;
2118 section = address_space_translate(as, addr, &xlat, &l, is_write);
2120 if (!memory_access_is_direct(section->mr, is_write)) {
2121 if (todo || bounce.buffer) {
2124 bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, TARGET_PAGE_SIZE);
2128 address_space_read(as, addr, bounce.buffer, l);
2132 return bounce.buffer;
2135 raddr = memory_region_get_ram_addr(section->mr) + xlat;
2137 if (memory_region_get_ram_addr(section->mr) + xlat != raddr + todo) {
2147 ret = qemu_ram_ptr_length(raddr, &rlen);
2152 /* Unmaps a memory region previously mapped by address_space_map().
2153 * Will also mark the memory as dirty if is_write == 1. access_len gives
2154 * the amount of memory that was actually read or written by the caller.
2156 void address_space_unmap(AddressSpace *as, void *buffer, hwaddr len,
2157 int is_write, hwaddr access_len)
2159 if (buffer != bounce.buffer) {
2161 ram_addr_t addr1 = qemu_ram_addr_from_host_nofail(buffer);
2162 while (access_len) {
2164 l = TARGET_PAGE_SIZE;
2167 invalidate_and_set_dirty(addr1, l);
2172 if (xen_enabled()) {
2173 xen_invalidate_map_cache_entry(buffer);
2178 address_space_write(as, bounce.addr, bounce.buffer, access_len);
2180 qemu_vfree(bounce.buffer);
2181 bounce.buffer = NULL;
2182 cpu_notify_map_clients();
2185 void *cpu_physical_memory_map(hwaddr addr,
2189 return address_space_map(&address_space_memory, addr, plen, is_write);
2192 void cpu_physical_memory_unmap(void *buffer, hwaddr len,
2193 int is_write, hwaddr access_len)
2195 return address_space_unmap(&address_space_memory, buffer, len, is_write, access_len);
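/*
 * Usage sketch (added, illustrative only): map/unmap is the zero-copy path
 * for DMA-style transfers:
 *
 *     hwaddr plen = len;
 *     void *p = cpu_physical_memory_map(addr, &plen, 1);
 *     if (p) {
 *         ... fill up to plen bytes ...
 *         cpu_physical_memory_unmap(p, plen, 1, plen);
 *     }
 *
 * MMIO regions are served through the single global bounce buffer, so only
 * one such mapping can be live at a time; callers should fall back to
 * cpu_physical_memory_rw() or register a map client and retry.
 */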
2198 /* warning: addr must be aligned */
2199 static inline uint32_t ldl_phys_internal(hwaddr addr,
2200 enum device_endian endian)
2204 MemoryRegionSection *section;
2208 section = address_space_translate(&address_space_memory, addr, &addr1, &l,
2210 if (l < 4 || !memory_access_is_direct(section->mr, false)) {
2212 io_mem_read(section->mr, addr1, &val, 4);
2213 #if defined(TARGET_WORDS_BIGENDIAN)
2214 if (endian == DEVICE_LITTLE_ENDIAN) {
2218 if (endian == DEVICE_BIG_ENDIAN) {
2224 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section->mr)
2228 case DEVICE_LITTLE_ENDIAN:
2229 val = ldl_le_p(ptr);
2231 case DEVICE_BIG_ENDIAN:
2232 val = ldl_be_p(ptr);
2242 uint32_t ldl_phys(hwaddr addr)
2244 return ldl_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
2247 uint32_t ldl_le_phys(hwaddr addr)
2249 return ldl_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
2252 uint32_t ldl_be_phys(hwaddr addr)
2254 return ldl_phys_internal(addr, DEVICE_BIG_ENDIAN);
2257 /* warning: addr must be aligned */
2258 static inline uint64_t ldq_phys_internal(hwaddr addr,
2259 enum device_endian endian)
2263 MemoryRegionSection *section;
2267 section = address_space_translate(&address_space_memory, addr, &addr1, &l,
2269 if (l < 8 || !memory_access_is_direct(section->mr, false)) {
2271 io_mem_read(section->mr, addr1, &val, 8);
2272 #if defined(TARGET_WORDS_BIGENDIAN)
2273 if (endian == DEVICE_LITTLE_ENDIAN) {
2277 if (endian == DEVICE_BIG_ENDIAN) {
2283 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section->mr)
2287 case DEVICE_LITTLE_ENDIAN:
2288 val = ldq_le_p(ptr);
2290 case DEVICE_BIG_ENDIAN:
2291 val = ldq_be_p(ptr);
2301 uint64_t ldq_phys(hwaddr addr)
2303 return ldq_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
2306 uint64_t ldq_le_phys(hwaddr addr)
2308 return ldq_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
2311 uint64_t ldq_be_phys(hwaddr addr)
2313 return ldq_phys_internal(addr, DEVICE_BIG_ENDIAN);
2317 uint32_t ldub_phys(hwaddr addr)
2320 cpu_physical_memory_read(addr, &val, 1);
2324 /* warning: addr must be aligned */
2325 static inline uint32_t lduw_phys_internal(hwaddr addr,
2326 enum device_endian endian)
2330 MemoryRegionSection *section;
2334 section = address_space_translate(&address_space_memory, addr, &addr1, &l,
2336 if (l < 2 || !memory_access_is_direct(section->mr, false)) {
2338 io_mem_read(section->mr, addr1, &val, 2);
2339 #if defined(TARGET_WORDS_BIGENDIAN)
2340 if (endian == DEVICE_LITTLE_ENDIAN) {
2344 if (endian == DEVICE_BIG_ENDIAN) {
2350 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section->mr)
2354 case DEVICE_LITTLE_ENDIAN:
2355 val = lduw_le_p(ptr);
2357 case DEVICE_BIG_ENDIAN:
2358 val = lduw_be_p(ptr);
2368 uint32_t lduw_phys(hwaddr addr)
2370 return lduw_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
2373 uint32_t lduw_le_phys(hwaddr addr)
2375 return lduw_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
2378 uint32_t lduw_be_phys(hwaddr addr)
2380 return lduw_phys_internal(addr, DEVICE_BIG_ENDIAN);
2383 /* warning: addr must be aligned. The ram page is not masked as dirty
2384 and the code inside is not invalidated. It is useful if the dirty
2385 bits are used to track modified PTEs */
2386 void stl_phys_notdirty(hwaddr addr, uint32_t val)
2389 MemoryRegionSection *section;
2393 section = address_space_translate(&address_space_memory, addr, &addr1, &l,
2395 if (l < 4 || !memory_access_is_direct(section->mr, true)) {
2396 io_mem_write(section->mr, addr1, val, 4);
2398 addr1 += memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK;
2399 ptr = qemu_get_ram_ptr(addr1);
2402 if (unlikely(in_migration)) {
2403 if (!cpu_physical_memory_is_dirty(addr1)) {
2404 /* invalidate code */
2405 tb_invalidate_phys_page_range(addr1, addr1 + 4, 0);
2407 cpu_physical_memory_set_dirty_flags(
2408 addr1, (0xff & ~CODE_DIRTY_FLAG));
2414 /* warning: addr must be aligned */
2415 static inline void stl_phys_internal(hwaddr addr, uint32_t val,
2416 enum device_endian endian)
2419 MemoryRegionSection *section;
2423 section = address_space_translate(&address_space_memory, addr, &addr1, &l,
2425 if (l < 4 || !memory_access_is_direct(section->mr, true)) {
2426 #if defined(TARGET_WORDS_BIGENDIAN)
2427 if (endian == DEVICE_LITTLE_ENDIAN) {
2431 if (endian == DEVICE_BIG_ENDIAN) {
2435 io_mem_write(section->mr, addr1, val, 4);
2438 addr1 += memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK;
2439 ptr = qemu_get_ram_ptr(addr1);
2441 case DEVICE_LITTLE_ENDIAN:
2444 case DEVICE_BIG_ENDIAN:
2451 invalidate_and_set_dirty(addr1, 4);
2455 void stl_phys(hwaddr addr, uint32_t val)
2457 stl_phys_internal(addr, val, DEVICE_NATIVE_ENDIAN);
2460 void stl_le_phys(hwaddr addr, uint32_t val)
2462 stl_phys_internal(addr, val, DEVICE_LITTLE_ENDIAN);
2465 void stl_be_phys(hwaddr addr, uint32_t val)
2467 stl_phys_internal(addr, val, DEVICE_BIG_ENDIAN);
2471 void stb_phys(hwaddr addr, uint32_t val)
2474 cpu_physical_memory_write(addr, &v, 1);
2477 /* warning: addr must be aligned */
2478 static inline void stw_phys_internal(hwaddr addr, uint32_t val,
2479 enum device_endian endian)
2482 MemoryRegionSection *section;
2486 section = address_space_translate(&address_space_memory, addr, &addr1, &l,
2488 if (l < 2 || !memory_access_is_direct(section->mr, true)) {
2489 #if defined(TARGET_WORDS_BIGENDIAN)
2490 if (endian == DEVICE_LITTLE_ENDIAN) {
2494 if (endian == DEVICE_BIG_ENDIAN) {
2498 io_mem_write(section->mr, addr1, val, 2);
2501 addr1 += memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK;
2502 ptr = qemu_get_ram_ptr(addr1);
2504 case DEVICE_LITTLE_ENDIAN:
2507 case DEVICE_BIG_ENDIAN:
2514 invalidate_and_set_dirty(addr1, 2);
2518 void stw_phys(hwaddr addr, uint32_t val)
2520 stw_phys_internal(addr, val, DEVICE_NATIVE_ENDIAN);
2523 void stw_le_phys(hwaddr addr, uint32_t val)
2525 stw_phys_internal(addr, val, DEVICE_LITTLE_ENDIAN);
2528 void stw_be_phys(hwaddr addr, uint32_t val)
2530 stw_phys_internal(addr, val, DEVICE_BIG_ENDIAN);
2534 void stq_phys(hwaddr addr, uint64_t val)
2537 cpu_physical_memory_write(addr, &val, 8);
2540 void stq_le_phys(hwaddr addr, uint64_t val)
2542 val = cpu_to_le64(val);
2543 cpu_physical_memory_write(addr, &val, 8);
2546 void stq_be_phys(hwaddr addr, uint64_t val)
2548 val = cpu_to_be64(val);
2549 cpu_physical_memory_write(addr, &val, 8);
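/*
 * Added example (illustrative; desc_addr, buf_len and buf_addr are
 * hypothetical names): the _le_/_be_ variants store a fixed byte order
 * independent of host and target endianness, e.g. for a little-endian
 * in-memory descriptor:
 *
 *     stl_le_phys(desc_addr + 4, buf_len);
 *     stq_le_phys(desc_addr + 8, buf_addr);
 *
 * The plain stl_phys()/stq_phys() forms use the target's native order.
 */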
2552 /* virtual memory access for debug (includes writing to ROM) */
2553 int cpu_memory_rw_debug(CPUArchState *env, target_ulong addr,
2554 uint8_t *buf, int len, int is_write)
2561 page = addr & TARGET_PAGE_MASK;
2562 phys_addr = cpu_get_phys_page_debug(env, page);
2563 /* if no physical page mapped, return an error */
2564 if (phys_addr == -1)
2566 l = (page + TARGET_PAGE_SIZE) - addr;
2569 phys_addr += (addr & ~TARGET_PAGE_MASK);
2571 cpu_physical_memory_write_rom(phys_addr, buf, l);
2573 cpu_physical_memory_rw(phys_addr, buf, l, is_write);
2582 #if !defined(CONFIG_USER_ONLY)
2585 * A helper function for the _utterly broken_ virtio device model to find out if
2586 * it's running on a big endian machine. Don't do this at home kids!
2588 bool virtio_is_big_endian(void);
2589 bool virtio_is_big_endian(void)
2591 #if defined(TARGET_WORDS_BIGENDIAN)
2600 #ifndef CONFIG_USER_ONLY
2601 bool cpu_physical_memory_is_io(hwaddr phys_addr)
2603 MemoryRegionSection *section;
2606 section = address_space_translate(&address_space_memory,
2607 phys_addr, &phys_addr, &l, false);
2609 return !(memory_region_is_ram(section->mr) ||
2610 memory_region_is_romd(section->mr));