4 * Copyright (c) 2003 Fabrice Bellard
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
23 #include <sys/types.h>
27 #include "qemu-common.h"
32 #include "qemu/osdep.h"
33 #include "sysemu/kvm.h"
34 #include "hw/xen/xen.h"
35 #include "qemu/timer.h"
36 #include "qemu/config-file.h"
37 #include "exec/memory.h"
38 #include "sysemu/dma.h"
39 #include "exec/address-spaces.h"
40 #if defined(CONFIG_USER_ONLY)
42 #else /* !CONFIG_USER_ONLY */
43 #include "sysemu/xen-mapcache.h"
46 #include "exec/cpu-all.h"
48 #include "exec/cputlb.h"
49 #include "translate-all.h"
51 #include "exec/memory-internal.h"
53 //#define DEBUG_SUBPAGE
55 #if !defined(CONFIG_USER_ONLY)
57 static int in_migration;
59 RAMList ram_list = { .blocks = QTAILQ_HEAD_INITIALIZER(ram_list.blocks) };
61 static MemoryRegion *system_memory;
62 static MemoryRegion *system_io;
64 AddressSpace address_space_io;
65 AddressSpace address_space_memory;
66 DMAContext dma_context_memory;
68 MemoryRegion io_mem_rom, io_mem_notdirty;
69 static MemoryRegion io_mem_unassigned, io_mem_subpage_ram;
73 CPUArchState *first_cpu;
74 /* current CPU in the current thread. It is only valid inside
76 DEFINE_TLS(CPUArchState *,cpu_single_env);
77 /* 0 = Do not count executed instructions.
78 1 = Precise instruction counting.
79 2 = Adaptive rate instruction counting. */
82 #if !defined(CONFIG_USER_ONLY)
84 static MemoryRegionSection *phys_sections;
85 static unsigned phys_sections_nb, phys_sections_nb_alloc;
86 static uint16_t phys_section_unassigned;
87 static uint16_t phys_section_notdirty;
88 static uint16_t phys_section_rom;
89 static uint16_t phys_section_watch;
91 /* Simple allocator for PhysPageEntry nodes */
92 static PhysPageEntry (*phys_map_nodes)[L2_SIZE];
93 static unsigned phys_map_nodes_nb, phys_map_nodes_nb_alloc;
95 #define PHYS_MAP_NODE_NIL (((uint16_t)~0) >> 1)
97 static void io_mem_init(void);
98 static void memory_map_init(void);
99 static void *qemu_safe_ram_ptr(ram_addr_t addr);
101 static MemoryRegion io_mem_watch;
104 #if !defined(CONFIG_USER_ONLY)
// Make sure phys_map_nodes has room for `nodes` more entries beyond
// phys_map_nodes_nb. When it does not, the allocated count is doubled
// (minimum 16), raised further if that is still too small, and the array
// is resized with g_renew.
106 static void phys_map_node_reserve(unsigned nodes)
108 if (phys_map_nodes_nb + nodes > phys_map_nodes_nb_alloc) {
109 typedef PhysPageEntry Node[L2_SIZE];
110 phys_map_nodes_nb_alloc = MAX(phys_map_nodes_nb_alloc * 2, 16);
111 phys_map_nodes_nb_alloc = MAX(phys_map_nodes_nb_alloc,
112 phys_map_nodes_nb + nodes);
113 phys_map_nodes = g_renew(Node, phys_map_nodes,
114 phys_map_nodes_nb_alloc);
// Take the next node index from phys_map_nodes_nb (post-incremented).
// Asserts that the index is neither PHYS_MAP_NODE_NIL nor equal to
// phys_map_nodes_nb_alloc, then marks every one of the L2_SIZE entries of
// the node as a non-leaf whose ptr is PHYS_MAP_NODE_NIL.
118 static uint16_t phys_map_node_alloc(void)
123 ret = phys_map_nodes_nb++;
124 assert(ret != PHYS_MAP_NODE_NIL);
125 assert(ret != phys_map_nodes_nb_alloc);
126 for (i = 0; i < L2_SIZE; ++i) {
127 phys_map_nodes[ret][i].is_leaf = 0;
128 phys_map_nodes[ret][i].ptr = PHYS_MAP_NODE_NIL;
// Reset the count of used nodes to zero. The visible code only resets the
// counter; it does not touch the phys_map_nodes array itself.
133 static void phys_map_nodes_reset(void)
135 phys_map_nodes_nb = 0;
// Recursive helper used by phys_page_set to fill one level of the map.
// step is 1 << (level * L2_BITS). An entry that is a non-leaf with
// ptr == PHYS_MAP_NODE_NIL first gets a freshly allocated node, whose
// entries are pointed at phys_section_unassigned in the visible loop (the
// condition guarding that loop is not visible in this listing).
// The main loop starts at the entry selected by *index for this level and
// runs while *nb is non-zero and the entry is still inside the node. It
// tests whether *index is aligned to step with at least step pages left;
// the last visible line recurses with level - 1.
// NOTE(review): the statements that store `leaf` and advance *index and *nb
// are not visible here.
139 static void phys_page_set_level(PhysPageEntry *lp, hwaddr *index,
140 hwaddr *nb, uint16_t leaf,
145 hwaddr step = (hwaddr)1 << (level * L2_BITS);
147 if (!lp->is_leaf && lp->ptr == PHYS_MAP_NODE_NIL) {
148 lp->ptr = phys_map_node_alloc();
149 p = phys_map_nodes[lp->ptr];
151 for (i = 0; i < L2_SIZE; i++) {
153 p[i].ptr = phys_section_unassigned;
157 p = phys_map_nodes[lp->ptr];
159 lp = &p[(*index >> (level * L2_BITS)) & (L2_SIZE - 1)];
161 while (*nb && lp < &p[L2_SIZE]) {
162 if ((*index & (step - 1)) == 0 && *nb >= step) {
168 phys_page_set_level(lp, index, nb, leaf, level - 1);
// Entry point for updating the map of dispatch `d`: reserves
// 3 * P_L2_LEVELS nodes, then calls phys_page_set_level on d->phys_map
// starting at the top level (P_L2_LEVELS - 1) with the given index, page
// count and leaf value.
174 static void phys_page_set(AddressSpaceDispatch *d,
175 hwaddr index, hwaddr nb,
178 /* Wildly overreserve - it doesn't matter much. */
179 phys_map_node_reserve(3 * P_L2_LEVELS);
181 phys_page_set_level(&d->phys_map, &index, &nb, leaf, P_L2_LEVELS - 1);
// Look up the MemoryRegionSection for page index `index` in dispatch `d`.
// Walks from the top level down while the current entry is not a leaf and
// levels remain, selecting the child entry from the bits of `index` for
// each level. Meeting a PHYS_MAP_NODE_NIL pointer on the way returns the
// phys_section_unassigned section; otherwise the result is
// phys_sections[lp.ptr] for the entry where the walk stopped.
184 static MemoryRegionSection *phys_page_find(AddressSpaceDispatch *d, hwaddr index)
186 PhysPageEntry lp = d->phys_map;
190 for (i = P_L2_LEVELS - 1; i >= 0 && !lp.is_leaf; i--) {
191 if (lp.ptr == PHYS_MAP_NODE_NIL) {
192 return &phys_sections[phys_section_unassigned];
194 p = phys_map_nodes[lp.ptr];
195 lp = p[(index >> (i * L2_BITS)) & (L2_SIZE - 1)];
197 return &phys_sections[lp.ptr];
// Returns true when mr is none of io_mem_rom, io_mem_notdirty and
// io_mem_watch, and mr->rom_device is not set.
200 bool memory_region_is_unassigned(MemoryRegion *mr)
202 return mr != &io_mem_rom && mr != &io_mem_notdirty && !mr->rom_device
203 && mr != &io_mem_watch;
// Translate `addr` in address space `as` to a section and an offset.
// The section is found with phys_page_find on as->dispatch. addr is then
// made relative to the start of the section, *xlat receives that offset
// plus section->offset_within_region, and *plen is reduced to at most
// section->mr->size minus the section-relative offset.
// NOTE(review): the remaining parameter(s) and the return statement are not
// visible in this listing.
206 MemoryRegionSection *address_space_translate(AddressSpace *as, hwaddr addr,
207 hwaddr *xlat, hwaddr *plen,
210 MemoryRegionSection *section;
213 section = phys_page_find(as->dispatch, addr >> TARGET_PAGE_BITS);
214 /* Compute offset within MemoryRegionSection */
215 addr -= section->offset_within_address_space;
217 /* Compute offset within MemoryRegion */
218 *xlat = addr + section->offset_within_region;
220 diff = int128_sub(section->mr->size, int128_make64(addr));
221 *plen = MIN(int128_get64(diff), *plen);
// One-time initialisation. In builds without CONFIG_USER_ONLY the visible
// code initialises ram_list.mutex.
// NOTE(review): any further initialisation calls are not visible in this
// listing.
226 void cpu_exec_init_all(void)
228 #if !defined(CONFIG_USER_ONLY)
229 qemu_mutex_init(&ram_list.mutex);
235 #if !defined(CONFIG_USER_ONLY)
// post_load hook referenced by vmstate_cpu_common. `opaque` is used as a
// CPUState pointer: bit 0x01 is cleared from interrupt_request and the TLB
// of cpu->env_ptr is flushed with tlb_flush(..., 1).
237 static int cpu_common_post_load(void *opaque, int version_id)
239 CPUState *cpu = opaque;
241 /* 0x01 was CPU_INTERRUPT_EXIT. This line can be removed when the
242 version_id is increased. */
243 cpu->interrupt_request &= ~0x01;
244 tlb_flush(cpu->env_ptr, 1);
// VMState description named cpu_common. It lists two 32-bit CPUState
// fields, halted and interrupt_request, sets both minimum version ids to 1,
// and installs cpu_common_post_load as the post-load hook.
249 static const VMStateDescription vmstate_cpu_common = {
250 .name = "cpu_common",
252 .minimum_version_id = 1,
253 .minimum_version_id_old = 1,
254 .post_load = cpu_common_post_load,
255 .fields = (VMStateField []) {
256 VMSTATE_UINT32(halted, CPUState),
257 VMSTATE_UINT32(interrupt_request, CPUState),
258 VMSTATE_END_OF_LIST()
262 #define vmstate_cpu_common vmstate_dummy
// Search the CPUs reachable from first_cpu for the one whose cpu_index
// equals `index`. Returns the matching CPUState, or NULL when env is NULL
// at the end of the search (no match).
// NOTE(review): the loop header and the step to the next CPU are not
// visible in this listing.
265 CPUState *qemu_get_cpu(int index)
267 CPUArchState *env = first_cpu;
268 CPUState *cpu = NULL;
271 cpu = ENV_GET_CPU(env);
272 if (cpu->cpu_index == index) {
278 return env ? cpu : NULL;
// Call func(cpu, data) for CPUs starting at first_cpu; each CPUArchState is
// converted to its CPUState with ENV_GET_CPU before the call.
// NOTE(review): the loop header and iteration step are not visible here.
281 void qemu_for_each_cpu(void (*func)(CPUState *cpu, void *data), void *data)
283 CPUArchState *env = first_cpu;
286 func(ENV_GET_CPU(env), data);
// Set up a newly created CPU. Visible steps:
//  - env->next_cpu is cleared and the next_cpu chain is walked to its end
//    (the statement linking env there is not visible in this listing);
//  - cpu->cpu_index is assigned from cpu_index;
//  - the breakpoint and watchpoint lists of env are initialised empty;
//  - without CONFIG_USER_ONLY, the current thread id is recorded;
//  - migration state is registered: vmstate_cpu_common always appears,
//    register_savevm with cpu_save/cpu_load when CPU_SAVE_VERSION is defined
//    (asserting the class has no vmsd), and the class vmsd when it is set.
// NOTE(review): the CONFIG_USER_ONLY sections at 298 and 316 have no visible
// body; presumably they take and release a lock. Confirm against the full
// file.
291 void cpu_exec_init(CPUArchState *env)
293 CPUState *cpu = ENV_GET_CPU(env);
294 CPUClass *cc = CPU_GET_CLASS(cpu);
298 #if defined(CONFIG_USER_ONLY)
301 env->next_cpu = NULL;
304 while (*penv != NULL) {
305 penv = &(*penv)->next_cpu;
308 cpu->cpu_index = cpu_index;
310 QTAILQ_INIT(&env->breakpoints);
311 QTAILQ_INIT(&env->watchpoints);
312 #ifndef CONFIG_USER_ONLY
313 cpu->thread_id = qemu_get_thread_id();
316 #if defined(CONFIG_USER_ONLY)
319 vmstate_register(NULL, cpu_index, &vmstate_cpu_common, cpu);
320 #if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
321 register_savevm(NULL, "cpu", cpu_index, CPU_SAVE_VERSION,
322 cpu_save, cpu_load, env);
323 assert(cc->vmsd == NULL);
325 if (cc->vmsd != NULL) {
326 vmstate_register(NULL, cpu_index, cc->vmsd, cpu);
330 #if defined(TARGET_HAS_ICE)
331 #if defined(CONFIG_USER_ONLY)
// CONFIG_USER_ONLY variant: invalidate translated code for the one-byte
// range [pc, pc + 1) through tb_invalidate_phys_page_range. env is unused
// in the visible code.
332 static void breakpoint_invalidate(CPUArchState *env, target_ulong pc)
334 tb_invalidate_phys_page_range(pc, pc + 1, 0);
// Variant for builds without CONFIG_USER_ONLY: pc is translated with
// cpu_get_phys_page_debug, the in-page offset of pc is OR-ed back in, and
// the resulting address is handed to tb_invalidate_phys_addr.
337 static void breakpoint_invalidate(CPUArchState *env, target_ulong pc)
339 tb_invalidate_phys_addr(cpu_get_phys_page_debug(env, pc) |
340 (pc & ~TARGET_PAGE_MASK));
343 #endif /* TARGET_HAS_ICE */
345 #if defined(CONFIG_USER_ONLY)
// CONFIG_USER_ONLY definition of cpu_watchpoint_remove_all.
// NOTE(review): the body is not visible in this listing.
346 void cpu_watchpoint_remove_all(CPUArchState *env, int mask)
// CONFIG_USER_ONLY definition of cpu_watchpoint_insert.
// NOTE(review): the body, and therefore the return value, is not visible in
// this listing.
351 int cpu_watchpoint_insert(CPUArchState *env, target_ulong addr, target_ulong len,
352 int flags, CPUWatchpoint **watchpoint)
357 /* Add a watchpoint. */
// Insert a watchpoint covering `len` bytes at `addr` into env->watchpoints.
// len must be a non-zero power of two no larger than TARGET_PAGE_SIZE and
// addr must be aligned to len; otherwise a message is printed to stderr.
// On the success path a CPUWatchpoint is allocated, len_mask = ~(len - 1)
// is stored in it, it is linked at the head or the tail of the list and the
// TLB page containing addr is flushed.
// NOTE(review): the head/tail condition, the other field assignments, the
// use of *watchpoint and the return values are not visible in this listing.
358 int cpu_watchpoint_insert(CPUArchState *env, target_ulong addr, target_ulong len,
359 int flags, CPUWatchpoint **watchpoint)
361 target_ulong len_mask = ~(len - 1);
364 /* sanity checks: allow power-of-2 lengths, deny unaligned watchpoints */
365 if ((len & (len - 1)) || (addr & ~len_mask) ||
366 len == 0 || len > TARGET_PAGE_SIZE) {
367 fprintf(stderr, "qemu: tried to set invalid watchpoint at "
368 TARGET_FMT_lx ", len=" TARGET_FMT_lu "\n", addr, len);
371 wp = g_malloc(sizeof(*wp));
374 wp->len_mask = len_mask;
377 /* keep all GDB-injected watchpoints in front */
379 QTAILQ_INSERT_HEAD(&env->watchpoints, wp, entry);
381 QTAILQ_INSERT_TAIL(&env->watchpoints, wp, entry);
383 tlb_flush_page(env, addr);
390 /* Remove a specific watchpoint. */
// Look through env->watchpoints for an entry whose vaddr equals addr, whose
// len_mask equals ~(len - 1) and whose flags, ignoring BP_WATCHPOINT_HIT,
// equal `flags`; such an entry is removed with cpu_watchpoint_remove_by_ref.
// NOTE(review): the return statements are not visible in this listing.
391 int cpu_watchpoint_remove(CPUArchState *env, target_ulong addr, target_ulong len,
394 target_ulong len_mask = ~(len - 1);
397 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
398 if (addr == wp->vaddr && len_mask == wp->len_mask
399 && flags == (wp->flags & ~BP_WATCHPOINT_HIT)) {
400 cpu_watchpoint_remove_by_ref(env, wp);
407 /* Remove a specific watchpoint by reference. */
// Unlink `watchpoint` from env->watchpoints and flush the TLB page that
// contains its vaddr.
// NOTE(review): releasing the watchpoint memory is not visible in this
// listing.
408 void cpu_watchpoint_remove_by_ref(CPUArchState *env, CPUWatchpoint *watchpoint)
410 QTAILQ_REMOVE(&env->watchpoints, watchpoint, entry);
412 tlb_flush_page(env, watchpoint->vaddr);
417 /* Remove all matching watchpoints. */
// Remove every watchpoint of env whose flags share at least one bit with
// `mask`. The _SAFE iterator is used because entries are unlinked while the
// list is being walked.
418 void cpu_watchpoint_remove_all(CPUArchState *env, int mask)
420 CPUWatchpoint *wp, *next;
422 QTAILQ_FOREACH_SAFE(wp, &env->watchpoints, entry, next) {
423 if (wp->flags & mask)
424 cpu_watchpoint_remove_by_ref(env, wp);
429 /* Add a breakpoint. */
// Insert a breakpoint at `pc` (only when TARGET_HAS_ICE is defined).
// A CPUBreakpoint is allocated, linked at the head or the tail of
// env->breakpoints, and translated code at pc is invalidated with
// breakpoint_invalidate so the breakpoint takes effect.
// NOTE(review): the head/tail condition, the field assignments, the use of
// *breakpoint and the return values are not visible in this listing.
430 int cpu_breakpoint_insert(CPUArchState *env, target_ulong pc, int flags,
431 CPUBreakpoint **breakpoint)
433 #if defined(TARGET_HAS_ICE)
436 bp = g_malloc(sizeof(*bp));
441 /* keep all GDB-injected breakpoints in front */
443 QTAILQ_INSERT_HEAD(&env->breakpoints, bp, entry);
445 QTAILQ_INSERT_TAIL(&env->breakpoints, bp, entry);
447 breakpoint_invalidate(env, pc);
457 /* Remove a specific breakpoint. */
// With TARGET_HAS_ICE: look for a breakpoint whose pc and flags both match
// exactly and remove it with cpu_breakpoint_remove_by_ref.
// NOTE(review): the return statements are not visible in this listing.
458 int cpu_breakpoint_remove(CPUArchState *env, target_ulong pc, int flags)
460 #if defined(TARGET_HAS_ICE)
463 QTAILQ_FOREACH(bp, &env->breakpoints, entry) {
464 if (bp->pc == pc && bp->flags == flags) {
465 cpu_breakpoint_remove_by_ref(env, bp);
475 /* Remove a specific breakpoint by reference. */
// With TARGET_HAS_ICE: unlink `breakpoint` from env->breakpoints and
// invalidate translated code at its pc.
// NOTE(review): releasing the breakpoint memory is not visible in this
// listing.
476 void cpu_breakpoint_remove_by_ref(CPUArchState *env, CPUBreakpoint *breakpoint)
478 #if defined(TARGET_HAS_ICE)
479 QTAILQ_REMOVE(&env->breakpoints, breakpoint, entry);
481 breakpoint_invalidate(env, breakpoint->pc);
487 /* Remove all matching breakpoints. */
// With TARGET_HAS_ICE: remove every breakpoint of env whose flags share at
// least one bit with `mask`. The _SAFE iterator is used because entries are
// unlinked while the list is being walked.
488 void cpu_breakpoint_remove_all(CPUArchState *env, int mask)
490 #if defined(TARGET_HAS_ICE)
491 CPUBreakpoint *bp, *next;
493 QTAILQ_FOREACH_SAFE(bp, &env->breakpoints, entry, next) {
494 if (bp->flags & mask)
495 cpu_breakpoint_remove_by_ref(env, bp);
500 /* enable or disable single step mode. EXCP_DEBUG is returned by the
501 CPU loop after each instruction */
// With TARGET_HAS_ICE: when `enabled` differs from the stored
// env->singlestep_enabled, store the new value. The visible lines then show
// a call to kvm_update_guest_debug(env, 0) and comments about flushing all
// translated code.
// NOTE(review): the condition selecting between those two paths and the
// flush call itself are not visible in this listing.
502 void cpu_single_step(CPUArchState *env, int enabled)
504 #if defined(TARGET_HAS_ICE)
505 if (env->singlestep_enabled != enabled) {
506 env->singlestep_enabled = enabled;
508 kvm_update_guest_debug(env, 0);
510 /* must flush all the translated code to avoid inconsistencies */
511 /* XXX: only flush what is necessary */
// Set both exit_request and tcg_exit_req to 1 on the CPUState that owns env.
518 void cpu_exit(CPUArchState *env)
520 CPUState *cpu = ENV_GET_CPU(env);
522 cpu->exit_request = 1;
523 cpu->tcg_exit_req = 1;
// Report a fatal error for env. The printf-style message is written to
// stderr with the prefix "qemu: fatal: ", followed by a CPU state dump
// (FPU and CCOP included). When logging is enabled the same message and
// state are written to the log, using the second va_list ap2.
// In CONFIG_USER_ONLY builds the SIGABRT disposition is set to SIG_DFL with
// a full signal mask.
// NOTE(review): act.sa_flags is not assigned in the visible lines before
// sigaction() is called; confirm it is initialised in the full file.
// NOTE(review): va_start/va_copy/va_end and the final termination call are
// not visible in this listing.
526 void cpu_abort(CPUArchState *env, const char *fmt, ...)
533 fprintf(stderr, "qemu: fatal: ");
534 vfprintf(stderr, fmt, ap);
535 fprintf(stderr, "\n");
536 cpu_dump_state(env, stderr, fprintf, CPU_DUMP_FPU | CPU_DUMP_CCOP);
537 if (qemu_log_enabled()) {
538 qemu_log("qemu: fatal: ");
539 qemu_log_vprintf(fmt, ap2);
541 log_cpu_state(env, CPU_DUMP_FPU | CPU_DUMP_CCOP);
547 #if defined(CONFIG_USER_ONLY)
549 struct sigaction act;
550 sigfillset(&act.sa_mask);
551 act.sa_handler = SIG_DFL;
552 sigaction(SIGABRT, &act, NULL);
558 CPUArchState *cpu_copy(CPUArchState *env)
560 CPUArchState *new_env = cpu_init(env->cpu_model_str);
561 CPUArchState *next_cpu = new_env->next_cpu;
562 #if defined(TARGET_HAS_ICE)
567 memcpy(new_env, env, sizeof(CPUArchState));
569 /* Preserve chaining. */
570 new_env->next_cpu = next_cpu;
572 /* Clone all break/watchpoints.
573 Note: Once we support ptrace with hw-debug register access, make sure
574 BP_CPU break/watchpoints are handled correctly on clone. */
575 QTAILQ_INIT(&env->breakpoints);
576 QTAILQ_INIT(&env->watchpoints);
577 #if defined(TARGET_HAS_ICE)
578 QTAILQ_FOREACH(bp, &env->breakpoints, entry) {
579 cpu_breakpoint_insert(new_env, bp->pc, bp->flags, NULL);
581 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
582 cpu_watchpoint_insert(new_env, wp->vaddr, (~wp->len_mask) + 1,
590 #if !defined(CONFIG_USER_ONLY)
// Reset the TLB dirty state for the ram range [start, end).
// start is converted to a host address with qemu_safe_ram_ptr. The check
// that follows compares the host-address distance between start and
// end - 1 with the same distance in ram addresses; they differ when the
// range spans more than one block. cpu_tlb_reset_dirty_all is then called
// with the host start address and `length`.
// NOTE(review): the action taken when the check fails is not visible in
// this listing.
591 static void tlb_reset_dirty_range_all(ram_addr_t start, ram_addr_t end,
596 /* we modify the TLB cache so that the dirty bit will be set again
597 when accessing the range */
598 start1 = (uintptr_t)qemu_safe_ram_ptr(start);
599 /* Check that we don't span multiple blocks - this breaks the
600 address comparisons below. */
601 if ((uintptr_t)qemu_safe_ram_ptr(end - 1) - start1
602 != (end - 1) - start) {
605 cpu_tlb_reset_dirty_all(start1, length);
609 /* Note: start and end must be within the same ram block. */
// start is rounded down and end rounded up to target page boundaries, and
// length is the size of the rounded range. dirty_flags is applied to that
// range with cpu_physical_memory_mask_dirty_range, and the TLB dirty state
// is reset with tlb_reset_dirty_range_all.
// NOTE(review): the conditions guarding these calls, if any, are not
// visible in this listing.
610 void cpu_physical_memory_reset_dirty(ram_addr_t start, ram_addr_t end,
615 start &= TARGET_PAGE_MASK;
616 end = TARGET_PAGE_ALIGN(end);
618 length = end - start;
621 cpu_physical_memory_mask_dirty_range(start, length, dirty_flags);
624 tlb_reset_dirty_range_all(start, end, length);
// Record `enable` in the file-level in_migration flag.
// NOTE(review): the return value is not visible in this listing.
628 static int cpu_physical_memory_set_dirty_tracking(int enable)
631 in_migration = enable;
// Compute the iotlb value for a TLB entry that maps `section`.
//  - RAM-backed region: the page-aligned ram address of the region (a
//    continuation line of that expression is not visible in this listing),
//    OR-ed with phys_section_notdirty when the section is writable or with
//    phys_section_rom when it is read-only.
//  - Otherwise: the index of `section` within phys_sections (further
//    adjustments, if any, are not visible here).
// Afterwards the watchpoints of env are scanned. If one lies on the page
// `vaddr` and either the mapping is writable or the watchpoint has
// BP_MEM_READ, the iotlb is replaced by phys_section_watch + paddr and
// TLB_MMIO is set in *address, so accesses go through the watch handlers.
635 hwaddr memory_region_section_get_iotlb(CPUArchState *env,
636 MemoryRegionSection *section,
638 hwaddr paddr, hwaddr xlat,
640 target_ulong *address)
645 if (memory_region_is_ram(section->mr)) {
647 iotlb = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
649 if (!section->readonly) {
650 iotlb |= phys_section_notdirty;
652 iotlb |= phys_section_rom;
655 iotlb = section - phys_sections;
659 /* Make accesses to pages with watchpoints go via the
660 watchpoint trap routines. */
661 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
662 if (vaddr == (wp->vaddr & TARGET_PAGE_MASK)) {
663 /* Avoid trapping reads of pages with a write breakpoint. */
664 if ((prot & PAGE_WRITE) || (wp->flags & BP_MEM_READ)) {
665 iotlb = phys_section_watch + paddr;
666 *address |= TLB_MMIO;
674 #endif /* !defined(CONFIG_USER_ONLY) */
676 #if !defined(CONFIG_USER_ONLY)
678 #define SUBPAGE_IDX(addr) ((addr) & ~TARGET_PAGE_MASK)
// State of one page that is split between several sections.
// sub_section holds one phys_sections index per byte offset inside a target
// page; subpage_read and subpage_write index it with SUBPAGE_IDX(addr).
// NOTE(review): the other members (an iomem MemoryRegion is referenced
// elsewhere in this file) are not visible in this listing.
679 typedef struct subpage_t {
682 uint16_t sub_section[TARGET_PAGE_SIZE];
685 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
687 static subpage_t *subpage_init(hwaddr base);
// Release what hangs off the section with index section_index. The visible
// lines recover the enclosing subpage_t from the section's MemoryRegion and
// destroy subpage->iomem.
// NOTE(review): the condition guarding this (presumably a subpage check)
// and any freeing of the subpage are not visible in this listing.
688 static void destroy_page_desc(uint16_t section_index)
690 MemoryRegionSection *section = &phys_sections[section_index];
691 MemoryRegion *mr = section->mr;
694 subpage_t *subpage = container_of(mr, subpage_t, iomem);
695 memory_region_destroy(&subpage->iomem);
// Recursively tear down the map below entry lp. An entry whose ptr is
// PHYS_MAP_NODE_NIL is tested for first. Otherwise all L2_SIZE entries of
// the node are visited, each either by recursing with level - 1 or by
// calling destroy_page_desc on its ptr, and lp->ptr is finally reset to
// PHYS_MAP_NODE_NIL.
// NOTE(review): the condition choosing between the recursion and
// destroy_page_desc is not visible in this listing.
700 static void destroy_l2_mapping(PhysPageEntry *lp, unsigned level)
705 if (lp->ptr == PHYS_MAP_NODE_NIL) {
709 p = phys_map_nodes[lp->ptr];
710 for (i = 0; i < L2_SIZE; ++i) {
712 destroy_l2_mapping(&p[i], level - 1);
714 destroy_page_desc(p[i].ptr);
718 lp->ptr = PHYS_MAP_NODE_NIL;
// Tear down the whole map of dispatch `d` starting from the top level, then
// reset the node allocator counter with phys_map_nodes_reset.
721 static void destroy_all_mappings(AddressSpaceDispatch *d)
723 destroy_l2_mapping(&d->phys_map, P_L2_LEVELS - 1);
724 phys_map_nodes_reset();
// Append a copy of *section to the phys_sections array and return its index.
// Asserts that the number of sections stays below TARGET_PAGE_SIZE. When
// the array is full its capacity is doubled (minimum 16) with g_renew.
727 static uint16_t phys_section_add(MemoryRegionSection *section)
729 /* The physical section number is ORed with a page-aligned
730 * pointer to produce the iotlb entries. Thus it should
731 * never overflow into the page-aligned value.
733 assert(phys_sections_nb < TARGET_PAGE_SIZE);
735 if (phys_sections_nb == phys_sections_nb_alloc) {
736 phys_sections_nb_alloc = MAX(phys_sections_nb_alloc * 2, 16);
737 phys_sections = g_renew(MemoryRegionSection, phys_sections,
738 phys_sections_nb_alloc);
740 phys_sections[phys_sections_nb] = *section;
741 return phys_sections_nb++;
// Reset the number of used entries in phys_sections to zero. The visible
// code does not free the array.
744 static void phys_sections_clear(void)
746 phys_sections_nb = 0;
// Register a section that covers only part of a page.
// The section currently mapped at `base` is looked up and must be either a
// subpage or io_mem_unassigned (asserted). If it is not yet a subpage, one
// is created with subpage_init and mapped for that single page through a
// new phys_sections entry. Otherwise the existing subpage_t is recovered
// from the mapped region. Finally the section is added to phys_sections
// and registered in the subpage for the in-page range [start, end], where
// start is the in-page offset of the section and end = start + size - 1.
// NOTE(review): the continuation of the `base` initialiser is not visible
// in this listing; presumably it masks the address down to the page start.
749 static void register_subpage(AddressSpaceDispatch *d, MemoryRegionSection *section)
752 hwaddr base = section->offset_within_address_space
754 MemoryRegionSection *existing = phys_page_find(d, base >> TARGET_PAGE_BITS);
755 MemoryRegionSection subsection = {
756 .offset_within_address_space = base,
757 .size = TARGET_PAGE_SIZE,
761 assert(existing->mr->subpage || existing->mr == &io_mem_unassigned);
763 if (!(existing->mr->subpage)) {
764 subpage = subpage_init(base);
765 subsection.mr = &subpage->iomem;
766 phys_page_set(d, base >> TARGET_PAGE_BITS, 1,
767 phys_section_add(&subsection));
769 subpage = container_of(existing->mr, subpage_t, iomem);
771 start = section->offset_within_address_space & ~TARGET_PAGE_MASK;
772 end = start + section->size - 1;
773 subpage_register(subpage, start, end, phys_section_add(section));
// Register a section made of whole pages: the section is added to
// phys_sections and size >> TARGET_PAGE_BITS pages starting at page
// addr >> TARGET_PAGE_BITS are mapped with phys_page_set.
// NOTE(review): the declaration of addr and the last argument of
// phys_page_set are not visible in this listing.
777 static void register_multipage(AddressSpaceDispatch *d, MemoryRegionSection *section)
779 hwaddr start_addr = section->offset_within_address_space;
780 ram_addr_t size = section->size;
782 uint16_t section_index = phys_section_add(section);
787 phys_page_set(d, addr >> TARGET_PAGE_BITS, size >> TARGET_PAGE_BITS,
791 QEMU_BUILD_BUG_ON(TARGET_PHYS_ADDR_SPACE_BITS > MAX_PHYS_ADDR_SPACE_BITS)
// Works on a by-value copy of the section and clamps its size: the end
// address (offset + size) is limited with MIN against a bound, and the
// start offset is subtracted again to obtain the new size.
// NOTE(review): the bound passed to MIN and the return statement are not
// visible in this listing.
793 static MemoryRegionSection limit(MemoryRegionSection section)
795 section.size = MIN(section.offset_within_address_space + section.size,
797 - section.offset_within_region_placeholder_do_not_use
// Listener callback that adds `section` to the dispatch owning `listener`.
// The section is first passed through limit(). The work is then split up:
//  - a head piece, when the start is not page aligned or the section is
//    smaller than a page, is registered with register_subpage; its size is
//    capped at the distance to the next page boundary;
//  - while at least one page remains, pieces are registered either one page
//    at a time with register_subpage (when offset_within_region is not page
//    aligned) or as a page-multiple run with register_multipage;
//  - a final register_subpage call appears after the loop.
// `remain` is advanced by the size of each registered piece.
// NOTE(review): the assignments of `now` from `remain` and the guard of the
// final call are not visible in this listing.
802 static void mem_add(MemoryListener *listener, MemoryRegionSection *section)
804 AddressSpaceDispatch *d = container_of(listener, AddressSpaceDispatch, listener);
805 MemoryRegionSection now = limit(*section), remain = limit(*section);
807 if ((now.offset_within_address_space & ~TARGET_PAGE_MASK)
808 || (now.size < TARGET_PAGE_SIZE)) {
809 now.size = MIN(TARGET_PAGE_ALIGN(now.offset_within_address_space)
810 - now.offset_within_address_space,
812 register_subpage(d, &now);
813 remain.size -= now.size;
814 remain.offset_within_address_space += now.size;
815 remain.offset_within_region += now.size;
817 while (remain.size >= TARGET_PAGE_SIZE) {
819 if (remain.offset_within_region & ~TARGET_PAGE_MASK) {
820 now.size = TARGET_PAGE_SIZE;
821 register_subpage(d, &now);
823 now.size &= TARGET_PAGE_MASK;
824 register_multipage(d, &now);
826 remain.size -= now.size;
827 remain.offset_within_address_space += now.size;
828 remain.offset_within_region += now.size;
832 register_subpage(d, &now);
// Forward to kvm_flush_coalesced_mmio_buffer.
// NOTE(review): the guard around the call (presumably a KVM-enabled check)
// is not visible in this listing.
836 void qemu_flush_coalesced_mmio_buffer(void)
839 kvm_flush_coalesced_mmio_buffer();
// Acquire ram_list.mutex.
842 void qemu_mutex_lock_ramlist(void)
844 qemu_mutex_lock(&ram_list.mutex);
// Release ram_list.mutex.
847 void qemu_mutex_unlock_ramlist(void)
849 qemu_mutex_unlock(&ram_list.mutex);
852 #if defined(__linux__) && !defined(TARGET_S390X)
856 #define HUGETLBFS_MAGIC 0x958458f6
// Query the filesystem holding `path` with statfs, retrying while the call
// fails with EINTR. A warning is printed to stderr when the filesystem type
// is not HUGETLBFS_MAGIC.
// NOTE(review): the error handling after the retry loop and the returned
// value are not visible in this listing.
858 static long gethugepagesize(const char *path)
864 ret = statfs(path, &fs);
865 } while (ret != 0 && errno == EINTR);
872 if (fs.f_type != HUGETLBFS_MAGIC)
873 fprintf(stderr, "Warning: path not on HugeTLBFS: %s\n", path);
// Back a RAM block with a temporary file under `path`. Visible steps:
//  - the huge page size of `path` is obtained with gethugepagesize and
//    compared with the requested size;
//  - with KVM enabled but without kvm_has_sync_mmu an error is printed;
//  - a file name "<path>/qemu_back_mem.<name>.XXXXXX" is built from the
//    region name (a sanitising loop walks the name) and created with
//    mkstemp;
//  - the size is rounded up to a multiple of the huge page size and the
//    file is extended with ftruncate;
//  - the file is mapped read/write, either shared and populated when
//    mem_prealloc is set or private otherwise; a mapping failure is
//    reported with perror.
// NOTE(review): the early-return paths, the character replacement inside
// the sanitising loop, the preprocessor condition between the two mmap
// calls and the return value are not visible in this listing.
878 static void *file_ram_alloc(RAMBlock *block,
883 char *sanitized_name;
890 unsigned long hpagesize;
892 hpagesize = gethugepagesize(path);
897 if (memory < hpagesize) {
901 if (kvm_enabled() && !kvm_has_sync_mmu()) {
902 fprintf(stderr, "host lacks kvm mmu notifiers, -mem-path unsupported\n");
906 /* Make name safe to use with mkstemp by replacing '/' with '_'. */
907 sanitized_name = g_strdup(block->mr->name);
908 for (c = sanitized_name; *c != '\0'; c++) {
913 filename = g_strdup_printf("%s/qemu_back_mem.%s.XXXXXX", path,
915 g_free(sanitized_name);
917 fd = mkstemp(filename);
919 perror("unable to create backing store for hugepages");
926 memory = (memory+hpagesize-1) & ~(hpagesize-1);
929 * ftruncate is not supported by hugetlbfs in older
930 * hosts, so don't bother bailing out on errors.
931 * If anything goes wrong with it under other filesystems,
934 if (ftruncate(fd, memory))
938 /* NB: MAP_POPULATE won't exhaustively alloc all phys pages in the case
939 * MAP_PRIVATE is requested. For mem_prealloc we mmap as MAP_SHARED
940 * to sidestep this quirk.
942 flags = mem_prealloc ? MAP_POPULATE | MAP_SHARED : MAP_PRIVATE;
943 area = mmap(0, memory, PROT_READ | PROT_WRITE, flags, fd, 0);
945 area = mmap(0, memory, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
947 if (area == MAP_FAILED) {
948 perror("file_ram_alloc: can't mmap RAM pages");
// Choose a ram offset for a new block of `size` bytes (size must be
// non-zero, asserted). For every existing block the end of the block and
// the nearest block offset at or after that end are computed. A gap is a
// candidate when it is at least `size` bytes and smaller than the best gap
// found so far. When no offset was found a message is printed to stderr.
// NOTE(review): the empty-list result, the statements recording the best
// gap, the handling after the error message and the return statement are
// not visible in this listing.
957 static ram_addr_t find_ram_offset(ram_addr_t size)
959 RAMBlock *block, *next_block;
960 ram_addr_t offset = RAM_ADDR_MAX, mingap = RAM_ADDR_MAX;
962 assert(size != 0); /* it would hand out same offset multiple times */
964 if (QTAILQ_EMPTY(&ram_list.blocks))
967 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
968 ram_addr_t end, next = RAM_ADDR_MAX;
970 end = block->offset + block->length;
972 QTAILQ_FOREACH(next_block, &ram_list.blocks, next) {
973 if (next_block->offset >= end) {
974 next = MIN(next, next_block->offset);
977 if (next - end >= size && next - end < mingap) {
983 if (offset == RAM_ADDR_MAX) {
984 fprintf(stderr, "Failed to find gap of requested size: %" PRIu64 "\n",
// Compute the maximum of offset + length over all blocks in
// ram_list.blocks.
// NOTE(review): the initial value of `last` and the return statement are
// not visible in this listing.
992 ram_addr_t last_ram_offset(void)
997 QTAILQ_FOREACH(block, &ram_list.blocks, next)
998 last = MAX(last, block->offset + block->length);
// Apply the "dump-guest-core" machine option to the host range
// [addr, addr + size). When the option is false the range is marked with
// QEMU_MADV_DONTDUMP via qemu_madvise. The visible error path reports the
// failure with perror and an explanatory message on stderr.
// NOTE(review): the first half of the option condition and the test of
// `ret` are not visible in this listing.
1003 static void qemu_ram_setup_dump(void *addr, ram_addr_t size)
1006 QemuOpts *machine_opts;
1008 /* Use MADV_DONTDUMP, if user doesn't want the guest memory in the core */
1009 machine_opts = qemu_opts_find(qemu_find_opts("machine"), 0);
1011 !qemu_opt_get_bool(machine_opts, "dump-guest-core", true)) {
1012 ret = qemu_madvise(addr, size, QEMU_MADV_DONTDUMP);
1014 perror("qemu_madvise");
1015 fprintf(stderr, "madvise doesn't support MADV_DONTDUMP, "
1016 "but dump_guest_core=off specified\n");
// Give the RAM block at offset `addr` its identifier string.
// The block is located by comparing offsets and its idstr must still be
// empty (asserted). When a device path is available it is written first,
// followed by "/", and `name` is appended. Under the ramlist mutex the
// list is then scanned for another block with the same idstr; a duplicate
// is reported on stderr.
// NOTE(review): the condition on `dev`, the freeing of the device path and
// the action after the duplicate message are not visible in this listing.
1021 void qemu_ram_set_idstr(ram_addr_t addr, const char *name, DeviceState *dev)
1023 RAMBlock *new_block, *block;
1026 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1027 if (block->offset == addr) {
1033 assert(!new_block->idstr[0]);
1036 char *id = qdev_get_dev_path(dev);
1038 snprintf(new_block->idstr, sizeof(new_block->idstr), "%s/", id);
1042 pstrcat(new_block->idstr, sizeof(new_block->idstr), name);
1044 /* This assumes the iothread lock is taken here too. */
1045 qemu_mutex_lock_ramlist();
1046 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1047 if (block != new_block && !strcmp(block->idstr, new_block->idstr)) {
1048 fprintf(stderr, "RAMBlock \"%s\" already registered, abort!\n",
1053 qemu_mutex_unlock_ramlist();
// Request QEMU_MADV_MERGEABLE for the host range [addr, addr + len) unless
// the "mem-merge" machine option is false. Returns the result of
// qemu_madvise on the enabled path.
// NOTE(review): the value returned when merging is disabled is not visible
// in this listing.
1056 static int memory_try_enable_merging(void *addr, size_t len)
1060 opts = qemu_opts_find(qemu_find_opts("machine"), 0);
1061 if (opts && !qemu_opt_get_bool(opts, "mem-merge", true)) {
1062 /* disabled by the user */
1066 return qemu_madvise(addr, len, QEMU_MADV_MERGEABLE);
// Create a new RAM block of `size` bytes (rounded up to a target page) and
// return its ram offset.
// Under the ramlist mutex the offset is chosen with find_ram_offset. Host
// memory comes from one of the visible sources:
//  - the caller-supplied `host` pointer, flagged RAM_PREALLOC_MASK;
//  - file_ram_alloc on Linux hosts (not TARGET_S390X), with an anonymous
//    allocation as fallback when that returns NULL;
//  - xen_ram_alloc, kvm_ram_alloc or qemu_anon_ram_alloc.
// The block is inserted before the first shorter block (or at the tail) so
// the list stays sorted by decreasing length, and the mru_block cache is
// cleared. After unlocking, the dirty bitmap is grown to cover
// last_ram_offset, the new pages are zeroed in it and then marked dirty
// with 0xff. Dump and huge-page advice are applied to the host range and
// KVM is notified through kvm_setup_guest_memory.
// NOTE(review): several of the conditions selecting between the allocation
// paths are not visible in this listing.
1069 ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
1072 RAMBlock *block, *new_block;
1074 size = TARGET_PAGE_ALIGN(size);
1075 new_block = g_malloc0(sizeof(*new_block));
1077 /* This assumes the iothread lock is taken here too. */
1078 qemu_mutex_lock_ramlist();
1080 new_block->offset = find_ram_offset(size);
1082 new_block->host = host;
1083 new_block->flags |= RAM_PREALLOC_MASK;
1086 #if defined (__linux__) && !defined(TARGET_S390X)
1087 new_block->host = file_ram_alloc(new_block, size, mem_path);
1088 if (!new_block->host) {
1089 new_block->host = qemu_anon_ram_alloc(size);
1090 memory_try_enable_merging(new_block->host, size);
1093 fprintf(stderr, "-mem-path option unsupported\n");
1097 if (xen_enabled()) {
1098 xen_ram_alloc(new_block->offset, size, mr);
1099 } else if (kvm_enabled()) {
1100 /* some s390/kvm configurations have special constraints */
1101 new_block->host = kvm_ram_alloc(size);
1103 new_block->host = qemu_anon_ram_alloc(size);
1105 memory_try_enable_merging(new_block->host, size);
1108 new_block->length = size;
1110 /* Keep the list sorted from biggest to smallest block. */
1111 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1112 if (block->length < new_block->length) {
1117 QTAILQ_INSERT_BEFORE(block, new_block, next);
1119 QTAILQ_INSERT_TAIL(&ram_list.blocks, new_block, next);
1121 ram_list.mru_block = NULL;
1124 qemu_mutex_unlock_ramlist();
1126 ram_list.phys_dirty = g_realloc(ram_list.phys_dirty,
1127 last_ram_offset() >> TARGET_PAGE_BITS);
1128 memset(ram_list.phys_dirty + (new_block->offset >> TARGET_PAGE_BITS),
1129 0, size >> TARGET_PAGE_BITS);
1130 cpu_physical_memory_set_dirty_range(new_block->offset, size, 0xff);
1132 qemu_ram_setup_dump(new_block->host, size);
1133 qemu_madvise(new_block->host, size, QEMU_MADV_HUGEPAGE);
1136 kvm_setup_guest_memory(new_block->host, size);
1138 return new_block->offset;
// Convenience wrapper: allocate a RAM block with no caller-supplied host
// pointer by calling qemu_ram_alloc_from_ptr(size, NULL, mr).
1141 ram_addr_t qemu_ram_alloc(ram_addr_t size, MemoryRegion *mr)
1143 return qemu_ram_alloc_from_ptr(size, NULL, mr);
// Under the ramlist mutex, find the block whose offset equals `addr`,
// unlink it from ram_list.blocks and clear the mru_block cache. The visible
// code does not release the host memory of the block.
// NOTE(review): the freeing of the RAMBlock structure is not visible in
// this listing.
1146 void qemu_ram_free_from_ptr(ram_addr_t addr)
1150 /* This assumes the iothread lock is taken here too. */
1151 qemu_mutex_lock_ramlist();
1152 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1153 if (addr == block->offset) {
1154 QTAILQ_REMOVE(&ram_list.blocks, block, next);
1155 ram_list.mru_block = NULL;
1161 qemu_mutex_unlock_ramlist();
// Under the ramlist mutex, find the block whose offset equals `addr`,
// unlink it, clear the mru_block cache and release its host memory. Blocks
// flagged RAM_PREALLOC_MASK take a separate branch whose body is not
// visible here. With mem_path set (Linux, not TARGET_S390X) the visible
// calls are munmap and qemu_anon_ram_free. Otherwise the Xen map cache
// entry is invalidated under Xen, or qemu_anon_ram_free is used.
// NOTE(review): the conditions between the individual release calls and the
// freeing of the RAMBlock structure are not visible in this listing.
1164 void qemu_ram_free(ram_addr_t addr)
1168 /* This assumes the iothread lock is taken here too. */
1169 qemu_mutex_lock_ramlist();
1170 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1171 if (addr == block->offset) {
1172 QTAILQ_REMOVE(&ram_list.blocks, block, next);
1173 ram_list.mru_block = NULL;
1175 if (block->flags & RAM_PREALLOC_MASK) {
1177 } else if (mem_path) {
1178 #if defined (__linux__) && !defined(TARGET_S390X)
1180 munmap(block->host, block->length);
1183 qemu_anon_ram_free(block->host, block->length);
1189 if (xen_enabled()) {
1190 xen_invalidate_map_cache_entry(block->host);
1192 qemu_anon_ram_free(block->host, block->length);
1199 qemu_mutex_unlock_ramlist();
// Replace the host mapping of `length` bytes at ram address `addr` with a
// fresh mapping at the same host virtual address.
// The owning block is found by checking addr - block->offset against the
// block length. Blocks flagged RAM_PREALLOC_MASK take a separate branch
// whose body is not visible here. Otherwise the old range is unmapped and
// mapped again: from block->fd at the same offset on one path, or as
// anonymous memory (shared and executable under TARGET_S390X with KVM,
// private otherwise). If the new mapping does not land at vaddr an error is
// printed to stderr. Merging and dump settings are then re-applied to the
// range.
// NOTE(review): the conditions selecting each mmap variant and the action
// after the error message are not visible in this listing.
1204 void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
1211 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1212 offset = addr - block->offset;
1213 if (offset < block->length) {
1214 vaddr = block->host + offset;
1215 if (block->flags & RAM_PREALLOC_MASK) {
1219 munmap(vaddr, length);
1221 #if defined(__linux__) && !defined(TARGET_S390X)
1224 flags |= mem_prealloc ? MAP_POPULATE | MAP_SHARED :
1227 flags |= MAP_PRIVATE;
1229 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1230 flags, block->fd, offset);
1232 flags |= MAP_PRIVATE | MAP_ANONYMOUS;
1233 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1240 #if defined(TARGET_S390X) && defined(CONFIG_KVM)
1241 flags |= MAP_SHARED | MAP_ANONYMOUS;
1242 area = mmap(vaddr, length, PROT_EXEC|PROT_READ|PROT_WRITE,
1245 flags |= MAP_PRIVATE | MAP_ANONYMOUS;
1246 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1250 if (area != vaddr) {
1251 fprintf(stderr, "Could not remap addr: "
1252 RAM_ADDR_FMT "@" RAM_ADDR_FMT "\n",
1256 memory_try_enable_merging(vaddr, length);
1257 qemu_ram_setup_dump(vaddr, length);
1263 #endif /* !_WIN32 */
1265 /* Return a host pointer to ram allocated with qemu_ram_alloc.
1266 With the exception of the softmmu code in this file, this should
1267 only be used for local memory (e.g. video ram) that the device owns,
1268 and knows it isn't going to access beyond the end of the block.
1270 It should not be used for general purpose DMA.
1271 Use cpu_physical_memory_map/cpu_physical_memory_rw instead.
// Translate ram address `addr` to a host pointer.
// The cached ram_list.mru_block is tried first, then the block list is
// searched for a block with addr - offset < length. When no block matches
// a "Bad ram offset" message is printed to stderr. The block that was found
// becomes the new mru_block. Under Xen, a block at offset 0 is mapped per
// request through xen_map_cache(addr, 0, 0), and a block without a host
// pointer is mapped as a whole. The result is block->host plus the offset
// of addr inside the block.
// NOTE(review): the jumps between the lookup stages and the action after
// the error message are not visible in this listing.
1273 void *qemu_get_ram_ptr(ram_addr_t addr)
1277 /* The list is protected by the iothread lock here. */
1278 block = ram_list.mru_block;
1279 if (block && addr - block->offset < block->length) {
1282 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1283 if (addr - block->offset < block->length) {
1288 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
1292 ram_list.mru_block = block;
1293 if (xen_enabled()) {
1294 /* We need to check if the requested address is in the RAM
1295 * because we don't want to map the entire memory in QEMU.
1296 * In that case just map until the end of the page.
1298 if (block->offset == 0) {
1299 return xen_map_cache(addr, 0, 0);
1300 } else if (block->host == NULL) {
1302 xen_map_cache(block->offset, block->length, 1);
1305 return block->host + (addr - block->offset);
1308 /* Return a host pointer to ram allocated with qemu_ram_alloc. Same as
1309 * qemu_get_ram_ptr but do not touch ram_list.mru_block.
1311 * ??? Is this still necessary?
// Translate ram address `addr` to a host pointer without reading or
// updating ram_list.mru_block. The block list is searched for a block with
// addr - offset < length. Under Xen, a block at offset 0 is mapped per
// request through xen_map_cache(addr, 0, 0) and a block without a host
// pointer is mapped as a whole. The result is block->host plus the offset
// of addr inside the block. When no block matches a "Bad ram offset"
// message is printed to stderr.
// NOTE(review): the action after the error message is not visible in this
// listing.
1313 static void *qemu_safe_ram_ptr(ram_addr_t addr)
1317 /* The list is protected by the iothread lock here. */
1318 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1319 if (addr - block->offset < block->length) {
1320 if (xen_enabled()) {
1321 /* We need to check if the requested address is in the RAM
1322 * because we don't want to map the entire memory in QEMU.
1323 * In that case just map until the end of the page.
1325 if (block->offset == 0) {
1326 return xen_map_cache(addr, 0, 0);
1327 } else if (block->host == NULL) {
1329 xen_map_cache(block->offset, block->length, 1);
1332 return block->host + (addr - block->offset);
1336 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
1342 /* Return a host pointer to guest's ram. Similar to qemu_get_ram_ptr
1343 * but takes a size argument */
// Translate ram address `addr` to a host pointer for an access of *size
// bytes. Under Xen the request goes to xen_map_cache(addr, *size, 1).
// Otherwise the block containing addr is searched for; when the access
// would run past the end of that block, *size is reduced to the bytes
// remaining in the block. The result is block->host plus the offset of
// addr inside the block. When no block matches a "Bad ram offset" message
// is printed to stderr.
// NOTE(review): the handling of a zero *size and the action after the error
// message are not visible in this listing.
1344 static void *qemu_ram_ptr_length(ram_addr_t addr, ram_addr_t *size)
1349 if (xen_enabled()) {
1350 return xen_map_cache(addr, *size, 1);
1354 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1355 if (addr - block->offset < block->length) {
1356 if (addr - block->offset + *size > block->length)
1357 *size = block->length - addr + block->offset;
1358 return block->host + (addr - block->offset);
1362 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
// Translate host pointer `ptr` back to a ram address stored in *ram_addr.
// Under Xen the lookup is delegated to xen_ram_addr_from_mapcache.
// Otherwise the block list is searched for a block that has a host pointer
// and for which host - block->host is below the block length; the result
// is the block offset plus that distance.
// NOTE(review): the return values (the caller in this file treats non-zero
// as failure) are not visible in this listing.
1367 int qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr)
1370 uint8_t *host = ptr;
1372 if (xen_enabled()) {
1373 *ram_addr = xen_ram_addr_from_mapcache(ptr);
1377 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1378 /* This case happens when the block is not mapped. */
1379 if (block->host == NULL) {
1382 if (host - block->host < block->length) {
1383 *ram_addr = block->offset + (host - block->host);
1391 /* Some of the softmmu routines need to translate from a host pointer
1392 (typically a TLB entry) back to a ram offset. */
// Wrapper around qemu_ram_addr_from_host for callers that cannot handle a
// failed lookup: a non-zero result from the lookup is reported on stderr
// with the offending pointer.
// NOTE(review): the action after the message and the return statement are
// not visible in this listing.
1393 ram_addr_t qemu_ram_addr_from_host_nofail(void *ptr)
1395 ram_addr_t ram_addr;
1397 if (qemu_ram_addr_from_host(ptr, &ram_addr)) {
1398 fprintf(stderr, "Bad ram pointer %p\n", ptr);
// Write handler installed through notdirty_mem_ops.
// If CODE_DIRTY_FLAG is not set for the page, translated code for the
// written range is invalidated first and the dirty flags are read again.
// The value is then stored into RAM with stb_p, stw_p or stl_p (the switch
// on `size` is not visible in this listing). All dirty flags except
// CODE_DIRTY_FLAG are set for the page. Once every flag is set (0xff),
// tlb_set_dirty is called for the current CPU and its mem_io_vaddr.
1404 static void notdirty_mem_write(void *opaque, hwaddr ram_addr,
1405 uint64_t val, unsigned size)
1408 dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
1409 if (!(dirty_flags & CODE_DIRTY_FLAG)) {
1410 tb_invalidate_phys_page_fast(ram_addr, size);
1411 dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
1415 stb_p(qemu_get_ram_ptr(ram_addr), val);
1418 stw_p(qemu_get_ram_ptr(ram_addr), val);
1421 stl_p(qemu_get_ram_ptr(ram_addr), val);
1426 dirty_flags |= (0xff & ~CODE_DIRTY_FLAG);
1427 cpu_physical_memory_set_dirty_flags(ram_addr, dirty_flags);
1428 /* we remove the notdirty callback only if the code has been
1430 if (dirty_flags == 0xff)
1431 tlb_set_dirty(cpu_single_env, cpu_single_env->mem_io_vaddr);
// valid.accepts callback of notdirty_mem_ops.
// NOTE(review): the body is not visible in this listing, so the accepted
// accesses cannot be stated here.
1434 static bool notdirty_mem_accepts(void *opaque, hwaddr addr,
1435 unsigned size, bool is_write)
// Region callbacks for the notdirty handling: a write handler and an
// accepts filter, native endianness. No read handler appears in the visible
// initialiser.
1440 static const MemoryRegionOps notdirty_mem_ops = {
1441 .write = notdirty_mem_write,
1442 .valid.accepts = notdirty_mem_accepts,
1443 .endianness = DEVICE_NATIVE_ENDIAN,
1446 /* Generate a debug exception if a watchpoint has been hit. */
// Called by the watch_mem_* handlers with the in-page offset, the length
// mask and the access type (BP_MEM_READ or BP_MEM_WRITE) of the access.
// If a watchpoint hit is already recorded on the current CPU, the debug
// interrupt is raised. Otherwise the accessed virtual address is rebuilt
// from env->mem_io_vaddr and `offset`, and every watchpoint is tested for
// an address match and a matching access type. The first hit is stored in
// env->watchpoint_hit and tb_check_watchpoint is called; the CPU then
// either raises EXCP_DEBUG (BP_STOP_BEFORE_ACCESS) or regenerates the
// current translation block with tb_gen_code and resumes execution. On the
// other visible path BP_WATCHPOINT_HIT is cleared from the watchpoint.
// NOTE(review): several branch boundaries and the early return are not
// visible in this listing.
1447 static void check_watchpoint(int offset, int len_mask, int flags)
1449 CPUArchState *env = cpu_single_env;
1450 target_ulong pc, cs_base;
1455 if (env->watchpoint_hit) {
1456 /* We re-entered the check after replacing the TB. Now raise
1457 * the debug interrupt so that it will trigger after the
1458 * current instruction. */
1459 cpu_interrupt(ENV_GET_CPU(env), CPU_INTERRUPT_DEBUG);
1462 vaddr = (env->mem_io_vaddr & TARGET_PAGE_MASK) + offset;
1463 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
1464 if ((vaddr == (wp->vaddr & len_mask) ||
1465 (vaddr & wp->len_mask) == wp->vaddr) && (wp->flags & flags)) {
1466 wp->flags |= BP_WATCHPOINT_HIT;
1467 if (!env->watchpoint_hit) {
1468 env->watchpoint_hit = wp;
1469 tb_check_watchpoint(env);
1470 if (wp->flags & BP_STOP_BEFORE_ACCESS) {
1471 env->exception_index = EXCP_DEBUG;
1474 cpu_get_tb_cpu_state(env, &pc, &cs_base, &cpu_flags);
1475 tb_gen_code(env, pc, cs_base, cpu_flags, 1);
1476 cpu_resume_from_signal(env, NULL);
1480 wp->flags &= ~BP_WATCHPOINT_HIT;
1485 /* Watchpoint access routines. Watchpoints are inserted using TLB tricks,
1486 so these check for a hit then pass through to the normal out-of-line
1488 static uint64_t watch_mem_read(void *opaque, hwaddr addr,
1491 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~(size - 1), BP_MEM_READ);
1493 case 1: return ldub_phys(addr);
1494 case 2: return lduw_phys(addr);
1495 case 4: return ldl_phys(addr);
1500 static void watch_mem_write(void *opaque, hwaddr addr,
1501 uint64_t val, unsigned size)
1503 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~(size - 1), BP_MEM_WRITE);
1506 stb_phys(addr, val);
1509 stw_phys(addr, val);
1512 stl_phys(addr, val);
/* MemoryRegionOps table routing reads to watch_mem_read and writes to
 * watch_mem_write (both call check_watchpoint() before touching memory);
 * the region is declared native-endian.  io_mem_init() attaches this table
 * to io_mem_watch. */
1518 static const MemoryRegionOps watch_mem_ops = {
1519 .read = watch_mem_read,
1520 .write = watch_mem_write,
1521 .endianness = DEVICE_NATIVE_ENDIAN,
1524 static uint64_t subpage_read(void *opaque, hwaddr addr,
1527 subpage_t *mmio = opaque;
1528 unsigned int idx = SUBPAGE_IDX(addr);
1529 MemoryRegionSection *section;
1530 #if defined(DEBUG_SUBPAGE)
1531 printf("%s: subpage %p len %d addr " TARGET_FMT_plx " idx %d\n", __func__,
1532 mmio, len, addr, idx);
1535 section = &phys_sections[mmio->sub_section[idx]];
1537 addr -= section->offset_within_address_space;
1538 addr += section->offset_within_region;
1539 return io_mem_read(section->mr, addr, len);
/* Write callback of subpage_ops.
 *
 * @opaque: the subpage_t whose sub_section[] table is consulted
 * @addr:   address of the access; SUBPAGE_IDX(addr) selects the table slot
 * @value:  data to store
 * @len:    access length, passed through unchanged
 *
 * The slot's MemoryRegionSection is fetched from phys_sections, @addr is
 * rebased into that section's MemoryRegion, and the store is forwarded with
 * io_mem_write(). */
1542 static void subpage_write(void *opaque, hwaddr addr,
1543 uint64_t value, unsigned len)
1545 subpage_t *mmio = opaque;
1546 unsigned int idx = SUBPAGE_IDX(addr);
1547 MemoryRegionSection *section;
1548 #if defined(DEBUG_SUBPAGE)
1549 printf("%s: subpage %p len %d addr " TARGET_FMT_plx
1550 " idx %d value %"PRIx64"\n",
1551 __func__, mmio, len, addr, idx, value);
/* Look up the section currently registered for this slot. */
1554 section = &phys_sections[mmio->sub_section[idx]];
/* Turn the address-space offset into an offset inside section->mr. */
1556 addr -= section->offset_within_address_space;
1557 addr += section->offset_within_region;
1558 io_mem_write(section->mr, addr, value, len);
/* .valid.accepts callback of subpage_ops.
 *
 * @opaque:   the subpage_t whose sub_section[] table is consulted
 * @addr:     address of the access; SUBPAGE_IDX(addr) selects the table slot
 * @size:     access length, passed through unchanged
 * @is_write: true for a write access, false for a read
 *
 * Returns whatever memory_region_access_valid() reports for the section
 * registered in the selected slot, after rebasing @addr into that section's
 * MemoryRegion.
 *
 * The DEBUG_SUBPAGE trace prints @size; it previously named a variable `len`
 * that does not exist in this function, which broke debug builds. */
1561 static bool subpage_accepts(void *opaque, hwaddr addr,
1562 unsigned size, bool is_write)
1564 subpage_t *mmio = opaque;
1565 unsigned int idx = SUBPAGE_IDX(addr);
1566 MemoryRegionSection *section;
1567 #if defined(DEBUG_SUBPAGE)
1568 printf("%s: subpage %p %c len %d addr " TARGET_FMT_plx
1569 " idx %d\n", __func__, mmio,
1570 is_write ? 'w' : 'r', size, addr, idx);
/* Look up the section currently registered for this slot. */
1573 section = &phys_sections[mmio->sub_section[idx]];
/* Turn the address-space offset into an offset inside section->mr. */
1575 addr -= section->offset_within_address_space;
1576 addr += section->offset_within_region;
1577 return memory_region_access_valid(section->mr, addr, size, is_write);
/* MemoryRegionOps table for subpage containers: subpage_init() passes it to
 * memory_region_init_io() for mmio->iomem.  Reads, writes and the validity
 * check are all redirected per slot by subpage_read / subpage_write /
 * subpage_accepts; the region is declared native-endian. */
1580 static const MemoryRegionOps subpage_ops = {
1581 .read = subpage_read,
1582 .write = subpage_write,
1583 .valid.accepts = subpage_accepts,
1584 .endianness = DEVICE_NATIVE_ENDIAN,
1587 static uint64_t subpage_ram_read(void *opaque, hwaddr addr,
1590 ram_addr_t raddr = addr;
1591 void *ptr = qemu_get_ram_ptr(raddr);
1593 case 1: return ldub_p(ptr);
1594 case 2: return lduw_p(ptr);
1595 case 4: return ldl_p(ptr);
1600 static void subpage_ram_write(void *opaque, hwaddr addr,
1601 uint64_t value, unsigned size)
1603 ram_addr_t raddr = addr;
1604 void *ptr = qemu_get_ram_ptr(raddr);
1606 case 1: return stb_p(ptr, value);
1607 case 2: return stw_p(ptr, value);
1608 case 4: return stl_p(ptr, value);
/* MemoryRegionOps table attached to io_mem_subpage_ram by io_mem_init().
 * subpage_register() substitutes io_mem_subpage_ram for sections whose
 * region is RAM, so these callbacks (subpage_ram_read / subpage_ram_write)
 * access the RAM through qemu_get_ram_ptr().  Declared native-endian. */
1613 static const MemoryRegionOps subpage_ram_ops = {
1614 .read = subpage_ram_read,
1615 .write = subpage_ram_write,
1616 .endianness = DEVICE_NATIVE_ENDIAN,
1619 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
1624 if (start >= TARGET_PAGE_SIZE || end >= TARGET_PAGE_SIZE)
1626 idx = SUBPAGE_IDX(start);
1627 eidx = SUBPAGE_IDX(end);
1628 #if defined(DEBUG_SUBPAGE)
1629 printf("%s: %p start %08x end %08x idx %08x eidx %08x mem %ld\n", __func__,
1630 mmio, start, end, idx, eidx, memory);
1632 if (memory_region_is_ram(phys_sections[section].mr)) {
1633 MemoryRegionSection new_section = phys_sections[section];
1634 new_section.mr = &io_mem_subpage_ram;
1635 section = phys_section_add(&new_section);
1637 for (; idx <= eidx; idx++) {
1638 mmio->sub_section[idx] = section;
1644 static subpage_t *subpage_init(hwaddr base)
1648 mmio = g_malloc0(sizeof(subpage_t));
1651 memory_region_init_io(&mmio->iomem, &subpage_ops, mmio,
1652 "subpage", TARGET_PAGE_SIZE);
1653 mmio->iomem.subpage = true;
1654 #if defined(DEBUG_SUBPAGE)
1655 printf("%s: %p base " TARGET_FMT_plx " len %08x %d\n", __func__,
1656 mmio, base, TARGET_PAGE_SIZE, subpage_memory);
1658 subpage_register(mmio, 0, TARGET_PAGE_SIZE-1, phys_section_unassigned);
1663 static uint16_t dummy_section(MemoryRegion *mr)
1665 MemoryRegionSection section = {
1667 .offset_within_address_space = 0,
1668 .offset_within_region = 0,
1672 return phys_section_add(&section);
/* Return the MemoryRegion of the phys_sections entry selected by @index.
 * Only the bits of @index below the page boundary (index &
 * ~TARGET_PAGE_MASK) are used as the table index; no bounds check is
 * performed here. */
1675 MemoryRegion *iotlb_to_region(hwaddr index)
1677 return phys_sections[index & ~TARGET_PAGE_MASK].mr;
1680 static void io_mem_init(void)
1682 memory_region_init_io(&io_mem_rom, &unassigned_mem_ops, NULL, "rom", UINT64_MAX);
1683 memory_region_init_io(&io_mem_unassigned, &unassigned_mem_ops, NULL,
1684 "unassigned", UINT64_MAX);
1685 memory_region_init_io(&io_mem_notdirty, &notdirty_mem_ops, NULL,
1686 "notdirty", UINT64_MAX);
1687 memory_region_init_io(&io_mem_subpage_ram, &subpage_ram_ops, NULL,
1688 "subpage-ram", UINT64_MAX);
1689 memory_region_init_io(&io_mem_watch, &watch_mem_ops, NULL,
1690 "watch", UINT64_MAX);
/* Reset the dispatch state that embeds @listener: recover the enclosing
 * AddressSpaceDispatch with container_of(), tear down its existing mappings
 * and mark the root of phys_map as empty (PHYS_MAP_NODE_NIL). */
1693 static void mem_begin(MemoryListener *listener)
1695 AddressSpaceDispatch *d = container_of(listener, AddressSpaceDispatch, listener);
1697 destroy_all_mappings(d);
1698 d->phys_map.ptr = PHYS_MAP_NODE_NIL;
/* .begin callback of core_memory_listener.  Empties the phys_sections table
 * and then re-adds one dummy section for each of the four special regions,
 * storing the value returned by dummy_section() in the matching
 * phys_section_* variable.  @listener is unused. */
1701 static void core_begin(MemoryListener *listener)
1703 phys_sections_clear();
1704 phys_section_unassigned = dummy_section(&io_mem_unassigned);
1705 phys_section_notdirty = dummy_section(&io_mem_notdirty);
1706 phys_section_rom = dummy_section(&io_mem_rom);
1707 phys_section_watch = dummy_section(&io_mem_watch);
1710 static void tcg_commit(MemoryListener *listener)
1714 /* since each CPU stores ram addresses in its TLB cache, we must
1715 reset the modified entries */
1717 for(env = first_cpu; env != NULL; env = env->next_cpu) {
/* .log_global_start callback of core_memory_listener: switches dirty
 * tracking on by calling cpu_physical_memory_set_dirty_tracking(1).
 * @listener is unused. */
1722 static void core_log_global_start(MemoryListener *listener)
1724 cpu_physical_memory_set_dirty_tracking(1);
/* .log_global_stop callback of core_memory_listener: switches dirty
 * tracking off by calling cpu_physical_memory_set_dirty_tracking(0).
 * @listener is unused. */
1727 static void core_log_global_stop(MemoryListener *listener)
1729 cpu_physical_memory_set_dirty_tracking(0);
/* .region_add callback of io_memory_listener.
 *
 * Allocates a MemoryRegionIORange with g_new(), records the section's
 * MemoryRegion and its offset within that region, initialises the embedded
 * iorange with the section's address-space offset and size, and registers
 * it through ioport_register().  @listener is unused.
 *
 * NOTE(review): the allocation is not released in this function; presumably
 * ownership passes to the ioport layer on registration -- confirm. */
1732 static void io_region_add(MemoryListener *listener,
1733 MemoryRegionSection *section)
1735 MemoryRegionIORange *mrio = g_new(MemoryRegionIORange, 1);
1737 mrio->mr = section->mr;
1738 mrio->offset = section->offset_within_region;
1739 iorange_init(&mrio->iorange, &memory_region_iorange_ops,
1740 section->offset_within_address_space, section->size);
1741 ioport_register(&mrio->iorange);
/* .region_del callback of io_memory_listener: unassigns the I/O ports
 * covered by @section by passing its address-space offset and size to
 * isa_unassign_ioport().  @listener is unused. */
1744 static void io_region_del(MemoryListener *listener,
1745 MemoryRegionSection *section)
1747 isa_unassign_ioport(section->offset_within_address_space, section->size);
1750 static MemoryListener core_memory_listener = {
1751 .begin = core_begin,
1752 .log_global_start = core_log_global_start,
1753 .log_global_stop = core_log_global_stop,
1757 static MemoryListener io_memory_listener = {
1758 .region_add = io_region_add,
1759 .region_del = io_region_del,
/* Memory listener whose .commit hook is tcg_commit; memory_map_init()
 * registers it on address_space_memory. */
1763 static MemoryListener tcg_memory_listener = {
1764 .commit = tcg_commit,
1767 void address_space_init_dispatch(AddressSpace *as)
1769 AddressSpaceDispatch *d = g_new(AddressSpaceDispatch, 1);
1771 d->phys_map = (PhysPageEntry) { .ptr = PHYS_MAP_NODE_NIL, .is_leaf = 0 };
1772 d->listener = (MemoryListener) {
1774 .region_add = mem_add,
1775 .region_nop = mem_add,
1779 memory_listener_register(&d->listener, as);
1782 void address_space_destroy_dispatch(AddressSpace *as)
1784 AddressSpaceDispatch *d = as->dispatch;
1786 memory_listener_unregister(&d->listener);
1787 destroy_l2_mapping(&d->phys_map, P_L2_LEVELS - 1);
1789 as->dispatch = NULL;
1792 static void memory_map_init(void)
1794 system_memory = g_malloc(sizeof(*system_memory));
1795 memory_region_init(system_memory, "system", INT64_MAX);
1796 address_space_init(&address_space_memory, system_memory);
1797 address_space_memory.name = "memory";
1799 system_io = g_malloc(sizeof(*system_io));
1800 memory_region_init(system_io, "io", 65536);
1801 address_space_init(&address_space_io, system_io);
1802 address_space_io.name = "I/O";
1804 memory_listener_register(&core_memory_listener, &address_space_memory);
1805 memory_listener_register(&io_memory_listener, &address_space_io);
1806 memory_listener_register(&tcg_memory_listener, &address_space_memory);
1808 dma_context_init(&dma_context_memory, &address_space_memory,
/* Accessor for the file-scope system_memory region, which memory_map_init()
 * allocates and initialises.  Returns the stored pointer as is. */
1812 MemoryRegion *get_system_memory(void)
1814 return system_memory;
1817 MemoryRegion *get_system_io(void)
1822 #endif /* !defined(CONFIG_USER_ONLY) */
1824 /* physical memory access (slow version, mainly for debug) */
1825 #if defined(CONFIG_USER_ONLY)
1826 int cpu_memory_rw_debug(CPUArchState *env, target_ulong addr,
1827 uint8_t *buf, int len, int is_write)
1834 page = addr & TARGET_PAGE_MASK;
1835 l = (page + TARGET_PAGE_SIZE) - addr;
1838 flags = page_get_flags(page);
1839 if (!(flags & PAGE_VALID))
1842 if (!(flags & PAGE_WRITE))
1844 /* XXX: this code should not depend on lock_user */
1845 if (!(p = lock_user(VERIFY_WRITE, addr, l, 0)))
1848 unlock_user(p, addr, l);
1850 if (!(flags & PAGE_READ))
1852 /* XXX: this code should not depend on lock_user */
1853 if (!(p = lock_user(VERIFY_READ, addr, l, 1)))
1856 unlock_user(p, addr, 0);
1867 static void invalidate_and_set_dirty(hwaddr addr,
1870 if (!cpu_physical_memory_is_dirty(addr)) {
1871 /* invalidate code */
1872 tb_invalidate_phys_page_range(addr, addr + length, 0);
1874 cpu_physical_memory_set_dirty_flags(addr, (0xff & ~CODE_DIRTY_FLAG));
1876 xen_modified_memory(addr, length);
1879 static inline bool memory_access_is_direct(MemoryRegion *mr, bool is_write)
1881 if (memory_region_is_ram(mr)) {
1882 return !(is_write && mr->readonly);
1884 if (memory_region_is_romd(mr)) {
1891 static inline int memory_access_size(int l, hwaddr addr)
1893 if (l >= 4 && ((addr & 3) == 0)) {
1896 if (l >= 2 && ((addr & 1) == 0)) {
1902 void address_space_rw(AddressSpace *as, hwaddr addr, uint8_t *buf,
1903 int len, bool is_write)
1909 MemoryRegionSection *section;
1913 section = address_space_translate(as, addr, &addr1, &l, is_write);
1916 if (!memory_access_is_direct(section->mr, is_write)) {
1917 l = memory_access_size(l, addr1);
1918 /* XXX: could force cpu_single_env to NULL to avoid
1921 /* 32 bit write access */
1923 io_mem_write(section->mr, addr1, val, 4);
1924 } else if (l == 2) {
1925 /* 16 bit write access */
1927 io_mem_write(section->mr, addr1, val, 2);
1929 /* 8 bit write access */
1931 io_mem_write(section->mr, addr1, val, 1);
1934 addr1 += memory_region_get_ram_addr(section->mr);
1936 ptr = qemu_get_ram_ptr(addr1);
1937 memcpy(ptr, buf, l);
1938 invalidate_and_set_dirty(addr1, l);
1941 if (!memory_access_is_direct(section->mr, is_write)) {
1943 l = memory_access_size(l, addr1);
1945 /* 32 bit read access */
1946 val = io_mem_read(section->mr, addr1, 4);
1948 } else if (l == 2) {
1949 /* 16 bit read access */
1950 val = io_mem_read(section->mr, addr1, 2);
1953 /* 8 bit read access */
1954 val = io_mem_read(section->mr, addr1, 1);
1959 ptr = qemu_get_ram_ptr(section->mr->ram_addr + addr1);
1960 memcpy(buf, ptr, l);
/* Write @len bytes from @buf to @addr in address space @as.
 * Thin wrapper around address_space_rw() with is_write = true.  The const
 * qualifier is cast away only because address_space_rw() takes one
 * non-const buffer parameter for both directions. */
1969 void address_space_write(AddressSpace *as, hwaddr addr,
1970 const uint8_t *buf, int len)
1972 address_space_rw(as, addr, (uint8_t *)buf, len, true);
1976 * address_space_read: read from an address space.
1978 * @as: #AddressSpace to be accessed
1979 * @addr: address within that address space
1980 * @buf: buffer with the data transferred
/* Thin wrapper: address_space_rw() with is_write = false, so @len bytes at
 * @addr in @as are copied into @buf. */
1982 void address_space_read(AddressSpace *as, hwaddr addr, uint8_t *buf, int len)
1984 address_space_rw(as, addr, buf, len, false);
/* Read or write @len bytes at physical address @addr in the system memory
 * address space by forwarding to address_space_rw() on address_space_memory.
 *
 * @is_write: non-zero copies from @buf into memory, zero copies memory
 *            into @buf.
 *
 * address_space_rw() returns void, so it is invoked as a plain statement:
 * ISO C does not permit `return <expression>;` in a function whose return
 * type is void. */
1988 void cpu_physical_memory_rw(hwaddr addr, uint8_t *buf,
1989 int len, int is_write)
1991 address_space_rw(&address_space_memory, addr, buf, len, is_write);
1994 /* used for ROM loading : can write in RAM and ROM */
1995 void cpu_physical_memory_write_rom(hwaddr addr,
1996 const uint8_t *buf, int len)
2001 MemoryRegionSection *section;
2005 section = address_space_translate(&address_space_memory,
2006 addr, &addr1, &l, true);
2008 if (!(memory_region_is_ram(section->mr) ||
2009 memory_region_is_romd(section->mr))) {
2012 addr1 += memory_region_get_ram_addr(section->mr);
2014 ptr = qemu_get_ram_ptr(addr1);
2015 memcpy(ptr, buf, l);
2016 invalidate_and_set_dirty(addr1, l);
/* The single file-scope bounce buffer used by address_space_map() and
 * address_space_unmap().  address_space_map() checks bounce.buffer before
 * allocating it, and address_space_unmap() frees it and resets it to NULL. */
2030 static BounceBuffer bounce;
2032 typedef struct MapClient {
2034 void (*callback)(void *opaque);
2035 QLIST_ENTRY(MapClient) link;
/* List of registered MapClient entries, initially empty.  Entries are
 * inserted at the head by cpu_register_map_client() and drained by
 * cpu_notify_map_clients(). */
2038 static QLIST_HEAD(map_client_list, MapClient) map_client_list
2039 = QLIST_HEAD_INITIALIZER(map_client_list);
2041 void *cpu_register_map_client(void *opaque, void (*callback)(void *opaque))
2043 MapClient *client = g_malloc(sizeof(*client));
2045 client->opaque = opaque;
2046 client->callback = callback;
2047 QLIST_INSERT_HEAD(&map_client_list, client, link);
2051 static void cpu_unregister_map_client(void *_client)
2053 MapClient *client = (MapClient *)_client;
2055 QLIST_REMOVE(client, link);
2059 static void cpu_notify_map_clients(void)
2063 while (!QLIST_EMPTY(&map_client_list)) {
2064 client = QLIST_FIRST(&map_client_list);
2065 client->callback(client->opaque);
2066 cpu_unregister_map_client(client);
2070 bool address_space_access_valid(AddressSpace *as, hwaddr addr, int len, bool is_write)
2072 MemoryRegionSection *section;
2077 section = address_space_translate(as, addr, &xlat, &l, is_write);
2078 if (!memory_access_is_direct(section->mr, is_write)) {
2079 l = memory_access_size(l, addr);
2080 if (!memory_region_access_valid(section->mr, xlat, l, is_write)) {
2091 /* Map a physical memory region into a host virtual address.
2092 * May map a subset of the requested range, given by and returned in *plen.
2093 * May return NULL if resources needed to perform the mapping are exhausted.
2094 * Use only for reads OR writes - not for read-modify-write operations.
2095 * Use cpu_register_map_client() to know when retrying the map operation is
2096 * likely to succeed.
2098 void *address_space_map(AddressSpace *as,
2106 MemoryRegionSection *section;
2107 ram_addr_t raddr = RAM_ADDR_MAX;
2113 section = address_space_translate(as, addr, &xlat, &l, is_write);
2115 if (!memory_access_is_direct(section->mr, is_write)) {
2116 if (todo || bounce.buffer) {
2119 bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, TARGET_PAGE_SIZE);
2123 address_space_read(as, addr, bounce.buffer, l);
2127 return bounce.buffer;
2130 raddr = memory_region_get_ram_addr(section->mr) + xlat;
2132 if (memory_region_get_ram_addr(section->mr) + xlat != raddr + todo) {
2142 ret = qemu_ram_ptr_length(raddr, &rlen);
2147 /* Unmaps a memory region previously mapped by address_space_map().
2148 * Will also mark the memory as dirty if is_write == 1. access_len gives
2149 * the amount of memory that was actually read or written by the caller.
2151 void address_space_unmap(AddressSpace *as, void *buffer, hwaddr len,
2152 int is_write, hwaddr access_len)
2154 if (buffer != bounce.buffer) {
2156 ram_addr_t addr1 = qemu_ram_addr_from_host_nofail(buffer);
2157 while (access_len) {
2159 l = TARGET_PAGE_SIZE;
2162 invalidate_and_set_dirty(addr1, l);
2167 if (xen_enabled()) {
2168 xen_invalidate_map_cache_entry(buffer);
2173 address_space_write(as, bounce.addr, bounce.buffer, access_len);
2175 qemu_vfree(bounce.buffer);
2176 bounce.buffer = NULL;
2177 cpu_notify_map_clients();
2180 void *cpu_physical_memory_map(hwaddr addr,
2184 return address_space_map(&address_space_memory, addr, plen, is_write);
/* Unmap @buffer for the system memory address space by forwarding every
 * argument to address_space_unmap() on address_space_memory.
 *
 * address_space_unmap() returns void, so it is invoked as a plain
 * statement: ISO C does not permit `return <expression>;` in a function
 * whose return type is void. */
2187 void cpu_physical_memory_unmap(void *buffer, hwaddr len,
2188 int is_write, hwaddr access_len)
2190 address_space_unmap(&address_space_memory, buffer, len, is_write, access_len);
2193 /* warning: addr must be aligned */
2194 static inline uint32_t ldl_phys_internal(hwaddr addr,
2195 enum device_endian endian)
2199 MemoryRegionSection *section;
2203 section = address_space_translate(&address_space_memory, addr, &addr1, &l,
2205 if (l < 4 || !memory_access_is_direct(section->mr, false)) {
2207 val = io_mem_read(section->mr, addr1, 4);
2208 #if defined(TARGET_WORDS_BIGENDIAN)
2209 if (endian == DEVICE_LITTLE_ENDIAN) {
2213 if (endian == DEVICE_BIG_ENDIAN) {
2219 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section->mr)
2223 case DEVICE_LITTLE_ENDIAN:
2224 val = ldl_le_p(ptr);
2226 case DEVICE_BIG_ENDIAN:
2227 val = ldl_be_p(ptr);
/* 32-bit load from physical address @addr with DEVICE_NATIVE_ENDIAN.
 * Delegates to ldl_phys_internal(), which warns that @addr must be
 * aligned. */
2237 uint32_t ldl_phys(hwaddr addr)
2239 return ldl_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
/* 32-bit load from physical address @addr with DEVICE_LITTLE_ENDIAN.
 * Delegates to ldl_phys_internal(), which warns that @addr must be
 * aligned. */
2242 uint32_t ldl_le_phys(hwaddr addr)
2244 return ldl_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
/* 32-bit load from physical address @addr with DEVICE_BIG_ENDIAN.
 * Delegates to ldl_phys_internal(), which warns that @addr must be
 * aligned. */
2247 uint32_t ldl_be_phys(hwaddr addr)
2249 return ldl_phys_internal(addr, DEVICE_BIG_ENDIAN);
2252 /* warning: addr must be aligned */
2253 static inline uint64_t ldq_phys_internal(hwaddr addr,
2254 enum device_endian endian)
2258 MemoryRegionSection *section;
2262 section = address_space_translate(&address_space_memory, addr, &addr1, &l,
2264 if (l < 8 || !memory_access_is_direct(section->mr, false)) {
2267 /* XXX This is broken when device endian != cpu endian.
2268 Fix and add "endian" variable check */
2269 #ifdef TARGET_WORDS_BIGENDIAN
2270 val = io_mem_read(section->mr, addr1, 4) << 32;
2271 val |= io_mem_read(section->mr, addr1 + 4, 4);
2273 val = io_mem_read(section->mr, addr1, 4);
2274 val |= io_mem_read(section->mr, addr1 + 4, 4) << 32;
2278 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section->mr)
2282 case DEVICE_LITTLE_ENDIAN:
2283 val = ldq_le_p(ptr);
2285 case DEVICE_BIG_ENDIAN:
2286 val = ldq_be_p(ptr);
/* 64-bit load from physical address @addr with DEVICE_NATIVE_ENDIAN.
 * Delegates to ldq_phys_internal(), which warns that @addr must be
 * aligned. */
2296 uint64_t ldq_phys(hwaddr addr)
2298 return ldq_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
/* 64-bit load from physical address @addr with DEVICE_LITTLE_ENDIAN.
 * Delegates to ldq_phys_internal(), which warns that @addr must be
 * aligned. */
2301 uint64_t ldq_le_phys(hwaddr addr)
2303 return ldq_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
/* 64-bit load from physical address @addr with DEVICE_BIG_ENDIAN.
 * Delegates to ldq_phys_internal(), which warns that @addr must be
 * aligned. */
2306 uint64_t ldq_be_phys(hwaddr addr)
2308 return ldq_phys_internal(addr, DEVICE_BIG_ENDIAN);
2312 uint32_t ldub_phys(hwaddr addr)
2315 cpu_physical_memory_read(addr, &val, 1);
2319 /* warning: addr must be aligned */
2320 static inline uint32_t lduw_phys_internal(hwaddr addr,
2321 enum device_endian endian)
2325 MemoryRegionSection *section;
2329 section = address_space_translate(&address_space_memory, addr, &addr1, &l,
2331 if (l < 2 || !memory_access_is_direct(section->mr, false)) {
2333 val = io_mem_read(section->mr, addr1, 2);
2334 #if defined(TARGET_WORDS_BIGENDIAN)
2335 if (endian == DEVICE_LITTLE_ENDIAN) {
2339 if (endian == DEVICE_BIG_ENDIAN) {
2345 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section->mr)
2349 case DEVICE_LITTLE_ENDIAN:
2350 val = lduw_le_p(ptr);
2352 case DEVICE_BIG_ENDIAN:
2353 val = lduw_be_p(ptr);
/* 16-bit load from physical address @addr with DEVICE_NATIVE_ENDIAN; the
 * value comes back in a uint32_t.  Delegates to lduw_phys_internal(), which
 * warns that @addr must be aligned. */
2363 uint32_t lduw_phys(hwaddr addr)
2365 return lduw_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
/* 16-bit load from physical address @addr with DEVICE_LITTLE_ENDIAN; the
 * value comes back in a uint32_t.  Delegates to lduw_phys_internal(), which
 * warns that @addr must be aligned. */
2368 uint32_t lduw_le_phys(hwaddr addr)
2370 return lduw_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
/* 16-bit load from physical address @addr with DEVICE_BIG_ENDIAN; the
 * value comes back in a uint32_t.  Delegates to lduw_phys_internal(), which
 * warns that @addr must be aligned. */
2373 uint32_t lduw_be_phys(hwaddr addr)
2375 return lduw_phys_internal(addr, DEVICE_BIG_ENDIAN);
2378 /* warning: addr must be aligned. The ram page is not marked as dirty
2379 and the code inside is not invalidated. It is useful if the dirty
2380 bits are used to track modified PTEs */
2381 void stl_phys_notdirty(hwaddr addr, uint32_t val)
2384 MemoryRegionSection *section;
2388 section = address_space_translate(&address_space_memory, addr, &addr1, &l,
2390 if (l < 4 || !memory_access_is_direct(section->mr, true)) {
2391 io_mem_write(section->mr, addr1, val, 4);
2393 addr1 += memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK;
2394 ptr = qemu_get_ram_ptr(addr1);
2397 if (unlikely(in_migration)) {
2398 if (!cpu_physical_memory_is_dirty(addr1)) {
2399 /* invalidate code */
2400 tb_invalidate_phys_page_range(addr1, addr1 + 4, 0);
2402 cpu_physical_memory_set_dirty_flags(
2403 addr1, (0xff & ~CODE_DIRTY_FLAG));
2409 /* warning: addr must be aligned */
2410 static inline void stl_phys_internal(hwaddr addr, uint32_t val,
2411 enum device_endian endian)
2414 MemoryRegionSection *section;
2418 section = address_space_translate(&address_space_memory, addr, &addr1, &l,
2420 if (l < 4 || !memory_access_is_direct(section->mr, true)) {
2421 #if defined(TARGET_WORDS_BIGENDIAN)
2422 if (endian == DEVICE_LITTLE_ENDIAN) {
2426 if (endian == DEVICE_BIG_ENDIAN) {
2430 io_mem_write(section->mr, addr1, val, 4);
2433 addr1 += memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK;
2434 ptr = qemu_get_ram_ptr(addr1);
2436 case DEVICE_LITTLE_ENDIAN:
2439 case DEVICE_BIG_ENDIAN:
2446 invalidate_and_set_dirty(addr1, 4);
/* 32-bit store of @val to physical address @addr with
 * DEVICE_NATIVE_ENDIAN.  Delegates to stl_phys_internal(), which warns that
 * @addr must be aligned. */
2450 void stl_phys(hwaddr addr, uint32_t val)
2452 stl_phys_internal(addr, val, DEVICE_NATIVE_ENDIAN);
/* 32-bit store of @val to physical address @addr with
 * DEVICE_LITTLE_ENDIAN.  Delegates to stl_phys_internal(), which warns that
 * @addr must be aligned. */
2455 void stl_le_phys(hwaddr addr, uint32_t val)
2457 stl_phys_internal(addr, val, DEVICE_LITTLE_ENDIAN);
/* 32-bit store of @val to physical address @addr with DEVICE_BIG_ENDIAN.
 * Delegates to stl_phys_internal(), which warns that @addr must be
 * aligned. */
2460 void stl_be_phys(hwaddr addr, uint32_t val)
2462 stl_phys_internal(addr, val, DEVICE_BIG_ENDIAN);
2466 void stb_phys(hwaddr addr, uint32_t val)
2469 cpu_physical_memory_write(addr, &v, 1);
2472 /* warning: addr must be aligned */
2473 static inline void stw_phys_internal(hwaddr addr, uint32_t val,
2474 enum device_endian endian)
2477 MemoryRegionSection *section;
2481 section = address_space_translate(&address_space_memory, addr, &addr1, &l,
2483 if (l < 2 || !memory_access_is_direct(section->mr, true)) {
2484 #if defined(TARGET_WORDS_BIGENDIAN)
2485 if (endian == DEVICE_LITTLE_ENDIAN) {
2489 if (endian == DEVICE_BIG_ENDIAN) {
2493 io_mem_write(section->mr, addr1, val, 2);
2496 addr1 += memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK;
2497 ptr = qemu_get_ram_ptr(addr1);
2499 case DEVICE_LITTLE_ENDIAN:
2502 case DEVICE_BIG_ENDIAN:
2509 invalidate_and_set_dirty(addr1, 2);
/* 16-bit store of @val to physical address @addr with
 * DEVICE_NATIVE_ENDIAN.  Delegates to stw_phys_internal(), which warns that
 * @addr must be aligned. */
2513 void stw_phys(hwaddr addr, uint32_t val)
2515 stw_phys_internal(addr, val, DEVICE_NATIVE_ENDIAN);
/* 16-bit store of @val to physical address @addr with
 * DEVICE_LITTLE_ENDIAN.  Delegates to stw_phys_internal(), which warns that
 * @addr must be aligned. */
2518 void stw_le_phys(hwaddr addr, uint32_t val)
2520 stw_phys_internal(addr, val, DEVICE_LITTLE_ENDIAN);
/* 16-bit store of @val to physical address @addr with DEVICE_BIG_ENDIAN.
 * Delegates to stw_phys_internal(), which warns that @addr must be
 * aligned. */
2523 void stw_be_phys(hwaddr addr, uint32_t val)
2525 stw_phys_internal(addr, val, DEVICE_BIG_ENDIAN);
2529 void stq_phys(hwaddr addr, uint64_t val)
2532 cpu_physical_memory_write(addr, &val, 8);
/* 64-bit little-endian store to physical address @addr: @val is converted
 * in place with cpu_to_le64() and the resulting 8 bytes are written with
 * cpu_physical_memory_write(). */
2535 void stq_le_phys(hwaddr addr, uint64_t val)
2537 val = cpu_to_le64(val);
2538 cpu_physical_memory_write(addr, &val, 8);
/* 64-bit big-endian store to physical address @addr: @val is converted in
 * place with cpu_to_be64() and the resulting 8 bytes are written with
 * cpu_physical_memory_write(). */
2541 void stq_be_phys(hwaddr addr, uint64_t val)
2543 val = cpu_to_be64(val);
2544 cpu_physical_memory_write(addr, &val, 8);
2547 /* virtual memory access for debug (includes writing to ROM) */
2548 int cpu_memory_rw_debug(CPUArchState *env, target_ulong addr,
2549 uint8_t *buf, int len, int is_write)
2556 page = addr & TARGET_PAGE_MASK;
2557 phys_addr = cpu_get_phys_page_debug(env, page);
2558 /* if no physical page mapped, return an error */
2559 if (phys_addr == -1)
2561 l = (page + TARGET_PAGE_SIZE) - addr;
2564 phys_addr += (addr & ~TARGET_PAGE_MASK);
2566 cpu_physical_memory_write_rom(phys_addr, buf, l);
2568 cpu_physical_memory_rw(phys_addr, buf, l, is_write);
2577 #if !defined(CONFIG_USER_ONLY)
2580 * A helper function for the _utterly broken_ virtio device model to find out if
2581 * it's running on a big endian machine. Don't do this at home kids!
2583 bool virtio_is_big_endian(void);
2584 bool virtio_is_big_endian(void)
2586 #if defined(TARGET_WORDS_BIGENDIAN)
2595 #ifndef CONFIG_USER_ONLY
2596 bool cpu_physical_memory_is_io(hwaddr phys_addr)
2598 MemoryRegionSection *section;
2601 section = address_space_translate(&address_space_memory,
2602 phys_addr, &phys_addr, &l, false);
2604 return !(memory_region_is_ram(section->mr) ||
2605 memory_region_is_romd(section->mr));