4 * Copyright (c) 2003 Fabrice Bellard
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
23 #include <sys/types.h>
27 #include "qemu-common.h"
32 #include "qemu/osdep.h"
33 #include "sysemu/kvm.h"
34 #include "hw/xen/xen.h"
35 #include "qemu/timer.h"
36 #include "qemu/config-file.h"
37 #include "exec/memory.h"
38 #include "sysemu/dma.h"
39 #include "exec/address-spaces.h"
40 #if defined(CONFIG_USER_ONLY)
42 #else /* !CONFIG_USER_ONLY */
43 #include "sysemu/xen-mapcache.h"
46 #include "exec/cpu-all.h"
48 #include "exec/cputlb.h"
49 #include "translate-all.h"
51 #include "exec/memory-internal.h"
53 //#define DEBUG_SUBPAGE
55 #if !defined(CONFIG_USER_ONLY)
57 static int in_migration;
59 RAMList ram_list = { .blocks = QTAILQ_HEAD_INITIALIZER(ram_list.blocks) };
61 static MemoryRegion *system_memory;
62 static MemoryRegion *system_io;
64 AddressSpace address_space_io;
65 AddressSpace address_space_memory;
66 DMAContext dma_context_memory;
68 MemoryRegion io_mem_rom, io_mem_notdirty;
69 static MemoryRegion io_mem_unassigned, io_mem_subpage_ram;
73 CPUArchState *first_cpu;
74 /* current CPU in the current thread. It is only valid inside
76 DEFINE_TLS(CPUArchState *,cpu_single_env);
77 /* 0 = Do not count executed instructions.
78 1 = Precise instruction counting.
79 2 = Adaptive rate instruction counting. */
82 #if !defined(CONFIG_USER_ONLY)
84 static MemoryRegionSection *phys_sections;
85 static unsigned phys_sections_nb, phys_sections_nb_alloc;
86 static uint16_t phys_section_unassigned;
87 static uint16_t phys_section_notdirty;
88 static uint16_t phys_section_rom;
89 static uint16_t phys_section_watch;
91 /* Simple allocator for PhysPageEntry nodes */
92 static PhysPageEntry (*phys_map_nodes)[L2_SIZE];
93 static unsigned phys_map_nodes_nb, phys_map_nodes_nb_alloc;
95 #define PHYS_MAP_NODE_NIL (((uint16_t)~0) >> 1)
97 static void io_mem_init(void);
98 static void memory_map_init(void);
99 static void *qemu_safe_ram_ptr(ram_addr_t addr);
101 static MemoryRegion io_mem_watch;
104 #if !defined(CONFIG_USER_ONLY)
106 static void phys_map_node_reserve(unsigned nodes)
108 if (phys_map_nodes_nb + nodes > phys_map_nodes_nb_alloc) {
109 typedef PhysPageEntry Node[L2_SIZE];
110 phys_map_nodes_nb_alloc = MAX(phys_map_nodes_nb_alloc * 2, 16);
111 phys_map_nodes_nb_alloc = MAX(phys_map_nodes_nb_alloc,
112 phys_map_nodes_nb + nodes);
113 phys_map_nodes = g_renew(Node, phys_map_nodes,
114 phys_map_nodes_nb_alloc);
118 static uint16_t phys_map_node_alloc(void)
123 ret = phys_map_nodes_nb++;
124 assert(ret != PHYS_MAP_NODE_NIL);
125 assert(ret != phys_map_nodes_nb_alloc);
126 for (i = 0; i < L2_SIZE; ++i) {
127 phys_map_nodes[ret][i].is_leaf = 0;
128 phys_map_nodes[ret][i].ptr = PHYS_MAP_NODE_NIL;
133 static void phys_map_nodes_reset(void)
135 phys_map_nodes_nb = 0;
139 static void phys_page_set_level(PhysPageEntry *lp, hwaddr *index,
140 hwaddr *nb, uint16_t leaf,
145 hwaddr step = (hwaddr)1 << (level * L2_BITS);
147 if (!lp->is_leaf && lp->ptr == PHYS_MAP_NODE_NIL) {
148 lp->ptr = phys_map_node_alloc();
149 p = phys_map_nodes[lp->ptr];
151 for (i = 0; i < L2_SIZE; i++) {
153 p[i].ptr = phys_section_unassigned;
157 p = phys_map_nodes[lp->ptr];
159 lp = &p[(*index >> (level * L2_BITS)) & (L2_SIZE - 1)];
161 while (*nb && lp < &p[L2_SIZE]) {
162 if ((*index & (step - 1)) == 0 && *nb >= step) {
168 phys_page_set_level(lp, index, nb, leaf, level - 1);
174 static void phys_page_set(AddressSpaceDispatch *d,
175 hwaddr index, hwaddr nb,
178 /* Wildly overreserve - it doesn't matter much. */
179 phys_map_node_reserve(3 * P_L2_LEVELS);
181 phys_page_set_level(&d->phys_map, &index, &nb, leaf, P_L2_LEVELS - 1);
184 static MemoryRegionSection *phys_page_find(AddressSpaceDispatch *d, hwaddr index)
186 PhysPageEntry lp = d->phys_map;
190 for (i = P_L2_LEVELS - 1; i >= 0 && !lp.is_leaf; i--) {
191 if (lp.ptr == PHYS_MAP_NODE_NIL) {
192 return &phys_sections[phys_section_unassigned];
194 p = phys_map_nodes[lp.ptr];
195 lp = p[(index >> (i * L2_BITS)) & (L2_SIZE - 1)];
197 return &phys_sections[lp.ptr];
200 bool memory_region_is_unassigned(MemoryRegion *mr)
202 return mr != &io_mem_rom && mr != &io_mem_notdirty && !mr->rom_device
203 && mr != &io_mem_watch;
206 MemoryRegionSection *address_space_translate(AddressSpace *as, hwaddr addr,
207 hwaddr *xlat, hwaddr *plen,
210 MemoryRegionSection *section;
213 section = phys_page_find(as->dispatch, addr >> TARGET_PAGE_BITS);
214 /* Compute offset within MemoryRegionSection */
215 addr -= section->offset_within_address_space;
217 /* Compute offset within MemoryRegion */
218 *xlat = addr + section->offset_within_region;
220 diff = int128_sub(section->mr->size, int128_make64(addr));
221 *plen = MIN(int128_get64(diff), *plen);
226 void cpu_exec_init_all(void)
228 #if !defined(CONFIG_USER_ONLY)
229 qemu_mutex_init(&ram_list.mutex);
235 #if !defined(CONFIG_USER_ONLY)
237 static int cpu_common_post_load(void *opaque, int version_id)
239 CPUState *cpu = opaque;
241 /* 0x01 was CPU_INTERRUPT_EXIT. This line can be removed when the
242 version_id is increased. */
243 cpu->interrupt_request &= ~0x01;
244 tlb_flush(cpu->env_ptr, 1);
249 static const VMStateDescription vmstate_cpu_common = {
250 .name = "cpu_common",
252 .minimum_version_id = 1,
253 .minimum_version_id_old = 1,
254 .post_load = cpu_common_post_load,
255 .fields = (VMStateField []) {
256 VMSTATE_UINT32(halted, CPUState),
257 VMSTATE_UINT32(interrupt_request, CPUState),
258 VMSTATE_END_OF_LIST()
262 #define vmstate_cpu_common vmstate_dummy
265 CPUState *qemu_get_cpu(int index)
267 CPUArchState *env = first_cpu;
268 CPUState *cpu = NULL;
271 cpu = ENV_GET_CPU(env);
272 if (cpu->cpu_index == index) {
278 return env ? cpu : NULL;
281 void qemu_for_each_cpu(void (*func)(CPUState *cpu, void *data), void *data)
283 CPUArchState *env = first_cpu;
286 func(ENV_GET_CPU(env), data);
291 void cpu_exec_init(CPUArchState *env)
293 CPUState *cpu = ENV_GET_CPU(env);
294 CPUClass *cc = CPU_GET_CLASS(cpu);
298 #if defined(CONFIG_USER_ONLY)
301 env->next_cpu = NULL;
304 while (*penv != NULL) {
305 penv = &(*penv)->next_cpu;
308 cpu->cpu_index = cpu_index;
310 QTAILQ_INIT(&env->breakpoints);
311 QTAILQ_INIT(&env->watchpoints);
312 #ifndef CONFIG_USER_ONLY
313 cpu->thread_id = qemu_get_thread_id();
316 #if defined(CONFIG_USER_ONLY)
319 vmstate_register(NULL, cpu_index, &vmstate_cpu_common, cpu);
320 #if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
321 register_savevm(NULL, "cpu", cpu_index, CPU_SAVE_VERSION,
322 cpu_save, cpu_load, env);
323 assert(cc->vmsd == NULL);
325 if (cc->vmsd != NULL) {
326 vmstate_register(NULL, cpu_index, cc->vmsd, cpu);
330 #if defined(TARGET_HAS_ICE)
331 #if defined(CONFIG_USER_ONLY)
332 static void breakpoint_invalidate(CPUArchState *env, target_ulong pc)
334 tb_invalidate_phys_page_range(pc, pc + 1, 0);
337 static void breakpoint_invalidate(CPUArchState *env, target_ulong pc)
339 tb_invalidate_phys_addr(cpu_get_phys_page_debug(env, pc) |
340 (pc & ~TARGET_PAGE_MASK));
343 #endif /* TARGET_HAS_ICE */
345 #if defined(CONFIG_USER_ONLY)
346 void cpu_watchpoint_remove_all(CPUArchState *env, int mask)
351 int cpu_watchpoint_insert(CPUArchState *env, target_ulong addr, target_ulong len,
352 int flags, CPUWatchpoint **watchpoint)
357 /* Add a watchpoint. */
358 int cpu_watchpoint_insert(CPUArchState *env, target_ulong addr, target_ulong len,
359 int flags, CPUWatchpoint **watchpoint)
361 target_ulong len_mask = ~(len - 1);
364 /* sanity checks: allow power-of-2 lengths, deny unaligned watchpoints */
365 if ((len & (len - 1)) || (addr & ~len_mask) ||
366 len == 0 || len > TARGET_PAGE_SIZE) {
367 fprintf(stderr, "qemu: tried to set invalid watchpoint at "
368 TARGET_FMT_lx ", len=" TARGET_FMT_lu "\n", addr, len);
371 wp = g_malloc(sizeof(*wp));
374 wp->len_mask = len_mask;
377 /* keep all GDB-injected watchpoints in front */
379 QTAILQ_INSERT_HEAD(&env->watchpoints, wp, entry);
381 QTAILQ_INSERT_TAIL(&env->watchpoints, wp, entry);
383 tlb_flush_page(env, addr);
390 /* Remove a specific watchpoint. */
391 int cpu_watchpoint_remove(CPUArchState *env, target_ulong addr, target_ulong len,
394 target_ulong len_mask = ~(len - 1);
397 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
398 if (addr == wp->vaddr && len_mask == wp->len_mask
399 && flags == (wp->flags & ~BP_WATCHPOINT_HIT)) {
400 cpu_watchpoint_remove_by_ref(env, wp);
407 /* Remove a specific watchpoint by reference. */
408 void cpu_watchpoint_remove_by_ref(CPUArchState *env, CPUWatchpoint *watchpoint)
410 QTAILQ_REMOVE(&env->watchpoints, watchpoint, entry);
412 tlb_flush_page(env, watchpoint->vaddr);
417 /* Remove all matching watchpoints. */
418 void cpu_watchpoint_remove_all(CPUArchState *env, int mask)
420 CPUWatchpoint *wp, *next;
422 QTAILQ_FOREACH_SAFE(wp, &env->watchpoints, entry, next) {
423 if (wp->flags & mask)
424 cpu_watchpoint_remove_by_ref(env, wp);
429 /* Add a breakpoint. */
430 int cpu_breakpoint_insert(CPUArchState *env, target_ulong pc, int flags,
431 CPUBreakpoint **breakpoint)
433 #if defined(TARGET_HAS_ICE)
436 bp = g_malloc(sizeof(*bp));
441 /* keep all GDB-injected breakpoints in front */
443 QTAILQ_INSERT_HEAD(&env->breakpoints, bp, entry);
445 QTAILQ_INSERT_TAIL(&env->breakpoints, bp, entry);
447 breakpoint_invalidate(env, pc);
457 /* Remove a specific breakpoint. */
458 int cpu_breakpoint_remove(CPUArchState *env, target_ulong pc, int flags)
460 #if defined(TARGET_HAS_ICE)
463 QTAILQ_FOREACH(bp, &env->breakpoints, entry) {
464 if (bp->pc == pc && bp->flags == flags) {
465 cpu_breakpoint_remove_by_ref(env, bp);
475 /* Remove a specific breakpoint by reference. */
476 void cpu_breakpoint_remove_by_ref(CPUArchState *env, CPUBreakpoint *breakpoint)
478 #if defined(TARGET_HAS_ICE)
479 QTAILQ_REMOVE(&env->breakpoints, breakpoint, entry);
481 breakpoint_invalidate(env, breakpoint->pc);
487 /* Remove all matching breakpoints. */
488 void cpu_breakpoint_remove_all(CPUArchState *env, int mask)
490 #if defined(TARGET_HAS_ICE)
491 CPUBreakpoint *bp, *next;
493 QTAILQ_FOREACH_SAFE(bp, &env->breakpoints, entry, next) {
494 if (bp->flags & mask)
495 cpu_breakpoint_remove_by_ref(env, bp);
500 /* enable or disable single step mode. EXCP_DEBUG is returned by the
501 CPU loop after each instruction */
502 void cpu_single_step(CPUArchState *env, int enabled)
504 #if defined(TARGET_HAS_ICE)
505 if (env->singlestep_enabled != enabled) {
506 env->singlestep_enabled = enabled;
508 kvm_update_guest_debug(env, 0);
510 /* must flush all the translated code to avoid inconsistencies */
511 /* XXX: only flush what is necessary */
518 void cpu_exit(CPUArchState *env)
520 CPUState *cpu = ENV_GET_CPU(env);
522 cpu->exit_request = 1;
523 cpu->tcg_exit_req = 1;
526 void cpu_abort(CPUArchState *env, const char *fmt, ...)
533 fprintf(stderr, "qemu: fatal: ");
534 vfprintf(stderr, fmt, ap);
535 fprintf(stderr, "\n");
536 cpu_dump_state(env, stderr, fprintf, CPU_DUMP_FPU | CPU_DUMP_CCOP);
537 if (qemu_log_enabled()) {
538 qemu_log("qemu: fatal: ");
539 qemu_log_vprintf(fmt, ap2);
541 log_cpu_state(env, CPU_DUMP_FPU | CPU_DUMP_CCOP);
547 #if defined(CONFIG_USER_ONLY)
549 struct sigaction act;
550 sigfillset(&act.sa_mask);
551 act.sa_handler = SIG_DFL;
552 sigaction(SIGABRT, &act, NULL);
558 CPUArchState *cpu_copy(CPUArchState *env)
560 CPUArchState *new_env = cpu_init(env->cpu_model_str);
561 CPUArchState *next_cpu = new_env->next_cpu;
562 #if defined(TARGET_HAS_ICE)
567 memcpy(new_env, env, sizeof(CPUArchState));
569 /* Preserve chaining. */
570 new_env->next_cpu = next_cpu;
572 /* Clone all break/watchpoints.
573 Note: Once we support ptrace with hw-debug register access, make sure
574 BP_CPU break/watchpoints are handled correctly on clone. */
575 QTAILQ_INIT(&env->breakpoints);
576 QTAILQ_INIT(&env->watchpoints);
577 #if defined(TARGET_HAS_ICE)
578 QTAILQ_FOREACH(bp, &env->breakpoints, entry) {
579 cpu_breakpoint_insert(new_env, bp->pc, bp->flags, NULL);
581 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
582 cpu_watchpoint_insert(new_env, wp->vaddr, (~wp->len_mask) + 1,
590 #if !defined(CONFIG_USER_ONLY)
591 static void tlb_reset_dirty_range_all(ram_addr_t start, ram_addr_t end,
596 /* we modify the TLB cache so that the dirty bit will be set again
597 when accessing the range */
598 start1 = (uintptr_t)qemu_safe_ram_ptr(start);
599 /* Check that we don't span multiple blocks - this breaks the
600 address comparisons below. */
601 if ((uintptr_t)qemu_safe_ram_ptr(end - 1) - start1
602 != (end - 1) - start) {
605 cpu_tlb_reset_dirty_all(start1, length);
609 /* Note: start and end must be within the same ram block. */
610 void cpu_physical_memory_reset_dirty(ram_addr_t start, ram_addr_t end,
615 start &= TARGET_PAGE_MASK;
616 end = TARGET_PAGE_ALIGN(end);
618 length = end - start;
621 cpu_physical_memory_mask_dirty_range(start, length, dirty_flags);
624 tlb_reset_dirty_range_all(start, end, length);
628 static int cpu_physical_memory_set_dirty_tracking(int enable)
631 in_migration = enable;
635 hwaddr memory_region_section_get_iotlb(CPUArchState *env,
636 MemoryRegionSection *section,
638 hwaddr paddr, hwaddr xlat,
640 target_ulong *address)
645 if (memory_region_is_ram(section->mr)) {
647 iotlb = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
649 if (!section->readonly) {
650 iotlb |= phys_section_notdirty;
652 iotlb |= phys_section_rom;
655 iotlb = section - phys_sections;
659 /* Make accesses to pages with watchpoints go via the
660 watchpoint trap routines. */
661 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
662 if (vaddr == (wp->vaddr & TARGET_PAGE_MASK)) {
663 /* Avoid trapping reads of pages with a write breakpoint. */
664 if ((prot & PAGE_WRITE) || (wp->flags & BP_MEM_READ)) {
665 iotlb = phys_section_watch + paddr;
666 *address |= TLB_MMIO;
674 #endif /* defined(CONFIG_USER_ONLY) */
676 #if !defined(CONFIG_USER_ONLY)
678 #define SUBPAGE_IDX(addr) ((addr) & ~TARGET_PAGE_MASK)
679 typedef struct subpage_t {
682 uint16_t sub_section[TARGET_PAGE_SIZE];
685 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
687 static subpage_t *subpage_init(hwaddr base);
688 static void destroy_page_desc(uint16_t section_index)
690 MemoryRegionSection *section = &phys_sections[section_index];
691 MemoryRegion *mr = section->mr;
694 subpage_t *subpage = container_of(mr, subpage_t, iomem);
695 memory_region_destroy(&subpage->iomem);
700 static void destroy_l2_mapping(PhysPageEntry *lp, unsigned level)
705 if (lp->ptr == PHYS_MAP_NODE_NIL) {
709 p = phys_map_nodes[lp->ptr];
710 for (i = 0; i < L2_SIZE; ++i) {
712 destroy_l2_mapping(&p[i], level - 1);
714 destroy_page_desc(p[i].ptr);
718 lp->ptr = PHYS_MAP_NODE_NIL;
721 static void destroy_all_mappings(AddressSpaceDispatch *d)
723 destroy_l2_mapping(&d->phys_map, P_L2_LEVELS - 1);
724 phys_map_nodes_reset();
727 static uint16_t phys_section_add(MemoryRegionSection *section)
729 /* The physical section number is ORed with a page-aligned
730 * pointer to produce the iotlb entries. Thus it should
731 * never overflow into the page-aligned value.
733 assert(phys_sections_nb < TARGET_PAGE_SIZE);
735 if (phys_sections_nb == phys_sections_nb_alloc) {
736 phys_sections_nb_alloc = MAX(phys_sections_nb_alloc * 2, 16);
737 phys_sections = g_renew(MemoryRegionSection, phys_sections,
738 phys_sections_nb_alloc);
740 phys_sections[phys_sections_nb] = *section;
741 return phys_sections_nb++;
744 static void phys_sections_clear(void)
746 phys_sections_nb = 0;
749 static void register_subpage(AddressSpaceDispatch *d, MemoryRegionSection *section)
752 hwaddr base = section->offset_within_address_space
754 MemoryRegionSection *existing = phys_page_find(d, base >> TARGET_PAGE_BITS);
755 MemoryRegionSection subsection = {
756 .offset_within_address_space = base,
757 .size = TARGET_PAGE_SIZE,
761 assert(existing->mr->subpage || existing->mr == &io_mem_unassigned);
763 if (!(existing->mr->subpage)) {
764 subpage = subpage_init(base);
765 subsection.mr = &subpage->iomem;
766 phys_page_set(d, base >> TARGET_PAGE_BITS, 1,
767 phys_section_add(&subsection));
769 subpage = container_of(existing->mr, subpage_t, iomem);
771 start = section->offset_within_address_space & ~TARGET_PAGE_MASK;
772 end = start + section->size - 1;
773 subpage_register(subpage, start, end, phys_section_add(section));
777 static void register_multipage(AddressSpaceDispatch *d, MemoryRegionSection *section)
779 hwaddr start_addr = section->offset_within_address_space;
780 ram_addr_t size = section->size;
782 uint16_t section_index = phys_section_add(section);
787 phys_page_set(d, addr >> TARGET_PAGE_BITS, size >> TARGET_PAGE_BITS,
791 QEMU_BUILD_BUG_ON(TARGET_PHYS_ADDR_SPACE_BITS > MAX_PHYS_ADDR_SPACE_BITS)
793 static MemoryRegionSection limit(MemoryRegionSection section)
795 section.size = MIN(section.offset_within_address_space + section.size,
797 - section.offset_within_address_space;
802 static void mem_add(MemoryListener *listener, MemoryRegionSection *section)
804 AddressSpaceDispatch *d = container_of(listener, AddressSpaceDispatch, listener);
805 MemoryRegionSection now = limit(*section), remain = limit(*section);
807 if ((now.offset_within_address_space & ~TARGET_PAGE_MASK)
808 || (now.size < TARGET_PAGE_SIZE)) {
809 now.size = MIN(TARGET_PAGE_ALIGN(now.offset_within_address_space)
810 - now.offset_within_address_space,
812 register_subpage(d, &now);
813 remain.size -= now.size;
814 remain.offset_within_address_space += now.size;
815 remain.offset_within_region += now.size;
817 while (remain.size >= TARGET_PAGE_SIZE) {
819 if (remain.offset_within_region & ~TARGET_PAGE_MASK) {
820 now.size = TARGET_PAGE_SIZE;
821 register_subpage(d, &now);
823 now.size &= TARGET_PAGE_MASK;
824 register_multipage(d, &now);
826 remain.size -= now.size;
827 remain.offset_within_address_space += now.size;
828 remain.offset_within_region += now.size;
832 register_subpage(d, &now);
836 void qemu_flush_coalesced_mmio_buffer(void)
839 kvm_flush_coalesced_mmio_buffer();
842 void qemu_mutex_lock_ramlist(void)
844 qemu_mutex_lock(&ram_list.mutex);
847 void qemu_mutex_unlock_ramlist(void)
849 qemu_mutex_unlock(&ram_list.mutex);
852 #if defined(__linux__) && !defined(TARGET_S390X)
856 #define HUGETLBFS_MAGIC 0x958458f6
858 static long gethugepagesize(const char *path)
864 ret = statfs(path, &fs);
865 } while (ret != 0 && errno == EINTR);
872 if (fs.f_type != HUGETLBFS_MAGIC)
873 fprintf(stderr, "Warning: path not on HugeTLBFS: %s\n", path);
878 static void *file_ram_alloc(RAMBlock *block,
883 char *sanitized_name;
890 unsigned long hpagesize;
892 hpagesize = gethugepagesize(path);
897 if (memory < hpagesize) {
901 if (kvm_enabled() && !kvm_has_sync_mmu()) {
902 fprintf(stderr, "host lacks kvm mmu notifiers, -mem-path unsupported\n");
906 /* Make name safe to use with mkstemp by replacing '/' with '_'. */
907 sanitized_name = g_strdup(block->mr->name);
908 for (c = sanitized_name; *c != '\0'; c++) {
913 filename = g_strdup_printf("%s/qemu_back_mem.%s.XXXXXX", path,
915 g_free(sanitized_name);
917 fd = mkstemp(filename);
919 perror("unable to create backing store for hugepages");
926 memory = (memory+hpagesize-1) & ~(hpagesize-1);
929 * ftruncate is not supported by hugetlbfs in older
930 * hosts, so don't bother bailing out on errors.
931 * If anything goes wrong with it under other filesystems,
934 if (ftruncate(fd, memory))
938 /* NB: MAP_POPULATE won't exhaustively alloc all phys pages in the case
939 * MAP_PRIVATE is requested. For mem_prealloc we mmap as MAP_SHARED
940 * to sidestep this quirk.
942 flags = mem_prealloc ? MAP_POPULATE | MAP_SHARED : MAP_PRIVATE;
943 area = mmap(0, memory, PROT_READ | PROT_WRITE, flags, fd, 0);
945 area = mmap(0, memory, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
947 if (area == MAP_FAILED) {
948 perror("file_ram_alloc: can't mmap RAM pages");
957 static ram_addr_t find_ram_offset(ram_addr_t size)
959 RAMBlock *block, *next_block;
960 ram_addr_t offset = RAM_ADDR_MAX, mingap = RAM_ADDR_MAX;
962 assert(size != 0); /* it would hand out same offset multiple times */
964 if (QTAILQ_EMPTY(&ram_list.blocks))
967 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
968 ram_addr_t end, next = RAM_ADDR_MAX;
970 end = block->offset + block->length;
972 QTAILQ_FOREACH(next_block, &ram_list.blocks, next) {
973 if (next_block->offset >= end) {
974 next = MIN(next, next_block->offset);
977 if (next - end >= size && next - end < mingap) {
983 if (offset == RAM_ADDR_MAX) {
984 fprintf(stderr, "Failed to find gap of requested size: %" PRIu64 "\n",
992 ram_addr_t last_ram_offset(void)
997 QTAILQ_FOREACH(block, &ram_list.blocks, next)
998 last = MAX(last, block->offset + block->length);
1003 static void qemu_ram_setup_dump(void *addr, ram_addr_t size)
1006 QemuOpts *machine_opts;
1008 /* Use MADV_DONTDUMP, if user doesn't want the guest memory in the core */
1009 machine_opts = qemu_opts_find(qemu_find_opts("machine"), 0);
1011 !qemu_opt_get_bool(machine_opts, "dump-guest-core", true)) {
1012 ret = qemu_madvise(addr, size, QEMU_MADV_DONTDUMP);
1014 perror("qemu_madvise");
1015 fprintf(stderr, "madvise doesn't support MADV_DONTDUMP, "
1016 "but dump_guest_core=off specified\n");
1021 void qemu_ram_set_idstr(ram_addr_t addr, const char *name, DeviceState *dev)
1023 RAMBlock *new_block, *block;
1026 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1027 if (block->offset == addr) {
1033 assert(!new_block->idstr[0]);
1036 char *id = qdev_get_dev_path(dev);
1038 snprintf(new_block->idstr, sizeof(new_block->idstr), "%s/", id);
1042 pstrcat(new_block->idstr, sizeof(new_block->idstr), name);
1044 /* This assumes the iothread lock is taken here too. */
1045 qemu_mutex_lock_ramlist();
1046 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1047 if (block != new_block && !strcmp(block->idstr, new_block->idstr)) {
1048 fprintf(stderr, "RAMBlock \"%s\" already registered, abort!\n",
1053 qemu_mutex_unlock_ramlist();
1056 static int memory_try_enable_merging(void *addr, size_t len)
1060 opts = qemu_opts_find(qemu_find_opts("machine"), 0);
1061 if (opts && !qemu_opt_get_bool(opts, "mem-merge", true)) {
1062 /* disabled by the user */
1066 return qemu_madvise(addr, len, QEMU_MADV_MERGEABLE);
1069 ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
1072 RAMBlock *block, *new_block;
1074 size = TARGET_PAGE_ALIGN(size);
1075 new_block = g_malloc0(sizeof(*new_block));
1077 /* This assumes the iothread lock is taken here too. */
1078 qemu_mutex_lock_ramlist();
1080 new_block->offset = find_ram_offset(size);
1082 new_block->host = host;
1083 new_block->flags |= RAM_PREALLOC_MASK;
1086 #if defined (__linux__) && !defined(TARGET_S390X)
1087 new_block->host = file_ram_alloc(new_block, size, mem_path);
1088 if (!new_block->host) {
1089 new_block->host = qemu_anon_ram_alloc(size);
1090 memory_try_enable_merging(new_block->host, size);
1093 fprintf(stderr, "-mem-path option unsupported\n");
1097 if (xen_enabled()) {
1098 xen_ram_alloc(new_block->offset, size, mr);
1099 } else if (kvm_enabled()) {
1100 /* some s390/kvm configurations have special constraints */
1101 new_block->host = kvm_ram_alloc(size);
1103 new_block->host = qemu_anon_ram_alloc(size);
1105 memory_try_enable_merging(new_block->host, size);
1108 new_block->length = size;
1110 /* Keep the list sorted from biggest to smallest block. */
1111 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1112 if (block->length < new_block->length) {
1117 QTAILQ_INSERT_BEFORE(block, new_block, next);
1119 QTAILQ_INSERT_TAIL(&ram_list.blocks, new_block, next);
1121 ram_list.mru_block = NULL;
1124 qemu_mutex_unlock_ramlist();
1126 ram_list.phys_dirty = g_realloc(ram_list.phys_dirty,
1127 last_ram_offset() >> TARGET_PAGE_BITS);
1128 memset(ram_list.phys_dirty + (new_block->offset >> TARGET_PAGE_BITS),
1129 0, size >> TARGET_PAGE_BITS);
1130 cpu_physical_memory_set_dirty_range(new_block->offset, size, 0xff);
1132 qemu_ram_setup_dump(new_block->host, size);
1133 qemu_madvise(new_block->host, size, QEMU_MADV_HUGEPAGE);
1136 kvm_setup_guest_memory(new_block->host, size);
1138 return new_block->offset;
1141 ram_addr_t qemu_ram_alloc(ram_addr_t size, MemoryRegion *mr)
1143 return qemu_ram_alloc_from_ptr(size, NULL, mr);
1146 void qemu_ram_free_from_ptr(ram_addr_t addr)
1150 /* This assumes the iothread lock is taken here too. */
1151 qemu_mutex_lock_ramlist();
1152 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1153 if (addr == block->offset) {
1154 QTAILQ_REMOVE(&ram_list.blocks, block, next);
1155 ram_list.mru_block = NULL;
1161 qemu_mutex_unlock_ramlist();
1164 void qemu_ram_free(ram_addr_t addr)
1168 /* This assumes the iothread lock is taken here too. */
1169 qemu_mutex_lock_ramlist();
1170 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1171 if (addr == block->offset) {
1172 QTAILQ_REMOVE(&ram_list.blocks, block, next);
1173 ram_list.mru_block = NULL;
1175 if (block->flags & RAM_PREALLOC_MASK) {
1177 } else if (mem_path) {
1178 #if defined (__linux__) && !defined(TARGET_S390X)
1180 munmap(block->host, block->length);
1183 qemu_anon_ram_free(block->host, block->length);
1189 if (xen_enabled()) {
1190 xen_invalidate_map_cache_entry(block->host);
1192 qemu_anon_ram_free(block->host, block->length);
1199 qemu_mutex_unlock_ramlist();
1204 void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
1211 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1212 offset = addr - block->offset;
1213 if (offset < block->length) {
1214 vaddr = block->host + offset;
1215 if (block->flags & RAM_PREALLOC_MASK) {
1219 munmap(vaddr, length);
1221 #if defined(__linux__) && !defined(TARGET_S390X)
1224 flags |= mem_prealloc ? MAP_POPULATE | MAP_SHARED :
1227 flags |= MAP_PRIVATE;
1229 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1230 flags, block->fd, offset);
1232 flags |= MAP_PRIVATE | MAP_ANONYMOUS;
1233 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1240 #if defined(TARGET_S390X) && defined(CONFIG_KVM)
1241 flags |= MAP_SHARED | MAP_ANONYMOUS;
1242 area = mmap(vaddr, length, PROT_EXEC|PROT_READ|PROT_WRITE,
1245 flags |= MAP_PRIVATE | MAP_ANONYMOUS;
1246 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1250 if (area != vaddr) {
1251 fprintf(stderr, "Could not remap addr: "
1252 RAM_ADDR_FMT "@" RAM_ADDR_FMT "\n",
1256 memory_try_enable_merging(vaddr, length);
1257 qemu_ram_setup_dump(vaddr, length);
1263 #endif /* !_WIN32 */
1265 /* Return a host pointer to ram allocated with qemu_ram_alloc.
1266 With the exception of the softmmu code in this file, this should
1267 only be used for local memory (e.g. video ram) that the device owns,
1268 and knows it isn't going to access beyond the end of the block.
1270 It should not be used for general purpose DMA.
1271 Use cpu_physical_memory_map/cpu_physical_memory_rw instead.
1273 void *qemu_get_ram_ptr(ram_addr_t addr)
1277 /* The list is protected by the iothread lock here. */
1278 block = ram_list.mru_block;
1279 if (block && addr - block->offset < block->length) {
1282 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1283 if (addr - block->offset < block->length) {
1288 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
1292 ram_list.mru_block = block;
1293 if (xen_enabled()) {
1294 /* We need to check if the requested address is in the RAM
1295 * because we don't want to map the entire memory in QEMU.
1296 * In that case just map until the end of the page.
1298 if (block->offset == 0) {
1299 return xen_map_cache(addr, 0, 0);
1300 } else if (block->host == NULL) {
1302 xen_map_cache(block->offset, block->length, 1);
1305 return block->host + (addr - block->offset);
1308 /* Return a host pointer to ram allocated with qemu_ram_alloc. Same as
1309 * qemu_get_ram_ptr but do not touch ram_list.mru_block.
1311 * ??? Is this still necessary?
1313 static void *qemu_safe_ram_ptr(ram_addr_t addr)
1317 /* The list is protected by the iothread lock here. */
1318 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1319 if (addr - block->offset < block->length) {
1320 if (xen_enabled()) {
1321 /* We need to check if the requested address is in the RAM
1322 * because we don't want to map the entire memory in QEMU.
1323 * In that case just map until the end of the page.
1325 if (block->offset == 0) {
1326 return xen_map_cache(addr, 0, 0);
1327 } else if (block->host == NULL) {
1329 xen_map_cache(block->offset, block->length, 1);
1332 return block->host + (addr - block->offset);
1336 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
1342 /* Return a host pointer to guest's ram. Similar to qemu_get_ram_ptr
1343 * but takes a size argument */
1344 static void *qemu_ram_ptr_length(ram_addr_t addr, ram_addr_t *size)
1349 if (xen_enabled()) {
1350 return xen_map_cache(addr, *size, 1);
1354 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1355 if (addr - block->offset < block->length) {
1356 if (addr - block->offset + *size > block->length)
1357 *size = block->length - addr + block->offset;
1358 return block->host + (addr - block->offset);
1362 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
1367 int qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr)
1370 uint8_t *host = ptr;
1372 if (xen_enabled()) {
1373 *ram_addr = xen_ram_addr_from_mapcache(ptr);
1377 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1378 /* This case happens when the block is not mapped. */
1379 if (block->host == NULL) {
1382 if (host - block->host < block->length) {
1383 *ram_addr = block->offset + (host - block->host);
1391 /* Some of the softmmu routines need to translate from a host pointer
1392 (typically a TLB entry) back to a ram offset. */
1393 ram_addr_t qemu_ram_addr_from_host_nofail(void *ptr)
1395 ram_addr_t ram_addr;
1397 if (qemu_ram_addr_from_host(ptr, &ram_addr)) {
1398 fprintf(stderr, "Bad ram pointer %p\n", ptr);
1404 static void notdirty_mem_write(void *opaque, hwaddr ram_addr,
1405 uint64_t val, unsigned size)
1408 dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
1409 if (!(dirty_flags & CODE_DIRTY_FLAG)) {
1410 tb_invalidate_phys_page_fast(ram_addr, size);
1411 dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
1415 stb_p(qemu_get_ram_ptr(ram_addr), val);
1418 stw_p(qemu_get_ram_ptr(ram_addr), val);
1421 stl_p(qemu_get_ram_ptr(ram_addr), val);
1426 dirty_flags |= (0xff & ~CODE_DIRTY_FLAG);
1427 cpu_physical_memory_set_dirty_flags(ram_addr, dirty_flags);
1428 /* we remove the notdirty callback only if the code has been
1430 if (dirty_flags == 0xff)
1431 tlb_set_dirty(cpu_single_env, cpu_single_env->mem_io_vaddr);
1434 static bool notdirty_mem_accepts(void *opaque, hwaddr addr,
1435 unsigned size, bool is_write)
1440 static const MemoryRegionOps notdirty_mem_ops = {
1441 .write = notdirty_mem_write,
1442 .valid.accepts = notdirty_mem_accepts,
1443 .endianness = DEVICE_NATIVE_ENDIAN,
1446 /* Generate a debug exception if a watchpoint has been hit. */
1447 static void check_watchpoint(int offset, int len_mask, int flags)
1449 CPUArchState *env = cpu_single_env;
1450 target_ulong pc, cs_base;
1455 if (env->watchpoint_hit) {
1456 /* We re-entered the check after replacing the TB. Now raise
1457 * the debug interrupt so that it will trigger after the
1458 * current instruction. */
1459 cpu_interrupt(ENV_GET_CPU(env), CPU_INTERRUPT_DEBUG);
1462 vaddr = (env->mem_io_vaddr & TARGET_PAGE_MASK) + offset;
1463 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
1464 if ((vaddr == (wp->vaddr & len_mask) ||
1465 (vaddr & wp->len_mask) == wp->vaddr) && (wp->flags & flags)) {
1466 wp->flags |= BP_WATCHPOINT_HIT;
1467 if (!env->watchpoint_hit) {
1468 env->watchpoint_hit = wp;
1469 tb_check_watchpoint(env);
1470 if (wp->flags & BP_STOP_BEFORE_ACCESS) {
1471 env->exception_index = EXCP_DEBUG;
1474 cpu_get_tb_cpu_state(env, &pc, &cs_base, &cpu_flags);
1475 tb_gen_code(env, pc, cs_base, cpu_flags, 1);
1476 cpu_resume_from_signal(env, NULL);
1480 wp->flags &= ~BP_WATCHPOINT_HIT;
1485 /* Watchpoint access routines. Watchpoints are inserted using TLB tricks,
1486 so these check for a hit then pass through to the normal out-of-line phys routines. */
1488 static uint64_t watch_mem_read(void *opaque, hwaddr addr,
1491 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~(size - 1), BP_MEM_READ);
1493 case 1: return ldub_phys(addr);
1494 case 2: return lduw_phys(addr);
1495 case 4: return ldl_phys(addr);
1500 static void watch_mem_write(void *opaque, hwaddr addr,
1501 uint64_t val, unsigned size)
1503 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~(size - 1), BP_MEM_WRITE);
1506 stb_phys(addr, val);
1509 stw_phys(addr, val);
1512 stl_phys(addr, val);
1518 static const MemoryRegionOps watch_mem_ops = {
1519 .read = watch_mem_read,
1520 .write = watch_mem_write,
1521 .endianness = DEVICE_NATIVE_ENDIAN,
1524 static uint64_t subpage_read(void *opaque, hwaddr addr,
1527 subpage_t *mmio = opaque;
1528 unsigned int idx = SUBPAGE_IDX(addr);
1529 MemoryRegionSection *section;
1530 #if defined(DEBUG_SUBPAGE)
1531 printf("%s: subpage %p len %d addr " TARGET_FMT_plx " idx %d\n", __func__,
1532 mmio, len, addr, idx);
1535 section = &phys_sections[mmio->sub_section[idx]];
1537 addr -= section->offset_within_address_space;
1538 addr += section->offset_within_region;
1539 return io_mem_read(section->mr, addr, len);
1542 static void subpage_write(void *opaque, hwaddr addr,
1543 uint64_t value, unsigned len)
1545 subpage_t *mmio = opaque;
1546 unsigned int idx = SUBPAGE_IDX(addr);
1547 MemoryRegionSection *section;
1548 #if defined(DEBUG_SUBPAGE)
1549 printf("%s: subpage %p len %d addr " TARGET_FMT_plx
1550 " idx %d value %"PRIx64"\n",
1551 __func__, mmio, len, addr, idx, value);
1554 section = &phys_sections[mmio->sub_section[idx]];
1556 addr -= section->offset_within_address_space;
1557 addr += section->offset_within_region;
1558 io_mem_write(section->mr, addr, value, len);
1561 static const MemoryRegionOps subpage_ops = {
1562 .read = subpage_read,
1563 .write = subpage_write,
1564 .endianness = DEVICE_NATIVE_ENDIAN,
1567 static uint64_t subpage_ram_read(void *opaque, hwaddr addr,
1570 ram_addr_t raddr = addr;
1571 void *ptr = qemu_get_ram_ptr(raddr);
1573 case 1: return ldub_p(ptr);
1574 case 2: return lduw_p(ptr);
1575 case 4: return ldl_p(ptr);
1580 static void subpage_ram_write(void *opaque, hwaddr addr,
1581 uint64_t value, unsigned size)
1583 ram_addr_t raddr = addr;
1584 void *ptr = qemu_get_ram_ptr(raddr);
1586 case 1: return stb_p(ptr, value);
1587 case 2: return stw_p(ptr, value);
1588 case 4: return stl_p(ptr, value);
1593 static const MemoryRegionOps subpage_ram_ops = {
1594 .read = subpage_ram_read,
1595 .write = subpage_ram_write,
1596 .endianness = DEVICE_NATIVE_ENDIAN,
1599 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
1604 if (start >= TARGET_PAGE_SIZE || end >= TARGET_PAGE_SIZE)
1606 idx = SUBPAGE_IDX(start);
1607 eidx = SUBPAGE_IDX(end);
1608 #if defined(DEBUG_SUBPAGE)
1609 printf("%s: %p start %08x end %08x idx %08x eidx %08x mem %ld\n", __func__,
1610 mmio, start, end, idx, eidx, memory);
1612 if (memory_region_is_ram(phys_sections[section].mr)) {
1613 MemoryRegionSection new_section = phys_sections[section];
1614 new_section.mr = &io_mem_subpage_ram;
1615 section = phys_section_add(&new_section);
1617 for (; idx <= eidx; idx++) {
1618 mmio->sub_section[idx] = section;
1624 static subpage_t *subpage_init(hwaddr base)
1628 mmio = g_malloc0(sizeof(subpage_t));
1631 memory_region_init_io(&mmio->iomem, &subpage_ops, mmio,
1632 "subpage", TARGET_PAGE_SIZE);
1633 mmio->iomem.subpage = true;
1634 #if defined(DEBUG_SUBPAGE)
1635 printf("%s: %p base " TARGET_FMT_plx " len %08x %d\n", __func__,
1636 mmio, base, TARGET_PAGE_SIZE, subpage_memory);
1638 subpage_register(mmio, 0, TARGET_PAGE_SIZE-1, phys_section_unassigned);
/* Build a MemoryRegionSection whose two offsets are 0 and return whatever
   phys_section_add() yields for it.
   NOTE(review): the initializer is only partly visible in this extract; it
   presumably also records `mr` -- confirm against the full file.
   The argument is the address of the local `section`; it had been corrupted
   into a section-sign character by HTML-entity decoding of "&sect". */
1643 static uint16_t dummy_section(MemoryRegion *mr)
1645 MemoryRegionSection section = {
1647 .offset_within_address_space = 0,
1648 .offset_within_region = 0,
1652 return phys_section_add(&section);
/* Return the MemoryRegion of the phys_sections entry selected by the bits of
   `index` that lie below the page boundary (index & ~TARGET_PAGE_MASK). */
1655 MemoryRegion *iotlb_to_region(hwaddr index)
1657 return phys_sections[index & ~TARGET_PAGE_MASK].mr;
/* Initialise the special I/O regions of this file -- "rom", "unassigned",
   "notdirty", "subpage-ram" and "watch" -- each with size UINT64_MAX, a NULL
   opaque pointer and its ops table.
   The "notdirty" region takes the address of notdirty_mem_ops; that argument
   had been corrupted into a not-sign character by HTML-entity decoding of
   "&not". */
1660 static void io_mem_init(void)
1662 memory_region_init_io(&io_mem_rom, &unassigned_mem_ops, NULL, "rom", UINT64_MAX);
1663 memory_region_init_io(&io_mem_unassigned, &unassigned_mem_ops, NULL,
1664 "unassigned", UINT64_MAX);
1665 memory_region_init_io(&io_mem_notdirty, &notdirty_mem_ops, NULL,
1666 "notdirty", UINT64_MAX);
1667 memory_region_init_io(&io_mem_subpage_ram, &subpage_ram_ops, NULL,
1668 "subpage-ram", UINT64_MAX);
1669 memory_region_init_io(&io_mem_watch, &watch_mem_ops, NULL,
1670 "watch", UINT64_MAX);
1673 static void mem_begin(MemoryListener *listener)
1675 AddressSpaceDispatch *d = container_of(listener, AddressSpaceDispatch, listener);
1677 destroy_all_mappings(d);
1678 d->phys_map.ptr = PHYS_MAP_NODE_NIL;
/* .begin hook of core_memory_listener: clear phys_sections, then re-create
   the four fixed sections (unassigned, notdirty, rom, watch) and remember
   the value dummy_section() returns for each of them. */
1681 static void core_begin(MemoryListener *listener)
1683 phys_sections_clear();
1684 phys_section_unassigned = dummy_section(&io_mem_unassigned);
1685 phys_section_notdirty = dummy_section(&io_mem_notdirty);
1686 phys_section_rom = dummy_section(&io_mem_rom);
1687 phys_section_watch = dummy_section(&io_mem_watch);
1690 static void tcg_commit(MemoryListener *listener)
1694 /* since each CPU stores ram addresses in its TLB cache, we must
1695 reset the modified entries */
1697 for(env = first_cpu; env != NULL; env = env->next_cpu) {
1702 static void core_log_global_start(MemoryListener *listener)
1704 cpu_physical_memory_set_dirty_tracking(1);
1707 static void core_log_global_stop(MemoryListener *listener)
1709 cpu_physical_memory_set_dirty_tracking(0);
/* .region_add hook of io_memory_listener: allocate a MemoryRegionIORange for
   `section`, record the section's region and its offset within that region,
   initialise the embedded iorange with memory_region_iorange_ops over the
   section's address-space offset and size, and register it as an ioport. */
1712 static void io_region_add(MemoryListener *listener,
1713 MemoryRegionSection *section)
1715 MemoryRegionIORange *mrio = g_new(MemoryRegionIORange, 1);
1717 mrio->mr = section->mr;
1718 mrio->offset = section->offset_within_region;
1719 iorange_init(&mrio->iorange, &memory_region_iorange_ops,
1720 section->offset_within_address_space, section->size);
1721 ioport_register(&mrio->iorange);
/* .region_del hook of io_memory_listener: unassign the ioport range that
   starts at the section's address-space offset and spans its size. */
1724 static void io_region_del(MemoryListener *listener,
1725 MemoryRegionSection *section)
1727 isa_unassign_ioport(section->offset_within_address_space, section->size);
1730 static MemoryListener core_memory_listener = {
1731 .begin = core_begin,
1732 .log_global_start = core_log_global_start,
1733 .log_global_stop = core_log_global_stop,
1737 static MemoryListener io_memory_listener = {
1738 .region_add = io_region_add,
1739 .region_del = io_region_del,
1743 static MemoryListener tcg_memory_listener = {
1744 .commit = tcg_commit,
1747 void address_space_init_dispatch(AddressSpace *as)
1749 AddressSpaceDispatch *d = g_new(AddressSpaceDispatch, 1);
1751 d->phys_map = (PhysPageEntry) { .ptr = PHYS_MAP_NODE_NIL, .is_leaf = 0 };
1752 d->listener = (MemoryListener) {
1754 .region_add = mem_add,
1755 .region_nop = mem_add,
1759 memory_listener_register(&d->listener, as);
1762 void address_space_destroy_dispatch(AddressSpace *as)
1764 AddressSpaceDispatch *d = as->dispatch;
1766 memory_listener_unregister(&d->listener);
1767 destroy_l2_mapping(&d->phys_map, P_L2_LEVELS - 1);
1769 as->dispatch = NULL;
/* Create the two root regions and their address spaces: "system"
   (INT64_MAX bytes) backing address_space_memory, named "memory", and "io"
   (65536 bytes) backing address_space_io, named "I/O".  Then register the
   core and tcg listeners on address_space_memory and the io listener on
   address_space_io, and pass dma_context_memory together with
   address_space_memory to dma_context_init(). */
1772 static void memory_map_init(void)
1774 system_memory = g_malloc(sizeof(*system_memory));
1775 memory_region_init(system_memory, "system", INT64_MAX);
1776 address_space_init(&address_space_memory, system_memory);
1777 address_space_memory.name = "memory";
1779 system_io = g_malloc(sizeof(*system_io));
1780 memory_region_init(system_io, "io", 65536);
1781 address_space_init(&address_space_io, system_io);
1782 address_space_io.name = "I/O";
1784 memory_listener_register(&core_memory_listener, &address_space_memory);
1785 memory_listener_register(&io_memory_listener, &address_space_io);
1786 memory_listener_register(&tcg_memory_listener, &address_space_memory);
1788 dma_context_init(&dma_context_memory, &address_space_memory,
1792 MemoryRegion *get_system_memory(void)
1794 return system_memory;
1797 MemoryRegion *get_system_io(void)
1802 #endif /* !defined(CONFIG_USER_ONLY) */
1804 /* physical memory access (slow version, mainly for debug) */
1805 #if defined(CONFIG_USER_ONLY)
1806 int cpu_memory_rw_debug(CPUArchState *env, target_ulong addr,
1807 uint8_t *buf, int len, int is_write)
1814 page = addr & TARGET_PAGE_MASK;
1815 l = (page + TARGET_PAGE_SIZE) - addr;
1818 flags = page_get_flags(page);
1819 if (!(flags & PAGE_VALID))
1822 if (!(flags & PAGE_WRITE))
1824 /* XXX: this code should not depend on lock_user */
1825 if (!(p = lock_user(VERIFY_WRITE, addr, l, 0)))
1828 unlock_user(p, addr, l);
1830 if (!(flags & PAGE_READ))
1832 /* XXX: this code should not depend on lock_user */
1833 if (!(p = lock_user(VERIFY_READ, addr, l, 1)))
1836 unlock_user(p, addr, 0);
1847 static void invalidate_and_set_dirty(hwaddr addr,
1850 if (!cpu_physical_memory_is_dirty(addr)) {
1851 /* invalidate code */
1852 tb_invalidate_phys_page_range(addr, addr + length, 0);
1854 cpu_physical_memory_set_dirty_flags(addr, (0xff & ~CODE_DIRTY_FLAG));
1856 xen_modified_memory(addr, length);
1859 static inline bool memory_access_is_direct(MemoryRegion *mr, bool is_write)
1861 if (memory_region_is_ram(mr)) {
1862 return !(is_write && mr->readonly);
1864 if (memory_region_is_romd(mr)) {
1871 void address_space_rw(AddressSpace *as, hwaddr addr, uint8_t *buf,
1872 int len, bool is_write)
1878 MemoryRegionSection *section;
1882 section = address_space_translate(as, addr, &addr1, &l, is_write);
1885 if (!memory_access_is_direct(section->mr, is_write)) {
1886 /* XXX: could force cpu_single_env to NULL to avoid
1888 if (l >= 4 && ((addr1 & 3) == 0)) {
1889 /* 32 bit write access */
1891 io_mem_write(section->mr, addr1, val, 4);
1893 } else if (l >= 2 && ((addr1 & 1) == 0)) {
1894 /* 16 bit write access */
1896 io_mem_write(section->mr, addr1, val, 2);
1899 /* 8 bit write access */
1901 io_mem_write(section->mr, addr1, val, 1);
1905 addr1 += memory_region_get_ram_addr(section->mr);
1907 ptr = qemu_get_ram_ptr(addr1);
1908 memcpy(ptr, buf, l);
1909 invalidate_and_set_dirty(addr1, l);
1912 if (!memory_access_is_direct(section->mr, is_write)) {
1914 if (l >= 4 && ((addr1 & 3) == 0)) {
1915 /* 32 bit read access */
1916 val = io_mem_read(section->mr, addr1, 4);
1919 } else if (l >= 2 && ((addr1 & 1) == 0)) {
1920 /* 16 bit read access */
1921 val = io_mem_read(section->mr, addr1, 2);
1925 /* 8 bit read access */
1926 val = io_mem_read(section->mr, addr1, 1);
1932 ptr = qemu_get_ram_ptr(section->mr->ram_addr + addr1);
1933 memcpy(buf, ptr, l);
/* Write `len` bytes from `buf` to `addr` in address space `as`.
   Thin wrapper around address_space_rw() with is_write set to true; the
   cast only drops the const qualifier to match that function's parameter
   type. */
1942 void address_space_write(AddressSpace *as, hwaddr addr,
1943 const uint8_t *buf, int len)
1945 address_space_rw(as, addr, (uint8_t *)buf, len, true);
/**
1949 * address_space_read: read from an address space.
1951 * @as: #AddressSpace to be accessed
1952 * @addr: address within that address space
1953 * @buf: buffer with the data transferred
 */
1955 void address_space_read(AddressSpace *as, hwaddr addr, uint8_t *buf, int len)
1957 address_space_rw(as, addr, buf, len, false);
/* Read (is_write == 0) or write (is_write != 0) `len` bytes at guest
   physical address `addr`, using `buf` as destination or source, by
   forwarding to address_space_rw() on address_space_memory.
   Both this function and address_space_rw() are declared void, so the call
   is a plain statement: `return <expression>;` is not permitted by ISO C in
   a function returning void. */
1961 void cpu_physical_memory_rw(hwaddr addr, uint8_t *buf,
1962 int len, int is_write)
1964 address_space_rw(&address_space_memory, addr, buf, len, is_write);
1967 /* used for ROM loading : can write in RAM and ROM */
1968 void cpu_physical_memory_write_rom(hwaddr addr,
1969 const uint8_t *buf, int len)
1974 MemoryRegionSection *section;
1978 section = address_space_translate(&address_space_memory,
1979 addr, &addr1, &l, true);
1981 if (!(memory_region_is_ram(section->mr) ||
1982 memory_region_is_romd(section->mr))) {
1985 addr1 += memory_region_get_ram_addr(section->mr);
1987 ptr = qemu_get_ram_ptr(addr1);
1988 memcpy(ptr, buf, l);
1989 invalidate_and_set_dirty(addr1, l);
2003 static BounceBuffer bounce;
2005 typedef struct MapClient {
2007 void (*callback)(void *opaque);
2008 QLIST_ENTRY(MapClient) link;
2011 static QLIST_HEAD(map_client_list, MapClient) map_client_list
2012 = QLIST_HEAD_INITIALIZER(map_client_list);
/* Allocate a MapClient, store `opaque` and `callback` in it and insert it at
   the head of map_client_list.
   NOTE(review): the return statement is not visible in this extract;
   presumably the new client is handed back as the opaque handle -- confirm. */
2014 void *cpu_register_map_client(void *opaque, void (*callback)(void *opaque))
2016 MapClient *client = g_malloc(sizeof(*client));
2018 client->opaque = opaque;
2019 client->callback = callback;
2020 QLIST_INSERT_HEAD(&map_client_list, client, link);
2024 static void cpu_unregister_map_client(void *_client)
2026 MapClient *client = (MapClient *)_client;
2028 QLIST_REMOVE(client, link);
/* Drain map_client_list: while it is non-empty, take the first client,
   invoke its callback with its opaque pointer, then hand the client to
   cpu_unregister_map_client(), which removes it from the list so the loop
   makes progress. */
2032 static void cpu_notify_map_clients(void)
2036 while (!QLIST_EMPTY(&map_client_list)) {
2037 client = QLIST_FIRST(&map_client_list);
2038 client->callback(client->opaque);
2039 cpu_unregister_map_client(client);
2043 /* Map a physical memory region into a host virtual address.
2044 * May map a subset of the requested range, given by and returned in *plen.
2045 * May return NULL if resources needed to perform the mapping are exhausted.
2046 * Use only for reads OR writes - not for read-modify-write operations.
2047 * Use cpu_register_map_client() to know when retrying the map operation is
2048 * likely to succeed. */
2050 void *address_space_map(AddressSpace *as,
2058 MemoryRegionSection *section;
2059 ram_addr_t raddr = RAM_ADDR_MAX;
2065 section = address_space_translate(as, addr, &xlat, &l, is_write);
2067 if (!memory_access_is_direct(section->mr, is_write)) {
2068 if (todo || bounce.buffer) {
2071 bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, TARGET_PAGE_SIZE);
2075 address_space_read(as, addr, bounce.buffer, l);
2079 return bounce.buffer;
2082 raddr = memory_region_get_ram_addr(section->mr) + xlat;
2084 if (memory_region_get_ram_addr(section->mr) + xlat != raddr + todo) {
2094 ret = qemu_ram_ptr_length(raddr, &rlen);
2099 /* Unmaps a memory region previously mapped by address_space_map().
2100 * Will also mark the memory as dirty if is_write == 1. access_len gives
2101 * the amount of memory that was actually read or written by the caller. */
2103 void address_space_unmap(AddressSpace *as, void *buffer, hwaddr len,
2104 int is_write, hwaddr access_len)
2106 if (buffer != bounce.buffer) {
2108 ram_addr_t addr1 = qemu_ram_addr_from_host_nofail(buffer);
2109 while (access_len) {
2111 l = TARGET_PAGE_SIZE;
2114 invalidate_and_set_dirty(addr1, l);
2119 if (xen_enabled()) {
2120 xen_invalidate_map_cache_entry(buffer);
2125 address_space_write(as, bounce.addr, bounce.buffer, access_len);
2127 qemu_vfree(bounce.buffer);
2128 bounce.buffer = NULL;
2129 cpu_notify_map_clients();
2132 void *cpu_physical_memory_map(hwaddr addr,
2136 return address_space_map(&address_space_memory, addr, plen, is_write);
/* Release a mapping of guest physical memory by forwarding all arguments to
   address_space_unmap() on address_space_memory.
   Both functions are declared void, so the call is a plain statement:
   `return <expression>;` is not permitted by ISO C in a function returning
   void. */
2139 void cpu_physical_memory_unmap(void *buffer, hwaddr len,
2140 int is_write, hwaddr access_len)
2142 address_space_unmap(&address_space_memory, buffer, len, is_write, access_len);
2145 /* warning: addr must be aligned */
2146 static inline uint32_t ldl_phys_internal(hwaddr addr,
2147 enum device_endian endian)
2151 MemoryRegionSection *section;
2155 section = address_space_translate(&address_space_memory, addr, &addr1, &l,
2157 if (l < 4 || !memory_access_is_direct(section->mr, false)) {
2159 val = io_mem_read(section->mr, addr1, 4);
2160 #if defined(TARGET_WORDS_BIGENDIAN)
2161 if (endian == DEVICE_LITTLE_ENDIAN) {
2165 if (endian == DEVICE_BIG_ENDIAN) {
2171 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section->mr)
2175 case DEVICE_LITTLE_ENDIAN:
2176 val = ldl_le_p(ptr);
2178 case DEVICE_BIG_ENDIAN:
2179 val = ldl_be_p(ptr);
2189 uint32_t ldl_phys(hwaddr addr)
2191 return ldl_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
2194 uint32_t ldl_le_phys(hwaddr addr)
2196 return ldl_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
2199 uint32_t ldl_be_phys(hwaddr addr)
2201 return ldl_phys_internal(addr, DEVICE_BIG_ENDIAN);
2204 /* warning: addr must be aligned */
2205 static inline uint64_t ldq_phys_internal(hwaddr addr,
2206 enum device_endian endian)
2210 MemoryRegionSection *section;
2214 section = address_space_translate(&address_space_memory, addr, &addr1, &l,
2216 if (l < 8 || !memory_access_is_direct(section->mr, false)) {
2219 /* XXX This is broken when device endian != cpu endian.
2220 Fix and add "endian" variable check */
2221 #ifdef TARGET_WORDS_BIGENDIAN
2222 val = io_mem_read(section->mr, addr1, 4) << 32;
2223 val |= io_mem_read(section->mr, addr1 + 4, 4);
2225 val = io_mem_read(section->mr, addr1, 4);
2226 val |= io_mem_read(section->mr, addr1 + 4, 4) << 32;
2230 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section->mr)
2234 case DEVICE_LITTLE_ENDIAN:
2235 val = ldq_le_p(ptr);
2237 case DEVICE_BIG_ENDIAN:
2238 val = ldq_be_p(ptr);
2248 uint64_t ldq_phys(hwaddr addr)
2250 return ldq_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
2253 uint64_t ldq_le_phys(hwaddr addr)
2255 return ldq_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
2258 uint64_t ldq_be_phys(hwaddr addr)
2260 return ldq_phys_internal(addr, DEVICE_BIG_ENDIAN);
2264 uint32_t ldub_phys(hwaddr addr)
2267 cpu_physical_memory_read(addr, &val, 1);
2271 /* warning: addr must be aligned */
2272 static inline uint32_t lduw_phys_internal(hwaddr addr,
2273 enum device_endian endian)
2277 MemoryRegionSection *section;
2281 section = address_space_translate(&address_space_memory, addr, &addr1, &l,
2283 if (l < 2 || !memory_access_is_direct(section->mr, false)) {
2285 val = io_mem_read(section->mr, addr1, 2);
2286 #if defined(TARGET_WORDS_BIGENDIAN)
2287 if (endian == DEVICE_LITTLE_ENDIAN) {
2291 if (endian == DEVICE_BIG_ENDIAN) {
2297 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section->mr)
2301 case DEVICE_LITTLE_ENDIAN:
2302 val = lduw_le_p(ptr);
2304 case DEVICE_BIG_ENDIAN:
2305 val = lduw_be_p(ptr);
2315 uint32_t lduw_phys(hwaddr addr)
2317 return lduw_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
2320 uint32_t lduw_le_phys(hwaddr addr)
2322 return lduw_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
2325 uint32_t lduw_be_phys(hwaddr addr)
2327 return lduw_phys_internal(addr, DEVICE_BIG_ENDIAN);
2330 /* warning: addr must be aligned. The ram page is not marked as dirty
2331 and the code inside is not invalidated. It is useful if the dirty
2332 bits are used to track modified PTEs */
2333 void stl_phys_notdirty(hwaddr addr, uint32_t val)
2336 MemoryRegionSection *section;
2340 section = address_space_translate(&address_space_memory, addr, &addr1, &l,
2342 if (l < 4 || !memory_access_is_direct(section->mr, true)) {
2343 io_mem_write(section->mr, addr1, val, 4);
2345 addr1 += memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK;
2346 ptr = qemu_get_ram_ptr(addr1);
2349 if (unlikely(in_migration)) {
2350 if (!cpu_physical_memory_is_dirty(addr1)) {
2351 /* invalidate code */
2352 tb_invalidate_phys_page_range(addr1, addr1 + 4, 0);
2354 cpu_physical_memory_set_dirty_flags(
2355 addr1, (0xff & ~CODE_DIRTY_FLAG));
2361 /* warning: addr must be aligned */
2362 static inline void stl_phys_internal(hwaddr addr, uint32_t val,
2363 enum device_endian endian)
2366 MemoryRegionSection *section;
2370 section = address_space_translate(&address_space_memory, addr, &addr1, &l,
2372 if (l < 4 || !memory_access_is_direct(section->mr, true)) {
2373 #if defined(TARGET_WORDS_BIGENDIAN)
2374 if (endian == DEVICE_LITTLE_ENDIAN) {
2378 if (endian == DEVICE_BIG_ENDIAN) {
2382 io_mem_write(section->mr, addr1, val, 4);
2385 addr1 += memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK;
2386 ptr = qemu_get_ram_ptr(addr1);
2388 case DEVICE_LITTLE_ENDIAN:
2391 case DEVICE_BIG_ENDIAN:
2398 invalidate_and_set_dirty(addr1, 4);
2402 void stl_phys(hwaddr addr, uint32_t val)
2404 stl_phys_internal(addr, val, DEVICE_NATIVE_ENDIAN);
2407 void stl_le_phys(hwaddr addr, uint32_t val)
2409 stl_phys_internal(addr, val, DEVICE_LITTLE_ENDIAN);
2412 void stl_be_phys(hwaddr addr, uint32_t val)
2414 stl_phys_internal(addr, val, DEVICE_BIG_ENDIAN);
2418 void stb_phys(hwaddr addr, uint32_t val)
2421 cpu_physical_memory_write(addr, &v, 1);
2424 /* warning: addr must be aligned */
2425 static inline void stw_phys_internal(hwaddr addr, uint32_t val,
2426 enum device_endian endian)
2429 MemoryRegionSection *section;
2433 section = address_space_translate(&address_space_memory, addr, &addr1, &l,
2435 if (l < 2 || !memory_access_is_direct(section->mr, true)) {
2436 #if defined(TARGET_WORDS_BIGENDIAN)
2437 if (endian == DEVICE_LITTLE_ENDIAN) {
2441 if (endian == DEVICE_BIG_ENDIAN) {
2445 io_mem_write(section->mr, addr1, val, 2);
2448 addr1 += memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK;
2449 ptr = qemu_get_ram_ptr(addr1);
2451 case DEVICE_LITTLE_ENDIAN:
2454 case DEVICE_BIG_ENDIAN:
2461 invalidate_and_set_dirty(addr1, 2);
2465 void stw_phys(hwaddr addr, uint32_t val)
2467 stw_phys_internal(addr, val, DEVICE_NATIVE_ENDIAN);
2470 void stw_le_phys(hwaddr addr, uint32_t val)
2472 stw_phys_internal(addr, val, DEVICE_LITTLE_ENDIAN);
2475 void stw_be_phys(hwaddr addr, uint32_t val)
2477 stw_phys_internal(addr, val, DEVICE_BIG_ENDIAN);
2481 void stq_phys(hwaddr addr, uint64_t val)
2484 cpu_physical_memory_write(addr, &val, 8);
2487 void stq_le_phys(hwaddr addr, uint64_t val)
2489 val = cpu_to_le64(val);
2490 cpu_physical_memory_write(addr, &val, 8);
2493 void stq_be_phys(hwaddr addr, uint64_t val)
2495 val = cpu_to_be64(val);
2496 cpu_physical_memory_write(addr, &val, 8);
2499 /* virtual memory access for debug (includes writing to ROM) */
2500 int cpu_memory_rw_debug(CPUArchState *env, target_ulong addr,
2501 uint8_t *buf, int len, int is_write)
2508 page = addr & TARGET_PAGE_MASK;
2509 phys_addr = cpu_get_phys_page_debug(env, page);
2510 /* if no physical page mapped, return an error */
2511 if (phys_addr == -1)
2513 l = (page + TARGET_PAGE_SIZE) - addr;
2516 phys_addr += (addr & ~TARGET_PAGE_MASK);
2518 cpu_physical_memory_write_rom(phys_addr, buf, l);
2520 cpu_physical_memory_rw(phys_addr, buf, l, is_write);
2529 #if !defined(CONFIG_USER_ONLY)
/*
2532 * A helper function for the _utterly broken_ virtio device model to find out if
2533 * it's running on a big endian machine. Don't do this at home kids!
 */
2535 bool virtio_is_big_endian(void);
2536 bool virtio_is_big_endian(void)
2538 #if defined(TARGET_WORDS_BIGENDIAN)
2547 #ifndef CONFIG_USER_ONLY
/* Translate `phys_addr` in address_space_memory (as a read access) and
   return true when the region of the resulting section satisfies neither
   memory_region_is_ram() nor memory_region_is_romd(). */
2548 bool cpu_physical_memory_is_io(hwaddr phys_addr)
2550 MemoryRegionSection *section;
2553 section = address_space_translate(&address_space_memory,
2554 phys_addr, &phys_addr, &l, false);
2556 return !(memory_region_is_ram(section->mr) ||
2557 memory_region_is_romd(section->mr));