 * Copyright (c) 2003 Fabrice Bellard
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
#include <sys/types.h>

#include "qemu-common.h"
#include "qemu/osdep.h"
#include "sysemu/kvm.h"
#include "sysemu/hax.h"
#include "sysemu/sysemu.h"
#include "hw/xen/xen.h"
#include "qemu/timer.h"
#include "qemu/config-file.h"
#include "exec/memory.h"
#include "sysemu/dma.h"
#include "exec/address-spaces.h"
#if defined(CONFIG_USER_ONLY)
#else /* !CONFIG_USER_ONLY */
#include "sysemu/xen-mapcache.h"
#endif
#include "exec/cpu-all.h"

#include "exec/cputlb.h"
#include "translate-all.h"

#include "exec/memory-internal.h"

//#define DEBUG_SUBPAGE
#if !defined(CONFIG_USER_ONLY)
static int in_migration;

RAMList ram_list = { .blocks = QTAILQ_HEAD_INITIALIZER(ram_list.blocks) };

static MemoryRegion *system_memory;
static MemoryRegion *system_io;

AddressSpace address_space_io;
AddressSpace address_space_memory;

MemoryRegion io_mem_rom, io_mem_notdirty;
static MemoryRegion io_mem_unassigned;

#endif

struct CPUTailQ cpus = QTAILQ_HEAD_INITIALIZER(cpus);
/* current CPU in the current thread. It is only valid inside
   cpu_exec() */
DEFINE_TLS(CPUState *, current_cpu);
/* 0 = Do not count executed instructions.
   1 = Precise instruction counting.
   2 = Adaptive rate instruction counting.  */
#if !defined(CONFIG_USER_ONLY)

typedef struct PhysPageEntry PhysPageEntry;

struct PhysPageEntry {
    uint16_t is_leaf : 1;
    /* index into phys_sections (is_leaf) or phys_map_nodes (!is_leaf) */
    uint16_t ptr : 15;
};

typedef PhysPageEntry Node[L2_SIZE];

struct AddressSpaceDispatch {
    /* This is a multi-level map on the physical address space.
     * The bottom level has pointers to MemoryRegionSections.
     */
    PhysPageEntry phys_map;
    Node *nodes;
    MemoryRegionSection *sections;
    AddressSpace *as;
};

#define SUBPAGE_IDX(addr) ((addr) & ~TARGET_PAGE_MASK)
typedef struct subpage_t {
    MemoryRegion iomem;
    AddressSpace *as;
    hwaddr base;
    uint16_t sub_section[TARGET_PAGE_SIZE];
} subpage_t;

#define PHYS_SECTION_UNASSIGNED 0
#define PHYS_SECTION_NOTDIRTY 1
#define PHYS_SECTION_ROM 2
#define PHYS_SECTION_WATCH 3

typedef struct PhysPageMap {
    unsigned sections_nb;
    unsigned sections_nb_alloc;
    unsigned nodes_nb;
    unsigned nodes_nb_alloc;
    Node *nodes;
    MemoryRegionSection *sections;
} PhysPageMap;

static PhysPageMap *prev_map;
static PhysPageMap next_map;

#define PHYS_MAP_NODE_NIL (((uint16_t)~0) >> 1)

static void io_mem_init(void);
static void memory_map_init(void);

static MemoryRegion io_mem_watch;
#endif
#if !defined(CONFIG_USER_ONLY)

static void phys_map_node_reserve(unsigned nodes)
{
    if (next_map.nodes_nb + nodes > next_map.nodes_nb_alloc) {
        next_map.nodes_nb_alloc = MAX(next_map.nodes_nb_alloc * 2,
                                      16);
        next_map.nodes_nb_alloc = MAX(next_map.nodes_nb_alloc,
                                      next_map.nodes_nb + nodes);
        next_map.nodes = g_renew(Node, next_map.nodes,
                                 next_map.nodes_nb_alloc);
    }
}
static uint16_t phys_map_node_alloc(void)
{
    unsigned i;
    uint16_t ret;

    ret = next_map.nodes_nb++;
    assert(ret != PHYS_MAP_NODE_NIL);
    assert(ret != next_map.nodes_nb_alloc);
    for (i = 0; i < L2_SIZE; ++i) {
        next_map.nodes[ret][i].is_leaf = 0;
        next_map.nodes[ret][i].ptr = PHYS_MAP_NODE_NIL;
    }
    return ret;
}
static void phys_page_set_level(PhysPageEntry *lp, hwaddr *index,
                                hwaddr *nb, uint16_t leaf,
                                int level)
{
    PhysPageEntry *p;
    int i;
    hwaddr step = (hwaddr)1 << (level * L2_BITS);

    if (!lp->is_leaf && lp->ptr == PHYS_MAP_NODE_NIL) {
        lp->ptr = phys_map_node_alloc();
        p = next_map.nodes[lp->ptr];
        if (level == 0) {
            for (i = 0; i < L2_SIZE; i++) {
                p[i].is_leaf = 1;
                p[i].ptr = PHYS_SECTION_UNASSIGNED;
            }
        }
    } else {
        p = next_map.nodes[lp->ptr];
    }
    lp = &p[(*index >> (level * L2_BITS)) & (L2_SIZE - 1)];

    while (*nb && lp < &p[L2_SIZE]) {
        if ((*index & (step - 1)) == 0 && *nb >= step) {
            lp->is_leaf = true;
            lp->ptr = leaf;
            *index += step;
            *nb -= step;
        } else {
            phys_page_set_level(lp, index, nb, leaf, level - 1);
        }
        ++lp;
    }
}
static void phys_page_set(AddressSpaceDispatch *d,
                          hwaddr index, hwaddr nb,
                          uint16_t leaf)
{
    /* Wildly overreserve - it doesn't matter much. */
    phys_map_node_reserve(3 * P_L2_LEVELS);

    phys_page_set_level(&d->phys_map, &index, &nb, leaf, P_L2_LEVELS - 1);
}
static MemoryRegionSection *phys_page_find(PhysPageEntry lp, hwaddr index,
                                           Node *nodes, MemoryRegionSection *sections)
{
    PhysPageEntry *p;
    int i;

    for (i = P_L2_LEVELS - 1; i >= 0 && !lp.is_leaf; i--) {
        if (lp.ptr == PHYS_MAP_NODE_NIL) {
            return &sections[PHYS_SECTION_UNASSIGNED];
        }
        p = nodes[lp.ptr];
        lp = p[(index >> (i * L2_BITS)) & (L2_SIZE - 1)];
    }
    return &sections[lp.ptr];
}
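
/*
 * Worked example (illustrative; the constants are target-dependent
 * assumptions, not taken from this file): with TARGET_PAGE_BITS == 12,
 * L2_BITS == 10 and P_L2_LEVELS == 4, looking up physical address
 * 0x12345678 first forms the page frame number
 *
 *   hwaddr index = 0x12345678 >> TARGET_PAGE_BITS;
 *
 * then walks the tree from level P_L2_LEVELS - 1 down to 0, selecting
 * (index >> (level * L2_BITS)) & (L2_SIZE - 1) at each level.  A
 * non-leaf entry's ptr field indexes nodes[]; once is_leaf is set, the
 * ptr field indexes sections[] instead, yielding the
 * MemoryRegionSection covering that page.
 */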
bool memory_region_is_unassigned(MemoryRegion *mr)
{
    return mr != &io_mem_rom && mr != &io_mem_notdirty && !mr->rom_device
        && mr != &io_mem_watch;
}

static MemoryRegionSection *address_space_lookup_region(AddressSpaceDispatch *d,
                                                        hwaddr addr,
                                                        bool resolve_subpage)
{
    MemoryRegionSection *section;
    subpage_t *subpage;

    section = phys_page_find(d->phys_map, addr >> TARGET_PAGE_BITS,
                             d->nodes, d->sections);
    if (resolve_subpage && section->mr->subpage) {
        subpage = container_of(section->mr, subpage_t, iomem);
        section = &d->sections[subpage->sub_section[SUBPAGE_IDX(addr)]];
    }
    return section;
}
static MemoryRegionSection *
address_space_translate_internal(AddressSpaceDispatch *d, hwaddr addr, hwaddr *xlat,
                                 hwaddr *plen, bool resolve_subpage)
{
    MemoryRegionSection *section;
    Int128 diff;

    section = address_space_lookup_region(d, addr, resolve_subpage);
    /* Compute offset within MemoryRegionSection */
    addr -= section->offset_within_address_space;

    /* Compute offset within MemoryRegion */
    *xlat = addr + section->offset_within_region;

    diff = int128_sub(section->mr->size, int128_make64(addr));
    *plen = int128_get64(int128_min(diff, int128_make64(*plen)));
    return section;
}
MemoryRegion *address_space_translate(AddressSpace *as, hwaddr addr,
                                      hwaddr *xlat, hwaddr *plen,
                                      bool is_write)
{
    IOMMUTLBEntry iotlb;
    MemoryRegionSection *section;
    MemoryRegion *mr;
    hwaddr len = *plen;

    for (;;) {
        section = address_space_translate_internal(as->dispatch, addr, &addr, plen, true);
        mr = section->mr;

        if (!mr->iommu_ops) {
            break;
        }

        iotlb = mr->iommu_ops->translate(mr, addr);
        addr = ((iotlb.translated_addr & ~iotlb.addr_mask)
                | (addr & iotlb.addr_mask));
        len = MIN(len, (addr | iotlb.addr_mask) - addr + 1);
        if (!(iotlb.perm & (1 << is_write))) {
            mr = &io_mem_unassigned;
            break;
        }

        as = iotlb.target_as;
    }

    *plen = len;
    *xlat = addr;
    return mr;
}
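
/*
 * Worked example for one IOMMU hop above (the values are invented for
 * illustration): with iotlb.translated_addr = 0x80000000 and
 * iotlb.addr_mask = 0xfff (a 4K mapping), an input addr of 0x1234 gives
 *
 *   addr = (0x80000000 & ~0xfff) | (0x1234 & 0xfff) = 0x80000234
 *   len  = MIN(len, (addr | 0xfff) - addr + 1)      = at most 0xdcc
 *
 * i.e. the page frame comes from the IOTLB entry, the page offset from
 * the original address, and the access is clipped at the mapping
 * boundary before the walk continues in iotlb.target_as.
 */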
MemoryRegionSection *
address_space_translate_for_iotlb(AddressSpace *as, hwaddr addr, hwaddr *xlat,
                                  hwaddr *plen)
{
    MemoryRegionSection *section;
    section = address_space_translate_internal(as->dispatch, addr, xlat, plen, false);

    assert(!section->mr->iommu_ops);
    return section;
}
#endif
void cpu_exec_init_all(void)
{
#if !defined(CONFIG_USER_ONLY)
    qemu_mutex_init(&ram_list.mutex);
    memory_map_init();
    io_mem_init();
#endif
}

#if !defined(CONFIG_USER_ONLY)
static int cpu_common_post_load(void *opaque, int version_id)
{
    CPUState *cpu = opaque;

    /* 0x01 was CPU_INTERRUPT_EXIT. This line can be removed when the
       version_id is increased. */
    cpu->interrupt_request &= ~0x01;
    tlb_flush(cpu->env_ptr, 1);

    return 0;
}

const VMStateDescription vmstate_cpu_common = {
    .name = "cpu_common",
    .version_id = 1,
    .minimum_version_id = 1,
    .minimum_version_id_old = 1,
    .post_load = cpu_common_post_load,
    .fields = (VMStateField []) {
        VMSTATE_UINT32(halted, CPUState),
        VMSTATE_UINT32(interrupt_request, CPUState),
        VMSTATE_END_OF_LIST()
    }
};
#endif
CPUState *qemu_get_cpu(int index)
{
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        if (cpu->cpu_index == index) {
            return cpu;
        }
    }

    return NULL;
}
void cpu_exec_init(CPUArchState *env)
{
    CPUState *cpu = ENV_GET_CPU(env);
    CPUClass *cc = CPU_GET_CLASS(cpu);
    CPUState *some_cpu;
    int cpu_index;

#if defined(CONFIG_USER_ONLY)
    cpu_list_lock();
#endif
    cpu_index = 0;
    CPU_FOREACH(some_cpu) {
        cpu_index++;
    }
    cpu->cpu_index = cpu_index;
    cpu->numa_node = 0;
    QTAILQ_INIT(&env->breakpoints);
    QTAILQ_INIT(&env->watchpoints);
#ifndef CONFIG_USER_ONLY
    cpu->thread_id = qemu_get_thread_id();
#endif
    QTAILQ_INSERT_TAIL(&cpus, cpu, node);
#if defined(CONFIG_USER_ONLY)
    cpu_list_unlock();
#endif
    if (qdev_get_vmsd(DEVICE(cpu)) == NULL) {
        vmstate_register(NULL, cpu_index, &vmstate_cpu_common, cpu);
    }
#if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
    register_savevm(NULL, "cpu", cpu_index, CPU_SAVE_VERSION,
                    cpu_save, cpu_load, env);
    assert(cc->vmsd == NULL);
    assert(qdev_get_vmsd(DEVICE(cpu)) == NULL);
#endif
    if (cc->vmsd != NULL) {
        vmstate_register(NULL, cpu_index, cc->vmsd, cpu);
    }
}
#if defined(TARGET_HAS_ICE)
#if defined(CONFIG_USER_ONLY)
static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
{
    tb_invalidate_phys_page_range(pc, pc + 1, 0);
}
#else
static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
{
    hwaddr phys = cpu_get_phys_page_debug(cpu, pc);

    tb_invalidate_phys_addr(phys | (pc & ~TARGET_PAGE_MASK));
}
#endif
#endif /* TARGET_HAS_ICE */
#if defined(CONFIG_USER_ONLY)
void cpu_watchpoint_remove_all(CPUArchState *env, int mask)
{
}

int cpu_watchpoint_insert(CPUArchState *env, target_ulong addr, target_ulong len,
                          int flags, CPUWatchpoint **watchpoint)
{
    return -ENOSYS;
}
#else
/* Add a watchpoint.  */
int cpu_watchpoint_insert(CPUArchState *env, target_ulong addr, target_ulong len,
                          int flags, CPUWatchpoint **watchpoint)
{
    target_ulong len_mask = ~(len - 1);
    CPUWatchpoint *wp;

    /* sanity checks: allow power-of-2 lengths, deny unaligned watchpoints */
    if ((len & (len - 1)) || (addr & ~len_mask) ||
            len == 0 || len > TARGET_PAGE_SIZE) {
        fprintf(stderr, "qemu: tried to set invalid watchpoint at "
                TARGET_FMT_lx ", len=" TARGET_FMT_lu "\n", addr, len);
        return -EINVAL;
    }
    wp = g_malloc(sizeof(*wp));

    wp->vaddr = addr;
    wp->len_mask = len_mask;
    wp->flags = flags;

    /* keep all GDB-injected watchpoints in front */
    if (flags & BP_GDB) {
        QTAILQ_INSERT_HEAD(&env->watchpoints, wp, entry);
    } else {
        QTAILQ_INSERT_TAIL(&env->watchpoints, wp, entry);
    }

    tlb_flush_page(env, addr);

    if (watchpoint) {
        *watchpoint = wp;
    }
    return 0;
}
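
/*
 * Illustrative sketch (not built): inserting and later removing a
 * 4-byte write watchpoint.  The address and flags are invented for the
 * example; the length must be a power of two and the address aligned to
 * it, or cpu_watchpoint_insert() above rejects the request.
 */
#if 0
static void example_watch_pte(CPUArchState *env)
{
    CPUWatchpoint *wp;

    if (cpu_watchpoint_insert(env, 0x1000, 4, BP_MEM_WRITE, &wp) < 0) {
        fprintf(stderr, "watchpoint rejected\n");
        return;
    }
    /* ... later, once the watch is no longer needed ... */
    cpu_watchpoint_remove_by_ref(env, wp);
}
#endif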
/* Remove a specific watchpoint.  */
int cpu_watchpoint_remove(CPUArchState *env, target_ulong addr, target_ulong len,
                          int flags)
{
    target_ulong len_mask = ~(len - 1);
    CPUWatchpoint *wp;

    QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
        if (addr == wp->vaddr && len_mask == wp->len_mask
                && flags == (wp->flags & ~BP_WATCHPOINT_HIT)) {
            cpu_watchpoint_remove_by_ref(env, wp);
            return 0;
        }
    }
    return -ENOENT;
}

/* Remove a specific watchpoint by reference.  */
void cpu_watchpoint_remove_by_ref(CPUArchState *env, CPUWatchpoint *watchpoint)
{
    QTAILQ_REMOVE(&env->watchpoints, watchpoint, entry);

    tlb_flush_page(env, watchpoint->vaddr);

    g_free(watchpoint);
}

/* Remove all matching watchpoints. */
void cpu_watchpoint_remove_all(CPUArchState *env, int mask)
{
    CPUWatchpoint *wp, *next;

    QTAILQ_FOREACH_SAFE(wp, &env->watchpoints, entry, next) {
        if (wp->flags & mask)
            cpu_watchpoint_remove_by_ref(env, wp);
    }
}
#endif
/* Add a breakpoint.  */
int cpu_breakpoint_insert(CPUArchState *env, target_ulong pc, int flags,
                          CPUBreakpoint **breakpoint)
{
#if defined(TARGET_HAS_ICE)
    CPUBreakpoint *bp;

    bp = g_malloc(sizeof(*bp));

    bp->pc = pc;
    bp->flags = flags;

    /* keep all GDB-injected breakpoints in front */
    if (flags & BP_GDB) {
        QTAILQ_INSERT_HEAD(&env->breakpoints, bp, entry);
    } else {
        QTAILQ_INSERT_TAIL(&env->breakpoints, bp, entry);
    }

    breakpoint_invalidate(ENV_GET_CPU(env), pc);

    if (breakpoint) {
        *breakpoint = bp;
    }
    return 0;
#else
    return -ENOSYS;
#endif
}
/* Remove a specific breakpoint.  */
int cpu_breakpoint_remove(CPUArchState *env, target_ulong pc, int flags)
{
#if defined(TARGET_HAS_ICE)
    CPUBreakpoint *bp;

    QTAILQ_FOREACH(bp, &env->breakpoints, entry) {
        if (bp->pc == pc && bp->flags == flags) {
            cpu_breakpoint_remove_by_ref(env, bp);
            return 0;
        }
    }
    return -ENOENT;
#else
    return -ENOSYS;
#endif
}

/* Remove a specific breakpoint by reference.  */
void cpu_breakpoint_remove_by_ref(CPUArchState *env, CPUBreakpoint *breakpoint)
{
#if defined(TARGET_HAS_ICE)
    QTAILQ_REMOVE(&env->breakpoints, breakpoint, entry);

    breakpoint_invalidate(ENV_GET_CPU(env), breakpoint->pc);

    g_free(breakpoint);
#endif
}

/* Remove all matching breakpoints. */
void cpu_breakpoint_remove_all(CPUArchState *env, int mask)
{
#if defined(TARGET_HAS_ICE)
    CPUBreakpoint *bp, *next;

    QTAILQ_FOREACH_SAFE(bp, &env->breakpoints, entry, next) {
        if (bp->flags & mask)
            cpu_breakpoint_remove_by_ref(env, bp);
    }
#endif
}
/* enable or disable single step mode. EXCP_DEBUG is returned by the
   CPU loop after each instruction */
void cpu_single_step(CPUState *cpu, int enabled)
{
#if defined(TARGET_HAS_ICE)
    if (cpu->singlestep_enabled != enabled) {
        cpu->singlestep_enabled = enabled;
        if (kvm_enabled()) {
            kvm_update_guest_debug(cpu, 0);
        } else {
            /* must flush all the translated code to avoid inconsistencies */
            /* XXX: only flush what is necessary */
            CPUArchState *env = cpu->env_ptr;
            tb_flush(env);
        }
    }
#endif
}
void cpu_abort(CPUArchState *env, const char *fmt, ...)
{
    CPUState *cpu = ENV_GET_CPU(env);
    va_list ap;
    va_list ap2;

    va_start(ap, fmt);
    va_copy(ap2, ap);
    fprintf(stderr, "qemu: fatal: ");
    vfprintf(stderr, fmt, ap);
    fprintf(stderr, "\n");
    cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_FPU | CPU_DUMP_CCOP);
    if (qemu_log_enabled()) {
        qemu_log("qemu: fatal: ");
        qemu_log_vprintf(fmt, ap2);
        qemu_log("\n");
        log_cpu_state(cpu, CPU_DUMP_FPU | CPU_DUMP_CCOP);
        qemu_log_flush();
        qemu_log_close();
    }
    va_end(ap2);
    va_end(ap);
#if defined(CONFIG_USER_ONLY)
    {
        struct sigaction act;
        sigfillset(&act.sa_mask);
        act.sa_handler = SIG_DFL;
        sigaction(SIGABRT, &act, NULL);
    }
#endif
    abort();
}
#if !defined(CONFIG_USER_ONLY)
static RAMBlock *qemu_get_ram_block(ram_addr_t addr)
{
    RAMBlock *block;

    /* The list is protected by the iothread lock here.  */
    block = ram_list.mru_block;
    if (block && addr - block->offset < block->length) {
        goto found;
    }
    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
        if (addr - block->offset < block->length) {
            goto found;
        }
    }

    fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
    abort();

found:
    ram_list.mru_block = block;
    return block;
}
static void tlb_reset_dirty_range_all(ram_addr_t start, ram_addr_t end,
                                      uintptr_t length)
{
    RAMBlock *block;
    uintptr_t start1;

    block = qemu_get_ram_block(start);
    assert(block == qemu_get_ram_block(end - 1));
    start1 = (uintptr_t)block->host + (start - block->offset);
    cpu_tlb_reset_dirty_all(start1, length);
}
/* Note: start and end must be within the same ram block.  */
void cpu_physical_memory_reset_dirty(ram_addr_t start, ram_addr_t end,
                                     int dirty_flags)
{
    uintptr_t length;

    start &= TARGET_PAGE_MASK;
    end = TARGET_PAGE_ALIGN(end);

    length = end - start;
    if (length == 0)
        return;
    cpu_physical_memory_mask_dirty_range(start, length, dirty_flags);

    if (tcg_enabled()) {
        tlb_reset_dirty_range_all(start, end, length);
    }
}

static int cpu_physical_memory_set_dirty_tracking(int enable)
{
    in_migration = enable;
    return 0;
}
hwaddr memory_region_section_get_iotlb(CPUArchState *env,
                                       MemoryRegionSection *section,
                                       target_ulong vaddr,
                                       hwaddr paddr, hwaddr xlat,
                                       int prot,
                                       target_ulong *address)
{
    hwaddr iotlb;
    CPUWatchpoint *wp;

    if (memory_region_is_ram(section->mr)) {
        /* Normal RAM.  */
        iotlb = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
            + xlat;
        if (!section->readonly) {
            iotlb |= PHYS_SECTION_NOTDIRTY;
        } else {
            iotlb |= PHYS_SECTION_ROM;
        }
    } else {
        iotlb = section - address_space_memory.dispatch->sections;
        iotlb += xlat;
    }

    /* Make accesses to pages with watchpoints go via the
       watchpoint trap routines.  */
    QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
        if (vaddr == (wp->vaddr & TARGET_PAGE_MASK)) {
            /* Avoid trapping reads of pages with a write breakpoint. */
            if ((prot & PAGE_WRITE) || (wp->flags & BP_MEM_READ)) {
                iotlb = PHYS_SECTION_WATCH + paddr;
                *address |= TLB_MMIO;
                break;
            }
        }
    }

    return iotlb;
}
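
/*
 * Worked example (values invented): for writable RAM whose ram_addr is
 * 0x40000000, the iotlb value is the page-aligned ram address ORed with
 * the section number, e.g. 0x40000000 | PHYS_SECTION_NOTDIRTY =
 * 0x40000001.  The section index lives entirely in the low
 * TARGET_PAGE_BITS, which is why phys_section_add() below asserts
 * sections_nb < TARGET_PAGE_SIZE and why iotlb_to_region() recovers the
 * region with index & ~TARGET_PAGE_MASK.
 */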
#endif /* defined(CONFIG_USER_ONLY) */

#if !defined(CONFIG_USER_ONLY)

static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
                             uint16_t section);
static subpage_t *subpage_init(AddressSpace *as, hwaddr base);

static void *(*phys_mem_alloc)(size_t size) = qemu_anon_ram_alloc;

/*
 * Set a custom physical guest memory allocator.
 * Accelerators with unusual needs may need this.  Hopefully, we can
 * get rid of it eventually.
 */
void phys_mem_set_alloc(void *(*alloc)(size_t))
{
    phys_mem_alloc = alloc;
}
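
/*
 * Illustrative sketch (not built): how an accelerator would plug in its
 * own allocator via phys_mem_set_alloc().  The names are invented for
 * the example; a real allocator would obtain memory in whatever way the
 * accelerator's kernel interface requires.
 */
#if 0
static void *example_accel_ram_alloc(size_t size)
{
    /* this sketch just falls back to the default anonymous mapping */
    return qemu_anon_ram_alloc(size);
}

static void example_accel_init(void)
{
    phys_mem_set_alloc(example_accel_ram_alloc);
}
#endif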
static uint16_t phys_section_add(MemoryRegionSection *section)
{
    /* The physical section number is ORed with a page-aligned
     * pointer to produce the iotlb entries.  Thus it should
     * never overflow into the page-aligned value.
     */
    assert(next_map.sections_nb < TARGET_PAGE_SIZE);

    if (next_map.sections_nb == next_map.sections_nb_alloc) {
        next_map.sections_nb_alloc = MAX(next_map.sections_nb_alloc * 2,
                                         16);
        next_map.sections = g_renew(MemoryRegionSection, next_map.sections,
                                    next_map.sections_nb_alloc);
    }
    next_map.sections[next_map.sections_nb] = *section;
    memory_region_ref(section->mr);
    return next_map.sections_nb++;
}

static void phys_section_destroy(MemoryRegion *mr)
{
    memory_region_unref(mr);

    if (mr->subpage) {
        subpage_t *subpage = container_of(mr, subpage_t, iomem);
        memory_region_destroy(&subpage->iomem);
        g_free(subpage);
    }
}

static void phys_sections_free(PhysPageMap *map)
{
    while (map->sections_nb > 0) {
        MemoryRegionSection *section = &map->sections[--map->sections_nb];
        phys_section_destroy(section->mr);
    }
    g_free(map->sections);
    g_free(map->nodes);
    g_free(map);
}
static void register_subpage(AddressSpaceDispatch *d, MemoryRegionSection *section)
{
    subpage_t *subpage;
    hwaddr base = section->offset_within_address_space
        & TARGET_PAGE_MASK;
    MemoryRegionSection *existing = phys_page_find(d->phys_map, base >> TARGET_PAGE_BITS,
                                                   next_map.nodes, next_map.sections);
    MemoryRegionSection subsection = {
        .offset_within_address_space = base,
        .size = int128_make64(TARGET_PAGE_SIZE),
    };
    hwaddr start, end;

    assert(existing->mr->subpage || existing->mr == &io_mem_unassigned);

    if (!(existing->mr->subpage)) {
        subpage = subpage_init(d->as, base);
        subsection.mr = &subpage->iomem;
        phys_page_set(d, base >> TARGET_PAGE_BITS, 1,
                      phys_section_add(&subsection));
    } else {
        subpage = container_of(existing->mr, subpage_t, iomem);
    }
    start = section->offset_within_address_space & ~TARGET_PAGE_MASK;
    end = start + int128_get64(section->size) - 1;
    subpage_register(subpage, start, end, phys_section_add(section));
}
static void register_multipage(AddressSpaceDispatch *d,
                               MemoryRegionSection *section)
{
    hwaddr start_addr = section->offset_within_address_space;
    uint16_t section_index = phys_section_add(section);
    uint64_t num_pages = int128_get64(int128_rshift(section->size,
                                                    TARGET_PAGE_BITS));

    assert(num_pages);
    phys_page_set(d, start_addr >> TARGET_PAGE_BITS, num_pages, section_index);
}
static void mem_add(MemoryListener *listener, MemoryRegionSection *section)
{
    AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
    AddressSpaceDispatch *d = as->next_dispatch;
    MemoryRegionSection now = *section, remain = *section;
    Int128 page_size = int128_make64(TARGET_PAGE_SIZE);

    if (now.offset_within_address_space & ~TARGET_PAGE_MASK) {
        uint64_t left = TARGET_PAGE_ALIGN(now.offset_within_address_space)
                        - now.offset_within_address_space;

        now.size = int128_min(int128_make64(left), now.size);
        register_subpage(d, &now);
    } else {
        now.size = int128_zero();
    }
    while (int128_ne(remain.size, now.size)) {
        remain.size = int128_sub(remain.size, now.size);
        remain.offset_within_address_space += int128_get64(now.size);
        remain.offset_within_region += int128_get64(now.size);
        now = remain;
        if (int128_lt(remain.size, page_size)) {
            register_subpage(d, &now);
        } else if (remain.offset_within_address_space & ~TARGET_PAGE_MASK) {
            now.size = page_size;
            register_subpage(d, &now);
        } else {
            now.size = int128_and(now.size, int128_neg(page_size));
            register_multipage(d, &now);
        }
    }
}
void qemu_flush_coalesced_mmio_buffer(void)
{
    if (kvm_enabled())
        kvm_flush_coalesced_mmio_buffer();
}

void qemu_mutex_lock_ramlist(void)
{
    qemu_mutex_lock(&ram_list.mutex);
}

void qemu_mutex_unlock_ramlist(void)
{
    qemu_mutex_unlock(&ram_list.mutex);
}

#ifdef __linux__

#include <sys/vfs.h>

#define HUGETLBFS_MAGIC       0x958458f6

static long gethugepagesize(const char *path)
{
    struct statfs fs;
    int ret;

    do {
        ret = statfs(path, &fs);
    } while (ret != 0 && errno == EINTR);

    if (ret != 0) {
        perror(path);
        return 0;
    }

    if (fs.f_type != HUGETLBFS_MAGIC)
        fprintf(stderr, "Warning: path not on HugeTLBFS: %s\n", path);

    return fs.f_bsize;
}
static void *file_ram_alloc(RAMBlock *block,
                            ram_addr_t memory,
                            const char *path)
{
    char *filename;
    char *sanitized_name;
    char *c;
    void *area;
    int fd;
#ifdef MAP_POPULATE
    int flags;
#endif
    unsigned long hpagesize;

    hpagesize = gethugepagesize(path);
    if (!hpagesize) {
        return NULL;
    }

    if (memory < hpagesize) {
        return NULL;
    }

    if (kvm_enabled() && !kvm_has_sync_mmu()) {
        fprintf(stderr, "host lacks kvm mmu notifiers, -mem-path unsupported\n");
        return NULL;
    }

    /* Make name safe to use with mkstemp by replacing '/' with '_'. */
    sanitized_name = g_strdup(block->mr->name);
    for (c = sanitized_name; *c != '\0'; c++) {
        if (*c == '/')
            *c = '_';
    }

    filename = g_strdup_printf("%s/qemu_back_mem.%s.XXXXXX", path,
                               sanitized_name);
    g_free(sanitized_name);

    fd = mkstemp(filename);
    if (fd < 0) {
        perror("unable to create backing store for hugepages");
        g_free(filename);
        return NULL;
    }
    unlink(filename);
    g_free(filename);

    memory = (memory + hpagesize - 1) & ~(hpagesize - 1);

    /*
     * ftruncate is not supported by hugetlbfs in older
     * hosts, so don't bother bailing out on errors.
     * If anything goes wrong with it under other filesystems,
     * mmap will fail.
     */
    if (ftruncate(fd, memory))
        perror("ftruncate");

#ifdef MAP_POPULATE
    /* NB: MAP_POPULATE won't exhaustively allocate all physical pages if
     * MAP_PRIVATE is requested.  For mem_prealloc we mmap as MAP_SHARED
     * to sidestep this quirk.
     */
    flags = mem_prealloc ? MAP_POPULATE | MAP_SHARED : MAP_PRIVATE;
    area = mmap(0, memory, PROT_READ | PROT_WRITE, flags, fd, 0);
#else
    area = mmap(0, memory, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
#endif
    if (area == MAP_FAILED) {
        perror("file_ram_alloc: can't mmap RAM pages");
        close(fd);
        return NULL;
    }
    block->fd = fd;
    return area;
}
#else
static void *file_ram_alloc(RAMBlock *block,
                            ram_addr_t memory,
                            const char *path)
{
    fprintf(stderr, "-mem-path not supported on this host\n");
    exit(1);
}
#endif
static ram_addr_t find_ram_offset(ram_addr_t size)
{
    RAMBlock *block, *next_block;
    ram_addr_t offset = RAM_ADDR_MAX, mingap = RAM_ADDR_MAX;

    assert(size != 0); /* it would hand out same offset multiple times */

    if (QTAILQ_EMPTY(&ram_list.blocks))
        return 0;

    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
        ram_addr_t end, next = RAM_ADDR_MAX;

        end = block->offset + block->length;

        QTAILQ_FOREACH(next_block, &ram_list.blocks, next) {
            if (next_block->offset >= end) {
                next = MIN(next, next_block->offset);
            }
        }
        if (next - end >= size && next - end < mingap) {
            offset = end;
            mingap = next - end;
        }
    }

    if (offset == RAM_ADDR_MAX) {
        fprintf(stderr, "Failed to find gap of requested size: %" PRIu64 "\n",
                (uint64_t)size);
        abort();
    }

    return offset;
}
ram_addr_t last_ram_offset(void)
{
    RAMBlock *block;
    ram_addr_t last = 0;

    QTAILQ_FOREACH(block, &ram_list.blocks, next)
        last = MAX(last, block->offset + block->length);

    return last;
}
static void qemu_ram_setup_dump(void *addr, ram_addr_t size)
{
    int ret;

    /* Use MADV_DONTDUMP if the user doesn't want the guest memory in the core dump */
    if (!qemu_opt_get_bool(qemu_get_machine_opts(),
                           "dump-guest-core", true)) {
        ret = qemu_madvise(addr, size, QEMU_MADV_DONTDUMP);
        if (ret) {
            perror("qemu_madvise");
            fprintf(stderr, "madvise doesn't support MADV_DONTDUMP, "
                            "but dump_guest_core=off specified\n");
        }
    }
}
void qemu_ram_set_idstr(ram_addr_t addr, const char *name, DeviceState *dev)
{
    RAMBlock *new_block, *block;

    new_block = NULL;
    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
        if (block->offset == addr) {
            new_block = block;
            break;
        }
    }
    assert(new_block);
    assert(!new_block->idstr[0]);

    if (dev) {
        char *id = qdev_get_dev_path(dev);
        if (id) {
            snprintf(new_block->idstr, sizeof(new_block->idstr), "%s/", id);
            g_free(id);
        }
    }
    pstrcat(new_block->idstr, sizeof(new_block->idstr), name);

    /* This assumes the iothread lock is taken here too.  */
    qemu_mutex_lock_ramlist();
    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
        if (block != new_block && !strcmp(block->idstr, new_block->idstr)) {
            fprintf(stderr, "RAMBlock \"%s\" already registered, abort!\n",
                    new_block->idstr);
            abort();
        }
    }
    qemu_mutex_unlock_ramlist();
}
static int memory_try_enable_merging(void *addr, size_t len)
{
    if (!qemu_opt_get_bool(qemu_get_machine_opts(), "mem-merge", true)) {
        /* disabled by the user */
        return 0;
    }

    return qemu_madvise(addr, len, QEMU_MADV_MERGEABLE);
}
ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
                                   MemoryRegion *mr)
{
    RAMBlock *block, *new_block;

    size = TARGET_PAGE_ALIGN(size);
    new_block = g_malloc0(sizeof(*new_block));
    new_block->fd = -1;

    /* This assumes the iothread lock is taken here too.  */
    qemu_mutex_lock_ramlist();
    new_block->mr = mr;
    new_block->offset = find_ram_offset(size);
    if (host) {
        new_block->host = host;
        new_block->flags |= RAM_PREALLOC_MASK;
    } else if (xen_enabled()) {
        if (mem_path) {
            fprintf(stderr, "-mem-path not supported with Xen\n");
            exit(1);
        }
        xen_ram_alloc(new_block->offset, size, mr);
    } else {
        if (mem_path) {
            if (phys_mem_alloc != qemu_anon_ram_alloc) {
                /*
                 * file_ram_alloc() needs to allocate just like
                 * phys_mem_alloc, but we haven't bothered to provide
                 * a hook there.
                 */
                fprintf(stderr,
                        "-mem-path not supported with this accelerator\n");
                exit(1);
            }
            new_block->host = file_ram_alloc(new_block, size, mem_path);
        }
        if (!new_block->host) {
            new_block->host = phys_mem_alloc(size);
            /*
             * With HAX, QEMU allocates the virtual address space, and the
             * HAX kernel module populates it with physical memory.  There
             * is currently no paging, so the user should make sure enough
             * free memory is available in advance.
             */
            if (hax_enabled()) {
                int ret = hax_populate_ram((uint64_t)new_block->host, size);
                if (ret < 0) {
                    fprintf(stderr, "Hax failed to populate ram\n");
                    exit(-1);
                }
            }
            if (!new_block->host) {
                fprintf(stderr, "Cannot set up guest memory '%s': %s\n",
                        new_block->mr->name, strerror(errno));
                exit(1);
            }
            memory_try_enable_merging(new_block->host, size);
        }
    }
    new_block->length = size;

    /* Keep the list sorted from biggest to smallest block. */
    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
        if (block->length < new_block->length) {
            break;
        }
    }
    if (block) {
        QTAILQ_INSERT_BEFORE(block, new_block, next);
    } else {
        QTAILQ_INSERT_TAIL(&ram_list.blocks, new_block, next);
    }
    ram_list.mru_block = NULL;

    ram_list.version++;
    qemu_mutex_unlock_ramlist();

    ram_list.phys_dirty = g_realloc(ram_list.phys_dirty,
                                    last_ram_offset() >> TARGET_PAGE_BITS);
    memset(ram_list.phys_dirty + (new_block->offset >> TARGET_PAGE_BITS),
           0, size >> TARGET_PAGE_BITS);
    cpu_physical_memory_set_dirty_range(new_block->offset, size, 0xff);

    qemu_ram_setup_dump(new_block->host, size);
    qemu_madvise(new_block->host, size, QEMU_MADV_HUGEPAGE);
    qemu_madvise(new_block->host, size, QEMU_MADV_DONTFORK);

    if (kvm_enabled())
        kvm_setup_guest_memory(new_block->host, size);

    return new_block->offset;
}
ram_addr_t qemu_ram_alloc(ram_addr_t size, MemoryRegion *mr)
{
    return qemu_ram_alloc_from_ptr(size, NULL, mr);
}
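
/*
 * Illustrative sketch (not built): the usual way device code ends up
 * here.  The region name and size are invented for the example;
 * memory_region_init_ram() allocates the backing RAM block for the
 * region, so devices rarely call qemu_ram_alloc() directly.
 */
#if 0
static void example_alloc_vram(MemoryRegion *vram)
{
    memory_region_init_ram(vram, NULL, "example.vram", 8 * 1024 * 1024);
}
#endif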
void qemu_ram_free_from_ptr(ram_addr_t addr)
{
    RAMBlock *block;

    /* This assumes the iothread lock is taken here too.  */
    qemu_mutex_lock_ramlist();
    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
        if (addr == block->offset) {
            QTAILQ_REMOVE(&ram_list.blocks, block, next);
            ram_list.mru_block = NULL;
            ram_list.version++;
            g_free(block);
            break;
        }
    }
    qemu_mutex_unlock_ramlist();
}
void qemu_ram_free(ram_addr_t addr)
{
    RAMBlock *block;

    /* This assumes the iothread lock is taken here too.  */
    qemu_mutex_lock_ramlist();
    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
        if (addr == block->offset) {
            QTAILQ_REMOVE(&ram_list.blocks, block, next);
            ram_list.mru_block = NULL;
            ram_list.version++;
            if (block->flags & RAM_PREALLOC_MASK) {
                ;
            } else if (xen_enabled()) {
                xen_invalidate_map_cache_entry(block->host);
#ifndef _WIN32
            } else if (block->fd >= 0) {
                munmap(block->host, block->length);
                close(block->fd);
#endif
            } else {
                qemu_anon_ram_free(block->host, block->length);
            }
            g_free(block);
            break;
        }
    }
    qemu_mutex_unlock_ramlist();
}
#ifndef _WIN32
void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
{
    RAMBlock *block;
    ram_addr_t offset;
    int flags;
    void *area, *vaddr;

    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
        offset = addr - block->offset;
        if (offset < block->length) {
            vaddr = block->host + offset;
            if (block->flags & RAM_PREALLOC_MASK) {
                ;
            } else if (xen_enabled()) {
                abort();
            } else {
                flags = MAP_FIXED;
                munmap(vaddr, length);
                if (block->fd >= 0) {
#ifdef MAP_POPULATE
                    flags |= mem_prealloc ? MAP_POPULATE | MAP_SHARED :
                        MAP_PRIVATE;
#else
                    flags |= MAP_PRIVATE;
#endif
                    area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
                                flags, block->fd, offset);
                } else {
                    /*
                     * Remap needs to match alloc.  Accelerators that
                     * set phys_mem_alloc never remap.  If they did,
                     * we'd need a remap hook here.
                     */
                    assert(phys_mem_alloc == qemu_anon_ram_alloc);

                    flags |= MAP_PRIVATE | MAP_ANONYMOUS;
                    area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
                                flags, -1, 0);
                }
                if (area != vaddr) {
                    fprintf(stderr, "Could not remap addr: "
                            RAM_ADDR_FMT "@" RAM_ADDR_FMT "\n",
                            length, addr);
                    exit(1);
                }
                memory_try_enable_merging(vaddr, length);
                qemu_ram_setup_dump(vaddr, length);
            }
            return;
        }
    }
}
#endif /* !_WIN32 */
/* Return a host pointer to ram allocated with qemu_ram_alloc.
   With the exception of the softmmu code in this file, this should
   only be used for local memory (e.g. video ram) that the device owns,
   and knows it isn't going to access beyond the end of the block.

   It should not be used for general purpose DMA.
   Use cpu_physical_memory_map/cpu_physical_memory_rw instead.
 */
void *qemu_get_ram_ptr(ram_addr_t addr)
{
    RAMBlock *block = qemu_get_ram_block(addr);

    if (xen_enabled()) {
        /* We need to check if the requested address is in RAM
         * because we don't want to map the entire memory in QEMU.
         * In that case just map until the end of the page.
         */
        if (block->offset == 0) {
            return xen_map_cache(addr, 0, 0);
        } else if (block->host == NULL) {
            block->host =
                xen_map_cache(block->offset, block->length, 1);
        }
    }
    return block->host + (addr - block->offset);
}
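
/*
 * Illustrative sketch (not built): the kind of use the comment above
 * permits.  The offset and the idea of zero-filling a page are invented
 * for the example; the pointer is only valid for memory the device owns
 * and within the bounds of its own block.
 */
#if 0
static void example_clear_vram_page(ram_addr_t vram_offset)
{
    uint8_t *p = qemu_get_ram_ptr(vram_offset);

    memset(p, 0, TARGET_PAGE_SIZE);
}
#endif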
/* Return a host pointer to guest's ram. Similar to qemu_get_ram_ptr
 * but takes a size argument */
static void *qemu_ram_ptr_length(ram_addr_t addr, hwaddr *size)
{
    if (*size == 0) {
        return NULL;
    }
    if (xen_enabled()) {
        return xen_map_cache(addr, *size, 1);
    } else {
        RAMBlock *block;

        QTAILQ_FOREACH(block, &ram_list.blocks, next) {
            if (addr - block->offset < block->length) {
                if (addr - block->offset + *size > block->length)
                    *size = block->length - addr + block->offset;
                return block->host + (addr - block->offset);
            }
        }

        fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
        abort();
    }
}
/* Some of the softmmu routines need to translate from a host pointer
   (typically a TLB entry) back to a ram offset.  */
MemoryRegion *qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr)
{
    RAMBlock *block;
    uint8_t *host = ptr;

    if (xen_enabled()) {
        *ram_addr = xen_ram_addr_from_mapcache(ptr);
        return qemu_get_ram_block(*ram_addr)->mr;
    }

    block = ram_list.mru_block;
    if (block && block->host && host - block->host < block->length) {
        goto found;
    }

    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
        /* This case occurs when the block is not mapped. */
        if (block->host == NULL) {
            continue;
        }
        if (host - block->host < block->length) {
            goto found;
        }
    }

    return NULL;

found:
    *ram_addr = block->offset + (host - block->host);
    return block->mr;
}
static void notdirty_mem_write(void *opaque, hwaddr ram_addr,
                               uint64_t val, unsigned size)
{
    int dirty_flags;
    dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
    if (!(dirty_flags & CODE_DIRTY_FLAG)) {
        tb_invalidate_phys_page_fast(ram_addr, size);
        dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
    }
    switch (size) {
    case 1:
        stb_p(qemu_get_ram_ptr(ram_addr), val);
        break;
    case 2:
        stw_p(qemu_get_ram_ptr(ram_addr), val);
        break;
    case 4:
        stl_p(qemu_get_ram_ptr(ram_addr), val);
        break;
    default:
        abort();
    }
    dirty_flags |= (0xff & ~CODE_DIRTY_FLAG);
    cpu_physical_memory_set_dirty_flags(ram_addr, dirty_flags);
    /* we remove the notdirty callback only if the code has been
       flushed */
    if (dirty_flags == 0xff) {
        CPUArchState *env = current_cpu->env_ptr;
        tlb_set_dirty(env, env->mem_io_vaddr);
    }
}
static bool notdirty_mem_accepts(void *opaque, hwaddr addr,
                                 unsigned size, bool is_write)
{
    return is_write;
}

static const MemoryRegionOps notdirty_mem_ops = {
    .write = notdirty_mem_write,
    .valid.accepts = notdirty_mem_accepts,
    .endianness = DEVICE_NATIVE_ENDIAN,
};
/* Generate a debug exception if a watchpoint has been hit.  */
static void check_watchpoint(int offset, int len_mask, int flags)
{
    CPUArchState *env = current_cpu->env_ptr;
    target_ulong pc, cs_base;
    target_ulong vaddr;
    CPUWatchpoint *wp;
    int cpu_flags;

    if (env->watchpoint_hit) {
        /* We re-entered the check after replacing the TB. Now raise
         * the debug interrupt so that it will trigger after the
         * current instruction. */
        cpu_interrupt(ENV_GET_CPU(env), CPU_INTERRUPT_DEBUG);
        return;
    }
    vaddr = (env->mem_io_vaddr & TARGET_PAGE_MASK) + offset;
    QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
        if ((vaddr == (wp->vaddr & len_mask) ||
             (vaddr & wp->len_mask) == wp->vaddr) && (wp->flags & flags)) {
            wp->flags |= BP_WATCHPOINT_HIT;
            if (!env->watchpoint_hit) {
                env->watchpoint_hit = wp;
                tb_check_watchpoint(env);
                if (wp->flags & BP_STOP_BEFORE_ACCESS) {
                    env->exception_index = EXCP_DEBUG;
                    cpu_loop_exit(env);
                } else {
                    cpu_get_tb_cpu_state(env, &pc, &cs_base, &cpu_flags);
                    tb_gen_code(env, pc, cs_base, cpu_flags, 1);
                    cpu_resume_from_signal(env, NULL);
                }
            }
        } else {
            wp->flags &= ~BP_WATCHPOINT_HIT;
        }
    }
}
/* Watchpoint access routines.  Watchpoints are inserted using TLB tricks,
   so these check for a hit then pass through to the normal out-of-line
   phys routines.  */
static uint64_t watch_mem_read(void *opaque, hwaddr addr,
                               unsigned size)
{
    check_watchpoint(addr & ~TARGET_PAGE_MASK, ~(size - 1), BP_MEM_READ);
    switch (size) {
    case 1: return ldub_phys(addr);
    case 2: return lduw_phys(addr);
    case 4: return ldl_phys(addr);
    default: abort();
    }
}

static void watch_mem_write(void *opaque, hwaddr addr,
                            uint64_t val, unsigned size)
{
    check_watchpoint(addr & ~TARGET_PAGE_MASK, ~(size - 1), BP_MEM_WRITE);
    switch (size) {
    case 1:
        stb_phys(addr, val);
        break;
    case 2:
        stw_phys(addr, val);
        break;
    case 4:
        stl_phys(addr, val);
        break;
    default: abort();
    }
}

static const MemoryRegionOps watch_mem_ops = {
    .read = watch_mem_read,
    .write = watch_mem_write,
    .endianness = DEVICE_NATIVE_ENDIAN,
};
static uint64_t subpage_read(void *opaque, hwaddr addr,
                             unsigned len)
{
    subpage_t *subpage = opaque;
    uint8_t buf[4];

#if defined(DEBUG_SUBPAGE)
    printf("%s: subpage %p len %u addr " TARGET_FMT_plx "\n", __func__,
           subpage, len, addr);
#endif
    address_space_read(subpage->as, addr + subpage->base, buf, len);
    switch (len) {
    case 1:
        return ldub_p(buf);
    case 2:
        return lduw_p(buf);
    case 4:
        return ldl_p(buf);
    default:
        abort();
    }
}

static void subpage_write(void *opaque, hwaddr addr,
                          uint64_t value, unsigned len)
{
    subpage_t *subpage = opaque;
    uint8_t buf[4];

#if defined(DEBUG_SUBPAGE)
    printf("%s: subpage %p len %u addr " TARGET_FMT_plx
           " value %"PRIx64"\n",
           __func__, subpage, len, addr, value);
#endif
    switch (len) {
    case 1:
        stb_p(buf, value);
        break;
    case 2:
        stw_p(buf, value);
        break;
    case 4:
        stl_p(buf, value);
        break;
    default:
        abort();
    }
    address_space_write(subpage->as, addr + subpage->base, buf, len);
}
static bool subpage_accepts(void *opaque, hwaddr addr,
                            unsigned len, bool is_write)
{
    subpage_t *subpage = opaque;
#if defined(DEBUG_SUBPAGE)
    printf("%s: subpage %p %c len %u addr " TARGET_FMT_plx "\n",
           __func__, subpage, is_write ? 'w' : 'r', len, addr);
#endif

    return address_space_access_valid(subpage->as, addr + subpage->base,
                                      len, is_write);
}

static const MemoryRegionOps subpage_ops = {
    .read = subpage_read,
    .write = subpage_write,
    .valid.accepts = subpage_accepts,
    .endianness = DEVICE_NATIVE_ENDIAN,
};
static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
                             uint16_t section)
{
    int idx, eidx;

    if (start >= TARGET_PAGE_SIZE || end >= TARGET_PAGE_SIZE)
        return -1;
    idx = SUBPAGE_IDX(start);
    eidx = SUBPAGE_IDX(end);
#if defined(DEBUG_SUBPAGE)
    printf("%s: %p start %08x end %08x idx %08x eidx %08x section %d\n",
           __func__, mmio, start, end, idx, eidx, section);
#endif
    for (; idx <= eidx; idx++) {
        mmio->sub_section[idx] = section;
    }

    return 0;
}
static subpage_t *subpage_init(AddressSpace *as, hwaddr base)
{
    subpage_t *mmio;

    mmio = g_malloc0(sizeof(subpage_t));

    mmio->as = as;
    mmio->base = base;
    memory_region_init_io(&mmio->iomem, NULL, &subpage_ops, mmio,
                          "subpage", TARGET_PAGE_SIZE);
    mmio->iomem.subpage = true;
#if defined(DEBUG_SUBPAGE)
    printf("%s: %p base " TARGET_FMT_plx " len %08x\n", __func__,
           mmio, base, TARGET_PAGE_SIZE);
#endif
    subpage_register(mmio, 0, TARGET_PAGE_SIZE-1, PHYS_SECTION_UNASSIGNED);

    return mmio;
}
static uint16_t dummy_section(MemoryRegion *mr)
{
    MemoryRegionSection section = {
        .mr = mr,
        .offset_within_address_space = 0,
        .offset_within_region = 0,
        .size = int128_2_64(),
    };

    return phys_section_add(&section);
}

MemoryRegion *iotlb_to_region(hwaddr index)
{
    return address_space_memory.dispatch->sections[index & ~TARGET_PAGE_MASK].mr;
}
static void io_mem_init(void)
{
    memory_region_init_io(&io_mem_rom, NULL, &unassigned_mem_ops, NULL, "rom", UINT64_MAX);
    memory_region_init_io(&io_mem_unassigned, NULL, &unassigned_mem_ops, NULL,
                          "unassigned", UINT64_MAX);
    memory_region_init_io(&io_mem_notdirty, NULL, &notdirty_mem_ops, NULL,
                          "notdirty", UINT64_MAX);
    memory_region_init_io(&io_mem_watch, NULL, &watch_mem_ops, NULL,
                          "watch", UINT64_MAX);
}
static void mem_begin(MemoryListener *listener)
{
    AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
    AddressSpaceDispatch *d = g_new(AddressSpaceDispatch, 1);

    d->phys_map = (PhysPageEntry) { .ptr = PHYS_MAP_NODE_NIL, .is_leaf = 0 };
    d->as = as;
    as->next_dispatch = d;
}

static void mem_commit(MemoryListener *listener)
{
    AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
    AddressSpaceDispatch *cur = as->dispatch;
    AddressSpaceDispatch *next = as->next_dispatch;

    next->nodes = next_map.nodes;
    next->sections = next_map.sections;

    as->dispatch = next;
    g_free(cur);
}
static void core_begin(MemoryListener *listener)
{
    uint16_t n;

    prev_map = g_new(PhysPageMap, 1);
    *prev_map = next_map;

    memset(&next_map, 0, sizeof(next_map));
    n = dummy_section(&io_mem_unassigned);
    assert(n == PHYS_SECTION_UNASSIGNED);
    n = dummy_section(&io_mem_notdirty);
    assert(n == PHYS_SECTION_NOTDIRTY);
    n = dummy_section(&io_mem_rom);
    assert(n == PHYS_SECTION_ROM);
    n = dummy_section(&io_mem_watch);
    assert(n == PHYS_SECTION_WATCH);
}

/* This listener's commit runs after the other AddressSpaceDispatch listeners'.
 * All AddressSpaceDispatch instances have switched to the next map.
 */
static void core_commit(MemoryListener *listener)
{
    phys_sections_free(prev_map);
}
static void tcg_commit(MemoryListener *listener)
{
    CPUState *cpu;

    /* since each CPU stores ram addresses in its TLB cache, we must
       reset the modified entries */
    /* XXX: slow ! */
    CPU_FOREACH(cpu) {
        CPUArchState *env = cpu->env_ptr;

        tlb_flush(env, 1);
    }
}

static void core_log_global_start(MemoryListener *listener)
{
    cpu_physical_memory_set_dirty_tracking(1);
}

static void core_log_global_stop(MemoryListener *listener)
{
    cpu_physical_memory_set_dirty_tracking(0);
}
static MemoryListener core_memory_listener = {
    .begin = core_begin,
    .commit = core_commit,
    .log_global_start = core_log_global_start,
    .log_global_stop = core_log_global_stop,
    .priority = 1,
};

static MemoryListener tcg_memory_listener = {
    .commit = tcg_commit,
};
void address_space_init_dispatch(AddressSpace *as)
{
    as->dispatch = NULL;
    as->dispatch_listener = (MemoryListener) {
        .begin = mem_begin,
        .commit = mem_commit,
        .region_add = mem_add,
        .region_nop = mem_add,
        .priority = 0,
    };
    memory_listener_register(&as->dispatch_listener, as);
}

void address_space_destroy_dispatch(AddressSpace *as)
{
    AddressSpaceDispatch *d = as->dispatch;

    memory_listener_unregister(&as->dispatch_listener);
    g_free(d);
    as->dispatch = NULL;
}
static void memory_map_init(void)
{
    system_memory = g_malloc(sizeof(*system_memory));
    memory_region_init(system_memory, NULL, "system", INT64_MAX);
    address_space_init(&address_space_memory, system_memory, "memory");

    system_io = g_malloc(sizeof(*system_io));
    memory_region_init_io(system_io, NULL, &unassigned_io_ops, NULL, "io",
                          65536);
    address_space_init(&address_space_io, system_io, "I/O");

    memory_listener_register(&core_memory_listener, &address_space_memory);
    if (tcg_enabled()) {
        memory_listener_register(&tcg_memory_listener, &address_space_memory);
    }
}

MemoryRegion *get_system_memory(void)
{
    return system_memory;
}

MemoryRegion *get_system_io(void)
{
    return system_io;
}

#endif /* !defined(CONFIG_USER_ONLY) */
/* physical memory access (slow version, mainly for debug) */
#if defined(CONFIG_USER_ONLY)
int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
                        uint8_t *buf, int len, int is_write)
{
    int l, flags;
    target_ulong page;
    void * p;

    while (len > 0) {
        page = addr & TARGET_PAGE_MASK;
        l = (page + TARGET_PAGE_SIZE) - addr;
        if (l > len)
            l = len;
        flags = page_get_flags(page);
        if (!(flags & PAGE_VALID))
            return -1;
        if (is_write) {
            if (!(flags & PAGE_WRITE))
                return -1;
            /* XXX: this code should not depend on lock_user */
            if (!(p = lock_user(VERIFY_WRITE, addr, l, 0)))
                return -1;
            memcpy(p, buf, l);
            unlock_user(p, addr, l);
        } else {
            if (!(flags & PAGE_READ))
                return -1;
            /* XXX: this code should not depend on lock_user */
            if (!(p = lock_user(VERIFY_READ, addr, l, 1)))
                return -1;
            memcpy(buf, p, l);
            unlock_user(p, addr, 0);
        }
        len -= l;
        buf += l;
        addr += l;
    }
    return 0;
}

#else
static void invalidate_and_set_dirty(hwaddr addr,
                                     hwaddr length)
{
    if (!cpu_physical_memory_is_dirty(addr)) {
        /* invalidate code */
        tb_invalidate_phys_page_range(addr, addr + length, 0);
        /* set dirty bit */
        cpu_physical_memory_set_dirty_flags(addr, (0xff & ~CODE_DIRTY_FLAG));
    }
    xen_modified_memory(addr, length);
}

static inline bool memory_access_is_direct(MemoryRegion *mr, bool is_write)
{
    if (memory_region_is_ram(mr)) {
        return !(is_write && mr->readonly);
    }
    if (memory_region_is_romd(mr)) {
        return !is_write;
    }

    return false;
}
static int memory_access_size(MemoryRegion *mr, unsigned l, hwaddr addr)
{
    unsigned access_size_max = mr->ops->valid.max_access_size;

    /* Regions are assumed to support 1-4 byte accesses unless
       otherwise specified.  */
    if (access_size_max == 0) {
        access_size_max = 4;
    }

    /* Bound the maximum access by the alignment of the address.  */
    if (!mr->ops->impl.unaligned) {
        unsigned align_size_max = addr & -addr;
        if (align_size_max != 0 && align_size_max < access_size_max) {
            access_size_max = align_size_max;
        }
    }

    /* Don't attempt accesses larger than the maximum.  */
    if (l > access_size_max) {
        l = access_size_max;
    }
    if (l & (l - 1)) {
        l = 1 << (qemu_fls(l) - 1);
    }

    return l;
}
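
/*
 * Worked example (values invented): for a region with
 * valid.max_access_size == 0 (so 4 is assumed), a request of l == 6 at
 * addr == 0x1002 is first bounded by the address alignment
 * (0x1002 & -0x1002 == 2), then l == 6 exceeds that bound and is capped
 * to 2, which is already a power of two.  The function returns 2 and
 * the caller loops for the remaining bytes.
 */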
bool address_space_rw(AddressSpace *as, hwaddr addr, uint8_t *buf,
                      int len, bool is_write)
{
    hwaddr l;
    uint8_t *ptr;
    uint64_t val;
    hwaddr addr1;
    MemoryRegion *mr;
    bool error = false;

    while (len > 0) {
        l = len;
        mr = address_space_translate(as, addr, &addr1, &l, is_write);

        if (is_write) {
            if (!memory_access_is_direct(mr, is_write)) {
                l = memory_access_size(mr, l, addr1);
                /* XXX: could force current_cpu to NULL to avoid
                   potential bugs */
                switch (l) {
                case 8:
                    /* 64 bit write access */
                    val = ldq_p(buf);
                    error |= io_mem_write(mr, addr1, val, 8);
                    break;
                case 4:
                    /* 32 bit write access */
                    val = ldl_p(buf);
                    error |= io_mem_write(mr, addr1, val, 4);
                    break;
                case 2:
                    /* 16 bit write access */
                    val = lduw_p(buf);
                    error |= io_mem_write(mr, addr1, val, 2);
                    break;
                case 1:
                    /* 8 bit write access */
                    val = ldub_p(buf);
                    error |= io_mem_write(mr, addr1, val, 1);
                    break;
                default:
                    abort();
                }
            } else {
                addr1 += memory_region_get_ram_addr(mr);
                /* RAM case */
                ptr = qemu_get_ram_ptr(addr1);
                memcpy(ptr, buf, l);
                invalidate_and_set_dirty(addr1, l);
            }
        } else {
            if (!memory_access_is_direct(mr, is_write)) {
                /* I/O case */
                l = memory_access_size(mr, l, addr1);
                switch (l) {
                case 8:
                    /* 64 bit read access */
                    error |= io_mem_read(mr, addr1, &val, 8);
                    stq_p(buf, val);
                    break;
                case 4:
                    /* 32 bit read access */
                    error |= io_mem_read(mr, addr1, &val, 4);
                    stl_p(buf, val);
                    break;
                case 2:
                    /* 16 bit read access */
                    error |= io_mem_read(mr, addr1, &val, 2);
                    stw_p(buf, val);
                    break;
                case 1:
                    /* 8 bit read access */
                    error |= io_mem_read(mr, addr1, &val, 1);
                    stb_p(buf, val);
                    break;
                default:
                    abort();
                }
            } else {
                /* RAM case */
                ptr = qemu_get_ram_ptr(mr->ram_addr + addr1);
                memcpy(buf, ptr, l);
            }
        }
        len -= l;
        buf += l;
        addr += l;
    }

    return error;
}

bool address_space_write(AddressSpace *as, hwaddr addr,
                         const uint8_t *buf, int len)
{
    return address_space_rw(as, addr, (uint8_t *)buf, len, true);
}

bool address_space_read(AddressSpace *as, hwaddr addr, uint8_t *buf, int len)
{
    return address_space_rw(as, addr, buf, len, false);
}
void cpu_physical_memory_rw(hwaddr addr, uint8_t *buf,
                            int len, int is_write)
{
    address_space_rw(&address_space_memory, addr, buf, len, is_write);
}

/* used for ROM loading : can write in RAM and ROM */
void cpu_physical_memory_write_rom(hwaddr addr,
                                   const uint8_t *buf, int len)
{
    hwaddr l;
    uint8_t *ptr;
    hwaddr addr1;
    MemoryRegion *mr;

    while (len > 0) {
        l = len;
        mr = address_space_translate(&address_space_memory,
                                     addr, &addr1, &l, true);

        if (!(memory_region_is_ram(mr) ||
              memory_region_is_romd(mr))) {
            /* do nothing */
        } else {
            addr1 += memory_region_get_ram_addr(mr);
            /* ROM/RAM case */
            ptr = qemu_get_ram_ptr(addr1);
            memcpy(ptr, buf, l);
            invalidate_and_set_dirty(addr1, l);
        }
        len -= l;
        buf += l;
        addr += l;
    }
}
typedef struct {
    MemoryRegion *mr;
    void *buffer;
    hwaddr addr;
    hwaddr len;
} BounceBuffer;

static BounceBuffer bounce;

typedef struct MapClient {
    void *opaque;
    void (*callback)(void *opaque);
    QLIST_ENTRY(MapClient) link;
} MapClient;

static QLIST_HEAD(map_client_list, MapClient) map_client_list
    = QLIST_HEAD_INITIALIZER(map_client_list);

void *cpu_register_map_client(void *opaque, void (*callback)(void *opaque))
{
    MapClient *client = g_malloc(sizeof(*client));

    client->opaque = opaque;
    client->callback = callback;
    QLIST_INSERT_HEAD(&map_client_list, client, link);
    return client;
}

static void cpu_unregister_map_client(void *_client)
{
    MapClient *client = (MapClient *)_client;

    QLIST_REMOVE(client, link);
    g_free(client);
}

static void cpu_notify_map_clients(void)
{
    MapClient *client;

    while (!QLIST_EMPTY(&map_client_list)) {
        client = QLIST_FIRST(&map_client_list);
        client->callback(client->opaque);
        cpu_unregister_map_client(client);
    }
}
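
/*
 * Illustrative sketch (not built): the retry pattern the map-client
 * list supports.  If a mapping fails because the bounce buffer is busy,
 * a caller can register a callback and retry once another mapping is
 * torn down.  All names below are invented for the example.
 */
#if 0
static void example_start_dma(void *dev);

/* invoked from cpu_notify_map_clients() when a mapping was released */
static void example_retry_map(void *opaque)
{
    example_start_dma(opaque);
}

static void example_start_dma(void *dev)
{
    hwaddr len = 4096;
    void *p = cpu_physical_memory_map(0x1000, &len, 1);

    if (!p) {
        /* bounce buffer busy: ask to be called back, then retry */
        cpu_register_map_client(dev, example_retry_map);
        return;
    }
    /* ... fill up to len bytes at p ..., then: */
    cpu_physical_memory_unmap(p, len, 1, len);
}
#endif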
bool address_space_access_valid(AddressSpace *as, hwaddr addr, int len, bool is_write)
{
    MemoryRegion *mr;
    hwaddr l, xlat;

    while (len > 0) {
        l = len;
        mr = address_space_translate(as, addr, &xlat, &l, is_write);
        if (!memory_access_is_direct(mr, is_write)) {
            l = memory_access_size(mr, l, addr);
            if (!memory_region_access_valid(mr, xlat, l, is_write)) {
                return false;
            }
        }

        len -= l;
        addr += l;
    }
    return true;
}
/* Map a physical memory region into a host virtual address.
 * May map a subset of the requested range, given by and returned in *plen.
 * May return NULL if resources needed to perform the mapping are exhausted.
 * Use only for reads OR writes - not for read-modify-write operations.
 * Use cpu_register_map_client() to know when retrying the map operation is
 * likely to succeed.
 */
void *address_space_map(AddressSpace *as,
                        hwaddr addr,
                        hwaddr *plen,
                        bool is_write)
{
    hwaddr len = *plen;
    hwaddr done = 0;
    hwaddr l, xlat, base;
    MemoryRegion *mr, *this_mr;
    ram_addr_t raddr;

    if (len == 0) {
        return NULL;
    }

    l = len;
    mr = address_space_translate(as, addr, &xlat, &l, is_write);
    if (!memory_access_is_direct(mr, is_write)) {
        if (bounce.buffer) {
            return NULL;
        }
        /* Avoid unbounded allocations */
        l = MIN(l, TARGET_PAGE_SIZE);
        bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, l);
        bounce.addr = addr;
        bounce.len = l;

        memory_region_ref(mr);
        bounce.mr = mr;
        if (!is_write) {
            address_space_read(as, addr, bounce.buffer, l);
        }

        *plen = l;
        return bounce.buffer;
    }

    base = xlat;
    raddr = memory_region_get_ram_addr(mr);

    for (;;) {
        len -= l;
        addr += l;
        done += l;
        if (len == 0) {
            break;
        }

        l = len;
        this_mr = address_space_translate(as, addr, &xlat, &l, is_write);
        if (this_mr != mr || xlat != base + done) {
            break;
        }
    }

    memory_region_ref(mr);
    *plen = done;
    return qemu_ram_ptr_length(raddr + base, plen);
}
/* Unmaps a memory region previously mapped by address_space_map().
 * Will also mark the memory as dirty if is_write == 1.  access_len gives
 * the amount of memory that was actually read or written by the caller.
 */
void address_space_unmap(AddressSpace *as, void *buffer, hwaddr len,
                         int is_write, hwaddr access_len)
{
    if (buffer != bounce.buffer) {
        MemoryRegion *mr;
        ram_addr_t addr1;

        mr = qemu_ram_addr_from_host(buffer, &addr1);
        assert(mr != NULL);
        if (is_write) {
            while (access_len) {
                unsigned l;
                l = TARGET_PAGE_SIZE;
                if (l > access_len)
                    l = access_len;
                invalidate_and_set_dirty(addr1, l);
                addr1 += l;
                access_len -= l;
            }
        }
        if (xen_enabled()) {
            xen_invalidate_map_cache_entry(buffer);
        }
        memory_region_unref(mr);
        return;
    }
    if (is_write) {
        address_space_write(as, bounce.addr, bounce.buffer, access_len);
    }
    qemu_vfree(bounce.buffer);
    bounce.buffer = NULL;
    memory_region_unref(bounce.mr);
    cpu_notify_map_clients();
}
void *cpu_physical_memory_map(hwaddr addr,
                              hwaddr *plen,
                              int is_write)
{
    return address_space_map(&address_space_memory, addr, plen, is_write);
}

void cpu_physical_memory_unmap(void *buffer, hwaddr len,
                               int is_write, hwaddr access_len)
{
    return address_space_unmap(&address_space_memory, buffer, len, is_write, access_len);
}
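
/*
 * Illustrative sketch (not built): the canonical map/unmap pairing for
 * a zero-copy device read.  The guest address and length are invented
 * for the example; note that *plen may come back smaller than requested,
 * so real callers must loop or fall back to cpu_physical_memory_rw().
 */
#if 0
static void example_dma_read(void)
{
    hwaddr len = 512;
    void *p = cpu_physical_memory_map(0x100000, &len, 0);

    if (p) {
        /* ... consume up to len bytes at p ... */
        cpu_physical_memory_unmap(p, len, 0, len);
    }
}
#endif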
/* warning: addr must be aligned */
static inline uint32_t ldl_phys_internal(hwaddr addr,
                                         enum device_endian endian)
{
    uint8_t *ptr;
    uint64_t val;
    MemoryRegion *mr;
    hwaddr l = 4;
    hwaddr addr1;

    mr = address_space_translate(&address_space_memory, addr, &addr1, &l,
                                 false);
    if (l < 4 || !memory_access_is_direct(mr, false)) {
        /* I/O case */
        io_mem_read(mr, addr1, &val, 4);
#if defined(TARGET_WORDS_BIGENDIAN)
        if (endian == DEVICE_LITTLE_ENDIAN) {
            val = bswap32(val);
        }
#else
        if (endian == DEVICE_BIG_ENDIAN) {
            val = bswap32(val);
        }
#endif
    } else {
        /* RAM case */
        ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
                                & TARGET_PAGE_MASK)
                               + addr1);
        switch (endian) {
        case DEVICE_LITTLE_ENDIAN:
            val = ldl_le_p(ptr);
            break;
        case DEVICE_BIG_ENDIAN:
            val = ldl_be_p(ptr);
            break;
        default:
            val = ldl_p(ptr);
            break;
        }
    }
    return val;
}

uint32_t ldl_phys(hwaddr addr)
{
    return ldl_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
}

uint32_t ldl_le_phys(hwaddr addr)
{
    return ldl_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
}

uint32_t ldl_be_phys(hwaddr addr)
{
    return ldl_phys_internal(addr, DEVICE_BIG_ENDIAN);
}
/* warning: addr must be aligned */
static inline uint64_t ldq_phys_internal(hwaddr addr,
                                         enum device_endian endian)
{
    uint8_t *ptr;
    uint64_t val;
    MemoryRegion *mr;
    hwaddr l = 8;
    hwaddr addr1;

    mr = address_space_translate(&address_space_memory, addr, &addr1, &l,
                                 false);
    if (l < 8 || !memory_access_is_direct(mr, false)) {
        /* I/O case */
        io_mem_read(mr, addr1, &val, 8);
#if defined(TARGET_WORDS_BIGENDIAN)
        if (endian == DEVICE_LITTLE_ENDIAN) {
            val = bswap64(val);
        }
#else
        if (endian == DEVICE_BIG_ENDIAN) {
            val = bswap64(val);
        }
#endif
    } else {
        /* RAM case */
        ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
                                & TARGET_PAGE_MASK)
                               + addr1);
        switch (endian) {
        case DEVICE_LITTLE_ENDIAN:
            val = ldq_le_p(ptr);
            break;
        case DEVICE_BIG_ENDIAN:
            val = ldq_be_p(ptr);
            break;
        default:
            val = ldq_p(ptr);
            break;
        }
    }
    return val;
}

uint64_t ldq_phys(hwaddr addr)
{
    return ldq_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
}

uint64_t ldq_le_phys(hwaddr addr)
{
    return ldq_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
}

uint64_t ldq_be_phys(hwaddr addr)
{
    return ldq_phys_internal(addr, DEVICE_BIG_ENDIAN);
}
/* XXX: optimize */
uint32_t ldub_phys(hwaddr addr)
{
    uint8_t val;
    cpu_physical_memory_read(addr, &val, 1);
    return val;
}
/* warning: addr must be aligned */
static inline uint32_t lduw_phys_internal(hwaddr addr,
                                          enum device_endian endian)
{
    uint8_t *ptr;
    uint64_t val;
    MemoryRegion *mr;
    hwaddr l = 2;
    hwaddr addr1;

    mr = address_space_translate(&address_space_memory, addr, &addr1, &l,
                                 false);
    if (l < 2 || !memory_access_is_direct(mr, false)) {
        /* I/O case */
        io_mem_read(mr, addr1, &val, 2);
#if defined(TARGET_WORDS_BIGENDIAN)
        if (endian == DEVICE_LITTLE_ENDIAN) {
            val = bswap16(val);
        }
#else
        if (endian == DEVICE_BIG_ENDIAN) {
            val = bswap16(val);
        }
#endif
    } else {
        /* RAM case */
        ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
                                & TARGET_PAGE_MASK)
                               + addr1);
        switch (endian) {
        case DEVICE_LITTLE_ENDIAN:
            val = lduw_le_p(ptr);
            break;
        case DEVICE_BIG_ENDIAN:
            val = lduw_be_p(ptr);
            break;
        default:
            val = lduw_p(ptr);
            break;
        }
    }
    return val;
}

uint32_t lduw_phys(hwaddr addr)
{
    return lduw_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
}

uint32_t lduw_le_phys(hwaddr addr)
{
    return lduw_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
}

uint32_t lduw_be_phys(hwaddr addr)
{
    return lduw_phys_internal(addr, DEVICE_BIG_ENDIAN);
}
/* warning: addr must be aligned. The ram page is not masked as dirty
   and the code inside is not invalidated. It is useful if the dirty
   bits are used to track modified PTEs */
void stl_phys_notdirty(hwaddr addr, uint32_t val)
{
    uint8_t *ptr;
    MemoryRegion *mr;
    hwaddr l = 4;
    hwaddr addr1;

    mr = address_space_translate(&address_space_memory, addr, &addr1, &l,
                                 true);
    if (l < 4 || !memory_access_is_direct(mr, true)) {
        io_mem_write(mr, addr1, val, 4);
    } else {
        addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
        ptr = qemu_get_ram_ptr(addr1);
        stl_p(ptr, val);

        if (unlikely(in_migration)) {
            if (!cpu_physical_memory_is_dirty(addr1)) {
                /* invalidate code */
                tb_invalidate_phys_page_range(addr1, addr1 + 4, 0);
                /* set dirty bit */
                cpu_physical_memory_set_dirty_flags(
                    addr1, (0xff & ~CODE_DIRTY_FLAG));
            }
        }
    }
}
/* warning: addr must be aligned */
static inline void stl_phys_internal(hwaddr addr, uint32_t val,
                                     enum device_endian endian)
{
    uint8_t *ptr;
    MemoryRegion *mr;
    hwaddr l = 4;
    hwaddr addr1;

    mr = address_space_translate(&address_space_memory, addr, &addr1, &l,
                                 true);
    if (l < 4 || !memory_access_is_direct(mr, true)) {
#if defined(TARGET_WORDS_BIGENDIAN)
        if (endian == DEVICE_LITTLE_ENDIAN) {
            val = bswap32(val);
        }
#else
        if (endian == DEVICE_BIG_ENDIAN) {
            val = bswap32(val);
        }
#endif
        io_mem_write(mr, addr1, val, 4);
    } else {
        /* RAM case */
        addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
        ptr = qemu_get_ram_ptr(addr1);
        switch (endian) {
        case DEVICE_LITTLE_ENDIAN:
            stl_le_p(ptr, val);
            break;
        case DEVICE_BIG_ENDIAN:
            stl_be_p(ptr, val);
            break;
        default:
            stl_p(ptr, val);
            break;
        }
        invalidate_and_set_dirty(addr1, 4);
    }
}

void stl_phys(hwaddr addr, uint32_t val)
{
    stl_phys_internal(addr, val, DEVICE_NATIVE_ENDIAN);
}

void stl_le_phys(hwaddr addr, uint32_t val)
{
    stl_phys_internal(addr, val, DEVICE_LITTLE_ENDIAN);
}

void stl_be_phys(hwaddr addr, uint32_t val)
{
    stl_phys_internal(addr, val, DEVICE_BIG_ENDIAN);
}
/* XXX: optimize */
void stb_phys(hwaddr addr, uint32_t val)
{
    uint8_t v = val;
    cpu_physical_memory_write(addr, &v, 1);
}
/* warning: addr must be aligned */
static inline void stw_phys_internal(hwaddr addr, uint32_t val,
                                     enum device_endian endian)
{
    uint8_t *ptr;
    MemoryRegion *mr;
    hwaddr l = 2;
    hwaddr addr1;

    mr = address_space_translate(&address_space_memory, addr, &addr1, &l,
                                 true);
    if (l < 2 || !memory_access_is_direct(mr, true)) {
#if defined(TARGET_WORDS_BIGENDIAN)
        if (endian == DEVICE_LITTLE_ENDIAN) {
            val = bswap16(val);
        }
#else
        if (endian == DEVICE_BIG_ENDIAN) {
            val = bswap16(val);
        }
#endif
        io_mem_write(mr, addr1, val, 2);
    } else {
        /* RAM case */
        addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
        ptr = qemu_get_ram_ptr(addr1);
        switch (endian) {
        case DEVICE_LITTLE_ENDIAN:
            stw_le_p(ptr, val);
            break;
        case DEVICE_BIG_ENDIAN:
            stw_be_p(ptr, val);
            break;
        default:
            stw_p(ptr, val);
            break;
        }
        invalidate_and_set_dirty(addr1, 2);
    }
}

void stw_phys(hwaddr addr, uint32_t val)
{
    stw_phys_internal(addr, val, DEVICE_NATIVE_ENDIAN);
}

void stw_le_phys(hwaddr addr, uint32_t val)
{
    stw_phys_internal(addr, val, DEVICE_LITTLE_ENDIAN);
}

void stw_be_phys(hwaddr addr, uint32_t val)
{
    stw_phys_internal(addr, val, DEVICE_BIG_ENDIAN);
}
/* XXX: optimize */
void stq_phys(hwaddr addr, uint64_t val)
{
    val = tswap64(val);
    cpu_physical_memory_write(addr, &val, 8);
}

void stq_le_phys(hwaddr addr, uint64_t val)
{
    val = cpu_to_le64(val);
    cpu_physical_memory_write(addr, &val, 8);
}

void stq_be_phys(hwaddr addr, uint64_t val)
{
    val = cpu_to_be64(val);
    cpu_physical_memory_write(addr, &val, 8);
}
/* virtual memory access for debug (includes writing to ROM) */
int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
                        uint8_t *buf, int len, int is_write)
{
    int l;
    hwaddr phys_addr;
    target_ulong page;

    while (len > 0) {
        page = addr & TARGET_PAGE_MASK;
        phys_addr = cpu_get_phys_page_debug(cpu, page);
        /* if no physical page mapped, return an error */
        if (phys_addr == -1)
            return -1;
        l = (page + TARGET_PAGE_SIZE) - addr;
        if (l > len)
            l = len;
        phys_addr += (addr & ~TARGET_PAGE_MASK);
        if (is_write)
            cpu_physical_memory_write_rom(phys_addr, buf, l);
        else
            cpu_physical_memory_rw(phys_addr, buf, l, is_write);
        len -= l;
        buf += l;
        addr += l;
    }
    return 0;
}
#endif
#if !defined(CONFIG_USER_ONLY)

/*
 * A helper function for the _utterly broken_ virtio device model to find out if
 * it's running on a big endian machine. Don't do this at home kids!
 */
bool virtio_is_big_endian(void);
bool virtio_is_big_endian(void)
{
#if defined(TARGET_WORDS_BIGENDIAN)
    return true;
#else
    return false;
#endif
}

#endif

#ifndef CONFIG_USER_ONLY
bool cpu_physical_memory_is_io(hwaddr phys_addr)
{
    MemoryRegion *mr;
    hwaddr l = 1;

    mr = address_space_translate(&address_space_memory,
                                 phys_addr, &phys_addr, &l, false);

    return !(memory_region_is_ram(mr) ||
             memory_region_is_romd(mr));
}

void qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque)
{
    RAMBlock *block;

    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
        func(block->host, block->offset, block->length, opaque);
    }
}
#endif