4 * Copyright (c) 2003 Fabrice Bellard
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
21 #include <sys/types.h>
25 #include "qemu-common.h"
29 #if !defined(CONFIG_USER_ONLY)
30 #include "hw/boards.h"
33 #include "qemu/osdep.h"
34 #include "sysemu/kvm.h"
35 #include "sysemu/sysemu.h"
36 #include "hw/xen/xen.h"
37 #include "qemu/timer.h"
38 #include "qemu/config-file.h"
39 #include "qemu/error-report.h"
40 #include "exec/memory.h"
41 #include "sysemu/dma.h"
42 #include "exec/address-spaces.h"
43 #if defined(CONFIG_USER_ONLY)
45 #else /* !CONFIG_USER_ONLY */
46 #include "sysemu/xen-mapcache.h"
49 #include "exec/cpu-all.h"
50 #include "qemu/rcu_queue.h"
51 #include "qemu/main-loop.h"
52 #include "translate-all.h"
53 #include "sysemu/replay.h"
55 #include "exec/memory-internal.h"
56 #include "exec/ram_addr.h"
58 #include "qemu/range.h"
60 #include "qemu/mmap-alloc.h"
63 //#define DEBUG_SUBPAGE
65 #if !defined(CONFIG_USER_ONLY)
66 /* ram_list is read under rcu_read_lock()/rcu_read_unlock(). Writes
67 * are protected by the ramlist lock.
69 RAMList ram_list = { .blocks = QLIST_HEAD_INITIALIZER(ram_list.blocks) };
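/* A typical read-side walk, as used throughout this file, looks like this
 * (sketch):
 *
 *     rcu_read_lock();
 *     QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
 *         ...
 *     }
 *     rcu_read_unlock();
 *
 * Writers additionally take qemu_mutex_lock_ramlist() around list updates.
 */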
71 static MemoryRegion *system_memory;
72 static MemoryRegion *system_io;
74 AddressSpace address_space_io;
75 AddressSpace address_space_memory;
77 MemoryRegion io_mem_rom, io_mem_notdirty;
78 static MemoryRegion io_mem_unassigned;
80 /* RAM is pre-allocated and passed into qemu_ram_alloc_from_ptr */
81 #define RAM_PREALLOC (1 << 0)
83 /* RAM is mmap-ed with MAP_SHARED */
84 #define RAM_SHARED (1 << 1)
86 /* Only a portion of RAM (used_length) is actually used and migrated.
87 * This used_length size can change across reboots.
89 #define RAM_RESIZEABLE (1 << 2)
91 /* RAM is backed by an mmapped file.
93 #define RAM_FILE (1 << 3)
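/* These bits are combined in a RAMBlock's flags field; for example,
 * qemu_ram_alloc_from_file() below sets RAM_FILE, plus RAM_SHARED when the
 * mapping is shared.
 */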
96 struct CPUTailQ cpus = QTAILQ_HEAD_INITIALIZER(cpus);
97 /* current CPU in the current thread. It is only valid inside
99 __thread CPUState *current_cpu;
100 /* 0 = Do not count executed instructions.
101 1 = Precise instruction counting.
102 2 = Adaptive rate instruction counting. */
105 #if !defined(CONFIG_USER_ONLY)
107 typedef struct PhysPageEntry PhysPageEntry;
109 struct PhysPageEntry {
110 /* How many bits to skip to the next level (in units of L2_SIZE). 0 for a leaf. */
112 /* index into phys_sections (!skip) or phys_map_nodes (skip) */
116 #define PHYS_MAP_NODE_NIL (((uint32_t)~0) >> 6)
118 /* Size of the L2 (and L3, etc) page tables. */
119 #define ADDR_SPACE_BITS 64
122 #define P_L2_SIZE (1 << P_L2_BITS)
124 #define P_L2_LEVELS (((ADDR_SPACE_BITS - TARGET_PAGE_BITS - 1) / P_L2_BITS) + 1)
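/* For example, assuming 4 KiB target pages (TARGET_PAGE_BITS == 12) and
 * 9-bit levels (P_L2_BITS == 9), this works out to
 * ((64 - 12 - 1) / 9) + 1 = 6 levels.
 */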
126 typedef PhysPageEntry Node[P_L2_SIZE];
128 typedef struct PhysPageMap {
131 unsigned sections_nb;
132 unsigned sections_nb_alloc;
134 unsigned nodes_nb_alloc;
136 MemoryRegionSection *sections;
139 struct AddressSpaceDispatch {
142 /* This is a multi-level map on the physical address space.
143 * The bottom level has pointers to MemoryRegionSections.
145 PhysPageEntry phys_map;
150 #define SUBPAGE_IDX(addr) ((addr) & ~TARGET_PAGE_MASK)
151 typedef struct subpage_t {
155 uint16_t sub_section[TARGET_PAGE_SIZE];
158 #define PHYS_SECTION_UNASSIGNED 0
159 #define PHYS_SECTION_NOTDIRTY 1
160 #define PHYS_SECTION_ROM 2
161 #define PHYS_SECTION_WATCH 3
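/* mem_begin() below registers dummy sections in exactly this order and
 * asserts that the returned indices match these constants.
 */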
163 static void io_mem_init(void);
164 static void memory_map_init(void);
165 static void tcg_commit(MemoryListener *listener);
167 static MemoryRegion io_mem_watch;
170 * CPUAddressSpace: all the information a CPU needs about an AddressSpace
171 * @cpu: the CPU whose AddressSpace this is
172 * @as: the AddressSpace itself
173 * @memory_dispatch: its dispatch pointer (cached, RCU protected)
174 * @tcg_as_listener: listener for tracking changes to the AddressSpace
176 struct CPUAddressSpace {
179 struct AddressSpaceDispatch *memory_dispatch;
180 MemoryListener tcg_as_listener;
185 #if !defined(CONFIG_USER_ONLY)
187 static void phys_map_node_reserve(PhysPageMap *map, unsigned nodes)
189 if (map->nodes_nb + nodes > map->nodes_nb_alloc) {
190 map->nodes_nb_alloc = MAX(map->nodes_nb_alloc * 2, 16);
191 map->nodes_nb_alloc = MAX(map->nodes_nb_alloc, map->nodes_nb + nodes);
192 map->nodes = g_renew(Node, map->nodes, map->nodes_nb_alloc);
196 static uint32_t phys_map_node_alloc(PhysPageMap *map, bool leaf)
203 ret = map->nodes_nb++;
205 assert(ret != PHYS_MAP_NODE_NIL);
206 assert(ret != map->nodes_nb_alloc);
208 e.skip = leaf ? 0 : 1;
209 e.ptr = leaf ? PHYS_SECTION_UNASSIGNED : PHYS_MAP_NODE_NIL;
210 for (i = 0; i < P_L2_SIZE; ++i) {
211 memcpy(&p[i], &e, sizeof(e));
216 static void phys_page_set_level(PhysPageMap *map, PhysPageEntry *lp,
217 hwaddr *index, hwaddr *nb, uint16_t leaf,
221 hwaddr step = (hwaddr)1 << (level * P_L2_BITS);
223 if (lp->skip && lp->ptr == PHYS_MAP_NODE_NIL) {
224 lp->ptr = phys_map_node_alloc(map, level == 0);
226 p = map->nodes[lp->ptr];
227 lp = &p[(*index >> (level * P_L2_BITS)) & (P_L2_SIZE - 1)];
229 while (*nb && lp < &p[P_L2_SIZE]) {
230 if ((*index & (step - 1)) == 0 && *nb >= step) {
236 phys_page_set_level(map, lp, index, nb, leaf, level - 1);
242 static void phys_page_set(AddressSpaceDispatch *d,
243 hwaddr index, hwaddr nb,
246 /* Wildly overreserve - it doesn't matter much. */
247 phys_map_node_reserve(&d->map, 3 * P_L2_LEVELS);
249 phys_page_set_level(&d->map, &d->phys_map, &index, &nb, leaf, P_L2_LEVELS - 1);
252 /* Compact a non-leaf page entry. Simply detect that the entry has a single child,
253 * and update our entry so we can skip it and go directly to the destination.
255 static void phys_page_compact(PhysPageEntry *lp, Node *nodes, unsigned long *compacted)
257 unsigned valid_ptr = P_L2_SIZE;
262 if (lp->ptr == PHYS_MAP_NODE_NIL) {
267 for (i = 0; i < P_L2_SIZE; i++) {
268 if (p[i].ptr == PHYS_MAP_NODE_NIL) {
275 phys_page_compact(&p[i], nodes, compacted);
279 /* We can only compress if there's only one child. */
284 assert(valid_ptr < P_L2_SIZE);
286 /* Don't compress if it won't fit in the # of bits we have. */
287 if (lp->skip + p[valid_ptr].skip >= (1 << 3)) {
291 lp->ptr = p[valid_ptr].ptr;
292 if (!p[valid_ptr].skip) {
293 /* If our only child is a leaf, make this a leaf. */
294 /* By design, we should have made this node a leaf to begin with so we
295 * should never reach here.
296 * But since it's so simple to handle this, let's do it just in case we
301 lp->skip += p[valid_ptr].skip;
305 static void phys_page_compact_all(AddressSpaceDispatch *d, int nodes_nb)
307 DECLARE_BITMAP(compacted, nodes_nb);
309 if (d->phys_map.skip) {
310 phys_page_compact(&d->phys_map, d->map.nodes, compacted);
314 static MemoryRegionSection *phys_page_find(PhysPageEntry lp, hwaddr addr,
315 Node *nodes, MemoryRegionSection *sections)
318 hwaddr index = addr >> TARGET_PAGE_BITS;
321 for (i = P_L2_LEVELS; lp.skip && (i -= lp.skip) >= 0;) {
322 if (lp.ptr == PHYS_MAP_NODE_NIL) {
323 return &sections[PHYS_SECTION_UNASSIGNED];
326 lp = p[(index >> (i * P_L2_BITS)) & (P_L2_SIZE - 1)];
329 if (sections[lp.ptr].size.hi ||
330 range_covers_byte(sections[lp.ptr].offset_within_address_space,
331 sections[lp.ptr].size.lo, addr)) {
332 return &sections[lp.ptr];
334 return &sections[PHYS_SECTION_UNASSIGNED];
338 bool memory_region_is_unassigned(MemoryRegion *mr)
340 return mr != &io_mem_rom && mr != &io_mem_notdirty && !mr->rom_device
341 && mr != &io_mem_watch;
344 /* Called from RCU critical section */
345 static MemoryRegionSection *address_space_lookup_region(AddressSpaceDispatch *d,
347 bool resolve_subpage)
349 MemoryRegionSection *section;
352 section = phys_page_find(d->phys_map, addr, d->map.nodes, d->map.sections);
353 if (resolve_subpage && section->mr->subpage) {
354 subpage = container_of(section->mr, subpage_t, iomem);
355 section = &d->map.sections[subpage->sub_section[SUBPAGE_IDX(addr)]];
360 /* Called from RCU critical section */
361 static MemoryRegionSection *
362 address_space_translate_internal(AddressSpaceDispatch *d, hwaddr addr, hwaddr *xlat,
363 hwaddr *plen, bool resolve_subpage)
365 MemoryRegionSection *section;
369 section = address_space_lookup_region(d, addr, resolve_subpage);
370 /* Compute offset within MemoryRegionSection */
371 addr -= section->offset_within_address_space;
373 /* Compute offset within MemoryRegion */
374 *xlat = addr + section->offset_within_region;
378 /* MMIO registers can be expected to perform full-width accesses based only
379 * on their address, without considering adjacent registers that could
380 * decode to completely different MemoryRegions. When such registers
381 * exist (e.g. I/O ports 0xcf8 and 0xcf9 on most PC chipsets), MMIO
382 * regions overlap wildly. For this reason we cannot clamp the accesses
385 * If the length is small (as is the case for address_space_ldl/stl),
386 * everything works fine. If the incoming length is large, however,
387 * the caller really has to do the clamping through memory_access_size.
389 if (memory_region_is_ram(mr)) {
390 diff = int128_sub(section->size, int128_make64(addr));
391 *plen = int128_get64(int128_min(diff, int128_make64(*plen)));
396 static inline bool memory_access_is_direct(MemoryRegion *mr, bool is_write)
398 if (memory_region_is_ram(mr)) {
399 return !(is_write && mr->readonly);
401 if (memory_region_is_romd(mr)) {
408 /* Called from RCU critical section */
409 MemoryRegion *address_space_translate(AddressSpace *as, hwaddr addr,
410 hwaddr *xlat, hwaddr *plen,
414 MemoryRegionSection *section;
418 AddressSpaceDispatch *d = atomic_rcu_read(&as->dispatch);
419 section = address_space_translate_internal(d, addr, &addr, plen, true);
422 if (!mr->iommu_ops) {
426 iotlb = mr->iommu_ops->translate(mr, addr, is_write);
427 addr = ((iotlb.translated_addr & ~iotlb.addr_mask)
428 | (addr & iotlb.addr_mask));
429 *plen = MIN(*plen, (addr | iotlb.addr_mask) - addr + 1);
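/* The translation above keeps the low (addr_mask) bits of the input address;
 * (addr | iotlb.addr_mask) is the last byte of the IOTLB page covering addr,
 * so *plen is clamped to the remainder of that page.
 */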
430 if (!(iotlb.perm & (1 << is_write))) {
431 mr = &io_mem_unassigned;
435 as = iotlb.target_as;
438 if (xen_enabled() && memory_access_is_direct(mr, is_write)) {
439 hwaddr page = ((addr & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE) - addr;
440 *plen = MIN(page, *plen);
447 /* Called from RCU critical section */
448 MemoryRegionSection *
449 address_space_translate_for_iotlb(CPUState *cpu, hwaddr addr,
450 hwaddr *xlat, hwaddr *plen)
452 MemoryRegionSection *section;
453 section = address_space_translate_internal(cpu->cpu_ases[0].memory_dispatch,
454 addr, xlat, plen, false);
456 assert(!section->mr->iommu_ops);
461 #if !defined(CONFIG_USER_ONLY)
463 static int cpu_common_post_load(void *opaque, int version_id)
465 CPUState *cpu = opaque;
467 /* 0x01 was CPU_INTERRUPT_EXIT. This line can be removed when the
468 version_id is increased. */
469 cpu->interrupt_request &= ~0x01;
475 static int cpu_common_pre_load(void *opaque)
477 CPUState *cpu = opaque;
479 cpu->exception_index = -1;
484 static bool cpu_common_exception_index_needed(void *opaque)
486 CPUState *cpu = opaque;
488 return tcg_enabled() && cpu->exception_index != -1;
491 static const VMStateDescription vmstate_cpu_common_exception_index = {
492 .name = "cpu_common/exception_index",
494 .minimum_version_id = 1,
495 .needed = cpu_common_exception_index_needed,
496 .fields = (VMStateField[]) {
497 VMSTATE_INT32(exception_index, CPUState),
498 VMSTATE_END_OF_LIST()
502 static bool cpu_common_crash_occurred_needed(void *opaque)
504 CPUState *cpu = opaque;
506 return cpu->crash_occurred;
509 static const VMStateDescription vmstate_cpu_common_crash_occurred = {
510 .name = "cpu_common/crash_occurred",
512 .minimum_version_id = 1,
513 .needed = cpu_common_crash_occurred_needed,
514 .fields = (VMStateField[]) {
515 VMSTATE_BOOL(crash_occurred, CPUState),
516 VMSTATE_END_OF_LIST()
520 const VMStateDescription vmstate_cpu_common = {
521 .name = "cpu_common",
523 .minimum_version_id = 1,
524 .pre_load = cpu_common_pre_load,
525 .post_load = cpu_common_post_load,
526 .fields = (VMStateField[]) {
527 VMSTATE_UINT32(halted, CPUState),
528 VMSTATE_UINT32(interrupt_request, CPUState),
529 VMSTATE_END_OF_LIST()
531 .subsections = (const VMStateDescription*[]) {
532 &vmstate_cpu_common_exception_index,
533 &vmstate_cpu_common_crash_occurred,
540 CPUState *qemu_get_cpu(int index)
545 if (cpu->cpu_index == index) {
553 #if !defined(CONFIG_USER_ONLY)
554 void tcg_cpu_address_space_init(CPUState *cpu, AddressSpace *as)
556 /* We only support one address space per cpu at the moment. */
557 assert(cpu->as == as);
560 /* We've already registered the listener for our only AS */
564 cpu->cpu_ases = g_new0(CPUAddressSpace, 1);
565 cpu->cpu_ases[0].cpu = cpu;
566 cpu->cpu_ases[0].as = as;
567 cpu->cpu_ases[0].tcg_as_listener.commit = tcg_commit;
568 memory_listener_register(&cpu->cpu_ases[0].tcg_as_listener, as);
572 #ifndef CONFIG_USER_ONLY
573 static DECLARE_BITMAP(cpu_index_map, MAX_CPUMASK_BITS);
575 static int cpu_get_free_index(Error **errp)
577 int cpu = find_first_zero_bit(cpu_index_map, MAX_CPUMASK_BITS);
579 if (cpu >= MAX_CPUMASK_BITS) {
580 error_setg(errp, "Trying to use more CPUs than max of %d",
585 bitmap_set(cpu_index_map, cpu, 1);
589 void cpu_exec_exit(CPUState *cpu)
591 if (cpu->cpu_index == -1) {
592 /* cpu_index was never allocated by this @cpu or was already freed. */
596 bitmap_clear(cpu_index_map, cpu->cpu_index, 1);
601 static int cpu_get_free_index(Error **errp)
606 CPU_FOREACH(some_cpu) {
612 void cpu_exec_exit(CPUState *cpu)
617 void cpu_exec_init(CPUState *cpu, Error **errp)
619 CPUClass *cc = CPU_GET_CLASS(cpu);
621 Error *local_err = NULL;
623 #ifndef CONFIG_USER_ONLY
624 cpu->as = &address_space_memory;
625 cpu->thread_id = qemu_get_thread_id();
628 #if defined(CONFIG_USER_ONLY)
631 cpu_index = cpu->cpu_index = cpu_get_free_index(&local_err);
633 error_propagate(errp, local_err);
634 #if defined(CONFIG_USER_ONLY)
639 QTAILQ_INSERT_TAIL(&cpus, cpu, node);
640 #if defined(CONFIG_USER_ONLY)
643 if (qdev_get_vmsd(DEVICE(cpu)) == NULL) {
644 vmstate_register(NULL, cpu_index, &vmstate_cpu_common, cpu);
646 #if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
647 register_savevm(NULL, "cpu", cpu_index, CPU_SAVE_VERSION,
648 cpu_save, cpu_load, cpu->env_ptr);
649 assert(cc->vmsd == NULL);
650 assert(qdev_get_vmsd(DEVICE(cpu)) == NULL);
652 if (cc->vmsd != NULL) {
653 vmstate_register(NULL, cpu_index, cc->vmsd, cpu);
657 #if defined(CONFIG_USER_ONLY)
658 static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
660 tb_invalidate_phys_page_range(pc, pc + 1, 0);
663 static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
665 hwaddr phys = cpu_get_phys_page_debug(cpu, pc);
667 tb_invalidate_phys_addr(cpu->as,
668 phys | (pc & ~TARGET_PAGE_MASK));
673 #if defined(CONFIG_USER_ONLY)
674 void cpu_watchpoint_remove_all(CPUState *cpu, int mask)
679 int cpu_watchpoint_remove(CPUState *cpu, vaddr addr, vaddr len,
685 void cpu_watchpoint_remove_by_ref(CPUState *cpu, CPUWatchpoint *watchpoint)
689 int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
690 int flags, CPUWatchpoint **watchpoint)
695 /* Add a watchpoint. */
696 int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
697 int flags, CPUWatchpoint **watchpoint)
701 /* forbid ranges which are empty or run off the end of the address space */
702 if (len == 0 || (addr + len - 1) < addr) {
703 error_report("tried to set invalid watchpoint at %"
704 VADDR_PRIx ", len=%" VADDR_PRIu, addr, len);
707 wp = g_malloc(sizeof(*wp));
713 /* keep all GDB-injected watchpoints in front */
714 if (flags & BP_GDB) {
715 QTAILQ_INSERT_HEAD(&cpu->watchpoints, wp, entry);
717 QTAILQ_INSERT_TAIL(&cpu->watchpoints, wp, entry);
720 tlb_flush_page(cpu, addr);
727 /* Remove a specific watchpoint. */
728 int cpu_watchpoint_remove(CPUState *cpu, vaddr addr, vaddr len,
733 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
734 if (addr == wp->vaddr && len == wp->len
735 && flags == (wp->flags & ~BP_WATCHPOINT_HIT)) {
736 cpu_watchpoint_remove_by_ref(cpu, wp);
743 /* Remove a specific watchpoint by reference. */
744 void cpu_watchpoint_remove_by_ref(CPUState *cpu, CPUWatchpoint *watchpoint)
746 QTAILQ_REMOVE(&cpu->watchpoints, watchpoint, entry);
748 tlb_flush_page(cpu, watchpoint->vaddr);
753 /* Remove all matching watchpoints. */
754 void cpu_watchpoint_remove_all(CPUState *cpu, int mask)
756 CPUWatchpoint *wp, *next;
758 QTAILQ_FOREACH_SAFE(wp, &cpu->watchpoints, entry, next) {
759 if (wp->flags & mask) {
760 cpu_watchpoint_remove_by_ref(cpu, wp);
765 /* Return true if this watchpoint address matches the specified
766 * access (ie the address range covered by the watchpoint overlaps
767 * partially or completely with the address range covered by the
770 static inline bool cpu_watchpoint_address_matches(CPUWatchpoint *wp,
774 /* We know the lengths are non-zero, but a little caution is
775 * required to avoid errors in the case where the range ends
776 * exactly at the top of the address space and so addr + len
777 * wraps round to zero.
779 vaddr wpend = wp->vaddr + wp->len - 1;
780 vaddr addrend = addr + len - 1;
782 return !(addr > wpend || wp->vaddr > addrend);
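/* Worked example: a watchpoint at vaddr 0x1000 with len 4 covers
 * [0x1000, 0x1003]; an access at addr 0x1002 with len 8 covers
 * [0x1002, 0x1009]. Neither range starts beyond the other's end, so the
 * function reports a match.
 */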
787 /* Add a breakpoint. */
788 int cpu_breakpoint_insert(CPUState *cpu, vaddr pc, int flags,
789 CPUBreakpoint **breakpoint)
793 bp = g_malloc(sizeof(*bp));
798 /* keep all GDB-injected breakpoints in front */
799 if (flags & BP_GDB) {
800 QTAILQ_INSERT_HEAD(&cpu->breakpoints, bp, entry);
802 QTAILQ_INSERT_TAIL(&cpu->breakpoints, bp, entry);
805 breakpoint_invalidate(cpu, pc);
813 /* Remove a specific breakpoint. */
814 int cpu_breakpoint_remove(CPUState *cpu, vaddr pc, int flags)
818 QTAILQ_FOREACH(bp, &cpu->breakpoints, entry) {
819 if (bp->pc == pc && bp->flags == flags) {
820 cpu_breakpoint_remove_by_ref(cpu, bp);
827 /* Remove a specific breakpoint by reference. */
828 void cpu_breakpoint_remove_by_ref(CPUState *cpu, CPUBreakpoint *breakpoint)
830 QTAILQ_REMOVE(&cpu->breakpoints, breakpoint, entry);
832 breakpoint_invalidate(cpu, breakpoint->pc);
837 /* Remove all matching breakpoints. */
838 void cpu_breakpoint_remove_all(CPUState *cpu, int mask)
840 CPUBreakpoint *bp, *next;
842 QTAILQ_FOREACH_SAFE(bp, &cpu->breakpoints, entry, next) {
843 if (bp->flags & mask) {
844 cpu_breakpoint_remove_by_ref(cpu, bp);
849 /* enable or disable single step mode. EXCP_DEBUG is returned by the
850 CPU loop after each instruction */
851 void cpu_single_step(CPUState *cpu, int enabled)
853 if (cpu->singlestep_enabled != enabled) {
854 cpu->singlestep_enabled = enabled;
856 kvm_update_guest_debug(cpu, 0);
858 /* must flush all the translated code to avoid inconsistencies */
859 /* XXX: only flush what is necessary */
865 void cpu_abort(CPUState *cpu, const char *fmt, ...)
872 fprintf(stderr, "qemu: fatal: ");
873 vfprintf(stderr, fmt, ap);
874 fprintf(stderr, "\n");
875 cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_FPU | CPU_DUMP_CCOP);
876 if (qemu_log_enabled()) {
877 qemu_log("qemu: fatal: ");
878 qemu_log_vprintf(fmt, ap2);
880 log_cpu_state(cpu, CPU_DUMP_FPU | CPU_DUMP_CCOP);
887 #if defined(CONFIG_USER_ONLY)
889 struct sigaction act;
890 sigfillset(&act.sa_mask);
891 act.sa_handler = SIG_DFL;
892 sigaction(SIGABRT, &act, NULL);
898 #if !defined(CONFIG_USER_ONLY)
899 /* Called from RCU critical section */
900 static RAMBlock *qemu_get_ram_block(ram_addr_t addr)
904 block = atomic_rcu_read(&ram_list.mru_block);
905 if (block && addr - block->offset < block->max_length) {
908 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
909 if (addr - block->offset < block->max_length) {
914 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
918 /* It is safe to write mru_block outside the iothread lock. This
923 * xxx removed from list
927 * call_rcu(reclaim_ramblock, xxx);
930 * atomic_rcu_set is not needed here. The block was already published
931 * when it was placed into the list. Here we're just making an extra
932 * copy of the pointer.
934 ram_list.mru_block = block;
938 static void tlb_reset_dirty_range_all(ram_addr_t start, ram_addr_t length)
945 end = TARGET_PAGE_ALIGN(start + length);
946 start &= TARGET_PAGE_MASK;
949 block = qemu_get_ram_block(start);
950 assert(block == qemu_get_ram_block(end - 1));
951 start1 = (uintptr_t)ramblock_ptr(block, start - block->offset);
953 tlb_reset_dirty(cpu, start1, length);
958 /* Note: start and end must be within the same ram block. */
959 bool cpu_physical_memory_test_and_clear_dirty(ram_addr_t start,
963 unsigned long end, page;
970 end = TARGET_PAGE_ALIGN(start + length) >> TARGET_PAGE_BITS;
971 page = start >> TARGET_PAGE_BITS;
972 dirty = bitmap_test_and_clear_atomic(ram_list.dirty_memory[client],
975 if (dirty && tcg_enabled()) {
976 tlb_reset_dirty_range_all(start, length);
982 /* Called from RCU critical section */
983 hwaddr memory_region_section_get_iotlb(CPUState *cpu,
984 MemoryRegionSection *section,
986 hwaddr paddr, hwaddr xlat,
988 target_ulong *address)
993 if (memory_region_is_ram(section->mr)) {
995 iotlb = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
997 if (!section->readonly) {
998 iotlb |= PHYS_SECTION_NOTDIRTY;
1000 iotlb |= PHYS_SECTION_ROM;
1003 AddressSpaceDispatch *d;
1005 d = atomic_rcu_read(&section->address_space->dispatch);
1006 iotlb = section - d->map.sections;
1010 /* Make accesses to pages with watchpoints go via the
1011 watchpoint trap routines. */
1012 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
1013 if (cpu_watchpoint_address_matches(wp, vaddr, TARGET_PAGE_SIZE)) {
1014 /* Avoid trapping reads of pages with a write breakpoint. */
1015 if ((prot & PAGE_WRITE) || (wp->flags & BP_MEM_READ)) {
1016 iotlb = PHYS_SECTION_WATCH + paddr;
1017 *address |= TLB_MMIO;
1025 #endif /* defined(CONFIG_USER_ONLY) */
1027 #if !defined(CONFIG_USER_ONLY)
1029 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
1031 static subpage_t *subpage_init(AddressSpace *as, hwaddr base);
1033 static void *(*phys_mem_alloc)(size_t size, uint64_t *align) =
1034 qemu_anon_ram_alloc;
1037 * Set a custom physical guest memory allocator.
1038 * Accelerators with unusual needs may need this. Hopefully, we can
1039 * get rid of it eventually.
1041 void phys_mem_set_alloc(void *(*alloc)(size_t, uint64_t *align))
1043 phys_mem_alloc = alloc;
1046 static uint16_t phys_section_add(PhysPageMap *map,
1047 MemoryRegionSection *section)
1049 /* The physical section number is ORed with a page-aligned
1050 * pointer to produce the iotlb entries. Thus it should
1051 * never overflow into the page-aligned value.
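 * For example, assuming 4 KiB target pages, the low 12 bits can encode up to
 * 4096 distinct section indices, which is what the assertion below enforces.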
1053 assert(map->sections_nb < TARGET_PAGE_SIZE);
1055 if (map->sections_nb == map->sections_nb_alloc) {
1056 map->sections_nb_alloc = MAX(map->sections_nb_alloc * 2, 16);
1057 map->sections = g_renew(MemoryRegionSection, map->sections,
1058 map->sections_nb_alloc);
1060 map->sections[map->sections_nb] = *section;
1061 memory_region_ref(section->mr);
1062 return map->sections_nb++;
1065 static void phys_section_destroy(MemoryRegion *mr)
1067 memory_region_unref(mr);
1070 subpage_t *subpage = container_of(mr, subpage_t, iomem);
1071 object_unref(OBJECT(&subpage->iomem));
1076 static void phys_sections_free(PhysPageMap *map)
1078 while (map->sections_nb > 0) {
1079 MemoryRegionSection *section = &map->sections[--map->sections_nb];
1080 phys_section_destroy(section->mr);
1082 g_free(map->sections);
1086 static void register_subpage(AddressSpaceDispatch *d, MemoryRegionSection *section)
1089 hwaddr base = section->offset_within_address_space
1091 MemoryRegionSection *existing = phys_page_find(d->phys_map, base,
1092 d->map.nodes, d->map.sections);
1093 MemoryRegionSection subsection = {
1094 .offset_within_address_space = base,
1095 .size = int128_make64(TARGET_PAGE_SIZE),
1099 assert(existing->mr->subpage || existing->mr == &io_mem_unassigned);
1101 if (!(existing->mr->subpage)) {
1102 subpage = subpage_init(d->as, base);
1103 subsection.address_space = d->as;
1104 subsection.mr = &subpage->iomem;
1105 phys_page_set(d, base >> TARGET_PAGE_BITS, 1,
1106 phys_section_add(&d->map, &subsection));
1108 subpage = container_of(existing->mr, subpage_t, iomem);
1110 start = section->offset_within_address_space & ~TARGET_PAGE_MASK;
1111 end = start + int128_get64(section->size) - 1;
1112 subpage_register(subpage, start, end,
1113 phys_section_add(&d->map, section));
1117 static void register_multipage(AddressSpaceDispatch *d,
1118 MemoryRegionSection *section)
1120 hwaddr start_addr = section->offset_within_address_space;
1121 uint16_t section_index = phys_section_add(&d->map, section);
1122 uint64_t num_pages = int128_get64(int128_rshift(section->size,
1126 phys_page_set(d, start_addr >> TARGET_PAGE_BITS, num_pages, section_index);
1129 static void mem_add(MemoryListener *listener, MemoryRegionSection *section)
1131 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
1132 AddressSpaceDispatch *d = as->next_dispatch;
1133 MemoryRegionSection now = *section, remain = *section;
1134 Int128 page_size = int128_make64(TARGET_PAGE_SIZE);
1136 if (now.offset_within_address_space & ~TARGET_PAGE_MASK) {
1137 uint64_t left = TARGET_PAGE_ALIGN(now.offset_within_address_space)
1138 - now.offset_within_address_space;
1140 now.size = int128_min(int128_make64(left), now.size);
1141 register_subpage(d, &now);
1143 now.size = int128_zero();
1145 while (int128_ne(remain.size, now.size)) {
1146 remain.size = int128_sub(remain.size, now.size);
1147 remain.offset_within_address_space += int128_get64(now.size);
1148 remain.offset_within_region += int128_get64(now.size);
1150 if (int128_lt(remain.size, page_size)) {
1151 register_subpage(d, &now);
1152 } else if (remain.offset_within_address_space & ~TARGET_PAGE_MASK) {
1153 now.size = page_size;
1154 register_subpage(d, &now);
1156 now.size = int128_and(now.size, int128_neg(page_size));
1157 register_multipage(d, &now);
1162 void qemu_flush_coalesced_mmio_buffer(void)
1165 kvm_flush_coalesced_mmio_buffer();
1168 void qemu_mutex_lock_ramlist(void)
1170 qemu_mutex_lock(&ram_list.mutex);
1173 void qemu_mutex_unlock_ramlist(void)
1175 qemu_mutex_unlock(&ram_list.mutex);
1180 #include <sys/vfs.h>
1182 #define HUGETLBFS_MAGIC 0x958458f6
1184 static long gethugepagesize(const char *path, Error **errp)
1190 ret = statfs(path, &fs);
1191 } while (ret != 0 && errno == EINTR);
1194 error_setg_errno(errp, errno, "failed to get page size of file %s",
1199 if (fs.f_type != HUGETLBFS_MAGIC)
1200 fprintf(stderr, "Warning: path not on HugeTLBFS: %s\n", path);
1205 static void *file_ram_alloc(RAMBlock *block,
1212 char *sanitized_name;
1217 Error *local_err = NULL;
1219 hpagesize = gethugepagesize(path, &local_err);
1221 error_propagate(errp, local_err);
1224 block->mr->align = hpagesize;
1226 if (memory < hpagesize) {
1227 error_setg(errp, "memory size 0x" RAM_ADDR_FMT " must be equal to "
1228 "or larger than huge page size 0x%" PRIx64,
1233 if (kvm_enabled() && !kvm_has_sync_mmu()) {
1235 "host lacks kvm mmu notifiers, -mem-path unsupported");
1239 if (!stat(path, &st) && S_ISDIR(st.st_mode)) {
1240 /* Make name safe to use with mkstemp by replacing '/' with '_'. */
1241 sanitized_name = g_strdup(memory_region_name(block->mr));
1242 for (c = sanitized_name; *c != '\0'; c++) {
1248 filename = g_strdup_printf("%s/qemu_back_mem.%s.XXXXXX", path,
1250 g_free(sanitized_name);
1252 fd = mkstemp(filename);
1258 fd = open(path, O_RDWR | O_CREAT, 0644);
1262 error_setg_errno(errp, errno,
1263 "unable to create backing store for hugepages");
1267 memory = ROUND_UP(memory, hpagesize);
1270 * ftruncate is not supported by hugetlbfs in older
1271 * hosts, so don't bother bailing out on errors.
1272 * If anything goes wrong with it under other filesystems,
1275 if (ftruncate(fd, memory)) {
1276 perror("ftruncate");
1279 area = qemu_ram_mmap(fd, memory, hpagesize, block->flags & RAM_SHARED);
1280 if (area == MAP_FAILED) {
1281 error_setg_errno(errp, errno,
1282 "unable to map backing store for hugepages");
1288 os_mem_prealloc(fd, area, memory);
1299 /* Called with the ramlist lock held. */
1300 static ram_addr_t find_ram_offset(ram_addr_t size)
1302 RAMBlock *block, *next_block;
1303 ram_addr_t offset = RAM_ADDR_MAX, mingap = RAM_ADDR_MAX;
1305 assert(size != 0); /* it would hand out same offset multiple times */
1307 if (QLIST_EMPTY_RCU(&ram_list.blocks)) {
1311 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1312 ram_addr_t end, next = RAM_ADDR_MAX;
1314 end = block->offset + block->max_length;
1316 QLIST_FOREACH_RCU(next_block, &ram_list.blocks, next) {
1317 if (next_block->offset >= end) {
1318 next = MIN(next, next_block->offset);
1321 if (next - end >= size && next - end < mingap) {
1323 mingap = next - end;
1327 if (offset == RAM_ADDR_MAX) {
1328 fprintf(stderr, "Failed to find gap of requested size: %" PRIu64 "\n",
1336 ram_addr_t last_ram_offset(void)
1339 ram_addr_t last = 0;
1342 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1343 last = MAX(last, block->offset + block->max_length);
1349 static void qemu_ram_setup_dump(void *addr, ram_addr_t size)
1353 /* Use MADV_DONTDUMP if the user doesn't want the guest memory in the core */
1354 if (!machine_dump_guest_core(current_machine)) {
1355 ret = qemu_madvise(addr, size, QEMU_MADV_DONTDUMP);
1357 perror("qemu_madvise");
1358 fprintf(stderr, "madvise doesn't support MADV_DONTDUMP, "
1359 "but dump_guest_core=off specified\n");
1364 /* Called within an RCU critical section, or while the ramlist lock
1367 static RAMBlock *find_ram_block(ram_addr_t addr)
1371 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1372 if (block->offset == addr) {
1380 const char *qemu_ram_get_idstr(RAMBlock *rb)
1385 /* Called with iothread lock held. */
1386 void qemu_ram_set_idstr(ram_addr_t addr, const char *name, DeviceState *dev)
1388 RAMBlock *new_block, *block;
1391 new_block = find_ram_block(addr);
1393 assert(!new_block->idstr[0]);
1396 char *id = qdev_get_dev_path(dev);
1398 snprintf(new_block->idstr, sizeof(new_block->idstr), "%s/", id);
1402 pstrcat(new_block->idstr, sizeof(new_block->idstr), name);
1404 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1405 if (block != new_block && !strcmp(block->idstr, new_block->idstr)) {
1406 fprintf(stderr, "RAMBlock \"%s\" already registered, abort!\n",
1414 /* Called with iothread lock held. */
1415 void qemu_ram_unset_idstr(ram_addr_t addr)
1419 /* FIXME: arch_init.c assumes that this is not called throughout
1420 * migration. Ignore the problem since hot-unplug during migration
1421 * does not work anyway.
1425 block = find_ram_block(addr);
1427 memset(block->idstr, 0, sizeof(block->idstr));
1432 static int memory_try_enable_merging(void *addr, size_t len)
1434 if (!machine_mem_merge(current_machine)) {
1435 /* disabled by the user */
1439 return qemu_madvise(addr, len, QEMU_MADV_MERGEABLE);
1442 /* Only legal before the guest might have detected the memory size: e.g. on
1443 * incoming migration, or right after reset.
1445 * As the memory core doesn't know how memory is accessed, it is up to the
1446 * resize callback to update device state and/or add assertions to detect
1447 * misuse, if necessary.
1449 int qemu_ram_resize(ram_addr_t base, ram_addr_t newsize, Error **errp)
1451 RAMBlock *block = find_ram_block(base);
1455 newsize = TARGET_PAGE_ALIGN(newsize);
1457 if (block->used_length == newsize) {
1461 if (!(block->flags & RAM_RESIZEABLE)) {
1462 error_setg_errno(errp, EINVAL,
1463 "Length mismatch: %s: 0x" RAM_ADDR_FMT
1464 " in != 0x" RAM_ADDR_FMT, block->idstr,
1465 newsize, block->used_length);
1469 if (block->max_length < newsize) {
1470 error_setg_errno(errp, EINVAL,
1471 "Length too large: %s: 0x" RAM_ADDR_FMT
1472 " > 0x" RAM_ADDR_FMT, block->idstr,
1473 newsize, block->max_length);
1477 cpu_physical_memory_clear_dirty_range(block->offset, block->used_length);
1478 block->used_length = newsize;
1479 cpu_physical_memory_set_dirty_range(block->offset, block->used_length,
1481 memory_region_set_size(block->mr, newsize);
1482 if (block->resized) {
1483 block->resized(block->idstr, newsize, block->host);
1488 static ram_addr_t ram_block_add(RAMBlock *new_block, Error **errp)
1491 RAMBlock *last_block = NULL;
1492 ram_addr_t old_ram_size, new_ram_size;
1494 old_ram_size = last_ram_offset() >> TARGET_PAGE_BITS;
1496 qemu_mutex_lock_ramlist();
1497 new_block->offset = find_ram_offset(new_block->max_length);
1499 if (!new_block->host) {
1500 if (xen_enabled()) {
1501 xen_ram_alloc(new_block->offset, new_block->max_length,
1504 new_block->host = phys_mem_alloc(new_block->max_length,
1505 &new_block->mr->align);
1506 if (!new_block->host) {
1507 error_setg_errno(errp, errno,
1508 "cannot set up guest memory '%s'",
1509 memory_region_name(new_block->mr));
1510 qemu_mutex_unlock_ramlist();
1513 memory_try_enable_merging(new_block->host, new_block->max_length);
1517 new_ram_size = MAX(old_ram_size,
1518 (new_block->offset + new_block->max_length) >> TARGET_PAGE_BITS);
1519 if (new_ram_size > old_ram_size) {
1520 migration_bitmap_extend(old_ram_size, new_ram_size);
1522 /* Keep the list sorted from biggest to smallest block. Unlike QTAILQ,
1523 * QLIST (which has an RCU-friendly variant) does not have insertion at
1524 * tail, so save the last element in last_block.
1526 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1528 if (block->max_length < new_block->max_length) {
1533 QLIST_INSERT_BEFORE_RCU(block, new_block, next);
1534 } else if (last_block) {
1535 QLIST_INSERT_AFTER_RCU(last_block, new_block, next);
1536 } else { /* list is empty */
1537 QLIST_INSERT_HEAD_RCU(&ram_list.blocks, new_block, next);
1539 ram_list.mru_block = NULL;
1541 /* Write list before version */
1544 qemu_mutex_unlock_ramlist();
1546 new_ram_size = last_ram_offset() >> TARGET_PAGE_BITS;
1548 if (new_ram_size > old_ram_size) {
1551 /* ram_list.dirty_memory[] is protected by the iothread lock. */
1552 for (i = 0; i < DIRTY_MEMORY_NUM; i++) {
1553 ram_list.dirty_memory[i] =
1554 bitmap_zero_extend(ram_list.dirty_memory[i],
1555 old_ram_size, new_ram_size);
1558 cpu_physical_memory_set_dirty_range(new_block->offset,
1559 new_block->used_length,
1562 if (new_block->host) {
1563 qemu_ram_setup_dump(new_block->host, new_block->max_length);
1564 qemu_madvise(new_block->host, new_block->max_length, QEMU_MADV_HUGEPAGE);
1565 qemu_madvise(new_block->host, new_block->max_length, QEMU_MADV_DONTFORK);
1566 if (kvm_enabled()) {
1567 kvm_setup_guest_memory(new_block->host, new_block->max_length);
1571 return new_block->offset;
1575 ram_addr_t qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr,
1576 bool share, const char *mem_path,
1579 RAMBlock *new_block;
1581 Error *local_err = NULL;
1583 if (xen_enabled()) {
1584 error_setg(errp, "-mem-path not supported with Xen");
1588 if (phys_mem_alloc != qemu_anon_ram_alloc) {
1590 * file_ram_alloc() needs to allocate just like
1591 * phys_mem_alloc, but we haven't bothered to provide
1595 "-mem-path not supported with this accelerator");
1599 size = TARGET_PAGE_ALIGN(size);
1600 new_block = g_malloc0(sizeof(*new_block));
1602 new_block->used_length = size;
1603 new_block->max_length = size;
1604 new_block->flags = share ? RAM_SHARED : 0;
1605 new_block->flags |= RAM_FILE;
1606 new_block->host = file_ram_alloc(new_block, size,
1608 if (!new_block->host) {
1613 addr = ram_block_add(new_block, &local_err);
1616 error_propagate(errp, local_err);
1624 ram_addr_t qemu_ram_alloc_internal(ram_addr_t size, ram_addr_t max_size,
1625 void (*resized)(const char*,
1628 void *host, bool resizeable,
1629 MemoryRegion *mr, Error **errp)
1631 RAMBlock *new_block;
1633 Error *local_err = NULL;
1635 size = TARGET_PAGE_ALIGN(size);
1636 max_size = TARGET_PAGE_ALIGN(max_size);
1637 new_block = g_malloc0(sizeof(*new_block));
1639 new_block->resized = resized;
1640 new_block->used_length = size;
1641 new_block->max_length = max_size;
1642 assert(max_size >= size);
1644 new_block->host = host;
1646 new_block->flags |= RAM_PREALLOC;
1649 new_block->flags |= RAM_RESIZEABLE;
1651 addr = ram_block_add(new_block, &local_err);
1654 error_propagate(errp, local_err);
1660 ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
1661 MemoryRegion *mr, Error **errp)
1663 return qemu_ram_alloc_internal(size, size, NULL, host, false, mr, errp);
1666 ram_addr_t qemu_ram_alloc(ram_addr_t size, MemoryRegion *mr, Error **errp)
1668 return qemu_ram_alloc_internal(size, size, NULL, NULL, false, mr, errp);
1671 ram_addr_t qemu_ram_alloc_resizeable(ram_addr_t size, ram_addr_t maxsz,
1672 void (*resized)(const char*,
1675 MemoryRegion *mr, Error **errp)
1677 return qemu_ram_alloc_internal(size, maxsz, resized, NULL, true, mr, errp);
1680 void qemu_ram_free_from_ptr(ram_addr_t addr)
1684 qemu_mutex_lock_ramlist();
1685 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1686 if (addr == block->offset) {
1687 QLIST_REMOVE_RCU(block, next);
1688 ram_list.mru_block = NULL;
1689 /* Write list before version */
1692 g_free_rcu(block, rcu);
1696 qemu_mutex_unlock_ramlist();
1699 static void reclaim_ramblock(RAMBlock *block)
1701 if (block->flags & RAM_PREALLOC) {
1703 } else if (xen_enabled()) {
1704 xen_invalidate_map_cache_entry(block->host);
1706 } else if (block->fd >= 0) {
1707 if (block->flags & RAM_FILE) {
1708 qemu_ram_munmap(block->host, block->max_length);
1710 munmap(block->host, block->max_length);
1715 qemu_anon_ram_free(block->host, block->max_length);
1720 void qemu_ram_free(ram_addr_t addr)
1724 qemu_mutex_lock_ramlist();
1725 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1726 if (addr == block->offset) {
1727 QLIST_REMOVE_RCU(block, next);
1728 ram_list.mru_block = NULL;
1729 /* Write list before version */
1732 call_rcu(block, reclaim_ramblock, rcu);
1736 qemu_mutex_unlock_ramlist();
1740 void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
1747 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1748 offset = addr - block->offset;
1749 if (offset < block->max_length) {
1750 vaddr = ramblock_ptr(block, offset);
1751 if (block->flags & RAM_PREALLOC) {
1753 } else if (xen_enabled()) {
1757 if (block->fd >= 0) {
1758 flags |= (block->flags & RAM_SHARED ?
1759 MAP_SHARED : MAP_PRIVATE);
1760 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1761 flags, block->fd, offset);
1764 * Remap needs to match alloc. Accelerators that
1765 * set phys_mem_alloc never remap. If they did,
1766 * we'd need a remap hook here.
1768 assert(phys_mem_alloc == qemu_anon_ram_alloc);
1770 flags |= MAP_PRIVATE | MAP_ANONYMOUS;
1771 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1774 if (area != vaddr) {
1775 fprintf(stderr, "Could not remap addr: "
1776 RAM_ADDR_FMT "@" RAM_ADDR_FMT "\n",
1780 memory_try_enable_merging(vaddr, length);
1781 qemu_ram_setup_dump(vaddr, length);
1786 #endif /* !_WIN32 */
1788 int qemu_get_ram_fd(ram_addr_t addr)
1794 block = qemu_get_ram_block(addr);
1800 void *qemu_get_ram_block_host_ptr(ram_addr_t addr)
1806 block = qemu_get_ram_block(addr);
1807 ptr = ramblock_ptr(block, 0);
1812 /* Return a host pointer to ram allocated with qemu_ram_alloc.
1813 * This should not be used for general purpose DMA. Use address_space_map
1814 * or address_space_rw instead. For local memory (e.g. video ram) that the
1815 * device owns, use memory_region_get_ram_ptr.
1817 * By the time this function returns, the returned pointer is not protected
1818 * by RCU anymore. If the caller is not within an RCU critical section and
1819 * does not hold the iothread lock, it must have other means of protecting the
1820 * pointer, such as a reference to the region that includes the incoming
1823 void *qemu_get_ram_ptr(ram_addr_t addr)
1829 block = qemu_get_ram_block(addr);
1831 if (xen_enabled() && block->host == NULL) {
1832 /* We need to check if the requested address is in the RAM
1833 * because we don't want to map the entire memory in QEMU.
1834 * In that case just map until the end of the page.
1836 if (block->offset == 0) {
1837 ptr = xen_map_cache(addr, 0, 0);
1841 block->host = xen_map_cache(block->offset, block->max_length, 1);
1843 ptr = ramblock_ptr(block, addr - block->offset);
1850 /* Return a host pointer to guest's ram. Similar to qemu_get_ram_ptr
1851 * but takes a size argument.
1853 * By the time this function returns, the returned pointer is not protected
1854 * by RCU anymore. If the caller is not within an RCU critical section and
1855 * does not hold the iothread lock, it must have other means of protecting the
1856 * pointer, such as a reference to the region that includes the incoming
1859 static void *qemu_ram_ptr_length(ram_addr_t addr, hwaddr *size)
1865 if (xen_enabled()) {
1866 return xen_map_cache(addr, *size, 1);
1870 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1871 if (addr - block->offset < block->max_length) {
1872 if (addr - block->offset + *size > block->max_length)
1873 *size = block->max_length - addr + block->offset;
1874 ptr = ramblock_ptr(block, addr - block->offset);
1880 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
1886 * Translates a host ptr back to a RAMBlock, a ram_addr and an offset
1889 * ptr: Host pointer to look up
1890 * round_offset: If true round the result offset down to a page boundary
1891 * *ram_addr: set to result ram_addr
1892 * *offset: set to result offset within the RAMBlock
1894 * Returns: RAMBlock (or NULL if not found)
1896 * By the time this function returns, the returned pointer is not protected
1897 * by RCU anymore. If the caller is not within an RCU critical section and
1898 * does not hold the iothread lock, it must have other means of protecting the
1899 * pointer, such as a reference to the region that includes the incoming
1902 RAMBlock *qemu_ram_block_from_host(void *ptr, bool round_offset,
1903 ram_addr_t *ram_addr,
1907 uint8_t *host = ptr;
1909 if (xen_enabled()) {
1911 *ram_addr = xen_ram_addr_from_mapcache(ptr);
1912 block = qemu_get_ram_block(*ram_addr);
1914 *offset = (host - block->host);
1921 block = atomic_rcu_read(&ram_list.mru_block);
1922 if (block && block->host && host - block->host < block->max_length) {
1926 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1927 /* This case happens when the block is not mapped. */
1928 if (block->host == NULL) {
1931 if (host - block->host < block->max_length) {
1940 *offset = (host - block->host);
1942 *offset &= TARGET_PAGE_MASK;
1944 *ram_addr = block->offset + *offset;
1949 /* Some of the softmmu routines need to translate from a host pointer
1950 (typically a TLB entry) back to a ram offset. */
1951 MemoryRegion *qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr)
1954 ram_addr_t offset; /* Not used */
1956 block = qemu_ram_block_from_host(ptr, false, ram_addr, &offset);
1965 static void notdirty_mem_write(void *opaque, hwaddr ram_addr,
1966 uint64_t val, unsigned size)
1968 if (!cpu_physical_memory_get_dirty_flag(ram_addr, DIRTY_MEMORY_CODE)) {
1969 tb_invalidate_phys_page_fast(ram_addr, size);
1973 stb_p(qemu_get_ram_ptr(ram_addr), val);
1976 stw_p(qemu_get_ram_ptr(ram_addr), val);
1979 stl_p(qemu_get_ram_ptr(ram_addr), val);
1984 /* Set both VGA and migration bits for simplicity and to remove
1985 * the notdirty callback faster.
1987 cpu_physical_memory_set_dirty_range(ram_addr, size,
1988 DIRTY_CLIENTS_NOCODE);
1989 /* we remove the notdirty callback only if the code has been
1991 if (!cpu_physical_memory_is_clean(ram_addr)) {
1992 tlb_set_dirty(current_cpu, current_cpu->mem_io_vaddr);
1996 static bool notdirty_mem_accepts(void *opaque, hwaddr addr,
1997 unsigned size, bool is_write)
2002 static const MemoryRegionOps notdirty_mem_ops = {
2003 .write = notdirty_mem_write,
2004 .valid.accepts = notdirty_mem_accepts,
2005 .endianness = DEVICE_NATIVE_ENDIAN,
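/* Roughly, notdirty_mem_ops backs PHYS_SECTION_NOTDIRTY: writes to pages
 * whose DIRTY_MEMORY_CODE bit is still clean (i.e. pages that may contain
 * translated code) are routed here so the affected TBs can be invalidated
 * before the page is marked dirty again.
 */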
2008 /* Generate a debug exception if a watchpoint has been hit. */
2009 static void check_watchpoint(int offset, int len, MemTxAttrs attrs, int flags)
2011 CPUState *cpu = current_cpu;
2012 CPUArchState *env = cpu->env_ptr;
2013 target_ulong pc, cs_base;
2018 if (cpu->watchpoint_hit) {
2019 /* We re-entered the check after replacing the TB. Now raise
2020 * the debug interrupt so that it will trigger after the
2021 * current instruction. */
2022 cpu_interrupt(cpu, CPU_INTERRUPT_DEBUG);
2025 vaddr = (cpu->mem_io_vaddr & TARGET_PAGE_MASK) + offset;
2026 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
2027 if (cpu_watchpoint_address_matches(wp, vaddr, len)
2028 && (wp->flags & flags)) {
2029 if (flags == BP_MEM_READ) {
2030 wp->flags |= BP_WATCHPOINT_HIT_READ;
2032 wp->flags |= BP_WATCHPOINT_HIT_WRITE;
2034 wp->hitaddr = vaddr;
2035 wp->hitattrs = attrs;
2036 if (!cpu->watchpoint_hit) {
2037 cpu->watchpoint_hit = wp;
2038 tb_check_watchpoint(cpu);
2039 if (wp->flags & BP_STOP_BEFORE_ACCESS) {
2040 cpu->exception_index = EXCP_DEBUG;
2043 cpu_get_tb_cpu_state(env, &pc, &cs_base, &cpu_flags);
2044 tb_gen_code(cpu, pc, cs_base, cpu_flags, 1);
2045 cpu_resume_from_signal(cpu, NULL);
2049 wp->flags &= ~BP_WATCHPOINT_HIT;
2054 /* Watchpoint access routines. Watchpoints are inserted using TLB tricks,
2055 so these check for a hit then pass through to the normal out-of-line
2057 static MemTxResult watch_mem_read(void *opaque, hwaddr addr, uint64_t *pdata,
2058 unsigned size, MemTxAttrs attrs)
2063 check_watchpoint(addr & ~TARGET_PAGE_MASK, size, attrs, BP_MEM_READ);
2066 data = address_space_ldub(&address_space_memory, addr, attrs, &res);
2069 data = address_space_lduw(&address_space_memory, addr, attrs, &res);
2072 data = address_space_ldl(&address_space_memory, addr, attrs, &res);
2080 static MemTxResult watch_mem_write(void *opaque, hwaddr addr,
2081 uint64_t val, unsigned size,
2086 check_watchpoint(addr & ~TARGET_PAGE_MASK, size, attrs, BP_MEM_WRITE);
2089 address_space_stb(&address_space_memory, addr, val, attrs, &res);
2092 address_space_stw(&address_space_memory, addr, val, attrs, &res);
2095 address_space_stl(&address_space_memory, addr, val, attrs, &res);
2102 static const MemoryRegionOps watch_mem_ops = {
2103 .read_with_attrs = watch_mem_read,
2104 .write_with_attrs = watch_mem_write,
2105 .endianness = DEVICE_NATIVE_ENDIAN,
2108 static MemTxResult subpage_read(void *opaque, hwaddr addr, uint64_t *data,
2109 unsigned len, MemTxAttrs attrs)
2111 subpage_t *subpage = opaque;
2115 #if defined(DEBUG_SUBPAGE)
2116 printf("%s: subpage %p len %u addr " TARGET_FMT_plx "\n", __func__,
2117 subpage, len, addr);
2119 res = address_space_read(subpage->as, addr + subpage->base,
2126 *data = ldub_p(buf);
2129 *data = lduw_p(buf);
2142 static MemTxResult subpage_write(void *opaque, hwaddr addr,
2143 uint64_t value, unsigned len, MemTxAttrs attrs)
2145 subpage_t *subpage = opaque;
2148 #if defined(DEBUG_SUBPAGE)
2149 printf("%s: subpage %p len %u addr " TARGET_FMT_plx
2150 " value %"PRIx64"\n",
2151 __func__, subpage, len, addr, value);
2169 return address_space_write(subpage->as, addr + subpage->base,
2173 static bool subpage_accepts(void *opaque, hwaddr addr,
2174 unsigned len, bool is_write)
2176 subpage_t *subpage = opaque;
2177 #if defined(DEBUG_SUBPAGE)
2178 printf("%s: subpage %p %c len %u addr " TARGET_FMT_plx "\n",
2179 __func__, subpage, is_write ? 'w' : 'r', len, addr);
2182 return address_space_access_valid(subpage->as, addr + subpage->base,
2186 static const MemoryRegionOps subpage_ops = {
2187 .read_with_attrs = subpage_read,
2188 .write_with_attrs = subpage_write,
2189 .impl.min_access_size = 1,
2190 .impl.max_access_size = 8,
2191 .valid.min_access_size = 1,
2192 .valid.max_access_size = 8,
2193 .valid.accepts = subpage_accepts,
2194 .endianness = DEVICE_NATIVE_ENDIAN,
2197 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
2202 if (start >= TARGET_PAGE_SIZE || end >= TARGET_PAGE_SIZE)
2204 idx = SUBPAGE_IDX(start);
2205 eidx = SUBPAGE_IDX(end);
2206 #if defined(DEBUG_SUBPAGE)
2207 printf("%s: %p start %08x end %08x idx %08x eidx %08x section %d\n",
2208 __func__, mmio, start, end, idx, eidx, section);
2210 for (; idx <= eidx; idx++) {
2211 mmio->sub_section[idx] = section;
2217 static subpage_t *subpage_init(AddressSpace *as, hwaddr base)
2221 mmio = g_malloc0(sizeof(subpage_t));
2225 memory_region_init_io(&mmio->iomem, NULL, &subpage_ops, mmio,
2226 NULL, TARGET_PAGE_SIZE);
2227 mmio->iomem.subpage = true;
2228 #if defined(DEBUG_SUBPAGE)
2229 printf("%s: %p base " TARGET_FMT_plx " len %08x\n", __func__,
2230 mmio, base, TARGET_PAGE_SIZE);
2232 subpage_register(mmio, 0, TARGET_PAGE_SIZE-1, PHYS_SECTION_UNASSIGNED);
2237 static uint16_t dummy_section(PhysPageMap *map, AddressSpace *as,
2241 MemoryRegionSection section = {
2242 .address_space = as,
2244 .offset_within_address_space = 0,
2245 .offset_within_region = 0,
2246 .size = int128_2_64(),
2249 return phys_section_add(map, &section);
2252 MemoryRegion *iotlb_to_region(CPUState *cpu, hwaddr index)
2254 CPUAddressSpace *cpuas = &cpu->cpu_ases[0];
2255 AddressSpaceDispatch *d = atomic_rcu_read(&cpuas->memory_dispatch);
2256 MemoryRegionSection *sections = d->map.sections;
2258 return sections[index & ~TARGET_PAGE_MASK].mr;
2261 static void io_mem_init(void)
2263 memory_region_init_io(&io_mem_rom, NULL, &unassigned_mem_ops, NULL, NULL, UINT64_MAX);
2264 memory_region_init_io(&io_mem_unassigned, NULL, &unassigned_mem_ops, NULL,
2266 memory_region_init_io(&io_mem_notdirty, NULL, &notdirty_mem_ops, NULL,
2268 memory_region_init_io(&io_mem_watch, NULL, &watch_mem_ops, NULL,
2272 static void mem_begin(MemoryListener *listener)
2274 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
2275 AddressSpaceDispatch *d = g_new0(AddressSpaceDispatch, 1);
2278 n = dummy_section(&d->map, as, &io_mem_unassigned);
2279 assert(n == PHYS_SECTION_UNASSIGNED);
2280 n = dummy_section(&d->map, as, &io_mem_notdirty);
2281 assert(n == PHYS_SECTION_NOTDIRTY);
2282 n = dummy_section(&d->map, as, &io_mem_rom);
2283 assert(n == PHYS_SECTION_ROM);
2284 n = dummy_section(&d->map, as, &io_mem_watch);
2285 assert(n == PHYS_SECTION_WATCH);
2287 d->phys_map = (PhysPageEntry) { .ptr = PHYS_MAP_NODE_NIL, .skip = 1 };
2289 as->next_dispatch = d;
2292 static void address_space_dispatch_free(AddressSpaceDispatch *d)
2294 phys_sections_free(&d->map);
2298 static void mem_commit(MemoryListener *listener)
2300 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
2301 AddressSpaceDispatch *cur = as->dispatch;
2302 AddressSpaceDispatch *next = as->next_dispatch;
2304 phys_page_compact_all(next, next->map.nodes_nb);
2306 atomic_rcu_set(&as->dispatch, next);
2308 call_rcu(cur, address_space_dispatch_free, rcu);
2312 static void tcg_commit(MemoryListener *listener)
2314 CPUAddressSpace *cpuas;
2315 AddressSpaceDispatch *d;
2317 /* since each CPU stores ram addresses in its TLB cache, we must
2318 reset the modified entries */
2319 cpuas = container_of(listener, CPUAddressSpace, tcg_as_listener);
2320 cpu_reloading_memory_map();
2321 /* The CPU and TLB are protected by the iothread lock.
2322 * We reload the dispatch pointer now because cpu_reloading_memory_map()
2323 * may have split the RCU critical section.
2325 d = atomic_rcu_read(&cpuas->as->dispatch);
2326 cpuas->memory_dispatch = d;
2327 tlb_flush(cpuas->cpu, 1);
2330 void address_space_init_dispatch(AddressSpace *as)
2332 as->dispatch = NULL;
2333 as->dispatch_listener = (MemoryListener) {
2335 .commit = mem_commit,
2336 .region_add = mem_add,
2337 .region_nop = mem_add,
2340 memory_listener_register(&as->dispatch_listener, as);
2343 void address_space_unregister(AddressSpace *as)
2345 memory_listener_unregister(&as->dispatch_listener);
2348 void address_space_destroy_dispatch(AddressSpace *as)
2350 AddressSpaceDispatch *d = as->dispatch;
2352 atomic_rcu_set(&as->dispatch, NULL);
2354 call_rcu(d, address_space_dispatch_free, rcu);
2358 static void memory_map_init(void)
2360 system_memory = g_malloc(sizeof(*system_memory));
2362 memory_region_init(system_memory, NULL, "system", UINT64_MAX);
2363 address_space_init(&address_space_memory, system_memory, "memory");
2365 system_io = g_malloc(sizeof(*system_io));
2366 memory_region_init_io(system_io, NULL, &unassigned_io_ops, NULL, "io",
2368 address_space_init(&address_space_io, system_io, "I/O");
2371 MemoryRegion *get_system_memory(void)
2373 return system_memory;
2376 MemoryRegion *get_system_io(void)
2381 #endif /* !defined(CONFIG_USER_ONLY) */
2383 /* physical memory access (slow version, mainly for debug) */
2384 #if defined(CONFIG_USER_ONLY)
2385 int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
2386 uint8_t *buf, int len, int is_write)
2393 page = addr & TARGET_PAGE_MASK;
2394 l = (page + TARGET_PAGE_SIZE) - addr;
2397 flags = page_get_flags(page);
2398 if (!(flags & PAGE_VALID))
2401 if (!(flags & PAGE_WRITE))
2403 /* XXX: this code should not depend on lock_user */
2404 if (!(p = lock_user(VERIFY_WRITE, addr, l, 0)))
2407 unlock_user(p, addr, l);
2409 if (!(flags & PAGE_READ))
2411 /* XXX: this code should not depend on lock_user */
2412 if (!(p = lock_user(VERIFY_READ, addr, l, 1)))
2415 unlock_user(p, addr, 0);
2426 static void invalidate_and_set_dirty(MemoryRegion *mr, hwaddr addr,
2429 uint8_t dirty_log_mask = memory_region_get_dirty_log_mask(mr);
2430 /* No early return if dirty_log_mask is or becomes 0, because
2431 * cpu_physical_memory_set_dirty_range will still call
2432 * xen_modified_memory.
2434 if (dirty_log_mask) {
2436 cpu_physical_memory_range_includes_clean(addr, length, dirty_log_mask);
2438 if (dirty_log_mask & (1 << DIRTY_MEMORY_CODE)) {
2439 tb_invalidate_phys_range(addr, addr + length);
2440 dirty_log_mask &= ~(1 << DIRTY_MEMORY_CODE);
2442 cpu_physical_memory_set_dirty_range(addr, length, dirty_log_mask);
2445 static int memory_access_size(MemoryRegion *mr, unsigned l, hwaddr addr)
2447 unsigned access_size_max = mr->ops->valid.max_access_size;
2449 /* Regions are assumed to support 1-4 byte accesses unless
2450 otherwise specified. */
2451 if (access_size_max == 0) {
2452 access_size_max = 4;
2455 /* Bound the maximum access by the alignment of the address. */
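/* (addr & -addr) isolates the lowest set bit of the address; for example an
 * address ending in 0x6 yields 2 and one ending in 0x8 yields 8.
 */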
2456 if (!mr->ops->impl.unaligned) {
2457 unsigned align_size_max = addr & -addr;
2458 if (align_size_max != 0 && align_size_max < access_size_max) {
2459 access_size_max = align_size_max;
2463 /* Don't attempt accesses larger than the maximum. */
2464 if (l > access_size_max) {
2465 l = access_size_max;
2472 static bool prepare_mmio_access(MemoryRegion *mr)
2474 bool unlocked = !qemu_mutex_iothread_locked();
2475 bool release_lock = false;
2477 if (unlocked && mr->global_locking) {
2478 qemu_mutex_lock_iothread();
2480 release_lock = true;
2482 if (mr->flush_coalesced_mmio) {
2484 qemu_mutex_lock_iothread();
2486 qemu_flush_coalesced_mmio_buffer();
2488 qemu_mutex_unlock_iothread();
2492 return release_lock;
2495 MemTxResult address_space_rw(AddressSpace *as, hwaddr addr, MemTxAttrs attrs,
2496 uint8_t *buf, int len, bool is_write)
2503 MemTxResult result = MEMTX_OK;
2504 bool release_lock = false;
2509 mr = address_space_translate(as, addr, &addr1, &l, is_write);
2512 if (!memory_access_is_direct(mr, is_write)) {
2513 release_lock |= prepare_mmio_access(mr);
2514 l = memory_access_size(mr, l, addr1);
2515 /* XXX: could force current_cpu to NULL to avoid
2519 /* 64 bit write access */
2521 result |= memory_region_dispatch_write(mr, addr1, val, 8,
2525 /* 32 bit write access */
2527 result |= memory_region_dispatch_write(mr, addr1, val, 4,
2531 /* 16 bit write access */
2533 result |= memory_region_dispatch_write(mr, addr1, val, 2,
2537 /* 8 bit write access */
2539 result |= memory_region_dispatch_write(mr, addr1, val, 1,
2546 addr1 += memory_region_get_ram_addr(mr);
2548 ptr = qemu_get_ram_ptr(addr1);
2549 memcpy(ptr, buf, l);
2550 invalidate_and_set_dirty(mr, addr1, l);
2553 if (!memory_access_is_direct(mr, is_write)) {
2555 release_lock |= prepare_mmio_access(mr);
2556 l = memory_access_size(mr, l, addr1);
2559 /* 64 bit read access */
2560 result |= memory_region_dispatch_read(mr, addr1, &val, 8,
2565 /* 32 bit read access */
2566 result |= memory_region_dispatch_read(mr, addr1, &val, 4,
2571 /* 16 bit read access */
2572 result |= memory_region_dispatch_read(mr, addr1, &val, 2,
2577 /* 8 bit read access */
2578 result |= memory_region_dispatch_read(mr, addr1, &val, 1,
2587 ptr = qemu_get_ram_ptr(mr->ram_addr + addr1);
2588 memcpy(buf, ptr, l);
2593 qemu_mutex_unlock_iothread();
2594 release_lock = false;
2606 MemTxResult address_space_write(AddressSpace *as, hwaddr addr, MemTxAttrs attrs,
2607 const uint8_t *buf, int len)
2609 return address_space_rw(as, addr, attrs, (uint8_t *)buf, len, true);
2612 MemTxResult address_space_read(AddressSpace *as, hwaddr addr, MemTxAttrs attrs,
2613 uint8_t *buf, int len)
2615 return address_space_rw(as, addr, attrs, buf, len, false);
2619 void cpu_physical_memory_rw(hwaddr addr, uint8_t *buf,
2620 int len, int is_write)
2622 address_space_rw(&address_space_memory, addr, MEMTXATTRS_UNSPECIFIED,
2623 buf, len, is_write);
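/* Usage sketch (illustrative only, not part of this file): a device model
 * typically validates a DMA range and then writes it through the system
 * address space.  The function and variable names below are hypothetical.
 */
#if 0
static void example_dma_write(hwaddr guest_addr, const uint8_t *buf, int len)
{
    MemTxResult res;

    /* optionally check that the whole range is accessible first */
    if (!address_space_access_valid(&address_space_memory, guest_addr,
                                    len, true)) {
        return;
    }
    res = address_space_write(&address_space_memory, guest_addr,
                              MEMTXATTRS_UNSPECIFIED, buf, len);
    if (res != MEMTX_OK) {
        /* report a DMA error to the guest */
    }
}
#endif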
2626 enum write_rom_type {
2631 static inline void cpu_physical_memory_write_rom_internal(AddressSpace *as,
2632 hwaddr addr, const uint8_t *buf, int len, enum write_rom_type type)
2642 mr = address_space_translate(as, addr, &addr1, &l, true);
2644 if (!(memory_region_is_ram(mr) ||
2645 memory_region_is_romd(mr))) {
2646 l = memory_access_size(mr, l, addr1);
2648 addr1 += memory_region_get_ram_addr(mr);
2650 ptr = qemu_get_ram_ptr(addr1);
2653 memcpy(ptr, buf, l);
2654 invalidate_and_set_dirty(mr, addr1, l);
2657 flush_icache_range((uintptr_t)ptr, (uintptr_t)ptr + l);
2668 /* used for ROM loading: can write in RAM and ROM */
2669 void cpu_physical_memory_write_rom(AddressSpace *as, hwaddr addr,
2670 const uint8_t *buf, int len)
2672 cpu_physical_memory_write_rom_internal(as, addr, buf, len, WRITE_DATA);
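/* Usage sketch (illustrative, not part of this file): firmware loaders use
 * this helper so the data lands even in regions that reject ordinary guest
 * writes.  "rom_data" and "rom_size" are hypothetical names.
 */
#if 0
static void example_load_rom_image(AddressSpace *as, hwaddr base,
                                   const uint8_t *rom_data, int rom_size)
{
    cpu_physical_memory_write_rom(as, base, rom_data, rom_size);
}
#endif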
2675 void cpu_flush_icache_range(hwaddr start, int len)
2678 * This function should do the same thing as an icache flush that was
2679 * triggered from within the guest. For TCG we are always cache coherent,
2680 * so there is no need to flush anything. For KVM / Xen we need to flush
2681 * the host's instruction cache at least.
2683 if (tcg_enabled()) {
2687 cpu_physical_memory_write_rom_internal(&address_space_memory,
2688 start, NULL, len, FLUSH_CACHE);
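/* Illustrative note (not part of this file): code that patches executable
 * guest memory through a host pointer is expected to flush the range
 * itself, e.g. cpu_flush_icache_range(code_base, code_size), where
 * "code_base" and "code_size" are hypothetical names.
 */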
2699 static BounceBuffer bounce;
2701 typedef struct MapClient {
2703 QLIST_ENTRY(MapClient) link;
2706 QemuMutex map_client_list_lock;
2707 static QLIST_HEAD(map_client_list, MapClient) map_client_list
2708 = QLIST_HEAD_INITIALIZER(map_client_list);
2710 static void cpu_unregister_map_client_do(MapClient *client)
2712 QLIST_REMOVE(client, link);
2716 static void cpu_notify_map_clients_locked(void)
2720 while (!QLIST_EMPTY(&map_client_list)) {
2721 client = QLIST_FIRST(&map_client_list);
2722 qemu_bh_schedule(client->bh);
2723 cpu_unregister_map_client_do(client);
2727 void cpu_register_map_client(QEMUBH *bh)
2729 MapClient *client = g_malloc(sizeof(*client));
2731 qemu_mutex_lock(&map_client_list_lock);
2733 QLIST_INSERT_HEAD(&map_client_list, client, link);
2734 if (!atomic_read(&bounce.in_use)) {
2735 cpu_notify_map_clients_locked();
2737 qemu_mutex_unlock(&map_client_list_lock);
2740 void cpu_exec_init_all(void)
2742 qemu_mutex_init(&ram_list.mutex);
2745 qemu_mutex_init(&map_client_list_lock);
2748 void cpu_unregister_map_client(QEMUBH *bh)
2752 qemu_mutex_lock(&map_client_list_lock);
2753 QLIST_FOREACH(client, &map_client_list, link) {
2754 if (client->bh == bh) {
2755 cpu_unregister_map_client_do(client);
2759 qemu_mutex_unlock(&map_client_list_lock);
2762 static void cpu_notify_map_clients(void)
2764 qemu_mutex_lock(&map_client_list_lock);
2765 cpu_notify_map_clients_locked();
2766 qemu_mutex_unlock(&map_client_list_lock);
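/* Usage sketch (illustrative, not part of this file): when
 * address_space_map() fails because the single bounce buffer is busy, a
 * caller can register a bottom half to be woken once it is free and retry
 * from there.  "retry_dma_map" and "opaque" are hypothetical.
 */
#if 0
static void retry_dma_map(void *opaque)
{
    /* runs from a bottom half once the bounce buffer is free;
     * call address_space_map() again from here */
}

static void example_wait_for_bounce_buffer(void *opaque)
{
    QEMUBH *bh = qemu_bh_new(retry_dma_map, opaque);

    /* the client is removed from the list automatically when notified;
     * cpu_unregister_map_client(bh) is only needed to cancel the wait */
    cpu_register_map_client(bh);
}
#endif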
2769 bool address_space_access_valid(AddressSpace *as, hwaddr addr, int len, bool is_write)
2777 mr = address_space_translate(as, addr, &xlat, &l, is_write);
2778 if (!memory_access_is_direct(mr, is_write)) {
2779 l = memory_access_size(mr, l, addr);
2780 if (!memory_region_access_valid(mr, xlat, l, is_write)) {
2792 /* Map a physical memory region into a host virtual address.
2793 * May map a subset of the requested range, given by and returned in *plen.
2794 * May return NULL if resources needed to perform the mapping are exhausted.
2795 * Use only for reads OR writes - not for read-modify-write operations.
2796 * Use cpu_register_map_client() to know when retrying the map operation is
2797 * likely to succeed.
2799 void *address_space_map(AddressSpace *as,
2806 hwaddr l, xlat, base;
2807 MemoryRegion *mr, *this_mr;
2816 mr = address_space_translate(as, addr, &xlat, &l, is_write);
2818 if (!memory_access_is_direct(mr, is_write)) {
2819 if (atomic_xchg(&bounce.in_use, true)) {
2823 /* Avoid unbounded allocations */
2824 l = MIN(l, TARGET_PAGE_SIZE);
2825 bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, l);
2829 memory_region_ref(mr);
2832 address_space_read(as, addr, MEMTXATTRS_UNSPECIFIED,
2838 return bounce.buffer;
2842 raddr = memory_region_get_ram_addr(mr);
2853 this_mr = address_space_translate(as, addr, &xlat, &l, is_write);
2854 if (this_mr != mr || xlat != base + done) {
2859 memory_region_ref(mr);
2862 return qemu_ram_ptr_length(raddr + base, plen);
2865 /* Unmaps a memory region previously mapped by address_space_map().
2866 * Will also mark the memory as dirty if is_write == 1. access_len gives
2867 * the amount of memory that was actually read or written by the caller.
2869 void address_space_unmap(AddressSpace *as, void *buffer, hwaddr len,
2870 int is_write, hwaddr access_len)
2872 if (buffer != bounce.buffer) {
2876 mr = qemu_ram_addr_from_host(buffer, &addr1);
2879 invalidate_and_set_dirty(mr, addr1, access_len);
2881 if (xen_enabled()) {
2882 xen_invalidate_map_cache_entry(buffer);
2884 memory_region_unref(mr);
2888 address_space_write(as, bounce.addr, MEMTXATTRS_UNSPECIFIED,
2889 bounce.buffer, access_len);
2891 qemu_vfree(bounce.buffer);
2892 bounce.buffer = NULL;
2893 memory_region_unref(bounce.mr);
2894 atomic_mb_set(&bounce.in_use, false);
2895 cpu_notify_map_clients();
2898 void *cpu_physical_memory_map(hwaddr addr,
2902 return address_space_map(&address_space_memory, addr, plen, is_write);
2905 void cpu_physical_memory_unmap(void *buffer, hwaddr len,
2906 int is_write, hwaddr access_len)
2908 return address_space_unmap(&address_space_memory, buffer, len, is_write, access_len);
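/* Usage sketch (illustrative, not part of this file): the usual
 * map/modify/unmap pattern for zero-copy access to guest memory.  The
 * mapping may cover less than requested, so the length returned in *plen
 * must be honoured.  "fill_buffer" is a hypothetical helper.
 */
#if 0
static void example_map_and_fill(AddressSpace *as, hwaddr addr, hwaddr size)
{
    hwaddr plen = size;
    void *host = address_space_map(as, addr, &plen, true);

    if (!host) {
        /* resources exhausted; see cpu_register_map_client() above */
        return;
    }
    fill_buffer(host, plen);        /* write at most plen bytes */
    address_space_unmap(as, host, plen, true, plen);
}
#endif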
2911 /* warning: addr must be aligned */
2912 static inline uint32_t address_space_ldl_internal(AddressSpace *as, hwaddr addr,
2914 MemTxResult *result,
2915 enum device_endian endian)
2923 bool release_lock = false;
2926 mr = address_space_translate(as, addr, &addr1, &l, false);
2927 if (l < 4 || !memory_access_is_direct(mr, false)) {
2928 release_lock |= prepare_mmio_access(mr);
2931 r = memory_region_dispatch_read(mr, addr1, &val, 4, attrs);
2932 #if defined(TARGET_WORDS_BIGENDIAN)
2933 if (endian == DEVICE_LITTLE_ENDIAN) {
2937 if (endian == DEVICE_BIG_ENDIAN) {
2943 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
2947 case DEVICE_LITTLE_ENDIAN:
2948 val = ldl_le_p(ptr);
2950 case DEVICE_BIG_ENDIAN:
2951 val = ldl_be_p(ptr);
2963 qemu_mutex_unlock_iothread();
2969 uint32_t address_space_ldl(AddressSpace *as, hwaddr addr,
2970 MemTxAttrs attrs, MemTxResult *result)
2972 return address_space_ldl_internal(as, addr, attrs, result,
2973 DEVICE_NATIVE_ENDIAN);
2976 uint32_t address_space_ldl_le(AddressSpace *as, hwaddr addr,
2977 MemTxAttrs attrs, MemTxResult *result)
2979 return address_space_ldl_internal(as, addr, attrs, result,
2980 DEVICE_LITTLE_ENDIAN);
2983 uint32_t address_space_ldl_be(AddressSpace *as, hwaddr addr,
2984 MemTxAttrs attrs, MemTxResult *result)
2986 return address_space_ldl_internal(as, addr, attrs, result,
2990 uint32_t ldl_phys(AddressSpace *as, hwaddr addr)
2992 return address_space_ldl(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
2995 uint32_t ldl_le_phys(AddressSpace *as, hwaddr addr)
2997 return address_space_ldl_le(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3000 uint32_t ldl_be_phys(AddressSpace *as, hwaddr addr)
3002 return address_space_ldl_be(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3005 /* warning: addr must be aligned */
3006 static inline uint64_t address_space_ldq_internal(AddressSpace *as, hwaddr addr,
3008 MemTxResult *result,
3009 enum device_endian endian)
3017 bool release_lock = false;
3020 mr = address_space_translate(as, addr, &addr1, &l,
3022 if (l < 8 || !memory_access_is_direct(mr, false)) {
3023 release_lock |= prepare_mmio_access(mr);
3026 r = memory_region_dispatch_read(mr, addr1, &val, 8, attrs);
3027 #if defined(TARGET_WORDS_BIGENDIAN)
3028 if (endian == DEVICE_LITTLE_ENDIAN) {
3032 if (endian == DEVICE_BIG_ENDIAN) {
3038 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
3042 case DEVICE_LITTLE_ENDIAN:
3043 val = ldq_le_p(ptr);
3045 case DEVICE_BIG_ENDIAN:
3046 val = ldq_be_p(ptr);
3058 qemu_mutex_unlock_iothread();
3064 uint64_t address_space_ldq(AddressSpace *as, hwaddr addr,
3065 MemTxAttrs attrs, MemTxResult *result)
3067 return address_space_ldq_internal(as, addr, attrs, result,
3068 DEVICE_NATIVE_ENDIAN);
3071 uint64_t address_space_ldq_le(AddressSpace *as, hwaddr addr,
3072 MemTxAttrs attrs, MemTxResult *result)
3074 return address_space_ldq_internal(as, addr, attrs, result,
3075 DEVICE_LITTLE_ENDIAN);
3078 uint64_t address_space_ldq_be(AddressSpace *as, hwaddr addr,
3079 MemTxAttrs attrs, MemTxResult *result)
3081 return address_space_ldq_internal(as, addr, attrs, result,
3085 uint64_t ldq_phys(AddressSpace *as, hwaddr addr)
3087 return address_space_ldq(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3090 uint64_t ldq_le_phys(AddressSpace *as, hwaddr addr)
3092 return address_space_ldq_le(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3095 uint64_t ldq_be_phys(AddressSpace *as, hwaddr addr)
3097 return address_space_ldq_be(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3101 uint32_t address_space_ldub(AddressSpace *as, hwaddr addr,
3102 MemTxAttrs attrs, MemTxResult *result)
3107 r = address_space_rw(as, addr, attrs, &val, 1, 0);
3114 uint32_t ldub_phys(AddressSpace *as, hwaddr addr)
3116 return address_space_ldub(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3119 /* warning: addr must be aligned */
3120 static inline uint32_t address_space_lduw_internal(AddressSpace *as,
3123 MemTxResult *result,
3124 enum device_endian endian)
3132 bool release_lock = false;
3135 mr = address_space_translate(as, addr, &addr1, &l,
3137 if (l < 2 || !memory_access_is_direct(mr, false)) {
3138 release_lock |= prepare_mmio_access(mr);
3141 r = memory_region_dispatch_read(mr, addr1, &val, 2, attrs);
3142 #if defined(TARGET_WORDS_BIGENDIAN)
3143 if (endian == DEVICE_LITTLE_ENDIAN) {
3147 if (endian == DEVICE_BIG_ENDIAN) {
3153 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
3157 case DEVICE_LITTLE_ENDIAN:
3158 val = lduw_le_p(ptr);
3160 case DEVICE_BIG_ENDIAN:
3161 val = lduw_be_p(ptr);
3173 qemu_mutex_unlock_iothread();
3179 uint32_t address_space_lduw(AddressSpace *as, hwaddr addr,
3180 MemTxAttrs attrs, MemTxResult *result)
3182 return address_space_lduw_internal(as, addr, attrs, result,
3183 DEVICE_NATIVE_ENDIAN);
3186 uint32_t address_space_lduw_le(AddressSpace *as, hwaddr addr,
3187 MemTxAttrs attrs, MemTxResult *result)
3189 return address_space_lduw_internal(as, addr, attrs, result,
3190 DEVICE_LITTLE_ENDIAN);
3193 uint32_t address_space_lduw_be(AddressSpace *as, hwaddr addr,
3194 MemTxAttrs attrs, MemTxResult *result)
3196 return address_space_lduw_internal(as, addr, attrs, result,
3200 uint32_t lduw_phys(AddressSpace *as, hwaddr addr)
3202 return address_space_lduw(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3205 uint32_t lduw_le_phys(AddressSpace *as, hwaddr addr)
3207 return address_space_lduw_le(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3210 uint32_t lduw_be_phys(AddressSpace *as, hwaddr addr)
3212 return address_space_lduw_be(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3215 /* warning: addr must be aligned. The ram page is not marked as dirty
3216 and the code inside is not invalidated. It is useful if the dirty
3217 bits are used to track modified PTEs */
3218 void address_space_stl_notdirty(AddressSpace *as, hwaddr addr, uint32_t val,
3219 MemTxAttrs attrs, MemTxResult *result)
3226 uint8_t dirty_log_mask;
3227 bool release_lock = false;
3230 mr = address_space_translate(as, addr, &addr1, &l,
3232 if (l < 4 || !memory_access_is_direct(mr, true)) {
3233 release_lock |= prepare_mmio_access(mr);
3235 r = memory_region_dispatch_write(mr, addr1, val, 4, attrs);
3237 addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
3238 ptr = qemu_get_ram_ptr(addr1);
3241 dirty_log_mask = memory_region_get_dirty_log_mask(mr);
3242 dirty_log_mask &= ~(1 << DIRTY_MEMORY_CODE);
3243 cpu_physical_memory_set_dirty_range(addr1, 4, dirty_log_mask);
3250 qemu_mutex_unlock_iothread();
3255 void stl_phys_notdirty(AddressSpace *as, hwaddr addr, uint32_t val)
3257 address_space_stl_notdirty(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
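/* Usage sketch (illustrative, not part of this file): target code updating
 * accessed/dirty bits in a guest page-table entry uses the _notdirty
 * variant so the store does not itself dirty the page or invalidate
 * translated code.  "pte_addr", "pte" and the bit value are hypothetical.
 */
#if 0
static void example_set_pte_accessed(AddressSpace *as, hwaddr pte_addr,
                                     uint32_t pte)
{
    stl_phys_notdirty(as, pte_addr, pte | 0x20);
}
#endif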
3260 /* warning: addr must be aligned */
3261 static inline void address_space_stl_internal(AddressSpace *as,
3262 hwaddr addr, uint32_t val,
3264 MemTxResult *result,
3265 enum device_endian endian)
3272 bool release_lock = false;
3275 mr = address_space_translate(as, addr, &addr1, &l,
3277 if (l < 4 || !memory_access_is_direct(mr, true)) {
3278 release_lock |= prepare_mmio_access(mr);
3280 #if defined(TARGET_WORDS_BIGENDIAN)
3281 if (endian == DEVICE_LITTLE_ENDIAN) {
3285 if (endian == DEVICE_BIG_ENDIAN) {
3289 r = memory_region_dispatch_write(mr, addr1, val, 4, attrs);
3292 addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
3293 ptr = qemu_get_ram_ptr(addr1);
3295 case DEVICE_LITTLE_ENDIAN:
3298 case DEVICE_BIG_ENDIAN:
3305 invalidate_and_set_dirty(mr, addr1, 4);
3312 qemu_mutex_unlock_iothread();
3317 void address_space_stl(AddressSpace *as, hwaddr addr, uint32_t val,
3318 MemTxAttrs attrs, MemTxResult *result)
3320 address_space_stl_internal(as, addr, val, attrs, result,
3321 DEVICE_NATIVE_ENDIAN);
3324 void address_space_stl_le(AddressSpace *as, hwaddr addr, uint32_t val,
3325 MemTxAttrs attrs, MemTxResult *result)
3327 address_space_stl_internal(as, addr, val, attrs, result,
3328 DEVICE_LITTLE_ENDIAN);
3331 void address_space_stl_be(AddressSpace *as, hwaddr addr, uint32_t val,
3332 MemTxAttrs attrs, MemTxResult *result)
3334 address_space_stl_internal(as, addr, val, attrs, result,
3338 void stl_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3340 address_space_stl(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3343 void stl_le_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3345 address_space_stl_le(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3348 void stl_be_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3350 address_space_stl_be(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
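/* Usage sketch (illustrative, not part of this file): the fixed-endian
 * helpers suit device models that keep little-endian structures in guest
 * RAM, e.g. reading a 32-bit field, setting a flag and writing it back.
 * "desc_addr" and the flag value are hypothetical.
 */
#if 0
static void example_update_descriptor(AddressSpace *as, hwaddr desc_addr)
{
    uint32_t flags = ldl_le_phys(as, desc_addr);

    flags |= 1;
    stl_le_phys(as, desc_addr, flags);
}
#endif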
3354 void address_space_stb(AddressSpace *as, hwaddr addr, uint32_t val,
3355 MemTxAttrs attrs, MemTxResult *result)
3360 r = address_space_rw(as, addr, attrs, &v, 1, 1);
3366 void stb_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3368 address_space_stb(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3371 /* warning: addr must be aligned */
3372 static inline void address_space_stw_internal(AddressSpace *as,
3373 hwaddr addr, uint32_t val,
3375 MemTxResult *result,
3376 enum device_endian endian)
3383 bool release_lock = false;
3386 mr = address_space_translate(as, addr, &addr1, &l, true);
3387 if (l < 2 || !memory_access_is_direct(mr, true)) {
3388 release_lock |= prepare_mmio_access(mr);
3390 #if defined(TARGET_WORDS_BIGENDIAN)
3391 if (endian == DEVICE_LITTLE_ENDIAN) {
3395 if (endian == DEVICE_BIG_ENDIAN) {
3399 r = memory_region_dispatch_write(mr, addr1, val, 2, attrs);
3402 addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
3403 ptr = qemu_get_ram_ptr(addr1);
3405 case DEVICE_LITTLE_ENDIAN:
3408 case DEVICE_BIG_ENDIAN:
3415 invalidate_and_set_dirty(mr, addr1, 2);
3422 qemu_mutex_unlock_iothread();
3427 void address_space_stw(AddressSpace *as, hwaddr addr, uint32_t val,
3428 MemTxAttrs attrs, MemTxResult *result)
3430 address_space_stw_internal(as, addr, val, attrs, result,
3431 DEVICE_NATIVE_ENDIAN);
3434 void address_space_stw_le(AddressSpace *as, hwaddr addr, uint32_t val,
3435 MemTxAttrs attrs, MemTxResult *result)
3437 address_space_stw_internal(as, addr, val, attrs, result,
3438 DEVICE_LITTLE_ENDIAN);
3441 void address_space_stw_be(AddressSpace *as, hwaddr addr, uint32_t val,
3442 MemTxAttrs attrs, MemTxResult *result)
3444 address_space_stw_internal(as, addr, val, attrs, result,
3448 void stw_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3450 address_space_stw(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3453 void stw_le_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3455 address_space_stw_le(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3458 void stw_be_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3460 address_space_stw_be(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3464 void address_space_stq(AddressSpace *as, hwaddr addr, uint64_t val,
3465 MemTxAttrs attrs, MemTxResult *result)
3469 r = address_space_rw(as, addr, attrs, (void *) &val, 8, 1);
3475 void address_space_stq_le(AddressSpace *as, hwaddr addr, uint64_t val,
3476 MemTxAttrs attrs, MemTxResult *result)
3479 val = cpu_to_le64(val);
3480 r = address_space_rw(as, addr, attrs, (void *) &val, 8, 1);
3485 void address_space_stq_be(AddressSpace *as, hwaddr addr, uint64_t val,
3486 MemTxAttrs attrs, MemTxResult *result)
3489 val = cpu_to_be64(val);
3490 r = address_space_rw(as, addr, attrs, (void *) &val, 8, 1);
3496 void stq_phys(AddressSpace *as, hwaddr addr, uint64_t val)
3498 address_space_stq(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3501 void stq_le_phys(AddressSpace *as, hwaddr addr, uint64_t val)
3503 address_space_stq_le(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3506 void stq_be_phys(AddressSpace *as, hwaddr addr, uint64_t val)
3508 address_space_stq_be(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3511 /* virtual memory access for debug (includes writing to ROM) */
3512 int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
3513 uint8_t *buf, int len, int is_write)
3520 page = addr & TARGET_PAGE_MASK;
3521 phys_addr = cpu_get_phys_page_debug(cpu, page);
3522 /* if no physical page mapped, return an error */
3523 if (phys_addr == -1)
         return -1;
3525 l = (page + TARGET_PAGE_SIZE) - addr;
3528 phys_addr += (addr & ~TARGET_PAGE_MASK);
3530 cpu_physical_memory_write_rom(cpu->as, phys_addr, buf, l);
3532 address_space_rw(cpu->as, phys_addr, MEMTXATTRS_UNSPECIFIED,
3543 * Allows code that needs to deal with migration bitmaps etc to still be built
3544 * target independent.
3546 size_t qemu_target_page_bits(void)
3548 return TARGET_PAGE_BITS;
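/* Usage sketch (illustrative, not part of this file): target-independent
 * code such as migration derives the page size from the bit count instead
 * of using TARGET_PAGE_SIZE directly.
 */
#if 0
static size_t example_target_page_size(void)
{
    return (size_t)1 << qemu_target_page_bits();
}
#endif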
3554 * A helper function for the _utterly broken_ virtio device model to find out if
3555 * it's running on a big endian machine. Don't do this at home kids!
3557 bool target_words_bigendian(void);
3558 bool target_words_bigendian(void)
3560 #if defined(TARGET_WORDS_BIGENDIAN)
3567 #ifndef CONFIG_USER_ONLY
3568 bool cpu_physical_memory_is_io(hwaddr phys_addr)
3575 mr = address_space_translate(&address_space_memory,
3576 phys_addr, &phys_addr, &l, false);
3578 res = !(memory_region_is_ram(mr) || memory_region_is_romd(mr));
3583 int qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque)
3589 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
3590 ret = func(block->idstr, block->host, block->offset,
3591 block->used_length, opaque);