/*
 * Copyright (c) 2003 Fabrice Bellard
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */
#include <sys/types.h>

#include "qemu-common.h"

#if !defined(CONFIG_USER_ONLY)
#include "hw/boards.h"
#endif
#include "qemu/osdep.h"
#include "sysemu/kvm.h"
#include "sysemu/hax.h"
#include "sysemu/sysemu.h"
#include "hw/xen/xen.h"
#include "qemu/timer.h"
#include "qemu/config-file.h"
#include "qemu/error-report.h"
#include "exec/memory.h"
#include "sysemu/dma.h"
#include "exec/address-spaces.h"
#if defined(CONFIG_USER_ONLY)
#include <qemu.h>
#else /* !CONFIG_USER_ONLY */
#include "sysemu/xen-mapcache.h"
#include "trace.h"
#endif
#include "exec/cpu-all.h"
#include "qemu/rcu_queue.h"
#include "qemu/main-loop.h"
#include "translate-all.h"
#include "sysemu/replay.h"

#include "exec/memory-internal.h"
#include "exec/ram_addr.h"

#include "qemu/range.h"
#ifndef _WIN32
#include "qemu/mmap-alloc.h"
#endif

//#define DEBUG_SUBPAGE
#if !defined(CONFIG_USER_ONLY)
/* ram_list is read under rcu_read_lock()/rcu_read_unlock().  Writes
 * are protected by the ramlist lock.
 */
RAMList ram_list = { .blocks = QLIST_HEAD_INITIALIZER(ram_list.blocks) };

static MemoryRegion *system_memory;
static MemoryRegion *system_io;

AddressSpace address_space_io;
AddressSpace address_space_memory;

MemoryRegion io_mem_rom, io_mem_notdirty;
static MemoryRegion io_mem_unassigned;

/* RAM is pre-allocated and passed into qemu_ram_alloc_from_ptr */
#define RAM_PREALLOC   (1 << 0)

/* RAM is mmap-ed with MAP_SHARED */
#define RAM_SHARED     (1 << 1)

/* Only a portion of RAM (used_length) is actually used, and migrated.
 * This used_length size can change across reboots.
 */
#define RAM_RESIZEABLE (1 << 2)

/* RAM is backed by an mmapped file.
 */
#define RAM_FILE (1 << 3)
#endif

struct CPUTailQ cpus = QTAILQ_HEAD_INITIALIZER(cpus);
/* current CPU in the current thread. It is only valid inside
   cpu_exec() */
__thread CPUState *current_cpu;
/* 0 = Do not count executed instructions.
   1 = Precise instruction counting.
   2 = Adaptive rate instruction counting.  */
int use_icount;
#if !defined(CONFIG_USER_ONLY)

typedef struct PhysPageEntry PhysPageEntry;

struct PhysPageEntry {
    /* How many bits skip to next level (in units of L2_SIZE). 0 for a leaf. */
    uint32_t skip : 6;
    /* index into phys_sections (!skip) or phys_map_nodes (skip) */
    uint32_t ptr : 26;
};

#define PHYS_MAP_NODE_NIL (((uint32_t)~0) >> 6)

/* Size of the L2 (and L3, etc) page tables.  */
#define ADDR_SPACE_BITS 64

#define P_L2_BITS 9
#define P_L2_SIZE (1 << P_L2_BITS)

#define P_L2_LEVELS (((ADDR_SPACE_BITS - TARGET_PAGE_BITS - 1) / P_L2_BITS) + 1)
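/* A quick sanity check of the level arithmetic (assuming the common
 * TARGET_PAGE_BITS of 12): each level resolves P_L2_BITS = 9 bits of the
 * page number, so P_L2_LEVELS = ((64 - 12 - 1) / 9) + 1 = 6 radix-tree
 * levels cover the full 64-bit physical address space.
 */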
typedef PhysPageEntry Node[P_L2_SIZE];

typedef struct PhysPageMap {
    struct rcu_head rcu;

    unsigned sections_nb;
    unsigned sections_nb_alloc;
    unsigned nodes_nb;
    unsigned nodes_nb_alloc;
    Node *nodes;
    MemoryRegionSection *sections;
} PhysPageMap;

struct AddressSpaceDispatch {
    struct rcu_head rcu;

    /* This is a multi-level map on the physical address space.
     * The bottom level has pointers to MemoryRegionSections.
     */
    PhysPageEntry phys_map;
    PhysPageMap map;
    AddressSpace *as;
};
#define SUBPAGE_IDX(addr) ((addr) & ~TARGET_PAGE_MASK)
typedef struct subpage_t {
    MemoryRegion iomem;
    AddressSpace *as;
    hwaddr base;
    uint16_t sub_section[TARGET_PAGE_SIZE];
} subpage_t;

#define PHYS_SECTION_UNASSIGNED 0
#define PHYS_SECTION_NOTDIRTY 1
#define PHYS_SECTION_ROM 2
#define PHYS_SECTION_WATCH 3

static void io_mem_init(void);
static void memory_map_init(void);
static void tcg_commit(MemoryListener *listener);

static MemoryRegion io_mem_watch;
/**
 * CPUAddressSpace: all the information a CPU needs about an AddressSpace
 * @cpu: the CPU whose AddressSpace this is
 * @as: the AddressSpace itself
 * @memory_dispatch: its dispatch pointer (cached, RCU protected)
 * @tcg_as_listener: listener for tracking changes to the AddressSpace
 */
struct CPUAddressSpace {
    CPUState *cpu;
    AddressSpace *as;
    struct AddressSpaceDispatch *memory_dispatch;
    MemoryListener tcg_as_listener;
};

#endif
#if !defined(CONFIG_USER_ONLY)

static void phys_map_node_reserve(PhysPageMap *map, unsigned nodes)
{
    if (map->nodes_nb + nodes > map->nodes_nb_alloc) {
        map->nodes_nb_alloc = MAX(map->nodes_nb_alloc * 2, 16);
        map->nodes_nb_alloc = MAX(map->nodes_nb_alloc, map->nodes_nb + nodes);
        map->nodes = g_renew(Node, map->nodes, map->nodes_nb_alloc);
    }
}

static uint32_t phys_map_node_alloc(PhysPageMap *map, bool leaf)
{
    unsigned i;
    uint32_t ret;
    PhysPageEntry e;
    PhysPageEntry *p;

    ret = map->nodes_nb++;
    p = map->nodes[ret];
    assert(ret != PHYS_MAP_NODE_NIL);
    assert(ret != map->nodes_nb_alloc);

    e.skip = leaf ? 0 : 1;
    e.ptr = leaf ? PHYS_SECTION_UNASSIGNED : PHYS_MAP_NODE_NIL;
    for (i = 0; i < P_L2_SIZE; ++i) {
        memcpy(&p[i], &e, sizeof(e));
    }
    return ret;
}
static void phys_page_set_level(PhysPageMap *map, PhysPageEntry *lp,
                                hwaddr *index, hwaddr *nb, uint16_t leaf,
                                int level)
{
    PhysPageEntry *p;
    hwaddr step = (hwaddr)1 << (level * P_L2_BITS);

    if (lp->skip && lp->ptr == PHYS_MAP_NODE_NIL) {
        lp->ptr = phys_map_node_alloc(map, level == 0);
    }
    p = map->nodes[lp->ptr];
    lp = &p[(*index >> (level * P_L2_BITS)) & (P_L2_SIZE - 1)];

    while (*nb && lp < &p[P_L2_SIZE]) {
        if ((*index & (step - 1)) == 0 && *nb >= step) {
            lp->skip = 0;
            lp->ptr = leaf;
            *index += step;
            *nb -= step;
        } else {
            phys_page_set_level(map, lp, index, nb, leaf, level - 1);
        }
        ++lp;
    }
}

static void phys_page_set(AddressSpaceDispatch *d,
                          hwaddr index, hwaddr nb,
                          uint16_t leaf)
{
    /* Wildly overreserve - it doesn't matter much. */
    phys_map_node_reserve(&d->map, 3 * P_L2_LEVELS);

    phys_page_set_level(&d->map, &d->phys_map, &index, &nb, leaf, P_L2_LEVELS - 1);
}
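/* For example, registering a 16-page section marks sixteen consecutive leaf
 * entries with the same section index; any run that is aligned to and spans
 * a whole step at some level takes the fast path above and is recorded as a
 * single entry at that level instead of recursing further down.
 */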
/* Compact a non-leaf page entry. Simply detect that the entry has a single
 * child, and update our entry so we can skip it and go directly to the
 * destination.
 */
static void phys_page_compact(PhysPageEntry *lp, Node *nodes, unsigned long *compacted)
{
    unsigned valid_ptr = P_L2_SIZE;
    int valid = 0;
    PhysPageEntry *p;
    int i;

    if (lp->ptr == PHYS_MAP_NODE_NIL) {
        return;
    }

    p = nodes[lp->ptr];
    for (i = 0; i < P_L2_SIZE; i++) {
        if (p[i].ptr == PHYS_MAP_NODE_NIL) {
            continue;
        }
        valid_ptr = i;
        valid++;
        if (p[i].skip) {
            phys_page_compact(&p[i], nodes, compacted);
        }
    }

    /* We can only compress if there's only one child. */
    if (valid != 1) {
        return;
    }
    assert(valid_ptr < P_L2_SIZE);

    /* Don't compress if it won't fit in the # of bits we have. */
    if (lp->skip + p[valid_ptr].skip >= (1 << 3)) {
        return;
    }

    lp->ptr = p[valid_ptr].ptr;
    if (!p[valid_ptr].skip) {
        /* If our only child is a leaf, make this a leaf.
         * By design, we should have made this node a leaf to begin with so we
         * should never reach here.
         * But since it's so simple to handle this, let's do it just in case we
         * change this rule.
         */
        lp->skip = 0;
    } else {
        lp->skip += p[valid_ptr].skip;
    }
}
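/* Compaction collapses chains of single-child interior nodes: a lone RAM
 * section low in memory would otherwise be reached through every level of
 * the tree, while after compaction lp->skip jumps over the whole chain in
 * one step (as long as the combined skip passes the width check above).
 */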
static void phys_page_compact_all(AddressSpaceDispatch *d, int nodes_nb)
{
    DECLARE_BITMAP(compacted, nodes_nb);

    if (d->phys_map.skip) {
        phys_page_compact(&d->phys_map, d->map.nodes, compacted);
    }
}

static MemoryRegionSection *phys_page_find(PhysPageEntry lp, hwaddr addr,
                                           Node *nodes, MemoryRegionSection *sections)
{
    PhysPageEntry *p;
    hwaddr index = addr >> TARGET_PAGE_BITS;
    int i;

    for (i = P_L2_LEVELS; lp.skip && (i -= lp.skip) >= 0;) {
        if (lp.ptr == PHYS_MAP_NODE_NIL) {
            return &sections[PHYS_SECTION_UNASSIGNED];
        }
        p = nodes[lp.ptr];
        lp = p[(index >> (i * P_L2_BITS)) & (P_L2_SIZE - 1)];
    }

    if (sections[lp.ptr].size.hi ||
        range_covers_byte(sections[lp.ptr].offset_within_address_space,
                          sections[lp.ptr].size.lo, addr)) {
        return &sections[lp.ptr];
    } else {
        return &sections[PHYS_SECTION_UNASSIGNED];
    }
}
bool memory_region_is_unassigned(MemoryRegion *mr)
{
    return mr != &io_mem_rom && mr != &io_mem_notdirty && !mr->rom_device
        && mr != &io_mem_watch;
}

/* Called from RCU critical section */
static MemoryRegionSection *address_space_lookup_region(AddressSpaceDispatch *d,
                                                        hwaddr addr,
                                                        bool resolve_subpage)
{
    MemoryRegionSection *section;
    subpage_t *subpage;

    section = phys_page_find(d->phys_map, addr, d->map.nodes, d->map.sections);
    if (resolve_subpage && section->mr->subpage) {
        subpage = container_of(section->mr, subpage_t, iomem);
        section = &d->map.sections[subpage->sub_section[SUBPAGE_IDX(addr)]];
    }
    return section;
}
/* Called from RCU critical section */
static MemoryRegionSection *
address_space_translate_internal(AddressSpaceDispatch *d, hwaddr addr, hwaddr *xlat,
                                 hwaddr *plen, bool resolve_subpage)
{
    MemoryRegionSection *section;
    MemoryRegion *mr;
    Int128 diff;

    section = address_space_lookup_region(d, addr, resolve_subpage);
    /* Compute offset within MemoryRegionSection */
    addr -= section->offset_within_address_space;

    /* Compute offset within MemoryRegion */
    *xlat = addr + section->offset_within_region;

    mr = section->mr;

    /* MMIO registers can be expected to perform full-width accesses based only
     * on their address, without considering adjacent registers that could
     * decode to completely different MemoryRegions. When such registers
     * exist (e.g. I/O ports 0xcf8 and 0xcf9 on most PC chipsets), MMIO
     * regions overlap wildly. For this reason we cannot clamp the accesses
     * here.
     *
     * If the length is small (as is the case for address_space_ldl/stl),
     * everything works fine. If the incoming length is large, however,
     * the caller really has to do the clamping through memory_access_size.
     */
    if (memory_region_is_ram(mr)) {
        diff = int128_sub(section->size, int128_make64(addr));
        *plen = int128_get64(int128_min(diff, int128_make64(*plen)));
    }
    return section;
}
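    /* Worked example of the RAM clamp: for a 1 MiB RAM section and an access
     * at offset 0xffff0 with *plen = 0x100, diff = 0x10, so *plen is clamped
     * to 0x10 and the caller loops to translate the remainder separately.
     */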
static inline bool memory_access_is_direct(MemoryRegion *mr, bool is_write)
{
    if (memory_region_is_ram(mr)) {
        return !(is_write && mr->readonly);
    }
    if (memory_region_is_romd(mr)) {
        return !is_write;
    }

    return false;
}

/* Called from RCU critical section */
MemoryRegion *address_space_translate(AddressSpace *as, hwaddr addr,
                                      hwaddr *xlat, hwaddr *plen,
                                      bool is_write)
{
    IOMMUTLBEntry iotlb;
    MemoryRegionSection *section;
    MemoryRegion *mr;

    for (;;) {
        AddressSpaceDispatch *d = atomic_rcu_read(&as->dispatch);
        section = address_space_translate_internal(d, addr, &addr, plen, true);
        mr = section->mr;

        if (!mr->iommu_ops) {
            break;
        }

        iotlb = mr->iommu_ops->translate(mr, addr, is_write);
        addr = ((iotlb.translated_addr & ~iotlb.addr_mask)
                | (addr & iotlb.addr_mask));
        *plen = MIN(*plen, (addr | iotlb.addr_mask) - addr + 1);
        if (!(iotlb.perm & (1 << is_write))) {
            mr = &io_mem_unassigned;
            break;
        }

        as = iotlb.target_as;
    }

    if (xen_enabled() && memory_access_is_direct(mr, is_write)) {
        hwaddr page = ((addr & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE) - addr;
        *plen = MIN(page, *plen);
    }

    *xlat = addr;
    return mr;
}
/* Called from RCU critical section */
MemoryRegionSection *
address_space_translate_for_iotlb(CPUState *cpu, hwaddr addr,
                                  hwaddr *xlat, hwaddr *plen)
{
    MemoryRegionSection *section;
    section = address_space_translate_internal(cpu->cpu_ases[0].memory_dispatch,
                                               addr, xlat, plen, false);

    assert(!section->mr->iommu_ops);
    return section;
}
#endif
#if !defined(CONFIG_USER_ONLY)

static int cpu_common_post_load(void *opaque, int version_id)
{
    CPUState *cpu = opaque;

    /* 0x01 was CPU_INTERRUPT_EXIT. This line can be removed when the
       version_id is increased. */
    cpu->interrupt_request &= ~0x01;
    tlb_flush(cpu, 1);

    return 0;
}

static int cpu_common_pre_load(void *opaque)
{
    CPUState *cpu = opaque;

    cpu->exception_index = -1;

    return 0;
}

static bool cpu_common_exception_index_needed(void *opaque)
{
    CPUState *cpu = opaque;

    return tcg_enabled() && cpu->exception_index != -1;
}

static const VMStateDescription vmstate_cpu_common_exception_index = {
    .name = "cpu_common/exception_index",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = cpu_common_exception_index_needed,
    .fields = (VMStateField[]) {
        VMSTATE_INT32(exception_index, CPUState),
        VMSTATE_END_OF_LIST()
    }
};
static bool cpu_common_crash_occurred_needed(void *opaque)
{
    CPUState *cpu = opaque;

    return cpu->crash_occurred;
}

static const VMStateDescription vmstate_cpu_common_crash_occurred = {
    .name = "cpu_common/crash_occurred",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = cpu_common_crash_occurred_needed,
    .fields = (VMStateField[]) {
        VMSTATE_BOOL(crash_occurred, CPUState),
        VMSTATE_END_OF_LIST()
    }
};

const VMStateDescription vmstate_cpu_common = {
    .name = "cpu_common",
    .version_id = 1,
    .minimum_version_id = 1,
    .pre_load = cpu_common_pre_load,
    .post_load = cpu_common_post_load,
    .fields = (VMStateField[]) {
        VMSTATE_UINT32(halted, CPUState),
        VMSTATE_UINT32(interrupt_request, CPUState),
        VMSTATE_END_OF_LIST()
    },
    .subsections = (const VMStateDescription*[]) {
        &vmstate_cpu_common_exception_index,
        &vmstate_cpu_common_crash_occurred,
        NULL
    }
};

#endif
CPUState *qemu_get_cpu(int index)
{
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        if (cpu->cpu_index == index) {
            return cpu;
        }
    }

    return NULL;
}

#if !defined(CONFIG_USER_ONLY)
void tcg_cpu_address_space_init(CPUState *cpu, AddressSpace *as)
{
    /* We only support one address space per cpu at the moment. */
    assert(cpu->as == as);

    if (cpu->cpu_ases) {
        /* We've already registered the listener for our only AS */
        return;
    }

    cpu->cpu_ases = g_new0(CPUAddressSpace, 1);
    cpu->cpu_ases[0].cpu = cpu;
    cpu->cpu_ases[0].as = as;
    cpu->cpu_ases[0].tcg_as_listener.commit = tcg_commit;
    memory_listener_register(&cpu->cpu_ases[0].tcg_as_listener, as);
}
#endif
#ifndef CONFIG_USER_ONLY
static DECLARE_BITMAP(cpu_index_map, MAX_CPUMASK_BITS);

static int cpu_get_free_index(Error **errp)
{
    int cpu = find_first_zero_bit(cpu_index_map, MAX_CPUMASK_BITS);

    if (cpu >= MAX_CPUMASK_BITS) {
        error_setg(errp, "Trying to use more CPUs than max of %d",
                   MAX_CPUMASK_BITS);
        return -1;
    }

    bitmap_set(cpu_index_map, cpu, 1);
    return cpu;
}
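/* e.g. with CPUs 0 and 1 allocated, find_first_zero_bit() returns 2; after a
 * later cpu_exec_exit() frees index 1, the next allocation reuses 1 rather
 * than growing the map.
 */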
void cpu_exec_exit(CPUState *cpu)
{
    if (cpu->cpu_index == -1) {
        /* cpu_index was never allocated by this @cpu or was already freed. */
        return;
    }

    bitmap_clear(cpu_index_map, cpu->cpu_index, 1);
    cpu->cpu_index = -1;
}
#else

static int cpu_get_free_index(Error **errp)
{
    CPUState *some_cpu;
    int cpu_index = 0;

    CPU_FOREACH(some_cpu) {
        cpu_index++;
    }
    return cpu_index;
}

void cpu_exec_exit(CPUState *cpu)
{
}
#endif
void cpu_exec_init(CPUState *cpu, Error **errp)
{
    CPUClass *cc = CPU_GET_CLASS(cpu);
    int cpu_index;
    Error *local_err = NULL;

#ifndef CONFIG_USER_ONLY
    cpu->as = &address_space_memory;
    cpu->thread_id = qemu_get_thread_id();
#endif

#if defined(CONFIG_USER_ONLY)
    cpu_list_lock();
#endif
    cpu_index = cpu->cpu_index = cpu_get_free_index(&local_err);
    if (local_err) {
        error_propagate(errp, local_err);
#if defined(CONFIG_USER_ONLY)
        cpu_list_unlock();
#endif
        return;
    }
    QTAILQ_INSERT_TAIL(&cpus, cpu, node);
#if defined(CONFIG_USER_ONLY)
    cpu_list_unlock();
#endif
    if (qdev_get_vmsd(DEVICE(cpu)) == NULL) {
        vmstate_register(NULL, cpu_index, &vmstate_cpu_common, cpu);
    }
#if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
    register_savevm(NULL, "cpu", cpu_index, CPU_SAVE_VERSION,
                    cpu_save, cpu_load, cpu->env_ptr);
    assert(cc->vmsd == NULL);
    assert(qdev_get_vmsd(DEVICE(cpu)) == NULL);
#endif
    if (cc->vmsd != NULL) {
        vmstate_register(NULL, cpu_index, cc->vmsd, cpu);
    }
}
#if defined(CONFIG_USER_ONLY)
static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
{
    tb_invalidate_phys_page_range(pc, pc + 1, 0);
}
#else
static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
{
    hwaddr phys = cpu_get_phys_page_debug(cpu, pc);
    if (phys != -1) {
        tb_invalidate_phys_addr(cpu->as,
                                phys | (pc & ~TARGET_PAGE_MASK));
    }
}
#endif
#if defined(CONFIG_USER_ONLY)
void cpu_watchpoint_remove_all(CPUState *cpu, int mask)
{
}

int cpu_watchpoint_remove(CPUState *cpu, vaddr addr, vaddr len,
                          int flags)
{
    return -ENOSYS;
}

void cpu_watchpoint_remove_by_ref(CPUState *cpu, CPUWatchpoint *watchpoint)
{
}

int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
                          int flags, CPUWatchpoint **watchpoint)
{
    return -ENOSYS;
}
#else
/* Add a watchpoint.  */
int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
                          int flags, CPUWatchpoint **watchpoint)
{
    CPUWatchpoint *wp;

    /* forbid ranges which are empty or run off the end of the address space */
    if (len == 0 || (addr + len - 1) < addr) {
        error_report("tried to set invalid watchpoint at %"
                     VADDR_PRIx ", len=%" VADDR_PRIu, addr, len);
        return -EINVAL;
    }
    wp = g_malloc(sizeof(*wp));

    wp->vaddr = addr;
    wp->len = len;
    wp->flags = flags;

    /* keep all GDB-injected watchpoints in front */
    if (flags & BP_GDB) {
        QTAILQ_INSERT_HEAD(&cpu->watchpoints, wp, entry);
    } else {
        QTAILQ_INSERT_TAIL(&cpu->watchpoints, wp, entry);
    }

    tlb_flush_page(cpu, addr);

    if (watchpoint) {
        *watchpoint = wp;
    }
    return 0;
}
/* Remove a specific watchpoint.  */
int cpu_watchpoint_remove(CPUState *cpu, vaddr addr, vaddr len,
                          int flags)
{
    CPUWatchpoint *wp;

    QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
        if (addr == wp->vaddr && len == wp->len
                && flags == (wp->flags & ~BP_WATCHPOINT_HIT)) {
            cpu_watchpoint_remove_by_ref(cpu, wp);
            return 0;
        }
    }
    return -ENOENT;
}

/* Remove a specific watchpoint by reference.  */
void cpu_watchpoint_remove_by_ref(CPUState *cpu, CPUWatchpoint *watchpoint)
{
    QTAILQ_REMOVE(&cpu->watchpoints, watchpoint, entry);

    tlb_flush_page(cpu, watchpoint->vaddr);

    g_free(watchpoint);
}

/* Remove all matching watchpoints.  */
void cpu_watchpoint_remove_all(CPUState *cpu, int mask)
{
    CPUWatchpoint *wp, *next;

    QTAILQ_FOREACH_SAFE(wp, &cpu->watchpoints, entry, next) {
        if (wp->flags & mask) {
            cpu_watchpoint_remove_by_ref(cpu, wp);
        }
    }
}

/* Return true if this watchpoint address matches the specified
 * access (ie the address range covered by the watchpoint overlaps
 * partially or completely with the address range covered by the
 * access).
 */
static inline bool cpu_watchpoint_address_matches(CPUWatchpoint *wp,
                                                  vaddr addr,
                                                  vaddr len)
{
    /* We know the lengths are non-zero, but a little caution is
     * required to avoid errors in the case where the range ends
     * exactly at the top of the address space and so addr + len
     * wraps round to zero.
     */
    vaddr wpend = wp->vaddr + wp->len - 1;
    vaddr addrend = addr + len - 1;

    return !(addr > wpend || wp->vaddr > addrend);
}
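/* Example: a watchpoint at [0x1000, 0x1003] and a 4-byte access at 0x1002
 * give wpend = 0x1003 and addrend = 0x1005; neither start exceeds the other
 * range's end, so the ranges overlap and the test returns true.
 */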
#endif

/* Add a breakpoint.  */
int cpu_breakpoint_insert(CPUState *cpu, vaddr pc, int flags,
                          CPUBreakpoint **breakpoint)
{
    CPUBreakpoint *bp;

    bp = g_malloc(sizeof(*bp));

    bp->pc = pc;
    bp->flags = flags;

    /* keep all GDB-injected breakpoints in front */
    if (flags & BP_GDB) {
        QTAILQ_INSERT_HEAD(&cpu->breakpoints, bp, entry);
    } else {
        QTAILQ_INSERT_TAIL(&cpu->breakpoints, bp, entry);
    }

    breakpoint_invalidate(cpu, pc);

    if (breakpoint) {
        *breakpoint = bp;
    }
    return 0;
}

/* Remove a specific breakpoint.  */
int cpu_breakpoint_remove(CPUState *cpu, vaddr pc, int flags)
{
    CPUBreakpoint *bp;

    QTAILQ_FOREACH(bp, &cpu->breakpoints, entry) {
        if (bp->pc == pc && bp->flags == flags) {
            cpu_breakpoint_remove_by_ref(cpu, bp);
            return 0;
        }
    }
    return -ENOENT;
}

/* Remove a specific breakpoint by reference.  */
void cpu_breakpoint_remove_by_ref(CPUState *cpu, CPUBreakpoint *breakpoint)
{
    QTAILQ_REMOVE(&cpu->breakpoints, breakpoint, entry);

    breakpoint_invalidate(cpu, breakpoint->pc);

    g_free(breakpoint);
}

/* Remove all matching breakpoints. */
void cpu_breakpoint_remove_all(CPUState *cpu, int mask)
{
    CPUBreakpoint *bp, *next;

    QTAILQ_FOREACH_SAFE(bp, &cpu->breakpoints, entry, next) {
        if (bp->flags & mask) {
            cpu_breakpoint_remove_by_ref(cpu, bp);
        }
    }
}

/* enable or disable single step mode. EXCP_DEBUG is returned by the
   CPU loop after each instruction */
void cpu_single_step(CPUState *cpu, int enabled)
{
    if (cpu->singlestep_enabled != enabled) {
        cpu->singlestep_enabled = enabled;
        if (kvm_enabled()) {
            kvm_update_guest_debug(cpu, 0);
        } else {
            /* must flush all the translated code to avoid inconsistencies */
            /* XXX: only flush what is necessary */
            tb_flush(cpu);
        }
    }
}
void cpu_abort(CPUState *cpu, const char *fmt, ...)
{
    va_list ap;
    va_list ap2;

    va_start(ap, fmt);
    va_copy(ap2, ap);
    fprintf(stderr, "qemu: fatal: ");
    vfprintf(stderr, fmt, ap);
    fprintf(stderr, "\n");
    cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_FPU | CPU_DUMP_CCOP);
    if (qemu_log_enabled()) {
        qemu_log("qemu: fatal: ");
        qemu_log_vprintf(fmt, ap2);
        qemu_log("\n");
        log_cpu_state(cpu, CPU_DUMP_FPU | CPU_DUMP_CCOP);
        qemu_log_flush();
        qemu_log_close();
    }
    va_end(ap2);
    va_end(ap);
#if defined(CONFIG_USER_ONLY)
    {
        struct sigaction act;
        sigfillset(&act.sa_mask);
        act.sa_handler = SIG_DFL;
        sigaction(SIGABRT, &act, NULL);
    }
#endif
    abort();
}
#if !defined(CONFIG_USER_ONLY)
/* Called from RCU critical section */
static RAMBlock *qemu_get_ram_block(ram_addr_t addr)
{
    RAMBlock *block;

    block = atomic_rcu_read(&ram_list.mru_block);
    if (block && addr - block->offset < block->max_length) {
        return block;
    }
    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
        if (addr - block->offset < block->max_length) {
            goto found;
        }
    }

    fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
    abort();

found:
    /* It is safe to write mru_block outside the iothread lock.  This
     * is what happens:
     *
     *     mru_block = xxx
     *     rcu_read_lock()
     *                                        rcu_read_lock()
     *     xxx removed from list
     *                  read mru_block
     *                                        mru_block = NULL;
     *                                        call_rcu(reclaim_ramblock, xxx);
     *                  rcu_read_unlock()
     *
     * atomic_rcu_set is not needed here.  The block was already published
     * when it was placed into the list.  Here we're just making an extra
     * copy of the pointer.
     */
    ram_list.mru_block = block;
    return block;
}
static void tlb_reset_dirty_range_all(ram_addr_t start, ram_addr_t length)
{
    CPUState *cpu;
    ram_addr_t start1;
    RAMBlock *block;
    ram_addr_t end;

    end = TARGET_PAGE_ALIGN(start + length);
    start &= TARGET_PAGE_MASK;

    rcu_read_lock();
    block = qemu_get_ram_block(start);
    assert(block == qemu_get_ram_block(end - 1));
    start1 = (uintptr_t)ramblock_ptr(block, start - block->offset);
    CPU_FOREACH(cpu) {
        tlb_reset_dirty(cpu, start1, length);
    }
    rcu_read_unlock();
}

/* Note: start and end must be within the same ram block.  */
bool cpu_physical_memory_test_and_clear_dirty(ram_addr_t start,
                                              ram_addr_t length,
                                              unsigned client)
{
    unsigned long end, page;
    bool dirty;

    if (length == 0) {
        return false;
    }

    end = TARGET_PAGE_ALIGN(start + length) >> TARGET_PAGE_BITS;
    page = start >> TARGET_PAGE_BITS;
    dirty = bitmap_test_and_clear_atomic(ram_list.dirty_memory[client],
                                         page, end - page);

    if (dirty && tcg_enabled()) {
        tlb_reset_dirty_range_all(start, length);
    }

    return dirty;
}
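/* Dirty state is tracked per client (VGA, CODE, MIGRATION) in one bitmap bit
 * per TARGET_PAGE_SIZE page; e.g. clearing 64 KiB starting at a 4 KiB page
 * boundary tests and clears 16 consecutive bits atomically.
 */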
/* Called from RCU critical section */
hwaddr memory_region_section_get_iotlb(CPUState *cpu,
                                       MemoryRegionSection *section,
                                       target_ulong vaddr,
                                       hwaddr paddr, hwaddr xlat,
                                       int prot,
                                       target_ulong *address)
{
    hwaddr iotlb;
    CPUWatchpoint *wp;

    if (memory_region_is_ram(section->mr)) {
        /* Normal RAM.  */
        iotlb = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
            + xlat;
        if (!section->readonly) {
            iotlb |= PHYS_SECTION_NOTDIRTY;
        } else {
            iotlb |= PHYS_SECTION_ROM;
        }
    } else {
        AddressSpaceDispatch *d;

        d = atomic_rcu_read(&section->address_space->dispatch);
        iotlb = section - d->map.sections;
        iotlb += xlat;
    }

    /* Make accesses to pages with watchpoints go via the
       watchpoint trap routines.  */
    QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
        if (cpu_watchpoint_address_matches(wp, vaddr, TARGET_PAGE_SIZE)) {
            /* Avoid trapping reads of pages with a write breakpoint. */
            if ((prot & PAGE_WRITE) || (wp->flags & BP_MEM_READ)) {
                iotlb = PHYS_SECTION_WATCH + paddr;
                *address |= TLB_MMIO;
                break;
            }
        }
    }

    return iotlb;
}
#endif /* defined(CONFIG_USER_ONLY) */
#if !defined(CONFIG_USER_ONLY)

static int subpage_register(subpage_t *mmio, uint32_t start, uint32_t end,
                            uint16_t section);
static subpage_t *subpage_init(AddressSpace *as, hwaddr base);

static void *(*phys_mem_alloc)(size_t size, uint64_t *align) =
    qemu_anon_ram_alloc;

/*
 * Set a custom physical guest memory allocator.
 * Accelerators with unusual needs may need this.  Hopefully, we can
 * get rid of it eventually.
 */
void phys_mem_set_alloc(void *(*alloc)(size_t, uint64_t *align))
{
    phys_mem_alloc = alloc;
}
static uint16_t phys_section_add(PhysPageMap *map,
                                 MemoryRegionSection *section)
{
    /* The physical section number is ORed with a page-aligned
     * pointer to produce the iotlb entries.  Thus it should
     * never overflow into the page-aligned value.
     */
    assert(map->sections_nb < TARGET_PAGE_SIZE);

    if (map->sections_nb == map->sections_nb_alloc) {
        map->sections_nb_alloc = MAX(map->sections_nb_alloc * 2, 16);
        map->sections = g_renew(MemoryRegionSection, map->sections,
                                map->sections_nb_alloc);
    }
    map->sections[map->sections_nb] = *section;
    memory_region_ref(section->mr);
    return map->sections_nb++;
}

static void phys_section_destroy(MemoryRegion *mr)
{
    bool have_sub_page = mr->subpage;

    memory_region_unref(mr);

    if (have_sub_page) {
        subpage_t *subpage = container_of(mr, subpage_t, iomem);
        object_unref(OBJECT(&subpage->iomem));
        g_free(subpage);
    }
}

static void phys_sections_free(PhysPageMap *map)
{
    while (map->sections_nb > 0) {
        MemoryRegionSection *section = &map->sections[--map->sections_nb];
        phys_section_destroy(section->mr);
    }
    g_free(map->sections);
}
static void register_subpage(AddressSpaceDispatch *d, MemoryRegionSection *section)
{
    subpage_t *subpage;
    hwaddr base = section->offset_within_address_space
        & TARGET_PAGE_MASK;
    MemoryRegionSection *existing = phys_page_find(d->phys_map, base,
                                                   d->map.nodes, d->map.sections);
    MemoryRegionSection subsection = {
        .offset_within_address_space = base,
        .size = int128_make64(TARGET_PAGE_SIZE),
    };
    hwaddr start, end;

    assert(existing->mr->subpage || existing->mr == &io_mem_unassigned);

    if (!(existing->mr->subpage)) {
        subpage = subpage_init(d->as, base);
        subsection.address_space = d->as;
        subsection.mr = &subpage->iomem;
        phys_page_set(d, base >> TARGET_PAGE_BITS, 1,
                      phys_section_add(&d->map, &subsection));
    } else {
        subpage = container_of(existing->mr, subpage_t, iomem);
    }
    start = section->offset_within_address_space & ~TARGET_PAGE_MASK;
    end = start + int128_get64(section->size) - 1;
    subpage_register(subpage, start, end,
                     phys_section_add(&d->map, section));
}

static void register_multipage(AddressSpaceDispatch *d,
                               MemoryRegionSection *section)
{
    hwaddr start_addr = section->offset_within_address_space;
    uint16_t section_index = phys_section_add(&d->map, section);
    uint64_t num_pages = int128_get64(int128_rshift(section->size,
                                                    TARGET_PAGE_BITS));

    assert(num_pages);
    phys_page_set(d, start_addr >> TARGET_PAGE_BITS, num_pages, section_index);
}
static void mem_add(MemoryListener *listener, MemoryRegionSection *section)
{
    AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
    AddressSpaceDispatch *d = as->next_dispatch;
    MemoryRegionSection now = *section, remain = *section;
    Int128 page_size = int128_make64(TARGET_PAGE_SIZE);

    if (now.offset_within_address_space & ~TARGET_PAGE_MASK) {
        uint64_t left = TARGET_PAGE_ALIGN(now.offset_within_address_space)
            - now.offset_within_address_space;
        now.size = int128_min(int128_make64(left), now.size);
        register_subpage(d, &now);
    } else {
        now.size = int128_zero();
    }
    while (int128_ne(remain.size, now.size)) {
        remain.size = int128_sub(remain.size, now.size);
        remain.offset_within_address_space += int128_get64(now.size);
        remain.offset_within_region += int128_get64(now.size);
        now = remain;
        if (int128_lt(remain.size, page_size)) {
            register_subpage(d, &now);
        } else if (remain.offset_within_address_space & ~TARGET_PAGE_MASK) {
            now.size = page_size;
            register_subpage(d, &now);
        } else {
            now.size = int128_and(now.size, int128_neg(page_size));
            register_multipage(d, &now);
        }
    }
}
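/* e.g. a section spanning 0x0800..0x33ff (with 4 KiB pages) is split into a
 * head subpage (0x0800..0x0fff), one aligned multipage run (0x1000..0x2fff),
 * and a tail subpage (0x3000..0x33ff).
 */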
void qemu_flush_coalesced_mmio_buffer(void)
{
    if (kvm_enabled()) {
        kvm_flush_coalesced_mmio_buffer();
    }
}

void qemu_mutex_lock_ramlist(void)
{
    qemu_mutex_lock(&ram_list.mutex);
}

void qemu_mutex_unlock_ramlist(void)
{
    qemu_mutex_unlock(&ram_list.mutex);
}
#ifdef __linux__

#include <sys/vfs.h>

#define HUGETLBFS_MAGIC       0x958458f6

static long gethugepagesize(const char *path, Error **errp)
{
    struct statfs fs;
    int ret;

    do {
        ret = statfs(path, &fs);
    } while (ret != 0 && errno == EINTR);

    if (ret != 0) {
        error_setg_errno(errp, errno, "failed to get page size of file %s",
                         path);
        return 0;
    }

    return fs.f_bsize;
}
static void *file_ram_alloc(RAMBlock *block,
                            ram_addr_t memory,
                            const char *path,
                            Error **errp)
{
    struct stat st;
    char *filename, *sanitized_name, *c;
    void *area;
    int fd;
    uint64_t hpagesize;
    Error *local_err = NULL;

    hpagesize = gethugepagesize(path, &local_err);
    if (local_err) {
        error_propagate(errp, local_err);
        goto error;
    }
    block->mr->align = hpagesize;

    if (memory < hpagesize) {
        error_setg(errp, "memory size 0x" RAM_ADDR_FMT " must be equal to "
                   "or larger than huge page size 0x%" PRIx64,
                   memory, hpagesize);
        goto error;
    }

    if (kvm_enabled() && !kvm_has_sync_mmu()) {
        error_setg(errp,
                   "host lacks kvm mmu notifiers, -mem-path unsupported");
        goto error;
    }

    if (!stat(path, &st) && S_ISDIR(st.st_mode)) {
        /* Make name safe to use with mkstemp by replacing '/' with '_'. */
        sanitized_name = g_strdup(memory_region_name(block->mr));
        for (c = sanitized_name; *c != '\0'; c++) {
            if (*c == '/') {
                *c = '_';
            }
        }

        filename = g_strdup_printf("%s/qemu_back_mem.%s.XXXXXX", path,
                                   sanitized_name);
        g_free(sanitized_name);

        fd = mkstemp(filename);
        if (fd >= 0) {
            unlink(filename);
        }
        g_free(filename);
    } else {
        fd = open(path, O_RDWR | O_CREAT, 0644);
    }

    if (fd < 0) {
        error_setg_errno(errp, errno,
                         "unable to create backing store for hugepages");
        goto error;
    }

    memory = ROUND_UP(memory, hpagesize);

    /*
     * ftruncate is not supported by hugetlbfs in older
     * hosts, so don't bother bailing out on errors.
     * If anything goes wrong with it under other filesystems,
     * mmap will fail.
     */
    if (ftruncate(fd, memory)) {
        perror("ftruncate");
    }

    area = qemu_ram_mmap(fd, memory, hpagesize, block->flags & RAM_SHARED);
    if (area == MAP_FAILED) {
        error_setg_errno(errp, errno,
                         "unable to map backing store for hugepages");
        close(fd);
        goto error;
    }

    if (mem_prealloc) {
        os_mem_prealloc(fd, area, memory);
    }

    block->fd = fd;
    return area;

error:
    return NULL;
}
#endif
/* Called with the ramlist lock held.  */
static ram_addr_t find_ram_offset(ram_addr_t size)
{
    RAMBlock *block, *next_block;
    ram_addr_t offset = RAM_ADDR_MAX, mingap = RAM_ADDR_MAX;

    assert(size != 0); /* it would hand out same offset multiple times */

    if (QLIST_EMPTY_RCU(&ram_list.blocks)) {
        return 0;
    }

    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
        ram_addr_t end, next = RAM_ADDR_MAX;

        end = block->offset + block->max_length;

        QLIST_FOREACH_RCU(next_block, &ram_list.blocks, next) {
            if (next_block->offset >= end) {
                next = MIN(next, next_block->offset);
            }
        }
        if (next - end >= size && next - end < mingap) {
            offset = end;
            mingap = next - end;
        }
    }
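    /* This is a best-fit search: for each block we find the nearest block
     * start above its end, and keep the smallest gap that still fits the
     * request, e.g. preferring a 2 MiB hole over a 1 GiB one for a 1 MiB
     * allocation.
     */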
    if (offset == RAM_ADDR_MAX) {
        fprintf(stderr, "Failed to find gap of requested size: %" PRIu64 "\n",
                (uint64_t)size);
        abort();
    }

    return offset;
}

ram_addr_t last_ram_offset(void)
{
    RAMBlock *block;
    ram_addr_t last = 0;

    rcu_read_lock();
    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
        last = MAX(last, block->offset + block->max_length);
    }
    rcu_read_unlock();
    return last;
}

static void qemu_ram_setup_dump(void *addr, ram_addr_t size)
{
    int ret;

    /* Use MADV_DONTDUMP, if user doesn't want the guest memory in the core */
    if (!machine_dump_guest_core(current_machine)) {
        ret = qemu_madvise(addr, size, QEMU_MADV_DONTDUMP);
        if (ret) {
            perror("qemu_madvise");
            fprintf(stderr, "madvise doesn't support MADV_DONTDUMP, "
                    "but dump_guest_core=off specified\n");
        }
    }
}
/* Called within an RCU critical section, or while the ramlist lock
 * is held.
 */
static RAMBlock *find_ram_block(ram_addr_t addr)
{
    RAMBlock *block;

    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
        if (block->offset == addr) {
            return block;
        }
    }

    return NULL;
}

const char *qemu_ram_get_idstr(RAMBlock *rb)
{
    return rb->idstr;
}

/* Called with iothread lock held.  */
void qemu_ram_set_idstr(ram_addr_t addr, const char *name, DeviceState *dev)
{
    RAMBlock *new_block, *block;

    rcu_read_lock();
    new_block = find_ram_block(addr);
    assert(new_block);
    assert(!new_block->idstr[0]);

    if (dev) {
        char *id = qdev_get_dev_path(dev);
        if (id) {
            snprintf(new_block->idstr, sizeof(new_block->idstr), "%s/", id);
            g_free(id);
        }
    }
    pstrcat(new_block->idstr, sizeof(new_block->idstr), name);

    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
        if (block != new_block && !strcmp(block->idstr, new_block->idstr)) {
            fprintf(stderr, "RAMBlock \"%s\" already registered, abort!\n",
                    new_block->idstr);
            abort();
        }
    }
    rcu_read_unlock();
}

/* Called with iothread lock held.  */
void qemu_ram_unset_idstr(ram_addr_t addr)
{
    RAMBlock *block;

    /* FIXME: arch_init.c assumes that this is not called throughout
     * migration.  Ignore the problem since hot-unplug during migration
     * does not work anyway.
     */

    rcu_read_lock();
    block = find_ram_block(addr);
    if (block) {
        memset(block->idstr, 0, sizeof(block->idstr));
    }
    rcu_read_unlock();
}
static int memory_try_enable_merging(void *addr, size_t len)
{
    if (!machine_mem_merge(current_machine)) {
        /* disabled by the user */
        return 0;
    }

    return qemu_madvise(addr, len, QEMU_MADV_MERGEABLE);
}

/* Only legal before guest might have detected the memory size: e.g. on
 * incoming migration, or right after reset.
 *
 * As the memory core doesn't know how memory is accessed, it is up to the
 * resize callback to update device state and/or add assertions to detect
 * misuse, if necessary.
 */
int qemu_ram_resize(ram_addr_t base, ram_addr_t newsize, Error **errp)
{
    RAMBlock *block = find_ram_block(base);

    assert(block);

    newsize = HOST_PAGE_ALIGN(newsize);

    if (block->used_length == newsize) {
        return 0;
    }

    if (!(block->flags & RAM_RESIZEABLE)) {
        error_setg_errno(errp, EINVAL,
                         "Length mismatch: %s: 0x" RAM_ADDR_FMT
                         " in != 0x" RAM_ADDR_FMT, block->idstr,
                         newsize, block->used_length);
        return -EINVAL;
    }

    if (block->max_length < newsize) {
        error_setg_errno(errp, EINVAL,
                         "Length too large: %s: 0x" RAM_ADDR_FMT
                         " > 0x" RAM_ADDR_FMT, block->idstr,
                         newsize, block->max_length);
        return -EINVAL;
    }

    cpu_physical_memory_clear_dirty_range(block->offset, block->used_length);
    block->used_length = newsize;
    cpu_physical_memory_set_dirty_range(block->offset, block->used_length,
                                        DIRTY_CLIENTS_ALL);
    memory_region_set_size(block->mr, newsize);
    if (block->resized) {
        block->resized(block->idstr, newsize, block->host);
    }
    return 0;
}
static ram_addr_t ram_block_add(RAMBlock *new_block, Error **errp)
{
    RAMBlock *block;
    RAMBlock *last_block = NULL;
    ram_addr_t old_ram_size, new_ram_size;

    old_ram_size = last_ram_offset() >> TARGET_PAGE_BITS;

    qemu_mutex_lock_ramlist();
    new_block->offset = find_ram_offset(new_block->max_length);

    if (!new_block->host) {
        if (xen_enabled()) {
            xen_ram_alloc(new_block->offset, new_block->max_length,
                          new_block->mr);
        } else {
            new_block->host = phys_mem_alloc(new_block->max_length,
                                             &new_block->mr->align);
            if (!new_block->host) {
                error_setg_errno(errp, errno,
                                 "cannot set up guest memory '%s'",
                                 memory_region_name(new_block->mr));
                qemu_mutex_unlock_ramlist();
                return -1;
            }
            memory_try_enable_merging(new_block->host, new_block->max_length);
        }
    }

    new_ram_size = MAX(old_ram_size,
                       (new_block->offset + new_block->max_length) >> TARGET_PAGE_BITS);
    if (new_ram_size > old_ram_size) {
        migration_bitmap_extend(old_ram_size, new_ram_size);
    }
    /* Keep the list sorted from biggest to smallest block.  Unlike QTAILQ,
     * QLIST (which has an RCU-friendly variant) does not have insertion at
     * tail, so save the last element in last_block.
     */
    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
        last_block = block;
        if (block->max_length < new_block->max_length) {
            break;
        }
    }
    if (block) {
        QLIST_INSERT_BEFORE_RCU(block, new_block, next);
    } else if (last_block) {
        QLIST_INSERT_AFTER_RCU(last_block, new_block, next);
    } else { /* list is empty */
        QLIST_INSERT_HEAD_RCU(&ram_list.blocks, new_block, next);
    }
    ram_list.mru_block = NULL;

    /* Write list before version */
    smp_wmb();
    ram_list.version++;
    qemu_mutex_unlock_ramlist();

    new_ram_size = last_ram_offset() >> TARGET_PAGE_BITS;

    if (new_ram_size > old_ram_size) {
        int i;

        /* ram_list.dirty_memory[] is protected by the iothread lock.  */
        for (i = 0; i < DIRTY_MEMORY_NUM; i++) {
            ram_list.dirty_memory[i] =
                bitmap_zero_extend(ram_list.dirty_memory[i],
                                   old_ram_size, new_ram_size);
        }
    }
    cpu_physical_memory_set_dirty_range(new_block->offset,
                                        new_block->used_length,
                                        DIRTY_CLIENTS_ALL);

    if (new_block->host) {
        qemu_ram_setup_dump(new_block->host, new_block->max_length);
        qemu_madvise(new_block->host, new_block->max_length, QEMU_MADV_HUGEPAGE);
        qemu_madvise(new_block->host, new_block->max_length, QEMU_MADV_DONTFORK);
        if (kvm_enabled()) {
            kvm_setup_guest_memory(new_block->host, new_block->max_length);
        }
    }

    /*
     * Under HAX, QEMU allocates the virtual address space and the HAX
     * kernel module populates it with physical memory.  There is currently
     * no paging, so the user should make sure enough free memory is
     * available in advance.
     */
    if (hax_enabled()) {
        int ret = hax_populate_ram((uint64_t)(uintptr_t)new_block->host,
                                   new_block->max_length);
        if (ret < 0) {
            fprintf(stderr, "HAX failed to populate ram\n");
            exit(-1);
        }
    }

    return new_block->offset;
}
#ifdef __linux__
ram_addr_t qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr,
                                    bool share, const char *mem_path,
                                    Error **errp)
{
    RAMBlock *new_block;
    ram_addr_t addr;
    Error *local_err = NULL;

    if (xen_enabled()) {
        error_setg(errp, "-mem-path not supported with Xen");
        return -1;
    }

    if (phys_mem_alloc != qemu_anon_ram_alloc) {
        /*
         * file_ram_alloc() needs to allocate just like
         * phys_mem_alloc, but we haven't bothered to provide
         * a hook there.
         */
        error_setg(errp,
                   "-mem-path not supported with this accelerator");
        return -1;
    }

    size = HOST_PAGE_ALIGN(size);
    new_block = g_malloc0(sizeof(*new_block));
    new_block->mr = mr;
    new_block->used_length = size;
    new_block->max_length = size;
    new_block->flags = share ? RAM_SHARED : 0;
    new_block->flags |= RAM_FILE;
    new_block->host = file_ram_alloc(new_block, size,
                                     mem_path, errp);
    if (!new_block->host) {
        g_free(new_block);
        return -1;
    }

    addr = ram_block_add(new_block, &local_err);
    if (local_err) {
        g_free(new_block);
        error_propagate(errp, local_err);
        return -1;
    }
    return addr;
}
#endif
static ram_addr_t qemu_ram_alloc_internal(ram_addr_t size, ram_addr_t max_size,
                                          void (*resized)(const char*,
                                                          uint64_t length,
                                                          void *host),
                                          void *host, bool resizeable,
                                          MemoryRegion *mr, Error **errp)
{
    RAMBlock *new_block;
    ram_addr_t addr;
    Error *local_err = NULL;

    size = HOST_PAGE_ALIGN(size);
    max_size = HOST_PAGE_ALIGN(max_size);
    new_block = g_malloc0(sizeof(*new_block));
    new_block->mr = mr;
    new_block->resized = resized;
    new_block->used_length = size;
    new_block->max_length = max_size;
    assert(max_size >= size);
    new_block->fd = -1;
    new_block->host = host;
    if (host) {
        new_block->flags |= RAM_PREALLOC;
    }
    if (resizeable) {
        new_block->flags |= RAM_RESIZEABLE;
    }
    addr = ram_block_add(new_block, &local_err);
    if (local_err) {
        g_free(new_block);
        error_propagate(errp, local_err);
        return -1;
    }
    return addr;
}

ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
                                   MemoryRegion *mr, Error **errp)
{
    return qemu_ram_alloc_internal(size, size, NULL, host, false, mr, errp);
}

ram_addr_t qemu_ram_alloc(ram_addr_t size, MemoryRegion *mr, Error **errp)
{
    return qemu_ram_alloc_internal(size, size, NULL, NULL, false, mr, errp);
}

ram_addr_t qemu_ram_alloc_resizeable(ram_addr_t size, ram_addr_t maxsz,
                                     void (*resized)(const char*,
                                                     uint64_t length,
                                                     void *host),
                                     MemoryRegion *mr, Error **errp)
{
    return qemu_ram_alloc_internal(size, maxsz, resized, NULL, true, mr, errp);
}
void qemu_ram_free_from_ptr(ram_addr_t addr)
{
    RAMBlock *block;

    qemu_mutex_lock_ramlist();
    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
        if (addr == block->offset) {
            QLIST_REMOVE_RCU(block, next);
            ram_list.mru_block = NULL;
            /* Write list before version */
            smp_wmb();
            ram_list.version++;
            g_free_rcu(block, rcu);
            break;
        }
    }
    qemu_mutex_unlock_ramlist();
}

static void reclaim_ramblock(RAMBlock *block)
{
    if (block->flags & RAM_PREALLOC) {
        ;
    } else if (xen_enabled()) {
        xen_invalidate_map_cache_entry(block->host);
#ifndef _WIN32
    } else if (block->fd >= 0) {
        if (block->flags & RAM_FILE) {
            qemu_ram_munmap(block->host, block->max_length);
        } else {
            munmap(block->host, block->max_length);
        }
        close(block->fd);
#endif
    } else {
        qemu_anon_ram_free(block->host, block->max_length);
    }
    g_free(block);
}

void qemu_ram_free(ram_addr_t addr)
{
    RAMBlock *block;

    qemu_mutex_lock_ramlist();
    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
        if (addr == block->offset) {
            QLIST_REMOVE_RCU(block, next);
            ram_list.mru_block = NULL;
            /* Write list before version */
            smp_wmb();
            ram_list.version++;
            call_rcu(block, reclaim_ramblock, rcu);
            break;
        }
    }
    qemu_mutex_unlock_ramlist();
}
#ifndef _WIN32
void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
{
    RAMBlock *block;
    ram_addr_t offset;
    int flags;
    void *area, *vaddr;

    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
        offset = addr - block->offset;
        if (offset < block->max_length) {
            vaddr = ramblock_ptr(block, offset);
            if (block->flags & RAM_PREALLOC) {
                ;
            } else if (xen_enabled()) {
                abort();
            } else {
                flags = MAP_FIXED;
                if (block->fd >= 0) {
                    flags |= (block->flags & RAM_SHARED ?
                              MAP_SHARED : MAP_PRIVATE);
                    area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
                                flags, block->fd, offset);
                } else {
                    /*
                     * Remap needs to match alloc.  Accelerators that
                     * set phys_mem_alloc never remap.  If they did,
                     * we'd need a remap hook here.
                     */
                    assert(phys_mem_alloc == qemu_anon_ram_alloc);

                    flags |= MAP_PRIVATE | MAP_ANONYMOUS;
                    area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
                                flags, -1, 0);
                }
                if (area != vaddr) {
                    fprintf(stderr, "Could not remap addr: "
                            RAM_ADDR_FMT "@" RAM_ADDR_FMT "\n",
                            length, addr);
                    exit(1);
                }
                memory_try_enable_merging(vaddr, length);
                qemu_ram_setup_dump(vaddr, length);
            }
        }
    }
}
#endif /* !_WIN32 */
int qemu_get_ram_fd(ram_addr_t addr)
{
    RAMBlock *block;
    int fd;

    rcu_read_lock();
    block = qemu_get_ram_block(addr);
    fd = block->fd;
    rcu_read_unlock();
    return fd;
}

void *qemu_get_ram_block_host_ptr(ram_addr_t addr)
{
    RAMBlock *block;
    void *ptr;

    rcu_read_lock();
    block = qemu_get_ram_block(addr);
    ptr = ramblock_ptr(block, 0);
    rcu_read_unlock();
    return ptr;
}

/* Return a host pointer to ram allocated with qemu_ram_alloc.
 * This should not be used for general purpose DMA.  Use address_space_map
 * or address_space_rw instead. For local memory (e.g. video ram) that the
 * device owns, use memory_region_get_ram_ptr.
 *
 * By the time this function returns, the returned pointer is not protected
 * by RCU anymore.  If the caller is not within an RCU critical section and
 * does not hold the iothread lock, it must have other means of protecting the
 * pointer, such as a reference to the region that includes the incoming
 * ram_addr.
 */
void *qemu_get_ram_ptr(ram_addr_t addr)
{
    RAMBlock *block;
    void *ptr;

    rcu_read_lock();
    block = qemu_get_ram_block(addr);

    if (xen_enabled() && block->host == NULL) {
        /* We need to check if the requested address is in the RAM
         * because we don't want to map the entire memory in QEMU.
         * In that case just map until the end of the page.
         */
        if (block->offset == 0) {
            ptr = xen_map_cache(addr, 0, 0);
            goto unlock;
        }

        block->host = xen_map_cache(block->offset, block->max_length, 1);
    }
    ptr = ramblock_ptr(block, addr - block->offset);

unlock:
    rcu_read_unlock();
    return ptr;
}
/* Return a host pointer to guest's ram. Similar to qemu_get_ram_ptr
 * but takes a size argument.
 *
 * By the time this function returns, the returned pointer is not protected
 * by RCU anymore.  If the caller is not within an RCU critical section and
 * does not hold the iothread lock, it must have other means of protecting the
 * pointer, such as a reference to the region that includes the incoming
 * ram_addr.
 */
static void *qemu_ram_ptr_length(ram_addr_t addr, hwaddr *size)
{
    RAMBlock *block;
    void *ptr;

    if (*size == 0) {
        return NULL;
    }
    if (xen_enabled()) {
        return xen_map_cache(addr, *size, 1);
    } else {
        rcu_read_lock();
        QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
            if (addr - block->offset < block->max_length) {
                if (addr - block->offset + *size > block->max_length)
                    *size = block->max_length - addr + block->offset;
                ptr = ramblock_ptr(block, addr - block->offset);
                rcu_read_unlock();
                return ptr;
            }
        }

        fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
        abort();
    }
}
/*
 * Translates a host ptr back to a RAMBlock, a ram_addr and an offset
 * in that RAMBlock.
 *
 * ptr: Host pointer to look up
 * round_offset: If true round the result offset down to a page boundary
 * *ram_addr: set to result ram_addr
 * *offset: set to result offset within the RAMBlock
 *
 * Returns: RAMBlock (or NULL if not found)
 *
 * By the time this function returns, the returned pointer is not protected
 * by RCU anymore.  If the caller is not within an RCU critical section and
 * does not hold the iothread lock, it must have other means of protecting the
 * pointer, such as a reference to the region that includes the incoming
 * ram_addr.
 */
RAMBlock *qemu_ram_block_from_host(void *ptr, bool round_offset,
                                   ram_addr_t *ram_addr,
                                   ram_addr_t *offset)
{
    RAMBlock *block;
    uint8_t *host = ptr;

    if (xen_enabled()) {
        rcu_read_lock();
        *ram_addr = xen_ram_addr_from_mapcache(ptr);
        block = qemu_get_ram_block(*ram_addr);
        if (block) {
            *offset = (host - block->host);
        }
        rcu_read_unlock();
        return block;
    }

    rcu_read_lock();
    block = atomic_rcu_read(&ram_list.mru_block);
    if (block && block->host && host - block->host < block->max_length) {
        goto found;
    }

    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
        /* This case appears when the block is not mapped. */
        if (block->host == NULL) {
            continue;
        }
        if (host - block->host < block->max_length) {
            goto found;
        }
    }

    rcu_read_unlock();
    return NULL;

found:
    *offset = (host - block->host);
    if (round_offset) {
        *offset &= TARGET_PAGE_MASK;
    }
    *ram_addr = block->offset + *offset;
    rcu_read_unlock();
    return block;
}
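/* e.g. a host pointer 0x23456 bytes into a block whose guest offset is
 * 0x40000000 yields offset 0x23456 (rounded down to 0x23000 when
 * round_offset is set, assuming 4 KiB pages) and ram_addr 0x40023000.
 */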
/*
 * Finds the named RAMBlock
 *
 * name: The name of RAMBlock to find
 *
 * Returns: RAMBlock (or NULL if not found)
 */
RAMBlock *qemu_ram_block_by_name(const char *name)
{
    RAMBlock *block;

    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
        if (!strcmp(name, block->idstr)) {
            return block;
        }
    }

    return NULL;
}

/* Some of the softmmu routines need to translate from a host pointer
   (typically a TLB entry) back to a ram offset.  */
MemoryRegion *qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr)
{
    RAMBlock *block;
    ram_addr_t offset; /* Not used */

    block = qemu_ram_block_from_host(ptr, false, ram_addr, &offset);

    if (!block) {
        return NULL;
    }

    return block->mr;
}
static void notdirty_mem_write(void *opaque, hwaddr ram_addr,
                               uint64_t val, unsigned size)
{
    if (!cpu_physical_memory_get_dirty_flag(ram_addr, DIRTY_MEMORY_CODE)) {
        tb_invalidate_phys_page_fast(ram_addr, size);
    }
    switch (size) {
    case 1:
        stb_p(qemu_get_ram_ptr(ram_addr), val);
        break;
    case 2:
        stw_p(qemu_get_ram_ptr(ram_addr), val);
        break;
    case 4:
        stl_p(qemu_get_ram_ptr(ram_addr), val);
        break;
    default:
        abort();
    }
    /* Set both VGA and migration bits for simplicity and to remove
     * the notdirty callback faster.
     */
    cpu_physical_memory_set_dirty_range(ram_addr, size,
                                        DIRTY_CLIENTS_NOCODE);
    /* we remove the notdirty callback only if the code has been
       flushed */
    if (!cpu_physical_memory_is_clean(ram_addr)) {
        tlb_set_dirty(current_cpu, current_cpu->mem_io_vaddr);
    }
}

static bool notdirty_mem_accepts(void *opaque, hwaddr addr,
                                 unsigned size, bool is_write)
{
    return is_write;
}

static const MemoryRegionOps notdirty_mem_ops = {
    .write = notdirty_mem_write,
    .valid.accepts = notdirty_mem_accepts,
    .endianness = DEVICE_NATIVE_ENDIAN,
};
/* Generate a debug exception if a watchpoint has been hit.  */
static void check_watchpoint(int offset, int len, MemTxAttrs attrs, int flags)
{
    CPUState *cpu = current_cpu;
    CPUArchState *env = cpu->env_ptr;
    target_ulong pc, cs_base;
    target_ulong vaddr;
    CPUWatchpoint *wp;
    int cpu_flags;

    if (cpu->watchpoint_hit) {
        /* We re-entered the check after replacing the TB. Now raise
         * the debug interrupt so that it will trigger after the
         * current instruction. */
        cpu_interrupt(cpu, CPU_INTERRUPT_DEBUG);
        return;
    }
    vaddr = (cpu->mem_io_vaddr & TARGET_PAGE_MASK) + offset;
    QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
        if (cpu_watchpoint_address_matches(wp, vaddr, len)
            && (wp->flags & flags)) {
            if (flags == BP_MEM_READ) {
                wp->flags |= BP_WATCHPOINT_HIT_READ;
            } else {
                wp->flags |= BP_WATCHPOINT_HIT_WRITE;
            }
            wp->hitaddr = vaddr;
            wp->hitattrs = attrs;
            if (!cpu->watchpoint_hit) {
                cpu->watchpoint_hit = wp;
                tb_check_watchpoint(cpu);
                if (wp->flags & BP_STOP_BEFORE_ACCESS) {
                    cpu->exception_index = EXCP_DEBUG;
                    cpu_loop_exit(cpu);
                } else {
                    cpu_get_tb_cpu_state(env, &pc, &cs_base, &cpu_flags);
                    tb_gen_code(cpu, pc, cs_base, cpu_flags, 1);
                    cpu_resume_from_signal(cpu, NULL);
                }
            }
        } else {
            wp->flags &= ~BP_WATCHPOINT_HIT;
        }
    }
}
/* Watchpoint access routines.  Watchpoints are inserted using TLB tricks,
   so these check for a hit then pass through to the normal out-of-line
   load/store functions.  */
static MemTxResult watch_mem_read(void *opaque, hwaddr addr, uint64_t *pdata,
                                  unsigned size, MemTxAttrs attrs)
{
    MemTxResult res;
    uint64_t data;

    check_watchpoint(addr & ~TARGET_PAGE_MASK, size, attrs, BP_MEM_READ);
    switch (size) {
    case 1:
        data = address_space_ldub(&address_space_memory, addr, attrs, &res);
        break;
    case 2:
        data = address_space_lduw(&address_space_memory, addr, attrs, &res);
        break;
    case 4:
        data = address_space_ldl(&address_space_memory, addr, attrs, &res);
        break;
    default:
        abort();
    }
    *pdata = data;
    return res;
}

static MemTxResult watch_mem_write(void *opaque, hwaddr addr,
                                   uint64_t val, unsigned size,
                                   MemTxAttrs attrs)
{
    MemTxResult res;

    check_watchpoint(addr & ~TARGET_PAGE_MASK, size, attrs, BP_MEM_WRITE);
    switch (size) {
    case 1:
        address_space_stb(&address_space_memory, addr, val, attrs, &res);
        break;
    case 2:
        address_space_stw(&address_space_memory, addr, val, attrs, &res);
        break;
    case 4:
        address_space_stl(&address_space_memory, addr, val, attrs, &res);
        break;
    default:
        abort();
    }
    return res;
}

static const MemoryRegionOps watch_mem_ops = {
    .read_with_attrs = watch_mem_read,
    .write_with_attrs = watch_mem_write,
    .endianness = DEVICE_NATIVE_ENDIAN,
};
static MemTxResult subpage_read(void *opaque, hwaddr addr, uint64_t *data,
                                unsigned len, MemTxAttrs attrs)
{
    subpage_t *subpage = opaque;
    uint8_t buf[8];
    MemTxResult res;

#if defined(DEBUG_SUBPAGE)
    printf("%s: subpage %p len %u addr " TARGET_FMT_plx "\n", __func__,
           subpage, len, addr);
#endif
    res = address_space_read(subpage->as, addr + subpage->base,
                             attrs, buf, len);
    if (res) {
        return res;
    }
    switch (len) {
    case 1:
        *data = ldub_p(buf);
        return MEMTX_OK;
    case 2:
        *data = lduw_p(buf);
        return MEMTX_OK;
    case 4:
        *data = ldl_p(buf);
        return MEMTX_OK;
    case 8:
        *data = ldq_p(buf);
        return MEMTX_OK;
    default:
        abort();
    }
}

static MemTxResult subpage_write(void *opaque, hwaddr addr,
                                 uint64_t value, unsigned len, MemTxAttrs attrs)
{
    subpage_t *subpage = opaque;
    uint8_t buf[8];

#if defined(DEBUG_SUBPAGE)
    printf("%s: subpage %p len %u addr " TARGET_FMT_plx
           " value %"PRIx64"\n",
           __func__, subpage, len, addr, value);
#endif
    switch (len) {
    case 1:
        stb_p(buf, value);
        break;
    case 2:
        stw_p(buf, value);
        break;
    case 4:
        stl_p(buf, value);
        break;
    case 8:
        stq_p(buf, value);
        break;
    default:
        abort();
    }
    return address_space_write(subpage->as, addr + subpage->base,
                               attrs, buf, len);
}

static bool subpage_accepts(void *opaque, hwaddr addr,
                            unsigned len, bool is_write)
{
    subpage_t *subpage = opaque;
#if defined(DEBUG_SUBPAGE)
    printf("%s: subpage %p %c len %u addr " TARGET_FMT_plx "\n",
           __func__, subpage, is_write ? 'w' : 'r', len, addr);
#endif

    return address_space_access_valid(subpage->as, addr + subpage->base,
                                      len, is_write);
}

static const MemoryRegionOps subpage_ops = {
    .read_with_attrs = subpage_read,
    .write_with_attrs = subpage_write,
    .impl.min_access_size = 1,
    .impl.max_access_size = 8,
    .valid.min_access_size = 1,
    .valid.max_access_size = 8,
    .valid.accepts = subpage_accepts,
    .endianness = DEVICE_NATIVE_ENDIAN,
};
static int subpage_register(subpage_t *mmio, uint32_t start, uint32_t end,
                            uint16_t section)
{
    int idx, eidx;

    if (start >= TARGET_PAGE_SIZE || end >= TARGET_PAGE_SIZE) {
        return -1;
    }
    idx = SUBPAGE_IDX(start);
    eidx = SUBPAGE_IDX(end);
#if defined(DEBUG_SUBPAGE)
    printf("%s: %p start %08x end %08x idx %08x eidx %08x section %d\n",
           __func__, mmio, start, end, idx, eidx, section);
#endif
    for (; idx <= eidx; idx++) {
        mmio->sub_section[idx] = section;
    }

    return 0;
}

static subpage_t *subpage_init(AddressSpace *as, hwaddr base)
{
    subpage_t *mmio;

    mmio = g_malloc0(sizeof(subpage_t));

    mmio->as = as;
    mmio->base = base;
    memory_region_init_io(&mmio->iomem, NULL, &subpage_ops, mmio,
                          NULL, TARGET_PAGE_SIZE);
    mmio->iomem.subpage = true;
#if defined(DEBUG_SUBPAGE)
    printf("%s: %p base " TARGET_FMT_plx " len %08x\n", __func__,
           mmio, base, TARGET_PAGE_SIZE);
#endif
    subpage_register(mmio, 0, TARGET_PAGE_SIZE-1, PHYS_SECTION_UNASSIGNED);

    return mmio;
}
static uint16_t dummy_section(PhysPageMap *map, AddressSpace *as,
                              MemoryRegion *mr)
{
    MemoryRegionSection section = {
        .address_space = as,
        .mr = mr,
        .offset_within_address_space = 0,
        .offset_within_region = 0,
        .size = int128_2_64(),
    };

    return phys_section_add(map, &section);
}

MemoryRegion *iotlb_to_region(CPUState *cpu, hwaddr index)
{
    CPUAddressSpace *cpuas = &cpu->cpu_ases[0];
    AddressSpaceDispatch *d = atomic_rcu_read(&cpuas->memory_dispatch);
    MemoryRegionSection *sections = d->map.sections;

    return sections[index & ~TARGET_PAGE_MASK].mr;
}
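/* The iotlb values built by memory_region_section_get_iotlb() keep the
 * section index in the low TARGET_PAGE_BITS and the page-aligned offset
 * above it, which is why masking with ~TARGET_PAGE_MASK recovers the
 * section here.
 */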
static void io_mem_init(void)
{
    memory_region_init_io(&io_mem_rom, NULL, &unassigned_mem_ops, NULL, NULL, UINT64_MAX);
    memory_region_init_io(&io_mem_unassigned, NULL, &unassigned_mem_ops, NULL,
                          NULL, UINT64_MAX);
    memory_region_init_io(&io_mem_notdirty, NULL, &notdirty_mem_ops, NULL,
                          NULL, UINT64_MAX);
    memory_region_init_io(&io_mem_watch, NULL, &watch_mem_ops, NULL,
                          NULL, UINT64_MAX);
}

static void mem_begin(MemoryListener *listener)
{
    AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
    AddressSpaceDispatch *d = g_new0(AddressSpaceDispatch, 1);
    uint16_t n;

    n = dummy_section(&d->map, as, &io_mem_unassigned);
    assert(n == PHYS_SECTION_UNASSIGNED);
    n = dummy_section(&d->map, as, &io_mem_notdirty);
    assert(n == PHYS_SECTION_NOTDIRTY);
    n = dummy_section(&d->map, as, &io_mem_rom);
    assert(n == PHYS_SECTION_ROM);
    n = dummy_section(&d->map, as, &io_mem_watch);
    assert(n == PHYS_SECTION_WATCH);

    d->phys_map = (PhysPageEntry) { .ptr = PHYS_MAP_NODE_NIL, .skip = 1 };
    d->as = as;
    as->next_dispatch = d;
}
static void address_space_dispatch_free(AddressSpaceDispatch *d)
{
    phys_sections_free(&d->map);
    g_free(d->map.nodes);
    g_free(d);
}

static void mem_commit(MemoryListener *listener)
{
    AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
    AddressSpaceDispatch *cur = as->dispatch;
    AddressSpaceDispatch *next = as->next_dispatch;

    phys_page_compact_all(next, next->map.nodes_nb);

    atomic_rcu_set(&as->dispatch, next);
    if (cur) {
        call_rcu(cur, address_space_dispatch_free, rcu);
    }
}

static void tcg_commit(MemoryListener *listener)
{
    CPUAddressSpace *cpuas;
    AddressSpaceDispatch *d;

    /* since each CPU stores ram addresses in its TLB cache, we must
       reset the modified entries */
    cpuas = container_of(listener, CPUAddressSpace, tcg_as_listener);
    cpu_reloading_memory_map();
    /* The CPU and TLB are protected by the iothread lock.
     * We reload the dispatch pointer now because cpu_reloading_memory_map()
     * may have split the RCU critical section.
     */
    d = atomic_rcu_read(&cpuas->as->dispatch);
    cpuas->memory_dispatch = d;
    tlb_flush(cpuas->cpu, 1);
}
void address_space_init_dispatch(AddressSpace *as)
{
    as->dispatch = NULL;
    as->dispatch_listener = (MemoryListener) {
        .begin = mem_begin,
        .commit = mem_commit,
        .region_add = mem_add,
        .region_nop = mem_add,
        .priority = 0,
    };
    memory_listener_register(&as->dispatch_listener, as);
}

void address_space_unregister(AddressSpace *as)
{
    memory_listener_unregister(&as->dispatch_listener);
}

void address_space_destroy_dispatch(AddressSpace *as)
{
    AddressSpaceDispatch *d = as->dispatch;

    atomic_rcu_set(&as->dispatch, NULL);
    if (d) {
        call_rcu(d, address_space_dispatch_free, rcu);
    }
}

static void memory_map_init(void)
{
    system_memory = g_malloc(sizeof(*system_memory));

    memory_region_init(system_memory, NULL, "system", UINT64_MAX);
    address_space_init(&address_space_memory, system_memory, "memory");

    system_io = g_malloc(sizeof(*system_io));
    memory_region_init_io(system_io, NULL, &unassigned_io_ops, NULL, "io",
                          65536);
    address_space_init(&address_space_io, system_io, "I/O");
}

MemoryRegion *get_system_memory(void)
{
    return system_memory;
}

MemoryRegion *get_system_io(void)
{
    return system_io;
}

#endif /* !defined(CONFIG_USER_ONLY) */
/* physical memory access (slow version, mainly for debug) */
#if defined(CONFIG_USER_ONLY)
int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
                        uint8_t *buf, int len, int is_write)
{
    int l, flags;
    target_ulong page;
    void *p;

    while (len > 0) {
        page = addr & TARGET_PAGE_MASK;
        l = (page + TARGET_PAGE_SIZE) - addr;
        if (l > len)
            l = len;
        flags = page_get_flags(page);
        if (!(flags & PAGE_VALID))
            return -1;
        if (is_write) {
            if (!(flags & PAGE_WRITE))
                return -1;
            /* XXX: this code should not depend on lock_user */
            if (!(p = lock_user(VERIFY_WRITE, addr, l, 0)))
                return -1;
            memcpy(p, buf, l);
            unlock_user(p, addr, l);
        } else {
            if (!(flags & PAGE_READ))
                return -1;
            /* XXX: this code should not depend on lock_user */
            if (!(p = lock_user(VERIFY_READ, addr, l, 1)))
                return -1;
            memcpy(buf, p, l);
            unlock_user(p, addr, 0);
        }
        len -= l;
        buf += l;
        addr += l;
    }
    return 0;
}

#else
static void invalidate_and_set_dirty(MemoryRegion *mr, hwaddr addr,
                                     hwaddr length)
{
    uint8_t dirty_log_mask = memory_region_get_dirty_log_mask(mr);
    /* No early return if dirty_log_mask is or becomes 0, because
     * cpu_physical_memory_set_dirty_range will still call
     * xen_modified_memory.
     */
    if (dirty_log_mask) {
        dirty_log_mask =
            cpu_physical_memory_range_includes_clean(addr, length, dirty_log_mask);
    }
    if (dirty_log_mask & (1 << DIRTY_MEMORY_CODE)) {
        tb_invalidate_phys_range(addr, addr + length);
        dirty_log_mask &= ~(1 << DIRTY_MEMORY_CODE);
    }
    cpu_physical_memory_set_dirty_range(addr, length, dirty_log_mask);
}

static int memory_access_size(MemoryRegion *mr, unsigned l, hwaddr addr)
{
    unsigned access_size_max = mr->ops->valid.max_access_size;

    /* Regions are assumed to support 1-4 byte accesses unless
       otherwise specified.  */
    if (access_size_max == 0) {
        access_size_max = 4;
    }

    /* Bound the maximum access by the alignment of the address.  */
    if (!mr->ops->impl.unaligned) {
        unsigned align_size_max = addr & -addr;
        if (align_size_max != 0 && align_size_max < access_size_max) {
            access_size_max = align_size_max;
        }
    }

    /* Don't attempt accesses larger than the maximum.  */
    if (l > access_size_max) {
        l = access_size_max;
    }
    l = pow2floor(l);

    return l;
}
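/* Example: addr = 0x1006 gives addr & -addr = 2, so even a region allowing
 * 4-byte accesses is limited to a 2-byte access at that address; l is then
 * rounded down to a power of two before dispatch.
 */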
static bool prepare_mmio_access(MemoryRegion *mr)
{
    bool unlocked = !qemu_mutex_iothread_locked();
    bool release_lock = false;

    if (unlocked && mr->global_locking) {
        qemu_mutex_lock_iothread();
        unlocked = false;
        release_lock = true;
    }
    if (mr->flush_coalesced_mmio) {
        if (unlocked) {
            qemu_mutex_lock_iothread();
        }
        qemu_flush_coalesced_mmio_buffer();
        if (unlocked) {
            qemu_mutex_unlock_iothread();
        }
    }

    return release_lock;
}
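/* Devices that rely on the big QEMU lock (mr->global_locking) get it taken
 * here on their behalf; the caller drops it again once the access completes,
 * so lock-free MMIO regions never pay for the global lock.
 */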
MemTxResult address_space_rw(AddressSpace *as, hwaddr addr, MemTxAttrs attrs,
                             uint8_t *buf, int len, bool is_write)
{
    hwaddr l;
    uint8_t *ptr;
    uint64_t val;
    hwaddr addr1;
    MemoryRegion *mr;
    MemTxResult result = MEMTX_OK;
    bool release_lock = false;

    rcu_read_lock();
    while (len > 0) {
        l = len;
        mr = address_space_translate(as, addr, &addr1, &l, is_write);

        if (is_write) {
            if (!memory_access_is_direct(mr, is_write)) {
                release_lock |= prepare_mmio_access(mr);
                l = memory_access_size(mr, l, addr1);
                /* XXX: could force current_cpu to NULL to avoid
                   potential bugs */
                switch (l) {
                case 8:
                    /* 64 bit write access */
                    val = ldq_p(buf);
                    result |= memory_region_dispatch_write(mr, addr1, val, 8,
                                                           attrs);
                    break;
                case 4:
                    /* 32 bit write access */
                    val = ldl_p(buf);
                    result |= memory_region_dispatch_write(mr, addr1, val, 4,
                                                           attrs);
                    break;
                case 2:
                    /* 16 bit write access */
                    val = lduw_p(buf);
                    result |= memory_region_dispatch_write(mr, addr1, val, 2,
                                                           attrs);
                    break;
                case 1:
                    /* 8 bit write access */
                    val = ldub_p(buf);
                    result |= memory_region_dispatch_write(mr, addr1, val, 1,
                                                           attrs);
                    break;
                default:
                    abort();
                }
            } else {
                addr1 += memory_region_get_ram_addr(mr);
                /* RAM case */
                ptr = qemu_get_ram_ptr(addr1);
                memcpy(ptr, buf, l);
                invalidate_and_set_dirty(mr, addr1, l);
            }
        } else {
            if (!memory_access_is_direct(mr, is_write)) {
                /* I/O case */
                release_lock |= prepare_mmio_access(mr);
                l = memory_access_size(mr, l, addr1);
                switch (l) {
                case 8:
                    /* 64 bit read access */
                    result |= memory_region_dispatch_read(mr, addr1, &val, 8,
                                                          attrs);
                    stq_p(buf, val);
                    break;
                case 4:
                    /* 32 bit read access */
                    result |= memory_region_dispatch_read(mr, addr1, &val, 4,
                                                          attrs);
                    stl_p(buf, val);
                    break;
                case 2:
                    /* 16 bit read access */
                    result |= memory_region_dispatch_read(mr, addr1, &val, 2,
                                                          attrs);
                    stw_p(buf, val);
                    break;
                case 1:
                    /* 8 bit read access */
                    result |= memory_region_dispatch_read(mr, addr1, &val, 1,
                                                          attrs);
                    stb_p(buf, val);
                    break;
                default:
                    abort();
                }
            } else {
                /* RAM case */
                ptr = qemu_get_ram_ptr(mr->ram_addr + addr1);
                memcpy(buf, ptr, l);
            }
        }

        if (release_lock) {
            qemu_mutex_unlock_iothread();
            release_lock = false;
        }

        len -= l;
        buf += l;
        addr += l;
    }
    rcu_read_unlock();

    return result;
}
MemTxResult address_space_write(AddressSpace *as, hwaddr addr, MemTxAttrs attrs,
                                const uint8_t *buf, int len)
{
    return address_space_rw(as, addr, attrs, (uint8_t *)buf, len, true);
}

MemTxResult address_space_read(AddressSpace *as, hwaddr addr, MemTxAttrs attrs,
                               uint8_t *buf, int len)
{
    return address_space_rw(as, addr, attrs, buf, len, false);
}

void cpu_physical_memory_rw(hwaddr addr, uint8_t *buf,
                            int len, int is_write)
{
    address_space_rw(&address_space_memory, addr, MEMTXATTRS_UNSPECIFIED,
                     buf, len, is_write);
}

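/* Illustrative sketch, not part of the original file: a device model
 * copying a guest-physical buffer to the host. The address and size
 * are made up for the example. */
static void __attribute__((unused))
example_read_guest_buffer(void)
{
    uint8_t data[64];

    /* is_write == 0: copy from guest memory at 0x1000 into data[] */
    cpu_physical_memory_rw(0x1000, data, sizeof(data), 0);
}
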
enum write_rom_type {
    WRITE_DATA,
    FLUSH_CACHE,
};

static inline void cpu_physical_memory_write_rom_internal(AddressSpace *as,
    hwaddr addr, const uint8_t *buf, int len, enum write_rom_type type)
{
    hwaddr l, addr1;
    uint8_t *ptr;
    MemoryRegion *mr;

    rcu_read_lock();
    while (len > 0) {
        l = len;
        mr = address_space_translate(as, addr, &addr1, &l, true);

        if (!(memory_region_is_ram(mr) ||
              memory_region_is_romd(mr))) {
            l = memory_access_size(mr, l, addr1);
        } else {
            /* ROM/RAM case */
            addr1 += memory_region_get_ram_addr(mr);
            ptr = qemu_get_ram_ptr(addr1);
            if (type == WRITE_DATA) {
                memcpy(ptr, buf, l);
                invalidate_and_set_dirty(mr, addr1, l);
            } else { /* FLUSH_CACHE */
                flush_icache_range((uintptr_t)ptr, (uintptr_t)ptr + l);
            }
        }
        len -= l;
        buf += l;
        addr += l;
    }
    rcu_read_unlock();
}

/* used for ROM loading : can write in RAM and ROM */
void cpu_physical_memory_write_rom(AddressSpace *as, hwaddr addr,
                                   const uint8_t *buf, int len)
{
    cpu_physical_memory_write_rom_internal(as, addr, buf, len, WRITE_DATA);
}

void cpu_flush_icache_range(hwaddr start, int len)
{
    /*
     * This function should do the same thing as an icache flush that was
     * triggered from within the guest. For TCG we are always cache coherent,
     * so there is no need to flush anything. For KVM / Xen we need to flush
     * the host's instruction cache at least.
     */
    if (tcg_enabled()) {
        return;
    }

    cpu_physical_memory_write_rom_internal(&address_space_memory,
                                           start, NULL, len, FLUSH_CACHE);
}

typedef struct {
    MemoryRegion *mr;
    void *buffer;
    hwaddr addr;
    hwaddr len;
    bool in_use;
} BounceBuffer;

static BounceBuffer bounce;

typedef struct MapClient {
    QEMUBH *bh;
    QLIST_ENTRY(MapClient) link;
} MapClient;

QemuMutex map_client_list_lock;
static QLIST_HEAD(map_client_list, MapClient) map_client_list
    = QLIST_HEAD_INITIALIZER(map_client_list);

static void cpu_unregister_map_client_do(MapClient *client)
{
    QLIST_REMOVE(client, link);
    g_free(client);
}

static void cpu_notify_map_clients_locked(void)
{
    MapClient *client;

    while (!QLIST_EMPTY(&map_client_list)) {
        client = QLIST_FIRST(&map_client_list);
        qemu_bh_schedule(client->bh);
        cpu_unregister_map_client_do(client);
    }
}

void cpu_register_map_client(QEMUBH *bh)
{
    MapClient *client = g_malloc(sizeof(*client));

    qemu_mutex_lock(&map_client_list_lock);
    client->bh = bh;
    QLIST_INSERT_HEAD(&map_client_list, client, link);
    if (!atomic_read(&bounce.in_use)) {
        cpu_notify_map_clients_locked();
    }
    qemu_mutex_unlock(&map_client_list_lock);
}

void cpu_exec_init_all(void)
{
    qemu_mutex_init(&ram_list.mutex);
    io_mem_init();
    memory_map_init();
    qemu_mutex_init(&map_client_list_lock);
}

void cpu_unregister_map_client(QEMUBH *bh)
{
    MapClient *client;

    qemu_mutex_lock(&map_client_list_lock);
    QLIST_FOREACH(client, &map_client_list, link) {
        if (client->bh == bh) {
            cpu_unregister_map_client_do(client);
            break;
        }
    }
    qemu_mutex_unlock(&map_client_list_lock);
}

static void cpu_notify_map_clients(void)
{
    qemu_mutex_lock(&map_client_list_lock);
    cpu_notify_map_clients_locked();
    qemu_mutex_unlock(&map_client_list_lock);
}

bool address_space_access_valid(AddressSpace *as, hwaddr addr, int len, bool is_write)
{
    MemoryRegion *mr;
    hwaddr l, xlat;

    rcu_read_lock();
    while (len > 0) {
        l = len;
        mr = address_space_translate(as, addr, &xlat, &l, is_write);
        if (!memory_access_is_direct(mr, is_write)) {
            l = memory_access_size(mr, l, addr);
            if (!memory_region_access_valid(mr, xlat, l, is_write)) {
                rcu_read_unlock();
                return false;
            }
        }
        len -= l;
        addr += l;
    }
    rcu_read_unlock();
    return true;
}

/* Map a physical memory region into a host virtual address.
 * May map a subset of the requested range, given by and returned in *plen.
 * May return NULL if resources needed to perform the mapping are exhausted.
 * Use only for reads OR writes - not for read-modify-write operations.
 * Use cpu_register_map_client() to know when retrying the map operation is
 * likely to succeed.
 */
void *address_space_map(AddressSpace *as, hwaddr addr,
                        hwaddr *plen, bool is_write)
{
    hwaddr len = *plen;
    hwaddr done = 0;
    hwaddr l, xlat, base;
    MemoryRegion *mr, *this_mr;
    ram_addr_t raddr;

    if (len == 0) {
        return NULL;
    }

    l = len;
    rcu_read_lock();
    mr = address_space_translate(as, addr, &xlat, &l, is_write);

    if (!memory_access_is_direct(mr, is_write)) {
        if (atomic_xchg(&bounce.in_use, true)) {
            rcu_read_unlock();
            return NULL;
        }
        /* Avoid unbounded allocations */
        l = MIN(l, TARGET_PAGE_SIZE);
        bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, l);
        bounce.addr = addr;
        bounce.len = l;

        memory_region_ref(mr);
        bounce.mr = mr;
        if (!is_write) {
            address_space_read(as, addr, MEMTXATTRS_UNSPECIFIED,
                               bounce.buffer, l);
        }

        rcu_read_unlock();
        *plen = l;
        return bounce.buffer;
    }

    base = xlat;
    raddr = memory_region_get_ram_addr(mr);

    for (;;) {
        len -= l;
        addr += l;
        done += l;
        if (len == 0) {
            break;
        }
        l = len;
        this_mr = address_space_translate(as, addr, &xlat, &l, is_write);
        if (this_mr != mr || xlat != base + done) {
            break;
        }
    }

    memory_region_ref(mr);
    rcu_read_unlock();
    *plen = done;
    return qemu_ram_ptr_length(raddr + base, plen);
}

/* Unmaps a memory region previously mapped by address_space_map().
 * Will also mark the memory as dirty if is_write == 1. access_len gives
 * the amount of memory that was actually read or written by the caller.
 */
void address_space_unmap(AddressSpace *as, void *buffer, hwaddr len,
                         int is_write, hwaddr access_len)
{
    if (buffer != bounce.buffer) {
        MemoryRegion *mr;
        ram_addr_t addr1;

        mr = qemu_ram_addr_from_host(buffer, &addr1);
        assert(mr != NULL);
        if (is_write) {
            invalidate_and_set_dirty(mr, addr1, access_len);
        }
        if (xen_enabled()) {
            xen_invalidate_map_cache_entry(buffer);
        }
        memory_region_unref(mr);
        return;
    }
    if (is_write) {
        address_space_write(as, bounce.addr, MEMTXATTRS_UNSPECIFIED,
                            bounce.buffer, access_len);
    }
    qemu_vfree(bounce.buffer);
    bounce.buffer = NULL;
    memory_region_unref(bounce.mr);
    atomic_mb_set(&bounce.in_use, false);
    cpu_notify_map_clients();
}

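/* Illustrative sketch, not part of the original file: the intended
 * map/unmap pattern, including the bounce-buffer fallback via
 * cpu_register_map_client(). The function and its BH argument are
 * hypothetical. */
static void __attribute__((unused))
example_dma_write(AddressSpace *as, hwaddr addr, hwaddr size, QEMUBH *retry_bh)
{
    hwaddr len = size;
    void *host = address_space_map(as, addr, &len, true);

    if (!host) {
        /* Bounce buffer in use: retry from the BH once it is free. */
        cpu_register_map_client(retry_bh);
        return;
    }
    memset(host, 0, len);                    /* the "DMA" itself */
    address_space_unmap(as, host, len, true, len);
}
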
void *cpu_physical_memory_map(hwaddr addr,
                              hwaddr *plen,
                              int is_write)
{
    return address_space_map(&address_space_memory, addr, plen, is_write);
}

void cpu_physical_memory_unmap(void *buffer, hwaddr len,
                               int is_write, hwaddr access_len)
{
    return address_space_unmap(&address_space_memory, buffer, len,
                               is_write, access_len);
}

/* warning: addr must be aligned */
static inline uint32_t address_space_ldl_internal(AddressSpace *as, hwaddr addr,
                                                  MemTxAttrs attrs,
                                                  MemTxResult *result,
                                                  enum device_endian endian)
{
    uint8_t *ptr;
    uint64_t val;
    MemoryRegion *mr;
    hwaddr l = 4, addr1;
    MemTxResult r;
    bool release_lock = false;

    rcu_read_lock();
    mr = address_space_translate(as, addr, &addr1, &l, false);
    if (l < 4 || !memory_access_is_direct(mr, false)) {
        release_lock |= prepare_mmio_access(mr);

        /* I/O case */
        r = memory_region_dispatch_read(mr, addr1, &val, 4, attrs);
#if defined(TARGET_WORDS_BIGENDIAN)
        if (endian == DEVICE_LITTLE_ENDIAN) {
            val = bswap32(val);
        }
#else
        if (endian == DEVICE_BIG_ENDIAN) {
            val = bswap32(val);
        }
#endif
    } else {
        /* RAM case */
        ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
                                & TARGET_PAGE_MASK) + addr1);
        switch (endian) {
        case DEVICE_LITTLE_ENDIAN: val = ldl_le_p(ptr); break;
        case DEVICE_BIG_ENDIAN:    val = ldl_be_p(ptr); break;
        default:                   val = ldl_p(ptr);    break;
        }
        r = MEMTX_OK;
    }
    if (result) {
        *result = r;
    }
    if (release_lock) {
        qemu_mutex_unlock_iothread();
    }
    rcu_read_unlock();
    return val;
}

uint32_t address_space_ldl(AddressSpace *as, hwaddr addr,
                           MemTxAttrs attrs, MemTxResult *result)
{
    return address_space_ldl_internal(as, addr, attrs, result,
                                      DEVICE_NATIVE_ENDIAN);
}

uint32_t address_space_ldl_le(AddressSpace *as, hwaddr addr,
                              MemTxAttrs attrs, MemTxResult *result)
{
    return address_space_ldl_internal(as, addr, attrs, result,
                                      DEVICE_LITTLE_ENDIAN);
}

uint32_t address_space_ldl_be(AddressSpace *as, hwaddr addr,
                              MemTxAttrs attrs, MemTxResult *result)
{
    return address_space_ldl_internal(as, addr, attrs, result,
                                      DEVICE_BIG_ENDIAN);
}

uint32_t ldl_phys(AddressSpace *as, hwaddr addr)
{
    return address_space_ldl(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
}

uint32_t ldl_le_phys(AddressSpace *as, hwaddr addr)
{
    return address_space_ldl_le(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
}

uint32_t ldl_be_phys(AddressSpace *as, hwaddr addr)
{
    return address_space_ldl_be(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
}

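/* Illustrative sketch, not part of the original file: data with a fixed
 * wire format should use the explicit-endian loads so the result is the
 * same on any host/target combination. The address is made up. */
static uint32_t __attribute__((unused))
example_read_le_descriptor(AddressSpace *as)
{
    /* A little-endian 32-bit field at a hypothetical guest address. */
    return ldl_le_phys(as, 0x4000);
}
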
/* warning: addr must be aligned */
static inline uint64_t address_space_ldq_internal(AddressSpace *as, hwaddr addr,
                                                  MemTxAttrs attrs,
                                                  MemTxResult *result,
                                                  enum device_endian endian)
{
    uint8_t *ptr;
    uint64_t val;
    MemoryRegion *mr;
    hwaddr l = 8, addr1;
    MemTxResult r;
    bool release_lock = false;

    rcu_read_lock();
    mr = address_space_translate(as, addr, &addr1, &l,
                                 false);
    if (l < 8 || !memory_access_is_direct(mr, false)) {
        release_lock |= prepare_mmio_access(mr);

        /* I/O case */
        r = memory_region_dispatch_read(mr, addr1, &val, 8, attrs);
#if defined(TARGET_WORDS_BIGENDIAN)
        if (endian == DEVICE_LITTLE_ENDIAN) {
            val = bswap64(val);
        }
#else
        if (endian == DEVICE_BIG_ENDIAN) {
            val = bswap64(val);
        }
#endif
    } else {
        /* RAM case */
        ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
                                & TARGET_PAGE_MASK) + addr1);
        switch (endian) {
        case DEVICE_LITTLE_ENDIAN: val = ldq_le_p(ptr); break;
        case DEVICE_BIG_ENDIAN:    val = ldq_be_p(ptr); break;
        default:                   val = ldq_p(ptr);    break;
        }
        r = MEMTX_OK;
    }
    if (result) {
        *result = r;
    }
    if (release_lock) {
        qemu_mutex_unlock_iothread();
    }
    rcu_read_unlock();
    return val;
}

uint64_t address_space_ldq(AddressSpace *as, hwaddr addr,
                           MemTxAttrs attrs, MemTxResult *result)
{
    return address_space_ldq_internal(as, addr, attrs, result,
                                      DEVICE_NATIVE_ENDIAN);
}

uint64_t address_space_ldq_le(AddressSpace *as, hwaddr addr,
                              MemTxAttrs attrs, MemTxResult *result)
{
    return address_space_ldq_internal(as, addr, attrs, result,
                                      DEVICE_LITTLE_ENDIAN);
}

uint64_t address_space_ldq_be(AddressSpace *as, hwaddr addr,
                              MemTxAttrs attrs, MemTxResult *result)
{
    return address_space_ldq_internal(as, addr, attrs, result,
                                      DEVICE_BIG_ENDIAN);
}

uint64_t ldq_phys(AddressSpace *as, hwaddr addr)
{
    return address_space_ldq(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
}

uint64_t ldq_le_phys(AddressSpace *as, hwaddr addr)
{
    return address_space_ldq_le(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
}

uint64_t ldq_be_phys(AddressSpace *as, hwaddr addr)
{
    return address_space_ldq_be(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
}

uint32_t address_space_ldub(AddressSpace *as, hwaddr addr,
                            MemTxAttrs attrs, MemTxResult *result)
{
    uint8_t val;
    MemTxResult r;

    r = address_space_rw(as, addr, attrs, &val, 1, 0);
    if (result) {
        *result = r;
    }
    return val;
}

uint32_t ldub_phys(AddressSpace *as, hwaddr addr)
{
    return address_space_ldub(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
}

/* warning: addr must be aligned */
static inline uint32_t address_space_lduw_internal(AddressSpace *as,
                                                   hwaddr addr,
                                                   MemTxAttrs attrs,
                                                   MemTxResult *result,
                                                   enum device_endian endian)
{
    uint8_t *ptr;
    uint64_t val;
    MemoryRegion *mr;
    hwaddr l = 2, addr1;
    MemTxResult r;
    bool release_lock = false;

    rcu_read_lock();
    mr = address_space_translate(as, addr, &addr1, &l,
                                 false);
    if (l < 2 || !memory_access_is_direct(mr, false)) {
        release_lock |= prepare_mmio_access(mr);

        /* I/O case */
        r = memory_region_dispatch_read(mr, addr1, &val, 2, attrs);
#if defined(TARGET_WORDS_BIGENDIAN)
        if (endian == DEVICE_LITTLE_ENDIAN) {
            val = bswap16(val);
        }
#else
        if (endian == DEVICE_BIG_ENDIAN) {
            val = bswap16(val);
        }
#endif
    } else {
        /* RAM case */
        ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
                                & TARGET_PAGE_MASK) + addr1);
        switch (endian) {
        case DEVICE_LITTLE_ENDIAN: val = lduw_le_p(ptr); break;
        case DEVICE_BIG_ENDIAN:    val = lduw_be_p(ptr); break;
        default:                   val = lduw_p(ptr);    break;
        }
        r = MEMTX_OK;
    }
    if (result) {
        *result = r;
    }
    if (release_lock) {
        qemu_mutex_unlock_iothread();
    }
    rcu_read_unlock();
    return val;
}

uint32_t address_space_lduw(AddressSpace *as, hwaddr addr,
                            MemTxAttrs attrs, MemTxResult *result)
{
    return address_space_lduw_internal(as, addr, attrs, result,
                                       DEVICE_NATIVE_ENDIAN);
}

uint32_t address_space_lduw_le(AddressSpace *as, hwaddr addr,
                               MemTxAttrs attrs, MemTxResult *result)
{
    return address_space_lduw_internal(as, addr, attrs, result,
                                       DEVICE_LITTLE_ENDIAN);
}

uint32_t address_space_lduw_be(AddressSpace *as, hwaddr addr,
                               MemTxAttrs attrs, MemTxResult *result)
{
    return address_space_lduw_internal(as, addr, attrs, result,
                                       DEVICE_BIG_ENDIAN);
}

uint32_t lduw_phys(AddressSpace *as, hwaddr addr)
{
    return address_space_lduw(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
}

uint32_t lduw_le_phys(AddressSpace *as, hwaddr addr)
{
    return address_space_lduw_le(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
}

uint32_t lduw_be_phys(AddressSpace *as, hwaddr addr)
{
    return address_space_lduw_be(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
}

/* warning: addr must be aligned. The ram page is not marked as dirty
   and the code inside is not invalidated. It is useful if the dirty
   bits are used to track modified PTEs */
void address_space_stl_notdirty(AddressSpace *as, hwaddr addr, uint32_t val,
                                MemTxAttrs attrs, MemTxResult *result)
{
    uint8_t *ptr;
    MemoryRegion *mr;
    hwaddr l = 4, addr1;
    MemTxResult r;
    uint8_t dirty_log_mask;
    bool release_lock = false;

    rcu_read_lock();
    mr = address_space_translate(as, addr, &addr1, &l,
                                 true);
    if (l < 4 || !memory_access_is_direct(mr, true)) {
        release_lock |= prepare_mmio_access(mr);

        r = memory_region_dispatch_write(mr, addr1, val, 4, attrs);
    } else {
        addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
        ptr = qemu_get_ram_ptr(addr1);
        stl_p(ptr, val);

        dirty_log_mask = memory_region_get_dirty_log_mask(mr);
        dirty_log_mask &= ~(1 << DIRTY_MEMORY_CODE);
        cpu_physical_memory_set_dirty_range(addr1, 4, dirty_log_mask);
        r = MEMTX_OK;
    }
    if (result) {
        *result = r;
    }
    if (release_lock) {
        qemu_mutex_unlock_iothread();
    }
    rcu_read_unlock();
}

void stl_phys_notdirty(AddressSpace *as, hwaddr addr, uint32_t val)
{
    address_space_stl_notdirty(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
}

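/* Illustrative sketch, not part of the original file: target MMU code
 * can update PTE status bits without flagging the page as dirty, which
 * is what the notdirty variant is for. The bit position is made up. */
static void __attribute__((unused))
example_set_pte_accessed(AddressSpace *as, hwaddr pte_addr)
{
    uint32_t pte = ldl_phys(as, pte_addr);

    stl_phys_notdirty(as, pte_addr, pte | (1u << 5));
}
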
/* warning: addr must be aligned */
static inline void address_space_stl_internal(AddressSpace *as,
                                              hwaddr addr, uint32_t val,
                                              MemTxAttrs attrs,
                                              MemTxResult *result,
                                              enum device_endian endian)
{
    uint8_t *ptr;
    MemoryRegion *mr;
    hwaddr l = 4, addr1;
    MemTxResult r;
    bool release_lock = false;

    rcu_read_lock();
    mr = address_space_translate(as, addr, &addr1, &l,
                                 true);
    if (l < 4 || !memory_access_is_direct(mr, true)) {
        release_lock |= prepare_mmio_access(mr);

#if defined(TARGET_WORDS_BIGENDIAN)
        if (endian == DEVICE_LITTLE_ENDIAN) {
            val = bswap32(val);
        }
#else
        if (endian == DEVICE_BIG_ENDIAN) {
            val = bswap32(val);
        }
#endif
        r = memory_region_dispatch_write(mr, addr1, val, 4, attrs);
    } else {
        /* RAM case */
        addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
        ptr = qemu_get_ram_ptr(addr1);
        switch (endian) {
        case DEVICE_LITTLE_ENDIAN: stl_le_p(ptr, val); break;
        case DEVICE_BIG_ENDIAN:    stl_be_p(ptr, val); break;
        default:                   stl_p(ptr, val);    break;
        }
        invalidate_and_set_dirty(mr, addr1, 4);
        r = MEMTX_OK;
    }
    if (result) {
        *result = r;
    }
    if (release_lock) {
        qemu_mutex_unlock_iothread();
    }
    rcu_read_unlock();
}

void address_space_stl(AddressSpace *as, hwaddr addr, uint32_t val,
                       MemTxAttrs attrs, MemTxResult *result)
{
    address_space_stl_internal(as, addr, val, attrs, result,
                               DEVICE_NATIVE_ENDIAN);
}

void address_space_stl_le(AddressSpace *as, hwaddr addr, uint32_t val,
                          MemTxAttrs attrs, MemTxResult *result)
{
    address_space_stl_internal(as, addr, val, attrs, result,
                               DEVICE_LITTLE_ENDIAN);
}

void address_space_stl_be(AddressSpace *as, hwaddr addr, uint32_t val,
                          MemTxAttrs attrs, MemTxResult *result)
{
    address_space_stl_internal(as, addr, val, attrs, result,
                               DEVICE_BIG_ENDIAN);
}

void stl_phys(AddressSpace *as, hwaddr addr, uint32_t val)
{
    address_space_stl(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
}

void stl_le_phys(AddressSpace *as, hwaddr addr, uint32_t val)
{
    address_space_stl_le(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
}

void stl_be_phys(AddressSpace *as, hwaddr addr, uint32_t val)
{
    address_space_stl_be(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
}

void address_space_stb(AddressSpace *as, hwaddr addr, uint32_t val,
                       MemTxAttrs attrs, MemTxResult *result)
{
    uint8_t v = val;
    MemTxResult r;

    r = address_space_rw(as, addr, attrs, &v, 1, 1);
    if (result) {
        *result = r;
    }
}

void stb_phys(AddressSpace *as, hwaddr addr, uint32_t val)
{
    address_space_stb(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
}

/* warning: addr must be aligned */
static inline void address_space_stw_internal(AddressSpace *as,
                                              hwaddr addr, uint32_t val,
                                              MemTxAttrs attrs,
                                              MemTxResult *result,
                                              enum device_endian endian)
{
    uint8_t *ptr;
    MemoryRegion *mr;
    hwaddr l = 2, addr1;
    MemTxResult r;
    bool release_lock = false;

    rcu_read_lock();
    mr = address_space_translate(as, addr, &addr1, &l, true);
    if (l < 2 || !memory_access_is_direct(mr, true)) {
        release_lock |= prepare_mmio_access(mr);

#if defined(TARGET_WORDS_BIGENDIAN)
        if (endian == DEVICE_LITTLE_ENDIAN) {
            val = bswap16(val);
        }
#else
        if (endian == DEVICE_BIG_ENDIAN) {
            val = bswap16(val);
        }
#endif
        r = memory_region_dispatch_write(mr, addr1, val, 2, attrs);
    } else {
        /* RAM case */
        addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
        ptr = qemu_get_ram_ptr(addr1);
        switch (endian) {
        case DEVICE_LITTLE_ENDIAN: stw_le_p(ptr, val); break;
        case DEVICE_BIG_ENDIAN:    stw_be_p(ptr, val); break;
        default:                   stw_p(ptr, val);    break;
        }
        invalidate_and_set_dirty(mr, addr1, 2);
        r = MEMTX_OK;
    }
    if (result) {
        *result = r;
    }
    if (release_lock) {
        qemu_mutex_unlock_iothread();
    }
    rcu_read_unlock();
}

void address_space_stw(AddressSpace *as, hwaddr addr, uint32_t val,
                       MemTxAttrs attrs, MemTxResult *result)
{
    address_space_stw_internal(as, addr, val, attrs, result,
                               DEVICE_NATIVE_ENDIAN);
}

void address_space_stw_le(AddressSpace *as, hwaddr addr, uint32_t val,
                          MemTxAttrs attrs, MemTxResult *result)
{
    address_space_stw_internal(as, addr, val, attrs, result,
                               DEVICE_LITTLE_ENDIAN);
}

void address_space_stw_be(AddressSpace *as, hwaddr addr, uint32_t val,
                          MemTxAttrs attrs, MemTxResult *result)
{
    address_space_stw_internal(as, addr, val, attrs, result,
                               DEVICE_BIG_ENDIAN);
}

void stw_phys(AddressSpace *as, hwaddr addr, uint32_t val)
{
    address_space_stw(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
}

void stw_le_phys(AddressSpace *as, hwaddr addr, uint32_t val)
{
    address_space_stw_le(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
}

void stw_be_phys(AddressSpace *as, hwaddr addr, uint32_t val)
{
    address_space_stw_be(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
}

void address_space_stq(AddressSpace *as, hwaddr addr, uint64_t val,
                       MemTxAttrs attrs, MemTxResult *result)
{
    MemTxResult r;

    val = tswap64(val);
    r = address_space_rw(as, addr, attrs, (void *) &val, 8, 1);
    if (result) {
        *result = r;
    }
}

void address_space_stq_le(AddressSpace *as, hwaddr addr, uint64_t val,
                          MemTxAttrs attrs, MemTxResult *result)
{
    MemTxResult r;

    val = cpu_to_le64(val);
    r = address_space_rw(as, addr, attrs, (void *) &val, 8, 1);
    if (result) {
        *result = r;
    }
}

void address_space_stq_be(AddressSpace *as, hwaddr addr, uint64_t val,
                          MemTxAttrs attrs, MemTxResult *result)
{
    MemTxResult r;

    val = cpu_to_be64(val);
    r = address_space_rw(as, addr, attrs, (void *) &val, 8, 1);
    if (result) {
        *result = r;
    }
}

void stq_phys(AddressSpace *as, hwaddr addr, uint64_t val)
{
    address_space_stq(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
}

void stq_le_phys(AddressSpace *as, hwaddr addr, uint64_t val)
{
    address_space_stq_le(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
}

void stq_be_phys(AddressSpace *as, hwaddr addr, uint64_t val)
{
    address_space_stq_be(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
}

/* virtual memory access for debug (includes writing to ROM) */
int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
                        uint8_t *buf, int len, int is_write)
{
    int l;
    hwaddr phys_addr;
    target_ulong page;

    while (len > 0) {
        page = addr & TARGET_PAGE_MASK;
        phys_addr = cpu_get_phys_page_debug(cpu, page);
        /* if no physical page mapped, return an error */
        if (phys_addr == -1)
            return -1;
        l = (page + TARGET_PAGE_SIZE) - addr;
        if (l > len)
            l = len;
        phys_addr += (addr & ~TARGET_PAGE_MASK);
        if (is_write) {
            cpu_physical_memory_write_rom(cpu->as, phys_addr, buf, l);
        } else {
            address_space_rw(cpu->as, phys_addr, MEMTXATTRS_UNSPECIFIED,
                             buf, l, 0);
        }
        len -= l;
        buf += l;
        addr += l;
    }
    return 0;
}

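/* Illustrative sketch, not part of the original file: how a debugger
 * stub might peek at guest virtual memory through the CPU's MMU. The
 * function name is hypothetical. */
static uint32_t __attribute__((unused))
example_debug_peek32(CPUState *cpu, target_ulong vaddr)
{
    uint8_t bytes[4] = { 0 };

    if (cpu_memory_rw_debug(cpu, vaddr, bytes, sizeof(bytes), 0) < 0) {
        return 0; /* page not mapped */
    }
    return ldl_p(bytes); /* assemble in target endianness */
}
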
/*
 * Allows code that needs to deal with migration bitmaps etc to still be built
 * target independent.
 */
size_t qemu_target_page_bits(void)
{
    return TARGET_PAGE_BITS;
}

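/* Illustrative sketch, not part of the original file: deriving the page
 * size from the exported bit count, as target-independent migration
 * code would. */
static inline size_t __attribute__((unused))
example_target_page_size(void)
{
    return (size_t)1 << qemu_target_page_bits();
}
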
#endif

/*
 * A helper function for the _utterly broken_ virtio device model to find out if
 * it's running on a big endian machine. Don't do this at home kids!
 */
bool target_words_bigendian(void);
bool target_words_bigendian(void)
{
#if defined(TARGET_WORDS_BIGENDIAN)
    return true;
#else
    return false;
#endif
}

#ifndef CONFIG_USER_ONLY
bool cpu_physical_memory_is_io(hwaddr phys_addr)
{
    MemoryRegion *mr;
    hwaddr l = 1;
    bool res;

    rcu_read_lock();
    mr = address_space_translate(&address_space_memory,
                                 phys_addr, &phys_addr, &l, false);

    res = !(memory_region_is_ram(mr) || memory_region_is_romd(mr));
    rcu_read_unlock();
    return res;
}

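/* Illustrative sketch, not part of the original file: a memory dumper
 * might skip MMIO pages, since reading device registers can have side
 * effects. The helper name is hypothetical. */
static bool __attribute__((unused))
example_safe_to_dump(hwaddr phys_addr)
{
    return !cpu_physical_memory_is_io(phys_addr);
}
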
int qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque)
{
    RAMBlock *block;
    int ret = 0;

    rcu_read_lock();
    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
        ret = func(block->idstr, block->host, block->offset,
                   block->used_length, opaque);
        if (ret) {
            break;
        }
    }
    rcu_read_unlock();
    return ret;
}

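/* Illustrative sketch, not part of the original file: a callback with
 * the RAMBlockIterFunc signature that totals the used RAM, e.g.
 *   uint64_t total = 0;
 *   qemu_ram_foreach_block(example_sum_block, &total);
 */
static int __attribute__((unused))
example_sum_block(const char *block_name, void *host_addr,
                  ram_addr_t offset, ram_addr_t length, void *opaque)
{
    *(uint64_t *)opaque += length;
    return 0; /* a non-zero return stops the iteration early */
}
#endif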