4 * Copyright (c) 2003 Fabrice Bellard
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
19 #include "qemu/osdep.h"
20 #include "qapi/error.h"
24 #include "qemu/cutils.h"
26 #include "exec/exec-all.h"
28 #include "hw/qdev-core.h"
29 #if !defined(CONFIG_USER_ONLY)
30 #include "hw/boards.h"
31 #include "hw/xen/xen.h"
33 #include "sysemu/kvm.h"
34 #include "sysemu/hax.h"
35 #include "sysemu/sysemu.h"
36 #include "qemu/timer.h"
37 #include "qemu/config-file.h"
38 #include "qemu/error-report.h"
39 #if defined(CONFIG_USER_ONLY)
41 #else /* !CONFIG_USER_ONLY */
43 #include "exec/memory.h"
44 #include "exec/ioport.h"
45 #include "sysemu/dma.h"
46 #include "exec/address-spaces.h"
47 #include "sysemu/xen-mapcache.h"
50 #include "exec/cpu-all.h"
51 #include "qemu/rcu_queue.h"
52 #include "qemu/main-loop.h"
53 #include "translate-all.h"
54 #include "sysemu/replay.h"
56 #include "exec/memory-internal.h"
57 #include "exec/ram_addr.h"
60 #include "migration/vmstate.h"
62 #include "qemu/range.h"
64 #include "qemu/mmap-alloc.h"
67 //#define DEBUG_SUBPAGE
69 #if !defined(CONFIG_USER_ONLY)
70 /* ram_list is read under rcu_read_lock()/rcu_read_unlock(). Writes
71 * are protected by the ramlist lock.
73 RAMList ram_list = { .blocks = QLIST_HEAD_INITIALIZER(ram_list.blocks) };
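/*
 * A minimal read-side sketch of the locking rule above (hypothetical helper,
 * shown only for illustration): readers walk ram_list.blocks inside an RCU
 * critical section, while writers take the ramlist mutex instead.
 */
#if 0
static ram_addr_t example_total_guest_ram(void)
{
    RAMBlock *block;
    ram_addr_t total = 0;

    rcu_read_lock();
    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
        total += block->used_length;
    }
    rcu_read_unlock();
    return total;
}
#endif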
75 static MemoryRegion *system_memory;
76 static MemoryRegion *system_io;
78 AddressSpace address_space_io;
79 AddressSpace address_space_memory;
81 MemoryRegion io_mem_rom, io_mem_notdirty;
82 static MemoryRegion io_mem_unassigned;
84 /* RAM is pre-allocated and passed into qemu_ram_alloc_from_ptr */
85 #define RAM_PREALLOC (1 << 0)
87 /* RAM is mmap-ed with MAP_SHARED */
88 #define RAM_SHARED (1 << 1)
90 /* Only a portion of RAM (used_length) is actually used, and migrated.
91 * This used_length size can change across reboots.
93 #define RAM_RESIZEABLE (1 << 2)
97 #ifdef TARGET_PAGE_BITS_VARY
99 bool target_page_bits_decided;
102 struct CPUTailQ cpus = QTAILQ_HEAD_INITIALIZER(cpus);
103 /* current CPU in the current thread. It is only valid inside cpu_exec(). */
105 __thread CPUState *current_cpu;
106 /* 0 = Do not count executed instructions.
107 1 = Precise instruction counting.
108 2 = Adaptive rate instruction counting. */
111 bool set_preferred_target_page_bits(int bits)
113 /* The target page size is the lowest common denominator for all
114 * the CPUs in the system, so we can only make it smaller, never
115 * larger. And we can't make it smaller once we've committed to
118 #ifdef TARGET_PAGE_BITS_VARY
119 assert(bits >= TARGET_PAGE_BITS_MIN);
120 if (target_page_bits == 0 || target_page_bits > bits) {
121 if (target_page_bits_decided) {
124 target_page_bits = bits;
130 #if !defined(CONFIG_USER_ONLY)
132 static void finalize_target_page_bits(void)
134 #ifdef TARGET_PAGE_BITS_VARY
135 if (target_page_bits == 0) {
136 target_page_bits = TARGET_PAGE_BITS_MIN;
138 target_page_bits_decided = true;
142 typedef struct PhysPageEntry PhysPageEntry;
144 struct PhysPageEntry {
145 /* How many bits skip to next level (in units of L2_SIZE). 0 for a leaf. */
147 /* index into phys_sections (!skip) or phys_map_nodes (skip) */
151 #define PHYS_MAP_NODE_NIL (((uint32_t)~0) >> 6)
153 /* Size of the L2 (and L3, etc) page tables. */
154 #define ADDR_SPACE_BITS 64
157 #define P_L2_SIZE (1 << P_L2_BITS)
159 #define P_L2_LEVELS (((ADDR_SPACE_BITS - TARGET_PAGE_BITS - 1) / P_L2_BITS) + 1)
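/* Worked example, assuming the usual P_L2_BITS of 9 and TARGET_PAGE_BITS of
 * 12: each level then indexes 512 entries, and
 * P_L2_LEVELS = ((64 - 12 - 1) / 9) + 1 = 6, i.e. six radix levels suffice
 * to cover the full 64-bit physical address space.
 */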
161 typedef PhysPageEntry Node[P_L2_SIZE];
163 typedef struct PhysPageMap {
166 unsigned sections_nb;
167 unsigned sections_nb_alloc;
169 unsigned nodes_nb_alloc;
171 MemoryRegionSection *sections;
174 struct AddressSpaceDispatch {
177 MemoryRegionSection *mru_section;
178 /* This is a multi-level map on the physical address space.
179 * The bottom level has pointers to MemoryRegionSections.
181 PhysPageEntry phys_map;
186 #define SUBPAGE_IDX(addr) ((addr) & ~TARGET_PAGE_MASK)
187 typedef struct subpage_t {
191 uint16_t sub_section[];
194 #define PHYS_SECTION_UNASSIGNED 0
195 #define PHYS_SECTION_NOTDIRTY 1
196 #define PHYS_SECTION_ROM 2
197 #define PHYS_SECTION_WATCH 3
199 static void io_mem_init(void);
200 static void memory_map_init(void);
201 static void tcg_commit(MemoryListener *listener);
203 static MemoryRegion io_mem_watch;
206 * CPUAddressSpace: all the information a CPU needs about an AddressSpace
207 * @cpu: the CPU whose AddressSpace this is
208 * @as: the AddressSpace itself
209 * @memory_dispatch: its dispatch pointer (cached, RCU protected)
210 * @tcg_as_listener: listener for tracking changes to the AddressSpace
212 struct CPUAddressSpace {
215 struct AddressSpaceDispatch *memory_dispatch;
216 MemoryListener tcg_as_listener;
221 #if !defined(CONFIG_USER_ONLY)
223 static void phys_map_node_reserve(PhysPageMap *map, unsigned nodes)
225 static unsigned alloc_hint = 16;
226 if (map->nodes_nb + nodes > map->nodes_nb_alloc) {
227 map->nodes_nb_alloc = MAX(map->nodes_nb_alloc, alloc_hint);
228 map->nodes_nb_alloc = MAX(map->nodes_nb_alloc, map->nodes_nb + nodes);
229 map->nodes = g_renew(Node, map->nodes, map->nodes_nb_alloc);
230 alloc_hint = map->nodes_nb_alloc;
234 static uint32_t phys_map_node_alloc(PhysPageMap *map, bool leaf)
241 ret = map->nodes_nb++;
243 assert(ret != PHYS_MAP_NODE_NIL);
244 assert(ret != map->nodes_nb_alloc);
246 e.skip = leaf ? 0 : 1;
247 e.ptr = leaf ? PHYS_SECTION_UNASSIGNED : PHYS_MAP_NODE_NIL;
248 for (i = 0; i < P_L2_SIZE; ++i) {
249 memcpy(&p[i], &e, sizeof(e));
254 static void phys_page_set_level(PhysPageMap *map, PhysPageEntry *lp,
255 hwaddr *index, hwaddr *nb, uint16_t leaf,
259 hwaddr step = (hwaddr)1 << (level * P_L2_BITS);
261 if (lp->skip && lp->ptr == PHYS_MAP_NODE_NIL) {
262 lp->ptr = phys_map_node_alloc(map, level == 0);
264 p = map->nodes[lp->ptr];
265 lp = &p[(*index >> (level * P_L2_BITS)) & (P_L2_SIZE - 1)];
267 while (*nb && lp < &p[P_L2_SIZE]) {
268 if ((*index & (step - 1)) == 0 && *nb >= step) {
274 phys_page_set_level(map, lp, index, nb, leaf, level - 1);
280 static void phys_page_set(AddressSpaceDispatch *d,
281 hwaddr index, hwaddr nb,
284 /* Wildly overreserve - it doesn't matter much. */
285 phys_map_node_reserve(&d->map, 3 * P_L2_LEVELS);
287 phys_page_set_level(&d->map, &d->phys_map, &index, &nb, leaf, P_L2_LEVELS - 1);
290 /* Compact a non-leaf page entry. Simply detect that the entry has a single child,
291 * and update our entry so we can skip it and go directly to the destination.
293 static void phys_page_compact(PhysPageEntry *lp, Node *nodes)
295 unsigned valid_ptr = P_L2_SIZE;
300 if (lp->ptr == PHYS_MAP_NODE_NIL) {
305 for (i = 0; i < P_L2_SIZE; i++) {
306 if (p[i].ptr == PHYS_MAP_NODE_NIL) {
313 phys_page_compact(&p[i], nodes);
317 /* We can only compress if there's only one child. */
322 assert(valid_ptr < P_L2_SIZE);
324 /* Don't compress if it won't fit in the # of bits we have. */
325 if (lp->skip + p[valid_ptr].skip >= (1 << 3)) {
329 lp->ptr = p[valid_ptr].ptr;
330 if (!p[valid_ptr].skip) {
331 /* If our only child is a leaf, make this a leaf. */
332 /* By design, we should have made this node a leaf to begin with so we
333 * should never reach here.
334 * But since it's so simple to handle this, let's do it just in case we
339 lp->skip += p[valid_ptr].skip;
343 static void phys_page_compact_all(AddressSpaceDispatch *d, int nodes_nb)
345 if (d->phys_map.skip) {
346 phys_page_compact(&d->phys_map, d->map.nodes);
350 static inline bool section_covers_addr(const MemoryRegionSection *section,
353 /* Memory topology clips a memory region to [0, 2^64); size.hi > 0 means
354 * the section must cover the entire address space.
356 return int128_gethi(section->size) ||
357 range_covers_byte(section->offset_within_address_space,
358 int128_getlo(section->size), addr);
361 static MemoryRegionSection *phys_page_find(PhysPageEntry lp, hwaddr addr,
362 Node *nodes, MemoryRegionSection *sections)
365 hwaddr index = addr >> TARGET_PAGE_BITS;
368 for (i = P_L2_LEVELS; lp.skip && (i -= lp.skip) >= 0;) {
369 if (lp.ptr == PHYS_MAP_NODE_NIL) {
370 return &sections[PHYS_SECTION_UNASSIGNED];
373 lp = p[(index >> (i * P_L2_BITS)) & (P_L2_SIZE - 1)];
376 if (section_covers_addr(&sections[lp.ptr], addr)) {
377 return &sections[lp.ptr];
379 return &sections[PHYS_SECTION_UNASSIGNED];
383 bool memory_region_is_unassigned(MemoryRegion *mr)
385 return mr != &io_mem_rom && mr != &io_mem_notdirty && !mr->rom_device
386 && mr != &io_mem_watch;
389 /* Called from RCU critical section */
390 static MemoryRegionSection *address_space_lookup_region(AddressSpaceDispatch *d,
392 bool resolve_subpage)
394 MemoryRegionSection *section = atomic_read(&d->mru_section);
398 if (section && section != &d->map.sections[PHYS_SECTION_UNASSIGNED] &&
399 section_covers_addr(section, addr)) {
402 section = phys_page_find(d->phys_map, addr, d->map.nodes,
406 if (resolve_subpage && section->mr->subpage) {
407 subpage = container_of(section->mr, subpage_t, iomem);
408 section = &d->map.sections[subpage->sub_section[SUBPAGE_IDX(addr)]];
411 atomic_set(&d->mru_section, section);
416 /* Called from RCU critical section */
417 static MemoryRegionSection *
418 address_space_translate_internal(AddressSpaceDispatch *d, hwaddr addr, hwaddr *xlat,
419 hwaddr *plen, bool resolve_subpage)
421 MemoryRegionSection *section;
425 section = address_space_lookup_region(d, addr, resolve_subpage);
426 /* Compute offset within MemoryRegionSection */
427 addr -= section->offset_within_address_space;
429 /* Compute offset within MemoryRegion */
430 *xlat = addr + section->offset_within_region;
434 /* MMIO registers can be expected to perform full-width accesses based only
435 * on their address, without considering adjacent registers that could
436 * decode to completely different MemoryRegions. When such registers
437 * exist (e.g. I/O ports 0xcf8 and 0xcf9 on most PC chipsets), MMIO
438 * regions overlap wildly. For this reason we cannot clamp the accesses
441 * If the length is small (as is the case for address_space_ldl/stl),
442 * everything works fine. If the incoming length is large, however,
443 * the caller really has to do the clamping through memory_access_size.
445 if (memory_region_is_ram(mr)) {
446 diff = int128_sub(section->size, int128_make64(addr));
447 *plen = int128_get64(int128_min(diff, int128_make64(*plen)));
452 /* Called from RCU critical section */
453 MemoryRegion *address_space_translate(AddressSpace *as, hwaddr addr,
454 hwaddr *xlat, hwaddr *plen,
458 MemoryRegionSection *section;
462 AddressSpaceDispatch *d = atomic_rcu_read(&as->dispatch);
463 section = address_space_translate_internal(d, addr, &addr, plen, true);
466 if (!mr->iommu_ops) {
470 iotlb = mr->iommu_ops->translate(mr, addr, is_write);
471 addr = ((iotlb.translated_addr & ~iotlb.addr_mask)
472 | (addr & iotlb.addr_mask));
473 *plen = MIN(*plen, (addr | iotlb.addr_mask) - addr + 1);
474 if (!(iotlb.perm & (1 << is_write))) {
475 mr = &io_mem_unassigned;
479 as = iotlb.target_as;
482 if (xen_enabled() && memory_access_is_direct(mr, is_write)) {
483 hwaddr page = ((addr & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE) - addr;
484 *plen = MIN(page, *plen);
491 /* Called from RCU critical section */
492 MemoryRegionSection *
493 address_space_translate_for_iotlb(CPUState *cpu, int asidx, hwaddr addr,
494 hwaddr *xlat, hwaddr *plen)
496 MemoryRegionSection *section;
497 AddressSpaceDispatch *d = atomic_rcu_read(&cpu->cpu_ases[asidx].memory_dispatch);
499 section = address_space_translate_internal(d, addr, xlat, plen, false);
501 assert(!section->mr->iommu_ops);
506 #if !defined(CONFIG_USER_ONLY)
508 static int cpu_common_post_load(void *opaque, int version_id)
510 CPUState *cpu = opaque;
512 /* 0x01 was CPU_INTERRUPT_EXIT. This line can be removed when the
513 version_id is increased. */
514 cpu->interrupt_request &= ~0x01;
520 static int cpu_common_pre_load(void *opaque)
522 CPUState *cpu = opaque;
524 cpu->exception_index = -1;
529 static bool cpu_common_exception_index_needed(void *opaque)
531 CPUState *cpu = opaque;
533 return tcg_enabled() && cpu->exception_index != -1;
536 static const VMStateDescription vmstate_cpu_common_exception_index = {
537 .name = "cpu_common/exception_index",
539 .minimum_version_id = 1,
540 .needed = cpu_common_exception_index_needed,
541 .fields = (VMStateField[]) {
542 VMSTATE_INT32(exception_index, CPUState),
543 VMSTATE_END_OF_LIST()
547 static bool cpu_common_crash_occurred_needed(void *opaque)
549 CPUState *cpu = opaque;
551 return cpu->crash_occurred;
554 static const VMStateDescription vmstate_cpu_common_crash_occurred = {
555 .name = "cpu_common/crash_occurred",
557 .minimum_version_id = 1,
558 .needed = cpu_common_crash_occurred_needed,
559 .fields = (VMStateField[]) {
560 VMSTATE_BOOL(crash_occurred, CPUState),
561 VMSTATE_END_OF_LIST()
565 const VMStateDescription vmstate_cpu_common = {
566 .name = "cpu_common",
568 .minimum_version_id = 1,
569 .pre_load = cpu_common_pre_load,
570 .post_load = cpu_common_post_load,
571 .fields = (VMStateField[]) {
572 VMSTATE_UINT32(halted, CPUState),
573 VMSTATE_UINT32(interrupt_request, CPUState),
574 VMSTATE_END_OF_LIST()
576 .subsections = (const VMStateDescription*[]) {
577 &vmstate_cpu_common_exception_index,
578 &vmstate_cpu_common_crash_occurred,
585 CPUState *qemu_get_cpu(int index)
590 if (cpu->cpu_index == index) {
598 #if !defined(CONFIG_USER_ONLY)
599 void cpu_address_space_init(CPUState *cpu, AddressSpace *as, int asidx)
601 CPUAddressSpace *newas;
603 /* Target code should have set num_ases before calling us */
604 assert(asidx < cpu->num_ases);
607 /* address space 0 gets the convenience alias */
611 /* KVM cannot currently support multiple address spaces. */
612 assert(asidx == 0 || !kvm_enabled());
614 if (!cpu->cpu_ases) {
615 cpu->cpu_ases = g_new0(CPUAddressSpace, cpu->num_ases);
618 newas = &cpu->cpu_ases[asidx];
622 newas->tcg_as_listener.commit = tcg_commit;
623 memory_listener_register(&newas->tcg_as_listener, as);
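/*
 * Minimal wiring sketch (hypothetical caller): a softmmu target with a
 * single address space sets num_ases and then registers address space 0,
 * here simply the global address_space_memory.
 */
#if 0
static void example_wire_cpu_memory(CPUState *cpu)
{
    cpu->num_ases = 1;
    cpu_address_space_init(cpu, &address_space_memory, 0);
}
#endif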
627 AddressSpace *cpu_get_address_space(CPUState *cpu, int asidx)
629 /* Return the AddressSpace corresponding to the specified index */
630 return cpu->cpu_ases[asidx].as;
634 void cpu_exec_unrealizefn(CPUState *cpu)
636 CPUClass *cc = CPU_GET_CLASS(cpu);
638 cpu_list_remove(cpu);
640 if (cc->vmsd != NULL) {
641 vmstate_unregister(NULL, cc->vmsd, cpu);
643 if (qdev_get_vmsd(DEVICE(cpu)) == NULL) {
644 vmstate_unregister(NULL, &vmstate_cpu_common, cpu);
648 void cpu_exec_initfn(CPUState *cpu)
653 #ifndef CONFIG_USER_ONLY
654 cpu->thread_id = qemu_get_thread_id();
656 /* This is a softmmu CPU object, so create a property for it
657 * so users can wire up its memory. (This can't go in qom/cpu.c
658 * because that file is compiled only once for both user-mode
659 * and system builds.) The default if no link is set up is to use
660 * the system address space.
662 object_property_add_link(OBJECT(cpu), "memory", TYPE_MEMORY_REGION,
663 (Object **)&cpu->memory,
664 qdev_prop_allow_set_link_before_realize,
665 OBJ_PROP_LINK_UNREF_ON_RELEASE,
667 cpu->memory = system_memory;
668 object_ref(OBJECT(cpu->memory));
672 void cpu_exec_realizefn(CPUState *cpu, Error **errp)
674 CPUClass *cc ATTRIBUTE_UNUSED = CPU_GET_CLASS(cpu);
678 #ifndef CONFIG_USER_ONLY
679 if (qdev_get_vmsd(DEVICE(cpu)) == NULL) {
680 vmstate_register(NULL, cpu->cpu_index, &vmstate_cpu_common, cpu);
682 if (cc->vmsd != NULL) {
683 vmstate_register(NULL, cpu->cpu_index, cc->vmsd, cpu);
688 static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
690 /* Flush the whole TB as this will not have race conditions
691 * even if we don't have proper locking yet.
692 * Ideally we would just invalidate the TBs for the
698 #if defined(CONFIG_USER_ONLY)
699 void cpu_watchpoint_remove_all(CPUState *cpu, int mask)
704 int cpu_watchpoint_remove(CPUState *cpu, vaddr addr, vaddr len,
710 void cpu_watchpoint_remove_by_ref(CPUState *cpu, CPUWatchpoint *watchpoint)
714 int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
715 int flags, CPUWatchpoint **watchpoint)
720 /* Add a watchpoint. */
721 int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
722 int flags, CPUWatchpoint **watchpoint)
726 /* forbid ranges which are empty or run off the end of the address space */
727 if (len == 0 || (addr + len - 1) < addr) {
728 error_report("tried to set invalid watchpoint at %"
729 VADDR_PRIx ", len=%" VADDR_PRIu, addr, len);
732 wp = g_malloc(sizeof(*wp));
738 /* keep all GDB-injected watchpoints in front */
739 if (flags & BP_GDB) {
740 QTAILQ_INSERT_HEAD(&cpu->watchpoints, wp, entry);
742 QTAILQ_INSERT_TAIL(&cpu->watchpoints, wp, entry);
745 tlb_flush_page(cpu, addr);
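/*
 * Usage sketch (hypothetical helper): set a 4-byte write watchpoint much
 * like the gdb stub would, then drop it again by reference.
 */
#if 0
static void example_watch_word(CPUState *cpu, vaddr addr)
{
    CPUWatchpoint *wp;

    if (cpu_watchpoint_insert(cpu, addr, 4, BP_MEM_WRITE | BP_GDB, &wp) == 0) {
        /* ... let the guest run and take the watchpoint exception ... */
        cpu_watchpoint_remove_by_ref(cpu, wp);
    }
}
#endif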
752 /* Remove a specific watchpoint. */
753 int cpu_watchpoint_remove(CPUState *cpu, vaddr addr, vaddr len,
758 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
759 if (addr == wp->vaddr && len == wp->len
760 && flags == (wp->flags & ~BP_WATCHPOINT_HIT)) {
761 cpu_watchpoint_remove_by_ref(cpu, wp);
768 /* Remove a specific watchpoint by reference. */
769 void cpu_watchpoint_remove_by_ref(CPUState *cpu, CPUWatchpoint *watchpoint)
771 QTAILQ_REMOVE(&cpu->watchpoints, watchpoint, entry);
773 tlb_flush_page(cpu, watchpoint->vaddr);
778 /* Remove all matching watchpoints. */
779 void cpu_watchpoint_remove_all(CPUState *cpu, int mask)
781 CPUWatchpoint *wp, *next;
783 QTAILQ_FOREACH_SAFE(wp, &cpu->watchpoints, entry, next) {
784 if (wp->flags & mask) {
785 cpu_watchpoint_remove_by_ref(cpu, wp);
790 /* Return true if this watchpoint address matches the specified
791 * access (i.e. the address range covered by the watchpoint overlaps
792 * partially or completely with the address range covered by the access). */
795 static inline bool cpu_watchpoint_address_matches(CPUWatchpoint *wp,
799 /* We know the lengths are non-zero, but a little caution is
800 * required to avoid errors in the case where the range ends
801 * exactly at the top of the address space and so addr + len
802 * wraps round to zero.
804 vaddr wpend = wp->vaddr + wp->len - 1;
805 vaddr addrend = addr + len - 1;
807 return !(addr > wpend || wp->vaddr > addrend);
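/* Worked example of the wrap-around concern: with 64-bit vaddr,
 * addr = 0xfffffffffffff000 and len = 0x1000 make addr + len wrap to 0,
 * so comparing half-open ends would misfire; the inclusive end
 * addr + len - 1 == 0xffffffffffffffff keeps the overlap test correct.
 */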
812 /* Add a breakpoint. */
813 int cpu_breakpoint_insert(CPUState *cpu, vaddr pc, int flags,
814 CPUBreakpoint **breakpoint)
818 bp = g_malloc(sizeof(*bp));
823 /* keep all GDB-injected breakpoints in front */
824 if (flags & BP_GDB) {
825 QTAILQ_INSERT_HEAD(&cpu->breakpoints, bp, entry);
827 QTAILQ_INSERT_TAIL(&cpu->breakpoints, bp, entry);
830 breakpoint_invalidate(cpu, pc);
838 /* Remove a specific breakpoint. */
839 int cpu_breakpoint_remove(CPUState *cpu, vaddr pc, int flags)
843 QTAILQ_FOREACH(bp, &cpu->breakpoints, entry) {
844 if (bp->pc == pc && bp->flags == flags) {
845 cpu_breakpoint_remove_by_ref(cpu, bp);
852 /* Remove a specific breakpoint by reference. */
853 void cpu_breakpoint_remove_by_ref(CPUState *cpu, CPUBreakpoint *breakpoint)
855 QTAILQ_REMOVE(&cpu->breakpoints, breakpoint, entry);
857 breakpoint_invalidate(cpu, breakpoint->pc);
862 /* Remove all matching breakpoints. */
863 void cpu_breakpoint_remove_all(CPUState *cpu, int mask)
865 CPUBreakpoint *bp, *next;
867 QTAILQ_FOREACH_SAFE(bp, &cpu->breakpoints, entry, next) {
868 if (bp->flags & mask) {
869 cpu_breakpoint_remove_by_ref(cpu, bp);
874 /* enable or disable single step mode. EXCP_DEBUG is returned by the
875 CPU loop after each instruction */
876 void cpu_single_step(CPUState *cpu, int enabled)
878 if (cpu->singlestep_enabled != enabled) {
879 cpu->singlestep_enabled = enabled;
881 kvm_update_guest_debug(cpu, 0);
883 /* must flush all the translated code to avoid inconsistencies */
884 /* XXX: only flush what is necessary */
890 void cpu_abort(CPUState *cpu, const char *fmt, ...)
897 fprintf(stderr, "qemu: fatal: ");
898 vfprintf(stderr, fmt, ap);
899 fprintf(stderr, "\n");
900 cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_FPU | CPU_DUMP_CCOP);
901 if (qemu_log_separate()) {
903 qemu_log("qemu: fatal: ");
904 qemu_log_vprintf(fmt, ap2);
906 log_cpu_state(cpu, CPU_DUMP_FPU | CPU_DUMP_CCOP);
914 #if defined(CONFIG_USER_ONLY)
916 struct sigaction act;
917 sigfillset(&act.sa_mask);
918 act.sa_handler = SIG_DFL;
919 sigaction(SIGABRT, &act, NULL);
925 #if !defined(CONFIG_USER_ONLY)
926 /* Called from RCU critical section */
927 static RAMBlock *qemu_get_ram_block(ram_addr_t addr)
931 block = atomic_rcu_read(&ram_list.mru_block);
932 if (block && addr - block->offset < block->max_length) {
935 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
936 if (addr - block->offset < block->max_length) {
941 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
945 /* It is safe to write mru_block outside the iothread lock. This
950 * xxx removed from list
954 * call_rcu(reclaim_ramblock, xxx);
957 * atomic_rcu_set is not needed here. The block was already published
958 * when it was placed into the list. Here we're just making an extra
959 * copy of the pointer.
961 ram_list.mru_block = block;
965 static void tlb_reset_dirty_range_all(ram_addr_t start, ram_addr_t length)
972 end = TARGET_PAGE_ALIGN(start + length);
973 start &= TARGET_PAGE_MASK;
976 block = qemu_get_ram_block(start);
977 assert(block == qemu_get_ram_block(end - 1));
978 start1 = (uintptr_t)ramblock_ptr(block, start - block->offset);
980 tlb_reset_dirty(cpu, start1, length);
985 /* Note: start and end must be within the same ram block. */
986 bool cpu_physical_memory_test_and_clear_dirty(ram_addr_t start,
990 DirtyMemoryBlocks *blocks;
991 unsigned long end, page;
998 end = TARGET_PAGE_ALIGN(start + length) >> TARGET_PAGE_BITS;
999 page = start >> TARGET_PAGE_BITS;
1003 blocks = atomic_rcu_read(&ram_list.dirty_memory[client]);
1005 while (page < end) {
1006 unsigned long idx = page / DIRTY_MEMORY_BLOCK_SIZE;
1007 unsigned long offset = page % DIRTY_MEMORY_BLOCK_SIZE;
1008 unsigned long num = MIN(end - page, DIRTY_MEMORY_BLOCK_SIZE - offset);
1010 dirty |= bitmap_test_and_clear_atomic(blocks->blocks[idx],
1017 if (dirty && tcg_enabled()) {
1018 tlb_reset_dirty_range_all(start, length);
1024 /* Called from RCU critical section */
1025 hwaddr memory_region_section_get_iotlb(CPUState *cpu,
1026 MemoryRegionSection *section,
1028 hwaddr paddr, hwaddr xlat,
1030 target_ulong *address)
1035 if (memory_region_is_ram(section->mr)) {
1037 iotlb = memory_region_get_ram_addr(section->mr) + xlat;
1038 if (!section->readonly) {
1039 iotlb |= PHYS_SECTION_NOTDIRTY;
1041 iotlb |= PHYS_SECTION_ROM;
1044 AddressSpaceDispatch *d;
1046 d = atomic_rcu_read(&section->address_space->dispatch);
1047 iotlb = section - d->map.sections;
1051 /* Make accesses to pages with watchpoints go via the
1052 watchpoint trap routines. */
1053 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
1054 if (cpu_watchpoint_address_matches(wp, vaddr, TARGET_PAGE_SIZE)) {
1055 /* Avoid trapping reads of pages with a write breakpoint. */
1056 if ((prot & PAGE_WRITE) || (wp->flags & BP_MEM_READ)) {
1057 iotlb = PHYS_SECTION_WATCH + paddr;
1058 *address |= TLB_MMIO;
1066 #endif /* defined(CONFIG_USER_ONLY) */
1068 #if !defined(CONFIG_USER_ONLY)
1070 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
1072 static subpage_t *subpage_init(AddressSpace *as, hwaddr base);
1074 static void *(*phys_mem_alloc)(size_t size, uint64_t *align) =
1075 qemu_anon_ram_alloc;
1078 * Set a custom physical guest memory allocator.
1079 * Accelerators with unusual needs may need this. Hopefully, we can
1080 * get rid of it eventually.
1082 void phys_mem_set_alloc(void *(*alloc)(size_t, uint64_t *align))
1084 phys_mem_alloc = alloc;
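/*
 * Sketch of the intended use: an accelerator with special allocation rules
 * installs its own allocator during early init (the names below are
 * illustrative; the signature matches phys_mem_alloc above).
 */
#if 0
static void *example_special_alloc(size_t size, uint64_t *align)
{
    /* ... accelerator-specific placement would go here ... */
    return qemu_anon_ram_alloc(size, align);
}

static void example_install_allocator(void)
{
    phys_mem_set_alloc(example_special_alloc);
}
#endif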
1087 static uint16_t phys_section_add(PhysPageMap *map,
1088 MemoryRegionSection *section)
1090 /* The physical section number is ORed with a page-aligned
1091 * pointer to produce the iotlb entries. Thus it should
1092 * never overflow into the page-aligned value.
1094 assert(map->sections_nb < TARGET_PAGE_SIZE);
1096 if (map->sections_nb == map->sections_nb_alloc) {
1097 map->sections_nb_alloc = MAX(map->sections_nb_alloc * 2, 16);
1098 map->sections = g_renew(MemoryRegionSection, map->sections,
1099 map->sections_nb_alloc);
1101 map->sections[map->sections_nb] = *section;
1102 memory_region_ref(section->mr);
1103 return map->sections_nb++;
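/* Worked example of the encoding the assertion above protects: with 4 KiB
 * target pages, memory_region_section_get_iotlb() can OR a section index of,
 * say, 3 into a page-aligned value such as 0x4000, giving 0x4003, and
 * iotlb_to_region() later recovers the index as 0x4003 & ~TARGET_PAGE_MASK.
 */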
1106 static void phys_section_destroy(MemoryRegion *mr)
1108 bool have_sub_page = mr->subpage;
1110 memory_region_unref(mr);
1112 if (have_sub_page) {
1113 subpage_t *subpage = container_of(mr, subpage_t, iomem);
1114 object_unref(OBJECT(&subpage->iomem));
1119 static void phys_sections_free(PhysPageMap *map)
1121 while (map->sections_nb > 0) {
1122 MemoryRegionSection *section = &map->sections[--map->sections_nb];
1123 phys_section_destroy(section->mr);
1125 g_free(map->sections);
1129 static void register_subpage(AddressSpaceDispatch *d, MemoryRegionSection *section)
1132 hwaddr base = section->offset_within_address_space
1134 MemoryRegionSection *existing = phys_page_find(d->phys_map, base,
1135 d->map.nodes, d->map.sections);
1136 MemoryRegionSection subsection = {
1137 .offset_within_address_space = base,
1138 .size = int128_make64(TARGET_PAGE_SIZE),
1142 assert(existing->mr->subpage || existing->mr == &io_mem_unassigned);
1144 if (!(existing->mr->subpage)) {
1145 subpage = subpage_init(d->as, base);
1146 subsection.address_space = d->as;
1147 subsection.mr = &subpage->iomem;
1148 phys_page_set(d, base >> TARGET_PAGE_BITS, 1,
1149 phys_section_add(&d->map, &subsection));
1151 subpage = container_of(existing->mr, subpage_t, iomem);
1153 start = section->offset_within_address_space & ~TARGET_PAGE_MASK;
1154 end = start + int128_get64(section->size) - 1;
1155 subpage_register(subpage, start, end,
1156 phys_section_add(&d->map, section));
1160 static void register_multipage(AddressSpaceDispatch *d,
1161 MemoryRegionSection *section)
1163 hwaddr start_addr = section->offset_within_address_space;
1164 uint16_t section_index = phys_section_add(&d->map, section);
1165 uint64_t num_pages = int128_get64(int128_rshift(section->size,
1169 phys_page_set(d, start_addr >> TARGET_PAGE_BITS, num_pages, section_index);
1172 static void mem_add(MemoryListener *listener, MemoryRegionSection *section)
1174 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
1175 AddressSpaceDispatch *d = as->next_dispatch;
1176 MemoryRegionSection now = *section, remain = *section;
1177 Int128 page_size = int128_make64(TARGET_PAGE_SIZE);
1179 if (now.offset_within_address_space & ~TARGET_PAGE_MASK) {
1180 uint64_t left = TARGET_PAGE_ALIGN(now.offset_within_address_space)
1181 - now.offset_within_address_space;
1183 now.size = int128_min(int128_make64(left), now.size);
1184 register_subpage(d, &now);
1186 now.size = int128_zero();
1188 while (int128_ne(remain.size, now.size)) {
1189 remain.size = int128_sub(remain.size, now.size);
1190 remain.offset_within_address_space += int128_get64(now.size);
1191 remain.offset_within_region += int128_get64(now.size);
1193 if (int128_lt(remain.size, page_size)) {
1194 register_subpage(d, &now);
1195 } else if (remain.offset_within_address_space & ~TARGET_PAGE_MASK) {
1196 now.size = page_size;
1197 register_subpage(d, &now);
1199 now.size = int128_and(now.size, int128_neg(page_size));
1200 register_multipage(d, &now);
1205 void qemu_flush_coalesced_mmio_buffer(void)
1208 kvm_flush_coalesced_mmio_buffer();
1211 void qemu_mutex_lock_ramlist(void)
1213 qemu_mutex_lock(&ram_list.mutex);
1216 void qemu_mutex_unlock_ramlist(void)
1218 qemu_mutex_unlock(&ram_list.mutex);
1222 static int64_t get_file_size(int fd)
1224 int64_t size = lseek(fd, 0, SEEK_END);
1231 static void *file_ram_alloc(RAMBlock *block,
1236 bool unlink_on_error = false;
1238 char *sanitized_name;
1240 void *area = MAP_FAILED;
1244 if (kvm_enabled() && !kvm_has_sync_mmu()) {
1246 "host lacks kvm mmu notifiers, -mem-path unsupported");
1251 fd = open(path, O_RDWR);
1253 /* @path names an existing file, use it */
1256 if (errno == ENOENT) {
1257 /* @path names a file that doesn't exist, create it */
1258 fd = open(path, O_RDWR | O_CREAT | O_EXCL, 0644);
1260 unlink_on_error = true;
1263 } else if (errno == EISDIR) {
1264 /* @path names a directory, create a file there */
1265 /* Make name safe to use with mkstemp by replacing '/' with '_'. */
1266 sanitized_name = g_strdup(memory_region_name(block->mr));
1267 for (c = sanitized_name; *c != '\0'; c++) {
1273 filename = g_strdup_printf("%s/qemu_back_mem.%s.XXXXXX", path,
1275 g_free(sanitized_name);
1277 fd = mkstemp(filename);
1285 if (errno != EEXIST && errno != EINTR) {
1286 error_setg_errno(errp, errno,
1287 "can't open backing store %s for guest RAM",
1292 * Try again on EINTR and EEXIST. The latter happens when
1293 * something else creates the file between our two open().
1297 block->page_size = qemu_fd_getpagesize(fd);
1298 block->mr->align = block->page_size;
1299 #if defined(__s390x__)
1300 if (kvm_enabled()) {
1301 block->mr->align = MAX(block->mr->align, QEMU_VMALLOC_ALIGN);
1305 file_size = get_file_size(fd);
1307 if (memory < block->page_size) {
1308 error_setg(errp, "memory size 0x" RAM_ADDR_FMT " must be equal to "
1309 "or larger than page size 0x%zx",
1310 memory, block->page_size);
1314 if (file_size > 0 && file_size < memory) {
1315 error_setg(errp, "backing store %s size 0x%" PRIx64
1316 " does not match 'size' option 0x" RAM_ADDR_FMT,
1317 path, file_size, memory);
1321 memory = ROUND_UP(memory, block->page_size);
1324 * ftruncate is not supported by hugetlbfs in older
1325 * hosts, so don't bother bailing out on errors.
1326 * If anything goes wrong with it under other filesystems,
1329 * Do not truncate the non-empty backend file to avoid corrupting
1330 * the existing data in the file. Disabling shrinking is not
1331 * enough. For example, the current vNVDIMM implementation stores
1332 * the guest NVDIMM labels at the end of the backend file. If the
1333 * backend file is later extended, QEMU will not be able to find
1334 * those labels. Therefore, extending the non-empty backend file
1335 * is disabled as well.
1337 if (!file_size && ftruncate(fd, memory)) {
1338 perror("ftruncate");
1341 area = qemu_ram_mmap(fd, memory, block->mr->align,
1342 block->flags & RAM_SHARED);
1343 if (area == MAP_FAILED) {
1344 error_setg_errno(errp, errno,
1345 "unable to map backing store for guest RAM");
1350 os_mem_prealloc(fd, area, memory, errp);
1351 if (errp && *errp) {
1360 if (area != MAP_FAILED) {
1361 qemu_ram_munmap(area, memory);
1363 if (unlink_on_error) {
1373 /* Called with the ramlist lock held. */
1374 static ram_addr_t find_ram_offset(ram_addr_t size)
1376 RAMBlock *block, *next_block;
1377 ram_addr_t offset = RAM_ADDR_MAX, mingap = RAM_ADDR_MAX;
1379 assert(size != 0); /* it would hand out same offset multiple times */
1381 if (QLIST_EMPTY_RCU(&ram_list.blocks)) {
1385 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1386 ram_addr_t end, next = RAM_ADDR_MAX;
1388 end = block->offset + block->max_length;
1390 QLIST_FOREACH_RCU(next_block, &ram_list.blocks, next) {
1391 if (next_block->offset >= end) {
1392 next = MIN(next, next_block->offset);
1395 if (next - end >= size && next - end < mingap) {
1397 mingap = next - end;
1401 if (offset == RAM_ADDR_MAX) {
1402 fprintf(stderr, "Failed to find gap of requested size: %" PRIu64 "\n",
1410 ram_addr_t last_ram_offset(void)
1413 ram_addr_t last = 0;
1416 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1417 last = MAX(last, block->offset + block->max_length);
1423 static void qemu_ram_setup_dump(void *addr, ram_addr_t size)
1427 /* Use MADV_DONTDUMP, if user doesn't want the guest memory in the core */
1428 if (!machine_dump_guest_core(current_machine)) {
1429 ret = qemu_madvise(addr, size, QEMU_MADV_DONTDUMP);
1431 perror("qemu_madvise");
1432 fprintf(stderr, "madvise doesn't support MADV_DONTDUMP, "
1433 "but dump_guest_core=off specified\n");
1438 const char *qemu_ram_get_idstr(RAMBlock *rb)
1443 /* Called with iothread lock held. */
1444 void qemu_ram_set_idstr(RAMBlock *new_block, const char *name, DeviceState *dev)
1449 assert(!new_block->idstr[0]);
1452 char *id = qdev_get_dev_path(dev);
1454 snprintf(new_block->idstr, sizeof(new_block->idstr), "%s/", id);
1458 pstrcat(new_block->idstr, sizeof(new_block->idstr), name);
1461 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1462 if (block != new_block &&
1463 !strcmp(block->idstr, new_block->idstr)) {
1464 fprintf(stderr, "RAMBlock \"%s\" already registered, abort!\n",
1472 /* Called with iothread lock held. */
1473 void qemu_ram_unset_idstr(RAMBlock *block)
1475 /* FIXME: arch_init.c assumes that this is not called throughout
1476 * migration. Ignore the problem since hot-unplug during migration
1477 * does not work anyway.
1480 memset(block->idstr, 0, sizeof(block->idstr));
1484 size_t qemu_ram_pagesize(RAMBlock *rb)
1486 return rb->page_size;
1489 static int memory_try_enable_merging(void *addr, size_t len)
1491 if (!machine_mem_merge(current_machine)) {
1492 /* disabled by the user */
1496 return qemu_madvise(addr, len, QEMU_MADV_MERGEABLE);
1499 /* Only legal before guest might have detected the memory size: e.g. on
1500 * incoming migration, or right after reset.
1502 * As the memory core doesn't know how the memory is accessed, it is up to
1503 * the resize callback to update device state and/or add assertions to detect
1504 * misuse, if necessary.
1506 int qemu_ram_resize(RAMBlock *block, ram_addr_t newsize, Error **errp)
1510 newsize = HOST_PAGE_ALIGN(newsize);
1512 if (block->used_length == newsize) {
1516 if (!(block->flags & RAM_RESIZEABLE)) {
1517 error_setg_errno(errp, EINVAL,
1518 "Length mismatch: %s: 0x" RAM_ADDR_FMT
1519 " in != 0x" RAM_ADDR_FMT, block->idstr,
1520 newsize, block->used_length);
1524 if (block->max_length < newsize) {
1525 error_setg_errno(errp, EINVAL,
1526 "Length too large: %s: 0x" RAM_ADDR_FMT
1527 " > 0x" RAM_ADDR_FMT, block->idstr,
1528 newsize, block->max_length);
1532 cpu_physical_memory_clear_dirty_range(block->offset, block->used_length);
1533 block->used_length = newsize;
1534 cpu_physical_memory_set_dirty_range(block->offset, block->used_length,
1536 memory_region_set_size(block->mr, newsize);
1537 if (block->resized) {
1538 block->resized(block->idstr, newsize, block->host);
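/*
 * Caller sketch (hypothetical helper): grow a block that was created with
 * qemu_ram_alloc_resizeable(), e.g. on incoming migration before the guest
 * has observed the old size.
 */
#if 0
static void example_grow_ram(RAMBlock *block, ram_addr_t newsize)
{
    Error *err = NULL;

    if (qemu_ram_resize(block, newsize, &err) < 0) {
        error_report_err(err);
    }
}
#endif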
1543 /* Called with ram_list.mutex held */
1544 static void dirty_memory_extend(ram_addr_t old_ram_size,
1545 ram_addr_t new_ram_size)
1547 ram_addr_t old_num_blocks = DIV_ROUND_UP(old_ram_size,
1548 DIRTY_MEMORY_BLOCK_SIZE);
1549 ram_addr_t new_num_blocks = DIV_ROUND_UP(new_ram_size,
1550 DIRTY_MEMORY_BLOCK_SIZE);
1553 /* Only need to extend if block count increased */
1554 if (new_num_blocks <= old_num_blocks) {
1558 for (i = 0; i < DIRTY_MEMORY_NUM; i++) {
1559 DirtyMemoryBlocks *old_blocks;
1560 DirtyMemoryBlocks *new_blocks;
1563 old_blocks = atomic_rcu_read(&ram_list.dirty_memory[i]);
1564 new_blocks = g_malloc(sizeof(*new_blocks) +
1565 sizeof(new_blocks->blocks[0]) * new_num_blocks);
1567 if (old_num_blocks) {
1568 memcpy(new_blocks->blocks, old_blocks->blocks,
1569 old_num_blocks * sizeof(old_blocks->blocks[0]));
1572 for (j = old_num_blocks; j < new_num_blocks; j++) {
1573 new_blocks->blocks[j] = bitmap_new(DIRTY_MEMORY_BLOCK_SIZE);
1576 atomic_rcu_set(&ram_list.dirty_memory[i], new_blocks);
1579 g_free_rcu(old_blocks, rcu);
1584 static void ram_block_add(RAMBlock *new_block, Error **errp)
1587 RAMBlock *last_block = NULL;
1588 ram_addr_t old_ram_size, new_ram_size;
1591 old_ram_size = last_ram_offset() >> TARGET_PAGE_BITS;
1593 qemu_mutex_lock_ramlist();
1594 new_block->offset = find_ram_offset(new_block->max_length);
1596 if (!new_block->host) {
1597 if (xen_enabled()) {
1598 xen_ram_alloc(new_block->offset, new_block->max_length,
1599 new_block->mr, &err);
1601 error_propagate(errp, err);
1602 qemu_mutex_unlock_ramlist();
1606 new_block->host = phys_mem_alloc(new_block->max_length,
1607 &new_block->mr->align);
1608 if (!new_block->host) {
1609 error_setg_errno(errp, errno,
1610 "cannot set up guest memory '%s'",
1611 memory_region_name(new_block->mr));
1612 qemu_mutex_unlock_ramlist();
1615 memory_try_enable_merging(new_block->host, new_block->max_length);
1619 new_ram_size = MAX(old_ram_size,
1620 (new_block->offset + new_block->max_length) >> TARGET_PAGE_BITS);
1621 if (new_ram_size > old_ram_size) {
1622 migration_bitmap_extend(old_ram_size, new_ram_size);
1623 dirty_memory_extend(old_ram_size, new_ram_size);
1625 /* Keep the list sorted from biggest to smallest block. Unlike QTAILQ,
1626 * QLIST (which has an RCU-friendly variant) does not have insertion at
1627 * tail, so save the last element in last_block.
1629 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1631 if (block->max_length < new_block->max_length) {
1636 QLIST_INSERT_BEFORE_RCU(block, new_block, next);
1637 } else if (last_block) {
1638 QLIST_INSERT_AFTER_RCU(last_block, new_block, next);
1639 } else { /* list is empty */
1640 QLIST_INSERT_HEAD_RCU(&ram_list.blocks, new_block, next);
1642 ram_list.mru_block = NULL;
1644 /* Write list before version */
1647 qemu_mutex_unlock_ramlist();
1649 cpu_physical_memory_set_dirty_range(new_block->offset,
1650 new_block->used_length,
1653 if (new_block->host) {
1654 qemu_ram_setup_dump(new_block->host, new_block->max_length);
1655 qemu_madvise(new_block->host, new_block->max_length, QEMU_MADV_HUGEPAGE);
1656 /* MADV_DONTFORK is also needed by KVM in absence of synchronous MMU */
1657 qemu_madvise(new_block->host, new_block->max_length, QEMU_MADV_DONTFORK);
1660 * In HAX, QEMU allocates the virtual address space, and the HAX kernel
1661 * populates it with physical memory. Currently we have no paging, so the
1662 * user should make sure there is enough free memory in advance.
1664 if (hax_enabled()) {
1665 int ret = hax_populate_ram((uint64_t)(uintptr_t)new_block->host,
1666 new_block->max_length);
1668 fprintf(stderr, "HAX failed to populate ram\n");
1677 RAMBlock *qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr,
1678 bool share, const char *mem_path,
1681 RAMBlock *new_block;
1682 Error *local_err = NULL;
1684 if (xen_enabled()) {
1685 error_setg(errp, "-mem-path not supported with Xen");
1689 if (phys_mem_alloc != qemu_anon_ram_alloc) {
1691 * file_ram_alloc() needs to allocate just like
1692 * phys_mem_alloc, but we haven't bothered to provide
1696 "-mem-path not supported with this accelerator");
1700 size = HOST_PAGE_ALIGN(size);
1701 new_block = g_malloc0(sizeof(*new_block));
1703 new_block->used_length = size;
1704 new_block->max_length = size;
1705 new_block->flags = share ? RAM_SHARED : 0;
1706 new_block->host = file_ram_alloc(new_block, size,
1708 if (!new_block->host) {
1713 ram_block_add(new_block, &local_err);
1716 error_propagate(errp, local_err);
1724 RAMBlock *qemu_ram_alloc_internal(ram_addr_t size, ram_addr_t max_size,
1725 void (*resized)(const char*,
1728 void *host, bool resizeable,
1729 MemoryRegion *mr, Error **errp)
1731 RAMBlock *new_block;
1732 Error *local_err = NULL;
1734 size = HOST_PAGE_ALIGN(size);
1735 max_size = HOST_PAGE_ALIGN(max_size);
1736 new_block = g_malloc0(sizeof(*new_block));
1738 new_block->resized = resized;
1739 new_block->used_length = size;
1740 new_block->max_length = max_size;
1741 assert(max_size >= size);
1743 new_block->page_size = getpagesize();
1744 new_block->host = host;
1746 new_block->flags |= RAM_PREALLOC;
1749 new_block->flags |= RAM_RESIZEABLE;
1751 ram_block_add(new_block, &local_err);
1754 error_propagate(errp, local_err);
1760 RAMBlock *qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
1761 MemoryRegion *mr, Error **errp)
1763 return qemu_ram_alloc_internal(size, size, NULL, host, false, mr, errp);
1766 RAMBlock *qemu_ram_alloc(ram_addr_t size, MemoryRegion *mr, Error **errp)
1768 return qemu_ram_alloc_internal(size, size, NULL, NULL, false, mr, errp);
1771 RAMBlock *qemu_ram_alloc_resizeable(ram_addr_t size, ram_addr_t maxsz,
1772 void (*resized)(const char*,
1775 MemoryRegion *mr, Error **errp)
1777 return qemu_ram_alloc_internal(size, maxsz, resized, NULL, true, mr, errp);
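/*
 * Orientation sketch: boards usually reach these allocators through the
 * memory region wrappers in memory.c rather than calling them directly
 * (the region name and size below are made up for illustration).
 */
#if 0
static void example_board_ram(MemoryRegion *mr, Object *owner)
{
    memory_region_init_ram(mr, owner, "example.ram", 128 * 1024 * 1024,
                           &error_fatal);
}
#endif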
1780 static void reclaim_ramblock(RAMBlock *block)
1782 if (block->flags & RAM_PREALLOC) {
1784 } else if (xen_enabled()) {
1785 xen_invalidate_map_cache_entry(block->host);
1787 } else if (block->fd >= 0) {
1788 qemu_ram_munmap(block->host, block->max_length);
1792 qemu_anon_ram_free(block->host, block->max_length);
1797 void qemu_ram_free(RAMBlock *block)
1803 qemu_mutex_lock_ramlist();
1804 QLIST_REMOVE_RCU(block, next);
1805 ram_list.mru_block = NULL;
1806 /* Write list before version */
1809 call_rcu(block, reclaim_ramblock, rcu);
1810 qemu_mutex_unlock_ramlist();
1814 void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
1821 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1822 offset = addr - block->offset;
1823 if (offset < block->max_length) {
1824 vaddr = ramblock_ptr(block, offset);
1825 if (block->flags & RAM_PREALLOC) {
1827 } else if (xen_enabled()) {
1831 if (block->fd >= 0) {
1832 flags |= (block->flags & RAM_SHARED ?
1833 MAP_SHARED : MAP_PRIVATE);
1834 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1835 flags, block->fd, offset);
1838 * Remap needs to match alloc. Accelerators that
1839 * set phys_mem_alloc never remap. If they did,
1840 * we'd need a remap hook here.
1842 assert(phys_mem_alloc == qemu_anon_ram_alloc);
1844 flags |= MAP_PRIVATE | MAP_ANONYMOUS;
1845 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1848 if (area != vaddr) {
1849 fprintf(stderr, "Could not remap addr: "
1850 RAM_ADDR_FMT "@" RAM_ADDR_FMT "\n",
1854 memory_try_enable_merging(vaddr, length);
1855 qemu_ram_setup_dump(vaddr, length);
1860 #endif /* !_WIN32 */
1862 /* Return a host pointer to ram allocated with qemu_ram_alloc.
1863 * This should not be used for general purpose DMA. Use address_space_map
1864 * or address_space_rw instead. For local memory (e.g. video ram) that the
1865 * device owns, use memory_region_get_ram_ptr.
1867 * Called within RCU critical section.
1869 void *qemu_map_ram_ptr(RAMBlock *ram_block, ram_addr_t addr)
1871 RAMBlock *block = ram_block;
1873 if (block == NULL) {
1874 block = qemu_get_ram_block(addr);
1875 addr -= block->offset;
1878 if (xen_enabled() && block->host == NULL) {
1879 /* We need to check if the requested address is in the RAM
1880 * because we don't want to map the entire memory in QEMU.
1881 * In that case just map until the end of the page.
1883 if (block->offset == 0) {
1884 return xen_map_cache(addr, 0, 0);
1887 block->host = xen_map_cache(block->offset, block->max_length, 1);
1889 return ramblock_ptr(block, addr);
1892 /* Return a host pointer to guest's ram. Similar to qemu_map_ram_ptr
1893 * but takes a size argument.
1895 * Called within RCU critical section.
1897 static void *qemu_ram_ptr_length(RAMBlock *ram_block, ram_addr_t addr,
1900 RAMBlock *block = ram_block;
1905 if (block == NULL) {
1906 block = qemu_get_ram_block(addr);
1907 addr -= block->offset;
1909 *size = MIN(*size, block->max_length - addr);
1911 if (xen_enabled() && block->host == NULL) {
1912 /* We need to check if the requested address is in the RAM
1913 * because we don't want to map the entire memory in QEMU.
1914 * In that case just map the requested area.
1916 if (block->offset == 0) {
1917 return xen_map_cache(addr, *size, 1);
1920 block->host = xen_map_cache(block->offset, block->max_length, 1);
1923 return ramblock_ptr(block, addr);
1927 * Translates a host ptr back to a RAMBlock, a ram_addr and an offset
1930 * ptr: Host pointer to look up
1931 * round_offset: If true round the result offset down to a page boundary
1932 * *ram_addr: set to result ram_addr
1933 * *offset: set to result offset within the RAMBlock
1935 * Returns: RAMBlock (or NULL if not found)
1937 * By the time this function returns, the returned pointer is not protected
1938 * by RCU anymore. If the caller is not within an RCU critical section and
1939 * does not hold the iothread lock, it must have other means of protecting the
1940 * pointer, such as a reference to the region that includes the incoming
1943 RAMBlock *qemu_ram_block_from_host(void *ptr, bool round_offset,
1947 uint8_t *host = ptr;
1949 if (xen_enabled()) {
1950 ram_addr_t ram_addr;
1952 ram_addr = xen_ram_addr_from_mapcache(ptr);
1953 block = qemu_get_ram_block(ram_addr);
1955 *offset = ram_addr - block->offset;
1962 block = atomic_rcu_read(&ram_list.mru_block);
1963 if (block && block->host && host - block->host < block->max_length) {
1967 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1968 /* This case happens when the block is not mapped. */
1969 if (block->host == NULL) {
1972 if (host - block->host < block->max_length) {
1981 *offset = (host - block->host);
1983 *offset &= TARGET_PAGE_MASK;
1990 * Finds the named RAMBlock
1992 * name: The name of RAMBlock to find
1994 * Returns: RAMBlock (or NULL if not found)
1996 RAMBlock *qemu_ram_block_by_name(const char *name)
2000 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
2001 if (!strcmp(name, block->idstr)) {
2009 /* Some of the softmmu routines need to translate from a host pointer
2010 (typically a TLB entry) back to a ram offset. */
2011 ram_addr_t qemu_ram_addr_from_host(void *ptr)
2016 block = qemu_ram_block_from_host(ptr, false, &offset);
2018 return RAM_ADDR_INVALID;
2021 return block->offset + offset;
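/*
 * Round-trip sketch (within an RCU critical section): a pointer obtained
 * from qemu_map_ram_ptr() maps back to the original ram_addr_t, while a
 * foreign pointer yields RAM_ADDR_INVALID.
 */
#if 0
static void example_round_trip(ram_addr_t ram_addr)
{
    void *host = qemu_map_ram_ptr(NULL, ram_addr);

    assert(qemu_ram_addr_from_host(host) == ram_addr);
}
#endif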
2024 /* Called within RCU critical section. */
2025 static void notdirty_mem_write(void *opaque, hwaddr ram_addr,
2026 uint64_t val, unsigned size)
2028 bool locked = false;
2030 if (!cpu_physical_memory_get_dirty_flag(ram_addr, DIRTY_MEMORY_CODE)) {
2033 tb_invalidate_phys_page_fast(ram_addr, size);
2037 stb_p(qemu_map_ram_ptr(NULL, ram_addr), val);
2040 stw_p(qemu_map_ram_ptr(NULL, ram_addr), val);
2043 stl_p(qemu_map_ram_ptr(NULL, ram_addr), val);
2053 /* Set both VGA and migration bits for simplicity and to remove
2054 * the notdirty callback faster.
2056 cpu_physical_memory_set_dirty_range(ram_addr, size,
2057 DIRTY_CLIENTS_NOCODE);
2058 /* we remove the notdirty callback only if the code has been flushed. */
2060 if (!cpu_physical_memory_is_clean(ram_addr)) {
2061 tlb_set_dirty(current_cpu, current_cpu->mem_io_vaddr);
2065 static bool notdirty_mem_accepts(void *opaque, hwaddr addr,
2066 unsigned size, bool is_write)
2071 static const MemoryRegionOps notdirty_mem_ops = {
2072 .write = notdirty_mem_write,
2073 .valid.accepts = notdirty_mem_accepts,
2074 .endianness = DEVICE_NATIVE_ENDIAN,
2077 /* Generate a debug exception if a watchpoint has been hit. */
2078 static void check_watchpoint(int offset, int len, MemTxAttrs attrs, int flags)
2080 CPUState *cpu = current_cpu;
2081 CPUClass *cc = CPU_GET_CLASS(cpu);
2082 CPUArchState *env = cpu->env_ptr;
2083 target_ulong pc, cs_base;
2088 if (cpu->watchpoint_hit) {
2089 /* We re-entered the check after replacing the TB. Now raise
2090 * the debug interrupt so that it will trigger after the
2091 * current instruction. */
2092 cpu_interrupt(cpu, CPU_INTERRUPT_DEBUG);
2095 vaddr = (cpu->mem_io_vaddr & TARGET_PAGE_MASK) + offset;
2096 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
2097 if (cpu_watchpoint_address_matches(wp, vaddr, len)
2098 && (wp->flags & flags)) {
2099 if (flags == BP_MEM_READ) {
2100 wp->flags |= BP_WATCHPOINT_HIT_READ;
2102 wp->flags |= BP_WATCHPOINT_HIT_WRITE;
2104 wp->hitaddr = vaddr;
2105 wp->hitattrs = attrs;
2106 if (!cpu->watchpoint_hit) {
2107 if (wp->flags & BP_CPU &&
2108 !cc->debug_check_watchpoint(cpu, wp)) {
2109 wp->flags &= ~BP_WATCHPOINT_HIT;
2112 cpu->watchpoint_hit = wp;
2114 /* The tb_lock will be reset when cpu_loop_exit or
2115 * cpu_loop_exit_noexc longjmp back into the cpu_exec
2119 tb_check_watchpoint(cpu);
2120 if (wp->flags & BP_STOP_BEFORE_ACCESS) {
2121 cpu->exception_index = EXCP_DEBUG;
2124 cpu_get_tb_cpu_state(env, &pc, &cs_base, &cpu_flags);
2125 tb_gen_code(cpu, pc, cs_base, cpu_flags, 1);
2126 cpu_loop_exit_noexc(cpu);
2130 wp->flags &= ~BP_WATCHPOINT_HIT;
2135 /* Watchpoint access routines. Watchpoints are inserted using TLB tricks,
2136 so these check for a hit then pass through to the normal out-of-line phys routines. */
2138 static MemTxResult watch_mem_read(void *opaque, hwaddr addr, uint64_t *pdata,
2139 unsigned size, MemTxAttrs attrs)
2143 int asidx = cpu_asidx_from_attrs(current_cpu, attrs);
2144 AddressSpace *as = current_cpu->cpu_ases[asidx].as;
2146 check_watchpoint(addr & ~TARGET_PAGE_MASK, size, attrs, BP_MEM_READ);
2149 data = address_space_ldub(as, addr, attrs, &res);
2152 data = address_space_lduw(as, addr, attrs, &res);
2155 data = address_space_ldl(as, addr, attrs, &res);
2163 static MemTxResult watch_mem_write(void *opaque, hwaddr addr,
2164 uint64_t val, unsigned size,
2168 int asidx = cpu_asidx_from_attrs(current_cpu, attrs);
2169 AddressSpace *as = current_cpu->cpu_ases[asidx].as;
2171 check_watchpoint(addr & ~TARGET_PAGE_MASK, size, attrs, BP_MEM_WRITE);
2174 address_space_stb(as, addr, val, attrs, &res);
2177 address_space_stw(as, addr, val, attrs, &res);
2180 address_space_stl(as, addr, val, attrs, &res);
2187 static const MemoryRegionOps watch_mem_ops = {
2188 .read_with_attrs = watch_mem_read,
2189 .write_with_attrs = watch_mem_write,
2190 .endianness = DEVICE_NATIVE_ENDIAN,
2193 static MemTxResult subpage_read(void *opaque, hwaddr addr, uint64_t *data,
2194 unsigned len, MemTxAttrs attrs)
2196 subpage_t *subpage = opaque;
2200 #if defined(DEBUG_SUBPAGE)
2201 printf("%s: subpage %p len %u addr " TARGET_FMT_plx "\n", __func__,
2202 subpage, len, addr);
2204 res = address_space_read(subpage->as, addr + subpage->base,
2211 *data = ldub_p(buf);
2214 *data = lduw_p(buf);
2227 static MemTxResult subpage_write(void *opaque, hwaddr addr,
2228 uint64_t value, unsigned len, MemTxAttrs attrs)
2230 subpage_t *subpage = opaque;
2233 #if defined(DEBUG_SUBPAGE)
2234 printf("%s: subpage %p len %u addr " TARGET_FMT_plx
2235 " value %"PRIx64"\n",
2236 __func__, subpage, len, addr, value);
2254 return address_space_write(subpage->as, addr + subpage->base,
2258 static bool subpage_accepts(void *opaque, hwaddr addr,
2259 unsigned len, bool is_write)
2261 subpage_t *subpage = opaque;
2262 #if defined(DEBUG_SUBPAGE)
2263 printf("%s: subpage %p %c len %u addr " TARGET_FMT_plx "\n",
2264 __func__, subpage, is_write ? 'w' : 'r', len, addr);
2267 return address_space_access_valid(subpage->as, addr + subpage->base,
2271 static const MemoryRegionOps subpage_ops = {
2272 .read_with_attrs = subpage_read,
2273 .write_with_attrs = subpage_write,
2274 .impl.min_access_size = 1,
2275 .impl.max_access_size = 8,
2276 .valid.min_access_size = 1,
2277 .valid.max_access_size = 8,
2278 .valid.accepts = subpage_accepts,
2279 .endianness = DEVICE_NATIVE_ENDIAN,
2282 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
2287 if (start >= TARGET_PAGE_SIZE || end >= TARGET_PAGE_SIZE)
2289 idx = SUBPAGE_IDX(start);
2290 eidx = SUBPAGE_IDX(end);
2291 #if defined(DEBUG_SUBPAGE)
2292 printf("%s: %p start %08x end %08x idx %08x eidx %08x section %d\n",
2293 __func__, mmio, start, end, idx, eidx, section);
2295 for (; idx <= eidx; idx++) {
2296 mmio->sub_section[idx] = section;
2302 static subpage_t *subpage_init(AddressSpace *as, hwaddr base)
2306 mmio = g_malloc0(sizeof(subpage_t) + TARGET_PAGE_SIZE * sizeof(uint16_t));
2309 memory_region_init_io(&mmio->iomem, NULL, &subpage_ops, mmio,
2310 NULL, TARGET_PAGE_SIZE);
2311 mmio->iomem.subpage = true;
2312 #if defined(DEBUG_SUBPAGE)
2313 printf("%s: %p base " TARGET_FMT_plx " len %08x\n", __func__,
2314 mmio, base, TARGET_PAGE_SIZE);
2316 subpage_register(mmio, 0, TARGET_PAGE_SIZE-1, PHYS_SECTION_UNASSIGNED);
2321 static uint16_t dummy_section(PhysPageMap *map, AddressSpace *as,
2325 MemoryRegionSection section = {
2326 .address_space = as,
2328 .offset_within_address_space = 0,
2329 .offset_within_region = 0,
2330 .size = int128_2_64(),
2333 return phys_section_add(map, &section);
2336 MemoryRegion *iotlb_to_region(CPUState *cpu, hwaddr index, MemTxAttrs attrs)
2338 int asidx = cpu_asidx_from_attrs(cpu, attrs);
2339 CPUAddressSpace *cpuas = &cpu->cpu_ases[asidx];
2340 AddressSpaceDispatch *d = atomic_rcu_read(&cpuas->memory_dispatch);
2341 MemoryRegionSection *sections = d->map.sections;
2343 return sections[index & ~TARGET_PAGE_MASK].mr;
2346 static void io_mem_init(void)
2348 memory_region_init_io(&io_mem_rom, NULL, &unassigned_mem_ops, NULL, NULL, UINT64_MAX);
2349 memory_region_init_io(&io_mem_unassigned, NULL, &unassigned_mem_ops, NULL,
2351 memory_region_init_io(&io_mem_notdirty, NULL, &notdirty_mem_ops, NULL,
2353 memory_region_init_io(&io_mem_watch, NULL, &watch_mem_ops, NULL,
2357 static void mem_begin(MemoryListener *listener)
2359 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
2360 AddressSpaceDispatch *d = g_new0(AddressSpaceDispatch, 1);
2363 n = dummy_section(&d->map, as, &io_mem_unassigned);
2364 assert(n == PHYS_SECTION_UNASSIGNED);
2365 n = dummy_section(&d->map, as, &io_mem_notdirty);
2366 assert(n == PHYS_SECTION_NOTDIRTY);
2367 n = dummy_section(&d->map, as, &io_mem_rom);
2368 assert(n == PHYS_SECTION_ROM);
2369 n = dummy_section(&d->map, as, &io_mem_watch);
2370 assert(n == PHYS_SECTION_WATCH);
2372 d->phys_map = (PhysPageEntry) { .ptr = PHYS_MAP_NODE_NIL, .skip = 1 };
2374 as->next_dispatch = d;
2377 static void address_space_dispatch_free(AddressSpaceDispatch *d)
2379 phys_sections_free(&d->map);
2383 static void mem_commit(MemoryListener *listener)
2385 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
2386 AddressSpaceDispatch *cur = as->dispatch;
2387 AddressSpaceDispatch *next = as->next_dispatch;
2389 phys_page_compact_all(next, next->map.nodes_nb);
2391 atomic_rcu_set(&as->dispatch, next);
2393 call_rcu(cur, address_space_dispatch_free, rcu);
2397 static void tcg_commit(MemoryListener *listener)
2399 CPUAddressSpace *cpuas;
2400 AddressSpaceDispatch *d;
2402 /* since each CPU stores ram addresses in its TLB cache, we must
2403 reset the modified entries */
2404 cpuas = container_of(listener, CPUAddressSpace, tcg_as_listener);
2405 cpu_reloading_memory_map();
2406 /* The CPU and TLB are protected by the iothread lock.
2407 * We reload the dispatch pointer now because cpu_reloading_memory_map()
2408 * may have split the RCU critical section.
2410 d = atomic_rcu_read(&cpuas->as->dispatch);
2411 atomic_rcu_set(&cpuas->memory_dispatch, d);
2412 tlb_flush(cpuas->cpu, 1);
2415 void address_space_init_dispatch(AddressSpace *as)
2417 as->dispatch = NULL;
2418 as->dispatch_listener = (MemoryListener) {
2420 .commit = mem_commit,
2421 .region_add = mem_add,
2422 .region_nop = mem_add,
2425 memory_listener_register(&as->dispatch_listener, as);
2428 void address_space_unregister(AddressSpace *as)
2430 memory_listener_unregister(&as->dispatch_listener);
2433 void address_space_destroy_dispatch(AddressSpace *as)
2435 AddressSpaceDispatch *d = as->dispatch;
2437 atomic_rcu_set(&as->dispatch, NULL);
2439 call_rcu(d, address_space_dispatch_free, rcu);
2443 static void memory_map_init(void)
2445 system_memory = g_malloc(sizeof(*system_memory));
2447 memory_region_init(system_memory, NULL, "system", UINT64_MAX);
2448 address_space_init(&address_space_memory, system_memory, "memory");
2450 system_io = g_malloc(sizeof(*system_io));
2451 memory_region_init_io(system_io, NULL, &unassigned_io_ops, NULL, "io",
2453 address_space_init(&address_space_io, system_io, "I/O");
2456 MemoryRegion *get_system_memory(void)
2458 return system_memory;
2461 MemoryRegion *get_system_io(void)
2466 #endif /* !defined(CONFIG_USER_ONLY) */
2468 /* physical memory access (slow version, mainly for debug) */
2469 #if defined(CONFIG_USER_ONLY)
2470 int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
2471 uint8_t *buf, int len, int is_write)
2478 page = addr & TARGET_PAGE_MASK;
2479 l = (page + TARGET_PAGE_SIZE) - addr;
2482 flags = page_get_flags(page);
2483 if (!(flags & PAGE_VALID))
2486 if (!(flags & PAGE_WRITE))
2488 /* XXX: this code should not depend on lock_user */
2489 if (!(p = lock_user(VERIFY_WRITE, addr, l, 0)))
2492 unlock_user(p, addr, l);
2494 if (!(flags & PAGE_READ))
2496 /* XXX: this code should not depend on lock_user */
2497 if (!(p = lock_user(VERIFY_READ, addr, l, 1)))
2500 unlock_user(p, addr, 0);
static void invalidate_and_set_dirty(MemoryRegion *mr, hwaddr addr,
                                     hwaddr length)
{
    uint8_t dirty_log_mask = memory_region_get_dirty_log_mask(mr);
    addr += memory_region_get_ram_addr(mr);

    /* No early return if dirty_log_mask is or becomes 0, because
     * cpu_physical_memory_set_dirty_range will still call
     * xen_modified_memory.
     */
    if (dirty_log_mask) {
        dirty_log_mask =
            cpu_physical_memory_range_includes_clean(addr, length, dirty_log_mask);
    }
    if (dirty_log_mask & (1 << DIRTY_MEMORY_CODE)) {
        tb_invalidate_phys_range(addr, addr + length);
        dirty_log_mask &= ~(1 << DIRTY_MEMORY_CODE);
    }
    cpu_physical_memory_set_dirty_range(addr, length, dirty_log_mask);
}

static int memory_access_size(MemoryRegion *mr, unsigned l, hwaddr addr)
{
    unsigned access_size_max = mr->ops->valid.max_access_size;

    /* Regions are assumed to support 1-4 byte accesses unless
       otherwise specified.  */
    if (access_size_max == 0) {
        access_size_max = 4;
    }

    /* Bound the maximum access by the alignment of the address.  */
    if (!mr->ops->impl.unaligned) {
        unsigned align_size_max = addr & -addr;
        if (align_size_max != 0 && align_size_max < access_size_max) {
            access_size_max = align_size_max;
        }
    }

    /* Don't attempt accesses larger than the maximum.  */
    if (l > access_size_max) {
        l = access_size_max;
    }
    l = pow2floor(l);

    return l;
}

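/* Worked example (assumed values, for illustration only): a 6-byte access
 * at addr = 0x1002 against a region with max_access_size = 4 and
 * impl.unaligned = false.  addr & -addr = 0x2, which is below 4, so
 * access_size_max becomes 2; l = MIN(6, 2) = 2 and pow2floor(2) = 2.
 * The caller therefore issues a 2-byte access and loops for the rest.
 */
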
static bool prepare_mmio_access(MemoryRegion *mr)
{
    bool unlocked = !qemu_mutex_iothread_locked();
    bool release_lock = false;

    if (unlocked && mr->global_locking) {
        qemu_mutex_lock_iothread();
        unlocked = false;
        release_lock = true;
    }
    if (mr->flush_coalesced_mmio) {
        if (unlocked) {
            qemu_mutex_lock_iothread();
        }
        qemu_flush_coalesced_mmio_buffer();
        if (unlocked) {
            qemu_mutex_unlock_iothread();
        }
    }

    return release_lock;
}

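/* Callers use the return value to pair the lock acquisition with a later
 * release once the MMIO dispatch is done, typically:
 *
 *     release_lock |= prepare_mmio_access(mr);
 *     ... memory_region_dispatch_read/write(...) ...
 *     if (release_lock) {
 *         qemu_mutex_unlock_iothread();
 *     }
 *
 * (Pattern as used by the accessors below; shown here only as a reading aid.)
 */
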
/* Called within RCU critical section.  */
static MemTxResult address_space_write_continue(AddressSpace *as, hwaddr addr,
                                                MemTxAttrs attrs,
                                                const uint8_t *buf,
                                                int len, hwaddr addr1,
                                                hwaddr l, MemoryRegion *mr)
{
    uint8_t *ptr;
    uint64_t val;
    MemTxResult result = MEMTX_OK;
    bool release_lock = false;

    for (;;) {
        if (!memory_access_is_direct(mr, true)) {
            release_lock |= prepare_mmio_access(mr);
            l = memory_access_size(mr, l, addr1);
            /* XXX: could force current_cpu to NULL to avoid
               potential bugs */
            switch (l) {
            case 8:
                /* 64 bit write access */
                val = ldq_p(buf);
                result |= memory_region_dispatch_write(mr, addr1, val, 8,
                                                       attrs);
                break;
            case 4:
                /* 32 bit write access */
                val = ldl_p(buf);
                result |= memory_region_dispatch_write(mr, addr1, val, 4,
                                                       attrs);
                break;
            case 2:
                /* 16 bit write access */
                val = lduw_p(buf);
                result |= memory_region_dispatch_write(mr, addr1, val, 2,
                                                       attrs);
                break;
            case 1:
                /* 8 bit write access */
                val = ldub_p(buf);
                result |= memory_region_dispatch_write(mr, addr1, val, 1,
                                                       attrs);
                break;
            default:
                abort();
            }
        } else {
            /* RAM case */
            ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
            memcpy(ptr, buf, l);
            invalidate_and_set_dirty(mr, addr1, l);
        }

        if (release_lock) {
            qemu_mutex_unlock_iothread();
            release_lock = false;
        }

        len -= l;
        buf += l;
        addr += l;

        if (!len) {
            break;
        }

        l = len;
        mr = address_space_translate(as, addr, &addr1, &l, true);
    }

    return result;
}

MemTxResult address_space_write(AddressSpace *as, hwaddr addr, MemTxAttrs attrs,
                                const uint8_t *buf, int len)
{
    hwaddr l;
    hwaddr addr1;
    MemoryRegion *mr;
    MemTxResult result = MEMTX_OK;

    if (len > 0) {
        rcu_read_lock();
        l = len;
        mr = address_space_translate(as, addr, &addr1, &l, true);
        result = address_space_write_continue(as, addr, attrs, buf, len,
                                              addr1, l, mr);
        rcu_read_unlock();
    }

    return result;
}

/* Called within RCU critical section.  */
MemTxResult address_space_read_continue(AddressSpace *as, hwaddr addr,
                                        MemTxAttrs attrs, uint8_t *buf,
                                        int len, hwaddr addr1, hwaddr l,
                                        MemoryRegion *mr)
{
    uint8_t *ptr;
    uint64_t val;
    MemTxResult result = MEMTX_OK;
    bool release_lock = false;

    for (;;) {
        if (!memory_access_is_direct(mr, false)) {
            /* I/O case */
            release_lock |= prepare_mmio_access(mr);
            l = memory_access_size(mr, l, addr1);
            switch (l) {
            case 8:
                /* 64 bit read access */
                result |= memory_region_dispatch_read(mr, addr1, &val, 8,
                                                      attrs);
                stq_p(buf, val);
                break;
            case 4:
                /* 32 bit read access */
                result |= memory_region_dispatch_read(mr, addr1, &val, 4,
                                                      attrs);
                stl_p(buf, val);
                break;
            case 2:
                /* 16 bit read access */
                result |= memory_region_dispatch_read(mr, addr1, &val, 2,
                                                      attrs);
                stw_p(buf, val);
                break;
            case 1:
                /* 8 bit read access */
                result |= memory_region_dispatch_read(mr, addr1, &val, 1,
                                                      attrs);
                stb_p(buf, val);
                break;
            default:
                abort();
            }
        } else {
            /* RAM case */
            ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
            memcpy(buf, ptr, l);
        }

        if (release_lock) {
            qemu_mutex_unlock_iothread();
            release_lock = false;
        }

        len -= l;
        buf += l;
        addr += l;

        if (!len) {
            break;
        }

        l = len;
        mr = address_space_translate(as, addr, &addr1, &l, false);
    }

    return result;
}

MemTxResult address_space_read_full(AddressSpace *as, hwaddr addr,
                                    MemTxAttrs attrs, uint8_t *buf, int len)
{
    hwaddr l;
    hwaddr addr1;
    MemoryRegion *mr;
    MemTxResult result = MEMTX_OK;

    if (len > 0) {
        rcu_read_lock();
        l = len;
        mr = address_space_translate(as, addr, &addr1, &l, false);
        result = address_space_read_continue(as, addr, attrs, buf, len,
                                             addr1, l, mr);
        rcu_read_unlock();
    }

    return result;
}

MemTxResult address_space_rw(AddressSpace *as, hwaddr addr, MemTxAttrs attrs,
                             uint8_t *buf, int len, bool is_write)
{
    if (is_write) {
        return address_space_write(as, addr, attrs, (uint8_t *)buf, len);
    } else {
        return address_space_read(as, addr, attrs, (uint8_t *)buf, len);
    }
}

void cpu_physical_memory_rw(hwaddr addr, uint8_t *buf,
                            int len, int is_write)
{
    address_space_rw(&address_space_memory, addr, MEMTXATTRS_UNSPECIFIED,
                     buf, len, is_write);
}

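/* Typical use from a device model (illustrative sketch only; "desc_gpa",
 * "status_gpa" and "struct desc" are made-up names): read a descriptor out
 * of guest RAM, then write a completion status back:
 *
 *     struct desc d;
 *     cpu_physical_memory_rw(desc_gpa, (uint8_t *)&d, sizeof(d), 0);
 *     ...
 *     cpu_physical_memory_rw(status_gpa, &status, 1, 1);
 *
 * Device code that already has an AddressSpace at hand should prefer the
 * address_space_*() variants so DMA goes through the right address space.
 */
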
enum write_rom_type {
    WRITE_DATA,
    FLUSH_CACHE,
};

static inline void cpu_physical_memory_write_rom_internal(AddressSpace *as,
    hwaddr addr, const uint8_t *buf, int len, enum write_rom_type type)
{
    hwaddr l;
    uint8_t *ptr;
    hwaddr addr1;
    MemoryRegion *mr;

    rcu_read_lock();
    while (len > 0) {
        l = len;
        mr = address_space_translate(as, addr, &addr1, &l, true);

        if (!(memory_region_is_ram(mr) ||
              memory_region_is_romd(mr))) {
            l = memory_access_size(mr, l, addr1);
        } else {
            /* ROM/RAM case */
            ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
            switch (type) {
            case WRITE_DATA:
                memcpy(ptr, buf, l);
                invalidate_and_set_dirty(mr, addr1, l);
                break;
            case FLUSH_CACHE:
                flush_icache_range((uintptr_t)ptr, (uintptr_t)ptr + l);
                break;
            }
        }
        len -= l;
        buf += l;
        addr += l;
    }
    rcu_read_unlock();
}

/* used for ROM loading : can write in RAM and ROM */
void cpu_physical_memory_write_rom(AddressSpace *as, hwaddr addr,
                                   const uint8_t *buf, int len)
{
    cpu_physical_memory_write_rom_internal(as, addr, buf, len, WRITE_DATA);
}

void cpu_flush_icache_range(hwaddr start, int len)
{
    /*
     * This function should do the same thing as an icache flush that was
     * triggered from within the guest. For TCG we are always cache coherent,
     * so there is no need to flush anything. For KVM / Xen we need to flush
     * the host's instruction cache at least.
     */
    if (tcg_enabled()) {
        return;
    }

    cpu_physical_memory_write_rom_internal(&address_space_memory,
                                           start, NULL, len, FLUSH_CACHE);
}

typedef struct {
    MemoryRegion *mr;
    void *buffer;
    hwaddr addr;
    hwaddr len;
    bool in_use;
} BounceBuffer;

static BounceBuffer bounce;

typedef struct MapClient {
    QEMUBH *bh;
    QLIST_ENTRY(MapClient) link;
} MapClient;

QemuMutex map_client_list_lock;
static QLIST_HEAD(map_client_list, MapClient) map_client_list
    = QLIST_HEAD_INITIALIZER(map_client_list);

static void cpu_unregister_map_client_do(MapClient *client)
{
    QLIST_REMOVE(client, link);
    g_free(client);
}

static void cpu_notify_map_clients_locked(void)
{
    MapClient *client;

    while (!QLIST_EMPTY(&map_client_list)) {
        client = QLIST_FIRST(&map_client_list);
        qemu_bh_schedule(client->bh);
        cpu_unregister_map_client_do(client);
    }
}

void cpu_register_map_client(QEMUBH *bh)
{
    MapClient *client = g_malloc(sizeof(*client));

    qemu_mutex_lock(&map_client_list_lock);
    client->bh = bh;
    QLIST_INSERT_HEAD(&map_client_list, client, link);
    if (!atomic_read(&bounce.in_use)) {
        cpu_notify_map_clients_locked();
    }
    qemu_mutex_unlock(&map_client_list_lock);
}

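/* The map-client list is the retry mechanism for address_space_map(): when
 * the single bounce buffer is busy, a caller registers a bottom half here
 * and is woken (via cpu_notify_map_clients) once the buffer is released,
 * at which point it simply retries the map.  Sketch of a caller, with
 * "retry_bh" being a hypothetical bottom half belonging to that caller:
 *
 *     mem = address_space_map(as, addr, &len, is_write);
 *     if (!mem) {
 *         cpu_register_map_client(retry_bh);   // retry from the BH later
 *     }
 */
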
void cpu_exec_init_all(void)
{
    qemu_mutex_init(&ram_list.mutex);
    /* The data structures we set up here depend on knowing the page size,
     * so no more changes can be made after this point.
     * In an ideal world, nothing we did before we had finished the
     * machine setup would care about the target page size, and we could
     * do this much later, rather than requiring board models to state
     * up front what their requirements are.
     */
    finalize_target_page_bits();
    io_mem_init();
    memory_map_init();
    qemu_mutex_init(&map_client_list_lock);
}

void cpu_unregister_map_client(QEMUBH *bh)
{
    MapClient *client;

    qemu_mutex_lock(&map_client_list_lock);
    QLIST_FOREACH(client, &map_client_list, link) {
        if (client->bh == bh) {
            cpu_unregister_map_client_do(client);
            break;
        }
    }
    qemu_mutex_unlock(&map_client_list_lock);
}

static void cpu_notify_map_clients(void)
{
    qemu_mutex_lock(&map_client_list_lock);
    cpu_notify_map_clients_locked();
    qemu_mutex_unlock(&map_client_list_lock);
}

bool address_space_access_valid(AddressSpace *as, hwaddr addr, int len, bool is_write)
{
    MemoryRegion *mr;
    hwaddr l, xlat;

    rcu_read_lock();
    while (len > 0) {
        l = len;
        mr = address_space_translate(as, addr, &xlat, &l, is_write);
        if (!memory_access_is_direct(mr, is_write)) {
            l = memory_access_size(mr, l, addr);
            if (!memory_region_access_valid(mr, xlat, l, is_write)) {
                rcu_read_unlock();
                return false;
            }
        }

        len -= l;
        addr += l;
    }
    rcu_read_unlock();
    return true;
}

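/* Callers use this to probe a DMA window before committing to it, along
 * the lines of (illustrative only; error handling is caller-specific):
 *
 *     if (!address_space_access_valid(as, dma_addr, dma_len, true)) {
 *         // report a DMA error instead of touching memory
 *     }
 */
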
/* Map a physical memory region into a host virtual address.
 * May map a subset of the requested range, given by and returned in *plen.
 * May return NULL if resources needed to perform the mapping are exhausted.
 * Use only for reads OR writes - not for read-modify-write operations.
 * Use cpu_register_map_client() to know when retrying the map operation is
 * likely to succeed.
 */
void *address_space_map(AddressSpace *as,
                        hwaddr addr,
                        hwaddr *plen,
                        bool is_write)
{
    hwaddr len = *plen;
    hwaddr done = 0;
    hwaddr l, xlat, base;
    MemoryRegion *mr, *this_mr;
    void *ptr;

    if (len == 0) {
        return NULL;
    }

    l = len;
    rcu_read_lock();
    mr = address_space_translate(as, addr, &xlat, &l, is_write);

    if (!memory_access_is_direct(mr, is_write)) {
        if (atomic_xchg(&bounce.in_use, true)) {
            rcu_read_unlock();
            return NULL;
        }
        /* Avoid unbounded allocations */
        l = MIN(l, TARGET_PAGE_SIZE);
        bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, l);
        bounce.addr = addr;
        bounce.len = l;

        memory_region_ref(mr);
        bounce.mr = mr;
        if (!is_write) {
            address_space_read(as, addr, MEMTXATTRS_UNSPECIFIED,
                               bounce.buffer, l);
        }

        rcu_read_unlock();
        *plen = l;
        return bounce.buffer;
    }

    base = xlat;

    for (;;) {
        len -= l;
        addr += l;
        done += l;
        if (len == 0) {
            break;
        }

        l = len;
        this_mr = address_space_translate(as, addr, &xlat, &l, is_write);
        if (this_mr != mr || xlat != base + done) {
            break;
        }
    }

    memory_region_ref(mr);
    *plen = done;
    ptr = qemu_ram_ptr_length(mr->ram_block, base, plen);
    rcu_read_unlock();

    return ptr;
}

/* Unmaps a memory region previously mapped by address_space_map().
 * Will also mark the memory as dirty if is_write == 1.  access_len gives
 * the amount of memory that was actually read or written by the caller.
 */
void address_space_unmap(AddressSpace *as, void *buffer, hwaddr len,
                         int is_write, hwaddr access_len)
{
    if (buffer != bounce.buffer) {
        MemoryRegion *mr;
        ram_addr_t addr1;

        mr = memory_region_from_host(buffer, &addr1);
        assert(mr != NULL);
        if (is_write) {
            invalidate_and_set_dirty(mr, addr1, access_len);
        }
        if (xen_enabled()) {
            xen_invalidate_map_cache_entry(buffer);
        }
        memory_region_unref(mr);
        return;
    }
    if (is_write) {
        address_space_write(as, bounce.addr, MEMTXATTRS_UNSPECIFIED,
                            bounce.buffer, access_len);
    }
    qemu_vfree(bounce.buffer);
    bounce.buffer = NULL;
    memory_region_unref(bounce.mr);
    atomic_mb_set(&bounce.in_use, false);
    cpu_notify_map_clients();
}

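/* Typical zero-copy DMA pattern built on the two functions above
 * (illustrative sketch; "sg_addr" and "sg_len" are made-up names):
 *
 *     hwaddr len = sg_len;
 *     void *host = address_space_map(as, sg_addr, &len, is_write);
 *     if (host) {
 *         ... operate directly on host[0..len) ...
 *         address_space_unmap(as, host, len, is_write, len);
 *     }
 *
 * len may come back smaller than requested, so real callers loop (or use
 * the dma-helpers.c scatter-gather wrappers) until the full range is done.
 */
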
void *cpu_physical_memory_map(hwaddr addr,
                              hwaddr *plen,
                              int is_write)
{
    return address_space_map(&address_space_memory, addr, plen, is_write);
}

void cpu_physical_memory_unmap(void *buffer, hwaddr len,
                               int is_write, hwaddr access_len)
{
    return address_space_unmap(&address_space_memory, buffer, len, is_write, access_len);
}

/* warning: addr must be aligned */
static inline uint32_t address_space_ldl_internal(AddressSpace *as, hwaddr addr,
                                                  MemTxAttrs attrs,
                                                  MemTxResult *result,
                                                  enum device_endian endian)
{
    uint8_t *ptr;
    uint64_t val;
    MemoryRegion *mr;
    hwaddr l = 4;
    hwaddr addr1;
    MemTxResult r;
    bool release_lock = false;

    rcu_read_lock();
    mr = address_space_translate(as, addr, &addr1, &l, false);
    if (l < 4 || !memory_access_is_direct(mr, false)) {
        release_lock |= prepare_mmio_access(mr);

        /* I/O case */
        r = memory_region_dispatch_read(mr, addr1, &val, 4, attrs);
#if defined(TARGET_WORDS_BIGENDIAN)
        if (endian == DEVICE_LITTLE_ENDIAN) {
            val = bswap32(val);
        }
#else
        if (endian == DEVICE_BIG_ENDIAN) {
            val = bswap32(val);
        }
#endif
    } else {
        /* RAM case */
        ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
        switch (endian) {
        case DEVICE_LITTLE_ENDIAN:
            val = ldl_le_p(ptr);
            break;
        case DEVICE_BIG_ENDIAN:
            val = ldl_be_p(ptr);
            break;
        default:
            val = ldl_p(ptr);
            break;
        }
        r = MEMTX_OK;
    }
    if (result) {
        *result = r;
    }
    if (release_lock) {
        qemu_mutex_unlock_iothread();
    }
    rcu_read_unlock();
    return val;
}

uint32_t address_space_ldl(AddressSpace *as, hwaddr addr,
                           MemTxAttrs attrs, MemTxResult *result)
{
    return address_space_ldl_internal(as, addr, attrs, result,
                                      DEVICE_NATIVE_ENDIAN);
}

uint32_t address_space_ldl_le(AddressSpace *as, hwaddr addr,
                              MemTxAttrs attrs, MemTxResult *result)
{
    return address_space_ldl_internal(as, addr, attrs, result,
                                      DEVICE_LITTLE_ENDIAN);
}

uint32_t address_space_ldl_be(AddressSpace *as, hwaddr addr,
                              MemTxAttrs attrs, MemTxResult *result)
{
    return address_space_ldl_internal(as, addr, attrs, result,
                                      DEVICE_BIG_ENDIAN);
}

uint32_t ldl_phys(AddressSpace *as, hwaddr addr)
{
    return address_space_ldl(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
}

uint32_t ldl_le_phys(AddressSpace *as, hwaddr addr)
{
    return address_space_ldl_le(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
}

uint32_t ldl_be_phys(AddressSpace *as, hwaddr addr)
{
    return address_space_ldl_be(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
}

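/* These helpers are convenient for small, aligned accesses to guest
 * physical memory, e.g. a device model walking a 32-bit table entry
 * (illustrative only; "table_gpa" is a made-up name):
 *
 *     uint32_t entry = ldl_le_phys(&address_space_memory, table_gpa);
 *     stl_le_phys(&address_space_memory, table_gpa, entry | 1);
 *
 * The _le/_be variants fix the guest-visible endianness regardless of
 * whether the target is built with TARGET_WORDS_BIGENDIAN.
 */
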
/* warning: addr must be aligned */
static inline uint64_t address_space_ldq_internal(AddressSpace *as, hwaddr addr,
                                                  MemTxAttrs attrs,
                                                  MemTxResult *result,
                                                  enum device_endian endian)
{
    uint8_t *ptr;
    uint64_t val;
    MemoryRegion *mr;
    hwaddr l = 8;
    hwaddr addr1;
    MemTxResult r;
    bool release_lock = false;

    rcu_read_lock();
    mr = address_space_translate(as, addr, &addr1, &l,
                                 false);
    if (l < 8 || !memory_access_is_direct(mr, false)) {
        release_lock |= prepare_mmio_access(mr);

        /* I/O case */
        r = memory_region_dispatch_read(mr, addr1, &val, 8, attrs);
#if defined(TARGET_WORDS_BIGENDIAN)
        if (endian == DEVICE_LITTLE_ENDIAN) {
            val = bswap64(val);
        }
#else
        if (endian == DEVICE_BIG_ENDIAN) {
            val = bswap64(val);
        }
#endif
    } else {
        /* RAM case */
        ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
        switch (endian) {
        case DEVICE_LITTLE_ENDIAN:
            val = ldq_le_p(ptr);
            break;
        case DEVICE_BIG_ENDIAN:
            val = ldq_be_p(ptr);
            break;
        default:
            val = ldq_p(ptr);
            break;
        }
        r = MEMTX_OK;
    }
    if (result) {
        *result = r;
    }
    if (release_lock) {
        qemu_mutex_unlock_iothread();
    }
    rcu_read_unlock();
    return val;
}

uint64_t address_space_ldq(AddressSpace *as, hwaddr addr,
                           MemTxAttrs attrs, MemTxResult *result)
{
    return address_space_ldq_internal(as, addr, attrs, result,
                                      DEVICE_NATIVE_ENDIAN);
}

uint64_t address_space_ldq_le(AddressSpace *as, hwaddr addr,
                              MemTxAttrs attrs, MemTxResult *result)
{
    return address_space_ldq_internal(as, addr, attrs, result,
                                      DEVICE_LITTLE_ENDIAN);
}

uint64_t address_space_ldq_be(AddressSpace *as, hwaddr addr,
                              MemTxAttrs attrs, MemTxResult *result)
{
    return address_space_ldq_internal(as, addr, attrs, result,
                                      DEVICE_BIG_ENDIAN);
}

uint64_t ldq_phys(AddressSpace *as, hwaddr addr)
{
    return address_space_ldq(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
}

uint64_t ldq_le_phys(AddressSpace *as, hwaddr addr)
{
    return address_space_ldq_le(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
}

uint64_t ldq_be_phys(AddressSpace *as, hwaddr addr)
{
    return address_space_ldq_be(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
}

uint32_t address_space_ldub(AddressSpace *as, hwaddr addr,
                            MemTxAttrs attrs, MemTxResult *result)
{
    uint8_t val;
    MemTxResult r;

    r = address_space_rw(as, addr, attrs, &val, 1, 0);
    if (result) {
        *result = r;
    }
    return val;
}

uint32_t ldub_phys(AddressSpace *as, hwaddr addr)
{
    return address_space_ldub(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
}

/* warning: addr must be aligned */
static inline uint32_t address_space_lduw_internal(AddressSpace *as,
                                                   hwaddr addr,
                                                   MemTxAttrs attrs,
                                                   MemTxResult *result,
                                                   enum device_endian endian)
{
    uint8_t *ptr;
    uint64_t val;
    MemoryRegion *mr;
    hwaddr l = 2;
    hwaddr addr1;
    MemTxResult r;
    bool release_lock = false;

    rcu_read_lock();
    mr = address_space_translate(as, addr, &addr1, &l,
                                 false);
    if (l < 2 || !memory_access_is_direct(mr, false)) {
        release_lock |= prepare_mmio_access(mr);

        /* I/O case */
        r = memory_region_dispatch_read(mr, addr1, &val, 2, attrs);
#if defined(TARGET_WORDS_BIGENDIAN)
        if (endian == DEVICE_LITTLE_ENDIAN) {
            val = bswap16(val);
        }
#else
        if (endian == DEVICE_BIG_ENDIAN) {
            val = bswap16(val);
        }
#endif
    } else {
        /* RAM case */
        ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
        switch (endian) {
        case DEVICE_LITTLE_ENDIAN:
            val = lduw_le_p(ptr);
            break;
        case DEVICE_BIG_ENDIAN:
            val = lduw_be_p(ptr);
            break;
        default:
            val = lduw_p(ptr);
            break;
        }
        r = MEMTX_OK;
    }
    if (result) {
        *result = r;
    }
    if (release_lock) {
        qemu_mutex_unlock_iothread();
    }
    rcu_read_unlock();
    return val;
}

uint32_t address_space_lduw(AddressSpace *as, hwaddr addr,
                            MemTxAttrs attrs, MemTxResult *result)
{
    return address_space_lduw_internal(as, addr, attrs, result,
                                       DEVICE_NATIVE_ENDIAN);
}

uint32_t address_space_lduw_le(AddressSpace *as, hwaddr addr,
                               MemTxAttrs attrs, MemTxResult *result)
{
    return address_space_lduw_internal(as, addr, attrs, result,
                                       DEVICE_LITTLE_ENDIAN);
}

uint32_t address_space_lduw_be(AddressSpace *as, hwaddr addr,
                               MemTxAttrs attrs, MemTxResult *result)
{
    return address_space_lduw_internal(as, addr, attrs, result,
                                       DEVICE_BIG_ENDIAN);
}

uint32_t lduw_phys(AddressSpace *as, hwaddr addr)
{
    return address_space_lduw(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
}

uint32_t lduw_le_phys(AddressSpace *as, hwaddr addr)
{
    return address_space_lduw_le(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
}

uint32_t lduw_be_phys(AddressSpace *as, hwaddr addr)
{
    return address_space_lduw_be(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
}

/* warning: addr must be aligned. The ram page is not masked as dirty
   and the code inside is not invalidated. It is useful if the dirty
   bits are used to track modified PTEs */
void address_space_stl_notdirty(AddressSpace *as, hwaddr addr, uint32_t val,
                                MemTxAttrs attrs, MemTxResult *result)
{
    uint8_t *ptr;
    MemoryRegion *mr;
    hwaddr l = 4;
    hwaddr addr1;
    MemTxResult r;
    uint8_t dirty_log_mask;
    bool release_lock = false;

    rcu_read_lock();
    mr = address_space_translate(as, addr, &addr1, &l,
                                 true);
    if (l < 4 || !memory_access_is_direct(mr, true)) {
        release_lock |= prepare_mmio_access(mr);

        r = memory_region_dispatch_write(mr, addr1, val, 4, attrs);
    } else {
        ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
        stl_p(ptr, val);

        dirty_log_mask = memory_region_get_dirty_log_mask(mr);
        dirty_log_mask &= ~(1 << DIRTY_MEMORY_CODE);
        cpu_physical_memory_set_dirty_range(memory_region_get_ram_addr(mr) + addr,
                                            4, dirty_log_mask);
        r = MEMTX_OK;
    }
    if (result) {
        *result = r;
    }
    if (release_lock) {
        qemu_mutex_unlock_iothread();
    }
    rcu_read_unlock();
}

void stl_phys_notdirty(AddressSpace *as, hwaddr addr, uint32_t val)
{
    address_space_stl_notdirty(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
}

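/* Unlike address_space_stl(), the notdirty variant skips TB invalidation
 * (DIRTY_MEMORY_CODE is masked out above), so translated code that happens
 * to live in the same page is left alone.  Target MMU emulation uses this
 * when it rewrites accessed/dirty bits inside guest page table entries.
 */
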
/* warning: addr must be aligned */
static inline void address_space_stl_internal(AddressSpace *as,
                                              hwaddr addr, uint32_t val,
                                              MemTxAttrs attrs,
                                              MemTxResult *result,
                                              enum device_endian endian)
{
    uint8_t *ptr;
    MemoryRegion *mr;
    hwaddr l = 4;
    hwaddr addr1;
    MemTxResult r;
    bool release_lock = false;

    rcu_read_lock();
    mr = address_space_translate(as, addr, &addr1, &l,
                                 true);
    if (l < 4 || !memory_access_is_direct(mr, true)) {
        release_lock |= prepare_mmio_access(mr);

#if defined(TARGET_WORDS_BIGENDIAN)
        if (endian == DEVICE_LITTLE_ENDIAN) {
            val = bswap32(val);
        }
#else
        if (endian == DEVICE_BIG_ENDIAN) {
            val = bswap32(val);
        }
#endif
        r = memory_region_dispatch_write(mr, addr1, val, 4, attrs);
    } else {
        /* RAM case */
        ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
        switch (endian) {
        case DEVICE_LITTLE_ENDIAN:
            stl_le_p(ptr, val);
            break;
        case DEVICE_BIG_ENDIAN:
            stl_be_p(ptr, val);
            break;
        default:
            stl_p(ptr, val);
            break;
        }
        invalidate_and_set_dirty(mr, addr1, 4);
        r = MEMTX_OK;
    }
    if (result) {
        *result = r;
    }
    if (release_lock) {
        qemu_mutex_unlock_iothread();
    }
    rcu_read_unlock();
}

void address_space_stl(AddressSpace *as, hwaddr addr, uint32_t val,
                       MemTxAttrs attrs, MemTxResult *result)
{
    address_space_stl_internal(as, addr, val, attrs, result,
                               DEVICE_NATIVE_ENDIAN);
}

void address_space_stl_le(AddressSpace *as, hwaddr addr, uint32_t val,
                          MemTxAttrs attrs, MemTxResult *result)
{
    address_space_stl_internal(as, addr, val, attrs, result,
                               DEVICE_LITTLE_ENDIAN);
}

void address_space_stl_be(AddressSpace *as, hwaddr addr, uint32_t val,
                          MemTxAttrs attrs, MemTxResult *result)
{
    address_space_stl_internal(as, addr, val, attrs, result,
                               DEVICE_BIG_ENDIAN);
}

void stl_phys(AddressSpace *as, hwaddr addr, uint32_t val)
{
    address_space_stl(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
}

void stl_le_phys(AddressSpace *as, hwaddr addr, uint32_t val)
{
    address_space_stl_le(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
}

void stl_be_phys(AddressSpace *as, hwaddr addr, uint32_t val)
{
    address_space_stl_be(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
}

void address_space_stb(AddressSpace *as, hwaddr addr, uint32_t val,
                       MemTxAttrs attrs, MemTxResult *result)
{
    uint8_t v = val;
    MemTxResult r;

    r = address_space_rw(as, addr, attrs, &v, 1, 1);
    if (result) {
        *result = r;
    }
}

void stb_phys(AddressSpace *as, hwaddr addr, uint32_t val)
{
    address_space_stb(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
}

/* warning: addr must be aligned */
static inline void address_space_stw_internal(AddressSpace *as,
                                              hwaddr addr, uint32_t val,
                                              MemTxAttrs attrs,
                                              MemTxResult *result,
                                              enum device_endian endian)
{
    uint8_t *ptr;
    MemoryRegion *mr;
    hwaddr l = 2;
    hwaddr addr1;
    MemTxResult r;
    bool release_lock = false;

    rcu_read_lock();
    mr = address_space_translate(as, addr, &addr1, &l, true);
    if (l < 2 || !memory_access_is_direct(mr, true)) {
        release_lock |= prepare_mmio_access(mr);

#if defined(TARGET_WORDS_BIGENDIAN)
        if (endian == DEVICE_LITTLE_ENDIAN) {
            val = bswap16(val);
        }
#else
        if (endian == DEVICE_BIG_ENDIAN) {
            val = bswap16(val);
        }
#endif
        r = memory_region_dispatch_write(mr, addr1, val, 2, attrs);
    } else {
        /* RAM case */
        ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
        switch (endian) {
        case DEVICE_LITTLE_ENDIAN:
            stw_le_p(ptr, val);
            break;
        case DEVICE_BIG_ENDIAN:
            stw_be_p(ptr, val);
            break;
        default:
            stw_p(ptr, val);
            break;
        }
        invalidate_and_set_dirty(mr, addr1, 2);
        r = MEMTX_OK;
    }
    if (result) {
        *result = r;
    }
    if (release_lock) {
        qemu_mutex_unlock_iothread();
    }
    rcu_read_unlock();
}

void address_space_stw(AddressSpace *as, hwaddr addr, uint32_t val,
                       MemTxAttrs attrs, MemTxResult *result)
{
    address_space_stw_internal(as, addr, val, attrs, result,
                               DEVICE_NATIVE_ENDIAN);
}

void address_space_stw_le(AddressSpace *as, hwaddr addr, uint32_t val,
                          MemTxAttrs attrs, MemTxResult *result)
{
    address_space_stw_internal(as, addr, val, attrs, result,
                               DEVICE_LITTLE_ENDIAN);
}

void address_space_stw_be(AddressSpace *as, hwaddr addr, uint32_t val,
                          MemTxAttrs attrs, MemTxResult *result)
{
    address_space_stw_internal(as, addr, val, attrs, result,
                               DEVICE_BIG_ENDIAN);
}

void stw_phys(AddressSpace *as, hwaddr addr, uint32_t val)
{
    address_space_stw(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
}

void stw_le_phys(AddressSpace *as, hwaddr addr, uint32_t val)
{
    address_space_stw_le(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
}

void stw_be_phys(AddressSpace *as, hwaddr addr, uint32_t val)
{
    address_space_stw_be(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
}

void address_space_stq(AddressSpace *as, hwaddr addr, uint64_t val,
                       MemTxAttrs attrs, MemTxResult *result)
{
    MemTxResult r;
    val = tswap64(val);
    r = address_space_rw(as, addr, attrs, (void *) &val, 8, 1);
    if (result) {
        *result = r;
    }
}

void address_space_stq_le(AddressSpace *as, hwaddr addr, uint64_t val,
                          MemTxAttrs attrs, MemTxResult *result)
{
    MemTxResult r;
    val = cpu_to_le64(val);
    r = address_space_rw(as, addr, attrs, (void *) &val, 8, 1);
    if (result) {
        *result = r;
    }
}

void address_space_stq_be(AddressSpace *as, hwaddr addr, uint64_t val,
                          MemTxAttrs attrs, MemTxResult *result)
{
    MemTxResult r;
    val = cpu_to_be64(val);
    r = address_space_rw(as, addr, attrs, (void *) &val, 8, 1);
    if (result) {
        *result = r;
    }
}

void stq_phys(AddressSpace *as, hwaddr addr, uint64_t val)
{
    address_space_stq(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
}

void stq_le_phys(AddressSpace *as, hwaddr addr, uint64_t val)
{
    address_space_stq_le(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
}

void stq_be_phys(AddressSpace *as, hwaddr addr, uint64_t val)
{
    address_space_stq_be(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
}

/* virtual memory access for debug (includes writing to ROM) */
int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
                        uint8_t *buf, int len, int is_write)
{
    int l;
    hwaddr phys_addr;
    target_ulong page;

    while (len > 0) {
        int asidx;
        MemTxAttrs attrs;

        page = addr & TARGET_PAGE_MASK;
        phys_addr = cpu_get_phys_page_attrs_debug(cpu, page, &attrs);
        asidx = cpu_asidx_from_attrs(cpu, attrs);
        /* if no physical page mapped, return an error */
        if (phys_addr == -1)
            return -1;
        l = (page + TARGET_PAGE_SIZE) - addr;
        if (l > len)
            l = len;
        phys_addr += (addr & ~TARGET_PAGE_MASK);
        if (is_write) {
            cpu_physical_memory_write_rom(cpu->cpu_ases[asidx].as,
                                          phys_addr, buf, l);
        } else {
            address_space_rw(cpu->cpu_ases[asidx].as, phys_addr,
                             MEMTXATTRS_UNSPECIFIED,
                             buf, l, 0);
        }
        len -= l;
        buf += l;
        addr += l;
    }
    return 0;
}

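/* This is the debug path used by the gdbstub and the monitor memory
 * commands: it walks the guest's virtual-to-physical mapping page by page,
 * so it works even while the guest MMU is enabled, and it deliberately goes
 * through cpu_physical_memory_write_rom() on the write side so breakpoints
 * can be planted in ROM-backed regions.
 */
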
/*
 * Allows code that needs to deal with migration bitmaps etc to still be built
 * target independent.
 */
size_t qemu_target_page_bits(void)
{
    return TARGET_PAGE_BITS;
}

#endif

/*
 * A helper function for the _utterly broken_ virtio device model to find out if
 * it's running on a big endian machine. Don't do this at home kids!
 */
bool target_words_bigendian(void);
bool target_words_bigendian(void)
{
#if defined(TARGET_WORDS_BIGENDIAN)
    return true;
#else
    return false;
#endif
}

#ifndef CONFIG_USER_ONLY
bool cpu_physical_memory_is_io(hwaddr phys_addr)
{
    MemoryRegion *mr;
    hwaddr l = 1;
    bool res;

    rcu_read_lock();
    mr = address_space_translate(&address_space_memory,
                                 phys_addr, &phys_addr, &l, false);

    res = !(memory_region_is_ram(mr) || memory_region_is_romd(mr));
    rcu_read_unlock();
    return res;
}

int qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque)
{
    RAMBlock *block;
    int ret = 0;

    rcu_read_lock();
    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
        ret = func(block->idstr, block->host, block->offset,
                   block->used_length, opaque);