4 * Copyright (c) 2003 Fabrice Bellard
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
19 #include "qemu/osdep.h"
20 #include "qapi/error.h"
25 #include "qemu/cutils.h"
27 #include "exec/exec-all.h"
29 #include "hw/qdev-core.h"
30 #if !defined(CONFIG_USER_ONLY)
31 #include "hw/boards.h"
32 #include "hw/xen/xen.h"
34 #include "sysemu/kvm.h"
35 #include "sysemu/sysemu.h"
36 #include "qemu/timer.h"
37 #include "qemu/config-file.h"
38 #include "qemu/error-report.h"
39 #if defined(CONFIG_USER_ONLY)
41 #else /* !CONFIG_USER_ONLY */
43 #include "exec/memory.h"
44 #include "exec/ioport.h"
45 #include "sysemu/dma.h"
46 #include "exec/address-spaces.h"
47 #include "sysemu/xen-mapcache.h"
50 #include "exec/cpu-all.h"
51 #include "qemu/rcu_queue.h"
52 #include "qemu/main-loop.h"
53 #include "translate-all.h"
54 #include "sysemu/replay.h"
56 #include "exec/memory-internal.h"
57 #include "exec/ram_addr.h"
60 #include "qemu/range.h"
62 #include "qemu/mmap-alloc.h"
65 //#define DEBUG_SUBPAGE
67 #if !defined(CONFIG_USER_ONLY)
68 /* ram_list is read under rcu_read_lock()/rcu_read_unlock(). Writes
69 * are protected by the ramlist lock.
71 RAMList ram_list = { .blocks = QLIST_HEAD_INITIALIZER(ram_list.blocks) };
/* system_memory is what cpu_exec_init() installs as the default target of a
 * CPU's "memory" link property. */
73 static MemoryRegion *system_memory;
74 static MemoryRegion *system_io;
76 AddressSpace address_space_io;
77 AddressSpace address_space_memory;
/* Special-purpose regions.  memory_region_is_unassigned() treats io_mem_rom
 * and io_mem_notdirty as assigned; address_space_translate() substitutes
 * io_mem_unassigned when an IOMMU translation denies the access. */
79 MemoryRegion io_mem_rom, io_mem_notdirty;
80 static MemoryRegion io_mem_unassigned;
82 /* RAM is pre-allocated and passed into qemu_ram_alloc_from_ptr */
83 #define RAM_PREALLOC (1 << 0)
85 /* RAM is mmap-ed with MAP_SHARED */
86 #define RAM_SHARED (1 << 1)
88 /* Only a portion of RAM (used_length) is actually used, and migrated.
89 * This used_length size can change across reboots.
91 #define RAM_RESIZEABLE (1 << 2)
/* Tail queue of CPUs; cpu_exec_init() appends each new CPU to its tail. */
95 struct CPUTailQ cpus = QTAILQ_HEAD_INITIALIZER(cpus);
96 /* current CPU in the current thread. It is only valid inside
98 __thread CPUState *current_cpu;
99 /* 0 = Do not count executed instructions.
100 1 = Precise instruction counting.
101 2 = Adaptive rate instruction counting. */
104 #if !defined(CONFIG_USER_ONLY)
/* One entry of the physical page map.  Going by the field comments below, an
 * entry with a zero skip count is a leaf whose ptr indexes phys_sections;
 * otherwise ptr indexes phys_map_nodes. */
106 typedef struct PhysPageEntry PhysPageEntry;
108 struct PhysPageEntry {
109 /* How many bits to skip to reach the next level (in units of L2_SIZE). 0 for a leaf. */
111 /* index into phys_sections (!skip) or phys_map_nodes (skip) */
/* Sentinel ptr value for "no node allocated here yet": a 32-bit all-ones
 * value shifted right by 6, i.e. the low 26 bits set. */
115 #define PHYS_MAP_NODE_NIL (((uint32_t)~0) >> 6)
117 /* Size of the L2 (and L3, etc) page tables. */
118 #define ADDR_SPACE_BITS 64
/* Number of entries in one node. */
121 #define P_L2_SIZE (1 << P_L2_BITS)
/* Levels needed to cover (ADDR_SPACE_BITS - TARGET_PAGE_BITS) index bits in
 * steps of P_L2_BITS, rounded up. */
123 #define P_L2_LEVELS (((ADDR_SPACE_BITS - TARGET_PAGE_BITS - 1) / P_L2_BITS) + 1)
/* A node is one table of P_L2_SIZE entries. */
125 typedef PhysPageEntry Node[P_L2_SIZE];
/* Growable storage behind a dispatch tree: the sections array (sections_nb
 * entries in use out of sections_nb_alloc allocated, grown by
 * phys_section_add()) and the node array (capacity nodes_nb_alloc, grown by
 * phys_map_node_reserve()). */
127 typedef struct PhysPageMap {
130 unsigned sections_nb;
131 unsigned sections_nb_alloc;
133 unsigned nodes_nb_alloc;
135 MemoryRegionSection *sections;
/* Lookup state for one address space.  mru_section caches a section returned
 * by address_space_lookup_region(); phys_map is the root entry handed to
 * phys_page_find() and phys_page_set_level(). */
138 struct AddressSpaceDispatch {
141 MemoryRegionSection *mru_section;
142 /* This is a multi-level map on the physical address space.
143 * The bottom level has pointers to MemoryRegionSections.
145 PhysPageEntry phys_map;
/* Offset of addr within its target page. */
150 #define SUBPAGE_IDX(addr) ((addr) & ~TARGET_PAGE_MASK)
/* A page split among several sections.  sub_section[] has one slot per byte
 * offset in the page; address_space_lookup_region() uses the slot value as an
 * index into the dispatch's sections array. */
151 typedef struct subpage_t {
155 uint16_t sub_section[TARGET_PAGE_SIZE];
/* Well-known section numbers.  PHYS_SECTION_UNASSIGNED is used directly as an
 * index into a sections array by phys_page_find(); the others are combined
 * into iotlb values by memory_region_section_get_iotlb().
 * NOTE(review): presumably all four are fixed indices into every sections
 * array - confirm against the code that populates them. */
158 #define PHYS_SECTION_UNASSIGNED 0
159 #define PHYS_SECTION_NOTDIRTY 1
160 #define PHYS_SECTION_ROM 2
161 #define PHYS_SECTION_WATCH 3
/* Forward declarations for definitions that appear later in the file. */
163 static void io_mem_init(void);
164 static void memory_map_init(void);
165 static void tcg_commit(MemoryListener *listener);
167 static MemoryRegion io_mem_watch;
170 * CPUAddressSpace: all the information a CPU needs about an AddressSpace
171 * @cpu: the CPU whose AddressSpace this is
172 * @as: the AddressSpace itself
173 * @memory_dispatch: its dispatch pointer (cached, RCU protected)
174 * @tcg_as_listener: listener for tracking changes to the AddressSpace
/* memory_dispatch is the pointer address_space_translate_for_iotlb() reads;
 * tcg_as_listener is registered by cpu_address_space_init() with tcg_commit
 * as its commit callback. */
176 struct CPUAddressSpace {
179 struct AddressSpaceDispatch *memory_dispatch;
180 MemoryListener tcg_as_listener;
185 #if !defined(CONFIG_USER_ONLY)
/* Make sure map->nodes can hold `nodes` more entries beyond nodes_nb.
 * When it cannot, the capacity becomes the larger of (twice the old capacity,
 * but at least 16) and the exact count required, and the array is
 * reallocated with g_renew(). */
187 static void phys_map_node_reserve(PhysPageMap *map, unsigned nodes)
189 if (map->nodes_nb + nodes > map->nodes_nb_alloc) {
190 map->nodes_nb_alloc = MAX(map->nodes_nb_alloc * 2, 16);
191 map->nodes_nb_alloc = MAX(map->nodes_nb_alloc, map->nodes_nb + nodes);
192 map->nodes = g_renew(Node, map->nodes, map->nodes_nb_alloc);
/* Take the next unused node of map->nodes (index nodes_nb, post-incremented)
 * and initialise all P_L2_SIZE entries identically: for a leaf node
 * skip = 0 and ptr = PHYS_SECTION_UNASSIGNED, otherwise skip = 1 and
 * ptr = PHYS_MAP_NODE_NIL.  The asserts require that the index is not the NIL
 * sentinel and that capacity was reserved beforehand. */
196 static uint32_t phys_map_node_alloc(PhysPageMap *map, bool leaf)
203 ret = map->nodes_nb++;
205 assert(ret != PHYS_MAP_NODE_NIL);
206 assert(ret != map->nodes_nb_alloc);
208 e.skip = leaf ? 0 : 1;
209 e.ptr = leaf ? PHYS_SECTION_UNASSIGNED : PHYS_MAP_NODE_NIL;
210 for (i = 0; i < P_L2_SIZE; ++i) {
211 memcpy(&p[i], &e, sizeof(e));
/* Recursive worker for phys_page_set().  *index (a page number) and *nb (a
 * page count) are passed by pointer and shared across the recursion.  At
 * `level`, one entry spans `step` = 2^(level * P_L2_BITS) pages. */
216 static void phys_page_set_level(PhysPageMap *map, PhysPageEntry *lp,
217 hwaddr *index, hwaddr *nb, uint16_t leaf,
221 hwaddr step = (hwaddr)1 << (level * P_L2_BITS);
/* Allocate the node below lp on first use; level 0 nodes are leaf nodes. */
223 if (lp->skip && lp->ptr == PHYS_MAP_NODE_NIL) {
224 lp->ptr = phys_map_node_alloc(map, level == 0);
226 p = map->nodes[lp->ptr];
/* Start at the entry of this node that contains *index. */
227 lp = &p[(*index >> (level * P_L2_BITS)) & (P_L2_SIZE - 1)];
229 while (*nb && lp < &p[P_L2_SIZE]) {
/* First branch: the remaining range starts on a step boundary and covers
 * this whole entry.  Otherwise descend one level. */
230 if ((*index & (step - 1)) == 0 && *nb >= step) {
236 phys_page_set_level(map, lp, index, nb, leaf, level - 1);
/* Entry point for updating the page map of `d`: applies `leaf` to `nb` pages
 * starting at page number `index`, beginning the walk at the top level
 * (P_L2_LEVELS - 1).  Nodes are reserved up front because
 * phys_map_node_alloc() asserts that capacity is already available. */
242 static void phys_page_set(AddressSpaceDispatch *d,
243 hwaddr index, hwaddr nb,
246 /* Wildly overreserve - it doesn't matter much. */
247 phys_map_node_reserve(&d->map, 3 * P_L2_LEVELS);
249 phys_page_set_level(&d->map, &d->phys_map, &index, &nb, leaf, P_L2_LEVELS - 1);
252 /* Compact a non leaf page entry. Simply detect that the entry has a single child,
253 * and update our entry so we can skip it and go directly to the destination.
/* Recursive step behind phys_page_compact_all().  Entries whose ptr is
 * PHYS_MAP_NODE_NIL are special-cased; the other children are compacted
 * recursively.  When exactly one child is valid and the combined skip count
 * stays below (1 << 3), lp takes over that child's ptr and adds the child's
 * skip to its own.
 * NOTE(review): how `compacted` is used is not visible in these lines;
 * presumably it marks nodes already processed - confirm. */
255 static void phys_page_compact(PhysPageEntry *lp, Node *nodes, unsigned long *compacted)
257 unsigned valid_ptr = P_L2_SIZE;
262 if (lp->ptr == PHYS_MAP_NODE_NIL) {
267 for (i = 0; i < P_L2_SIZE; i++) {
268 if (p[i].ptr == PHYS_MAP_NODE_NIL) {
275 phys_page_compact(&p[i], nodes, compacted);
279 /* We can only compress if there's only one child. */
284 assert(valid_ptr < P_L2_SIZE);
286 /* Don't compress if it won't fit in the # of bits we have. */
287 if (lp->skip + p[valid_ptr].skip >= (1 << 3)) {
291 lp->ptr = p[valid_ptr].ptr;
292 if (!p[valid_ptr].skip) {
293 /* If our only child is a leaf, make this a leaf. */
294 /* By design, we should have made this node a leaf to begin with so we
295 * should never reach here.
296 * But since it's so simple to handle this, let's do it just in case we
301 lp->skip += p[valid_ptr].skip;
/* Compact the page map of `d`.  Declares a bitmap of nodes_nb bits and, when
 * the root entry is not a leaf (skip != 0), runs phys_page_compact() on it. */
305 static void phys_page_compact_all(AddressSpaceDispatch *d, int nodes_nb)
307 DECLARE_BITMAP(compacted, nodes_nb);
309 if (d->phys_map.skip) {
310 phys_page_compact(&d->phys_map, d->map.nodes, compacted);
/* Return true when `addr` lies inside `section`: either the high half of the
 * section's 128-bit size is non-zero, or range_covers_byte() holds for the
 * range starting at offset_within_address_space with length size.lo. */
314 static inline bool section_covers_addr(const MemoryRegionSection *section,
317 /* Memory topology clips a memory region to [0, 2^64); size.hi > 0 means
318 * the section must cover the entire address space.
320 return section->size.hi ||
321 range_covers_byte(section->offset_within_address_space,
322 section->size.lo, addr);
325 static MemoryRegionSection *phys_page_find(PhysPageEntry lp, hwaddr addr,
326 Node *nodes, MemoryRegionSection *sections)
329 hwaddr index = addr >> TARGET_PAGE_BITS;
332 for (i = P_L2_LEVELS; lp.skip && (i -= lp.skip) >= 0;) {
333 if (lp.ptr == PHYS_MAP_NODE_NIL) {
334 return §ions[PHYS_SECTION_UNASSIGNED];
337 lp = p[(index >> (i * P_L2_BITS)) & (P_L2_SIZE - 1)];
340 if (section_covers_addr(§ions[lp.ptr], addr)) {
341 return §ions[lp.ptr];
343 return §ions[PHYS_SECTION_UNASSIGNED];
/* Return true when `mr` is none of io_mem_rom, io_mem_notdirty or
 * io_mem_watch and is not flagged as a rom_device. */
347 bool memory_region_is_unassigned(MemoryRegion *mr)
349 return mr != &io_mem_rom && mr != &io_mem_notdirty && !mr->rom_device
350 && mr != &io_mem_watch;
/* Find the section of `d` that contains `addr`.  The section cached in
 * d->mru_section is tried first, provided it is non-NULL, is not the
 * unassigned section and covers addr; otherwise phys_page_find() walks the
 * map.  With resolve_subpage set, a subpage region is resolved further to the
 * section recorded for addr's offset within the page.  d->mru_section is
 * written back with atomic_set(). */
353 /* Called from RCU critical section */
354 static MemoryRegionSection *address_space_lookup_region(AddressSpaceDispatch *d,
356 bool resolve_subpage)
358 MemoryRegionSection *section = atomic_read(&d->mru_section);
362 if (section && section != &d->map.sections[PHYS_SECTION_UNASSIGNED] &&
363 section_covers_addr(section, addr)) {
366 section = phys_page_find(d->phys_map, addr, d->map.nodes,
370 if (resolve_subpage && section->mr->subpage) {
371 subpage = container_of(section->mr, subpage_t, iomem);
372 section = &d->map.sections[subpage->sub_section[SUBPAGE_IDX(addr)]];
375 atomic_set(&d->mru_section, section);
/* Look up the section containing `addr` and store in *xlat the matching
 * offset inside the section's MemoryRegion (addr minus
 * offset_within_address_space, plus offset_within_region).  When the region
 * is RAM, *plen is clamped to the bytes left in the section; other regions
 * are left unclamped for the reason given in the comment in the body. */
380 /* Called from RCU critical section */
381 static MemoryRegionSection *
382 address_space_translate_internal(AddressSpaceDispatch *d, hwaddr addr, hwaddr *xlat,
383 hwaddr *plen, bool resolve_subpage)
385 MemoryRegionSection *section;
389 section = address_space_lookup_region(d, addr, resolve_subpage);
390 /* Compute offset within MemoryRegionSection */
391 addr -= section->offset_within_address_space;
393 /* Compute offset within MemoryRegion */
394 *xlat = addr + section->offset_within_region;
398 /* MMIO registers can be expected to perform full-width accesses based only
399 * on their address, without considering adjacent registers that could
400 * decode to completely different MemoryRegions. When such registers
401 * exist (e.g. I/O ports 0xcf8 and 0xcf9 on most PC chipsets), MMIO
402 * regions overlap wildly. For this reason we cannot clamp the accesses
405 * If the length is small (as is the case for address_space_ldl/stl),
406 * everything works fine. If the incoming length is large, however,
407 * the caller really has to do the clamping through memory_access_size.
409 if (memory_region_is_ram(mr)) {
410 diff = int128_sub(section->size, int128_make64(addr));
411 *plen = int128_get64(int128_min(diff, int128_make64(*plen)));
/* Translate `addr` in `as` to a MemoryRegion, resolving subpages.  When the
 * region found has iommu_ops, the address is run through its translate()
 * hook and the lookup moves on to iotlb.target_as; a translation that lacks
 * the required permission yields io_mem_unassigned.  *plen is narrowed along
 * the way so the access stays inside what was translated. */
416 /* Called from RCU critical section */
417 MemoryRegion *address_space_translate(AddressSpace *as, hwaddr addr,
418 hwaddr *xlat, hwaddr *plen,
422 MemoryRegionSection *section;
426 AddressSpaceDispatch *d = atomic_rcu_read(&as->dispatch);
427 section = address_space_translate_internal(d, addr, &addr, plen, true);
430 if (!mr->iommu_ops) {
434 iotlb = mr->iommu_ops->translate(mr, addr, is_write);
/* Bits under addr_mask come from the input address, the rest from the
 * translation. */
435 addr = ((iotlb.translated_addr & ~iotlb.addr_mask)
436 | (addr & iotlb.addr_mask));
/* Do not run past the end of the range covered by addr_mask. */
437 *plen = MIN(*plen, (addr | iotlb.addr_mask) - addr + 1);
/* perm bit 0 is tested for reads and bit 1 for writes (1 << is_write). */
438 if (!(iotlb.perm & (1 << is_write))) {
439 mr = &io_mem_unassigned;
443 as = iotlb.target_as;
446 if (xen_enabled() && memory_access_is_direct(mr, is_write)) {
/* Bytes left up to the next target page boundary. */
447 hwaddr page = ((addr & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE) - addr;
448 *plen = MIN(page, *plen);
/* Translation variant that works on the dispatch pointer held in
 * cpu->cpu_ases[asidx].  Subpages are not resolved (resolve_subpage is
 * false), and the resulting region is asserted to have no iommu_ops. */
455 /* Called from RCU critical section */
456 MemoryRegionSection *
457 address_space_translate_for_iotlb(CPUState *cpu, int asidx, hwaddr addr,
458 hwaddr *xlat, hwaddr *plen)
460 MemoryRegionSection *section;
461 AddressSpaceDispatch *d = cpu->cpu_ases[asidx].memory_dispatch;
463 section = address_space_translate_internal(d, addr, xlat, plen, false);
465 assert(!section->mr->iommu_ops);
470 #if !defined(CONFIG_USER_ONLY)
/* post_load hook of vmstate_cpu_common; `opaque` is the CPUState.  Clears
 * the legacy 0x01 bit from interrupt_request. */
472 static int cpu_common_post_load(void *opaque, int version_id)
474 CPUState *cpu = opaque;
476 /* 0x01 was CPU_INTERRUPT_EXIT. This line can be removed when the
477 version_id is increased. */
478 cpu->interrupt_request &= ~0x01;
/* pre_load hook of vmstate_cpu_common; `opaque` is the CPUState.  Sets
 * exception_index to -1, the value cpu_common_exception_index_needed()
 * treats as "nothing to save". */
484 static int cpu_common_pre_load(void *opaque)
486 CPUState *cpu = opaque;
488 cpu->exception_index = -1;
/* .needed callback of the exception_index subsection: true only when TCG is
 * enabled and exception_index is something other than -1. */
493 static bool cpu_common_exception_index_needed(void *opaque)
495 CPUState *cpu = opaque;
497 return tcg_enabled() && cpu->exception_index != -1;
/* Subsection of vmstate_cpu_common carrying the int32 exception_index field;
 * included only when cpu_common_exception_index_needed() says so. */
500 static const VMStateDescription vmstate_cpu_common_exception_index = {
501 .name = "cpu_common/exception_index",
503 .minimum_version_id = 1,
504 .needed = cpu_common_exception_index_needed,
505 .fields = (VMStateField[]) {
506 VMSTATE_INT32(exception_index, CPUState),
507 VMSTATE_END_OF_LIST()
/* .needed callback of the crash_occurred subsection: true when the CPU's
 * crash_occurred flag is set. */
511 static bool cpu_common_crash_occurred_needed(void *opaque)
513 CPUState *cpu = opaque;
515 return cpu->crash_occurred;
/* Subsection of vmstate_cpu_common carrying the boolean crash_occurred
 * field; included only when cpu_common_crash_occurred_needed() says so. */
518 static const VMStateDescription vmstate_cpu_common_crash_occurred = {
519 .name = "cpu_common/crash_occurred",
521 .minimum_version_id = 1,
522 .needed = cpu_common_crash_occurred_needed,
523 .fields = (VMStateField[]) {
524 VMSTATE_BOOL(crash_occurred, CPUState),
525 VMSTATE_END_OF_LIST()
/* State description for the fields common to all CPUs: `halted` and
 * `interrupt_request`, plus the two optional subsections above.
 * cpu_exec_init() registers it for CPUs whose device has no vmsd of its
 * own. */
529 const VMStateDescription vmstate_cpu_common = {
530 .name = "cpu_common",
532 .minimum_version_id = 1,
533 .pre_load = cpu_common_pre_load,
534 .post_load = cpu_common_post_load,
535 .fields = (VMStateField[]) {
536 VMSTATE_UINT32(halted, CPUState),
537 VMSTATE_UINT32(interrupt_request, CPUState),
538 VMSTATE_END_OF_LIST()
540 .subsections = (const VMStateDescription*[]) {
541 &vmstate_cpu_common_exception_index,
542 &vmstate_cpu_common_crash_occurred,
/* Look up a CPU by comparing cpu_index against `index`.
 * NOTE(review): the iteration and return statements are not visible here;
 * presumably the matching CPUState is returned and NULL otherwise - confirm. */
549 CPUState *qemu_get_cpu(int index)
554 if (cpu->cpu_index == index) {
562 #if !defined(CONFIG_USER_ONLY)
/* Attach address space `as` to `cpu` as its address space number `asidx`.
 * The cpu_ases array is allocated (zeroed, num_ases entries) on the first
 * call, and a memory listener with tcg_commit as its commit callback is
 * registered on `as`. */
563 void cpu_address_space_init(CPUState *cpu, AddressSpace *as, int asidx)
565 CPUAddressSpace *newas;
567 /* Target code should have set num_ases before calling us */
568 assert(asidx < cpu->num_ases);
571 /* address space 0 gets the convenience alias */
575 /* KVM cannot currently support multiple address spaces. */
576 assert(asidx == 0 || !kvm_enabled());
578 if (!cpu->cpu_ases) {
579 cpu->cpu_ases = g_new0(CPUAddressSpace, cpu->num_ases);
582 newas = &cpu->cpu_ases[asidx];
586 newas->tcg_as_listener.commit = tcg_commit;
587 memory_listener_register(&newas->tcg_as_listener, as);
/* Note: asidx is not range-checked here and cpu->cpu_ases is dereferenced
 * directly, so cpu_address_space_init() must have run for this CPU. */
591 AddressSpace *cpu_get_address_space(CPUState *cpu, int asidx)
593 /* Return the AddressSpace corresponding to the specified index */
594 return cpu->cpu_ases[asidx].as;
598 #ifndef CONFIG_USER_ONLY
/* Bitmap of CPU indexes currently in use (softmmu build). */
599 static DECLARE_BITMAP(cpu_index_map, MAX_CPUMASK_BITS);
/* Claim the lowest clear bit of cpu_index_map.  When no bit below
 * MAX_CPUMASK_BITS is free, an error is stored in errp instead.
 * NOTE(review): the return statements are not visible here; presumably the
 * claimed index is returned - confirm. */
601 static int cpu_get_free_index(Error **errp)
603 int cpu = find_first_zero_bit(cpu_index_map, MAX_CPUMASK_BITS);
605 if (cpu >= MAX_CPUMASK_BITS) {
606 error_setg(errp, "Trying to use more CPUs than max of %d",
611 bitmap_set(cpu_index_map, cpu, 1);
/* Release the index held by `cpu` by clearing its bit in cpu_index_map.
 * A cpu_index of -1 is special-cased first, as the comment below explains. */
615 void cpu_exec_exit(CPUState *cpu)
617 if (cpu->cpu_index == -1) {
618 /* cpu_index was never allocated by this @cpu or was already freed. */
622 bitmap_clear(cpu_index_map, cpu->cpu_index, 1);
/* Second definitions of cpu_get_free_index()/cpu_exec_exit(), apparently for
 * the other build configuration.
 * NOTE(review): only the CPU_FOREACH loop header is visible; presumably the
 * index is derived by walking the existing CPUs - confirm. */
627 static int cpu_get_free_index(Error **errp)
632 CPU_FOREACH(some_cpu) {
638 void cpu_exec_exit(CPUState *cpu)
/* Common initialisation of a new CPU.  In the visible lines it:
 *  - records the thread id and adds the "memory" link property, defaulting
 *    it to system_memory and taking a reference (softmmu build only);
 *  - assigns cpu_index from cpu_get_free_index() and forwards any error
 *    to errp;
 *  - appends the CPU to the global `cpus` list;
 *  - registers vmstate_cpu_common when the device has no vmsd of its own,
 *    and additionally cc->vmsd when the class supplies one. */
643 void cpu_exec_init(CPUState *cpu, Error **errp)
645 CPUClass *cc = CPU_GET_CLASS(cpu);
646 Error *local_err = NULL;
651 #ifndef CONFIG_USER_ONLY
652 cpu->thread_id = qemu_get_thread_id();
654 /* This is a softmmu CPU object, so create a property for it
655 * so users can wire up its memory. (This can't go in qom/cpu.c
656 * because that file is compiled only once for both user-mode
657 * and system builds.) The default if no link is set up is to use
658 * the system address space.
660 object_property_add_link(OBJECT(cpu), "memory", TYPE_MEMORY_REGION,
661 (Object **)&cpu->memory,
662 qdev_prop_allow_set_link_before_realize,
663 OBJ_PROP_LINK_UNREF_ON_RELEASE,
665 cpu->memory = system_memory;
666 object_ref(OBJECT(cpu->memory));
669 #if defined(CONFIG_USER_ONLY)
672 cpu->cpu_index = cpu_get_free_index(&local_err);
674 error_propagate(errp, local_err);
675 #if defined(CONFIG_USER_ONLY)
680 QTAILQ_INSERT_TAIL(&cpus, cpu, node);
681 #if defined(CONFIG_USER_ONLY)
685 if (qdev_get_vmsd(DEVICE(cpu)) == NULL) {
686 vmstate_register(NULL, cpu->cpu_index, &vmstate_cpu_common, cpu);
688 if (cc->vmsd != NULL) {
689 vmstate_register(NULL, cpu->cpu_index, cc->vmsd, cpu);
694 #if defined(CONFIG_USER_ONLY)
/* User-mode variant: invalidate translated code for the range [pc, pc + 1). */
695 static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
697 tb_invalidate_phys_page_range(pc, pc + 1, 0);
/* System-mode variant: resolve pc to a physical page and its memory
 * attributes, pick the address space index from those attributes, and
 * invalidate translated code at the physical page combined with pc's offset
 * within the page. */
700 static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
703 hwaddr phys = cpu_get_phys_page_attrs_debug(cpu, pc, &attrs);
704 int asidx = cpu_asidx_from_attrs(cpu, attrs);
706 tb_invalidate_phys_addr(cpu->cpu_ases[asidx].as,
707 phys | (pc & ~TARGET_PAGE_MASK));
712 #if defined(CONFIG_USER_ONLY)
/* CONFIG_USER_ONLY definitions of the watchpoint API.
 * NOTE(review): their bodies are not visible here; presumably they are stubs
 * - confirm. */
713 void cpu_watchpoint_remove_all(CPUState *cpu, int mask)
718 int cpu_watchpoint_remove(CPUState *cpu, vaddr addr, vaddr len,
724 void cpu_watchpoint_remove_by_ref(CPUState *cpu, CPUWatchpoint *watchpoint)
728 int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
729 int flags, CPUWatchpoint **watchpoint)
/* Register a watchpoint covering [addr, addr + len) on `cpu`.  Empty ranges
 * and ranges that wrap past the top of the address space are reported with
 * error_report().  Watchpoints flagged BP_GDB go to the head of the CPU's
 * list, all others to the tail, and the TLB entry for `addr` is flushed.
 * NOTE(review): return values and the use of the `watchpoint` out-parameter
 * are not visible here. */
734 /* Add a watchpoint. */
735 int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
736 int flags, CPUWatchpoint **watchpoint)
740 /* forbid ranges which are empty or run off the end of the address space */
741 if (len == 0 || (addr + len - 1) < addr) {
742 error_report("tried to set invalid watchpoint at %"
743 VADDR_PRIx ", len=%" VADDR_PRIu, addr, len);
746 wp = g_malloc(sizeof(*wp));
752 /* keep all GDB-injected watchpoints in front */
753 if (flags & BP_GDB) {
754 QTAILQ_INSERT_HEAD(&cpu->watchpoints, wp, entry);
756 QTAILQ_INSERT_TAIL(&cpu->watchpoints, wp, entry);
759 tlb_flush_page(cpu, addr);
/* Search the CPU's watchpoints for one with the same addr, len and flags
 * (the BP_WATCHPOINT_HIT bit of the stored flags is ignored in the
 * comparison) and remove it via cpu_watchpoint_remove_by_ref(). */
766 /* Remove a specific watchpoint. */
767 int cpu_watchpoint_remove(CPUState *cpu, vaddr addr, vaddr len,
772 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
773 if (addr == wp->vaddr && len == wp->len
774 && flags == (wp->flags & ~BP_WATCHPOINT_HIT)) {
775 cpu_watchpoint_remove_by_ref(cpu, wp);
/* Unlink `watchpoint` from the CPU's list and flush the TLB entry for the
 * address it watched. */
782 /* Remove a specific watchpoint by reference. */
783 void cpu_watchpoint_remove_by_ref(CPUState *cpu, CPUWatchpoint *watchpoint)
785 QTAILQ_REMOVE(&cpu->watchpoints, watchpoint, entry);
787 tlb_flush_page(cpu, watchpoint->vaddr);
/* Remove every watchpoint whose flags share at least one bit with `mask`.
 * The removal-safe iterator is used because entries are unlinked while the
 * list is being walked. */
792 /* Remove all matching watchpoints. */
793 void cpu_watchpoint_remove_all(CPUState *cpu, int mask)
795 CPUWatchpoint *wp, *next;
797 QTAILQ_FOREACH_SAFE(wp, &cpu->watchpoints, entry, next) {
798 if (wp->flags & mask) {
799 cpu_watchpoint_remove_by_ref(cpu, wp);
/* Overlap test between a watchpoint and an access.  Both ranges are turned
 * into inclusive end addresses (start + len - 1); they overlap unless one
 * range starts after the other one ends. */
804 /* Return true if this watchpoint address matches the specified
805 * access (ie the address range covered by the watchpoint overlaps
806 * partially or completely with the address range covered by the
809 static inline bool cpu_watchpoint_address_matches(CPUWatchpoint *wp,
813 /* We know the lengths are non-zero, but a little caution is
814 * required to avoid errors in the case where the range ends
815 * exactly at the top of the address space and so addr + len
816 * wraps round to zero.
818 vaddr wpend = wp->vaddr + wp->len - 1;
819 vaddr addrend = addr + len - 1;
821 return !(addr > wpend || wp->vaddr > addrend);
/* Register a breakpoint at `pc` on `cpu`.  Breakpoints flagged BP_GDB go to
 * the head of the CPU's list, all others to the tail; translated code at pc
 * is then invalidated.
 * NOTE(review): the return value and the use of the `breakpoint`
 * out-parameter are not visible here. */
826 /* Add a breakpoint. */
827 int cpu_breakpoint_insert(CPUState *cpu, vaddr pc, int flags,
828 CPUBreakpoint **breakpoint)
832 bp = g_malloc(sizeof(*bp));
837 /* keep all GDB-injected breakpoints in front */
838 if (flags & BP_GDB) {
839 QTAILQ_INSERT_HEAD(&cpu->breakpoints, bp, entry);
841 QTAILQ_INSERT_TAIL(&cpu->breakpoints, bp, entry);
844 breakpoint_invalidate(cpu, pc);
/* Search the CPU's breakpoints for one with exactly this pc and flags and
 * remove it via cpu_breakpoint_remove_by_ref(). */
852 /* Remove a specific breakpoint. */
853 int cpu_breakpoint_remove(CPUState *cpu, vaddr pc, int flags)
857 QTAILQ_FOREACH(bp, &cpu->breakpoints, entry) {
858 if (bp->pc == pc && bp->flags == flags) {
859 cpu_breakpoint_remove_by_ref(cpu, bp);
/* Unlink `breakpoint` from the CPU's list and invalidate translated code at
 * the pc it covered. */
866 /* Remove a specific breakpoint by reference. */
867 void cpu_breakpoint_remove_by_ref(CPUState *cpu, CPUBreakpoint *breakpoint)
869 QTAILQ_REMOVE(&cpu->breakpoints, breakpoint, entry);
871 breakpoint_invalidate(cpu, breakpoint->pc);
/* Remove every breakpoint whose flags share at least one bit with `mask`,
 * using the removal-safe list iterator. */
876 /* Remove all matching breakpoints. */
877 void cpu_breakpoint_remove_all(CPUState *cpu, int mask)
879 CPUBreakpoint *bp, *next;
881 QTAILQ_FOREACH_SAFE(bp, &cpu->breakpoints, entry, next) {
882 if (bp->flags & mask) {
883 cpu_breakpoint_remove_by_ref(cpu, bp);
/* Does nothing when `enabled` equals the current singlestep_enabled value.
 * Otherwise the new value is stored and either the KVM guest-debug state is
 * updated or, per the comments below, translated code is flushed. */
888 /* enable or disable single step mode. EXCP_DEBUG is returned by the
889 CPU loop after each instruction */
890 void cpu_single_step(CPUState *cpu, int enabled)
892 if (cpu->singlestep_enabled != enabled) {
893 cpu->singlestep_enabled = enabled;
895 kvm_update_guest_debug(cpu, 0);
897 /* must flush all the translated code to avoid inconsistencies */
898 /* XXX: only flush what is necessary */
/* Report a fatal error: prints "qemu: fatal: " followed by the printf-style
 * message and a CPU state dump to stderr, and repeats both in the QEMU log
 * when qemu_log_separate() is true.  In user-mode builds the SIGABRT
 * disposition is reset to the default, with all signals blocked in the
 * handler mask. */
904 void cpu_abort(CPUState *cpu, const char *fmt, ...)
911 fprintf(stderr, "qemu: fatal: ");
912 vfprintf(stderr, fmt, ap);
913 fprintf(stderr, "\n");
914 cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_FPU | CPU_DUMP_CCOP);
915 if (qemu_log_separate()) {
916 qemu_log("qemu: fatal: ");
917 qemu_log_vprintf(fmt, ap2);
919 log_cpu_state(cpu, CPU_DUMP_FPU | CPU_DUMP_CCOP);
926 #if defined(CONFIG_USER_ONLY)
928 struct sigaction act;
929 sigfillset(&act.sa_mask);
930 act.sa_handler = SIG_DFL;
931 sigaction(SIGABRT, &act, NULL);
937 #if !defined(CONFIG_USER_ONLY)
/* Find the RAMBlock whose [offset, offset + max_length) range contains
 * `addr`.  ram_list.mru_block is tried first, then the whole block list is
 * scanned; a miss prints "Bad ram offset".  The block found is stored back
 * into ram_list.mru_block.
 * NOTE(review): the single comparison `addr - offset < max_length` acts as a
 * two-sided range check only if ram_addr_t is unsigned - confirm. */
938 /* Called from RCU critical section */
939 static RAMBlock *qemu_get_ram_block(ram_addr_t addr)
943 block = atomic_rcu_read(&ram_list.mru_block);
944 if (block && addr - block->offset < block->max_length) {
947 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
948 if (addr - block->offset < block->max_length) {
953 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
957 /* It is safe to write mru_block outside the iothread lock. This
962 * xxx removed from list
966 * call_rcu(reclaim_ramblock, xxx);
969 * atomic_rcu_set is not needed here. The block was already published
970 * when it was placed into the list. Here we're just making an extra
971 * copy of the pointer.
973 ram_list.mru_block = block;
/* Reset TLB dirty tracking for a range of RAM.  The range is widened to
 * whole target pages (end rounded up, start rounded down), asserted to lie
 * within one RAMBlock, converted to a host address inside that block, and
 * passed to tlb_reset_dirty(). */
977 static void tlb_reset_dirty_range_all(ram_addr_t start, ram_addr_t length)
984 end = TARGET_PAGE_ALIGN(start + length);
985 start &= TARGET_PAGE_MASK;
988 block = qemu_get_ram_block(start);
989 assert(block == qemu_get_ram_block(end - 1));
990 start1 = (uintptr_t)ramblock_ptr(block, start - block->offset);
992 tlb_reset_dirty(cpu, start1, length);
/* Atomically test and clear the dirty bits of `client` for the pages touched
 * by [start, start + length).  The page range is processed in pieces that
 * never cross a DIRTY_MEMORY_BLOCK_SIZE boundary, because the bitmap is
 * stored as an array of fixed-size blocks.  When any bit was set and TCG is
 * enabled, the TLB dirty state for the range is reset as well. */
997 /* Note: start and end must be within the same ram block. */
998 bool cpu_physical_memory_test_and_clear_dirty(ram_addr_t start,
1002 DirtyMemoryBlocks *blocks;
1003 unsigned long end, page;
1010 end = TARGET_PAGE_ALIGN(start + length) >> TARGET_PAGE_BITS;
1011 page = start >> TARGET_PAGE_BITS;
1015 blocks = atomic_rcu_read(&ram_list.dirty_memory[client]);
1017 while (page < end) {
1018 unsigned long idx = page / DIRTY_MEMORY_BLOCK_SIZE;
1019 unsigned long offset = page % DIRTY_MEMORY_BLOCK_SIZE;
1020 unsigned long num = MIN(end - page, DIRTY_MEMORY_BLOCK_SIZE - offset);
1022 dirty |= bitmap_test_and_clear_atomic(blocks->blocks[idx],
1029 if (dirty && tcg_enabled()) {
1030 tlb_reset_dirty_range_all(start, length);
1036 /* Called from RCU critical section */
1037 hwaddr memory_region_section_get_iotlb(CPUState *cpu,
1038 MemoryRegionSection *section,
1040 hwaddr paddr, hwaddr xlat,
1042 target_ulong *address)
1047 if (memory_region_is_ram(section->mr)) {
1049 iotlb = memory_region_get_ram_addr(section->mr) + xlat;
1050 if (!section->readonly) {
1051 iotlb |= PHYS_SECTION_NOTDIRTY;
1053 iotlb |= PHYS_SECTION_ROM;
1056 AddressSpaceDispatch *d;
1058 d = atomic_rcu_read(§ion->address_space->dispatch);
1059 iotlb = section - d->map.sections;
1063 /* Make accesses to pages with watchpoints go via the
1064 watchpoint trap routines. */
1065 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
1066 if (cpu_watchpoint_address_matches(wp, vaddr, TARGET_PAGE_SIZE)) {
1067 /* Avoid trapping reads of pages with a write breakpoint. */
1068 if ((prot & PAGE_WRITE) || (wp->flags & BP_MEM_READ)) {
1069 iotlb = PHYS_SECTION_WATCH + paddr;
1070 *address |= TLB_MMIO;
1078 #endif /* defined(CONFIG_USER_ONLY) */
1080 #if !defined(CONFIG_USER_ONLY)
/* Forward declarations used by register_subpage() below. */
1082 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
1084 static subpage_t *subpage_init(AddressSpace *as, hwaddr base);
/* Allocator hook for guest RAM; starts out as qemu_anon_ram_alloc and can be
 * replaced through phys_mem_set_alloc(). */
1086 static void *(*phys_mem_alloc)(size_t size, uint64_t *align) =
1087 qemu_anon_ram_alloc;
1090 * Set a custom physical guest memory allocator.
1091 * Accelerators with unusual needs may need this. Hopefully, we can
1092 * get rid of it eventually.
/* Overwrites the file-scope phys_mem_alloc pointer with `alloc`; the
 * argument is stored as given, without validation. */
1094 void phys_mem_set_alloc(void *(*alloc)(size_t, uint64_t *align))
1096 phys_mem_alloc = alloc;
/* Append a copy of *section to map->sections and return its index.  The
 * array doubles in capacity (minimum 16) when full, and a reference is taken
 * on the section's MemoryRegion.  The assert keeps section numbers below
 * TARGET_PAGE_SIZE. */
1099 static uint16_t phys_section_add(PhysPageMap *map,
1100 MemoryRegionSection *section)
1102 /* The physical section number is ORed with a page-aligned
1103 * pointer to produce the iotlb entries. Thus it should
1104 * never overflow into the page-aligned value.
1106 assert(map->sections_nb < TARGET_PAGE_SIZE);
1108 if (map->sections_nb == map->sections_nb_alloc) {
1109 map->sections_nb_alloc = MAX(map->sections_nb_alloc * 2, 16);
1110 map->sections = g_renew(MemoryRegionSection, map->sections,
1111 map->sections_nb_alloc);
1113 map->sections[map->sections_nb] = *section;
1114 memory_region_ref(section->mr);
1115 return map->sections_nb++;
/* Drop the reference phys_section_add() took on `mr`.  The subpage flag is
 * read before the unref; for a subpage region the enclosing subpage_t's
 * iomem object is additionally unreferenced. */
1118 static void phys_section_destroy(MemoryRegion *mr)
1120 bool have_sub_page = mr->subpage;
1122 memory_region_unref(mr);
1124 if (have_sub_page) {
1125 subpage_t *subpage = container_of(mr, subpage_t, iomem);
1126 object_unref(OBJECT(&subpage->iomem));
/* Destroy every section of `map`, last to first, then free the sections
 * array itself. */
1131 static void phys_sections_free(PhysPageMap *map)
1133 while (map->sections_nb > 0) {
1134 MemoryRegionSection *section = &map->sections[--map->sections_nb];
1135 phys_section_destroy(section->mr);
1137 g_free(map->sections);
/* Register a section that covers only part of a page.  The page currently
 * mapped at `base` must be either a subpage already or unassigned
 * (asserted).  In the unassigned case a new subpage container is created and
 * mapped for that single page.  The section is then added to the map and
 * recorded in the subpage for page offsets start..end (inclusive).
 * NOTE(review): only the first half of the `base` expression is visible;
 * presumably it is the page-aligned section offset - confirm. */
1141 static void register_subpage(AddressSpaceDispatch *d, MemoryRegionSection *section)
1144 hwaddr base = section->offset_within_address_space
1146 MemoryRegionSection *existing = phys_page_find(d->phys_map, base,
1147 d->map.nodes, d->map.sections);
1148 MemoryRegionSection subsection = {
1149 .offset_within_address_space = base,
1150 .size = int128_make64(TARGET_PAGE_SIZE),
1154 assert(existing->mr->subpage || existing->mr == &io_mem_unassigned);
1156 if (!(existing->mr->subpage)) {
1157 subpage = subpage_init(d->as, base);
1158 subsection.address_space = d->as;
1159 subsection.mr = &subpage->iomem;
1160 phys_page_set(d, base >> TARGET_PAGE_BITS, 1,
1161 phys_section_add(&d->map, &subsection));
1163 subpage = container_of(existing->mr, subpage_t, iomem);
1165 start = section->offset_within_address_space & ~TARGET_PAGE_MASK;
1166 end = start + int128_get64(section->size) - 1;
1167 subpage_register(subpage, start, end,
1168 phys_section_add(&d->map, section));
/* Register a section made of whole pages: add it to the map's section array
 * and point num_pages page-map entries, starting at the section's first
 * page, at the new section index. */
1172 static void register_multipage(AddressSpaceDispatch *d,
1173 MemoryRegionSection *section)
1175 hwaddr start_addr = section->offset_within_address_space;
1176 uint16_t section_index = phys_section_add(&d->map, section);
1177 uint64_t num_pages = int128_get64(int128_rshift(section->size,
1181 phys_page_set(d, start_addr >> TARGET_PAGE_BITS, num_pages, section_index);
/* Listener callback that adds `section` to the address space's next_dispatch
 * map.  The section is cut into pieces: an unaligned head up to the next
 * page boundary and a tail shorter than a page go through
 * register_subpage(), while page-aligned runs of whole pages go through
 * register_multipage().  `remain` tracks what is still to be registered and
 * `now` is the piece being registered. */
1184 static void mem_add(MemoryListener *listener, MemoryRegionSection *section)
1186 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
1187 AddressSpaceDispatch *d = as->next_dispatch;
1188 MemoryRegionSection now = *section, remain = *section;
1189 Int128 page_size = int128_make64(TARGET_PAGE_SIZE);
1191 if (now.offset_within_address_space & ~TARGET_PAGE_MASK) {
1192 uint64_t left = TARGET_PAGE_ALIGN(now.offset_within_address_space)
1193 - now.offset_within_address_space;
1195 now.size = int128_min(int128_make64(left), now.size);
1196 register_subpage(d, &now);
1198 now.size = int128_zero();
1200 while (int128_ne(remain.size, now.size)) {
1201 remain.size = int128_sub(remain.size, now.size);
1202 remain.offset_within_address_space += int128_get64(now.size);
1203 remain.offset_within_region += int128_get64(now.size);
1205 if (int128_lt(remain.size, page_size)) {
1206 register_subpage(d, &now);
1207 } else if (remain.offset_within_address_space & ~TARGET_PAGE_MASK) {
1208 now.size = page_size;
1209 register_subpage(d, &now);
1211 now.size = int128_and(now.size, int128_neg(page_size));
1212 register_multipage(d, &now);
/* Forwards to kvm_flush_coalesced_mmio_buffer().
 * NOTE(review): any guarding condition is not visible here. */
1217 void qemu_flush_coalesced_mmio_buffer(void)
1220 kvm_flush_coalesced_mmio_buffer();
/* Acquire ram_list.mutex; pair with qemu_mutex_unlock_ramlist(). */
1223 void qemu_mutex_lock_ramlist(void)
1225 qemu_mutex_lock(&ram_list.mutex);
/* Release ram_list.mutex taken by qemu_mutex_lock_ramlist(). */
1228 void qemu_mutex_unlock_ramlist(void)
1230 qemu_mutex_unlock(&ram_list.mutex);
/* Back a RAMBlock with a file.  Visible steps:
 *  - refuse when KVM is enabled without a synchronous MMU;
 *  - open `path`: use it if it exists, create it exclusively if it does not
 *    (remembering to unlink it on error), or create a mkstemp() file inside
 *    it if it is a directory; EINTR and EEXIST lead to another attempt;
 *  - take the file's page size, set the region alignment to at least
 *    QEMU_VMALLOC_ALIGN, reject sizes below one page and round the size up
 *    to a page multiple;
 *  - ftruncate() the file (a failure is only reported with perror) and map
 *    it with qemu_ram_mmap(), shared when the block has RAM_SHARED;
 *  - os_mem_prealloc() the mapping.
 * NOTE(review): the conditions around prealloc and the cleanup path are not
 * visible here. */
1234 static void *file_ram_alloc(RAMBlock *block,
1239 bool unlink_on_error = false;
1241 char *sanitized_name;
1247 if (kvm_enabled() && !kvm_has_sync_mmu()) {
1249 "host lacks kvm mmu notifiers, -mem-path unsupported");
1254 fd = open(path, O_RDWR);
1256 /* @path names an existing file, use it */
1259 if (errno == ENOENT) {
1260 /* @path names a file that doesn't exist, create it */
1261 fd = open(path, O_RDWR | O_CREAT | O_EXCL, 0644);
1263 unlink_on_error = true;
1266 } else if (errno == EISDIR) {
1267 /* @path names a directory, create a file there */
1268 /* Make name safe to use with mkstemp by replacing '/' with '_'. */
1269 sanitized_name = g_strdup(memory_region_name(block->mr));
1270 for (c = sanitized_name; *c != '\0'; c++) {
1276 filename = g_strdup_printf("%s/qemu_back_mem.%s.XXXXXX", path,
1278 g_free(sanitized_name);
1280 fd = mkstemp(filename);
1288 if (errno != EEXIST && errno != EINTR) {
1289 error_setg_errno(errp, errno,
1290 "can't open backing store %s for guest RAM",
1295 * Try again on EINTR and EEXIST. The latter happens when
1296 * something else creates the file between our two open().
1300 page_size = qemu_fd_getpagesize(fd);
1301 block->mr->align = MAX(page_size, QEMU_VMALLOC_ALIGN);
1303 if (memory < page_size) {
1304 error_setg(errp, "memory size 0x" RAM_ADDR_FMT " must be equal to "
1305 "or larger than page size 0x%" PRIx64,
1310 memory = ROUND_UP(memory, page_size);
1313 * ftruncate is not supported by hugetlbfs in older
1314 * hosts, so don't bother bailing out on errors.
1315 * If anything goes wrong with it under other filesystems,
1318 if (ftruncate(fd, memory)) {
1319 perror("ftruncate");
1322 area = qemu_ram_mmap(fd, memory, block->mr->align,
1323 block->flags & RAM_SHARED);
1324 if (area == MAP_FAILED) {
1325 error_setg_errno(errp, errno,
1326 "unable to map backing store for guest RAM");
1331 os_mem_prealloc(fd, area, memory);
1338 if (unlink_on_error) {
/* Choose an offset for a new RAM block of `size` bytes using a best-fit
 * search.  For every existing block, the gap between its end and the start
 * of the nearest block at or after that end is measured; the smallest gap
 * that still fits `size` is tracked in mingap.  If no gap qualifies, a
 * failure message is printed to stderr. */
1348 /* Called with the ramlist lock held. */
1349 static ram_addr_t find_ram_offset(ram_addr_t size)
1351 RAMBlock *block, *next_block;
1352 ram_addr_t offset = RAM_ADDR_MAX, mingap = RAM_ADDR_MAX;
1354 assert(size != 0); /* it would hand out same offset multiple times */
1356 if (QLIST_EMPTY_RCU(&ram_list.blocks)) {
1360 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1361 ram_addr_t end, next = RAM_ADDR_MAX;
1363 end = block->offset + block->max_length;
1365 QLIST_FOREACH_RCU(next_block, &ram_list.blocks, next) {
1366 if (next_block->offset >= end) {
1367 next = MIN(next, next_block->offset);
1370 if (next - end >= size && next - end < mingap) {
1372 mingap = next - end;
1376 if (offset == RAM_ADDR_MAX) {
1377 fprintf(stderr, "Failed to find gap of requested size: %" PRIu64 "\n",
/* Compute the highest end address (offset + max_length) over all RAM blocks;
 * the running maximum starts at 0. */
1385 ram_addr_t last_ram_offset(void)
1388 ram_addr_t last = 0;
1391 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1392 last = MAX(last, block->offset + block->max_length);
/* When the machine is configured not to dump guest memory, advise the kernel
 * with QEMU_MADV_DONTDUMP for [addr, addr + size) and print a diagnostic to
 * stderr on the failure path. */
1398 static void qemu_ram_setup_dump(void *addr, ram_addr_t size)
1402 /* Use MADV_DONTDUMP, if user doesn't want the guest memory in the core */
1403 if (!machine_dump_guest_core(current_machine)) {
1404 ret = qemu_madvise(addr, size, QEMU_MADV_DONTDUMP);
1406 perror("qemu_madvise");
1407 fprintf(stderr, "madvise doesn't support MADV_DONTDUMP, "
1408 "but dump_guest_core=off specified\n");
/* NOTE(review): the body is not visible here; presumably this returns the
 * block's idstr buffer - confirm. */
1413 const char *qemu_ram_get_idstr(RAMBlock *rb)
/* Give new_block its identifier.  The idstr must still be empty (asserted).
 * A device path obtained from `dev` is written first, followed by "/", and
 * `name` is appended.  Finding another block with the same idstr prints an
 * "already registered" message to stderr. */
1418 /* Called with iothread lock held. */
1419 void qemu_ram_set_idstr(RAMBlock *new_block, const char *name, DeviceState *dev)
1424 assert(!new_block->idstr[0]);
1427 char *id = qdev_get_dev_path(dev);
1429 snprintf(new_block->idstr, sizeof(new_block->idstr), "%s/", id);
1433 pstrcat(new_block->idstr, sizeof(new_block->idstr), name);
1436 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1437 if (block != new_block &&
1438 !strcmp(block->idstr, new_block->idstr)) {
1439 fprintf(stderr, "RAMBlock \"%s\" already registered, abort!\n",
/* Clear the block's identifier by zeroing the whole idstr buffer. */
1447 /* Called with iothread lock held. */
1448 void qemu_ram_unset_idstr(RAMBlock *block)
1450 /* FIXME: arch_init.c assumes that this is not called throughout
1451 * migration. Ignore the problem since hot-unplug during migration
1452 * does not work anyway.
1455 memset(block->idstr, 0, sizeof(block->idstr));
1459 static int memory_try_enable_merging(void *addr, size_t len)
1461 if (!machine_mem_merge(current_machine)) {
1462 /* disabled by the user */
1466 return qemu_madvise(addr, len, QEMU_MADV_MERGEABLE);
1469 /* Only legal before guest might have detected the memory size: e.g. on
1470 * incoming migration, or right after reset.
1472 * As memory core doesn't know how is memory accessed, it is up to
1473 * resize callback to update device state and/or add assertions to detect
1474 * misuse, if necessary.
1476 int qemu_ram_resize(RAMBlock *block, ram_addr_t newsize, Error **errp)
1480 newsize = HOST_PAGE_ALIGN(newsize);
1482 if (block->used_length == newsize) {
1486 if (!(block->flags & RAM_RESIZEABLE)) {
1487 error_setg_errno(errp, EINVAL,
1488 "Length mismatch: %s: 0x" RAM_ADDR_FMT
1489 " in != 0x" RAM_ADDR_FMT, block->idstr,
1490 newsize, block->used_length);
1494 if (block->max_length < newsize) {
1495 error_setg_errno(errp, EINVAL,
1496 "Length too large: %s: 0x" RAM_ADDR_FMT
1497 " > 0x" RAM_ADDR_FMT, block->idstr,
1498 newsize, block->max_length);
1502 cpu_physical_memory_clear_dirty_range(block->offset, block->used_length);
1503 block->used_length = newsize;
1504 cpu_physical_memory_set_dirty_range(block->offset, block->used_length,
1506 memory_region_set_size(block->mr, newsize);
1507 if (block->resized) {
1508 block->resized(block->idstr, newsize, block->host);
1513 /* Called with ram_list.mutex held */
1514 static void dirty_memory_extend(ram_addr_t old_ram_size,
1515 ram_addr_t new_ram_size)
1517 ram_addr_t old_num_blocks = DIV_ROUND_UP(old_ram_size,
1518 DIRTY_MEMORY_BLOCK_SIZE);
1519 ram_addr_t new_num_blocks = DIV_ROUND_UP(new_ram_size,
1520 DIRTY_MEMORY_BLOCK_SIZE);
1523 /* Only need to extend if block count increased */
1524 if (new_num_blocks <= old_num_blocks) {
1528 for (i = 0; i < DIRTY_MEMORY_NUM; i++) {
1529 DirtyMemoryBlocks *old_blocks;
1530 DirtyMemoryBlocks *new_blocks;
1533 old_blocks = atomic_rcu_read(&ram_list.dirty_memory[i]);
1534 new_blocks = g_malloc(sizeof(*new_blocks) +
1535 sizeof(new_blocks->blocks[0]) * new_num_blocks);
1537 if (old_num_blocks) {
1538 memcpy(new_blocks->blocks, old_blocks->blocks,
1539 old_num_blocks * sizeof(old_blocks->blocks[0]));
1542 for (j = old_num_blocks; j < new_num_blocks; j++) {
1543 new_blocks->blocks[j] = bitmap_new(DIRTY_MEMORY_BLOCK_SIZE);
1546 atomic_rcu_set(&ram_list.dirty_memory[i], new_blocks);
1549 g_free_rcu(old_blocks, rcu);
1554 static void ram_block_add(RAMBlock *new_block, Error **errp)
1557 RAMBlock *last_block = NULL;
1558 ram_addr_t old_ram_size, new_ram_size;
1561 old_ram_size = last_ram_offset() >> TARGET_PAGE_BITS;
1563 qemu_mutex_lock_ramlist();
1564 new_block->offset = find_ram_offset(new_block->max_length);
1566 if (!new_block->host) {
1567 if (xen_enabled()) {
1568 xen_ram_alloc(new_block->offset, new_block->max_length,
1569 new_block->mr, &err);
1571 error_propagate(errp, err);
1572 qemu_mutex_unlock_ramlist();
1576 new_block->host = phys_mem_alloc(new_block->max_length,
1577 &new_block->mr->align);
1578 if (!new_block->host) {
1579 error_setg_errno(errp, errno,
1580 "cannot set up guest memory '%s'",
1581 memory_region_name(new_block->mr));
1582 qemu_mutex_unlock_ramlist();
1585 memory_try_enable_merging(new_block->host, new_block->max_length);
1589 new_ram_size = MAX(old_ram_size,
1590 (new_block->offset + new_block->max_length) >> TARGET_PAGE_BITS);
1591 if (new_ram_size > old_ram_size) {
1592 migration_bitmap_extend(old_ram_size, new_ram_size);
1593 dirty_memory_extend(old_ram_size, new_ram_size);
1595 /* Keep the list sorted from biggest to smallest block. Unlike QTAILQ,
1596 * QLIST (which has an RCU-friendly variant) does not have insertion at
1597 * tail, so save the last element in last_block.
1599 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1601 if (block->max_length < new_block->max_length) {
1606 QLIST_INSERT_BEFORE_RCU(block, new_block, next);
1607 } else if (last_block) {
1608 QLIST_INSERT_AFTER_RCU(last_block, new_block, next);
1609 } else { /* list is empty */
1610 QLIST_INSERT_HEAD_RCU(&ram_list.blocks, new_block, next);
1612 ram_list.mru_block = NULL;
1614 /* Write list before version */
1617 qemu_mutex_unlock_ramlist();
1619 cpu_physical_memory_set_dirty_range(new_block->offset,
1620 new_block->used_length,
1623 if (new_block->host) {
1624 qemu_ram_setup_dump(new_block->host, new_block->max_length);
1625 qemu_madvise(new_block->host, new_block->max_length, QEMU_MADV_HUGEPAGE);
1626 qemu_madvise(new_block->host, new_block->max_length, QEMU_MADV_DONTFORK);
1627 if (kvm_enabled()) {
1628 kvm_setup_guest_memory(new_block->host, new_block->max_length);
1634 RAMBlock *qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr,
1635 bool share, const char *mem_path,
1638 RAMBlock *new_block;
1639 Error *local_err = NULL;
1641 if (xen_enabled()) {
1642 error_setg(errp, "-mem-path not supported with Xen");
1646 if (phys_mem_alloc != qemu_anon_ram_alloc) {
1648 * file_ram_alloc() needs to allocate just like
1649 * phys_mem_alloc, but we haven't bothered to provide
1653 "-mem-path not supported with this accelerator");
1657 size = HOST_PAGE_ALIGN(size);
1658 new_block = g_malloc0(sizeof(*new_block));
1660 new_block->used_length = size;
1661 new_block->max_length = size;
1662 new_block->flags = share ? RAM_SHARED : 0;
1663 new_block->host = file_ram_alloc(new_block, size,
1665 if (!new_block->host) {
1670 ram_block_add(new_block, &local_err);
1673 error_propagate(errp, local_err);
1681 RAMBlock *qemu_ram_alloc_internal(ram_addr_t size, ram_addr_t max_size,
1682 void (*resized)(const char*,
1685 void *host, bool resizeable,
1686 MemoryRegion *mr, Error **errp)
1688 RAMBlock *new_block;
1689 Error *local_err = NULL;
1691 size = HOST_PAGE_ALIGN(size);
1692 max_size = HOST_PAGE_ALIGN(max_size);
1693 new_block = g_malloc0(sizeof(*new_block));
1695 new_block->resized = resized;
1696 new_block->used_length = size;
1697 new_block->max_length = max_size;
1698 assert(max_size >= size);
1700 new_block->host = host;
1702 new_block->flags |= RAM_PREALLOC;
1705 new_block->flags |= RAM_RESIZEABLE;
1707 ram_block_add(new_block, &local_err);
1710 error_propagate(errp, local_err);
1716 RAMBlock *qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
1717 MemoryRegion *mr, Error **errp)
1719 return qemu_ram_alloc_internal(size, size, NULL, host, false, mr, errp);
1722 RAMBlock *qemu_ram_alloc(ram_addr_t size, MemoryRegion *mr, Error **errp)
1724 return qemu_ram_alloc_internal(size, size, NULL, NULL, false, mr, errp);
1727 RAMBlock *qemu_ram_alloc_resizeable(ram_addr_t size, ram_addr_t maxsz,
1728 void (*resized)(const char*,
1731 MemoryRegion *mr, Error **errp)
1733 return qemu_ram_alloc_internal(size, maxsz, resized, NULL, true, mr, errp);
1736 static void reclaim_ramblock(RAMBlock *block)
1738 if (block->flags & RAM_PREALLOC) {
1740 } else if (xen_enabled()) {
1741 xen_invalidate_map_cache_entry(block->host);
1743 } else if (block->fd >= 0) {
1744 qemu_ram_munmap(block->host, block->max_length);
1748 qemu_anon_ram_free(block->host, block->max_length);
1753 void qemu_ram_free(RAMBlock *block)
1759 qemu_mutex_lock_ramlist();
1760 QLIST_REMOVE_RCU(block, next);
1761 ram_list.mru_block = NULL;
1762 /* Write list before version */
1765 call_rcu(block, reclaim_ramblock, rcu);
1766 qemu_mutex_unlock_ramlist();
1770 void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
1777 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1778 offset = addr - block->offset;
1779 if (offset < block->max_length) {
1780 vaddr = ramblock_ptr(block, offset);
1781 if (block->flags & RAM_PREALLOC) {
1783 } else if (xen_enabled()) {
1787 if (block->fd >= 0) {
1788 flags |= (block->flags & RAM_SHARED ?
1789 MAP_SHARED : MAP_PRIVATE);
1790 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1791 flags, block->fd, offset);
1794 * Remap needs to match alloc. Accelerators that
1795 * set phys_mem_alloc never remap. If they did,
1796 * we'd need a remap hook here.
1798 assert(phys_mem_alloc == qemu_anon_ram_alloc);
1800 flags |= MAP_PRIVATE | MAP_ANONYMOUS;
1801 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1804 if (area != vaddr) {
1805 fprintf(stderr, "Could not remap addr: "
1806 RAM_ADDR_FMT "@" RAM_ADDR_FMT "\n",
1810 memory_try_enable_merging(vaddr, length);
1811 qemu_ram_setup_dump(vaddr, length);
1816 #endif /* !_WIN32 */
1818 /* Return a host pointer to ram allocated with qemu_ram_alloc.
1819 * This should not be used for general purpose DMA. Use address_space_map
1820 * or address_space_rw instead. For local memory (e.g. video ram) that the
1821 * device owns, use memory_region_get_ram_ptr.
1823 * Called within RCU critical section.
1825 void *qemu_map_ram_ptr(RAMBlock *ram_block, ram_addr_t addr)
1827 RAMBlock *block = ram_block;
1829 if (block == NULL) {
1830 block = qemu_get_ram_block(addr);
1831 addr -= block->offset;
1834 if (xen_enabled() && block->host == NULL) {
1835 /* We need to check if the requested address is in the RAM
1836 * because we don't want to map the entire memory in QEMU.
1837 * In that case just map until the end of the page.
1839 if (block->offset == 0) {
1840 return xen_map_cache(addr, 0, 0);
1843 block->host = xen_map_cache(block->offset, block->max_length, 1);
1845 return ramblock_ptr(block, addr);
1848 /* Return a host pointer to guest's ram. Similar to qemu_map_ram_ptr
1849 * but takes a size argument.
1851 * Called within RCU critical section.
1853 static void *qemu_ram_ptr_length(RAMBlock *ram_block, ram_addr_t addr,
1856 RAMBlock *block = ram_block;
1861 if (block == NULL) {
1862 block = qemu_get_ram_block(addr);
1863 addr -= block->offset;
1865 *size = MIN(*size, block->max_length - addr);
1867 if (xen_enabled() && block->host == NULL) {
1868 /* We need to check if the requested address is in the RAM
1869 * because we don't want to map the entire memory in QEMU.
1870 * In that case just map the requested area.
1872 if (block->offset == 0) {
1873 return xen_map_cache(addr, *size, 1);
1876 block->host = xen_map_cache(block->offset, block->max_length, 1);
1879 return ramblock_ptr(block, addr);
1883 * Translates a host ptr back to a RAMBlock, a ram_addr and an offset
1886 * ptr: Host pointer to look up
1887 * round_offset: If true round the result offset down to a page boundary
1888 * *ram_addr: set to result ram_addr
1889 * *offset: set to result offset within the RAMBlock
1891 * Returns: RAMBlock (or NULL if not found)
1893 * By the time this function returns, the returned pointer is not protected
1894 * by RCU anymore. If the caller is not within an RCU critical section and
1895 * does not hold the iothread lock, it must have other means of protecting the
1896 * pointer, such as a reference to the region that includes the incoming
1899 RAMBlock *qemu_ram_block_from_host(void *ptr, bool round_offset,
1903 uint8_t *host = ptr;
1905 if (xen_enabled()) {
1906 ram_addr_t ram_addr;
1908 ram_addr = xen_ram_addr_from_mapcache(ptr);
1909 block = qemu_get_ram_block(ram_addr);
1911 *offset = (host - block->host);
1918 block = atomic_rcu_read(&ram_list.mru_block);
1919 if (block && block->host && host - block->host < block->max_length) {
1923 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1924 /* This case happens when the block is not mapped. */
1925 if (block->host == NULL) {
1928 if (host - block->host < block->max_length) {
1937 *offset = (host - block->host);
1939 *offset &= TARGET_PAGE_MASK;
1946 * Finds the named RAMBlock
1948 * name: The name of RAMBlock to find
1950 * Returns: RAMBlock (or NULL if not found)
1952 RAMBlock *qemu_ram_block_by_name(const char *name)
1956 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1957 if (!strcmp(name, block->idstr)) {
1965 /* Some of the softmmu routines need to translate from a host pointer
1966 (typically a TLB entry) back to a ram offset. */
1967 ram_addr_t qemu_ram_addr_from_host(void *ptr)
1972 block = qemu_ram_block_from_host(ptr, false, &offset);
1974 return RAM_ADDR_INVALID;
1977 return block->offset + offset;
1980 /* Called within RCU critical section. */
1981 static void notdirty_mem_write(void *opaque, hwaddr ram_addr,
1982 uint64_t val, unsigned size)
1984 if (!cpu_physical_memory_get_dirty_flag(ram_addr, DIRTY_MEMORY_CODE)) {
1985 tb_invalidate_phys_page_fast(ram_addr, size);
1989 stb_p(qemu_map_ram_ptr(NULL, ram_addr), val);
1992 stw_p(qemu_map_ram_ptr(NULL, ram_addr), val);
1995 stl_p(qemu_map_ram_ptr(NULL, ram_addr), val);
2000 /* Set both VGA and migration bits for simplicity and to remove
2001 * the notdirty callback faster.
2003 cpu_physical_memory_set_dirty_range(ram_addr, size,
2004 DIRTY_CLIENTS_NOCODE);
2005 /* we remove the notdirty callback only if the code has been
2007 if (!cpu_physical_memory_is_clean(ram_addr)) {
2008 tlb_set_dirty(current_cpu, current_cpu->mem_io_vaddr);
2012 static bool notdirty_mem_accepts(void *opaque, hwaddr addr,
2013 unsigned size, bool is_write)
2018 static const MemoryRegionOps notdirty_mem_ops = {
2019 .write = notdirty_mem_write,
2020 .valid.accepts = notdirty_mem_accepts,
2021 .endianness = DEVICE_NATIVE_ENDIAN,
2024 /* Generate a debug exception if a watchpoint has been hit. */
2025 static void check_watchpoint(int offset, int len, MemTxAttrs attrs, int flags)
2027 CPUState *cpu = current_cpu;
2028 CPUClass *cc = CPU_GET_CLASS(cpu);
2029 CPUArchState *env = cpu->env_ptr;
2030 target_ulong pc, cs_base;
2035 if (cpu->watchpoint_hit) {
2036 /* We re-entered the check after replacing the TB. Now raise
2037 * the debug interrupt so that it will trigger after the
2038 * current instruction. */
2039 cpu_interrupt(cpu, CPU_INTERRUPT_DEBUG);
2042 vaddr = (cpu->mem_io_vaddr & TARGET_PAGE_MASK) + offset;
2043 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
2044 if (cpu_watchpoint_address_matches(wp, vaddr, len)
2045 && (wp->flags & flags)) {
2046 if (flags == BP_MEM_READ) {
2047 wp->flags |= BP_WATCHPOINT_HIT_READ;
2049 wp->flags |= BP_WATCHPOINT_HIT_WRITE;
2051 wp->hitaddr = vaddr;
2052 wp->hitattrs = attrs;
2053 if (!cpu->watchpoint_hit) {
2054 if (wp->flags & BP_CPU &&
2055 !cc->debug_check_watchpoint(cpu, wp)) {
2056 wp->flags &= ~BP_WATCHPOINT_HIT;
2059 cpu->watchpoint_hit = wp;
2060 tb_check_watchpoint(cpu);
2061 if (wp->flags & BP_STOP_BEFORE_ACCESS) {
2062 cpu->exception_index = EXCP_DEBUG;
2065 cpu_get_tb_cpu_state(env, &pc, &cs_base, &cpu_flags);
2066 tb_gen_code(cpu, pc, cs_base, cpu_flags, 1);
2067 cpu_resume_from_signal(cpu, NULL);
2071 wp->flags &= ~BP_WATCHPOINT_HIT;
2076 /* Watchpoint access routines. Watchpoints are inserted using TLB tricks,
2077 so these check for a hit then pass through to the normal out-of-line
2079 static MemTxResult watch_mem_read(void *opaque, hwaddr addr, uint64_t *pdata,
2080 unsigned size, MemTxAttrs attrs)
2084 int asidx = cpu_asidx_from_attrs(current_cpu, attrs);
2085 AddressSpace *as = current_cpu->cpu_ases[asidx].as;
2087 check_watchpoint(addr & ~TARGET_PAGE_MASK, size, attrs, BP_MEM_READ);
2090 data = address_space_ldub(as, addr, attrs, &res);
2093 data = address_space_lduw(as, addr, attrs, &res);
2096 data = address_space_ldl(as, addr, attrs, &res);
2104 static MemTxResult watch_mem_write(void *opaque, hwaddr addr,
2105 uint64_t val, unsigned size,
2109 int asidx = cpu_asidx_from_attrs(current_cpu, attrs);
2110 AddressSpace *as = current_cpu->cpu_ases[asidx].as;
2112 check_watchpoint(addr & ~TARGET_PAGE_MASK, size, attrs, BP_MEM_WRITE);
2115 address_space_stb(as, addr, val, attrs, &res);
2118 address_space_stw(as, addr, val, attrs, &res);
2121 address_space_stl(as, addr, val, attrs, &res);
2128 static const MemoryRegionOps watch_mem_ops = {
2129 .read_with_attrs = watch_mem_read,
2130 .write_with_attrs = watch_mem_write,
2131 .endianness = DEVICE_NATIVE_ENDIAN,
2134 static MemTxResult subpage_read(void *opaque, hwaddr addr, uint64_t *data,
2135 unsigned len, MemTxAttrs attrs)
2137 subpage_t *subpage = opaque;
2141 #if defined(DEBUG_SUBPAGE)
2142 printf("%s: subpage %p len %u addr " TARGET_FMT_plx "\n", __func__,
2143 subpage, len, addr);
2145 res = address_space_read(subpage->as, addr + subpage->base,
2152 *data = ldub_p(buf);
2155 *data = lduw_p(buf);
2168 static MemTxResult subpage_write(void *opaque, hwaddr addr,
2169 uint64_t value, unsigned len, MemTxAttrs attrs)
2171 subpage_t *subpage = opaque;
2174 #if defined(DEBUG_SUBPAGE)
2175 printf("%s: subpage %p len %u addr " TARGET_FMT_plx
2176 " value %"PRIx64"\n",
2177 __func__, subpage, len, addr, value);
2195 return address_space_write(subpage->as, addr + subpage->base,
2199 static bool subpage_accepts(void *opaque, hwaddr addr,
2200 unsigned len, bool is_write)
2202 subpage_t *subpage = opaque;
2203 #if defined(DEBUG_SUBPAGE)
2204 printf("%s: subpage %p %c len %u addr " TARGET_FMT_plx "\n",
2205 __func__, subpage, is_write ? 'w' : 'r', len, addr);
2208 return address_space_access_valid(subpage->as, addr + subpage->base,
2212 static const MemoryRegionOps subpage_ops = {
2213 .read_with_attrs = subpage_read,
2214 .write_with_attrs = subpage_write,
2215 .impl.min_access_size = 1,
2216 .impl.max_access_size = 8,
2217 .valid.min_access_size = 1,
2218 .valid.max_access_size = 8,
2219 .valid.accepts = subpage_accepts,
2220 .endianness = DEVICE_NATIVE_ENDIAN,
2223 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
2228 if (start >= TARGET_PAGE_SIZE || end >= TARGET_PAGE_SIZE)
2230 idx = SUBPAGE_IDX(start);
2231 eidx = SUBPAGE_IDX(end);
2232 #if defined(DEBUG_SUBPAGE)
2233 printf("%s: %p start %08x end %08x idx %08x eidx %08x section %d\n",
2234 __func__, mmio, start, end, idx, eidx, section);
2236 for (; idx <= eidx; idx++) {
2237 mmio->sub_section[idx] = section;
2243 static subpage_t *subpage_init(AddressSpace *as, hwaddr base)
2247 mmio = g_malloc0(sizeof(subpage_t));
2251 memory_region_init_io(&mmio->iomem, NULL, &subpage_ops, mmio,
2252 NULL, TARGET_PAGE_SIZE);
2253 mmio->iomem.subpage = true;
2254 #if defined(DEBUG_SUBPAGE)
2255 printf("%s: %p base " TARGET_FMT_plx " len %08x\n", __func__,
2256 mmio, base, TARGET_PAGE_SIZE);
2258 subpage_register(mmio, 0, TARGET_PAGE_SIZE-1, PHYS_SECTION_UNASSIGNED);
2263 static uint16_t dummy_section(PhysPageMap *map, AddressSpace *as,
2267 MemoryRegionSection section = {
2268 .address_space = as,
2270 .offset_within_address_space = 0,
2271 .offset_within_region = 0,
2272 .size = int128_2_64(),
2275 return phys_section_add(map, §ion);
2278 MemoryRegion *iotlb_to_region(CPUState *cpu, hwaddr index, MemTxAttrs attrs)
2280 int asidx = cpu_asidx_from_attrs(cpu, attrs);
2281 CPUAddressSpace *cpuas = &cpu->cpu_ases[asidx];
2282 AddressSpaceDispatch *d = atomic_rcu_read(&cpuas->memory_dispatch);
2283 MemoryRegionSection *sections = d->map.sections;
2285 return sections[index & ~TARGET_PAGE_MASK].mr;
2288 static void io_mem_init(void)
2290 memory_region_init_io(&io_mem_rom, NULL, &unassigned_mem_ops, NULL, NULL, UINT64_MAX);
2291 memory_region_init_io(&io_mem_unassigned, NULL, &unassigned_mem_ops, NULL,
2293 memory_region_init_io(&io_mem_notdirty, NULL, ¬dirty_mem_ops, NULL,
2295 memory_region_init_io(&io_mem_watch, NULL, &watch_mem_ops, NULL,
2299 static void mem_begin(MemoryListener *listener)
2301 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
2302 AddressSpaceDispatch *d = g_new0(AddressSpaceDispatch, 1);
2305 n = dummy_section(&d->map, as, &io_mem_unassigned);
2306 assert(n == PHYS_SECTION_UNASSIGNED);
2307 n = dummy_section(&d->map, as, &io_mem_notdirty);
2308 assert(n == PHYS_SECTION_NOTDIRTY);
2309 n = dummy_section(&d->map, as, &io_mem_rom);
2310 assert(n == PHYS_SECTION_ROM);
2311 n = dummy_section(&d->map, as, &io_mem_watch);
2312 assert(n == PHYS_SECTION_WATCH);
2314 d->phys_map = (PhysPageEntry) { .ptr = PHYS_MAP_NODE_NIL, .skip = 1 };
2316 as->next_dispatch = d;
2319 static void address_space_dispatch_free(AddressSpaceDispatch *d)
2321 phys_sections_free(&d->map);
2325 static void mem_commit(MemoryListener *listener)
2327 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
2328 AddressSpaceDispatch *cur = as->dispatch;
2329 AddressSpaceDispatch *next = as->next_dispatch;
2331 phys_page_compact_all(next, next->map.nodes_nb);
2333 atomic_rcu_set(&as->dispatch, next);
2335 call_rcu(cur, address_space_dispatch_free, rcu);
2339 static void tcg_commit(MemoryListener *listener)
2341 CPUAddressSpace *cpuas;
2342 AddressSpaceDispatch *d;
2344 /* since each CPU stores ram addresses in its TLB cache, we must
2345 reset the modified entries */
2346 cpuas = container_of(listener, CPUAddressSpace, tcg_as_listener);
2347 cpu_reloading_memory_map();
2348 /* The CPU and TLB are protected by the iothread lock.
2349 * We reload the dispatch pointer now because cpu_reloading_memory_map()
2350 * may have split the RCU critical section.
2352 d = atomic_rcu_read(&cpuas->as->dispatch);
2353 cpuas->memory_dispatch = d;
2354 tlb_flush(cpuas->cpu, 1);
2357 void address_space_init_dispatch(AddressSpace *as)
2359 as->dispatch = NULL;
2360 as->dispatch_listener = (MemoryListener) {
2362 .commit = mem_commit,
2363 .region_add = mem_add,
2364 .region_nop = mem_add,
2367 memory_listener_register(&as->dispatch_listener, as);
2370 void address_space_unregister(AddressSpace *as)
2372 memory_listener_unregister(&as->dispatch_listener);
2375 void address_space_destroy_dispatch(AddressSpace *as)
2377 AddressSpaceDispatch *d = as->dispatch;
2379 atomic_rcu_set(&as->dispatch, NULL);
2381 call_rcu(d, address_space_dispatch_free, rcu);
2385 static void memory_map_init(void)
2387 system_memory = g_malloc(sizeof(*system_memory));
2389 memory_region_init(system_memory, NULL, "system", UINT64_MAX);
2390 address_space_init(&address_space_memory, system_memory, "memory");
2392 system_io = g_malloc(sizeof(*system_io));
2393 memory_region_init_io(system_io, NULL, &unassigned_io_ops, NULL, "io",
2395 address_space_init(&address_space_io, system_io, "I/O");
2398 MemoryRegion *get_system_memory(void)
2400 return system_memory;
2403 MemoryRegion *get_system_io(void)
2408 #endif /* !defined(CONFIG_USER_ONLY) */
2410 /* physical memory access (slow version, mainly for debug) */
2411 #if defined(CONFIG_USER_ONLY)
2412 int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
2413 uint8_t *buf, int len, int is_write)
2420 page = addr & TARGET_PAGE_MASK;
2421 l = (page + TARGET_PAGE_SIZE) - addr;
2424 flags = page_get_flags(page);
2425 if (!(flags & PAGE_VALID))
2428 if (!(flags & PAGE_WRITE))
2430 /* XXX: this code should not depend on lock_user */
2431 if (!(p = lock_user(VERIFY_WRITE, addr, l, 0)))
2434 unlock_user(p, addr, l);
2436 if (!(flags & PAGE_READ))
2438 /* XXX: this code should not depend on lock_user */
2439 if (!(p = lock_user(VERIFY_READ, addr, l, 1)))
2442 unlock_user(p, addr, 0);
2453 static void invalidate_and_set_dirty(MemoryRegion *mr, hwaddr addr,
2456 uint8_t dirty_log_mask = memory_region_get_dirty_log_mask(mr);
2457 addr += memory_region_get_ram_addr(mr);
2459 /* No early return if dirty_log_mask is or becomes 0, because
2460 * cpu_physical_memory_set_dirty_range will still call
2461 * xen_modified_memory.
2463 if (dirty_log_mask) {
2465 cpu_physical_memory_range_includes_clean(addr, length, dirty_log_mask);
2467 if (dirty_log_mask & (1 << DIRTY_MEMORY_CODE)) {
2468 tb_invalidate_phys_range(addr, addr + length);
2469 dirty_log_mask &= ~(1 << DIRTY_MEMORY_CODE);
2471 cpu_physical_memory_set_dirty_range(addr, length, dirty_log_mask);
2474 static int memory_access_size(MemoryRegion *mr, unsigned l, hwaddr addr)
2476 unsigned access_size_max = mr->ops->valid.max_access_size;
2478 /* Regions are assumed to support 1-4 byte accesses unless
2479 otherwise specified. */
2480 if (access_size_max == 0) {
2481 access_size_max = 4;
2484 /* Bound the maximum access by the alignment of the address. */
2485 if (!mr->ops->impl.unaligned) {
2486 unsigned align_size_max = addr & -addr;
2487 if (align_size_max != 0 && align_size_max < access_size_max) {
2488 access_size_max = align_size_max;
2492 /* Don't attempt accesses larger than the maximum. */
2493 if (l > access_size_max) {
2494 l = access_size_max;
2501 static bool prepare_mmio_access(MemoryRegion *mr)
2503 bool unlocked = !qemu_mutex_iothread_locked();
2504 bool release_lock = false;
2506 if (unlocked && mr->global_locking) {
2507 qemu_mutex_lock_iothread();
2509 release_lock = true;
2511 if (mr->flush_coalesced_mmio) {
2513 qemu_mutex_lock_iothread();
2515 qemu_flush_coalesced_mmio_buffer();
2517 qemu_mutex_unlock_iothread();
2521 return release_lock;
2524 /* Called within RCU critical section. */
2525 static MemTxResult address_space_write_continue(AddressSpace *as, hwaddr addr,
2528 int len, hwaddr addr1,
2529 hwaddr l, MemoryRegion *mr)
2533 MemTxResult result = MEMTX_OK;
2534 bool release_lock = false;
2537 if (!memory_access_is_direct(mr, true)) {
2538 release_lock |= prepare_mmio_access(mr);
2539 l = memory_access_size(mr, l, addr1);
2540 /* XXX: could force current_cpu to NULL to avoid
2544 /* 64 bit write access */
2546 result |= memory_region_dispatch_write(mr, addr1, val, 8,
2550 /* 32 bit write access */
2552 result |= memory_region_dispatch_write(mr, addr1, val, 4,
2556 /* 16 bit write access */
2558 result |= memory_region_dispatch_write(mr, addr1, val, 2,
2562 /* 8 bit write access */
2564 result |= memory_region_dispatch_write(mr, addr1, val, 1,
2572 ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
2573 memcpy(ptr, buf, l);
2574 invalidate_and_set_dirty(mr, addr1, l);
2578 qemu_mutex_unlock_iothread();
2579 release_lock = false;
2591 mr = address_space_translate(as, addr, &addr1, &l, true);
2597 MemTxResult address_space_write(AddressSpace *as, hwaddr addr, MemTxAttrs attrs,
2598 const uint8_t *buf, int len)
2603 MemTxResult result = MEMTX_OK;
2608 mr = address_space_translate(as, addr, &addr1, &l, true);
2609 result = address_space_write_continue(as, addr, attrs, buf, len,
2617 /* Called within RCU critical section. */
2618 MemTxResult address_space_read_continue(AddressSpace *as, hwaddr addr,
2619 MemTxAttrs attrs, uint8_t *buf,
2620 int len, hwaddr addr1, hwaddr l,
2625 MemTxResult result = MEMTX_OK;
2626 bool release_lock = false;
2629 if (!memory_access_is_direct(mr, false)) {
2631 release_lock |= prepare_mmio_access(mr);
2632 l = memory_access_size(mr, l, addr1);
2635 /* 64 bit read access */
2636 result |= memory_region_dispatch_read(mr, addr1, &val, 8,
2641 /* 32 bit read access */
2642 result |= memory_region_dispatch_read(mr, addr1, &val, 4,
2647 /* 16 bit read access */
2648 result |= memory_region_dispatch_read(mr, addr1, &val, 2,
2653 /* 8 bit read access */
2654 result |= memory_region_dispatch_read(mr, addr1, &val, 1,
2663 ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
2664 memcpy(buf, ptr, l);
2668 qemu_mutex_unlock_iothread();
2669 release_lock = false;
2681 mr = address_space_translate(as, addr, &addr1, &l, false);
2687 MemTxResult address_space_read_full(AddressSpace *as, hwaddr addr,
2688 MemTxAttrs attrs, uint8_t *buf, int len)
2693 MemTxResult result = MEMTX_OK;
2698 mr = address_space_translate(as, addr, &addr1, &l, false);
2699 result = address_space_read_continue(as, addr, attrs, buf, len,
2707 MemTxResult address_space_rw(AddressSpace *as, hwaddr addr, MemTxAttrs attrs,
2708 uint8_t *buf, int len, bool is_write)
2711 return address_space_write(as, addr, attrs, (uint8_t *)buf, len);
2713 return address_space_read(as, addr, attrs, (uint8_t *)buf, len);
2717 void cpu_physical_memory_rw(hwaddr addr, uint8_t *buf,
2718 int len, int is_write)
2720 address_space_rw(&address_space_memory, addr, MEMTXATTRS_UNSPECIFIED,
2721 buf, len, is_write);
2724 enum write_rom_type {
2729 static inline void cpu_physical_memory_write_rom_internal(AddressSpace *as,
2730 hwaddr addr, const uint8_t *buf, int len, enum write_rom_type type)
2740 mr = address_space_translate(as, addr, &addr1, &l, true);
2742 if (!(memory_region_is_ram(mr) ||
2743 memory_region_is_romd(mr))) {
2744 l = memory_access_size(mr, l, addr1);
2747 ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
2750 memcpy(ptr, buf, l);
2751 invalidate_and_set_dirty(mr, addr1, l);
2754 flush_icache_range((uintptr_t)ptr, (uintptr_t)ptr + l);
2765 /* used for ROM loading : can write in RAM and ROM */
2766 void cpu_physical_memory_write_rom(AddressSpace *as, hwaddr addr,
2767 const uint8_t *buf, int len)
2769 cpu_physical_memory_write_rom_internal(as, addr, buf, len, WRITE_DATA);
2772 void cpu_flush_icache_range(hwaddr start, int len)
2775 * This function should do the same thing as an icache flush that was
2776 * triggered from within the guest. For TCG we are always cache coherent,
2777 * so there is no need to flush anything. For KVM / Xen we need to flush
2778 * the host's instruction cache at least.
2780 if (tcg_enabled()) {
2784 cpu_physical_memory_write_rom_internal(&address_space_memory,
2785 start, NULL, len, FLUSH_CACHE);
2796 static BounceBuffer bounce;
2798 typedef struct MapClient {
2800 QLIST_ENTRY(MapClient) link;
2803 QemuMutex map_client_list_lock;
2804 static QLIST_HEAD(map_client_list, MapClient) map_client_list
2805 = QLIST_HEAD_INITIALIZER(map_client_list);
2807 static void cpu_unregister_map_client_do(MapClient *client)
2809 QLIST_REMOVE(client, link);
2813 static void cpu_notify_map_clients_locked(void)
2817 while (!QLIST_EMPTY(&map_client_list)) {
2818 client = QLIST_FIRST(&map_client_list);
2819 qemu_bh_schedule(client->bh);
2820 cpu_unregister_map_client_do(client);
2824 void cpu_register_map_client(QEMUBH *bh)
2826 MapClient *client = g_malloc(sizeof(*client));
2828 qemu_mutex_lock(&map_client_list_lock);
2830 QLIST_INSERT_HEAD(&map_client_list, client, link);
2831 if (!atomic_read(&bounce.in_use)) {
2832 cpu_notify_map_clients_locked();
2834 qemu_mutex_unlock(&map_client_list_lock);
2837 void cpu_exec_init_all(void)
2839 qemu_mutex_init(&ram_list.mutex);
2842 qemu_mutex_init(&map_client_list_lock);
2845 void cpu_unregister_map_client(QEMUBH *bh)
2849 qemu_mutex_lock(&map_client_list_lock);
2850 QLIST_FOREACH(client, &map_client_list, link) {
2851 if (client->bh == bh) {
2852 cpu_unregister_map_client_do(client);
2856 qemu_mutex_unlock(&map_client_list_lock);
2859 static void cpu_notify_map_clients(void)
2861 qemu_mutex_lock(&map_client_list_lock);
2862 cpu_notify_map_clients_locked();
2863 qemu_mutex_unlock(&map_client_list_lock);
2866 bool address_space_access_valid(AddressSpace *as, hwaddr addr, int len, bool is_write)
2874 mr = address_space_translate(as, addr, &xlat, &l, is_write);
2875 if (!memory_access_is_direct(mr, is_write)) {
2876 l = memory_access_size(mr, l, addr);
2877 if (!memory_region_access_valid(mr, xlat, l, is_write)) {
2889 /* Map a physical memory region into a host virtual address.
2890 * May map a subset of the requested range, given by and returned in *plen.
2891 * May return NULL if resources needed to perform the mapping are exhausted.
2892 * Use only for reads OR writes - not for read-modify-write operations.
2893 * Use cpu_register_map_client() to know when retrying the map operation is
2894 * likely to succeed.
2896 void *address_space_map(AddressSpace *as,
2903 hwaddr l, xlat, base;
2904 MemoryRegion *mr, *this_mr;
2913 mr = address_space_translate(as, addr, &xlat, &l, is_write);
2915 if (!memory_access_is_direct(mr, is_write)) {
2916 if (atomic_xchg(&bounce.in_use, true)) {
2920 /* Avoid unbounded allocations */
2921 l = MIN(l, TARGET_PAGE_SIZE);
2922 bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, l);
2926 memory_region_ref(mr);
2929 address_space_read(as, addr, MEMTXATTRS_UNSPECIFIED,
2935 return bounce.buffer;
2949 this_mr = address_space_translate(as, addr, &xlat, &l, is_write);
2950 if (this_mr != mr || xlat != base + done) {
2955 memory_region_ref(mr);
2957 ptr = qemu_ram_ptr_length(mr->ram_block, base, plen);
2963 /* Unmaps a memory region previously mapped by address_space_map().
2964 * Will also mark the memory as dirty if is_write == 1. access_len gives
2965 * the amount of memory that was actually read or written by the caller.
2967 void address_space_unmap(AddressSpace *as, void *buffer, hwaddr len,
2968 int is_write, hwaddr access_len)
/* Anything other than the bounce buffer is a direct mapping: recover
 * the MemoryRegion and offset from the host pointer. */
2970 if (buffer != bounce.buffer) {
2974 mr = memory_region_from_host(buffer, &addr1);
/* NOTE(review): presumably only done for write mappings; the is_write
 * test is on a line not shown in this excerpt. */
2977 invalidate_and_set_dirty(mr, addr1, access_len);
2979 if (xen_enabled()) {
2980 xen_invalidate_map_cache_entry(buffer);
2982 memory_region_unref(mr);
/* Bounce-buffer path: copy the bytes the caller touched (access_len)
 * back to bounce.addr.
 * NOTE(review): presumably guarded by is_write; guard not visible. */
2986 address_space_write(as, bounce.addr, MEMTXATTRS_UNSPECIFIED,
2987 bounce.buffer, access_len);
2989 qemu_vfree(bounce.buffer);
2990 bounce.buffer = NULL;
2991 memory_region_unref(bounce.mr);
/* Mark the bounce buffer free before notifying map clients, so a client
 * that retries the map can claim it. */
2992 atomic_mb_set(&bounce.in_use, false);
2993 cpu_notify_map_clients();
/* Convenience wrapper: address_space_map() on the global
 * address_space_memory, forwarding addr, plen and is_write unchanged.
 */
2996 void *cpu_physical_memory_map(hwaddr addr,
3000 return address_space_map(&address_space_memory, addr, plen, is_write);
/* Convenience wrapper: address_space_unmap() on the global
 * address_space_memory, forwarding all other arguments unchanged.
 */
3003 void cpu_physical_memory_unmap(void *buffer, hwaddr len,
3004 int is_write, hwaddr access_len)
3006 return address_space_unmap(&address_space_memory, buffer, len, is_write, access_len);
3009 /* warning: addr must be aligned */
/* 32-bit load from addr in as, honouring the requested endian.
 * Two paths are visible: when the translation covers fewer than 4 bytes
 * or the region is not directly accessible, the value comes from
 * memory_region_dispatch_read(); otherwise it is read through a host
 * pointer from qemu_map_ram_ptr() with ldl_le_p()/ldl_be_p().
 * NOTE(review): the native-endian case and the handling of *result are
 * on lines not shown in this excerpt.
 */
3010 static inline uint32_t address_space_ldl_internal(AddressSpace *as, hwaddr addr,
3012 MemTxResult *result,
3013 enum device_endian endian)
3021 bool release_lock = false;
3024 mr = address_space_translate(as, addr, &addr1, &l, false);
3025 if (l < 4 || !memory_access_is_direct(mr, false)) {
/* Dispatch path: remember whether prepare_mmio_access() asks for the
 * lock to be released afterwards. */
3026 release_lock |= prepare_mmio_access(mr);
3029 r = memory_region_dispatch_read(mr, addr1, &val, 4, attrs);
/* After the dispatch, the endian to act on depends on the target's byte
 * order. NOTE(review): the statements inside these tests (presumably a
 * byte swap of val) are not visible here. */
3030 #if defined(TARGET_WORDS_BIGENDIAN)
3031 if (endian == DEVICE_LITTLE_ENDIAN) {
3035 if (endian == DEVICE_BIG_ENDIAN) {
/* Direct path: read through a host pointer into the RAM block. */
3041 ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
3043 case DEVICE_LITTLE_ENDIAN:
3044 val = ldl_le_p(ptr);
3046 case DEVICE_BIG_ENDIAN:
3047 val = ldl_be_p(ptr);
/* NOTE(review): presumably conditional on release_lock — confirm. */
3059 qemu_mutex_unlock_iothread();
/* 32-bit load via address_space_ldl_internal() with DEVICE_NATIVE_ENDIAN. */
3065 uint32_t address_space_ldl(AddressSpace *as, hwaddr addr,
3066 MemTxAttrs attrs, MemTxResult *result)
3068 return address_space_ldl_internal(as, addr, attrs, result,
3069 DEVICE_NATIVE_ENDIAN);
/* 32-bit load via address_space_ldl_internal() with DEVICE_LITTLE_ENDIAN. */
3072 uint32_t address_space_ldl_le(AddressSpace *as, hwaddr addr,
3073 MemTxAttrs attrs, MemTxResult *result)
3075 return address_space_ldl_internal(as, addr, attrs, result,
3076 DEVICE_LITTLE_ENDIAN);
/* 32-bit load via address_space_ldl_internal().
 * NOTE(review): the endianness argument is on a line not shown in this
 * excerpt — presumably DEVICE_BIG_ENDIAN, going by the _be suffix.
 */
3079 uint32_t address_space_ldl_be(AddressSpace *as, hwaddr addr,
3080 MemTxAttrs attrs, MemTxResult *result)
3082 return address_space_ldl_internal(as, addr, attrs, result,
/* Shorthand for address_space_ldl() with MEMTXATTRS_UNSPECIFIED and a
 * NULL result pointer, so no transaction result is reported back. */
3086 uint32_t ldl_phys(AddressSpace *as, hwaddr addr)
3088 return address_space_ldl(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
/* Shorthand for address_space_ldl_le() with MEMTXATTRS_UNSPECIFIED and a
 * NULL result pointer, so no transaction result is reported back. */
3091 uint32_t ldl_le_phys(AddressSpace *as, hwaddr addr)
3093 return address_space_ldl_le(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
/* Shorthand for address_space_ldl_be() with MEMTXATTRS_UNSPECIFIED and a
 * NULL result pointer, so no transaction result is reported back. */
3096 uint32_t ldl_be_phys(AddressSpace *as, hwaddr addr)
3098 return address_space_ldl_be(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3101 /* warning: addr must be aligned */
/* 64-bit load from addr in as, honouring the requested endian.
 * Two paths are visible: when the translation covers fewer than 8 bytes
 * or the region is not directly accessible, the value comes from
 * memory_region_dispatch_read(); otherwise it is read through a host
 * pointer from qemu_map_ram_ptr() with ldq_le_p()/ldq_be_p().
 * NOTE(review): the native-endian case and the handling of *result are
 * on lines not shown in this excerpt.
 */
3102 static inline uint64_t address_space_ldq_internal(AddressSpace *as, hwaddr addr,
3104 MemTxResult *result,
3105 enum device_endian endian)
3113 bool release_lock = false;
3116 mr = address_space_translate(as, addr, &addr1, &l,
3118 if (l < 8 || !memory_access_is_direct(mr, false)) {
/* Dispatch path: remember whether prepare_mmio_access() asks for the
 * lock to be released afterwards. */
3119 release_lock |= prepare_mmio_access(mr);
3122 r = memory_region_dispatch_read(mr, addr1, &val, 8, attrs);
/* After the dispatch, the endian to act on depends on the target's byte
 * order. NOTE(review): the statements inside these tests (presumably a
 * byte swap of val) are not visible here. */
3123 #if defined(TARGET_WORDS_BIGENDIAN)
3124 if (endian == DEVICE_LITTLE_ENDIAN) {
3128 if (endian == DEVICE_BIG_ENDIAN) {
/* Direct path: read through a host pointer into the RAM block. */
3134 ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
3136 case DEVICE_LITTLE_ENDIAN:
3137 val = ldq_le_p(ptr);
3139 case DEVICE_BIG_ENDIAN:
3140 val = ldq_be_p(ptr);
/* NOTE(review): presumably conditional on release_lock — confirm. */
3152 qemu_mutex_unlock_iothread();
/* 64-bit load via address_space_ldq_internal() with DEVICE_NATIVE_ENDIAN. */
3158 uint64_t address_space_ldq(AddressSpace *as, hwaddr addr,
3159 MemTxAttrs attrs, MemTxResult *result)
3161 return address_space_ldq_internal(as, addr, attrs, result,
3162 DEVICE_NATIVE_ENDIAN);
/* 64-bit load via address_space_ldq_internal() with DEVICE_LITTLE_ENDIAN. */
3165 uint64_t address_space_ldq_le(AddressSpace *as, hwaddr addr,
3166 MemTxAttrs attrs, MemTxResult *result)
3168 return address_space_ldq_internal(as, addr, attrs, result,
3169 DEVICE_LITTLE_ENDIAN);
/* 64-bit load via address_space_ldq_internal().
 * NOTE(review): the endianness argument is on a line not shown in this
 * excerpt — presumably DEVICE_BIG_ENDIAN, going by the _be suffix.
 */
3172 uint64_t address_space_ldq_be(AddressSpace *as, hwaddr addr,
3173 MemTxAttrs attrs, MemTxResult *result)
3175 return address_space_ldq_internal(as, addr, attrs, result,
/* Shorthand for address_space_ldq() with MEMTXATTRS_UNSPECIFIED and a
 * NULL result pointer, so no transaction result is reported back. */
3179 uint64_t ldq_phys(AddressSpace *as, hwaddr addr)
3181 return address_space_ldq(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
/* Shorthand for address_space_ldq_le() with MEMTXATTRS_UNSPECIFIED and a
 * NULL result pointer, so no transaction result is reported back. */
3184 uint64_t ldq_le_phys(AddressSpace *as, hwaddr addr)
3186 return address_space_ldq_le(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
/* Shorthand for address_space_ldq_be() with MEMTXATTRS_UNSPECIFIED and a
 * NULL result pointer, so no transaction result is reported back. */
3189 uint64_t ldq_be_phys(AddressSpace *as, hwaddr addr)
3191 return address_space_ldq_be(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
/* Single-byte load: goes through the generic address_space_rw() with a
 * length of 1 and a final argument of 0 (a read), rather than through a
 * dedicated translate/dispatch path like the wider loads.
 * NOTE(review): how r is propagated to *result and how val is returned
 * are on lines not shown in this excerpt.
 */
3195 uint32_t address_space_ldub(AddressSpace *as, hwaddr addr,
3196 MemTxAttrs attrs, MemTxResult *result)
3201 r = address_space_rw(as, addr, attrs, &val, 1, 0);
/* Shorthand for address_space_ldub() with MEMTXATTRS_UNSPECIFIED and a
 * NULL result pointer, so no transaction result is reported back. */
3208 uint32_t ldub_phys(AddressSpace *as, hwaddr addr)
3210 return address_space_ldub(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3213 /* warning: addr must be aligned */
/* 16-bit unsigned load from addr in as, honouring the requested endian.
 * Two paths are visible: when the translation covers fewer than 2 bytes
 * or the region is not directly accessible, the value comes from
 * memory_region_dispatch_read(); otherwise it is read through a host
 * pointer from qemu_map_ram_ptr() with lduw_le_p()/lduw_be_p().
 * NOTE(review): the native-endian case and the handling of *result are
 * on lines not shown in this excerpt.
 */
3214 static inline uint32_t address_space_lduw_internal(AddressSpace *as,
3217 MemTxResult *result,
3218 enum device_endian endian)
3226 bool release_lock = false;
3229 mr = address_space_translate(as, addr, &addr1, &l,
3231 if (l < 2 || !memory_access_is_direct(mr, false)) {
/* Dispatch path: remember whether prepare_mmio_access() asks for the
 * lock to be released afterwards. */
3232 release_lock |= prepare_mmio_access(mr);
3235 r = memory_region_dispatch_read(mr, addr1, &val, 2, attrs);
/* After the dispatch, the endian to act on depends on the target's byte
 * order. NOTE(review): the statements inside these tests (presumably a
 * byte swap of val) are not visible here. */
3236 #if defined(TARGET_WORDS_BIGENDIAN)
3237 if (endian == DEVICE_LITTLE_ENDIAN) {
3241 if (endian == DEVICE_BIG_ENDIAN) {
/* Direct path: read through a host pointer into the RAM block. */
3247 ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
3249 case DEVICE_LITTLE_ENDIAN:
3250 val = lduw_le_p(ptr);
3252 case DEVICE_BIG_ENDIAN:
3253 val = lduw_be_p(ptr);
/* NOTE(review): presumably conditional on release_lock — confirm. */
3265 qemu_mutex_unlock_iothread();
/* 16-bit load via address_space_lduw_internal() with DEVICE_NATIVE_ENDIAN. */
3271 uint32_t address_space_lduw(AddressSpace *as, hwaddr addr,
3272 MemTxAttrs attrs, MemTxResult *result)
3274 return address_space_lduw_internal(as, addr, attrs, result,
3275 DEVICE_NATIVE_ENDIAN);
/* 16-bit load via address_space_lduw_internal() with DEVICE_LITTLE_ENDIAN. */
3278 uint32_t address_space_lduw_le(AddressSpace *as, hwaddr addr,
3279 MemTxAttrs attrs, MemTxResult *result)
3281 return address_space_lduw_internal(as, addr, attrs, result,
3282 DEVICE_LITTLE_ENDIAN);
/* 16-bit load via address_space_lduw_internal().
 * NOTE(review): the endianness argument is on a line not shown in this
 * excerpt — presumably DEVICE_BIG_ENDIAN, going by the _be suffix.
 */
3285 uint32_t address_space_lduw_be(AddressSpace *as, hwaddr addr,
3286 MemTxAttrs attrs, MemTxResult *result)
3288 return address_space_lduw_internal(as, addr, attrs, result,
/* Shorthand for address_space_lduw() with MEMTXATTRS_UNSPECIFIED and a
 * NULL result pointer, so no transaction result is reported back. */
3292 uint32_t lduw_phys(AddressSpace *as, hwaddr addr)
3294 return address_space_lduw(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
/* Shorthand for address_space_lduw_le() with MEMTXATTRS_UNSPECIFIED and a
 * NULL result pointer, so no transaction result is reported back. */
3297 uint32_t lduw_le_phys(AddressSpace *as, hwaddr addr)
3299 return address_space_lduw_le(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
/* Shorthand for address_space_lduw_be() with MEMTXATTRS_UNSPECIFIED and a
 * NULL result pointer, so no transaction result is reported back. */
3302 uint32_t lduw_be_phys(AddressSpace *as, hwaddr addr)
3304 return address_space_lduw_be(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3307 /* warning: addr must be aligned. The ram page is not marked as dirty
3308 and the code inside is not invalidated. It is useful if the dirty
3309 bits are used to track modified PTEs */
3310 void address_space_stl_notdirty(AddressSpace *as, hwaddr addr, uint32_t val,
3311 MemTxAttrs attrs, MemTxResult *result)
3318 uint8_t dirty_log_mask;
3319 bool release_lock = false;
3322 mr = address_space_translate(as, addr, &addr1, &l,
3324 if (l < 4 || !memory_access_is_direct(mr, true)) {
/* Dispatch path: remember whether prepare_mmio_access() asks for the
 * lock to be released afterwards. */
3325 release_lock |= prepare_mmio_access(mr);
3327 r = memory_region_dispatch_write(mr, addr1, val, 4, attrs);
/* Direct path: obtain a host pointer into the RAM block.
 * NOTE(review): the store through ptr is on a line not shown here. */
3329 ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
/* Start from the region's dirty-log mask but clear the
 * DIRTY_MEMORY_CODE bit, so the range below is flagged for the other
 * dirty-tracking clients only. */
3332 dirty_log_mask = memory_region_get_dirty_log_mask(mr);
3333 dirty_log_mask &= ~(1 << DIRTY_MEMORY_CODE);
/* NOTE(review): the range is computed from addr (the address-space
 * address passed in) while the store itself uses addr1 (the offset
 * produced by the translation) — confirm this is intended. */
3334 cpu_physical_memory_set_dirty_range(memory_region_get_ram_addr(mr) + addr,
/* NOTE(review): presumably conditional on release_lock — confirm. */
3342 qemu_mutex_unlock_iothread();
/* Shorthand for address_space_stl_notdirty() with MEMTXATTRS_UNSPECIFIED
 * and a NULL result pointer, so no transaction result is reported back. */
3347 void stl_phys_notdirty(AddressSpace *as, hwaddr addr, uint32_t val)
3349 address_space_stl_notdirty(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3352 /* warning: addr must be aligned */
/* 32-bit store of val to addr in as, honouring the requested endian.
 * When the translation covers fewer than 4 bytes or the region is not
 * directly accessible, the value is written with
 * memory_region_dispatch_write(); otherwise it is written through a host
 * pointer and the 4 bytes are passed to invalidate_and_set_dirty().
 * NOTE(review): the store statements in the switch and the handling of
 * *result are on lines not shown in this excerpt.
 */
3353 static inline void address_space_stl_internal(AddressSpace *as,
3354 hwaddr addr, uint32_t val,
3356 MemTxResult *result,
3357 enum device_endian endian)
3364 bool release_lock = false;
3367 mr = address_space_translate(as, addr, &addr1, &l,
3369 if (l < 4 || !memory_access_is_direct(mr, true)) {
/* Dispatch path: remember whether prepare_mmio_access() asks for the
 * lock to be released afterwards. */
3370 release_lock |= prepare_mmio_access(mr);
/* Unlike the loads, the endian tests come before the dispatch call.
 * NOTE(review): their bodies (presumably a byte swap of val) are not
 * visible here. */
3372 #if defined(TARGET_WORDS_BIGENDIAN)
3373 if (endian == DEVICE_LITTLE_ENDIAN) {
3377 if (endian == DEVICE_BIG_ENDIAN) {
3381 r = memory_region_dispatch_write(mr, addr1, val, 4, attrs);
/* Direct path: write through a host pointer into the RAM block. */
3384 ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
3386 case DEVICE_LITTLE_ENDIAN:
3389 case DEVICE_BIG_ENDIAN:
3396 invalidate_and_set_dirty(mr, addr1, 4);
/* NOTE(review): presumably conditional on release_lock — confirm. */
3403 qemu_mutex_unlock_iothread();
/* 32-bit store via address_space_stl_internal() with DEVICE_NATIVE_ENDIAN. */
3408 void address_space_stl(AddressSpace *as, hwaddr addr, uint32_t val,
3409 MemTxAttrs attrs, MemTxResult *result)
3411 address_space_stl_internal(as, addr, val, attrs, result,
3412 DEVICE_NATIVE_ENDIAN);
/* 32-bit store via address_space_stl_internal() with DEVICE_LITTLE_ENDIAN. */
3415 void address_space_stl_le(AddressSpace *as, hwaddr addr, uint32_t val,
3416 MemTxAttrs attrs, MemTxResult *result)
3418 address_space_stl_internal(as, addr, val, attrs, result,
3419 DEVICE_LITTLE_ENDIAN);
/* 32-bit store via address_space_stl_internal().
 * NOTE(review): the endianness argument is on a line not shown in this
 * excerpt — presumably DEVICE_BIG_ENDIAN, going by the _be suffix.
 */
3422 void address_space_stl_be(AddressSpace *as, hwaddr addr, uint32_t val,
3423 MemTxAttrs attrs, MemTxResult *result)
3425 address_space_stl_internal(as, addr, val, attrs, result,
/* Shorthand for address_space_stl() with MEMTXATTRS_UNSPECIFIED and a
 * NULL result pointer, so no transaction result is reported back. */
3429 void stl_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3431 address_space_stl(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
/* Shorthand for address_space_stl_le() with MEMTXATTRS_UNSPECIFIED and a
 * NULL result pointer, so no transaction result is reported back. */
3434 void stl_le_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3436 address_space_stl_le(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
/* Shorthand for address_space_stl_be() with MEMTXATTRS_UNSPECIFIED and a
 * NULL result pointer, so no transaction result is reported back. */
3439 void stl_be_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3441 address_space_stl_be(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
/* Single-byte store: goes through the generic address_space_rw() with a
 * length of 1 and a final argument of 1 (a write).
 * NOTE(review): v is declared on a line not shown in this excerpt —
 * presumably a byte-sized copy of val; the propagation of r to *result
 * is likewise not visible.
 */
3445 void address_space_stb(AddressSpace *as, hwaddr addr, uint32_t val,
3446 MemTxAttrs attrs, MemTxResult *result)
3451 r = address_space_rw(as, addr, attrs, &v, 1, 1);
/* Shorthand for address_space_stb() with MEMTXATTRS_UNSPECIFIED and a
 * NULL result pointer, so no transaction result is reported back. */
3457 void stb_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3459 address_space_stb(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3462 /* warning: addr must be aligned */
/* 16-bit store of val to addr in as, honouring the requested endian.
 * When the translation covers fewer than 2 bytes or the region is not
 * directly accessible, the value is written with
 * memory_region_dispatch_write(); otherwise it is written through a host
 * pointer and the 2 bytes are passed to invalidate_and_set_dirty().
 * NOTE(review): the store statements in the switch and the handling of
 * *result are on lines not shown in this excerpt.
 */
3463 static inline void address_space_stw_internal(AddressSpace *as,
3464 hwaddr addr, uint32_t val,
3466 MemTxResult *result,
3467 enum device_endian endian)
3474 bool release_lock = false;
3477 mr = address_space_translate(as, addr, &addr1, &l, true);
3478 if (l < 2 || !memory_access_is_direct(mr, true)) {
/* Dispatch path: remember whether prepare_mmio_access() asks for the
 * lock to be released afterwards. */
3479 release_lock |= prepare_mmio_access(mr);
/* Unlike the loads, the endian tests come before the dispatch call.
 * NOTE(review): their bodies (presumably a byte swap of val) are not
 * visible here. */
3481 #if defined(TARGET_WORDS_BIGENDIAN)
3482 if (endian == DEVICE_LITTLE_ENDIAN) {
3486 if (endian == DEVICE_BIG_ENDIAN) {
3490 r = memory_region_dispatch_write(mr, addr1, val, 2, attrs);
/* Direct path: write through a host pointer into the RAM block. */
3493 ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
3495 case DEVICE_LITTLE_ENDIAN:
3498 case DEVICE_BIG_ENDIAN:
3505 invalidate_and_set_dirty(mr, addr1, 2);
/* NOTE(review): presumably conditional on release_lock — confirm. */
3512 qemu_mutex_unlock_iothread();
/* 16-bit store via address_space_stw_internal() with DEVICE_NATIVE_ENDIAN. */
3517 void address_space_stw(AddressSpace *as, hwaddr addr, uint32_t val,
3518 MemTxAttrs attrs, MemTxResult *result)
3520 address_space_stw_internal(as, addr, val, attrs, result,
3521 DEVICE_NATIVE_ENDIAN);
/* 16-bit store via address_space_stw_internal() with DEVICE_LITTLE_ENDIAN. */
3524 void address_space_stw_le(AddressSpace *as, hwaddr addr, uint32_t val,
3525 MemTxAttrs attrs, MemTxResult *result)
3527 address_space_stw_internal(as, addr, val, attrs, result,
3528 DEVICE_LITTLE_ENDIAN);
/* 16-bit store via address_space_stw_internal().
 * NOTE(review): the endianness argument is on a line not shown in this
 * excerpt — presumably DEVICE_BIG_ENDIAN, going by the _be suffix.
 */
3531 void address_space_stw_be(AddressSpace *as, hwaddr addr, uint32_t val,
3532 MemTxAttrs attrs, MemTxResult *result)
3534 address_space_stw_internal(as, addr, val, attrs, result,
/* Shorthand for address_space_stw() with MEMTXATTRS_UNSPECIFIED and a
 * NULL result pointer, so no transaction result is reported back. */
3538 void stw_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3540 address_space_stw(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
/* Shorthand for address_space_stw_le() with MEMTXATTRS_UNSPECIFIED and a
 * NULL result pointer, so no transaction result is reported back. */
3543 void stw_le_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3545 address_space_stw_le(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
/* Shorthand for address_space_stw_be() with MEMTXATTRS_UNSPECIFIED and a
 * NULL result pointer, so no transaction result is reported back. */
3548 void stw_be_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3550 address_space_stw_be(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
/* 64-bit store: hands the 8 bytes of val to the generic
 * address_space_rw() with a final argument of 1 (a write).
 * NOTE(review): any byte-order conversion of val before the call, and
 * the propagation of r to *result, are on lines not shown here.
 */
3554 void address_space_stq(AddressSpace *as, hwaddr addr, uint64_t val,
3555 MemTxAttrs attrs, MemTxResult *result)
3559 r = address_space_rw(as, addr, attrs, (void *) &val, 8, 1);
/* 64-bit little-endian store: converts val with cpu_to_le64() in place,
 * then writes its 8 bytes through address_space_rw().
 * NOTE(review): the propagation of r to *result is not visible here.
 */
3565 void address_space_stq_le(AddressSpace *as, hwaddr addr, uint64_t val,
3566 MemTxAttrs attrs, MemTxResult *result)
3569 val = cpu_to_le64(val);
3570 r = address_space_rw(as, addr, attrs, (void *) &val, 8, 1);
/* 64-bit big-endian store: converts val with cpu_to_be64() in place,
 * then writes its 8 bytes through address_space_rw().
 * NOTE(review): the propagation of r to *result is not visible here.
 */
3575 void address_space_stq_be(AddressSpace *as, hwaddr addr, uint64_t val,
3576 MemTxAttrs attrs, MemTxResult *result)
3579 val = cpu_to_be64(val);
3580 r = address_space_rw(as, addr, attrs, (void *) &val, 8, 1);
/* Shorthand for address_space_stq() with MEMTXATTRS_UNSPECIFIED and a
 * NULL result pointer, so no transaction result is reported back. */
3586 void stq_phys(AddressSpace *as, hwaddr addr, uint64_t val)
3588 address_space_stq(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
/* Shorthand for address_space_stq_le() with MEMTXATTRS_UNSPECIFIED and a
 * NULL result pointer, so no transaction result is reported back. */
3591 void stq_le_phys(AddressSpace *as, hwaddr addr, uint64_t val)
3593 address_space_stq_le(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
/* Shorthand for address_space_stq_be() with MEMTXATTRS_UNSPECIFIED and a
 * NULL result pointer, so no transaction result is reported back. */
3596 void stq_be_phys(AddressSpace *as, hwaddr addr, uint64_t val)
3598 address_space_stq_be(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3601 /* virtual memory access for debug (includes writing to ROM) */
/* Transfer len bytes between buf and guest virtual address addr on
 * behalf of cpu; is_write selects the direction. The page containing
 * addr is translated with cpu_get_phys_page_attrs_debug(), and the
 * access goes to the CPU address space selected from the returned
 * attributes by cpu_asidx_from_attrs().
 * NOTE(review): the surrounding loop, the clamping of l to len and the
 * return values are on lines not shown in this excerpt.
 */
3602 int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
3603 uint8_t *buf, int len, int is_write)
3613 page = addr & TARGET_PAGE_MASK;
3614 phys_addr = cpu_get_phys_page_attrs_debug(cpu, page, &attrs);
3615 asidx = cpu_asidx_from_attrs(cpu, attrs);
3616 /* if no physical page mapped, return an error */
3617 if (phys_addr == -1)
/* Number of bytes from addr up to the end of its page. */
3619 l = (page + TARGET_PAGE_SIZE) - addr;
/* The translation was done on the page base; add back the offset of
 * addr within the page. */
3622 phys_addr += (addr & ~TARGET_PAGE_MASK);
/* NOTE(review): the is_write test choosing between the two calls below
 * is not visible; presumably writes use
 * cpu_physical_memory_write_rom() and reads use address_space_rw(). */
3624 cpu_physical_memory_write_rom(cpu->cpu_ases[asidx].as,
3627 address_space_rw(cpu->cpu_ases[asidx].as, phys_addr,
3628 MEMTXATTRS_UNSPECIFIED,
3639 * Allows code that needs to deal with migration bitmaps etc to still be built
3640 * target independent.
/* Return the compile-time TARGET_PAGE_BITS value through a function
 * call, so callers do not need the macro itself. */
3642 size_t qemu_target_page_bits(void)
3644 return TARGET_PAGE_BITS;
3650 * A helper function for the _utterly broken_ virtio device model to find out if
3651 * it's running on a big-endian machine. Don't do this at home, kids!
/* The prototype is declared immediately before the definition. The
 * result is chosen at compile time by whether TARGET_WORDS_BIGENDIAN is
 * defined.
 * NOTE(review): the return statements in each preprocessor branch are on
 * lines not shown in this excerpt.
 */
3653 bool target_words_bigendian(void);
3654 bool target_words_bigendian(void)
3656 #if defined(TARGET_WORDS_BIGENDIAN)
3663 #ifndef CONFIG_USER_ONLY
/* Report whether phys_addr in address_space_memory resolves to a region
 * for which neither memory_region_is_ram() nor memory_region_is_romd()
 * holds.
 * NOTE(review): the declaration and return of res, and any locking
 * around the translation, are on lines not shown in this excerpt.
 */
3664 bool cpu_physical_memory_is_io(hwaddr phys_addr)
/* phys_addr doubles as the output slot for the translated offset; the
 * visible code only uses the returned region afterwards. */
3671 mr = address_space_translate(&address_space_memory,
3672 phys_addr, &phys_addr, &l, false);
3674 res = !(memory_region_is_ram(mr) || memory_region_is_romd(mr));
3679 int qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque)
3685 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
3686 ret = func(block->idstr, block->host, block->offset,
3687 block->used_length, opaque);