4 * Copyright Fujitsu, Corp. 2011, 2012
7 * Wen Congyang <wency@cn.fujitsu.com>
9 * This work is licensed under the terms of the GNU GPL, version 2 or later.
10 * See the COPYING file in the top-level directory.
14 #include "qemu-common.h"
17 #include "exec/cpu-all.h"
18 #include "exec/hwaddr.h"
19 #include "monitor/monitor.h"
20 #include "sysemu/kvm.h"
21 #include "sysemu/dump.h"
22 #include "sysemu/sysemu.h"
23 #include "sysemu/memory_mapping.h"
24 #include "sysemu/cpus.h"
25 #include "qapi/error.h"
26 #include "qmp-commands.h"
/*
 * Convert a 16-bit value to the byte order of the dump target.
 * endian is compared against ELFDATA2LSB: the little-endian case goes
 * through cpu_to_le16(), the other visible path through cpu_to_be16().
 */
28 static uint16_t cpu_convert_to_target16(uint16_t val, int endian)
30 if (endian == ELFDATA2LSB) {
31 val = cpu_to_le16(val);
33 val = cpu_to_be16(val);
/*
 * Convert a 32-bit value to the byte order of the dump target.
 * endian is compared against ELFDATA2LSB: the little-endian case goes
 * through cpu_to_le32(), the other visible path through cpu_to_be32().
 */
39 static uint32_t cpu_convert_to_target32(uint32_t val, int endian)
41 if (endian == ELFDATA2LSB) {
42 val = cpu_to_le32(val);
44 val = cpu_to_be32(val);
/*
 * Convert a 64-bit value to the byte order of the dump target.
 * endian is compared against ELFDATA2LSB: the little-endian case goes
 * through cpu_to_le64(), the other visible path through cpu_to_be64().
 */
50 static uint64_t cpu_convert_to_target64(uint64_t val, int endian)
52 if (endian == ELFDATA2LSB) {
53 val = cpu_to_le64(val);
55 val = cpu_to_be64(val);
/*
 * State carried through one guest-memory dump and passed to every helper
 * in this file.
 * NOTE(review): only part of the member list is visible in this excerpt;
 * the code below also reads or writes fd, errp, phdr_num, sh_info,
 * have_section, has_filter, memory_offset, note_size, start, begin and
 * length through this structure.
 */
61 typedef struct DumpState {
/* Guest RAM blocks; built in dump_init() and walked when writing memory. */
62 GuestPhysBlockList guest_phys_blocks;
/* Filled by cpu_get_dump_info(); d_endian, d_class and d_machine are read. */
63 ArchDumpInfo dump_info;
/* Guest memory mappings; write_elf_loads() emits PT_LOAD headers from it. */
64 MemoryMappingList list;
/* Block that dump_iterate() writes next. */
73 GuestPhysBlock *next_block;
/*
 * Release resources held in s: the guest physical block list and the
 * memory mapping list.
 * NOTE(review): any further cleanup steps and the return value are on
 * lines not visible in this excerpt.
 */
81 static int dump_cleanup(DumpState *s)
85 guest_phys_blocks_free(&s->guest_phys_blocks);
86 memory_mapping_list_free(&s->list);
/*
 * Error hook called by the write helpers in this file. reason is a
 * human-readable message, e.g. "dump: failed to write elf header.\n".
 * NOTE(review): the body is not visible in this excerpt.
 */
97 static void dump_error(DumpState *s, const char *reason)
/*
 * Write callback for the vmcore file.
 *
 * buf/size: data to write.
 * opaque:   the DumpState; its fd member is the destination descriptor.
 *
 * The data is written with qemu_write_full(); a result different from
 * size is treated as a distinct case (short write).
 * NOTE(review): the values returned on success and failure are on lines
 * not visible in this excerpt.
 */
102 static int fd_write_vmcore(void *buf, size_t size, void *opaque)
104 DumpState *s = opaque;
107 written_size = qemu_write_full(s->fd, buf, size);
108 if (written_size != size) {
/*
 * Build the Elf64_Ehdr of an ET_CORE file and write it to the vmcore via
 * fd_write_vmcore().
 *
 * Every multi-byte field is converted to the target byte order taken from
 * s->dump_info.d_endian. The program header table is placed directly
 * after the ELF header, and e_phnum is s->phdr_num. When s->have_section
 * is set, the header also describes one section header located after
 * s->sh_info program headers.
 *
 * A failed write is reported through dump_error().
 */
115 static int write_elf64_header(DumpState *s)
117 Elf64_Ehdr elf_header;
119 int endian = s->dump_info.d_endian;
121 memset(&elf_header, 0, sizeof(Elf64_Ehdr));
/* The ELF magic occupies the first SELFMAG bytes of the header. */
122 memcpy(&elf_header, ELFMAG, SELFMAG);
123 elf_header.e_ident[EI_CLASS] = ELFCLASS64;
124 elf_header.e_ident[EI_DATA] = s->dump_info.d_endian;
125 elf_header.e_ident[EI_VERSION] = EV_CURRENT;
126 elf_header.e_type = cpu_convert_to_target16(ET_CORE, endian);
127 elf_header.e_machine = cpu_convert_to_target16(s->dump_info.d_machine,
129 elf_header.e_version = cpu_convert_to_target32(EV_CURRENT, endian);
130 elf_header.e_ehsize = cpu_convert_to_target16(sizeof(elf_header), endian);
/* Program headers start immediately after the ELF header. */
131 elf_header.e_phoff = cpu_convert_to_target64(sizeof(Elf64_Ehdr), endian);
132 elf_header.e_phentsize = cpu_convert_to_target16(sizeof(Elf64_Phdr),
134 elf_header.e_phnum = cpu_convert_to_target16(s->phdr_num, endian);
135 if (s->have_section) {
/* The single section header follows the s->sh_info program headers. */
136 uint64_t shoff = sizeof(Elf64_Ehdr) + sizeof(Elf64_Phdr) * s->sh_info;
138 elf_header.e_shoff = cpu_convert_to_target64(shoff, endian);
139 elf_header.e_shentsize = cpu_convert_to_target16(sizeof(Elf64_Shdr),
141 elf_header.e_shnum = cpu_convert_to_target16(1, endian);
144 ret = fd_write_vmcore(&elf_header, sizeof(elf_header), s);
146 dump_error(s, "dump: failed to write elf header.\n");
/*
 * Build the Elf32_Ehdr of an ET_CORE file and write it to the vmcore via
 * fd_write_vmcore().
 *
 * Every multi-byte field is converted to the target byte order taken from
 * s->dump_info.d_endian. The program header table is placed directly
 * after the ELF header, and e_phnum is s->phdr_num. When s->have_section
 * is set, the header also describes one section header located after
 * s->sh_info program headers.
 *
 * A failed write is reported through dump_error().
 */
153 static int write_elf32_header(DumpState *s)
155 Elf32_Ehdr elf_header;
157 int endian = s->dump_info.d_endian;
159 memset(&elf_header, 0, sizeof(Elf32_Ehdr));
/* The ELF magic occupies the first SELFMAG bytes of the header. */
160 memcpy(&elf_header, ELFMAG, SELFMAG);
161 elf_header.e_ident[EI_CLASS] = ELFCLASS32;
162 elf_header.e_ident[EI_DATA] = endian;
163 elf_header.e_ident[EI_VERSION] = EV_CURRENT;
164 elf_header.e_type = cpu_convert_to_target16(ET_CORE, endian);
165 elf_header.e_machine = cpu_convert_to_target16(s->dump_info.d_machine,
167 elf_header.e_version = cpu_convert_to_target32(EV_CURRENT, endian);
168 elf_header.e_ehsize = cpu_convert_to_target16(sizeof(elf_header), endian);
/* Program headers start immediately after the ELF header. */
169 elf_header.e_phoff = cpu_convert_to_target32(sizeof(Elf32_Ehdr), endian);
170 elf_header.e_phentsize = cpu_convert_to_target16(sizeof(Elf32_Phdr),
172 elf_header.e_phnum = cpu_convert_to_target16(s->phdr_num, endian);
173 if (s->have_section) {
/* The single section header follows the s->sh_info program headers. */
174 uint32_t shoff = sizeof(Elf32_Ehdr) + sizeof(Elf32_Phdr) * s->sh_info;
176 elf_header.e_shoff = cpu_convert_to_target32(shoff, endian);
177 elf_header.e_shentsize = cpu_convert_to_target16(sizeof(Elf32_Shdr),
179 elf_header.e_shnum = cpu_convert_to_target16(1, endian);
182 ret = fd_write_vmcore(&elf_header, sizeof(elf_header), s);
184 dump_error(s, "dump: failed to write elf header.\n");
/*
 * Write one 64-bit PT_LOAD program header describing memory_mapping.
 *
 * offset: file offset stored in p_offset.
 * filesz: number of bytes stored in p_filesz; must not exceed the mapping
 *         length, which is stored in p_memsz.
 *
 * p_paddr and p_vaddr come from the mapping's phys_addr and virt_addr.
 * All fields are converted to the target byte order. A failed write is
 * reported through dump_error().
 */
191 static int write_elf64_load(DumpState *s, MemoryMapping *memory_mapping,
192 int phdr_index, hwaddr offset,
197 int endian = s->dump_info.d_endian;
199 memset(&phdr, 0, sizeof(Elf64_Phdr));
200 phdr.p_type = cpu_convert_to_target32(PT_LOAD, endian);
201 phdr.p_offset = cpu_convert_to_target64(offset, endian);
202 phdr.p_paddr = cpu_convert_to_target64(memory_mapping->phys_addr, endian);
203 phdr.p_filesz = cpu_convert_to_target64(filesz, endian);
204 phdr.p_memsz = cpu_convert_to_target64(memory_mapping->length, endian);
205 phdr.p_vaddr = cpu_convert_to_target64(memory_mapping->virt_addr, endian);
/* The in-file size may be smaller than the in-memory size, never larger. */
207 assert(memory_mapping->length >= filesz);
209 ret = fd_write_vmcore(&phdr, sizeof(Elf64_Phdr), s);
211 dump_error(s, "dump: failed to write program header table.\n");
/*
 * Write one 32-bit PT_LOAD program header describing memory_mapping.
 *
 * offset: file offset stored in p_offset.
 * filesz: number of bytes stored in p_filesz; must not exceed the mapping
 *         length, which is stored in p_memsz.
 *
 * p_paddr and p_vaddr come from the mapping's phys_addr and virt_addr.
 * All fields are converted to the target byte order. A failed write is
 * reported through dump_error().
 *
 * NOTE(review): every value passes through cpu_convert_to_target32(),
 * whose parameter is uint32_t, so if hwaddr or the mapping fields are
 * wider than 32 bits the upper bits are dropped here - confirm that this
 * is acceptable for ELFCLASS32 targets.
 */
218 static int write_elf32_load(DumpState *s, MemoryMapping *memory_mapping,
219 int phdr_index, hwaddr offset,
224 int endian = s->dump_info.d_endian;
226 memset(&phdr, 0, sizeof(Elf32_Phdr));
227 phdr.p_type = cpu_convert_to_target32(PT_LOAD, endian);
228 phdr.p_offset = cpu_convert_to_target32(offset, endian);
229 phdr.p_paddr = cpu_convert_to_target32(memory_mapping->phys_addr, endian);
230 phdr.p_filesz = cpu_convert_to_target32(filesz, endian);
231 phdr.p_memsz = cpu_convert_to_target32(memory_mapping->length, endian);
232 phdr.p_vaddr = cpu_convert_to_target32(memory_mapping->virt_addr, endian);
/* The in-file size may be smaller than the in-memory size, never larger. */
234 assert(memory_mapping->length >= filesz);
236 ret = fd_write_vmcore(&phdr, sizeof(Elf32_Phdr), s);
238 dump_error(s, "dump: failed to write program header table.\n");
/*
 * Write the 64-bit PT_NOTE program header.
 *
 * The note area is s->note_size bytes long and ends where the memory
 * area begins (s->memory_offset), so its file offset is
 * memory_offset - note_size. p_filesz and p_memsz are both note_size.
 * A failed write is reported through dump_error().
 */
245 static int write_elf64_note(DumpState *s)
248 int endian = s->dump_info.d_endian;
249 hwaddr begin = s->memory_offset - s->note_size;
252 memset(&phdr, 0, sizeof(Elf64_Phdr));
253 phdr.p_type = cpu_convert_to_target32(PT_NOTE, endian);
254 phdr.p_offset = cpu_convert_to_target64(begin, endian);
256 phdr.p_filesz = cpu_convert_to_target64(s->note_size, endian);
257 phdr.p_memsz = cpu_convert_to_target64(s->note_size, endian);
260 ret = fd_write_vmcore(&phdr, sizeof(Elf64_Phdr), s);
262 dump_error(s, "dump: failed to write program header table.\n");
/*
 * Return the CPU's cpu_index field plus one.
 * NOTE(review): presumably this yields the 1-based id handed to the
 * per-CPU note writers - confirm against the callers.
 */
269 static inline int cpu_index(CPUState *cpu)
271 return cpu->cpu_index + 1;
/*
 * Write the 64-bit note data to the vmcore, using fd_write_vmcore as the
 * output callback: the ELF notes via cpu_write_elf64_note(), then the
 * CPU status via cpu_write_elf64_qemunote(). Failures are reported
 * through dump_error().
 * NOTE(review): the iteration over CPUs and the computation of id are on
 * lines not visible in this excerpt.
 */
274 static int write_elf64_notes(DumpState *s)
282 ret = cpu_write_elf64_note(fd_write_vmcore, cpu, id, s);
284 dump_error(s, "dump: failed to write elf notes.\n");
290 ret = cpu_write_elf64_qemunote(fd_write_vmcore, cpu, s);
292 dump_error(s, "dump: failed to write CPU status.\n");
/*
 * Write the 32-bit PT_NOTE program header.
 *
 * The note area is s->note_size bytes long and ends where the memory
 * area begins (s->memory_offset), so its file offset is
 * memory_offset - note_size. p_filesz and p_memsz are both note_size.
 * A failed write is reported through dump_error().
 */
300 static int write_elf32_note(DumpState *s)
302 hwaddr begin = s->memory_offset - s->note_size;
304 int endian = s->dump_info.d_endian;
307 memset(&phdr, 0, sizeof(Elf32_Phdr));
308 phdr.p_type = cpu_convert_to_target32(PT_NOTE, endian);
309 phdr.p_offset = cpu_convert_to_target32(begin, endian);
311 phdr.p_filesz = cpu_convert_to_target32(s->note_size, endian);
312 phdr.p_memsz = cpu_convert_to_target32(s->note_size, endian);
315 ret = fd_write_vmcore(&phdr, sizeof(Elf32_Phdr), s);
317 dump_error(s, "dump: failed to write program header table.\n");
/*
 * Write the 32-bit note data to the vmcore, using fd_write_vmcore as the
 * output callback: the ELF notes via cpu_write_elf32_note(), then the
 * CPU status via cpu_write_elf32_qemunote(). Failures are reported
 * through dump_error().
 * NOTE(review): the iteration over CPUs and the computation of id are on
 * lines not visible in this excerpt.
 */
324 static int write_elf32_notes(DumpState *s)
332 ret = cpu_write_elf32_note(fd_write_vmcore, cpu, id, s);
334 dump_error(s, "dump: failed to write elf notes.\n");
340 ret = cpu_write_elf32_qemunote(fd_write_vmcore, cpu, s);
342 dump_error(s, "dump: failed to write CPU status.\n");
/*
 * Write the single section header of the vmcore.
 *
 * The header (Elf32_Shdr or Elf64_Shdr) is zeroed and only sh_info is
 * filled in, with s->sh_info converted to the target byte order.
 * dump_init() sets sh_info to the program header count when that count
 * does not fit in e_phnum.
 *
 * type: dump_begin() passes 1 on the ELFCLASS64 path and 0 otherwise.
 *
 * A failed write is reported through dump_error().
 */
350 static int write_elf_section(DumpState *s, int type)
354 int endian = s->dump_info.d_endian;
360 shdr_size = sizeof(Elf32_Shdr);
361 memset(&shdr32, 0, shdr_size);
362 shdr32.sh_info = cpu_convert_to_target32(s->sh_info, endian);
365 shdr_size = sizeof(Elf64_Shdr);
366 memset(&shdr64, 0, shdr_size);
367 shdr64.sh_info = cpu_convert_to_target32(s->sh_info, endian);
371 ret = fd_write_vmcore(&shdr, shdr_size, s);
373 dump_error(s, "dump: failed to write section header table.\n");
/*
 * Write length bytes from buf to the vmcore through fd_write_vmcore().
 * A failed write is reported through dump_error() with the message
 * "dump: failed to save memory.\n".
 */
380 static int write_data(DumpState *s, void *buf, int length)
384 ret = fd_write_vmcore(buf, length, s);
386 dump_error(s, "dump: failed to save memory.\n");
393 /* Write the memory to the vmcore, one page per I/O.
 *
 * The data is read from block->host_addr + start. It is written in
 * size / TARGET_PAGE_SIZE chunks of one page each through write_data();
 * if size is not a multiple of TARGET_PAGE_SIZE, the remaining
 * size % TARGET_PAGE_SIZE bytes are written with one more call.
 */
394 static int write_memory(DumpState *s, GuestPhysBlock *block, ram_addr_t start,
400 for (i = 0; i < size / TARGET_PAGE_SIZE; i++) {
401 ret = write_data(s, block->host_addr + start + i * TARGET_PAGE_SIZE,
408 if ((size % TARGET_PAGE_SIZE) != 0) {
409 ret = write_data(s, block->host_addr + start + i * TARGET_PAGE_SIZE,
410 size % TARGET_PAGE_SIZE);
419 /* Get the offset and size in the vmcore of the guest memory range that
 * starts at phys_addr and is mapping_length bytes long.
 *
 * Results are stored through p_offset and p_filesz.
 *
 * The function walks s->guest_phys_blocks with a running file offset that
 * starts at s->memory_offset and grows by the dumped size of each block
 * (size_in_block). For the block whose dumped range contains phys_addr,
 * the offset is phys_addr - start + offset. The file size is the full
 * mapping length when the mapping ends inside that dumped range, and
 * otherwise only the bytes up to the end of the range.
 *
 * When a begin/length window is in effect, a block entirely outside
 * [s->begin, s->begin + s->length) is treated as out of range, and the
 * dumped range of a block is clipped at the end of the window.
 * NOTE(review): the conditions guarding the window handling are on lines
 * not visible in this excerpt.
 */
420 static void get_offset_range(hwaddr phys_addr,
421 ram_addr_t mapping_length,
426 GuestPhysBlock *block;
427 hwaddr offset = s->memory_offset;
428 int64_t size_in_block, start;
430 /* When the memory is not stored into vmcore, offset will be -1 */
435 if (phys_addr < s->begin || phys_addr >= s->begin + s->length) {
440 QTAILQ_FOREACH(block, &s->guest_phys_blocks.head, next) {
442 if (block->target_start >= s->begin + s->length ||
443 block->target_end <= s->begin) {
444 /* This block is out of the range */
448 if (s->begin <= block->target_start) {
449 start = block->target_start;
454 size_in_block = block->target_end - start;
455 if (s->begin + s->length < block->target_end) {
456 size_in_block -= block->target_end - (s->begin + s->length);
459 start = block->target_start;
460 size_in_block = block->target_end - block->target_start;
463 if (phys_addr >= start && phys_addr < start + size_in_block) {
464 *p_offset = phys_addr - start + offset;
466 /* The offset range mapped from the vmcore file must not spill over
467 * the GuestPhysBlock, clamp it. The rest of the mapping will be
468 * zero-filled in memory at load time; see
469 * <http://refspecs.linuxbase.org/elf/gabi4+/ch5.pheader.html>.
471 *p_filesz = phys_addr + mapping_length <= start + size_in_block ?
473 size_in_block - (phys_addr - start);
477 offset += size_in_block;
// Write the PT_LOAD program headers for the entries of s->list.
//
// For each memory mapping, get_offset_range() supplies the file offset and
// in-file size, and the class-specific helper (write_elf64_load() for
// ELFCLASS64, write_elf32_load() otherwise) emits the header.
//
// phdr_index starts at 1; dump_begin() writes the PT_NOTE header before
// calling this function. The loop has a stop condition once phdr_index
// reaches max_index, which is s->sh_info when s->have_section is set and
// s->phdr_num otherwise.
481 static int write_elf_loads(DumpState *s)
483 hwaddr offset, filesz;
484 MemoryMapping *memory_mapping;
485 uint32_t phdr_index = 1;
489 if (s->have_section) {
490 max_index = s->sh_info;
492 max_index = s->phdr_num;
495 QTAILQ_FOREACH(memory_mapping, &s->list.head, next) {
496 get_offset_range(memory_mapping->phys_addr,
497 memory_mapping->length,
498 s, &offset, &filesz);
499 if (s->dump_info.d_class == ELFCLASS64) {
500 ret = write_elf64_load(s, memory_mapping, phdr_index++, offset,
503 ret = write_elf32_load(s, memory_mapping, phdr_index++, offset,
511 if (phdr_index >= max_index) {
519 /* Write the ELF header, PT_NOTE and ELF notes to the vmcore.
 *
 * This emits everything that precedes guest memory in the file, in this
 * order: the ELF header, the PT_NOTE program header, all PT_LOAD program
 * headers, the section header (only when s->have_section is set), and
 * finally the note data. The ELFCLASS64 and ELFCLASS32 paths perform the
 * same steps with their class-specific helpers; write_elf_section() is
 * called with 1 on the 64-bit path and 0 on the 32-bit path.
 */
520 static int dump_begin(DumpState *s)
525 * the vmcore's format is:
544 * we only know where the memory is saved after we write elf note into
548 /* write elf header to vmcore */
549 if (s->dump_info.d_class == ELFCLASS64) {
550 ret = write_elf64_header(s);
552 ret = write_elf32_header(s);
558 if (s->dump_info.d_class == ELFCLASS64) {
559 /* write PT_NOTE to vmcore */
560 if (write_elf64_note(s) < 0) {
564 /* write all PT_LOAD to vmcore */
565 if (write_elf_loads(s) < 0) {
569 /* write section to vmcore */
570 if (s->have_section) {
571 if (write_elf_section(s, 1) < 0) {
576 /* write notes to vmcore */
577 if (write_elf64_notes(s) < 0) {
582 /* write PT_NOTE to vmcore */
583 if (write_elf32_note(s) < 0) {
587 /* write all PT_LOAD to vmcore */
588 if (write_elf_loads(s) < 0) {
592 /* write section to vmcore */
593 if (s->have_section) {
594 if (write_elf_section(s, 0) < 0) {
599 /* write notes to vmcore */
600 if (write_elf32_notes(s) < 0) {
608 /* write PT_LOAD to vmcore */
/*
 * NOTE(review): only the signature is visible in this excerpt, so the
 * comment above could not be checked against the body. The PT_LOAD
 * headers are written by write_elf_loads(); confirm that the comment
 * still describes what this function does.
 */
609 static int dump_completed(DumpState *s)
/*
 * Advance to the guest physical block that follows block.
 *
 * The successor is taken with QTAILQ_NEXT() and recorded in
 * s->next_block. A block that lies entirely outside
 * [s->begin, s->begin + s->length) is treated as out of range. When the
 * window starts inside the block, s->start is set to the offset of
 * s->begin within the block.
 * NOTE(review): the end-of-list handling, the other assignments to
 * s->start and the return values are on lines not visible in this
 * excerpt.
 */
615 static int get_next_block(DumpState *s, GuestPhysBlock *block)
618 block = QTAILQ_NEXT(block, next);
625 s->next_block = block;
627 if (block->target_start >= s->begin + s->length ||
628 block->target_end <= s->begin) {
629 /* This block is out of the range */
633 if (s->begin > block->target_start) {
634 s->start = s->begin - block->target_start;
642 /* Write all memory to the vmcore.
 *
 * Starting from s->next_block, each block is written with write_memory()
 * beginning at offset s->start within the block. The size starts as the
 * full block length and is reduced when the requested window ends before
 * block->target_end. get_next_block() then selects the following block.
 * NOTE(review): the loop construct, its exit condition and any further
 * adjustment of size are on lines not visible in this excerpt.
 */
643 static int dump_iterate(DumpState *s)
645 GuestPhysBlock *block;
650 block = s->next_block;
652 size = block->target_end - block->target_start;
655 if (s->begin + s->length < block->target_end) {
656 size -= block->target_end - (s->begin + s->length);
659 ret = write_memory(s, block, s->start, size);
664 ret = get_next_block(s, block);
/*
 * Produce the vmcore for the prepared dump state s. The visible part of
 * the body writes the guest memory through dump_iterate();
 * qmp_dump_guest_memory() treats a negative return value as failure.
 * NOTE(review): the steps before and after dump_iterate() are on lines
 * not visible in this excerpt.
 */
672 static int create_vmcore(DumpState *s)
681 ret = dump_iterate(s);
/*
 * Select the first guest physical block to dump and store it in
 * s->next_block.
 *
 * Without a filter (s->has_filter is false) this is the first block of
 * s->guest_phys_blocks. With a filter, blocks entirely outside
 * [s->begin, s->begin + s->length) are treated as out of range; for the
 * selected block, s->start is set to the offset of s->begin within the
 * block when the window starts inside it.
 *
 * dump_init() stores the return value in s->start and treats -1 as an
 * invalid "begin" parameter.
 * NOTE(review): the return statements are on lines not visible in this
 * excerpt.
 */
689 static ram_addr_t get_start_block(DumpState *s)
691 GuestPhysBlock *block;
693 if (!s->has_filter) {
694 s->next_block = QTAILQ_FIRST(&s->guest_phys_blocks.head);
698 QTAILQ_FOREACH(block, &s->guest_phys_blocks.head, next) {
699 if (block->target_start >= s->begin + s->length ||
700 block->target_end <= s->begin) {
701 /* This block is out of the range */
705 s->next_block = block;
706 if (s->begin > block->target_start) {
707 s->start = s->begin - block->target_start;
/*
 * Prepare s for a dump.
 *
 * Visible steps, in order:
 *  - stop the guest if it is running, then synchronize all CPU states;
 *  - record has_filter and build the guest physical block list;
 *  - pick the first block with get_start_block(); a result of -1 is
 *    reported as QERR_INVALID_PARAMETER for "begin";
 *  - query the target dump info and the note size; either failing is
 *    reported as QERR_UNSUPPORTED;
 *  - build the memory mapping list with qemu_get_guest_memory_mapping()
 *    (its error is propagated to errp) or with
 *    qemu_get_guest_simple_memory_mapping(), and apply
 *    memory_mapping_filter() with s->begin and s->length;
 *  - compute the program header count: one PT_NOTE plus one per mapping.
 *    If the mapping count is not below UINT16_MAX - 2, e_phnum becomes
 *    PN_XNUM, a section header is used and the count goes in s->sh_info,
 *    capped at UINT32_MAX;
 *  - compute s->memory_offset, the file offset where guest memory starts.
 *
 * NOTE(review): the condition choosing between the two mapping builders
 * (presumably paging), the condition guarding memory_mapping_filter(),
 * the assignments of fd, errp, begin and length into s, and the return
 * values are on lines not visible in this excerpt.
 */
717 static int dump_init(DumpState *s, int fd, bool paging, bool has_filter,
718 int64_t begin, int64_t length, Error **errp)
725 if (runstate_is_running()) {
726 vm_stop(RUN_STATE_SAVE_VM);
732 /* If we use KVM, we should synchronize the registers before we get dump
733 * info or physmap info.
735 cpu_synchronize_all_states();
743 s->has_filter = has_filter;
747 guest_phys_blocks_init(&s->guest_phys_blocks);
748 guest_phys_blocks_append(&s->guest_phys_blocks);
750 s->start = get_start_block(s);
751 if (s->start == -1) {
752 error_set(errp, QERR_INVALID_PARAMETER, "begin");
756 /* get dump info: endian, class and architecture.
757 * If the target architecture is not supported, cpu_get_dump_info() will
760 ret = cpu_get_dump_info(&s->dump_info, &s->guest_phys_blocks);
762 error_set(errp, QERR_UNSUPPORTED);
766 s->note_size = cpu_get_note_size(s->dump_info.d_class,
767 s->dump_info.d_machine, nr_cpus);
768 if (s->note_size < 0) {
769 error_set(errp, QERR_UNSUPPORTED);
773 /* get memory mapping */
774 memory_mapping_list_init(&s->list);
776 qemu_get_guest_memory_mapping(&s->list, &s->guest_phys_blocks, &err);
778 error_propagate(errp, err);
782 qemu_get_guest_simple_memory_mapping(&s->list, &s->guest_phys_blocks);
786 memory_mapping_filter(&s->list, s->begin, s->length);
792 * the type of ehdr->e_phnum is uint16_t, so we should avoid overflow
794 s->phdr_num = 1; /* PT_NOTE */
795 if (s->list.num < UINT16_MAX - 2) {
796 s->phdr_num += s->list.num;
797 s->have_section = false;
799 s->have_section = true;
800 s->phdr_num = PN_XNUM;
801 s->sh_info = 1; /* PT_NOTE */
803 /* the type of shdr->sh_info is uint32_t, so we should avoid overflow */
804 if (s->list.num <= UINT32_MAX - 1) {
805 s->sh_info += s->list.num;
807 s->sh_info = UINT32_MAX;
/*
 * Guest memory starts after the ELF header, the program headers, the
 * optional single section header and the notes.
 */
811 if (s->dump_info.d_class == ELFCLASS64) {
812 if (s->have_section) {
813 s->memory_offset = sizeof(Elf64_Ehdr) +
814 sizeof(Elf64_Phdr) * s->sh_info +
815 sizeof(Elf64_Shdr) + s->note_size;
817 s->memory_offset = sizeof(Elf64_Ehdr) +
818 sizeof(Elf64_Phdr) * s->phdr_num + s->note_size;
821 if (s->have_section) {
822 s->memory_offset = sizeof(Elf32_Ehdr) +
823 sizeof(Elf32_Phdr) * s->sh_info +
824 sizeof(Elf32_Shdr) + s->note_size;
826 s->memory_offset = sizeof(Elf32_Ehdr) +
827 sizeof(Elf32_Phdr) * s->phdr_num + s->note_size;
834 guest_phys_blocks_free(&s->guest_phys_blocks);
/*
 * QMP entry point: dump guest memory to a vmcore.
 *
 * paging:     forwarded to dump_init().
 * file:       destination, either "fd:NAME" (descriptor obtained from the
 *             current monitor) or "file:PATH" (opened write-only, created
 *             or truncated, with mode S_IRUSR). Any other prefix is
 *             rejected as an invalid "protocol" parameter.
 * begin/length: optional window; they must be given together, otherwise
 *             the missing one is reported with QERR_MISSING_PARAMETER.
 *             has_begin is forwarded to dump_init() as has_filter.
 * errp:       receives the error on failure. If create_vmcore() fails and
 *             no error is set in s->errp, QERR_IO_ERROR is reported.
 *
 * NOTE(review): the early returns, the checks of fd and ret, and the
 * release of s are on lines not visible in this excerpt.
 */
843 void qmp_dump_guest_memory(bool paging, const char *file, bool has_begin,
844 int64_t begin, bool has_length, int64_t length,
852 if (has_begin && !has_length) {
853 error_set(errp, QERR_MISSING_PARAMETER, "length");
856 if (!has_begin && has_length) {
857 error_set(errp, QERR_MISSING_PARAMETER, "begin");
862 if (strstart(file, "fd:", &p)) {
863 fd = monitor_get_fd(cur_mon, p, errp);
870 if (strstart(file, "file:", &p)) {
871 fd = qemu_open(p, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY, S_IRUSR);
873 error_setg_file_open(errp, errno, p);
879 error_set(errp, QERR_INVALID_PARAMETER, "protocol");
/* Zero-initialized state, so unset members start as 0/NULL/false. */
883 s = g_malloc0(sizeof(DumpState));
885 ret = dump_init(s, fd, paging, has_begin, begin, length, errp);
891 if (create_vmcore(s) < 0 && !error_is_set(s->errp)) {
892 error_set(errp, QERR_IO_ERROR);