# This python script adds a new gdb command, "dump-guest-memory". It
# should be loaded with "source dump-guest-memory.py" at the (gdb)
# prompt.
#
# Copyright (C) 2013, Red Hat, Inc.
#
# Laszlo Ersek <lersek@redhat.com>
# Janosch Frank <frankja@linux.vnet.ibm.com>
#
# This work is licensed under the terms of the GNU GPL, version 2 or later. See
# the COPYING file in the top-level directory.
#
# The leading docstring doesn't have idiomatic Python formatting. It is
# printed by gdb's "help" command (the first line is printed in the
# "help data" summary), and it should match how other help texts look in
# gdb.
# gdb type used to cast host pointers to plain integers before they are
# formatted with "%x".
UINTPTR_T = gdb.lookup_type("uintptr_t")
# Guest memory is copied out in chunks of at most one page.
TARGET_PAGE_SIZE = 0x1000
# 64-bit mask that clears the in-page offset bits of an address. Derived
# from TARGET_PAGE_SIZE so the two values cannot drift apart.
TARGET_PAGE_MASK = ~(TARGET_PAGE_SIZE - 1) & 0xFFFFFFFFFFFFFFFF
# Special value for e_phnum. This indicates that the real number of
# program headers is too large to fit into e_phnum. Instead the real
# value is in the field sh_info of section 0.
class ELF(object):
    """Representation of a ELF file."""

    def __init__(self, arch):
        """Sets up the ELF header and the PT_NOTE segment for arch.

        arch is one of the architecture names listed in the error
        message below; any other value raises gdb.GdbError.
        """

        self.ehdr = None
        self.notes = []
        self.segments = []
        self.endianess = None
        self.elfclass = ELFCLASS64

        # arch name -> (data encoding, ELF class, e_machine)
        supported = {
            'aarch64-le': (ELFDATA2LSB, ELFCLASS64, EM_AARCH),
            'aarch64-be': (ELFDATA2MSB, ELFCLASS64, EM_AARCH),
            'X86_64': (ELFDATA2LSB, ELFCLASS64, EM_X86_64),
            '386': (ELFDATA2LSB, ELFCLASS32, EM_386),
            's390': (ELFDATA2MSB, ELFCLASS64, EM_S390),
            'ppc64-le': (ELFDATA2LSB, ELFCLASS64, EM_PPC64),
            'ppc64-be': (ELFDATA2MSB, ELFCLASS64, EM_PPC64),
        }

        if arch not in supported:
            raise gdb.GdbError("No valid arch type specified.\n"
                               "Currently supported types:\n"
                               "aarch64-be, aarch64-le, X86_64, 386, s390, "
                               "ppc64-be, ppc64-le")

        self.endianess, self.elfclass, e_machine = supported[arch]
        self.ehdr = get_arch_ehdr(self.endianess, self.elfclass)
        self.ehdr.e_machine = e_machine

        # Segment 0 is the PT_NOTE segment; add_note() grows its size.
        self.add_segment(PT_NOTE, 0, 0)

    def add_note(self, n_name, n_desc, n_type):
        """Adds a note to the ELF.

        n_name and n_desc are strings, n_type is the numeric note type.
        """

        # Encode once and size everything from the byte strings, so the
        # ctypes fields always match what is copied into them.
        name = n_name.encode()
        desc = n_desc.encode()

        note = get_arch_note(self.endianess, len(name), len(desc))
        note.n_namesz = len(name) + 1
        note.n_descsz = len(desc)
        note.n_name = name
        note.n_type = n_type

        # Desc needs to be 4 byte aligned (although the 64bit spec
        # specifies 8 byte). When defining n_desc as uint32 it will be
        # automatically aligned but we need the memmove to copy the
        # string into it.
        ctypes.memmove(note.n_desc, desc, len(desc))

        self.notes.append(note)
        self.segments[0].p_filesz += ctypes.sizeof(note)
        self.segments[0].p_memsz += ctypes.sizeof(note)

    def add_segment(self, p_type, p_paddr, p_size):
        """Adds a segment to the elf.

        p_size is used for both the file size and the memory size of
        the new program header.
        """

        phdr = get_arch_phdr(self.endianess, self.elfclass)
        phdr.p_type = p_type
        phdr.p_paddr = p_paddr
        phdr.p_filesz = p_size
        phdr.p_memsz = p_size
        self.segments.append(phdr)
        self.ehdr.e_phnum += 1

    def to_file(self, elf_file):
        """Writes all ELF structures to the passed file.

        Written in this order: the ELF header, every program header
        (segment 0 first), then every note. The segment contents
        themselves are not written here.
        """

        elf_file.write(self.ehdr)

        # File offset of the first byte after the program header table.
        off = (ctypes.sizeof(self.ehdr) +
               len(self.segments) * ctypes.sizeof(self.segments[0]))

        for phdr in self.segments:
            # Segment data is laid out back to back after the headers.
            phdr.p_offset = off
            elf_file.write(phdr)
            off += phdr.p_filesz

        for note in self.notes:
            elf_file.write(note)
def get_arch_note(endianess, len_name, len_desc):
    """Returns a Note instance with the specified endianess.

    len_name is the length of the note name without its terminating NUL
    byte, len_desc the length of the description; both in bytes.
    """

    if endianess == ELFDATA2LSB:
        superclass = ctypes.LittleEndianStructure
    else:
        superclass = ctypes.BigEndianStructure

    # Room for the terminating NUL byte of the name.
    len_name = len_name + 1

    class Note(superclass):
        """Represents an ELF note, includes the content."""

        # n_desc is declared as whole 32 bit words, rounded up, which
        # keeps the description 4 byte aligned.
        _fields_ = [("n_namesz", ctypes.c_uint32),
                    ("n_descsz", ctypes.c_uint32),
                    ("n_type", ctypes.c_uint32),
                    ("n_name", ctypes.c_char * len_name),
                    ("n_desc", ctypes.c_uint32 * ((len_desc + 3) // 4))]

    return Note()
class Ident(ctypes.Structure):
    """Represents the ELF ident array in the ehdr structure."""

    # Sixteen single bytes in total, so no byte order applies here.
    _fields_ = [('ei_mag0', ctypes.c_ubyte),
                ('ei_mag1', ctypes.c_ubyte),
                ('ei_mag2', ctypes.c_ubyte),
                ('ei_mag3', ctypes.c_ubyte),
                ('ei_class', ctypes.c_ubyte),
                ('ei_data', ctypes.c_ubyte),
                ('ei_version', ctypes.c_ubyte),
                ('ei_osabi', ctypes.c_ubyte),
                ('ei_abiversion', ctypes.c_ubyte),
                ('ei_pad', ctypes.c_ubyte * 7)]

    def __init__(self, endianess, elfclass):
        """Fills in the magic, class, data encoding and version bytes.

        The remaining fields keep the zero value ctypes gives them.
        """
        super(Ident, self).__init__()

        # ELF magic: 0x7F 'E' 'L' 'F'
        self.ei_mag0 = 0x7F
        self.ei_mag1 = ord('E')
        self.ei_mag2 = ord('L')
        self.ei_mag3 = ord('F')
        self.ei_class = elfclass
        self.ei_data = endianess
        self.ei_version = EV_CURRENT
def get_arch_ehdr(endianess, elfclass):
    """Returns an ELF header instance with the specified endianess.

    The result is an EHDR64 when elfclass is ELFCLASS64 and an EHDR32
    otherwise.
    """

    if endianess == ELFDATA2LSB:
        superclass = ctypes.LittleEndianStructure
    else:
        superclass = ctypes.BigEndianStructure

    def init_common(ehdr):
        """Sets the fields that are filled in alike for both classes."""
        ehdr.e_ident = Ident(endianess, elfclass)
        ehdr.e_type = ET_CORE
        ehdr.e_version = EV_CURRENT
        ehdr.e_ehsize = ctypes.sizeof(ehdr)
        # The program header table directly follows the ELF header.
        ehdr.e_phoff = ctypes.sizeof(ehdr)
        ehdr.e_phentsize = ctypes.sizeof(get_arch_phdr(endianess, elfclass))

    class EHDR64(superclass):
        """Represents the 64 bit ELF header struct."""

        _fields_ = [('e_ident', Ident),
                    ('e_type', ctypes.c_uint16),
                    ('e_machine', ctypes.c_uint16),
                    ('e_version', ctypes.c_uint32),
                    ('e_entry', ctypes.c_uint64),
                    ('e_phoff', ctypes.c_uint64),
                    ('e_shoff', ctypes.c_uint64),
                    ('e_flags', ctypes.c_uint32),
                    ('e_ehsize', ctypes.c_uint16),
                    ('e_phentsize', ctypes.c_uint16),
                    ('e_phnum', ctypes.c_uint16),
                    ('e_shentsize', ctypes.c_uint16),
                    ('e_shnum', ctypes.c_uint16),
                    ('e_shstrndx', ctypes.c_uint16)]

        def __init__(self):
            # super() must name this class, not its base, or the base
            # class is skipped when looking up __init__.
            super(EHDR64, self).__init__()
            init_common(self)

    class EHDR32(superclass):
        """Represents the 32 bit ELF header struct."""

        _fields_ = [('e_ident', Ident),
                    ('e_type', ctypes.c_uint16),
                    ('e_machine', ctypes.c_uint16),
                    ('e_version', ctypes.c_uint32),
                    ('e_entry', ctypes.c_uint32),
                    ('e_phoff', ctypes.c_uint32),
                    ('e_shoff', ctypes.c_uint32),
                    ('e_flags', ctypes.c_uint32),
                    ('e_ehsize', ctypes.c_uint16),
                    ('e_phentsize', ctypes.c_uint16),
                    ('e_phnum', ctypes.c_uint16),
                    ('e_shentsize', ctypes.c_uint16),
                    ('e_shnum', ctypes.c_uint16),
                    ('e_shstrndx', ctypes.c_uint16)]

        def __init__(self):
            super(EHDR32, self).__init__()
            init_common(self)

    if elfclass == ELFCLASS64:
        return EHDR64()
    else:
        return EHDR32()
def get_arch_phdr(endianess, elfclass):
    """Returns a 32 or 64 bit PHDR instance with the specified endianess.

    The result is a PHDR64 when elfclass is ELFCLASS64 and a PHDR32
    otherwise.
    """

    if endianess == ELFDATA2LSB:
        superclass = ctypes.LittleEndianStructure
    else:
        superclass = ctypes.BigEndianStructure

    class PHDR64(superclass):
        """Represents the 64 bit ELF program header struct."""

        _fields_ = [('p_type', ctypes.c_uint32),
                    ('p_flags', ctypes.c_uint32),
                    ('p_offset', ctypes.c_uint64),
                    ('p_vaddr', ctypes.c_uint64),
                    ('p_paddr', ctypes.c_uint64),
                    ('p_filesz', ctypes.c_uint64),
                    ('p_memsz', ctypes.c_uint64),
                    ('p_align', ctypes.c_uint64)]

    class PHDR32(superclass):
        """Represents the 32 bit ELF program header struct."""

        # Note that p_flags sits in a different position than in PHDR64.
        _fields_ = [('p_type', ctypes.c_uint32),
                    ('p_offset', ctypes.c_uint32),
                    ('p_vaddr', ctypes.c_uint32),
                    ('p_paddr', ctypes.c_uint32),
                    ('p_filesz', ctypes.c_uint32),
                    ('p_memsz', ctypes.c_uint32),
                    ('p_flags', ctypes.c_uint32),
                    ('p_align', ctypes.c_uint32)]

    # Be backwards compatible with the 64 bit default of the callers.
    if elfclass == ELFCLASS64:
        return PHDR64()
    else:
        return PHDR32()
def int128_get64(val):
    """Returns low 64bit part of Int128 struct.

    val is indexed with "hi" and "lo"; the "hi" part has to be zero.
    """

    assert val["hi"] == 0
    return val["lo"]
def qlist_foreach(head, field_str):
    """Generator for qlists.

    Starts at head["lh_first"] and yields each dereferenced element,
    following element[field_str]["le_next"] until that pointer compares
    equal to 0.
    """

    var_p = head["lh_first"]
    while var_p != 0:
        var = var_p.dereference()
        # Fetch the successor before handing the element to the caller.
        var_p = var[field_str]["le_next"]
        yield var
def qemu_get_ram_block(ram_addr):
    """Returns the RAMBlock struct to which the given address belongs.

    Walks "ram_list.blocks" and raises gdb.GdbError when no block
    matches ram_addr.
    """

    ram_blocks = gdb.parse_and_eval("ram_list.blocks")

    for block in qlist_foreach(ram_blocks, "next"):
        if (ram_addr - block["offset"]) < block["used_length"]:
            return block

    raise gdb.GdbError("Bad ram offset %x" % ram_addr)
def qemu_get_ram_ptr(ram_addr):
    """Returns qemu vaddr for given guest physical address.

    Raises gdb.GdbError (from qemu_get_ram_block) when ram_addr does
    not fall into any RAM block.
    """

    block = qemu_get_ram_block(ram_addr)
    # Host start of the block plus the offset of ram_addr within it.
    return block["host"] + (ram_addr - block["offset"])
def memory_region_get_ram_ptr(memory_region):
    """Returns the qemu vaddr at which the memory region's RAM starts.

    For an alias region the lookup is done on the aliased region and
    "alias_offset" is added to the result.
    """

    if memory_region["alias"] != 0:
        return (memory_region_get_ram_ptr(memory_region["alias"].dereference())
                + memory_region["alias_offset"])

    return qemu_get_ram_ptr(memory_region["ram_addr"] & TARGET_PAGE_MASK)
def get_guest_phys_blocks():
    """Returns a list of ram blocks.

    Each block entry contains:
    'target_start': guest block phys start address
    'target_end':   guest block phys end address
    'host_addr':    qemu vaddr of the block's start

    A table with one row per visited RAM range is printed along the way.
    """

    guest_phys_blocks = []

    print("guest RAM blocks:")
    print("target_start     target_end       host_addr        message "
          "count")
    print("---------------- ---------------- ---------------- ------- "
          "-----")

    current_map_p = gdb.parse_and_eval("address_space_memory.current_map")
    current_map = current_map_p.dereference()

    # Conversion to int is needed for python 3
    # compatibility. Otherwise range doesn't cast the value itself and
    # breaks.
    for cur in range(int(current_map["nr"])):
        flat_range = (current_map["ranges"] + cur).dereference()
        memory_region = flat_range["mr"].dereference()

        # we only care about RAM
        if not memory_region["ram"]:
            continue

        section_size = int128_get64(flat_range["addr"]["size"])
        target_start = int128_get64(flat_range["addr"]["start"])
        target_end = target_start + section_size
        host_addr = (memory_region_get_ram_ptr(memory_region)
                     + flat_range["offset_in_region"])
        predecessor = None

        # find continuity in guest physical address space
        if guest_phys_blocks:
            predecessor = guest_phys_blocks[-1]
            predecessor_size = (predecessor["target_end"] -
                                predecessor["target_start"])

            # the memory API guarantees monotonically increasing
            # traversal
            assert predecessor["target_end"] <= target_start

            # we want continuity in both guest-physical and
            # host-virtual memory
            if (predecessor["target_end"] < target_start or
                    predecessor["host_addr"] + predecessor_size != host_addr):
                predecessor = None

        if predecessor is None:
            # isolated mapping, add it to the list
            guest_phys_blocks.append({"target_start": target_start,
                                      "target_end": target_end,
                                      "host_addr": host_addr})
            message = "added"
        else:
            # expand predecessor until @target_end; predecessor's
            # start doesn't change
            predecessor["target_end"] = target_end
            message = "joined"

        print("%016x %016x %016x %-7s %5u" %
              (target_start, target_end, host_addr.cast(UINTPTR_T),
               message, len(guest_phys_blocks)))

    return guest_phys_blocks
class DumpGuestMemory(gdb.Command):
    """Extract guest vmcore from qemu process coredump.

The two required arguments are FILE and ARCH:
FILE identifies the target file to write the guest vmcore to.
ARCH specifies the architecture for which the core will be generated.

This GDB command reimplements the dump-guest-memory QMP command in
python, using the representation of guest memory as captured in the qemu
coredump. The qemu process that has been dumped must have had the
command line option "-machine dump-guest-core=on" which is the default.

For simplicity, the "paging", "begin" and "end" parameters of the QMP
command are not supported -- no attempt is made to get the guest's
internal paging structures (ie. paging=false is hard-wired), and guest
memory is always fully dumped.

Currently aarch64-be, aarch64-le, X86_64, 386, s390, ppc64-be,
ppc64-le guests are supported.

The CORE/NT_PRSTATUS and QEMU notes (that is, the VCPUs' statuses) are
not written to the vmcore. Preparing these would require context that is
only present in the KVM host kernel module when the guest is alive. A
fake ELF note is written instead, only to keep the ELF parser of "crash"
happy.

Dependent on how busted the qemu process was at the time of the
coredump, this command might produce unpredictable results. If qemu
deliberately called abort(), or it was dumped in response to a signal at
a halfway fortunate point, then its coredump should be in reasonable
shape and this command should mostly work."""

    def __init__(self):
        # Registers the command under the name "dump-guest-memory" with
        # file name completion for its arguments.
        super(DumpGuestMemory, self).__init__("dump-guest-memory",
                                              gdb.COMMAND_DATA,
                                              gdb.COMPLETE_FILENAME)
        self.elf = None
        self.guest_phys_blocks = None

    def dump_init(self, vmcore):
        """Prepares and writes ELF structures to core file."""

        # Needed to make crash happy, data for more useful notes is
        # not available in a qemu core.
        self.elf.add_note("NONE", "EMPTY", 0)

        # We should never reach PN_XNUM for paging=false dumps,
        # there's just a handful of discontiguous ranges after
        # merging.
        # The constant is needed to account for the PT_NOTE segment.
        phdr_num = len(self.guest_phys_blocks) + 1
        assert phdr_num < PN_XNUM

        for block in self.guest_phys_blocks:
            block_size = block["target_end"] - block["target_start"]
            self.elf.add_segment(PT_LOAD, block["target_start"], block_size)

        self.elf.to_file(vmcore)

    def dump_iterate(self, vmcore):
        """Writes guest core to file."""

        qemu_core = gdb.inferiors()[0]
        for block in self.guest_phys_blocks:
            cur = block["host_addr"]
            left = block["target_end"] - block["target_start"]
            print("dumping range at %016x for length %016x" %
                  (cur.cast(UINTPTR_T), left))

            # Copy the block in chunks of at most TARGET_PAGE_SIZE bytes.
            while left > 0:
                chunk_size = min(TARGET_PAGE_SIZE, left)
                chunk = qemu_core.read_memory(cur, chunk_size)
                vmcore.write(chunk)
                cur += chunk_size
                left -= chunk_size

    def invoke(self, args, from_tty):
        """Handles command invocation from gdb."""

        # Unwittingly pressing the Enter key after the command should
        # not dump the same multi-gig coredump to the same file.
        self.dont_repeat()

        argv = gdb.string_to_argv(args)
        if len(argv) != 2:
            raise gdb.GdbError("usage: dump-guest-memory FILE ARCH")

        # Both of these can raise before the output file is created.
        self.elf = ELF(argv[1])
        self.guest_phys_blocks = get_guest_phys_blocks()

        with open(argv[0], "wb") as vmcore:
            self.dump_init(vmcore)
            self.dump_iterate(vmcore)