# This python script adds a new gdb command, "dump-guest-memory". It
# should be loaded with "source dump-guest-memory.py" at the (gdb)
# prompt.
5 # Copyright (C) 2013, Red Hat, Inc.
8 # Laszlo Ersek <lersek@redhat.com>
10 # This work is licensed under the terms of the GNU GPL, version 2 or later. See
11 # the COPYING file in the top-level directory.
# The leading docstring doesn't have idiomatic Python formatting. It is
# printed by gdb's "help" command (the first line is printed in the
# "help data" summary), and it should match how other help texts look in
# gdb.
import struct

# gdb type used to print host pointers as plain integers.
UINTPTR_T = gdb.lookup_type("uintptr_t")

TARGET_PAGE_SIZE = 0x1000
TARGET_PAGE_MASK = 0xFFFFFFFFFFFFF000

# Various ELF constants
EM_X86_64   = 62        # AMD x86-64 target machine
ELFDATA2LSB = 1         # little endian
ELFCLASS64  = 2         # 64-bit object file
ELFMAG      = b"\x7FELF"  # bytes: struct's "s" code rejects str on python 3
EV_CURRENT  = 1         # current ELF version
ET_CORE     = 4         # core file
PT_LOAD     = 1         # loadable segment
PT_NOTE     = 4         # auxiliary information

# Special value for e_phnum. This indicates that the real number of
# program headers is too large to fit into e_phnum. Instead the real
# value is in the field sh_info of section 0.
PN_XNUM = 0xFFFF

# Format strings for packing and header size calculation.
# The byte order prefix is prepended by the user of these strings.
ELF64_EHDR = ("4s" # e_ident/magic
              "B"  # e_ident/class
              "B"  # e_ident/data
              "B"  # e_ident/version
              "B"  # e_ident/osabi
              "8s" # e_ident/pad
              "H"  # e_type
              "H"  # e_machine
              "I"  # e_version
              "Q"  # e_entry
              "Q"  # e_phoff
              "Q"  # e_shoff
              "I"  # e_flags
              "H"  # e_ehsize
              "H"  # e_phentsize
              "H"  # e_phnum
              "H"  # e_shentsize
              "H"  # e_shnum
              "H"  # e_shstrndx
              )

ELF64_PHDR = ("I"  # p_type
              "I"  # p_flags
              "Q"  # p_offset
              "Q"  # p_vaddr
              "Q"  # p_paddr
              "Q"  # p_filesz
              "Q"  # p_memsz
              "Q"  # p_align
              )
def int128_get64(val):
    """Returns low 64bit part of Int128 struct.

    The value must fit into 64 bits, that is, its high half must be
    zero; anything else trips the assertion.
    """

    # NOTE(review): assumes the Int128 struct exposes its halves as the
    # members "lo" and "hi" -- confirm against the debugged qemu build.
    assert val["hi"] == 0, "Int128 value does not fit into 64 bits"
    return val["lo"]
def qlist_foreach(head, field_str):
    """Generator for qlists.

    Starts at head["lh_first"] and yields every dereferenced element,
    following the "le_next" pointer stored in the element's FIELD_STR
    member until a null pointer is reached.
    """

    var_p = head["lh_first"]
    while var_p != 0:
        var = var_p.dereference()
        # fetch the successor before handing the element to the caller
        var_p = var[field_str]["le_next"]
        yield var
def qemu_get_ram_block(ram_addr):
    """Returns the RAMBlock struct to which the given address belongs.

    Walks "ram_list.blocks" and picks the first block whose
    [offset, offset + used_length) window contains RAM_ADDR.
    Raises gdb.GdbError if no block matches.
    """

    ram_blocks = gdb.parse_and_eval("ram_list.blocks")

    for block in qlist_foreach(ram_blocks, "next"):
        if (ram_addr - block["offset"]) < block["used_length"]:
            return block

    raise gdb.GdbError("Bad ram offset %x" % ram_addr)
def qemu_get_ram_ptr(ram_addr):
    """Returns qemu vaddr for given guest physical address.

    The address is resolved relative to the RAMBlock that contains it:
    the block's "host" pointer plus the distance from the block's
    "offset".
    """

    owner = qemu_get_ram_block(ram_addr)
    offset_in_block = ram_addr - owner["offset"]
    return owner["host"] + offset_in_block
def memory_region_get_ram_ptr(memory_region):
    """Returns the qemu vaddr backing the given memory region.

    A region with a non-null "alias" is resolved recursively through the
    aliased region, adding this region's "alias_offset". Otherwise the
    region's page-masked "ram_addr" is looked up directly.
    """

    alias_p = memory_region["alias"]
    if alias_p == 0:
        # not an alias: the region carries its own ram address
        return qemu_get_ram_ptr(memory_region["ram_addr"] & TARGET_PAGE_MASK)

    aliased_ptr = memory_region_get_ram_ptr(alias_p.dereference())
    return aliased_ptr + memory_region["alias_offset"]
def get_guest_phys_blocks():
    """Returns a list of ram blocks.

    Each block entry contains:
    'target_start': guest block phys start address
    'target_end':   guest block phys end address
    'host_addr':    qemu vaddr of the block's start

    Ranges that are adjacent in both guest-physical and host-virtual
    memory are merged into a single entry. One line is printed for every
    RAM range visited, saying whether it was added or joined.
    """

    guest_phys_blocks = []

    # header columns are padded to line up with the "%016x" rows below
    print("guest RAM blocks:")
    print("target_start     target_end       host_addr        message "
          "count")
    print("---------------- ---------------- ---------------- ------- "
          "-----")

    current_map_p = gdb.parse_and_eval("address_space_memory.current_map")
    current_map = current_map_p.dereference()

    # Conversion to int is needed for python 3
    # compatibility. Otherwise range doesn't cast the value itself and
    # breaks.
    for cur in range(int(current_map["nr"])):
        flat_range = (current_map["ranges"] + cur).dereference()
        memory_region = flat_range["mr"].dereference()

        # we only care about RAM
        if not memory_region["ram"]:
            continue

        section_size = int128_get64(flat_range["addr"]["size"])
        target_start = int128_get64(flat_range["addr"]["start"])
        target_end = target_start + section_size
        host_addr = (memory_region_get_ram_ptr(memory_region)
                     + flat_range["offset_in_region"])
        predecessor = None

        # find continuity in guest physical address space
        if guest_phys_blocks:
            predecessor = guest_phys_blocks[-1]
            predecessor_size = (predecessor["target_end"] -
                                predecessor["target_start"])

            # the memory API guarantees monotonically increasing
            # traversal
            assert predecessor["target_end"] <= target_start

            # we want continuity in both guest-physical and
            # host-virtual memory
            if (predecessor["target_end"] < target_start or
                    predecessor["host_addr"] + predecessor_size != host_addr):
                predecessor = None

        if predecessor is None:
            # isolated mapping, add it to the list
            guest_phys_blocks.append({"target_start": target_start,
                                      "target_end":   target_end,
                                      "host_addr":    host_addr})
            message = "added"
        else:
            # expand predecessor until @target_end; predecessor's
            # start doesn't change
            predecessor["target_end"] = target_end
            message = "joined"

        print("%016x %016x %016x %-7s %5u" %
              (target_start, target_end, host_addr.cast(UINTPTR_T),
               message, len(guest_phys_blocks)))

    return guest_phys_blocks
class DumpGuestMemory(gdb.Command):
    """Extract guest vmcore from qemu process coredump.

The sole argument is FILE, identifying the target file to write the
guest vmcore to.

This GDB command reimplements the dump-guest-memory QMP command in
python, using the representation of guest memory as captured in the qemu
coredump. The qemu process that has been dumped must have had the
command line option "-machine dump-guest-core=on".

For simplicity, the "paging", "begin" and "end" parameters of the QMP
command are not supported -- no attempt is made to get the guest's
internal paging structures (ie. paging=false is hard-wired), and guest
memory is always fully dumped.

Only x86_64 guests are supported.

The CORE/NT_PRSTATUS and QEMU notes (that is, the VCPUs' statuses) are
not written to the vmcore. Preparing these would require context that is
only present in the KVM host kernel module when the guest is alive. A
fake ELF note is written instead, only to keep the ELF parser of "crash"
happy.

Dependent on how busted the qemu process was at the time of the
coredump, this command might produce unpredictable results. If qemu
deliberately called abort(), or it was dumped in response to a signal at
a halfway fortunate point, then its coredump should be in reasonable
shape and this command should mostly work."""

    def __init__(self):
        """Registers the command and precompiles the ELF header packers."""
        super(DumpGuestMemory, self).__init__("dump-guest-memory",
                                              gdb.COMMAND_DATA,
                                              gdb.COMPLETE_FILENAME)
        self.elf64_ehdr_le = struct.Struct("<%s" % ELF64_EHDR)
        self.elf64_phdr_le = struct.Struct("<%s" % ELF64_PHDR)
        self.guest_phys_blocks = None
        # The attributes below are filled in by dump_init().
        self.dump_info = None
        self.note = None
        self.phdr_num = None
        self.memory_offset = None

    @staticmethod
    def _to_bytes(value):
        """Returns VALUE as a byte string, as struct's "s" code requires."""
        if isinstance(value, bytes):
            return value
        return value.encode("latin-1")

    def cpu_get_dump_info(self):
        """Stores the (hard-wired) machine, endianness and ELF class."""
        # We can't synchronize the registers with KVM post-mortem, and
        # the bits in (first_x86_cpu->env.hflags) seem to be stale; they
        # may not reflect long mode for example. Hence just assume the
        # most common values. This also means that instruction pointer
        # etc. will be bogus in the dump, but at least the RAM contents
        # should be valid.
        self.dump_info = {"d_machine": EM_X86_64,
                          "d_endian" : ELFDATA2LSB,
                          "d_class"  : ELFCLASS64}

    def encode_elf64_ehdr_le(self):
        """Returns the packed little endian ELF64 file header."""
        return self.elf64_ehdr_le.pack(
            self._to_bytes(ELFMAG),       # e_ident/magic
            self.dump_info["d_class"],    # e_ident/class
            self.dump_info["d_endian"],   # e_ident/data
            EV_CURRENT,                   # e_ident/version
            0,                            # e_ident/osabi
            b"",                          # e_ident/pad
            ET_CORE,                      # e_type
            self.dump_info["d_machine"],  # e_machine
            EV_CURRENT,                   # e_version
            0,                            # e_entry
            self.elf64_ehdr_le.size,      # e_phoff
            0,                            # e_shoff
            0,                            # e_flags
            self.elf64_ehdr_le.size,      # e_ehsize
            self.elf64_phdr_le.size,      # e_phentsize
            self.phdr_num,                # e_phnum
            0,                            # e_shentsize
            0,                            # e_shnum
            0                             # e_shstrndx
        )

    def encode_elf64_note_le(self):
        """Returns the packed PT_NOTE program header.

        The note is stored right in front of the memory dump, hence its
        file offset is derived from self.memory_offset.
        """
        return self.elf64_phdr_le.pack(PT_NOTE,          # p_type
                                       0,                # p_flags
                                       (self.memory_offset -
                                        len(self.note)),  # p_offset
                                       0,                # p_vaddr
                                       0,                # p_paddr
                                       len(self.note),   # p_filesz
                                       len(self.note),   # p_memsz
                                       0                 # p_align
                                       )

    def encode_elf64_load_le(self, offset, start_hwaddr, range_size):
        """Returns the packed PT_LOAD program header for one RAM range.

        OFFSET is the file offset of the range's contents, START_HWADDR
        its guest-physical start address, RANGE_SIZE its length in bytes.
        """
        return self.elf64_phdr_le.pack(PT_LOAD,       # p_type
                                       0,             # p_flags
                                       offset,        # p_offset
                                       0,             # p_vaddr
                                       start_hwaddr,  # p_paddr
                                       range_size,    # p_filesz
                                       range_size,    # p_memsz
                                       0              # p_align
                                       )

    def note_init(self, name, desc, type):
        """Builds the ELF note and stores it in self.note.

        NAME and DESC may be given as text or as byte strings; both are
        padded with NULs to a multiple of four bytes in the note.
        """
        name = self._to_bytes(name)
        desc = self._to_bytes(desc)
        # name must include a trailing NUL
        namesz = (len(name) + 1 + 3) // 4 * 4
        descsz = (len(desc) + 3) // 4 * 4
        fmt = ("<"    # little endian
               "I"    # n_namesz
               "I"    # n_descsz
               "I"    # n_type
               "%us"  # name
               "%us"  # desc
               % (namesz, descsz))
        self.note = struct.pack(fmt,
                                len(name) + 1, len(desc), type, name, desc)

    def dump_init(self):
        """Collects the guest RAM blocks and computes the file layout."""
        self.guest_phys_blocks = get_guest_phys_blocks()
        self.cpu_get_dump_info()
        # we have no way to retrieve the VCPU status from KVM
        # post-mortem
        self.note_init("NONE", "EMPTY", 0)

        # Account for PT_NOTE.
        self.phdr_num = 1

        # We should never reach PN_XNUM for paging=false dumps: there's
        # just a handful of discontiguous ranges after merging.
        self.phdr_num += len(self.guest_phys_blocks)
        assert self.phdr_num < PN_XNUM

        # Calculate the ELF file offset where the memory dump commences:
        #
        #   ELF header
        #   PT_NOTE
        #   PT_LOAD: 1
        #   PT_LOAD: 2
        #   ...
        #   PT_LOAD: len(self.guest_phys_blocks)
        #   ELF note
        #   memory dump
        self.memory_offset = (self.elf64_ehdr_le.size +
                              self.elf64_phdr_le.size * self.phdr_num +
                              len(self.note))

    def dump_begin(self, vmcore):
        """Writes the ELF header, all program headers and the note."""
        vmcore.write(self.encode_elf64_ehdr_le())
        vmcore.write(self.encode_elf64_note_le())
        # file offset of the next range's contents
        running = self.memory_offset
        for block in self.guest_phys_blocks:
            range_size = block["target_end"] - block["target_start"]
            vmcore.write(self.encode_elf64_load_le(running,
                                                   block["target_start"],
                                                   range_size))
            running += range_size
        vmcore.write(self.note)

    def dump_iterate(self, vmcore):
        """Copies the contents of every RAM block into VMCORE."""
        qemu_core = gdb.inferiors()[0]
        for block in self.guest_phys_blocks:
            cur = block["host_addr"]
            left = block["target_end"] - block["target_start"]
            print("dumping range at %016x for length %016x" %
                  (cur.cast(UINTPTR_T), left))
            # copy at most one target page per read
            while left > 0:
                chunk_size = min(TARGET_PAGE_SIZE, left)
                chunk = qemu_core.read_memory(cur, chunk_size)
                vmcore.write(chunk)
                cur += chunk_size
                left -= chunk_size

    def create_vmcore(self, filename):
        """Writes the complete vmcore to FILENAME."""
        # "with" closes the file even if reading the coredump fails midway
        with open(filename, "wb") as vmcore:
            self.dump_begin(vmcore)
            self.dump_iterate(vmcore)

    def invoke(self, args, from_tty):
        """Entry point called by gdb; ARGS must hold exactly one FILE."""
        # Unwittingly pressing the Enter key after the command should
        # not dump the same multi-gig coredump to the same file.
        self.dont_repeat()

        argv = gdb.string_to_argv(args)
        if len(argv) != 1:
            raise gdb.GdbError("usage: dump-guest-memory FILE")

        self.dump_init()
        self.create_vmcore(argv[0])