scripts/dump-guest-memory.py: Improve python 3 compatibility
[sdk/emulator/qemu.git] / scripts / dump-guest-memory.py
1 # This python script adds a new gdb command, "dump-guest-memory". It
2 # should be loaded with "source dump-guest-memory.py" at the (gdb)
3 # prompt.
4 #
5 # Copyright (C) 2013, Red Hat, Inc.
6 #
7 # Authors:
8 #   Laszlo Ersek <lersek@redhat.com>
9 #
10 # This work is licensed under the terms of the GNU GPL, version 2 or later. See
11 # the COPYING file in the top-level directory.
12 #
13 # The leading docstring doesn't have idiomatic Python formatting. It is
14 # printed by gdb's "help" command (the first line is printed in the
15 # "help data" summary), and it should match how other help texts look in
16 # gdb.
17
18 import struct
19
# gdb type that host pointers are cast to before being printed as
# hexadecimal integers.
UINTPTR_T = gdb.lookup_type("uintptr_t")

# Page geometry: 0x1000-byte pages; the mask clears the in-page offset
# bits of a 64-bit address.
TARGET_PAGE_SIZE = 1 << 12
TARGET_PAGE_MASK = 0xFFFFFFFFFFFFFFFF & ~(TARGET_PAGE_SIZE - 1)
24
# Various ELF constants
EM_X86_64   = 62        # AMD x86-64 target machine
ELFDATA2LSB = 1         # little endian
ELFCLASS64  = 2
# Must be a byte string: it is packed with struct's "4s" format, which
# rejects text strings on Python 3. On Python 2 b"..." is a plain str.
ELFMAG      = b"\x7FELF"
EV_CURRENT  = 1
ET_CORE     = 4
PT_LOAD     = 1
PT_NOTE     = 4

# Special value for e_phnum. This indicates that the real number of
# program headers is too large to fit into e_phnum. Instead the real
# value is in the field sh_info of section 0.
PN_XNUM = 0xFFFF
39
# Format strings for packing and header size calculation. Each table
# pairs a struct format code with the name of the ELF field it encodes;
# the format string is the concatenation of the codes in table order.
_ELF64_EHDR_FIELDS = (
    ("4s", "e_ident/magic"),
    ("B",  "e_ident/class"),
    ("B",  "e_ident/data"),
    ("B",  "e_ident/version"),
    ("B",  "e_ident/osabi"),
    ("8s", "e_ident/pad"),
    ("H",  "e_type"),
    ("H",  "e_machine"),
    ("I",  "e_version"),
    ("Q",  "e_entry"),
    ("Q",  "e_phoff"),
    ("Q",  "e_shoff"),
    ("I",  "e_flags"),
    ("H",  "e_ehsize"),
    ("H",  "e_phentsize"),
    ("H",  "e_phnum"),
    ("H",  "e_shentsize"),
    ("H",  "e_shnum"),
    ("H",  "e_shstrndx"),
)
ELF64_EHDR = "".join(code for code, _field in _ELF64_EHDR_FIELDS)

_ELF64_PHDR_FIELDS = (
    ("I", "p_type"),
    ("I", "p_flags"),
    ("Q", "p_offset"),
    ("Q", "p_vaddr"),
    ("Q", "p_paddr"),
    ("Q", "p_filesz"),
    ("Q", "p_memsz"),
    ("Q", "p_align"),
)
ELF64_PHDR = "".join(code for code, _field in _ELF64_PHDR_FIELDS)
70
def int128_get64(val):
    """Return the "lo" member of a two-part ("hi"/"lo") 128-bit value.

    The value must fit in the low part: a nonzero "hi" member trips an
    assertion.
    """
    high_part = val["hi"]
    assert high_part == 0
    return val["lo"]
74
def qlist_foreach(head, field_str):
    """Generate the elements of a linked list starting at head.

    head       -- list head; its "lh_first" member points at the first
                  element (or compares equal to 0 for an empty list)
    field_str  -- name of the element member holding the link; that
                  member's "le_next" points at the following element

    Each element is yielded dereferenced. The link to the next element
    is read only after the consumer resumes the generator.
    """
    node_ptr = head["lh_first"]
    while node_ptr != 0:
        node = node_ptr.dereference()
        yield node
        node_ptr = node[field_str]["le_next"]
81
def qemu_get_ram_block(ram_addr):
    """Find the entry of "ram_list.blocks" that contains ram_addr.

    A block matches when ram_addr minus the block's "offset" is below
    the block's "used_length". Raises gdb.GdbError if no block matches.
    """
    block_list = gdb.parse_and_eval("ram_list.blocks")
    for candidate in qlist_foreach(block_list, "next"):
        offset_in_block = ram_addr - candidate["offset"]
        if offset_in_block < candidate["used_length"]:
            return candidate
    raise gdb.GdbError("Bad ram offset %x" % ram_addr)
88
def qemu_get_ram_ptr(ram_addr):
    """Translate a RAM address into a pointer inside its block's "host" area.

    The owning block is located with qemu_get_ram_block(), which raises
    gdb.GdbError for an address outside every block.
    """
    owner = qemu_get_ram_block(ram_addr)
    delta = ram_addr - owner["offset"]
    return owner["host"] + delta
92
def memory_region_get_ram_ptr(mr):
    """Return the host pointer backing memory region mr.

    Alias chains are followed down to the region whose "alias" is 0;
    that region's page-aligned "ram_addr" is translated with
    qemu_get_ram_ptr(), and the "alias_offset" of every alias on the way
    is then added back, innermost alias first.
    """
    alias_offsets = []
    region = mr
    while region["alias"] != 0:
        alias_offsets.append(region["alias_offset"])
        region = region["alias"].dereference()

    host_ptr = qemu_get_ram_ptr(region["ram_addr"] & TARGET_PAGE_MASK)

    # Offsets were collected outermost-first; apply them in the opposite
    # order so the additions happen exactly as a recursive unwind would.
    for offset in reversed(alias_offsets):
        host_ptr = host_ptr + offset
    return host_ptr
98
def get_guest_phys_blocks():
    """Collect the guest RAM ranges of "address_space_memory.current_map".

    Walks the flat ranges in order, skips ranges whose memory region is
    not RAM, and merges a range into the previous block when it directly
    continues that block both in guest-physical and in host-virtual
    address space. One table row is printed per RAM range.

    Returns a list of dicts with the keys "target_start", "target_end"
    and "host_addr".
    """
    blocks = []
    print("guest RAM blocks:")
    print("target_start     target_end       host_addr        message "
          "count")
    print("---------------- ---------------- ---------------- ------- "
          "-----")

    flat_view = gdb.parse_and_eval(
        "address_space_memory.current_map").dereference()

    # range() needs a real integer under python 3; it will not convert
    # the gdb value on its own.
    range_count = int(flat_view["nr"])

    for index in range(range_count):
        flat_range = (flat_view["ranges"] + index).dereference()
        region = flat_range["mr"].dereference()

        # only RAM-backed ranges are of interest
        if not region["ram"]:
            continue

        addr = flat_range["addr"]
        size = int128_get64(addr["size"])
        target_start = int128_get64(addr["start"])
        target_end = target_start + size
        host_addr = (memory_region_get_ram_ptr(region) +
                     flat_range["offset_in_region"])

        # Candidate for merging: the most recently recorded block.
        previous = blocks[-1] if blocks else None
        if previous is not None:
            previous_size = (previous["target_end"] -
                             previous["target_start"])

            # the memory API guarantees monotonically increasing
            # traversal
            assert previous["target_end"] <= target_start

            # merging requires continuity in guest-physical as well as
            # in host-virtual memory
            contiguous = (previous["target_end"] >= target_start and
                          previous["host_addr"] + previous_size == host_addr)
            if not contiguous:
                previous = None

        if previous is not None:
            # grow the previous block up to @target_end; its start stays
            # where it was
            previous["target_end"] = target_end
            message = "joined"
        else:
            # isolated mapping, record it as a new block
            blocks.append({"target_start": target_start,
                           "target_end"  : target_end,
                           "host_addr"   : host_addr})
            message = "added"

        row = (target_start, target_end, host_addr.cast(UINTPTR_T),
               message, len(blocks))
        print("%016x %016x %016x %-7s %5u" % row)

    return blocks
161
162
class DumpGuestMemory(gdb.Command):
    """Extract guest vmcore from qemu process coredump.

The sole argument is FILE, identifying the target file to write the
guest vmcore to.

This GDB command reimplements the dump-guest-memory QMP command in
python, using the representation of guest memory as captured in the qemu
coredump. The qemu process that has been dumped must have had the
command line option "-machine dump-guest-core=on".

For simplicity, the "paging", "begin" and "end" parameters of the QMP
command are not supported -- no attempt is made to get the guest's
internal paging structures (ie. paging=false is hard-wired), and guest
memory is always fully dumped.

Only x86_64 guests are supported.

The CORE/NT_PRSTATUS and QEMU notes (that is, the VCPUs' statuses) are
not written to the vmcore. Preparing these would require context that is
only present in the KVM host kernel module when the guest is alive. A
fake ELF note is written instead, only to keep the ELF parser of "crash"
happy.

Dependent on how busted the qemu process was at the time of the
coredump, this command might produce unpredictable results. If qemu
deliberately called abort(), or it was dumped in response to a signal at
a halfway fortunate point, then its coredump should be in reasonable
shape and this command should mostly work."""

    # NOTE: the class docstring above is the help text gdb prints for the
    # command, so implementation notes live in comments and in the
    # method docstrings instead.

    def __init__(self):
        """Register the command with gdb and precompile the ELF packers."""
        super(DumpGuestMemory, self).__init__("dump-guest-memory",
                                              gdb.COMMAND_DATA,
                                              gdb.COMPLETE_FILENAME)
        self.elf64_ehdr_le = struct.Struct("<%s" % ELF64_EHDR)
        self.elf64_phdr_le = struct.Struct("<%s" % ELF64_PHDR)
        self.guest_phys_blocks = None
        # The remaining state is filled in by dump_init() on each
        # invocation; declare it here so the attributes always exist.
        self.dump_info = None
        self.note = None
        self.phdr_num = None
        self.memory_offset = None

    @staticmethod
    def _to_bytes(value):
        """Return value as a byte string usable with struct's "s" format.

        Byte strings are returned unchanged; text strings are encoded as
        latin-1 so that every code point below 256 maps to the byte of
        the same value.
        """
        if isinstance(value, bytes):
            return value
        return value.encode("latin-1")

    def cpu_get_dump_info(self):
        """Set self.dump_info to the fixed x86_64 / little endian values."""
        # We can't synchronize the registers with KVM post-mortem, and
        # the bits in (first_x86_cpu->env.hflags) seem to be stale; they
        # may not reflect long mode for example. Hence just assume the
        # most common values. This also means that instruction pointer
        # etc. will be bogus in the dump, but at least the RAM contents
        # should be valid.
        self.dump_info = {"d_machine": EM_X86_64,
                          "d_endian" : ELFDATA2LSB,
                          "d_class"  : ELFCLASS64}

    def encode_elf64_ehdr_le(self):
        """Return the packed little endian ELF64 file header.

        Reads self.dump_info and self.phdr_num, so dump_init() must have
        run first.
        """
        return self.elf64_ehdr_le.pack(
                                 # "s" fields must be bytes on python 3
                                 self._to_bytes(ELFMAG),      # e_ident/magic
                                 self.dump_info["d_class"],   # e_ident/class
                                 self.dump_info["d_endian"],  # e_ident/data
                                 EV_CURRENT,                  # e_ident/version
                                 0,                           # e_ident/osabi
                                 b"",                         # e_ident/pad
                                 ET_CORE,                     # e_type
                                 self.dump_info["d_machine"], # e_machine
                                 EV_CURRENT,                  # e_version
                                 0,                           # e_entry
                                 self.elf64_ehdr_le.size,     # e_phoff
                                 0,                           # e_shoff
                                 0,                           # e_flags
                                 self.elf64_ehdr_le.size,     # e_ehsize
                                 self.elf64_phdr_le.size,     # e_phentsize
                                 self.phdr_num,               # e_phnum
                                 0,                           # e_shentsize
                                 0,                           # e_shnum
                                 0                            # e_shstrndx
                                )

    def encode_elf64_note_le(self):
        """Return the packed PT_NOTE program header.

        The note is placed immediately before the memory dump, hence its
        file offset is self.memory_offset minus the note length.
        """
        return self.elf64_phdr_le.pack(PT_NOTE,              # p_type
                                       0,                    # p_flags
                                       (self.memory_offset -
                                        len(self.note)),     # p_offset
                                       0,                    # p_vaddr
                                       0,                    # p_paddr
                                       len(self.note),       # p_filesz
                                       len(self.note),       # p_memsz
                                       0                     # p_align
                                      )

    def encode_elf64_load_le(self, offset, start_hwaddr, range_size):
        """Return the packed PT_LOAD program header for one RAM range.

        offset        -- file offset of the range's contents
        start_hwaddr  -- guest-physical start address (p_paddr)
        range_size    -- size of the range, used for both p_filesz and
                         p_memsz
        """
        return self.elf64_phdr_le.pack(PT_LOAD,      # p_type
                                       0,            # p_flags
                                       offset,       # p_offset
                                       0,            # p_vaddr
                                       start_hwaddr, # p_paddr
                                       range_size,   # p_filesz
                                       range_size,   # p_memsz
                                       0             # p_align
                                      )

    def note_init(self, name, desc, type):
        """Build the ELF note and store it in self.note.

        name  -- note name without the trailing NUL (bytes or text)
        desc  -- note descriptor (bytes or text)
        type  -- integer stored in n_type
        """
        name = self._to_bytes(name)
        desc = self._to_bytes(desc)

        # Round both fields up to a multiple of four bytes. Floor
        # division is required: "/" is true division on python 3 and
        # would produce float, incorrectly rounded sizes.
        # name must include a trailing NUL
        namesz = (len(name) + 1 + 3) // 4 * 4
        descsz = (len(desc)     + 3) // 4 * 4
        fmt = ("<"   # little endian
               "I"   # n_namesz
               "I"   # n_descsz
               "I"   # n_type
               "%us" # name
               "%us" # desc
               % (namesz, descsz))
        # struct pads "s" fields with NUL bytes up to the given width.
        self.note = struct.pack(fmt,
                                len(name) + 1, len(desc), type, name, desc)

    def dump_init(self):
        """Gather the guest RAM blocks and compute the vmcore layout."""
        self.guest_phys_blocks = get_guest_phys_blocks()
        self.cpu_get_dump_info()
        # we have no way to retrieve the VCPU status from KVM
        # post-mortem
        self.note_init(b"NONE", b"EMPTY", 0)

        # Account for PT_NOTE.
        self.phdr_num = 1

        # We should never reach PN_XNUM for paging=false dumps: there's
        # just a handful of discontiguous ranges after merging.
        self.phdr_num += len(self.guest_phys_blocks)
        assert (self.phdr_num < PN_XNUM)

        # Calculate the ELF file offset where the memory dump commences:
        #
        #   ELF header
        #   PT_NOTE
        #   PT_LOAD: 1
        #   PT_LOAD: 2
        #   ...
        #   PT_LOAD: len(self.guest_phys_blocks)
        #   ELF note
        #   memory dump
        self.memory_offset = (self.elf64_ehdr_le.size +
                              self.elf64_phdr_le.size * self.phdr_num +
                              len(self.note))

    def dump_begin(self, vmcore):
        """Write the ELF header, all program headers and the note."""
        vmcore.write(self.encode_elf64_ehdr_le())
        vmcore.write(self.encode_elf64_note_le())
        # File offset of the next range's contents; the ranges are laid
        # out back to back starting at self.memory_offset.
        running = self.memory_offset
        for block in self.guest_phys_blocks:
            range_size = block["target_end"] - block["target_start"]
            vmcore.write(self.encode_elf64_load_le(running,
                                                   block["target_start"],
                                                   range_size))
            running += range_size
        vmcore.write(self.note)

    def dump_iterate(self, vmcore):
        """Copy the contents of every RAM block into vmcore.

        Memory is read from the first inferior in chunks of at most
        TARGET_PAGE_SIZE bytes.
        """
        qemu_core = gdb.inferiors()[0]
        for block in self.guest_phys_blocks:
            cur  = block["host_addr"]
            left = block["target_end"] - block["target_start"]
            print("dumping range at %016x for length %016x" %
                  (cur.cast(UINTPTR_T), left))
            while (left > 0):
                chunk_size = min(TARGET_PAGE_SIZE, left)
                chunk = qemu_core.read_memory(cur, chunk_size)
                vmcore.write(chunk)
                cur  += chunk_size
                left -= chunk_size

    def create_vmcore(self, filename):
        """Write the complete vmcore to filename (opened in binary mode)."""
        # The context manager closes the file even if reading the
        # inferior's memory or writing fails midway.
        with open(filename, "wb") as vmcore:
            self.dump_begin(vmcore)
            self.dump_iterate(vmcore)

    def invoke(self, args, from_tty):
        """Entry point called by gdb; args must hold exactly one FILE.

        Raises gdb.GdbError with a usage message otherwise.
        """
        # Unwittingly pressing the Enter key after the command should
        # not dump the same multi-gig coredump to the same file.
        self.dont_repeat()

        argv = gdb.string_to_argv(args)
        if (len(argv) != 1):
            raise gdb.GdbError("usage: dump-guest-memory FILE")

        self.dump_init()
        self.create_vmcore(argv[0])

# Instantiating the class registers the command with gdb.
DumpGuestMemory()