scripts/dump-guest-memory.py: Make methods functions
[sdk/emulator/qemu.git] / scripts / dump-guest-memory.py
1 # This python script adds a new gdb command, "dump-guest-memory". It
2 # should be loaded with "source dump-guest-memory.py" at the (gdb)
3 # prompt.
4 #
5 # Copyright (C) 2013, Red Hat, Inc.
6 #
7 # Authors:
8 #   Laszlo Ersek <lersek@redhat.com>
9 #
10 # This work is licensed under the terms of the GNU GPL, version 2 or later. See
11 # the COPYING file in the top-level directory.
12 #
13 # The leading docstring doesn't have idiomatic Python formatting. It is
14 # printed by gdb's "help" command (the first line is printed in the
15 # "help data" summary), and it should match how other help texts look in
16 # gdb.
17
18 import struct
19
# gdb type object for "uintptr_t"; host pointers are cast to it before
# being formatted as hexadecimal integers in the progress output.
UINTPTR_T = gdb.lookup_type("uintptr_t")
21
# Guest page geometry; memory is copied out one page-sized chunk at a time.
TARGET_PAGE_SIZE = 0x1000
TARGET_PAGE_MASK = 0xFFFFFFFFFFFFF000

# Various ELF constants
EM_X86_64   = 62        # AMD x86-64 target machine
ELFDATA2LSB = 1         # little endian
ELFCLASS64  = 2
# The magic is packed with the struct "4s" format, so it has to be a byte
# string. The b"" prefix is a no-op under Python 2 and mandatory under
# Python 3.
ELFMAG      = b"\x7FELF"
EV_CURRENT  = 1
ET_CORE     = 4
PT_LOAD     = 1
PT_NOTE     = 4

# Special value for e_phnum. This indicates that the real number of
# program headers is too large to fit into e_phnum. Instead the real
# value is in the field sh_info of section 0.
PN_XNUM = 0xFFFF

# Format strings for packing and header size calculation. They carry no
# byte-order prefix; the user prepends one (e.g. "<") before compiling.
ELF64_EHDR = (
    "4s"  # e_ident/magic
    "B"   # e_ident/class
    "B"   # e_ident/data
    "B"   # e_ident/version
    "B"   # e_ident/osabi
    "8s"  # e_ident/pad
    "H"   # e_type
    "H"   # e_machine
    "I"   # e_version
    "Q"   # e_entry
    "Q"   # e_phoff
    "Q"   # e_shoff
    "I"   # e_flags
    "H"   # e_ehsize
    "H"   # e_phentsize
    "H"   # e_phnum
    "H"   # e_shentsize
    "H"   # e_shnum
    "H"   # e_shstrndx
)
ELF64_PHDR = (
    "I"   # p_type
    "I"   # p_flags
    "Q"   # p_offset
    "Q"   # p_vaddr
    "Q"   # p_paddr
    "Q"   # p_filesz
    "Q"   # p_memsz
    "Q"   # p_align
)
70
def int128_get64(val):
    """Return the "lo" member of val after checking that "hi" is zero.

    val is indexed by the keys "hi" and "lo". An AssertionError is raised
    when the "hi" member is non-zero.
    """
    high_part = val["hi"]
    assert high_part == 0
    low_part = val["lo"]
    return low_part
74
def qlist_foreach(head, field_str):
    """Generate the elements of a linked list, front to back.

    head["lh_first"] points at the first element. Each element is reached
    by dereferencing the current pointer, and its successor pointer is
    read from element[field_str]["le_next"]. Iteration stops at the first
    pointer that compares equal to 0.
    """
    cursor = head["lh_first"]
    while cursor != 0:
        element = cursor.dereference()
        yield element
        # Follow the link only after the consumer is done with the element.
        cursor = element[field_str]["le_next"]
81
def qemu_get_ram_block(ram_addr):
    """Find the entry of ram_list.blocks that covers ram_addr.

    A block matches when the distance of ram_addr from the block's
    "offset" is below the block's "used_length". Raises gdb.GdbError when
    no block matches.
    """
    list_head = gdb.parse_and_eval("ram_list.blocks")
    for candidate in qlist_foreach(list_head, "next"):
        distance = ram_addr - candidate["offset"]
        if (distance < candidate["used_length"]):
            return candidate
    raise gdb.GdbError("Bad ram offset %x" % ram_addr)
88
def qemu_get_ram_ptr(ram_addr):
    """Translate ram_addr into an address inside its RAM block.

    The result is the block's "host" value advanced by the distance of
    ram_addr from the block's "offset".
    """
    owner = qemu_get_ram_block(ram_addr)
    host_base = owner["host"]
    displacement = ram_addr - owner["offset"]
    return host_base + displacement
92
def memory_region_get_ram_ptr(mr):
    """Return the RAM pointer for the memory region mr.

    A region with a non-zero "alias" is resolved recursively through the
    aliased region, and "alias_offset" is added to that result. Otherwise
    the region's "ram_addr", masked with TARGET_PAGE_MASK, is looked up
    directly.
    """
    if (mr["alias"] != 0):
        aliased_region = mr["alias"].dereference()
        aliased_ptr = memory_region_get_ram_ptr(aliased_region)
        return aliased_ptr + mr["alias_offset"]

    page_aligned = mr["ram_addr"] & TARGET_PAGE_MASK
    return qemu_get_ram_ptr(page_aligned)
98
def get_guest_phys_blocks():
    """Collect the guest's RAM ranges from the flattened memory map.

    Walks the ranges of address_space_memory.current_map in the debugged
    process and skips every range whose memory region is not RAM. A range
    that directly continues its predecessor in both guest-physical and
    host-virtual address space is merged into that predecessor; any other
    range starts a new block. One line of a progress table is printed per
    RAM range.

    Returns a list of dicts with the keys "target_start", "target_end" and
    "host_addr".
    """
    guest_phys_blocks = []
    # A single parenthesized argument keeps print valid both as the
    # Python 2 statement and as the Python 3 function.
    print("guest RAM blocks:")
    print("target_start     target_end       host_addr        message "
          "count")
    print("---------------- ---------------- ---------------- ------- "
          "-----")

    current_map_p = gdb.parse_and_eval("address_space_memory.current_map")
    current_map = current_map_p.dereference()
    # Convert explicitly: range() is not guaranteed to accept a gdb.Value.
    for cur in range(int(current_map["nr"])):
        flat_range   = (current_map["ranges"] + cur).dereference()
        mr           = flat_range["mr"].dereference()

        # we only care about RAM
        if (not mr["ram"]):
            continue

        section_size = int128_get64(flat_range["addr"]["size"])
        target_start = int128_get64(flat_range["addr"]["start"])
        target_end   = target_start + section_size
        host_addr    = (memory_region_get_ram_ptr(mr) +
                        flat_range["offset_in_region"])
        predecessor = None

        # find continuity in guest physical address space
        if guest_phys_blocks:
            predecessor = guest_phys_blocks[-1]
            predecessor_size = (predecessor["target_end"] -
                                predecessor["target_start"])

            # the memory API guarantees monotonically increasing
            # traversal
            assert (predecessor["target_end"] <= target_start)

            # we want continuity in both guest-physical and
            # host-virtual memory
            if (predecessor["target_end"] < target_start or
                predecessor["host_addr"] + predecessor_size != host_addr):
                predecessor = None

        if (predecessor is None):
            # isolated mapping, add it to the list
            guest_phys_blocks.append({"target_start": target_start,
                                      "target_end"  : target_end,
                                      "host_addr"   : host_addr})
            message = "added"
        else:
            # expand predecessor until @target_end; predecessor's
            # start doesn't change
            predecessor["target_end"] = target_end
            message = "joined"

        print("%016x %016x %016x %-7s %5u" %
              (target_start, target_end, host_addr.cast(UINTPTR_T),
               message, len(guest_phys_blocks)))

    return guest_phys_blocks
157
158
class DumpGuestMemory(gdb.Command):
    """Extract guest vmcore from qemu process coredump.

The sole argument is FILE, identifying the target file to write the
guest vmcore to.

This GDB command reimplements the dump-guest-memory QMP command in
python, using the representation of guest memory as captured in the qemu
coredump. The qemu process that has been dumped must have had the
command line option "-machine dump-guest-core=on".

For simplicity, the "paging", "begin" and "end" parameters of the QMP
command are not supported -- no attempt is made to get the guest's
internal paging structures (ie. paging=false is hard-wired), and guest
memory is always fully dumped.

Only x86_64 guests are supported.

The CORE/NT_PRSTATUS and QEMU notes (that is, the VCPUs' statuses) are
not written to the vmcore. Preparing these would require context that is
only present in the KVM host kernel module when the guest is alive. A
fake ELF note is written instead, only to keep the ELF parser of "crash"
happy.

Dependent on how busted the qemu process was at the time of the
coredump, this command might produce unpredictable results. If qemu
deliberately called abort(), or it was dumped in response to a signal at
a halfway fortunate point, then its coredump should be in reasonable
shape and this command should mostly work."""

    def __init__(self):
        # Set up the "dump-guest-memory" command and precompile the
        # little-endian ELF header layouts.
        super(DumpGuestMemory, self).__init__("dump-guest-memory",
                                              gdb.COMMAND_DATA,
                                              gdb.COMPLETE_FILENAME)
        self.elf64_ehdr_le = struct.Struct("<%s" % ELF64_EHDR)
        self.elf64_phdr_le = struct.Struct("<%s" % ELF64_PHDR)
        self.guest_phys_blocks = None
        # The remaining state is (re)computed by dump_init() on every
        # invocation; define it here so the attributes always exist.
        self.dump_info = None
        self.note = None
        self.phdr_num = 0
        self.memory_offset = 0

    @staticmethod
    def _to_bytes(value):
        # Return value as a byte string. The struct "s" format needs
        # byte strings under Python 3; under Python 2 a plain str
        # already is one and is returned unchanged. Text is encoded as
        # latin-1 so that every code point below 256 maps to the byte
        # of the same value.
        if isinstance(value, bytes):
            return value
        return value.encode("latin-1")

    def cpu_get_dump_info(self):
        # We can't synchronize the registers with KVM post-mortem, and
        # the bits in (first_x86_cpu->env.hflags) seem to be stale; they
        # may not reflect long mode for example. Hence just assume the
        # most common values. This also means that instruction pointer
        # etc. will be bogus in the dump, but at least the RAM contents
        # should be valid.
        self.dump_info = {"d_machine": EM_X86_64,
                          "d_endian" : ELFDATA2LSB,
                          "d_class"  : ELFCLASS64}

    def encode_elf64_ehdr_le(self):
        # Return the packed ELF64 file header. The program headers
        # follow the file header directly, hence e_phoff equals the
        # header size.
        return self.elf64_ehdr_le.pack(
            self._to_bytes(ELFMAG),      # e_ident/magic
            self.dump_info["d_class"],   # e_ident/class
            self.dump_info["d_endian"],  # e_ident/data
            EV_CURRENT,                  # e_ident/version
            0,                           # e_ident/osabi
            b"",                         # e_ident/pad
            ET_CORE,                     # e_type
            self.dump_info["d_machine"], # e_machine
            EV_CURRENT,                  # e_version
            0,                           # e_entry
            self.elf64_ehdr_le.size,     # e_phoff
            0,                           # e_shoff
            0,                           # e_flags
            self.elf64_ehdr_le.size,     # e_ehsize
            self.elf64_phdr_le.size,     # e_phentsize
            self.phdr_num,               # e_phnum
            0,                           # e_shentsize
            0,                           # e_shnum
            0                            # e_shstrndx
        )

    def encode_elf64_note_le(self):
        # Return the packed PT_NOTE program header. The note sits
        # immediately before the memory dump, so its file offset is
        # memory_offset minus the note's length.
        return self.elf64_phdr_le.pack(PT_NOTE,              # p_type
                                       0,                    # p_flags
                                       (self.memory_offset -
                                        len(self.note)),     # p_offset
                                       0,                    # p_vaddr
                                       0,                    # p_paddr
                                       len(self.note),       # p_filesz
                                       len(self.note),       # p_memsz
                                       0                     # p_align
                                      )

    def encode_elf64_load_le(self, offset, start_hwaddr, range_size):
        # Return the packed PT_LOAD program header for one guest RAM
        # range: offset is the file offset of its data, start_hwaddr
        # its guest-physical start, range_size its length in bytes.
        return self.elf64_phdr_le.pack(PT_LOAD,      # p_type
                                       0,            # p_flags
                                       offset,       # p_offset
                                       0,            # p_vaddr
                                       start_hwaddr, # p_paddr
                                       range_size,   # p_filesz
                                       range_size,   # p_memsz
                                       0             # p_align
                                      )

    def note_init(self, name, desc, type):
        # Build the ELF note and store it in self.note. name and desc
        # may be byte strings or text; type is the n_type value.
        name = self._to_bytes(name)
        desc = self._to_bytes(desc)
        # name must include a trailing NUL
        # Round both sizes up to a multiple of four. Floor division is
        # required: with "/" Python 3 yields a float and the rounding
        # is lost.
        namesz = (len(name) + 1 + 3) // 4 * 4
        descsz = (len(desc)     + 3) // 4 * 4
        fmt = ("<"   # little endian
               "I"   # n_namesz
               "I"   # n_descsz
               "I"   # n_type
               "%us" # name
               "%us" # desc
               % (namesz, descsz))
        self.note = struct.pack(fmt,
                                len(name) + 1, len(desc), type, name, desc)

    def dump_init(self):
        # Gather the guest RAM ranges and compute the vmcore layout.
        self.guest_phys_blocks = get_guest_phys_blocks()
        self.cpu_get_dump_info()
        # we have no way to retrieve the VCPU status from KVM
        # post-mortem
        self.note_init("NONE", "EMPTY", 0)

        # Account for PT_NOTE.
        self.phdr_num = 1

        # We should never reach PN_XNUM for paging=false dumps: there's
        # just a handful of discontiguous ranges after merging.
        self.phdr_num += len(self.guest_phys_blocks)
        assert (self.phdr_num < PN_XNUM)

        # Calculate the ELF file offset where the memory dump commences:
        #
        #   ELF header
        #   PT_NOTE
        #   PT_LOAD: 1
        #   PT_LOAD: 2
        #   ...
        #   PT_LOAD: len(self.guest_phys_blocks)
        #   ELF note
        #   memory dump
        self.memory_offset = (self.elf64_ehdr_le.size +
                              self.elf64_phdr_le.size * self.phdr_num +
                              len(self.note))

    def dump_begin(self, vmcore):
        # Write everything that precedes the memory contents: the ELF
        # header, the PT_NOTE header, one PT_LOAD header per RAM block,
        # and the note itself.
        vmcore.write(self.encode_elf64_ehdr_le())
        vmcore.write(self.encode_elf64_note_le())
        # File offset at which the next block's data will be placed.
        running = self.memory_offset
        for block in self.guest_phys_blocks:
            range_size = block["target_end"] - block["target_start"]
            vmcore.write(self.encode_elf64_load_le(running,
                                                   block["target_start"],
                                                   range_size))
            running += range_size
        vmcore.write(self.note)

    def dump_iterate(self, vmcore):
        # Copy the contents of every RAM block from the first inferior
        # into vmcore, at most TARGET_PAGE_SIZE bytes per read.
        qemu_core = gdb.inferiors()[0]
        for block in self.guest_phys_blocks:
            cur  = block["host_addr"]
            left = block["target_end"] - block["target_start"]
            print("dumping range at %016x for length %016x" %
                  (cur.cast(UINTPTR_T), left))
            while (left > 0):
                chunk_size = min(TARGET_PAGE_SIZE, left)
                chunk = qemu_core.read_memory(cur, chunk_size)
                vmcore.write(chunk)
                cur  += chunk_size
                left -= chunk_size

    def create_vmcore(self, filename):
        # Write the complete vmcore to filename. The with-statement
        # closes the file even when a memory read or a write fails
        # half-way through.
        with open(filename, "wb") as vmcore:
            self.dump_begin(vmcore)
            self.dump_iterate(vmcore)

    def invoke(self, args, from_tty):
        # Entry point called by gdb; args is the raw argument string
        # and must hold exactly one word, the output file name.

        # Unwittingly pressing the Enter key after the command should
        # not dump the same multi-gig coredump to the same file.
        self.dont_repeat()

        argv = gdb.string_to_argv(args)
        if (len(argv) != 1):
            raise gdb.GdbError("usage: dump-guest-memory FILE")

        self.dump_init()
        self.create_vmcore(argv[0])
340
# Creating an instance runs DumpGuestMemory.__init__, which sets up the
# "dump-guest-memory" command; the instance itself is not kept.
DumpGuestMemory()