scripts/dump-guest-memory.py: Introduce multi-arch support
[sdk/emulator/qemu.git] / scripts / dump-guest-memory.py
1 # This python script adds a new gdb command, "dump-guest-memory". It
2 # should be loaded with "source dump-guest-memory.py" at the (gdb)
3 # prompt.
4 #
5 # Copyright (C) 2013, Red Hat, Inc.
6 #
7 # Authors:
8 #   Laszlo Ersek <lersek@redhat.com>
9 #   Janosch Frank <frankja@linux.vnet.ibm.com>
10 #
11 # This work is licensed under the terms of the GNU GPL, version 2 or later. See
12 # the COPYING file in the top-level directory.
13 #
14 # The leading docstring doesn't have idiomatic Python formatting. It is
15 # printed by gdb's "help" command (the first line is printed in the
16 # "help data" summary), and it should match how other help texts look in
17 # gdb.
18
19 import ctypes
20
# gdb type object looked up under the name "uintptr_t". The code below
# passes it to gdb.Value.cast() so that host pointer values can be
# formatted with "%x".
UINTPTR_T = gdb.lookup_type("uintptr_t")
22
# Guest memory is copied in chunks of at most one page; the mask clears
# the in-page offset bits of a 64 bit address.
TARGET_PAGE_SIZE = 1 << 12
TARGET_PAGE_MASK = 0xFFFFFFFFFFFFFFFF & ~(TARGET_PAGE_SIZE - 1)

# Special value for e_phnum. This indicates that the real number of
# program headers is too large to fit into e_phnum. Instead the real
# value is in the field sh_info of section 0.
PN_XNUM = 0xFFFF

# e_ident[EI_VERSION] / e_version
EV_CURRENT = 1

# e_ident[EI_CLASS]
ELFCLASS32 = 1
ELFCLASS64 = 2

# e_ident[EI_DATA]
ELFDATA2LSB = 1
ELFDATA2MSB = 2

# e_type
ET_CORE = 4

# p_type
PT_LOAD = 1
PT_NOTE = 4

# e_machine
EM_386 = 3
EM_PPC = 20
EM_PPC64 = 21
EM_S390 = 22
EM_X86_64 = 62
EM_AARCH = 183
50
class ELF(object):
    """Representation of an ELF file.

    The object collects an ELF header, a list of program headers
    (segments) and a list of notes, and can write them to a file.
    Segment 0 is always the PT_NOTE segment that is created by the
    constructor.

    Attributes:
    ehdr       -- ELF header object from get_arch_ehdr()
    notes      -- note objects in the order they were added
    segments   -- program header objects, PT_NOTE first
    notes_size -- initialised to 0; not updated by this class
    endianess  -- ELFDATA2LSB or ELFDATA2MSB
    elfclass   -- ELFCLASS32 or ELFCLASS64
    """

    def __init__(self, arch):
        """Sets up header and PT_NOTE segment for the given ARCH string.

        Raises gdb.GdbError if ARCH is not one of the supported names.
        """
        self.ehdr = None
        self.notes = []
        self.segments = []
        self.notes_size = 0
        self.endianess = None
        self.elfclass = ELFCLASS64

        # ARCH name -> (data encoding, ELF class, e_machine)
        supported = {
            'aarch64-le': (ELFDATA2LSB, ELFCLASS64, EM_AARCH),
            'aarch64-be': (ELFDATA2MSB, ELFCLASS64, EM_AARCH),
            'X86_64':     (ELFDATA2LSB, ELFCLASS64, EM_X86_64),
            '386':        (ELFDATA2LSB, ELFCLASS32, EM_386),
            's390':       (ELFDATA2MSB, ELFCLASS64, EM_S390),
            'ppc64-le':   (ELFDATA2LSB, ELFCLASS64, EM_PPC64),
            'ppc64-be':   (ELFDATA2MSB, ELFCLASS64, EM_PPC64),
        }

        if arch not in supported:
            raise gdb.GdbError("No valid arch type specified.\n"
                               "Currently supported types:\n"
                               "aarch64-be, aarch64-le, X86_64, 386, s390, "
                               "ppc64-be, ppc64-le")

        self.endianess, self.elfclass, machine = supported[arch]
        self.ehdr = get_arch_ehdr(self.endianess, self.elfclass)
        self.ehdr.e_machine = machine

        self.add_segment(PT_NOTE, 0, 0)

    def add_note(self, n_name, n_desc, n_type):
        """Adds a note to the ELF.

        n_name and n_desc are strings, n_type is the numeric note
        type. The size of the note is added to the file and memory
        size of the PT_NOTE segment (segment 0).
        """

        # Encode once and size everything from the encoded bytes: the
        # note stores bytes, so using the character count of the
        # strings would truncate text that encodes to more than one
        # byte per character.
        name = n_name.encode()
        desc = n_desc.encode()

        note = get_arch_note(self.endianess, len(name), len(desc))
        note.n_namesz = len(name) + 1
        note.n_descsz = len(desc)
        note.n_name = name
        note.n_type = n_type

        # Desc needs to be 4 byte aligned (although the 64bit spec
        # specifies 8 byte). When defining n_desc as uint32 it will be
        # automatically aligned but we need the memmove to copy the
        # string into it.
        ctypes.memmove(note.n_desc, desc, len(desc))

        self.notes.append(note)
        note_size = ctypes.sizeof(note)
        self.segments[0].p_filesz += note_size
        self.segments[0].p_memsz += note_size

    def add_segment(self, p_type, p_paddr, p_size):
        """Adds a segment to the elf.

        p_size is used for both the file size and the memory size of
        the new program header; e_phnum in the ELF header is
        incremented.
        """

        phdr = get_arch_phdr(self.endianess, self.elfclass)
        phdr.p_type = p_type
        phdr.p_paddr = p_paddr
        phdr.p_filesz = p_size
        phdr.p_memsz = p_size
        self.segments.append(phdr)
        self.ehdr.e_phnum += 1

    def to_file(self, elf_file):
        """Writes all ELF structures to the passed file.

        Structure:
        Ehdr
        Segment 0:PT_NOTE
        Segment 1:PT_LOAD
        Segment N:PT_LOAD
        Note    0..N
        Dump contents

        The dump contents themselves are not written here; only the
        file offsets for them are assigned to the program headers.
        """
        elf_file.write(self.ehdr)

        # The first segment's data starts right after the ELF header
        # and the complete program header table.
        off = (ctypes.sizeof(self.ehdr) +
               len(self.segments) * ctypes.sizeof(self.segments[0]))

        for phdr in self.segments:
            phdr.p_offset = off
            elf_file.write(phdr)
            off += phdr.p_filesz

        for note in self.notes:
            elf_file.write(note)
159
160
def get_arch_note(endianess, len_name, len_desc):
    """Returns a new, empty Note object with the specified endianess.

    The name field holds len_name + 1 bytes (presumably the extra byte
    is for the terminating NUL -- confirm against the caller) and the
    descriptor is an array of 32 bit words large enough for len_desc
    bytes.
    """

    is_little = endianess == ELFDATA2LSB
    base = (ctypes.LittleEndianStructure if is_little
            else ctypes.BigEndianStructure)

    name_type = ctypes.c_char * (len_name + 1)
    # round the descriptor up to whole 32 bit words
    desc_type = ctypes.c_uint32 * ((len_desc + 3) // 4)

    class Note(base):
        """Represents an ELF note, includes the content."""

        _fields_ = [("n_namesz", ctypes.c_uint32),
                    ("n_descsz", ctypes.c_uint32),
                    ("n_type", ctypes.c_uint32),
                    ("n_name", name_type),
                    ("n_desc", desc_type)]

    return Note()
180
181
class Ident(ctypes.Structure):
    """Represents the ELF ident array in the ehdr structure.

    Sixteen single-byte entries: four magic bytes, class, data
    encoding, version, OS ABI, ABI version and seven bytes of padding.
    """

    _fields_ = [('ei_mag0', ctypes.c_ubyte),
                ('ei_mag1', ctypes.c_ubyte),
                ('ei_mag2', ctypes.c_ubyte),
                ('ei_mag3', ctypes.c_ubyte),
                ('ei_class', ctypes.c_ubyte),
                ('ei_data', ctypes.c_ubyte),
                ('ei_version', ctypes.c_ubyte),
                ('ei_osabi', ctypes.c_ubyte),
                ('ei_abiversion', ctypes.c_ubyte),
                ('ei_pad', ctypes.c_ubyte * 7)]

    def __init__(self, endianess, elfclass):
        # magic number: 0x7F followed by the letters "ELF"
        magic_fields = ('ei_mag0', 'ei_mag1', 'ei_mag2', 'ei_mag3')
        for field, value in zip(magic_fields, bytearray(b"\x7fELF")):
            setattr(self, field, value)

        self.ei_version = EV_CURRENT
        self.ei_data = endianess
        self.ei_class = elfclass
204
205
def get_arch_ehdr(endianess, elfclass):
    """Returns a new 32 or 64 bit EHDR object with the specified endianess.

    endianess selects the byte order of the structure (ELFDATA2LSB for
    little endian, anything else for big endian). elfclass selects the
    64 bit layout when it equals ELFCLASS64 and the 32 bit layout
    otherwise.

    The returned header is initialised as an ET_CORE header with zero
    program headers; e_machine is left for the caller to set.
    """

    if endianess == ELFDATA2LSB:
        superclass = ctypes.LittleEndianStructure
    else:
        superclass = ctypes.BigEndianStructure

    def init_common(ehdr):
        """Fills in the fields that are set the same way for both layouts."""
        ehdr.e_ident = Ident(endianess, elfclass)
        ehdr.e_type = ET_CORE
        ehdr.e_version = EV_CURRENT
        ehdr.e_ehsize = ctypes.sizeof(ehdr)
        # e_phoff equals the header size, i.e. the program header
        # table starts right behind this header.
        ehdr.e_phoff = ctypes.sizeof(ehdr)
        ehdr.e_phentsize = ctypes.sizeof(get_arch_phdr(endianess, elfclass))
        ehdr.e_phnum = 0

    class EHDR64(superclass):
        """Represents the 64 bit ELF header struct."""

        _fields_ = [('e_ident', Ident),
                    ('e_type', ctypes.c_uint16),
                    ('e_machine', ctypes.c_uint16),
                    ('e_version', ctypes.c_uint32),
                    ('e_entry', ctypes.c_uint64),
                    ('e_phoff', ctypes.c_uint64),
                    ('e_shoff', ctypes.c_uint64),
                    ('e_flags', ctypes.c_uint32),
                    ('e_ehsize', ctypes.c_uint16),
                    ('e_phentsize', ctypes.c_uint16),
                    ('e_phnum', ctypes.c_uint16),
                    ('e_shentsize', ctypes.c_uint16),
                    ('e_shnum', ctypes.c_uint16),
                    ('e_shstrndx', ctypes.c_uint16)]

        def __init__(self):
            # super() must be given the class being defined, not its
            # base, otherwise the base class initialiser is skipped.
            super(EHDR64, self).__init__()
            init_common(self)

    class EHDR32(superclass):
        """Represents the 32 bit ELF header struct."""

        _fields_ = [('e_ident', Ident),
                    ('e_type', ctypes.c_uint16),
                    ('e_machine', ctypes.c_uint16),
                    ('e_version', ctypes.c_uint32),
                    ('e_entry', ctypes.c_uint32),
                    ('e_phoff', ctypes.c_uint32),
                    ('e_shoff', ctypes.c_uint32),
                    ('e_flags', ctypes.c_uint32),
                    ('e_ehsize', ctypes.c_uint16),
                    ('e_phentsize', ctypes.c_uint16),
                    ('e_phnum', ctypes.c_uint16),
                    ('e_shentsize', ctypes.c_uint16),
                    ('e_shnum', ctypes.c_uint16),
                    ('e_shstrndx', ctypes.c_uint16)]

        def __init__(self):
            super(EHDR32, self).__init__()
            init_common(self)

    # End get_arch_ehdr
    if elfclass == ELFCLASS64:
        return EHDR64()
    else:
        return EHDR32()
276
277
def get_arch_phdr(endianess, elfclass):
    """Returns a new 32 or 64 bit PHDR object with the specified endianess.

    The 64 bit layout is used when elfclass equals ELFCLASS64, the
    32 bit layout otherwise. Note that p_flags sits at a different
    position in the two layouts.
    """

    is_little = endianess == ELFDATA2LSB
    base = (ctypes.LittleEndianStructure if is_little
            else ctypes.BigEndianStructure)

    if elfclass == ELFCLASS64:
        class PHDR64(base):
            """Represents the 64 bit ELF program header struct."""

            _fields_ = [('p_type', ctypes.c_uint32),
                        ('p_flags', ctypes.c_uint32),
                        ('p_offset', ctypes.c_uint64),
                        ('p_vaddr', ctypes.c_uint64),
                        ('p_paddr', ctypes.c_uint64),
                        ('p_filesz', ctypes.c_uint64),
                        ('p_memsz', ctypes.c_uint64),
                        ('p_align', ctypes.c_uint64)]

        return PHDR64()

    class PHDR32(base):
        """Represents the 32 bit ELF program header struct."""

        _fields_ = [('p_type', ctypes.c_uint32),
                    ('p_offset', ctypes.c_uint32),
                    ('p_vaddr', ctypes.c_uint32),
                    ('p_paddr', ctypes.c_uint32),
                    ('p_filesz', ctypes.c_uint32),
                    ('p_memsz', ctypes.c_uint32),
                    ('p_flags', ctypes.c_uint32),
                    ('p_align', ctypes.c_uint32)]

    return PHDR32()
315
316
def int128_get64(val):
    """Returns the "lo" member of an Int128 struct.

    Asserts that the "hi" member is zero, i.e. that the value fits
    into the low part.
    """

    high_part = val["hi"]
    assert high_part == 0
    low_part = val["lo"]
    return low_part
322
323
def qlist_foreach(head, field_str):
    """Generator for qlists.

    Starts at head["lh_first"] and yields each dereferenced element,
    following element[field_str]["le_next"] until a pointer compares
    equal to 0.
    """

    node_p = head["lh_first"]
    while True:
        if not (node_p != 0):
            return
        node = node_p.dereference()
        # fetch the successor before handing the element out
        node_p = node[field_str]["le_next"]
        yield node
332
333
def qemu_get_ram_block(ram_addr):
    """Returns the RAMBlock struct to which the given address belongs.

    Walks "ram_list.blocks" and returns the first block for which
    ram_addr minus the block's offset is below the block's used
    length. Raises gdb.GdbError if no block matches.
    """

    block_list = gdb.parse_and_eval("ram_list.blocks")

    for candidate in qlist_foreach(block_list, "next"):
        # NOTE(review): presumably relies on unsigned arithmetic so
        # that addresses below the block's offset do not match --
        # confirm.
        distance = ram_addr - candidate["offset"]
        if distance < candidate["used_length"]:
            return candidate

    raise gdb.GdbError("Bad ram offset %x" % ram_addr)
344
345
def qemu_get_ram_ptr(ram_addr):
    """Returns qemu vaddr for given guest physical address.

    The result is the "host" member of the RAMBlock containing
    ram_addr plus the address's distance from the block's offset.
    """

    owner = qemu_get_ram_block(ram_addr)
    delta = ram_addr - owner["offset"]
    return owner["host"] + delta
351
352
def memory_region_get_ram_ptr(memory_region):
    """Returns the qemu vaddr at which the memory region's RAM starts.

    For a region with a non-null "alias" the aliased region is
    resolved recursively and "alias_offset" is added to the result.
    """

    alias_p = memory_region["alias"]
    if alias_p != 0:
        aliased_ptr = memory_region_get_ram_ptr(alias_p.dereference())
        return aliased_ptr + memory_region["alias_offset"]

    page_addr = memory_region["ram_addr"] & TARGET_PAGE_MASK
    return qemu_get_ram_ptr(page_addr)
359
360
def get_guest_phys_blocks():
    """Returns a list of ram blocks and prints a table of them.

    The ranges of "address_space_memory.current_map" are visited in
    order; non-RAM regions are skipped. A range that directly
    continues the previously recorded block in both guest-physical
    and host-virtual memory is merged into it, otherwise a new block
    is appended.

    Each block entry contains:
    'target_start': guest block phys start address
    'target_end':   guest block phys end address
    'host_addr':    qemu vaddr of the block's start
    """

    blocks = []

    print("guest RAM blocks:")
    print("target_start     target_end       host_addr        message "
          "count")
    print("---------------- ---------------- ---------------- ------- "
          "-----")

    flat_view = gdb.parse_and_eval(
        "address_space_memory.current_map").dereference()

    # Conversion to int is needed for python 3
    # compatibility. Otherwise range doesn't cast the value itself and
    # breaks.
    range_count = int(flat_view["nr"])

    for index in range(range_count):
        flat_range = (flat_view["ranges"] + index).dereference()
        region = flat_range["mr"].dereference()

        # we only care about RAM
        if not region["ram"]:
            continue

        size = int128_get64(flat_range["addr"]["size"])
        target_start = int128_get64(flat_range["addr"]["start"])
        target_end = target_start + size
        host_addr = (memory_region_get_ram_ptr(region)
                     + flat_range["offset_in_region"])

        # find continuity in guest physical address space
        previous = None
        if blocks:
            candidate = blocks[-1]
            candidate_size = (candidate["target_end"] -
                              candidate["target_start"])

            # the memory API guarantees monotonically increasing
            # traversal
            assert candidate["target_end"] <= target_start

            # we want continuity in both guest-physical and
            # host-virtual memory
            gap_in_guest = candidate["target_end"] < target_start
            if not (gap_in_guest or
                    candidate["host_addr"] + candidate_size != host_addr):
                previous = candidate

        if previous is not None:
            # expand the previous block until @target_end; its start
            # doesn't change
            previous["target_end"] = target_end
            message = "joined"
        else:
            # isolated mapping, add it to the list
            blocks.append({"target_start": target_start,
                           "target_end":   target_end,
                           "host_addr":    host_addr})
            message = "added"

        print("%016x %016x %016x %-7s %5u" %
              (target_start, target_end, host_addr.cast(UINTPTR_T),
               message, len(blocks)))

    return blocks
432
433
class DumpGuestMemory(gdb.Command):
    """Extract guest vmcore from qemu process coredump.

The two required arguments are FILE and ARCH:
FILE identifies the target file to write the guest vmcore to.
ARCH specifies the architecture for which the core will be generated.

This GDB command reimplements the dump-guest-memory QMP command in
python, using the representation of guest memory as captured in the qemu
coredump. The qemu process that has been dumped must have had the
command line option "-machine dump-guest-core=on" which is the default.

For simplicity, the "paging", "begin" and "end" parameters of the QMP
command are not supported -- no attempt is made to get the guest's
internal paging structures (ie. paging=false is hard-wired), and guest
memory is always fully dumped.

Currently aarch64-be, aarch64-le, X86_64, 386, s390, ppc64-be,
ppc64-le guests are supported.

The CORE/NT_PRSTATUS and QEMU notes (that is, the VCPUs' statuses) are
not written to the vmcore. Preparing these would require context that is
only present in the KVM host kernel module when the guest is alive. A
fake ELF note is written instead, only to keep the ELF parser of "crash"
happy.

Dependent on how busted the qemu process was at the time of the
coredump, this command might produce unpredictable results. If qemu
deliberately called abort(), or it was dumped in response to a signal at
a halfway fortunate point, then its coredump should be in reasonable
shape and this command should mostly work."""

    def __init__(self):
        # Passes the command name, command class and completer to
        # gdb.Command.
        super(DumpGuestMemory, self).__init__("dump-guest-memory",
                                              gdb.COMMAND_DATA,
                                              gdb.COMPLETE_FILENAME)
        self.elf = None
        self.guest_phys_blocks = None

    def dump_init(self, vmcore):
        """Builds the ELF note and segments and writes them to vmcore."""

        # Needed to make crash happy, data for more useful notes is
        # not available in a qemu core.
        self.elf.add_note("NONE", "EMPTY", 0)

        # We should never reach PN_XNUM for paging=false dumps,
        # there's just a handful of discontiguous ranges after
        # merging.
        # The constant is needed to account for the PT_NOTE segment.
        total_phdrs = 1 + len(self.guest_phys_blocks)
        assert total_phdrs < PN_XNUM

        # one PT_LOAD segment per guest RAM block
        for ram_block in self.guest_phys_blocks:
            start = ram_block["target_start"]
            length = ram_block["target_end"] - start
            self.elf.add_segment(PT_LOAD, start, length)

        self.elf.to_file(vmcore)

    def dump_iterate(self, vmcore):
        """Copies the guest RAM blocks from the inferior to vmcore."""

        inferior = gdb.inferiors()[0]
        for ram_block in self.guest_phys_blocks:
            position = ram_block["host_addr"]
            remaining = (ram_block["target_end"] -
                         ram_block["target_start"])
            print("dumping range at %016x for length %016x" %
                  (position.cast(UINTPTR_T), remaining))

            # copy at most one page per read
            while remaining > 0:
                step = min(TARGET_PAGE_SIZE, remaining)
                vmcore.write(inferior.read_memory(position, step))
                position += step
                remaining -= step

    def invoke(self, args, from_tty):
        """Handles command invocation from gdb.

        Expects exactly two arguments, FILE and ARCH, and raises
        gdb.GdbError with a usage message otherwise.
        """

        # Unwittingly pressing the Enter key after the command should
        # not dump the same multi-gig coredump to the same file.
        self.dont_repeat()

        argv = gdb.string_to_argv(args)
        if len(argv) != 2:
            raise gdb.GdbError("usage: dump-guest-memory FILE ARCH")
        file_name, arch = argv

        self.elf = ELF(arch)
        self.guest_phys_blocks = get_guest_phys_blocks()

        with open(file_name, "wb") as vmcore:
            self.dump_init(vmcore)
            self.dump_iterate(vmcore)
525             self.dump_init(vmcore)
526             self.dump_iterate(vmcore)
527
528 DumpGuestMemory()