scripts/dump-guest-memory.py

   1 # This python script adds a new gdb command, "dump-guest-memory". It
   2 # should be loaded with "source dump-guest-memory.py" at the (gdb)
   3 # prompt.
   4 #
   5 # Copyright (C) 2013, Red Hat, Inc.
   6 #
   7 # Authors:
   8 #   Laszlo Ersek <lersek@redhat.com>
   9 #   Janosch Frank <frankja@linux.vnet.ibm.com>
  10 #
  11 # This work is licensed under the terms of the GNU GPL, version 2 or later. See
  12 # the COPYING file in the top-level directory.
  13 #
  14 # The leading docstring doesn't have idiomatic Python formatting. It is
  15 # printed by gdb's "help" command (the first line is printed in the
  16 # "help data" summary), and it should match how other help texts look in
  17 # gdb.
  18
  19 import ctypes
  20
# gdb type used to cast host pointers to plain integers before they are
# formatted with "%x".
UINTPTR_T = gdb.lookup_type("uintptr_t")

# Guest memory is copied out in chunks of at most TARGET_PAGE_SIZE bytes;
# TARGET_PAGE_MASK clears the in-page bits of a RAM address.
TARGET_PAGE_SIZE = 0x1000
TARGET_PAGE_MASK = 0xFFFFFFFFFFFFF000

# Special value for e_phnum. This indicates that the real number of
# program headers is too large to fit into e_phnum. Instead the real
# value is in the field sh_info of section 0.
PN_XNUM = 0xFFFF

# ELF version, stored in both ei_version and e_version.
EV_CURRENT = 1

# ELF class (32 or 64 bit), stored in ei_class; it also selects the
# width of the header structures.
ELFCLASS32 = 1
ELFCLASS64 = 2

# ELF data encoding (little or big endian), stored in ei_data; it also
# selects the byte order of the header structures.
ELFDATA2LSB = 1
ELFDATA2MSB = 2

# Object file type stored in e_type: a core file.
ET_CORE = 4

# Program header (segment) types stored in p_type.
PT_LOAD = 1
PT_NOTE = 4

# Machine types stored in e_machine.
EM_386 = 3
EM_PPC = 20
EM_PPC64 = 21
EM_S390 = 22
EM_AARCH = 183
EM_X86_64 = 62
  50
  51 class ELF(object):
  52     """Representation of a ELF file."""
  53
  54     def __init__(self, arch):
  55         self.ehdr = None
  56         self.notes = []
  57         self.segments = []
  58         self.notes_size = 0
  59         self.endianess = None
  60         self.elfclass = ELFCLASS64
  61
  62         if arch == 'aarch64-le':
  63             self.endianess = ELFDATA2LSB
  64             self.elfclass = ELFCLASS64
  65             self.ehdr = get_arch_ehdr(self.endianess, self.elfclass)
  66             self.ehdr.e_machine = EM_AARCH
  67
  68         elif arch == 'aarch64-be':
  69             self.endianess = ELFDATA2MSB
  70             self.ehdr = get_arch_ehdr(self.endianess, self.elfclass)
  71             self.ehdr.e_machine = EM_AARCH
  72
  73         elif arch == 'X86_64':
  74             self.endianess = ELFDATA2LSB
  75             self.ehdr = get_arch_ehdr(self.endianess, self.elfclass)
  76             self.ehdr.e_machine = EM_X86_64
  77
  78         elif arch == '386':
  79             self.endianess = ELFDATA2LSB
  80             self.elfclass = ELFCLASS32
  81             self.ehdr = get_arch_ehdr(self.endianess, self.elfclass)
  82             self.ehdr.e_machine = EM_386
  83
  84         elif arch == 's390':
  85             self.endianess = ELFDATA2MSB
  86             self.ehdr = get_arch_ehdr(self.endianess, self.elfclass)
  87             self.ehdr.e_machine = EM_S390
  88
  89         elif arch == 'ppc64-le':
  90             self.endianess = ELFDATA2LSB
  91             self.ehdr = get_arch_ehdr(self.endianess, self.elfclass)
  92             self.ehdr.e_machine = EM_PPC64
  93
  94         elif arch == 'ppc64-be':
  95             self.endianess = ELFDATA2MSB
  96             self.ehdr = get_arch_ehdr(self.endianess, self.elfclass)
  97             self.ehdr.e_machine = EM_PPC64
  98
  99         else:
 100             raise gdb.GdbError("No valid arch type specified.\n"
 101                                "Currently supported types:\n"
 102                                "aarch64-be, aarch64-le, X86_64, 386, s390, "
 103                                "ppc64-be, ppc64-le")
 104
 105         self.add_segment(PT_NOTE, 0, 0)
 106
 107     def add_note(self, n_name, n_desc, n_type):
 108         """Adds a note to the ELF."""
 109
 110         note = get_arch_note(self.endianess, len(n_name), len(n_desc))
 111         note.n_namesz = len(n_name) + 1
 112         note.n_descsz = len(n_desc)
 113         note.n_name = n_name.encode()
 114         note.n_type = n_type
 115
 116         # Desc needs to be 4 byte aligned (although the 64bit spec
 117         # specifies 8 byte). When defining n_desc as uint32 it will be
 118         # automatically aligned but we need the memmove to copy the
 119         # string into it.
 120         ctypes.memmove(note.n_desc, n_desc.encode(), len(n_desc))
 121
 122         self.notes.append(note)
 123         self.segments[0].p_filesz += ctypes.sizeof(note)
 124         self.segments[0].p_memsz += ctypes.sizeof(note)
 125
 126     def add_segment(self, p_type, p_paddr, p_size):
 127         """Adds a segment to the elf."""
 128
 129         phdr = get_arch_phdr(self.endianess, self.elfclass)
 130         phdr.p_type = p_type
 131         phdr.p_paddr = p_paddr
 132         phdr.p_filesz = p_size
 133         phdr.p_memsz = p_size
 134         self.segments.append(phdr)
 135         self.ehdr.e_phnum += 1
 136
 137     def to_file(self, elf_file):
 138         """Writes all ELF structures to the the passed file.
 139
 140         Structure:
 141         Ehdr
 142         Segment 0:PT_NOTE
 143         Segment 1:PT_LOAD
 144         Segment N:PT_LOAD
 145         Note    0..N
 146         Dump contents
 147         """
 148         elf_file.write(self.ehdr)
 149         off = ctypes.sizeof(self.ehdr) + \
 150               len(self.segments) * ctypes.sizeof(self.segments[0])
 151
 152         for phdr in self.segments:
 153             phdr.p_offset = off
 154             elf_file.write(phdr)
 155             off += phdr.p_filesz
 156
 157         for note in self.notes:
 158             elf_file.write(note)
 159
 160
 161 def get_arch_note(endianess, len_name, len_desc):
 162     """Returns a Note class with the specified endianess."""
 163
 164     if endianess == ELFDATA2LSB:
 165         superclass = ctypes.LittleEndianStructure
 166     else:
 167         superclass = ctypes.BigEndianStructure
 168
 169     len_name = len_name + 1
 170
 171     class Note(superclass):
 172         """Represents an ELF note, includes the content."""
 173
 174         _fields_ = [("n_namesz", ctypes.c_uint32),
 175                     ("n_descsz", ctypes.c_uint32),
 176                     ("n_type", ctypes.c_uint32),
 177                     ("n_name", ctypes.c_char * len_name),
 178                     ("n_desc", ctypes.c_uint32 * ((len_desc + 3) // 4))]
 179     return Note()
 180
 181
 182 class Ident(ctypes.Structure):
 183     """Represents the ELF ident array in the ehdr structure."""
 184
 185     _fields_ = [('ei_mag0', ctypes.c_ubyte),
 186                 ('ei_mag1', ctypes.c_ubyte),
 187                 ('ei_mag2', ctypes.c_ubyte),
 188                 ('ei_mag3', ctypes.c_ubyte),
 189                 ('ei_class', ctypes.c_ubyte),
 190                 ('ei_data', ctypes.c_ubyte),
 191                 ('ei_version', ctypes.c_ubyte),
 192                 ('ei_osabi', ctypes.c_ubyte),
 193                 ('ei_abiversion', ctypes.c_ubyte),
 194                 ('ei_pad', ctypes.c_ubyte * 7)]
 195
 196     def __init__(self, endianess, elfclass):
 197         self.ei_mag0 = 0x7F
 198         self.ei_mag1 = ord('E')
 199         self.ei_mag2 = ord('L')
 200         self.ei_mag3 = ord('F')
 201         self.ei_class = elfclass
 202         self.ei_data = endianess
 203         self.ei_version = EV_CURRENT
 204
 205
 206 def get_arch_ehdr(endianess, elfclass):
 207     """Returns a EHDR64 class with the specified endianess."""
 208
 209     if endianess == ELFDATA2LSB:
 210         superclass = ctypes.LittleEndianStructure
 211     else:
 212         superclass = ctypes.BigEndianStructure
 213
 214     class EHDR64(superclass):
 215         """Represents the 64 bit ELF header struct."""
 216
 217         _fields_ = [('e_ident', Ident),
 218                     ('e_type', ctypes.c_uint16),
 219                     ('e_machine', ctypes.c_uint16),
 220                     ('e_version', ctypes.c_uint32),
 221                     ('e_entry', ctypes.c_uint64),
 222                     ('e_phoff', ctypes.c_uint64),
 223                     ('e_shoff', ctypes.c_uint64),
 224                     ('e_flags', ctypes.c_uint32),
 225                     ('e_ehsize', ctypes.c_uint16),
 226                     ('e_phentsize', ctypes.c_uint16),
 227                     ('e_phnum', ctypes.c_uint16),
 228                     ('e_shentsize', ctypes.c_uint16),
 229                     ('e_shnum', ctypes.c_uint16),
 230                     ('e_shstrndx', ctypes.c_uint16)]
 231
 232         def __init__(self):
 233             super(superclass, self).__init__()
 234             self.e_ident = Ident(endianess, elfclass)
 235             self.e_type = ET_CORE
 236             self.e_version = EV_CURRENT
 237             self.e_ehsize = ctypes.sizeof(self)
 238             self.e_phoff = ctypes.sizeof(self)
 239             self.e_phentsize = ctypes.sizeof(get_arch_phdr(endianess, elfclass))
 240             self.e_phnum = 0
 241
 242
 243     class EHDR32(superclass):
 244         """Represents the 32 bit ELF header struct."""
 245
 246         _fields_ = [('e_ident', Ident),
 247                     ('e_type', ctypes.c_uint16),
 248                     ('e_machine', ctypes.c_uint16),
 249                     ('e_version', ctypes.c_uint32),
 250                     ('e_entry', ctypes.c_uint32),
 251                     ('e_phoff', ctypes.c_uint32),
 252                     ('e_shoff', ctypes.c_uint32),
 253                     ('e_flags', ctypes.c_uint32),
 254                     ('e_ehsize', ctypes.c_uint16),
 255                     ('e_phentsize', ctypes.c_uint16),
 256                     ('e_phnum', ctypes.c_uint16),
 257                     ('e_shentsize', ctypes.c_uint16),
 258                     ('e_shnum', ctypes.c_uint16),
 259                     ('e_shstrndx', ctypes.c_uint16)]
 260
 261         def __init__(self):
 262             super(superclass, self).__init__()
 263             self.e_ident = Ident(endianess, elfclass)
 264             self.e_type = ET_CORE
 265             self.e_version = EV_CURRENT
 266             self.e_ehsize = ctypes.sizeof(self)
 267             self.e_phoff = ctypes.sizeof(self)
 268             self.e_phentsize = ctypes.sizeof(get_arch_phdr(endianess, elfclass))
 269             self.e_phnum = 0
 270
 271     # End get_arch_ehdr
 272     if elfclass == ELFCLASS64:
 273         return EHDR64()
 274     else:
 275         return EHDR32()
 276
 277
 278 def get_arch_phdr(endianess, elfclass):
 279     """Returns a 32 or 64 bit PHDR class with the specified endianess."""
 280
 281     if endianess == ELFDATA2LSB:
 282         superclass = ctypes.LittleEndianStructure
 283     else:
 284         superclass = ctypes.BigEndianStructure
 285
 286     class PHDR64(superclass):
 287         """Represents the 64 bit ELF program header struct."""
 288
 289         _fields_ = [('p_type', ctypes.c_uint32),
 290                     ('p_flags', ctypes.c_uint32),
 291                     ('p_offset', ctypes.c_uint64),
 292                     ('p_vaddr', ctypes.c_uint64),
 293                     ('p_paddr', ctypes.c_uint64),
 294                     ('p_filesz', ctypes.c_uint64),
 295                     ('p_memsz', ctypes.c_uint64),
 296                     ('p_align', ctypes.c_uint64)]
 297
 298     class PHDR32(superclass):
 299         """Represents the 32 bit ELF program header struct."""
 300
 301         _fields_ = [('p_type', ctypes.c_uint32),
 302                     ('p_offset', ctypes.c_uint32),
 303                     ('p_vaddr', ctypes.c_uint32),
 304                     ('p_paddr', ctypes.c_uint32),
 305                     ('p_filesz', ctypes.c_uint32),
 306                     ('p_memsz', ctypes.c_uint32),
 307                     ('p_flags', ctypes.c_uint32),
 308                     ('p_align', ctypes.c_uint32)]
 309
 310     # End get_arch_phdr
 311     if elfclass == ELFCLASS64:
 312         return PHDR64()
 313     else:
 314         return PHDR32()
 315
 316
 317 def int128_get64(val):
 318     """Returns low 64bit part of Int128 struct."""
 319
 320     assert val["hi"] == 0
 321     return val["lo"]
 322
 323
 324 def qlist_foreach(head, field_str):
 325     """Generator for qlists."""
 326
 327     var_p = head["lh_first"]
 328     while var_p != 0:
 329         var = var_p.dereference()
 330         var_p = var[field_str]["le_next"]
 331         yield var
 332
 333
 334 def qemu_get_ram_block(ram_addr):
 335     """Returns the RAMBlock struct to which the given address belongs."""
 336
 337     ram_blocks = gdb.parse_and_eval("ram_list.blocks")
 338
 339     for block in qlist_foreach(ram_blocks, "next"):
 340         if (ram_addr - block["offset"]) < block["used_length"]:
 341             return block
 342
 343     raise gdb.GdbError("Bad ram offset %x" % ram_addr)
 344
 345
 346 def qemu_get_ram_ptr(ram_addr):
 347     """Returns qemu vaddr for given guest physical address."""
 348
 349     block = qemu_get_ram_block(ram_addr)
 350     return block["host"] + (ram_addr - block["offset"])
 351
 352
 353 def memory_region_get_ram_ptr(memory_region):
 354     if memory_region["alias"] != 0:
 355         return (memory_region_get_ram_ptr(memory_region["alias"].dereference())
 356                 + memory_region["alias_offset"])
 357
 358     return qemu_get_ram_ptr(memory_region["ram_addr"] & TARGET_PAGE_MASK)
 359
 360
 361 def get_guest_phys_blocks():
 362     """Returns a list of ram blocks.
 363
 364     Each block entry contains:
 365     'target_start': guest block phys start address
 366     'target_end':   guest block phys end address
 367     'host_addr':    qemu vaddr of the block's start
 368     """
 369
 370     guest_phys_blocks = []
 371
 372     print("guest RAM blocks:")
 373     print("target_start     target_end       host_addr        message "
 374           "count")
 375     print("---------------- ---------------- ---------------- ------- "
 376           "-----")
 377
 378     current_map_p = gdb.parse_and_eval("address_space_memory.current_map")
 379     current_map = current_map_p.dereference()
 380
 381     # Conversion to int is needed for python 3
 382     # compatibility. Otherwise range doesn't cast the value itself and
 383     # breaks.
 384     for cur in range(int(current_map["nr"])):
 385         flat_range = (current_map["ranges"] + cur).dereference()
 386         memory_region = flat_range["mr"].dereference()
 387
 388         # we only care about RAM
 389         if not memory_region["ram"]:
 390             continue
 391
 392         section_size = int128_get64(flat_range["addr"]["size"])
 393         target_start = int128_get64(flat_range["addr"]["start"])
 394         target_end = target_start + section_size
 395         host_addr = (memory_region_get_ram_ptr(memory_region)
 396                      + flat_range["offset_in_region"])
 397         predecessor = None
 398
 399         # find continuity in guest physical address space
 400         if len(guest_phys_blocks) > 0:
 401             predecessor = guest_phys_blocks[-1]
 402             predecessor_size = (predecessor["target_end"] -
 403                                 predecessor["target_start"])
 404
 405             # the memory API guarantees monotonically increasing
 406             # traversal
 407             assert predecessor["target_end"] <= target_start
 408
 409             # we want continuity in both guest-physical and
 410             # host-virtual memory
 411             if (predecessor["target_end"] < target_start or
 412                 predecessor["host_addr"] + predecessor_size != host_addr):
 413                 predecessor = None
 414
 415         if predecessor is None:
 416             # isolated mapping, add it to the list
 417             guest_phys_blocks.append({"target_start": target_start,
 418                                       "target_end":   target_end,
 419                                       "host_addr":    host_addr})
 420             message = "added"
 421         else:
 422             # expand predecessor until @target_end; predecessor's
 423             # start doesn't change
 424             predecessor["target_end"] = target_end
 425             message = "joined"
 426
 427         print("%016x %016x %016x %-7s %5u" %
 428               (target_start, target_end, host_addr.cast(UINTPTR_T),
 429                message, len(guest_phys_blocks)))
 430
 431     return guest_phys_blocks
 432
 433
 434 class DumpGuestMemory(gdb.Command):
 435     """Extract guest vmcore from qemu process coredump.
 436
 437 The two required arguments are FILE and ARCH:
 438 FILE identifies the target file to write the guest vmcore to.
 439 ARCH specifies the architecture for which the core will be generated.
 440
 441 This GDB command reimplements the dump-guest-memory QMP command in
 442 python, using the representation of guest memory as captured in the qemu
 443 coredump. The qemu process that has been dumped must have had the
 444 command line option "-machine dump-guest-core=on" which is the default.
 445
 446 For simplicity, the "paging", "begin" and "end" parameters of the QMP
 447 command are not supported -- no attempt is made to get the guest's
 448 internal paging structures (ie. paging=false is hard-wired), and guest
 449 memory is always fully dumped.
 450
 451 Currently aarch64-be, aarch64-le, X86_64, 386, s390, ppc64-be,
 452 ppc64-le guests are supported.
 453
 454 The CORE/NT_PRSTATUS and QEMU notes (that is, the VCPUs' statuses) are
 455 not written to the vmcore. Preparing these would require context that is
 456 only present in the KVM host kernel module when the guest is alive. A
 457 fake ELF note is written instead, only to keep the ELF parser of "crash"
 458 happy.
 459
 460 Dependent on how busted the qemu process was at the time of the
 461 coredump, this command might produce unpredictable results. If qemu
 462 deliberately called abort(), or it was dumped in response to a signal at
 463 a halfway fortunate point, then its coredump should be in reasonable
 464 shape and this command should mostly work."""
 465
 466     def __init__(self):
 467         super(DumpGuestMemory, self).__init__("dump-guest-memory",
 468                                               gdb.COMMAND_DATA,
 469                                               gdb.COMPLETE_FILENAME)
 470         self.elf = None
 471         self.guest_phys_blocks = None
 472
 473     def dump_init(self, vmcore):
 474         """Prepares and writes ELF structures to core file."""
 475
 476         # Needed to make crash happy, data for more useful notes is
 477         # not available in a qemu core.
 478         self.elf.add_note("NONE", "EMPTY", 0)
 479
 480         # We should never reach PN_XNUM for paging=false dumps,
 481         # there's just a handful of discontiguous ranges after
 482         # merging.
 483         # The constant is needed to account for the PT_NOTE segment.
 484         phdr_num = len(self.guest_phys_blocks) + 1
 485         assert phdr_num < PN_XNUM
 486
 487         for block in self.guest_phys_blocks:
 488             block_size = block["target_end"] - block["target_start"]
 489             self.elf.add_segment(PT_LOAD, block["target_start"], block_size)
 490
 491         self.elf.to_file(vmcore)
 492
 493     def dump_iterate(self, vmcore):
 494         """Writes guest core to file."""
 495
 496         qemu_core = gdb.inferiors()[0]
 497         for block in self.guest_phys_blocks:
 498             cur = block["host_addr"]
 499             left = block["target_end"] - block["target_start"]
 500             print("dumping range at %016x for length %016x" %
 501                   (cur.cast(UINTPTR_T), left))
 502
 503             while left > 0:
 504                 chunk_size = min(TARGET_PAGE_SIZE, left)
 505                 chunk = qemu_core.read_memory(cur, chunk_size)
 506                 vmcore.write(chunk)
 507                 cur += chunk_size
 508                 left -= chunk_size
 509
 510     def invoke(self, args, from_tty):
 511         """Handles command invocation from gdb."""
 512
 513         # Unwittingly pressing the Enter key after the command should
 514         # not dump the same multi-gig coredump to the same file.
 515         self.dont_repeat()
 516
 517         argv = gdb.string_to_argv(args)
 518         if len(argv) != 2:
 519             raise gdb.GdbError("usage: dump-guest-memory FILE ARCH")
 520
 521         self.elf = ELF(argv[1])
 522         self.guest_phys_blocks = get_guest_phys_blocks()
 523
 524         with open(argv[0], "wb") as vmcore:
 525             self.dump_init(vmcore)
 526             self.dump_iterate(vmcore)
 527
# Instantiate the command once so that sourcing this script adds
# "dump-guest-memory" to gdb; the instance itself is not kept.
DumpGuestMemory()