From ea40f40e1b024598fb1dbd56211c2f24cb703df2 Mon Sep 17 00:00:00 2001 From: Zachary Turner Date: Thu, 29 Mar 2018 16:28:20 +0000 Subject: [PATCH] [PDB] Add an explain subcommand. When investigating various things, we often have a file offset and want to know what's in the PDB at that address. For example we may be doing a binary comparison of two LLD-generated PDBs to look for sources of non-determinism, or we may wish to compare an LLD-generated PDB with a Microsoft generated PDB for sources of byte-for-byte incompatibility. In these cases, we can do a binary diff of the two files, and once we find a mismatched byte we can use explain to figure out what that byte is, immediately honing in on the problem. This patch implements this by trying to narrow the meaning of a particular file offset down as much as possible. Differential Revision: https://reviews.llvm.org/D44959 llvm-svn: 328799 --- llvm/test/tools/llvm-pdbdump/explain.test | 83 ++++++++++ llvm/tools/llvm-pdbutil/CMakeLists.txt | 1 + llvm/tools/llvm-pdbutil/ExplainOutputStyle.cpp | 200 +++++++++++++++++++++++++ llvm/tools/llvm-pdbutil/ExplainOutputStyle.h | 59 ++++++++ llvm/tools/llvm-pdbutil/llvm-pdbutil.cpp | 24 +++ llvm/tools/llvm-pdbutil/llvm-pdbutil.h | 4 + 6 files changed, 371 insertions(+) create mode 100644 llvm/test/tools/llvm-pdbdump/explain.test create mode 100644 llvm/tools/llvm-pdbutil/ExplainOutputStyle.cpp create mode 100644 llvm/tools/llvm-pdbutil/ExplainOutputStyle.h diff --git a/llvm/test/tools/llvm-pdbdump/explain.test b/llvm/test/tools/llvm-pdbdump/explain.test new file mode 100644 index 0000000..9926eb0 --- /dev/null +++ b/llvm/test/tools/llvm-pdbdump/explain.test @@ -0,0 +1,83 @@ +; RUN: llvm-pdbutil explain -offset=0 %p/Inputs/InjectedSource.pdb \ +; RUN: | FileCheck --check-prefix=ZERO %s +; RUN: llvm-pdbutil explain -offset=40 %p/Inputs/InjectedSource.pdb \ +; RUN: | FileCheck --check-prefix=FORTY %s +; RUN: llvm-pdbutil explain -offset=60 %p/Inputs/InjectedSource.pdb \ +; RUN: | 
FileCheck --check-prefix=SIXTY %s + +; RUN: llvm-pdbutil explain -offset=0x1000 %p/Inputs/InjectedSource.pdb \ +; RUN: | FileCheck --check-prefix=FPM1 %s +; RUN: llvm-pdbutil explain -offset=0x1100 %p/Inputs/InjectedSource.pdb \ +; RUN: | FileCheck --check-prefix=EXTRANEOUSFPM %s +; RUN: llvm-pdbutil explain -offset=0x2000 %p/Inputs/InjectedSource.pdb \ +; RUN: | FileCheck --check-prefix=FPM2 %s + +; RUN: llvm-pdbutil explain -offset=0x3000 %p/Inputs/InjectedSource.pdb \ +; RUN: | FileCheck --check-prefix=UNALLOCATED %s + +; RUN: llvm-pdbutil explain -offset=0x7000 %p/Inputs/InjectedSource.pdb \ +; RUN: | FileCheck --check-prefix=STREAM %s + +; RUN: llvm-pdbutil explain -offset=0x1A000 %p/Inputs/InjectedSource.pdb \ +; RUN: | FileCheck --check-prefix=STREAMDIR %s + +; RUN: llvm-pdbutil explain -offset=0x1B000 %p/Inputs/InjectedSource.pdb \ +; RUN: | FileCheck --check-prefix=DIRBLOCKLIST %s + +; RUN: llvm-pdbutil explain -offset=0x1D000 %p/Inputs/InjectedSource.pdb \ +; RUN: | FileCheck --check-prefix=INVALIDFILEOFFSET %s + +; RUN: llvm-pdbutil explain -offset=0xA100 %p/Inputs/InjectedSource.pdb \ +; RUN: | FileCheck --check-prefix=UNUSED %s + + +ZERO: Block:Offset = 0:0000. +ZERO-NEXT: Address is in block 0 (allocated). +ZERO-NEXT: This corresponds to offset 0 of MSF super block, +ZERO-NEXT: which is part of the MSF file magic. + +FORTY: Block:Offset = 0:0028. +FORTY-NEXT: Address is in block 0 (allocated). +FORTY-NEXT: This corresponds to offset 40 of MSF super block, +FORTY-NEXT: which contains the number of bytes in the stream directory. + +SIXTY: Block:Offset = 0:003C. +SIXTY-NEXT: Address is in block 0 (allocated). +SIXTY-NEXT: This corresponds to offset 60 of MSF super block, +SIXTY-NEXT: which is outside the range of valid data for the super block. + +FPM1: Block:Offset = 1:0000. +FPM1-NEXT: Address is in block 1 (allocated). 
+FPM1-NEXT: Address is in FPM1 (Alt FPM) +FPM1-NEXT: Address describes the allocation status of blocks [0,8) + +EXTRANEOUSFPM: Block:Offset = 1:0100. +EXTRANEOUSFPM-NEXT: Address is in block 1 (allocated). +EXTRANEOUSFPM-NEXT: Address is in FPM1 (Alt FPM) +EXTRANEOUSFPM-NEXT: Address is in extraneous FPM space. + +FPM2: Block:Offset = 2:0000. +FPM2-NEXT: Address is in block 2 (allocated). +FPM2-NEXT: Address is in FPM2 (Main FPM) +FPM2-NEXT: Address describes the allocation status of blocks [0,8) + +UNALLOCATED: Block:Offset = 3:0000. +UNALLOCATED-NEXT: Address is in block 3 (unallocated). + +STREAM: Block:Offset = 7:0000. +STREAM-NEXT: Address is in block 7 (allocated). +STREAM-NEXT: Address is at offset 0/684 of Stream 12 (Module "* Linker *"). + +STREAMDIR: Block:Offset = 1A:0000. +STREAMDIR-NEXT: Address is in block 26 (allocated). +STREAMDIR-NEXT: Address is at offset 0/156 of Stream Directory. + +DIRBLOCKLIST: Block:Offset = 1B:0000. +DIRBLOCKLIST-NEXT: Address is in block 27 (allocated). +DIRBLOCKLIST-NEXT: Address is at offset 0 of the directory block list + +INVALIDFILEOFFSET: Address 118784 is not in the file (file size = 118784). + +UNUSED: Block:Offset = A:0100. +UNUSED-NEXT: Address is in block 10 (allocated). +UNUSED-NEXT: Address is at offset 256/120 of Stream 11 (Section Header Data) in unused space. 
diff --git a/llvm/tools/llvm-pdbutil/CMakeLists.txt b/llvm/tools/llvm-pdbutil/CMakeLists.txt index 82f0088..1ccbfdf 100644 --- a/llvm/tools/llvm-pdbutil/CMakeLists.txt +++ b/llvm/tools/llvm-pdbutil/CMakeLists.txt @@ -12,6 +12,7 @@ add_llvm_tool(llvm-pdbutil Analyze.cpp BytesOutputStyle.cpp DumpOutputStyle.cpp + ExplainOutputStyle.cpp InputFile.cpp llvm-pdbutil.cpp FormatUtil.cpp diff --git a/llvm/tools/llvm-pdbutil/ExplainOutputStyle.cpp b/llvm/tools/llvm-pdbutil/ExplainOutputStyle.cpp new file mode 100644 index 0000000..1947c93 --- /dev/null +++ b/llvm/tools/llvm-pdbutil/ExplainOutputStyle.cpp @@ -0,0 +1,200 @@ +//===- ExplainOutputStyle.cpp --------------------------------- *- C++ --*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "ExplainOutputStyle.h" + +#include "FormatUtil.h" +#include "StreamUtil.h" +#include "llvm-pdbutil.h" + +#include "llvm/DebugInfo/MSF/MappedBlockStream.h" +#include "llvm/DebugInfo/PDB/Native/PDBFile.h" + +using namespace llvm; +using namespace llvm::codeview; +using namespace llvm::msf; +using namespace llvm::pdb; + +ExplainOutputStyle::ExplainOutputStyle(PDBFile &File, uint64_t FileOffset) + : File(File), FileOffset(FileOffset), + BlockIndex(FileOffset / File.getBlockSize()), + OffsetInBlock(FileOffset - BlockIndex * File.getBlockSize()), + P(2, false, outs()) {} + +Error ExplainOutputStyle::dump() { + P.formatLine("Explaining file offset {0} of file '{1}'.", FileOffset, + File.getFilePath()); + + bool IsAllocated = explainBlockStatus(); + if (!IsAllocated) + return Error::success(); + + AutoIndent Indent(P); + if (isSuperBlock()) + explainSuperBlockOffset(); + else if (isFpmBlock()) + explainFpmBlockOffset(); + else if (isBlockMapBlock()) + explainBlockMapOffset(); + else if (isStreamDirectoryBlock()) + 
explainStreamDirectoryOffset(); + else if (auto Index = getBlockStreamIndex()) + explainStreamOffset(*Index); + else + explainUnknownBlock(); + return Error::success(); +} + +bool ExplainOutputStyle::isSuperBlock() const { return BlockIndex == 0; } + +bool ExplainOutputStyle::isFpm1() const { + return ((BlockIndex - 1) % File.getBlockSize() == 0); +} +bool ExplainOutputStyle::isFpm2() const { + return ((BlockIndex - 2) % File.getBlockSize() == 0); +} + +bool ExplainOutputStyle::isFpmBlock() const { return isFpm1() || isFpm2(); } + +bool ExplainOutputStyle::isBlockMapBlock() const { + return BlockIndex == File.getBlockMapIndex(); +} + +bool ExplainOutputStyle::isStreamDirectoryBlock() const { + const auto &Layout = File.getMsfLayout(); + return llvm::is_contained(Layout.DirectoryBlocks, BlockIndex); +} + +Optional ExplainOutputStyle::getBlockStreamIndex() const { + const auto &Layout = File.getMsfLayout(); + for (const auto &Entry : enumerate(Layout.StreamMap)) { + if (!llvm::is_contained(Entry.value(), BlockIndex)) + continue; + return Entry.index(); + } + return None; +} + +bool ExplainOutputStyle::explainBlockStatus() { + if (FileOffset >= File.getFileSize()) { + P.formatLine("Address {0} is not in the file (file size = {1}).", + FileOffset, File.getFileSize()); + return false; + } + P.formatLine("Block:Offset = {2:X-}:{1:X-4}.", FileOffset, OffsetInBlock, + BlockIndex); + + bool IsFree = File.getMsfLayout().FreePageMap[BlockIndex]; + P.formatLine("Address is in block {0} ({1}allocated).", BlockIndex, + IsFree ? 
"un" : ""); + return !IsFree; +} + +void ExplainOutputStyle::explainSuperBlockOffset() { + P.formatLine("This corresponds to offset {0} of MSF super block, ", + OffsetInBlock); + if (OffsetInBlock < sizeof(msf::Magic)) + P.printLine("which is part of the MSF file magic."); + else if (OffsetInBlock < offsetof(SuperBlock, BlockSize)) + P.printLine("which contains the block size of the file."); + else if (OffsetInBlock < offsetof(SuperBlock, FreeBlockMapBlock)) + P.printLine("which contains the index of the FPM block (e.g. 1 or 2)."); + else if (OffsetInBlock < offsetof(SuperBlock, NumBlocks)) + P.printLine("which contains the number of blocks in the file."); + else if (OffsetInBlock < offsetof(SuperBlock, NumDirectoryBytes)) + P.printLine("which contains the number of bytes in the stream directory."); + else if (OffsetInBlock < offsetof(SuperBlock, Unknown1)) + P.printLine("whose purpose is unknown."); + else if (OffsetInBlock < offsetof(SuperBlock, BlockMapAddr)) + P.printLine("which contains the file offset of the block map."); + else { + assert(OffsetInBlock > sizeof(SuperBlock)); + P.printLine( + "which is outside the range of valid data for the super block."); + } +} + +void ExplainOutputStyle::explainFpmBlockOffset() { + const MSFLayout &Layout = File.getMsfLayout(); + uint32_t MainFpm = Layout.mainFpmBlock(); + uint32_t AltFpm = Layout.alternateFpmBlock(); + + assert(isFpmBlock()); + uint32_t Fpm = isFpm1() ? 1 : 2; + uint32_t FpmChunk = BlockIndex / File.getBlockSize(); + assert((Fpm == MainFpm) || (Fpm == AltFpm)); + (void)AltFpm; + bool IsMain = (Fpm == MainFpm); + P.formatLine("Address is in FPM{0} ({1} FPM)", Fpm, IsMain ? 
"Main" : "Alt"); + uint32_t DescribedBlockStart = + 8 * (FpmChunk * File.getBlockSize() + OffsetInBlock); + if (DescribedBlockStart > File.getBlockCount()) { + P.printLine("Address is in extraneous FPM space."); + return; + } + + P.formatLine("Address describes the allocation status of blocks [{0},{1})", + DescribedBlockStart, DescribedBlockStart + 8); +} + +static bool offsetIsInBlock(const PDBFile &File, uint64_t Offset, + uint32_t Block) { + uint64_t BlockOffset = uint64_t(Block) * File.getBlockSize(); + uint64_t BlockOffset1 = BlockOffset + File.getBlockSize(); + return (Offset >= BlockOffset && Offset < BlockOffset1); +} + +void ExplainOutputStyle::explainBlockMapOffset() { + assert(offsetIsInBlock(File, FileOffset, File.getBlockMapIndex())); + uint64_t BlockMapOffset = File.getBlockMapOffset(); + uint32_t OffsetInBlock = FileOffset - BlockMapOffset; + P.formatLine("Address is at offset {0} of the directory block list", + OffsetInBlock); +} + +static uint32_t getOffsetInStream(ArrayRef StreamBlocks, + uint64_t FileOffset, uint32_t BlockSize) { + uint32_t BlockIndex = FileOffset / BlockSize; + uint32_t OffsetInBlock = FileOffset - BlockIndex * BlockSize; + + auto Iter = llvm::find(StreamBlocks, BlockIndex); + assert(Iter != StreamBlocks.end()); + uint32_t StreamBlockIndex = std::distance(StreamBlocks.begin(), Iter); + return StreamBlockIndex * BlockSize + OffsetInBlock; +} + +void ExplainOutputStyle::explainStreamOffset(uint32_t Stream) { + SmallVector Streams; + discoverStreamPurposes(File, Streams); + + assert(Stream <= Streams.size()); + const StreamInfo &S = Streams[Stream]; + const auto &Layout = File.getStreamLayout(Stream); + uint32_t StreamOff = + getOffsetInStream(Layout.Blocks, FileOffset, File.getBlockSize()); + P.formatLine("Address is at offset {0}/{1} of Stream {2} ({3}){4}.", + StreamOff, Layout.Length, Stream, S.getLongName(), + (StreamOff > Layout.Length) ? 
" in unused space" : ""); +} + +void ExplainOutputStyle::explainStreamDirectoryOffset() { + auto DirectoryBlocks = File.getDirectoryBlockArray(); + const auto &Layout = File.getMsfLayout(); + uint32_t StreamOff = + getOffsetInStream(DirectoryBlocks, FileOffset, File.getBlockSize()); + P.formatLine("Address is at offset {0}/{1} of Stream Directory{2}.", + StreamOff, uint32_t(Layout.SB->NumDirectoryBytes), + uint32_t(StreamOff > Layout.SB->NumDirectoryBytes) + ? " in unused space" + : ""); +} + +void ExplainOutputStyle::explainUnknownBlock() { + P.formatLine("Address has unknown purpose."); +} diff --git a/llvm/tools/llvm-pdbutil/ExplainOutputStyle.h b/llvm/tools/llvm-pdbutil/ExplainOutputStyle.h new file mode 100644 index 0000000..386f615 --- /dev/null +++ b/llvm/tools/llvm-pdbutil/ExplainOutputStyle.h @@ -0,0 +1,59 @@ +//===- ExplainOutputStyle.h ----------------------------------- *- C++ --*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TOOLS_LLVMPDBDUMP_EXPLAINOUTPUTSTYLE_H +#define LLVM_TOOLS_LLVMPDBDUMP_EXPLAINOUTPUTSTYLE_H + +#include "LinePrinter.h" +#include "OutputStyle.h" + +#include + +namespace llvm { + +namespace pdb { + +class PDBFile; + +class ExplainOutputStyle : public OutputStyle { + +public: + ExplainOutputStyle(PDBFile &File, uint64_t FileOffset); + + Error dump() override; + +private: + bool explainBlockStatus(); + + bool isFpm1() const; + bool isFpm2() const; + + bool isSuperBlock() const; + bool isFpmBlock() const; + bool isBlockMapBlock() const; + bool isStreamDirectoryBlock() const; + Optional getBlockStreamIndex() const; + + void explainSuperBlockOffset(); + void explainFpmBlockOffset(); + void explainBlockMapOffset(); + void explainStreamDirectoryOffset(); + void explainStreamOffset(uint32_t Stream); + void explainUnknownBlock(); + + PDBFile &File; + const uint64_t FileOffset; + const uint64_t BlockIndex; + const uint64_t OffsetInBlock; + LinePrinter P; +}; +} // namespace pdb +} // namespace llvm + +#endif diff --git a/llvm/tools/llvm-pdbutil/llvm-pdbutil.cpp b/llvm/tools/llvm-pdbutil/llvm-pdbutil.cpp index 89d03d5..f472bfa 100644 --- a/llvm/tools/llvm-pdbutil/llvm-pdbutil.cpp +++ b/llvm/tools/llvm-pdbutil/llvm-pdbutil.cpp @@ -16,6 +16,7 @@ #include "Analyze.h" #include "BytesOutputStyle.h" #include "DumpOutputStyle.h" +#include "ExplainOutputStyle.h" #include "InputFile.h" #include "LinePrinter.h" #include "OutputStyle.h" @@ -111,6 +112,9 @@ cl::SubCommand cl::SubCommand MergeSubcommand("merge", "Merge multiple PDBs into a single PDB"); +cl::SubCommand ExplainSubcommand("explain", + "Explain the meaning of a file offset"); + cl::OptionCategory TypeCategory("Symbol Type Options"); cl::OptionCategory FilterCategory("Filtering and Sorting Options"); cl::OptionCategory OtherOptions("Other Options"); @@ -605,6 +609,16 @@ cl::opt PdbOutputFile("pdb", cl::desc("the name of the 
PDB file to write"), cl::sub(MergeSubcommand)); } + +namespace explain { +cl::list InputFilename(cl::Positional, + cl::desc(""), cl::Required, + cl::sub(ExplainSubcommand)); + +cl::opt Offset("offset", cl::desc("The file offset to explain"), + cl::sub(ExplainSubcommand), cl::Required, + cl::OneOrMore); +} // namespace explain } static ExitOnError ExitOnErr; @@ -1074,6 +1088,14 @@ static void mergePdbs() { ExitOnErr(Builder.commit(OutFile)); } +static void explain() { + std::unique_ptr Session; + PDBFile &File = loadPDB(opts::explain::InputFilename.front(), Session); + auto O = llvm::make_unique(File, opts::explain::Offset); + + ExitOnErr(O->dump()); +} + static bool parseRange(StringRef Str, Optional &Parsed) { if (Str.empty()) @@ -1248,6 +1270,8 @@ int main(int argc_, const char *argv_[]) { exit(1); } mergePdbs(); + } else if (opts::ExplainSubcommand) { + explain(); } outs().flush(); diff --git a/llvm/tools/llvm-pdbutil/llvm-pdbutil.h b/llvm/tools/llvm-pdbutil/llvm-pdbutil.h index 93d34a1..6765b93 100644 --- a/llvm/tools/llvm-pdbutil/llvm-pdbutil.h +++ b/llvm/tools/llvm-pdbutil/llvm-pdbutil.h @@ -189,6 +189,10 @@ extern llvm::cl::list DumpModuleSubsections; extern llvm::cl::opt DumpModuleSyms; } // namespace pdb2yaml +namespace explain { +extern llvm::cl::list InputFilename; +extern llvm::cl::opt Offset; +} // namespace explain } #endif -- 2.7.4