From f5c59654f7c8438294ecb6ac0b8613782e0d3956 Mon Sep 17 00:00:00 2001 From: Zachary Turner Date: Tue, 3 May 2016 00:28:21 +0000 Subject: [PATCH] Parse the TPI (type information) stream of PDB files. This parses the TPI stream (stream 2) from the PDB file. This stream contains some header information followed by a series of codeview records. There is some additional complexity here in that alongside this stream of codeview records is a serialized hash table in order to efficiently query the types. We parse the necessary bookkeeping information to allow us to reconstruct the hash table, but we do not actually construct it yet as there are still a few things that need to be understood first. Differential Revision: http://reviews.llvm.org/D19840 Reviewed By: ruiu, rnk llvm-svn: 268343 --- llvm/include/llvm/DebugInfo/PDB/Raw/ByteStream.h | 4 + .../llvm/DebugInfo/PDB/Raw/MappedBlockStream.h | 3 + llvm/include/llvm/DebugInfo/PDB/Raw/PDBFile.h | 3 + llvm/include/llvm/DebugInfo/PDB/Raw/RawConstants.h | 8 ++ .../llvm/DebugInfo/PDB/Raw/StreamInterface.h | 4 + llvm/include/llvm/DebugInfo/PDB/Raw/StreamReader.h | 2 + llvm/include/llvm/DebugInfo/PDB/Raw/TpiStream.h | 62 +++++++++ llvm/lib/DebugInfo/PDB/CMakeLists.txt | 3 +- llvm/lib/DebugInfo/PDB/Raw/ByteStream.cpp | 9 ++ llvm/lib/DebugInfo/PDB/Raw/MappedBlockStream.cpp | 6 + llvm/lib/DebugInfo/PDB/Raw/PDBFile.cpp | 15 ++- llvm/lib/DebugInfo/PDB/Raw/StreamReader.cpp | 8 ++ llvm/lib/DebugInfo/PDB/Raw/TpiStream.cpp | 143 +++++++++++++++++++++ llvm/test/DebugInfo/PDB/pdbdump-headers.test | 9 ++ llvm/tools/llvm-pdbdump/llvm-pdbdump.cpp | 34 +++++ 15 files changed, 308 insertions(+), 5 deletions(-) create mode 100644 llvm/include/llvm/DebugInfo/PDB/Raw/TpiStream.h create mode 100644 llvm/lib/DebugInfo/PDB/Raw/TpiStream.cpp diff --git a/llvm/include/llvm/DebugInfo/PDB/Raw/ByteStream.h b/llvm/include/llvm/DebugInfo/PDB/Raw/ByteStream.h index e9209f9..86e09833 100644 --- a/llvm/include/llvm/DebugInfo/PDB/Raw/ByteStream.h +++ 
b/llvm/include/llvm/DebugInfo/PDB/Raw/ByteStream.h @@ -35,6 +35,10 @@ public: std::error_code readBytes(uint32_t Offset, MutableArrayRef<uint8_t> Buffer) const override; + + std::error_code getArrayRef(uint32_t Offset, ArrayRef<uint8_t> &Buffer, + uint32_t Length) const override; + uint32_t getLength() const override; ArrayRef<uint8_t> data() const { return Data; } diff --git a/llvm/include/llvm/DebugInfo/PDB/Raw/MappedBlockStream.h b/llvm/include/llvm/DebugInfo/PDB/Raw/MappedBlockStream.h index 8b4aedc..1022a8b 100644 --- a/llvm/include/llvm/DebugInfo/PDB/Raw/MappedBlockStream.h +++ b/llvm/include/llvm/DebugInfo/PDB/Raw/MappedBlockStream.h @@ -27,6 +27,9 @@ public: std::error_code readBytes(uint32_t Offset, MutableArrayRef<uint8_t> Buffer) const override; + std::error_code getArrayRef(uint32_t Offset, ArrayRef<uint8_t> &Buffer, + uint32_t Length) const override; + uint32_t getLength() const override { return StreamLength; } private: diff --git a/llvm/include/llvm/DebugInfo/PDB/Raw/PDBFile.h b/llvm/include/llvm/DebugInfo/PDB/Raw/PDBFile.h index d16bb16..9bf165e 100644 --- a/llvm/include/llvm/DebugInfo/PDB/Raw/PDBFile.h +++ b/llvm/include/llvm/DebugInfo/PDB/Raw/PDBFile.h @@ -23,6 +23,7 @@ namespace pdb { struct PDBFileContext; class DbiStream; class InfoStream; +class TpiStream; class PDBFile { public: @@ -59,11 +60,13 @@ public: InfoStream &getPDBInfoStream(); DbiStream &getPDBDbiStream(); + TpiStream &getPDBTpiStream(); private: std::unique_ptr<PDBFileContext> Context; std::unique_ptr<InfoStream> Info; std::unique_ptr<DbiStream> Dbi; + std::unique_ptr<TpiStream> Tpi; }; } } diff --git a/llvm/include/llvm/DebugInfo/PDB/Raw/RawConstants.h b/llvm/include/llvm/DebugInfo/PDB/Raw/RawConstants.h index 5dd0407..0b6d9de 100644 --- a/llvm/include/llvm/DebugInfo/PDB/Raw/RawConstants.h +++ b/llvm/include/llvm/DebugInfo/PDB/Raw/RawConstants.h @@ -35,6 +35,14 @@ enum PdbRaw_DbiVer : uint32_t { PdbDbiV110 = 20091201 }; +enum PdbRaw_TpiVer : uint32_t { + PdbTpiV40 = 19950410, + PdbTpiV41 = 19951122, + PdbTpiV50 = 19961031, + PdbTpiV70 = 19990903, + PdbTpiV80 = 20040203,
+}; + enum SpecialStream : uint32_t { StreamPDB = 1, StreamTPI = 2, diff --git a/llvm/include/llvm/DebugInfo/PDB/Raw/StreamInterface.h b/llvm/include/llvm/DebugInfo/PDB/Raw/StreamInterface.h index 4e698f0..f2bc1b2 100644 --- a/llvm/include/llvm/DebugInfo/PDB/Raw/StreamInterface.h +++ b/llvm/include/llvm/DebugInfo/PDB/Raw/StreamInterface.h @@ -23,6 +23,10 @@ public: virtual std::error_code readBytes(uint32_t Offset, MutableArrayRef<uint8_t> Buffer) const = 0; + virtual std::error_code getArrayRef(uint32_t Offset, + ArrayRef<uint8_t> &Buffer, + uint32_t Length) const = 0; + virtual uint32_t getLength() const = 0; }; } diff --git a/llvm/include/llvm/DebugInfo/PDB/Raw/StreamReader.h b/llvm/include/llvm/DebugInfo/PDB/Raw/StreamReader.h index 5a524ff..8f43b18 100644 --- a/llvm/include/llvm/DebugInfo/PDB/Raw/StreamReader.h +++ b/llvm/include/llvm/DebugInfo/PDB/Raw/StreamReader.h @@ -39,6 +39,8 @@ public: return readBytes(Casted); } + std::error_code getArrayRef(ArrayRef<uint8_t> &Array, uint32_t Length); + void setOffset(uint32_t Off) { Offset = Off; } uint32_t getOffset() const { return Offset; } uint32_t getLength() const { return Stream.getLength(); } diff --git a/llvm/include/llvm/DebugInfo/PDB/Raw/TpiStream.h b/llvm/include/llvm/DebugInfo/PDB/Raw/TpiStream.h new file mode 100644 index 0000000..f40ef37 --- /dev/null +++ b/llvm/include/llvm/DebugInfo/PDB/Raw/TpiStream.h @@ -0,0 +1,62 @@ +//===- TpiStream.h - PDB Type Info (TPI) Stream 2 Access --------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details.
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_DEBUGINFO_PDB_RAW_PDBTPISTREAM_H +#define LLVM_DEBUGINFO_PDB_RAW_PDBTPISTREAM_H + +#include "llvm/DebugInfo/PDB/PDBTypes.h" +#include "llvm/DebugInfo/PDB/Raw/ByteStream.h" +#include "llvm/DebugInfo/PDB/Raw/MappedBlockStream.h" +#include "llvm/DebugInfo/PDB/Raw/RawConstants.h" + +namespace llvm { +namespace pdb { +class PDBFile; + +typedef uint32_t (*HashFunctionType)(uint8_t *, uint32_t); + +class TpiStream { + struct HeaderInfo; + +public: + struct HashedTypeRecord { + uint32_t Hash; + codeview::TypeLeafKind Kind; + ArrayRef Record; + }; + + TpiStream(PDBFile &File); + ~TpiStream(); + std::error_code reload(); + + PdbRaw_TpiVer getTpiVersion() const; + + uint32_t TypeIndexBegin() const; + uint32_t TypeIndexEnd() const; + uint32_t NumTypeRecords() const; + + ArrayRef records() const; + +private: + PDBFile &Pdb; + MappedBlockStream Stream; + HashFunctionType HashFunction; + + ByteStream RecordsBuffer; + ByteStream TypeIndexOffsetBuffer; + ByteStream HashValuesBuffer; + ByteStream HashAdjBuffer; + + std::vector TypeRecords; + std::unique_ptr Header; +}; +} +} + +#endif diff --git a/llvm/lib/DebugInfo/PDB/CMakeLists.txt b/llvm/lib/DebugInfo/PDB/CMakeLists.txt index 2fa74b9..b11fb306 100644 --- a/llvm/lib/DebugInfo/PDB/CMakeLists.txt +++ b/llvm/lib/DebugInfo/PDB/CMakeLists.txt @@ -36,7 +36,8 @@ add_pdb_impl_folder(Raw Raw/NameHashTable.cpp Raw/NameMap.cpp Raw/RawSession.cpp - Raw/StreamReader.cpp) + Raw/StreamReader.cpp + Raw/TpiStream.cpp) list(APPEND LIBPDB_ADDITIONAL_HEADER_DIRS "${LLVM_MAIN_INCLUDE_DIR}/llvm/DebugInfo/PDB") diff --git a/llvm/lib/DebugInfo/PDB/Raw/ByteStream.cpp b/llvm/lib/DebugInfo/PDB/Raw/ByteStream.cpp index 89477ead..0826b21 100644 --- a/llvm/lib/DebugInfo/PDB/Raw/ByteStream.cpp +++ b/llvm/lib/DebugInfo/PDB/Raw/ByteStream.cpp @@ -58,6 +58,15 @@ std::error_code ByteStream::readBytes(uint32_t Offset, return std::error_code(); } +std::error_code 
ByteStream::getArrayRef(uint32_t Offset, + ArrayRef &Buffer, + uint32_t Length) const { + if (Data.size() < Length + Offset) + return std::make_error_code(std::errc::bad_address); + Buffer = Data.slice(Offset, Length); + return std::error_code(); +} + uint32_t ByteStream::getLength() const { return Data.size(); } StringRef ByteStream::str() const { diff --git a/llvm/lib/DebugInfo/PDB/Raw/MappedBlockStream.cpp b/llvm/lib/DebugInfo/PDB/Raw/MappedBlockStream.cpp index 860f763..25c28e5 100644 --- a/llvm/lib/DebugInfo/PDB/Raw/MappedBlockStream.cpp +++ b/llvm/lib/DebugInfo/PDB/Raw/MappedBlockStream.cpp @@ -51,3 +51,9 @@ MappedBlockStream::readBytes(uint32_t Offset, return std::error_code(); } + +std::error_code MappedBlockStream::getArrayRef(uint32_t Offset, + ArrayRef &Buffer, + uint32_t Length) const { + return std::make_error_code(std::errc::operation_not_supported); +} diff --git a/llvm/lib/DebugInfo/PDB/Raw/PDBFile.cpp b/llvm/lib/DebugInfo/PDB/Raw/PDBFile.cpp index df47ced..05b3dc7 100644 --- a/llvm/lib/DebugInfo/PDB/Raw/PDBFile.cpp +++ b/llvm/lib/DebugInfo/PDB/Raw/PDBFile.cpp @@ -11,6 +11,7 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/DebugInfo/PDB/Raw/DbiStream.h" #include "llvm/DebugInfo/PDB/Raw/InfoStream.h" +#include "llvm/DebugInfo/PDB/Raw/TpiStream.h" #include "llvm/Support/Endian.h" #include "llvm/Support/MemoryBuffer.h" @@ -119,6 +120,8 @@ StringRef PDBFile::getBlockData(uint32_t BlockIndex, uint32_t NumBytes) const { std::error_code PDBFile::parseFileHeaders() { std::error_code EC; MemoryBufferRef BufferRef = *Context->Buffer; + // Make sure the file is sufficiently large to hold a super block. + // Do this before attempting to read the super block. 
if (BufferRef.getBufferSize() < sizeof(SuperBlock)) return std::make_error_code(std::errc::illegal_byte_sequence); @@ -135,10 +138,6 @@ std::error_code PDBFile::parseFileHeaders() { if (BufferRef.getBufferSize() % SB->BlockSize != 0) return std::make_error_code(std::errc::illegal_byte_sequence); - // Make sure the file is sufficiently large to hold a super block. - if (BufferRef.getBufferSize() < sizeof(SuperBlock)) - return std::make_error_code(std::errc::illegal_byte_sequence); - // Check the magic bytes. if (memcmp(SB->MagicBytes, Magic, sizeof(Magic)) != 0) return std::make_error_code(std::errc::illegal_byte_sequence); @@ -271,3 +270,11 @@ DbiStream &PDBFile::getPDBDbiStream() { } return *Dbi; } + +TpiStream &PDBFile::getPDBTpiStream() { + if (!Tpi) { + Tpi.reset(new TpiStream(*this)); + Tpi->reload(); + } + return *Tpi; +} diff --git a/llvm/lib/DebugInfo/PDB/Raw/StreamReader.cpp b/llvm/lib/DebugInfo/PDB/Raw/StreamReader.cpp index 42fe452..18d2995 100644 --- a/llvm/lib/DebugInfo/PDB/Raw/StreamReader.cpp +++ b/llvm/lib/DebugInfo/PDB/Raw/StreamReader.cpp @@ -39,3 +39,11 @@ std::error_code StreamReader::readZeroString(std::string &Dest) { } while (C != '\0'); return std::error_code(); } + +std::error_code StreamReader::getArrayRef(ArrayRef &Array, + uint32_t Length) { + if (auto EC = Stream.getArrayRef(Offset, Array, Length)) + return EC; + Offset += Length; + return std::error_code(); +} diff --git a/llvm/lib/DebugInfo/PDB/Raw/TpiStream.cpp b/llvm/lib/DebugInfo/PDB/Raw/TpiStream.cpp new file mode 100644 index 0000000..7ee4c60 --- /dev/null +++ b/llvm/lib/DebugInfo/PDB/Raw/TpiStream.cpp @@ -0,0 +1,143 @@ +//===- TpiStream.cpp - PDB Type Info (TPI) Stream 2 Access ----------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/DebugInfo/PDB/Raw/TpiStream.h" + +#include "llvm/DebugInfo/CodeView/CodeView.h" +#include "llvm/DebugInfo/CodeView/TypeRecord.h" +#include "llvm/DebugInfo/PDB/Raw/MappedBlockStream.h" +#include "llvm/DebugInfo/PDB/Raw/RawConstants.h" +#include "llvm/DebugInfo/PDB/Raw/StreamReader.h" + +#include "llvm/Support/Endian.h" + +using namespace llvm; +using namespace llvm::support; +using namespace llvm::pdb; + +namespace { +const uint32_t MinTypeIndex = codeview::TypeIndex::FirstNonSimpleIndex; + +const uint32_t MinHashBuckets = 0x1000; +const uint32_t MaxHashBuckets = 0x40000; +} + +static uint32_t HashBufferV8(uint8_t *buffer, uint32_t NumBuckets) { + // Not yet implemented, this is probably some variation of CRC32 but we need + // to be sure of the precise implementation otherwise we won't be able to work + // with persisted hash values. + return 0; +} + +struct TpiStream::HeaderInfo { + struct EmbeddedBuf { + little32_t Off; + ulittle32_t Length; + }; + + ulittle32_t Version; + ulittle32_t HeaderSize; + ulittle32_t TypeIndexBegin; + ulittle32_t TypeIndexEnd; + ulittle32_t TypeRecordBytes; + + ulittle16_t HashStreamIndex; + ulittle16_t HashAuxStreamIndex; + ulittle32_t HashKeySize; + ulittle32_t NumHashBuckets; + + EmbeddedBuf HashValueBuffer; + EmbeddedBuf IndexOffsetBuffer; + EmbeddedBuf HashAdjBuffer; +}; + +TpiStream::TpiStream(PDBFile &File) + : Pdb(File), Stream(StreamTPI, File), HashFunction(nullptr) {} + +TpiStream::~TpiStream() {} + +std::error_code TpiStream::reload() { + StreamReader Reader(Stream); + + if (Reader.bytesRemaining() < sizeof(HeaderInfo)) + return std::make_error_code(std::errc::illegal_byte_sequence); + + Header.reset(new HeaderInfo()); + Reader.readObject(Header.get()); + + if (Header->Version != PdbTpiV80) + return std::make_error_code(std::errc::not_supported); + + if (Header->HeaderSize != sizeof(HeaderInfo)) + return 
std::make_error_code(std::errc::illegal_byte_sequence); + + if (Header->HashKeySize != sizeof(ulittle32_t)) + return std::make_error_code(std::errc::illegal_byte_sequence); + + if (Header->NumHashBuckets < MinHashBuckets || + Header->NumHashBuckets > MaxHashBuckets) + return std::make_error_code(std::errc::illegal_byte_sequence); + + HashFunction = HashBufferV8; + + // The actual type records themselves come from this stream + RecordsBuffer.initialize(Reader, Header->TypeRecordBytes); + TypeRecords.resize(TypeIndexEnd() - ::MinTypeIndex); + StreamReader RecordsReader(RecordsBuffer); + for (uint32_t I = TypeIndexBegin(); I < TypeIndexEnd(); ++I) { + HashedTypeRecord &Record = TypeRecords[I - ::MinTypeIndex]; + codeview::TypeRecordPrefix Prefix; + if (auto EC = RecordsReader.readObject(&Prefix)) + return EC; + + Record.Kind = + static_cast(static_cast(Prefix.Leaf)); + + // Since we read this entire buffer into a ByteStream, we are guaranteed + // that the entire buffer is contiguous (i.e. there's no longer a chance + // that it splits across a page boundary. So we can request a reference + // directly into the stream buffer to avoid unnecessary memory copies. + uint32_t RecordSize = Prefix.Len - sizeof(Prefix.Leaf); + if (auto EC = RecordsReader.getArrayRef(Record.Record, RecordSize)) + return EC; + } + + // Hash indices, hash values, etc come from the hash stream. 
+ MappedBlockStream HS(Header->HashStreamIndex, Pdb); + StreamReader HSR(HS); + HSR.setOffset(Header->HashValueBuffer.Off); + HashValuesBuffer.initialize(HSR, Header->HashValueBuffer.Length); + + HSR.setOffset(Header->HashAdjBuffer.Off); + HashAdjBuffer.initialize(HSR, Header->HashAdjBuffer.Length); + + HSR.setOffset(Header->IndexOffsetBuffer.Off); + TypeIndexOffsetBuffer.initialize(HSR, Header->IndexOffsetBuffer.Length); + + return std::error_code(); +} + +PdbRaw_TpiVer TpiStream::getTpiVersion() const { + uint32_t Value = Header->Version; + return static_cast(Value); +} + +uint32_t TpiStream::TypeIndexBegin() const { return Header->TypeIndexBegin; } + +uint32_t TpiStream::TypeIndexEnd() const { return Header->TypeIndexEnd; } + +uint32_t TpiStream::NumTypeRecords() const { + return TypeIndexEnd() - TypeIndexBegin(); +} + +ArrayRef TpiStream::records() const { + const HashedTypeRecord *Begin = + &TypeRecords[TypeIndexBegin() - ::MinTypeIndex]; + return ArrayRef(Begin, NumTypeRecords()); +} diff --git a/llvm/test/DebugInfo/PDB/pdbdump-headers.test b/llvm/test/DebugInfo/PDB/pdbdump-headers.test index 6b4c61c..9104f4d 100644 --- a/llvm/test/DebugInfo/PDB/pdbdump-headers.test +++ b/llvm/test/DebugInfo/PDB/pdbdump-headers.test @@ -62,6 +62,15 @@ ; EMPTY-NEXT: Type Server Index: 0 ; EMPTY-NEXT: Has EC Info: 0 ; EMPTY-NEXT: 0 Contributing Source Files: +; EMPTY-NEXT: TPI Version: 20040203 +; EMPTY-NEXT: Record count: 75 +; EMPTY-NEXT: Kind: 0x4609 Bytes: [00 00 00 00] +; EMPTY-NEXT: Kind: 0x4104 Bytes: [74 00 00 00 00 00 00 00 00 10 00 00] +; EMPTY-NEXT: Kind: 0x4611 Bytes: [02 15 03 00 01 00 61 70 61 72 74 6D 65 6E 74 00 +; EMPTY-NEXT: 02 15 03 00 02 00 73 69 6E 67 6C 65 00 F3 F2 F1 +; EMPTY-NEXT: 02 15 03 00 03 00 66 72 65 65 00 F1 02 15 03 00 +; EMPTY-NEXT: 04 00 6E 65 75 74 72 61 6C 00 F2 F1 02 15 03 00 +; EMPTY-NEXT: 05 00 62 6F 74 68 00 F1] BIG: BlockSize: 4096 BIG-NEXT: Unknown0: 2 diff --git a/llvm/tools/llvm-pdbdump/llvm-pdbdump.cpp 
b/llvm/tools/llvm-pdbdump/llvm-pdbdump.cpp index a1fe8d5..b58ed8f 100644 --- a/llvm/tools/llvm-pdbdump/llvm-pdbdump.cpp +++ b/llvm/tools/llvm-pdbdump/llvm-pdbdump.cpp @@ -43,6 +43,7 @@ #include "llvm/DebugInfo/PDB/Raw/PDBFile.h" #include "llvm/DebugInfo/PDB/Raw/RawSession.h" #include "llvm/DebugInfo/PDB/Raw/StreamReader.h" +#include "llvm/DebugInfo/PDB/Raw/TpiStream.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/ConvertUTF.h" #include "llvm/Support/FileSystem.h" @@ -148,6 +149,29 @@ cl::opt NoEnumDefs("no-enum-definitions", cl::cat(FilterCategory)); } +static void dumpBytes(raw_ostream &S, ArrayRef Bytes, + uint32_t BytesPerRow, uint32_t Indent) { + S << "["; + uint32_t I = 0; + + uint32_t BytesRemaining = Bytes.size(); + while (BytesRemaining > 0) { + uint32_t BytesThisLine = std::min(BytesRemaining, BytesPerRow); + for (size_t L = 0; L < BytesThisLine; ++L, ++I) { + S << format_hex_no_prefix(Bytes[I], 2, true); + if (L + 1 < BytesThisLine) + S << ' '; + } + BytesRemaining -= BytesThisLine; + if (BytesRemaining > 0) { + S << '\n'; + S.indent(Indent); + } + } + S << ']'; + S.flush(); +} + static void dumpStructure(RawSession &RS) { PDBFile &File = RS.getPDBFile(); @@ -293,6 +317,16 @@ static void dumpStructure(RawSession &RS) { outs().indent(8) << File << '\n'; } } + + TpiStream &Tpi = File.getPDBTpiStream(); + outs() << "TPI Version: " << Tpi.getTpiVersion() << '\n'; + outs() << "Record count: " << Tpi.NumTypeRecords() << '\n'; + for (auto &Record : Tpi.records()) { + outs().indent(2) << "Kind: 0x" << Record.Kind; + outs().indent(2) << "Bytes: "; + dumpBytes(outs(), Record.Record, 16, 24); + outs() << '\n'; + } } static void reportError(StringRef Path, PDB_ErrorCode Error) { -- 2.7.4