From: Zachary Turner Date: Tue, 2 May 2017 23:36:17 +0000 (+0000) Subject: Make codeview::StringTable. X-Git-Tag: llvmorg-5.0.0-rc1~6115 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=7dba20bd2b0767ff6be93d7d68a3045d060a7e22;p=platform%2Fupstream%2Fllvm.git Make codeview::StringTable. Previously we had knowledge of how to serialize and deserialize a string table inside of DebugInfo/PDB, but the string table that it serializes contains a piece that is actually considered CodeView and can appear outside of a PDB. We already have logic in llvm-readobj and MCCodeView to read and write this format, so it doesn't make sense to duplicate the logic in DebugInfoPDB as well. This patch makes codeview::StringTable (for writing) and codeview::StringTableRef (for reading), updates DebugInfoPDB to use these classes for its own writing, and updates llvm-readobj to additionally use StringTableRef for reading. It's a bit more difficult to get MCCodeView to use this for writing, but it's a logical next step. llvm-svn: 301986 --- diff --git a/llvm/include/llvm/DebugInfo/CodeView/StringTable.h b/llvm/include/llvm/DebugInfo/CodeView/StringTable.h new file mode 100644 index 0000000..35f1a57 --- /dev/null +++ b/llvm/include/llvm/DebugInfo/CodeView/StringTable.h @@ -0,0 +1,70 @@ +//===- StringTable.h - CodeView String Table Reader/Writer ------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_DEBUGINFO_CODEVIEW_STRINGTABLE_H +#define LLVM_DEBUGINFO_CODEVIEW_STRINGTABLE_H + +#include "llvm/ADT/StringMap.h" +#include "llvm/ADT/StringRef.h" + +#include "llvm/Support/BinaryStreamRef.h" +#include "llvm/Support/Error.h" + +#include + +namespace llvm { + +class BinaryStreamReader; +class BinaryStreamRef; +class BinaryStreamWriter; + +namespace codeview { + +/// Represents a read-only view of a CodeView string table. This is a very +/// simple flat buffer consisting of null-terminated strings, where strings +/// are retrieved by their offset in the buffer. StringTableRef does not own +/// the underlying storage for the buffer. +class StringTableRef { +public: + StringTableRef(); + + Error initialize(BinaryStreamReader &Stream); + + StringRef getString(uint32_t Offset) const; + +private: + BinaryStreamRef Stream; +}; + +/// Represents a read-write view of a CodeView string table. StringTable owns +/// the underlying storage for the table, and is capable of serializing the +/// string table into a format understood by StringTableRef. +class StringTable { +public: + // If string S does not exist in the string table, insert it. + // Returns the ID for S. + uint32_t insert(StringRef S); + + uint32_t calculateSerializedSize() const; + Error commit(BinaryStreamWriter &Writer) const; + + uint32_t size() const; + + StringMap::const_iterator begin() const { return Strings.begin(); } + + StringMap::const_iterator end() const { return Strings.end(); } + +private: + StringMap Strings; + uint32_t StringSize = 1; +}; +} +} + +#endif diff --git a/llvm/include/llvm/DebugInfo/PDB/Native/PDBFile.h b/llvm/include/llvm/DebugInfo/PDB/Native/PDBFile.h index 5510360..3bed671 100644 --- a/llvm/include/llvm/DebugInfo/PDB/Native/PDBFile.h +++ b/llvm/include/llvm/DebugInfo/PDB/Native/PDBFile.h @@ -130,7 +130,7 @@ private: std::unique_ptr Publics; std::unique_ptr Symbols; std::unique_ptr DirectoryStream; - std::unique_ptr PDBStringTableStream; + std::unique_ptr StringTableStream; std::unique_ptr Strings; }; } diff --git a/llvm/include/llvm/DebugInfo/PDB/Native/PDBFileBuilder.h b/llvm/include/llvm/DebugInfo/PDB/Native/PDBFileBuilder.h index 941d311..cd7d3b0 100644 --- a/llvm/include/llvm/DebugInfo/PDB/Native/PDBFileBuilder.h +++ b/llvm/include/llvm/DebugInfo/PDB/Native/PDBFileBuilder.h @@ -50,8 +50,10 @@ public: Error commit(StringRef Filename); -private: + Expected getNamedStreamIndex(StringRef Name) const; Error addNamedStream(StringRef Name, uint32_t Size); + +private: Expected finalizeMsfLayout(); BumpPtrAllocator &Allocator; diff --git a/llvm/include/llvm/DebugInfo/PDB/Native/PDBStringTable.h b/llvm/include/llvm/DebugInfo/PDB/Native/PDBStringTable.h index a0c9bf6..7d5db06 100644 --- a/llvm/include/llvm/DebugInfo/PDB/Native/PDBStringTable.h +++ b/llvm/include/llvm/DebugInfo/PDB/Native/PDBStringTable.h @@ -13,6 +13,7 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/StringRef.h" +#include "llvm/DebugInfo/CodeView/StringTable.h" #include "llvm/Support/BinaryStreamArray.h" #include "llvm/Support/BinaryStreamRef.h" #include "llvm/Support/Endian.h" @@ -23,19 +24,22 @@ namespace llvm { class BinaryStreamReader; +namespace msf { +class MappedBlockStream; +} + namespace pdb { +struct PDBStringTableHeader; + class PDBStringTable { public: - PDBStringTable(); - - Error load(BinaryStreamReader &Stream); + Error reload(BinaryStreamReader &Reader); uint32_t getByteSize() const; - - uint32_t getNameCount() const { return NameCount; } - uint32_t getHashVersion() const { return HashVersion; } - uint32_t getSignature() const { return Signature; } + uint32_t getNameCount() const; + uint32_t getHashVersion() const; + uint32_t getSignature() const; StringRef getStringForID(uint32_t ID) const; uint32_t getIDForString(StringRef Str) const; @@ -43,11 +47,15 @@ public: FixedStreamArray name_ids() const; private: - BinaryStreamRef NamesBuffer; + Error readHeader(BinaryStreamReader &Reader); + Error readStrings(BinaryStreamReader &Reader); + Error readHashTable(BinaryStreamReader &Reader); + Error readEpilogue(BinaryStreamReader &Reader); + + const PDBStringTableHeader *Header = nullptr; + codeview::StringTableRef Strings; FixedStreamArray IDs; uint32_t ByteSize = 0; - uint32_t Signature = 0; - uint32_t HashVersion = 0; uint32_t NameCount = 0; }; diff --git a/llvm/include/llvm/DebugInfo/PDB/Native/PDBStringTableBuilder.h b/llvm/include/llvm/DebugInfo/PDB/Native/PDBStringTableBuilder.h index 71ed5b4..198c35c 100644 --- a/llvm/include/llvm/DebugInfo/PDB/Native/PDBStringTableBuilder.h +++ b/llvm/include/llvm/DebugInfo/PDB/Native/PDBStringTableBuilder.h @@ -16,27 +16,39 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/StringRef.h" +#include "llvm/DebugInfo/CodeView/StringTable.h" #include "llvm/Support/Error.h" #include namespace llvm { class BinaryStreamWriter; +class WritableBinaryStreamRef; + +namespace msf { +struct MSFLayout; +} namespace pdb { +class PDBFileBuilder; + class PDBStringTableBuilder { public: // If string S does not exist in the string table, insert it. // Returns the ID for S. uint32_t insert(StringRef S); - uint32_t getStringIndex(StringRef S); - uint32_t finalize(); + uint32_t calculateSerializedSize() const; Error commit(BinaryStreamWriter &Writer) const; private: - DenseMap Strings; - uint32_t StringSize = 1; + uint32_t calculateHashTableSize() const; + Error writeHeader(BinaryStreamWriter &Writer) const; + Error writeStrings(BinaryStreamWriter &Writer) const; + Error writeHashTable(BinaryStreamWriter &Writer) const; + Error writeEpilogue(BinaryStreamWriter &Writer) const; + + codeview::StringTable Strings; }; } // end namespace pdb diff --git a/llvm/include/llvm/DebugInfo/PDB/Native/RawTypes.h b/llvm/include/llvm/DebugInfo/PDB/Native/RawTypes.h index ba0c75b..93622d0 100644 --- a/llvm/include/llvm/DebugInfo/PDB/Native/RawTypes.h +++ b/llvm/include/llvm/DebugInfo/PDB/Native/RawTypes.h @@ -308,9 +308,9 @@ struct InfoStreamHeader { /// The header preceeding the /names stream. struct PDBStringTableHeader { - support::ulittle32_t Signature; - support::ulittle32_t HashVersion; - support::ulittle32_t ByteSize; + support::ulittle32_t Signature; // PDBStringTableSignature + support::ulittle32_t HashVersion; // 1 or 2 + support::ulittle32_t ByteSize; // Number of bytes of names buffer. }; const uint32_t PDBStringTableSignature = 0xEFFEEFFE; diff --git a/llvm/include/llvm/Support/BinaryStreamReader.h b/llvm/include/llvm/Support/BinaryStreamReader.h index f30d82d..207c4ab 100644 --- a/llvm/include/llvm/Support/BinaryStreamReader.h +++ b/llvm/include/llvm/Support/BinaryStreamReader.h @@ -31,6 +31,7 @@ namespace llvm { /// are overridable. class BinaryStreamReader { public: + BinaryStreamReader() = default; explicit BinaryStreamReader(BinaryStreamRef Stream); virtual ~BinaryStreamReader() {} @@ -227,6 +228,9 @@ public: /// \returns the next byte in the stream. uint8_t peek() const; + std::pair + split(uint32_t Offset) const; + private: BinaryStreamRef Stream; uint32_t Offset; diff --git a/llvm/include/llvm/Support/BinaryStreamWriter.h b/llvm/include/llvm/Support/BinaryStreamWriter.h index 6734a79..1b61c32 100644 --- a/llvm/include/llvm/Support/BinaryStreamWriter.h +++ b/llvm/include/llvm/Support/BinaryStreamWriter.h @@ -20,6 +20,7 @@ #include "llvm/Support/Error.h" #include #include +#include namespace llvm { @@ -30,8 +31,6 @@ namespace llvm { /// although no methods are overridable. class BinaryStreamWriter { public: - // FIXME: We should be able to slice and drop_front etc on Writers / Readers. - BinaryStreamWriter() = default; explicit BinaryStreamWriter(WritableBinaryStreamRef Stream); virtual ~BinaryStreamWriter() {} @@ -152,6 +151,9 @@ public: return writeStreamRef(Array.getUnderlyingStream()); } + /// Splits the Writer into two Writers at a given offset. + std::pair split(uint32_t Off) const; + void setOffset(uint32_t Off) { Offset = Off; } uint32_t getOffset() const { return Offset; } uint32_t getLength() const { return Stream.getLength(); } diff --git a/llvm/lib/DebugInfo/CodeView/CMakeLists.txt b/llvm/lib/DebugInfo/CodeView/CMakeLists.txt index 421f22c..410d5a3 100644 --- a/llvm/lib/DebugInfo/CodeView/CMakeLists.txt +++ b/llvm/lib/DebugInfo/CodeView/CMakeLists.txt @@ -15,6 +15,7 @@ add_llvm_library(LLVMDebugInfoCodeView ModuleDebugLineFragment.cpp ModuleDebugUnknownFragment.cpp RecordSerialization.cpp + StringTable.cpp SymbolRecordMapping.cpp SymbolDumper.cpp SymbolSerializer.cpp diff --git a/llvm/lib/DebugInfo/CodeView/StringTable.cpp b/llvm/lib/DebugInfo/CodeView/StringTable.cpp new file mode 100644 index 0000000..49e2fe3 --- /dev/null +++ b/llvm/lib/DebugInfo/CodeView/StringTable.cpp @@ -0,0 +1,65 @@ +//===- StringTable.cpp - CodeView String Table Reader/Writer ----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/DebugInfo/CodeView/StringTable.h" + +#include "llvm/Support/BinaryStream.h" +#include "llvm/Support/BinaryStreamReader.h" +#include "llvm/Support/BinaryStreamWriter.h" + +using namespace llvm; +using namespace llvm::codeview; + +StringTableRef::StringTableRef() {} + +Error StringTableRef::initialize(BinaryStreamReader &Reader) { + return Reader.readStreamRef(Stream, Reader.bytesRemaining()); +} + +StringRef StringTableRef::getString(uint32_t Offset) const { + BinaryStreamReader Reader(Stream); + Reader.setOffset(Offset); + StringRef Result; + Error EC = Reader.readCString(Result); + assert(!EC); + consumeError(std::move(EC)); + return Result; +} + +uint32_t StringTable::insert(StringRef S) { + auto P = Strings.insert({S, StringSize}); + + // If a given string didn't exist in the string table, we want to increment + // the string table size. + if (P.second) + StringSize += S.size() + 1; // +1 for '\0' + return P.first->second; +} + +uint32_t StringTable::calculateSerializedSize() const { return StringSize; } + +Error StringTable::commit(BinaryStreamWriter &Writer) const { + assert(Writer.bytesRemaining() == StringSize); + uint32_t MaxOffset = 1; + + for (auto &Pair : Strings) { + StringRef S = Pair.getKey(); + uint32_t Offset = Pair.getValue(); + Writer.setOffset(Offset); + if (auto EC = Writer.writeCString(S)) + return EC; + MaxOffset = std::max(MaxOffset, Offset + S.size() + 1); + } + + Writer.setOffset(MaxOffset); + assert(Writer.bytesRemaining() == 0); + return Error::success(); +} + +uint32_t StringTable::size() const { return Strings.size(); } diff --git a/llvm/lib/DebugInfo/PDB/Native/DbiStream.cpp b/llvm/lib/DebugInfo/PDB/Native/DbiStream.cpp index 4802cc6..db70380 100644 --- a/llvm/lib/DebugInfo/PDB/Native/DbiStream.cpp +++ b/llvm/lib/DebugInfo/PDB/Native/DbiStream.cpp @@ -146,7 +146,7 @@ Error DbiStream::reload() { if (ECSubstream.getLength() > 0) { BinaryStreamReader ECReader(ECSubstream); - if (auto EC = ECNames.load(ECReader)) + if (auto EC = ECNames.reload(ECReader)) return EC; } diff --git a/llvm/lib/DebugInfo/PDB/Native/PDBFile.cpp b/llvm/lib/DebugInfo/PDB/Native/PDBFile.cpp index f158c5c..859295d 100644 --- a/llvm/lib/DebugInfo/PDB/Native/PDBFile.cpp +++ b/llvm/lib/DebugInfo/PDB/Native/PDBFile.cpp @@ -338,7 +338,7 @@ Expected PDBFile::getPDBSymbolStream() { } Expected PDBFile::getStringTable() { - if (!Strings || !PDBStringTableStream) { + if (!Strings) { auto IS = getPDBInfoStream(); if (!IS) return IS.takeError(); @@ -350,12 +350,13 @@ Expected PDBFile::getStringTable() { if (!NS) return NS.takeError(); - BinaryStreamReader Reader(**NS); auto N = llvm::make_unique(); - if (auto EC = N->load(Reader)) + BinaryStreamReader Reader(**NS); + if (auto EC = N->reload(Reader)) return std::move(EC); + assert(Reader.bytesRemaining() == 0); + StringTableStream = std::move(*NS); Strings = std::move(N); - PDBStringTableStream = std::move(*NS); } return *Strings; } diff --git a/llvm/lib/DebugInfo/PDB/Native/PDBFileBuilder.cpp b/llvm/lib/DebugInfo/PDB/Native/PDBFileBuilder.cpp index 972c995..4dd965c 100644 --- a/llvm/lib/DebugInfo/PDB/Native/PDBFileBuilder.cpp +++ b/llvm/lib/DebugInfo/PDB/Native/PDBFileBuilder.cpp @@ -80,9 +80,9 @@ Error PDBFileBuilder::addNamedStream(StringRef Name, uint32_t Size) { } Expected PDBFileBuilder::finalizeMsfLayout() { - uint32_t PDBStringTableSize = Strings.finalize(); + uint32_t StringsLen = Strings.calculateSerializedSize(); - if (auto EC = addNamedStream("/names", PDBStringTableSize)) + if (auto EC = addNamedStream("/names", StringsLen)) return std::move(EC); if (auto EC = addNamedStream("/LinkInfo", 0)) return std::move(EC); @@ -109,6 +109,13 @@ Expected PDBFileBuilder::finalizeMsfLayout() { return Msf->build(); } +Expected PDBFileBuilder::getNamedStreamIndex(StringRef Name) const { + uint32_t SN = 0; + if (!NamedStreams.get(Name, SN)) + return llvm::make_error(raw_error_code::no_stream); + return SN; +} + Error PDBFileBuilder::commit(StringRef Filename) { auto ExpectedLayout = finalizeMsfLayout(); if (!ExpectedLayout) @@ -146,12 +153,12 @@ Error PDBFileBuilder::commit(StringRef Filename) { return EC; } - uint32_t PDBStringTableStreamNo = 0; - if (!NamedStreams.get("/names", PDBStringTableStreamNo)) - return llvm::make_error(raw_error_code::no_stream); + auto ExpectedSN = getNamedStreamIndex("/names"); + if (!ExpectedSN) + return ExpectedSN.takeError(); - auto NS = WritableMappedBlockStream::createIndexedStream( - Layout, Buffer, PDBStringTableStreamNo); + auto NS = WritableMappedBlockStream::createIndexedStream(Layout, Buffer, + *ExpectedSN); BinaryStreamWriter NSWriter(*NS); if (auto EC = Strings.commit(NSWriter)) return EC; diff --git a/llvm/lib/DebugInfo/PDB/Native/PDBStringTable.cpp b/llvm/lib/DebugInfo/PDB/Native/PDBStringTable.cpp index fd3e69d..ee32f61 100644 --- a/llvm/lib/DebugInfo/PDB/Native/PDBStringTable.cpp +++ b/llvm/lib/DebugInfo/PDB/Native/PDBStringTable.cpp @@ -1,5 +1,4 @@ -//===- PDBStringTable.cpp - PDB String Table -----------------------*- C++ -//-*-===// +//===- PDBStringTable.cpp - PDB String Table ---------------------*- C++-*-===// // // The LLVM Compiler Infrastructure // @@ -11,6 +10,7 @@ #include "llvm/DebugInfo/PDB/Native/PDBStringTable.h" #include "llvm/ADT/ArrayRef.h" +#include "llvm/DebugInfo/MSF/MappedBlockStream.h" #include "llvm/DebugInfo/PDB/Native/Hash.h" #include "llvm/DebugInfo/PDB/Native/RawError.h" #include "llvm/DebugInfo/PDB/Native/RawTypes.h" @@ -21,71 +21,91 @@ using namespace llvm; using namespace llvm::support; using namespace llvm::pdb; -PDBStringTable::PDBStringTable() {} - -Error PDBStringTable::load(BinaryStreamReader &Stream) { - ByteSize = Stream.getLength(); +uint32_t PDBStringTable::getByteSize() const { return ByteSize; } +uint32_t PDBStringTable::getNameCount() const { return NameCount; } +uint32_t PDBStringTable::getHashVersion() const { return Header->HashVersion; } +uint32_t PDBStringTable::getSignature() const { return Header->Signature; } - const PDBStringTableHeader *H; - if (auto EC = Stream.readObject(H)) +Error PDBStringTable::readHeader(BinaryStreamReader &Reader) { + if (auto EC = Reader.readObject(Header)) return EC; - if (H->Signature != PDBStringTableSignature) + if (Header->Signature != PDBStringTableSignature) return make_error(raw_error_code::corrupt_file, "Invalid hash table signature"); - if (H->HashVersion != 1 && H->HashVersion != 2) + if (Header->HashVersion != 1 && Header->HashVersion != 2) return make_error(raw_error_code::corrupt_file, "Unsupported hash version"); - Signature = H->Signature; - HashVersion = H->HashVersion; - if (auto EC = Stream.readStreamRef(NamesBuffer, H->ByteSize)) + assert(Reader.bytesRemaining() == 0); + return Error::success(); +} + +Error PDBStringTable::readStrings(BinaryStreamReader &Reader) { + if (auto EC = Strings.initialize(Reader)) { return joinErrors(std::move(EC), make_error(raw_error_code::corrupt_file, "Invalid hash table byte length")); + } + + assert(Reader.bytesRemaining() == 0); + return Error::success(); +} +Error PDBStringTable::readHashTable(BinaryStreamReader &Reader) { const support::ulittle32_t *HashCount; - if (auto EC = Stream.readObject(HashCount)) + if (auto EC = Reader.readObject(HashCount)) return EC; - if (auto EC = Stream.readArray(IDs, *HashCount)) + if (auto EC = Reader.readArray(IDs, *HashCount)) { return joinErrors(std::move(EC), make_error(raw_error_code::corrupt_file, "Could not read bucket array")); + } - if (Stream.bytesRemaining() < sizeof(support::ulittle32_t)) - return make_error(raw_error_code::corrupt_file, - "Missing name count"); + return Error::success(); +} - if (auto EC = Stream.readInteger(NameCount)) +Error PDBStringTable::readEpilogue(BinaryStreamReader &Reader) { + if (auto EC = Reader.readInteger(NameCount)) return EC; - if (Stream.bytesRemaining() > 0) - return make_error(raw_error_code::stream_too_long, - "Unexpected bytes found in string table"); - + assert(Reader.bytesRemaining() == 0); return Error::success(); } -uint32_t PDBStringTable::getByteSize() const { return ByteSize; } +Error PDBStringTable::reload(BinaryStreamReader &Reader) { + + BinaryStreamReader SectionReader; + + std::tie(SectionReader, Reader) = Reader.split(sizeof(PDBStringTableHeader)); + if (auto EC = readHeader(SectionReader)) + return EC; + + std::tie(SectionReader, Reader) = Reader.split(Header->ByteSize); + if (auto EC = readStrings(SectionReader)) + return EC; + + // We don't know how long the hash table is until we parse it, so let the + // function responsible for doing that figure it out. + if (auto EC = readHashTable(Reader)) + return EC; + + std::tie(SectionReader, Reader) = Reader.split(sizeof(uint32_t)); + if (auto EC = readEpilogue(SectionReader)) + return EC; + + assert(Reader.bytesRemaining() == 0); + return Error::success(); +} StringRef PDBStringTable::getStringForID(uint32_t ID) const { - if (ID == IDs[0]) - return StringRef(); - - // NamesBuffer is a buffer of null terminated strings back to back. ID is - // the starting offset of the string we're looking for. So just seek into - // the desired offset and a read a null terminated stream from that offset. - StringRef Result; - BinaryStreamReader NameReader(NamesBuffer); - NameReader.setOffset(ID); - if (auto EC = NameReader.readCString(Result)) - consumeError(std::move(EC)); - return Result; + return Strings.getString(ID); } uint32_t PDBStringTable::getIDForString(StringRef Str) const { - uint32_t Hash = (HashVersion == 1) ? hashStringV1(Str) : hashStringV2(Str); + uint32_t Hash = + (Header->HashVersion == 1) ? hashStringV1(Str) : hashStringV2(Str); size_t Count = IDs.size(); uint32_t Start = Hash % Count; for (size_t I = 0; I < Count; ++I) { diff --git a/llvm/lib/DebugInfo/PDB/Native/PDBStringTableBuilder.cpp b/llvm/lib/DebugInfo/PDB/Native/PDBStringTableBuilder.cpp index 4add6ea..a472181 100644 --- a/llvm/lib/DebugInfo/PDB/Native/PDBStringTableBuilder.cpp +++ b/llvm/lib/DebugInfo/PDB/Native/PDBStringTableBuilder.cpp @@ -8,31 +8,23 @@ //===----------------------------------------------------------------------===// #include "llvm/DebugInfo/PDB/Native/PDBStringTableBuilder.h" + #include "llvm/ADT/ArrayRef.h" +#include "llvm/DebugInfo/MSF/MappedBlockStream.h" #include "llvm/DebugInfo/PDB/Native/Hash.h" +#include "llvm/DebugInfo/PDB/Native/PDBFileBuilder.h" #include "llvm/DebugInfo/PDB/Native/RawTypes.h" #include "llvm/Support/BinaryStreamWriter.h" #include "llvm/Support/Endian.h" using namespace llvm; +using namespace llvm::msf; using namespace llvm::support; using namespace llvm::support::endian; using namespace llvm::pdb; uint32_t PDBStringTableBuilder::insert(StringRef S) { - auto P = Strings.insert({S, StringSize}); - - // If a given string didn't exist in the string table, we want to increment - // the string table size. - if (P.second) - StringSize += S.size() + 1; // +1 for '\0' - return P.first->second; -} - -uint32_t PDBStringTableBuilder::getStringIndex(StringRef S) { - auto Iter = Strings.find(S); - assert(Iter != Strings.end()); - return Iter->second; + return Strings.insert(S); } static uint32_t computeBucketCount(uint32_t NumStrings) { @@ -44,49 +36,52 @@ static uint32_t computeBucketCount(uint32_t NumStrings) { return (NumStrings + 1) * 1.25; } -uint32_t PDBStringTableBuilder::finalize() { - uint32_t Size = 0; - Size += sizeof(PDBStringTableHeader); - Size += StringSize; - Size += sizeof(uint32_t); // Hash table begins with 4-byte size field. +uint32_t PDBStringTableBuilder::calculateHashTableSize() const { + uint32_t Size = sizeof(uint32_t); // Hash table begins with 4-byte size field. + Size += sizeof(uint32_t) * computeBucketCount(Strings.size()); - uint32_t BucketCount = computeBucketCount(Strings.size()); - Size += BucketCount * sizeof(uint32_t); + return Size; +} - Size += - sizeof(uint32_t); // The /names stream ends with the number of strings. +uint32_t PDBStringTableBuilder::calculateSerializedSize() const { + uint32_t Size = 0; + Size += sizeof(PDBStringTableHeader); + Size += Strings.calculateSerializedSize(); + Size += calculateHashTableSize(); + Size += sizeof(uint32_t); // The /names stream ends with the string count. return Size; } -Error PDBStringTableBuilder::commit(BinaryStreamWriter &Writer) const { +Error PDBStringTableBuilder::writeHeader(BinaryStreamWriter &Writer) const { // Write a header PDBStringTableHeader H; H.Signature = PDBStringTableSignature; H.HashVersion = 1; - H.ByteSize = StringSize; + H.ByteSize = Strings.calculateSerializedSize(); if (auto EC = Writer.writeObject(H)) return EC; + assert(Writer.bytesRemaining() == 0); + return Error::success(); +} - // Write a string table. - uint32_t StringStart = Writer.getOffset(); - for (auto Pair : Strings) { - StringRef S = Pair.first; - uint32_t Offset = Pair.second; - Writer.setOffset(StringStart + Offset); - if (auto EC = Writer.writeCString(S)) - return EC; - } - Writer.setOffset(StringStart + StringSize); +Error PDBStringTableBuilder::writeStrings(BinaryStreamWriter &Writer) const { + if (auto EC = Strings.commit(Writer)) + return EC; + + assert(Writer.bytesRemaining() == 0); + return Error::success(); +} +Error PDBStringTableBuilder::writeHashTable(BinaryStreamWriter &Writer) const { // Write a hash table. uint32_t BucketCount = computeBucketCount(Strings.size()); if (auto EC = Writer.writeInteger(BucketCount)) return EC; std::vector Buckets(BucketCount); - for (auto Pair : Strings) { - StringRef S = Pair.first; - uint32_t Offset = Pair.second; + for (auto &Pair : Strings) { + StringRef S = Pair.getKey(); + uint32_t Offset = Pair.getValue(); uint32_t Hash = hashStringV1(S); for (uint32_t I = 0; I != BucketCount; ++I) { @@ -102,7 +97,37 @@ Error PDBStringTableBuilder::commit(BinaryStreamWriter &Writer) const { if (auto EC = Writer.writeArray(ArrayRef(Buckets))) return EC; - if (auto EC = Writer.writeInteger(static_cast(Strings.size()))) + + assert(Writer.bytesRemaining() == 0); + return Error::success(); +} + +Error PDBStringTableBuilder::writeEpilogue(BinaryStreamWriter &Writer) const { + if (auto EC = Writer.writeInteger(Strings.size())) + return EC; + assert(Writer.bytesRemaining() == 0); + return Error::success(); +} + +Error PDBStringTableBuilder::commit(BinaryStreamWriter &Writer) const { + BinaryStreamWriter SectionWriter; + + std::tie(SectionWriter, Writer) = Writer.split(sizeof(PDBStringTableHeader)); + if (auto EC = writeHeader(SectionWriter)) + return EC; + + std::tie(SectionWriter, Writer) = + Writer.split(Strings.calculateSerializedSize()); + if (auto EC = writeStrings(SectionWriter)) return EC; + + std::tie(SectionWriter, Writer) = Writer.split(calculateHashTableSize()); + if (auto EC = writeHashTable(SectionWriter)) + return EC; + + std::tie(SectionWriter, Writer) = Writer.split(sizeof(uint32_t)); + if (auto EC = writeEpilogue(SectionWriter)) + return EC; + return Error::success(); } diff --git a/llvm/lib/DebugInfo/PDB/Native/StringTableStreamBuilder.cpp b/llvm/lib/DebugInfo/PDB/Native/StringTableStreamBuilder.cpp new file mode 100644 index 0000000..b194409 --- /dev/null +++ b/llvm/lib/DebugInfo/PDB/Native/StringTableStreamBuilder.cpp @@ -0,0 +1,123 @@ +//===- StringTableStreamBuilder.cpp - PDB String Table ----------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/DebugInfo/PDB/Native/StringTableStreamBuilder.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/DebugInfo/PDB/Native/Hash.h" +#include "llvm/DebugInfo/PDB/Native/RawTypes.h" +#include "llvm/Support/BinaryStreamWriter.h" +#include "llvm/Support/Endian.h" + +using namespace llvm; +using namespace llvm::support; +using namespace llvm::support::endian; +using namespace llvm::pdb; + +uint32_t StringTableStreamBuilder::insert(StringRef S) { + return Strings.insert(S); +} + +static uint32_t computeBucketCount(uint32_t NumStrings) { + // The /names stream is basically an on-disk open-addressing hash table. + // Hash collisions are resolved by linear probing. We cannot make + // utilization 100% because it will make the linear probing extremely + // slow. But lower utilization wastes disk space. As a reasonable + // load factor, we choose 80%. We need +1 because slot 0 is reserved. + return (NumStrings + 1) * 1.25; +} + +uint32_t StringTableStreamBuilder::hashTableSize() const { + uint32_t Size = sizeof(uint32_t); // Hash table begins with 4-byte size field. + + Size += computeBucketCount(Strings.size()) * sizeof(uint32_t); + return Size; +} + +uint32_t StringTableStreamBuilder::calculateSerializedSize() const { + uint32_t Size = 0; + Size += sizeof(StringTableHeader); + Size += Strings.calculateSerializedSize(); + Size += hashTableSize(); + Size += sizeof(uint32_t); // The table ends with the number of strings. + return Size; +} + +Error StringTableStreamBuilder::writeHeader(BinaryStreamWriter &Writer) const { + // Write a header + StringTableHeader H; + H.Signature = StringTableSignature; + H.HashVersion = 1; + H.ByteSize = Strings.calculateSerializedSize(); + if (auto EC = Writer.writeObject(H)) + return EC; + + assert(Writer.bytesRemaining() == 0); + return Error::success(); +} + +Error StringTableStreamBuilder::writeStrings(BinaryStreamWriter &Writer) const { + if (auto EC = Strings.commit(Writer)) + return EC; + + assert(Writer.bytesRemaining() == 0); + return Error::success(); +} + +Error StringTableStreamBuilder::writeHashTable( + BinaryStreamWriter &Writer) const { + // Write a hash table. + uint32_t BucketCount = computeBucketCount(Strings.size()); + if (auto EC = Writer.writeInteger(BucketCount)) + return EC; + + std::vector Buckets(BucketCount); + + for (auto &Pair : Strings) { + StringRef S = Pair.getKey(); + uint32_t Offset = Pair.getValue(); + uint32_t Hash = hashStringV1(S); + + for (uint32_t I = 0; I != BucketCount; ++I) { + uint32_t Slot = (Hash + I) % BucketCount; + if (Slot == 0) + continue; // Skip reserved slot + if (Buckets[Slot] != 0) + continue; + Buckets[Slot] = Offset; + break; + } + } + + if (auto EC = Writer.writeArray(makeArrayRef(Buckets))) + return EC; + assert(Writer.bytesRemaining() == 0); + return Error::success(); +} + +Error StringTableStreamBuilder::commit(BinaryStreamWriter &Writer) const { + BinaryStreamWriter Section; + + std::tie(Section, Writer) = Writer.split(sizeof(StringTableHeader)); + if (auto EC = writeHeader(Section)) + return EC; + + std::tie(Section, Writer) = Writer.split(Strings.calculateSerializedSize()); + if (auto EC = writeStrings(Section)) + return EC; + + std::tie(Section, Writer) = Writer.split(hashTableSize()); + if (auto EC = writeHashTable(Section)) + return EC; + + if (auto EC = Writer.writeInteger(Strings.size())) + return EC; + + assert(Writer.bytesRemaining() == 0); + return Error::success(); +} diff --git a/llvm/lib/Support/BinaryStreamReader.cpp b/llvm/lib/Support/BinaryStreamReader.cpp index c7a2e0d..702d987 100644 --- a/llvm/lib/Support/BinaryStreamReader.cpp +++ b/llvm/lib/Support/BinaryStreamReader.cpp @@ -93,3 +93,16 @@ uint8_t BinaryStreamReader::peek() const { llvm::consumeError(std::move(EC)); return Buffer[0]; } + +std::pair +BinaryStreamReader::split(uint32_t Off) const { + assert(getLength() >= Off); + + BinaryStreamRef First = Stream.drop_front(Offset); + + BinaryStreamRef Second = First.drop_front(Off); + First = First.keep_front(Off); + BinaryStreamReader W1{First}; + BinaryStreamReader W2{Second}; + return std::make_pair(W1, W2); +} \ No newline at end of file diff --git a/llvm/lib/Support/BinaryStreamWriter.cpp b/llvm/lib/Support/BinaryStreamWriter.cpp index d60b756..d78dbc6 100644 --- a/llvm/lib/Support/BinaryStreamWriter.cpp +++ b/llvm/lib/Support/BinaryStreamWriter.cpp @@ -59,6 +59,19 @@ Error BinaryStreamWriter::writeStreamRef(BinaryStreamRef Ref, uint32_t Length) { return Error::success(); } +std::pair +BinaryStreamWriter::split(uint32_t Off) const { + assert(getLength() >= Off); + + WritableBinaryStreamRef First = Stream.drop_front(Offset); + + WritableBinaryStreamRef Second = First.drop_front(Off); + First = First.keep_front(Off); + BinaryStreamWriter W1{First}; + BinaryStreamWriter W2{Second}; + return std::make_pair(W1, W2); +} + Error BinaryStreamWriter::padToAlignment(uint32_t Align) { uint32_t NewOffset = alignTo(Offset, Align); if (NewOffset > getLength()) diff --git a/llvm/unittests/DebugInfo/PDB/StringTableBuilderTest.cpp b/llvm/unittests/DebugInfo/PDB/StringTableBuilderTest.cpp index 514cdc8..44cc85d 100644 --- a/llvm/unittests/DebugInfo/PDB/StringTableBuilderTest.cpp +++ b/llvm/unittests/DebugInfo/PDB/StringTableBuilderTest.cpp @@ -33,7 +33,7 @@ TEST_F(StringTableBuilderTest, Simple) { EXPECT_EQ(1U, Builder.insert("foo")); EXPECT_EQ(9U, Builder.insert("baz")); - std::vector Buffer(Builder.finalize()); + std::vector Buffer(Builder.calculateSerializedSize()); MutableBinaryByteStream OutStream(Buffer, little); BinaryStreamWriter Writer(OutStream); EXPECT_NO_ERROR(Builder.commit(Writer)); @@ -42,7 +42,7 @@ TEST_F(StringTableBuilderTest, Simple) { BinaryByteStream InStream(Buffer, little); BinaryStreamReader Reader(InStream); PDBStringTable Table; - EXPECT_NO_ERROR(Table.load(Reader)); + EXPECT_NO_ERROR(Table.reload(Reader)); EXPECT_EQ(3U, Table.getNameCount()); EXPECT_EQ(1U, Table.getHashVersion());