From fc8110041f55483631b9e6f11ea105d41708a512 Mon Sep 17 00:00:00 2001 From: "Duncan P. N. Exon Smith" Date: Fri, 25 Mar 2016 15:22:27 +0000 Subject: [PATCH] Revert "Bitcode: Collect all MDString records into a single blob" This reverts commit r264409 since it failed to bootstrap: http://lab.llvm.org:8080/green/job/clang-stage2-configure-Rlto_build/8302/ llvm-svn: 264410 --- llvm/include/llvm/Bitcode/LLVMBitCodes.h | 4 +- llvm/include/llvm/Support/StreamingMemoryObject.h | 10 ++- llvm/lib/Bitcode/Reader/BitcodeReader.cpp | 34 +-------- llvm/lib/Bitcode/Reader/BitstreamReader.cpp | 6 +- llvm/lib/Bitcode/Writer/BitcodeWriter.cpp | 85 ++++++--------------- llvm/lib/Bitcode/Writer/ValueEnumerator.cpp | 23 +----- llvm/lib/Bitcode/Writer/ValueEnumerator.h | 15 +--- llvm/lib/Support/StreamingMemoryObject.cpp | 6 -- .../Bitcode/Inputs/invalid-fixme-streaming-blob.bc | Bin 0 -> 371 bytes llvm/test/Bitcode/invalid.test | 5 ++ llvm/tools/llvm-bcanalyzer/llvm-bcanalyzer.cpp | 4 +- 11 files changed, 50 insertions(+), 142 deletions(-) create mode 100644 llvm/test/Bitcode/Inputs/invalid-fixme-streaming-blob.bc diff --git a/llvm/include/llvm/Bitcode/LLVMBitCodes.h b/llvm/include/llvm/Bitcode/LLVMBitCodes.h index b9c6fee..eec62c0 100644 --- a/llvm/include/llvm/Bitcode/LLVMBitCodes.h +++ b/llvm/include/llvm/Bitcode/LLVMBitCodes.h @@ -209,7 +209,7 @@ enum { BITCODE_CURRENT_EPOCH = 0 }; }; enum MetadataCodes { - METADATA_STRING_OLD = 1, // MDSTRING: [values] + METADATA_STRING = 1, // MDSTRING: [values] METADATA_VALUE = 2, // VALUE: [type num, value num] METADATA_NODE = 3, // NODE: [n x md num] METADATA_NAME = 4, // STRING: [values] @@ -243,8 +243,6 @@ enum { BITCODE_CURRENT_EPOCH = 0 }; METADATA_MODULE = 32, // [distinct, scope, name, ...] METADATA_MACRO = 33, // [distinct, macinfo, line, name, value] METADATA_MACRO_FILE = 34, // [distinct, macinfo, line, file, ...] - METADATA_BULK_STRING_SIZES = 35, // [m x (size num)] - METADATA_BULK_STRING_DATA = 36, // [blob] }; // The constants block (CONSTANTS_BLOCK_ID) describes emission for each diff --git a/llvm/include/llvm/Support/StreamingMemoryObject.h b/llvm/include/llvm/Support/StreamingMemoryObject.h index 1ab8537..a5980c2 100644 --- a/llvm/include/llvm/Support/StreamingMemoryObject.h +++ b/llvm/include/llvm/Support/StreamingMemoryObject.h @@ -28,7 +28,15 @@ public: uint64_t getExtent() const override; uint64_t readBytes(uint8_t *Buf, uint64_t Size, uint64_t Address) const override; - const uint8_t *getPointer(uint64_t Address, uint64_t Size) const override; + const uint8_t *getPointer(uint64_t address, uint64_t size) const override { + // FIXME: This could be fixed by ensuring the bytes are fetched and + // making a copy, requiring that the bitcode size be known, or + // otherwise ensuring that the memory doesn't go away/get reallocated, + // but it's not currently necessary. Users that need the pointer (any + // that need Blobs) don't stream. + report_fatal_error("getPointer in streaming memory objects not allowed"); + return nullptr; + } bool isValidAddress(uint64_t address) const override; /// Drop s bytes from the front of the stream, pushing the positions of the diff --git a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp index b849db4..bb479db 100644 --- a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp +++ b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp @@ -2363,7 +2363,7 @@ std::error_code BitcodeReader::parseMetadata(bool ModuleLevel) { NextMetadataNo++); break; } - case bitc::METADATA_STRING_OLD: { + case bitc::METADATA_STRING: { std::string String(Record.begin(), Record.end()); // Test for upgrading !llvm.loop. @@ -2373,38 +2373,6 @@ std::error_code BitcodeReader::parseMetadata(bool ModuleLevel) { MetadataList.assignValue(MD, NextMetadataNo++); break; } - case bitc::METADATA_BULK_STRING_SIZES: { - // This is a pair of records for an MDString block: SIZES, which is a - // list of string lengths; and DATA, which is a blob with all the strings - // concatenated together. - // - // Note: since this record type was introduced after the upgrade for - // !llvm.loop, we don't need to change HasSeenOldLoopTags. - if (Record.empty()) - return error("Invalid record: missing bulk metadata string sizes"); - - StringRef Blob; - SmallVector BlobRecord; - Code = Stream.ReadCode(); - unsigned BlobCode = Stream.readRecord(Code, BlobRecord, &Blob); - if (BlobCode != bitc::METADATA_BULK_STRING_DATA) - return error("Invalid record: missing bulk metadata string data"); - if (!BlobRecord.empty()) - return error("Invalid record: unexpected bulk metadata arguments"); - - for (uint64_t Size : Record) { - if (Blob.size() < Size) - return error("Invalid record: not enough bulk metadata string bytes"); - - // Extract the current string. - MetadataList.assignValue(MDString::get(Context, Blob.slice(0, Size)), - NextMetadataNo++); - Blob = Blob.drop_front(Size); - } - if (!Blob.empty()) - return error("Invalid record: too many bulk metadata string bytes"); - break; - } case bitc::METADATA_KIND: { // Support older bitcode files that had METADATA_KIND records in a // block with METADATA_BLOCK_ID. diff --git a/llvm/lib/Bitcode/Reader/BitstreamReader.cpp b/llvm/lib/Bitcode/Reader/BitstreamReader.cpp index 43dae0c..db9e0cd 100644 --- a/llvm/lib/Bitcode/Reader/BitstreamReader.cpp +++ b/llvm/lib/Bitcode/Reader/BitstreamReader.cpp @@ -261,10 +261,6 @@ unsigned BitstreamCursor::readRecord(unsigned AbbrevID, } // Otherwise, inform the streamer that we need these bytes in memory. - // Skip over tail padding first. We can't do it later if this is a - // streaming memory object, since that could reallocate the storage that - // the blob pointer references. - JumpToBit(NewEnd); const char *Ptr = (const char*) BitStream->getBitcodeBytes().getPointer(CurBitPos/8, NumElts); @@ -276,6 +272,8 @@ unsigned BitstreamCursor::readRecord(unsigned AbbrevID, for (; NumElts; --NumElts) Vals.push_back((unsigned char)*Ptr++); } + // Skip over tail padding. + JumpToBit(NewEnd); } return Code; diff --git a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp index 0108667..5d05164 100644 --- a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp +++ b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp @@ -1347,78 +1347,31 @@ static void writeNamedMetadata(const Module &M, const ValueEnumerator &VE, } } -static unsigned createMDStringDataAbbrev(BitstreamWriter &Stream) { - BitCodeAbbrev *Abbv = new BitCodeAbbrev(); - Abbv->Add(BitCodeAbbrevOp(bitc::METADATA_BULK_STRING_DATA)); - Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); - return Stream.EmitAbbrev(Abbv); -} - -static void emitMDStringBlob(unsigned DataAbbrev, - ArrayRef Strings, - BitstreamWriter &Stream, - SmallVectorImpl &Record, - SmallString<4096> &Blob) { - for (const Metadata *MD : Strings) { - StringRef S = cast(MD)->getString(); - Record.push_back(S.size()); - Blob.append(S.begin(), S.end()); - } - - Stream.EmitRecord(bitc::METADATA_BULK_STRING_SIZES, Record); - Record.clear(); - - Record.push_back(bitc::METADATA_BULK_STRING_DATA); - Stream.EmitRecordWithBlob(DataAbbrev, Record, Blob); - Record.clear(); -} - -/// Write out a section of records for MDString. -/// -/// All the MDString elements in a metadata block are emitted in bulk. They're -/// grouped into blocks, and each block is emitted with pair of records: -/// -/// - SIZES: a list of the sizes of the strings in the block. -/// - DATA: the blob itself. -static void writeMetadataStrings(ArrayRef Strings, - BitstreamWriter &Stream, - SmallVectorImpl &Record) { - if (Strings.empty()) - return; - - // Emit strings in large blocks to reduce record overhead. Somewhat - // arbitrarily, limit this to 512 strings per blob: - // - big enough to eliminate overhead; - // - small enough that the reader's SIZES record will stay within a page. - const size_t NumStringsPerBlob = 512; - Record.reserve(std::min(NumStringsPerBlob, Strings.size())); - - unsigned DataAbbrev = createMDStringDataAbbrev(Stream); - SmallString<4096> Blob; - while (Strings.size() > NumStringsPerBlob) { - emitMDStringBlob(DataAbbrev, Strings.slice(0, NumStringsPerBlob), Stream, - Record, Blob); - Strings = Strings.slice(NumStringsPerBlob); - } - if (!Strings.empty()) - emitMDStringBlob(DataAbbrev, Strings, Stream, Record, Blob); -} - static void WriteModuleMetadata(const Module &M, const ValueEnumerator &VE, BitstreamWriter &Stream) { - if (VE.getMDs().empty() && M.named_metadata_empty()) + const auto &MDs = VE.getMDs(); + if (MDs.empty() && M.named_metadata_empty()) return; Stream.EnterSubblock(bitc::METADATA_BLOCK_ID, 3); + unsigned MDSAbbrev = 0; + if (VE.hasMDString()) { + // Abbrev for METADATA_STRING. + BitCodeAbbrev *Abbv = new BitCodeAbbrev(); + Abbv->Add(BitCodeAbbrevOp(bitc::METADATA_STRING)); + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array)); + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 8)); + MDSAbbrev = Stream.EmitAbbrev(Abbv); + } + // Initialize MDNode abbreviations. #define HANDLE_MDNODE_LEAF(CLASS) unsigned CLASS##Abbrev = 0; #include "llvm/IR/Metadata.def" SmallVector Record; - writeMetadataStrings(VE.getMDStrings(), Stream, Record); - for (const Metadata *MD : VE.getNonMDStrings()) { + for (const Metadata *MD : MDs) { if (const MDNode *N = dyn_cast(MD)) { assert(N->isResolved() && "Expected forward references to be resolved"); @@ -1432,7 +1385,17 @@ static void WriteModuleMetadata(const Module &M, #include "llvm/IR/Metadata.def" } } - WriteValueAsMetadata(cast(MD), VE, Stream, Record); + if (const auto *MDC = dyn_cast(MD)) { + WriteValueAsMetadata(MDC, VE, Stream, Record); + continue; + } + const MDString *MDS = cast(MD); + // Code: [strchar x N] + Record.append(MDS->bytes_begin(), MDS->bytes_end()); + + // Emit the finished record. + Stream.EmitRecord(bitc::METADATA_STRING, Record, MDSAbbrev); + Record.clear(); } writeNamedMetadata(M, VE, Stream, Record); diff --git a/llvm/lib/Bitcode/Writer/ValueEnumerator.cpp b/llvm/lib/Bitcode/Writer/ValueEnumerator.cpp index 69cafb7..08b5e45 100644 --- a/llvm/lib/Bitcode/Writer/ValueEnumerator.cpp +++ b/llvm/lib/Bitcode/Writer/ValueEnumerator.cpp @@ -280,7 +280,8 @@ static bool isIntOrIntVectorValue(const std::pair &V) { ValueEnumerator::ValueEnumerator(const Module &M, bool ShouldPreserveUseListOrder) - : ShouldPreserveUseListOrder(ShouldPreserveUseListOrder) { + : HasMDString(false), + ShouldPreserveUseListOrder(ShouldPreserveUseListOrder) { if (ShouldPreserveUseListOrder) UseListOrders = predictUseListOrder(M); @@ -374,9 +375,6 @@ ValueEnumerator::ValueEnumerator(const Module &M, // Optimize constant ordering. OptimizeConstants(FirstConstant, Values.size()); - - // Organize metadata ordering. - organizeMetadata(); } unsigned ValueEnumerator::getInstructionID(const Instruction *Inst) const { @@ -532,8 +530,8 @@ void ValueEnumerator::EnumerateMetadata(const Metadata *MD) { EnumerateMDNodeOperands(N); else if (auto *C = dyn_cast(MD)) EnumerateValue(C->getValue()); - else - ++NumMDStrings; + + HasMDString |= isa(MD); // Replace the dummy ID inserted above with the correct one. MetadataMap may // have changed by inserting operands, so we need a fresh lookup here. @@ -559,19 +557,6 @@ void ValueEnumerator::EnumerateFunctionLocalMetadata( FunctionLocalMDs.push_back(Local); } -void ValueEnumerator::organizeMetadata() { - if (!NumMDStrings) - return; - - // Put the strings first. - std::stable_partition(MDs.begin(), MDs.end(), - [](const Metadata *MD) { return isa(MD); }); - - // Renumber. - for (unsigned I = 0, E = MDs.size(); I != E; ++I) - MetadataMap[MDs[I]] = I + 1; -} - void ValueEnumerator::EnumerateValue(const Value *V) { assert(!V->getType()->isVoidTy() && "Can't insert void values!"); assert(!isa(V) && "EnumerateValue doesn't handle Metadata!"); diff --git a/llvm/lib/Bitcode/Writer/ValueEnumerator.h b/llvm/lib/Bitcode/Writer/ValueEnumerator.h index fd09a69..7665210 100644 --- a/llvm/lib/Bitcode/Writer/ValueEnumerator.h +++ b/llvm/lib/Bitcode/Writer/ValueEnumerator.h @@ -66,7 +66,7 @@ private: SmallVector FunctionLocalMDs; typedef DenseMap MetadataMapType; MetadataMapType MetadataMap; - unsigned NumMDStrings = 0; + bool HasMDString; bool ShouldPreserveUseListOrder; typedef DenseMap AttributeGroupMapType; @@ -121,6 +121,8 @@ public: } unsigned numMDs() const { return MDs.size(); } + bool hasMDString() const { return HasMDString; } + bool shouldPreserveUseListOrder() const { return ShouldPreserveUseListOrder; } unsigned getTypeID(Type *T) const { @@ -155,16 +157,9 @@ public: const ValueList &getValues() const { return Values; } const std::vector &getMDs() const { return MDs; } - ArrayRef getMDStrings() const { - return makeArrayRef(MDs).slice(0, NumMDStrings); - } - ArrayRef getNonMDStrings() const { - return makeArrayRef(MDs).slice(NumMDStrings); - } const SmallVectorImpl &getFunctionLocalMDs() const { return FunctionLocalMDs; } - const TypeList &getTypes() const { return Types; } const std::vector &getBasicBlocks() const { return BasicBlocks; @@ -194,10 +189,6 @@ public: private: void OptimizeConstants(unsigned CstStart, unsigned CstEnd); - // Reorder the reachable metadata. This is not just an optimization, but is - // mandatory for emitting MDString correctly. - void organizeMetadata(); - void EnumerateMDNodeOperands(const MDNode *N); void EnumerateMetadata(const Metadata *MD); void EnumerateFunctionLocalMetadata(const LocalAsMetadata *Local); diff --git a/llvm/lib/Support/StreamingMemoryObject.cpp b/llvm/lib/Support/StreamingMemoryObject.cpp index fb56617..5a44e62 100644 --- a/llvm/lib/Support/StreamingMemoryObject.cpp +++ b/llvm/lib/Support/StreamingMemoryObject.cpp @@ -104,12 +104,6 @@ uint64_t StreamingMemoryObject::readBytes(uint8_t *Buf, uint64_t Size, return Size; } -const uint8_t *StreamingMemoryObject::getPointer(uint64_t Address, - uint64_t Size) const { - fetchToPos(Address + Size - 1); - return &Bytes[Address + BytesSkipped]; -} - bool StreamingMemoryObject::dropLeadingBytes(size_t s) { if (BytesRead < s) return true; BytesSkipped = s; diff --git a/llvm/test/Bitcode/Inputs/invalid-fixme-streaming-blob.bc b/llvm/test/Bitcode/Inputs/invalid-fixme-streaming-blob.bc new file mode 100644 index 0000000000000000000000000000000000000000..7e32f8b0774f9004171ebd4e9d435a2821399ee0 GIT binary patch literal 371 zcmZ>AK5$Qwhk+rFfq{X$Nr8b0NDBcmd!zD1#}h1`Yyw7>lNeigR9QJBWCyV@D%zAD)@+Q2mo50r~nQV9$}V)8HX5p)C7PAgA50P u&4SG!;t}`HoW&j>a8@KyO(ABhsDPm2)iei}mK0`?Vh|8vU;qGgdQl<( literal 0 HcmV?d00001 diff --git a/llvm/test/Bitcode/invalid.test b/llvm/test/Bitcode/invalid.test index 4993201..3425adc 100644 --- a/llvm/test/Bitcode/invalid.test +++ b/llvm/test/Bitcode/invalid.test @@ -168,6 +168,11 @@ RUN: FileCheck --check-prefix=INVALID-ARGUMENT-TYPE %s INVALID-ARGUMENT-TYPE: Invalid function argument type +RUN: not llvm-dis -disable-output %p/Inputs/invalid-fixme-streaming-blob.bc 2>&1 | \ +RUN: FileCheck --check-prefix=STREAMING-BLOB %s + +STREAMING-BLOB: getPointer in streaming memory objects not allowed + RUN: not llvm-dis -disable-output %p/Inputs/invalid-function-comdat-id.bc 2>&1 | \ RUN: FileCheck --check-prefix=INVALID-FCOMDAT-ID %s diff --git a/llvm/tools/llvm-bcanalyzer/llvm-bcanalyzer.cpp b/llvm/tools/llvm-bcanalyzer/llvm-bcanalyzer.cpp index 8560cf7..32179c1 100644 --- a/llvm/tools/llvm-bcanalyzer/llvm-bcanalyzer.cpp +++ b/llvm/tools/llvm-bcanalyzer/llvm-bcanalyzer.cpp @@ -312,9 +312,7 @@ static const char *GetCodeName(unsigned CodeID, unsigned BlockID, case bitc::METADATA_BLOCK_ID: switch(CodeID) { default:return nullptr; - STRINGIFY_CODE(METADATA, STRING_OLD) - STRINGIFY_CODE(METADATA, BULK_STRING_SIZES) - STRINGIFY_CODE(METADATA, BULK_STRING_DATA) + STRINGIFY_CODE(METADATA, STRING) STRINGIFY_CODE(METADATA, NAME) STRINGIFY_CODE(METADATA, KIND) // Older bitcode has it in a MODULE_BLOCK STRINGIFY_CODE(METADATA, NODE) -- 2.7.4