// METADATA_VALUES: [numvals]
MODULE_CODE_METADATA_VALUES = 15,
+
+ // SOURCE_FILENAME: [namechar x N]
+ MODULE_CODE_SOURCE_FILENAME = 16,
};
/// PARAMATTR blocks have code for defining a parameter attribute set.
VST_CODE_ENTRY = 1, // VST_ENTRY: [valueid, namechar x N]
VST_CODE_BBENTRY = 2, // VST_BBENTRY: [bbid, namechar x N]
VST_CODE_FNENTRY = 3, // VST_FNENTRY: [valueid, offset, namechar x N]
- // VST_COMBINED_FNENTRY: [offset, namechar x N]
+ // VST_COMBINED_FNENTRY: [funcsumoffset, funcguid]
VST_CODE_COMBINED_FNENTRY = 4
};
#include "llvm/IR/GlobalObject.h"
#include "llvm/IR/OperandTraits.h"
#include "llvm/Support/Compiler.h"
+#include "llvm/Support/MD5.h"
namespace llvm {
GlobalValue::LinkageTypes Linkage,
StringRef FileName);
+ /// Return a 64-bit global unique ID constructed from global function name
+ /// (i.e. returned by getGlobalIdentifier).
+ static uint64_t getGUID(StringRef GlobalFuncName) {
+ return MD5Hash(GlobalFuncName);
+ }
+
private:
void allocHungoffUselist();
template<int Idx> void setHungoffOperand(Constant *C);
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringMap.h"
+#include "llvm/IR/Function.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/raw_ostream.h"
/// COMDAT functions of the same name.
typedef std::vector<std::unique_ptr<FunctionInfo>> FunctionInfoList;
-/// Map from function name to corresponding function info structures.
-typedef StringMap<FunctionInfoList> FunctionInfoMapTy;
+/// Map from function GUID to corresponding function info structures.
+/// Use a std::map rather than a DenseMap since it will likely incur
+/// less overhead, as the value type is not very small and the size
+/// of the map is unknown, resulting in inefficiencies due to repeated
+/// insertions and resizing.
+typedef std::map<uint64_t, FunctionInfoList> FunctionInfoMapTy;
/// Type used for iterating through the function info map.
typedef FunctionInfoMapTy::const_iterator const_funcinfo_iterator;
/// Get the list of function info objects for a given function.
const FunctionInfoList &getFunctionInfoList(StringRef FuncName) {
- return FunctionMap[FuncName];
+ return FunctionMap[Function::getGUID(FuncName)];
}
/// Get the list of function info objects for a given function.
const const_funcinfo_iterator findFunctionInfoList(StringRef FuncName) const {
- return FunctionMap.find(FuncName);
+ return FunctionMap.find(Function::getGUID(FuncName));
}
/// Add a function info for a function of the given name.
void addFunctionInfo(StringRef FuncName, std::unique_ptr<FunctionInfo> Info) {
- FunctionMap[FuncName].push_back(std::move(Info));
+ FunctionMap[Function::getGUID(FuncName)].push_back(std::move(Info));
+ }
+
+ void addFunctionInfo(uint64_t FuncGUID, std::unique_ptr<FunctionInfo> Info) {
+ FunctionMap[FuncGUID].push_back(std::move(Info));
}
/// Iterator to allow writer to walk through table during emission.
std::unique_ptr<GVMaterializer>
Materializer; ///< Used to materialize GlobalValues
std::string ModuleID; ///< Human readable identifier for the module
+ std::string SourceFileName; ///< Original source file name for module,
+ ///< recorded in bitcode.
std::string TargetTriple; ///< Platform target triple Module compiled on
///< Format: (arch)(sub)-(vendor)-(sys0-(abi)
void *NamedMDSymTab; ///< NamedMDNode names.
/// @returns the module identifier as a string
const std::string &getModuleIdentifier() const { return ModuleID; }
+ /// Get the module's original source file name. When compiling from
+ /// bitcode, this is taken from a bitcode record where it was recorded.
+ /// For other compiles it is the same as the ModuleID, which would
+ /// contain the source file name.
+ const std::string &getSourceFileName() const { return SourceFileName; }
+
/// \brief Get a short "name" for the module.
///
/// This is useful for debugging or logging. It is essentially a convenience
/// Set the module identifier.
void setModuleIdentifier(StringRef ID) { ModuleID = ID; }
+ /// Set the module's original source file name.
+ void setSourceFileName(StringRef Name) { SourceFileName = Name; }
+
/// Set the data layout
void setDataLayout(StringRef Desc);
void setDataLayout(const DataLayout &Other);
/// summary records.
DenseMap<uint64_t, StringRef> ModuleIdMap;
+ /// Original source file name recorded in a bitcode record.
+ std::string SourceFileName;
+
public:
std::error_code error(BitcodeError E, const Twine &Message);
std::error_code error(BitcodeError E);
assert(MetadataList.size() == 0);
MetadataList.resize(NumModuleMDs);
break;
+ /// MODULE_CODE_SOURCE_FILENAME: [namechar x N]
+ case bitc::MODULE_CODE_SOURCE_FILENAME:
+ SmallString<128> ValueName;
+ if (convertToString(Record, 0, ValueName))
+ return error("Invalid record");
+ TheModule->setSourceFileName(ValueName);
+ break;
}
Record.clear();
}
return error("Invalid record");
unsigned ValueID = Record[0];
uint64_t FuncOffset = Record[1];
- std::unique_ptr<FunctionInfo> FuncInfo =
- llvm::make_unique<FunctionInfo>(FuncOffset);
- if (foundFuncSummary() && !IsLazy) {
+ assert(!IsLazy && "Lazy summary read only supported for combined index");
+ // Gracefully handle bitcode without a function summary section,
+ // which will simply not populate the index.
+ if (foundFuncSummary()) {
DenseMap<uint64_t, std::unique_ptr<FunctionSummary>>::iterator SMI =
SummaryMap.find(ValueID);
assert(SMI != SummaryMap.end() && "Summary info not found");
+ std::unique_ptr<FunctionInfo> FuncInfo =
+ llvm::make_unique<FunctionInfo>(FuncOffset);
FuncInfo->setFunctionSummary(std::move(SMI->second));
+ assert(!SourceFileName.empty());
+ TheIndex->addFunctionInfo(
+ Function::getGlobalIdentifier(
+ ValueName, FuncInfo->functionSummary()->getFunctionLinkage(),
+ SourceFileName),
+ std::move(FuncInfo));
}
- TheIndex->addFunctionInfo(ValueName, std::move(FuncInfo));
ValueName.clear();
break;
}
case bitc::VST_CODE_COMBINED_FNENTRY: {
- // VST_CODE_FNENTRY: [offset, namechar x N]
- if (convertToString(Record, 1, ValueName))
- return error("Invalid record");
+ // VST_CODE_COMBINED_FNENTRY: [offset, funcguid]
uint64_t FuncSummaryOffset = Record[0];
+ uint64_t FuncGUID = Record[1];
std::unique_ptr<FunctionInfo> FuncInfo =
llvm::make_unique<FunctionInfo>(FuncSummaryOffset);
if (foundFuncSummary() && !IsLazy) {
assert(SMI != SummaryMap.end() && "Summary info not found");
FuncInfo->setFunctionSummary(std::move(SMI->second));
}
- TheIndex->addFunctionInfo(ValueName, std::move(FuncInfo));
+ TheIndex->addFunctionInfo(FuncGUID, std::move(FuncInfo));
ValueName.clear();
break;
if (Stream.EnterSubBlock(bitc::MODULE_BLOCK_ID))
return error("Invalid record");
+ SmallVector<uint64_t, 64> Record;
+
// Read the function index for this module.
while (1) {
BitstreamEntry Entry = Stream.advance();
continue;
case BitstreamEntry::Record:
- Stream.skipRecord(Entry.ID);
+ // Once we find the single record of interest, skip the rest.
+ if (!SourceFileName.empty())
+ Stream.skipRecord(Entry.ID);
+ else {
+ Record.clear();
+ auto BitCode = Stream.readRecord(Entry.ID, Record);
+ switch (BitCode) {
+ default:
+ break; // Default behavior, ignore unknown content.
+ /// MODULE_CODE_SOURCE_FILENAME: [namechar x N]
+ case bitc::MODULE_CODE_SOURCE_FILENAME:
+ SmallString<128> ValueName;
+ if (convertToString(Record, 0, ValueName))
+ return error("Invalid record");
+ SourceFileName = ValueName.c_str();
+ break;
+ }
+ }
continue;
}
}
return Stream.GetCurrentBitNo() - 32;
}
+enum StringEncoding { SE_Char6, SE_Fixed7, SE_Fixed8 };
+
+/// Determine the encoding to use for the given string name and length.
+static StringEncoding getStringEncoding(const char *Str, unsigned StrLen) {
+ bool isChar6 = true;
+ for (const char *C = Str, *E = C + StrLen; C != E; ++C) {
+ if (isChar6)
+ isChar6 = BitCodeAbbrevOp::isChar6(*C);
+ if ((unsigned char)*C & 128)
+ // don't bother scanning the rest.
+ return SE_Fixed8;
+ }
+ if (isChar6)
+ return SE_Char6;
+ else
+ return SE_Fixed7;
+}
+
/// Emit top-level description of module, including target triple, inline asm,
/// descriptors for global variables, and function prototype info.
/// Returns the bit offset to backpatch with the location of the real VST.
// function importing where we lazy load the metadata as a postpass,
// we want to avoid parsing the module-level metadata before parsing
// the imported functions.
- BitCodeAbbrev *Abbv = new BitCodeAbbrev();
- Abbv->Add(BitCodeAbbrevOp(bitc::MODULE_CODE_METADATA_VALUES));
- Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6));
- unsigned MDValsAbbrev = Stream.EmitAbbrev(Abbv);
- Vals.push_back(VE.numMDs());
- Stream.EmitRecord(bitc::MODULE_CODE_METADATA_VALUES, Vals, MDValsAbbrev);
- Vals.clear();
+ {
+ BitCodeAbbrev *Abbv = new BitCodeAbbrev();
+ Abbv->Add(BitCodeAbbrevOp(bitc::MODULE_CODE_METADATA_VALUES));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6));
+ unsigned MDValsAbbrev = Stream.EmitAbbrev(Abbv);
+ Vals.push_back(VE.numMDs());
+ Stream.EmitRecord(bitc::MODULE_CODE_METADATA_VALUES, Vals, MDValsAbbrev);
+ Vals.clear();
+ }
+
+ // Emit the module's source file name.
+ {
+ StringEncoding Bits =
+ getStringEncoding(M->getName().data(), M->getName().size());
+ BitCodeAbbrevOp AbbrevOpToUse = BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 8);
+ if (Bits == SE_Char6)
+ AbbrevOpToUse = BitCodeAbbrevOp(BitCodeAbbrevOp::Char6);
+ else if (Bits == SE_Fixed7)
+ AbbrevOpToUse = BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 7);
+
+ // MODULE_CODE_SOURCE_FILENAME: [namechar x N]
+ BitCodeAbbrev *Abbv = new BitCodeAbbrev();
+ Abbv->Add(BitCodeAbbrevOp(bitc::MODULE_CODE_SOURCE_FILENAME));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
+ Abbv->Add(AbbrevOpToUse);
+ unsigned FilenameAbbrev = Stream.EmitAbbrev(Abbv);
+
+ for (const auto P : M->getSourceFileName())
+ Vals.push_back((unsigned char)P);
+
+ // Emit the finished record.
+ Stream.EmitRecord(bitc::MODULE_CODE_SOURCE_FILENAME, Vals, FilenameAbbrev);
+ Vals.clear();
+ }
uint64_t VSTOffsetPlaceholder =
WriteValueSymbolTableForwardDecl(M->getValueSymbolTable(), Stream);
Vals.clear();
}
-enum StringEncoding { SE_Char6, SE_Fixed7, SE_Fixed8 };
-
-/// Determine the encoding to use for the given string name and length.
-static StringEncoding getStringEncoding(const char *Str, unsigned StrLen) {
- bool isChar6 = true;
- for (const char *C = Str, *E = C + StrLen; C != E; ++C) {
- if (isChar6)
- isChar6 = BitCodeAbbrevOp::isChar6(*C);
- if ((unsigned char)*C & 128)
- // don't bother scanning the rest.
- return SE_Fixed8;
- }
- if (isChar6)
- return SE_Char6;
- else
- return SE_Fixed7;
-}
-
/// Emit names for globals/functions etc. The VSTOffsetPlaceholder,
/// BitcodeStartBit and FunctionIndex are only passed for the module-level
/// VST, where we are including a function bitcode index and need to
BitstreamWriter &Stream) {
Stream.EnterSubblock(bitc::VALUE_SYMTAB_BLOCK_ID, 4);
- // 8-bit fixed-width VST_CODE_COMBINED_FNENTRY function strings.
BitCodeAbbrev *Abbv = new BitCodeAbbrev();
Abbv->Add(BitCodeAbbrevOp(bitc::VST_CODE_COMBINED_FNENTRY));
- Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // funcoffset
- Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
- Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 8));
- unsigned FnEntry8BitAbbrev = Stream.EmitAbbrev(Abbv);
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // funcsumoffset
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // funcguid
+ unsigned FnEntryAbbrev = Stream.EmitAbbrev(Abbv);
- // 7-bit fixed width VST_CODE_COMBINED_FNENTRY function strings.
- Abbv = new BitCodeAbbrev();
- Abbv->Add(BitCodeAbbrevOp(bitc::VST_CODE_COMBINED_FNENTRY));
- Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // funcoffset
- Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
- Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 7));
- unsigned FnEntry7BitAbbrev = Stream.EmitAbbrev(Abbv);
-
- // 6-bit char6 VST_CODE_COMBINED_FNENTRY function strings.
- Abbv = new BitCodeAbbrev();
- Abbv->Add(BitCodeAbbrevOp(bitc::VST_CODE_COMBINED_FNENTRY));
- Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // funcoffset
- Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
- Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Char6));
- unsigned FnEntry6BitAbbrev = Stream.EmitAbbrev(Abbv);
-
- // FIXME: We know if the type names can use 7-bit ascii.
- SmallVector<unsigned, 64> NameVals;
+ SmallVector<uint64_t, 64> NameVals;
for (const auto &FII : Index) {
- for (const auto &FI : FII.getValue()) {
+ for (const auto &FI : FII.second) {
NameVals.push_back(FI->bitcodeIndex());
- StringRef FuncName = FII.first();
-
- // Figure out the encoding to use for the name.
- StringEncoding Bits = getStringEncoding(FuncName.data(), FuncName.size());
+ uint64_t FuncGUID = FII.first;
- // VST_CODE_COMBINED_FNENTRY: [funcsumoffset, namechar x N]
- unsigned AbbrevToUse = FnEntry8BitAbbrev;
- if (Bits == SE_Char6)
- AbbrevToUse = FnEntry6BitAbbrev;
- else if (Bits == SE_Fixed7)
- AbbrevToUse = FnEntry7BitAbbrev;
+ // VST_CODE_COMBINED_FNENTRY: [funcsumoffset, funcguid]
+ unsigned AbbrevToUse = FnEntryAbbrev;
- for (const auto P : FuncName)
- NameVals.push_back((unsigned char)P);
+ NameVals.push_back(FuncGUID);
// Emit the finished record.
Stream.EmitRecord(bitc::VST_CODE_COMBINED_FNENTRY, NameVals, AbbrevToUse);
SmallVector<unsigned, 64> NameVals;
for (const auto &FII : I) {
- for (auto &FI : FII.getValue()) {
+ for (auto &FI : FII.second) {
FunctionSummary *FS = FI->functionSummary();
assert(FS);
StringRef ModPath;
for (auto &OtherFuncInfoLists : *Other) {
- std::string FuncName = OtherFuncInfoLists.getKey();
+ uint64_t FuncGUID = OtherFuncInfoLists.first;
FunctionInfoList &List = OtherFuncInfoLists.second;
// Assert that the func info list only has one entry, since we shouldn't
// string reference owned by the combined index.
Info->functionSummary()->setModulePath(ModPath);
- // If it is a local function, rename it.
- if (GlobalValue::isLocalLinkage(
- Info->functionSummary()->getFunctionLinkage())) {
- // Any local functions are virtually renamed when being added to the
- // combined index map, to disambiguate from other functions with
- // the same name. The symbol table created for the combined index
- // file should contain the renamed symbols.
- FuncName =
- FunctionInfoIndex::getGlobalNameForLocal(FuncName, NextModuleId);
- }
-
// Add new function info to existing list. There may be duplicates when
// combining FunctionMap entries, due to COMDAT functions. Any local
- // functions were virtually renamed above.
- addFunctionInfo(FuncName, std::move(Info));
+ // functions were given unique global IDs.
+ addFunctionInfo(FuncGUID, std::move(Info));
}
}
//
Module::Module(StringRef MID, LLVMContext &C)
- : Context(C), Materializer(), ModuleID(MID), DL("") {
+ : Context(C), Materializer(), ModuleID(MID), SourceFileName(MID), DL("") {
ValSymTab = new ValueSymbolTable();
NamedMDSymTab = new StringMap<NamedMDNode *>();
Context.addModule(this);
if (CalledFunction->hasInternalLinkage()) {
ImportedName = Renamed;
}
- auto It = CalledFunctions.insert(ImportedName);
+ // Compute the global identifier used in the function index.
+ auto CalledFunctionGlobalID = Function::getGlobalIdentifier(
+ CalledFunction->getName(), CalledFunction->getLinkage(),
+ CalledFunction->getParent()->getSourceFileName());
+ auto It = CalledFunctions.insert(CalledFunctionGlobalID);
if (!It.second) {
// This is a call to a function we already considered, skip.
continue;
GlobalValue *SGV = SrcModule.getNamedValue(CalledFunctionName);
if (!SGV) {
- // The destination module is referencing function using their renamed name
- // when importing a function that was originally local in the source
- // module. The source module we have might not have been renamed so we try
- // to remove the suffix added during the renaming to recover the original
+ // The function is referenced by a global identifier, which has the
+ // source file name prepended for functions that were originally local
+ // in the source module. Strip any prepended name to recover the original
// name in the source module.
- std::pair<StringRef, StringRef> Split =
- CalledFunctionName.split(".llvm.");
- SGV = SrcModule.getNamedValue(Split.first);
+ std::pair<StringRef, StringRef> Split = CalledFunctionName.split(":");
+ SGV = SrcModule.getNamedValue(Split.second);
assert(SGV && "Can't find function to import in source module");
}
if (!SGV) {
--- /dev/null
+; RUN: llvm-bcanalyzer -dump %p/Inputs/source-filename.bc | FileCheck %s
+; CHECK: <SOURCE_FILENAME {{.*}} record string = 'source-filename.c'
; COMBINED-NEXT: <COMBINED_ENTRY
; COMBINED-NEXT: </FUNCTION_SUMMARY_BLOCK
; COMBINED-NEXT: <VALUE_SYMTAB
-; COMBINED-NEXT: <COMBINED_FNENTRY {{.*}} record string = '{{f|g}}'
-; COMBINED-NEXT: <COMBINED_FNENTRY {{.*}} record string = '{{f|g}}'
+; Check that the format is: op0=offset, op1=funcguid, where funcguid is
+; the lower 64 bits of the function name MD5.
+; COMBINED-NEXT: <COMBINED_FNENTRY abbrevid={{[0-9]+}} op0={{[0-9]+}} op1={{-3706093650706652785|-5300342847281564238}}
+; COMBINED-NEXT: <COMBINED_FNENTRY abbrevid={{[0-9]+}} op0={{[0-9]+}} op1={{-3706093650706652785|-5300342847281564238}}
; COMBINED-NEXT: </VALUE_SYMTAB
define void @f() {
; COMBINED-NEXT: <COMBINED_ENTRY
; COMBINED-NEXT: </FUNCTION_SUMMARY_BLOCK
; COMBINED-NEXT: <VALUE_SYMTAB
-; COMBINED-NEXT: <COMBINED_FNENTRY {{.*}} record string = '{{f|g}}'
-; COMBINED-NEXT: <COMBINED_FNENTRY {{.*}} record string = '{{f|g}}'
+; Check that the format is: op0=offset, op1=funcguid, where funcguid is
+; the lower 64 bits of the function name MD5.
+; COMBINED-NEXT: <COMBINED_FNENTRY abbrevid={{[0-9]+}} op0={{[0-9]+}} op1={{-3706093650706652785|-5300342847281564238}}
+; COMBINED-NEXT: <COMBINED_FNENTRY abbrevid={{[0-9]+}} op0={{[0-9]+}} op1={{-3706093650706652785|-5300342847281564238}}
; COMBINED-NEXT: </VALUE_SYMTAB
define void @f() {
STRINGIFY_CODE(MODULE_CODE, GCNAME)
STRINGIFY_CODE(MODULE_CODE, VSTOFFSET)
STRINGIFY_CODE(MODULE_CODE, METADATA_VALUES)
+ STRINGIFY_CODE(MODULE_CODE, SOURCE_FILENAME)
}
case bitc::IDENTIFICATION_BLOCK_ID:
switch (CodeID) {