From 31ad468d03e8b52c0c09f79db3068dc63a120b28 Mon Sep 17 00:00:00 2001 From: Rafael Espindola Date: Wed, 3 Dec 2014 22:36:37 +0000 Subject: [PATCH] Split the set of identified struct types into opaque and non-opaque ones. The non-opaque part can be structurally uniqued. To keep this to just a hash lookup, we don't try to unique cyclic types. Also change the type mapping algorithm to be optimistic about a type not being recursive and only create a new type when proven to be wrong. This is not as strong as trying to speculate that we can keep the source type, but is simpler (no speculation to revert) and more powerfull than what we had before (we don't copy non-recursive types at least). I initially wrote this to try to replace the name based type merging. It is not strong enough to replace it, but is is a useful addition. With this patch the number of named struct types is a clang lto bootstrap goes from 49674 to 15986. llvm-svn: 223278 --- llvm/include/llvm/Linker/Linker.h | 43 ++++- llvm/lib/Linker/LinkModules.cpp | 307 ++++++++++++++++++++----------- llvm/test/Linker/type-unique-src-type.ll | 4 +- 3 files changed, 245 insertions(+), 109 deletions(-) diff --git a/llvm/include/llvm/Linker/Linker.h b/llvm/include/llvm/Linker/Linker.h index 88d41c1b..9e45245 100644 --- a/llvm/include/llvm/Linker/Linker.h +++ b/llvm/include/llvm/Linker/Linker.h @@ -10,7 +10,9 @@ #ifndef LLVM_LINKER_LINKER_H #define LLVM_LINKER_LINKER_H -#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/DenseSet.h" #include @@ -18,6 +20,7 @@ namespace llvm { class DiagnosticInfo; class Module; class StructType; +class Type; /// This class provides the core functionality of linking in LLVM. It keeps a /// pointer to the merged module so far. It doesn't take ownership of the @@ -27,6 +30,40 @@ class Linker { public: typedef std::function DiagnosticHandlerFunction; + struct StructTypeKeyInfo { + struct KeyTy { + ArrayRef ETypes; + bool IsPacked; + KeyTy(ArrayRef E, bool P); + KeyTy(const StructType *ST); + bool operator==(const KeyTy &that) const; + bool operator!=(const KeyTy &that) const; + }; + static StructType *getEmptyKey(); + static StructType *getTombstoneKey(); + static unsigned getHashValue(const KeyTy &Key); + static unsigned getHashValue(const StructType *ST); + static bool isEqual(const KeyTy &LHS, const StructType *RHS); + static bool isEqual(const StructType *LHS, const StructType *RHS); + }; + + typedef DenseMap + NonOpaqueStructTypeSet; + typedef DenseSet OpaqueStructTypeSet; + + struct IdentifiedStructTypeSet { + // The set of opaque types is the composite module. + OpaqueStructTypeSet OpaqueStructTypes; + + // The set of identified but non opaque structures in the composite module. + NonOpaqueStructTypeSet NonOpaqueStructTypes; + + void addNonOpaque(StructType *Ty); + void addOpaque(StructType *Ty); + StructType *findNonOpaque(ArrayRef ETypes, bool IsPacked); + bool hasType(StructType *Ty); + }; + Linker(Module *M, DiagnosticHandlerFunction DiagnosticHandler); Linker(Module *M); ~Linker(); @@ -46,7 +83,9 @@ public: private: void init(Module *M, DiagnosticHandlerFunction DiagnosticHandler); Module *Composite; - SmallPtrSet IdentifiedStructTypes; + + IdentifiedStructTypeSet IdentifiedStructTypes; + DiagnosticHandlerFunction DiagnosticHandler; }; diff --git a/llvm/lib/Linker/LinkModules.cpp b/llvm/lib/Linker/LinkModules.cpp index ecfb943..e643f5b 100644 --- a/llvm/lib/Linker/LinkModules.cpp +++ b/llvm/lib/Linker/LinkModules.cpp @@ -13,9 +13,11 @@ #include "llvm/Linker/Linker.h" #include "llvm-c/Linker.h" +#include "llvm/ADT/Hashing.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallString.h" +#include "llvm/ADT/Statistic.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DiagnosticInfo.h" #include "llvm/IR/DiagnosticPrinter.h" @@ -36,8 +38,6 @@ using namespace llvm; //===----------------------------------------------------------------------===// namespace { -typedef SmallPtrSet TypeSet; - class TypeMapTy : public ValueMapTypeRemapper { /// This is a mapping from a source type to a destination type to use. DenseMap MappedTypes; @@ -58,9 +58,10 @@ class TypeMapTy : public ValueMapTypeRemapper { SmallPtrSet DstResolvedOpaqueTypes; public: - TypeMapTy(TypeSet &Set) : DstStructTypesSet(Set) {} + TypeMapTy(Linker::IdentifiedStructTypeSet &DstStructTypesSet) + : DstStructTypesSet(DstStructTypesSet) {} - TypeSet &DstStructTypesSet; + Linker::IdentifiedStructTypeSet &DstStructTypesSet; /// Indicate that the specified type in the destination module is conceptually /// equivalent to the specified type in the source module. void addTypeMapping(Type *DstTy, Type *SrcTy); @@ -72,6 +73,9 @@ public: /// Return the mapped type to use for the specified input type from the /// source module. Type *get(Type *SrcTy); + Type *get(Type *SrcTy, SmallPtrSet &Visited); + + void finishType(StructType *DTy, StructType *STy, ArrayRef ETypes); FunctionType *get(FunctionType *T) { return cast(get((Type *)T)); @@ -225,124 +229,127 @@ void TypeMapTy::linkDefinedTypeBodies() { DstResolvedOpaqueTypes.clear(); } -Type *TypeMapTy::get(Type *Ty) { -#ifndef NDEBUG - for (auto &Pair : MappedTypes) { - assert(!(Pair.first != Ty && Pair.second == Ty) && - "mapping to a source type"); +void TypeMapTy::finishType(StructType *DTy, StructType *STy, + ArrayRef ETypes) { + DTy->setBody(ETypes, STy->isPacked()); + + // Steal STy's name. + if (STy->hasName()) { + SmallString<16> TmpName = STy->getName(); + STy->setName(""); + DTy->setName(TmpName); } -#endif + DstStructTypesSet.addNonOpaque(DTy); +} + +Type *TypeMapTy::get(Type *Ty) { + SmallPtrSet Visited; + return get(Ty, Visited); +} + +Type *TypeMapTy::get(Type *Ty, SmallPtrSet &Visited) { // If we already have an entry for this type, return it. Type **Entry = &MappedTypes[Ty]; if (*Entry) return *Entry; - // If this is not a named struct type, then just map all of the elements and - // then rebuild the type from inside out. - if (!isa(Ty) || cast(Ty)->isLiteral()) { - // If there are no element types to map, then the type is itself. This is - // true for the anonymous {} struct, things like 'float', integers, etc. - if (Ty->getNumContainedTypes() == 0) - return *Entry = Ty; + // These are types that LLVM itself will unique. + bool IsUniqued = !isa(Ty) || cast(Ty)->isLiteral(); - // Remap all of the elements, keeping track of whether any of them change. - bool AnyChange = false; - SmallVector ElementTypes; - ElementTypes.resize(Ty->getNumContainedTypes()); - for (unsigned I = 0, E = Ty->getNumContainedTypes(); I != E; ++I) { - ElementTypes[I] = get(Ty->getContainedType(I)); - AnyChange |= ElementTypes[I] != Ty->getContainedType(I); +#ifndef NDEBUG + if (!IsUniqued) { + for (auto &Pair : MappedTypes) { + assert(!(Pair.first != Ty && Pair.second == Ty) && + "mapping to a source type"); } + } +#endif - // If we found our type while recursively processing stuff, just use it. - Entry = &MappedTypes[Ty]; - if (*Entry) - return *Entry; + if (!IsUniqued && !Visited.insert(cast(Ty)).second) { + StructType *DTy = StructType::create(Ty->getContext()); + return *Entry = DTy; + } - // If all of the element types mapped directly over, then the type is usable - // as-is. - if (!AnyChange) - return *Entry = Ty; + // If this is not a recursive type, then just map all of the elements and + // then rebuild the type from inside out. + SmallVector ElementTypes; + + // If there are no element types to map, then the type is itself. This is + // true for the anonymous {} struct, things like 'float', integers, etc. + if (Ty->getNumContainedTypes() == 0 && IsUniqued) + return *Entry = Ty; + + // Remap all of the elements, keeping track of whether any of them change. + bool AnyChange = false; + ElementTypes.resize(Ty->getNumContainedTypes()); + for (unsigned I = 0, E = Ty->getNumContainedTypes(); I != E; ++I) { + ElementTypes[I] = get(Ty->getContainedType(I), Visited); + AnyChange |= ElementTypes[I] != Ty->getContainedType(I); + } - // Otherwise, rebuild a modified type. - switch (Ty->getTypeID()) { - default: - llvm_unreachable("unknown derived type to remap"); - case Type::ArrayTyID: - return *Entry = ArrayType::get(ElementTypes[0], - cast(Ty)->getNumElements()); - case Type::VectorTyID: - return *Entry = VectorType::get(ElementTypes[0], - cast(Ty)->getNumElements()); - case Type::PointerTyID: - return *Entry = PointerType::get( - ElementTypes[0], cast(Ty)->getAddressSpace()); - case Type::FunctionTyID: - return *Entry = FunctionType::get(ElementTypes[0], - makeArrayRef(ElementTypes).slice(1), - cast(Ty)->isVarArg()); - case Type::StructTyID: - // Note that this is only reached for anonymous structs. - return *Entry = StructType::get(Ty->getContext(), ElementTypes, - cast(Ty)->isPacked()); + // If we found our type while recursively processing stuff, just use it. + Entry = &MappedTypes[Ty]; + if (*Entry) { + if (auto *DTy = dyn_cast(*Entry)) { + if (DTy->isOpaque()) { + auto *STy = cast(Ty); + finishType(DTy, STy, ElementTypes); + } } + return *Entry; } - // Otherwise, this is an unmapped named struct. If the struct can be directly - // mapped over, just use it as-is. This happens in a case when the linked-in - // module has something like: - // %T = type {%T*, i32} - // @GV = global %T* null - // where T does not exist at all in the destination module. - // - // The other case we watch for is when the type is not in the destination - // module, but that it has to be rebuilt because it refers to something that - // is already mapped. For example, if the destination module has: - // %A = type { i32 } - // and the source module has something like - // %A' = type { i32 } - // %B = type { %A'* } - // @GV = global %B* null - // then we want to create a new type: "%B = type { %A*}" and have it take the - // pristine "%B" name from the source module. - // - // To determine which case this is, we have to recursively walk the type graph - // speculating that we'll be able to reuse it unmodified. Only if this is - // safe would we map the entire thing over. Because this is an optimization, - // and is not required for the prettiness of the linked module, we just skip - // it and always rebuild a type here. - StructType *STy = cast(Ty); - - // If the type is opaque, we can just use it directly. - if (STy->isOpaque()) { - // A named structure type from src module is used. Add it to the Set of - // identified structs in the destination module. - DstStructTypesSet.insert(STy); - return *Entry = STy; - } + // If all of the element types mapped directly over and the type is not + // a nomed struct, then the type is usable as-is. + if (!AnyChange && IsUniqued) + return *Entry = Ty; + + // Otherwise, rebuild a modified type. + switch (Ty->getTypeID()) { + default: + llvm_unreachable("unknown derived type to remap"); + case Type::ArrayTyID: + return *Entry = ArrayType::get(ElementTypes[0], + cast(Ty)->getNumElements()); + case Type::VectorTyID: + return *Entry = VectorType::get(ElementTypes[0], + cast(Ty)->getNumElements()); + case Type::PointerTyID: + return *Entry = PointerType::get(ElementTypes[0], + cast(Ty)->getAddressSpace()); + case Type::FunctionTyID: + return *Entry = FunctionType::get(ElementTypes[0], + makeArrayRef(ElementTypes).slice(1), + cast(Ty)->isVarArg()); + case Type::StructTyID: { + auto *STy = cast(Ty); + bool IsPacked = STy->isPacked(); + if (IsUniqued) + return *Entry = StructType::get(Ty->getContext(), ElementTypes, IsPacked); + + // If the type is opaque, we can just use it directly. + if (STy->isOpaque()) { + DstStructTypesSet.addOpaque(STy); + return *Entry = Ty; + } - // Otherwise we create a new type. - StructType *DTy = StructType::create(STy->getContext()); - // A new identified structure type was created. Add it to the set of - // identified structs in the destination module. - DstStructTypesSet.insert(DTy); - *Entry = DTy; + if (StructType *OldT = + DstStructTypesSet.findNonOpaque(ElementTypes, IsPacked)) { + STy->setName(""); + return *Entry = OldT; + } - SmallVector ElementTypes; - ElementTypes.resize(STy->getNumElements()); - for (unsigned I = 0, E = ElementTypes.size(); I != E; ++I) - ElementTypes[I] = get(STy->getElementType(I)); - DTy->setBody(ElementTypes, STy->isPacked()); + if (!AnyChange) { + DstStructTypesSet.addNonOpaque(STy); + return *Entry = Ty; + } - // Steal STy's name. - if (STy->hasName()) { - SmallString<16> TmpName = STy->getName(); - STy->setName(""); - DTy->setName(TmpName); + StructType *DTy = StructType::create(Ty->getContext()); + finishType(DTy, STy, ElementTypes); + return *Entry = DTy; + } } - - return DTy; } //===----------------------------------------------------------------------===// @@ -412,7 +419,7 @@ class ModuleLinker { Linker::DiagnosticHandlerFunction DiagnosticHandler; public: - ModuleLinker(Module *dstM, TypeSet &Set, Module *srcM, + ModuleLinker(Module *dstM, Linker::IdentifiedStructTypeSet &Set, Module *srcM, Linker::DiagnosticHandlerFunction DiagnosticHandler) : DstM(dstM), SrcM(srcM), TypeMap(Set), ValMaterializer(TypeMap, DstM, LazilyLinkFunctions), @@ -825,7 +832,7 @@ void ModuleLinker::computeTypeMapping() { // we prefer to take the '%C' version. So we are then left with both // '%C.1' and '%C' being used for the same types. This leads to some // variables using one type and some using the other. - if (TypeMap.DstStructTypesSet.count(DST)) + if (TypeMap.DstStructTypesSet.hasType(DST)) TypeMap.addTypeMapping(DST, ST); } @@ -1589,13 +1596,101 @@ bool ModuleLinker::run() { return false; } +Linker::StructTypeKeyInfo::KeyTy::KeyTy(ArrayRef E, bool P) + : ETypes(E), IsPacked(P) {} + +Linker::StructTypeKeyInfo::KeyTy::KeyTy(const StructType *ST) + : ETypes(ST->elements()), IsPacked(ST->isPacked()) {} + +bool Linker::StructTypeKeyInfo::KeyTy::operator==(const KeyTy &That) const { + if (IsPacked != That.IsPacked) + return false; + if (ETypes != That.ETypes) + return false; + return true; +} + +bool Linker::StructTypeKeyInfo::KeyTy::operator!=(const KeyTy &That) const { + return !this->operator==(That); +} + +StructType *Linker::StructTypeKeyInfo::getEmptyKey() { + return DenseMapInfo::getEmptyKey(); +} + +StructType *Linker::StructTypeKeyInfo::getTombstoneKey() { + return DenseMapInfo::getTombstoneKey(); +} + +unsigned Linker::StructTypeKeyInfo::getHashValue(const KeyTy &Key) { + return hash_combine(hash_combine_range(Key.ETypes.begin(), Key.ETypes.end()), + Key.IsPacked); +} + +unsigned Linker::StructTypeKeyInfo::getHashValue(const StructType *ST) { + return getHashValue(KeyTy(ST)); +} + +bool Linker::StructTypeKeyInfo::isEqual(const KeyTy &LHS, + const StructType *RHS) { + if (RHS == getEmptyKey() || RHS == getTombstoneKey()) + return false; + return LHS == KeyTy(RHS); +} + +bool Linker::StructTypeKeyInfo::isEqual(const StructType *LHS, + const StructType *RHS) { + if (RHS == getEmptyKey()) + return LHS == getEmptyKey(); + + if (RHS == getTombstoneKey()) + return LHS == getTombstoneKey(); + + return KeyTy(LHS) == KeyTy(RHS); +} + +void Linker::IdentifiedStructTypeSet::addNonOpaque(StructType *Ty) { + assert(!Ty->isOpaque()); + bool &Entry = NonOpaqueStructTypes[Ty]; + Entry = true; +} + +void Linker::IdentifiedStructTypeSet::addOpaque(StructType *Ty) { + assert(Ty->isOpaque()); + OpaqueStructTypes.insert(Ty); +} + +StructType * +Linker::IdentifiedStructTypeSet::findNonOpaque(ArrayRef ETypes, + bool IsPacked) { + Linker::StructTypeKeyInfo::KeyTy Key(ETypes, IsPacked); + auto I = NonOpaqueStructTypes.find_as(Key); + if (I == NonOpaqueStructTypes.end()) + return nullptr; + return I->first; +} + +bool Linker::IdentifiedStructTypeSet::hasType(StructType *Ty) { + if (Ty->isOpaque()) + return OpaqueStructTypes.count(Ty); + auto I = NonOpaqueStructTypes.find(Ty); + if (I == NonOpaqueStructTypes.end()) + return false; + return I->first == Ty; +} + void Linker::init(Module *M, DiagnosticHandlerFunction DiagnosticHandler) { this->Composite = M; this->DiagnosticHandler = DiagnosticHandler; TypeFinder StructTypes; StructTypes.run(*M, true); - IdentifiedStructTypes.insert(StructTypes.begin(), StructTypes.end()); + for (StructType *Ty : StructTypes) { + if (Ty->isOpaque()) + IdentifiedStructTypes.addOpaque(Ty); + else + IdentifiedStructTypes.addNonOpaque(Ty); + } } Linker::Linker(Module *M, DiagnosticHandlerFunction DiagnosticHandler) { diff --git a/llvm/test/Linker/type-unique-src-type.ll b/llvm/test/Linker/type-unique-src-type.ll index 4566904..01b33a2 100644 --- a/llvm/test/Linker/type-unique-src-type.ll +++ b/llvm/test/Linker/type-unique-src-type.ll @@ -9,7 +9,9 @@ ; CHECK: %C.0 = type { %B } ; CHECK-NEXT: %B = type { %A } ; CHECK-NEXT: %A = type { i8 } -; CHECK-NEXT: %C = type { %B } + +; CHECK: @g1 = external global %C.0 +; CHECK: getelementptr %C.0* null, i64 0, i32 0, i32 0 %A = type { i8 } %B = type { %A } -- 2.7.4