From 96e547113926e78c08330664f796e36894ff44a9 Mon Sep 17 00:00:00 2001 From: Johannes Doerfert Date: Sun, 7 Feb 2016 17:30:13 +0000 Subject: [PATCH] Separate invariant equivalence classes by type We now distinguish invariant loads to the same memory location if they have different types. This will cause us to pre-load an invariant location once for each type that is used to access it. However, we can thereby avoid invalid casting, especially if an array is accessed through differently typed/sized invariant loads. This basically reverts the changes in r260023 but keeps the test cases. llvm-svn: 260045 --- polly/include/polly/CodeGen/IslNodeBuilder.h | 4 +++- polly/include/polly/ScopInfo.h | 8 ++++--- polly/lib/Analysis/ScopInfo.cpp | 26 +++++++++------------- polly/lib/CodeGen/IslNodeBuilder.cpp | 17 +++++--------- .../Isl/CodeGen/multiple-types-invariant-load.ll | 11 +-------- ...ess_classes_different_base_type_same_pointer.ll | 16 ++++++++----- ...es_different_base_type_same_pointer_escaping.ll | 21 +++++++++-------- 7 files changed, 47 insertions(+), 56 deletions(-) diff --git a/polly/include/polly/CodeGen/IslNodeBuilder.h b/polly/include/polly/CodeGen/IslNodeBuilder.h index 330a198..7d27107 100644 --- a/polly/include/polly/CodeGen/IslNodeBuilder.h +++ b/polly/include/polly/CodeGen/IslNodeBuilder.h @@ -209,7 +209,9 @@ protected: virtual void createFor(__isl_take isl_ast_node *For); /// @brief Set to remember materialized invariant loads. - SmallPtrSet PreloadedPtrs; + /// + /// An invariant load is identified by its pointer (the SCEV) and its type. + SmallSet, 16> PreloadedPtrs; /// @brief Preload the memory access at @p AccessRange with @p Build. 
/// diff --git a/polly/include/polly/ScopInfo.h b/polly/include/polly/ScopInfo.h index 71bb5d5..345a32b 100644 --- a/polly/include/polly/ScopInfo.h +++ b/polly/include/polly/ScopInfo.h @@ -850,11 +850,13 @@ using MemoryAccessList = std::forward_list; /// The first element is the SCEV for the pointer/location that identifies this /// equivalence class. The second is a list of memory accesses to that location /// that are now treated as invariant and hoisted during code generation. The -/// last element is the execution context under which the invariant memory +/// third element is the execution context under which the invariant memory /// location is accessed, hence the union of all domain contexts for the memory -/// accesses in the list. +/// accesses in the list. The last element describes the type of the invariant +/// access in order to differentiate between differently typed invariant loads of +/// the same location. using InvariantEquivClassTy = - std::tuple; + std::tuple; /// @brief Type for invariant accesses equivalence classes. 
using InvariantEquivClassesTy = SmallVector; diff --git a/polly/lib/Analysis/ScopInfo.cpp b/polly/lib/Analysis/ScopInfo.cpp index 23c1459..e33f9bf 100644 --- a/polly/lib/Analysis/ScopInfo.cpp +++ b/polly/lib/Analysis/ScopInfo.cpp @@ -1827,21 +1827,22 @@ void Scop::addUserContext() { } void Scop::buildInvariantEquivalenceClasses() { - DenseMap EquivClasses; + DenseMap, LoadInst *> EquivClasses; const InvariantLoadsSetTy &RIL = *SD.getRequiredInvariantLoads(&getRegion()); for (LoadInst *LInst : RIL) { const SCEV *PointerSCEV = SE->getSCEV(LInst->getPointerOperand()); - LoadInst *&ClassRep = EquivClasses[PointerSCEV]; + Type *Ty = LInst->getType(); + LoadInst *&ClassRep = EquivClasses[std::make_pair(PointerSCEV, Ty)]; if (ClassRep) { InvEquivClassVMap[LInst] = ClassRep; continue; } ClassRep = LInst; - InvariantEquivClasses.emplace_back(PointerSCEV, MemoryAccessList(), - nullptr); + InvariantEquivClasses.emplace_back(PointerSCEV, MemoryAccessList(), nullptr, + Ty); } } @@ -2852,9 +2853,10 @@ const InvariantEquivClassTy *Scop::lookupInvariantEquivClass(Value *Val) const { if (Value *Rep = InvEquivClassVMap.lookup(LInst)) LInst = cast(Rep); + Type *Ty = LInst->getType(); const SCEV *PointerSCEV = SE->getSCEV(LInst->getPointerOperand()); for (auto &IAClass : InvariantEquivClasses) - if (PointerSCEV == std::get<0>(IAClass)) + if (PointerSCEV == std::get<0>(IAClass) && Ty == std::get<3>(IAClass)) return &IAClass; return nullptr; @@ -2897,11 +2899,12 @@ void Scop::addInvariantLoads(ScopStmt &Stmt, MemoryAccessList &InvMAs) { // MA and if found consolidate them. Otherwise create a new equivalence // class at the end of InvariantEquivClasses. 
LoadInst *LInst = cast(MA->getAccessInstruction()); + Type *Ty = LInst->getType(); const SCEV *PointerSCEV = SE->getSCEV(LInst->getPointerOperand()); bool Consolidated = false; for (auto &IAClass : InvariantEquivClasses) { - if (PointerSCEV != std::get<0>(IAClass)) + if (PointerSCEV != std::get<0>(IAClass) || Ty != std::get<3>(IAClass)) continue; Consolidated = true; @@ -2926,7 +2929,7 @@ void Scop::addInvariantLoads(ScopStmt &Stmt, MemoryAccessList &InvMAs) { // If we did not consolidate MA, thus did not find an equivalence class // for it, we create a new one. InvariantEquivClasses.emplace_back(PointerSCEV, MemoryAccessList{MA}, - isl_set_copy(DomainCtx)); + isl_set_copy(DomainCtx), Ty); } isl_set_free(DomainCtx); @@ -2971,15 +2974,6 @@ bool Scop::isHoistableAccess(MemoryAccess *Access, isl_map *AccessRelation = Access->getAccessRelation(); - // Invariant load hoisting of memory accesses with non-canonical element - // types lacks support for equivalence classes that contain elements of - // different width/size. Hence, do not yet consider loads with non-canonical - // element size for load hoisting. - if (!isl_map_is_single_valued(AccessRelation)) { - isl_map_free(AccessRelation); - return false; - } - // Skip accesses that have an empty access relation. These can be caused // by multiple offsets with a type cast in-between that cause the overall // byte offset to be not divisible by the new types sizes. diff --git a/polly/lib/CodeGen/IslNodeBuilder.cpp b/polly/lib/CodeGen/IslNodeBuilder.cpp index d8ec5b0..50435b6 100644 --- a/polly/lib/CodeGen/IslNodeBuilder.cpp +++ b/polly/lib/CodeGen/IslNodeBuilder.cpp @@ -1015,7 +1015,8 @@ bool IslNodeBuilder::preloadInvariantEquivClass( // Check for recurrsion which can be caused by additional constraints, e.g., // non-finitie loop contraints. In such a case we have to bail out and insert // a "false" runtime check that will cause the original code to be executed. 
- if (!PreloadedPtrs.insert(std::get<0>(IAClass)).second) + auto PtrId = std::make_pair(std::get<0>(IAClass), std::get<3>(IAClass)); + if (!PreloadedPtrs.insert(PtrId).second) return false; // If the base pointer of this class is dependent on another one we have to @@ -1033,13 +1034,10 @@ bool IslNodeBuilder::preloadInvariantEquivClass( if (!PreloadVal) return false; - assert(PreloadVal->getType() == AccInst->getType()); for (const MemoryAccess *MA : MAs) { Instruction *MAAccInst = MA->getAccessInstruction(); - // TODO: The bitcast here is wrong. In case of floating and non-floating - // point values we need to reload the value or convert it. - ValueMap[MAAccInst] = - Builder.CreateBitOrPointerCast(PreloadVal, MAAccInst->getType()); + assert(PreloadVal->getType() == MAAccInst->getType()); + ValueMap[MAAccInst] = PreloadVal; } if (SE.isSCEVable(AccInstTy)) { @@ -1063,11 +1061,8 @@ bool IslNodeBuilder::preloadInvariantEquivClass( // should only change the base pointer of the derived SAI if we actually // preloaded it. if (BasePtr == MA->getBaseAddr()) { - // TODO: The bitcast here is wrong. In case of floating and non-floating - // point values we need to reload the value or convert it. - BasePtr = - Builder.CreateBitOrPointerCast(PreloadVal, BasePtr->getType()); - DerivedSAI->setBasePtr(BasePtr); + assert(BasePtr->getType() == PreloadVal->getType()); + DerivedSAI->setBasePtr(PreloadVal); } // For scalar derived SAIs we remap the alloca used for the derived value. diff --git a/polly/test/Isl/CodeGen/multiple-types-invariant-load.ll b/polly/test/Isl/CodeGen/multiple-types-invariant-load.ll index e69c3f8..71d1be2 100644 --- a/polly/test/Isl/CodeGen/multiple-types-invariant-load.ll +++ b/polly/test/Isl/CodeGen/multiple-types-invariant-load.ll @@ -1,18 +1,9 @@ -; RUN: opt %loadPolly -polly-codegen -S < %s | FileCheck %s - -; Invariant loads with non-canonical types are not yet fully supported. 
- -; XFAIL: * +; RUN: opt %loadPolly -polly-allow-differing-element-types -polly-codegen -S < %s | FileCheck %s ; CHECK: %polly.access.cast.global.load = bitcast %struct.hoge* %global.load to i32* ; CHECK: %polly.access.global.load = getelementptr i32, i32* %polly.access.cast.global.load, i64 0 ; CHECK: %polly.access.global.load.load = load i32, i32* %polly.access.global.load -; CHECK: %polly.access.cast.global.load1 = bitcast %struct.hoge* %global.load to i32* -; CHECK: %polly.access.global.load2 = getelementptr i32, i32* %polly.access.cast.global.load1, i64 2 -; CHECK: %polly.access.global.load2.cast = bitcast i32* %polly.access.global.load2 to double* -; CHECK: %polly.access.global.load2.load = load double, double* %polly.access.global.load2.cast - target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" diff --git a/polly/test/ScopInfo/invariant_load_access_classes_different_base_type_same_pointer.ll b/polly/test/ScopInfo/invariant_load_access_classes_different_base_type_same_pointer.ll index 3b6b24d..3df1490 100644 --- a/polly/test/ScopInfo/invariant_load_access_classes_different_base_type_same_pointer.ll +++ b/polly/test/ScopInfo/invariant_load_access_classes_different_base_type_same_pointer.ll @@ -11,6 +11,9 @@ ; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0] ; CHECK-NEXT: { Stmt_for_body[i0] -> MemRef_U[0] }; ; CHECK-NEXT: Execution Context: { : } +; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK-NEXT: { Stmt_for_body[i0] -> MemRef_U[0] }; +; CHECK-NEXT: Execution Context: { : } ; CHECK-NEXT: } ; ; CHECK: Statements { @@ -24,13 +27,15 @@ ; CHECK-NEXT: } ; ; CODEGEN: entry: -; CODEGEN: %U.f.preload.s2a = alloca float +; CODEGEN-DAG: %U.f.preload.s2a = alloca float +; CODEGEN-DAG: %U.i.preload.s2a = alloca i32 ; CODEGEN: br label %polly.split_new_and_old ; ; CODEGEN: polly.preload.begin: -; CODEGEN: %U.load = load float, float* bitcast (i32* @U to float*) -; CODEGEN: %0 = 
bitcast float %U.load to i32 -; CODEGEN: store float %U.load, float* %U.f.preload.s2a +; CODEGEN-DAG: %U.load[[f:[.0-9]*]] = load float, float* bitcast (i32* @U to float*) +; CODEGEN-DAG: store float %U.load[[f]], float* %U.f.preload.s2a +; CODEGEN-DAG: %U.load[[i:[.0-9]*]] = load i32, i32* @U +; CODEGEN-DAG: store i32 %U.load[[i]], i32* %U.i.preload.s2a ; ; CODEGEN: polly.merge_new_and_old: ; CODEGEN-NOT: merge = phi @@ -39,8 +44,7 @@ ; CODEGEN-NOT: final_reload ; ; CODEGEN: polly.stmt.for.body: -; CODEGEN: %p_conv = fptosi float %U.load to i32 -; CODEGEN: %p_add = add nsw i32 %0, %p_conv +; CODEGEN: %p_add = add nsw i32 %U.load[[i]], %p_conv ; target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" diff --git a/polly/test/ScopInfo/invariant_load_access_classes_different_base_type_same_pointer_escaping.ll b/polly/test/ScopInfo/invariant_load_access_classes_different_base_type_same_pointer_escaping.ll index b208859..2e7696f 100644 --- a/polly/test/ScopInfo/invariant_load_access_classes_different_base_type_same_pointer_escaping.ll +++ b/polly/test/ScopInfo/invariant_load_access_classes_different_base_type_same_pointer_escaping.ll @@ -16,6 +16,9 @@ ; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0] ; CHECK-NEXT: { Stmt_do_body[i0] -> MemRef_U[0] }; ; CHECK-NEXT: Execution Context: { : } +; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK-NEXT: { Stmt_do_body[i0] -> MemRef_U[0] }; +; CHECK-NEXT: Execution Context: { : } ; CHECK-NEXT: } ; ; CHECK: Statements { @@ -29,26 +32,26 @@ ; CHECK-NEXT: } ; ; CODEGEN: entry: -; CODEGEN: %U.f.preload.s2a = alloca float +; CODEGEN-DAG: %U.f.preload.s2a = alloca float +; CODEGEN-DAG: %U.i.preload.s2a = alloca i32 ; CODEGEN: br label %polly.split_new_and_old ; ; CODEGEN: polly.preload.begin: -; CODEGEN: %U.load = load float, float* bitcast (i32* @U to float*) -; CODEGEN: %0 = bitcast float %U.load to i32 -; CODEGEN: store float %U.load, float* %U.f.preload.s2a +; CODEGEN-DAG: %U.load[[f:[.0-9]*]] 
= load float, float* bitcast (i32* @U to float*) +; CODEGEN-DAG: store float %U.load[[f]], float* %U.f.preload.s2a +; CODEGEN-DAG: %U.load[[i:[.0-9]*]] = load i32, i32* @U +; CODEGEN-DAG: store i32 %U.load[[i]], i32* %U.i.preload.s2a ; ; CODEGEN: polly.merge_new_and_old: ; CODEGEN-DAG: %U.f.merge = phi float [ %U.f.final_reload, %polly.exiting ], [ %U.f, %do.cond ] -; CODEGEN-DAG: %U.i.merge = phi i32 [ %5, %polly.exiting ], [ %U.i, %do.cond ] +; CODEGEN-DAG: %U.i.merge = phi i32 [ %U.i.final_reload, %polly.exiting ], [ %U.i, %do.cond ] ; ; CODEGEN: polly.loop_exit: ; CODEGEN-DAG: %U.f.final_reload = load float, float* %U.f.preload.s2a -; CODEGEN-DAG: %U.i.final_reload = load float, float* %U.f.preload.s2a -; CODEGEN-DAG: %5 = bitcast float %U.i.final_reload to i32 +; CODEGEN-DAG: %U.i.final_reload = load i32, i32* %U.i.preload.s2a ; ; CODEGEN: polly.stmt.do.body: -; CODEGEN: %p_conv = fptosi float %U.load to i32 -; CODEGEN: %p_add = add nsw i32 %0, %p_conv +; CODEGEN: %p_add = add nsw i32 %U.load[[i]], %p_conv ; target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" -- 2.7.4