From a265cf22af5ec7d1319f690126b6479e356ef270 Mon Sep 17 00:00:00 2001 From: Johannes Doerfert Date: Mon, 31 Jan 2022 16:45:17 -0600 Subject: [PATCH] [Attributor] Introduce the `AA::isPotentiallyReachable` helper APIs To make usage easier (compared to the many reachability related AAs), this patch introduces a helper API, `AA::isPotentiallyReachable`, which performs all the necessary steps. It also does the "backwards" reachability (see D106720) as that simplifies the AA a lot (backwards queries were somewhat different from the other query resolvers), and ensures we use cached values in every stage. To test inter-procedural reachability in a reasonable way this patch includes an extension to `AAPointerInfo::forallInterferingWrites`. Basically, we can exclude writes if they cannot reach a load "during the lifetime" of the allocation. That is, we need to go up the call graph to determine reachability until we can determine the allocation would be dead in the caller. This leads to new constant propagations (through memory) in `value-simplify-pointer-info-gpu.ll`. Note: The new code contains plenty debug output to determine how reachability queries are resolved. Parts extracted from D110078. Differential Revision: https://reviews.llvm.org/D118673 --- llvm/include/llvm/Transforms/IPO/Attributor.h | 23 +- llvm/lib/Transforms/IPO/Attributor.cpp | 117 +++++++ llvm/lib/Transforms/IPO/AttributorAttributes.cpp | 148 ++++----- .../X86/min-legal-vector-width.ll | 4 +- .../Attributor/ArgumentPromotion/alignment.ll | 2 +- .../Attributor/ArgumentPromotion/basictest.ll | 2 +- .../read_write_returned_arguments_scc.ll | 10 +- .../Transforms/Attributor/value-simplify-gpu.ll | 339 +++++++++++---------- 8 files changed, 386 insertions(+), 259 deletions(-) diff --git a/llvm/include/llvm/Transforms/IPO/Attributor.h b/llvm/include/llvm/Transforms/IPO/Attributor.h index 8e592eb..7eee16f 100644 --- a/llvm/include/llvm/Transforms/IPO/Attributor.h +++ b/llvm/include/llvm/Transforms/IPO/Attributor.h @@ -217,6 +217,24 @@ bool isAssumedReadOnly(Attributor &A, const IRPosition &IRP, bool isAssumedReadNone(Attributor &A, const IRPosition &IRP, const AbstractAttribute &QueryingAA, bool &IsKnown); +/// Return true if \p ToI is potentially reachable from \p FromI. The two +/// instructions do not need to be in the same function. \p GoBackwardsCB +/// can be provided to convey domain knowledge about the "lifespan" the user is +/// interested in. By default, the callers of \p FromI are checked as well to +/// determine if \p ToI can be reached. If the query is not interested in +/// callers beyond a certain point, e.g., a GPU kernel entry or the function +/// containing an alloca, the \p GoBackwardsCB should return false. +bool isPotentiallyReachable( + Attributor &A, const Instruction &FromI, const Instruction &ToI, + const AbstractAttribute &QueryingAA, + std::function GoBackwardsCB = nullptr); + +/// Same as above but it is sufficient to reach any instruction in \p ToFn. +bool isPotentiallyReachable( + Attributor &A, const Instruction &FromI, const Function &ToFn, + const AbstractAttribute &QueryingAA, + std::function GoBackwardsCB); + } // namespace AA /// The value passed to the line option that defines the maximal initialization @@ -4636,11 +4654,12 @@ struct AAFunctionReachability /// If the function represented by this possition can reach \p Fn. virtual bool canReach(Attributor &A, const Function &Fn) const = 0; - /// Can \p CB reach \p Fn + /// Can \p CB reach \p Fn. virtual bool canReach(Attributor &A, CallBase &CB, const Function &Fn) const = 0; - /// Can \p Inst reach \p Fn + /// Can \p Inst reach \p Fn. + /// See also AA::isPotentiallyReachable. virtual bool instructionCanReach(Attributor &A, const Instruction &Inst, const Function &Fn, bool UseBackwards = true) const = 0; diff --git a/llvm/lib/Transforms/IPO/Attributor.cpp b/llvm/lib/Transforms/IPO/Attributor.cpp index 11cb9e1..1b9e482 100644 --- a/llvm/lib/Transforms/IPO/Attributor.cpp +++ b/llvm/lib/Transforms/IPO/Attributor.cpp @@ -436,6 +436,121 @@ bool AA::isAssumedReadNone(Attributor &A, const IRPosition &IRP, /* RequireReadNone */ true, IsKnown); } +static bool +isPotentiallyReachable(Attributor &A, const Instruction &FromI, + const Instruction *ToI, const Function &ToFn, + const AbstractAttribute &QueryingAA, + std::function GoBackwardsCB) { + LLVM_DEBUG(dbgs() << "[AA] isPotentiallyReachable @" << ToFn.getName() + << " from " << FromI << " [GBCB: " << bool(GoBackwardsCB) + << "]\n"); + + SmallPtrSet Visited; + SmallVector Worklist; + Worklist.push_back(&FromI); + + while (!Worklist.empty()) { + const Instruction *CurFromI = Worklist.pop_back_val(); + if (!Visited.insert(CurFromI).second) + continue; + + const Function *FromFn = CurFromI->getFunction(); + if (FromFn == &ToFn) { + if (!ToI) + return true; + LLVM_DEBUG(dbgs() << "[AA] check " << *ToI << " from " << *CurFromI + << " intraprocedurally\n"); + const auto &ReachabilityAA = A.getAAFor( + QueryingAA, IRPosition::function(ToFn), DepClassTy::OPTIONAL); + bool Result = ReachabilityAA.isAssumedReachable(A, *CurFromI, *ToI); + LLVM_DEBUG(dbgs() << "[AA] " << *CurFromI << " " + << (Result ? "can potentially " : "cannot ") << "reach " + << *ToI << " [Intra]\n"); + if (Result) + return true; + continue; + } + + // TODO: If we can go arbitrarily backwards we will eventually reach an + // entry point that can reach ToI. Only once this takes a set of blocks + // through which we cannot go, or once we track internal functions not + // accessible from the outside, it makes sense to perform backwards analysis + // in the absence of a GoBackwardsCB. + if (!GoBackwardsCB) { + LLVM_DEBUG(dbgs() << "[AA] check @" << ToFn.getName() << " from " + << *CurFromI << " is not checked backwards, abort\n"); + return true; + } + + // Check if the current instruction is already known to reach the ToFn. + const auto &FnReachabilityAA = A.getAAFor( + QueryingAA, IRPosition::function(*FromFn), DepClassTy::OPTIONAL); + bool Result = FnReachabilityAA.instructionCanReach( + A, *CurFromI, ToFn, /* UseBackwards */ false); + LLVM_DEBUG(dbgs() << "[AA] " << *CurFromI << " in @" << FromFn->getName() + << " " << (Result ? "can potentially " : "cannot ") + << "reach @" << ToFn.getName() << " [FromFn]\n"); + if (Result) + return true; + + // If we do not go backwards from the FromFn we are done here and so far we + // could not find a way to reach ToFn/ToI. + if (!GoBackwardsCB(*FromFn)) + continue; + + LLVM_DEBUG(dbgs() << "Stepping backwards to the call sites of @" + << FromFn->getName() << "\n"); + + auto CheckCallSite = [&](AbstractCallSite ACS) { + CallBase *CB = ACS.getInstruction(); + if (!CB) + return false; + + if (isa(CB)) + return false; + + Instruction *Inst = CB->getNextNonDebugInstruction(); + Worklist.push_back(Inst); + return true; + }; + + bool AllCallSitesKnown; + Result = !A.checkForAllCallSites(CheckCallSite, *FromFn, + /* RequireAllCallSites */ true, + &QueryingAA, AllCallSitesKnown); + if (Result) { + LLVM_DEBUG(dbgs() << "[AA] stepping back to call sites from " << *CurFromI + << " in @" << FromFn->getName() + << " failed, give up\n"); + return true; + } + + LLVM_DEBUG(dbgs() << "[AA] stepped back to call sites from " << *CurFromI + << " in @" << FromFn->getName() + << " worklist size is: " << Worklist.size() << "\n"); + } + return false; +} + +bool AA::isPotentiallyReachable( + Attributor &A, const Instruction &FromI, const Instruction &ToI, + const AbstractAttribute &QueryingAA, + std::function GoBackwardsCB) { + LLVM_DEBUG(dbgs() << "[AA] isPotentiallyReachable " << ToI << " from " + << FromI << " [GBCB: " << bool(GoBackwardsCB) << "]\n"); + const Function *ToFn = ToI.getFunction(); + return ::isPotentiallyReachable(A, FromI, &ToI, *ToFn, QueryingAA, + GoBackwardsCB); +} + +bool AA::isPotentiallyReachable( + Attributor &A, const Instruction &FromI, const Function &ToFn, + const AbstractAttribute &QueryingAA, + std::function GoBackwardsCB) { + return ::isPotentiallyReachable(A, FromI, /* ToI */ nullptr, ToFn, QueryingAA, + GoBackwardsCB); +} + /// Return true if \p New is equal or worse than \p Old. static bool isEqualOrWorse(const Attribute &New, const Attribute &Old) { if (!Old.isIntAttribute()) @@ -1464,9 +1579,11 @@ void Attributor::runTillFixpoint() { InvalidAA->Deps.pop_back(); AbstractAttribute *DepAA = cast(Dep.getPointer()); if (Dep.getInt() == unsigned(DepClassTy::OPTIONAL)) { + LLVM_DEBUG(dbgs() << " - recompute: " << *DepAA); Worklist.insert(DepAA); continue; } + LLVM_DEBUG(dbgs() << " - invalidate: " << *DepAA); DepAA->getState().indicatePessimisticFixpoint(); assert(DepAA->getState().isAtFixpoint() && "Expected fixpoint state!"); if (!DepAA->getState().isValidState()) diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp index 7c77ca7..2d88e32 100644 --- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp +++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp @@ -1130,19 +1130,63 @@ struct AAPointerInfoImpl QueryingAA, IRPosition::function(*LI.getFunction()), DepClassTy::OPTIONAL); - // Helper to determine if the instruction may reach the load. - auto IsReachableFrom = [&](const Instruction &I) { - const auto &ReachabilityAA = A.getAAFor( - QueryingAA, IRPosition::function(*I.getFunction()), - DepClassTy::OPTIONAL); - return ReachabilityAA.isAssumedReachable(A, I, LI); - }; - - const bool CanUseCFGResoning = - NoRecurseAA.isKnownNoRecurse() && CanIgnoreThreading(LI); + const bool CanUseCFGResoning = CanIgnoreThreading(LI); InformationCache &InfoCache = A.getInfoCache(); const DominatorTree *DT = - InfoCache.getAnalysisResultForFunction(Scope); + NoRecurseAA.isKnownNoRecurse() + ? InfoCache.getAnalysisResultForFunction( + Scope) + : nullptr; + + enum GPUAddressSpace : unsigned { + Generic = 0, + Global = 1, + Shared = 3, + Constant = 4, + Local = 5, + }; + + // Helper to check if a value has "kernel lifetime", that is it will not + // outlive a GPU kernel. This is true for shared, constant, and local + // globals on AMD and NVIDIA GPUs. + auto HasKernelLifetime = [&](Value *V, Module &M) { + Triple T(M.getTargetTriple()); + if (!(T.isAMDGPU() || T.isNVPTX())) + return false; + switch (V->getType()->getPointerAddressSpace()) { + case GPUAddressSpace::Shared: + case GPUAddressSpace::Constant: + case GPUAddressSpace::Local: + return true; + default: + return false; + }; + }; + + // The IsLiveInCalleeCB will be used by the AA::isPotentiallyReachable query + // to determine if we should look at reachability from the callee. For + // certain pointers we know the lifetime and we do not have to step into the + // callee to determine reachability as the pointer would be dead in the + // callee. See the conditional initialization below. + std::function IsLiveInCalleeCB; + + if (auto *AI = dyn_cast(&getAssociatedValue())) { + // If the alloca containing function is not recursive the alloca + // must be dead in the callee. + const Function *AIFn = AI->getFunction(); + const auto &NoRecurseAA = A.getAAFor( + *this, IRPosition::function(*AIFn), DepClassTy::OPTIONAL); + if (NoRecurseAA.isAssumedNoRecurse()) { + IsLiveInCalleeCB = [AIFn](const Function &Fn) { return AIFn != &Fn; }; + } + } else if (auto *GV = dyn_cast(&getAssociatedValue())) { + // If the global has kernel lifetime we can stop if we reach a kernel + // as it is "dead" in the (unknown) callees. + if (HasKernelLifetime(GV, *GV->getParent())) + IsLiveInCalleeCB = [](const Function &Fn) { + return !Fn.hasFnAttribute("kernel"); + }; + } auto AccessCB = [&](const Access &Acc, bool Exact) { if (!Acc.isWrite()) @@ -1151,7 +1195,8 @@ struct AAPointerInfoImpl // For now we only filter accesses based on CFG reasoning which does not // work yet if we have threading effects, or the access is complicated. if (CanUseCFGResoning) { - if (!IsReachableFrom(*Acc.getLocalInst())) + if (!AA::isPotentiallyReachable(A, *Acc.getLocalInst(), LI, QueryingAA, + IsLiveInCalleeCB)) return true; if (DT && Exact && (Acc.getLocalInst()->getFunction() == LI.getFunction()) && @@ -9674,7 +9719,7 @@ private: if (!Reachability.isAssumedReachable(A, Inst, CBInst)) return true; - const auto &CB = cast(CBInst); + auto &CB = cast(CBInst); const AACallEdges &AAEdges = A.getAAFor( *this, IRPosition::callsite_function(CB), DepClassTy::REQUIRED); @@ -9684,47 +9729,8 @@ private: bool UsedAssumedInformation = false; return A.checkForAllCallLikeInstructions(CheckCallBase, *this, - UsedAssumedInformation); - } - - ChangeStatus checkReachableBackwards(Attributor &A, QuerySet &Set) { - ChangeStatus Change = ChangeStatus::UNCHANGED; - - // For all remaining instruction queries, check - // callers. A call inside that function might satisfy the query. - auto CheckCallSite = [&](AbstractCallSite CallSite) { - CallBase *CB = CallSite.getInstruction(); - if (!CB) - return false; - - if (isa(CB)) - return false; - - Instruction *Inst = CB->getNextNonDebugInstruction(); - const AAFunctionReachability &AA = A.getAAFor( - *this, IRPosition::function(*Inst->getFunction()), - DepClassTy::REQUIRED); - for (const Function *Fn : make_early_inc_range(Set.Unreachable)) { - if (AA.instructionCanReach(A, *Inst, *Fn, /* UseBackwards */ false)) { - Set.markReachable(*Fn); - Change = ChangeStatus::CHANGED; - } - } - return true; - }; - - bool NoUnknownCall = true; - if (A.checkForAllCallSites(CheckCallSite, *this, true, NoUnknownCall)) - return Change; - - // If we don't know all callsites we have to assume that we can reach fn. - for (auto &QSet : InstQueriesBackwards) { - if (!QSet.second.CanReachUnknownCallee) - Change = ChangeStatus::CHANGED; - QSet.second.CanReachUnknownCallee = true; - } - - return Change; + UsedAssumedInformation, + /* CheckBBLivenessOnly */ true); } public: @@ -9776,12 +9782,15 @@ public: if (!isValidState()) return true; - const auto &Reachability = &A.getAAFor( + if (UseBackwards) + return AA::isPotentiallyReachable(A, Inst, Fn, *this, nullptr); + + const auto &Reachability = A.getAAFor( *this, IRPosition::function(*getAssociatedFunction()), DepClassTy::REQUIRED); SmallVector CallEdges; - bool AllKnown = getReachableCallEdges(A, *Reachability, Inst, CallEdges); + bool AllKnown = getReachableCallEdges(A, Reachability, Inst, CallEdges); // Attributor returns attributes as const, so this function has to be // const for users of this attribute to use it without having to do // a const_cast. @@ -9791,25 +9800,7 @@ public: if (!AllKnown) InstQSet.CanReachUnknownCallee = true; - bool ForwardsResult = InstQSet.isReachable(A, *NonConstThis, CallEdges, Fn); - if (ForwardsResult) - return true; - // We are done. - if (!UseBackwards) - return false; - - QuerySet &InstBackwardsQSet = NonConstThis->InstQueriesBackwards[&Inst]; - - Optional BackwardsCached = InstBackwardsQSet.isCachedReachable(Fn); - if (BackwardsCached.hasValue()) - return BackwardsCached.getValue(); - - // Assume unreachable, to prevent problems. - InstBackwardsQSet.Unreachable.insert(&Fn); - - // Check backwards reachability. - NonConstThis->checkReachableBackwards(A, InstBackwardsQSet); - return InstBackwardsQSet.isCachedReachable(Fn).getValue(); + return InstQSet.isReachable(A, *NonConstThis, CallEdges, Fn); } /// See AbstractAttribute::updateImpl(...). @@ -9847,10 +9838,6 @@ public: Change |= InstPair.second.update(A, *this, CallEdges); } - // Update backwards queries. - for (auto &QueryPair : InstQueriesBackwards) - Change |= checkReachableBackwards(A, QueryPair.second); - return Change; } @@ -9879,9 +9866,6 @@ private: /// This is for instruction queries than scan "forward". DenseMap InstQueries; - - /// This is for instruction queries than scan "backward". - DenseMap InstQueriesBackwards; }; /// ---------------------- Assumption Propagation ------------------------------ diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/X86/min-legal-vector-width.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/X86/min-legal-vector-width.ll index a7bcf1e..c2f764b92 100644 --- a/llvm/test/Transforms/Attributor/ArgumentPromotion/X86/min-legal-vector-width.ll +++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/X86/min-legal-vector-width.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-attributes --check-globals -; RUN: opt -attributor -enable-new-pm=0 -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=3 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_NPM,NOT_CGSCC_OPM,NOT_TUNIT_NPM,IS__TUNIT____,IS________OPM,IS__TUNIT_OPM -; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=3 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_OPM,NOT_CGSCC_NPM,NOT_TUNIT_OPM,IS__TUNIT____,IS________NPM,IS__TUNIT_NPM +; RUN: opt -attributor -enable-new-pm=0 -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=7 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_NPM,NOT_CGSCC_OPM,NOT_TUNIT_NPM,IS__TUNIT____,IS________OPM,IS__TUNIT_OPM +; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=7 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_OPM,NOT_CGSCC_NPM,NOT_TUNIT_OPM,IS__TUNIT____,IS________NPM,IS__TUNIT_NPM ; RUN: opt -attributor-cgscc -enable-new-pm=0 -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_NPM,IS__CGSCC____,IS________OPM,IS__CGSCC_OPM ; RUN: opt -aa-pipeline=basic-aa -passes=attributor-cgscc -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_OPM,IS__CGSCC____,IS________NPM,IS__CGSCC_NPM ; Test that we only promote arguments when the caller/callee have compatible diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/alignment.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/alignment.ll index c3e8e60..f8ec4d2 100644 --- a/llvm/test/Transforms/Attributor/ArgumentPromotion/alignment.ll +++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/alignment.ll @@ -76,7 +76,7 @@ define i32 @callercaller() { ; CHECK-LABEL: define {{[^@]+}}@callercaller ; CHECK-SAME: () #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: [[B:%.*]] = alloca i32, align 4 -; CHECK-NEXT: ret i32 3 +; CHECK-NEXT: ret i32 undef ; %B = alloca i32 store i32 2, i32* %B diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/basictest.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/basictest.ll index 94227c7..7f3c75a 100644 --- a/llvm/test/Transforms/Attributor/ArgumentPromotion/basictest.ll +++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/basictest.ll @@ -35,7 +35,7 @@ define i32 @callercaller() { ; CHECK-LABEL: define {{[^@]+}}@callercaller ; CHECK-SAME: () #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: [[B:%.*]] = alloca i32, align 4 -; CHECK-NEXT: ret i32 3 +; CHECK-NEXT: ret i32 undef ; %B = alloca i32 store i32 2, i32* %B diff --git a/llvm/test/Transforms/Attributor/read_write_returned_arguments_scc.ll b/llvm/test/Transforms/Attributor/read_write_returned_arguments_scc.ll index 2c8e29b..d39af8c 100644 --- a/llvm/test/Transforms/Attributor/read_write_returned_arguments_scc.ll +++ b/llvm/test/Transforms/Attributor/read_write_returned_arguments_scc.ll @@ -296,7 +296,7 @@ define internal i32* @internal_ret1_rw(i32* %r0, i32* %w0) { ; IS__CGSCC____-NEXT: store i32 [[TMP1]], i32* [[W0]], align 4 ; IS__CGSCC____-NEXT: [[CALL1:%.*]] = call i32* @internal_ret0_nw(i32* nofree noundef nonnull align 4 dereferenceable(4) [[R0]], i32* nofree nonnull align 4 dereferenceable(4) [[W0]]) #[[ATTR3]] ; IS__CGSCC____-NEXT: [[CALL2:%.*]] = call i32* @internal_ret0_nw(i32* nofree nonnull align 4 dereferenceable(4) [[W0]], i32* nofree nonnull align 4 dereferenceable(4) [[W0]]) #[[ATTR3]] -; IS__CGSCC____-NEXT: [[CALL3:%.*]] = call i32* @external_sink_ret2_nrw(i32* nofree noundef nonnull align 4 dereferenceable(4) [[R0]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[R0]], i32* nofree nonnull writeonly align 4 dereferenceable(4) "no-capture-maybe-returned" [[W0]]) #[[ATTR5:[0-9]+]] +; IS__CGSCC____-NEXT: [[CALL3:%.*]] = call i32* @external_sink_ret2_nrw(i32* nofree noundef nonnull align 4 dereferenceable(4) [[R0]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[R0]], i32* nofree nonnull writeonly align 4 dereferenceable(4) "no-capture-maybe-returned" [[W0]]) #[[ATTR4]] ; IS__CGSCC____-NEXT: [[CALL4:%.*]] = call i32* @external_ret2_nrw(i32* nofree noundef nonnull align 4 dereferenceable(4) [[R0]], i32* nofree noundef nonnull align 4 dereferenceable(4) [[R0]], i32* nofree nonnull align 4 dereferenceable(4) [[W0]]) #[[ATTR3]] ; IS__CGSCC____-NEXT: br label [[RETURN]] ; IS__CGSCC____: return: @@ -339,8 +339,8 @@ define i32* @external_source_ret2_nrw(i32* %n0, i32* %r0, i32* %w0) { ; IS__CGSCC____-LABEL: define {{[^@]+}}@external_source_ret2_nrw ; IS__CGSCC____-SAME: (i32* nofree [[N0:%.*]], i32* nofree [[R0:%.*]], i32* nofree [[W0:%.*]]) #[[ATTR2:[0-9]+]] { ; IS__CGSCC____-NEXT: entry: -; IS__CGSCC____-NEXT: [[CALL:%.*]] = call i32* @external_sink_ret2_nrw(i32* nofree [[N0]], i32* nocapture nofree readonly [[R0]], i32* nofree writeonly "no-capture-maybe-returned" [[W0]]) #[[ATTR5]] -; IS__CGSCC____-NEXT: [[CALL1:%.*]] = call i32* @external_ret2_nrw(i32* nofree [[N0]], i32* nofree [[R0]], i32* nofree [[W0]]) #[[ATTR4]] +; IS__CGSCC____-NEXT: [[CALL:%.*]] = call i32* @external_sink_ret2_nrw(i32* nofree [[N0]], i32* nocapture nofree readonly [[R0]], i32* nofree writeonly "no-capture-maybe-returned" [[W0]]) #[[ATTR4]] +; IS__CGSCC____-NEXT: [[CALL1:%.*]] = call i32* @external_ret2_nrw(i32* nofree [[N0]], i32* nofree [[R0]], i32* nofree [[W0]]) #[[ATTR5:[0-9]+]] ; IS__CGSCC____-NEXT: ret i32* [[CALL1]] ; entry: @@ -362,6 +362,6 @@ entry: ; IS__CGSCC____: attributes #[[ATTR1]] = { argmemonly nofree norecurse nosync nounwind willreturn } ; IS__CGSCC____: attributes #[[ATTR2]] = { argmemonly nofree norecurse nosync nounwind } ; IS__CGSCC____: attributes #[[ATTR3]] = { nofree nosync nounwind } -; IS__CGSCC____: attributes #[[ATTR4]] = { nounwind } -; IS__CGSCC____: attributes #[[ATTR5]] = { nounwind willreturn } +; IS__CGSCC____: attributes #[[ATTR4]] = { nounwind willreturn } +; IS__CGSCC____: attributes #[[ATTR5]] = { nounwind } ;. diff --git a/llvm/test/Transforms/Attributor/value-simplify-gpu.ll b/llvm/test/Transforms/Attributor/value-simplify-gpu.ll index 5e6fb94..66c41ba 100644 --- a/llvm/test/Transforms/Attributor/value-simplify-gpu.ll +++ b/llvm/test/Transforms/Attributor/value-simplify-gpu.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-attributes --check-globals -; RUN: opt -attributor -enable-new-pm=0 -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=5 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_NPM,NOT_CGSCC_OPM,NOT_TUNIT_NPM,IS__TUNIT____,IS________OPM,IS__TUNIT_OPM -; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=5 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_OPM,NOT_CGSCC_NPM,NOT_TUNIT_OPM,IS__TUNIT____,IS________NPM,IS__TUNIT_NPM +; RUN: opt -attributor -enable-new-pm=0 -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=6 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_NPM,NOT_CGSCC_OPM,NOT_TUNIT_NPM,IS__TUNIT____,IS________OPM,IS__TUNIT_OPM +; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=6 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_OPM,NOT_CGSCC_NPM,NOT_TUNIT_OPM,IS__TUNIT____,IS________NPM,IS__TUNIT_NPM ; RUN: opt -attributor-cgscc -enable-new-pm=0 -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_NPM,IS__CGSCC____,IS________OPM,IS__CGSCC_OPM ; RUN: opt -aa-pipeline=basic-aa -passes=attributor-cgscc -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_OPM,IS__CGSCC____,IS________NPM,IS__CGSCC_NPM @@ -18,12 +18,19 @@ target triple = "amdgcn-amd-amdhsa" ; CHECK: @[[UNREACHABLENONKERNEL:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global i32 0, align 4 ;. define dso_local void @kernel(i32 %C) norecurse "kernel" { -; CHECK: Function Attrs: norecurse nosync nounwind -; CHECK-LABEL: define {{[^@]+}}@kernel -; CHECK-SAME: (i32 [[C:%.*]]) #[[ATTR0:[0-9]+]] { -; CHECK-NEXT: entry: -; CHECK-NEXT: call void @level1Kernel(i32 [[C]]) #[[ATTR3:[0-9]+]] -; CHECK-NEXT: ret void +; IS__TUNIT____: Function Attrs: norecurse nosync nounwind +; IS__TUNIT____-LABEL: define {{[^@]+}}@kernel +; IS__TUNIT____-SAME: (i32 [[C:%.*]]) #[[ATTR0:[0-9]+]] { +; IS__TUNIT____-NEXT: entry: +; IS__TUNIT____-NEXT: call void @level1Kernel(i32 [[C]]) #[[ATTR3:[0-9]+]] +; IS__TUNIT____-NEXT: ret void +; +; IS__CGSCC____: Function Attrs: norecurse nosync nounwind +; IS__CGSCC____-LABEL: define {{[^@]+}}@kernel +; IS__CGSCC____-SAME: (i32 [[C:%.*]]) #[[ATTR0:[0-9]+]] { +; IS__CGSCC____-NEXT: entry: +; IS__CGSCC____-NEXT: call void @level1Kernel(i32 [[C]]) #[[ATTR4:[0-9]+]] +; IS__CGSCC____-NEXT: ret void ; entry: call void @level1Kernel(i32 %C) @@ -31,22 +38,39 @@ entry: } define internal void @level1Kernel(i32 %C) { -; CHECK: Function Attrs: norecurse nosync nounwind -; CHECK-LABEL: define {{[^@]+}}@level1Kernel -; CHECK-SAME: (i32 [[C:%.*]]) #[[ATTR1:[0-9]+]] { -; CHECK-NEXT: entry: -; CHECK-NEXT: call void @level2Kernelall_early() #[[ATTR4:[0-9]+]] -; CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[C]], 0 -; CHECK-NEXT: br i1 [[TOBOOL]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]] -; CHECK: if.then: -; CHECK-NEXT: call void @level2Kernela() #[[ATTR3]] -; CHECK-NEXT: br label [[IF_END:%.*]] -; CHECK: if.else: -; CHECK-NEXT: call void @level2Kernelb() #[[ATTR3]] -; CHECK-NEXT: br label [[IF_END]] -; CHECK: if.end: -; CHECK-NEXT: call void @level2Kernelall_late() #[[ATTR5:[0-9]+]] -; CHECK-NEXT: ret void +; IS__TUNIT____: Function Attrs: norecurse nosync nounwind +; IS__TUNIT____-LABEL: define {{[^@]+}}@level1Kernel +; IS__TUNIT____-SAME: (i32 [[C:%.*]]) #[[ATTR1:[0-9]+]] { +; IS__TUNIT____-NEXT: entry: +; IS__TUNIT____-NEXT: call void @level2Kernelall_early() #[[ATTR4:[0-9]+]] +; IS__TUNIT____-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[C]], 0 +; IS__TUNIT____-NEXT: br i1 [[TOBOOL]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]] +; IS__TUNIT____: if.then: +; IS__TUNIT____-NEXT: call void @level2Kernela() #[[ATTR3]] +; IS__TUNIT____-NEXT: br label [[IF_END:%.*]] +; IS__TUNIT____: if.else: +; IS__TUNIT____-NEXT: call void @level2Kernelb() #[[ATTR3]] +; IS__TUNIT____-NEXT: br label [[IF_END]] +; IS__TUNIT____: if.end: +; IS__TUNIT____-NEXT: call void @level2Kernelall_late() #[[ATTR5:[0-9]+]] +; IS__TUNIT____-NEXT: ret void +; +; IS__CGSCC____: Function Attrs: norecurse nosync nounwind +; IS__CGSCC____-LABEL: define {{[^@]+}}@level1Kernel +; IS__CGSCC____-SAME: (i32 [[C:%.*]]) #[[ATTR1:[0-9]+]] { +; IS__CGSCC____-NEXT: entry: +; IS__CGSCC____-NEXT: call void @level2Kernelall_early() #[[ATTR5:[0-9]+]] +; IS__CGSCC____-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[C]], 0 +; IS__CGSCC____-NEXT: br i1 [[TOBOOL]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]] +; IS__CGSCC____: if.then: +; IS__CGSCC____-NEXT: call void @level2Kernela() #[[ATTR4]] +; IS__CGSCC____-NEXT: br label [[IF_END:%.*]] +; IS__CGSCC____: if.else: +; IS__CGSCC____-NEXT: call void @level2Kernelb() #[[ATTR4]] +; IS__CGSCC____-NEXT: br label [[IF_END]] +; IS__CGSCC____: if.end: +; IS__CGSCC____-NEXT: call void @level2Kernelall_late() #[[ATTR6:[0-9]+]] +; IS__CGSCC____-NEXT: ret void ; entry: call void @level2Kernelall_early() @@ -88,8 +112,7 @@ define internal void @level2Kernela() { ; IS__TUNIT____-NEXT: entry: ; IS__TUNIT____-NEXT: [[TMP0:%.*]] = load i32, i32* addrspacecast (i32 addrspace(3)* @ReachableKernel to i32*), align 4 ; IS__TUNIT____-NEXT: [[TMP1:%.*]] = load i32, i32* @ReachableKernelAS0, align 4 -; IS__TUNIT____-NEXT: [[TMP2:%.*]] = load i32, i32* addrspacecast (i32 addrspace(3)* @UnreachableKernel to i32*), align 4 -; IS__TUNIT____-NEXT: call void @use(i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]]) #[[ATTR6:[0-9]+]] +; IS__TUNIT____-NEXT: call void @use(i32 [[TMP0]], i32 [[TMP1]], i32 noundef 42) #[[ATTR6:[0-9]+]] ; IS__TUNIT____-NEXT: ret void ; ; IS__CGSCC____: Function Attrs: norecurse nosync nounwind @@ -99,7 +122,7 @@ define internal void @level2Kernela() { ; IS__CGSCC____-NEXT: [[TMP0:%.*]] = load i32, i32* addrspacecast (i32 addrspace(3)* @ReachableKernel to i32*), align 4 ; IS__CGSCC____-NEXT: [[TMP1:%.*]] = load i32, i32* @ReachableKernelAS0, align 4 ; IS__CGSCC____-NEXT: [[TMP2:%.*]] = load i32, i32* addrspacecast (i32 addrspace(3)* @UnreachableKernel to i32*), align 4 -; IS__CGSCC____-NEXT: call void @use(i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]]) #[[ATTR3]] +; IS__CGSCC____-NEXT: call void @use(i32 [[TMP0]], i32 [[TMP1]], i32 noundef 42) #[[ATTR4]] ; IS__CGSCC____-NEXT: ret void ; entry: @@ -117,8 +140,7 @@ define internal void @level2Kernelb() { ; IS__TUNIT____-NEXT: entry: ; IS__TUNIT____-NEXT: [[TMP0:%.*]] = load i32, i32* addrspacecast (i32 addrspace(3)* @ReachableKernel to i32*), align 4 ; IS__TUNIT____-NEXT: [[TMP1:%.*]] = load i32, i32* @ReachableKernelAS0, align 4 -; IS__TUNIT____-NEXT: [[TMP2:%.*]] = load i32, i32* addrspacecast (i32 addrspace(3)* @UnreachableKernel to i32*), align 4 -; IS__TUNIT____-NEXT: call void @use(i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]]) #[[ATTR6]] +; IS__TUNIT____-NEXT: call void @use(i32 [[TMP0]], i32 [[TMP1]], i32 noundef 42) #[[ATTR6]] ; IS__TUNIT____-NEXT: ret void ; ; IS__CGSCC____: Function Attrs: norecurse nosync nounwind @@ -128,7 +150,7 @@ define internal void @level2Kernelb() { ; IS__CGSCC____-NEXT: [[TMP0:%.*]] = load i32, i32* addrspacecast (i32 addrspace(3)* @ReachableKernel to i32*), align 4 ; IS__CGSCC____-NEXT: [[TMP1:%.*]] = load i32, i32* @ReachableKernelAS0, align 4 ; IS__CGSCC____-NEXT: [[TMP2:%.*]] = load i32, i32* addrspacecast (i32 addrspace(3)* @UnreachableKernel to i32*), align 4 -; IS__CGSCC____-NEXT: call void @use(i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]]) #[[ATTR3]] +; IS__CGSCC____-NEXT: call void @use(i32 [[TMP0]], i32 [[TMP1]], i32 noundef 42) #[[ATTR4]] ; IS__CGSCC____-NEXT: ret void ; entry: @@ -140,12 +162,17 @@ entry: } define internal void @level2Kernelall_late() { -; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn writeonly -; CHECK-LABEL: define {{[^@]+}}@level2Kernelall_late -; CHECK-SAME: () #[[ATTR2]] { -; CHECK-NEXT: entry: -; CHECK-NEXT: store i32 1, i32* addrspacecast (i32 addrspace(3)* @UnreachableKernel to i32*), align 4 -; CHECK-NEXT: ret void +; IS__TUNIT____: Function Attrs: nofree norecurse nosync nounwind willreturn writeonly +; IS__TUNIT____-LABEL: define {{[^@]+}}@level2Kernelall_late +; IS__TUNIT____-SAME: () #[[ATTR2]] { +; IS__TUNIT____-NEXT: entry: +; IS__TUNIT____-NEXT: ret void +; +; IS__CGSCC____: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; IS__CGSCC____-LABEL: define {{[^@]+}}@level2Kernelall_late +; IS__CGSCC____-SAME: () #[[ATTR3:[0-9]+]] { +; IS__CGSCC____-NEXT: entry: +; IS__CGSCC____-NEXT: ret void ; entry: store i32 1, i32 *addrspacecast (i32 addrspace(3)* @UnreachableKernel to i32*), align 4 @@ -156,12 +183,19 @@ entry: @UnreachableNonKernel = internal addrspace(3) global i32 0, align 4 define dso_local void @non_kernel(i32 %C) norecurse { -; CHECK: Function Attrs: norecurse nosync nounwind -; CHECK-LABEL: define {{[^@]+}}@non_kernel -; CHECK-SAME: (i32 [[C:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: entry: -; CHECK-NEXT: call void @level1(i32 [[C]]) #[[ATTR3]] -; CHECK-NEXT: ret void +; IS__TUNIT____: Function Attrs: norecurse nosync nounwind +; IS__TUNIT____-LABEL: define {{[^@]+}}@non_kernel +; IS__TUNIT____-SAME: (i32 [[C:%.*]]) #[[ATTR1]] { +; IS__TUNIT____-NEXT: entry: +; IS__TUNIT____-NEXT: call void @level1(i32 [[C]]) #[[ATTR3]] +; IS__TUNIT____-NEXT: ret void +; +; IS__CGSCC____: Function Attrs: norecurse nosync nounwind +; IS__CGSCC____-LABEL: define {{[^@]+}}@non_kernel +; IS__CGSCC____-SAME: (i32 [[C:%.*]]) #[[ATTR1]] { +; IS__CGSCC____-NEXT: entry: +; IS__CGSCC____-NEXT: call void @level1(i32 [[C]]) #[[ATTR4]] +; IS__CGSCC____-NEXT: ret void ; entry: call void @level1(i32 %C) @@ -169,60 +203,58 @@ entry: } define internal void @level1(i32 %C) { -; IS________OPM: Function Attrs: norecurse nosync nounwind -; IS________OPM-LABEL: define {{[^@]+}}@level1 -; IS________OPM-SAME: (i32 [[C:%.*]]) #[[ATTR1]] { -; IS________OPM-NEXT: entry: -; IS________OPM-NEXT: [[LOCAL:%.*]] = alloca i32, align 4 -; IS________OPM-NEXT: call void @level2all_early(i32* noalias nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[LOCAL]]) #[[ATTR4]] -; IS________OPM-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[C]], 0 -; IS________OPM-NEXT: br i1 [[TOBOOL]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]] -; IS________OPM: if.then: -; IS________OPM-NEXT: call void @level2a(i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[LOCAL]]) #[[ATTR3]] -; IS________OPM-NEXT: br label [[IF_END:%.*]] -; IS________OPM: if.else: -; IS________OPM-NEXT: call void @level2b(i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[LOCAL]]) #[[ATTR3]] -; IS________OPM-NEXT: br label [[IF_END]] -; IS________OPM: if.end: -; IS________OPM-NEXT: call void @level2all_late(i32* noalias nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[LOCAL]]) #[[ATTR5]] -; IS________OPM-NEXT: ret void +; IS__TUNIT____: Function Attrs: norecurse nosync nounwind +; IS__TUNIT____-LABEL: define {{[^@]+}}@level1 +; IS__TUNIT____-SAME: (i32 [[C:%.*]]) #[[ATTR1]] { +; IS__TUNIT____-NEXT: entry: +; IS__TUNIT____-NEXT: [[LOCAL:%.*]] = alloca i32, align 4 +; IS__TUNIT____-NEXT: call void @level2all_early(i32* noalias nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[LOCAL]]) #[[ATTR4]] +; IS__TUNIT____-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[C]], 0 +; IS__TUNIT____-NEXT: br i1 [[TOBOOL]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]] +; IS__TUNIT____: if.then: +; IS__TUNIT____-NEXT: call void @level2a() #[[ATTR3]] +; IS__TUNIT____-NEXT: br label [[IF_END:%.*]] +; IS__TUNIT____: if.else: +; IS__TUNIT____-NEXT: call void @level2b() #[[ATTR3]] +; IS__TUNIT____-NEXT: br label [[IF_END]] +; IS__TUNIT____: if.end: +; IS__TUNIT____-NEXT: call void @level2all_late(i32* noalias nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[LOCAL]]) #[[ATTR5]] +; IS__TUNIT____-NEXT: ret void ; -; IS__TUNIT_NPM: Function Attrs: norecurse nosync nounwind -; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@level1 -; IS__TUNIT_NPM-SAME: (i32 [[C:%.*]]) #[[ATTR1]] { -; IS__TUNIT_NPM-NEXT: entry: -; IS__TUNIT_NPM-NEXT: [[LOCAL:%.*]] = alloca i32, align 4 -; IS__TUNIT_NPM-NEXT: call void @level2all_early(i32* noalias nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[LOCAL]]) #[[ATTR4]] -; IS__TUNIT_NPM-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[C]], 0 -; IS__TUNIT_NPM-NEXT: br i1 [[TOBOOL]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]] -; IS__TUNIT_NPM: if.then: -; IS__TUNIT_NPM-NEXT: [[TMP0:%.*]] = load i32, i32* [[LOCAL]], align 4 -; IS__TUNIT_NPM-NEXT: call void @level2a(i32 [[TMP0]]) #[[ATTR3]] -; IS__TUNIT_NPM-NEXT: br label [[IF_END:%.*]] -; IS__TUNIT_NPM: if.else: -; IS__TUNIT_NPM-NEXT: [[TMP1:%.*]] = load i32, i32* [[LOCAL]], align 4 -; IS__TUNIT_NPM-NEXT: call void @level2b(i32 [[TMP1]]) #[[ATTR3]] -; IS__TUNIT_NPM-NEXT: br label [[IF_END]] -; IS__TUNIT_NPM: if.end: -; IS__TUNIT_NPM-NEXT: call void @level2all_late(i32* noalias nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[LOCAL]]) #[[ATTR5]] -; IS__TUNIT_NPM-NEXT: ret void +; IS__CGSCC_OPM: Function Attrs: norecurse nosync nounwind +; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@level1 +; IS__CGSCC_OPM-SAME: (i32 [[C:%.*]]) #[[ATTR1]] { +; IS__CGSCC_OPM-NEXT: entry: +; IS__CGSCC_OPM-NEXT: [[LOCAL:%.*]] = alloca i32, align 4 +; IS__CGSCC_OPM-NEXT: call void @level2all_early(i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) undef) #[[ATTR5]] +; IS__CGSCC_OPM-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[C]], 0 +; IS__CGSCC_OPM-NEXT: br i1 [[TOBOOL]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]] +; IS__CGSCC_OPM: if.then: +; IS__CGSCC_OPM-NEXT: call void @level2a(i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) undef) #[[ATTR4]] +; IS__CGSCC_OPM-NEXT: br label [[IF_END:%.*]] +; IS__CGSCC_OPM: if.else: +; IS__CGSCC_OPM-NEXT: call void @level2b(i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) undef) #[[ATTR4]] +; IS__CGSCC_OPM-NEXT: br label [[IF_END]] +; IS__CGSCC_OPM: if.end: +; IS__CGSCC_OPM-NEXT: call void @level2all_late(i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) undef) #[[ATTR7:[0-9]+]] +; IS__CGSCC_OPM-NEXT: ret void ; ; IS__CGSCC_NPM: Function Attrs: norecurse nosync nounwind ; IS__CGSCC_NPM-LABEL: define {{[^@]+}}@level1 ; IS__CGSCC_NPM-SAME: (i32 [[C:%.*]]) #[[ATTR1]] { ; IS__CGSCC_NPM-NEXT: entry: ; IS__CGSCC_NPM-NEXT: [[LOCAL:%.*]] = alloca i32, align 4 -; IS__CGSCC_NPM-NEXT: call void @level2all_early(i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) undef) #[[ATTR4]] +; IS__CGSCC_NPM-NEXT: call void @level2all_early(i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) undef) #[[ATTR5]] ; IS__CGSCC_NPM-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[C]], 0 ; IS__CGSCC_NPM-NEXT: br i1 [[TOBOOL]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]] ; IS__CGSCC_NPM: if.then: -; IS__CGSCC_NPM-NEXT: call void @level2a(i32 undef) #[[ATTR3]] +; IS__CGSCC_NPM-NEXT: call void @level2a(i32 undef) #[[ATTR4]] ; IS__CGSCC_NPM-NEXT: br label [[IF_END:%.*]] ; IS__CGSCC_NPM: if.else: -; IS__CGSCC_NPM-NEXT: call void @level2b(i32 undef) #[[ATTR6:[0-9]+]] +; IS__CGSCC_NPM-NEXT: call void @level2b(i32 undef) #[[ATTR7:[0-9]+]] ; IS__CGSCC_NPM-NEXT: br label [[IF_END]] ; IS__CGSCC_NPM: if.end: -; IS__CGSCC_NPM-NEXT: call void @level2all_late(i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) undef) #[[ATTR7:[0-9]+]] +; IS__CGSCC_NPM-NEXT: call void @level2all_late(i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) undef) #[[ATTR8:[0-9]+]] ; IS__CGSCC_NPM-NEXT: ret void ; entry: @@ -245,20 +277,19 @@ if.end: ; preds = %if.else, %if.then } define internal void @level2all_early(i32* %addr) { -; NOT_CGSCC_NPM: Function Attrs: nofree norecurse nosync nounwind willreturn writeonly -; NOT_CGSCC_NPM-LABEL: define {{[^@]+}}@level2all_early -; NOT_CGSCC_NPM-SAME: (i32* noalias nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[ADDR:%.*]]) #[[ATTR2]] { -; NOT_CGSCC_NPM-NEXT: entry: -; NOT_CGSCC_NPM-NEXT: store i32 1, i32* addrspacecast (i32 addrspace(3)* @ReachableNonKernel to i32*), align 4 -; NOT_CGSCC_NPM-NEXT: store i32 17, i32* [[ADDR]], align 4 -; NOT_CGSCC_NPM-NEXT: ret void +; IS__TUNIT____: Function Attrs: nofree norecurse nosync nounwind willreturn writeonly +; IS__TUNIT____-LABEL: define {{[^@]+}}@level2all_early +; IS__TUNIT____-SAME: (i32* noalias nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[ADDR:%.*]]) #[[ATTR2]] { +; IS__TUNIT____-NEXT: entry: +; IS__TUNIT____-NEXT: store i32 1, i32* addrspacecast (i32 addrspace(3)* @ReachableNonKernel to i32*), align 4 +; IS__TUNIT____-NEXT: ret void ; -; IS__CGSCC_NPM: Function Attrs: nofree norecurse nosync nounwind willreturn writeonly -; IS__CGSCC_NPM-LABEL: define {{[^@]+}}@level2all_early -; IS__CGSCC_NPM-SAME: (i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[ADDR:%.*]]) #[[ATTR2]] { -; IS__CGSCC_NPM-NEXT: entry: -; IS__CGSCC_NPM-NEXT: store i32 1, i32* addrspacecast (i32 addrspace(3)* @ReachableNonKernel to i32*), align 4 -; IS__CGSCC_NPM-NEXT: ret void +; IS__CGSCC____: Function Attrs: nofree norecurse nosync nounwind willreturn writeonly +; IS__CGSCC____-LABEL: define {{[^@]+}}@level2all_early +; IS__CGSCC____-SAME: (i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[ADDR:%.*]]) #[[ATTR2]] { +; IS__CGSCC____-NEXT: entry: +; IS__CGSCC____-NEXT: store i32 1, i32* addrspacecast (i32 addrspace(3)* @ReachableNonKernel to i32*), align 4 +; IS__CGSCC____-NEXT: ret void ; entry: store i32 1, i32* addrspacecast (i32 addrspace(3)* @ReachableNonKernel to i32*), align 4 @@ -267,36 +298,23 @@ entry: } define internal void @level2a(i32* %addr) { -; IS__TUNIT_OPM: Function Attrs: norecurse nosync nounwind -; IS__TUNIT_OPM-LABEL: define {{[^@]+}}@level2a -; IS__TUNIT_OPM-SAME: (i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[ADDR:%.*]]) #[[ATTR1]] { -; IS__TUNIT_OPM-NEXT: entry: -; IS__TUNIT_OPM-NEXT: [[TMP0:%.*]] = load i32, i32* addrspacecast (i32 addrspace(3)* @ReachableNonKernel to i32*), align 4 -; IS__TUNIT_OPM-NEXT: [[TMP1:%.*]] = load i32, i32* addrspacecast (i32 addrspace(3)* @UnreachableNonKernel to i32*), align 4 -; IS__TUNIT_OPM-NEXT: [[TMP2:%.*]] = load i32, i32* [[ADDR]], align 4 -; IS__TUNIT_OPM-NEXT: call void @use(i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]]) #[[ATTR6]] -; IS__TUNIT_OPM-NEXT: ret void -; -; IS__TUNIT_NPM: Function Attrs: norecurse nosync nounwind -; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@level2a -; IS__TUNIT_NPM-SAME: (i32 [[TMP0:%.*]]) #[[ATTR1]] { -; IS__TUNIT_NPM-NEXT: entry: -; IS__TUNIT_NPM-NEXT: [[ADDR_PRIV:%.*]] = alloca i32, align 4 -; IS__TUNIT_NPM-NEXT: store i32 [[TMP0]], i32* [[ADDR_PRIV]], align 4 -; IS__TUNIT_NPM-NEXT: [[TMP1:%.*]] = load i32, i32* addrspacecast (i32 addrspace(3)* @ReachableNonKernel to i32*), align 4 -; IS__TUNIT_NPM-NEXT: [[TMP2:%.*]] = load i32, i32* addrspacecast (i32 addrspace(3)* @UnreachableNonKernel to i32*), align 4 -; IS__TUNIT_NPM-NEXT: [[TMP3:%.*]] = load i32, i32* [[ADDR_PRIV]], align 4 -; IS__TUNIT_NPM-NEXT: call void @use(i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]]) #[[ATTR6]] -; IS__TUNIT_NPM-NEXT: ret void +; IS__TUNIT____: Function Attrs: norecurse nosync nounwind +; IS__TUNIT____-LABEL: define {{[^@]+}}@level2a +; IS__TUNIT____-SAME: () #[[ATTR1]] { +; IS__TUNIT____-NEXT: entry: +; IS__TUNIT____-NEXT: [[TMP0:%.*]] = load i32, i32* addrspacecast (i32 addrspace(3)* @ReachableNonKernel to i32*), align 4 +; IS__TUNIT____-NEXT: [[TMP1:%.*]] = load i32, i32* addrspacecast (i32 addrspace(3)* @UnreachableNonKernel to i32*), align 4 +; IS__TUNIT____-NEXT: call void @use(i32 [[TMP0]], i32 [[TMP1]], i32 17) #[[ATTR6]] +; IS__TUNIT____-NEXT: ret void ; ; IS__CGSCC_OPM: Function Attrs: norecurse nosync nounwind ; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@level2a -; IS__CGSCC_OPM-SAME: (i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[ADDR:%.*]]) #[[ATTR1]] { +; IS__CGSCC_OPM-SAME: (i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[ADDR:%.*]]) #[[ATTR1]] { ; IS__CGSCC_OPM-NEXT: entry: ; IS__CGSCC_OPM-NEXT: [[TMP0:%.*]] = load i32, i32* addrspacecast (i32 addrspace(3)* @ReachableNonKernel to i32*), align 4 ; IS__CGSCC_OPM-NEXT: [[TMP1:%.*]] = load i32, i32* addrspacecast (i32 addrspace(3)* @UnreachableNonKernel to i32*), align 4 -; IS__CGSCC_OPM-NEXT: [[TMP2:%.*]] = load i32, i32* [[ADDR]], align 4 -; IS__CGSCC_OPM-NEXT: call void @use(i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]]) #[[ATTR3]] +; IS__CGSCC_OPM-NEXT: [[TMP2:%.*]] = load i32, i32* undef, align 4 +; IS__CGSCC_OPM-NEXT: call void @use(i32 [[TMP0]], i32 [[TMP1]], i32 17) #[[ATTR4]] ; IS__CGSCC_OPM-NEXT: ret void ; ; IS__CGSCC_NPM: Function Attrs: norecurse nosync nounwind @@ -307,7 +325,7 @@ define internal void @level2a(i32* %addr) { ; IS__CGSCC_NPM-NEXT: [[TMP1:%.*]] = load i32, i32* addrspacecast (i32 addrspace(3)* @ReachableNonKernel to i32*), align 4 ; IS__CGSCC_NPM-NEXT: [[TMP2:%.*]] = load i32, i32* addrspacecast (i32 addrspace(3)* @UnreachableNonKernel to i32*), align 4 ; IS__CGSCC_NPM-NEXT: [[TMP3:%.*]] = load i32, i32* [[ADDR_PRIV]], align 4 -; IS__CGSCC_NPM-NEXT: call void @use(i32 [[TMP1]], i32 [[TMP2]], i32 17) #[[ATTR3]] +; IS__CGSCC_NPM-NEXT: call void @use(i32 [[TMP1]], i32 [[TMP2]], i32 17) #[[ATTR4]] ; IS__CGSCC_NPM-NEXT: ret void ; entry: @@ -319,36 +337,23 @@ entry: } define internal void @level2b(i32* %addr) { -; IS__TUNIT_OPM: Function Attrs: norecurse nosync nounwind -; IS__TUNIT_OPM-LABEL: define {{[^@]+}}@level2b -; IS__TUNIT_OPM-SAME: (i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[ADDR:%.*]]) #[[ATTR1]] { -; IS__TUNIT_OPM-NEXT: entry: -; IS__TUNIT_OPM-NEXT: [[TMP0:%.*]] = load i32, i32* addrspacecast (i32 addrspace(3)* @ReachableNonKernel to i32*), align 4 -; IS__TUNIT_OPM-NEXT: [[TMP1:%.*]] = load i32, i32* addrspacecast (i32 addrspace(3)* @UnreachableNonKernel to i32*), align 4 -; IS__TUNIT_OPM-NEXT: [[TMP2:%.*]] = load i32, i32* [[ADDR]], align 4 -; IS__TUNIT_OPM-NEXT: call void @use(i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]]) #[[ATTR6]] -; IS__TUNIT_OPM-NEXT: ret void -; -; IS__TUNIT_NPM: Function Attrs: norecurse nosync nounwind -; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@level2b -; IS__TUNIT_NPM-SAME: (i32 [[TMP0:%.*]]) #[[ATTR1]] { -; IS__TUNIT_NPM-NEXT: entry: -; IS__TUNIT_NPM-NEXT: [[ADDR_PRIV:%.*]] = alloca i32, align 4 -; IS__TUNIT_NPM-NEXT: store i32 [[TMP0]], i32* [[ADDR_PRIV]], align 4 -; IS__TUNIT_NPM-NEXT: [[TMP1:%.*]] = load i32, i32* addrspacecast (i32 addrspace(3)* @ReachableNonKernel to i32*), align 4 -; IS__TUNIT_NPM-NEXT: [[TMP2:%.*]] = load i32, i32* addrspacecast (i32 addrspace(3)* @UnreachableNonKernel to i32*), align 4 -; IS__TUNIT_NPM-NEXT: [[TMP3:%.*]] = load i32, i32* [[ADDR_PRIV]], align 4 -; IS__TUNIT_NPM-NEXT: call void @use(i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]]) #[[ATTR6]] -; IS__TUNIT_NPM-NEXT: ret void +; IS__TUNIT____: Function Attrs: norecurse nosync nounwind +; IS__TUNIT____-LABEL: define {{[^@]+}}@level2b +; IS__TUNIT____-SAME: () #[[ATTR1]] { +; IS__TUNIT____-NEXT: entry: +; IS__TUNIT____-NEXT: [[TMP0:%.*]] = load i32, i32* addrspacecast (i32 addrspace(3)* @ReachableNonKernel to i32*), align 4 +; IS__TUNIT____-NEXT: [[TMP1:%.*]] = load i32, i32* addrspacecast (i32 addrspace(3)* @UnreachableNonKernel to i32*), align 4 +; IS__TUNIT____-NEXT: call void @use(i32 [[TMP0]], i32 [[TMP1]], i32 17) #[[ATTR6]] +; IS__TUNIT____-NEXT: ret void ; ; IS__CGSCC_OPM: Function Attrs: norecurse nosync nounwind ; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@level2b -; IS__CGSCC_OPM-SAME: (i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[ADDR:%.*]]) #[[ATTR1]] { +; IS__CGSCC_OPM-SAME: (i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[ADDR:%.*]]) #[[ATTR1]] { ; IS__CGSCC_OPM-NEXT: entry: ; IS__CGSCC_OPM-NEXT: [[TMP0:%.*]] = load i32, i32* addrspacecast (i32 addrspace(3)* @ReachableNonKernel to i32*), align 4 ; IS__CGSCC_OPM-NEXT: [[TMP1:%.*]] = load i32, i32* addrspacecast (i32 addrspace(3)* @UnreachableNonKernel to i32*), align 4 -; IS__CGSCC_OPM-NEXT: [[TMP2:%.*]] = load i32, i32* [[ADDR]], align 4 -; IS__CGSCC_OPM-NEXT: call void @use(i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]]) #[[ATTR3]] +; IS__CGSCC_OPM-NEXT: [[TMP2:%.*]] = load i32, i32* undef, align 4 +; IS__CGSCC_OPM-NEXT: call void @use(i32 [[TMP0]], i32 [[TMP1]], i32 17) #[[ATTR4]] ; IS__CGSCC_OPM-NEXT: ret void ; ; IS__CGSCC_NPM: Function Attrs: norecurse nosync nounwind @@ -359,7 +364,7 @@ define internal void @level2b(i32* %addr) { ; IS__CGSCC_NPM-NEXT: [[TMP1:%.*]] = load i32, i32* addrspacecast (i32 addrspace(3)* @ReachableNonKernel to i32*), align 4 ; IS__CGSCC_NPM-NEXT: [[TMP2:%.*]] = load i32, i32* addrspacecast (i32 addrspace(3)* @UnreachableNonKernel to i32*), align 4 ; IS__CGSCC_NPM-NEXT: [[TMP3:%.*]] = load i32, i32* [[ADDR_PRIV]], align 4 -; IS__CGSCC_NPM-NEXT: call void @use(i32 [[TMP1]], i32 [[TMP2]], i32 17) #[[ATTR3]] +; IS__CGSCC_NPM-NEXT: call void @use(i32 [[TMP1]], i32 [[TMP2]], i32 17) #[[ATTR4]] ; IS__CGSCC_NPM-NEXT: ret void ; entry: @@ -371,20 +376,19 @@ entry: } define internal void @level2all_late(i32* %addr) { -; NOT_CGSCC_NPM: Function Attrs: nofree norecurse nosync nounwind willreturn writeonly -; NOT_CGSCC_NPM-LABEL: define {{[^@]+}}@level2all_late -; NOT_CGSCC_NPM-SAME: (i32* noalias nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[ADDR:%.*]]) #[[ATTR2]] { -; NOT_CGSCC_NPM-NEXT: entry: -; NOT_CGSCC_NPM-NEXT: store i32 1, i32* addrspacecast (i32 addrspace(3)* @UnreachableNonKernel to i32*), align 4 -; NOT_CGSCC_NPM-NEXT: store i32 5, i32* [[ADDR]], align 4 -; NOT_CGSCC_NPM-NEXT: ret void +; IS__TUNIT____: Function Attrs: nofree norecurse nosync nounwind willreturn writeonly +; IS__TUNIT____-LABEL: define {{[^@]+}}@level2all_late +; IS__TUNIT____-SAME: (i32* noalias nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[ADDR:%.*]]) #[[ATTR2]] { +; IS__TUNIT____-NEXT: entry: +; IS__TUNIT____-NEXT: store i32 1, i32* addrspacecast (i32 addrspace(3)* @UnreachableNonKernel to i32*), align 4 +; IS__TUNIT____-NEXT: ret void ; -; IS__CGSCC_NPM: Function Attrs: nofree norecurse nosync nounwind willreturn writeonly -; IS__CGSCC_NPM-LABEL: define {{[^@]+}}@level2all_late -; IS__CGSCC_NPM-SAME: (i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[ADDR:%.*]]) #[[ATTR2]] { -; IS__CGSCC_NPM-NEXT: entry: -; IS__CGSCC_NPM-NEXT: store i32 1, i32* addrspacecast (i32 addrspace(3)* @UnreachableNonKernel to i32*), align 4 -; IS__CGSCC_NPM-NEXT: ret void +; IS__CGSCC____: Function Attrs: nofree norecurse nosync nounwind willreturn writeonly +; IS__CGSCC____-LABEL: define {{[^@]+}}@level2all_late +; IS__CGSCC____-SAME: (i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[ADDR:%.*]]) #[[ATTR2]] { +; IS__CGSCC____-NEXT: entry: +; IS__CGSCC____-NEXT: store i32 1, i32* addrspacecast (i32 addrspace(3)* @UnreachableNonKernel to i32*), align 4 +; IS__CGSCC____-NEXT: ret void ; entry: store i32 1, i32* addrspacecast (i32 addrspace(3)* @UnreachableNonKernel to i32*), align 4 @@ -406,16 +410,19 @@ declare dso_local void @use(i32, i32, i32) nosync norecurse nounwind ; IS__CGSCC_OPM: attributes #[[ATTR0]] = { norecurse nosync nounwind "kernel" } ; IS__CGSCC_OPM: attributes #[[ATTR1]] = { norecurse nosync nounwind } ; IS__CGSCC_OPM: attributes #[[ATTR2]] = { nofree norecurse nosync nounwind willreturn writeonly } -; IS__CGSCC_OPM: attributes #[[ATTR3]] = { nounwind } -; IS__CGSCC_OPM: attributes #[[ATTR4]] = { nounwind willreturn writeonly } -; IS__CGSCC_OPM: attributes #[[ATTR5]] = { nounwind writeonly } +; IS__CGSCC_OPM: attributes #[[ATTR3]] = { nofree norecurse nosync nounwind readnone willreturn } +; IS__CGSCC_OPM: attributes #[[ATTR4]] = { nounwind } +; IS__CGSCC_OPM: attributes #[[ATTR5]] = { nounwind willreturn writeonly } +; IS__CGSCC_OPM: attributes #[[ATTR6]] = { nounwind readnone } +; IS__CGSCC_OPM: attributes #[[ATTR7]] = { nounwind writeonly } ;. ; IS__CGSCC_NPM: attributes #[[ATTR0]] = { norecurse nosync nounwind "kernel" } ; IS__CGSCC_NPM: attributes #[[ATTR1]] = { norecurse nosync nounwind } ; IS__CGSCC_NPM: attributes #[[ATTR2]] = { nofree norecurse nosync nounwind willreturn writeonly } -; IS__CGSCC_NPM: attributes #[[ATTR3]] = { nounwind } -; IS__CGSCC_NPM: attributes #[[ATTR4]] = { nounwind willreturn writeonly } -; IS__CGSCC_NPM: attributes #[[ATTR5]] = { nounwind writeonly } -; IS__CGSCC_NPM: attributes #[[ATTR6]] = { nosync nounwind } -; IS__CGSCC_NPM: attributes #[[ATTR7]] = { nosync nounwind writeonly } +; IS__CGSCC_NPM: attributes #[[ATTR3]] = { nofree norecurse nosync nounwind readnone willreturn } +; IS__CGSCC_NPM: attributes #[[ATTR4]] = { nounwind } +; IS__CGSCC_NPM: attributes #[[ATTR5]] = { nounwind willreturn writeonly } +; IS__CGSCC_NPM: attributes #[[ATTR6]] = { nounwind readnone } +; IS__CGSCC_NPM: attributes #[[ATTR7]] = { nosync nounwind } +; IS__CGSCC_NPM: attributes #[[ATTR8]] = { nosync nounwind writeonly } ;. -- 2.7.4