#include "llvm/Analysis/CGSCCPassManager.h"
#include "llvm/Analysis/LazyCallGraph.h"
#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/MustExecute.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/PostDominators.h"
bool isAssumedThreadLocalObject(Attributor &A, Value &Obj,
const AbstractAttribute &QueryingAA);
+/// Return true if \p I is potentially affected by a barrier.
+bool isPotentiallyAffectedByBarrier(Attributor &A, const Instruction &I,
+ const AbstractAttribute &QueryingAA);
+bool isPotentiallyAffectedByBarrier(Attributor &A, ArrayRef<const Value *> Ptrs,
+ const AbstractAttribute &QueryingAA,
+ const Instruction *CtxI);
} // namespace AA
template <>
bool isAssumedDead(const Instruction &I, const AbstractAttribute *QueryingAA,
const AAIsDead *LivenessAA, bool &UsedAssumedInformation,
bool CheckBBLivenessOnly = false,
- DepClassTy DepClass = DepClassTy::OPTIONAL);
+ DepClassTy DepClass = DepClassTy::OPTIONAL,
+ bool CheckForDeadStore = false);
/// Return true if \p U is assumed dead.
///
/// Helper function specific for intrinsics which are potentially volatile.
static bool isNoSyncIntrinsic(const Instruction *I);
+ /// Helper function to determine if \p CB is an aligned (GPU) barrier.
+ /// Aligned barriers have to be executed by all threads.
+ static bool isAlignedBarrier(const CallBase &CB);
+
/// Create an abstract attribute view for the position \p IRP.
static AANoSync &createForPosition(const IRPosition &IRP, Attributor &A);
/// Returns true if the underlying value is known dead.
virtual bool isKnownDead() const = 0;
- /// Returns true if \p BB is assumed dead.
- virtual bool isAssumedDead(const BasicBlock *BB) const = 0;
-
/// Returns true if \p BB is known dead.
virtual bool isKnownDead(const BasicBlock *BB) const = 0;
return F.hasPersonalityFn() && !canSimplifyInvokeNoUnwind(&F);
}
+ /// Returns true if \p BB is assumed dead.
+ virtual bool isAssumedDead(const BasicBlock *BB) const = 0;
+
/// Return if the edge from \p From BB to \p To BB is assumed dead.
/// This is specifically useful in AAReachability.
virtual bool isEdgeDead(const BasicBlock *From, const BasicBlock *To) const {
using Base = StateWrapper<BooleanState, AbstractAttribute>;
AAExecutionDomain(const IRPosition &IRP, Attributor &A) : Base(IRP) {}
+  /// Summary about the execution domain of a block or instruction.
+  struct ExecutionDomainTy {
+    using BarriersSetTy = SmallPtrSet<CallBase *, 2>;
+    using AssumesSetTy = SmallPtrSet<AssumeInst *, 4>;
+
+    /// Remember the llvm.assume call \p AI encountered in this domain.
+    void addAssumeInst(Attributor &A, AssumeInst &AI) {
+      EncounteredAssumes.insert(&AI);
+    }
+
+    /// Remember the aligned barrier call \p CB encountered in this domain.
+    void addAlignedBarrier(Attributor &A, CallBase &CB) {
+      AlignedBarriers.insert(&CB);
+    }
+
+    /// Forget all encountered assumes and aligned barriers collected so far.
+    void clearAssumeInstAndAlignedBarriers() {
+      EncounteredAssumes.clear();
+      AlignedBarriers.clear();
+    }
+
+    /// Flag to indicate this domain is executed only by the initial thread.
+    bool IsExecutedByInitialThreadOnly = true;
+    /// Flag to indicate every path into this domain passed through an aligned
+    /// barrier (or the kernel entry, see handleEntryBB).
+    bool IsReachedFromAlignedBarrierOnly = true;
+    /// Flag to indicate every path out of this domain reaches an aligned
+    /// barrier (or the kernel end).
+    bool IsReachingAlignedBarrierOnly = true;
+    /// Flag set once a side effect that is not known to be thread local was
+    /// encountered (see AA::isPotentiallyAffectedByBarrier).
+    bool EncounteredNonLocalSideEffect = false;
+    /// The aligned barriers encountered in this domain.
+    BarriersSetTy AlignedBarriers;
+    /// The llvm.assume calls encountered in this domain.
+    AssumesSetTy EncounteredAssumes;
+  };
+
/// Create an abstract attribute view for the position \p IRP.
static AAExecutionDomain &createForPosition(const IRPosition &IRP,
Attributor &A);
const char *getIdAddr() const override { return &ID; }
/// Check if an instruction is executed only by the initial thread.
- virtual bool isExecutedByInitialThreadOnly(const Instruction &) const = 0;
+ bool isExecutedByInitialThreadOnly(const Instruction &I) const {
+ return isExecutedByInitialThreadOnly(*I.getParent());
+ }
/// Check if a basic block is executed only by the initial thread.
virtual bool isExecutedByInitialThreadOnly(const BasicBlock &) const = 0;
+ virtual ExecutionDomainTy getExecutionDomain(const BasicBlock &) const = 0;
+ virtual ExecutionDomainTy getExecutionDomain(const CallBase &) const = 0;
+ virtual ExecutionDomainTy getFunctionExecutionDomain() const = 0;
+
/// This function should return true if the type of the \p AA is
/// AAExecutionDomain.
static bool classof(const AbstractAttribute *AA) {
return false;
}
+/// Determine if \p I could be affected by a barrier, i.e., if it accesses
+/// memory that is not provably local to the executing thread.
+bool AA::isPotentiallyAffectedByBarrier(Attributor &A, const Instruction &I,
+                                        const AbstractAttribute &QueryingAA) {
+  // Instructions that neither read nor write memory cannot interact with a
+  // barrier at all.
+  if (!I.mayHaveSideEffects() && !I.mayReadFromMemory())
+    return false;
+
+  SmallSetVector<const Value *, 8> Ptrs;
+
+  // Collect the pointer of a memory location; an unknown location (none, or
+  // one without a pointer) conservatively requires barriers.
+  auto AddLocationPtr = [&](std::optional<MemoryLocation> Loc) {
+    if (!Loc || !Loc->Ptr) {
+      LLVM_DEBUG(
+          dbgs() << "[AA] Access to unknown location; -> requires barriers\n");
+      return false;
+    }
+    Ptrs.insert(Loc->Ptr);
+    return true;
+  };
+
+  // Memory intrinsics are handled explicitly: the destination always, and the
+  // source additionally for memory transfers (memcpy/memmove).
+  if (const MemIntrinsic *MI = dyn_cast<MemIntrinsic>(&I)) {
+    if (!AddLocationPtr(MemoryLocation::getForDest(MI)))
+      return true;
+    if (const MemTransferInst *MTI = dyn_cast<MemTransferInst>(&I))
+      if (!AddLocationPtr(MemoryLocation::getForSource(MTI)))
+        return true;
+  } else if (!AddLocationPtr(MemoryLocation::getOrNone(&I)))
+    return true;
+
+  // Defer to the pointer-based overload for the collected location pointers.
+  return isPotentiallyAffectedByBarrier(A, Ptrs.getArrayRef(), QueryingAA, &I);
+}
+
+/// Determine if an access through any pointer in \p Ptrs could be affected by
+/// a barrier. Accesses are unaffected only if all underlying objects of every
+/// pointer are assumed thread local (wrt. \p QueryingAA).
+/// NOTE(review): \p CtxI is currently unused in this body — confirm whether it
+/// is intended for future use or should be dropped.
+bool AA::isPotentiallyAffectedByBarrier(Attributor &A,
+                                        ArrayRef<const Value *> Ptrs,
+                                        const AbstractAttribute &QueryingAA,
+                                        const Instruction *CtxI) {
+  for (const Value *Ptr : Ptrs) {
+    // Unknown pointers conservatively require barriers.
+    if (!Ptr) {
+      LLVM_DEBUG(dbgs() << "[AA] nullptr; -> requires barriers\n");
+      return true;
+    }
+
+    // An underlying object is harmless iff it is assumed thread local.
+    auto Pred = [&](Value &Obj) {
+      if (AA::isAssumedThreadLocalObject(A, Obj, QueryingAA))
+        return true;
+      LLVM_DEBUG(dbgs() << "[AA] Access to '" << Obj << "' via '" << *Ptr
+                        << "'; -> requires barrier\n");
+      return false;
+    };
+
+    // All underlying objects have to pass the predicate, otherwise the access
+    // is potentially affected by a barrier.
+    const auto &UnderlyingObjsAA = A.getAAFor<AAUnderlyingObjects>(
+        QueryingAA, IRPosition::value(*Ptr), DepClassTy::OPTIONAL);
+    if (!UnderlyingObjsAA.forallUnderlyingObjects(Pred))
+      return true;
+  }
+  return false;
+}
+
/// Return true if \p New is equal or worse than \p Old.
static bool isEqualOrWorse(const Attribute &New, const Attribute &Old) {
if (!Old.isIntAttribute())
const AbstractAttribute *QueryingAA,
const AAIsDead *FnLivenessAA,
bool &UsedAssumedInformation,
- bool CheckBBLivenessOnly, DepClassTy DepClass) {
+ bool CheckBBLivenessOnly, DepClassTy DepClass,
+ bool CheckForDeadStore) {
const IRPosition::CallBaseContext *CBCtx =
QueryingAA ? QueryingAA->getCallBaseContext() : nullptr;
return true;
}
+ if (CheckForDeadStore && isa<StoreInst>(I) && IsDeadAA.isRemovableStore()) {
+ if (QueryingAA)
+ recordDependence(IsDeadAA, *QueryingAA, DepClass);
+ if (!IsDeadAA.isKnownDead())
+ UsedAssumedInformation = true;
+ return true;
+ }
+
return false;
}
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/IntrinsicsAMDGPU.h"
+#include "llvm/IR/IntrinsicsNVPTX.h"
#include "llvm/IR/NoFolder.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/ValueHandle.h"
/// ------------------------ NoSync Function Attribute -------------------------
+/// Recognize calls that are known aligned (GPU) barriers, i.e., barriers
+/// executed by all threads (see the declaration in Attributor.h).
+bool AANoSync::isAlignedBarrier(const CallBase &CB) {
+  switch (CB.getIntrinsicID()) {
+  // NVPTX block-wide barrier intrinsics are treated as aligned.
+  case Intrinsic::nvvm_barrier0:
+  case Intrinsic::nvvm_barrier0_and:
+  case Intrinsic::nvvm_barrier0_or:
+  case Intrinsic::nvvm_barrier0_popc:
+    return true;
+  // TODO: Check for amdgcn_s_barrier executed in a uniform/aligned way.
+  default:
+    break;
+  }
+  // Otherwise rely on a user/frontend-provided "ompx_aligned_barrier"
+  // assumption attached to the call.
+  return hasAssumption(CB, KnownAssumptionString("ompx_aligned_barrier"));
+}
+
bool AANoSync::isNonRelaxedAtomic(const Instruction *I) {
if (!I->isAtomic())
return false;
#include "llvm/ADT/EnumeratedArray.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Frontend/OpenMP/OMPConstants.h"
#include "llvm/Frontend/OpenMP/OMPIRBuilder.h"
#include "llvm/IR/Assumptions.h"
+#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/GlobalValue.h"
#include <algorithm>
#include <optional>
+#include <string>
using namespace llvm;
using namespace omp;
if (remarksEnabled())
analysisGlobalization();
-
- Changed |= eliminateBarriers();
} else {
if (PrintICVValues)
printICVs();
Changed = true;
}
}
-
- Changed |= eliminateBarriers();
}
return Changed;
return Changed;
}
- /// Eliminates redundant, aligned barriers in OpenMP offloaded kernels.
- /// TODO: Make this an AA and expand it to work across blocks and functions.
- bool eliminateBarriers() {
- bool Changed = false;
-
- if (DisableOpenMPOptBarrierElimination)
- return /*Changed=*/false;
-
- if (OMPInfoCache.Kernels.empty())
- return /*Changed=*/false;
-
- enum ImplicitBarrierType { IBT_ENTRY, IBT_EXIT };
-
- class BarrierInfo {
- Instruction *I;
- enum ImplicitBarrierType Type;
-
- public:
- BarrierInfo(enum ImplicitBarrierType Type) : I(nullptr), Type(Type) {}
- BarrierInfo(Instruction &I) : I(&I) {}
-
- bool isImplicit() { return !I; }
-
- bool isImplicitEntry() { return isImplicit() && Type == IBT_ENTRY; }
-
- bool isImplicitExit() { return isImplicit() && Type == IBT_EXIT; }
-
- Instruction *getInstruction() { return I; }
- };
-
- for (Function *Kernel : OMPInfoCache.Kernels) {
- for (BasicBlock &BB : *Kernel) {
- SmallVector<BarrierInfo, 8> BarriersInBlock;
- SmallPtrSet<Instruction *, 8> BarriersToBeDeleted;
-
- // Add the kernel entry implicit barrier.
- if (&Kernel->getEntryBlock() == &BB)
- BarriersInBlock.push_back(IBT_ENTRY);
-
- // Find implicit and explicit aligned barriers in the same basic block.
- for (Instruction &I : BB) {
- if (isa<ReturnInst>(I)) {
- // Add the implicit barrier when exiting the kernel.
- BarriersInBlock.push_back(IBT_EXIT);
- continue;
- }
- CallBase *CB = dyn_cast<CallBase>(&I);
- if (!CB)
- continue;
-
- auto IsAlignBarrierCB = [&](CallBase &CB) {
- switch (CB.getIntrinsicID()) {
- case Intrinsic::nvvm_barrier0:
- case Intrinsic::nvvm_barrier0_and:
- case Intrinsic::nvvm_barrier0_or:
- case Intrinsic::nvvm_barrier0_popc:
- return true;
- default:
- break;
- }
- return hasAssumption(CB,
- KnownAssumptionString("ompx_aligned_barrier"));
- };
-
- if (IsAlignBarrierCB(*CB)) {
- // Add an explicit aligned barrier.
- BarriersInBlock.push_back(I);
- }
- }
-
- if (BarriersInBlock.size() <= 1)
- continue;
-
- // A barrier in a barrier pair is removeable if all instructions
- // between the barriers in the pair are side-effect free modulo the
- // barrier operation.
- auto IsBarrierRemoveable = [&Kernel](
- BarrierInfo *StartBI, BarrierInfo *EndBI,
- SmallVector<AssumeInst *> &Assumptions) {
- assert(
- !StartBI->isImplicitExit() &&
- "Expected start barrier to be other than a kernel exit barrier");
- assert(
- !EndBI->isImplicitEntry() &&
- "Expected end barrier to be other than a kernel entry barrier");
- // If StarBI instructions is null then this the implicit
- // kernel entry barrier, so iterate from the first instruction in the
- // entry block.
- Instruction *I = (StartBI->isImplicitEntry())
- ? &Kernel->getEntryBlock().front()
- : StartBI->getInstruction()->getNextNode();
- assert(I && "Expected non-null start instruction");
- Instruction *E = (EndBI->isImplicitExit())
- ? I->getParent()->getTerminator()
- : EndBI->getInstruction();
- assert(E && "Expected non-null end instruction");
-
- for (; I != E; I = I->getNextNode()) {
- if (!I->mayHaveSideEffects() && !I->mayReadFromMemory())
- continue;
-
- auto IsPotentiallyAffectedByBarrier =
- [](std::optional<MemoryLocation> Loc) {
- const Value *Obj = (Loc && Loc->Ptr)
- ? getUnderlyingObject(Loc->Ptr)
- : nullptr;
- if (!Obj) {
- LLVM_DEBUG(
- dbgs()
- << "Access to unknown location requires barriers\n");
- return true;
- }
- if (isa<UndefValue>(Obj))
- return false;
- if (isa<AllocaInst>(Obj))
- return false;
- if (auto *GV = dyn_cast<GlobalVariable>(Obj)) {
- if (GV->isConstant())
- return false;
- if (GV->isThreadLocal())
- return false;
- if (GV->getAddressSpace() == (int)AddressSpace::Local)
- return false;
- if (GV->getAddressSpace() == (int)AddressSpace::Constant)
- return false;
- }
- LLVM_DEBUG(dbgs() << "Access to '" << *Obj
- << "' requires barriers\n");
- return true;
- };
-
- if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(I)) {
- std::optional<MemoryLocation> Loc =
- MemoryLocation::getForDest(MI);
- if (IsPotentiallyAffectedByBarrier(Loc))
- return false;
- if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(I)) {
- std::optional<MemoryLocation> Loc =
- MemoryLocation::getForSource(MTI);
- if (IsPotentiallyAffectedByBarrier(Loc))
- return false;
- }
- continue;
- }
-
- if (auto *AI = dyn_cast<AssumeInst>(I)) {
- Assumptions.push_back(AI);
- continue;
- }
-
- if (auto *LI = dyn_cast<LoadInst>(I))
- if (LI->hasMetadata(LLVMContext::MD_invariant_load))
- continue;
-
- std::optional<MemoryLocation> Loc = MemoryLocation::getOrNone(I);
- if (IsPotentiallyAffectedByBarrier(Loc))
- return false;
- }
-
- return true;
- };
-
- // Iterate barrier pairs and remove an explicit barrier if analysis
- // deems it removeable.
- for (auto *It = BarriersInBlock.begin(),
- *End = BarriersInBlock.end() - 1;
- It != End; ++It) {
-
- BarrierInfo *StartBI = It;
- BarrierInfo *EndBI = (It + 1);
-
- // Cannot remove when both are implicit barriers, continue.
- if (StartBI->isImplicit() && EndBI->isImplicit())
- continue;
-
- SmallVector<AssumeInst *> Assumptions;
- if (!IsBarrierRemoveable(StartBI, EndBI, Assumptions))
- continue;
-
- assert(!(StartBI->isImplicit() && EndBI->isImplicit()) &&
- "Expected at least one explicit barrier to remove.");
-
- for (auto *Assumption : Assumptions)
- Assumption->eraseFromParent();
-
- // Remove an explicit barrier, check first, then second.
- if (!StartBI->isImplicit()) {
- LLVM_DEBUG(dbgs() << "Remove start barrier "
- << *StartBI->getInstruction() << "\n");
- BarriersToBeDeleted.insert(StartBI->getInstruction());
- } else {
- LLVM_DEBUG(dbgs() << "Remove end barrier "
- << *EndBI->getInstruction() << "\n");
- BarriersToBeDeleted.insert(EndBI->getInstruction());
- }
- }
-
- if (BarriersToBeDeleted.empty())
- continue;
-
- Changed = true;
- for (Instruction *I : BarriersToBeDeleted) {
- ++NumBarriersEliminated;
- auto Remark = [&](OptimizationRemark OR) {
- return OR << "Redundant barrier eliminated.";
- };
-
- if (EnableVerboseRemarks)
- emitRemark<OptimizationRemark>(I, "OMP190", Remark);
- I->eraseFromParent();
- }
- }
- }
-
- return Changed;
- }
-
void analysisGlobalization() {
auto &RFI = OMPInfoCache.RFIs[OMPRTL___kmpc_alloc_shared];
AAExecutionDomainFunction(const IRPosition &IRP, Attributor &A)
: AAExecutionDomain(IRP, A) {}
+  ~AAExecutionDomainFunction() {
+    // RPOT is heap-allocated in initialize(); release it here.
+    // NOTE(review): a std::unique_ptr member would avoid the manual delete.
+    delete RPOT;
+  }
+
+  void initialize(Attributor &A) override {
+    // Declarations have no body to traverse; give up immediately.
+    if (getAnchorScope()->isDeclaration()) {
+      indicatePessimisticFixpoint();
+      return;
+    }
+    // Cache the RPO traversal once; it is reused by every updateImpl run.
+    RPOT = new ReversePostOrderTraversal<Function *>(getAnchorScope());
+  }
+
const std::string getAsStr() const override {
- return "[AAExecutionDomain] " + std::to_string(SingleThreadedBBs.size()) +
- "/" + std::to_string(NumBBs) + " BBs thread 0 only.";
+ unsigned TotalBlocks = 0, InitialThreadBlocks = 0;
+ for (auto &It : BEDMap) {
+ TotalBlocks++;
+ InitialThreadBlocks += It.getSecond().IsExecutedByInitialThreadOnly;
+ }
+ return "[AAExecutionDomain] " + std::to_string(InitialThreadBlocks) + "/" +
+ std::to_string(TotalBlocks) + " executed by initial thread only";
}
/// See AbstractAttribute::trackStatistics().
void trackStatistics() const override {}
- void initialize(Attributor &A) override {
- Function *F = getAnchorScope();
- for (const auto &BB : *F)
- SingleThreadedBBs.insert(&BB);
- NumBBs = SingleThreadedBBs.size();
- }
-
ChangeStatus manifest(Attributor &A) override {
LLVM_DEBUG({
- for (const BasicBlock *BB : SingleThreadedBBs)
+ for (const BasicBlock &BB : *getAnchorScope()) {
+ if (!isExecutedByInitialThreadOnly(BB))
+ continue;
dbgs() << TAG << " Basic block @" << getAnchorScope()->getName() << " "
- << BB->getName() << " is executed by a single thread.\n";
+ << BB.getName() << " is executed by a single thread.\n";
+ }
});
- return ChangeStatus::UNCHANGED;
- }
- ChangeStatus updateImpl(Attributor &A) override;
+ ChangeStatus Changed = ChangeStatus::UNCHANGED;
- /// Check if an instruction is executed by a single thread.
- bool isExecutedByInitialThreadOnly(const Instruction &I) const override {
- return isExecutedByInitialThreadOnly(*I.getParent());
- }
+ if (DisableOpenMPOptBarrierElimination)
+ return Changed;
- bool isExecutedByInitialThreadOnly(const BasicBlock &BB) const override {
- return isValidState() && SingleThreadedBBs.contains(&BB);
+ SmallPtrSet<CallBase *, 16> DeletedBarriers;
+ auto HandleAlignedBarrier = [&](CallBase *CB) {
+ const ExecutionDomainTy &ED = CEDMap[CB];
+ if (!ED.IsReachedFromAlignedBarrierOnly ||
+ ED.EncounteredNonLocalSideEffect)
+ return;
+
+ // We can remove this barrier, if it is one, or all aligned barriers
+ // reaching the kernel end. In the latter case we can transitively work
+ // our way back until we find a barrier that guards a side-effect if we
+ // are dealing with the kernel end here.
+ if (CB) {
+ DeletedBarriers.insert(CB);
+ A.deleteAfterManifest(*CB);
+ ++NumBarriersEliminated;
+ Changed = ChangeStatus::CHANGED;
+ } else if (!ED.AlignedBarriers.empty()) {
+ NumBarriersEliminated += ED.AlignedBarriers.size();
+ Changed = ChangeStatus::CHANGED;
+ SmallVector<CallBase *> Worklist(ED.AlignedBarriers.begin(),
+ ED.AlignedBarriers.end());
+ SmallSetVector<CallBase *, 16> Visited;
+ while (!Worklist.empty()) {
+ CallBase *LastCB = Worklist.pop_back_val();
+ if (!Visited.insert(LastCB))
+ continue;
+ if (!DeletedBarriers.count(LastCB)) {
+ A.deleteAfterManifest(*LastCB);
+ continue;
+ }
+ // The final aligned barrier (LastCB) reaching the kernel end was
+ // removed already. This means we can go one step further and remove
+          // the barriers encountered last before (LastCB).
+ const ExecutionDomainTy &LastED = CEDMap[LastCB];
+ Worklist.append(LastED.AlignedBarriers.begin(),
+ LastED.AlignedBarriers.end());
+ }
+ }
+
+ // If we actually eliminated a barrier we need to eliminate the associated
+ // llvm.assumes as well to avoid creating UB.
+ if (!ED.EncounteredAssumes.empty() && (CB || !ED.AlignedBarriers.empty()))
+ for (auto *AssumeCB : ED.EncounteredAssumes)
+ A.deleteAfterManifest(*AssumeCB);
+ };
+
+ for (auto *CB : AlignedBarriers)
+ HandleAlignedBarrier(CB);
+
+ auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());
+ // Handle the "kernel end barrier" for kernels too.
+ if (OMPInfoCache.Kernels.count(getAnchorScope()))
+ HandleAlignedBarrier(nullptr);
+
+ return Changed;
}
- /// Set of basic blocks that are executed by a single thread.
- SmallSetVector<const BasicBlock *, 16> SingleThreadedBBs;
+ /// Merge barrier and assumption information from \p PredED into the successor
+ /// \p ED.
+ void
+ mergeInPredecessorBarriersAndAssumptions(Attributor &A, ExecutionDomainTy &ED,
+ const ExecutionDomainTy &PredED);
- /// Total number of basic blocks in this function.
- long unsigned NumBBs = 0;
-};
+ /// Merge all information from \p PredED into the successor \p ED. If
+ /// \p InitialEdgeOnly is set, only the initial edge will enter the block
+ /// represented by \p ED from this predecessor.
+ void mergeInPredecessor(Attributor &A, ExecutionDomainTy &ED,
+ const ExecutionDomainTy &PredED,
+ bool InitialEdgeOnly = false);
-ChangeStatus AAExecutionDomainFunction::updateImpl(Attributor &A) {
- Function *F = getAnchorScope();
- ReversePostOrderTraversal<Function *> RPOT(F);
- auto NumSingleThreadedBBs = SingleThreadedBBs.size();
+ /// Accumulate information for the entry block in \p EntryBBED.
+ void handleEntryBB(Attributor &A, ExecutionDomainTy &EntryBBED);
- bool AllCallSitesKnown;
- auto PredForCallSite = [&](AbstractCallSite ACS) {
- const auto &ExecutionDomainAA = A.getAAFor<AAExecutionDomain>(
- *this, IRPosition::function(*ACS.getInstruction()->getFunction()),
- DepClassTy::REQUIRED);
- return ACS.isDirectCall() &&
- ExecutionDomainAA.isExecutedByInitialThreadOnly(
- *ACS.getInstruction());
- };
+ /// See AbstractAttribute::updateImpl.
+ ChangeStatus updateImpl(Attributor &A) override;
- if (!A.checkForAllCallSites(PredForCallSite, *this,
- /* RequiresAllCallSites */ true,
- AllCallSitesKnown))
- SingleThreadedBBs.remove(&F->getEntryBlock());
+ /// Query interface, see AAExecutionDomain
+ ///{
+ bool isExecutedByInitialThreadOnly(const BasicBlock &BB) const override {
+ if (!isValidState())
+ return false;
+ return BEDMap.lookup(&BB).IsExecutedByInitialThreadOnly;
+ }
- auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());
- auto &RFI = OMPInfoCache.RFIs[OMPRTL___kmpc_target_init];
+ ExecutionDomainTy getExecutionDomain(const BasicBlock &BB) const override {
+ assert(isValidState() &&
+ "No request should be made against an invalid state!");
+ return BEDMap.lookup(&BB);
+ }
+ ExecutionDomainTy getExecutionDomain(const CallBase &CB) const override {
+ assert(isValidState() &&
+ "No request should be made against an invalid state!");
+ return CEDMap.lookup(&CB);
+ }
+ ExecutionDomainTy getFunctionExecutionDomain() const override {
+ assert(isValidState() &&
+ "No request should be made against an invalid state!");
+ return BEDMap.lookup(nullptr);
+ }
+ ///}
// Check if the edge into the successor block contains a condition that only
// lets the main thread execute it.
- auto IsInitialThreadOnly = [&](BranchInst *Edge, BasicBlock *SuccessorBB) {
+ static bool isInitialThreadOnlyEdge(Attributor &A, BranchInst *Edge,
+ BasicBlock &SuccessorBB) {
if (!Edge || !Edge->isConditional())
return false;
- if (Edge->getSuccessor(0) != SuccessorBB)
+ if (Edge->getSuccessor(0) != &SuccessorBB)
return false;
auto *Cmp = dyn_cast<CmpInst>(Edge->getCondition());
// Match: -1 == __kmpc_target_init (for non-SPMD kernels only!)
if (C->isAllOnesValue()) {
auto *CB = dyn_cast<CallBase>(Cmp->getOperand(0));
+ auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());
+ auto &RFI = OMPInfoCache.RFIs[OMPRTL___kmpc_target_init];
CB = CB ? OpenMPOpt::getCallIfRegularCall(*CB, &RFI) : nullptr;
if (!CB)
return false;
return false;
};
- // Merge all the predecessor states into the current basic block. A basic
- // block is executed by a single thread if all of its predecessors are.
- auto MergePredecessorStates = [&](BasicBlock *BB) {
- if (pred_empty(BB))
- return SingleThreadedBBs.contains(BB);
-
- bool IsInitialThread = true;
- for (BasicBlock *PredBB : predecessors(BB)) {
- if (!IsInitialThreadOnly(dyn_cast<BranchInst>(PredBB->getTerminator()),
- BB))
- IsInitialThread &= SingleThreadedBBs.contains(PredBB);
+ /// Mapping containing information per block.
+ DenseMap<const BasicBlock *, ExecutionDomainTy> BEDMap;
+ DenseMap<const CallBase *, ExecutionDomainTy> CEDMap;
+ SmallSetVector<CallBase *, 16> AlignedBarriers;
+
+ ReversePostOrderTraversal<Function *> *RPOT = nullptr;
+};
+
+/// Accumulate the assume and aligned-barrier sets of \p PredED into \p ED.
+void AAExecutionDomainFunction::mergeInPredecessorBarriersAndAssumptions(
+    Attributor &A, ExecutionDomainTy &ED, const ExecutionDomainTy &PredED) {
+  for (auto *EA : PredED.EncounteredAssumes)
+    ED.addAssumeInst(A, *EA);
+
+  for (auto *AB : PredED.AlignedBarriers)
+    ED.addAlignedBarrier(A, *AB);
+}
+
+/// Fold the predecessor state \p PredED into the successor state \p ED.
+void AAExecutionDomainFunction::mergeInPredecessor(
+    Attributor &A, ExecutionDomainTy &ED, const ExecutionDomainTy &PredED,
+    bool InitialEdgeOnly) {
+  // Executed-by-initial-thread-only holds if the incoming edge itself filters
+  // out all other threads (InitialEdgeOnly), or if both the predecessor and
+  // the so-far-merged successor state say so.
+  ED.IsExecutedByInitialThreadOnly =
+      InitialEdgeOnly || (PredED.IsExecutedByInitialThreadOnly &&
+                          ED.IsExecutedByInitialThreadOnly);
+
+  // Reached-from-aligned-barrier-only is merged conjunctively; encountered
+  // non-local side effects disjunctively.
+  ED.IsReachedFromAlignedBarrierOnly = ED.IsReachedFromAlignedBarrierOnly &&
+                                       PredED.IsReachedFromAlignedBarrierOnly;
+  ED.EncounteredNonLocalSideEffect =
+      ED.EncounteredNonLocalSideEffect | PredED.EncounteredNonLocalSideEffect;
+  // The collected assume/barrier sets are only meaningful while all paths go
+  // through aligned barriers; otherwise drop them.
+  if (ED.IsReachedFromAlignedBarrierOnly)
+    mergeInPredecessorBarriersAndAssumptions(A, ED, PredED);
+  else
+    ED.clearAssumeInstAndAlignedBarriers();
+}
+
+/// Compute the execution domain entering the function's entry block from the
+/// states of all (known) call sites, or from conservative defaults otherwise.
+void AAExecutionDomainFunction::handleEntryBB(Attributor &A,
+                                              ExecutionDomainTy &EntryBBED) {
+  SmallVector<ExecutionDomainTy> PredExecDomains;
+  // Collect the execution domain at each direct call site; fail if any call
+  // site's AAExecutionDomain is in an invalid state.
+  auto PredForCallSite = [&](AbstractCallSite ACS) {
+    const auto &EDAA = A.getAAFor<AAExecutionDomain>(
+        *this, IRPosition::function(*ACS.getInstruction()->getFunction()),
+        DepClassTy::OPTIONAL);
+    if (!EDAA.getState().isValidState())
+      return false;
+    PredExecDomains.emplace_back(
+        EDAA.getExecutionDomain(*cast<CallBase>(ACS.getInstruction())));
+    return true;
+  };
+
+  bool AllCallSitesKnown;
+  if (A.checkForAllCallSites(PredForCallSite, *this,
+                             /* RequiresAllCallSites */ true,
+                             AllCallSitesKnown)) {
+    // All call sites are known; merge each one as a predecessor.
+    for (const auto &PredED : PredExecDomains)
+      mergeInPredecessor(A, EntryBBED, PredED);
+
+  } else {
+    // We could not find all predecessors, so this is either a kernel or a
+    // function with external linkage (or with some other weird uses).
+    auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());
+    if (OMPInfoCache.Kernels.count(getAnchorScope())) {
+      // Kernel entry: treated as-if reached from an (implicit) aligned
+      // barrier, but not executed by the initial thread only.
+      EntryBBED.IsExecutedByInitialThreadOnly = false;
+      EntryBBED.IsReachedFromAlignedBarrierOnly = true;
+      EntryBBED.EncounteredNonLocalSideEffect = false;
+    } else {
+      // Unknown external callers: assume the worst on all accounts.
+      EntryBBED.IsExecutedByInitialThreadOnly = false;
+      EntryBBED.IsReachedFromAlignedBarrierOnly = false;
+      EntryBBED.EncounteredNonLocalSideEffect = true;
    }
+  }
+
+  // Fold the entry information into the function-wide summary, which is
+  // stored in BEDMap under the nullptr key (see getFunctionExecutionDomain).
+  auto &FnED = BEDMap[nullptr];
+  FnED.IsReachingAlignedBarrierOnly &=
+      EntryBBED.IsReachedFromAlignedBarrierOnly;
+}
+
+ChangeStatus AAExecutionDomainFunction::updateImpl(Attributor &A) {
+
+ bool Changed = false;
- return IsInitialThread;
+ // Helper to deal with an aligned barrier encountered during the forward
+ // traversal. \p CB is the aligned barrier, \p ED is the execution domain when
+ // it was encountered.
+ auto HandleAlignedBarrier = [&](CallBase *CB, ExecutionDomainTy &ED) {
+ if (CB)
+ Changed |= AlignedBarriers.insert(CB);
+ // First, update the barrier ED kept in the separate CEDMap.
+ auto &CallED = CEDMap[CB];
+ mergeInPredecessor(A, CallED, ED);
+ // Next adjust the ED we use for the traversal.
+ ED.EncounteredNonLocalSideEffect = false;
+ ED.IsReachedFromAlignedBarrierOnly = true;
+ // Aligned barrier collection has to come last.
+ ED.clearAssumeInstAndAlignedBarriers();
+ ED.addAlignedBarrier(A, *CB);
};
- for (auto *BB : RPOT) {
- if (!MergePredecessorStates(BB))
- SingleThreadedBBs.remove(BB);
+ auto &LivenessAA =
+ A.getAAFor<AAIsDead>(*this, getIRPosition(), DepClassTy::OPTIONAL);
+
+  // Set \p R to \p V and report true if that changed \p R.
+ auto SetAndRecord = [&](bool &R, bool V) {
+ bool Eq = (R == V);
+ R = V;
+ return !Eq;
+ };
+
+ auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());
+
+ Function *F = getAnchorScope();
+ BasicBlock &EntryBB = F->getEntryBlock();
+
+ SmallVector<Instruction *> SyncInstWorklist;
+ for (auto &RIt : *RPOT) {
+ BasicBlock &BB = *RIt;
+
+ ExecutionDomainTy ED;
+ // Propagate "incoming edges" into information about this block.
+ if (&BB == &EntryBB) {
+ handleEntryBB(A, ED);
+ } else {
+ // For live non-entry blocks we only propagate information via live edges.
+ if (LivenessAA.isAssumedDead(&BB))
+ continue;
+
+ for (auto *PredBB : predecessors(&BB)) {
+ if (LivenessAA.isEdgeDead(PredBB, &BB))
+ continue;
+ bool InitialEdgeOnly = isInitialThreadOnlyEdge(
+ A, dyn_cast<BranchInst>(PredBB->getTerminator()), BB);
+ mergeInPredecessor(A, ED, BEDMap[PredBB], InitialEdgeOnly);
+ }
+ }
+
+ // Now we traverse the block, accumulate effects in ED and attach
+ // information to calls.
+ for (Instruction &I : BB) {
+ bool UsedAssumedInformation;
+ if (A.isAssumedDead(I, *this, &LivenessAA, UsedAssumedInformation,
+ /* CheckBBLivenessOnly */ false, DepClassTy::OPTIONAL,
+ /* CheckForDeadStore */ true))
+ continue;
+
+      // Assumes and "assume-like" (dbg, lifetime, ...) are handled first; the
+      // former are collected, the latter are ignored.
+ if (auto *II = dyn_cast<IntrinsicInst>(&I)) {
+ if (auto *AI = dyn_cast_or_null<AssumeInst>(II)) {
+ ED.addAssumeInst(A, *AI);
+ continue;
+ }
+ // TODO: Should we also collect and delete lifetime markers?
+ if (II->isAssumeLikeIntrinsic())
+ continue;
+ }
+
+ auto *CB = dyn_cast<CallBase>(&I);
+ bool IsNoSync = AA::isNoSyncInst(A, I, *this);
+ bool IsAlignedBarrier =
+ !IsNoSync && CB && AANoSync::isAlignedBarrier(*CB);
+
+ // Next we check for calls. Aligned barriers are handled
+ // explicitly, everything else is kept for the backward traversal and will
+ // also affect our state.
+ if (CB) {
+ if (IsAlignedBarrier) {
+ HandleAlignedBarrier(CB, ED);
+ continue;
+ }
+
+ // Check the pointer(s) of a memory intrinsic explicitly.
+ if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(&I)) {
+ if (!ED.EncounteredNonLocalSideEffect &&
+ AA::isPotentiallyAffectedByBarrier(A, I, *this))
+ ED.EncounteredNonLocalSideEffect = true;
+ if (!IsNoSync) {
+ ED.IsReachedFromAlignedBarrierOnly = false;
+ SyncInstWorklist.push_back(&I);
+ }
+ continue;
+ }
+
+ // Record how we entered the call, then accumulate the effect of the
+ // call in ED for potential use by the callee.
+ auto &CallED = CEDMap[CB];
+ mergeInPredecessor(A, CallED, ED);
+
+ // If we have a sync-definition we can check if it starts/ends in an
+ // aligned barrier. If we are unsure we assume any sync breaks
+ // alignment.
+ Function *Callee = CB->getCalledFunction();
+ if (!IsNoSync && Callee && !Callee->isDeclaration()) {
+ const auto &EDAA = A.getAAFor<AAExecutionDomain>(
+ *this, IRPosition::function(*Callee), DepClassTy::OPTIONAL);
+ if (EDAA.getState().isValidState()) {
+ const auto &CalleeED = EDAA.getFunctionExecutionDomain();
+ ED.IsReachedFromAlignedBarrierOnly =
+ CalleeED.IsReachedFromAlignedBarrierOnly;
+ if (IsNoSync || !CalleeED.IsReachedFromAlignedBarrierOnly)
+ ED.EncounteredNonLocalSideEffect |=
+ CalleeED.EncounteredNonLocalSideEffect;
+ else
+ ED.EncounteredNonLocalSideEffect =
+ CalleeED.EncounteredNonLocalSideEffect;
+ if (!CalleeED.IsReachingAlignedBarrierOnly)
+ SyncInstWorklist.push_back(&I);
+ if (CalleeED.IsReachedFromAlignedBarrierOnly)
+ mergeInPredecessorBarriersAndAssumptions(A, ED, CalleeED);
+ continue;
+ }
+ }
+ ED.IsReachedFromAlignedBarrierOnly =
+ IsNoSync && ED.IsReachedFromAlignedBarrierOnly;
+ ED.EncounteredNonLocalSideEffect |= true;
+ if (!IsNoSync)
+ SyncInstWorklist.push_back(&I);
+ }
+
+ if (!I.mayHaveSideEffects() && !I.mayReadFromMemory())
+ continue;
+
+ // If we have a callee we try to use fine-grained information to
+ // determine local side-effects.
+ if (CB) {
+ const auto &MemAA = A.getAAFor<AAMemoryLocation>(
+ *this, IRPosition::callsite_function(*CB), DepClassTy::OPTIONAL);
+
+ auto AccessPred = [&](const Instruction *I, const Value *Ptr,
+ AAMemoryLocation::AccessKind,
+ AAMemoryLocation::MemoryLocationsKind) {
+ return !AA::isPotentiallyAffectedByBarrier(A, {Ptr}, *this, I);
+ };
+ if (MemAA.getState().isValidState() &&
+ MemAA.checkForAllAccessesToMemoryKind(
+ AccessPred, AAMemoryLocation::ALL_LOCATIONS))
+ continue;
+ }
+
+ if (!I.mayHaveSideEffects() && OMPInfoCache.isOnlyUsedByAssume(I))
+ continue;
+
+ if (auto *LI = dyn_cast<LoadInst>(&I))
+ if (LI->hasMetadata(LLVMContext::MD_invariant_load))
+ continue;
+
+ if (!ED.EncounteredNonLocalSideEffect &&
+ AA::isPotentiallyAffectedByBarrier(A, I, *this))
+ ED.EncounteredNonLocalSideEffect = true;
+ }
+
+ if (!isa<UnreachableInst>(BB.getTerminator()) &&
+ !BB.getTerminator()->getNumSuccessors()) {
+
+ auto &FnED = BEDMap[nullptr];
+ mergeInPredecessor(A, FnED, ED);
+
+ if (OMPInfoCache.Kernels.count(F))
+ HandleAlignedBarrier(nullptr, ED);
+ }
+
+ ExecutionDomainTy &StoredED = BEDMap[&BB];
+ ED.IsReachingAlignedBarrierOnly = StoredED.IsReachingAlignedBarrierOnly;
+
+ // Check if we computed anything different as part of the forward
+ // traversal. We do not take assumptions and aligned barriers into account
+ // as they do not influence the state we iterate. Backward traversal values
+ // are handled later on.
+ if (ED.IsExecutedByInitialThreadOnly !=
+ StoredED.IsExecutedByInitialThreadOnly ||
+ ED.IsReachedFromAlignedBarrierOnly !=
+ StoredED.IsReachedFromAlignedBarrierOnly ||
+ ED.EncounteredNonLocalSideEffect !=
+ StoredED.EncounteredNonLocalSideEffect)
+ Changed = true;
+
+ // Update the state with the new value.
+ StoredED = std::move(ED);
+ }
+
+ // Propagate (non-aligned) sync instruction effects backwards until the
+ // entry is hit or an aligned barrier.
+ SmallSetVector<BasicBlock *, 16> Visited;
+ while (!SyncInstWorklist.empty()) {
+ Instruction *SyncInst = SyncInstWorklist.pop_back_val();
+ Instruction *CurInst = SyncInst;
+ bool HitAlignedBarrier = false;
+ while ((CurInst = CurInst->getPrevNode())) {
+ auto *CB = dyn_cast<CallBase>(CurInst);
+ if (!CB)
+ continue;
+ auto &CallED = CEDMap[CB];
+ if (SetAndRecord(CallED.IsReachingAlignedBarrierOnly, false))
+ Changed = true;
+ HitAlignedBarrier = AlignedBarriers.count(CB);
+ if (HitAlignedBarrier)
+ break;
+ }
+ if (HitAlignedBarrier)
+ continue;
+ BasicBlock *SyncBB = SyncInst->getParent();
+ for (auto *PredBB : predecessors(SyncBB)) {
+ if (LivenessAA.isEdgeDead(PredBB, SyncBB))
+ continue;
+ if (!Visited.insert(PredBB))
+ continue;
+ SyncInstWorklist.push_back(PredBB->getTerminator());
+ auto &PredED = BEDMap[PredBB];
+ if (SetAndRecord(PredED.IsReachingAlignedBarrierOnly, false))
+ Changed = true;
+ }
+ if (SyncBB != &EntryBB)
+ continue;
+ auto &FnED = BEDMap[nullptr];
+ if (SetAndRecord(FnED.IsReachingAlignedBarrierOnly, false))
+ Changed = true;
}
- return (NumSingleThreadedBBs == SingleThreadedBBs.size())
- ? ChangeStatus::UNCHANGED
- : ChangeStatus::CHANGED;
+ return Changed ? ChangeStatus::CHANGED : ChangeStatus::UNCHANGED;
}
/// Try to replace memory allocation calls called by a single thread with a
Attributor::SimplifictionCallbackTy SCB =
[](const IRPosition &, const AbstractAttribute *,
bool &) -> std::optional<Value *> { return nullptr; };
+
+ Function *F = getAnchorScope();
for (User *U : RFI.Declaration->users())
if (CallBase *CB = dyn_cast<CallBase>(U)) {
- if (CB->getCaller() != getAnchorScope())
+ if (CB->getFunction() != F)
continue;
MallocCalls.insert(CB);
A.registerSimplificationCallback(IRPosition::callsite_returned(*CB),
if (CallBase *CB = dyn_cast<CallBase>(U)) {
if (CB->getCaller() != F)
continue;
+ if (!MallocCalls.count(CB))
+ continue;
if (!isa<ConstantInt>(CB->getArgOperand(0))) {
MallocCalls.remove(CB);
continue;
; CHECK: Function Attrs: norecurse nounwind memory(none)
; CHECK-LABEL: define {{[^@]+}}@f
; CHECK-SAME: () #[[ATTR0:[0-9]+]] {
-; CHECK-NEXT: [[TMP1:%.*]] = alloca i64, align 8
; CHECK-NEXT: ret void
;
;
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-globals
-; RUN: opt < %s -S -passes=openmp-opt-cgscc | FileCheck %s
+; RUN: opt < %s -S -passes=openmp-opt | FileCheck %s --check-prefixes=CHECK,MODULE
+; RUN: opt < %s -S -passes=openmp-opt-cgscc | FileCheck %s --check-prefixes=CHECK,CGSCC
+target triple = "amdgcn-amd-amdhsa"
declare void @useI32(i32)
declare void @unknown()
; CHECK-NEXT: [[B:%.*]] = load i32, ptr addrspacecast (ptr addrspace(4) @GC2 to ptr), align 4
; CHECK-NEXT: [[ARGC:%.*]] = addrspacecast ptr addrspace(4) [[ARG]] to ptr
; CHECK-NEXT: [[C:%.*]] = load i32, ptr [[ARGC]], align 4
-; CHECK-NEXT: call void @aligned_barrier()
; CHECK-NEXT: [[D:%.*]] = add i32 42, [[B]]
; CHECK-NEXT: [[E:%.*]] = add i32 [[D]], [[C]]
; CHECK-NEXT: call void @useI32(i32 [[E]])
; CHECK-NEXT: [[A:%.*]] = load i32, ptr @PG1, align 4
; CHECK-NEXT: store i32 [[A]], ptr [[LOC]], align 4
; CHECK-NEXT: [[B:%.*]] = load i32, ptr addrspacecast (ptr addrspace(5) @PG2 to ptr), align 4
-; CHECK-NEXT: call void @aligned_barrier()
; CHECK-NEXT: [[ARGC:%.*]] = addrspacecast ptr addrspace(5) [[ARG]] to ptr
; CHECK-NEXT: store i32 [[B]], ptr [[ARGC]], align 4
; CHECK-NEXT: [[V:%.*]] = load i32, ptr [[LOC]], align 4
ret void
}
-!llvm.module.flags = !{!12,!13}
-!nvvm.annotations = !{!0,!1,!2,!3,!4,!5,!6,!7,!8,!9,!10,!11}
+; Kernel with a diamond CFG and no memory effects between barriers: the CHECK
+; lines expect every barrier (aligned_barrier and nvvm.barrier0) to be removed.
+define void @multiple_blocks_kernel_1(i1 %c0, i1 %c1) {
+; CHECK-LABEL: define {{[^@]+}}@multiple_blocks_kernel_1
+; CHECK-SAME: (i1 [[C0:%.*]], i1 [[C1:%.*]]) {
+; CHECK-NEXT:    br i1 [[C0]], label [[T0:%.*]], label [[F0:%.*]]
+; CHECK:       t0:
+; CHECK-NEXT:    br label [[T0B:%.*]]
+; CHECK:       t0b:
+; CHECK-NEXT:    br label [[M:%.*]]
+; CHECK:       f0:
+; CHECK-NEXT:    br i1 [[C1]], label [[T1:%.*]], label [[F1:%.*]]
+; CHECK:       t1:
+; CHECK-NEXT:    br label [[M]]
+; CHECK:       f1:
+; CHECK-NEXT:    br label [[M]]
+; CHECK:       m:
+; CHECK-NEXT:    ret void
+;
+  call void @llvm.nvvm.barrier0()
+  call void @aligned_barrier()
+  br i1 %c0, label %t0, label %f0
+t0:
+  call void @aligned_barrier()
+  br label %t0b
+t0b:
+  call void @aligned_barrier()
+  br label %m
+f0:
+  call void @aligned_barrier()
+  call void @llvm.nvvm.barrier0()
+  br i1 %c1, label %t1, label %f1
+t1:
+  call void @aligned_barrier()
+  br label %m
+f1:
+  call void @aligned_barrier()
+  br label %m
+m:
+  call void @aligned_barrier()
+  ret void
+}
+
+; Same CFG but with stores to %p: barriers that separate the stores must stay.
+; The CHECK lines keep the entry aligned_barrier (after the first store) and
+; the nvvm.barrier0 in f0, and expect all other barriers to be removed.
+define void @multiple_blocks_kernel_2(i1 %c0, i1 %c1, i32* %p) {
+; CHECK-LABEL: define {{[^@]+}}@multiple_blocks_kernel_2
+; CHECK-SAME: (i1 [[C0:%.*]], i1 [[C1:%.*]], ptr [[P:%.*]]) {
+; CHECK-NEXT:    store i32 4, ptr [[P]], align 4
+; CHECK-NEXT:    call void @aligned_barrier()
+; CHECK-NEXT:    br i1 [[C0]], label [[T0:%.*]], label [[F0:%.*]]
+; CHECK:       t0:
+; CHECK-NEXT:    br label [[T0B:%.*]]
+; CHECK:       t0b:
+; CHECK-NEXT:    br label [[M:%.*]]
+; CHECK:       f0:
+; CHECK-NEXT:    store i32 4, ptr [[P]], align 4
+; CHECK-NEXT:    call void @llvm.nvvm.barrier0()
+; CHECK-NEXT:    br i1 [[C1]], label [[T1:%.*]], label [[F1:%.*]]
+; CHECK:       t1:
+; CHECK-NEXT:    br label [[M]]
+; CHECK:       f1:
+; CHECK-NEXT:    br label [[M]]
+; CHECK:       m:
+; CHECK-NEXT:    store i32 4, ptr [[P]], align 4
+; CHECK-NEXT:    ret void
+;
+  call void @llvm.nvvm.barrier0()
+  store i32 4, i32* %p
+  call void @aligned_barrier()
+  br i1 %c0, label %t0, label %f0
+t0:
+  call void @aligned_barrier()
+  br label %t0b
+t0b:
+  call void @aligned_barrier()
+  br label %m
+f0:
+  call void @aligned_barrier()
+  store i32 4, i32* %p
+  call void @llvm.nvvm.barrier0()
+  br i1 %c1, label %t1, label %f1
+t1:
+  call void @aligned_barrier()
+  br label %m
+f1:
+  call void @aligned_barrier()
+  br label %m
+m:
+  store i32 4, i32* %p
+  call void @aligned_barrier()
+  ret void
+}
+
+; Non-kernel version of the diamond test: the CHECK lines keep the entry
+; nvvm.barrier0 and expect every aligned_barrier call to be removed.
+define void @multiple_blocks_non_kernel_1(i1 %c0, i1 %c1) {
+; CHECK-LABEL: define {{[^@]+}}@multiple_blocks_non_kernel_1
+; CHECK-SAME: (i1 [[C0:%.*]], i1 [[C1:%.*]]) {
+; CHECK-NEXT:    call void @llvm.nvvm.barrier0()
+; CHECK-NEXT:    br i1 [[C0]], label [[T0:%.*]], label [[F0:%.*]]
+; CHECK:       t0:
+; CHECK-NEXT:    br label [[T0B:%.*]]
+; CHECK:       t0b:
+; CHECK-NEXT:    br label [[M:%.*]]
+; CHECK:       f0:
+; CHECK-NEXT:    br i1 [[C1]], label [[T1:%.*]], label [[F1:%.*]]
+; CHECK:       t1:
+; CHECK-NEXT:    br label [[M]]
+; CHECK:       f1:
+; CHECK-NEXT:    br label [[M]]
+; CHECK:       m:
+; CHECK-NEXT:    ret void
+;
+  call void @llvm.nvvm.barrier0()
+  call void @aligned_barrier()
+  br i1 %c0, label %t0, label %f0
+t0:
+  call void @aligned_barrier()
+  br label %t0b
+t0b:
+  call void @aligned_barrier()
+  br label %m
+f0:
+  call void @aligned_barrier()
+  call void @llvm.nvvm.barrier0()
+  br i1 %c1, label %t1, label %f1
+t1:
+  call void @aligned_barrier()
+  br label %m
+f1:
+  call void @aligned_barrier()
+  br label %m
+m:
+  call void @aligned_barrier()
+  ret void
+}
+
+; Non-kernel with no barrier in the entry block: the CHECK lines keep the
+; first aligned_barrier on each branch (t0 and f0) and drop the rest.
+define void @multiple_blocks_non_kernel_2(i1 %c0, i1 %c1) {
+; CHECK-LABEL: define {{[^@]+}}@multiple_blocks_non_kernel_2
+; CHECK-SAME: (i1 [[C0:%.*]], i1 [[C1:%.*]]) {
+; CHECK-NEXT:    br i1 [[C0]], label [[T0:%.*]], label [[F0:%.*]]
+; CHECK:       t0:
+; CHECK-NEXT:    call void @aligned_barrier()
+; CHECK-NEXT:    br label [[T0B:%.*]]
+; CHECK:       t0b:
+; CHECK-NEXT:    br label [[M:%.*]]
+; CHECK:       f0:
+; CHECK-NEXT:    call void @aligned_barrier()
+; CHECK-NEXT:    br i1 [[C1]], label [[T1:%.*]], label [[F1:%.*]]
+; CHECK:       t1:
+; CHECK-NEXT:    br label [[M]]
+; CHECK:       f1:
+; CHECK-NEXT:    br label [[M]]
+; CHECK:       m:
+; CHECK-NEXT:    ret void
+;
+  br i1 %c0, label %t0, label %f0
+t0:
+  call void @aligned_barrier()
+  br label %t0b
+t0b:
+  call void @aligned_barrier()
+  br label %m
+f0:
+  call void @aligned_barrier()
+  call void @llvm.nvvm.barrier0()
+  br i1 %c1, label %t1, label %f1
+t1:
+  call void @aligned_barrier()
+  br label %m
+f1:
+  call void @aligned_barrier()
+  br label %m
+m:
+  call void @aligned_barrier()
+  ret void
+}
+
+; The t0/t0b path reaches m without executing any barrier, so the CHECK lines
+; keep the aligned_barrier in m (and the first one in f0); all others go away.
+define void @multiple_blocks_non_kernel_3(i1 %c0, i1 %c1) {
+; CHECK-LABEL: define {{[^@]+}}@multiple_blocks_non_kernel_3
+; CHECK-SAME: (i1 [[C0:%.*]], i1 [[C1:%.*]]) {
+; CHECK-NEXT:    br i1 [[C0]], label [[T0:%.*]], label [[F0:%.*]]
+; CHECK:       t0:
+; CHECK-NEXT:    br label [[T0B:%.*]]
+; CHECK:       t0b:
+; CHECK-NEXT:    br label [[M:%.*]]
+; CHECK:       f0:
+; CHECK-NEXT:    call void @aligned_barrier()
+; CHECK-NEXT:    br i1 [[C1]], label [[T1:%.*]], label [[F1:%.*]]
+; CHECK:       t1:
+; CHECK-NEXT:    br label [[M]]
+; CHECK:       f1:
+; CHECK-NEXT:    br label [[M]]
+; CHECK:       m:
+; CHECK-NEXT:    call void @aligned_barrier()
+; CHECK-NEXT:    ret void
+;
+  br i1 %c0, label %t0, label %f0
+t0:
+  br label %t0b
+t0b:
+  br label %m
+f0:
+  call void @aligned_barrier()
+  call void @llvm.nvvm.barrier0()
+  br i1 %c1, label %t1, label %f1
+t1:
+  call void @aligned_barrier()
+  br label %m
+f1:
+  call void @aligned_barrier()
+  br label %m
+m:
+  call void @aligned_barrier()
+  ret void
+}
+
+; Stores to %p in several blocks: the CHECK lines keep the barriers that
+; guard a store, and drop the nvvm.barrier0 plus any aligned_barrier that is
+; made redundant by a neighboring aligned barrier.
+define void @multiple_blocks_non_kernel_effects_1(i1 %c0, i1 %c1, i32* %p) {
+; CHECK-LABEL: define {{[^@]+}}@multiple_blocks_non_kernel_effects_1
+; CHECK-SAME: (i1 [[C0:%.*]], i1 [[C1:%.*]], ptr [[P:%.*]]) {
+; CHECK-NEXT:    call void @aligned_barrier()
+; CHECK-NEXT:    store i32 0, ptr [[P]], align 4
+; CHECK-NEXT:    call void @aligned_barrier()
+; CHECK-NEXT:    br i1 [[C0]], label [[T0:%.*]], label [[F0:%.*]]
+; CHECK:       t0:
+; CHECK-NEXT:    store i32 1, ptr [[P]], align 4
+; CHECK-NEXT:    br label [[T0B:%.*]]
+; CHECK:       t0b:
+; CHECK-NEXT:    call void @aligned_barrier()
+; CHECK-NEXT:    br label [[M:%.*]]
+; CHECK:       f0:
+; CHECK-NEXT:    store i32 2, ptr [[P]], align 4
+; CHECK-NEXT:    br i1 [[C1]], label [[T1:%.*]], label [[F1:%.*]]
+; CHECK:       t1:
+; CHECK-NEXT:    call void @aligned_barrier()
+; CHECK-NEXT:    br label [[M]]
+; CHECK:       f1:
+; CHECK-NEXT:    call void @aligned_barrier()
+; CHECK-NEXT:    br label [[M]]
+; CHECK:       m:
+; CHECK-NEXT:    store i32 3, ptr [[P]], align 4
+; CHECK-NEXT:    call void @aligned_barrier()
+; CHECK-NEXT:    ret void
+;
+  call void @aligned_barrier()
+  store i32 0, i32* %p
+  call void @aligned_barrier()
+  br i1 %c0, label %t0, label %f0
+t0:
+  call void @aligned_barrier()
+  store i32 1, i32* %p
+  br label %t0b
+t0b:
+  call void @aligned_barrier()
+  br label %m
+f0:
+  call void @aligned_barrier()
+  call void @llvm.nvvm.barrier0()
+  store i32 2, i32* %p
+  br i1 %c1, label %t1, label %f1
+t1:
+  call void @aligned_barrier()
+  br label %m
+f1:
+  call void @aligned_barrier()
+  br label %m
+m:
+  call void @aligned_barrier()
+  store i32 3, i32* %p
+  call void @aligned_barrier()
+  ret void
+}
+
+; Interprocedural test helper: the MODULE run drops the trailing barrier
+; (reasoning across the call boundary), the CGSCC run keeps it.
+define internal void @write_then_barrier0(i32* %p) {
+; MODULE-LABEL: define {{[^@]+}}@write_then_barrier0
+; MODULE-SAME: (ptr [[P:%.*]]) {
+; MODULE-NEXT:    store i32 0, ptr [[P]], align 4
+; MODULE-NEXT:    ret void
+;
+; CGSCC-LABEL: define {{[^@]+}}@write_then_barrier0
+; CGSCC-SAME: (ptr [[P:%.*]]) {
+; CGSCC-NEXT:    store i32 0, ptr [[P]], align 4
+; CGSCC-NEXT:    call void @aligned_barrier()
+; CGSCC-NEXT:    ret void
+;
+  store i32 0, i32* %p
+  call void @aligned_barrier()
+  ret void
+}
+; The MODULE run removes the leading barrier; the CGSCC run keeps it.
+define internal void @barrier_then_write0(i32* %p) {
+; MODULE-LABEL: define {{[^@]+}}@barrier_then_write0
+; MODULE-SAME: (ptr [[P:%.*]]) {
+; MODULE-NEXT:    store i32 0, ptr [[P]], align 4
+; MODULE-NEXT:    ret void
+;
+; CGSCC-LABEL: define {{[^@]+}}@barrier_then_write0
+; CGSCC-SAME: (ptr [[P:%.*]]) {
+; CGSCC-NEXT:    call void @aligned_barrier()
+; CGSCC-NEXT:    store i32 0, ptr [[P]], align 4
+; CGSCC-NEXT:    ret void
+;
+  call void @aligned_barrier()
+  store i32 0, i32* %p
+  ret void
+}
+; The MODULE run removes only the leading barrier (the trailing one is kept);
+; the CGSCC run keeps both.
+define internal void @barrier_then_write_then_barrier0(i32* %p) {
+; MODULE-LABEL: define {{[^@]+}}@barrier_then_write_then_barrier0
+; MODULE-SAME: (ptr [[P:%.*]]) {
+; MODULE-NEXT:    store i32 0, ptr [[P]], align 4
+; MODULE-NEXT:    call void @aligned_barrier()
+; MODULE-NEXT:    ret void
+;
+; CGSCC-LABEL: define {{[^@]+}}@barrier_then_write_then_barrier0
+; CGSCC-SAME: (ptr [[P:%.*]]) {
+; CGSCC-NEXT:    call void @aligned_barrier()
+; CGSCC-NEXT:    store i32 0, ptr [[P]], align 4
+; CGSCC-NEXT:    call void @aligned_barrier()
+; CGSCC-NEXT:    ret void
+;
+  call void @aligned_barrier()
+  store i32 0, i32* %p
+  call void @aligned_barrier()
+  ret void
+}
+; Kernel driver calling the *0 helpers: with module-wide reasoning (MODULE)
+; every aligned_barrier call in this function is removed (the barriers in the
+; callees make them redundant); the CGSCC run keeps the ones shown below.
+define void @multiple_blocks_functions_kernel_effects_0(i1 %c0, i1 %c1, i32* %p) {
+; MODULE-LABEL: define {{[^@]+}}@multiple_blocks_functions_kernel_effects_0
+; MODULE-SAME: (i1 [[C0:%.*]], i1 [[C1:%.*]], ptr [[P:%.*]]) {
+; MODULE-NEXT:    call void @barrier_then_write_then_barrier0(ptr [[P]])
+; MODULE-NEXT:    br i1 [[C0]], label [[T03:%.*]], label [[F03:%.*]]
+; MODULE:       t03:
+; MODULE-NEXT:    call void @barrier_then_write0(ptr [[P]])
+; MODULE-NEXT:    br label [[T0B3:%.*]]
+; MODULE:       t0b3:
+; MODULE-NEXT:    br label [[M3:%.*]]
+; MODULE:       f03:
+; MODULE-NEXT:    call void @barrier_then_write0(ptr [[P]])
+; MODULE-NEXT:    br i1 [[C1]], label [[T13:%.*]], label [[F13:%.*]]
+; MODULE:       t13:
+; MODULE-NEXT:    br label [[M3]]
+; MODULE:       f13:
+; MODULE-NEXT:    br label [[M3]]
+; MODULE:       m3:
+; MODULE-NEXT:    call void @write_then_barrier0(ptr [[P]])
+; MODULE-NEXT:    ret void
+;
+; CGSCC-LABEL: define {{[^@]+}}@multiple_blocks_functions_kernel_effects_0
+; CGSCC-SAME: (i1 [[C0:%.*]], i1 [[C1:%.*]], ptr [[P:%.*]]) {
+; CGSCC-NEXT:    call void @barrier_then_write_then_barrier0(ptr [[P]])
+; CGSCC-NEXT:    call void @aligned_barrier()
+; CGSCC-NEXT:    br i1 [[C0]], label [[T03:%.*]], label [[F03:%.*]]
+; CGSCC:       t03:
+; CGSCC-NEXT:    call void @barrier_then_write0(ptr [[P]])
+; CGSCC-NEXT:    br label [[T0B3:%.*]]
+; CGSCC:       t0b3:
+; CGSCC-NEXT:    call void @aligned_barrier()
+; CGSCC-NEXT:    br label [[M3:%.*]]
+; CGSCC:       f03:
+; CGSCC-NEXT:    call void @barrier_then_write0(ptr [[P]])
+; CGSCC-NEXT:    br i1 [[C1]], label [[T13:%.*]], label [[F13:%.*]]
+; CGSCC:       t13:
+; CGSCC-NEXT:    call void @aligned_barrier()
+; CGSCC-NEXT:    br label [[M3]]
+; CGSCC:       f13:
+; CGSCC-NEXT:    call void @aligned_barrier()
+; CGSCC-NEXT:    br label [[M3]]
+; CGSCC:       m3:
+; CGSCC-NEXT:    call void @write_then_barrier0(ptr [[P]])
+; CGSCC-NEXT:    ret void
+;
+  call void @barrier_then_write_then_barrier0(i32* %p)
+  call void @aligned_barrier()
+  br i1 %c0, label %t03, label %f03
+t03:
+  call void @barrier_then_write0(i32* %p)
+  br label %t0b3
+t0b3:
+  call void @aligned_barrier()
+  br label %m3
+f03:
+  call void @aligned_barrier()
+  call void @barrier_then_write0(i32* %p)
+  br i1 %c1, label %t13, label %f13
+t13:
+  call void @aligned_barrier()
+  br label %m3
+f13:
+  call void @aligned_barrier()
+  br label %m3
+m3:
+  call void @aligned_barrier()
+  call void @write_then_barrier0(i32* %p)
+  ret void
+}
+; The store/barrier pair survives under both runs (single CHECK prefix).
+define internal void @write_then_barrier1(i32* %p) {
+; CHECK-LABEL: define {{[^@]+}}@write_then_barrier1
+; CHECK-SAME: (ptr [[P:%.*]]) {
+; CHECK-NEXT:    store i32 0, ptr [[P]], align 4
+; CHECK-NEXT:    call void @aligned_barrier()
+; CHECK-NEXT:    ret void
+;
+  store i32 0, i32* %p
+  call void @aligned_barrier()
+  ret void
+}
+; The MODULE run removes the leading barrier; the CGSCC run keeps it.
+define internal void @barrier_then_write1(i32* %p) {
+; MODULE-LABEL: define {{[^@]+}}@barrier_then_write1
+; MODULE-SAME: (ptr [[P:%.*]]) {
+; MODULE-NEXT:    store i32 0, ptr [[P]], align 4
+; MODULE-NEXT:    ret void
+;
+; CGSCC-LABEL: define {{[^@]+}}@barrier_then_write1
+; CGSCC-SAME: (ptr [[P:%.*]]) {
+; CGSCC-NEXT:    call void @aligned_barrier()
+; CGSCC-NEXT:    store i32 0, ptr [[P]], align 4
+; CGSCC-NEXT:    ret void
+;
+  call void @aligned_barrier()
+  store i32 0, i32* %p
+  ret void
+}
+; Kept unchanged under both runs (single CHECK prefix).
+define internal void @barrier_then_write_then_barrier1(i32* %p) {
+; CHECK-LABEL: define {{[^@]+}}@barrier_then_write_then_barrier1
+; CHECK-SAME: (ptr [[P:%.*]]) {
+; CHECK-NEXT:    call void @aligned_barrier()
+; CHECK-NEXT:    store i32 0, ptr [[P]], align 4
+; CHECK-NEXT:    call void @aligned_barrier()
+; CHECK-NEXT:    ret void
+;
+  call void @aligned_barrier()
+  store i32 0, i32* %p
+  call void @aligned_barrier()
+  ret void
+}
+; Non-kernel driver for the *1 helpers: both runs keep the barriers in
+; t0b3/t13/f13 and drop the ones in f03 and m3; only the entry barrier differs
+; (MODULE drops it, CGSCC keeps it).
+define void @multiple_blocks_functions_non_kernel_effects_1(i1 %c0, i1 %c1, i32* %p) {
+; MODULE-LABEL: define {{[^@]+}}@multiple_blocks_functions_non_kernel_effects_1
+; MODULE-SAME: (i1 [[C0:%.*]], i1 [[C1:%.*]], ptr [[P:%.*]]) {
+; MODULE-NEXT:    call void @barrier_then_write_then_barrier1(ptr [[P]])
+; MODULE-NEXT:    br i1 [[C0]], label [[T03:%.*]], label [[F03:%.*]]
+; MODULE:       t03:
+; MODULE-NEXT:    call void @barrier_then_write1(ptr [[P]])
+; MODULE-NEXT:    br label [[T0B3:%.*]]
+; MODULE:       t0b3:
+; MODULE-NEXT:    call void @aligned_barrier()
+; MODULE-NEXT:    br label [[M3:%.*]]
+; MODULE:       f03:
+; MODULE-NEXT:    call void @barrier_then_write1(ptr [[P]])
+; MODULE-NEXT:    br i1 [[C1]], label [[T13:%.*]], label [[F13:%.*]]
+; MODULE:       t13:
+; MODULE-NEXT:    call void @aligned_barrier()
+; MODULE-NEXT:    br label [[M3]]
+; MODULE:       f13:
+; MODULE-NEXT:    call void @aligned_barrier()
+; MODULE-NEXT:    br label [[M3]]
+; MODULE:       m3:
+; MODULE-NEXT:    call void @write_then_barrier1(ptr [[P]])
+; MODULE-NEXT:    ret void
+;
+; CGSCC-LABEL: define {{[^@]+}}@multiple_blocks_functions_non_kernel_effects_1
+; CGSCC-SAME: (i1 [[C0:%.*]], i1 [[C1:%.*]], ptr [[P:%.*]]) {
+; CGSCC-NEXT:    call void @barrier_then_write_then_barrier1(ptr [[P]])
+; CGSCC-NEXT:    call void @aligned_barrier()
+; CGSCC-NEXT:    br i1 [[C0]], label [[T03:%.*]], label [[F03:%.*]]
+; CGSCC:       t03:
+; CGSCC-NEXT:    call void @barrier_then_write1(ptr [[P]])
+; CGSCC-NEXT:    br label [[T0B3:%.*]]
+; CGSCC:       t0b3:
+; CGSCC-NEXT:    call void @aligned_barrier()
+; CGSCC-NEXT:    br label [[M3:%.*]]
+; CGSCC:       f03:
+; CGSCC-NEXT:    call void @barrier_then_write1(ptr [[P]])
+; CGSCC-NEXT:    br i1 [[C1]], label [[T13:%.*]], label [[F13:%.*]]
+; CGSCC:       t13:
+; CGSCC-NEXT:    call void @aligned_barrier()
+; CGSCC-NEXT:    br label [[M3]]
+; CGSCC:       f13:
+; CGSCC-NEXT:    call void @aligned_barrier()
+; CGSCC-NEXT:    br label [[M3]]
+; CGSCC:       m3:
+; CGSCC-NEXT:    call void @write_then_barrier1(ptr [[P]])
+; CGSCC-NEXT:    ret void
+;
+  call void @barrier_then_write_then_barrier1(i32* %p)
+  call void @aligned_barrier()
+  br i1 %c0, label %t03, label %f03
+t03:
+  call void @barrier_then_write1(i32* %p)
+  br label %t0b3
+t0b3:
+  call void @aligned_barrier()
+  br label %m3
+f03:
+  call void @aligned_barrier()
+  call void @barrier_then_write1(i32* %p)
+  br i1 %c1, label %t13, label %f13
+t13:
+  call void @aligned_barrier()
+  br label %m3
+f13:
+  call void @aligned_barrier()
+  br label %m3
+m3:
+  call void @aligned_barrier()
+  call void @write_then_barrier1(i32* %p)
+  ret void
+}
+
+; Kept unchanged under both runs (single CHECK prefix).
+define internal void @write_then_barrier2(i32* %p) {
+; CHECK-LABEL: define {{[^@]+}}@write_then_barrier2
+; CHECK-SAME: (ptr [[P:%.*]]) {
+; CHECK-NEXT:    store i32 0, ptr [[P]], align 4
+; CHECK-NEXT:    call void @aligned_barrier()
+; CHECK-NEXT:    ret void
+;
+  store i32 0, i32* %p
+  call void @aligned_barrier()
+  ret void
+}
+; Kept unchanged under both runs (single CHECK prefix).
+define internal void @barrier_then_write2(i32* %p) {
+; CHECK-LABEL: define {{[^@]+}}@barrier_then_write2
+; CHECK-SAME: (ptr [[P:%.*]]) {
+; CHECK-NEXT:    call void @aligned_barrier()
+; CHECK-NEXT:    store i32 0, ptr [[P]], align 4
+; CHECK-NEXT:    ret void
+;
+  call void @aligned_barrier()
+  store i32 0, i32* %p
+  ret void
+}
+; Kept unchanged under both runs (single CHECK prefix).
+define internal void @barrier_then_write_then_barrier2(i32* %p) {
+; CHECK-LABEL: define {{[^@]+}}@barrier_then_write_then_barrier2
+; CHECK-SAME: (ptr [[P:%.*]]) {
+; CHECK-NEXT:    call void @aligned_barrier()
+; CHECK-NEXT:    store i32 0, ptr [[P]], align 4
+; CHECK-NEXT:    call void @aligned_barrier()
+; CHECK-NEXT:    ret void
+;
+  call void @aligned_barrier()
+  store i32 0, i32* %p
+  call void @aligned_barrier()
+  ret void
+}
+; Like the *1 driver but with extra stores in the entry and m3 blocks: both
+; runs keep the barriers in t0b3/f03/t13/f13 and drop the m3 one; only the
+; entry barrier differs (MODULE drops it, CGSCC keeps it).
+define void @multiple_blocks_functions_non_kernel_effects_2(i1 %c0, i1 %c1, i32* %p) {
+; MODULE-LABEL: define {{[^@]+}}@multiple_blocks_functions_non_kernel_effects_2
+; MODULE-SAME: (i1 [[C0:%.*]], i1 [[C1:%.*]], ptr [[P:%.*]]) {
+; MODULE-NEXT:    call void @barrier_then_write_then_barrier2(ptr [[P]])
+; MODULE-NEXT:    store i32 0, ptr [[P]], align 4
+; MODULE-NEXT:    br i1 [[C0]], label [[T03:%.*]], label [[F03:%.*]]
+; MODULE:       t03:
+; MODULE-NEXT:    call void @barrier_then_write2(ptr [[P]])
+; MODULE-NEXT:    br label [[T0B3:%.*]]
+; MODULE:       t0b3:
+; MODULE-NEXT:    call void @aligned_barrier()
+; MODULE-NEXT:    br label [[M3:%.*]]
+; MODULE:       f03:
+; MODULE-NEXT:    call void @aligned_barrier()
+; MODULE-NEXT:    call void @barrier_then_write2(ptr [[P]])
+; MODULE-NEXT:    br i1 [[C1]], label [[T13:%.*]], label [[F13:%.*]]
+; MODULE:       t13:
+; MODULE-NEXT:    call void @aligned_barrier()
+; MODULE-NEXT:    br label [[M3]]
+; MODULE:       f13:
+; MODULE-NEXT:    call void @aligned_barrier()
+; MODULE-NEXT:    br label [[M3]]
+; MODULE:       m3:
+; MODULE-NEXT:    call void @write_then_barrier2(ptr [[P]])
+; MODULE-NEXT:    store i32 0, ptr [[P]], align 4
+; MODULE-NEXT:    ret void
+;
+; CGSCC-LABEL: define {{[^@]+}}@multiple_blocks_functions_non_kernel_effects_2
+; CGSCC-SAME: (i1 [[C0:%.*]], i1 [[C1:%.*]], ptr [[P:%.*]]) {
+; CGSCC-NEXT:    call void @barrier_then_write_then_barrier2(ptr [[P]])
+; CGSCC-NEXT:    call void @aligned_barrier()
+; CGSCC-NEXT:    store i32 0, ptr [[P]], align 4
+; CGSCC-NEXT:    br i1 [[C0]], label [[T03:%.*]], label [[F03:%.*]]
+; CGSCC:       t03:
+; CGSCC-NEXT:    call void @barrier_then_write2(ptr [[P]])
+; CGSCC-NEXT:    br label [[T0B3:%.*]]
+; CGSCC:       t0b3:
+; CGSCC-NEXT:    call void @aligned_barrier()
+; CGSCC-NEXT:    br label [[M3:%.*]]
+; CGSCC:       f03:
+; CGSCC-NEXT:    call void @aligned_barrier()
+; CGSCC-NEXT:    call void @barrier_then_write2(ptr [[P]])
+; CGSCC-NEXT:    br i1 [[C1]], label [[T13:%.*]], label [[F13:%.*]]
+; CGSCC:       t13:
+; CGSCC-NEXT:    call void @aligned_barrier()
+; CGSCC-NEXT:    br label [[M3]]
+; CGSCC:       f13:
+; CGSCC-NEXT:    call void @aligned_barrier()
+; CGSCC-NEXT:    br label [[M3]]
+; CGSCC:       m3:
+; CGSCC-NEXT:    call void @write_then_barrier2(ptr [[P]])
+; CGSCC-NEXT:    store i32 0, ptr [[P]], align 4
+; CGSCC-NEXT:    ret void
+;
+  call void @barrier_then_write_then_barrier2(i32* %p)
+  call void @aligned_barrier()
+  store i32 0, i32* %p
+  br i1 %c0, label %t03, label %f03
+t03:
+  call void @barrier_then_write2(i32* %p)
+  br label %t0b3
+t0b3:
+  call void @aligned_barrier()
+  br label %m3
+f03:
+  call void @aligned_barrier()
+  call void @barrier_then_write2(i32* %p)
+  br i1 %c1, label %t13, label %f13
+t13:
+  call void @aligned_barrier()
+  br label %m3
+f13:
+  call void @aligned_barrier()
+  br label %m3
+m3:
+  call void @aligned_barrier()
+  call void @write_then_barrier2(i32* %p)
+  store i32 0, i32* %p
+  ret void
+}
+
+!llvm.module.flags = !{!16,!15}
+!nvvm.annotations = !{!0,!1,!2,!3,!4,!5,!6,!7,!8,!9,!10,!11,!12,!13,!14}
-!0 = !{ptr @pos_empty_1, !"kernel", i32 1}
-!1 = !{ptr @pos_empty_2, !"kernel", i32 1}
-!2 = !{ptr @pos_empty_3, !"kernel", i32 1}
-!3 = !{ptr @pos_empty_4, !"kernel", i32 1}
-!4 = !{ptr @pos_empty_5, !"kernel", i32 1}
-!5 = !{ptr @pos_empty_6, !"kernel", i32 1}
-!6 = !{ptr @neg_empty_7, !"kernel", i32 1}
-!7 = !{ptr @pos_constant_loads, !"kernel", i32 1}
-!8 = !{ptr @neg_loads, !"kernel", i32 1}
-!9 = !{ptr @pos_priv_mem, !"kernel", i32 1}
-!10 = !{ptr @neg_mem, !"kernel", i32 1}
-!11 = !{ptr @pos_multiple, !"kernel", i32 1}
-!12 = !{i32 7, !"openmp", i32 50}
-!13 = !{i32 7, !"openmp-device", i32 50}
+!0 = !{void ()* @pos_empty_1, !"kernel", i32 1}
+!1 = !{void ()* @pos_empty_2, !"kernel", i32 1}
+!2 = !{void ()* @pos_empty_3, !"kernel", i32 1}
+!3 = !{void ()* @pos_empty_4, !"kernel", i32 1}
+!4 = !{void ()* @pos_empty_5, !"kernel", i32 1}
+!5 = !{void ()* @pos_empty_6, !"kernel", i32 1}
+!6 = !{void ()* @neg_empty_7, !"kernel", i32 1}
+!7 = !{void ()* @pos_constant_loads, !"kernel", i32 1}
+!8 = !{void ()* @neg_loads, !"kernel", i32 1}
+!9 = !{void ()* @pos_priv_mem, !"kernel", i32 1}
+!10 = !{void ()* @neg_mem, !"kernel", i32 1}
+!11 = !{void ()* @pos_multiple, !"kernel", i32 1}
+!12 = !{void (i1,i1)* @multiple_blocks_kernel_1, !"kernel", i32 1}
+!13 = !{void (i1,i1,i32*)* @multiple_blocks_kernel_2, !"kernel", i32 1}
+!14 = !{void (i1,i1,i32*)* @multiple_blocks_functions_kernel_effects_0, !"kernel", i32 1}
+!15 = !{i32 7, !"openmp", i32 50}
+!16 = !{i32 7, !"openmp-device", i32 50}
;.
; CHECK: attributes #[[ATTR0:[0-9]+]] = { "llvm.assume"="ompx_aligned_barrier" }
; CHECK: attributes #[[ATTR1:[0-9]+]] = { convergent nocallback nounwind }
; CHECK: attributes #[[ATTR2:[0-9]+]] = { convergent nocallback nofree nounwind willreturn }
; CHECK: attributes #[[ATTR3:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite) }
;.
-; CHECK: [[META0:![0-9]+]] = !{i32 7, !"openmp", i32 50}
-; CHECK: [[META1:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
+; CHECK: [[META0:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
+; CHECK: [[META1:![0-9]+]] = !{i32 7, !"openmp", i32 50}
; CHECK: [[META2:![0-9]+]] = !{ptr @pos_empty_1, !"kernel", i32 1}
; CHECK: [[META3:![0-9]+]] = !{ptr @pos_empty_2, !"kernel", i32 1}
; CHECK: [[META4:![0-9]+]] = !{ptr @pos_empty_3, !"kernel", i32 1}
; CHECK: [[META11:![0-9]+]] = !{ptr @pos_priv_mem, !"kernel", i32 1}
; CHECK: [[META12:![0-9]+]] = !{ptr @neg_mem, !"kernel", i32 1}
; CHECK: [[META13:![0-9]+]] = !{ptr @pos_multiple, !"kernel", i32 1}
+; CHECK: [[META14:![0-9]+]] = !{ptr @multiple_blocks_kernel_1, !"kernel", i32 1}
+; CHECK: [[META15:![0-9]+]] = !{ptr @multiple_blocks_kernel_2, !"kernel", i32 1}
+; CHECK: [[META16:![0-9]+]] = !{ptr @multiple_blocks_functions_kernel_effects_0, !"kernel", i32 1}
;.
; CHECK-LABEL: define {{[^@]+}}@__omp_offloading_50_a3e09bf8_foo_l2
; CHECK-SAME: () #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false)
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2:[0-9]+]])
; CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; CHECK-LABEL: define {{[^@]+}}@outlined0
; CHECK-SAME: () #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: bb:
-; CHECK-NEXT: call void @func() #[[ATTR0]]
+; CHECK-NEXT: call void @func() #[[ATTR1:[0-9]+]]
; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() #[[ATTR0]]
; CHECK-NEXT: ret void
;
}
define internal void @func() {
-; CHECK: Function Attrs: nounwind
+; CHECK: Function Attrs: nosync nounwind
; CHECK-LABEL: define {{[^@]+}}@func
-; CHECK-SAME: () #[[ATTR0]] {
+; CHECK-SAME: () #[[ATTR1]] {
; CHECK-NEXT: bb:
; CHECK-NEXT: [[I:%.*]] = load ptr, ptr null, align 4294967296
; CHECK-NEXT: store i64 0, ptr [[I]], align 8
}
define internal void @outlined1() {
-; CHECK: Function Attrs: nounwind
+; CHECK: Function Attrs: nosync nounwind
; CHECK-LABEL: define {{[^@]+}}@outlined1
-; CHECK-SAME: () #[[ATTR0]] {
+; CHECK-SAME: () #[[ATTR1]] {
; CHECK-NEXT: bb:
; CHECK-NEXT: [[I:%.*]] = icmp sle i32 1, 0
; CHECK-NEXT: br i1 [[I]], label [[BB1:%.*]], label [[BB2:%.*]]
; CHECK: common.ret:
; CHECK-NEXT: ret void
; CHECK: bb1:
-; CHECK-NEXT: call void @func() #[[ATTR0]]
+; CHECK-NEXT: call void @func() #[[ATTR1]]
; CHECK-NEXT: br label [[COMMON_RET:%.*]]
; CHECK: bb2:
; CHECK-NEXT: call void @__kmpc_free_shared(ptr null, i64 0) #[[ATTR0]]
define void @user() {
; CHECK-LABEL: define {{[^@]+}}@user() {
; CHECK-NEXT: call void @outlined0() #[[ATTR0]]
-; CHECK-NEXT: call void @outlined1() #[[ATTR0]]
+; CHECK-NEXT: call void @outlined1() #[[ATTR1]]
; CHECK-NEXT: ret void
;
call void @outlined0()
!1 = !{i32 7, !"openmp-device", i32 50}
;.
; CHECK: attributes #[[ATTR0]] = { nounwind }
-; CHECK: attributes #[[ATTR1:[0-9]+]] = { nosync nounwind }
+; CHECK: attributes #[[ATTR1]] = { nosync nounwind }
;.
; CHECK: [[META0:![0-9]+]] = !{i32 7, !"openmp", i32 50}
; CHECK: [[META1:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
define internal void @bar() {
; CHECK-LABEL: @bar(
-; CHECK-NEXT: call void @foo()
; CHECK-NEXT: ret void
;
call void @foo()
ret i32 0
}
-
define fastcc void @rec(ptr %0, i64 %1) {
-; CHECK-LABEL: define {{[^@]+}}@rec(
-; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i32, ptr [[TMP0:%.*]], i64 [[TMP1:%.*]]
-; CHECK-NEXT: store i32 0, ptr [[TMP3]], align 4
-; CHECK-NEXT: call fastcc void @rec(ptr [[TMP0]], i64 0)
-; CHECK-NEXT: ret void
%3 = getelementptr i32, ptr %0, i64 %1
store i32 0, ptr %3, align 4
call fastcc void @rec(ptr %0, i64 0)
;
;
; CGSCC-LABEL: define {{[^@]+}}@rec
-; CGSCC-SAME: (ptr [[TMP0:%.*]], i64 [[TMP1:%.*]]) {
+; CGSCC-SAME: (ptr nocapture writeonly [[TMP0:%.*]], i64 [[TMP1:%.*]]) #[[ATTR0:[0-9]+]] {
; CGSCC-NEXT: [[TMP3:%.*]] = getelementptr i32, ptr [[TMP0]], i64 [[TMP1]]
; CGSCC-NEXT: store i32 0, ptr [[TMP3]], align 4
-; CGSCC-NEXT: call fastcc void @rec(ptr [[TMP0]], i64 0)
+; CGSCC-NEXT: call fastcc void @rec(ptr nocapture writeonly [[TMP0]], i64 0) #[[ATTR1:[0-9]+]]
; CGSCC-NEXT: ret void
;
; Make it a weak definition so we will apply custom state machine rewriting but can't use the body in the reasoning.
;.
; CHECK: @[[S:[a-zA-Z0-9_$"\\.-]+]] = external local_unnamed_addr global ptr
+; CHECK: @[[KERNEL_NESTED_PARALLELISM:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 0
;.
; CHECK-DISABLED: @[[S:[a-zA-Z0-9_$"\\.-]+]] = external local_unnamed_addr global ptr
+; CHECK-DISABLED: @[[KERNEL_NESTED_PARALLELISM:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 0
;.
define weak i32 @__kmpc_target_init(ptr, i8, i1) {
; CHECK-LABEL: define {{[^@]+}}@__kmpc_target_init
; CHECK-LABEL: define {{[^@]+}}@foo
; CHECK-SAME: () #[[ATTR0]] {
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[DOTH2S:%.*]] = alloca i8, i64 4, align 1
+; CHECK-NEXT: [[DOTH2S:%.*]] = alloca i8, i64 4, align 4
; CHECK-NEXT: ret void
;
; CHECK-DISABLED-LABEL: define {{[^@]+}}@foo
; CHECK-DISABLED-SAME: () #[[ATTR0]] {
; CHECK-DISABLED-NEXT: entry:
-; CHECK-DISABLED-NEXT: [[DOTH2S:%.*]] = alloca i8, i64 4, align 1
+; CHECK-DISABLED-NEXT: [[DOTH2S:%.*]] = alloca i8, i64 4, align 4
; CHECK-DISABLED-NEXT: ret void
;
entry:
- %0 = call ptr @__kmpc_alloc_shared(i64 4), !dbg !12
+ %0 = call align 4 ptr @__kmpc_alloc_shared(i64 4), !dbg !12
call void @use(ptr %0)
call void @__kmpc_free_shared(ptr %0, i64 4)
ret void
define internal void @bar() {
; CHECK-LABEL: define {{[^@]+}}@bar
-; CHECK-SAME: () #[[ATTR1:[0-9]+]] {
+; CHECK-SAME: () #[[ATTR0]] {
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[TMP0:%.*]] = call ptr @__kmpc_alloc_shared(i64 4) #[[ATTR0]], !dbg [[DBG8:![0-9]+]]
-; CHECK-NEXT: call void @share(ptr nofree [[TMP0]]) #[[ATTR1]], !dbg [[DBG8]]
-; CHECK-NEXT: call void @__kmpc_free_shared(ptr [[TMP0]], i64 4) #[[ATTR0]]
+; CHECK-NEXT: [[TMP0:%.*]] = call align 4 ptr @__kmpc_alloc_shared(i64 4) #[[ATTR4:[0-9]+]], !dbg [[DBG8:![0-9]+]]
+; CHECK-NEXT: call void @share(ptr nofree [[TMP0]]) #[[ATTR0]], !dbg [[DBG8]]
+; CHECK-NEXT: call void @__kmpc_free_shared(ptr [[TMP0]], i64 4) #[[ATTR4]]
; CHECK-NEXT: ret void
;
; CHECK-DISABLED-LABEL: define {{[^@]+}}@bar
-; CHECK-DISABLED-SAME: () #[[ATTR1:[0-9]+]] {
+; CHECK-DISABLED-SAME: () #[[ATTR0]] {
; CHECK-DISABLED-NEXT: entry:
-; CHECK-DISABLED-NEXT: [[TMP0:%.*]] = call ptr @__kmpc_alloc_shared(i64 4) #[[ATTR0]], !dbg [[DBG8:![0-9]+]]
-; CHECK-DISABLED-NEXT: call void @share(ptr nofree [[TMP0]]) #[[ATTR1]], !dbg [[DBG8]]
-; CHECK-DISABLED-NEXT: call void @__kmpc_free_shared(ptr [[TMP0]], i64 4) #[[ATTR0]]
+; CHECK-DISABLED-NEXT: [[TMP0:%.*]] = call align 4 ptr @__kmpc_alloc_shared(i64 4) #[[ATTR4:[0-9]+]], !dbg [[DBG8:![0-9]+]]
+; CHECK-DISABLED-NEXT: call void @share(ptr nofree [[TMP0]]) #[[ATTR0]], !dbg [[DBG8]]
+; CHECK-DISABLED-NEXT: call void @__kmpc_free_shared(ptr [[TMP0]], i64 4) #[[ATTR4]]
; CHECK-DISABLED-NEXT: ret void
;
entry:
- %0 = call ptr @__kmpc_alloc_shared(i64 4), !dbg !13
+ %0 = call align 4 ptr @__kmpc_alloc_shared(i64 4), !dbg !13
call void @share(ptr %0), !dbg !13
call void @__kmpc_free_shared(ptr %0, i64 4)
ret void
}
+; NOTE(review): @use is a no-op (entry: ret void). Its autogenerated CHECK
+; lines are dropped in this change — presumably the calls to @use are now
+; removed at the call sites; confirm by re-running update_test_checks.py.
define internal void @use(ptr %x) {
-; CHECK-LABEL: define {{[^@]+}}@use
-; CHECK-SAME: (ptr [[X:%.*]]) #[[ATTR2:[0-9]+]] {
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    ret void
-;
-; CHECK-DISABLED-LABEL: define {{[^@]+}}@use
-; CHECK-DISABLED-SAME: (ptr [[X:%.*]]) #[[ATTR2:[0-9]+]] {
-; CHECK-DISABLED-NEXT:  entry:
-; CHECK-DISABLED-NEXT:    ret void
-;
entry:
  ret void
}
define internal void @share(ptr %x) {
; CHECK-LABEL: define {{[^@]+}}@share
-; CHECK-SAME: (ptr nofree [[X:%.*]]) #[[ATTR3:[0-9]+]] {
+; CHECK-SAME: (ptr nofree [[X:%.*]]) #[[ATTR1:[0-9]+]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: store ptr [[X]], ptr @S, align 8
; CHECK-NEXT: ret void
;
; CHECK-DISABLED-LABEL: define {{[^@]+}}@share
-; CHECK-DISABLED-SAME: (ptr nofree [[X:%.*]]) #[[ATTR3:[0-9]+]] {
+; CHECK-DISABLED-SAME: (ptr nofree [[X:%.*]]) #[[ATTR1:[0-9]+]] {
; CHECK-DISABLED-NEXT: entry:
; CHECK-DISABLED-NEXT: store ptr [[X]], ptr @S, align 8
; CHECK-DISABLED-NEXT: ret void
define void @unused() {
; CHECK-LABEL: define {{[^@]+}}@unused() {
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[DOTH2S:%.*]] = alloca i8, i64 4, align 1
-; CHECK-NEXT: call void @use(ptr undef)
+; CHECK-NEXT: [[DOTH2S:%.*]] = alloca i8, i64 4, align 4
; CHECK-NEXT: ret void
;
; CHECK-DISABLED-LABEL: define {{[^@]+}}@unused() {
; CHECK-DISABLED-NEXT: entry:
-; CHECK-DISABLED-NEXT: [[TMP0:%.*]] = call ptr @__kmpc_alloc_shared(i64 4), !dbg [[DBG11:![0-9]+]]
-; CHECK-DISABLED-NEXT: call void @use(ptr [[TMP0]])
-; CHECK-DISABLED-NEXT: call void @__kmpc_free_shared(ptr [[TMP0]], i64 4)
+; CHECK-DISABLED-NEXT: [[TMP0:%.*]] = call align 4 ptr @__kmpc_alloc_shared(i64 4) #[[ATTR4]], !dbg [[DBG11:![0-9]+]]
+; CHECK-DISABLED-NEXT: call void @__kmpc_free_shared(ptr [[TMP0]], i64 4) #[[ATTR4]]
; CHECK-DISABLED-NEXT: ret void
;
entry:
- %0 = call ptr @__kmpc_alloc_shared(i64 4), !dbg !14
+ %0 = call align 4 ptr @__kmpc_alloc_shared(i64 4), !dbg !14
call void @use(ptr %0)
call void @__kmpc_free_shared(ptr %0, i64 4)
ret void
define internal void @convert_and_move_alloca() {
; CHECK-LABEL: define {{[^@]+}}@convert_and_move_alloca
-; CHECK-SAME: () #[[ATTR1]] {
+; CHECK-SAME: () #[[ATTR0]] {
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[DOTH2S:%.*]] = alloca i8, i64 4, align 1
+; CHECK-NEXT: [[DOTH2S:%.*]] = alloca i8, i64 4, align 4
; CHECK-NEXT: [[IV_PTR:%.*]] = alloca i32, align 4
; CHECK-NEXT: br label [[INITLOOP:%.*]]
; CHECK: initloop:
; CHECK-NEXT: ret void
;
; CHECK-DISABLED-LABEL: define {{[^@]+}}@convert_and_move_alloca
-; CHECK-DISABLED-SAME: () #[[ATTR1]] {
+; CHECK-DISABLED-SAME: () #[[ATTR0]] {
; CHECK-DISABLED-NEXT: entry:
-; CHECK-DISABLED-NEXT: [[DOTH2S:%.*]] = alloca i8, i64 4, align 1
+; CHECK-DISABLED-NEXT: [[DOTH2S:%.*]] = alloca i8, i64 4, align 4
; CHECK-DISABLED-NEXT: [[IV_PTR:%.*]] = alloca i32, align 4
; CHECK-DISABLED-NEXT: br label [[INITLOOP:%.*]]
; CHECK-DISABLED: initloop:
br label %loopbody
loopbody:
- %0 = call ptr @__kmpc_alloc_shared(i64 4), !dbg !16
+ %0 = call align 4 ptr @__kmpc_alloc_shared(i64 4), !dbg !16
call void @use(ptr %0)
call void @__kmpc_free_shared(ptr %0, i64 4)
%iv = load i32, ptr %iv_ptr
!15 = !DILocation(line: 8, column: 2, scope: !9)
!16 = !DILocation(line: 10, column: 2, scope: !9)
;.
-; CHECK: attributes #[[ATTR0]] = { nounwind }
-; CHECK: attributes #[[ATTR1]] = { nosync nounwind }
-; CHECK: attributes #[[ATTR2]] = { nounwind memory(none) }
-; CHECK: attributes #[[ATTR3]] = { nofree norecurse nosync nounwind memory(write) }
-; CHECK: attributes #[[ATTR4:[0-9]+]] = { nosync nounwind allocsize(0) }
-; CHECK: attributes #[[ATTR5:[0-9]+]] = { "llvm.assume"="omp_no_openmp" }
+; CHECK: attributes #[[ATTR0]] = { nosync nounwind }
+; CHECK: attributes #[[ATTR1]] = { nofree norecurse nosync nounwind memory(write) }
+; CHECK: attributes #[[ATTR2:[0-9]+]] = { nosync nounwind allocsize(0) }
+; CHECK: attributes #[[ATTR3:[0-9]+]] = { "llvm.assume"="omp_no_openmp" }
+; CHECK: attributes #[[ATTR4]] = { nounwind }
;.
-; CHECK-DISABLED: attributes #[[ATTR0]] = { nounwind }
-; CHECK-DISABLED: attributes #[[ATTR1]] = { nosync nounwind }
-; CHECK-DISABLED: attributes #[[ATTR2]] = { nounwind memory(none) }
-; CHECK-DISABLED: attributes #[[ATTR3]] = { nofree norecurse nosync nounwind memory(write) }
-; CHECK-DISABLED: attributes #[[ATTR4:[0-9]+]] = { nosync nounwind allocsize(0) }
-; CHECK-DISABLED: attributes #[[ATTR5:[0-9]+]] = { "llvm.assume"="omp_no_openmp" }
+; CHECK-DISABLED: attributes #[[ATTR0]] = { nosync nounwind }
+; CHECK-DISABLED: attributes #[[ATTR1]] = { nofree norecurse nosync nounwind memory(write) }
+; CHECK-DISABLED: attributes #[[ATTR2:[0-9]+]] = { nosync nounwind allocsize(0) }
+; CHECK-DISABLED: attributes #[[ATTR3:[0-9]+]] = { "llvm.assume"="omp_no_openmp" }
+; CHECK-DISABLED: attributes #[[ATTR4]] = { nounwind }
;.
; CHECK: [[META0:![0-9]+]] = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 13.0.0", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, splitDebugInlining: false, nameTableKind: None)
; CHECK: [[META1:![0-9]+]] = !DIFile(filename: "remove_globalization.c", directory: "/tmp/remove_globalization.c")
; CHECK: @[[BAZ_SPMD_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 2
; CHECK: @[[OFFSET:[a-zA-Z0-9_$"\\.-]+]] = global i32 undef
; CHECK: @[[STACK:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global [1024 x i8] undef
+; CHECK: @[[FOO_NESTED_PARALLELISM:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 0
+; CHECK: @[[BAR_NESTED_PARALLELISM:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 0
+; CHECK: @[[BAZ_SPMD_NESTED_PARALLELISM:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 0
; CHECK: @[[X_SHARED:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global [16 x i8] undef, align 4
; CHECK: @[[Y_SHARED:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global [4 x i8] undef, align 4
;.
; CHECK-NEXT: [[C:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 false)
; CHECK-NEXT: [[X:%.*]] = call align 4 ptr @__kmpc_alloc_shared(i64 4) #[[ATTR6:[0-9]+]]
; CHECK-NEXT: call void @unknown_no_openmp()
-; CHECK-NEXT: call void @use.internalized(ptr nofree [[X]]) #[[ATTR6]]
+; CHECK-NEXT: call void @use.internalized(ptr nofree [[X]]) #[[ATTR3:[0-9]+]]
; CHECK-NEXT: call void @__kmpc_free_shared(ptr [[X]], i64 4) #[[ATTR6]]
; CHECK-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
; CHECK-NEXT: ret void
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[C]], -1
; CHECK-NEXT: br i1 [[CMP]], label [[MASTER1:%.*]], label [[EXIT:%.*]]
; CHECK: master1:
-; CHECK-NEXT: call void @use.internalized(ptr nofree addrspacecast (ptr addrspace(3) @x_shared to ptr)) #[[ATTR6]]
+; CHECK-NEXT: call void @use.internalized(ptr nofree addrspacecast (ptr addrspace(3) @x_shared to ptr)) #[[ATTR3]]
; CHECK-NEXT: br label [[NEXT:%.*]]
; CHECK: next:
; CHECK-NEXT: call void @unknown_no_openmp()
; CHECK-NEXT: [[B0:%.*]] = icmp eq i32 [[C]], -1
; CHECK-NEXT: br i1 [[B0]], label [[MASTER2:%.*]], label [[EXIT]]
; CHECK: master2:
-; CHECK-NEXT: call void @use.internalized(ptr nofree addrspacecast (ptr addrspace(3) @y_shared to ptr)) #[[ATTR6]]
+; CHECK-NEXT: call void @use.internalized(ptr nofree addrspacecast (ptr addrspace(3) @y_shared to ptr)) #[[ATTR3]]
; CHECK-NEXT: br label [[EXIT]]
; CHECK: exit:
; CHECK-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
; CHECK-NEXT: br i1 [[C0]], label [[MASTER3:%.*]], label [[EXIT:%.*]]
; CHECK: master3:
; CHECK-NEXT: [[Z:%.*]] = call align 4 ptr @__kmpc_alloc_shared(i64 24) #[[ATTR6]], !dbg [[DBG10:![0-9]+]]
-; CHECK-NEXT: call void @use.internalized(ptr nofree [[Z]]) #[[ATTR6]]
+; CHECK-NEXT: call void @use.internalized(ptr nofree [[Z]]) #[[ATTR3]]
; CHECK-NEXT: call void @__kmpc_free_shared(ptr [[Z]], i64 24) #[[ATTR6]]
; CHECK-NEXT: br label [[EXIT]]
; CHECK: exit:
; CHECK-NEXT: ret void
;
;
-; CHECK: Function Attrs: nofree norecurse nounwind memory(write)
+; CHECK: Function Attrs: nofree norecurse nosync nounwind memory(write)
; CHECK-LABEL: define {{[^@]+}}@use.internalized
; CHECK-SAME: (ptr nofree [[X:%.*]]) #[[ATTR1:[0-9]+]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: ret void
;
;
-; CHECK: Function Attrs: nosync nounwind allocsize(0) memory(read)
+; CHECK: Function Attrs: norecurse nosync nounwind allocsize(0) memory(read)
; CHECK-LABEL: define {{[^@]+}}@__kmpc_alloc_shared
; CHECK-SAME: (i64 [[TMP0:%.*]]) #[[ATTR2:[0-9]+]] {
; CHECK-NEXT: [[L:%.*]] = load i32, ptr @offset, align 4
;
;.
; CHECK: attributes #[[ATTR0]] = { "kernel" }
-; CHECK: attributes #[[ATTR1]] = { nofree norecurse nounwind memory(write) }
-; CHECK: attributes #[[ATTR2]] = { nosync nounwind allocsize(0) memory(read) }
-; CHECK: attributes #[[ATTR3:[0-9]+]] = { nosync nounwind }
+; CHECK: attributes #[[ATTR1]] = { nofree norecurse nosync nounwind memory(write) }
+; CHECK: attributes #[[ATTR2]] = { norecurse nosync nounwind allocsize(0) memory(read) }
+; CHECK: attributes #[[ATTR3]] = { nosync nounwind }
; CHECK: attributes #[[ATTR4:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
; CHECK: attributes #[[ATTR5:[0-9]+]] = { "llvm.assume"="omp_no_openmp" }
; CHECK: attributes #[[ATTR6]] = { nounwind }
; AMDGPU: [[META27:![0-9]+]] = !{!"any pointer", !20, i64 0}
; AMDGPU: [[LOOP28]] = distinct !{!28, !23, !24}
; AMDGPU: [[LOOP29]] = distinct !{!29, !23, !24}
-; AMDGPU: [[META30:![0-9]+]] = !{!31, !27, i64 0}
-; AMDGPU: [[META31:![0-9]+]] = !{!"kmp_task_t_with_privates", !32, i64 0}
-; AMDGPU: [[META32:![0-9]+]] = !{!"kmp_task_t", !27, i64 0, !27, i64 8, !19, i64 16, !20, i64 24, !20, i64 32}
;.
; NVPTX: [[META0:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"do_not_spmdize_task", i32 74, i32 5}
; NVPTX: [[META1:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_stack_var", i32 20, i32 1}
; NVPTX: [[META27:![0-9]+]] = !{!"any pointer", !20, i64 0}
; NVPTX: [[LOOP28]] = distinct !{!28, !23, !24}
; NVPTX: [[LOOP29]] = distinct !{!29, !23, !24}
-; NVPTX: [[META30:![0-9]+]] = !{!31, !27, i64 0}
-; NVPTX: [[META31:![0-9]+]] = !{!"kmp_task_t_with_privates", !32, i64 0}
-; NVPTX: [[META32:![0-9]+]] = !{!"kmp_task_t", !27, i64 0, !27, i64 8, !19, i64 16, !20, i64 24, !20, i64 32}
;.
; AMDGPU-DISABLED: [[META0:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"do_not_spmdize_task", i32 74, i32 5}
; AMDGPU-DISABLED: [[META1:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_stack_var", i32 20, i32 1}
; AMDGPU-DISABLED: [[META27:![0-9]+]] = !{!"any pointer", !20, i64 0}
; AMDGPU-DISABLED: [[LOOP28]] = distinct !{!28, !23, !24}
; AMDGPU-DISABLED: [[LOOP29]] = distinct !{!29, !23, !24}
-; AMDGPU-DISABLED: [[META30:![0-9]+]] = !{!31, !27, i64 0}
-; AMDGPU-DISABLED: [[META31:![0-9]+]] = !{!"kmp_task_t_with_privates", !32, i64 0}
-; AMDGPU-DISABLED: [[META32:![0-9]+]] = !{!"kmp_task_t", !27, i64 0, !27, i64 8, !19, i64 16, !20, i64 24, !20, i64 32}
;.
; NVPTX-DISABLED: [[META0:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"do_not_spmdize_task", i32 74, i32 5}
; NVPTX-DISABLED: [[META1:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_stack_var", i32 20, i32 1}
; NVPTX-DISABLED: [[META27:![0-9]+]] = !{!"any pointer", !20, i64 0}
; NVPTX-DISABLED: [[LOOP28]] = distinct !{!28, !23, !24}
; NVPTX-DISABLED: [[LOOP29]] = distinct !{!29, !23, !24}
-; NVPTX-DISABLED: [[META30:![0-9]+]] = !{!31, !27, i64 0}
-; NVPTX-DISABLED: [[META31:![0-9]+]] = !{!"kmp_task_t_with_privates", !32, i64 0}
-; NVPTX-DISABLED: [[META32:![0-9]+]] = !{!"kmp_task_t", !27, i64 0, !27, i64 8, !19, i64 16, !20, i64 24, !20, i64 32}
;.
; CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; CHECK: user_code.entry:
-; CHECK-NEXT: call void @generic_helper() #[[ATTR5]]
+; CHECK-NEXT: call void @generic_helper() #[[ATTR6:[0-9]+]]
; CHECK-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
; CHECK-NEXT: ret void
; CHECK: worker.exit:
; CHECK-DISABLE-SPMDIZATION-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
; CHECK-DISABLE-SPMDIZATION-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
; CHECK-DISABLE-SPMDIZATION: user_code.entry:
-; CHECK-DISABLE-SPMDIZATION-NEXT: call void @generic_helper() #[[ATTR5]]
+; CHECK-DISABLE-SPMDIZATION-NEXT: call void @generic_helper() #[[ATTR6:[0-9]+]]
; CHECK-DISABLE-SPMDIZATION-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1)
; CHECK-DISABLE-SPMDIZATION-NEXT: ret void
; CHECK-DISABLE-SPMDIZATION: worker.exit:
; CHECK-SAME: () #[[ATTR1:[0-9]+]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
-; CHECK-NEXT: call void @leaf() #[[ATTR5]]
+; CHECK-NEXT: call void @leaf() #[[ATTR6]]
; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR2:[0-9]+]]
; CHECK-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__, ptr @__omp_outlined___wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
; CHECK-NEXT: ret void
; CHECK-DISABLE-SPMDIZATION-SAME: () #[[ATTR1:[0-9]+]] {
; CHECK-DISABLE-SPMDIZATION-NEXT: entry:
; CHECK-DISABLE-SPMDIZATION-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
-; CHECK-DISABLE-SPMDIZATION-NEXT: call void @leaf() #[[ATTR5]]
+; CHECK-DISABLE-SPMDIZATION-NEXT: call void @leaf() #[[ATTR6]]
; CHECK-DISABLE-SPMDIZATION-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR2:[0-9]+]]
; CHECK-DISABLE-SPMDIZATION-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__, ptr @__omp_outlined___wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
; CHECK-DISABLE-SPMDIZATION-NEXT: ret void
; CHECK-NEXT: entry:
; CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; CHECK-NEXT: call void @unknown() #[[ATTR6:[0-9]+]]
+; CHECK-NEXT: call void @unknown() #[[ATTR7:[0-9]+]]
; CHECK-NEXT: ret void
;
; CHECK-DISABLE-SPMDIZATION-LABEL: define {{[^@]+}}@__omp_outlined__
; CHECK-DISABLE-SPMDIZATION-NEXT: entry:
; CHECK-DISABLE-SPMDIZATION-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; CHECK-DISABLE-SPMDIZATION-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; CHECK-DISABLE-SPMDIZATION-NEXT: call void @unknown() #[[ATTR6:[0-9]+]]
+; CHECK-DISABLE-SPMDIZATION-NEXT: call void @unknown() #[[ATTR7:[0-9]+]]
; CHECK-DISABLE-SPMDIZATION-NEXT: ret void
;
entry:
; CHECK-LABEL: define {{[^@]+}}@generic_helper
; CHECK-SAME: () #[[ATTR4]] {
; CHECK-NEXT: entry:
-; CHECK-NEXT: call void @leaf() #[[ATTR5]]
+; CHECK-NEXT: call void @leaf() #[[ATTR6]]
; CHECK-NEXT: ret void
;
; CHECK-DISABLE-SPMDIZATION-LABEL: define {{[^@]+}}@generic_helper
; CHECK-DISABLE-SPMDIZATION-SAME: () #[[ATTR4]] {
; CHECK-DISABLE-SPMDIZATION-NEXT: entry:
-; CHECK-DISABLE-SPMDIZATION-NEXT: call void @leaf() #[[ATTR5]]
+; CHECK-DISABLE-SPMDIZATION-NEXT: call void @leaf() #[[ATTR6]]
; CHECK-DISABLE-SPMDIZATION-NEXT: ret void
;
entry:
; CHECK: attributes #[[ATTR1]] = { convergent noinline nounwind "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="sm_53" "target-features"="+ptx32,+sm_53" }
; CHECK: attributes #[[ATTR2]] = { nounwind }
; CHECK: attributes #[[ATTR3:[0-9]+]] = { alwaysinline }
-; CHECK: attributes #[[ATTR4]] = { convergent noinline nounwind memory(write) "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="sm_53" "target-features"="+ptx32,+sm_53" }
+; CHECK: attributes #[[ATTR4]] = { convergent noinline nosync nounwind memory(write) "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="sm_53" "target-features"="+ptx32,+sm_53" }
; CHECK: attributes #[[ATTR5]] = { convergent nounwind }
-; CHECK: attributes #[[ATTR6]] = { convergent }
+; CHECK: attributes #[[ATTR6]] = { convergent nosync nounwind }
+; CHECK: attributes #[[ATTR7]] = { convergent }
;.
; CHECK-DISABLE-SPMDIZATION: attributes #[[ATTR0]] = { convergent noinline norecurse nounwind "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="sm_53" "target-features"="+ptx32,+sm_53" }
; CHECK-DISABLE-SPMDIZATION: attributes #[[ATTR1]] = { convergent noinline nounwind "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="sm_53" "target-features"="+ptx32,+sm_53" }
; CHECK-DISABLE-SPMDIZATION: attributes #[[ATTR2]] = { nounwind }
; CHECK-DISABLE-SPMDIZATION: attributes #[[ATTR3:[0-9]+]] = { alwaysinline }
-; CHECK-DISABLE-SPMDIZATION: attributes #[[ATTR4]] = { convergent noinline nounwind memory(write) "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="sm_53" "target-features"="+ptx32,+sm_53" }
+; CHECK-DISABLE-SPMDIZATION: attributes #[[ATTR4]] = { convergent noinline nosync nounwind memory(write) "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="sm_53" "target-features"="+ptx32,+sm_53" }
; CHECK-DISABLE-SPMDIZATION: attributes #[[ATTR5]] = { convergent nounwind }
-; CHECK-DISABLE-SPMDIZATION: attributes #[[ATTR6]] = { convergent }
+; CHECK-DISABLE-SPMDIZATION: attributes #[[ATTR6]] = { convergent nosync nounwind }
+; CHECK-DISABLE-SPMDIZATION: attributes #[[ATTR7]] = { convergent }
;.
; CHECK: [[META0:![0-9]+]] = !{i32 0, i32 43, i32 17011637, !"spmd", i32 12, i32 0}
; CHECK: [[META1:![0-9]+]] = !{i32 0, i32 43, i32 17011637, !"generic", i32 20, i32 1}
; CHECK-SAME: () #[[ATTR1:[0-9]+]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
-; CHECK-NEXT: call void @leaf() #[[ATTR3:[0-9]+]]
-; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]]
+; CHECK-NEXT: call void @leaf() #[[ATTR7:[0-9]+]]
+; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3:[0-9]+]]
; CHECK-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__, ptr @__omp_outlined___wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
; CHECK-NEXT: ret void
;
; CHECK-DISABLE-SPMDIZATION-SAME: () #[[ATTR1:[0-9]+]] {
; CHECK-DISABLE-SPMDIZATION-NEXT: entry:
; CHECK-DISABLE-SPMDIZATION-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
-; CHECK-DISABLE-SPMDIZATION-NEXT: call void @leaf() #[[ATTR3:[0-9]+]]
-; CHECK-DISABLE-SPMDIZATION-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]]
+; CHECK-DISABLE-SPMDIZATION-NEXT: call void @leaf() #[[ATTR7:[0-9]+]]
+; CHECK-DISABLE-SPMDIZATION-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3:[0-9]+]]
; CHECK-DISABLE-SPMDIZATION-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__, ptr @__omp_outlined___wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
; CHECK-DISABLE-SPMDIZATION-NEXT: ret void
;
; CHECK-NEXT: entry:
; CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; CHECK-NEXT: call void @leaf() #[[ATTR7:[0-9]+]]
+; CHECK-NEXT: call void @leaf() #[[ATTR7]]
; CHECK-NEXT: ret void
;
; CHECK-DISABLE-SPMDIZATION-LABEL: define {{[^@]+}}@__omp_outlined__
; CHECK-DISABLE-SPMDIZATION-NEXT: entry:
; CHECK-DISABLE-SPMDIZATION-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
; CHECK-DISABLE-SPMDIZATION-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; CHECK-DISABLE-SPMDIZATION-NEXT: call void @leaf() #[[ATTR7:[0-9]+]]
+; CHECK-DISABLE-SPMDIZATION-NEXT: call void @leaf() #[[ATTR7]]
; CHECK-DISABLE-SPMDIZATION-NEXT: ret void
;
entry:
; CHECK-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; CHECK-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
; CHECK-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; CHECK-NEXT: call void @__omp_outlined__(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; CHECK-NEXT: call void @__omp_outlined__(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR7]]
; CHECK-NEXT: ret void
;
; CHECK-DISABLE-SPMDIZATION-LABEL: define {{[^@]+}}@__omp_outlined___wrapper
; CHECK-DISABLE-SPMDIZATION-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
; CHECK-DISABLE-SPMDIZATION-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
; CHECK-DISABLE-SPMDIZATION-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-; CHECK-DISABLE-SPMDIZATION-NEXT: call void @__omp_outlined__(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+; CHECK-DISABLE-SPMDIZATION-NEXT: call void @__omp_outlined__(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR7]]
; CHECK-DISABLE-SPMDIZATION-NEXT: ret void
;
entry:
; CHECK-SAME: () #[[ATTR1]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: call void @unknown()
-; CHECK-NEXT: call void @leaf() #[[ATTR3]]
+; CHECK-NEXT: call void @leaf() #[[ATTR7]]
; CHECK-NEXT: ret void
;
; CHECK-DISABLE-SPMDIZATION-LABEL: define {{[^@]+}}@generic_helper
; CHECK-DISABLE-SPMDIZATION-SAME: () #[[ATTR1]] {
; CHECK-DISABLE-SPMDIZATION-NEXT: entry:
; CHECK-DISABLE-SPMDIZATION-NEXT: call void @unknown()
-; CHECK-DISABLE-SPMDIZATION-NEXT: call void @leaf() #[[ATTR3]]
+; CHECK-DISABLE-SPMDIZATION-NEXT: call void @leaf() #[[ATTR7]]
; CHECK-DISABLE-SPMDIZATION-NEXT: ret void
;
entry:
; CGSCC: @[[STR:[a-zA-Z0-9_$"\\.-]+]] = private unnamed_addr addrspace(4) constant [1 x i8] zeroinitializer, align 1
;.
define void @kernel() "kernel" {
-; TUNIT: Function Attrs: norecurse
-; TUNIT-LABEL: define {{[^@]+}}@kernel
-; TUNIT-SAME: () #[[ATTR0:[0-9]+]] {
-; TUNIT-NEXT: [[CALL:%.*]] = call i32 @__kmpc_target_init(ptr undef, i8 1, i1 false)
-; TUNIT-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], -1
-; TUNIT-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
-; TUNIT: if.then:
-; TUNIT-NEXT: store i32 1, ptr addrspace(3) @G, align 4
-; TUNIT-NEXT: br label [[IF_MERGE:%.*]]
-; TUNIT: if.else:
-; TUNIT-NEXT: call void @barrier() #[[ATTR5:[0-9]+]]
-; TUNIT-NEXT: [[L:%.*]] = load i32, ptr addrspace(3) @G, align 4
-; TUNIT-NEXT: call void @use1(i32 [[L]]) #[[ATTR5]]
-; TUNIT-NEXT: br label [[IF_MERGE]]
-; TUNIT: if.merge:
-; TUNIT-NEXT: br i1 [[CMP]], label [[IF_THEN2:%.*]], label [[IF_END:%.*]]
-; TUNIT: if.then2:
-; TUNIT-NEXT: store i32 2, ptr addrspace(3) @G, align 4
-; TUNIT-NEXT: call void @barrier() #[[ATTR5]]
-; TUNIT-NEXT: br label [[IF_END]]
-; TUNIT: if.end:
-; TUNIT-NEXT: call void @__kmpc_target_deinit(ptr undef, i8 1)
-; TUNIT-NEXT: ret void
;
-; CGSCC: Function Attrs: norecurse
-; CGSCC-LABEL: define {{[^@]+}}@kernel
-; CGSCC-SAME: () #[[ATTR0:[0-9]+]] {
-; CGSCC-NEXT: [[CALL:%.*]] = call i32 @__kmpc_target_init(ptr undef, i8 1, i1 false)
-; CGSCC-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], -1
-; CGSCC-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
-; CGSCC: if.then:
-; CGSCC-NEXT: store i32 1, ptr addrspace(3) @G, align 4
-; CGSCC-NEXT: br label [[IF_MERGE:%.*]]
-; CGSCC: if.else:
-; CGSCC-NEXT: call void @barrier()
-; CGSCC-NEXT: [[L:%.*]] = load i32, ptr addrspace(3) @G, align 4
-; CGSCC-NEXT: call void @use1(i32 [[L]])
-; CGSCC-NEXT: br label [[IF_MERGE]]
-; CGSCC: if.merge:
-; CGSCC-NEXT: br i1 [[CMP]], label [[IF_THEN2:%.*]], label [[IF_END:%.*]]
-; CGSCC: if.then2:
-; CGSCC-NEXT: store i32 2, ptr addrspace(3) @G, align 4
-; CGSCC-NEXT: call void @barrier()
-; CGSCC-NEXT: br label [[IF_END]]
-; CGSCC: if.end:
-; CGSCC-NEXT: call void @__kmpc_target_deinit(ptr undef, i8 1)
-; CGSCC-NEXT: ret void
+; CHECK: Function Attrs: norecurse
+; CHECK-LABEL: define {{[^@]+}}@kernel
+; CHECK-SAME: () #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: [[CALL:%.*]] = call i32 @__kmpc_target_init(ptr undef, i8 1, i1 false)
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], -1
+; CHECK-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
+; CHECK: if.then:
+; CHECK-NEXT: store i32 1, ptr addrspace(3) @G, align 4
+; CHECK-NEXT: br label [[IF_MERGE:%.*]]
+; CHECK: if.else:
+; CHECK-NEXT: call void @barrier() #[[ATTR5:[0-9]+]]
+; CHECK-NEXT: [[L:%.*]] = load i32, ptr addrspace(3) @G, align 4
+; CHECK-NEXT: call void @use1(i32 [[L]]) #[[ATTR5]]
+; CHECK-NEXT: br label [[IF_MERGE]]
+; CHECK: if.merge:
+; CHECK-NEXT: br i1 [[CMP]], label [[IF_THEN2:%.*]], label [[IF_END:%.*]]
+; CHECK: if.then2:
+; CHECK-NEXT: store i32 2, ptr addrspace(3) @G, align 4
+; CHECK-NEXT: call void @barrier() #[[ATTR5]]
+; CHECK-NEXT: br label [[IF_END]]
+; CHECK: if.end:
+; CHECK-NEXT: call void @__kmpc_target_deinit(ptr undef, i8 1)
+; CHECK-NEXT: ret void
;
%call = call i32 @__kmpc_target_init(ptr undef, i8 1, i1 false)
%cmp = icmp eq i32 %call, -1
!2 = !{ptr @kernel, !"kernel", i32 1}
;.
-; TUNIT: attributes #[[ATTR0]] = { norecurse "kernel" }
-; TUNIT: attributes #[[ATTR1:[0-9]+]] = { nocallback norecurse nounwind }
-; TUNIT: attributes #[[ATTR2:[0-9]+]] = { nocallback norecurse nosync nounwind }
-; TUNIT: attributes #[[ATTR3:[0-9]+]] = { nocallback }
-; TUNIT: attributes #[[ATTR4:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite) }
-; TUNIT: attributes #[[ATTR5]] = { nounwind }
-;.
-; CGSCC: attributes #[[ATTR0]] = { norecurse "kernel" }
-; CGSCC: attributes #[[ATTR1:[0-9]+]] = { nocallback norecurse nounwind }
-; CGSCC: attributes #[[ATTR2:[0-9]+]] = { nocallback norecurse nosync nounwind }
-; CGSCC: attributes #[[ATTR3:[0-9]+]] = { nocallback }
-; CGSCC: attributes #[[ATTR4:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite) }
+; CHECK: attributes #[[ATTR0]] = { norecurse "kernel" }
+; CHECK: attributes #[[ATTR1:[0-9]+]] = { nocallback norecurse nounwind }
+; CHECK: attributes #[[ATTR2:[0-9]+]] = { nocallback norecurse nosync nounwind }
+; CHECK: attributes #[[ATTR3:[0-9]+]] = { nocallback }
+; CHECK: attributes #[[ATTR4:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite) }
+; CHECK: attributes #[[ATTR5]] = { nounwind }
;.
; CHECK: [[META0:![0-9]+]] = !{i32 7, !"openmp", i32 50}
; CHECK: [[META1:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
; CHECK: [[META2:![0-9]+]] = !{ptr @kernel, !"kernel", i32 1}
;.
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; CGSCC: {{.*}}
+; TUNIT: {{.*}}