From 96709823ec37e98b16332f4ce19523b3f6798bdf Mon Sep 17 00:00:00 2001
From: Kuter Dinel
Date: Sun, 27 Jun 2021 20:41:56 +0300
Subject: [PATCH] [AMDGPU] Deduce attributes with the Attributor

This patch introduces a pass that uses the Attributor to deduce AMDGPU
specific attributes.

Reviewed By: jdoerfert, arsenm

Differential Revision: https://reviews.llvm.org/D104997
---
 llvm/include/llvm/Transforms/IPO/Attributor.h      |  21 +-
 llvm/lib/Target/AMDGPU/AMDGPU.h                    |   2 +
 llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp        | 529 ++++++++++++++++++++
 llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp     |   1 +
 llvm/lib/Target/AMDGPU/CMakeLists.txt              |   1 +
 llvm/lib/Transforms/IPO/Attributor.cpp             |  35 +-
 llvm/lib/Transforms/IPO/AttributorAttributes.cpp   |  30 +-
 .../CodeGen/AMDGPU/addrspacecast-constantexpr.ll   |  13 +-
 .../AMDGPU/annotate-kernel-features-hsa-call.ll    | 536 ++++++++++++++-------
 .../CodeGen/AMDGPU/annotate-kernel-features-hsa.ll |  46 +-
 .../CodeGen/AMDGPU/annotate-kernel-features.ll     | 149 ++++--
 llvm/test/CodeGen/AMDGPU/direct-indirect-call.ll   |  34 +-
 .../CodeGen/AMDGPU/duplicate-attribute-indirect.ll |  20 +-
 .../CodeGen/AMDGPU/pal-simple-indirect-call.ll     |  28 +-
 llvm/test/CodeGen/AMDGPU/simple-indirect-call.ll   |  12 +-
 .../AMDGPU/uniform-work-group-attribute-missing.ll |  33 +-
 .../CodeGen/AMDGPU/uniform-work-group-multistep.ll | 157 ++++++
 .../uniform-work-group-nested-function-calls.ll    |  59 ++-
 ...orm-work-group-prevent-attribute-propagation.ll |  49 +-
 .../uniform-work-group-propagate-attribute.ll      |  63 ++-
 .../AMDGPU/uniform-work-group-recursion-test.ll    | 142 +++++-
 .../test/CodeGen/AMDGPU/uniform-work-group-test.ll |  65 ++-
 22 files changed, 1645 insertions(+), 380 deletions(-)
 create mode 100644 llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
 create mode 100644 llvm/test/CodeGen/AMDGPU/uniform-work-group-multistep.ll

diff --git a/llvm/include/llvm/Transforms/IPO/Attributor.h b/llvm/include/llvm/Transforms/IPO/Attributor.h
index f234e45..4afa46e 100644
--- a/llvm/include/llvm/Transforms/IPO/Attributor.h
+++ b/llvm/include/llvm/Transforms/IPO/Attributor.h
@@ -1593,6 +1593,13 @@ public:
                      bool CheckBBLivenessOnly = false,
                      DepClassTy DepClass = DepClassTy::OPTIONAL);
 
+  /// Return true if \p BB is assumed dead.
+  ///
+  /// If \p FnLivenessAA is not provided it is queried.
+  bool isAssumedDead(const BasicBlock &BB, const AbstractAttribute *QueryingAA,
+                     const AAIsDead *FnLivenessAA,
+                     DepClassTy DepClass = DepClassTy::OPTIONAL);
+
   /// Check \p Pred on all (transitive) uses of \p V.
   ///
   /// This method will evaluate \p Pred on all (transitive) uses of the
@@ -2470,7 +2477,8 @@ struct IntegerRangeState : public AbstractState {
 /// IRAttribute::manifest is defined in the Attributor.cpp.
 struct IRAttributeManifest {
   static ChangeStatus manifestAttrs(Attributor &A, const IRPosition &IRP,
-                                    const ArrayRef<Attribute> &DeducedAttrs);
+                                    const ArrayRef<Attribute> &DeducedAttrs,
+                                    bool ForceReplace = false);
 };
 
 /// Helper to tie a abstract state implementation to an abstract attribute.
@@ -2696,6 +2704,17 @@ struct AttributorCGSCCPass : public PassInfoMixin<AttributorCGSCCPass> {
 Pass *createAttributorLegacyPass();
 Pass *createAttributorCGSCCLegacyPass();
 
+/// Helper function to clamp a state \p S of type \p StateType with the
+/// information in \p R and indicate/return if \p S did change (as-in update is
+/// required to be run again).
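+///
+/// For example, an AA's updateImpl can merge another AA's assumed state into
+/// its own (illustrative; \p OtherAA stands for any abstract attribute with a
+/// matching state type):
+///   Change = Change | clampStateAndIndicateChange(this->getState(),
+///                                                 OtherAA.getState());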
+template <typename StateType>
+ChangeStatus clampStateAndIndicateChange(StateType &S, const StateType &R) {
+  auto Assumed = S.getAssumed();
+  S ^= R;
+  return Assumed == S.getAssumed() ? ChangeStatus::UNCHANGED
+                                   : ChangeStatus::CHANGED;
+}
+
 /// ----------------------------------------------------------------------------
 /// Abstract Attribute Classes
 /// ----------------------------------------------------------------------------
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.h b/llvm/lib/Target/AMDGPU/AMDGPU.h
index c1bdc5e..ca088e6 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.h
@@ -97,6 +97,8 @@ extern char &AMDGPUMachineCFGStructurizerID;
 void initializeAMDGPUAlwaysInlinePass(PassRegistry&);
 
 Pass *createAMDGPUAnnotateKernelFeaturesPass();
+Pass *createAMDGPUAttributorPass();
+void initializeAMDGPUAttributorPass(PassRegistry &);
 void initializeAMDGPUAnnotateKernelFeaturesPass(PassRegistry &);
 extern char &AMDGPUAnnotateKernelFeaturesID;
 
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
new file mode 100644
index 0000000..7495b59
--- /dev/null
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
@@ -0,0 +1,529 @@
+//===- AMDGPUAttributor.cpp -----------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file This pass uses the Attributor framework to deduce AMDGPU attributes.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPU.h"
+#include "GCNSubtarget.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/IR/IntrinsicsAMDGPU.h"
+#include "llvm/IR/IntrinsicsR600.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Transforms/IPO/Attributor.h"
+
+#define DEBUG_TYPE "amdgpu-attributor"
+
+using namespace llvm;
+
+static constexpr StringLiteral ImplicitAttrNames[] = {
+    // X ids unnecessarily propagated to kernels.
+    "amdgpu-work-item-id-x",  "amdgpu-work-item-id-y",
+    "amdgpu-work-item-id-z",  "amdgpu-work-group-id-x",
+    "amdgpu-work-group-id-y", "amdgpu-work-group-id-z",
+    "amdgpu-dispatch-ptr",    "amdgpu-dispatch-id",
+    "amdgpu-queue-ptr",       "amdgpu-implicitarg-ptr"};
+
+// We do not need to note the x workitem or workgroup id because they are
+// always initialized.
+//
+// TODO: We should not add the attributes if the known compile time workgroup
+// size is 1 for y/z.
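+//
+// For example (illustrative): a function that calls llvm.amdgcn.workitem.id.y
+// is given "amdgpu-work-item-id-y" by the mapping below, and
+// AAAMDAttributesFunction then propagates that attribute to all of its
+// (transitive) callers.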
+static StringRef intrinsicToAttrName(Intrinsic::ID ID, bool &NonKernelOnly,
+                                     bool &IsQueuePtr) {
+  switch (ID) {
+  case Intrinsic::amdgcn_workitem_id_x:
+    NonKernelOnly = true;
+    return "amdgpu-work-item-id-x";
+  case Intrinsic::amdgcn_workgroup_id_x:
+    NonKernelOnly = true;
+    return "amdgpu-work-group-id-x";
+  case Intrinsic::amdgcn_workitem_id_y:
+  case Intrinsic::r600_read_tidig_y:
+    return "amdgpu-work-item-id-y";
+  case Intrinsic::amdgcn_workitem_id_z:
+  case Intrinsic::r600_read_tidig_z:
+    return "amdgpu-work-item-id-z";
+  case Intrinsic::amdgcn_workgroup_id_y:
+  case Intrinsic::r600_read_tgid_y:
+    return "amdgpu-work-group-id-y";
+  case Intrinsic::amdgcn_workgroup_id_z:
+  case Intrinsic::r600_read_tgid_z:
+    return "amdgpu-work-group-id-z";
+  case Intrinsic::amdgcn_dispatch_ptr:
+    return "amdgpu-dispatch-ptr";
+  case Intrinsic::amdgcn_dispatch_id:
+    return "amdgpu-dispatch-id";
+  case Intrinsic::amdgcn_kernarg_segment_ptr:
+    return "amdgpu-kernarg-segment-ptr";
+  case Intrinsic::amdgcn_implicitarg_ptr:
+    return "amdgpu-implicitarg-ptr";
+  case Intrinsic::amdgcn_queue_ptr:
+  case Intrinsic::amdgcn_is_shared:
+  case Intrinsic::amdgcn_is_private:
+    // TODO: Does not require queue ptr on gfx9+
+  case Intrinsic::trap:
+  case Intrinsic::debugtrap:
+    IsQueuePtr = true;
+    return "amdgpu-queue-ptr";
+  default:
+    return "";
+  }
+}
+
+static bool castRequiresQueuePtr(unsigned SrcAS) {
+  return SrcAS == AMDGPUAS::LOCAL_ADDRESS || SrcAS == AMDGPUAS::PRIVATE_ADDRESS;
+}
+
+static bool isDSAddress(const Constant *C) {
+  const GlobalValue *GV = dyn_cast<GlobalValue>(C);
+  if (!GV)
+    return false;
+  unsigned AS = GV->getAddressSpace();
+  return AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::REGION_ADDRESS;
+}
+
+class AMDGPUInformationCache : public InformationCache {
+public:
+  AMDGPUInformationCache(const Module &M, AnalysisGetter &AG,
+                         BumpPtrAllocator &Allocator,
+                         SetVector<Function *> *CGSCC, TargetMachine &TM)
+      : InformationCache(M, AG, Allocator, CGSCC), TM(TM) {}
+  TargetMachine &TM;
+
+  enum ConstantStatus { DS_GLOBAL = 1 << 0, ADDR_SPACE_CAST = 1 << 1 };
+
+  /// Check if the subtarget has aperture regs.
+  bool hasApertureRegs(Function &F) {
+    const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
+    return ST.hasApertureRegs();
+  }
+
+private:
+  /// Check if the ConstantExpr \p CE requires queue ptr attribute.
+  static bool visitConstExpr(const ConstantExpr *CE) {
+    if (CE->getOpcode() == Instruction::AddrSpaceCast) {
+      unsigned SrcAS = CE->getOperand(0)->getType()->getPointerAddressSpace();
+      return castRequiresQueuePtr(SrcAS);
+    }
+    return false;
+  }
+
+  /// Get the constant access bitmap for \p C.
+  uint8_t getConstantAccess(const Constant *C) {
+    auto It = ConstantStatus.find(C);
+    if (It != ConstantStatus.end())
+      return It->second;
+
+    uint8_t Result = 0;
+    if (isDSAddress(C))
+      Result = DS_GLOBAL;
+
+    if (const auto *CE = dyn_cast<ConstantExpr>(C))
+      if (visitConstExpr(CE))
+        Result |= ADDR_SPACE_CAST;
+
+    for (const Use &U : C->operands()) {
+      const auto *OpC = dyn_cast<Constant>(U);
+      if (!OpC)
+        continue;
+
+      Result |= getConstantAccess(OpC);
+    }
+    return Result;
+  }
+
+public:
+  /// Returns true if \p Fn needs a queue ptr attribute because of \p C.
+  bool needsQueuePtr(const Constant *C, Function &Fn) {
+    bool IsNonEntryFunc = !AMDGPU::isEntryFunctionCC(Fn.getCallingConv());
+    bool HasAperture = hasApertureRegs(Fn);
+
+    // No need to explore the constants.
+    if (!IsNonEntryFunc && HasAperture)
+      return false;
+
+    uint8_t Access = getConstantAccess(C);
+
+    // We need to trap on DS globals in non-entry functions.
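+    // (DS globals in non-entry functions are lowered with a trap, and
+    // llvm.trap/llvm.debugtrap map to "amdgpu-queue-ptr" in
+    // intrinsicToAttrName above, hence the queue-ptr requirement.)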
+    if (IsNonEntryFunc && (Access & DS_GLOBAL))
+      return true;
+
+    return !HasAperture && (Access & ADDR_SPACE_CAST);
+  }
+
+private:
+  /// Used to determine if the Constant needs a queue ptr attribute.
+  DenseMap<const Constant *, uint8_t> ConstantStatus;
+};
+
+struct AAAMDAttributes : public StateWrapper<BooleanState, AbstractAttribute> {
+  using Base = StateWrapper<BooleanState, AbstractAttribute>;
+  AAAMDAttributes(const IRPosition &IRP, Attributor &A) : Base(IRP) {}
+
+  /// Create an abstract attribute view for the position \p IRP.
+  static AAAMDAttributes &createForPosition(const IRPosition &IRP,
+                                            Attributor &A);
+
+  /// See AbstractAttribute::getName().
+  const std::string getName() const override { return "AAAMDAttributes"; }
+
+  /// See AbstractAttribute::getIdAddr().
+  const char *getIdAddr() const override { return &ID; }
+
+  /// This function should return true if the type of the \p AA is
+  /// AAAMDAttributes.
+  static bool classof(const AbstractAttribute *AA) {
+    return (AA->getIdAddr() == &ID);
+  }
+
+  virtual const DenseSet<StringRef> &getAttributes() const = 0;
+
+  /// Unique ID (due to the unique address)
+  static const char ID;
+};
+const char AAAMDAttributes::ID = 0;
+
+struct AAAMDWorkGroupSize
+    : public StateWrapper<BooleanState, AbstractAttribute> {
+  using Base = StateWrapper<BooleanState, AbstractAttribute>;
+  AAAMDWorkGroupSize(const IRPosition &IRP, Attributor &A) : Base(IRP) {}
+
+  /// Create an abstract attribute view for the position \p IRP.
+  static AAAMDWorkGroupSize &createForPosition(const IRPosition &IRP,
+                                               Attributor &A);
+
+  /// See AbstractAttribute::getName().
+  const std::string getName() const override { return "AAAMDWorkGroupSize"; }
+
+  /// See AbstractAttribute::getIdAddr().
+  const char *getIdAddr() const override { return &ID; }
+
+  /// This function should return true if the type of the \p AA is
+  /// AAAMDWorkGroupSize.
+  static bool classof(const AbstractAttribute *AA) {
+    return (AA->getIdAddr() == &ID);
+  }
+
+  /// Unique ID (due to the unique address)
+  static const char ID;
+};
+const char AAAMDWorkGroupSize::ID = 0;
+
+struct AAAMDWorkGroupSizeFunction : public AAAMDWorkGroupSize {
+  AAAMDWorkGroupSizeFunction(const IRPosition &IRP, Attributor &A)
+      : AAAMDWorkGroupSize(IRP, A) {}
+
+  void initialize(Attributor &A) override {
+    Function *F = getAssociatedFunction();
+    CallingConv::ID CC = F->getCallingConv();
+
+    if (CC != CallingConv::AMDGPU_KERNEL)
+      return;
+
+    bool InitialValue = false;
+    if (F->hasFnAttribute("uniform-work-group-size"))
+      InitialValue = F->getFnAttribute("uniform-work-group-size")
+                         .getValueAsString()
+                         .equals("true");
+
+    if (InitialValue)
+      indicateOptimisticFixpoint();
+    else
+      indicatePessimisticFixpoint();
+  }
+
+  ChangeStatus updateImpl(Attributor &A) override {
+    Function *F = getAssociatedFunction();
+    ChangeStatus Change = ChangeStatus::UNCHANGED;
+
+    auto CheckCallSite = [&](AbstractCallSite CS) {
+      Function *Caller = CS.getInstruction()->getFunction();
+      LLVM_DEBUG(dbgs() << "[AAAMDWorkGroupSize] Call " << Caller->getName()
+                        << "->" << F->getName() << "\n");
+
+      const auto &CallerInfo = A.getAAFor<AAAMDWorkGroupSize>(
+          *this, IRPosition::function(*Caller), DepClassTy::REQUIRED);
+
+      Change = Change | clampStateAndIndicateChange(this->getState(),
+                                                    CallerInfo.getState());
+
+      return true;
+    };
+
+    bool AllCallSitesKnown = true;
+    if (!A.checkForAllCallSites(CheckCallSite, *this, true, AllCallSitesKnown))
+      indicatePessimisticFixpoint();
+
+    return Change;
+  }
+
+  ChangeStatus manifest(Attributor &A) override {
+    SmallVector<Attribute, 8> AttrList;
+    LLVMContext &Ctx = getAssociatedFunction()->getContext();
+
+    AttrList.push_back(Attribute::get(Ctx, "uniform-work-group-size",
+                                      getAssumed() ? "true" : "false"));
+    return IRAttributeManifest::manifestAttrs(A, getIRPosition(), AttrList,
+                                              /* ForceReplace */ true);
+  }
+
+  bool isValidState() const override {
+    // This state is always valid, even when the state is false.
+    return true;
+  }
+
+  const std::string getAsStr() const override {
+    return "AMDWorkGroupSize[" + std::to_string(getAssumed()) + "]";
+  }
+
+  /// See AbstractAttribute::trackStatistics()
+  void trackStatistics() const override {}
+};
+
+AAAMDWorkGroupSize &AAAMDWorkGroupSize::createForPosition(const IRPosition &IRP,
+                                                          Attributor &A) {
+  if (IRP.getPositionKind() == IRPosition::IRP_FUNCTION)
+    return *new (A.Allocator) AAAMDWorkGroupSizeFunction(IRP, A);
+  llvm_unreachable("AAAMDWorkGroupSize is only valid for function position");
+}
+
+struct AAAMDAttributesFunction : public AAAMDAttributes {
+  AAAMDAttributesFunction(const IRPosition &IRP, Attributor &A)
+      : AAAMDAttributes(IRP, A) {}
+
+  void initialize(Attributor &A) override {
+    Function *F = getAssociatedFunction();
+    CallingConv::ID CC = F->getCallingConv();
+    bool CallingConvSupportsAllImplicits = (CC != CallingConv::AMDGPU_Gfx);
+
+    // Don't add attributes to intrinsics.
+    if (F->isIntrinsic()) {
+      indicatePessimisticFixpoint();
+      return;
+    }
+
+    // Ignore functions with graphics calling conventions; these are currently
+    // not allowed to have kernel arguments.
+    if (AMDGPU::isGraphics(F->getCallingConv())) {
+      indicatePessimisticFixpoint();
+      return;
+    }
+
+    for (StringRef Attr : ImplicitAttrNames) {
+      if (F->hasFnAttribute(Attr))
+        Attributes.insert(Attr);
+    }
+
+    // TODO: We shouldn't need this in the future.
+    if (CallingConvSupportsAllImplicits &&
+        F->hasAddressTaken(nullptr, true, true, true)) {
+      for (StringRef AttrName : ImplicitAttrNames) {
+        Attributes.insert(AttrName);
+      }
+    }
+  }
+
+  ChangeStatus updateImpl(Attributor &A) override {
+    Function *F = getAssociatedFunction();
+    ChangeStatus Change = ChangeStatus::UNCHANGED;
+    bool IsNonEntryFunc = !AMDGPU::isEntryFunctionCC(F->getCallingConv());
+    CallingConv::ID CC = F->getCallingConv();
+    bool CallingConvSupportsAllImplicits = (CC != CallingConv::AMDGPU_Gfx);
+    auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
+
+    auto AddAttribute = [&](StringRef AttrName) {
+      if (Attributes.insert(AttrName).second)
+        Change = ChangeStatus::CHANGED;
+    };
+
+    // Check for Intrinsics and propagate attributes.
+    const AACallEdges &AAEdges = A.getAAFor<AACallEdges>(
+        *this, this->getIRPosition(), DepClassTy::REQUIRED);
+
+    // We have to assume that we can reach a function with these attributes.
+    // We do not consider inline assembly as an unknown callee.
+    if (CallingConvSupportsAllImplicits && AAEdges.hasNonAsmUnknownCallee()) {
+      for (StringRef AttrName : ImplicitAttrNames) {
+        AddAttribute(AttrName);
+      }
+    }
+
+    bool NeedsQueuePtr = false;
+    bool HasCall = false;
+    for (Function *Callee : AAEdges.getOptimisticEdges()) {
+      Intrinsic::ID IID = Callee->getIntrinsicID();
+      if (IID != Intrinsic::not_intrinsic) {
+        if (!IsNonEntryFunc && IID == Intrinsic::amdgcn_kernarg_segment_ptr) {
+          AddAttribute("amdgpu-kernarg-segment-ptr");
+          continue;
+        }
+
+        bool NonKernelOnly = false;
+        StringRef AttrName =
+            intrinsicToAttrName(IID, NonKernelOnly, NeedsQueuePtr);
+
+        if (!AttrName.empty() && (IsNonEntryFunc || !NonKernelOnly))
+          AddAttribute(AttrName);
+
+        continue;
+      }
+
+      HasCall = true;
+      const AAAMDAttributes &AAAMD = A.getAAFor<AAAMDAttributes>(
+          *this, IRPosition::function(*Callee), DepClassTy::REQUIRED);
+      const DenseSet<StringRef> &CalleeAttributes = AAAMD.getAttributes();
+      // Propagate implicit attributes from called function.
+      for (StringRef AttrName : ImplicitAttrNames)
+        if (CalleeAttributes.count(AttrName))
+          AddAttribute(AttrName);
+    }
+
+    HasCall |= AAEdges.hasUnknownCallee();
+    if (!IsNonEntryFunc && HasCall)
+      AddAttribute("amdgpu-calls");
+
+    // Check the function body.
+    auto CheckAlloca = [&](Instruction &I) {
+      AddAttribute("amdgpu-stack-objects");
+      return false;
+    };
+
+    bool UsedAssumedInformation = false;
+    A.checkForAllInstructions(CheckAlloca, *this, {Instruction::Alloca},
+                              UsedAssumedInformation);
+
+    // If we found that we need amdgpu-queue-ptr, there is nothing else to do.
+    if (NeedsQueuePtr || Attributes.count("amdgpu-queue-ptr")) {
+      AddAttribute("amdgpu-queue-ptr");
+      return Change;
+    }
+
+    auto CheckAddrSpaceCasts = [&](Instruction &I) {
+      unsigned SrcAS = static_cast<AddrSpaceCastInst &>(I).getSrcAddressSpace();
+      if (castRequiresQueuePtr(SrcAS)) {
+        NeedsQueuePtr = true;
+        return false;
+      }
+      return true;
+    };
+
+    bool HasApertureRegs = InfoCache.hasApertureRegs(*F);
+
+    // `checkForAllInstructions` is much cheaper than going through all
+    // instructions, try it first.
+
+    // amdgpu-queue-ptr is not needed if aperture regs are present.
+    if (!HasApertureRegs)
+      A.checkForAllInstructions(CheckAddrSpaceCasts, *this,
+                                {Instruction::AddrSpaceCast},
+                                UsedAssumedInformation);
+
+    // If we found that we need amdgpu-queue-ptr, there is nothing else to do.
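+    // (From here on, the only remaining question is whether constants
+    // referenced by the function body force "amdgpu-queue-ptr" as well.)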
+    if (NeedsQueuePtr) {
+      AddAttribute("amdgpu-queue-ptr");
+      return Change;
+    }
+
+    if (!IsNonEntryFunc && HasApertureRegs)
+      return Change;
+
+    for (BasicBlock &BB : *F) {
+      for (Instruction &I : BB) {
+        for (const Use &U : I.operands()) {
+          if (const auto *C = dyn_cast<Constant>(U)) {
+            if (InfoCache.needsQueuePtr(C, *F)) {
+              AddAttribute("amdgpu-queue-ptr");
+              return Change;
+            }
+          }
+        }
+      }
+    }
+
+    return Change;
+  }
+
+  ChangeStatus manifest(Attributor &A) override {
+    SmallVector<Attribute, 8> AttrList;
+    LLVMContext &Ctx = getAssociatedFunction()->getContext();
+
+    for (StringRef AttrName : Attributes)
+      AttrList.push_back(Attribute::get(Ctx, AttrName));
+
+    return IRAttributeManifest::manifestAttrs(A, getIRPosition(), AttrList,
+                                              /* ForceReplace */ true);
+  }
+
+  const std::string getAsStr() const override {
+    return "AMDInfo[" + std::to_string(Attributes.size()) + "]";
+  }
+
+  const DenseSet<StringRef> &getAttributes() const override {
+    return Attributes;
+  }
+
+  /// See AbstractAttribute::trackStatistics()
+  void trackStatistics() const override {}
+
+private:
+  DenseSet<StringRef> Attributes;
+};
+
+AAAMDAttributes &AAAMDAttributes::createForPosition(const IRPosition &IRP,
+                                                    Attributor &A) {
+  if (IRP.getPositionKind() == IRPosition::IRP_FUNCTION)
+    return *new (A.Allocator) AAAMDAttributesFunction(IRP, A);
+  llvm_unreachable("AAAMDAttributes is only valid for function position");
+}
+
+class AMDGPUAttributor : public ModulePass {
+public:
+  AMDGPUAttributor() : ModulePass(ID) {}
+
+  /// doInitialization - Virtual method overridden by subclasses to do
+  /// any necessary initialization before any pass is run.
+  bool doInitialization(Module &) override {
+    auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
+    if (!TPC)
+      report_fatal_error("TargetMachine is required");
+
+    TM = &TPC->getTM<TargetMachine>();
+    return false;
+  }
+
+  bool runOnModule(Module &M) override {
+    SetVector<Function *> Functions;
+    AnalysisGetter AG;
+    for (Function &F : M)
+      Functions.insert(&F);
+
+    CallGraphUpdater CGUpdater;
+    BumpPtrAllocator Allocator;
+    AMDGPUInformationCache InfoCache(M, AG, Allocator, nullptr, *TM);
+    Attributor A(Functions, InfoCache, CGUpdater);
+
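+    // Seed only the two AMDGPU AAs for every function; anything else they
+    // rely on (e.g. AACallEdges) is created lazily while A.run() iterates to
+    // a fixpoint.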
+    for (Function &F : M) {
+      A.getOrCreateAAFor<AAAMDAttributes>(IRPosition::function(F));
+      A.getOrCreateAAFor<AAAMDWorkGroupSize>(IRPosition::function(F));
+    }
+
+    ChangeStatus Change = A.run();
+    return Change == ChangeStatus::CHANGED;
+  }
+
+  StringRef getPassName() const override { return "AMDGPU Attributor"; }
+  TargetMachine *TM;
+  static char ID;
+};
+
+char AMDGPUAttributor::ID = 0;
+
+Pass *llvm::createAMDGPUAttributorPass() { return new AMDGPUAttributor(); }
+INITIALIZE_PASS(AMDGPUAttributor, DEBUG_TYPE, "AMDGPU Attributor", false, false)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index b40f7b0..e4485f8 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -350,6 +350,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget() {
   initializeSILoadStoreOptimizerPass(*PR);
   initializeAMDGPUFixFunctionBitcastsPass(*PR);
   initializeAMDGPUAlwaysInlinePass(*PR);
+  initializeAMDGPUAttributorPass(*PR);
   initializeAMDGPUAnnotateKernelFeaturesPass(*PR);
   initializeAMDGPUAnnotateUniformValuesPass(*PR);
   initializeAMDGPUArgumentUsageInfoPass(*PR);
diff --git a/llvm/lib/Target/AMDGPU/CMakeLists.txt b/llvm/lib/Target/AMDGPU/CMakeLists.txt
index 7e0ffa0..a696834 100644
--- a/llvm/lib/Target/AMDGPU/CMakeLists.txt
+++ b/llvm/lib/Target/AMDGPU/CMakeLists.txt
@@ -44,6 +44,7 @@ add_llvm_target(AMDGPUCodeGen
   AMDGPUAliasAnalysis.cpp
   AMDGPUAlwaysInlinePass.cpp
   AMDGPUAnnotateKernelFeatures.cpp
+  AMDGPUAttributor.cpp
   AMDGPUAnnotateUniformValues.cpp
   AMDGPUArgumentUsageInfo.cpp
   AMDGPUAsmPrinter.cpp
diff --git a/llvm/lib/Transforms/IPO/Attributor.cpp b/llvm/lib/Transforms/IPO/Attributor.cpp
index 1e4d990..bec41e5 100644
--- a/llvm/lib/Transforms/IPO/Attributor.cpp
+++ b/llvm/lib/Transforms/IPO/Attributor.cpp
@@ -293,12 +293,14 @@ static bool isEqualOrWorse(const Attribute &New, const Attribute &Old) {
 /// attribute list \p Attrs. This is only the case if it was not already present
 /// in \p Attrs at the position describe by \p PK and \p AttrIdx.
 static bool addIfNotExistent(LLVMContext &Ctx, const Attribute &Attr,
-                             AttributeList &Attrs, int AttrIdx) {
+                             AttributeList &Attrs, int AttrIdx,
+                             bool ForceReplace = false) {
   if (Attr.isEnumAttribute()) {
     Attribute::AttrKind Kind = Attr.getKindAsEnum();
     if (Attrs.hasAttribute(AttrIdx, Kind))
-      if (isEqualOrWorse(Attr, Attrs.getAttribute(AttrIdx, Kind)))
+      if (!ForceReplace &&
+          isEqualOrWorse(Attr, Attrs.getAttribute(AttrIdx, Kind)))
         return false;
     Attrs = Attrs.addAttribute(Ctx, AttrIdx, Attr);
     return true;
@@ -306,7 +308,8 @@ static bool addIfNotExistent(LLVMContext &Ctx, const Attribute &Attr,
   if (Attr.isStringAttribute()) {
     StringRef Kind = Attr.getKindAsString();
     if (Attrs.hasAttribute(AttrIdx, Kind))
-      if (isEqualOrWorse(Attr, Attrs.getAttribute(AttrIdx, Kind)))
+      if (!ForceReplace &&
+          isEqualOrWorse(Attr, Attrs.getAttribute(AttrIdx, Kind)))
         return false;
     Attrs = Attrs.addAttribute(Ctx, AttrIdx, Attr);
     return true;
@@ -314,7 +317,8 @@ static bool addIfNotExistent(LLVMContext &Ctx, const Attribute &Attr,
   if (Attr.isIntAttribute()) {
     Attribute::AttrKind Kind = Attr.getKindAsEnum();
     if (Attrs.hasAttribute(AttrIdx, Kind))
-      if (isEqualOrWorse(Attr, Attrs.getAttribute(AttrIdx, Kind)))
+      if (!ForceReplace &&
+          isEqualOrWorse(Attr, Attrs.getAttribute(AttrIdx, Kind)))
         return false;
     Attrs = Attrs.removeAttribute(Ctx, AttrIdx, Kind);
     Attrs = Attrs.addAttribute(Ctx, AttrIdx, Attr);
@@ -395,7 +399,8 @@ ChangeStatus AbstractAttribute::update(Attributor &A) {
 
 ChangeStatus
 IRAttributeManifest::manifestAttrs(Attributor &A, const IRPosition &IRP,
-                                   const ArrayRef<Attribute> &DeducedAttrs) {
+                                   const ArrayRef<Attribute> &DeducedAttrs,
+                                   bool ForceReplace) {
   Function *ScopeFn = IRP.getAnchorScope();
   IRPosition::Kind PK = IRP.getPositionKind();
 
@@ -423,7 +428,7 @@ IRAttributeManifest::manifestAttrs(Attributor &A, const IRPosition &IRP,
   ChangeStatus HasChanged = ChangeStatus::UNCHANGED;
   LLVMContext &Ctx = IRP.getAnchorValue().getContext();
   for (const Attribute &Attr : DeducedAttrs) {
-    if (!addIfNotExistent(Ctx, Attr, Attrs, IRP.getAttrIdx()))
+    if (!addIfNotExistent(Ctx, Attr, Attrs, IRP.getAttrIdx(), ForceReplace))
       continue;
 
     HasChanged = ChangeStatus::CHANGED;
@@ -894,6 +899,22 @@ bool Attributor::isAssumedDead(const IRPosition &IRP,
   return false;
 }
 
+bool Attributor::isAssumedDead(const BasicBlock &BB,
+                               const AbstractAttribute *QueryingAA,
+                               const AAIsDead *FnLivenessAA,
+                               DepClassTy DepClass) {
+  if (!FnLivenessAA)
+    FnLivenessAA = lookupAAFor<AAIsDead>(IRPosition::function(*BB.getParent()),
+                                         QueryingAA, DepClassTy::NONE);
+  if (FnLivenessAA->isAssumedDead(&BB)) {
+    if (QueryingAA)
+      recordDependence(*FnLivenessAA, *QueryingAA, DepClass);
+    return true;
+  }
+
+  return false;
+}
+
 bool Attributor::checkForAllUses(function_ref<bool(const Use &, bool &)> Pred,
                                  const AbstractAttribute &QueryingAA,
                                  const Value &V, bool CheckBBLivenessOnly,
@@ -2213,6 +2234,8 @@ void InformationCache::initializeInformationCache(const Function &CF,
       // The alignment of a pointer is interesting for loads.
     case Instruction::Store:
       // The alignment of a pointer is interesting for stores.
+    case Instruction::Alloca:
+    case Instruction::AddrSpaceCast:
       IsInterestingOpcode = true;
     }
     if (IsInterestingOpcode) {
diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
index 1854ac3..d1605ab 100644
--- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
+++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
@@ -146,6 +146,16 @@ PIPE_OPERATOR(AAFunctionReachability)
 PIPE_OPERATOR(AAPointerInfo)
 
 #undef PIPE_OPERATOR
+
+template <>
+ChangeStatus clampStateAndIndicateChange<DerefState>(DerefState &S,
+                                                     const DerefState &R) {
+  ChangeStatus CS0 =
+      clampStateAndIndicateChange(S.DerefBytesState, R.DerefBytesState);
+  ChangeStatus CS1 = clampStateAndIndicateChange(S.GlobalState, R.GlobalState);
+  return CS0 | CS1;
+}
+
 } // namespace llvm
 
 /// Get pointer operand of memory accessing instruction. If \p I is
@@ -448,17 +458,6 @@ getBasePointerOfAccessPointerOperand(const Instruction *I, int64_t &BytesOffset,
                                                 AllowNonInbounds);
 }
 
-/// Helper function to clamp a state \p S of type \p StateType with the
-/// information in \p R and indicate/return if \p S did change (as-in update is
-/// required to be run again).
-template <typename StateType>
-ChangeStatus clampStateAndIndicateChange(StateType &S, const StateType &R) {
-  auto Assumed = S.getAssumed();
-  S ^= R;
-  return Assumed == S.getAssumed() ? ChangeStatus::UNCHANGED
-                                   : ChangeStatus::CHANGED;
-}
-
 /// Clamp the information known for all returned values of a function
 /// (identified by \p QueryingAA) into \p S.
 template <typename AAType, typename StateType = typename AAType::StateType>
@@ -3942,15 +3941,6 @@ struct AAIsDeadCallSite final : AAIsDeadFunction {
 
 /// -------------------- Dereferenceable Argument Attribute --------------------
 
-template <>
-ChangeStatus clampStateAndIndicateChange<DerefState>(DerefState &S,
-                                                     const DerefState &R) {
-  ChangeStatus CS0 =
-      clampStateAndIndicateChange(S.DerefBytesState, R.DerefBytesState);
-  ChangeStatus CS1 = clampStateAndIndicateChange(S.GlobalState, R.GlobalState);
-  return CS0 | CS1;
-}
-
 struct AADereferenceableImpl : AADereferenceable {
   AADereferenceableImpl(const IRPosition &IRP, Attributor &A)
      : AADereferenceable(IRP, A) {}
diff --git a/llvm/test/CodeGen/AMDGPU/addrspacecast-constantexpr.ll b/llvm/test/CodeGen/AMDGPU/addrspacecast-constantexpr.ll
index 6d1e45c..d235753 100644
--- a/llvm/test/CodeGen/AMDGPU/addrspacecast-constantexpr.ll
+++ b/llvm/test/CodeGen/AMDGPU/addrspacecast-constantexpr.ll
@@ -1,5 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-globals
-; RUN: opt -mtriple=amdgcn-unknown-amdhsa -S -amdgpu-annotate-kernel-features < %s | FileCheck -check-prefix=HSA %s
+; RUN: opt -mtriple=amdgcn-unknown-amdhsa -S -amdgpu-annotate-kernel-features < %s | FileCheck -check-prefixes=HSA,AKF_HSA %s
+; RUN: opt -mtriple=amdgcn-unknown-amdhsa -S -amdgpu-attributor < %s | FileCheck -check-prefixes=HSA,ATTRIBUTOR_HSA %s
 
 declare void @llvm.memcpy.p1i32.p4i32.i32(i32 addrspace(1)* nocapture, i32 addrspace(4)* nocapture, i32, i1) #0
 
@@ -167,7 +168,11 @@ define i32 addrspace(3)* @ret_constant_cast_group_gv_gep_to_flat_to_group() #1 {
 attributes #0 = { argmemonly nounwind }
 attributes #1 = { nounwind }
 ;.
-; HSA: attributes #[[ATTR0:[0-9]+]] = { argmemonly nofree nounwind willreturn } -; HSA: attributes #[[ATTR1]] = { nounwind } -; HSA: attributes #[[ATTR2]] = { nounwind "amdgpu-queue-ptr" } +; AKF_HSA: attributes #[[ATTR0:[0-9]+]] = { argmemonly nofree nounwind willreturn } +; AKF_HSA: attributes #[[ATTR1]] = { nounwind } +; AKF_HSA: attributes #[[ATTR2]] = { nounwind "amdgpu-queue-ptr" } +;. +; ATTRIBUTOR_HSA: attributes #[[ATTR0:[0-9]+]] = { argmemonly nofree nounwind willreturn "uniform-work-group-size"="false" } +; ATTRIBUTOR_HSA: attributes #[[ATTR1]] = { nounwind "uniform-work-group-size"="false" } +; ATTRIBUTOR_HSA: attributes #[[ATTR2]] = { nounwind "amdgpu-queue-ptr" "uniform-work-group-size"="false" } ;. diff --git a/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll b/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll index 8cb30fe..827116e 100644 --- a/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll +++ b/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-globals -; RUN: opt -S -mtriple=amdgcn-unknown-amdhsa -amdgpu-annotate-kernel-features %s | FileCheck -check-prefix=HSA %s +; RUN: opt -mtriple=amdgcn-unknown-amdhsa -S -amdgpu-annotate-kernel-features < %s | FileCheck -check-prefixes=HSA,AKF_HSA %s +; RUN: opt -mtriple=amdgcn-unknown-amdhsa -S -amdgpu-attributor < %s | FileCheck -check-prefixes=HSA,ATTRIBUTOR_HSA %s declare i32 @llvm.amdgcn.workgroup.id.x() #0 declare i32 @llvm.amdgcn.workgroup.id.y() #0 @@ -140,152 +141,228 @@ define void @use_workgroup_id_y_workgroup_id_z() #1 { } define void @func_indirect_use_workitem_id_x() #1 { -; HSA-LABEL: define {{[^@]+}}@func_indirect_use_workitem_id_x -; HSA-SAME: () #[[ATTR1]] { -; HSA-NEXT: call void @use_workitem_id_x() -; HSA-NEXT: ret void +; AKF_HSA-LABEL: define {{[^@]+}}@func_indirect_use_workitem_id_x +; AKF_HSA-SAME: () #[[ATTR1]] { +; AKF_HSA-NEXT: call void @use_workitem_id_x() +; AKF_HSA-NEXT: ret void +; +; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@func_indirect_use_workitem_id_x +; ATTRIBUTOR_HSA-SAME: () #[[ATTR1]] { +; ATTRIBUTOR_HSA-NEXT: call void @use_workitem_id_x() #[[ATTR22:[0-9]+]] +; ATTRIBUTOR_HSA-NEXT: ret void ; call void @use_workitem_id_x() ret void } define void @kernel_indirect_use_workitem_id_x() #1 { -; HSA-LABEL: define {{[^@]+}}@kernel_indirect_use_workitem_id_x -; HSA-SAME: () #[[ATTR1]] { -; HSA-NEXT: call void @use_workitem_id_x() -; HSA-NEXT: ret void +; AKF_HSA-LABEL: define {{[^@]+}}@kernel_indirect_use_workitem_id_x +; AKF_HSA-SAME: () #[[ATTR1]] { +; AKF_HSA-NEXT: call void @use_workitem_id_x() +; AKF_HSA-NEXT: ret void +; +; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@kernel_indirect_use_workitem_id_x +; ATTRIBUTOR_HSA-SAME: () #[[ATTR1]] { +; ATTRIBUTOR_HSA-NEXT: call void @use_workitem_id_x() #[[ATTR22]] +; ATTRIBUTOR_HSA-NEXT: ret void ; call void @use_workitem_id_x() ret void } define void @func_indirect_use_workitem_id_y() #1 { -; HSA-LABEL: define {{[^@]+}}@func_indirect_use_workitem_id_y -; HSA-SAME: () #[[ATTR2]] { -; HSA-NEXT: call void @use_workitem_id_y() -; HSA-NEXT: ret void +; AKF_HSA-LABEL: define {{[^@]+}}@func_indirect_use_workitem_id_y +; AKF_HSA-SAME: () #[[ATTR2]] { +; AKF_HSA-NEXT: call void @use_workitem_id_y() +; AKF_HSA-NEXT: ret void +; +; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@func_indirect_use_workitem_id_y +; ATTRIBUTOR_HSA-SAME: () #[[ATTR2]] { +; ATTRIBUTOR_HSA-NEXT: call void 
@use_workitem_id_y() #[[ATTR22]] +; ATTRIBUTOR_HSA-NEXT: ret void ; call void @use_workitem_id_y() ret void } define void @func_indirect_use_workitem_id_z() #1 { -; HSA-LABEL: define {{[^@]+}}@func_indirect_use_workitem_id_z -; HSA-SAME: () #[[ATTR3]] { -; HSA-NEXT: call void @use_workitem_id_z() -; HSA-NEXT: ret void +; AKF_HSA-LABEL: define {{[^@]+}}@func_indirect_use_workitem_id_z +; AKF_HSA-SAME: () #[[ATTR3]] { +; AKF_HSA-NEXT: call void @use_workitem_id_z() +; AKF_HSA-NEXT: ret void +; +; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@func_indirect_use_workitem_id_z +; ATTRIBUTOR_HSA-SAME: () #[[ATTR3]] { +; ATTRIBUTOR_HSA-NEXT: call void @use_workitem_id_z() #[[ATTR22]] +; ATTRIBUTOR_HSA-NEXT: ret void ; call void @use_workitem_id_z() ret void } define void @func_indirect_use_workgroup_id_x() #1 { -; HSA-LABEL: define {{[^@]+}}@func_indirect_use_workgroup_id_x -; HSA-SAME: () #[[ATTR4]] { -; HSA-NEXT: call void @use_workgroup_id_x() -; HSA-NEXT: ret void +; AKF_HSA-LABEL: define {{[^@]+}}@func_indirect_use_workgroup_id_x +; AKF_HSA-SAME: () #[[ATTR4]] { +; AKF_HSA-NEXT: call void @use_workgroup_id_x() +; AKF_HSA-NEXT: ret void +; +; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@func_indirect_use_workgroup_id_x +; ATTRIBUTOR_HSA-SAME: () #[[ATTR4]] { +; ATTRIBUTOR_HSA-NEXT: call void @use_workgroup_id_x() #[[ATTR22]] +; ATTRIBUTOR_HSA-NEXT: ret void ; call void @use_workgroup_id_x() ret void } define void @kernel_indirect_use_workgroup_id_x() #1 { -; HSA-LABEL: define {{[^@]+}}@kernel_indirect_use_workgroup_id_x -; HSA-SAME: () #[[ATTR4]] { -; HSA-NEXT: call void @use_workgroup_id_x() -; HSA-NEXT: ret void +; AKF_HSA-LABEL: define {{[^@]+}}@kernel_indirect_use_workgroup_id_x +; AKF_HSA-SAME: () #[[ATTR4]] { +; AKF_HSA-NEXT: call void @use_workgroup_id_x() +; AKF_HSA-NEXT: ret void +; +; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@kernel_indirect_use_workgroup_id_x +; ATTRIBUTOR_HSA-SAME: () #[[ATTR4]] { +; ATTRIBUTOR_HSA-NEXT: call void @use_workgroup_id_x() #[[ATTR22]] +; ATTRIBUTOR_HSA-NEXT: ret void ; call void @use_workgroup_id_x() ret void } define void @func_indirect_use_workgroup_id_y() #1 { -; HSA-LABEL: define {{[^@]+}}@func_indirect_use_workgroup_id_y -; HSA-SAME: () #[[ATTR5]] { -; HSA-NEXT: call void @use_workgroup_id_y() -; HSA-NEXT: ret void +; AKF_HSA-LABEL: define {{[^@]+}}@func_indirect_use_workgroup_id_y +; AKF_HSA-SAME: () #[[ATTR5]] { +; AKF_HSA-NEXT: call void @use_workgroup_id_y() +; AKF_HSA-NEXT: ret void +; +; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@func_indirect_use_workgroup_id_y +; ATTRIBUTOR_HSA-SAME: () #[[ATTR5]] { +; ATTRIBUTOR_HSA-NEXT: call void @use_workgroup_id_y() #[[ATTR22]] +; ATTRIBUTOR_HSA-NEXT: ret void ; call void @use_workgroup_id_y() ret void } define void @func_indirect_use_workgroup_id_z() #1 { -; HSA-LABEL: define {{[^@]+}}@func_indirect_use_workgroup_id_z -; HSA-SAME: () #[[ATTR6]] { -; HSA-NEXT: call void @use_workgroup_id_z() -; HSA-NEXT: ret void +; AKF_HSA-LABEL: define {{[^@]+}}@func_indirect_use_workgroup_id_z +; AKF_HSA-SAME: () #[[ATTR6]] { +; AKF_HSA-NEXT: call void @use_workgroup_id_z() +; AKF_HSA-NEXT: ret void +; +; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@func_indirect_use_workgroup_id_z +; ATTRIBUTOR_HSA-SAME: () #[[ATTR6]] { +; ATTRIBUTOR_HSA-NEXT: call void @use_workgroup_id_z() #[[ATTR22]] +; ATTRIBUTOR_HSA-NEXT: ret void ; call void @use_workgroup_id_z() ret void } define void @func_indirect_indirect_use_workgroup_id_y() #1 { -; HSA-LABEL: define {{[^@]+}}@func_indirect_indirect_use_workgroup_id_y -; HSA-SAME: () #[[ATTR5]] { -; HSA-NEXT: 
call void @func_indirect_use_workgroup_id_y() -; HSA-NEXT: ret void +; AKF_HSA-LABEL: define {{[^@]+}}@func_indirect_indirect_use_workgroup_id_y +; AKF_HSA-SAME: () #[[ATTR5]] { +; AKF_HSA-NEXT: call void @func_indirect_use_workgroup_id_y() +; AKF_HSA-NEXT: ret void +; +; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@func_indirect_indirect_use_workgroup_id_y +; ATTRIBUTOR_HSA-SAME: () #[[ATTR5]] { +; ATTRIBUTOR_HSA-NEXT: call void @func_indirect_use_workgroup_id_y() #[[ATTR22]] +; ATTRIBUTOR_HSA-NEXT: ret void ; call void @func_indirect_use_workgroup_id_y() ret void } define void @indirect_x2_use_workgroup_id_y() #1 { -; HSA-LABEL: define {{[^@]+}}@indirect_x2_use_workgroup_id_y -; HSA-SAME: () #[[ATTR5]] { -; HSA-NEXT: call void @func_indirect_indirect_use_workgroup_id_y() -; HSA-NEXT: ret void +; AKF_HSA-LABEL: define {{[^@]+}}@indirect_x2_use_workgroup_id_y +; AKF_HSA-SAME: () #[[ATTR5]] { +; AKF_HSA-NEXT: call void @func_indirect_indirect_use_workgroup_id_y() +; AKF_HSA-NEXT: ret void +; +; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@indirect_x2_use_workgroup_id_y +; ATTRIBUTOR_HSA-SAME: () #[[ATTR5]] { +; ATTRIBUTOR_HSA-NEXT: call void @func_indirect_indirect_use_workgroup_id_y() #[[ATTR22]] +; ATTRIBUTOR_HSA-NEXT: ret void ; call void @func_indirect_indirect_use_workgroup_id_y() ret void } define void @func_indirect_use_dispatch_ptr() #1 { -; HSA-LABEL: define {{[^@]+}}@func_indirect_use_dispatch_ptr -; HSA-SAME: () #[[ATTR7]] { -; HSA-NEXT: call void @use_dispatch_ptr() -; HSA-NEXT: ret void +; AKF_HSA-LABEL: define {{[^@]+}}@func_indirect_use_dispatch_ptr +; AKF_HSA-SAME: () #[[ATTR7]] { +; AKF_HSA-NEXT: call void @use_dispatch_ptr() +; AKF_HSA-NEXT: ret void +; +; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@func_indirect_use_dispatch_ptr +; ATTRIBUTOR_HSA-SAME: () #[[ATTR7]] { +; ATTRIBUTOR_HSA-NEXT: call void @use_dispatch_ptr() #[[ATTR22]] +; ATTRIBUTOR_HSA-NEXT: ret void ; call void @use_dispatch_ptr() ret void } define void @func_indirect_use_queue_ptr() #1 { -; HSA-LABEL: define {{[^@]+}}@func_indirect_use_queue_ptr -; HSA-SAME: () #[[ATTR8]] { -; HSA-NEXT: call void @use_queue_ptr() -; HSA-NEXT: ret void +; AKF_HSA-LABEL: define {{[^@]+}}@func_indirect_use_queue_ptr +; AKF_HSA-SAME: () #[[ATTR8]] { +; AKF_HSA-NEXT: call void @use_queue_ptr() +; AKF_HSA-NEXT: ret void +; +; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@func_indirect_use_queue_ptr +; ATTRIBUTOR_HSA-SAME: () #[[ATTR8]] { +; ATTRIBUTOR_HSA-NEXT: call void @use_queue_ptr() #[[ATTR22]] +; ATTRIBUTOR_HSA-NEXT: ret void ; call void @use_queue_ptr() ret void } define void @func_indirect_use_dispatch_id() #1 { -; HSA-LABEL: define {{[^@]+}}@func_indirect_use_dispatch_id -; HSA-SAME: () #[[ATTR9]] { -; HSA-NEXT: call void @use_dispatch_id() -; HSA-NEXT: ret void +; AKF_HSA-LABEL: define {{[^@]+}}@func_indirect_use_dispatch_id +; AKF_HSA-SAME: () #[[ATTR9]] { +; AKF_HSA-NEXT: call void @use_dispatch_id() +; AKF_HSA-NEXT: ret void +; +; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@func_indirect_use_dispatch_id +; ATTRIBUTOR_HSA-SAME: () #[[ATTR9]] { +; ATTRIBUTOR_HSA-NEXT: call void @use_dispatch_id() #[[ATTR22]] +; ATTRIBUTOR_HSA-NEXT: ret void ; call void @use_dispatch_id() ret void } define void @func_indirect_use_workgroup_id_y_workgroup_id_z() #1 { -; HSA-LABEL: define {{[^@]+}}@func_indirect_use_workgroup_id_y_workgroup_id_z -; HSA-SAME: () #[[ATTR11:[0-9]+]] { -; HSA-NEXT: call void @func_indirect_use_workgroup_id_y_workgroup_id_z() -; HSA-NEXT: ret void +; AKF_HSA-LABEL: define {{[^@]+}}@func_indirect_use_workgroup_id_y_workgroup_id_z +; 
AKF_HSA-SAME: () #[[ATTR11:[0-9]+]] { +; AKF_HSA-NEXT: call void @func_indirect_use_workgroup_id_y_workgroup_id_z() +; AKF_HSA-NEXT: ret void +; +; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@func_indirect_use_workgroup_id_y_workgroup_id_z +; ATTRIBUTOR_HSA-SAME: () #[[ATTR11:[0-9]+]] { +; ATTRIBUTOR_HSA-NEXT: unreachable ; call void @func_indirect_use_workgroup_id_y_workgroup_id_z() ret void } define void @recursive_use_workitem_id_y() #1 { -; HSA-LABEL: define {{[^@]+}}@recursive_use_workitem_id_y -; HSA-SAME: () #[[ATTR2]] { -; HSA-NEXT: [[VAL:%.*]] = call i32 @llvm.amdgcn.workitem.id.y() -; HSA-NEXT: store volatile i32 [[VAL]], i32 addrspace(1)* undef, align 4 -; HSA-NEXT: call void @recursive_use_workitem_id_y() -; HSA-NEXT: ret void +; AKF_HSA-LABEL: define {{[^@]+}}@recursive_use_workitem_id_y +; AKF_HSA-SAME: () #[[ATTR2]] { +; AKF_HSA-NEXT: [[VAL:%.*]] = call i32 @llvm.amdgcn.workitem.id.y() +; AKF_HSA-NEXT: store volatile i32 [[VAL]], i32 addrspace(1)* undef, align 4 +; AKF_HSA-NEXT: call void @recursive_use_workitem_id_y() +; AKF_HSA-NEXT: ret void +; +; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@recursive_use_workitem_id_y +; ATTRIBUTOR_HSA-SAME: () #[[ATTR12:[0-9]+]] { +; ATTRIBUTOR_HSA-NEXT: [[VAL:%.*]] = call i32 @llvm.amdgcn.workitem.id.y() +; ATTRIBUTOR_HSA-NEXT: store volatile i32 [[VAL]], i32 addrspace(1)* undef, align 4 +; ATTRIBUTOR_HSA-NEXT: call void @recursive_use_workitem_id_y() #[[ATTR23:[0-9]+]] +; ATTRIBUTOR_HSA-NEXT: unreachable ; %val = call i32 @llvm.amdgcn.workitem.id.y() store volatile i32 %val, i32 addrspace(1)* undef @@ -294,10 +371,15 @@ define void @recursive_use_workitem_id_y() #1 { } define void @call_recursive_use_workitem_id_y() #1 { -; HSA-LABEL: define {{[^@]+}}@call_recursive_use_workitem_id_y -; HSA-SAME: () #[[ATTR2]] { -; HSA-NEXT: call void @recursive_use_workitem_id_y() -; HSA-NEXT: ret void +; AKF_HSA-LABEL: define {{[^@]+}}@call_recursive_use_workitem_id_y +; AKF_HSA-SAME: () #[[ATTR2]] { +; AKF_HSA-NEXT: call void @recursive_use_workitem_id_y() +; AKF_HSA-NEXT: ret void +; +; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@call_recursive_use_workitem_id_y +; ATTRIBUTOR_HSA-SAME: () #[[ATTR2]] { +; ATTRIBUTOR_HSA-NEXT: call void @recursive_use_workitem_id_y() #[[ATTR23]] +; ATTRIBUTOR_HSA-NEXT: unreachable ; call void @recursive_use_workitem_id_y() ret void @@ -315,12 +397,19 @@ define void @use_group_to_flat_addrspacecast(i32 addrspace(3)* %ptr) #1 { ret void } + define void @use_group_to_flat_addrspacecast_gfx9(i32 addrspace(3)* %ptr) #2 { -; HSA-LABEL: define {{[^@]+}}@use_group_to_flat_addrspacecast_gfx9 -; HSA-SAME: (i32 addrspace(3)* [[PTR:%.*]]) #[[ATTR12:[0-9]+]] { -; HSA-NEXT: [[STOF:%.*]] = addrspacecast i32 addrspace(3)* [[PTR]] to i32 addrspace(4)* -; HSA-NEXT: store volatile i32 0, i32 addrspace(4)* [[STOF]], align 4 -; HSA-NEXT: ret void +; AKF_HSA-LABEL: define {{[^@]+}}@use_group_to_flat_addrspacecast_gfx9 +; AKF_HSA-SAME: (i32 addrspace(3)* [[PTR:%.*]]) #[[ATTR12:[0-9]+]] { +; AKF_HSA-NEXT: [[STOF:%.*]] = addrspacecast i32 addrspace(3)* [[PTR]] to i32 addrspace(4)* +; AKF_HSA-NEXT: store volatile i32 0, i32 addrspace(4)* [[STOF]], align 4 +; AKF_HSA-NEXT: ret void +; +; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@use_group_to_flat_addrspacecast_gfx9 +; ATTRIBUTOR_HSA-SAME: (i32 addrspace(3)* [[PTR:%.*]]) #[[ATTR13:[0-9]+]] { +; ATTRIBUTOR_HSA-NEXT: [[STOF:%.*]] = addrspacecast i32 addrspace(3)* [[PTR]] to i32 addrspace(4)* +; ATTRIBUTOR_HSA-NEXT: store volatile i32 0, i32 addrspace(4)* [[STOF]], align 4 +; ATTRIBUTOR_HSA-NEXT: ret void ; %stof = 
addrspacecast i32 addrspace(3)* %ptr to i32 addrspace(4)* store volatile i32 0, i32 addrspace(4)* %stof @@ -328,12 +417,19 @@ define void @use_group_to_flat_addrspacecast_gfx9(i32 addrspace(3)* %ptr) #2 { } define void @use_group_to_flat_addrspacecast_queue_ptr_gfx9(i32 addrspace(3)* %ptr) #2 { -; HSA-LABEL: define {{[^@]+}}@use_group_to_flat_addrspacecast_queue_ptr_gfx9 -; HSA-SAME: (i32 addrspace(3)* [[PTR:%.*]]) #[[ATTR13:[0-9]+]] { -; HSA-NEXT: [[STOF:%.*]] = addrspacecast i32 addrspace(3)* [[PTR]] to i32 addrspace(4)* -; HSA-NEXT: store volatile i32 0, i32 addrspace(4)* [[STOF]], align 4 -; HSA-NEXT: call void @func_indirect_use_queue_ptr() -; HSA-NEXT: ret void +; AKF_HSA-LABEL: define {{[^@]+}}@use_group_to_flat_addrspacecast_queue_ptr_gfx9 +; AKF_HSA-SAME: (i32 addrspace(3)* [[PTR:%.*]]) #[[ATTR13:[0-9]+]] { +; AKF_HSA-NEXT: [[STOF:%.*]] = addrspacecast i32 addrspace(3)* [[PTR]] to i32 addrspace(4)* +; AKF_HSA-NEXT: store volatile i32 0, i32 addrspace(4)* [[STOF]], align 4 +; AKF_HSA-NEXT: call void @func_indirect_use_queue_ptr() +; AKF_HSA-NEXT: ret void +; +; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@use_group_to_flat_addrspacecast_queue_ptr_gfx9 +; ATTRIBUTOR_HSA-SAME: (i32 addrspace(3)* [[PTR:%.*]]) #[[ATTR14:[0-9]+]] { +; ATTRIBUTOR_HSA-NEXT: [[STOF:%.*]] = addrspacecast i32 addrspace(3)* [[PTR]] to i32 addrspace(4)* +; ATTRIBUTOR_HSA-NEXT: store volatile i32 0, i32 addrspace(4)* [[STOF]], align 4 +; ATTRIBUTOR_HSA-NEXT: call void @func_indirect_use_queue_ptr() #[[ATTR22]] +; ATTRIBUTOR_HSA-NEXT: ret void ; %stof = addrspacecast i32 addrspace(3)* %ptr to i32 addrspace(4)* store volatile i32 0, i32 addrspace(4)* %stof @@ -342,63 +438,94 @@ define void @use_group_to_flat_addrspacecast_queue_ptr_gfx9(i32 addrspace(3)* %p } define void @indirect_use_group_to_flat_addrspacecast() #1 { -; HSA-LABEL: define {{[^@]+}}@indirect_use_group_to_flat_addrspacecast -; HSA-SAME: () #[[ATTR8]] { -; HSA-NEXT: call void @use_group_to_flat_addrspacecast(i32 addrspace(3)* null) -; HSA-NEXT: ret void +; AKF_HSA-LABEL: define {{[^@]+}}@indirect_use_group_to_flat_addrspacecast +; AKF_HSA-SAME: () #[[ATTR8]] { +; AKF_HSA-NEXT: call void @use_group_to_flat_addrspacecast(i32 addrspace(3)* null) +; AKF_HSA-NEXT: ret void +; +; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@indirect_use_group_to_flat_addrspacecast +; ATTRIBUTOR_HSA-SAME: () #[[ATTR8]] { +; ATTRIBUTOR_HSA-NEXT: call void @use_group_to_flat_addrspacecast(i32 addrspace(3)* null) #[[ATTR22]] +; ATTRIBUTOR_HSA-NEXT: ret void ; call void @use_group_to_flat_addrspacecast(i32 addrspace(3)* null) ret void } define void @indirect_use_group_to_flat_addrspacecast_gfx9() #1 { -; HSA-LABEL: define {{[^@]+}}@indirect_use_group_to_flat_addrspacecast_gfx9 -; HSA-SAME: () #[[ATTR11]] { -; HSA-NEXT: call void @use_group_to_flat_addrspacecast_gfx9(i32 addrspace(3)* null) -; HSA-NEXT: ret void +; AKF_HSA-LABEL: define {{[^@]+}}@indirect_use_group_to_flat_addrspacecast_gfx9 +; AKF_HSA-SAME: () #[[ATTR11]] { +; AKF_HSA-NEXT: call void @use_group_to_flat_addrspacecast_gfx9(i32 addrspace(3)* null) +; AKF_HSA-NEXT: ret void +; +; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@indirect_use_group_to_flat_addrspacecast_gfx9 +; ATTRIBUTOR_HSA-SAME: () #[[ATTR15:[0-9]+]] { +; ATTRIBUTOR_HSA-NEXT: call void @use_group_to_flat_addrspacecast_gfx9(i32 addrspace(3)* null) #[[ATTR22]] +; ATTRIBUTOR_HSA-NEXT: ret void ; call void @use_group_to_flat_addrspacecast_gfx9(i32 addrspace(3)* null) ret void } define void @indirect_use_group_to_flat_addrspacecast_queue_ptr_gfx9() #1 { -; HSA-LABEL: 
define {{[^@]+}}@indirect_use_group_to_flat_addrspacecast_queue_ptr_gfx9 -; HSA-SAME: () #[[ATTR8]] { -; HSA-NEXT: call void @use_group_to_flat_addrspacecast_queue_ptr_gfx9(i32 addrspace(3)* null) -; HSA-NEXT: ret void +; AKF_HSA-LABEL: define {{[^@]+}}@indirect_use_group_to_flat_addrspacecast_queue_ptr_gfx9 +; AKF_HSA-SAME: () #[[ATTR8]] { +; AKF_HSA-NEXT: call void @use_group_to_flat_addrspacecast_queue_ptr_gfx9(i32 addrspace(3)* null) +; AKF_HSA-NEXT: ret void +; +; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@indirect_use_group_to_flat_addrspacecast_queue_ptr_gfx9 +; ATTRIBUTOR_HSA-SAME: () #[[ATTR8]] { +; ATTRIBUTOR_HSA-NEXT: call void @use_group_to_flat_addrspacecast_queue_ptr_gfx9(i32 addrspace(3)* null) #[[ATTR22]] +; ATTRIBUTOR_HSA-NEXT: ret void ; call void @use_group_to_flat_addrspacecast_queue_ptr_gfx9(i32 addrspace(3)* null) ret void } define void @use_kernarg_segment_ptr() #1 { -; HSA-LABEL: define {{[^@]+}}@use_kernarg_segment_ptr -; HSA-SAME: () #[[ATTR14:[0-9]+]] { -; HSA-NEXT: [[KERNARG_SEGMENT_PTR:%.*]] = call i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() -; HSA-NEXT: store volatile i8 addrspace(4)* [[KERNARG_SEGMENT_PTR]], i8 addrspace(4)* addrspace(1)* undef, align 8 -; HSA-NEXT: ret void +; AKF_HSA-LABEL: define {{[^@]+}}@use_kernarg_segment_ptr +; AKF_HSA-SAME: () #[[ATTR14:[0-9]+]] { +; AKF_HSA-NEXT: [[KERNARG_SEGMENT_PTR:%.*]] = call i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() +; AKF_HSA-NEXT: store volatile i8 addrspace(4)* [[KERNARG_SEGMENT_PTR]], i8 addrspace(4)* addrspace(1)* undef, align 8 +; AKF_HSA-NEXT: ret void +; +; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@use_kernarg_segment_ptr +; ATTRIBUTOR_HSA-SAME: () #[[ATTR16:[0-9]+]] { +; ATTRIBUTOR_HSA-NEXT: [[KERNARG_SEGMENT_PTR:%.*]] = call i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() +; ATTRIBUTOR_HSA-NEXT: store volatile i8 addrspace(4)* [[KERNARG_SEGMENT_PTR]], i8 addrspace(4)* addrspace(1)* undef, align 8 +; ATTRIBUTOR_HSA-NEXT: ret void ; %kernarg.segment.ptr = call i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() store volatile i8 addrspace(4)* %kernarg.segment.ptr, i8 addrspace(4)* addrspace(1)* undef ret void } - define void @func_indirect_use_kernarg_segment_ptr() #1 { -; HSA-LABEL: define {{[^@]+}}@func_indirect_use_kernarg_segment_ptr -; HSA-SAME: () #[[ATTR11]] { -; HSA-NEXT: call void @use_kernarg_segment_ptr() -; HSA-NEXT: ret void +; AKF_HSA-LABEL: define {{[^@]+}}@func_indirect_use_kernarg_segment_ptr +; AKF_HSA-SAME: () #[[ATTR11]] { +; AKF_HSA-NEXT: call void @use_kernarg_segment_ptr() +; AKF_HSA-NEXT: ret void +; +; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@func_indirect_use_kernarg_segment_ptr +; ATTRIBUTOR_HSA-SAME: () #[[ATTR15]] { +; ATTRIBUTOR_HSA-NEXT: call void @use_kernarg_segment_ptr() #[[ATTR22]] +; ATTRIBUTOR_HSA-NEXT: ret void ; call void @use_kernarg_segment_ptr() ret void } define amdgpu_kernel void @kern_use_implicitarg_ptr() #1 { -; HSA-LABEL: define {{[^@]+}}@kern_use_implicitarg_ptr -; HSA-SAME: () #[[ATTR15:[0-9]+]] { -; HSA-NEXT: [[IMPLICITARG_PTR:%.*]] = call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr() -; HSA-NEXT: store volatile i8 addrspace(4)* [[IMPLICITARG_PTR]], i8 addrspace(4)* addrspace(1)* undef, align 8 -; HSA-NEXT: ret void +; AKF_HSA-LABEL: define {{[^@]+}}@kern_use_implicitarg_ptr +; AKF_HSA-SAME: () #[[ATTR15:[0-9]+]] { +; AKF_HSA-NEXT: [[IMPLICITARG_PTR:%.*]] = call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr() +; AKF_HSA-NEXT: store volatile i8 addrspace(4)* [[IMPLICITARG_PTR]], i8 addrspace(4)* addrspace(1)* undef, align 8 +; 
AKF_HSA-NEXT: ret void +; +; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@kern_use_implicitarg_ptr +; ATTRIBUTOR_HSA-SAME: () #[[ATTR17:[0-9]+]] { +; ATTRIBUTOR_HSA-NEXT: [[IMPLICITARG_PTR:%.*]] = call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr() +; ATTRIBUTOR_HSA-NEXT: store volatile i8 addrspace(4)* [[IMPLICITARG_PTR]], i8 addrspace(4)* addrspace(1)* undef, align 8 +; ATTRIBUTOR_HSA-NEXT: ret void ; %implicitarg.ptr = call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr() store volatile i8 addrspace(4)* %implicitarg.ptr, i8 addrspace(4)* addrspace(1)* undef @@ -406,11 +533,17 @@ define amdgpu_kernel void @kern_use_implicitarg_ptr() #1 { } define void @use_implicitarg_ptr() #1 { -; HSA-LABEL: define {{[^@]+}}@use_implicitarg_ptr -; HSA-SAME: () #[[ATTR16:[0-9]+]] { -; HSA-NEXT: [[IMPLICITARG_PTR:%.*]] = call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr() -; HSA-NEXT: store volatile i8 addrspace(4)* [[IMPLICITARG_PTR]], i8 addrspace(4)* addrspace(1)* undef, align 8 -; HSA-NEXT: ret void +; AKF_HSA-LABEL: define {{[^@]+}}@use_implicitarg_ptr +; AKF_HSA-SAME: () #[[ATTR16:[0-9]+]] { +; AKF_HSA-NEXT: [[IMPLICITARG_PTR:%.*]] = call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr() +; AKF_HSA-NEXT: store volatile i8 addrspace(4)* [[IMPLICITARG_PTR]], i8 addrspace(4)* addrspace(1)* undef, align 8 +; AKF_HSA-NEXT: ret void +; +; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@use_implicitarg_ptr +; ATTRIBUTOR_HSA-SAME: () #[[ATTR17]] { +; ATTRIBUTOR_HSA-NEXT: [[IMPLICITARG_PTR:%.*]] = call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr() +; ATTRIBUTOR_HSA-NEXT: store volatile i8 addrspace(4)* [[IMPLICITARG_PTR]], i8 addrspace(4)* addrspace(1)* undef, align 8 +; ATTRIBUTOR_HSA-NEXT: ret void ; %implicitarg.ptr = call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr() store volatile i8 addrspace(4)* %implicitarg.ptr, i8 addrspace(4)* addrspace(1)* undef @@ -418,10 +551,15 @@ define void @use_implicitarg_ptr() #1 { } define void @func_indirect_use_implicitarg_ptr() #1 { -; HSA-LABEL: define {{[^@]+}}@func_indirect_use_implicitarg_ptr -; HSA-SAME: () #[[ATTR16]] { -; HSA-NEXT: call void @use_implicitarg_ptr() -; HSA-NEXT: ret void +; AKF_HSA-LABEL: define {{[^@]+}}@func_indirect_use_implicitarg_ptr +; AKF_HSA-SAME: () #[[ATTR16]] { +; AKF_HSA-NEXT: call void @use_implicitarg_ptr() +; AKF_HSA-NEXT: ret void +; +; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@func_indirect_use_implicitarg_ptr +; ATTRIBUTOR_HSA-SAME: () #[[ATTR17]] { +; ATTRIBUTOR_HSA-NEXT: call void @use_implicitarg_ptr() #[[ATTR22]] +; ATTRIBUTOR_HSA-NEXT: ret void ; call void @use_implicitarg_ptr() ret void @@ -429,70 +567,99 @@ define void @func_indirect_use_implicitarg_ptr() #1 { declare void @external.func() #3 +; This function gets deleted. 
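+; With the Attributor, the calls in @func_call_defined and @func_kern_defined
+; are proven to have no effect and are removed, leaving this internal
+; function dead.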
define internal void @defined.func() #3 { -; HSA-LABEL: define {{[^@]+}}@defined.func -; HSA-SAME: () #[[ATTR17:[0-9]+]] { -; HSA-NEXT: ret void +; AKF_HSA-LABEL: define {{[^@]+}}@defined.func +; AKF_HSA-SAME: () #[[ATTR17:[0-9]+]] { +; AKF_HSA-NEXT: ret void ; ret void } define void @func_call_external() #3 { -; HSA-LABEL: define {{[^@]+}}@func_call_external -; HSA-SAME: () #[[ATTR17]] { -; HSA-NEXT: call void @external.func() -; HSA-NEXT: ret void +; AKF_HSA-LABEL: define {{[^@]+}}@func_call_external +; AKF_HSA-SAME: () #[[ATTR17]] { +; AKF_HSA-NEXT: call void @external.func() +; AKF_HSA-NEXT: ret void +; +; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@func_call_external +; ATTRIBUTOR_HSA-SAME: () #[[ATTR18:[0-9]+]] { +; ATTRIBUTOR_HSA-NEXT: call void @external.func() #[[ATTR22]] +; ATTRIBUTOR_HSA-NEXT: ret void ; call void @external.func() ret void } define void @func_call_defined() #3 { -; HSA-LABEL: define {{[^@]+}}@func_call_defined -; HSA-SAME: () #[[ATTR17]] { -; HSA-NEXT: call void @defined.func() -; HSA-NEXT: ret void +; AKF_HSA-LABEL: define {{[^@]+}}@func_call_defined +; AKF_HSA-SAME: () #[[ATTR17]] { +; AKF_HSA-NEXT: call void @defined.func() +; AKF_HSA-NEXT: ret void +; +; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@func_call_defined +; ATTRIBUTOR_HSA-SAME: () #[[ATTR19:[0-9]+]] { +; ATTRIBUTOR_HSA-NEXT: ret void ; call void @defined.func() ret void } - define void @func_call_asm() #3 { -; HSA-LABEL: define {{[^@]+}}@func_call_asm -; HSA-SAME: () #[[ATTR18:[0-9]+]] { -; HSA-NEXT: call void asm sideeffect "", ""() #[[ATTR18]] -; HSA-NEXT: ret void +; AKF_HSA-LABEL: define {{[^@]+}}@func_call_asm +; AKF_HSA-SAME: () #[[ATTR18:[0-9]+]] { +; AKF_HSA-NEXT: call void asm sideeffect "", ""() #[[ATTR18]] +; AKF_HSA-NEXT: ret void +; +; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@func_call_asm +; ATTRIBUTOR_HSA-SAME: () #[[ATTR19]] { +; ATTRIBUTOR_HSA-NEXT: call void asm sideeffect "", ""() #[[ATTR22]] +; ATTRIBUTOR_HSA-NEXT: ret void ; call void asm sideeffect "", ""() #3 ret void } define amdgpu_kernel void @kern_call_external() #3 { -; HSA-LABEL: define {{[^@]+}}@kern_call_external -; HSA-SAME: () #[[ATTR19:[0-9]+]] { -; HSA-NEXT: call void @external.func() -; HSA-NEXT: ret void +; AKF_HSA-LABEL: define {{[^@]+}}@kern_call_external +; AKF_HSA-SAME: () #[[ATTR19:[0-9]+]] { +; AKF_HSA-NEXT: call void @external.func() +; AKF_HSA-NEXT: ret void +; +; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@kern_call_external +; ATTRIBUTOR_HSA-SAME: () #[[ATTR20:[0-9]+]] { +; ATTRIBUTOR_HSA-NEXT: call void @external.func() #[[ATTR22]] +; ATTRIBUTOR_HSA-NEXT: ret void ; call void @external.func() ret void } define amdgpu_kernel void @func_kern_defined() #3 { -; HSA-LABEL: define {{[^@]+}}@func_kern_defined -; HSA-SAME: () #[[ATTR19]] { -; HSA-NEXT: call void @defined.func() -; HSA-NEXT: ret void +; AKF_HSA-LABEL: define {{[^@]+}}@func_kern_defined +; AKF_HSA-SAME: () #[[ATTR19]] { +; AKF_HSA-NEXT: call void @defined.func() +; AKF_HSA-NEXT: ret void +; +; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@func_kern_defined +; ATTRIBUTOR_HSA-SAME: () #[[ATTR19]] { +; ATTRIBUTOR_HSA-NEXT: ret void ; call void @defined.func() ret void } define i32 @use_dispatch_ptr_ret_type() #1 { -; HSA-LABEL: define {{[^@]+}}@use_dispatch_ptr_ret_type -; HSA-SAME: () #[[ATTR20:[0-9]+]] { -; HSA-NEXT: [[DISPATCH_PTR:%.*]] = call i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() -; HSA-NEXT: store volatile i8 addrspace(4)* [[DISPATCH_PTR]], i8 addrspace(4)* addrspace(1)* undef, align 8 -; HSA-NEXT: ret i32 0 +; AKF_HSA-LABEL: define 
{{[^@]+}}@use_dispatch_ptr_ret_type +; AKF_HSA-SAME: () #[[ATTR20:[0-9]+]] { +; AKF_HSA-NEXT: [[DISPATCH_PTR:%.*]] = call i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() +; AKF_HSA-NEXT: store volatile i8 addrspace(4)* [[DISPATCH_PTR]], i8 addrspace(4)* addrspace(1)* undef, align 8 +; AKF_HSA-NEXT: ret i32 0 +; +; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@use_dispatch_ptr_ret_type +; ATTRIBUTOR_HSA-SAME: () #[[ATTR21:[0-9]+]] { +; ATTRIBUTOR_HSA-NEXT: [[DISPATCH_PTR:%.*]] = call i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() +; ATTRIBUTOR_HSA-NEXT: store volatile i8 addrspace(4)* [[DISPATCH_PTR]], i8 addrspace(4)* addrspace(1)* undef, align 8 +; ATTRIBUTOR_HSA-NEXT: ret i32 0 ; %dispatch.ptr = call i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() store volatile i8 addrspace(4)* %dispatch.ptr, i8 addrspace(4)* addrspace(1)* undef @@ -500,11 +667,17 @@ define i32 @use_dispatch_ptr_ret_type() #1 { } define float @func_indirect_use_dispatch_ptr_constexpr_cast_func() #1 { -; HSA-LABEL: define {{[^@]+}}@func_indirect_use_dispatch_ptr_constexpr_cast_func -; HSA-SAME: () #[[ATTR20]] { -; HSA-NEXT: [[F:%.*]] = call float bitcast (i32 ()* @use_dispatch_ptr_ret_type to float ()*)() -; HSA-NEXT: [[FADD:%.*]] = fadd float [[F]], 1.000000e+00 -; HSA-NEXT: ret float [[FADD]] +; AKF_HSA-LABEL: define {{[^@]+}}@func_indirect_use_dispatch_ptr_constexpr_cast_func +; AKF_HSA-SAME: () #[[ATTR20]] { +; AKF_HSA-NEXT: [[F:%.*]] = call float bitcast (i32 ()* @use_dispatch_ptr_ret_type to float ()*)() +; AKF_HSA-NEXT: [[FADD:%.*]] = fadd float [[F]], 1.000000e+00 +; AKF_HSA-NEXT: ret float [[FADD]] +; +; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@func_indirect_use_dispatch_ptr_constexpr_cast_func +; ATTRIBUTOR_HSA-SAME: () #[[ATTR21]] { +; ATTRIBUTOR_HSA-NEXT: [[F:%.*]] = call float bitcast (i32 ()* @use_dispatch_ptr_ret_type to float ()*)() +; ATTRIBUTOR_HSA-NEXT: [[FADD:%.*]] = fadd float [[F]], 1.000000e+00 +; ATTRIBUTOR_HSA-NEXT: ret float [[FADD]] ; %f = call float bitcast (i32()* @use_dispatch_ptr_ret_type to float()*)() %fadd = fadd float %f, 1.0 @@ -517,25 +690,50 @@ attributes #2 = { nounwind "target-cpu"="gfx900" } attributes #3 = { nounwind } ;. 
-; HSA: attributes #[[ATTR0:[0-9]+]] = { nounwind readnone speculatable willreturn } -; HSA: attributes #[[ATTR1]] = { nounwind "amdgpu-work-item-id-x" "target-cpu"="fiji" "uniform-work-group-size"="false" } -; HSA: attributes #[[ATTR2]] = { nounwind "amdgpu-work-item-id-y" "target-cpu"="fiji" "uniform-work-group-size"="false" } -; HSA: attributes #[[ATTR3]] = { nounwind "amdgpu-work-item-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" } -; HSA: attributes #[[ATTR4]] = { nounwind "amdgpu-work-group-id-x" "target-cpu"="fiji" "uniform-work-group-size"="false" } -; HSA: attributes #[[ATTR5]] = { nounwind "amdgpu-work-group-id-y" "target-cpu"="fiji" "uniform-work-group-size"="false" } -; HSA: attributes #[[ATTR6]] = { nounwind "amdgpu-work-group-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" } -; HSA: attributes #[[ATTR7]] = { nounwind "amdgpu-dispatch-ptr" "target-cpu"="fiji" "uniform-work-group-size"="false" } -; HSA: attributes #[[ATTR8]] = { nounwind "amdgpu-queue-ptr" "target-cpu"="fiji" "uniform-work-group-size"="false" } -; HSA: attributes #[[ATTR9]] = { nounwind "amdgpu-dispatch-id" "target-cpu"="fiji" "uniform-work-group-size"="false" } -; HSA: attributes #[[ATTR10]] = { nounwind "amdgpu-work-group-id-y" "amdgpu-work-group-id-z" "target-cpu"="fiji" } -; HSA: attributes #[[ATTR11]] = { nounwind "target-cpu"="fiji" "uniform-work-group-size"="false" } -; HSA: attributes #[[ATTR12]] = { nounwind "target-cpu"="gfx900" "uniform-work-group-size"="false" } -; HSA: attributes #[[ATTR13]] = { nounwind "amdgpu-queue-ptr" "target-cpu"="gfx900" "uniform-work-group-size"="false" } -; HSA: attributes #[[ATTR14]] = { nounwind "amdgpu-kernarg-segment-ptr" "target-cpu"="fiji" "uniform-work-group-size"="false" } -; HSA: attributes #[[ATTR15]] = { nounwind "amdgpu-implicitarg-ptr" "target-cpu"="fiji" } -; HSA: attributes #[[ATTR16]] = { nounwind "amdgpu-implicitarg-ptr" "target-cpu"="fiji" "uniform-work-group-size"="false" } -; HSA: attributes #[[ATTR17]] = { nounwind "uniform-work-group-size"="false" } -; HSA: attributes #[[ATTR18]] = { nounwind } -; HSA: attributes #[[ATTR19]] = { nounwind "amdgpu-calls" "uniform-work-group-size"="false" } -; HSA: attributes #[[ATTR20]] = { nounwind "amdgpu-dispatch-id" "amdgpu-dispatch-ptr" "amdgpu-implicitarg-ptr" "amdgpu-queue-ptr" "amdgpu-work-group-id-x" "amdgpu-work-group-id-y" "amdgpu-work-group-id-z" "amdgpu-work-item-id-x" "amdgpu-work-item-id-y" "amdgpu-work-item-id-z" "target-cpu"="fiji" } +; AKF_HSA: attributes #[[ATTR0:[0-9]+]] = { nounwind readnone speculatable willreturn } +; AKF_HSA: attributes #[[ATTR1]] = { nounwind "amdgpu-work-item-id-x" "target-cpu"="fiji" "uniform-work-group-size"="false" } +; AKF_HSA: attributes #[[ATTR2]] = { nounwind "amdgpu-work-item-id-y" "target-cpu"="fiji" "uniform-work-group-size"="false" } +; AKF_HSA: attributes #[[ATTR3]] = { nounwind "amdgpu-work-item-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" } +; AKF_HSA: attributes #[[ATTR4]] = { nounwind "amdgpu-work-group-id-x" "target-cpu"="fiji" "uniform-work-group-size"="false" } +; AKF_HSA: attributes #[[ATTR5]] = { nounwind "amdgpu-work-group-id-y" "target-cpu"="fiji" "uniform-work-group-size"="false" } +; AKF_HSA: attributes #[[ATTR6]] = { nounwind "amdgpu-work-group-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" } +; AKF_HSA: attributes #[[ATTR7]] = { nounwind "amdgpu-dispatch-ptr" "target-cpu"="fiji" "uniform-work-group-size"="false" } +; AKF_HSA: attributes #[[ATTR8]] = { nounwind "amdgpu-queue-ptr" "target-cpu"="fiji" 
"uniform-work-group-size"="false" } +; AKF_HSA: attributes #[[ATTR9]] = { nounwind "amdgpu-dispatch-id" "target-cpu"="fiji" "uniform-work-group-size"="false" } +; AKF_HSA: attributes #[[ATTR10]] = { nounwind "amdgpu-work-group-id-y" "amdgpu-work-group-id-z" "target-cpu"="fiji" } +; AKF_HSA: attributes #[[ATTR11]] = { nounwind "target-cpu"="fiji" "uniform-work-group-size"="false" } +; AKF_HSA: attributes #[[ATTR12]] = { nounwind "target-cpu"="gfx900" "uniform-work-group-size"="false" } +; AKF_HSA: attributes #[[ATTR13]] = { nounwind "amdgpu-queue-ptr" "target-cpu"="gfx900" "uniform-work-group-size"="false" } +; AKF_HSA: attributes #[[ATTR14]] = { nounwind "amdgpu-kernarg-segment-ptr" "target-cpu"="fiji" "uniform-work-group-size"="false" } +; AKF_HSA: attributes #[[ATTR15]] = { nounwind "amdgpu-implicitarg-ptr" "target-cpu"="fiji" } +; AKF_HSA: attributes #[[ATTR16]] = { nounwind "amdgpu-implicitarg-ptr" "target-cpu"="fiji" "uniform-work-group-size"="false" } +; AKF_HSA: attributes #[[ATTR17]] = { nounwind "uniform-work-group-size"="false" } +; AKF_HSA: attributes #[[ATTR18]] = { nounwind } +; AKF_HSA: attributes #[[ATTR19]] = { nounwind "amdgpu-calls" "uniform-work-group-size"="false" } +; AKF_HSA: attributes #[[ATTR20]] = { nounwind "amdgpu-dispatch-id" "amdgpu-dispatch-ptr" "amdgpu-implicitarg-ptr" "amdgpu-queue-ptr" "amdgpu-work-group-id-x" "amdgpu-work-group-id-y" "amdgpu-work-group-id-z" "amdgpu-work-item-id-x" "amdgpu-work-item-id-y" "amdgpu-work-item-id-z" "target-cpu"="fiji" } +;. +; ATTRIBUTOR_HSA: attributes #[[ATTR0:[0-9]+]] = { nounwind readnone speculatable willreturn "uniform-work-group-size"="false" } +; ATTRIBUTOR_HSA: attributes #[[ATTR1]] = { nounwind "amdgpu-work-item-id-x" "target-cpu"="fiji" "uniform-work-group-size"="false" } +; ATTRIBUTOR_HSA: attributes #[[ATTR2]] = { nounwind "amdgpu-work-item-id-y" "target-cpu"="fiji" "uniform-work-group-size"="false" } +; ATTRIBUTOR_HSA: attributes #[[ATTR3]] = { nounwind "amdgpu-work-item-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" } +; ATTRIBUTOR_HSA: attributes #[[ATTR4]] = { nounwind "amdgpu-work-group-id-x" "target-cpu"="fiji" "uniform-work-group-size"="false" } +; ATTRIBUTOR_HSA: attributes #[[ATTR5]] = { nounwind "amdgpu-work-group-id-y" "target-cpu"="fiji" "uniform-work-group-size"="false" } +; ATTRIBUTOR_HSA: attributes #[[ATTR6]] = { nounwind "amdgpu-work-group-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" } +; ATTRIBUTOR_HSA: attributes #[[ATTR7]] = { nounwind "amdgpu-dispatch-ptr" "target-cpu"="fiji" "uniform-work-group-size"="false" } +; ATTRIBUTOR_HSA: attributes #[[ATTR8]] = { nounwind "amdgpu-queue-ptr" "target-cpu"="fiji" "uniform-work-group-size"="false" } +; ATTRIBUTOR_HSA: attributes #[[ATTR9]] = { nounwind "amdgpu-dispatch-id" "target-cpu"="fiji" "uniform-work-group-size"="false" } +; ATTRIBUTOR_HSA: attributes #[[ATTR10]] = { nounwind "amdgpu-work-group-id-y" "amdgpu-work-group-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" } +; ATTRIBUTOR_HSA: attributes #[[ATTR11]] = { noreturn nounwind readnone "target-cpu"="fiji" "uniform-work-group-size"="false" } +; ATTRIBUTOR_HSA: attributes #[[ATTR12]] = { noreturn nounwind "amdgpu-work-item-id-y" "target-cpu"="fiji" "uniform-work-group-size"="false" } +; ATTRIBUTOR_HSA: attributes #[[ATTR13]] = { nounwind "target-cpu"="gfx900" "uniform-work-group-size"="false" } +; ATTRIBUTOR_HSA: attributes #[[ATTR14]] = { nounwind "amdgpu-queue-ptr" "target-cpu"="gfx900" "uniform-work-group-size"="false" } +; ATTRIBUTOR_HSA: attributes 
#[[ATTR15]] = { nounwind "target-cpu"="fiji" "uniform-work-group-size"="false" } +; ATTRIBUTOR_HSA: attributes #[[ATTR16]] = { nounwind "amdgpu-kernarg-segment-ptr" "target-cpu"="fiji" "uniform-work-group-size"="false" } +; ATTRIBUTOR_HSA: attributes #[[ATTR17]] = { nounwind "amdgpu-implicitarg-ptr" "target-cpu"="fiji" "uniform-work-group-size"="false" } +; ATTRIBUTOR_HSA: attributes #[[ATTR18]] = { nounwind "amdgpu-dispatch-id" "amdgpu-dispatch-ptr" "amdgpu-implicitarg-ptr" "amdgpu-queue-ptr" "amdgpu-work-group-id-x" "amdgpu-work-group-id-y" "amdgpu-work-group-id-z" "amdgpu-work-item-id-x" "amdgpu-work-item-id-y" "amdgpu-work-item-id-z" "uniform-work-group-size"="false" } +; ATTRIBUTOR_HSA: attributes #[[ATTR19]] = { nounwind "uniform-work-group-size"="false" } +; ATTRIBUTOR_HSA: attributes #[[ATTR20]] = { nounwind "amdgpu-calls" "amdgpu-dispatch-id" "amdgpu-dispatch-ptr" "amdgpu-implicitarg-ptr" "amdgpu-queue-ptr" "amdgpu-work-group-id-x" "amdgpu-work-group-id-y" "amdgpu-work-group-id-z" "amdgpu-work-item-id-x" "amdgpu-work-item-id-y" "amdgpu-work-item-id-z" "uniform-work-group-size"="false" } +; ATTRIBUTOR_HSA: attributes #[[ATTR21]] = { nounwind "amdgpu-dispatch-id" "amdgpu-dispatch-ptr" "amdgpu-implicitarg-ptr" "amdgpu-queue-ptr" "amdgpu-work-group-id-x" "amdgpu-work-group-id-y" "amdgpu-work-group-id-z" "amdgpu-work-item-id-x" "amdgpu-work-item-id-y" "amdgpu-work-item-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" } +; ATTRIBUTOR_HSA: attributes #[[ATTR22]] = { nounwind } +; ATTRIBUTOR_HSA: attributes #[[ATTR23]] = { noreturn nounwind } ;. diff --git a/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa.ll b/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa.ll index 49a4097..7f940ca 100644 --- a/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa.ll +++ b/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-globals -; RUN: opt -mtriple=amdgcn-unknown-amdhsa -S -amdgpu-annotate-kernel-features < %s | FileCheck -check-prefix=HSA %s +; RUN: opt -mtriple=amdgcn-unknown-amdhsa -S -amdgpu-annotate-kernel-features < %s | FileCheck -check-prefixes=HSA,AKF_HSA %s +; RUN: opt -mtriple=amdgcn-unknown-amdhsa -S -amdgpu-attributor < %s | FileCheck -check-prefixes=HSA,ATTRIBUTOR_HSA %s target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5" @@ -478,18 +479,33 @@ attributes #0 = { nounwind readnone speculatable } attributes #1 = { nounwind } ;. 
-; HSA: attributes #[[ATTR0:[0-9]+]] = { nounwind readnone speculatable willreturn } -; HSA: attributes #[[ATTR1]] = { nounwind } -; HSA: attributes #[[ATTR2]] = { nounwind "amdgpu-work-group-id-y" } -; HSA: attributes #[[ATTR3]] = { nounwind "amdgpu-work-group-id-z" } -; HSA: attributes #[[ATTR4]] = { nounwind "amdgpu-work-group-id-y" "amdgpu-work-group-id-z" } -; HSA: attributes #[[ATTR5]] = { nounwind "amdgpu-work-item-id-y" } -; HSA: attributes #[[ATTR6]] = { nounwind "amdgpu-work-item-id-z" } -; HSA: attributes #[[ATTR7]] = { nounwind "amdgpu-work-group-id-y" "amdgpu-work-item-id-y" } -; HSA: attributes #[[ATTR8]] = { nounwind "amdgpu-work-item-id-y" "amdgpu-work-item-id-z" } -; HSA: attributes #[[ATTR9]] = { nounwind "amdgpu-work-group-id-y" "amdgpu-work-group-id-z" "amdgpu-work-item-id-y" "amdgpu-work-item-id-z" } -; HSA: attributes #[[ATTR10]] = { nounwind "amdgpu-dispatch-ptr" } -; HSA: attributes #[[ATTR11]] = { nounwind "amdgpu-queue-ptr" } -; HSA: attributes #[[ATTR12]] = { nounwind "amdgpu-kernarg-segment-ptr" } -; HSA: attributes #[[ATTR13]] = { nounwind "amdgpu-stack-objects" } +; AKF_HSA: attributes #[[ATTR0:[0-9]+]] = { nounwind readnone speculatable willreturn } +; AKF_HSA: attributes #[[ATTR1]] = { nounwind } +; AKF_HSA: attributes #[[ATTR2]] = { nounwind "amdgpu-work-group-id-y" } +; AKF_HSA: attributes #[[ATTR3]] = { nounwind "amdgpu-work-group-id-z" } +; AKF_HSA: attributes #[[ATTR4]] = { nounwind "amdgpu-work-group-id-y" "amdgpu-work-group-id-z" } +; AKF_HSA: attributes #[[ATTR5]] = { nounwind "amdgpu-work-item-id-y" } +; AKF_HSA: attributes #[[ATTR6]] = { nounwind "amdgpu-work-item-id-z" } +; AKF_HSA: attributes #[[ATTR7]] = { nounwind "amdgpu-work-group-id-y" "amdgpu-work-item-id-y" } +; AKF_HSA: attributes #[[ATTR8]] = { nounwind "amdgpu-work-item-id-y" "amdgpu-work-item-id-z" } +; AKF_HSA: attributes #[[ATTR9]] = { nounwind "amdgpu-work-group-id-y" "amdgpu-work-group-id-z" "amdgpu-work-item-id-y" "amdgpu-work-item-id-z" } +; AKF_HSA: attributes #[[ATTR10]] = { nounwind "amdgpu-dispatch-ptr" } +; AKF_HSA: attributes #[[ATTR11]] = { nounwind "amdgpu-queue-ptr" } +; AKF_HSA: attributes #[[ATTR12]] = { nounwind "amdgpu-kernarg-segment-ptr" } +; AKF_HSA: attributes #[[ATTR13]] = { nounwind "amdgpu-stack-objects" } +;. 
+; ATTRIBUTOR_HSA: attributes #[[ATTR0:[0-9]+]] = { nounwind readnone speculatable willreturn "uniform-work-group-size"="false" } +; ATTRIBUTOR_HSA: attributes #[[ATTR1]] = { nounwind "uniform-work-group-size"="false" } +; ATTRIBUTOR_HSA: attributes #[[ATTR2]] = { nounwind "amdgpu-work-group-id-y" "uniform-work-group-size"="false" } +; ATTRIBUTOR_HSA: attributes #[[ATTR3]] = { nounwind "amdgpu-work-group-id-z" "uniform-work-group-size"="false" } +; ATTRIBUTOR_HSA: attributes #[[ATTR4]] = { nounwind "amdgpu-work-group-id-y" "amdgpu-work-group-id-z" "uniform-work-group-size"="false" } +; ATTRIBUTOR_HSA: attributes #[[ATTR5]] = { nounwind "amdgpu-work-item-id-y" "uniform-work-group-size"="false" } +; ATTRIBUTOR_HSA: attributes #[[ATTR6]] = { nounwind "amdgpu-work-item-id-z" "uniform-work-group-size"="false" } +; ATTRIBUTOR_HSA: attributes #[[ATTR7]] = { nounwind "amdgpu-work-group-id-y" "amdgpu-work-item-id-y" "uniform-work-group-size"="false" } +; ATTRIBUTOR_HSA: attributes #[[ATTR8]] = { nounwind "amdgpu-work-item-id-y" "amdgpu-work-item-id-z" "uniform-work-group-size"="false" } +; ATTRIBUTOR_HSA: attributes #[[ATTR9]] = { nounwind "amdgpu-work-group-id-y" "amdgpu-work-group-id-z" "amdgpu-work-item-id-y" "amdgpu-work-item-id-z" "uniform-work-group-size"="false" } +; ATTRIBUTOR_HSA: attributes #[[ATTR10]] = { nounwind "amdgpu-dispatch-ptr" "uniform-work-group-size"="false" } +; ATTRIBUTOR_HSA: attributes #[[ATTR11]] = { nounwind "amdgpu-queue-ptr" "uniform-work-group-size"="false" } +; ATTRIBUTOR_HSA: attributes #[[ATTR12]] = { nounwind "amdgpu-kernarg-segment-ptr" "uniform-work-group-size"="false" } +; ATTRIBUTOR_HSA: attributes #[[ATTR13]] = { nounwind "amdgpu-stack-objects" "uniform-work-group-size"="false" } ;. diff --git a/llvm/test/CodeGen/AMDGPU/annotate-kernel-features.ll b/llvm/test/CodeGen/AMDGPU/annotate-kernel-features.ll index fd4b89a..df765fb 100644 --- a/llvm/test/CodeGen/AMDGPU/annotate-kernel-features.ll +++ b/llvm/test/CodeGen/AMDGPU/annotate-kernel-features.ll @@ -1,5 +1,6 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -S -mtriple=amdgcn-unknown-unknown -amdgpu-annotate-kernel-features < %s | FileCheck %s +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-globals +; RUN: opt -S -mtriple=amdgcn-unknown-unknown -amdgpu-annotate-kernel-features < %s | FileCheck -check-prefixes=CHECK,AKF_CHECK %s +; RUN: opt -S -mtriple=amdgcn-unknown-unknown -amdgpu-attributor < %s | FileCheck -check-prefixes=CHECK,ATTRIBUTOR_CHECK %s declare i32 @llvm.r600.read.tgid.x() #0 declare i32 @llvm.r600.read.tgid.y() #0 @@ -14,9 +15,10 @@ declare i32 @llvm.r600.read.local.size.y() #0 declare i32 @llvm.r600.read.local.size.z() #0 define amdgpu_kernel void @use_tgid_x(i32 addrspace(1)* %ptr) #1 { -; CHECK-LABEL: @use_tgid_x( +; CHECK-LABEL: define {{[^@]+}}@use_tgid_x +; CHECK-SAME: (i32 addrspace(1)* [[PTR:%.*]]) #[[ATTR1:[0-9]+]] { ; CHECK-NEXT: [[VAL:%.*]] = call i32 @llvm.r600.read.tgid.x() -; CHECK-NEXT: store i32 [[VAL]], i32 addrspace(1)* [[PTR:%.*]], align 4 +; CHECK-NEXT: store i32 [[VAL]], i32 addrspace(1)* [[PTR]], align 4 ; CHECK-NEXT: ret void ; %val = call i32 @llvm.r600.read.tgid.x() @@ -25,9 +27,10 @@ define amdgpu_kernel void @use_tgid_x(i32 addrspace(1)* %ptr) #1 { } define amdgpu_kernel void @use_tgid_y(i32 addrspace(1)* %ptr) #1 { -; CHECK-LABEL: @use_tgid_y( +; CHECK-LABEL: define {{[^@]+}}@use_tgid_y +; CHECK-SAME: (i32 addrspace(1)* [[PTR:%.*]]) #[[ATTR2:[0-9]+]] { ; 
CHECK-NEXT: [[VAL:%.*]] = call i32 @llvm.r600.read.tgid.y() -; CHECK-NEXT: store i32 [[VAL]], i32 addrspace(1)* [[PTR:%.*]], align 4 +; CHECK-NEXT: store i32 [[VAL]], i32 addrspace(1)* [[PTR]], align 4 ; CHECK-NEXT: ret void ; %val = call i32 @llvm.r600.read.tgid.y() @@ -36,9 +39,10 @@ define amdgpu_kernel void @use_tgid_y(i32 addrspace(1)* %ptr) #1 { } define amdgpu_kernel void @multi_use_tgid_y(i32 addrspace(1)* %ptr) #1 { -; CHECK-LABEL: @multi_use_tgid_y( +; CHECK-LABEL: define {{[^@]+}}@multi_use_tgid_y +; CHECK-SAME: (i32 addrspace(1)* [[PTR:%.*]]) #[[ATTR2]] { ; CHECK-NEXT: [[VAL0:%.*]] = call i32 @llvm.r600.read.tgid.y() -; CHECK-NEXT: store volatile i32 [[VAL0]], i32 addrspace(1)* [[PTR:%.*]], align 4 +; CHECK-NEXT: store volatile i32 [[VAL0]], i32 addrspace(1)* [[PTR]], align 4 ; CHECK-NEXT: [[VAL1:%.*]] = call i32 @llvm.r600.read.tgid.y() ; CHECK-NEXT: store volatile i32 [[VAL1]], i32 addrspace(1)* [[PTR]], align 4 ; CHECK-NEXT: ret void @@ -51,10 +55,11 @@ define amdgpu_kernel void @multi_use_tgid_y(i32 addrspace(1)* %ptr) #1 { } define amdgpu_kernel void @use_tgid_x_y(i32 addrspace(1)* %ptr) #1 { -; CHECK-LABEL: @use_tgid_x_y( +; CHECK-LABEL: define {{[^@]+}}@use_tgid_x_y +; CHECK-SAME: (i32 addrspace(1)* [[PTR:%.*]]) #[[ATTR2]] { ; CHECK-NEXT: [[VAL0:%.*]] = call i32 @llvm.r600.read.tgid.x() ; CHECK-NEXT: [[VAL1:%.*]] = call i32 @llvm.r600.read.tgid.y() -; CHECK-NEXT: store volatile i32 [[VAL0]], i32 addrspace(1)* [[PTR:%.*]], align 4 +; CHECK-NEXT: store volatile i32 [[VAL0]], i32 addrspace(1)* [[PTR]], align 4 ; CHECK-NEXT: store volatile i32 [[VAL1]], i32 addrspace(1)* [[PTR]], align 4 ; CHECK-NEXT: ret void ; @@ -66,9 +71,10 @@ define amdgpu_kernel void @use_tgid_x_y(i32 addrspace(1)* %ptr) #1 { } define amdgpu_kernel void @use_tgid_z(i32 addrspace(1)* %ptr) #1 { -; CHECK-LABEL: @use_tgid_z( +; CHECK-LABEL: define {{[^@]+}}@use_tgid_z +; CHECK-SAME: (i32 addrspace(1)* [[PTR:%.*]]) #[[ATTR3:[0-9]+]] { ; CHECK-NEXT: [[VAL:%.*]] = call i32 @llvm.r600.read.tgid.z() -; CHECK-NEXT: store i32 [[VAL]], i32 addrspace(1)* [[PTR:%.*]], align 4 +; CHECK-NEXT: store i32 [[VAL]], i32 addrspace(1)* [[PTR]], align 4 ; CHECK-NEXT: ret void ; %val = call i32 @llvm.r600.read.tgid.z() @@ -77,10 +83,11 @@ define amdgpu_kernel void @use_tgid_z(i32 addrspace(1)* %ptr) #1 { } define amdgpu_kernel void @use_tgid_x_z(i32 addrspace(1)* %ptr) #1 { -; CHECK-LABEL: @use_tgid_x_z( +; CHECK-LABEL: define {{[^@]+}}@use_tgid_x_z +; CHECK-SAME: (i32 addrspace(1)* [[PTR:%.*]]) #[[ATTR3]] { ; CHECK-NEXT: [[VAL0:%.*]] = call i32 @llvm.r600.read.tgid.x() ; CHECK-NEXT: [[VAL1:%.*]] = call i32 @llvm.r600.read.tgid.z() -; CHECK-NEXT: store volatile i32 [[VAL0]], i32 addrspace(1)* [[PTR:%.*]], align 4 +; CHECK-NEXT: store volatile i32 [[VAL0]], i32 addrspace(1)* [[PTR]], align 4 ; CHECK-NEXT: store volatile i32 [[VAL1]], i32 addrspace(1)* [[PTR]], align 4 ; CHECK-NEXT: ret void ; @@ -92,10 +99,11 @@ define amdgpu_kernel void @use_tgid_x_z(i32 addrspace(1)* %ptr) #1 { } define amdgpu_kernel void @use_tgid_y_z(i32 addrspace(1)* %ptr) #1 { -; CHECK-LABEL: @use_tgid_y_z( +; CHECK-LABEL: define {{[^@]+}}@use_tgid_y_z +; CHECK-SAME: (i32 addrspace(1)* [[PTR:%.*]]) #[[ATTR4:[0-9]+]] { ; CHECK-NEXT: [[VAL0:%.*]] = call i32 @llvm.r600.read.tgid.y() ; CHECK-NEXT: [[VAL1:%.*]] = call i32 @llvm.r600.read.tgid.z() -; CHECK-NEXT: store volatile i32 [[VAL0]], i32 addrspace(1)* [[PTR:%.*]], align 4 +; CHECK-NEXT: store volatile i32 [[VAL0]], i32 addrspace(1)* [[PTR]], align 4 ; CHECK-NEXT: store volatile i32 [[VAL1]], i32 
addrspace(1)* [[PTR]], align 4 ; CHECK-NEXT: ret void ; @@ -107,11 +115,12 @@ define amdgpu_kernel void @use_tgid_y_z(i32 addrspace(1)* %ptr) #1 { } define amdgpu_kernel void @use_tgid_x_y_z(i32 addrspace(1)* %ptr) #1 { -; CHECK-LABEL: @use_tgid_x_y_z( +; CHECK-LABEL: define {{[^@]+}}@use_tgid_x_y_z +; CHECK-SAME: (i32 addrspace(1)* [[PTR:%.*]]) #[[ATTR4]] { ; CHECK-NEXT: [[VAL0:%.*]] = call i32 @llvm.r600.read.tgid.x() ; CHECK-NEXT: [[VAL1:%.*]] = call i32 @llvm.r600.read.tgid.y() ; CHECK-NEXT: [[VAL2:%.*]] = call i32 @llvm.r600.read.tgid.z() -; CHECK-NEXT: store volatile i32 [[VAL0]], i32 addrspace(1)* [[PTR:%.*]], align 4 +; CHECK-NEXT: store volatile i32 [[VAL0]], i32 addrspace(1)* [[PTR]], align 4 ; CHECK-NEXT: store volatile i32 [[VAL1]], i32 addrspace(1)* [[PTR]], align 4 ; CHECK-NEXT: store volatile i32 [[VAL2]], i32 addrspace(1)* [[PTR]], align 4 ; CHECK-NEXT: ret void @@ -126,9 +135,10 @@ define amdgpu_kernel void @use_tgid_x_y_z(i32 addrspace(1)* %ptr) #1 { } define amdgpu_kernel void @use_tidig_x(i32 addrspace(1)* %ptr) #1 { -; CHECK-LABEL: @use_tidig_x( +; CHECK-LABEL: define {{[^@]+}}@use_tidig_x +; CHECK-SAME: (i32 addrspace(1)* [[PTR:%.*]]) #[[ATTR1]] { ; CHECK-NEXT: [[VAL:%.*]] = call i32 @llvm.r600.read.tidig.x() -; CHECK-NEXT: store i32 [[VAL]], i32 addrspace(1)* [[PTR:%.*]], align 4 +; CHECK-NEXT: store i32 [[VAL]], i32 addrspace(1)* [[PTR]], align 4 ; CHECK-NEXT: ret void ; %val = call i32 @llvm.r600.read.tidig.x() @@ -137,9 +147,10 @@ define amdgpu_kernel void @use_tidig_x(i32 addrspace(1)* %ptr) #1 { } define amdgpu_kernel void @use_tidig_y(i32 addrspace(1)* %ptr) #1 { -; CHECK-LABEL: @use_tidig_y( +; CHECK-LABEL: define {{[^@]+}}@use_tidig_y +; CHECK-SAME: (i32 addrspace(1)* [[PTR:%.*]]) #[[ATTR5:[0-9]+]] { ; CHECK-NEXT: [[VAL:%.*]] = call i32 @llvm.r600.read.tidig.y() -; CHECK-NEXT: store i32 [[VAL]], i32 addrspace(1)* [[PTR:%.*]], align 4 +; CHECK-NEXT: store i32 [[VAL]], i32 addrspace(1)* [[PTR]], align 4 ; CHECK-NEXT: ret void ; %val = call i32 @llvm.r600.read.tidig.y() @@ -148,9 +159,10 @@ define amdgpu_kernel void @use_tidig_y(i32 addrspace(1)* %ptr) #1 { } define amdgpu_kernel void @use_tidig_z(i32 addrspace(1)* %ptr) #1 { -; CHECK-LABEL: @use_tidig_z( +; CHECK-LABEL: define {{[^@]+}}@use_tidig_z +; CHECK-SAME: (i32 addrspace(1)* [[PTR:%.*]]) #[[ATTR6:[0-9]+]] { ; CHECK-NEXT: [[VAL:%.*]] = call i32 @llvm.r600.read.tidig.z() -; CHECK-NEXT: store i32 [[VAL]], i32 addrspace(1)* [[PTR:%.*]], align 4 +; CHECK-NEXT: store i32 [[VAL]], i32 addrspace(1)* [[PTR]], align 4 ; CHECK-NEXT: ret void ; %val = call i32 @llvm.r600.read.tidig.z() @@ -159,10 +171,11 @@ define amdgpu_kernel void @use_tidig_z(i32 addrspace(1)* %ptr) #1 { } define amdgpu_kernel void @use_tidig_x_tgid_x(i32 addrspace(1)* %ptr) #1 { -; CHECK-LABEL: @use_tidig_x_tgid_x( +; CHECK-LABEL: define {{[^@]+}}@use_tidig_x_tgid_x +; CHECK-SAME: (i32 addrspace(1)* [[PTR:%.*]]) #[[ATTR1]] { ; CHECK-NEXT: [[VAL0:%.*]] = call i32 @llvm.r600.read.tidig.x() ; CHECK-NEXT: [[VAL1:%.*]] = call i32 @llvm.r600.read.tgid.x() -; CHECK-NEXT: store volatile i32 [[VAL0]], i32 addrspace(1)* [[PTR:%.*]], align 4 +; CHECK-NEXT: store volatile i32 [[VAL0]], i32 addrspace(1)* [[PTR]], align 4 ; CHECK-NEXT: store volatile i32 [[VAL1]], i32 addrspace(1)* [[PTR]], align 4 ; CHECK-NEXT: ret void ; @@ -174,10 +187,11 @@ define amdgpu_kernel void @use_tidig_x_tgid_x(i32 addrspace(1)* %ptr) #1 { } define amdgpu_kernel void @use_tidig_y_tgid_y(i32 addrspace(1)* %ptr) #1 { -; CHECK-LABEL: @use_tidig_y_tgid_y( +; CHECK-LABEL: define 
{{[^@]+}}@use_tidig_y_tgid_y +; CHECK-SAME: (i32 addrspace(1)* [[PTR:%.*]]) #[[ATTR7:[0-9]+]] { ; CHECK-NEXT: [[VAL0:%.*]] = call i32 @llvm.r600.read.tidig.y() ; CHECK-NEXT: [[VAL1:%.*]] = call i32 @llvm.r600.read.tgid.y() -; CHECK-NEXT: store volatile i32 [[VAL0]], i32 addrspace(1)* [[PTR:%.*]], align 4 +; CHECK-NEXT: store volatile i32 [[VAL0]], i32 addrspace(1)* [[PTR]], align 4 ; CHECK-NEXT: store volatile i32 [[VAL1]], i32 addrspace(1)* [[PTR]], align 4 ; CHECK-NEXT: ret void ; @@ -189,11 +203,12 @@ define amdgpu_kernel void @use_tidig_y_tgid_y(i32 addrspace(1)* %ptr) #1 { } define amdgpu_kernel void @use_tidig_x_y_z(i32 addrspace(1)* %ptr) #1 { -; CHECK-LABEL: @use_tidig_x_y_z( +; CHECK-LABEL: define {{[^@]+}}@use_tidig_x_y_z +; CHECK-SAME: (i32 addrspace(1)* [[PTR:%.*]]) #[[ATTR8:[0-9]+]] { ; CHECK-NEXT: [[VAL0:%.*]] = call i32 @llvm.r600.read.tidig.x() ; CHECK-NEXT: [[VAL1:%.*]] = call i32 @llvm.r600.read.tidig.y() ; CHECK-NEXT: [[VAL2:%.*]] = call i32 @llvm.r600.read.tidig.z() -; CHECK-NEXT: store volatile i32 [[VAL0]], i32 addrspace(1)* [[PTR:%.*]], align 4 +; CHECK-NEXT: store volatile i32 [[VAL0]], i32 addrspace(1)* [[PTR]], align 4 ; CHECK-NEXT: store volatile i32 [[VAL1]], i32 addrspace(1)* [[PTR]], align 4 ; CHECK-NEXT: store volatile i32 [[VAL2]], i32 addrspace(1)* [[PTR]], align 4 ; CHECK-NEXT: ret void @@ -208,14 +223,15 @@ define amdgpu_kernel void @use_tidig_x_y_z(i32 addrspace(1)* %ptr) #1 { } define amdgpu_kernel void @use_all_workitems(i32 addrspace(1)* %ptr) #1 { -; CHECK-LABEL: @use_all_workitems( +; CHECK-LABEL: define {{[^@]+}}@use_all_workitems +; CHECK-SAME: (i32 addrspace(1)* [[PTR:%.*]]) #[[ATTR9:[0-9]+]] { ; CHECK-NEXT: [[VAL0:%.*]] = call i32 @llvm.r600.read.tidig.x() ; CHECK-NEXT: [[VAL1:%.*]] = call i32 @llvm.r600.read.tidig.y() ; CHECK-NEXT: [[VAL2:%.*]] = call i32 @llvm.r600.read.tidig.z() ; CHECK-NEXT: [[VAL3:%.*]] = call i32 @llvm.r600.read.tgid.x() ; CHECK-NEXT: [[VAL4:%.*]] = call i32 @llvm.r600.read.tgid.y() ; CHECK-NEXT: [[VAL5:%.*]] = call i32 @llvm.r600.read.tgid.z() -; CHECK-NEXT: store volatile i32 [[VAL0]], i32 addrspace(1)* [[PTR:%.*]], align 4 +; CHECK-NEXT: store volatile i32 [[VAL0]], i32 addrspace(1)* [[PTR]], align 4 ; CHECK-NEXT: store volatile i32 [[VAL1]], i32 addrspace(1)* [[PTR]], align 4 ; CHECK-NEXT: store volatile i32 [[VAL2]], i32 addrspace(1)* [[PTR]], align 4 ; CHECK-NEXT: store volatile i32 [[VAL3]], i32 addrspace(1)* [[PTR]], align 4 @@ -239,9 +255,10 @@ define amdgpu_kernel void @use_all_workitems(i32 addrspace(1)* %ptr) #1 { } define amdgpu_kernel void @use_get_local_size_x(i32 addrspace(1)* %ptr) #1 { -; CHECK-LABEL: @use_get_local_size_x( +; CHECK-LABEL: define {{[^@]+}}@use_get_local_size_x +; CHECK-SAME: (i32 addrspace(1)* [[PTR:%.*]]) #[[ATTR1]] { ; CHECK-NEXT: [[VAL:%.*]] = call i32 @llvm.r600.read.local.size.x() -; CHECK-NEXT: store i32 [[VAL]], i32 addrspace(1)* [[PTR:%.*]], align 4 +; CHECK-NEXT: store i32 [[VAL]], i32 addrspace(1)* [[PTR]], align 4 ; CHECK-NEXT: ret void ; %val = call i32 @llvm.r600.read.local.size.x() @@ -250,9 +267,10 @@ define amdgpu_kernel void @use_get_local_size_x(i32 addrspace(1)* %ptr) #1 { } define amdgpu_kernel void @use_get_local_size_y(i32 addrspace(1)* %ptr) #1 { -; CHECK-LABEL: @use_get_local_size_y( +; CHECK-LABEL: define {{[^@]+}}@use_get_local_size_y +; CHECK-SAME: (i32 addrspace(1)* [[PTR:%.*]]) #[[ATTR1]] { ; CHECK-NEXT: [[VAL:%.*]] = call i32 @llvm.r600.read.local.size.y() -; CHECK-NEXT: store i32 [[VAL]], i32 addrspace(1)* [[PTR:%.*]], align 4 +; CHECK-NEXT: store i32 
[[VAL]], i32 addrspace(1)* [[PTR]], align 4 ; CHECK-NEXT: ret void ; %val = call i32 @llvm.r600.read.local.size.y() @@ -261,9 +279,10 @@ define amdgpu_kernel void @use_get_local_size_y(i32 addrspace(1)* %ptr) #1 { } define amdgpu_kernel void @use_get_local_size_z(i32 addrspace(1)* %ptr) #1 { -; CHECK-LABEL: @use_get_local_size_z( +; CHECK-LABEL: define {{[^@]+}}@use_get_local_size_z +; CHECK-SAME: (i32 addrspace(1)* [[PTR:%.*]]) #[[ATTR1]] { ; CHECK-NEXT: [[VAL:%.*]] = call i32 @llvm.r600.read.local.size.z() -; CHECK-NEXT: store i32 [[VAL]], i32 addrspace(1)* [[PTR:%.*]], align 4 +; CHECK-NEXT: store i32 [[VAL]], i32 addrspace(1)* [[PTR]], align 4 ; CHECK-NEXT: ret void ; %val = call i32 @llvm.r600.read.local.size.z() @@ -274,14 +293,46 @@ define amdgpu_kernel void @use_get_local_size_z(i32 addrspace(1)* %ptr) #1 { attributes #0 = { nounwind readnone } attributes #1 = { nounwind } -; HSA: attributes #0 = { nounwind readnone } -; HSA: attributes #1 = { nounwind } -; HSA: attributes #2 = { nounwind "amdgpu-work-group-id-y" } -; HSA: attributes #3 = { nounwind "amdgpu-work-group-id-z" } -; HSA: attributes #4 = { nounwind "amdgpu-work-group-id-y" "amdgpu-work-group-id-z" } -; HSA: attributes #5 = { nounwind "amdgpu-work-item-id-y" } -; HSA: attributes #6 = { nounwind "amdgpu-work-item-id-z" } -; HSA: attributes #7 = { nounwind "amdgpu-work-group-id-y" "amdgpu-work-item-id-y" } -; HSA: attributes #8 = { nounwind "amdgpu-work-item-id-y" "amdgpu-work-item-id-z" } -; HSA: attributes #9 = { nounwind "amdgpu-work-group-id-y" "amdgpu-work-group-id-z" "amdgpu-work-item-id-y" "amdgpu-work-item-id-z" } -; HSA: attributes #10 = { nounwind "amdgpu-dispatch-ptr" } +; ALL: attributes #[[ATTR0:[0-9]+]] = { nounwind readnone speculatable willreturn "uniform-work-group-size"="false" } +; ALL: attributes #[[ATTR1]] = { nounwind "uniform-work-group-size"="false" } +; ALL: attributes #[[ATTR2]] = { nounwind "amdgpu-work-group-id-y" "uniform-work-group-size"="false" } +; ALL: attributes #[[ATTR3]] = { nounwind "amdgpu-work-group-id-z" "uniform-work-group-size"="false" } +; ALL: attributes #[[ATTR4]] = { nounwind "amdgpu-work-group-id-y" "amdgpu-work-group-id-z" "uniform-work-group-size"="false" } +; ALL: attributes #[[ATTR5]] = { nounwind "amdgpu-work-item-id-y" "uniform-work-group-size"="false" } +; ALL: attributes #[[ATTR6]] = { nounwind "amdgpu-work-item-id-z" "uniform-work-group-size"="false" } +; ALL: attributes #[[ATTR7]] = { nounwind "amdgpu-work-group-id-y" "amdgpu-work-item-id-y" "uniform-work-group-size"="false" } +; ALL: attributes #[[ATTR8]] = { nounwind "amdgpu-work-item-id-y" "amdgpu-work-item-id-z" "uniform-work-group-size"="false" } +; ALL: attributes #[[ATTR9]] = { nounwind "amdgpu-work-group-id-y" "amdgpu-work-group-id-z" "amdgpu-work-item-id-y" "amdgpu-work-item-id-z" "uniform-work-group-size"="false" } +; NOHSA: attributes #[[ATTR0:[0-9]+]] = { nounwind readnone speculatable willreturn "uniform-work-group-size"="false" } +; NOHSA: attributes #[[ATTR1]] = { nounwind "uniform-work-group-size"="false" } +; NOHSA: attributes #[[ATTR2]] = { nounwind "amdgpu-work-group-id-y" "uniform-work-group-size"="false" } +; NOHSA: attributes #[[ATTR3]] = { nounwind "amdgpu-work-group-id-z" "uniform-work-group-size"="false" } +; NOHSA: attributes #[[ATTR4]] = { nounwind "amdgpu-work-group-id-y" "amdgpu-work-group-id-z" "uniform-work-group-size"="false" } +; NOHSA: attributes #[[ATTR5]] = { nounwind "amdgpu-work-item-id-y" "uniform-work-group-size"="false" } +; NOHSA: attributes #[[ATTR6]] = { nounwind 
"amdgpu-work-item-id-z" "uniform-work-group-size"="false" } +; NOHSA: attributes #[[ATTR7]] = { nounwind "amdgpu-work-group-id-y" "amdgpu-work-item-id-y" "uniform-work-group-size"="false" } +; NOHSA: attributes #[[ATTR8]] = { nounwind "amdgpu-work-item-id-y" "amdgpu-work-item-id-z" "uniform-work-group-size"="false" } +; NOHSA: attributes #[[ATTR9]] = { nounwind "amdgpu-work-group-id-y" "amdgpu-work-group-id-z" "amdgpu-work-item-id-y" "amdgpu-work-item-id-z" "uniform-work-group-size"="false" } +;. +; AKF_CHECK: attributes #[[ATTR0:[0-9]+]] = { nounwind readnone speculatable willreturn } +; AKF_CHECK: attributes #[[ATTR1]] = { nounwind } +; AKF_CHECK: attributes #[[ATTR2]] = { nounwind "amdgpu-work-group-id-y" } +; AKF_CHECK: attributes #[[ATTR3]] = { nounwind "amdgpu-work-group-id-z" } +; AKF_CHECK: attributes #[[ATTR4]] = { nounwind "amdgpu-work-group-id-y" "amdgpu-work-group-id-z" } +; AKF_CHECK: attributes #[[ATTR5]] = { nounwind "amdgpu-work-item-id-y" } +; AKF_CHECK: attributes #[[ATTR6]] = { nounwind "amdgpu-work-item-id-z" } +; AKF_CHECK: attributes #[[ATTR7]] = { nounwind "amdgpu-work-group-id-y" "amdgpu-work-item-id-y" } +; AKF_CHECK: attributes #[[ATTR8]] = { nounwind "amdgpu-work-item-id-y" "amdgpu-work-item-id-z" } +; AKF_CHECK: attributes #[[ATTR9]] = { nounwind "amdgpu-work-group-id-y" "amdgpu-work-group-id-z" "amdgpu-work-item-id-y" "amdgpu-work-item-id-z" } +;. +; ATTRIBUTOR_CHECK: attributes #[[ATTR0:[0-9]+]] = { nounwind readnone speculatable willreturn "uniform-work-group-size"="false" } +; ATTRIBUTOR_CHECK: attributes #[[ATTR1]] = { nounwind "uniform-work-group-size"="false" } +; ATTRIBUTOR_CHECK: attributes #[[ATTR2]] = { nounwind "amdgpu-work-group-id-y" "uniform-work-group-size"="false" } +; ATTRIBUTOR_CHECK: attributes #[[ATTR3]] = { nounwind "amdgpu-work-group-id-z" "uniform-work-group-size"="false" } +; ATTRIBUTOR_CHECK: attributes #[[ATTR4]] = { nounwind "amdgpu-work-group-id-y" "amdgpu-work-group-id-z" "uniform-work-group-size"="false" } +; ATTRIBUTOR_CHECK: attributes #[[ATTR5]] = { nounwind "amdgpu-work-item-id-y" "uniform-work-group-size"="false" } +; ATTRIBUTOR_CHECK: attributes #[[ATTR6]] = { nounwind "amdgpu-work-item-id-z" "uniform-work-group-size"="false" } +; ATTRIBUTOR_CHECK: attributes #[[ATTR7]] = { nounwind "amdgpu-work-group-id-y" "amdgpu-work-item-id-y" "uniform-work-group-size"="false" } +; ATTRIBUTOR_CHECK: attributes #[[ATTR8]] = { nounwind "amdgpu-work-item-id-y" "amdgpu-work-item-id-z" "uniform-work-group-size"="false" } +; ATTRIBUTOR_CHECK: attributes #[[ATTR9]] = { nounwind "amdgpu-work-group-id-y" "amdgpu-work-group-id-z" "amdgpu-work-item-id-y" "amdgpu-work-item-id-z" "uniform-work-group-size"="false" } +;. 
diff --git a/llvm/test/CodeGen/AMDGPU/direct-indirect-call.ll b/llvm/test/CodeGen/AMDGPU/direct-indirect-call.ll index 3dff193..37fc20e 100644 --- a/llvm/test/CodeGen/AMDGPU/direct-indirect-call.ll +++ b/llvm/test/CodeGen/AMDGPU/direct-indirect-call.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-globals -; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -amdgpu-annotate-kernel-features %s | FileCheck -check-prefix=GCN %s +; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -amdgpu-annotate-kernel-features < %s | FileCheck -check-prefixes=GCN,AKF_GCN %s +; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -amdgpu-attributor < %s | FileCheck -check-prefixes=GCN,ATTRIBUTOR_GCN %s define internal void @indirect() { ; GCN-LABEL: define {{[^@]+}}@indirect @@ -10,13 +11,20 @@ define internal void @indirect() { } define internal void @direct() { -; GCN-LABEL: define {{[^@]+}}@direct -; GCN-SAME: () #[[ATTR1:[0-9]+]] { -; GCN-NEXT: [[FPTR:%.*]] = alloca void ()*, align 8 -; GCN-NEXT: store void ()* @indirect, void ()** [[FPTR]], align 8 -; GCN-NEXT: [[FP:%.*]] = load void ()*, void ()** [[FPTR]], align 8 -; GCN-NEXT: call void [[FP]]() -; GCN-NEXT: ret void +; AKF_GCN-LABEL: define {{[^@]+}}@direct +; AKF_GCN-SAME: () #[[ATTR1:[0-9]+]] { +; AKF_GCN-NEXT: [[FPTR:%.*]] = alloca void ()*, align 8 +; AKF_GCN-NEXT: store void ()* @indirect, void ()** [[FPTR]], align 8 +; AKF_GCN-NEXT: [[FP:%.*]] = load void ()*, void ()** [[FPTR]], align 8 +; AKF_GCN-NEXT: call void [[FP]]() +; AKF_GCN-NEXT: ret void +; +; ATTRIBUTOR_GCN-LABEL: define {{[^@]+}}@direct +; ATTRIBUTOR_GCN-SAME: () #[[ATTR1:[0-9]+]] { +; ATTRIBUTOR_GCN-NEXT: [[FPTR:%.*]] = alloca void ()*, align 8 +; ATTRIBUTOR_GCN-NEXT: store void ()* @indirect, void ()** [[FPTR]], align 8 +; ATTRIBUTOR_GCN-NEXT: call void @indirect() +; ATTRIBUTOR_GCN-NEXT: ret void ; %fptr = alloca void()* store void()* @indirect, void()** %fptr @@ -35,7 +43,11 @@ define amdgpu_kernel void @test_direct_indirect_call() { ret void } ;. 
-; GCN: attributes #[[ATTR0]] = { "amdgpu-dispatch-id" "amdgpu-dispatch-ptr" "amdgpu-implicitarg-ptr" "amdgpu-queue-ptr" "amdgpu-work-group-id-x" "amdgpu-work-group-id-y" "amdgpu-work-group-id-z" "amdgpu-work-item-id-x" "amdgpu-work-item-id-y" "amdgpu-work-item-id-z" } -; GCN: attributes #[[ATTR1]] = { "amdgpu-dispatch-id" "amdgpu-dispatch-ptr" "amdgpu-implicitarg-ptr" "amdgpu-queue-ptr" "amdgpu-stack-objects" "amdgpu-work-group-id-x" "amdgpu-work-group-id-y" "amdgpu-work-group-id-z" "amdgpu-work-item-id-x" "amdgpu-work-item-id-y" "amdgpu-work-item-id-z" "uniform-work-group-size"="false" } -; GCN: attributes #[[ATTR2]] = { "amdgpu-calls" "amdgpu-dispatch-id" "amdgpu-dispatch-ptr" "amdgpu-implicitarg-ptr" "amdgpu-queue-ptr" "amdgpu-work-group-id-x" "amdgpu-work-group-id-y" "amdgpu-work-group-id-z" "amdgpu-work-item-id-x" "amdgpu-work-item-id-y" "amdgpu-work-item-id-z" "uniform-work-group-size"="false" } +; AKF_GCN: attributes #[[ATTR0]] = { "amdgpu-dispatch-id" "amdgpu-dispatch-ptr" "amdgpu-implicitarg-ptr" "amdgpu-queue-ptr" "amdgpu-work-group-id-x" "amdgpu-work-group-id-y" "amdgpu-work-group-id-z" "amdgpu-work-item-id-x" "amdgpu-work-item-id-y" "amdgpu-work-item-id-z" } +; AKF_GCN: attributes #[[ATTR1]] = { "amdgpu-dispatch-id" "amdgpu-dispatch-ptr" "amdgpu-implicitarg-ptr" "amdgpu-queue-ptr" "amdgpu-stack-objects" "amdgpu-work-group-id-x" "amdgpu-work-group-id-y" "amdgpu-work-group-id-z" "amdgpu-work-item-id-x" "amdgpu-work-item-id-y" "amdgpu-work-item-id-z" "uniform-work-group-size"="false" } +; AKF_GCN: attributes #[[ATTR2]] = { "amdgpu-calls" "amdgpu-dispatch-id" "amdgpu-dispatch-ptr" "amdgpu-implicitarg-ptr" "amdgpu-queue-ptr" "amdgpu-work-group-id-x" "amdgpu-work-group-id-y" "amdgpu-work-group-id-z" "amdgpu-work-item-id-x" "amdgpu-work-item-id-y" "amdgpu-work-item-id-z" "uniform-work-group-size"="false" } +;. +; ATTRIBUTOR_GCN: attributes #[[ATTR0]] = { "amdgpu-dispatch-id" "amdgpu-dispatch-ptr" "amdgpu-implicitarg-ptr" "amdgpu-queue-ptr" "amdgpu-work-group-id-x" "amdgpu-work-group-id-y" "amdgpu-work-group-id-z" "amdgpu-work-item-id-x" "amdgpu-work-item-id-y" "amdgpu-work-item-id-z" "uniform-work-group-size"="false" } +; ATTRIBUTOR_GCN: attributes #[[ATTR1]] = { "amdgpu-dispatch-id" "amdgpu-dispatch-ptr" "amdgpu-implicitarg-ptr" "amdgpu-queue-ptr" "amdgpu-stack-objects" "amdgpu-work-group-id-x" "amdgpu-work-group-id-y" "amdgpu-work-group-id-z" "amdgpu-work-item-id-x" "amdgpu-work-item-id-y" "amdgpu-work-item-id-z" "uniform-work-group-size"="false" } +; ATTRIBUTOR_GCN: attributes #[[ATTR2]] = { "amdgpu-calls" "amdgpu-dispatch-id" "amdgpu-dispatch-ptr" "amdgpu-implicitarg-ptr" "amdgpu-queue-ptr" "amdgpu-work-group-id-x" "amdgpu-work-group-id-y" "amdgpu-work-group-id-z" "amdgpu-work-item-id-x" "amdgpu-work-item-id-y" "amdgpu-work-item-id-z" "uniform-work-group-size"="false" } ;. 
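Beyond the prefix split, the functional change direct-indirect-call.ll captures is that the Attributor also simplifies the call: @indirect is the only value ever stored to the alloca, so the loaded function pointer is replaced and the indirect call becomes a direct call, which -amdgpu-annotate-kernel-features never attempts. A reduced sketch of the observed rewrite, with hypothetical names (@callee, @caller):

define internal void @callee() {
  ret void
}

define void @caller() {
  %fptr = alloca void ()*, align 8
  store void ()* @callee, void ()** %fptr, align 8
  %fp = load void ()*, void ()** %fptr, align 8
  ; Only @callee can ever be loaded from %fptr, so under -amdgpu-attributor
  ; this is expected to be rewritten to: call void @callee()
  call void %fp()
  ret void
}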
diff --git a/llvm/test/CodeGen/AMDGPU/duplicate-attribute-indirect.ll b/llvm/test/CodeGen/AMDGPU/duplicate-attribute-indirect.ll index d98564a..bb041d0 100644 --- a/llvm/test/CodeGen/AMDGPU/duplicate-attribute-indirect.ll +++ b/llvm/test/CodeGen/AMDGPU/duplicate-attribute-indirect.ll @@ -1,11 +1,15 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-globals -; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -amdgpu-annotate-kernel-features %s | FileCheck -check-prefix=GCN %s +; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -amdgpu-annotate-kernel-features %s | FileCheck -check-prefixes=GCN,AKF_GCN %s +; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -amdgpu-attributor %s | FileCheck -check-prefixes=GCN,ATTRIBUTOR_GCN %s define internal void @indirect() { ; GCN-LABEL: define {{[^@]+}}@indirect ; GCN-SAME: () #[[ATTR0:[0-9]+]] { ; GCN-NEXT: ret void ; +; CHECK-LABEL: define {{[^@]+}}@indirect +; CHECK-SAME: () #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: ret void ret void } @@ -18,6 +22,13 @@ define amdgpu_kernel void @test_simple_indirect_call() #0 { ; GCN-NEXT: call void [[FP]]() ; GCN-NEXT: ret void ; +; CHECK-LABEL: define {{[^@]+}}@test_simple_indirect_call +; CHECK-SAME: () #[[ATTR1:[0-9]+]] { +; CHECK-NEXT: [[FPTR:%.*]] = alloca void ()*, align 8 +; CHECK-NEXT: store void ()* @indirect, void ()** [[FPTR]], align 8 +; CHECK-NEXT: [[FP:%.*]] = load void ()*, void ()** [[FPTR]], align 8 +; CHECK-NEXT: call void [[FP]]() +; CHECK-NEXT: ret void %fptr = alloca void()* store void()* @indirect, void()** %fptr %fp = load void()*, void()** %fptr @@ -28,6 +39,9 @@ define amdgpu_kernel void @test_simple_indirect_call() #0 { attributes #0 = { "amdgpu-dispatch-id" } ;. -; GCN: attributes #[[ATTR0]] = { "amdgpu-dispatch-id" "amdgpu-dispatch-ptr" "amdgpu-implicitarg-ptr" "amdgpu-queue-ptr" "amdgpu-work-group-id-x" "amdgpu-work-group-id-y" "amdgpu-work-group-id-z" "amdgpu-work-item-id-x" "amdgpu-work-item-id-y" "amdgpu-work-item-id-z" } -; GCN: attributes #[[ATTR1]] = { "amdgpu-calls" "amdgpu-dispatch-id" "amdgpu-dispatch-ptr" "amdgpu-implicitarg-ptr" "amdgpu-queue-ptr" "amdgpu-stack-objects" "amdgpu-work-group-id-x" "amdgpu-work-group-id-y" "amdgpu-work-group-id-z" "amdgpu-work-item-id-x" "amdgpu-work-item-id-y" "amdgpu-work-item-id-z" } +; AKF_GCN: attributes #[[ATTR0]] = { "amdgpu-dispatch-id" "amdgpu-dispatch-ptr" "amdgpu-implicitarg-ptr" "amdgpu-queue-ptr" "amdgpu-work-group-id-x" "amdgpu-work-group-id-y" "amdgpu-work-group-id-z" "amdgpu-work-item-id-x" "amdgpu-work-item-id-y" "amdgpu-work-item-id-z" } +; AKF_GCN: attributes #[[ATTR1]] = { "amdgpu-calls" "amdgpu-dispatch-id" "amdgpu-dispatch-ptr" "amdgpu-implicitarg-ptr" "amdgpu-queue-ptr" "amdgpu-stack-objects" "amdgpu-work-group-id-x" "amdgpu-work-group-id-y" "amdgpu-work-group-id-z" "amdgpu-work-item-id-x" "amdgpu-work-item-id-y" "amdgpu-work-item-id-z" } +;. 
+; ATTRIBUTOR_GCN: attributes #[[ATTR0]] = { "amdgpu-dispatch-id" "amdgpu-dispatch-ptr" "amdgpu-implicitarg-ptr" "amdgpu-queue-ptr" "amdgpu-work-group-id-x" "amdgpu-work-group-id-y" "amdgpu-work-group-id-z" "amdgpu-work-item-id-x" "amdgpu-work-item-id-y" "amdgpu-work-item-id-z" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_GCN: attributes #[[ATTR1]] = { "amdgpu-calls" "amdgpu-dispatch-id" "amdgpu-dispatch-ptr" "amdgpu-implicitarg-ptr" "amdgpu-queue-ptr" "amdgpu-stack-objects" "amdgpu-work-group-id-x" "amdgpu-work-group-id-y" "amdgpu-work-group-id-z" "amdgpu-work-item-id-x" "amdgpu-work-item-id-y" "amdgpu-work-item-id-z" "uniform-work-group-size"="false" }
 ;.
diff --git a/llvm/test/CodeGen/AMDGPU/pal-simple-indirect-call.ll b/llvm/test/CodeGen/AMDGPU/pal-simple-indirect-call.ll
index e5f0a1d..a92f0e2 100644
--- a/llvm/test/CodeGen/AMDGPU/pal-simple-indirect-call.ll
+++ b/llvm/test/CodeGen/AMDGPU/pal-simple-indirect-call.ll
@@ -1,6 +1,7 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-globals
 ; Check that no attributes are added to graphics functions
-; RUN: opt -S -mtriple=amdgcn-amd-amdpal -amdgpu-annotate-kernel-features %s | FileCheck -check-prefix=GCN %s
+; RUN: opt -S -mtriple=amdgcn-amd-amdpal -amdgpu-annotate-kernel-features %s | FileCheck -check-prefixes=AKF_GCN %s
+; RUN: opt -S -mtriple=amdgcn-amd-amdpal -amdgpu-attributor %s | FileCheck -check-prefixes=ATTRIBUTOR_GCN %s
 
 ; Check that it doesn't crash
 ; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9 %s
@@ -9,8 +10,22 @@
 
 target datalayout = "A5"
 
+
 define amdgpu_cs void @test_simple_indirect_call() {
-; GCN-LABEL: define amdgpu_cs void @test_simple_indirect_call() {
+; AKF_GCN-LABEL: define {{[^@]+}}@test_simple_indirect_call() {
+; AKF_GCN-NEXT:    [[PC:%.*]] = call i64 @llvm.amdgcn.s.getpc()
+; AKF_GCN-NEXT:    [[FUN:%.*]] = inttoptr i64 [[PC]] to void ()*
+; AKF_GCN-NEXT:    call amdgpu_gfx void [[FUN]]()
+; AKF_GCN-NEXT:    ret void
+;
+; ATTRIBUTOR_GCN-LABEL: define {{[^@]+}}@test_simple_indirect_call
+; ATTRIBUTOR_GCN-SAME: () #[[ATTR0:[0-9]+]] {
+; ATTRIBUTOR_GCN-NEXT:    [[PC:%.*]] = call i64 @llvm.amdgcn.s.getpc()
+; ATTRIBUTOR_GCN-NEXT:    [[FUN:%.*]] = inttoptr i64 [[PC]] to void ()*
+; ATTRIBUTOR_GCN-NEXT:    call amdgpu_gfx void [[FUN]]()
+; ATTRIBUTOR_GCN-NEXT:    ret void
+;
+; The Attributor adds the "uniform-work-group-size" attribute. This should be OK.
 ; GFX9-LABEL: test_simple_indirect_call:
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    s_getpc_b64 s[36:37]
@@ -25,7 +40,6 @@ define amdgpu_cs void @test_simple_indirect_call() {
 ; GFX9-NEXT:    s_mov_b64 s[2:3], s[38:39]
 ; GFX9-NEXT:    s_swappc_b64 s[30:31], s[4:5]
 ; GFX9-NEXT:    s_endpgm
-;
 ; GFX10-LABEL: test_simple_indirect_call:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_getpc_b64 s[36:37]
@@ -53,3 +67,9 @@ define amdgpu_cs void @test_simple_indirect_call() {
 declare i64 @llvm.amdgcn.s.getpc() #0
 
 attributes #0 = { nounwind readnone speculatable willreturn }
+;.
+; AKF_GCN: attributes #[[ATTR0:[0-9]+]] = { nounwind readnone speculatable willreturn }
+;.
+; ATTRIBUTOR_GCN: attributes #[[ATTR0]] = { "uniform-work-group-size"="false" }
+; ATTRIBUTOR_GCN: attributes #[[ATTR1:[0-9]+]] = { nounwind readnone speculatable willreturn "uniform-work-group-size"="false" }
+;.
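pal-simple-indirect-call.ll pins down the graphics-function behavior: an amdgpu_cs entry point is not an amdgpu_kernel, so neither pass adds the implicit-argument "amdgpu-*" attributes to it even though it makes an indirect call; the only attribute the Attributor introduces is "uniform-work-group-size"="false". A hedged sketch of that invariant, with placeholder names (@cs_entry, %fp):

define amdgpu_cs void @cs_entry(void ()* %fp) {
  ; Indirect call from a graphics entry point. After -amdgpu-attributor the
  ; expectation is that @cs_entry carries only "uniform-work-group-size"="false"
  ; and none of the "amdgpu-dispatch-ptr"/"amdgpu-work-item-id-*" attributes.
  call amdgpu_gfx void %fp()
  ret void
}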
diff --git a/llvm/test/CodeGen/AMDGPU/simple-indirect-call.ll b/llvm/test/CodeGen/AMDGPU/simple-indirect-call.ll index 6eda838..ce25bb5 100644 --- a/llvm/test/CodeGen/AMDGPU/simple-indirect-call.ll +++ b/llvm/test/CodeGen/AMDGPU/simple-indirect-call.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-globals -; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -amdgpu-annotate-kernel-features %s | FileCheck -check-prefix=GCN %s +; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -amdgpu-annotate-kernel-features %s | FileCheck -check-prefixes=GCN,AKF_GCN %s +; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -amdgpu-attributor %s | FileCheck -check-prefixes=GCN,ATTRIBUTOR_GCN %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX9 %s @@ -59,9 +60,10 @@ define amdgpu_kernel void @test_simple_indirect_call() { ret void } -; attributes #0 = { "amdgpu-dispatch-id" "amdgpu-dispatch-ptr" "amdgpu-implicitarg-ptr" "amdgpu-work-group-id-x" "amdgpu-work-group-id-y" "amdgpu-work-group-id-z" "amdgpu-work-item-id-x" "amdgpu-work-item-id-y" "amdgpu-work-item-id-z" } -; attributes #1 = { "amdgpu-dispatch-id" "amdgpu-dispatch-ptr" "amdgpu-implicitarg-ptr" "amdgpu-stack-objects" "amdgpu-work-group-id-x" "amdgpu-work-group-id-y" "amdgpu-work-group-id-z" "amdgpu-work-item-id-x" "amdgpu-work-item-id-y" "amdgpu-work-item-id-z" "uniform-work-group-size"="false" } ;. -; GCN: attributes #[[ATTR0]] = { "amdgpu-dispatch-id" "amdgpu-dispatch-ptr" "amdgpu-implicitarg-ptr" "amdgpu-queue-ptr" "amdgpu-work-group-id-x" "amdgpu-work-group-id-y" "amdgpu-work-group-id-z" "amdgpu-work-item-id-x" "amdgpu-work-item-id-y" "amdgpu-work-item-id-z" } -; GCN: attributes #[[ATTR1]] = { "amdgpu-calls" "amdgpu-dispatch-id" "amdgpu-dispatch-ptr" "amdgpu-implicitarg-ptr" "amdgpu-queue-ptr" "amdgpu-stack-objects" "amdgpu-work-group-id-x" "amdgpu-work-group-id-y" "amdgpu-work-group-id-z" "amdgpu-work-item-id-x" "amdgpu-work-item-id-y" "amdgpu-work-item-id-z" } +; AKF_GCN: attributes #[[ATTR0]] = { "amdgpu-dispatch-id" "amdgpu-dispatch-ptr" "amdgpu-implicitarg-ptr" "amdgpu-queue-ptr" "amdgpu-work-group-id-x" "amdgpu-work-group-id-y" "amdgpu-work-group-id-z" "amdgpu-work-item-id-x" "amdgpu-work-item-id-y" "amdgpu-work-item-id-z" } +; AKF_GCN: attributes #[[ATTR1]] = { "amdgpu-calls" "amdgpu-dispatch-id" "amdgpu-dispatch-ptr" "amdgpu-implicitarg-ptr" "amdgpu-queue-ptr" "amdgpu-stack-objects" "amdgpu-work-group-id-x" "amdgpu-work-group-id-y" "amdgpu-work-group-id-z" "amdgpu-work-item-id-x" "amdgpu-work-item-id-y" "amdgpu-work-item-id-z" } +;. +; ATTRIBUTOR_GCN: attributes #[[ATTR0]] = { "amdgpu-dispatch-id" "amdgpu-dispatch-ptr" "amdgpu-implicitarg-ptr" "amdgpu-queue-ptr" "amdgpu-work-group-id-x" "amdgpu-work-group-id-y" "amdgpu-work-group-id-z" "amdgpu-work-item-id-x" "amdgpu-work-item-id-y" "amdgpu-work-item-id-z" "uniform-work-group-size"="false" } +; ATTRIBUTOR_GCN: attributes #[[ATTR1]] = { "amdgpu-calls" "amdgpu-dispatch-id" "amdgpu-dispatch-ptr" "amdgpu-implicitarg-ptr" "amdgpu-queue-ptr" "amdgpu-stack-objects" "amdgpu-work-group-id-x" "amdgpu-work-group-id-y" "amdgpu-work-group-id-z" "amdgpu-work-item-id-x" "amdgpu-work-item-id-y" "amdgpu-work-item-id-z" "uniform-work-group-size"="false" } ;. 
diff --git a/llvm/test/CodeGen/AMDGPU/uniform-work-group-attribute-missing.ll b/llvm/test/CodeGen/AMDGPU/uniform-work-group-attribute-missing.ll
index a9e245b..84a96d9 100644
--- a/llvm/test/CodeGen/AMDGPU/uniform-work-group-attribute-missing.ll
+++ b/llvm/test/CodeGen/AMDGPU/uniform-work-group-attribute-missing.ll
@@ -1,29 +1,46 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-globals
-; RUN: opt -S -mtriple=amdgcn-amd- -amdgpu-annotate-kernel-features %s | FileCheck %s
+; RUN: opt -S -mtriple=amdgcn-amd- -amdgpu-annotate-kernel-features %s | FileCheck -check-prefixes=CHECK,AKF_CHECK %s
+; RUN: opt -S -mtriple=amdgcn-amd- -amdgpu-attributor %s | FileCheck -check-prefixes=CHECK,ATTRIBUTOR_CHECK %s
 
 ; If the kernel does not have the uniform-work-group-attribute, set both callee and caller as false
 
+; We write to a global so that the Attributor doesn't delete the function.
+@x = global i32 0
+
+;.
+; CHECK: @[[X:[a-zA-Z0-9_$"\\.-]+]] = global i32 0
+;.
 define void @foo() #0 {
 ; CHECK-LABEL: define {{[^@]+}}@foo
 ; CHECK-SAME: () #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT:    store i32 0, i32* @x, align 4
 ; CHECK-NEXT:    ret void
 ;
+  store i32 0, i32* @x
   ret void
 }
 
 define amdgpu_kernel void @kernel1() #1 {
-; CHECK-LABEL: define {{[^@]+}}@kernel1
-; CHECK-SAME: () #[[ATTR1:[0-9]+]] {
-; CHECK-NEXT:    call void @foo()
-; CHECK-NEXT:    ret void
+; AKF_CHECK-LABEL: define {{[^@]+}}@kernel1
+; AKF_CHECK-SAME: () #[[ATTR1:[0-9]+]] {
+; AKF_CHECK-NEXT:    call void @foo()
+; AKF_CHECK-NEXT:    ret void
+;
+; ATTRIBUTOR_CHECK-LABEL: define {{[^@]+}}@kernel1
+; ATTRIBUTOR_CHECK-SAME: () #[[ATTR1:[0-9]+]] {
+; ATTRIBUTOR_CHECK-NEXT:    call void @foo() #[[ATTR2:[0-9]+]]
+; ATTRIBUTOR_CHECK-NEXT:    ret void
 ;
   call void @foo()
   ret void
 }
 
 attributes #0 = { "uniform-work-group-size"="true" }
-
 ;.
-; CHECK: attributes #[[ATTR0]] = { "uniform-work-group-size"="false" }
-; CHECK: attributes #[[ATTR1]] = { "amdgpu-calls" "uniform-work-group-size"="false" }
+; AKF_CHECK: attributes #[[ATTR0]] = { "uniform-work-group-size"="false" }
+; AKF_CHECK: attributes #[[ATTR1]] = { "amdgpu-calls" "uniform-work-group-size"="false" }
+;.
+; ATTRIBUTOR_CHECK: attributes #[[ATTR0]] = { nounwind writeonly "uniform-work-group-size"="false" }
+; ATTRIBUTOR_CHECK: attributes #[[ATTR1]] = { "amdgpu-calls" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_CHECK: attributes #[[ATTR2]] = { nounwind writeonly }
 ;.
diff --git a/llvm/test/CodeGen/AMDGPU/uniform-work-group-multistep.ll b/llvm/test/CodeGen/AMDGPU/uniform-work-group-multistep.ll
new file mode 100644
index 0000000..fe7d1d942
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/uniform-work-group-multistep.ll
@@ -0,0 +1,157 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-globals
+; RUN: opt -S -mtriple=amdgcn-unknown-unknown -amdgpu-annotate-kernel-features < %s | FileCheck -check-prefixes=CHECK,AKF_CHECK %s
+; RUN: opt -S -mtriple=amdgcn-unknown-unknown -amdgpu-attributor < %s | FileCheck -check-prefixes=CHECK,ATTRIBUTOR_CHECK %s
+
+;.
+; CHECK: @[[G1:[a-zA-Z0-9_$"\\.-]+]] = global i32* null
+; CHECK: @[[G2:[a-zA-Z0-9_$"\\.-]+]] = global i32 0
+;.
+define weak void @weak() { +; AKF_CHECK-LABEL: define {{[^@]+}}@weak +; AKF_CHECK-SAME: () #[[ATTR0:[0-9]+]] { +; AKF_CHECK-NEXT: call void @internal1() +; AKF_CHECK-NEXT: ret void +; +; ATTRIBUTOR_CHECK-LABEL: define {{[^@]+}}@weak +; ATTRIBUTOR_CHECK-SAME: () #[[ATTR0:[0-9]+]] { +; ATTRIBUTOR_CHECK-NEXT: call void @internal1() #[[ATTR5:[0-9]+]] +; ATTRIBUTOR_CHECK-NEXT: ret void +; + call void @internal1() + ret void +} + +@G1 = global i32* null + +define internal void @internal1() { +; AKF_CHECK-LABEL: define {{[^@]+}}@internal1 +; AKF_CHECK-SAME: () #[[ATTR0]] { +; AKF_CHECK-NEXT: [[TMP1:%.*]] = load i32*, i32** @G1, align 8 +; AKF_CHECK-NEXT: store i32 0, i32* [[TMP1]], align 4 +; AKF_CHECK-NEXT: ret void +; +; ATTRIBUTOR_CHECK-LABEL: define {{[^@]+}}@internal1 +; ATTRIBUTOR_CHECK-SAME: () #[[ATTR1:[0-9]+]] { +; ATTRIBUTOR_CHECK-NEXT: [[TMP1:%.*]] = load i32*, i32** @G1, align 8 +; ATTRIBUTOR_CHECK-NEXT: store i32 0, i32* [[TMP1]], align 4 +; ATTRIBUTOR_CHECK-NEXT: ret void +; + %1 = load i32*, i32** @G1 + store i32 0, i32* %1 + ret void +} + +define amdgpu_kernel void @kernel1() #0 { +; AKF_CHECK-LABEL: define {{[^@]+}}@kernel1 +; AKF_CHECK-SAME: () #[[ATTR1:[0-9]+]] { +; AKF_CHECK-NEXT: call void @weak() +; AKF_CHECK-NEXT: ret void +; +; ATTRIBUTOR_CHECK-LABEL: define {{[^@]+}}@kernel1 +; ATTRIBUTOR_CHECK-SAME: () #[[ATTR2:[0-9]+]] { +; ATTRIBUTOR_CHECK-NEXT: call void @weak() +; ATTRIBUTOR_CHECK-NEXT: ret void +; + call void @weak() + ret void +} + +@G2 = global i32 0 + +define internal void @internal3() { +; AKF_CHECK-LABEL: define {{[^@]+}}@internal3 +; AKF_CHECK-SAME: () #[[ATTR2:[0-9]+]] { +; AKF_CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* @G2, align 4 +; AKF_CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 0 +; AKF_CHECK-NEXT: br i1 [[TMP2]], label [[TMP3:%.*]], label [[TMP4:%.*]] +; AKF_CHECK: 3: +; AKF_CHECK-NEXT: call void @internal4() +; AKF_CHECK-NEXT: call void @internal3() +; AKF_CHECK-NEXT: br label [[TMP4]] +; AKF_CHECK: 4: +; AKF_CHECK-NEXT: ret void +; +; ATTRIBUTOR_CHECK-LABEL: define {{[^@]+}}@internal3 +; ATTRIBUTOR_CHECK-SAME: () #[[ATTR3:[0-9]+]] { +; ATTRIBUTOR_CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* @G2, align 4 +; ATTRIBUTOR_CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 0 +; ATTRIBUTOR_CHECK-NEXT: br i1 [[TMP2]], label [[TMP3:%.*]], label [[TMP4:%.*]] +; ATTRIBUTOR_CHECK: 3: +; ATTRIBUTOR_CHECK-NEXT: call void @internal4() #[[ATTR6:[0-9]+]] +; ATTRIBUTOR_CHECK-NEXT: call void @internal3() #[[ATTR7:[0-9]+]] +; ATTRIBUTOR_CHECK-NEXT: br label [[TMP4]] +; ATTRIBUTOR_CHECK: 4: +; ATTRIBUTOR_CHECK-NEXT: ret void +; + %1 = load i32, i32* @G2, align 4 + %2 = icmp eq i32 %1, 0 + br i1 %2, label %3, label %4 +3: + call void @internal4() + call void @internal3() + br label %4 +4: + ret void +} + +define internal void @internal4() { +; AKF_CHECK-LABEL: define {{[^@]+}}@internal4 +; AKF_CHECK-SAME: () #[[ATTR2]] { +; AKF_CHECK-NEXT: store i32 1, i32* @G2, align 4 +; AKF_CHECK-NEXT: ret void +; +; ATTRIBUTOR_CHECK-LABEL: define {{[^@]+}}@internal4 +; ATTRIBUTOR_CHECK-SAME: () #[[ATTR4:[0-9]+]] { +; ATTRIBUTOR_CHECK-NEXT: store i32 1, i32* @G2, align 4 +; ATTRIBUTOR_CHECK-NEXT: ret void +; + store i32 1, i32* @G2, align 4 + ret void +} + +define internal void @internal2() { +; AKF_CHECK-LABEL: define {{[^@]+}}@internal2 +; AKF_CHECK-SAME: () #[[ATTR2]] { +; AKF_CHECK-NEXT: call void @internal3() +; AKF_CHECK-NEXT: ret void +; +; ATTRIBUTOR_CHECK-LABEL: define {{[^@]+}}@internal2 +; ATTRIBUTOR_CHECK-SAME: () #[[ATTR3]] { +; ATTRIBUTOR_CHECK-NEXT: call void @internal3() 
#[[ATTR7]] +; ATTRIBUTOR_CHECK-NEXT: ret void +; + call void @internal3() + ret void +} + +define amdgpu_kernel void @kernel2() #0 { +; AKF_CHECK-LABEL: define {{[^@]+}}@kernel2 +; AKF_CHECK-SAME: () #[[ATTR1]] { +; AKF_CHECK-NEXT: call void @internal2() +; AKF_CHECK-NEXT: ret void +; +; ATTRIBUTOR_CHECK-LABEL: define {{[^@]+}}@kernel2 +; ATTRIBUTOR_CHECK-SAME: () #[[ATTR2]] { +; ATTRIBUTOR_CHECK-NEXT: call void @internal2() #[[ATTR5]] +; ATTRIBUTOR_CHECK-NEXT: ret void +; + call void @internal2() + ret void +} + +attributes #0 = { "uniform-work-group-size"="true" } + +;. +; AKF_CHECK: attributes #[[ATTR0]] = { "uniform-work-group-size"="false" } +; AKF_CHECK: attributes #[[ATTR1]] = { "amdgpu-calls" "uniform-work-group-size"="true" } +; AKF_CHECK: attributes #[[ATTR2]] = { "uniform-work-group-size"="true" } +;. +; ATTRIBUTOR_CHECK: attributes #[[ATTR0]] = { "uniform-work-group-size"="false" } +; ATTRIBUTOR_CHECK: attributes #[[ATTR1]] = { nofree nosync nounwind willreturn "uniform-work-group-size"="false" } +; ATTRIBUTOR_CHECK: attributes #[[ATTR2]] = { "amdgpu-calls" "uniform-work-group-size"="true" } +; ATTRIBUTOR_CHECK: attributes #[[ATTR3]] = { nofree nosync nounwind "uniform-work-group-size"="true" } +; ATTRIBUTOR_CHECK: attributes #[[ATTR4]] = { nofree nosync nounwind willreturn writeonly "uniform-work-group-size"="true" } +; ATTRIBUTOR_CHECK: attributes #[[ATTR5]] = { nounwind } +; ATTRIBUTOR_CHECK: attributes #[[ATTR6]] = { nofree nosync nounwind willreturn writeonly } +; ATTRIBUTOR_CHECK: attributes #[[ATTR7]] = { nofree nosync nounwind } +;. diff --git a/llvm/test/CodeGen/AMDGPU/uniform-work-group-nested-function-calls.ll b/llvm/test/CodeGen/AMDGPU/uniform-work-group-nested-function-calls.ll index 78bef7b..2412188 100644 --- a/llvm/test/CodeGen/AMDGPU/uniform-work-group-nested-function-calls.ll +++ b/llvm/test/CodeGen/AMDGPU/uniform-work-group-nested-function-calls.ll @@ -1,39 +1,68 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-globals -; RUN: opt -S -mtriple=amdgcn-amd- -amdgpu-annotate-kernel-features %s | FileCheck %s +; RUN: opt -S -mtriple=amdgcn-amd- -amdgpu-annotate-kernel-features < %s | FileCheck -check-prefixes=AKF_CHECK %s +; RUN: opt -S -mtriple=amdgcn-amd- -amdgpu-attributor < %s | FileCheck -check-prefixes=ATTRIBUTOR_CHECK %s ; Test to verify if the attribute gets propagated across nested function calls +; Added to prevent Attributor from deleting calls. +@x = global i32 0 + +;. +; AKF_CHECK: @[[X:[a-zA-Z0-9_$"\\.-]+]] = global i32 0 +;. +; ATTRIBUTOR_CHECK: @[[X:[a-zA-Z0-9_$"\\.-]+]] = global i32 0 +;. 
define void @func1() #0 { -; CHECK-LABEL: define {{[^@]+}}@func1 -; CHECK-SAME: () #[[ATTR0:[0-9]+]] { -; CHECK-NEXT: ret void +; AKF_CHECK-LABEL: define {{[^@]+}}@func1 +; AKF_CHECK-SAME: () #[[ATTR0:[0-9]+]] { +; AKF_CHECK-NEXT: store i32 0, i32* @x, align 4 +; AKF_CHECK-NEXT: ret void +; +; ATTRIBUTOR_CHECK-LABEL: define {{[^@]+}}@func1 +; ATTRIBUTOR_CHECK-SAME: () #[[ATTR0:[0-9]+]] { +; ATTRIBUTOR_CHECK-NEXT: store i32 0, i32* @x, align 4 +; ATTRIBUTOR_CHECK-NEXT: ret void ; + store i32 0, i32* @x ret void } define void @func2() #1 { -; CHECK-LABEL: define {{[^@]+}}@func2 -; CHECK-SAME: () #[[ATTR0]] { -; CHECK-NEXT: call void @func1() -; CHECK-NEXT: ret void +; AKF_CHECK-LABEL: define {{[^@]+}}@func2 +; AKF_CHECK-SAME: () #[[ATTR0]] { +; AKF_CHECK-NEXT: call void @func1() +; AKF_CHECK-NEXT: ret void +; +; ATTRIBUTOR_CHECK-LABEL: define {{[^@]+}}@func2 +; ATTRIBUTOR_CHECK-SAME: () #[[ATTR0]] { +; ATTRIBUTOR_CHECK-NEXT: call void @func1() #[[ATTR2:[0-9]+]] +; ATTRIBUTOR_CHECK-NEXT: ret void ; call void @func1() ret void } define amdgpu_kernel void @kernel3() #2 { -; CHECK-LABEL: define {{[^@]+}}@kernel3 -; CHECK-SAME: () #[[ATTR1:[0-9]+]] { -; CHECK-NEXT: call void @func2() -; CHECK-NEXT: ret void +; AKF_CHECK-LABEL: define {{[^@]+}}@kernel3 +; AKF_CHECK-SAME: () #[[ATTR1:[0-9]+]] { +; AKF_CHECK-NEXT: call void @func2() +; AKF_CHECK-NEXT: ret void +; +; ATTRIBUTOR_CHECK-LABEL: define {{[^@]+}}@kernel3 +; ATTRIBUTOR_CHECK-SAME: () #[[ATTR1:[0-9]+]] { +; ATTRIBUTOR_CHECK-NEXT: call void @func2() #[[ATTR2]] +; ATTRIBUTOR_CHECK-NEXT: ret void ; call void @func2() ret void } attributes #2 = { "uniform-work-group-size"="true" } - ;. -; CHECK: attributes #[[ATTR0]] = { "uniform-work-group-size"="true" } -; CHECK: attributes #[[ATTR1]] = { "amdgpu-calls" "uniform-work-group-size"="true" } +; AKF_CHECK: attributes #[[ATTR0]] = { "uniform-work-group-size"="true" } +; AKF_CHECK: attributes #[[ATTR1]] = { "amdgpu-calls" "uniform-work-group-size"="true" } +;. +; ATTRIBUTOR_CHECK: attributes #[[ATTR0]] = { nounwind writeonly "uniform-work-group-size"="false" } +; ATTRIBUTOR_CHECK: attributes #[[ATTR1]] = { "amdgpu-calls" "uniform-work-group-size"="true" } +; ATTRIBUTOR_CHECK: attributes #[[ATTR2]] = { nounwind writeonly } ;. diff --git a/llvm/test/CodeGen/AMDGPU/uniform-work-group-prevent-attribute-propagation.ll b/llvm/test/CodeGen/AMDGPU/uniform-work-group-prevent-attribute-propagation.ll index cdedcae..23e2759 100644 --- a/llvm/test/CodeGen/AMDGPU/uniform-work-group-prevent-attribute-propagation.ll +++ b/llvm/test/CodeGen/AMDGPU/uniform-work-group-prevent-attribute-propagation.ll @@ -1,31 +1,51 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-globals -; RUN: opt -S -mtriple=amdgcn-amd- -amdgpu-annotate-kernel-features %s | FileCheck %s + +; RUN: opt -S -mtriple=amdgcn-amd- -amdgpu-annotate-kernel-features < %s | FileCheck -check-prefixes=CHECK,AKF_CHECK %s +; RUN: opt -S -mtriple=amdgcn-amd- -amdgpu-attributor < %s | FileCheck -check-prefixes=CHECK,ATTRIBUTOR_CHECK %s + +; Function added to prevent attributor from deleting call sites. ; Two kernels with different values of the uniform-work-group-attribute call the same function +@x = global i32 0 +;. +; CHECK: @[[X:[a-zA-Z0-9_$"\\.-]+]] = global i32 0 +;. 
define void @func() #0 { ; CHECK-LABEL: define {{[^@]+}}@func ; CHECK-SAME: () #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: store i32 0, i32* @x, align 4 ; CHECK-NEXT: ret void ; + store i32 0, i32* @x ret void } define amdgpu_kernel void @kernel1() #1 { -; CHECK-LABEL: define {{[^@]+}}@kernel1 -; CHECK-SAME: () #[[ATTR1:[0-9]+]] { -; CHECK-NEXT: call void @func() -; CHECK-NEXT: ret void +; AKF_CHECK-LABEL: define {{[^@]+}}@kernel1 +; AKF_CHECK-SAME: () #[[ATTR1:[0-9]+]] { +; AKF_CHECK-NEXT: call void @func() +; AKF_CHECK-NEXT: ret void +; +; ATTRIBUTOR_CHECK-LABEL: define {{[^@]+}}@kernel1 +; ATTRIBUTOR_CHECK-SAME: () #[[ATTR1:[0-9]+]] { +; ATTRIBUTOR_CHECK-NEXT: call void @func() #[[ATTR3:[0-9]+]] +; ATTRIBUTOR_CHECK-NEXT: ret void ; call void @func() ret void } define amdgpu_kernel void @kernel2() #2 { -; CHECK-LABEL: define {{[^@]+}}@kernel2 -; CHECK-SAME: () #[[ATTR2:[0-9]+]] { -; CHECK-NEXT: call void @func() -; CHECK-NEXT: ret void +; AKF_CHECK-LABEL: define {{[^@]+}}@kernel2 +; AKF_CHECK-SAME: () #[[ATTR2:[0-9]+]] { +; AKF_CHECK-NEXT: call void @func() +; AKF_CHECK-NEXT: ret void +; +; ATTRIBUTOR_CHECK-LABEL: define {{[^@]+}}@kernel2 +; ATTRIBUTOR_CHECK-SAME: () #[[ATTR2:[0-9]+]] { +; ATTRIBUTOR_CHECK-NEXT: call void @func() #[[ATTR3]] +; ATTRIBUTOR_CHECK-NEXT: ret void ; call void @func() ret void @@ -34,7 +54,12 @@ define amdgpu_kernel void @kernel2() #2 { attributes #1 = { "uniform-work-group-size"="true" } ;. -; CHECK: attributes #[[ATTR0]] = { "uniform-work-group-size"="false" } -; CHECK: attributes #[[ATTR1]] = { "amdgpu-calls" "uniform-work-group-size"="true" } -; CHECK: attributes #[[ATTR2]] = { "amdgpu-calls" "uniform-work-group-size"="false" } +; AKF_CHECK: attributes #[[ATTR0]] = { "uniform-work-group-size"="false" } +; AKF_CHECK: attributes #[[ATTR1]] = { "amdgpu-calls" "uniform-work-group-size"="true" } +; AKF_CHECK: attributes #[[ATTR2]] = { "amdgpu-calls" "uniform-work-group-size"="false" } +;. +; ATTRIBUTOR_CHECK: attributes #[[ATTR0]] = { nounwind writeonly "uniform-work-group-size"="false" } +; ATTRIBUTOR_CHECK: attributes #[[ATTR1]] = { "amdgpu-calls" "uniform-work-group-size"="true" } +; ATTRIBUTOR_CHECK: attributes #[[ATTR2]] = { "amdgpu-calls" "uniform-work-group-size"="false" } +; ATTRIBUTOR_CHECK: attributes #[[ATTR3]] = { nounwind writeonly } ;. diff --git a/llvm/test/CodeGen/AMDGPU/uniform-work-group-propagate-attribute.ll b/llvm/test/CodeGen/AMDGPU/uniform-work-group-propagate-attribute.ll index b485044..9dc8970 100644 --- a/llvm/test/CodeGen/AMDGPU/uniform-work-group-propagate-attribute.ll +++ b/llvm/test/CodeGen/AMDGPU/uniform-work-group-propagate-attribute.ll @@ -1,20 +1,33 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-globals -; RUN: opt -S -mtriple=amdgcn-amd- -amdgpu-annotate-kernel-features %s | FileCheck %s +; RUN: opt -S -mtriple=amdgcn-amd- -amdgpu-annotate-kernel-features %s | FileCheck -check-prefixes=CHECK,AKF_CHECK %s +; RUN: opt -S -mtriple=amdgcn-amd- -amdgpu-attributor %s | FileCheck -check-prefixes=CHECK,ATTRIBUTOR_CHECK %s + +@x = global i32 0 ; Propagate the uniform-work-group-attribute from the kernel to callee if it doesn't have it +;. +; CHECK: @[[X:[a-zA-Z0-9_$"\\.-]+]] = global i32 0 +;. 
define void @func() #0 { ; CHECK-LABEL: define {{[^@]+}}@func ; CHECK-SAME: () #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: store i32 0, i32* @x, align 4 ; CHECK-NEXT: ret void ; + store i32 0, i32* @x ret void } define amdgpu_kernel void @kernel1() #1 { -; CHECK-LABEL: define {{[^@]+}}@kernel1 -; CHECK-SAME: () #[[ATTR1:[0-9]+]] { -; CHECK-NEXT: call void @func() -; CHECK-NEXT: ret void +; AKF_CHECK-LABEL: define {{[^@]+}}@kernel1 +; AKF_CHECK-SAME: () #[[ATTR1:[0-9]+]] { +; AKF_CHECK-NEXT: call void @func() +; AKF_CHECK-NEXT: ret void +; +; ATTRIBUTOR_CHECK-LABEL: define {{[^@]+}}@kernel1 +; ATTRIBUTOR_CHECK-SAME: () #[[ATTR1:[0-9]+]] { +; ATTRIBUTOR_CHECK-NEXT: call void @func() #[[ATTR4:[0-9]+]] +; ATTRIBUTOR_CHECK-NEXT: ret void ; call void @func() ret void @@ -22,18 +35,30 @@ define amdgpu_kernel void @kernel1() #1 { ; External declaration of a function define weak_odr void @weak_func() #0 { -; CHECK-LABEL: define {{[^@]+}}@weak_func -; CHECK-SAME: () #[[ATTR0]] { -; CHECK-NEXT: ret void +; AKF_CHECK-LABEL: define {{[^@]+}}@weak_func +; AKF_CHECK-SAME: () #[[ATTR0]] { +; AKF_CHECK-NEXT: store i32 0, i32* @x, align 4 +; AKF_CHECK-NEXT: ret void ; +; ATTRIBUTOR_CHECK-LABEL: define {{[^@]+}}@weak_func +; ATTRIBUTOR_CHECK-SAME: () #[[ATTR2:[0-9]+]] { +; ATTRIBUTOR_CHECK-NEXT: store i32 0, i32* @x, align 4 +; ATTRIBUTOR_CHECK-NEXT: ret void +; + store i32 0, i32* @x ret void } define amdgpu_kernel void @kernel2() #2 { -; CHECK-LABEL: define {{[^@]+}}@kernel2 -; CHECK-SAME: () #[[ATTR2:[0-9]+]] { -; CHECK-NEXT: call void @weak_func() -; CHECK-NEXT: ret void +; AKF_CHECK-LABEL: define {{[^@]+}}@kernel2 +; AKF_CHECK-SAME: () #[[ATTR2:[0-9]+]] { +; AKF_CHECK-NEXT: call void @weak_func() +; AKF_CHECK-NEXT: ret void +; +; ATTRIBUTOR_CHECK-LABEL: define {{[^@]+}}@kernel2 +; ATTRIBUTOR_CHECK-SAME: () #[[ATTR3:[0-9]+]] { +; ATTRIBUTOR_CHECK-NEXT: call void @weak_func() #[[ATTR5:[0-9]+]] +; ATTRIBUTOR_CHECK-NEXT: ret void ; call void @weak_func() ret void @@ -42,9 +67,15 @@ define amdgpu_kernel void @kernel2() #2 { attributes #0 = { nounwind } attributes #1 = { "uniform-work-group-size"="false" } attributes #2 = { "uniform-work-group-size"="true" } - ;. -; CHECK: attributes #[[ATTR0]] = { nounwind "uniform-work-group-size"="false" } -; CHECK: attributes #[[ATTR1]] = { "amdgpu-calls" "uniform-work-group-size"="false" } -; CHECK: attributes #[[ATTR2]] = { "amdgpu-calls" "uniform-work-group-size"="true" } +; AKF_CHECK: attributes #[[ATTR0]] = { nounwind "uniform-work-group-size"="false" } +; AKF_CHECK: attributes #[[ATTR1]] = { "amdgpu-calls" "uniform-work-group-size"="false" } +; AKF_CHECK: attributes #[[ATTR2]] = { "amdgpu-calls" "uniform-work-group-size"="true" } +;. +; ATTRIBUTOR_CHECK: attributes #[[ATTR0]] = { nounwind writeonly "uniform-work-group-size"="false" } +; ATTRIBUTOR_CHECK: attributes #[[ATTR1]] = { "amdgpu-calls" "uniform-work-group-size"="false" } +; ATTRIBUTOR_CHECK: attributes #[[ATTR2]] = { nounwind "uniform-work-group-size"="false" } +; ATTRIBUTOR_CHECK: attributes #[[ATTR3]] = { "amdgpu-calls" "uniform-work-group-size"="true" } +; ATTRIBUTOR_CHECK: attributes #[[ATTR4]] = { nounwind writeonly } +; ATTRIBUTOR_CHECK: attributes #[[ATTR5]] = { nounwind } ;. 
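The uniform-work-group tests above and below all exercise the same deduction: a callee may only keep "uniform-work-group-size"="true" if every (transitive) caller, ultimately a kernel, has it, and "false" wins whenever callers disagree. The sketch below shows how an Attributor abstract attribute can implement that meet. It is a minimal reconstruction modeled on what this patch implements in AMDGPUAttributor.cpp, not the patch's verbatim code: the class name carries a "Sketch" suffix on purpose, the definitions of ID and createForPosition (which getAAFor<> needs) plus pass registration are omitted, and it assumes the clampStateAndIndicateChange helper and the ForceReplace parameter on IRAttributeManifest::manifestAttrs that this patch adds to Attributor.h.

#include "llvm/Transforms/IPO/Attributor.h"

using namespace llvm;

struct AAUniformWorkGroupSizeSketch
    : public StateWrapper<BooleanState, AbstractAttribute> {
  using Base = StateWrapper<BooleanState, AbstractAttribute>;
  AAUniformWorkGroupSizeSketch(const IRPosition &IRP, Attributor &A)
      : Base(IRP) {}

  void initialize(Attributor &A) override {
    // Kernels are the sources of truth: their IR attribute fixes the
    // state, and the Attributor never updates a state at a fixpoint.
    Function *F = getAssociatedFunction();
    if (F->getCallingConv() != CallingConv::AMDGPU_KERNEL)
      return;
    bool InitialValue = false;
    if (F->hasFnAttribute("uniform-work-group-size"))
      InitialValue = F->getFnAttribute("uniform-work-group-size")
                         .getValueAsString() == "true";
    if (InitialValue)
      indicateOptimisticFixpoint();  // pinned to "true"
    else
      indicatePessimisticFixpoint(); // known "false"
  }

  ChangeStatus updateImpl(Attributor &A) override {
    ChangeStatus Change = ChangeStatus::UNCHANGED;
    // Meet over all call sites: clamping against each caller's state
    // drops this function to "false" as soon as one caller is "false".
    auto CheckCallSite = [&](AbstractCallSite CS) {
      Function *Caller = CS.getInstruction()->getFunction();
      const auto &CallerAA = A.getAAFor<AAUniformWorkGroupSizeSketch>(
          *this, IRPosition::function(*Caller), DepClassTy::REQUIRED);
      Change = Change | clampStateAndIndicateChange(getState(),
                                                    CallerAA.getState());
      return true;
    };
    bool AllCallSitesKnown = true;
    if (!A.checkForAllCallSites(CheckCallSite, *this,
                                /*RequireAllCallSites=*/true,
                                AllCallSitesKnown))
      indicatePessimisticFixpoint(); // e.g. not all callers are visible
    return Change;
  }

  ChangeStatus manifest(Attributor &A) override {
    // ForceReplace lets the deduced value overwrite a preexisting
    // "uniform-work-group-size" string attribute; that is what the new
    // parameter on IRAttributeManifest::manifestAttrs is for.
    LLVMContext &Ctx = getAssociatedFunction()->getContext();
    SmallVector<Attribute, 1> Attrs = {Attribute::get(
        Ctx, "uniform-work-group-size", getAssumed() ? "true" : "false")};
    return IRAttributeManifest::manifestAttrs(A, getIRPosition(), Attrs,
                                              /*ForceReplace=*/true);
  }

  const std::string getAsStr() const override {
    return getAssumed() ? "uniform-work-group-size[true]"
                        : "uniform-work-group-size[false]";
  }
  void trackStatistics() const override {}

  // Required by the Attributor's lookup machinery; definitions omitted
  // in this sketch.
  const char *getIdAddr() const override { return &ID; }
  static AAUniformWorkGroupSizeSketch &
  createForPosition(const IRPosition &IRP, Attributor &A);
  static const char ID;
};

Under this meet the checks above fall out directly: in the prevent-attribute-propagation test, @func is called from both @kernel1 ("true") and @kernel2 ("false"), so ATTRIBUTOR_CHECK pins ATTR0 on @func to "uniform-work-group-size"="false" even though one caller is "true".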
diff --git a/llvm/test/CodeGen/AMDGPU/uniform-work-group-recursion-test.ll b/llvm/test/CodeGen/AMDGPU/uniform-work-group-recursion-test.ll index a8b6a7f..2397c2b 100644 --- a/llvm/test/CodeGen/AMDGPU/uniform-work-group-recursion-test.ll +++ b/llvm/test/CodeGen/AMDGPU/uniform-work-group-recursion-test.ll @@ -1,26 +1,44 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-globals -; RUN: opt -S -mtriple=amdgcn-amd- -amdgpu-annotate-kernel-features %s | FileCheck %s +; RUN: opt -S -mtriple=amdgcn-amd- -amdgpu-annotate-kernel-features %s | FileCheck --allow-unused-prefixes -check-prefixes=CHECK,AKF_CHECK %s +; RUN: opt -S -mtriple=amdgcn-amd- -amdgpu-attributor %s | FileCheck --allow-unused-prefixes -check-prefixes=CHECK,ATTRIBUTOR_CHECK %s ; Test to ensure recursive functions exhibit proper behaviour ; Test to generate fibonacci numbers define i32 @fib(i32 %n) #0 { -; CHECK-LABEL: define {{[^@]+}}@fib -; CHECK-SAME: (i32 [[N:%.*]]) #[[ATTR0:[0-9]+]] { -; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i32 [[N]], 0 -; CHECK-NEXT: br i1 [[CMP1]], label [[EXIT:%.*]], label [[CONT1:%.*]] -; CHECK: cont1: -; CHECK-NEXT: [[CMP2:%.*]] = icmp eq i32 [[N]], 1 -; CHECK-NEXT: br i1 [[CMP2]], label [[EXIT]], label [[CONT2:%.*]] -; CHECK: cont2: -; CHECK-NEXT: [[NM1:%.*]] = sub i32 [[N]], 1 -; CHECK-NEXT: [[FIBM1:%.*]] = call i32 @fib(i32 [[NM1]]) -; CHECK-NEXT: [[NM2:%.*]] = sub i32 [[N]], 2 -; CHECK-NEXT: [[FIBM2:%.*]] = call i32 @fib(i32 [[NM2]]) -; CHECK-NEXT: [[RETVAL:%.*]] = add i32 [[FIBM1]], [[FIBM2]] -; CHECK-NEXT: ret i32 [[RETVAL]] -; CHECK: exit: -; CHECK-NEXT: ret i32 1 +; AKF_CHECK-LABEL: define {{[^@]+}}@fib +; AKF_CHECK-SAME: (i32 [[N:%.*]]) #[[ATTR0:[0-9]+]] { +; AKF_CHECK-NEXT: [[CMP1:%.*]] = icmp eq i32 [[N]], 0 +; AKF_CHECK-NEXT: br i1 [[CMP1]], label [[EXIT:%.*]], label [[CONT1:%.*]] +; AKF_CHECK: cont1: +; AKF_CHECK-NEXT: [[CMP2:%.*]] = icmp eq i32 [[N]], 1 +; AKF_CHECK-NEXT: br i1 [[CMP2]], label [[EXIT]], label [[CONT2:%.*]] +; AKF_CHECK: cont2: +; AKF_CHECK-NEXT: [[NM1:%.*]] = sub i32 [[N]], 1 +; AKF_CHECK-NEXT: [[FIBM1:%.*]] = call i32 @fib(i32 [[NM1]]) +; AKF_CHECK-NEXT: [[NM2:%.*]] = sub i32 [[N]], 2 +; AKF_CHECK-NEXT: [[FIBM2:%.*]] = call i32 @fib(i32 [[NM2]]) +; AKF_CHECK-NEXT: [[RETVAL:%.*]] = add i32 [[FIBM1]], [[FIBM2]] +; AKF_CHECK-NEXT: ret i32 [[RETVAL]] +; AKF_CHECK: exit: +; AKF_CHECK-NEXT: ret i32 1 +; +; ATTRIBUTOR_CHECK-LABEL: define {{[^@]+}}@fib +; ATTRIBUTOR_CHECK-SAME: (i32 [[N:%.*]]) #[[ATTR0:[0-9]+]] { +; ATTRIBUTOR_CHECK-NEXT: [[CMP1:%.*]] = icmp eq i32 [[N]], 0 +; ATTRIBUTOR_CHECK-NEXT: br i1 [[CMP1]], label [[EXIT:%.*]], label [[CONT1:%.*]] +; ATTRIBUTOR_CHECK: cont1: +; ATTRIBUTOR_CHECK-NEXT: [[CMP2:%.*]] = icmp eq i32 [[N]], 1 +; ATTRIBUTOR_CHECK-NEXT: br i1 [[CMP2]], label [[EXIT]], label [[CONT2:%.*]] +; ATTRIBUTOR_CHECK: cont2: +; ATTRIBUTOR_CHECK-NEXT: [[NM1:%.*]] = sub i32 [[N]], 1 +; ATTRIBUTOR_CHECK-NEXT: [[FIBM1:%.*]] = call i32 @fib(i32 [[NM1]]) #[[ATTR3:[0-9]+]] +; ATTRIBUTOR_CHECK-NEXT: [[NM2:%.*]] = sub i32 [[N]], 2 +; ATTRIBUTOR_CHECK-NEXT: [[FIBM2:%.*]] = call i32 @fib(i32 [[NM2]]) #[[ATTR3]] +; ATTRIBUTOR_CHECK-NEXT: [[RETVAL:%.*]] = add i32 [[FIBM1]], [[FIBM2]] +; ATTRIBUTOR_CHECK-NEXT: ret i32 [[RETVAL]] +; ATTRIBUTOR_CHECK: exit: +; ATTRIBUTOR_CHECK-NEXT: ret i32 1 ; %cmp1 = icmp eq i32 %n, 0 br i1 %cmp1, label %exit, label %cont1 @@ -42,21 +60,97 @@ exit: ret i32 1 } +define internal i32 @fib_internal(i32 %n) #0 { +; AKF_CHECK-LABEL: define {{[^@]+}}@fib_internal +; AKF_CHECK-SAME: (i32 
[[N:%.*]]) #[[ATTR0]] { +; AKF_CHECK-NEXT: [[CMP1:%.*]] = icmp eq i32 [[N]], 0 +; AKF_CHECK-NEXT: br i1 [[CMP1]], label [[EXIT:%.*]], label [[CONT1:%.*]] +; AKF_CHECK: cont1: +; AKF_CHECK-NEXT: [[CMP2:%.*]] = icmp eq i32 [[N]], 1 +; AKF_CHECK-NEXT: br i1 [[CMP2]], label [[EXIT]], label [[CONT2:%.*]] +; AKF_CHECK: cont2: +; AKF_CHECK-NEXT: [[NM1:%.*]] = sub i32 [[N]], 1 +; AKF_CHECK-NEXT: [[FIBM1:%.*]] = call i32 @fib_internal(i32 [[NM1]]) +; AKF_CHECK-NEXT: [[NM2:%.*]] = sub i32 [[N]], 2 +; AKF_CHECK-NEXT: [[FIBM2:%.*]] = call i32 @fib_internal(i32 [[NM2]]) +; AKF_CHECK-NEXT: [[RETVAL:%.*]] = add i32 [[FIBM1]], [[FIBM2]] +; AKF_CHECK-NEXT: ret i32 [[RETVAL]] +; AKF_CHECK: exit: +; AKF_CHECK-NEXT: ret i32 1 +; +; ATTRIBUTOR_CHECK-LABEL: define {{[^@]+}}@fib_internal +; ATTRIBUTOR_CHECK-SAME: (i32 [[N:%.*]]) #[[ATTR1:[0-9]+]] { +; ATTRIBUTOR_CHECK-NEXT: [[CMP1:%.*]] = icmp eq i32 [[N]], 0 +; ATTRIBUTOR_CHECK-NEXT: br i1 [[CMP1]], label [[EXIT:%.*]], label [[CONT1:%.*]] +; ATTRIBUTOR_CHECK: cont1: +; ATTRIBUTOR_CHECK-NEXT: [[CMP2:%.*]] = icmp eq i32 [[N]], 1 +; ATTRIBUTOR_CHECK-NEXT: br i1 [[CMP2]], label [[EXIT]], label [[CONT2:%.*]] +; ATTRIBUTOR_CHECK: cont2: +; ATTRIBUTOR_CHECK-NEXT: [[NM1:%.*]] = sub i32 [[N]], 1 +; ATTRIBUTOR_CHECK-NEXT: [[FIBM1:%.*]] = call i32 @fib_internal(i32 [[NM1]]) #[[ATTR4:[0-9]+]] +; ATTRIBUTOR_CHECK-NEXT: [[NM2:%.*]] = sub i32 [[N]], 2 +; ATTRIBUTOR_CHECK-NEXT: [[FIBM2:%.*]] = call i32 @fib_internal(i32 [[NM2]]) #[[ATTR4]] +; ATTRIBUTOR_CHECK-NEXT: [[RETVAL:%.*]] = add i32 [[FIBM1]], [[FIBM2]] +; ATTRIBUTOR_CHECK-NEXT: ret i32 [[RETVAL]] +; ATTRIBUTOR_CHECK: exit: +; ATTRIBUTOR_CHECK-NEXT: ret i32 1 +; + %cmp1 = icmp eq i32 %n, 0 + br i1 %cmp1, label %exit, label %cont1 + +cont1: + %cmp2 = icmp eq i32 %n, 1 + br i1 %cmp2, label %exit, label %cont2 + +cont2: + %nm1 = sub i32 %n, 1 + %fibm1 = call i32 @fib_internal(i32 %nm1) + %nm2 = sub i32 %n, 2 + %fibm2 = call i32 @fib_internal(i32 %nm2) + %retval = add i32 %fibm1, %fibm2 + + ret i32 %retval + +exit: + ret i32 1 +} + define amdgpu_kernel void @kernel(i32 addrspace(1)* %m) #1 { -; CHECK-LABEL: define {{[^@]+}}@kernel -; CHECK-SAME: (i32 addrspace(1)* [[M:%.*]]) #[[ATTR1:[0-9]+]] { -; CHECK-NEXT: [[R:%.*]] = call i32 @fib(i32 5) -; CHECK-NEXT: store i32 [[R]], i32 addrspace(1)* [[M]], align 4 -; CHECK-NEXT: ret void +; AKF_CHECK-LABEL: define {{[^@]+}}@kernel +; AKF_CHECK-SAME: (i32 addrspace(1)* [[M:%.*]]) #[[ATTR1:[0-9]+]] { +; AKF_CHECK-NEXT: [[R:%.*]] = call i32 @fib(i32 5) +; AKF_CHECK-NEXT: [[R2:%.*]] = call i32 @fib_internal(i32 5) +; AKF_CHECK-NEXT: store i32 [[R]], i32 addrspace(1)* [[M]], align 4 +; AKF_CHECK-NEXT: store i32 [[R2]], i32 addrspace(1)* [[M]], align 4 +; AKF_CHECK-NEXT: ret void +; +; ATTRIBUTOR_CHECK-LABEL: define {{[^@]+}}@kernel +; ATTRIBUTOR_CHECK-SAME: (i32 addrspace(1)* [[M:%.*]]) #[[ATTR2:[0-9]+]] { +; ATTRIBUTOR_CHECK-NEXT: [[R:%.*]] = call i32 @fib(i32 5) #[[ATTR3]] +; ATTRIBUTOR_CHECK-NEXT: [[R2:%.*]] = call i32 @fib_internal(i32 noundef 5) #[[ATTR3]] +; ATTRIBUTOR_CHECK-NEXT: store i32 [[R]], i32 addrspace(1)* [[M]], align 4 +; ATTRIBUTOR_CHECK-NEXT: store i32 [[R2]], i32 addrspace(1)* [[M]], align 4 +; ATTRIBUTOR_CHECK-NEXT: ret void ; %r = call i32 @fib(i32 5) + %r2 = call i32 @fib_internal(i32 5) + store i32 %r, i32 addrspace(1)* %m + store i32 %r2, i32 addrspace(1)* %m ret void } +; nounwind and readnone are added to match attributor results. +attributes #0 = { nounwind readnone } attributes #1 = { "uniform-work-group-size"="true" } ;. 
-; CHECK: attributes #[[ATTR0]] = { "uniform-work-group-size"="true" } -; CHECK: attributes #[[ATTR1]] = { "amdgpu-calls" "uniform-work-group-size"="true" } +; AKF_CHECK: attributes #[[ATTR0]] = { nounwind readnone "uniform-work-group-size"="true" } +; AKF_CHECK: attributes #[[ATTR1]] = { "amdgpu-calls" "uniform-work-group-size"="true" } +;. +; ATTRIBUTOR_CHECK: attributes #[[ATTR0]] = { nounwind readnone "uniform-work-group-size"="false" } +; ATTRIBUTOR_CHECK: attributes #[[ATTR1]] = { nofree nosync nounwind readnone "uniform-work-group-size"="true" } +; ATTRIBUTOR_CHECK: attributes #[[ATTR2]] = { "amdgpu-calls" "uniform-work-group-size"="true" } +; ATTRIBUTOR_CHECK: attributes #[[ATTR3]] = { nounwind readnone } +; ATTRIBUTOR_CHECK: attributes #[[ATTR4]] = { nofree nounwind readnone } ;. diff --git a/llvm/test/CodeGen/AMDGPU/uniform-work-group-test.ll b/llvm/test/CodeGen/AMDGPU/uniform-work-group-test.ll index fba4b1f..3f99e23 100644 --- a/llvm/test/CodeGen/AMDGPU/uniform-work-group-test.ll +++ b/llvm/test/CodeGen/AMDGPU/uniform-work-group-test.ll @@ -1,28 +1,43 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-globals -; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -amdgpu-annotate-kernel-features %s | FileCheck %s +; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -amdgpu-annotate-kernel-features %s | FileCheck -allow-unused-prefixes -check-prefixes=CHECK,AKF_CHECK %s +; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -amdgpu-attributor %s | FileCheck -allow-unused-prefixes -check-prefixes=CHECK,ATTRIBUTOR_CHECK %s +@x = global i32 0 +;. +; CHECK: @[[X:[a-zA-Z0-9_$"\\.-]+]] = global i32 0 +;. define void @func1() { ; CHECK-LABEL: define {{[^@]+}}@func1 ; CHECK-SAME: () #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: store i32 0, i32* @x, align 4 ; CHECK-NEXT: ret void ; + store i32 0, i32* @x ret void } define void @func4() { ; CHECK-LABEL: define {{[^@]+}}@func4 ; CHECK-SAME: () #[[ATTR0]] { +; CHECK-NEXT: store i32 0, i32* @x, align 4 ; CHECK-NEXT: ret void ; + store i32 0, i32* @x ret void } define void @func2() #0 { -; CHECK-LABEL: define {{[^@]+}}@func2 -; CHECK-SAME: () #[[ATTR0]] { -; CHECK-NEXT: call void @func4() -; CHECK-NEXT: call void @func1() -; CHECK-NEXT: ret void +; AKF_CHECK-LABEL: define {{[^@]+}}@func2 +; AKF_CHECK-SAME: () #[[ATTR0]] { +; AKF_CHECK-NEXT: call void @func4() +; AKF_CHECK-NEXT: call void @func1() +; AKF_CHECK-NEXT: ret void +; +; ATTRIBUTOR_CHECK-LABEL: define {{[^@]+}}@func2 +; ATTRIBUTOR_CHECK-SAME: () #[[ATTR0]] { +; ATTRIBUTOR_CHECK-NEXT: call void @func4() #[[ATTR2:[0-9]+]] +; ATTRIBUTOR_CHECK-NEXT: call void @func1() #[[ATTR2]] +; ATTRIBUTOR_CHECK-NEXT: ret void ; call void @func4() call void @func1() @@ -30,21 +45,32 @@ define void @func2() #0 { } define void @func3() { -; CHECK-LABEL: define {{[^@]+}}@func3 -; CHECK-SAME: () #[[ATTR0]] { -; CHECK-NEXT: call void @func1() -; CHECK-NEXT: ret void +; AKF_CHECK-LABEL: define {{[^@]+}}@func3 +; AKF_CHECK-SAME: () #[[ATTR0]] { +; AKF_CHECK-NEXT: call void @func1() +; AKF_CHECK-NEXT: ret void +; +; ATTRIBUTOR_CHECK-LABEL: define {{[^@]+}}@func3 +; ATTRIBUTOR_CHECK-SAME: () #[[ATTR0]] { +; ATTRIBUTOR_CHECK-NEXT: call void @func1() #[[ATTR2]] +; ATTRIBUTOR_CHECK-NEXT: ret void ; call void @func1() ret void } define amdgpu_kernel void @kernel3() #0 { -; CHECK-LABEL: define {{[^@]+}}@kernel3 -; CHECK-SAME: () #[[ATTR1:[0-9]+]] { -; CHECK-NEXT: call void @func2() -; CHECK-NEXT: call void @func3() -; CHECK-NEXT: ret void +; AKF_CHECK-LABEL: define {{[^@]+}}@kernel3 +; 
AKF_CHECK-SAME: () #[[ATTR1:[0-9]+]] { +; AKF_CHECK-NEXT: call void @func2() +; AKF_CHECK-NEXT: call void @func3() +; AKF_CHECK-NEXT: ret void +; +; ATTRIBUTOR_CHECK-LABEL: define {{[^@]+}}@kernel3 +; ATTRIBUTOR_CHECK-SAME: () #[[ATTR1:[0-9]+]] { +; ATTRIBUTOR_CHECK-NEXT: call void @func2() #[[ATTR2]] +; ATTRIBUTOR_CHECK-NEXT: call void @func3() #[[ATTR2]] +; ATTRIBUTOR_CHECK-NEXT: ret void ; call void @func2() call void @func3() @@ -52,8 +78,11 @@ define amdgpu_kernel void @kernel3() #0 { } attributes #0 = { "uniform-work-group-size"="false" } - ;. -; CHECK: attributes #[[ATTR0]] = { "uniform-work-group-size"="false" } -; CHECK: attributes #[[ATTR1]] = { "amdgpu-calls" "uniform-work-group-size"="false" } +; AKF_CHECK: attributes #[[ATTR0]] = { "uniform-work-group-size"="false" } +; AKF_CHECK: attributes #[[ATTR1]] = { "amdgpu-calls" "uniform-work-group-size"="false" } +;. +; ATTRIBUTOR_CHECK: attributes #[[ATTR0]] = { nounwind writeonly "uniform-work-group-size"="false" } +; ATTRIBUTOR_CHECK: attributes #[[ATTR1]] = { "amdgpu-calls" "uniform-work-group-size"="false" } +; ATTRIBUTOR_CHECK: attributes #[[ATTR2]] = { nounwind writeonly } ;. -- 2.7.4