FunctionPass *createAMDGPUCodeGenPreparePass();
FunctionPass *createAMDGPULateCodeGenPreparePass();
FunctionPass *createAMDGPUMachineCFGStructurizerPass();
-FunctionPass *createAMDGPUPropagateAttributesEarlyPass(const TargetMachine *);
-ModulePass *createAMDGPUPropagateAttributesLatePass(const TargetMachine *);
FunctionPass *createAMDGPURewriteOutArgumentsPass();
ModulePass *createAMDGPULowerModuleLDSPass();
FunctionPass *createSIModeRegisterPass();
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
};
-void initializeAMDGPUPropagateAttributesEarlyPass(PassRegistry &);
-extern char &AMDGPUPropagateAttributesEarlyID;
-
-struct AMDGPUPropagateAttributesEarlyPass
- : PassInfoMixin<AMDGPUPropagateAttributesEarlyPass> {
- AMDGPUPropagateAttributesEarlyPass(TargetMachine &TM) : TM(TM) {}
- PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
-
-private:
- TargetMachine &TM;
-};
-
-void initializeAMDGPUPropagateAttributesLatePass(PassRegistry &);
-extern char &AMDGPUPropagateAttributesLateID;
-
-struct AMDGPUPropagateAttributesLatePass
- : PassInfoMixin<AMDGPUPropagateAttributesLatePass> {
- AMDGPUPropagateAttributesLatePass(TargetMachine &TM) : TM(TM) {}
- PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
-
-private:
- TargetMachine &TM;
-};
-
void initializeAMDGPULowerModuleLDSPass(PassRegistry &);
extern char &AMDGPULowerModuleLDSID;
+++ /dev/null
-//===--- AMDGPUPropagateAttributes.cpp --------------------------*- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-/// \file
-/// \brief This pass propagates attributes from kernels to non-entry
-/// functions. Most of the library functions were not compiled for a specific
-/// ABI, yet will be compiled correctly if the proper attributes are propagated
-/// from the caller.
-///
-/// The pass analyzes the call graph and propagates ABI target features through
-/// it.
-///
-/// It can run in two modes: as a function pass or as a module pass. The
-/// function pass simply propagates attributes. The module pass clones
-/// functions if there are callers with different ABIs. If a function is
-/// cloned, all call sites will be updated to use the correct clone.
-///
-/// A function pass is limited in functionality but can run early in the
-/// pipeline. A module pass is more powerful but has to run late, so misses
-/// library folding opportunities.
-//
-//===----------------------------------------------------------------------===//
-
-#include "AMDGPU.h"
-#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
-#include "Utils/AMDGPUBaseInfo.h"
-#include "llvm/ADT/SmallSet.h"
-#include "llvm/CodeGen/TargetPassConfig.h"
-#include "llvm/CodeGen/TargetSubtargetInfo.h"
-#include "llvm/IR/InstrTypes.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Transforms/Utils/Cloning.h"
-
-#define DEBUG_TYPE "amdgpu-propagate-attributes"
-
-using namespace llvm;
-
-namespace llvm {
-extern const SubtargetFeatureKV AMDGPUFeatureKV[AMDGPU::NumSubtargetFeatures-1];
-}
-
-namespace {
-
-// Target features to propagate.
-static constexpr const FeatureBitset TargetFeatures = {
- AMDGPU::FeatureWavefrontSize16,
- AMDGPU::FeatureWavefrontSize32,
- AMDGPU::FeatureWavefrontSize64
-};
-
-class AMDGPUPropagateAttributes {
-
- class FnProperties {
- private:
- explicit FnProperties(const FeatureBitset &&FB) : Features(FB) {}
-
- public:
- explicit FnProperties(const TargetMachine &TM, const Function &F) {
- Features = TM.getSubtargetImpl(F)->getFeatureBits();
- }
-
- bool operator == (const FnProperties &Other) const {
- if ((Features & TargetFeatures) != (Other.Features & TargetFeatures))
- return false;
- return true;
- }
-
- FnProperties adjustToCaller(const FnProperties &CallerProps) const {
- FnProperties New((Features & ~TargetFeatures) | CallerProps.Features);
- return New;
- }
-
- FeatureBitset Features;
- };
-
- class Clone {
- public:
- Clone(const FnProperties &Props, Function *OrigF, Function *NewF) :
- Properties(Props), OrigF(OrigF), NewF(NewF) {}
-
- FnProperties Properties;
- Function *OrigF;
- Function *NewF;
- };
-
- const TargetMachine *TM;
-
- // Clone functions as needed or just set attributes.
- bool AllowClone;
-
- // Option propagation roots.
- SmallSet<Function *, 32> Roots;
-
- // Clones of functions with their attributes.
- SmallVector<Clone, 32> Clones;
-
- // Find a clone with required features.
- Function *findFunction(const FnProperties &PropsNeeded,
- Function *OrigF);
-
- // Clone function \p F and set \p NewProps on the clone.
- // The clone takes the name of the original function.
- Function *cloneWithProperties(Function &F, const FnProperties &NewProps);
-
- // Set new function's features in place.
- void setFeatures(Function &F, const FeatureBitset &NewFeatures);
-
- std::string getFeatureString(const FeatureBitset &Features) const;
-
- // Propagate attributes from Roots.
- bool process();
-
-public:
- AMDGPUPropagateAttributes(const TargetMachine *TM, bool AllowClone) :
- TM(TM), AllowClone(AllowClone) {}
-
- // Use F as a root and propagate its attributes.
- bool process(Function &F);
-
- // Propagate attributes starting from kernel functions.
- bool process(Module &M);
-};
-
-// Allows attributes to be propagated early, but no cloning is allowed because
-// it must be a function pass in order to run before any optimizations.
-// TODO: We should only need one instance of the module pass, but that needs to
-// be in the linker pipeline, which is currently not possible.
-class AMDGPUPropagateAttributesEarly : public FunctionPass {
- const TargetMachine *TM;
-
-public:
- static char ID; // Pass identification
-
- AMDGPUPropagateAttributesEarly(const TargetMachine *TM = nullptr) :
- FunctionPass(ID), TM(TM) {
- initializeAMDGPUPropagateAttributesEarlyPass(
- *PassRegistry::getPassRegistry());
- }
-
- bool runOnFunction(Function &F) override;
-};
-
-// Allows attributes to be propagated with cloning, but does so late in the
-// pipeline.
-class AMDGPUPropagateAttributesLate : public ModulePass {
- const TargetMachine *TM;
-
-public:
- static char ID; // Pass identification
-
- AMDGPUPropagateAttributesLate(const TargetMachine *TM = nullptr) :
- ModulePass(ID), TM(TM) {
- initializeAMDGPUPropagateAttributesLatePass(
- *PassRegistry::getPassRegistry());
- }
-
- bool runOnModule(Module &M) override;
-};
-
-} // end anonymous namespace.
-
-char AMDGPUPropagateAttributesEarly::ID = 0;
-char AMDGPUPropagateAttributesLate::ID = 0;
-
-INITIALIZE_PASS(AMDGPUPropagateAttributesEarly,
- "amdgpu-propagate-attributes-early",
- "Early propagate attributes from kernels to functions",
- false, false)
-INITIALIZE_PASS(AMDGPUPropagateAttributesLate,
- "amdgpu-propagate-attributes-late",
- "Late propagate attributes from kernels to functions",
- false, false)
-
-Function *
-AMDGPUPropagateAttributes::findFunction(const FnProperties &PropsNeeded,
- Function *OrigF) {
- // TODO: search for clone's clones.
- for (Clone &C : Clones)
- if (C.OrigF == OrigF && PropsNeeded == C.Properties)
- return C.NewF;
-
- return nullptr;
-}
-
-bool AMDGPUPropagateAttributes::process(Module &M) {
- for (auto &F : M.functions())
- if (AMDGPU::isKernel(F.getCallingConv()))
- Roots.insert(&F);
-
- return Roots.empty() ? false : process();
-}
-
-bool AMDGPUPropagateAttributes::process(Function &F) {
- Roots.insert(&F);
- return process();
-}
-
-bool AMDGPUPropagateAttributes::process() {
- bool Changed = false;
- SmallSet<Function *, 32> NewRoots;
- SmallSet<Function *, 32> Replaced;
-
- assert(!Roots.empty());
- Module &M = *(*Roots.begin())->getParent();
-
- do {
- Roots.insert(NewRoots.begin(), NewRoots.end());
- NewRoots.clear();
-
- for (auto &F : M.functions()) {
- if (F.isDeclaration())
- continue;
-
- const FnProperties CalleeProps(*TM, F);
- SmallVector<std::pair<CallBase *, Function *>, 32> ToReplace;
- SmallSet<CallBase *, 32> Visited;
-
- for (User *U : F.users()) {
- Instruction *I = dyn_cast<Instruction>(U);
- if (!I)
- continue;
- CallBase *CI = dyn_cast<CallBase>(I);
- // Only propagate attributes if F is the called function. Specifically,
- // do not propagate attributes if F is passed as an argument.
- // FIXME: handle bitcasted callee, e.g.
- // %retval = call i8* bitcast (i32* ()* @f to i8* ()*)()
- if (!CI || CI->getCalledOperand() != &F)
- continue;
- Function *Caller = CI->getCaller();
- if (!Caller || !Visited.insert(CI).second)
- continue;
- if (!Roots.count(Caller) && !NewRoots.count(Caller))
- continue;
-
- const FnProperties CallerProps(*TM, *Caller);
-
- if (CalleeProps == CallerProps) {
- if (!Roots.count(&F))
- NewRoots.insert(&F);
- continue;
- }
-
- Function *NewF = findFunction(CallerProps, &F);
- if (!NewF) {
- const FnProperties NewProps = CalleeProps.adjustToCaller(CallerProps);
- if (!AllowClone) {
- // This may set different features on different iterations if
- // there is a contradiction in callers' attributes. In this case
- // we rely on a second pass running on Module, which is allowed
- // to clone.
- setFeatures(F, NewProps.Features);
- NewRoots.insert(&F);
- Changed = true;
- break;
- }
-
- NewF = cloneWithProperties(F, NewProps);
- Clones.push_back(Clone(CallerProps, &F, NewF));
- NewRoots.insert(NewF);
- }
-
- ToReplace.push_back(std::pair(CI, NewF));
- Replaced.insert(&F);
-
- Changed = true;
- }
-
- while (!ToReplace.empty()) {
- auto R = ToReplace.pop_back_val();
- R.first->setCalledFunction(R.second);
- }
- }
- } while (!NewRoots.empty());
-
- for (Function *F : Replaced) {
- if (F->use_empty())
- F->eraseFromParent();
- }
-
- Roots.clear();
- Clones.clear();
-
- return Changed;
-}
-
-Function *
-AMDGPUPropagateAttributes::cloneWithProperties(Function &F,
- const FnProperties &NewProps) {
- LLVM_DEBUG(dbgs() << "Cloning " << F.getName() << '\n');
-
- ValueToValueMapTy dummy;
- Function *NewF = CloneFunction(&F, dummy);
- setFeatures(*NewF, NewProps.Features);
- NewF->setVisibility(GlobalValue::DefaultVisibility);
- NewF->setLinkage(GlobalValue::InternalLinkage);
-
- // Swap names. If that is the only clone, it will retain the name of the
- // now-dead value. Preserve the original name for externally visible functions.
- if (F.hasName() && F.hasLocalLinkage()) {
- std::string NewName = std::string(NewF->getName());
- NewF->takeName(&F);
- F.setName(NewName);
- }
-
- return NewF;
-}
-
-void AMDGPUPropagateAttributes::setFeatures(Function &F,
- const FeatureBitset &NewFeatures) {
- std::string NewFeatureStr = getFeatureString(NewFeatures);
-
- LLVM_DEBUG(dbgs() << "Set features "
- << getFeatureString(NewFeatures & TargetFeatures)
- << " on " << F.getName() << '\n');
-
- F.removeFnAttr("target-features");
- F.addFnAttr("target-features", NewFeatureStr);
-}
-
-std::string
-AMDGPUPropagateAttributes::getFeatureString(const FeatureBitset &Features) const
-{
- std::string Ret;
- for (const SubtargetFeatureKV &KV : AMDGPUFeatureKV) {
- if (Features[KV.Value])
- Ret += (StringRef("+") + KV.Key + ",").str();
- else if (TargetFeatures[KV.Value])
- Ret += (StringRef("-") + KV.Key + ",").str();
- }
- Ret.pop_back(); // Remove last comma.
- return Ret;
-}
-
-bool AMDGPUPropagateAttributesEarly::runOnFunction(Function &F) {
- if (!TM) {
- auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
- if (!TPC)
- return false;
-
- TM = &TPC->getTM<TargetMachine>();
- }
-
- if (!AMDGPU::isKernel(F.getCallingConv()))
- return false;
-
- return AMDGPUPropagateAttributes(TM, false).process(F);
-}
-
-bool AMDGPUPropagateAttributesLate::runOnModule(Module &M) {
- if (!TM) {
- auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
- if (!TPC)
- return false;
-
- TM = &TPC->getTM<TargetMachine>();
- }
-
- return AMDGPUPropagateAttributes(TM, true).process(M);
-}
-
-FunctionPass
-*llvm::createAMDGPUPropagateAttributesEarlyPass(const TargetMachine *TM) {
- return new AMDGPUPropagateAttributesEarly(TM);
-}
-
-ModulePass
-*llvm::createAMDGPUPropagateAttributesLatePass(const TargetMachine *TM) {
- return new AMDGPUPropagateAttributesLate(TM);
-}
-
-PreservedAnalyses
-AMDGPUPropagateAttributesEarlyPass::run(Function &F,
- FunctionAnalysisManager &AM) {
- if (!AMDGPU::isEntryFunctionCC(F.getCallingConv()))
- return PreservedAnalyses::all();
-
- return AMDGPUPropagateAttributes(&TM, false).process(F)
- ? PreservedAnalyses::none()
- : PreservedAnalyses::all();
-}
-
-PreservedAnalyses
-AMDGPUPropagateAttributesLatePass::run(Module &M, ModuleAnalysisManager &AM) {
- return AMDGPUPropagateAttributes(&TM, true).process(M)
- ? PreservedAnalyses::none()
- : PreservedAnalyses::all();
-}
initializeAMDGPUPromoteAllocaToVectorPass(*PR);
initializeAMDGPUCodeGenPreparePass(*PR);
initializeAMDGPULateCodeGenPreparePass(*PR);
- initializeAMDGPUPropagateAttributesEarlyPass(*PR);
- initializeAMDGPUPropagateAttributesLatePass(*PR);
initializeAMDGPURemoveIncompatibleFunctionsPass(*PR);
initializeAMDGPULowerModuleLDSPass(*PR);
initializeAMDGPURewriteOutArgumentsPass(*PR);
void AMDGPUTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB) {
PB.registerPipelineParsingCallback(
- [this](StringRef PassName, ModulePassManager &PM,
- ArrayRef<PassBuilder::PipelineElement>) {
- if (PassName == "amdgpu-propagate-attributes-late") {
- PM.addPass(AMDGPUPropagateAttributesLatePass(*this));
- return true;
- }
+ [](StringRef PassName, ModulePassManager &PM,
+ ArrayRef<PassBuilder::PipelineElement>) {
if (PassName == "amdgpu-unify-metadata") {
PM.addPass(AMDGPUUnifyMetadataPass());
return true;
PM.addPass(AMDGPULowerKernelAttributesPass());
return true;
}
- if (PassName == "amdgpu-propagate-attributes-early") {
- PM.addPass(AMDGPUPropagateAttributesEarlyPass(*this));
- return true;
- }
if (PassName == "amdgpu-promote-kernel-arguments") {
PM.addPass(AMDGPUPromoteKernelArgumentsPass());
return true;
PB.registerPipelineStartEPCallback(
[this](ModulePassManager &PM, OptimizationLevel Level) {
FunctionPassManager FPM;
- FPM.addPass(AMDGPUPropagateAttributesEarlyPass(*this));
FPM.addPass(AMDGPUUseNativeCallsPass());
if (EnableLibCallSimplify && Level != OptimizationLevel::O0)
FPM.addPass(AMDGPUSimplifyLibCallsPass(*this));
});
PB.registerPipelineEarlySimplificationEPCallback(
- [this](ModulePassManager &PM, OptimizationLevel Level) {
+ [](ModulePassManager &PM, OptimizationLevel Level) {
PM.addPass(AMDGPUPrintfRuntimeBindingPass());
if (Level == OptimizationLevel::O0)
if (InternalizeSymbols) {
PM.addPass(InternalizePass(mustPreserveGV));
- }
- PM.addPass(AMDGPUPropagateAttributesLatePass(*this));
- if (InternalizeSymbols) {
PM.addPass(GlobalDCEPass());
}
+
if (EarlyInlineAll && !EnableFunctionCalls)
PM.addPass(AMDGPUAlwaysInlinePass());
});
if (LowerCtorDtor)
addPass(createAMDGPUCtorDtorLoweringLegacyPass());
- // A call to propagate attributes pass in the backend in case opt was not run.
- addPass(createAMDGPUPropagateAttributesEarlyPass(&TM));
-
// Function calls are not supported, so make sure we inline everything.
addPass(createAMDGPUAlwaysInlinePass());
addPass(createAlwaysInlinerLegacyPass());
AMDGPUPreLegalizerCombiner.cpp
AMDGPUPrintfRuntimeBinding.cpp
AMDGPUPromoteAlloca.cpp
- AMDGPUPropagateAttributes.cpp
AMDGPUPromoteKernelArguments.cpp
AMDGPURegBankCombiner.cpp
AMDGPURegBankSelect.cpp
; GCN-O0-NEXT: FunctionPass Manager
; GCN-O0-NEXT: Dominator Tree Construction
; GCN-O0-NEXT: Lower ctors and dtors for AMDGPU
-; GCN-O0-NEXT: FunctionPass Manager
-; GCN-O0-NEXT: Early propagate attributes from kernels to functions
; GCN-O0-NEXT: AMDGPU Inline All Functions
; GCN-O0-NEXT: Inliner for always_inline functions
; GCN-O0-NEXT: FunctionPass Manager
; GCN-O1-NEXT: FunctionPass Manager
; GCN-O1-NEXT: Dominator Tree Construction
; GCN-O1-NEXT: Lower ctors and dtors for AMDGPU
-; GCN-O1-NEXT: FunctionPass Manager
-; GCN-O1-NEXT: Early propagate attributes from kernels to functions
; GCN-O1-NEXT: AMDGPU Inline All Functions
; GCN-O1-NEXT: Inliner for always_inline functions
; GCN-O1-NEXT: FunctionPass Manager
; GCN-O1-OPTS-NEXT: FunctionPass Manager
; GCN-O1-OPTS-NEXT: Dominator Tree Construction
; GCN-O1-OPTS-NEXT: Lower ctors and dtors for AMDGPU
-; GCN-O1-OPTS-NEXT: FunctionPass Manager
-; GCN-O1-OPTS-NEXT: Early propagate attributes from kernels to functions
; GCN-O1-OPTS-NEXT: AMDGPU Inline All Functions
; GCN-O1-OPTS-NEXT: Inliner for always_inline functions
; GCN-O1-OPTS-NEXT: FunctionPass Manager
; GCN-O2-NEXT: FunctionPass Manager
; GCN-O2-NEXT: Dominator Tree Construction
; GCN-O2-NEXT: Lower ctors and dtors for AMDGPU
-; GCN-O2-NEXT: FunctionPass Manager
-; GCN-O2-NEXT: Early propagate attributes from kernels to functions
; GCN-O2-NEXT: AMDGPU Inline All Functions
; GCN-O2-NEXT: Inliner for always_inline functions
; GCN-O2-NEXT: FunctionPass Manager
; GCN-O3-NEXT: FunctionPass Manager
; GCN-O3-NEXT: Dominator Tree Construction
; GCN-O3-NEXT: Lower ctors and dtors for AMDGPU
-; GCN-O3-NEXT: FunctionPass Manager
-; GCN-O3-NEXT: Early propagate attributes from kernels to functions
; GCN-O3-NEXT: AMDGPU Inline All Functions
; GCN-O3-NEXT: Inliner for always_inline functions
; GCN-O3-NEXT: FunctionPass Manager
+++ /dev/null
-; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -O1 < %s | FileCheck -check-prefixes=OPT,OPT-EXT %s
-; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes='default<O1>' < %s | FileCheck -check-prefixes=OPT,OPT-EXT %s
-; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -O1 --amdgpu-internalize-symbols < %s | FileCheck -check-prefixes=OPT,OPT-INT %s
-; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes='default<O1>' --amdgpu-internalize-symbols < %s | FileCheck -check-prefixes=OPT,OPT-INT %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefix=LLC %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefix=LLC %s
-
-; OPT: declare void @foo4() local_unnamed_addr #0
-; OPT: define internal fastcc void @0() unnamed_addr #1
-; OPT-EXT: define void @foo3() local_unnamed_addr #1
-; OPT-INT: define internal fastcc void @foo3.2() unnamed_addr #1
-; OPT-EXT: define void @foo2() local_unnamed_addr #1
-; OPT-INT: define internal fastcc void @foo2.3() unnamed_addr #1
-; OPT-EXT: define void @foo1() local_unnamed_addr #1
-; OPT-EXT: tail call void @foo4()
-; OPT-EXT: tail call void @foo3()
-; OPT-EXT: tail call void @foo2()
-; OPT-EXT: tail call void @foo2()
-; OPT-EXT: tail call void @foo1()
-; OPT-EXT: tail call fastcc void @0()
-; OPT-INT: define internal fastcc void @foo1.1() unnamed_addr #1
-; OPT-INT: tail call void @foo4()
-; OPT-INT: tail call fastcc void @foo3.2()
-; OPT-INT: tail call fastcc void @foo2.3()
-; OPT-INT: tail call fastcc void @foo2.3()
-; OPT-INT: tail call fastcc void @foo1.1()
-; OPT-INT: tail call fastcc void @0()
-; OPT: ret void
-; OPT: define amdgpu_kernel void @kernel1() local_unnamed_addr #2
-; OPT-EXT: tail call fastcc void @foo1.1()
-; OPT-INT: tail call fastcc void @foo1()
-; OPT: ret void
-; OPT: define amdgpu_kernel void @kernel2() local_unnamed_addr #3
-; OPT-EXT: tail call void @foo2()
-; OPT-INT: tail call fastcc void @foo2.3()
-; OPT: ret void
-; OPT: define amdgpu_kernel void @kernel3() local_unnamed_addr #3
-; OPT-EXT: tail call void @foo1()
-; OPT-INT: tail call fastcc void @foo1.1()
-; OPT: ret void
-; OPT-EXT: define internal fastcc void @foo1.1() unnamed_addr #4
-; OPT-EXT: tail call void @foo4()
-; OPT-EXT: tail call fastcc void @foo3.2()
-; OPT-EXT: tail call fastcc void @foo2.3()
-; OPT-EXT: tail call fastcc void @foo2.3()
-; OPT-EXT: tail call fastcc void @foo1.1()
-; OPT-EXT: tail call fastcc void @1()
-; OPT-INT: define internal fastcc void @foo1() unnamed_addr #4
-; OPT-INT: tail call void @foo4()
-; OPT-INT: tail call fastcc void @foo3()
-; OPT-INT: tail call fastcc void @foo2()
-; OPT-INT: tail call fastcc void @foo2()
-; OPT-INT: tail call fastcc void @foo1()
-; OPT-INT: tail call fastcc void @1()
-; OPT: ret void
-; OPT: define internal fastcc void @1() unnamed_addr #4
-; OPT-EXT: define internal fastcc void @foo3.2() unnamed_addr #4
-; OPT-INT: define internal fastcc void @foo3() unnamed_addr #4
-; OPT-EXT: define internal fastcc void @foo2.3() unnamed_addr #4
-; OPT-INT: define internal fastcc void @foo2() unnamed_addr #4
-; OPT: attributes #0 = { {{.*}} "target-features"="+wavefrontsize64" }
-; OPT: attributes #1 = { {{.*}} "target-features"="{{.*}},-wavefrontsize16,-wavefrontsize32,+wavefrontsize64{{.*}}" }
-; OPT: attributes #2 = { {{.*}} "target-features"="+wavefrontsize32" }
-; OPT: attributes #3 = { {{.*}} "target-features"="+wavefrontsize64" }
-; OPT: attributes #4 = { {{.*}} "target-features"="{{.*}},-wavefrontsize16,+wavefrontsize32,-wavefrontsize64{{.*}}" }
-
-; LLC: foo3:
-; LLC: sample asm
-; LLC: foo2:
-; LLC: sample asm
-; LLC: foo1:
-; LLC: foo4@gotpcrel32@lo+4
-; LLC: foo4@gotpcrel32@hi+12
-; LLC: foo3@gotpcrel32@lo+4
-; LLC: foo3@gotpcrel32@hi+12
-; LLC: foo2@gotpcrel32@lo+4
-; LLC: foo2@gotpcrel32@hi+12
-; LLC: foo1@gotpcrel32@lo+4
-; LLC: foo1@gotpcrel32@hi+12
-; LLC: __unnamed_1@gotpcrel32@lo+4
-; LLC: __unnamed_1@gotpcrel32@hi+12
-; LLC: kernel1:
-; LLC: foo1@gotpcrel32@lo+4
-; LLC: foo1@gotpcrel32@hi+12
-; LLC: kernel2:
-; LLC: foo2@gotpcrel32@lo+4
-; LLC: foo2@gotpcrel32@hi+12
-; LLC: kernel3:
-; LLC: foo1@gotpcrel32@lo+4
-; LLC: foo1@gotpcrel32@hi+12
-
-declare void @foo4() #1
-
-define void @0() #1 {
-entry:
- call void asm sideeffect "; sample asm", ""()
- ret void
-}
-
-define void @foo3() #4 {
-entry:
- call void asm sideeffect "; sample asm", ""()
- ret void
-}
-
-define void @foo2() #1 {
-entry:
- call void asm sideeffect "; sample asm", ""()
- ret void
-}
-
-define void @foo1() #1 {
-entry:
- tail call void @foo4()
- tail call void @foo3()
- tail call void @foo2()
- tail call void @foo2()
- tail call void @foo1()
- tail call void @0()
- ret void
-}
-
-define amdgpu_kernel void @kernel1() #0 {
-entry:
- tail call void @foo1()
- ret void
-}
-
-define amdgpu_kernel void @kernel2() #2 {
-entry:
- tail call void @foo2()
- ret void
-}
-
-define amdgpu_kernel void @kernel3() #3 {
-entry:
- tail call void @foo1()
- ret void
-}
-
-attributes #0 = { nounwind "target-features"="+wavefrontsize32" }
-attributes #1 = { noinline nounwind "target-features"="+wavefrontsize64" }
-attributes #2 = { nounwind "target-features"="+wavefrontsize64" }
-attributes #3 = { nounwind "target-features"="+wavefrontsize64" }
-attributes #4 = { noinline nounwind "target-features"="+wavefrontsize64" }
+++ /dev/null
-; This is a regression test for a bug in the AMDGPU Propagate Attributes pass
-; where a call instruction's callee could be replaced with a function pointer
-; passed to the original call instruction as an argument.
-;
-; Example:
-; `call void @f(ptr @g)`
-; could become
-; `call void @g(ptr @g.1)`
-; which is invalid IR.
-
-; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=amdgpu-propagate-attributes-late %s | FileCheck %s
-
-; CHECK-LABEL: define amdgpu_kernel void @thiswasabug() #0
-; CHECK-NOT: call void @g(ptr @g.1)
-; CHECK-DAG: call void @f(ptr @g.1)
-; CHECK-DAG: call void @g()
-define amdgpu_kernel void @thiswasabug() #0 {
- ; no replacement, but @g should be renamed to @g.1
- call void @f(ptr @g)
-
- ; this should call the clone, which takes the name @g
- call void @g()
- ret void
-}
-
-define private void @f(ptr nocapture %0) #0 {
- ret void
-}
-
-; In order to expose this bug, it is necessary that `g` have one of the
-; propagated attributes, so that a clone and substitution would take place if g
-; were actually the function being called.
-; CHECK-DAG: define private void @g.1() #1
-; CHECK-DAG: define internal void @g() #2
-define private void @g() #1 {
- ret void
-}
-
-attributes #0 = { noinline }
-attributes #1 = { noinline "target-features"="+wavefrontsize32" }
+++ /dev/null
-; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -O1 < %s | FileCheck -check-prefix=OPT %s
-; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes='default<O1>' < %s | FileCheck -check-prefix=OPT %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefix=LLC %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefix=LLC %s
-
-; OPT: declare void @foo4() local_unnamed_addr #0
-; OPT: define void @foo3() local_unnamed_addr #1
-; OPT: define void @foo2() local_unnamed_addr #1
-; OPT: define void @foo1() local_unnamed_addr #1
-; OPT: define amdgpu_kernel void @kernel1() local_unnamed_addr #2
-; OPT: define amdgpu_kernel void @kernel2() local_unnamed_addr #2
-; OPT: attributes #0 = { {{.*}} "target-features"="+wavefrontsize64" }
-; OPT: attributes #1 = { {{.*}} "target-features"="{{.*}},-wavefrontsize16,+wavefrontsize32,-wavefrontsize64
-; OPT: attributes #2 = { {{.*}} "target-features"="+wavefrontsize32
-; OPT: attributes #3 = { nounwind }
-
-; LLC: foo3:
-; LLC: sample asm
-; LLC: foo2:
-; LLC: sample asm
-; LLC: foo1:
-; LLC: foo4@gotpcrel32@lo+4
-; LLC: foo4@gotpcrel32@hi+12
-; LLC: foo3@gotpcrel32@lo+4
-; LLC: foo3@gotpcrel32@hi+12
-; LLC: foo2@gotpcrel32@lo+4
-; LLC: foo2@gotpcrel32@hi+12
-; LLC: foo1@gotpcrel32@lo+4
-; LLC: foo1@gotpcrel32@hi+12
-; LLC: kernel1:
-; LLC: foo1@gotpcrel32@lo+4
-; LLC: foo1@gotpcrel32@hi+12
-; LLC: kernel2:
-; LLC: foo2@gotpcrel32@lo+4
-; LLC: foo2@gotpcrel32@hi+12
-
-declare void @foo4() #1
-
-define void @foo3() #1 {
-entry:
- call void asm sideeffect "; sample asm", ""()
- ret void
-}
-
-define void @foo2() #1 {
-entry:
- call void asm sideeffect "; sample asm", ""()
- ret void
-}
-
-define void @foo1() #1 {
-entry:
- tail call void @foo4()
- tail call void @foo3()
- tail call void @foo2()
- tail call void @foo2()
- tail call void @foo1()
- ret void
-}
-
-define amdgpu_kernel void @kernel1() #0 {
-entry:
- tail call void @foo1()
- ret void
-}
-
-define amdgpu_kernel void @kernel2() #0 {
-entry:
- tail call void @foo2()
- ret void
-}
-
-attributes #0 = { nounwind "target-features"="+wavefrontsize32" }
-attributes #1 = { noinline nounwind "target-features"="+wavefrontsize64" }
"AMDGPUPrintfRuntimeBinding.cpp",
"AMDGPUPromoteAlloca.cpp",
"AMDGPUPromoteKernelArguments.cpp",
- "AMDGPUPropagateAttributes.cpp",
"AMDGPURegBankCombiner.cpp",
"AMDGPURegBankSelect.cpp",
"AMDGPURegisterBankInfo.cpp",