#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Module.h"
#include "llvm/Target/TargetMachine.h"
};
// Attributes to propagate.
+// TODO: Support conservative min/max merging instead of cloning.
static constexpr const char* AttributeNames[] = {
- "amdgpu-waves-per-eu"
+ "amdgpu-waves-per-eu",
+ "amdgpu-flat-work-group-size" // NOTE(review): values in the accompanying test look like "min,max" pairs (e.g. "1,256") - confirm.
};
static constexpr unsigned NumAttr =
}
bool AMDGPUPropagateAttributesEarly::runOnFunction(Function &F) {
- if (!TM || !AMDGPU::isEntryFunctionCC(F.getCallingConv()))
+ if (!TM) { // No TargetMachine set yet: try to obtain one from TargetPassConfig.
+ auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
+ if (!TPC) // TargetPassConfig is unavailable as well: report "no change".
+ return false;
+
+ TM = &TPC->getTM<TargetMachine>(); // Stored in TM, so the lookup above is skipped once TM is non-null.
+ }
+
+ if (!AMDGPU::isEntryFunctionCC(F.getCallingConv())) // Functions without an entry-function calling convention are left untouched.
return false;
return AMDGPUPropagateAttributes(TM, false).process(F);
}
bool AMDGPUPropagateAttributesLate::runOnModule(Module &M) {
- if (!TM)
- return false;
+ if (!TM) { // No TargetMachine set yet: try to obtain one from TargetPassConfig.
+ auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
+ if (!TPC) // TargetPassConfig is unavailable as well: report "no change".
+ return false;
+
+ TM = &TPC->getTM<TargetMachine>(); // Stored in TM, so the lookup above is skipped once TM is non-null.
+ }
return AMDGPUPropagateAttributes(TM, true).process(M);
}
--- /dev/null
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -amdgpu-propagate-attributes-late %s | FileCheck %s
+
+; CHECK: define internal void @max_flat_1_1024() #0 {
+define internal void @max_flat_1_1024() #0 {
+ ret void
+}
+
+; CHECK: define internal void @max_flat_1_256() #1 {
+define internal void @max_flat_1_256() #1 {
+ ret void
+}
+
+; CHECK: define amdgpu_kernel void @kernel_1_256_call_default() #1 {
+define amdgpu_kernel void @kernel_1_256_call_default() #1 { ; kernel in group #1 ("1,256") calling @default, whose input group #3 has no flat-work-group-size
+ call void @default()
+ ret void
+}
+
+; CHECK: define amdgpu_kernel void @kernel_1_256_call_1_256() #1 {
+define amdgpu_kernel void @kernel_1_256_call_1_256() #1 {
+ call void @max_flat_1_256()
+ ret void
+}
+
+; CHECK: define amdgpu_kernel void @kernel_1_256_call_64_64() #1 {
+define amdgpu_kernel void @kernel_1_256_call_64_64() #1 {
+ call void @max_flat_64_64()
+ ret void
+}
+
+; CHECK: define internal void @max_flat_64_64() #2 {
+define internal void @max_flat_64_64() #2 { ; input group #2 is "64,64"; the expected output group #2 below reads "1,256"
+ ret void
+}
+
+; CHECK: define internal void @default() #2 {
+define internal void @default() #3 { ; input group #3 is noinline only; the expected output places this function in group #2
+ ret void
+}
+
+attributes #0 = { noinline "amdgpu-flat-work-group-size"="1,1024" }
+attributes #1 = { noinline "amdgpu-flat-work-group-size"="1,256" }
+attributes #2 = { noinline "amdgpu-flat-work-group-size"="64,64" }
+attributes #3 = { noinline }
+
+; CHECK: attributes #0 = { noinline "amdgpu-flat-work-group-size"="1,1024"
+; CHECK-NEXT: attributes #1 = { noinline "amdgpu-flat-work-group-size"="1,256"
+; CHECK-NEXT: attributes #2 = { noinline "amdgpu-flat-work-group-size"="1,256"