auto &CI = FAM.getResult<CycleAnalysis>(F);
UniformityInfo UI{F, DT, CI, &TTI};
// Skip computation if we can assume everything is uniform.
- if (TTI.hasBranchDivergence())
+ if (TTI.hasBranchDivergence(&F))
UI.compute();
return UI;
UniformityInfo{F, domTree, cycleInfo, &targetTransformInfo};
// Skip computation if we can assume everything is uniform.
- if (targetTransformInfo.hasBranchDivergence())
+ if (targetTransformInfo.hasBranchDivergence(m_function))
m_uniformityInfo.compute();
return false;
#include "AMDGPURegBankSelect.h"
#include "AMDGPU.h"
+#include "GCNSubtarget.h"
#include "llvm/CodeGen/MachineUniformityAnalysis.h"
#include "llvm/InitializePasses.h"
assert(checkFunctionIsLegal(MF));
+ const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
MachineCycleInfo &CycleInfo =
getAnalysis<MachineCycleInfoWrapperPass>().getCycleInfo();
MachineDominatorTree &DomTree = getAnalysis<MachineDominatorTree>();
- // TODO: Check for single lane execution.
MachineUniformityInfo Uniformity =
- computeMachineUniformityInfo(MF, CycleInfo, DomTree.getBase(), true);
+ computeMachineUniformityInfo(MF, CycleInfo, DomTree.getBase(),
+ !ST.isSingleLaneExecution(F));
(void)Uniformity; // TODO: Use this
assignRegisterBanks(MF);
ret void
}
+; Kernel carrying attribute group #2: no value in it, including the result of
+; the workitem.id.x query, may be printed as divergent.
+; CHECK-LABEL: UniformityInfo for function 'workitem_id_x_singlethreaded':
+; CHECK-NOT: DIVERGENT
+define amdgpu_kernel void @workitem_id_x_singlethreaded() #2 {
+ %tid.x = call i32 @llvm.amdgcn.workitem.id.x()
+ store volatile i32 %tid.x, ptr addrspace(1) undef
+ ret void
+}
+
+; Kernel carrying attribute group #2: the workitem.id.y query must not be
+; printed as divergent.
+; CHECK-LABEL: UniformityInfo for function 'workitem_id_y_singlethreaded':
+; CHECK-NOT: DIVERGENT
+define amdgpu_kernel void @workitem_id_y_singlethreaded() #2 {
+ %id.y = call i32 @llvm.amdgcn.workitem.id.y()
+ store volatile i32 %id.y, ptr addrspace(1) undef
+ ret void
+}
+
+; Kernel carrying attribute group #2: the workitem.id.z query must not be
+; printed as divergent. This test has to call the z intrinsic itself so that
+; all three dimensions are covered.
+; CHECK-LABEL: UniformityInfo for function 'workitem_id_z_singlethreaded':
+; CHECK-NOT: DIVERGENT
+define amdgpu_kernel void @workitem_id_z_singlethreaded() #2 {
+ %id.z = call i32 @llvm.amdgcn.workitem.id.z()
+ store volatile i32 %id.z, ptr addrspace(1) undef
+ ret void
+}
+
+; Same expectation as the attribute-based variant, but the kernel is tagged
+; with !reqd_work_group_size !0 instead: nothing may be printed as divergent.
+; CHECK-LABEL: UniformityInfo for function 'workitem_id_x_singlethreaded_md':
+; CHECK-NOT: DIVERGENT
+define amdgpu_kernel void @workitem_id_x_singlethreaded_md() !reqd_work_group_size !0 {
+ %tid.x = call i32 @llvm.amdgcn.workitem.id.x()
+ store volatile i32 %tid.x, ptr addrspace(1) undef
+ ret void
+}
+
+; Kernel tagged with !reqd_work_group_size !0: the workitem.id.y query must
+; not be printed as divergent.
+; CHECK-LABEL: UniformityInfo for function 'workitem_id_y_singlethreaded_md':
+; CHECK-NOT: DIVERGENT
+define amdgpu_kernel void @workitem_id_y_singlethreaded_md() !reqd_work_group_size !0 {
+ %id.y = call i32 @llvm.amdgcn.workitem.id.y()
+ store volatile i32 %id.y, ptr addrspace(1) undef
+ ret void
+}
+
+; Kernel tagged with !reqd_work_group_size !0: the workitem.id.z query must
+; not be printed as divergent. This test has to call the z intrinsic itself so
+; that all three dimensions are covered.
+; CHECK-LABEL: UniformityInfo for function 'workitem_id_z_singlethreaded_md':
+; CHECK-NOT: DIVERGENT
+define amdgpu_kernel void @workitem_id_z_singlethreaded_md() !reqd_work_group_size !0 {
+ %id.z = call i32 @llvm.amdgcn.workitem.id.z()
+ store volatile i32 %id.z, ptr addrspace(1) undef
+ ret void
+}
+
+; Negative counterpart of the *_singlethreaded_md tests: the kernel is tagged
+; with !reqd_work_group_size !1, whose first entry is 2, and the
+; workitem.id.x query is expected to be printed as divergent.
+; CHECK-LABEL: UniformityInfo for function 'workitem_id_x_not_singlethreaded_dimx':
+; CHECK: DIVERGENT: %id.x = call i32 @llvm.amdgcn.workitem.id.x()
+define amdgpu_kernel void @workitem_id_x_not_singlethreaded_dimx() !reqd_work_group_size !1 {
+ %id.x = call i32 @llvm.amdgcn.workitem.id.x()
+ store volatile i32 %id.x, ptr addrspace(1) undef
+ ret void
+}
+
+; The kernel is tagged with !reqd_work_group_size !2, whose second entry is 2.
+; Only the first entry is 1, yet the workitem.id.x query is still expected to
+; be printed as divergent.
+; CHECK-LABEL: UniformityInfo for function 'workitem_id_x_not_singlethreaded_dimy':
+; CHECK: DIVERGENT: %id.x = call i32 @llvm.amdgcn.workitem.id.x()
+define amdgpu_kernel void @workitem_id_x_not_singlethreaded_dimy() !reqd_work_group_size !2 {
+ %id.x = call i32 @llvm.amdgcn.workitem.id.x()
+ store volatile i32 %id.x, ptr addrspace(1) undef
+ ret void
+}
+
+; The kernel is tagged with !reqd_work_group_size !3, whose third entry is 2.
+; Only the first two entries are 1, yet the workitem.id.x query is still
+; expected to be printed as divergent.
+; CHECK-LABEL: UniformityInfo for function 'workitem_id_x_not_singlethreaded_dimz':
+; CHECK: DIVERGENT: %id.x = call i32 @llvm.amdgcn.workitem.id.x()
+define amdgpu_kernel void @workitem_id_x_not_singlethreaded_dimz() !reqd_work_group_size !3 {
+ %id.x = call i32 @llvm.amdgcn.workitem.id.x()
+ store volatile i32 %id.x, ptr addrspace(1) undef
+ ret void
+}
+
attributes #0 = { nounwind readnone }
attributes #1 = { nounwind }
+; Attribute group attached to the workitem_id_{x,y,z}_singlethreaded kernels.
+attributes #2 = { "amdgpu-flat-work-group-size"="1,1" }
+
+; Operands for !reqd_work_group_size.
+; !0 (all entries 1) is used by the *_singlethreaded_md kernels.
+; !1, !2 and !3 each raise exactly one entry to 2 and are used by the
+; *_not_singlethreaded_dimx, _dimy and _dimz kernels respectively.
+!0 = !{i32 1, i32 1, i32 1}
+!1 = !{i32 2, i32 1, i32 1}
+!2 = !{i32 1, i32 2, i32 1}
+!3 = !{i32 1, i32 1, i32 2}