TRI(*MF.getSubtarget().getRegisterInfo()), Helper(Helper){};
bool isVgprRegBank(Register Reg);
+ Register getAsVgpr(Register Reg);
struct MinMaxMedOpc {
unsigned Min, Max, Med;
return RBI.getRegBank(Reg, MRI, TRI)->getID() == AMDGPU::VGPRRegBankID;
}
+// Return a vgpr-bank register holding the value of \p Reg. If \p Reg is
+// already assigned to the vgpr bank it is returned unchanged; otherwise an
+// existing sgpr->vgpr COPY of it is reused, or a new COPY is built.
+Register AMDGPURegBankCombinerHelper::getAsVgpr(Register Reg) {
+  if (isVgprRegBank(Reg))
+    return Reg;
+
+  // Search for existing copy of Reg to vgpr. Check the opcode before touching
+  // operand 0: only for a COPY is operand 0 guaranteed to be a register def,
+  // and MachineOperand::getReg() asserts on non-register operands.
+  for (MachineInstr &Use : MRI.use_instructions(Reg)) {
+    if (Use.getOpcode() == AMDGPU::COPY) {
+      Register Def = Use.getOperand(0).getReg();
+      if (isVgprRegBank(Def))
+        return Def;
+    }
+  }
+
+  // No reusable copy found: copy Reg to vgpr and assign the new register to
+  // the vgpr bank so later bank queries see it correctly.
+  Register VgprReg = B.buildCopy(MRI.getType(Reg), Reg).getReg(0);
+  MRI.setRegBank(VgprReg, RBI.getRegBank(AMDGPU::VGPRRegBankID));
+  return VgprReg;
+}
+
AMDGPURegBankCombinerHelper::MinMaxMedOpc
AMDGPURegBankCombinerHelper::getMinMaxPair(unsigned Opc) {
switch (Opc) {
Med3MatchInfo &MatchInfo) {
B.setInstrAndDebugLoc(MI);
B.buildInstr(MatchInfo.Opc, {MI.getOperand(0)},
- {MatchInfo.Val0, MatchInfo.Val1, MatchInfo.Val2}, MI.getFlags());
+ {getAsVgpr(MatchInfo.Val0), getAsVgpr(MatchInfo.Val1),
+ getAsVgpr(MatchInfo.Val2)},
+ MI.getFlags());
MI.eraseFromParent();
}
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=amdgcn-amd-mesa3d -mcpu=gfx900 -run-pass=amdgpu-regbank-combiner -verify-machineinstrs %s -o - | FileCheck %s
# RUN: llc -mtriple=amdgcn-amd-mesa3d -mcpu=gfx1010 -run-pass=amdgpu-regbank-combiner -verify-machineinstrs %s -o - | FileCheck %s
---
; CHECK-LABEL: name: test_min_max_ValK0_K1_i32
; CHECK: liveins: $vgpr0, $sgpr30_sgpr31
- ; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
- ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
- ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -12
- ; CHECK: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17
- ; CHECK: [[AMDGPU_SMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_SMED3 [[COPY]], [[C]], [[C1]]
- ; CHECK: $vgpr0 = COPY [[AMDGPU_SMED3_]](s32)
- ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
- ; CHECK: S_SETPC_B64_return [[COPY2]], implicit $vgpr0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+ ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -12
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32)
+ ; CHECK-NEXT: [[AMDGPU_SMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_SMED3 [[COPY]], [[COPY2]], [[COPY3]]
+ ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_SMED3_]](s32)
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
+ ; CHECK-NEXT: S_SETPC_B64_return [[COPY4]], implicit $vgpr0
%0:vgpr(s32) = COPY $vgpr0
%1:sgpr_64 = COPY $sgpr30_sgpr31
%2:sgpr(s32) = G_CONSTANT i32 -12
; CHECK-LABEL: name: min_max_ValK0_K1_i32
; CHECK: liveins: $vgpr0, $sgpr30_sgpr31
- ; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
- ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
- ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -12
- ; CHECK: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17
- ; CHECK: [[AMDGPU_SMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_SMED3 [[COPY]], [[C]], [[C1]]
- ; CHECK: $vgpr0 = COPY [[AMDGPU_SMED3_]](s32)
- ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
- ; CHECK: S_SETPC_B64_return [[COPY2]], implicit $vgpr0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+ ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -12
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32)
+ ; CHECK-NEXT: [[AMDGPU_SMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_SMED3 [[COPY]], [[COPY2]], [[COPY3]]
+ ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_SMED3_]](s32)
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
+ ; CHECK-NEXT: S_SETPC_B64_return [[COPY4]], implicit $vgpr0
%0:vgpr(s32) = COPY $vgpr0
%1:sgpr_64 = COPY $sgpr30_sgpr31
%2:sgpr(s32) = G_CONSTANT i32 -12
; CHECK-LABEL: name: test_min_K1max_ValK0__i32
; CHECK: liveins: $vgpr0, $sgpr30_sgpr31
- ; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
- ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
- ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -12
- ; CHECK: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17
- ; CHECK: [[AMDGPU_SMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_SMED3 [[COPY]], [[C]], [[C1]]
- ; CHECK: $vgpr0 = COPY [[AMDGPU_SMED3_]](s32)
- ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
- ; CHECK: S_SETPC_B64_return [[COPY2]], implicit $vgpr0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+ ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -12
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32)
+ ; CHECK-NEXT: [[AMDGPU_SMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_SMED3 [[COPY]], [[COPY2]], [[COPY3]]
+ ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_SMED3_]](s32)
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
+ ; CHECK-NEXT: S_SETPC_B64_return [[COPY4]], implicit $vgpr0
%0:vgpr(s32) = COPY $vgpr0
%1:sgpr_64 = COPY $sgpr30_sgpr31
%2:sgpr(s32) = G_CONSTANT i32 -12
; CHECK-LABEL: name: test_min_K1max_K0Val__i32
; CHECK: liveins: $vgpr0, $sgpr30_sgpr31
- ; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
- ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
- ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -12
- ; CHECK: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17
- ; CHECK: [[AMDGPU_SMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_SMED3 [[COPY]], [[C]], [[C1]]
- ; CHECK: $vgpr0 = COPY [[AMDGPU_SMED3_]](s32)
- ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
- ; CHECK: S_SETPC_B64_return [[COPY2]], implicit $vgpr0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+ ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -12
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32)
+ ; CHECK-NEXT: [[AMDGPU_SMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_SMED3 [[COPY]], [[COPY2]], [[COPY3]]
+ ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_SMED3_]](s32)
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
+ ; CHECK-NEXT: S_SETPC_B64_return [[COPY4]], implicit $vgpr0
%0:vgpr(s32) = COPY $vgpr0
%1:sgpr_64 = COPY $sgpr30_sgpr31
%2:sgpr(s32) = G_CONSTANT i32 -12
; CHECK-LABEL: name: test_max_min_ValK1_K0_i32
; CHECK: liveins: $vgpr0, $sgpr30_sgpr31
- ; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
- ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
- ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17
- ; CHECK: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -12
- ; CHECK: [[AMDGPU_SMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_SMED3 [[COPY]], [[C1]], [[C]]
- ; CHECK: $vgpr0 = COPY [[AMDGPU_SMED3_]](s32)
- ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
- ; CHECK: S_SETPC_B64_return [[COPY2]], implicit $vgpr0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+ ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -12
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32)
+ ; CHECK-NEXT: [[AMDGPU_SMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_SMED3 [[COPY]], [[COPY3]], [[COPY2]]
+ ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_SMED3_]](s32)
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
+ ; CHECK-NEXT: S_SETPC_B64_return [[COPY4]], implicit $vgpr0
%0:vgpr(s32) = COPY $vgpr0
%1:sgpr_64 = COPY $sgpr30_sgpr31
%2:sgpr(s32) = G_CONSTANT i32 17
; CHECK-LABEL: name: test_max_min_K1Val_K0_i32
; CHECK: liveins: $vgpr0, $sgpr30_sgpr31
- ; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
- ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
- ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17
- ; CHECK: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -12
- ; CHECK: [[AMDGPU_SMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_SMED3 [[COPY]], [[C1]], [[C]]
- ; CHECK: $vgpr0 = COPY [[AMDGPU_SMED3_]](s32)
- ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
- ; CHECK: S_SETPC_B64_return [[COPY2]], implicit $vgpr0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+ ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -12
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32)
+ ; CHECK-NEXT: [[AMDGPU_SMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_SMED3 [[COPY]], [[COPY3]], [[COPY2]]
+ ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_SMED3_]](s32)
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
+ ; CHECK-NEXT: S_SETPC_B64_return [[COPY4]], implicit $vgpr0
%0:vgpr(s32) = COPY $vgpr0
%1:sgpr_64 = COPY $sgpr30_sgpr31
%2:sgpr(s32) = G_CONSTANT i32 17
; CHECK-LABEL: name: test_max_K0min_ValK1__i32
; CHECK: liveins: $vgpr0, $sgpr30_sgpr31
- ; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
- ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
- ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17
- ; CHECK: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -12
- ; CHECK: [[AMDGPU_SMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_SMED3 [[COPY]], [[C1]], [[C]]
- ; CHECK: $vgpr0 = COPY [[AMDGPU_SMED3_]](s32)
- ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
- ; CHECK: S_SETPC_B64_return [[COPY2]], implicit $vgpr0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+ ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -12
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32)
+ ; CHECK-NEXT: [[AMDGPU_SMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_SMED3 [[COPY]], [[COPY3]], [[COPY2]]
+ ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_SMED3_]](s32)
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
+ ; CHECK-NEXT: S_SETPC_B64_return [[COPY4]], implicit $vgpr0
%0:vgpr(s32) = COPY $vgpr0
%1:sgpr_64 = COPY $sgpr30_sgpr31
%2:sgpr(s32) = G_CONSTANT i32 17
; CHECK-LABEL: name: test_max_K0min_K1Val__i32
; CHECK: liveins: $vgpr0, $sgpr30_sgpr31
- ; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
- ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
- ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17
- ; CHECK: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -12
- ; CHECK: [[AMDGPU_SMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_SMED3 [[COPY]], [[C1]], [[C]]
- ; CHECK: $vgpr0 = COPY [[AMDGPU_SMED3_]](s32)
- ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
- ; CHECK: S_SETPC_B64_return [[COPY2]], implicit $vgpr0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+ ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -12
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32)
+ ; CHECK-NEXT: [[AMDGPU_SMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_SMED3 [[COPY]], [[COPY3]], [[COPY2]]
+ ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_SMED3_]](s32)
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
+ ; CHECK-NEXT: S_SETPC_B64_return [[COPY4]], implicit $vgpr0
%0:vgpr(s32) = COPY $vgpr0
%1:sgpr_64 = COPY $sgpr30_sgpr31
%2:sgpr(s32) = G_CONSTANT i32 17
; CHECK-LABEL: name: test_max_K0min_K1Val__v2i16
; CHECK: liveins: $vgpr0, $sgpr30_sgpr31
- ; CHECK: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0
- ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
- ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17
- ; CHECK: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[C]](s32), [[C]](s32)
- ; CHECK: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -12
- ; CHECK: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[C1]](s32), [[C1]](s32)
- ; CHECK: [[COPY2:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>)
- ; CHECK: [[SMIN:%[0-9]+]]:vgpr(<2 x s16>) = G_SMIN [[COPY2]], [[COPY]]
- ; CHECK: [[COPY3:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
- ; CHECK: [[SMAX:%[0-9]+]]:vgpr(<2 x s16>) = G_SMAX [[COPY3]], [[SMIN]]
- ; CHECK: $vgpr0 = COPY [[SMAX]](<2 x s16>)
- ; CHECK: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
- ; CHECK: S_SETPC_B64_return [[COPY4]], implicit $vgpr0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+ ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17
+ ; CHECK-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[C]](s32), [[C]](s32)
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -12
+ ; CHECK-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[C1]](s32), [[C1]](s32)
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>)
+ ; CHECK-NEXT: [[SMIN:%[0-9]+]]:vgpr(<2 x s16>) = G_SMIN [[COPY2]], [[COPY]]
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
+ ; CHECK-NEXT: [[SMAX:%[0-9]+]]:vgpr(<2 x s16>) = G_SMAX [[COPY3]], [[SMIN]]
+ ; CHECK-NEXT: $vgpr0 = COPY [[SMAX]](<2 x s16>)
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
+ ; CHECK-NEXT: S_SETPC_B64_return [[COPY4]], implicit $vgpr0
%0:vgpr(<2 x s16>) = COPY $vgpr0
%1:sgpr_64 = COPY $sgpr30_sgpr31
%9:sgpr(s32) = G_CONSTANT i32 17
; CHECK-LABEL: name: test_uniform_min_max
; CHECK: liveins: $sgpr2
- ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
- ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -12
- ; CHECK: [[SMAX:%[0-9]+]]:sgpr(s32) = G_SMAX [[COPY]], [[C]]
- ; CHECK: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17
- ; CHECK: [[SMIN:%[0-9]+]]:sgpr(s32) = G_SMIN [[SMAX]], [[C1]]
- ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[SMIN]](s32)
- ; CHECK: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY1]](s32)
- ; CHECK: $sgpr0 = COPY [[INT]](s32)
- ; CHECK: SI_RETURN_TO_EPILOG implicit $sgpr0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
+ ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -12
+ ; CHECK-NEXT: [[SMAX:%[0-9]+]]:sgpr(s32) = G_SMAX [[COPY]], [[C]]
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17
+ ; CHECK-NEXT: [[SMIN:%[0-9]+]]:sgpr(s32) = G_SMIN [[SMAX]], [[C1]]
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[SMIN]](s32)
+ ; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY1]](s32)
+ ; CHECK-NEXT: $sgpr0 = COPY [[INT]](s32)
+ ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0
%0:sgpr(s32) = COPY $sgpr2
%3:sgpr(s32) = G_CONSTANT i32 -12
%4:sgpr(s32) = G_SMAX %0, %3
$sgpr0 = COPY %7(s32)
SI_RETURN_TO_EPILOG implicit $sgpr0
...
+
+---
+# Constants -12 and 65 are not both inline-immediates; the med3 combine must
+# take the existing vgpr COPYs of the sgpr constants as G_AMDGPU_SMED3 inputs.
+name: test_non_inline_constant_i32
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+  bb.1:
+    liveins: $vgpr0, $sgpr30_sgpr31
+
+    ; CHECK-LABEL: name: test_non_inline_constant_i32
+    ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+    ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -12
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 65
+    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32)
+    ; CHECK-NEXT: [[AMDGPU_SMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_SMED3 [[COPY]], [[COPY2]], [[COPY3]]
+    ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_SMED3_]](s32)
+    ; CHECK-NEXT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
+    ; CHECK-NEXT: S_SETPC_B64_return [[COPY4]], implicit $vgpr0
+    %0:vgpr(s32) = COPY $vgpr0
+    %1:sgpr_64 = COPY $sgpr30_sgpr31
+    %2:sgpr(s32) = G_CONSTANT i32 -12
+    %7:vgpr(s32) = COPY %2(s32)
+    %3:vgpr(s32) = G_SMAX %0, %7
+    %4:sgpr(s32) = G_CONSTANT i32 65
+    %8:vgpr(s32) = COPY %4(s32)
+    %5:vgpr(s32) = G_SMIN %3, %8
+    $vgpr0 = COPY %5(s32)
+    %6:ccr_sgpr_64 = COPY %1
+    S_SETPC_B64_return %6, implicit $vgpr0
+...
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=amdgcn-amd-mesa3d -mcpu=gfx900 -run-pass=amdgpu-regbank-combiner -verify-machineinstrs %s -o - | FileCheck %s
# RUN: llc -mtriple=amdgcn-amd-mesa3d -mcpu=gfx1010 -run-pass=amdgpu-regbank-combiner -verify-machineinstrs %s -o - | FileCheck %s
---
; CHECK-LABEL: name: test_min_max_ValK0_K1_u32
; CHECK: liveins: $vgpr0, $sgpr30_sgpr31
- ; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
- ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
- ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 12
- ; CHECK: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17
- ; CHECK: [[AMDGPU_UMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_UMED3 [[COPY]], [[C]], [[C1]]
- ; CHECK: $vgpr0 = COPY [[AMDGPU_UMED3_]](s32)
- ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
- ; CHECK: S_SETPC_B64_return [[COPY2]], implicit $vgpr0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+ ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 12
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32)
+ ; CHECK-NEXT: [[AMDGPU_UMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_UMED3 [[COPY]], [[COPY2]], [[COPY3]]
+ ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_UMED3_]](s32)
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
+ ; CHECK-NEXT: S_SETPC_B64_return [[COPY4]], implicit $vgpr0
%0:vgpr(s32) = COPY $vgpr0
%1:sgpr_64 = COPY $sgpr30_sgpr31
%2:sgpr(s32) = G_CONSTANT i32 12
; CHECK-LABEL: name: min_max_ValK0_K1_i32
; CHECK: liveins: $vgpr0, $sgpr30_sgpr31
- ; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
- ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
- ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 12
- ; CHECK: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17
- ; CHECK: [[AMDGPU_UMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_UMED3 [[COPY]], [[C]], [[C1]]
- ; CHECK: $vgpr0 = COPY [[AMDGPU_UMED3_]](s32)
- ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
- ; CHECK: S_SETPC_B64_return [[COPY2]], implicit $vgpr0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+ ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 12
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32)
+ ; CHECK-NEXT: [[AMDGPU_UMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_UMED3 [[COPY]], [[COPY2]], [[COPY3]]
+ ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_UMED3_]](s32)
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
+ ; CHECK-NEXT: S_SETPC_B64_return [[COPY4]], implicit $vgpr0
%0:vgpr(s32) = COPY $vgpr0
%1:sgpr_64 = COPY $sgpr30_sgpr31
%2:sgpr(s32) = G_CONSTANT i32 12
; CHECK-LABEL: name: test_min_K1max_ValK0__u32
; CHECK: liveins: $vgpr0, $sgpr30_sgpr31
- ; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
- ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
- ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 12
- ; CHECK: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17
- ; CHECK: [[AMDGPU_UMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_UMED3 [[COPY]], [[C]], [[C1]]
- ; CHECK: $vgpr0 = COPY [[AMDGPU_UMED3_]](s32)
- ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
- ; CHECK: S_SETPC_B64_return [[COPY2]], implicit $vgpr0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+ ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 12
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32)
+ ; CHECK-NEXT: [[AMDGPU_UMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_UMED3 [[COPY]], [[COPY2]], [[COPY3]]
+ ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_UMED3_]](s32)
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
+ ; CHECK-NEXT: S_SETPC_B64_return [[COPY4]], implicit $vgpr0
%0:vgpr(s32) = COPY $vgpr0
%1:sgpr_64 = COPY $sgpr30_sgpr31
%2:sgpr(s32) = G_CONSTANT i32 12
; CHECK-LABEL: name: test_min_K1max_K0Val__u32
; CHECK: liveins: $vgpr0, $sgpr30_sgpr31
- ; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
- ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
- ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 12
- ; CHECK: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17
- ; CHECK: [[AMDGPU_UMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_UMED3 [[COPY]], [[C]], [[C1]]
- ; CHECK: $vgpr0 = COPY [[AMDGPU_UMED3_]](s32)
- ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
- ; CHECK: S_SETPC_B64_return [[COPY2]], implicit $vgpr0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+ ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 12
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32)
+ ; CHECK-NEXT: [[AMDGPU_UMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_UMED3 [[COPY]], [[COPY2]], [[COPY3]]
+ ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_UMED3_]](s32)
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
+ ; CHECK-NEXT: S_SETPC_B64_return [[COPY4]], implicit $vgpr0
%0:vgpr(s32) = COPY $vgpr0
%1:sgpr_64 = COPY $sgpr30_sgpr31
%2:sgpr(s32) = G_CONSTANT i32 12
; CHECK-LABEL: name: test_max_min_ValK1_K0_u32
; CHECK: liveins: $vgpr0, $sgpr30_sgpr31
- ; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
- ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
- ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17
- ; CHECK: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 12
- ; CHECK: [[AMDGPU_UMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_UMED3 [[COPY]], [[C1]], [[C]]
- ; CHECK: $vgpr0 = COPY [[AMDGPU_UMED3_]](s32)
- ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
- ; CHECK: S_SETPC_B64_return [[COPY2]], implicit $vgpr0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+ ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 12
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32)
+ ; CHECK-NEXT: [[AMDGPU_UMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_UMED3 [[COPY]], [[COPY3]], [[COPY2]]
+ ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_UMED3_]](s32)
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
+ ; CHECK-NEXT: S_SETPC_B64_return [[COPY4]], implicit $vgpr0
%0:vgpr(s32) = COPY $vgpr0
%1:sgpr_64 = COPY $sgpr30_sgpr31
%2:sgpr(s32) = G_CONSTANT i32 17
; CHECK-LABEL: name: test_max_min_K1Val_K0_u32
; CHECK: liveins: $vgpr0, $sgpr30_sgpr31
- ; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
- ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
- ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17
- ; CHECK: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 12
- ; CHECK: [[AMDGPU_UMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_UMED3 [[COPY]], [[C1]], [[C]]
- ; CHECK: $vgpr0 = COPY [[AMDGPU_UMED3_]](s32)
- ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
- ; CHECK: S_SETPC_B64_return [[COPY2]], implicit $vgpr0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+ ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 12
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32)
+ ; CHECK-NEXT: [[AMDGPU_UMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_UMED3 [[COPY]], [[COPY3]], [[COPY2]]
+ ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_UMED3_]](s32)
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
+ ; CHECK-NEXT: S_SETPC_B64_return [[COPY4]], implicit $vgpr0
%0:vgpr(s32) = COPY $vgpr0
%1:sgpr_64 = COPY $sgpr30_sgpr31
%2:sgpr(s32) = G_CONSTANT i32 17
; CHECK-LABEL: name: test_max_K0min_ValK1__u32
; CHECK: liveins: $vgpr0, $sgpr30_sgpr31
- ; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
- ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
- ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17
- ; CHECK: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 12
- ; CHECK: [[AMDGPU_UMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_UMED3 [[COPY]], [[C1]], [[C]]
- ; CHECK: $vgpr0 = COPY [[AMDGPU_UMED3_]](s32)
- ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
- ; CHECK: S_SETPC_B64_return [[COPY2]], implicit $vgpr0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+ ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 12
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32)
+ ; CHECK-NEXT: [[AMDGPU_UMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_UMED3 [[COPY]], [[COPY3]], [[COPY2]]
+ ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_UMED3_]](s32)
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
+ ; CHECK-NEXT: S_SETPC_B64_return [[COPY4]], implicit $vgpr0
%0:vgpr(s32) = COPY $vgpr0
%1:sgpr_64 = COPY $sgpr30_sgpr31
%2:sgpr(s32) = G_CONSTANT i32 17
; CHECK-LABEL: name: test_max_K0min_K1Val__u32
; CHECK: liveins: $vgpr0, $sgpr30_sgpr31
- ; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
- ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
- ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17
- ; CHECK: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 12
- ; CHECK: [[AMDGPU_UMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_UMED3 [[COPY]], [[C1]], [[C]]
- ; CHECK: $vgpr0 = COPY [[AMDGPU_UMED3_]](s32)
- ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
- ; CHECK: S_SETPC_B64_return [[COPY2]], implicit $vgpr0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+ ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 12
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32)
+ ; CHECK-NEXT: [[AMDGPU_UMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_UMED3 [[COPY]], [[COPY3]], [[COPY2]]
+ ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_UMED3_]](s32)
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
+ ; CHECK-NEXT: S_SETPC_B64_return [[COPY4]], implicit $vgpr0
%0:vgpr(s32) = COPY $vgpr0
%1:sgpr_64 = COPY $sgpr30_sgpr31
%2:sgpr(s32) = G_CONSTANT i32 17
; CHECK-LABEL: name: test_max_K0min_K1Val__v2u16
; CHECK: liveins: $vgpr0, $sgpr30_sgpr31
- ; CHECK: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0
- ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
- ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17
- ; CHECK: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[C]](s32), [[C]](s32)
- ; CHECK: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 12
- ; CHECK: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[C1]](s32), [[C1]](s32)
- ; CHECK: [[COPY2:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>)
- ; CHECK: [[UMIN:%[0-9]+]]:vgpr(<2 x s16>) = G_UMIN [[COPY2]], [[COPY]]
- ; CHECK: [[COPY3:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
- ; CHECK: [[UMAX:%[0-9]+]]:vgpr(<2 x s16>) = G_UMAX [[COPY3]], [[UMIN]]
- ; CHECK: $vgpr0 = COPY [[UMAX]](<2 x s16>)
- ; CHECK: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
- ; CHECK: S_SETPC_B64_return [[COPY4]], implicit $vgpr0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+ ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17
+ ; CHECK-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[C]](s32), [[C]](s32)
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 12
+ ; CHECK-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[C1]](s32), [[C1]](s32)
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>)
+ ; CHECK-NEXT: [[UMIN:%[0-9]+]]:vgpr(<2 x s16>) = G_UMIN [[COPY2]], [[COPY]]
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
+ ; CHECK-NEXT: [[UMAX:%[0-9]+]]:vgpr(<2 x s16>) = G_UMAX [[COPY3]], [[UMIN]]
+ ; CHECK-NEXT: $vgpr0 = COPY [[UMAX]](<2 x s16>)
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
+ ; CHECK-NEXT: S_SETPC_B64_return [[COPY4]], implicit $vgpr0
%0:vgpr(<2 x s16>) = COPY $vgpr0
%1:sgpr_64 = COPY $sgpr30_sgpr31
%9:sgpr(s32) = G_CONSTANT i32 17
; CHECK-LABEL: name: test_uniform_min_max
; CHECK: liveins: $sgpr2
- ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
- ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 12
- ; CHECK: [[UMAX:%[0-9]+]]:sgpr(s32) = G_UMAX [[COPY]], [[C]]
- ; CHECK: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17
- ; CHECK: [[UMIN:%[0-9]+]]:sgpr(s32) = G_UMIN [[UMAX]], [[C1]]
- ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[UMIN]](s32)
- ; CHECK: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY1]](s32)
- ; CHECK: $sgpr0 = COPY [[INT]](s32)
- ; CHECK: SI_RETURN_TO_EPILOG implicit $sgpr0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
+ ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 12
+ ; CHECK-NEXT: [[UMAX:%[0-9]+]]:sgpr(s32) = G_UMAX [[COPY]], [[C]]
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17
+ ; CHECK-NEXT: [[UMIN:%[0-9]+]]:sgpr(s32) = G_UMIN [[UMAX]], [[C1]]
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[UMIN]](s32)
+ ; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY1]](s32)
+ ; CHECK-NEXT: $sgpr0 = COPY [[INT]](s32)
+ ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0
%0:sgpr(s32) = COPY $sgpr2
%3:sgpr(s32) = G_CONSTANT i32 12
%4:sgpr(s32) = G_UMAX %0, %3
SI_RETURN_TO_EPILOG implicit $sgpr0
...
+
+---
+# Constant 65 is not an inline-immediate; the med3 combine must take the
+# existing vgpr COPYs of the sgpr constants as G_AMDGPU_UMED3 inputs.
+name: test_non_inline_constant_i32
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+  bb.1:
+    liveins: $vgpr0, $sgpr30_sgpr31
+
+    ; CHECK-LABEL: name: test_non_inline_constant_i32
+    ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+    ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 12
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 65
+    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32)
+    ; CHECK-NEXT: [[AMDGPU_UMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_UMED3 [[COPY]], [[COPY2]], [[COPY3]]
+    ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_UMED3_]](s32)
+    ; CHECK-NEXT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
+    ; CHECK-NEXT: S_SETPC_B64_return [[COPY4]], implicit $vgpr0
+    %0:vgpr(s32) = COPY $vgpr0
+    %1:sgpr_64 = COPY $sgpr30_sgpr31
+    %2:sgpr(s32) = G_CONSTANT i32 12
+    %7:vgpr(s32) = COPY %2(s32)
+    %3:vgpr(s32) = G_UMAX %0, %7
+    %4:sgpr(s32) = G_CONSTANT i32 65
+    %8:vgpr(s32) = COPY %4(s32)
+    %5:vgpr(s32) = G_UMIN %3, %8
+    $vgpr0 = COPY %5(s32)
+    %6:ccr_sgpr_64 = COPY %1
+    S_SETPC_B64_return %6, implicit $vgpr0
+...
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -global-isel -mtriple=amdgcn-amd-mesa3d -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX9 %s
; RUN: llc -global-isel -mtriple=amdgcn-amd-mesa3d -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX10 %s
define i32 @test_min_max_ValK0_K1_i32(i32 %a) {
+; GFX9-LABEL: test_min_max_ValK0_K1_i32:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_med3_i32 v0, v0, -12, 17
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
; GFX10-LABEL: test_min_max_ValK0_K1_i32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
}
define i32 @min_max_ValK0_K1_i32(i32 %a) {
+; GFX9-LABEL: min_max_ValK0_K1_i32:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_med3_i32 v0, v0, -12, 17
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
; GFX10-LABEL: min_max_ValK0_K1_i32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
}
define i32 @test_min_K1max_ValK0__i32(i32 %a) {
+; GFX9-LABEL: test_min_K1max_ValK0__i32:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_med3_i32 v0, v0, -12, 17
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
; GFX10-LABEL: test_min_K1max_ValK0__i32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
}
define i32 @test_min_K1max_K0Val__i32(i32 %a) {
+; GFX9-LABEL: test_min_K1max_K0Val__i32:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_med3_i32 v0, v0, -12, 17
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
; GFX10-LABEL: test_min_K1max_K0Val__i32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
}
define i32 @test_max_min_ValK1_K0_i32(i32 %a) {
+; GFX9-LABEL: test_max_min_ValK1_K0_i32:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_med3_i32 v0, v0, -12, 17
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
; GFX10-LABEL: test_max_min_ValK1_K0_i32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
}
define i32 @test_max_min_K1Val_K0_i32(i32 %a) {
+; GFX9-LABEL: test_max_min_K1Val_K0_i32:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_med3_i32 v0, v0, -12, 17
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
; GFX10-LABEL: test_max_min_K1Val_K0_i32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
}
define i32 @test_max_K0min_ValK1__i32(i32 %a) {
+; GFX9-LABEL: test_max_K0min_ValK1__i32:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_med3_i32 v0, v0, -12, 17
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
; GFX10-LABEL: test_max_K0min_ValK1__i32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
}
define i32 @test_max_K0min_K1Val__i32(i32 %a) {
+; GFX9-LABEL: test_max_K0min_K1Val__i32:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_med3_i32 v0, v0, -12, 17
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
; GFX10-LABEL: test_max_K0min_K1Val__i32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
}
define <2 x i16> @test_max_K0min_K1Val__v2i16(<2 x i16> %a) {
+; GFX9-LABEL: test_max_K0min_K1Val__v2i16:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_pk_min_i16 v0, 17, v0 op_sel_hi:[0,1]
+; GFX9-NEXT: v_pk_max_i16 v0, -12, v0 op_sel_hi:[0,1]
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
; GFX10-LABEL: test_max_K0min_K1Val__v2i16:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
}
define amdgpu_ps i32 @test_uniform_min_max(i32 inreg %a) {
+; GFX9-LABEL: test_uniform_min_max:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_max_i32 s0, s2, -12
+; GFX9-NEXT: s_min_i32 s0, s0, 17
+; GFX9-NEXT: ; return to shader part epilog
+;
; GFX10-LABEL: test_uniform_min_max:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_max_i32 s0, s2, -12
ret i32 %smed
}
+; The bound 65 (0x41) is not an inline constant: GFX9 materializes it in a
+; vgpr (v_mov_b32) before v_med3_i32, while GFX10 encodes it as a literal
+; operand of the med3 directly.
+define i32 @test_non_inline_constant_i32(i32 %a) {
+; GFX9-LABEL: test_non_inline_constant_i32:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_mov_b32_e32 v1, 0x41
+; GFX9-NEXT: v_med3_i32 v0, v0, -12, v1
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-LABEL: test_non_inline_constant_i32:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX10-NEXT: v_med3_i32 v0, v0, -12, 0x41
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+ %smax = call i32 @llvm.smax.i32(i32 %a, i32 -12)
+ %smed = call i32 @llvm.smin.i32(i32 %smax, i32 65)
+ ret i32 %smed
+}
+
declare i32 @llvm.smin.i32(i32, i32)
declare i32 @llvm.smax.i32(i32, i32)
declare <2 x i16> @llvm.smin.v2i16(<2 x i16>, <2 x i16>)
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -global-isel -mtriple=amdgcn-amd-mesa3d -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX9 %s
; RUN: llc -global-isel -mtriple=amdgcn-amd-mesa3d -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX10 %s
define i32 @test_min_max_ValK0_K1_u32(i32 %a) {
+; GFX9-LABEL: test_min_max_ValK0_K1_u32:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_med3_u32 v0, v0, 12, 17
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
; GFX10-LABEL: test_min_max_ValK0_K1_u32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
}
define i32 @min_max_ValK0_K1_i32(i32 %a) {
+; GFX9-LABEL: min_max_ValK0_K1_i32:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_med3_u32 v0, v0, 12, 17
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
; GFX10-LABEL: min_max_ValK0_K1_i32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
}
define i32 @test_min_K1max_ValK0__u32(i32 %a) {
+; GFX9-LABEL: test_min_K1max_ValK0__u32:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_med3_u32 v0, v0, 12, 17
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
; GFX10-LABEL: test_min_K1max_ValK0__u32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
}
define i32 @test_min_K1max_K0Val__u32(i32 %a) {
+; GFX9-LABEL: test_min_K1max_K0Val__u32:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_med3_u32 v0, v0, 12, 17
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
; GFX10-LABEL: test_min_K1max_K0Val__u32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
}
define i32 @test_max_min_ValK1_K0_u32(i32 %a) {
+; GFX9-LABEL: test_max_min_ValK1_K0_u32:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_med3_u32 v0, v0, 12, 17
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
; GFX10-LABEL: test_max_min_ValK1_K0_u32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
}
define i32 @test_max_min_K1Val_K0_u32(i32 %a) {
+; GFX9-LABEL: test_max_min_K1Val_K0_u32:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_med3_u32 v0, v0, 12, 17
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
; GFX10-LABEL: test_max_min_K1Val_K0_u32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
}
define i32 @test_max_K0min_ValK1__u32(i32 %a) {
+; GFX9-LABEL: test_max_K0min_ValK1__u32:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_med3_u32 v0, v0, 12, 17
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
; GFX10-LABEL: test_max_K0min_ValK1__u32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
}
define i32 @test_max_K0min_K1Val__u32(i32 %a) {
+; GFX9-LABEL: test_max_K0min_K1Val__u32:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_med3_u32 v0, v0, 12, 17
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
; GFX10-LABEL: test_max_K0min_K1Val__u32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
}
define <2 x i16> @test_max_K0min_K1Val__v2u16(<2 x i16> %a) {
+; GFX9-LABEL: test_max_K0min_K1Val__v2u16:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_pk_min_u16 v0, 17, v0 op_sel_hi:[0,1]
+; GFX9-NEXT: v_pk_max_u16 v0, 12, v0 op_sel_hi:[0,1]
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
; GFX10-LABEL: test_max_K0min_K1Val__v2u16:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
}
define amdgpu_ps i32 @test_uniform_min_max(i32 inreg %a) {
+; GFX9-LABEL: test_uniform_min_max:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_max_u32 s0, s2, 12
+; GFX9-NEXT: s_min_u32 s0, s0, 17
+; GFX9-NEXT: ; return to shader part epilog
+;
; GFX10-LABEL: test_uniform_min_max:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_max_u32 s0, s2, 12
ret i32 %umed
}
+; Unsigned counterpart of the non-inline-constant test: 65 (0x41) needs a
+; v_mov_b32 on GFX9 before v_med3_u32, but is a literal operand on GFX10.
+define i32 @test_non_inline_constant_u32(i32 %a) {
+; GFX9-LABEL: test_non_inline_constant_u32:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_mov_b32_e32 v1, 0x41
+; GFX9-NEXT: v_med3_u32 v0, v0, 12, v1
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-LABEL: test_non_inline_constant_u32:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX10-NEXT: v_med3_u32 v0, v0, 12, 0x41
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+ %umax = call i32 @llvm.umax.i32(i32 %a, i32 12)
+ %umed = call i32 @llvm.umin.i32(i32 %umax, i32 65)
+ ret i32 %umed
+}
+
declare i32 @llvm.umin.i32(i32, i32)
declare i32 @llvm.umax.i32(i32, i32)
declare <2 x i16> @llvm.umin.v2i16(<2 x i16>, <2 x i16>)