Add signed and unsigned integer version of med3 combine.
Source pattern is min(max(Val, K0), K1) or max(min(Val, K1), K0)
where K0 and K1 are constants and K0 <= K1. Destination is med3
that corresponds to signedness of min/max in source.
Differential Revision: https://reviews.llvm.org/D90050
[{ return PreLegalizerHelper.matchClampI64ToI16(*${clamp_i64_to_i16}, MRI, *MF, ${matchinfo}); }]),
(apply [{ PreLegalizerHelper.applyClampI64ToI16(*${clamp_i64_to_i16}, ${matchinfo}); }])>;
+def med3_matchdata : GIDefMatchData<"AMDGPURegBankCombinerHelper::Med3MatchInfo">;
+
+def int_minmax_to_med3 : GICombineRule<
+ (defs root:$min_or_max, med3_matchdata:$matchinfo),
+ (match (wip_match_opcode G_SMAX,
+ G_SMIN,
+ G_UMAX,
+ G_UMIN):$min_or_max,
+ [{ return RegBankHelper.matchIntMinMaxToMed3(*${min_or_max}, ${matchinfo}); }]),
+ (apply [{ RegBankHelper.applyMed3(*${min_or_max}, ${matchinfo}); }])>;
+
// Combines which should only apply on SI/VI
def gfx6gfx7_combines : GICombineGroup<[fcmp_select_to_fmin_fmax_legacy]>;
}
def AMDGPURegBankCombinerHelper : GICombinerHelper<
- "AMDGPUGenRegBankCombinerHelper", [zext_trunc_fold]> {
+ "AMDGPUGenRegBankCombinerHelper", [zext_trunc_fold, int_minmax_to_med3]> {
let DisableRuleOption = "amdgpuregbankcombiner-disable-rule";
+ let StateClass = "AMDGPURegBankCombinerHelperState";
+ let AdditionalArguments = [];
}
def : GINodeEquiv<G_AMDGPU_CVT_F32_UBYTE3, AMDGPUcvt_f32_ubyte3>;
def : GINodeEquiv<G_AMDGPU_CVT_PK_I16_I32, AMDGPUpk_i16_i32_impl>;
-def : GINodeEquiv<G_AMDGPU_MED3, AMDGPUsmed3>;
+def : GINodeEquiv<G_AMDGPU_SMED3, AMDGPUsmed3>;
+def : GINodeEquiv<G_AMDGPU_UMED3, AMDGPUumed3>;
def : GINodeEquiv<G_AMDGPU_ATOMIC_CMPXCHG, AMDGPUatomic_cmp_swap>;
def : GINodeEquiv<G_AMDGPU_BUFFER_LOAD, SIbuffer_load>;
auto Bitcast = B.buildBitcast({S32}, CvtPk);
auto Med3 = B.buildInstr(
- AMDGPU::G_AMDGPU_MED3, {S32},
+ AMDGPU::G_AMDGPU_SMED3, {S32},
{MinBoundaryDst.getReg(0), Bitcast.getReg(0), MaxBoundaryDst.getReg(0)},
MI.getFlags());
#include "AMDGPU.h"
#include "AMDGPULegalizerInfo.h"
+#include "AMDGPURegisterBankInfo.h"
#include "GCNSubtarget.h"
+#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "llvm/CodeGen/GlobalISel/Combiner.h"
#include "llvm/CodeGen/GlobalISel/CombinerHelper.h"
#include "llvm/CodeGen/GlobalISel/CombinerInfo.h"
using namespace llvm;
using namespace MIPatternMatch;
+// Helper that implements post-regbank-select combines for AMDGPU. Combines
+// here may depend on register-bank assignments (e.g. only forming med3 for
+// VGPR results, since V_MED3 is a VALU instruction).
+class AMDGPURegBankCombinerHelper {
+protected:
+  MachineIRBuilder &B;
+  MachineFunction &MF;
+  MachineRegisterInfo &MRI;
+  const RegisterBankInfo &RBI;
+  const TargetRegisterInfo &TRI;
+  CombinerHelper &Helper;
+
+public:
+  AMDGPURegBankCombinerHelper(MachineIRBuilder &B, CombinerHelper &Helper)
+      : B(B), MF(B.getMF()), MRI(*B.getMRI()),
+        RBI(*MF.getSubtarget().getRegBankInfo()),
+        TRI(*MF.getSubtarget().getRegisterInfo()), Helper(Helper) {}
+
+  // Returns true if \p Reg was assigned to the VGPR bank.
+  bool isVgprRegBank(Register Reg);
+
+  // Min/max opcode pair plus the med3 opcode they combine into.
+  struct MinMaxMedOpc {
+    unsigned Min, Max, Med;
+  };
+
+  // Result of a successful med3 match: the med3 opcode and its three
+  // source registers.
+  struct Med3MatchInfo {
+    unsigned Opc;
+    Register Val0, Val1, Val2;
+  };
+
+  // Maps a G_SMIN/G_SMAX/G_UMIN/G_UMAX opcode to the matching
+  // {min, max, med3} opcode triple of the same signedness.
+  MinMaxMedOpc getMinMaxPair(unsigned Opc);
+
+  // Matches min(max(Val, K0), K1) / max(min(Val, K1), K0) and all operand
+  // commutes; m_Cst selects the constant matcher (e.g. ICstRegMatch).
+  template <class m_Cst>
+  bool matchMed(MachineInstr &MI, MachineRegisterInfo &MRI, MinMaxMedOpc MMMOpc,
+                Register &Val, Register &K0, Register &K1);
+
+  bool matchIntMinMaxToMed3(MachineInstr &MI, Med3MatchInfo &MatchInfo);
+  void applyMed3(MachineInstr &MI, Med3MatchInfo &MatchInfo);
+};
+
+// Returns true if register-bank selection assigned \p Reg to the VGPR bank.
+bool AMDGPURegBankCombinerHelper::isVgprRegBank(Register Reg) {
+  return RBI.getRegBank(Reg, MRI, TRI)->getID() == AMDGPU::VGPRRegBankID;
+}
+
+// Returns the {min, max, med3} opcode triple whose signedness matches the
+// given min/max opcode: signed opcodes map to G_AMDGPU_SMED3, unsigned ones
+// to G_AMDGPU_UMED3.
+AMDGPURegBankCombinerHelper::MinMaxMedOpc
+AMDGPURegBankCombinerHelper::getMinMaxPair(unsigned Opc) {
+  switch (Opc) {
+  default:
+    llvm_unreachable("Unsupported opcode");
+  case AMDGPU::G_SMAX:
+  case AMDGPU::G_SMIN:
+    return {AMDGPU::G_SMIN, AMDGPU::G_SMAX, AMDGPU::G_AMDGPU_SMED3};
+  case AMDGPU::G_UMAX:
+  case AMDGPU::G_UMIN:
+    return {AMDGPU::G_UMIN, AMDGPU::G_UMAX, AMDGPU::G_AMDGPU_UMED3};
+  }
+}
+
+// Matches a clamp-like min/max pattern rooted at \p MI and extracts the
+// clamped value and the two constant bounds. \p m_Cst is the matcher used
+// for the constants (e.g. ICstRegMatch). On success, Val/K0/K1 are set;
+// whether K0 <= K1 actually holds is checked by the caller.
+template <class m_Cst>
+bool AMDGPURegBankCombinerHelper::matchMed(MachineInstr &MI,
+                                           MachineRegisterInfo &MRI,
+                                           MinMaxMedOpc MMMOpc, Register &Val,
+                                           Register &K0, Register &K1) {
+  // 4 operand commutes of: min(max(Val, K0), K1).
+  // Find K1 from outer instr: min(max(...), K1) or min(K1, max(...)).
+  // Find K0 and Val from inner instr: max(K0, Val) or max(Val, K0).
+  // 4 operand commutes of: max(min(Val, K1), K0).
+  // Find K0 from outer instr: max(min(...), K0) or max(K0, min(...)).
+  // Find K1 and Val from inner instr: min(K1, Val) or min(Val, K1).
+  return mi_match(
+      MI, MRI,
+      m_any_of(
+          m_CommutativeBinOp(
+              MMMOpc.Min, m_CommutativeBinOp(MMMOpc.Max, m_Reg(Val), m_Cst(K0)),
+              m_Cst(K1)),
+          m_CommutativeBinOp(
+              MMMOpc.Max, m_CommutativeBinOp(MMMOpc.Min, m_Reg(Val), m_Cst(K1)),
+              m_Cst(K0))));
+}
+
+// Matches an integer clamp (min-of-max or max-of-min with constant bounds)
+// that can be replaced by a single med3 of the same signedness. Fills
+// \p MatchInfo with the med3 opcode and operands on success.
+bool AMDGPURegBankCombinerHelper::matchIntMinMaxToMed3(
+    MachineInstr &MI, Med3MatchInfo &MatchInfo) {
+  Register Dst = MI.getOperand(0).getReg();
+  // med3 is a VALU instruction, so only combine divergent (VGPR) results.
+  if (!isVgprRegBank(Dst))
+    return false;
+
+  // Scalar values only; there is no vector med3.
+  if (MRI.getType(Dst).isVector())
+    return false;
+
+  MinMaxMedOpc OpcodeTriple = getMinMaxPair(MI.getOpcode());
+  Register Val, K0, K1;
+  // Match min(max(Val, K0), K1) or max(min(Val, K1), K0). Then see if K0 <= K1.
+  if (!matchMed<ICstRegMatch>(MI, MRI, OpcodeTriple, Val, K0, K1))
+    return false;
+
+  // K0 and K1 were matched as constants above, so the lookups are non-null.
+  const APInt &K0_Imm = getConstantIntVRegVal(K0, MRI)->getValue();
+  const APInt &K1_Imm = getConstantIntVRegVal(K1, MRI)->getValue();
+  // The clamp is only equivalent to med3 when K0 <= K1, compared with the
+  // signedness of the med3 being formed.
+  if (OpcodeTriple.Med == AMDGPU::G_AMDGPU_SMED3 && K0_Imm.sgt(K1_Imm))
+    return false;
+  if (OpcodeTriple.Med == AMDGPU::G_AMDGPU_UMED3 && K0_Imm.ugt(K1_Imm))
+    return false;
+
+  MatchInfo = {OpcodeTriple.Med, Val, K0, K1};
+  return true;
+}
+
+// Replaces the matched min/max pair with a single med3 instruction built
+// from \p MatchInfo, preserving MI's flags, and erases the root instruction.
+void AMDGPURegBankCombinerHelper::applyMed3(MachineInstr &MI,
+                                            Med3MatchInfo &MatchInfo) {
+  B.setInstrAndDebugLoc(MI);
+  B.buildInstr(MatchInfo.Opc, {MI.getOperand(0)},
+               {MatchInfo.Val0, MatchInfo.Val1, MatchInfo.Val2}, MI.getFlags());
+  MI.eraseFromParent();
+}
+
+// State class made available to the tablegen-generated combiner rules; it is
+// named by the StateClass field of the AMDGPURegBankCombinerHelper tablegen
+// definition, so the rules can reference Helper and RegBankHelper directly.
+class AMDGPURegBankCombinerHelperState {
+protected:
+  CombinerHelper &Helper;
+  AMDGPURegBankCombinerHelper &RegBankHelper;
+
+public:
+  AMDGPURegBankCombinerHelperState(CombinerHelper &Helper,
+                                   AMDGPURegBankCombinerHelper &RegBankHelper)
+      : Helper(Helper), RegBankHelper(RegBankHelper) {}
+};
#define AMDGPUREGBANKCOMBINERHELPER_GENCOMBINERHELPER_DEPS
#include "AMDGPUGenRegBankGICombiner.inc"
MachineInstr &MI,
MachineIRBuilder &B) const {
CombinerHelper Helper(Observer, B, KB, MDT);
- AMDGPUGenRegBankCombinerHelper Generated(GeneratedRuleCfg);
+ AMDGPURegBankCombinerHelper RegBankHelper(B, Helper);
+ AMDGPUGenRegBankCombinerHelper Generated(GeneratedRuleCfg, Helper,
+ RegBankHelper);
- if (Generated.tryCombineAll(Observer, MI, B, Helper))
+ if (Generated.tryCombineAll(Observer, MI, B))
return true;
return false;
case AMDGPU::G_AMDGPU_CVT_F32_UBYTE2:
case AMDGPU::G_AMDGPU_CVT_F32_UBYTE3:
case AMDGPU::G_AMDGPU_CVT_PK_I16_I32:
- case AMDGPU::G_AMDGPU_MED3:
+ case AMDGPU::G_AMDGPU_SMED3:
return getDefaultMappingVOP(MI);
case AMDGPU::G_UMULH:
case AMDGPU::G_SMULH: {
let hasSideEffects = 0;
}
-def G_AMDGPU_MED3 : AMDGPUGenericInstruction {
+def G_AMDGPU_SMED3 : AMDGPUGenericInstruction {
+ let OutOperandList = (outs type0:$dst);
+ let InOperandList = (ins type0:$src0, type0:$src1, type0:$src2);
+ let hasSideEffects = 0;
+}
+
+def G_AMDGPU_UMED3 : AMDGPUGenericInstruction {
let OutOperandList = (outs type0:$dst);
let InOperandList = (ins type0:$src0, type0:$src1, type0:$src2);
let hasSideEffects = 0;
--- /dev/null
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=amdgcn-amd-mesa3d -mcpu=gfx1010 -run-pass=amdgpu-regbank-combiner -verify-machineinstrs %s -o - | FileCheck %s
+
+---
+name: test_min_max_ValK0_K1_i32
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.1:
+ liveins: $vgpr0, $sgpr30_sgpr31
+
+ ; CHECK-LABEL: name: test_min_max_ValK0_K1_i32
+ ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31
+ ; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+ ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+ ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -12
+ ; CHECK: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17
+ ; CHECK: [[AMDGPU_SMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_SMED3 [[COPY]], [[C]], [[C1]]
+ ; CHECK: $vgpr0 = COPY [[AMDGPU_SMED3_]](s32)
+ ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
+ ; CHECK: S_SETPC_B64_return [[COPY2]], implicit $vgpr0
+ %0:vgpr(s32) = COPY $vgpr0
+ %1:sgpr_64 = COPY $sgpr30_sgpr31
+ %2:sgpr(s32) = G_CONSTANT i32 -12
+ %7:vgpr(s32) = COPY %2(s32)
+ %3:vgpr(s32) = G_SMAX %0, %7
+ %4:sgpr(s32) = G_CONSTANT i32 17
+ %8:vgpr(s32) = COPY %4(s32)
+ %5:vgpr(s32) = G_SMIN %3, %8
+ $vgpr0 = COPY %5(s32)
+ %6:ccr_sgpr_64 = COPY %1
+ S_SETPC_B64_return %6, implicit $vgpr0
+...
+
+---
+name: min_max_ValK0_K1_i32
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.1:
+ liveins: $vgpr0, $sgpr30_sgpr31
+
+ ; CHECK-LABEL: name: min_max_ValK0_K1_i32
+ ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31
+ ; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+ ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+ ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -12
+ ; CHECK: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17
+ ; CHECK: [[AMDGPU_SMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_SMED3 [[COPY]], [[C]], [[C1]]
+ ; CHECK: $vgpr0 = COPY [[AMDGPU_SMED3_]](s32)
+ ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
+ ; CHECK: S_SETPC_B64_return [[COPY2]], implicit $vgpr0
+ %0:vgpr(s32) = COPY $vgpr0
+ %1:sgpr_64 = COPY $sgpr30_sgpr31
+ %2:sgpr(s32) = G_CONSTANT i32 -12
+ %7:vgpr(s32) = COPY %2(s32)
+ %3:vgpr(s32) = G_SMAX %7, %0
+ %4:sgpr(s32) = G_CONSTANT i32 17
+ %8:vgpr(s32) = COPY %4(s32)
+ %5:vgpr(s32) = G_SMIN %3, %8
+ $vgpr0 = COPY %5(s32)
+ %6:ccr_sgpr_64 = COPY %1
+ S_SETPC_B64_return %6, implicit $vgpr0
+...
+
+---
+name: test_min_K1max_ValK0__i32
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.1:
+ liveins: $vgpr0, $sgpr30_sgpr31
+
+ ; CHECK-LABEL: name: test_min_K1max_ValK0__i32
+ ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31
+ ; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+ ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+ ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -12
+ ; CHECK: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17
+ ; CHECK: [[AMDGPU_SMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_SMED3 [[COPY]], [[C]], [[C1]]
+ ; CHECK: $vgpr0 = COPY [[AMDGPU_SMED3_]](s32)
+ ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
+ ; CHECK: S_SETPC_B64_return [[COPY2]], implicit $vgpr0
+ %0:vgpr(s32) = COPY $vgpr0
+ %1:sgpr_64 = COPY $sgpr30_sgpr31
+ %2:sgpr(s32) = G_CONSTANT i32 -12
+ %7:vgpr(s32) = COPY %2(s32)
+ %3:vgpr(s32) = G_SMAX %0, %7
+ %4:sgpr(s32) = G_CONSTANT i32 17
+ %8:vgpr(s32) = COPY %4(s32)
+ %5:vgpr(s32) = G_SMIN %8, %3
+ $vgpr0 = COPY %5(s32)
+ %6:ccr_sgpr_64 = COPY %1
+ S_SETPC_B64_return %6, implicit $vgpr0
+...
+
+---
+name: test_min_K1max_K0Val__i32
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.1:
+ liveins: $vgpr0, $sgpr30_sgpr31
+
+ ; CHECK-LABEL: name: test_min_K1max_K0Val__i32
+ ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31
+ ; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+ ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+ ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -12
+ ; CHECK: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17
+ ; CHECK: [[AMDGPU_SMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_SMED3 [[COPY]], [[C]], [[C1]]
+ ; CHECK: $vgpr0 = COPY [[AMDGPU_SMED3_]](s32)
+ ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
+ ; CHECK: S_SETPC_B64_return [[COPY2]], implicit $vgpr0
+ %0:vgpr(s32) = COPY $vgpr0
+ %1:sgpr_64 = COPY $sgpr30_sgpr31
+ %2:sgpr(s32) = G_CONSTANT i32 -12
+ %7:vgpr(s32) = COPY %2(s32)
+ %3:vgpr(s32) = G_SMAX %7, %0
+ %4:sgpr(s32) = G_CONSTANT i32 17
+ %8:vgpr(s32) = COPY %4(s32)
+ %5:vgpr(s32) = G_SMIN %8, %3
+ $vgpr0 = COPY %5(s32)
+ %6:ccr_sgpr_64 = COPY %1
+ S_SETPC_B64_return %6, implicit $vgpr0
+...
+
+---
+name: test_max_min_ValK1_K0_i32
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.1:
+ liveins: $vgpr0, $sgpr30_sgpr31
+
+ ; CHECK-LABEL: name: test_max_min_ValK1_K0_i32
+ ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31
+ ; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+ ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+ ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17
+ ; CHECK: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -12
+ ; CHECK: [[AMDGPU_SMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_SMED3 [[COPY]], [[C1]], [[C]]
+ ; CHECK: $vgpr0 = COPY [[AMDGPU_SMED3_]](s32)
+ ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
+ ; CHECK: S_SETPC_B64_return [[COPY2]], implicit $vgpr0
+ %0:vgpr(s32) = COPY $vgpr0
+ %1:sgpr_64 = COPY $sgpr30_sgpr31
+ %2:sgpr(s32) = G_CONSTANT i32 17
+ %7:vgpr(s32) = COPY %2(s32)
+ %3:vgpr(s32) = G_SMIN %0, %7
+ %4:sgpr(s32) = G_CONSTANT i32 -12
+ %8:vgpr(s32) = COPY %4(s32)
+ %5:vgpr(s32) = G_SMAX %3, %8
+ $vgpr0 = COPY %5(s32)
+ %6:ccr_sgpr_64 = COPY %1
+ S_SETPC_B64_return %6, implicit $vgpr0
+...
+
+---
+name: test_max_min_K1Val_K0_i32
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.1:
+ liveins: $vgpr0, $sgpr30_sgpr31
+
+ ; CHECK-LABEL: name: test_max_min_K1Val_K0_i32
+ ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31
+ ; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+ ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+ ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17
+ ; CHECK: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -12
+ ; CHECK: [[AMDGPU_SMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_SMED3 [[COPY]], [[C1]], [[C]]
+ ; CHECK: $vgpr0 = COPY [[AMDGPU_SMED3_]](s32)
+ ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
+ ; CHECK: S_SETPC_B64_return [[COPY2]], implicit $vgpr0
+ %0:vgpr(s32) = COPY $vgpr0
+ %1:sgpr_64 = COPY $sgpr30_sgpr31
+ %2:sgpr(s32) = G_CONSTANT i32 17
+ %7:vgpr(s32) = COPY %2(s32)
+ %3:vgpr(s32) = G_SMIN %7, %0
+ %4:sgpr(s32) = G_CONSTANT i32 -12
+ %8:vgpr(s32) = COPY %4(s32)
+ %5:vgpr(s32) = G_SMAX %3, %8
+ $vgpr0 = COPY %5(s32)
+ %6:ccr_sgpr_64 = COPY %1
+ S_SETPC_B64_return %6, implicit $vgpr0
+...
+
+---
+name: test_max_K0min_ValK1__i32
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.1:
+ liveins: $vgpr0, $sgpr30_sgpr31
+
+ ; CHECK-LABEL: name: test_max_K0min_ValK1__i32
+ ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31
+ ; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+ ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+ ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17
+ ; CHECK: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -12
+ ; CHECK: [[AMDGPU_SMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_SMED3 [[COPY]], [[C1]], [[C]]
+ ; CHECK: $vgpr0 = COPY [[AMDGPU_SMED3_]](s32)
+ ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
+ ; CHECK: S_SETPC_B64_return [[COPY2]], implicit $vgpr0
+ %0:vgpr(s32) = COPY $vgpr0
+ %1:sgpr_64 = COPY $sgpr30_sgpr31
+ %2:sgpr(s32) = G_CONSTANT i32 17
+ %7:vgpr(s32) = COPY %2(s32)
+ %3:vgpr(s32) = G_SMIN %0, %7
+ %4:sgpr(s32) = G_CONSTANT i32 -12
+ %8:vgpr(s32) = COPY %4(s32)
+ %5:vgpr(s32) = G_SMAX %8, %3
+ $vgpr0 = COPY %5(s32)
+ %6:ccr_sgpr_64 = COPY %1
+ S_SETPC_B64_return %6, implicit $vgpr0
+...
+
+---
+name: test_max_K0min_K1Val__i32
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.1:
+ liveins: $vgpr0, $sgpr30_sgpr31
+
+ ; CHECK-LABEL: name: test_max_K0min_K1Val__i32
+ ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31
+ ; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+ ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+ ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17
+ ; CHECK: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -12
+ ; CHECK: [[AMDGPU_SMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_SMED3 [[COPY]], [[C1]], [[C]]
+ ; CHECK: $vgpr0 = COPY [[AMDGPU_SMED3_]](s32)
+ ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
+ ; CHECK: S_SETPC_B64_return [[COPY2]], implicit $vgpr0
+ %0:vgpr(s32) = COPY $vgpr0
+ %1:sgpr_64 = COPY $sgpr30_sgpr31
+ %2:sgpr(s32) = G_CONSTANT i32 17
+ %7:vgpr(s32) = COPY %2(s32)
+ %3:vgpr(s32) = G_SMIN %7, %0
+ %4:sgpr(s32) = G_CONSTANT i32 -12
+ %8:vgpr(s32) = COPY %4(s32)
+ %5:vgpr(s32) = G_SMAX %8, %3
+ $vgpr0 = COPY %5(s32)
+ %6:ccr_sgpr_64 = COPY %1
+ S_SETPC_B64_return %6, implicit $vgpr0
+...
+
+---
+name: test_max_K0min_K1Val__v2i16
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.1:
+ liveins: $vgpr0, $sgpr30_sgpr31
+
+ ; CHECK-LABEL: name: test_max_K0min_K1Val__v2i16
+ ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31
+ ; CHECK: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0
+ ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+ ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17
+ ; CHECK: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[C]](s32), [[C]](s32)
+ ; CHECK: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -12
+ ; CHECK: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[C1]](s32), [[C1]](s32)
+ ; CHECK: [[COPY2:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>)
+ ; CHECK: [[SMIN:%[0-9]+]]:vgpr(<2 x s16>) = G_SMIN [[COPY2]], [[COPY]]
+ ; CHECK: [[COPY3:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
+ ; CHECK: [[SMAX:%[0-9]+]]:vgpr(<2 x s16>) = G_SMAX [[COPY3]], [[SMIN]]
+ ; CHECK: $vgpr0 = COPY [[SMAX]](<2 x s16>)
+ ; CHECK: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
+ ; CHECK: S_SETPC_B64_return [[COPY4]], implicit $vgpr0
+ %0:vgpr(<2 x s16>) = COPY $vgpr0
+ %1:sgpr_64 = COPY $sgpr30_sgpr31
+ %9:sgpr(s32) = G_CONSTANT i32 17
+ %2:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC %9(s32), %9(s32)
+ %10:sgpr(s32) = G_CONSTANT i32 -12
+ %5:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC %10(s32), %10(s32)
+ %11:vgpr(<2 x s16>) = COPY %2(<2 x s16>)
+ %4:vgpr(<2 x s16>) = G_SMIN %11, %0
+ %12:vgpr(<2 x s16>) = COPY %5(<2 x s16>)
+ %7:vgpr(<2 x s16>) = G_SMAX %12, %4
+ $vgpr0 = COPY %7(<2 x s16>)
+ %8:ccr_sgpr_64 = COPY %1
+ S_SETPC_B64_return %8, implicit $vgpr0
+...
+
+---
+name: test_uniform_min_max
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.1:
+ liveins: $sgpr2
+
+ ; CHECK-LABEL: name: test_uniform_min_max
+ ; CHECK: liveins: $sgpr2
+ ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
+ ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -12
+ ; CHECK: [[SMAX:%[0-9]+]]:sgpr(s32) = G_SMAX [[COPY]], [[C]]
+ ; CHECK: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17
+ ; CHECK: [[SMIN:%[0-9]+]]:sgpr(s32) = G_SMIN [[SMAX]], [[C1]]
+ ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[SMIN]](s32)
+ ; CHECK: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY1]](s32)
+ ; CHECK: $sgpr0 = COPY [[INT]](s32)
+ ; CHECK: SI_RETURN_TO_EPILOG implicit $sgpr0
+ %0:sgpr(s32) = COPY $sgpr2
+ %3:sgpr(s32) = G_CONSTANT i32 -12
+ %4:sgpr(s32) = G_SMAX %0, %3
+ %5:sgpr(s32) = G_CONSTANT i32 17
+ %6:sgpr(s32) = G_SMIN %4, %5
+ %8:vgpr(s32) = COPY %6(s32)
+ %7:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), %8(s32)
+ $sgpr0 = COPY %7(s32)
+ SI_RETURN_TO_EPILOG implicit $sgpr0
+...
--- /dev/null
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=amdgcn-amd-mesa3d -mcpu=gfx1010 -run-pass=amdgpu-regbank-combiner -verify-machineinstrs %s -o - | FileCheck %s
+
+---
+name: test_min_max_ValK0_K1_u32
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.1:
+ liveins: $vgpr0, $sgpr30_sgpr31
+
+ ; CHECK-LABEL: name: test_min_max_ValK0_K1_u32
+ ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31
+ ; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+ ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+ ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 12
+ ; CHECK: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17
+ ; CHECK: [[AMDGPU_UMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_UMED3 [[COPY]], [[C]], [[C1]]
+ ; CHECK: $vgpr0 = COPY [[AMDGPU_UMED3_]](s32)
+ ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
+ ; CHECK: S_SETPC_B64_return [[COPY2]], implicit $vgpr0
+ %0:vgpr(s32) = COPY $vgpr0
+ %1:sgpr_64 = COPY $sgpr30_sgpr31
+ %2:sgpr(s32) = G_CONSTANT i32 12
+ %7:vgpr(s32) = COPY %2(s32)
+ %3:vgpr(s32) = G_UMAX %0, %7
+ %4:sgpr(s32) = G_CONSTANT i32 17
+ %8:vgpr(s32) = COPY %4(s32)
+ %5:vgpr(s32) = G_UMIN %3, %8
+ $vgpr0 = COPY %5(s32)
+ %6:ccr_sgpr_64 = COPY %1
+ S_SETPC_B64_return %6, implicit $vgpr0
+...
+
+---
+name: min_max_ValK0_K1_i32
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.1:
+ liveins: $vgpr0, $sgpr30_sgpr31
+
+ ; CHECK-LABEL: name: min_max_ValK0_K1_i32
+ ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31
+ ; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+ ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+ ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 12
+ ; CHECK: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17
+ ; CHECK: [[AMDGPU_UMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_UMED3 [[COPY]], [[C]], [[C1]]
+ ; CHECK: $vgpr0 = COPY [[AMDGPU_UMED3_]](s32)
+ ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
+ ; CHECK: S_SETPC_B64_return [[COPY2]], implicit $vgpr0
+ %0:vgpr(s32) = COPY $vgpr0
+ %1:sgpr_64 = COPY $sgpr30_sgpr31
+ %2:sgpr(s32) = G_CONSTANT i32 12
+ %7:vgpr(s32) = COPY %2(s32)
+ %3:vgpr(s32) = G_UMAX %7, %0
+ %4:sgpr(s32) = G_CONSTANT i32 17
+ %8:vgpr(s32) = COPY %4(s32)
+ %5:vgpr(s32) = G_UMIN %3, %8
+ $vgpr0 = COPY %5(s32)
+ %6:ccr_sgpr_64 = COPY %1
+ S_SETPC_B64_return %6, implicit $vgpr0
+...
+
+---
+name: test_min_K1max_ValK0__u32
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.1:
+ liveins: $vgpr0, $sgpr30_sgpr31
+
+ ; CHECK-LABEL: name: test_min_K1max_ValK0__u32
+ ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31
+ ; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+ ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+ ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 12
+ ; CHECK: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17
+ ; CHECK: [[AMDGPU_UMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_UMED3 [[COPY]], [[C]], [[C1]]
+ ; CHECK: $vgpr0 = COPY [[AMDGPU_UMED3_]](s32)
+ ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
+ ; CHECK: S_SETPC_B64_return [[COPY2]], implicit $vgpr0
+ %0:vgpr(s32) = COPY $vgpr0
+ %1:sgpr_64 = COPY $sgpr30_sgpr31
+ %2:sgpr(s32) = G_CONSTANT i32 12
+ %7:vgpr(s32) = COPY %2(s32)
+ %3:vgpr(s32) = G_UMAX %0, %7
+ %4:sgpr(s32) = G_CONSTANT i32 17
+ %8:vgpr(s32) = COPY %4(s32)
+ %5:vgpr(s32) = G_UMIN %8, %3
+ $vgpr0 = COPY %5(s32)
+ %6:ccr_sgpr_64 = COPY %1
+ S_SETPC_B64_return %6, implicit $vgpr0
+...
+
+---
+name: test_min_K1max_K0Val__u32
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.1:
+ liveins: $vgpr0, $sgpr30_sgpr31
+
+ ; CHECK-LABEL: name: test_min_K1max_K0Val__u32
+ ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31
+ ; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+ ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+ ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 12
+ ; CHECK: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17
+ ; CHECK: [[AMDGPU_UMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_UMED3 [[COPY]], [[C]], [[C1]]
+ ; CHECK: $vgpr0 = COPY [[AMDGPU_UMED3_]](s32)
+ ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
+ ; CHECK: S_SETPC_B64_return [[COPY2]], implicit $vgpr0
+ %0:vgpr(s32) = COPY $vgpr0
+ %1:sgpr_64 = COPY $sgpr30_sgpr31
+ %2:sgpr(s32) = G_CONSTANT i32 12
+ %7:vgpr(s32) = COPY %2(s32)
+ %3:vgpr(s32) = G_UMAX %7, %0
+ %4:sgpr(s32) = G_CONSTANT i32 17
+ %8:vgpr(s32) = COPY %4(s32)
+ %5:vgpr(s32) = G_UMIN %8, %3
+ $vgpr0 = COPY %5(s32)
+ %6:ccr_sgpr_64 = COPY %1
+ S_SETPC_B64_return %6, implicit $vgpr0
+...
+
+---
+name: test_max_min_ValK1_K0_u32
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.1:
+ liveins: $vgpr0, $sgpr30_sgpr31
+
+ ; CHECK-LABEL: name: test_max_min_ValK1_K0_u32
+ ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31
+ ; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+ ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+ ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17
+ ; CHECK: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 12
+ ; CHECK: [[AMDGPU_UMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_UMED3 [[COPY]], [[C1]], [[C]]
+ ; CHECK: $vgpr0 = COPY [[AMDGPU_UMED3_]](s32)
+ ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
+ ; CHECK: S_SETPC_B64_return [[COPY2]], implicit $vgpr0
+ %0:vgpr(s32) = COPY $vgpr0
+ %1:sgpr_64 = COPY $sgpr30_sgpr31
+ %2:sgpr(s32) = G_CONSTANT i32 17
+ %7:vgpr(s32) = COPY %2(s32)
+ %3:vgpr(s32) = G_UMIN %0, %7
+ %4:sgpr(s32) = G_CONSTANT i32 12
+ %8:vgpr(s32) = COPY %4(s32)
+ %5:vgpr(s32) = G_UMAX %3, %8
+ $vgpr0 = COPY %5(s32)
+ %6:ccr_sgpr_64 = COPY %1
+ S_SETPC_B64_return %6, implicit $vgpr0
+...
+
+---
+name: test_max_min_K1Val_K0_u32
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.1:
+ liveins: $vgpr0, $sgpr30_sgpr31
+
+ ; CHECK-LABEL: name: test_max_min_K1Val_K0_u32
+ ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31
+ ; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+ ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+ ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17
+ ; CHECK: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 12
+ ; CHECK: [[AMDGPU_UMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_UMED3 [[COPY]], [[C1]], [[C]]
+ ; CHECK: $vgpr0 = COPY [[AMDGPU_UMED3_]](s32)
+ ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
+ ; CHECK: S_SETPC_B64_return [[COPY2]], implicit $vgpr0
+ %0:vgpr(s32) = COPY $vgpr0
+ %1:sgpr_64 = COPY $sgpr30_sgpr31
+ %2:sgpr(s32) = G_CONSTANT i32 17
+ %7:vgpr(s32) = COPY %2(s32)
+ %3:vgpr(s32) = G_UMIN %7, %0
+ %4:sgpr(s32) = G_CONSTANT i32 12
+ %8:vgpr(s32) = COPY %4(s32)
+ %5:vgpr(s32) = G_UMAX %3, %8
+ $vgpr0 = COPY %5(s32)
+ %6:ccr_sgpr_64 = COPY %1
+ S_SETPC_B64_return %6, implicit $vgpr0
+...
+
+---
+name: test_max_K0min_ValK1__u32
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.1:
+ liveins: $vgpr0, $sgpr30_sgpr31
+
+ ; CHECK-LABEL: name: test_max_K0min_ValK1__u32
+ ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31
+ ; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+ ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+ ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17
+ ; CHECK: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 12
+ ; CHECK: [[AMDGPU_UMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_UMED3 [[COPY]], [[C1]], [[C]]
+ ; CHECK: $vgpr0 = COPY [[AMDGPU_UMED3_]](s32)
+ ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
+ ; CHECK: S_SETPC_B64_return [[COPY2]], implicit $vgpr0
+ %0:vgpr(s32) = COPY $vgpr0
+ %1:sgpr_64 = COPY $sgpr30_sgpr31
+ %2:sgpr(s32) = G_CONSTANT i32 17
+ %7:vgpr(s32) = COPY %2(s32)
+ %3:vgpr(s32) = G_UMIN %0, %7
+ %4:sgpr(s32) = G_CONSTANT i32 12
+ %8:vgpr(s32) = COPY %4(s32)
+ %5:vgpr(s32) = G_UMAX %8, %3
+ $vgpr0 = COPY %5(s32)
+ %6:ccr_sgpr_64 = COPY %1
+ S_SETPC_B64_return %6, implicit $vgpr0
+...
+
+---
+name: test_max_K0min_K1Val__u32
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.1:
+ liveins: $vgpr0, $sgpr30_sgpr31
+
+ ; CHECK-LABEL: name: test_max_K0min_K1Val__u32
+ ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31
+ ; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+ ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+ ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17
+ ; CHECK: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 12
+ ; CHECK: [[AMDGPU_UMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_UMED3 [[COPY]], [[C1]], [[C]]
+ ; CHECK: $vgpr0 = COPY [[AMDGPU_UMED3_]](s32)
+ ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
+ ; CHECK: S_SETPC_B64_return [[COPY2]], implicit $vgpr0
+ %0:vgpr(s32) = COPY $vgpr0
+ %1:sgpr_64 = COPY $sgpr30_sgpr31
+ %2:sgpr(s32) = G_CONSTANT i32 17
+ %7:vgpr(s32) = COPY %2(s32)
+ %3:vgpr(s32) = G_UMIN %7, %0
+ %4:sgpr(s32) = G_CONSTANT i32 12
+ %8:vgpr(s32) = COPY %4(s32)
+ %5:vgpr(s32) = G_UMAX %8, %3
+ $vgpr0 = COPY %5(s32)
+ %6:ccr_sgpr_64 = COPY %1
+ S_SETPC_B64_return %6, implicit $vgpr0
+...
+
+---
+name: test_max_K0min_K1Val__v2u16
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.1:
+ liveins: $vgpr0, $sgpr30_sgpr31
+
+ ; CHECK-LABEL: name: test_max_K0min_K1Val__v2u16
+ ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31
+ ; CHECK: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0
+ ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+ ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17
+ ; CHECK: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[C]](s32), [[C]](s32)
+ ; CHECK: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 12
+ ; CHECK: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[C1]](s32), [[C1]](s32)
+ ; CHECK: [[COPY2:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>)
+ ; CHECK: [[UMIN:%[0-9]+]]:vgpr(<2 x s16>) = G_UMIN [[COPY2]], [[COPY]]
+ ; CHECK: [[COPY3:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
+ ; CHECK: [[UMAX:%[0-9]+]]:vgpr(<2 x s16>) = G_UMAX [[COPY3]], [[UMIN]]
+ ; CHECK: $vgpr0 = COPY [[UMAX]](<2 x s16>)
+ ; CHECK: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
+ ; CHECK: S_SETPC_B64_return [[COPY4]], implicit $vgpr0
+ %0:vgpr(<2 x s16>) = COPY $vgpr0
+ %1:sgpr_64 = COPY $sgpr30_sgpr31
+ %9:sgpr(s32) = G_CONSTANT i32 17
+ %2:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC %9(s32), %9(s32)
+ %10:sgpr(s32) = G_CONSTANT i32 12
+ %5:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC %10(s32), %10(s32)
+ %11:vgpr(<2 x s16>) = COPY %2(<2 x s16>)
+ %4:vgpr(<2 x s16>) = G_UMIN %11, %0
+ %12:vgpr(<2 x s16>) = COPY %5(<2 x s16>)
+ %7:vgpr(<2 x s16>) = G_UMAX %12, %4
+ $vgpr0 = COPY %7(<2 x s16>)
+ %8:ccr_sgpr_64 = COPY %1
+ S_SETPC_B64_return %8, implicit $vgpr0
+...
+
+---
+name: test_uniform_min_max
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.1:
+ liveins: $sgpr2
+
+ ; CHECK-LABEL: name: test_uniform_min_max
+ ; CHECK: liveins: $sgpr2
+ ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
+ ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 12
+ ; CHECK: [[UMAX:%[0-9]+]]:sgpr(s32) = G_UMAX [[COPY]], [[C]]
+ ; CHECK: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17
+ ; CHECK: [[UMIN:%[0-9]+]]:sgpr(s32) = G_UMIN [[UMAX]], [[C1]]
+ ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[UMIN]](s32)
+ ; CHECK: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY1]](s32)
+ ; CHECK: $sgpr0 = COPY [[INT]](s32)
+ ; CHECK: SI_RETURN_TO_EPILOG implicit $sgpr0
+ %0:sgpr(s32) = COPY $sgpr2
+ %3:sgpr(s32) = G_CONSTANT i32 12
+ %4:sgpr(s32) = G_UMAX %0, %3
+ %5:sgpr(s32) = G_CONSTANT i32 17
+ %6:sgpr(s32) = G_UMIN %4, %5
+ %8:vgpr(s32) = COPY %6(s32)
+ %7:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), %8(s32)
+ $sgpr0 = COPY %7(s32)
+ SI_RETURN_TO_EPILOG implicit $sgpr0
+
+...
--- /dev/null
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -global-isel -mtriple=amdgcn-amd-mesa3d -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX10 %s
+
+define i32 @test_min_max_ValK0_K1_i32(i32 %a) {
+; GFX10-LABEL: test_min_max_ValK0_K1_i32:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX10-NEXT: v_med3_i32 v0, v0, -12, 17
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+ %smax = call i32 @llvm.smax.i32(i32 %a, i32 -12)
+ %smed = call i32 @llvm.smin.i32(i32 %smax, i32 17)
+ ret i32 %smed
+}
+
+define i32 @min_max_ValK0_K1_i32(i32 %a) {
+; GFX10-LABEL: min_max_ValK0_K1_i32:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX10-NEXT: v_med3_i32 v0, v0, -12, 17
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+ %smax = call i32 @llvm.smax.i32(i32 -12, i32 %a)
+ %smed = call i32 @llvm.smin.i32(i32 %smax, i32 17)
+ ret i32 %smed
+}
+
+; smin(K1, smax(%a, K0)) variant — smin operands commuted; still folds to v_med3_i32.
+define i32 @test_min_K1max_ValK0__i32(i32 %a) {
+; GFX10-LABEL: test_min_K1max_ValK0__i32:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX10-NEXT: v_med3_i32 v0, v0, -12, 17
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+ %smax = call i32 @llvm.smax.i32(i32 %a, i32 -12)
+ %smed = call i32 @llvm.smin.i32(i32 17, i32 %smax)
+ ret i32 %smed
+}
+
+; smin(K1, smax(K0, %a)) variant — both min and max operands commuted; still folds.
+define i32 @test_min_K1max_K0Val__i32(i32 %a) {
+; GFX10-LABEL: test_min_K1max_K0Val__i32:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX10-NEXT: v_med3_i32 v0, v0, -12, 17
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+ %smax = call i32 @llvm.smax.i32(i32 -12, i32 %a)
+ %smed = call i32 @llvm.smin.i32(i32 17, i32 %smax)
+ ret i32 %smed
+}
+
+; Mirrored pattern smax(smin(%a, K1), K0) — min-then-max also folds to v_med3_i32.
+define i32 @test_max_min_ValK1_K0_i32(i32 %a) {
+; GFX10-LABEL: test_max_min_ValK1_K0_i32:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX10-NEXT: v_med3_i32 v0, v0, -12, 17
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+ %smin = call i32 @llvm.smin.i32(i32 %a, i32 17)
+ %smed = call i32 @llvm.smax.i32(i32 %smin, i32 -12)
+ ret i32 %smed
+}
+
+; smax(smin(K1, %a), K0) variant — smin operands commuted; still folds.
+define i32 @test_max_min_K1Val_K0_i32(i32 %a) {
+; GFX10-LABEL: test_max_min_K1Val_K0_i32:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX10-NEXT: v_med3_i32 v0, v0, -12, 17
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+ %smin = call i32 @llvm.smin.i32(i32 17, i32 %a)
+ %smed = call i32 @llvm.smax.i32(i32 %smin, i32 -12)
+ ret i32 %smed
+}
+
+; smax(K0, smin(%a, K1)) variant — smax operands commuted; still folds.
+define i32 @test_max_K0min_ValK1__i32(i32 %a) {
+; GFX10-LABEL: test_max_K0min_ValK1__i32:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX10-NEXT: v_med3_i32 v0, v0, -12, 17
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+ %smin = call i32 @llvm.smin.i32(i32 %a, i32 17)
+ %smed = call i32 @llvm.smax.i32(i32 -12, i32 %smin)
+ ret i32 %smed
+}
+
+; smax(K0, smin(K1, %a)) variant — both operand orders commuted; still folds.
+define i32 @test_max_K0min_K1Val__i32(i32 %a) {
+; GFX10-LABEL: test_max_K0min_K1Val__i32:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX10-NEXT: v_med3_i32 v0, v0, -12, 17
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+ %smin = call i32 @llvm.smin.i32(i32 17, i32 %a)
+ %smed = call i32 @llvm.smax.i32(i32 -12, i32 %smin)
+ ret i32 %smed
+}
+
+; Negative test: for packed <2 x i16> the checks show the v_pk_min_i16/v_pk_max_i16
+; pair is kept — the med3 combine does not fire here.
+define <2 x i16> @test_max_K0min_K1Val__v2i16(<2 x i16> %a) {
+; GFX10-LABEL: test_max_K0min_K1Val__v2i16:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX10-NEXT: v_pk_min_i16 v0, 17, v0 op_sel_hi:[0,1]
+; GFX10-NEXT: v_pk_max_i16 v0, -12, v0 op_sel_hi:[0,1]
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+ %smin = call <2 x i16> @llvm.smin.v2i16(<2 x i16> <i16 17, i16 17>, <2 x i16> %a)
+ %smed = call <2 x i16> @llvm.smax.v2i16(<2 x i16> <i16 -12, i16 -12>, <2 x i16> %smin)
+ ret <2 x i16> %smed
+}
+
+; Negative test: with a uniform (inreg/SGPR) input the checks show separate
+; s_max_i32/s_min_i32 and no med3 — presumably the combine is VALU-only; verify
+; against the reg-bank combiner.
+define amdgpu_ps i32 @test_uniform_min_max(i32 inreg %a) {
+; GFX10-LABEL: test_uniform_min_max:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_max_i32 s0, s2, -12
+; GFX10-NEXT: s_min_i32 s0, s0, 17
+; GFX10-NEXT: ; return to shader part epilog
+ %smax = call i32 @llvm.smax.i32(i32 %a, i32 -12)
+ %smed = call i32 @llvm.smin.i32(i32 %smax, i32 17)
+ ret i32 %smed
+}
+
+declare i32 @llvm.smin.i32(i32, i32)
+declare i32 @llvm.smax.i32(i32, i32)
+declare <2 x i16> @llvm.smin.v2i16(<2 x i16>, <2 x i16>)
+declare <2 x i16> @llvm.smax.v2i16(<2 x i16>, <2 x i16>)
--- /dev/null
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -global-isel -mtriple=amdgcn-amd-mesa3d -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX10 %s
+
+; umin(umax(%a, K0), K1) with K0 <= K1 (unsigned): checks show a single v_med3_u32.
+define i32 @test_min_max_ValK0_K1_u32(i32 %a) {
+; GFX10-LABEL: test_min_max_ValK0_K1_u32:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX10-NEXT: v_med3_u32 v0, v0, 12, 17
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+ %umax = call i32 @llvm.umax.i32(i32 %a, i32 12)
+ %umed = call i32 @llvm.umin.i32(i32 %umax, i32 17)
+ ret i32 %umed
+}
+
+; Same fold with commuted umax operands (constant first); renamed to follow the
+; file's naming convention: "test_" prefix, "K0Val" for constant-first order, and
+; the "_u32" suffix this unsigned file uses (the original name kept a stray "_i32"
+; copied from the signed test file).
+define i32 @test_min_max_K0Val_K1_u32(i32 %a) {
+; GFX10-LABEL: test_min_max_K0Val_K1_u32:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX10-NEXT: v_med3_u32 v0, v0, 12, 17
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+ %umax = call i32 @llvm.umax.i32(i32 12, i32 %a)
+ %umed = call i32 @llvm.umin.i32(i32 %umax, i32 17)
+ ret i32 %umed
+}
+
+; umin(K1, umax(%a, K0)) variant — umin operands commuted; still folds to v_med3_u32.
+define i32 @test_min_K1max_ValK0__u32(i32 %a) {
+; GFX10-LABEL: test_min_K1max_ValK0__u32:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX10-NEXT: v_med3_u32 v0, v0, 12, 17
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+ %umax = call i32 @llvm.umax.i32(i32 %a, i32 12)
+ %umed = call i32 @llvm.umin.i32(i32 17, i32 %umax)
+ ret i32 %umed
+}
+
+; umin(K1, umax(K0, %a)) variant — both operand orders commuted; still folds.
+define i32 @test_min_K1max_K0Val__u32(i32 %a) {
+; GFX10-LABEL: test_min_K1max_K0Val__u32:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX10-NEXT: v_med3_u32 v0, v0, 12, 17
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+ %umax = call i32 @llvm.umax.i32(i32 12, i32 %a)
+ %umed = call i32 @llvm.umin.i32(i32 17, i32 %umax)
+ ret i32 %umed
+}
+
+; Mirrored pattern umax(umin(%a, K1), K0) — min-then-max also folds to v_med3_u32.
+define i32 @test_max_min_ValK1_K0_u32(i32 %a) {
+; GFX10-LABEL: test_max_min_ValK1_K0_u32:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX10-NEXT: v_med3_u32 v0, v0, 12, 17
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+ %umin = call i32 @llvm.umin.i32(i32 %a, i32 17)
+ %umed = call i32 @llvm.umax.i32(i32 %umin, i32 12)
+ ret i32 %umed
+}
+
+; umax(umin(K1, %a), K0) variant — umin operands commuted; still folds.
+define i32 @test_max_min_K1Val_K0_u32(i32 %a) {
+; GFX10-LABEL: test_max_min_K1Val_K0_u32:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX10-NEXT: v_med3_u32 v0, v0, 12, 17
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+ %umin = call i32 @llvm.umin.i32(i32 17, i32 %a)
+ %umed = call i32 @llvm.umax.i32(i32 %umin, i32 12)
+ ret i32 %umed
+}
+
+; umax(K0, umin(%a, K1)) variant — umax operands commuted; still folds.
+define i32 @test_max_K0min_ValK1__u32(i32 %a) {
+; GFX10-LABEL: test_max_K0min_ValK1__u32:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX10-NEXT: v_med3_u32 v0, v0, 12, 17
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+ %umin = call i32 @llvm.umin.i32(i32 %a, i32 17)
+ %umed = call i32 @llvm.umax.i32(i32 12, i32 %umin)
+ ret i32 %umed
+}
+
+; umax(K0, umin(K1, %a)) variant — both operand orders commuted; still folds.
+define i32 @test_max_K0min_K1Val__u32(i32 %a) {
+; GFX10-LABEL: test_max_K0min_K1Val__u32:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX10-NEXT: v_med3_u32 v0, v0, 12, 17
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+ %umin = call i32 @llvm.umin.i32(i32 17, i32 %a)
+ %umed = call i32 @llvm.umax.i32(i32 12, i32 %umin)
+ ret i32 %umed
+}
+
+; Negative test: for packed <2 x i16> the checks show the v_pk_min_u16/v_pk_max_u16
+; pair is kept — the med3 combine does not fire here.
+define <2 x i16> @test_max_K0min_K1Val__v2u16(<2 x i16> %a) {
+; GFX10-LABEL: test_max_K0min_K1Val__v2u16:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX10-NEXT: v_pk_min_u16 v0, 17, v0 op_sel_hi:[0,1]
+; GFX10-NEXT: v_pk_max_u16 v0, 12, v0 op_sel_hi:[0,1]
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+ %umin = call <2 x i16> @llvm.umin.v2i16(<2 x i16> <i16 17, i16 17>, <2 x i16> %a)
+ %umed = call <2 x i16> @llvm.umax.v2i16(<2 x i16> <i16 12, i16 12>, <2 x i16> %umin)
+ ret <2 x i16> %umed
+}
+
+; Negative test: with a uniform (inreg/SGPR) input the checks show separate
+; s_max_u32/s_min_u32 and no med3 — presumably the combine is VALU-only; verify
+; against the reg-bank combiner.
+define amdgpu_ps i32 @test_uniform_min_max(i32 inreg %a) {
+; GFX10-LABEL: test_uniform_min_max:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_max_u32 s0, s2, 12
+; GFX10-NEXT: s_min_u32 s0, s0, 17
+; GFX10-NEXT: ; return to shader part epilog
+ %umax = call i32 @llvm.umax.i32(i32 %a, i32 12)
+ %umed = call i32 @llvm.umin.i32(i32 %umax, i32 17)
+ ret i32 %umed
+}
+
+declare i32 @llvm.umin.i32(i32, i32)
+declare i32 @llvm.umax.i32(i32, i32)
+declare <2 x i16> @llvm.umin.v2i16(<2 x i16>, <2 x i16>)
+declare <2 x i16> @llvm.umax.v2i16(<2 x i16>, <2 x i16>)