From d6c1f5bb154a0b524b92d15b99a882d654f906ce Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Mon, 9 Sep 2019 18:29:37 +0000 Subject: [PATCH] AMDGPU/GlobalISel: Select fmed3 llvm-svn: 371435 --- llvm/lib/Target/AMDGPU/AMDGPUGISel.td | 8 + llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td | 6 +- .../Target/AMDGPU/AMDGPUInstructionSelector.cpp | 19 ++ llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h | 5 + .../AMDGPU/GlobalISel/inst-select-amdgcn.fmed3.mir | 205 +++++++++++++++++++++ .../GlobalISel/inst-select-amdgcn.fmed3.s16.mir | 61 ++++++ 6 files changed, 303 insertions(+), 1 deletion(-) create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.fmed3.mir create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.fmed3.s16.mir diff --git a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td index 01441bf..2db0389 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td +++ b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td @@ -34,6 +34,14 @@ def gi_vop3omods : GIComplexOperandMatcher<s32, "selectVOP3OMods">, GIComplexPatternEquiv<VOP3OMods>; +def gi_vop3opselmods0 : + GIComplexOperandMatcher<s32, "selectVOP3OpSelMods0">, + GIComplexPatternEquiv<VOP3OpSelMods0>; + +def gi_vop3opselmods : + GIComplexOperandMatcher<s32, "selectVOP3OpSelMods">, + GIComplexPatternEquiv<VOP3OpSelMods>; + def gi_smrd_imm : GIComplexOperandMatcher<s64, "selectSmrdImm">, GIComplexPatternEquiv<SMRDImm>; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td b/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td index 703d597..8f7765b 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td @@ -319,7 +319,7 @@ def AMDGPUumed3 : SDNode<"AMDGPUISD::UMED3", AMDGPUDTIntTernaryOp, [] >; -def AMDGPUfmed3 : SDNode<"AMDGPUISD::FMED3", SDTFPTernaryOp, []>; +def AMDGPUfmed3_impl : SDNode<"AMDGPUISD::FMED3", SDTFPTernaryOp, []>; def AMDGPUfdot2 : SDNode<"AMDGPUISD::FDOT2", SDTypeProfile<1, 4, [SDTCisSameAs<0, 3>, SDTCisSameAs<1, 2>, @@ -437,3 +437,7 @@ def AMDGPUfract : PatFrags<(ops node:$src), [(int_amdgcn_fract node:$src), def AMDGPUldexp : PatFrags<(ops node:$src0, node:$src1), 
[(int_amdgcn_ldexp node:$src0, node:$src1), (AMDGPUldexp_impl node:$src0, node:$src1)]>; + +def AMDGPUfmed3 : PatFrags<(ops node:$src0, node:$src1, node:$src2), + [(int_amdgcn_fmed3 node:$src0, node:$src1, node:$src2), + (AMDGPUfmed3_impl node:$src0, node:$src1, node:$src2)]>; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp index aea9ad8..c14a647 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp @@ -1504,6 +1504,25 @@ AMDGPUInstructionSelector::selectVOP3Mods(MachineOperand &Root) const { } InstructionSelector::ComplexRendererFns +AMDGPUInstructionSelector::selectVOP3OpSelMods0(MachineOperand &Root) const { + // FIXME: Handle clamp and op_sel + return {{ + [=](MachineInstrBuilder &MIB) { MIB.addReg(Root.getReg()); }, + [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }, // src_mods + [=](MachineInstrBuilder &MIB) { MIB.addImm(0); } // clamp + }}; +} + +InstructionSelector::ComplexRendererFns +AMDGPUInstructionSelector::selectVOP3OpSelMods(MachineOperand &Root) const { + // FIXME: Handle op_sel + return {{ + [=](MachineInstrBuilder &MIB) { MIB.addReg(Root.getReg()); }, + [=](MachineInstrBuilder &MIB) { MIB.addImm(0); } // src_mods + }}; +} + +InstructionSelector::ComplexRendererFns AMDGPUInstructionSelector::selectSmrdImm(MachineOperand &Root) const { MachineRegisterInfo &MRI = Root.getParent()->getParent()->getParent()->getRegInfo(); diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h index 7d2538d..a2ba46b 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h @@ -115,6 +115,11 @@ private: selectVOP3Mods(MachineOperand &Root) const; InstructionSelector::ComplexRendererFns + selectVOP3OpSelMods0(MachineOperand &Root) const; + InstructionSelector::ComplexRendererFns + 
selectVOP3OpSelMods(MachineOperand &Root) const; + + InstructionSelector::ComplexRendererFns selectSmrdImm(MachineOperand &Root) const; InstructionSelector::ComplexRendererFns selectSmrdImm32(MachineOperand &Root) const; diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.fmed3.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.fmed3.mir new file mode 100644 index 0000000..4166ff66 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.fmed3.mir @@ -0,0 +1,205 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -march=amdgcn -mcpu=tahiti -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=GCN %s + +--- +name: fmed3_s32_vvvv +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0, $vgpr1, $vgpr2 + + ; GCN-LABEL: name: fmed3_s32_vvvv + ; GCN: liveins: $vgpr0, $vgpr1, $vgpr2 + ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GCN: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GCN: [[V_MED3_F32_:%[0-9]+]]:vgpr_32 = V_MED3_F32 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec + ; GCN: S_ENDPGM 0, implicit [[V_MED3_F32_]] + %0:vgpr(s32) = COPY $vgpr0 + %1:vgpr(s32) = COPY $vgpr1 + %2:vgpr(s32) = COPY $vgpr2 + %3:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmed3), %0, %1, %2 + S_ENDPGM 0, implicit %3 +... 
+ +--- +name: fmed3_s32_vsvv +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $sgpr0, $vgpr0, $vgpr1 + + ; GCN-LABEL: name: fmed3_s32_vsvv + ; GCN: liveins: $sgpr0, $vgpr0, $vgpr1 + ; GCN: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0 + ; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GCN: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GCN: [[V_MED3_F32_:%[0-9]+]]:vgpr_32 = V_MED3_F32 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec + ; GCN: S_ENDPGM 0, implicit [[V_MED3_F32_]] + %0:sgpr(s32) = COPY $sgpr0 + %1:vgpr(s32) = COPY $vgpr0 + %2:vgpr(s32) = COPY $vgpr1 + %3:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmed3), %0, %1, %2 + S_ENDPGM 0, implicit %3 +... + +--- +name: fmed3_s32_vvsv +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $sgpr0, $vgpr0, $vgpr1 + + ; GCN-LABEL: name: fmed3_s32_vvsv + ; GCN: liveins: $sgpr0, $vgpr0, $vgpr1 + ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GCN: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0 + ; GCN: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GCN: [[V_MED3_F32_:%[0-9]+]]:vgpr_32 = V_MED3_F32 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec + ; GCN: S_ENDPGM 0, implicit [[V_MED3_F32_]] + %0:vgpr(s32) = COPY $vgpr0 + %1:sgpr(s32) = COPY $sgpr0 + %2:vgpr(s32) = COPY $vgpr1 + %3:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmed3), %0, %1, %2 + S_ENDPGM 0, implicit %3 +... 
+ +--- +name: fmed3_s32_vvvs +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $sgpr0, $vgpr0, $vgpr1 + + ; GCN-LABEL: name: fmed3_s32_vvvs + ; GCN: liveins: $sgpr0, $vgpr0, $vgpr1 + ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GCN: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0 + ; GCN: [[V_MED3_F32_:%[0-9]+]]:vgpr_32 = V_MED3_F32 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec + ; GCN: S_ENDPGM 0, implicit [[V_MED3_F32_]] + %0:vgpr(s32) = COPY $vgpr0 + %1:vgpr(s32) = COPY $vgpr1 + %2:sgpr(s32) = COPY $sgpr0 + %3:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmed3), %0, %1, %2 + S_ENDPGM 0, implicit %3 +... + + +# Same SGPR used, so doesn't violate the constant bus restriction. +--- +name: fmed3_s32_vssv +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $sgpr0, $vgpr0 + + ; GCN-LABEL: name: fmed3_s32_vssv + ; GCN: liveins: $sgpr0, $vgpr0 + ; GCN: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0 + ; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GCN: [[V_MED3_F32_:%[0-9]+]]:vgpr_32 = V_MED3_F32 0, [[COPY]], 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $exec + ; GCN: S_ENDPGM 0, implicit [[V_MED3_F32_]] + %0:sgpr(s32) = COPY $sgpr0 + %1:vgpr(s32) = COPY $vgpr0 + %2:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmed3), %0, %0, %1 + S_ENDPGM 0, implicit %2 +... 
+ +--- +name: fmed3_s32_vsvs +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $sgpr0, $vgpr0 + + ; GCN-LABEL: name: fmed3_s32_vsvs + ; GCN: liveins: $sgpr0, $vgpr0 + ; GCN: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0 + ; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GCN: [[V_MED3_F32_:%[0-9]+]]:vgpr_32 = V_MED3_F32 0, [[COPY]], 0, [[COPY1]], 0, [[COPY]], 0, 0, implicit $exec + ; GCN: S_ENDPGM 0, implicit [[V_MED3_F32_]] + %0:sgpr(s32) = COPY $sgpr0 + %1:vgpr(s32) = COPY $vgpr0 + %2:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmed3), %0, %1, %0 + S_ENDPGM 0, implicit %2 +... + +--- +name: fmed3_s32_vvss +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $sgpr0, $vgpr0 + + ; GCN-LABEL: name: fmed3_s32_vvss + ; GCN: liveins: $sgpr0, $vgpr0 + ; GCN: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0 + ; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GCN: [[V_MED3_F32_:%[0-9]+]]:vgpr_32 = V_MED3_F32 0, [[COPY1]], 0, [[COPY]], 0, [[COPY]], 0, 0, implicit $exec + ; GCN: S_ENDPGM 0, implicit [[V_MED3_F32_]] + %0:sgpr(s32) = COPY $sgpr0 + %1:vgpr(s32) = COPY $vgpr0 + %2:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmed3), %1, %0, %0 + S_ENDPGM 0, implicit %2 +... + +--- +name: fmed3_s32_vsss +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $sgpr0, $vgpr0 + + ; GCN-LABEL: name: fmed3_s32_vsss + ; GCN: liveins: $sgpr0, $vgpr0 + ; GCN: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0 + ; GCN: [[V_MED3_F32_:%[0-9]+]]:vgpr_32 = V_MED3_F32 0, [[COPY]], 0, [[COPY]], 0, [[COPY]], 0, 0, implicit $exec + ; GCN: S_ENDPGM 0, implicit [[V_MED3_F32_]] + %0:sgpr(s32) = COPY $sgpr0 + %1:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmed3), %0, %0, %0 + S_ENDPGM 0, implicit %1 +... + + +# FIXME: This should probably have been fixed by RegBankSelect, but we should fail to select it. 
+# --- +# name: fmed3_s32_vssv_constant_bus_violation +# legalized: true +# regBankSelected: true +# tracksRegLiveness: true + +# body: | +# bb.0: +# liveins: $sgpr0, $sgpr1, $vgpr0 + +# %0:sgpr(s32) = COPY $sgpr0 +# %1:sgpr(s32) = COPY $sgpr1 +# %2:vgpr(s32) = COPY $vgpr0 +# %3:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmed3), %0, %1, %2 +# S_ENDPGM 0, implicit %3 +# ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.fmed3.s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.fmed3.s16.mir new file mode 100644 index 0000000..2c9e079 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.fmed3.s16.mir @@ -0,0 +1,61 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=GCN %s +# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=2 -pass-remarks-missed='gisel*' %s -o /dev/null 2>&1 | FileCheck -check-prefix=VI-ERR %s + +# VI-ERR-NOT: remark +# VI-ERR: remark: <unknown>:0:0: cannot select: %6:vgpr(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmed3), %3:vgpr(s16), %4:vgpr(s16), %5:vgpr(s16) (in function: fmed3_s16_vvvv) +# VI-ERR-NEXT: remark: <unknown>:0:0: cannot select: %6:vgpr(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmed3), %3:sgpr(s16), %4:vgpr(s16), %5:vgpr(s16) (in function: fmed3_s16_vsvv) +# VI-ERR-NOT: remark +--- +name: fmed3_s16_vvvv +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0, $vgpr1, $vgpr2 + + ; GCN-LABEL: name: fmed3_s16_vvvv + ; GCN: liveins: $vgpr0, $vgpr1, $vgpr2 + ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GCN: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GCN: [[V_MED3_F16_:%[0-9]+]]:vgpr_32 = V_MED3_F16 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec + ; GCN: S_ENDPGM 0, implicit 
[[V_MED3_F16_]] + %0:vgpr(s32) = COPY $vgpr0 + %1:vgpr(s32) = COPY $vgpr1 + %2:vgpr(s32) = COPY $vgpr2 + %3:vgpr(s16) = G_TRUNC %0 + %4:vgpr(s16) = G_TRUNC %1 + %5:vgpr(s16) = G_TRUNC %2 + %6:vgpr(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmed3), %3, %4, %5 + S_ENDPGM 0, implicit %6 +... + +--- +name: fmed3_s16_vsvv +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $sgpr0, $vgpr0, $vgpr1 + + ; GCN-LABEL: name: fmed3_s16_vsvv + ; GCN: liveins: $sgpr0, $vgpr0, $vgpr1 + ; GCN: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0 + ; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GCN: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GCN: [[V_MED3_F16_:%[0-9]+]]:vgpr_32 = V_MED3_F16 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec + ; GCN: S_ENDPGM 0, implicit [[V_MED3_F16_]] + %0:sgpr(s32) = COPY $sgpr0 + %1:vgpr(s32) = COPY $vgpr0 + %2:vgpr(s32) = COPY $vgpr1 + %3:sgpr(s16) = G_TRUNC %0 + %4:vgpr(s16) = G_TRUNC %1 + %5:vgpr(s16) = G_TRUNC %2 + %6:vgpr(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.fmed3), %3, %4, %5 + S_ENDPGM 0, implicit %6 +... -- 2.7.4