From 77e3e9cafd9642375132e7bb0b13be415872b531 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Mon, 9 Sep 2019 18:29:45 +0000 Subject: [PATCH] AMDGPU/GlobalISel: Select llvm.amdgcn.class Also fixes missing SubtargetPredicate on f16 class instructions. llvm-svn: 371436 --- llvm/lib/Target/AMDGPU/AMDGPUGISel.td | 4 + llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td | 6 +- .../Target/AMDGPU/AMDGPUInstructionSelector.cpp | 18 +++ llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h | 2 + llvm/lib/Target/AMDGPU/VOPCInstructions.td | 3 + .../AMDGPU/GlobalISel/inst-select-amdgcn.class.mir | 173 +++++++++++++++++++++ .../GlobalISel/inst-select-amdgcn.class.s16.mir | 98 ++++++++++++ 7 files changed, 303 insertions(+), 1 deletion(-) create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.class.mir create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.class.s16.mir diff --git a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td index 2db0389..a107909 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td +++ b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td @@ -34,6 +34,10 @@ def gi_vop3omods : GIComplexOperandMatcher, GIComplexPatternEquiv; +def gi_vop3omods0clamp0omod : + GIComplexOperandMatcher, + GIComplexPatternEquiv; + def gi_vop3opselmods0 : GIComplexOperandMatcher, GIComplexPatternEquiv; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td b/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td index 8f7765b..f205f16 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td @@ -141,7 +141,7 @@ def AMDGPUfp_to_f16 : SDNode<"AMDGPUISD::FP_TO_FP16" , SDTFPToIntOp>; def AMDGPUfp16_zext : SDNode<"AMDGPUISD::FP16_ZEXT" , SDTFPToIntOp>; -def AMDGPUfp_class : SDNode<"AMDGPUISD::FP_CLASS", AMDGPUFPClassOp>; +def AMDGPUfp_class_impl : SDNode<"AMDGPUISD::FP_CLASS", AMDGPUFPClassOp>; // out = max(a, b) a and b are floats, where a nan comparison fails. // This is not commutative because this gives the second operand: @@ -438,6 +438,10 @@ def AMDGPUldexp : PatFrags<(ops node:$src0, node:$src1), [(int_amdgcn_ldexp node:$src0, node:$src1), (AMDGPUldexp_impl node:$src0, node:$src1)]>; +def AMDGPUfp_class : PatFrags<(ops node:$src0, node:$src1), + [(int_amdgcn_class node:$src0, node:$src1), + (AMDGPUfp_class_impl node:$src0, node:$src1)]>; + def AMDGPUfmed3 : PatFrags<(ops node:$src0, node:$src1, node:$src2), [(int_amdgcn_fmed3 node:$src0, node:$src1, node:$src2), (AMDGPUfmed3_impl node:$src0, node:$src1, node:$src2)]>; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp index c14a647..c1c515a 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp @@ -1479,6 +1479,24 @@ AMDGPUInstructionSelector::selectVOP3Mods0(MachineOperand &Root) const { [=](MachineInstrBuilder &MIB) { MIB.addImm(0); } // omod }}; } + +InstructionSelector::ComplexRendererFns +AMDGPUInstructionSelector::selectVOP3Mods0Clamp0OMod(MachineOperand &Root) const { + MachineRegisterInfo &MRI + = Root.getParent()->getParent()->getParent()->getRegInfo(); + + Register Src; + unsigned Mods; + std::tie(Src, Mods) = selectVOP3ModsImpl(Root.getReg(), MRI); + + return {{ + [=](MachineInstrBuilder &MIB) { MIB.addReg(Src); }, + [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); }, // src0_mods + [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }, // clamp + [=](MachineInstrBuilder &MIB) { MIB.addImm(0); } // omod + }}; +} + InstructionSelector::ComplexRendererFns AMDGPUInstructionSelector::selectVOP3OMods(MachineOperand &Root) const { return {{ diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h index a2ba46b..67a5062 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h @@ -110,6 +110,8 @@ private: InstructionSelector::ComplexRendererFns selectVOP3Mods0(MachineOperand &Root) const; InstructionSelector::ComplexRendererFns + selectVOP3Mods0Clamp0OMod(MachineOperand &Root) const; + InstructionSelector::ComplexRendererFns selectVOP3OMods(MachineOperand &Root) const; InstructionSelector::ComplexRendererFns selectVOP3Mods(MachineOperand &Root) const; diff --git a/llvm/lib/Target/AMDGPU/VOPCInstructions.td b/llvm/lib/Target/AMDGPU/VOPCInstructions.td index 653094d..e011b0a 100644 --- a/llvm/lib/Target/AMDGPU/VOPCInstructions.td +++ b/llvm/lib/Target/AMDGPU/VOPCInstructions.td @@ -738,8 +738,11 @@ defm V_CMP_CLASS_F32 : VOPC_CLASS_F32 <"v_cmp_class_f32">; defm V_CMPX_CLASS_F32 : VOPCX_CLASS_F32 <"v_cmpx_class_f32">; defm V_CMP_CLASS_F64 : VOPC_CLASS_F64 <"v_cmp_class_f64">; defm V_CMPX_CLASS_F64 : VOPCX_CLASS_F64 <"v_cmpx_class_f64">; + +let SubtargetPredicate = Has16BitInsts in { defm V_CMP_CLASS_F16 : VOPC_CLASS_F16 <"v_cmp_class_f16">; defm V_CMPX_CLASS_F16 : VOPCX_CLASS_F16 <"v_cmpx_class_f16">; +} //===----------------------------------------------------------------------===// // V_ICMPIntrinsic Pattern. diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.class.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.class.mir new file mode 100644 index 0000000..f14898e --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.class.mir @@ -0,0 +1,173 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -march=amdgcn -mcpu=tahiti -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=WAVE64 %s +# RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=WAVE32 %s + +--- +name: class_s32_vcc_sv +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $sgpr0, $vgpr0 + ; WAVE64-LABEL: name: class_s32_vcc_sv + ; WAVE64: liveins: $sgpr0, $vgpr0 + ; WAVE64: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0 + ; WAVE64: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; WAVE64: [[V_CMP_CLASS_F32_e64_:%[0-9]+]]:sreg_64 = V_CMP_CLASS_F32_e64 0, [[COPY]], [[COPY1]], implicit $exec + ; WAVE64: S_ENDPGM 0, implicit [[V_CMP_CLASS_F32_e64_]] + ; WAVE32-LABEL: name: class_s32_vcc_sv + ; WAVE32: liveins: $sgpr0, $vgpr0 + ; WAVE32: $vcc_hi = IMPLICIT_DEF + ; WAVE32: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0 + ; WAVE32: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; WAVE32: [[V_CMP_CLASS_F32_e64_:%[0-9]+]]:sreg_32_xm0 = V_CMP_CLASS_F32_e64 0, [[COPY]], [[COPY1]], implicit $exec + ; WAVE32: S_ENDPGM 0, implicit [[V_CMP_CLASS_F32_e64_]] + %0:sgpr(s32) = COPY $sgpr0 + %1:vgpr(s32) = COPY $vgpr0 + %2:vcc(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.class), %0, %1 + S_ENDPGM 0, implicit %2 +... + +--- +name: class_s32_vcc_vs +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $sgpr0, $vgpr0 + ; WAVE64-LABEL: name: class_s32_vcc_vs + ; WAVE64: liveins: $sgpr0, $vgpr0 + ; WAVE64: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; WAVE64: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0 + ; WAVE64: [[V_CMP_CLASS_F32_e64_:%[0-9]+]]:sreg_64 = V_CMP_CLASS_F32_e64 0, [[COPY]], [[COPY1]], implicit $exec + ; WAVE64: S_ENDPGM 0, implicit [[V_CMP_CLASS_F32_e64_]] + ; WAVE32-LABEL: name: class_s32_vcc_vs + ; WAVE32: liveins: $sgpr0, $vgpr0 + ; WAVE32: $vcc_hi = IMPLICIT_DEF + ; WAVE32: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; WAVE32: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0 + ; WAVE32: [[V_CMP_CLASS_F32_e64_:%[0-9]+]]:sreg_32_xm0 = V_CMP_CLASS_F32_e64 0, [[COPY]], [[COPY1]], implicit $exec + ; WAVE32: S_ENDPGM 0, implicit [[V_CMP_CLASS_F32_e64_]] + %0:vgpr(s32) = COPY $vgpr0 + %1:sgpr(s32) = COPY $sgpr0 + %2:vcc(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.class), %0, %1 + S_ENDPGM 0, implicit %2 +... + +--- +name: class_s32_vcc_vv +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + ; WAVE64-LABEL: name: class_s32_vcc_vv + ; WAVE64: liveins: $vgpr0, $vgpr1 + ; WAVE64: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; WAVE64: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; WAVE64: [[V_CMP_CLASS_F32_e64_:%[0-9]+]]:sreg_64 = V_CMP_CLASS_F32_e64 0, [[COPY]], [[COPY1]], implicit $exec + ; WAVE64: S_ENDPGM 0, implicit [[V_CMP_CLASS_F32_e64_]] + ; WAVE32-LABEL: name: class_s32_vcc_vv + ; WAVE32: liveins: $vgpr0, $vgpr1 + ; WAVE32: $vcc_hi = IMPLICIT_DEF + ; WAVE32: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; WAVE32: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; WAVE32: [[V_CMP_CLASS_F32_e64_:%[0-9]+]]:sreg_32_xm0 = V_CMP_CLASS_F32_e64 0, [[COPY]], [[COPY1]], implicit $exec + ; WAVE32: S_ENDPGM 0, implicit [[V_CMP_CLASS_F32_e64_]] + %0:vgpr(s32) = COPY $vgpr0 + %1:vgpr(s32) = COPY $vgpr1 + %2:vcc(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.class), %0, %1 + S_ENDPGM 0, implicit %2 +... + +--- +name: class_s64_vcc_sv +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $sgpr0_sgpr1, $vgpr0 + ; WAVE64-LABEL: name: class_s64_vcc_sv + ; WAVE64: liveins: $sgpr0_sgpr1, $vgpr0 + ; WAVE64: [[COPY:%[0-9]+]]:sreg_64_xexec = COPY $sgpr0_sgpr1 + ; WAVE64: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; WAVE64: [[V_CMP_CLASS_F64_e64_:%[0-9]+]]:sreg_64 = V_CMP_CLASS_F64_e64 0, [[COPY]], [[COPY1]], implicit $exec + ; WAVE64: S_ENDPGM 0, implicit [[V_CMP_CLASS_F64_e64_]] + ; WAVE32-LABEL: name: class_s64_vcc_sv + ; WAVE32: liveins: $sgpr0_sgpr1, $vgpr0 + ; WAVE32: $vcc_hi = IMPLICIT_DEF + ; WAVE32: [[COPY:%[0-9]+]]:sreg_64_xexec = COPY $sgpr0_sgpr1 + ; WAVE32: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; WAVE32: [[V_CMP_CLASS_F64_e64_:%[0-9]+]]:sreg_32_xm0 = V_CMP_CLASS_F64_e64 0, [[COPY]], [[COPY1]], implicit $exec + ; WAVE32: S_ENDPGM 0, implicit [[V_CMP_CLASS_F64_e64_]] + %0:sgpr(s64) = COPY $sgpr0_sgpr1 + %1:vgpr(s32) = COPY $vgpr0 + %2:vcc(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.class), %0, %1 + S_ENDPGM 0, implicit %2 +... + +--- +name: class_s64_vcc_vs +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $sgpr0_sgpr1, $vgpr0 + + ; WAVE64-LABEL: name: class_s64_vcc_vs + ; WAVE64: liveins: $sgpr0_sgpr1, $vgpr0 + ; WAVE64: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; WAVE64: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0 + ; WAVE64: [[V_CMP_CLASS_F64_e64_:%[0-9]+]]:sreg_64 = V_CMP_CLASS_F64_e64 0, [[COPY]], [[COPY1]], implicit $exec + ; WAVE64: S_ENDPGM 0, implicit [[V_CMP_CLASS_F64_e64_]] + ; WAVE32-LABEL: name: class_s64_vcc_vs + ; WAVE32: liveins: $sgpr0_sgpr1, $vgpr0 + ; WAVE32: $vcc_hi = IMPLICIT_DEF + ; WAVE32: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; WAVE32: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0 + ; WAVE32: [[V_CMP_CLASS_F64_e64_:%[0-9]+]]:sreg_32_xm0 = V_CMP_CLASS_F64_e64 0, [[COPY]], [[COPY1]], implicit $exec + ; WAVE32: S_ENDPGM 0, implicit [[V_CMP_CLASS_F64_e64_]] + %0:vgpr(s64) = COPY $vgpr0_vgpr1 + %1:sgpr(s32) = COPY $sgpr0 + %2:vcc(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.class), %0, %1 + S_ENDPGM 0, implicit %2 +... + +--- +name: class_s64_vcc_vv +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1, $vgpr2 + + ; WAVE64-LABEL: name: class_s64_vcc_vv + ; WAVE64: liveins: $vgpr0_vgpr1, $vgpr2 + ; WAVE64: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; WAVE64: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; WAVE64: [[V_CMP_CLASS_F64_e64_:%[0-9]+]]:sreg_64 = V_CMP_CLASS_F64_e64 0, [[COPY]], [[COPY1]], implicit $exec + ; WAVE64: S_ENDPGM 0, implicit [[V_CMP_CLASS_F64_e64_]] + ; WAVE32-LABEL: name: class_s64_vcc_vv + ; WAVE32: liveins: $vgpr0_vgpr1, $vgpr2 + ; WAVE32: $vcc_hi = IMPLICIT_DEF + ; WAVE32: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; WAVE32: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; WAVE32: [[V_CMP_CLASS_F64_e64_:%[0-9]+]]:sreg_32_xm0 = V_CMP_CLASS_F64_e64 0, [[COPY]], [[COPY1]], implicit $exec + ; WAVE32: S_ENDPGM 0, implicit [[V_CMP_CLASS_F64_e64_]] + %0:vgpr(s64) = COPY $vgpr0_vgpr1 + %1:vgpr(s32) = COPY $vgpr2 + %2:vcc(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.class), %0, %1 + S_ENDPGM 0, implicit %2 +... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.class.s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.class.s16.mir new file mode 100644 index 0000000..5443a04 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.class.s16.mir @@ -0,0 +1,98 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=WAVE32 %s +# RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=WAVE64 %s + +# RUN: llc -march=amdgcn -mcpu=tahiti -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=2 -pass-remarks-missed='gisel*' %s -o /dev/null 2>&1 | FileCheck -check-prefix=SI-ERR %s + +# SI-ERR-NOT: remark +# SI-ERR: remark: :0:0: cannot select: %3:vcc(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.class), %2:sgpr(s16), %1:vgpr(s32) (in function: class_s16_vcc_sv) +# SI-ERR-NEXT: remark: :0:0: cannot select: %3:vcc(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.class), %2:vgpr(s16), %1:sgpr(s32) (in function: class_s16_vcc_vs) +# SI-ERR-NEXT: remark: :0:0: cannot select: %3:vcc(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.class), %2:vgpr(s16), %1:vgpr(s32) (in function: class_s16_vcc_vv) +# SI-ERR-NOT: remark + +--- +name: class_s16_vcc_sv +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $sgpr0, $vgpr0 + ; WAVE32-LABEL: name: class_s16_vcc_sv + ; WAVE32: liveins: $sgpr0, $vgpr0 + ; WAVE32: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0 + ; WAVE32: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; WAVE32: [[V_CMP_CLASS_F16_e64_:%[0-9]+]]:sreg_64 = V_CMP_CLASS_F16_e64 0, [[COPY]], [[COPY1]], implicit $exec + ; WAVE32: S_ENDPGM 0, implicit [[V_CMP_CLASS_F16_e64_]] + ; WAVE64-LABEL: name: class_s16_vcc_sv + ; WAVE64: liveins: $sgpr0, $vgpr0 + ; WAVE64: $vcc_hi = IMPLICIT_DEF + ; WAVE64: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0 + ; WAVE64: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; WAVE64: [[V_CMP_CLASS_F16_e64_:%[0-9]+]]:sreg_32_xm0 = V_CMP_CLASS_F16_e64 0, [[COPY]], [[COPY1]], implicit $exec + ; WAVE64: S_ENDPGM 0, implicit [[V_CMP_CLASS_F16_e64_]] + %0:sgpr(s32) = COPY $sgpr0 + %1:vgpr(s32) = COPY $vgpr0 + %2:sgpr(s16) = G_TRUNC %0 + %4:vcc(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.class), %2, %1 + S_ENDPGM 0, implicit %4 +... + +--- +name: class_s16_vcc_vs +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $sgpr0, $vgpr0 + ; WAVE32-LABEL: name: class_s16_vcc_vs + ; WAVE32: liveins: $sgpr0, $vgpr0 + ; WAVE32: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; WAVE32: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0 + ; WAVE32: [[V_CMP_CLASS_F16_e64_:%[0-9]+]]:sreg_64 = V_CMP_CLASS_F16_e64 0, [[COPY]], [[COPY1]], implicit $exec + ; WAVE32: S_ENDPGM 0, implicit [[V_CMP_CLASS_F16_e64_]] + ; WAVE64-LABEL: name: class_s16_vcc_vs + ; WAVE64: liveins: $sgpr0, $vgpr0 + ; WAVE64: $vcc_hi = IMPLICIT_DEF + ; WAVE64: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; WAVE64: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0 + ; WAVE64: [[V_CMP_CLASS_F16_e64_:%[0-9]+]]:sreg_32_xm0 = V_CMP_CLASS_F16_e64 0, [[COPY]], [[COPY1]], implicit $exec + ; WAVE64: S_ENDPGM 0, implicit [[V_CMP_CLASS_F16_e64_]] + %0:vgpr(s32) = COPY $vgpr0 + %1:sgpr(s32) = COPY $sgpr0 + %2:vgpr(s16) = G_TRUNC %0 + %4:vcc(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.class), %2, %1 + S_ENDPGM 0, implicit %4 +... + +--- +name: class_s16_vcc_vv +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + ; WAVE32-LABEL: name: class_s16_vcc_vv + ; WAVE32: liveins: $vgpr0, $vgpr1 + ; WAVE32: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; WAVE32: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; WAVE32: [[V_CMP_CLASS_F16_e64_:%[0-9]+]]:sreg_64 = V_CMP_CLASS_F16_e64 0, [[COPY]], [[COPY1]], implicit $exec + ; WAVE32: S_ENDPGM 0, implicit [[V_CMP_CLASS_F16_e64_]] + ; WAVE64-LABEL: name: class_s16_vcc_vv + ; WAVE64: liveins: $vgpr0, $vgpr1 + ; WAVE64: $vcc_hi = IMPLICIT_DEF + ; WAVE64: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; WAVE64: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; WAVE64: [[V_CMP_CLASS_F16_e64_:%[0-9]+]]:sreg_32_xm0 = V_CMP_CLASS_F16_e64 0, [[COPY]], [[COPY1]], implicit $exec + ; WAVE64: S_ENDPGM 0, implicit [[V_CMP_CLASS_F16_e64_]] + %0:vgpr(s32) = COPY $vgpr0 + %1:vgpr(s32) = COPY $vgpr1 + %2:vgpr(s16) = G_TRUNC %0 + %4:vcc(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.class), %2, %1 + S_ENDPGM 0, implicit %4 +... -- 2.7.4