From: Matt Arsenault Date: Tue, 7 Jan 2020 16:29:05 +0000 (-0500) Subject: AMDGPU/GlobalISel: Fix add of neg inline constant pattern X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=3952748ffdf017f83faddcb1240cb36cb4bb9c5b;p=platform%2Fupstream%2Fllvm.git AMDGPU/GlobalISel: Fix add of neg inline constant pattern --- diff --git a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td index d7c211f..0583610 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td +++ b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td @@ -204,3 +204,6 @@ foreach Ty = [i64, p0, p1, p4] in { def gi_as_i32timm : GICustomOperandRenderer<"renderTruncImm32">, GISDNodeXFormEquiv<as_i32timm>; + +def gi_NegateImm : GICustomOperandRenderer<"renderNegateImm">, + GISDNodeXFormEquiv<NegateImm>; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp index a41c8f1..a632e7a 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp @@ -2097,6 +2097,12 @@ void AMDGPUInstructionSelector::renderTruncImm32(MachineInstrBuilder &MIB, MIB.addImm(CstVal.getValue()); } +void AMDGPUInstructionSelector::renderNegateImm(MachineInstrBuilder &MIB, + const MachineInstr &MI) const { + assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && "Expected G_CONSTANT"); + MIB.addImm(-MI.getOperand(1).getCImm()->getSExtValue()); +} + bool AMDGPUInstructionSelector::isInlineImmediate16(int64_t Imm) const { return AMDGPU::isInlinableLiteral16(Imm, STI.hasInv2PiInlineImm()); } diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h index 45782ab..8d44c58 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h @@ -169,6 +169,9 @@ private: void renderTruncImm32(MachineInstrBuilder &MIB, const MachineInstr &MI) const; + void renderNegateImm(MachineInstrBuilder &MIB, + const 
MachineInstr &MI) const; + bool isInlineImmediate16(int64_t Imm) const; bool isInlineImmediate32(int64_t Imm) const; bool isInlineImmediate64(int64_t Imm) const; diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td index dcc139a..4c81979 100644 --- a/llvm/lib/Target/AMDGPU/SIInstructions.td +++ b/llvm/lib/Target/AMDGPU/SIInstructions.td @@ -1930,9 +1930,22 @@ def : GCNPat < // TODO: Also do for 64-bit. def : GCNPat< (add i32:$src0, (i32 NegSubInlineConst32:$src1)), - (S_SUB_I32 $src0, NegSubInlineConst32:$src1) + (S_SUB_I32 SReg_32:$src0, NegSubInlineConst32:$src1) >; +def : GCNPat< + (add i32:$src0, (i32 NegSubInlineConst32:$src1)), + (V_SUB_U32_e64 VS_32:$src0, NegSubInlineConst32:$src1)> { + let SubtargetPredicate = HasAddNoCarryInsts; +} + +def : GCNPat< + (add i32:$src0, (i32 NegSubInlineConst32:$src1)), + (V_SUB_I32_e64 VS_32:$src0, NegSubInlineConst32:$src1)> { + let SubtargetPredicate = NotHasAddNoCarryInsts; +} + + // Avoid pointlessly materializing a constant in VGPR. // FIXME: Should also do this for readlane, but tablegen crashes on // the ignored src1. diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-add.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-add.mir index 4393113..b0f9cc5 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-add.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-add.mir @@ -53,3 +53,116 @@ body: | S_ENDPGM 0, implicit %9 ... 
+ +--- +name: add_neg_inline_const_64_to_sub_s32_s +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $sgpr0 + + ; GFX6-LABEL: name: add_neg_inline_const_64_to_sub_s32_s + ; GFX6: liveins: $sgpr0 + ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GFX6: [[S_SUB_I32_:%[0-9]+]]:sreg_32 = S_SUB_I32 [[COPY]], 64, implicit-def $scc + ; GFX6: S_ENDPGM 0, implicit [[S_SUB_I32_]] + ; GFX9-LABEL: name: add_neg_inline_const_64_to_sub_s32_s + ; GFX9: liveins: $sgpr0 + ; GFX9: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GFX9: [[S_SUB_I32_:%[0-9]+]]:sreg_32 = S_SUB_I32 [[COPY]], 64, implicit-def $scc + ; GFX9: S_ENDPGM 0, implicit [[S_SUB_I32_]] + %0:sgpr(s32) = COPY $sgpr0 + %1:sgpr(s32) = G_CONSTANT i32 -64 + %2:sgpr(s32) = G_ADD %0, %1 + S_ENDPGM 0, implicit %2 + +... + +--- +name: add_neg_inline_const_64_to_sub_s32_v +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0 + + ; GFX6-LABEL: name: add_neg_inline_const_64_to_sub_s32_v + ; GFX6: liveins: $vgpr0 + ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294967232, implicit $exec + ; GFX6: %2:vgpr_32, dead %3:sreg_64_xexec = V_ADD_I32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec + ; GFX6: S_ENDPGM 0, implicit %2 + ; GFX9-LABEL: name: add_neg_inline_const_64_to_sub_s32_v + ; GFX9: liveins: $vgpr0 + ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX9: [[V_SUB_U32_e64_:%[0-9]+]]:vgpr_32 = V_SUB_U32_e64 [[COPY]], 64, 0, implicit $exec + ; GFX9: S_ENDPGM 0, implicit [[V_SUB_U32_e64_]] + %0:vgpr(s32) = COPY $vgpr0 + %1:vgpr(s32) = G_CONSTANT i32 -64 + %2:vgpr(s32) = G_ADD %0, %1 + S_ENDPGM 0, implicit %2 + +... 
+ +--- +name: add_neg_inline_const_16_to_sub_s32_s +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $sgpr0 + + ; GFX6-LABEL: name: add_neg_inline_const_16_to_sub_s32_s + ; GFX6: liveins: $sgpr0 + ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 16 + ; GFX6: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc + ; GFX6: S_ENDPGM 0, implicit [[S_ADD_U32_]] + ; GFX9-LABEL: name: add_neg_inline_const_16_to_sub_s32_s + ; GFX9: liveins: $sgpr0 + ; GFX9: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GFX9: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 16 + ; GFX9: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc + ; GFX9: S_ENDPGM 0, implicit [[S_ADD_U32_]] + %0:sgpr(s32) = COPY $sgpr0 + %1:sgpr(s32) = G_CONSTANT i32 16 + %2:sgpr(s32) = G_ADD %0, %1 + S_ENDPGM 0, implicit %2 + +... + +--- +name: add_neg_inline_const_16_to_sub_s32_v +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0 + + ; GFX6-LABEL: name: add_neg_inline_const_16_to_sub_s32_v + ; GFX6: liveins: $vgpr0 + ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 16, implicit $exec + ; GFX6: %2:vgpr_32, dead %3:sreg_64_xexec = V_ADD_I32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec + ; GFX6: S_ENDPGM 0, implicit %2 + ; GFX9-LABEL: name: add_neg_inline_const_16_to_sub_s32_v + ; GFX9: liveins: $vgpr0 + ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 16, implicit $exec + ; GFX9: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec + ; GFX9: S_ENDPGM 0, implicit [[V_ADD_U32_e64_]] + %0:vgpr(s32) = COPY $vgpr0 + %1:vgpr(s32) = G_CONSTANT i32 16 + %2:vgpr(s32) = G_ADD %0, %1 + S_ENDPGM 0, implicit %2 + +...