AMDGPU/GlobalISel: Fix add of neg inline constant pattern
author Matt Arsenault <Matthew.Arsenault@amd.com>
Tue, 7 Jan 2020 16:29:05 +0000 (11:29 -0500)
committer Matt Arsenault <arsenm2@gmail.com>
Thu, 9 Jan 2020 15:29:31 +0000 (10:29 -0500)
llvm/lib/Target/AMDGPU/AMDGPUGISel.td
llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
llvm/lib/Target/AMDGPU/SIInstructions.td
llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-add.mir

index d7c211f..0583610 100644 (file)
@@ -204,3 +204,6 @@ foreach Ty = [i64, p0, p1, p4] in {
 
 def gi_as_i32timm : GICustomOperandRenderer<"renderTruncImm32">,
   GISDNodeXFormEquiv<as_i32timm>;
+
+def gi_NegateImm : GICustomOperandRenderer<"renderNegateImm">,
+  GISDNodeXFormEquiv<NegateImm>;
index a41c8f1..a632e7a 100644 (file)
@@ -2097,6 +2097,12 @@ void AMDGPUInstructionSelector::renderTruncImm32(MachineInstrBuilder &MIB,
   MIB.addImm(CstVal.getValue());
 }
 
+void AMDGPUInstructionSelector::renderNegateImm(MachineInstrBuilder &MIB,
+                                                const MachineInstr &MI) const {
+  assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && "Expected G_CONSTANT");
+  MIB.addImm(-MI.getOperand(1).getCImm()->getSExtValue());
+}
+
 bool AMDGPUInstructionSelector::isInlineImmediate16(int64_t Imm) const {
   return AMDGPU::isInlinableLiteral16(Imm, STI.hasInv2PiInlineImm());
 }
index 45782ab..8d44c58 100644 (file)
@@ -169,6 +169,9 @@ private:
   void renderTruncImm32(MachineInstrBuilder &MIB,
                         const MachineInstr &MI) const;
 
+  void renderNegateImm(MachineInstrBuilder &MIB,
+                       const MachineInstr &MI) const;
+
   bool isInlineImmediate16(int64_t Imm) const;
   bool isInlineImmediate32(int64_t Imm) const;
   bool isInlineImmediate64(int64_t Imm) const;
index dcc139a..4c81979 100644 (file)
@@ -1930,9 +1930,22 @@ def : GCNPat <
 // TODO: Also do for 64-bit.
 def : GCNPat<
   (add i32:$src0, (i32 NegSubInlineConst32:$src1)),
-  (S_SUB_I32 $src0, NegSubInlineConst32:$src1)
+  (S_SUB_I32 SReg_32:$src0, NegSubInlineConst32:$src1)
 >;
 
+def : GCNPat<
+  (add i32:$src0, (i32 NegSubInlineConst32:$src1)),
+  (V_SUB_U32_e64 VS_32:$src0, NegSubInlineConst32:$src1)> {
+  let SubtargetPredicate = HasAddNoCarryInsts;
+}
+
+def : GCNPat<
+  (add i32:$src0, (i32 NegSubInlineConst32:$src1)),
+  (V_SUB_I32_e64 VS_32:$src0, NegSubInlineConst32:$src1)> {
+  let SubtargetPredicate = NotHasAddNoCarryInsts;
+}
+
+
 // Avoid pointlessly materializing a constant in VGPR.
 // FIXME: Should also do this for readlane, but tablegen crashes on
 // the ignored src1.
index 4393113..b0f9cc5 100644 (file)
@@ -53,3 +53,116 @@ body: |
     S_ENDPGM 0, implicit %9
 
 ...
+
+---
+name:            add_neg_inline_const_64_to_sub_s32_s
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+  bb.0:
+    liveins: $sgpr0
+
+    ; GFX6-LABEL: name: add_neg_inline_const_64_to_sub_s32_s
+    ; GFX6: liveins: $sgpr0
+    ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+    ; GFX6: [[S_SUB_I32_:%[0-9]+]]:sreg_32 = S_SUB_I32 [[COPY]], 64, implicit-def $scc
+    ; GFX6: S_ENDPGM 0, implicit [[S_SUB_I32_]]
+    ; GFX9-LABEL: name: add_neg_inline_const_64_to_sub_s32_s
+    ; GFX9: liveins: $sgpr0
+    ; GFX9: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+    ; GFX9: [[S_SUB_I32_:%[0-9]+]]:sreg_32 = S_SUB_I32 [[COPY]], 64, implicit-def $scc
+    ; GFX9: S_ENDPGM 0, implicit [[S_SUB_I32_]]
+    %0:sgpr(s32) = COPY $sgpr0
+    %1:sgpr(s32) = G_CONSTANT i32 -64
+    %2:sgpr(s32) = G_ADD %0, %1
+    S_ENDPGM 0, implicit %2
+
+...
+
+---
+name:            add_neg_inline_const_64_to_sub_s32_v
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+  bb.0:
+    liveins: $vgpr0
+
+    ; GFX6-LABEL: name: add_neg_inline_const_64_to_sub_s32_v
+    ; GFX6: liveins: $vgpr0
+    ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294967232, implicit $exec
+    ; GFX6: %2:vgpr_32, dead %3:sreg_64_xexec = V_ADD_I32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec
+    ; GFX6: S_ENDPGM 0, implicit %2
+    ; GFX9-LABEL: name: add_neg_inline_const_64_to_sub_s32_v
+    ; GFX9: liveins: $vgpr0
+    ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX9: [[V_SUB_U32_e64_:%[0-9]+]]:vgpr_32 = V_SUB_U32_e64 [[COPY]], 64, 0, implicit $exec
+    ; GFX9: S_ENDPGM 0, implicit [[V_SUB_U32_e64_]]
+    %0:vgpr(s32) = COPY $vgpr0
+    %1:vgpr(s32) = G_CONSTANT i32 -64
+    %2:vgpr(s32) = G_ADD %0, %1
+    S_ENDPGM 0, implicit %2
+
+...
+
+---
+name:            add_neg_inline_const_16_to_sub_s32_s
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+  bb.0:
+    liveins: $sgpr0
+
+    ; GFX6-LABEL: name: add_neg_inline_const_16_to_sub_s32_s
+    ; GFX6: liveins: $sgpr0
+    ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+    ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 16
+    ; GFX6: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc
+    ; GFX6: S_ENDPGM 0, implicit [[S_ADD_U32_]]
+    ; GFX9-LABEL: name: add_neg_inline_const_16_to_sub_s32_s
+    ; GFX9: liveins: $sgpr0
+    ; GFX9: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+    ; GFX9: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 16
+    ; GFX9: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc
+    ; GFX9: S_ENDPGM 0, implicit [[S_ADD_U32_]]
+    %0:sgpr(s32) = COPY $sgpr0
+    %1:sgpr(s32) = G_CONSTANT i32 16
+    %2:sgpr(s32) = G_ADD %0, %1
+    S_ENDPGM 0, implicit %2
+
+...
+
+---
+name:            add_neg_inline_const_16_to_sub_s32_v
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+  bb.0:
+    liveins: $vgpr0
+
+    ; GFX6-LABEL: name: add_neg_inline_const_16_to_sub_s32_v
+    ; GFX6: liveins: $vgpr0
+    ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 16, implicit $exec
+    ; GFX6: %2:vgpr_32, dead %3:sreg_64_xexec = V_ADD_I32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec
+    ; GFX6: S_ENDPGM 0, implicit %2
+    ; GFX9-LABEL: name: add_neg_inline_const_16_to_sub_s32_v
+    ; GFX9: liveins: $vgpr0
+    ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 16, implicit $exec
+    ; GFX9: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec
+    ; GFX9: S_ENDPGM 0, implicit [[V_ADD_U32_e64_]]
+    %0:vgpr(s32) = COPY $vgpr0
+    %1:vgpr(s32) = G_CONSTANT i32 16
+    %2:vgpr(s32) = G_ADD %0, %1
+    S_ENDPGM 0, implicit %2
+
+...