Added and used new target pseudo for v_cvt_pk_i16_i32, changes due to code review.

author Thomas Symalla <thomas.symalla@amd.com>

Wed, 13 Jan 2021 14:23:45 +0000 (15:23 +0100)

committer Thomas Symalla <thomas.symalla@amd.com>

Tue, 2 Feb 2021 08:14:53 +0000 (09:14 +0100)
author Thomas Symalla <thomas.symalla@amd.com>
Wed, 13 Jan 2021 14:23:45 +0000 (15:23 +0100)
committer Thomas Symalla <thomas.symalla@amd.com>
Tue, 2 Feb 2021 08:14:53 +0000 (09:14 +0100)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td

index bba0373..d1e23e1 100644 (file)
--- a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
@@ -174,6 +174,8 @@ def : GINodeEquiv<G_AMDGPU_CVT_F32_UBYTE1, AMDGPUcvt_f32_ubyte1>;
  def : GINodeEquiv<G_AMDGPU_CVT_F32_UBYTE2, AMDGPUcvt_f32_ubyte2>;
  def : GINodeEquiv<G_AMDGPU_CVT_F32_UBYTE3, AMDGPUcvt_f32_ubyte3>;
  
+def : GINodeEquiv<G_AMDGPU_CVT_PK_I16_I32, AMDGPUpk_i16_i32_impl>;
+
  def : GINodeEquiv<G_AMDGPU_ATOMIC_CMPXCHG, AMDGPUatomic_cmp_swap>;
  def : GINodeEquiv<G_AMDGPU_BUFFER_LOAD, SIbuffer_load>;
  def : GINodeEquiv<G_AMDGPU_BUFFER_LOAD_USHORT, SIbuffer_load_ushort>;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td b/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td

index 894677e..c0cb178 100644 (file)
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td
@@ -213,6 +213,8 @@ def AMDGPUcvt_f32_ubyte2 : SDNode<"AMDGPUISD::CVT_F32_UBYTE2",
  def AMDGPUcvt_f32_ubyte3 : SDNode<"AMDGPUISD::CVT_F32_UBYTE3",
    SDTIntToFPOp, []>;
  
+def AMDGPUcvt_pk_i16_i32 : SDNode<"AMDGPUISD::CVT_PK_I16_I32",
+  AMDGPUIntPackOp, []>;
  
  // urecip - This operation is a helper for integer division, it returns the
  // result of 1 / a as a fractional unsigned integer.
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPreLegalizerCombiner.cpp b/llvm/lib/Target/AMDGPU/AMDGPUPreLegalizerCombiner.cpp

index 7309a0c..f70fadb 100644 (file)
--- a/llvm/lib/Target/AMDGPU/AMDGPUPreLegalizerCombiner.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPreLegalizerCombiner.cpp
@@ -12,9 +12,12 @@
  //===----------------------------------------------------------------------===//
  
  <<<<<<< HEAD
+<<<<<<< HEAD
  #include "AMDGPU.h"
  =======
  #include "AMDGPULegalizerInfo.h"
+=======
+>>>>>>> Added and used new target pseudo for v_cvt_pk_i16_i32, changes due to code review.
  #include "AMDGPUTargetMachine.h"
  >>>>>>> Move Combiner to PreLegalize step
  #include "llvm/CodeGen/GlobalISel/Combiner.h"
@@ -70,8 +73,6 @@ bool AMDGPUPreLegalizerCombinerHelper::matchClampI64ToI16(
    if (DstType != LLT::scalar(16))
      return false;
  
-  LLVM_DEBUG(dbgs() << "Matching Clamp i64 to i16\n");
-
    Register Base;
  
    // Try to match a combination of min / max MIR opcodes.
@@ -128,38 +129,33 @@ void AMDGPUPreLegalizerCombinerHelper::applyClampI64ToI16(
    MRI.setRegClass(Hi32, &AMDGPU::VGPR_32RegClass);
    MRI.setRegClass(Lo32, &AMDGPU::VGPR_32RegClass);
  
-  constexpr unsigned int CvtOpcode = AMDGPU::V_CVT_PK_I16_I32_e64;
-  assert(MI.getOpcode() != CvtOpcode);
-
-  const auto REG_CLASS = &AMDGPU::VGPR_32RegClass;
+  assert(MI.getOpcode() != AMDGPU::G_AMDGPU_CVT_PK_I16_I32);
  
-  Register CvtDst = MRI.createVirtualRegister(REG_CLASS);
-  MRI.setType(CvtDst, S32);
+  Register CvtDst = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+  const LLT V2S16 = LLT::vector(2, 16);
+  MRI.setType(CvtDst, V2S16);
  
-  auto CvtPk = B.buildInstr(CvtOpcode);
-  CvtPk.addDef(CvtDst);
-  CvtPk.addReg(Hi32);
-  CvtPk.addReg(Lo32);
-  CvtPk.setMIFlags(MI.getFlags());
+  B.buildInstr(AMDGPU::G_AMDGPU_CVT_PK_I16_I32,
+    {CvtDst},
+    {Hi32, Lo32},
+    MI.getFlags());
  
    auto MinBoundary = std::min(MatchInfo.Cmp1, MatchInfo.Cmp2);
    auto MaxBoundary = std::max(MatchInfo.Cmp1, MatchInfo.Cmp2);
  
    auto MinBoundaryDst = B.buildConstant(S32, MinBoundary);
-  MRI.setRegClass(MinBoundaryDst.getReg(0), REG_CLASS);
+  MRI.setRegClass(MinBoundaryDst.getReg(0), &AMDGPU::VGPR_32RegClass);
  
    auto MaxBoundaryDst = B.buildConstant(S32, MaxBoundary);
-  MRI.setRegClass(MaxBoundaryDst.getReg(0), REG_CLASS);
+  MRI.setRegClass(MaxBoundaryDst.getReg(0), &AMDGPU::VGPR_32RegClass);
  
-  Register MedDst = MRI.createVirtualRegister(REG_CLASS);
+  Register MedDst = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
    MRI.setType(MedDst, S32);
  
-  auto Med = B.buildInstr(AMDGPU::V_MED3_I32);
-  Med.addDef(MedDst);
-  Med.addReg(MinBoundaryDst.getReg(0));
-  Med.addReg(CvtDst);
-  Med.addReg(MaxBoundaryDst.getReg(0));
-  Med.setMIFlags(MI.getFlags());
+  B.buildInstr(AMDGPU::V_MED3_I32,
+    {MedDst},
+    {MinBoundaryDst.getReg(0), CvtDst, MaxBoundaryDst.getReg(0)},
+    MI.getFlags());
    
    Register TruncDst = MRI.createGenericVirtualRegister(LLT::scalar(16));
    B.buildTrunc(TruncDst, MedDst);
@@ -197,10 +193,9 @@ public:
    AMDGPUGenPreLegalizerCombinerHelperRuleConfig GeneratedRuleCfg;
  
    AMDGPUPreLegalizerCombinerInfo(bool EnableOpt, bool OptSize, bool MinSize,
-                                  const AMDGPULegalizerInfo *LI,
                                    GISelKnownBits *KB, MachineDominatorTree *MDT)
        : CombinerInfo(/*AllowIllegalOps*/ true, /*ShouldLegalizeIllegal*/ false,
-                     /*LegalizerInfo*/ LI, EnableOpt, OptSize, MinSize),
+                     /*LegalizerInfo*/ nullptr, EnableOpt, OptSize, MinSize),
          KB(KB), MDT(MDT) {
      if (!GeneratedRuleCfg.parseCommandLineOption())
        report_fatal_error("Invalid rule identifier");
@@ -282,16 +277,12 @@ bool AMDGPUPreLegalizerCombiner::runOnMachineFunction(MachineFunction &MF) {
    const Function &F = MF.getFunction();
    bool EnableOpt =
        MF.getTarget().getOptLevel() != CodeGenOpt::None && !skipFunction(F);
-      
-  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
-  const AMDGPULegalizerInfo *LI =
-      static_cast<const AMDGPULegalizerInfo *>(ST.getLegalizerInfo());
  
    GISelKnownBits *KB = &getAnalysis<GISelKnownBitsAnalysis>().get(MF);
    MachineDominatorTree *MDT =
        IsOptNone ? nullptr : &getAnalysis<MachineDominatorTree>();
    AMDGPUPreLegalizerCombinerInfo PCInfo(EnableOpt, F.hasOptSize(),
-                                        F.hasMinSize(), LI, KB, MDT);
+                                        F.hasMinSize(), KB, MDT);
    Combiner C(PCInfo, TPC);
    return C.combineMachineInstrs(MF, /*CSEInfo*/ nullptr);
  }
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp

index 502356d..d63090f 100644 (file)
--- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
@@ -3621,6 +3621,7 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
    case AMDGPU::G_AMDGPU_CVT_F32_UBYTE1:
    case AMDGPU::G_AMDGPU_CVT_F32_UBYTE2:
    case AMDGPU::G_AMDGPU_CVT_F32_UBYTE3:
+  case AMDGPU::G_AMDGPU_CVT_PK_I16_I32:
      return getDefaultMappingVOP(MI);
    case AMDGPU::G_UMULH:
    case AMDGPU::G_SMULH: {
diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td

index ecb875d..59e3cad 100644 (file)
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -2575,6 +2575,12 @@ def G_AMDGPU_CVT_F32_UBYTE#N : AMDGPUGenericInstruction {
  }
  }
  
+def G_AMDGPU_CVT_PK_I16_I32 : AMDGPUGenericInstruction {
+  let OutOperandList = (outs type0:$dst);
+  let InOperandList = (ins type0:$src0, type0:$src1);
+  let hasSideEffects = 0;
+}
+
  // Atomic cmpxchg. $cmpval ad $newval are packed in a single vector
  // operand Expects a MachineMemOperand in addition to explicit
  // operands.
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-short-clamp.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-short-clamp.ll

index e7d6634..90d4735 100644 (file)
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-short-clamp.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-short-clamp.ll
@@ -109,4 +109,4 @@ entry:
    %min = call i64 @llvm.smin.i64(i64 %max, i64 0)
    %result = trunc i64 %min to i16
    ret i16 %result
-}
-\ No newline at end of file
+}
author	Thomas Symalla <thomas.symalla@amd.com>
	Wed, 13 Jan 2021 14:23:45 +0000 (15:23 +0100)
committer	Thomas Symalla <thomas.symalla@amd.com>
	Tue, 2 Feb 2021 08:14:53 +0000 (09:14 +0100)
llvm/lib/Target/AMDGPU/AMDGPUGISel.td		patch \| blob \| history
llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td		patch \| blob \| history
llvm/lib/Target/AMDGPU/AMDGPUPreLegalizerCombiner.cpp		patch \| blob \| history
llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp		patch \| blob \| history
llvm/lib/Target/AMDGPU/SIInstructions.td		patch \| blob \| history
llvm/test/CodeGen/AMDGPU/GlobalISel/combine-short-clamp.ll		patch \| blob \| history