AMDGPU: Partially directly select llvm.amdgcn.interp.p1.f16

author Matt Arsenault <Matthew.Arsenault@amd.com>

Tue, 31 Dec 2019 20:28:41 +0000 (15:28 -0500)

committer Matt Arsenault <arsenm2@gmail.com>

Wed, 15 Jan 2020 13:58:58 +0000 (08:58 -0500)
author Matt Arsenault <Matthew.Arsenault@amd.com>
Tue, 31 Dec 2019 20:28:41 +0000 (15:28 -0500)
committer Matt Arsenault <arsenm2@gmail.com>
Wed, 15 Jan 2020 13:58:58 +0000 (08:58 -0500)
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp

index e73d87c..52bee39 100644 (file)
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -5907,10 +5907,10 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
    case Intrinsic::amdgcn_fdiv_fast:
      return lowerFDIV_FAST(Op, DAG);
    case Intrinsic::amdgcn_interp_p1_f16: {
-    SDValue ToM0 = DAG.getCopyToReg(DAG.getEntryNode(), DL, AMDGPU::M0,
-                                    Op.getOperand(5), SDValue());
      if (getSubtarget()->getLDSBankCount() == 16) {
        // 16 bank LDS
+      SDValue ToM0 = DAG.getCopyToReg(DAG.getEntryNode(), DL, AMDGPU::M0,
+                                      Op.getOperand(5), SDValue());
  
        // FIXME: This implicitly will insert a second CopyToReg to M0.
        SDValue S = DAG.getNode(
@@ -5930,23 +5930,13 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
          DAG.getTargetConstant(0, DL, MVT::i32), // $src2_modifiers
          Op.getOperand(4), // high
          DAG.getTargetConstant(0, DL, MVT::i1), // $clamp
-        DAG.getTargetConstant(0, DL, MVT::i32) // $omod
-      };
-      return DAG.getNode(AMDGPUISD::INTERP_P1LV_F16, DL, MVT::f32, Ops);
-    } else {
-      // 32 bank LDS
-      SDValue Ops[] = {
-        Op.getOperand(1), // Src0
-        Op.getOperand(2), // Attrchan
-        Op.getOperand(3), // Attr
-        DAG.getTargetConstant(0, DL, MVT::i32), // $src0_modifiers
-        Op.getOperand(4), // high
-        DAG.getTargetConstant(0, DL, MVT::i1), // $clamp
          DAG.getTargetConstant(0, DL, MVT::i32), // $omod
          ToM0.getValue(1)
        };
-      return DAG.getNode(AMDGPUISD::INTERP_P1LL_F16, DL, MVT::f32, Ops);
+      return DAG.getNode(AMDGPUISD::INTERP_P1LV_F16, DL, MVT::f32, Ops);
      }
+
+    return SDValue();
    }
    case Intrinsic::amdgcn_sin:
      return DAG.getNode(AMDGPUISD::SIN_HW, DL, VT, Op.getOperand(1));
diff --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/llvm/lib/Target/AMDGPU/VOP3Instructions.td

index 67c8b92..1fa6aaf 100644 (file)
--- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
@@ -229,7 +229,7 @@ class VOP3Interp<string OpName, VOPProfile P, list<dag> pattern = []> :
  def VOP3_INTERP : VOPProfile<[f32, f32, i32, untyped]> {
    let Ins64 = (ins Src0Mod:$src0_modifiers, VRegSrc_32:$src0,
                     Attr:$attr, AttrChan:$attrchan,
-                   clampmod:$clamp, omod:$omod);
+                   clampmod0:$clamp, omod0:$omod);
  
    let Asm64 = "$vdst, $src0_modifiers, $attr$attrchan$clamp$omod";
  }
@@ -237,7 +237,7 @@ def VOP3_INTERP : VOPProfile<[f32, f32, i32, untyped]> {
  def VOP3_INTERP_MOV : VOPProfile<[f32, i32, i32, untyped]> {
    let Ins64 = (ins InterpSlot:$src0,
                     Attr:$attr, AttrChan:$attrchan,
-                   clampmod:$clamp, omod:$omod);
+                   clampmod0:$clamp, omod0:$omod);
  
    let Asm64 = "$vdst, $src0, $attr$attrchan$clamp$omod";
  
@@ -480,12 +480,17 @@ def V_INTERP_P2_F16_gfx9 : VOP3Interp <"v_interp_p2_f16_gfx9", VOP3_INTERP16<[f1
  
  let Uses = [M0, EXEC], FPDPRounding = 1 in {
  def V_INTERP_P1LL_F16 : VOP3Interp <"v_interp_p1ll_f16", VOP3_INTERP16<[f32, f32, i32, untyped]>,
-       [(set f32:$vdst, (AMDGPUinterp_p1ll_f16 f32:$src0, (i32 timm:$attrchan),
-                                                          (i32 timm:$attr),
-                                                          (i32 timm:$src0_modifiers),
-                                                          (i1 timm:$high),
-                                                          (i1 timm:$clamp),
-                                                          (i32 timm:$omod)))]>;
+       [(set f32:$vdst, (int_amdgcn_interp_p1_f16 (VOP3Mods f32:$src0, i32:$src0_modifiers),
+                                                  (i32 timm:$attrchan),
+                                                  (i32 timm:$attr),
+                                                  (i1 timm:$high), M0))]> {
+  // This predicate should only apply to the selection pattern. The
+  // instruction still exists and should decode on subtargets with
+  // other bank counts.
+  let OtherPredicates = [has32BankLDS];
+}
+
+
  def V_INTERP_P1LV_F16 : VOP3Interp <"v_interp_p1lv_f16", VOP3_INTERP16<[f32, f32, i32, f16]>,
         [(set f32:$vdst, (AMDGPUinterp_p1lv_f16 f32:$src0, (i32 timm:$attrchan),
                                                            (i32 timm:$attr),
diff --git a/llvm/test/MC/AMDGPU/vop3.s b/llvm/test/MC/AMDGPU/vop3.s

index 8c26f97..34343f7 100644 (file)
--- a/llvm/test/MC/AMDGPU/vop3.s
+++ b/llvm/test/MC/AMDGPU/vop3.s
@@ -2,6 +2,9 @@
  // RUN: not llvm-mc -arch=amdgcn -mcpu=hawaii -show-encoding %s | FileCheck %s --check-prefix=CI
  // RUN: not llvm-mc -arch=amdgcn -mcpu=tonga -show-encoding %s | FileCheck %s --check-prefix=VI
  
+// Make sure interp instructions assemble regardless of LDS bank count
+// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx810 -show-encoding %s | FileCheck %s --check-prefix=VI
+
  // RUN: not llvm-mc -arch=amdgcn -show-encoding %s 2>&1 | FileCheck %s --check-prefix=NOSI --check-prefix=NOSICI
  // RUN: not llvm-mc -arch=amdgcn -mcpu=hawaii -show-encoding %s 2>&1 | FileCheck %s --check-prefix=NOSICI
  // RUN: not llvm-mc -arch=amdgcn -mcpu=tonga -show-encoding %s 2>&1 | FileCheck %s --check-prefix=NOVI
author	Matt Arsenault <Matthew.Arsenault@amd.com>
	Tue, 31 Dec 2019 20:28:41 +0000 (15:28 -0500)
committer	Matt Arsenault <arsenm2@gmail.com>
	Wed, 15 Jan 2020 13:58:58 +0000 (08:58 -0500)
llvm/lib/Target/AMDGPU/SIISelLowering.cpp		patch \| blob \| history
llvm/lib/Target/AMDGPU/VOP3Instructions.td		patch \| blob \| history
llvm/test/MC/AMDGPU/vop3.s		patch \| blob \| history