AMDGPU: Add mad-mix-insts subtarget feature

author Matt Arsenault <Matthew.Arsenault@amd.com>

Wed, 25 Oct 2017 07:00:51 +0000 (07:00 +0000)

committer Matt Arsenault <Matthew.Arsenault@amd.com>

Wed, 25 Oct 2017 07:00:51 +0000 (07:00 +0000)
author Matt Arsenault <Matthew.Arsenault@amd.com>
Wed, 25 Oct 2017 07:00:51 +0000 (07:00 +0000)
committer Matt Arsenault <Matthew.Arsenault@amd.com>
Wed, 25 Oct 2017 07:00:51 +0000 (07:00 +0000)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td

index ba936dd..deaf839 100644 (file)
--- a/llvm/lib/Target/AMDGPU/AMDGPU.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.td
@@ -109,6 +109,12 @@ def FeatureApertureRegs : SubtargetFeature<"aperture-regs",
    "Has Memory Aperture Base and Size Registers"
  >;
  
+def FeatureMadMixInsts : SubtargetFeature<"mad-mix-insts",
+  "HasMadMixInsts",
+  "true",
+  "Has v_mad_mix_f32, v_mad_mixlo_f16, v_mad_mixhi_f16 instructions"
+>;
+
  // XNACK is disabled if SH_MEM_CONFIG.ADDRESS_MODE = GPUVM on chips that support
  // XNACK. The current default kernel driver setting is:
  // - graphics ring: XNACK disabled
@@ -553,19 +559,25 @@ def FeatureISAVersion8_1_0 : SubtargetFeatureISAVersion <8,1,0,
  
  def FeatureISAVersion9_0_0 : SubtargetFeatureISAVersion <9,0,0,
    [FeatureGFX9,
-   FeatureLDSBankCount32]>;
+   FeatureMadMixInsts,
+   FeatureLDSBankCount32
+   ]>;
  
  def FeatureISAVersion9_0_1 : SubtargetFeatureISAVersion <9,0,1,
    [FeatureGFX9,
+   FeatureMadMixInsts,
     FeatureLDSBankCount32,
     FeatureXNACK]>;
  
  def FeatureISAVersion9_0_2 : SubtargetFeatureISAVersion <9,0,2,
    [FeatureGFX9,
-   FeatureLDSBankCount32]>;
+   FeatureMadMixInsts,
+   FeatureLDSBankCount32
+   ]>;
  
  def FeatureISAVersion9_0_3 : SubtargetFeatureISAVersion <9,0,3,
    [FeatureGFX9,
+   FeatureMadMixInsts,
     FeatureLDSBankCount32,
     FeatureXNACK]>;
  
@@ -728,8 +740,8 @@ def HasDPP : Predicate<"Subtarget->hasDPP()">,
  def HasIntClamp : Predicate<"Subtarget->hasIntClamp()">,
    AssemblerPredicate<"FeatureIntClamp">;
  
-def HasMadMix : Predicate<"Subtarget->hasMadMixInsts()">,
-  AssemblerPredicate<"FeatureGFX9Insts">;
+def HasMadMixInsts : Predicate<"Subtarget->hasMadMixInsts()">,
+  AssemblerPredicate<"FeatureMadMixInsts">;
  
  def EnableLateCFGStructurize : Predicate<
    "EnableLateStructurizeCFG">;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp

index 86431fe..c378974 100644 (file)
--- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
@@ -138,6 +138,7 @@ AMDGPUSubtarget::AMDGPUSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
      Has16BitInsts(false),
      HasIntClamp(false),
      HasVOP3PInsts(false),
+    HasMadMixInsts(false),
      HasMovrel(false),
      HasVGPRIndexMode(false),
      HasScalarStores(false),
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h

index 99c525e..56a5fa6 100644 (file)
--- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
@@ -148,6 +148,7 @@ protected:
    bool Has16BitInsts;
    bool HasIntClamp;
    bool HasVOP3PInsts;
+  bool HasMadMixInsts;
    bool HasMovrel;
    bool HasVGPRIndexMode;
    bool HasScalarStores;
@@ -319,7 +320,7 @@ public:
    }
  
    bool hasMadMixInsts() const {
-    return getGeneration() >= GFX9;
+    return HasMadMixInsts;
    }
  
    bool hasCARRY() const {
diff --git a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td

index 85a56db..eeee8b3 100644 (file)
--- a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
@@ -68,6 +68,8 @@ def V_PK_LSHLREV_B16 : VOP3PInst<"v_pk_lshlrev_b16", VOP3_Profile<VOP_V2I16_V2I1
  def V_PK_ASHRREV_I16 : VOP3PInst<"v_pk_ashrrev_i16", VOP3_Profile<VOP_V2I16_V2I16_V2I16>, ashr_rev>;
  def V_PK_LSHRREV_B16 : VOP3PInst<"v_pk_lshrrev_b16", VOP3_Profile<VOP_V2I16_V2I16_V2I16>, lshr_rev>;
  
+
+let SubtargetPredicate = HasMadMixInsts in {
  // These are VOP3a-like opcodes which accept no omod.
  // Size of src arguments (16/32) is controlled by op_sel.
  // For 16-bit src arguments their location (hi/lo) are controlled by op_sel_hi.
@@ -82,8 +84,6 @@ def V_MAD_MIXHI_F16 : VOP3_VOP3PInst<"v_mad_mixhi_f16", VOP3_Profile<VOP_F16_F16
  }
  }
  
-let OtherPredicates = [HasMadMix] in {
-
  def : GCNPat <
    (f16 (fpround (fmad (f32 (VOP3PMadMixMods f16:$src0, i32:$src0_modifiers)),
                        (f32 (VOP3PMadMixMods f16:$src1, i32:$src1_modifiers)),
@@ -141,7 +141,7 @@ def : GCNPat <
                                             (i32 (IMPLICIT_DEF)))))
  >;
  
-} // End Predicates = [HasMadMix]
+} // End SubtargetPredicate = HasMadMixInsts
  
  multiclass VOP3P_Real_vi<bits<10> op> {
    def _vi : VOP3P_Real<!cast<VOP3P_Pseudo>(NAME), SIEncodingFamily.VI>,
author	Matt Arsenault <Matthew.Arsenault@amd.com>
	Wed, 25 Oct 2017 07:00:51 +0000 (07:00 +0000)
committer	Matt Arsenault <Matthew.Arsenault@amd.com>
	Wed, 25 Oct 2017 07:00:51 +0000 (07:00 +0000)
llvm/lib/Target/AMDGPU/AMDGPU.td		patch \| blob \| history
llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp		patch \| blob \| history
llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h		patch \| blob \| history
llvm/lib/Target/AMDGPU/VOP3PInstructions.td		patch \| blob \| history