AMDGPU: Add feature for fast f32 denormals
author: Matt Arsenault <Matthew.Arsenault@amd.com>
Wed, 15 Aug 2018 19:45:04 +0000 (22:45 +0300)
committer: Matt Arsenault <arsenm2@gmail.com>
Sun, 5 Apr 2020 00:01:24 +0000 (20:01 -0400)
llvm/lib/Target/AMDGPU/AMDGPU.td
llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h

index 1f10657..9785252 100644 (file)
@@ -33,6 +33,12 @@ def FeatureFastFMAF32 : SubtargetFeature<"fast-fmaf",
   "Assuming f32 fma is at least as fast as mul + add"
 >;
 
+def FeatureFastDenormalF32 : SubtargetFeature<"fast-denormal-f32",
+  "FastDenormalF32",
+  "true",
+  "Enabling denormals does not cause f32 instructions to run at f64 rates"
+>;
+
 def FeatureMIMG_R128 : SubtargetFeature<"mimg-r128",
   "MIMG_R128",
   "true",
@@ -632,7 +638,7 @@ def FeatureVolcanicIslands : GCNSubtargetFeatureGeneration<"VOLCANIC_ISLANDS",
    FeatureScalarStores, FeatureInv2PiInlineImm,
    FeatureSDWA, FeatureSDWAOutModsVOPC, FeatureSDWAMac, FeatureDPP,
    FeatureIntClamp, FeatureTrigReducedRange, FeatureDoesNotSupportSRAMECC,
-   FeatureGFX8Insts, FeatureGFX7GFX8GFX9Insts
+   FeatureGFX8Insts, FeatureGFX7GFX8GFX9Insts, FeatureFastDenormalF32
   ]
 >;
 
@@ -647,8 +653,8 @@ def FeatureGFX9 : GCNSubtargetFeatureGeneration<"GFX9",
    FeatureSDWA, FeatureSDWAOmod, FeatureSDWAScalar, FeatureSDWASdst,
    FeatureFlatInstOffsets, FeatureFlatGlobalInsts, FeatureFlatScratchInsts,
    FeatureAddNoCarryInsts, FeatureGFX8Insts, FeatureGFX7GFX8GFX9Insts,
-   FeatureScalarFlatScratchInsts, FeatureScalarAtomics, FeatureR128A16
-  ]
+   FeatureScalarFlatScratchInsts, FeatureScalarAtomics, FeatureR128A16,
+   FeatureFastDenormalF32]
 >;
 
 def FeatureGFX10 : GCNSubtargetFeatureGeneration<"GFX10",
@@ -665,7 +671,7 @@ def FeatureGFX10 : GCNSubtargetFeatureGeneration<"GFX10",
    FeatureNoSdstCMPX, FeatureVscnt, FeatureRegisterBanking,
    FeatureVOP3Literal, FeatureDPP8,
    FeatureNoDataDepHazard, FeaturePkFmacF16Inst, FeatureDoesNotSupportSRAMECC,
-   FeatureGFX10A16
+   FeatureGFX10A16, FeatureFastDenormalF32
   ]
 >;
 
index df4c630..91c1bb4 100644 (file)
@@ -178,6 +178,7 @@ GCNSubtarget::GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
     MaxPrivateElementSize(0),
 
     FastFMAF32(false),
+    FastDenormalF32(false),
     HalfRate64Ops(false),
 
     FlatForGlobal(false),
index c565c17..cadb328 100644 (file)
@@ -291,6 +291,7 @@ protected:
 
   // Possibly statically set by tablegen, but may want to be overridden.
   bool FastFMAF32;
+  bool FastDenormalF32;
   bool HalfRate64Ops;
 
   // Dynamially set bits that enable features.