From 221890d709276a7315222f470f9f9a2d908b5327 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Wed, 15 Aug 2018 22:45:04 +0300 Subject: [PATCH] AMDGPU: Add feature for fast f32 denormals --- llvm/lib/Target/AMDGPU/AMDGPU.td | 14 ++++++++++---- llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp | 1 + llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h | 1 + 3 files changed, 12 insertions(+), 4 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td index 1f10657..9785252 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPU.td +++ b/llvm/lib/Target/AMDGPU/AMDGPU.td @@ -33,6 +33,12 @@ def FeatureFastFMAF32 : SubtargetFeature<"fast-fmaf", "Assuming f32 fma is at least as fast as mul + add" >; +def FeatureFastDenormalF32 : SubtargetFeature<"fast-denormal-f32", + "FastDenormalF32", + "true", + "Enabling denormals does not cause f32 instructions to run at f64 rates" +>; + def FeatureMIMG_R128 : SubtargetFeature<"mimg-r128", "MIMG_R128", "true", @@ -632,7 +638,7 @@ def FeatureVolcanicIslands : GCNSubtargetFeatureGeneration<"VOLCANIC_ISLANDS", FeatureScalarStores, FeatureInv2PiInlineImm, FeatureSDWA, FeatureSDWAOutModsVOPC, FeatureSDWAMac, FeatureDPP, FeatureIntClamp, FeatureTrigReducedRange, FeatureDoesNotSupportSRAMECC, - FeatureGFX8Insts, FeatureGFX7GFX8GFX9Insts + FeatureGFX8Insts, FeatureGFX7GFX8GFX9Insts, FeatureFastDenormalF32 ] >; @@ -647,8 +653,8 @@ def FeatureGFX9 : GCNSubtargetFeatureGeneration<"GFX9", FeatureSDWA, FeatureSDWAOmod, FeatureSDWAScalar, FeatureSDWASdst, FeatureFlatInstOffsets, FeatureFlatGlobalInsts, FeatureFlatScratchInsts, FeatureAddNoCarryInsts, FeatureGFX8Insts, FeatureGFX7GFX8GFX9Insts, - FeatureScalarFlatScratchInsts, FeatureScalarAtomics, FeatureR128A16 - ] + FeatureScalarFlatScratchInsts, FeatureScalarAtomics, FeatureR128A16, + FeatureFastDenormalF32] >; def FeatureGFX10 : GCNSubtargetFeatureGeneration<"GFX10", @@ -665,7 +671,7 @@ def FeatureGFX10 : GCNSubtargetFeatureGeneration<"GFX10", FeatureNoSdstCMPX, FeatureVscnt, FeatureRegisterBanking, FeatureVOP3Literal, FeatureDPP8, FeatureNoDataDepHazard, FeaturePkFmacF16Inst, FeatureDoesNotSupportSRAMECC, - FeatureGFX10A16 + FeatureGFX10A16, FeatureFastDenormalF32 ] >; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp index df4c630..91c1bb4 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp @@ -178,6 +178,7 @@ GCNSubtarget::GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS, MaxPrivateElementSize(0), FastFMAF32(false), + FastDenormalF32(false), HalfRate64Ops(false), FlatForGlobal(false), diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h index c565c17..cadb328c 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h @@ -291,6 +291,7 @@ protected: // Possibly statically set by tablegen, but may want to be overridden. bool FastFMAF32; + bool FastDenormalF32; bool HalfRate64Ops; // Dynamially set bits that enable features. -- 2.7.4