From b035a5740ce32a5bddfecb826845a350f589e377 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Thu, 29 Jan 2015 19:34:25 +0000 Subject: [PATCH] R600/SI: Add subtarget feature for whether f32 fma is fast llvm-svn: 227483 --- llvm/lib/Target/R600/AMDGPU.td | 6 ++++++ llvm/lib/Target/R600/AMDGPUSubtarget.cpp | 3 ++- llvm/lib/Target/R600/AMDGPUSubtarget.h | 5 +++++ llvm/lib/Target/R600/Processors.td | 12 +++++++++--- llvm/lib/Target/R600/SIISelLowering.cpp | 2 +- 5 files changed, 23 insertions(+), 5 deletions(-) diff --git a/llvm/lib/Target/R600/AMDGPU.td b/llvm/lib/Target/R600/AMDGPU.td index 1df4448..be9a149 100644 --- a/llvm/lib/Target/R600/AMDGPU.td +++ b/llvm/lib/Target/R600/AMDGPU.td @@ -48,6 +48,12 @@ def FeatureFP64Denormals : SubtargetFeature<"fp64-denormals", "Enable double precision denormal handling", [FeatureFP64]>; +def FeatureFastFMAF32 : SubtargetFeature<"fast-fmaf", + "FastFMAF32", + "true", + "Assuming f32 fma is at least as fast as mul + add", + []>; + // Some instructions do not support denormals despite this flag. Using // fp32 denormals also causes instructions to run at the double // precision rate for the device. 
diff --git a/llvm/lib/Target/R600/AMDGPUSubtarget.cpp b/llvm/lib/Target/R600/AMDGPUSubtarget.cpp index 541dbab..e08a7dd 100644 --- a/llvm/lib/Target/R600/AMDGPUSubtarget.cpp +++ b/llvm/lib/Target/R600/AMDGPUSubtarget.cpp @@ -65,7 +65,8 @@ AMDGPUSubtarget::AMDGPUSubtarget(StringRef TT, StringRef GPU, StringRef FS, : AMDGPUGenSubtargetInfo(TT, GPU, FS), DevName(GPU), Is64bit(false), DumpCode(false), R600ALUInst(false), HasVertexCache(false), TexVTXClauseSize(0), Gen(AMDGPUSubtarget::R600), FP64(false), - FP64Denormals(false), FP32Denormals(false), CaymanISA(false), + FP64Denormals(false), FP32Denormals(false), + FastFMAF32(false), CaymanISA(false), FlatAddressSpace(false), EnableIRStructurizer(true), EnablePromoteAlloca(false), EnableIfCvt(true), EnableLoadStoreOpt(false), WavefrontSize(0), CFALUBug(false), LocalMemorySize(0), diff --git a/llvm/lib/Target/R600/AMDGPUSubtarget.h b/llvm/lib/Target/R600/AMDGPUSubtarget.h index 389cc8c..0c0145c 100644 --- a/llvm/lib/Target/R600/AMDGPUSubtarget.h +++ b/llvm/lib/Target/R600/AMDGPUSubtarget.h @@ -55,6 +55,7 @@ private: bool FP64; bool FP64Denormals; bool FP32Denormals; + bool FastFMAF32; bool CaymanISA; bool FlatAddressSpace; bool EnableIRStructurizer; @@ -127,6 +128,10 @@ public: return FP64Denormals; } + bool hasFastFMAF32() const { + return FastFMAF32; + } + bool hasFlatAddressSpace() const { return FlatAddressSpace; } diff --git a/llvm/lib/Target/R600/Processors.td b/llvm/lib/Target/R600/Processors.td index 6593016..fb5aa61 100644 --- a/llvm/lib/Target/R600/Processors.td +++ b/llvm/lib/Target/R600/Processors.td @@ -83,9 +83,13 @@ def : Proc<"cayman", R600_VLIW4_Itin, // Southern Islands //===----------------------------------------------------------------------===// -def : ProcessorModel<"SI", SIFullSpeedModel, [FeatureSouthernIslands]>; +def : ProcessorModel<"SI", SIFullSpeedModel, + [FeatureSouthernIslands, FeatureFastFMAF32] +>; -def : ProcessorModel<"tahiti", SIFullSpeedModel, [FeatureSouthernIslands]>; +def : 
ProcessorModel<"tahiti", SIFullSpeedModel, + [FeatureSouthernIslands, FeatureFastFMAF32] +>; def : ProcessorModel<"pitcairn", SIQuarterSpeedModel, [FeatureSouthernIslands]>; @@ -105,7 +109,9 @@ def : ProcessorModel<"kabini", SIQuarterSpeedModel, [FeatureSeaIslands]>; def : ProcessorModel<"kaveri", SIQuarterSpeedModel, [FeatureSeaIslands]>; -def : ProcessorModel<"hawaii", SIFullSpeedModel, [FeatureSeaIslands]>; +def : ProcessorModel<"hawaii", SIFullSpeedModel, + [FeatureSeaIslands, FeatureFastFMAF32] +>; def : ProcessorModel<"mullins", SIQuarterSpeedModel, [FeatureSeaIslands]>; diff --git a/llvm/lib/Target/R600/SIISelLowering.cpp b/llvm/lib/Target/R600/SIISelLowering.cpp index 6b2ea06..12677f0 100644 --- a/llvm/lib/Target/R600/SIISelLowering.cpp +++ b/llvm/lib/Target/R600/SIISelLowering.cpp @@ -655,7 +655,7 @@ bool SITargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const { switch (VT.getSimpleVT().SimpleTy) { case MVT::f32: - return false; /* There is V_MAD_F32 for f32 */ + return Subtarget->hasFastFMAF32(); case MVT::f64: return true; default: -- 2.7.4