From 19e7f8a21d62d0a6ae8a1bbecb232bd9d520555b Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Sun, 27 Oct 2019 23:38:52 -0700 Subject: [PATCH] AMDGPU: Add default denormal mode to MachineFunctionInfo The default FP mode should really be a property of a specific function, and not a subtarget. Introduce the necessary fields to the SIMachineFunctionInfo to help move towards this goal. --- llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp | 2 ++ llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h | 12 +++++++++-- llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h | 25 ++++++++++++++++++---- .../MIR/AMDGPU/machine-function-info-no-ir.mir | 13 +++++++++++ .../CodeGen/MIR/AMDGPU/machine-function-info.ll | 14 ++++++++++++ 5 files changed, 60 insertions(+), 6 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp index e8cf771..64739cd 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp @@ -1151,6 +1151,8 @@ bool GCNTargetMachine::parseMachineFunctionInfo( MFI->Mode.IEEE = YamlMFI.Mode.IEEE; MFI->Mode.DX10Clamp = YamlMFI.Mode.DX10Clamp; + MFI->Mode.FP32Denormals = YamlMFI.Mode.FP32Denormals; + MFI->Mode.FP64FP16Denormals = YamlMFI.Mode.FP64FP16Denormals; return false; } diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h index 7d70c78..0d6153d 100644 --- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h +++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h @@ -236,17 +236,23 @@ template <> struct MappingTraits { struct SIMode { bool IEEE = true; bool DX10Clamp = true; + bool FP32Denormals = true; + bool FP64FP16Denormals = true; SIMode() = default; - SIMode(const AMDGPU::SIModeRegisterDefaults &Mode) { IEEE = Mode.IEEE; DX10Clamp = Mode.DX10Clamp; + FP32Denormals = Mode.FP32Denormals; + FP64FP16Denormals = Mode.FP64FP16Denormals; } bool operator ==(const SIMode Other) const { - return IEEE == Other.IEEE 
&& DX10Clamp == Other.DX10Clamp; + return IEEE == Other.IEEE && + DX10Clamp == Other.DX10Clamp && + FP32Denormals == Other.FP32Denormals && + FP64FP16Denormals == Other.FP64FP16Denormals; } }; @@ -254,6 +260,8 @@ template <> struct MappingTraits { static void mapping(IO &YamlIO, SIMode &Mode) { YamlIO.mapOptional("ieee", Mode.IEEE, true); YamlIO.mapOptional("dx10-clamp", Mode.DX10Clamp, true); + YamlIO.mapOptional("fp32-denormals", Mode.FP32Denormals, true); + YamlIO.mapOptional("fp64-fp16-denormals", Mode.FP64FP16Denormals, true); } }; diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h index f78dadd..f8c0820 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h @@ -659,23 +659,40 @@ struct SIModeRegisterDefaults { /// clamp NaN to zero; otherwise, pass NaN through. bool DX10Clamp : 1; - // TODO: FP mode fields + /// If this is set, neither input nor output denormals are flushed for most f32 + /// instructions. + /// + /// TODO: Split into separate input and output fields if necessary like the + /// control bits really provide? + bool FP32Denormals : 1; + + /// If this is set, neither input nor output denormals are flushed for both f64 + /// and f16/v2f16 instructions. + bool FP64FP16Denormals : 1; SIModeRegisterDefaults() : IEEE(true), - DX10Clamp(true) {} + DX10Clamp(true), + FP32Denormals(true), + FP64FP16Denormals(true) {} SIModeRegisterDefaults(const Function &F); static SIModeRegisterDefaults getDefaultForCallingConv(CallingConv::ID CC) { + const bool IsCompute = AMDGPU::isCompute(CC); + SIModeRegisterDefaults Mode; Mode.DX10Clamp = true; - Mode.IEEE = AMDGPU::isCompute(CC); + Mode.IEEE = IsCompute; + Mode.FP32Denormals = false; // FIXME: Should be on by default. 
+ Mode.FP64FP16Denormals = true; return Mode; } bool operator ==(const SIModeRegisterDefaults Other) const { - return IEEE == Other.IEEE && DX10Clamp == Other.DX10Clamp; + return IEEE == Other.IEEE && DX10Clamp == Other.DX10Clamp && + FP32Denormals == Other.FP32Denormals && + FP64FP16Denormals == Other.FP64FP16Denormals; } // FIXME: Inlining should be OK for dx10-clamp, since the caller's mode should diff --git a/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-no-ir.mir b/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-no-ir.mir index 8334ef5..0b23ded 100644 --- a/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-no-ir.mir +++ b/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-no-ir.mir @@ -25,6 +25,8 @@ # FULL-NEXT: mode: # FULL-NEXT: ieee: true # FULL-NEXT: dx10-clamp: true +# FULL-NEXT: fp32-denormals: true +# FULL-NEXT: fp64-fp16-denormals: true # FULL-NEXT: highBitsOf32BitAddress: 0 # FULL-NEXT: body: @@ -92,6 +94,8 @@ body: | # FULL-NEXT: mode: # FULL-NEXT: ieee: true # FULL-NEXT: dx10-clamp: true +# FULL-NEXT: fp32-denormals: true +# FULL-NEXT: fp64-fp16-denormals: true # FULL-NEXT: highBitsOf32BitAddress: 0 # FULL-NEXT: body: @@ -129,6 +133,8 @@ body: | # FULL-NEXT: mode: # FULL-NEXT: ieee: true # FULL-NEXT: dx10-clamp: true +# FULL-NEXT: fp32-denormals: true +# FULL-NEXT: fp64-fp16-denormals: true # FULL-NEXT: highBitsOf32BitAddress: 0 # FULL-NEXT: body: @@ -167,6 +173,8 @@ body: | # FULL-NEXT: mode: # FULL-NEXT: ieee: true # FULL-NEXT: dx10-clamp: true +# FULL-NEXT: fp32-denormals: true +# FULL-NEXT: fp64-fp16-denormals: true # FULL-NEXT: highBitsOf32BitAddress: 0 # FULL-NEXT: body: @@ -239,11 +247,16 @@ body: | # ALL: mode: # ALL-NEXT: ieee: false # ALL-NEXT: dx10-clamp: false +# ALL-NEXT: fp32-denormals: false +# ALL-NEXT: fp64-fp16-denormals: false + name: parse_mode machineFunctionInfo: mode: ieee: false dx10-clamp: false + fp32-denormals: false + fp64-fp16-denormals: false body: | bb.0: diff --git 
a/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info.ll b/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info.ll index bc354f2..f9de722 100644 --- a/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info.ll +++ b/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info.ll @@ -28,6 +28,8 @@ ; CHECK-NEXT: mode: ; CHECK-NEXT: ieee: true ; CHECK-NEXT: dx10-clamp: true +; CHECK-NEXT: fp32-denormals: false +; CHECK-NEXT: fp64-fp16-denormals: true ; CHECK-NEXT: highBitsOf32BitAddress: 0 ; CHECK-NEXT: body: define amdgpu_kernel void @kernel(i32 %arg0, i64 %arg1, <16 x i32> %arg2) { @@ -55,6 +57,8 @@ define amdgpu_kernel void @kernel(i32 %arg0, i64 %arg1, <16 x i32> %arg2) { ; CHECK-NEXT: mode: ; CHECK-NEXT: ieee: false ; CHECK-NEXT: dx10-clamp: true +; CHECK-NEXT: fp32-denormals: false +; CHECK-NEXT: fp64-fp16-denormals: true ; CHECK-NEXT: highBitsOf32BitAddress: 0 ; CHECK-NEXT: body: define amdgpu_ps void @ps_shader(i32 %arg0, i32 inreg %arg1) { @@ -80,6 +84,8 @@ define amdgpu_ps void @ps_shader(i32 %arg0, i32 inreg %arg1) { ; CHECK-NEXT: mode: ; CHECK-NEXT: ieee: true ; CHECK-NEXT: dx10-clamp: true +; CHECK-NEXT: fp32-denormals: false +; CHECK-NEXT: fp64-fp16-denormals: true ; CHECK-NEXT: highBitsOf32BitAddress: 0 ; CHECK-NEXT: body: define void @function() { @@ -105,6 +111,8 @@ define void @function() { ; CHECK-NEXT: mode: ; CHECK-NEXT: ieee: true ; CHECK-NEXT: dx10-clamp: true +; CHECK-NEXT: fp32-denormals: false +; CHECK-NEXT: fp64-fp16-denormals: true ; CHECK-NEXT: highBitsOf32BitAddress: 0 ; CHECK-NEXT: body: define void @function_nsz() #0 { @@ -115,6 +123,8 @@ define void @function_nsz() #0 { ; CHECK: mode: ; CHECK-NEXT: ieee: true ; CHECK-NEXT: dx10-clamp: false +; CHECK-NEXT: fp32-denormals: false +; CHECK-NEXT: fp64-fp16-denormals: true define void @function_dx10_clamp_off() #1 { ret void } @@ -123,6 +133,8 @@ define void @function_dx10_clamp_off() #1 { ; CHECK: mode: ; CHECK-NEXT: ieee: false ; CHECK-NEXT: dx10-clamp: true +; CHECK-NEXT: fp32-denormals: false +; 
CHECK-NEXT: fp64-fp16-denormals: true define void @function_ieee_off() #2 { ret void } @@ -131,6 +143,8 @@ define void @function_ieee_off() #2 { ; CHECK: mode: ; CHECK-NEXT: ieee: false ; CHECK-NEXT: dx10-clamp: false +; CHECK-NEXT: fp32-denormals: false +; CHECK-NEXT: fp64-fp16-denormals: true define void @function_ieee_off_dx10_clamp_off() #3 { ret void } -- 2.7.4