From 19e7f8a21d62d0a6ae8a1bbecb232bd9d520555b Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault@amd.com>
Date: Sun, 27 Oct 2019 23:38:52 -0700
Subject: [PATCH] AMDGPU: Add default denormal mode to MachineFunctionInfo

The default FP mode should really be a property of a specific
function, and not a subtarget. Introduce the necessary fields to the
SIMachineFunctionInfo to help move towards this goal.
---
 llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp     |  2 ++
 llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h     | 12 +++++++++--
 llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h      | 25 ++++++++++++++++++----
 .../MIR/AMDGPU/machine-function-info-no-ir.mir     | 13 +++++++++++
 .../CodeGen/MIR/AMDGPU/machine-function-info.ll    | 14 ++++++++++++
 5 files changed, 60 insertions(+), 6 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index e8cf771..64739cd 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -1151,6 +1151,8 @@ bool GCNTargetMachine::parseMachineFunctionInfo(
 
   MFI->Mode.IEEE = YamlMFI.Mode.IEEE;
   MFI->Mode.DX10Clamp = YamlMFI.Mode.DX10Clamp;
+  MFI->Mode.FP32Denormals = YamlMFI.Mode.FP32Denormals;
+  MFI->Mode.FP64FP16Denormals = YamlMFI.Mode.FP64FP16Denormals;
 
   return false;
 }
diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
index 7d70c78..0d6153d 100644
--- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
@@ -236,17 +236,23 @@ template <> struct MappingTraits<SIArgumentInfo> {
 struct SIMode {
   bool IEEE = true;
   bool DX10Clamp = true;
+  bool FP32Denormals = true;
+  bool FP64FP16Denormals = true;
 
   SIMode() = default;
 
-
   SIMode(const AMDGPU::SIModeRegisterDefaults &Mode) {
     IEEE = Mode.IEEE;
     DX10Clamp = Mode.DX10Clamp;
+    FP32Denormals = Mode.FP32Denormals;
+    FP64FP16Denormals = Mode.FP64FP16Denormals;
   }
 
   bool operator ==(const SIMode Other) const {
-    return IEEE == Other.IEEE && DX10Clamp == Other.DX10Clamp;
+    return IEEE == Other.IEEE &&
+           DX10Clamp == Other.DX10Clamp &&
+           FP32Denormals == Other.FP32Denormals &&
+           FP64FP16Denormals == Other.FP64FP16Denormals;
   }
 };
 
@@ -254,6 +260,8 @@ template <> struct MappingTraits<SIMode> {
   static void mapping(IO &YamlIO, SIMode &Mode) {
     YamlIO.mapOptional("ieee", Mode.IEEE, true);
     YamlIO.mapOptional("dx10-clamp", Mode.DX10Clamp, true);
+    YamlIO.mapOptional("fp32-denormals", Mode.FP32Denormals, true);
+    YamlIO.mapOptional("fp64-fp16-denormals", Mode.FP64FP16Denormals, true);
   }
 };
 
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
index f78dadd..f8c0820 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
@@ -659,23 +659,40 @@ struct SIModeRegisterDefaults {
   /// clamp NaN to zero; otherwise, pass NaN through.
   bool DX10Clamp : 1;
 
-  // TODO: FP mode fields
+  /// If this is set, neither input nor output denormals are flushed for most
+  /// f32 instructions.
+  ///
+  /// TODO: Split into separate input and output fields, if necessary, to match
+  /// what the control bits actually provide?
+  bool FP32Denormals : 1;
+
+  /// If this is set, neither input nor output denormals are flushed for both
+  /// f64 and f16/v2f16 instructions.
+  bool FP64FP16Denormals : 1;
 
   SIModeRegisterDefaults() :
     IEEE(true),
-    DX10Clamp(true) {}
+    DX10Clamp(true),
+    FP32Denormals(true),
+    FP64FP16Denormals(true) {}
 
   SIModeRegisterDefaults(const Function &F);
 
   static SIModeRegisterDefaults getDefaultForCallingConv(CallingConv::ID CC) {
+    const bool IsCompute = AMDGPU::isCompute(CC);
+
     SIModeRegisterDefaults Mode;
     Mode.DX10Clamp = true;
-    Mode.IEEE = AMDGPU::isCompute(CC);
+    Mode.IEEE = IsCompute;
+    Mode.FP32Denormals = false; // FIXME: Should be on by default.
+    Mode.FP64FP16Denormals = true;
     return Mode;
   }
 
   bool operator ==(const SIModeRegisterDefaults Other) const {
-    return IEEE == Other.IEEE && DX10Clamp == Other.DX10Clamp;
+    return IEEE == Other.IEEE && DX10Clamp == Other.DX10Clamp &&
+           FP32Denormals == Other.FP32Denormals &&
+           FP64FP16Denormals == Other.FP64FP16Denormals;
   }
 
   // FIXME: Inlining should be OK for dx10-clamp, since the caller's mode should
diff --git a/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-no-ir.mir b/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-no-ir.mir
index 8334ef5..0b23ded 100644
--- a/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-no-ir.mir
+++ b/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-no-ir.mir
@@ -25,6 +25,8 @@
 # FULL-NEXT: mode:
 # FULL-NEXT: ieee: true
 # FULL-NEXT: dx10-clamp: true
+# FULL-NEXT: fp32-denormals: true
+# FULL-NEXT: fp64-fp16-denormals: true
 # FULL-NEXT:  highBitsOf32BitAddress: 0
 # FULL-NEXT: body:
 
@@ -92,6 +94,8 @@ body:             |
 # FULL-NEXT: mode:
 # FULL-NEXT: ieee: true
 # FULL-NEXT: dx10-clamp: true
+# FULL-NEXT: fp32-denormals: true
+# FULL-NEXT: fp64-fp16-denormals: true
 # FULL-NEXT:  highBitsOf32BitAddress: 0
 # FULL-NEXT: body:
 
@@ -129,6 +133,8 @@ body:             |
 # FULL-NEXT: mode:
 # FULL-NEXT: ieee: true
 # FULL-NEXT: dx10-clamp: true
+# FULL-NEXT: fp32-denormals: true
+# FULL-NEXT: fp64-fp16-denormals: true
 # FULL-NEXT:  highBitsOf32BitAddress: 0
 # FULL-NEXT: body:
 
@@ -167,6 +173,8 @@ body:             |
 # FULL-NEXT: mode:
 # FULL-NEXT: ieee: true
 # FULL-NEXT: dx10-clamp: true
+# FULL-NEXT: fp32-denormals: true
+# FULL-NEXT: fp64-fp16-denormals: true
 # FULL-NEXT:  highBitsOf32BitAddress: 0
 # FULL-NEXT: body:
 
@@ -239,11 +247,16 @@ body:             |
 # ALL: mode:
 # ALL-NEXT: ieee: false
 # ALL-NEXT: dx10-clamp: false
+# ALL-NEXT: fp32-denormals: false
+# ALL-NEXT: fp64-fp16-denormals: false
+
 name: parse_mode
 machineFunctionInfo:
   mode:
     ieee: false
     dx10-clamp: false
+    fp32-denormals: false
+    fp64-fp16-denormals: false
 
 body:             |
   bb.0:
diff --git a/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info.ll b/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info.ll
index bc354f2..f9de722 100644
--- a/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info.ll
+++ b/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info.ll
@@ -28,6 +28,8 @@
 ; CHECK-NEXT: mode:
 ; CHECK-NEXT: ieee: true
 ; CHECK-NEXT: dx10-clamp: true
+; CHECK-NEXT: fp32-denormals: false
+; CHECK-NEXT: fp64-fp16-denormals: true
 ; CHECK-NEXT: highBitsOf32BitAddress: 0
 ; CHECK-NEXT: body:
 define amdgpu_kernel void @kernel(i32 %arg0, i64 %arg1, <16 x i32> %arg2) {
@@ -55,6 +57,8 @@ define amdgpu_kernel void @kernel(i32 %arg0, i64 %arg1, <16 x i32> %arg2) {
 ; CHECK-NEXT: mode:
 ; CHECK-NEXT: ieee: false
 ; CHECK-NEXT: dx10-clamp: true
+; CHECK-NEXT: fp32-denormals: false
+; CHECK-NEXT: fp64-fp16-denormals: true
 ; CHECK-NEXT: highBitsOf32BitAddress: 0
 ; CHECK-NEXT: body:
 define amdgpu_ps void @ps_shader(i32 %arg0, i32 inreg %arg1) {
@@ -80,6 +84,8 @@ define amdgpu_ps void @ps_shader(i32 %arg0, i32 inreg %arg1) {
 ; CHECK-NEXT: mode:
 ; CHECK-NEXT: ieee: true
 ; CHECK-NEXT: dx10-clamp: true
+; CHECK-NEXT: fp32-denormals: false
+; CHECK-NEXT: fp64-fp16-denormals: true
 ; CHECK-NEXT: highBitsOf32BitAddress: 0
 ; CHECK-NEXT: body:
 define void @function() {
@@ -105,6 +111,8 @@ define void @function() {
 ; CHECK-NEXT: mode:
 ; CHECK-NEXT: ieee: true
 ; CHECK-NEXT: dx10-clamp: true
+; CHECK-NEXT: fp32-denormals: false
+; CHECK-NEXT: fp64-fp16-denormals: true
 ; CHECK-NEXT: highBitsOf32BitAddress: 0
 ; CHECK-NEXT: body:
 define void @function_nsz() #0 {
@@ -115,6 +123,8 @@ define void @function_nsz() #0 {
 ; CHECK: mode:
 ; CHECK-NEXT: ieee: true
 ; CHECK-NEXT: dx10-clamp: false
+; CHECK-NEXT: fp32-denormals: false
+; CHECK-NEXT: fp64-fp16-denormals: true
 define void @function_dx10_clamp_off() #1 {
   ret void
 }
@@ -123,6 +133,8 @@ define void @function_dx10_clamp_off() #1 {
 ; CHECK: mode:
 ; CHECK-NEXT: ieee: false
 ; CHECK-NEXT: dx10-clamp: true
+; CHECK-NEXT: fp32-denormals: false
+; CHECK-NEXT: fp64-fp16-denormals: true
 define void @function_ieee_off() #2 {
   ret void
 }
@@ -131,6 +143,8 @@ define void @function_ieee_off() #2 {
 ; CHECK: mode:
 ; CHECK-NEXT: ieee: false
 ; CHECK-NEXT: dx10-clamp: false
+; CHECK-NEXT: fp32-denormals: false
+; CHECK-NEXT: fp64-fp16-denormals: true
 define void @function_ieee_off_dx10_clamp_off() #3 {
   ret void
 }
-- 
2.7.4