AMDGPU: Assume f32 denormals are enabled by default

author Matt Arsenault <Matthew.Arsenault@amd.com>

Fri, 1 Nov 2019 06:32:31 +0000 (23:32 -0700)

committer Matt Arsenault <arsenm2@gmail.com>

Thu, 2 Apr 2020 21:17:12 +0000 (17:17 -0400)
author Matt Arsenault <Matthew.Arsenault@amd.com>
Fri, 1 Nov 2019 06:32:31 +0000 (23:32 -0700)
committer Matt Arsenault <arsenm2@gmail.com>
Thu, 2 Apr 2020 21:17:12 +0000 (17:17 -0400)
diff --git a/llvm/docs/ReleaseNotes.rst b/llvm/docs/ReleaseNotes.rst

index 4f6e759..f23756b 100644 (file)
--- a/llvm/docs/ReleaseNotes.rst
+++ b/llvm/docs/ReleaseNotes.rst
@@ -100,6 +100,11 @@ During this release ...
  Changes to the AMDGPU Target
  -----------------------------
  
+* The backend's default f32 denormal handling mode is now enabled for
+  all targets and all compute function types. Frontends wishing to
+  retain the old behavior should explicitly request f32 denormal
+  flushing.
+
  Changes to the AVR Target
  -----------------------------
  
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp

index d3ecb4f..ad106d4 100644 (file)
--- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
@@ -97,7 +97,7 @@ GCNSubtarget::initializeSubtargetDependencies(const Triple &TT,
    // denormals, but should be checked. Should we issue a warning somewhere
    // if someone tries to enable these?
    if (getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS) {
-    FullFS += "+fp64-fp16-denormals,";
+    FullFS += "+fp64-fp16-denormals,+fp32-denormals,";
    } else {
      FullFS += "-fp32-denormals,";
    }
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h

index aad7d4b..de8eb74 100644 (file)
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
@@ -704,8 +704,8 @@ struct SIModeRegisterDefaults {
      SIModeRegisterDefaults Mode;
      Mode.DX10Clamp = true;
      Mode.IEEE = IsCompute;
-    Mode.FP32InputDenormals = false; // FIXME: Should be on by default.
-    Mode.FP32OutputDenormals = false; // FIXME: Should be on by default.
+    Mode.FP32InputDenormals = true;
+    Mode.FP32OutputDenormals = true;
      Mode.FP64FP16InputDenormals = true;
      Mode.FP64FP16OutputDenormals = true;
      return Mode;
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/udiv.i64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/udiv.i64.ll

index 27448be..2799d84 100644 (file)
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/udiv.i64.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/udiv.i64.ll
@@ -1,6 +1,6 @@
  ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -global-isel -amdgpu-codegenprepare-disable-idiv-expansion=1 -mtriple=amdgcn-amd-amdhsa < %s | FileCheck -check-prefixes=CHECK,GISEL %s
-; RUN: llc -global-isel -amdgpu-codegenprepare-disable-idiv-expansion=0 -mtriple=amdgcn-amd-amdhsa < %s | FileCheck -check-prefixes=CHECK,CGP %s
+; RUN: llc -global-isel -amdgpu-codegenprepare-disable-idiv-expansion=1 -mtriple=amdgcn-amd-amdhsa -mattr=-fp32-denormals -denormal-fp-math-f32=preserve-sign < %s | FileCheck -check-prefixes=CHECK,GISEL %s
+; RUN: llc -global-isel -amdgpu-codegenprepare-disable-idiv-expansion=0 -mtriple=amdgcn-amd-amdhsa -mattr=-fp32-denormals -denormal-fp-math-f32=preserve-sign < %s | FileCheck -check-prefixes=CHECK,CGP %s
  
  ; The same 32-bit expansion is implemented in the legalizer and in AMDGPUCodeGenPrepare.
  
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll

index 291b56b..560c2de 100644 (file)
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll
@@ -1,6 +1,6 @@
  ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -global-isel -amdgpu-codegenprepare-disable-idiv-expansion=1 -mtriple=amdgcn-amd-amdhsa < %s | FileCheck -check-prefixes=CHECK,GISEL %s
-; RUN: llc -global-isel -amdgpu-codegenprepare-disable-idiv-expansion=0 -mtriple=amdgcn-amd-amdhsa < %s | FileCheck -check-prefixes=CHECK,CGP %s
+; RUN: llc -global-isel -amdgpu-codegenprepare-disable-idiv-expansion=1 -mtriple=amdgcn-amd-amdhsa -mattr=-fp32-denormals -denormal-fp-math-f32=preserve-sign < %s | FileCheck -check-prefixes=CHECK,GISEL %s
+; RUN: llc -global-isel -amdgpu-codegenprepare-disable-idiv-expansion=0 -mtriple=amdgcn-amd-amdhsa -mattr=-fp32-denormals -denormal-fp-math-f32=preserve-sign < %s | FileCheck -check-prefixes=CHECK,CGP %s
  
  ; The same 32-bit expansion is implemented in the legalizer and in AMDGPUCodeGenPrepare.
  
diff --git a/llvm/test/CodeGen/AMDGPU/amdgcn-ieee.ll b/llvm/test/CodeGen/AMDGPU/amdgcn-ieee.ll

index 1e6cc05..800ae81 100644 (file)
--- a/llvm/test/CodeGen/AMDGPU/amdgcn-ieee.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgcn-ieee.ll
@@ -3,8 +3,8 @@
  ; GCN-LABEL: {{^}}kernel_ieee_mode_default:
  ; GCN: {{buffer|global|flat}}_load_dword [[VAL0:v[0-9]+]]
  ; GCN-NEXT: {{buffer|global|flat}}_load_dword [[VAL1:v[0-9]+]]
-; GCN-DAG: v_mul_f32_e32 [[QUIET0:v[0-9]+]], 1.0, [[VAL0]]
-; GCN-DAG: v_mul_f32_e32 [[QUIET1:v[0-9]+]], 1.0, [[VAL1]]
+; GCN-DAG: v_max_f32_e32 [[QUIET0:v[0-9]+]], [[VAL0]], [[VAL0]]
+; GCN-DAG: v_max_f32_e32 [[QUIET1:v[0-9]+]], [[VAL1]], [[VAL1]]
  ; GCN: v_min_f32_e32 [[MIN:v[0-9]+]], [[QUIET0]], [[QUIET1]]
  ; GCN-NOT: v_mul_f32
  define amdgpu_kernel void @kernel_ieee_mode_default() #0 {
@@ -18,8 +18,8 @@ define amdgpu_kernel void @kernel_ieee_mode_default() #0 {
  ; GCN-LABEL: {{^}}kernel_ieee_mode_on:
  ; GCN: {{buffer|global|flat}}_load_dword [[VAL0:v[0-9]+]]
  ; GCN-NEXT: {{buffer|global|flat}}_load_dword [[VAL1:v[0-9]+]]
-; GCN-DAG: v_mul_f32_e32 [[QUIET0:v[0-9]+]], 1.0, [[VAL0]]
-; GCN-DAG: v_mul_f32_e32 [[QUIET1:v[0-9]+]], 1.0, [[VAL1]]
+; GCN-DAG: v_max_f32_e32 [[QUIET0:v[0-9]+]], [[VAL0]], [[VAL0]]
+; GCN-DAG: v_max_f32_e32 [[QUIET1:v[0-9]+]], [[VAL1]], [[VAL1]]
  ; GCN: v_min_f32_e32 [[MIN:v[0-9]+]], [[QUIET0]], [[QUIET1]]
  ; GCN-NOT: v_mul_f32
  define amdgpu_kernel void @kernel_ieee_mode_on() #1 {
@@ -48,8 +48,8 @@ define amdgpu_kernel void @kernel_ieee_mode_off() #2 {
  ; GCN-LABEL: {{^}}func_ieee_mode_default:
  ; GCN: {{buffer|global|flat}}_load_dword [[VAL0:v[0-9]+]]
  ; GCN-NEXT: {{buffer|global|flat}}_load_dword [[VAL1:v[0-9]+]]
-; GCN-DAG: v_mul_f32_e32 [[QUIET0:v[0-9]+]], 1.0, [[VAL0]]
-; GCN-DAG: v_mul_f32_e32 [[QUIET1:v[0-9]+]], 1.0, [[VAL1]]
+; GCN-DAG: v_max_f32_e32 [[QUIET0:v[0-9]+]], [[VAL0]], [[VAL0]]
+; GCN-DAG: v_max_f32_e32 [[QUIET1:v[0-9]+]], [[VAL1]], [[VAL1]]
  ; GCN: v_min_f32_e32 [[MIN:v[0-9]+]], [[QUIET0]], [[QUIET1]]
  ; GCN-NOT: v_mul_f32
  define void @func_ieee_mode_default() #0 {
@@ -63,8 +63,8 @@ define void @func_ieee_mode_default() #0 {
  ; GCN-LABEL: {{^}}func_ieee_mode_on:
  ; GCN: {{buffer|global|flat}}_load_dword [[VAL0:v[0-9]+]]
  ; GCN-NEXT: {{buffer|global|flat}}_load_dword [[VAL1:v[0-9]+]]
-; GCN-DAG: v_mul_f32_e32 [[QUIET0:v[0-9]+]], 1.0, [[VAL0]]
-; GCN-DAG: v_mul_f32_e32 [[QUIET1:v[0-9]+]], 1.0, [[VAL1]]
+; GCN-DAG: v_max_f32_e32 [[QUIET0:v[0-9]+]], [[VAL0]], [[VAL0]]
+; GCN-DAG: v_max_f32_e32 [[QUIET1:v[0-9]+]], [[VAL1]], [[VAL1]]
  ; GCN: v_min_f32_e32 [[MIN:v[0-9]+]], [[QUIET0]], [[QUIET1]]
  ; GCN-NOT: v_mul_f32
  define void @func_ieee_mode_on() #1 {
@@ -93,8 +93,8 @@ define void @func_ieee_mode_off() #2 {
  ; GCN-LABEL: {{^}}cs_ieee_mode_default:
  ; GCN: {{buffer|global|flat}}_load_dword [[VAL0:v[0-9]+]]
  ; GCN-NEXT: {{buffer|global|flat}}_load_dword [[VAL1:v[0-9]+]]
-; GCN-DAG: v_mul_f32_e32 [[QUIET0:v[0-9]+]], 1.0, [[VAL0]]
-; GCN-DAG: v_mul_f32_e32 [[QUIET1:v[0-9]+]], 1.0, [[VAL1]]
+; GCN-DAG: v_max_f32_e32 [[QUIET0:v[0-9]+]], [[VAL0]], [[VAL0]]
+; GCN-DAG: v_max_f32_e32 [[QUIET1:v[0-9]+]], [[VAL1]], [[VAL1]]
  ; GCN: v_min_f32_e32 [[MIN:v[0-9]+]], [[QUIET0]], [[QUIET1]]
  ; GCN-NOT: v_mul_f32
  define amdgpu_cs void @cs_ieee_mode_default() #0 {
@@ -108,8 +108,8 @@ define amdgpu_cs void @cs_ieee_mode_default() #0 {
  ; GCN-LABEL: {{^}}cs_ieee_mode_on:
  ; GCN: {{buffer|global|flat}}_load_dword [[VAL0:v[0-9]+]]
  ; GCN-NEXT: {{buffer|global|flat}}_load_dword [[VAL1:v[0-9]+]]
-; GCN-DAG: v_mul_f32_e32 [[QUIET0:v[0-9]+]], 1.0, [[VAL0]]
-; GCN-DAG: v_mul_f32_e32 [[QUIET1:v[0-9]+]], 1.0, [[VAL1]]
+; GCN-DAG: v_max_f32_e32 [[QUIET0:v[0-9]+]], [[VAL0]], [[VAL0]]
+; GCN-DAG: v_max_f32_e32 [[QUIET1:v[0-9]+]], [[VAL1]], [[VAL1]]
  ; GCN: v_min_f32_e32 [[MIN:v[0-9]+]], [[QUIET0]], [[QUIET1]]
  ; GCN-NOT: v_mul_f32
  define amdgpu_cs void @cs_ieee_mode_on() #1 {
@@ -150,11 +150,12 @@ define amdgpu_ps void @ps_ieee_mode_default() #0 {
    ret void
  }
  
+; FIXME: Should have denormals off by default.
  ; GCN-LABEL: {{^}}ps_ieee_mode_on:
  ; GCN: {{buffer|global|flat}}_load_dword [[VAL0:v[0-9]+]]
  ; GCN-NEXT: {{buffer|global|flat}}_load_dword [[VAL1:v[0-9]+]]
-; GCN-DAG: v_mul_f32_e32 [[QUIET0:v[0-9]+]], 1.0, [[VAL0]]
-; GCN-DAG: v_mul_f32_e32 [[QUIET1:v[0-9]+]], 1.0, [[VAL1]]
+; GCN-DAG: v_max_f32_e32 [[QUIET0:v[0-9]+]], [[VAL0]], [[VAL0]]
+; GCN-DAG: v_max_f32_e32 [[QUIET1:v[0-9]+]], [[VAL1]], [[VAL1]]
  ; GCN: v_min_f32_e32 [[MIN:v[0-9]+]], [[QUIET0]], [[QUIET1]]
  ; GCN-NOT: v_mul_f32
  define amdgpu_ps void @ps_ieee_mode_on() #1 {
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-fdiv.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-fdiv.ll

index 3096372..124a241 100644 (file)
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-fdiv.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-fdiv.ll
@@ -338,7 +338,7 @@ define amdgpu_kernel void @fdiv_fpmath_f32_denormals(float addrspace(1)* %out, f
  }
  
  attributes #0 = { nounwind optnone noinline }
-attributes #1 = { nounwind }
+attributes #1 = { nounwind "target-features"="-fp32-denormals" }
  attributes #2 = { nounwind "target-features"="+fp32-denormals" }
  
  !0 = !{float 2.500000e+00}
diff --git a/llvm/test/CodeGen/AMDGPU/clamp-modifier.ll b/llvm/test/CodeGen/AMDGPU/clamp-modifier.ll

index 2bc5db9..836a3b5 100644 (file)
--- a/llvm/test/CodeGen/AMDGPU/clamp-modifier.ll
+++ b/llvm/test/CodeGen/AMDGPU/clamp-modifier.ll
@@ -389,7 +389,7 @@ declare <2 x half> @llvm.amdgcn.cvt.pkrtz(float, float) #1
  
  declare void @llvm.dbg.value(metadata, i64, metadata, metadata) #1
  
-attributes #0 = { nounwind }
+attributes #0 = { nounwind "target-features"="-fp32-denormals" }
  attributes #1 = { nounwind readnone }
  attributes #2 = { nounwind "target-features"="+fp32-denormals" }
  attributes #3 = { nounwind "target-features"="-fp64-fp16-denormals" }
diff --git a/llvm/test/CodeGen/AMDGPU/clamp.ll b/llvm/test/CodeGen/AMDGPU/clamp.ll

index 1fcdfd2..c3757c4 100644 (file)
--- a/llvm/test/CodeGen/AMDGPU/clamp.ll
+++ b/llvm/test/CodeGen/AMDGPU/clamp.ll
@@ -767,8 +767,8 @@ declare <2 x half> @llvm.fabs.v2f16(<2 x half>) #1
  declare <2 x half> @llvm.minnum.v2f16(<2 x half>, <2 x half>) #1
  declare <2 x half> @llvm.maxnum.v2f16(<2 x half>, <2 x half>) #1
  
-attributes #0 = { nounwind }
+attributes #0 = { nounwind "target-features"="-fp32-denormals" }
  attributes #1 = { nounwind readnone }
-attributes #2 = { nounwind "amdgpu-dx10-clamp"="false" "target-features"="-fp-exceptions" "no-nans-fp-math"="false" }
-attributes #3 = { nounwind "amdgpu-dx10-clamp"="true" "target-features"="+fp-exceptions" "no-nans-fp-math"="false" }
-attributes #4 = { nounwind "amdgpu-dx10-clamp"="false" "target-features"="+fp-exceptions" "no-nans-fp-math"="false" }
+attributes #2 = { nounwind "amdgpu-dx10-clamp"="false" "target-features"="-fp32-denormals,-fp-exceptions" "no-nans-fp-math"="false" }
+attributes #3 = { nounwind "amdgpu-dx10-clamp"="true" "target-features"="-fp32-denormals,+fp-exceptions" "no-nans-fp-math"="false" }
+attributes #4 = { nounwind "amdgpu-dx10-clamp"="false" "target-features"="-fp32-denormals,+fp-exceptions" "no-nans-fp-math"="false" }
diff --git a/llvm/test/CodeGen/AMDGPU/default-fp-mode.ll b/llvm/test/CodeGen/AMDGPU/default-fp-mode.ll

index fb64919..c37c279 100644 (file)
--- a/llvm/test/CodeGen/AMDGPU/default-fp-mode.ll
+++ b/llvm/test/CodeGen/AMDGPU/default-fp-mode.ll
@@ -1,7 +1,7 @@
  ; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
  
  ; GCN-LABEL: {{^}}test_default_si:
-; GCN: FloatMode: 192
+; GCN: FloatMode: 240
  ; GCN: IeeeMode: 1
  define amdgpu_kernel void @test_default_si(float addrspace(1)* %out0, double addrspace(1)* %out1) #0 {
    store float 0.0, float addrspace(1)* %out0
@@ -10,7 +10,7 @@ define amdgpu_kernel void @test_default_si(float addrspace(1)* %out0, double add
  }
  
  ; GCN-LABEL: {{^}}test_default_vi:
-; GCN: FloatMode: 192
+; GCN: FloatMode: 240
  ; GCN: IeeeMode: 1
  define amdgpu_kernel void @test_default_vi(float addrspace(1)* %out0, double addrspace(1)* %out1) #1 {
    store float 0.0, float addrspace(1)* %out0
@@ -19,7 +19,7 @@ define amdgpu_kernel void @test_default_vi(float addrspace(1)* %out0, double add
  }
  
  ; GCN-LABEL: {{^}}test_f64_denormals:
-; GCN: FloatMode: 192
+; GCN: FloatMode: 240
  ; GCN: IeeeMode: 1
  define amdgpu_kernel void @test_f64_denormals(float addrspace(1)* %out0, double addrspace(1)* %out1) #2 {
    store float 0.0, float addrspace(1)* %out0
@@ -55,7 +55,7 @@ define amdgpu_kernel void @test_no_denormals(float addrspace(1)* %out0, double a
  }
  
  ; GCN-LABEL: {{^}}test_f16_f64_denormals:
-; GCN: FloatMode: 192
+; GCN: FloatMode: 240
  ; GCN: IeeeMode: 1
  define amdgpu_kernel void @test_f16_f64_denormals(half addrspace(1)* %out0, double addrspace(1)* %out1) #6 {
    store half 0.0, half addrspace(1)* %out0
@@ -64,7 +64,7 @@ define amdgpu_kernel void @test_f16_f64_denormals(half addrspace(1)* %out0, doub
  }
  
  ; GCN-LABEL: {{^}}test_no_f16_f64_denormals:
-; GCN: FloatMode: 0
+; GCN: FloatMode: 48
  ; GCN: IeeeMode: 1
  define amdgpu_kernel void @test_no_f16_f64_denormals(half addrspace(1)* %out0, double addrspace(1)* %out1) #7 {
    store half 0.0, half addrspace(1)* %out0
@@ -82,7 +82,9 @@ define amdgpu_kernel void @test_f32_f16_f64_denormals(half addrspace(1)* %out0,
    ret void
  }
  
+; FIXME: Denormals should be off by default
  ; GCN-LABEL: {{^}}kill_gs_const:
+; GCN: FloatMode: 240
  ; GCN: IeeeMode: 0
  define amdgpu_gs void @kill_gs_const() {
  main_body:
@@ -94,6 +96,7 @@ main_body:
  }
  
  ; GCN-LABEL: {{^}}kill_vcc_implicit_def:
+; GCN: FloatMode: 240
  ; GCN: IeeeMode: 0
  define amdgpu_ps float @kill_vcc_implicit_def([6 x <16 x i8>] addrspace(4)* inreg, [17 x <16 x i8>] addrspace(4)* inreg, [17 x <4 x i32>] addrspace(4)* inreg, [34 x <8 x i32>] addrspace(4)* inreg, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) {
  entry:
diff --git a/llvm/test/CodeGen/AMDGPU/fcanonicalize.f16.ll b/llvm/test/CodeGen/AMDGPU/fcanonicalize.f16.ll

index 346430d..e1aaeee 100644 (file)
--- a/llvm/test/CodeGen/AMDGPU/fcanonicalize.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/fcanonicalize.f16.ll
@@ -739,6 +739,6 @@ define <4 x half> @v_test_canonicalize_reg_undef_reg_reg_v4f16(half %val0, half
  }
  
  attributes #0 = { nounwind readnone }
-attributes #1 = { nounwind }
-attributes #2 = { nounwind "target-features"="-fp64-fp16-denormals" }
-attributes #3 = { nounwind "target-features"="+fp64-fp16-denormals" }
+attributes #1 = { nounwind "target-features"="-fp32-denormals" }
+attributes #2 = { nounwind "target-features"="-fp32-denormals,-fp64-fp16-denormals" }
+attributes #3 = { nounwind "target-features"="-fp32-denormals,+fp64-fp16-denormals" }
diff --git a/llvm/test/CodeGen/AMDGPU/fcanonicalize.ll b/llvm/test/CodeGen/AMDGPU/fcanonicalize.ll

index 8ea72ec..f17536e 100644 (file)
--- a/llvm/test/CodeGen/AMDGPU/fcanonicalize.ll
+++ b/llvm/test/CodeGen/AMDGPU/fcanonicalize.ll
@@ -625,7 +625,7 @@ define <4 x double> @v_test_canonicalize_v4f64(<4 x double> %arg) #1 {
  }
  
  attributes #0 = { nounwind readnone }
-attributes #1 = { nounwind }
+attributes #1 = { nounwind "target-features"="-fp32-denormals" }
  attributes #2 = { nounwind "target-features"="-fp32-denormals,-fp64-fp16-denormals" }
  attributes #3 = { nounwind "target-features"="+fp32-denormals,+fp64-fp16-denormals" }
  attributes #4 = { nounwind "target-features"="-fp32-denormals,-fp64-fp16-denormals" "target-cpu"="tonga" }
diff --git a/llvm/test/CodeGen/AMDGPU/fdiv.f16.ll b/llvm/test/CodeGen/AMDGPU/fdiv.f16.ll

index f526ad3..c5df485 100644 (file)
--- a/llvm/test/CodeGen/AMDGPU/fdiv.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/fdiv.f16.ll
@@ -1,8 +1,8 @@
-; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
-; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -mattr=+fp16-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX8_9_10 %s
-; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -mattr=-fp16-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX8_9_10 %s
-; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX8_9_10 %s
-; RUN: llc -march=amdgcn -mcpu=gfx1010 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX8_9_10 %s
+; RUN: llc -march=amdgcn -mcpu=tahiti -mattr=-fp32-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
+; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-fp32-denormals,-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX8_9_10 %s
+; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-fp32-denormals,-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX8_9_10 %s
+; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=-fp32-denormals,-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX8_9_10 %s
+; RUN: llc -march=amdgcn -mcpu=gfx1010 -mattr=-fp32-denormals,-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX8_9_10 %s
  
  ; Make sure fdiv is promoted to f32.
  
diff --git a/llvm/test/CodeGen/AMDGPU/fdot2.ll b/llvm/test/CodeGen/AMDGPU/fdot2.ll

index dbfcd5d..2143fa0 100644 (file)
--- a/llvm/test/CodeGen/AMDGPU/fdot2.ll
+++ b/llvm/test/CodeGen/AMDGPU/fdot2.ll
@@ -1,8 +1,8 @@
-; RUN: llc -march=amdgcn -mcpu=gfx900 -enable-unsafe-fp-math -verify-machineinstrs < %s | FileCheck %s  -check-prefixes=GCN,GFX900
-; RUN: llc -march=amdgcn -mcpu=gfx906 -enable-unsafe-fp-math -verify-machineinstrs < %s | FileCheck %s  -check-prefixes=GCN,GCN-DL-UNSAFE,GFX906-DL-UNSAFE
-; RUN: llc -march=amdgcn -mcpu=gfx1011 -enable-unsafe-fp-math -verify-machineinstrs < %s | FileCheck %s  -check-prefixes=GCN,GCN-DL-UNSAFE,GFX10-DL-UNSAFE,GFX10-CONTRACT
-; RUN: llc -march=amdgcn -mcpu=gfx1012 -enable-unsafe-fp-math -verify-machineinstrs < %s | FileCheck %s  -check-prefixes=GCN,GCN-DL-UNSAFE,GFX10-DL-UNSAFE,GFX10-CONTRACT
-; RUN: llc -march=amdgcn -mcpu=gfx906 -verify-machineinstrs < %s | FileCheck %s  -check-prefixes=GCN,GFX906
+; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=-fp32-denormals -enable-unsafe-fp-math -verify-machineinstrs < %s | FileCheck %s  -check-prefixes=GCN,GFX900
+; RUN: llc -march=amdgcn -mcpu=gfx906 -mattr=-fp32-denormals -enable-unsafe-fp-math -verify-machineinstrs < %s | FileCheck %s  -check-prefixes=GCN,GCN-DL-UNSAFE,GFX906-DL-UNSAFE
+; RUN: llc -march=amdgcn -mcpu=gfx1011 -mattr=-fp32-denormals -enable-unsafe-fp-math -verify-machineinstrs < %s | FileCheck %s  -check-prefixes=GCN,GCN-DL-UNSAFE,GFX10-DL-UNSAFE,GFX10-CONTRACT
+; RUN: llc -march=amdgcn -mcpu=gfx1012 -mattr=-fp32-denormals -enable-unsafe-fp-math -verify-machineinstrs < %s | FileCheck %s  -check-prefixes=GCN,GCN-DL-UNSAFE,GFX10-DL-UNSAFE,GFX10-CONTRACT
+; RUN: llc -march=amdgcn -mcpu=gfx906 -mattr=-fp32-denormals -verify-machineinstrs < %s | FileCheck %s  -check-prefixes=GCN,GFX906
  ; RUN: llc -march=amdgcn -mcpu=gfx906 -mattr=-fp64-fp16-denormals,-fp32-denormals -fp-contract=fast -verify-machineinstrs < %s | FileCheck %s  -check-prefixes=GCN,GFX906-CONTRACT
  ; RUN: llc -march=amdgcn -mcpu=gfx906 -mattr=+fp64-fp16-denormals,+fp32-denormals -fp-contract=fast -verify-machineinstrs < %s | FileCheck %s  -check-prefixes=GCN,GFX906-DENORM-CONTRACT
  ; (fadd (fmul S1.x, S2.x), (fadd (fmul (S1.y, S2.y), z))) -> (fdot2 S1, S2, z)
diff --git a/llvm/test/CodeGen/AMDGPU/fma-combine.ll b/llvm/test/CodeGen/AMDGPU/fma-combine.ll

index 5d78dda..98af82f 100644 (file)
--- a/llvm/test/CodeGen/AMDGPU/fma-combine.ll
+++ b/llvm/test/CodeGen/AMDGPU/fma-combine.ll
@@ -1,5 +1,5 @@
-; RUN:  llc -amdgpu-scalarize-global-loads=false  -march=amdgcn -mcpu=tahiti -verify-machineinstrs -fp-contract=fast < %s | FileCheck -enable-var-scope -check-prefix=SI-NOFMA -check-prefix=SI-SAFE -check-prefix=SI -check-prefix=FUNC %s
-; RUN:  llc -amdgpu-scalarize-global-loads=false  -march=amdgcn -mcpu=verde -verify-machineinstrs -fp-contract=fast < %s | FileCheck -enable-var-scope -check-prefix=SI-NOFMA -check-prefix=SI-SAFE -check-prefix=SI -check-prefix=FUNC %s
+; RUN:  llc -amdgpu-scalarize-global-loads=false  -march=amdgcn -mcpu=tahiti -mattr=-fp32-denormals -verify-machineinstrs -fp-contract=fast < %s | FileCheck -enable-var-scope -check-prefix=SI-NOFMA -check-prefix=SI-SAFE -check-prefix=SI -check-prefix=FUNC %s
+; RUN:  llc -amdgpu-scalarize-global-loads=false  -march=amdgcn -mcpu=verde -mattr=-fp32-denormals -verify-machineinstrs -fp-contract=fast < %s | FileCheck -enable-var-scope -check-prefix=SI-NOFMA -check-prefix=SI-SAFE -check-prefix=SI -check-prefix=FUNC %s
  ; RUN:  llc -amdgpu-scalarize-global-loads=false  -march=amdgcn -mcpu=tahiti -verify-machineinstrs -fp-contract=fast -enable-no-infs-fp-math -enable-unsafe-fp-math -mattr=+fp32-denormals < %s | FileCheck -enable-var-scope -check-prefix=SI-FMA -check-prefix=SI-UNSAFE -check-prefix=SI -check-prefix=FUNC %s
  
  ; FIXME: Remove enable-unsafe-fp-math in RUN line and add flags to IR instrs
diff --git a/llvm/test/CodeGen/AMDGPU/fmaxnum.ll b/llvm/test/CodeGen/AMDGPU/fmaxnum.ll

index 7e16d1b..d5826ab 100644 (file)
--- a/llvm/test/CodeGen/AMDGPU/fmaxnum.ll
+++ b/llvm/test/CodeGen/AMDGPU/fmaxnum.ll
@@ -1,5 +1,5 @@
-; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s
-; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s
+; RUN: llc -march=amdgcn -mattr=-fp32-denormals -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s
+; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-fp32-denormals,-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s
  
  ; GCN-LABEL: {{^}}test_fmax_f32_ieee_mode_on:
  ; GCN: v_mul_f32_e64 [[QUIET0:v[0-9]+]], 1.0, s{{[0-9]+}}
diff --git a/llvm/test/CodeGen/AMDGPU/fminnum.ll b/llvm/test/CodeGen/AMDGPU/fminnum.ll

index a8574b2..e7f9880 100644 (file)
--- a/llvm/test/CodeGen/AMDGPU/fminnum.ll
+++ b/llvm/test/CodeGen/AMDGPU/fminnum.ll
@@ -1,5 +1,5 @@
-; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s
-; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s
+; RUN: llc -march=amdgcn -mattr=-fp32-denormals -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s
+; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-fp32-denormals,-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s
  
  ; GCN-LABEL: {{^}}test_fmin_f32_ieee_mode_on:
  ; GCN: v_mul_f32_e64 [[QUIET0:v[0-9]+]], 1.0, s{{[0-9]+}}
diff --git a/llvm/test/CodeGen/AMDGPU/fmul-2-combine-multi-use.ll b/llvm/test/CodeGen/AMDGPU/fmul-2-combine-multi-use.ll

index af54cbf..00f97b8 100644 (file)
--- a/llvm/test/CodeGen/AMDGPU/fmul-2-combine-multi-use.ll
+++ b/llvm/test/CodeGen/AMDGPU/fmul-2-combine-multi-use.ll
@@ -1,8 +1,8 @@
-; XUN: llc -mtriple=amdgcn-amd-amdhsa -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,SI %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=tonga -mattr=+fp64-fp16-denormals,-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI,SIVI,VI-DENORM %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=tonga -mattr=-fp64-fp16-denormals,-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI,SIVI,VI-FLUSH %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=+fp64-fp16-denormals,-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX10,GFX8_10,GFX10-DENORM %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=-fp64-fp16-denormals,-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX10,GFX8_10,GFX10-FLUSH %s
+; XUN: llc -mtriple=amdgcn-amd-amdhsa -mattr=-fp32-denormals -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,SI %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=tonga -mattr=-fp32-denormals,+fp64-fp16-denormals,-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI,SIVI,VI-DENORM %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=tonga -mattr=-fp32-denormals,-fp64-fp16-denormals,-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI,SIVI,VI-FLUSH %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=-fp32-denormals,+fp64-fp16-denormals,-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX10,GFX8_10,GFX10-DENORM %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=-fp32-denormals,-fp64-fp16-denormals,-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX10,GFX8_10,GFX10-FLUSH %s
  
  ; Make sure (fmul (fadd x, x), c) -> (fmul x, (fmul 2.0, c)) doesn't
  ; make add an instruction if the fadd has more than one use.
diff --git a/llvm/test/CodeGen/AMDGPU/fneg-combines.ll b/llvm/test/CodeGen/AMDGPU/fneg-combines.ll

index 45cfaa7..bc2edbe 100644 (file)
--- a/llvm/test/CodeGen/AMDGPU/fneg-combines.ll
+++ b/llvm/test/CodeGen/AMDGPU/fneg-combines.ll
@@ -1,8 +1,8 @@
-; RUN: llc -march=amdgcn -mcpu=hawaii -start-after=sink -mattr=+flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GCN-SAFE -check-prefix=SI -check-prefix=FUNC %s
-; RUN: llc -enable-no-signed-zeros-fp-math -march=amdgcn -mcpu=hawaii -mattr=+flat-for-global -start-after=sink -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GCN-NSZ -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=hawaii -start-after=sink -mattr=-fp32-denormals,+flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GCN-SAFE -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -enable-no-signed-zeros-fp-math -march=amdgcn -mcpu=hawaii -mattr=-fp32-denormals,+flat-for-global -start-after=sink -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GCN-NSZ -check-prefix=SI -check-prefix=FUNC %s
  
-; RUN: llc -march=amdgcn -mcpu=fiji -start-after=sink --verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GCN-SAFE -check-prefix=VI -check-prefix=FUNC %s
-; RUN: llc -enable-no-signed-zeros-fp-math -march=amdgcn -mcpu=fiji -start-after=sink -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GCN-NSZ -check-prefix=VI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-fp32-denormals -start-after=sink --verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GCN-SAFE -check-prefix=VI -check-prefix=FUNC %s
+; RUN: llc -enable-no-signed-zeros-fp-math -march=amdgcn -mcpu=fiji -mattr=-fp32-denormals -start-after=sink -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GCN-NSZ -check-prefix=VI -check-prefix=FUNC %s
  
  ; --------------------------------------------------------------------------------
  ; fadd tests
diff --git a/llvm/test/CodeGen/AMDGPU/frem.ll b/llvm/test/CodeGen/AMDGPU/frem.ll

index 1305dae..17e231a 100644 (file)
--- a/llvm/test/CodeGen/AMDGPU/frem.ll
+++ b/llvm/test/CodeGen/AMDGPU/frem.ll
@@ -1,6 +1,6 @@
-; RUN:  llc -amdgpu-scalarize-global-loads=false  -march=amdgcn -mcpu=tahiti -verify-machineinstrs  < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=FUNC %s
-; RUN:  llc -amdgpu-scalarize-global-loads=false  -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=GCN -check-prefix=FUNC %s
-; RUN:  llc -amdgpu-scalarize-global-loads=false  -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=GCN -check-prefix=FUNC %s
+; RUN:  llc -amdgpu-scalarize-global-loads=false  -march=amdgcn -mattr=-fp32-denormals -verify-machineinstrs  < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=FUNC %s
+; RUN:  llc -amdgpu-scalarize-global-loads=false  -march=amdgcn -mcpu=bonaire -mattr=-fp32-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=GCN -check-prefix=FUNC %s
+; RUN:  llc -amdgpu-scalarize-global-loads=false  -march=amdgcn -mcpu=tonga -mattr=-fp32-denormals,-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=GCN -check-prefix=FUNC %s
  
  ; FUNC-LABEL: {{^}}frem_f32:
  ; GCN-DAG: buffer_load_dword [[X:v[0-9]+]], {{.*$}}
diff --git a/llvm/test/CodeGen/AMDGPU/hsa-fp-mode.ll b/llvm/test/CodeGen/AMDGPU/hsa-fp-mode.ll

index 53e5dad..1c501d2 100644 (file)
--- a/llvm/test/CodeGen/AMDGPU/hsa-fp-mode.ll
+++ b/llvm/test/CodeGen/AMDGPU/hsa-fp-mode.ll
@@ -1,7 +1,7 @@
  ; RUN: llc -mtriple=amdgcn--amdhsa -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
  
  ; GCN-LABEL: {{^}}test_default_ci:
-; GCN: float_mode = 192
+; GCN: float_mode = 240
  ; GCN: enable_dx10_clamp = 1
  ; GCN: enable_ieee_mode = 1
  define amdgpu_kernel void @test_default_ci(float addrspace(1)* %out0, double addrspace(1)* %out1) #0 {
@@ -11,7 +11,7 @@ define amdgpu_kernel void @test_default_ci(float addrspace(1)* %out0, double add
  }
  
  ; GCN-LABEL: {{^}}test_default_vi:
-; GCN: float_mode = 192
+; GCN: float_mode = 240
  ; GCN: enable_dx10_clamp = 1
  ; GCN: enable_ieee_mode = 1
  define amdgpu_kernel void @test_default_vi(float addrspace(1)* %out0, double addrspace(1)* %out1) #1 {
@@ -61,7 +61,7 @@ define amdgpu_kernel void @test_no_denormals(float addrspace(1)* %out0, double a
  }
  
  ; GCN-LABEL: {{^}}test_no_dx10_clamp_vi:
-; GCN: float_mode = 192
+; GCN: float_mode = 240
  ; GCN: enable_dx10_clamp = 0
  ; GCN: enable_ieee_mode = 1
  define amdgpu_kernel void @test_no_dx10_clamp_vi(float addrspace(1)* %out0, double addrspace(1)* %out1) #6 {
@@ -71,7 +71,7 @@ define amdgpu_kernel void @test_no_dx10_clamp_vi(float addrspace(1)* %out0, doub
  }
  
  ; GCN-LABEL: {{^}}test_no_ieee_mode_vi:
-; GCN: float_mode = 192
+; GCN: float_mode = 240
  ; GCN: enable_dx10_clamp = 1
  ; GCN: enable_ieee_mode = 0
  define amdgpu_kernel void @test_no_ieee_mode_vi(float addrspace(1)* %out0, double addrspace(1)* %out1) #7 {
@@ -81,7 +81,7 @@ define amdgpu_kernel void @test_no_ieee_mode_vi(float addrspace(1)* %out0, doubl
  }
  
  ; GCN-LABEL: {{^}}test_no_ieee_mode_no_dx10_clamp_vi:
-; GCN: float_mode = 192
+; GCN: float_mode = 240
  ; GCN: enable_dx10_clamp = 0
  ; GCN: enable_ieee_mode = 0
  define amdgpu_kernel void @test_no_ieee_mode_no_dx10_clamp_vi(float addrspace(1)* %out0, double addrspace(1)* %out1) #8 {
diff --git a/llvm/test/CodeGen/AMDGPU/indirect-call.ll b/llvm/test/CodeGen/AMDGPU/indirect-call.ll

index ca033e5..8432d29 100644 (file)
--- a/llvm/test/CodeGen/AMDGPU/indirect-call.ll
+++ b/llvm/test/CodeGen/AMDGPU/indirect-call.ll
@@ -18,7 +18,7 @@ define amdgpu_kernel void @test_indirect_call_sgpr_ptr() {
  ; GCN-NEXT:     granulated_workitem_vgpr_count = 7
  ; GCN-NEXT:     granulated_wavefront_sgpr_count = 5
  ; GCN-NEXT:     priority = 0
-; GCN-NEXT:     float_mode = 192
+; GCN-NEXT:     float_mode = 240
  ; GCN-NEXT:     priv = 0
  ; GCN-NEXT:     enable_dx10_clamp = 1
  ; GCN-NEXT:     debug_mode = 0
@@ -111,7 +111,7 @@ define amdgpu_kernel void @test_indirect_call_sgpr_ptr_arg() {
  ; GCN-NEXT:     granulated_workitem_vgpr_count = 7
  ; GCN-NEXT:     granulated_wavefront_sgpr_count = 5
  ; GCN-NEXT:     priority = 0
-; GCN-NEXT:     float_mode = 192
+; GCN-NEXT:     float_mode = 240
  ; GCN-NEXT:     priv = 0
  ; GCN-NEXT:     enable_dx10_clamp = 1
  ; GCN-NEXT:     debug_mode = 0
diff --git a/llvm/test/CodeGen/AMDGPU/known-never-snan.ll b/llvm/test/CodeGen/AMDGPU/known-never-snan.ll

index 25f110f..0198195 100644 (file)
--- a/llvm/test/CodeGen/AMDGPU/known-never-snan.ll
+++ b/llvm/test/CodeGen/AMDGPU/known-never-snan.ll
@@ -1,5 +1,5 @@
  ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -mattr=-fp32-denormals -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s
  
  ; Mostly overlaps with fmed3.ll to stress specific cases of
  ; isKnownNeverSNaN.
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.fmuladd.f16.ll b/llvm/test/CodeGen/AMDGPU/llvm.fmuladd.f16.ll

index ff644b0..4836c1d 100644 (file)
--- a/llvm/test/CodeGen/AMDGPU/llvm.fmuladd.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.fmuladd.f16.ll
@@ -1,9 +1,9 @@
-; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tahiti -mattr=-fp64-fp16-denormals -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=SI -check-prefix=SI-FLUSH %s
-; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=fiji -mattr=-fp64-fp16-denormals,-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=VI -check-prefix=VI-FLUSH %s
-; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tahiti -mattr=+fp64-fp16-denormals -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=SI -check-prefix=SI-DENORM %s
-; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=fiji -mattr=+fp64-fp16-denormals,-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=VI -check-prefix=VI-DENORM %s
-; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx1010 -mattr=-fp64-fp16-denormals,-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GFX10 -check-prefix=GFX10-FLUSH %s
-; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx1010 -mattr=+fp64-fp16-denormals,-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GFX10 -check-prefix=GFX10-DENORM %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tahiti -mattr=-fp32-denormals,-fp64-fp16-denormals -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=SI -check-prefix=SI-FLUSH %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=fiji -mattr=-fp32-denormals,-fp64-fp16-denormals,-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=VI -check-prefix=VI-FLUSH %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tahiti -mattr=-fp32-denormals,+fp64-fp16-denormals -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=SI -check-prefix=SI-DENORM %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=fiji -mattr=-fp32-denormals,+fp64-fp16-denormals,-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=VI -check-prefix=VI-DENORM %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx1010 -mattr=-fp32-denormals,-fp64-fp16-denormals,-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GFX10 -check-prefix=GFX10-FLUSH %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx1010 -mattr=-fp32-denormals,+fp64-fp16-denormals,-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GFX10 -check-prefix=GFX10-DENORM %s
  
  declare half @llvm.fmuladd.f16(half %a, half %b, half %c)
  declare <2 x half> @llvm.fmuladd.v2f16(<2 x half> %a, <2 x half> %b, <2 x half> %c)
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.maxnum.f16.ll b/llvm/test/CodeGen/AMDGPU/llvm.maxnum.f16.ll

index a530021..082c453 100644 (file)
--- a/llvm/test/CodeGen/AMDGPU/llvm.maxnum.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.maxnum.f16.ll
@@ -1,7 +1,7 @@
  ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=amdgcn-- -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=SI %s
-; RUN: llc -mtriple=amdgcn-- -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=VI,SIVI %s
-; RUN: llc -mtriple=amdgcn-- -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX9 %s
+; RUN: llc -mtriple=amdgcn-- -mcpu=tahiti -mattr=-fp32-denormals -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=SI %s
+; RUN: llc -mtriple=amdgcn-- -mcpu=fiji -mattr=-fp32-denormals,-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=VI,SIVI %s
+; RUN: llc -mtriple=amdgcn-- -mcpu=gfx900 -mattr=-fp32-denormals,-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX9 %s
  
  declare half @llvm.maxnum.f16(half %a, half %b)
  declare <2 x half> @llvm.maxnum.v2f16(<2 x half> %a, <2 x half> %b)
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.minnum.f16.ll b/llvm/test/CodeGen/AMDGPU/llvm.minnum.f16.ll

index fcb483f..4d8169a 100644 (file)
--- a/llvm/test/CodeGen/AMDGPU/llvm.minnum.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.minnum.f16.ll
@@ -1,7 +1,7 @@
  ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=amdgcn-- -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=SI %s
-; RUN: llc -mtriple=amdgcn-- -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=VI %s
-; RUN: llc -mtriple=amdgcn-- -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX9 %s
+; RUN: llc -mtriple=amdgcn-- -mcpu=tahiti -mattr=-fp32-denormals -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=SI %s
+; RUN: llc -mtriple=amdgcn-- -mcpu=fiji -mattr=-fp32-denormals,-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=VI %s
+; RUN: llc -mtriple=amdgcn-- -mcpu=gfx900 -mattr=-fp32-denormals,-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX9 %s
  
  declare half @llvm.minnum.f16(half %a, half %b)
  declare <2 x half> @llvm.minnum.v2f16(<2 x half> %a, <2 x half> %b)
diff --git a/llvm/test/CodeGen/AMDGPU/mad-combine.ll b/llvm/test/CodeGen/AMDGPU/mad-combine.ll

index 09bc371..a46aff7 100644 (file)
--- a/llvm/test/CodeGen/AMDGPU/mad-combine.ll
+++ b/llvm/test/CodeGen/AMDGPU/mad-combine.ll
@@ -1,8 +1,8 @@
  ; Make sure we still form mad even when unsafe math or fp-contract is allowed instead of fma.
  
-; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=SI -check-prefix=SI-STD  -check-prefix=SI-STD-SAFE -check-prefix=FUNC %s
-; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs -fp-contract=fast < %s | FileCheck -enable-var-scope -check-prefix=SI -check-prefix=SI-STD -check-prefix=SI-STD-SAFE -check-prefix=FUNC %s
-; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -enable-var-scope -check-prefix=SI -check-prefix=SI-STD -check-prefix=SI-STD-UNSAFE -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=tahiti -mattr=-fp32-denormals -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=SI -check-prefix=SI-STD  -check-prefix=SI-STD-SAFE -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=tahiti -mattr=-fp32-denormals -verify-machineinstrs -fp-contract=fast < %s | FileCheck -enable-var-scope -check-prefix=SI -check-prefix=SI-STD -check-prefix=SI-STD-SAFE -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=tahiti -mattr=-fp32-denormals -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -enable-var-scope -check-prefix=SI -check-prefix=SI-STD -check-prefix=SI-STD-UNSAFE -check-prefix=FUNC %s
  
  ; FIXME: Remove enable-unsafe-fp-math in RUN line and add flags to IR instrs
  
diff --git a/llvm/test/CodeGen/AMDGPU/mad-mix-hi.ll b/llvm/test/CodeGen/AMDGPU/mad-mix-hi.ll

index 6c27690..4d9607d 100644 (file)
--- a/llvm/test/CodeGen/AMDGPU/mad-mix-hi.ll
+++ b/llvm/test/CodeGen/AMDGPU/mad-mix-hi.ll
@@ -143,5 +143,5 @@ declare float @llvm.maxnum.f32(float, float) #1
  declare float @llvm.fmuladd.f32(float, float, float) #1
  declare <2 x float> @llvm.fmuladd.v2f32(<2 x float>, <2 x float>, <2 x float>) #1
  
-attributes #0 = { nounwind }
+attributes #0 = { nounwind "target-features"="-fp32-denormals" }
  attributes #1 = { nounwind readnone speculatable }
diff --git a/llvm/test/CodeGen/AMDGPU/mad-mix-lo.ll b/llvm/test/CodeGen/AMDGPU/mad-mix-lo.ll

index 43e9611..759b5ae 100644 (file)
--- a/llvm/test/CodeGen/AMDGPU/mad-mix-lo.ll
+++ b/llvm/test/CodeGen/AMDGPU/mad-mix-lo.ll
@@ -310,5 +310,5 @@ declare <2 x float> @llvm.fmuladd.v2f32(<2 x float>, <2 x float>, <2 x float>) #
  declare <3 x float> @llvm.fmuladd.v3f32(<3 x float>, <3 x float>, <3 x float>) #1
  declare <4 x float> @llvm.fmuladd.v4f32(<4 x float>, <4 x float>, <4 x float>) #1
  
-attributes #0 = { nounwind }
+attributes #0 = { nounwind "target-features"="-fp32-denormals" }
  attributes #1 = { nounwind readnone speculatable }
diff --git a/llvm/test/CodeGen/AMDGPU/madak.ll b/llvm/test/CodeGen/AMDGPU/madak.ll

index fe710f7..8b01c04 100644 (file)
--- a/llvm/test/CodeGen/AMDGPU/madak.ll
+++ b/llvm/test/CodeGen/AMDGPU/madak.ll
@@ -1,8 +1,8 @@
-; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX6,GFX6_8_9,MAD %s
-; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX8,GFX6_8_9,GFX8_9,GFX8_9_10,MAD %s
-; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs  -amdgpu-enable-global-sgpr-addr < %s | FileCheck -check-prefixes=GCN,GFX9,GFX6_8_9,GFX8_9,GFX8_9_10,MAD %s
-; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs -amdgpu-enable-global-sgpr-addr < %s | FileCheck -check-prefixes=GCN,GFX10,GFX8_9_10,GFX10-MAD %s
-; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs -fp-contract=fast -amdgpu-enable-global-sgpr-addr < %s | FileCheck -check-prefixes=GCN,GFX10,GFX8_9_10,FMA %s
+; RUN: llc -march=amdgcn -mcpu=tahiti -mattr=-fp32-denormals -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX6,GFX6_8_9,MAD %s
+; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-fp32-denormals -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX8,GFX6_8_9,GFX8_9,GFX8_9_10,MAD %s
+; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=-fp32-denormals -verify-machineinstrs  -amdgpu-enable-global-sgpr-addr < %s | FileCheck -check-prefixes=GCN,GFX9,GFX6_8_9,GFX8_9,GFX8_9_10,MAD %s
+; RUN: llc -march=amdgcn -mcpu=gfx1010 -mattr=-fp32-denormals -verify-machineinstrs -amdgpu-enable-global-sgpr-addr < %s | FileCheck -check-prefixes=GCN,GFX10,GFX8_9_10,GFX10-MAD %s
+; RUN: llc -march=amdgcn -mcpu=gfx1010 -mattr=-fp32-denormals -verify-machineinstrs -fp-contract=fast -amdgpu-enable-global-sgpr-addr < %s | FileCheck -check-prefixes=GCN,GFX10,GFX8_9_10,FMA %s
  
  declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
  declare float @llvm.fabs.f32(float) nounwind readnone
diff --git a/llvm/test/CodeGen/AMDGPU/madmk.ll b/llvm/test/CodeGen/AMDGPU/madmk.ll

index c9b9abf..b78a116 100644 (file)
--- a/llvm/test/CodeGen/AMDGPU/madmk.ll
+++ b/llvm/test/CodeGen/AMDGPU/madmk.ll
@@ -1,5 +1,5 @@
-; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
-; XUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
+; RUN: llc -march=amdgcn -mattr=-fp32-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
+; XUN: llc -march=amdgcn -mcpu=tonga -mattr=-fp32-denormals,-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
  
   ; FIXME: None of these trigger madmk emission anymore. It is still
   ; possible, but requires the correct registers to be used which is
diff --git a/llvm/test/CodeGen/AMDGPU/mul24-pass-ordering.ll b/llvm/test/CodeGen/AMDGPU/mul24-pass-ordering.ll

index 3eb4788..64c4f1a 100644 (file)
--- a/llvm/test/CodeGen/AMDGPU/mul24-pass-ordering.ll
+++ b/llvm/test/CodeGen/AMDGPU/mul24-pass-ordering.ll
@@ -1,5 +1,5 @@
  ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX9 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=-fp32-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=GFX9 %s
  
  ; Make sure that AMDGPUCodeGenPrepare introduces mul24 intrinsics
  ; after SLSR, as the intrinsics would interfere. It's unclear if these
diff --git a/llvm/test/CodeGen/AMDGPU/omod.ll b/llvm/test/CodeGen/AMDGPU/omod.ll

index 19ffc05..52f2d94 100644 (file)
--- a/llvm/test/CodeGen/AMDGPU/omod.ll
+++ b/llvm/test/CodeGen/AMDGPU/omod.ll
@@ -275,7 +275,7 @@ declare half @llvm.minnum.f16(half, half) #1
  declare half @llvm.maxnum.f16(half, half) #1
  declare void @llvm.dbg.value(metadata, i64, metadata, metadata) #1
  
-attributes #0 = { nounwind "no-signed-zeros-fp-math"="true" }
+attributes #0 = { nounwind "target-features"="-fp32-denormals" "no-signed-zeros-fp-math"="true" }
  attributes #1 = { nounwind readnone }
  attributes #2 = { nounwind "target-features"="+fp32-denormals" "no-signed-zeros-fp-math"="true" }
  attributes #3 = { nounwind "target-features"="-fp64-fp16-denormals" "no-signed-zeros-fp-math"="true" }
diff --git a/llvm/test/CodeGen/AMDGPU/operand-folding.ll b/llvm/test/CodeGen/AMDGPU/operand-folding.ll

index af00caa..0bdd692 100644 (file)
--- a/llvm/test/CodeGen/AMDGPU/operand-folding.ll
+++ b/llvm/test/CodeGen/AMDGPU/operand-folding.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -march=amdgcn -mattr=-fp32-denormals -verify-machineinstrs < %s | FileCheck %s
  
  ; CHECK-LABEL: {{^}}fold_sgpr:
  ; CHECK: v_add_i32_e32 v{{[0-9]+}}, vcc, s
diff --git a/llvm/test/CodeGen/AMDGPU/rcp-pattern.ll b/llvm/test/CodeGen/AMDGPU/rcp-pattern.ll

index b7552b0..1b9264b 100644 (file)
--- a/llvm/test/CodeGen/AMDGPU/rcp-pattern.ll
+++ b/llvm/test/CodeGen/AMDGPU/rcp-pattern.ll
@@ -1,5 +1,5 @@
-; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s
-; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mattr=-fp32-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-fp32-denormals,-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s
  ; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
  ; RUN: llc -march=r600 -mcpu=cayman -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
  
diff --git a/llvm/test/CodeGen/AMDGPU/rcp_iflag.ll b/llvm/test/CodeGen/AMDGPU/rcp_iflag.ll

index badaae3..1dabc37 100644 (file)
--- a/llvm/test/CodeGen/AMDGPU/rcp_iflag.ll
+++ b/llvm/test/CodeGen/AMDGPU/rcp_iflag.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck --check-prefix=GCN %s
+; RUN: llc -march=amdgcn -mattr=-fp32-denormals -verify-machineinstrs < %s | FileCheck --check-prefix=GCN %s
  
  ; GCN-LABEL: {{^}}rcp_uint:
  ; GCN: v_rcp_iflag_f32_e32
diff --git a/llvm/test/CodeGen/AMDGPU/stack-realign-kernel.ll b/llvm/test/CodeGen/AMDGPU/stack-realign-kernel.ll

index 93b6eac..2cd6172 100644 (file)
--- a/llvm/test/CodeGen/AMDGPU/stack-realign-kernel.ll
+++ b/llvm/test/CodeGen/AMDGPU/stack-realign-kernel.ll
@@ -37,7 +37,7 @@ define amdgpu_kernel void @max_alignment_128() #0 {
  ; VI-NEXT:     .amdhsa_reserve_vcc 0
  ; VI-NEXT:     .amdhsa_float_round_mode_32 0
  ; VI-NEXT:     .amdhsa_float_round_mode_16_64 0
-; VI-NEXT:     .amdhsa_float_denorm_mode_32 0
+; VI-NEXT:     .amdhsa_float_denorm_mode_32 3
  ; VI-NEXT:     .amdhsa_float_denorm_mode_16_64 3
  ; VI-NEXT:     .amdhsa_dx10_clamp 1
  ; VI-NEXT:     .amdhsa_ieee_mode 1
@@ -83,7 +83,7 @@ define amdgpu_kernel void @max_alignment_128() #0 {
  ; GFX9-NEXT:     .amdhsa_reserve_vcc 0
  ; GFX9-NEXT:     .amdhsa_float_round_mode_32 0
  ; GFX9-NEXT:     .amdhsa_float_round_mode_16_64 0
-; GFX9-NEXT:     .amdhsa_float_denorm_mode_32 0
+; GFX9-NEXT:     .amdhsa_float_denorm_mode_32 3
  ; GFX9-NEXT:     .amdhsa_float_denorm_mode_16_64 3
  ; GFX9-NEXT:     .amdhsa_dx10_clamp 1
  ; GFX9-NEXT:     .amdhsa_ieee_mode 1
@@ -136,7 +136,7 @@ define amdgpu_kernel void @stackrealign_attr() #1 {
  ; VI-NEXT:     .amdhsa_reserve_vcc 0
  ; VI-NEXT:     .amdhsa_float_round_mode_32 0
  ; VI-NEXT:     .amdhsa_float_round_mode_16_64 0
-; VI-NEXT:     .amdhsa_float_denorm_mode_32 0
+; VI-NEXT:     .amdhsa_float_denorm_mode_32 3
  ; VI-NEXT:     .amdhsa_float_denorm_mode_16_64 3
  ; VI-NEXT:     .amdhsa_dx10_clamp 1
  ; VI-NEXT:     .amdhsa_ieee_mode 1
@@ -182,7 +182,7 @@ define amdgpu_kernel void @stackrealign_attr() #1 {
  ; GFX9-NEXT:     .amdhsa_reserve_vcc 0
  ; GFX9-NEXT:     .amdhsa_float_round_mode_32 0
  ; GFX9-NEXT:     .amdhsa_float_round_mode_16_64 0
-; GFX9-NEXT:     .amdhsa_float_denorm_mode_32 0
+; GFX9-NEXT:     .amdhsa_float_denorm_mode_32 3
  ; GFX9-NEXT:     .amdhsa_float_denorm_mode_16_64 3
  ; GFX9-NEXT:     .amdhsa_dx10_clamp 1
  ; GFX9-NEXT:     .amdhsa_ieee_mode 1
@@ -235,7 +235,7 @@ define amdgpu_kernel void @alignstack_attr() #2 {
  ; VI-NEXT:     .amdhsa_reserve_vcc 0
  ; VI-NEXT:     .amdhsa_float_round_mode_32 0
  ; VI-NEXT:     .amdhsa_float_round_mode_16_64 0
-; VI-NEXT:     .amdhsa_float_denorm_mode_32 0
+; VI-NEXT:     .amdhsa_float_denorm_mode_32 3
  ; VI-NEXT:     .amdhsa_float_denorm_mode_16_64 3
  ; VI-NEXT:     .amdhsa_dx10_clamp 1
  ; VI-NEXT:     .amdhsa_ieee_mode 1
@@ -281,7 +281,7 @@ define amdgpu_kernel void @alignstack_attr() #2 {
  ; GFX9-NEXT:     .amdhsa_reserve_vcc 0
  ; GFX9-NEXT:     .amdhsa_float_round_mode_32 0
  ; GFX9-NEXT:     .amdhsa_float_round_mode_16_64 0
-; GFX9-NEXT:     .amdhsa_float_denorm_mode_32 0
+; GFX9-NEXT:     .amdhsa_float_denorm_mode_32 3
  ; GFX9-NEXT:     .amdhsa_float_denorm_mode_16_64 3
  ; GFX9-NEXT:     .amdhsa_dx10_clamp 1
  ; GFX9-NEXT:     .amdhsa_ieee_mode 1
diff --git a/llvm/test/CodeGen/AMDGPU/v_mac.ll b/llvm/test/CodeGen/AMDGPU/v_mac.ll

index e86b885..cd15d88 100644 (file)
--- a/llvm/test/CodeGen/AMDGPU/v_mac.ll
+++ b/llvm/test/CodeGen/AMDGPU/v_mac.ll
@@ -1,6 +1,6 @@
-; RUN:  llc -amdgpu-scalarize-global-loads=false  -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN %s
-; RUN:  llc -amdgpu-scalarize-global-loads=false  -march=amdgcn -mcpu=tonga -mattr=-fp64-fp16-denormals,-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=VI-FLUSH -check-prefix=GCN %s
-; RUN:  llc -amdgpu-scalarize-global-loads=false  -march=amdgcn -mcpu=tonga -mattr=+fp64-fp16-denormals,-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=VI-DENORM -check-prefix=GCN %s
+; RUN:  llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mattr=-fp32-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN %s
+; RUN:  llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tonga -mattr=-fp32-denormals,-fp64-fp16-denormals,-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=VI-FLUSH -check-prefix=GCN %s
+; RUN:  llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tonga -mattr=-fp32-denormals,+fp64-fp16-denormals,-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=VI-DENORM -check-prefix=GCN %s
  
  ; GCN-LABEL: {{^}}mac_vvv:
  ; GCN: buffer_load_dword [[A:v[0-9]+]], off, s[{{[0-9]+:[0-9]+}}], 0{{$}}
diff --git a/llvm/test/CodeGen/AMDGPU/v_mac_f16.ll b/llvm/test/CodeGen/AMDGPU/v_mac_f16.ll

index f928a32..4ff0d1b 100644 (file)
--- a/llvm/test/CodeGen/AMDGPU/v_mac_f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/v_mac_f16.ll
@@ -1,5 +1,5 @@
-; RUN:  llc -amdgpu-scalarize-global-loads=false  -march=amdgcn -mcpu=tahiti -mattr=-fp64-fp16-denormals -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefix=GCN -check-prefix=SI %s
-; RUN:  llc -amdgpu-scalarize-global-loads=false  -march=amdgcn -mcpu=fiji -mattr=-fp64-fp16-denormals,-flat-for-global -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefix=GCN -check-prefix=VI %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tahiti -mattr=-fp32-denormals,-fp64-fp16-denormals -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefix=GCN -check-prefix=SI %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=fiji -mattr=-fp32-denormals,-fp64-fp16-denormals,-flat-for-global -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefix=GCN -check-prefix=VI %s
  
  ; GCN-LABEL: {{^}}mac_f16:
  ; GCN: {{buffer|flat}}_load_ushort v[[A_F16:[0-9]+]]
diff --git a/llvm/test/CodeGen/AMDGPU/v_madak_f16.ll b/llvm/test/CodeGen/AMDGPU/v_madak_f16.ll

index 5f771bd..3e5c839 100644 (file)
--- a/llvm/test/CodeGen/AMDGPU/v_madak_f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/v_madak_f16.ll
@@ -1,6 +1,6 @@
  ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=amdgcn-- -mcpu=tahiti -mattr=-fp64-fp16-denormals -verify-machineinstrs | FileCheck %s -check-prefixes=GCN,SI
-; RUN: llc < %s -mtriple=amdgcn-- -mcpu=fiji -mattr=-fp64-fp16-denormals,-flat-for-global -verify-machineinstrs | FileCheck %s -check-prefixes=GCN,VI
+; RUN: llc < %s -mtriple=amdgcn-- -mcpu=tahiti -mattr=-fp32-denormals,-fp64-fp16-denormals -verify-machineinstrs | FileCheck %s -check-prefixes=GCN,SI
+; RUN: llc < %s -mtriple=amdgcn-- -mcpu=fiji -mattr=-fp32-denormals,-fp64-fp16-denormals,-flat-for-global -verify-machineinstrs | FileCheck %s -check-prefixes=GCN,VI
  
  define amdgpu_kernel void @madak_f16(
  ; SI-LABEL: madak_f16:
diff --git a/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info.ll b/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info.ll

index 975a4ea..94f926e 100644 (file)
--- a/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info.ll
+++ b/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info.ll
@@ -27,8 +27,8 @@
  ; CHECK-NEXT: mode:
  ; CHECK-NEXT: ieee: true
  ; CHECK-NEXT: dx10-clamp: true
-; CHECK-NEXT: fp32-input-denormals: false
-; CHECK-NEXT: fp32-output-denormals: false
+; CHECK-NEXT: fp32-input-denormals: true
+; CHECK-NEXT: fp32-output-denormals: true
  ; CHECK-NEXT: fp64-fp16-input-denormals: true
  ; CHECK-NEXT: fp64-fp16-output-denormals: true
  ; CHECK-NEXT: highBitsOf32BitAddress: 0
@@ -57,8 +57,8 @@ define amdgpu_kernel void @kernel(i32 %arg0, i64 %arg1, <16 x i32> %arg2) {
  ; CHECK-NEXT: mode:
  ; CHECK-NEXT: ieee: false
  ; CHECK-NEXT: dx10-clamp: true
-; CHECK-NEXT: fp32-input-denormals: false
-; CHECK-NEXT: fp32-output-denormals: false
+; CHECK-NEXT: fp32-input-denormals: true
+; CHECK-NEXT: fp32-output-denormals: true
  ; CHECK-NEXT: fp64-fp16-input-denormals: true
  ; CHECK-NEXT: fp64-fp16-output-denormals: true
  ; CHECK-NEXT: highBitsOf32BitAddress: 0
@@ -84,8 +84,8 @@ define amdgpu_ps void @ps_shader(i32 %arg0, i32 inreg %arg1) {
  ; CHECK-NEXT: mode:
  ; CHECK-NEXT: ieee: true
  ; CHECK-NEXT: dx10-clamp: true
-; CHECK-NEXT: fp32-input-denormals: false
-; CHECK-NEXT: fp32-output-denormals: false
+; CHECK-NEXT: fp32-input-denormals: true
+; CHECK-NEXT: fp32-output-denormals: true
  ; CHECK-NEXT: fp64-fp16-input-denormals: true
  ; CHECK-NEXT: fp64-fp16-output-denormals: true
  ; CHECK-NEXT: highBitsOf32BitAddress: 0
@@ -111,8 +111,8 @@ define void @function() {
  ; CHECK-NEXT: mode:
  ; CHECK-NEXT: ieee: true
  ; CHECK-NEXT: dx10-clamp: true
-; CHECK-NEXT: fp32-input-denormals: false
-; CHECK-NEXT: fp32-output-denormals: false
+; CHECK-NEXT: fp32-input-denormals: true
+; CHECK-NEXT: fp32-output-denormals: true
  ; CHECK-NEXT: fp64-fp16-input-denormals: true
  ; CHECK-NEXT: fp64-fp16-output-denormals: true
  ; CHECK-NEXT: highBitsOf32BitAddress: 0
@@ -125,8 +125,8 @@ define void @function_nsz() #0 {
  ; CHECK: mode:
  ; CHECK-NEXT: ieee: true
  ; CHECK-NEXT: dx10-clamp: false
-; CHECK-NEXT: fp32-input-denormals: false
-; CHECK-NEXT: fp32-output-denormals: false
+; CHECK-NEXT: fp32-input-denormals: true
+; CHECK-NEXT: fp32-output-denormals: true
  ; CHECK-NEXT: fp64-fp16-input-denormals: true
  ; CHECK-NEXT: fp64-fp16-output-denormals: true
  define void @function_dx10_clamp_off() #1 {
@@ -137,8 +137,8 @@ define void @function_dx10_clamp_off() #1 {
  ; CHECK: mode:
  ; CHECK-NEXT: ieee: false
  ; CHECK-NEXT: dx10-clamp: true
-; CHECK-NEXT: fp32-input-denormals: false
-; CHECK-NEXT: fp32-output-denormals: false
+; CHECK-NEXT: fp32-input-denormals: true
+; CHECK-NEXT: fp32-output-denormals: true
  ; CHECK-NEXT: fp64-fp16-input-denormals: true
  ; CHECK-NEXT: fp64-fp16-output-denormals: true
  define void @function_ieee_off() #2 {
@@ -149,8 +149,8 @@ define void @function_ieee_off() #2 {
  ; CHECK: mode:
  ; CHECK-NEXT: ieee: false
  ; CHECK-NEXT: dx10-clamp: false
-; CHECK-NEXT: fp32-input-denormals: false
-; CHECK-NEXT: fp32-output-denormals: false
+; CHECK-NEXT: fp32-input-denormals: true
+; CHECK-NEXT: fp32-output-denormals: true
  ; CHECK-NEXT: fp64-fp16-input-denormals: true
  ; CHECK-NEXT: fp64-fp16-output-denormals: true
  define void @function_ieee_off_dx10_clamp_off() #3 {
author	Matt Arsenault <Matthew.Arsenault@amd.com>
	Fri, 1 Nov 2019 06:32:31 +0000 (23:32 -0700)
committer	Matt Arsenault <arsenm2@gmail.com>
	Thu, 2 Apr 2020 21:17:12 +0000 (17:17 -0400)
llvm/docs/ReleaseNotes.rst		patch \| blob \| history
llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp		patch \| blob \| history
llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h		patch \| blob \| history
llvm/test/CodeGen/AMDGPU/GlobalISel/udiv.i64.ll		patch \| blob \| history
llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll		patch \| blob \| history
llvm/test/CodeGen/AMDGPU/amdgcn-ieee.ll		patch \| blob \| history
llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-fdiv.ll		patch \| blob \| history
llvm/test/CodeGen/AMDGPU/clamp-modifier.ll		patch \| blob \| history
llvm/test/CodeGen/AMDGPU/clamp.ll		patch \| blob \| history
llvm/test/CodeGen/AMDGPU/default-fp-mode.ll		patch \| blob \| history
llvm/test/CodeGen/AMDGPU/fcanonicalize.f16.ll		patch \| blob \| history
llvm/test/CodeGen/AMDGPU/fcanonicalize.ll		patch \| blob \| history
llvm/test/CodeGen/AMDGPU/fdiv.f16.ll		patch \| blob \| history
llvm/test/CodeGen/AMDGPU/fdot2.ll		patch \| blob \| history
llvm/test/CodeGen/AMDGPU/fma-combine.ll		patch \| blob \| history
llvm/test/CodeGen/AMDGPU/fmaxnum.ll		patch \| blob \| history
llvm/test/CodeGen/AMDGPU/fminnum.ll		patch \| blob \| history
llvm/test/CodeGen/AMDGPU/fmul-2-combine-multi-use.ll		patch \| blob \| history
llvm/test/CodeGen/AMDGPU/fneg-combines.ll		patch \| blob \| history
llvm/test/CodeGen/AMDGPU/frem.ll		patch \| blob \| history
llvm/test/CodeGen/AMDGPU/hsa-fp-mode.ll		patch \| blob \| history
llvm/test/CodeGen/AMDGPU/indirect-call.ll		patch \| blob \| history
llvm/test/CodeGen/AMDGPU/known-never-snan.ll		patch \| blob \| history
llvm/test/CodeGen/AMDGPU/llvm.fmuladd.f16.ll		patch \| blob \| history
llvm/test/CodeGen/AMDGPU/llvm.maxnum.f16.ll		patch \| blob \| history
llvm/test/CodeGen/AMDGPU/llvm.minnum.f16.ll		patch \| blob \| history
llvm/test/CodeGen/AMDGPU/mad-combine.ll		patch \| blob \| history
llvm/test/CodeGen/AMDGPU/mad-mix-hi.ll		patch \| blob \| history
llvm/test/CodeGen/AMDGPU/mad-mix-lo.ll		patch \| blob \| history
llvm/test/CodeGen/AMDGPU/madak.ll		patch \| blob \| history
llvm/test/CodeGen/AMDGPU/madmk.ll		patch \| blob \| history
llvm/test/CodeGen/AMDGPU/mul24-pass-ordering.ll		patch \| blob \| history
llvm/test/CodeGen/AMDGPU/omod.ll		patch \| blob \| history
llvm/test/CodeGen/AMDGPU/operand-folding.ll		patch \| blob \| history
llvm/test/CodeGen/AMDGPU/rcp-pattern.ll		patch \| blob \| history
llvm/test/CodeGen/AMDGPU/rcp_iflag.ll		patch \| blob \| history
llvm/test/CodeGen/AMDGPU/stack-realign-kernel.ll		patch \| blob \| history
llvm/test/CodeGen/AMDGPU/v_mac.ll		patch \| blob \| history
llvm/test/CodeGen/AMDGPU/v_mac_f16.ll		patch \| blob \| history
llvm/test/CodeGen/AMDGPU/v_madak_f16.ll		patch \| blob \| history
llvm/test/CodeGen/MIR/AMDGPU/machine-function-info.ll		patch \| blob \| history