From c6ae7b4763f6b12602f116cb15c5d19c628e2c21 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Mon, 14 Jul 2014 23:40:43 +0000 Subject: [PATCH] R600/SI: Default to no single precision denormals. llvm-svn: 213017 --- llvm/lib/Target/R600/AMDGPUAsmPrinter.cpp | 10 +++++++++- llvm/test/CodeGen/R600/default-fp-mode.ll | 2 +- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Target/R600/AMDGPUAsmPrinter.cpp b/llvm/lib/Target/R600/AMDGPUAsmPrinter.cpp index 8c5fc84..b2b7bf9 100644 --- a/llvm/lib/Target/R600/AMDGPUAsmPrinter.cpp +++ b/llvm/lib/Target/R600/AMDGPUAsmPrinter.cpp @@ -47,10 +47,18 @@ using namespace llvm; // precision, and leaves single precision to flush all and does not report // CL_FP_DENORM for CL_DEVICE_SINGLE_FP_CONFIG. Mesa's OpenCL currently reports // CL_FP_DENORM for both. +// +// FIXME: It seems some instructions do not support single precision denormals +// regardless of the mode (exp_*_f32, rcp_*_f32, rsq_*_f32, sqrt_f32, +// and sin_f32, cos_f32 on most parts). + +// We want to use these instructions, and using fp32 denormals also causes +// instructions to run at the double precision rate for the device, so it's +// probably best to just report no single precision denormals. 
static uint32_t getFPMode(const MachineFunction &) { return FP_ROUND_MODE_SP(FP_ROUND_ROUND_TO_NEAREST) | FP_ROUND_MODE_DP(FP_ROUND_ROUND_TO_NEAREST) | - FP_DENORM_MODE_SP(FP_DENORM_FLUSH_NONE) | + FP_DENORM_MODE_SP(FP_DENORM_FLUSH_IN_FLUSH_OUT) | FP_DENORM_MODE_DP(FP_DENORM_FLUSH_NONE); } diff --git a/llvm/test/CodeGen/R600/default-fp-mode.ll b/llvm/test/CodeGen/R600/default-fp-mode.ll index 214b2c2..4488bdb 100644 --- a/llvm/test/CodeGen/R600/default-fp-mode.ll +++ b/llvm/test/CodeGen/R600/default-fp-mode.ll @@ -1,7 +1,7 @@ ; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI %s ; SI-LABEL: @test_kernel -; SI: FloatMode: 240 +; SI: FloatMode: 192 ; SI: IeeeMode: 0 define void @test_kernel(float addrspace(1)* %out0, double addrspace(1)* %out1) nounwind { store float 0.0, float addrspace(1)* %out0 -- 2.7.4