From 08ad328ae20c6335d7354059ff1287671bb490c0 Mon Sep 17 00:00:00 2001
From: Matt Arsenault
Date: Sat, 31 Jan 2015 21:28:13 +0000
Subject: [PATCH] R600/SI: Only select cvt_flr/cvt_rpi with no NaNs.

These have different behavior from cvt_i32_f32 on NaN.

llvm-svn: 227693
---
 llvm/lib/Target/R600/AMDGPUInstructions.td |  6 ++++--
 llvm/test/CodeGen/R600/cvt_flr_i32_f32.ll  | 20 +++++++++++++-------
 llvm/test/CodeGen/R600/cvt_rpi_i32_f32.ll  | 22 ++++++++++++++--------
 3 files changed, 31 insertions(+), 17 deletions(-)

diff --git a/llvm/lib/Target/R600/AMDGPUInstructions.td b/llvm/lib/Target/R600/AMDGPUInstructions.td
index e42796b..6ab82a2 100644
--- a/llvm/lib/Target/R600/AMDGPUInstructions.td
+++ b/llvm/lib/Target/R600/AMDGPUInstructions.td
@@ -612,12 +612,14 @@ def umul24 : PatFrag <(ops node:$x, node:$y), (mul node:$x, node:$y)>;
 
 def cvt_rpi_i32_f32 : PatFrag <
   (ops node:$src),
-  (fp_to_sint (ffloor (fadd $src, FP_HALF)))
+  (fp_to_sint (ffloor (fadd $src, FP_HALF))),
+  [{ (void) N; return TM.Options.NoNaNsFPMath; }]
 >;
 
 def cvt_flr_i32_f32 : PatFrag <
   (ops node:$src),
-  (fp_to_sint (ffloor $src))
+  (fp_to_sint (ffloor $src)),
+  [{ (void)N; return TM.Options.NoNaNsFPMath; }]
 >;
 
 /*
diff --git a/llvm/test/CodeGen/R600/cvt_flr_i32_f32.ll b/llvm/test/CodeGen/R600/cvt_flr_i32_f32.ll
index 04a1f25..2dd3a9f 100644
--- a/llvm/test/CodeGen/R600/cvt_flr_i32_f32.ll
+++ b/llvm/test/CodeGen/R600/cvt_flr_i32_f32.ll
@@ -1,12 +1,14 @@
-; RUN: llc -march=amdgcn -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
-; RUN: llc -march=amdgcn -mcpu=tonga < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI-SAFE -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=SI -enable-no-nans-fp-math -verify-machineinstrs < %s | FileCheck -check-prefix=SI-NONAN -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
 
 declare float @llvm.fabs.f32(float) #1
 declare float @llvm.floor.f32(float) #1
 
 ; FUNC-LABEL: {{^}}cvt_flr_i32_f32_0:
+; SI-SAFE-NOT: v_cvt_flr_i32_f32
 ; SI-NOT: add
-; SI: v_cvt_flr_i32_f32_e32 v{{[0-9]+}}, s{{[0-9]+}}
+; SI-NONAN: v_cvt_flr_i32_f32_e32 v{{[0-9]+}}, s{{[0-9]+}}
 ; SI: s_endpgm
 define void @cvt_flr_i32_f32_0(i32 addrspace(1)* %out, float %x) #0 {
   %floor = call float @llvm.floor.f32(float %x) #1
@@ -17,7 +19,8 @@ define void @cvt_flr_i32_f32_0(i32 addrspace(1)* %out, float %x) #0 {
 
 ; FUNC-LABEL: {{^}}cvt_flr_i32_f32_1:
 ; SI: v_add_f32_e64 [[TMP:v[0-9]+]], 1.0, s{{[0-9]+}}
-; SI: v_cvt_flr_i32_f32_e32 v{{[0-9]+}}, [[TMP]]
+; SI-SAFE-NOT: v_cvt_flr_i32_f32
+; SI-NONAN: v_cvt_flr_i32_f32_e32 v{{[0-9]+}}, [[TMP]]
 ; SI: s_endpgm
 define void @cvt_flr_i32_f32_1(i32 addrspace(1)* %out, float %x) #0 {
   %fadd = fadd float %x, 1.0
@@ -29,7 +32,8 @@ define void @cvt_flr_i32_f32_1(i32 addrspace(1)* %out, float %x) #0 {
 
 ; FUNC-LABEL: {{^}}cvt_flr_i32_f32_fabs:
 ; SI-NOT: add
-; SI: v_cvt_flr_i32_f32_e64 v{{[0-9]+}}, |s{{[0-9]+}}|
+; SI-SAFE-NOT: v_cvt_flr_i32_f32
+; SI-NONAN: v_cvt_flr_i32_f32_e64 v{{[0-9]+}}, |s{{[0-9]+}}|
 ; SI: s_endpgm
 define void @cvt_flr_i32_f32_fabs(i32 addrspace(1)* %out, float %x) #0 {
   %x.fabs = call float @llvm.fabs.f32(float %x) #1
@@ -41,7 +45,8 @@ define void @cvt_flr_i32_f32_fabs(i32 addrspace(1)* %out, float %x) #0 {
 
 ; FUNC-LABEL: {{^}}cvt_flr_i32_f32_fneg:
 ; SI-NOT: add
-; SI: v_cvt_flr_i32_f32_e64 v{{[0-9]+}}, -s{{[0-9]+}}
+; SI-SAFE-NOT: v_cvt_flr_i32_f32
+; SI-NONAN: v_cvt_flr_i32_f32_e64 v{{[0-9]+}}, -s{{[0-9]+}}
 ; SI: s_endpgm
 define void @cvt_flr_i32_f32_fneg(i32 addrspace(1)* %out, float %x) #0 {
   %x.fneg = fsub float -0.000000e+00, %x
@@ -53,7 +58,8 @@ define void @cvt_flr_i32_f32_fneg(i32 addrspace(1)* %out, float %x) #0 {
 
 ; FUNC-LABEL: {{^}}cvt_flr_i32_f32_fabs_fneg:
 ; SI-NOT: add
-; SI: v_cvt_flr_i32_f32_e64 v{{[0-9]+}}, -|s{{[0-9]+}}|
+; SI-SAFE-NOT: v_cvt_flr_i32_f32
+; SI-NONAN: v_cvt_flr_i32_f32_e64 v{{[0-9]+}}, -|s{{[0-9]+}}|
 ; SI: s_endpgm
 define void @cvt_flr_i32_f32_fabs_fneg(i32 addrspace(1)* %out, float %x) #0 {
   %x.fabs = call float @llvm.fabs.f32(float %x) #1
diff --git a/llvm/test/CodeGen/R600/cvt_rpi_i32_f32.ll b/llvm/test/CodeGen/R600/cvt_rpi_i32_f32.ll
index 22c65de..864ac40 100644
--- a/llvm/test/CodeGen/R600/cvt_rpi_i32_f32.ll
+++ b/llvm/test/CodeGen/R600/cvt_rpi_i32_f32.ll
@@ -1,11 +1,13 @@
-; RUN: llc -march=amdgcn -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
-; RUN: llc -march=amdgcn -mcpu=tonga < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI-SAFE -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=SI -enable-no-nans-fp-math -verify-machineinstrs < %s | FileCheck -check-prefix=SI-NONAN -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=tonga < %s | FileCheck -check-prefix=SI-SAFE -check-prefix=SI -check-prefix=FUNC %s
 
 declare float @llvm.fabs.f32(float) #1
 declare float @llvm.floor.f32(float) #1
 
 ; FUNC-LABEL: {{^}}cvt_rpi_i32_f32:
-; SI: v_cvt_rpi_i32_f32_e32 v{{[0-9]+}}, s{{[0-9]+}}
+; SI-SAFE-NOT: v_cvt_rpi_i32_f32
+; SI-NONAN: v_cvt_rpi_i32_f32_e32 v{{[0-9]+}}, s{{[0-9]+}}
 ; SI: s_endpgm
 define void @cvt_rpi_i32_f32(i32 addrspace(1)* %out, float %x) #0 {
   %fadd = fadd float %x, 0.5
@@ -16,7 +18,8 @@ define void @cvt_rpi_i32_f32(i32 addrspace(1)* %out, float %x) #0 {
 }
 
 ; FUNC-LABEL: {{^}}cvt_rpi_i32_f32_fabs:
-; SI: v_cvt_rpi_i32_f32_e64 v{{[0-9]+}}, |s{{[0-9]+}}|{{$}}
+; SI-SAFE-NOT: v_cvt_rpi_i32_f32
+; SI-NONAN: v_cvt_rpi_i32_f32_e64 v{{[0-9]+}}, |s{{[0-9]+}}|{{$}}
 ; SI: s_endpgm
 define void @cvt_rpi_i32_f32_fabs(i32 addrspace(1)* %out, float %x) #0 {
   %x.fabs = call float @llvm.fabs.f32(float %x) #1
@@ -29,9 +32,10 @@ define void @cvt_rpi_i32_f32_fabs(i32 addrspace(1)* %out, float %x) #0 {
 
 ; FIXME: This doesn't work because it forms fsub 0.5, x
 ; FUNC-LABEL: {{^}}cvt_rpi_i32_f32_fneg:
-; XSI: v_cvt_rpi_i32_f32_e64 v{{[0-9]+}}, -s{{[0-9]+}}
+; XSI-NONAN: v_cvt_rpi_i32_f32_e64 v{{[0-9]+}}, -s{{[0-9]+}}
 ; SI: v_sub_f32_e64 [[TMP:v[0-9]+]], 0.5, s{{[0-9]+}}
-; SI: v_cvt_flr_i32_f32_e32 {{v[0-9]+}}, [[TMP]]
+; SI-SAFE-NOT: v_cvt_flr_i32_f32
+; SI-NONAN: v_cvt_flr_i32_f32_e32 {{v[0-9]+}}, [[TMP]]
 ; SI: s_endpgm
 define void @cvt_rpi_i32_f32_fneg(i32 addrspace(1)* %out, float %x) #0 {
   %x.fneg = fsub float -0.000000e+00, %x
@@ -44,10 +48,12 @@ define void @cvt_rpi_i32_f32_fneg(i32 addrspace(1)* %out, float %x) #0 {
 
 ; FIXME: This doesn't work for same reason as above
 ; FUNC-LABEL: {{^}}cvt_rpi_i32_f32_fabs_fneg:
-; XSI: v_cvt_rpi_i32_f32_e64 v{{[0-9]+}}, -|s{{[0-9]+}}|
+; SI-SAFE-NOT: v_cvt_rpi_i32_f32
+; XSI-NONAN: v_cvt_rpi_i32_f32_e64 v{{[0-9]+}}, -|s{{[0-9]+}}|
 
 ; SI: v_sub_f32_e64 [[TMP:v[0-9]+]], 0.5, |s{{[0-9]+}}|
-; SI: v_cvt_flr_i32_f32_e32 {{v[0-9]+}}, [[TMP]]
+; SI-SAFE-NOT: v_cvt_flr_i32_f32
+; SI-NONAN: v_cvt_flr_i32_f32_e32 {{v[0-9]+}}, [[TMP]]
 ; SI: s_endpgm
 define void @cvt_rpi_i32_f32_fabs_fneg(i32 addrspace(1)* %out, float %x) #0 {
   %x.fabs = call float @llvm.fabs.f32(float %x) #1
-- 
2.7.4