From c79dc70d500f0615b5543f0c73715aed07610d7a Mon Sep 17 00:00:00 2001
From: Matt Arsenault
Date: Tue, 15 Nov 2016 02:25:28 +0000
Subject: [PATCH] AMDGPU: Fix f16 fabs/fneg

llvm-svn: 286931
---
 llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp |   7 +-
 llvm/lib/Target/AMDGPU/SIInstructions.td      |  15 ++++
 llvm/test/CodeGen/AMDGPU/fabs.f16.ll          |  93 +++++++++++++++++++++
 llvm/test/CodeGen/AMDGPU/fneg-fabs.f16.ll     | 113 ++++++++++++++++++++++++++
 llvm/test/CodeGen/AMDGPU/fneg.f16.ll          |  61 ++++++++++++++
 llvm/test/CodeGen/AMDGPU/fneg.ll              |  38 ++++++---
 6 files changed, 312 insertions(+), 15 deletions(-)
 create mode 100644 llvm/test/CodeGen/AMDGPU/fabs.f16.ll
 create mode 100644 llvm/test/CodeGen/AMDGPU/fneg-fabs.f16.ll
 create mode 100644 llvm/test/CodeGen/AMDGPU/fneg.f16.ll

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index 56a0540..b4a7a65 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -558,13 +558,12 @@ bool AMDGPUTargetLowering::isCheapToSpeculateCtlz() const {
 
 bool AMDGPUTargetLowering::isFAbsFree(EVT VT) const {
   assert(VT.isFloatingPoint());
-  return VT == MVT::f32 || VT == MVT::f64;
+  return VT == MVT::f32 || VT == MVT::f64 || (Subtarget->has16BitInsts() &&
+                                              VT == MVT::f16);
 }
 
 bool AMDGPUTargetLowering::isFNegFree(EVT VT) const {
-  assert(VT.isFloatingPoint());
-  return VT == MVT::f32 || VT == MVT::f64 || (Subtarget->has16BitInsts() &&
-                                              VT == MVT::f16);
+  return isFAbsFree(VT);
 }
 
 bool AMDGPUTargetLowering::storeOfVectorConstantIsCheap(EVT MemVT,
diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
index 0905df9..15f3ac5 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -665,6 +665,21 @@ def : Pat <
   sub1)
 >;
 
+def : Pat <
+  (fneg f16:$src),
+  (V_XOR_B32_e32 $src, (V_MOV_B32_e32 (i32 0x00008000)))
+>;
+
+def : Pat <
+  (fabs f16:$src),
+  (V_AND_B32_e64 $src, (V_MOV_B32_e32 (i32 0x00007fff)))
+>;
+
+def : Pat <
+  (fneg (fabs f16:$src)),
+  (V_OR_B32_e64 $src, (V_MOV_B32_e32 (i32 0x00008000))) // Set sign bit
+>;
+
 /********** ================== **********/
 /********** Immediate Patterns **********/
 /********** ================== **********/
diff --git a/llvm/test/CodeGen/AMDGPU/fabs.f16.ll b/llvm/test/CodeGen/AMDGPU/fabs.f16.ll
new file mode 100644
index 0000000..c64aa62
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/fabs.f16.ll
@@ -0,0 +1,93 @@
+; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=CI %s
+; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
+
+; DAGCombiner will transform:
+; (fabs (f16 bitcast (i16 a))) => (f16 bitcast (and (i16 a), 0x7FFF))
+; unless isFabsFree returns true
+
+; GCN-LABEL: {{^}}fabs_free_f16:
+; GCN: flat_load_ushort [[VAL:v[0-9]+]],
+; GCN: v_and_b32_e32 [[RESULT:v[0-9]+]], 0x7fff, [[VAL]]
+; GCN: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
+
+define void @fabs_free_f16(half addrspace(1)* %out, i16 %in) {
+  %bc = bitcast i16 %in to half
+  %fabs = call half @llvm.fabs.f16(half %bc)
+  store half %fabs, half addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}fabs_f16:
+; CI: flat_load_ushort [[VAL:v[0-9]+]],
+; CI: v_cvt_f32_f16_e32 [[CVT0:v[0-9]+]], [[VAL]]
+; CI: v_cvt_f16_f32_e64 [[RESULT:v[0-9]+]], |[[CVT0]]|
+; CI: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
+define void @fabs_f16(half addrspace(1)* %out, half %in) {
+  %fabs = call half @llvm.fabs.f16(half %in)
+  store half %fabs, half addrspace(1)* %out
+  ret void
+}
+
+; FIXME: Should be able to use single and
+; GCN-LABEL: {{^}}fabs_v2f16:
+; CI: v_cvt_f32_f16_e32 v{{[0-9]+}},
+; CI: v_cvt_f32_f16_e32 v{{[0-9]+}},
+; CI: v_cvt_f16_f32_e64 v{{[0-9]+}}, |v{{[0-9]+}}|
+; CI: v_cvt_f16_f32_e64 v{{[0-9]+}}, |v{{[0-9]+}}|
+
+; VI: flat_load_ushort [[LO:v[0-9]+]]
+; VI: flat_load_ushort [[HI:v[0-9]+]]
+; VI: v_mov_b32_e32 [[MASK:v[0-9]+]], 0x7fff{{$}}
+; VI-DAG: v_and_b32_e32 [[FABS_LO:v[0-9]+]], [[MASK]], [[LO]]
+; VI-DAG: v_and_b32_e32 [[FABS_HI:v[0-9]+]], [[MASK]], [[HI]]
+; VI-DAG: v_lshlrev_b32_e32 v{{[0-9]+}}, 16,
+; VI-DAG: v_and_b32_e32 v{{[0-9]+}}, 0xffff,
+; VI: v_or_b32
+; VI: flat_store_dword
+define void @fabs_v2f16(<2 x half> addrspace(1)* %out, <2 x half> %in) {
+  %fabs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %in)
+  store <2 x half> %fabs, <2 x half> addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}fabs_v4f16:
+; CI: v_cvt_f16_f32_e64 v{{[0-9]+}}, |v{{[0-9]+}}|
+; CI: v_cvt_f16_f32_e64 v{{[0-9]+}}, |v{{[0-9]+}}|
+; CI: v_cvt_f16_f32_e64 v{{[0-9]+}}, |v{{[0-9]+}}|
+; CI: v_cvt_f16_f32_e64 v{{[0-9]+}}, |v{{[0-9]+}}|
+
+; VI: v_mov_b32_e32 [[MASK:v[0-9]+]], 0x7fff{{$}}
+; VI: v_and_b32_e32 v{{[0-9]+}}, [[MASK]], v{{[0-9]+}}
+; VI: v_and_b32_e32 v{{[0-9]+}}, [[MASK]], v{{[0-9]+}}
+; VI: v_and_b32_e32 v{{[0-9]+}}, [[MASK]], v{{[0-9]+}}
+; VI: v_and_b32_e32 v{{[0-9]+}}, [[MASK]], v{{[0-9]+}}
+
+; GCN: flat_store_dwordx2
+define void @fabs_v4f16(<4 x half> addrspace(1)* %out, <4 x half> %in) {
+  %fabs = call <4 x half> @llvm.fabs.v4f16(<4 x half> %in)
+  store <4 x half> %fabs, <4 x half> addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}fabs_fold_f16:
+; GCN: flat_load_ushort [[IN0:v[0-9]+]]
+; GCN: flat_load_ushort [[IN1:v[0-9]+]]
+; CI-DAG: v_cvt_f32_f16_e32 [[CVT0:v[0-9]+]], [[IN0]]
+; CI-DAG: v_cvt_f32_f16_e32 [[CVT1:v[0-9]+]], [[IN1]]
+; CI: v_mul_f32_e64 [[RESULT:v[0-9]+]], |[[CVT1]]|, [[CVT0]]
+; CI: v_cvt_f16_f32_e32 [[CVTRESULT:v[0-9]+]], [[RESULT]]
+; CI: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, [[CVTRESULT]]
+
+; VI-NOT: and
+; VI: v_mul_f16_e64 [[RESULT:v[0-9]+]], |[[IN1]]|, [[IN0]]
+; VI: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
+define void @fabs_fold_f16(half addrspace(1)* %out, half %in0, half %in1) {
+  %fabs = call half @llvm.fabs.f16(half %in0)
+  %fmul = fmul half %fabs, %in1
+  store half %fmul, half addrspace(1)* %out
+  ret void
+}
+
+declare half @llvm.fabs.f16(half) readnone
+declare <2 x half> @llvm.fabs.v2f16(<2 x half>) readnone
+declare <4 x half> @llvm.fabs.v4f16(<4 x half>) readnone
diff --git a/llvm/test/CodeGen/AMDGPU/fneg-fabs.f16.ll b/llvm/test/CodeGen/AMDGPU/fneg-fabs.f16.ll
new file mode 100644
index 0000000..d7d2131
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/fneg-fabs.f16.ll
@@ -0,0 +1,113 @@
+; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=GCN %s
+; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=GCN %s
+
+; GCN-LABEL: {{^}}fneg_fabs_fadd_f16:
+; CI: v_cvt_f32_f16_e32
+; CI: v_cvt_f32_f16_e32
+; CI: v_sub_f32_e64 v{{[0-9]+}}, v{{[0-9]+}}, |v{{[0-9]+}}|
+
+; VI-NOT: and
+; VI: v_sub_f16_e64 {{v[0-9]+}}, {{v[0-9]+}}, |{{v[0-9]+}}|
+define void @fneg_fabs_fadd_f16(half addrspace(1)* %out, half %x, half %y) {
+  %fabs = call half @llvm.fabs.f16(half %x)
+  %fsub = fsub half -0.000000e+00, %fabs
+  %fadd = fadd half %y, %fsub
+  store half %fadd, half addrspace(1)* %out, align 2
+  ret void
+}
+
+; GCN-LABEL: {{^}}fneg_fabs_fmul_f16:
+; CI: v_cvt_f32_f16_e32
+; CI: v_cvt_f32_f16_e32
+; CI: v_mul_f32_e64 {{v[0-9]+}}, {{v[0-9]+}}, -|{{v[0-9]+}}|
+; CI: v_cvt_f16_f32_e32
+
+; VI-NOT: and
+; VI: v_mul_f16_e64 {{v[0-9]+}}, {{v[0-9]+}}, -|{{v[0-9]+}}|
+; VI-NOT: and
+define void @fneg_fabs_fmul_f16(half addrspace(1)* %out, half %x, half %y) {
+  %fabs = call half @llvm.fabs.f16(half %x)
+  %fsub = fsub half -0.000000e+00, %fabs
+  %fmul = fmul half %y, %fsub
+  store half %fmul, half addrspace(1)* %out, align 2
+  ret void
+}
+
+; DAGCombiner will transform:
+; (fabs (f16 bitcast (i16 a))) => (f16 bitcast (and (i16 a), 0x7FFF))
+; unless isFabsFree returns true
+
+; GCN-LABEL: {{^}}fneg_fabs_free_f16:
+; GCN: v_or_b32_e32 v{{[0-9]+}}, 0x8000, v{{[0-9]+}}
+define void @fneg_fabs_free_f16(half addrspace(1)* %out, i16 %in) {
+  %bc = bitcast i16 %in to half
+  %fabs = call half @llvm.fabs.f16(half %bc)
+  %fsub = fsub half -0.000000e+00, %fabs
+  store half %fsub, half addrspace(1)* %out
+  ret void
+}
+
+; FIXME: Should use or
+; GCN-LABEL: {{^}}fneg_fabs_f16:
+; CI: v_cvt_f32_f16_e32 v{{[0-9]+}},
+; CI: v_cvt_f16_f32_e64 v{{[0-9]+}}, -|v{{[0-9]+}}|
+
+; VI: v_or_b32_e32 v{{[0-9]+}}, 0x8000, v{{[0-9]+}}
+define void @fneg_fabs_f16(half addrspace(1)* %out, half %in) {
+  %fabs = call half @llvm.fabs.f16(half %in)
+  %fsub = fsub half -0.000000e+00, %fabs
+  store half %fsub, half addrspace(1)* %out, align 2
+  ret void
+}
+
+; GCN-LABEL: {{^}}v_fneg_fabs_f16:
+; CI: v_cvt_f32_f16_e32 v{{[0-9]+}},
+; CI: v_cvt_f16_f32_e64 v{{[0-9]+}}, -|v{{[0-9]+}}|
+
+; VI: v_or_b32_e32 v{{[0-9]+}}, 0x8000, v{{[0-9]+}}
+define void @v_fneg_fabs_f16(half addrspace(1)* %out, half addrspace(1)* %in) {
+  %val = load half, half addrspace(1)* %in, align 2
+  %fabs = call half @llvm.fabs.f16(half %val)
+  %fsub = fsub half -0.000000e+00, %fabs
+  store half %fsub, half addrspace(1)* %out, align 2
+  ret void
+}
+
+; FIXME: single bit op
+; GCN-LABEL: {{^}}fneg_fabs_v2f16:
+; CI: v_cvt_f16_f32_e64 v{{[0-9]+}}, -|v{{[0-9]+}}|
+; CI: v_cvt_f16_f32_e64 v{{[0-9]+}}, -|v{{[0-9]+}}|
+
+; VI: s_mov_b32 [[MASK:s[0-9]+]], 0x8000{{$}}
+; VI: v_or_b32_e32 v{{[0-9]+}}, [[MASK]],
+; VI: v_or_b32_e32 v{{[0-9]+}}, [[MASK]],
+; VI: flat_store_dword
+define void @fneg_fabs_v2f16(<2 x half> addrspace(1)* %out, <2 x half> %in) {
+  %fabs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %in)
+  %fsub = fsub <2 x half> <half -0.000000e+00, half -0.000000e+00>, %fabs
+  store <2 x half> %fsub, <2 x half> addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}fneg_fabs_v4f16:
+; CI: v_cvt_f16_f32_e64 v{{[0-9]+}}, -|v{{[0-9]+}}|
+; CI: v_cvt_f16_f32_e64 v{{[0-9]+}}, -|v{{[0-9]+}}|
+; CI: v_cvt_f16_f32_e64 v{{[0-9]+}}, -|v{{[0-9]+}}|
+; CI: v_cvt_f16_f32_e64 v{{[0-9]+}}, -|v{{[0-9]+}}|
+
+; VI: s_mov_b32 [[MASK:s[0-9]+]], 0x8000{{$}}
+; VI: v_or_b32_e32 v{{[0-9]+}}, [[MASK]],
+; VI: v_or_b32_e32 v{{[0-9]+}}, [[MASK]],
+; VI: v_or_b32_e32 v{{[0-9]+}}, [[MASK]],
+; VI: v_or_b32_e32 v{{[0-9]+}}, [[MASK]],
+; VI: flat_store_dwordx2
+define void @fneg_fabs_v4f16(<4 x half> addrspace(1)* %out, <4 x half> %in) {
+  %fabs = call <4 x half> @llvm.fabs.v4f16(<4 x half> %in)
+  %fsub = fsub <4 x half> <half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00>, %fabs
+  store <4 x half> %fsub, <4 x half> addrspace(1)* %out
+  ret void
+}
+
+declare half @llvm.fabs.f16(half) readnone
+declare <2 x half> @llvm.fabs.v2f16(<2 x half>) readnone
+declare <4 x half> @llvm.fabs.v4f16(<4 x half>) readnone
diff --git a/llvm/test/CodeGen/AMDGPU/fneg.f16.ll b/llvm/test/CodeGen/AMDGPU/fneg.f16.ll
new file mode 100644
index 0000000..e3dfd92
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/fneg.f16.ll
@@ -0,0 +1,61 @@
+; RUN: llc -march=amdgcn -mcpu=kaveri -mtriple=amdgcn--amdhsa -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=GCN -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=tonga -mtriple=amdgcn--amdhsa -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=GCN -check-prefix=FUNC %s
+
+; FIXME: Should be able to do scalar op
+; FUNC-LABEL: {{^}}s_fneg_f16:
+
+define void @s_fneg_f16(half addrspace(1)* %out, half %in) {
+  %fneg = fsub half -0.000000e+00, %in
+  store half %fneg, half addrspace(1)* %out
+  ret void
+}
+
+; FIXME: Should be able to use bit operations when illegal type as
+; well.
+
+; FUNC-LABEL: {{^}}v_fneg_f16:
+; GCN: flat_load_ushort [[VAL:v[0-9]+]],
+
+; CI: v_cvt_f32_f16_e32 [[CVT0:v[0-9]+]], [[VAL]]
+; CI: v_cvt_f16_f32_e64 [[CVT1:v[0-9]+]], -[[CVT0]]
+; CI: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, [[CVT1]]
+
+; VI: v_xor_b32_e32 [[XOR:v[0-9]+]], 0x8000, [[VAL]]
+; VI: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, [[XOR]]
+define void @v_fneg_f16(half addrspace(1)* %out, half addrspace(1)* %in) {
+  %val = load half, half addrspace(1)* %in, align 2
+  %fneg = fsub half -0.000000e+00, %val
+  store half %fneg, half addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}fneg_free_f16:
+; GCN: flat_load_ushort [[NEG_VALUE:v[0-9]+]],
+
+; XCI: s_xor_b32 [[XOR:s[0-9]+]], [[NEG_VALUE]], 0x8000{{$}}
+; CI: v_xor_b32_e32 [[XOR:v[0-9]+]], 0x8000, [[NEG_VALUE]]
+; CI: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, [[XOR]]
+define void @fneg_free_f16(half addrspace(1)* %out, i16 %in) {
+  %bc = bitcast i16 %in to half
+  %fsub = fsub half -0.0, %bc
+  store half %fsub, half addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}v_fneg_fold_f16:
+; GCN: flat_load_ushort [[NEG_VALUE:v[0-9]+]]
+
+; CI: v_cvt_f32_f16_e32 [[CVT0:v[0-9]+]], [[NEG_VALUE]]
+; CI: v_mul_f32_e64 [[MUL:v[0-9]+]], -[[CVT0]], [[CVT0]]
+; CI: v_cvt_f16_f32_e32 [[CVT1:v[0-9]+]], [[MUL]]
+; CI: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, [[CVT1]]
+
+; VI-NOT: [[NEG_VALUE]]
+; VI: v_mul_f16_e64 v{{[0-9]+}}, -[[NEG_VALUE]], [[NEG_VALUE]]
+define void @v_fneg_fold_f16(half addrspace(1)* %out, half addrspace(1)* %in) {
+  %val = load half, half addrspace(1)* %in
+  %fsub = fsub half -0.0, %val
+  %fmul = fmul half %fsub, %val
+  store half %fmul, half addrspace(1)* %out
+  ret void
+}
diff --git a/llvm/test/CodeGen/AMDGPU/fneg.ll b/llvm/test/CodeGen/AMDGPU/fneg.ll
index a0fd539..941606c 100644
--- a/llvm/test/CodeGen/AMDGPU/fneg.ll
+++ b/llvm/test/CodeGen/AMDGPU/fneg.ll
@@ -1,30 +1,30 @@
-; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=FUNC %s
 ; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=GCN -check-prefix=FUNC %s
 ; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s
 
-; FUNC-LABEL: {{^}}fneg_f32:
+; FUNC-LABEL: {{^}}s_fneg_f32:
 ; R600: -PV
 
 ; GCN: v_xor_b32
-define void @fneg_f32(float addrspace(1)* %out, float %in) {
+define void @s_fneg_f32(float addrspace(1)* %out, float %in) {
   %fneg = fsub float -0.000000e+00, %in
   store float %fneg, float addrspace(1)* %out
   ret void
 }
 
-; FUNC-LABEL: {{^}}fneg_v2f32:
+; FUNC-LABEL: {{^}}s_fneg_v2f32:
 ; R600: -PV
 ; R600: -PV
 
 ; GCN: v_xor_b32
 ; GCN: v_xor_b32
-define void @fneg_v2f32(<2 x float> addrspace(1)* nocapture %out, <2 x float> %in) {
+define void @s_fneg_v2f32(<2 x float> addrspace(1)* nocapture %out, <2 x float> %in) {
   %fneg = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %in
   store <2 x float> %fneg, <2 x float> addrspace(1)* %out
   ret void
 }
 
-; FUNC-LABEL: {{^}}fneg_v4f32:
+; FUNC-LABEL: {{^}}s_fneg_v4f32:
 ; R600: -PV
 ; R600: -T
 ; R600: -PV
@@ -34,7 +34,7 @@ define void @fneg_v2f32(<2 x float> addrspace(1)* nocapture %out, <2 x float> %i
 ; GCN: v_xor_b32
 ; GCN: v_xor_b32
 ; GCN: v_xor_b32
-define void @fneg_v4f32(<4 x float> addrspace(1)* nocapture %out, <4 x float> %in) {
+define void @s_fneg_v4f32(<4 x float> addrspace(1)* nocapture %out, <4 x float> %in) {
   %fneg = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %in
   store <4 x float> %fneg, <4 x float> addrspace(1)* %out
   ret void
@@ -44,15 +44,31 @@ define void @fneg_v4f32(<4 x float> addrspace(1)* nocapture %out, <4 x float> %i
 ; (fneg (f32 bitcast (i32 a))) => (f32 bitcast (xor (i32 a), 0x80000000))
 ; unless the target returns true for isNegFree()
 
-; FUNC-LABEL: {{^}}fneg_free_f32:
+; FUNC-LABEL: {{^}}fsub0_f32:
+
+; GCN: v_sub_f32_e64 v{{[0-9]}}, 0, s{{[0-9]+$}}
+
 ; R600-NOT: XOR
 ; R600: -KC0[2].Z
+define void @fsub0_f32(float addrspace(1)* %out, i32 %in) {
+  %bc = bitcast i32 %in to float
+  %fsub = fsub float 0.0, %bc
+  store float %fsub, float addrspace(1)* %out
+  ret void
+}
+; FUNC-LABEL: {{^}}fneg_free_f32:
+; SI: s_load_dword [[NEG_VALUE:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0xb
+; VI: s_load_dword [[NEG_VALUE:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0x2c
 
-; XXX: We could use v_add_f32_e64 with the negate bit here instead.
-; GCN: v_sub_f32_e64 v{{[0-9]}}, 0, s{{[0-9]+$}}
+; GCN: v_bfrev_b32_e32 [[SIGNBIT:v[0-9]+]], 1{{$}}
+; GCN: v_xor_b32_e32 [[RES:v[0-9]+]], [[NEG_VALUE]], [[SIGNBIT]]
+; GCN: buffer_store_dword [[RES]]
+
+; R600-NOT: XOR
+; R600: -PV.W
 define void @fneg_free_f32(float addrspace(1)* %out, i32 %in) {
   %bc = bitcast i32 %in to float
-  %fsub = fsub float 0.0, %bc
+  %fsub = fsub float -0.0, %bc
   store float %fsub, float addrspace(1)* %out
   ret void
 }
-- 
2.7.4
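Note (appended for context, not part of the applied patch): the three new TableGen
patterns rely on the standard IEEE binary16 sign-bit identities, where bit 15 is
the sign. A minimal C sketch of those identities follows; the helper names are
illustrative only and do not appear anywhere in LLVM:

    #include <stdint.h>

    /* fneg: flip the sign bit of a raw binary16 value */
    static inline uint16_t fneg_f16_bits(uint16_t x)      { return x ^ 0x8000u; }
    /* fabs: clear the sign bit */
    static inline uint16_t fabs_f16_bits(uint16_t x)      { return x & 0x7fffu; }
    /* fneg(fabs): set the sign bit */
    static inline uint16_t fneg_fabs_f16_bits(uint16_t x) { return x | 0x8000u; }

These map one-to-one onto the V_XOR_B32/V_AND_B32/V_OR_B32 selections above, and
explain why the f16 masks are 0x8000 and 0x7fff rather than the 0x80000000 and
0x7fffffff constants used by the corresponding f32 patterns.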