From c6f8fdb4e5fad2cc443c135b39c1051c96df9c6c Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Thu, 26 Jun 2014 01:28:05 +0000 Subject: [PATCH] R600: Fix vector FMA llvm-svn: 211757 --- llvm/lib/Target/R600/AMDGPUISelLowering.cpp | 1 + llvm/test/CodeGen/R600/fma.ll | 72 ++++++++++++++++++++++++++--- 2 files changed, 66 insertions(+), 7 deletions(-) diff --git a/llvm/lib/Target/R600/AMDGPUISelLowering.cpp b/llvm/lib/Target/R600/AMDGPUISelLowering.cpp index 84e78ba..5ea05f7 100644 --- a/llvm/lib/Target/R600/AMDGPUISelLowering.cpp +++ b/llvm/lib/Target/R600/AMDGPUISelLowering.cpp @@ -339,6 +339,7 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) : setOperationAction(ISD::FFLOOR, VT, Expand); setOperationAction(ISD::FTRUNC, VT, Expand); setOperationAction(ISD::FMUL, VT, Expand); + setOperationAction(ISD::FMA, VT, Expand); setOperationAction(ISD::FRINT, VT, Expand); setOperationAction(ISD::FNEARBYINT, VT, Expand); setOperationAction(ISD::FSQRT, VT, Expand); diff --git a/llvm/test/CodeGen/R600/fma.ll b/llvm/test/CodeGen/R600/fma.ll index 51e9d29..d72ffec 100644 --- a/llvm/test/CodeGen/R600/fma.ll +++ b/llvm/test/CodeGen/R600/fma.ll @@ -1,8 +1,15 @@ -; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck %s +; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s -; CHECK: @fma_f32 -; CHECK: V_FMA_F32 {{v[0-9]+, v[0-9]+, v[0-9]+, v[0-9]+}} +declare float @llvm.fma.f32(float, float, float) nounwind readnone +declare <2 x float> @llvm.fma.v2f32(<2 x float>, <2 x float>, <2 x float>) nounwind readnone +declare <4 x float> @llvm.fma.v4f32(<4 x float>, <4 x float>, <4 x float>) nounwind readnone +declare double @llvm.fma.f64(double, double, double) nounwind readnone +declare <2 x double> @llvm.fma.v2f64(<2 x double>, <2 x double>, <2 x double>) nounwind readnone +declare <4 x double> @llvm.fma.v4f64(<4 x double>, <4 x double>, <4 x double>) nounwind readnone + +; FUNC-LABEL: @fma_f32 +; SI: V_FMA_F32 {{v[0-9]+, v[0-9]+, v[0-9]+, v[0-9]+}} define void @fma_f32(float addrspace(1)* %out, float addrspace(1)* %in1, float addrspace(1)* %in2, float addrspace(1)* %in3) { %r0 = load float addrspace(1)* %in1 @@ -13,11 +20,36 @@ define void @fma_f32(float addrspace(1)* %out, float addrspace(1)* %in1, ret void } -declare float @llvm.fma.f32(float, float, float) +; FUNC-LABEL: @fma_v2f32 +; SI: V_FMA_F32 +; SI: V_FMA_F32 +define void @fma_v2f32(<2 x float> addrspace(1)* %out, <2 x float> addrspace(1)* %in1, + <2 x float> addrspace(1)* %in2, <2 x float> addrspace(1)* %in3) { + %r0 = load <2 x float> addrspace(1)* %in1 + %r1 = load <2 x float> addrspace(1)* %in2 + %r2 = load <2 x float> addrspace(1)* %in3 + %r3 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %r0, <2 x float> %r1, <2 x float> %r2) + store <2 x float> %r3, <2 x float> addrspace(1)* %out + ret void +} -; CHECK: @fma_f64 -; CHECK: V_FMA_F64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}} +; FUNC-LABEL: @fma_v4f32 +; SI: V_FMA_F32 +; SI: V_FMA_F32 +; SI: V_FMA_F32 +; SI: V_FMA_F32 +define void @fma_v4f32(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in1, + <4 x float> addrspace(1)* %in2, <4 x float> addrspace(1)* %in3) { + %r0 = load <4 x float> addrspace(1)* %in1 + %r1 = load <4 x float> addrspace(1)* %in2 + %r2 = load <4 x float> addrspace(1)* %in3 + %r3 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %r0, <4 x float> %r1, <4 x float> %r2) + store <4 x float> %r3, <4 x float> addrspace(1)* %out + ret void +} +; FUNC-LABEL: @fma_f64 +; SI: V_FMA_F64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}} define void @fma_f64(double addrspace(1)* %out, double addrspace(1)* %in1, double addrspace(1)* %in2, double addrspace(1)* %in3) { %r0 = load double addrspace(1)* %in1 @@ -28,4 +60,30 @@ define void @fma_f64(double addrspace(1)* %out, double addrspace(1)* %in1, ret void } -declare double @llvm.fma.f64(double, double, double) +; FUNC-LABEL: @fma_v2f64 +; SI: V_FMA_F64 +; SI: V_FMA_F64 +define void @fma_v2f64(<2 x double> addrspace(1)* %out, <2 x double> addrspace(1)* %in1, + <2 x double> addrspace(1)* %in2, <2 x double> addrspace(1)* %in3) { + %r0 = load <2 x double> addrspace(1)* %in1 + %r1 = load <2 x double> addrspace(1)* %in2 + %r2 = load <2 x double> addrspace(1)* %in3 + %r3 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %r0, <2 x double> %r1, <2 x double> %r2) + store <2 x double> %r3, <2 x double> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: @fma_v4f64 +; SI: V_FMA_F64 +; SI: V_FMA_F64 +; SI: V_FMA_F64 +; SI: V_FMA_F64 +define void @fma_v4f64(<4 x double> addrspace(1)* %out, <4 x double> addrspace(1)* %in1, + <4 x double> addrspace(1)* %in2, <4 x double> addrspace(1)* %in3) { + %r0 = load <4 x double> addrspace(1)* %in1 + %r1 = load <4 x double> addrspace(1)* %in2 + %r2 = load <4 x double> addrspace(1)* %in3 + %r3 = tail call <4 x double> @llvm.fma.v4f64(<4 x double> %r0, <4 x double> %r1, <4 x double> %r2) + store <4 x double> %r3, <4 x double> addrspace(1)* %out + ret void +} -- 2.7.4