From: Ahmed Bougacha Date: Thu, 6 Nov 2014 22:04:15 +0000 (+0000) Subject: [X86] Add VFMADDSUB cases for the 213->231 custom inserter. X-Git-Tag: llvmorg-3.6.0-rc1~4141 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=b5367eeea3bcbd82b09547a49321a29f53dffafb;p=platform%2Fupstream%2Fllvm.git [X86] Add VFMADDSUB cases for the 213->231 custom inserter. Also add tests for vfmadd/vfmsub. llvm-svn: 221488 --- diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 7aa834cea512..ff56fd92d6b1 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -20492,6 +20492,11 @@ X86TargetLowering::emitFMA3Instr(MachineInstr *MI, case X86::VFNMSUBPSr213r: NewFMAOpc = X86::VFNMSUBPSr231r; break; case X86::VFNMSUBSDr213r: NewFMAOpc = X86::VFNMSUBSDr231r; break; case X86::VFNMSUBSSr213r: NewFMAOpc = X86::VFNMSUBSSr231r; break; + case X86::VFMADDSUBPDr213r: NewFMAOpc = X86::VFMADDSUBPDr231r; break; + case X86::VFMADDSUBPSr213r: NewFMAOpc = X86::VFMADDSUBPSr231r; break; + case X86::VFMSUBADDPDr213r: NewFMAOpc = X86::VFMSUBADDPDr231r; break; + case X86::VFMSUBADDPSr213r: NewFMAOpc = X86::VFMSUBADDPSr231r; break; + case X86::VFMADDPDr213rY: NewFMAOpc = X86::VFMADDPDr231rY; break; case X86::VFMADDPSr213rY: NewFMAOpc = X86::VFMADDPSr231rY; break; case X86::VFMSUBPDr213rY: NewFMAOpc = X86::VFMSUBPDr231rY; break; @@ -20500,6 +20505,10 @@ X86TargetLowering::emitFMA3Instr(MachineInstr *MI, case X86::VFNMADDPSr213rY: NewFMAOpc = X86::VFNMADDPSr231rY; break; case X86::VFNMSUBPDr213rY: NewFMAOpc = X86::VFNMSUBPDr231rY; break; case X86::VFNMSUBPSr213rY: NewFMAOpc = X86::VFNMSUBPSr231rY; break; + case X86::VFMADDSUBPDr213rY: NewFMAOpc = X86::VFMADDSUBPDr231rY; break; + case X86::VFMADDSUBPSr213rY: NewFMAOpc = X86::VFMADDSUBPSr231rY; break; + case X86::VFMSUBADDPDr213rY: NewFMAOpc = X86::VFMSUBADDPDr231rY; break; + case X86::VFMSUBADDPSr213rY: NewFMAOpc = X86::VFMSUBADDPSr231rY; break; default: llvm_unreachable("Unrecognized FMA variant."); } diff --git a/llvm/test/CodeGen/X86/fma-phi-213-to-231.ll b/llvm/test/CodeGen/X86/fma-phi-213-to-231.ll new file mode 100644 index 000000000000..29bfb92a038b --- /dev/null +++ b/llvm/test/CodeGen/X86/fma-phi-213-to-231.ll @@ -0,0 +1,246 @@ +; RUN: llc < %s -mcpu=core-avx2 | FileCheck %s +target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.10.0" + +; CHECK-LABEL: fmaddsubpd_loop +; CHECK: [[BODYLBL:LBB.+]]: +; CHECK: vfmaddsub231pd %ymm{{[0-9]+}}, %ymm{{[0-9]+}}, %ymm{{[0-9]+}} +; CHECK: [[INCLBL:LBB.+]]: +; CHECK: incl [[INDREG:%[a-z0-9]+]] +; CHECK: cmpl {{%.+}}, [[INDREG]] +; CHECK: jl [[BODYLBL]] +define <4 x double> @fmaddsubpd_loop(i32 %iter, <4 x double> %a, <4 x double> %b, <4 x double> %c) { +entry: + br label %for.cond + +for.cond: + %c.addr.0 = phi <4 x double> [ %c, %entry ], [ %0, %for.inc ] + %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ] + %cmp = icmp slt i32 %i.0, %iter + br i1 %cmp, label %for.body, label %for.end + +for.body: + br label %for.inc + +for.inc: + %0 = call <4 x double> @llvm.x86.fma.vfmaddsub.pd.256(<4 x double> %a, <4 x double> %b, <4 x double> %c.addr.0) + %inc = add nsw i32 %i.0, 1 + br label %for.cond + +for.end: + ret <4 x double> %c.addr.0 +} + +; CHECK-LABEL: fmsubaddpd_loop +; CHECK: [[BODYLBL:LBB.+]]: +; CHECK: vfmsubadd231pd %ymm{{[0-9]+}}, %ymm{{[0-9]+}}, %ymm{{[0-9]+}} +; CHECK: [[INCLBL:LBB.+]]: +; CHECK: incl [[INDREG:%[a-z0-9]+]] +; CHECK: cmpl {{%.+}}, [[INDREG]] +; CHECK: jl [[BODYLBL]] +define <4 x double> @fmsubaddpd_loop(i32 %iter, <4 x double> %a, <4 x double> %b, <4 x double> %c) { +entry: + br label %for.cond + +for.cond: + %c.addr.0 = phi <4 x double> [ %c, %entry ], [ %0, %for.inc ] + %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ] + %cmp = icmp slt i32 %i.0, %iter + br i1 %cmp, label %for.body, label %for.end + +for.body: + br label %for.inc + +for.inc: + %0 = call <4 x double> @llvm.x86.fma.vfmsubadd.pd.256(<4 x double> %a, <4 x double> %b, <4 x double> %c.addr.0) + %inc = add nsw i32 %i.0, 1 + br label %for.cond + +for.end: + ret <4 x double> %c.addr.0 +} + +; CHECK-LABEL: fmaddpd_loop +; CHECK: [[BODYLBL:LBB.+]]: +; CHECK: vfmadd231pd %ymm{{[0-9]+}}, %ymm{{[0-9]+}}, %ymm{{[0-9]+}} +; CHECK: [[INCLBL:LBB.+]]: +; CHECK: incl [[INDREG:%[a-z0-9]+]] +; CHECK: cmpl {{%.+}}, [[INDREG]] +; CHECK: jl [[BODYLBL]] +define <4 x double> @fmaddpd_loop(i32 %iter, <4 x double> %a, <4 x double> %b, <4 x double> %c) { +entry: + br label %for.cond + +for.cond: + %c.addr.0 = phi <4 x double> [ %c, %entry ], [ %0, %for.inc ] + %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ] + %cmp = icmp slt i32 %i.0, %iter + br i1 %cmp, label %for.body, label %for.end + +for.body: + br label %for.inc + +for.inc: + %0 = call <4 x double> @llvm.x86.fma.vfmadd.pd.256(<4 x double> %a, <4 x double> %b, <4 x double> %c.addr.0) + %inc = add nsw i32 %i.0, 1 + br label %for.cond + +for.end: + ret <4 x double> %c.addr.0 +} + +; CHECK-LABEL: fmsubpd_loop +; CHECK: [[BODYLBL:LBB.+]]: +; CHECK: vfmsub231pd %ymm{{[0-9]+}}, %ymm{{[0-9]+}}, %ymm{{[0-9]+}} +; CHECK: [[INCLBL:LBB.+]]: +; CHECK: incl [[INDREG:%[a-z0-9]+]] +; CHECK: cmpl {{%.+}}, [[INDREG]] +; CHECK: jl [[BODYLBL]] +define <4 x double> @fmsubpd_loop(i32 %iter, <4 x double> %a, <4 x double> %b, <4 x double> %c) { +entry: + br label %for.cond + +for.cond: + %c.addr.0 = phi <4 x double> [ %c, %entry ], [ %0, %for.inc ] + %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ] + %cmp = icmp slt i32 %i.0, %iter + br i1 %cmp, label %for.body, label %for.end + +for.body: + br label %for.inc + +for.inc: + %0 = call <4 x double> @llvm.x86.fma.vfmsub.pd.256(<4 x double> %a, <4 x double> %b, <4 x double> %c.addr.0) + %inc = add nsw i32 %i.0, 1 + br label %for.cond + +for.end: + ret <4 x double> %c.addr.0 +} + +declare <4 x double> @llvm.x86.fma.vfmaddsub.pd.256(<4 x double>, <4 x double>, <4 x double>) +declare <4 x double> @llvm.x86.fma.vfmsubadd.pd.256(<4 x double>, <4 x double>, <4 x double>) +declare <4 x double> @llvm.x86.fma.vfmadd.pd.256(<4 x double>, <4 x double>, <4 x double>) +declare <4 x double> @llvm.x86.fma.vfmsub.pd.256(<4 x double>, <4 x double>, <4 x double>) + + +; CHECK-LABEL: fmaddsubps_loop +; CHECK: [[BODYLBL:LBB.+]]: +; CHECK: vfmaddsub231ps %ymm{{[0-9]+}}, %ymm{{[0-9]+}}, %ymm{{[0-9]+}} +; CHECK: [[INCLBL:LBB.+]]: +; CHECK: incl [[INDREG:%[a-z0-9]+]] +; CHECK: cmpl {{%.+}}, [[INDREG]] +; CHECK: jl [[BODYLBL]] +define <8 x float> @fmaddsubps_loop(i32 %iter, <8 x float> %a, <8 x float> %b, <8 x float> %c) { +entry: + br label %for.cond + +for.cond: + %c.addr.0 = phi <8 x float> [ %c, %entry ], [ %0, %for.inc ] + %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ] + %cmp = icmp slt i32 %i.0, %iter + br i1 %cmp, label %for.body, label %for.end + +for.body: + br label %for.inc + +for.inc: + %0 = call <8 x float> @llvm.x86.fma.vfmaddsub.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %c.addr.0) + %inc = add nsw i32 %i.0, 1 + br label %for.cond + +for.end: + ret <8 x float> %c.addr.0 +} + +; CHECK-LABEL: fmsubaddps_loop +; CHECK: [[BODYLBL:LBB.+]]: +; CHECK: vfmsubadd231ps %ymm{{[0-9]+}}, %ymm{{[0-9]+}}, %ymm{{[0-9]+}} +; CHECK: [[INCLBL:LBB.+]]: +; CHECK: incl [[INDREG:%[a-z0-9]+]] +; CHECK: cmpl {{%.+}}, [[INDREG]] +; CHECK: jl [[BODYLBL]] +define <8 x float> @fmsubaddps_loop(i32 %iter, <8 x float> %a, <8 x float> %b, <8 x float> %c) { +entry: + br label %for.cond + +for.cond: + %c.addr.0 = phi <8 x float> [ %c, %entry ], [ %0, %for.inc ] + %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ] + %cmp = icmp slt i32 %i.0, %iter + br i1 %cmp, label %for.body, label %for.end + +for.body: + br label %for.inc + +for.inc: + %0 = call <8 x float> @llvm.x86.fma.vfmsubadd.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %c.addr.0) + %inc = add nsw i32 %i.0, 1 + br label %for.cond + +for.end: + ret <8 x float> %c.addr.0 +} + +; CHECK-LABEL: fmaddps_loop +; CHECK: [[BODYLBL:LBB.+]]: +; CHECK: vfmadd231ps %ymm{{[0-9]+}}, %ymm{{[0-9]+}}, %ymm{{[0-9]+}} +; CHECK: [[INCLBL:LBB.+]]: +; CHECK: incl [[INDREG:%[a-z0-9]+]] +; CHECK: cmpl {{%.+}}, [[INDREG]] +; CHECK: jl [[BODYLBL]] +define <8 x float> @fmaddps_loop(i32 %iter, <8 x float> %a, <8 x float> %b, <8 x float> %c) { +entry: + br label %for.cond + +for.cond: + %c.addr.0 = phi <8 x float> [ %c, %entry ], [ %0, %for.inc ] + %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ] + %cmp = icmp slt i32 %i.0, %iter + br i1 %cmp, label %for.body, label %for.end + +for.body: + br label %for.inc + +for.inc: + %0 = call <8 x float> @llvm.x86.fma.vfmadd.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %c.addr.0) + %inc = add nsw i32 %i.0, 1 + br label %for.cond + +for.end: + ret <8 x float> %c.addr.0 +} + +; CHECK-LABEL: fmsubps_loop +; CHECK: [[BODYLBL:LBB.+]]: +; CHECK: vfmsub231ps %ymm{{[0-9]+}}, %ymm{{[0-9]+}}, %ymm{{[0-9]+}} +; CHECK: [[INCLBL:LBB.+]]: +; CHECK: incl [[INDREG:%[a-z0-9]+]] +; CHECK: cmpl {{%.+}}, [[INDREG]] +; CHECK: jl [[BODYLBL]] +define <8 x float> @fmsubps_loop(i32 %iter, <8 x float> %a, <8 x float> %b, <8 x float> %c) { +entry: + br label %for.cond + +for.cond: + %c.addr.0 = phi <8 x float> [ %c, %entry ], [ %0, %for.inc ] + %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ] + %cmp = icmp slt i32 %i.0, %iter + br i1 %cmp, label %for.body, label %for.end + +for.body: + br label %for.inc + +for.inc: + %0 = call <8 x float> @llvm.x86.fma.vfmsub.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %c.addr.0) + %inc = add nsw i32 %i.0, 1 + br label %for.cond + +for.end: + ret <8 x float> %c.addr.0 +} + +declare <8 x float> @llvm.x86.fma.vfmaddsub.ps.256(<8 x float>, <8 x float>, <8 x float>) +declare <8 x float> @llvm.x86.fma.vfmsubadd.ps.256(<8 x float>, <8 x float>, <8 x float>) +declare <8 x float> @llvm.x86.fma.vfmadd.ps.256(<8 x float>, <8 x float>, <8 x float>) +declare <8 x float> @llvm.x86.fma.vfmsub.ps.256(<8 x float>, <8 x float>, <8 x float>)