From f1eccbecf8c70c731ff125f8f076231084d580bb Mon Sep 17 00:00:00 2001 From: Ahmed Bougacha Date: Thu, 25 Jun 2015 00:44:46 +0000 Subject: [PATCH] [X86] Accept hasAVX512() as well as hasFMA() when generating FMA. We don't always have FMA, for example when using 'clang -mavx512f' without an explicit CPU. Also check for an explicit +avx512f instead of CPUs in a couple related tests. llvm-svn: 240616 --- llvm/lib/Target/X86/X86ISelLowering.cpp | 8 +++++--- llvm/test/CodeGen/X86/avx512-fma-intrinsics.ll | 2 +- llvm/test/CodeGen/X86/avx512-fma.ll | 2 +- llvm/test/CodeGen/X86/fma.ll | 1 + 4 files changed, 8 insertions(+), 5 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index ccf75f3..105c2dd 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -1111,7 +1111,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::CTPOP, MVT::v8i32, Custom); setOperationAction(ISD::CTPOP, MVT::v4i64, Custom); - if (Subtarget->hasFMA() || Subtarget->hasFMA4()) { + if (Subtarget->hasFMA() || Subtarget->hasFMA4() || Subtarget->hasAVX512()) { setOperationAction(ISD::FMA, MVT::v8f32, Legal); setOperationAction(ISD::FMA, MVT::v4f64, Legal); setOperationAction(ISD::FMA, MVT::v4f32, Legal); @@ -18636,7 +18636,7 @@ bool X86TargetLowering::isVectorLoadExtDesirable(SDValue) const { return true; } bool X86TargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const { - if (!(Subtarget->hasFMA() || Subtarget->hasFMA4())) + if (!(Subtarget->hasFMA() || Subtarget->hasFMA4() || Subtarget->hasAVX512())) return false; VT = VT.getScalarType(); @@ -19821,6 +19821,7 @@ X86TargetLowering::emitEHSjLjLongJmp(MachineInstr *MI, // Replace 213-type (isel default) FMA3 instructions with 231-type for // accumulator loops. Writing back to the accumulator allows the coalescer // to remove extra copies in the loop. +// FIXME: Do this on AVX512. We don't support 231 variants yet (PR23937). MachineBasicBlock * X86TargetLowering::emitFMA3Instr(MachineInstr *MI, MachineBasicBlock *MBB) const { @@ -24182,7 +24183,8 @@ static SDValue PerformFMACombine(SDNode *N, SelectionDAG &DAG, EVT ScalarVT = VT.getScalarType(); if ((ScalarVT != MVT::f32 && ScalarVT != MVT::f64) || - (!Subtarget->hasFMA() && !Subtarget->hasFMA4())) + (!Subtarget->hasFMA() && !Subtarget->hasFMA4() && + !Subtarget->hasAVX512())) return SDValue(); SDValue A = N->getOperand(0); diff --git a/llvm/test/CodeGen/X86/avx512-fma-intrinsics.ll b/llvm/test/CodeGen/X86/avx512-fma-intrinsics.ll index 9814a61..c1169f1 100644 --- a/llvm/test/CodeGen/X86/avx512-fma-intrinsics.ll +++ b/llvm/test/CodeGen/X86/avx512-fma-intrinsics.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl --show-mc-encoding | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512f --show-mc-encoding | FileCheck %s declare <16 x float> @llvm.x86.fma.mask.vfmadd.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32) declare <8 x double> @llvm.x86.fma.mask.vfmadd.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32) diff --git a/llvm/test/CodeGen/X86/avx512-fma.ll b/llvm/test/CodeGen/X86/avx512-fma.ll index d6926e2..c5603d4 100644 --- a/llvm/test/CodeGen/X86/avx512-fma.ll +++ b/llvm/test/CodeGen/X86/avx512-fma.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -fp-contract=fast | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512f -fp-contract=fast | FileCheck %s ; CHECK-LABEL: test_x86_fmadd_ps_z ; CHECK: vfmadd213ps %zmm2, %zmm1, %zmm0 diff --git a/llvm/test/CodeGen/X86/fma.ll b/llvm/test/CodeGen/X86/fma.ll index 322b2aa..b91479c 100644 --- a/llvm/test/CodeGen/X86/fma.ll +++ b/llvm/test/CodeGen/X86/fma.ll @@ -2,6 +2,7 @@ ; RUN: llc < %s -mtriple=i386-apple-darwin10 -mattr=-fma,-fma4 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-FMA-CALL ; RUN: llc < %s -mtriple=x86_64-apple-darwin10 -mattr=+fma,-fma4 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-FMA-INST ; RUN: llc < %s -mtriple=x86_64-apple-darwin10 -mattr=-fma,-fma4 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-FMA-CALL +; RUN: llc < %s -mtriple=x86_64-apple-darwin10 -mattr=+avx512f,-fma,-fma4 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-FMA-INST ; RUN: llc < %s -march=x86 -mcpu=bdver2 -mattr=-fma4 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-FMA-INST ; RUN: llc < %s -march=x86 -mcpu=bdver2 -mattr=-fma,-fma4 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-FMA-CALL -- 2.7.4