From 47c1ff7a43f9ae91c59d4acf10d49dba38585008 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Thu, 27 Oct 2016 17:07:40 +0000 Subject: [PATCH] [X86][AVX512DQ] Move v2i64 and v4i64 MUL lowering to tablegen As suggested by @igorb on D26011 llvm-svn: 285313 --- llvm/lib/Target/X86/X86ISelLowering.cpp | 27 ++++----------------------- llvm/lib/Target/X86/X86InstrAVX512.td | 17 +++++++++++++++++ 2 files changed, 21 insertions(+), 23 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index bd378cf..c481580 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -1404,12 +1404,12 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, } // Subtarget.hasCDI() if (Subtarget.hasDQI()) { - if (Subtarget.hasVLX()) { - setOperationAction(ISD::MUL, MVT::v2i64, Legal); - setOperationAction(ISD::MUL, MVT::v4i64, Legal); - } + // NonVLX sub-targets extend 128/256 vectors to use the 512 version. + setOperationAction(ISD::MUL, MVT::v2i64, Legal); + setOperationAction(ISD::MUL, MVT::v4i64, Legal); setOperationAction(ISD::MUL, MVT::v8i64, Legal); } + // Custom lower several nodes. for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64, MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64 }) { @@ -19854,25 +19854,6 @@ static SDValue LowerMUL(SDValue Op, const X86Subtarget &Subtarget, assert((VT == MVT::v2i64 || VT == MVT::v4i64 || VT == MVT::v8i64) && "Only know how to lower V2I64/V4I64/V8I64 multiply"); - // AVX512DQ - extend to 512 bit vector. - // FIXME: This can possibly be converted to a tablegen pattern. 
- if (Subtarget.hasDQI()) { - assert(!Subtarget.hasVLX() && "AVX512DQVL vXi64 multiply is legal"); - assert((VT == MVT::v2i64 || VT == MVT::v4i64) && - "AVX512DQ v8i64 multiply is legal"); - - MVT NewVT = MVT::getVectorVT(MVT::i64, 512 / VT.getScalarSizeInBits()); - SDValue A512 = - DAG.getNode(ISD::INSERT_SUBVECTOR, dl, NewVT, DAG.getUNDEF(NewVT), A, - DAG.getIntPtrConstant(0, dl)); - SDValue B512 = - DAG.getNode(ISD::INSERT_SUBVECTOR, dl, NewVT, DAG.getUNDEF(NewVT), B, - DAG.getIntPtrConstant(0, dl)); - SDValue MulNode = DAG.getNode(ISD::MUL, dl, NewVT, A512, B512); - return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, MulNode, - DAG.getIntPtrConstant(0, dl)); - } - // Ahi = psrlqi(a, 32); // Bhi = psrlqi(b, 32); // diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td index 71c6c8c..e1d20f6 100644 --- a/llvm/lib/Target/X86/X86InstrAVX512.td +++ b/llvm/lib/Target/X86/X86InstrAVX512.td @@ -4047,6 +4047,23 @@ defm VPMINUW : avx512_binop_rm_vl_w<0x3A, "vpminuw", umin, defm VPMINU : avx512_binop_rm_vl_dq<0x3B, 0x3B, "vpminu", umin, SSE_INTALU_ITINS_P, HasAVX512, 1>, T8PD; +// PMULLQ: Use the 512-bit version to implement 128/256-bit multiplies when VLX is unavailable (NoVLX). +let Predicates = [HasDQI, NoVLX] in { + def : Pat<(v4i64 (mul (v4i64 VR256X:$src1), (v4i64 VR256X:$src2))), + (EXTRACT_SUBREG + (VPMULLQZrr + (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm), + (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src2, sub_ymm)), + sub_ymm)>; + + def : Pat<(v2i64 (mul (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))), + (EXTRACT_SUBREG + (VPMULLQZrr + (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm), + (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src2, sub_xmm)), + sub_xmm)>; +} + //===----------------------------------------------------------------------===// // AVX-512 Logical Instructions //===----------------------------------------------------------------------===// -- 2.7.4