From 87aeff8bbb47a4eafdd83e114220af4f2cd56a28 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Thu, 28 Feb 2019 10:53:58 +0000 Subject: [PATCH] [X86][AVX] Fold vf64 concat_vectors(movddup(x),movddup(x)) -> broadcast(x) llvm-svn: 355078 --- llvm/lib/Target/X86/X86ISelLowering.cpp | 12 +++++++++++- llvm/test/CodeGen/X86/widened-broadcast.ll | 19 ++++--------------- 2 files changed, 15 insertions(+), 16 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 460c95a..76f6578 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -41777,6 +41777,8 @@ static SDValue combineConcatVectorOps(const SDLoc &DL, MVT VT, ArrayRef<SDValue> Ops, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget &Subtarget) { + assert(Subtarget.hasAVX() && "AVX assumed for concat_vectors"); + if (llvm::all_of(Ops, [](SDValue Op) { return Op.isUndef(); })) return DAG.getUNDEF(VT); @@ -41811,8 +41813,16 @@ static SDValue combineConcatVectorOps(const SDLoc &DL, MVT VT, Op0.getOpcode() == X86ISD::SUBV_BROADCAST) return DAG.getNode(Op0.getOpcode(), DL, VT, Op0.getOperand(0)); + // concat_vectors(movddup(x),movddup(x)) -> broadcast(x) + if (Op0.getOpcode() == X86ISD::MOVDDUP && VT == MVT::v4f64 && + (Subtarget.hasAVX2() || MayFoldLoad(Op0.getOperand(0)))) + return DAG.getNode(X86ISD::VBROADCAST, DL, VT, + DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f64, + Op0.getOperand(0), + DAG.getIntPtrConstant(0, DL))); + // concat_vectors(scalar_to_vector(x),scalar_to_vector(x)) -> broadcast(x) - if (Op0.getOpcode() == ISD::SCALAR_TO_VECTOR && Subtarget.hasAVX() && + if (Op0.getOpcode() == ISD::SCALAR_TO_VECTOR && (Subtarget.hasAVX2() || (VT.getScalarSizeInBits() >= 32 && MayFoldLoad(Op0.getOperand(0)))) && Op0.getOperand(0).getValueType() == VT.getScalarType()) diff --git a/llvm/test/CodeGen/X86/widened-broadcast.ll b/llvm/test/CodeGen/X86/widened-broadcast.ll index ae4f901..b43c8a4 100644 
--- a/llvm/test/CodeGen/X86/widened-broadcast.ll +++ b/llvm/test/CodeGen/X86/widened-broadcast.ll @@ -43,21 +43,10 @@ define <8 x float> @load_splat_8f32_4f32_01010101(<4 x float>* %ptr) nounwind uw ; SSE42-NEXT: movapd %xmm0, %xmm1 ; SSE42-NEXT: retq ; -; AVX1-LABEL: load_splat_8f32_4f32_01010101: -; AVX1: # %bb.0: # %entry -; AVX1-NEXT: vmovddup {{.*#+}} xmm0 = mem[0,0] -; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 -; AVX1-NEXT: retq -; -; AVX2-LABEL: load_splat_8f32_4f32_01010101: -; AVX2: # %bb.0: # %entry -; AVX2-NEXT: vbroadcastsd (%rdi), %ymm0 -; AVX2-NEXT: retq -; -; AVX512-LABEL: load_splat_8f32_4f32_01010101: -; AVX512: # %bb.0: # %entry -; AVX512-NEXT: vbroadcastsd (%rdi), %ymm0 -; AVX512-NEXT: retq +; AVX-LABEL: load_splat_8f32_4f32_01010101: +; AVX: # %bb.0: # %entry +; AVX-NEXT: vbroadcastsd (%rdi), %ymm0 +; AVX-NEXT: retq entry: %ld = load <4 x float>, <4 x float>* %ptr %ret = shufflevector <4 x float> %ld, <4 x float> undef, <8 x i32> -- 2.7.4