From: Craig Topper Date: Thu, 5 Mar 2015 06:38:42 +0000 (+0000) Subject: [X86] Use vmovss to handle inserting an element into index 0 of a v8f32 vector of... X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=0ee8470a436f696bace4514d56eb51960fc419cf;p=platform%2Fupstream%2Fllvm.git [X86] Use vmovss to handle inserting an element into index 0 of a v8f32 vector of zeros. llvm-svn: 231354 --- diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index e5af729..e511492 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -5634,12 +5634,13 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { if (ExtVT == MVT::i32 || ExtVT == MVT::f32 || ExtVT == MVT::f64 || (ExtVT == MVT::i64 && Subtarget->is64Bit())) { - if (VT.is256BitVector() || VT.is512BitVector()) { + if (VT.is512BitVector()) { SDValue ZeroVec = getZeroVector(VT, Subtarget, DAG, dl); return DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, ZeroVec, Item, DAG.getIntPtrConstant(0)); } - assert(VT.is128BitVector() && "Expected an SSE value type!"); + assert((VT.is128BitVector() || VT.is256BitVector()) && + "Expected an SSE value type!"); Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Item); // Turn it into a MOVL (i.e. movss, movsd, or movd) to a zero vector. return getShuffleVectorZeroOrUndef(Item, 0, true, Subtarget, DAG); @@ -9333,6 +9334,15 @@ static SDValue lowerV8F32VectorShuffle(SDValue Op, SDValue V1, SDValue V2, ArrayRef Mask = SVOp->getMask(); assert(Mask.size() == 8 && "Unexpected mask size for v8 shuffle!"); + // If we have a single input to the zero element, insert that into V1 if we + // can do so cheaply. + int NumV2Elements = + std::count_if(Mask.begin(), Mask.end(), [](int M) { return M >= 8; }); + if (NumV2Elements == 1 && Mask[0] >= 8) + if (SDValue Insertion = lowerVectorShuffleAsElementInsertion( + DL, MVT::v8f32, V1, V2, Mask, Subtarget, DAG)) + return Insertion; + if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v8f32, V1, V2, Mask, Subtarget, DAG)) return Blend; diff --git a/llvm/test/CodeGen/X86/2012-1-10-buildvector.ll b/llvm/test/CodeGen/X86/2012-1-10-buildvector.ll index a5f64c5..a9b8cc6 100644 --- a/llvm/test/CodeGen/X86/2012-1-10-buildvector.ll +++ b/llvm/test/CodeGen/X86/2012-1-10-buildvector.ll @@ -17,7 +17,8 @@ entry: ;CHECK-LABEL: bad_insert: define void @bad_insert(i32 %t) { entry: -;CHECK: vpinsrd +;CHECK: vxorps %ymm1, %ymm1, %ymm1 +;CHECK-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4,5,6,7] %v2 = insertelement <8 x i32> zeroinitializer, i32 %t, i32 0 store <8 x i32> %v2, <8 x i32> addrspace(1)* undef, align 32 ;CHECK: ret diff --git a/llvm/test/CodeGen/X86/avx-basic.ll b/llvm/test/CodeGen/X86/avx-basic.ll index 5307527..683f43b 100644 --- a/llvm/test/CodeGen/X86/avx-basic.ll +++ b/llvm/test/CodeGen/X86/avx-basic.ll @@ -91,3 +91,12 @@ entry: %vecext.i = extractelement <2 x i64> %a, i32 0 ret i64 %vecext.i } + +; PR22685 +; CHECK: mov00 +; CHECK vmovss +define <8 x float> @mov00_8f32(float* %ptr) { + %val = load float, float* %ptr + %vec = insertelement <8 x float> zeroinitializer, float %val, i32 0 + ret <8 x float> %vec +} diff --git a/llvm/test/CodeGen/X86/avx-load-store.ll b/llvm/test/CodeGen/X86/avx-load-store.ll index ee5bd0e..d2f213b 100644 --- a/llvm/test/CodeGen/X86/avx-load-store.ll +++ b/llvm/test/CodeGen/X86/avx-load-store.ll @@ -30,8 +30,7 @@ declare void @dummy(<4 x double>, <8 x float>, <4 x i64>) ; CHECK: mov00 define <8 x float> @mov00(<8 x float> %v, float * %ptr) nounwind { %val = load float, float* %ptr -; CHECK: vinsertps -; CHECK: vinsertf128 +; CHECK: vmovss (% %i0 = insertelement <8 x float> zeroinitializer, float %val, i32 0 ret <8 x float> %i0 ; CHECK: ret @@ -40,8 +39,7 @@ define <8 x float> @mov00(<8 x float> %v, float * %ptr) nounwind { ; CHECK: mov01 define <4 x double> @mov01(<4 x double> %v, double * %ptr) nounwind { %val = load double, double* %ptr -; CHECK: vmovlpd -; CHECK: vinsertf128 +; CHECK: vmovsd (% %i0 = insertelement <4 x double> zeroinitializer, double %val, i32 0 ret <4 x double> %i0 ; CHECK: ret diff --git a/llvm/test/CodeGen/X86/vector-shuffle-256-v8.ll b/llvm/test/CodeGen/X86/vector-shuffle-256-v8.ll index 624b3f2..417423a 100644 --- a/llvm/test/CodeGen/X86/vector-shuffle-256-v8.ll +++ b/llvm/test/CodeGen/X86/vector-shuffle-256-v8.ll @@ -131,11 +131,10 @@ define <8 x float> @shuffle_v8f32_70000000(<8 x float> %a, <8 x float> %b) { ; ; AVX2-LABEL: shuffle_v8f32_70000000: ; AVX2: # BB#0: -; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: movl $7, %eax -; AVX2-NEXT: vpinsrd $0, %eax, %xmm1, %xmm1 +; AVX2-NEXT: vmovd %eax, %xmm1 ; AVX2-NEXT: vpxor %ymm2, %ymm2, %ymm2 -; AVX2-NEXT: vinserti128 $0, %xmm1, %ymm2, %ymm1 +; AVX2-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0],ymm2[1,2,3,4,5,6,7] ; AVX2-NEXT: vpermps %ymm0, %ymm1, %ymm0 ; AVX2-NEXT: retq %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> @@ -961,11 +960,10 @@ define <8 x i32> @shuffle_v8i32_70000000(<8 x i32> %a, <8 x i32> %b) { ; ; AVX2-LABEL: shuffle_v8i32_70000000: ; AVX2: # BB#0: -; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: movl $7, %eax -; AVX2-NEXT: vpinsrd $0, %eax, %xmm1, %xmm1 +; AVX2-NEXT: vmovd %eax, %xmm1 ; AVX2-NEXT: vpxor %ymm2, %ymm2, %ymm2 -; AVX2-NEXT: vinserti128 $0, %xmm1, %ymm2, %ymm1 +; AVX2-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0],ymm2[1,2,3,4,5,6,7] ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0 ; AVX2-NEXT: retq %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32>